aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/tools/perf
diff options
context:
space:
mode:
Diffstat (limited to 'tools/perf')
-rw-r--r--tools/perf/.gitignore1
-rw-r--r--tools/perf/Documentation/Makefile5
-rw-r--r--tools/perf/Documentation/intel-pt.txt992
-rw-r--r--tools/perf/Documentation/perf-annotate.txt6
-rw-r--r--tools/perf/Documentation/perf-config.txt88
-rw-r--r--tools/perf/Documentation/perf-inject.txt3
-rw-r--r--tools/perf/Documentation/perf-intel-pt.txt1007
-rw-r--r--tools/perf/Documentation/perf-record.txt25
-rw-r--r--tools/perf/Documentation/perf-report.txt15
-rw-r--r--tools/perf/Documentation/perf-sched.txt4
-rw-r--r--tools/perf/Documentation/perf-script.txt16
-rw-r--r--tools/perf/Documentation/perf-stat.txt9
-rw-r--r--tools/perf/Documentation/perf-top.txt15
-rw-r--r--tools/perf/MANIFEST1
-rw-r--r--tools/perf/Makefile2
-rw-r--r--tools/perf/Makefile.config17
-rw-r--r--tools/perf/Makefile.perf13
-rw-r--r--tools/perf/arch/arm/util/cs-etm.c20
-rw-r--r--tools/perf/arch/arm64/util/Build2
-rw-r--r--tools/perf/arch/arm64/util/arm-spe.c37
-rw-r--r--tools/perf/arch/arm64/util/header.c63
-rw-r--r--tools/perf/arch/arm64/util/machine.c27
-rw-r--r--tools/perf/arch/arm64/util/perf_regs.c2
-rw-r--r--tools/perf/arch/arm64/util/sym-handling.c19
-rw-r--r--tools/perf/arch/powerpc/entry/syscalls/syscall.tbl2
-rw-r--r--tools/perf/arch/powerpc/util/Build1
-rw-r--r--tools/perf/arch/powerpc/util/perf_regs.c4
-rw-r--r--tools/perf/arch/powerpc/util/sym-handling.c10
-rw-r--r--tools/perf/arch/x86/entry/syscalls/syscall_64.tbl738
-rw-r--r--tools/perf/arch/x86/tests/insn-x86-dat-32.c112
-rw-r--r--tools/perf/arch/x86/tests/insn-x86-dat-64.c196
-rw-r--r--tools/perf/arch/x86/tests/insn-x86-dat-src.c236
-rw-r--r--tools/perf/arch/x86/util/auxtrace.c14
-rw-r--r--tools/perf/arch/x86/util/event.c12
-rw-r--r--tools/perf/arch/x86/util/header.c4
-rw-r--r--tools/perf/arch/x86/util/intel-bts.c41
-rw-r--r--tools/perf/arch/x86/util/intel-pt.c47
-rw-r--r--tools/perf/arch/x86/util/machine.c6
-rw-r--r--tools/perf/arch/x86/util/perf_regs.c8
-rw-r--r--tools/perf/arch/x86/util/pmu.c6
-rw-r--r--tools/perf/bench/bench.h4
-rw-r--r--tools/perf/bench/epoll-ctl.c8
-rw-r--r--tools/perf/bench/epoll-wait.c12
-rw-r--r--tools/perf/bench/futex-hash.c13
-rw-r--r--tools/perf/bench/futex-lock-pi.c12
-rw-r--r--tools/perf/bench/futex-requeue.c1
-rw-r--r--tools/perf/bench/futex-wake-parallel.c1
-rw-r--r--tools/perf/bench/futex-wake.c5
-rw-r--r--tools/perf/builtin-annotate.c11
-rw-r--r--tools/perf/builtin-c2c.c14
-rw-r--r--tools/perf/builtin-diff.c25
-rw-r--r--tools/perf/builtin-probe.c6
-rw-r--r--tools/perf/builtin-record.c44
-rw-r--r--tools/perf/builtin-report.c68
-rw-r--r--tools/perf/builtin-sched.c13
-rw-r--r--tools/perf/builtin-script.c130
-rw-r--r--tools/perf/builtin-stat.c4
-rw-r--r--tools/perf/builtin-top.c45
-rw-r--r--tools/perf/builtin-trace.c4
-rwxr-xr-xtools/perf/check-headers.sh4
-rw-r--r--tools/perf/examples/bpf/5sec.c8
-rw-r--r--tools/perf/examples/bpf/empty.c2
-rw-r--r--tools/perf/examples/bpf/sys_enter_openat.c2
-rw-r--r--tools/perf/lib/Build13
-rw-r--r--tools/perf/lib/Documentation/Makefile7
-rw-r--r--tools/perf/lib/Documentation/man/libperf.rst100
-rw-r--r--tools/perf/lib/Documentation/tutorial/tutorial.rst123
-rw-r--r--tools/perf/lib/Makefile188
-rw-r--r--tools/perf/lib/core.c38
-rw-r--r--tools/perf/lib/cpumap.c345
-rw-r--r--tools/perf/lib/evlist.c641
-rw-r--r--tools/perf/lib/evsel.c301
-rw-r--r--tools/perf/lib/include/internal/cpumap.h19
-rw-r--r--tools/perf/lib/include/internal/evlist.h127
-rw-r--r--tools/perf/lib/include/internal/evsel.h63
-rw-r--r--tools/perf/lib/include/internal/lib.h12
-rw-r--r--tools/perf/lib/include/internal/mmap.h55
-rw-r--r--tools/perf/lib/include/internal/tests.h33
-rw-r--r--tools/perf/lib/include/internal/threadmap.h23
-rw-r--r--tools/perf/lib/include/internal/xyarray.h36
-rw-r--r--tools/perf/lib/include/perf/core.h25
-rw-r--r--tools/perf/lib/include/perf/cpumap.h28
-rw-r--r--tools/perf/lib/include/perf/event.h385
-rw-r--r--tools/perf/lib/include/perf/evlist.h49
-rw-r--r--tools/perf/lib/include/perf/evsel.h40
-rw-r--r--tools/perf/lib/include/perf/mmap.h15
-rw-r--r--tools/perf/lib/include/perf/threadmap.h20
-rw-r--r--tools/perf/lib/internal.h23
-rw-r--r--tools/perf/lib/lib.c48
-rw-r--r--tools/perf/lib/libperf.map51
-rw-r--r--tools/perf/lib/libperf.pc.template11
-rw-r--r--tools/perf/lib/mmap.c275
-rw-r--r--tools/perf/lib/tests/Makefile38
-rw-r--r--tools/perf/lib/tests/test-cpumap.c31
-rw-r--r--tools/perf/lib/tests/test-evlist.c413
-rw-r--r--tools/perf/lib/tests/test-evsel.c135
-rw-r--r--tools/perf/lib/tests/test-threadmap.c31
-rw-r--r--tools/perf/lib/threadmap.c91
-rw-r--r--tools/perf/lib/xyarray.c33
-rw-r--r--tools/perf/pmu-events/arch/s390/cf_z15/crypto6.json8
-rw-r--r--tools/perf/pmu-events/arch/s390/cf_z15/extended.json30
-rw-r--r--tools/perf/pmu-events/arch/test/test_cpu/branch.json (renamed from tools/perf/pmu-events/arch/x86/amdfam17h/branch.json)0
-rw-r--r--tools/perf/pmu-events/arch/test/test_cpu/other.json26
-rw-r--r--tools/perf/pmu-events/arch/test/test_cpu/uncore.json21
-rw-r--r--tools/perf/pmu-events/arch/x86/amdfam17h/cache.json329
-rw-r--r--tools/perf/pmu-events/arch/x86/amdfam17h/other.json65
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen1/branch.json23
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen1/cache.json294
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen1/core.json (renamed from tools/perf/pmu-events/arch/x86/amdfam17h/core.json)15
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen1/floating-point.json (renamed from tools/perf/pmu-events/arch/x86/amdfam17h/floating-point.json)64
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen1/memory.json (renamed from tools/perf/pmu-events/arch/x86/amdfam17h/memory.json)82
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen1/other.json56
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen2/branch.json52
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen2/cache.json338
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen2/core.json130
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen2/floating-point.json140
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen2/memory.json341
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen2/other.json115
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json5
-rw-r--r--tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/mapfile.csv3
-rw-r--r--tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json5
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json5
-rw-r--r--tools/perf/pmu-events/jevents.c64
-rw-r--r--tools/perf/pmu-events/jevents.h2
-rw-r--r--tools/perf/pmu-events/pmu-events.h1
-rw-r--r--tools/perf/scripts/perl/check-perf-trace.pl6
-rw-r--r--tools/perf/scripts/perl/failed-syscalls.pl2
-rw-r--r--tools/perf/scripts/perl/rw-by-file.pl6
-rw-r--r--tools/perf/scripts/perl/rw-by-pid.pl10
-rw-r--r--tools/perf/scripts/perl/rwtop.pl10
-rw-r--r--tools/perf/scripts/perl/wakeup-latency.pl6
-rw-r--r--tools/perf/tests/.gitignore1
-rw-r--r--tools/perf/tests/Build1
-rw-r--r--tools/perf/tests/bp_account.c2
-rw-r--r--tools/perf/tests/bp_signal.c10
-rw-r--r--tools/perf/tests/builtin-test.c9
-rw-r--r--tools/perf/tests/cpumap.c1
-rw-r--r--tools/perf/tests/expr.c10
-rw-r--r--tools/perf/tests/make10
-rw-r--r--tools/perf/tests/pmu-events.c379
-rw-r--r--tools/perf/tests/sample-parsing.c13
-rw-r--r--tools/perf/tests/shell/lib/probe_vfs_getname.sh2
-rw-r--r--tools/perf/tests/tests.h1
-rw-r--r--tools/perf/trace/beauty/beauty.h2
-rw-r--r--tools/perf/trace/beauty/clone.c1
-rw-r--r--tools/perf/trace/beauty/mmap.c1
-rw-r--r--tools/perf/trace/beauty/prctl.c3
-rw-r--r--tools/perf/trace/beauty/sockaddr.c2
-rw-r--r--tools/perf/ui/browsers/annotate.c19
-rw-r--r--tools/perf/ui/browsers/hists.c379
-rw-r--r--tools/perf/ui/browsers/hists.h2
-rw-r--r--tools/perf/ui/browsers/res_sample.c2
-rw-r--r--tools/perf/ui/browsers/scripts.c2
-rw-r--r--tools/perf/ui/gtk/Build7
-rw-r--r--tools/perf/ui/gtk/annotate.c2
-rw-r--r--tools/perf/ui/hist.c93
-rw-r--r--tools/perf/ui/keysyms.h1
-rw-r--r--tools/perf/ui/tui/util.c12
-rw-r--r--tools/perf/ui/util.h2
-rw-r--r--tools/perf/util/Build11
-rw-r--r--tools/perf/util/annotate.c215
-rw-r--r--tools/perf/util/annotate.h16
-rw-r--r--tools/perf/util/auxtrace.c22
-rw-r--r--tools/perf/util/auxtrace.h6
-rw-r--r--tools/perf/util/block-info.c109
-rw-r--r--tools/perf/util/block-info.h9
-rw-r--r--tools/perf/util/branch.h22
-rw-r--r--tools/perf/util/c++/clang.cpp4
-rw-r--r--tools/perf/util/cgroup.c143
-rw-r--r--tools/perf/util/cgroup.h17
-rw-r--r--tools/perf/util/config.c12
-rw-r--r--tools/perf/util/config.h1
-rw-r--r--tools/perf/util/cpumap.c10
-rw-r--r--tools/perf/util/cs-etm.c159
-rw-r--r--tools/perf/util/dsos.c22
-rw-r--r--tools/perf/util/env.c6
-rw-r--r--tools/perf/util/env.h6
-rw-r--r--tools/perf/util/event.c39
-rw-r--r--tools/perf/util/event.h7
-rw-r--r--tools/perf/util/evsel.c44
-rw-r--r--tools/perf/util/evsel.h7
-rw-r--r--tools/perf/util/evsel_config.h5
-rw-r--r--tools/perf/util/expr.c112
-rw-r--r--tools/perf/util/expr.h8
-rw-r--r--tools/perf/util/expr.l114
-rw-r--r--tools/perf/util/expr.y188
-rw-r--r--tools/perf/util/header.c39
-rw-r--r--tools/perf/util/hist.c16
-rw-r--r--tools/perf/util/hist.h6
-rw-r--r--tools/perf/util/intel-pt.c2
-rw-r--r--tools/perf/util/llvm-utils.c3
-rw-r--r--tools/perf/util/machine.c80
-rw-r--r--tools/perf/util/machine.h3
-rw-r--r--tools/perf/util/map.c28
-rw-r--r--tools/perf/util/metricgroup.c158
-rw-r--r--tools/perf/util/mmap.c61
-rw-r--r--tools/perf/util/mmap.h13
-rw-r--r--tools/perf/util/parse-events.c83
-rw-r--r--tools/perf/util/parse-events.l12
-rw-r--r--tools/perf/util/parse-events.y2
-rw-r--r--tools/perf/util/perf_event_attr_fprintf.c3
-rw-r--r--tools/perf/util/pmu.c39
-rw-r--r--tools/perf/util/pmu.h5
-rw-r--r--tools/perf/util/probe-file.c31
-rw-r--r--tools/perf/util/probe-finder.c14
-rw-r--r--tools/perf/util/python-ext-sources1
-rw-r--r--tools/perf/util/record.h1
-rw-r--r--tools/perf/util/scripting-engines/trace-event-python.c33
-rw-r--r--tools/perf/util/session.c12
-rw-r--r--tools/perf/util/setup.py12
-rw-r--r--tools/perf/util/sort.c46
-rw-r--r--tools/perf/util/sort.h4
-rw-r--r--tools/perf/util/srcline.c16
-rw-r--r--tools/perf/util/stat-display.c45
-rw-r--r--tools/perf/util/stat-shadow.c17
-rw-r--r--tools/perf/util/stat.h1
-rw-r--r--tools/perf/util/symbol-elf.c13
-rw-r--r--tools/perf/util/symbol.c30
-rw-r--r--tools/perf/util/symbol_conf.h1
-rw-r--r--tools/perf/util/synthetic-events.c137
-rw-r--r--tools/perf/util/synthetic-events.h1
-rw-r--r--tools/perf/util/tool.h2
-rw-r--r--tools/perf/util/util.c18
-rw-r--r--tools/perf/util/util.h2
233 files changed, 6787 insertions, 6886 deletions
diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore
index bf1252dc2cb0..f3f84781fd74 100644
--- a/tools/perf/.gitignore
+++ b/tools/perf/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
PERF-CFLAGS
PERF-GUI-VARS
PERF-VERSION-FILE
diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile
index adc5a7e44b98..31824d5269cc 100644
--- a/tools/perf/Documentation/Makefile
+++ b/tools/perf/Documentation/Makefile
@@ -295,7 +295,10 @@ $(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : $(OUTPUT)%.xml
$(OUTPUT)%.xml : %.txt
$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
$(ASCIIDOC) -b docbook -d manpage \
- $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \
+ $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) \
+ -aperf_date=$(shell git log -1 --pretty="format:%cd" \
+ --date=short $<) \
+ -o $@+ $< && \
mv $@+ $@
XSLT = docbook.xsl
diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt
index 2cf2d9e9d0da..fd9241a1b987 100644
--- a/tools/perf/Documentation/intel-pt.txt
+++ b/tools/perf/Documentation/intel-pt.txt
@@ -1,991 +1 @@
-Intel Processor Trace
-=====================
-
-Overview
-========
-
-Intel Processor Trace (Intel PT) is an extension of Intel Architecture that
-collects information about software execution such as control flow, execution
-modes and timings and formats it into highly compressed binary packets.
-Technical details are documented in the Intel 64 and IA-32 Architectures
-Software Developer Manuals, Chapter 36 Intel Processor Trace.
-
-Intel PT is first supported in Intel Core M and 5th generation Intel Core
-processors that are based on the Intel micro-architecture code name Broadwell.
-
-Trace data is collected by 'perf record' and stored within the perf.data file.
-See below for options to 'perf record'.
-
-Trace data must be 'decoded' which involves walking the object code and matching
-the trace data packets. For example a TNT packet only tells whether a
-conditional branch was taken or not taken, so to make use of that packet the
-decoder must know precisely which instruction was being executed.
-
-Decoding is done on-the-fly. The decoder outputs samples in the same format as
-samples output by perf hardware events, for example as though the "instructions"
-or "branches" events had been recorded. Presently 3 tools support this:
-'perf script', 'perf report' and 'perf inject'. See below for more information
-on using those tools.
-
-The main distinguishing feature of Intel PT is that the decoder can determine
-the exact flow of software execution. Intel PT can be used to understand why
-and how software got to a certain point, or behaved a certain way. The
-software does not have to be recompiled, so Intel PT works with debug or release
-builds, however the executed images are needed - which makes use in JIT-compiled
-environments, or with self-modified code, a challenge. Also symbols need to be
-provided to make sense of addresses.
-
-A limitation of Intel PT is that it produces huge amounts of trace data
-(hundreds of megabytes per second per core) which takes a long time to decode,
-for example two or three orders of magnitude longer than it took to collect.
-Another limitation is the performance impact of tracing, something that will
-vary depending on the use-case and architecture.
-
-
-Quickstart
-==========
-
-It is important to start small. That is because it is easy to capture vastly
-more data than can possibly be processed.
-
-The simplest thing to do with Intel PT is userspace profiling of small programs.
-Data is captured with 'perf record' e.g. to trace 'ls' userspace-only:
-
- perf record -e intel_pt//u ls
-
-And profiled with 'perf report' e.g.
-
- perf report
-
-To also trace kernel space presents a problem, namely kernel self-modifying
-code. A fairly good kernel image is available in /proc/kcore but to get an
-accurate image a copy of /proc/kcore needs to be made under the same conditions
-as the data capture. A script perf-with-kcore can do that, but beware that the
-script makes use of 'sudo' to copy /proc/kcore. If you have perf installed
-locally from the source tree you can do:
-
- ~/libexec/perf-core/perf-with-kcore record pt_ls -e intel_pt// -- ls
-
-which will create a directory named 'pt_ls' and put the perf.data file and
-copies of /proc/kcore, /proc/kallsyms and /proc/modules into it. Then to use
-'perf report' becomes:
-
- ~/libexec/perf-core/perf-with-kcore report pt_ls
-
-Because samples are synthesized after-the-fact, the sampling period can be
-selected for reporting. e.g. sample every microsecond
-
- ~/libexec/perf-core/perf-with-kcore report pt_ls --itrace=i1usge
-
-See the sections below for more information about the --itrace option.
-
-Beware the smaller the period, the more samples that are produced, and the
-longer it takes to process them.
-
-Also note that the coarseness of Intel PT timing information will start to
-distort the statistical value of the sampling as the sampling period becomes
-smaller.
-
-To represent software control flow, "branches" samples are produced. By default
-a branch sample is synthesized for every single branch. To get an idea what
-data is available you can use the 'perf script' tool with all itrace sampling
-options, which will list all the samples.
-
- perf record -e intel_pt//u ls
- perf script --itrace=ibxwpe
-
-An interesting field that is not printed by default is 'flags' which can be
-displayed as follows:
-
- perf script --itrace=ibxwpe -F+flags
-
-The flags are "bcrosyiABEx" which stand for branch, call, return, conditional,
-system, asynchronous, interrupt, transaction abort, trace begin, trace end, and
-in transaction, respectively.
-
-Another interesting field that is not printed by default is 'ipc' which can be
-displayed as follows:
-
- perf script --itrace=be -F+ipc
-
-There are two ways that instructions-per-cycle (IPC) can be calculated depending
-on the recording.
-
-If the 'cyc' config term (see config terms section below) was used, then IPC is
-calculated using the cycle count from CYC packets, otherwise MTC packets are
-used - refer to the 'mtc' config term. When MTC is used, however, the values
-are less accurate because the timing is less accurate.
-
-Because Intel PT does not update the cycle count on every branch or instruction,
-the values will often be zero. When there are values, they will be the number
-of instructions and number of cycles since the last update, and thus represent
-the average IPC since the last IPC for that event type. Note IPC for "branches"
-events is calculated separately from IPC for "instructions" events.
-
-Also note that the IPC instruction count may or may not include the current
-instruction. If the cycle count is associated with an asynchronous branch
-(e.g. page fault or interrupt), then the instruction count does not include the
-current instruction, otherwise it does. That is consistent with whether or not
-that instruction has retired when the cycle count is updated.
-
-Another note, in the case of "branches" events, non-taken branches are not
-presently sampled, so IPC values for them do not appear e.g. a CYC packet with a
-TNT packet that starts with a non-taken branch. To see every possible IPC
-value, "instructions" events can be used e.g. --itrace=i0ns
-
-While it is possible to create scripts to analyze the data, an alternative
-approach is available to export the data to a sqlite or postgresql database.
-Refer to script export-to-sqlite.py or export-to-postgresql.py for more details,
-and to script exported-sql-viewer.py for an example of using the database.
-
-There is also script intel-pt-events.py which provides an example of how to
-unpack the raw data for power events and PTWRITE.
-
-As mentioned above, it is easy to capture too much data. One way to limit the
-data captured is to use 'snapshot' mode which is explained further below.
-Refer to 'new snapshot option' and 'Intel PT modes of operation' further below.
-
-Another problem that will be experienced is decoder errors. They can be caused
-by inability to access the executed image, self-modified or JIT-ed code, or the
-inability to match side-band information (such as context switches and mmaps)
-which results in the decoder not knowing what code was executed.
-
-There is also the problem of perf not being able to copy the data fast enough,
-resulting in data lost because the buffer was full. See 'Buffer handling' below
-for more details.
-
-
-perf record
-===========
-
-new event
----------
-
-The Intel PT kernel driver creates a new PMU for Intel PT. PMU events are
-selected by providing the PMU name followed by the "config" separated by slashes.
-An enhancement has been made to allow default "config" e.g. the option
-
- -e intel_pt//
-
-will use a default config value. Currently that is the same as
-
- -e intel_pt/tsc,noretcomp=0/
-
-which is the same as
-
- -e intel_pt/tsc=1,noretcomp=0/
-
-Note there are now new config terms - see section 'config terms' further below.
-
-The config terms are listed in /sys/devices/intel_pt/format. They are bit
-fields within the config member of the struct perf_event_attr which is
-passed to the kernel by the perf_event_open system call. They correspond to bit
-fields in the IA32_RTIT_CTL MSR. Here is a list of them and their definitions:
-
- $ grep -H . /sys/bus/event_source/devices/intel_pt/format/*
- /sys/bus/event_source/devices/intel_pt/format/cyc:config:1
- /sys/bus/event_source/devices/intel_pt/format/cyc_thresh:config:19-22
- /sys/bus/event_source/devices/intel_pt/format/mtc:config:9
- /sys/bus/event_source/devices/intel_pt/format/mtc_period:config:14-17
- /sys/bus/event_source/devices/intel_pt/format/noretcomp:config:11
- /sys/bus/event_source/devices/intel_pt/format/psb_period:config:24-27
- /sys/bus/event_source/devices/intel_pt/format/tsc:config:10
-
-Note that the default config must be overridden for each term i.e.
-
- -e intel_pt/noretcomp=0/
-
-is the same as:
-
- -e intel_pt/tsc=1,noretcomp=0/
-
-So, to disable TSC packets use:
-
- -e intel_pt/tsc=0/
-
-It is also possible to specify the config value explicitly:
-
- -e intel_pt/config=0x400/
-
-Note that, as with all events, the event is suffixed with event modifiers:
-
- u userspace
- k kernel
- h hypervisor
- G guest
- H host
- p precise ip
-
-'h', 'G' and 'H' are for virtualization which is not supported by Intel PT.
-'p' is also not relevant to Intel PT. So only options 'u' and 'k' are
-meaningful for Intel PT.
-
-perf_event_attr is displayed if the -vv option is used e.g.
-
- ------------------------------------------------------------
- perf_event_attr:
- type 6
- size 112
- config 0x400
- { sample_period, sample_freq } 1
- sample_type IP|TID|TIME|CPU|IDENTIFIER
- read_format ID
- disabled 1
- inherit 1
- exclude_kernel 1
- exclude_hv 1
- enable_on_exec 1
- sample_id_all 1
- ------------------------------------------------------------
- sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8
- sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8
- sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8
- sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8
- ------------------------------------------------------------
-
-
-config terms
-------------
-
-The June 2015 version of Intel 64 and IA-32 Architectures Software Developer
-Manuals, Chapter 36 Intel Processor Trace, defined new Intel PT features.
-Some of the features are reflected in new config terms. All the config terms are
-described below.
-
-tsc Always supported. Produces TSC timestamp packets to provide
- timing information. In some cases it is possible to decode
- without timing information, for example a per-thread context
- that does not overlap executable memory maps.
-
- The default config selects tsc (i.e. tsc=1).
-
-noretcomp Always supported. Disables "return compression" so a TIP packet
- is produced when a function returns. Causes more packets to be
- produced but might make decoding more reliable.
-
- The default config does not select noretcomp (i.e. noretcomp=0).
-
-psb_period Allows the frequency of PSB packets to be specified.
-
- The PSB packet is a synchronization packet that provides a
- starting point for decoding or recovery from errors.
-
- Support for psb_period is indicated by:
-
- /sys/bus/event_source/devices/intel_pt/caps/psb_cyc
-
- which contains "1" if the feature is supported and "0"
- otherwise.
-
- Valid values are given by:
-
- /sys/bus/event_source/devices/intel_pt/caps/psb_periods
-
- which contains a hexadecimal value, the bits of which represent
- valid values e.g. bit 2 set means value 2 is valid.
-
- The psb_period value is converted to the approximate number of
- trace bytes between PSB packets as:
-
- 2 ^ (value + 11)
-
- e.g. value 3 means 16KiB between PSBs
-
- If an invalid value is entered, the error message
- will give a list of valid values e.g.
-
- $ perf record -e intel_pt/psb_period=15/u uname
- Invalid psb_period for intel_pt. Valid values are: 0-5
-
- If MTC packets are selected, the default config selects a value
- of 3 (i.e. psb_period=3) or the nearest lower value that is
- supported (0 is always supported). Otherwise the default is 0.
-
- If decoding is expected to be reliable and the buffer is large
- then a large PSB period can be used.
-
- Because a TSC packet is produced with PSB, the PSB period can
- also affect the granularity of timing information in the absence
- of MTC or CYC.
-
-mtc Produces MTC timing packets.
-
- MTC packets provide finer grain timestamp information than TSC
- packets. MTC packets record time using the hardware crystal
- clock (CTC) which is related to TSC packets using a TMA packet.
-
- Support for this feature is indicated by:
-
- /sys/bus/event_source/devices/intel_pt/caps/mtc
-
- which contains "1" if the feature is supported and
- "0" otherwise.
-
- The frequency of MTC packets can also be specified - see
- mtc_period below.
-
-mtc_period Specifies how frequently MTC packets are produced - see mtc
- above for how to determine if MTC packets are supported.
-
- Valid values are given by:
-
- /sys/bus/event_source/devices/intel_pt/caps/mtc_periods
-
- which contains a hexadecimal value, the bits of which represent
- valid values e.g. bit 2 set means value 2 is valid.
-
- The mtc_period value is converted to the MTC frequency as:
-
- CTC-frequency / (2 ^ value)
-
- e.g. value 3 means one eighth of CTC-frequency
-
- Where CTC is the hardware crystal clock, the frequency of which
- can be related to TSC via values provided in cpuid leaf 0x15.
-
- If an invalid value is entered, the error message
- will give a list of valid values e.g.
-
- $ perf record -e intel_pt/mtc_period=15/u uname
- Invalid mtc_period for intel_pt. Valid values are: 0,3,6,9
-
- The default value is 3 or the nearest lower value
- that is supported (0 is always supported).
-
-cyc Produces CYC timing packets.
-
- CYC packets provide even finer grain timestamp information than
- MTC and TSC packets. A CYC packet contains the number of CPU
- cycles since the last CYC packet. Unlike MTC and TSC packets,
- CYC packets are only sent when another packet is also sent.
-
- Support for this feature is indicated by:
-
- /sys/bus/event_source/devices/intel_pt/caps/psb_cyc
-
- which contains "1" if the feature is supported and
- "0" otherwise.
-
- The number of CYC packets produced can be reduced by specifying
- a threshold - see cyc_thresh below.
-
-cyc_thresh Specifies how frequently CYC packets are produced - see cyc
- above for how to determine if CYC packets are supported.
-
- Valid cyc_thresh values are given by:
-
- /sys/bus/event_source/devices/intel_pt/caps/cycle_thresholds
-
- which contains a hexadecimal value, the bits of which represent
- valid values e.g. bit 2 set means value 2 is valid.
-
- The cyc_thresh value represents the minimum number of CPU cycles
- that must have passed before a CYC packet can be sent. The
- number of CPU cycles is:
-
- 2 ^ (value - 1)
-
- e.g. value 4 means 8 CPU cycles must pass before a CYC packet
- can be sent. Note a CYC packet is still only sent when another
- packet is sent, not at, e.g. every 8 CPU cycles.
-
- If an invalid value is entered, the error message
- will give a list of valid values e.g.
-
- $ perf record -e intel_pt/cyc,cyc_thresh=15/u uname
- Invalid cyc_thresh for intel_pt. Valid values are: 0-12
-
- CYC packets are not requested by default.
-
-pt Specifies pass-through which enables the 'branch' config term.
-
- The default config selects 'pt' if it is available, so a user will
- never need to specify this term.
-
-branch Enable branch tracing. Branch tracing is enabled by default so to
- disable branch tracing use 'branch=0'.
-
- The default config selects 'branch' if it is available.
-
-ptw Enable PTWRITE packets which are produced when a ptwrite instruction
- is executed.
-
- Support for this feature is indicated by:
-
- /sys/bus/event_source/devices/intel_pt/caps/ptwrite
-
- which contains "1" if the feature is supported and
- "0" otherwise.
-
-fup_on_ptw Enable a FUP packet to follow the PTWRITE packet. The FUP packet
- provides the address of the ptwrite instruction. In the absence of
- fup_on_ptw, the decoder will use the address of the previous branch
- if branch tracing is enabled, otherwise the address will be zero.
- Note that fup_on_ptw will work even when branch tracing is disabled.
-
-pwr_evt Enable power events. The power events provide information about
- changes to the CPU C-state.
-
- Support for this feature is indicated by:
-
- /sys/bus/event_source/devices/intel_pt/caps/power_event_trace
-
- which contains "1" if the feature is supported and
- "0" otherwise.
-
-
-AUX area sampling option
-------------------------
-
-To select Intel PT "sampling" the AUX area sampling option can be used:
-
- --aux-sample
-
-Optionally it can be followed by the sample size in bytes e.g.
-
- --aux-sample=8192
-
-In addition, the Intel PT event to sample must be defined e.g.
-
- -e intel_pt//u
-
-Samples on other events will be created containing Intel PT data e.g. the
-following will create Intel PT samples on the branch-misses event, note the
-events must be grouped using {}:
-
- perf record --aux-sample -e '{intel_pt//u,branch-misses:u}'
-
-An alternative to '--aux-sample' is to add the config term 'aux-sample-size' to
-events. In this case, the grouping is implied e.g.
-
- perf record -e intel_pt//u -e branch-misses/aux-sample-size=8192/u
-
-is the same as:
-
- perf record -e '{intel_pt//u,branch-misses/aux-sample-size=8192/u}'
-
-but allows for also using an address filter e.g.:
-
- perf record -e intel_pt//u --filter 'filter * @/bin/ls' -e branch-misses/aux-sample-size=8192/u -- ls
-
-It is important to select a sample size that is big enough to contain at least
-one PSB packet. If not a warning will be displayed:
-
- Intel PT sample size (%zu) may be too small for PSB period (%zu)
-
-The calculation used for that is: if sample_size <= psb_period + 256 display the
-warning. When sampling is used, psb_period defaults to 0 (2KiB).
-
-The default sample size is 4KiB.
-
-The sample size is passed in aux_sample_size in struct perf_event_attr. The
-sample size is limited by the maximum event size which is 64KiB. It is
-difficult to know how big the event might be without the trace sample attached,
-but the tool validates that the sample size is not greater than 60KiB.
-
-
-new snapshot option
--------------------
-
-The difference between full trace and snapshot from the kernel's perspective is
-that in full trace we don't overwrite trace data that the user hasn't collected
-yet (and indicated that by advancing aux_tail), whereas in snapshot mode we let
-the trace run and overwrite older data in the buffer so that whenever something
-interesting happens, we can stop it and grab a snapshot of what was going on
-around that interesting moment.
-
-To select snapshot mode a new option has been added:
-
- -S
-
-Optionally it can be followed by the snapshot size e.g.
-
- -S0x100000
-
-The default snapshot size is the auxtrace mmap size. If neither auxtrace mmap size
-nor snapshot size is specified, then the default is 4MiB for privileged users
-(or if /proc/sys/kernel/perf_event_paranoid < 0), 128KiB for unprivileged users.
-If an unprivileged user does not specify mmap pages, the mmap pages will be
-reduced as described in the 'new auxtrace mmap size option' section below.
-
-The snapshot size is displayed if the option -vv is used e.g.
-
- Intel PT snapshot size: %zu
-
-
-new auxtrace mmap size option
----------------------------
-
-Intel PT buffer size is specified by an addition to the -m option e.g.
-
- -m,16
-
-selects a buffer size of 16 pages i.e. 64KiB.
-
-Note that the existing functionality of -m is unchanged. The auxtrace mmap size
-is specified by the optional addition of a comma and the value.
-
-The default auxtrace mmap size for Intel PT is 4MiB/page_size for privileged users
-(or if /proc/sys/kernel/perf_event_paranoid < 0), 128KiB for unprivileged users.
-If an unprivileged user does not specify mmap pages, the mmap pages will be
-reduced from the default 512KiB/page_size to 256KiB/page_size, otherwise the
-user is likely to get an error as they exceed their mlock limit (Max locked
-memory as shown in /proc/self/limits). Note that perf does not count the first
-512KiB (actually /proc/sys/kernel/perf_event_mlock_kb minus 1 page) per cpu
-against the mlock limit so an unprivileged user is allowed 512KiB per cpu plus
-their mlock limit (which defaults to 64KiB but is not multiplied by the number
-of cpus).
-
-In full-trace mode, powers of two are allowed for buffer size, with a minimum
-size of 2 pages. In snapshot mode or sampling mode, it is the same but the
-minimum size is 1 page.
-
-The mmap size and auxtrace mmap size are displayed if the -vv option is used e.g.
-
- mmap length 528384
- auxtrace mmap length 4198400
-
-
-Intel PT modes of operation
----------------------------
-
-Intel PT can be used in 3 modes:
- full-trace mode
- sample mode
- snapshot mode
-
-Full-trace mode traces continuously e.g.
-
- perf record -e intel_pt//u uname
-
-Sample mode attaches an Intel PT sample to other events e.g.
-
- perf record --aux-sample -e intel_pt//u -e branch-misses:u
-
-Snapshot mode captures the available data when a signal is sent e.g.
-
- perf record -v -e intel_pt//u -S ./loopy 1000000000 &
- [1] 11435
- kill -USR2 11435
- Recording AUX area tracing snapshot
-
-Note that the signal sent is SIGUSR2.
-Note that "Recording AUX area tracing snapshot" is displayed because the -v
-option is used.
-
-The 3 modes cannot be used together.
-
-
-Buffer handling
----------------
-
-There may be buffer limitations (i.e. single ToPa entry) which means that actual
-buffer sizes are limited to powers of 2 up to 4MiB (MAX_ORDER). In order to
-provide other sizes, and in particular an arbitrarily large size, multiple
-buffers are logically concatenated. However an interrupt must be used to switch
-between buffers. That has two potential problems:
- a) the interrupt may not be handled in time so that the current buffer
- becomes full and some trace data is lost.
- b) the interrupts may slow the system and affect the performance
- results.
-
-If trace data is lost, the driver sets 'truncated' in the PERF_RECORD_AUX event
-which the tools report as an error.
-
-In full-trace mode, the driver waits for data to be copied out before allowing
-the (logical) buffer to wrap-around. If data is not copied out quickly enough,
-again 'truncated' is set in the PERF_RECORD_AUX event. If the driver has to
-wait, the intel_pt event gets disabled. Because it is difficult to know when
-that happens, perf tools always re-enable the intel_pt event after copying out
-data.
-
-
-Intel PT and build ids
-----------------------
-
-By default "perf record" post-processes the event stream to find all build ids
-for executables for all addresses sampled. Deliberately, Intel PT is not
-decoded for that purpose (it would take too long). Instead the build ids for
-all executables encountered (due to mmap, comm or task events) are included
-in the perf.data file.
-
-To see buildids included in the perf.data file use the command:
-
- perf buildid-list
-
-If the perf.data file contains Intel PT data, that is the same as:
-
- perf buildid-list --with-hits
-
-
-Snapshot mode and event disabling
----------------------------------
-
-In order to make a snapshot, the intel_pt event is disabled using an IOCTL,
-namely PERF_EVENT_IOC_DISABLE. However doing that can also disable the
-collection of side-band information. In order to prevent that, a dummy
-software event has been introduced that permits tracking events (like mmaps) to
-continue to be recorded while intel_pt is disabled. That is important to ensure
-there is complete side-band information to allow the decoding of subsequent
-snapshots.
-
-A test has been created for that. To find the test:
-
- perf test list
- ...
- 23: Test using a dummy software event to keep tracking
-
-To run the test:
-
- perf test 23
- 23: Test using a dummy software event to keep tracking : Ok
-
-
-perf record modes (nothing new here)
-------------------------------------
-
-perf record essentially operates in one of three modes:
- per thread
- per cpu
- workload only
-
-"per thread" mode is selected by -t or by --per-thread (with -p or -u or just a
-workload).
-"per cpu" is selected by -C or -a.
-"workload only" mode is selected by not using the other options but providing a
-command to run (i.e. the workload).
-
-In per-thread mode an exact list of threads is traced. There is no inheritance.
-Each thread has its own event buffer.
-
-In per-cpu mode all processes (or processes from the selected cgroup i.e. -G
-option, or processes selected with -p or -u) are traced. Each cpu has its own
-buffer. Inheritance is allowed.
-
-In workload-only mode, the workload is traced but with per-cpu buffers.
-Inheritance is allowed. Note that you can now trace a workload in per-thread
-mode by using the --per-thread option.
-
-
-Privileged vs non-privileged users
-----------------------------------
-
-Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users
-have memory limits imposed upon them. That affects what buffer sizes they can
-have as outlined above.
-
-The v4.2 kernel introduced support for a context switch metadata event,
-PERF_RECORD_SWITCH, which allows unprivileged users to see when their processes
-are scheduled out and in, just not by whom, which is left for the
-PERF_RECORD_SWITCH_CPU_WIDE, that is only accessible in system wide context,
-which in turn requires CAP_SYS_ADMIN.
-
-Please see the 45ac1403f564 ("perf: Add PERF_RECORD_SWITCH to indicate context
-switches") commit, that introduces these metadata events for further info.
-
-When working with kernels < v4.2, the following considerations must be taken,
-as the sched:sched_switch tracepoints will be used to receive such information:
-
-Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users are
-not permitted to use tracepoints which means there is insufficient side-band
-information to decode Intel PT in per-cpu mode, and potentially workload-only
-mode too if the workload creates new processes.
-
-Note also, that to use tracepoints, read-access to debugfs is required. So if
-debugfs is not mounted or the user does not have read-access, it will again not
-be possible to decode Intel PT in per-cpu mode.
-
-
-sched_switch tracepoint
------------------------
-
-The sched_switch tracepoint is used to provide side-band data for Intel PT
-decoding in kernels where the PERF_RECORD_SWITCH metadata event isn't
-available.
-
-The sched_switch events are automatically added. e.g. the second event shown
-below:
-
- $ perf record -vv -e intel_pt//u uname
- ------------------------------------------------------------
- perf_event_attr:
- type 6
- size 112
- config 0x400
- { sample_period, sample_freq } 1
- sample_type IP|TID|TIME|CPU|IDENTIFIER
- read_format ID
- disabled 1
- inherit 1
- exclude_kernel 1
- exclude_hv 1
- enable_on_exec 1
- sample_id_all 1
- ------------------------------------------------------------
- sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8
- sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8
- sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8
- sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8
- ------------------------------------------------------------
- perf_event_attr:
- type 2
- size 112
- config 0x108
- { sample_period, sample_freq } 1
- sample_type IP|TID|TIME|CPU|PERIOD|RAW|IDENTIFIER
- read_format ID
- inherit 1
- sample_id_all 1
- exclude_guest 1
- ------------------------------------------------------------
- sys_perf_event_open: pid -1 cpu 0 group_fd -1 flags 0x8
- sys_perf_event_open: pid -1 cpu 1 group_fd -1 flags 0x8
- sys_perf_event_open: pid -1 cpu 2 group_fd -1 flags 0x8
- sys_perf_event_open: pid -1 cpu 3 group_fd -1 flags 0x8
- ------------------------------------------------------------
- perf_event_attr:
- type 1
- size 112
- config 0x9
- { sample_period, sample_freq } 1
- sample_type IP|TID|TIME|IDENTIFIER
- read_format ID
- disabled 1
- inherit 1
- exclude_kernel 1
- exclude_hv 1
- mmap 1
- comm 1
- enable_on_exec 1
- task 1
- sample_id_all 1
- mmap2 1
- comm_exec 1
- ------------------------------------------------------------
- sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8
- sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8
- sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8
- sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8
- mmap size 528384B
- AUX area mmap length 4194304
- perf event ring buffer mmapped per cpu
- Synthesizing auxtrace information
- Linux
- [ perf record: Woken up 1 times to write data ]
- [ perf record: Captured and wrote 0.042 MB perf.data ]
-
-Note, the sched_switch event is only added if the user is permitted to use it
-and only in per-cpu mode.
-
-Note also, the sched_switch event is only added if TSC packets are requested.
-That is because, in the absence of timing information, the sched_switch events
-cannot be matched against the Intel PT trace.
-
-
-perf script
-===========
-
-By default, perf script will decode trace data found in the perf.data file.
-This can be further controlled by new option --itrace.
-
-
-New --itrace option
--------------------
-
-Having no option is the same as
-
- --itrace
-
-which, in turn, is the same as
-
- --itrace=cepwx
-
-The letters are:
-
- i synthesize "instructions" events
- b synthesize "branches" events
- x synthesize "transactions" events
- w synthesize "ptwrite" events
- p synthesize "power" events
- c synthesize branches events (calls only)
- r synthesize branches events (returns only)
- e synthesize tracing error events
- d create a debug log
- g synthesize a call chain (use with i or x)
- l synthesize last branch entries (use with i or x)
- s skip initial number of events
-
-"Instructions" events look like they were recorded by "perf record -e
-instructions".
-
-"Branches" events look like they were recorded by "perf record -e branches". "c"
-and "r" can be combined to get calls and returns.
-
-"Transactions" events correspond to the start or end of transactions. The
-'flags' field can be used in perf script to determine whether the event is a
-transaction start, commit or abort.
-
-Note that "instructions", "branches" and "transactions" events depend on code
-flow packets which can be disabled by using the config term "branch=0". Refer
-to the config terms section above.
-
-"ptwrite" events record the payload of the ptwrite instruction and whether
-"fup_on_ptw" was used. "ptwrite" events depend on PTWRITE packets which are
-recorded only if the "ptw" config term was used. Refer to the config terms
-section above. perf script "synth" field displays "ptwrite" information like
-this: "ip: 0 payload: 0x123456789abcdef0" where "ip" is 1 if "fup_on_ptw" was
-used.
-
-"Power" events correspond to power event packets and CBR (core-to-bus ratio)
-packets. While CBR packets are always recorded when tracing is enabled, power
-event packets are recorded only if the "pwr_evt" config term was used. Refer to
-the config terms section above. The power events record information about
-C-state changes, whereas CBR is indicative of CPU frequency. perf script
-"event,synth" fields display information like this:
- cbr: cbr: 22 freq: 2189 MHz (200%)
- mwait: hints: 0x60 extensions: 0x1
- pwre: hw: 0 cstate: 2 sub-cstate: 0
- exstop: ip: 1
- pwrx: deepest cstate: 2 last cstate: 2 wake reason: 0x4
-Where:
- "cbr" includes the frequency and the percentage of maximum non-turbo
- "mwait" shows mwait hints and extensions
- "pwre" shows C-state transitions (to a C-state deeper than C0) and
- whether initiated by hardware
- "exstop" indicates execution stopped and whether the IP was recorded
- exactly,
- "pwrx" indicates return to C0
-For more details refer to the Intel 64 and IA-32 Architectures Software
-Developer Manuals.
-
-Error events show where the decoder lost the trace. Error events
-are quite important. Users must know if what they are seeing is a complete
-picture or not.
-
-The "d" option will cause the creation of a file "intel_pt.log" containing all
-decoded packets and instructions. Note that this option slows down the decoder
-and that the resulting file may be very large.
-
-In addition, the period of the "instructions" event can be specified. e.g.
-
- --itrace=i10us
-
-sets the period to 10us i.e. one instruction sample is synthesized for each 10
-microseconds of trace. Alternatives to "us" are "ms" (milliseconds),
-"ns" (nanoseconds), "t" (TSC ticks) or "i" (instructions).
-
-"ms", "us" and "ns" are converted to TSC ticks.
-
-The timing information included with Intel PT does not give the time of every
-instruction. Consequently, for the purpose of sampling, the decoder estimates
-the time since the last timing packet based on 1 tick per instruction. The time
-on the sample is *not* adjusted and reflects the last known value of TSC.
-
-For Intel PT, the default period is 100us.
-
-Setting it to a zero period means "as often as possible".
-
-In the case of Intel PT that is the same as a period of 1 and a unit of
-'instructions' (i.e. --itrace=i1i).
-
-Also the call chain size (default 16, max. 1024) for instructions or
-transactions events can be specified. e.g.
-
- --itrace=ig32
- --itrace=xg32
-
-Also the number of last branch entries (default 64, max. 1024) for instructions or
-transactions events can be specified. e.g.
-
- --itrace=il10
- --itrace=xl10
-
-Note that last branch entries are cleared for each sample, so there is no overlap
-from one sample to the next.
-
-To disable trace decoding entirely, use the option --no-itrace.
-
-It is also possible to skip events generated (instructions, branches, transactions)
-at the beginning. This is useful to ignore initialization code.
-
- --itrace=i0nss1000000
-
-skips the first million instructions.
-
-dump option
------------
-
-perf script has an option (-D) to "dump" the events i.e. display the binary
-data.
-
-When -D is used, Intel PT packets are displayed. The packet decoder does not
-pay attention to PSB packets, but just decodes the bytes - so the packets seen
-by the actual decoder may not be identical in places where the data is corrupt.
-One example of that would be when the buffer-switching interrupt has been too
-slow, and the buffer has been filled completely. In that case, the last packet
-in the buffer might be truncated and immediately followed by a PSB as the trace
-continues in the next buffer.
-
-To disable the display of Intel PT packets, combine the -D option with
---no-itrace.
-
-
-perf report
-===========
-
-By default, perf report will decode trace data found in the perf.data file.
-This can be further controlled by new option --itrace exactly the same as
-perf script, with the exception that the default is --itrace=igxe.
-
-
-perf inject
-===========
-
-perf inject also accepts the --itrace option in which case tracing data is
-removed and replaced with the synthesized events. e.g.
-
- perf inject --itrace -i perf.data -o perf.data.new
-
-Below is an example of using Intel PT with autofdo. It requires autofdo
-(https://github.com/google/autofdo) and gcc version 5. The bubble
-sort example is from the AutoFDO tutorial (https://gcc.gnu.org/wiki/AutoFDO/Tutorial)
-amended to take the number of elements as a parameter.
-
- $ gcc-5 -O3 sort.c -o sort_optimized
- $ ./sort_optimized 30000
- Bubble sorting array of 30000 elements
- 2254 ms
-
- $ cat ~/.perfconfig
- [intel-pt]
- mispred-all = on
-
- $ perf record -e intel_pt//u ./sort 3000
- Bubble sorting array of 3000 elements
- 58 ms
- [ perf record: Woken up 2 times to write data ]
- [ perf record: Captured and wrote 3.939 MB perf.data ]
- $ perf inject -i perf.data -o inj --itrace=i100usle --strip
- $ ./create_gcov --binary=./sort --profile=inj --gcov=sort.gcov -gcov_version=1
- $ gcc-5 -O3 -fauto-profile=sort.gcov sort.c -o sort_autofdo
- $ ./sort_autofdo 30000
- Bubble sorting array of 30000 elements
- 2155 ms
-
-Note there is currently no advantage to using Intel PT instead of LBR, but
-that may change in the future if greater use is made of the data.
-
-
-PEBS via Intel PT
-=================
-
-Some hardware has the feature to redirect PEBS records to the Intel PT trace.
-Recording is selected by using the aux-output config term e.g.
-
- perf record -c 10000 -e '{intel_pt/branch=0/,cycles/aux-output/ppp}' uname
-
-Note that currently, software only supports redirecting at most one PEBS event.
-
-To display PEBS events from the Intel PT trace, use the itrace 'o' option e.g.
-
- perf script --itrace=oe
+Documentation for support for Intel Processor Trace within perf tools has moved to file perf-intel-pt.txt
diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt
index e8c972f89357..1b5042f134a8 100644
--- a/tools/perf/Documentation/perf-annotate.txt
+++ b/tools/perf/Documentation/perf-annotate.txt
@@ -112,6 +112,12 @@ OPTIONS
--objdump=<path>::
Path to objdump binary.
+--prefix=PREFIX::
+--prefix-strip=N::
+ Remove first N entries from source file path names in executables
+ and add PREFIX. This allows displaying source code compiled on systems
+ with a different file system layout.
+
--skip-missing::
Skip symbols that cannot be annotated.
diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt
index c4dd23c4b478..f16d8a71d3f5 100644
--- a/tools/perf/Documentation/perf-config.txt
+++ b/tools/perf/Documentation/perf-config.txt
@@ -239,7 +239,6 @@ buildid.*::
set buildid.dir to /dev/null. The default is $HOME/.debug
annotate.*::
- These options work only for TUI.
These are in control of addresses, jump function, source code
in lines of assembly code from a specific program.
@@ -269,6 +268,8 @@ annotate.*::
│ mov (%rdi),%rdx
│ return n;
+ This option works with tui, stdio2 browsers.
+
annotate.use_offset::
Basing on a first address of a loaded function, offset can be used.
Instead of using original addresses of assembly code,
@@ -287,6 +288,8 @@ annotate.*::
368:│ mov 0x8(%r14),%rdi
+ This option works with tui, stdio2 browsers.
+
annotate.jump_arrows::
There can be jump instruction among assembly code.
Depending on a boolean value of jump_arrows,
@@ -306,6 +309,8 @@ annotate.*::
│1330: mov %r15,%r10
│1333: cmp %r15,%r14
+ This option works with tui browser.
+
annotate.show_linenr::
When showing source code if this option is 'true',
line numbers are printed as below.
@@ -325,6 +330,8 @@ annotate.*::
│ array++;
│ }
+ This option works with tui, stdio2 browsers.
+
annotate.show_nr_jumps::
Let's see a part of assembly code.
@@ -335,6 +342,8 @@ annotate.*::
│1 1382: movb $0x1,-0x270(%rbp)
+ This option works with tui, stdio2 browsers.
+
annotate.show_total_period::
To compare two records on an instruction base, with this option
provided, display total number of samples that belong to a line
@@ -348,11 +357,30 @@ annotate.*::
99.93 │ mov %eax,%eax
+ This option works with tui, stdio2, stdio browsers.
+
+ annotate.show_nr_samples::
+ By default perf annotate shows percentage of samples. This option
+ can be used to print absolute number of samples. Ex, when set as
+ false:
+
+ Percent│
+ 74.03 │ mov %fs:0x28,%rax
+
+ When set as true:
+
+ Samples│
+ 6 │ mov %fs:0x28,%rax
+
+ This option works with tui, stdio2, stdio browsers.
+
annotate.offset_level::
Default is '1', meaning just jump targets will have offsets shown right beside
the instruction. When set to '2' 'call' instructions will also have its offsets
shown, 3 or higher will show offsets for all instructions.
+ This option works with tui, stdio2 browsers.
+
hist.*::
hist.percentage::
This option controls the way to calculate overhead of filtered entries -
@@ -377,14 +405,16 @@ ui.*::
This option is only applied to TUI.
call-graph.*::
- When sub-commands 'top' and 'report' work with -g/—-children
- there're options in control of call-graph.
+ The following controls the handling of call-graphs (obtained via the
+ -g/--call-graph options).
call-graph.record-mode::
- The record-mode can be 'fp' (frame pointer), 'dwarf' and 'lbr'.
- The value of 'dwarf' is effective only if perf detect needed library
- (libunwind or a recent version of libdw).
- 'lbr' only work for cpus that support it.
+ The mode for user space can be 'fp' (frame pointer), 'dwarf'
+ and 'lbr'. The value 'dwarf' is effective only if libunwind
+ (or a recent version of libdw) is present on the system;
+ the value 'lbr' only works for certain cpus. The method for
+ kernel space is controlled not by this option but by the
+ kernel config (CONFIG_UNWINDER_*).
call-graph.dump-size::
The size of stack to dump in order to do post-unwinding. Default is 8192 (byte).
@@ -490,6 +520,12 @@ top.*::
column by default.
The default is 'true'.
+ top.call-graph::
+ This is identical to 'call-graph.record-mode', except it is
+ applicable only for 'top' subcommand. This option ONLY sets up
+ the unwind method. To enable 'perf top' to actually use it,
+ the command line option -g must be specified.
+
man.*::
man.viewer::
This option can assign a tool to view manual pages when 'help'
@@ -517,6 +553,16 @@ record.*::
But if this option is 'no-cache', it will not update the build-id cache.
'skip' skips post-processing and does not update the cache.
+ record.call-graph::
+ This is identical to 'call-graph.record-mode', except it is
+ applicable only for 'record' subcommand. This option ONLY sets up
+ the unwind method. To enable 'perf record' to actually use it,
+ the command line option -g must be specified.
+
+ record.aio::
+ Use 'n' control blocks in asynchronous (Posix AIO) trace writing
+ mode ('n' default: 1, max: 4).
+
diff.*::
diff.order::
This option sets the number of columns to sort the result.
@@ -566,6 +612,11 @@ trace.*::
"libbeauty", the default, to use the same argument beautifiers used in the
strace-like sys_enter+sys_exit lines.
+ftrace.*::
+ ftrace.tracer::
+ Can be used to select the default tracer. Possible values are
+ 'function' and 'function_graph'.
+
llvm.*::
llvm.clang-path::
Path to clang. If omitted, it is searched for in $PATH.
@@ -610,6 +661,29 @@ scripts.*::
The script gets the same options passed as a full perf script,
in particular -i perfdata file, --cpu, --tid
+convert.*::
+
+ convert.queue-size::
+ Limit the size of ordered_events queue, so we could control
+ allocation size of perf data files without proper finished
+ round events.
+
+intel-pt.*::
+
+ intel-pt.cache-divisor::
+
+ intel-pt.mispred-all::
+ If set, Intel PT decoder will set the mispred flag on all
+ branches.
+
+auxtrace.*::
+
+ auxtrace.dumpdir::
+ s390 only. The directory to save the auxiliary trace buffer
+ can be changed using this option. Ex, auxtrace.dumpdir=/tmp.
+ If the directory does not exist or has the wrong file type,
+ the current directory is used.
+
SEE ALSO
--------
linkperf:perf[1]
diff --git a/tools/perf/Documentation/perf-inject.txt b/tools/perf/Documentation/perf-inject.txt
index a64d6588470e..70969ea73e01 100644
--- a/tools/perf/Documentation/perf-inject.txt
+++ b/tools/perf/Documentation/perf-inject.txt
@@ -66,4 +66,5 @@ include::itrace.txt[]
SEE ALSO
--------
-linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-archive[1]
+linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-archive[1],
+linkperf:perf-intel-pt[1]
diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt
new file mode 100644
index 000000000000..456fdcbf26ac
--- /dev/null
+++ b/tools/perf/Documentation/perf-intel-pt.txt
@@ -0,0 +1,1007 @@
+perf-intel-pt(1)
+================
+
+NAME
+----
+perf-intel-pt - Support for Intel Processor Trace within perf tools
+
+SYNOPSIS
+--------
+[verse]
+'perf record' -e intel_pt//
+
+DESCRIPTION
+-----------
+
+Intel Processor Trace (Intel PT) is an extension of Intel Architecture that
+collects information about software execution such as control flow, execution
+modes and timings and formats it into highly compressed binary packets.
+Technical details are documented in the Intel 64 and IA-32 Architectures
+Software Developer Manuals, Chapter 36 Intel Processor Trace.
+
+Intel PT is first supported in Intel Core M and 5th generation Intel Core
+processors that are based on the Intel micro-architecture code name Broadwell.
+
+Trace data is collected by 'perf record' and stored within the perf.data file.
+See below for options to 'perf record'.
+
+Trace data must be 'decoded' which involves walking the object code and matching
+the trace data packets. For example a TNT packet only tells whether a
+conditional branch was taken or not taken, so to make use of that packet the
+decoder must know precisely which instruction was being executed.
+
+Decoding is done on-the-fly. The decoder outputs samples in the same format as
+samples output by perf hardware events, for example as though the "instructions"
+or "branches" events had been recorded. Presently 3 tools support this:
+'perf script', 'perf report' and 'perf inject'. See below for more information
+on using those tools.
+
+The main distinguishing feature of Intel PT is that the decoder can determine
+the exact flow of software execution. Intel PT can be used to understand why
+and how software got to a certain point, or behaved a certain way. The
+software does not have to be recompiled, so Intel PT works with debug or release
+builds, however the executed images are needed - which makes use in JIT-compiled
+environments, or with self-modified code, a challenge. Also symbols need to be
+provided to make sense of addresses.
+
+A limitation of Intel PT is that it produces huge amounts of trace data
+(hundreds of megabytes per second per core) which takes a long time to decode,
+for example two or three orders of magnitude longer than it took to collect.
+Another limitation is the performance impact of tracing, something that will
+vary depending on the use-case and architecture.
+
+
+Quickstart
+----------
+
+It is important to start small. That is because it is easy to capture vastly
+more data than can possibly be processed.
+
+The simplest thing to do with Intel PT is userspace profiling of small programs.
+Data is captured with 'perf record' e.g. to trace 'ls' userspace-only:
+
+ perf record -e intel_pt//u ls
+
+And profiled with 'perf report' e.g.
+
+ perf report
+
+To also trace kernel space presents a problem, namely kernel self-modifying
+code. A fairly good kernel image is available in /proc/kcore but to get an
+accurate image a copy of /proc/kcore needs to be made under the same conditions
+as the data capture. A script perf-with-kcore can do that, but beware that the
+script makes use of 'sudo' to copy /proc/kcore. If you have perf installed
+locally from the source tree you can do:
+
+ ~/libexec/perf-core/perf-with-kcore record pt_ls -e intel_pt// -- ls
+
+which will create a directory named 'pt_ls' and put the perf.data file and
+copies of /proc/kcore, /proc/kallsyms and /proc/modules into it. Then to use
+'perf report' becomes:
+
+ ~/libexec/perf-core/perf-with-kcore report pt_ls
+
+Because samples are synthesized after-the-fact, the sampling period can be
+selected for reporting. e.g. sample every microsecond
+
+ ~/libexec/perf-core/perf-with-kcore report pt_ls --itrace=i1usge
+
+See the sections below for more information about the --itrace option.
+
+Beware the smaller the period, the more samples that are produced, and the
+longer it takes to process them.
+
+Also note that the coarseness of Intel PT timing information will start to
+distort the statistical value of the sampling as the sampling period becomes
+smaller.
+
+To represent software control flow, "branches" samples are produced. By default
+a branch sample is synthesized for every single branch. To get an idea what
+data is available you can use the 'perf script' tool with all itrace sampling
+options, which will list all the samples.
+
+ perf record -e intel_pt//u ls
+ perf script --itrace=ibxwpe
+
+An interesting field that is not printed by default is 'flags' which can be
+displayed as follows:
+
+ perf script --itrace=ibxwpe -F+flags
+
+The flags are "bcrosyiABEx" which stand for branch, call, return, conditional,
+system, asynchronous, interrupt, transaction abort, trace begin, trace end, and
+in transaction, respectively.
+
+Another interesting field that is not printed by default is 'ipc' which can be
+displayed as follows:
+
+ perf script --itrace=be -F+ipc
+
+There are two ways that instructions-per-cycle (IPC) can be calculated depending
+on the recording.
+
+If the 'cyc' config term (see config terms section below) was used, then IPC is
+calculated using the cycle count from CYC packets, otherwise MTC packets are
+used - refer to the 'mtc' config term. When MTC is used, however, the values
+are less accurate because the timing is less accurate.
+
+Because Intel PT does not update the cycle count on every branch or instruction,
+the values will often be zero. When there are values, they will be the number
+of instructions and number of cycles since the last update, and thus represent
+the average IPC since the last IPC for that event type. Note IPC for "branches"
+events is calculated separately from IPC for "instructions" events.
+
+Also note that the IPC instruction count may or may not include the current
+instruction. If the cycle count is associated with an asynchronous branch
+(e.g. page fault or interrupt), then the instruction count does not include the
+current instruction, otherwise it does. That is consistent with whether or not
+that instruction has retired when the cycle count is updated.
+
+Another note, in the case of "branches" events, non-taken branches are not
+presently sampled, so IPC values for them do not appear e.g. a CYC packet with a
+TNT packet that starts with a non-taken branch. To see every possible IPC
+value, "instructions" events can be used e.g. --itrace=i0ns
+
+While it is possible to create scripts to analyze the data, an alternative
+approach is available to export the data to a sqlite or postgresql database.
+Refer to script export-to-sqlite.py or export-to-postgresql.py for more details,
+and to script exported-sql-viewer.py for an example of using the database.
+
+There is also script intel-pt-events.py which provides an example of how to
+unpack the raw data for power events and PTWRITE.
+
+As mentioned above, it is easy to capture too much data. One way to limit the
+data captured is to use 'snapshot' mode which is explained further below.
+Refer to 'new snapshot option' and 'Intel PT modes of operation' further below.
+
+Another problem that will be experienced is decoder errors. They can be caused
+by inability to access the executed image, self-modified or JIT-ed code, or the
+inability to match side-band information (such as context switches and mmaps)
+which results in the decoder not knowing what code was executed.
+
+There is also the problem of perf not being able to copy the data fast enough,
+resulting in data lost because the buffer was full. See 'Buffer handling' below
+for more details.
+
+
+perf record
+-----------
+
+new event
+~~~~~~~~~
+
+The Intel PT kernel driver creates a new PMU for Intel PT. PMU events are
+selected by providing the PMU name followed by the "config" separated by slashes.
+An enhancement has been made to allow default "config" e.g. the option
+
+ -e intel_pt//
+
+will use a default config value. Currently that is the same as
+
+ -e intel_pt/tsc,noretcomp=0/
+
+which is the same as
+
+ -e intel_pt/tsc=1,noretcomp=0/
+
+Note there are now new config terms - see section 'config terms' further below.
+
+The config terms are listed in /sys/devices/intel_pt/format. They are bit
+fields within the config member of the struct perf_event_attr which is
+passed to the kernel by the perf_event_open system call. They correspond to bit
+fields in the IA32_RTIT_CTL MSR. Here is a list of them and their definitions:
+
+ $ grep -H . /sys/bus/event_source/devices/intel_pt/format/*
+ /sys/bus/event_source/devices/intel_pt/format/cyc:config:1
+ /sys/bus/event_source/devices/intel_pt/format/cyc_thresh:config:19-22
+ /sys/bus/event_source/devices/intel_pt/format/mtc:config:9
+ /sys/bus/event_source/devices/intel_pt/format/mtc_period:config:14-17
+ /sys/bus/event_source/devices/intel_pt/format/noretcomp:config:11
+ /sys/bus/event_source/devices/intel_pt/format/psb_period:config:24-27
+ /sys/bus/event_source/devices/intel_pt/format/tsc:config:10
+
+Note that the default config must be overridden for each term i.e.
+
+ -e intel_pt/noretcomp=0/
+
+is the same as:
+
+ -e intel_pt/tsc=1,noretcomp=0/
+
+So, to disable TSC packets use:
+
+ -e intel_pt/tsc=0/
+
+It is also possible to specify the config value explicitly:
+
+ -e intel_pt/config=0x400/
+
+Note that, as with all events, the event is suffixed with event modifiers:
+
+ u userspace
+ k kernel
+ h hypervisor
+ G guest
+ H host
+ p precise ip
+
+'h', 'G' and 'H' are for virtualization which is not supported by Intel PT.
+'p' is also not relevant to Intel PT. So only options 'u' and 'k' are
+meaningful for Intel PT.
+
+perf_event_attr is displayed if the -vv option is used e.g.
+
+ ------------------------------------------------------------
+ perf_event_attr:
+ type 6
+ size 112
+ config 0x400
+ { sample_period, sample_freq } 1
+ sample_type IP|TID|TIME|CPU|IDENTIFIER
+ read_format ID
+ disabled 1
+ inherit 1
+ exclude_kernel 1
+ exclude_hv 1
+ enable_on_exec 1
+ sample_id_all 1
+ ------------------------------------------------------------
+ sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8
+ sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8
+ sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8
+ sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8
+ ------------------------------------------------------------
+
+
+config terms
+~~~~~~~~~~~~
+
+The June 2015 version of Intel 64 and IA-32 Architectures Software Developer
+Manuals, Chapter 36 Intel Processor Trace, defined new Intel PT features.
+Some of the features are reflected in new config terms. All the config terms are
+described below.
+
+tsc Always supported. Produces TSC timestamp packets to provide
+ timing information. In some cases it is possible to decode
+ without timing information, for example a per-thread context
+ that does not overlap executable memory maps.
+
+ The default config selects tsc (i.e. tsc=1).
+
+noretcomp Always supported. Disables "return compression" so a TIP packet
+ is produced when a function returns. Causes more packets to be
+ produced but might make decoding more reliable.
+
+ The default config does not select noretcomp (i.e. noretcomp=0).
+
+psb_period Allows the frequency of PSB packets to be specified.
+
+ The PSB packet is a synchronization packet that provides a
+ starting point for decoding or recovery from errors.
+
+ Support for psb_period is indicated by:
+
+ /sys/bus/event_source/devices/intel_pt/caps/psb_cyc
+
+ which contains "1" if the feature is supported and "0"
+ otherwise.
+
+ Valid values are given by:
+
+ /sys/bus/event_source/devices/intel_pt/caps/psb_periods
+
+ which contains a hexadecimal value, the bits of which represent
+ valid values e.g. bit 2 set means value 2 is valid.
+
+ The psb_period value is converted to the approximate number of
+ trace bytes between PSB packets as:
+
+ 2 ^ (value + 11)
+
+ e.g. value 3 means 16KiB between PSBs
+
+ If an invalid value is entered, the error message
+ will give a list of valid values e.g.
+
+ $ perf record -e intel_pt/psb_period=15/u uname
+ Invalid psb_period for intel_pt. Valid values are: 0-5
+
+ If MTC packets are selected, the default config selects a value
+ of 3 (i.e. psb_period=3) or the nearest lower value that is
+ supported (0 is always supported). Otherwise the default is 0.
+
+ If decoding is expected to be reliable and the buffer is large
+ then a large PSB period can be used.
+
+ Because a TSC packet is produced with PSB, the PSB period can
+ also affect the granularity of timing information in the absence
+ of MTC or CYC.
+
+mtc Produces MTC timing packets.
+
+ MTC packets provide finer grain timestamp information than TSC
+ packets. MTC packets record time using the hardware crystal
+ clock (CTC) which is related to TSC packets using a TMA packet.
+
+ Support for this feature is indicated by:
+
+ /sys/bus/event_source/devices/intel_pt/caps/mtc
+
+ which contains "1" if the feature is supported and
+ "0" otherwise.
+
+ The frequency of MTC packets can also be specified - see
+ mtc_period below.
+
+mtc_period Specifies how frequently MTC packets are produced - see mtc
+ above for how to determine if MTC packets are supported.
+
+ Valid values are given by:
+
+ /sys/bus/event_source/devices/intel_pt/caps/mtc_periods
+
+ which contains a hexadecimal value, the bits of which represent
+ valid values e.g. bit 2 set means value 2 is valid.
+
+ The mtc_period value is converted to the MTC frequency as:
+
+ CTC-frequency / (2 ^ value)
+
+ e.g. value 3 means one eighth of CTC-frequency
+
+ Where CTC is the hardware crystal clock, the frequency of which
+ can be related to TSC via values provided in cpuid leaf 0x15.
+
+ If an invalid value is entered, the error message
+ will give a list of valid values e.g.
+
+ $ perf record -e intel_pt/mtc_period=15/u uname
+ Invalid mtc_period for intel_pt. Valid values are: 0,3,6,9
+
+ The default value is 3 or the nearest lower value
+ that is supported (0 is always supported).
+
+cyc Produces CYC timing packets.
+
+ CYC packets provide even finer grain timestamp information than
+ MTC and TSC packets. A CYC packet contains the number of CPU
+ cycles since the last CYC packet. Unlike MTC and TSC packets,
+ CYC packets are only sent when another packet is also sent.
+
+ Support for this feature is indicated by:
+
+ /sys/bus/event_source/devices/intel_pt/caps/psb_cyc
+
+ which contains "1" if the feature is supported and
+ "0" otherwise.
+
+ The number of CYC packets produced can be reduced by specifying
+ a threshold - see cyc_thresh below.
+
+cyc_thresh Specifies how frequently CYC packets are produced - see cyc
+ above for how to determine if CYC packets are supported.
+
+ Valid cyc_thresh values are given by:
+
+ /sys/bus/event_source/devices/intel_pt/caps/cycle_thresholds
+
+ which contains a hexadecimal value, the bits of which represent
+ valid values e.g. bit 2 set means value 2 is valid.
+
+ The cyc_thresh value represents the minimum number of CPU cycles
+ that must have passed before a CYC packet can be sent. The
+ number of CPU cycles is:
+
+ 2 ^ (value - 1)
+
+ e.g. value 4 means 8 CPU cycles must pass before a CYC packet
+ can be sent. Note a CYC packet is still only sent when another
+ packet is sent, not at, e.g. every 8 CPU cycles.
+
+ If an invalid value is entered, the error message
+ will give a list of valid values e.g.
+
+ $ perf record -e intel_pt/cyc,cyc_thresh=15/u uname
+ Invalid cyc_thresh for intel_pt. Valid values are: 0-12
+
+ CYC packets are not requested by default.
+
+pt Specifies pass-through which enables the 'branch' config term.
+
+ The default config selects 'pt' if it is available, so a user will
+ never need to specify this term.
+
+branch Enable branch tracing. Branch tracing is enabled by default so to
+ disable branch tracing use 'branch=0'.
+
+ The default config selects 'branch' if it is available.
+
+ptw Enable PTWRITE packets which are produced when a ptwrite instruction
+ is executed.
+
+ Support for this feature is indicated by:
+
+ /sys/bus/event_source/devices/intel_pt/caps/ptwrite
+
+ which contains "1" if the feature is supported and
+ "0" otherwise.
+
+fup_on_ptw Enable a FUP packet to follow the PTWRITE packet. The FUP packet
+ provides the address of the ptwrite instruction. In the absence of
+ fup_on_ptw, the decoder will use the address of the previous branch
+ if branch tracing is enabled, otherwise the address will be zero.
+ Note that fup_on_ptw will work even when branch tracing is disabled.
+
+pwr_evt Enable power events. The power events provide information about
+ changes to the CPU C-state.
+
+ Support for this feature is indicated by:
+
+ /sys/bus/event_source/devices/intel_pt/caps/power_event_trace
+
+ which contains "1" if the feature is supported and
+ "0" otherwise.
+
+
+AUX area sampling option
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+To select Intel PT "sampling" the AUX area sampling option can be used:
+
+ --aux-sample
+
+Optionally it can be followed by the sample size in bytes e.g.
+
+ --aux-sample=8192
+
+In addition, the Intel PT event to sample must be defined e.g.
+
+ -e intel_pt//u
+
+Samples on other events will be created containing Intel PT data e.g. the
+following will create Intel PT samples on the branch-misses event, note the
+events must be grouped using {}:
+
+ perf record --aux-sample -e '{intel_pt//u,branch-misses:u}'
+
+An alternative to '--aux-sample' is to add the config term 'aux-sample-size' to
+events. In this case, the grouping is implied e.g.
+
+ perf record -e intel_pt//u -e branch-misses/aux-sample-size=8192/u
+
+is the same as:
+
+ perf record -e '{intel_pt//u,branch-misses/aux-sample-size=8192/u}'
+
+but allows for also using an address filter e.g.:
+
+ perf record -e intel_pt//u --filter 'filter * @/bin/ls' -e branch-misses/aux-sample-size=8192/u -- ls
+
+It is important to select a sample size that is big enough to contain at least
+one PSB packet. If not a warning will be displayed:
+
+ Intel PT sample size (%zu) may be too small for PSB period (%zu)
+
+The calculation used for that is: if sample_size <= psb_period + 256 display the
+warning. When sampling is used, psb_period defaults to 0 (2KiB).
+
+The default sample size is 4KiB.
+
+The sample size is passed in aux_sample_size in struct perf_event_attr. The
+sample size is limited by the maximum event size which is 64KiB. It is
+difficult to know how big the event might be without the trace sample attached,
+but the tool validates that the sample size is not greater than 60KiB.
+
+
+new snapshot option
+~~~~~~~~~~~~~~~~~~~
+
+The difference between full trace and snapshot from the kernel's perspective is
+that in full trace we don't overwrite trace data that the user hasn't collected
+yet (and indicated that by advancing aux_tail), whereas in snapshot mode we let
+the trace run and overwrite older data in the buffer so that whenever something
+interesting happens, we can stop it and grab a snapshot of what was going on
+around that interesting moment.
+
+To select snapshot mode a new option has been added:
+
+ -S
+
+Optionally it can be followed by the snapshot size e.g.
+
+ -S0x100000
+
+The default snapshot size is the auxtrace mmap size. If neither auxtrace mmap size
+nor snapshot size is specified, then the default is 4MiB for privileged users
+(or if /proc/sys/kernel/perf_event_paranoid < 0), 128KiB for unprivileged users.
+If an unprivileged user does not specify mmap pages, the mmap pages will be
+reduced as described in the 'new auxtrace mmap size option' section below.
+
+The snapshot size is displayed if the option -vv is used e.g.
+
+ Intel PT snapshot size: %zu
+
+
+new auxtrace mmap size option
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Intel PT buffer size is specified by an addition to the -m option e.g.
+
+ -m,16
+
+selects a buffer size of 16 pages i.e. 64KiB.
+
+Note that the existing functionality of -m is unchanged. The auxtrace mmap size
+is specified by the optional addition of a comma and the value.
+
+The default auxtrace mmap size for Intel PT is 4MiB/page_size for privileged users
+(or if /proc/sys/kernel/perf_event_paranoid < 0), 128KiB for unprivileged users.
+If an unprivileged user does not specify mmap pages, the mmap pages will be
+reduced from the default 512KiB/page_size to 256KiB/page_size, otherwise the
+user is likely to get an error as they exceed their mlock limit (Max locked
+memory as shown in /proc/self/limits). Note that perf does not count the first
+512KiB (actually /proc/sys/kernel/perf_event_mlock_kb minus 1 page) per cpu
+against the mlock limit so an unprivileged user is allowed 512KiB per cpu plus
+their mlock limit (which defaults to 64KiB but is not multiplied by the number
+of cpus).
+
+In full-trace mode, powers of two are allowed for buffer size, with a minimum
+size of 2 pages. In snapshot mode or sampling mode, it is the same but the
+minimum size is 1 page.
+
+The mmap size and auxtrace mmap size are displayed if the -vv option is used e.g.
+
+ mmap length 528384
+ auxtrace mmap length 4198400
+
+
+Intel PT modes of operation
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Intel PT can be used in 3 modes:
+ full-trace mode
+ sample mode
+ snapshot mode
+
+Full-trace mode traces continuously e.g.
+
+ perf record -e intel_pt//u uname
+
+Sample mode attaches an Intel PT sample to other events e.g.
+
+ perf record --aux-sample -e intel_pt//u -e branch-misses:u
+
+Snapshot mode captures the available data when a signal is sent e.g.
+
+ perf record -v -e intel_pt//u -S ./loopy 1000000000 &
+ [1] 11435
+ kill -USR2 11435
+ Recording AUX area tracing snapshot
+
+Note that the signal sent is SIGUSR2.
+Note that "Recording AUX area tracing snapshot" is displayed because the -v
+option is used.
+
+The 3 modes cannot be used together.
+
+
+Buffer handling
+~~~~~~~~~~~~~~~
+
+There may be buffer limitations (i.e. single ToPa entry) which means that actual
+buffer sizes are limited to powers of 2 up to 4MiB (MAX_ORDER). In order to
+provide other sizes, and in particular an arbitrarily large size, multiple
+buffers are logically concatenated. However an interrupt must be used to switch
+between buffers. That has two potential problems:
+ a) the interrupt may not be handled in time so that the current buffer
+ becomes full and some trace data is lost.
+ b) the interrupts may slow the system and affect the performance
+ results.
+
+If trace data is lost, the driver sets 'truncated' in the PERF_RECORD_AUX event
+which the tools report as an error.
+
+In full-trace mode, the driver waits for data to be copied out before allowing
+the (logical) buffer to wrap-around. If data is not copied out quickly enough,
+again 'truncated' is set in the PERF_RECORD_AUX event. If the driver has to
+wait, the intel_pt event gets disabled. Because it is difficult to know when
+that happens, perf tools always re-enable the intel_pt event after copying out
+data.
+
+
+Intel PT and build ids
+~~~~~~~~~~~~~~~~~~~~~~
+
+By default "perf record" post-processes the event stream to find all build ids
+for executables for all addresses sampled. Deliberately, Intel PT is not
+decoded for that purpose (it would take too long). Instead the build ids for
+all executables encountered (due to mmap, comm or task events) are included
+in the perf.data file.
+
+To see buildids included in the perf.data file use the command:
+
+ perf buildid-list
+
+If the perf.data file contains Intel PT data, that is the same as:
+
+ perf buildid-list --with-hits
+
+
+Snapshot mode and event disabling
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+In order to make a snapshot, the intel_pt event is disabled using an IOCTL,
+namely PERF_EVENT_IOC_DISABLE. However doing that can also disable the
+collection of side-band information. In order to prevent that, a dummy
+software event has been introduced that permits tracking events (like mmaps) to
+continue to be recorded while intel_pt is disabled. That is important to ensure
+there is complete side-band information to allow the decoding of subsequent
+snapshots.
+
+A test has been created for that. To find the test:
+
+ perf test list
+ ...
+ 23: Test using a dummy software event to keep tracking
+
+To run the test:
+
+ perf test 23
+ 23: Test using a dummy software event to keep tracking : Ok
+
+
+perf record modes (nothing new here)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+perf record essentially operates in one of three modes:
+ per thread
+ per cpu
+ workload only
+
+"per thread" mode is selected by -t or by --per-thread (with -p or -u or just a
+workload).
+"per cpu" is selected by -C or -a.
+"workload only" mode is selected by not using the other options but providing a
+command to run (i.e. the workload).
+
+In per-thread mode an exact list of threads is traced. There is no inheritance.
+Each thread has its own event buffer.
+
+In per-cpu mode all processes (or processes from the selected cgroup i.e. -G
+option, or processes selected with -p or -u) are traced. Each cpu has its own
+buffer. Inheritance is allowed.
+
+In workload-only mode, the workload is traced but with per-cpu buffers.
+Inheritance is allowed. Note that you can now trace a workload in per-thread
+mode by using the --per-thread option.
+
+
+Privileged vs non-privileged users
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users
+have memory limits imposed upon them. That affects what buffer sizes they can
+have as outlined above.
+
+The v4.2 kernel introduced support for a context switch metadata event,
+PERF_RECORD_SWITCH, which allows unprivileged users to see when their processes
+are scheduled out and in, just not by whom, which is left for the
+PERF_RECORD_SWITCH_CPU_WIDE, that is only accessible in system wide context,
+which in turn requires CAP_SYS_ADMIN.
+
+Please see the 45ac1403f564 ("perf: Add PERF_RECORD_SWITCH to indicate context
+switches") commit, that introduces these metadata events for further info.
+
+When working with kernels < v4.2, the following considerations must be taken into account,
+as the sched:sched_switch tracepoints will be used to receive such information:
+
+Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users are
+not permitted to use tracepoints which means there is insufficient side-band
+information to decode Intel PT in per-cpu mode, and potentially workload-only
+mode too if the workload creates new processes.
+
+Note also, that to use tracepoints, read-access to debugfs is required. So if
+debugfs is not mounted or the user does not have read-access, it will again not
+be possible to decode Intel PT in per-cpu mode.
+
+
+sched_switch tracepoint
+~~~~~~~~~~~~~~~~~~~~~~~
+
+The sched_switch tracepoint is used to provide side-band data for Intel PT
+decoding in kernels where the PERF_RECORD_SWITCH metadata event isn't
+available.
+
+The sched_switch events are automatically added. e.g. the second event shown
+below:
+
+ $ perf record -vv -e intel_pt//u uname
+ ------------------------------------------------------------
+ perf_event_attr:
+ type 6
+ size 112
+ config 0x400
+ { sample_period, sample_freq } 1
+ sample_type IP|TID|TIME|CPU|IDENTIFIER
+ read_format ID
+ disabled 1
+ inherit 1
+ exclude_kernel 1
+ exclude_hv 1
+ enable_on_exec 1
+ sample_id_all 1
+ ------------------------------------------------------------
+ sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8
+ sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8
+ sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8
+ sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8
+ ------------------------------------------------------------
+ perf_event_attr:
+ type 2
+ size 112
+ config 0x108
+ { sample_period, sample_freq } 1
+ sample_type IP|TID|TIME|CPU|PERIOD|RAW|IDENTIFIER
+ read_format ID
+ inherit 1
+ sample_id_all 1
+ exclude_guest 1
+ ------------------------------------------------------------
+ sys_perf_event_open: pid -1 cpu 0 group_fd -1 flags 0x8
+ sys_perf_event_open: pid -1 cpu 1 group_fd -1 flags 0x8
+ sys_perf_event_open: pid -1 cpu 2 group_fd -1 flags 0x8
+ sys_perf_event_open: pid -1 cpu 3 group_fd -1 flags 0x8
+ ------------------------------------------------------------
+ perf_event_attr:
+ type 1
+ size 112
+ config 0x9
+ { sample_period, sample_freq } 1
+ sample_type IP|TID|TIME|IDENTIFIER
+ read_format ID
+ disabled 1
+ inherit 1
+ exclude_kernel 1
+ exclude_hv 1
+ mmap 1
+ comm 1
+ enable_on_exec 1
+ task 1
+ sample_id_all 1
+ mmap2 1
+ comm_exec 1
+ ------------------------------------------------------------
+ sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8
+ sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8
+ sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8
+ sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8
+ mmap size 528384B
+ AUX area mmap length 4194304
+ perf event ring buffer mmapped per cpu
+ Synthesizing auxtrace information
+ Linux
+ [ perf record: Woken up 1 times to write data ]
+ [ perf record: Captured and wrote 0.042 MB perf.data ]
+
+Note, the sched_switch event is only added if the user is permitted to use it
+and only in per-cpu mode.
+
+Note also, the sched_switch event is only added if TSC packets are requested.
+That is because, in the absence of timing information, the sched_switch events
+cannot be matched against the Intel PT trace.
+
+
+perf script
+-----------
+
+By default, perf script will decode trace data found in the perf.data file.
+This can be further controlled by new option --itrace.
+
+
+New --itrace option
+~~~~~~~~~~~~~~~~~~~
+
+Having no option is the same as
+
+ --itrace
+
+which, in turn, is the same as
+
+ --itrace=cepwx
+
+The letters are:
+
+ i synthesize "instructions" events
+ b synthesize "branches" events
+ x synthesize "transactions" events
+ w synthesize "ptwrite" events
+ p synthesize "power" events
+ c synthesize branches events (calls only)
+ r synthesize branches events (returns only)
+ e synthesize tracing error events
+ d create a debug log
+ g synthesize a call chain (use with i or x)
+ l synthesize last branch entries (use with i or x)
+ s skip initial number of events
+
+"Instructions" events look like they were recorded by "perf record -e
+instructions".
+
+"Branches" events look like they were recorded by "perf record -e branches". "c"
+and "r" can be combined to get calls and returns.
+
+"Transactions" events correspond to the start or end of transactions. The
+'flags' field can be used in perf script to determine whether the event is a
+transaction start, commit or abort.
+
+Note that "instructions", "branches" and "transactions" events depend on code
+flow packets which can be disabled by using the config term "branch=0". Refer
+to the config terms section above.
+
+"ptwrite" events record the payload of the ptwrite instruction and whether
+"fup_on_ptw" was used. "ptwrite" events depend on PTWRITE packets which are
+recorded only if the "ptw" config term was used. Refer to the config terms
+section above. perf script "synth" field displays "ptwrite" information like
+this: "ip: 0 payload: 0x123456789abcdef0" where "ip" is 1 if "fup_on_ptw" was
+used.
+
+"Power" events correspond to power event packets and CBR (core-to-bus ratio)
+packets. While CBR packets are always recorded when tracing is enabled, power
+event packets are recorded only if the "pwr_evt" config term was used. Refer to
+the config terms section above. The power events record information about
+C-state changes, whereas CBR is indicative of CPU frequency. perf script
+"event,synth" fields display information like this:
+ cbr: cbr: 22 freq: 2189 MHz (200%)
+ mwait: hints: 0x60 extensions: 0x1
+ pwre: hw: 0 cstate: 2 sub-cstate: 0
+ exstop: ip: 1
+ pwrx: deepest cstate: 2 last cstate: 2 wake reason: 0x4
+Where:
+ "cbr" includes the frequency and the percentage of maximum non-turbo
+ "mwait" shows mwait hints and extensions
+ "pwre" shows C-state transitions (to a C-state deeper than C0) and
+ whether initiated by hardware
+ "exstop" indicates execution stopped and whether the IP was recorded
+ exactly,
+ "pwrx" indicates return to C0
+For more details refer to the Intel 64 and IA-32 Architectures Software
+Developer Manuals.
+
+Error events show where the decoder lost the trace. Error events
+are quite important. Users must know if what they are seeing is a complete
+picture or not.
+
+The "d" option will cause the creation of a file "intel_pt.log" containing all
+decoded packets and instructions. Note that this option slows down the decoder
+and that the resulting file may be very large.
+
+In addition, the period of the "instructions" event can be specified. e.g.
+
+ --itrace=i10us
+
+sets the period to 10us i.e. one instruction sample is synthesized for each 10
+microseconds of trace. Alternatives to "us" are "ms" (milliseconds),
+"ns" (nanoseconds), "t" (TSC ticks) or "i" (instructions).
+
+"ms", "us" and "ns" are converted to TSC ticks.
+
+The timing information included with Intel PT does not give the time of every
+instruction. Consequently, for the purpose of sampling, the decoder estimates
+the time since the last timing packet based on 1 tick per instruction. The time
+on the sample is *not* adjusted and reflects the last known value of TSC.
+
+For Intel PT, the default period is 100us.
+
+Setting it to a zero period means "as often as possible".
+
+In the case of Intel PT that is the same as a period of 1 and a unit of
+'instructions' (i.e. --itrace=i1i).
+
+Also the call chain size (default 16, max. 1024) for instructions or
+transactions events can be specified. e.g.
+
+ --itrace=ig32
+ --itrace=xg32
+
+Also the number of last branch entries (default 64, max. 1024) for instructions or
+transactions events can be specified. e.g.
+
+ --itrace=il10
+ --itrace=xl10
+
+Note that last branch entries are cleared for each sample, so there is no overlap
+from one sample to the next.
+
+To disable trace decoding entirely, use the option --no-itrace.
+
+It is also possible to skip events generated (instructions, branches, transactions)
+at the beginning. This is useful to ignore initialization code.
+
+ --itrace=i0nss1000000
+
+skips the first million instructions.
+
+dump option
+~~~~~~~~~~~
+
+perf script has an option (-D) to "dump" the events i.e. display the binary
+data.
+
+When -D is used, Intel PT packets are displayed. The packet decoder does not
+pay attention to PSB packets, but just decodes the bytes - so the packets seen
+by the actual decoder may not be identical in places where the data is corrupt.
+One example of that would be when the buffer-switching interrupt has been too
+slow, and the buffer has been filled completely. In that case, the last packet
+in the buffer might be truncated and immediately followed by a PSB as the trace
+continues in the next buffer.
+
+To disable the display of Intel PT packets, combine the -D option with
+--no-itrace.
+
+
+perf report
+-----------
+
+By default, perf report will decode trace data found in the perf.data file.
+This can be further controlled by new option --itrace exactly the same as
+perf script, with the exception that the default is --itrace=igxe.
+
+
+perf inject
+-----------
+
+perf inject also accepts the --itrace option in which case tracing data is
+removed and replaced with the synthesized events. e.g.
+
+ perf inject --itrace -i perf.data -o perf.data.new
+
+Below is an example of using Intel PT with autofdo. It requires autofdo
+(https://github.com/google/autofdo) and gcc version 5. The bubble
+sort example is from the AutoFDO tutorial (https://gcc.gnu.org/wiki/AutoFDO/Tutorial)
+amended to take the number of elements as a parameter.
+
+ $ gcc-5 -O3 sort.c -o sort_optimized
+ $ ./sort_optimized 30000
+ Bubble sorting array of 30000 elements
+ 2254 ms
+
+ $ cat ~/.perfconfig
+ [intel-pt]
+ mispred-all = on
+
+ $ perf record -e intel_pt//u ./sort 3000
+ Bubble sorting array of 3000 elements
+ 58 ms
+ [ perf record: Woken up 2 times to write data ]
+ [ perf record: Captured and wrote 3.939 MB perf.data ]
+ $ perf inject -i perf.data -o inj --itrace=i100usle --strip
+ $ ./create_gcov --binary=./sort --profile=inj --gcov=sort.gcov -gcov_version=1
+ $ gcc-5 -O3 -fauto-profile=sort.gcov sort.c -o sort_autofdo
+ $ ./sort_autofdo 30000
+ Bubble sorting array of 30000 elements
+ 2155 ms
+
+Note there is currently no advantage to using Intel PT instead of LBR, but
+that may change in the future if greater use is made of the data.
+
+
+PEBS via Intel PT
+-----------------
+
+Some hardware has the feature to redirect PEBS records to the Intel PT trace.
+Recording is selected by using the aux-output config term e.g.
+
+ perf record -c 10000 -e '{intel_pt/branch=0/,cycles/aux-output/ppp}' uname
+
+Note that currently, software only supports redirecting at most one PEBS event.
+
+To display PEBS events from the Intel PT trace, use the itrace 'o' option e.g.
+
+ perf script --itrace=oe
+
+
+SEE ALSO
+--------
+
+linkperf:perf-record[1], linkperf:perf-script[1], linkperf:perf-report[1],
+linkperf:perf-inject[1]
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index b23a4012a606..b3f3b3f1c161 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -237,16 +237,22 @@ OPTIONS
option and remains only for backward compatibility. See --event.
-g::
- Enables call-graph (stack chain/backtrace) recording.
+ Enables call-graph (stack chain/backtrace) recording for both
+ kernel space and user space.
--call-graph::
Setup and enable call-graph (stack chain/backtrace) recording,
- implies -g. Default is "fp".
+ implies -g. Default is "fp" (for user space).
- Allows specifying "fp" (frame pointer) or "dwarf"
- (DWARF's CFI - Call Frame Information) or "lbr"
- (Hardware Last Branch Record facility) as the method to collect
- the information used to show the call graphs.
+ The unwinding method used for kernel space is dependent on the
+ unwinder used by the active kernel configuration, i.e.
+ CONFIG_UNWINDER_FRAME_POINTER (fp) or CONFIG_UNWINDER_ORC (orc).
+
+ Any option specified here controls the method used for user space.
+
+ Valid options are "fp" (frame pointer), "dwarf" (DWARF's CFI -
+ Call Frame Information) or "lbr" (Hardware Last Branch Record
+ facility).
In some systems, where binaries are build with gcc
--fomit-frame-pointer, using the "fp" method will produce bogus
@@ -385,7 +391,10 @@ displayed with the weight and local_weight sort keys. This currently works for
abort events and some memory events in precise mode on modern Intel CPUs.
--namespaces::
-Record events of type PERF_RECORD_NAMESPACES.
+Record events of type PERF_RECORD_NAMESPACES. This enables 'cgroup_id' sort key.
+
+--all-cgroups::
+Record events of type PERF_RECORD_CGROUP. This enables 'cgroup' sort key.
--transaction::
Record transaction flags for transaction related events.
@@ -589,4 +598,4 @@ appended unit character - B/K/M/G
SEE ALSO
--------
-linkperf:perf-stat[1], linkperf:perf-list[1]
+linkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-intel-pt[1]
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 8dbe2119686a..f569b9ea4002 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -95,6 +95,7 @@ OPTIONS
abort cost. This is the global weight.
- local_weight: Local weight version of the weight above.
- cgroup_id: ID derived from cgroup namespace device and inode numbers.
+ - cgroup: cgroup pathname in the cgroupfs.
- transaction: Transaction abort flags.
- overhead: Overhead percentage of sample
- overhead_sys: Overhead percentage of sample running in system mode
@@ -367,10 +368,21 @@ OPTIONS
--objdump=<path>::
Path to objdump binary.
+--prefix=PREFIX::
+--prefix-strip=N::
+ Remove first N entries from source file path names in executables
+ and add PREFIX. This allows displaying source code compiled on systems
+ with different file system layout.
+
--group::
Show event group information together. It forces group output also
if there are no groups defined in data file.
+--group-sort-idx::
+ Sort the output by the event at the index n in group. If n is invalid,
+ sort by the first event. It can support multiple groups with different
+ amount of events. WARNING: This should be used on grouped events.
+
--demangle::
Demangle symbol names to human readable form. It's enabled by default,
disable with --no-demangle.
@@ -540,4 +552,5 @@ include::callchain-overhead-calculation.txt[]
SEE ALSO
--------
-linkperf:perf-stat[1], linkperf:perf-annotate[1], linkperf:perf-record[1]
+linkperf:perf-stat[1], linkperf:perf-annotate[1], linkperf:perf-record[1],
+linkperf:perf-intel-pt[1]
diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt
index 63f938b887dd..5fbe42bd599b 100644
--- a/tools/perf/Documentation/perf-sched.txt
+++ b/tools/perf/Documentation/perf-sched.txt
@@ -110,6 +110,10 @@ OPTIONS for 'perf sched timehist'
--max-stack::
Maximum number of functions to display in backtrace, default 5.
+-C=::
+--cpu=::
+ Only show events for the given CPU(s) (comma separated list).
+
-p=::
--pid=::
Only show events for given process ID (comma separated list).
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 2599b057e47b..963487e82edc 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -319,6 +319,9 @@ OPTIONS
--show-bpf-events
Display bpf events i.e. events of type PERF_RECORD_KSYMBOL and PERF_RECORD_BPF_EVENT.
+--show-cgroup-events
+ Display cgroup events i.e. events of type PERF_RECORD_CGROUP.
+
--demangle::
Demangle symbol names to human readable form. It's enabled by default,
disable with --no-demangle.
@@ -390,6 +393,9 @@ include::itrace.txt[]
--reltime::
Print time stamps relative to trace start.
+--deltatime::
+ Print time stamps relative to previous event.
+
--per-event-dump::
Create per event files with a "perf.data.EVENT.dump" name instead of
printing to stdout, useful, for instance, for generating flamegraphs.
@@ -406,6 +412,14 @@ include::itrace.txt[]
--xed::
Run xed disassembler on output. Requires installing the xed disassembler.
+-S::
+--symbols=symbol[,symbol...]::
+ Only consider the listed symbols. Symbols are typically a name
+ but they may also be a hexadecimal address.
+
+ For example, to select the symbol noploop or the address 0x4007a0:
+ perf script --symbols=noploop,0x4007a0
+
--call-trace::
Show call stream for intel_pt traces. The CPUs are interleaved, but
can be filtered with -C.
@@ -429,4 +443,4 @@ include::itrace.txt[]
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-script-perl[1],
-linkperf:perf-script-python[1]
+linkperf:perf-script-python[1], linkperf:perf-intel-pt[1]
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 9431b8066fb4..4d56586b2fb9 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -334,6 +334,15 @@ Configure all used events to run in kernel space.
--all-user::
Configure all used events to run in user space.
+--percore-show-thread::
+The event modifier "percore" sums up the event counts
+for all hardware threads in a core and show the counts per core.
+
+This option with event modifier "percore" enabled also sums up the event
+counts for all hardware threads in a core but shows the summed counts per
+hardware thread. This is essentially a replacement for the any bit and
+convenient for post processing.
+
EXAMPLES
--------
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 5596129a71cf..487737a725e9 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -53,6 +53,11 @@ Default is to monitor all CPUS.
--group::
Put the counters into a counter group.
+--group-sort-idx::
+ Sort the output by the event at the index n in group. If n is invalid,
+ sort by the first event. It can support multiple groups with different
+ amount of events. WARNING: This should be used on grouped events.
+
-F <freq>::
--freq=<freq>::
Profile at this frequency. Use 'max' to use the currently maximum
@@ -158,6 +163,12 @@ Default is to monitor all CPUS.
-M::
--disassembler-style=:: Set disassembler style for objdump.
+--prefix=PREFIX::
+--prefix-strip=N::
+ Remove first N entries from source file path names in executables
+ and add PREFIX. This allows displaying source code compiled on systems
+ with different file system layout.
+
--source::
Interleave source code with assembly code. Enabled by default,
disable with --no-source.
@@ -266,6 +277,10 @@ Default is to monitor all CPUS.
Record events of type PERF_RECORD_NAMESPACES and display it with the
'cgroup_id' sort key.
+--all-cgroups::
+ Record events of type PERF_RECORD_CGROUP and display it with the
+ 'cgroup' sort key.
+
--switch-on EVENT_NAME::
Only consider events after this event is found.
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 4934edb5adfd..5d7b947320fb 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -7,6 +7,7 @@ tools/lib/traceevent
tools/lib/api
tools/lib/bpf
tools/lib/subcmd
+tools/lib/perf
tools/lib/argv_split.c
tools/lib/ctype.c
tools/lib/hweight.c
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 7902a5681fc8..b8fc7d972be9 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -35,7 +35,7 @@ endif
# Only pass canonical directory names as the output directory:
#
ifneq ($(O),)
- FULL_O := $(shell readlink -f $(O) || echo $(O))
+ FULL_O := $(shell cd $(PWD); readlink -f $(O) || echo $(O))
endif
#
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index c90f4146e5a2..12a8204d63c6 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -228,8 +228,17 @@ strip-libs = $(filter-out -l%,$(1))
PYTHON_CONFIG_SQ := $(call shell-sq,$(PYTHON_CONFIG))
+# Python 3.8 changed the output of `python-config --ldflags` to not include the
+# '-lpythonX.Y' flag unless '--embed' is also passed. The feature check for
+# libpython fails if that flag is not included in LDFLAGS
+ifeq ($(shell $(PYTHON_CONFIG_SQ) --ldflags --embed 2>&1 1>/dev/null; echo $$?), 0)
+ PYTHON_CONFIG_LDFLAGS := --ldflags --embed
+else
+ PYTHON_CONFIG_LDFLAGS := --ldflags
+endif
+
ifdef PYTHON_CONFIG
- PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null)
+ PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) $(PYTHON_CONFIG_LDFLAGS) 2>/dev/null)
PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS))
PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil
PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --includes 2>/dev/null)
@@ -286,7 +295,7 @@ ifeq ($(DEBUG),0)
endif
endif
-INC_FLAGS += -I$(src-perf)/lib/include
+INC_FLAGS += -I$(srctree)/tools/lib/perf/include
INC_FLAGS += -I$(src-perf)/util/include
INC_FLAGS += -I$(src-perf)/arch/$(SRCARCH)/include
INC_FLAGS += -I$(srctree)/tools/include/
@@ -348,6 +357,10 @@ ifeq ($(feature-gettid), 1)
CFLAGS += -DHAVE_GETTID
endif
+ifeq ($(feature-file-handle), 1)
+ CFLAGS += -DHAVE_FILE_HANDLE
+endif
+
ifdef NO_LIBELF
NO_DWARF := 1
NO_DEMANGLE := 1
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index eae5d5e95952..d15a311408f1 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -230,7 +230,8 @@ LIB_DIR = $(srctree)/tools/lib/api/
TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/
BPF_DIR = $(srctree)/tools/lib/bpf/
SUBCMD_DIR = $(srctree)/tools/lib/subcmd/
-LIBPERF_DIR = $(srctree)/tools/perf/lib/
+LIBPERF_DIR = $(srctree)/tools/lib/perf/
+DOC_DIR = $(srctree)/tools/perf/Documentation/
# Set FEATURE_TESTS to 'all' so all possible feature checkers are executed.
# Without this setting the output feature dump file misses some features, for
@@ -573,7 +574,7 @@ arch_errno_hdr_dir := $(srctree)/tools
arch_errno_tbl := $(srctree)/tools/perf/trace/beauty/arch_errno_names.sh
$(arch_errno_name_array): $(arch_errno_tbl)
- $(Q)$(SHELL) '$(arch_errno_tbl)' $(CC) $(arch_errno_hdr_dir) > $@
+ $(Q)$(SHELL) '$(arch_errno_tbl)' $(firstword $(CC)) $(arch_errno_hdr_dir) > $@
sync_file_range_arrays := $(beauty_outdir)/sync_file_range_arrays.c
sync_file_range_tbls := $(srctree)/tools/perf/trace/beauty/sync_file_range.sh
@@ -792,7 +793,6 @@ $(LIBSUBCMD): FORCE
$(Q)$(MAKE) -C $(SUBCMD_DIR) O=$(OUTPUT) $(OUTPUT)libsubcmd.a
$(LIBSUBCMD)-clean:
- $(call QUIET_CLEAN, libsubcmd)
$(Q)$(MAKE) -C $(SUBCMD_DIR) O=$(OUTPUT) clean
help:
@@ -832,7 +832,7 @@ INSTALL_DOC_TARGETS += quick-install-doc quick-install-man quick-install-html
# 'make doc' should call 'make -C Documentation all'
$(DOC_TARGETS):
- $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) $(@:doc=all)
+ $(Q)$(MAKE) -C $(DOC_DIR) O=$(OUTPUT) $(@:doc=all)
TAG_FOLDERS= . ../lib ../include
TAG_FILES= ../../include/uapi/linux/perf_event.h
@@ -959,7 +959,7 @@ install-python_ext:
# 'make install-doc' should call 'make -C Documentation install'
$(INSTALL_DOC_TARGETS):
- $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) $(@:-doc=)
+ $(Q)$(MAKE) -C $(DOC_DIR) O=$(OUTPUT) $(@:-doc=)
### Cleaning rules
@@ -1008,7 +1008,8 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea
$(OUTPUT)$(rename_flags_array) \
$(OUTPUT)$(arch_errno_name_array) \
$(OUTPUT)$(sync_file_range_arrays)
- $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean
+ $(call QUIET_CLEAN, Documentation) \
+ $(MAKE) -C $(DOC_DIR) O=$(OUTPUT) clean >/dev/null
#
# To provide FEATURE-DUMP into $(FEATURE_DUMP_COPY)
diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
index ede040cf82ad..941f814820b8 100644
--- a/tools/perf/arch/arm/util/cs-etm.c
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -226,7 +226,7 @@ static int cs_etm_set_sink_attr(struct perf_pmu *pmu,
if (term->type != PERF_EVSEL__CONFIG_TERM_DRV_CFG)
continue;
- sink = term->val.drv_cfg;
+ sink = term->val.str;
snprintf(path, PATH_MAX, "sinks/%s", sink);
ret = perf_pmu__scan_file(pmu, path, "%x", &hash);
@@ -858,21 +858,6 @@ static void cs_etm_recording_free(struct auxtrace_record *itr)
free(ptr);
}
-static int cs_etm_read_finish(struct auxtrace_record *itr, int idx)
-{
- struct cs_etm_recording *ptr =
- container_of(itr, struct cs_etm_recording, itr);
- struct evsel *evsel;
-
- evlist__for_each_entry(ptr->evlist, evsel) {
- if (evsel->core.attr.type == ptr->cs_etm_pmu->type)
- return perf_evlist__enable_event_idx(ptr->evlist,
- evsel, idx);
- }
-
- return -EINVAL;
-}
-
struct auxtrace_record *cs_etm_record_init(int *err)
{
struct perf_pmu *cs_etm_pmu;
@@ -892,6 +877,7 @@ struct auxtrace_record *cs_etm_record_init(int *err)
}
ptr->cs_etm_pmu = cs_etm_pmu;
+ ptr->itr.pmu = cs_etm_pmu;
ptr->itr.parse_snapshot_options = cs_etm_parse_snapshot_options;
ptr->itr.recording_options = cs_etm_recording_options;
ptr->itr.info_priv_size = cs_etm_info_priv_size;
@@ -901,7 +887,7 @@ struct auxtrace_record *cs_etm_record_init(int *err)
ptr->itr.snapshot_finish = cs_etm_snapshot_finish;
ptr->itr.reference = cs_etm_reference;
ptr->itr.free = cs_etm_recording_free;
- ptr->itr.read_finish = cs_etm_read_finish;
+ ptr->itr.read_finish = auxtrace_record__read_finish;
*err = 0;
return &ptr->itr;
diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build
index 0a7782c61209..5c13438c7bd4 100644
--- a/tools/perf/arch/arm64/util/Build
+++ b/tools/perf/arch/arm64/util/Build
@@ -1,6 +1,6 @@
perf-y += header.o
+perf-y += machine.o
perf-y += perf_regs.o
-perf-y += sym-handling.o
perf-$(CONFIG_DWARF) += dwarf-regs.o
perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c
index eba6541ec0f1..27653be24447 100644
--- a/tools/perf/arch/arm64/util/arm-spe.c
+++ b/tools/perf/arch/arm64/util/arm-spe.c
@@ -11,17 +11,17 @@
#include <linux/zalloc.h>
#include <time.h>
-#include "../../util/cpumap.h"
-#include "../../util/event.h"
-#include "../../util/evsel.h"
-#include "../../util/evlist.h"
-#include "../../util/session.h"
+#include "../../../util/cpumap.h"
+#include "../../../util/event.h"
+#include "../../../util/evsel.h"
+#include "../../../util/evlist.h"
+#include "../../../util/session.h"
#include <internal/lib.h> // page_size
-#include "../../util/pmu.h"
-#include "../../util/debug.h"
-#include "../../util/auxtrace.h"
-#include "../../util/record.h"
-#include "../../util/arm-spe.h"
+#include "../../../util/pmu.h"
+#include "../../../util/debug.h"
+#include "../../../util/auxtrace.h"
+#include "../../../util/record.h"
+#include "../../../util/arm-spe.h"
#define KiB(x) ((x) * 1024)
#define MiB(x) ((x) * 1024 * 1024)
@@ -158,20 +158,6 @@ static void arm_spe_recording_free(struct auxtrace_record *itr)
free(sper);
}
-static int arm_spe_read_finish(struct auxtrace_record *itr, int idx)
-{
- struct arm_spe_recording *sper =
- container_of(itr, struct arm_spe_recording, itr);
- struct evsel *evsel;
-
- evlist__for_each_entry(sper->evlist, evsel) {
- if (evsel->core.attr.type == sper->arm_spe_pmu->type)
- return perf_evlist__enable_event_idx(sper->evlist,
- evsel, idx);
- }
- return -EINVAL;
-}
-
struct auxtrace_record *arm_spe_recording_init(int *err,
struct perf_pmu *arm_spe_pmu)
{
@@ -189,12 +175,13 @@ struct auxtrace_record *arm_spe_recording_init(int *err,
}
sper->arm_spe_pmu = arm_spe_pmu;
+ sper->itr.pmu = arm_spe_pmu;
sper->itr.recording_options = arm_spe_recording_options;
sper->itr.info_priv_size = arm_spe_info_priv_size;
sper->itr.info_fill = arm_spe_info_fill;
sper->itr.free = arm_spe_recording_free;
sper->itr.reference = arm_spe_reference;
- sper->itr.read_finish = arm_spe_read_finish;
+ sper->itr.read_finish = auxtrace_record__read_finish;
sper->itr.alignment = 0;
*err = 0;
diff --git a/tools/perf/arch/arm64/util/header.c b/tools/perf/arch/arm64/util/header.c
index a32e4b72a98f..d730666ab95d 100644
--- a/tools/perf/arch/arm64/util/header.c
+++ b/tools/perf/arch/arm64/util/header.c
@@ -1,8 +1,10 @@
#include <stdio.h>
#include <stdlib.h>
#include <perf/cpumap.h>
+#include <util/cpumap.h>
#include <internal/cpumap.h>
#include <api/fs/fs.h>
+#include <errno.h>
#include "debug.h"
#include "header.h"
@@ -12,26 +14,21 @@
#define MIDR_VARIANT_SHIFT 20
#define MIDR_VARIANT_MASK (0xf << MIDR_VARIANT_SHIFT)
-char *get_cpuid_str(struct perf_pmu *pmu)
+static int _get_cpuid(char *buf, size_t sz, struct perf_cpu_map *cpus)
{
- char *buf = NULL;
- char path[PATH_MAX];
const char *sysfs = sysfs__mountpoint();
- int cpu;
u64 midr = 0;
- struct perf_cpu_map *cpus;
- FILE *file;
+ int cpu;
- if (!sysfs || !pmu || !pmu->cpus)
- return NULL;
+ if (!sysfs || sz < MIDR_SIZE)
+ return EINVAL;
- buf = malloc(MIDR_SIZE);
- if (!buf)
- return NULL;
+ cpus = perf_cpu_map__get(cpus);
- /* read midr from list of cpus mapped to this pmu */
- cpus = perf_cpu_map__get(pmu->cpus);
for (cpu = 0; cpu < perf_cpu_map__nr(cpus); cpu++) {
+ char path[PATH_MAX];
+ FILE *file;
+
scnprintf(path, PATH_MAX, "%s/devices/system/cpu/cpu%d"MIDR,
sysfs, cpus->map[cpu]);
@@ -57,12 +54,48 @@ char *get_cpuid_str(struct perf_pmu *pmu)
break;
}
- if (!midr) {
+ perf_cpu_map__put(cpus);
+
+ if (!midr)
+ return EINVAL;
+
+ return 0;
+}
+
+int get_cpuid(char *buf, size_t sz)
+{
+ struct perf_cpu_map *cpus = perf_cpu_map__new(NULL);
+ int ret;
+
+ if (!cpus)
+ return EINVAL;
+
+ ret = _get_cpuid(buf, sz, cpus);
+
+ perf_cpu_map__put(cpus);
+
+ return ret;
+}
+
+char *get_cpuid_str(struct perf_pmu *pmu)
+{
+ char *buf = NULL;
+ int res;
+
+ if (!pmu || !pmu->cpus)
+ return NULL;
+
+ buf = malloc(MIDR_SIZE);
+ if (!buf)
+ return NULL;
+
+ /* read midr from list of cpus mapped to this pmu */
+ res = _get_cpuid(buf, MIDR_SIZE, pmu->cpus);
+ if (res) {
pr_err("failed to get cpuid string for PMU %s\n", pmu->name);
free(buf);
buf = NULL;
}
- perf_cpu_map__put(cpus);
return buf;
}
diff --git a/tools/perf/arch/arm64/util/machine.c b/tools/perf/arch/arm64/util/machine.c
new file mode 100644
index 000000000000..d41b27e781d3
--- /dev/null
+++ b/tools/perf/arch/arm64/util/machine.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <string.h>
+#include "debug.h"
+#include "symbol.h"
+
+/* On arm64, kernel text segment starts at high memory address,
+ * for example 0xffff 0000 8xxx xxxx. Modules start at a low memory
+ * address, like 0xffff 0000 00ax xxxx. When only small amount of
+ * memory is used by modules, gap between end of module's text segment
+ * and start of kernel text segment may reach 2G.
+ * Therefore do not fill this gap and do not assign it to the kernel dso map.
+ */
+
+#define SYMBOL_LIMIT (1 << 12) /* 4K */
+
+void arch__symbols__fixup_end(struct symbol *p, struct symbol *c)
+{
+ if ((strchr(p->name, '[') && strchr(c->name, '[') == NULL) ||
+ (strchr(p->name, '[') == NULL && strchr(c->name, '[')))
+ /* Limit range of last symbol in module and kernel */
+ p->end += SYMBOL_LIMIT;
+ else
+ p->end = c->start;
+ pr_debug4("%s sym:%s end:%#lx\n", __func__, p->name, p->end);
+}
diff --git a/tools/perf/arch/arm64/util/perf_regs.c b/tools/perf/arch/arm64/util/perf_regs.c
index 2864e2e3776d..2833e101a7c6 100644
--- a/tools/perf/arch/arm64/util/perf_regs.c
+++ b/tools/perf/arch/arm64/util/perf_regs.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-#include "../../util/perf_regs.h"
+#include "../../../util/perf_regs.h"
const struct sample_reg sample_reg_masks[] = {
SMPL_REG_END
diff --git a/tools/perf/arch/arm64/util/sym-handling.c b/tools/perf/arch/arm64/util/sym-handling.c
deleted file mode 100644
index 8dfa3e5229f1..000000000000
--- a/tools/perf/arch/arm64/util/sym-handling.c
+++ /dev/null
@@ -1,19 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- *
- * Copyright (C) 2015 Naveen N. Rao, IBM Corporation
- */
-
-#include "symbol.h" // for the elf__needs_adjust_symbols() prototype
-#include <stdbool.h>
-
-#ifdef HAVE_LIBELF_SUPPORT
-#include <gelf.h>
-
-bool elf__needs_adjust_symbols(GElf_Ehdr ehdr)
-{
- return ehdr.e_type == ET_EXEC ||
- ehdr.e_type == ET_REL ||
- ehdr.e_type == ET_DYN;
-}
-#endif
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index 43f736ed47f2..35b61bfc1b1a 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -517,3 +517,5 @@
433 common fspick sys_fspick
434 common pidfd_open sys_pidfd_open
435 nospu clone3 ppc_clone3
+437 common openat2 sys_openat2
+438 common pidfd_getfd sys_pidfd_getfd
diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build
index 7cf0b8803097..e5c9504f8586 100644
--- a/tools/perf/arch/powerpc/util/Build
+++ b/tools/perf/arch/powerpc/util/Build
@@ -1,5 +1,4 @@
perf-y += header.o
-perf-y += sym-handling.o
perf-y += kvm-stat.o
perf-y += perf_regs.o
perf-y += mem-events.o
diff --git a/tools/perf/arch/powerpc/util/perf_regs.c b/tools/perf/arch/powerpc/util/perf_regs.c
index e9c436eeffc9..0a5242900248 100644
--- a/tools/perf/arch/powerpc/util/perf_regs.c
+++ b/tools/perf/arch/powerpc/util/perf_regs.c
@@ -4,8 +4,8 @@
#include <regex.h>
#include <linux/zalloc.h>
-#include "../../util/perf_regs.h"
-#include "../../util/debug.h"
+#include "../../../util/perf_regs.h"
+#include "../../../util/debug.h"
#include <linux/kernel.h>
diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c
index abb7a12d8f93..0856b32f9e08 100644
--- a/tools/perf/arch/powerpc/util/sym-handling.c
+++ b/tools/perf/arch/powerpc/util/sym-handling.c
@@ -10,16 +10,6 @@
#include "probe-event.h"
#include "probe-file.h"
-#ifdef HAVE_LIBELF_SUPPORT
-bool elf__needs_adjust_symbols(GElf_Ehdr ehdr)
-{
- return ehdr.e_type == ET_EXEC ||
- ehdr.e_type == ET_REL ||
- ehdr.e_type == ET_DYN;
-}
-
-#endif
-
int arch__choose_best_symbol(struct symbol *syma,
struct symbol *symb __maybe_unused)
{
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index c29976eca4a8..37b844f839bc 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -8,355 +8,357 @@
#
# The abi is "common", "64" or "x32" for this file.
#
-0 common read __x64_sys_read
-1 common write __x64_sys_write
-2 common open __x64_sys_open
-3 common close __x64_sys_close
-4 common stat __x64_sys_newstat
-5 common fstat __x64_sys_newfstat
-6 common lstat __x64_sys_newlstat
-7 common poll __x64_sys_poll
-8 common lseek __x64_sys_lseek
-9 common mmap __x64_sys_mmap
-10 common mprotect __x64_sys_mprotect
-11 common munmap __x64_sys_munmap
-12 common brk __x64_sys_brk
-13 64 rt_sigaction __x64_sys_rt_sigaction
-14 common rt_sigprocmask __x64_sys_rt_sigprocmask
-15 64 rt_sigreturn __x64_sys_rt_sigreturn/ptregs
-16 64 ioctl __x64_sys_ioctl
-17 common pread64 __x64_sys_pread64
-18 common pwrite64 __x64_sys_pwrite64
-19 64 readv __x64_sys_readv
-20 64 writev __x64_sys_writev
-21 common access __x64_sys_access
-22 common pipe __x64_sys_pipe
-23 common select __x64_sys_select
-24 common sched_yield __x64_sys_sched_yield
-25 common mremap __x64_sys_mremap
-26 common msync __x64_sys_msync
-27 common mincore __x64_sys_mincore
-28 common madvise __x64_sys_madvise
-29 common shmget __x64_sys_shmget
-30 common shmat __x64_sys_shmat
-31 common shmctl __x64_sys_shmctl
-32 common dup __x64_sys_dup
-33 common dup2 __x64_sys_dup2
-34 common pause __x64_sys_pause
-35 common nanosleep __x64_sys_nanosleep
-36 common getitimer __x64_sys_getitimer
-37 common alarm __x64_sys_alarm
-38 common setitimer __x64_sys_setitimer
-39 common getpid __x64_sys_getpid
-40 common sendfile __x64_sys_sendfile64
-41 common socket __x64_sys_socket
-42 common connect __x64_sys_connect
-43 common accept __x64_sys_accept
-44 common sendto __x64_sys_sendto
-45 64 recvfrom __x64_sys_recvfrom
-46 64 sendmsg __x64_sys_sendmsg
-47 64 recvmsg __x64_sys_recvmsg
-48 common shutdown __x64_sys_shutdown
-49 common bind __x64_sys_bind
-50 common listen __x64_sys_listen
-51 common getsockname __x64_sys_getsockname
-52 common getpeername __x64_sys_getpeername
-53 common socketpair __x64_sys_socketpair
-54 64 setsockopt __x64_sys_setsockopt
-55 64 getsockopt __x64_sys_getsockopt
-56 common clone __x64_sys_clone/ptregs
-57 common fork __x64_sys_fork/ptregs
-58 common vfork __x64_sys_vfork/ptregs
-59 64 execve __x64_sys_execve/ptregs
-60 common exit __x64_sys_exit
-61 common wait4 __x64_sys_wait4
-62 common kill __x64_sys_kill
-63 common uname __x64_sys_newuname
-64 common semget __x64_sys_semget
-65 common semop __x64_sys_semop
-66 common semctl __x64_sys_semctl
-67 common shmdt __x64_sys_shmdt
-68 common msgget __x64_sys_msgget
-69 common msgsnd __x64_sys_msgsnd
-70 common msgrcv __x64_sys_msgrcv
-71 common msgctl __x64_sys_msgctl
-72 common fcntl __x64_sys_fcntl
-73 common flock __x64_sys_flock
-74 common fsync __x64_sys_fsync
-75 common fdatasync __x64_sys_fdatasync
-76 common truncate __x64_sys_truncate
-77 common ftruncate __x64_sys_ftruncate
-78 common getdents __x64_sys_getdents
-79 common getcwd __x64_sys_getcwd
-80 common chdir __x64_sys_chdir
-81 common fchdir __x64_sys_fchdir
-82 common rename __x64_sys_rename
-83 common mkdir __x64_sys_mkdir
-84 common rmdir __x64_sys_rmdir
-85 common creat __x64_sys_creat
-86 common link __x64_sys_link
-87 common unlink __x64_sys_unlink
-88 common symlink __x64_sys_symlink
-89 common readlink __x64_sys_readlink
-90 common chmod __x64_sys_chmod
-91 common fchmod __x64_sys_fchmod
-92 common chown __x64_sys_chown
-93 common fchown __x64_sys_fchown
-94 common lchown __x64_sys_lchown
-95 common umask __x64_sys_umask
-96 common gettimeofday __x64_sys_gettimeofday
-97 common getrlimit __x64_sys_getrlimit
-98 common getrusage __x64_sys_getrusage
-99 common sysinfo __x64_sys_sysinfo
-100 common times __x64_sys_times
-101 64 ptrace __x64_sys_ptrace
-102 common getuid __x64_sys_getuid
-103 common syslog __x64_sys_syslog
-104 common getgid __x64_sys_getgid
-105 common setuid __x64_sys_setuid
-106 common setgid __x64_sys_setgid
-107 common geteuid __x64_sys_geteuid
-108 common getegid __x64_sys_getegid
-109 common setpgid __x64_sys_setpgid
-110 common getppid __x64_sys_getppid
-111 common getpgrp __x64_sys_getpgrp
-112 common setsid __x64_sys_setsid
-113 common setreuid __x64_sys_setreuid
-114 common setregid __x64_sys_setregid
-115 common getgroups __x64_sys_getgroups
-116 common setgroups __x64_sys_setgroups
-117 common setresuid __x64_sys_setresuid
-118 common getresuid __x64_sys_getresuid
-119 common setresgid __x64_sys_setresgid
-120 common getresgid __x64_sys_getresgid
-121 common getpgid __x64_sys_getpgid
-122 common setfsuid __x64_sys_setfsuid
-123 common setfsgid __x64_sys_setfsgid
-124 common getsid __x64_sys_getsid
-125 common capget __x64_sys_capget
-126 common capset __x64_sys_capset
-127 64 rt_sigpending __x64_sys_rt_sigpending
-128 64 rt_sigtimedwait __x64_sys_rt_sigtimedwait
-129 64 rt_sigqueueinfo __x64_sys_rt_sigqueueinfo
-130 common rt_sigsuspend __x64_sys_rt_sigsuspend
-131 64 sigaltstack __x64_sys_sigaltstack
-132 common utime __x64_sys_utime
-133 common mknod __x64_sys_mknod
+0 common read sys_read
+1 common write sys_write
+2 common open sys_open
+3 common close sys_close
+4 common stat sys_newstat
+5 common fstat sys_newfstat
+6 common lstat sys_newlstat
+7 common poll sys_poll
+8 common lseek sys_lseek
+9 common mmap sys_mmap
+10 common mprotect sys_mprotect
+11 common munmap sys_munmap
+12 common brk sys_brk
+13 64 rt_sigaction sys_rt_sigaction
+14 common rt_sigprocmask sys_rt_sigprocmask
+15 64 rt_sigreturn sys_rt_sigreturn
+16 64 ioctl sys_ioctl
+17 common pread64 sys_pread64
+18 common pwrite64 sys_pwrite64
+19 64 readv sys_readv
+20 64 writev sys_writev
+21 common access sys_access
+22 common pipe sys_pipe
+23 common select sys_select
+24 common sched_yield sys_sched_yield
+25 common mremap sys_mremap
+26 common msync sys_msync
+27 common mincore sys_mincore
+28 common madvise sys_madvise
+29 common shmget sys_shmget
+30 common shmat sys_shmat
+31 common shmctl sys_shmctl
+32 common dup sys_dup
+33 common dup2 sys_dup2
+34 common pause sys_pause
+35 common nanosleep sys_nanosleep
+36 common getitimer sys_getitimer
+37 common alarm sys_alarm
+38 common setitimer sys_setitimer
+39 common getpid sys_getpid
+40 common sendfile sys_sendfile64
+41 common socket sys_socket
+42 common connect sys_connect
+43 common accept sys_accept
+44 common sendto sys_sendto
+45 64 recvfrom sys_recvfrom
+46 64 sendmsg sys_sendmsg
+47 64 recvmsg sys_recvmsg
+48 common shutdown sys_shutdown
+49 common bind sys_bind
+50 common listen sys_listen
+51 common getsockname sys_getsockname
+52 common getpeername sys_getpeername
+53 common socketpair sys_socketpair
+54 64 setsockopt sys_setsockopt
+55 64 getsockopt sys_getsockopt
+56 common clone sys_clone
+57 common fork sys_fork
+58 common vfork sys_vfork
+59 64 execve sys_execve
+60 common exit sys_exit
+61 common wait4 sys_wait4
+62 common kill sys_kill
+63 common uname sys_newuname
+64 common semget sys_semget
+65 common semop sys_semop
+66 common semctl sys_semctl
+67 common shmdt sys_shmdt
+68 common msgget sys_msgget
+69 common msgsnd sys_msgsnd
+70 common msgrcv sys_msgrcv
+71 common msgctl sys_msgctl
+72 common fcntl sys_fcntl
+73 common flock sys_flock
+74 common fsync sys_fsync
+75 common fdatasync sys_fdatasync
+76 common truncate sys_truncate
+77 common ftruncate sys_ftruncate
+78 common getdents sys_getdents
+79 common getcwd sys_getcwd
+80 common chdir sys_chdir
+81 common fchdir sys_fchdir
+82 common rename sys_rename
+83 common mkdir sys_mkdir
+84 common rmdir sys_rmdir
+85 common creat sys_creat
+86 common link sys_link
+87 common unlink sys_unlink
+88 common symlink sys_symlink
+89 common readlink sys_readlink
+90 common chmod sys_chmod
+91 common fchmod sys_fchmod
+92 common chown sys_chown
+93 common fchown sys_fchown
+94 common lchown sys_lchown
+95 common umask sys_umask
+96 common gettimeofday sys_gettimeofday
+97 common getrlimit sys_getrlimit
+98 common getrusage sys_getrusage
+99 common sysinfo sys_sysinfo
+100 common times sys_times
+101 64 ptrace sys_ptrace
+102 common getuid sys_getuid
+103 common syslog sys_syslog
+104 common getgid sys_getgid
+105 common setuid sys_setuid
+106 common setgid sys_setgid
+107 common geteuid sys_geteuid
+108 common getegid sys_getegid
+109 common setpgid sys_setpgid
+110 common getppid sys_getppid
+111 common getpgrp sys_getpgrp
+112 common setsid sys_setsid
+113 common setreuid sys_setreuid
+114 common setregid sys_setregid
+115 common getgroups sys_getgroups
+116 common setgroups sys_setgroups
+117 common setresuid sys_setresuid
+118 common getresuid sys_getresuid
+119 common setresgid sys_setresgid
+120 common getresgid sys_getresgid
+121 common getpgid sys_getpgid
+122 common setfsuid sys_setfsuid
+123 common setfsgid sys_setfsgid
+124 common getsid sys_getsid
+125 common capget sys_capget
+126 common capset sys_capset
+127 64 rt_sigpending sys_rt_sigpending
+128 64 rt_sigtimedwait sys_rt_sigtimedwait
+129 64 rt_sigqueueinfo sys_rt_sigqueueinfo
+130 common rt_sigsuspend sys_rt_sigsuspend
+131 64 sigaltstack sys_sigaltstack
+132 common utime sys_utime
+133 common mknod sys_mknod
134 64 uselib
-135 common personality __x64_sys_personality
-136 common ustat __x64_sys_ustat
-137 common statfs __x64_sys_statfs
-138 common fstatfs __x64_sys_fstatfs
-139 common sysfs __x64_sys_sysfs
-140 common getpriority __x64_sys_getpriority
-141 common setpriority __x64_sys_setpriority
-142 common sched_setparam __x64_sys_sched_setparam
-143 common sched_getparam __x64_sys_sched_getparam
-144 common sched_setscheduler __x64_sys_sched_setscheduler
-145 common sched_getscheduler __x64_sys_sched_getscheduler
-146 common sched_get_priority_max __x64_sys_sched_get_priority_max
-147 common sched_get_priority_min __x64_sys_sched_get_priority_min
-148 common sched_rr_get_interval __x64_sys_sched_rr_get_interval
-149 common mlock __x64_sys_mlock
-150 common munlock __x64_sys_munlock
-151 common mlockall __x64_sys_mlockall
-152 common munlockall __x64_sys_munlockall
-153 common vhangup __x64_sys_vhangup
-154 common modify_ldt __x64_sys_modify_ldt
-155 common pivot_root __x64_sys_pivot_root
-156 64 _sysctl __x64_sys_sysctl
-157 common prctl __x64_sys_prctl
-158 common arch_prctl __x64_sys_arch_prctl
-159 common adjtimex __x64_sys_adjtimex
-160 common setrlimit __x64_sys_setrlimit
-161 common chroot __x64_sys_chroot
-162 common sync __x64_sys_sync
-163 common acct __x64_sys_acct
-164 common settimeofday __x64_sys_settimeofday
-165 common mount __x64_sys_mount
-166 common umount2 __x64_sys_umount
-167 common swapon __x64_sys_swapon
-168 common swapoff __x64_sys_swapoff
-169 common reboot __x64_sys_reboot
-170 common sethostname __x64_sys_sethostname
-171 common setdomainname __x64_sys_setdomainname
-172 common iopl __x64_sys_iopl/ptregs
-173 common ioperm __x64_sys_ioperm
+135 common personality sys_personality
+136 common ustat sys_ustat
+137 common statfs sys_statfs
+138 common fstatfs sys_fstatfs
+139 common sysfs sys_sysfs
+140 common getpriority sys_getpriority
+141 common setpriority sys_setpriority
+142 common sched_setparam sys_sched_setparam
+143 common sched_getparam sys_sched_getparam
+144 common sched_setscheduler sys_sched_setscheduler
+145 common sched_getscheduler sys_sched_getscheduler
+146 common sched_get_priority_max sys_sched_get_priority_max
+147 common sched_get_priority_min sys_sched_get_priority_min
+148 common sched_rr_get_interval sys_sched_rr_get_interval
+149 common mlock sys_mlock
+150 common munlock sys_munlock
+151 common mlockall sys_mlockall
+152 common munlockall sys_munlockall
+153 common vhangup sys_vhangup
+154 common modify_ldt sys_modify_ldt
+155 common pivot_root sys_pivot_root
+156 64 _sysctl sys_sysctl
+157 common prctl sys_prctl
+158 common arch_prctl sys_arch_prctl
+159 common adjtimex sys_adjtimex
+160 common setrlimit sys_setrlimit
+161 common chroot sys_chroot
+162 common sync sys_sync
+163 common acct sys_acct
+164 common settimeofday sys_settimeofday
+165 common mount sys_mount
+166 common umount2 sys_umount
+167 common swapon sys_swapon
+168 common swapoff sys_swapoff
+169 common reboot sys_reboot
+170 common sethostname sys_sethostname
+171 common setdomainname sys_setdomainname
+172 common iopl sys_iopl
+173 common ioperm sys_ioperm
174 64 create_module
-175 common init_module __x64_sys_init_module
-176 common delete_module __x64_sys_delete_module
+175 common init_module sys_init_module
+176 common delete_module sys_delete_module
177 64 get_kernel_syms
178 64 query_module
-179 common quotactl __x64_sys_quotactl
+179 common quotactl sys_quotactl
180 64 nfsservctl
181 common getpmsg
182 common putpmsg
183 common afs_syscall
184 common tuxcall
185 common security
-186 common gettid __x64_sys_gettid
-187 common readahead __x64_sys_readahead
-188 common setxattr __x64_sys_setxattr
-189 common lsetxattr __x64_sys_lsetxattr
-190 common fsetxattr __x64_sys_fsetxattr
-191 common getxattr __x64_sys_getxattr
-192 common lgetxattr __x64_sys_lgetxattr
-193 common fgetxattr __x64_sys_fgetxattr
-194 common listxattr __x64_sys_listxattr
-195 common llistxattr __x64_sys_llistxattr
-196 common flistxattr __x64_sys_flistxattr
-197 common removexattr __x64_sys_removexattr
-198 common lremovexattr __x64_sys_lremovexattr
-199 common fremovexattr __x64_sys_fremovexattr
-200 common tkill __x64_sys_tkill
-201 common time __x64_sys_time
-202 common futex __x64_sys_futex
-203 common sched_setaffinity __x64_sys_sched_setaffinity
-204 common sched_getaffinity __x64_sys_sched_getaffinity
+186 common gettid sys_gettid
+187 common readahead sys_readahead
+188 common setxattr sys_setxattr
+189 common lsetxattr sys_lsetxattr
+190 common fsetxattr sys_fsetxattr
+191 common getxattr sys_getxattr
+192 common lgetxattr sys_lgetxattr
+193 common fgetxattr sys_fgetxattr
+194 common listxattr sys_listxattr
+195 common llistxattr sys_llistxattr
+196 common flistxattr sys_flistxattr
+197 common removexattr sys_removexattr
+198 common lremovexattr sys_lremovexattr
+199 common fremovexattr sys_fremovexattr
+200 common tkill sys_tkill
+201 common time sys_time
+202 common futex sys_futex
+203 common sched_setaffinity sys_sched_setaffinity
+204 common sched_getaffinity sys_sched_getaffinity
205 64 set_thread_area
-206 64 io_setup __x64_sys_io_setup
-207 common io_destroy __x64_sys_io_destroy
-208 common io_getevents __x64_sys_io_getevents
-209 64 io_submit __x64_sys_io_submit
-210 common io_cancel __x64_sys_io_cancel
+206 64 io_setup sys_io_setup
+207 common io_destroy sys_io_destroy
+208 common io_getevents sys_io_getevents
+209 64 io_submit sys_io_submit
+210 common io_cancel sys_io_cancel
211 64 get_thread_area
-212 common lookup_dcookie __x64_sys_lookup_dcookie
-213 common epoll_create __x64_sys_epoll_create
+212 common lookup_dcookie sys_lookup_dcookie
+213 common epoll_create sys_epoll_create
214 64 epoll_ctl_old
215 64 epoll_wait_old
-216 common remap_file_pages __x64_sys_remap_file_pages
-217 common getdents64 __x64_sys_getdents64
-218 common set_tid_address __x64_sys_set_tid_address
-219 common restart_syscall __x64_sys_restart_syscall
-220 common semtimedop __x64_sys_semtimedop
-221 common fadvise64 __x64_sys_fadvise64
-222 64 timer_create __x64_sys_timer_create
-223 common timer_settime __x64_sys_timer_settime
-224 common timer_gettime __x64_sys_timer_gettime
-225 common timer_getoverrun __x64_sys_timer_getoverrun
-226 common timer_delete __x64_sys_timer_delete
-227 common clock_settime __x64_sys_clock_settime
-228 common clock_gettime __x64_sys_clock_gettime
-229 common clock_getres __x64_sys_clock_getres
-230 common clock_nanosleep __x64_sys_clock_nanosleep
-231 common exit_group __x64_sys_exit_group
-232 common epoll_wait __x64_sys_epoll_wait
-233 common epoll_ctl __x64_sys_epoll_ctl
-234 common tgkill __x64_sys_tgkill
-235 common utimes __x64_sys_utimes
+216 common remap_file_pages sys_remap_file_pages
+217 common getdents64 sys_getdents64
+218 common set_tid_address sys_set_tid_address
+219 common restart_syscall sys_restart_syscall
+220 common semtimedop sys_semtimedop
+221 common fadvise64 sys_fadvise64
+222 64 timer_create sys_timer_create
+223 common timer_settime sys_timer_settime
+224 common timer_gettime sys_timer_gettime
+225 common timer_getoverrun sys_timer_getoverrun
+226 common timer_delete sys_timer_delete
+227 common clock_settime sys_clock_settime
+228 common clock_gettime sys_clock_gettime
+229 common clock_getres sys_clock_getres
+230 common clock_nanosleep sys_clock_nanosleep
+231 common exit_group sys_exit_group
+232 common epoll_wait sys_epoll_wait
+233 common epoll_ctl sys_epoll_ctl
+234 common tgkill sys_tgkill
+235 common utimes sys_utimes
236 64 vserver
-237 common mbind __x64_sys_mbind
-238 common set_mempolicy __x64_sys_set_mempolicy
-239 common get_mempolicy __x64_sys_get_mempolicy
-240 common mq_open __x64_sys_mq_open
-241 common mq_unlink __x64_sys_mq_unlink
-242 common mq_timedsend __x64_sys_mq_timedsend
-243 common mq_timedreceive __x64_sys_mq_timedreceive
-244 64 mq_notify __x64_sys_mq_notify
-245 common mq_getsetattr __x64_sys_mq_getsetattr
-246 64 kexec_load __x64_sys_kexec_load
-247 64 waitid __x64_sys_waitid
-248 common add_key __x64_sys_add_key
-249 common request_key __x64_sys_request_key
-250 common keyctl __x64_sys_keyctl
-251 common ioprio_set __x64_sys_ioprio_set
-252 common ioprio_get __x64_sys_ioprio_get
-253 common inotify_init __x64_sys_inotify_init
-254 common inotify_add_watch __x64_sys_inotify_add_watch
-255 common inotify_rm_watch __x64_sys_inotify_rm_watch
-256 common migrate_pages __x64_sys_migrate_pages
-257 common openat __x64_sys_openat
-258 common mkdirat __x64_sys_mkdirat
-259 common mknodat __x64_sys_mknodat
-260 common fchownat __x64_sys_fchownat
-261 common futimesat __x64_sys_futimesat
-262 common newfstatat __x64_sys_newfstatat
-263 common unlinkat __x64_sys_unlinkat
-264 common renameat __x64_sys_renameat
-265 common linkat __x64_sys_linkat
-266 common symlinkat __x64_sys_symlinkat
-267 common readlinkat __x64_sys_readlinkat
-268 common fchmodat __x64_sys_fchmodat
-269 common faccessat __x64_sys_faccessat
-270 common pselect6 __x64_sys_pselect6
-271 common ppoll __x64_sys_ppoll
-272 common unshare __x64_sys_unshare
-273 64 set_robust_list __x64_sys_set_robust_list
-274 64 get_robust_list __x64_sys_get_robust_list
-275 common splice __x64_sys_splice
-276 common tee __x64_sys_tee
-277 common sync_file_range __x64_sys_sync_file_range
-278 64 vmsplice __x64_sys_vmsplice
-279 64 move_pages __x64_sys_move_pages
-280 common utimensat __x64_sys_utimensat
-281 common epoll_pwait __x64_sys_epoll_pwait
-282 common signalfd __x64_sys_signalfd
-283 common timerfd_create __x64_sys_timerfd_create
-284 common eventfd __x64_sys_eventfd
-285 common fallocate __x64_sys_fallocate
-286 common timerfd_settime __x64_sys_timerfd_settime
-287 common timerfd_gettime __x64_sys_timerfd_gettime
-288 common accept4 __x64_sys_accept4
-289 common signalfd4 __x64_sys_signalfd4
-290 common eventfd2 __x64_sys_eventfd2
-291 common epoll_create1 __x64_sys_epoll_create1
-292 common dup3 __x64_sys_dup3
-293 common pipe2 __x64_sys_pipe2
-294 common inotify_init1 __x64_sys_inotify_init1
-295 64 preadv __x64_sys_preadv
-296 64 pwritev __x64_sys_pwritev
-297 64 rt_tgsigqueueinfo __x64_sys_rt_tgsigqueueinfo
-298 common perf_event_open __x64_sys_perf_event_open
-299 64 recvmmsg __x64_sys_recvmmsg
-300 common fanotify_init __x64_sys_fanotify_init
-301 common fanotify_mark __x64_sys_fanotify_mark
-302 common prlimit64 __x64_sys_prlimit64
-303 common name_to_handle_at __x64_sys_name_to_handle_at
-304 common open_by_handle_at __x64_sys_open_by_handle_at
-305 common clock_adjtime __x64_sys_clock_adjtime
-306 common syncfs __x64_sys_syncfs
-307 64 sendmmsg __x64_sys_sendmmsg
-308 common setns __x64_sys_setns
-309 common getcpu __x64_sys_getcpu
-310 64 process_vm_readv __x64_sys_process_vm_readv
-311 64 process_vm_writev __x64_sys_process_vm_writev
-312 common kcmp __x64_sys_kcmp
-313 common finit_module __x64_sys_finit_module
-314 common sched_setattr __x64_sys_sched_setattr
-315 common sched_getattr __x64_sys_sched_getattr
-316 common renameat2 __x64_sys_renameat2
-317 common seccomp __x64_sys_seccomp
-318 common getrandom __x64_sys_getrandom
-319 common memfd_create __x64_sys_memfd_create
-320 common kexec_file_load __x64_sys_kexec_file_load
-321 common bpf __x64_sys_bpf
-322 64 execveat __x64_sys_execveat/ptregs
-323 common userfaultfd __x64_sys_userfaultfd
-324 common membarrier __x64_sys_membarrier
-325 common mlock2 __x64_sys_mlock2
-326 common copy_file_range __x64_sys_copy_file_range
-327 64 preadv2 __x64_sys_preadv2
-328 64 pwritev2 __x64_sys_pwritev2
-329 common pkey_mprotect __x64_sys_pkey_mprotect
-330 common pkey_alloc __x64_sys_pkey_alloc
-331 common pkey_free __x64_sys_pkey_free
-332 common statx __x64_sys_statx
-333 common io_pgetevents __x64_sys_io_pgetevents
-334 common rseq __x64_sys_rseq
+237 common mbind sys_mbind
+238 common set_mempolicy sys_set_mempolicy
+239 common get_mempolicy sys_get_mempolicy
+240 common mq_open sys_mq_open
+241 common mq_unlink sys_mq_unlink
+242 common mq_timedsend sys_mq_timedsend
+243 common mq_timedreceive sys_mq_timedreceive
+244 64 mq_notify sys_mq_notify
+245 common mq_getsetattr sys_mq_getsetattr
+246 64 kexec_load sys_kexec_load
+247 64 waitid sys_waitid
+248 common add_key sys_add_key
+249 common request_key sys_request_key
+250 common keyctl sys_keyctl
+251 common ioprio_set sys_ioprio_set
+252 common ioprio_get sys_ioprio_get
+253 common inotify_init sys_inotify_init
+254 common inotify_add_watch sys_inotify_add_watch
+255 common inotify_rm_watch sys_inotify_rm_watch
+256 common migrate_pages sys_migrate_pages
+257 common openat sys_openat
+258 common mkdirat sys_mkdirat
+259 common mknodat sys_mknodat
+260 common fchownat sys_fchownat
+261 common futimesat sys_futimesat
+262 common newfstatat sys_newfstatat
+263 common unlinkat sys_unlinkat
+264 common renameat sys_renameat
+265 common linkat sys_linkat
+266 common symlinkat sys_symlinkat
+267 common readlinkat sys_readlinkat
+268 common fchmodat sys_fchmodat
+269 common faccessat sys_faccessat
+270 common pselect6 sys_pselect6
+271 common ppoll sys_ppoll
+272 common unshare sys_unshare
+273 64 set_robust_list sys_set_robust_list
+274 64 get_robust_list sys_get_robust_list
+275 common splice sys_splice
+276 common tee sys_tee
+277 common sync_file_range sys_sync_file_range
+278 64 vmsplice sys_vmsplice
+279 64 move_pages sys_move_pages
+280 common utimensat sys_utimensat
+281 common epoll_pwait sys_epoll_pwait
+282 common signalfd sys_signalfd
+283 common timerfd_create sys_timerfd_create
+284 common eventfd sys_eventfd
+285 common fallocate sys_fallocate
+286 common timerfd_settime sys_timerfd_settime
+287 common timerfd_gettime sys_timerfd_gettime
+288 common accept4 sys_accept4
+289 common signalfd4 sys_signalfd4
+290 common eventfd2 sys_eventfd2
+291 common epoll_create1 sys_epoll_create1
+292 common dup3 sys_dup3
+293 common pipe2 sys_pipe2
+294 common inotify_init1 sys_inotify_init1
+295 64 preadv sys_preadv
+296 64 pwritev sys_pwritev
+297 64 rt_tgsigqueueinfo sys_rt_tgsigqueueinfo
+298 common perf_event_open sys_perf_event_open
+299 64 recvmmsg sys_recvmmsg
+300 common fanotify_init sys_fanotify_init
+301 common fanotify_mark sys_fanotify_mark
+302 common prlimit64 sys_prlimit64
+303 common name_to_handle_at sys_name_to_handle_at
+304 common open_by_handle_at sys_open_by_handle_at
+305 common clock_adjtime sys_clock_adjtime
+306 common syncfs sys_syncfs
+307 64 sendmmsg sys_sendmmsg
+308 common setns sys_setns
+309 common getcpu sys_getcpu
+310 64 process_vm_readv sys_process_vm_readv
+311 64 process_vm_writev sys_process_vm_writev
+312 common kcmp sys_kcmp
+313 common finit_module sys_finit_module
+314 common sched_setattr sys_sched_setattr
+315 common sched_getattr sys_sched_getattr
+316 common renameat2 sys_renameat2
+317 common seccomp sys_seccomp
+318 common getrandom sys_getrandom
+319 common memfd_create sys_memfd_create
+320 common kexec_file_load sys_kexec_file_load
+321 common bpf sys_bpf
+322 64 execveat sys_execveat
+323 common userfaultfd sys_userfaultfd
+324 common membarrier sys_membarrier
+325 common mlock2 sys_mlock2
+326 common copy_file_range sys_copy_file_range
+327 64 preadv2 sys_preadv2
+328 64 pwritev2 sys_pwritev2
+329 common pkey_mprotect sys_pkey_mprotect
+330 common pkey_alloc sys_pkey_alloc
+331 common pkey_free sys_pkey_free
+332 common statx sys_statx
+333 common io_pgetevents sys_io_pgetevents
+334 common rseq sys_rseq
# don't use numbers 387 through 423, add new calls after the last
# 'common' entry
-424 common pidfd_send_signal __x64_sys_pidfd_send_signal
-425 common io_uring_setup __x64_sys_io_uring_setup
-426 common io_uring_enter __x64_sys_io_uring_enter
-427 common io_uring_register __x64_sys_io_uring_register
-428 common open_tree __x64_sys_open_tree
-429 common move_mount __x64_sys_move_mount
-430 common fsopen __x64_sys_fsopen
-431 common fsconfig __x64_sys_fsconfig
-432 common fsmount __x64_sys_fsmount
-433 common fspick __x64_sys_fspick
-434 common pidfd_open __x64_sys_pidfd_open
-435 common clone3 __x64_sys_clone3/ptregs
+424 common pidfd_send_signal sys_pidfd_send_signal
+425 common io_uring_setup sys_io_uring_setup
+426 common io_uring_enter sys_io_uring_enter
+427 common io_uring_register sys_io_uring_register
+428 common open_tree sys_open_tree
+429 common move_mount sys_move_mount
+430 common fsopen sys_fsopen
+431 common fsconfig sys_fsconfig
+432 common fsmount sys_fsmount
+433 common fspick sys_fspick
+434 common pidfd_open sys_pidfd_open
+435 common clone3 sys_clone3
+437 common openat2 sys_openat2
+438 common pidfd_getfd sys_pidfd_getfd
#
# x32-specific system call numbers start at 512 to avoid cache impact
@@ -364,39 +366,39 @@
# on-the-fly for compat_sys_*() compatibility system calls if X86_X32
# is defined.
#
-512 x32 rt_sigaction __x32_compat_sys_rt_sigaction
-513 x32 rt_sigreturn sys32_x32_rt_sigreturn
-514 x32 ioctl __x32_compat_sys_ioctl
-515 x32 readv __x32_compat_sys_readv
-516 x32 writev __x32_compat_sys_writev
-517 x32 recvfrom __x32_compat_sys_recvfrom
-518 x32 sendmsg __x32_compat_sys_sendmsg
-519 x32 recvmsg __x32_compat_sys_recvmsg
-520 x32 execve __x32_compat_sys_execve/ptregs
-521 x32 ptrace __x32_compat_sys_ptrace
-522 x32 rt_sigpending __x32_compat_sys_rt_sigpending
-523 x32 rt_sigtimedwait __x32_compat_sys_rt_sigtimedwait_time64
-524 x32 rt_sigqueueinfo __x32_compat_sys_rt_sigqueueinfo
-525 x32 sigaltstack __x32_compat_sys_sigaltstack
-526 x32 timer_create __x32_compat_sys_timer_create
-527 x32 mq_notify __x32_compat_sys_mq_notify
-528 x32 kexec_load __x32_compat_sys_kexec_load
-529 x32 waitid __x32_compat_sys_waitid
-530 x32 set_robust_list __x32_compat_sys_set_robust_list
-531 x32 get_robust_list __x32_compat_sys_get_robust_list
-532 x32 vmsplice __x32_compat_sys_vmsplice
-533 x32 move_pages __x32_compat_sys_move_pages
-534 x32 preadv __x32_compat_sys_preadv64
-535 x32 pwritev __x32_compat_sys_pwritev64
-536 x32 rt_tgsigqueueinfo __x32_compat_sys_rt_tgsigqueueinfo
-537 x32 recvmmsg __x32_compat_sys_recvmmsg_time64
-538 x32 sendmmsg __x32_compat_sys_sendmmsg
-539 x32 process_vm_readv __x32_compat_sys_process_vm_readv
-540 x32 process_vm_writev __x32_compat_sys_process_vm_writev
-541 x32 setsockopt __x32_compat_sys_setsockopt
-542 x32 getsockopt __x32_compat_sys_getsockopt
-543 x32 io_setup __x32_compat_sys_io_setup
-544 x32 io_submit __x32_compat_sys_io_submit
-545 x32 execveat __x32_compat_sys_execveat/ptregs
-546 x32 preadv2 __x32_compat_sys_preadv64v2
-547 x32 pwritev2 __x32_compat_sys_pwritev64v2
+512 x32 rt_sigaction compat_sys_rt_sigaction
+513 x32 rt_sigreturn compat_sys_x32_rt_sigreturn
+514 x32 ioctl compat_sys_ioctl
+515 x32 readv compat_sys_readv
+516 x32 writev compat_sys_writev
+517 x32 recvfrom compat_sys_recvfrom
+518 x32 sendmsg compat_sys_sendmsg
+519 x32 recvmsg compat_sys_recvmsg
+520 x32 execve compat_sys_execve
+521 x32 ptrace compat_sys_ptrace
+522 x32 rt_sigpending compat_sys_rt_sigpending
+523 x32 rt_sigtimedwait compat_sys_rt_sigtimedwait_time64
+524 x32 rt_sigqueueinfo compat_sys_rt_sigqueueinfo
+525 x32 sigaltstack compat_sys_sigaltstack
+526 x32 timer_create compat_sys_timer_create
+527 x32 mq_notify compat_sys_mq_notify
+528 x32 kexec_load compat_sys_kexec_load
+529 x32 waitid compat_sys_waitid
+530 x32 set_robust_list compat_sys_set_robust_list
+531 x32 get_robust_list compat_sys_get_robust_list
+532 x32 vmsplice compat_sys_vmsplice
+533 x32 move_pages compat_sys_move_pages
+534 x32 preadv compat_sys_preadv64
+535 x32 pwritev compat_sys_pwritev64
+536 x32 rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo
+537 x32 recvmmsg compat_sys_recvmmsg_time64
+538 x32 sendmmsg compat_sys_sendmmsg
+539 x32 process_vm_readv compat_sys_process_vm_readv
+540 x32 process_vm_writev compat_sys_process_vm_writev
+541 x32 setsockopt compat_sys_setsockopt
+542 x32 getsockopt compat_sys_getsockopt
+543 x32 io_setup compat_sys_io_setup
+544 x32 io_submit compat_sys_io_submit
+545 x32 execveat compat_sys_execveat
+546 x32 preadv2 compat_sys_preadv64v2
+547 x32 pwritev2 compat_sys_pwritev64v2
diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-32.c b/tools/perf/arch/x86/tests/insn-x86-dat-32.c
index e6461abc9e7b..9708ae892061 100644
--- a/tools/perf/arch/x86/tests/insn-x86-dat-32.c
+++ b/tools/perf/arch/x86/tests/insn-x86-dat-32.c
@@ -2085,6 +2085,118 @@
"67 f3 0f 38 f8 1c \tenqcmds (%si),%bx",},
{{0x67, 0xf3, 0x0f, 0x38, 0xf8, 0x8c, 0x34, 0x12, }, 8, 0, "", "",
"67 f3 0f 38 f8 8c 34 12 \tenqcmds 0x1234(%si),%cx",},
+{{0xf3, 0x0f, 0xae, 0xe8, }, 4, 0, "", "",
+"f3 0f ae e8 \tincsspd %eax",},
+{{0x0f, 0xae, 0x28, }, 3, 0, "", "",
+"0f ae 28 \txrstor (%eax)",},
+{{0x0f, 0xae, 0x2d, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f ae 2d 78 56 34 12 \txrstor 0x12345678",},
+{{0x0f, 0xae, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f ae ac c8 78 56 34 12 \txrstor 0x12345678(%eax,%ecx,8)",},
+{{0x0f, 0xae, 0xe8, }, 3, 0, "", "",
+"0f ae e8 \tlfence ",},
+{{0xf3, 0x0f, 0x1e, 0xc8, }, 4, 0, "", "",
+"f3 0f 1e c8 \trdsspd %eax",},
+{{0xf3, 0x0f, 0x01, 0xea, }, 4, 0, "", "",
+"f3 0f 01 ea \tsaveprevssp ",},
+{{0xf3, 0x0f, 0x01, 0x28, }, 4, 0, "", "",
+"f3 0f 01 28 \trstorssp (%eax)",},
+{{0xf3, 0x0f, 0x01, 0x2d, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f3 0f 01 2d 78 56 34 12 \trstorssp 0x12345678",},
+{{0xf3, 0x0f, 0x01, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f 01 ac c8 78 56 34 12 \trstorssp 0x12345678(%eax,%ecx,8)",},
+{{0x0f, 0x38, 0xf6, 0x08, }, 4, 0, "", "",
+"0f 38 f6 08 \twrssd %ecx,(%eax)",},
+{{0x0f, 0x38, 0xf6, 0x15, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 f6 15 78 56 34 12 \twrssd %edx,0x12345678",},
+{{0x0f, 0x38, 0xf6, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 f6 94 c8 78 56 34 12 \twrssd %edx,0x12345678(%eax,%ecx,8)",},
+{{0x66, 0x0f, 0x38, 0xf5, 0x08, }, 5, 0, "", "",
+"66 0f 38 f5 08 \twrussd %ecx,(%eax)",},
+{{0x66, 0x0f, 0x38, 0xf5, 0x15, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f 38 f5 15 78 56 34 12 \twrussd %edx,0x12345678",},
+{{0x66, 0x0f, 0x38, 0xf5, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"66 0f 38 f5 94 c8 78 56 34 12 \twrussd %edx,0x12345678(%eax,%ecx,8)",},
+{{0xf3, 0x0f, 0x01, 0xe8, }, 4, 0, "", "",
+"f3 0f 01 e8 \tsetssbsy ",},
+{{0x0f, 0x01, 0xee, }, 3, 0, "", "",
+"0f 01 ee \trdpkru ",},
+{{0x0f, 0x01, 0xef, }, 3, 0, "", "",
+"0f 01 ef \twrpkru ",},
+{{0xf3, 0x0f, 0xae, 0x30, }, 4, 0, "", "",
+"f3 0f ae 30 \tclrssbsy (%eax)",},
+{{0xf3, 0x0f, 0xae, 0x35, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f3 0f ae 35 78 56 34 12 \tclrssbsy 0x12345678",},
+{{0xf3, 0x0f, 0xae, 0xb4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f ae b4 c8 78 56 34 12 \tclrssbsy 0x12345678(%eax,%ecx,8)",},
+{{0xf3, 0x0f, 0x1e, 0xfb, }, 4, 0, "", "",
+"f3 0f 1e fb \tendbr32 ",},
+{{0xf3, 0x0f, 0x1e, 0xfa, }, 4, 0, "", "",
+"f3 0f 1e fa \tendbr64 ",},
+{{0xff, 0xd0, }, 2, 0, "call", "indirect",
+"ff d0 \tcall *%eax",},
+{{0xff, 0x10, }, 2, 0, "call", "indirect",
+"ff 10 \tcall *(%eax)",},
+{{0xff, 0x15, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "call", "indirect",
+"ff 15 78 56 34 12 \tcall *0x12345678",},
+{{0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "call", "indirect",
+"ff 94 c8 78 56 34 12 \tcall *0x12345678(%eax,%ecx,8)",},
+{{0xf2, 0xff, 0xd0, }, 3, 0, "call", "indirect",
+"f2 ff d0 \tbnd call *%eax",},
+{{0xf2, 0xff, 0x10, }, 3, 0, "call", "indirect",
+"f2 ff 10 \tbnd call *(%eax)",},
+{{0xf2, 0xff, 0x15, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "call", "indirect",
+"f2 ff 15 78 56 34 12 \tbnd call *0x12345678",},
+{{0xf2, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect",
+"f2 ff 94 c8 78 56 34 12 \tbnd call *0x12345678(%eax,%ecx,8)",},
+{{0x3e, 0xff, 0xd0, }, 3, 0, "call", "indirect",
+"3e ff d0 \tnotrack call *%eax",},
+{{0x3e, 0xff, 0x10, }, 3, 0, "call", "indirect",
+"3e ff 10 \tnotrack call *(%eax)",},
+{{0x3e, 0xff, 0x15, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "call", "indirect",
+"3e ff 15 78 56 34 12 \tnotrack call *0x12345678",},
+{{0x3e, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect",
+"3e ff 94 c8 78 56 34 12 \tnotrack call *0x12345678(%eax,%ecx,8)",},
+{{0x3e, 0xf2, 0xff, 0xd0, }, 4, 0, "call", "indirect",
+"3e f2 ff d0 \tnotrack bnd call *%eax",},
+{{0x3e, 0xf2, 0xff, 0x10, }, 4, 0, "call", "indirect",
+"3e f2 ff 10 \tnotrack bnd call *(%eax)",},
+{{0x3e, 0xf2, 0xff, 0x15, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect",
+"3e f2 ff 15 78 56 34 12 \tnotrack bnd call *0x12345678",},
+{{0x3e, 0xf2, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "call", "indirect",
+"3e f2 ff 94 c8 78 56 34 12 \tnotrack bnd call *0x12345678(%eax,%ecx,8)",},
+{{0xff, 0xe0, }, 2, 0, "jmp", "indirect",
+"ff e0 \tjmp *%eax",},
+{{0xff, 0x20, }, 2, 0, "jmp", "indirect",
+"ff 20 \tjmp *(%eax)",},
+{{0xff, 0x25, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "jmp", "indirect",
+"ff 25 78 56 34 12 \tjmp *0x12345678",},
+{{0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "jmp", "indirect",
+"ff a4 c8 78 56 34 12 \tjmp *0x12345678(%eax,%ecx,8)",},
+{{0xf2, 0xff, 0xe0, }, 3, 0, "jmp", "indirect",
+"f2 ff e0 \tbnd jmp *%eax",},
+{{0xf2, 0xff, 0x20, }, 3, 0, "jmp", "indirect",
+"f2 ff 20 \tbnd jmp *(%eax)",},
+{{0xf2, 0xff, 0x25, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "jmp", "indirect",
+"f2 ff 25 78 56 34 12 \tbnd jmp *0x12345678",},
+{{0xf2, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect",
+"f2 ff a4 c8 78 56 34 12 \tbnd jmp *0x12345678(%eax,%ecx,8)",},
+{{0x3e, 0xff, 0xe0, }, 3, 0, "jmp", "indirect",
+"3e ff e0 \tnotrack jmp *%eax",},
+{{0x3e, 0xff, 0x20, }, 3, 0, "jmp", "indirect",
+"3e ff 20 \tnotrack jmp *(%eax)",},
+{{0x3e, 0xff, 0x25, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "jmp", "indirect",
+"3e ff 25 78 56 34 12 \tnotrack jmp *0x12345678",},
+{{0x3e, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect",
+"3e ff a4 c8 78 56 34 12 \tnotrack jmp *0x12345678(%eax,%ecx,8)",},
+{{0x3e, 0xf2, 0xff, 0xe0, }, 4, 0, "jmp", "indirect",
+"3e f2 ff e0 \tnotrack bnd jmp *%eax",},
+{{0x3e, 0xf2, 0xff, 0x20, }, 4, 0, "jmp", "indirect",
+"3e f2 ff 20 \tnotrack bnd jmp *(%eax)",},
+{{0x3e, 0xf2, 0xff, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect",
+"3e f2 ff 25 78 56 34 12 \tnotrack bnd jmp *0x12345678",},
+{{0x3e, 0xf2, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "jmp", "indirect",
+"3e f2 ff a4 c8 78 56 34 12 \tnotrack bnd jmp *0x12345678(%eax,%ecx,8)",},
{{0x0f, 0x01, 0xcf, }, 3, 0, "", "",
"0f 01 cf \tencls ",},
{{0x0f, 0x01, 0xd7, }, 3, 0, "", "",
diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-64.c b/tools/perf/arch/x86/tests/insn-x86-dat-64.c
index 567ecccfad7c..5da17d41d302 100644
--- a/tools/perf/arch/x86/tests/insn-x86-dat-64.c
+++ b/tools/perf/arch/x86/tests/insn-x86-dat-64.c
@@ -2263,6 +2263,202 @@
"67 f3 0f 38 f8 18 \tenqcmds (%eax),%ebx",},
{{0x67, 0xf3, 0x0f, 0x38, 0xf8, 0x88, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
"67 f3 0f 38 f8 88 78 56 34 12 \tenqcmds 0x12345678(%eax),%ecx",},
+{{0xf3, 0x0f, 0xae, 0xe8, }, 4, 0, "", "",
+"f3 0f ae e8 \tincsspd %eax",},
+{{0xf3, 0x41, 0x0f, 0xae, 0xe8, }, 5, 0, "", "",
+"f3 41 0f ae e8 \tincsspd %r8d",},
+{{0xf3, 0x48, 0x0f, 0xae, 0xe8, }, 5, 0, "", "",
+"f3 48 0f ae e8 \tincsspq %rax",},
+{{0xf3, 0x49, 0x0f, 0xae, 0xe8, }, 5, 0, "", "",
+"f3 49 0f ae e8 \tincsspq %r8",},
+{{0x0f, 0xae, 0x28, }, 3, 0, "", "",
+"0f ae 28 \txrstor (%rax)",},
+{{0x41, 0x0f, 0xae, 0x28, }, 4, 0, "", "",
+"41 0f ae 28 \txrstor (%r8)",},
+{{0x0f, 0xae, 0x2c, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f ae 2c 25 78 56 34 12 \txrstor 0x12345678",},
+{{0x0f, 0xae, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f ae ac c8 78 56 34 12 \txrstor 0x12345678(%rax,%rcx,8)",},
+{{0x41, 0x0f, 0xae, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"41 0f ae ac c8 78 56 34 12 \txrstor 0x12345678(%r8,%rcx,8)",},
+{{0x0f, 0xae, 0xe8, }, 3, 0, "", "",
+"0f ae e8 \tlfence ",},
+{{0xf3, 0x0f, 0x1e, 0xc8, }, 4, 0, "", "",
+"f3 0f 1e c8 \trdsspd %eax",},
+{{0xf3, 0x41, 0x0f, 0x1e, 0xc8, }, 5, 0, "", "",
+"f3 41 0f 1e c8 \trdsspd %r8d",},
+{{0xf3, 0x48, 0x0f, 0x1e, 0xc8, }, 5, 0, "", "",
+"f3 48 0f 1e c8 \trdsspq %rax",},
+{{0xf3, 0x49, 0x0f, 0x1e, 0xc8, }, 5, 0, "", "",
+"f3 49 0f 1e c8 \trdsspq %r8",},
+{{0xf3, 0x0f, 0x01, 0xea, }, 4, 0, "", "",
+"f3 0f 01 ea \tsaveprevssp ",},
+{{0xf3, 0x0f, 0x01, 0x28, }, 4, 0, "", "",
+"f3 0f 01 28 \trstorssp (%rax)",},
+{{0xf3, 0x41, 0x0f, 0x01, 0x28, }, 5, 0, "", "",
+"f3 41 0f 01 28 \trstorssp (%r8)",},
+{{0xf3, 0x0f, 0x01, 0x2c, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f 01 2c 25 78 56 34 12 \trstorssp 0x12345678",},
+{{0xf3, 0x0f, 0x01, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f 01 ac c8 78 56 34 12 \trstorssp 0x12345678(%rax,%rcx,8)",},
+{{0xf3, 0x41, 0x0f, 0x01, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"f3 41 0f 01 ac c8 78 56 34 12 \trstorssp 0x12345678(%r8,%rcx,8)",},
+{{0x0f, 0x38, 0xf6, 0x08, }, 4, 0, "", "",
+"0f 38 f6 08 \twrssd %ecx,(%rax)",},
+{{0x41, 0x0f, 0x38, 0xf6, 0x10, }, 5, 0, "", "",
+"41 0f 38 f6 10 \twrssd %edx,(%r8)",},
+{{0x0f, 0x38, 0xf6, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 f6 14 25 78 56 34 12 \twrssd %edx,0x12345678",},
+{{0x0f, 0x38, 0xf6, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 f6 94 c8 78 56 34 12 \twrssd %edx,0x12345678(%rax,%rcx,8)",},
+{{0x41, 0x0f, 0x38, 0xf6, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"41 0f 38 f6 94 c8 78 56 34 12 \twrssd %edx,0x12345678(%r8,%rcx,8)",},
+{{0x48, 0x0f, 0x38, 0xf6, 0x08, }, 5, 0, "", "",
+"48 0f 38 f6 08 \twrssq %rcx,(%rax)",},
+{{0x49, 0x0f, 0x38, 0xf6, 0x10, }, 5, 0, "", "",
+"49 0f 38 f6 10 \twrssq %rdx,(%r8)",},
+{{0x48, 0x0f, 0x38, 0xf6, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"48 0f 38 f6 14 25 78 56 34 12 \twrssq %rdx,0x12345678",},
+{{0x48, 0x0f, 0x38, 0xf6, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"48 0f 38 f6 94 c8 78 56 34 12 \twrssq %rdx,0x12345678(%rax,%rcx,8)",},
+{{0x49, 0x0f, 0x38, 0xf6, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"49 0f 38 f6 94 c8 78 56 34 12 \twrssq %rdx,0x12345678(%r8,%rcx,8)",},
+{{0x66, 0x0f, 0x38, 0xf5, 0x08, }, 5, 0, "", "",
+"66 0f 38 f5 08 \twrussd %ecx,(%rax)",},
+{{0x66, 0x41, 0x0f, 0x38, 0xf5, 0x10, }, 6, 0, "", "",
+"66 41 0f 38 f5 10 \twrussd %edx,(%r8)",},
+{{0x66, 0x0f, 0x38, 0xf5, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"66 0f 38 f5 14 25 78 56 34 12 \twrussd %edx,0x12345678",},
+{{0x66, 0x0f, 0x38, 0xf5, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"66 0f 38 f5 94 c8 78 56 34 12 \twrussd %edx,0x12345678(%rax,%rcx,8)",},
+{{0x66, 0x41, 0x0f, 0x38, 0xf5, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"66 41 0f 38 f5 94 c8 78 56 34 12 \twrussd %edx,0x12345678(%r8,%rcx,8)",},
+{{0x66, 0x48, 0x0f, 0x38, 0xf5, 0x08, }, 6, 0, "", "",
+"66 48 0f 38 f5 08 \twrussq %rcx,(%rax)",},
+{{0x66, 0x49, 0x0f, 0x38, 0xf5, 0x10, }, 6, 0, "", "",
+"66 49 0f 38 f5 10 \twrussq %rdx,(%r8)",},
+{{0x66, 0x48, 0x0f, 0x38, 0xf5, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"66 48 0f 38 f5 14 25 78 56 34 12 \twrussq %rdx,0x12345678",},
+{{0x66, 0x48, 0x0f, 0x38, 0xf5, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"66 48 0f 38 f5 94 c8 78 56 34 12 \twrussq %rdx,0x12345678(%rax,%rcx,8)",},
+{{0x66, 0x49, 0x0f, 0x38, 0xf5, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"66 49 0f 38 f5 94 c8 78 56 34 12 \twrussq %rdx,0x12345678(%r8,%rcx,8)",},
+{{0xf3, 0x0f, 0x01, 0xe8, }, 4, 0, "", "",
+"f3 0f 01 e8 \tsetssbsy ",},
+{{0x0f, 0x01, 0xee, }, 3, 0, "", "",
+"0f 01 ee \trdpkru ",},
+{{0x0f, 0x01, 0xef, }, 3, 0, "", "",
+"0f 01 ef \twrpkru ",},
+{{0xf3, 0x0f, 0xae, 0x30, }, 4, 0, "", "",
+"f3 0f ae 30 \tclrssbsy (%rax)",},
+{{0xf3, 0x41, 0x0f, 0xae, 0x30, }, 5, 0, "", "",
+"f3 41 0f ae 30 \tclrssbsy (%r8)",},
+{{0xf3, 0x0f, 0xae, 0x34, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f ae 34 25 78 56 34 12 \tclrssbsy 0x12345678",},
+{{0xf3, 0x0f, 0xae, 0xb4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f ae b4 c8 78 56 34 12 \tclrssbsy 0x12345678(%rax,%rcx,8)",},
+{{0xf3, 0x41, 0x0f, 0xae, 0xb4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"f3 41 0f ae b4 c8 78 56 34 12 \tclrssbsy 0x12345678(%r8,%rcx,8)",},
+{{0xf3, 0x0f, 0x1e, 0xfb, }, 4, 0, "", "",
+"f3 0f 1e fb \tendbr32 ",},
+{{0xf3, 0x0f, 0x1e, 0xfa, }, 4, 0, "", "",
+"f3 0f 1e fa \tendbr64 ",},
+{{0xff, 0xd0, }, 2, 0, "call", "indirect",
+"ff d0 \tcallq *%rax",},
+{{0xff, 0x10, }, 2, 0, "call", "indirect",
+"ff 10 \tcallq *(%rax)",},
+{{0x41, 0xff, 0x10, }, 3, 0, "call", "indirect",
+"41 ff 10 \tcallq *(%r8)",},
+{{0xff, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "call", "indirect",
+"ff 14 25 78 56 34 12 \tcallq *0x12345678",},
+{{0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "call", "indirect",
+"ff 94 c8 78 56 34 12 \tcallq *0x12345678(%rax,%rcx,8)",},
+{{0x41, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect",
+"41 ff 94 c8 78 56 34 12 \tcallq *0x12345678(%r8,%rcx,8)",},
+{{0xf2, 0xff, 0xd0, }, 3, 0, "call", "indirect",
+"f2 ff d0 \tbnd callq *%rax",},
+{{0xf2, 0xff, 0x10, }, 3, 0, "call", "indirect",
+"f2 ff 10 \tbnd callq *(%rax)",},
+{{0xf2, 0x41, 0xff, 0x10, }, 4, 0, "call", "indirect",
+"f2 41 ff 10 \tbnd callq *(%r8)",},
+{{0xf2, 0xff, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect",
+"f2 ff 14 25 78 56 34 12 \tbnd callq *0x12345678",},
+{{0xf2, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect",
+"f2 ff 94 c8 78 56 34 12 \tbnd callq *0x12345678(%rax,%rcx,8)",},
+{{0xf2, 0x41, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "call", "indirect",
+"f2 41 ff 94 c8 78 56 34 12 \tbnd callq *0x12345678(%r8,%rcx,8)",},
+{{0x3e, 0xff, 0xd0, }, 3, 0, "call", "indirect",
+"3e ff d0 \tnotrack callq *%rax",},
+{{0x3e, 0xff, 0x10, }, 3, 0, "call", "indirect",
+"3e ff 10 \tnotrack callq *(%rax)",},
+{{0x3e, 0x41, 0xff, 0x10, }, 4, 0, "call", "indirect",
+"3e 41 ff 10 \tnotrack callq *(%r8)",},
+{{0x3e, 0xff, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect",
+"3e ff 14 25 78 56 34 12 \tnotrack callq *0x12345678",},
+{{0x3e, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect",
+"3e ff 94 c8 78 56 34 12 \tnotrack callq *0x12345678(%rax,%rcx,8)",},
+{{0x3e, 0x41, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "call", "indirect",
+"3e 41 ff 94 c8 78 56 34 12 \tnotrack callq *0x12345678(%r8,%rcx,8)",},
+{{0x3e, 0xf2, 0xff, 0xd0, }, 4, 0, "call", "indirect",
+"3e f2 ff d0 \tnotrack bnd callq *%rax",},
+{{0x3e, 0xf2, 0xff, 0x10, }, 4, 0, "call", "indirect",
+"3e f2 ff 10 \tnotrack bnd callq *(%rax)",},
+{{0x3e, 0xf2, 0x41, 0xff, 0x10, }, 5, 0, "call", "indirect",
+"3e f2 41 ff 10 \tnotrack bnd callq *(%r8)",},
+{{0x3e, 0xf2, 0xff, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "call", "indirect",
+"3e f2 ff 14 25 78 56 34 12 \tnotrack bnd callq *0x12345678",},
+{{0x3e, 0xf2, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "call", "indirect",
+"3e f2 ff 94 c8 78 56 34 12 \tnotrack bnd callq *0x12345678(%rax,%rcx,8)",},
+{{0x3e, 0xf2, 0x41, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "call", "indirect",
+"3e f2 41 ff 94 c8 78 56 34 12 \tnotrack bnd callq *0x12345678(%r8,%rcx,8)",},
+{{0xff, 0xe0, }, 2, 0, "jmp", "indirect",
+"ff e0 \tjmpq *%rax",},
+{{0xff, 0x20, }, 2, 0, "jmp", "indirect",
+"ff 20 \tjmpq *(%rax)",},
+{{0x41, 0xff, 0x20, }, 3, 0, "jmp", "indirect",
+"41 ff 20 \tjmpq *(%r8)",},
+{{0xff, 0x24, 0x25, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "jmp", "indirect",
+"ff 24 25 78 56 34 12 \tjmpq *0x12345678",},
+{{0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "jmp", "indirect",
+"ff a4 c8 78 56 34 12 \tjmpq *0x12345678(%rax,%rcx,8)",},
+{{0x41, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect",
+"41 ff a4 c8 78 56 34 12 \tjmpq *0x12345678(%r8,%rcx,8)",},
+{{0xf2, 0xff, 0xe0, }, 3, 0, "jmp", "indirect",
+"f2 ff e0 \tbnd jmpq *%rax",},
+{{0xf2, 0xff, 0x20, }, 3, 0, "jmp", "indirect",
+"f2 ff 20 \tbnd jmpq *(%rax)",},
+{{0xf2, 0x41, 0xff, 0x20, }, 4, 0, "jmp", "indirect",
+"f2 41 ff 20 \tbnd jmpq *(%r8)",},
+{{0xf2, 0xff, 0x24, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect",
+"f2 ff 24 25 78 56 34 12 \tbnd jmpq *0x12345678",},
+{{0xf2, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect",
+"f2 ff a4 c8 78 56 34 12 \tbnd jmpq *0x12345678(%rax,%rcx,8)",},
+{{0xf2, 0x41, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "jmp", "indirect",
+"f2 41 ff a4 c8 78 56 34 12 \tbnd jmpq *0x12345678(%r8,%rcx,8)",},
+{{0x3e, 0xff, 0xe0, }, 3, 0, "jmp", "indirect",
+"3e ff e0 \tnotrack jmpq *%rax",},
+{{0x3e, 0xff, 0x20, }, 3, 0, "jmp", "indirect",
+"3e ff 20 \tnotrack jmpq *(%rax)",},
+{{0x3e, 0x41, 0xff, 0x20, }, 4, 0, "jmp", "indirect",
+"3e 41 ff 20 \tnotrack jmpq *(%r8)",},
+{{0x3e, 0xff, 0x24, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect",
+"3e ff 24 25 78 56 34 12 \tnotrack jmpq *0x12345678",},
+{{0x3e, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect",
+"3e ff a4 c8 78 56 34 12 \tnotrack jmpq *0x12345678(%rax,%rcx,8)",},
+{{0x3e, 0x41, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "jmp", "indirect",
+"3e 41 ff a4 c8 78 56 34 12 \tnotrack jmpq *0x12345678(%r8,%rcx,8)",},
+{{0x3e, 0xf2, 0xff, 0xe0, }, 4, 0, "jmp", "indirect",
+"3e f2 ff e0 \tnotrack bnd jmpq *%rax",},
+{{0x3e, 0xf2, 0xff, 0x20, }, 4, 0, "jmp", "indirect",
+"3e f2 ff 20 \tnotrack bnd jmpq *(%rax)",},
+{{0x3e, 0xf2, 0x41, 0xff, 0x20, }, 5, 0, "jmp", "indirect",
+"3e f2 41 ff 20 \tnotrack bnd jmpq *(%r8)",},
+{{0x3e, 0xf2, 0xff, 0x24, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "jmp", "indirect",
+"3e f2 ff 24 25 78 56 34 12 \tnotrack bnd jmpq *0x12345678",},
+{{0x3e, 0xf2, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "jmp", "indirect",
+"3e f2 ff a4 c8 78 56 34 12 \tnotrack bnd jmpq *0x12345678(%rax,%rcx,8)",},
+{{0x3e, 0xf2, 0x41, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "jmp", "indirect",
+"3e f2 41 ff a4 c8 78 56 34 12 \tnotrack bnd jmpq *0x12345678(%r8,%rcx,8)",},
{{0x0f, 0x01, 0xcf, }, 3, 0, "", "",
"0f 01 cf \tencls ",},
{{0x0f, 0x01, 0xd7, }, 3, 0, "", "",
diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-src.c b/tools/perf/arch/x86/tests/insn-x86-dat-src.c
index ddbf07c50bb8..c3808e94c46e 100644
--- a/tools/perf/arch/x86/tests/insn-x86-dat-src.c
+++ b/tools/perf/arch/x86/tests/insn-x86-dat-src.c
@@ -1771,6 +1771,145 @@ int main(void)
asm volatile("enqcmds (%eax),%ebx");
asm volatile("enqcmds 0x12345678(%eax),%ecx");
+ /* incsspd/q */
+
+ asm volatile("incsspd %eax");
+ asm volatile("incsspd %r8d");
+ asm volatile("incsspq %rax");
+ asm volatile("incsspq %r8");
+ /* Also check instructions in the same group encoding as incsspd/q */
+ asm volatile("xrstor (%rax)");
+ asm volatile("xrstor (%r8)");
+ asm volatile("xrstor (0x12345678)");
+ asm volatile("xrstor 0x12345678(%rax,%rcx,8)");
+ asm volatile("xrstor 0x12345678(%r8,%rcx,8)");
+ asm volatile("lfence");
+
+ /* rdsspd/q */
+
+ asm volatile("rdsspd %eax");
+ asm volatile("rdsspd %r8d");
+ asm volatile("rdsspq %rax");
+ asm volatile("rdsspq %r8");
+
+ /* saveprevssp */
+
+ asm volatile("saveprevssp");
+
+ /* rstorssp */
+
+ asm volatile("rstorssp (%rax)");
+ asm volatile("rstorssp (%r8)");
+ asm volatile("rstorssp (0x12345678)");
+ asm volatile("rstorssp 0x12345678(%rax,%rcx,8)");
+ asm volatile("rstorssp 0x12345678(%r8,%rcx,8)");
+
+ /* wrssd/q */
+
+ asm volatile("wrssd %ecx,(%rax)");
+ asm volatile("wrssd %edx,(%r8)");
+ asm volatile("wrssd %edx,(0x12345678)");
+ asm volatile("wrssd %edx,0x12345678(%rax,%rcx,8)");
+ asm volatile("wrssd %edx,0x12345678(%r8,%rcx,8)");
+ asm volatile("wrssq %rcx,(%rax)");
+ asm volatile("wrssq %rdx,(%r8)");
+ asm volatile("wrssq %rdx,(0x12345678)");
+ asm volatile("wrssq %rdx,0x12345678(%rax,%rcx,8)");
+ asm volatile("wrssq %rdx,0x12345678(%r8,%rcx,8)");
+
+ /* wrussd/q */
+
+ asm volatile("wrussd %ecx,(%rax)");
+ asm volatile("wrussd %edx,(%r8)");
+ asm volatile("wrussd %edx,(0x12345678)");
+ asm volatile("wrussd %edx,0x12345678(%rax,%rcx,8)");
+ asm volatile("wrussd %edx,0x12345678(%r8,%rcx,8)");
+ asm volatile("wrussq %rcx,(%rax)");
+ asm volatile("wrussq %rdx,(%r8)");
+ asm volatile("wrussq %rdx,(0x12345678)");
+ asm volatile("wrussq %rdx,0x12345678(%rax,%rcx,8)");
+ asm volatile("wrussq %rdx,0x12345678(%r8,%rcx,8)");
+
+ /* setssbsy */
+
+ asm volatile("setssbsy");
+ /* Also check instructions in the same group encoding as setssbsy */
+ asm volatile("rdpkru");
+ asm volatile("wrpkru");
+
+ /* clrssbsy */
+
+ asm volatile("clrssbsy (%rax)");
+ asm volatile("clrssbsy (%r8)");
+ asm volatile("clrssbsy (0x12345678)");
+ asm volatile("clrssbsy 0x12345678(%rax,%rcx,8)");
+ asm volatile("clrssbsy 0x12345678(%r8,%rcx,8)");
+
+ /* endbr32/64 */
+
+ asm volatile("endbr32");
+ asm volatile("endbr64");
+
+ /* call with/without notrack prefix */
+
+ asm volatile("callq *%rax"); /* Expecting: call indirect 0 */
+ asm volatile("callq *(%rax)"); /* Expecting: call indirect 0 */
+ asm volatile("callq *(%r8)"); /* Expecting: call indirect 0 */
+ asm volatile("callq *(0x12345678)"); /* Expecting: call indirect 0 */
+ asm volatile("callq *0x12345678(%rax,%rcx,8)"); /* Expecting: call indirect 0 */
+ asm volatile("callq *0x12345678(%r8,%rcx,8)"); /* Expecting: call indirect 0 */
+
+ asm volatile("bnd callq *%rax"); /* Expecting: call indirect 0 */
+ asm volatile("bnd callq *(%rax)"); /* Expecting: call indirect 0 */
+ asm volatile("bnd callq *(%r8)"); /* Expecting: call indirect 0 */
+ asm volatile("bnd callq *(0x12345678)"); /* Expecting: call indirect 0 */
+ asm volatile("bnd callq *0x12345678(%rax,%rcx,8)"); /* Expecting: call indirect 0 */
+ asm volatile("bnd callq *0x12345678(%r8,%rcx,8)"); /* Expecting: call indirect 0 */
+
+ asm volatile("notrack callq *%rax"); /* Expecting: call indirect 0 */
+ asm volatile("notrack callq *(%rax)"); /* Expecting: call indirect 0 */
+ asm volatile("notrack callq *(%r8)"); /* Expecting: call indirect 0 */
+ asm volatile("notrack callq *(0x12345678)"); /* Expecting: call indirect 0 */
+ asm volatile("notrack callq *0x12345678(%rax,%rcx,8)"); /* Expecting: call indirect 0 */
+ asm volatile("notrack callq *0x12345678(%r8,%rcx,8)"); /* Expecting: call indirect 0 */
+
+ asm volatile("notrack bnd callq *%rax"); /* Expecting: call indirect 0 */
+ asm volatile("notrack bnd callq *(%rax)"); /* Expecting: call indirect 0 */
+ asm volatile("notrack bnd callq *(%r8)"); /* Expecting: call indirect 0 */
+ asm volatile("notrack bnd callq *(0x12345678)"); /* Expecting: call indirect 0 */
+ asm volatile("notrack bnd callq *0x12345678(%rax,%rcx,8)"); /* Expecting: call indirect 0 */
+ asm volatile("notrack bnd callq *0x12345678(%r8,%rcx,8)"); /* Expecting: call indirect 0 */
+
+ /* jmp with/without notrack prefix */
+
+ asm volatile("jmpq *%rax"); /* Expecting: jmp indirect 0 */
+ asm volatile("jmpq *(%rax)"); /* Expecting: jmp indirect 0 */
+ asm volatile("jmpq *(%r8)"); /* Expecting: jmp indirect 0 */
+ asm volatile("jmpq *(0x12345678)"); /* Expecting: jmp indirect 0 */
+ asm volatile("jmpq *0x12345678(%rax,%rcx,8)"); /* Expecting: jmp indirect 0 */
+ asm volatile("jmpq *0x12345678(%r8,%rcx,8)"); /* Expecting: jmp indirect 0 */
+
+ asm volatile("bnd jmpq *%rax"); /* Expecting: jmp indirect 0 */
+ asm volatile("bnd jmpq *(%rax)"); /* Expecting: jmp indirect 0 */
+ asm volatile("bnd jmpq *(%r8)"); /* Expecting: jmp indirect 0 */
+ asm volatile("bnd jmpq *(0x12345678)"); /* Expecting: jmp indirect 0 */
+ asm volatile("bnd jmpq *0x12345678(%rax,%rcx,8)"); /* Expecting: jmp indirect 0 */
+ asm volatile("bnd jmpq *0x12345678(%r8,%rcx,8)"); /* Expecting: jmp indirect 0 */
+
+ asm volatile("notrack jmpq *%rax"); /* Expecting: jmp indirect 0 */
+ asm volatile("notrack jmpq *(%rax)"); /* Expecting: jmp indirect 0 */
+ asm volatile("notrack jmpq *(%r8)"); /* Expecting: jmp indirect 0 */
+ asm volatile("notrack jmpq *(0x12345678)"); /* Expecting: jmp indirect 0 */
+ asm volatile("notrack jmpq *0x12345678(%rax,%rcx,8)"); /* Expecting: jmp indirect 0 */
+ asm volatile("notrack jmpq *0x12345678(%r8,%rcx,8)"); /* Expecting: jmp indirect 0 */
+
+ asm volatile("notrack bnd jmpq *%rax"); /* Expecting: jmp indirect 0 */
+ asm volatile("notrack bnd jmpq *(%rax)"); /* Expecting: jmp indirect 0 */
+ asm volatile("notrack bnd jmpq *(%r8)"); /* Expecting: jmp indirect 0 */
+ asm volatile("notrack bnd jmpq *(0x12345678)"); /* Expecting: jmp indirect 0 */
+ asm volatile("notrack bnd jmpq *0x12345678(%rax,%rcx,8)"); /* Expecting: jmp indirect 0 */
+ asm volatile("notrack bnd jmpq *0x12345678(%r8,%rcx,8)"); /* Expecting: jmp indirect 0 */
+
#else /* #ifdef __x86_64__ */
/* bound r32, mem (same op code as EVEX prefix) */
@@ -3434,6 +3573,103 @@ int main(void)
asm volatile("enqcmds (%si),%bx");
asm volatile("enqcmds 0x1234(%si),%cx");
+ /* incsspd */
+
+ asm volatile("incsspd %eax");
+ /* Also check instructions in the same group encoding as incsspd */
+ asm volatile("xrstor (%eax)");
+ asm volatile("xrstor (0x12345678)");
+ asm volatile("xrstor 0x12345678(%eax,%ecx,8)");
+ asm volatile("lfence");
+
+ /* rdsspd */
+
+ asm volatile("rdsspd %eax");
+
+ /* saveprevssp */
+
+ asm volatile("saveprevssp");
+
+ /* rstorssp */
+
+ asm volatile("rstorssp (%eax)");
+ asm volatile("rstorssp (0x12345678)");
+ asm volatile("rstorssp 0x12345678(%eax,%ecx,8)");
+
+ /* wrssd */
+
+ asm volatile("wrssd %ecx,(%eax)");
+ asm volatile("wrssd %edx,(0x12345678)");
+ asm volatile("wrssd %edx,0x12345678(%eax,%ecx,8)");
+
+ /* wrussd */
+
+ asm volatile("wrussd %ecx,(%eax)");
+ asm volatile("wrussd %edx,(0x12345678)");
+ asm volatile("wrussd %edx,0x12345678(%eax,%ecx,8)");
+
+ /* setssbsy */
+
+ asm volatile("setssbsy");
+ /* Also check instructions in the same group encoding as setssbsy */
+ asm volatile("rdpkru");
+ asm volatile("wrpkru");
+
+ /* clrssbsy */
+
+ asm volatile("clrssbsy (%eax)");
+ asm volatile("clrssbsy (0x12345678)");
+ asm volatile("clrssbsy 0x12345678(%eax,%ecx,8)");
+
+ /* endbr32/64 */
+
+ asm volatile("endbr32");
+ asm volatile("endbr64");
+
+ /* call with/without notrack prefix */
+
+ asm volatile("call *%eax"); /* Expecting: call indirect 0 */
+ asm volatile("call *(%eax)"); /* Expecting: call indirect 0 */
+ asm volatile("call *(0x12345678)"); /* Expecting: call indirect 0 */
+ asm volatile("call *0x12345678(%eax,%ecx,8)"); /* Expecting: call indirect 0 */
+
+ asm volatile("bnd call *%eax"); /* Expecting: call indirect 0 */
+ asm volatile("bnd call *(%eax)"); /* Expecting: call indirect 0 */
+ asm volatile("bnd call *(0x12345678)"); /* Expecting: call indirect 0 */
+ asm volatile("bnd call *0x12345678(%eax,%ecx,8)"); /* Expecting: call indirect 0 */
+
+ asm volatile("notrack call *%eax"); /* Expecting: call indirect 0 */
+ asm volatile("notrack call *(%eax)"); /* Expecting: call indirect 0 */
+ asm volatile("notrack call *(0x12345678)"); /* Expecting: call indirect 0 */
+ asm volatile("notrack call *0x12345678(%eax,%ecx,8)"); /* Expecting: call indirect 0 */
+
+ asm volatile("notrack bnd call *%eax"); /* Expecting: call indirect 0 */
+ asm volatile("notrack bnd call *(%eax)"); /* Expecting: call indirect 0 */
+ asm volatile("notrack bnd call *(0x12345678)"); /* Expecting: call indirect 0 */
+ asm volatile("notrack bnd call *0x12345678(%eax,%ecx,8)"); /* Expecting: call indirect 0 */
+
+ /* jmp with/without notrack prefix */
+
+ asm volatile("jmp *%eax"); /* Expecting: jmp indirect 0 */
+ asm volatile("jmp *(%eax)"); /* Expecting: jmp indirect 0 */
+ asm volatile("jmp *(0x12345678)"); /* Expecting: jmp indirect 0 */
+ asm volatile("jmp *0x12345678(%eax,%ecx,8)"); /* Expecting: jmp indirect 0 */
+
+ asm volatile("bnd jmp *%eax"); /* Expecting: jmp indirect 0 */
+ asm volatile("bnd jmp *(%eax)"); /* Expecting: jmp indirect 0 */
+ asm volatile("bnd jmp *(0x12345678)"); /* Expecting: jmp indirect 0 */
+ asm volatile("bnd jmp *0x12345678(%eax,%ecx,8)"); /* Expecting: jmp indirect 0 */
+
+ asm volatile("notrack jmp *%eax"); /* Expecting: jmp indirect 0 */
+ asm volatile("notrack jmp *(%eax)"); /* Expecting: jmp indirect 0 */
+ asm volatile("notrack jmp *(0x12345678)"); /* Expecting: jmp indirect 0 */
+ asm volatile("notrack jmp *0x12345678(%eax,%ecx,8)"); /* Expecting: jmp indirect 0 */
+
+ asm volatile("notrack bnd jmp *%eax"); /* Expecting: jmp indirect 0 */
+ asm volatile("notrack bnd jmp *(%eax)"); /* Expecting: jmp indirect 0 */
+ asm volatile("notrack bnd jmp *(0x12345678)"); /* Expecting: jmp indirect 0 */
+ asm volatile("notrack bnd jmp *0x12345678(%eax,%ecx,8)"); /* Expecting: jmp indirect 0 */
+
#endif /* #ifndef __x86_64__ */
/* SGX */
diff --git a/tools/perf/arch/x86/util/auxtrace.c b/tools/perf/arch/x86/util/auxtrace.c
index 7abc9fd4cbec..3da506e13f49 100644
--- a/tools/perf/arch/x86/util/auxtrace.c
+++ b/tools/perf/arch/x86/util/auxtrace.c
@@ -7,13 +7,13 @@
#include <errno.h>
#include <stdbool.h>
-#include "../../util/header.h"
-#include "../../util/debug.h"
-#include "../../util/pmu.h"
-#include "../../util/auxtrace.h"
-#include "../../util/intel-pt.h"
-#include "../../util/intel-bts.h"
-#include "../../util/evlist.h"
+#include "../../../util/header.h"
+#include "../../../util/debug.h"
+#include "../../../util/pmu.h"
+#include "../../../util/auxtrace.h"
+#include "../../../util/intel-pt.h"
+#include "../../../util/intel-bts.h"
+#include "../../../util/evlist.h"
static
struct auxtrace_record *auxtrace_record__init_intel(struct evlist *evlist,
diff --git a/tools/perf/arch/x86/util/event.c b/tools/perf/arch/x86/util/event.c
index ac45015cc6ba..047dc00eafa6 100644
--- a/tools/perf/arch/x86/util/event.c
+++ b/tools/perf/arch/x86/util/event.c
@@ -3,12 +3,12 @@
#include <linux/string.h>
#include <linux/zalloc.h>
-#include "../../util/event.h"
-#include "../../util/synthetic-events.h"
-#include "../../util/machine.h"
-#include "../../util/tool.h"
-#include "../../util/map.h"
-#include "../../util/debug.h"
+#include "../../../util/event.h"
+#include "../../../util/synthetic-events.h"
+#include "../../../util/machine.h"
+#include "../../../util/tool.h"
+#include "../../../util/map.h"
+#include "../../../util/debug.h"
#if defined(__x86_64__)
diff --git a/tools/perf/arch/x86/util/header.c b/tools/perf/arch/x86/util/header.c
index aa6deb463bf3..578c8c568ffd 100644
--- a/tools/perf/arch/x86/util/header.c
+++ b/tools/perf/arch/x86/util/header.c
@@ -7,8 +7,8 @@
#include <string.h>
#include <regex.h>
-#include "../../util/debug.h"
-#include "../../util/header.h"
+#include "../../../util/debug.h"
+#include "../../../util/header.h"
static inline void
cpuid(unsigned int op, unsigned int *a, unsigned int *b, unsigned int *c,
diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c
index 27d9e214d068..09f93800bffd 100644
--- a/tools/perf/arch/x86/util/intel-bts.c
+++ b/tools/perf/arch/x86/util/intel-bts.c
@@ -11,18 +11,18 @@
#include <linux/log2.h>
#include <linux/zalloc.h>
-#include "../../util/cpumap.h"
-#include "../../util/event.h"
-#include "../../util/evsel.h"
-#include "../../util/evlist.h"
-#include "../../util/mmap.h"
-#include "../../util/session.h"
-#include "../../util/pmu.h"
-#include "../../util/debug.h"
-#include "../../util/record.h"
-#include "../../util/tsc.h"
-#include "../../util/auxtrace.h"
-#include "../../util/intel-bts.h"
+#include "../../../util/cpumap.h"
+#include "../../../util/event.h"
+#include "../../../util/evsel.h"
+#include "../../../util/evlist.h"
+#include "../../../util/mmap.h"
+#include "../../../util/session.h"
+#include "../../../util/pmu.h"
+#include "../../../util/debug.h"
+#include "../../../util/record.h"
+#include "../../../util/tsc.h"
+#include "../../../util/auxtrace.h"
+#include "../../../util/intel-bts.h"
#include <internal/lib.h> // page_size
#define KiB(x) ((x) * 1024)
@@ -413,20 +413,6 @@ out_err:
return err;
}
-static int intel_bts_read_finish(struct auxtrace_record *itr, int idx)
-{
- struct intel_bts_recording *btsr =
- container_of(itr, struct intel_bts_recording, itr);
- struct evsel *evsel;
-
- evlist__for_each_entry(btsr->evlist, evsel) {
- if (evsel->core.attr.type == btsr->intel_bts_pmu->type)
- return perf_evlist__enable_event_idx(btsr->evlist,
- evsel, idx);
- }
- return -EINVAL;
-}
-
struct auxtrace_record *intel_bts_recording_init(int *err)
{
struct perf_pmu *intel_bts_pmu = perf_pmu__find(INTEL_BTS_PMU_NAME);
@@ -447,6 +433,7 @@ struct auxtrace_record *intel_bts_recording_init(int *err)
}
btsr->intel_bts_pmu = intel_bts_pmu;
+ btsr->itr.pmu = intel_bts_pmu;
btsr->itr.recording_options = intel_bts_recording_options;
btsr->itr.info_priv_size = intel_bts_info_priv_size;
btsr->itr.info_fill = intel_bts_info_fill;
@@ -456,7 +443,7 @@ struct auxtrace_record *intel_bts_recording_init(int *err)
btsr->itr.find_snapshot = intel_bts_find_snapshot;
btsr->itr.parse_snapshot_options = intel_bts_parse_snapshot_options;
btsr->itr.reference = intel_bts_reference;
- btsr->itr.read_finish = intel_bts_read_finish;
+ btsr->itr.read_finish = auxtrace_record__read_finish;
btsr->itr.alignment = sizeof(struct branch);
return &btsr->itr;
}
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index 20df442fdf36..1643aed8c4c8 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -13,23 +13,23 @@
#include <linux/zalloc.h>
#include <cpuid.h>
-#include "../../util/session.h"
-#include "../../util/event.h"
-#include "../../util/evlist.h"
-#include "../../util/evsel.h"
-#include "../../util/evsel_config.h"
-#include "../../util/cpumap.h"
-#include "../../util/mmap.h"
+#include "../../../util/session.h"
+#include "../../../util/event.h"
+#include "../../../util/evlist.h"
+#include "../../../util/evsel.h"
+#include "../../../util/evsel_config.h"
+#include "../../../util/cpumap.h"
+#include "../../../util/mmap.h"
#include <subcmd/parse-options.h>
-#include "../../util/parse-events.h"
-#include "../../util/pmu.h"
-#include "../../util/debug.h"
-#include "../../util/auxtrace.h"
-#include "../../util/record.h"
-#include "../../util/target.h"
-#include "../../util/tsc.h"
+#include "../../../util/parse-events.h"
+#include "../../../util/pmu.h"
+#include "../../../util/debug.h"
+#include "../../../util/auxtrace.h"
+#include "../../../util/record.h"
+#include "../../../util/target.h"
+#include "../../../util/tsc.h"
#include <internal/lib.h> // page_size
-#include "../../util/intel-pt.h"
+#include "../../../util/intel-pt.h"
#define KiB(x) ((x) * 1024)
#define MiB(x) ((x) * 1024 * 1024)
@@ -1166,20 +1166,6 @@ static u64 intel_pt_reference(struct auxtrace_record *itr __maybe_unused)
return rdtsc();
}
-static int intel_pt_read_finish(struct auxtrace_record *itr, int idx)
-{
- struct intel_pt_recording *ptr =
- container_of(itr, struct intel_pt_recording, itr);
- struct evsel *evsel;
-
- evlist__for_each_entry(ptr->evlist, evsel) {
- if (evsel->core.attr.type == ptr->intel_pt_pmu->type)
- return perf_evlist__enable_event_idx(ptr->evlist, evsel,
- idx);
- }
- return -EINVAL;
-}
-
struct auxtrace_record *intel_pt_recording_init(int *err)
{
struct perf_pmu *intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME);
@@ -1200,6 +1186,7 @@ struct auxtrace_record *intel_pt_recording_init(int *err)
}
ptr->intel_pt_pmu = intel_pt_pmu;
+ ptr->itr.pmu = intel_pt_pmu;
ptr->itr.recording_options = intel_pt_recording_options;
ptr->itr.info_priv_size = intel_pt_info_priv_size;
ptr->itr.info_fill = intel_pt_info_fill;
@@ -1209,7 +1196,7 @@ struct auxtrace_record *intel_pt_recording_init(int *err)
ptr->itr.find_snapshot = intel_pt_find_snapshot;
ptr->itr.parse_snapshot_options = intel_pt_parse_snapshot_options;
ptr->itr.reference = intel_pt_reference;
- ptr->itr.read_finish = intel_pt_read_finish;
+ ptr->itr.read_finish = auxtrace_record__read_finish;
/*
* Decoding starts at a PSB packet. Minimum PSB period is 2K so 4K
* should give at least 1 PSB per sample.
diff --git a/tools/perf/arch/x86/util/machine.c b/tools/perf/arch/x86/util/machine.c
index e17e080e76f4..31679c35d493 100644
--- a/tools/perf/arch/x86/util/machine.c
+++ b/tools/perf/arch/x86/util/machine.c
@@ -5,9 +5,9 @@
#include <stdlib.h>
#include <internal/lib.h> // page_size
-#include "../../util/machine.h"
-#include "../../util/map.h"
-#include "../../util/symbol.h"
+#include "../../../util/machine.h"
+#include "../../../util/map.h"
+#include "../../../util/symbol.h"
#include <linux/ctype.h>
#include <symbol/kallsyms.h>
diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c
index c218b83e063b..fca81b39b09f 100644
--- a/tools/perf/arch/x86/util/perf_regs.c
+++ b/tools/perf/arch/x86/util/perf_regs.c
@@ -5,10 +5,10 @@
#include <linux/kernel.h>
#include <linux/zalloc.h>
-#include "../../perf-sys.h"
-#include "../../util/perf_regs.h"
-#include "../../util/debug.h"
-#include "../../util/event.h"
+#include "../../../perf-sys.h"
+#include "../../../util/perf_regs.h"
+#include "../../../util/debug.h"
+#include "../../../util/event.h"
const struct sample_reg sample_reg_masks[] = {
SMPL_REG(AX, PERF_REG_X86_AX),
diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c
index e33ef5bc31c5..d48d608517fd 100644
--- a/tools/perf/arch/x86/util/pmu.c
+++ b/tools/perf/arch/x86/util/pmu.c
@@ -4,9 +4,9 @@
#include <linux/stddef.h>
#include <linux/perf_event.h>
-#include "../../util/intel-pt.h"
-#include "../../util/intel-bts.h"
-#include "../../util/pmu.h"
+#include "../../../util/intel-pt.h"
+#include "../../../util/intel-bts.h"
+#include "../../../util/pmu.h"
struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
{
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index fddb3ced9db6..4aa6de1aa67d 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -2,6 +2,10 @@
#ifndef BENCH_H
#define BENCH_H
+#include <sys/time.h>
+
+extern struct timeval bench__start, bench__end, bench__runtime;
+
/*
* The madvise transparent hugepage constants were added in glibc
* 2.13. For compatibility with older versions of glibc, define these
diff --git a/tools/perf/bench/epoll-ctl.c b/tools/perf/bench/epoll-ctl.c
index bb617e568841..cadc18d42aa4 100644
--- a/tools/perf/bench/epoll-ctl.c
+++ b/tools/perf/bench/epoll-ctl.c
@@ -35,7 +35,6 @@
static unsigned int nthreads = 0;
static unsigned int nsecs = 8;
-struct timeval start, end, runtime;
static bool done, __verbose, randomize;
/*
@@ -94,8 +93,8 @@ static void toggle_done(int sig __maybe_unused,
{
/* inform all threads that we're done for the day */
done = true;
- gettimeofday(&end, NULL);
- timersub(&end, &start, &runtime);
+ gettimeofday(&bench__end, NULL);
+ timersub(&bench__end, &bench__start, &bench__runtime);
}
static void nest_epollfd(void)
@@ -313,6 +312,7 @@ int bench_epoll_ctl(int argc, const char **argv)
exit(EXIT_FAILURE);
}
+ memset(&act, 0, sizeof(act));
sigfillset(&act.sa_mask);
act.sa_sigaction = toggle_done;
sigaction(SIGINT, &act, NULL);
@@ -361,7 +361,7 @@ int bench_epoll_ctl(int argc, const char **argv)
threads_starting = nthreads;
- gettimeofday(&start, NULL);
+ gettimeofday(&bench__start, NULL);
do_threads(worker, cpu);
diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c
index 7af694437f4e..f938c585d512 100644
--- a/tools/perf/bench/epoll-wait.c
+++ b/tools/perf/bench/epoll-wait.c
@@ -90,7 +90,6 @@
static unsigned int nthreads = 0;
static unsigned int nsecs = 8;
-struct timeval start, end, runtime;
static bool wdone, done, __verbose, randomize, nonblocking;
/*
@@ -276,8 +275,8 @@ static void toggle_done(int sig __maybe_unused,
{
/* inform all threads that we're done for the day */
done = true;
- gettimeofday(&end, NULL);
- timersub(&end, &start, &runtime);
+ gettimeofday(&bench__end, NULL);
+ timersub(&bench__end, &bench__start, &bench__runtime);
}
static void print_summary(void)
@@ -287,7 +286,7 @@ static void print_summary(void)
printf("\nAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n",
avg, rel_stddev_stats(stddev, avg),
- (int) runtime.tv_sec);
+ (int)bench__runtime.tv_sec);
}
static int do_threads(struct worker *worker, struct perf_cpu_map *cpu)
@@ -427,6 +426,7 @@ int bench_epoll_wait(int argc, const char **argv)
exit(EXIT_FAILURE);
}
+ memset(&act, 0, sizeof(act));
sigfillset(&act.sa_mask);
act.sa_sigaction = toggle_done;
sigaction(SIGINT, &act, NULL);
@@ -479,7 +479,7 @@ int bench_epoll_wait(int argc, const char **argv)
threads_starting = nthreads;
- gettimeofday(&start, NULL);
+ gettimeofday(&bench__start, NULL);
do_threads(worker, cpu);
@@ -519,7 +519,7 @@ int bench_epoll_wait(int argc, const char **argv)
qsort(worker, nthreads, sizeof(struct worker), cmpworker);
for (i = 0; i < nthreads; i++) {
- unsigned long t = worker[i].ops/runtime.tv_sec;
+ unsigned long t = worker[i].ops / bench__runtime.tv_sec;
update_stats(&throughput_stats, t);
diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c
index 8ba0c3330a9a..65eebe06c04d 100644
--- a/tools/perf/bench/futex-hash.c
+++ b/tools/perf/bench/futex-hash.c
@@ -37,7 +37,7 @@ static unsigned int nfutexes = 1024;
static bool fshared = false, done = false, silent = false;
static int futex_flag = 0;
-struct timeval start, end, runtime;
+struct timeval bench__start, bench__end, bench__runtime;
static pthread_mutex_t thread_lock;
static unsigned int threads_starting;
static struct stats throughput_stats;
@@ -103,8 +103,8 @@ static void toggle_done(int sig __maybe_unused,
{
/* inform all threads that we're done for the day */
done = true;
- gettimeofday(&end, NULL);
- timersub(&end, &start, &runtime);
+ gettimeofday(&bench__end, NULL);
+ timersub(&bench__end, &bench__start, &bench__runtime);
}
static void print_summary(void)
@@ -114,7 +114,7 @@ static void print_summary(void)
printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n",
!silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg),
- (int) runtime.tv_sec);
+ (int)bench__runtime.tv_sec);
}
int bench_futex_hash(int argc, const char **argv)
@@ -137,6 +137,7 @@ int bench_futex_hash(int argc, const char **argv)
if (!cpu)
goto errmem;
+ memset(&act, 0, sizeof(act));
sigfillset(&act.sa_mask);
act.sa_sigaction = toggle_done;
sigaction(SIGINT, &act, NULL);
@@ -161,7 +162,7 @@ int bench_futex_hash(int argc, const char **argv)
threads_starting = nthreads;
pthread_attr_init(&thread_attr);
- gettimeofday(&start, NULL);
+ gettimeofday(&bench__start, NULL);
for (i = 0; i < nthreads; i++) {
worker[i].tid = i;
worker[i].futex = calloc(nfutexes, sizeof(*worker[i].futex));
@@ -204,7 +205,7 @@ int bench_futex_hash(int argc, const char **argv)
pthread_mutex_destroy(&thread_lock);
for (i = 0; i < nthreads; i++) {
- unsigned long t = worker[i].ops/runtime.tv_sec;
+ unsigned long t = worker[i].ops / bench__runtime.tv_sec;
update_stats(&throughput_stats, t);
if (!silent) {
if (nfutexes == 1)
diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c
index d0cae8125423..89fd8f325f38 100644
--- a/tools/perf/bench/futex-lock-pi.c
+++ b/tools/perf/bench/futex-lock-pi.c
@@ -37,7 +37,6 @@ static bool silent = false, multi = false;
static bool done = false, fshared = false;
static unsigned int nthreads = 0;
static int futex_flag = 0;
-struct timeval start, end, runtime;
static pthread_mutex_t thread_lock;
static unsigned int threads_starting;
static struct stats throughput_stats;
@@ -64,7 +63,7 @@ static void print_summary(void)
printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n",
!silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg),
- (int) runtime.tv_sec);
+ (int)bench__runtime.tv_sec);
}
static void toggle_done(int sig __maybe_unused,
@@ -73,8 +72,8 @@ static void toggle_done(int sig __maybe_unused,
{
/* inform all threads that we're done for the day */
done = true;
- gettimeofday(&end, NULL);
- timersub(&end, &start, &runtime);
+ gettimeofday(&bench__end, NULL);
+ timersub(&bench__end, &bench__start, &bench__runtime);
}
static void *workerfn(void *arg)
@@ -161,6 +160,7 @@ int bench_futex_lock_pi(int argc, const char **argv)
if (!cpu)
err(EXIT_FAILURE, "calloc");
+ memset(&act, 0, sizeof(act));
sigfillset(&act.sa_mask);
act.sa_sigaction = toggle_done;
sigaction(SIGINT, &act, NULL);
@@ -185,7 +185,7 @@ int bench_futex_lock_pi(int argc, const char **argv)
threads_starting = nthreads;
pthread_attr_init(&thread_attr);
- gettimeofday(&start, NULL);
+ gettimeofday(&bench__start, NULL);
create_threads(worker, thread_attr, cpu);
pthread_attr_destroy(&thread_attr);
@@ -211,7 +211,7 @@ int bench_futex_lock_pi(int argc, const char **argv)
pthread_mutex_destroy(&thread_lock);
for (i = 0; i < nthreads; i++) {
- unsigned long t = worker[i].ops/runtime.tv_sec;
+ unsigned long t = worker[i].ops / bench__runtime.tv_sec;
update_stats(&throughput_stats, t);
if (!silent)
diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
index a00a6891447a..7a15c2e61022 100644
--- a/tools/perf/bench/futex-requeue.c
+++ b/tools/perf/bench/futex-requeue.c
@@ -128,6 +128,7 @@ int bench_futex_requeue(int argc, const char **argv)
if (!cpu)
err(EXIT_FAILURE, "cpu_map__new");
+ memset(&act, 0, sizeof(act));
sigfillset(&act.sa_mask);
act.sa_sigaction = toggle_done;
sigaction(SIGINT, &act, NULL);
diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c
index a053cf2b7039..cd2b81a845ac 100644
--- a/tools/perf/bench/futex-wake-parallel.c
+++ b/tools/perf/bench/futex-wake-parallel.c
@@ -234,6 +234,7 @@ int bench_futex_wake_parallel(int argc, const char **argv)
exit(EXIT_FAILURE);
}
+ memset(&act, 0, sizeof(act));
sigfillset(&act.sa_mask);
act.sa_sigaction = toggle_done;
sigaction(SIGINT, &act, NULL);
diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
index df810096abfe..2dfcef3e371e 100644
--- a/tools/perf/bench/futex-wake.c
+++ b/tools/perf/bench/futex-wake.c
@@ -43,7 +43,7 @@ static bool done = false, silent = false, fshared = false;
static pthread_mutex_t thread_lock;
static pthread_cond_t thread_parent, thread_worker;
static struct stats waketime_stats, wakeup_stats;
-static unsigned int ncpus, threads_starting, nthreads = 0;
+static unsigned int threads_starting, nthreads = 0;
static int futex_flag = 0;
static const struct option options[] = {
@@ -136,12 +136,13 @@ int bench_futex_wake(int argc, const char **argv)
if (!cpu)
err(EXIT_FAILURE, "calloc");
+ memset(&act, 0, sizeof(act));
sigfillset(&act.sa_mask);
act.sa_sigaction = toggle_done;
sigaction(SIGINT, &act, NULL);
if (!nthreads)
- nthreads = ncpus;
+ nthreads = cpu->nr;
worker = calloc(nthreads, sizeof(*worker));
if (!worker)
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 5898662bc8fb..6c0a0412502e 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -535,6 +535,10 @@ int cmd_annotate(int argc, const char **argv)
"Display raw encoding of assembly instructions (default)"),
OPT_STRING('M', "disassembler-style", &annotate.opts.disassembler_style, "disassembler style",
"Specify disassembler style (e.g. -M intel for intel syntax)"),
+ OPT_STRING(0, "prefix", &annotate.opts.prefix, "prefix",
+ "Add prefix to source file path names in programs (with --prefix-strip)"),
+ OPT_STRING(0, "prefix-strip", &annotate.opts.prefix_strip, "N",
+ "Strip first N entries of source file path name in programs (with --prefix)"),
OPT_STRING(0, "objdump", &annotate.opts.objdump_path, "path",
"objdump binary to use for disassembly and annotations"),
OPT_BOOLEAN(0, "group", &symbol_conf.event_group,
@@ -562,6 +566,8 @@ int cmd_annotate(int argc, const char **argv)
if (ret < 0)
return ret;
+ annotation_config__init(&annotate.opts);
+
argc = parse_options(argc, argv, options, annotate_usage, 0);
if (argc) {
/*
@@ -574,6 +580,9 @@ int cmd_annotate(int argc, const char **argv)
annotate.sym_hist_filter = argv[0];
}
+ if (annotate_check_args(&annotate.opts) < 0)
+ return -EINVAL;
+
if (symbol_conf.show_nr_samples && annotate.use_gtk) {
pr_err("--show-nr-samples is not available in --gtk mode at this time\n");
return ret;
@@ -598,8 +607,6 @@ int cmd_annotate(int argc, const char **argv)
if (ret < 0)
goto out_delete;
- annotation_config__init();
-
symbol_conf.try_vmlinux_path = true;
ret = symbol__init(&annotate.session->header.env);
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index e69f44941aad..246ac0b4d54f 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -595,8 +595,8 @@ tot_hitm_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
{
struct c2c_hist_entry *c2c_left;
struct c2c_hist_entry *c2c_right;
- unsigned int tot_hitm_left;
- unsigned int tot_hitm_right;
+ uint64_t tot_hitm_left;
+ uint64_t tot_hitm_right;
c2c_left = container_of(left, struct c2c_hist_entry, he);
c2c_right = container_of(right, struct c2c_hist_entry, he);
@@ -629,7 +629,8 @@ __f ## _cmp(struct perf_hpp_fmt *fmt __maybe_unused, \
\
c2c_left = container_of(left, struct c2c_hist_entry, he); \
c2c_right = container_of(right, struct c2c_hist_entry, he); \
- return c2c_left->stats.__f - c2c_right->stats.__f; \
+ return (uint64_t) c2c_left->stats.__f - \
+ (uint64_t) c2c_right->stats.__f; \
}
#define STAT_FN(__f) \
@@ -682,7 +683,8 @@ ld_llcmiss_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
c2c_left = container_of(left, struct c2c_hist_entry, he);
c2c_right = container_of(right, struct c2c_hist_entry, he);
- return llc_miss(&c2c_left->stats) - llc_miss(&c2c_right->stats);
+ return (uint64_t) llc_miss(&c2c_left->stats) -
+ (uint64_t) llc_miss(&c2c_right->stats);
}
static uint64_t total_records(struct c2c_stats *stats)
@@ -2384,7 +2386,7 @@ static int perf_c2c__browse_cacheline(struct hist_entry *he)
c2c_browser__update_nr_entries(browser);
while (1) {
- key = hist_browser__run(browser, "? - help", true);
+ key = hist_browser__run(browser, "? - help", true, 0);
switch (key) {
case 's':
@@ -2453,7 +2455,7 @@ static int perf_c2c__hists_browse(struct hists *hists)
c2c_browser__update_nr_entries(browser);
while (1) {
- key = hist_browser__run(browser, "? - help", true);
+ key = hist_browser__run(browser, "? - help", true, 0);
switch (key) {
case 'q':
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index f8b6ae557d8b..c94a002f295e 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -455,6 +455,7 @@ static struct perf_diff pdiff = {
.fork = perf_event__process_fork,
.lost = perf_event__process_lost,
.namespaces = perf_event__process_namespaces,
+ .cgroup = perf_event__process_cgroup,
.ordered_events = true,
.ordering_requires_timestamps = true,
},
@@ -572,29 +573,12 @@ static void init_block_hist(struct block_hist *bh)
bh->valid = true;
}
-static int block_pair_cmp(struct hist_entry *a, struct hist_entry *b)
-{
- struct block_info *bi_a = a->block_info;
- struct block_info *bi_b = b->block_info;
- int cmp;
-
- if (!bi_a->sym || !bi_b->sym)
- return -1;
-
- cmp = strcmp(bi_a->sym->name, bi_b->sym->name);
-
- if ((!cmp) && (bi_a->start == bi_b->start) && (bi_a->end == bi_b->end))
- return 0;
-
- return -1;
-}
-
static struct hist_entry *get_block_pair(struct hist_entry *he,
struct hists *hists_pair)
{
struct rb_root_cached *root = hists_pair->entries_in;
struct rb_node *next = rb_first_cached(root);
- int cmp;
+ int64_t cmp;
while (next != NULL) {
struct hist_entry *he_pair = rb_entry(next, struct hist_entry,
@@ -602,7 +586,7 @@ static struct hist_entry *get_block_pair(struct hist_entry *he,
next = rb_next(&he_pair->rb_node_in);
- cmp = block_pair_cmp(he_pair, he);
+ cmp = __block_info__cmp(he_pair, he);
if (!cmp)
return he_pair;
}
@@ -1312,7 +1296,8 @@ static int cycles_printf(struct hist_entry *he, struct hist_entry *pair,
end_line = map__srcline(he->ms.map, bi->sym->start + bi->end,
he->ms.sym);
- if ((start_line != SRCLINE_UNKNOWN) && (end_line != SRCLINE_UNKNOWN)) {
+ if ((strncmp(start_line, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0) &&
+ (strncmp(end_line, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0)) {
scnprintf(buf, sizeof(buf), "[%s -> %s] %4ld",
start_line, end_line, block_he->diff.cycles);
} else {
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index 26bc5923e6b5..70548df2abb9 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -449,7 +449,8 @@ static int perf_del_probe_events(struct strfilter *filter)
ret = probe_file__del_strlist(kfd, klist);
if (ret < 0)
goto error;
- }
+ } else if (ret == -ENOMEM)
+ goto error;
ret2 = probe_file__get_events(ufd, filter, ulist);
if (ret2 == 0) {
@@ -459,7 +460,8 @@ static int perf_del_probe_events(struct strfilter *filter)
ret2 = probe_file__del_strlist(ufd, ulist);
if (ret2 < 0)
goto error;
- }
+ } else if (ret2 == -ENOMEM)
+ goto error;
if (ret == -ENOENT && ret2 == -ENOENT)
pr_warning("\"%s\" does not hit any event.\n", str);
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index fb19ef63cc35..1ab349abe904 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -62,6 +62,7 @@
#include <linux/string.h>
#include <linux/time64.h>
#include <linux/zalloc.h>
+#include <linux/bitmap.h>
struct switch_output {
bool enabled;
@@ -93,7 +94,7 @@ struct record {
bool timestamp_boundary;
struct switch_output switch_output;
unsigned long long samples;
- cpu_set_t affinity_mask;
+ struct mmap_cpu_mask affinity_mask;
unsigned long output_max_size; /* = 0: unlimited */
};
@@ -961,10 +962,15 @@ static struct perf_event_header finished_round_event = {
static void record__adjust_affinity(struct record *rec, struct mmap *map)
{
if (rec->opts.affinity != PERF_AFFINITY_SYS &&
- !CPU_EQUAL(&rec->affinity_mask, &map->affinity_mask)) {
- CPU_ZERO(&rec->affinity_mask);
- CPU_OR(&rec->affinity_mask, &rec->affinity_mask, &map->affinity_mask);
- sched_setaffinity(0, sizeof(rec->affinity_mask), &rec->affinity_mask);
+ !bitmap_equal(rec->affinity_mask.bits, map->affinity_mask.bits,
+ rec->affinity_mask.nbits)) {
+ bitmap_zero(rec->affinity_mask.bits, rec->affinity_mask.nbits);
+ bitmap_or(rec->affinity_mask.bits, rec->affinity_mask.bits,
+ map->affinity_mask.bits, rec->affinity_mask.nbits);
+ sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&rec->affinity_mask),
+ (cpu_set_t *)rec->affinity_mask.bits);
+ if (verbose == 2)
+ mmap_cpu_mask__scnprintf(&rec->affinity_mask, "thread");
}
}
@@ -1391,6 +1397,11 @@ static int record__synthesize(struct record *rec, bool tail)
if (err < 0)
pr_warning("Couldn't synthesize bpf events.\n");
+ err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
+ machine);
+ if (err < 0)
+ pr_warning("Couldn't synthesize cgroup events.\n");
+
err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->core.threads,
process_synthesized_event, opts->sample_address,
1);
@@ -1422,6 +1433,15 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
if (rec->opts.record_namespaces)
tool->namespace_events = true;
+ if (rec->opts.record_cgroup) {
+#ifdef HAVE_FILE_HANDLE
+ tool->cgroup_events = true;
+#else
+ pr_err("cgroup tracking is not supported\n");
+ return -1;
+#endif
+ }
+
if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
signal(SIGUSR2, snapshot_sig_handler);
if (rec->opts.auxtrace_snapshot_mode)
@@ -2352,6 +2372,8 @@ static struct option __record_options[] = {
"per thread proc mmap processing timeout in ms"),
OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
"Record namespaces events"),
+ OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup,
+ "Record cgroup events"),
OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
"Record context switch events"),
OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
@@ -2433,7 +2455,6 @@ int cmd_record(int argc, const char **argv)
# undef REASON
#endif
- CPU_ZERO(&rec->affinity_mask);
rec->opts.affinity = PERF_AFFINITY_SYS;
rec->evlist = evlist__new();
@@ -2499,6 +2520,16 @@ int cmd_record(int argc, const char **argv)
symbol__init(NULL);
+ if (rec->opts.affinity != PERF_AFFINITY_SYS) {
+ rec->affinity_mask.nbits = cpu__max_cpu();
+ rec->affinity_mask.bits = bitmap_alloc(rec->affinity_mask.nbits);
+ if (!rec->affinity_mask.bits) {
+ pr_err("Failed to allocate thread mask for %zd cpus\n", rec->affinity_mask.nbits);
+ return -ENOMEM;
+ }
+ pr_debug2("thread mask[%zd]: empty\n", rec->affinity_mask.nbits);
+ }
+
err = record__auxtrace_init(rec);
if (err)
goto out;
@@ -2613,6 +2644,7 @@ int cmd_record(int argc, const char **argv)
err = __cmd_record(&record, argc, argv);
out:
+ bitmap_free(rec->affinity_mask.bits);
evlist__delete(rec->evlist);
symbol__exit();
auxtrace_record__free(rec->itr);
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 387311c67264..26d8fc27e427 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -104,6 +104,7 @@ struct report {
bool symbol_ipc;
bool total_cycles_mode;
struct block_report *block_reports;
+ int nr_block_reports;
};
static int report__config(const char *var, const char *value, void *cb)
@@ -185,24 +186,23 @@ static int hist_iter__branch_callback(struct hist_entry_iter *iter,
{
struct hist_entry *he = iter->he;
struct report *rep = arg;
- struct branch_info *bi;
+ struct branch_info *bi = he->branch_info;
struct perf_sample *sample = iter->sample;
struct evsel *evsel = iter->evsel;
int err;
+ branch_type_count(&rep->brtype_stat, &bi->flags,
+ bi->from.addr, bi->to.addr);
+
if (!ui__has_annotation() && !rep->symbol_ipc)
return 0;
- bi = he->branch_info;
err = addr_map_symbol__inc_samples(&bi->from, sample, evsel);
if (err)
goto out;
err = addr_map_symbol__inc_samples(&bi->to, sample, evsel);
- branch_type_count(&rep->brtype_stat, &bi->flags,
- bi->from.addr, bi->to.addr);
-
out:
return err;
}
@@ -412,10 +412,10 @@ static int report__setup_sample_type(struct report *rep)
PERF_SAMPLE_BRANCH_ANY))
rep->nonany_branch_mode = true;
-#ifndef HAVE_LIBUNWIND_SUPPORT
+#if !defined(HAVE_LIBUNWIND_SUPPORT) && !defined(HAVE_DWARF_SUPPORT)
if (dwarf_callchain_users) {
- ui__warning("Please install libunwind development packages "
- "during the perf build.\n");
+ ui__warning("Please install libunwind or libdw "
+ "development packages during the perf build.\n");
}
#endif
@@ -635,7 +635,7 @@ static int report__browse_hists(struct report *rep)
* Usually "ret" is the last pressed key, and we only
* care if the key notifies us to switch data file.
*/
- if (ret != K_SWITCH_INPUT_DATA)
+ if (ret != K_SWITCH_INPUT_DATA && ret != K_RELOAD)
ret = 0;
break;
case 2:
@@ -966,8 +966,19 @@ static int __cmd_report(struct report *rep)
report__output_resort(rep);
if (rep->total_cycles_mode) {
+ int block_hpps[6] = {
+ PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_PCT,
+ PERF_HPP_REPORT__BLOCK_LBR_CYCLES,
+ PERF_HPP_REPORT__BLOCK_CYCLES_PCT,
+ PERF_HPP_REPORT__BLOCK_AVG_CYCLES,
+ PERF_HPP_REPORT__BLOCK_RANGE,
+ PERF_HPP_REPORT__BLOCK_DSO,
+ };
+
rep->block_reports = block_info__create_report(session->evlist,
- rep->total_cycles);
+ rep->total_cycles,
+ block_hpps, 6,
+ &rep->nr_block_reports);
if (!rep->block_reports)
return -1;
}
@@ -1076,6 +1087,7 @@ int cmd_report(int argc, const char **argv)
struct stat st;
bool has_br_stack = false;
int branch_mode = -1;
+ int last_key = 0;
bool branch_call_mode = false;
#define CALLCHAIN_DEFAULT_OPT "graph,0.5,caller,function,percent"
static const char report_callchain_help[] = "Display call graph (stack chain/backtrace):\n\n"
@@ -1093,6 +1105,7 @@ int cmd_report(int argc, const char **argv)
.mmap2 = perf_event__process_mmap2,
.comm = perf_event__process_comm,
.namespaces = perf_event__process_namespaces,
+ .cgroup = perf_event__process_cgroup,
.exit = perf_event__process_exit,
.fork = perf_event__process_fork,
.lost = perf_event__process_lost,
@@ -1163,7 +1176,8 @@ int cmd_report(int argc, const char **argv)
report_callchain_help, &report_parse_callchain_opt,
callchain_default_opt),
OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain,
- "Accumulate callchains of children and show total overhead as well"),
+ "Accumulate callchains of children and show total overhead as well. "
+ "Enabled by default, use --no-children to disable."),
OPT_INTEGER(0, "max-stack", &report.max_stack,
"Set the maximum stack depth when parsing the callchain, "
"anything beyond the specified depth will be ignored. "
@@ -1206,10 +1220,18 @@ int cmd_report(int argc, const char **argv)
"Display raw encoding of assembly instructions (default)"),
OPT_STRING('M', "disassembler-style", &report.annotation_opts.disassembler_style, "disassembler style",
"Specify disassembler style (e.g. -M intel for intel syntax)"),
+ OPT_STRING(0, "prefix", &report.annotation_opts.prefix, "prefix",
+ "Add prefix to source file path names in programs (with --prefix-strip)"),
+ OPT_STRING(0, "prefix-strip", &report.annotation_opts.prefix_strip, "N",
+ "Strip first N entries of source file path name in programs (with --prefix)"),
OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
"Show a column with the sum of periods"),
OPT_BOOLEAN_SET(0, "group", &symbol_conf.event_group, &report.group_set,
"Show event group information together"),
+ OPT_INTEGER(0, "group-sort-idx", &symbol_conf.group_sort_idx,
+ "Sort the output by the event at the index n in group. "
+ "If n is invalid, sort by the first event. "
+ "WARNING: should be used on grouped events."),
OPT_CALLBACK_NOOPT('b', "branch-stack", &branch_mode, "",
"use branch records for per branch histogram filling",
parse_branch_mode),
@@ -1285,6 +1307,9 @@ int cmd_report(int argc, const char **argv)
report.symbol_filter_str = argv[0];
}
+ if (annotate_check_args(&report.annotation_opts) < 0)
+ return -EINVAL;
+
if (report.mmaps_mode)
report.tasks_mode = true;
@@ -1349,6 +1374,12 @@ repeat:
setup_forced_leader(&report, session->evlist);
+ if (symbol_conf.group_sort_idx && !session->evlist->nr_groups) {
+ parse_options_usage(NULL, options, "group-sort-idx", 0);
+ ret = -EINVAL;
+ goto error;
+ }
+
if (itrace_synth_opts.last_branch)
has_br_stack = true;
@@ -1450,7 +1481,8 @@ repeat:
sort_order = sort_tmp;
}
- if (setup_sorting(session->evlist) < 0) {
+ if ((last_key != K_SWITCH_INPUT_DATA && last_key != K_RELOAD) &&
+ (setup_sorting(session->evlist) < 0)) {
if (sort_order)
parse_options_usage(report_usage, options, "s", 1);
if (field_order)
@@ -1497,7 +1529,7 @@ repeat:
symbol_conf.priv_size += sizeof(u32);
symbol_conf.sort_by_name = true;
}
- annotation_config__init();
+ annotation_config__init(&report.annotation_opts);
}
if (symbol__init(&session->header.env) < 0)
@@ -1528,8 +1560,9 @@ repeat:
sort__setup_elide(stdout);
ret = __cmd_report(&report);
- if (ret == K_SWITCH_INPUT_DATA) {
+ if (ret == K_SWITCH_INPUT_DATA || ret == K_RELOAD) {
perf_session__delete(session);
+ last_key = K_SWITCH_INPUT_DATA;
goto repeat;
} else
ret = 0;
@@ -1540,8 +1573,11 @@ error:
zfree(&report.ptime_range);
}
- if (report.block_reports)
- zfree(&report.block_reports);
+ if (report.block_reports) {
+ block_info__free_report(report.block_reports,
+ report.nr_block_reports);
+ report.block_reports = NULL;
+ }
zstd_fini(&(session->zstd_data));
perf_session__delete(session);
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 8a12d71364c3..82fcc2c15fe4 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -51,6 +51,9 @@
#define SYM_LEN 129
#define MAX_PID 1024000
+static const char *cpu_list;
+static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
+
struct sched_atom;
struct task_desc {
@@ -2008,6 +2011,9 @@ static void timehist_print_sample(struct perf_sched *sched,
char nstr[30];
u64 wait_time;
+ if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
+ return;
+
timestamp__scnprintf_usec(t, tstr, sizeof(tstr));
printf("%15s [%04d] ", tstr, sample->cpu);
@@ -2994,6 +3000,12 @@ static int perf_sched__timehist(struct perf_sched *sched)
if (IS_ERR(session))
return PTR_ERR(session);
+ if (cpu_list) {
+ err = perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap);
+ if (err < 0)
+ goto out;
+ }
+
evlist = session->evlist;
symbol__init(&session->header.env);
@@ -3429,6 +3441,7 @@ int cmd_sched(int argc, const char **argv)
"analyze events only for given process id(s)"),
OPT_STRING('t', "tid", &symbol_conf.tid_list_str, "tid[,tid...]",
"analyze events only for given thread id(s)"),
+ OPT_STRING('C', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
OPT_PARENT(sched_options)
};
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index e2406b291c1c..1f57a7ecdf3d 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -63,7 +63,9 @@
static char const *script_name;
static char const *generate_script_lang;
static bool reltime;
+static bool deltatime;
static u64 initial_time;
+static u64 previous_time;
static bool debug_mode;
static u64 last_timestamp;
static u64 nr_unordered;
@@ -704,6 +706,13 @@ static int perf_sample__fprintf_start(struct perf_sample *sample,
if (!initial_time)
initial_time = sample->time;
t = sample->time - initial_time;
+ } else if (deltatime) {
+ if (previous_time)
+ t = sample->time - previous_time;
+ else {
+ t = 0;
+ }
+ previous_time = sample->time;
}
nsecs = t;
secs = nsecs / NSEC_PER_SEC;
@@ -735,6 +744,7 @@ static int perf_sample__fprintf_brstack(struct perf_sample *sample,
struct perf_event_attr *attr, FILE *fp)
{
struct branch_stack *br = sample->branch_stack;
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
struct addr_location alf, alt;
u64 i, from, to;
int printed = 0;
@@ -743,8 +753,8 @@ static int perf_sample__fprintf_brstack(struct perf_sample *sample,
return 0;
for (i = 0; i < br->nr; i++) {
- from = br->entries[i].from;
- to = br->entries[i].to;
+ from = entries[i].from;
+ to = entries[i].to;
if (PRINT_FIELD(DSO)) {
memset(&alf, 0, sizeof(alf));
@@ -768,10 +778,10 @@ static int perf_sample__fprintf_brstack(struct perf_sample *sample,
}
printed += fprintf(fp, "/%c/%c/%c/%d ",
- mispred_str( br->entries + i),
- br->entries[i].flags.in_tx? 'X' : '-',
- br->entries[i].flags.abort? 'A' : '-',
- br->entries[i].flags.cycles);
+ mispred_str(entries + i),
+ entries[i].flags.in_tx ? 'X' : '-',
+ entries[i].flags.abort ? 'A' : '-',
+ entries[i].flags.cycles);
}
return printed;
@@ -782,6 +792,7 @@ static int perf_sample__fprintf_brstacksym(struct perf_sample *sample,
struct perf_event_attr *attr, FILE *fp)
{
struct branch_stack *br = sample->branch_stack;
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
struct addr_location alf, alt;
u64 i, from, to;
int printed = 0;
@@ -793,8 +804,8 @@ static int perf_sample__fprintf_brstacksym(struct perf_sample *sample,
memset(&alf, 0, sizeof(alf));
memset(&alt, 0, sizeof(alt));
- from = br->entries[i].from;
- to = br->entries[i].to;
+ from = entries[i].from;
+ to = entries[i].to;
thread__find_symbol_fb(thread, sample->cpumode, from, &alf);
thread__find_symbol_fb(thread, sample->cpumode, to, &alt);
@@ -813,10 +824,10 @@ static int perf_sample__fprintf_brstacksym(struct perf_sample *sample,
printed += fprintf(fp, ")");
}
printed += fprintf(fp, "/%c/%c/%c/%d ",
- mispred_str( br->entries + i),
- br->entries[i].flags.in_tx? 'X' : '-',
- br->entries[i].flags.abort? 'A' : '-',
- br->entries[i].flags.cycles);
+ mispred_str(entries + i),
+ entries[i].flags.in_tx ? 'X' : '-',
+ entries[i].flags.abort ? 'A' : '-',
+ entries[i].flags.cycles);
}
return printed;
@@ -827,6 +838,7 @@ static int perf_sample__fprintf_brstackoff(struct perf_sample *sample,
struct perf_event_attr *attr, FILE *fp)
{
struct branch_stack *br = sample->branch_stack;
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
struct addr_location alf, alt;
u64 i, from, to;
int printed = 0;
@@ -838,8 +850,8 @@ static int perf_sample__fprintf_brstackoff(struct perf_sample *sample,
memset(&alf, 0, sizeof(alf));
memset(&alt, 0, sizeof(alt));
- from = br->entries[i].from;
- to = br->entries[i].to;
+ from = entries[i].from;
+ to = entries[i].to;
if (thread__find_map_fb(thread, sample->cpumode, from, &alf) &&
!alf.map->dso->adjust_symbols)
@@ -862,10 +874,10 @@ static int perf_sample__fprintf_brstackoff(struct perf_sample *sample,
printed += fprintf(fp, ")");
}
printed += fprintf(fp, "/%c/%c/%c/%d ",
- mispred_str(br->entries + i),
- br->entries[i].flags.in_tx ? 'X' : '-',
- br->entries[i].flags.abort ? 'A' : '-',
- br->entries[i].flags.cycles);
+ mispred_str(entries + i),
+ entries[i].flags.in_tx ? 'X' : '-',
+ entries[i].flags.abort ? 'A' : '-',
+ entries[i].flags.cycles);
}
return printed;
@@ -1053,6 +1065,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
struct machine *machine, FILE *fp)
{
struct branch_stack *br = sample->branch_stack;
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
u64 start, end;
int i, insn, len, nr, ilen, printed = 0;
struct perf_insn x;
@@ -1073,31 +1086,31 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
printed += fprintf(fp, "%c", '\n');
/* Handle first from jump, of which we don't know the entry. */
- len = grab_bb(buffer, br->entries[nr-1].from,
- br->entries[nr-1].from,
+ len = grab_bb(buffer, entries[nr-1].from,
+ entries[nr-1].from,
machine, thread, &x.is64bit, &x.cpumode, false);
if (len > 0) {
- printed += ip__fprintf_sym(br->entries[nr - 1].from, thread,
+ printed += ip__fprintf_sym(entries[nr - 1].from, thread,
x.cpumode, x.cpu, &lastsym, attr, fp);
- printed += ip__fprintf_jump(br->entries[nr - 1].from, &br->entries[nr - 1],
+ printed += ip__fprintf_jump(entries[nr - 1].from, &entries[nr - 1],
&x, buffer, len, 0, fp, &total_cycles);
if (PRINT_FIELD(SRCCODE))
- printed += print_srccode(thread, x.cpumode, br->entries[nr - 1].from);
+ printed += print_srccode(thread, x.cpumode, entries[nr - 1].from);
}
/* Print all blocks */
for (i = nr - 2; i >= 0; i--) {
- if (br->entries[i].from || br->entries[i].to)
+ if (entries[i].from || entries[i].to)
pr_debug("%d: %" PRIx64 "-%" PRIx64 "\n", i,
- br->entries[i].from,
- br->entries[i].to);
- start = br->entries[i + 1].to;
- end = br->entries[i].from;
+ entries[i].from,
+ entries[i].to);
+ start = entries[i + 1].to;
+ end = entries[i].from;
len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, false);
/* Patch up missing kernel transfers due to ring filters */
if (len == -ENXIO && i > 0) {
- end = br->entries[--i].from;
+ end = entries[--i].from;
pr_debug("\tpatching up to %" PRIx64 "-%" PRIx64 "\n", start, end);
len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, false);
}
@@ -1110,7 +1123,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
printed += ip__fprintf_sym(ip, thread, x.cpumode, x.cpu, &lastsym, attr, fp);
if (ip == end) {
- printed += ip__fprintf_jump(ip, &br->entries[i], &x, buffer + off, len - off, ++insn, fp,
+ printed += ip__fprintf_jump(ip, &entries[i], &x, buffer + off, len - off, ++insn, fp,
&total_cycles);
if (PRINT_FIELD(SRCCODE))
printed += print_srccode(thread, x.cpumode, ip);
@@ -1134,9 +1147,9 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
* Hit the branch? In this case we are already done, and the target
* has not been executed yet.
*/
- if (br->entries[0].from == sample->ip)
+ if (entries[0].from == sample->ip)
goto out;
- if (br->entries[0].flags.abort)
+ if (entries[0].flags.abort)
goto out;
/*
@@ -1147,7 +1160,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
* between final branch and sample. When this happens just
* continue walking after the last TO until we hit a branch.
*/
- start = br->entries[0].to;
+ start = entries[0].to;
end = sample->ip;
if (end < start) {
/* Missing jump. Scan 128 bytes for the next branch */
@@ -1681,6 +1694,7 @@ struct perf_script {
bool show_lost_events;
bool show_round_events;
bool show_bpf_events;
+ bool show_cgroup_events;
bool allocated;
bool per_event_dump;
struct evswitch evswitch;
@@ -2199,6 +2213,41 @@ out:
return ret;
}
+static int process_cgroup_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ struct thread *thread;
+ struct perf_script *script = container_of(tool, struct perf_script, tool);
+ struct perf_session *session = script->session;
+ struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
+ int ret = -1;
+
+ thread = machine__findnew_thread(machine, sample->pid, sample->tid);
+ if (thread == NULL) {
+ pr_debug("problem processing CGROUP event, skipping it.\n");
+ return -1;
+ }
+
+ if (perf_event__process_cgroup(tool, event, sample, machine) < 0)
+ goto out;
+
+ if (!evsel->core.attr.sample_id_all) {
+ sample->cpu = 0;
+ sample->time = 0;
+ }
+ if (!filter_cpu(sample)) {
+ perf_sample__fprintf_start(sample, thread, evsel,
+ PERF_RECORD_CGROUP, stdout);
+ perf_event__fprintf(event, stdout);
+ }
+ ret = 0;
+out:
+ thread__put(thread);
+ return ret;
+}
+
static int process_fork_event(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
@@ -2538,6 +2587,8 @@ static int __cmd_script(struct perf_script *script)
script->tool.context_switch = process_switch_event;
if (script->show_namespace_events)
script->tool.namespaces = process_namespaces_event;
+ if (script->show_cgroup_events)
+ script->tool.cgroup = process_cgroup_event;
if (script->show_lost_events)
script->tool.lost = process_lost_event;
if (script->show_round_events) {
@@ -3214,10 +3265,10 @@ static char *get_script_path(const char *script_root, const char *suffix)
__script_root = get_script_root(script_dirent, suffix);
if (__script_root && !strcmp(script_root, __script_root)) {
free(__script_root);
- closedir(lang_dir);
closedir(scripts_dir);
scnprintf(script_path, MAXPATHLEN, "%s/%s",
lang_path, script_dirent->d_name);
+ closedir(lang_dir);
return strdup(script_path);
}
free(__script_root);
@@ -3463,6 +3514,7 @@ int cmd_script(int argc, const char **argv)
.mmap2 = perf_event__process_mmap2,
.comm = perf_event__process_comm,
.namespaces = perf_event__process_namespaces,
+ .cgroup = perf_event__process_cgroup,
.exit = perf_event__process_exit,
.fork = perf_event__process_fork,
.attr = process_attr,
@@ -3551,6 +3603,7 @@ int cmd_script(int argc, const char **argv)
"anything beyond the specified depth will be ignored. "
"Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
OPT_BOOLEAN(0, "reltime", &reltime, "Show time stamps relative to start"),
+ OPT_BOOLEAN(0, "deltatime", &deltatime, "Show time stamps relative to previous event"),
OPT_BOOLEAN('I', "show-info", &show_full_info,
"display extended information from perf.data file"),
OPT_BOOLEAN('\0', "show-kernel-path", &symbol_conf.show_kernel_path,
@@ -3563,6 +3616,8 @@ int cmd_script(int argc, const char **argv)
"Show context switch events (if recorded)"),
OPT_BOOLEAN('\0', "show-namespace-events", &script.show_namespace_events,
"Show namespace events (if recorded)"),
+ OPT_BOOLEAN('\0', "show-cgroup-events", &script.show_cgroup_events,
+ "Show cgroup events (if recorded)"),
OPT_BOOLEAN('\0', "show-lost-events", &script.show_lost_events,
"Show lost events (if recorded)"),
OPT_BOOLEAN('\0', "show-round-events", &script.show_round_events,
@@ -3647,6 +3702,13 @@ int cmd_script(int argc, const char **argv)
}
}
+ if (reltime && deltatime) {
+ fprintf(stderr,
+ "reltime and deltatime - the two don't get along well. "
+ "Please limit to --reltime or --deltatime.\n");
+ return -1;
+ }
+
if (itrace_synth_opts.callchain &&
itrace_synth_opts.callchain_sz > scripting_max_stack)
scripting_max_stack = itrace_synth_opts.callchain_sz;
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index a098c2ebf4ea..ec053dc1e35c 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -929,6 +929,10 @@ static struct option stat_options[] = {
OPT_BOOLEAN_FLAG(0, "all-user", &stat_config.all_user,
"Configure all used events to run in user space.",
PARSE_OPT_EXCLUSIVE),
+ OPT_BOOLEAN(0, "percore-show-thread", &stat_config.percore_show_thread,
+ "Use with 'percore' event qualifier to show the event "
+ "counts of one hardware thread by sum up total hardware "
+ "threads of same physical core"),
OPT_END()
};
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 795e353de095..289cf83e658a 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -143,7 +143,7 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he)
return err;
}
- err = symbol__annotate(&he->ms, evsel, 0, &top->annotation_opts, NULL);
+ err = symbol__annotate(&he->ms, evsel, &top->annotation_opts, NULL);
if (err == 0) {
top->sym_filter_entry = he;
} else {
@@ -616,6 +616,7 @@ static void *display_thread_tui(void *arg)
.arg = top,
.refresh = top->delay_secs,
};
+ int ret;
/* In order to read symbols from other namespaces perf to needs to call
* setns(2). This isn't permitted if the struct_fs has multiple users.
@@ -626,6 +627,7 @@ static void *display_thread_tui(void *arg)
prctl(PR_SET_NAME, "perf-top-UI", 0, 0, 0);
+repeat:
perf_top__sort_new_samples(top);
/*
@@ -638,13 +640,18 @@ static void *display_thread_tui(void *arg)
hists->uid_filter_str = top->record_opts.target.uid_str;
}
- perf_evlist__tui_browse_hists(top->evlist, help, &hbt,
+ ret = perf_evlist__tui_browse_hists(top->evlist, help, &hbt,
top->min_percent,
&top->session->header.env,
!top->record_opts.overwrite,
&top->annotation_opts);
- stop_top();
+ if (ret == K_RELOAD) {
+ top->zero = true;
+ goto repeat;
+ } else
+ stop_top();
+
return NULL;
}
@@ -684,7 +691,9 @@ repeat:
delay_msecs = top->delay_secs * MSEC_PER_SEC;
set_term_quiet_input(&save);
/* trash return*/
- getc(stdin);
+ clearerr(stdin);
+ if (poll(&stdin_poll, 1, 0) > 0)
+ getc(stdin);
while (!done) {
perf_top__print_sym_table(top);
@@ -1244,6 +1253,14 @@ static int __cmd_top(struct perf_top *top)
if (opts->record_namespaces)
top->tool.namespace_events = true;
+ if (opts->record_cgroup) {
+#ifdef HAVE_FILE_HANDLE
+ top->tool.cgroup_events = true;
+#else
+ pr_err("cgroup tracking is not supported.\n");
+ return -1;
+#endif
+ }
ret = perf_event__synthesize_bpf_events(top->session, perf_event__process,
&top->session->machines.host,
@@ -1251,6 +1268,11 @@ static int __cmd_top(struct perf_top *top)
if (ret < 0)
pr_debug("Couldn't synthesize BPF events: Pre-existing BPF programs won't have symbols resolved.\n");
+ ret = perf_event__synthesize_cgroups(&top->tool, perf_event__process,
+ &top->session->machines.host);
+ if (ret < 0)
+ pr_debug("Couldn't synthesize cgroup events.\n");
+
machine__synthesize_threads(&top->session->machines.host, &opts->target,
top->evlist->core.threads, false,
top->nr_threads_synthesize);
@@ -1512,6 +1534,10 @@ int cmd_top(int argc, const char **argv)
"objdump binary to use for disassembly and annotations"),
OPT_STRING('M', "disassembler-style", &top.annotation_opts.disassembler_style, "disassembler style",
"Specify disassembler style (e.g. -M intel for intel syntax)"),
+ OPT_STRING(0, "prefix", &top.annotation_opts.prefix, "prefix",
+ "Add prefix to source file path names in programs (with --prefix-strip)"),
+ OPT_STRING(0, "prefix-strip", &top.annotation_opts.prefix_strip, "N",
+ "Strip first N entries of source file path name in programs (with --prefix)"),
OPT_STRING('u', "uid", &target->uid_str, "user", "user to profile"),
OPT_CALLBACK(0, "percent-limit", &top, "percent",
"Don't show entries under that percent", parse_percent_limit),
@@ -1539,6 +1565,12 @@ int cmd_top(int argc, const char **argv)
"number of thread to run event synthesize"),
OPT_BOOLEAN(0, "namespaces", &opts->record_namespaces,
"Record namespaces events"),
+ OPT_BOOLEAN(0, "all-cgroups", &opts->record_cgroup,
+ "Record cgroup events"),
+ OPT_INTEGER(0, "group-sort-idx", &symbol_conf.group_sort_idx,
+ "Sort the output by the event at the index n in group. "
+ "If n is invalid, sort by the first event. "
+ "WARNING: should be used on grouped events."),
OPTS_EVSWITCH(&top.evswitch),
OPT_END()
};
@@ -1582,6 +1614,9 @@ int cmd_top(int argc, const char **argv)
if (argc)
usage_with_options(top_usage, options);
+ if (annotate_check_args(&top.annotation_opts) < 0)
+ goto out_delete_evlist;
+
if (!top.evlist->core.nr_entries &&
perf_evlist__add_default(top.evlist) < 0) {
pr_err("Not enough memory for event selector list\n");
@@ -1676,7 +1711,7 @@ int cmd_top(int argc, const char **argv)
if (status < 0)
goto out_delete_evlist;
- annotation_config__init();
+ annotation_config__init(&top.annotation_opts);
symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
status = symbol__init(NULL);
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 46a72ecac427..01d542007c8b 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1065,7 +1065,9 @@ static struct syscall_fmt syscall_fmts[] = {
{ .name = "poll", .timeout = true, },
{ .name = "ppoll", .timeout = true, },
{ .name = "prctl",
- .arg = { [0] = { .scnprintf = SCA_PRCTL_OPTION, /* option */ },
+ .arg = { [0] = { .scnprintf = SCA_PRCTL_OPTION, /* option */
+ .strtoul = STUL_STRARRAY,
+ .parm = &strarray__prctl_options, },
[1] = { .scnprintf = SCA_PRCTL_ARG2, /* arg2 */ },
[2] = { .scnprintf = SCA_PRCTL_ARG3, /* arg3 */ }, }, },
{ .name = "pread", .alias = "pread64", },
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index 68039a96c1dc..cf147db4e5ca 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -13,6 +13,7 @@ include/uapi/linux/kcmp.h
include/uapi/linux/kvm.h
include/uapi/linux/in.h
include/uapi/linux/mount.h
+include/uapi/linux/openat2.h
include/uapi/linux/perf_event.h
include/uapi/linux/prctl.h
include/uapi/linux/sched.h
@@ -21,7 +22,9 @@ include/uapi/linux/usbdevice_fs.h
include/uapi/linux/vhost.h
include/uapi/sound/asound.h
include/linux/bits.h
+include/vdso/bits.h
include/linux/const.h
+include/vdso/const.h
include/linux/hash.h
include/uapi/linux/hw_breakpoint.h
arch/x86/include/asm/disabled-features.h
@@ -114,6 +117,7 @@ check arch/x86/lib/memcpy_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/ex
check arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memset_\(erms\|orig\))"'
check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common\(-tools\)*.h>"'
check include/uapi/linux/mman.h '-I "^#include <\(uapi/\)*asm/mman.h>"'
+check include/linux/build_bug.h '-I "^#\(ifndef\|endif\)\( \/\/\)* static_assert$"'
check include/linux/ctype.h '-I "isdigit("'
check lib/ctype.c '-I "^EXPORT_SYMBOL" -I "^#include <linux/export.h>" -B'
check arch/x86/include/asm/inat.h '-I "^#include [\"<]\(asm/\)*inat_types.h[\">]"'
diff --git a/tools/perf/examples/bpf/5sec.c b/tools/perf/examples/bpf/5sec.c
index b9c203219691..65c4ff6892d9 100644
--- a/tools/perf/examples/bpf/5sec.c
+++ b/tools/perf/examples/bpf/5sec.c
@@ -39,11 +39,13 @@
Copyright (C) 2018 Red Hat, Inc., Arnaldo Carvalho de Melo <acme@redhat.com>
*/
-#include <bpf.h>
+#include <bpf/bpf.h>
-int probe(hrtimer_nanosleep, rqtp->tv_sec)(void *ctx, int err, long sec)
+#define NSEC_PER_SEC 1000000000L
+
+int probe(hrtimer_nanosleep, rqtp)(void *ctx, int err, long long sec)
{
- return sec == 5;
+ return sec / NSEC_PER_SEC == 5ULL;
}
license(GPL);
diff --git a/tools/perf/examples/bpf/empty.c b/tools/perf/examples/bpf/empty.c
index 3776d26db9e7..7d7fb0c9fe76 100644
--- a/tools/perf/examples/bpf/empty.c
+++ b/tools/perf/examples/bpf/empty.c
@@ -1,3 +1,3 @@
-#include <bpf.h>
+#include <bpf/bpf.h>
license(GPL);
diff --git a/tools/perf/examples/bpf/sys_enter_openat.c b/tools/perf/examples/bpf/sys_enter_openat.c
index 9cd124b09392..c4481c390d23 100644
--- a/tools/perf/examples/bpf/sys_enter_openat.c
+++ b/tools/perf/examples/bpf/sys_enter_openat.c
@@ -14,7 +14,7 @@
* the return value.
*/
-#include <bpf.h>
+#include <bpf/bpf.h>
struct syscall_enter_openat_args {
unsigned long long unused;
diff --git a/tools/perf/lib/Build b/tools/perf/lib/Build
deleted file mode 100644
index 2ef9a4ec6d99..000000000000
--- a/tools/perf/lib/Build
+++ /dev/null
@@ -1,13 +0,0 @@
-libperf-y += core.o
-libperf-y += cpumap.o
-libperf-y += threadmap.o
-libperf-y += evsel.o
-libperf-y += evlist.o
-libperf-y += mmap.o
-libperf-y += zalloc.o
-libperf-y += xyarray.o
-libperf-y += lib.o
-
-$(OUTPUT)zalloc.o: ../../lib/zalloc.c FORCE
- $(call rule_mkdir)
- $(call if_changed_dep,cc_o_c)
diff --git a/tools/perf/lib/Documentation/Makefile b/tools/perf/lib/Documentation/Makefile
deleted file mode 100644
index 586425a88795..000000000000
--- a/tools/perf/lib/Documentation/Makefile
+++ /dev/null
@@ -1,7 +0,0 @@
-all:
- rst2man man/libperf.rst > man/libperf.7
- rst2pdf tutorial/tutorial.rst
-
-clean:
- rm -f man/libperf.7
- rm -f tutorial/tutorial.pdf
diff --git a/tools/perf/lib/Documentation/man/libperf.rst b/tools/perf/lib/Documentation/man/libperf.rst
deleted file mode 100644
index 09a270fccb9c..000000000000
--- a/tools/perf/lib/Documentation/man/libperf.rst
+++ /dev/null
@@ -1,100 +0,0 @@
-.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
-
-libperf
-
-The libperf library provides an API to access the linux kernel perf
-events subsystem. It provides the following high level objects:
-
- - struct perf_cpu_map
- - struct perf_thread_map
- - struct perf_evlist
- - struct perf_evsel
-
-reference
-=========
-Function reference by header files:
-
-perf/core.h
------------
-.. code-block:: c
-
- typedef int (\*libperf_print_fn_t)(enum libperf_print_level level,
- const char \*, va_list ap);
-
- void libperf_set_print(libperf_print_fn_t fn);
-
-perf/cpumap.h
--------------
-.. code-block:: c
-
- struct perf_cpu_map \*perf_cpu_map__dummy_new(void);
- struct perf_cpu_map \*perf_cpu_map__new(const char \*cpu_list);
- struct perf_cpu_map \*perf_cpu_map__read(FILE \*file);
- struct perf_cpu_map \*perf_cpu_map__get(struct perf_cpu_map \*map);
- void perf_cpu_map__put(struct perf_cpu_map \*map);
- int perf_cpu_map__cpu(const struct perf_cpu_map \*cpus, int idx);
- int perf_cpu_map__nr(const struct perf_cpu_map \*cpus);
- perf_cpu_map__for_each_cpu(cpu, idx, cpus)
-
-perf/threadmap.h
-----------------
-.. code-block:: c
-
- struct perf_thread_map \*perf_thread_map__new_dummy(void);
- void perf_thread_map__set_pid(struct perf_thread_map \*map, int thread, pid_t pid);
- char \*perf_thread_map__comm(struct perf_thread_map \*map, int thread);
- struct perf_thread_map \*perf_thread_map__get(struct perf_thread_map \*map);
- void perf_thread_map__put(struct perf_thread_map \*map);
-
-perf/evlist.h
--------------
-.. code-block::
-
- void perf_evlist__init(struct perf_evlist \*evlist);
- void perf_evlist__add(struct perf_evlist \*evlist,
- struct perf_evsel \*evsel);
- void perf_evlist__remove(struct perf_evlist \*evlist,
- struct perf_evsel \*evsel);
- struct perf_evlist \*perf_evlist__new(void);
- void perf_evlist__delete(struct perf_evlist \*evlist);
- struct perf_evsel\* perf_evlist__next(struct perf_evlist \*evlist,
- struct perf_evsel \*evsel);
- int perf_evlist__open(struct perf_evlist \*evlist);
- void perf_evlist__close(struct perf_evlist \*evlist);
- void perf_evlist__enable(struct perf_evlist \*evlist);
- void perf_evlist__disable(struct perf_evlist \*evlist);
- perf_evlist__for_each_evsel(evlist, pos)
- void perf_evlist__set_maps(struct perf_evlist \*evlist,
- struct perf_cpu_map \*cpus,
- struct perf_thread_map \*threads);
-
-perf/evsel.h
-------------
-.. code-block:: c
-
- struct perf_counts_values {
- union {
- struct {
- uint64_t val;
- uint64_t ena;
- uint64_t run;
- };
- uint64_t values[3];
- };
- };
-
- void perf_evsel__init(struct perf_evsel \*evsel,
- struct perf_event_attr \*attr);
- struct perf_evsel \*perf_evsel__new(struct perf_event_attr \*attr);
- void perf_evsel__delete(struct perf_evsel \*evsel);
- int perf_evsel__open(struct perf_evsel \*evsel, struct perf_cpu_map \*cpus,
- struct perf_thread_map \*threads);
- void perf_evsel__close(struct perf_evsel \*evsel);
- int perf_evsel__read(struct perf_evsel \*evsel, int cpu, int thread,
- struct perf_counts_values \*count);
- int perf_evsel__enable(struct perf_evsel \*evsel);
- int perf_evsel__disable(struct perf_evsel \*evsel);
- int perf_evsel__apply_filter(struct perf_evsel \*evsel, const char \*filter);
- struct perf_cpu_map \*perf_evsel__cpus(struct perf_evsel \*evsel);
- struct perf_thread_map \*perf_evsel__threads(struct perf_evsel \*evsel);
- struct perf_event_attr \*perf_evsel__attr(struct perf_evsel \*evsel);
diff --git a/tools/perf/lib/Documentation/tutorial/tutorial.rst b/tools/perf/lib/Documentation/tutorial/tutorial.rst
deleted file mode 100644
index 7be7bc27b385..000000000000
--- a/tools/perf/lib/Documentation/tutorial/tutorial.rst
+++ /dev/null
@@ -1,123 +0,0 @@
-.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
-
-libperf tutorial
-================
-
-Compile and install libperf from kernel sources
-===============================================
-.. code-block:: bash
-
- git clone git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
- cd linux/tools/perf/lib
- make
- sudo make install prefix=/usr
-
-Libperf object
-==============
-The libperf library provides several high level objects:
-
-struct perf_cpu_map
- Provides a cpu list abstraction.
-
-struct perf_thread_map
- Provides a thread list abstraction.
-
-struct perf_evsel
- Provides an abstraction for single a perf event.
-
-struct perf_evlist
- Gathers several struct perf_evsel object and performs functions on all of them.
-
-The exported API binds these objects together,
-for full reference see the libperf.7 man page.
-
-Examples
-========
-Examples aim to explain libperf functionality on simple use cases.
-They are based in on a checked out linux kernel git tree:
-
-.. code-block:: bash
-
- $ cd tools/perf/lib/Documentation/tutorial/
- $ ls -d ex-*
- ex-1-compile ex-2-evsel-stat ex-3-evlist-stat
-
-ex-1-compile example
-====================
-This example shows the basic usage of *struct perf_cpu_map*,
-how to create it and display its cpus:
-
-.. code-block:: bash
-
- $ cd ex-1-compile/
- $ make
- gcc -o test test.c -lperf
- $ ./test
- 0 1 2 3 4 5 6 7
-
-
-The full code listing is here:
-
-.. code-block:: c
-
- 1 #include <perf/cpumap.h>
- 2
- 3 int main(int argc, char **Argv)
- 4 {
- 5 struct perf_cpu_map *cpus;
- 6 int cpu, tmp;
- 7
- 8 cpus = perf_cpu_map__new(NULL);
- 9
- 10 perf_cpu_map__for_each_cpu(cpu, tmp, cpus)
- 11 fprintf(stdout, "%d ", cpu);
- 12
- 13 fprintf(stdout, "\n");
- 14
- 15 perf_cpu_map__put(cpus);
- 16 return 0;
- 17 }
-
-
-First you need to include the proper header to have *struct perf_cpumap*
-declaration and functions:
-
-.. code-block:: c
-
- 1 #include <perf/cpumap.h>
-
-
-The *struct perf_cpumap* object is created by *perf_cpu_map__new* call.
-The *NULL* argument asks it to populate the object with the current online CPUs list:
-
-.. code-block:: c
-
- 8 cpus = perf_cpu_map__new(NULL);
-
-This is paired with a *perf_cpu_map__put*, that drops its reference at the end, possibly deleting it.
-
-.. code-block:: c
-
- 15 perf_cpu_map__put(cpus);
-
-The iteration through the *struct perf_cpumap* CPUs is done using the *perf_cpu_map__for_each_cpu*
-macro which requires 3 arguments:
-
-- cpu - the cpu numer
-- tmp - iteration helper variable
-- cpus - the *struct perf_cpumap* object
-
-.. code-block:: c
-
- 10 perf_cpu_map__for_each_cpu(cpu, tmp, cpus)
- 11 fprintf(stdout, "%d ", cpu);
-
-ex-2-evsel-stat example
-=======================
-
-TBD
-
-ex-3-evlist-stat example
-========================
-
-TBD
diff --git a/tools/perf/lib/Makefile b/tools/perf/lib/Makefile
deleted file mode 100644
index 0f233638ef1f..000000000000
--- a/tools/perf/lib/Makefile
+++ /dev/null
@@ -1,188 +0,0 @@
-# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
-# Most of this file is copied from tools/lib/bpf/Makefile
-
-LIBPERF_VERSION = 0
-LIBPERF_PATCHLEVEL = 0
-LIBPERF_EXTRAVERSION = 1
-
-MAKEFLAGS += --no-print-directory
-
-ifeq ($(srctree),)
-srctree := $(patsubst %/,%,$(dir $(CURDIR)))
-srctree := $(patsubst %/,%,$(dir $(srctree)))
-srctree := $(patsubst %/,%,$(dir $(srctree)))
-#$(info Determined 'srctree' to be $(srctree))
-endif
-
-INSTALL = install
-
-# Use DESTDIR for installing into a different root directory.
-# This is useful for building a package. The program will be
-# installed in this directory as if it was the root directory.
-# Then the build tool can move it later.
-DESTDIR ?=
-DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))'
-
-include $(srctree)/tools/scripts/Makefile.include
-include $(srctree)/tools/scripts/Makefile.arch
-
-ifeq ($(LP64), 1)
- libdir_relative = lib64
-else
- libdir_relative = lib
-endif
-
-prefix ?=
-libdir = $(prefix)/$(libdir_relative)
-
-# Shell quotes
-libdir_SQ = $(subst ','\'',$(libdir))
-libdir_relative_SQ = $(subst ','\'',$(libdir_relative))
-
-ifeq ("$(origin V)", "command line")
- VERBOSE = $(V)
-endif
-ifndef VERBOSE
- VERBOSE = 0
-endif
-
-ifeq ($(VERBOSE),1)
- Q =
-else
- Q = @
-endif
-
-# Set compile option CFLAGS
-ifdef EXTRA_CFLAGS
- CFLAGS := $(EXTRA_CFLAGS)
-else
- CFLAGS := -g -Wall
-endif
-
-INCLUDES = \
--I$(srctree)/tools/perf/lib/include \
--I$(srctree)/tools/lib/ \
--I$(srctree)/tools/include \
--I$(srctree)/tools/arch/$(SRCARCH)/include/ \
--I$(srctree)/tools/arch/$(SRCARCH)/include/uapi \
--I$(srctree)/tools/include/uapi
-
-# Append required CFLAGS
-override CFLAGS += $(EXTRA_WARNINGS)
-override CFLAGS += -Werror -Wall
-override CFLAGS += -fPIC
-override CFLAGS += $(INCLUDES)
-override CFLAGS += -fvisibility=hidden
-
-all:
-
-export srctree OUTPUT CC LD CFLAGS V
-export DESTDIR DESTDIR_SQ
-
-include $(srctree)/tools/build/Makefile.include
-
-VERSION_SCRIPT := libperf.map
-
-PATCHLEVEL = $(LIBPERF_PATCHLEVEL)
-EXTRAVERSION = $(LIBPERF_EXTRAVERSION)
-VERSION = $(LIBPERF_VERSION).$(LIBPERF_PATCHLEVEL).$(LIBPERF_EXTRAVERSION)
-
-LIBPERF_SO := $(OUTPUT)libperf.so.$(VERSION)
-LIBPERF_A := $(OUTPUT)libperf.a
-LIBPERF_IN := $(OUTPUT)libperf-in.o
-LIBPERF_PC := $(OUTPUT)libperf.pc
-
-LIBPERF_ALL := $(LIBPERF_A) $(OUTPUT)libperf.so*
-
-LIB_DIR := $(srctree)/tools/lib/api/
-
-ifneq ($(OUTPUT),)
-ifneq ($(subdir),)
- API_PATH=$(OUTPUT)/../lib/api/
-else
- API_PATH=$(OUTPUT)
-endif
-else
- API_PATH=$(LIB_DIR)
-endif
-
-LIBAPI = $(API_PATH)libapi.a
-export LIBAPI
-
-$(LIBAPI): FORCE
- $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) $(OUTPUT)libapi.a
-
-$(LIBAPI)-clean:
- $(call QUIET_CLEAN, libapi)
- $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null
-
-$(LIBPERF_IN): FORCE
- $(Q)$(MAKE) $(build)=libperf
-
-$(LIBPERF_A): $(LIBPERF_IN)
- $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIBPERF_IN)
-
-$(LIBPERF_SO): $(LIBPERF_IN) $(LIBAPI)
- $(QUIET_LINK)$(CC) --shared -Wl,-soname,libperf.so \
- -Wl,--version-script=$(VERSION_SCRIPT) $^ -o $@
- @ln -sf $(@F) $(OUTPUT)libperf.so
- @ln -sf $(@F) $(OUTPUT)libperf.so.$(LIBPERF_VERSION)
-
-
-libs: $(LIBPERF_A) $(LIBPERF_SO) $(LIBPERF_PC)
-
-all: fixdep
- $(Q)$(MAKE) libs
-
-clean: $(LIBAPI)-clean
- $(call QUIET_CLEAN, libperf) $(RM) $(LIBPERF_A) \
- *.o *~ *.a *.so *.so.$(VERSION) *.so.$(LIBPERF_VERSION) .*.d .*.cmd LIBPERF-CFLAGS $(LIBPERF_PC)
- $(Q)$(MAKE) -C tests clean
-
-tests: libs
- $(Q)$(MAKE) -C tests
- $(Q)$(MAKE) -C tests run
-
-$(LIBPERF_PC):
- $(QUIET_GEN)sed -e "s|@PREFIX@|$(prefix)|" \
- -e "s|@LIBDIR@|$(libdir_SQ)|" \
- -e "s|@VERSION@|$(VERSION)|" \
- < libperf.pc.template > $@
-
-define do_install_mkdir
- if [ ! -d '$(DESTDIR_SQ)$1' ]; then \
- $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \
- fi
-endef
-
-define do_install
- if [ ! -d '$(DESTDIR_SQ)$2' ]; then \
- $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \
- fi; \
- $(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2'
-endef
-
-install_lib: libs
- $(call QUIET_INSTALL, $(LIBPERF_ALL)) \
- $(call do_install_mkdir,$(libdir_SQ)); \
- cp -fpR $(LIBPERF_ALL) $(DESTDIR)$(libdir_SQ)
-
-install_headers:
- $(call QUIET_INSTALL, headers) \
- $(call do_install,include/perf/core.h,$(prefix)/include/perf,644); \
- $(call do_install,include/perf/cpumap.h,$(prefix)/include/perf,644); \
- $(call do_install,include/perf/threadmap.h,$(prefix)/include/perf,644); \
- $(call do_install,include/perf/evlist.h,$(prefix)/include/perf,644); \
- $(call do_install,include/perf/evsel.h,$(prefix)/include/perf,644); \
- $(call do_install,include/perf/event.h,$(prefix)/include/perf,644); \
- $(call do_install,include/perf/mmap.h,$(prefix)/include/perf,644);
-
-install_pkgconfig: $(LIBPERF_PC)
- $(call QUIET_INSTALL, $(LIBPERF_PC)) \
- $(call do_install,$(LIBPERF_PC),$(libdir_SQ)/pkgconfig,644)
-
-install: install_lib install_headers install_pkgconfig
-
-FORCE:
-
-.PHONY: all install clean tests FORCE
diff --git a/tools/perf/lib/core.c b/tools/perf/lib/core.c
deleted file mode 100644
index 58fc894b76c5..000000000000
--- a/tools/perf/lib/core.c
+++ /dev/null
@@ -1,38 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-
-#define __printf(a, b) __attribute__((format(printf, a, b)))
-
-#include <stdio.h>
-#include <stdarg.h>
-#include <unistd.h>
-#include <linux/compiler.h>
-#include <perf/core.h>
-#include <internal/lib.h>
-#include "internal.h"
-
-static int __base_pr(enum libperf_print_level level __maybe_unused, const char *format,
- va_list args)
-{
- return vfprintf(stderr, format, args);
-}
-
-static libperf_print_fn_t __libperf_pr = __base_pr;
-
-__printf(2, 3)
-void libperf_print(enum libperf_print_level level, const char *format, ...)
-{
- va_list args;
-
- if (!__libperf_pr)
- return;
-
- va_start(args, format);
- __libperf_pr(level, format, args);
- va_end(args);
-}
-
-void libperf_init(libperf_print_fn_t fn)
-{
- page_size = sysconf(_SC_PAGE_SIZE);
- __libperf_pr = fn;
-}
diff --git a/tools/perf/lib/cpumap.c b/tools/perf/lib/cpumap.c
deleted file mode 100644
index f93f4e703e4c..000000000000
--- a/tools/perf/lib/cpumap.c
+++ /dev/null
@@ -1,345 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-#include <perf/cpumap.h>
-#include <stdlib.h>
-#include <linux/refcount.h>
-#include <internal/cpumap.h>
-#include <asm/bug.h>
-#include <stdio.h>
-#include <string.h>
-#include <unistd.h>
-#include <ctype.h>
-#include <limits.h>
-
-struct perf_cpu_map *perf_cpu_map__dummy_new(void)
-{
- struct perf_cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(int));
-
- if (cpus != NULL) {
- cpus->nr = 1;
- cpus->map[0] = -1;
- refcount_set(&cpus->refcnt, 1);
- }
-
- return cpus;
-}
-
-static void cpu_map__delete(struct perf_cpu_map *map)
-{
- if (map) {
- WARN_ONCE(refcount_read(&map->refcnt) != 0,
- "cpu_map refcnt unbalanced\n");
- free(map);
- }
-}
-
-struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map)
-{
- if (map)
- refcount_inc(&map->refcnt);
- return map;
-}
-
-void perf_cpu_map__put(struct perf_cpu_map *map)
-{
- if (map && refcount_dec_and_test(&map->refcnt))
- cpu_map__delete(map);
-}
-
-static struct perf_cpu_map *cpu_map__default_new(void)
-{
- struct perf_cpu_map *cpus;
- int nr_cpus;
-
- nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
- if (nr_cpus < 0)
- return NULL;
-
- cpus = malloc(sizeof(*cpus) + nr_cpus * sizeof(int));
- if (cpus != NULL) {
- int i;
-
- for (i = 0; i < nr_cpus; ++i)
- cpus->map[i] = i;
-
- cpus->nr = nr_cpus;
- refcount_set(&cpus->refcnt, 1);
- }
-
- return cpus;
-}
-
-static int cmp_int(const void *a, const void *b)
-{
- return *(const int *)a - *(const int*)b;
-}
-
-static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, int *tmp_cpus)
-{
- size_t payload_size = nr_cpus * sizeof(int);
- struct perf_cpu_map *cpus = malloc(sizeof(*cpus) + payload_size);
- int i, j;
-
- if (cpus != NULL) {
- memcpy(cpus->map, tmp_cpus, payload_size);
- qsort(cpus->map, nr_cpus, sizeof(int), cmp_int);
- /* Remove dups */
- j = 0;
- for (i = 0; i < nr_cpus; i++) {
- if (i == 0 || cpus->map[i] != cpus->map[i - 1])
- cpus->map[j++] = cpus->map[i];
- }
- cpus->nr = j;
- assert(j <= nr_cpus);
- refcount_set(&cpus->refcnt, 1);
- }
-
- return cpus;
-}
-
-struct perf_cpu_map *perf_cpu_map__read(FILE *file)
-{
- struct perf_cpu_map *cpus = NULL;
- int nr_cpus = 0;
- int *tmp_cpus = NULL, *tmp;
- int max_entries = 0;
- int n, cpu, prev;
- char sep;
-
- sep = 0;
- prev = -1;
- for (;;) {
- n = fscanf(file, "%u%c", &cpu, &sep);
- if (n <= 0)
- break;
- if (prev >= 0) {
- int new_max = nr_cpus + cpu - prev - 1;
-
- WARN_ONCE(new_max >= MAX_NR_CPUS, "Perf can support %d CPUs. "
- "Consider raising MAX_NR_CPUS\n", MAX_NR_CPUS);
-
- if (new_max >= max_entries) {
- max_entries = new_max + MAX_NR_CPUS / 2;
- tmp = realloc(tmp_cpus, max_entries * sizeof(int));
- if (tmp == NULL)
- goto out_free_tmp;
- tmp_cpus = tmp;
- }
-
- while (++prev < cpu)
- tmp_cpus[nr_cpus++] = prev;
- }
- if (nr_cpus == max_entries) {
- max_entries += MAX_NR_CPUS;
- tmp = realloc(tmp_cpus, max_entries * sizeof(int));
- if (tmp == NULL)
- goto out_free_tmp;
- tmp_cpus = tmp;
- }
-
- tmp_cpus[nr_cpus++] = cpu;
- if (n == 2 && sep == '-')
- prev = cpu;
- else
- prev = -1;
- if (n == 1 || sep == '\n')
- break;
- }
-
- if (nr_cpus > 0)
- cpus = cpu_map__trim_new(nr_cpus, tmp_cpus);
- else
- cpus = cpu_map__default_new();
-out_free_tmp:
- free(tmp_cpus);
- return cpus;
-}
-
-static struct perf_cpu_map *cpu_map__read_all_cpu_map(void)
-{
- struct perf_cpu_map *cpus = NULL;
- FILE *onlnf;
-
- onlnf = fopen("/sys/devices/system/cpu/online", "r");
- if (!onlnf)
- return cpu_map__default_new();
-
- cpus = perf_cpu_map__read(onlnf);
- fclose(onlnf);
- return cpus;
-}
-
-struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
-{
- struct perf_cpu_map *cpus = NULL;
- unsigned long start_cpu, end_cpu = 0;
- char *p = NULL;
- int i, nr_cpus = 0;
- int *tmp_cpus = NULL, *tmp;
- int max_entries = 0;
-
- if (!cpu_list)
- return cpu_map__read_all_cpu_map();
-
- /*
- * must handle the case of empty cpumap to cover
- * TOPOLOGY header for NUMA nodes with no CPU
- * ( e.g., because of CPU hotplug)
- */
- if (!isdigit(*cpu_list) && *cpu_list != '\0')
- goto out;
-
- while (isdigit(*cpu_list)) {
- p = NULL;
- start_cpu = strtoul(cpu_list, &p, 0);
- if (start_cpu >= INT_MAX
- || (*p != '\0' && *p != ',' && *p != '-'))
- goto invalid;
-
- if (*p == '-') {
- cpu_list = ++p;
- p = NULL;
- end_cpu = strtoul(cpu_list, &p, 0);
-
- if (end_cpu >= INT_MAX || (*p != '\0' && *p != ','))
- goto invalid;
-
- if (end_cpu < start_cpu)
- goto invalid;
- } else {
- end_cpu = start_cpu;
- }
-
- WARN_ONCE(end_cpu >= MAX_NR_CPUS, "Perf can support %d CPUs. "
- "Consider raising MAX_NR_CPUS\n", MAX_NR_CPUS);
-
- for (; start_cpu <= end_cpu; start_cpu++) {
- /* check for duplicates */
- for (i = 0; i < nr_cpus; i++)
- if (tmp_cpus[i] == (int)start_cpu)
- goto invalid;
-
- if (nr_cpus == max_entries) {
- max_entries += MAX_NR_CPUS;
- tmp = realloc(tmp_cpus, max_entries * sizeof(int));
- if (tmp == NULL)
- goto invalid;
- tmp_cpus = tmp;
- }
- tmp_cpus[nr_cpus++] = (int)start_cpu;
- }
- if (*p)
- ++p;
-
- cpu_list = p;
- }
-
- if (nr_cpus > 0)
- cpus = cpu_map__trim_new(nr_cpus, tmp_cpus);
- else if (*cpu_list != '\0')
- cpus = cpu_map__default_new();
- else
- cpus = perf_cpu_map__dummy_new();
-invalid:
- free(tmp_cpus);
-out:
- return cpus;
-}
-
-int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx)
-{
- if (idx < cpus->nr)
- return cpus->map[idx];
-
- return -1;
-}
-
-int perf_cpu_map__nr(const struct perf_cpu_map *cpus)
-{
- return cpus ? cpus->nr : 1;
-}
-
-bool perf_cpu_map__empty(const struct perf_cpu_map *map)
-{
- return map ? map->map[0] == -1 : true;
-}
-
-int perf_cpu_map__idx(struct perf_cpu_map *cpus, int cpu)
-{
- int i;
-
- for (i = 0; i < cpus->nr; ++i) {
- if (cpus->map[i] == cpu)
- return i;
- }
-
- return -1;
-}
-
-int perf_cpu_map__max(struct perf_cpu_map *map)
-{
- int i, max = -1;
-
- for (i = 0; i < map->nr; i++) {
- if (map->map[i] > max)
- max = map->map[i];
- }
-
- return max;
-}
-
-/*
- * Merge two cpumaps
- *
- * orig either gets freed and replaced with a new map, or reused
- * with no reference count change (similar to "realloc")
- * other has its reference count increased.
- */
-
-struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
- struct perf_cpu_map *other)
-{
- int *tmp_cpus;
- int tmp_len;
- int i, j, k;
- struct perf_cpu_map *merged;
-
- if (!orig && !other)
- return NULL;
- if (!orig) {
- perf_cpu_map__get(other);
- return other;
- }
- if (!other)
- return orig;
- if (orig->nr == other->nr &&
- !memcmp(orig->map, other->map, orig->nr * sizeof(int)))
- return orig;
-
- tmp_len = orig->nr + other->nr;
- tmp_cpus = malloc(tmp_len * sizeof(int));
- if (!tmp_cpus)
- return NULL;
-
- /* Standard merge algorithm from wikipedia */
- i = j = k = 0;
- while (i < orig->nr && j < other->nr) {
- if (orig->map[i] <= other->map[j]) {
- if (orig->map[i] == other->map[j])
- j++;
- tmp_cpus[k++] = orig->map[i++];
- } else
- tmp_cpus[k++] = other->map[j++];
- }
-
- while (i < orig->nr)
- tmp_cpus[k++] = orig->map[i++];
-
- while (j < other->nr)
- tmp_cpus[k++] = other->map[j++];
- assert(k <= tmp_len);
-
- merged = cpu_map__trim_new(k, tmp_cpus);
- free(tmp_cpus);
- perf_cpu_map__put(orig);
- return merged;
-}
diff --git a/tools/perf/lib/evlist.c b/tools/perf/lib/evlist.c
deleted file mode 100644
index ae9e65aa2491..000000000000
--- a/tools/perf/lib/evlist.c
+++ /dev/null
@@ -1,641 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <perf/evlist.h>
-#include <perf/evsel.h>
-#include <linux/bitops.h>
-#include <linux/list.h>
-#include <linux/hash.h>
-#include <sys/ioctl.h>
-#include <internal/evlist.h>
-#include <internal/evsel.h>
-#include <internal/xyarray.h>
-#include <internal/mmap.h>
-#include <internal/cpumap.h>
-#include <internal/threadmap.h>
-#include <internal/xyarray.h>
-#include <internal/lib.h>
-#include <linux/zalloc.h>
-#include <sys/ioctl.h>
-#include <stdlib.h>
-#include <errno.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <signal.h>
-#include <poll.h>
-#include <sys/mman.h>
-#include <perf/cpumap.h>
-#include <perf/threadmap.h>
-#include <api/fd/array.h>
-
-void perf_evlist__init(struct perf_evlist *evlist)
-{
- int i;
-
- for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
- INIT_HLIST_HEAD(&evlist->heads[i]);
- INIT_LIST_HEAD(&evlist->entries);
- evlist->nr_entries = 0;
- fdarray__init(&evlist->pollfd, 64);
-}
-
-static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
- struct perf_evsel *evsel)
-{
- /*
- * We already have cpus for evsel (via PMU sysfs) so
- * keep it, if there's no target cpu list defined.
- */
- if (!evsel->own_cpus || evlist->has_user_cpus) {
- perf_cpu_map__put(evsel->cpus);
- evsel->cpus = perf_cpu_map__get(evlist->cpus);
- } else if (evsel->cpus != evsel->own_cpus) {
- perf_cpu_map__put(evsel->cpus);
- evsel->cpus = perf_cpu_map__get(evsel->own_cpus);
- }
-
- perf_thread_map__put(evsel->threads);
- evsel->threads = perf_thread_map__get(evlist->threads);
- evlist->all_cpus = perf_cpu_map__merge(evlist->all_cpus, evsel->cpus);
-}
-
-static void perf_evlist__propagate_maps(struct perf_evlist *evlist)
-{
- struct perf_evsel *evsel;
-
- perf_evlist__for_each_evsel(evlist, evsel)
- __perf_evlist__propagate_maps(evlist, evsel);
-}
-
-void perf_evlist__add(struct perf_evlist *evlist,
- struct perf_evsel *evsel)
-{
- list_add_tail(&evsel->node, &evlist->entries);
- evlist->nr_entries += 1;
- __perf_evlist__propagate_maps(evlist, evsel);
-}
-
-void perf_evlist__remove(struct perf_evlist *evlist,
- struct perf_evsel *evsel)
-{
- list_del_init(&evsel->node);
- evlist->nr_entries -= 1;
-}
-
-struct perf_evlist *perf_evlist__new(void)
-{
- struct perf_evlist *evlist = zalloc(sizeof(*evlist));
-
- if (evlist != NULL)
- perf_evlist__init(evlist);
-
- return evlist;
-}
-
-struct perf_evsel *
-perf_evlist__next(struct perf_evlist *evlist, struct perf_evsel *prev)
-{
- struct perf_evsel *next;
-
- if (!prev) {
- next = list_first_entry(&evlist->entries,
- struct perf_evsel,
- node);
- } else {
- next = list_next_entry(prev, node);
- }
-
- /* Empty list is noticed here so don't need checking on entry. */
- if (&next->node == &evlist->entries)
- return NULL;
-
- return next;
-}
-
-static void perf_evlist__purge(struct perf_evlist *evlist)
-{
- struct perf_evsel *pos, *n;
-
- perf_evlist__for_each_entry_safe(evlist, n, pos) {
- list_del_init(&pos->node);
- perf_evsel__delete(pos);
- }
-
- evlist->nr_entries = 0;
-}
-
-void perf_evlist__exit(struct perf_evlist *evlist)
-{
- perf_cpu_map__put(evlist->cpus);
- perf_thread_map__put(evlist->threads);
- evlist->cpus = NULL;
- evlist->threads = NULL;
- fdarray__exit(&evlist->pollfd);
-}
-
-void perf_evlist__delete(struct perf_evlist *evlist)
-{
- if (evlist == NULL)
- return;
-
- perf_evlist__munmap(evlist);
- perf_evlist__close(evlist);
- perf_evlist__purge(evlist);
- perf_evlist__exit(evlist);
- free(evlist);
-}
-
-void perf_evlist__set_maps(struct perf_evlist *evlist,
- struct perf_cpu_map *cpus,
- struct perf_thread_map *threads)
-{
- /*
- * Allow for the possibility that one or another of the maps isn't being
- * changed i.e. don't put it. Note we are assuming the maps that are
- * being applied are brand new and evlist is taking ownership of the
- * original reference count of 1. If that is not the case it is up to
- * the caller to increase the reference count.
- */
- if (cpus != evlist->cpus) {
- perf_cpu_map__put(evlist->cpus);
- evlist->cpus = perf_cpu_map__get(cpus);
- }
-
- if (threads != evlist->threads) {
- perf_thread_map__put(evlist->threads);
- evlist->threads = perf_thread_map__get(threads);
- }
-
- perf_evlist__propagate_maps(evlist);
-}
-
-int perf_evlist__open(struct perf_evlist *evlist)
-{
- struct perf_evsel *evsel;
- int err;
-
- perf_evlist__for_each_entry(evlist, evsel) {
- err = perf_evsel__open(evsel, evsel->cpus, evsel->threads);
- if (err < 0)
- goto out_err;
- }
-
- return 0;
-
-out_err:
- perf_evlist__close(evlist);
- return err;
-}
-
-void perf_evlist__close(struct perf_evlist *evlist)
-{
- struct perf_evsel *evsel;
-
- perf_evlist__for_each_entry_reverse(evlist, evsel)
- perf_evsel__close(evsel);
-}
-
-void perf_evlist__enable(struct perf_evlist *evlist)
-{
- struct perf_evsel *evsel;
-
- perf_evlist__for_each_entry(evlist, evsel)
- perf_evsel__enable(evsel);
-}
-
-void perf_evlist__disable(struct perf_evlist *evlist)
-{
- struct perf_evsel *evsel;
-
- perf_evlist__for_each_entry(evlist, evsel)
- perf_evsel__disable(evsel);
-}
-
-u64 perf_evlist__read_format(struct perf_evlist *evlist)
-{
- struct perf_evsel *first = perf_evlist__first(evlist);
-
- return first->attr.read_format;
-}
-
-#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)
-
-static void perf_evlist__id_hash(struct perf_evlist *evlist,
- struct perf_evsel *evsel,
- int cpu, int thread, u64 id)
-{
- int hash;
- struct perf_sample_id *sid = SID(evsel, cpu, thread);
-
- sid->id = id;
- sid->evsel = evsel;
- hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS);
- hlist_add_head(&sid->node, &evlist->heads[hash]);
-}
-
-void perf_evlist__id_add(struct perf_evlist *evlist,
- struct perf_evsel *evsel,
- int cpu, int thread, u64 id)
-{
- perf_evlist__id_hash(evlist, evsel, cpu, thread, id);
- evsel->id[evsel->ids++] = id;
-}
-
-int perf_evlist__id_add_fd(struct perf_evlist *evlist,
- struct perf_evsel *evsel,
- int cpu, int thread, int fd)
-{
- u64 read_data[4] = { 0, };
- int id_idx = 1; /* The first entry is the counter value */
- u64 id;
- int ret;
-
- ret = ioctl(fd, PERF_EVENT_IOC_ID, &id);
- if (!ret)
- goto add;
-
- if (errno != ENOTTY)
- return -1;
-
- /* Legacy way to get event id.. All hail to old kernels! */
-
- /*
- * This way does not work with group format read, so bail
- * out in that case.
- */
- if (perf_evlist__read_format(evlist) & PERF_FORMAT_GROUP)
- return -1;
-
- if (!(evsel->attr.read_format & PERF_FORMAT_ID) ||
- read(fd, &read_data, sizeof(read_data)) == -1)
- return -1;
-
- if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
- ++id_idx;
- if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
- ++id_idx;
-
- id = read_data[id_idx];
-
-add:
- perf_evlist__id_add(evlist, evsel, cpu, thread, id);
- return 0;
-}
-
-int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
-{
- int nr_cpus = perf_cpu_map__nr(evlist->cpus);
- int nr_threads = perf_thread_map__nr(evlist->threads);
- int nfds = 0;
- struct perf_evsel *evsel;
-
- perf_evlist__for_each_entry(evlist, evsel) {
- if (evsel->system_wide)
- nfds += nr_cpus;
- else
- nfds += nr_cpus * nr_threads;
- }
-
- if (fdarray__available_entries(&evlist->pollfd) < nfds &&
- fdarray__grow(&evlist->pollfd, nfds) < 0)
- return -ENOMEM;
-
- return 0;
-}
-
-int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd,
- void *ptr, short revent)
-{
- int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP);
-
- if (pos >= 0) {
- evlist->pollfd.priv[pos].ptr = ptr;
- fcntl(fd, F_SETFL, O_NONBLOCK);
- }
-
- return pos;
-}
-
-static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd,
- void *arg __maybe_unused)
-{
- struct perf_mmap *map = fda->priv[fd].ptr;
-
- if (map)
- perf_mmap__put(map);
-}
-
-int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask)
-{
- return fdarray__filter(&evlist->pollfd, revents_and_mask,
- perf_evlist__munmap_filtered, NULL);
-}
-
-int perf_evlist__poll(struct perf_evlist *evlist, int timeout)
-{
- return fdarray__poll(&evlist->pollfd, timeout);
-}
-
-static struct perf_mmap* perf_evlist__alloc_mmap(struct perf_evlist *evlist, bool overwrite)
-{
- int i;
- struct perf_mmap *map;
-
- map = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap));
- if (!map)
- return NULL;
-
- for (i = 0; i < evlist->nr_mmaps; i++) {
- struct perf_mmap *prev = i ? &map[i - 1] : NULL;
-
- /*
- * When the perf_mmap() call is made we grab one refcount, plus
- * one extra to let perf_mmap__consume() get the last
- * events after all real references (perf_mmap__get()) are
- * dropped.
- *
- * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and
- * thus does perf_mmap__get() on it.
- */
- perf_mmap__init(&map[i], prev, overwrite, NULL);
- }
-
- return map;
-}
-
-static void perf_evlist__set_sid_idx(struct perf_evlist *evlist,
- struct perf_evsel *evsel, int idx, int cpu,
- int thread)
-{
- struct perf_sample_id *sid = SID(evsel, cpu, thread);
-
- sid->idx = idx;
- if (evlist->cpus && cpu >= 0)
- sid->cpu = evlist->cpus->map[cpu];
- else
- sid->cpu = -1;
- if (!evsel->system_wide && evlist->threads && thread >= 0)
- sid->tid = perf_thread_map__pid(evlist->threads, thread);
- else
- sid->tid = -1;
-}
-
-static struct perf_mmap*
-perf_evlist__mmap_cb_get(struct perf_evlist *evlist, bool overwrite, int idx)
-{
- struct perf_mmap *maps;
-
- maps = overwrite ? evlist->mmap_ovw : evlist->mmap;
-
- if (!maps) {
- maps = perf_evlist__alloc_mmap(evlist, overwrite);
- if (!maps)
- return NULL;
-
- if (overwrite)
- evlist->mmap_ovw = maps;
- else
- evlist->mmap = maps;
- }
-
- return &maps[idx];
-}
-
-#define FD(e, x, y) (*(int *) xyarray__entry(e->fd, x, y))
-
-static int
-perf_evlist__mmap_cb_mmap(struct perf_mmap *map, struct perf_mmap_param *mp,
- int output, int cpu)
-{
- return perf_mmap__mmap(map, mp, output, cpu);
-}
-
-static void perf_evlist__set_mmap_first(struct perf_evlist *evlist, struct perf_mmap *map,
- bool overwrite)
-{
- if (overwrite)
- evlist->mmap_ovw_first = map;
- else
- evlist->mmap_first = map;
-}
-
-static int
-mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
- int idx, struct perf_mmap_param *mp, int cpu_idx,
- int thread, int *_output, int *_output_overwrite)
-{
- int evlist_cpu = perf_cpu_map__cpu(evlist->cpus, cpu_idx);
- struct perf_evsel *evsel;
- int revent;
-
- perf_evlist__for_each_entry(evlist, evsel) {
- bool overwrite = evsel->attr.write_backward;
- struct perf_mmap *map;
- int *output, fd, cpu;
-
- if (evsel->system_wide && thread)
- continue;
-
- cpu = perf_cpu_map__idx(evsel->cpus, evlist_cpu);
- if (cpu == -1)
- continue;
-
- map = ops->get(evlist, overwrite, idx);
- if (map == NULL)
- return -ENOMEM;
-
- if (overwrite) {
- mp->prot = PROT_READ;
- output = _output_overwrite;
- } else {
- mp->prot = PROT_READ | PROT_WRITE;
- output = _output;
- }
-
- fd = FD(evsel, cpu, thread);
-
- if (*output == -1) {
- *output = fd;
-
- /*
- * The last one will be done at perf_mmap__consume(), so that we
- * make sure we don't prevent tools from consuming every last event in
- * the ring buffer.
- *
- * I.e. we can get the POLLHUP meaning that the fd doesn't exist
- * anymore, but the last events for it are still in the ring buffer,
- * waiting to be consumed.
- *
- * Tools can chose to ignore this at their own discretion, but the
- * evlist layer can't just drop it when filtering events in
- * perf_evlist__filter_pollfd().
- */
- refcount_set(&map->refcnt, 2);
-
- if (ops->mmap(map, mp, *output, evlist_cpu) < 0)
- return -1;
-
- if (!idx)
- perf_evlist__set_mmap_first(evlist, map, overwrite);
- } else {
- if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
- return -1;
-
- perf_mmap__get(map);
- }
-
- revent = !overwrite ? POLLIN : 0;
-
- if (!evsel->system_wide &&
- perf_evlist__add_pollfd(evlist, fd, map, revent) < 0) {
- perf_mmap__put(map);
- return -1;
- }
-
- if (evsel->attr.read_format & PERF_FORMAT_ID) {
- if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread,
- fd) < 0)
- return -1;
- perf_evlist__set_sid_idx(evlist, evsel, idx, cpu,
- thread);
- }
- }
-
- return 0;
-}
-
-static int
-mmap_per_thread(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
- struct perf_mmap_param *mp)
-{
- int thread;
- int nr_threads = perf_thread_map__nr(evlist->threads);
-
- for (thread = 0; thread < nr_threads; thread++) {
- int output = -1;
- int output_overwrite = -1;
-
- if (ops->idx)
- ops->idx(evlist, mp, thread, false);
-
- if (mmap_per_evsel(evlist, ops, thread, mp, 0, thread,
- &output, &output_overwrite))
- goto out_unmap;
- }
-
- return 0;
-
-out_unmap:
- perf_evlist__munmap(evlist);
- return -1;
-}
-
-static int
-mmap_per_cpu(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
- struct perf_mmap_param *mp)
-{
- int nr_threads = perf_thread_map__nr(evlist->threads);
- int nr_cpus = perf_cpu_map__nr(evlist->cpus);
- int cpu, thread;
-
- for (cpu = 0; cpu < nr_cpus; cpu++) {
- int output = -1;
- int output_overwrite = -1;
-
- if (ops->idx)
- ops->idx(evlist, mp, cpu, true);
-
- for (thread = 0; thread < nr_threads; thread++) {
- if (mmap_per_evsel(evlist, ops, cpu, mp, cpu,
- thread, &output, &output_overwrite))
- goto out_unmap;
- }
- }
-
- return 0;
-
-out_unmap:
- perf_evlist__munmap(evlist);
- return -1;
-}
-
-static int perf_evlist__nr_mmaps(struct perf_evlist *evlist)
-{
- int nr_mmaps;
-
- nr_mmaps = perf_cpu_map__nr(evlist->cpus);
- if (perf_cpu_map__empty(evlist->cpus))
- nr_mmaps = perf_thread_map__nr(evlist->threads);
-
- return nr_mmaps;
-}
-
-int perf_evlist__mmap_ops(struct perf_evlist *evlist,
- struct perf_evlist_mmap_ops *ops,
- struct perf_mmap_param *mp)
-{
- struct perf_evsel *evsel;
- const struct perf_cpu_map *cpus = evlist->cpus;
- const struct perf_thread_map *threads = evlist->threads;
-
- if (!ops || !ops->get || !ops->mmap)
- return -EINVAL;
-
- mp->mask = evlist->mmap_len - page_size - 1;
-
- evlist->nr_mmaps = perf_evlist__nr_mmaps(evlist);
-
- perf_evlist__for_each_entry(evlist, evsel) {
- if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
- evsel->sample_id == NULL &&
- perf_evsel__alloc_id(evsel, perf_cpu_map__nr(cpus), threads->nr) < 0)
- return -ENOMEM;
- }
-
- if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
- return -ENOMEM;
-
- if (perf_cpu_map__empty(cpus))
- return mmap_per_thread(evlist, ops, mp);
-
- return mmap_per_cpu(evlist, ops, mp);
-}
-
-int perf_evlist__mmap(struct perf_evlist *evlist, int pages)
-{
- struct perf_mmap_param mp;
- struct perf_evlist_mmap_ops ops = {
- .get = perf_evlist__mmap_cb_get,
- .mmap = perf_evlist__mmap_cb_mmap,
- };
-
- evlist->mmap_len = (pages + 1) * page_size;
-
- return perf_evlist__mmap_ops(evlist, &ops, &mp);
-}
-
-void perf_evlist__munmap(struct perf_evlist *evlist)
-{
- int i;
-
- if (evlist->mmap) {
- for (i = 0; i < evlist->nr_mmaps; i++)
- perf_mmap__munmap(&evlist->mmap[i]);
- }
-
- if (evlist->mmap_ovw) {
- for (i = 0; i < evlist->nr_mmaps; i++)
- perf_mmap__munmap(&evlist->mmap_ovw[i]);
- }
-
- zfree(&evlist->mmap);
- zfree(&evlist->mmap_ovw);
-}
-
-struct perf_mmap*
-perf_evlist__next_mmap(struct perf_evlist *evlist, struct perf_mmap *map,
- bool overwrite)
-{
- if (map)
- return map->next;
-
- return overwrite ? evlist->mmap_ovw_first : evlist->mmap_first;
-}
diff --git a/tools/perf/lib/evsel.c b/tools/perf/lib/evsel.c
deleted file mode 100644
index 4dc06289f4c7..000000000000
--- a/tools/perf/lib/evsel.c
+++ /dev/null
@@ -1,301 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <errno.h>
-#include <unistd.h>
-#include <sys/syscall.h>
-#include <perf/evsel.h>
-#include <perf/cpumap.h>
-#include <perf/threadmap.h>
-#include <linux/list.h>
-#include <internal/evsel.h>
-#include <linux/zalloc.h>
-#include <stdlib.h>
-#include <internal/xyarray.h>
-#include <internal/cpumap.h>
-#include <internal/threadmap.h>
-#include <internal/lib.h>
-#include <linux/string.h>
-#include <sys/ioctl.h>
-
-void perf_evsel__init(struct perf_evsel *evsel, struct perf_event_attr *attr)
-{
- INIT_LIST_HEAD(&evsel->node);
- evsel->attr = *attr;
-}
-
-struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr)
-{
- struct perf_evsel *evsel = zalloc(sizeof(*evsel));
-
- if (evsel != NULL)
- perf_evsel__init(evsel, attr);
-
- return evsel;
-}
-
-void perf_evsel__delete(struct perf_evsel *evsel)
-{
- free(evsel);
-}
-
-#define FD(e, x, y) (*(int *) xyarray__entry(e->fd, x, y))
-
-int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
-{
- evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int));
-
- if (evsel->fd) {
- int cpu, thread;
- for (cpu = 0; cpu < ncpus; cpu++) {
- for (thread = 0; thread < nthreads; thread++) {
- FD(evsel, cpu, thread) = -1;
- }
- }
- }
-
- return evsel->fd != NULL ? 0 : -ENOMEM;
-}
-
-static int
-sys_perf_event_open(struct perf_event_attr *attr,
- pid_t pid, int cpu, int group_fd,
- unsigned long flags)
-{
- return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
-}
-
-int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
- struct perf_thread_map *threads)
-{
- int cpu, thread, err = 0;
-
- if (cpus == NULL) {
- static struct perf_cpu_map *empty_cpu_map;
-
- if (empty_cpu_map == NULL) {
- empty_cpu_map = perf_cpu_map__dummy_new();
- if (empty_cpu_map == NULL)
- return -ENOMEM;
- }
-
- cpus = empty_cpu_map;
- }
-
- if (threads == NULL) {
- static struct perf_thread_map *empty_thread_map;
-
- if (empty_thread_map == NULL) {
- empty_thread_map = perf_thread_map__new_dummy();
- if (empty_thread_map == NULL)
- return -ENOMEM;
- }
-
- threads = empty_thread_map;
- }
-
- if (evsel->fd == NULL &&
- perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0)
- return -ENOMEM;
-
- for (cpu = 0; cpu < cpus->nr; cpu++) {
- for (thread = 0; thread < threads->nr; thread++) {
- int fd;
-
- fd = sys_perf_event_open(&evsel->attr,
- threads->map[thread].pid,
- cpus->map[cpu], -1, 0);
-
- if (fd < 0)
- return -errno;
-
- FD(evsel, cpu, thread) = fd;
- }
- }
-
- return err;
-}
-
-static void perf_evsel__close_fd_cpu(struct perf_evsel *evsel, int cpu)
-{
- int thread;
-
- for (thread = 0; thread < xyarray__max_y(evsel->fd); ++thread) {
- if (FD(evsel, cpu, thread) >= 0)
- close(FD(evsel, cpu, thread));
- FD(evsel, cpu, thread) = -1;
- }
-}
-
-void perf_evsel__close_fd(struct perf_evsel *evsel)
-{
- int cpu;
-
- for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++)
- perf_evsel__close_fd_cpu(evsel, cpu);
-}
-
-void perf_evsel__free_fd(struct perf_evsel *evsel)
-{
- xyarray__delete(evsel->fd);
- evsel->fd = NULL;
-}
-
-void perf_evsel__close(struct perf_evsel *evsel)
-{
- if (evsel->fd == NULL)
- return;
-
- perf_evsel__close_fd(evsel);
- perf_evsel__free_fd(evsel);
-}
-
-void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu)
-{
- if (evsel->fd == NULL)
- return;
-
- perf_evsel__close_fd_cpu(evsel, cpu);
-}
-
-int perf_evsel__read_size(struct perf_evsel *evsel)
-{
- u64 read_format = evsel->attr.read_format;
- int entry = sizeof(u64); /* value */
- int size = 0;
- int nr = 1;
-
- if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
- size += sizeof(u64);
-
- if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
- size += sizeof(u64);
-
- if (read_format & PERF_FORMAT_ID)
- entry += sizeof(u64);
-
- if (read_format & PERF_FORMAT_GROUP) {
- nr = evsel->nr_members;
- size += sizeof(u64);
- }
-
- size += entry * nr;
- return size;
-}
-
-int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
- struct perf_counts_values *count)
-{
- size_t size = perf_evsel__read_size(evsel);
-
- memset(count, 0, sizeof(*count));
-
- if (FD(evsel, cpu, thread) < 0)
- return -EINVAL;
-
- if (readn(FD(evsel, cpu, thread), count->values, size) <= 0)
- return -errno;
-
- return 0;
-}
-
-static int perf_evsel__run_ioctl(struct perf_evsel *evsel,
- int ioc, void *arg,
- int cpu)
-{
- int thread;
-
- for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) {
- int fd = FD(evsel, cpu, thread),
- err = ioctl(fd, ioc, arg);
-
- if (err)
- return err;
- }
-
- return 0;
-}
-
-int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu)
-{
- return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, NULL, cpu);
-}
-
-int perf_evsel__enable(struct perf_evsel *evsel)
-{
- int i;
- int err = 0;
-
- for (i = 0; i < xyarray__max_x(evsel->fd) && !err; i++)
- err = perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, NULL, i);
- return err;
-}
-
-int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu)
-{
- return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, NULL, cpu);
-}
-
-int perf_evsel__disable(struct perf_evsel *evsel)
-{
- int i;
- int err = 0;
-
- for (i = 0; i < xyarray__max_x(evsel->fd) && !err; i++)
- err = perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, NULL, i);
- return err;
-}
-
-int perf_evsel__apply_filter(struct perf_evsel *evsel, const char *filter)
-{
- int err = 0, i;
-
- for (i = 0; i < evsel->cpus->nr && !err; i++)
- err = perf_evsel__run_ioctl(evsel,
- PERF_EVENT_IOC_SET_FILTER,
- (void *)filter, i);
- return err;
-}
-
-struct perf_cpu_map *perf_evsel__cpus(struct perf_evsel *evsel)
-{
- return evsel->cpus;
-}
-
-struct perf_thread_map *perf_evsel__threads(struct perf_evsel *evsel)
-{
- return evsel->threads;
-}
-
-struct perf_event_attr *perf_evsel__attr(struct perf_evsel *evsel)
-{
- return &evsel->attr;
-}
-
-int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
-{
- if (ncpus == 0 || nthreads == 0)
- return 0;
-
- if (evsel->system_wide)
- nthreads = 1;
-
- evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id));
- if (evsel->sample_id == NULL)
- return -ENOMEM;
-
- evsel->id = zalloc(ncpus * nthreads * sizeof(u64));
- if (evsel->id == NULL) {
- xyarray__delete(evsel->sample_id);
- evsel->sample_id = NULL;
- return -ENOMEM;
- }
-
- return 0;
-}
-
-void perf_evsel__free_id(struct perf_evsel *evsel)
-{
- xyarray__delete(evsel->sample_id);
- evsel->sample_id = NULL;
- zfree(&evsel->id);
- evsel->ids = 0;
-}
diff --git a/tools/perf/lib/include/internal/cpumap.h b/tools/perf/lib/include/internal/cpumap.h
deleted file mode 100644
index 840d4032587b..000000000000
--- a/tools/perf/lib/include/internal/cpumap.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __LIBPERF_INTERNAL_CPUMAP_H
-#define __LIBPERF_INTERNAL_CPUMAP_H
-
-#include <linux/refcount.h>
-
-struct perf_cpu_map {
- refcount_t refcnt;
- int nr;
- int map[];
-};
-
-#ifndef MAX_NR_CPUS
-#define MAX_NR_CPUS 2048
-#endif
-
-int perf_cpu_map__idx(struct perf_cpu_map *cpus, int cpu);
-
-#endif /* __LIBPERF_INTERNAL_CPUMAP_H */
diff --git a/tools/perf/lib/include/internal/evlist.h b/tools/perf/lib/include/internal/evlist.h
deleted file mode 100644
index 74dc8c3f0b66..000000000000
--- a/tools/perf/lib/include/internal/evlist.h
+++ /dev/null
@@ -1,127 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __LIBPERF_INTERNAL_EVLIST_H
-#define __LIBPERF_INTERNAL_EVLIST_H
-
-#include <linux/list.h>
-#include <api/fd/array.h>
-#include <internal/evsel.h>
-
-#define PERF_EVLIST__HLIST_BITS 8
-#define PERF_EVLIST__HLIST_SIZE (1 << PERF_EVLIST__HLIST_BITS)
-
-struct perf_cpu_map;
-struct perf_thread_map;
-struct perf_mmap_param;
-
-struct perf_evlist {
- struct list_head entries;
- int nr_entries;
- bool has_user_cpus;
- struct perf_cpu_map *cpus;
- struct perf_cpu_map *all_cpus;
- struct perf_thread_map *threads;
- int nr_mmaps;
- size_t mmap_len;
- struct fdarray pollfd;
- struct hlist_head heads[PERF_EVLIST__HLIST_SIZE];
- struct perf_mmap *mmap;
- struct perf_mmap *mmap_ovw;
- struct perf_mmap *mmap_first;
- struct perf_mmap *mmap_ovw_first;
-};
-
-typedef void
-(*perf_evlist_mmap__cb_idx_t)(struct perf_evlist*, struct perf_mmap_param*, int, bool);
-typedef struct perf_mmap*
-(*perf_evlist_mmap__cb_get_t)(struct perf_evlist*, bool, int);
-typedef int
-(*perf_evlist_mmap__cb_mmap_t)(struct perf_mmap*, struct perf_mmap_param*, int, int);
-
-struct perf_evlist_mmap_ops {
- perf_evlist_mmap__cb_idx_t idx;
- perf_evlist_mmap__cb_get_t get;
- perf_evlist_mmap__cb_mmap_t mmap;
-};
-
-int perf_evlist__alloc_pollfd(struct perf_evlist *evlist);
-int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd,
- void *ptr, short revent);
-
-int perf_evlist__mmap_ops(struct perf_evlist *evlist,
- struct perf_evlist_mmap_ops *ops,
- struct perf_mmap_param *mp);
-
-void perf_evlist__init(struct perf_evlist *evlist);
-void perf_evlist__exit(struct perf_evlist *evlist);
-
-/**
- * __perf_evlist__for_each_entry - iterate thru all the evsels
- * @list: list_head instance to iterate
- * @evsel: struct perf_evsel iterator
- */
-#define __perf_evlist__for_each_entry(list, evsel) \
- list_for_each_entry(evsel, list, node)
-
-/**
- * evlist__for_each_entry - iterate thru all the evsels
- * @evlist: perf_evlist instance to iterate
- * @evsel: struct perf_evsel iterator
- */
-#define perf_evlist__for_each_entry(evlist, evsel) \
- __perf_evlist__for_each_entry(&(evlist)->entries, evsel)
-
-/**
- * __perf_evlist__for_each_entry_reverse - iterate thru all the evsels in reverse order
- * @list: list_head instance to iterate
- * @evsel: struct evsel iterator
- */
-#define __perf_evlist__for_each_entry_reverse(list, evsel) \
- list_for_each_entry_reverse(evsel, list, node)
-
-/**
- * perf_evlist__for_each_entry_reverse - iterate thru all the evsels in reverse order
- * @evlist: evlist instance to iterate
- * @evsel: struct evsel iterator
- */
-#define perf_evlist__for_each_entry_reverse(evlist, evsel) \
- __perf_evlist__for_each_entry_reverse(&(evlist)->entries, evsel)
-
-/**
- * __perf_evlist__for_each_entry_safe - safely iterate thru all the evsels
- * @list: list_head instance to iterate
- * @tmp: struct evsel temp iterator
- * @evsel: struct evsel iterator
- */
-#define __perf_evlist__for_each_entry_safe(list, tmp, evsel) \
- list_for_each_entry_safe(evsel, tmp, list, node)
-
-/**
- * perf_evlist__for_each_entry_safe - safely iterate thru all the evsels
- * @evlist: evlist instance to iterate
- * @evsel: struct evsel iterator
- * @tmp: struct evsel temp iterator
- */
-#define perf_evlist__for_each_entry_safe(evlist, tmp, evsel) \
- __perf_evlist__for_each_entry_safe(&(evlist)->entries, tmp, evsel)
-
-static inline struct perf_evsel *perf_evlist__first(struct perf_evlist *evlist)
-{
- return list_entry(evlist->entries.next, struct perf_evsel, node);
-}
-
-static inline struct perf_evsel *perf_evlist__last(struct perf_evlist *evlist)
-{
- return list_entry(evlist->entries.prev, struct perf_evsel, node);
-}
-
-u64 perf_evlist__read_format(struct perf_evlist *evlist);
-
-void perf_evlist__id_add(struct perf_evlist *evlist,
- struct perf_evsel *evsel,
- int cpu, int thread, u64 id);
-
-int perf_evlist__id_add_fd(struct perf_evlist *evlist,
- struct perf_evsel *evsel,
- int cpu, int thread, int fd);
-
-#endif /* __LIBPERF_INTERNAL_EVLIST_H */
diff --git a/tools/perf/lib/include/internal/evsel.h b/tools/perf/lib/include/internal/evsel.h
deleted file mode 100644
index 1ffd083b235e..000000000000
--- a/tools/perf/lib/include/internal/evsel.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __LIBPERF_INTERNAL_EVSEL_H
-#define __LIBPERF_INTERNAL_EVSEL_H
-
-#include <linux/types.h>
-#include <linux/perf_event.h>
-#include <stdbool.h>
-#include <sys/types.h>
-
-struct perf_cpu_map;
-struct perf_thread_map;
-struct xyarray;
-
-/*
- * Per fd, to map back from PERF_SAMPLE_ID to evsel, only used when there are
- * more than one entry in the evlist.
- */
-struct perf_sample_id {
- struct hlist_node node;
- u64 id;
- struct perf_evsel *evsel;
- /*
- * 'idx' will be used for AUX area sampling. A sample will have AUX area
- * data that will be queued for decoding, where there are separate
- * queues for each CPU (per-cpu tracing) or task (per-thread tracing).
- * The sample ID can be used to lookup 'idx' which is effectively the
- * queue number.
- */
- int idx;
- int cpu;
- pid_t tid;
-
- /* Holds total ID period value for PERF_SAMPLE_READ processing. */
- u64 period;
-};
-
-struct perf_evsel {
- struct list_head node;
- struct perf_event_attr attr;
- struct perf_cpu_map *cpus;
- struct perf_cpu_map *own_cpus;
- struct perf_thread_map *threads;
- struct xyarray *fd;
- struct xyarray *sample_id;
- u64 *id;
- u32 ids;
-
- /* parse modifier helper */
- int nr_members;
- bool system_wide;
-};
-
-void perf_evsel__init(struct perf_evsel *evsel, struct perf_event_attr *attr);
-int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
-void perf_evsel__close_fd(struct perf_evsel *evsel);
-void perf_evsel__free_fd(struct perf_evsel *evsel);
-int perf_evsel__read_size(struct perf_evsel *evsel);
-int perf_evsel__apply_filter(struct perf_evsel *evsel, const char *filter);
-
-int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads);
-void perf_evsel__free_id(struct perf_evsel *evsel);
-
-#endif /* __LIBPERF_INTERNAL_EVSEL_H */
diff --git a/tools/perf/lib/include/internal/lib.h b/tools/perf/lib/include/internal/lib.h
deleted file mode 100644
index 5175d491b2d4..000000000000
--- a/tools/perf/lib/include/internal/lib.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __LIBPERF_INTERNAL_LIB_H
-#define __LIBPERF_INTERNAL_LIB_H
-
-#include <sys/types.h>
-
-extern unsigned int page_size;
-
-ssize_t readn(int fd, void *buf, size_t n);
-ssize_t writen(int fd, const void *buf, size_t n);
-
-#endif /* __LIBPERF_INTERNAL_CPUMAP_H */
diff --git a/tools/perf/lib/include/internal/mmap.h b/tools/perf/lib/include/internal/mmap.h
deleted file mode 100644
index be7556e0a2b2..000000000000
--- a/tools/perf/lib/include/internal/mmap.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __LIBPERF_INTERNAL_MMAP_H
-#define __LIBPERF_INTERNAL_MMAP_H
-
-#include <linux/compiler.h>
-#include <linux/refcount.h>
-#include <linux/types.h>
-#include <stdbool.h>
-
-/* perf sample has 16 bits size limit */
-#define PERF_SAMPLE_MAX_SIZE (1 << 16)
-
-struct perf_mmap;
-
-typedef void (*libperf_unmap_cb_t)(struct perf_mmap *map);
-
-/**
- * struct perf_mmap - perf's ring buffer mmap details
- *
- * @refcnt - e.g. code using PERF_EVENT_IOC_SET_OUTPUT to share this
- */
-struct perf_mmap {
- void *base;
- int mask;
- int fd;
- int cpu;
- refcount_t refcnt;
- u64 prev;
- u64 start;
- u64 end;
- bool overwrite;
- u64 flush;
- libperf_unmap_cb_t unmap_cb;
- char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8);
- struct perf_mmap *next;
-};
-
-struct perf_mmap_param {
- int prot;
- int mask;
-};
-
-size_t perf_mmap__mmap_len(struct perf_mmap *map);
-
-void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev,
- bool overwrite, libperf_unmap_cb_t unmap_cb);
-int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp,
- int fd, int cpu);
-void perf_mmap__munmap(struct perf_mmap *map);
-void perf_mmap__get(struct perf_mmap *map);
-void perf_mmap__put(struct perf_mmap *map);
-
-u64 perf_mmap__read_head(struct perf_mmap *map);
-
-#endif /* __LIBPERF_INTERNAL_MMAP_H */
diff --git a/tools/perf/lib/include/internal/tests.h b/tools/perf/lib/include/internal/tests.h
deleted file mode 100644
index 2093e8868a67..000000000000
--- a/tools/perf/lib/include/internal/tests.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __LIBPERF_INTERNAL_TESTS_H
-#define __LIBPERF_INTERNAL_TESTS_H
-
-#include <stdio.h>
-
-int tests_failed;
-
-#define __T_START \
-do { \
- fprintf(stdout, "- running %s...", __FILE__); \
- fflush(NULL); \
- tests_failed = 0; \
-} while (0)
-
-#define __T_END \
-do { \
- if (tests_failed) \
- fprintf(stdout, " FAILED (%d)\n", tests_failed); \
- else \
- fprintf(stdout, "OK\n"); \
-} while (0)
-
-#define __T(text, cond) \
-do { \
- if (!(cond)) { \
- fprintf(stderr, "FAILED %s:%d %s\n", __FILE__, __LINE__, text); \
- tests_failed++; \
- return -1; \
- } \
-} while (0)
-
-#endif /* __LIBPERF_INTERNAL_TESTS_H */
diff --git a/tools/perf/lib/include/internal/threadmap.h b/tools/perf/lib/include/internal/threadmap.h
deleted file mode 100644
index df748baf9eda..000000000000
--- a/tools/perf/lib/include/internal/threadmap.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __LIBPERF_INTERNAL_THREADMAP_H
-#define __LIBPERF_INTERNAL_THREADMAP_H
-
-#include <linux/refcount.h>
-#include <sys/types.h>
-#include <unistd.h>
-
-struct thread_map_data {
- pid_t pid;
- char *comm;
-};
-
-struct perf_thread_map {
- refcount_t refcnt;
- int nr;
- int err_thread;
- struct thread_map_data map[];
-};
-
-struct perf_thread_map *perf_thread_map__realloc(struct perf_thread_map *map, int nr);
-
-#endif /* __LIBPERF_INTERNAL_THREADMAP_H */
diff --git a/tools/perf/lib/include/internal/xyarray.h b/tools/perf/lib/include/internal/xyarray.h
deleted file mode 100644
index 51e35d6c8ec4..000000000000
--- a/tools/perf/lib/include/internal/xyarray.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __LIBPERF_INTERNAL_XYARRAY_H
-#define __LIBPERF_INTERNAL_XYARRAY_H
-
-#include <linux/compiler.h>
-#include <sys/types.h>
-
-struct xyarray {
- size_t row_size;
- size_t entry_size;
- size_t entries;
- size_t max_x;
- size_t max_y;
- char contents[] __aligned(8);
-};
-
-struct xyarray *xyarray__new(int xlen, int ylen, size_t entry_size);
-void xyarray__delete(struct xyarray *xy);
-void xyarray__reset(struct xyarray *xy);
-
-static inline void *xyarray__entry(struct xyarray *xy, int x, int y)
-{
- return &xy->contents[x * xy->row_size + y * xy->entry_size];
-}
-
-static inline int xyarray__max_y(struct xyarray *xy)
-{
- return xy->max_y;
-}
-
-static inline int xyarray__max_x(struct xyarray *xy)
-{
- return xy->max_x;
-}
-
-#endif /* __LIBPERF_INTERNAL_XYARRAY_H */
diff --git a/tools/perf/lib/include/perf/core.h b/tools/perf/lib/include/perf/core.h
deleted file mode 100644
index a3f6d68edad7..000000000000
--- a/tools/perf/lib/include/perf/core.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __LIBPERF_CORE_H
-#define __LIBPERF_CORE_H
-
-#include <stdarg.h>
-
-#ifndef LIBPERF_API
-#define LIBPERF_API __attribute__((visibility("default")))
-#endif
-
-enum libperf_print_level {
- LIBPERF_ERR,
- LIBPERF_WARN,
- LIBPERF_INFO,
- LIBPERF_DEBUG,
- LIBPERF_DEBUG2,
- LIBPERF_DEBUG3,
-};
-
-typedef int (*libperf_print_fn_t)(enum libperf_print_level level,
- const char *, va_list ap);
-
-LIBPERF_API void libperf_init(libperf_print_fn_t fn);
-
-#endif /* __LIBPERF_CORE_H */
diff --git a/tools/perf/lib/include/perf/cpumap.h b/tools/perf/lib/include/perf/cpumap.h
deleted file mode 100644
index 6a17ad730cbc..000000000000
--- a/tools/perf/lib/include/perf/cpumap.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __LIBPERF_CPUMAP_H
-#define __LIBPERF_CPUMAP_H
-
-#include <perf/core.h>
-#include <stdio.h>
-#include <stdbool.h>
-
-struct perf_cpu_map;
-
-LIBPERF_API struct perf_cpu_map *perf_cpu_map__dummy_new(void);
-LIBPERF_API struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list);
-LIBPERF_API struct perf_cpu_map *perf_cpu_map__read(FILE *file);
-LIBPERF_API struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map);
-LIBPERF_API struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
- struct perf_cpu_map *other);
-LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map);
-LIBPERF_API int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx);
-LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus);
-LIBPERF_API bool perf_cpu_map__empty(const struct perf_cpu_map *map);
-LIBPERF_API int perf_cpu_map__max(struct perf_cpu_map *map);
-
-#define perf_cpu_map__for_each_cpu(cpu, idx, cpus) \
- for ((idx) = 0, (cpu) = perf_cpu_map__cpu(cpus, idx); \
- (idx) < perf_cpu_map__nr(cpus); \
- (idx)++, (cpu) = perf_cpu_map__cpu(cpus, idx))
-
-#endif /* __LIBPERF_CPUMAP_H */
diff --git a/tools/perf/lib/include/perf/event.h b/tools/perf/lib/include/perf/event.h
deleted file mode 100644
index 18106899cb4e..000000000000
--- a/tools/perf/lib/include/perf/event.h
+++ /dev/null
@@ -1,385 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __LIBPERF_EVENT_H
-#define __LIBPERF_EVENT_H
-
-#include <linux/perf_event.h>
-#include <linux/types.h>
-#include <linux/limits.h>
-#include <linux/bpf.h>
-#include <sys/types.h> /* pid_t */
-
-struct perf_record_mmap {
- struct perf_event_header header;
- __u32 pid, tid;
- __u64 start;
- __u64 len;
- __u64 pgoff;
- char filename[PATH_MAX];
-};
-
-struct perf_record_mmap2 {
- struct perf_event_header header;
- __u32 pid, tid;
- __u64 start;
- __u64 len;
- __u64 pgoff;
- __u32 maj;
- __u32 min;
- __u64 ino;
- __u64 ino_generation;
- __u32 prot;
- __u32 flags;
- char filename[PATH_MAX];
-};
-
-struct perf_record_comm {
- struct perf_event_header header;
- __u32 pid, tid;
- char comm[16];
-};
-
-struct perf_record_namespaces {
- struct perf_event_header header;
- __u32 pid, tid;
- __u64 nr_namespaces;
- struct perf_ns_link_info link_info[];
-};
-
-struct perf_record_fork {
- struct perf_event_header header;
- __u32 pid, ppid;
- __u32 tid, ptid;
- __u64 time;
-};
-
-struct perf_record_lost {
- struct perf_event_header header;
- __u64 id;
- __u64 lost;
-};
-
-struct perf_record_lost_samples {
- struct perf_event_header header;
- __u64 lost;
-};
-
-/*
- * PERF_FORMAT_ENABLED | PERF_FORMAT_RUNNING | PERF_FORMAT_ID
- */
-struct perf_record_read {
- struct perf_event_header header;
- __u32 pid, tid;
- __u64 value;
- __u64 time_enabled;
- __u64 time_running;
- __u64 id;
-};
-
-struct perf_record_throttle {
- struct perf_event_header header;
- __u64 time;
- __u64 id;
- __u64 stream_id;
-};
-
-#ifndef KSYM_NAME_LEN
-#define KSYM_NAME_LEN 256
-#endif
-
-struct perf_record_ksymbol {
- struct perf_event_header header;
- __u64 addr;
- __u32 len;
- __u16 ksym_type;
- __u16 flags;
- char name[KSYM_NAME_LEN];
-};
-
-struct perf_record_bpf_event {
- struct perf_event_header header;
- __u16 type;
- __u16 flags;
- __u32 id;
-
- /* for bpf_prog types */
- __u8 tag[BPF_TAG_SIZE]; // prog tag
-};
-
-struct perf_record_sample {
- struct perf_event_header header;
- __u64 array[];
-};
-
-struct perf_record_switch {
- struct perf_event_header header;
- __u32 next_prev_pid;
- __u32 next_prev_tid;
-};
-
-struct perf_record_header_attr {
- struct perf_event_header header;
- struct perf_event_attr attr;
- __u64 id[];
-};
-
-enum {
- PERF_CPU_MAP__CPUS = 0,
- PERF_CPU_MAP__MASK = 1,
-};
-
-struct cpu_map_entries {
- __u16 nr;
- __u16 cpu[];
-};
-
-struct perf_record_record_cpu_map {
- __u16 nr;
- __u16 long_size;
- unsigned long mask[];
-};
-
-struct perf_record_cpu_map_data {
- __u16 type;
- char data[];
-};
-
-struct perf_record_cpu_map {
- struct perf_event_header header;
- struct perf_record_cpu_map_data data;
-};
-
-enum {
- PERF_EVENT_UPDATE__UNIT = 0,
- PERF_EVENT_UPDATE__SCALE = 1,
- PERF_EVENT_UPDATE__NAME = 2,
- PERF_EVENT_UPDATE__CPUS = 3,
-};
-
-struct perf_record_event_update_cpus {
- struct perf_record_cpu_map_data cpus;
-};
-
-struct perf_record_event_update_scale {
- double scale;
-};
-
-struct perf_record_event_update {
- struct perf_event_header header;
- __u64 type;
- __u64 id;
- char data[];
-};
-
-#define MAX_EVENT_NAME 64
-
-struct perf_trace_event_type {
- __u64 event_id;
- char name[MAX_EVENT_NAME];
-};
-
-struct perf_record_header_event_type {
- struct perf_event_header header;
- struct perf_trace_event_type event_type;
-};
-
-struct perf_record_header_tracing_data {
- struct perf_event_header header;
- __u32 size;
-};
-
-struct perf_record_header_build_id {
- struct perf_event_header header;
- pid_t pid;
- __u8 build_id[24];
- char filename[];
-};
-
-struct id_index_entry {
- __u64 id;
- __u64 idx;
- __u64 cpu;
- __u64 tid;
-};
-
-struct perf_record_id_index {
- struct perf_event_header header;
- __u64 nr;
- struct id_index_entry entries[0];
-};
-
-struct perf_record_auxtrace_info {
- struct perf_event_header header;
- __u32 type;
- __u32 reserved__; /* For alignment */
- __u64 priv[];
-};
-
-struct perf_record_auxtrace {
- struct perf_event_header header;
- __u64 size;
- __u64 offset;
- __u64 reference;
- __u32 idx;
- __u32 tid;
- __u32 cpu;
- __u32 reserved__; /* For alignment */
-};
-
-#define MAX_AUXTRACE_ERROR_MSG 64
-
-struct perf_record_auxtrace_error {
- struct perf_event_header header;
- __u32 type;
- __u32 code;
- __u32 cpu;
- __u32 pid;
- __u32 tid;
- __u32 fmt;
- __u64 ip;
- __u64 time;
- char msg[MAX_AUXTRACE_ERROR_MSG];
-};
-
-struct perf_record_aux {
- struct perf_event_header header;
- __u64 aux_offset;
- __u64 aux_size;
- __u64 flags;
-};
-
-struct perf_record_itrace_start {
- struct perf_event_header header;
- __u32 pid;
- __u32 tid;
-};
-
-struct perf_record_thread_map_entry {
- __u64 pid;
- char comm[16];
-};
-
-struct perf_record_thread_map {
- struct perf_event_header header;
- __u64 nr;
- struct perf_record_thread_map_entry entries[];
-};
-
-enum {
- PERF_STAT_CONFIG_TERM__AGGR_MODE = 0,
- PERF_STAT_CONFIG_TERM__INTERVAL = 1,
- PERF_STAT_CONFIG_TERM__SCALE = 2,
- PERF_STAT_CONFIG_TERM__MAX = 3,
-};
-
-struct perf_record_stat_config_entry {
- __u64 tag;
- __u64 val;
-};
-
-struct perf_record_stat_config {
- struct perf_event_header header;
- __u64 nr;
- struct perf_record_stat_config_entry data[];
-};
-
-struct perf_record_stat {
- struct perf_event_header header;
-
- __u64 id;
- __u32 cpu;
- __u32 thread;
-
- union {
- struct {
- __u64 val;
- __u64 ena;
- __u64 run;
- };
- __u64 values[3];
- };
-};
-
-struct perf_record_stat_round {
- struct perf_event_header header;
- __u64 type;
- __u64 time;
-};
-
-struct perf_record_time_conv {
- struct perf_event_header header;
- __u64 time_shift;
- __u64 time_mult;
- __u64 time_zero;
-};
-
-struct perf_record_header_feature {
- struct perf_event_header header;
- __u64 feat_id;
- char data[];
-};
-
-struct perf_record_compressed {
- struct perf_event_header header;
- char data[];
-};
-
-enum perf_user_event_type { /* above any possible kernel type */
- PERF_RECORD_USER_TYPE_START = 64,
- PERF_RECORD_HEADER_ATTR = 64,
- PERF_RECORD_HEADER_EVENT_TYPE = 65, /* deprecated */
- PERF_RECORD_HEADER_TRACING_DATA = 66,
- PERF_RECORD_HEADER_BUILD_ID = 67,
- PERF_RECORD_FINISHED_ROUND = 68,
- PERF_RECORD_ID_INDEX = 69,
- PERF_RECORD_AUXTRACE_INFO = 70,
- PERF_RECORD_AUXTRACE = 71,
- PERF_RECORD_AUXTRACE_ERROR = 72,
- PERF_RECORD_THREAD_MAP = 73,
- PERF_RECORD_CPU_MAP = 74,
- PERF_RECORD_STAT_CONFIG = 75,
- PERF_RECORD_STAT = 76,
- PERF_RECORD_STAT_ROUND = 77,
- PERF_RECORD_EVENT_UPDATE = 78,
- PERF_RECORD_TIME_CONV = 79,
- PERF_RECORD_HEADER_FEATURE = 80,
- PERF_RECORD_COMPRESSED = 81,
- PERF_RECORD_HEADER_MAX
-};
-
-union perf_event {
- struct perf_event_header header;
- struct perf_record_mmap mmap;
- struct perf_record_mmap2 mmap2;
- struct perf_record_comm comm;
- struct perf_record_namespaces namespaces;
- struct perf_record_fork fork;
- struct perf_record_lost lost;
- struct perf_record_lost_samples lost_samples;
- struct perf_record_read read;
- struct perf_record_throttle throttle;
- struct perf_record_sample sample;
- struct perf_record_bpf_event bpf;
- struct perf_record_ksymbol ksymbol;
- struct perf_record_header_attr attr;
- struct perf_record_event_update event_update;
- struct perf_record_header_event_type event_type;
- struct perf_record_header_tracing_data tracing_data;
- struct perf_record_header_build_id build_id;
- struct perf_record_id_index id_index;
- struct perf_record_auxtrace_info auxtrace_info;
- struct perf_record_auxtrace auxtrace;
- struct perf_record_auxtrace_error auxtrace_error;
- struct perf_record_aux aux;
- struct perf_record_itrace_start itrace_start;
- struct perf_record_switch context_switch;
- struct perf_record_thread_map thread_map;
- struct perf_record_cpu_map cpu_map;
- struct perf_record_stat_config stat_config;
- struct perf_record_stat stat;
- struct perf_record_stat_round stat_round;
- struct perf_record_time_conv time_conv;
- struct perf_record_header_feature feat;
- struct perf_record_compressed pack;
-};
-
-#endif /* __LIBPERF_EVENT_H */
diff --git a/tools/perf/lib/include/perf/evlist.h b/tools/perf/lib/include/perf/evlist.h
deleted file mode 100644
index 0a7479dc13bf..000000000000
--- a/tools/perf/lib/include/perf/evlist.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __LIBPERF_EVLIST_H
-#define __LIBPERF_EVLIST_H
-
-#include <perf/core.h>
-#include <stdbool.h>
-
-struct perf_evlist;
-struct perf_evsel;
-struct perf_cpu_map;
-struct perf_thread_map;
-
-LIBPERF_API void perf_evlist__add(struct perf_evlist *evlist,
- struct perf_evsel *evsel);
-LIBPERF_API void perf_evlist__remove(struct perf_evlist *evlist,
- struct perf_evsel *evsel);
-LIBPERF_API struct perf_evlist *perf_evlist__new(void);
-LIBPERF_API void perf_evlist__delete(struct perf_evlist *evlist);
-LIBPERF_API struct perf_evsel* perf_evlist__next(struct perf_evlist *evlist,
- struct perf_evsel *evsel);
-LIBPERF_API int perf_evlist__open(struct perf_evlist *evlist);
-LIBPERF_API void perf_evlist__close(struct perf_evlist *evlist);
-LIBPERF_API void perf_evlist__enable(struct perf_evlist *evlist);
-LIBPERF_API void perf_evlist__disable(struct perf_evlist *evlist);
-
-#define perf_evlist__for_each_evsel(evlist, pos) \
- for ((pos) = perf_evlist__next((evlist), NULL); \
- (pos) != NULL; \
- (pos) = perf_evlist__next((evlist), (pos)))
-
-LIBPERF_API void perf_evlist__set_maps(struct perf_evlist *evlist,
- struct perf_cpu_map *cpus,
- struct perf_thread_map *threads);
-LIBPERF_API int perf_evlist__poll(struct perf_evlist *evlist, int timeout);
-LIBPERF_API int perf_evlist__filter_pollfd(struct perf_evlist *evlist,
- short revents_and_mask);
-
-LIBPERF_API int perf_evlist__mmap(struct perf_evlist *evlist, int pages);
-LIBPERF_API void perf_evlist__munmap(struct perf_evlist *evlist);
-
-LIBPERF_API struct perf_mmap *perf_evlist__next_mmap(struct perf_evlist *evlist,
- struct perf_mmap *map,
- bool overwrite);
-#define perf_evlist__for_each_mmap(evlist, pos, overwrite) \
- for ((pos) = perf_evlist__next_mmap((evlist), NULL, overwrite); \
- (pos) != NULL; \
- (pos) = perf_evlist__next_mmap((evlist), (pos), overwrite))
-
-#endif /* __LIBPERF_EVLIST_H */
diff --git a/tools/perf/lib/include/perf/evsel.h b/tools/perf/lib/include/perf/evsel.h
deleted file mode 100644
index c82ec39a4ad0..000000000000
--- a/tools/perf/lib/include/perf/evsel.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __LIBPERF_EVSEL_H
-#define __LIBPERF_EVSEL_H
-
-#include <stdint.h>
-#include <perf/core.h>
-
-struct perf_evsel;
-struct perf_event_attr;
-struct perf_cpu_map;
-struct perf_thread_map;
-
-struct perf_counts_values {
- union {
- struct {
- uint64_t val;
- uint64_t ena;
- uint64_t run;
- };
- uint64_t values[3];
- };
-};
-
-LIBPERF_API struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr);
-LIBPERF_API void perf_evsel__delete(struct perf_evsel *evsel);
-LIBPERF_API int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
- struct perf_thread_map *threads);
-LIBPERF_API void perf_evsel__close(struct perf_evsel *evsel);
-LIBPERF_API void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu);
-LIBPERF_API int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
- struct perf_counts_values *count);
-LIBPERF_API int perf_evsel__enable(struct perf_evsel *evsel);
-LIBPERF_API int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu);
-LIBPERF_API int perf_evsel__disable(struct perf_evsel *evsel);
-LIBPERF_API int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu);
-LIBPERF_API struct perf_cpu_map *perf_evsel__cpus(struct perf_evsel *evsel);
-LIBPERF_API struct perf_thread_map *perf_evsel__threads(struct perf_evsel *evsel);
-LIBPERF_API struct perf_event_attr *perf_evsel__attr(struct perf_evsel *evsel);
-
-#endif /* __LIBPERF_EVSEL_H */
diff --git a/tools/perf/lib/include/perf/mmap.h b/tools/perf/lib/include/perf/mmap.h
deleted file mode 100644
index 9508ad90d8b9..000000000000
--- a/tools/perf/lib/include/perf/mmap.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __LIBPERF_MMAP_H
-#define __LIBPERF_MMAP_H
-
-#include <perf/core.h>
-
-struct perf_mmap;
-union perf_event;
-
-LIBPERF_API void perf_mmap__consume(struct perf_mmap *map);
-LIBPERF_API int perf_mmap__read_init(struct perf_mmap *map);
-LIBPERF_API void perf_mmap__read_done(struct perf_mmap *map);
-LIBPERF_API union perf_event *perf_mmap__read_event(struct perf_mmap *map);
-
-#endif /* __LIBPERF_MMAP_H */
diff --git a/tools/perf/lib/include/perf/threadmap.h b/tools/perf/lib/include/perf/threadmap.h
deleted file mode 100644
index a7c50de8d010..000000000000
--- a/tools/perf/lib/include/perf/threadmap.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __LIBPERF_THREADMAP_H
-#define __LIBPERF_THREADMAP_H
-
-#include <perf/core.h>
-#include <sys/types.h>
-
-struct perf_thread_map;
-
-LIBPERF_API struct perf_thread_map *perf_thread_map__new_dummy(void);
-
-LIBPERF_API void perf_thread_map__set_pid(struct perf_thread_map *map, int thread, pid_t pid);
-LIBPERF_API char *perf_thread_map__comm(struct perf_thread_map *map, int thread);
-LIBPERF_API int perf_thread_map__nr(struct perf_thread_map *threads);
-LIBPERF_API pid_t perf_thread_map__pid(struct perf_thread_map *map, int thread);
-
-LIBPERF_API struct perf_thread_map *perf_thread_map__get(struct perf_thread_map *map);
-LIBPERF_API void perf_thread_map__put(struct perf_thread_map *map);
-
-#endif /* __LIBPERF_THREADMAP_H */
diff --git a/tools/perf/lib/internal.h b/tools/perf/lib/internal.h
deleted file mode 100644
index 2c27e158de6b..000000000000
--- a/tools/perf/lib/internal.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __LIBPERF_INTERNAL_H
-#define __LIBPERF_INTERNAL_H
-
-#include <perf/core.h>
-
-void libperf_print(enum libperf_print_level level,
- const char *format, ...)
- __attribute__((format(printf, 2, 3)));
-
-#define __pr(level, fmt, ...) \
-do { \
- libperf_print(level, "libperf: " fmt, ##__VA_ARGS__); \
-} while (0)
-
-#define pr_err(fmt, ...) __pr(LIBPERF_ERR, fmt, ##__VA_ARGS__)
-#define pr_warning(fmt, ...) __pr(LIBPERF_WARN, fmt, ##__VA_ARGS__)
-#define pr_info(fmt, ...) __pr(LIBPERF_INFO, fmt, ##__VA_ARGS__)
-#define pr_debug(fmt, ...) __pr(LIBPERF_DEBUG, fmt, ##__VA_ARGS__)
-#define pr_debug2(fmt, ...) __pr(LIBPERF_DEBUG2, fmt, ##__VA_ARGS__)
-#define pr_debug3(fmt, ...) __pr(LIBPERF_DEBUG3, fmt, ##__VA_ARGS__)
-
-#endif /* __LIBPERF_INTERNAL_H */
diff --git a/tools/perf/lib/lib.c b/tools/perf/lib/lib.c
deleted file mode 100644
index 18658931fc71..000000000000
--- a/tools/perf/lib/lib.c
+++ /dev/null
@@ -1,48 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <unistd.h>
-#include <stdbool.h>
-#include <errno.h>
-#include <linux/kernel.h>
-#include <internal/lib.h>
-
-unsigned int page_size;
-
-static ssize_t ion(bool is_read, int fd, void *buf, size_t n)
-{
- void *buf_start = buf;
- size_t left = n;
-
- while (left) {
- /* buf must be treated as const if !is_read. */
- ssize_t ret = is_read ? read(fd, buf, left) :
- write(fd, buf, left);
-
- if (ret < 0 && errno == EINTR)
- continue;
- if (ret <= 0)
- return ret;
-
- left -= ret;
- buf += ret;
- }
-
- BUG_ON((size_t)(buf - buf_start) != n);
- return n;
-}
-
-/*
- * Read exactly 'n' bytes or return an error.
- */
-ssize_t readn(int fd, void *buf, size_t n)
-{
- return ion(true, fd, buf, n);
-}
-
-/*
- * Write exactly 'n' bytes or return an error.
- */
-ssize_t writen(int fd, const void *buf, size_t n)
-{
- /* ion does not modify buf. */
- return ion(false, fd, (void *)buf, n);
-}
diff --git a/tools/perf/lib/libperf.map b/tools/perf/lib/libperf.map
deleted file mode 100644
index 7be1af8a546c..000000000000
--- a/tools/perf/lib/libperf.map
+++ /dev/null
@@ -1,51 +0,0 @@
-LIBPERF_0.0.1 {
- global:
- libperf_init;
- perf_cpu_map__dummy_new;
- perf_cpu_map__get;
- perf_cpu_map__put;
- perf_cpu_map__new;
- perf_cpu_map__read;
- perf_cpu_map__nr;
- perf_cpu_map__cpu;
- perf_cpu_map__empty;
- perf_cpu_map__max;
- perf_thread_map__new_dummy;
- perf_thread_map__set_pid;
- perf_thread_map__comm;
- perf_thread_map__nr;
- perf_thread_map__pid;
- perf_thread_map__get;
- perf_thread_map__put;
- perf_evsel__new;
- perf_evsel__delete;
- perf_evsel__enable;
- perf_evsel__disable;
- perf_evsel__open;
- perf_evsel__close;
- perf_evsel__read;
- perf_evsel__cpus;
- perf_evsel__threads;
- perf_evsel__attr;
- perf_evlist__new;
- perf_evlist__delete;
- perf_evlist__open;
- perf_evlist__close;
- perf_evlist__enable;
- perf_evlist__disable;
- perf_evlist__add;
- perf_evlist__remove;
- perf_evlist__next;
- perf_evlist__set_maps;
- perf_evlist__poll;
- perf_evlist__mmap;
- perf_evlist__munmap;
- perf_evlist__filter_pollfd;
- perf_evlist__next_mmap;
- perf_mmap__consume;
- perf_mmap__read_init;
- perf_mmap__read_done;
- perf_mmap__read_event;
- local:
- *;
-};
diff --git a/tools/perf/lib/libperf.pc.template b/tools/perf/lib/libperf.pc.template
deleted file mode 100644
index 117e4a237b55..000000000000
--- a/tools/perf/lib/libperf.pc.template
+++ /dev/null
@@ -1,11 +0,0 @@
-# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
-
-prefix=@PREFIX@
-libdir=@LIBDIR@
-includedir=${prefix}/include
-
-Name: libperf
-Description: perf library
-Version: @VERSION@
-Libs: -L${libdir} -lperf
-Cflags: -I${includedir}
diff --git a/tools/perf/lib/mmap.c b/tools/perf/lib/mmap.c
deleted file mode 100644
index 79d5ed6c38cc..000000000000
--- a/tools/perf/lib/mmap.c
+++ /dev/null
@@ -1,275 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <sys/mman.h>
-#include <inttypes.h>
-#include <asm/bug.h>
-#include <errno.h>
-#include <string.h>
-#include <linux/ring_buffer.h>
-#include <linux/perf_event.h>
-#include <perf/mmap.h>
-#include <perf/event.h>
-#include <internal/mmap.h>
-#include <internal/lib.h>
-#include <linux/kernel.h>
-#include "internal.h"
-
-void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev,
- bool overwrite, libperf_unmap_cb_t unmap_cb)
-{
- map->fd = -1;
- map->overwrite = overwrite;
- map->unmap_cb = unmap_cb;
- refcount_set(&map->refcnt, 0);
- if (prev)
- prev->next = map;
-}
-
-size_t perf_mmap__mmap_len(struct perf_mmap *map)
-{
- return map->mask + 1 + page_size;
-}
-
-int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp,
- int fd, int cpu)
-{
- map->prev = 0;
- map->mask = mp->mask;
- map->base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot,
- MAP_SHARED, fd, 0);
- if (map->base == MAP_FAILED) {
- map->base = NULL;
- return -1;
- }
-
- map->fd = fd;
- map->cpu = cpu;
- return 0;
-}
-
-void perf_mmap__munmap(struct perf_mmap *map)
-{
- if (map && map->base != NULL) {
- munmap(map->base, perf_mmap__mmap_len(map));
- map->base = NULL;
- map->fd = -1;
- refcount_set(&map->refcnt, 0);
- }
- if (map && map->unmap_cb)
- map->unmap_cb(map);
-}
-
-void perf_mmap__get(struct perf_mmap *map)
-{
- refcount_inc(&map->refcnt);
-}
-
-void perf_mmap__put(struct perf_mmap *map)
-{
- BUG_ON(map->base && refcount_read(&map->refcnt) == 0);
-
- if (refcount_dec_and_test(&map->refcnt))
- perf_mmap__munmap(map);
-}
-
-static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail)
-{
- ring_buffer_write_tail(md->base, tail);
-}
-
-u64 perf_mmap__read_head(struct perf_mmap *map)
-{
- return ring_buffer_read_head(map->base);
-}
-
-static bool perf_mmap__empty(struct perf_mmap *map)
-{
- struct perf_event_mmap_page *pc = map->base;
-
- return perf_mmap__read_head(map) == map->prev && !pc->aux_size;
-}
-
-void perf_mmap__consume(struct perf_mmap *map)
-{
- if (!map->overwrite) {
- u64 old = map->prev;
-
- perf_mmap__write_tail(map, old);
- }
-
- if (refcount_read(&map->refcnt) == 1 && perf_mmap__empty(map))
- perf_mmap__put(map);
-}
-
-static int overwrite_rb_find_range(void *buf, int mask, u64 *start, u64 *end)
-{
- struct perf_event_header *pheader;
- u64 evt_head = *start;
- int size = mask + 1;
-
- pr_debug2("%s: buf=%p, start=%"PRIx64"\n", __func__, buf, *start);
- pheader = (struct perf_event_header *)(buf + (*start & mask));
- while (true) {
- if (evt_head - *start >= (unsigned int)size) {
- pr_debug("Finished reading overwrite ring buffer: rewind\n");
- if (evt_head - *start > (unsigned int)size)
- evt_head -= pheader->size;
- *end = evt_head;
- return 0;
- }
-
- pheader = (struct perf_event_header *)(buf + (evt_head & mask));
-
- if (pheader->size == 0) {
- pr_debug("Finished reading overwrite ring buffer: get start\n");
- *end = evt_head;
- return 0;
- }
-
- evt_head += pheader->size;
- pr_debug3("move evt_head: %"PRIx64"\n", evt_head);
- }
- WARN_ONCE(1, "Shouldn't get here\n");
- return -1;
-}
-
-/*
- * Report the start and end of the available data in ringbuffer
- */
-static int __perf_mmap__read_init(struct perf_mmap *md)
-{
- u64 head = perf_mmap__read_head(md);
- u64 old = md->prev;
- unsigned char *data = md->base + page_size;
- unsigned long size;
-
- md->start = md->overwrite ? head : old;
- md->end = md->overwrite ? old : head;
-
- if ((md->end - md->start) < md->flush)
- return -EAGAIN;
-
- size = md->end - md->start;
- if (size > (unsigned long)(md->mask) + 1) {
- if (!md->overwrite) {
- WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");
-
- md->prev = head;
- perf_mmap__consume(md);
- return -EAGAIN;
- }
-
- /*
- * Backward ring buffer is full. We still have a chance to read
- * most of data from it.
- */
- if (overwrite_rb_find_range(data, md->mask, &md->start, &md->end))
- return -EINVAL;
- }
-
- return 0;
-}
-
-int perf_mmap__read_init(struct perf_mmap *map)
-{
- /*
- * Check if event was unmapped due to a POLLHUP/POLLERR.
- */
- if (!refcount_read(&map->refcnt))
- return -ENOENT;
-
- return __perf_mmap__read_init(map);
-}
-
-/*
- * Mandatory for overwrite mode
- * The direction of overwrite mode is backward.
- * The last perf_mmap__read() will set tail to map->core.prev.
- * Need to correct the map->core.prev to head which is the end of next read.
- */
-void perf_mmap__read_done(struct perf_mmap *map)
-{
- /*
- * Check if event was unmapped due to a POLLHUP/POLLERR.
- */
- if (!refcount_read(&map->refcnt))
- return;
-
- map->prev = perf_mmap__read_head(map);
-}
-
-/* When check_messup is true, 'end' must points to a good entry */
-static union perf_event *perf_mmap__read(struct perf_mmap *map,
- u64 *startp, u64 end)
-{
- unsigned char *data = map->base + page_size;
- union perf_event *event = NULL;
- int diff = end - *startp;
-
- if (diff >= (int)sizeof(event->header)) {
- size_t size;
-
- event = (union perf_event *)&data[*startp & map->mask];
- size = event->header.size;
-
- if (size < sizeof(event->header) || diff < (int)size)
- return NULL;
-
- /*
- * Event straddles the mmap boundary -- header should always
- * be inside due to u64 alignment of output.
- */
- if ((*startp & map->mask) + size != ((*startp + size) & map->mask)) {
- unsigned int offset = *startp;
- unsigned int len = min(sizeof(*event), size), cpy;
- void *dst = map->event_copy;
-
- do {
- cpy = min(map->mask + 1 - (offset & map->mask), len);
- memcpy(dst, &data[offset & map->mask], cpy);
- offset += cpy;
- dst += cpy;
- len -= cpy;
- } while (len);
-
- event = (union perf_event *)map->event_copy;
- }
-
- *startp += size;
- }
-
- return event;
-}
-
-/*
- * Read event from ring buffer one by one.
- * Return one event for each call.
- *
- * Usage:
- * perf_mmap__read_init()
- * while(event = perf_mmap__read_event()) {
- * //process the event
- * perf_mmap__consume()
- * }
- * perf_mmap__read_done()
- */
-union perf_event *perf_mmap__read_event(struct perf_mmap *map)
-{
- union perf_event *event;
-
- /*
- * Check if event was unmapped due to a POLLHUP/POLLERR.
- */
- if (!refcount_read(&map->refcnt))
- return NULL;
-
- /* non-overwirte doesn't pause the ringbuffer */
- if (!map->overwrite)
- map->end = perf_mmap__read_head(map);
-
- event = perf_mmap__read(map, &map->start, map->end);
-
- if (!map->overwrite)
- map->prev = map->start;
-
- return event;
-}
diff --git a/tools/perf/lib/tests/Makefile b/tools/perf/lib/tests/Makefile
deleted file mode 100644
index a43cd08c5c03..000000000000
--- a/tools/perf/lib/tests/Makefile
+++ /dev/null
@@ -1,38 +0,0 @@
-# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
-
-TESTS = test-cpumap test-threadmap test-evlist test-evsel
-
-TESTS_SO := $(addsuffix -so,$(TESTS))
-TESTS_A := $(addsuffix -a,$(TESTS))
-
-# Set compile option CFLAGS
-ifdef EXTRA_CFLAGS
- CFLAGS := $(EXTRA_CFLAGS)
-else
- CFLAGS := -g -Wall
-endif
-
-all:
-
-include $(srctree)/tools/scripts/Makefile.include
-
-INCLUDE = -I$(srctree)/tools/perf/lib/include -I$(srctree)/tools/include -I$(srctree)/tools/lib
-
-$(TESTS_A): FORCE
- $(QUIET_LINK)$(CC) $(INCLUDE) $(CFLAGS) -o $@ $(subst -a,.c,$@) ../libperf.a $(LIBAPI)
-
-$(TESTS_SO): FORCE
- $(QUIET_LINK)$(CC) $(INCLUDE) $(CFLAGS) -L.. -o $@ $(subst -so,.c,$@) $(LIBAPI) -lperf
-
-all: $(TESTS_A) $(TESTS_SO)
-
-run:
- @echo "running static:"
- @for i in $(TESTS_A); do ./$$i; done
- @echo "running dynamic:"
- @for i in $(TESTS_SO); do LD_LIBRARY_PATH=../ ./$$i; done
-
-clean:
- $(call QUIET_CLEAN, tests)$(RM) $(TESTS_A) $(TESTS_SO)
-
-.PHONY: all clean FORCE
diff --git a/tools/perf/lib/tests/test-cpumap.c b/tools/perf/lib/tests/test-cpumap.c
deleted file mode 100644
index c8d45091e7c2..000000000000
--- a/tools/perf/lib/tests/test-cpumap.c
+++ /dev/null
@@ -1,31 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <stdarg.h>
-#include <stdio.h>
-#include <perf/cpumap.h>
-#include <internal/tests.h>
-
-static int libperf_print(enum libperf_print_level level,
- const char *fmt, va_list ap)
-{
- return vfprintf(stderr, fmt, ap);
-}
-
-int main(int argc, char **argv)
-{
- struct perf_cpu_map *cpus;
-
- __T_START;
-
- libperf_init(libperf_print);
-
- cpus = perf_cpu_map__dummy_new();
- if (!cpus)
- return -1;
-
- perf_cpu_map__get(cpus);
- perf_cpu_map__put(cpus);
- perf_cpu_map__put(cpus);
-
- __T_END;
- return 0;
-}
diff --git a/tools/perf/lib/tests/test-evlist.c b/tools/perf/lib/tests/test-evlist.c
deleted file mode 100644
index 6d8ebe0c2504..000000000000
--- a/tools/perf/lib/tests/test-evlist.c
+++ /dev/null
@@ -1,413 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#define _GNU_SOURCE // needed for sched.h to get sched_[gs]etaffinity and CPU_(ZERO,SET)
-#include <sched.h>
-#include <stdio.h>
-#include <stdarg.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <linux/perf_event.h>
-#include <linux/limits.h>
-#include <sys/types.h>
-#include <sys/wait.h>
-#include <sys/prctl.h>
-#include <perf/cpumap.h>
-#include <perf/threadmap.h>
-#include <perf/evlist.h>
-#include <perf/evsel.h>
-#include <perf/mmap.h>
-#include <perf/event.h>
-#include <internal/tests.h>
-#include <api/fs/fs.h>
-
-static int libperf_print(enum libperf_print_level level,
- const char *fmt, va_list ap)
-{
- return vfprintf(stderr, fmt, ap);
-}
-
-static int test_stat_cpu(void)
-{
- struct perf_cpu_map *cpus;
- struct perf_evlist *evlist;
- struct perf_evsel *evsel;
- struct perf_event_attr attr1 = {
- .type = PERF_TYPE_SOFTWARE,
- .config = PERF_COUNT_SW_CPU_CLOCK,
- };
- struct perf_event_attr attr2 = {
- .type = PERF_TYPE_SOFTWARE,
- .config = PERF_COUNT_SW_TASK_CLOCK,
- };
- int err, cpu, tmp;
-
- cpus = perf_cpu_map__new(NULL);
- __T("failed to create cpus", cpus);
-
- evlist = perf_evlist__new();
- __T("failed to create evlist", evlist);
-
- evsel = perf_evsel__new(&attr1);
- __T("failed to create evsel1", evsel);
-
- perf_evlist__add(evlist, evsel);
-
- evsel = perf_evsel__new(&attr2);
- __T("failed to create evsel2", evsel);
-
- perf_evlist__add(evlist, evsel);
-
- perf_evlist__set_maps(evlist, cpus, NULL);
-
- err = perf_evlist__open(evlist);
- __T("failed to open evsel", err == 0);
-
- perf_evlist__for_each_evsel(evlist, evsel) {
- cpus = perf_evsel__cpus(evsel);
-
- perf_cpu_map__for_each_cpu(cpu, tmp, cpus) {
- struct perf_counts_values counts = { .val = 0 };
-
- perf_evsel__read(evsel, cpu, 0, &counts);
- __T("failed to read value for evsel", counts.val != 0);
- }
- }
-
- perf_evlist__close(evlist);
- perf_evlist__delete(evlist);
-
- perf_cpu_map__put(cpus);
- return 0;
-}
-
-static int test_stat_thread(void)
-{
- struct perf_counts_values counts = { .val = 0 };
- struct perf_thread_map *threads;
- struct perf_evlist *evlist;
- struct perf_evsel *evsel;
- struct perf_event_attr attr1 = {
- .type = PERF_TYPE_SOFTWARE,
- .config = PERF_COUNT_SW_CPU_CLOCK,
- };
- struct perf_event_attr attr2 = {
- .type = PERF_TYPE_SOFTWARE,
- .config = PERF_COUNT_SW_TASK_CLOCK,
- };
- int err;
-
- threads = perf_thread_map__new_dummy();
- __T("failed to create threads", threads);
-
- perf_thread_map__set_pid(threads, 0, 0);
-
- evlist = perf_evlist__new();
- __T("failed to create evlist", evlist);
-
- evsel = perf_evsel__new(&attr1);
- __T("failed to create evsel1", evsel);
-
- perf_evlist__add(evlist, evsel);
-
- evsel = perf_evsel__new(&attr2);
- __T("failed to create evsel2", evsel);
-
- perf_evlist__add(evlist, evsel);
-
- perf_evlist__set_maps(evlist, NULL, threads);
-
- err = perf_evlist__open(evlist);
- __T("failed to open evsel", err == 0);
-
- perf_evlist__for_each_evsel(evlist, evsel) {
- perf_evsel__read(evsel, 0, 0, &counts);
- __T("failed to read value for evsel", counts.val != 0);
- }
-
- perf_evlist__close(evlist);
- perf_evlist__delete(evlist);
-
- perf_thread_map__put(threads);
- return 0;
-}
-
-static int test_stat_thread_enable(void)
-{
- struct perf_counts_values counts = { .val = 0 };
- struct perf_thread_map *threads;
- struct perf_evlist *evlist;
- struct perf_evsel *evsel;
- struct perf_event_attr attr1 = {
- .type = PERF_TYPE_SOFTWARE,
- .config = PERF_COUNT_SW_CPU_CLOCK,
- .disabled = 1,
- };
- struct perf_event_attr attr2 = {
- .type = PERF_TYPE_SOFTWARE,
- .config = PERF_COUNT_SW_TASK_CLOCK,
- .disabled = 1,
- };
- int err;
-
- threads = perf_thread_map__new_dummy();
- __T("failed to create threads", threads);
-
- perf_thread_map__set_pid(threads, 0, 0);
-
- evlist = perf_evlist__new();
- __T("failed to create evlist", evlist);
-
- evsel = perf_evsel__new(&attr1);
- __T("failed to create evsel1", evsel);
-
- perf_evlist__add(evlist, evsel);
-
- evsel = perf_evsel__new(&attr2);
- __T("failed to create evsel2", evsel);
-
- perf_evlist__add(evlist, evsel);
-
- perf_evlist__set_maps(evlist, NULL, threads);
-
- err = perf_evlist__open(evlist);
- __T("failed to open evsel", err == 0);
-
- perf_evlist__for_each_evsel(evlist, evsel) {
- perf_evsel__read(evsel, 0, 0, &counts);
- __T("failed to read value for evsel", counts.val == 0);
- }
-
- perf_evlist__enable(evlist);
-
- perf_evlist__for_each_evsel(evlist, evsel) {
- perf_evsel__read(evsel, 0, 0, &counts);
- __T("failed to read value for evsel", counts.val != 0);
- }
-
- perf_evlist__disable(evlist);
-
- perf_evlist__close(evlist);
- perf_evlist__delete(evlist);
-
- perf_thread_map__put(threads);
- return 0;
-}
-
-static int test_mmap_thread(void)
-{
- struct perf_evlist *evlist;
- struct perf_evsel *evsel;
- struct perf_mmap *map;
- struct perf_cpu_map *cpus;
- struct perf_thread_map *threads;
- struct perf_event_attr attr = {
- .type = PERF_TYPE_TRACEPOINT,
- .sample_period = 1,
- .wakeup_watermark = 1,
- .disabled = 1,
- };
- char path[PATH_MAX];
- int id, err, pid, go_pipe[2];
- union perf_event *event;
- char bf;
- int count = 0;
-
- snprintf(path, PATH_MAX, "%s/kernel/debug/tracing/events/syscalls/sys_enter_prctl/id",
- sysfs__mountpoint());
-
- if (filename__read_int(path, &id)) {
- fprintf(stderr, "error: failed to get tracepoint id: %s\n", path);
- return -1;
- }
-
- attr.config = id;
-
- err = pipe(go_pipe);
- __T("failed to create pipe", err == 0);
-
- fflush(NULL);
-
- pid = fork();
- if (!pid) {
- int i;
-
- read(go_pipe[0], &bf, 1);
-
- /* Generate 100 prctl calls. */
- for (i = 0; i < 100; i++)
- prctl(0, 0, 0, 0, 0);
-
- exit(0);
- }
-
- threads = perf_thread_map__new_dummy();
- __T("failed to create threads", threads);
-
- cpus = perf_cpu_map__dummy_new();
- __T("failed to create cpus", cpus);
-
- perf_thread_map__set_pid(threads, 0, pid);
-
- evlist = perf_evlist__new();
- __T("failed to create evlist", evlist);
-
- evsel = perf_evsel__new(&attr);
- __T("failed to create evsel1", evsel);
-
- perf_evlist__add(evlist, evsel);
-
- perf_evlist__set_maps(evlist, cpus, threads);
-
- err = perf_evlist__open(evlist);
- __T("failed to open evlist", err == 0);
-
- err = perf_evlist__mmap(evlist, 4);
- __T("failed to mmap evlist", err == 0);
-
- perf_evlist__enable(evlist);
-
- /* kick the child and wait for it to finish */
- write(go_pipe[1], &bf, 1);
- waitpid(pid, NULL, 0);
-
- /*
- * There's no need to call perf_evlist__disable,
- * monitored process is dead now.
- */
-
- perf_evlist__for_each_mmap(evlist, map, false) {
- if (perf_mmap__read_init(map) < 0)
- continue;
-
- while ((event = perf_mmap__read_event(map)) != NULL) {
- count++;
- perf_mmap__consume(map);
- }
-
- perf_mmap__read_done(map);
- }
-
- /* calls perf_evlist__munmap/perf_evlist__close */
- perf_evlist__delete(evlist);
-
- perf_thread_map__put(threads);
- perf_cpu_map__put(cpus);
-
- /*
- * The generated prctl calls should match the
- * number of events in the buffer.
- */
- __T("failed count", count == 100);
-
- return 0;
-}
-
-static int test_mmap_cpus(void)
-{
- struct perf_evlist *evlist;
- struct perf_evsel *evsel;
- struct perf_mmap *map;
- struct perf_cpu_map *cpus;
- struct perf_event_attr attr = {
- .type = PERF_TYPE_TRACEPOINT,
- .sample_period = 1,
- .wakeup_watermark = 1,
- .disabled = 1,
- };
- cpu_set_t saved_mask;
- char path[PATH_MAX];
- int id, err, cpu, tmp;
- union perf_event *event;
- int count = 0;
-
- snprintf(path, PATH_MAX, "%s/kernel/debug/tracing/events/syscalls/sys_enter_prctl/id",
- sysfs__mountpoint());
-
- if (filename__read_int(path, &id)) {
- fprintf(stderr, "error: failed to get tracepoint id: %s\n", path);
- return -1;
- }
-
- attr.config = id;
-
- cpus = perf_cpu_map__new(NULL);
- __T("failed to create cpus", cpus);
-
- evlist = perf_evlist__new();
- __T("failed to create evlist", evlist);
-
- evsel = perf_evsel__new(&attr);
- __T("failed to create evsel1", evsel);
-
- perf_evlist__add(evlist, evsel);
-
- perf_evlist__set_maps(evlist, cpus, NULL);
-
- err = perf_evlist__open(evlist);
- __T("failed to open evlist", err == 0);
-
- err = perf_evlist__mmap(evlist, 4);
- __T("failed to mmap evlist", err == 0);
-
- perf_evlist__enable(evlist);
-
- err = sched_getaffinity(0, sizeof(saved_mask), &saved_mask);
- __T("sched_getaffinity failed", err == 0);
-
- perf_cpu_map__for_each_cpu(cpu, tmp, cpus) {
- cpu_set_t mask;
-
- CPU_ZERO(&mask);
- CPU_SET(cpu, &mask);
-
- err = sched_setaffinity(0, sizeof(mask), &mask);
- __T("sched_setaffinity failed", err == 0);
-
- prctl(0, 0, 0, 0, 0);
- }
-
- err = sched_setaffinity(0, sizeof(saved_mask), &saved_mask);
- __T("sched_setaffinity failed", err == 0);
-
- perf_evlist__disable(evlist);
-
- perf_evlist__for_each_mmap(evlist, map, false) {
- if (perf_mmap__read_init(map) < 0)
- continue;
-
- while ((event = perf_mmap__read_event(map)) != NULL) {
- count++;
- perf_mmap__consume(map);
- }
-
- perf_mmap__read_done(map);
- }
-
- /* calls perf_evlist__munmap/perf_evlist__close */
- perf_evlist__delete(evlist);
-
- /*
- * The generated prctl events should match the
- * number of cpus or be bigger (we are system-wide).
- */
- __T("failed count", count >= perf_cpu_map__nr(cpus));
-
- perf_cpu_map__put(cpus);
-
- return 0;
-}
-
-int main(int argc, char **argv)
-{
- __T_START;
-
- libperf_init(libperf_print);
-
- test_stat_cpu();
- test_stat_thread();
- test_stat_thread_enable();
- test_mmap_thread();
- test_mmap_cpus();
-
- __T_END;
- return 0;
-}
diff --git a/tools/perf/lib/tests/test-evsel.c b/tools/perf/lib/tests/test-evsel.c
deleted file mode 100644
index 135722ac965b..000000000000
--- a/tools/perf/lib/tests/test-evsel.c
+++ /dev/null
@@ -1,135 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <stdarg.h>
-#include <stdio.h>
-#include <linux/perf_event.h>
-#include <perf/cpumap.h>
-#include <perf/threadmap.h>
-#include <perf/evsel.h>
-#include <internal/tests.h>
-
-static int libperf_print(enum libperf_print_level level,
- const char *fmt, va_list ap)
-{
- return vfprintf(stderr, fmt, ap);
-}
-
-static int test_stat_cpu(void)
-{
- struct perf_cpu_map *cpus;
- struct perf_evsel *evsel;
- struct perf_event_attr attr = {
- .type = PERF_TYPE_SOFTWARE,
- .config = PERF_COUNT_SW_CPU_CLOCK,
- };
- int err, cpu, tmp;
-
- cpus = perf_cpu_map__new(NULL);
- __T("failed to create cpus", cpus);
-
- evsel = perf_evsel__new(&attr);
- __T("failed to create evsel", evsel);
-
- err = perf_evsel__open(evsel, cpus, NULL);
- __T("failed to open evsel", err == 0);
-
- perf_cpu_map__for_each_cpu(cpu, tmp, cpus) {
- struct perf_counts_values counts = { .val = 0 };
-
- perf_evsel__read(evsel, cpu, 0, &counts);
- __T("failed to read value for evsel", counts.val != 0);
- }
-
- perf_evsel__close(evsel);
- perf_evsel__delete(evsel);
-
- perf_cpu_map__put(cpus);
- return 0;
-}
-
-static int test_stat_thread(void)
-{
- struct perf_counts_values counts = { .val = 0 };
- struct perf_thread_map *threads;
- struct perf_evsel *evsel;
- struct perf_event_attr attr = {
- .type = PERF_TYPE_SOFTWARE,
- .config = PERF_COUNT_SW_TASK_CLOCK,
- };
- int err;
-
- threads = perf_thread_map__new_dummy();
- __T("failed to create threads", threads);
-
- perf_thread_map__set_pid(threads, 0, 0);
-
- evsel = perf_evsel__new(&attr);
- __T("failed to create evsel", evsel);
-
- err = perf_evsel__open(evsel, NULL, threads);
- __T("failed to open evsel", err == 0);
-
- perf_evsel__read(evsel, 0, 0, &counts);
- __T("failed to read value for evsel", counts.val != 0);
-
- perf_evsel__close(evsel);
- perf_evsel__delete(evsel);
-
- perf_thread_map__put(threads);
- return 0;
-}
-
-static int test_stat_thread_enable(void)
-{
- struct perf_counts_values counts = { .val = 0 };
- struct perf_thread_map *threads;
- struct perf_evsel *evsel;
- struct perf_event_attr attr = {
- .type = PERF_TYPE_SOFTWARE,
- .config = PERF_COUNT_SW_TASK_CLOCK,
- .disabled = 1,
- };
- int err;
-
- threads = perf_thread_map__new_dummy();
- __T("failed to create threads", threads);
-
- perf_thread_map__set_pid(threads, 0, 0);
-
- evsel = perf_evsel__new(&attr);
- __T("failed to create evsel", evsel);
-
- err = perf_evsel__open(evsel, NULL, threads);
- __T("failed to open evsel", err == 0);
-
- perf_evsel__read(evsel, 0, 0, &counts);
- __T("failed to read value for evsel", counts.val == 0);
-
- err = perf_evsel__enable(evsel);
- __T("failed to enable evsel", err == 0);
-
- perf_evsel__read(evsel, 0, 0, &counts);
- __T("failed to read value for evsel", counts.val != 0);
-
- err = perf_evsel__disable(evsel);
- __T("failed to enable evsel", err == 0);
-
- perf_evsel__close(evsel);
- perf_evsel__delete(evsel);
-
- perf_thread_map__put(threads);
- return 0;
-}
-
-int main(int argc, char **argv)
-{
- __T_START;
-
- libperf_init(libperf_print);
-
- test_stat_cpu();
- test_stat_thread();
- test_stat_thread_enable();
-
- __T_END;
- return 0;
-}
diff --git a/tools/perf/lib/tests/test-threadmap.c b/tools/perf/lib/tests/test-threadmap.c
deleted file mode 100644
index 7dc4d6fbedde..000000000000
--- a/tools/perf/lib/tests/test-threadmap.c
+++ /dev/null
@@ -1,31 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <stdarg.h>
-#include <stdio.h>
-#include <perf/threadmap.h>
-#include <internal/tests.h>
-
-static int libperf_print(enum libperf_print_level level,
- const char *fmt, va_list ap)
-{
- return vfprintf(stderr, fmt, ap);
-}
-
-int main(int argc, char **argv)
-{
- struct perf_thread_map *threads;
-
- __T_START;
-
- libperf_init(libperf_print);
-
- threads = perf_thread_map__new_dummy();
- if (!threads)
- return -1;
-
- perf_thread_map__get(threads);
- perf_thread_map__put(threads);
- perf_thread_map__put(threads);
-
- __T_END;
- return 0;
-}
diff --git a/tools/perf/lib/threadmap.c b/tools/perf/lib/threadmap.c
deleted file mode 100644
index e92c368b0a6c..000000000000
--- a/tools/perf/lib/threadmap.c
+++ /dev/null
@@ -1,91 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <perf/threadmap.h>
-#include <stdlib.h>
-#include <linux/refcount.h>
-#include <internal/threadmap.h>
-#include <string.h>
-#include <asm/bug.h>
-#include <stdio.h>
-
-static void perf_thread_map__reset(struct perf_thread_map *map, int start, int nr)
-{
- size_t size = (nr - start) * sizeof(map->map[0]);
-
- memset(&map->map[start], 0, size);
- map->err_thread = -1;
-}
-
-struct perf_thread_map *perf_thread_map__realloc(struct perf_thread_map *map, int nr)
-{
- size_t size = sizeof(*map) + sizeof(map->map[0]) * nr;
- int start = map ? map->nr : 0;
-
- map = realloc(map, size);
- /*
- * We only realloc to add more items, let's reset new items.
- */
- if (map)
- perf_thread_map__reset(map, start, nr);
-
- return map;
-}
-
-#define thread_map__alloc(__nr) perf_thread_map__realloc(NULL, __nr)
-
-void perf_thread_map__set_pid(struct perf_thread_map *map, int thread, pid_t pid)
-{
- map->map[thread].pid = pid;
-}
-
-char *perf_thread_map__comm(struct perf_thread_map *map, int thread)
-{
- return map->map[thread].comm;
-}
-
-struct perf_thread_map *perf_thread_map__new_dummy(void)
-{
- struct perf_thread_map *threads = thread_map__alloc(1);
-
- if (threads != NULL) {
- perf_thread_map__set_pid(threads, 0, -1);
- threads->nr = 1;
- refcount_set(&threads->refcnt, 1);
- }
- return threads;
-}
-
-static void perf_thread_map__delete(struct perf_thread_map *threads)
-{
- if (threads) {
- int i;
-
- WARN_ONCE(refcount_read(&threads->refcnt) != 0,
- "thread map refcnt unbalanced\n");
- for (i = 0; i < threads->nr; i++)
- free(perf_thread_map__comm(threads, i));
- free(threads);
- }
-}
-
-struct perf_thread_map *perf_thread_map__get(struct perf_thread_map *map)
-{
- if (map)
- refcount_inc(&map->refcnt);
- return map;
-}
-
-void perf_thread_map__put(struct perf_thread_map *map)
-{
- if (map && refcount_dec_and_test(&map->refcnt))
- perf_thread_map__delete(map);
-}
-
-int perf_thread_map__nr(struct perf_thread_map *threads)
-{
- return threads ? threads->nr : 1;
-}
-
-pid_t perf_thread_map__pid(struct perf_thread_map *map, int thread)
-{
- return map->map[thread].pid;
-}
diff --git a/tools/perf/lib/xyarray.c b/tools/perf/lib/xyarray.c
deleted file mode 100644
index dcd901d154bb..000000000000
--- a/tools/perf/lib/xyarray.c
+++ /dev/null
@@ -1,33 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <internal/xyarray.h>
-#include <linux/zalloc.h>
-#include <stdlib.h>
-#include <string.h>
-
-struct xyarray *xyarray__new(int xlen, int ylen, size_t entry_size)
-{
- size_t row_size = ylen * entry_size;
- struct xyarray *xy = zalloc(sizeof(*xy) + xlen * row_size);
-
- if (xy != NULL) {
- xy->entry_size = entry_size;
- xy->row_size = row_size;
- xy->entries = xlen * ylen;
- xy->max_x = xlen;
- xy->max_y = ylen;
- }
-
- return xy;
-}
-
-void xyarray__reset(struct xyarray *xy)
-{
- size_t n = xy->entries * xy->entry_size;
-
- memset(xy->contents, 0, n);
-}
-
-void xyarray__delete(struct xyarray *xy)
-{
- free(xy);
-}
diff --git a/tools/perf/pmu-events/arch/s390/cf_z15/crypto6.json b/tools/perf/pmu-events/arch/s390/cf_z15/crypto6.json
index 5e36bc2468d0..c998e4f1d1d2 100644
--- a/tools/perf/pmu-events/arch/s390/cf_z15/crypto6.json
+++ b/tools/perf/pmu-events/arch/s390/cf_z15/crypto6.json
@@ -4,27 +4,27 @@
"EventCode": "80",
"EventName": "ECC_FUNCTION_COUNT",
"BriefDescription": "ECC Function Count",
- "PublicDescription": "Long ECC function Count"
+ "PublicDescription": "This counter counts the total number of the elliptic-curve cryptography (ECC) functions issued by the CPU."
},
{
"Unit": "CPU-M-CF",
"EventCode": "81",
"EventName": "ECC_CYCLES_COUNT",
"BriefDescription": "ECC Cycles Count",
- "PublicDescription": "Long ECC Function cycles count"
+ "PublicDescription": "This counter counts the total number of CPU cycles when the ECC coprocessor is busy performing the elliptic-curve cryptography (ECC) functions issued by the CPU."
},
{
"Unit": "CPU-M-CF",
"EventCode": "82",
"EventName": "ECC_BLOCKED_FUNCTION_COUNT",
"BriefDescription": "Ecc Blocked Function Count",
- "PublicDescription": "Long ECC blocked function count"
+ "PublicDescription": "This counter counts the total number of the elliptic-curve cryptography (ECC) functions that are issued by the CPU and are blocked because the ECC coprocessor is busy performing a function issued by another CPU."
},
{
"Unit": "CPU-M-CF",
"EventCode": "83",
"EventName": "ECC_BLOCKED_CYCLES_COUNT",
"BriefDescription": "ECC Blocked Cycles Count",
- "PublicDescription": "Long ECC blocked cycles count"
+ "PublicDescription": "This counter counts the total number of CPU cycles blocked for the elliptic-curve cryptography (ECC) functions issued by the CPU because the ECC coprocessor is busy performing a function issued by another CPU."
},
]
diff --git a/tools/perf/pmu-events/arch/s390/cf_z15/extended.json b/tools/perf/pmu-events/arch/s390/cf_z15/extended.json
index 89e070727e1b..2df2e231e9ee 100644
--- a/tools/perf/pmu-events/arch/s390/cf_z15/extended.json
+++ b/tools/perf/pmu-events/arch/s390/cf_z15/extended.json
@@ -25,7 +25,7 @@
"EventCode": "131",
"EventName": "DTLB2_HPAGE_WRITES",
"BriefDescription": "DTLB2 One-Megabyte Page Writes",
- "PublicDescription": "A translation entry was written into the Combined Region and Segment Table Entry array in the Level-2 TLB for a one-megabyte page or a Last Host Translation was done"
+ "PublicDescription": "A translation entry was written into the Combined Region and Segment Table Entry array in the Level-2 TLB for a one-megabyte page"
},
{
"Unit": "CPU-M-CF",
@@ -358,6 +358,34 @@
},
{
"Unit": "CPU-M-CF",
+ "EventCode": "247",
+ "EventName": "DFLT_ACCESS",
+ "BriefDescription": "Cycles CPU spent obtaining access to Deflate unit",
+ "PublicDescription": "Cycles CPU spent obtaining access to Deflate unit"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "252",
+ "EventName": "DFLT_CYCLES",
+ "BriefDescription": "Cycles CPU is using Deflate unit",
+ "PublicDescription": "Cycles CPU is using Deflate unit"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "264",
+ "EventName": "DFLT_CC",
+ "BriefDescription": "Increments by one for every DEFLATE CONVERSION CALL instruction executed",
+ "PublicDescription": "Increments by one for every DEFLATE CONVERSION CALL instruction executed"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "265",
+ "EventName": "DFLT_CCERROR",
+ "BriefDescription": "Increments by one for every DEFLATE CONVERSION CALL instruction executed that ended in Condition Codes 0, 1 or 2",
+ "PublicDescription": "Increments by one for every DEFLATE CONVERSION CALL instruction executed that ended in Condition Codes 0, 1 or 2"
+ },
+ {
+ "Unit": "CPU-M-CF",
"EventCode": "448",
"EventName": "MT_DIAG_CYCLES_ONE_THR_ACTIVE",
"BriefDescription": "Cycle count with one thread active",
diff --git a/tools/perf/pmu-events/arch/x86/amdfam17h/branch.json b/tools/perf/pmu-events/arch/test/test_cpu/branch.json
index 93ddfd8053ca..93ddfd8053ca 100644
--- a/tools/perf/pmu-events/arch/x86/amdfam17h/branch.json
+++ b/tools/perf/pmu-events/arch/test/test_cpu/branch.json
diff --git a/tools/perf/pmu-events/arch/test/test_cpu/other.json b/tools/perf/pmu-events/arch/test/test_cpu/other.json
new file mode 100644
index 000000000000..7d53d7ecd723
--- /dev/null
+++ b/tools/perf/pmu-events/arch/test/test_cpu/other.json
@@ -0,0 +1,26 @@
+[
+ {
+ "EventCode": "0x6",
+ "Counter": "0,1",
+ "UMask": "0x80",
+ "EventName": "SEGMENT_REG_LOADS.ANY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Number of segment register loads."
+ },
+ {
+ "EventCode": "0x9",
+ "Counter": "0,1",
+ "UMask": "0x20",
+ "EventName": "DISPATCH_BLOCKED.ANY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Memory cluster signals to block micro-op dispatch for any reason"
+ },
+ {
+ "EventCode": "0x3A",
+ "Counter": "0,1",
+ "UMask": "0x0",
+ "EventName": "EIST_TRANS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions"
+ }
+] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/test/test_cpu/uncore.json b/tools/perf/pmu-events/arch/test/test_cpu/uncore.json
new file mode 100644
index 000000000000..d0a890cc814d
--- /dev/null
+++ b/tools/perf/pmu-events/arch/test/test_cpu/uncore.json
@@ -0,0 +1,21 @@
+[
+ {
+ "EventCode": "0x02",
+ "EventName": "uncore_hisi_ddrc.flux_wcmd",
+ "BriefDescription": "DDRC write commands",
+ "PublicDescription": "DDRC write commands",
+ "Unit": "hisi_sccl,ddrc"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x22",
+ "UMask": "0x81",
+ "EventName": "UNC_CBO_XSNP_RESPONSE.MISS_EVICTION",
+ "BriefDescription": "A cross-core snoop resulted from L3 Eviction which misses in some processor core.",
+ "PublicDescription": "A cross-core snoop resulted from L3 Eviction which misses in some processor core.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/amdfam17h/cache.json b/tools/perf/pmu-events/arch/x86/amdfam17h/cache.json
deleted file mode 100644
index 6221a840fcea..000000000000
--- a/tools/perf/pmu-events/arch/x86/amdfam17h/cache.json
+++ /dev/null
@@ -1,329 +0,0 @@
-[
- {
- "EventName": "ic_fw32",
- "EventCode": "0x80",
- "BriefDescription": "The number of 32B fetch windows transferred from IC pipe to DE instruction decoder (includes non-cacheable and cacheable fill responses)."
- },
- {
- "EventName": "ic_fw32_miss",
- "EventCode": "0x81",
- "BriefDescription": "The number of 32B fetch windows tried to read the L1 IC and missed in the full tag."
- },
- {
- "EventName": "ic_cache_fill_l2",
- "EventCode": "0x82",
- "BriefDescription": "The number of 64 byte instruction cache line was fulfilled from the L2 cache."
- },
- {
- "EventName": "ic_cache_fill_sys",
- "EventCode": "0x83",
- "BriefDescription": "The number of 64 byte instruction cache line fulfilled from system memory or another cache."
- },
- {
- "EventName": "bp_l1_tlb_miss_l2_hit",
- "EventCode": "0x84",
- "BriefDescription": "The number of instruction fetches that miss in the L1 ITLB but hit in the L2 ITLB."
- },
- {
- "EventName": "bp_l1_tlb_miss_l2_miss",
- "EventCode": "0x85",
- "BriefDescription": "The number of instruction fetches that miss in both the L1 and L2 TLBs."
- },
- {
- "EventName": "bp_snp_re_sync",
- "EventCode": "0x86",
- "BriefDescription": "The number of pipeline restarts caused by invalidating probes that hit on the instruction stream currently being executed. This would happen if the active instruction stream was being modified by another processor in an MP system - typically a highly unlikely event."
- },
- {
- "EventName": "ic_fetch_stall.ic_stall_any",
- "EventCode": "0x87",
- "BriefDescription": "IC pipe was stalled during this clock cycle for any reason (nothing valid in pipe ICM1).",
- "PublicDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle for any reason (nothing valid in pipe ICM1).",
- "UMask": "0x4"
- },
- {
- "EventName": "ic_fetch_stall.ic_stall_dq_empty",
- "EventCode": "0x87",
- "BriefDescription": "IC pipe was stalled during this clock cycle (including IC to OC fetches) due to DQ empty.",
- "PublicDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to DQ empty.",
- "UMask": "0x2"
- },
- {
- "EventName": "ic_fetch_stall.ic_stall_back_pressure",
- "EventCode": "0x87",
- "BriefDescription": "IC pipe was stalled during this clock cycle (including IC to OC fetches) due to back-pressure.",
- "PublicDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to back-pressure.",
- "UMask": "0x1"
- },
- {
- "EventName": "ic_cache_inval.l2_invalidating_probe",
- "EventCode": "0x8c",
- "BriefDescription": "IC line invalidated due to L2 invalidating probe (external or LS).",
- "PublicDescription": "The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core. IC line invalidated due to L2 invalidating probe (external or LS).",
- "UMask": "0x2"
- },
- {
- "EventName": "ic_cache_inval.fill_invalidated",
- "EventCode": "0x8c",
- "BriefDescription": "IC line invalidated due to overwriting fill response.",
- "PublicDescription": "The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core. IC line invalidated due to overwriting fill response.",
- "UMask": "0x1"
- },
- {
- "EventName": "bp_tlb_rel",
- "EventCode": "0x99",
- "BriefDescription": "The number of ITLB reload requests."
- },
- {
- "EventName": "l2_request_g1.rd_blk_l",
- "EventCode": "0x60",
- "BriefDescription": "Requests to L2 Group1.",
- "PublicDescription": "Requests to L2 Group1.",
- "UMask": "0x80"
- },
- {
- "EventName": "l2_request_g1.rd_blk_x",
- "EventCode": "0x60",
- "BriefDescription": "Requests to L2 Group1.",
- "PublicDescription": "Requests to L2 Group1.",
- "UMask": "0x40"
- },
- {
- "EventName": "l2_request_g1.ls_rd_blk_c_s",
- "EventCode": "0x60",
- "BriefDescription": "Requests to L2 Group1.",
- "PublicDescription": "Requests to L2 Group1.",
- "UMask": "0x20"
- },
- {
- "EventName": "l2_request_g1.cacheable_ic_read",
- "EventCode": "0x60",
- "BriefDescription": "Requests to L2 Group1.",
- "PublicDescription": "Requests to L2 Group1.",
- "UMask": "0x10"
- },
- {
- "EventName": "l2_request_g1.change_to_x",
- "EventCode": "0x60",
- "BriefDescription": "Requests to L2 Group1.",
- "PublicDescription": "Requests to L2 Group1.",
- "UMask": "0x8"
- },
- {
- "EventName": "l2_request_g1.prefetch_l2",
- "EventCode": "0x60",
- "BriefDescription": "Requests to L2 Group1.",
- "PublicDescription": "Requests to L2 Group1.",
- "UMask": "0x4"
- },
- {
- "EventName": "l2_request_g1.l2_hw_pf",
- "EventCode": "0x60",
- "BriefDescription": "Requests to L2 Group1.",
- "PublicDescription": "Requests to L2 Group1.",
- "UMask": "0x2"
- },
- {
- "EventName": "l2_request_g1.other_requests",
- "EventCode": "0x60",
- "BriefDescription": "Events covered by l2_request_g2.",
- "PublicDescription": "Requests to L2 Group1. Events covered by l2_request_g2.",
- "UMask": "0x1"
- },
- {
- "EventName": "l2_request_g2.group1",
- "EventCode": "0x61",
- "BriefDescription": "All Group 1 commands not in unit0.",
- "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous. All Group 1 commands not in unit0.",
- "UMask": "0x80"
- },
- {
- "EventName": "l2_request_g2.ls_rd_sized",
- "EventCode": "0x61",
- "BriefDescription": "RdSized, RdSized32, RdSized64.",
- "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous. RdSized, RdSized32, RdSized64.",
- "UMask": "0x40"
- },
- {
- "EventName": "l2_request_g2.ls_rd_sized_nc",
- "EventCode": "0x61",
- "BriefDescription": "RdSizedNC, RdSized32NC, RdSized64NC.",
- "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous. RdSizedNC, RdSized32NC, RdSized64NC.",
- "UMask": "0x20"
- },
- {
- "EventName": "l2_request_g2.ic_rd_sized",
- "EventCode": "0x61",
- "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.",
- "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.",
- "UMask": "0x10"
- },
- {
- "EventName": "l2_request_g2.ic_rd_sized_nc",
- "EventCode": "0x61",
- "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.",
- "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.",
- "UMask": "0x8"
- },
- {
- "EventName": "l2_request_g2.smc_inval",
- "EventCode": "0x61",
- "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.",
- "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.",
- "UMask": "0x4"
- },
- {
- "EventName": "l2_request_g2.bus_locks_originator",
- "EventCode": "0x61",
- "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.",
- "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.",
- "UMask": "0x2"
- },
- {
- "EventName": "l2_request_g2.bus_locks_responses",
- "EventCode": "0x61",
- "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.",
- "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.",
- "UMask": "0x1"
- },
- {
- "EventName": "l2_latency.l2_cycles_waiting_on_fills",
- "EventCode": "0x62",
- "BriefDescription": "Total cycles spent waiting for L2 fills to complete from L3 or memory, divided by four. Event counts are for both threads. To calculate average latency, the number of fills from both threads must be used.",
- "PublicDescription": "Total cycles spent waiting for L2 fills to complete from L3 or memory, divided by four. Event counts are for both threads. To calculate average latency, the number of fills from both threads must be used.",
- "UMask": "0x1"
- },
- {
- "EventName": "l2_wcb_req.wcb_write",
- "EventCode": "0x63",
- "PublicDescription": "LS (Load/Store unit) to L2 WCB (Write Combining Buffer) write requests.",
- "BriefDescription": "LS to L2 WCB write requests.",
- "UMask": "0x40"
- },
- {
- "EventName": "l2_wcb_req.wcb_close",
- "EventCode": "0x63",
- "BriefDescription": "LS to L2 WCB close requests.",
- "PublicDescription": "LS (Load/Store unit) to L2 WCB (Write Combining Buffer) close requests.",
- "UMask": "0x20"
- },
- {
- "EventName": "l2_wcb_req.zero_byte_store",
- "EventCode": "0x63",
- "BriefDescription": "LS to L2 WCB zero byte store requests.",
- "PublicDescription": "LS (Load/Store unit) to L2 WCB (Write Combining Buffer) zero byte store requests.",
- "UMask": "0x4"
- },
- {
- "EventName": "l2_wcb_req.cl_zero",
- "EventCode": "0x63",
- "PublicDescription": "LS to L2 WCB cache line zeroing requests.",
- "BriefDescription": "LS (Load/Store unit) to L2 WCB (Write Combining Buffer) cache line zeroing requests.",
- "UMask": "0x1"
- },
- {
- "EventName": "l2_cache_req_stat.ls_rd_blk_cs",
- "EventCode": "0x64",
- "BriefDescription": "LS ReadBlock C/S Hit.",
- "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LS ReadBlock C/S Hit.",
- "UMask": "0x80"
- },
- {
- "EventName": "l2_cache_req_stat.ls_rd_blk_l_hit_x",
- "EventCode": "0x64",
- "BriefDescription": "LS Read Block L Hit X.",
- "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LS Read Block L Hit X.",
- "UMask": "0x40"
- },
- {
- "EventName": "l2_cache_req_stat.ls_rd_blk_l_hit_s",
- "EventCode": "0x64",
- "BriefDescription": "LsRdBlkL Hit Shared.",
- "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LsRdBlkL Hit Shared.",
- "UMask": "0x20"
- },
- {
- "EventName": "l2_cache_req_stat.ls_rd_blk_x",
- "EventCode": "0x64",
- "BriefDescription": "LsRdBlkX/ChgToX Hit X. Count RdBlkX finding Shared as a Miss.",
- "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LsRdBlkX/ChgToX Hit X. Count RdBlkX finding Shared as a Miss.",
- "UMask": "0x10"
- },
- {
- "EventName": "l2_cache_req_stat.ls_rd_blk_c",
- "EventCode": "0x64",
- "BriefDescription": "LS Read Block C S L X Change to X Miss.",
- "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LS Read Block C S L X Change to X Miss.",
- "UMask": "0x8"
- },
- {
- "EventName": "l2_cache_req_stat.ic_fill_hit_x",
- "EventCode": "0x64",
- "BriefDescription": "IC Fill Hit Exclusive Stale.",
- "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. IC Fill Hit Exclusive Stale.",
- "UMask": "0x4"
- },
- {
- "EventName": "l2_cache_req_stat.ic_fill_hit_s",
- "EventCode": "0x64",
- "BriefDescription": "IC Fill Hit Shared.",
- "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. IC Fill Hit Shared.",
- "UMask": "0x2"
- },
- {
- "EventName": "l2_cache_req_stat.ic_fill_miss",
- "EventCode": "0x64",
- "BriefDescription": "IC Fill Miss.",
- "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. IC Fill Miss.",
- "UMask": "0x1"
- },
- {
- "EventName": "l2_fill_pending.l2_fill_busy",
- "EventCode": "0x6d",
- "BriefDescription": "Total cycles spent with one or more fill requests in flight from L2.",
- "PublicDescription": "Total cycles spent with one or more fill requests in flight from L2.",
- "UMask": "0x1"
- },
- {
- "EventName": "l3_request_g1.caching_l3_cache_accesses",
- "EventCode": "0x01",
- "BriefDescription": "Caching: L3 cache accesses",
- "UMask": "0x80",
- "Unit": "L3PMC"
- },
- {
- "EventName": "l3_lookup_state.all_l3_req_typs",
- "EventCode": "0x04",
- "BriefDescription": "All L3 Request Types",
- "UMask": "0xff",
- "Unit": "L3PMC"
- },
- {
- "EventName": "l3_comb_clstr_state.other_l3_miss_typs",
- "EventCode": "0x06",
- "BriefDescription": "Other L3 Miss Request Types",
- "UMask": "0xfe",
- "Unit": "L3PMC"
- },
- {
- "EventName": "l3_comb_clstr_state.request_miss",
- "EventCode": "0x06",
- "BriefDescription": "L3 cache misses",
- "UMask": "0x01",
- "Unit": "L3PMC"
- },
- {
- "EventName": "xi_sys_fill_latency",
- "EventCode": "0x90",
- "BriefDescription": "L3 Cache Miss Latency. Total cycles for all transactions divided by 16. Ignores SliceMask and ThreadMask.",
- "UMask": "0x00",
- "Unit": "L3PMC"
- },
- {
- "EventName": "xi_ccx_sdp_req1.all_l3_miss_req_typs",
- "EventCode": "0x9a",
- "BriefDescription": "All L3 Miss Request Types. Ignores SliceMask and ThreadMask.",
- "UMask": "0x3f",
- "Unit": "L3PMC"
- }
-]
diff --git a/tools/perf/pmu-events/arch/x86/amdfam17h/other.json b/tools/perf/pmu-events/arch/x86/amdfam17h/other.json
deleted file mode 100644
index b26a00d05a2e..000000000000
--- a/tools/perf/pmu-events/arch/x86/amdfam17h/other.json
+++ /dev/null
@@ -1,65 +0,0 @@
-[
- {
- "EventName": "ic_oc_mode_switch.oc_ic_mode_switch",
- "EventCode": "0x28a",
- "BriefDescription": "OC to IC mode switch.",
- "PublicDescription": "OC Mode Switch. OC to IC mode switch.",
- "UMask": "0x2"
- },
- {
- "EventName": "ic_oc_mode_switch.ic_oc_mode_switch",
- "EventCode": "0x28a",
- "BriefDescription": "IC to OC mode switch.",
- "PublicDescription": "OC Mode Switch. IC to OC mode switch.",
- "UMask": "0x1"
- },
- {
- "EventName": "de_dis_dispatch_token_stalls0.retire_token_stall",
- "EventCode": "0xaf",
- "BriefDescription": "RETIRE Tokens unavailable.",
- "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. RETIRE Tokens unavailable.",
- "UMask": "0x40"
- },
- {
- "EventName": "de_dis_dispatch_token_stalls0.agsq_token_stall",
- "EventCode": "0xaf",
- "BriefDescription": "AGSQ Tokens unavailable.",
- "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. AGSQ Tokens unavailable.",
- "UMask": "0x20"
- },
- {
- "EventName": "de_dis_dispatch_token_stalls0.alu_token_stall",
- "EventCode": "0xaf",
- "BriefDescription": "ALU tokens total unavailable.",
- "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALU tokens total unavailable.",
- "UMask": "0x10"
- },
- {
- "EventName": "de_dis_dispatch_token_stalls0.alsq3_0_token_stall",
- "EventCode": "0xaf",
- "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall.",
- "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall.",
- "UMask": "0x8"
- },
- {
- "EventName": "de_dis_dispatch_token_stalls0.alsq3_token_stall",
- "EventCode": "0xaf",
- "BriefDescription": "ALSQ 3 Tokens unavailable.",
- "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 3 Tokens unavailable.",
- "UMask": "0x4"
- },
- {
- "EventName": "de_dis_dispatch_token_stalls0.alsq2_token_stall",
- "EventCode": "0xaf",
- "BriefDescription": "ALSQ 2 Tokens unavailable.",
- "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 2 Tokens unavailable.",
- "UMask": "0x2"
- },
- {
- "EventName": "de_dis_dispatch_token_stalls0.alsq1_token_stall",
- "EventCode": "0xaf",
- "BriefDescription": "ALSQ 1 Tokens unavailable.",
- "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 1 Tokens unavailable.",
- "UMask": "0x1"
- }
-]
diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/branch.json b/tools/perf/pmu-events/arch/x86/amdzen1/branch.json
new file mode 100644
index 000000000000..a9943eeb8d6b
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/amdzen1/branch.json
@@ -0,0 +1,23 @@
+[
+ {
+ "EventName": "bp_l1_btb_correct",
+ "EventCode": "0x8a",
+ "BriefDescription": "L1 BTB Correction."
+ },
+ {
+ "EventName": "bp_l2_btb_correct",
+ "EventCode": "0x8b",
+ "BriefDescription": "L2 BTB Correction."
+ },
+ {
+ "EventName": "bp_dyn_ind_pred",
+ "EventCode": "0x8e",
+ "BriefDescription": "Dynamic Indirect Predictions.",
+ "PublicDescription": "Indirect Branch Prediction for potential multi-target branch (speculative)."
+ },
+ {
+ "EventName": "bp_de_redirect",
+ "EventCode": "0x91",
+ "BriefDescription": "Decoder Overrides Existing Branch Prediction (speculative)."
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/cache.json b/tools/perf/pmu-events/arch/x86/amdzen1/cache.json
new file mode 100644
index 000000000000..404d4c569c01
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/amdzen1/cache.json
@@ -0,0 +1,294 @@
+[
+ {
+ "EventName": "ic_fw32",
+ "EventCode": "0x80",
+ "BriefDescription": "The number of 32B fetch windows transferred from IC pipe to DE instruction decoder (includes non-cacheable and cacheable fill responses)."
+ },
+ {
+ "EventName": "ic_fw32_miss",
+ "EventCode": "0x81",
+ "BriefDescription": "The number of 32B fetch windows tried to read the L1 IC and missed in the full tag."
+ },
+ {
+ "EventName": "ic_cache_fill_l2",
+ "EventCode": "0x82",
+ "BriefDescription": "The number of 64 byte instruction cache line was fulfilled from the L2 cache."
+ },
+ {
+ "EventName": "ic_cache_fill_sys",
+ "EventCode": "0x83",
+ "BriefDescription": "The number of 64 byte instruction cache line fulfilled from system memory or another cache."
+ },
+ {
+ "EventName": "bp_l1_tlb_miss_l2_hit",
+ "EventCode": "0x84",
+ "BriefDescription": "The number of instruction fetches that miss in the L1 ITLB but hit in the L2 ITLB."
+ },
+ {
+ "EventName": "bp_l1_tlb_miss_l2_miss",
+ "EventCode": "0x85",
+ "BriefDescription": "The number of instruction fetches that miss in both the L1 and L2 TLBs."
+ },
+ {
+ "EventName": "bp_snp_re_sync",
+ "EventCode": "0x86",
+ "BriefDescription": "The number of pipeline restarts caused by invalidating probes that hit on the instruction stream currently being executed. This would happen if the active instruction stream was being modified by another processor in an MP system - typically a highly unlikely event."
+ },
+ {
+ "EventName": "ic_fetch_stall.ic_stall_any",
+ "EventCode": "0x87",
+ "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle for any reason (nothing valid in pipe ICM1).",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "ic_fetch_stall.ic_stall_dq_empty",
+ "EventCode": "0x87",
+ "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to DQ empty.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "ic_fetch_stall.ic_stall_back_pressure",
+ "EventCode": "0x87",
+ "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to back-pressure.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "ic_cache_inval.l2_invalidating_probe",
+ "EventCode": "0x8c",
+ "BriefDescription": "IC line invalidated due to L2 invalidating probe (external or LS). The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "ic_cache_inval.fill_invalidated",
+ "EventCode": "0x8c",
+ "BriefDescription": "IC line invalidated due to overwriting fill response. The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "bp_tlb_rel",
+ "EventCode": "0x99",
+ "BriefDescription": "The number of ITLB reload requests."
+ },
+ {
+ "EventName": "l2_request_g1.rd_blk_l",
+ "EventCode": "0x60",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache reads (including hardware and software prefetch).",
+ "UMask": "0x80"
+ },
+ {
+ "EventName": "l2_request_g1.rd_blk_x",
+ "EventCode": "0x60",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache stores.",
+ "UMask": "0x40"
+ },
+ {
+ "EventName": "l2_request_g1.ls_rd_blk_c_s",
+ "EventCode": "0x60",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache shared reads.",
+ "UMask": "0x20"
+ },
+ {
+ "EventName": "l2_request_g1.cacheable_ic_read",
+ "EventCode": "0x60",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Instruction cache reads.",
+ "UMask": "0x10"
+ },
+ {
+ "EventName": "l2_request_g1.change_to_x",
+ "EventCode": "0x60",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache state change requests. Request change to writable, check L2 for current state.",
+ "UMask": "0x8"
+ },
+ {
+ "EventName": "l2_request_g1.prefetch_l2_cmd",
+ "EventCode": "0x60",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). PrefetchL2Cmd.",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "l2_request_g1.l2_hw_pf",
+ "EventCode": "0x60",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). L2 Prefetcher. All prefetches accepted by L2 pipeline, hit or miss. Types of PF and L2 hit/miss broken out in a separate perfmon event.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "l2_request_g1.group2",
+ "EventCode": "0x60",
+ "BriefDescription": "Miscellaneous events covered in more detail by l2_request_g2 (PMCx061).",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "l2_request_g2.group1",
+ "EventCode": "0x61",
+ "BriefDescription": "Miscellaneous events covered in more detail by l2_request_g1 (PMCx060).",
+ "UMask": "0x80"
+ },
+ {
+ "EventName": "l2_request_g2.ls_rd_sized",
+ "EventCode": "0x61",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Data cache read sized.",
+ "UMask": "0x40"
+ },
+ {
+ "EventName": "l2_request_g2.ls_rd_sized_nc",
+ "EventCode": "0x61",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Data cache read sized non-cacheable.",
+ "UMask": "0x20"
+ },
+ {
+ "EventName": "l2_request_g2.ic_rd_sized",
+ "EventCode": "0x61",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Instruction cache read sized.",
+ "UMask": "0x10"
+ },
+ {
+ "EventName": "l2_request_g2.ic_rd_sized_nc",
+ "EventCode": "0x61",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Instruction cache read sized non-cacheable.",
+ "UMask": "0x8"
+ },
+ {
+ "EventName": "l2_request_g2.smc_inval",
+ "EventCode": "0x61",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Self-modifying code invalidates.",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "l2_request_g2.bus_locks_originator",
+ "EventCode": "0x61",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Bus locks.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "l2_request_g2.bus_locks_responses",
+ "EventCode": "0x61",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Bus lock response.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "l2_latency.l2_cycles_waiting_on_fills",
+ "EventCode": "0x62",
+ "BriefDescription": "Total cycles spent waiting for L2 fills to complete from L3 or memory, divided by four. Event counts are for both threads. To calculate average latency, the number of fills from both threads must be used.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "l2_wcb_req.wcb_write",
+ "EventCode": "0x63",
+ "BriefDescription": "LS to L2 WCB write requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) write requests.",
+ "UMask": "0x40"
+ },
+ {
+ "EventName": "l2_wcb_req.wcb_close",
+ "EventCode": "0x63",
+ "BriefDescription": "LS to L2 WCB close requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) close requests.",
+ "UMask": "0x20"
+ },
+ {
+ "EventName": "l2_wcb_req.zero_byte_store",
+ "EventCode": "0x63",
+ "BriefDescription": "LS to L2 WCB zero byte store requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) zero byte store requests.",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "l2_wcb_req.cl_zero",
+ "EventCode": "0x63",
+ "BriefDescription": "LS to L2 WCB cache line zeroing requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) cache line zeroing requests.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "l2_cache_req_stat.ls_rd_blk_cs",
+ "EventCode": "0x64",
+ "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache shared read hit in L2",
+ "UMask": "0x80"
+ },
+ {
+ "EventName": "l2_cache_req_stat.ls_rd_blk_l_hit_x",
+ "EventCode": "0x64",
+ "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache read hit in L2.",
+ "UMask": "0x40"
+ },
+ {
+ "EventName": "l2_cache_req_stat.ls_rd_blk_l_hit_s",
+ "EventCode": "0x64",
+ "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache read hit on shared line in L2.",
+ "UMask": "0x20"
+ },
+ {
+ "EventName": "l2_cache_req_stat.ls_rd_blk_x",
+ "EventCode": "0x64",
+ "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache store or state change hit in L2.",
+ "UMask": "0x10"
+ },
+ {
+ "EventName": "l2_cache_req_stat.ls_rd_blk_c",
+ "EventCode": "0x64",
+ "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache request miss in L2 (all types).",
+ "UMask": "0x8"
+ },
+ {
+ "EventName": "l2_cache_req_stat.ic_fill_hit_x",
+ "EventCode": "0x64",
+ "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache hit modifiable line in L2.",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "l2_cache_req_stat.ic_fill_hit_s",
+ "EventCode": "0x64",
+ "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache hit clean line in L2.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "l2_cache_req_stat.ic_fill_miss",
+ "EventCode": "0x64",
+ "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache request miss in L2.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "l2_fill_pending.l2_fill_busy",
+ "EventCode": "0x6d",
+ "BriefDescription": "Cycles with fill pending from L2. Total cycles spent with one or more fill requests in flight from L2.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "l3_request_g1.caching_l3_cache_accesses",
+ "EventCode": "0x01",
+ "BriefDescription": "Caching: L3 cache accesses",
+ "UMask": "0x80",
+ "Unit": "L3PMC"
+ },
+ {
+ "EventName": "l3_lookup_state.all_l3_req_typs",
+ "EventCode": "0x04",
+ "BriefDescription": "All L3 Request Types",
+ "UMask": "0xff",
+ "Unit": "L3PMC"
+ },
+ {
+ "EventName": "l3_comb_clstr_state.other_l3_miss_typs",
+ "EventCode": "0x06",
+ "BriefDescription": "Other L3 Miss Request Types",
+ "UMask": "0xfe",
+ "Unit": "L3PMC"
+ },
+ {
+ "EventName": "l3_comb_clstr_state.request_miss",
+ "EventCode": "0x06",
+ "BriefDescription": "L3 cache misses",
+ "UMask": "0x01",
+ "Unit": "L3PMC"
+ },
+ {
+ "EventName": "xi_sys_fill_latency",
+ "EventCode": "0x90",
+ "BriefDescription": "L3 Cache Miss Latency. Total cycles for all transactions divided by 16. Ignores SliceMask and ThreadMask.",
+ "UMask": "0x00",
+ "Unit": "L3PMC"
+ },
+ {
+ "EventName": "xi_ccx_sdp_req1.all_l3_miss_req_typs",
+ "EventCode": "0x9a",
+ "BriefDescription": "All L3 Miss Request Types. Ignores SliceMask and ThreadMask.",
+ "UMask": "0x3f",
+ "Unit": "L3PMC"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/amdfam17h/core.json b/tools/perf/pmu-events/arch/x86/amdzen1/core.json
index 1079544eeed5..7e1aa8273935 100644
--- a/tools/perf/pmu-events/arch/x86/amdfam17h/core.json
+++ b/tools/perf/pmu-events/arch/x86/amdzen1/core.json
@@ -62,7 +62,6 @@
"EventName": "ex_ret_brn_ind_misp",
"EventCode": "0xca",
"BriefDescription": "Retired Indirect Branch Instructions Mispredicted.",
- "PublicDescription": "Retired Indirect Branch Instructions Mispredicted."
},
{
"EventName": "ex_ret_mmx_fp_instr.sse_instr",
@@ -91,11 +90,6 @@
"BriefDescription": "Retired Conditional Branch Instructions."
},
{
- "EventName": "ex_ret_cond_misp",
- "EventCode": "0xd2",
- "BriefDescription": "Retired Conditional Branch Instructions Mispredicted."
- },
- {
"EventName": "ex_div_busy",
"EventCode": "0xd3",
"BriefDescription": "Div Cycles Busy count."
@@ -108,22 +102,19 @@
{
"EventName": "ex_tagged_ibs_ops.ibs_count_rollover",
"EventCode": "0x1cf",
- "BriefDescription": "Number of times an op could not be tagged by IBS because of a previous tagged op that has not retired.",
- "PublicDescription": "Tagged IBS Ops. Number of times an op could not be tagged by IBS because of a previous tagged op that has not retired.",
+ "BriefDescription": "Tagged IBS Ops. Number of times an op could not be tagged by IBS because of a previous tagged op that has not retired.",
"UMask": "0x4"
},
{
"EventName": "ex_tagged_ibs_ops.ibs_tagged_ops_ret",
"EventCode": "0x1cf",
- "BriefDescription": "Number of Ops tagged by IBS that retired.",
- "PublicDescription": "Tagged IBS Ops. Number of Ops tagged by IBS that retired.",
+ "BriefDescription": "Tagged IBS Ops. Number of Ops tagged by IBS that retired.",
"UMask": "0x2"
},
{
"EventName": "ex_tagged_ibs_ops.ibs_tagged_ops",
"EventCode": "0x1cf",
- "BriefDescription": "Number of Ops tagged by IBS.",
- "PublicDescription": "Tagged IBS Ops. Number of Ops tagged by IBS.",
+ "BriefDescription": "Tagged IBS Ops. Number of Ops tagged by IBS.",
"UMask": "0x1"
},
{
diff --git a/tools/perf/pmu-events/arch/x86/amdfam17h/floating-point.json b/tools/perf/pmu-events/arch/x86/amdzen1/floating-point.json
index ea4711983d1d..a35542bd3b36 100644
--- a/tools/perf/pmu-events/arch/x86/amdfam17h/floating-point.json
+++ b/tools/perf/pmu-events/arch/x86/amdzen1/floating-point.json
@@ -2,18 +2,74 @@
{
"EventName": "fpu_pipe_assignment.dual",
"EventCode": "0x00",
- "BriefDescription": "Total number multi-pipe uOps.",
- "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number multi-pipe uOps assigned to Pipe 3.",
+ "BriefDescription": "Total number multi-pipe uOps assigned to all pipes.",
+ "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one-cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number multi-pipe uOps assigned to all pipes.",
"UMask": "0xf0"
},
{
+ "EventName": "fpu_pipe_assignment.dual3",
+ "EventCode": "0x00",
+ "BriefDescription": "Total number multi-pipe uOps assigned to pipe 3.",
+ "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one-cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number multi-pipe uOps assigned to pipe 3.",
+ "UMask": "0x80"
+ },
+ {
+ "EventName": "fpu_pipe_assignment.dual2",
+ "EventCode": "0x00",
+ "BriefDescription": "Total number multi-pipe uOps assigned to pipe 2.",
+ "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one-cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number multi-pipe uOps assigned to pipe 2.",
+ "UMask": "0x40"
+ },
+ {
+ "EventName": "fpu_pipe_assignment.dual1",
+ "EventCode": "0x00",
+ "BriefDescription": "Total number multi-pipe uOps assigned to pipe 1.",
+ "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one-cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number multi-pipe uOps assigned to pipe 1.",
+ "UMask": "0x20"
+ },
+ {
+ "EventName": "fpu_pipe_assignment.dual0",
+ "EventCode": "0x00",
+ "BriefDescription": "Total number multi-pipe uOps assigned to pipe 0.",
+ "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one-cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number multi-pipe uOps assigned to pipe 0.",
+ "UMask": "0x10"
+ },
+ {
"EventName": "fpu_pipe_assignment.total",
"EventCode": "0x00",
- "BriefDescription": "Total number uOps.",
- "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to Pipe 3.",
+ "BriefDescription": "Total number uOps assigned to all fpu pipes.",
+ "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one-cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to all pipes.",
"UMask": "0xf"
},
{
+ "EventName": "fpu_pipe_assignment.total3",
+ "EventCode": "0x00",
+ "BriefDescription": "Total number of fp uOps on pipe 3.",
+ "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one-cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 3.",
+ "UMask": "0x8"
+ },
+ {
+ "EventName": "fpu_pipe_assignment.total2",
+ "EventCode": "0x00",
+ "BriefDescription": "Total number of fp uOps on pipe 2.",
+ "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one-cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 2.",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "fpu_pipe_assignment.total1",
+ "EventCode": "0x00",
+ "BriefDescription": "Total number of fp uOps on pipe 1.",
+ "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one-cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 1.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "fpu_pipe_assignment.total0",
+ "EventCode": "0x00",
+ "BriefDescription": "Total number of fp uOps on pipe 0.",
+ "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one-cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 0.",
+ "UMask": "0x1"
+ },
+ {
"EventName": "fp_sched_empty",
"EventCode": "0x01",
"BriefDescription": "This is a speculative event. The number of cycles in which the FPU scheduler is empty. Note that some Ops like FP loads bypass the scheduler."
diff --git a/tools/perf/pmu-events/arch/x86/amdfam17h/memory.json b/tools/perf/pmu-events/arch/x86/amdzen1/memory.json
index fa2d60d4def0..b33a3c308019 100644
--- a/tools/perf/pmu-events/arch/x86/amdfam17h/memory.json
+++ b/tools/perf/pmu-events/arch/x86/amdzen1/memory.json
@@ -3,28 +3,24 @@
"EventName": "ls_locks.bus_lock",
"EventCode": "0x25",
"BriefDescription": "Bus lock when a locked operations crosses a cache boundary or is done on an uncacheable memory type.",
- "PublicDescription": "Bus lock when a locked operations crosses a cache boundary or is done on an uncacheable memory type.",
"UMask": "0x1"
},
{
"EventName": "ls_dispatch.ld_st_dispatch",
"EventCode": "0x29",
- "BriefDescription": "Load-op-Stores.",
- "PublicDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed. Load-op-Stores.",
+ "BriefDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed. Load-op-Stores.",
"UMask": "0x4"
},
{
"EventName": "ls_dispatch.store_dispatch",
"EventCode": "0x29",
- "BriefDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.",
- "PublicDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.",
+ "BriefDescription": "Counts the number of stores dispatched to the LS unit. Unit Masks ADDed.",
"UMask": "0x2"
},
{
"EventName": "ls_dispatch.ld_dispatch",
"EventCode": "0x29",
- "BriefDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.",
- "PublicDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.",
+ "BriefDescription": "Counts the number of loads dispatched to the LS unit. Unit Masks ADDed.",
"UMask": "0x1"
},
{
@@ -38,83 +34,114 @@
"BriefDescription": "The number of accesses to the data cache for load and store references. This may include certain microcode scratchpad accesses, although these are generally rare. Each increment represents an eight-byte access, although the instruction may only be accessing a portion of that. This event is a speculative event."
},
{
+ "EventName": "ls_mab_alloc.dc_prefetcher",
+ "EventCode": "0x41",
+ "BriefDescription": "LS MAB allocates by type - DC prefetcher.",
+ "UMask": "0x8"
+ },
+ {
+ "EventName": "ls_mab_alloc.stores",
+ "EventCode": "0x41",
+ "BriefDescription": "LS MAB allocates by type - stores.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "ls_mab_alloc.loads",
+ "EventCode": "0x41",
+ "BriefDescription": "LS MAB allocates by type - loads.",
+ "UMask": "0x01"
+ },
+ {
"EventName": "ls_l1_d_tlb_miss.all",
"EventCode": "0x45",
"BriefDescription": "L1 DTLB Miss or Reload off all sizes.",
- "PublicDescription": "L1 DTLB Miss or Reload off all sizes.",
"UMask": "0xff"
},
{
"EventName": "ls_l1_d_tlb_miss.tlb_reload_1g_l2_miss",
"EventCode": "0x45",
"BriefDescription": "L1 DTLB Miss of a page of 1G size.",
- "PublicDescription": "L1 DTLB Miss of a page of 1G size.",
"UMask": "0x80"
},
{
"EventName": "ls_l1_d_tlb_miss.tlb_reload_2m_l2_miss",
"EventCode": "0x45",
"BriefDescription": "L1 DTLB Miss of a page of 2M size.",
- "PublicDescription": "L1 DTLB Miss of a page of 2M size.",
"UMask": "0x40"
},
{
"EventName": "ls_l1_d_tlb_miss.tlb_reload_32k_l2_miss",
"EventCode": "0x45",
"BriefDescription": "L1 DTLB Miss of a page of 32K size.",
- "PublicDescription": "L1 DTLB Miss of a page of 32K size.",
"UMask": "0x20"
},
{
"EventName": "ls_l1_d_tlb_miss.tlb_reload_4k_l2_miss",
"EventCode": "0x45",
"BriefDescription": "L1 DTLB Miss of a page of 4K size.",
- "PublicDescription": "L1 DTLB Miss of a page of 4K size.",
"UMask": "0x10"
},
{
"EventName": "ls_l1_d_tlb_miss.tlb_reload_1g_l2_hit",
"EventCode": "0x45",
"BriefDescription": "L1 DTLB Reload of a page of 1G size.",
- "PublicDescription": "L1 DTLB Reload of a page of 1G size.",
"UMask": "0x8"
},
{
"EventName": "ls_l1_d_tlb_miss.tlb_reload_2m_l2_hit",
"EventCode": "0x45",
"BriefDescription": "L1 DTLB Reload of a page of 2M size.",
- "PublicDescription": "L1 DTLB Reload of a page of 2M size.",
"UMask": "0x4"
},
{
"EventName": "ls_l1_d_tlb_miss.tlb_reload_32k_l2_hit",
"EventCode": "0x45",
"BriefDescription": "L1 DTLB Reload of a page of 32K size.",
- "PublicDescription": "L1 DTLB Reload of a page of 32K size.",
"UMask": "0x2"
},
{
"EventName": "ls_l1_d_tlb_miss.tlb_reload_4k_l2_hit",
"EventCode": "0x45",
"BriefDescription": "L1 DTLB Reload of a page of 4K size.",
- "PublicDescription": "L1 DTLB Reload of a page of 4K size.",
"UMask": "0x1"
},
{
- "EventName": "ls_tablewalker.perf_mon_tablewalk_alloc_iside",
+ "EventName": "ls_tablewalker.iside",
"EventCode": "0x46",
- "BriefDescription": "Tablewalker allocation.",
- "PublicDescription": "Tablewalker allocation.",
+ "BriefDescription": "Total Page Table Walks on I-side.",
"UMask": "0xc"
},
{
- "EventName": "ls_tablewalker.perf_mon_tablewalk_alloc_dside",
+ "EventName": "ls_tablewalker.ic_type1",
+ "EventCode": "0x46",
+ "BriefDescription": "Total Page Table Walks IC Type 1.",
+ "UMask": "0x8"
+ },
+ {
+ "EventName": "ls_tablewalker.ic_type0",
"EventCode": "0x46",
- "BriefDescription": "Tablewalker allocation.",
- "PublicDescription": "Tablewalker allocation.",
+ "BriefDescription": "Total Page Table Walks IC Type 0.",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "ls_tablewalker.dside",
+ "EventCode": "0x46",
+ "BriefDescription": "Total Page Table Walks on D-side.",
"UMask": "0x3"
},
{
+ "EventName": "ls_tablewalker.dc_type1",
+ "EventCode": "0x46",
+ "BriefDescription": "Total Page Table Walks DC Type 1.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "ls_tablewalker.dc_type0",
+ "EventCode": "0x46",
+ "BriefDescription": "Total Page Table Walks DC Type 0.",
+ "UMask": "0x1"
+ },
+ {
"EventName": "ls_misal_accesses",
"EventCode": "0x47",
"BriefDescription": "Misaligned loads."
@@ -123,35 +150,30 @@
"EventName": "ls_pref_instr_disp.prefetch_nta",
"EventCode": "0x4b",
"BriefDescription": "Software Prefetch Instructions (PREFETCHNTA instruction) Dispatched.",
- "PublicDescription": "Software Prefetch Instructions (PREFETCHNTA instruction) Dispatched.",
"UMask": "0x4"
},
{
"EventName": "ls_pref_instr_disp.store_prefetch_w",
"EventCode": "0x4b",
"BriefDescription": "Software Prefetch Instructions (3DNow PREFETCHW instruction) Dispatched.",
- "PublicDescription": "Software Prefetch Instructions (3DNow PREFETCHW instruction) Dispatched.",
"UMask": "0x2"
},
{
"EventName": "ls_pref_instr_disp.load_prefetch_w",
"EventCode": "0x4b",
- "BriefDescription": "Prefetch, Prefetch_T0_T1_T2.",
- "PublicDescription": "Software Prefetch Instructions Dispatched. Prefetch, Prefetch_T0_T1_T2.",
+ "BriefDescription": "Software Prefetch Instructions Dispatched. Prefetch, Prefetch_T0_T1_T2.",
"UMask": "0x1"
},
{
"EventName": "ls_inef_sw_pref.mab_mch_cnt",
"EventCode": "0x52",
- "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core.",
- "PublicDescription": "The number of software prefetches that did not fetch data outside of the processor core.",
+ "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core. Software PREFETCH instruction saw a match on an already-allocated miss request buffer.",
"UMask": "0x2"
},
{
"EventName": "ls_inef_sw_pref.data_pipe_sw_pf_dc_hit",
"EventCode": "0x52",
- "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core.",
- "PublicDescription": "The number of software prefetches that did not fetch data outside of the processor core.",
+ "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core. Software PREFETCH instruction saw a DC hit.",
"UMask": "0x1"
},
{
diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/other.json b/tools/perf/pmu-events/arch/x86/amdzen1/other.json
new file mode 100644
index 000000000000..ff780098d36e
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/amdzen1/other.json
@@ -0,0 +1,56 @@
+[
+ {
+ "EventName": "ic_oc_mode_switch.oc_ic_mode_switch",
+ "EventCode": "0x28a",
+ "BriefDescription": "OC Mode Switch. OC to IC mode switch.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "ic_oc_mode_switch.ic_oc_mode_switch",
+ "EventCode": "0x28a",
+ "BriefDescription": "OC Mode Switch. IC to OC mode switch.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "de_dis_dispatch_token_stalls0.retire_token_stall",
+ "EventCode": "0xaf",
+ "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. RETIRE Tokens unavailable.",
+ "UMask": "0x40"
+ },
+ {
+ "EventName": "de_dis_dispatch_token_stalls0.agsq_token_stall",
+ "EventCode": "0xaf",
+ "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. AGSQ Tokens unavailable.",
+ "UMask": "0x20"
+ },
+ {
+ "EventName": "de_dis_dispatch_token_stalls0.alu_token_stall",
+ "EventCode": "0xaf",
+ "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALU tokens total unavailable.",
+ "UMask": "0x10"
+ },
+ {
+ "EventName": "de_dis_dispatch_token_stalls0.alsq3_0_token_stall",
+ "EventCode": "0xaf",
+ "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 3_0 Tokens unavailable.",
+ "UMask": "0x8"
+ },
+ {
+ "EventName": "de_dis_dispatch_token_stalls0.alsq3_token_stall",
+ "EventCode": "0xaf",
+ "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 3 Tokens unavailable.",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "de_dis_dispatch_token_stalls0.alsq2_token_stall",
+ "EventCode": "0xaf",
+ "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 2 Tokens unavailable.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "de_dis_dispatch_token_stalls0.alsq1_token_stall",
+ "EventCode": "0xaf",
+ "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 1 Tokens unavailable.",
+ "UMask": "0x1"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/branch.json b/tools/perf/pmu-events/arch/x86/amdzen2/branch.json
new file mode 100644
index 000000000000..ef4166a66288
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/amdzen2/branch.json
@@ -0,0 +1,52 @@
+[
+ {
+ "EventName": "bp_l1_btb_correct",
+ "EventCode": "0x8a",
+ "BriefDescription": "L1 Branch Prediction Overrides Existing Prediction (speculative)."
+ },
+ {
+ "EventName": "bp_l2_btb_correct",
+ "EventCode": "0x8b",
+ "BriefDescription": "L2 Branch Prediction Overrides Existing Prediction (speculative)."
+ },
+ {
+ "EventName": "bp_dyn_ind_pred",
+ "EventCode": "0x8e",
+ "BriefDescription": "Dynamic Indirect Predictions.",
+ "PublicDescription": "Indirect Branch Prediction for potential multi-target branch (speculative)."
+ },
+ {
+ "EventName": "bp_de_redirect",
+ "EventCode": "0x91",
+ "BriefDescription": "Decoder Overrides Existing Branch Prediction (speculative)."
+ },
+ {
+ "EventName": "bp_l1_tlb_fetch_hit",
+ "EventCode": "0x94",
+ "BriefDescription": "The number of instruction fetches that hit in the L1 ITLB.",
+ "UMask": "0xFF"
+ },
+ {
+ "EventName": "bp_l1_tlb_fetch_hit.if1g",
+ "EventCode": "0x94",
+ "BriefDescription": "The number of instruction fetches that hit in the L1 ITLB. Instruction fetches to a 1GB page.",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "bp_l1_tlb_fetch_hit.if2m",
+ "EventCode": "0x94",
+ "BriefDescription": "The number of instruction fetches that hit in the L1 ITLB. Instruction fetches to a 2MB page.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "bp_l1_tlb_fetch_hit.if4k",
+ "EventCode": "0x94",
+ "BriefDescription": "The number of instruction fetches that hit in the L1 ITLB. Instruction fetches to a 4KB page.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "bp_tlb_rel",
+ "EventCode": "0x99",
+ "BriefDescription": "The number of ITLB reload requests."
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/cache.json b/tools/perf/pmu-events/arch/x86/amdzen2/cache.json
new file mode 100644
index 000000000000..1c60bfa0f00b
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/amdzen2/cache.json
@@ -0,0 +1,338 @@
+[
+ {
+ "EventName": "l2_request_g1.rd_blk_l",
+ "EventCode": "0x60",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache reads (including hardware and software prefetch).",
+ "UMask": "0x80"
+ },
+ {
+ "EventName": "l2_request_g1.rd_blk_x",
+ "EventCode": "0x60",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache stores.",
+ "UMask": "0x40"
+ },
+ {
+ "EventName": "l2_request_g1.ls_rd_blk_c_s",
+ "EventCode": "0x60",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache shared reads.",
+ "UMask": "0x20"
+ },
+ {
+ "EventName": "l2_request_g1.cacheable_ic_read",
+ "EventCode": "0x60",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Instruction cache reads.",
+ "UMask": "0x10"
+ },
+ {
+ "EventName": "l2_request_g1.change_to_x",
+ "EventCode": "0x60",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache state change requests. Request change to writable, check L2 for current state.",
+ "UMask": "0x8"
+ },
+ {
+ "EventName": "l2_request_g1.prefetch_l2_cmd",
+ "EventCode": "0x60",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). PrefetchL2Cmd.",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "l2_request_g1.l2_hw_pf",
+ "EventCode": "0x60",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). L2 Prefetcher. All prefetches accepted by L2 pipeline, hit or miss. Types of PF and L2 hit/miss broken out in a separate perfmon event.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "l2_request_g1.group2",
+ "EventCode": "0x60",
+ "BriefDescription": "Miscellaneous events covered in more detail by l2_request_g2 (PMCx061).",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "l2_request_g2.group1",
+ "EventCode": "0x61",
+ "BriefDescription": "Miscellaneous events covered in more detail by l2_request_g1 (PMCx060).",
+ "UMask": "0x80"
+ },
+ {
+ "EventName": "l2_request_g2.ls_rd_sized",
+ "EventCode": "0x61",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Data cache read sized.",
+ "UMask": "0x40"
+ },
+ {
+ "EventName": "l2_request_g2.ls_rd_sized_nc",
+ "EventCode": "0x61",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Data cache read sized non-cacheable.",
+ "UMask": "0x20"
+ },
+ {
+ "EventName": "l2_request_g2.ic_rd_sized",
+ "EventCode": "0x61",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Instruction cache read sized.",
+ "UMask": "0x10"
+ },
+ {
+ "EventName": "l2_request_g2.ic_rd_sized_nc",
+ "EventCode": "0x61",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Instruction cache read sized non-cacheable.",
+ "UMask": "0x8"
+ },
+ {
+ "EventName": "l2_request_g2.smc_inval",
+ "EventCode": "0x61",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Self-modifying code invalidates.",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "l2_request_g2.bus_locks_originator",
+ "EventCode": "0x61",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Bus locks.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "l2_request_g2.bus_locks_responses",
+ "EventCode": "0x61",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Bus lock response.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "l2_latency.l2_cycles_waiting_on_fills",
+ "EventCode": "0x62",
+ "BriefDescription": "Total cycles spent waiting for L2 fills to complete from L3 or memory, divided by four. Event counts are for both threads. To calculate average latency, the number of fills from both threads must be used.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "l2_wcb_req.wcb_write",
+ "EventCode": "0x63",
+ "BriefDescription": "LS to L2 WCB write requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) write requests.",
+ "UMask": "0x40"
+ },
+ {
+ "EventName": "l2_wcb_req.wcb_close",
+ "EventCode": "0x63",
+ "BriefDescription": "LS to L2 WCB close requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) close requests.",
+ "UMask": "0x20"
+ },
+ {
+ "EventName": "l2_wcb_req.zero_byte_store",
+ "EventCode": "0x63",
+ "BriefDescription": "LS to L2 WCB zero byte store requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) zero byte store requests.",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "l2_wcb_req.cl_zero",
+ "EventCode": "0x63",
+ "BriefDescription": "LS to L2 WCB cache line zeroing requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) cache line zeroing requests.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "l2_cache_req_stat.ls_rd_blk_cs",
+ "EventCode": "0x64",
+ "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache shared read hit in L2.",
+ "UMask": "0x80"
+ },
+ {
+ "EventName": "l2_cache_req_stat.ls_rd_blk_l_hit_x",
+ "EventCode": "0x64",
+ "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache read hit in L2.",
+ "UMask": "0x40"
+ },
+ {
+ "EventName": "l2_cache_req_stat.ls_rd_blk_l_hit_s",
+ "EventCode": "0x64",
+ "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache read hit on shared line in L2.",
+ "UMask": "0x20"
+ },
+ {
+ "EventName": "l2_cache_req_stat.ls_rd_blk_x",
+ "EventCode": "0x64",
+ "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache store or state change hit in L2.",
+ "UMask": "0x10"
+ },
+ {
+ "EventName": "l2_cache_req_stat.ls_rd_blk_c",
+ "EventCode": "0x64",
+ "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache request miss in L2 (all types).",
+ "UMask": "0x8"
+ },
+ {
+ "EventName": "l2_cache_req_stat.ic_fill_hit_x",
+ "EventCode": "0x64",
+ "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache hit modifiable line in L2.",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "l2_cache_req_stat.ic_fill_hit_s",
+ "EventCode": "0x64",
+ "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache hit clean line in L2.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "l2_cache_req_stat.ic_fill_miss",
+ "EventCode": "0x64",
+ "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache request miss in L2.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "l2_fill_pending.l2_fill_busy",
+ "EventCode": "0x6d",
+ "BriefDescription": "Cycles with fill pending from L2. Total cycles spent with one or more fill requests in flight from L2.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "l2_pf_hit_l2",
+ "EventCode": "0x70",
+ "BriefDescription": "L2 prefetch hit in L2.",
+ "UMask": "0xff"
+ },
+ {
+ "EventName": "l2_pf_miss_l2_hit_l3",
+ "EventCode": "0x71",
+ "BriefDescription": "L2 prefetcher hits in L3. Counts all L2 prefetches accepted by the L2 pipeline which miss the L2 cache and hit the L3.",
+ "UMask": "0xff"
+ },
+ {
+ "EventName": "l2_pf_miss_l2_l3",
+ "EventCode": "0x72",
+ "BriefDescription": "L2 prefetcher misses in L3. All L2 prefetches accepted by the L2 pipeline which miss the L2 and the L3 caches.",
+ "UMask": "0xff"
+ },
+ {
+ "EventName": "ic_fw32",
+ "EventCode": "0x80",
+ "BriefDescription": "The number of 32B fetch windows transferred from IC pipe to DE instruction decoder (includes non-cacheable and cacheable fill responses)."
+ },
+ {
+ "EventName": "ic_fw32_miss",
+ "EventCode": "0x81",
+ "BriefDescription": "The number of 32B fetch windows tried to read the L1 IC and missed in the full tag."
+ },
+ {
+ "EventName": "ic_cache_fill_l2",
+ "EventCode": "0x82",
+ "BriefDescription": "The number of 64 byte instruction cache lines fulfilled from the L2 cache."
+ },
+ {
+ "EventName": "ic_cache_fill_sys",
+ "EventCode": "0x83",
+ "BriefDescription": "The number of 64 byte instruction cache lines fulfilled from system memory or another cache."
+ },
+ {
+ "EventName": "bp_l1_tlb_miss_l2_hit",
+ "EventCode": "0x84",
+ "BriefDescription": "The number of instruction fetches that miss in the L1 ITLB but hit in the L2 ITLB."
+ },
+ {
+ "EventName": "bp_l1_tlb_miss_l2_tlb_miss",
+ "EventCode": "0x85",
+ "BriefDescription": "The number of instruction fetches that miss in both the L1 and L2 TLBs.",
+ "UMask": "0xff"
+ },
+ {
+ "EventName": "bp_l1_tlb_miss_l2_tlb_miss.if1g",
+ "EventCode": "0x85",
+ "BriefDescription": "The number of instruction fetches that miss in both the L1 and L2 TLBs. Instruction fetches to a 1GB page.",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "bp_l1_tlb_miss_l2_tlb_miss.if2m",
+ "EventCode": "0x85",
+ "BriefDescription": "The number of instruction fetches that miss in both the L1 and L2 TLBs. Instruction fetches to a 2MB page.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "bp_l1_tlb_miss_l2_tlb_miss.if4k",
+ "EventCode": "0x85",
+ "BriefDescription": "The number of instruction fetches that miss in both the L1 and L2 TLBs. Instruction fetches to a 4KB page.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "bp_snp_re_sync",
+ "EventCode": "0x86",
+ "BriefDescription": "The number of pipeline restarts caused by invalidating probes that hit on the instruction stream currently being executed. This would happen if the active instruction stream was being modified by another processor in an MP system - typically a highly unlikely event."
+ },
+ {
+ "EventName": "ic_fetch_stall.ic_stall_any",
+ "EventCode": "0x87",
+ "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle for any reason (nothing valid in pipe ICM1).",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "ic_fetch_stall.ic_stall_dq_empty",
+ "EventCode": "0x87",
+ "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to DQ empty.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "ic_fetch_stall.ic_stall_back_pressure",
+ "EventCode": "0x87",
+ "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to back-pressure.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "ic_cache_inval.l2_invalidating_probe",
+ "EventCode": "0x8c",
+ "BriefDescription": "IC line invalidated due to L2 invalidating probe (external or LS). The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "ic_cache_inval.fill_invalidated",
+ "EventCode": "0x8c",
+ "BriefDescription": "IC line invalidated due to overwriting fill response. The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "ic_oc_mode_switch.oc_ic_mode_switch",
+ "EventCode": "0x28a",
+ "BriefDescription": "OC Mode Switch. OC to IC mode switch.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "ic_oc_mode_switch.ic_oc_mode_switch",
+ "EventCode": "0x28a",
+ "BriefDescription": "OC Mode Switch. IC to OC mode switch.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "l3_request_g1.caching_l3_cache_accesses",
+ "EventCode": "0x01",
+ "BriefDescription": "Caching: L3 cache accesses",
+ "UMask": "0x80",
+ "Unit": "L3PMC"
+ },
+ {
+ "EventName": "l3_lookup_state.all_l3_req_typs",
+ "EventCode": "0x04",
+ "BriefDescription": "All L3 Request Types",
+ "UMask": "0xff",
+ "Unit": "L3PMC"
+ },
+ {
+ "EventName": "l3_comb_clstr_state.other_l3_miss_typs",
+ "EventCode": "0x06",
+ "BriefDescription": "Other L3 Miss Request Types",
+ "UMask": "0xfe",
+ "Unit": "L3PMC"
+ },
+ {
+ "EventName": "l3_comb_clstr_state.request_miss",
+ "EventCode": "0x06",
+ "BriefDescription": "L3 cache misses",
+ "UMask": "0x01",
+ "Unit": "L3PMC"
+ },
+ {
+ "EventName": "xi_sys_fill_latency",
+ "EventCode": "0x90",
+ "BriefDescription": "L3 Cache Miss Latency. Total cycles for all transactions divided by 16. Ignores SliceMask and ThreadMask.",
+ "UMask": "0x00",
+ "Unit": "L3PMC"
+ },
+ {
+ "EventName": "xi_ccx_sdp_req1.all_l3_miss_req_typs",
+ "EventCode": "0x9A",
+ "BriefDescription": "All L3 Miss Request Types. Ignores SliceMask and ThreadMask.",
+ "UMask": "0x3f",
+ "Unit": "L3PMC"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/core.json b/tools/perf/pmu-events/arch/x86/amdzen2/core.json
new file mode 100644
index 000000000000..de89e5a44ff1
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/amdzen2/core.json
@@ -0,0 +1,130 @@
+[
+ {
+ "EventName": "ex_ret_instr",
+ "EventCode": "0xc0",
+ "BriefDescription": "Retired Instructions."
+ },
+ {
+ "EventName": "ex_ret_cops",
+ "EventCode": "0xc1",
+ "BriefDescription": "Retired Uops.",
+ "PublicDescription": "The number of micro-ops retired. This count includes all processor activity (instructions, exceptions, interrupts, microcode assists, etc.). The number of events logged per cycle can vary from 0 to 8."
+ },
+ {
+ "EventName": "ex_ret_brn",
+ "EventCode": "0xc2",
+ "BriefDescription": "Retired Branch Instructions.",
+ "PublicDescription": "The number of branch instructions retired. This includes all types of architectural control flow changes, including exceptions and interrupts."
+ },
+ {
+ "EventName": "ex_ret_brn_misp",
+ "EventCode": "0xc3",
+ "BriefDescription": "Retired Branch Instructions Mispredicted.",
+ "PublicDescription": "The number of branch instructions retired, of any type, that were not correctly predicted. This includes those for which prediction is not attempted (far control transfers, exceptions and interrupts)."
+ },
+ {
+ "EventName": "ex_ret_brn_tkn",
+ "EventCode": "0xc4",
+ "BriefDescription": "Retired Taken Branch Instructions.",
+ "PublicDescription": "The number of taken branches that were retired. This includes all types of architectural control flow changes, including exceptions and interrupts."
+ },
+ {
+ "EventName": "ex_ret_brn_tkn_misp",
+ "EventCode": "0xc5",
+ "BriefDescription": "Retired Taken Branch Instructions Mispredicted.",
+ "PublicDescription": "The number of retired taken branch instructions that were mispredicted."
+ },
+ {
+ "EventName": "ex_ret_brn_far",
+ "EventCode": "0xc6",
+ "BriefDescription": "Retired Far Control Transfers.",
+ "PublicDescription": "The number of far control transfers retired including far call/jump/return, IRET, SYSCALL and SYSRET, plus exceptions and interrupts. Far control transfers are not subject to branch prediction."
+ },
+ {
+ "EventName": "ex_ret_brn_resync",
+ "EventCode": "0xc7",
+ "BriefDescription": "Retired Branch Resyncs.",
+ "PublicDescription": "The number of resync branches. These reflect pipeline restarts due to certain microcode assists and events such as writes to the active instruction stream, among other things. Each occurrence reflects a restart penalty similar to a branch mispredict. This is relatively rare."
+ },
+ {
+ "EventName": "ex_ret_near_ret",
+ "EventCode": "0xc8",
+ "BriefDescription": "Retired Near Returns.",
+ "PublicDescription": "The number of near return instructions (RET or RET Iw) retired."
+ },
+ {
+ "EventName": "ex_ret_near_ret_mispred",
+ "EventCode": "0xc9",
+ "BriefDescription": "Retired Near Returns Mispredicted.",
+ "PublicDescription": "The number of near returns retired that were not correctly predicted by the return address predictor. Each such mispredict incurs the same penalty as a mispredicted conditional branch instruction."
+ },
+ {
+ "EventName": "ex_ret_brn_ind_misp",
+ "EventCode": "0xca",
+ "BriefDescription": "Retired Indirect Branch Instructions Mispredicted."
+ },
+ {
+ "EventName": "ex_ret_mmx_fp_instr.sse_instr",
+ "EventCode": "0xcb",
+ "BriefDescription": "SSE instructions (SSE, SSE2, SSE3, SSSE3, SSE4A, SSE41, SSE42, AVX).",
+ "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. SSE instructions (SSE, SSE2, SSE3, SSSE3, SSE4A, SSE41, SSE42, AVX).",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "ex_ret_mmx_fp_instr.mmx_instr",
+ "EventCode": "0xcb",
+ "BriefDescription": "MMX instructions.",
+ "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. MMX instructions.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "ex_ret_mmx_fp_instr.x87_instr",
+ "EventCode": "0xcb",
+ "BriefDescription": "x87 instructions.",
+ "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. x87 instructions.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "ex_ret_cond",
+ "EventCode": "0xd1",
+ "BriefDescription": "Retired Conditional Branch Instructions."
+ },
+ {
+ "EventName": "ex_ret_cond_misp",
+ "EventCode": "0xd2",
+ "BriefDescription": "Retired Conditional Branch Instructions Mispredicted."
+ },
+ {
+ "EventName": "ex_div_busy",
+ "EventCode": "0xd3",
+ "BriefDescription": "Div Cycles Busy count."
+ },
+ {
+ "EventName": "ex_div_count",
+ "EventCode": "0xd4",
+ "BriefDescription": "Div Op Count."
+ },
+ {
+ "EventName": "ex_tagged_ibs_ops.ibs_count_rollover",
+ "EventCode": "0x1cf",
+ "BriefDescription": "Tagged IBS Ops. Number of times an op could not be tagged by IBS because of a previous tagged op that has not retired.",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "ex_tagged_ibs_ops.ibs_tagged_ops_ret",
+ "EventCode": "0x1cf",
+ "BriefDescription": "Tagged IBS Ops. Number of Ops tagged by IBS that retired.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "ex_tagged_ibs_ops.ibs_tagged_ops",
+ "EventCode": "0x1cf",
+ "BriefDescription": "Tagged IBS Ops. Number of Ops tagged by IBS.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "ex_ret_fus_brnch_inst",
+ "EventCode": "0x1d0",
+ "BriefDescription": "Retired Fused Instructions. The number of fuse-branch instructions retired per cycle. The number of events logged per cycle can vary from 0-8."
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/floating-point.json b/tools/perf/pmu-events/arch/x86/amdzen2/floating-point.json
new file mode 100644
index 000000000000..622a0c420e46
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/amdzen2/floating-point.json
@@ -0,0 +1,140 @@
+[
+ {
+ "EventName": "fpu_pipe_assignment.total",
+ "EventCode": "0x00",
+ "BriefDescription": "Total number of fp uOps.",
+ "PublicDescription": "Total number of fp uOps. The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one-cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS.",
+ "UMask": "0xf"
+ },
+ {
+ "EventName": "fpu_pipe_assignment.total3",
+ "EventCode": "0x00",
+ "BriefDescription": "Total number of uOps assigned to pipe 3.",
+ "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one-cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number of uOps assigned to pipe 3.",
+ "UMask": "0x8"
+ },
+ {
+ "EventName": "fpu_pipe_assignment.total2",
+ "EventCode": "0x00",
+ "BriefDescription": "Total number of uOps assigned to pipe 2.",
+ "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one-cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number of uOps assigned to pipe 2.",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "fpu_pipe_assignment.total1",
+ "EventCode": "0x00",
+ "BriefDescription": "Total number of uOps assigned to pipe 1.",
+ "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one-cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number of uOps assigned to pipe 1.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "fpu_pipe_assignment.total0",
+ "EventCode": "0x00",
+ "BriefDescription": "Total number of fp uOps on pipe 0.",
+ "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one-cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number of uOps assigned to pipe 0.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "fp_ret_sse_avx_ops.all",
+ "EventCode": "0x03",
+ "BriefDescription": "All FLOPS. This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15.",
+ "UMask": "0xff"
+ },
+ {
+ "EventName": "fp_ret_sse_avx_ops.mac_flops",
+ "EventCode": "0x03",
+ "BriefDescription": "Multiply-add FLOPS. Multiply-add counts as 2 FLOPS. This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15.",
+ "PublicDescription": "",
+ "UMask": "0x8"
+ },
+ {
+ "EventName": "fp_ret_sse_avx_ops.div_flops",
+ "EventCode": "0x03",
+ "BriefDescription": "Divide/square root FLOPS. This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15.",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "fp_ret_sse_avx_ops.mult_flops",
+ "EventCode": "0x03",
+ "BriefDescription": "Multiply FLOPS. This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "fp_ret_sse_avx_ops.add_sub_flops",
+ "EventCode": "0x03",
+ "BriefDescription": "Add/subtract FLOPS. This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "fp_num_mov_elim_scal_op.optimized",
+ "EventCode": "0x04",
+ "BriefDescription": "Number of Scalar Ops optimized. This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes.",
+ "UMask": "0x8"
+ },
+ {
+ "EventName": "fp_num_mov_elim_scal_op.opt_potential",
+ "EventCode": "0x04",
+ "BriefDescription": "Number of Ops that are candidates for optimization (have Z-bit either set or pass). This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes.",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "fp_num_mov_elim_scal_op.sse_mov_ops_elim",
+ "EventCode": "0x04",
+ "BriefDescription": "Number of SSE Move Ops eliminated. This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "fp_num_mov_elim_scal_op.sse_mov_ops",
+ "EventCode": "0x04",
+ "BriefDescription": "Number of SSE Move Ops. This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "fp_retired_ser_ops.sse_bot_ret",
+ "EventCode": "0x05",
+ "BriefDescription": "SSE bottom-executing uOps retired. The number of serializing Ops retired.",
+ "UMask": "0x8"
+ },
+ {
+ "EventName": "fp_retired_ser_ops.sse_ctrl_ret",
+ "EventCode": "0x05",
+ "BriefDescription": "The number of serializing Ops retired. SSE control word mispredict traps due to mispredictions in RC, FTZ or DAZ, or changes in mask bits.",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "fp_retired_ser_ops.x87_bot_ret",
+ "EventCode": "0x05",
+ "BriefDescription": "x87 bottom-executing uOps retired. The number of serializing Ops retired.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "fp_retired_ser_ops.x87_ctrl_ret",
+ "EventCode": "0x05",
+ "BriefDescription": "x87 control word mispredict traps due to mispredictions in RC or PC, or changes in mask bits. The number of serializing Ops retired.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "fp_disp_faults.ymm_spill_fault",
+ "EventCode": "0x0e",
+ "BriefDescription": "Floating Point Dispatch Faults. YMM spill fault.",
+ "UMask": "0x8"
+ },
+ {
+ "EventName": "fp_disp_faults.ymm_fill_fault",
+ "EventCode": "0x0e",
+ "BriefDescription": "Floating Point Dispatch Faults. YMM fill fault.",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "fp_disp_faults.xmm_fill_fault",
+ "EventCode": "0x0e",
+ "BriefDescription": "Floating Point Dispatch Faults. XMM fill fault.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "fp_disp_faults.x87_fill_fault",
+ "EventCode": "0x0e",
+ "BriefDescription": "Floating Point Dispatch Faults. x87 fill fault.",
+ "UMask": "0x1"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/memory.json b/tools/perf/pmu-events/arch/x86/amdzen2/memory.json
new file mode 100644
index 000000000000..715046b339cb
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/amdzen2/memory.json
@@ -0,0 +1,341 @@
+[
+ {
+ "EventName": "ls_bad_status2.stli_other",
+ "EventCode": "0x24",
+ "BriefDescription": "Non-forwardable conflict; used to reduce STLI's via software. All reasons. Store To Load Interlock (STLI) are loads that were unable to complete because of a possible match with an older store, and the older store could not do STLF for some reason.",
+ "PublicDescription" : "Store-to-load conflicts: A load was unable to complete due to a non-forwardable conflict with an older store. Most commonly, a load's address range partially but not completely overlaps with an uncompleted older store. Software can avoid this problem by using same-size and same-alignment loads and stores when accessing the same data. Vector/SIMD code is particularly susceptible to this problem; software should construct wide vector stores by manipulating vector elements in registers using shuffle/blend/swap instructions prior to storing to memory, instead of using narrow element-by-element stores.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "ls_locks.spec_lock_hi_spec",
+ "EventCode": "0x25",
+ "BriefDescription": "Retired lock instructions. High speculative cacheable lock speculation succeeded.",
+ "UMask": "0x8"
+ },
+ {
+ "EventName": "ls_locks.spec_lock_lo_spec",
+ "EventCode": "0x25",
+ "BriefDescription": "Retired lock instructions. Low speculative cacheable lock speculation succeeded.",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "ls_locks.non_spec_lock",
+ "EventCode": "0x25",
+ "BriefDescription": "Retired lock instructions. Non-speculative lock succeeded.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "ls_locks.bus_lock",
+ "EventCode": "0x25",
+ "BriefDescription": "Retired lock instructions. Bus lock when a locked operation crosses a cache boundary or is done on an uncacheable memory type. Comparable to legacy bus lock.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "ls_ret_cl_flush",
+ "EventCode": "0x26",
+ "BriefDescription": "Number of retired CLFLUSH instructions."
+ },
+ {
+ "EventName": "ls_ret_cpuid",
+ "EventCode": "0x27",
+ "BriefDescription": "Number of retired CPUID instructions."
+ },
+ {
+ "EventName": "ls_dispatch.ld_st_dispatch",
+ "EventCode": "0x29",
+ "BriefDescription": "Dispatch of a single op that performs a load from and store to the same memory address. Number of single ops that do load/store to an address.",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "ls_dispatch.store_dispatch",
+ "EventCode": "0x29",
+ "BriefDescription": "Number of stores dispatched. Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "ls_dispatch.ld_dispatch",
+ "EventCode": "0x29",
+ "BriefDescription": "Number of loads dispatched. Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "ls_smi_rx",
+ "EventCode": "0x2B",
+ "BriefDescription": "Number of SMIs received."
+ },
+ {
+ "EventName": "ls_int_taken",
+ "EventCode": "0x2C",
+ "BriefDescription": "Number of interrupts taken."
+ },
+ {
+ "EventName": "ls_rdtsc",
+ "EventCode": "0x2D",
+ "BriefDescription": "Number of reads of the TSC (RDTSC instructions). The count is speculative."
+ },
+ {
+ "EventName": "ls_stlf",
+ "EventCode": "0x35",
+ "BriefDescription": "Number of STLF hits."
+ },
+ {
+ "EventName": "ls_st_commit_cancel2.st_commit_cancel_wcb_full",
+ "EventCode": "0x37",
+ "BriefDescription": "A non-cacheable store and the non-cacheable commit buffer is full."
+ },
+ {
+ "EventName": "ls_dc_accesses",
+ "EventCode": "0x40",
+ "BriefDescription": "Number of accesses to the dcache for load/store references.",
+ "PublicDescription": "The number of accesses to the data cache for load and store references. This may include certain microcode scratchpad accesses, although these are generally rare. Each increment represents an eight-byte access, although the instruction may only be accessing a portion of that. This event is a speculative event."
+ },
+ {
+ "EventName": "ls_mab_alloc.dc_prefetcher",
+ "EventCode": "0x41",
+ "BriefDescription": "LS MAB Allocates by Type. DC prefetcher.",
+ "UMask": "0x8"
+ },
+ {
+ "EventName": "ls_mab_alloc.stores",
+ "EventCode": "0x41",
+ "BriefDescription": "LS MAB Allocates by Type. Stores.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "ls_mab_alloc.loads",
+ "EventCode": "0x41",
+ "BriefDescription": "LS MAB Allocates by Type. Loads.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "ls_refills_from_sys.ls_mabresp_rmt_dram",
+ "EventCode": "0x43",
+ "BriefDescription": "Demand Data Cache Fills by Data Source. DRAM or IO from different die.",
+ "UMask": "0x40"
+ },
+ {
+ "EventName": "ls_refills_from_sys.ls_mabresp_rmt_cache",
+ "EventCode": "0x43",
+ "BriefDescription": "Demand Data Cache Fills by Data Source. Hit in cache; Remote CCX and the address's Home Node is on a different die.",
+ "UMask": "0x10"
+ },
+ {
+ "EventName": "ls_refills_from_sys.ls_mabresp_lcl_dram",
+ "EventCode": "0x43",
+ "BriefDescription": "Demand Data Cache Fills by Data Source. DRAM or IO from this thread's die.",
+ "UMask": "0x8"
+ },
+ {
+ "EventName": "ls_refills_from_sys.ls_mabresp_lcl_cache",
+ "EventCode": "0x43",
+ "BriefDescription": "Demand Data Cache Fills by Data Source. Hit in cache; local CCX (not Local L2), or Remote CCX and the address's Home Node is on this thread's die.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "ls_refills_from_sys.ls_mabresp_lcl_l2",
+ "EventCode": "0x43",
+ "BriefDescription": "Demand Data Cache Fills by Data Source. Local L2 hit.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "ls_l1_d_tlb_miss.all",
+ "EventCode": "0x45",
+ "BriefDescription": "All L1 DTLB Misses or Reloads.",
+ "UMask": "0xff"
+ },
+ {
+ "EventName": "ls_l1_d_tlb_miss.tlb_reload_1g_l2_miss",
+ "EventCode": "0x45",
+ "BriefDescription": "L1 DTLB Miss. DTLB reload to a 1G page that missed in the L2 TLB.",
+ "UMask": "0x80"
+ },
+ {
+ "EventName": "ls_l1_d_tlb_miss.tlb_reload_2m_l2_miss",
+ "EventCode": "0x45",
+ "BriefDescription": "L1 DTLB Miss. DTLB reload to a 2M page that missed in the L2 TLB.",
+ "UMask": "0x40"
+ },
+ {
+ "EventName": "ls_l1_d_tlb_miss.tlb_reload_coalesced_page_miss",
+ "EventCode": "0x45",
+ "BriefDescription": "L1 DTLB Miss. DTLB reload coalesced page miss.",
+ "UMask": "0x20"
+ },
+ {
+ "EventName": "ls_l1_d_tlb_miss.tlb_reload_4k_l2_miss",
+ "EventCode": "0x45",
+ "BriefDescription": "L1 DTLB Miss. DTLB reload to a 4K page that missed in the L2 TLB.",
+ "UMask": "0x10"
+ },
+ {
+ "EventName": "ls_l1_d_tlb_miss.tlb_reload_1g_l2_hit",
+ "EventCode": "0x45",
+ "BriefDescription": "L1 DTLB Miss. DTLB reload to a 1G page that hit in the L2 TLB.",
+ "UMask": "0x8"
+ },
+ {
+ "EventName": "ls_l1_d_tlb_miss.tlb_reload_2m_l2_hit",
+ "EventCode": "0x45",
+ "BriefDescription": "L1 DTLB Miss. DTLB reload to a 2M page that hit in the L2 TLB.",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "ls_l1_d_tlb_miss.tlb_reload_coalesced_page_hit",
+ "EventCode": "0x45",
+ "BriefDescription": "L1 DTLB Miss. DTLB reload hit a coalesced page.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "ls_l1_d_tlb_miss.tlb_reload_4k_l2_hit",
+ "EventCode": "0x45",
+ "BriefDescription": "L1 DTLB Miss. DTLB reload to a 4K page that hit in the L2 TLB.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "ls_tablewalker.iside",
+ "EventCode": "0x46",
+ "BriefDescription": "Total Page Table Walks on I-side.",
+ "UMask": "0xc"
+ },
+ {
+ "EventName": "ls_tablewalker.ic_type1",
+ "EventCode": "0x46",
+ "BriefDescription": "Total Page Table Walks IC Type 1.",
+ "UMask": "0x8"
+ },
+ {
+ "EventName": "ls_tablewalker.ic_type0",
+ "EventCode": "0x46",
+ "BriefDescription": "Total Page Table Walks IC Type 0.",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "ls_tablewalker.dside",
+ "EventCode": "0x46",
+ "BriefDescription": "Total Page Table Walks on D-side.",
+ "UMask": "0x3"
+ },
+ {
+ "EventName": "ls_tablewalker.dc_type1",
+ "EventCode": "0x46",
+ "BriefDescription": "Total Page Table Walks DC Type 1.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "ls_tablewalker.dc_type0",
+ "EventCode": "0x46",
+ "BriefDescription": "Total Page Table Walks DC Type 0.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "ls_misal_accesses",
+ "EventCode": "0x47",
+ "BriefDescription": "Misaligned loads."
+ },
+ {
+ "EventName": "ls_pref_instr_disp",
+ "EventCode": "0x4b",
+ "BriefDescription": "Software Prefetch Instructions Dispatched (Speculative).",
+ "UMask": "0xff"
+ },
+ {
+ "EventName": "ls_pref_instr_disp.prefetch_nta",
+ "EventCode": "0x4b",
+ "BriefDescription": "Software Prefetch Instructions Dispatched (Speculative). PrefetchNTA instruction. See docAPM3 PREFETCHlevel.",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "ls_pref_instr_disp.prefetch_w",
+ "EventCode": "0x4b",
+ "BriefDescription": "Software Prefetch Instructions Dispatched (Speculative). See docAPM3 PREFETCHW.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "ls_pref_instr_disp.prefetch",
+ "EventCode": "0x4b",
+ "BriefDescription": "Software Prefetch Instructions Dispatched (Speculative). Prefetch_T0_T1_T2. PrefetchT0, T1 and T2 instructions. See docAPM3 PREFETCHlevel.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "ls_inef_sw_pref.mab_mch_cnt",
+ "EventCode": "0x52",
+ "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core. Software PREFETCH instruction saw a match on an already-allocated miss request buffer.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "ls_inef_sw_pref.data_pipe_sw_pf_dc_hit",
+ "EventCode": "0x52",
+ "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core. Software PREFETCH instruction saw a DC hit.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "ls_sw_pf_dc_fill.ls_mabresp_rmt_dram",
+ "EventCode": "0x59",
+ "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. From DRAM (home node remote).",
+ "UMask": "0x40"
+ },
+ {
+ "EventName": "ls_sw_pf_dc_fill.ls_mabresp_rmt_cache",
+ "EventCode": "0x59",
+ "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. From another cache (home node remote).",
+ "UMask": "0x10"
+ },
+ {
+ "EventName": "ls_sw_pf_dc_fill.ls_mabresp_lcl_dram",
+ "EventCode": "0x59",
+ "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. DRAM or IO from this thread's die. From DRAM (home node local).",
+ "UMask": "0x8"
+ },
+ {
+ "EventName": "ls_sw_pf_dc_fill.ls_mabresp_lcl_cache",
+ "EventCode": "0x59",
+ "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. From another cache (home node local).",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "ls_sw_pf_dc_fill.ls_mabresp_lcl_l2",
+ "EventCode": "0x59",
+ "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. Local L2 hit.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "ls_hw_pf_dc_fill.ls_mabresp_rmt_dram",
+ "EventCode": "0x5A",
+ "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From DRAM (home node remote).",
+ "UMask": "0x40"
+ },
+ {
+ "EventName": "ls_hw_pf_dc_fill.ls_mabresp_rmt_cache",
+ "EventCode": "0x5A",
+ "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From another cache (home node remote).",
+ "UMask": "0x10"
+ },
+ {
+ "EventName": "ls_hw_pf_dc_fill.ls_mabresp_lcl_dram",
+ "EventCode": "0x5A",
+ "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From DRAM (home node local).",
+ "UMask": "0x8"
+ },
+ {
+ "EventName": "ls_hw_pf_dc_fill.ls_mabresp_lcl_cache",
+ "EventCode": "0x5A",
+ "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From another cache (home node local).",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "ls_hw_pf_dc_fill.ls_mabresp_lcl_l2",
+ "EventCode": "0x5A",
+ "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. Local L2 hit.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "ls_not_halted_cyc",
+ "EventCode": "0x76",
+ "BriefDescription": "Cycles not in Halt."
+ },
+ {
+ "EventName": "ls_tlb_flush",
+ "EventCode": "0x78",
+ "BriefDescription": "All TLB Flushes"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/other.json b/tools/perf/pmu-events/arch/x86/amdzen2/other.json
new file mode 100644
index 000000000000..e94994d4a60e
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/amdzen2/other.json
@@ -0,0 +1,115 @@
+[
+ {
+ "EventName": "de_dis_uop_queue_empty_di0",
+ "EventCode": "0xa9",
+ "BriefDescription": "Cycles where the Micro-Op Queue is empty."
+ },
+ {
+ "EventName": "de_dis_uops_from_decoder",
+ "EventCode": "0xaa",
+ "BriefDescription": "Ops dispatched from either the decoders, OpCache or both.",
+ "UMask": "0xff"
+ },
+ {
+ "EventName": "de_dis_uops_from_decoder.opcache_dispatched",
+ "EventCode": "0xaa",
+ "BriefDescription": "Count of dispatched Ops from OpCache.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "de_dis_uops_from_decoder.decoder_dispatched",
+ "EventCode": "0xaa",
+ "BriefDescription": "Count of dispatched Ops from Decoder.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "de_dis_dispatch_token_stalls1.fp_misc_rsrc_stall",
+ "EventCode": "0xae",
+ "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. FP Miscellaneous resource unavailable. Applies to the recovery of mispredicts with FP ops.",
+ "UMask": "0x80"
+ },
+ {
+ "EventName": "de_dis_dispatch_token_stalls1.fp_sch_rsrc_stall",
+ "EventCode": "0xae",
+ "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. FP scheduler resource stall. Applies to ops that use the FP scheduler.",
+ "UMask": "0x40"
+ },
+ {
+ "EventName": "de_dis_dispatch_token_stalls1.fp_reg_file_rsrc_stall",
+ "EventCode": "0xae",
+ "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. Floating point register file resource stall. Applies to all FP ops that have a destination register.",
+ "UMask": "0x20"
+ },
+ {
+ "EventName": "de_dis_dispatch_token_stalls1.taken_branch_buffer_rsrc_stall",
+ "EventCode": "0xae",
+ "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. Taken branch buffer resource stall.",
+ "UMask": "0x10"
+ },
+ {
+ "EventName": "de_dis_dispatch_token_stalls1.int_sched_misc_token_stall",
+ "EventCode": "0xae",
+ "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. Integer Scheduler miscellaneous resource stall.",
+ "UMask": "0x8"
+ },
+ {
+ "EventName": "de_dis_dispatch_token_stalls1.store_queue_token_stall",
+ "EventCode": "0xae",
+ "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. Store queue resource stall. Applies to all ops with store semantics.",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "de_dis_dispatch_token_stalls1.load_queue_token_stall",
+ "EventCode": "0xae",
+ "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. Load queue resource stall. Applies to all ops with load semantics.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "de_dis_dispatch_token_stalls1.int_phy_reg_file_token_stall",
+ "EventCode": "0xae",
+ "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. Integer Physical Register File resource stall. Applies to all ops that have an integer destination register.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "de_dis_dispatch_token_stalls0.sc_agu_dispatch_stall",
+ "EventCode": "0xaf",
+ "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. SC AGU dispatch stall.",
+ "UMask": "0x40"
+ },
+ {
+ "EventName": "de_dis_dispatch_token_stalls0.retire_token_stall",
+ "EventCode": "0xaf",
+ "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. RETIRE Tokens unavailable.",
+ "UMask": "0x20"
+ },
+ {
+ "EventName": "de_dis_dispatch_token_stalls0.agsq_token_stall",
+ "EventCode": "0xaf",
+ "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. AGSQ Tokens unavailable.",
+ "UMask": "0x10"
+ },
+ {
+ "EventName": "de_dis_dispatch_token_stalls0.alu_token_stall",
+ "EventCode": "0xaf",
+ "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALU tokens total unavailable.",
+ "UMask": "0x8"
+ },
+ {
+ "EventName": "de_dis_dispatch_token_stalls0.alsq3_0_token_stall",
+ "EventCode": "0xaf",
+ "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ3_0_TokenStall.",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "de_dis_dispatch_token_stalls0.alsq2_token_stall",
+ "EventCode": "0xaf",
+ "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 2 Tokens unavailable.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "de_dis_dispatch_token_stalls0.alsq1_token_stall",
+ "EventCode": "0xaf",
+ "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 1 Tokens unavailable.",
+ "UMask": "0x1"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json b/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json
index 45a34ce4fe89..8cdc7c13dc2a 100644
--- a/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json
@@ -297,7 +297,7 @@
},
{
"BriefDescription": "Fraction of cycles spent in Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD",
"MetricGroup": "Summary",
"MetricName": "Kernel_Utilization"
},
diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json b/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json
index 961fe4395758..16fd8a7490fc 100644
--- a/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json
@@ -115,7 +115,7 @@
},
{
"BriefDescription": "Fraction of cycles spent in Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD",
"MetricGroup": "Summary",
"MetricName": "Kernel_Utilization"
},
diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json b/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json
index 746734ce09be..1eb0415fa11a 100644
--- a/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json
@@ -297,7 +297,7 @@
},
{
"BriefDescription": "Fraction of cycles spent in Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD",
"MetricGroup": "Summary",
"MetricName": "Kernel_Utilization"
},
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
index f94653229dd4..7fde0d2943cd 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
@@ -215,7 +215,8 @@
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
"MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * cycles )",
"MetricGroup": "TLB",
- "MetricName": "Page_Walks_Utilization"
+ "MetricName": "Page_Walks_Utilization",
+ "MetricConstraint": "NO_NMI_WATCHDOG"
},
{
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
@@ -315,7 +316,7 @@
},
{
"BriefDescription": "Fraction of cycles spent in Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD",
"MetricGroup": "Summary",
"MetricName": "Kernel_Utilization"
},
diff --git a/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json b/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json
index 5402cd3120f9..f57c5f3506c2 100644
--- a/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json
@@ -267,7 +267,7 @@
},
{
"BriefDescription": "Fraction of cycles spent in Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD",
"MetricGroup": "Summary",
"MetricName": "Kernel_Utilization"
},
diff --git a/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json b/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json
index 832f3cb40b34..311a005dc35b 100644
--- a/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json
@@ -267,7 +267,7 @@
},
{
"BriefDescription": "Fraction of cycles spent in Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD",
"MetricGroup": "Summary",
"MetricName": "Kernel_Utilization"
},
diff --git a/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json b/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json
index d69b2a8fc0bc..28e25447d3ef 100644
--- a/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json
@@ -285,7 +285,7 @@
},
{
"BriefDescription": "Fraction of cycles spent in Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD",
"MetricGroup": "Summary",
"MetricName": "Kernel_Utilization"
},
diff --git a/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json b/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json
index 5f465fd81315..db23db2e98be 100644
--- a/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json
@@ -285,7 +285,7 @@
},
{
"BriefDescription": "Fraction of cycles spent in Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD",
"MetricGroup": "Summary",
"MetricName": "Kernel_Utilization"
},
diff --git a/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json b/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json
index 3e909b306003..dbb33e00b72a 100644
--- a/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json
@@ -171,7 +171,7 @@
},
{
"BriefDescription": "Fraction of cycles spent in Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD",
"MetricGroup": "Summary",
"MetricName": "Kernel_Utilization"
},
diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv
index 745ced083844..25b06cf98747 100644
--- a/tools/perf/pmu-events/arch/x86/mapfile.csv
+++ b/tools/perf/pmu-events/arch/x86/mapfile.csv
@@ -36,4 +36,5 @@ GenuineIntel-6-55-[56789ABCDEF],v1,cascadelakex,core
GenuineIntel-6-7D,v1,icelake,core
GenuineIntel-6-7E,v1,icelake,core
GenuineIntel-6-86,v1,tremontx,core
-AuthenticAMD-23-[[:xdigit:]]+,v1,amdfam17h,core
+AuthenticAMD-23-([12][0-9A-F]|[0-9A-F]),v2,amdzen1,core
+AuthenticAMD-23-[[:xdigit:]]+,v1,amdzen2,core
diff --git a/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json b/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json
index 50c053235752..fb2d7b8875f8 100644
--- a/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json
@@ -171,7 +171,7 @@
},
{
"BriefDescription": "Fraction of cycles spent in Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD",
"MetricGroup": "Summary",
"MetricName": "Kernel_Utilization"
},
diff --git a/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json b/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json
index e7feb60f9fa9..8704efeb8d31 100644
--- a/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json
@@ -215,7 +215,8 @@
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
"MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * cycles )",
"MetricGroup": "TLB",
- "MetricName": "Page_Walks_Utilization"
+ "MetricName": "Page_Walks_Utilization",
+ "MetricConstraint": "NO_NMI_WATCHDOG"
},
{
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
@@ -303,7 +304,7 @@
},
{
"BriefDescription": "Fraction of cycles spent in Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD",
"MetricGroup": "Summary",
"MetricName": "Kernel_Utilization"
},
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
index 21d7a0c2c2e8..b4f91137f40c 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
@@ -215,7 +215,8 @@
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
"MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * cycles )",
"MetricGroup": "TLB",
- "MetricName": "Page_Walks_Utilization"
+ "MetricName": "Page_Walks_Utilization",
+ "MetricConstraint": "NO_NMI_WATCHDOG"
},
{
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
@@ -315,7 +316,7 @@
},
{
"BriefDescription": "Fraction of cycles spent in Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD",
"MetricGroup": "Summary",
"MetricName": "Kernel_Utilization"
},
diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c
index 079c77b6a2fd..fa86c5f997cc 100644
--- a/tools/perf/pmu-events/jevents.c
+++ b/tools/perf/pmu-events/jevents.c
@@ -323,7 +323,7 @@ static int print_events_table_entry(void *data, char *name, char *event,
char *pmu, char *unit, char *perpkg,
char *metric_expr,
char *metric_name, char *metric_group,
- char *deprecated)
+ char *deprecated, char *metric_constraint)
{
struct perf_entry_data *pd = data;
FILE *outfp = pd->outfp;
@@ -357,6 +357,8 @@ static int print_events_table_entry(void *data, char *name, char *event,
fprintf(outfp, "\t.metric_group = \"%s\",\n", metric_group);
if (deprecated)
fprintf(outfp, "\t.deprecated = \"%s\",\n", deprecated);
+ if (metric_constraint)
+ fprintf(outfp, "\t.metric_constraint = \"%s\",\n", metric_constraint);
fprintf(outfp, "},\n");
return 0;
@@ -375,6 +377,7 @@ struct event_struct {
char *metric_name;
char *metric_group;
char *deprecated;
+ char *metric_constraint;
};
#define ADD_EVENT_FIELD(field) do { if (field) { \
@@ -422,7 +425,7 @@ static int save_arch_std_events(void *data, char *name, char *event,
char *desc, char *long_desc, char *pmu,
char *unit, char *perpkg, char *metric_expr,
char *metric_name, char *metric_group,
- char *deprecated)
+ char *deprecated, char *metric_constraint)
{
struct event_struct *es;
@@ -486,7 +489,7 @@ try_fixup(const char *fn, char *arch_std, char **event, char **desc,
char **name, char **long_desc, char **pmu, char **filter,
char **perpkg, char **unit, char **metric_expr, char **metric_name,
char **metric_group, unsigned long long eventcode,
- char **deprecated)
+ char **deprecated, char **metric_constraint)
{
/* try to find matching event from arch standard values */
struct event_struct *es;
@@ -515,7 +518,7 @@ int json_events(const char *fn,
char *pmu, char *unit, char *perpkg,
char *metric_expr,
char *metric_name, char *metric_group,
- char *deprecated),
+ char *deprecated, char *metric_constraint),
void *data)
{
int err;
@@ -545,6 +548,7 @@ int json_events(const char *fn,
char *metric_name = NULL;
char *metric_group = NULL;
char *deprecated = NULL;
+ char *metric_constraint = NULL;
char *arch_std = NULL;
unsigned long long eventcode = 0;
struct msrmap *msr = NULL;
@@ -629,6 +633,8 @@ int json_events(const char *fn,
addfield(map, &metric_name, "", "", val);
} else if (json_streq(map, field, "MetricGroup")) {
addfield(map, &metric_group, "", "", val);
+ } else if (json_streq(map, field, "MetricConstraint")) {
+ addfield(map, &metric_constraint, "", "", val);
} else if (json_streq(map, field, "MetricExpr")) {
addfield(map, &metric_expr, "", "", val);
for (s = metric_expr; *s; s++)
@@ -670,13 +676,13 @@ int json_events(const char *fn,
&long_desc, &pmu, &filter, &perpkg,
&unit, &metric_expr, &metric_name,
&metric_group, eventcode,
- &deprecated);
+ &deprecated, &metric_constraint);
if (err)
goto free_strings;
}
err = func(data, name, real_event(name, event), desc, long_desc,
pmu, unit, perpkg, metric_expr, metric_name,
- metric_group, deprecated);
+ metric_group, deprecated, metric_constraint);
free_strings:
free(event);
free(desc);
@@ -691,6 +697,7 @@ free_strings:
free(metric_expr);
free(metric_name);
free(metric_group);
+ free(metric_constraint);
free(arch_std);
if (err)
@@ -764,6 +771,19 @@ static void print_mapping_table_suffix(FILE *outfp)
fprintf(outfp, "};\n");
}
+static void print_mapping_test_table(FILE *outfp)
+{
+ /*
+ * Print the terminating, NULL entry.
+ */
+ fprintf(outfp, "{\n");
+ fprintf(outfp, "\t.cpuid = \"testcpu\",\n");
+ fprintf(outfp, "\t.version = \"v1\",\n");
+ fprintf(outfp, "\t.type = \"core\",\n");
+ fprintf(outfp, "\t.table = pme_test_cpu,\n");
+ fprintf(outfp, "},\n");
+}
+
static int process_mapfile(FILE *outfp, char *fpath)
{
int n = 16384;
@@ -841,6 +861,7 @@ static int process_mapfile(FILE *outfp, char *fpath)
}
out:
+ print_mapping_test_table(outfp);
print_mapping_table_suffix(outfp);
fclose(mapfp);
free(line);
@@ -1082,10 +1103,9 @@ static int process_one_file(const char *fpath, const struct stat *sb,
*/
int main(int argc, char *argv[])
{
- int rc;
+ int rc, ret = 0;
int maxfds;
char ldirname[PATH_MAX];
-
const char *arch;
const char *output_file;
const char *start_dirname;
@@ -1156,7 +1176,24 @@ int main(int argc, char *argv[])
/* Make build fail */
fclose(eventsfp);
free_arch_std_events();
- return 1;
+ ret = 1;
+ goto out_free_mapfile;
+ } else if (rc) {
+ goto empty_map;
+ }
+
+ sprintf(ldirname, "%s/test", start_dirname);
+
+ rc = nftw(ldirname, process_one_file, maxfds, 0);
+ if (rc && verbose) {
+ pr_info("%s: Error walking file tree %s rc=%d for test\n",
+ prog, ldirname, rc);
+ goto empty_map;
+ } else if (rc < 0) {
+ /* Make build fail */
+ free_arch_std_events();
+ ret = 1;
+ goto out_free_mapfile;
} else if (rc) {
goto empty_map;
}
@@ -1174,14 +1211,17 @@ int main(int argc, char *argv[])
/* Make build fail */
fclose(eventsfp);
free_arch_std_events();
- return 1;
+ ret = 1;
}
- return 0;
+
+ goto out_free_mapfile;
empty_map:
fclose(eventsfp);
create_empty_mapping(output_file);
free_arch_std_events();
- return 0;
+out_free_mapfile:
+ free(mapfile);
+ return ret;
}
diff --git a/tools/perf/pmu-events/jevents.h b/tools/perf/pmu-events/jevents.h
index 5cda49a42143..2afc8304529e 100644
--- a/tools/perf/pmu-events/jevents.h
+++ b/tools/perf/pmu-events/jevents.h
@@ -8,7 +8,7 @@ int json_events(const char *fn,
char *pmu,
char *unit, char *perpkg, char *metric_expr,
char *metric_name, char *metric_group,
- char *deprecated),
+ char *deprecated, char *metric_constraint),
void *data);
char *get_cpu_str(void);
diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h
index caeb577d36c9..53e76d5d5b37 100644
--- a/tools/perf/pmu-events/pmu-events.h
+++ b/tools/perf/pmu-events/pmu-events.h
@@ -18,6 +18,7 @@ struct pmu_event {
const char *metric_name;
const char *metric_group;
const char *deprecated;
+ const char *metric_constraint;
};
/*
diff --git a/tools/perf/scripts/perl/check-perf-trace.pl b/tools/perf/scripts/perl/check-perf-trace.pl
index 4e7076c20616..d307ce8fd6ed 100644
--- a/tools/perf/scripts/perl/check-perf-trace.pl
+++ b/tools/perf/scripts/perl/check-perf-trace.pl
@@ -28,7 +28,7 @@ sub trace_end
sub irq::softirq_entry
{
my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
- $common_pid, $common_comm,
+ $common_pid, $common_comm, $common_callchain,
$vec) = @_;
print_header($event_name, $common_cpu, $common_secs, $common_nsecs,
@@ -43,7 +43,7 @@ sub irq::softirq_entry
sub kmem::kmalloc
{
my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
- $common_pid, $common_comm,
+ $common_pid, $common_comm, $common_callchain,
$call_site, $ptr, $bytes_req, $bytes_alloc,
$gfp_flags) = @_;
@@ -92,7 +92,7 @@ sub print_unhandled
sub trace_unhandled
{
my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
- $common_pid, $common_comm) = @_;
+ $common_pid, $common_comm, $common_callchain) = @_;
$unhandled{$event_name}++;
}
diff --git a/tools/perf/scripts/perl/failed-syscalls.pl b/tools/perf/scripts/perl/failed-syscalls.pl
index 55e7ae4c5c88..05954a8f363a 100644
--- a/tools/perf/scripts/perl/failed-syscalls.pl
+++ b/tools/perf/scripts/perl/failed-syscalls.pl
@@ -18,7 +18,7 @@ my %failed_syscalls;
sub raw_syscalls::sys_exit
{
my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
- $common_pid, $common_comm,
+ $common_pid, $common_comm, $common_callchain,
$id, $ret) = @_;
if ($ret < 0) {
diff --git a/tools/perf/scripts/perl/rw-by-file.pl b/tools/perf/scripts/perl/rw-by-file.pl
index 168fa5e94b44..92a750b8552b 100644
--- a/tools/perf/scripts/perl/rw-by-file.pl
+++ b/tools/perf/scripts/perl/rw-by-file.pl
@@ -28,7 +28,7 @@ my %writes;
sub syscalls::sys_enter_read
{
my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
- $common_pid, $common_comm, $nr, $fd, $buf, $count) = @_;
+ $common_pid, $common_comm, $common_callchain, $nr, $fd, $buf, $count) = @_;
if ($common_comm eq $for_comm) {
$reads{$fd}{bytes_requested} += $count;
@@ -39,7 +39,7 @@ sub syscalls::sys_enter_read
sub syscalls::sys_enter_write
{
my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
- $common_pid, $common_comm, $nr, $fd, $buf, $count) = @_;
+ $common_pid, $common_comm, $common_callchain, $nr, $fd, $buf, $count) = @_;
if ($common_comm eq $for_comm) {
$writes{$fd}{bytes_written} += $count;
@@ -98,7 +98,7 @@ sub print_unhandled
sub trace_unhandled
{
my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
- $common_pid, $common_comm) = @_;
+ $common_pid, $common_comm, $common_callchain) = @_;
$unhandled{$event_name}++;
}
diff --git a/tools/perf/scripts/perl/rw-by-pid.pl b/tools/perf/scripts/perl/rw-by-pid.pl
index 495698250b2f..d789fe39caab 100644
--- a/tools/perf/scripts/perl/rw-by-pid.pl
+++ b/tools/perf/scripts/perl/rw-by-pid.pl
@@ -24,7 +24,7 @@ my %writes;
sub syscalls::sys_exit_read
{
my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
- $common_pid, $common_comm,
+ $common_pid, $common_comm, $common_callchain,
$nr, $ret) = @_;
if ($ret > 0) {
@@ -40,7 +40,7 @@ sub syscalls::sys_exit_read
sub syscalls::sys_enter_read
{
my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
- $common_pid, $common_comm,
+ $common_pid, $common_comm, $common_callchain,
$nr, $fd, $buf, $count) = @_;
$reads{$common_pid}{bytes_requested} += $count;
@@ -51,7 +51,7 @@ sub syscalls::sys_enter_read
sub syscalls::sys_exit_write
{
my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
- $common_pid, $common_comm,
+ $common_pid, $common_comm, $common_callchain,
$nr, $ret) = @_;
if ($ret <= 0) {
@@ -62,7 +62,7 @@ sub syscalls::sys_exit_write
sub syscalls::sys_enter_write
{
my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
- $common_pid, $common_comm,
+ $common_pid, $common_comm, $common_callchain,
$nr, $fd, $buf, $count) = @_;
$writes{$common_pid}{bytes_written} += $count;
@@ -178,7 +178,7 @@ sub print_unhandled
sub trace_unhandled
{
my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
- $common_pid, $common_comm) = @_;
+ $common_pid, $common_comm, $common_callchain) = @_;
$unhandled{$event_name}++;
}
diff --git a/tools/perf/scripts/perl/rwtop.pl b/tools/perf/scripts/perl/rwtop.pl
index 6473442568a2..eba4df67af6b 100644
--- a/tools/perf/scripts/perl/rwtop.pl
+++ b/tools/perf/scripts/perl/rwtop.pl
@@ -35,7 +35,7 @@ if (!$interval) {
sub syscalls::sys_exit_read
{
my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
- $common_pid, $common_comm,
+ $common_pid, $common_comm, $common_callchain,
$nr, $ret) = @_;
print_check();
@@ -53,7 +53,7 @@ sub syscalls::sys_exit_read
sub syscalls::sys_enter_read
{
my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
- $common_pid, $common_comm,
+ $common_pid, $common_comm, $common_callchain,
$nr, $fd, $buf, $count) = @_;
print_check();
@@ -66,7 +66,7 @@ sub syscalls::sys_enter_read
sub syscalls::sys_exit_write
{
my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
- $common_pid, $common_comm,
+ $common_pid, $common_comm, $common_callchain,
$nr, $ret) = @_;
print_check();
@@ -79,7 +79,7 @@ sub syscalls::sys_exit_write
sub syscalls::sys_enter_write
{
my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
- $common_pid, $common_comm,
+ $common_pid, $common_comm, $common_callchain,
$nr, $fd, $buf, $count) = @_;
print_check();
@@ -197,7 +197,7 @@ sub print_unhandled
sub trace_unhandled
{
my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
- $common_pid, $common_comm) = @_;
+ $common_pid, $common_comm, $common_callchain) = @_;
$unhandled{$event_name}++;
}
diff --git a/tools/perf/scripts/perl/wakeup-latency.pl b/tools/perf/scripts/perl/wakeup-latency.pl
index efcfec5e347a..53444ff4ec7f 100644
--- a/tools/perf/scripts/perl/wakeup-latency.pl
+++ b/tools/perf/scripts/perl/wakeup-latency.pl
@@ -28,7 +28,7 @@ my $total_wakeups = 0;
sub sched::sched_switch
{
my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
- $common_pid, $common_comm,
+ $common_pid, $common_comm, $common_callchain,
$prev_comm, $prev_pid, $prev_prio, $prev_state, $next_comm, $next_pid,
$next_prio) = @_;
@@ -51,7 +51,7 @@ sub sched::sched_switch
sub sched::sched_wakeup
{
my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
- $common_pid, $common_comm,
+ $common_pid, $common_comm, $common_callchain,
$comm, $pid, $prio, $success, $target_cpu) = @_;
$last_wakeup{$target_cpu}{ts} = nsecs($common_secs, $common_nsecs);
@@ -101,7 +101,7 @@ sub print_unhandled
sub trace_unhandled
{
my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
- $common_pid, $common_comm) = @_;
+ $common_pid, $common_comm, $common_callchain) = @_;
$unhandled{$event_name}++;
}
diff --git a/tools/perf/tests/.gitignore b/tools/perf/tests/.gitignore
index 8cc30e731c73..d053b325f728 100644
--- a/tools/perf/tests/.gitignore
+++ b/tools/perf/tests/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
llvm-src-base.c
llvm-src-kbuild.c
llvm-src-prologue.c
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index 1692529639b0..b3d1bf13ca07 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -14,6 +14,7 @@ perf-y += evsel-roundtrip-name.o
perf-y += evsel-tp-sched.o
perf-y += fdarray.o
perf-y += pmu.o
+perf-y += pmu-events.o
perf-y += hists_common.o
perf-y += hists_link.o
perf-y += hists_filter.o
diff --git a/tools/perf/tests/bp_account.c b/tools/perf/tests/bp_account.c
index d0b935356274..489b50604cf2 100644
--- a/tools/perf/tests/bp_account.c
+++ b/tools/perf/tests/bp_account.c
@@ -19,7 +19,7 @@
#include "../perf-sys.h"
#include "cloexec.h"
-volatile long the_var;
+static volatile long the_var;
static noinline int test_function(void)
{
diff --git a/tools/perf/tests/bp_signal.c b/tools/perf/tests/bp_signal.c
index 415903b48578..da8ec1e8e064 100644
--- a/tools/perf/tests/bp_signal.c
+++ b/tools/perf/tests/bp_signal.c
@@ -263,20 +263,20 @@ int test__bp_signal(struct test *test __maybe_unused, int subtest __maybe_unused
if (count1 == 11)
pr_debug("failed: RF EFLAG recursion issue detected\n");
else
- pr_debug("failed: wrong count for bp1%lld\n", count1);
+ pr_debug("failed: wrong count for bp1: %lld, expected 1\n", count1);
}
if (overflows != 3)
- pr_debug("failed: wrong overflow hit\n");
+ pr_debug("failed: wrong overflow (%d) hit, expected 3\n", overflows);
if (overflows_2 != 3)
- pr_debug("failed: wrong overflow_2 hit\n");
+ pr_debug("failed: wrong overflow_2 (%d) hit, expected 3\n", overflows_2);
if (count2 != 3)
- pr_debug("failed: wrong count for bp2\n");
+ pr_debug("failed: wrong count for bp2 (%lld), expected 3\n", count2);
if (count3 != 2)
- pr_debug("failed: wrong count for bp3\n");
+ pr_debug("failed: wrong count for bp3 (%lld), expected 2\n", count3);
return count1 == 1 && overflows == 3 && count2 == 3 && overflows_2 == 3 && count3 == 2 ?
TEST_OK : TEST_FAIL;
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 5f05db75cdd8..b6322eb0f423 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -73,6 +73,10 @@ static struct test generic_tests[] = {
.func = test__pmu,
},
{
+ .desc = "PMU events",
+ .func = test__pmu_events,
+ },
+ {
.desc = "DSO data read",
.func = test__dso_data,
},
@@ -543,8 +547,11 @@ static int run_shell_tests(int argc, const char *argv[], int i, int width)
return -1;
dir = opendir(st.dir);
- if (!dir)
+ if (!dir) {
+ pr_err("failed to open shell test directory: %s\n",
+ st.dir);
return -1;
+ }
for_each_shell_test(dir, st.dir, ent) {
int curr = i++;
diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c
index 4ac56741ac5f..29c793ac7d10 100644
--- a/tools/perf/tests/cpumap.c
+++ b/tools/perf/tests/cpumap.c
@@ -131,7 +131,6 @@ int test__cpu_map_merge(struct test *test __maybe_unused, int subtest __maybe_un
TEST_ASSERT_VAL("failed to merge map: bad nr", c->nr == 5);
cpu_map__snprint(c, buf, sizeof(buf));
TEST_ASSERT_VAL("failed to merge map: bad result", !strcmp(buf, "1-2,4-5,7"));
- perf_cpu_map__put(a);
perf_cpu_map__put(b);
perf_cpu_map__put(c);
return 0;
diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c
index 87843af4c118..28313e59d6f6 100644
--- a/tools/perf/tests/expr.c
+++ b/tools/perf/tests/expr.c
@@ -10,7 +10,7 @@ static int test(struct parse_ctx *ctx, const char *e, double val2)
{
double val;
- if (expr__parse(&val, ctx, &e))
+ if (expr__parse(&val, ctx, e))
TEST_ASSERT_VAL("parse test failed", 0);
TEST_ASSERT_VAL("unexpected value", val == val2);
return 0;
@@ -44,12 +44,12 @@ int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused)
return ret;
p = "FOO/0";
- ret = expr__parse(&val, &ctx, &p);
- TEST_ASSERT_VAL("division by zero", ret == 1);
+ ret = expr__parse(&val, &ctx, p);
+ TEST_ASSERT_VAL("division by zero", ret == -1);
p = "BAR/";
- ret = expr__parse(&val, &ctx, &p);
- TEST_ASSERT_VAL("missing operand", ret == 1);
+ ret = expr__parse(&val, &ctx, p);
+ TEST_ASSERT_VAL("missing operand", ret == -1);
TEST_ASSERT_VAL("find other",
expr__find_other("FOO + BAR + BAZ + BOZO", "FOO", &other, &num_other) == 0);
diff --git a/tools/perf/tests/make b/tools/perf/tests/make
index c850d1664c56..5d0c3a9c47a1 100644
--- a/tools/perf/tests/make
+++ b/tools/perf/tests/make
@@ -28,9 +28,13 @@ endif
PARALLEL_OPT=
ifeq ($(SET_PARALLEL),1)
- cores := $(shell (getconf _NPROCESSORS_ONLN || egrep -c '^processor|^CPU[0-9]' /proc/cpuinfo) 2>/dev/null)
- ifeq ($(cores),0)
- cores := 1
+ ifeq ($(JOBS),)
+ cores := $(shell (getconf _NPROCESSORS_ONLN || egrep -c '^processor|^CPU[0-9]' /proc/cpuinfo) 2>/dev/null)
+ ifeq ($(cores),0)
+ cores := 1
+ endif
+ else
+ cores=$(JOBS)
endif
PARALLEL_OPT="-j$(cores)"
endif
diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c
new file mode 100644
index 000000000000..d64261da8bf7
--- /dev/null
+++ b/tools/perf/tests/pmu-events.c
@@ -0,0 +1,379 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "parse-events.h"
+#include "pmu.h"
+#include "tests.h"
+#include <errno.h>
+#include <stdio.h>
+#include <linux/kernel.h>
+#include <linux/zalloc.h>
+#include "debug.h"
+#include "../pmu-events/pmu-events.h"
+
+struct perf_pmu_test_event {
+ struct pmu_event event;
+
+ /* extra events for aliases */
+ const char *alias_str;
+
+ /*
+ * Note: For when PublicDescription does not exist in the JSON, we
+ * will have no long_desc in pmu_event.long_desc, but long_desc may
+ * be set in the alias.
+ */
+ const char *alias_long_desc;
+};
+
+static struct perf_pmu_test_event test_cpu_events[] = {
+ {
+ .event = {
+ .name = "bp_l1_btb_correct",
+ .event = "event=0x8a",
+ .desc = "L1 BTB Correction",
+ .topic = "branch",
+ },
+ .alias_str = "event=0x8a",
+ .alias_long_desc = "L1 BTB Correction",
+ },
+ {
+ .event = {
+ .name = "bp_l2_btb_correct",
+ .event = "event=0x8b",
+ .desc = "L2 BTB Correction",
+ .topic = "branch",
+ },
+ .alias_str = "event=0x8b",
+ .alias_long_desc = "L2 BTB Correction",
+ },
+ {
+ .event = {
+ .name = "segment_reg_loads.any",
+ .event = "umask=0x80,period=200000,event=0x6",
+ .desc = "Number of segment register loads",
+ .topic = "other",
+ },
+ .alias_str = "umask=0x80,(null)=0x30d40,event=0x6",
+ .alias_long_desc = "Number of segment register loads",
+ },
+ {
+ .event = {
+ .name = "dispatch_blocked.any",
+ .event = "umask=0x20,period=200000,event=0x9",
+ .desc = "Memory cluster signals to block micro-op dispatch for any reason",
+ .topic = "other",
+ },
+ .alias_str = "umask=0x20,(null)=0x30d40,event=0x9",
+ .alias_long_desc = "Memory cluster signals to block micro-op dispatch for any reason",
+ },
+ {
+ .event = {
+ .name = "eist_trans",
+ .event = "umask=0x0,period=200000,event=0x3a",
+ .desc = "Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions",
+ .topic = "other",
+ },
+ .alias_str = "umask=0,(null)=0x30d40,event=0x3a",
+ .alias_long_desc = "Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions",
+ },
+ { /* sentinel */
+ .event = {
+ .name = NULL,
+ },
+ },
+};
+
+static struct perf_pmu_test_event test_uncore_events[] = {
+ {
+ .event = {
+ .name = "uncore_hisi_ddrc.flux_wcmd",
+ .event = "event=0x2",
+ .desc = "DDRC write commands. Unit: hisi_sccl,ddrc ",
+ .topic = "uncore",
+ .long_desc = "DDRC write commands",
+ .pmu = "hisi_sccl,ddrc",
+ },
+ .alias_str = "event=0x2",
+ .alias_long_desc = "DDRC write commands",
+ },
+ {
+ .event = {
+ .name = "unc_cbo_xsnp_response.miss_eviction",
+ .event = "umask=0x81,event=0x22",
+ .desc = "Unit: uncore_cbox A cross-core snoop resulted from L3 Eviction which misses in some processor core",
+ .topic = "uncore",
+ .long_desc = "A cross-core snoop resulted from L3 Eviction which misses in some processor core",
+ .pmu = "uncore_cbox",
+ },
+ .alias_str = "umask=0x81,event=0x22",
+ .alias_long_desc = "A cross-core snoop resulted from L3 Eviction which misses in some processor core",
+ },
+ { /* sentinel */
+ .event = {
+ .name = NULL,
+ },
+ }
+};
+
+const int total_test_events_size = ARRAY_SIZE(test_uncore_events);
+
+static bool is_same(const char *reference, const char *test)
+{
+ if (!reference && !test)
+ return true;
+
+ if (reference && !test)
+ return false;
+
+ if (!reference && test)
+ return false;
+
+ return !strcmp(reference, test);
+}
+
+static struct pmu_events_map *__test_pmu_get_events_map(void)
+{
+ struct pmu_events_map *map;
+
+ for (map = &pmu_events_map[0]; map->cpuid; map++) {
+ if (!strcmp(map->cpuid, "testcpu"))
+ return map;
+ }
+
+ pr_err("could not find test events map\n");
+
+ return NULL;
+}
+
+/* Verify generated events from pmu-events.c are as expected; returns 0 on success, -1 on the first mismatch */
+static int __test_pmu_event_table(void)
+{
+ struct pmu_events_map *map = __test_pmu_get_events_map();
+ struct pmu_event *table;
+ int map_events = 0, expected_events; /* map_events counts name matches found below */
+
+ /* ignore 2x sentinels */
+ expected_events = ARRAY_SIZE(test_cpu_events) +
+ ARRAY_SIZE(test_uncore_events) - 2;
+
+ if (!map)
+ return -1;
+
+ for (table = map->table; table->name; table++) { /* every generated event, up to the NULL-name terminator */
+ struct perf_pmu_test_event *test;
+ struct pmu_event *te;
+ bool found = false;
+
+ if (table->pmu) /* events that name a PMU are checked against the uncore reference table, the rest against the cpu one */
+ test = &test_uncore_events[0];
+ else
+ test = &test_cpu_events[0];
+
+ te = &test->event;
+
+ for (; te->name; test++, te = &test->event) { /* scans the whole reference table; there is no break after a match */
+ if (strcmp(table->name, te->name))
+ continue;
+ found = true;
+ map_events++;
+
+ if (!is_same(table->desc, te->desc)) { /* each field below must match, with NULL only equal to NULL */
+ pr_debug2("testing event table %s: mismatched desc, %s vs %s\n",
+ table->name, table->desc, te->desc);
+ return -1;
+ }
+
+ if (!is_same(table->topic, te->topic)) {
+ pr_debug2("testing event table %s: mismatched topic, %s vs %s\n",
+ table->name, table->topic,
+ te->topic);
+ return -1;
+ }
+
+ if (!is_same(table->long_desc, te->long_desc)) {
+ pr_debug2("testing event table %s: mismatched long_desc, %s vs %s\n",
+ table->name, table->long_desc,
+ te->long_desc);
+ return -1;
+ }
+
+ if (!is_same(table->unit, te->unit)) {
+ pr_debug2("testing event table %s: mismatched unit, %s vs %s\n",
+ table->name, table->unit,
+ te->unit);
+ return -1;
+ }
+
+ if (!is_same(table->perpkg, te->perpkg)) {
+ pr_debug2("testing event table %s: mismatched perpkg, %s vs %s\n",
+ table->name, table->perpkg,
+ te->perpkg);
+ return -1;
+ }
+
+ if (!is_same(table->metric_expr, te->metric_expr)) {
+ pr_debug2("testing event table %s: mismatched metric_expr, %s vs %s\n",
+ table->name, table->metric_expr,
+ te->metric_expr);
+ return -1;
+ }
+
+ if (!is_same(table->metric_name, te->metric_name)) {
+ pr_debug2("testing event table %s: mismatched metric_name, %s vs %s\n",
+ table->name, table->metric_name,
+ te->metric_name);
+ return -1;
+ }
+
+ if (!is_same(table->deprecated, te->deprecated)) {
+ pr_debug2("testing event table %s: mismatched deprecated, %s vs %s\n",
+ table->name, table->deprecated,
+ te->deprecated);
+ return -1;
+ }
+
+ pr_debug("testing event table %s: pass\n", table->name);
+ }
+
+ if (!found) { /* a generated event with no reference entry is a failure */
+ pr_err("testing event table: could not find event %s\n",
+ table->name);
+ return -1;
+ }
+ }
+
+ if (map_events != expected_events) { /* the match count must equal the number of non-sentinel reference events */
+ pr_err("testing event table: found %d, but expected %d\n",
+ map_events, expected_events);
+ return -1;
+ }
+
+ return 0;
+}
+
+static struct perf_pmu_alias *find_alias(const char *test_event, struct list_head *aliases) /* linear search of 'aliases' for an entry whose name equals test_event; NULL when none matches */
+{
+ struct perf_pmu_alias *alias;
+
+ list_for_each_entry(alias, aliases, list)
+ if (!strcmp(test_event, alias->name))
+ return alias;
+
+ return NULL;
+}
+
+/* Verify aliases are as expected; *count is bumped once per matched alias, returns 0 on success and -1 on failure */
+static int __test__pmu_event_aliases(char *pmu_name, int *count)
+{
+ struct perf_pmu_test_event *test;
+ struct pmu_event *te;
+ struct perf_pmu *pmu;
+ LIST_HEAD(aliases);
+ int res = 0;
+ bool use_uncore_table;
+ struct pmu_events_map *map = __test_pmu_get_events_map();
+
+ if (!map)
+ return -1;
+
+ if (is_pmu_core(pmu_name)) { /* pick the reference table according to is_pmu_core() */
+ test = &test_cpu_events[0];
+ use_uncore_table = false;
+ } else {
+ test = &test_uncore_events[0];
+ use_uncore_table = true;
+ }
+
+ pmu = zalloc(sizeof(*pmu)); /* zeroed scratch PMU; only its name is set before use */
+ if (!pmu)
+ return -1;
+
+ pmu->name = pmu_name;
+
+ pmu_add_cpu_aliases_map(&aliases, pmu, map); /* fills the local 'aliases' list from the test map */
+
+ for (te = &test->event; te->name; test++, te = &test->event) {
+ struct perf_pmu_alias *alias = find_alias(te->name, &aliases);
+
+ if (!alias) {
+ bool uncore_match = pmu_uncore_alias_match(pmu_name,
+ te->pmu);
+
+ if (use_uncore_table && !uncore_match) { /* uncore reference event whose PMU does not match pmu_name: a missing alias is fine */
+ pr_debug3("testing aliases PMU %s: skip matching alias %s\n",
+ pmu_name, te->name);
+ continue;
+ }
+
+ pr_debug2("testing aliases PMU %s: no alias, alias_table->name=%s\n",
+ pmu_name, te->name);
+ res = -1;
+ break;
+ }
+
+ if (!is_same(alias->desc, te->desc)) {
+ pr_debug2("testing aliases PMU %s: mismatched desc, %s vs %s\n",
+ pmu_name, alias->desc, te->desc);
+ res = -1;
+ break;
+ }
+
+ if (!is_same(alias->long_desc, test->alias_long_desc)) { /* long_desc and str are compared with the alias_* reference fields, not the event ones */
+ pr_debug2("testing aliases PMU %s: mismatched long_desc, %s vs %s\n",
+ pmu_name, alias->long_desc,
+ test->alias_long_desc);
+ res = -1;
+ break;
+ }
+
+ if (!is_same(alias->str, test->alias_str)) {
+ pr_debug2("testing aliases PMU %s: mismatched str, %s vs %s\n",
+ pmu_name, alias->str, test->alias_str);
+ res = -1;
+ break;
+ }
+
+ if (!is_same(alias->topic, te->topic)) {
+ pr_debug2("testing aliases PMU %s: mismatched topic, %s vs %s\n",
+ pmu_name, alias->topic, te->topic);
+ res = -1;
+ break;
+ }
+
+ (*count)++;
+ pr_debug2("testing aliases PMU %s: matched event %s\n",
+ pmu_name, alias->name);
+ }
+
+ free(pmu); /* NOTE(review): entries added to 'aliases' are not released in this function - possible leak, confirm */
+ return res;
+}
+
+int test__pmu_events(struct test *test __maybe_unused, /* entry point: checks the event table, then the aliases of each PMU from perf_pmu__scan(); 0 on success, -1 on first failure */
+ int subtest __maybe_unused)
+{
+ struct perf_pmu *pmu = NULL;
+
+ if (__test_pmu_event_table())
+ return -1;
+
+ while ((pmu = perf_pmu__scan(pmu)) != NULL) {
+ int count = 0; /* number of aliases matched for this PMU */
+
+ if (list_empty(&pmu->format)) { /* PMUs with an empty format list are not tested */
+ pr_debug2("skipping testing PMU %s\n", pmu->name);
+ continue;
+ }
+
+ if (__test__pmu_event_aliases(pmu->name, &count)) {
+ pr_debug("testing PMU %s aliases: failed\n", pmu->name);
+ return -1;
+ }
+
+ if (count == 0) /* nothing matched is only logged, not treated as a failure */
+ pr_debug3("testing PMU %s aliases: no events to match\n",
+ pmu->name);
+ else
+ pr_debug("testing PMU %s aliases: pass\n", pmu->name);
+ }
+
+ return 0;
+}
diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c
index 2762e1155238..61865699c3f4 100644
--- a/tools/perf/tests/sample-parsing.c
+++ b/tools/perf/tests/sample-parsing.c
@@ -99,6 +99,7 @@ static bool samples_same(const struct perf_sample *s1,
if (type & PERF_SAMPLE_BRANCH_STACK) {
COMP(branch_stack->nr);
+ COMP(branch_stack->hw_idx);
for (i = 0; i < s1->branch_stack->nr; i++)
MCOMP(branch_stack->entries[i]);
}
@@ -150,6 +151,9 @@ static bool samples_same(const struct perf_sample *s1,
if (type & PERF_SAMPLE_PHYS_ADDR)
COMP(phys_addr);
+ if (type & PERF_SAMPLE_CGROUP)
+ COMP(cgroup);
+
if (type & PERF_SAMPLE_AUX) {
COMP(aux_sample.size);
if (memcmp(s1->aux_sample.data, s2->aux_sample.data,
@@ -186,7 +190,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format)
u64 data[64];
} branch_stack = {
/* 1 branch_entry */
- .data = {1, 211, 212, 213},
+ .data = {1, -1ULL, 211, 212, 213},
};
u64 regs[64];
const u64 raw_data[] = {0x123456780a0b0c0dULL, 0x1102030405060708ULL};
@@ -208,6 +212,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format)
.transaction = 112,
.raw_data = (void *)raw_data,
.callchain = &callchain.callchain,
+ .no_hw_idx = false,
.branch_stack = &branch_stack.branch_stack,
.user_regs = {
.abi = PERF_SAMPLE_REGS_ABI_64,
@@ -228,6 +233,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format)
.regs = regs,
},
.phys_addr = 113,
+ .cgroup = 114,
.aux_sample = {
.size = sizeof(aux_data),
.data = (void *)aux_data,
@@ -244,6 +250,9 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format)
if (sample_type & PERF_SAMPLE_REGS_INTR)
evsel.core.attr.sample_regs_intr = sample_regs;
+ if (sample_type & PERF_SAMPLE_BRANCH_STACK)
+ evsel.core.attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
+
for (i = 0; i < sizeof(regs); i++)
*(i + (u8 *)regs) = i & 0xfe;
@@ -331,7 +340,7 @@ int test__sample_parsing(struct test *test __maybe_unused, int subtest __maybe_u
* were added. Please actually update the test rather than just change
* the condition below.
*/
- if (PERF_SAMPLE_MAX > PERF_SAMPLE_AUX << 1) {
+ if (PERF_SAMPLE_MAX > PERF_SAMPLE_CGROUP << 1) {
pr_debug("sample format has changed, some new PERF_SAMPLE_ bit was introduced - test needs updating\n");
return -1;
}
diff --git a/tools/perf/tests/shell/lib/probe_vfs_getname.sh b/tools/perf/tests/shell/lib/probe_vfs_getname.sh
index 7cb99b433888..c2cc42daf924 100644
--- a/tools/perf/tests/shell/lib/probe_vfs_getname.sh
+++ b/tools/perf/tests/shell/lib/probe_vfs_getname.sh
@@ -14,7 +14,7 @@ add_probe_vfs_getname() {
if [ $had_vfs_getname -eq 1 ] ; then
line=$(perf probe -L getname_flags 2>&1 | egrep 'result.*=.*filename;' | sed -r 's/[[:space:]]+([[:digit:]]+)[[:space:]]+result->uptr.*/\1/')
perf probe -q "vfs_getname=getname_flags:${line} pathname=result->name:string" || \
- perf probe $verbose "vfs_getname=getname_flags:${line} pathname=filename:string"
+ perf probe $verbose "vfs_getname=getname_flags:${line} pathname=filename:ustring"
fi
}
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index 9a160fef47c9..61a1ab032080 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -49,6 +49,7 @@ int test__perf_evsel__roundtrip_name_test(struct test *test, int subtest);
int test__perf_evsel__tp_sched_test(struct test *test, int subtest);
int test__syscall_openat_tp_fields(struct test *test, int subtest);
int test__pmu(struct test *test, int subtest);
+int test__pmu_events(struct test *test, int subtest);
int test__attr(struct test *test, int subtest);
int test__dso_data(struct test *test, int subtest);
int test__dso_data_cache(struct test *test, int subtest);
diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h
index 5a61043c2ff7..d6dfe68a7612 100644
--- a/tools/perf/trace/beauty/beauty.h
+++ b/tools/perf/trace/beauty/beauty.h
@@ -213,6 +213,8 @@ size_t syscall_arg__scnprintf_x86_arch_prctl_code(char *bf, size_t size, struct
size_t syscall_arg__scnprintf_prctl_option(char *bf, size_t size, struct syscall_arg *arg);
#define SCA_PRCTL_OPTION syscall_arg__scnprintf_prctl_option
+extern struct strarray strarray__prctl_options;
+
size_t syscall_arg__scnprintf_prctl_arg2(char *bf, size_t size, struct syscall_arg *arg);
#define SCA_PRCTL_ARG2 syscall_arg__scnprintf_prctl_arg2
diff --git a/tools/perf/trace/beauty/clone.c b/tools/perf/trace/beauty/clone.c
index 062ca849c8fd..f4db894e0af6 100644
--- a/tools/perf/trace/beauty/clone.c
+++ b/tools/perf/trace/beauty/clone.c
@@ -46,6 +46,7 @@ static size_t clone__scnprintf_flags(unsigned long flags, char *bf, size_t size,
P_FLAG(NEWNET);
P_FLAG(IO);
P_FLAG(CLEAR_SIGHAND);
+ P_FLAG(INTO_CGROUP);
#undef P_FLAG
if (flags)
diff --git a/tools/perf/trace/beauty/mmap.c b/tools/perf/trace/beauty/mmap.c
index 9fa771a90d79..862c8331dded 100644
--- a/tools/perf/trace/beauty/mmap.c
+++ b/tools/perf/trace/beauty/mmap.c
@@ -69,6 +69,7 @@ static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
P_MREMAP_FLAG(MAYMOVE);
P_MREMAP_FLAG(FIXED);
+ P_MREMAP_FLAG(DONTUNMAP);
#undef P_MREMAP_FLAG
if (flags)
diff --git a/tools/perf/trace/beauty/prctl.c b/tools/perf/trace/beauty/prctl.c
index ba2179abed00..6fe5ad5f5d3a 100644
--- a/tools/perf/trace/beauty/prctl.c
+++ b/tools/perf/trace/beauty/prctl.c
@@ -11,9 +11,10 @@
#include "trace/beauty/generated/prctl_option_array.c"
+DEFINE_STRARRAY(prctl_options, "PR_");
+
static size_t prctl__scnprintf_option(int option, char *bf, size_t size, bool show_prefix)
{
- static DEFINE_STRARRAY(prctl_options, "PR_");
return strarray__scnprintf(&strarray__prctl_options, bf, size, "%d", show_prefix, option);
}
diff --git a/tools/perf/trace/beauty/sockaddr.c b/tools/perf/trace/beauty/sockaddr.c
index 173c8f760763..e0c13e6a5788 100644
--- a/tools/perf/trace/beauty/sockaddr.c
+++ b/tools/perf/trace/beauty/sockaddr.c
@@ -72,5 +72,5 @@ size_t syscall_arg__scnprintf_sockaddr(char *bf, size_t size, struct syscall_arg
if (arg->augmented.args)
return syscall_arg__scnprintf_augmented_sockaddr(arg, bf, size);
- return scnprintf(bf, size, "%#x", arg->val);
+ return scnprintf(bf, size, "%#lx", arg->val);
}
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index badbddbb30f8..9023267e5643 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -754,10 +754,9 @@ static int annotate_browser__run(struct annotate_browser *browser,
"? Search string backwards\n");
continue;
case 'r':
- {
- script_browse(NULL, NULL);
- continue;
- }
+ script_browse(NULL, NULL);
+ annotate_browser__show(&browser->b, title, help);
+ continue;
case 'k':
notes->options->show_linenr = !notes->options->show_linenr;
break;
@@ -834,13 +833,13 @@ show_sup_ins:
map_symbol__annotation_dump(ms, evsel, browser->opts);
continue;
case 't':
- if (notes->options->show_total_period) {
- notes->options->show_total_period = false;
- notes->options->show_nr_samples = true;
- } else if (notes->options->show_nr_samples)
- notes->options->show_nr_samples = false;
+ if (symbol_conf.show_total_period) {
+ symbol_conf.show_total_period = false;
+ symbol_conf.show_nr_samples = true;
+ } else if (symbol_conf.show_nr_samples)
+ symbol_conf.show_nr_samples = false;
else
- notes->options->show_total_period = true;
+ symbol_conf.show_total_period = true;
annotation__update_column_widths(notes);
continue;
case 'c':
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index d4d3558fdef4..487e54ef56a9 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -18,7 +18,9 @@
#include "../../util/evlist.h"
#include "../../util/header.h"
#include "../../util/hist.h"
+#include "../../util/machine.h"
#include "../../util/map.h"
+#include "../../util/maps.h"
#include "../../util/symbol.h"
#include "../../util/map_symbol.h"
#include "../../util/branch.h"
@@ -391,6 +393,57 @@ static void hist_entry__init_have_children(struct hist_entry *he)
he->init_have_children = true;
}
+static bool hist_browser__selection_has_children(struct hist_browser *browser) /* has_children flag of the selected row; false when there is no selection */
+{
+ struct hist_entry *he = browser->he_selection;
+ struct map_symbol *ms = browser->selection;
+
+ if (!he || !ms)
+ return false;
+
+ if (ms == &he->ms) /* the selection is the hist entry's own map_symbol */
+ return he->has_children;
+
+ return container_of(ms, struct callchain_list, ms)->has_children; /* otherwise ms is treated as embedded in a callchain_list */
+}
+
+static bool hist_browser__he_selection_unfolded(struct hist_browser *browser) /* unfolded flag of the selected hist entry; false when nothing is selected */
+{
+ return browser->he_selection ? browser->he_selection->unfolded : false;
+}
+
+static bool hist_browser__selection_unfolded(struct hist_browser *browser) /* unfolded flag of the selected row (hist entry or callchain entry); false when there is no selection */
+{
+ struct hist_entry *he = browser->he_selection;
+ struct map_symbol *ms = browser->selection;
+
+ if (!he || !ms)
+ return false;
+
+ if (ms == &he->ms) /* the selection is the hist entry itself */
+ return he->unfolded;
+
+ return container_of(ms, struct callchain_list, ms)->unfolded; /* otherwise ms is treated as embedded in a callchain_list */
+}
+
+static char *hist_browser__selection_sym_name(struct hist_browser *browser, char *bf, size_t size) /* formats the selected row's symbol name using bf/size; returns NULL when there is no selection */
+{
+ struct hist_entry *he = browser->he_selection;
+ struct map_symbol *ms = browser->selection;
+ struct callchain_list *callchain_entry;
+
+ if (!he || !ms)
+ return NULL;
+
+ if (ms == &he->ms) { /* hist entry selected: the result points into bf */
+ hist_entry__sym_snprintf(he, bf, size, 0);
+ return bf + 4; // skip the level, e.g. '[k] '
+ }
+
+ callchain_entry = container_of(ms, struct callchain_list, ms);
+ return callchain_list__sym_name(callchain_entry, bf, size, browser->show_dso); /* callchain row: returns whatever callchain_list__sym_name() yields */
+}
+
static bool hist_browser__toggle_fold(struct hist_browser *browser)
{
struct hist_entry *he = browser->he_selection;
@@ -624,10 +677,81 @@ static int hist_browser__title(struct hist_browser *browser, char *bf, size_t si
return browser->title ? browser->title(browser, bf, size) : 0;
}
+static int hist_browser__handle_hotkey(struct hist_browser *browser, bool warn_lost_event, char *title, size_t size, int key) /* handles keys that act on the browser itself; returns 0 when the key was consumed, -1 when the caller has to deal with it */
+{
+ switch (key) {
+ case K_TIMER: { /* periodic refresh: run the timer callback, update the entry count and redraw the title */
+ struct hist_browser_timer *hbt = browser->hbt;
+ u64 nr_entries;
+
+ WARN_ON_ONCE(!hbt);
+
+ if (hbt)
+ hbt->timer(hbt->arg);
+
+ if (hist_browser__has_filter(browser) || symbol_conf.report_hierarchy)
+ hist_browser__update_nr_entries(browser);
+
+ nr_entries = hist_browser__nr_entries(browser);
+ ui_browser__update_nr_entries(&browser->b, nr_entries);
+
+ if (warn_lost_event &&
+ (browser->hists->stats.nr_lost_warned !=
+ browser->hists->stats.nr_events[PERF_RECORD_LOST])) { /* warn only when the lost-event count changed since the last warning */
+ browser->hists->stats.nr_lost_warned =
+ browser->hists->stats.nr_events[PERF_RECORD_LOST];
+ ui_browser__warn_lost_events(&browser->b);
+ }
+
+ hist_browser__title(browser, title, size);
+ ui_browser__show_title(&browser->b, title);
+ break;
+ }
+ case 'D': { /* Debug */
+ struct hist_entry *h = rb_entry(browser->b.top, struct hist_entry, rb_node);
+ static int seq; /* static, so the value persists across calls */
+
+ ui_helpline__pop();
+ ui_helpline__fpush("%d: nr_ent=(%d,%d), etl: %d, rows=%d, idx=%d, fve: idx=%d, row_off=%d, nrows=%d",
+ seq++, browser->b.nr_entries, browser->hists->nr_entries,
+ browser->b.extra_title_lines, browser->b.rows,
+ browser->b.index, browser->b.top_idx, h->row_offset, h->nr_rows);
+ }
+ break;
+ case 'C':
+ /* Collapse the whole world. */
+ hist_browser__set_folding(browser, false);
+ break;
+ case 'c':
+ /* Collapse the selected entry. */
+ hist_browser__set_folding_selected(browser, false);
+ break;
+ case 'E':
+ /* Expand the whole world. */
+ hist_browser__set_folding(browser, true);
+ break;
+ case 'e':
+ /* Expand or collapse the selected entry (toggles its current state). */
+ hist_browser__set_folding_selected(browser, !hist_browser__he_selection_unfolded(browser));
+ break;
+ case 'H':
+ browser->show_headers = !browser->show_headers;
+ hist_browser__update_rows(browser);
+ break;
+ case '+':
+ if (hist_browser__toggle_fold(browser)) /* consumed only when toggle_fold() returns true; otherwise reported as unhandled */
+ break;
+ /* fall thru */
+ default:
+ return -1;
+ }
+
+ return 0;
+}
+
int hist_browser__run(struct hist_browser *browser, const char *help,
- bool warn_lost_event)
+ bool warn_lost_event, int key)
{
- int key;
char title[160];
struct hist_browser_timer *hbt = browser->hbt;
int delay_secs = hbt ? hbt->refresh : 0;
@@ -640,79 +764,14 @@ int hist_browser__run(struct hist_browser *browser, const char *help,
if (ui_browser__show(&browser->b, title, "%s", help) < 0)
return -1;
+ if (key && hist_browser__handle_hotkey(browser, warn_lost_event, title, sizeof(title), key))
+ goto out;
+
while (1) {
key = ui_browser__run(&browser->b, delay_secs);
- switch (key) {
- case K_TIMER: {
- u64 nr_entries;
-
- WARN_ON_ONCE(!hbt);
-
- if (hbt)
- hbt->timer(hbt->arg);
-
- if (hist_browser__has_filter(browser) ||
- symbol_conf.report_hierarchy)
- hist_browser__update_nr_entries(browser);
-
- nr_entries = hist_browser__nr_entries(browser);
- ui_browser__update_nr_entries(&browser->b, nr_entries);
-
- if (warn_lost_event &&
- (browser->hists->stats.nr_lost_warned !=
- browser->hists->stats.nr_events[PERF_RECORD_LOST])) {
- browser->hists->stats.nr_lost_warned =
- browser->hists->stats.nr_events[PERF_RECORD_LOST];
- ui_browser__warn_lost_events(&browser->b);
- }
-
- hist_browser__title(browser, title, sizeof(title));
- ui_browser__show_title(&browser->b, title);
- continue;
- }
- case 'D': { /* Debug */
- static int seq;
- struct hist_entry *h = rb_entry(browser->b.top,
- struct hist_entry, rb_node);
- ui_helpline__pop();
- ui_helpline__fpush("%d: nr_ent=(%d,%d), etl: %d, rows=%d, idx=%d, fve: idx=%d, row_off=%d, nrows=%d",
- seq++, browser->b.nr_entries,
- browser->hists->nr_entries,
- browser->b.extra_title_lines,
- browser->b.rows,
- browser->b.index,
- browser->b.top_idx,
- h->row_offset, h->nr_rows);
- }
- break;
- case 'C':
- /* Collapse the whole world. */
- hist_browser__set_folding(browser, false);
- break;
- case 'c':
- /* Collapse the selected entry. */
- hist_browser__set_folding_selected(browser, false);
- break;
- case 'E':
- /* Expand the whole world. */
- hist_browser__set_folding(browser, true);
+ if (hist_browser__handle_hotkey(browser, warn_lost_event, title, sizeof(title), key))
break;
- case 'e':
- /* Expand the selected entry. */
- hist_browser__set_folding_selected(browser, true);
- break;
- case 'H':
- browser->show_headers = !browser->show_headers;
- hist_browser__update_rows(browser);
- break;
- case K_ENTER:
- if (hist_browser__toggle_fold(browser))
- break;
- /* fall thru */
- default:
- goto out;
- }
}
out:
ui_browser__hide(&browser->b);
@@ -2339,7 +2398,7 @@ close_file_and_continue:
closedir(pwd_dir);
if (nr_options) {
- choice = ui__popup_menu(nr_options, options);
+ choice = ui__popup_menu(nr_options, options, NULL);
if (choice < nr_options && choice >= 0) {
tmp = strdup(abs_path[choice]);
if (tmp) {
@@ -2406,12 +2465,41 @@ do_annotate(struct hist_browser *browser, struct popup_action *act)
return 0;
}
+static struct symbol *symbol__new_unresolved(u64 addr, struct map *map) /* creates a placeholder symbol named after addr and inserts it into map->dso; returns NULL on failure */
+{
+ struct annotated_source *src;
+ struct symbol *sym;
+ char name[64];
+
+ snprintf(name, sizeof(name), "%.*" PRIx64, BITS_PER_LONG / 4, addr); /* addr in hex, zero-padded to BITS_PER_LONG / 4 digits */
+
+ sym = symbol__new(addr, ANNOTATION_DUMMY_LEN, 0, 0, name);
+ if (sym) {
+ src = symbol__hists(sym, 1); /* presumably sets up the annotation histograms - TODO confirm */
+ if (!src) {
+ symbol__delete(sym); /* do not hand back a symbol that lacks its annotated_source */
+ return NULL;
+ }
+
+ dso__insert_symbol(map->dso, sym);
+ }
+
+ return sym;
+}
+
static int
add_annotate_opt(struct hist_browser *browser __maybe_unused,
struct popup_action *act, char **optstr,
- struct map_symbol *ms)
+ struct map_symbol *ms,
+ u64 addr)
{
- if (ms->sym == NULL || ms->map->dso->annotate_warned)
+ if (!ms->map || !ms->map->dso || ms->map->dso->annotate_warned)
+ return 0;
+
+ if (!ms->sym)
+ ms->sym = symbol__new_unresolved(addr, ms->map);
+
+ if (ms->sym == NULL || symbol__annotation(ms->sym)->src == NULL)
return 0;
if (asprintf(optstr, "Annotate %s", ms->sym->name) < 0)
@@ -2484,11 +2572,8 @@ add_thread_opt(struct hist_browser *browser, struct popup_action *act,
return 1;
}
-static int
-do_zoom_dso(struct hist_browser *browser, struct popup_action *act)
+static int hists_browser__zoom_map(struct hist_browser *browser, struct map *map)
{
- struct map *map = act->ms.map;
-
if (!hists__has(browser->hists, dso) || map == NULL)
return 0;
@@ -2511,13 +2596,19 @@ do_zoom_dso(struct hist_browser *browser, struct popup_action *act)
}
static int
+do_zoom_dso(struct hist_browser *browser, struct popup_action *act) /* popup-action wrapper: zooms on the map stored in act->ms.map */
+{
+ return hists_browser__zoom_map(browser, act->ms.map);
+}
+
+static int
add_dso_opt(struct hist_browser *browser, struct popup_action *act,
char **optstr, struct map *map)
{
if (!hists__has(browser->hists, dso) || map == NULL)
return 0;
- if (asprintf(optstr, "Zoom %s %s DSO",
+ if (asprintf(optstr, "Zoom %s %s DSO (use the 'k' hotkey to zoom directly into the kernel)",
browser->hists->dso_filter ? "out of" : "into",
__map__is_kernel(map) ? "the Kernel" : map->dso->short_name) < 0)
return 0;
@@ -2527,6 +2618,28 @@ add_dso_opt(struct hist_browser *browser, struct popup_action *act,
return 1;
}
+static int do_toggle_callchain(struct hist_browser *browser, struct popup_action *act __maybe_unused) /* popup action: toggles folding of the current selection; always returns 0 */
+{
+ hist_browser__toggle_fold(browser);
+ return 0;
+}
+
+static int add_callchain_toggle_opt(struct hist_browser *browser, struct popup_action *act, char **optstr) /* adds the Expand/Collapse callchain popup entry; returns 1 when added, 0 otherwise */
+{
+ char sym_name[512];
+
+ if (!hist_browser__selection_has_children(browser)) /* also false when nothing is selected, so the name lookup below has a selection */
+ return 0;
+
+ if (asprintf(optstr, "%s [%s] callchain (one level, same as '+' hotkey, use 'e'/'c' for the whole main level entry)",
+ hist_browser__selection_unfolded(browser) ? "Collapse" : "Expand",
+ hist_browser__selection_sym_name(browser, sym_name, sizeof(sym_name))) < 0)
+ return 0;
+
+ act->fn = do_toggle_callchain;
+ return 1;
+}
+
static int
do_browse_map(struct hist_browser *browser __maybe_unused,
struct popup_action *act)
@@ -2858,12 +2971,15 @@ static int perf_evsel__hists_browse(struct evsel *evsel, int nr_events,
"For symbolic views (--sort has sym):\n\n" \
"ENTER Zoom into DSO/Threads & Annotate current symbol\n" \
"ESC Zoom out\n" \
+ "+ Expand/Collapse one callchain level\n" \
"a Annotate current symbol\n" \
"C Collapse all callchains\n" \
"d Zoom into current DSO\n" \
+ "e Expand/Collapse main entry callchains\n" \
"E Expand all callchains\n" \
"F Toggle percentage of filtered entries\n" \
"H Display column headers\n" \
+ "k Zoom into the kernel map\n" \
"L Change percent limit\n" \
"m Display context menu\n" \
"S Zoom into current Processor Socket\n" \
@@ -2876,7 +2992,8 @@ static int perf_evsel__hists_browse(struct evsel *evsel, int nr_events,
"s Switch to another data file in PWD\n"
"t Zoom into current Thread\n"
"V Verbose (DSO names in callchains, etc)\n"
- "/ Filter symbol by name";
+ "/ Filter symbol by name\n"
+ "0-9 Sort by event n in group";
static const char top_help[] = HIST_BROWSER_HELP_COMMON
"P Print histograms to perf.hist.N\n"
"t Zoom into current Thread\n"
@@ -2914,13 +3031,13 @@ static int perf_evsel__hists_browse(struct evsel *evsel, int nr_events,
while (1) {
struct thread *thread = NULL;
struct map *map = NULL;
- int choice = 0;
+ int choice;
int socked_id = -1;
- nr_options = 0;
-
- key = hist_browser__run(browser, helpline,
- warn_lost_event);
+ key = 0; // reset key
+do_hotkey: // key came straight from options ui__popup_menu()
+ choice = nr_options = 0;
+ key = hist_browser__run(browser, helpline, warn_lost_event, key);
if (browser->he_selection != NULL) {
thread = hist_browser__selected_thread(browser);
@@ -2937,6 +3054,31 @@ static int perf_evsel__hists_browse(struct evsel *evsel, int nr_events,
* go to the next or previous
*/
goto out_free_stack;
+ case '0' ... '9':
+ if (!symbol_conf.event_group ||
+ evsel->core.nr_members < 2) {
+ snprintf(buf, sizeof(buf),
+ "Sort by index only available with group events!");
+ helpline = buf;
+ continue;
+ }
+
+ if (key - '0' == symbol_conf.group_sort_idx)
+ continue;
+
+ symbol_conf.group_sort_idx = key - '0';
+
+ if (symbol_conf.group_sort_idx >= evsel->core.nr_members) {
+ snprintf(buf, sizeof(buf),
+ "Max event group index to sort is %d (index from 0 to %d)",
+ evsel->core.nr_members - 1,
+ evsel->core.nr_members - 1);
+ helpline = buf;
+ continue;
+ }
+
+ key = K_RELOAD;
+ goto out_free_stack;
case 'a':
if (!hists__has(hists, sym)) {
ui_browser__warning(&browser->b, delay_secs * 2,
@@ -2945,13 +3087,45 @@ static int perf_evsel__hists_browse(struct evsel *evsel, int nr_events,
continue;
}
- if (browser->selection == NULL ||
- browser->selection->sym == NULL ||
- browser->selection->map->dso->annotate_warned)
+ if (!browser->selection ||
+ !browser->selection->map ||
+ !browser->selection->map->dso ||
+ browser->selection->map->dso->annotate_warned) {
continue;
+ }
+
+ if (!browser->selection->sym) {
+ if (!browser->he_selection)
+ continue;
+
+ if (sort__mode == SORT_MODE__BRANCH) {
+ bi = browser->he_selection->branch_info;
+ if (!bi || !bi->to.ms.map)
+ continue;
+
+ actions->ms.sym = symbol__new_unresolved(bi->to.al_addr, bi->to.ms.map);
+ actions->ms.map = bi->to.ms.map;
+ } else {
+ actions->ms.sym = symbol__new_unresolved(browser->he_selection->ip,
+ browser->selection->map);
+ actions->ms.map = browser->selection->map;
+ }
+
+ if (!actions->ms.sym)
+ continue;
+ } else {
+ if (symbol__annotation(browser->selection->sym)->src == NULL) {
+ ui_browser__warning(&browser->b, delay_secs * 2,
+ "No samples for the \"%s\" symbol.\n\n"
+ "Probably appeared just in a callchain",
+ browser->selection->sym->name);
+ continue;
+ }
+
+ actions->ms.map = browser->selection->map;
+ actions->ms.sym = browser->selection->sym;
+ }
- actions->ms.map = browser->selection->map;
- actions->ms.sym = browser->selection->sym;
do_annotate(browser, actions);
continue;
case 'P':
@@ -2961,6 +3135,10 @@ static int perf_evsel__hists_browse(struct evsel *evsel, int nr_events,
actions->ms.map = map;
do_zoom_dso(browser, actions);
continue;
+ case 'k':
+ if (browser->selection != NULL)
+ hists_browser__zoom_map(browser, browser->selection->maps->machine->vmlinux_map);
+ continue;
case 'V':
verbose = (verbose + 1) % 4;
browser->show_dso = verbose > 0;
@@ -3062,6 +3240,7 @@ static int perf_evsel__hists_browse(struct evsel *evsel, int nr_events,
continue;
}
+ actions->ms.map = map;
top = pstack__peek(browser->pstack);
if (top == &browser->hists->dso_filter) {
/*
@@ -3118,23 +3297,27 @@ static int perf_evsel__hists_browse(struct evsel *evsel, int nr_events,
nr_options += add_annotate_opt(browser,
&actions[nr_options],
&options[nr_options],
- &bi->from.ms);
+ &bi->from.ms,
+ bi->from.al_addr);
if (bi->to.ms.sym != bi->from.ms.sym)
nr_options += add_annotate_opt(browser,
&actions[nr_options],
&options[nr_options],
- &bi->to.ms);
+ &bi->to.ms,
+ bi->to.al_addr);
} else {
nr_options += add_annotate_opt(browser,
&actions[nr_options],
&options[nr_options],
- browser->selection);
+ browser->selection,
+ browser->he_selection->ip);
}
skip_annotation:
nr_options += add_thread_opt(browser, &actions[nr_options],
&options[nr_options], thread);
nr_options += add_dso_opt(browser, &actions[nr_options],
&options[nr_options], map);
+ nr_options += add_callchain_toggle_opt(browser, &actions[nr_options], &options[nr_options]);
nr_options += add_map_opt(browser, &actions[nr_options],
&options[nr_options],
browser->selection ?
@@ -3193,10 +3376,13 @@ skip_scripting:
do {
struct popup_action *act;
- choice = ui__popup_menu(nr_options, options);
- if (choice == -1 || choice >= nr_options)
+ choice = ui__popup_menu(nr_options, options, &key);
+ if (choice == -1)
break;
+ if (choice == nr_options)
+ goto do_hotkey;
+
act = &actions[choice];
key = act->fn(browser, act);
} while (key == 1);
@@ -3335,6 +3521,7 @@ browse_hists:
pos = perf_evsel__prev(pos);
goto browse_hists;
case K_SWITCH_INPUT_DATA:
+ case K_RELOAD:
case 'q':
case CTRL('c'):
goto out;
@@ -3492,7 +3679,7 @@ int block_hists_tui_browse(struct block_hist *bh, struct evsel *evsel,
memset(&action, 0, sizeof(action));
while (1) {
- key = hist_browser__run(browser, "? - help", true);
+ key = hist_browser__run(browser, "? - help", true, 0);
switch (key) {
case 'q':
diff --git a/tools/perf/ui/browsers/hists.h b/tools/perf/ui/browsers/hists.h
index 078f2f2c7abd..1e938d9ffa5e 100644
--- a/tools/perf/ui/browsers/hists.h
+++ b/tools/perf/ui/browsers/hists.h
@@ -34,7 +34,7 @@ struct hist_browser {
struct hist_browser *hist_browser__new(struct hists *hists);
void hist_browser__delete(struct hist_browser *browser);
int hist_browser__run(struct hist_browser *browser, const char *help,
- bool warn_lost_event);
+ bool warn_lost_event, int key);
void hist_browser__init(struct hist_browser *browser,
struct hists *hists);
#endif /* _PERF_UI_BROWSER_HISTS_H_ */
diff --git a/tools/perf/ui/browsers/res_sample.c b/tools/perf/ui/browsers/res_sample.c
index 76d356a18790..7cb2d6678039 100644
--- a/tools/perf/ui/browsers/res_sample.c
+++ b/tools/perf/ui/browsers/res_sample.c
@@ -56,7 +56,7 @@ int res_sample_browse(struct res_sample *res_samples, int num_res,
return -1;
}
}
- choice = ui__popup_menu(num_res, names);
+ choice = ui__popup_menu(num_res, names, NULL);
for (i = 0; i < num_res; i++)
zfree(&names[i]);
free(names);
diff --git a/tools/perf/ui/browsers/scripts.c b/tools/perf/ui/browsers/scripts.c
index fc733a6354d4..47d2c7a8cbe1 100644
--- a/tools/perf/ui/browsers/scripts.c
+++ b/tools/perf/ui/browsers/scripts.c
@@ -126,7 +126,7 @@ static int list_scripts(char *script_name, bool *custom,
SCRIPT_FULLPATH_LEN);
if (num < 0)
num = 0;
- choice = ui__popup_menu(num + max_std, (char * const *)names);
+ choice = ui__popup_menu(num + max_std, (char * const *)names, NULL);
if (choice < 0) {
ret = -1;
goto out;
diff --git a/tools/perf/ui/gtk/Build b/tools/perf/ui/gtk/Build
index ec22e899a224..eef708c502f4 100644
--- a/tools/perf/ui/gtk/Build
+++ b/tools/perf/ui/gtk/Build
@@ -1,4 +1,4 @@
-CFLAGS_gtk += -fPIC $(GTK_CFLAGS)
+CFLAGS_gtk += -fPIC $(GTK_CFLAGS) -Wno-deprecated-declarations
gtk-y += browser.o
gtk-y += hists.o
@@ -7,3 +7,8 @@ gtk-y += util.o
gtk-y += helpline.o
gtk-y += progress.o
gtk-y += annotate.o
+gtk-y += zalloc.o
+
+$(OUTPUT)ui/gtk/zalloc.o: ../lib/zalloc.c FORCE
+ $(call rule_mkdir)
+ $(call if_changed_dep,cc_o_c)
diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c
index 22cc240f7371..35f9641bf670 100644
--- a/tools/perf/ui/gtk/annotate.c
+++ b/tools/perf/ui/gtk/annotate.c
@@ -174,7 +174,7 @@ static int symbol__gtk_annotate(struct map_symbol *ms, struct evsel *evsel,
if (ms->map->dso->annotate_warned)
return -1;
- err = symbol__annotate(ms, evsel, 0, &annotation__default_options, NULL);
+ err = symbol__annotate(ms, evsel, &annotation__default_options, NULL);
if (err) {
char msg[BUFSIZ];
symbol__strerror_disassemble(ms, err, msg, sizeof(msg));
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index f73675500061..025f4c7f96bf 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -151,15 +151,90 @@ static int field_cmp(u64 field_a, u64 field_b)
return 0;
}
+static int hist_entry__new_pair(struct hist_entry *a, struct hist_entry *b, /* builds per-group-member field arrays for a and b; returns 0 on success, -1 with both outputs NULL on allocation failure */
+ hpp_field_fn get_field, int nr_members,
+ u64 **fields_a, u64 **fields_b)
+{
+ u64 *fa = calloc(nr_members, sizeof(*fa)),
+ *fb = calloc(nr_members, sizeof(*fb));
+ struct hist_entry *pair;
+
+ if (!fa || !fb)
+ goto out_free;
+
+ list_for_each_entry(pair, &a->pairs.head, pairs.node) { /* slot = group index of the pair's evsel; slots not visited stay zero from calloc() */
+ struct evsel *evsel = hists_to_evsel(pair->hists);
+ fa[perf_evsel__group_idx(evsel)] = get_field(pair);
+ }
+
+ list_for_each_entry(pair, &b->pairs.head, pairs.node) {
+ struct evsel *evsel = hists_to_evsel(pair->hists);
+ fb[perf_evsel__group_idx(evsel)] = get_field(pair);
+ }
+
+ *fields_a = fa; /* on success the arrays are handed to the caller */
+ *fields_b = fb;
+ return 0;
+out_free:
+ free(fa); /* free(NULL) is harmless, so a partial allocation is handled too */
+ free(fb);
+ *fields_a = *fields_b = NULL;
+ return -1;
+}
+
+static int __hpp__group_sort_idx(struct hist_entry *a, struct hist_entry *b, /* compares a and b by the field of group member idx first, then by the other members 1..nr_members-1 */
+ hpp_field_fn get_field, int idx)
+{
+ struct evsel *evsel = hists_to_evsel(a->hists);
+ u64 *fields_a, *fields_b;
+ int cmp, nr_members, ret, i;
+
+ cmp = field_cmp(get_field(a), get_field(b)); /* comparison of a and b themselves, used as the fallback result */
+ if (!perf_evsel__is_group_event(evsel))
+ return cmp;
+
+ nr_members = evsel->core.nr_members;
+ if (idx < 1 || idx >= nr_members) /* idx 0 or out of range: keep the fallback comparison */
+ return cmp;
+
+ ret = hist_entry__new_pair(a, b, get_field, nr_members, &fields_a, &fields_b);
+ if (ret) { /* allocation failed: both arrays were set to NULL, so the frees at out: are no-ops */
+ ret = cmp;
+ goto out;
+ }
+
+ ret = field_cmp(fields_a[idx], fields_b[idx]);
+ if (ret)
+ goto out;
+
+ for (i = 1; i < nr_members; i++) { /* tie-break on the remaining members, starting at index 1 */
+ if (i != idx) {
+ ret = field_cmp(fields_a[i], fields_b[i]);
+ if (ret)
+ goto out;
+ }
+ }
+
+out:
+ free(fields_a);
+ free(fields_b);
+
+ return ret;
+}
+
static int __hpp__sort(struct hist_entry *a, struct hist_entry *b,
hpp_field_fn get_field)
{
s64 ret;
int i, nr_members;
struct evsel *evsel;
- struct hist_entry *pair;
u64 *fields_a, *fields_b;
+ if (symbol_conf.group_sort_idx && symbol_conf.event_group) {
+ return __hpp__group_sort_idx(a, b, get_field,
+ symbol_conf.group_sort_idx);
+ }
+
ret = field_cmp(get_field(a), get_field(b));
if (ret || !symbol_conf.event_group)
return ret;
@@ -169,22 +244,10 @@ static int __hpp__sort(struct hist_entry *a, struct hist_entry *b,
return ret;
nr_members = evsel->core.nr_members;
- fields_a = calloc(nr_members, sizeof(*fields_a));
- fields_b = calloc(nr_members, sizeof(*fields_b));
-
- if (!fields_a || !fields_b)
+ i = hist_entry__new_pair(a, b, get_field, nr_members, &fields_a, &fields_b);
+ if (i)
goto out;
- list_for_each_entry(pair, &a->pairs.head, pairs.node) {
- evsel = hists_to_evsel(pair->hists);
- fields_a[perf_evsel__group_idx(evsel)] = get_field(pair);
- }
-
- list_for_each_entry(pair, &b->pairs.head, pairs.node) {
- evsel = hists_to_evsel(pair->hists);
- fields_b[perf_evsel__group_idx(evsel)] = get_field(pair);
- }
-
for (i = 1; i < nr_members; i++) {
ret = field_cmp(fields_a[i], fields_b[i]);
if (ret)
diff --git a/tools/perf/ui/keysyms.h b/tools/perf/ui/keysyms.h
index fbfac29077f2..04cc4e5c031f 100644
--- a/tools/perf/ui/keysyms.h
+++ b/tools/perf/ui/keysyms.h
@@ -25,5 +25,6 @@
#define K_ERROR -2
#define K_RESIZE -3
#define K_SWITCH_INPUT_DATA -4
+#define K_RELOAD -5
#endif /* _PERF_KEYSYMS_H_ */
diff --git a/tools/perf/ui/tui/util.c b/tools/perf/ui/tui/util.c
index b98dd0e31dc1..0f562e2cb1e8 100644
--- a/tools/perf/ui/tui/util.c
+++ b/tools/perf/ui/tui/util.c
@@ -23,7 +23,7 @@ static void ui_browser__argv_write(struct ui_browser *browser,
ui_browser__write_nstring(browser, *arg, browser->width);
}
-static int popup_menu__run(struct ui_browser *menu)
+static int popup_menu__run(struct ui_browser *menu, int *keyp)
{
int key;
@@ -45,6 +45,11 @@ static int popup_menu__run(struct ui_browser *menu)
key = -1;
break;
default:
+ if (keyp) {
+ *keyp = key;
+ key = menu->nr_entries;
+ break;
+ }
continue;
}
@@ -55,7 +60,7 @@ static int popup_menu__run(struct ui_browser *menu)
return key;
}
-int ui__popup_menu(int argc, char * const argv[])
+int ui__popup_menu(int argc, char * const argv[], int *keyp)
{
struct ui_browser menu = {
.entries = (void *)argv,
@@ -64,8 +69,7 @@ int ui__popup_menu(int argc, char * const argv[])
.write = ui_browser__argv_write,
.nr_entries = argc,
};
-
- return popup_menu__run(&menu);
+ return popup_menu__run(&menu, keyp);
}
int ui_browser__input_window(const char *title, const char *text, char *input,
diff --git a/tools/perf/ui/util.h b/tools/perf/ui/util.h
index 40891942f465..e30cea807564 100644
--- a/tools/perf/ui/util.h
+++ b/tools/perf/ui/util.h
@@ -5,7 +5,7 @@
#include <stdarg.h>
int ui__getch(int delay_secs);
-int ui__popup_menu(int argc, char * const argv[]);
+int ui__popup_menu(int argc, char * const argv[], int *keyp);
int ui__help_window(const char *text);
int ui__dialog_yesno(const char *msg);
void __ui__info_window(const char *title, const char *text, const char *exit_msg);
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 07da6c790b63..c0cf8dff694e 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -121,7 +121,9 @@ perf-y += mem-events.o
perf-y += vsprintf.o
perf-y += units.o
perf-y += time-utils.o
+perf-y += expr-flex.o
perf-y += expr-bison.o
+perf-y += expr.o
perf-y += branch.o
perf-y += mem2node.o
@@ -189,9 +191,13 @@ $(OUTPUT)util/parse-events-bison.c: util/parse-events.y
$(call rule_mkdir)
$(Q)$(call echo-cmd,bison)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $@ -p parse_events_
+$(OUTPUT)util/expr-flex.c: util/expr.l $(OUTPUT)util/expr-bison.c
+ $(call rule_mkdir)
+ $(Q)$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/expr-flex.h $(PARSER_DEBUG_FLEX) util/expr.l
+
$(OUTPUT)util/expr-bison.c: util/expr.y
$(call rule_mkdir)
- $(Q)$(call echo-cmd,bison)$(BISON) -v util/expr.y -d $(PARSER_DEBUG_BISON) -o $@ -p expr__
+ $(Q)$(call echo-cmd,bison)$(BISON) -v util/expr.y -d $(PARSER_DEBUG_BISON) -o $@ -p expr_
$(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c
$(call rule_mkdir)
@@ -203,12 +209,14 @@ $(OUTPUT)util/pmu-bison.c: util/pmu.y
CFLAGS_parse-events-flex.o += -w
CFLAGS_pmu-flex.o += -w
+CFLAGS_expr-flex.o += -w
CFLAGS_parse-events-bison.o += -DYYENABLE_NLS=0 -w
CFLAGS_pmu-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w
CFLAGS_expr-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w
$(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c
$(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c
+$(OUTPUT)util/expr.o: $(OUTPUT)util/expr-flex.c $(OUTPUT)util/expr-bison.c
CFLAGS_bitmap.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
CFLAGS_find_bit.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
@@ -216,6 +224,7 @@ CFLAGS_rbtree.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ET
CFLAGS_libstring.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
CFLAGS_hweight.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
CFLAGS_parse-events.o += -Wno-redundant-decls
+CFLAGS_expr.o += -Wno-redundant-decls
CFLAGS_header.o += -include $(OUTPUT)PERF-VERSION-FILE
$(OUTPUT)util/kallsyms.o: ../lib/symbol/kallsyms.c FORCE
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index f5e77ed237e8..f1ea0d61eb5b 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -1143,93 +1143,70 @@ out:
}
struct annotate_args {
- size_t privsize;
- struct arch *arch;
- struct map_symbol ms;
- struct evsel *evsel;
+ struct arch *arch;
+ struct map_symbol ms;
+ struct evsel *evsel;
struct annotation_options *options;
- s64 offset;
- char *line;
- int line_nr;
+ s64 offset;
+ char *line;
+ int line_nr;
};
-static void annotation_line__delete(struct annotation_line *al)
+static void annotation_line__init(struct annotation_line *al,
+ struct annotate_args *args,
+ int nr)
{
- void *ptr = (void *) al - al->privsize;
+ al->offset = args->offset;
+ al->line = strdup(args->line);
+ al->line_nr = args->line_nr;
+ al->data_nr = nr;
+}
+static void annotation_line__exit(struct annotation_line *al)
+{
free_srcline(al->path);
zfree(&al->line);
- free(ptr);
}
-/*
- * Allocating the annotation line data with following
- * structure:
- *
- * --------------------------------------
- * private space | struct annotation_line
- * --------------------------------------
- *
- * Size of the private space is stored in 'struct annotation_line'.
- *
- */
-static struct annotation_line *
-annotation_line__new(struct annotate_args *args, size_t privsize)
+static size_t disasm_line_size(int nr)
{
struct annotation_line *al;
- struct evsel *evsel = args->evsel;
- size_t size = privsize + sizeof(*al);
- int nr = 1;
-
- if (perf_evsel__is_group_event(evsel))
- nr = evsel->core.nr_members;
-
- size += sizeof(al->data[0]) * nr;
- al = zalloc(size);
- if (al) {
- al = (void *) al + privsize;
- al->privsize = privsize;
- al->offset = args->offset;
- al->line = strdup(args->line);
- al->line_nr = args->line_nr;
- al->data_nr = nr;
- }
-
- return al;
+ return (sizeof(struct disasm_line) + (sizeof(al->data[0]) * nr));
}
/*
* Allocating the disasm annotation line data with
* following structure:
*
- * ------------------------------------------------------------
- * privsize space | struct disasm_line | struct annotation_line
- * ------------------------------------------------------------
+ * -------------------------------------------
+ * struct disasm_line | struct annotation_line
+ * -------------------------------------------
*
* We have 'struct annotation_line' member as last member
* of 'struct disasm_line' to have an easy access.
- *
*/
static struct disasm_line *disasm_line__new(struct annotate_args *args)
{
struct disasm_line *dl = NULL;
- struct annotation_line *al;
- size_t privsize = args->privsize + offsetof(struct disasm_line, al);
+ int nr = 1;
- al = annotation_line__new(args, privsize);
- if (al != NULL) {
- dl = disasm_line(al);
+ if (perf_evsel__is_group_event(args->evsel))
+ nr = args->evsel->core.nr_members;
- if (dl->al.line == NULL)
- goto out_delete;
+ dl = zalloc(disasm_line_size(nr));
+ if (!dl)
+ return NULL;
- if (args->offset != -1) {
- if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0)
- goto out_free_line;
+ annotation_line__init(&dl->al, args, nr);
+ if (dl->al.line == NULL)
+ goto out_delete;
- disasm_line__init_ins(dl, args->arch, &args->ms);
- }
+ if (args->offset != -1) {
+ if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0)
+ goto out_free_line;
+
+ disasm_line__init_ins(dl, args->arch, &args->ms);
}
return dl;
@@ -1248,7 +1225,8 @@ void disasm_line__free(struct disasm_line *dl)
else
ins__delete(&dl->ops);
zfree(&dl->ins.name);
- annotation_line__delete(&dl->al);
+ annotation_line__exit(&dl->al);
+ free(dl);
}
int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw, int max_ins_name)
@@ -1966,14 +1944,20 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
err = asprintf(&command,
"%s %s%s --start-address=0x%016" PRIx64
" --stop-address=0x%016" PRIx64
- " -l -d %s %s -C \"$1\"",
+ " -l -d %s %s %s %c%s%c %s%s -C \"$1\"",
opts->objdump_path ?: "objdump",
opts->disassembler_style ? "-M " : "",
opts->disassembler_style ?: "",
map__rip_2objdump(map, sym->start),
map__rip_2objdump(map, sym->end),
opts->show_asm_raw ? "" : "--no-show-raw-insn",
- opts->annotate_src ? "-S" : "");
+ opts->annotate_src ? "-S" : "",
+ opts->prefix ? "--prefix " : "",
+ opts->prefix ? '"' : ' ',
+ opts->prefix ?: "",
+ opts->prefix ? '"' : ' ',
+ opts->prefix_strip ? "--prefix-strip=" : "",
+ opts->prefix_strip ?: "");
if (err < 0) {
pr_err("Failure allocating memory for the command to run\n");
@@ -2143,13 +2127,12 @@ void symbol__calc_percent(struct symbol *sym, struct evsel *evsel)
annotation__calc_percent(notes, evsel, symbol__size(sym));
}
-int symbol__annotate(struct map_symbol *ms, struct evsel *evsel, size_t privsize,
+int symbol__annotate(struct map_symbol *ms, struct evsel *evsel,
struct annotation_options *options, struct arch **parch)
{
struct symbol *sym = ms->sym;
struct annotation *notes = symbol__annotation(sym);
struct annotate_args args = {
- .privsize = privsize,
.evsel = evsel,
.options = options,
};
@@ -2628,8 +2611,6 @@ void annotation__mark_jump_targets(struct annotation *notes, struct symbol *sym)
if (++al->jump_sources > notes->max_jump_sources)
notes->max_jump_sources = al->jump_sources;
-
- ++notes->nr_jumps;
}
}
@@ -2638,6 +2619,8 @@ void annotation__set_offsets(struct annotation *notes, s64 size)
struct annotation_line *al;
notes->max_line_len = 0;
+ notes->nr_entries = 0;
+ notes->nr_asm_entries = 0;
list_for_each_entry(al, &notes->src->source, node) {
size_t line_len = strlen(al->line);
@@ -2784,7 +2767,7 @@ int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel,
struct symbol *sym = ms->sym;
struct rb_root source_line = RB_ROOT;
- if (symbol__annotate(ms, evsel, 0, opts, NULL) < 0)
+ if (symbol__annotate(ms, evsel, opts, NULL) < 0)
return -1;
symbol__calc_percent(sym, evsel);
@@ -2909,9 +2892,9 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati
percent = annotation_data__percent(&al->data[i], percent_type);
obj__set_percent_color(obj, percent, current_entry);
- if (notes->options->show_total_period) {
+ if (symbol_conf.show_total_period) {
obj__printf(obj, "%11" PRIu64 " ", al->data[i].he.period);
- } else if (notes->options->show_nr_samples) {
+ } else if (symbol_conf.show_nr_samples) {
obj__printf(obj, "%6" PRIu64 " ",
al->data[i].he.nr_samples);
} else {
@@ -2925,8 +2908,8 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati
obj__printf(obj, "%-*s", pcnt_width, " ");
else {
obj__printf(obj, "%-*s", pcnt_width,
- notes->options->show_total_period ? "Period" :
- notes->options->show_nr_samples ? "Samples" : "Percent");
+ symbol_conf.show_total_period ? "Period" :
+ symbol_conf.show_nr_samples ? "Samples" : "Percent");
}
}
@@ -3064,7 +3047,7 @@ int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel,
if (perf_evsel__is_group_event(evsel))
nr_pcnt = evsel->core.nr_members;
- err = symbol__annotate(ms, evsel, 0, options, parch);
+ err = symbol__annotate(ms, evsel, options, parch);
if (err)
goto out_free_offsets;
@@ -3088,69 +3071,46 @@ out_free_offsets:
return err;
}
-#define ANNOTATION__CFG(n) \
- { .name = #n, .value = &annotation__default_options.n, }
-
-/*
- * Keep the entries sorted, they are bsearch'ed
- */
-static struct annotation_config {
- const char *name;
- void *value;
-} annotation__configs[] = {
- ANNOTATION__CFG(hide_src_code),
- ANNOTATION__CFG(jump_arrows),
- ANNOTATION__CFG(offset_level),
- ANNOTATION__CFG(show_linenr),
- ANNOTATION__CFG(show_nr_jumps),
- ANNOTATION__CFG(show_nr_samples),
- ANNOTATION__CFG(show_total_period),
- ANNOTATION__CFG(use_offset),
-};
-
-#undef ANNOTATION__CFG
-
-static int annotation_config__cmp(const void *name, const void *cfgp)
+static int annotation__config(const char *var, const char *value, void *data)
{
- const struct annotation_config *cfg = cfgp;
-
- return strcmp(name, cfg->name);
-}
-
-static int annotation__config(const char *var, const char *value,
- void *data __maybe_unused)
-{
- struct annotation_config *cfg;
- const char *name;
+ struct annotation_options *opt = data;
if (!strstarts(var, "annotate."))
return 0;
- name = var + 9;
- cfg = bsearch(name, annotation__configs, ARRAY_SIZE(annotation__configs),
- sizeof(struct annotation_config), annotation_config__cmp);
-
- if (cfg == NULL)
- pr_debug("%s variable unknown, ignoring...", var);
- else if (strcmp(var, "annotate.offset_level") == 0) {
- perf_config_int(cfg->value, name, value);
-
- if (*(int *)cfg->value > ANNOTATION__MAX_OFFSET_LEVEL)
- *(int *)cfg->value = ANNOTATION__MAX_OFFSET_LEVEL;
- else if (*(int *)cfg->value < ANNOTATION__MIN_OFFSET_LEVEL)
- *(int *)cfg->value = ANNOTATION__MIN_OFFSET_LEVEL;
+ if (!strcmp(var, "annotate.offset_level")) {
+ perf_config_u8(&opt->offset_level, "offset_level", value);
+
+ if (opt->offset_level > ANNOTATION__MAX_OFFSET_LEVEL)
+ opt->offset_level = ANNOTATION__MAX_OFFSET_LEVEL;
+ else if (opt->offset_level < ANNOTATION__MIN_OFFSET_LEVEL)
+ opt->offset_level = ANNOTATION__MIN_OFFSET_LEVEL;
+ } else if (!strcmp(var, "annotate.hide_src_code")) {
+ opt->hide_src_code = perf_config_bool("hide_src_code", value);
+ } else if (!strcmp(var, "annotate.jump_arrows")) {
+ opt->jump_arrows = perf_config_bool("jump_arrows", value);
+ } else if (!strcmp(var, "annotate.show_linenr")) {
+ opt->show_linenr = perf_config_bool("show_linenr", value);
+ } else if (!strcmp(var, "annotate.show_nr_jumps")) {
+ opt->show_nr_jumps = perf_config_bool("show_nr_jumps", value);
+ } else if (!strcmp(var, "annotate.show_nr_samples")) {
+ symbol_conf.show_nr_samples = perf_config_bool("show_nr_samples",
+ value);
+ } else if (!strcmp(var, "annotate.show_total_period")) {
+ symbol_conf.show_total_period = perf_config_bool("show_total_period",
+ value);
+ } else if (!strcmp(var, "annotate.use_offset")) {
+ opt->use_offset = perf_config_bool("use_offset", value);
} else {
- *(bool *)cfg->value = perf_config_bool(name, value);
+ pr_debug("%s variable unknown, ignoring...", var);
}
+
return 0;
}
-void annotation_config__init(void)
+void annotation_config__init(struct annotation_options *opt)
{
- perf_config(annotation__config, NULL);
-
- annotation__default_options.show_total_period = symbol_conf.show_total_period;
- annotation__default_options.show_nr_samples = symbol_conf.show_nr_samples;
+ perf_config(annotation__config, opt);
}
static unsigned int parse_percent_type(char *str1, char *str2)
@@ -3204,3 +3164,12 @@ out:
free(str1);
return err;
}
+
+int annotate_check_args(struct annotation_options *args)
+{
+ if (args->prefix_strip && !args->prefix) {
+ pr_err("--prefix-strip requires --prefix\n");
+ return -1;
+ }
+ return 0;
+}
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index 7075d98f69d9..2d88069d6428 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -74,6 +74,7 @@ bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2);
#define ANNOTATION__CYCLES_WIDTH 6
#define ANNOTATION__MINMAX_CYCLES_WIDTH 19
#define ANNOTATION__AVG_IPC_WIDTH 36
+#define ANNOTATION_DUMMY_LEN 256
struct annotation_options {
bool hide_src_code,
@@ -83,8 +84,6 @@ struct annotation_options {
full_path,
show_linenr,
show_nr_jumps,
- show_nr_samples,
- show_total_period,
show_minmax_cycle,
show_asm_raw,
annotate_src;
@@ -94,6 +93,8 @@ struct annotation_options {
int context;
const char *objdump_path;
const char *disassembler_style;
+ const char *prefix;
+ const char *prefix_strip;
unsigned int percent_type;
};
@@ -139,7 +140,6 @@ struct annotation_line {
u64 cycles;
u64 cycles_max;
u64 cycles_min;
- size_t privsize;
char *path;
u32 idx;
int idx_asm;
@@ -280,7 +280,6 @@ struct annotation {
struct annotation_options *options;
struct annotation_line **offsets;
int nr_events;
- int nr_jumps;
int max_jump_sources;
int nr_entries;
int nr_asm_entries;
@@ -307,7 +306,7 @@ static inline int annotation__cycles_width(struct annotation *notes)
static inline int annotation__pcnt_width(struct annotation *notes)
{
- return (notes->options->show_total_period ? 12 : 7) * notes->nr_events;
+ return (symbol_conf.show_total_period ? 12 : 7) * notes->nr_events;
}
static inline bool annotation_line__filter(struct annotation_line *al, struct annotation *notes)
@@ -350,7 +349,7 @@ struct annotated_source *symbol__hists(struct symbol *sym, int nr_hists);
void symbol__annotate_zero_histograms(struct symbol *sym);
int symbol__annotate(struct map_symbol *ms,
- struct evsel *evsel, size_t privsize,
+ struct evsel *evsel,
struct annotation_options *options,
struct arch **parch);
int symbol__annotate2(struct map_symbol *ms,
@@ -411,8 +410,11 @@ static inline int symbol__tui_annotate(struct map_symbol *ms __maybe_unused,
}
#endif
-void annotation_config__init(void);
+void annotation_config__init(struct annotation_options *opt);
int annotate_parse_percent_type(const struct option *opt, const char *_str,
int unset);
+
+int annotate_check_args(struct annotation_options *args);
+
#endif /* __PERF_ANNOTATE_H */
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index eb087e7df6f4..3571ce72ca28 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -629,8 +629,10 @@ int auxtrace_record__options(struct auxtrace_record *itr,
struct evlist *evlist,
struct record_opts *opts)
{
- if (itr)
+ if (itr) {
+ itr->evlist = evlist;
return itr->recording_options(itr, evlist, opts);
+ }
return 0;
}
@@ -664,6 +666,24 @@ int auxtrace_parse_snapshot_options(struct auxtrace_record *itr,
return -EINVAL;
}
+int auxtrace_record__read_finish(struct auxtrace_record *itr, int idx)
+{
+ struct evsel *evsel;
+
+ if (!itr->evlist || !itr->pmu)
+ return -EINVAL;
+
+ evlist__for_each_entry(itr->evlist, evsel) {
+ if (evsel->core.attr.type == itr->pmu->type) {
+ if (evsel->disabled)
+ return 0;
+ return perf_evlist__enable_event_idx(itr->evlist, evsel,
+ idx);
+ }
+ }
+ return -EINVAL;
+}
+
/*
* Event record size is 16-bit which results in a maximum size of about 64KiB.
* Allow about 4KiB for the rest of the sample record, to give a maximum
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index 749d72cd9c7b..e58ef160b599 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -29,6 +29,7 @@ struct record_opts;
struct perf_record_auxtrace_error;
struct perf_record_auxtrace_info;
struct events_stats;
+struct perf_pmu;
enum auxtrace_error_type {
PERF_AUXTRACE_ERROR_ITRACE = 1,
@@ -322,6 +323,8 @@ struct auxtrace_mmap_params {
* @read_finish: called after reading from an auxtrace mmap
* @alignment: alignment (if any) for AUX area data
* @default_aux_sample_size: default sample size for --aux sample option
+ * @pmu: associated pmu
+ * @evlist: selected events list
*/
struct auxtrace_record {
int (*recording_options)(struct auxtrace_record *itr,
@@ -346,6 +349,8 @@ struct auxtrace_record {
int (*read_finish)(struct auxtrace_record *itr, int idx);
unsigned int alignment;
unsigned int default_aux_sample_size;
+ struct perf_pmu *pmu;
+ struct evlist *evlist;
};
/**
@@ -537,6 +542,7 @@ int auxtrace_record__find_snapshot(struct auxtrace_record *itr, int idx,
struct auxtrace_mmap *mm,
unsigned char *data, u64 *head, u64 *old);
u64 auxtrace_record__reference(struct auxtrace_record *itr);
+int auxtrace_record__read_finish(struct auxtrace_record *itr, int idx);
int auxtrace_index__auxtrace_event(struct list_head *head, union perf_event *event,
off_t file_offset);
diff --git a/tools/perf/util/block-info.c b/tools/perf/util/block-info.c
index c4b030bf6ec2..423ec69bda6c 100644
--- a/tools/perf/util/block-info.c
+++ b/tools/perf/util/block-info.c
@@ -65,8 +65,7 @@ struct block_info *block_info__new(void)
return bi;
}
-int64_t block_info__cmp(struct perf_hpp_fmt *fmt __maybe_unused,
- struct hist_entry *left, struct hist_entry *right)
+int64_t __block_info__cmp(struct hist_entry *left, struct hist_entry *right)
{
struct block_info *bi_l = left->block_info;
struct block_info *bi_r = right->block_info;
@@ -74,30 +73,27 @@ int64_t block_info__cmp(struct perf_hpp_fmt *fmt __maybe_unused,
if (!bi_l->sym || !bi_r->sym) {
if (!bi_l->sym && !bi_r->sym)
- return 0;
+ return -1;
else if (!bi_l->sym)
return -1;
else
return 1;
}
- if (bi_l->sym == bi_r->sym) {
- if (bi_l->start == bi_r->start) {
- if (bi_l->end == bi_r->end)
- return 0;
- else
- return (int64_t)(bi_r->end - bi_l->end);
- } else
- return (int64_t)(bi_r->start - bi_l->start);
- } else {
- cmp = strcmp(bi_l->sym->name, bi_r->sym->name);
+ cmp = strcmp(bi_l->sym->name, bi_r->sym->name);
+ if (cmp)
return cmp;
- }
- if (bi_l->sym->start != bi_r->sym->start)
- return (int64_t)(bi_r->sym->start - bi_l->sym->start);
+ if (bi_l->start != bi_r->start)
+ return (int64_t)(bi_r->start - bi_l->start);
- return (int64_t)(bi_r->sym->end - bi_l->sym->end);
+ return (int64_t)(bi_r->end - bi_l->end);
+}
+
+int64_t block_info__cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct hist_entry *left, struct hist_entry *right)
+{
+ return __block_info__cmp(left, right);
}
static void init_block_info(struct block_info *bi, struct symbol *sym,
@@ -185,6 +181,17 @@ static int block_column_width(struct perf_hpp_fmt *fmt,
return block_fmt->width;
}
+static int color_pct(struct perf_hpp *hpp, int width, double pct)
+{
+#ifdef HAVE_SLANG_SUPPORT
+ if (use_browser) {
+ return __hpp__slsmg_color_printf(hpp, "%*.2f%%",
+ width - 1, pct);
+ }
+#endif
+ return hpp_color_scnprintf(hpp, "%*.2f%%", width - 1, pct);
+}
+
static int block_total_cycles_pct_entry(struct perf_hpp_fmt *fmt,
struct perf_hpp *hpp,
struct hist_entry *he)
@@ -192,14 +199,11 @@ static int block_total_cycles_pct_entry(struct perf_hpp_fmt *fmt,
struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
struct block_info *bi = he->block_info;
double ratio = 0.0;
- char buf[16];
if (block_fmt->total_cycles)
ratio = (double)bi->cycles / (double)block_fmt->total_cycles;
- sprintf(buf, "%.2f%%", 100.0 * ratio);
-
- return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width, buf);
+ return color_pct(hpp, block_fmt->width, 100.0 * ratio);
}
static int64_t block_total_cycles_pct_sort(struct perf_hpp_fmt *fmt,
@@ -252,16 +256,13 @@ static int block_cycles_pct_entry(struct perf_hpp_fmt *fmt,
struct block_info *bi = he->block_info;
double ratio = 0.0;
u64 avg;
- char buf[16];
if (block_fmt->block_cycles && bi->num_aggr) {
avg = bi->cycles_aggr / bi->num_aggr;
ratio = (double)avg / (double)block_fmt->block_cycles;
}
- sprintf(buf, "%.2f%%", 100.0 * ratio);
-
- return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width, buf);
+ return color_pct(hpp, block_fmt->width, 100.0 * ratio);
}
static int block_avg_cycles_entry(struct perf_hpp_fmt *fmt,
@@ -295,7 +296,8 @@ static int block_range_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
end_line = map__srcline(he->ms.map, bi->sym->start + bi->end,
he->ms.sym);
- if ((start_line != SRCLINE_UNKNOWN) && (end_line != SRCLINE_UNKNOWN)) {
+ if ((strncmp(start_line, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0) &&
+ (strncmp(end_line, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0)) {
scnprintf(buf, sizeof(buf), "[%s -> %s]",
start_line, end_line);
} else {
@@ -348,7 +350,7 @@ static void hpp_register(struct block_fmt *block_fmt, int idx,
switch (idx) {
case PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_PCT:
- fmt->entry = block_total_cycles_pct_entry;
+ fmt->color = block_total_cycles_pct_entry;
fmt->cmp = block_info__cmp;
fmt->sort = block_total_cycles_pct_sort;
break;
@@ -356,7 +358,7 @@ static void hpp_register(struct block_fmt *block_fmt, int idx,
fmt->entry = block_cycles_lbr_entry;
break;
case PERF_HPP_REPORT__BLOCK_CYCLES_PCT:
- fmt->entry = block_cycles_pct_entry;
+ fmt->color = block_cycles_pct_entry;
break;
case PERF_HPP_REPORT__BLOCK_AVG_CYCLES:
fmt->entry = block_avg_cycles_entry;
@@ -376,33 +378,41 @@ static void hpp_register(struct block_fmt *block_fmt, int idx,
}
static void register_block_columns(struct perf_hpp_list *hpp_list,
- struct block_fmt *block_fmts)
+ struct block_fmt *block_fmts,
+ int *block_hpps, int nr_hpps)
{
- for (int i = 0; i < PERF_HPP_REPORT__BLOCK_MAX_INDEX; i++)
- hpp_register(&block_fmts[i], i, hpp_list);
+ for (int i = 0; i < nr_hpps; i++)
+ hpp_register(&block_fmts[i], block_hpps[i], hpp_list);
}
-static void init_block_hist(struct block_hist *bh, struct block_fmt *block_fmts)
+static void init_block_hist(struct block_hist *bh, struct block_fmt *block_fmts,
+ int *block_hpps, int nr_hpps)
{
__hists__init(&bh->block_hists, &bh->block_list);
perf_hpp_list__init(&bh->block_list);
bh->block_list.nr_header_lines = 1;
- register_block_columns(&bh->block_list, block_fmts);
+ register_block_columns(&bh->block_list, block_fmts,
+ block_hpps, nr_hpps);
- perf_hpp_list__register_sort_field(&bh->block_list,
- &block_fmts[PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_PCT].fmt);
+ /* Sort by the first fmt */
+ perf_hpp_list__register_sort_field(&bh->block_list, &block_fmts[0].fmt);
}
-static void process_block_report(struct hists *hists,
- struct block_report *block_report,
- u64 total_cycles)
+static int process_block_report(struct hists *hists,
+ struct block_report *block_report,
+ u64 total_cycles, int *block_hpps,
+ int nr_hpps)
{
struct rb_node *next = rb_first_cached(&hists->entries);
struct block_hist *bh = &block_report->hist;
struct hist_entry *he;
- init_block_hist(bh, block_report->fmts);
+ if (nr_hpps > PERF_HPP_REPORT__BLOCK_MAX_INDEX)
+ return -1;
+
+ block_report->nr_fmts = nr_hpps;
+ init_block_hist(bh, block_report->fmts, block_hpps, nr_hpps);
while (next) {
he = rb_entry(next, struct hist_entry, rb_node);
@@ -411,16 +421,19 @@ static void process_block_report(struct hists *hists,
next = rb_next(&he->rb_node);
}
- for (int i = 0; i < PERF_HPP_REPORT__BLOCK_MAX_INDEX; i++) {
+ for (int i = 0; i < nr_hpps; i++) {
block_report->fmts[i].total_cycles = total_cycles;
block_report->fmts[i].block_cycles = block_report->cycles;
}
hists__output_resort(&bh->block_hists, NULL);
+ return 0;
}
struct block_report *block_info__create_report(struct evlist *evlist,
- u64 total_cycles)
+ u64 total_cycles,
+ int *block_hpps, int nr_hpps,
+ int *nr_reps)
{
struct block_report *block_reports;
int nr_hists = evlist->core.nr_entries, i = 0;
@@ -433,13 +446,23 @@ struct block_report *block_info__create_report(struct evlist *evlist,
evlist__for_each_entry(evlist, pos) {
struct hists *hists = evsel__hists(pos);
- process_block_report(hists, &block_reports[i], total_cycles);
+ process_block_report(hists, &block_reports[i], total_cycles,
+ block_hpps, nr_hpps);
i++;
}
+ *nr_reps = nr_hists;
return block_reports;
}
+void block_info__free_report(struct block_report *reps, int nr_reps)
+{
+ for (int i = 0; i < nr_reps; i++)
+ hists__delete_entries(&reps[i].hist.block_hists);
+
+ free(reps);
+}
+
int report__browse_block_hists(struct block_hist *bh, float min_percent,
struct evsel *evsel, struct perf_env *env,
struct annotation_options *annotation_opts)
@@ -451,13 +474,11 @@ int report__browse_block_hists(struct block_hist *bh, float min_percent,
symbol_conf.report_individual_block = true;
hists__fprintf(&bh->block_hists, true, 0, 0, min_percent,
stdout, true);
- hists__delete_entries(&bh->block_hists);
return 0;
case 1:
symbol_conf.report_individual_block = true;
ret = block_hists_tui_browse(bh, evsel, min_percent,
env, annotation_opts);
- hists__delete_entries(&bh->block_hists);
return ret;
default:
return -1;
diff --git a/tools/perf/util/block-info.h b/tools/perf/util/block-info.h
index bef0d75e9819..42e9dcc4cf0a 100644
--- a/tools/perf/util/block-info.h
+++ b/tools/perf/util/block-info.h
@@ -45,6 +45,7 @@ struct block_report {
struct block_hist hist;
u64 cycles;
struct block_fmt fmts[PERF_HPP_REPORT__BLOCK_MAX_INDEX];
+ int nr_fmts;
};
struct block_hist;
@@ -61,6 +62,8 @@ static inline void __block_info__zput(struct block_info **bi)
#define block_info__zput(bi) __block_info__zput(&bi)
+int64_t __block_info__cmp(struct hist_entry *left, struct hist_entry *right);
+
int64_t block_info__cmp(struct perf_hpp_fmt *fmt __maybe_unused,
struct hist_entry *left, struct hist_entry *right);
@@ -68,7 +71,11 @@ int block_info__process_sym(struct hist_entry *he, struct block_hist *bh,
u64 *block_cycles_aggr, u64 total_cycles);
struct block_report *block_info__create_report(struct evlist *evlist,
- u64 total_cycles);
+ u64 total_cycles,
+ int *block_hpps, int nr_hpps,
+ int *nr_reps);
+
+void block_info__free_report(struct block_report *reps, int nr_reps);
int report__browse_block_hists(struct block_hist *bh, float min_percent,
struct evsel *evsel, struct perf_env *env,
diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h
index 88e00d268f6f..154a05cd03af 100644
--- a/tools/perf/util/branch.h
+++ b/tools/perf/util/branch.h
@@ -12,6 +12,7 @@
#include <linux/stddef.h>
#include <linux/perf_event.h>
#include <linux/types.h>
+#include "event.h"
struct branch_flags {
u64 mispred:1;
@@ -39,9 +40,30 @@ struct branch_entry {
struct branch_stack {
u64 nr;
+ u64 hw_idx;
struct branch_entry entries[0];
};
+/*
+ * The hw_idx is only available when PERF_SAMPLE_BRANCH_HW_INDEX is applied.
+ * Otherwise, the output format of a sample with branch stack is
+ * struct branch_stack {
+ * u64 nr;
+ * struct branch_entry entries[0];
+ * }
+ * Check whether the hw_idx is available,
+ * and return the corresponding pointer of entries[0].
+ */
+static inline struct branch_entry *perf_sample__branch_entries(struct perf_sample *sample)
+{
+ u64 *entry = (u64 *)sample->branch_stack;
+
+ entry++;
+ if (sample->no_hw_idx)
+ return (struct branch_entry *)entry;
+ return (struct branch_entry *)(++entry);
+}
+
struct branch_type_stat {
bool branch_to;
u64 counts[PERF_BR_MAX];
diff --git a/tools/perf/util/c++/clang.cpp b/tools/perf/util/c++/clang.cpp
index fc361c3f8570..c8885dfa3667 100644
--- a/tools/perf/util/c++/clang.cpp
+++ b/tools/perf/util/c++/clang.cpp
@@ -71,7 +71,11 @@ getModuleFromSource(llvm::opt::ArgStringList CFlags,
CompilerInstance Clang;
Clang.createDiagnostics();
+#if CLANG_VERSION_MAJOR < 9
Clang.setVirtualFileSystem(&*VFS);
+#else
+ Clang.createFileManager(&*VFS);
+#endif
#if CLANG_VERSION_MAJOR < 4
IntrusiveRefCntPtr<CompilerInvocation> CI =
diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c
index 4881d4af3381..b73fb7823048 100644
--- a/tools/perf/util/cgroup.c
+++ b/tools/perf/util/cgroup.c
@@ -3,75 +3,16 @@
#include "evsel.h"
#include "cgroup.h"
#include "evlist.h"
-#include <linux/stringify.h>
#include <linux/zalloc.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
+#include <api/fs/fs.h>
int nr_cgroups;
-static int
-cgroupfs_find_mountpoint(char *buf, size_t maxlen)
-{
- FILE *fp;
- char mountpoint[PATH_MAX + 1], tokens[PATH_MAX + 1], type[PATH_MAX + 1];
- char path_v1[PATH_MAX + 1], path_v2[PATH_MAX + 2], *path;
- char *token, *saved_ptr = NULL;
-
- fp = fopen("/proc/mounts", "r");
- if (!fp)
- return -1;
-
- /*
- * in order to handle split hierarchy, we need to scan /proc/mounts
- * and inspect every cgroupfs mount point to find one that has
- * perf_event subsystem
- */
- path_v1[0] = '\0';
- path_v2[0] = '\0';
-
- while (fscanf(fp, "%*s %"__stringify(PATH_MAX)"s %"__stringify(PATH_MAX)"s %"
- __stringify(PATH_MAX)"s %*d %*d\n",
- mountpoint, type, tokens) == 3) {
-
- if (!path_v1[0] && !strcmp(type, "cgroup")) {
-
- token = strtok_r(tokens, ",", &saved_ptr);
-
- while (token != NULL) {
- if (!strcmp(token, "perf_event")) {
- strcpy(path_v1, mountpoint);
- break;
- }
- token = strtok_r(NULL, ",", &saved_ptr);
- }
- }
-
- if (!path_v2[0] && !strcmp(type, "cgroup2"))
- strcpy(path_v2, mountpoint);
-
- if (path_v1[0] && path_v2[0])
- break;
- }
- fclose(fp);
-
- if (path_v1[0])
- path = path_v1;
- else if (path_v2[0])
- path = path_v2;
- else
- return -1;
-
- if (strlen(path) < maxlen) {
- strcpy(buf, path);
- return 0;
- }
- return -1;
-}
-
static int open_cgroup(const char *name)
{
char path[PATH_MAX + 1];
@@ -79,7 +20,7 @@ static int open_cgroup(const char *name)
int fd;
- if (cgroupfs_find_mountpoint(mnt, PATH_MAX + 1))
+ if (cgroupfs_find_mountpoint(mnt, PATH_MAX + 1, "perf_event"))
return -1;
scnprintf(path, PATH_MAX, "%s/%s", mnt, name);
@@ -250,3 +191,83 @@ int parse_cgroups(const struct option *opt, const char *str,
}
return 0;
}
+
+/*
+ * Look up the cgroup with the given id in the rbtree rooted at 'root'.
+ *
+ * Returns the existing entry when one is found.  Otherwise returns NULL if
+ * 'create' is false, or allocates a new entry named after a copy of 'path'
+ * (fd -1, reference count 1), links it into the tree and returns it.
+ * Returns NULL when either allocation fails.
+ *
+ * No locking is done here; the callers in this file hold
+ * env->cgroups.lock around the call.
+ */
+static struct cgroup *__cgroup__findnew(struct rb_root *root, uint64_t id,
+ bool create, const char *path)
+{
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *parent = NULL;
+ struct cgroup *cgrp;
+
+ while (*p != NULL) {
+ parent = *p;
+ cgrp = rb_entry(parent, struct cgroup, node);
+
+ if (cgrp->id == id)
+ return cgrp;
+
+ /* Descend left when this node's id is smaller than the one sought. */
+ if (cgrp->id < id)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+
+ if (!create)
+ return NULL;
+
+ cgrp = malloc(sizeof(*cgrp));
+ if (cgrp == NULL)
+ return NULL;
+
+ cgrp->name = strdup(path);
+ if (cgrp->name == NULL) {
+ free(cgrp);
+ return NULL;
+ }
+
+ cgrp->fd = -1;
+ cgrp->id = id;
+ refcount_set(&cgrp->refcnt, 1);
+
+ /* 'parent' and 'p' still describe the empty slot where the search ended. */
+ rb_link_node(&cgrp->node, parent, p);
+ rb_insert_color(&cgrp->node, root);
+
+ return cgrp;
+}
+
+/*
+ * Find the cgroup with the given id in env->cgroups.tree, creating it with
+ * a copy of 'path' as its name when it does not exist yet.  The lookup and
+ * insertion run under the write side of env->cgroups.lock.
+ * Returns NULL if a new entry could not be allocated.
+ */
+struct cgroup *cgroup__findnew(struct perf_env *env, uint64_t id,
+ const char *path)
+{
+ struct cgroup *cgrp;
+
+ down_write(&env->cgroups.lock);
+ cgrp = __cgroup__findnew(&env->cgroups.tree, id, true, path);
+ up_write(&env->cgroups.lock);
+ return cgrp;
+}
+
+/*
+ * Find the cgroup with the given id in env->cgroups.tree without creating
+ * one.  The lookup runs under the read side of env->cgroups.lock.
+ * Returns NULL when no entry has that id.
+ */
+struct cgroup *cgroup__find(struct perf_env *env, uint64_t id)
+{
+ struct cgroup *cgrp;
+
+ down_read(&env->cgroups.lock);
+ /* create == false, so the NULL path is never used. */
+ cgrp = __cgroup__findnew(&env->cgroups.tree, id, false, NULL);
+ up_read(&env->cgroups.lock);
+ return cgrp;
+}
+
+/*
+ * Empty env->cgroups.tree: under the write side of env->cgroups.lock,
+ * repeatedly unlink the first node and call cgroup__put() on its cgroup
+ * until the tree is empty.
+ */
+void perf_env__purge_cgroups(struct perf_env *env)
+{
+ struct rb_node *node;
+ struct cgroup *cgrp;
+
+ down_write(&env->cgroups.lock);
+ while (!RB_EMPTY_ROOT(&env->cgroups.tree)) {
+ node = rb_first(&env->cgroups.tree);
+ cgrp = rb_entry(node, struct cgroup, node);
+
+ rb_erase(node, &env->cgroups.tree);
+ /* NOTE(review): presumably drops the reference set at creation - confirm against cgroup__put(). */
+ cgroup__put(cgrp);
+ }
+ up_write(&env->cgroups.lock);
+}
diff --git a/tools/perf/util/cgroup.h b/tools/perf/util/cgroup.h
index 2ec11f01090d..e98d5975fe55 100644
--- a/tools/perf/util/cgroup.h
+++ b/tools/perf/util/cgroup.h
@@ -3,16 +3,19 @@
#define __CGROUP_H__
#include <linux/refcount.h>
+#include <linux/rbtree.h>
+#include "util/env.h"
struct option;
struct cgroup {
- char *name;
- int fd;
- refcount_t refcnt;
+ struct rb_node node;
+ u64 id;
+ char *name;
+ int fd;
+ refcount_t refcnt;
};
-
extern int nr_cgroups; /* number of explicit cgroups defined */
struct cgroup *cgroup__get(struct cgroup *cgroup);
@@ -26,4 +29,10 @@ void evlist__set_default_cgroup(struct evlist *evlist, struct cgroup *cgroup);
int parse_cgroups(const struct option *opt, const char *str, int unset);
+struct cgroup *cgroup__findnew(struct perf_env *env, uint64_t id,
+ const char *path);
+struct cgroup *cgroup__find(struct perf_env *env, uint64_t id);
+
+void perf_env__purge_cgroups(struct perf_env *env);
+
#endif /* __CGROUP_H__ */
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index 0bc9c4d7fdc5..ef38eba56ed0 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -374,6 +374,18 @@ int perf_config_int(int *dest, const char *name, const char *value)
return 0;
}
+/*
+ * Parse 'value' as an integer and store it in the u8 pointed to by 'dest'.
+ *
+ * Returns 0 on success.  When 'value' cannot be parsed, or the parsed
+ * number does not fit in a u8, reports a bad config for 'name', leaves
+ * '*dest' untouched and returns -1.
+ */
+int perf_config_u8(u8 *dest, const char *name, const char *value)
+{
+ long ret = 0;
+
+ /* Reject out-of-range values instead of silently truncating them. */
+ if (!perf_parse_long(value, &ret) || ret < 0 || ret > 0xff) {
+ bad_config(name);
+ return -1;
+ }
+ *dest = ret;
+ return 0;
+}
+
static int perf_config_bool_or_int(const char *name, const char *value, int *is_bool)
{
int ret;
diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h
index bd0a5897c76a..c10b66dde2f3 100644
--- a/tools/perf/util/config.h
+++ b/tools/perf/util/config.h
@@ -29,6 +29,7 @@ typedef int (*config_fn_t)(const char *, const char *, void *);
int perf_default_config(const char *, const char *, void *);
int perf_config(config_fn_t fn, void *);
int perf_config_int(int *dest, const char *, const char *);
+int perf_config_u8(u8 *dest, const char *name, const char *value);
int perf_config_u64(u64 *dest, const char *, const char *);
int perf_config_bool(const char *, const char *);
int config_error_nonbool(const char *);
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 983b7388f22b..dc5c5e6fc502 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -317,7 +317,7 @@ static void set_max_cpu_num(void)
/* get the highest possible cpu number for a sparse allocation */
ret = snprintf(path, PATH_MAX, "%s/devices/system/cpu/possible", mnt);
- if (ret == PATH_MAX) {
+ if (ret >= PATH_MAX) {
pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX);
goto out;
}
@@ -328,7 +328,7 @@ static void set_max_cpu_num(void)
/* get the highest present cpu number for a sparse allocation */
ret = snprintf(path, PATH_MAX, "%s/devices/system/cpu/present", mnt);
- if (ret == PATH_MAX) {
+ if (ret >= PATH_MAX) {
pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX);
goto out;
}
@@ -356,7 +356,7 @@ static void set_max_node_num(void)
/* get the highest possible cpu number for a sparse allocation */
ret = snprintf(path, PATH_MAX, "%s/devices/system/node/possible", mnt);
- if (ret == PATH_MAX) {
+ if (ret >= PATH_MAX) {
pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX);
goto out;
}
@@ -441,7 +441,7 @@ int cpu__setup_cpunode_map(void)
return 0;
n = snprintf(path, PATH_MAX, "%s/devices/system/node", mnt);
- if (n == PATH_MAX) {
+ if (n >= PATH_MAX) {
pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX);
return -1;
}
@@ -456,7 +456,7 @@ int cpu__setup_cpunode_map(void)
continue;
n = snprintf(buf, PATH_MAX, "%s/%s", path, dent1->d_name);
- if (n == PATH_MAX) {
+ if (n >= PATH_MAX) {
pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX);
continue;
}
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 5471045ebf5c..62d2f9b9ce1b 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -363,6 +363,23 @@ struct cs_etm_packet_queue
return NULL;
}
+/*
+ * Exchange tidq->packet and tidq->prev_packet when branch samples,
+ * last-branch synthesis or instruction samples are enabled in 'etm';
+ * do nothing otherwise.
+ */
+static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm,
+ struct cs_etm_traceid_queue *tidq)
+{
+ struct cs_etm_packet *tmp;
+
+ if (etm->sample_branches || etm->synth_opts.last_branch ||
+ etm->sample_instructions) {
+ /*
+ * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
+ * the next incoming packet.
+ */
+ tmp = tidq->packet;
+ tidq->packet = tidq->prev_packet;
+ tidq->prev_packet = tmp;
+ }
+}
+
static void cs_etm__packet_dump(const char *pkt_string)
{
const char *color = PERF_COLOR_BLUE;
@@ -945,7 +962,7 @@ static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq,
if (packet->isa == CS_ETM_ISA_T32) {
u64 addr = packet->start_addr;
- while (offset > 0) {
+ while (offset) {
addr += cs_etm__t32_instr_size(etmq,
trace_chan_id, addr);
offset--;
@@ -1134,10 +1151,8 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->packet, &sample);
- if (etm->synth_opts.last_branch) {
- cs_etm__copy_last_branch_rb(etmq, tidq);
+ if (etm->synth_opts.last_branch)
sample.branch_stack = tidq->last_branch;
- }
if (etm->synth_opts.inject) {
ret = cs_etm__inject_event(event, &sample,
@@ -1153,9 +1168,6 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
"CS ETM Trace: failed to deliver instruction event, error %d\n",
ret);
- if (etm->synth_opts.last_branch)
- cs_etm__reset_last_branch_rb(tidq);
-
return ret;
}
@@ -1172,6 +1184,7 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
union perf_event *event = tidq->event_buf;
struct dummy_branch_stack {
u64 nr;
+ u64 hw_idx;
struct branch_entry entries;
} dummy_bs;
u64 ip;
@@ -1202,6 +1215,7 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
if (etm->synth_opts.last_branch) {
dummy_bs = (struct dummy_branch_stack){
.nr = 1,
+ .hw_idx = -1ULL,
.entries = {
.from = sample.ip,
.to = sample.addr,
@@ -1340,12 +1354,14 @@ static int cs_etm__sample(struct cs_etm_queue *etmq,
struct cs_etm_traceid_queue *tidq)
{
struct cs_etm_auxtrace *etm = etmq->etm;
- struct cs_etm_packet *tmp;
int ret;
u8 trace_chan_id = tidq->trace_chan_id;
- u64 instrs_executed = tidq->packet->instr_count;
+ u64 instrs_prev;
+
+ /* Get instructions remainder from previous packet */
+ instrs_prev = tidq->period_instructions;
- tidq->period_instructions += instrs_executed;
+ tidq->period_instructions += tidq->packet->instr_count;
/*
* Record a branch when the last instruction in
@@ -1363,26 +1379,80 @@ static int cs_etm__sample(struct cs_etm_queue *etmq,
* TODO: allow period to be defined in cycles and clock time
*/
- /* Get number of instructions executed after the sample point */
- u64 instrs_over = tidq->period_instructions -
- etm->instructions_sample_period;
+ /*
+ * Below diagram demonstrates the instruction samples
+ * generation flows:
+ *
+ * Instrs Instrs Instrs Instrs
+ * Sample(n) Sample(n+1) Sample(n+2) Sample(n+3)
+ * | | | |
+ * V V V V
+ * --------------------------------------------------
+ * ^ ^
+ * | |
+ * Period Period
+ * instructions(Pi) instructions(Pi')
+ *
+ * | |
+ * \---------------- -----------------/
+ * V
+ * tidq->packet->instr_count
+ *
+ * Instrs Sample(n...) are the synthesised samples occurring
+ * every etm->instructions_sample_period instructions - as
+ * defined on the perf command line. Sample(n) is the last
+ * sample before the current etm packet; samples n+1 to n+3
+ * are generated from the current etm packet.
+ *
+ * tidq->packet->instr_count represents the number of
+ * instructions in the current etm packet.
+ *
+ * Period instructions (Pi) contains the number of
+ * instructions executed after the sample point(n) from the
+ * previous etm packet. This will always be less than
+ * etm->instructions_sample_period.
+ *
+ * When generating new samples, two groups of instructions are
+ * combined to produce sample(n+1): the tail of the old packet
+ * and the head of the newly arrived packet. Sample(n+2) and
+ * sample(n+3) each consume one full sample period of
+ * instructions. After sample(n+3), the remaining instructions
+ * are left for later packets and are stored in
+ * tidq->period_instructions for the next round of calculation.
+ */
/*
- * Calculate the address of the sampled instruction (-1 as
- * sample is reported as though instruction has just been
- * executed, but PC has not advanced to next instruction)
+ * Get the initial offset into the current packet instructions;
+ * entry conditions ensure that instrs_prev is less than
+ * etm->instructions_sample_period.
*/
- u64 offset = (instrs_executed - instrs_over - 1);
- u64 addr = cs_etm__instr_addr(etmq, trace_chan_id,
- tidq->packet, offset);
+ u64 offset = etm->instructions_sample_period - instrs_prev;
+ u64 addr;
- ret = cs_etm__synth_instruction_sample(
- etmq, tidq, addr, etm->instructions_sample_period);
- if (ret)
- return ret;
+ /* Prepare last branches for instruction sample */
+ if (etm->synth_opts.last_branch)
+ cs_etm__copy_last_branch_rb(etmq, tidq);
- /* Carry remaining instructions into next sample period */
- tidq->period_instructions = instrs_over;
+ while (tidq->period_instructions >=
+ etm->instructions_sample_period) {
+ /*
+ * Calculate the address of the sampled instruction (-1
+ * as sample is reported as though instruction has just
+ * been executed, but PC has not advanced to next
+ * instruction)
+ */
+ addr = cs_etm__instr_addr(etmq, trace_chan_id,
+ tidq->packet, offset - 1);
+ ret = cs_etm__synth_instruction_sample(
+ etmq, tidq, addr,
+ etm->instructions_sample_period);
+ if (ret)
+ return ret;
+
+ offset += etm->instructions_sample_period;
+ tidq->period_instructions -=
+ etm->instructions_sample_period;
+ }
}
if (etm->sample_branches) {
@@ -1404,15 +1474,7 @@ static int cs_etm__sample(struct cs_etm_queue *etmq,
}
}
- if (etm->sample_branches || etm->synth_opts.last_branch) {
- /*
- * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
- * the next incoming packet.
- */
- tmp = tidq->packet;
- tidq->packet = tidq->prev_packet;
- tidq->prev_packet = tmp;
- }
+ cs_etm__packet_swap(etm, tidq);
return 0;
}
@@ -1441,7 +1503,6 @@ static int cs_etm__flush(struct cs_etm_queue *etmq,
{
int err = 0;
struct cs_etm_auxtrace *etm = etmq->etm;
- struct cs_etm_packet *tmp;
/* Handle start tracing packet */
if (tidq->prev_packet->sample_type == CS_ETM_EMPTY)
@@ -1449,6 +1510,11 @@ static int cs_etm__flush(struct cs_etm_queue *etmq,
if (etmq->etm->synth_opts.last_branch &&
tidq->prev_packet->sample_type == CS_ETM_RANGE) {
+ u64 addr;
+
+ /* Prepare last branches for instruction sample */
+ cs_etm__copy_last_branch_rb(etmq, tidq);
+
/*
* Generate a last branch event for the branches left in the
* circular buffer at the end of the trace.
@@ -1456,7 +1522,7 @@ static int cs_etm__flush(struct cs_etm_queue *etmq,
* Use the address of the end of the last reported execution
* range
*/
- u64 addr = cs_etm__last_executed_instr(tidq->prev_packet);
+ addr = cs_etm__last_executed_instr(tidq->prev_packet);
err = cs_etm__synth_instruction_sample(
etmq, tidq, addr,
@@ -1476,15 +1542,11 @@ static int cs_etm__flush(struct cs_etm_queue *etmq,
}
swap_packet:
- if (etm->sample_branches || etm->synth_opts.last_branch) {
- /*
- * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
- * the next incoming packet.
- */
- tmp = tidq->packet;
- tidq->packet = tidq->prev_packet;
- tidq->prev_packet = tmp;
- }
+ cs_etm__packet_swap(etm, tidq);
+
+ /* Reset last branches after flush the trace */
+ if (etm->synth_opts.last_branch)
+ cs_etm__reset_last_branch_rb(tidq);
return err;
}
@@ -1505,11 +1567,16 @@ static int cs_etm__end_block(struct cs_etm_queue *etmq,
*/
if (etmq->etm->synth_opts.last_branch &&
tidq->prev_packet->sample_type == CS_ETM_RANGE) {
+ u64 addr;
+
+ /* Prepare last branches for instruction sample */
+ cs_etm__copy_last_branch_rb(etmq, tidq);
+
/*
* Use the address of the end of the last reported execution
* range.
*/
- u64 addr = cs_etm__last_executed_instr(tidq->prev_packet);
+ addr = cs_etm__last_executed_instr(tidq->prev_packet);
err = cs_etm__synth_instruction_sample(
etmq, tidq, addr,
diff --git a/tools/perf/util/dsos.c b/tools/perf/util/dsos.c
index 591707c69c39..939471731ea6 100644
--- a/tools/perf/util/dsos.c
+++ b/tools/perf/util/dsos.c
@@ -26,13 +26,29 @@ static int __dso_id__cmp(struct dso_id *a, struct dso_id *b)
return 0;
}
+/*
+ * Return true when 'id' is NULL or when its maj, min, ino and
+ * ino_generation fields are all zero.
+ */
+static bool dso_id__empty(struct dso_id *id)
+{
+ if (!id)
+ return true;
+
+ return !id->maj && !id->min && !id->ino && !id->ino_generation;
+}
+
+/*
+ * Copy maj, min, ino and ino_generation from 'id' into dso->id.
+ * 'id' is dereferenced unconditionally, so it must not be NULL.
+ */
+static void dso__inject_id(struct dso *dso, struct dso_id *id)
+{
+ dso->id.maj = id->maj;
+ dso->id.min = id->min;
+ dso->id.ino = id->ino;
+ dso->id.ino_generation = id->ino_generation;
+}
+
static int dso_id__cmp(struct dso_id *a, struct dso_id *b)
{
/*
* The second is always dso->id, so zeroes if not set, assume passing
* NULL for a means a zeroed id
*/
- if (a == NULL)
+ if (dso_id__empty(a) || dso_id__empty(b))
return 0;
return __dso_id__cmp(a, b);
@@ -249,6 +265,10 @@ struct dso *__dsos__addnew(struct dsos *dsos, const char *name)
static struct dso *__dsos__findnew_id(struct dsos *dsos, const char *name, struct dso_id *id)
{
struct dso *dso = __dsos__find_id(dsos, name, id, false);
+
+ if (dso && dso_id__empty(&dso->id) && !dso_id__empty(id))
+ dso__inject_id(dso, id);
+
return dso ? dso : __dsos__addnew_id(dsos, name, id);
}
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index 6242a9215df7..fadc59708ece 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -6,6 +6,7 @@
#include <linux/ctype.h>
#include <linux/zalloc.h>
#include "bpf-event.h"
+#include "cgroup.h"
#include <errno.h>
#include <sys/utsname.h>
#include <bpf/libbpf.h>
@@ -168,6 +169,7 @@ void perf_env__exit(struct perf_env *env)
int i;
perf_env__purge_bpf(env);
+ perf_env__purge_cgroups(env);
zfree(&env->hostname);
zfree(&env->os_release);
zfree(&env->version);
@@ -343,11 +345,11 @@ static const char *normalize_arch(char *arch)
const char *perf_env__arch(struct perf_env *env)
{
- struct utsname uts;
char *arch_name;
if (!env || !env->arch) { /* Assume local operation */
- if (uname(&uts) < 0)
+ static struct utsname uts = { .machine[0] = '\0', };
+ if (uts.machine[0] == '\0' && uname(&uts) < 0)
return NULL;
arch_name = uts.machine;
} else
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index 11d05ae3606a..7632075a8792 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -88,6 +88,12 @@ struct perf_env {
u32 btfs_cnt;
} bpf_progs;
+ /* same reason as above (for perf-top) */
+ struct {
+ struct rw_semaphore lock;
+ struct rb_root tree;
+ } cgroups;
+
/* For fast cpu to numa node lookup via perf_env__numa_node */
int *numa_map;
int nr_numa_map;
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index c5447ff516a2..dc0e11214ae1 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -54,6 +54,7 @@ static const char *perf_event__names[] = {
[PERF_RECORD_NAMESPACES] = "NAMESPACES",
[PERF_RECORD_KSYMBOL] = "KSYMBOL",
[PERF_RECORD_BPF_EVENT] = "BPF_EVENT",
+ [PERF_RECORD_CGROUP] = "CGROUP",
[PERF_RECORD_HEADER_ATTR] = "ATTR",
[PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE",
[PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA",
@@ -180,6 +181,12 @@ size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp)
return ret;
}
+/*
+ * Print the id and path of a cgroup event to 'fp' on one line.
+ * Returns the value returned by fprintf().
+ */
+size_t perf_event__fprintf_cgroup(union perf_event *event, FILE *fp)
+{
+ return fprintf(fp, " cgroup: %" PRI_lu64 " %s\n",
+ event->cgroup.id, event->cgroup.path);
+}
+
int perf_event__process_comm(struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
@@ -196,6 +203,14 @@ int perf_event__process_namespaces(struct perf_tool *tool __maybe_unused,
return machine__process_namespaces_event(machine, event, sample);
}
+/*
+ * Tool callback for cgroup events: forwards the event and sample to
+ * machine__process_cgroup_event() and returns its result.  'tool' is unused.
+ */
+int perf_event__process_cgroup(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ return machine__process_cgroup_event(machine, event, sample);
+}
+
int perf_event__process_lost(struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
@@ -417,6 +432,9 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp)
case PERF_RECORD_NAMESPACES:
ret += perf_event__fprintf_namespaces(event, fp);
break;
+ case PERF_RECORD_CGROUP:
+ ret += perf_event__fprintf_cgroup(event, fp);
+ break;
case PERF_RECORD_MMAP2:
ret += perf_event__fprintf_mmap2(event, fp);
break;
@@ -599,10 +617,23 @@ int machine__resolve(struct machine *machine, struct addr_location *al,
al->sym = map__find_symbol(al->map, al->addr);
}
- if (symbol_conf.sym_list &&
- (!al->sym || !strlist__has_entry(symbol_conf.sym_list,
- al->sym->name))) {
- al->filtered |= (1 << HIST_FILTER__SYMBOL);
+ /*
+ * Symbol filter: mark the sample as filtered unless the resolved
+ * symbol's name, or its start address formatted as hex, is in
+ * symbol_conf.sym_list.
+ */
+ if (symbol_conf.sym_list) {
+ int ret = 0;
+ char al_addr_str[32];
+ size_t sz = sizeof(al_addr_str);
+
+ if (al->sym) {
+ ret = strlist__has_entry(symbol_conf.sym_list,
+ al->sym->name);
+ }
+ /*
+ * No match by name: retry with the symbol's start address.
+ * This needs a resolved symbol, so skip it when al->sym is
+ * NULL instead of dereferencing it.
+ */
+ if (!ret && al->sym) {
+ snprintf(al_addr_str, sz, "0x%"PRIx64,
+ al->map->unmap_ip(al->map, al->sym->start));
+ ret = strlist__has_entry(symbol_conf.sym_list,
+ al_addr_str);
+ }
+ if (!ret)
+ al->filtered |= (1 << HIST_FILTER__SYMBOL);
+ }
return 0;
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 85223159737c..b8289f160f07 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -135,10 +135,12 @@ struct perf_sample {
u32 raw_size;
u64 data_src;
u64 phys_addr;
+ u64 cgroup;
u32 flags;
u16 insn_len;
u8 cpumode;
u16 misc;
+ bool no_hw_idx; /* No hw_idx collected in branch_stack */
char insn[MAX_INSN];
void *raw_data;
struct ip_callchain *callchain;
@@ -321,6 +323,10 @@ int perf_event__process_namespaces(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
+int perf_event__process_cgroup(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
int perf_event__process_mmap(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
@@ -376,6 +382,7 @@ size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_thread_map(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_cpu_map(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_cgroup(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_ksymbol(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_bpf(union perf_event *event, FILE *fp);
size_t perf_event__fprintf(union perf_event *event, FILE *fp);
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index a69e64236120..eb880efbce16 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -712,7 +712,8 @@ static void __perf_evsel__config_callchain(struct evsel *evsel,
attr->branch_sample_type = PERF_SAMPLE_BRANCH_USER |
PERF_SAMPLE_BRANCH_CALL_STACK |
PERF_SAMPLE_BRANCH_NO_CYCLES |
- PERF_SAMPLE_BRANCH_NO_FLAGS;
+ PERF_SAMPLE_BRANCH_NO_FLAGS |
+ PERF_SAMPLE_BRANCH_HW_INDEX;
}
} else
pr_warning("Cannot use LBR callstack with branch stack. "
@@ -763,7 +764,8 @@ perf_evsel__reset_callgraph(struct evsel *evsel,
if (param->record_mode == CALLCHAIN_LBR) {
perf_evsel__reset_sample_bit(evsel, BRANCH_STACK);
attr->branch_sample_type &= ~(PERF_SAMPLE_BRANCH_USER |
- PERF_SAMPLE_BRANCH_CALL_STACK);
+ PERF_SAMPLE_BRANCH_CALL_STACK |
+ PERF_SAMPLE_BRANCH_HW_INDEX);
}
if (param->record_mode == CALLCHAIN_DWARF) {
perf_evsel__reset_sample_bit(evsel, REGS_USER);
@@ -808,12 +810,12 @@ static void apply_config_terms(struct evsel *evsel,
perf_evsel__reset_sample_bit(evsel, TIME);
break;
case PERF_EVSEL__CONFIG_TERM_CALLGRAPH:
- callgraph_buf = term->val.callgraph;
+ callgraph_buf = term->val.str;
break;
case PERF_EVSEL__CONFIG_TERM_BRANCH:
- if (term->val.branch && strcmp(term->val.branch, "no")) {
+ if (term->val.str && strcmp(term->val.str, "no")) {
perf_evsel__set_sample_bit(evsel, BRANCH_STACK);
- parse_branch_str(term->val.branch,
+ parse_branch_str(term->val.str,
&attr->branch_sample_type);
} else
perf_evsel__reset_sample_bit(evsel, BRANCH_STACK);
@@ -1102,6 +1104,11 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts,
if (opts->record_namespaces)
attr->namespaces = track;
+ if (opts->record_cgroup) {
+ attr->cgroup = track && !perf_missing_features.cgroup;
+ perf_evsel__set_sample_bit(evsel, CGROUP);
+ }
+
if (opts->record_switch_events)
attr->context_switch = track;
@@ -1265,6 +1272,8 @@ static void perf_evsel__free_config_terms(struct evsel *evsel)
list_for_each_entry_safe(term, h, &evsel->config_terms, list) {
list_del_init(&term->list);
+ if (term->free_str)
+ zfree(&term->val.str);
free(term);
}
}
@@ -1283,6 +1292,7 @@ void perf_evsel__exit(struct evsel *evsel)
perf_thread_map__put(evsel->core.threads);
zfree(&evsel->group_name);
zfree(&evsel->name);
+ zfree(&evsel->pmu_name);
perf_evsel__object.fini(evsel);
}
@@ -1671,6 +1681,8 @@ fallback_missing_features:
evsel->core.attr.ksymbol = 0;
if (perf_missing_features.bpf)
evsel->core.attr.bpf_event = 0;
+ if (perf_missing_features.branch_hw_idx)
+ evsel->core.attr.branch_sample_type &= ~PERF_SAMPLE_BRANCH_HW_INDEX;
retry_sample_id:
if (perf_missing_features.sample_id_all)
evsel->core.attr.sample_id_all = 0;
@@ -1782,7 +1794,16 @@ try_fallback:
* Must probe features in the order they were added to the
* perf_event_attr interface.
*/
- if (!perf_missing_features.aux_output && evsel->core.attr.aux_output) {
+ if (!perf_missing_features.cgroup && evsel->core.attr.cgroup) {
+ perf_missing_features.cgroup = true;
+ pr_debug2_peo("Kernel has no cgroup sampling support, bailing out\n");
+ goto out_close;
+ } else if (!perf_missing_features.branch_hw_idx &&
+ (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX)) {
+ perf_missing_features.branch_hw_idx = true;
+ pr_debug2("switching off branch HW index support\n");
+ goto fallback_missing_features;
+ } else if (!perf_missing_features.aux_output && evsel->core.attr.aux_output) {
perf_missing_features.aux_output = true;
pr_debug2_peo("Kernel has no attr.aux_output support, bailing out\n");
goto out_close;
@@ -2167,7 +2188,12 @@ int perf_evsel__parse_sample(struct evsel *evsel, union perf_event *event,
if (data->branch_stack->nr > max_branch_nr)
return -EFAULT;
+
sz = data->branch_stack->nr * sizeof(struct branch_entry);
+ if (perf_evsel__has_branch_hw_idx(evsel))
+ sz += sizeof(u64);
+ else
+ data->no_hw_idx = true;
OVERFLOW_CHECK(array, sz, max_size);
array = (void *)array + sz;
}
@@ -2250,6 +2276,12 @@ int perf_evsel__parse_sample(struct evsel *evsel, union perf_event *event,
array++;
}
+ data->cgroup = 0;
+ if (type & PERF_SAMPLE_CGROUP) {
+ data->cgroup = *array;
+ array++;
+ }
+
if (type & PERF_SAMPLE_AUX) {
OVERFLOW_CHECK_u64(array);
sz = *array++;
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index dc14f4a823cd..53187c501ee8 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -119,6 +119,8 @@ struct perf_missing_features {
bool ksymbol;
bool bpf;
bool aux_output;
+ bool branch_hw_idx;
+ bool cgroup;
};
extern struct perf_missing_features perf_missing_features;
@@ -389,6 +391,11 @@ static inline bool perf_evsel__has_branch_callstack(const struct evsel *evsel)
return evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK;
}
+/* Return true when the evsel's branch_sample_type has PERF_SAMPLE_BRANCH_HW_INDEX set. */
+static inline bool perf_evsel__has_branch_hw_idx(const struct evsel *evsel)
+{
+ return evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX;
+}
+
static inline bool evsel__has_callchain(const struct evsel *evsel)
{
return (evsel->core.attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0;
diff --git a/tools/perf/util/evsel_config.h b/tools/perf/util/evsel_config.h
index 1f8d2fe0b66e..e026ab67b008 100644
--- a/tools/perf/util/evsel_config.h
+++ b/tools/perf/util/evsel_config.h
@@ -32,22 +32,21 @@ enum evsel_term_type {
struct perf_evsel_config_term {
struct list_head list;
enum evsel_term_type type;
+ bool free_str;
union {
u64 period;
u64 freq;
bool time;
- char *callgraph;
- char *drv_cfg;
u64 stack_user;
int max_stack;
bool inherit;
bool overwrite;
- char *branch;
unsigned long max_events;
bool percore;
bool aux_output;
u32 aux_sample_size;
u64 cfg_chg;
+ char *str;
} val;
bool weak;
};
diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c
new file mode 100644
index 000000000000..fd192ddf93c1
--- /dev/null
+++ b/tools/perf/util/expr.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdbool.h>
+#include <assert.h>
+#include "expr.h"
+#include "expr-bison.h"
+#define YY_EXTRA_TYPE int
+#include "expr-flex.h"
+
+#ifdef PARSER_DEBUG
+extern int expr_debug;
+#endif
+
+/*
+ * Append an id with the given name and value to 'ctx'.
+ *
+ * Only the 'name' pointer is stored (no copy is made), so the caller must
+ * make sure the string is allocated and stays valid while 'ctx' is in use.
+ * Asserts that 'ctx' holds fewer than MAX_PARSE_ID ids on entry.
+ */
+void expr__add_id(struct parse_ctx *ctx, const char *name, double val)
+{
+ int idx;
+
+ assert(ctx->num_ids < MAX_PARSE_ID);
+ idx = ctx->num_ids++;
+ ctx->ids[idx].name = name;
+ ctx->ids[idx].val = val;
+}
+
+/* Reset 'ctx' so that it holds no ids. */
+void expr__ctx_init(struct parse_ctx *ctx)
+{
+ ctx->num_ids = 0;
+}
+
+/*
+ * Parse 'expr' with a freshly created scanner whose extra data is 'start'.
+ *
+ * 'val' and 'ctx' are handed to expr_parse() together with the scanner.
+ * The scanner and its string buffer are torn down before returning.
+ * Returns the expr_lex_init_extra() error if the scanner cannot be set up,
+ * otherwise the result of expr_parse().
+ */
+static int
+__expr__parse(double *val, struct parse_ctx *ctx, const char *expr,
+ int start)
+{
+ YY_BUFFER_STATE buffer;
+ void *scanner;
+ int ret;
+
+ ret = expr_lex_init_extra(start, &scanner);
+ if (ret)
+ return ret;
+
+ buffer = expr__scan_string(expr, scanner);
+
+#ifdef PARSER_DEBUG
+ expr_debug = 1;
+#endif
+
+ ret = expr_parse(val, ctx, scanner);
+
+ /* Release the buffer first, then the scanner that owns it. */
+ expr__flush_buffer(buffer, scanner);
+ expr__delete_buffer(buffer, scanner);
+ expr_lex_destroy(scanner);
+ return ret;
+}
+
+/*
+ * Parse 'expr' starting from the EXPR_PARSE token, passing 'final_val' and
+ * 'ctx' through to the parser.  Returns 0 on success and -1 on any failure.
+ */
+int expr__parse(double *final_val, struct parse_ctx *ctx, const char *expr)
+{
+ return __expr__parse(final_val, ctx, expr, EXPR_PARSE) ? -1 : 0;
+}
+
+/*
+ * Return true when 'val' matches, ignoring case, either 'one' (if it is
+ * non-NULL) or any of the first 'num_other' strings in 'other'.
+ */
+static bool
+already_seen(const char *val, const char *one, const char **other,
+ int num_other)
+{
+ int i;
+
+ if (one && !strcasecmp(one, val))
+ return true;
+ for (i = 0; i < num_other; i++)
+ if (!strcasecmp(other[i], val))
+ return true;
+ return false;
+}
+
+/*
+ * Collect the distinct identifiers referenced by 'expr', skipping any that
+ * match 'one' (names are compared case-insensitively).
+ *
+ * On success returns 0, stores a malloc'ed, NULL-terminated array of
+ * strdup'ed names in '*other' and their count in '*num_other'; the caller
+ * owns the array and its strings.
+ *
+ * Returns -1 when parsing fails ('*other' and '*num_other' untouched) or
+ * when a name cannot be duplicated ('*other' set to NULL, '*num_other' to
+ * 0), and -ENOMEM when the array cannot be allocated.  In every failure
+ * case nothing is left for the caller to free.
+ */
+int expr__find_other(const char *expr, const char *one, const char ***other,
+ int *num_other)
+{
+ int err, i = 0, j = 0;
+ struct parse_ctx ctx;
+
+ expr__ctx_init(&ctx);
+ err = __expr__parse(NULL, &ctx, expr, EXPR_OTHER);
+ if (err)
+ return -1;
+
+ /* One extra slot for the terminating NULL. */
+ *other = malloc((ctx.num_ids + 1) * sizeof(char *));
+ if (!*other)
+ return -ENOMEM;
+
+ for (i = 0, j = 0; i < ctx.num_ids; i++) {
+ const char *str = ctx.ids[i].name;
+
+ if (already_seen(str, one, *other, j))
+ continue;
+
+ str = strdup(str);
+ if (!str)
+ goto out;
+ (*other)[j++] = str;
+ }
+ (*other)[j] = NULL;
+
+out:
+ /* The loop stops early only when strdup() failed: undo everything. */
+ if (i != ctx.num_ids) {
+ /* Free exactly the j names stored so far, leaving j at 0. */
+ while (j > 0)
+ free((char *) (*other)[--j]);
+ free(*other);
+ *other = NULL;
+ err = -1;
+ }
+
+ *num_other = j;
+ return err;
+}
diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h
index 046160831f90..9377538f4097 100644
--- a/tools/perf/util/expr.h
+++ b/tools/perf/util/expr.h
@@ -2,7 +2,7 @@
#ifndef PARSE_CTX_H
#define PARSE_CTX_H 1
-#define EXPR_MAX_OTHER 15
+#define EXPR_MAX_OTHER 20
#define MAX_PARSE_ID EXPR_MAX_OTHER
struct parse_id {
@@ -17,10 +17,8 @@ struct parse_ctx {
void expr__ctx_init(struct parse_ctx *ctx);
void expr__add_id(struct parse_ctx *ctx, const char *id, double val);
-#ifndef IN_EXPR_Y
-int expr__parse(double *final_val, struct parse_ctx *ctx, const char **pp);
-#endif
-int expr__find_other(const char *p, const char *one, const char ***other,
+int expr__parse(double *final_val, struct parse_ctx *ctx, const char *expr);
+int expr__find_other(const char *expr, const char *one, const char ***other,
int *num_other);
#endif
diff --git a/tools/perf/util/expr.l b/tools/perf/util/expr.l
new file mode 100644
index 000000000000..eaad29243c23
--- /dev/null
+++ b/tools/perf/util/expr.l
@@ -0,0 +1,114 @@
+%option prefix="expr_"
+%option reentrant
+%option bison-bridge
+
+%{
+#include <linux/compiler.h>
+#include "expr.h"
+#include "expr-bison.h"
+
+char *expr_get_text(yyscan_t yyscanner);
+YYSTYPE *expr_get_lval(yyscan_t yyscanner);
+
+/*
+ * Convert @str to an unsigned integer in @base and store it in the
+ * semantic value. Yields @token when the conversion succeeds, or
+ * EXPR_ERROR when strtoull() reports a failure through errno.
+ */
+static int __value(YYSTYPE *yylval, char *str, int base, int token)
+{
+	u64 parsed;
+
+	errno = 0;
+	parsed = strtoull(str, NULL, base);
+	if (errno == 0) {
+		yylval->num = parsed;
+		return token;
+	}
+
+	return EXPR_ERROR;
+}
+
+/* Turn the scanner's current text into a NUMBER token, parsed in @base. */
+static int value(yyscan_t scanner, int base)
+{
+	YYSTYPE *lval = expr_get_lval(scanner);
+	char *matched = expr_get_text(scanner);
+	int token = __value(lval, matched, base, NUMBER);
+
+	return token;
+}
+
+/*
+ * Rewrite @str in place:
+ *  - '@' becomes '/', so that pmu/event/ can be written as pmu@event@
+ *    without conflicts with normal division;
+ *  - a backslash is dropped and the character following it is kept as is.
+ *
+ * A backslash that is the last character has nothing to escape and is kept
+ * literally, so the scan never steps past the terminating NUL.
+ *
+ * Returns @str.
+ */
+static char *normalize(char *str)
+{
+	char *ret = str;
+	char *dst = str;
+
+	while (*str) {
+		if (*str == '@')
+			*dst++ = '/';
+		else if (*str == '\\' && str[1] != '\0')
+			*dst++ = *++str;
+		else
+			*dst++ = *str;
+		str++;
+	}
+
+	*dst = 0x0;
+	return ret;
+}
+
+/*
+ * Hand the parser a private, normalized copy of the matched text.
+ *
+ * Stores a strdup()ed copy of the scanner's current text, passed through
+ * normalize(), in yylval->str and returns @token. Returns EXPR_ERROR when
+ * the copy cannot be allocated. The copy is not freed here.
+ */
+static int str(yyscan_t scanner, int token)
+{
+	YYSTYPE *yylval = expr_get_lval(scanner);
+	char *text = expr_get_text(scanner);
+
+	/* Check the allocation before normalize() dereferences it. */
+	yylval->str = strdup(text);
+	if (!yylval->str)
+		return EXPR_ERROR;
+
+	yylval->str = normalize(yylval->str);
+	return token;
+}
+%}
+
+number [0-9]+
+
+sch [-,=]
+spec \\{sch}
+sym [0-9a-zA-Z_\.:@]+
+symbol {spec}*{sym}*{spec}*{sym}*
+
+%%
+ {
+ int start_token;
+
+ start_token = expr_get_extra(yyscanner);
+
+ if (start_token) {
+ expr_set_extra(NULL, yyscanner);
+ return start_token;
+ }
+ }
+
+max { return MAX; }
+min { return MIN; }
+if { return IF; }
+else { return ELSE; }
+#smt_on { return SMT_ON; }
+{number} { return value(yyscanner, 10); }
+{symbol} { return str(yyscanner, ID); }
+"|" { return '|'; }
+"^" { return '^'; }
+"&" { return '&'; }
+"-" { return '-'; }
+"+" { return '+'; }
+"*" { return '*'; }
+"/" { return '/'; }
+"%" { return '%'; }
+"(" { return '('; }
+")" { return ')'; }
+"," { return ','; }
+. { }
+%%
+
+/*
+ * Scanner wrap hook; the scanner argument is unused and the function
+ * always returns 1.
+ * NOTE(review): in flex a non-zero return from the wrap function means
+ * there is no further input to scan - confirm against the flex manual.
+ */
+int expr_wrap(void *scanner __maybe_unused)
+{
+ return 1;
+}
diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y
index f9a20a39b64a..4720cbe79357 100644
--- a/tools/perf/util/expr.y
+++ b/tools/perf/util/expr.y
@@ -1,30 +1,32 @@
/* Simple expression parser */
%{
+#define YYDEBUG 1
+#include <stdio.h>
#include "util.h"
#include "util/debug.h"
#include <stdlib.h> // strtod()
#define IN_EXPR_Y 1
#include "expr.h"
#include "smt.h"
-#include <assert.h>
#include <string.h>
-#define MAXIDLEN 256
%}
-%pure-parser
+%define api.pure full
+
%parse-param { double *final_val }
%parse-param { struct parse_ctx *ctx }
-%parse-param { const char **pp }
-%lex-param { const char **pp }
+%parse-param {void *scanner}
+%lex-param {void* scanner}
%union {
- double num;
- char id[MAXIDLEN+1];
+ double num;
+ char *str;
}
+%token EXPR_PARSE EXPR_OTHER EXPR_ERROR
%token <num> NUMBER
-%token <id> ID
+%token <str> ID
%token MIN MAX IF ELSE SMT_ON
%left MIN MAX IF
%left '|'
@@ -36,11 +38,9 @@
%type <num> expr if_expr
%{
-static int expr__lex(YYSTYPE *res, const char **pp);
-
-static void expr__error(double *final_val __maybe_unused,
+static void expr_error(double *final_val __maybe_unused,
struct parse_ctx *ctx __maybe_unused,
- const char **pp __maybe_unused,
+ void *scanner,
const char *s)
{
pr_debug("%s\n", s);
@@ -62,6 +62,27 @@ static int lookup_id(struct parse_ctx *ctx, char *id, double *val)
%}
%%
+start:
+EXPR_PARSE all_expr
+|
+EXPR_OTHER all_other
+
+all_other: all_other other
+|
+
+/*
+ * One token of an expression scanned in EXPR_OTHER mode. Identifiers are
+ * recorded in ctx->ids; keyword, number and operator tokens (including the
+ * ',' the lexer returns for argument lists) are accepted and ignored.
+ */
+other: ID
+{
+	if (ctx->num_ids + 1 >= EXPR_MAX_OTHER) {
+		pr_err("failed: way too many variables");
+		/* $1 came from the lexer's strdup() and is stored nowhere. */
+		free($1);
+		YYABORT;
+	}
+
+	ctx->ids[ctx->num_ids++].name = $1;
+}
+|
+MIN | MAX | IF | ELSE | SMT_ON | NUMBER | '|' | '^' | '&' | '-' | '+' | '*' | '/' | '%' | '(' | ')' | ','
+
+
all_expr: if_expr { *final_val = $1; }
;
@@ -92,146 +113,3 @@ expr: NUMBER
;
%%
-
-static int expr__symbol(YYSTYPE *res, const char *p, const char **pp)
-{
- char *dst = res->id;
- const char *s = p;
-
- if (*p == '#')
- *dst++ = *p++;
-
- while (isalnum(*p) || *p == '_' || *p == '.' || *p == ':' || *p == '@' || *p == '\\') {
- if (p - s >= MAXIDLEN)
- return -1;
- /*
- * Allow @ instead of / to be able to specify pmu/event/ without
- * conflicts with normal division.
- */
- if (*p == '@')
- *dst++ = '/';
- else if (*p == '\\')
- *dst++ = *++p;
- else
- *dst++ = *p;
- p++;
- }
- *dst = 0;
- *pp = p;
- dst = res->id;
- switch (dst[0]) {
- case 'm':
- if (!strcmp(dst, "min"))
- return MIN;
- if (!strcmp(dst, "max"))
- return MAX;
- break;
- case 'i':
- if (!strcmp(dst, "if"))
- return IF;
- break;
- case 'e':
- if (!strcmp(dst, "else"))
- return ELSE;
- break;
- case '#':
- if (!strcasecmp(dst, "#smt_on"))
- return SMT_ON;
- break;
- }
- return ID;
-}
-
-static int expr__lex(YYSTYPE *res, const char **pp)
-{
- int tok;
- const char *s;
- const char *p = *pp;
-
- while (isspace(*p))
- p++;
- s = p;
- switch (*p++) {
- case '#':
- case 'a' ... 'z':
- case 'A' ... 'Z':
- return expr__symbol(res, p - 1, pp);
- case '0' ... '9': case '.':
- res->num = strtod(s, (char **)&p);
- tok = NUMBER;
- break;
- default:
- tok = *s;
- break;
- }
- *pp = p;
- return tok;
-}
-
-/* Caller must make sure id is allocated */
-void expr__add_id(struct parse_ctx *ctx, const char *name, double val)
-{
- int idx;
- assert(ctx->num_ids < MAX_PARSE_ID);
- idx = ctx->num_ids++;
- ctx->ids[idx].name = name;
- ctx->ids[idx].val = val;
-}
-
-void expr__ctx_init(struct parse_ctx *ctx)
-{
- ctx->num_ids = 0;
-}
-
-static bool already_seen(const char *val, const char *one, const char **other,
- int num_other)
-{
- int i;
-
- if (one && !strcasecmp(one, val))
- return true;
- for (i = 0; i < num_other; i++)
- if (!strcasecmp(other[i], val))
- return true;
- return false;
-}
-
-int expr__find_other(const char *p, const char *one, const char ***other,
- int *num_otherp)
-{
- const char *orig = p;
- int err = -1;
- int num_other;
-
- *other = malloc((EXPR_MAX_OTHER + 1) * sizeof(char *));
- if (!*other)
- return -1;
-
- num_other = 0;
- for (;;) {
- YYSTYPE val;
- int tok = expr__lex(&val, &p);
- if (tok == 0) {
- err = 0;
- break;
- }
- if (tok == ID && !already_seen(val.id, one, *other, num_other)) {
- if (num_other >= EXPR_MAX_OTHER - 1) {
- pr_debug("Too many extra events in %s\n", orig);
- break;
- }
- (*other)[num_other] = strdup(val.id);
- if (!(*other)[num_other])
- return -1;
- num_other++;
- }
- }
- (*other)[num_other] = NULL;
- *num_otherp = num_other;
- if (err) {
- *num_otherp = 0;
- free(*other);
- *other = NULL;
- }
- return err;
-}
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 93ad27830e2b..acbd046bf95c 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1590,6 +1590,40 @@ static void free_event_desc(struct evsel *events)
free(events);
}
+/*
+ * Validate @attr against what this build of the tool knows about.
+ *
+ * Warns and returns false when any of the reserved fields is non-zero, or
+ * when sample_type, read_format or (only if PERF_SAMPLE_BRANCH_STACK is
+ * set in sample_type) branch_sample_type carries bits outside the range
+ * covered by the corresponding *_MAX constant. Returns true otherwise.
+ */
+static bool perf_attr_check(struct perf_event_attr *attr)
+{
+	if (attr->__reserved_1 != 0 || attr->__reserved_2 != 0 ||
+	    attr->__reserved_3 != 0) {
+		pr_warning("Reserved bits are set unexpectedly. "
+			   "Please update perf tool.\n");
+		return false;
+	}
+
+	if ((attr->sample_type & ~(PERF_SAMPLE_MAX-1)) != 0) {
+		pr_warning("Unknown sample type (0x%llx) is detected. "
+			   "Please update perf tool.\n",
+			   attr->sample_type);
+		return false;
+	}
+
+	if ((attr->read_format & ~(PERF_FORMAT_MAX-1)) != 0) {
+		pr_warning("Unknown read format (0x%llx) is detected. "
+			   "Please update perf tool.\n",
+			   attr->read_format);
+		return false;
+	}
+
+	/* The branch sample type only matters when branch stacks are sampled. */
+	if (!(attr->sample_type & PERF_SAMPLE_BRANCH_STACK))
+		return true;
+
+	if ((attr->branch_sample_type & ~(PERF_SAMPLE_BRANCH_MAX-1)) != 0) {
+		pr_warning("Unknown branch sample type (0x%llx) is detected. "
+			   "Please update perf tool.\n",
+			   attr->branch_sample_type);
+		return false;
+	}
+
+	return true;
+}
+
static struct evsel *read_event_desc(struct feat_fd *ff)
{
struct evsel *evsel, *events = NULL;
@@ -1634,6 +1668,9 @@ static struct evsel *read_event_desc(struct feat_fd *ff)
memcpy(&evsel->core.attr, buf, msz);
+ if (!perf_attr_check(&evsel->core.attr))
+ goto error;
+
if (do_read_u32(ff, &nr))
goto error;
@@ -2922,7 +2959,7 @@ int perf_header__fprintf_info(struct perf_session *session, FILE *fp, bool full)
if (ret == -1)
return -1;
- stctime = st.st_ctime;
+ stctime = st.st_mtime;
fprintf(fp, "# captured on : %s", ctime(&stctime));
fprintf(fp, "# header version : %u\n", header->version);
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index ca5a8f4d007e..283a69ff6a3d 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -10,6 +10,7 @@
#include "mem-events.h"
#include "session.h"
#include "namespaces.h"
+#include "cgroup.h"
#include "sort.h"
#include "units.h"
#include "evlist.h"
@@ -194,6 +195,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
hists__set_unres_dso_col_len(hists, HISTC_MEM_DADDR_DSO);
}
+ hists__new_col_len(hists, HISTC_CGROUP, 6);
hists__new_col_len(hists, HISTC_CGROUP_ID, 20);
hists__new_col_len(hists, HISTC_CPU, 3);
hists__new_col_len(hists, HISTC_SOCKET, 6);
@@ -222,6 +224,16 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
if (h->trace_output)
hists__new_col_len(hists, HISTC_TRACE, strlen(h->trace_output));
+
+ if (h->cgroup) {
+ const char *cgrp_name = "unknown";
+ struct cgroup *cgrp = cgroup__find(h->ms.maps->machine->env,
+ h->cgroup);
+ if (cgrp != NULL)
+ cgrp_name = cgrp->name;
+
+ hists__new_col_len(hists, HISTC_CGROUP, strlen(cgrp_name));
+ }
}
void hists__output_recalc_col_len(struct hists *hists, int max_rows)
@@ -691,6 +703,7 @@ __hists__add_entry(struct hists *hists,
.dev = ns ? ns->link_info[CGROUP_NS_INDEX].dev : 0,
.ino = ns ? ns->link_info[CGROUP_NS_INDEX].ino : 0,
},
+ .cgroup = sample->cgroup,
.ms = {
.maps = al->maps,
.map = al->map,
@@ -2584,9 +2597,10 @@ void hist__account_cycles(struct branch_stack *bs, struct addr_location *al,
u64 *total_cycles)
{
struct branch_info *bi;
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
/* If we have branch cycles always annotate them. */
- if (bs && bs->nr && bs->entries[0].flags.cycles) {
+ if (bs && bs->nr && entries[0].flags.cycles) {
int i;
bi = sample__resolve_bstack(sample, al);
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 45286900aacb..4141295a66fa 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -38,6 +38,7 @@ enum hist_column {
HISTC_THREAD,
HISTC_COMM,
HISTC_CGROUP_ID,
+ HISTC_CGROUP,
HISTC_PARENT,
HISTC_CPU,
HISTC_SOCKET,
@@ -339,10 +340,10 @@ static inline void perf_hpp__prepend_sort_field(struct perf_hpp_fmt *format)
list_for_each_entry_safe(format, tmp, &(_list)->sorts, sort_list)
#define hists__for_each_format(hists, format) \
- perf_hpp_list__for_each_format((hists)->hpp_list, fmt)
+ perf_hpp_list__for_each_format((hists)->hpp_list, format)
#define hists__for_each_sort_list(hists, format) \
- perf_hpp_list__for_each_sort_list((hists)->hpp_list, fmt)
+ perf_hpp_list__for_each_sort_list((hists)->hpp_list, format)
extern struct perf_hpp_fmt perf_hpp__format[];
@@ -536,6 +537,7 @@ static inline int block_hists_tui_browse(struct block_hist *bh __maybe_unused,
#define K_LEFT -1000
#define K_RIGHT -2000
#define K_SWITCH_INPUT_DATA -3000
+#define K_RELOAD -4000
#endif
unsigned int hists__sort_list_width(struct hists *hists);
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 33cf8928cf05..23c8289c2472 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -1295,6 +1295,7 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
struct perf_sample sample = { .ip = 0, };
struct dummy_branch_stack {
u64 nr;
+ u64 hw_idx;
struct branch_entry entries;
} dummy_bs;
@@ -1316,6 +1317,7 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
if (pt->synth_opts.last_branch && sort__mode == SORT_MODE__BRANCH) {
dummy_bs = (struct dummy_branch_stack){
.nr = 1,
+ .hw_idx = -1ULL,
.entries = {
.from = sample.ip,
.to = sample.addr,
diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c
index eae47c2509eb..dbdffb6673fe 100644
--- a/tools/perf/util/llvm-utils.c
+++ b/tools/perf/util/llvm-utils.c
@@ -265,6 +265,8 @@ static int detect_kbuild_dir(char **kbuild_dir)
return -ENOMEM;
return 0;
}
+ pr_debug("%s: Couldn't find \"%s\", missing kernel-devel package?.\n",
+ __func__, autoconf_path);
free(autoconf_path);
return -ENOENT;
}
@@ -288,6 +290,7 @@ static const char *kinc_fetch_script =
"obj-y := dummy.o\n"
"\\$(obj)/%.o: \\$(src)/%.c\n"
"\t@echo -n \"\\$(NOSTDINC_FLAGS) \\$(LINUXINCLUDE) \\$(EXTRA_CFLAGS)\"\n"
+"\t\\$(CC) -c -o \\$@ \\$<\n"
"EOF\n"
"touch $TMPDIR/dummy.c\n"
"make -s -C $KBUILD_DIR M=$TMPDIR $KBUILD_OPTS dummy.o 2>/dev/null\n"
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index c8c5410315e8..97142e9671be 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -33,6 +33,7 @@
#include "asm/bug.h"
#include "bpf-event.h"
#include <internal/lib.h> // page_size
+#include "cgroup.h"
#include <linux/ctype.h>
#include <symbol/kallsyms.h>
@@ -654,6 +655,22 @@ int machine__process_namespaces_event(struct machine *machine __maybe_unused,
return err;
}
+/*
+ * Handle a cgroup event: print it when dump_trace is set, then look up or
+ * create the cgroup for the event's id and path in the machine's
+ * environment through cgroup__findnew().
+ *
+ * Returns 0 on success, -ENOMEM when cgroup__findnew() returns NULL.
+ */
+int machine__process_cgroup_event(struct machine *machine,
+				  union perf_event *event,
+				  struct perf_sample *sample __maybe_unused)
+{
+	if (dump_trace)
+		perf_event__fprintf_cgroup(event, stdout);
+
+	if (!cgroup__findnew(machine->env, event->cgroup.id,
+			     event->cgroup.path))
+		return -ENOMEM;
+
+	return 0;
+}
+
int machine__process_lost_event(struct machine *machine __maybe_unused,
union perf_event *event, struct perf_sample *sample __maybe_unused)
{
@@ -686,6 +703,7 @@ static struct dso *machine__findnew_module_dso(struct machine *machine,
dso__set_module_info(dso, m, machine);
dso__set_long_name(dso, strdup(filename), true);
+ dso->kernel = DSO_TYPE_KERNEL;
}
dso__get(dso);
@@ -726,9 +744,17 @@ static int machine__process_ksymbol_register(struct machine *machine,
struct map *map = maps__find(&machine->kmaps, event->ksymbol.addr);
if (!map) {
- map = dso__new_map(event->ksymbol.name);
- if (!map)
+ struct dso *dso = dso__new(event->ksymbol.name);
+
+ if (dso) {
+ dso->kernel = DSO_TYPE_KERNEL;
+ map = map__new2(0, dso);
+ }
+
+ if (!dso || !map) {
+ dso__put(dso);
return -ENOMEM;
+ }
map->start = event->ksymbol.addr;
map->end = map->start + event->ksymbol.len;
@@ -972,7 +998,6 @@ int machine__create_extra_kernel_map(struct machine *machine,
kmap = map__kmap(map);
- kmap->kmaps = &machine->kmaps;
strlcpy(kmap->name, xm->name, KMAP_NAME_LEN);
maps__insert(&machine->kmaps, map);
@@ -1082,9 +1107,6 @@ int __weak machine__create_extra_kernel_maps(struct machine *machine __maybe_unu
static int
__machine__create_kernel_maps(struct machine *machine, struct dso *kernel)
{
- struct kmap *kmap;
- struct map *map;
-
/* In case of renewal the kernel map, destroy previous one */
machine__destroy_kernel_maps(machine);
@@ -1093,14 +1115,7 @@ __machine__create_kernel_maps(struct machine *machine, struct dso *kernel)
return -1;
machine->vmlinux_map->map_ip = machine->vmlinux_map->unmap_ip = identity__map_ip;
- map = machine__kernel_map(machine);
- kmap = map__kmap(map);
- if (!kmap)
- return -1;
-
- kmap->kmaps = &machine->kmaps;
- maps__insert(&machine->kmaps, map);
-
+ maps__insert(&machine->kmaps, machine->vmlinux_map);
return 0;
}
@@ -1880,6 +1895,8 @@ int machine__process_event(struct machine *machine, union perf_event *event,
ret = machine__process_mmap_event(machine, event, sample); break;
case PERF_RECORD_NAMESPACES:
ret = machine__process_namespaces_event(machine, event, sample); break;
+ case PERF_RECORD_CGROUP:
+ ret = machine__process_cgroup_event(machine, event, sample); break;
case PERF_RECORD_MMAP2:
ret = machine__process_mmap2_event(machine, event, sample); break;
case PERF_RECORD_FORK:
@@ -2083,15 +2100,16 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
{
unsigned int i;
const struct branch_stack *bs = sample->branch_stack;
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
struct branch_info *bi = calloc(bs->nr, sizeof(struct branch_info));
if (!bi)
return NULL;
for (i = 0; i < bs->nr; i++) {
- ip__resolve_ams(al->thread, &bi[i].to, bs->entries[i].to);
- ip__resolve_ams(al->thread, &bi[i].from, bs->entries[i].from);
- bi[i].flags = bs->entries[i].flags;
+ ip__resolve_ams(al->thread, &bi[i].to, entries[i].to);
+ ip__resolve_ams(al->thread, &bi[i].from, entries[i].from);
+ bi[i].flags = entries[i].flags;
}
return bi;
}
@@ -2187,6 +2205,7 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
/* LBR only affects the user callchain */
if (i != chain_nr) {
struct branch_stack *lbr_stack = sample->branch_stack;
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
int lbr_nr = lbr_stack->nr, j, k;
bool branch;
struct branch_flags *flags;
@@ -2212,31 +2231,29 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
ip = chain->ips[j];
else if (j > i + 1) {
k = j - i - 2;
- ip = lbr_stack->entries[k].from;
+ ip = entries[k].from;
branch = true;
- flags = &lbr_stack->entries[k].flags;
+ flags = &entries[k].flags;
} else {
- ip = lbr_stack->entries[0].to;
+ ip = entries[0].to;
branch = true;
- flags = &lbr_stack->entries[0].flags;
- branch_from =
- lbr_stack->entries[0].from;
+ flags = &entries[0].flags;
+ branch_from = entries[0].from;
}
} else {
if (j < lbr_nr) {
k = lbr_nr - j - 1;
- ip = lbr_stack->entries[k].from;
+ ip = entries[k].from;
branch = true;
- flags = &lbr_stack->entries[k].flags;
+ flags = &entries[k].flags;
}
else if (j > lbr_nr)
ip = chain->ips[i + 1 - (j - lbr_nr)];
else {
- ip = lbr_stack->entries[0].to;
+ ip = entries[0].to;
branch = true;
- flags = &lbr_stack->entries[0].flags;
- branch_from =
- lbr_stack->entries[0].from;
+ flags = &entries[0].flags;
+ branch_from = entries[0].from;
}
}
@@ -2283,6 +2300,7 @@ static int thread__resolve_callchain_sample(struct thread *thread,
int max_stack)
{
struct branch_stack *branch = sample->branch_stack;
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
struct ip_callchain *chain = sample->callchain;
int chain_nr = 0;
u8 cpumode = PERF_RECORD_MISC_USER;
@@ -2330,7 +2348,7 @@ static int thread__resolve_callchain_sample(struct thread *thread,
for (i = 0; i < nr; i++) {
if (callchain_param.order == ORDER_CALLEE) {
- be[i] = branch->entries[i];
+ be[i] = entries[i];
if (chain == NULL)
continue;
@@ -2349,7 +2367,7 @@ static int thread__resolve_callchain_sample(struct thread *thread,
be[i].from >= chain->ips[first_call] - 8)
first_call++;
} else
- be[i] = branch->entries[branch->nr - i - 1];
+ be[i] = entries[branch->nr - i - 1];
}
memset(iter, 0, sizeof(struct iterations) * nr);
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index be0a930eca89..fa1be9ea00fa 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -128,6 +128,9 @@ int machine__process_switch_event(struct machine *machine,
int machine__process_namespaces_event(struct machine *machine,
union perf_event *event,
struct perf_sample *sample);
+int machine__process_cgroup_event(struct machine *machine,
+ union perf_event *event,
+ struct perf_sample *sample);
int machine__process_mmap_event(struct machine *machine, union perf_event *event,
struct perf_sample *sample);
int machine__process_mmap2_event(struct machine *machine, union perf_event *event,
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index fdd5bddb3075..53d96611e6a6 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -44,8 +44,8 @@ static inline int is_no_dso_memory(const char *filename)
static inline int is_android_lib(const char *filename)
{
- return !strncmp(filename, "/data/app-lib", 13) ||
- !strncmp(filename, "/system/lib", 11);
+ return strstarts(filename, "/data/app-lib/") ||
+ strstarts(filename, "/system/lib/");
}
static inline bool replace_android_lib(const char *filename, char *newfilename)
@@ -65,7 +65,7 @@ static inline bool replace_android_lib(const char *filename, char *newfilename)
app_abi_length = strlen(app_abi);
- if (!strncmp(filename, "/data/app-lib", 13)) {
+ if (strstarts(filename, "/data/app-lib/")) {
char *apk_path;
if (!app_abi_length)
@@ -89,7 +89,7 @@ static inline bool replace_android_lib(const char *filename, char *newfilename)
return true;
}
- if (!strncmp(filename, "/system/lib/", 11)) {
+ if (strstarts(filename, "/system/lib/")) {
char *ndk, *app;
const char *arch;
size_t ndk_length;
@@ -375,8 +375,13 @@ struct symbol *map__find_symbol_by_name(struct map *map, const char *name)
struct map *map__clone(struct map *from)
{
- struct map *map = memdup(from, sizeof(*map));
+ size_t size = sizeof(struct map);
+ struct map *map;
+
+ if (from->dso && from->dso->kernel)
+ size += sizeof(struct kmap);
+ map = memdup(from, size);
if (map != NULL) {
refcount_set(&map->refcnt, 1);
RB_CLEAR_NODE(&map->rb_node);
@@ -426,7 +431,7 @@ int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix,
if (map && map->dso) {
char *srcline = map__srcline(map, addr, NULL);
- if (srcline != SRCLINE_UNKNOWN)
+ if (strncmp(srcline, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0)
ret = fprintf(fp, "%s%s", prefix, srcline);
free_srcline(srcline);
}
@@ -538,6 +543,16 @@ void maps__insert(struct maps *maps, struct map *map)
__maps__insert(maps, map);
++maps->nr_maps;
+ if (map->dso && map->dso->kernel) {
+ struct kmap *kmap = map__kmap(map);
+
+ if (kmap)
+ kmap->kmaps = maps;
+ else
+ pr_err("Internal error: kernel dso with non kernel map\n");
+ }
+
+
/*
* If we already performed some search by name, then we need to add the just
* inserted map and resort.
@@ -549,6 +564,7 @@ void maps__insert(struct maps *maps, struct map *map)
if (maps_by_name == NULL) {
__maps__free_maps_by_name(maps);
+ up_write(&maps->lock);
return;
}
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 02aee946b6c1..926449a7cdbf 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -22,6 +22,8 @@
#include <linux/string.h>
#include <linux/zalloc.h>
#include <subcmd/parse-options.h>
+#include <api/fs/fs.h>
+#include "util.h"
struct metric_event *metricgroup__lookup(struct rblist *metric_events,
struct evsel *evsel,
@@ -93,13 +95,16 @@ struct egroup {
static struct evsel *find_evsel_group(struct evlist *perf_evlist,
const char **ids,
int idnum,
- struct evsel **metric_events)
+ struct evsel **metric_events,
+ bool *evlist_used)
{
struct evsel *ev;
- int i = 0;
+ int i = 0, j = 0;
bool leader_found;
evlist__for_each_entry (perf_evlist, ev) {
+ if (evlist_used[j++])
+ continue;
if (!strcmp(ev->name, ids[i])) {
if (!metric_events[i])
metric_events[i] = ev;
@@ -107,22 +112,17 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist,
if (i == idnum)
break;
} else {
- if (i + 1 == idnum) {
- /* Discard the whole match and start again */
- i = 0;
- memset(metric_events, 0,
- sizeof(struct evsel *) * idnum);
- continue;
- }
-
- if (!strcmp(ev->name, ids[i]))
- metric_events[i] = ev;
- else {
- /* Discard the whole match and start again */
- i = 0;
- memset(metric_events, 0,
- sizeof(struct evsel *) * idnum);
- continue;
+ /* Discard the whole match and start again */
+ i = 0;
+ memset(metric_events, 0,
+ sizeof(struct evsel *) * idnum);
+
+ if (!strcmp(ev->name, ids[i])) {
+ if (!metric_events[i])
+ metric_events[i] = ev;
+ i++;
+ if (i == idnum)
+ break;
}
}
}
@@ -144,7 +144,10 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist,
!strcmp(ev->name, metric_events[i]->name)) {
ev->metric_leader = metric_events[i];
}
+ j++;
}
+ ev = metric_events[i];
+ evlist_used[ev->idx] = true;
}
return metric_events[0];
@@ -160,6 +163,13 @@ static int metricgroup__setup_events(struct list_head *groups,
int ret = 0;
struct egroup *eg;
struct evsel *evsel;
+ bool *evlist_used;
+
+ evlist_used = calloc(perf_evlist->core.nr_entries, sizeof(bool));
+ if (!evlist_used) {
+ ret = -ENOMEM;
+ return ret;
+ }
list_for_each_entry (eg, groups, nd) {
struct evsel **metric_events;
@@ -170,7 +180,7 @@ static int metricgroup__setup_events(struct list_head *groups,
break;
}
evsel = find_evsel_group(perf_evlist, eg->ids, eg->idnum,
- metric_events);
+ metric_events, evlist_used);
if (!evsel) {
pr_debug("Cannot resolve %s: %s\n",
eg->metric_name, eg->metric_expr);
@@ -194,6 +204,9 @@ static int metricgroup__setup_events(struct list_head *groups,
expr->metric_events = metric_events;
list_add(&expr->nd, &me->head);
}
+
+ free(evlist_used);
+
return ret;
}
@@ -399,13 +412,85 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter,
strlist__delete(metriclist);
}
+/*
+ * Append the events of one metric to @events as weak groups.
+ *
+ * Consecutive ids are wrapped as "{a,b,...}:W". "duration_time" is never
+ * placed inside a group: an open group is closed before it, and a new
+ * group is opened for whatever follows it. Every element appended here is
+ * separated from the previous one by a comma; the caller provides the
+ * separator in front of the first element. Nothing is appended when
+ * @idnum is 0.
+ */
+static void metricgroup__add_metric_weak_group(struct strbuf *events,
+						const char **ids,
+						int idnum)
+{
+	bool no_group = false;
+	int i;
+
+	for (i = 0; i < idnum; i++) {
+		const char *prefix;
+
+		pr_debug("found event %s\n", ids[i]);
+		/*
+		 * Duration time maps to a software event and can make
+		 * groups not count. Always use it outside a
+		 * group.
+		 */
+		if (!strcmp(ids[i], "duration_time")) {
+			/* Only close a group if one is actually open. */
+			if (i > 0)
+				strbuf_addf(events, no_group ? "," : "}:W,");
+			strbuf_addf(events, "duration_time");
+			no_group = true;
+			continue;
+		}
+
+		if (i == 0)
+			prefix = "{";
+		else if (no_group)
+			prefix = ",{";	/* new group after duration_time */
+		else
+			prefix = ",";
+		strbuf_addf(events, "%s%s", prefix, ids[i]);
+		no_group = false;
+	}
+	if (idnum > 0 && !no_group)
+		strbuf_addf(events, "}:W");
+}
+
+/*
+ * Append the events of one metric to @events as a plain comma-separated
+ * list, without grouping. No comma is emitted in front of the first id:
+ * the caller already separates this metric from what @events held before.
+ */
+static void metricgroup__add_metric_non_group(struct strbuf *events,
+					       const char **ids,
+					       int idnum)
+{
+	int i;
+
+	for (i = 0; i < idnum; i++)
+		strbuf_addf(events, "%s%s", i > 0 ? "," : "", ids[i]);
+}
+
+/*
+ * Two-phase warning about the NO_NMI_WATCHDOG constraint.
+ *
+ * With @foot false, warn that metric group @name is being split and
+ * remember that this happened. With @foot true, print the hint on how to
+ * disable the NMI watchdog, but only if a split was recorded earlier.
+ */
+static void metricgroup___watchdog_constraint_hint(const char *name, bool foot)
+{
+	static bool violate_nmi_constraint;
+
+	if (foot) {
+		if (violate_nmi_constraint)
+			pr_warning("Try disabling the NMI watchdog to comply NO_NMI_WATCHDOG metric constraint:\n"
+				   " echo 0 > /proc/sys/kernel/nmi_watchdog\n"
+				   " perf stat ...\n"
+				   " echo 1 > /proc/sys/kernel/nmi_watchdog\n");
+		return;
+	}
+
+	pr_warning("Splitting metric group %s into standalone metrics.\n", name);
+	violate_nmi_constraint = true;
+}
+
+/*
+ * Tell whether @pe carries a metric constraint that currently applies.
+ *
+ * The only constraint handled is "NO_NMI_WATCHDOG": when it is set and
+ * sysctl__nmi_watchdog_enabled() reports the watchdog as enabled, the
+ * split warning is issued for the metric and true is returned. In every
+ * other case the result is false.
+ */
+static bool metricgroup__has_constraint(struct pmu_event *pe)
+{
+	if (pe->metric_constraint == NULL)
+		return false;
+
+	if (strcmp(pe->metric_constraint, "NO_NMI_WATCHDOG") != 0)
+		return false;
+
+	if (!sysctl__nmi_watchdog_enabled())
+		return false;
+
+	metricgroup___watchdog_constraint_hint(pe->metric_name, false);
+	return true;
+}
+
static int metricgroup__add_metric(const char *metric, struct strbuf *events,
struct list_head *group_list)
{
struct pmu_events_map *map = perf_pmu__find_map(NULL);
struct pmu_event *pe;
- int ret = -EINVAL;
- int i, j;
+ int i, ret = -EINVAL;
if (!map)
return 0;
@@ -422,7 +507,6 @@ static int metricgroup__add_metric(const char *metric, struct strbuf *events,
const char **ids;
int idnum;
struct egroup *eg;
- bool no_group = false;
pr_debug("metric expr %s for %s\n", pe->metric_expr, pe->metric_name);
@@ -431,27 +515,11 @@ static int metricgroup__add_metric(const char *metric, struct strbuf *events,
continue;
if (events->len > 0)
strbuf_addf(events, ",");
- for (j = 0; j < idnum; j++) {
- pr_debug("found event %s\n", ids[j]);
- /*
- * Duration time maps to a software event and can make
- * groups not count. Always use it outside a
- * group.
- */
- if (!strcmp(ids[j], "duration_time")) {
- if (j > 0)
- strbuf_addf(events, "}:W,");
- strbuf_addf(events, "duration_time");
- no_group = true;
- continue;
- }
- strbuf_addf(events, "%s%s",
- j == 0 || no_group ? "{" : ",",
- ids[j]);
- no_group = false;
- }
- if (!no_group)
- strbuf_addf(events, "}:W");
+
+ if (metricgroup__has_constraint(pe))
+ metricgroup__add_metric_non_group(events, ids, idnum);
+ else
+ metricgroup__add_metric_weak_group(events, ids, idnum);
eg = malloc(sizeof(struct egroup));
if (!eg) {
@@ -493,6 +561,10 @@ static int metricgroup__add_metric_list(const char *list, struct strbuf *events,
}
}
free(nlist);
+
+ if (!ret)
+ metricgroup___watchdog_constraint_hint(NULL, true);
+
return ret;
}
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index 063d1b93c53d..ab7108d22428 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -23,6 +23,18 @@
#include "mmap.h"
#include "../perf.h"
#include <internal/lib.h> /* page_size */
+#include <linux/bitmap.h>
+
+#define MASK_SIZE 1023
+/*
+ * Debug-print @mask: its address, the caller-supplied @tag, its number of
+ * bits and the bitmap as formatted by bitmap_scnprintf() into a buffer of
+ * MASK_SIZE characters plus terminator.
+ */
+void mmap_cpu_mask__scnprintf(struct mmap_cpu_mask *mask, const char *tag)
+{
+	char buf[MASK_SIZE + 1];
+	size_t len;
+
+	len = bitmap_scnprintf(mask->bits, mask->nbits, buf, MASK_SIZE);
+	buf[len] = '\0';
+	/* nbits is a size_t, which takes %zu rather than the signed %zd. */
+	pr_debug("%p: %s mask[%zu]: %s\n", mask, tag, mask->nbits, buf);
+}
size_t mmap__mmap_len(struct mmap *map)
{
@@ -86,20 +98,29 @@ static int perf_mmap__aio_bind(struct mmap *map, int idx, int cpu, int affinity)
{
void *data;
size_t mmap_len;
- unsigned long node_mask;
+ unsigned long *node_mask;
+ unsigned long node_index;
+ int err = 0;
if (affinity != PERF_AFFINITY_SYS && cpu__max_node() > 1) {
data = map->aio.data[idx];
mmap_len = mmap__mmap_len(map);
- node_mask = 1UL << cpu__get_node(cpu);
- if (mbind(data, mmap_len, MPOL_BIND, &node_mask, 1, 0)) {
- pr_err("Failed to bind [%p-%p] AIO buffer to node %d: error %m\n",
- data, data + mmap_len, cpu__get_node(cpu));
+ node_index = cpu__get_node(cpu);
+ node_mask = bitmap_alloc(node_index + 1);
+ if (!node_mask) {
+ pr_err("Failed to allocate node mask for mbind: error %m\n");
return -1;
}
+ set_bit(node_index, node_mask);
+ if (mbind(data, mmap_len, MPOL_BIND, node_mask, node_index + 1 + 1, 0)) {
+ pr_err("Failed to bind [%p-%p] AIO buffer to node %lu: error %m\n",
+ data, data + mmap_len, node_index);
+ err = -1;
+ }
+ bitmap_free(node_mask);
}
- return 0;
+ return err;
}
#else /* !HAVE_LIBNUMA_SUPPORT */
static int perf_mmap__aio_alloc(struct mmap *map, int idx)
@@ -207,6 +228,8 @@ static void perf_mmap__aio_munmap(struct mmap *map __maybe_unused)
void mmap__munmap(struct mmap *map)
{
+ bitmap_free(map->affinity_mask.bits);
+
perf_mmap__aio_munmap(map);
if (map->data != NULL) {
munmap(map->data, mmap__mmap_len(map));
@@ -215,7 +238,7 @@ void mmap__munmap(struct mmap *map)
auxtrace_mmap__munmap(&map->auxtrace_mmap);
}
-static void build_node_mask(int node, cpu_set_t *mask)
+static void build_node_mask(int node, struct mmap_cpu_mask *mask)
{
int c, cpu, nr_cpus;
const struct perf_cpu_map *cpu_map = NULL;
@@ -228,17 +251,23 @@ static void build_node_mask(int node, cpu_set_t *mask)
for (c = 0; c < nr_cpus; c++) {
cpu = cpu_map->map[c]; /* map c index to online cpu index */
if (cpu__get_node(cpu) == node)
- CPU_SET(cpu, mask);
+ set_bit(cpu, mask->bits);
}
}
-static void perf_mmap__setup_affinity_mask(struct mmap *map, struct mmap_params *mp)
+static int perf_mmap__setup_affinity_mask(struct mmap *map, struct mmap_params *mp)
{
- CPU_ZERO(&map->affinity_mask);
+ map->affinity_mask.nbits = cpu__max_cpu();
+ map->affinity_mask.bits = bitmap_alloc(map->affinity_mask.nbits);
+ if (!map->affinity_mask.bits)
+ return -1;
+
if (mp->affinity == PERF_AFFINITY_NODE && cpu__max_node() > 1)
build_node_mask(cpu__get_node(map->core.cpu), &map->affinity_mask);
else if (mp->affinity == PERF_AFFINITY_CPU)
- CPU_SET(map->core.cpu, &map->affinity_mask);
+ set_bit(map->core.cpu, map->affinity_mask.bits);
+
+ return 0;
}
int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu)
@@ -249,7 +278,15 @@ int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu)
return -1;
}
- perf_mmap__setup_affinity_mask(map, mp);
+ if (mp->affinity != PERF_AFFINITY_SYS &&
+ perf_mmap__setup_affinity_mask(map, mp)) {
+ pr_debug2("failed to alloc mmap affinity mask, error %d\n",
+ errno);
+ return -1;
+ }
+
+ if (verbose == 2)
+ mmap_cpu_mask__scnprintf(&map->affinity_mask, "mmap");
map->core.flush = mp->flush;
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index bee4e83f7109..9d5f589f02ae 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -15,6 +15,15 @@
#include "event.h"
struct aiocb;
+
+/* A CPU bitmap together with its length in bits. */
+struct mmap_cpu_mask {
+ unsigned long *bits; /* bitmap storage, one bit per CPU number */
+ size_t nbits; /* number of bits the bitmap holds */
+};
+
+/*
+ * Size in bytes of the bitmap storage needed for the nbits of the mask
+ * pointed to by @m. The argument is parenthesized so that any pointer
+ * expression can be passed.
+ */
+#define MMAP_CPU_MASK_BYTES(m) \
+	(BITS_TO_LONGS(((struct mmap_cpu_mask *)(m))->nbits) * sizeof(unsigned long))
+
/**
* struct mmap - perf's ring buffer mmap details
*
@@ -31,7 +40,7 @@ struct mmap {
int nr_cblocks;
} aio;
#endif
- cpu_set_t affinity_mask;
+ struct mmap_cpu_mask affinity_mask;
void *data;
int comp_level;
};
@@ -52,4 +61,6 @@ int perf_mmap__push(struct mmap *md, void *to,
size_t mmap__mmap_len(struct mmap *map);
+void mmap_cpu_mask__scnprintf(struct mmap_cpu_mask *mask, const char *tag);
+
#endif /*__PERF_MMAP_H */
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index ed7c008b9c8b..10107747b361 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -257,21 +257,15 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config)
path = zalloc(sizeof(*path));
if (!path)
return NULL;
- path->system = malloc(MAX_EVENT_LENGTH);
- if (!path->system) {
+ if (asprintf(&path->system, "%.*s", MAX_EVENT_LENGTH, sys_dirent->d_name) < 0) {
free(path);
return NULL;
}
- path->name = malloc(MAX_EVENT_LENGTH);
- if (!path->name) {
+ if (asprintf(&path->name, "%.*s", MAX_EVENT_LENGTH, evt_dirent->d_name) < 0) {
zfree(&path->system);
free(path);
return NULL;
}
- strncpy(path->system, sys_dirent->d_name,
- MAX_EVENT_LENGTH);
- strncpy(path->name, evt_dirent->d_name,
- MAX_EVENT_LENGTH);
return path;
}
}
@@ -1219,8 +1213,7 @@ static int config_attr(struct perf_event_attr *attr,
static int get_config_terms(struct list_head *head_config,
struct list_head *head_terms __maybe_unused)
{
-#define ADD_CONFIG_TERM(__type, __name, __val) \
-do { \
+#define ADD_CONFIG_TERM(__type, __weak) \
struct perf_evsel_config_term *__t; \
\
__t = zalloc(sizeof(*__t)); \
@@ -1229,9 +1222,24 @@ do { \
\
INIT_LIST_HEAD(&__t->list); \
__t->type = PERF_EVSEL__CONFIG_TERM_ ## __type; \
+ __t->weak = __weak; \
+ list_add_tail(&__t->list, head_terms)
+
+#define ADD_CONFIG_TERM_VAL(__type, __name, __val, __weak) \
+do { \
+ ADD_CONFIG_TERM(__type, __weak); \
__t->val.__name = __val; \
- __t->weak = term->weak; \
- list_add_tail(&__t->list, head_terms); \
+} while (0)
+
+#define ADD_CONFIG_TERM_STR(__type, __val, __weak) \
+do { \
+ ADD_CONFIG_TERM(__type, __weak); \
+ __t->val.str = strdup(__val); \
+ if (!__t->val.str) { \
+ zfree(&__t); \
+ return -ENOMEM; \
+ } \
+ __t->free_str = true; \
} while (0)
struct parse_events_term *term;
@@ -1239,53 +1247,62 @@ do { \
list_for_each_entry(term, head_config, list) {
switch (term->type_term) {
case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD:
- ADD_CONFIG_TERM(PERIOD, period, term->val.num);
+ ADD_CONFIG_TERM_VAL(PERIOD, period, term->val.num, term->weak);
break;
case PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ:
- ADD_CONFIG_TERM(FREQ, freq, term->val.num);
+ ADD_CONFIG_TERM_VAL(FREQ, freq, term->val.num, term->weak);
break;
case PARSE_EVENTS__TERM_TYPE_TIME:
- ADD_CONFIG_TERM(TIME, time, term->val.num);
+ ADD_CONFIG_TERM_VAL(TIME, time, term->val.num, term->weak);
break;
case PARSE_EVENTS__TERM_TYPE_CALLGRAPH:
- ADD_CONFIG_TERM(CALLGRAPH, callgraph, term->val.str);
+ ADD_CONFIG_TERM_STR(CALLGRAPH, term->val.str, term->weak);
break;
case PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE:
- ADD_CONFIG_TERM(BRANCH, branch, term->val.str);
+ ADD_CONFIG_TERM_STR(BRANCH, term->val.str, term->weak);
break;
case PARSE_EVENTS__TERM_TYPE_STACKSIZE:
- ADD_CONFIG_TERM(STACK_USER, stack_user, term->val.num);
+ ADD_CONFIG_TERM_VAL(STACK_USER, stack_user,
+ term->val.num, term->weak);
break;
case PARSE_EVENTS__TERM_TYPE_INHERIT:
- ADD_CONFIG_TERM(INHERIT, inherit, term->val.num ? 1 : 0);
+ ADD_CONFIG_TERM_VAL(INHERIT, inherit,
+ term->val.num ? 1 : 0, term->weak);
break;
case PARSE_EVENTS__TERM_TYPE_NOINHERIT:
- ADD_CONFIG_TERM(INHERIT, inherit, term->val.num ? 0 : 1);
+ ADD_CONFIG_TERM_VAL(INHERIT, inherit,
+ term->val.num ? 0 : 1, term->weak);
break;
case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
- ADD_CONFIG_TERM(MAX_STACK, max_stack, term->val.num);
+ ADD_CONFIG_TERM_VAL(MAX_STACK, max_stack,
+ term->val.num, term->weak);
break;
case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS:
- ADD_CONFIG_TERM(MAX_EVENTS, max_events, term->val.num);
+ ADD_CONFIG_TERM_VAL(MAX_EVENTS, max_events,
+ term->val.num, term->weak);
break;
case PARSE_EVENTS__TERM_TYPE_OVERWRITE:
- ADD_CONFIG_TERM(OVERWRITE, overwrite, term->val.num ? 1 : 0);
+ ADD_CONFIG_TERM_VAL(OVERWRITE, overwrite,
+ term->val.num ? 1 : 0, term->weak);
break;
case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE:
- ADD_CONFIG_TERM(OVERWRITE, overwrite, term->val.num ? 0 : 1);
+ ADD_CONFIG_TERM_VAL(OVERWRITE, overwrite,
+ term->val.num ? 0 : 1, term->weak);
break;
case PARSE_EVENTS__TERM_TYPE_DRV_CFG:
- ADD_CONFIG_TERM(DRV_CFG, drv_cfg, term->val.str);
+ ADD_CONFIG_TERM_STR(DRV_CFG, term->val.str, term->weak);
break;
case PARSE_EVENTS__TERM_TYPE_PERCORE:
- ADD_CONFIG_TERM(PERCORE, percore,
- term->val.num ? true : false);
+ ADD_CONFIG_TERM_VAL(PERCORE, percore,
+ term->val.num ? true : false, term->weak);
break;
case PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT:
- ADD_CONFIG_TERM(AUX_OUTPUT, aux_output, term->val.num ? 1 : 0);
+ ADD_CONFIG_TERM_VAL(AUX_OUTPUT, aux_output,
+ term->val.num ? 1 : 0, term->weak);
break;
case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE:
- ADD_CONFIG_TERM(AUX_SAMPLE_SIZE, aux_sample_size, term->val.num);
+ ADD_CONFIG_TERM_VAL(AUX_SAMPLE_SIZE, aux_sample_size,
+ term->val.num, term->weak);
break;
default:
break;
@@ -1322,7 +1339,7 @@ static int get_config_chgs(struct perf_pmu *pmu, struct list_head *head_config,
}
if (bits)
- ADD_CONFIG_TERM(CFG_CHG, cfg_chg, bits);
+ ADD_CONFIG_TERM_VAL(CFG_CHG, cfg_chg, bits, false);
#undef ADD_CONFIG_TERM
return 0;
@@ -1432,7 +1449,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu, NULL,
auto_merge_stats, NULL);
if (evsel) {
- evsel->pmu_name = name;
+ evsel->pmu_name = name ? strdup(name) : NULL;
evsel->use_uncore_alias = use_uncore_alias;
return 0;
} else {
@@ -1480,7 +1497,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
evsel->snapshot = info.snapshot;
evsel->metric_expr = info.metric_expr;
evsel->metric_name = info.metric_name;
- evsel->pmu_name = name;
+ evsel->pmu_name = name ? strdup(name) : NULL;
evsel->use_uncore_alias = use_uncore_alias;
evsel->percore = config_term_percore(&evsel->config_terms);
}
@@ -1530,7 +1547,7 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
if (!parse_events_add_pmu(parse_state, list,
pmu->name, head,
true, true)) {
- pr_debug("%s -> %s/%s/\n", config,
+ pr_debug("%s -> %s/%s/\n", str,
pmu->name, alias->str);
ok++;
}
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 7b1c8ee537cf..baa48f28d57d 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -342,11 +342,13 @@ bpf-output { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUT
* Because the prefix cycles is mixed up with cpu-cycles.
* loads and stores are mixed up with cache event
*/
-cycles-ct { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
-cycles-t { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
-mem-loads { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
-mem-stores { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
-topdown-[a-z-]+ { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
+cycles-ct |
+cycles-t |
+mem-loads |
+mem-stores |
+topdown-[a-z-]+ |
+tx-capacity-[a-z-]+ |
+el-capacity-[a-z-]+ { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
L1-dcache|l1-d|l1d|L1-data |
L1-icache|l1-i|l1i|L1-instruction |
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index e2eea4e601b4..94f8bcd83582 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -1,4 +1,4 @@
-%pure-parser
+%define api.pure full
%parse-param {void *_parse_state}
%parse-param {void *scanner}
%lex-param {void* scanner}
diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c
index 651203126c71..b94fa07f5d32 100644
--- a/tools/perf/util/perf_event_attr_fprintf.c
+++ b/tools/perf/util/perf_event_attr_fprintf.c
@@ -35,6 +35,7 @@ static void __p_sample_type(char *buf, size_t size, u64 value)
bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER),
bit_name(IDENTIFIER), bit_name(REGS_INTR), bit_name(DATA_SRC),
bit_name(WEIGHT), bit_name(PHYS_ADDR), bit_name(AUX),
+ bit_name(CGROUP),
{ .name = NULL, }
};
#undef bit_name
@@ -50,6 +51,7 @@ static void __p_branch_sample_type(char *buf, size_t size, u64 value)
bit_name(ABORT_TX), bit_name(IN_TX), bit_name(NO_TX),
bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP),
bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES),
+ bit_name(HW_INDEX),
{ .name = NULL, }
};
#undef bit_name
@@ -131,6 +133,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
PRINT_ATTRf(ksymbol, p_unsigned);
PRINT_ATTRf(bpf_event, p_unsigned);
PRINT_ATTRf(aux_output, p_unsigned);
+ PRINT_ATTRf(cgroup, p_unsigned);
PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned);
PRINT_ATTRf(bp_type, p_unsigned);
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 8b99fd312aae..ef6a63f3d386 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -21,7 +21,6 @@
#include "pmu.h"
#include "parse-events.h"
#include "header.h"
-#include "pmu-events/pmu-events.h"
#include "string2.h"
#include "strbuf.h"
#include "fncache.h"
@@ -699,7 +698,7 @@ struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu)
return map;
}
-static bool pmu_uncore_alias_match(const char *pmu_name, const char *name)
+bool pmu_uncore_alias_match(const char *pmu_name, const char *name)
{
char *tmp = NULL, *tok, *str;
bool res;
@@ -744,16 +743,11 @@ out:
* to the current running CPU. Then, add all PMU events from that table
* as aliases.
*/
-static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu)
+void pmu_add_cpu_aliases_map(struct list_head *head, struct perf_pmu *pmu,
+ struct pmu_events_map *map)
{
int i;
- struct pmu_events_map *map;
const char *name = pmu->name;
-
- map = perf_pmu__find_map(pmu);
- if (!map)
- return;
-
/*
* Found a matching PMU events table. Create aliases
*/
@@ -788,6 +782,17 @@ new_alias:
}
}
+static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu)
+{
+ struct pmu_events_map *map;
+
+ map = perf_pmu__find_map(pmu);
+ if (!map)
+ return;
+
+ pmu_add_cpu_aliases_map(head, pmu, map);
+}
+
struct perf_event_attr * __weak
perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
{
@@ -979,12 +984,11 @@ static int pmu_resolve_param_term(struct parse_events_term *term,
struct parse_events_term *t;
list_for_each_entry(t, head_terms, list) {
- if (t->type_val == PARSE_EVENTS__TERM_TYPE_NUM) {
- if (!strcmp(t->config, term->config)) {
- t->used = true;
- *value = t->val.num;
- return 0;
- }
+ if (t->type_val == PARSE_EVENTS__TERM_TYPE_NUM &&
+ t->config && !strcmp(t->config, term->config)) {
+ t->used = true;
+ *value = t->val.num;
+ return 0;
}
}
@@ -1395,6 +1399,11 @@ static void wordwrap(char *s, int start, int max, int corr)
}
}
+bool is_pmu_core(const char *name)
+{
+ return !strcmp(name, "cpu") || is_arm_pmu_core(name);
+}
+
void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag,
bool long_desc, bool details_flag, bool deprecated)
{
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index 6737e3d5d568..5fb3f16828df 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -7,6 +7,7 @@
#include <linux/perf_event.h>
#include <stdbool.h>
#include "parse-events.h"
+#include "pmu-events/pmu-events.h"
struct perf_evsel_config_term;
@@ -87,6 +88,7 @@ int perf_pmu__format_parse(char *dir, struct list_head *head);
struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu);
+bool is_pmu_core(const char *name);
void print_pmu_events(const char *event_glob, bool name_only, bool quiet,
bool long_desc, bool details_flag,
bool deprecated);
@@ -97,8 +99,11 @@ int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt,
int perf_pmu__test(void);
struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu);
+void pmu_add_cpu_aliases_map(struct list_head *head, struct perf_pmu *pmu,
+ struct pmu_events_map *map);
struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu);
+bool pmu_uncore_alias_match(const char *pmu_name, const char *name);
int perf_pmu__convert_scale(const char *scale, char **end, double *sval);
diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c
index 5003ba403345..8c852948513e 100644
--- a/tools/perf/util/probe-file.c
+++ b/tools/perf/util/probe-file.c
@@ -206,6 +206,9 @@ static struct strlist *__probe_file__get_namelist(int fd, bool include_group)
} else
ret = strlist__add(sl, tev.event);
clear_probe_trace_event(&tev);
+ /* Skip if a multi-probe event with the same name is already in the list */
+ if (ret == -EEXIST)
+ ret = 0;
if (ret < 0)
break;
}
@@ -301,10 +304,15 @@ int probe_file__get_events(int fd, struct strfilter *filter,
p = strchr(ent->s, ':');
if ((p && strfilter__compare(filter, p + 1)) ||
strfilter__compare(filter, ent->s)) {
- strlist__add(plist, ent->s);
+ ret = strlist__add(plist, ent->s);
+ if (ret == -ENOMEM) {
+ pr_err("strlist__add failed with -ENOMEM\n");
+ goto out;
+ }
ret = 0;
}
}
+out:
strlist__delete(namelist);
return ret;
@@ -511,7 +519,11 @@ static int probe_cache__load(struct probe_cache *pcache)
ret = -EINVAL;
goto out;
}
- strlist__add(entry->tevlist, buf);
+ ret = strlist__add(entry->tevlist, buf);
+ if (ret == -ENOMEM) {
+ pr_err("strlist__add failed with -ENOMEM\n");
+ goto out;
+ }
}
}
out:
@@ -672,7 +684,12 @@ int probe_cache__add_entry(struct probe_cache *pcache,
command = synthesize_probe_trace_command(&tevs[i]);
if (!command)
goto out_err;
- strlist__add(entry->tevlist, command);
+ ret = strlist__add(entry->tevlist, command);
+ if (ret == -ENOMEM) {
+ pr_err("strlist__add failed with -ENOMEM\n");
+ goto out_err;
+ }
+
free(command);
}
list_add_tail(&entry->node, &pcache->entries);
@@ -853,9 +870,15 @@ int probe_cache__scan_sdt(struct probe_cache *pcache, const char *pathname)
break;
}
- strlist__add(entry->tevlist, buf);
+ ret = strlist__add(entry->tevlist, buf);
+
free(buf);
entry = NULL;
+
+ if (ret == -ENOMEM) {
+ pr_err("strlist__add failed with -ENOMEM\n");
+ break;
+ }
}
if (entry) {
list_del_init(&entry->node);
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index c470c49a804f..e4cff49384f4 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -303,7 +303,8 @@ static int convert_variable_type(Dwarf_Die *vr_die,
char prefix;
/* TODO: check all types */
- if (cast && strcmp(cast, "string") != 0 && strcmp(cast, "x") != 0 &&
+ if (cast && strcmp(cast, "string") != 0 && strcmp(cast, "ustring") &&
+ strcmp(cast, "x") != 0 &&
strcmp(cast, "s") != 0 && strcmp(cast, "u") != 0) {
/* Non string type is OK */
/* and respect signedness/hexadecimal cast */
@@ -636,14 +637,19 @@ static int convert_to_trace_point(Dwarf_Die *sp_die, Dwfl_Module *mod,
return -EINVAL;
}
- /* Try to get actual symbol name from symtab */
- symbol = dwfl_module_addrsym(mod, paddr, &sym, NULL);
+ if (dwarf_entrypc(sp_die, &eaddr) == 0) {
+ /* If the DIE has entrypc, use it. */
+ symbol = dwarf_diename(sp_die);
+ } else {
+ /* Try to get actual symbol name and address from symtab */
+ symbol = dwfl_module_addrsym(mod, paddr, &sym, NULL);
+ eaddr = sym.st_value;
+ }
if (!symbol) {
pr_warning("Failed to find symbol at 0x%lx\n",
(unsigned long)paddr);
return -ENOENT;
}
- eaddr = sym.st_value;
tp->offset = (unsigned long)(paddr - eaddr);
tp->address = (unsigned long)paddr;
diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources
index e7279ea6043a..a9d9c142eb7c 100644
--- a/tools/perf/util/python-ext-sources
+++ b/tools/perf/util/python-ext-sources
@@ -34,3 +34,4 @@ util/string.c
util/symbol_fprintf.c
util/units.c
util/affinity.c
+util/rwsem.c
diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h
index 5421fd2ad383..24316458be20 100644
--- a/tools/perf/util/record.h
+++ b/tools/perf/util/record.h
@@ -34,6 +34,7 @@ struct record_opts {
bool auxtrace_snapshot_on_exit;
bool auxtrace_sample_mode;
bool record_namespaces;
+ bool record_cgroup;
bool record_switch_events;
bool all_kernel;
bool all_user;
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 80ca5d0ab7fe..2c372cf5495e 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -464,6 +464,7 @@ static PyObject *python_process_brstack(struct perf_sample *sample,
struct thread *thread)
{
struct branch_stack *br = sample->branch_stack;
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
PyObject *pylist;
u64 i;
@@ -484,28 +485,28 @@ static PyObject *python_process_brstack(struct perf_sample *sample,
Py_FatalError("couldn't create Python dictionary");
pydict_set_item_string_decref(pyelem, "from",
- PyLong_FromUnsignedLongLong(br->entries[i].from));
+ PyLong_FromUnsignedLongLong(entries[i].from));
pydict_set_item_string_decref(pyelem, "to",
- PyLong_FromUnsignedLongLong(br->entries[i].to));
+ PyLong_FromUnsignedLongLong(entries[i].to));
pydict_set_item_string_decref(pyelem, "mispred",
- PyBool_FromLong(br->entries[i].flags.mispred));
+ PyBool_FromLong(entries[i].flags.mispred));
pydict_set_item_string_decref(pyelem, "predicted",
- PyBool_FromLong(br->entries[i].flags.predicted));
+ PyBool_FromLong(entries[i].flags.predicted));
pydict_set_item_string_decref(pyelem, "in_tx",
- PyBool_FromLong(br->entries[i].flags.in_tx));
+ PyBool_FromLong(entries[i].flags.in_tx));
pydict_set_item_string_decref(pyelem, "abort",
- PyBool_FromLong(br->entries[i].flags.abort));
+ PyBool_FromLong(entries[i].flags.abort));
pydict_set_item_string_decref(pyelem, "cycles",
- PyLong_FromUnsignedLongLong(br->entries[i].flags.cycles));
+ PyLong_FromUnsignedLongLong(entries[i].flags.cycles));
thread__find_map_fb(thread, sample->cpumode,
- br->entries[i].from, &al);
+ entries[i].from, &al);
dsoname = get_dsoname(al.map);
pydict_set_item_string_decref(pyelem, "from_dsoname",
_PyUnicode_FromString(dsoname));
thread__find_map_fb(thread, sample->cpumode,
- br->entries[i].to, &al);
+ entries[i].to, &al);
dsoname = get_dsoname(al.map);
pydict_set_item_string_decref(pyelem, "to_dsoname",
_PyUnicode_FromString(dsoname));
@@ -561,6 +562,7 @@ static PyObject *python_process_brstacksym(struct perf_sample *sample,
struct thread *thread)
{
struct branch_stack *br = sample->branch_stack;
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
PyObject *pylist;
u64 i;
char bf[512];
@@ -581,22 +583,22 @@ static PyObject *python_process_brstacksym(struct perf_sample *sample,
Py_FatalError("couldn't create Python dictionary");
thread__find_symbol_fb(thread, sample->cpumode,
- br->entries[i].from, &al);
+ entries[i].from, &al);
get_symoff(al.sym, &al, true, bf, sizeof(bf));
pydict_set_item_string_decref(pyelem, "from",
_PyUnicode_FromString(bf));
thread__find_symbol_fb(thread, sample->cpumode,
- br->entries[i].to, &al);
+ entries[i].to, &al);
get_symoff(al.sym, &al, true, bf, sizeof(bf));
pydict_set_item_string_decref(pyelem, "to",
_PyUnicode_FromString(bf));
- get_br_mspred(&br->entries[i].flags, bf, sizeof(bf));
+ get_br_mspred(&entries[i].flags, bf, sizeof(bf));
pydict_set_item_string_decref(pyelem, "pred",
_PyUnicode_FromString(bf));
- if (br->entries[i].flags.in_tx) {
+ if (entries[i].flags.in_tx) {
pydict_set_item_string_decref(pyelem, "in_tx",
_PyUnicode_FromString("X"));
} else {
@@ -604,7 +606,7 @@ static PyObject *python_process_brstacksym(struct perf_sample *sample,
_PyUnicode_FromString("-"));
}
- if (br->entries[i].flags.abort) {
+ if (entries[i].flags.abort) {
pydict_set_item_string_decref(pyelem, "abort",
_PyUnicode_FromString("A"));
} else {
@@ -692,6 +694,9 @@ static int regs_map(struct regs_dump *regs, uint64_t mask, char *bf, int size)
bf[0] = 0;
+ if (!regs || !regs->regs)
+ return 0;
+
for_each_set_bit(r, (unsigned long *) &mask, sizeof(mask) * 8) {
u64 val = regs->regs[i++];
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index d0d7d25b23e3..0b0bfe5bef17 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -471,6 +471,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool)
tool->comm = process_event_stub;
if (tool->namespaces == NULL)
tool->namespaces = process_event_stub;
+ if (tool->cgroup == NULL)
+ tool->cgroup = process_event_stub;
if (tool->fork == NULL)
tool->fork = process_event_stub;
if (tool->exit == NULL)
@@ -1007,6 +1009,7 @@ static void callchain__lbr_callstack_printf(struct perf_sample *sample)
{
struct ip_callchain *callchain = sample->callchain;
struct branch_stack *lbr_stack = sample->branch_stack;
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
u64 kernel_callchain_nr = callchain->nr;
unsigned int i;
@@ -1043,10 +1046,10 @@ static void callchain__lbr_callstack_printf(struct perf_sample *sample)
i, callchain->ips[i]);
printf("..... %2d: %016" PRIx64 "\n",
- (int)(kernel_callchain_nr), lbr_stack->entries[0].to);
+ (int)(kernel_callchain_nr), entries[0].to);
for (i = 0; i < lbr_stack->nr; i++)
printf("..... %2d: %016" PRIx64 "\n",
- (int)(i + kernel_callchain_nr + 1), lbr_stack->entries[i].from);
+ (int)(i + kernel_callchain_nr + 1), entries[i].from);
}
}
@@ -1068,6 +1071,7 @@ static void callchain__printf(struct evsel *evsel,
static void branch_stack__printf(struct perf_sample *sample, bool callstack)
{
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
uint64_t i;
printf("%s: nr:%" PRIu64 "\n",
@@ -1075,7 +1079,7 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack)
sample->branch_stack->nr);
for (i = 0; i < sample->branch_stack->nr; i++) {
- struct branch_entry *e = &sample->branch_stack->entries[i];
+ struct branch_entry *e = &entries[i];
if (!callstack) {
printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x\n",
@@ -1434,6 +1438,8 @@ static int machines__deliver_event(struct machines *machines,
return tool->comm(tool, event, sample, machine);
case PERF_RECORD_NAMESPACES:
return tool->namespaces(tool, event, sample, machine);
+ case PERF_RECORD_CGROUP:
+ return tool->cgroup(tool, event, sample, machine);
case PERF_RECORD_FORK:
return tool->fork(tool, event, sample, machine);
case PERF_RECORD_EXIT:
diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py
index aa344a163eaf..c5e3e9a68162 100644
--- a/tools/perf/util/setup.py
+++ b/tools/perf/util/setup.py
@@ -2,11 +2,13 @@ from os import getenv
from subprocess import Popen, PIPE
from re import sub
+cc = getenv("CC")
+cc_is_clang = b"clang version" in Popen([cc.split()[0], "-v"], stderr=PIPE).stderr.readline()
+
def clang_has_option(option):
- return [o for o in Popen(['clang', option], stderr=PIPE).stderr.readlines() if b"unknown argument" in o] == [ ]
+ return [o for o in Popen([cc, option], stderr=PIPE).stderr.readlines() if b"unknown argument" in o] == [ ]
-cc = getenv("CC")
-if cc == "clang":
+if cc_is_clang:
from distutils.sysconfig import get_config_vars
vars = get_config_vars()
for var in ('CFLAGS', 'OPT'):
@@ -19,6 +21,8 @@ if cc == "clang":
vars[var] = sub("-fstack-clash-protection", "", vars[var])
if not clang_has_option("-fstack-protector-strong"):
vars[var] = sub("-fstack-protector-strong", "", vars[var])
+ if not clang_has_option("-fno-semantic-interposition"):
+ vars[var] = sub("-fno-semantic-interposition", "", vars[var])
from distutils.core import setup, Extension
@@ -40,7 +44,7 @@ class install_lib(_install_lib):
cflags = getenv('CFLAGS', '').split()
# switch off several checks (need to be at the end of cflags list)
cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter', '-Wno-redundant-decls' ]
-if cc != "clang":
+if not cc_is_clang:
cflags += ['-Wno-cast-function-type' ]
src_perf = getenv('srctree') + '/tools/perf'
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 9fcba2872130..f14cc728c358 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -12,6 +12,7 @@
#include "cacheline.h"
#include "comm.h"
#include "map.h"
+#include "maps.h"
#include "symbol.h"
#include "map_symbol.h"
#include "branch.h"
@@ -25,6 +26,8 @@
#include "mem-events.h"
#include "annotate.h"
#include "time-utils.h"
+#include "cgroup.h"
+#include "machine.h"
#include <linux/kernel.h>
#include <linux/string.h>
@@ -324,8 +327,7 @@ static int _hist_entry__sym_snprintf(struct map_symbol *ms,
return ret;
}
-static int hist_entry__sym_snprintf(struct hist_entry *he, char *bf,
- size_t size, unsigned int width)
+int hist_entry__sym_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width)
{
return _hist_entry__sym_snprintf(&he->ms, he->ip,
he->level, bf, size, width);
@@ -635,6 +637,39 @@ struct sort_entry sort_cgroup_id = {
.se_width_idx = HISTC_CGROUP_ID,
};
+/* --sort cgroup */
+
+static int64_t
+sort__cgroup_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ return right->cgroup - left->cgroup;
+}
+
+static int hist_entry__cgroup_snprintf(struct hist_entry *he,
+ char *bf, size_t size,
+ unsigned int width __maybe_unused)
+{
+ const char *cgrp_name = "N/A";
+
+ if (he->cgroup) {
+ struct cgroup *cgrp = cgroup__find(he->ms.maps->machine->env,
+ he->cgroup);
+ if (cgrp != NULL)
+ cgrp_name = cgrp->name;
+ else
+ cgrp_name = "unknown";
+ }
+
+ return repsep_snprintf(bf, size, "%s", cgrp_name);
+}
+
+struct sort_entry sort_cgroup = {
+ .se_header = "Cgroup",
+ .se_cmp = sort__cgroup_cmp,
+ .se_snprintf = hist_entry__cgroup_snprintf,
+ .se_width_idx = HISTC_CGROUP,
+};
+
/* --sort socket */
static int64_t
@@ -870,7 +905,8 @@ static int hist_entry__sym_from_snprintf(struct hist_entry *he, char *bf,
if (he->branch_info) {
struct addr_map_symbol *from = &he->branch_info->from;
- return _hist_entry__sym_snprintf(&from->ms, from->addr, he->level, bf, size, width);
+ return _hist_entry__sym_snprintf(&from->ms, from->al_addr,
+ he->level, bf, size, width);
}
return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
@@ -882,7 +918,8 @@ static int hist_entry__sym_to_snprintf(struct hist_entry *he, char *bf,
if (he->branch_info) {
struct addr_map_symbol *to = &he->branch_info->to;
- return _hist_entry__sym_snprintf(&to->ms, to->addr, he->level, bf, size, width);
+ return _hist_entry__sym_snprintf(&to->ms, to->al_addr,
+ he->level, bf, size, width);
}
return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
@@ -1659,6 +1696,7 @@ static struct sort_dimension common_sort_dimensions[] = {
DIM(SORT_TRACE, "trace", sort_trace),
DIM(SORT_SYM_SIZE, "symbol_size", sort_sym_size),
DIM(SORT_DSO_SIZE, "dso_size", sort_dso_size),
+ DIM(SORT_CGROUP, "cgroup", sort_cgroup),
DIM(SORT_CGROUP_ID, "cgroup_id", sort_cgroup_id),
DIM(SORT_SYM_IPC_NULL, "ipc_null", sort_sym_ipc_null),
DIM(SORT_TIME, "time", sort_time),
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 5aff9542d9b7..cfa6ac6f7d06 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -101,6 +101,7 @@ struct hist_entry {
struct thread *thread;
struct comm *comm;
struct namespace_id cgroup_id;
+ u64 cgroup;
u64 ip;
u64 transaction;
s32 socket;
@@ -164,6 +165,8 @@ static __pure inline bool hist_entry__has_callchains(struct hist_entry *he)
return he->callchain_size != 0;
}
+int hist_entry__sym_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width);
+
static inline bool hist_entry__has_pairs(struct hist_entry *he)
{
return !list_empty(&he->pairs.node);
@@ -222,6 +225,7 @@ enum sort_type {
SORT_TRACE,
SORT_SYM_SIZE,
SORT_DSO_SIZE,
+ SORT_CGROUP,
SORT_CGROUP_ID,
SORT_SYM_IPC_NULL,
SORT_TIME,
diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c
index 6ccf6f6d09df..5b7d6c16d33f 100644
--- a/tools/perf/util/srcline.c
+++ b/tools/perf/util/srcline.c
@@ -193,16 +193,30 @@ static void find_address_in_section(bfd *abfd, asection *section, void *data)
bfd_vma pc, vma;
bfd_size_type size;
struct a2l_data *a2l = data;
+ flagword flags;
if (a2l->found)
return;
- if ((bfd_get_section_flags(abfd, section) & SEC_ALLOC) == 0)
+#ifdef bfd_get_section_flags
+ flags = bfd_get_section_flags(abfd, section);
+#else
+ flags = bfd_section_flags(section);
+#endif
+ if ((flags & SEC_ALLOC) == 0)
return;
pc = a2l->addr;
+#ifdef bfd_get_section_vma
vma = bfd_get_section_vma(abfd, section);
+#else
+ vma = bfd_section_vma(section);
+#endif
+#ifdef bfd_get_section_size
size = bfd_get_section_size(section);
+#else
+ size = bfd_section_size(section);
+#endif
if (pc < vma || pc >= vma + size)
return;
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index bc31fccc0057..9e757d18d713 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -16,6 +16,7 @@
#include <linux/ctype.h>
#include "cgroup.h"
#include <api/fs/fs.h>
+#include "util.h"
#define CNTR_NOT_SUPPORTED "<not supported>"
#define CNTR_NOT_COUNTED "<not counted>"
@@ -110,15 +111,15 @@ static void aggr_printout(struct perf_stat_config *config,
config->csv_sep);
break;
case AGGR_NONE:
- if (evsel->percore) {
+ if (evsel->percore && !config->percore_show_thread) {
fprintf(config->output, "S%d-D%d-C%*d%s",
cpu_map__id_to_socket(id),
cpu_map__id_to_die(id),
- config->csv_output ? 0 : -5,
+ config->csv_output ? 0 : -3,
cpu_map__id_to_cpu(id), config->csv_sep);
} else {
- fprintf(config->output, "CPU%*d%s ",
- config->csv_output ? 0 : -5,
+ fprintf(config->output, "CPU%*d%s",
+ config->csv_output ? 0 : -7,
evsel__cpus(evsel)->map[id],
config->csv_sep);
}
@@ -628,7 +629,7 @@ static void aggr_cb(struct perf_stat_config *config,
static void print_counter_aggrdata(struct perf_stat_config *config,
struct evsel *counter, int s,
char *prefix, bool metric_only,
- bool *first)
+ bool *first, int cpu)
{
struct aggr_data ad;
FILE *output = config->output;
@@ -654,7 +655,7 @@ static void print_counter_aggrdata(struct perf_stat_config *config,
fprintf(output, "%s", prefix);
uval = val * counter->scale;
- printout(config, id, nr, counter, uval, prefix,
+ printout(config, cpu != -1 ? cpu : id, nr, counter, uval, prefix,
run, ena, 1.0, &rt_stat);
if (!metric_only)
fputc('\n', output);
@@ -687,7 +688,7 @@ static void print_aggr(struct perf_stat_config *config,
evlist__for_each_entry(evlist, counter) {
print_counter_aggrdata(config, counter, s,
prefix, metric_only,
- &first);
+ &first, -1);
}
if (metric_only)
fputc('\n', output);
@@ -1097,7 +1098,6 @@ static void print_footer(struct perf_stat_config *config)
{
double avg = avg_stats(config->walltime_nsecs_stats) / NSEC_PER_SEC;
FILE *output = config->output;
- int n;
if (!config->null_run)
fprintf(output, "\n");
@@ -1131,9 +1131,7 @@ static void print_footer(struct perf_stat_config *config)
}
fprintf(output, "\n\n");
- if (config->print_free_counters_hint &&
- sysctl__read_int("kernel/nmi_watchdog", &n) >= 0 &&
- n > 0)
+ if (config->print_free_counters_hint && sysctl__nmi_watchdog_enabled())
fprintf(output,
"Some events weren't counted. Try disabling the NMI watchdog:\n"
" echo 0 > /proc/sys/kernel/nmi_watchdog\n"
@@ -1146,6 +1144,26 @@ static void print_footer(struct perf_stat_config *config)
"the same PMU. Try reorganizing the group.\n");
}
+static void print_percore_thread(struct perf_stat_config *config,
+ struct evsel *counter, char *prefix)
+{
+ int s, s2, id;
+ bool first = true;
+
+ for (int i = 0; i < perf_evsel__nr_cpus(counter); i++) {
+ s2 = config->aggr_get_id(config, evsel__cpus(counter), i);
+ for (s = 0; s < config->aggr_map->nr; s++) {
+ id = config->aggr_map->map[s];
+ if (s2 == id)
+ break;
+ }
+
+ print_counter_aggrdata(config, counter, s,
+ prefix, false,
+ &first, i);
+ }
+}
+
static void print_percore(struct perf_stat_config *config,
struct evsel *counter, char *prefix)
{
@@ -1157,13 +1175,16 @@ static void print_percore(struct perf_stat_config *config,
if (!(config->aggr_map || config->aggr_get_id))
return;
+ if (config->percore_show_thread)
+ return print_percore_thread(config, counter, prefix);
+
for (s = 0; s < config->aggr_map->nr; s++) {
if (prefix && metric_only)
fprintf(output, "%s", prefix);
print_counter_aggrdata(config, counter, s,
prefix, metric_only,
- &first);
+ &first, -1);
}
if (metric_only)
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 2c41d47f6f83..03ecb8cd0eec 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -18,7 +18,6 @@
* AGGR_NONE: Use matching CPU
* AGGR_THREAD: Not supported?
*/
-static bool have_frontend_stalled;
struct runtime_stat rt_stat;
struct stats walltime_nsecs_stats;
@@ -144,7 +143,6 @@ void runtime_stat__exit(struct runtime_stat *st)
void perf_stat__init_shadow_stats(void)
{
- have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend");
runtime_stat__init(&rt_stat);
}
@@ -779,9 +777,7 @@ static void generic_metric(struct perf_stat_config *config,
}
if (!metric_events[i]) {
- const char *p = metric_expr;
-
- if (expr__parse(&ratio, &pctx, &p) == 0) {
+ if (expr__parse(&ratio, &pctx, metric_expr) == 0) {
char *unit;
char metric_bf[64];
@@ -807,8 +803,11 @@ static void generic_metric(struct perf_stat_config *config,
out->force_header ?
(metric_name ? metric_name : name) : "", 0);
}
- } else
- print_metric(config, ctxp, NULL, NULL, "", 0);
+ } else {
+ print_metric(config, ctxp, NULL, NULL,
+ out->force_header ?
+ (metric_name ? metric_name : name) : "", 0);
+ }
for (i = 1; i < pctx.num_ids; i++)
zfree(&pctx.ids[i].name);
@@ -853,10 +852,6 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, NULL, "%7.2f ",
"stalled cycles per insn",
ratio);
- } else if (have_frontend_stalled) {
- out->new_line(config, ctxp);
- print_metric(config, ctxp, NULL, "%7.2f ",
- "stalled cycles per insn", 0);
}
} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0)
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index fb990efa54a8..b4fdfaa7f2c0 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -109,6 +109,7 @@ struct perf_stat_config {
bool walltime_run_table;
bool all_kernel;
bool all_user;
+ bool percore_show_thread;
FILE *output;
unsigned int interval;
unsigned int timeout;
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 6658fbf196e6..be5b493f8284 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -704,9 +704,15 @@ void symsrc__destroy(struct symsrc *ss)
close(ss->fd);
}
-bool __weak elf__needs_adjust_symbols(GElf_Ehdr ehdr)
+bool elf__needs_adjust_symbols(GElf_Ehdr ehdr)
{
- return ehdr.e_type == ET_EXEC || ehdr.e_type == ET_REL;
+ /*
+ * For most architectures vmlinux is an ELF file of type ET_EXEC;
+ * the Arm64 kernel, however, is linked with the '-shared' option,
+ * so type ET_DYN has to be checked as well.
+ */
+ return ehdr.e_type == ET_EXEC || ehdr.e_type == ET_REL ||
+ ehdr.e_type == ET_DYN;
}
int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name,
@@ -920,6 +926,9 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map,
if (curr_map == NULL)
return -1;
+ if (curr_dso->kernel)
+ map__kmap(curr_map)->kmaps = kmaps;
+
if (adjust_kernel_syms) {
curr_map->start = shdr->sh_addr + ref_reloc(kmap);
curr_map->end = curr_map->start + shdr->sh_size;
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 3b379b1296f1..26bc6a0096ce 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -635,9 +635,12 @@ out:
static bool symbol__is_idle(const char *name)
{
const char * const idle_symbols[] = {
+ "acpi_idle_do_entry",
+ "acpi_processor_ffh_cstate_enter",
"arch_cpu_idle",
"cpu_idle",
"cpu_startup_entry",
+ "idle_cpu",
"intel_idle",
"default_idle",
"native_safe_halt",
@@ -651,13 +654,17 @@ static bool symbol__is_idle(const char *name)
NULL
};
int i;
+ static struct strlist *idle_symbols_list;
- for (i = 0; idle_symbols[i]; i++) {
- if (!strcmp(idle_symbols[i], name))
- return true;
- }
+ if (idle_symbols_list)
+ return strlist__has_entry(idle_symbols_list, name);
- return false;
+ idle_symbols_list = strlist__new(NULL, NULL);
+
+ for (i = 0; idle_symbols[i]; i++)
+ strlist__add(idle_symbols_list, idle_symbols[i]);
+
+ return strlist__has_entry(idle_symbols_list, name);
}
static int map__process_kallsym_symbol(void *arg, const char *name,
@@ -1615,7 +1622,12 @@ int dso__load(struct dso *dso, struct map *map)
goto out;
}
- if (dso->kernel) {
+ kmod = dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE ||
+ dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP ||
+ dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE ||
+ dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE_COMP;
+
+ if (dso->kernel && !kmod) {
if (dso->kernel == DSO_TYPE_KERNEL)
ret = dso__load_kernel_sym(dso, map);
else if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
@@ -1643,12 +1655,6 @@ int dso__load(struct dso *dso, struct map *map)
if (!name)
goto out;
- kmod = dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE ||
- dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP ||
- dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE ||
- dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE_COMP;
-
-
/*
* Read the build id if possible. This is required for
* DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work
diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h
index 10f1ec3e0349..b916afb95ec5 100644
--- a/tools/perf/util/symbol_conf.h
+++ b/tools/perf/util/symbol_conf.h
@@ -73,6 +73,7 @@ struct symbol_conf {
const char *symfs;
int res_sample;
int pad_output_len_dso;
+ int group_sort_idx;
};
extern struct symbol_conf symbol_conf;
diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
index c423298fe62d..a661b122d9d8 100644
--- a/tools/perf/util/synthetic-events.c
+++ b/tools/perf/util/synthetic-events.c
@@ -16,6 +16,7 @@
#include "util/synthetic-events.h"
#include "util/target.h"
#include "util/time-utils.h"
+#include "util/cgroup.h"
#include <linux/bitops.h>
#include <linux/kernel.h>
#include <linux/string.h>
@@ -345,6 +346,7 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
continue;
event->mmap2.ino = (u64)ino;
+ event->mmap2.ino_generation = 0;
/*
* Just like the kernel, see __perf_event_mmap in kernel/perf_event.c
@@ -413,6 +415,127 @@ out:
return rc;
}
+#ifdef HAVE_FILE_HANDLE
+/*
+ * Synthesize one PERF_RECORD_CGROUP event for the directory @path.
+ *
+ * @tool:      forwarded to perf_tool__process_synth_event()
+ * @event:     caller-provided buffer the event is built in
+ * @path:      full path of the cgroup directory; it is only read here
+ * @mount_len: number of leading bytes of @path that are left out of the
+ *             path stored in the event
+ * @process:   handler the finished event is passed to
+ * @machine:   supplies id_hdr_size, the number of trailing bytes that
+ *             are zeroed after the path
+ *
+ * The id stored in the event is the 64-bit value name_to_handle_at()
+ * returns as the file handle of @path.
+ *
+ * Returns 0 on success, -1 if the handle cannot be obtained or the
+ * handler reports an error.
+ */
+static int perf_event__synthesize_cgroup(struct perf_tool *tool,
+					 union perf_event *event,
+					 char *path, size_t mount_len,
+					 perf_event__handler_t process,
+					 struct machine *machine)
+{
+	size_t event_size = sizeof(event->cgroup) - sizeof(event->cgroup.path);
+	const char *rel_path = path + mount_len;
+	/* bytes of the relative path, terminator included */
+	size_t str_len = strlen(rel_path) + 1;
+	/* the path stored in the event is padded to a multiple of u64 */
+	size_t path_len = (str_len + sizeof(u64) - 1) / sizeof(u64) * sizeof(u64);
+	struct {
+		struct file_handle fh;
+		uint64_t cgroup_id;
+	} handle;
+	int mount_id;
+
+	memset(&event->cgroup, 0, event_size);
+
+	event->cgroup.header.type = PERF_RECORD_CGROUP;
+	event->cgroup.header.size = event_size + path_len + machine->id_hdr_size;
+
+	handle.fh.handle_bytes = sizeof(handle.cgroup_id);
+	if (name_to_handle_at(AT_FDCWD, path, &handle.fh, &mount_id, 0) < 0) {
+		pr_debug("name_to_handle_at failed: %s\n", path);
+		return -1;
+	}
+
+	event->cgroup.id = handle.cgroup_id;
+	memcpy(event->cgroup.path, rel_path, str_len);
+	/*
+	 * Zero the alignment padding and the id header area inside the
+	 * event itself, so that nothing is written behind the terminator
+	 * of the caller's path buffer.
+	 */
+	memset(event->cgroup.path + str_len, 0,
+	       path_len - str_len + machine->id_hdr_size);
+
+	if (perf_tool__process_synth_event(tool, event, machine, process) < 0) {
+		pr_debug("process synth event failed\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Synthesize a cgroup event for @path and then for every directory
+ * below it, depth first.
+ *
+ * @path is extended in place with each subdirectory name while
+ * descending and cut back to its original length once the child has
+ * been handled; it is left extended when a child fails.
+ *
+ * Returns -1 if the event for @path cannot be synthesized or @path
+ * cannot be opened; otherwise the result of the last child walked
+ * (0 when there was none), stopping at the first child that fails.
+ */
+static int perf_event__walk_cgroup_tree(struct perf_tool *tool,
+					union perf_event *event,
+					char *path, size_t mount_len,
+					perf_event__handler_t process,
+					struct machine *machine)
+{
+	const size_t base_len = strlen(path);
+	struct dirent *entry;
+	DIR *dir;
+	int err = 0;
+
+	if (perf_event__synthesize_cgroup(tool, event, path, mount_len,
+					  process, machine) < 0)
+		return -1;
+
+	dir = opendir(path);
+	if (!dir) {
+		pr_debug("failed to open directory: %s\n", path);
+		return -1;
+	}
+
+	for (entry = readdir(dir); entry != NULL; entry = readdir(dir)) {
+		const char *name = entry->d_name;
+
+		/* only real subdirectories are cgroups */
+		if (entry->d_type != DT_DIR)
+			continue;
+		if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0)
+			continue;
+
+		/* any sane path should be less than PATH_MAX */
+		if (strlen(path) + strlen(name) + 1 >= PATH_MAX)
+			continue;
+
+		if (path[base_len - 1] != '/')
+			strcat(path, "/");
+		strcat(path, name);
+
+		err = perf_event__walk_cgroup_tree(tool, event, path,
+						   mount_len, process, machine);
+		if (err < 0)
+			break;
+
+		/* drop the child name again before the next entry */
+		path[base_len] = '\0';
+	}
+
+	closedir(dir);
+	return err;
+}
+
+/*
+ * Synthesize cgroup events for every directory found below the
+ * "perf_event" cgroup mount point.
+ *
+ * @tool, @process and @machine are forwarded to the tree walk.
+ *
+ * Returns 0 on success, -1 if the mount point cannot be found, leaves
+ * no room for the trailing slash, or the walk fails.
+ */
+int perf_event__synthesize_cgroups(struct perf_tool *tool,
+				   perf_event__handler_t process,
+				   struct machine *machine)
+{
+	union perf_event event;
+	char cgrp_root[PATH_MAX];
+	size_t mount_len; /* length of mount point in the path */
+
+	if (cgroupfs_find_mountpoint(cgrp_root, PATH_MAX, "perf_event") < 0) {
+		pr_debug("cannot find cgroup mount point\n");
+		return -1;
+	}
+
+	mount_len = strlen(cgrp_root);
+	/* the slash appended below needs one more byte plus the terminator */
+	if (mount_len + 2 > sizeof(cgrp_root)) {
+		pr_debug("cgroup mount point path is too long\n");
+		return -1;
+	}
+
+	/* make sure the path starts with a slash (after mount point) */
+	strcat(cgrp_root, "/");
+
+	if (perf_event__walk_cgroup_tree(tool, &event, cgrp_root, mount_len,
+					 process, machine) < 0)
+		return -1;
+
+	return 0;
+}
+#else
+int perf_event__synthesize_cgroups(struct perf_tool *tool __maybe_unused,
+ perf_event__handler_t process __maybe_unused,
+ struct machine *machine __maybe_unused)
+{ /* fallback used when HAVE_FILE_HANDLE is not defined: no argument is used */
+ return -1; /* nothing is synthesized; always reports failure */
+}
+#endif
+
int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t process,
struct machine *machine)
{
@@ -1183,7 +1306,8 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
if (type & PERF_SAMPLE_BRANCH_STACK) {
sz = sample->branch_stack->nr * sizeof(struct branch_entry);
- sz += sizeof(u64);
+ /* nr, hw_idx */
+ sz += 2 * sizeof(u64);
result += sz;
}
@@ -1228,6 +1352,9 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
if (type & PERF_SAMPLE_PHYS_ADDR)
result += sizeof(u64);
+ if (type & PERF_SAMPLE_CGROUP)
+ result += sizeof(u64);
+
if (type & PERF_SAMPLE_AUX) {
result += sizeof(u64);
result += sample->aux_sample.size;
@@ -1344,7 +1471,8 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo
if (type & PERF_SAMPLE_BRANCH_STACK) {
sz = sample->branch_stack->nr * sizeof(struct branch_entry);
- sz += sizeof(u64);
+ /* nr, hw_idx */
+ sz += 2 * sizeof(u64);
memcpy(array, sample->branch_stack, sz);
array = (void *)array + sz;
}
@@ -1401,6 +1529,11 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo
array++;
}
+ if (type & PERF_SAMPLE_CGROUP) {
+ *array = sample->cgroup;
+ array++;
+ }
+
if (type & PERF_SAMPLE_AUX) {
sz = sample->aux_sample.size;
*array++ = sz;
diff --git a/tools/perf/util/synthetic-events.h b/tools/perf/util/synthetic-events.h
index baead0cdc381..e7a3e9589738 100644
--- a/tools/perf/util/synthetic-events.h
+++ b/tools/perf/util/synthetic-events.h
@@ -45,6 +45,7 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, perf_event__handl
int perf_event__synthesize_mmap_events(struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, perf_event__handler_t process, struct machine *machine, bool mmap_data);
int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine);
int perf_event__synthesize_namespaces(struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_cgroups(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine);
int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_format, const struct perf_sample *sample);
int perf_event__synthesize_stat_config(struct perf_tool *tool, struct perf_stat_config *config, perf_event__handler_t process, struct machine *machine);
int perf_event__synthesize_stat_events(struct perf_stat_config *config, struct perf_tool *tool, struct evlist *evlist, perf_event__handler_t process, bool attrs);
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
index 2abbf668b8de..3fb67bd31e4a 100644
--- a/tools/perf/util/tool.h
+++ b/tools/perf/util/tool.h
@@ -46,6 +46,7 @@ struct perf_tool {
mmap2,
comm,
namespaces,
+ cgroup,
fork,
exit,
lost,
@@ -78,6 +79,7 @@ struct perf_tool {
bool ordered_events;
bool ordering_requires_timestamps;
bool namespace_events;
+ bool cgroup_events;
bool no_warn;
enum show_feature_header show_feat_hdr;
};
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 969ae560dad9..d707c9624dd9 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -55,6 +55,24 @@ int sysctl__max_stack(void)
return sysctl_perf_event_max_stack;
}
+/*
+ * Report whether the "kernel/nmi_watchdog" sysctl holds a value above 0.
+ *
+ * The first successful read is remembered and returned by all later
+ * calls. A failed read yields false and is not remembered, so the next
+ * call reads the sysctl again.
+ */
+bool sysctl__nmi_watchdog_enabled(void)
+{
+	static bool nmi_watchdog;
+	static bool cached;
+
+	if (!cached) {
+		int value;
+
+		if (sysctl__read_int("kernel/nmi_watchdog", &value) < 0)
+			return false;
+
+		nmi_watchdog = value > 0;
+		cached = true;
+	}
+
+	return nmi_watchdog;
+}
+
bool test_attr__enabled;
bool perf_host = true;
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 9969b8b46f7c..f486fdd3a538 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -29,6 +29,8 @@ size_t hex_width(u64 v);
int sysctl__max_stack(void);
+bool sysctl__nmi_watchdog_enabled(void);
+
int fetch_kernel_version(unsigned int *puint,
char *str, size_t str_sz);
#define KVER_VERSION(x) (((x) >> 16) & 0xff)