aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/build/Makefile.feature1
-rw-r--r--tools/build/feature/Makefile4
-rw-r--r--tools/build/feature/test-all.c5
-rw-r--r--tools/build/feature/test-pthread-barrier.c12
-rw-r--r--tools/perf/Documentation/perf-buildid-cache.txt3
-rw-r--r--tools/perf/Documentation/perf-evlist.txt4
-rw-r--r--tools/perf/Documentation/perf-inject.txt4
-rw-r--r--tools/perf/Documentation/perf-lock.txt4
-rw-r--r--tools/perf/Documentation/perf-sched.txt4
-rw-r--r--tools/perf/Documentation/perf-script.txt10
-rw-r--r--tools/perf/Documentation/perf-timechart.txt4
-rw-r--r--tools/perf/Documentation/perf-top.txt6
-rw-r--r--tools/perf/Documentation/perf-trace.txt16
-rw-r--r--tools/perf/Documentation/perf.data-file-format.txt23
-rw-r--r--tools/perf/Documentation/tips.txt2
-rw-r--r--tools/perf/Makefile.config14
-rw-r--r--tools/perf/arch/arm64/util/Build1
-rw-r--r--tools/perf/arch/arm64/util/header.c65
-rw-r--r--tools/perf/arch/powerpc/util/header.c2
-rw-r--r--tools/perf/arch/s390/annotate/instructions.c3
-rw-r--r--tools/perf/arch/x86/tests/perf-time-to-tsc.c2
-rw-r--r--tools/perf/arch/x86/util/header.c2
-rw-r--r--tools/perf/bench/futex-hash.c19
-rw-r--r--tools/perf/bench/futex-lock-pi.c23
-rw-r--r--tools/perf/bench/futex-requeue.c22
-rw-r--r--tools/perf/bench/futex-wake-parallel.c46
-rw-r--r--tools/perf/bench/futex-wake.c18
-rw-r--r--tools/perf/builtin-buildid-cache.c4
-rw-r--r--tools/perf/builtin-c2c.c8
-rw-r--r--tools/perf/builtin-kvm.c10
-rw-r--r--tools/perf/builtin-record.c40
-rw-r--r--tools/perf/builtin-report.c14
-rw-r--r--tools/perf/builtin-script.c102
-rw-r--r--tools/perf/builtin-stat.c62
-rw-r--r--tools/perf/builtin-top.c10
-rw-r--r--tools/perf/builtin-trace.c2
-rwxr-xr-xtools/perf/check-headers.sh1
-rw-r--r--tools/perf/pmu-events/arch/arm64/cavium/thunderx2-imp-def.json62
-rw-r--r--tools/perf/pmu-events/arch/arm64/mapfile.csv15
-rw-r--r--tools/perf/pmu-events/arch/powerpc/mapfile.csv12
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power9/cache.json5
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power9/frontend.json7
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power9/marked.json27
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power9/other.json276
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power9/pipeline.json14
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power9/pmc.json2
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power9/translation.json5
-rw-r--r--tools/perf/pmu-events/arch/x86/mapfile.csv5
-rw-r--r--tools/perf/pmu-events/jevents.c39
-rw-r--r--tools/perf/tests/attr.c6
-rw-r--r--tools/perf/tests/backward-ring-buffer.c6
-rw-r--r--tools/perf/tests/bp_signal.c2
-rw-r--r--tools/perf/tests/bpf.c2
-rw-r--r--tools/perf/tests/code-reading.c2
-rw-r--r--tools/perf/tests/keep-tracking.c2
-rw-r--r--tools/perf/tests/mmap-basic.c2
-rw-r--r--tools/perf/tests/openat-syscall-tp-fields.c2
-rw-r--r--tools/perf/tests/perf-record.c2
-rw-r--r--tools/perf/tests/sw-clock.c2
-rw-r--r--tools/perf/tests/switch-tracking.c2
-rw-r--r--tools/perf/tests/task-exit.c2
-rw-r--r--tools/perf/ui/browsers/annotate.c401
-rw-r--r--tools/perf/ui/gtk/annotate.c25
-rw-r--r--tools/perf/util/annotate.c641
-rw-r--r--tools/perf/util/annotate.h76
-rw-r--r--tools/perf/util/evlist.c64
-rw-r--r--tools/perf/util/evlist.h12
-rw-r--r--tools/perf/util/evsel.c78
-rw-r--r--tools/perf/util/evsel.h9
-rw-r--r--tools/perf/util/header.c68
-rw-r--r--tools/perf/util/header.h8
-rw-r--r--tools/perf/util/intel-pt-decoder/Build24
-rw-r--r--tools/perf/util/machine.c2
-rw-r--r--tools/perf/util/metricgroup.c8
-rw-r--r--tools/perf/util/mmap.c73
-rw-r--r--tools/perf/util/mmap.h4
-rw-r--r--tools/perf/util/ordered-events.c3
-rw-r--r--tools/perf/util/ordered-events.h2
-rw-r--r--tools/perf/util/pmu.c87
-rw-r--r--tools/perf/util/pmu.h2
-rw-r--r--tools/perf/util/python.c2
-rw-r--r--tools/perf/util/rblist.c19
-rw-r--r--tools/perf/util/rblist.h1
-rw-r--r--tools/perf/util/session.c48
-rw-r--r--tools/perf/util/session.h2
-rw-r--r--tools/perf/util/stat-shadow.c12
-rw-r--r--tools/perf/util/thread_map.c22
-rw-r--r--tools/perf/util/thread_map.h1
88 files changed, 1618 insertions, 1149 deletions
diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index c71a05b9c984..e52fcefee379 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -56,6 +56,7 @@ FEATURE_TESTS_BASIC := \
libunwind-arm \
libunwind-aarch64 \
pthread-attr-setaffinity-np \
+ pthread-barrier \
stackprotector-all \
timerfd \
libdw-dwarf-unwind \
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index 96982640fbf8..cff38f342283 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -37,6 +37,7 @@ FILES= \
test-libunwind-debug-frame-arm.bin \
test-libunwind-debug-frame-aarch64.bin \
test-pthread-attr-setaffinity-np.bin \
+ test-pthread-barrier.bin \
test-stackprotector-all.bin \
test-timerfd.bin \
test-libdw-dwarf-unwind.bin \
@@ -79,6 +80,9 @@ $(OUTPUT)test-hello.bin:
$(OUTPUT)test-pthread-attr-setaffinity-np.bin:
$(BUILD) -D_GNU_SOURCE -lpthread
+$(OUTPUT)test-pthread-barrier.bin:
+ $(BUILD) -lpthread
+
$(OUTPUT)test-stackprotector-all.bin:
$(BUILD) -fstack-protector-all
diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c
index 4112702e4aed..6fdf83263ab7 100644
--- a/tools/build/feature/test-all.c
+++ b/tools/build/feature/test-all.c
@@ -118,6 +118,10 @@
# include "test-pthread-attr-setaffinity-np.c"
#undef main
+#define main main_test_pthread_barrier
+# include "test-pthread-barrier.c"
+#undef main
+
#define main main_test_sched_getcpu
# include "test-sched_getcpu.c"
#undef main
@@ -187,6 +191,7 @@ int main(int argc, char *argv[])
main_test_sync_compare_and_swap(argc, argv);
main_test_zlib();
main_test_pthread_attr_setaffinity_np();
+ main_test_pthread_barrier();
main_test_lzma();
main_test_get_cpuid();
main_test_bpf();
diff --git a/tools/build/feature/test-pthread-barrier.c b/tools/build/feature/test-pthread-barrier.c
new file mode 100644
index 000000000000..0558d9334d97
--- /dev/null
+++ b/tools/build/feature/test-pthread-barrier.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdint.h>
+#include <pthread.h>
+
+int main(void)
+{
+ pthread_barrier_t barrier;
+
+ pthread_barrier_init(&barrier, NULL, 1); /* count 1: the single wait below cannot block */
+ pthread_barrier_wait(&barrier);
+ return pthread_barrier_destroy(&barrier);
+}
diff --git a/tools/perf/Documentation/perf-buildid-cache.txt b/tools/perf/Documentation/perf-buildid-cache.txt
index 84681007f80f..73c2650bd0db 100644
--- a/tools/perf/Documentation/perf-buildid-cache.txt
+++ b/tools/perf/Documentation/perf-buildid-cache.txt
@@ -24,6 +24,9 @@ OPTIONS
-a::
--add=::
Add specified file to the cache.
+-f::
+--force::
+ Don't complain, do it.
-k::
--kcore::
Add specified kcore file to the cache. For the current host that is
diff --git a/tools/perf/Documentation/perf-evlist.txt b/tools/perf/Documentation/perf-evlist.txt
index 6f7200fb85cf..c0a66400a960 100644
--- a/tools/perf/Documentation/perf-evlist.txt
+++ b/tools/perf/Documentation/perf-evlist.txt
@@ -20,6 +20,10 @@ OPTIONS
--input=::
Input file name. (default: perf.data unless stdin is a fifo)
+-f::
+--force::
+ Don't complain, do it.
+
-F::
--freq=::
Show just the sample frequency used for each event.
diff --git a/tools/perf/Documentation/perf-inject.txt b/tools/perf/Documentation/perf-inject.txt
index 87b2588d1cbd..a64d6588470e 100644
--- a/tools/perf/Documentation/perf-inject.txt
+++ b/tools/perf/Documentation/perf-inject.txt
@@ -60,6 +60,10 @@ include::itrace.txt[]
found in the jitdumps files captured in the input perf.data file. Use this option
if you are monitoring environment using JIT runtimes, such as Java, DART or V8.
+-f::
+--force::
+ Don't complain, do it.
+
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-archive[1]
diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt
index ab25be28c9dc..74d774592196 100644
--- a/tools/perf/Documentation/perf-lock.txt
+++ b/tools/perf/Documentation/perf-lock.txt
@@ -42,6 +42,10 @@ COMMON OPTIONS
--dump-raw-trace::
Dump raw trace in ASCII.
+-f::
+--force::
+ Don't complain, do it.
+
REPORT OPTIONS
--------------
diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt
index 55b67338548e..c7e50f263887 100644
--- a/tools/perf/Documentation/perf-sched.txt
+++ b/tools/perf/Documentation/perf-sched.txt
@@ -74,6 +74,10 @@ OPTIONS
--dump-raw-trace=::
Display verbose dump of the sched data.
+-f::
+--force::
+ Don't complain, do it.
+
OPTIONS for 'perf sched map'
----------------------------
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 2811fcf684cb..974ceb12c7f3 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -117,7 +117,7 @@ OPTIONS
Comma separated list of fields to print. Options are:
comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, brstackinsn,
- brstackoff, callindent, insn, insnlen, synth, phys_addr.
+ brstackoff, callindent, insn, insnlen, synth, phys_addr, metric.
Field list can be prepended with the type, trace, sw or hw,
to indicate to which event type the field list applies.
e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace
@@ -217,6 +217,14 @@ OPTIONS
The brstackoff field will print an offset into a specific dso/binary.
+ With the metric option perf script can compute metrics for
+ sampling periods, similar to perf stat. This requires
+ specifying a group with multiple events defining metrics with the :S option
+ for perf record. perf will sample on the first event, and
+ compute metrics for all the events in the group. Please note
+ that the metric computed is averaged over the whole sampling
+ period, not just for the sample point.
+
-k::
--vmlinux=<file>::
vmlinux pathname
diff --git a/tools/perf/Documentation/perf-timechart.txt b/tools/perf/Documentation/perf-timechart.txt
index df98d1c82688..ef0c7565bd5c 100644
--- a/tools/perf/Documentation/perf-timechart.txt
+++ b/tools/perf/Documentation/perf-timechart.txt
@@ -50,7 +50,9 @@ TIMECHART OPTIONS
-p::
--process::
Select the processes to display, by name or PID
-
+-f::
+--force::
+ Don't complain, do it.
--symfs=<directory>::
Look for files with symbols relative to this directory.
-n::
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 4353262bc462..8a32cc77bead 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -268,6 +268,12 @@ INTERACTIVE PROMPTING KEYS
[S]::
Stop annotation, return to full profile display.
+[K]::
+ Hide kernel symbols.
+
+[U]::
+ Hide user symbols.
+
[z]::
Toggle event count zeroing across display updates.
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index d53bea6bd571..6909cf1e0eea 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -86,18 +86,18 @@ comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-
In per-thread mode with inheritance mode on (default), Events are captured only when
the thread executes on the designated CPUs. Default is to monitor all CPUs.
---duration:
+--duration::
Show only events that had a duration greater than N.M ms.
---sched:
+--sched::
Accrue thread runtime and provide a summary at the end of the session.
--i
---input
+-i::
+--input::
Process events from a given perf data file.
--T
---time
+-T::
+--time::
Print full timestamp rather time relative to first sample.
--comm::
@@ -117,6 +117,10 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
Show tool stats such as number of times fd->pathname was discovered thru
hooking the open syscall return + vfs_getname or via reading /proc/pid/fd, etc.
+-f::
+--force::
+ Don't complain, do it.
+
-F=[all|min|maj]::
--pf=[all|min|maj]::
Trace pagefaults. Optionally, you can specify whether you want minor,
diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt
index e90c59c6d815..15e8b48077ba 100644
--- a/tools/perf/Documentation/perf.data-file-format.txt
+++ b/tools/perf/Documentation/perf.data-file-format.txt
@@ -238,6 +238,29 @@ struct auxtrace_index {
struct auxtrace_index_entry entries[PERF_AUXTRACE_INDEX_ENTRY_COUNT];
};
+ HEADER_STAT = 19,
+
+This is merely a flag signifying that the data section contains data
+recorded from perf stat record.
+
+ HEADER_CACHE = 20,
+
+Description of the cache hierarchy. Based on the Linux sysfs format
+in /sys/devices/system/cpu/cpu*/cache/
+
+ u32 version Currently always 1
+ u32 number_of_cache_levels
+
+struct {
+ u32 level;
+ u32 line_size;
+ u32 sets;
+ u32 ways;
+ struct perf_header_string type;
+ struct perf_header_string size;
+ struct perf_header_string map;
+}[number_of_cache_levels];
+
other bits are reserved and should ignored for now
HEADER_FEAT_BITS = 256,
diff --git a/tools/perf/Documentation/tips.txt b/tools/perf/Documentation/tips.txt
index db0ca3063eae..849599f39c5e 100644
--- a/tools/perf/Documentation/tips.txt
+++ b/tools/perf/Documentation/tips.txt
@@ -32,3 +32,5 @@ Order by the overhead of source file name and line number: perf report -s srclin
System-wide collection from all CPUs: perf record -a
Show current config key-value pairs: perf config --list
Show user configuration overrides: perf config --user --list
+To add Node.js USDT(User-Level Statically Defined Tracing): perf buildid-cache --add `which node`
+To report cacheline events from previous recording: perf c2c report
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index ed65e82f034e..79b117a03fd7 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -41,6 +41,7 @@ ifeq ($(SRCARCH),x86)
LIBUNWIND_LIBS = -lunwind-x86 -llzma -lunwind
endif
NO_PERF_REGS := 0
+ CFLAGS += -fPIC
endif
ifeq ($(SRCARCH),arm)
@@ -188,9 +189,7 @@ ifdef PYTHON_CONFIG
PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS))
PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil
PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null)
- ifeq ($(CC_NO_CLANG), 1)
- PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS))
- endif
+ PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS))
FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS)
endif
@@ -267,6 +266,10 @@ ifeq ($(feature-pthread-attr-setaffinity-np), 1)
CFLAGS += -DHAVE_PTHREAD_ATTR_SETAFFINITY_NP
endif
+ifeq ($(feature-pthread-barrier), 1)
+ CFLAGS += -DHAVE_PTHREAD_BARRIER
+endif
+
ifndef NO_BIONIC
$(call feature_check,bionic)
ifeq ($(feature-bionic), 1)
@@ -576,14 +579,15 @@ ifndef NO_GTK2
endif
endif
-
ifdef NO_LIBPERL
CFLAGS += -DNO_LIBPERL
else
PERL_EMBED_LDOPTS = $(shell perl -MExtUtils::Embed -e ldopts 2>/dev/null)
PERL_EMBED_LDFLAGS = $(call strip-libs,$(PERL_EMBED_LDOPTS))
PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS))
- PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null`
+ PERL_EMBED_CCOPTS = $(shell perl -MExtUtils::Embed -e ccopts 2>/dev/null)
+ PERL_EMBED_CCOPTS := $(filter-out -specs=%,$(PERL_EMBED_CCOPTS))
+ PERL_EMBED_LDOPTS := $(filter-out -specs=%,$(PERL_EMBED_LDOPTS))
FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS)
ifneq ($(feature-libperl), 1)
diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build
index cef6fb38d17e..b1ab72d2a42e 100644
--- a/tools/perf/arch/arm64/util/Build
+++ b/tools/perf/arch/arm64/util/Build
@@ -1,3 +1,4 @@
+libperf-y += header.o
libperf-$(CONFIG_DWARF) += dwarf-regs.o
libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
diff --git a/tools/perf/arch/arm64/util/header.c b/tools/perf/arch/arm64/util/header.c
new file mode 100644
index 000000000000..534cd2507d83
--- /dev/null
+++ b/tools/perf/arch/arm64/util/header.c
@@ -0,0 +1,65 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <api/fs/fs.h>
+#include "header.h"
+
+#define MIDR "/regs/identification/midr_el1"
+#define MIDR_SIZE 19
+#define MIDR_REVISION_MASK 0xf
+#define MIDR_VARIANT_SHIFT 20
+#define MIDR_VARIANT_MASK (0xf << MIDR_VARIANT_SHIFT)
+
+char *get_cpuid_str(struct perf_pmu *pmu)
+{
+ char *buf = NULL;
+ char path[PATH_MAX];
+ const char *sysfs = sysfs__mountpoint();
+ int cpu;
+ u64 midr = 0; /* stays 0 until one CPU's MIDR file is read and parsed */
+ struct cpu_map *cpus;
+ FILE *file;
+
+ if (!sysfs || !pmu || !pmu->cpus)
+ return NULL;
+
+ buf = malloc(MIDR_SIZE); /* doubles as read buffer and as the returned string */
+ if (!buf)
+ return NULL;
+
+ /* read MIDR_EL1 from sysfs, trying each CPU mapped to this PMU in turn */
+ cpus = cpu_map__get(pmu->cpus);
+ for (cpu = 0; cpu < cpus->nr; cpu++) {
+ scnprintf(path, PATH_MAX, "%s/devices/system/cpu/cpu%d"MIDR,
+ sysfs, cpus->map[cpu]);
+
+ file = fopen(path, "r");
+ if (!file) {
+ pr_debug("fopen failed for file %s\n", path);
+ continue;
+ }
+
+ if (!fgets(buf, MIDR_SIZE, file)) {
+ fclose(file);
+ continue;
+ }
+ fclose(file);
+
+ /* Clear Variant[23:20] and Revision[3:0] so the
+ * resulting id is the same for every variant/revision
+ */
+ midr = strtoul(buf, NULL, 16);
+ midr &= (~(MIDR_VARIANT_MASK | MIDR_REVISION_MASK));
+ scnprintf(buf, MIDR_SIZE, "0x%016lx", midr);
+ /* first successful read wins: stop scanning the remaining CPUs */
+ break;
+ }
+
+ if (!midr) { /* no CPU yielded a non-zero masked MIDR */
+ pr_err("failed to get cpuid string for PMU %s\n", pmu->name);
+ free(buf);
+ buf = NULL;
+ }
+
+ cpu_map__put(cpus);
+ return buf; /* malloc()ed "0x..." string on success, NULL on failure */
+}
diff --git a/tools/perf/arch/powerpc/util/header.c b/tools/perf/arch/powerpc/util/header.c
index 7a4cf80c207a..0b242664f5ea 100644
--- a/tools/perf/arch/powerpc/util/header.c
+++ b/tools/perf/arch/powerpc/util/header.c
@@ -35,7 +35,7 @@ get_cpuid(char *buffer, size_t sz)
}
char *
-get_cpuid_str(void)
+get_cpuid_str(struct perf_pmu *pmu __maybe_unused)
{
char *bufp;
diff --git a/tools/perf/arch/s390/annotate/instructions.c b/tools/perf/arch/s390/annotate/instructions.c
index e0e466c650df..8c72b44444cb 100644
--- a/tools/perf/arch/s390/annotate/instructions.c
+++ b/tools/perf/arch/s390/annotate/instructions.c
@@ -18,7 +18,8 @@ static struct ins_ops *s390__associate_ins_ops(struct arch *arch, const char *na
if (!strcmp(name, "br"))
ops = &ret_ops;
- arch__associate_ins_ops(arch, name, ops);
+ if (ops)
+ arch__associate_ins_ops(arch, name, ops);
return ops;
}
diff --git a/tools/perf/arch/x86/tests/perf-time-to-tsc.c b/tools/perf/arch/x86/tests/perf-time-to-tsc.c
index b59678e8c1e2..06abe8108b33 100644
--- a/tools/perf/arch/x86/tests/perf-time-to-tsc.c
+++ b/tools/perf/arch/x86/tests/perf-time-to-tsc.c
@@ -84,7 +84,7 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe
CHECK__(perf_evlist__open(evlist));
- CHECK__(perf_evlist__mmap(evlist, UINT_MAX, false));
+ CHECK__(perf_evlist__mmap(evlist, UINT_MAX));
pc = evlist->mmap[0].base;
ret = perf_read_tsc_conversion(pc, &tc);
diff --git a/tools/perf/arch/x86/util/header.c b/tools/perf/arch/x86/util/header.c
index 33027c5e6f92..b626d2bad9f1 100644
--- a/tools/perf/arch/x86/util/header.c
+++ b/tools/perf/arch/x86/util/header.c
@@ -66,7 +66,7 @@ get_cpuid(char *buffer, size_t sz)
}
char *
-get_cpuid_str(void)
+get_cpuid_str(struct perf_pmu *pmu __maybe_unused)
{
char *buf = malloc(128);
diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c
index 58ae6ed8f38b..2defb6df7fd0 100644
--- a/tools/perf/bench/futex-hash.c
+++ b/tools/perf/bench/futex-hash.c
@@ -24,6 +24,7 @@
#include <subcmd/parse-options.h>
#include "bench.h"
#include "futex.h"
+#include "cpumap.h"
#include <err.h>
#include <sys/time.h>
@@ -118,11 +119,12 @@ static void print_summary(void)
int bench_futex_hash(int argc, const char **argv)
{
int ret = 0;
- cpu_set_t cpu;
+ cpu_set_t cpuset;
struct sigaction act;
- unsigned int i, ncpus;
+ unsigned int i;
pthread_attr_t thread_attr;
struct worker *worker = NULL;
+ struct cpu_map *cpu;
argc = parse_options(argc, argv, options, bench_futex_hash_usage, 0);
if (argc) {
@@ -130,14 +132,16 @@ int bench_futex_hash(int argc, const char **argv)
exit(EXIT_FAILURE);
}
- ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+ cpu = cpu_map__new(NULL);
+ if (!cpu)
+ goto errmem;
sigfillset(&act.sa_mask);
act.sa_sigaction = toggle_done;
sigaction(SIGINT, &act, NULL);
if (!nthreads) /* default to the number of CPUs */
- nthreads = ncpus;
+ nthreads = cpu->nr;
worker = calloc(nthreads, sizeof(*worker));
if (!worker)
@@ -163,10 +167,10 @@ int bench_futex_hash(int argc, const char **argv)
if (!worker[i].futex)
goto errmem;
- CPU_ZERO(&cpu);
- CPU_SET(i % ncpus, &cpu);
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu->map[i % cpu->nr], &cpuset);
- ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu);
+ ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset);
if (ret)
err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
@@ -217,6 +221,7 @@ int bench_futex_hash(int argc, const char **argv)
print_summary();
free(worker);
+ free(cpu);
return ret;
errmem:
err(EXIT_FAILURE, "calloc");
diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c
index 08653ae8a8c4..8e9c4753e304 100644
--- a/tools/perf/bench/futex-lock-pi.c
+++ b/tools/perf/bench/futex-lock-pi.c
@@ -15,6 +15,7 @@
#include <errno.h>
#include "bench.h"
#include "futex.h"
+#include "cpumap.h"
#include <err.h>
#include <stdlib.h>
@@ -32,7 +33,7 @@ static struct worker *worker;
static unsigned int nsecs = 10;
static bool silent = false, multi = false;
static bool done = false, fshared = false;
-static unsigned int ncpus, nthreads = 0;
+static unsigned int nthreads = 0;
static int futex_flag = 0;
struct timeval start, end, runtime;
static pthread_mutex_t thread_lock;
@@ -113,9 +114,10 @@ static void *workerfn(void *arg)
return NULL;
}
-static void create_threads(struct worker *w, pthread_attr_t thread_attr)
+static void create_threads(struct worker *w, pthread_attr_t thread_attr,
+ struct cpu_map *cpu)
{
- cpu_set_t cpu;
+ cpu_set_t cpuset;
unsigned int i;
threads_starting = nthreads;
@@ -130,10 +132,10 @@ static void create_threads(struct worker *w, pthread_attr_t thread_attr)
} else
worker[i].futex = &global_futex;
- CPU_ZERO(&cpu);
- CPU_SET(i % ncpus, &cpu);
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu->map[i % cpu->nr], &cpuset);
- if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu))
+ if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
if (pthread_create(&w[i].thread, &thread_attr, workerfn, &worker[i]))
@@ -147,19 +149,22 @@ int bench_futex_lock_pi(int argc, const char **argv)
unsigned int i;
struct sigaction act;
pthread_attr_t thread_attr;
+ struct cpu_map *cpu;
argc = parse_options(argc, argv, options, bench_futex_lock_pi_usage, 0);
if (argc)
goto err;
- ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+ cpu = cpu_map__new(NULL);
+ if (!cpu)
+ err(EXIT_FAILURE, "calloc");
sigfillset(&act.sa_mask);
act.sa_sigaction = toggle_done;
sigaction(SIGINT, &act, NULL);
if (!nthreads)
- nthreads = ncpus;
+ nthreads = cpu->nr;
worker = calloc(nthreads, sizeof(*worker));
if (!worker)
@@ -180,7 +185,7 @@ int bench_futex_lock_pi(int argc, const char **argv)
pthread_attr_init(&thread_attr);
gettimeofday(&start, NULL);
- create_threads(worker, thread_attr);
+ create_threads(worker, thread_attr, cpu);
pthread_attr_destroy(&thread_attr);
pthread_mutex_lock(&thread_lock);
diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
index 1058c194608a..fc692efa0c05 100644
--- a/tools/perf/bench/futex-requeue.c
+++ b/tools/perf/bench/futex-requeue.c
@@ -22,6 +22,7 @@
#include <errno.h>
#include "bench.h"
#include "futex.h"
+#include "cpumap.h"
#include <err.h>
#include <stdlib.h>
@@ -40,7 +41,7 @@ static bool done = false, silent = false, fshared = false;
static pthread_mutex_t thread_lock;
static pthread_cond_t thread_parent, thread_worker;
static struct stats requeuetime_stats, requeued_stats;
-static unsigned int ncpus, threads_starting, nthreads = 0;
+static unsigned int threads_starting, nthreads = 0;
static int futex_flag = 0;
static const struct option options[] = {
@@ -83,19 +84,19 @@ static void *workerfn(void *arg __maybe_unused)
}
static void block_threads(pthread_t *w,
- pthread_attr_t thread_attr)
+ pthread_attr_t thread_attr, struct cpu_map *cpu)
{
- cpu_set_t cpu;
+ cpu_set_t cpuset;
unsigned int i;
threads_starting = nthreads;
/* create and block all threads */
for (i = 0; i < nthreads; i++) {
- CPU_ZERO(&cpu);
- CPU_SET(i % ncpus, &cpu);
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu->map[i % cpu->nr], &cpuset);
- if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu))
+ if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
if (pthread_create(&w[i], &thread_attr, workerfn, NULL))
@@ -116,19 +117,22 @@ int bench_futex_requeue(int argc, const char **argv)
unsigned int i, j;
struct sigaction act;
pthread_attr_t thread_attr;
+ struct cpu_map *cpu;
argc = parse_options(argc, argv, options, bench_futex_requeue_usage, 0);
if (argc)
goto err;
- ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+ cpu = cpu_map__new(NULL);
+ if (!cpu)
+ err(EXIT_FAILURE, "cpu_map__new");
sigfillset(&act.sa_mask);
act.sa_sigaction = toggle_done;
sigaction(SIGINT, &act, NULL);
if (!nthreads)
- nthreads = ncpus;
+ nthreads = cpu->nr;
worker = calloc(nthreads, sizeof(*worker));
if (!worker)
@@ -156,7 +160,7 @@ int bench_futex_requeue(int argc, const char **argv)
struct timeval start, end, runtime;
/* create, launch & block all threads */
- block_threads(worker, thread_attr);
+ block_threads(worker, thread_attr, cpu);
/* make sure all threads are already blocked */
pthread_mutex_lock(&thread_lock);
diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c
index b4732dad9f89..69d8fdc87315 100644
--- a/tools/perf/bench/futex-wake-parallel.c
+++ b/tools/perf/bench/futex-wake-parallel.c
@@ -7,7 +7,17 @@
* for each individual thread to service its share of work. Ultimately
* it can be used to measure futex_wake() changes.
*/
+#include "bench.h"
+#include <linux/compiler.h>
+#include "../util/debug.h"
+#ifndef HAVE_PTHREAD_BARRIER
+int bench_futex_wake_parallel(int argc __maybe_unused, const char **argv __maybe_unused)
+{
+ pr_err("%s: pthread_barrier_t unavailable, disabling this test...\n", __func__);
+ return 0; /* nothing was run; NOTE(review): still returns 0 - confirm callers should treat this as success */
+}
+#else /* HAVE_PTHREAD_BARRIER */
/* For the CLR_() macros */
#include <string.h>
#include <pthread.h>
@@ -15,12 +25,11 @@
#include <signal.h>
#include "../util/stat.h"
#include <subcmd/parse-options.h>
-#include <linux/compiler.h>
#include <linux/kernel.h>
#include <linux/time64.h>
#include <errno.h>
-#include "bench.h"
#include "futex.h"
+#include "cpumap.h"
#include <err.h>
#include <stdlib.h>
@@ -42,8 +51,9 @@ static bool done = false, silent = false, fshared = false;
static unsigned int nblocked_threads = 0, nwaking_threads = 0;
static pthread_mutex_t thread_lock;
static pthread_cond_t thread_parent, thread_worker;
+static pthread_barrier_t barrier;
static struct stats waketime_stats, wakeup_stats;
-static unsigned int ncpus, threads_starting;
+static unsigned int threads_starting;
static int futex_flag = 0;
static const struct option options[] = {
@@ -64,6 +74,8 @@ static void *waking_workerfn(void *arg)
struct thread_data *waker = (struct thread_data *) arg;
struct timeval start, end;
+ pthread_barrier_wait(&barrier);
+
gettimeofday(&start, NULL);
waker->nwoken = futex_wake(&futex, nwakes, futex_flag);
@@ -84,6 +96,8 @@ static void wakeup_threads(struct thread_data *td, pthread_attr_t thread_attr)
pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_JOINABLE);
+ pthread_barrier_init(&barrier, NULL, nwaking_threads + 1);
+
/* create and block all threads */
for (i = 0; i < nwaking_threads; i++) {
/*
@@ -96,9 +110,13 @@ static void wakeup_threads(struct thread_data *td, pthread_attr_t thread_attr)
err(EXIT_FAILURE, "pthread_create");
}
+ pthread_barrier_wait(&barrier);
+
for (i = 0; i < nwaking_threads; i++)
if (pthread_join(td[i].worker, NULL))
err(EXIT_FAILURE, "pthread_join");
+
+ pthread_barrier_destroy(&barrier);
}
static void *blocked_workerfn(void *arg __maybe_unused)
@@ -119,19 +137,20 @@ static void *blocked_workerfn(void *arg __maybe_unused)
return NULL;
}
-static void block_threads(pthread_t *w, pthread_attr_t thread_attr)
+static void block_threads(pthread_t *w, pthread_attr_t thread_attr,
+ struct cpu_map *cpu)
{
- cpu_set_t cpu;
+ cpu_set_t cpuset;
unsigned int i;
threads_starting = nblocked_threads;
/* create and block all threads */
for (i = 0; i < nblocked_threads; i++) {
- CPU_ZERO(&cpu);
- CPU_SET(i % ncpus, &cpu);
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu->map[i % cpu->nr], &cpuset);
- if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu))
+ if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
if (pthread_create(&w[i], &thread_attr, blocked_workerfn, NULL))
@@ -205,6 +224,7 @@ int bench_futex_wake_parallel(int argc, const char **argv)
struct sigaction act;
pthread_attr_t thread_attr;
struct thread_data *waking_worker;
+ struct cpu_map *cpu;
argc = parse_options(argc, argv, options,
bench_futex_wake_parallel_usage, 0);
@@ -217,9 +237,12 @@ int bench_futex_wake_parallel(int argc, const char **argv)
act.sa_sigaction = toggle_done;
sigaction(SIGINT, &act, NULL);
- ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+ cpu = cpu_map__new(NULL);
+ if (!cpu)
+ err(EXIT_FAILURE, "calloc");
+
if (!nblocked_threads)
- nblocked_threads = ncpus;
+ nblocked_threads = cpu->nr;
/* some sanity checks */
if (nwaking_threads > nblocked_threads || !nwaking_threads)
@@ -259,7 +282,7 @@ int bench_futex_wake_parallel(int argc, const char **argv)
err(EXIT_FAILURE, "calloc");
/* create, launch & block all threads */
- block_threads(blocked_worker, thread_attr);
+ block_threads(blocked_worker, thread_attr, cpu);
/* make sure all threads are already blocked */
pthread_mutex_lock(&thread_lock);
@@ -297,3 +320,4 @@ int bench_futex_wake_parallel(int argc, const char **argv)
free(blocked_worker);
return ret;
}
+#endif /* HAVE_PTHREAD_BARRIER */
diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
index 8c5c0b6b5c97..e8181ad7d088 100644
--- a/tools/perf/bench/futex-wake.c
+++ b/tools/perf/bench/futex-wake.c
@@ -22,6 +22,7 @@
#include <errno.h>
#include "bench.h"
#include "futex.h"
+#include "cpumap.h"
#include <err.h>
#include <stdlib.h>
@@ -89,19 +90,19 @@ static void print_summary(void)
}
static void block_threads(pthread_t *w,
- pthread_attr_t thread_attr)
+ pthread_attr_t thread_attr, struct cpu_map *cpu)
{
- cpu_set_t cpu;
+ cpu_set_t cpuset;
unsigned int i;
threads_starting = nthreads;
/* create and block all threads */
for (i = 0; i < nthreads; i++) {
- CPU_ZERO(&cpu);
- CPU_SET(i % ncpus, &cpu);
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu->map[i % cpu->nr], &cpuset);
- if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu))
+ if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
if (pthread_create(&w[i], &thread_attr, workerfn, NULL))
@@ -122,6 +123,7 @@ int bench_futex_wake(int argc, const char **argv)
unsigned int i, j;
struct sigaction act;
pthread_attr_t thread_attr;
+ struct cpu_map *cpu;
argc = parse_options(argc, argv, options, bench_futex_wake_usage, 0);
if (argc) {
@@ -129,7 +131,9 @@ int bench_futex_wake(int argc, const char **argv)
exit(EXIT_FAILURE);
}
- ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+ cpu = cpu_map__new(NULL);
+ if (!cpu)
+ err(EXIT_FAILURE, "calloc");
sigfillset(&act.sa_mask);
act.sa_sigaction = toggle_done;
@@ -161,7 +165,7 @@ int bench_futex_wake(int argc, const char **argv)
struct timeval start, end, runtime;
/* create, launch & block all threads */
- block_threads(worker, thread_attr);
+ block_threads(worker, thread_attr, cpu);
/* make sure all threads are already blocked */
pthread_mutex_lock(&thread_lock);
diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c
index 3d354ba6e9c5..41db2cba77eb 100644
--- a/tools/perf/builtin-buildid-cache.c
+++ b/tools/perf/builtin-buildid-cache.c
@@ -325,8 +325,8 @@ int cmd_buildid_cache(int argc, const char **argv)
"file", "kcore file to add"),
OPT_STRING('r', "remove", &remove_name_list_str, "file list",
"file(s) to remove"),
- OPT_STRING('p', "purge", &purge_name_list_str, "path list",
- "path(s) to remove (remove old caches too)"),
+ OPT_STRING('p', "purge", &purge_name_list_str, "file list",
+ "file(s) to remove (remove old caches too)"),
OPT_STRING('M', "missing", &missing_filename, "file",
"to find missing build ids in the cache"),
OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 17855c4626a0..f1da9b0833c0 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -2224,9 +2224,9 @@ static int perf_c2c__browse_cacheline(struct hist_entry *he)
struct hist_browser *browser;
int key = -1;
const char help[] =
- " ENTER Togle callchains (if present) \n"
- " n Togle Node details info \n"
- " s Togle full lenght of symbol and source line columns \n"
+ " ENTER Toggle callchains (if present) \n"
+ " n Toggle Node details info \n"
+ " s Toggle full length of symbol and source line columns \n"
" q Return back to cacheline list \n";
/* Display compact version first. */
@@ -2303,7 +2303,7 @@ static int perf_c2c__hists_browse(struct hists *hists)
int key = -1;
const char help[] =
" d Display cacheline details \n"
- " ENTER Togle callchains (if present) \n"
+ " ENTER Toggle callchains (if present) \n"
" q Quit \n";
browser = perf_c2c_browser__new(hists);
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 0c36f2ac6a0e..98853162eae9 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -741,20 +741,20 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx,
u64 *mmap_time)
{
union perf_event *event;
- struct perf_sample sample;
+ u64 timestamp;
s64 n = 0;
int err;
*mmap_time = ULLONG_MAX;
while ((event = perf_evlist__mmap_read(kvm->evlist, idx)) != NULL) {
- err = perf_evlist__parse_sample(kvm->evlist, event, &sample);
+ err = perf_evlist__parse_sample_timestamp(kvm->evlist, event, &timestamp);
if (err) {
perf_evlist__mmap_consume(kvm->evlist, idx);
pr_err("Failed to parse sample\n");
return -1;
}
- err = perf_session__queue_event(kvm->session, event, &sample, 0);
+ err = perf_session__queue_event(kvm->session, event, timestamp, 0);
/*
* FIXME: Here we can't consume the event, as perf_session__queue_event will
* point to it, and it'll get possibly overwritten by the kernel.
@@ -768,7 +768,7 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx,
/* save time stamp of our first sample for this mmap */
if (n == 0)
- *mmap_time = sample.time;
+ *mmap_time = timestamp;
/* limit events per mmap handled all at once */
n++;
@@ -1044,7 +1044,7 @@ static int kvm_live_open_events(struct perf_kvm_stat *kvm)
goto out;
}
- if (perf_evlist__mmap(evlist, kvm->opts.mmap_pages, false) < 0) {
+ if (perf_evlist__mmap(evlist, kvm->opts.mmap_pages) < 0) {
ui__error("Failed to mmap the events: %s\n",
str_error_r(errno, sbuf, sizeof(sbuf)));
perf_evlist__close(evlist);
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 003255910c05..0a5749ef8b94 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -301,7 +301,7 @@ static int record__mmap_evlist(struct record *rec,
struct record_opts *opts = &rec->opts;
char msg[512];
- if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false,
+ if (perf_evlist__mmap_ex(evlist, opts->mmap_pages,
opts->auxtrace_mmap_pages,
opts->auxtrace_snapshot_mode) < 0) {
if (errno == EPERM) {
@@ -372,6 +372,8 @@ try_again:
ui__error("%s\n", msg);
goto out;
}
+
+ pos->supported = true;
}
if (perf_evlist__apply_filters(evlist, &pos)) {
@@ -477,7 +479,7 @@ static struct perf_event_header finished_round_event = {
};
static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
- bool backward)
+ bool overwrite)
{
u64 bytes_written = rec->bytes_written;
int i;
@@ -487,18 +489,18 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
if (!evlist)
return 0;
- maps = backward ? evlist->backward_mmap : evlist->mmap;
+ maps = overwrite ? evlist->overwrite_mmap : evlist->mmap;
if (!maps)
return 0;
- if (backward && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
+ if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
return 0;
for (i = 0; i < evlist->nr_mmaps; i++) {
struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap;
if (maps[i].base) {
- if (perf_mmap__push(&maps[i], evlist->overwrite, backward, rec, record__pushfn) != 0) {
+ if (perf_mmap__push(&maps[i], overwrite, rec, record__pushfn) != 0) {
rc = -1;
goto out;
}
@@ -518,7 +520,7 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
if (bytes_written != rec->bytes_written)
rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));
- if (backward)
+ if (overwrite)
perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
return rc;
@@ -690,8 +692,8 @@ perf_evlist__pick_pc(struct perf_evlist *evlist)
if (evlist) {
if (evlist->mmap && evlist->mmap[0].base)
return evlist->mmap[0].base;
- if (evlist->backward_mmap && evlist->backward_mmap[0].base)
- return evlist->backward_mmap[0].base;
+ if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].base)
+ return evlist->overwrite_mmap[0].base;
}
return NULL;
}
@@ -784,6 +786,28 @@ static int record__synthesize(struct record *rec, bool tail)
perf_event__synthesize_guest_os, tool);
}
+ err = perf_event__synthesize_extra_attr(&rec->tool,
+ rec->evlist,
+ process_synthesized_event,
+ data->is_pipe);
+ if (err)
+ goto out;
+
+ err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->threads,
+ process_synthesized_event,
+ NULL);
+ if (err < 0) {
+ pr_err("Couldn't synthesize thread map.\n");
+ return err;
+ }
+
+ err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->cpus,
+ process_synthesized_event, NULL);
+ if (err < 0) {
+ pr_err("Couldn't synthesize cpu map.\n");
+ return err;
+ }
+
err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
process_synthesized_event, opts->sample_address,
opts->proc_map_timeout, 1);
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index af5dd038195e..eb9ce6327e71 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -921,13 +921,6 @@ int cmd_report(int argc, const char **argv)
return -EINVAL;
}
- if (report.use_stdio)
- use_browser = 0;
- else if (report.use_tui)
- use_browser = 1;
- else if (report.use_gtk)
- use_browser = 2;
-
if (report.inverted_callchain)
callchain_param.order = ORDER_CALLER;
if (symbol_conf.cumulate_callchain && !callchain_param.order_set)
@@ -1014,6 +1007,13 @@ repeat:
perf_hpp_list.need_collapse = true;
}
+ if (report.use_stdio)
+ use_browser = 0;
+ else if (report.use_tui)
+ use_browser = 1;
+ else if (report.use_gtk)
+ use_browser = 2;
+
/* Force tty output for header output and per-thread stat. */
if (report.header || report.header_only || report.show_threads)
use_browser = 0;
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 9b43bda45a41..39d8b55f0db3 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -22,6 +22,7 @@
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/stat.h"
+#include "util/color.h"
#include "util/string2.h"
#include "util/thread-stack.h"
#include "util/time-utils.h"
@@ -90,6 +91,7 @@ enum perf_output_field {
PERF_OUTPUT_SYNTH = 1U << 25,
PERF_OUTPUT_PHYS_ADDR = 1U << 26,
PERF_OUTPUT_UREGS = 1U << 27,
+ PERF_OUTPUT_METRIC = 1U << 28,
};
struct output_option {
@@ -124,6 +126,7 @@ struct output_option {
{.str = "brstackoff", .field = PERF_OUTPUT_BRSTACKOFF},
{.str = "synth", .field = PERF_OUTPUT_SYNTH},
{.str = "phys_addr", .field = PERF_OUTPUT_PHYS_ADDR},
+ {.str = "metric", .field = PERF_OUTPUT_METRIC},
};
enum {
@@ -215,12 +218,20 @@ struct perf_evsel_script {
char *filename;
FILE *fp;
u64 samples;
+ /* For metric output */
+ u64 val;
+ int gnum;
};
+/* Fetch the perf_evsel_script private data stored in @evsel->priv. */
+static inline struct perf_evsel_script *evsel_script(struct perf_evsel *evsel)
+{
+	struct perf_evsel_script *es = (struct perf_evsel_script *)evsel->priv;
+
+	return es;
+}
+
static struct perf_evsel_script *perf_evsel_script__new(struct perf_evsel *evsel,
struct perf_data *data)
{
- struct perf_evsel_script *es = malloc(sizeof(*es));
+ struct perf_evsel_script *es = zalloc(sizeof(*es));
if (es != NULL) {
if (asprintf(&es->filename, "%s.%s.dump", data->file.path, perf_evsel__name(evsel)) < 0)
@@ -228,7 +239,6 @@ static struct perf_evsel_script *perf_evsel_script__new(struct perf_evsel *evsel
es->fp = fopen(es->filename, "w");
if (es->fp == NULL)
goto out_free_filename;
- es->samples = 0;
}
return es;
@@ -423,11 +433,6 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
PERF_OUTPUT_CPU, allow_user_set))
return -EINVAL;
- if (PRINT_FIELD(PERIOD) &&
- perf_evsel__check_stype(evsel, PERF_SAMPLE_PERIOD, "PERIOD",
- PERF_OUTPUT_PERIOD))
- return -EINVAL;
-
if (PRINT_FIELD(IREGS) &&
perf_evsel__check_stype(evsel, PERF_SAMPLE_REGS_INTR, "IREGS",
PERF_OUTPUT_IREGS))
@@ -1477,6 +1482,86 @@ static int data_src__fprintf(u64 data_src, FILE *fp)
return fprintf(fp, "%-*s", maxlen, out);
}
+struct metric_ctx { /* state handed to the metric callbacks through perf_stat_output_ctx.ctx */
+ struct perf_sample *sample;
+ struct thread *thread;
+ struct perf_evsel *evsel;
+ FILE *fp; /* stream the sample prefix and metric text are written to */
+};
+
+/*
+ * print_metric callback for perf_stat_output_ctx: emit one metric line for
+ * the sample described by @ctx (a struct metric_ctx).
+ *
+ * Does nothing when @fmt is NULL.  Otherwise prints the sample prefix, the
+ * "\tmetric: " tag, @val formatted with @fmt (through color_fprintf() when
+ * @color is set) and finally @unit followed by a newline.  All of it goes
+ * to mctx->fp so the complete line ends up in a single stream.
+ */
+static void script_print_metric(void *ctx, const char *color,
+				const char *fmt,
+				const char *unit, double val)
+{
+	struct metric_ctx *mctx = ctx;
+
+	if (!fmt)
+		return;
+	perf_sample__fprintf_start(mctx->sample, mctx->thread, mctx->evsel,
+				   mctx->fp);
+	fputs("\tmetric: ", mctx->fp);
+	if (color)
+		color_fprintf(mctx->fp, color, fmt, val);
+	else
+		/* was printf(): the value must follow the rest of the line to mctx->fp */
+		fprintf(mctx->fp, fmt, val);
+	fprintf(mctx->fp, " %s\n", unit);
+}
+
+/*
+ * new_line callback for perf_stat_output_ctx: begin another metric line by
+ * printing the sample prefix followed by the "\tmetric: " tag.
+ */
+static void script_new_line(void *ctx)
+{
+	struct metric_ctx *m = ctx;
+	FILE *out = m->fp;
+
+	perf_sample__fprintf_start(m->sample, m->thread, m->evsel, out);
+	fputs("\tmetric: ", out);
+}
+/*
+ * Account one sample in the shadow stats and, once nr_members samples have
+ * been counted on the event's group leader, print the shadow-stat metrics
+ * for every member of that group via script_print_metric()/script_new_line().
+ *
+ * @script: used to reach script->session->evlist when stats must be allocated
+ * @thread, @evsel, @sample, @fp: forwarded to the callbacks in a metric_ctx
+ *
+ * NOTE(review): evsel_script() dereferences evsel->priv for the event, its
+ * leader and every group member; this assumes priv has been set up for all
+ * of them before this is called -- confirm against the callers.
+ */
+static void perf_sample__fprint_metric(struct perf_script *script,
+ struct thread *thread,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ FILE *fp)
+{
+ struct perf_stat_output_ctx ctx = {
+ .print_metric = script_print_metric,
+ .new_line = script_new_line,
+ .ctx = &(struct metric_ctx) {
+ .sample = sample,
+ .thread = thread,
+ .evsel = evsel,
+ .fp = fp,
+ },
+ .force_header = false,
+ };
+ struct perf_evsel *ev2;
+ static bool init; /* shadow stats are initialised on the first call only */
+ u64 val;
+
+ if (!init) {
+ perf_stat__init_shadow_stats();
+ init = true;
+ }
+ if (!evsel->stats) /* stats are allocated lazily for the session's evlist */
+ perf_evlist__alloc_stats(script->session->evlist, false);
+ if (evsel_script(evsel->leader)->gnum++ == 0) /* leader's counter was zero: new round, reset shadow stats */
+ perf_stat__reset_shadow_stats();
+ val = sample->period * evsel->scale;
+ perf_stat__update_shadow_stats(evsel,
+ val,
+ sample->cpu);
+ evsel_script(evsel)->val = val; /* kept so it can be printed once the round is complete */
+ if (evsel_script(evsel->leader)->gnum == evsel->leader->nr_members) {
+ for_each_group_member (ev2, evsel->leader) {
+ perf_stat__print_shadow_stats(ev2,
+ evsel_script(ev2)->val,
+ sample->cpu,
+ &ctx,
+ NULL);
+ }
+ evsel_script(evsel->leader)->gnum = 0; /* begin counting the next round */
+ }
+}
+
static void process_event(struct perf_script *script,
struct perf_sample *sample, struct perf_evsel *evsel,
struct addr_location *al,
@@ -1564,6 +1649,9 @@ static void process_event(struct perf_script *script,
if (PRINT_FIELD(PHYS_ADDR))
fprintf(fp, "%16" PRIx64, sample->phys_addr);
fprintf(fp, "\n");
+
+ if (PRINT_FIELD(METRIC))
+ perf_sample__fprint_metric(script, thread, evsel, sample, fp);
}
static struct scripting_ops *scripting_ops;
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 59af5a8419e2..a027b4712e48 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -458,19 +458,8 @@ static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *inf
workload_exec_errno = info->si_value.sival_int;
}
-static bool has_unit(struct perf_evsel *counter)
-{
- return counter->unit && *counter->unit;
-}
-
-static bool has_scale(struct perf_evsel *counter)
-{
- return counter->scale != 1;
-}
-
static int perf_stat_synthesize_config(bool is_pipe)
{
- struct perf_evsel *counter;
int err;
if (is_pipe) {
@@ -482,53 +471,10 @@ static int perf_stat_synthesize_config(bool is_pipe)
}
}
- /*
- * Synthesize other events stuff not carried within
- * attr event - unit, scale, name
- */
- evlist__for_each_entry(evsel_list, counter) {
- if (!counter->supported)
- continue;
-
- /*
- * Synthesize unit and scale only if it's defined.
- */
- if (has_unit(counter)) {
- err = perf_event__synthesize_event_update_unit(NULL, counter, process_synthesized_event);
- if (err < 0) {
- pr_err("Couldn't synthesize evsel unit.\n");
- return err;
- }
- }
-
- if (has_scale(counter)) {
- err = perf_event__synthesize_event_update_scale(NULL, counter, process_synthesized_event);
- if (err < 0) {
- pr_err("Couldn't synthesize evsel scale.\n");
- return err;
- }
- }
-
- if (counter->own_cpus) {
- err = perf_event__synthesize_event_update_cpus(NULL, counter, process_synthesized_event);
- if (err < 0) {
- pr_err("Couldn't synthesize evsel scale.\n");
- return err;
- }
- }
-
- /*
- * Name is needed only for pipe output,
- * perf.data carries event names.
- */
- if (is_pipe) {
- err = perf_event__synthesize_event_update_name(NULL, counter, process_synthesized_event);
- if (err < 0) {
- pr_err("Couldn't synthesize evsel name.\n");
- return err;
- }
- }
- }
+ err = perf_event__synthesize_extra_attr(NULL,
+ evsel_list,
+ process_synthesized_event,
+ is_pipe);
err = perf_event__synthesize_thread_map2(NULL, evsel_list->threads,
process_synthesized_event,
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 9e0d2645ae13..540461f5e345 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -99,6 +99,7 @@ static void perf_top__resize(struct perf_top *top)
static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he)
{
+ struct perf_evsel *evsel = hists_to_evsel(he->hists);
struct symbol *sym;
struct annotation *notes;
struct map *map;
@@ -137,7 +138,7 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he)
return err;
}
- err = symbol__disassemble(sym, map, NULL, 0, NULL, NULL);
+ err = symbol__annotate(sym, map, evsel, 0, NULL, NULL);
if (err == 0) {
out_assign:
top->sym_filter_entry = he;
@@ -229,6 +230,7 @@ static void perf_top__record_precise_ip(struct perf_top *top,
static void perf_top__show_details(struct perf_top *top)
{
struct hist_entry *he = top->sym_filter_entry;
+ struct perf_evsel *evsel = hists_to_evsel(he->hists);
struct annotation *notes;
struct symbol *symbol;
int more;
@@ -241,6 +243,8 @@ static void perf_top__show_details(struct perf_top *top)
pthread_mutex_lock(&notes->lock);
+ symbol__calc_percent(symbol, evsel);
+
if (notes->src == NULL)
goto out_unlock;
@@ -412,7 +416,7 @@ static void perf_top__print_mapped_keys(struct perf_top *top)
fprintf(stdout, "\t[S] stop annotation.\n");
fprintf(stdout,
- "\t[K] hide kernel_symbols symbols. \t(%s)\n",
+ "\t[K] hide kernel symbols. \t(%s)\n",
top->hide_kernel_symbols ? "yes" : "no");
fprintf(stdout,
"\t[U] hide user symbols. \t(%s)\n",
@@ -903,7 +907,7 @@ try_again:
}
}
- if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
+ if (perf_evlist__mmap(evlist, opts->mmap_pages) < 0) {
ui__error("Failed to mmap with %d (%s)\n",
errno, str_error_r(errno, msg, sizeof(msg)));
goto out_err;
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 84debdbad327..7c57898095ea 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -2437,7 +2437,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
if (err < 0)
goto out_error_apply_filters;
- err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
+ err = perf_evlist__mmap(evlist, trace->opts.mmap_pages);
if (err < 0)
goto out_error_mmap;
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index 77406d25e521..e66a8a7bcced 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -45,7 +45,6 @@ include/uapi/asm-generic/mman-common.h
check () {
file=$1
- opts="--ignore-blank-lines --ignore-space-change"
shift
while [ -n "$*" ]; do
diff --git a/tools/perf/pmu-events/arch/arm64/cavium/thunderx2-imp-def.json b/tools/perf/pmu-events/arch/arm64/cavium/thunderx2-imp-def.json
new file mode 100644
index 000000000000..2db45c40ebc7
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/cavium/thunderx2-imp-def.json
@@ -0,0 +1,62 @@
+[
+ {
+ "PublicDescription": "Attributable Level 1 data cache access, read",
+ "EventCode": "0x40",
+ "EventName": "l1d_cache_rd",
+ "BriefDescription": "L1D cache read",
+ },
+ {
+ "PublicDescription": "Attributable Level 1 data cache access, write",
+ "EventCode": "0x41",
+ "EventName": "l1d_cache_wr",
+ "BriefDescription": "L1D cache write",
+ },
+ {
+ "PublicDescription": "Attributable Level 1 data cache refill, read",
+ "EventCode": "0x42",
+ "EventName": "l1d_cache_refill_rd",
+ "BriefDescription": "L1D cache refill read",
+ },
+ {
+ "PublicDescription": "Attributable Level 1 data cache refill, write",
+ "EventCode": "0x43",
+ "EventName": "l1d_cache_refill_wr",
+ "BriefDescription": "L1D cache refill write",
+ },
+ {
+ "PublicDescription": "Attributable Level 1 data TLB refill, read",
+ "EventCode": "0x4C",
+ "EventName": "l1d_tlb_refill_rd",
+ "BriefDescription": "L1D tlb refill read",
+ },
+ {
+ "PublicDescription": "Attributable Level 1 data TLB refill, write",
+ "EventCode": "0x4D",
+ "EventName": "l1d_tlb_refill_wr",
+ "BriefDescription": "L1D tlb refill write",
+ },
+ {
+ "PublicDescription": "Attributable Level 1 data or unified TLB access, read",
+ "EventCode": "0x4E",
+ "EventName": "l1d_tlb_rd",
+ "BriefDescription": "L1D tlb read",
+ },
+ {
+ "PublicDescription": "Attributable Level 1 data or unified TLB access, write",
+ "EventCode": "0x4F",
+ "EventName": "l1d_tlb_wr",
+ "BriefDescription": "L1D tlb write",
+ },
+ {
+ "PublicDescription": "Bus access read",
+ "EventCode": "0x60",
+ "EventName": "bus_access_rd",
+ "BriefDescription": "Bus access read",
+ },
+ {
+ "PublicDescription": "Bus access write",
+ "EventCode": "0x61",
+ "EventName": "bus_access_wr",
+ "BriefDescription": "Bus access write",
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/mapfile.csv b/tools/perf/pmu-events/arch/arm64/mapfile.csv
new file mode 100644
index 000000000000..219d6756134e
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/mapfile.csv
@@ -0,0 +1,15 @@
+# Format:
+# MIDR,Version,JSON/file/pathname,Type
+#
+# where
+# MIDR Processor version
+# Variant[23:20] and Revision [3:0] should be zero.
+# Version could be used to track version of JSON file
+# but currently unused.
+# JSON/file/pathname is the path to JSON file, relative
+# to tools/perf/pmu-events/arch/arm64/.
+# Type is core, uncore etc
+#
+#
+#Family-model,Version,Filename,EventType
+0x00000000420f5160,v1,cavium,core
diff --git a/tools/perf/pmu-events/arch/powerpc/mapfile.csv b/tools/perf/pmu-events/arch/powerpc/mapfile.csv
index a0f3a11ca19f..229150e7ab7d 100644
--- a/tools/perf/pmu-events/arch/powerpc/mapfile.csv
+++ b/tools/perf/pmu-events/arch/powerpc/mapfile.csv
@@ -13,13 +13,5 @@
#
# Power8 entries
-004b0000,1,power8,core
-004b0201,1,power8,core
-004c0000,1,power8,core
-004d0000,1,power8,core
-004d0100,1,power8,core
-004d0200,1,power8,core
-004c0100,1,power8,core
-004e0100,1,power9,core
-004e0200,1,power9,core
-004e1200,1,power9,core
+004[bcd][[:xdigit:]]{4},1,power8,core
+004e[[:xdigit:]]{4},1,power9,core
diff --git a/tools/perf/pmu-events/arch/powerpc/power9/cache.json b/tools/perf/pmu-events/arch/powerpc/power9/cache.json
index 18f6645f2897..7945c5196c43 100644
--- a/tools/perf/pmu-events/arch/powerpc/power9/cache.json
+++ b/tools/perf/pmu-events/arch/powerpc/power9/cache.json
@@ -125,11 +125,6 @@
"BriefDescription": "Finish stall because the NTF instruction was a larx waiting to be satisfied"
},
{,
- "EventCode": "0x3006C",
- "EventName": "PM_RUN_CYC_SMT2_MODE",
- "BriefDescription": "Cycles in which this thread's run latch is set and the core is in SMT2 mode"
- },
- {,
"EventCode": "0x1C058",
"EventName": "PM_DTLB_MISS_16G",
"BriefDescription": "Data TLB Miss page size 16G"
diff --git a/tools/perf/pmu-events/arch/powerpc/power9/frontend.json b/tools/perf/pmu-events/arch/powerpc/power9/frontend.json
index c63a919eda98..bd8361b5fd6a 100644
--- a/tools/perf/pmu-events/arch/powerpc/power9/frontend.json
+++ b/tools/perf/pmu-events/arch/powerpc/power9/frontend.json
@@ -1,10 +1,5 @@
[
{,
- "EventCode": "0x3E15C",
- "EventName": "PM_MRK_L2_TM_ST_ABORT_SISTER",
- "BriefDescription": "TM marked store abort for this thread"
- },
- {,
"EventCode": "0x25044",
"EventName": "PM_IPTEG_FROM_L31_MOD",
"BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L3 on the same chip due to a instruction side request"
@@ -369,4 +364,4 @@
"EventName": "PM_IPTEG_FROM_L31_ECO_MOD",
"BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's ECO L3 on the same chip due to a instruction side request"
}
-]
+] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/powerpc/power9/marked.json b/tools/perf/pmu-events/arch/powerpc/power9/marked.json
index b9df54fb37e3..22f9f32060a8 100644
--- a/tools/perf/pmu-events/arch/powerpc/power9/marked.json
+++ b/tools/perf/pmu-events/arch/powerpc/power9/marked.json
@@ -1,10 +1,5 @@
[
{,
- "EventCode": "0x3C052",
- "EventName": "PM_DATA_SYS_PUMP_MPRED",
- "BriefDescription": "Final Pump Scope (system) mispredicted. Either the original scope was too small (Chip/Group) or the original scope was System and it should have been smaller. Counts for a demand load"
- },
- {,
"EventCode": "0x3013E",
"EventName": "PM_MRK_STALL_CMPLU_CYC",
"BriefDescription": "Number of cycles the marked instruction is experiencing a stall while it is next to complete (NTC)"
@@ -255,6 +250,11 @@
"BriefDescription": "A Page Directory Entry was reloaded to a level 1 page walk cache from the core's L3 data cache"
},
{,
+ "EventCode": "0x3C052",
+ "EventName": "PM_DATA_SYS_PUMP_MPRED",
+ "BriefDescription": "Final Pump Scope (system) mispredicted. Either the original scope was too small (Chip/Group) or the original scope was System and it should have been smaller. Counts for a demand load"
+ },
+ {,
"EventCode": "0x4D142",
"EventName": "PM_MRK_DATA_FROM_L3",
"BriefDescription": "The processor's data cache was reloaded from local core's L3 due to a marked load"
@@ -435,21 +435,6 @@
"BriefDescription": "ITLB Reloaded. Counts 1 per ITLB miss for HPT but multiple for radix depending on number of levels traveresed"
},
{,
- "EventCode": "0x2D024",
- "EventName": "PM_RADIX_PWC_L2_HIT",
- "BriefDescription": "A radix translation attempt missed in the TLB but hit on both the first and second levels of page walk cache."
- },
- {,
- "EventCode": "0x3F056",
- "EventName": "PM_RADIX_PWC_L3_HIT",
- "BriefDescription": "A radix translation attempt missed in the TLB but hit on the first, second, and third levels of page walk cache."
- },
- {,
- "EventCode": "0x4E014",
- "EventName": "PM_TM_TX_PASS_RUN_INST",
- "BriefDescription": "Run instructions spent in successful transactions"
- },
- {,
"EventCode": "0x1E044",
"EventName": "PM_DPTEG_FROM_L3_NO_CONFLICT",
"BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 without conflict due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
@@ -644,4 +629,4 @@
"EventName": "PM_MRK_BR_MPRED_CMPL",
"BriefDescription": "Marked Branch Mispredicted"
}
-]
+] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/powerpc/power9/other.json b/tools/perf/pmu-events/arch/powerpc/power9/other.json
index 54cc3be00fc2..5ce312973f1e 100644
--- a/tools/perf/pmu-events/arch/powerpc/power9/other.json
+++ b/tools/perf/pmu-events/arch/powerpc/power9/other.json
@@ -80,6 +80,11 @@
"BriefDescription": "A radix translation attempt missed in the TLB and all levels of page walk cache."
},
{,
+ "EventCode": "0x26882",
+ "EventName": "PM_L2_DC_INV",
+ "BriefDescription": "D-cache invalidates sent over the reload bus to the core"
+ },
+ {,
"EventCode": "0x24048",
"EventName": "PM_INST_FROM_LMEM",
"BriefDescription": "The processor's Instruction cache was reloaded from the local chip's Memory due to an instruction fetch (not prefetch)"
@@ -95,11 +100,6 @@
"BriefDescription": "Number of TM transactions that passed"
},
{,
- "EventCode": "0xD1A0",
- "EventName": "PM_MRK_LSU_FLUSH_LHS",
- "BriefDescription": "Effective Address alias flush : no EA match but Real Address match. If the data has not yet been returned for this load, the instruction will just be rejected, but if it has returned data, it will be flushed"
- },
- {,
"EventCode": "0xF088",
"EventName": "PM_LSU0_STORE_REJECT",
"BriefDescription": "All internal store rejects cause the instruction to go back to the SRQ and go to sleep until woken up to try again after the condition has been met"
@@ -127,7 +127,7 @@
{,
"EventCode": "0xD08C",
"EventName": "PM_LSU2_LDMX_FIN",
- "BriefDescription": "New P9 instruction LDMX. The definition of this new PMU event is (from the ldmx RFC02491): The thread has executed an ldmx instruction that accessed a doubleword that contains an effective address within an enabled section of the Load Monitored region. This event, therefore, should not occur if the FSCR has disabled the load monitored facility (FSCR[52]) or disabled the EBB facility (FSCR[56])"
+ "BriefDescription": "New P9 instruction LDMX. The definition of this new PMU event is (from the ldmx RFC02491): The thread has executed an ldmx instruction that accessed a doubleword that contains an effective address within an enabled section of the Load Monitored region. This event, therefore, should not occur if the FSCR has disabled the load monitored facility (FSCR[52]) or disabled the EBB facility (FSCR[56])."
},
{,
"EventCode": "0x300F8",
@@ -205,11 +205,6 @@
"BriefDescription": "Duration in cycles to reload with Modified (M) data from another core's ECO L3 on the same chip due to a marked load"
},
{,
- "EventCode": "0xF0B4",
- "EventName": "PM_DC_PREF_CONS_ALLOC",
- "BriefDescription": "Prefetch stream allocated in the conservative phase by either the hardware prefetch mechanism or software prefetch"
- },
- {,
"EventCode": "0xF894",
"EventName": "PM_LSU3_L1_CAM_CANCEL",
"BriefDescription": "ls3 l1 tm cam cancel"
@@ -220,21 +215,11 @@
"BriefDescription": "Dispatch Flush: TLBIE"
},
{,
- "EventCode": "0xD1A4",
- "EventName": "PM_MRK_LSU_FLUSH_SAO",
- "BriefDescription": "A load-hit-load condition with Strong Address Ordering will have address compare disabled and flush"
- },
- {,
"EventCode": "0x4E11E",
"EventName": "PM_MRK_DATA_FROM_DMEM_CYC",
"BriefDescription": "Duration in cycles to reload from another chip's memory on the same Node or Group (Distant) due to a marked load"
},
{,
- "EventCode": "0x5894",
- "EventName": "PM_LWSYNC",
- "BriefDescription": "Lwsync instruction decoded and transferred"
- },
- {,
"EventCode": "0x14156",
"EventName": "PM_MRK_DATA_FROM_L2_CYC",
"BriefDescription": "Duration in cycles to reload from local core's L2 due to a marked load"
@@ -245,11 +230,6 @@
"BriefDescription": "Read clearing SC"
},
{,
- "EventCode": "0x50A0",
- "EventName": "PM_HWSYNC",
- "BriefDescription": "Hwsync instruction decoded and transferred"
- },
- {,
"EventCode": "0x168B0",
"EventName": "PM_L3_P1_NODE_PUMP",
"BriefDescription": "L3 PF sent with nodal scope port 1, counts even retried requests"
@@ -265,6 +245,11 @@
"BriefDescription": "The processor's data cache was reloaded from local core's L2 with load hit store conflict due to a marked load"
},
{,
+ "EventCode": "0x468AE",
+ "EventName": "PM_L3_P3_CO_RTY",
+ "BriefDescription": "L3 CO received retry port 3 (memory only), every retry counted"
+ },
+ {,
"EventCode": "0x460A8",
"EventName": "PM_SN_HIT",
"BriefDescription": "Any port snooper hit L3. Up to 4 can happen in a cycle but we only count 1"
@@ -280,11 +265,6 @@
"BriefDescription": "Prefetch stream allocated by the hardware prefetch mechanism"
},
{,
- "EventCode": "0xF0BC",
- "EventName": "PM_LS2_UNALIGNED_ST",
- "BriefDescription": "Store instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the Store of that size. If the Store wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty"
- },
- {,
"EventCode": "0xD0AC",
"EventName": "PM_SRQ_SYNC_CYC",
"BriefDescription": "A sync is in the S2Q (edge detect to count)"
@@ -380,26 +360,11 @@
"BriefDescription": "Cycles in which this thread's run latch is set and the core is in SMT4 mode"
},
{,
- "EventCode": "0x5088",
- "EventName": "PM_DECODE_FUSION_OP_PRESERV",
- "BriefDescription": "Destructive op operand preservation"
- },
- {,
"EventCode": "0x1D14E",
"EventName": "PM_MRK_DATA_FROM_OFF_CHIP_CACHE_CYC",
"BriefDescription": "Duration in cycles to reload either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a marked load"
},
{,
- "EventCode": "0x509C",
- "EventName": "PM_FORCED_NOP",
- "BriefDescription": "Instruction was forced to execute as a nop because it was found to behave like a nop (have no effect) at decode time"
- },
- {,
- "EventCode": "0xC098",
- "EventName": "PM_LS2_UNALIGNED_LD",
- "BriefDescription": "Load instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the load of that size. If the load wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty"
- },
- {,
"EventCode": "0x20058",
"EventName": "PM_DARQ1_10_12_ENTRIES",
"BriefDescription": "Cycles in which 10 or more DARQ1 entries (out of 12) are in use"
@@ -435,11 +400,6 @@
"BriefDescription": "All internal store rejects cause the instruction to go back to the SRQ and go to sleep until woken up to try again after the condition has been met"
},
{,
- "EventCode": "0x4505E",
- "EventName": "PM_FLOP_CMPL",
- "BriefDescription": "Floating Point Operation Finished"
- },
- {,
"EventCode": "0x1D144",
"EventName": "PM_MRK_DATA_FROM_L3_DISP_CONFLICT",
"BriefDescription": "The processor's data cache was reloaded from local core's L3 with dispatch conflict due to a marked load"
@@ -480,14 +440,9 @@
"BriefDescription": "XL-form branch was mispredicted due to the predicted target address missing from EAT. The EAT forces a mispredict in this case since there is no predicated target to validate. This is a rare case that may occur when the EAT is full and a branch is issued"
},
{,
- "EventCode": "0xC094",
- "EventName": "PM_LS0_UNALIGNED_LD",
- "BriefDescription": "Load instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the load of that size. If the load wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty"
- },
- {,
- "EventCode": "0xF8BC",
- "EventName": "PM_LS3_UNALIGNED_ST",
- "BriefDescription": "Store instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the Store of that size. If the Store wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty"
+ "EventCode": "0x460AE",
+ "EventName": "PM_L3_P2_CO_RTY",
+ "BriefDescription": "L3 CO received retry port 2 (memory only), every retry counted"
},
{,
"EventCode": "0x58B0",
@@ -505,11 +460,6 @@
"BriefDescription": "TM Store (fav or non-fav) ran into conflict (failed)"
},
{,
- "EventCode": "0xD998",
- "EventName": "PM_MRK_LSU_FLUSH_EMSH",
- "BriefDescription": "An ERAT miss was detected after a set-p hit. Erat tracker indicates fail due to tlbmiss and the instruction gets flushed because the instruction was working on the wrong address"
- },
- {,
"EventCode": "0xF8A0",
"EventName": "PM_NON_DATA_STORE",
"BriefDescription": "All ops that drain from s2q to L2 and contain no data"
@@ -525,11 +475,6 @@
"BriefDescription": "Unconditional Branch Completed. HW branch prediction was not used for this branch. This can be an I-form branch, a B-form branch with BO-field set to branch always, or a B-form branch which was covenrted to a Resolve."
},
{,
- "EventCode": "0x1F056",
- "EventName": "PM_RADIX_PWC_L1_HIT",
- "BriefDescription": "A radix translation attempt missed in the TLB and only the first level page walk cache was a hit."
- },
- {,
"EventCode": "0xF8A8",
"EventName": "PM_DC_PREF_FUZZY_CONF",
"BriefDescription": "A demand load referenced a line in an active fuzzy prefetch stream. The stream could have been allocated through the hardware prefetch mechanism or through software.Fuzzy stream confirm (out of order effects, or pf cant keep up)"
@@ -545,6 +490,11 @@
"BriefDescription": "Load tm L1 miss"
},
{,
+ "EventCode": "0xC880",
+ "EventName": "PM_LS1_LD_VECTOR_FIN",
+ "BriefDescription": ""
+ },
+ {,
"EventCode": "0x2894",
"EventName": "PM_TM_OUTER_TEND",
"BriefDescription": "Completion time outer tend"
@@ -565,21 +515,11 @@
"BriefDescription": "Marked derat reload (miss) for any page size"
},
{,
- "EventCode": "0x160A0",
- "EventName": "PM_L3_PF_MISS_L3",
- "BriefDescription": "L3 PF missed in L3"
- },
- {,
"EventCode": "0x1C04A",
"EventName": "PM_DATA_FROM_RL2L3_SHR",
"BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a demand load"
},
{,
- "EventCode": "0xD99C",
- "EventName": "PM_MRK_LSU_FLUSH_UE",
- "BriefDescription": "Correctable ECC error on reload data, reported at critical data forward time"
- },
- {,
"EventCode": "0x268B0",
"EventName": "PM_L3_P1_GRP_PUMP",
"BriefDescription": "L3 PF sent with grp scope port 1, counts even retried requests"
@@ -630,11 +570,6 @@
"BriefDescription": "addrs only req to L2 only on the first one,Indication that Load footprint is not expanding"
},
{,
- "EventCode": "0x5884",
- "EventName": "PM_DECODE_LANES_NOT_AVAIL",
- "BriefDescription": "Decode has something to transmit but dispatch lanes are not available"
- },
- {,
"EventCode": "0x3C042",
"EventName": "PM_DATA_FROM_L3_DISP_CONFLICT",
"BriefDescription": "The processor's data cache was reloaded from local core's L3 with dispatch conflict due to a demand load"
@@ -690,9 +625,9 @@
"BriefDescription": "False LHS match detected"
},
{,
- "EventCode": "0xD9A4",
- "EventName": "PM_MRK_LSU_FLUSH_LARX_STCX",
- "BriefDescription": "A larx is flushed because an older larx has an LMQ reservation for the same thread. A stcx is flushed because an older stcx is in the LMQ. The flush happens when the older larx/stcx relaunches"
+ "EventCode": "0xF0B0",
+ "EventName": "PM_L3_LD_PREF",
+ "BriefDescription": "L3 load prefetch, sourced from a hardware or software stream, was sent to the nest"
},
{,
"EventCode": "0x4D012",
@@ -715,9 +650,9 @@
"BriefDescription": "All successful Ld/St dispatches for this thread that were an L2 miss (excludes i_l2mru_tch_reqs)"
},
{,
- "EventCode": "0xF8B8",
- "EventName": "PM_LS1_UNALIGNED_ST",
- "BriefDescription": "Store instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the Store of that size. If the Store wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty"
+ "EventCode": "0x160A0",
+ "EventName": "PM_L3_PF_MISS_L3",
+ "BriefDescription": "L3 PF missed in L3"
},
{,
"EventCode": "0x408C",
@@ -765,11 +700,6 @@
"BriefDescription": "Completion time nested tend"
},
{,
- "EventCode": "0x36084",
- "EventName": "PM_L2_RCST_DISP",
- "BriefDescription": "All D-side store dispatch attempts for this thread"
- },
- {,
"EventCode": "0x368A0",
"EventName": "PM_L3_PF_OFF_CHIP_CACHE",
"BriefDescription": "L3 PF from Off chip cache"
@@ -830,11 +760,6 @@
"BriefDescription": "Rotating sample of 16 snoop valids"
},
{,
- "EventCode": "0x16084",
- "EventName": "PM_L2_RCLD_DISP",
- "BriefDescription": "All I-or-D side load dispatch attempts for this thread (excludes i_l2mru_tch_reqs)"
- },
- {,
"EventCode": "0x1608C",
"EventName": "PM_RC0_BUSY",
"BriefDescription": "RC mach 0 Busy. Used by PMU to sample ave RC lifetime (mach0 used as sample point)"
@@ -842,7 +767,7 @@
{,
"EventCode": "0x36082",
"EventName": "PM_L2_LD_DISP",
- "BriefDescription": "All successful I-or-D side load dispatches for this thread (excludes i_l2mru_tch_reqs)."
+ "BriefDescription": "All successful I-or-D side load dispatches for this thread (excludes i_l2mru_tch_reqs)"
},
{,
"EventCode": "0xF8B0",
@@ -905,11 +830,6 @@
"BriefDescription": "Instruction prefetch requests"
},
{,
- "EventCode": "0xC898",
- "EventName": "PM_LS3_UNALIGNED_LD",
- "BriefDescription": "Load instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the load of that size. If the load wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty"
- },
- {,
"EventCode": "0x488C",
"EventName": "PM_IC_PREF_WRITE",
"BriefDescription": "Instruction prefetch written into IL1"
@@ -1017,7 +937,7 @@
{,
"EventCode": "0x3E05E",
"EventName": "PM_L3_CO_MEPF",
- "BriefDescription": "L3 castouts in Mepf state for this thread"
+ "BriefDescription": "L3 CO of line in Mepf state (includes casthrough to memory). The Mepf state indicates that a line was brought in to satisfy an L3 prefetch request"
},
{,
"EventCode": "0x460A2",
@@ -1205,11 +1125,6 @@
"BriefDescription": "Non transactional conflict from LSU, gets reported to TEXASR"
},
{,
- "EventCode": "0xD198",
- "EventName": "PM_MRK_LSU_FLUSH_ATOMIC",
- "BriefDescription": "Quad-word loads (lq) are considered atomic because they always span at least 2 slices. If a snoop or store from another thread changes the data the load is accessing between the 2 or 3 pieces of the lq instruction, the lq will be flushed"
- },
- {,
"EventCode": "0x201E0",
"EventName": "PM_MRK_DATA_FROM_MEMORY",
"BriefDescription": "The processor's data cache was reloaded from a memory location including L4 from local remote or distant due to a marked load"
@@ -1295,11 +1210,6 @@
"BriefDescription": "Ict empty for this thread due to dispatch holds because the History Buffer was full. Could be GPR/VSR/VMR/FPR/CR/XVF; CR; XVF (XER/VSCR/FPSCR)"
},
{,
- "EventCode": "0xC894",
- "EventName": "PM_LS1_UNALIGNED_LD",
- "BriefDescription": "Load instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the load of that size. If the load wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty"
- },
- {,
"EventCode": "0x360A2",
"EventName": "PM_L3_L2_CO_HIT",
"BriefDescription": "L2 CO hits"
@@ -1325,11 +1235,6 @@
"BriefDescription": "L2 Castouts - Shared (Tx,Sx)"
},
{,
- "EventCode": "0xD884",
- "EventName": "PM_LSU3_SET_MPRED",
- "BriefDescription": "Set prediction(set-p) miss. The entry was not found in the Set prediction table"
- },
- {,
"EventCode": "0x26092",
"EventName": "PM_L2_LD_MISS_64B",
"BriefDescription": "All successful D-side load dispatches that were an L2 miss (NOT Sx,Tx,Mx) for this thread and the RC calculated the request should be for 64B(i.e., M=1)"
@@ -1362,12 +1267,12 @@
{,
"EventCode": "0xD8A8",
"EventName": "PM_ISLB_MISS",
- "BriefDescription": "Instruction SLB miss - Total of all segment sizes"
+ "BriefDescription": "Instruction SLB Miss - Total of all segment sizes"
},
{,
- "EventCode": "0xD19C",
- "EventName": "PM_MRK_LSU_FLUSH_RELAUNCH_MISS",
- "BriefDescription": "If a load that has already returned data and has to relaunch for any reason then gets a miss (erat, setp, data cache), it will often be flushed at relaunch time because the data might be inconsistent"
+ "EventCode": "0x368AE",
+ "EventName": "PM_L3_P1_CO_RTY",
+ "BriefDescription": "L3 CO received retry port 1 (memory only), every retry counted"
},
{,
"EventCode": "0x260A2",
@@ -1385,6 +1290,11 @@
"BriefDescription": "Completion stall because the ISU is updating the TEXASR to keep track of the nested tbegin. This is a short delay, and it includes ROT"
},
{,
+ "EventCode": "0xC084",
+ "EventName": "PM_LS2_LD_VECTOR_FIN",
+ "BriefDescription": ""
+ },
+ {,
"EventCode": "0x1608E",
"EventName": "PM_ST_CAUSED_FAIL",
"BriefDescription": "Non-TM Store caused any thread to fail"
@@ -1410,11 +1320,6 @@
"BriefDescription": "Continuous 16 cycle (2to1) window where this signals rotates thru sampling each CO machine busy. PMU uses this wave to then do 16 cyc count to sample total number of machs running"
},
{,
- "EventCode": "0xD084",
- "EventName": "PM_LSU2_SET_MPRED",
- "BriefDescription": "Set prediction(set-p) miss. The entry was not found in the Set prediction table"
- },
- {,
"EventCode": "0x48B8",
"EventName": "PM_BR_MPRED_TAKEN_TA",
"BriefDescription": "Conditional Branch Completed that was Mispredicted due to the Target Address Prediction from the Count Cache or Link Stack. Only XL-form branches that resolved Taken set this event."
@@ -1450,29 +1355,24 @@
"BriefDescription": "A demand load referenced a line in an active strided prefetch stream. The stream could have been allocated through the hardware prefetch mechanism or through software."
},
{,
+ "EventCode": "0x36084",
+ "EventName": "PM_L2_RCST_DISP",
+ "BriefDescription": "All D-side store dispatch attempts for this thread"
+ },
+ {,
"EventCode": "0x45054",
"EventName": "PM_FMA_CMPL",
"BriefDescription": "two flops operation completed (fmadd, fnmadd, fmsub, fnmsub) Scalar instructions only. "
},
{,
- "EventCode": "0x5090",
- "EventName": "PM_SHL_ST_DISABLE",
- "BriefDescription": "Store-Hit-Load Table Read Hit with entry Disabled (entry was disabled due to the entry shown to not prevent the flush)"
- },
- {,
"EventCode": "0x201E8",
"EventName": "PM_THRESH_EXC_512",
"BriefDescription": "Threshold counter exceeded a value of 512"
},
{,
- "EventCode": "0x5084",
- "EventName": "PM_DECODE_FUSION_EXT_ADD",
- "BriefDescription": "32-bit extended addition"
- },
- {,
"EventCode": "0x36080",
"EventName": "PM_L2_INST",
- "BriefDescription": "All successful I-side dispatches for this thread (excludes i_l2mru_tch reqs)."
+ "BriefDescription": "All successful I-side dispatches for this thread (excludes i_l2mru_tch reqs)"
},
{,
"EventCode": "0x3504C",
@@ -1555,21 +1455,11 @@
"BriefDescription": "Memory Read With Intent to Modify for this thread"
},
{,
- "EventCode": "0x26882",
- "EventName": "PM_L2_DC_INV",
- "BriefDescription": "D-cache invalidates sent over the reload bus to the core"
- },
- {,
"EventCode": "0xC090",
"EventName": "PM_LSU_STCX",
"BriefDescription": "STCX sent to nest, i.e. total"
},
{,
- "EventCode": "0xD080",
- "EventName": "PM_LSU0_SET_MPRED",
- "BriefDescription": "Set prediction(set-p) miss. The entry was not found in the Set prediction table"
- },
- {,
"EventCode": "0x2C120",
"EventName": "PM_MRK_DATA_FROM_L2_NO_CONFLICT",
"BriefDescription": "The processor's data cache was reloaded from local core's L2 without conflict due to a marked load"
@@ -1610,11 +1500,6 @@
"BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 without conflict due to a instruction side request"
},
{,
- "EventCode": "0xD9A0",
- "EventName": "PM_MRK_LSU_FLUSH_LHL_SHL",
- "BriefDescription": "The instruction was flushed because of a sequential load/store consistency. If a load or store hits on an older load that has either been snooped (for loads) or has stale data (for stores)."
- },
- {,
"EventCode": "0x35042",
"EventName": "PM_IPTEG_FROM_L3_DISP_CONFLICT",
"BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 with dispatch conflict due to a instruction side request"
@@ -1692,7 +1577,7 @@
{,
"EventCode": "0x2001A",
"EventName": "PM_NTC_ALL_FIN",
- "BriefDescription": "Cycles after all instructions have finished to group completed"
+ "BriefDescription": "Cycles after instruction finished to instruction completed."
},
{,
"EventCode": "0x3005A",
@@ -1710,6 +1595,11 @@
"BriefDescription": "ls1 l1 tm cam cancel"
},
{,
+ "EventCode": "0x268AE",
+ "EventName": "PM_L3_P3_PF_RTY",
+ "BriefDescription": "L3 PF received retry port 3, every retry counted"
+ },
+ {,
"EventCode": "0xE884",
"EventName": "PM_LS1_ERAT_MISS_PREF",
"BriefDescription": "LS1 Erat miss due to prefetch"
@@ -1742,7 +1632,7 @@
{,
"EventCode": "0x160B6",
"EventName": "PM_L3_WI0_BUSY",
- "BriefDescription": "Rotating sample of 8 WI valid"
+ "BriefDescription": "Rotating sample of 8 WI valid (duplicate)"
},
{,
"EventCode": "0x368AC",
@@ -1790,9 +1680,9 @@
"BriefDescription": "L2 guess system (VGS or RNS) and guess was correct (ie data beyond-group)"
},
{,
- "EventCode": "0x589C",
- "EventName": "PM_PTESYNC",
- "BriefDescription": "ptesync instruction counted when the instruction is decoded and transmitted"
+ "EventCode": "0x260AE",
+ "EventName": "PM_L3_P2_PF_RTY",
+ "BriefDescription": "L3 PF received retry port 2, every retry counted"
},
{,
"EventCode": "0x26086",
@@ -1825,6 +1715,11 @@
"BriefDescription": "Store-Hit-Load Table Read Hit with entry Enabled"
},
{,
+ "EventCode": "0x46882",
+ "EventName": "PM_L2_ST_HIT",
+ "BriefDescription": "All successful D-side store dispatches for this thread that were L2 hits"
+ },
+ {,
"EventCode": "0x360AC",
"EventName": "PM_L3_SN0_BUSY",
"BriefDescription": "Lifetime, sample of snooper machine 0 valid"
@@ -1845,11 +1740,6 @@
"BriefDescription": "All successful D-Side Store dispatches that were an L2 miss for this thread"
},
{,
- "EventCode": "0xF8B4",
- "EventName": "PM_DC_PREF_XCONS_ALLOC",
- "BriefDescription": "Prefetch stream allocated in the Ultra conservative phase by either the hardware prefetch mechanism or software prefetch"
- },
- {,
"EventCode": "0x35048",
"EventName": "PM_IPTEG_FROM_DL2L3_SHR",
"BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a instruction side request"
@@ -1970,11 +1860,6 @@
"BriefDescription": "Cycles thread running at priority level 2 or 3"
},
{,
- "EventCode": "0x10134",
- "EventName": "PM_MRK_ST_DONE_L2",
- "BriefDescription": "marked store completed in L2 ( RC machine done)"
- },
- {,
"EventCode": "0x368B2",
"EventName": "PM_L3_GRP_GUESS_WRONG_HIGH",
"BriefDescription": "Initial scope=group (GS or NNS) but data from local node. Prediction too high"
@@ -2005,11 +1890,6 @@
"BriefDescription": "L2 guess grp (GS or NNS) and guess was not correct (ie data on-chip OR beyond-group)"
},
{,
- "EventCode": "0x368AE",
- "EventName": "PM_L3_P1_CO_RTY",
- "BriefDescription": "L3 CO received retry port 1 (memory only), every retry counted"
- },
- {,
"EventCode": "0xC0AC",
"EventName": "PM_LSU_FLUSH_EMSH",
"BriefDescription": "An ERAT miss was detected after a set-p hit. Erat tracker indicates fail due to tlbmiss and the instruction gets flushed because the instruction was working on the wrong address"
@@ -2035,11 +1915,6 @@
"BriefDescription": "RC requests that were on group (aka nodel) pump attempts"
},
{,
- "EventCode": "0xF0B0",
- "EventName": "PM_L3_LD_PREF",
- "BriefDescription": "L3 load prefetch, sourced from a hardware or software stream, was sent to the nest"
- },
- {,
"EventCode": "0x16080",
"EventName": "PM_L2_LD",
"BriefDescription": "All successful D-side Load dispatches for this thread (L2 miss + L2 hits)"
@@ -2050,6 +1925,11 @@
"BriefDescription": "Math flop instruction completed"
},
{,
+ "EventCode": "0xC080",
+ "EventName": "PM_LS0_LD_VECTOR_FIN",
+ "BriefDescription": ""
+ },
+ {,
"EventCode": "0x368B0",
"EventName": "PM_L3_P1_SYS_PUMP",
"BriefDescription": "L3 PF sent with sys scope port 1, counts even retried requests"
@@ -2120,11 +2000,6 @@
"BriefDescription": "Conditional Branch Completed in which the HW correctly predicted the direction as taken. Counted at completion time"
},
{,
- "EventCode": "0xF0B8",
- "EventName": "PM_LS0_UNALIGNED_ST",
- "BriefDescription": "Store instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the Store of that size. If the Store wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty"
- },
- {,
"EventCode": "0x20132",
"EventName": "PM_MRK_DFU_FIN",
"BriefDescription": "Decimal Unit marked Instruction Finish"
@@ -2140,6 +2015,11 @@
"BriefDescription": "Effective Address alias flush : no EA match but Real Address match. If the data has not yet been returned for this load, the instruction will just be rejected, but if it has returned data, it will be flushed"
},
{,
+ "EventCode": "0x16084",
+ "EventName": "PM_L2_RCLD_DISP",
+ "BriefDescription": "All I-or-D side load dispatch attempts for this thread (excludes i_l2mru_tch_reqs)"
+ },
+ {,
"EventCode": "0x3F150",
"EventName": "PM_MRK_ST_DRAIN_TO_L2DISP_CYC",
"BriefDescription": "cycles to drain st from core to L2"
@@ -2225,11 +2105,6 @@
"BriefDescription": "Prefetch Canceled due to page boundary"
},
{,
- "EventCode": "0xF09C",
- "EventName": "PM_SLB_TABLEWALK_CYC",
- "BriefDescription": "Cycles when a tablewalk is pending on this thread on the SLB table"
- },
- {,
"EventCode": "0x460AA",
"EventName": "PM_L3_P0_CO_L31",
"BriefDescription": "L3 CO to L3.1 (LCO) port 0 with or without data"
@@ -2247,10 +2122,10 @@
{,
"EventCode": "0x46082",
"EventName": "PM_L2_ST_DISP",
- "BriefDescription": "All successful D-side store dispatches for this thread "
+ "BriefDescription": "All successful D-side store dispatches for this thread (L2 miss + L2 hits)"
},
{,
- "EventCode": "0x4609E",
+ "EventCode": "0x36880",
"EventName": "PM_L2_INST_MISS",
"BriefDescription": "All successful I-side dispatches that were an L2 miss for this thread (excludes i_l2mru_tch reqs)"
},
@@ -2340,9 +2215,9 @@
"BriefDescription": "All ISU rejects"
},
{,
- "EventCode": "0x46882",
- "EventName": "PM_L2_ST_HIT",
- "BriefDescription": "All successful D-side store dispatches for this thread that were L2 hits"
+ "EventCode": "0xC884",
+ "EventName": "PM_LS3_LD_VECTOR_FIN",
+ "BriefDescription": ""
},
{,
"EventCode": "0x360A8",
@@ -2360,11 +2235,6 @@
"BriefDescription": "Asserts when a i=1 store op is sent to the nest. No record of issue pipe (LS0/LS1) is maintained so this is for both pipes. Probably don't need separate LS0 and LS1"
},
{,
- "EventCode": "0xD880",
- "EventName": "PM_LSU1_SET_MPRED",
- "BriefDescription": "Set prediction(set-p) miss. The entry was not found in the Set prediction table"
- },
- {,
"EventCode": "0xD0B8",
"EventName": "PM_LSU_LMQ_FULL_CYC",
"BriefDescription": "Counts the number of cycles the LMQ is full"
@@ -2389,4 +2259,4 @@
"EventName": "PM_L3_PF_USAGE",
"BriefDescription": "Rotating sample of 32 PF actives"
}
-]
+] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json b/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json
index bc2db636dabf..5af1abbe82c4 100644
--- a/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json
+++ b/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json
@@ -125,6 +125,11 @@
"BriefDescription": "Overflow from counter 5"
},
{,
+ "EventCode": "0x4505E",
+ "EventName": "PM_FLOP_CMPL",
+ "BriefDescription": "Floating Point Operation Finished"
+ },
+ {,
"EventCode": "0x2C018",
"EventName": "PM_CMPLU_STALL_DMISS_L21_L31",
"BriefDescription": "Completion stall by Dcache miss which resolved on chip ( excluding local L2/L3)"
@@ -390,11 +395,6 @@
"BriefDescription": "Ict empty for this thread due to branch mispred"
},
{,
- "EventCode": "0x3405E",
- "EventName": "PM_IFETCH_THROTTLE",
- "BriefDescription": "Cycles in which Instruction fetch throttle was active."
- },
- {,
"EventCode": "0x1F148",
"EventName": "PM_MRK_DPTEG_FROM_ON_CHIP_CACHE",
"BriefDescription": "A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on the same chip due to a marked data side request.. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
@@ -422,7 +422,7 @@
{,
"EventCode": "0xD0A8",
"EventName": "PM_DSLB_MISS",
- "BriefDescription": "Data SLB Miss - Total of all segment sizes"
+ "BriefDescription": "gate_and(sd_pc_c0_comp_valid AND sd_pc_c0_comp_thread(0:1)=tid,sd_pc_c0_comp_ppc_count(0:3)) + gate_and(sd_pc_c1_comp_valid AND sd_pc_c1_comp_thread(0:1)=tid,sd_pc_c1_comp_ppc_count(0:3))"
},
{,
"EventCode": "0x4C058",
@@ -549,4 +549,4 @@
"EventName": "PM_MRK_DATA_FROM_L21_SHR_CYC",
"BriefDescription": "Duration in cycles to reload with Shared (S) data from another core's L2 on the same chip due to a marked load"
}
-]
+] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/powerpc/power9/pmc.json b/tools/perf/pmu-events/arch/powerpc/power9/pmc.json
index 3ef8a10aac86..d0b89f930567 100644
--- a/tools/perf/pmu-events/arch/powerpc/power9/pmc.json
+++ b/tools/perf/pmu-events/arch/powerpc/power9/pmc.json
@@ -119,4 +119,4 @@
"EventName": "PM_1FLOP_CMPL",
"BriefDescription": "one flop (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg) operation completed"
}
-]
+] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/powerpc/power9/translation.json b/tools/perf/pmu-events/arch/powerpc/power9/translation.json
index 8c0f12024afa..bc8e03d7a6b0 100644
--- a/tools/perf/pmu-events/arch/powerpc/power9/translation.json
+++ b/tools/perf/pmu-events/arch/powerpc/power9/translation.json
@@ -90,11 +90,6 @@
"BriefDescription": "stcx failed"
},
{,
- "EventCode": "0x20112",
- "EventName": "PM_MRK_NTF_FIN",
- "BriefDescription": "Marked next to finish instruction finished"
- },
- {,
"EventCode": "0x300F0",
"EventName": "PM_ST_MISS_L1",
"BriefDescription": "Store Missed L1"
diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv
index fe1a2c47cabf..93656f2fd53a 100644
--- a/tools/perf/pmu-events/arch/x86/mapfile.csv
+++ b/tools/perf/pmu-events/arch/x86/mapfile.csv
@@ -23,10 +23,7 @@ GenuineIntel-6-1E,v2,nehalemep,core
GenuineIntel-6-1F,v2,nehalemep,core
GenuineIntel-6-1A,v2,nehalemep,core
GenuineIntel-6-2E,v2,nehalemex,core
-GenuineIntel-6-4E,v24,skylake,core
-GenuineIntel-6-5E,v24,skylake,core
-GenuineIntel-6-8E,v24,skylake,core
-GenuineIntel-6-9E,v24,skylake,core
+GenuineIntel-6-[4589]E,v24,skylake,core
GenuineIntel-6-37,v13,silvermont,core
GenuineIntel-6-4D,v13,silvermont,core
GenuineIntel-6-4C,v13,silvermont,core
diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c
index 9eb7047bafe4..b578aa26e375 100644
--- a/tools/perf/pmu-events/jevents.c
+++ b/tools/perf/pmu-events/jevents.c
@@ -116,6 +116,43 @@ static void fixdesc(char *s)
*e = 0;
}
+/*
+ * Double every '\' in @s so the result survives being emitted as a C string
+ * literal. Returns @s itself when it has no '\', otherwise a malloc()ed
+ * copy (caller owns it), or NULL if @s is NULL or the allocation fails.
+ */
+static char *fixregex(char *s)
+{
+	size_t len = 0;
+	size_t esc_count = 0;
+	char *fixed, *p, *q;
+
+	if (!s)
+		return NULL;
+
+	/* Measure the string and count the '\' that need doubling */
+	for (p = s; *p; p++) {
+		++len;
+		if (*p == '\\')
+			++esc_count;
+	}
+	if (esc_count == 0)
+		return s;
+
+	/* Each '\' grows by one byte; the +1 is for the terminating NUL */
+	fixed = malloc(len + esc_count + 1);
+	if (!fixed)
+		return NULL;
+
+	for (p = s, q = fixed; *p; p++) {
+		if (*p == '\\')
+			*q++ = '\\';
+		*q++ = *p;
+	}
+	*q = '\0';
+	return fixed;
+}
+
static struct msrmap {
const char *num;
const char *pname;
@@ -648,7 +685,7 @@ static int process_mapfile(FILE *outfp, char *fpath)
}
line[strlen(line)-1] = '\0';
- cpuid = strtok_r(p, ",", &save);
+ cpuid = fixregex(strtok_r(p, ",", &save));
version = strtok_r(NULL, ",", &save);
fname = strtok_r(NULL, ",", &save);
type = strtok_r(NULL, ",", &save);
diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c
index 0e1367f90af5..97f64ad7fa08 100644
--- a/tools/perf/tests/attr.c
+++ b/tools/perf/tests/attr.c
@@ -124,6 +124,12 @@ static int store_event(struct perf_event_attr *attr, pid_t pid, int cpu,
WRITE_ASS(exclude_guest, "d");
WRITE_ASS(exclude_callchain_kernel, "d");
WRITE_ASS(exclude_callchain_user, "d");
+ WRITE_ASS(mmap2, "d");
+ WRITE_ASS(comm_exec, "d");
+ WRITE_ASS(context_switch, "d");
+ WRITE_ASS(write_backward, "d");
+ WRITE_ASS(namespaces, "d");
+ WRITE_ASS(use_clockid, "d");
WRITE_ASS(wakeup_events, PRIu32);
WRITE_ASS(bp_type, PRIu32);
WRITE_ASS(config1, "llu");
diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c
index 71b9a0b613d2..4035d43523c3 100644
--- a/tools/perf/tests/backward-ring-buffer.c
+++ b/tools/perf/tests/backward-ring-buffer.c
@@ -33,8 +33,8 @@ static int count_samples(struct perf_evlist *evlist, int *sample_count,
for (i = 0; i < evlist->nr_mmaps; i++) {
union perf_event *event;
- perf_mmap__read_catchup(&evlist->backward_mmap[i]);
- while ((event = perf_mmap__read_backward(&evlist->backward_mmap[i])) != NULL) {
+ perf_mmap__read_catchup(&evlist->overwrite_mmap[i]);
+ while ((event = perf_mmap__read_backward(&evlist->overwrite_mmap[i])) != NULL) {
const u32 type = event->header.type;
switch (type) {
@@ -59,7 +59,7 @@ static int do_test(struct perf_evlist *evlist, int mmap_pages,
int err;
char sbuf[STRERR_BUFSIZE];
- err = perf_evlist__mmap(evlist, mmap_pages, true);
+ err = perf_evlist__mmap(evlist, mmap_pages);
if (err < 0) {
pr_debug("perf_evlist__mmap: %s\n",
str_error_r(errno, sbuf, sizeof(sbuf)));
diff --git a/tools/perf/tests/bp_signal.c b/tools/perf/tests/bp_signal.c
index 335b695f4970..a467615c5a0e 100644
--- a/tools/perf/tests/bp_signal.c
+++ b/tools/perf/tests/bp_signal.c
@@ -296,7 +296,7 @@ bool test__bp_signal_is_supported(void)
* instruction breakpoint using the perf event interface.
* Once it's there we can release this.
*/
-#ifdef __powerpc__
+#if defined(__powerpc__) || defined(__s390x__)
return false;
#else
return true;
diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c
index 34c22cdf4d5d..c433dd30975a 100644
--- a/tools/perf/tests/bpf.c
+++ b/tools/perf/tests/bpf.c
@@ -167,7 +167,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void),
goto out_delete_evlist;
}
- err = perf_evlist__mmap(evlist, opts.mmap_pages, false);
+ err = perf_evlist__mmap(evlist, opts.mmap_pages);
if (err < 0) {
pr_debug("perf_evlist__mmap: %s\n",
str_error_r(errno, sbuf, sizeof(sbuf)));
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index fcc8984bc329..3bf7b145b826 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -639,7 +639,7 @@ static int do_test_code_reading(bool try_kcore)
break;
}
- ret = perf_evlist__mmap(evlist, UINT_MAX, false);
+ ret = perf_evlist__mmap(evlist, UINT_MAX);
if (ret < 0) {
pr_debug("perf_evlist__mmap failed\n");
goto out_put;
diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c
index 842d33637a18..c46530918938 100644
--- a/tools/perf/tests/keep-tracking.c
+++ b/tools/perf/tests/keep-tracking.c
@@ -95,7 +95,7 @@ int test__keep_tracking(struct test *test __maybe_unused, int subtest __maybe_un
goto out_err;
}
- CHECK__(perf_evlist__mmap(evlist, UINT_MAX, false));
+ CHECK__(perf_evlist__mmap(evlist, UINT_MAX));
/*
* First, test that a 'comm' event can be found when the event is
diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c
index 5a8bf318f8a7..c0e971da965c 100644
--- a/tools/perf/tests/mmap-basic.c
+++ b/tools/perf/tests/mmap-basic.c
@@ -94,7 +94,7 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse
expected_nr_events[i] = 1 + rand() % 127;
}
- if (perf_evlist__mmap(evlist, 128, true) < 0) {
+ if (perf_evlist__mmap(evlist, 128) < 0) {
pr_debug("failed to mmap events: %d (%s)\n", errno,
str_error_r(errno, sbuf, sizeof(sbuf)));
goto out_delete_evlist;
diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c
index d9619d265314..97c9407d02a0 100644
--- a/tools/perf/tests/openat-syscall-tp-fields.c
+++ b/tools/perf/tests/openat-syscall-tp-fields.c
@@ -64,7 +64,7 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest
goto out_delete_evlist;
}
- err = perf_evlist__mmap(evlist, UINT_MAX, false);
+ err = perf_evlist__mmap(evlist, UINT_MAX);
if (err < 0) {
pr_debug("perf_evlist__mmap: %s\n",
str_error_r(errno, sbuf, sizeof(sbuf)));
diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
index c34904d37705..0afafab85238 100644
--- a/tools/perf/tests/perf-record.c
+++ b/tools/perf/tests/perf-record.c
@@ -141,7 +141,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus
* fds in the same CPU to be injected in the same mmap ring buffer
* (using ioctl(PERF_EVENT_IOC_SET_OUTPUT)).
*/
- err = perf_evlist__mmap(evlist, opts.mmap_pages, false);
+ err = perf_evlist__mmap(evlist, opts.mmap_pages);
if (err < 0) {
pr_debug("perf_evlist__mmap: %s\n",
str_error_r(errno, sbuf, sizeof(sbuf)));
diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c
index 725a196991a8..f6c72f915d48 100644
--- a/tools/perf/tests/sw-clock.c
+++ b/tools/perf/tests/sw-clock.c
@@ -78,7 +78,7 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id)
goto out_delete_evlist;
}
- err = perf_evlist__mmap(evlist, 128, true);
+ err = perf_evlist__mmap(evlist, 128);
if (err < 0) {
pr_debug("failed to mmap event: %d (%s)\n", errno,
str_error_r(errno, sbuf, sizeof(sbuf)));
diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c
index 7d3f4bf9534f..33e00295a972 100644
--- a/tools/perf/tests/switch-tracking.c
+++ b/tools/perf/tests/switch-tracking.c
@@ -449,7 +449,7 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_
goto out;
}
- err = perf_evlist__mmap(evlist, UINT_MAX, false);
+ err = perf_evlist__mmap(evlist, UINT_MAX);
if (err) {
pr_debug("perf_evlist__mmap failed!\n");
goto out_err;
diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c
index 89c8e1604ca7..01b62b81751b 100644
--- a/tools/perf/tests/task-exit.c
+++ b/tools/perf/tests/task-exit.c
@@ -101,7 +101,7 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused
goto out_delete_evlist;
}
- if (perf_evlist__mmap(evlist, 128, true) < 0) {
+ if (perf_evlist__mmap(evlist, 128) < 0) {
pr_debug("failed to mmap events: %d (%s)\n", errno,
str_error_r(errno, sbuf, sizeof(sbuf)));
goto out_delete_evlist;
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index 8f7f59d1a2b5..03b7363a49c9 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -25,16 +25,10 @@ struct disasm_line_samples {
#define IPC_WIDTH 6
#define CYCLES_WIDTH 6
-struct browser_disasm_line {
- struct rb_node rb_node;
- u32 idx;
- int idx_asm;
- int jump_sources;
- /*
- * actual length of this array is saved on the nr_events field
- * of the struct annotate_browser
- */
- struct disasm_line_samples samples[1];
+struct browser_line {
+ u32 idx;
+ int idx_asm;
+ int jump_sources;
};
static struct annotate_browser_opt {
@@ -53,39 +47,43 @@ static struct annotate_browser_opt {
struct arch;
struct annotate_browser {
- struct ui_browser b;
- struct rb_root entries;
- struct rb_node *curr_hot;
- struct disasm_line *selection;
- struct disasm_line **offsets;
- struct arch *arch;
- int nr_events;
- u64 start;
- int nr_asm_entries;
- int nr_entries;
- int max_jump_sources;
- int nr_jumps;
- bool searching_backwards;
- bool have_cycles;
- u8 addr_width;
- u8 jumps_width;
- u8 target_width;
- u8 min_addr_width;
- u8 max_addr_width;
- char search_bf[128];
+ struct ui_browser b;
+ struct rb_root entries;
+ struct rb_node *curr_hot;
+ struct annotation_line *selection;
+ struct annotation_line **offsets;
+ struct arch *arch;
+ int nr_events;
+ u64 start;
+ int nr_asm_entries;
+ int nr_entries;
+ int max_jump_sources;
+ int nr_jumps;
+ bool searching_backwards;
+ bool have_cycles;
+ u8 addr_width;
+ u8 jumps_width;
+ u8 target_width;
+ u8 min_addr_width;
+ u8 max_addr_width;
+ char search_bf[128];
};
-static inline struct browser_disasm_line *disasm_line__browser(struct disasm_line *dl)
+static inline struct browser_line *browser_line(struct annotation_line *al)
{
- return (struct browser_disasm_line *)(dl + 1);
+ void *ptr = al;
+
+ ptr = container_of(al, struct disasm_line, al);
+ return ptr - sizeof(struct browser_line);
}
static bool disasm_line__filter(struct ui_browser *browser __maybe_unused,
void *entry)
{
if (annotate_browser__opts.hide_src_code) {
- struct disasm_line *dl = list_entry(entry, struct disasm_line, node);
- return dl->offset == -1;
+ struct annotation_line *al = list_entry(entry, struct annotation_line, node);
+
+ return al->offset == -1;
}
return false;
@@ -120,11 +118,37 @@ static int annotate_browser__cycles_width(struct annotate_browser *ab)
return ab->have_cycles ? IPC_WIDTH + CYCLES_WIDTH : 0;
}
+static void disasm_line__write(struct disasm_line *dl, struct ui_browser *browser,
+ char *bf, size_t size)
+{
+ if (dl->ins.ops && dl->ins.ops->scnprintf) {
+ if (ins__is_jump(&dl->ins)) {
+ bool fwd = dl->ops.target.offset > dl->al.offset;
+
+ ui_browser__write_graph(browser, fwd ? SLSMG_DARROW_CHAR :
+ SLSMG_UARROW_CHAR);
+ SLsmg_write_char(' ');
+ } else if (ins__is_call(&dl->ins)) {
+ ui_browser__write_graph(browser, SLSMG_RARROW_CHAR);
+ SLsmg_write_char(' ');
+ } else if (ins__is_ret(&dl->ins)) {
+ ui_browser__write_graph(browser, SLSMG_LARROW_CHAR);
+ SLsmg_write_char(' ');
+ } else {
+ ui_browser__write_nstring(browser, " ", 2);
+ }
+ } else {
+ ui_browser__write_nstring(browser, " ", 2);
+ }
+
+ disasm_line__scnprintf(dl, bf, size, !annotate_browser__opts.use_offset);
+}
+
static void annotate_browser__write(struct ui_browser *browser, void *entry, int row)
{
struct annotate_browser *ab = container_of(browser, struct annotate_browser, b);
- struct disasm_line *dl = list_entry(entry, struct disasm_line, node);
- struct browser_disasm_line *bdl = disasm_line__browser(dl);
+ struct annotation_line *al = list_entry(entry, struct annotation_line, node);
+ struct browser_line *bl = browser_line(al);
bool current_entry = ui_browser__is_current_entry(browser, row);
bool change_color = (!annotate_browser__opts.hide_src_code &&
(!current_entry || (browser->use_navkeypressed &&
@@ -137,32 +161,32 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
bool show_title = false;
for (i = 0; i < ab->nr_events; i++) {
- if (bdl->samples[i].percent > percent_max)
- percent_max = bdl->samples[i].percent;
+ if (al->samples[i].percent > percent_max)
+ percent_max = al->samples[i].percent;
}
- if ((row == 0) && (dl->offset == -1 || percent_max == 0.0)) {
+ if ((row == 0) && (al->offset == -1 || percent_max == 0.0)) {
if (ab->have_cycles) {
- if (dl->ipc == 0.0 && dl->cycles == 0)
+ if (al->ipc == 0.0 && al->cycles == 0)
show_title = true;
} else
show_title = true;
}
- if (dl->offset != -1 && percent_max != 0.0) {
+ if (al->offset != -1 && percent_max != 0.0) {
for (i = 0; i < ab->nr_events; i++) {
ui_browser__set_percent_color(browser,
- bdl->samples[i].percent,
+ al->samples[i].percent,
current_entry);
if (annotate_browser__opts.show_total_period) {
ui_browser__printf(browser, "%11" PRIu64 " ",
- bdl->samples[i].he.period);
+ al->samples[i].he.period);
} else if (annotate_browser__opts.show_nr_samples) {
ui_browser__printf(browser, "%6" PRIu64 " ",
- bdl->samples[i].he.nr_samples);
+ al->samples[i].he.nr_samples);
} else {
ui_browser__printf(browser, "%6.2f ",
- bdl->samples[i].percent);
+ al->samples[i].percent);
}
}
} else {
@@ -177,16 +201,16 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
}
}
if (ab->have_cycles) {
- if (dl->ipc)
- ui_browser__printf(browser, "%*.2f ", IPC_WIDTH - 1, dl->ipc);
+ if (al->ipc)
+ ui_browser__printf(browser, "%*.2f ", IPC_WIDTH - 1, al->ipc);
else if (!show_title)
ui_browser__write_nstring(browser, " ", IPC_WIDTH);
else
ui_browser__printf(browser, "%*s ", IPC_WIDTH - 1, "IPC");
- if (dl->cycles)
+ if (al->cycles)
ui_browser__printf(browser, "%*" PRIu64 " ",
- CYCLES_WIDTH - 1, dl->cycles);
+ CYCLES_WIDTH - 1, al->cycles);
else if (!show_title)
ui_browser__write_nstring(browser, " ", CYCLES_WIDTH);
else
@@ -199,19 +223,19 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
if (!browser->navkeypressed)
width += 1;
- if (!*dl->line)
+ if (!*al->line)
ui_browser__write_nstring(browser, " ", width - pcnt_width - cycles_width);
- else if (dl->offset == -1) {
- if (dl->line_nr && annotate_browser__opts.show_linenr)
+ else if (al->offset == -1) {
+ if (al->line_nr && annotate_browser__opts.show_linenr)
printed = scnprintf(bf, sizeof(bf), "%-*d ",
- ab->addr_width + 1, dl->line_nr);
+ ab->addr_width + 1, al->line_nr);
else
printed = scnprintf(bf, sizeof(bf), "%*s ",
ab->addr_width, " ");
ui_browser__write_nstring(browser, bf, printed);
- ui_browser__write_nstring(browser, dl->line, width - printed - pcnt_width - cycles_width + 1);
+ ui_browser__write_nstring(browser, al->line, width - printed - pcnt_width - cycles_width + 1);
} else {
- u64 addr = dl->offset;
+ u64 addr = al->offset;
int color = -1;
if (!annotate_browser__opts.use_offset)
@@ -220,13 +244,13 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
if (!annotate_browser__opts.use_offset) {
printed = scnprintf(bf, sizeof(bf), "%" PRIx64 ": ", addr);
} else {
- if (bdl->jump_sources) {
+ if (bl->jump_sources) {
if (annotate_browser__opts.show_nr_jumps) {
int prev;
printed = scnprintf(bf, sizeof(bf), "%*d ",
ab->jumps_width,
- bdl->jump_sources);
- prev = annotate_browser__set_jumps_percent_color(ab, bdl->jump_sources,
+ bl->jump_sources);
+ prev = annotate_browser__set_jumps_percent_color(ab, bl->jump_sources,
current_entry);
ui_browser__write_nstring(browser, bf, printed);
ui_browser__set_color(browser, prev);
@@ -245,32 +269,14 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
ui_browser__write_nstring(browser, bf, printed);
if (change_color)
ui_browser__set_color(browser, color);
- if (dl->ins.ops && dl->ins.ops->scnprintf) {
- if (ins__is_jump(&dl->ins)) {
- bool fwd = dl->ops.target.offset > dl->offset;
-
- ui_browser__write_graph(browser, fwd ? SLSMG_DARROW_CHAR :
- SLSMG_UARROW_CHAR);
- SLsmg_write_char(' ');
- } else if (ins__is_call(&dl->ins)) {
- ui_browser__write_graph(browser, SLSMG_RARROW_CHAR);
- SLsmg_write_char(' ');
- } else if (ins__is_ret(&dl->ins)) {
- ui_browser__write_graph(browser, SLSMG_LARROW_CHAR);
- SLsmg_write_char(' ');
- } else {
- ui_browser__write_nstring(browser, " ", 2);
- }
- } else {
- ui_browser__write_nstring(browser, " ", 2);
- }
- disasm_line__scnprintf(dl, bf, sizeof(bf), !annotate_browser__opts.use_offset);
+ disasm_line__write(disasm_line(al), browser, bf, sizeof(bf));
+
ui_browser__write_nstring(browser, bf, width - pcnt_width - cycles_width - 3 - printed);
}
if (current_entry)
- ab->selection = dl;
+ ab->selection = al;
}
static bool disasm_line__is_valid_jump(struct disasm_line *dl, struct symbol *sym)
@@ -286,7 +292,7 @@ static bool disasm_line__is_valid_jump(struct disasm_line *dl, struct symbol *sy
static bool is_fused(struct annotate_browser *ab, struct disasm_line *cursor)
{
- struct disasm_line *pos = list_prev_entry(cursor, node);
+ struct disasm_line *pos = list_prev_entry(cursor, al.node);
const char *name;
if (!pos)
@@ -306,8 +312,9 @@ static bool is_fused(struct annotate_browser *ab, struct disasm_line *cursor)
static void annotate_browser__draw_current_jump(struct ui_browser *browser)
{
struct annotate_browser *ab = container_of(browser, struct annotate_browser, b);
- struct disasm_line *cursor = ab->selection, *target;
- struct browser_disasm_line *btarget, *bcursor;
+ struct disasm_line *cursor = disasm_line(ab->selection);
+ struct annotation_line *target;
+ struct browser_line *btarget, *bcursor;
unsigned int from, to;
struct map_symbol *ms = ab->b.priv;
struct symbol *sym = ms->sym;
@@ -321,11 +328,9 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser)
return;
target = ab->offsets[cursor->ops.target.offset];
- if (!target)
- return;
- bcursor = disasm_line__browser(cursor);
- btarget = disasm_line__browser(target);
+ bcursor = browser_line(&cursor->al);
+ btarget = browser_line(target);
if (annotate_browser__opts.hide_src_code) {
from = bcursor->idx_asm;
@@ -361,12 +366,11 @@ static unsigned int annotate_browser__refresh(struct ui_browser *browser)
return ret;
}
-static int disasm__cmp(struct browser_disasm_line *a,
- struct browser_disasm_line *b, int nr_pcnt)
+static int disasm__cmp(struct annotation_line *a, struct annotation_line *b)
{
int i;
- for (i = 0; i < nr_pcnt; i++) {
+ for (i = 0; i < a->samples_nr; i++) {
if (a->samples[i].percent == b->samples[i].percent)
continue;
return a->samples[i].percent < b->samples[i].percent;
@@ -374,28 +378,27 @@ static int disasm__cmp(struct browser_disasm_line *a,
return 0;
}
-static void disasm_rb_tree__insert(struct rb_root *root, struct browser_disasm_line *bdl,
- int nr_events)
+static void disasm_rb_tree__insert(struct rb_root *root, struct annotation_line *al)
{
struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL;
- struct browser_disasm_line *l;
+ struct annotation_line *l;
while (*p != NULL) {
parent = *p;
- l = rb_entry(parent, struct browser_disasm_line, rb_node);
+ l = rb_entry(parent, struct annotation_line, rb_node);
- if (disasm__cmp(bdl, l, nr_events))
+ if (disasm__cmp(al, l))
p = &(*p)->rb_left;
else
p = &(*p)->rb_right;
}
- rb_link_node(&bdl->rb_node, parent, p);
- rb_insert_color(&bdl->rb_node, root);
+ rb_link_node(&al->rb_node, parent, p);
+ rb_insert_color(&al->rb_node, root);
}
static void annotate_browser__set_top(struct annotate_browser *browser,
- struct disasm_line *pos, u32 idx)
+ struct annotation_line *pos, u32 idx)
{
unsigned back;
@@ -404,7 +407,7 @@ static void annotate_browser__set_top(struct annotate_browser *browser,
browser->b.top_idx = browser->b.index = idx;
while (browser->b.top_idx != 0 && back != 0) {
- pos = list_entry(pos->node.prev, struct disasm_line, node);
+ pos = list_entry(pos->node.prev, struct annotation_line, node);
if (disasm_line__filter(&browser->b, &pos->node))
continue;
@@ -420,12 +423,13 @@ static void annotate_browser__set_top(struct annotate_browser *browser,
static void annotate_browser__set_rb_top(struct annotate_browser *browser,
struct rb_node *nd)
{
- struct browser_disasm_line *bpos;
- struct disasm_line *pos;
+ struct browser_line *bpos;
+ struct annotation_line *pos;
u32 idx;
- bpos = rb_entry(nd, struct browser_disasm_line, rb_node);
- pos = ((struct disasm_line *)bpos) - 1;
+ pos = rb_entry(nd, struct annotation_line, rb_node);
+ bpos = browser_line(pos);
+
idx = bpos->idx;
if (annotate_browser__opts.hide_src_code)
idx = bpos->idx_asm;
@@ -439,46 +443,35 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser,
struct map_symbol *ms = browser->b.priv;
struct symbol *sym = ms->sym;
struct annotation *notes = symbol__annotation(sym);
- struct disasm_line *pos, *next;
- s64 len = symbol__size(sym);
+ struct disasm_line *pos;
browser->entries = RB_ROOT;
pthread_mutex_lock(&notes->lock);
- list_for_each_entry(pos, &notes->src->source, node) {
- struct browser_disasm_line *bpos = disasm_line__browser(pos);
- const char *path = NULL;
+ symbol__calc_percent(sym, evsel);
+
+ list_for_each_entry(pos, &notes->src->source, al.node) {
double max_percent = 0.0;
int i;
- if (pos->offset == -1) {
- RB_CLEAR_NODE(&bpos->rb_node);
+ if (pos->al.offset == -1) {
+ RB_CLEAR_NODE(&pos->al.rb_node);
continue;
}
- next = disasm__get_next_ip_line(&notes->src->source, pos);
-
- for (i = 0; i < browser->nr_events; i++) {
- struct sym_hist_entry sample;
-
- bpos->samples[i].percent = disasm__calc_percent(notes,
- evsel->idx + i,
- pos->offset,
- next ? next->offset : len,
- &path, &sample);
- bpos->samples[i].he = sample;
+ for (i = 0; i < pos->al.samples_nr; i++) {
+ struct annotation_data *sample = &pos->al.samples[i];
- if (max_percent < bpos->samples[i].percent)
- max_percent = bpos->samples[i].percent;
+ if (max_percent < sample->percent)
+ max_percent = sample->percent;
}
- if (max_percent < 0.01 && pos->ipc == 0) {
- RB_CLEAR_NODE(&bpos->rb_node);
+ if (max_percent < 0.01 && pos->al.ipc == 0) {
+ RB_CLEAR_NODE(&pos->al.rb_node);
continue;
}
- disasm_rb_tree__insert(&browser->entries, bpos,
- browser->nr_events);
+ disasm_rb_tree__insert(&browser->entries, &pos->al);
}
pthread_mutex_unlock(&notes->lock);
@@ -487,38 +480,38 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser,
static bool annotate_browser__toggle_source(struct annotate_browser *browser)
{
- struct disasm_line *dl;
- struct browser_disasm_line *bdl;
+ struct annotation_line *al;
+ struct browser_line *bl;
off_t offset = browser->b.index - browser->b.top_idx;
browser->b.seek(&browser->b, offset, SEEK_CUR);
- dl = list_entry(browser->b.top, struct disasm_line, node);
- bdl = disasm_line__browser(dl);
+ al = list_entry(browser->b.top, struct annotation_line, node);
+ bl = browser_line(al);
if (annotate_browser__opts.hide_src_code) {
- if (bdl->idx_asm < offset)
- offset = bdl->idx;
+ if (bl->idx_asm < offset)
+ offset = bl->idx;
browser->b.nr_entries = browser->nr_entries;
annotate_browser__opts.hide_src_code = false;
browser->b.seek(&browser->b, -offset, SEEK_CUR);
- browser->b.top_idx = bdl->idx - offset;
- browser->b.index = bdl->idx;
+ browser->b.top_idx = bl->idx - offset;
+ browser->b.index = bl->idx;
} else {
- if (bdl->idx_asm < 0) {
+ if (bl->idx_asm < 0) {
ui_helpline__puts("Only available for assembly lines.");
browser->b.seek(&browser->b, -offset, SEEK_CUR);
return false;
}
- if (bdl->idx_asm < offset)
- offset = bdl->idx_asm;
+ if (bl->idx_asm < offset)
+ offset = bl->idx_asm;
browser->b.nr_entries = browser->nr_asm_entries;
annotate_browser__opts.hide_src_code = true;
browser->b.seek(&browser->b, -offset, SEEK_CUR);
- browser->b.top_idx = bdl->idx_asm - offset;
- browser->b.index = bdl->idx_asm;
+ browser->b.top_idx = bl->idx_asm - offset;
+ browser->b.index = bl->idx_asm;
}
return true;
@@ -543,7 +536,7 @@ static bool annotate_browser__callq(struct annotate_browser *browser,
struct hist_browser_timer *hbt)
{
struct map_symbol *ms = browser->b.priv;
- struct disasm_line *dl = browser->selection;
+ struct disasm_line *dl = disasm_line(browser->selection);
struct annotation *notes;
struct addr_map_symbol target = {
.map = ms->map,
@@ -589,10 +582,10 @@ struct disasm_line *annotate_browser__find_offset(struct annotate_browser *brows
struct disasm_line *pos;
*idx = 0;
- list_for_each_entry(pos, &notes->src->source, node) {
- if (pos->offset == offset)
+ list_for_each_entry(pos, &notes->src->source, al.node) {
+ if (pos->al.offset == offset)
return pos;
- if (!disasm_line__filter(&browser->b, &pos->node))
+ if (!disasm_line__filter(&browser->b, &pos->al.node))
++*idx;
}
@@ -601,7 +594,7 @@ struct disasm_line *annotate_browser__find_offset(struct annotate_browser *brows
static bool annotate_browser__jump(struct annotate_browser *browser)
{
- struct disasm_line *dl = browser->selection;
+ struct disasm_line *dl = disasm_line(browser->selection);
u64 offset;
s64 idx;
@@ -615,29 +608,29 @@ static bool annotate_browser__jump(struct annotate_browser *browser)
return true;
}
- annotate_browser__set_top(browser, dl, idx);
+ annotate_browser__set_top(browser, &dl->al, idx);
return true;
}
static
-struct disasm_line *annotate_browser__find_string(struct annotate_browser *browser,
+struct annotation_line *annotate_browser__find_string(struct annotate_browser *browser,
char *s, s64 *idx)
{
struct map_symbol *ms = browser->b.priv;
struct symbol *sym = ms->sym;
struct annotation *notes = symbol__annotation(sym);
- struct disasm_line *pos = browser->selection;
+ struct annotation_line *al = browser->selection;
*idx = browser->b.index;
- list_for_each_entry_continue(pos, &notes->src->source, node) {
- if (disasm_line__filter(&browser->b, &pos->node))
+ list_for_each_entry_continue(al, &notes->src->source, node) {
+ if (disasm_line__filter(&browser->b, &al->node))
continue;
++*idx;
- if (pos->line && strstr(pos->line, s) != NULL)
- return pos;
+ if (al->line && strstr(al->line, s) != NULL)
+ return al;
}
return NULL;
@@ -645,38 +638,38 @@ struct disasm_line *annotate_browser__find_string(struct annotate_browser *brows
static bool __annotate_browser__search(struct annotate_browser *browser)
{
- struct disasm_line *dl;
+ struct annotation_line *al;
s64 idx;
- dl = annotate_browser__find_string(browser, browser->search_bf, &idx);
- if (dl == NULL) {
+ al = annotate_browser__find_string(browser, browser->search_bf, &idx);
+ if (al == NULL) {
ui_helpline__puts("String not found!");
return false;
}
- annotate_browser__set_top(browser, dl, idx);
+ annotate_browser__set_top(browser, al, idx);
browser->searching_backwards = false;
return true;
}
static
-struct disasm_line *annotate_browser__find_string_reverse(struct annotate_browser *browser,
+struct annotation_line *annotate_browser__find_string_reverse(struct annotate_browser *browser,
char *s, s64 *idx)
{
struct map_symbol *ms = browser->b.priv;
struct symbol *sym = ms->sym;
struct annotation *notes = symbol__annotation(sym);
- struct disasm_line *pos = browser->selection;
+ struct annotation_line *al = browser->selection;
*idx = browser->b.index;
- list_for_each_entry_continue_reverse(pos, &notes->src->source, node) {
- if (disasm_line__filter(&browser->b, &pos->node))
+ list_for_each_entry_continue_reverse(al, &notes->src->source, node) {
+ if (disasm_line__filter(&browser->b, &al->node))
continue;
--*idx;
- if (pos->line && strstr(pos->line, s) != NULL)
- return pos;
+ if (al->line && strstr(al->line, s) != NULL)
+ return al;
}
return NULL;
@@ -684,16 +677,16 @@ struct disasm_line *annotate_browser__find_string_reverse(struct annotate_browse
static bool __annotate_browser__search_reverse(struct annotate_browser *browser)
{
- struct disasm_line *dl;
+ struct annotation_line *al;
s64 idx;
- dl = annotate_browser__find_string_reverse(browser, browser->search_bf, &idx);
- if (dl == NULL) {
+ al = annotate_browser__find_string_reverse(browser, browser->search_bf, &idx);
+ if (al == NULL) {
ui_helpline__puts("String not found!");
return false;
}
- annotate_browser__set_top(browser, dl, idx);
+ annotate_browser__set_top(browser, al, idx);
browser->searching_backwards = true;
return true;
}
@@ -899,13 +892,16 @@ show_help:
continue;
case K_ENTER:
case K_RIGHT:
+ {
+ struct disasm_line *dl = disasm_line(browser->selection);
+
if (browser->selection == NULL)
ui_helpline__puts("Huh? No selection. Report to linux-kernel@vger.kernel.org");
else if (browser->selection->offset == -1)
ui_helpline__puts("Actions are only available for assembly lines.");
- else if (!browser->selection->ins.ops)
+ else if (!dl->ins.ops)
goto show_sup_ins;
- else if (ins__is_ret(&browser->selection->ins))
+ else if (ins__is_ret(&dl->ins))
goto out;
else if (!(annotate_browser__jump(browser) ||
annotate_browser__callq(browser, evsel, hbt))) {
@@ -913,6 +909,7 @@ show_sup_ins:
ui_helpline__puts("Actions are only available for function call/return & jump/branch instructions.");
}
continue;
+ }
case 't':
if (annotate_browser__opts.show_total_period) {
annotate_browser__opts.show_total_period = false;
@@ -990,10 +987,10 @@ static void count_and_fill(struct annotate_browser *browser, u64 start, u64 end,
return;
for (offset = start; offset <= end; offset++) {
- struct disasm_line *dl = browser->offsets[offset];
+ struct annotation_line *al = browser->offsets[offset];
- if (dl)
- dl->ipc = ipc;
+ if (al)
+ al->ipc = ipc;
}
}
}
@@ -1018,13 +1015,13 @@ static void annotate__compute_ipc(struct annotate_browser *browser, size_t size,
ch = &notes->src->cycles_hist[offset];
if (ch && ch->cycles) {
- struct disasm_line *dl;
+ struct annotation_line *al;
if (ch->have_start)
count_and_fill(browser, ch->start, offset, ch);
- dl = browser->offsets[offset];
- if (dl && ch->num_aggr)
- dl->cycles = ch->cycles_aggr / ch->num_aggr;
+ al = browser->offsets[offset];
+ if (al && ch->num_aggr)
+ al->cycles = ch->cycles_aggr / ch->num_aggr;
browser->have_cycles = true;
}
}
@@ -1043,23 +1040,27 @@ static void annotate_browser__mark_jump_targets(struct annotate_browser *browser
return;
for (offset = 0; offset < size; ++offset) {
- struct disasm_line *dl = browser->offsets[offset], *dlt;
- struct browser_disasm_line *bdlt;
+ struct annotation_line *al = browser->offsets[offset];
+ struct disasm_line *dl;
+ struct browser_line *blt;
+
+ dl = disasm_line(al);
if (!disasm_line__is_valid_jump(dl, sym))
continue;
- dlt = browser->offsets[dl->ops.target.offset];
+ al = browser->offsets[dl->ops.target.offset];
+
/*
* FIXME: Oops, no jump target? Buggy disassembler? Or do we
* have to adjust to the previous offset?
*/
- if (dlt == NULL)
+ if (al == NULL)
continue;
- bdlt = disasm_line__browser(dlt);
- if (++bdlt->jump_sources > browser->max_jump_sources)
- browser->max_jump_sources = bdlt->jump_sources;
+ blt = browser_line(al);
+ if (++blt->jump_sources > browser->max_jump_sources)
+ browser->max_jump_sources = blt->jump_sources;
++browser->nr_jumps;
}
@@ -1078,7 +1079,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map,
struct perf_evsel *evsel,
struct hist_browser_timer *hbt)
{
- struct disasm_line *pos, *n;
+ struct annotation_line *al;
struct annotation *notes;
size_t size;
struct map_symbol ms = {
@@ -1097,7 +1098,6 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map,
};
int ret = -1, err;
int nr_pcnt = 1;
- size_t sizeof_bdl = sizeof(struct browser_disasm_line);
if (sym == NULL)
return -1;
@@ -1107,21 +1107,18 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map,
if (map->dso->annotate_warned)
return -1;
- browser.offsets = zalloc(size * sizeof(struct disasm_line *));
+ browser.offsets = zalloc(size * sizeof(struct annotation_line *));
if (browser.offsets == NULL) {
ui__error("Not enough memory!");
return -1;
}
- if (perf_evsel__is_group_event(evsel)) {
+ if (perf_evsel__is_group_event(evsel))
nr_pcnt = evsel->nr_members;
- sizeof_bdl += sizeof(struct disasm_line_samples) *
- (nr_pcnt - 1);
- }
- err = symbol__disassemble(sym, map, perf_evsel__env_arch(evsel),
- sizeof_bdl, &browser.arch,
- perf_evsel__env_cpuid(evsel));
+ err = symbol__annotate(sym, map, evsel,
+ sizeof(struct browser_line), &browser.arch,
+ perf_evsel__env_cpuid(evsel));
if (err) {
char msg[BUFSIZ];
symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg));
@@ -1129,20 +1126,22 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map,
goto out_free_offsets;
}
+ symbol__calc_percent(sym, evsel);
+
ui_helpline__push("Press ESC to exit");
notes = symbol__annotation(sym);
browser.start = map__rip_2objdump(map, sym->start);
- list_for_each_entry(pos, &notes->src->source, node) {
- struct browser_disasm_line *bpos;
- size_t line_len = strlen(pos->line);
+ list_for_each_entry(al, &notes->src->source, node) {
+ struct browser_line *bpos;
+ size_t line_len = strlen(al->line);
if (browser.b.width < line_len)
browser.b.width = line_len;
- bpos = disasm_line__browser(pos);
+ bpos = browser_line(al);
bpos->idx = browser.nr_entries++;
- if (pos->offset != -1) {
+ if (al->offset != -1) {
bpos->idx_asm = browser.nr_asm_entries++;
/*
* FIXME: short term bandaid to cope with assembly
@@ -1151,8 +1150,8 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map,
*
* E.g. copy_user_generic_unrolled
*/
- if (pos->offset < (s64)size)
- browser.offsets[pos->offset] = pos;
+ if (al->offset < (s64)size)
+ browser.offsets[al->offset] = al;
} else
bpos->idx_asm = -1;
}
@@ -1174,10 +1173,8 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map,
annotate_browser__update_addr_width(&browser);
ret = annotate_browser__run(&browser, evsel, hbt);
- list_for_each_entry_safe(pos, n, &notes->src->source, node) {
- list_del(&pos->node);
- disasm_line__free(pos);
- }
+
+ annotated_source__purge(notes->src);
out_free_offsets:
free(browser.offsets);
diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c
index fc7a2e105bfd..cdb5ecf91666 100644
--- a/tools/perf/ui/gtk/annotate.c
+++ b/tools/perf/ui/gtk/annotate.c
@@ -31,14 +31,14 @@ static int perf_gtk__get_percent(char *buf, size_t size, struct symbol *sym,
strcpy(buf, "");
- if (dl->offset == (s64) -1)
+ if (dl->al.offset == (s64) -1)
return 0;
symhist = annotation__histogram(symbol__annotation(sym), evidx);
- if (!symbol_conf.event_group && !symhist->addr[dl->offset].nr_samples)
+ if (!symbol_conf.event_group && !symhist->addr[dl->al.offset].nr_samples)
return 0;
- percent = 100.0 * symhist->addr[dl->offset].nr_samples / symhist->nr_samples;
+ percent = 100.0 * symhist->addr[dl->al.offset].nr_samples / symhist->nr_samples;
markup = perf_gtk__get_percent_color(percent);
if (markup)
@@ -57,16 +57,16 @@ static int perf_gtk__get_offset(char *buf, size_t size, struct symbol *sym,
strcpy(buf, "");
- if (dl->offset == (s64) -1)
+ if (dl->al.offset == (s64) -1)
return 0;
- return scnprintf(buf, size, "%"PRIx64, start + dl->offset);
+ return scnprintf(buf, size, "%"PRIx64, start + dl->al.offset);
}
static int perf_gtk__get_line(char *buf, size_t size, struct disasm_line *dl)
{
int ret = 0;
- char *line = g_markup_escape_text(dl->line, -1);
+ char *line = g_markup_escape_text(dl->al.line, -1);
const char *markup = "<span fgcolor='gray'>";
strcpy(buf, "");
@@ -74,7 +74,7 @@ static int perf_gtk__get_line(char *buf, size_t size, struct disasm_line *dl)
if (!line)
return 0;
- if (dl->offset != (s64) -1)
+ if (dl->al.offset != (s64) -1)
markup = NULL;
if (markup)
@@ -119,7 +119,7 @@ static int perf_gtk__annotate_symbol(GtkWidget *window, struct symbol *sym,
gtk_tree_view_set_model(GTK_TREE_VIEW(view), GTK_TREE_MODEL(store));
g_object_unref(GTK_TREE_MODEL(store));
- list_for_each_entry(pos, &notes->src->source, node) {
+ list_for_each_entry(pos, &notes->src->source, al.node) {
GtkTreeIter iter;
int ret = 0;
@@ -148,8 +148,8 @@ static int perf_gtk__annotate_symbol(GtkWidget *window, struct symbol *sym,
gtk_container_add(GTK_CONTAINER(window), view);
- list_for_each_entry_safe(pos, n, &notes->src->source, node) {
- list_del(&pos->node);
+ list_for_each_entry_safe(pos, n, &notes->src->source, al.node) {
+ list_del(&pos->al.node);
disasm_line__free(pos);
}
@@ -169,8 +169,7 @@ static int symbol__gtk_annotate(struct symbol *sym, struct map *map,
if (map->dso->annotate_warned)
return -1;
- err = symbol__disassemble(sym, map, perf_evsel__env_arch(evsel),
- 0, NULL, NULL);
+ err = symbol__annotate(sym, map, evsel, 0, NULL, NULL);
if (err) {
char msg[BUFSIZ];
symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg));
@@ -178,6 +177,8 @@ static int symbol__gtk_annotate(struct symbol *sym, struct map *map,
return -1;
}
+ symbol__calc_percent(sym, evsel);
+
if (perf_gtk__is_active_context(pgctx)) {
window = pgctx->main_window;
notebook = pgctx->notebook;
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 3369c7830260..facad1e279a8 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -322,6 +322,8 @@ static int comment__symbol(char *raw, char *comment, u64 *addrp, char **namep)
return 0;
*addrp = strtoull(comment, &endptr, 16);
+ if (endptr == comment)
+ return 0;
name = strchr(endptr, '<');
if (name == NULL)
return -1;
@@ -435,8 +437,8 @@ static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map *m
return 0;
comment = ltrim(comment);
- comment__symbol(ops->source.raw, comment, &ops->source.addr, &ops->source.name);
- comment__symbol(ops->target.raw, comment, &ops->target.addr, &ops->target.name);
+ comment__symbol(ops->source.raw, comment + 1, &ops->source.addr, &ops->source.name);
+ comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name);
return 0;
@@ -480,7 +482,7 @@ static int dec__parse(struct arch *arch __maybe_unused, struct ins_operands *ops
return 0;
comment = ltrim(comment);
- comment__symbol(ops->target.raw, comment, &ops->target.addr, &ops->target.name);
+ comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name);
return 0;
}
@@ -878,32 +880,99 @@ out_free_name:
return -1;
}
-static struct disasm_line *disasm_line__new(s64 offset, char *line,
- size_t privsize, int line_nr,
- struct arch *arch,
- struct map *map)
+struct annotate_args {
+ size_t privsize;
+ struct arch *arch;
+ struct map *map;
+ struct perf_evsel *evsel;
+ s64 offset;
+ char *line;
+ int line_nr;
+};
+
+static void annotation_line__delete(struct annotation_line *al)
{
- struct disasm_line *dl = zalloc(sizeof(*dl) + privsize);
+ void *ptr = (void *) al - al->privsize;
+
+ free_srcline(al->path);
+ zfree(&al->line);
+ free(ptr);
+}
+
+/*
+ * Allocating the annotation line data with following
+ * structure:
+ *
+ * --------------------------------------
+ * private space | struct annotation_line
+ * --------------------------------------
+ *
+ * Size of the private space is stored in 'struct annotation_line'.
+ *
+ */
+static struct annotation_line *
+annotation_line__new(struct annotate_args *args, size_t privsize)
+{
+ struct annotation_line *al;
+ struct perf_evsel *evsel = args->evsel;
+ size_t size = privsize + sizeof(*al);
+ int nr = 1;
+
+ if (perf_evsel__is_group_event(evsel))
+ nr = evsel->nr_members;
+
+ size += sizeof(al->samples[0]) * nr;
+
+ al = zalloc(size);
+ if (al) {
+ al = (void *) al + privsize;
+ al->privsize = privsize;
+ al->offset = args->offset;
+ al->line = strdup(args->line);
+ al->line_nr = args->line_nr;
+ al->samples_nr = nr;
+ }
+
+ return al;
+}
+
+/*
+ * Allocating the disasm annotation line data with
+ * following structure:
+ *
+ * ------------------------------------------------------------
+ * privsize space | struct disasm_line | struct annotation_line
+ * ------------------------------------------------------------
+ *
+ * We have 'struct annotation_line' member as last member
+ * of 'struct disasm_line' to have an easy access.
+ *
+ */
+static struct disasm_line *disasm_line__new(struct annotate_args *args)
+{
+ struct disasm_line *dl = NULL;
+ struct annotation_line *al;
+ size_t privsize = args->privsize + offsetof(struct disasm_line, al);
+
+ al = annotation_line__new(args, privsize);
+ if (al != NULL) {
+ dl = disasm_line(al);
- if (dl != NULL) {
- dl->offset = offset;
- dl->line = strdup(line);
- dl->line_nr = line_nr;
- if (dl->line == NULL)
+ if (dl->al.line == NULL)
goto out_delete;
- if (offset != -1) {
- if (disasm_line__parse(dl->line, &dl->ins.name, &dl->ops.raw) < 0)
+ if (args->offset != -1) {
+ if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0)
goto out_free_line;
- disasm_line__init_ins(dl, arch, map);
+ disasm_line__init_ins(dl, args->arch, args->map);
}
}
return dl;
out_free_line:
- zfree(&dl->line);
+ zfree(&dl->al.line);
out_delete:
free(dl);
return NULL;
@@ -911,14 +980,13 @@ out_delete:
void disasm_line__free(struct disasm_line *dl)
{
- zfree(&dl->line);
if (dl->ins.ops && dl->ins.ops->free)
dl->ins.ops->free(&dl->ops);
else
ins__delete(&dl->ops);
free((void *)dl->ins.name);
dl->ins.name = NULL;
- free(dl);
+ annotation_line__delete(&dl->al);
}
int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw)
@@ -929,12 +997,13 @@ int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool r
return ins__scnprintf(&dl->ins, bf, size, &dl->ops);
}
-static void disasm__add(struct list_head *head, struct disasm_line *line)
+static void annotation_line__add(struct annotation_line *al, struct list_head *head)
{
- list_add_tail(&line->node, head);
+ list_add_tail(&al->node, head);
}
-struct disasm_line *disasm__get_next_ip_line(struct list_head *head, struct disasm_line *pos)
+struct annotation_line *
+annotation_line__next(struct annotation_line *pos, struct list_head *head)
{
list_for_each_entry_continue(pos, head, node)
if (pos->offset >= 0)
@@ -943,50 +1012,6 @@ struct disasm_line *disasm__get_next_ip_line(struct list_head *head, struct disa
return NULL;
}
-double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset,
- s64 end, const char **path, struct sym_hist_entry *sample)
-{
- struct source_line *src_line = notes->src->lines;
- double percent = 0.0;
-
- sample->nr_samples = sample->period = 0;
-
- if (src_line) {
- size_t sizeof_src_line = sizeof(*src_line) +
- sizeof(src_line->samples) * (src_line->nr_pcnt - 1);
-
- while (offset < end) {
- src_line = (void *)notes->src->lines +
- (sizeof_src_line * offset);
-
- if (*path == NULL)
- *path = src_line->path;
-
- percent += src_line->samples[evidx].percent;
- sample->nr_samples += src_line->samples[evidx].nr;
- offset++;
- }
- } else {
- struct sym_hist *h = annotation__histogram(notes, evidx);
- unsigned int hits = 0;
- u64 period = 0;
-
- while (offset < end) {
- hits += h->addr[offset].nr_samples;
- period += h->addr[offset].period;
- ++offset;
- }
-
- if (h->nr_samples) {
- sample->period = period;
- sample->nr_samples = hits;
- percent = 100.0 * hits / h->nr_samples;
- }
- }
-
- return percent;
-}
-
static const char *annotate__address_color(struct block_range *br)
{
double cov = block_range__coverage(br);
@@ -1069,50 +1094,39 @@ static void annotate__branch_printf(struct block_range *br, u64 addr)
}
}
+static int disasm_line__print(struct disasm_line *dl, u64 start, int addr_fmt_width)
+{
+ s64 offset = dl->al.offset;
+ const u64 addr = start + offset;
+ struct block_range *br;
+
+ br = block_range__find(addr);
+ color_fprintf(stdout, annotate__address_color(br), " %*" PRIx64 ":", addr_fmt_width, addr);
+ color_fprintf(stdout, annotate__asm_color(br), "%s", dl->al.line);
+ annotate__branch_printf(br, addr);
+ return 0;
+}
-static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 start,
- struct perf_evsel *evsel, u64 len, int min_pcnt, int printed,
- int max_lines, struct disasm_line *queue)
+static int
+annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start,
+ struct perf_evsel *evsel, u64 len, int min_pcnt, int printed,
+ int max_lines, struct annotation_line *queue, int addr_fmt_width)
{
+ struct disasm_line *dl = container_of(al, struct disasm_line, al);
static const char *prev_line;
static const char *prev_color;
- if (dl->offset != -1) {
- const char *path = NULL;
- double percent, max_percent = 0.0;
- double *ppercents = &percent;
- struct sym_hist_entry sample;
- struct sym_hist_entry *psamples = &sample;
+ if (al->offset != -1) {
+ double max_percent = 0.0;
int i, nr_percent = 1;
const char *color;
struct annotation *notes = symbol__annotation(sym);
- s64 offset = dl->offset;
- const u64 addr = start + offset;
- struct disasm_line *next;
- struct block_range *br;
-
- next = disasm__get_next_ip_line(&notes->src->source, dl);
-
- if (perf_evsel__is_group_event(evsel)) {
- nr_percent = evsel->nr_members;
- ppercents = calloc(nr_percent, sizeof(double));
- psamples = calloc(nr_percent, sizeof(struct sym_hist_entry));
- if (ppercents == NULL || psamples == NULL) {
- return -1;
- }
- }
- for (i = 0; i < nr_percent; i++) {
- percent = disasm__calc_percent(notes,
- notes->src->lines ? i : evsel->idx + i,
- offset,
- next ? next->offset : (s64) len,
- &path, &sample);
-
- ppercents[i] = percent;
- psamples[i] = sample;
- if (percent > max_percent)
- max_percent = percent;
+ for (i = 0; i < al->samples_nr; i++) {
+ struct annotation_data *sample = &al->samples[i];
+
+ if (sample->percent > max_percent)
+ max_percent = sample->percent;
}
if (max_percent < min_pcnt)
@@ -1123,10 +1137,10 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st
if (queue != NULL) {
list_for_each_entry_from(queue, &notes->src->source, node) {
- if (queue == dl)
+ if (queue == al)
break;
- disasm_line__print(queue, sym, start, evsel, len,
- 0, 0, 1, NULL);
+ annotation_line__print(queue, sym, start, evsel, len,
+ 0, 0, 1, NULL, addr_fmt_width);
}
}
@@ -1137,44 +1151,34 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st
* the same color than the percentage. Don't print it
* twice for close colored addr with the same filename:line
*/
- if (path) {
- if (!prev_line || strcmp(prev_line, path)
+ if (al->path) {
+ if (!prev_line || strcmp(prev_line, al->path)
|| color != prev_color) {
- color_fprintf(stdout, color, " %s", path);
- prev_line = path;
+ color_fprintf(stdout, color, " %s", al->path);
+ prev_line = al->path;
prev_color = color;
}
}
for (i = 0; i < nr_percent; i++) {
- percent = ppercents[i];
- sample = psamples[i];
- color = get_percent_color(percent);
+ struct annotation_data *sample = &al->samples[i];
+
+ color = get_percent_color(sample->percent);
if (symbol_conf.show_total_period)
color_fprintf(stdout, color, " %11" PRIu64,
- sample.period);
+ sample->he.period);
else if (symbol_conf.show_nr_samples)
color_fprintf(stdout, color, " %7" PRIu64,
- sample.nr_samples);
+ sample->he.nr_samples);
else
- color_fprintf(stdout, color, " %7.2f", percent);
+ color_fprintf(stdout, color, " %7.2f", sample->percent);
}
- printf(" : ");
+ printf(" : ");
- br = block_range__find(addr);
- color_fprintf(stdout, annotate__address_color(br), " %" PRIx64 ":", addr);
- color_fprintf(stdout, annotate__asm_color(br), "%s", dl->line);
- annotate__branch_printf(br, addr);
+ disasm_line__print(dl, start, addr_fmt_width);
printf("\n");
-
- if (ppercents != &percent)
- free(ppercents);
-
- if (psamples != &sample)
- free(psamples);
-
} else if (max_lines && printed >= max_lines)
return 1;
else {
@@ -1186,10 +1190,10 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st
if (perf_evsel__is_group_event(evsel))
width *= evsel->nr_members;
- if (!*dl->line)
+ if (!*al->line)
printf(" %*s:\n", width, " ");
else
- printf(" %*s: %s\n", width, " ", dl->line);
+ printf(" %*s: %*s %s\n", width, " ", addr_fmt_width, " ", al->line);
}
return 0;
@@ -1215,11 +1219,11 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st
* means that it's not a disassembly line so should be treated differently.
* The ops.raw part will be parsed further according to type of the instruction.
*/
-static int symbol__parse_objdump_line(struct symbol *sym, struct map *map,
- struct arch *arch,
- FILE *file, size_t privsize,
+static int symbol__parse_objdump_line(struct symbol *sym, FILE *file,
+ struct annotate_args *args,
int *line_nr)
{
+ struct map *map = args->map;
struct annotation *notes = symbol__annotation(sym);
struct disasm_line *dl;
char *line = NULL, *parsed_line, *tmp, *tmp2;
@@ -1263,7 +1267,11 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map,
parsed_line = tmp2 + 1;
}
- dl = disasm_line__new(offset, parsed_line, privsize, *line_nr, arch, map);
+ args->offset = offset;
+ args->line = parsed_line;
+ args->line_nr = *line_nr;
+
+ dl = disasm_line__new(args);
free(line);
(*line_nr)++;
@@ -1288,7 +1296,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map,
dl->ops.target.name = strdup(target.sym->name);
}
- disasm__add(&notes->src->source, dl);
+ annotation_line__add(&dl->al, &notes->src->source);
return 0;
}
@@ -1305,19 +1313,19 @@ static void delete_last_nop(struct symbol *sym)
struct disasm_line *dl;
while (!list_empty(list)) {
- dl = list_entry(list->prev, struct disasm_line, node);
+ dl = list_entry(list->prev, struct disasm_line, al.node);
if (dl->ins.ops) {
if (dl->ins.ops != &nop_ops)
return;
} else {
- if (!strstr(dl->line, " nop ") &&
- !strstr(dl->line, " nopl ") &&
- !strstr(dl->line, " nopw "))
+ if (!strstr(dl->al.line, " nop ") &&
+ !strstr(dl->al.line, " nopl ") &&
+ !strstr(dl->al.line, " nopw "))
return;
}
- list_del(&dl->node);
+ list_del(&dl->al.node);
disasm_line__free(dl);
}
}
@@ -1424,13 +1432,11 @@ static const char *annotate__norm_arch(const char *arch_name)
return normalize_arch((char *)arch_name);
}
-int symbol__disassemble(struct symbol *sym, struct map *map,
- const char *arch_name, size_t privsize,
- struct arch **parch, char *cpuid)
+static int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
{
+ struct map *map = args->map;
struct dso *dso = map->dso;
char command[PATH_MAX * 2];
- struct arch *arch = NULL;
FILE *file;
char symfs_filename[PATH_MAX];
struct kcore_extract kce;
@@ -1444,25 +1450,6 @@ int symbol__disassemble(struct symbol *sym, struct map *map,
if (err)
return err;
- arch_name = annotate__norm_arch(arch_name);
- if (!arch_name)
- return -1;
-
- arch = arch__find(arch_name);
- if (arch == NULL)
- return -ENOTSUP;
-
- if (parch)
- *parch = arch;
-
- if (arch->init) {
- err = arch->init(arch, cpuid);
- if (err) {
- pr_err("%s: failed to initialize %s arch priv area\n", __func__, arch->name);
- return err;
- }
- }
-
pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__,
symfs_filename, sym->name, map->unmap_ip(map, sym->start),
map->unmap_ip(map, sym->end));
@@ -1546,8 +1533,7 @@ int symbol__disassemble(struct symbol *sym, struct map *map,
* can associate it with the instructions till the next one.
* See disasm_line__new() and struct disasm_line::line_nr.
*/
- if (symbol__parse_objdump_line(sym, map, arch, file, privsize,
- &lineno) < 0)
+ if (symbol__parse_objdump_line(sym, file, args, &lineno) < 0)
break;
nline++;
}
@@ -1580,21 +1566,113 @@ out_close_stdout:
goto out_remove_tmp;
}
-static void insert_source_line(struct rb_root *root, struct source_line *src_line)
+static void calc_percent(struct sym_hist *hist,
+ struct annotation_data *sample,
+ s64 offset, s64 end)
{
- struct source_line *iter;
+ unsigned int hits = 0;
+ u64 period = 0;
+
+ while (offset < end) {
+ hits += hist->addr[offset].nr_samples;
+ period += hist->addr[offset].period;
+ ++offset;
+ }
+
+ if (hist->nr_samples) {
+ sample->he.period = period;
+ sample->he.nr_samples = hits;
+ sample->percent = 100.0 * hits / hist->nr_samples;
+ }
+}
+
+static void annotation__calc_percent(struct annotation *notes,
+ struct perf_evsel *evsel, s64 len)
+{
+ struct annotation_line *al, *next;
+
+ list_for_each_entry(al, &notes->src->source, node) {
+ s64 end;
+ int i;
+
+ if (al->offset == -1)
+ continue;
+
+ next = annotation_line__next(al, &notes->src->source);
+ end = next ? next->offset : len;
+
+ for (i = 0; i < al->samples_nr; i++) {
+ struct annotation_data *sample;
+ struct sym_hist *hist;
+
+ hist = annotation__histogram(notes, evsel->idx + i);
+ sample = &al->samples[i];
+
+ calc_percent(hist, sample, al->offset, end);
+ }
+ }
+}
+
+void symbol__calc_percent(struct symbol *sym, struct perf_evsel *evsel)
+{
+ struct annotation *notes = symbol__annotation(sym);
+
+ annotation__calc_percent(notes, evsel, symbol__size(sym));
+}
+
+int symbol__annotate(struct symbol *sym, struct map *map,
+ struct perf_evsel *evsel, size_t privsize,
+ struct arch **parch, char *cpuid)
+{
+ struct annotate_args args = {
+ .privsize = privsize,
+ .map = map,
+ .evsel = evsel,
+ };
+ const char *arch_name = NULL;
+ struct arch *arch;
+ int err;
+
+ if (evsel)
+ arch_name = perf_evsel__env_arch(evsel);
+
+ arch_name = annotate__norm_arch(arch_name);
+ if (!arch_name)
+ return -1;
+
+ args.arch = arch = arch__find(arch_name);
+ if (arch == NULL)
+ return -ENOTSUP;
+
+ if (parch)
+ *parch = arch;
+
+ if (arch->init) {
+ err = arch->init(arch, cpuid);
+ if (err) {
+ pr_err("%s: failed to initialize %s arch priv area\n", __func__, arch->name);
+ return err;
+ }
+ }
+
+ return symbol__disassemble(sym, &args);
+}
+
+static void insert_source_line(struct rb_root *root, struct annotation_line *al)
+{
+ struct annotation_line *iter;
struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL;
int i, ret;
while (*p != NULL) {
parent = *p;
- iter = rb_entry(parent, struct source_line, node);
+ iter = rb_entry(parent, struct annotation_line, rb_node);
- ret = strcmp(iter->path, src_line->path);
+ ret = strcmp(iter->path, al->path);
if (ret == 0) {
- for (i = 0; i < src_line->nr_pcnt; i++)
- iter->samples[i].percent_sum += src_line->samples[i].percent;
+ for (i = 0; i < al->samples_nr; i++)
+ iter->samples[i].percent_sum += al->samples[i].percent;
return;
}
@@ -1604,18 +1682,18 @@ static void insert_source_line(struct rb_root *root, struct source_line *src_lin
p = &(*p)->rb_right;
}
- for (i = 0; i < src_line->nr_pcnt; i++)
- src_line->samples[i].percent_sum = src_line->samples[i].percent;
+ for (i = 0; i < al->samples_nr; i++)
+ al->samples[i].percent_sum = al->samples[i].percent;
- rb_link_node(&src_line->node, parent, p);
- rb_insert_color(&src_line->node, root);
+ rb_link_node(&al->rb_node, parent, p);
+ rb_insert_color(&al->rb_node, root);
}
-static int cmp_source_line(struct source_line *a, struct source_line *b)
+static int cmp_source_line(struct annotation_line *a, struct annotation_line *b)
{
int i;
- for (i = 0; i < a->nr_pcnt; i++) {
+ for (i = 0; i < a->samples_nr; i++) {
if (a->samples[i].percent_sum == b->samples[i].percent_sum)
continue;
return a->samples[i].percent_sum > b->samples[i].percent_sum;
@@ -1624,135 +1702,47 @@ static int cmp_source_line(struct source_line *a, struct source_line *b)
return 0;
}
-static void __resort_source_line(struct rb_root *root, struct source_line *src_line)
+static void __resort_source_line(struct rb_root *root, struct annotation_line *al)
{
- struct source_line *iter;
+ struct annotation_line *iter;
struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL;
while (*p != NULL) {
parent = *p;
- iter = rb_entry(parent, struct source_line, node);
+ iter = rb_entry(parent, struct annotation_line, rb_node);
- if (cmp_source_line(src_line, iter))
+ if (cmp_source_line(al, iter))
p = &(*p)->rb_left;
else
p = &(*p)->rb_right;
}
- rb_link_node(&src_line->node, parent, p);
- rb_insert_color(&src_line->node, root);
+ rb_link_node(&al->rb_node, parent, p);
+ rb_insert_color(&al->rb_node, root);
}
static void resort_source_line(struct rb_root *dest_root, struct rb_root *src_root)
{
- struct source_line *src_line;
+ struct annotation_line *al;
struct rb_node *node;
node = rb_first(src_root);
while (node) {
struct rb_node *next;
- src_line = rb_entry(node, struct source_line, node);
+ al = rb_entry(node, struct annotation_line, rb_node);
next = rb_next(node);
rb_erase(node, src_root);
- __resort_source_line(dest_root, src_line);
+ __resort_source_line(dest_root, al);
node = next;
}
}
-static void symbol__free_source_line(struct symbol *sym, int len)
-{
- struct annotation *notes = symbol__annotation(sym);
- struct source_line *src_line = notes->src->lines;
- size_t sizeof_src_line;
- int i;
-
- sizeof_src_line = sizeof(*src_line) +
- (sizeof(src_line->samples) * (src_line->nr_pcnt - 1));
-
- for (i = 0; i < len; i++) {
- free_srcline(src_line->path);
- src_line = (void *)src_line + sizeof_src_line;
- }
-
- zfree(&notes->src->lines);
-}
-
-/* Get the filename:line for the colored entries */
-static int symbol__get_source_line(struct symbol *sym, struct map *map,
- struct perf_evsel *evsel,
- struct rb_root *root, int len)
-{
- u64 start;
- int i, k;
- int evidx = evsel->idx;
- struct source_line *src_line;
- struct annotation *notes = symbol__annotation(sym);
- struct sym_hist *h = annotation__histogram(notes, evidx);
- struct rb_root tmp_root = RB_ROOT;
- int nr_pcnt = 1;
- u64 nr_samples = h->nr_samples;
- size_t sizeof_src_line = sizeof(struct source_line);
-
- if (perf_evsel__is_group_event(evsel)) {
- for (i = 1; i < evsel->nr_members; i++) {
- h = annotation__histogram(notes, evidx + i);
- nr_samples += h->nr_samples;
- }
- nr_pcnt = evsel->nr_members;
- sizeof_src_line += (nr_pcnt - 1) * sizeof(src_line->samples);
- }
-
- if (!nr_samples)
- return 0;
-
- src_line = notes->src->lines = calloc(len, sizeof_src_line);
- if (!notes->src->lines)
- return -1;
-
- start = map__rip_2objdump(map, sym->start);
-
- for (i = 0; i < len; i++) {
- u64 offset;
- double percent_max = 0.0;
-
- src_line->nr_pcnt = nr_pcnt;
-
- for (k = 0; k < nr_pcnt; k++) {
- double percent = 0.0;
-
- h = annotation__histogram(notes, evidx + k);
- nr_samples = h->addr[i].nr_samples;
- if (h->nr_samples)
- percent = 100.0 * nr_samples / h->nr_samples;
-
- if (percent > percent_max)
- percent_max = percent;
- src_line->samples[k].percent = percent;
- src_line->samples[k].nr = nr_samples;
- }
-
- if (percent_max <= 0.5)
- goto next;
-
- offset = start + i;
- src_line->path = get_srcline(map->dso, offset, NULL,
- false, true);
- insert_source_line(&tmp_root, src_line);
-
- next:
- src_line = (void *)src_line + sizeof_src_line;
- }
-
- resort_source_line(root, &tmp_root);
- return 0;
-}
-
static void print_summary(struct rb_root *root, const char *filename)
{
- struct source_line *src_line;
+ struct annotation_line *al;
struct rb_node *node;
printf("\nSorted summary for file %s\n", filename);
@@ -1770,9 +1760,9 @@ static void print_summary(struct rb_root *root, const char *filename)
char *path;
int i;
- src_line = rb_entry(node, struct source_line, node);
- for (i = 0; i < src_line->nr_pcnt; i++) {
- percent = src_line->samples[i].percent_sum;
+ al = rb_entry(node, struct annotation_line, rb_node);
+ for (i = 0; i < al->samples_nr; i++) {
+ percent = al->samples[i].percent_sum;
color = get_percent_color(percent);
color_fprintf(stdout, color, " %7.2f", percent);
@@ -1780,7 +1770,7 @@ static void print_summary(struct rb_root *root, const char *filename)
percent_max = percent;
}
- path = src_line->path;
+ path = al->path;
color = get_percent_color(percent_max);
color_fprintf(stdout, color, " %s\n", path);
@@ -1801,6 +1791,19 @@ static void symbol__annotate_hits(struct symbol *sym, struct perf_evsel *evsel)
printf("%*s: %" PRIu64 "\n", BITS_PER_LONG / 2, "h->nr_samples", h->nr_samples);
}
+static int annotated_source__addr_fmt_width(struct list_head *lines, u64 start)
+{
+ char bf[32];
+ struct annotation_line *line;
+
+ list_for_each_entry_reverse(line, lines, node) {
+ if (line->offset != -1)
+ return scnprintf(bf, sizeof(bf), "%" PRIx64, start + line->offset);
+ }
+
+ return 0;
+}
+
int symbol__annotate_printf(struct symbol *sym, struct map *map,
struct perf_evsel *evsel, bool full_paths,
int min_pcnt, int max_lines, int context)
@@ -1811,9 +1814,9 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map,
const char *evsel_name = perf_evsel__name(evsel);
struct annotation *notes = symbol__annotation(sym);
struct sym_hist *h = annotation__histogram(notes, evsel->idx);
- struct disasm_line *pos, *queue = NULL;
+ struct annotation_line *pos, *queue = NULL;
u64 start = map__rip_2objdump(map, sym->start);
- int printed = 2, queue_len = 0;
+ int printed = 2, queue_len = 0, addr_fmt_width;
int more = 0;
u64 len;
int width = symbol_conf.show_total_period ? 12 : 8;
@@ -1844,15 +1847,21 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map,
if (verbose > 0)
symbol__annotate_hits(sym, evsel);
+ addr_fmt_width = annotated_source__addr_fmt_width(&notes->src->source, start);
+
list_for_each_entry(pos, &notes->src->source, node) {
+ int err;
+
if (context && queue == NULL) {
queue = pos;
queue_len = 0;
}
- switch (disasm_line__print(pos, sym, start, evsel, len,
- min_pcnt, printed, max_lines,
- queue)) {
+ err = annotation_line__print(pos, sym, start, evsel, len,
+ min_pcnt, printed, max_lines,
+ queue, addr_fmt_width);
+
+ switch (err) {
case 0:
++printed;
if (context) {
@@ -1907,13 +1916,13 @@ void symbol__annotate_decay_histogram(struct symbol *sym, int evidx)
}
}
-void disasm__purge(struct list_head *head)
+void annotated_source__purge(struct annotated_source *as)
{
- struct disasm_line *pos, *n;
+ struct annotation_line *al, *n;
- list_for_each_entry_safe(pos, n, head, node) {
- list_del(&pos->node);
- disasm_line__free(pos);
+ list_for_each_entry_safe(al, n, &as->source, node) {
+ list_del(&al->node);
+ disasm_line__free(disasm_line(al));
}
}
@@ -1921,10 +1930,10 @@ static size_t disasm_line__fprintf(struct disasm_line *dl, FILE *fp)
{
size_t printed;
- if (dl->offset == -1)
- return fprintf(fp, "%s\n", dl->line);
+ if (dl->al.offset == -1)
+ return fprintf(fp, "%s\n", dl->al.line);
- printed = fprintf(fp, "%#" PRIx64 " %s", dl->offset, dl->ins.name);
+ printed = fprintf(fp, "%#" PRIx64 " %s", dl->al.offset, dl->ins.name);
if (dl->ops.raw[0] != '\0') {
printed += fprintf(fp, "%.*s %s\n", 6 - (int)printed, " ",
@@ -1939,38 +1948,72 @@ size_t disasm__fprintf(struct list_head *head, FILE *fp)
struct disasm_line *pos;
size_t printed = 0;
- list_for_each_entry(pos, head, node)
+ list_for_each_entry(pos, head, al.node)
printed += disasm_line__fprintf(pos, fp);
return printed;
}
+static void annotation__calc_lines(struct annotation *notes, struct map *map,
+ struct rb_root *root, u64 start)
+{
+ struct annotation_line *al;
+ struct rb_root tmp_root = RB_ROOT;
+
+ list_for_each_entry(al, &notes->src->source, node) {
+ double percent_max = 0.0;
+ int i;
+
+ for (i = 0; i < al->samples_nr; i++) {
+ struct annotation_data *sample;
+
+ sample = &al->samples[i];
+
+ if (sample->percent > percent_max)
+ percent_max = sample->percent;
+ }
+
+ if (percent_max <= 0.5)
+ continue;
+
+ al->path = get_srcline(map->dso, start + al->offset, NULL, false, true);
+ insert_source_line(&tmp_root, al);
+ }
+
+ resort_source_line(root, &tmp_root);
+}
+
+static void symbol__calc_lines(struct symbol *sym, struct map *map,
+ struct rb_root *root)
+{
+ struct annotation *notes = symbol__annotation(sym);
+ u64 start = map__rip_2objdump(map, sym->start);
+
+ annotation__calc_lines(notes, map, root, start);
+}
+
int symbol__tty_annotate(struct symbol *sym, struct map *map,
struct perf_evsel *evsel, bool print_lines,
bool full_paths, int min_pcnt, int max_lines)
{
struct dso *dso = map->dso;
struct rb_root source_line = RB_ROOT;
- u64 len;
- if (symbol__disassemble(sym, map, perf_evsel__env_arch(evsel),
- 0, NULL, NULL) < 0)
+ if (symbol__annotate(sym, map, evsel, 0, NULL, NULL) < 0)
return -1;
- len = symbol__size(sym);
+ symbol__calc_percent(sym, evsel);
if (print_lines) {
srcline_full_filename = full_paths;
- symbol__get_source_line(sym, map, evsel, &source_line, len);
+ symbol__calc_lines(sym, map, &source_line);
print_summary(&source_line, dso->long_name);
}
symbol__annotate_printf(sym, map, evsel, full_paths,
min_pcnt, max_lines, 0);
- if (print_lines)
- symbol__free_source_line(sym, len);
- disasm__purge(&symbol__annotation(sym)->src->source);
+ annotated_source__purge(symbol__annotation(sym)->src);
return 0;
}
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index f6ba3560de5e..6d7289e88fa3 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -59,33 +59,55 @@ bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2);
struct annotation;
+struct sym_hist_entry {
+ u64 nr_samples;
+ u64 period;
+};
+
+struct annotation_data {
+ double percent;
+ double percent_sum;
+ struct sym_hist_entry he;
+};
+
+struct annotation_line {
+ struct list_head node;
+ struct rb_node rb_node;
+ s64 offset;
+ char *line;
+ int line_nr;
+ float ipc;
+ u64 cycles;
+ size_t privsize;
+ char *path;
+ int samples_nr;
+ struct annotation_data samples[0];
+};
+
struct disasm_line {
- struct list_head node;
- s64 offset;
- char *line;
- struct ins ins;
- int line_nr;
- float ipc;
- u64 cycles;
- struct ins_operands ops;
+ struct ins ins;
+ struct ins_operands ops;
+
+ /* This needs to be at the end. */
+ struct annotation_line al;
};
+static inline struct disasm_line *disasm_line(struct annotation_line *al)
+{
+ return al ? container_of(al, struct disasm_line, al) : NULL;
+}
+
static inline bool disasm_line__has_offset(const struct disasm_line *dl)
{
return dl->ops.target.offset_avail;
}
-struct sym_hist_entry {
- u64 nr_samples;
- u64 period;
-};
-
void disasm_line__free(struct disasm_line *dl);
-struct disasm_line *disasm__get_next_ip_line(struct list_head *head, struct disasm_line *pos);
+struct annotation_line *
+annotation_line__next(struct annotation_line *pos, struct list_head *head);
int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw);
size_t disasm__fprintf(struct list_head *head, FILE *fp);
-double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset,
- s64 end, const char **path, struct sym_hist_entry *sample);
+void symbol__calc_percent(struct symbol *sym, struct perf_evsel *evsel);
struct sym_hist {
u64 nr_samples;
@@ -104,19 +126,6 @@ struct cyc_hist {
u16 reset;
};
-struct source_line_samples {
- double percent;
- double percent_sum;
- u64 nr;
-};
-
-struct source_line {
- struct rb_node node;
- char *path;
- int nr_pcnt;
- struct source_line_samples samples[1];
-};
-
/** struct annotated_source - symbols with hits have this attached as in sannotation
*
* @histogram: Array of addr hit histograms per event being monitored
@@ -132,7 +141,6 @@ struct source_line {
*/
struct annotated_source {
struct list_head source;
- struct source_line *lines;
int nr_histograms;
size_t sizeof_sym_hist;
struct cyc_hist *cycles_hist;
@@ -169,9 +177,9 @@ int hist_entry__inc_addr_samples(struct hist_entry *he, struct perf_sample *samp
int symbol__alloc_hist(struct symbol *sym);
void symbol__annotate_zero_histograms(struct symbol *sym);
-int symbol__disassemble(struct symbol *sym, struct map *map,
- const char *arch_name, size_t privsize,
- struct arch **parch, char *cpuid);
+int symbol__annotate(struct symbol *sym, struct map *map,
+ struct perf_evsel *evsel, size_t privsize,
+ struct arch **parch, char *cpuid);
enum symbol_disassemble_errno {
SYMBOL_ANNOTATE_ERRNO__SUCCESS = 0,
@@ -198,7 +206,7 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map,
int min_pcnt, int max_lines, int context);
void symbol__annotate_zero_histogram(struct symbol *sym, int evidx);
void symbol__annotate_decay_histogram(struct symbol *sym, int evidx);
-void disasm__purge(struct list_head *head);
+void annotated_source__purge(struct annotated_source *as);
bool ui__has_annotation(void);
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index b62e523a7035..3570355bcf39 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -125,7 +125,7 @@ static void perf_evlist__purge(struct perf_evlist *evlist)
void perf_evlist__exit(struct perf_evlist *evlist)
{
zfree(&evlist->mmap);
- zfree(&evlist->backward_mmap);
+ zfree(&evlist->overwrite_mmap);
fdarray__exit(&evlist->pollfd);
}
@@ -675,11 +675,11 @@ static int perf_evlist__set_paused(struct perf_evlist *evlist, bool value)
{
int i;
- if (!evlist->backward_mmap)
+ if (!evlist->overwrite_mmap)
return 0;
for (i = 0; i < evlist->nr_mmaps; i++) {
- int fd = evlist->backward_mmap[i].fd;
+ int fd = evlist->overwrite_mmap[i].fd;
int err;
if (fd < 0)
@@ -711,7 +711,7 @@ union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, int
* No need for read-write ring buffer: kernel stop outputting when
* it hit md->prev (perf_mmap__consume()).
*/
- return perf_mmap__read_forward(md, evlist->overwrite);
+ return perf_mmap__read_forward(md);
}
union perf_event *perf_evlist__mmap_read_backward(struct perf_evlist *evlist, int idx)
@@ -738,7 +738,7 @@ void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx)
void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx)
{
- perf_mmap__consume(&evlist->mmap[idx], evlist->overwrite);
+ perf_mmap__consume(&evlist->mmap[idx], false);
}
static void perf_evlist__munmap_nofree(struct perf_evlist *evlist)
@@ -749,16 +749,16 @@ static void perf_evlist__munmap_nofree(struct perf_evlist *evlist)
for (i = 0; i < evlist->nr_mmaps; i++)
perf_mmap__munmap(&evlist->mmap[i]);
- if (evlist->backward_mmap)
+ if (evlist->overwrite_mmap)
for (i = 0; i < evlist->nr_mmaps; i++)
- perf_mmap__munmap(&evlist->backward_mmap[i]);
+ perf_mmap__munmap(&evlist->overwrite_mmap[i]);
}
void perf_evlist__munmap(struct perf_evlist *evlist)
{
perf_evlist__munmap_nofree(evlist);
zfree(&evlist->mmap);
- zfree(&evlist->backward_mmap);
+ zfree(&evlist->overwrite_mmap);
}
static struct perf_mmap *perf_evlist__alloc_mmap(struct perf_evlist *evlist)
@@ -800,7 +800,7 @@ perf_evlist__should_poll(struct perf_evlist *evlist __maybe_unused,
static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
struct mmap_params *mp, int cpu_idx,
- int thread, int *_output, int *_output_backward)
+ int thread, int *_output, int *_output_overwrite)
{
struct perf_evsel *evsel;
int revent;
@@ -812,18 +812,20 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
int fd;
int cpu;
+ mp->prot = PROT_READ | PROT_WRITE;
if (evsel->attr.write_backward) {
- output = _output_backward;
- maps = evlist->backward_mmap;
+ output = _output_overwrite;
+ maps = evlist->overwrite_mmap;
if (!maps) {
maps = perf_evlist__alloc_mmap(evlist);
if (!maps)
return -1;
- evlist->backward_mmap = maps;
+ evlist->overwrite_mmap = maps;
if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY)
perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
}
+ mp->prot &= ~PROT_WRITE;
}
if (evsel->system_wide && thread)
@@ -884,14 +886,14 @@ static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist,
pr_debug2("perf event ring buffer mmapped per cpu\n");
for (cpu = 0; cpu < nr_cpus; cpu++) {
int output = -1;
- int output_backward = -1;
+ int output_overwrite = -1;
auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu,
true);
for (thread = 0; thread < nr_threads; thread++) {
if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu,
- thread, &output, &output_backward))
+ thread, &output, &output_overwrite))
goto out_unmap;
}
}
@@ -912,13 +914,13 @@ static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist,
pr_debug2("perf event ring buffer mmapped per thread\n");
for (thread = 0; thread < nr_threads; thread++) {
int output = -1;
- int output_backward = -1;
+ int output_overwrite = -1;
auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread,
false);
if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread,
- &output, &output_backward))
+ &output, &output_overwrite))
goto out_unmap;
}
@@ -1052,15 +1054,18 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
* Return: %0 on success, negative error code otherwise.
*/
int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
- bool overwrite, unsigned int auxtrace_pages,
+ unsigned int auxtrace_pages,
bool auxtrace_overwrite)
{
struct perf_evsel *evsel;
const struct cpu_map *cpus = evlist->cpus;
const struct thread_map *threads = evlist->threads;
- struct mmap_params mp = {
- .prot = PROT_READ | (overwrite ? 0 : PROT_WRITE),
- };
+ /*
+ * Delay setting mp.prot: set it before calling perf_mmap__mmap.
+ * Its value is decided by evsel's write_backward.
+ * So &mp should not be passed through const pointer.
+ */
+ struct mmap_params mp;
if (!evlist->mmap)
evlist->mmap = perf_evlist__alloc_mmap(evlist);
@@ -1070,7 +1075,6 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
return -ENOMEM;
- evlist->overwrite = overwrite;
evlist->mmap_len = perf_evlist__mmap_size(pages);
pr_debug("mmap size %zuB\n", evlist->mmap_len);
mp.mask = evlist->mmap_len - page_size - 1;
@@ -1091,10 +1095,9 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
return perf_evlist__mmap_per_cpu(evlist, &mp);
}
-int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
- bool overwrite)
+int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages)
{
- return perf_evlist__mmap_ex(evlist, pages, overwrite, 0, false);
+ return perf_evlist__mmap_ex(evlist, pages, 0, false);
}
int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)
@@ -1582,6 +1585,17 @@ int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *even
return perf_evsel__parse_sample(evsel, event, sample);
}
+int perf_evlist__parse_sample_timestamp(struct perf_evlist *evlist,
+					union perf_event *event,
+					u64 *timestamp)
+{
+	struct perf_evsel *evsel = perf_evlist__event2evsel(evlist, event);
+
+	/* Fail with -EFAULT when no evsel in the evlist maps to this event. */
+	return evsel ? perf_evsel__parse_sample_timestamp(evsel, event, timestamp)
+		     : -EFAULT;
+}
+
size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp)
{
struct perf_evsel *evsel;
@@ -1739,7 +1753,7 @@ void perf_evlist__toggle_bkw_mmap(struct perf_evlist *evlist,
RESUME,
} action = NONE;
- if (!evlist->backward_mmap)
+ if (!evlist->overwrite_mmap)
return;
switch (old_state) {
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 491f69542920..75160666d305 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -31,7 +31,6 @@ struct perf_evlist {
int nr_entries;
int nr_groups;
int nr_mmaps;
- bool overwrite;
bool enabled;
bool has_user_cpus;
size_t mmap_len;
@@ -45,7 +44,7 @@ struct perf_evlist {
} workload;
struct fdarray pollfd;
struct perf_mmap *mmap;
- struct perf_mmap *backward_mmap;
+ struct perf_mmap *overwrite_mmap;
struct thread_map *threads;
struct cpu_map *cpus;
struct perf_evsel *selected;
@@ -169,10 +168,9 @@ int perf_evlist__parse_mmap_pages(const struct option *opt,
unsigned long perf_event_mlock_kb_in_pages(void);
int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
- bool overwrite, unsigned int auxtrace_pages,
+ unsigned int auxtrace_pages,
bool auxtrace_overwrite);
-int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
- bool overwrite);
+int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages);
void perf_evlist__munmap(struct perf_evlist *evlist);
size_t perf_evlist__mmap_size(unsigned long pages);
@@ -205,6 +203,10 @@ u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist);
int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *event,
struct perf_sample *sample);
+int perf_evlist__parse_sample_timestamp(struct perf_evlist *evlist,
+ union perf_event *event,
+ u64 *timestamp);
+
bool perf_evlist__valid_sample_type(struct perf_evlist *evlist);
bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist);
bool perf_evlist__valid_read_format(struct perf_evlist *evlist);
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index d5fbcf8c7aa7..95853c51c0ca 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -779,6 +779,8 @@ static void apply_config_terms(struct perf_evsel *evsel,
case PERF_EVSEL__CONFIG_TERM_OVERWRITE:
attr->write_backward = term->val.overwrite ? 1 : 0;
break;
+ case PERF_EVSEL__CONFIG_TERM_DRV_CFG:
+ BUG_ON(1);
default:
break;
}
@@ -1960,6 +1962,20 @@ static inline bool overflow(const void *endp, u16 max_size, const void *offset,
#define OVERFLOW_CHECK_u64(offset) \
OVERFLOW_CHECK(offset, sizeof(u64), sizeof(u64))
+static int
+perf_event__check_size(union perf_event *event, unsigned int sample_size)
+{
+	/*
+	 * sample_size is derived from PERF_SAMPLE_MASK, i.e. it covers the
+	 * fields up to PERF_SAMPLE_PERIOD only.  Whatever follows must be
+	 * bounds-checked with overflow() so it cannot run past the event.
+	 */
+	size_t min_len = sample_size + sizeof(event->header);
+
+	/* -EFAULT when the event is shorter than header + sample_size. */
+	return min_len > event->header.size ? -EFAULT : 0;
+}
+
int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
struct perf_sample *data)
{
@@ -1981,6 +1997,8 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
data->stream_id = data->id = data->time = -1ULL;
data->period = evsel->attr.sample_period;
data->cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+ data->id = -1ULL;
+ data->data_src = PERF_MEM_DATA_SRC_NONE;
if (event->header.type != PERF_RECORD_SAMPLE) {
if (!evsel->attr.sample_id_all)
@@ -1990,15 +2008,9 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
array = event->sample.array;
- /*
- * The evsel's sample_size is based on PERF_SAMPLE_MASK which includes
- * up to PERF_SAMPLE_PERIOD. After that overflow() must be used to
- * check the format does not go past the end of the event.
- */
- if (evsel->sample_size + sizeof(event->header) > event->header.size)
+ if (perf_event__check_size(event, evsel->sample_size))
return -EFAULT;
- data->id = -1ULL;
if (type & PERF_SAMPLE_IDENTIFIER) {
data->id = *array;
array++;
@@ -2028,7 +2040,6 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
array++;
}
- data->addr = 0;
if (type & PERF_SAMPLE_ADDR) {
data->addr = *array;
array++;
@@ -2192,14 +2203,12 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
array++;
}
- data->data_src = PERF_MEM_DATA_SRC_NONE;
if (type & PERF_SAMPLE_DATA_SRC) {
OVERFLOW_CHECK_u64(array);
data->data_src = *array;
array++;
}
- data->transaction = 0;
if (type & PERF_SAMPLE_TRANSACTION) {
OVERFLOW_CHECK_u64(array);
data->transaction = *array;
@@ -2232,6 +2241,50 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
return 0;
}
+int perf_evsel__parse_sample_timestamp(struct perf_evsel *evsel,
+				       union perf_event *event,
+				       u64 *timestamp)
+{
+	struct perf_sample id_sample = { .time = -1ULL, };
+	const u64 sample_type = evsel->attr.sample_type;
+	const u64 *field;
+
+	/* Without PERF_SAMPLE_TIME there is no timestamp to extract. */
+	if (!(sample_type & PERF_SAMPLE_TIME))
+		return -1;
+
+	if (event->header.type == PERF_RECORD_SAMPLE) {
+		if (perf_event__check_size(event, evsel->sample_size))
+			return -EFAULT;
+
+		/*
+		 * Step over the u64 slot of each field laid out before the
+		 * time (identifier, ip, tid), counting only those selected
+		 * in sample_type.
+		 */
+		field = event->sample.array;
+
+		if (sample_type & PERF_SAMPLE_IDENTIFIER)
+			field++;
+		if (sample_type & PERF_SAMPLE_IP)
+			field++;
+		if (sample_type & PERF_SAMPLE_TID)
+			field++;
+
+		*timestamp = *field;
+		return 0;
+	}
+
+	/* Other records: time comes from the id sample, preset to -1ULL. */
+	if (!evsel->attr.sample_id_all)
+		return -1;
+	if (perf_evsel__parse_id_sample(evsel, event, &id_sample))
+		return -1;
+
+	*timestamp = id_sample.time;
+	return 0;
+}
+
size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
u64 read_format)
{
@@ -2743,8 +2796,9 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target,
break;
case EOPNOTSUPP:
if (evsel->attr.sample_period != 0)
- return scnprintf(msg, size, "%s",
- "PMU Hardware doesn't support sampling/overflow-interrupts.");
+ return scnprintf(msg, size,
+ "%s: PMU Hardware doesn't support sampling/overflow-interrupts. Try 'perf stat'",
+ perf_evsel__name(evsel));
if (evsel->attr.precise_ip)
return scnprintf(msg, size, "%s",
"\'precise\' request may not be supported. Try removing 'p' modifier.");
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 157f49e8a772..c3663a70c9b9 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -38,7 +38,7 @@ struct cgroup_sel;
* It is allocated within event parsing and attached to
* perf_evsel::config_terms list head.
*/
-enum {
+enum term_type {
PERF_EVSEL__CONFIG_TERM_PERIOD,
PERF_EVSEL__CONFIG_TERM_FREQ,
PERF_EVSEL__CONFIG_TERM_TIME,
@@ -49,12 +49,11 @@ enum {
PERF_EVSEL__CONFIG_TERM_OVERWRITE,
PERF_EVSEL__CONFIG_TERM_DRV_CFG,
PERF_EVSEL__CONFIG_TERM_BRANCH,
- PERF_EVSEL__CONFIG_TERM_MAX,
};
struct perf_evsel_config_term {
struct list_head list;
- int type;
+ enum term_type type;
union {
u64 period;
u64 freq;
@@ -339,6 +338,10 @@ static inline int perf_evsel__read_on_cpu_scaled(struct perf_evsel *evsel,
int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
struct perf_sample *sample);
+int perf_evsel__parse_sample_timestamp(struct perf_evsel *evsel,
+ union perf_event *event,
+ u64 *timestamp);
+
static inline struct perf_evsel *perf_evsel__next(struct perf_evsel *evsel)
{
return list_entry(evsel->node.next, struct perf_evsel, node);
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 7c0e9d587bfa..5890e08e0754 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -3258,6 +3258,74 @@ int perf_event__synthesize_attrs(struct perf_tool *tool,
return err;
}
+static bool has_unit(struct perf_evsel *counter)
+{
+	return counter->unit != NULL && counter->unit[0] != '\0'; /* non-empty */
+}
+
+static bool has_scale(struct perf_evsel *counter)
+{
+	return !(counter->scale == 1); /* a scale of exactly 1 counts as unset */
+}
+
+/*
+ * Synthesize the per-evsel details the attr event does not carry: unit,
+ * scale, own cpus and - for pipe output only - the event name.
+ * Counters not marked supported are skipped.
+ * tool and process are handed through to each synthesize helper.
+ *
+ * Returns 0 on success, else the first negative synthesize error.
+ */
+int perf_event__synthesize_extra_attr(struct perf_tool *tool,
+				      struct perf_evlist *evsel_list,
+				      perf_event__handler_t process,
+				      bool is_pipe)
+{
+	struct perf_evsel *counter;
+	int err;
+
+	evlist__for_each_entry(evsel_list, counter) {
+		if (!counter->supported)
+			continue;
+
+		/* Unit and scale are synthesized only when defined. */
+		if (has_unit(counter)) {
+			err = perf_event__synthesize_event_update_unit(tool, counter, process);
+			if (err < 0) {
+				pr_err("Couldn't synthesize evsel unit.\n");
+				return err;
+			}
+		}
+
+		if (has_scale(counter)) {
+			err = perf_event__synthesize_event_update_scale(tool, counter, process);
+			if (err < 0) {
+				pr_err("Couldn't synthesize evsel scale.\n");
+				return err;
+			}
+		}
+
+		if (counter->own_cpus) {
+			err = perf_event__synthesize_event_update_cpus(tool, counter, process);
+			if (err < 0) {
+				pr_err("Couldn't synthesize evsel cpus.\n");
+				return err;
+			}
+		}
+
+		/* perf.data carries event names; only pipe output needs them. */
+		if (is_pipe) {
+			err = perf_event__synthesize_event_update_name(tool, counter, process);
+			if (err < 0) {
+				pr_err("Couldn't synthesize evsel name.\n");
+				return err;
+			}
+		}
+	}
+
+	return 0;
+}
+
int perf_event__process_attr(struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_evlist **pevlist)
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 29ccbfdf8724..317fb901e47f 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -9,6 +9,7 @@
#include <linux/types.h>
#include "event.h"
#include "env.h"
+#include "pmu.h"
enum {
HEADER_RESERVED = 0, /* always cleared */
@@ -107,6 +108,11 @@ int perf_event__synthesize_features(struct perf_tool *tool,
struct perf_evlist *evlist,
perf_event__handler_t process);
+int perf_event__synthesize_extra_attr(struct perf_tool *tool,
+ struct perf_evlist *evsel_list,
+ perf_event__handler_t process,
+ bool is_pipe);
+
int perf_event__process_feature(struct perf_tool *tool,
union perf_event *event,
struct perf_session *session);
@@ -166,5 +172,5 @@ int write_padded(struct feat_fd *fd, const void *bf,
*/
int get_cpuid(char *buffer, size_t sz);
-char *get_cpuid_str(void);
+char *get_cpuid_str(struct perf_pmu *pmu __maybe_unused);
#endif /* __PERF_HEADER_H */
diff --git a/tools/perf/util/intel-pt-decoder/Build b/tools/perf/util/intel-pt-decoder/Build
index 10e0814bb8d2..1b704fbea9de 100644
--- a/tools/perf/util/intel-pt-decoder/Build
+++ b/tools/perf/util/intel-pt-decoder/Build
@@ -11,15 +11,21 @@ $(OUTPUT)util/intel-pt-decoder/inat-tables.c: $(inat_tables_script) $(inat_table
$(OUTPUT)util/intel-pt-decoder/intel-pt-insn-decoder.o: util/intel-pt-decoder/intel-pt-insn-decoder.c util/intel-pt-decoder/inat.c $(OUTPUT)util/intel-pt-decoder/inat-tables.c
@(diff -I 2>&1 | grep -q 'option requires an argument' && \
- test -d ../../kernel -a -d ../../tools -a -d ../perf && (( \
- diff -B -I'^#include' util/intel-pt-decoder/insn.c ../../arch/x86/lib/insn.c >/dev/null && \
- diff -B -I'^#include' util/intel-pt-decoder/inat.c ../../arch/x86/lib/inat.c >/dev/null && \
- diff -B util/intel-pt-decoder/x86-opcode-map.txt ../../arch/x86/lib/x86-opcode-map.txt >/dev/null && \
- diff -B util/intel-pt-decoder/gen-insn-attr-x86.awk ../../arch/x86/tools/gen-insn-attr-x86.awk >/dev/null && \
- diff -B -I'^#include' util/intel-pt-decoder/insn.h ../../arch/x86/include/asm/insn.h >/dev/null && \
- diff -B -I'^#include' util/intel-pt-decoder/inat.h ../../arch/x86/include/asm/inat.h >/dev/null && \
- diff -B -I'^#include' util/intel-pt-decoder/inat_types.h ../../arch/x86/include/asm/inat_types.h >/dev/null) \
- || echo "Warning: Intel PT: x86 instruction decoder differs from kernel" >&2 )) || true
+ test -d ../../kernel -a -d ../../tools -a -d ../perf && ( \
+ ((diff -B -I'^#include' util/intel-pt-decoder/insn.c ../../arch/x86/lib/insn.c >/dev/null) || \
+ (echo "Warning: Intel PT: x86 instruction decoder C file at 'tools/perf/util/intel-pt-decoder/insn.c' differs from latest version at 'arch/x86/lib/insn.c'" >&2)) && \
+ ((diff -B -I'^#include' util/intel-pt-decoder/inat.c ../../arch/x86/lib/inat.c >/dev/null) || \
+ (echo "Warning: Intel PT: x86 instruction decoder C file at 'tools/perf/util/intel-pt-decoder/inat.c' differs from latest version at 'arch/x86/lib/inat.c'" >&2)) && \
+ ((diff -B util/intel-pt-decoder/x86-opcode-map.txt ../../arch/x86/lib/x86-opcode-map.txt >/dev/null) || \
+ (echo "Warning: Intel PT: x86 instruction decoder map file at 'tools/perf/util/intel-pt-decoder/x86-opcode-map.txt' differs from latest version at 'arch/x86/lib/x86-opcode-map.txt'" >&2)) && \
+ ((diff -B util/intel-pt-decoder/gen-insn-attr-x86.awk ../../arch/x86/tools/gen-insn-attr-x86.awk >/dev/null) || \
+ (echo "Warning: Intel PT: x86 instruction decoder script at 'tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk' differs from latest version at 'arch/x86/tools/gen-insn-attr-x86.awk'" >&2)) && \
+ ((diff -B -I'^#include' util/intel-pt-decoder/insn.h ../../arch/x86/include/asm/insn.h >/dev/null) || \
+ (echo "Warning: Intel PT: x86 instruction decoder header at 'tools/perf/util/intel-pt-decoder/insn.h' differs from latest version at 'arch/x86/include/asm/insn.h'" >&2)) && \
+ ((diff -B -I'^#include' util/intel-pt-decoder/inat.h ../../arch/x86/include/asm/inat.h >/dev/null) || \
+ (echo "Warning: Intel PT: x86 instruction decoder header at 'tools/perf/util/intel-pt-decoder/inat.h' differs from latest version at 'arch/x86/include/asm/inat.h'" >&2)) && \
+ ((diff -B -I'^#include' util/intel-pt-decoder/inat_types.h ../../arch/x86/include/asm/inat_types.h >/dev/null) || \
+ (echo "Warning: Intel PT: x86 instruction decoder header at 'tools/perf/util/intel-pt-decoder/inat_types.h' differs from latest version at 'arch/x86/include/asm/inat_types.h'" >&2)))) || true
$(call rule_mkdir)
$(call if_changed_dep,cc_o_c)
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 270f3223c6df..64d255f6a537 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -2204,7 +2204,7 @@ int thread__resolve_callchain(struct thread *thread,
{
int ret = 0;
- callchain_cursor_reset(&callchain_cursor);
+ callchain_cursor_reset(cursor);
if (callchain_param.order == ORDER_CALLEE) {
ret = thread__resolve_callchain_sample(thread, cursor,
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 0ddd9c199227..e48410c99b39 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -38,6 +38,10 @@ struct metric_event *metricgroup__lookup(struct rblist *metric_events,
struct metric_event me = {
.evsel = evsel
};
+
+ if (!metric_events)
+ return NULL;
+
nd = rblist__find(metric_events, &me);
if (nd)
return container_of(nd, struct metric_event, nd);
@@ -270,7 +274,7 @@ static void metricgroup__print_strlist(struct strlist *metrics, bool raw)
void metricgroup__print(bool metrics, bool metricgroups, char *filter,
bool raw)
{
- struct pmu_events_map *map = perf_pmu__find_map();
+ struct pmu_events_map *map = perf_pmu__find_map(NULL);
struct pmu_event *pe;
int i;
struct rblist groups;
@@ -368,7 +372,7 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter,
static int metricgroup__add_metric(const char *metric, struct strbuf *events,
struct list_head *group_list)
{
- struct pmu_events_map *map = perf_pmu__find_map();
+ struct pmu_events_map *map = perf_pmu__find_map(NULL);
struct pmu_event *pe;
int ret = -EINVAL;
int i, j;
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index 9fe5f9c7d577..05076e683938 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -21,33 +21,13 @@ size_t perf_mmap__mmap_len(struct perf_mmap *map)
}
/* When check_messup is true, 'end' must points to a good entry */
-static union perf_event *perf_mmap__read(struct perf_mmap *map, bool check_messup,
+static union perf_event *perf_mmap__read(struct perf_mmap *map,
u64 start, u64 end, u64 *prev)
{
unsigned char *data = map->base + page_size;
union perf_event *event = NULL;
int diff = end - start;
- if (check_messup) {
- /*
- * If we're further behind than half the buffer, there's a chance
- * the writer will bite our tail and mess up the samples under us.
- *
- * If we somehow ended up ahead of the 'end', we got messed up.
- *
- * In either case, truncate and restart at 'end'.
- */
- if (diff > map->mask / 2 || diff < 0) {
- fprintf(stderr, "WARNING: failed to keep up with mmap data.\n");
-
- /*
- * 'end' points to a known good entry, start there.
- */
- start = end;
- diff = 0;
- }
- }
-
if (diff >= (int)sizeof(event->header)) {
size_t size;
@@ -89,7 +69,7 @@ broken_event:
return event;
}
-union perf_event *perf_mmap__read_forward(struct perf_mmap *map, bool check_messup)
+union perf_event *perf_mmap__read_forward(struct perf_mmap *map)
{
u64 head;
u64 old = map->prev;
@@ -102,7 +82,7 @@ union perf_event *perf_mmap__read_forward(struct perf_mmap *map, bool check_mess
head = perf_mmap__read_head(map);
- return perf_mmap__read(map, check_messup, old, head, &map->prev);
+ return perf_mmap__read(map, old, head, &map->prev);
}
union perf_event *perf_mmap__read_backward(struct perf_mmap *map)
@@ -138,7 +118,7 @@ union perf_event *perf_mmap__read_backward(struct perf_mmap *map)
else
end = head + map->mask + 1;
- return perf_mmap__read(map, false, start, end, &map->prev);
+ return perf_mmap__read(map, start, end, &map->prev);
}
void perf_mmap__read_catchup(struct perf_mmap *map)
@@ -254,18 +234,18 @@ int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd)
return 0;
}
-static int backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end)
+static int overwrite_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end)
{
struct perf_event_header *pheader;
u64 evt_head = head;
int size = mask + 1;
- pr_debug2("backward_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head);
+ pr_debug2("overwrite_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head);
pheader = (struct perf_event_header *)(buf + (head & mask));
*start = head;
while (true) {
if (evt_head - head >= (unsigned int)size) {
- pr_debug("Finished reading backward ring buffer: rewind\n");
+ pr_debug("Finished reading overwrite ring buffer: rewind\n");
if (evt_head - head > (unsigned int)size)
evt_head -= pheader->size;
*end = evt_head;
@@ -275,7 +255,7 @@ static int backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64
pheader = (struct perf_event_header *)(buf + (evt_head & mask));
if (pheader->size == 0) {
- pr_debug("Finished reading backward ring buffer: get start\n");
+ pr_debug("Finished reading overwrite ring buffer: get start\n");
*end = evt_head;
return 0;
}
@@ -287,19 +267,7 @@ static int backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64
return -1;
}
-static int rb_find_range(void *data, int mask, u64 head, u64 old,
- u64 *start, u64 *end, bool backward)
-{
- if (!backward) {
- *start = old;
- *end = head;
- return 0;
- }
-
- return backward_rb_find_range(data, mask, head, start, end);
-}
-
-int perf_mmap__push(struct perf_mmap *md, bool overwrite, bool backward,
+int perf_mmap__push(struct perf_mmap *md, bool overwrite,
void *to, int push(void *to, void *buf, size_t size))
{
u64 head = perf_mmap__read_head(md);
@@ -310,19 +278,28 @@ int perf_mmap__push(struct perf_mmap *md, bool overwrite, bool backward,
void *buf;
int rc = 0;
- if (rb_find_range(data, md->mask, head, old, &start, &end, backward))
- return -1;
+ start = overwrite ? head : old;
+ end = overwrite ? old : head;
if (start == end)
return 0;
size = end - start;
if (size > (unsigned long)(md->mask) + 1) {
- WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");
+ if (!overwrite) {
+ WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");
- md->prev = head;
- perf_mmap__consume(md, overwrite || backward);
- return 0;
+ md->prev = head;
+ perf_mmap__consume(md, overwrite);
+ return 0;
+ }
+
+ /*
+ * Backward ring buffer is full. We still have a chance to read
+ * most of data from it.
+ */
+ if (overwrite_rb_find_range(data, md->mask, head, &start, &end))
+ return -1;
}
if ((start & md->mask) + size != (end & md->mask)) {
@@ -346,7 +323,7 @@ int perf_mmap__push(struct perf_mmap *md, bool overwrite, bool backward,
}
md->prev = head;
- perf_mmap__consume(md, overwrite || backward);
+ perf_mmap__consume(md, overwrite);
out:
return rc;
}
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index efd78b827b05..d640273b7762 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -86,10 +86,10 @@ static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail)
pc->data_tail = tail;
}
-union perf_event *perf_mmap__read_forward(struct perf_mmap *map, bool check_messup);
+union perf_event *perf_mmap__read_forward(struct perf_mmap *map);
union perf_event *perf_mmap__read_backward(struct perf_mmap *map);
-int perf_mmap__push(struct perf_mmap *md, bool overwrite, bool backward,
+int perf_mmap__push(struct perf_mmap *md, bool backward,
void *to, int push(void *to, void *buf, size_t size));
size_t perf_mmap__mmap_len(struct perf_mmap *map);
diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c
index 8e09fd2d842f..bad9e0296e9a 100644
--- a/tools/perf/util/ordered-events.c
+++ b/tools/perf/util/ordered-events.c
@@ -157,9 +157,8 @@ void ordered_events__delete(struct ordered_events *oe, struct ordered_event *eve
}
int ordered_events__queue(struct ordered_events *oe, union perf_event *event,
- struct perf_sample *sample, u64 file_offset)
+ u64 timestamp, u64 file_offset)
{
- u64 timestamp = sample->time;
struct ordered_event *oevent;
if (!timestamp || timestamp == ~0ULL)
diff --git a/tools/perf/util/ordered-events.h b/tools/perf/util/ordered-events.h
index 96e5292d88e2..8c7a2948593e 100644
--- a/tools/perf/util/ordered-events.h
+++ b/tools/perf/util/ordered-events.h
@@ -45,7 +45,7 @@ struct ordered_events {
};
int ordered_events__queue(struct ordered_events *oe, union perf_event *event,
- struct perf_sample *sample, u64 file_offset);
+ u64 timestamp, u64 file_offset);
void ordered_events__delete(struct ordered_events *oe, struct ordered_event *event);
int ordered_events__flush(struct ordered_events *oe, enum oe_flush how);
void ordered_events__init(struct ordered_events *oe, ordered_events__deliver_t deliver);
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 80fb1593913a..57e38fdf0b34 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -12,6 +12,7 @@
#include <dirent.h>
#include <api/fs/fs.h>
#include <locale.h>
+#include <regex.h>
#include "util.h"
#include "pmu.h"
#include "parse-events.h"
@@ -537,17 +538,45 @@ static bool pmu_is_uncore(const char *name)
}
/*
+ * On some platforms the core PMU device is not named "cpu" in sysfs.
+ * Look for the sysfs files that identify a PMU as a core device.
+ */
+static int is_pmu_core(const char *name)
+{
+	const char *sysfs = sysfs__mountpoint();
+	char path[PATH_MAX];
+	struct stat st;
+
+	if (sysfs == NULL)
+		return 0;
+
+	/*
+	 * x86 and others: core PMU when sysfs has a "cpu" device and the
+	 * name begins with "cpu".
+	 */
+	scnprintf(path, sizeof(path), "%s/bus/event_source/devices/cpu", sysfs);
+	if (!stat(path, &st) && !strncmp(name, "cpu", strlen("cpu")))
+		return 1;
+
+	/* arm: core PMU when its sysfs device directory has a "cpus" entry. */
+	scnprintf(path, sizeof(path), "%s/bus/event_source/devices/%s/cpus",
+		  sysfs, name);
+
+	return stat(path, &st) == 0;
+}
+
+/*
* Return the CPU id as a raw string.
*
* Each architecture should provide a more precise id string that
* can be use to match the architecture's "mapfile".
*/
-char * __weak get_cpuid_str(void)
+char * __weak get_cpuid_str(struct perf_pmu *pmu __maybe_unused)
{
return NULL;
}
-static char *perf_pmu__getcpuid(void)
+static char *perf_pmu__getcpuid(struct perf_pmu *pmu)
{
char *cpuid;
static bool printed;
@@ -556,7 +585,7 @@ static char *perf_pmu__getcpuid(void)
if (cpuid)
cpuid = strdup(cpuid);
if (!cpuid)
- cpuid = get_cpuid_str();
+ cpuid = get_cpuid_str(pmu);
if (!cpuid)
return NULL;
@@ -567,22 +596,45 @@ static char *perf_pmu__getcpuid(void)
return cpuid;
}
-struct pmu_events_map *perf_pmu__find_map(void)
+struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu)
{
struct pmu_events_map *map;
- char *cpuid = perf_pmu__getcpuid();
+ char *cpuid = perf_pmu__getcpuid(pmu);
int i;
+	/* On some platforms which use a cpus map, cpuid can be NULL for
+	 * PMUs other than core PMUs.
+ */
+ if (!cpuid)
+ return NULL;
+
i = 0;
for (;;) {
+ regex_t re;
+ regmatch_t pmatch[1];
+ int match;
+
map = &pmu_events_map[i++];
if (!map->table) {
map = NULL;
break;
}
- if (!strcmp(map->cpuid, cpuid))
+ if (regcomp(&re, map->cpuid, REG_EXTENDED) != 0) {
+ /* Warn unable to generate match particular string. */
+ pr_info("Invalid regular expression %s\n", map->cpuid);
break;
+ }
+
+ match = !regexec(&re, cpuid, 1, pmatch, 0);
+ regfree(&re);
+ if (match) {
+ size_t match_len = (pmatch[0].rm_eo - pmatch[0].rm_so);
+
+ /* Verify the entire string matched. */
+ if (match_len == strlen(cpuid))
+ break;
+ }
}
free(cpuid);
return map;
@@ -593,13 +645,14 @@ struct pmu_events_map *perf_pmu__find_map(void)
* to the current running CPU. Then, add all PMU events from that table
* as aliases.
*/
-static void pmu_add_cpu_aliases(struct list_head *head, const char *name)
+static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu)
{
int i;
struct pmu_events_map *map;
struct pmu_event *pe;
+ const char *name = pmu->name;
- map = perf_pmu__find_map();
+ map = perf_pmu__find_map(pmu);
if (!map)
return;
@@ -608,7 +661,6 @@ static void pmu_add_cpu_aliases(struct list_head *head, const char *name)
*/
i = 0;
while (1) {
- const char *pname;
pe = &map->table[i++];
if (!pe->name) {
@@ -617,9 +669,13 @@ static void pmu_add_cpu_aliases(struct list_head *head, const char *name)
break;
}
- pname = pe->pmu ? pe->pmu : "cpu";
- if (strncmp(pname, name, strlen(pname)))
- continue;
+ if (!is_pmu_core(name)) {
+ /* check for uncore devices */
+ if (pe->pmu == NULL)
+ continue;
+ if (strncmp(pe->pmu, name, strlen(pe->pmu)))
+ continue;
+ }
/* need type casts to override 'const' */
__perf_pmu__new_alias(head, NULL, (char *)pe->name,
@@ -661,21 +717,20 @@ static struct perf_pmu *pmu_lookup(const char *name)
if (pmu_aliases(name, &aliases))
return NULL;
- pmu_add_cpu_aliases(&aliases, name);
pmu = zalloc(sizeof(*pmu));
if (!pmu)
return NULL;
pmu->cpus = pmu_cpumask(name);
-
+ pmu->name = strdup(name);
+ pmu->type = type;
pmu->is_uncore = pmu_is_uncore(name);
+ pmu_add_cpu_aliases(&aliases, pmu);
INIT_LIST_HEAD(&pmu->format);
INIT_LIST_HEAD(&pmu->aliases);
list_splice(&format, &pmu->format);
list_splice(&aliases, &pmu->aliases);
- pmu->name = strdup(name);
- pmu->type = type;
list_add_tail(&pmu->list, &pmus);
pmu->default_config = perf_pmu__get_default_config(pmu);
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index 27c75e635866..76fecec7b3f9 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -92,6 +92,6 @@ int perf_pmu__test(void);
struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu);
-struct pmu_events_map *perf_pmu__find_map(void);
+struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu);
#endif /* __PMU_H */
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 8e49d9cafcfc..b1e999bd21ef 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -864,7 +864,7 @@ static PyObject *pyrf_evlist__mmap(struct pyrf_evlist *pevlist,
&pages, &overwrite))
return NULL;
- if (perf_evlist__mmap(evlist, pages, overwrite) < 0) {
+ if (perf_evlist__mmap(evlist, pages) < 0) {
PyErr_SetFromErrno(PyExc_OSError);
return NULL;
}
diff --git a/tools/perf/util/rblist.c b/tools/perf/util/rblist.c
index 0dfe27d99458..0efc3258c648 100644
--- a/tools/perf/util/rblist.c
+++ b/tools/perf/util/rblist.c
@@ -101,16 +101,21 @@ void rblist__init(struct rblist *rblist)
return;
}
+void rblist__exit(struct rblist *rblist)
+{
+	struct rb_node *cur, *nxt;
+
+	/* Look up the successor before the current node is removed. */
+	for (cur = rb_first(&rblist->entries); cur; cur = nxt) {
+		nxt = rb_next(cur);
+		rblist__remove_node(rblist, cur);
+	}
+}
+
void rblist__delete(struct rblist *rblist)
{
if (rblist != NULL) {
- struct rb_node *pos, *next = rb_first(&rblist->entries);
-
- while (next) {
- pos = next;
- next = rb_next(pos);
- rblist__remove_node(rblist, pos);
- }
+ rblist__exit(rblist);
free(rblist);
}
}
diff --git a/tools/perf/util/rblist.h b/tools/perf/util/rblist.h
index 4c8638a22571..76df15c27f5f 100644
--- a/tools/perf/util/rblist.h
+++ b/tools/perf/util/rblist.h
@@ -29,6 +29,7 @@ struct rblist {
};
void rblist__init(struct rblist *rblist);
+void rblist__exit(struct rblist *rblist);
void rblist__delete(struct rblist *rblist);
int rblist__add_node(struct rblist *rblist, const void *new_entry);
void rblist__remove_node(struct rblist *rblist, struct rb_node *rb_node);
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 5c412310f266..54e30f1bcbd7 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -27,7 +27,6 @@
static int perf_session__deliver_event(struct perf_session *session,
union perf_event *event,
- struct perf_sample *sample,
struct perf_tool *tool,
u64 file_offset);
@@ -107,17 +106,10 @@ static void perf_session__set_comm_exec(struct perf_session *session)
static int ordered_events__deliver_event(struct ordered_events *oe,
struct ordered_event *event)
{
- struct perf_sample sample;
struct perf_session *session = container_of(oe, struct perf_session,
ordered_events);
- int ret = perf_evlist__parse_sample(session->evlist, event->event, &sample);
-
- if (ret) {
- pr_err("Can't parse sample, err = %d\n", ret);
- return ret;
- }
- return perf_session__deliver_event(session, event->event, &sample,
+ return perf_session__deliver_event(session, event->event,
session->tool, event->file_offset);
}
@@ -873,9 +865,9 @@ static int process_finished_round(struct perf_tool *tool __maybe_unused,
}
int perf_session__queue_event(struct perf_session *s, union perf_event *event,
- struct perf_sample *sample, u64 file_offset)
+ u64 timestamp, u64 file_offset)
{
- return ordered_events__queue(&s->ordered_events, event, sample, file_offset);
+ return ordered_events__queue(&s->ordered_events, event, timestamp, file_offset);
}
static void callchain__lbr_callstack_printf(struct perf_sample *sample)
@@ -1328,20 +1320,26 @@ static int machines__deliver_event(struct machines *machines,
static int perf_session__deliver_event(struct perf_session *session,
union perf_event *event,
- struct perf_sample *sample,
struct perf_tool *tool,
u64 file_offset)
{
+ struct perf_sample sample;
int ret;
- ret = auxtrace__process_event(session, event, sample, tool);
+ ret = perf_evlist__parse_sample(session->evlist, event, &sample);
+ if (ret) {
+ pr_err("Can't parse sample, err = %d\n", ret);
+ return ret;
+ }
+
+ ret = auxtrace__process_event(session, event, &sample, tool);
if (ret < 0)
return ret;
if (ret > 0)
return 0;
return machines__deliver_event(&session->machines, session->evlist,
- event, sample, tool, file_offset);
+ event, &sample, tool, file_offset);
}
static s64 perf_session__process_user_event(struct perf_session *session,
@@ -1350,10 +1348,11 @@ static s64 perf_session__process_user_event(struct perf_session *session,
{
struct ordered_events *oe = &session->ordered_events;
struct perf_tool *tool = session->tool;
+ struct perf_sample sample = { .time = 0, };
int fd = perf_data__fd(session->data);
int err;
- dump_event(session->evlist, event, file_offset, NULL);
+ dump_event(session->evlist, event, file_offset, &sample);
/* These events are processed right away */
switch (event->header.type) {
@@ -1495,7 +1494,6 @@ static s64 perf_session__process_event(struct perf_session *session,
{
struct perf_evlist *evlist = session->evlist;
struct perf_tool *tool = session->tool;
- struct perf_sample sample;
int ret;
if (session->header.needs_swap)
@@ -1509,21 +1507,19 @@ static s64 perf_session__process_event(struct perf_session *session,
if (event->header.type >= PERF_RECORD_USER_TYPE_START)
return perf_session__process_user_event(session, event, file_offset);
- /*
- * For all kernel events we get the sample data
- */
- ret = perf_evlist__parse_sample(evlist, event, &sample);
- if (ret)
- return ret;
-
if (tool->ordered_events) {
- ret = perf_session__queue_event(session, event, &sample, file_offset);
+ u64 timestamp;
+
+ ret = perf_evlist__parse_sample_timestamp(evlist, event, &timestamp);
+ if (ret)
+ return ret;
+
+ ret = perf_session__queue_event(session, event, timestamp, file_offset);
if (ret != -ETIME)
return ret;
}
- return perf_session__deliver_event(session, event, &sample, tool,
- file_offset);
+ return perf_session__deliver_event(session, event, tool, file_offset);
}
void perf_event_header__bswap(struct perf_event_header *hdr)
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index da1434a7c120..da40b4b380ca 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -53,7 +53,7 @@ int perf_session__peek_event(struct perf_session *session, off_t file_offset,
int perf_session__process_events(struct perf_session *session);
int perf_session__queue_event(struct perf_session *s, union perf_event *event,
- struct perf_sample *sample, u64 file_offset);
+ u64 timestamp, u64 file_offset);
void perf_tool__fill_defaults(struct perf_tool *tool);
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 855e35cbb1dc..57ec22513971 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -87,6 +87,16 @@ static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused,
return &nd->rb_node;
}
+static void saved_value_delete(struct rblist *rblist __maybe_unused,
+ struct rb_node *rb_node)
+{
+ struct saved_value *v;
+
+ BUG_ON(!rb_node);
+ v = container_of(rb_node, struct saved_value, rb_node);
+ free(v);
+}
+
static struct saved_value *saved_value_lookup(struct perf_evsel *evsel,
int cpu,
bool create)
@@ -114,7 +124,7 @@ void perf_stat__init_shadow_stats(void)
rblist__init(&runtime_saved_values);
runtime_saved_values.node_cmp = saved_value_cmp;
runtime_saved_values.node_new = saved_value_new;
- /* No delete for now */
+ runtime_saved_values.node_delete = saved_value_delete;
}
static int evsel_context(struct perf_evsel *evsel)
diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c
index be0d5a736dea..2b653853eec2 100644
--- a/tools/perf/util/thread_map.c
+++ b/tools/perf/util/thread_map.c
@@ -92,7 +92,7 @@ struct thread_map *thread_map__new_by_tid(pid_t tid)
return threads;
}
-struct thread_map *thread_map__new_by_uid(uid_t uid)
+static struct thread_map *__thread_map__new_all_cpus(uid_t uid)
{
DIR *proc;
int max_threads = 32, items, i;
@@ -113,7 +113,6 @@ struct thread_map *thread_map__new_by_uid(uid_t uid)
while ((dirent = readdir(proc)) != NULL) {
char *end;
bool grow = false;
- struct stat st;
pid_t pid = strtol(dirent->d_name, &end, 10);
if (*end) /* only interested in proper numerical dirents */
@@ -121,11 +120,12 @@ struct thread_map *thread_map__new_by_uid(uid_t uid)
snprintf(path, sizeof(path), "/proc/%s", dirent->d_name);
- if (stat(path, &st) != 0)
- continue;
+ if (uid != UINT_MAX) {
+ struct stat st;
- if (st.st_uid != uid)
- continue;
+ if (stat(path, &st) != 0 || st.st_uid != uid)
+ continue;
+ }
snprintf(path, sizeof(path), "/proc/%d/task", pid);
items = scandir(path, &namelist, filter, NULL);
@@ -178,6 +178,16 @@ out_free_closedir:
goto out_closedir;
}
+struct thread_map *thread_map__new_all_cpus(void)
+{
+ return __thread_map__new_all_cpus(UINT_MAX);
+}
+
+struct thread_map *thread_map__new_by_uid(uid_t uid)
+{
+ return __thread_map__new_all_cpus(uid);
+}
+
struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid)
{
if (pid != -1)
diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h
index f15803985435..07a765fb22bb 100644
--- a/tools/perf/util/thread_map.h
+++ b/tools/perf/util/thread_map.h
@@ -23,6 +23,7 @@ struct thread_map *thread_map__new_dummy(void);
struct thread_map *thread_map__new_by_pid(pid_t pid);
struct thread_map *thread_map__new_by_tid(pid_t tid);
struct thread_map *thread_map__new_by_uid(uid_t uid);
+struct thread_map *thread_map__new_all_cpus(void);
struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid);
struct thread_map *thread_map__new_event(struct thread_map_event *event);