Diffstat (limited to 'tools/perf/util')
-rw-r--r--  tools/perf/util/Build | 4
-rwxr-xr-x  tools/perf/util/PERF-VERSION-GEN | 13
-rw-r--r--  tools/perf/util/affinity.c | 10
-rw-r--r--  tools/perf/util/annotate.c | 12
-rw-r--r--  tools/perf/util/arm-spe-decoder/arm-spe-decoder.c | 2
-rw-r--r--  tools/perf/util/arm-spe-decoder/arm-spe-decoder.h | 1
-rw-r--r--  tools/perf/util/arm-spe.c | 84
-rw-r--r--  tools/perf/util/arm64-frame-pointer-unwind-support.c | 63
-rw-r--r--  tools/perf/util/arm64-frame-pointer-unwind-support.h | 10
-rw-r--r--  tools/perf/util/auxtrace.c | 47
-rw-r--r--  tools/perf/util/auxtrace.h | 22
-rw-r--r--  tools/perf/util/bpf-event.c | 33
-rw-r--r--  tools/perf/util/bpf-loader.c | 373
-rw-r--r--  tools/perf/util/bpf_counter.c | 90
-rw-r--r--  tools/perf/util/bpf_counter.h | 4
-rw-r--r--  tools/perf/util/bpf_counter_cgroup.c | 48
-rw-r--r--  tools/perf/util/bpf_ftrace.c | 154
-rw-r--r--  tools/perf/util/bpf_map.c | 28
-rw-r--r--  tools/perf/util/bpf_off_cpu.c | 338
-rw-r--r--  tools/perf/util/bpf_skel/func_latency.bpf.c | 116
-rw-r--r--  tools/perf/util/bpf_skel/off_cpu.bpf.c | 229
-rw-r--r--  tools/perf/util/branch.c | 4
-rw-r--r--  tools/perf/util/build-id.c | 34
-rw-r--r--  tools/perf/util/c++/clang.cpp | 4
-rw-r--r--  tools/perf/util/callchain.c | 16
-rw-r--r--  tools/perf/util/callchain.h | 4
-rw-r--r--  tools/perf/util/counts.c | 8
-rw-r--r--  tools/perf/util/counts.h | 14
-rw-r--r--  tools/perf/util/cpumap.c | 253
-rw-r--r--  tools/perf/util/cpumap.h | 125
-rw-r--r--  tools/perf/util/cputopo.c | 11
-rw-r--r--  tools/perf/util/cs-etm.c | 16
-rw-r--r--  tools/perf/util/data-convert-bt.c | 2
-rw-r--r--  tools/perf/util/data.c | 29
-rw-r--r--  tools/perf/util/data.h | 2
-rw-r--r--  tools/perf/util/debug.c | 2
-rw-r--r--  tools/perf/util/dso.c | 15
-rw-r--r--  tools/perf/util/dso.h | 3
-rw-r--r--  tools/perf/util/dsos.c | 13
-rw-r--r--  tools/perf/util/env.c | 29
-rw-r--r--  tools/perf/util/env.h | 3
-rw-r--r--  tools/perf/util/event.c | 13
-rw-r--r--  tools/perf/util/event.h | 45
-rw-r--r--  tools/perf/util/evlist-hybrid.c | 15
-rw-r--r--  tools/perf/util/evlist.c | 325
-rw-r--r--  tools/perf/util/evlist.h | 60
-rw-r--r--  tools/perf/util/evsel.c | 320
-rw-r--r--  tools/perf/util/evsel.h | 62
-rw-r--r--  tools/perf/util/expr.c | 37
-rw-r--r--  tools/perf/util/expr.l | 2
-rw-r--r--  tools/perf/util/ftrace.h | 82
-rw-r--r--  tools/perf/util/genelf.h | 3
-rw-r--r--  tools/perf/util/hashmap.c | 3
-rw-r--r--  tools/perf/util/header.c | 136
-rw-r--r--  tools/perf/util/header.h | 18
-rw-r--r--  tools/perf/util/hist.c | 6
-rw-r--r--  tools/perf/util/hist.h | 5
-rw-r--r--  tools/perf/util/include/linux/linkage.h | 52
-rw-r--r--  tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | 344
-rw-r--r--  tools/perf/util/intel-pt-decoder/intel-pt-decoder.h | 22
-rw-r--r--  tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c | 1
-rw-r--r--  tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h | 1
-rw-r--r--  tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c | 47
-rw-r--r--  tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h | 3
-rw-r--r--  tools/perf/util/intel-pt.c | 221
-rw-r--r--  tools/perf/util/jitdump.c | 10
-rw-r--r--  tools/perf/util/libunwind/arm64.c | 4
-rw-r--r--  tools/perf/util/machine.c | 196
-rw-r--r--  tools/perf/util/machine.h | 14
-rw-r--r--  tools/perf/util/map.c | 456
-rw-r--r--  tools/perf/util/map.h | 26
-rw-r--r--  tools/perf/util/map_symbol.h | 1
-rw-r--r--  tools/perf/util/maps.c | 403
-rw-r--r--  tools/perf/util/maps.h | 2
-rw-r--r--  tools/perf/util/mem-events.c | 61
-rw-r--r--  tools/perf/util/mem-events.h | 1
-rw-r--r--  tools/perf/util/metricgroup.c | 153
-rw-r--r--  tools/perf/util/mmap.c | 33
-rw-r--r--  tools/perf/util/mmap.h | 6
-rw-r--r--  tools/perf/util/namespaces.c | 124
-rw-r--r--  tools/perf/util/namespaces.h | 12
-rw-r--r--  tools/perf/util/off_cpu.h | 29
-rw-r--r--  tools/perf/util/ordered-events.c | 3
-rw-r--r--  tools/perf/util/ordered-events.h | 3
-rw-r--r--  tools/perf/util/parse-events-hybrid.c | 9
-rw-r--r--  tools/perf/util/parse-events.c | 136
-rw-r--r--  tools/perf/util/parse-events.h | 1
-rw-r--r--  tools/perf/util/parse-events.l | 4
-rw-r--r--  tools/perf/util/parse-events.y | 17
-rw-r--r--  tools/perf/util/path.c | 14
-rw-r--r--  tools/perf/util/path.h | 1
-rw-r--r--  tools/perf/util/perf_api_probe.c | 19
-rw-r--r--  tools/perf/util/perf_event_attr_fprintf.c | 2
-rw-r--r--  tools/perf/util/perf_regs.c | 668
-rw-r--r--  tools/perf/util/perf_regs.h | 17
-rw-r--r--  tools/perf/util/pmu.c | 14
-rw-r--r--  tools/perf/util/probe-event.c | 5
-rw-r--r--  tools/perf/util/python-ext-sources | 1
-rw-r--r--  tools/perf/util/python.c | 25
-rw-r--r--  tools/perf/util/record.c | 15
-rw-r--r--  tools/perf/util/record.h | 2
-rw-r--r--  tools/perf/util/scripting-engines/trace-event-perl.c | 2
-rw-r--r--  tools/perf/util/scripting-engines/trace-event-python.c | 95
-rw-r--r--  tools/perf/util/session.c | 299
-rw-r--r--  tools/perf/util/session.h | 3
-rw-r--r--  tools/perf/util/setup.py | 8
-rw-r--r--  tools/perf/util/sideband_evlist.c | 3
-rw-r--r--  tools/perf/util/smt.c | 73
-rw-r--r--  tools/perf/util/sort.c | 168
-rw-r--r--  tools/perf/util/sort.h | 5
-rw-r--r--  tools/perf/util/stat-display.c | 203
-rw-r--r--  tools/perf/util/stat-shadow.c | 337
-rw-r--r--  tools/perf/util/stat.c | 81
-rw-r--r--  tools/perf/util/stat.h | 31
-rw-r--r--  tools/perf/util/svghelper.c | 10
-rw-r--r--  tools/perf/util/symbol-elf.c | 2
-rw-r--r--  tools/perf/util/symbol.c | 55
-rw-r--r--  tools/perf/util/symbol.h | 3
-rw-r--r--  tools/perf/util/symbol_conf.h | 3
-rw-r--r--  tools/perf/util/synthetic-events.c | 45
-rw-r--r--  tools/perf/util/synthetic-events.h | 3
-rw-r--r--  tools/perf/util/tool.h | 3
-rw-r--r--  tools/perf/util/top.c | 8
-rw-r--r--  tools/perf/util/top.h | 5
-rw-r--r--  tools/perf/util/topdown.c | 17
-rw-r--r--  tools/perf/util/topdown.h | 3
-rw-r--r--  tools/perf/util/trace-event-parse.c | 2
-rw-r--r--  tools/perf/util/unwind-libdw.c | 10
-rw-r--r--  tools/perf/util/unwind-libdw.h | 1
-rw-r--r--  tools/perf/util/unwind-libunwind-local.c | 115
-rw-r--r--  tools/perf/util/unwind-libunwind.c | 6
-rw-r--r--  tools/perf/util/unwind.h | 13
-rw-r--r--  tools/perf/util/util.c | 51
-rw-r--r--  tools/perf/util/util.h | 13
134 files changed, 6298 insertions(+), 1956 deletions(-)
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 2e5bfbb69960..a51267d88ca9 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -1,3 +1,4 @@
+perf-y += arm64-frame-pointer-unwind-support.o
perf-y += annotate.o
perf-y += block-info.o
perf-y += block-range.o
@@ -55,6 +56,7 @@ perf-y += debug.o
perf-y += fncache.o
perf-y += machine.o
perf-y += map.o
+perf-y += maps.o
perf-y += pstack.o
perf-y += session.o
perf-y += sample-raw.o
@@ -144,6 +146,8 @@ perf-$(CONFIG_LIBBPF) += bpf-loader.o
perf-$(CONFIG_LIBBPF) += bpf_map.o
perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter.o
perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter_cgroup.o
+perf-$(CONFIG_PERF_BPF_SKEL) += bpf_ftrace.o
+perf-$(CONFIG_PERF_BPF_SKEL) += bpf_off_cpu.o
perf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o
perf-$(CONFIG_LIBELF) += symbol-elf.o
perf-$(CONFIG_LIBELF) += probe-file.o
diff --git a/tools/perf/util/PERF-VERSION-GEN b/tools/perf/util/PERF-VERSION-GEN
index 59241ff342be..0ee5af529238 100755
--- a/tools/perf/util/PERF-VERSION-GEN
+++ b/tools/perf/util/PERF-VERSION-GEN
@@ -11,23 +11,18 @@ LF='
'
#
-# First check if there is a .git to get the version from git describe
-# otherwise try to get the version from the kernel Makefile
+# Always try first to get the version from the kernel Makefile
#
CID=
TAG=
if test -d ../../.git -o -f ../../.git
then
- TAG=$(git describe --abbrev=0 --match "v[0-9].[0-9]*" 2>/dev/null )
+ TAG=$(MAKEFLAGS= make -sC ../.. kernelversion)
CID=$(git log -1 --abbrev=12 --pretty=format:"%h" 2>/dev/null) && CID="-g$CID"
-elif test -f ../../PERF-VERSION-FILE
-then
+else
TAG=$(cut -d' ' -f3 ../../PERF-VERSION-FILE | sed -e 's/\"//g')
fi
-if test -z "$TAG"
-then
- TAG=$(MAKEFLAGS= make -sC ../.. kernelversion)
-fi
+
VN="$TAG$CID"
if test -n "$CID"
then
diff --git a/tools/perf/util/affinity.c b/tools/perf/util/affinity.c
index 7b12bd7a3080..4d216c0dc425 100644
--- a/tools/perf/util/affinity.c
+++ b/tools/perf/util/affinity.c
@@ -11,7 +11,7 @@
static int get_cpu_set_size(void)
{
- int sz = cpu__max_cpu() + 8 - 1;
+ int sz = cpu__max_cpu().cpu + 8 - 1;
/*
* sched_getaffinity doesn't like masks smaller than the kernel.
* Hopefully that's big enough.
@@ -62,7 +62,7 @@ void affinity__set(struct affinity *a, int cpu)
clear_bit(cpu, a->sched_cpus);
}
-void affinity__cleanup(struct affinity *a)
+static void __affinity__cleanup(struct affinity *a)
{
int cpu_set_size = get_cpu_set_size();
@@ -71,3 +71,9 @@ void affinity__cleanup(struct affinity *a)
zfree(&a->sched_cpus);
zfree(&a->orig_cpus);
}
+
+void affinity__cleanup(struct affinity *a)
+{
+ if (a != NULL)
+ __affinity__cleanup(a);
+}
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 01900689dc00..82cc396ef516 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -33,6 +33,7 @@
#include "string2.h"
#include "util/event.h"
#include "arch/common.h"
+#include "namespaces.h"
#include <regex.h>
#include <pthread.h>
#include <linux/bitops.h>
@@ -1696,6 +1697,15 @@ fallback:
* DSO is the same as when 'perf record' ran.
*/
__symbol__join_symfs(filename, filename_size, dso->long_name);
+
+ if (access(filename, R_OK) && errno == ENOENT && dso->nsinfo) {
+ char *new_name = filename_with_chroot(dso->nsinfo->pid,
+ filename);
+ if (new_name) {
+ strlcpy(filename, new_name, filename_size);
+ free(new_name);
+ }
+ }
}
free(build_id_path);
@@ -2036,6 +2046,8 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
memset(&objdump_process, 0, sizeof(objdump_process));
objdump_process.argv = objdump_argv;
objdump_process.out = -1;
+ objdump_process.err = -1;
+ objdump_process.no_stderr = 1;
if (start_command(&objdump_process)) {
pr_err("Failure starting to run %s\n", command);
err = -1;
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
index 3fc528c9270c..5e390a1a79ab 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
@@ -179,6 +179,8 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
decoder->record.phys_addr = ip;
break;
case ARM_SPE_COUNTER:
+ if (idx == SPE_CNT_PKT_HDR_INDEX_TOTAL_LAT)
+ decoder->record.latency = payload;
break;
case ARM_SPE_CONTEXT:
decoder->record.context_id = payload;
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
index 46a8556a9e95..69b31084d6be 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
@@ -33,6 +33,7 @@ struct arm_spe_record {
enum arm_spe_sample_type type;
int err;
u32 op;
+ u32 latency;
u64 from_ip;
u64 to_ip;
u64 timestamp;
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index fccac06b573a..d040406f3314 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -58,6 +58,8 @@ struct arm_spe {
u8 sample_branch;
u8 sample_remote_access;
u8 sample_memory;
+ u8 sample_instructions;
+ u64 instructions_sample_period;
u64 l1d_miss_id;
u64 l1d_access_id;
@@ -68,6 +70,7 @@ struct arm_spe {
u64 branch_miss_id;
u64 remote_access_id;
u64 memory_id;
+ u64 instructions_id;
u64 kernel_start;
@@ -90,6 +93,7 @@ struct arm_spe_queue {
u64 time;
u64 timestamp;
struct thread *thread;
+ u64 period_instructions;
};
static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
@@ -202,6 +206,7 @@ static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
speq->pid = -1;
speq->tid = -1;
speq->cpu = -1;
+ speq->period_instructions = 0;
/* params set */
params.get_trace = arm_spe_get_trace;
@@ -330,6 +335,7 @@ static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq,
sample.addr = record->virt_addr;
sample.phys_addr = record->phys_addr;
sample.data_src = data_src;
+ sample.weight = record->latency;
return arm_spe_deliver_synth_event(spe, speq, event, &sample);
}
@@ -347,20 +353,38 @@ static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
sample.id = spe_events_id;
sample.stream_id = spe_events_id;
sample.addr = record->to_ip;
+ sample.weight = record->latency;
return arm_spe_deliver_synth_event(spe, speq, event, &sample);
}
-#define SPE_MEM_TYPE (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS | \
- ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS | \
- ARM_SPE_REMOTE_ACCESS)
-
-static bool arm_spe__is_memory_event(enum arm_spe_sample_type type)
+static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
+ u64 spe_events_id, u64 data_src)
{
- if (type & SPE_MEM_TYPE)
- return true;
+ struct arm_spe *spe = speq->spe;
+ struct arm_spe_record *record = &speq->decoder->record;
+ union perf_event *event = speq->event_buf;
+ struct perf_sample sample = { .ip = 0, };
+
+ /*
+ * Handles perf instruction sampling period.
+ */
+ speq->period_instructions++;
+ if (speq->period_instructions < spe->instructions_sample_period)
+ return 0;
+ speq->period_instructions = 0;
+
+ arm_spe_prep_sample(spe, speq, event, &sample);
+
+ sample.id = spe_events_id;
+ sample.stream_id = spe_events_id;
+ sample.addr = record->virt_addr;
+ sample.phys_addr = record->phys_addr;
+ sample.data_src = data_src;
+ sample.period = spe->instructions_sample_period;
+ sample.weight = record->latency;
- return false;
+ return arm_spe_deliver_synth_event(spe, speq, event, &sample);
}
static u64 arm_spe__synth_data_source(const struct arm_spe_record *record)
@@ -369,8 +393,10 @@ static u64 arm_spe__synth_data_source(const struct arm_spe_record *record)
if (record->op == ARM_SPE_LD)
data_src.mem_op = PERF_MEM_OP_LOAD;
- else
+ else if (record->op == ARM_SPE_ST)
data_src.mem_op = PERF_MEM_OP_STORE;
+ else
+ return 0;
if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) {
data_src.mem_lvl = PERF_MEM_LVL_L3;
@@ -474,12 +500,22 @@ static int arm_spe_sample(struct arm_spe_queue *speq)
return err;
}
- if (spe->sample_memory && arm_spe__is_memory_event(record->type)) {
+ /*
+ * When data_src is zero it means the record is not a memory operation,
+ * skip to synthesize memory sample for this case.
+ */
+ if (spe->sample_memory && data_src) {
err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src);
if (err)
return err;
}
+ if (spe->sample_instructions) {
+ err = arm_spe__synth_instruction_sample(speq, spe->instructions_id, data_src);
+ if (err)
+ return err;
+ }
+
return 0;
}
@@ -991,9 +1027,11 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
memset(&attr, 0, sizeof(struct perf_event_attr));
attr.size = sizeof(struct perf_event_attr);
attr.type = PERF_TYPE_HARDWARE;
- attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
+ attr.sample_type = evsel->core.attr.sample_type &
+ (PERF_SAMPLE_MASK | PERF_SAMPLE_PHYS_ADDR);
attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
- PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC;
+ PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC |
+ PERF_SAMPLE_WEIGHT | PERF_SAMPLE_ADDR;
if (spe->timeless_decoding)
attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
else
@@ -1107,7 +1145,29 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
return err;
spe->memory_id = id;
arm_spe_set_event_name(evlist, id, "memory");
+ id += 1;
+ }
+
+ if (spe->synth_opts.instructions) {
+ if (spe->synth_opts.period_type != PERF_ITRACE_PERIOD_INSTRUCTIONS) {
+ pr_warning("Only instruction-based sampling period is currently supported by Arm SPE.\n");
+ goto synth_instructions_out;
+ }
+ if (spe->synth_opts.period > 1)
+ pr_warning("Arm SPE has a hardware-based sample period.\n"
+ "Additional instruction events will be discarded by --itrace\n");
+
+ spe->sample_instructions = true;
+ attr.config = PERF_COUNT_HW_INSTRUCTIONS;
+ attr.sample_period = spe->synth_opts.period;
+ spe->instructions_sample_period = attr.sample_period;
+ err = arm_spe_synth_event(session, &attr, id);
+ if (err)
+ return err;
+ spe->instructions_id = id;
+ arm_spe_set_event_name(evlist, id, "instructions");
}
+synth_instructions_out:
return 0;
}
diff --git a/tools/perf/util/arm64-frame-pointer-unwind-support.c b/tools/perf/util/arm64-frame-pointer-unwind-support.c
new file mode 100644
index 000000000000..4940be4a0569
--- /dev/null
+++ b/tools/perf/util/arm64-frame-pointer-unwind-support.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "arm64-frame-pointer-unwind-support.h"
+#include "callchain.h"
+#include "event.h"
+#include "perf_regs.h" // SMPL_REG_MASK
+#include "unwind.h"
+
+#define perf_event_arm_regs perf_event_arm64_regs
+#include "../../arch/arm64/include/uapi/asm/perf_regs.h"
+#undef perf_event_arm_regs
+
+struct entries {
+ u64 stack[2];
+ size_t length;
+};
+
+static bool get_leaf_frame_caller_enabled(struct perf_sample *sample)
+{
+ return callchain_param.record_mode == CALLCHAIN_FP && sample->user_regs.regs
+ && sample->user_regs.mask & SMPL_REG_MASK(PERF_REG_ARM64_LR);
+}
+
+static int add_entry(struct unwind_entry *entry, void *arg)
+{
+ struct entries *entries = arg;
+
+ entries->stack[entries->length++] = entry->ip;
+ return 0;
+}
+
+u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thread, int usr_idx)
+{
+ int ret;
+ struct entries entries = {};
+ struct regs_dump old_regs = sample->user_regs;
+
+ if (!get_leaf_frame_caller_enabled(sample))
+ return 0;
+
+ /*
+ * If PC and SP are not recorded, get the value of PC from the stack
+ * and set its mask. SP is not used when doing the unwinding but it
+ * still needs to be set to prevent failures.
+ */
+
+ if (!(sample->user_regs.mask & SMPL_REG_MASK(PERF_REG_ARM64_PC))) {
+ sample->user_regs.cache_mask |= SMPL_REG_MASK(PERF_REG_ARM64_PC);
+ sample->user_regs.cache_regs[PERF_REG_ARM64_PC] = sample->callchain->ips[usr_idx+1];
+ }
+
+ if (!(sample->user_regs.mask & SMPL_REG_MASK(PERF_REG_ARM64_SP))) {
+ sample->user_regs.cache_mask |= SMPL_REG_MASK(PERF_REG_ARM64_SP);
+ sample->user_regs.cache_regs[PERF_REG_ARM64_SP] = 0;
+ }
+
+ ret = unwind__get_entries(add_entry, &entries, thread, sample, 2, true);
+ sample->user_regs = old_regs;
+
+ if (ret || entries.length != 2)
+ return ret;
+
+ return callchain_param.order == ORDER_CALLER ? entries.stack[0] : entries.stack[1];
+}
diff --git a/tools/perf/util/arm64-frame-pointer-unwind-support.h b/tools/perf/util/arm64-frame-pointer-unwind-support.h
new file mode 100644
index 000000000000..32af9ce94398
--- /dev/null
+++ b/tools/perf/util/arm64-frame-pointer-unwind-support.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H
+#define __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H
+
+#include "event.h"
+#include "thread.h"
+
+u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thread, int user_idx);
+
+#endif /* __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H */
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index c679394b898d..511dd3caa1bc 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -123,9 +123,9 @@ int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
mm->prev = 0;
mm->idx = mp->idx;
mm->tid = mp->tid;
- mm->cpu = mp->cpu;
+ mm->cpu = mp->cpu.cpu;
- if (!mp->len) {
+ if (!mp->len || !mp->mmap_needed) {
mm->base = NULL;
return 0;
}
@@ -168,19 +168,26 @@ void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp,
}
void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
- struct evlist *evlist, int idx,
- bool per_cpu)
+ struct evlist *evlist,
+ struct evsel *evsel, int idx)
{
+ bool per_cpu = !perf_cpu_map__empty(evlist->core.user_requested_cpus);
+
+ mp->mmap_needed = evsel->needs_auxtrace_mmap;
+
+ if (!mp->mmap_needed)
+ return;
+
mp->idx = idx;
if (per_cpu) {
- mp->cpu = evlist->core.cpus->map[idx];
+ mp->cpu = perf_cpu_map__cpu(evlist->core.all_cpus, idx);
if (evlist->core.threads)
mp->tid = perf_thread_map__pid(evlist->core.threads, 0);
else
mp->tid = -1;
} else {
- mp->cpu = -1;
+ mp->cpu.cpu = -1;
mp->tid = perf_thread_map__pid(evlist->core.threads, idx);
}
}
@@ -292,7 +299,7 @@ static int auxtrace_queues__queue_buffer(struct auxtrace_queues *queues,
if (!queue->set) {
queue->set = true;
queue->tid = buffer->tid;
- queue->cpu = buffer->cpu;
+ queue->cpu = buffer->cpu.cpu;
}
buffer->buffer_nr = queues->next_buffer_nr++;
@@ -339,11 +346,11 @@ static int auxtrace_queues__split_buffer(struct auxtrace_queues *queues,
return 0;
}
-static bool filter_cpu(struct perf_session *session, int cpu)
+static bool filter_cpu(struct perf_session *session, struct perf_cpu cpu)
{
unsigned long *cpu_bitmap = session->itrace_synth_opts->cpu_bitmap;
- return cpu_bitmap && cpu != -1 && !test_bit(cpu, cpu_bitmap);
+ return cpu_bitmap && cpu.cpu != -1 && !test_bit(cpu.cpu, cpu_bitmap);
}
static int auxtrace_queues__add_buffer(struct auxtrace_queues *queues,
@@ -399,7 +406,7 @@ int auxtrace_queues__add_event(struct auxtrace_queues *queues,
struct auxtrace_buffer buffer = {
.pid = -1,
.tid = event->auxtrace.tid,
- .cpu = event->auxtrace.cpu,
+ .cpu = { event->auxtrace.cpu },
.data_offset = data_offset,
.offset = event->auxtrace.offset,
.reference = event->auxtrace.reference,
@@ -636,6 +643,22 @@ int auxtrace_parse_snapshot_options(struct auxtrace_record *itr,
return -EINVAL;
}
+static int evlist__enable_event_idx(struct evlist *evlist, struct evsel *evsel, int idx)
+{
+ bool per_cpu_mmaps = !perf_cpu_map__empty(evlist->core.user_requested_cpus);
+
+ if (per_cpu_mmaps) {
+ struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->core.all_cpus, idx);
+ int cpu_map_idx = perf_cpu_map__idx(evsel->core.cpus, evlist_cpu);
+
+ if (cpu_map_idx == -1)
+ return -EINVAL;
+ return perf_evsel__enable_cpu(&evsel->core, cpu_map_idx);
+ }
+
+ return perf_evsel__enable_thread(&evsel->core, idx);
+}
+
int auxtrace_record__read_finish(struct auxtrace_record *itr, int idx)
{
struct evsel *evsel;
@@ -1333,6 +1356,7 @@ void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts,
synth_opts->ptwrites = true;
synth_opts->pwr_events = true;
synth_opts->other_events = true;
+ synth_opts->intr_events = true;
synth_opts->errors = true;
synth_opts->flc = true;
synth_opts->llc = true;
@@ -1479,6 +1503,9 @@ int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts,
case 'o':
synth_opts->other_events = true;
break;
+ case 'I':
+ synth_opts->intr_events = true;
+ break;
case 'e':
synth_opts->errors = true;
if (get_flags(&p, &synth_opts->error_plus_flags,
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index bbf0d78c6401..cd0d25c2751c 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -15,6 +15,7 @@
#include <linux/list.h>
#include <linux/perf_event.h>
#include <linux/types.h>
+#include <internal/cpumap.h>
#include <asm/bitsperlong.h>
#include <asm/barrier.h>
@@ -75,6 +76,7 @@ enum itrace_period_type {
* @pwr_events: whether to synthesize power events
* @other_events: whether to synthesize other events recorded due to the use of
* aux_output
+ * @intr_events: whether to synthesize interrupt events
* @errors: whether to synthesize decoder error events
* @dont_decode: whether to skip decoding entirely
* @log: write a decoding log
@@ -119,6 +121,7 @@ struct itrace_synth_opts {
bool ptwrites;
bool pwr_events;
bool other_events;
+ bool intr_events;
bool errors;
bool dont_decode;
bool log;
@@ -240,7 +243,7 @@ struct auxtrace_buffer {
size_t size;
pid_t pid;
pid_t tid;
- int cpu;
+ struct perf_cpu cpu;
void *data;
off_t data_offset;
void *mmap_addr;
@@ -341,6 +344,10 @@ struct auxtrace_mmap {
* @idx: index of this mmap
* @tid: tid for a per-thread mmap (also set if there is only 1 tid on a per-cpu
* mmap) otherwise %0
+ * @mmap_needed: set to %false for non-auxtrace events. This is needed because
+ * auxtrace mmapping is done in the same code path as non-auxtrace
+ * mmapping but not every evsel that needs non-auxtrace mmapping
+ * also needs auxtrace mmapping.
* @cpu: cpu number for a per-cpu mmap otherwise %-1
*/
struct auxtrace_mmap_params {
@@ -350,7 +357,8 @@ struct auxtrace_mmap_params {
int prot;
int idx;
pid_t tid;
- int cpu;
+ bool mmap_needed;
+ struct perf_cpu cpu;
};
/**
@@ -487,8 +495,8 @@ void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp,
unsigned int auxtrace_pages,
bool auxtrace_overwrite);
void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
- struct evlist *evlist, int idx,
- bool per_cpu);
+ struct evlist *evlist,
+ struct evsel *evsel, int idx);
typedef int (*process_auxtrace_t)(struct perf_tool *tool,
struct mmap *map,
@@ -635,6 +643,8 @@ bool auxtrace__evsel_is_auxtrace(struct perf_session *session,
" p: synthesize power events\n" \
" o: synthesize other events recorded due to the use\n" \
" of aux-output (refer to perf record)\n" \
+" I: synthesize interrupt or similar (asynchronous) events\n" \
+" (e.g. Intel PT Event Trace)\n" \
" e[flags]: synthesize error events\n" \
" each flag must be preceded by + or -\n" \
" error flags are: o (overflow)\n" \
@@ -858,8 +868,8 @@ void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp,
unsigned int auxtrace_pages,
bool auxtrace_overwrite);
void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
- struct evlist *evlist, int idx,
- bool per_cpu);
+ struct evlist *evlist,
+ struct evsel *evsel, int idx);
#define ITRACE_HELP ""
diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c
index a517eaa51eb3..eee64ddb766d 100644
--- a/tools/perf/util/bpf-event.c
+++ b/tools/perf/util/bpf-event.c
@@ -22,7 +22,8 @@
#include "record.h"
#include "util/synthetic-events.h"
-struct btf * __weak btf__load_from_kernel_by_id(__u32 id)
+#ifndef HAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID
+struct btf *btf__load_from_kernel_by_id(__u32 id)
{
struct btf *btf;
#pragma GCC diagnostic push
@@ -32,8 +33,25 @@ struct btf * __weak btf__load_from_kernel_by_id(__u32 id)
return err ? ERR_PTR(err) : btf;
}
+#endif
+
+#ifndef HAVE_LIBBPF_BPF_PROG_LOAD
+int bpf_prog_load(enum bpf_prog_type prog_type,
+ const char *prog_name __maybe_unused,
+ const char *license,
+ const struct bpf_insn *insns, size_t insn_cnt,
+ const struct bpf_prog_load_opts *opts)
+{
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+ return bpf_load_program(prog_type, insns, insn_cnt, license,
+ opts->kern_version, opts->log_buf, opts->log_size);
+#pragma GCC diagnostic pop
+}
+#endif
-struct bpf_program * __weak
+#ifndef HAVE_LIBBPF_BPF_OBJECT__NEXT_PROGRAM
+struct bpf_program *
bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev)
{
#pragma GCC diagnostic push
@@ -41,8 +59,10 @@ bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev)
return bpf_program__next(prev, obj);
#pragma GCC diagnostic pop
}
+#endif
-struct bpf_map * __weak
+#ifndef HAVE_LIBBPF_BPF_OBJECT__NEXT_MAP
+struct bpf_map *
bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev)
{
#pragma GCC diagnostic push
@@ -50,8 +70,10 @@ bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev)
return bpf_map__next(prev, obj);
#pragma GCC diagnostic pop
}
+#endif
-const void * __weak
+#ifndef HAVE_LIBBPF_BTF__RAW_DATA
+const void *
btf__raw_data(const struct btf *btf_ro, __u32 *size)
{
#pragma GCC diagnostic push
@@ -59,6 +81,7 @@ btf__raw_data(const struct btf *btf_ro, __u32 *size)
return btf__get_raw_data(btf_ro, size);
#pragma GCC diagnostic pop
}
+#endif
static int snprintf_hex(char *buf, size_t size, unsigned char *data, size_t len)
{
@@ -92,7 +115,7 @@ static int machine__process_bpf_event_load(struct machine *machine,
for (i = 0; i < info_linear->info.nr_jited_ksyms; i++) {
u64 *addrs = (u64 *)(uintptr_t)(info_linear->info.jited_ksyms);
u64 addr = addrs[i];
- struct map *map = maps__find(&machine->kmaps, addr);
+ struct map *map = maps__find(machine__kernel_maps(machine), addr);
if (map) {
map->dso->binary_type = DSO_BINARY_TYPE__BPF_PROG_INFO;
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index 528aeb0ab79d..f8ad581ea247 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -26,6 +26,8 @@
#include "util.h"
#include "llvm-utils.h"
#include "c++/clang-c.h"
+#include "hashmap.h"
+#include "asm/bug.h"
#include <internal/xyarray.h>
@@ -49,38 +51,107 @@ struct bpf_prog_priv {
int *type_mapping;
};
+struct bpf_perf_object {
+ struct list_head list;
+ struct bpf_object *obj;
+};
+
+static LIST_HEAD(bpf_objects_list);
+static struct hashmap *bpf_program_hash;
+static struct hashmap *bpf_map_hash;
+
+static struct bpf_perf_object *
+bpf_perf_object__next(struct bpf_perf_object *prev)
+{
+ struct bpf_perf_object *next;
+
+ if (!prev)
+ next = list_first_entry(&bpf_objects_list,
+ struct bpf_perf_object,
+ list);
+ else
+ next = list_next_entry(prev, list);
+
+ /* Empty list is noticed here so don't need checking on entry. */
+ if (&next->list == &bpf_objects_list)
+ return NULL;
+
+ return next;
+}
+
+#define bpf_perf_object__for_each(perf_obj, tmp) \
+ for ((perf_obj) = bpf_perf_object__next(NULL), \
+ (tmp) = bpf_perf_object__next(perf_obj); \
+ (perf_obj) != NULL; \
+ (perf_obj) = (tmp), (tmp) = bpf_perf_object__next(tmp))
+
static bool libbpf_initialized;
+static int bpf_perf_object__add(struct bpf_object *obj)
+{
+ struct bpf_perf_object *perf_obj = zalloc(sizeof(*perf_obj));
+
+ if (perf_obj) {
+ INIT_LIST_HEAD(&perf_obj->list);
+ perf_obj->obj = obj;
+ list_add_tail(&perf_obj->list, &bpf_objects_list);
+ }
+ return perf_obj ? 0 : -ENOMEM;
+}
+
+static int libbpf_init(void)
+{
+ if (libbpf_initialized)
+ return 0;
+
+ libbpf_set_print(libbpf_perf_print);
+ libbpf_initialized = true;
+ return 0;
+}
+
struct bpf_object *
bpf__prepare_load_buffer(void *obj_buf, size_t obj_buf_sz, const char *name)
{
+ LIBBPF_OPTS(bpf_object_open_opts, opts, .object_name = name);
struct bpf_object *obj;
+ int err;
- if (!libbpf_initialized) {
- libbpf_set_print(libbpf_perf_print);
- libbpf_initialized = true;
- }
+ err = libbpf_init();
+ if (err)
+ return ERR_PTR(err);
- obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, name);
+ obj = bpf_object__open_mem(obj_buf, obj_buf_sz, &opts);
if (IS_ERR_OR_NULL(obj)) {
pr_debug("bpf: failed to load buffer\n");
return ERR_PTR(-EINVAL);
}
+ if (bpf_perf_object__add(obj)) {
+ bpf_object__close(obj);
+ return ERR_PTR(-ENOMEM);
+ }
+
return obj;
}
+static void bpf_perf_object__close(struct bpf_perf_object *perf_obj)
+{
+ list_del(&perf_obj->list);
+ bpf_object__close(perf_obj->obj);
+ free(perf_obj);
+}
+
struct bpf_object *bpf__prepare_load(const char *filename, bool source)
{
+ LIBBPF_OPTS(bpf_object_open_opts, opts, .object_name = filename);
struct bpf_object *obj;
+ int err;
- if (!libbpf_initialized) {
- libbpf_set_print(libbpf_perf_print);
- libbpf_initialized = true;
- }
+ err = libbpf_init();
+ if (err)
+ return ERR_PTR(err);
if (source) {
- int err;
void *obj_buf;
size_t obj_buf_sz;
@@ -94,35 +165,31 @@ struct bpf_object *bpf__prepare_load(const char *filename, bool source)
return ERR_PTR(-BPF_LOADER_ERRNO__COMPILE);
} else
pr_debug("bpf: successful builtin compilation\n");
- obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, filename);
+ obj = bpf_object__open_mem(obj_buf, obj_buf_sz, &opts);
if (!IS_ERR_OR_NULL(obj) && llvm_param.dump_obj)
llvm__dump_obj(filename, obj_buf, obj_buf_sz);
free(obj_buf);
- } else
+ } else {
obj = bpf_object__open(filename);
+ }
if (IS_ERR_OR_NULL(obj)) {
pr_debug("bpf: failed to load %s\n", filename);
return obj;
}
- return obj;
-}
-
-void bpf__clear(void)
-{
- struct bpf_object *obj, *tmp;
-
- bpf_object__for_each_safe(obj, tmp) {
- bpf__unprobe(obj);
+ if (bpf_perf_object__add(obj)) {
bpf_object__close(obj);
+ return ERR_PTR(-BPF_LOADER_ERRNO__COMPILE);
}
+
+ return obj;
}
static void
-clear_prog_priv(struct bpf_program *prog __maybe_unused,
+clear_prog_priv(const struct bpf_program *prog __maybe_unused,
void *_priv)
{
struct bpf_prog_priv *priv = _priv;
@@ -135,6 +202,83 @@ clear_prog_priv(struct bpf_program *prog __maybe_unused,
free(priv);
}
+static void bpf_program_hash_free(void)
+{
+ struct hashmap_entry *cur;
+ size_t bkt;
+
+ if (IS_ERR_OR_NULL(bpf_program_hash))
+ return;
+
+ hashmap__for_each_entry(bpf_program_hash, cur, bkt)
+ clear_prog_priv(cur->key, cur->value);
+
+ hashmap__free(bpf_program_hash);
+ bpf_program_hash = NULL;
+}
+
+static void bpf_map_hash_free(void);
+
+void bpf__clear(void)
+{
+ struct bpf_perf_object *perf_obj, *tmp;
+
+ bpf_perf_object__for_each(perf_obj, tmp) {
+ bpf__unprobe(perf_obj->obj);
+ bpf_perf_object__close(perf_obj);
+ }
+
+ bpf_program_hash_free();
+ bpf_map_hash_free();
+}
+
+static size_t ptr_hash(const void *__key, void *ctx __maybe_unused)
+{
+ return (size_t) __key;
+}
+
+static bool ptr_equal(const void *key1, const void *key2,
+ void *ctx __maybe_unused)
+{
+ return key1 == key2;
+}
+
+static void *program_priv(const struct bpf_program *prog)
+{
+ void *priv;
+
+ if (IS_ERR_OR_NULL(bpf_program_hash))
+ return NULL;
+ if (!hashmap__find(bpf_program_hash, prog, &priv))
+ return NULL;
+ return priv;
+}
+
+static int program_set_priv(struct bpf_program *prog, void *priv)
+{
+ void *old_priv;
+
+ /*
+ * Should not happen, we warn about it in the
+ * caller function - config_bpf_program
+ */
+ if (IS_ERR(bpf_program_hash))
+ return PTR_ERR(bpf_program_hash);
+
+ if (!bpf_program_hash) {
+ bpf_program_hash = hashmap__new(ptr_hash, ptr_equal, NULL);
+ if (IS_ERR(bpf_program_hash))
+ return PTR_ERR(bpf_program_hash);
+ }
+
+ old_priv = program_priv(prog);
+ if (old_priv) {
+ clear_prog_priv(prog, old_priv);
+ return hashmap__set(bpf_program_hash, prog, priv, NULL, NULL);
+ }
+ return hashmap__add(bpf_program_hash, prog, priv);
+}
+
static int
prog_config__exec(const char *value, struct perf_probe_event *pev)
{
@@ -376,7 +520,7 @@ config_bpf_program(struct bpf_program *prog)
pr_debug("bpf: config '%s' is ok\n", config_str);
set_priv:
- err = bpf_program__set_priv(prog, priv, clear_prog_priv);
+ err = program_set_priv(prog, priv);
if (err) {
pr_debug("Failed to set priv for program '%s'\n", config_str);
goto errout;
@@ -417,14 +561,14 @@ preproc_gen_prologue(struct bpf_program *prog, int n,
struct bpf_insn *orig_insns, int orig_insns_cnt,
struct bpf_prog_prep_result *res)
{
- struct bpf_prog_priv *priv = bpf_program__priv(prog);
+ struct bpf_prog_priv *priv = program_priv(prog);
struct probe_trace_event *tev;
struct perf_probe_event *pev;
struct bpf_insn *buf;
size_t prologue_cnt = 0;
int i, err;
- if (IS_ERR(priv) || !priv || priv->is_tp)
+ if (IS_ERR_OR_NULL(priv) || priv->is_tp)
goto errout;
pev = &priv->pev;
@@ -568,12 +712,12 @@ static int map_prologue(struct perf_probe_event *pev, int *mapping,
static int hook_load_preprocessor(struct bpf_program *prog)
{
- struct bpf_prog_priv *priv = bpf_program__priv(prog);
+ struct bpf_prog_priv *priv = program_priv(prog);
struct perf_probe_event *pev;
bool need_prologue = false;
int err, i;
- if (IS_ERR(priv) || !priv) {
+ if (IS_ERR_OR_NULL(priv)) {
pr_debug("Internal error when hook preprocessor\n");
return -BPF_LOADER_ERRNO__INTERNAL;
}
@@ -644,18 +788,21 @@ int bpf__probe(struct bpf_object *obj)
if (err)
goto out;
- priv = bpf_program__priv(prog);
- if (IS_ERR(priv) || !priv) {
- err = PTR_ERR(priv);
+ priv = program_priv(prog);
+ if (IS_ERR_OR_NULL(priv)) {
+ if (!priv)
+ err = -BPF_LOADER_ERRNO__INTERNAL;
+ else
+ err = PTR_ERR(priv);
goto out;
}
if (priv->is_tp) {
- bpf_program__set_tracepoint(prog);
+ bpf_program__set_type(prog, BPF_PROG_TYPE_TRACEPOINT);
continue;
}
- bpf_program__set_kprobe(prog);
+ bpf_program__set_type(prog, BPF_PROG_TYPE_KPROBE);
pev = &priv->pev;
err = convert_perf_probe_events(pev, 1);
@@ -693,10 +840,10 @@ int bpf__unprobe(struct bpf_object *obj)
struct bpf_program *prog;
bpf_object__for_each_program(prog, obj) {
- struct bpf_prog_priv *priv = bpf_program__priv(prog);
+ struct bpf_prog_priv *priv = program_priv(prog);
int i;
- if (IS_ERR(priv) || !priv || priv->is_tp)
+ if (IS_ERR_OR_NULL(priv) || priv->is_tp)
continue;
for (i = 0; i < priv->pev.ntevs; i++) {
@@ -749,12 +896,12 @@ int bpf__foreach_event(struct bpf_object *obj,
int err;
bpf_object__for_each_program(prog, obj) {
- struct bpf_prog_priv *priv = bpf_program__priv(prog);
+ struct bpf_prog_priv *priv = program_priv(prog);
struct probe_trace_event *tev;
struct perf_probe_event *pev;
int i, fd;
- if (IS_ERR(priv) || !priv) {
+ if (IS_ERR_OR_NULL(priv)) {
pr_debug("bpf: failed to get private field\n");
return -BPF_LOADER_ERRNO__INTERNAL;
}
@@ -845,7 +992,7 @@ bpf_map_priv__purge(struct bpf_map_priv *priv)
}
static void
-bpf_map_priv__clear(struct bpf_map *map __maybe_unused,
+bpf_map_priv__clear(const struct bpf_map *map __maybe_unused,
void *_priv)
{
struct bpf_map_priv *priv = _priv;
@@ -854,6 +1001,53 @@ bpf_map_priv__clear(struct bpf_map *map __maybe_unused,
free(priv);
}
+static void *map_priv(const struct bpf_map *map)
+{
+ void *priv;
+
+ if (IS_ERR_OR_NULL(bpf_map_hash))
+ return NULL;
+ if (!hashmap__find(bpf_map_hash, map, &priv))
+ return NULL;
+ return priv;
+}
+
+static void bpf_map_hash_free(void)
+{
+ struct hashmap_entry *cur;
+ size_t bkt;
+
+ if (IS_ERR_OR_NULL(bpf_map_hash))
+ return;
+
+ hashmap__for_each_entry(bpf_map_hash, cur, bkt)
+ bpf_map_priv__clear(cur->key, cur->value);
+
+ hashmap__free(bpf_map_hash);
+ bpf_map_hash = NULL;
+}
+
+static int map_set_priv(struct bpf_map *map, void *priv)
+{
+ void *old_priv;
+
+ if (WARN_ON_ONCE(IS_ERR(bpf_map_hash)))
+ return PTR_ERR(bpf_program_hash);
+
+ if (!bpf_map_hash) {
+ bpf_map_hash = hashmap__new(ptr_hash, ptr_equal, NULL);
+ if (IS_ERR(bpf_map_hash))
+ return PTR_ERR(bpf_map_hash);
+ }
+
+ old_priv = map_priv(map);
+ if (old_priv) {
+ bpf_map_priv__clear(map, old_priv);
+ return hashmap__set(bpf_map_hash, map, priv, NULL, NULL);
+ }
+ return hashmap__add(bpf_map_hash, map, priv);
+}
+
static int
bpf_map_op_setkey(struct bpf_map_op *op, struct parse_events_term *term)
{
@@ -953,7 +1147,7 @@ static int
bpf_map__add_op(struct bpf_map *map, struct bpf_map_op *op)
{
const char *map_name = bpf_map__name(map);
- struct bpf_map_priv *priv = bpf_map__priv(map);
+ struct bpf_map_priv *priv = map_priv(map);
if (IS_ERR(priv)) {
pr_debug("Failed to get private from map %s\n", map_name);
@@ -968,7 +1162,7 @@ bpf_map__add_op(struct bpf_map *map, struct bpf_map_op *op)
}
INIT_LIST_HEAD(&priv->ops_list);
- if (bpf_map__set_priv(map, priv, bpf_map_priv__clear)) {
+ if (map_set_priv(map, priv)) {
free(priv);
return -BPF_LOADER_ERRNO__INTERNAL;
}
@@ -1002,24 +1196,22 @@ __bpf_map__config_value(struct bpf_map *map,
{
struct bpf_map_op *op;
const char *map_name = bpf_map__name(map);
- const struct bpf_map_def *def = bpf_map__def(map);
- if (IS_ERR(def)) {
- pr_debug("Unable to get map definition from '%s'\n",
- map_name);
+ if (!map) {
+ pr_debug("Map '%s' is invalid\n", map_name);
return -BPF_LOADER_ERRNO__INTERNAL;
}
- if (def->type != BPF_MAP_TYPE_ARRAY) {
+ if (bpf_map__type(map) != BPF_MAP_TYPE_ARRAY) {
pr_debug("Map %s type is not BPF_MAP_TYPE_ARRAY\n",
map_name);
return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
}
- if (def->key_size < sizeof(unsigned int)) {
+ if (bpf_map__key_size(map) < sizeof(unsigned int)) {
pr_debug("Map %s has incorrect key size\n", map_name);
return -BPF_LOADER_ERRNO__OBJCONF_MAP_KEYSIZE;
}
- switch (def->value_size) {
+ switch (bpf_map__value_size(map)) {
case 1:
case 2:
case 4:
@@ -1061,7 +1253,6 @@ __bpf_map__config_event(struct bpf_map *map,
struct parse_events_term *term,
struct evlist *evlist)
{
- const struct bpf_map_def *def;
struct bpf_map_op *op;
const char *map_name = bpf_map__name(map);
struct evsel *evsel = evlist__find_evsel_by_str(evlist, term->val.str);
@@ -1072,18 +1263,16 @@ __bpf_map__config_event(struct bpf_map *map,
return -BPF_LOADER_ERRNO__OBJCONF_MAP_NOEVT;
}
- def = bpf_map__def(map);
- if (IS_ERR(def)) {
- pr_debug("Unable to get map definition from '%s'\n",
- map_name);
- return PTR_ERR(def);
+ if (!map) {
+ pr_debug("Map '%s' is invalid\n", map_name);
+ return PTR_ERR(map);
}
/*
* No need to check key_size and value_size:
* kernel has already checked them.
*/
- if (def->type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
+ if (bpf_map__type(map) != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
pr_debug("Map %s type is not BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
map_name);
return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
@@ -1132,7 +1321,6 @@ config_map_indices_range_check(struct parse_events_term *term,
const char *map_name)
{
struct parse_events_array *array = &term->array;
- const struct bpf_map_def *def;
unsigned int i;
if (!array->nr_ranges)
@@ -1143,10 +1331,8 @@ config_map_indices_range_check(struct parse_events_term *term,
return -BPF_LOADER_ERRNO__INTERNAL;
}
- def = bpf_map__def(map);
- if (IS_ERR(def)) {
- pr_debug("ERROR: Unable to get map definition from '%s'\n",
- map_name);
+ if (!map) {
+ pr_debug("Map '%s' is invalid\n", map_name);
return -BPF_LOADER_ERRNO__INTERNAL;
}
@@ -1155,7 +1341,7 @@ config_map_indices_range_check(struct parse_events_term *term,
size_t length = array->ranges[i].length;
unsigned int idx = start + length - 1;
- if (idx >= def->max_entries) {
+ if (idx >= bpf_map__max_entries(map)) {
pr_debug("ERROR: index %d too large\n", idx);
return -BPF_LOADER_ERRNO__OBJCONF_MAP_IDX2BIG;
}
@@ -1217,9 +1403,10 @@ bpf__obj_config_map(struct bpf_object *obj,
pr_debug("ERROR: Invalid map config option '%s'\n", map_opt);
err = -BPF_LOADER_ERRNO__OBJCONF_MAP_OPT;
out:
- free(map_name);
if (!err)
*key_scan_pos += strlen(map_opt);
+
+ free(map_name);
return err;
}
@@ -1248,21 +1435,21 @@ out:
}
typedef int (*map_config_func_t)(const char *name, int map_fd,
- const struct bpf_map_def *pdef,
+ const struct bpf_map *map,
struct bpf_map_op *op,
void *pkey, void *arg);
static int
foreach_key_array_all(map_config_func_t func,
void *arg, const char *name,
- int map_fd, const struct bpf_map_def *pdef,
+ int map_fd, const struct bpf_map *map,
struct bpf_map_op *op)
{
unsigned int i;
int err;
- for (i = 0; i < pdef->max_entries; i++) {
- err = func(name, map_fd, pdef, op, &i, arg);
+ for (i = 0; i < bpf_map__max_entries(map); i++) {
+ err = func(name, map_fd, map, op, &i, arg);
if (err) {
pr_debug("ERROR: failed to insert value to %s[%u]\n",
name, i);
@@ -1275,7 +1462,7 @@ foreach_key_array_all(map_config_func_t func,
static int
foreach_key_array_ranges(map_config_func_t func, void *arg,
const char *name, int map_fd,
- const struct bpf_map_def *pdef,
+ const struct bpf_map *map,
struct bpf_map_op *op)
{
unsigned int i, j;
@@ -1288,7 +1475,7 @@ foreach_key_array_ranges(map_config_func_t func, void *arg,
for (j = 0; j < length; j++) {
unsigned int idx = start + j;
- err = func(name, map_fd, pdef, op, &idx, arg);
+ err = func(name, map_fd, map, op, &idx, arg);
if (err) {
pr_debug("ERROR: failed to insert value to %s[%u]\n",
name, idx);
@@ -1304,11 +1491,10 @@ bpf_map_config_foreach_key(struct bpf_map *map,
map_config_func_t func,
void *arg)
{
- int err, map_fd;
+ int err, map_fd, type;
struct bpf_map_op *op;
- const struct bpf_map_def *def;
const char *name = bpf_map__name(map);
- struct bpf_map_priv *priv = bpf_map__priv(map);
+ struct bpf_map_priv *priv = map_priv(map);
if (IS_ERR(priv)) {
pr_debug("ERROR: failed to get private from map %s\n", name);
@@ -1319,9 +1505,8 @@ bpf_map_config_foreach_key(struct bpf_map *map,
return 0;
}
- def = bpf_map__def(map);
- if (IS_ERR(def)) {
- pr_debug("ERROR: failed to get definition from map %s\n", name);
+ if (!map) {
+ pr_debug("Map '%s' is invalid\n", name);
return -BPF_LOADER_ERRNO__INTERNAL;
}
map_fd = bpf_map__fd(map);
@@ -1330,19 +1515,19 @@ bpf_map_config_foreach_key(struct bpf_map *map,
return map_fd;
}
+ type = bpf_map__type(map);
list_for_each_entry(op, &priv->ops_list, list) {
- switch (def->type) {
+ switch (type) {
case BPF_MAP_TYPE_ARRAY:
case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
switch (op->key_type) {
case BPF_MAP_KEY_ALL:
err = foreach_key_array_all(func, arg, name,
- map_fd, def, op);
+ map_fd, map, op);
break;
case BPF_MAP_KEY_RANGES:
err = foreach_key_array_ranges(func, arg, name,
- map_fd, def,
- op);
+ map_fd, map, op);
break;
default:
pr_debug("ERROR: keytype for map '%s' invalid\n",
@@ -1451,7 +1636,7 @@ apply_config_evsel_for_key(const char *name, int map_fd, void *pkey,
static int
apply_obj_config_map_for_key(const char *name, int map_fd,
- const struct bpf_map_def *pdef,
+ const struct bpf_map *map,
struct bpf_map_op *op,
void *pkey, void *arg __maybe_unused)
{
@@ -1460,7 +1645,7 @@ apply_obj_config_map_for_key(const char *name, int map_fd,
switch (op->op_type) {
case BPF_MAP_OP_SET_VALUE:
err = apply_config_value_for_key(map_fd, pkey,
- pdef->value_size,
+ bpf_map__value_size(map),
op->v.value);
break;
case BPF_MAP_OP_SET_EVSEL:
@@ -1498,11 +1683,11 @@ apply_obj_config_object(struct bpf_object *obj)
int bpf__apply_obj_config(void)
{
- struct bpf_object *obj, *tmp;
+ struct bpf_perf_object *perf_obj, *tmp;
int err;
- bpf_object__for_each_safe(obj, tmp) {
- err = apply_obj_config_object(obj);
+ bpf_perf_object__for_each(perf_obj, tmp) {
+ err = apply_obj_config_object(perf_obj->obj);
if (err)
return err;
}
@@ -1510,27 +1695,25 @@ int bpf__apply_obj_config(void)
return 0;
}
-#define bpf__for_each_map(pos, obj, objtmp) \
- bpf_object__for_each_safe(obj, objtmp) \
- bpf_object__for_each_map(pos, obj)
+#define bpf__perf_for_each_map(map, pobj, tmp) \
+ bpf_perf_object__for_each(pobj, tmp) \
+ bpf_object__for_each_map(map, pobj->obj)
-#define bpf__for_each_map_named(pos, obj, objtmp, name) \
- bpf__for_each_map(pos, obj, objtmp) \
- if (bpf_map__name(pos) && \
- (strcmp(name, \
- bpf_map__name(pos)) == 0))
+#define bpf__perf_for_each_map_named(map, pobj, pobjtmp, name) \
+ bpf__perf_for_each_map(map, pobj, pobjtmp) \
+ if (bpf_map__name(map) && (strcmp(name, bpf_map__name(map)) == 0))
struct evsel *bpf__setup_output_event(struct evlist *evlist, const char *name)
{
struct bpf_map_priv *tmpl_priv = NULL;
- struct bpf_object *obj, *tmp;
+ struct bpf_perf_object *perf_obj, *tmp;
struct evsel *evsel = NULL;
struct bpf_map *map;
int err;
bool need_init = false;
- bpf__for_each_map_named(map, obj, tmp, name) {
- struct bpf_map_priv *priv = bpf_map__priv(map);
+ bpf__perf_for_each_map_named(map, perf_obj, tmp, name) {
+ struct bpf_map_priv *priv = map_priv(map);
if (IS_ERR(priv))
return ERR_PTR(-BPF_LOADER_ERRNO__INTERNAL);
@@ -1565,8 +1748,8 @@ struct evsel *bpf__setup_output_event(struct evlist *evlist, const char *name)
evsel = evlist__last(evlist);
}
- bpf__for_each_map_named(map, obj, tmp, name) {
- struct bpf_map_priv *priv = bpf_map__priv(map);
+ bpf__perf_for_each_map_named(map, perf_obj, tmp, name) {
+ struct bpf_map_priv *priv = map_priv(map);
if (IS_ERR(priv))
return ERR_PTR(-BPF_LOADER_ERRNO__INTERNAL);
@@ -1578,7 +1761,7 @@ struct evsel *bpf__setup_output_event(struct evlist *evlist, const char *name)
if (!priv)
return ERR_PTR(-ENOMEM);
- err = bpf_map__set_priv(map, priv, bpf_map_priv__clear);
+ err = map_set_priv(map, priv);
if (err) {
bpf_map_priv__clear(map, priv);
return ERR_PTR(err);
diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c
index 5a97fd7d0a71..ef1c15e4aeba 100644
--- a/tools/perf/util/bpf_counter.c
+++ b/tools/perf/util/bpf_counter.c
@@ -224,25 +224,25 @@ static int bpf_program_profiler__disable(struct evsel *evsel)
static int bpf_program_profiler__read(struct evsel *evsel)
{
- // perf_cpu_map uses /sys/devices/system/cpu/online
- int num_cpu = evsel__nr_cpus(evsel);
// BPF_MAP_TYPE_PERCPU_ARRAY uses /sys/devices/system/cpu/possible
// Sometimes possible > online, like on a Ryzen 3900X that has 24
// threads but its possible showed 0-31 -acme
int num_cpu_bpf = libbpf_num_possible_cpus();
struct bpf_perf_event_value values[num_cpu_bpf];
struct bpf_counter *counter;
+ struct perf_counts_values *counts;
int reading_map_fd;
__u32 key = 0;
- int err, cpu;
+ int err, idx, bpf_cpu;
if (list_empty(&evsel->bpf_counter_list))
return -EAGAIN;
- for (cpu = 0; cpu < num_cpu; cpu++) {
- perf_counts(evsel->counts, cpu, 0)->val = 0;
- perf_counts(evsel->counts, cpu, 0)->ena = 0;
- perf_counts(evsel->counts, cpu, 0)->run = 0;
+ perf_cpu_map__for_each_idx(idx, evsel__cpus(evsel)) {
+ counts = perf_counts(evsel->counts, idx, 0);
+ counts->val = 0;
+ counts->ena = 0;
+ counts->run = 0;
}
list_for_each_entry(counter, &evsel->bpf_counter_list, list) {
struct bpf_prog_profiler_bpf *skel = counter->skel;
@@ -256,16 +256,21 @@ static int bpf_program_profiler__read(struct evsel *evsel)
return err;
}
- for (cpu = 0; cpu < num_cpu; cpu++) {
- perf_counts(evsel->counts, cpu, 0)->val += values[cpu].counter;
- perf_counts(evsel->counts, cpu, 0)->ena += values[cpu].enabled;
- perf_counts(evsel->counts, cpu, 0)->run += values[cpu].running;
+ for (bpf_cpu = 0; bpf_cpu < num_cpu_bpf; bpf_cpu++) {
+ idx = perf_cpu_map__idx(evsel__cpus(evsel),
+ (struct perf_cpu){.cpu = bpf_cpu});
+ if (idx == -1)
+ continue;
+ counts = perf_counts(evsel->counts, idx, 0);
+ counts->val += values[bpf_cpu].counter;
+ counts->ena += values[bpf_cpu].enabled;
+ counts->run += values[bpf_cpu].running;
}
}
return 0;
}
-static int bpf_program_profiler__install_pe(struct evsel *evsel, int cpu,
+static int bpf_program_profiler__install_pe(struct evsel *evsel, int cpu_map_idx,
int fd)
{
struct bpf_prog_profiler_bpf *skel;
@@ -277,7 +282,7 @@ static int bpf_program_profiler__install_pe(struct evsel *evsel, int cpu,
assert(skel != NULL);
ret = bpf_map_update_elem(bpf_map__fd(skel->maps.events),
- &cpu, &fd, BPF_ANY);
+ &cpu_map_idx, &fd, BPF_ANY);
if (ret)
return ret;
}
@@ -307,7 +312,10 @@ static bool bperf_attr_map_compatible(int attr_map_fd)
(map_info.value_size == sizeof(struct perf_event_attr_map_entry));
}
-int __weak
+#ifndef HAVE_LIBBPF_BPF_MAP_CREATE
+LIBBPF_API int bpf_create_map(enum bpf_map_type map_type, int key_size,
+ int value_size, int max_entries, __u32 map_flags);
+int
bpf_map_create(enum bpf_map_type map_type,
const char *map_name __maybe_unused,
__u32 key_size,
@@ -320,6 +328,7 @@ bpf_map_create(enum bpf_map_type map_type,
return bpf_create_map(map_type, key_size, value_size, max_entries, 0);
#pragma GCC diagnostic pop
}
+#endif
static int bperf_lock_attr_map(struct target *target)
{
@@ -554,7 +563,7 @@ static int bperf__load(struct evsel *evsel, struct target *target)
filter_type == BPERF_FILTER_TGID)
key = evsel->core.threads->map[i].pid;
else if (filter_type == BPERF_FILTER_CPU)
- key = evsel->core.cpus->map[i];
+ key = evsel->core.cpus->map[i].cpu;
else
break;
@@ -580,12 +589,12 @@ out:
return err;
}
-static int bperf__install_pe(struct evsel *evsel, int cpu, int fd)
+static int bperf__install_pe(struct evsel *evsel, int cpu_map_idx, int fd)
{
struct bperf_leader_bpf *skel = evsel->leader_skel;
return bpf_map_update_elem(bpf_map__fd(skel->maps.events),
- &cpu, &fd, BPF_ANY);
+ &cpu_map_idx, &fd, BPF_ANY);
}
/*
@@ -598,7 +607,7 @@ static int bperf_sync_counters(struct evsel *evsel)
num_cpu = all_cpu_map->nr;
for (i = 0; i < num_cpu; i++) {
- cpu = all_cpu_map->map[i];
+ cpu = all_cpu_map->map[i].cpu;
bperf_trigger_reading(evsel->bperf_leader_prog_fd, cpu);
}
return 0;
@@ -619,15 +628,18 @@ static int bperf__disable(struct evsel *evsel)
static int bperf__read(struct evsel *evsel)
{
struct bperf_follower_bpf *skel = evsel->follower_skel;
- __u32 num_cpu_bpf = cpu__max_cpu();
+ __u32 num_cpu_bpf = cpu__max_cpu().cpu;
struct bpf_perf_event_value values[num_cpu_bpf];
+ struct perf_counts_values *counts;
int reading_map_fd, err = 0;
- __u32 i, j, num_cpu;
+ __u32 i;
+ int j;
bperf_sync_counters(evsel);
reading_map_fd = bpf_map__fd(skel->maps.accum_readings);
for (i = 0; i < bpf_map__max_entries(skel->maps.accum_readings); i++) {
+ struct perf_cpu entry;
__u32 cpu;
err = bpf_map_lookup_elem(reading_map_fd, &i, values);
@@ -637,30 +649,32 @@ static int bperf__read(struct evsel *evsel)
case BPERF_FILTER_GLOBAL:
assert(i == 0);
- num_cpu = all_cpu_map->nr;
- for (j = 0; j < num_cpu; j++) {
- cpu = all_cpu_map->map[j];
- perf_counts(evsel->counts, cpu, 0)->val = values[cpu].counter;
- perf_counts(evsel->counts, cpu, 0)->ena = values[cpu].enabled;
- perf_counts(evsel->counts, cpu, 0)->run = values[cpu].running;
+ perf_cpu_map__for_each_cpu(entry, j, evsel__cpus(evsel)) {
+ counts = perf_counts(evsel->counts, j, 0);
+ counts->val = values[entry.cpu].counter;
+ counts->ena = values[entry.cpu].enabled;
+ counts->run = values[entry.cpu].running;
}
break;
case BPERF_FILTER_CPU:
- cpu = evsel->core.cpus->map[i];
- perf_counts(evsel->counts, i, 0)->val = values[cpu].counter;
- perf_counts(evsel->counts, i, 0)->ena = values[cpu].enabled;
- perf_counts(evsel->counts, i, 0)->run = values[cpu].running;
+ cpu = perf_cpu_map__cpu(evsel__cpus(evsel), i).cpu;
+ assert(cpu >= 0);
+ counts = perf_counts(evsel->counts, i, 0);
+ counts->val = values[cpu].counter;
+ counts->ena = values[cpu].enabled;
+ counts->run = values[cpu].running;
break;
case BPERF_FILTER_PID:
case BPERF_FILTER_TGID:
- perf_counts(evsel->counts, 0, i)->val = 0;
- perf_counts(evsel->counts, 0, i)->ena = 0;
- perf_counts(evsel->counts, 0, i)->run = 0;
+ counts = perf_counts(evsel->counts, 0, i);
+ counts->val = 0;
+ counts->ena = 0;
+ counts->run = 0;
for (cpu = 0; cpu < num_cpu_bpf; cpu++) {
- perf_counts(evsel->counts, 0, i)->val += values[cpu].counter;
- perf_counts(evsel->counts, 0, i)->ena += values[cpu].enabled;
- perf_counts(evsel->counts, 0, i)->run += values[cpu].running;
+ counts->val += values[cpu].counter;
+ counts->ena += values[cpu].enabled;
+ counts->run += values[cpu].running;
}
break;
default:
@@ -771,11 +785,11 @@ static inline bool bpf_counter_skip(struct evsel *evsel)
evsel->follower_skel == NULL;
}
-int bpf_counter__install_pe(struct evsel *evsel, int cpu, int fd)
+int bpf_counter__install_pe(struct evsel *evsel, int cpu_map_idx, int fd)
{
if (bpf_counter_skip(evsel))
return 0;
- return evsel->bpf_counter_ops->install_pe(evsel, cpu, fd);
+ return evsel->bpf_counter_ops->install_pe(evsel, cpu_map_idx, fd);
}
int bpf_counter__load(struct evsel *evsel, struct target *target)
diff --git a/tools/perf/util/bpf_counter.h b/tools/perf/util/bpf_counter.h
index 65ebaa6694fb..4dbf26408b69 100644
--- a/tools/perf/util/bpf_counter.h
+++ b/tools/perf/util/bpf_counter.h
@@ -16,7 +16,7 @@ typedef int (*bpf_counter_evsel_op)(struct evsel *evsel);
typedef int (*bpf_counter_evsel_target_op)(struct evsel *evsel,
struct target *target);
typedef int (*bpf_counter_evsel_install_pe_op)(struct evsel *evsel,
- int cpu,
+ int cpu_map_idx,
int fd);
struct bpf_counter_ops {
@@ -40,7 +40,7 @@ int bpf_counter__enable(struct evsel *evsel);
int bpf_counter__disable(struct evsel *evsel);
int bpf_counter__read(struct evsel *evsel);
void bpf_counter__destroy(struct evsel *evsel);
-int bpf_counter__install_pe(struct evsel *evsel, int cpu, int fd);
+int bpf_counter__install_pe(struct evsel *evsel, int cpu_map_idx, int fd);
#else /* HAVE_BPF_SKEL */
diff --git a/tools/perf/util/bpf_counter_cgroup.c b/tools/perf/util/bpf_counter_cgroup.c
index cbc6c2bca488..63b9db657442 100644
--- a/tools/perf/util/bpf_counter_cgroup.c
+++ b/tools/perf/util/bpf_counter_cgroup.c
@@ -46,9 +46,9 @@ static int bperf_load_program(struct evlist *evlist)
struct bpf_link *link;
struct evsel *evsel;
struct cgroup *cgrp, *leader_cgrp;
- __u32 i, cpu;
- __u32 nr_cpus = evlist->core.all_cpus->nr;
- int total_cpus = cpu__max_cpu();
+ int i, j;
+ struct perf_cpu cpu;
+ int total_cpus = cpu__max_cpu().cpu;
int map_size, map_fd;
int prog_fd, err;
@@ -93,9 +93,9 @@ static int bperf_load_program(struct evlist *evlist)
goto out;
}
- for (i = 0; i < nr_cpus; i++) {
+ perf_cpu_map__for_each_cpu(cpu, i, evlist->core.all_cpus) {
link = bpf_program__attach_perf_event(skel->progs.on_cgrp_switch,
- FD(cgrp_switch, i));
+ FD(cgrp_switch, cpu.cpu));
if (IS_ERR(link)) {
pr_err("Failed to attach cgroup program\n");
err = PTR_ERR(link);
@@ -122,10 +122,9 @@ static int bperf_load_program(struct evlist *evlist)
}
map_fd = bpf_map__fd(skel->maps.events);
- for (cpu = 0; cpu < nr_cpus; cpu++) {
- int fd = FD(evsel, cpu);
- __u32 idx = evsel->core.idx * total_cpus +
- evlist->core.all_cpus->map[cpu];
+ perf_cpu_map__for_each_cpu(cpu, j, evlist->core.all_cpus) {
+ int fd = FD(evsel, cpu.cpu);
+ __u32 idx = evsel->core.idx * total_cpus + cpu.cpu;
err = bpf_map_update_elem(map_fd, &idx, &fd,
BPF_ANY);
@@ -207,14 +206,12 @@ static int bperf_cgrp__install_pe(struct evsel *evsel __maybe_unused,
*/
static int bperf_cgrp__sync_counters(struct evlist *evlist)
{
- int i, cpu;
- int nr_cpus = evlist->core.all_cpus->nr;
+ struct perf_cpu cpu;
+ int idx;
int prog_fd = bpf_program__fd(skel->progs.trigger_read);
- for (i = 0; i < nr_cpus; i++) {
- cpu = evlist->core.all_cpus->map[i];
- bperf_trigger_reading(prog_fd, cpu);
- }
+ perf_cpu_map__for_each_cpu(cpu, idx, evlist->core.all_cpus)
+ bperf_trigger_reading(prog_fd, cpu.cpu);
return 0;
}
@@ -244,12 +241,10 @@ static int bperf_cgrp__disable(struct evsel *evsel)
static int bperf_cgrp__read(struct evsel *evsel)
{
struct evlist *evlist = evsel->evlist;
- int i, cpu, nr_cpus = evlist->core.all_cpus->nr;
- int total_cpus = cpu__max_cpu();
+ int total_cpus = cpu__max_cpu().cpu;
struct perf_counts_values *counts;
struct bpf_perf_event_value *values;
int reading_map_fd, err = 0;
- __u32 idx;
if (evsel->core.idx)
return 0;
@@ -263,21 +258,22 @@ static int bperf_cgrp__read(struct evsel *evsel)
reading_map_fd = bpf_map__fd(skel->maps.cgrp_readings);
evlist__for_each_entry(evlist, evsel) {
- idx = evsel->core.idx;
+ __u32 idx = evsel->core.idx;
+ int i;
+ struct perf_cpu cpu;
+
err = bpf_map_lookup_elem(reading_map_fd, &idx, values);
if (err) {
- pr_err("bpf map lookup falied: idx=%u, event=%s, cgrp=%s\n",
+ pr_err("bpf map lookup failed: idx=%u, event=%s, cgrp=%s\n",
idx, evsel__name(evsel), evsel->cgrp->name);
goto out;
}
- for (i = 0; i < nr_cpus; i++) {
- cpu = evlist->core.all_cpus->map[i];
-
+ perf_cpu_map__for_each_cpu(cpu, i, evlist->core.all_cpus) {
counts = perf_counts(evsel->counts, i, 0);
- counts->val = values[cpu].counter;
- counts->ena = values[cpu].enabled;
- counts->run = values[cpu].running;
+ counts->val = values[cpu.cpu].counter;
+ counts->ena = values[cpu.cpu].enabled;
+ counts->run = values[cpu.cpu].running;
}
}
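The loops above are converted from hand-indexing evlist->core.all_cpus->map[] to the perf_cpu_map__for_each_cpu() iterator, which yields the struct perf_cpu and its map index together. A standalone sketch of the pattern (illustrative, assuming libperf with struct perf_cpu as introduced in this series):

#include <perf/cpumap.h>
#include <stdio.h>

int main(void)
{
	/* a hypothetical CPU list; NULL would mean all online CPUs */
	struct perf_cpu_map *map = perf_cpu_map__new("0-3,8");
	struct perf_cpu cpu;
	int idx;

	if (!map)
		return 1;

	/* idx counts positions in the map, cpu.cpu is the logical CPU number */
	perf_cpu_map__for_each_cpu(cpu, idx, map)
		printf("idx=%d cpu=%d\n", idx, cpu.cpu);

	perf_cpu_map__put(map);
	return 0;
}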
diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c
new file mode 100644
index 000000000000..7a4297d8fd2c
--- /dev/null
+++ b/tools/perf/util/bpf_ftrace.c
@@ -0,0 +1,154 @@
+#include <stdio.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <linux/err.h>
+
+#include "util/ftrace.h"
+#include "util/cpumap.h"
+#include "util/thread_map.h"
+#include "util/debug.h"
+#include "util/evlist.h"
+#include "util/bpf_counter.h"
+
+#include "util/bpf_skel/func_latency.skel.h"
+
+static struct func_latency_bpf *skel;
+
+int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
+{
+ int fd, err;
+ int i, ncpus = 1, ntasks = 1;
+ struct filter_entry *func;
+
+ if (!list_is_singular(&ftrace->filters)) {
+ pr_err("ERROR: %s target function(s).\n",
+ list_empty(&ftrace->filters) ? "No" : "Too many");
+ return -1;
+ }
+
+ func = list_first_entry(&ftrace->filters, struct filter_entry, list);
+
+ skel = func_latency_bpf__open();
+ if (!skel) {
+ pr_err("Failed to open func latency skeleton\n");
+ return -1;
+ }
+
+ /* don't need to set cpu filter for system-wide mode */
+ if (ftrace->target.cpu_list) {
+ ncpus = perf_cpu_map__nr(ftrace->evlist->core.user_requested_cpus);
+ bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus);
+ }
+
+ if (target__has_task(&ftrace->target) || target__none(&ftrace->target)) {
+ ntasks = perf_thread_map__nr(ftrace->evlist->core.threads);
+ bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
+ }
+
+ set_max_rlimit();
+
+ err = func_latency_bpf__load(skel);
+ if (err) {
+ pr_err("Failed to load func latency skeleton\n");
+ goto out;
+ }
+
+ if (ftrace->target.cpu_list) {
+ u32 cpu;
+ u8 val = 1;
+
+ skel->bss->has_cpu = 1;
+ fd = bpf_map__fd(skel->maps.cpu_filter);
+
+ for (i = 0; i < ncpus; i++) {
+ cpu = perf_cpu_map__cpu(ftrace->evlist->core.user_requested_cpus, i).cpu;
+ bpf_map_update_elem(fd, &cpu, &val, BPF_ANY);
+ }
+ }
+
+ if (target__has_task(&ftrace->target) || target__none(&ftrace->target)) {
+ u32 pid;
+ u8 val = 1;
+
+ skel->bss->has_task = 1;
+ fd = bpf_map__fd(skel->maps.task_filter);
+
+ for (i = 0; i < ntasks; i++) {
+ pid = perf_thread_map__pid(ftrace->evlist->core.threads, i);
+ bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
+ }
+ }
+
+ skel->bss->use_nsec = ftrace->use_nsec;
+
+ skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin,
+ false, func->name);
+ if (IS_ERR(skel->links.func_begin)) {
+ pr_err("Failed to attach fentry program\n");
+ err = PTR_ERR(skel->links.func_begin);
+ goto out;
+ }
+
+ skel->links.func_end = bpf_program__attach_kprobe(skel->progs.func_end,
+ true, func->name);
+ if (IS_ERR(skel->links.func_end)) {
+ pr_err("Failed to attach fexit program\n");
+ err = PTR_ERR(skel->links.func_end);
+ goto out;
+ }
+
+ /* XXX: we don't actually use this fd - just for poll() */
+ return open("/dev/null", O_RDONLY);
+
+out:
+ return err;
+}
+
+int perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace __maybe_unused)
+{
+ skel->bss->enabled = 1;
+ return 0;
+}
+
+int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused)
+{
+ skel->bss->enabled = 0;
+ return 0;
+}
+
+int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused,
+ int buckets[])
+{
+ int i, fd, err;
+ u32 idx;
+ u64 *hist;
+ int ncpus = cpu__max_cpu().cpu;
+
+ fd = bpf_map__fd(skel->maps.latency);
+
+ hist = calloc(ncpus, sizeof(*hist));
+ if (hist == NULL)
+ return -ENOMEM;
+
+ for (idx = 0; idx < NUM_BUCKET; idx++) {
+ err = bpf_map_lookup_elem(fd, &idx, hist);
+ if (err) {
+ buckets[idx] = 0;
+ continue;
+ }
+
+ for (i = 0; i < ncpus; i++)
+ buckets[idx] += hist[i];
+ }
+
+ free(hist);
+ return 0;
+}
+
+int perf_ftrace__latency_cleanup_bpf(struct perf_ftrace *ftrace __maybe_unused)
+{
+ func_latency_bpf__destroy(skel);
+ return 0;
+}
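These hooks are meant to be driven from perf ftrace's latency mode; the expected sequence is prepare, start, run the workload, stop, read, cleanup. A rough sketch of that flow (illustrative only; assumes an already populated struct perf_ftrace and NUM_BUCKET from util/ftrace.h):

	int buckets[NUM_BUCKET] = { 0 };

	if (perf_ftrace__latency_prepare_bpf(ftrace) < 0)
		return -1;

	perf_ftrace__latency_start_bpf(ftrace);
	/* ... run or wait for the traced workload ... */
	perf_ftrace__latency_stop_bpf(ftrace);

	perf_ftrace__latency_read_bpf(ftrace, buckets);
	perf_ftrace__latency_cleanup_bpf(ftrace);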
diff --git a/tools/perf/util/bpf_map.c b/tools/perf/util/bpf_map.c
index eb853ca67cf4..c863ae0c5cb5 100644
--- a/tools/perf/util/bpf_map.c
+++ b/tools/perf/util/bpf_map.c
@@ -9,25 +9,25 @@
#include <stdlib.h>
#include <unistd.h>
-static bool bpf_map_def__is_per_cpu(const struct bpf_map_def *def)
+static bool bpf_map__is_per_cpu(enum bpf_map_type type)
{
- return def->type == BPF_MAP_TYPE_PERCPU_HASH ||
- def->type == BPF_MAP_TYPE_PERCPU_ARRAY ||
- def->type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
- def->type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE;
+ return type == BPF_MAP_TYPE_PERCPU_HASH ||
+ type == BPF_MAP_TYPE_PERCPU_ARRAY ||
+ type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
+ type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE;
}
-static void *bpf_map_def__alloc_value(const struct bpf_map_def *def)
+static void *bpf_map__alloc_value(const struct bpf_map *map)
{
- if (bpf_map_def__is_per_cpu(def))
- return malloc(round_up(def->value_size, 8) * sysconf(_SC_NPROCESSORS_CONF));
+ if (bpf_map__is_per_cpu(bpf_map__type(map)))
+ return malloc(round_up(bpf_map__value_size(map), 8) *
+ sysconf(_SC_NPROCESSORS_CONF));
- return malloc(def->value_size);
+ return malloc(bpf_map__value_size(map));
}
int bpf_map__fprintf(struct bpf_map *map, FILE *fp)
{
- const struct bpf_map_def *def = bpf_map__def(map);
void *prev_key = NULL, *key, *value;
int fd = bpf_map__fd(map), err;
int printed = 0;
@@ -35,15 +35,15 @@ int bpf_map__fprintf(struct bpf_map *map, FILE *fp)
if (fd < 0)
return fd;
- if (IS_ERR(def))
- return PTR_ERR(def);
+ if (!map)
+ return PTR_ERR(map);
err = -ENOMEM;
- key = malloc(def->key_size);
+ key = malloc(bpf_map__key_size(map));
if (key == NULL)
goto out;
- value = bpf_map_def__alloc_value(def);
+ value = bpf_map__alloc_value(map);
if (value == NULL)
goto out_free_key;
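bpf_map__alloc_value() over-allocates for per-CPU map types because a single lookup on such a map returns one 8-byte-aligned value slot per possible CPU. A self-contained sketch of reading a per-CPU map with an 8-byte value (illustrative; the helper name is made up):

#include <bpf/bpf.h>
#include <stdlib.h>
#include <unistd.h>

/* Look up one key in a BPF_MAP_TYPE_PERCPU_* map whose value_size is 8. */
static __u64 *lookup_percpu_u64(int map_fd, const void *key, long *nr_cpus)
{
	__u64 *vals;

	/* same CPU count that bpf_map__alloc_value() uses above */
	*nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
	vals = calloc(*nr_cpus, sizeof(*vals));
	if (!vals)
		return NULL;

	if (bpf_map_lookup_elem(map_fd, key, vals) < 0) {
		free(vals);
		return NULL;
	}
	return vals;	/* vals[i] holds the value recorded on CPU i */
}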
diff --git a/tools/perf/util/bpf_off_cpu.c b/tools/perf/util/bpf_off_cpu.c
new file mode 100644
index 000000000000..b73e84a02264
--- /dev/null
+++ b/tools/perf/util/bpf_off_cpu.c
@@ -0,0 +1,338 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "util/bpf_counter.h"
+#include "util/debug.h"
+#include "util/evsel.h"
+#include "util/evlist.h"
+#include "util/off_cpu.h"
+#include "util/perf-hooks.h"
+#include "util/record.h"
+#include "util/session.h"
+#include "util/target.h"
+#include "util/cpumap.h"
+#include "util/thread_map.h"
+#include "util/cgroup.h"
+#include <bpf/bpf.h>
+
+#include "bpf_skel/off_cpu.skel.h"
+
+#define MAX_STACKS 32
+/* we don't need the actual timestamp, we just want to put the samples last */
+#define OFF_CPU_TIMESTAMP (~0ull << 32)
+
+static struct off_cpu_bpf *skel;
+
+struct off_cpu_key {
+ u32 pid;
+ u32 tgid;
+ u32 stack_id;
+ u32 state;
+ u64 cgroup_id;
+};
+
+union off_cpu_data {
+ struct perf_event_header hdr;
+ u64 array[1024 / sizeof(u64)];
+};
+
+static int off_cpu_config(struct evlist *evlist)
+{
+ struct evsel *evsel;
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_SOFTWARE,
+ .config = PERF_COUNT_SW_BPF_OUTPUT,
+ .size = sizeof(attr), /* to capture ABI version */
+ };
+ char *evname = strdup(OFFCPU_EVENT);
+
+ if (evname == NULL)
+ return -ENOMEM;
+
+ evsel = evsel__new(&attr);
+ if (!evsel) {
+ free(evname);
+ return -ENOMEM;
+ }
+
+ evsel->core.attr.freq = 1;
+ evsel->core.attr.sample_period = 1;
+ /* off-cpu analysis depends on stack trace */
+ evsel->core.attr.sample_type = PERF_SAMPLE_CALLCHAIN;
+
+ evlist__add(evlist, evsel);
+
+ free(evsel->name);
+ evsel->name = evname;
+
+ return 0;
+}
+
+static void off_cpu_start(void *arg)
+{
+ struct evlist *evlist = arg;
+
+ /* update task filter for the given workload */
+ if (!skel->bss->has_cpu && !skel->bss->has_task &&
+ perf_thread_map__pid(evlist->core.threads, 0) != -1) {
+ int fd;
+ u32 pid;
+ u8 val = 1;
+
+ skel->bss->has_task = 1;
+ fd = bpf_map__fd(skel->maps.task_filter);
+ pid = perf_thread_map__pid(evlist->core.threads, 0);
+ bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
+ }
+
+ skel->bss->enabled = 1;
+}
+
+static void off_cpu_finish(void *arg __maybe_unused)
+{
+ skel->bss->enabled = 0;
+ off_cpu_bpf__destroy(skel);
+}
+
+/* the v5.18 kernel added a prev_state arg, so we need to check the signature */
+static void check_sched_switch_args(void)
+{
+ const struct btf *btf = bpf_object__btf(skel->obj);
+ const struct btf_type *t1, *t2, *t3;
+ u32 type_id;
+
+ type_id = btf__find_by_name_kind(btf, "bpf_trace_sched_switch",
+ BTF_KIND_TYPEDEF);
+ if ((s32)type_id < 0)
+ return;
+
+ t1 = btf__type_by_id(btf, type_id);
+ if (t1 == NULL)
+ return;
+
+ t2 = btf__type_by_id(btf, t1->type);
+ if (t2 == NULL || !btf_is_ptr(t2))
+ return;
+
+ t3 = btf__type_by_id(btf, t2->type);
+ if (t3 && btf_is_func_proto(t3) && btf_vlen(t3) == 4) {
+ /* new format: pass prev_state as 4th arg */
+ skel->rodata->has_prev_state = true;
+ }
+}
+
+int off_cpu_prepare(struct evlist *evlist, struct target *target,
+ struct record_opts *opts)
+{
+ int err, fd, i;
+ int ncpus = 1, ntasks = 1, ncgrps = 1;
+
+ if (off_cpu_config(evlist) < 0) {
+ pr_err("Failed to config off-cpu BPF event\n");
+ return -1;
+ }
+
+ skel = off_cpu_bpf__open();
+ if (!skel) {
+ pr_err("Failed to open off-cpu BPF skeleton\n");
+ return -1;
+ }
+
+ /* don't need to set cpu filter for system-wide mode */
+ if (target->cpu_list) {
+ ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus);
+ bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus);
+ }
+
+ if (target__has_task(target)) {
+ ntasks = perf_thread_map__nr(evlist->core.threads);
+ bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
+ }
+
+ if (evlist__first(evlist)->cgrp) {
+ ncgrps = evlist->core.nr_entries - 1; /* excluding a dummy */
+ bpf_map__set_max_entries(skel->maps.cgroup_filter, ncgrps);
+
+ if (!cgroup_is_v2("perf_event"))
+ skel->rodata->uses_cgroup_v1 = true;
+ }
+
+ if (opts->record_cgroup) {
+ skel->rodata->needs_cgroup = true;
+
+ if (!cgroup_is_v2("perf_event"))
+ skel->rodata->uses_cgroup_v1 = true;
+ }
+
+ set_max_rlimit();
+ check_sched_switch_args();
+
+ err = off_cpu_bpf__load(skel);
+ if (err) {
+ pr_err("Failed to load off-cpu skeleton\n");
+ goto out;
+ }
+
+ if (target->cpu_list) {
+ u32 cpu;
+ u8 val = 1;
+
+ skel->bss->has_cpu = 1;
+ fd = bpf_map__fd(skel->maps.cpu_filter);
+
+ for (i = 0; i < ncpus; i++) {
+ cpu = perf_cpu_map__cpu(evlist->core.user_requested_cpus, i).cpu;
+ bpf_map_update_elem(fd, &cpu, &val, BPF_ANY);
+ }
+ }
+
+ if (target__has_task(target)) {
+ u32 pid;
+ u8 val = 1;
+
+ skel->bss->has_task = 1;
+ fd = bpf_map__fd(skel->maps.task_filter);
+
+ for (i = 0; i < ntasks; i++) {
+ pid = perf_thread_map__pid(evlist->core.threads, i);
+ bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
+ }
+ }
+
+ if (evlist__first(evlist)->cgrp) {
+ struct evsel *evsel;
+ u8 val = 1;
+
+ skel->bss->has_cgroup = 1;
+ fd = bpf_map__fd(skel->maps.cgroup_filter);
+
+ evlist__for_each_entry(evlist, evsel) {
+ struct cgroup *cgrp = evsel->cgrp;
+
+ if (cgrp == NULL)
+ continue;
+
+ if (!cgrp->id && read_cgroup_id(cgrp) < 0) {
+ pr_err("Failed to read cgroup id of %s\n",
+ cgrp->name);
+ goto out;
+ }
+
+ bpf_map_update_elem(fd, &cgrp->id, &val, BPF_ANY);
+ }
+ }
+
+ err = off_cpu_bpf__attach(skel);
+ if (err) {
+ pr_err("Failed to attach off-cpu BPF skeleton\n");
+ goto out;
+ }
+
+ if (perf_hooks__set_hook("record_start", off_cpu_start, evlist) ||
+ perf_hooks__set_hook("record_end", off_cpu_finish, evlist)) {
+ pr_err("Failed to attach off-cpu skeleton\n");
+ goto out;
+ }
+
+ return 0;
+
+out:
+ off_cpu_bpf__destroy(skel);
+ return -1;
+}
+
+int off_cpu_write(struct perf_session *session)
+{
+ int bytes = 0, size;
+ int fd, stack;
+ u64 sample_type, val, sid = 0;
+ struct evsel *evsel;
+ struct perf_data_file *file = &session->data->file;
+ struct off_cpu_key prev, key;
+ union off_cpu_data data = {
+ .hdr = {
+ .type = PERF_RECORD_SAMPLE,
+ .misc = PERF_RECORD_MISC_USER,
+ },
+ };
+ u64 tstamp = OFF_CPU_TIMESTAMP;
+
+ skel->bss->enabled = 0;
+
+ evsel = evlist__find_evsel_by_str(session->evlist, OFFCPU_EVENT);
+ if (evsel == NULL) {
+ pr_err("%s evsel not found\n", OFFCPU_EVENT);
+ return 0;
+ }
+
+ sample_type = evsel->core.attr.sample_type;
+
+ if (sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER)) {
+ if (evsel->core.id)
+ sid = evsel->core.id[0];
+ }
+
+ fd = bpf_map__fd(skel->maps.off_cpu);
+ stack = bpf_map__fd(skel->maps.stacks);
+ memset(&prev, 0, sizeof(prev));
+
+ while (!bpf_map_get_next_key(fd, &prev, &key)) {
+ int n = 1; /* start from perf_event_header */
+ int ip_pos = -1;
+
+ bpf_map_lookup_elem(fd, &key, &val);
+
+ if (sample_type & PERF_SAMPLE_IDENTIFIER)
+ data.array[n++] = sid;
+ if (sample_type & PERF_SAMPLE_IP) {
+ ip_pos = n;
+ data.array[n++] = 0; /* will be updated */
+ }
+ if (sample_type & PERF_SAMPLE_TID)
+ data.array[n++] = (u64)key.pid << 32 | key.tgid;
+ if (sample_type & PERF_SAMPLE_TIME)
+ data.array[n++] = tstamp;
+ if (sample_type & PERF_SAMPLE_ID)
+ data.array[n++] = sid;
+ if (sample_type & PERF_SAMPLE_CPU)
+ data.array[n++] = 0;
+ if (sample_type & PERF_SAMPLE_PERIOD)
+ data.array[n++] = val;
+ if (sample_type & PERF_SAMPLE_CALLCHAIN) {
+ int len = 0;
+
+ /* data.array[n] is callchain->nr (updated later) */
+ data.array[n + 1] = PERF_CONTEXT_USER;
+ data.array[n + 2] = 0;
+
+ bpf_map_lookup_elem(stack, &key.stack_id, &data.array[n + 2]);
+ while (data.array[n + 2 + len])
+ len++;
+
+ /* update length of callchain */
+ data.array[n] = len + 1;
+
+ /* update sample ip with the first callchain entry */
+ if (ip_pos >= 0)
+ data.array[ip_pos] = data.array[n + 2];
+
+ /* calculate sample callchain data array length */
+ n += len + 2;
+ }
+ if (sample_type & PERF_SAMPLE_CGROUP)
+ data.array[n++] = key.cgroup_id;
+ /* TODO: handle more sample types */
+
+ size = n * sizeof(u64);
+ data.hdr.size = size;
+ bytes += size;
+
+ if (perf_data_file__write(file, &data, size) < 0) {
+ pr_err("failed to write perf data, error: %m\n");
+ return bytes;
+ }
+
+ prev = key;
+ /* increase dummy timestamp to sort later samples */
+ tstamp++;
+ }
+ return bytes;
+}
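off_cpu_write() builds PERF_RECORD_SAMPLE records by hand, so data.array[] has to follow the field order that the event's sample_type implies. An illustrative layout for one plausible sample_type combination (not normative; the code above handles each flag independently):

/*
 * data.array[] layout for sample_type =
 * IDENTIFIER | TID | TIME | PERIOD | CALLCHAIN | CGROUP:
 *
 *   [0]           perf_event_header (type, misc, size)
 *   [1]           sid                        PERF_SAMPLE_IDENTIFIER
 *   [2]           key.pid << 32 | key.tgid   PERF_SAMPLE_TID
 *   [3]           OFF_CPU_TIMESTAMP + n      PERF_SAMPLE_TIME (dummy, bumped per record)
 *   [4]           off-cpu time in ns (val)   PERF_SAMPLE_PERIOD
 *   [5]           nr = len + 1               PERF_SAMPLE_CALLCHAIN
 *   [6]           PERF_CONTEXT_USER
 *   [7 .. 6+len]  user stack from the stacks map
 *   [7+len]       key.cgroup_id              PERF_SAMPLE_CGROUP
 */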
diff --git a/tools/perf/util/bpf_skel/func_latency.bpf.c b/tools/perf/util/bpf_skel/func_latency.bpf.c
new file mode 100644
index 000000000000..9d01e3af7479
--- /dev/null
+++ b/tools/perf/util/bpf_skel/func_latency.bpf.c
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+// Copyright (c) 2021 Google
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+// This should be in sync with "util/ftrace.h"
+#define NUM_BUCKET 22
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(__u64));
+ __uint(value_size, sizeof(__u64));
+ __uint(max_entries, 10000);
+} functime SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u8));
+ __uint(max_entries, 1);
+} cpu_filter SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u8));
+ __uint(max_entries, 1);
+} task_filter SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u64));
+ __uint(max_entries, NUM_BUCKET);
+} latency SEC(".maps");
+
+
+int enabled = 0;
+int has_cpu = 0;
+int has_task = 0;
+int use_nsec = 0;
+
+SEC("kprobe/func")
+int BPF_PROG(func_begin)
+{
+ __u64 key, now;
+
+ if (!enabled)
+ return 0;
+
+ key = bpf_get_current_pid_tgid();
+
+ if (has_cpu) {
+ __u32 cpu = bpf_get_smp_processor_id();
+ __u8 *ok;
+
+ ok = bpf_map_lookup_elem(&cpu_filter, &cpu);
+ if (!ok)
+ return 0;
+ }
+
+ if (has_task) {
+ __u32 pid = key & 0xffffffff;
+ __u8 *ok;
+
+ ok = bpf_map_lookup_elem(&task_filter, &pid);
+ if (!ok)
+ return 0;
+ }
+
+ now = bpf_ktime_get_ns();
+
+ // overwrite timestamp for nested functions
+ bpf_map_update_elem(&functime, &key, &now, BPF_ANY);
+ return 0;
+}
+
+SEC("kretprobe/func")
+int BPF_PROG(func_end)
+{
+ __u64 tid;
+ __u64 *start;
+ __u64 cmp_base = use_nsec ? 1 : 1000;
+
+ if (!enabled)
+ return 0;
+
+ tid = bpf_get_current_pid_tgid();
+
+ start = bpf_map_lookup_elem(&functime, &tid);
+ if (start) {
+ __s64 delta = bpf_ktime_get_ns() - *start;
+ __u32 key;
+ __u64 *hist;
+
+ bpf_map_delete_elem(&functime, &tid);
+
+ if (delta < 0)
+ return 0;
+
+ // calculate index using delta
+ for (key = 0; key < (NUM_BUCKET - 1); key++) {
+ if (delta < (cmp_base << key))
+ break;
+ }
+
+ hist = bpf_map_lookup_elem(&latency, &key);
+ if (!hist)
+ return 0;
+
+ *hist += 1;
+ }
+
+ return 0;
+}
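func_end() converts a latency into a bucket index by finding the first key with delta < cmp_base << key, so bucket 0 holds latencies below cmp_base, bucket k holds the range [cmp_base << (k-1), cmp_base << k), and the last bucket collects everything at or above cmp_base << (NUM_BUCKET - 2). A small userspace sketch that prints the implied upper bounds for the default usec mode (illustrative only):

#include <stdio.h>

#define NUM_BUCKET 22	/* kept in sync with util/ftrace.h, as noted above */

int main(void)
{
	unsigned long long base = 1000;	/* ns; func_end() uses 1 when use_nsec is set */
	int k;

	for (k = 0; k < NUM_BUCKET - 1; k++)
		printf("bucket %2d: delta < %llu ns\n", k, base << k);
	printf("bucket %2d: delta >= %llu ns\n", k, base << (NUM_BUCKET - 2));
	return 0;
}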
diff --git a/tools/perf/util/bpf_skel/off_cpu.bpf.c b/tools/perf/util/bpf_skel/off_cpu.bpf.c
new file mode 100644
index 000000000000..792ae2847080
--- /dev/null
+++ b/tools/perf/util/bpf_skel/off_cpu.bpf.c
@@ -0,0 +1,229 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+// Copyright (c) 2022 Google
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+/* task->flags for off-cpu analysis */
+#define PF_KTHREAD 0x00200000 /* I am a kernel thread */
+
+/* task->state for off-cpu analysis */
+#define TASK_INTERRUPTIBLE 0x0001
+#define TASK_UNINTERRUPTIBLE 0x0002
+
+#define MAX_STACKS 32
+#define MAX_ENTRIES 102400
+
+struct tstamp_data {
+ __u32 stack_id;
+ __u32 state;
+ __u64 timestamp;
+};
+
+struct offcpu_key {
+ __u32 pid;
+ __u32 tgid;
+ __u32 stack_id;
+ __u32 state;
+ __u64 cgroup_id;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_STACK_TRACE);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, MAX_STACKS * sizeof(__u64));
+ __uint(max_entries, MAX_ENTRIES);
+} stacks SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct tstamp_data);
+} tstamp SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(struct offcpu_key));
+ __uint(value_size, sizeof(__u64));
+ __uint(max_entries, MAX_ENTRIES);
+} off_cpu SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u8));
+ __uint(max_entries, 1);
+} cpu_filter SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u8));
+ __uint(max_entries, 1);
+} task_filter SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(__u64));
+ __uint(value_size, sizeof(__u8));
+ __uint(max_entries, 1);
+} cgroup_filter SEC(".maps");
+
+/* old kernel task_struct definition */
+struct task_struct___old {
+ long state;
+} __attribute__((preserve_access_index));
+
+int enabled = 0;
+int has_cpu = 0;
+int has_task = 0;
+int has_cgroup = 0;
+
+const volatile bool has_prev_state = false;
+const volatile bool needs_cgroup = false;
+const volatile bool uses_cgroup_v1 = false;
+
+/*
+ * Old kernel used to call it task_struct->state and now it's '__state'.
+ * Use BPF CO-RE "ignored suffix rule" to deal with it like below:
+ *
+ * https://nakryiko.com/posts/bpf-core-reference-guide/#handling-incompatible-field-and-type-changes
+ */
+static inline int get_task_state(struct task_struct *t)
+{
+ if (bpf_core_field_exists(t->__state))
+ return BPF_CORE_READ(t, __state);
+
+ /* recast pointer to capture task_struct___old type for compiler */
+ struct task_struct___old *t_old = (void *)t;
+
+ /* now use old "state" name of the field */
+ return BPF_CORE_READ(t_old, state);
+}
+
+static inline __u64 get_cgroup_id(struct task_struct *t)
+{
+ struct cgroup *cgrp;
+
+ if (uses_cgroup_v1)
+ cgrp = BPF_CORE_READ(t, cgroups, subsys[perf_event_cgrp_id], cgroup);
+ else
+ cgrp = BPF_CORE_READ(t, cgroups, dfl_cgrp);
+
+ return BPF_CORE_READ(cgrp, kn, id);
+}
+
+static inline int can_record(struct task_struct *t, int state)
+{
+ /* kernel threads don't have user stack */
+ if (t->flags & PF_KTHREAD)
+ return 0;
+
+ if (state != TASK_INTERRUPTIBLE &&
+ state != TASK_UNINTERRUPTIBLE)
+ return 0;
+
+ if (has_cpu) {
+ __u32 cpu = bpf_get_smp_processor_id();
+ __u8 *ok;
+
+ ok = bpf_map_lookup_elem(&cpu_filter, &cpu);
+ if (!ok)
+ return 0;
+ }
+
+ if (has_task) {
+ __u8 *ok;
+ __u32 pid = t->pid;
+
+ ok = bpf_map_lookup_elem(&task_filter, &pid);
+ if (!ok)
+ return 0;
+ }
+
+ if (has_cgroup) {
+ __u8 *ok;
+ __u64 cgrp_id = get_cgroup_id(t);
+
+ ok = bpf_map_lookup_elem(&cgroup_filter, &cgrp_id);
+ if (!ok)
+ return 0;
+ }
+
+ return 1;
+}
+
+static int off_cpu_stat(u64 *ctx, struct task_struct *prev,
+ struct task_struct *next, int state)
+{
+ __u64 ts;
+ __u32 stack_id;
+ struct tstamp_data *pelem;
+
+ ts = bpf_ktime_get_ns();
+
+ if (!can_record(prev, state))
+ goto next;
+
+ stack_id = bpf_get_stackid(ctx, &stacks,
+ BPF_F_FAST_STACK_CMP | BPF_F_USER_STACK);
+
+ pelem = bpf_task_storage_get(&tstamp, prev, NULL,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!pelem)
+ goto next;
+
+ pelem->timestamp = ts;
+ pelem->state = state;
+ pelem->stack_id = stack_id;
+
+next:
+ pelem = bpf_task_storage_get(&tstamp, next, NULL, 0);
+
+ if (pelem && pelem->timestamp) {
+ struct offcpu_key key = {
+ .pid = next->pid,
+ .tgid = next->tgid,
+ .stack_id = pelem->stack_id,
+ .state = pelem->state,
+ .cgroup_id = needs_cgroup ? get_cgroup_id(next) : 0,
+ };
+ __u64 delta = ts - pelem->timestamp;
+ __u64 *total;
+
+ total = bpf_map_lookup_elem(&off_cpu, &key);
+ if (total)
+ *total += delta;
+ else
+ bpf_map_update_elem(&off_cpu, &key, &delta, BPF_ANY);
+
+ /* prevent the timestamp from being reused later */
+ pelem->timestamp = 0;
+ }
+
+ return 0;
+}
+
+SEC("tp_btf/sched_switch")
+int on_switch(u64 *ctx)
+{
+ struct task_struct *prev, *next;
+ int prev_state;
+
+ if (!enabled)
+ return 0;
+
+ prev = (struct task_struct *)ctx[1];
+ next = (struct task_struct *)ctx[2];
+
+ if (has_prev_state)
+ prev_state = (int)ctx[3];
+ else
+ prev_state = get_task_state(prev);
+
+ return off_cpu_stat(ctx, prev, next, prev_state);
+}
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
diff --git a/tools/perf/util/branch.c b/tools/perf/util/branch.c
index 2285b1eb3128..a9a909db8cc7 100644
--- a/tools/perf/util/branch.c
+++ b/tools/perf/util/branch.c
@@ -49,7 +49,9 @@ const char *branch_type_name(int type)
"SYSCALL",
"SYSRET",
"COND_CALL",
- "COND_RET"
+ "COND_RET",
+ "ERET",
+ "IRQ"
};
if (type >= 0 && type < PERF_BR_MAX)
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index e32e8f2ff3bd..328668f38c69 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -706,7 +706,7 @@ build_id_cache__add(const char *sbuild_id, const char *name, const char *realnam
if (is_kallsyms) {
if (copyfile("/proc/kallsyms", filename))
goto out_free;
- } else if (nsi && nsi->need_setns) {
+ } else if (nsi && nsinfo__need_setns(nsi)) {
if (copyfile_ns(name, filename, nsi))
goto out_free;
} else if (link(realname, filename) && errno != EEXIST &&
@@ -730,7 +730,7 @@ build_id_cache__add(const char *sbuild_id, const char *name, const char *realnam
goto out_free;
}
if (access(filename, F_OK)) {
- if (nsi && nsi->need_setns) {
+ if (nsi && nsinfo__need_setns(nsi)) {
if (copyfile_ns(debugfile, filename,
nsi))
goto out_free;
@@ -762,7 +762,7 @@ build_id_cache__add(const char *sbuild_id, const char *name, const char *realnam
len = readlink(linkname, path, sizeof(path) - 1);
if (len <= 0) {
- pr_err("Cant read link: %s\n", linkname);
+ pr_err("Can't read link: %s\n", linkname);
goto out_free;
}
path[len] = '\0';
@@ -872,6 +872,30 @@ out_free:
return err;
}
+static int filename__read_build_id_ns(const char *filename,
+ struct build_id *bid,
+ struct nsinfo *nsi)
+{
+ struct nscookie nsc;
+ int ret;
+
+ nsinfo__mountns_enter(nsi, &nsc);
+ ret = filename__read_build_id(filename, bid);
+ nsinfo__mountns_exit(&nsc);
+
+ return ret;
+}
+
+static bool dso__build_id_mismatch(struct dso *dso, const char *name)
+{
+ struct build_id bid;
+
+ if (filename__read_build_id_ns(name, &bid, dso->nsinfo) < 0)
+ return false;
+
+ return !dso__build_id_equal(dso, &bid);
+}
+
static int dso__cache_build_id(struct dso *dso, struct machine *machine,
void *priv __maybe_unused)
{
@@ -886,6 +910,10 @@ static int dso__cache_build_id(struct dso *dso, struct machine *machine,
is_kallsyms = true;
name = machine->mmap_name;
}
+
+ if (!is_kallsyms && dso__build_id_mismatch(dso, name))
+ return 0;
+
return build_id_cache__add_b(&dso->bid, name, dso->nsinfo,
is_kallsyms, is_vdso);
}
diff --git a/tools/perf/util/c++/clang.cpp b/tools/perf/util/c++/clang.cpp
index df7b18fb6b6e..1aad7d6d34aa 100644
--- a/tools/perf/util/c++/clang.cpp
+++ b/tools/perf/util/c++/clang.cpp
@@ -20,7 +20,11 @@
#include "llvm/Option/Option.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/ManagedStatic.h"
+#if CLANG_VERSION_MAJOR >= 14
+#include "llvm/MC/TargetRegistry.h"
+#else
#include "llvm/Support/TargetRegistry.h"
+#endif
#include "llvm/Support/TargetSelect.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 8e2777133bd9..5c27a4b2e7a7 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -1119,7 +1119,7 @@ int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *
goto out;
}
- if (al->maps == &al->maps->machine->kmaps) {
+ if (al->maps == machine__kernel_maps(al->maps->machine)) {
if (machine__is_host(al->maps->machine)) {
al->cpumode = PERF_RECORD_MISC_KERNEL;
al->level = 'k';
@@ -1600,7 +1600,7 @@ void callchain_cursor_reset(struct callchain_cursor *cursor)
map__zput(node->ms.map);
}
-void callchain_param_setup(u64 sample_type)
+void callchain_param_setup(u64 sample_type, const char *arch)
{
if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
if ((sample_type & PERF_SAMPLE_REGS_USER) &&
@@ -1612,6 +1612,18 @@ void callchain_param_setup(u64 sample_type)
else
callchain_param.record_mode = CALLCHAIN_FP;
}
+
+ /*
+ * It's necessary to use libunwind to reliably determine the caller of
+ * a leaf function on aarch64, as otherwise we cannot know whether to
+ * start from the LR or FP.
+ *
+ * Always starting from the LR can result in duplicate or entirely
+ * erroneous entries. Always skipping the LR and starting from the FP
+ * can result in missing entries.
+ */
+ if (callchain_param.record_mode == CALLCHAIN_FP && !strcmp(arch, "arm64"))
+ dwarf_callchain_users = true;
}
static bool chain_match(struct callchain_list *base_chain,
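callchain_param_setup() now needs the architecture string so that arm64 frame-pointer callchains can fall back to the DWARF/libunwind user unwinder for reliable leaf frames. A hedged sketch of what a caller could pass, assuming perf_env__arch() on the session's environment (the actual call sites are outside this hunk):

	/* illustrative call site, e.g. in a tool's sample-type setup path */
	callchain_param_setup(sample_type, perf_env__arch(&session->header.env));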
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 5824134f983b..d95615daed73 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -280,6 +280,8 @@ static inline int arch_skip_callchain_idx(struct thread *thread __maybe_unused,
}
#endif
+void arch__add_leaf_frame_record_opts(struct record_opts *opts);
+
char *callchain_list__sym_name(struct callchain_list *cl,
char *bf, size_t bfsize, bool show_dso);
char *callchain_node__scnprintf_value(struct callchain_node *node,
@@ -298,7 +300,7 @@ int callchain_branch_counts(struct callchain_root *root,
u64 *branch_count, u64 *predicted_count,
u64 *abort_count, u64 *cycles_count);
-void callchain_param_setup(u64 sample_type);
+void callchain_param_setup(u64 sample_type, const char *arch);
bool callchain_cnode_matched(struct callchain_node *base_cnode,
struct callchain_node *pair_cnode);
diff --git a/tools/perf/util/counts.c b/tools/perf/util/counts.c
index 582f3aeaf5e4..7a447d918458 100644
--- a/tools/perf/util/counts.c
+++ b/tools/perf/util/counts.c
@@ -4,6 +4,7 @@
#include <string.h>
#include "evsel.h"
#include "counts.h"
+#include <perf/threadmap.h>
#include <linux/zalloc.h>
struct perf_counts *perf_counts__new(int ncpus, int nthreads)
@@ -55,9 +56,12 @@ void evsel__reset_counts(struct evsel *evsel)
perf_counts__reset(evsel->counts);
}
-int evsel__alloc_counts(struct evsel *evsel, int ncpus, int nthreads)
+int evsel__alloc_counts(struct evsel *evsel)
{
- evsel->counts = perf_counts__new(ncpus, nthreads);
+ struct perf_cpu_map *cpus = evsel__cpus(evsel);
+ int nthreads = perf_thread_map__nr(evsel->core.threads);
+
+ evsel->counts = perf_counts__new(perf_cpu_map__nr(cpus), nthreads);
return evsel->counts != NULL ? 0 : -ENOMEM;
}
diff --git a/tools/perf/util/counts.h b/tools/perf/util/counts.h
index 7ff36bf6d644..5de275194f2b 100644
--- a/tools/perf/util/counts.h
+++ b/tools/perf/util/counts.h
@@ -18,21 +18,21 @@ struct perf_counts {
static inline struct perf_counts_values*
-perf_counts(struct perf_counts *counts, int cpu, int thread)
+perf_counts(struct perf_counts *counts, int cpu_map_idx, int thread)
{
- return xyarray__entry(counts->values, cpu, thread);
+ return xyarray__entry(counts->values, cpu_map_idx, thread);
}
static inline bool
-perf_counts__is_loaded(struct perf_counts *counts, int cpu, int thread)
+perf_counts__is_loaded(struct perf_counts *counts, int cpu_map_idx, int thread)
{
- return *((bool *) xyarray__entry(counts->loaded, cpu, thread));
+ return *((bool *) xyarray__entry(counts->loaded, cpu_map_idx, thread));
}
static inline void
-perf_counts__set_loaded(struct perf_counts *counts, int cpu, int thread, bool loaded)
+perf_counts__set_loaded(struct perf_counts *counts, int cpu_map_idx, int thread, bool loaded)
{
- *((bool *) xyarray__entry(counts->loaded, cpu, thread)) = loaded;
+ *((bool *) xyarray__entry(counts->loaded, cpu_map_idx, thread)) = loaded;
}
struct perf_counts *perf_counts__new(int ncpus, int nthreads);
@@ -40,7 +40,7 @@ void perf_counts__delete(struct perf_counts *counts);
void perf_counts__reset(struct perf_counts *counts);
void evsel__reset_counts(struct evsel *evsel);
-int evsel__alloc_counts(struct evsel *evsel, int ncpus, int nthreads);
+int evsel__alloc_counts(struct evsel *evsel);
void evsel__free_counts(struct evsel *evsel);
#endif /* __PERF_COUNTS_H */
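With the dimensions now taken from the evsel itself, allocation and access pair up as below; the index passed to perf_counts() is a position in the evsel's CPU map, not a logical CPU. A hedged sketch (illustrative, not from the patch):

	if (evsel__alloc_counts(evsel) < 0)
		return -ENOMEM;

	/* cpu_map_idx indexes evsel__cpus(evsel); thread indexes the thread map */
	struct perf_counts_values *v = perf_counts(evsel->counts, cpu_map_idx, thread);

	v->val = count;
	perf_counts__set_loaded(evsel->counts, cpu_map_idx, thread, true);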
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 87d3eca9b872..12b2243222b0 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -13,9 +13,13 @@
#include <linux/ctype.h>
#include <linux/zalloc.h>
-static int max_cpu_num;
-static int max_present_cpu_num;
+static struct perf_cpu max_cpu_num;
+static struct perf_cpu max_present_cpu_num;
static int max_node_num;
+/**
+ * The numa node X as read from /sys/devices/system/node/nodeX indexed by the
+ * CPU number.
+ */
static int *cpunode_map;
static struct perf_cpu_map *cpu_map__from_entries(struct cpu_map_entries *cpus)
@@ -33,9 +37,9 @@ static struct perf_cpu_map *cpu_map__from_entries(struct cpu_map_entries *cpus)
* otherwise it would become 65535.
*/
if (cpus->cpu[i] == (u16) -1)
- map->map[i] = -1;
+ map->map[i].cpu = -1;
else
- map->map[i] = (int) cpus->cpu[i];
+ map->map[i].cpu = (int) cpus->cpu[i];
}
}
@@ -54,7 +58,7 @@ static struct perf_cpu_map *cpu_map__from_mask(struct perf_record_record_cpu_map
int cpu, i = 0;
for_each_set_bit(cpu, mask->mask, nbits)
- map->map[i++] = cpu;
+ map->map[i++].cpu = cpu;
}
return map;
@@ -87,7 +91,7 @@ struct perf_cpu_map *perf_cpu_map__empty_new(int nr)
cpus->nr = nr;
for (i = 0; i < nr; i++)
- cpus->map[i] = -1;
+ cpus->map[i].cpu = -1;
refcount_set(&cpus->refcnt, 1);
}
@@ -104,7 +108,7 @@ struct cpu_aggr_map *cpu_aggr_map__empty_new(int nr)
cpus->nr = nr;
for (i = 0; i < nr; i++)
- cpus->map[i] = cpu_map__empty_aggr_cpu_id();
+ cpus->map[i] = aggr_cpu_id__empty();
refcount_set(&cpus->refcnt, 1);
}
@@ -122,28 +126,21 @@ static int cpu__get_topology_int(int cpu, const char *name, int *value)
return sysfs__read_int(path, value);
}
-int cpu_map__get_socket_id(int cpu)
+int cpu__get_socket_id(struct perf_cpu cpu)
{
- int value, ret = cpu__get_topology_int(cpu, "physical_package_id", &value);
+ int value, ret = cpu__get_topology_int(cpu.cpu, "physical_package_id", &value);
return ret ?: value;
}
-struct aggr_cpu_id cpu_map__get_socket(struct perf_cpu_map *map, int idx,
- void *data __maybe_unused)
+struct aggr_cpu_id aggr_cpu_id__socket(struct perf_cpu cpu, void *data __maybe_unused)
{
- int cpu;
- struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();
+ struct aggr_cpu_id id = aggr_cpu_id__empty();
- if (idx > map->nr)
- return id;
-
- cpu = map->map[idx];
-
- id.socket = cpu_map__get_socket_id(cpu);
+ id.socket = cpu__get_socket_id(cpu);
return id;
}
-static int cmp_aggr_cpu_id(const void *a_pointer, const void *b_pointer)
+static int aggr_cpu_id__cmp(const void *a_pointer, const void *b_pointer)
{
struct aggr_cpu_id *a = (struct aggr_cpu_id *)a_pointer;
struct aggr_cpu_id *b = (struct aggr_cpu_id *)b_pointer;
@@ -160,57 +157,64 @@ static int cmp_aggr_cpu_id(const void *a_pointer, const void *b_pointer)
return a->thread - b->thread;
}
-int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res,
- struct aggr_cpu_id (*f)(struct perf_cpu_map *map, int cpu, void *data),
- void *data)
+struct cpu_aggr_map *cpu_aggr_map__new(const struct perf_cpu_map *cpus,
+ aggr_cpu_id_get_t get_id,
+ void *data)
{
- int nr = cpus->nr;
- struct cpu_aggr_map *c = cpu_aggr_map__empty_new(nr);
- int cpu, s2;
- struct aggr_cpu_id s1;
+ int idx;
+ struct perf_cpu cpu;
+ struct cpu_aggr_map *c = cpu_aggr_map__empty_new(cpus->nr);
if (!c)
- return -1;
+ return NULL;
/* Reset size as it may only be partially filled */
c->nr = 0;
- for (cpu = 0; cpu < nr; cpu++) {
- s1 = f(cpus, cpu, data);
- for (s2 = 0; s2 < c->nr; s2++) {
- if (cpu_map__compare_aggr_cpu_id(s1, c->map[s2]))
+ perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
+ bool duplicate = false;
+ struct aggr_cpu_id cpu_id = get_id(cpu, data);
+
+ for (int j = 0; j < c->nr; j++) {
+ if (aggr_cpu_id__equal(&cpu_id, &c->map[j])) {
+ duplicate = true;
break;
+ }
}
- if (s2 == c->nr) {
- c->map[c->nr] = s1;
+ if (!duplicate) {
+ c->map[c->nr] = cpu_id;
c->nr++;
}
}
+ /* Trim. */
+ if (c->nr != cpus->nr) {
+ struct cpu_aggr_map *trimmed_c =
+ realloc(c,
+ sizeof(struct cpu_aggr_map) + sizeof(struct aggr_cpu_id) * c->nr);
+
+ if (trimmed_c)
+ c = trimmed_c;
+ }
/* ensure we process id in increasing order */
- qsort(c->map, c->nr, sizeof(struct aggr_cpu_id), cmp_aggr_cpu_id);
+ qsort(c->map, c->nr, sizeof(struct aggr_cpu_id), aggr_cpu_id__cmp);
+
+ return c;
- *res = c;
- return 0;
}
-int cpu_map__get_die_id(int cpu)
+int cpu__get_die_id(struct perf_cpu cpu)
{
- int value, ret = cpu__get_topology_int(cpu, "die_id", &value);
+ int value, ret = cpu__get_topology_int(cpu.cpu, "die_id", &value);
return ret ?: value;
}
-struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data)
+struct aggr_cpu_id aggr_cpu_id__die(struct perf_cpu cpu, void *data)
{
- int cpu, die;
- struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();
+ struct aggr_cpu_id id;
+ int die;
- if (idx > map->nr)
- return id;
-
- cpu = map->map[idx];
-
- die = cpu_map__get_die_id(cpu);
+ die = cpu__get_die_id(cpu);
/* There is no die_id on legacy system. */
if (die == -1)
die = 0;
@@ -220,79 +224,59 @@ struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, void *dat
* with the socket ID and then add die to
* make a unique ID.
*/
- id = cpu_map__get_socket(map, idx, data);
- if (cpu_map__aggr_cpu_id_is_empty(id))
+ id = aggr_cpu_id__socket(cpu, data);
+ if (aggr_cpu_id__is_empty(&id))
return id;
id.die = die;
return id;
}
-int cpu_map__get_core_id(int cpu)
+int cpu__get_core_id(struct perf_cpu cpu)
{
- int value, ret = cpu__get_topology_int(cpu, "core_id", &value);
+ int value, ret = cpu__get_topology_int(cpu.cpu, "core_id", &value);
return ret ?: value;
}
-int cpu_map__get_node_id(int cpu)
-{
- return cpu__get_node(cpu);
-}
-
-struct aggr_cpu_id cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data)
+struct aggr_cpu_id aggr_cpu_id__core(struct perf_cpu cpu, void *data)
{
- int cpu;
- struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();
-
- if (idx > map->nr)
- return id;
+ struct aggr_cpu_id id;
+ int core = cpu__get_core_id(cpu);
- cpu = map->map[idx];
-
- cpu = cpu_map__get_core_id(cpu);
-
- /* cpu_map__get_die returns a struct with socket and die set*/
- id = cpu_map__get_die(map, idx, data);
- if (cpu_map__aggr_cpu_id_is_empty(id))
+ /* aggr_cpu_id__die returns a struct with socket and die set. */
+ id = aggr_cpu_id__die(cpu, data);
+ if (aggr_cpu_id__is_empty(&id))
return id;
/*
* core_id is relative to socket and die, we need a global id.
* So we combine the result from cpu_map__get_die with the core id
*/
- id.core = cpu;
+ id.core = core;
return id;
+
}
-struct aggr_cpu_id cpu_map__get_node(struct perf_cpu_map *map, int idx, void *data __maybe_unused)
+struct aggr_cpu_id aggr_cpu_id__cpu(struct perf_cpu cpu, void *data)
{
- struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();
+ struct aggr_cpu_id id;
- if (idx < 0 || idx >= map->nr)
+ /* aggr_cpu_id__core returns a struct with socket, die and core set. */
+ id = aggr_cpu_id__core(cpu, data);
+ if (aggr_cpu_id__is_empty(&id))
return id;
- id.node = cpu_map__get_node_id(map->map[idx]);
+ id.cpu = cpu;
return id;
-}
-int cpu_map__build_socket_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **sockp)
-{
- return cpu_map__build_map(cpus, sockp, cpu_map__get_socket, NULL);
}
-int cpu_map__build_die_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **diep)
+struct aggr_cpu_id aggr_cpu_id__node(struct perf_cpu cpu, void *data __maybe_unused)
{
- return cpu_map__build_map(cpus, diep, cpu_map__get_die, NULL);
-}
-
-int cpu_map__build_core_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **corep)
-{
- return cpu_map__build_map(cpus, corep, cpu_map__get_core, NULL);
-}
+ struct aggr_cpu_id id = aggr_cpu_id__empty();
-int cpu_map__build_node_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **numap)
-{
- return cpu_map__build_map(cpus, numap, cpu_map__get_node, NULL);
+ id.node = cpu__get_node(cpu);
+ return id;
}
/* setup simple routines to easily access node numbers given a cpu number */
@@ -335,8 +319,8 @@ static void set_max_cpu_num(void)
int ret = -1;
/* set up default */
- max_cpu_num = 4096;
- max_present_cpu_num = 4096;
+ max_cpu_num.cpu = 4096;
+ max_present_cpu_num.cpu = 4096;
mnt = sysfs__mountpoint();
if (!mnt)
@@ -349,7 +333,7 @@ static void set_max_cpu_num(void)
goto out;
}
- ret = get_max_num(path, &max_cpu_num);
+ ret = get_max_num(path, &max_cpu_num.cpu);
if (ret)
goto out;
@@ -360,11 +344,11 @@ static void set_max_cpu_num(void)
goto out;
}
- ret = get_max_num(path, &max_present_cpu_num);
+ ret = get_max_num(path, &max_present_cpu_num.cpu);
out:
if (ret)
- pr_err("Failed to read max cpus, using default of %d\n", max_cpu_num);
+ pr_err("Failed to read max cpus, using default of %d\n", max_cpu_num.cpu);
}
/* Determine highest possible node in the system for sparse allocation */
@@ -403,31 +387,31 @@ int cpu__max_node(void)
return max_node_num;
}
-int cpu__max_cpu(void)
+struct perf_cpu cpu__max_cpu(void)
{
- if (unlikely(!max_cpu_num))
+ if (unlikely(!max_cpu_num.cpu))
set_max_cpu_num();
return max_cpu_num;
}
-int cpu__max_present_cpu(void)
+struct perf_cpu cpu__max_present_cpu(void)
{
- if (unlikely(!max_present_cpu_num))
+ if (unlikely(!max_present_cpu_num.cpu))
set_max_cpu_num();
return max_present_cpu_num;
}
-int cpu__get_node(int cpu)
+int cpu__get_node(struct perf_cpu cpu)
{
if (unlikely(cpunode_map == NULL)) {
pr_debug("cpu_map not initialized\n");
return -1;
}
- return cpunode_map[cpu];
+ return cpunode_map[cpu.cpu];
}
static int init_cpunode_map(void)
@@ -437,13 +421,13 @@ static int init_cpunode_map(void)
set_max_cpu_num();
set_max_node_num();
- cpunode_map = calloc(max_cpu_num, sizeof(int));
+ cpunode_map = calloc(max_cpu_num.cpu, sizeof(int));
if (!cpunode_map) {
pr_err("%s: calloc failed\n", __func__);
return -1;
}
- for (i = 0; i < max_cpu_num; i++)
+ for (i = 0; i < max_cpu_num.cpu; i++)
cpunode_map[i] = -1;
return 0;
@@ -502,47 +486,39 @@ int cpu__setup_cpunode_map(void)
return 0;
}
-bool cpu_map__has(struct perf_cpu_map *cpus, int cpu)
-{
- return perf_cpu_map__idx(cpus, cpu) != -1;
-}
-
-int cpu_map__cpu(struct perf_cpu_map *cpus, int idx)
-{
- return cpus->map[idx];
-}
-
size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size)
{
- int i, cpu, start = -1;
+ int i, start = -1;
bool first = true;
size_t ret = 0;
#define COMMA first ? "" : ","
for (i = 0; i < map->nr + 1; i++) {
+ struct perf_cpu cpu = { .cpu = INT_MAX };
bool last = i == map->nr;
- cpu = last ? INT_MAX : map->map[i];
+ if (!last)
+ cpu = map->map[i];
if (start == -1) {
start = i;
if (last) {
ret += snprintf(buf + ret, size - ret,
"%s%d", COMMA,
- map->map[i]);
+ map->map[i].cpu);
}
- } else if (((i - start) != (cpu - map->map[start])) || last) {
+ } else if (((i - start) != (cpu.cpu - map->map[start].cpu)) || last) {
int end = i - 1;
if (start == end) {
ret += snprintf(buf + ret, size - ret,
"%s%d", COMMA,
- map->map[start]);
+ map->map[start].cpu);
} else {
ret += snprintf(buf + ret, size - ret,
"%s%d-%d", COMMA,
- map->map[start], map->map[end]);
+ map->map[start].cpu, map->map[end].cpu);
}
first = false;
start = i;
@@ -569,23 +545,23 @@ size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size)
int i, cpu;
char *ptr = buf;
unsigned char *bitmap;
- int last_cpu = cpu_map__cpu(map, map->nr - 1);
+ struct perf_cpu last_cpu = perf_cpu_map__cpu(map, map->nr - 1);
if (buf == NULL)
return 0;
- bitmap = zalloc(last_cpu / 8 + 1);
+ bitmap = zalloc(last_cpu.cpu / 8 + 1);
if (bitmap == NULL) {
buf[0] = '\0';
return 0;
}
for (i = 0; i < map->nr; i++) {
- cpu = cpu_map__cpu(map, i);
+ cpu = perf_cpu_map__cpu(map, i).cpu;
bitmap[cpu / 8] |= 1 << (cpu % 8);
}
- for (cpu = last_cpu / 4 * 4; cpu >= 0; cpu -= 4) {
+ for (cpu = last_cpu.cpu / 4 * 4; cpu >= 0; cpu -= 4) {
unsigned char bits = bitmap[cpu / 8];
if (cpu % 8)
@@ -614,32 +590,35 @@ const struct perf_cpu_map *cpu_map__online(void) /* thread unsafe */
return online;
}
-bool cpu_map__compare_aggr_cpu_id(struct aggr_cpu_id a, struct aggr_cpu_id b)
+bool aggr_cpu_id__equal(const struct aggr_cpu_id *a, const struct aggr_cpu_id *b)
{
- return a.thread == b.thread &&
- a.node == b.node &&
- a.socket == b.socket &&
- a.die == b.die &&
- a.core == b.core;
+ return a->thread == b->thread &&
+ a->node == b->node &&
+ a->socket == b->socket &&
+ a->die == b->die &&
+ a->core == b->core &&
+ a->cpu.cpu == b->cpu.cpu;
}
-bool cpu_map__aggr_cpu_id_is_empty(struct aggr_cpu_id a)
+bool aggr_cpu_id__is_empty(const struct aggr_cpu_id *a)
{
- return a.thread == -1 &&
- a.node == -1 &&
- a.socket == -1 &&
- a.die == -1 &&
- a.core == -1;
+ return a->thread == -1 &&
+ a->node == -1 &&
+ a->socket == -1 &&
+ a->die == -1 &&
+ a->core == -1 &&
+ a->cpu.cpu == -1;
}
-struct aggr_cpu_id cpu_map__empty_aggr_cpu_id(void)
+struct aggr_cpu_id aggr_cpu_id__empty(void)
{
struct aggr_cpu_id ret = {
.thread = -1,
.node = -1,
.socket = -1,
.die = -1,
- .core = -1
+ .core = -1,
+ .cpu = (struct perf_cpu){ .cpu = -1 },
};
return ret;
}
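cpu_aggr_map__new() folds the old cpu_map__build_{socket,die,core,node}_map() helpers into one constructor that takes a per-CPU aggr_cpu_id getter and returns a sorted, de-duplicated map. A hedged usage sketch, equivalent to the old socket map builder (names as declared in cpumap.h below):

	struct cpu_aggr_map *sockets;

	sockets = cpu_aggr_map__new(cpus, aggr_cpu_id__socket, /*data=*/NULL);
	if (!sockets)
		return -ENOMEM;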
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index a27eeaf086e8..703ae6d3386e 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -2,71 +2,134 @@
#ifndef __PERF_CPUMAP_H
#define __PERF_CPUMAP_H
-#include <stdio.h>
#include <stdbool.h>
+#include <stdio.h>
#include <internal/cpumap.h>
#include <perf/cpumap.h>
+/** Identify where counts are aggregated, -1 implies not to aggregate. */
struct aggr_cpu_id {
+ /** A value in the range 0 to number of threads. */
int thread;
+ /** The numa node X as read from /sys/devices/system/node/nodeX. */
int node;
+ /**
+ * The socket number as read from
+ * /sys/devices/system/cpu/cpuX/topology/physical_package_id.
+ */
int socket;
+ /** The die id as read from /sys/devices/system/cpu/cpuX/topology/die_id. */
int die;
+ /** The core id as read from /sys/devices/system/cpu/cpuX/topology/core_id. */
int core;
+ /** CPU aggregation, note there is one CPU for each SMT thread. */
+ struct perf_cpu cpu;
};
+/** A collection of aggr_cpu_id values; the "built" version is sorted and uniqued. */
struct cpu_aggr_map {
refcount_t refcnt;
+ /** Number of valid entries. */
int nr;
+ /** The entries. */
struct aggr_cpu_id map[];
};
struct perf_record_cpu_map_data;
struct perf_cpu_map *perf_cpu_map__empty_new(int nr);
-struct cpu_aggr_map *cpu_aggr_map__empty_new(int nr);
struct perf_cpu_map *cpu_map__new_data(struct perf_record_cpu_map_data *data);
size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size);
size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size);
size_t cpu_map__fprintf(struct perf_cpu_map *map, FILE *fp);
-int cpu_map__get_socket_id(int cpu);
-struct aggr_cpu_id cpu_map__get_socket(struct perf_cpu_map *map, int idx, void *data);
-int cpu_map__get_die_id(int cpu);
-struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data);
-int cpu_map__get_core_id(int cpu);
-struct aggr_cpu_id cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data);
-int cpu_map__get_node_id(int cpu);
-struct aggr_cpu_id cpu_map__get_node(struct perf_cpu_map *map, int idx, void *data);
-int cpu_map__build_socket_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **sockp);
-int cpu_map__build_die_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **diep);
-int cpu_map__build_core_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **corep);
-int cpu_map__build_node_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **nodep);
const struct perf_cpu_map *cpu_map__online(void); /* thread unsafe */
-static inline int cpu_map__socket(struct perf_cpu_map *sock, int s)
+int cpu__setup_cpunode_map(void);
+
+int cpu__max_node(void);
+struct perf_cpu cpu__max_cpu(void);
+struct perf_cpu cpu__max_present_cpu(void);
+
+/**
+ * cpu_map__is_dummy - Events associated with a pid, rather than a CPU, use a single dummy map with an entry of -1.
+ */
+static inline bool cpu_map__is_dummy(struct perf_cpu_map *cpus)
{
- if (!sock || s > sock->nr || s < 0)
- return 0;
- return sock->map[s];
+ return perf_cpu_map__nr(cpus) == 1 && perf_cpu_map__cpu(cpus, 0).cpu == -1;
}
-int cpu__setup_cpunode_map(void);
+/**
+ * cpu__get_node - Returns the numa node X as read from
+ * /sys/devices/system/node/nodeX for the given CPU.
+ */
+int cpu__get_node(struct perf_cpu cpu);
+/**
+ * cpu__get_socket_id - Returns the socket number as read from
+ * /sys/devices/system/cpu/cpuX/topology/physical_package_id for the given CPU.
+ */
+int cpu__get_socket_id(struct perf_cpu cpu);
+/**
+ * cpu__get_die_id - Returns the die id as read from
+ * /sys/devices/system/cpu/cpuX/topology/die_id for the given CPU.
+ */
+int cpu__get_die_id(struct perf_cpu cpu);
+/**
+ * cpu__get_core_id - Returns the core id as read from
+ * /sys/devices/system/cpu/cpuX/topology/core_id for the given CPU.
+ */
+int cpu__get_core_id(struct perf_cpu cpu);
-int cpu__max_node(void);
-int cpu__max_cpu(void);
-int cpu__max_present_cpu(void);
-int cpu__get_node(int cpu);
+/**
+ * cpu_aggr_map__empty_new - Create a cpu_aggr_map of size nr with every entry
+ * being empty.
+ */
+struct cpu_aggr_map *cpu_aggr_map__empty_new(int nr);
+
+typedef struct aggr_cpu_id (*aggr_cpu_id_get_t)(struct perf_cpu cpu, void *data);
+
+/**
+ * cpu_aggr_map__new - Create a cpu_aggr_map with an aggr_cpu_id for each cpu in
+ * cpus. The aggr_cpu_id is created with 'get_id' that may have a data value
+ * passed to it. The cpu_aggr_map is sorted with duplicate values removed.
+ */
+struct cpu_aggr_map *cpu_aggr_map__new(const struct perf_cpu_map *cpus,
+ aggr_cpu_id_get_t get_id,
+ void *data);
-int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res,
- struct aggr_cpu_id (*f)(struct perf_cpu_map *map, int cpu, void *data),
- void *data);
+bool aggr_cpu_id__equal(const struct aggr_cpu_id *a, const struct aggr_cpu_id *b);
+bool aggr_cpu_id__is_empty(const struct aggr_cpu_id *a);
+struct aggr_cpu_id aggr_cpu_id__empty(void);
-int cpu_map__cpu(struct perf_cpu_map *cpus, int idx);
-bool cpu_map__has(struct perf_cpu_map *cpus, int cpu);
-bool cpu_map__compare_aggr_cpu_id(struct aggr_cpu_id a, struct aggr_cpu_id b);
-bool cpu_map__aggr_cpu_id_is_empty(struct aggr_cpu_id a);
-struct aggr_cpu_id cpu_map__empty_aggr_cpu_id(void);
+/**
+ * aggr_cpu_id__socket - Create an aggr_cpu_id with the socket populated with
+ * the socket for cpu. The function signature is compatible with
+ * aggr_cpu_id_get_t.
+ */
+struct aggr_cpu_id aggr_cpu_id__socket(struct perf_cpu cpu, void *data);
+/**
+ * aggr_cpu_id__die - Create an aggr_cpu_id with the die and socket populated
+ * with the die and socket for cpu. The function signature is compatible with
+ * aggr_cpu_id_get_t.
+ */
+struct aggr_cpu_id aggr_cpu_id__die(struct perf_cpu cpu, void *data);
+/**
+ * aggr_cpu_id__core - Create an aggr_cpu_id with the core, die and socket
+ * populated with the core, die and socket for cpu. The function signature is
+ * compatible with aggr_cpu_id_get_t.
+ */
+struct aggr_cpu_id aggr_cpu_id__core(struct perf_cpu cpu, void *data);
+/**
+ * aggr_cpu_id__cpu - Create an aggr_cpu_id with the cpu, core, die and socket
+ * populated with the cpu, core, die and socket for cpu. The function signature
+ * is compatible with aggr_cpu_id_get_t.
+ */
+struct aggr_cpu_id aggr_cpu_id__cpu(struct perf_cpu cpu, void *data);
+/**
+ * aggr_cpu_id__node - Create an aggr_cpu_id with the numa node populated for
+ * cpu. The function signature is compatible with aggr_cpu_id_get_t.
+ */
+struct aggr_cpu_id aggr_cpu_id__node(struct perf_cpu cpu, void *data);
#endif /* __PERF_CPUMAP_H */
diff --git a/tools/perf/util/cputopo.c b/tools/perf/util/cputopo.c
index 51b429c86f98..d275d843c155 100644
--- a/tools/perf/util/cputopo.c
+++ b/tools/perf/util/cputopo.c
@@ -165,7 +165,8 @@ static bool has_die_topology(void)
if (uname(&uts) < 0)
return false;
- if (strncmp(uts.machine, "x86_64", 6))
+ if (strncmp(uts.machine, "x86_64", 6) &&
+ strncmp(uts.machine, "s390x", 5))
return false;
scnprintf(filename, MAXPATHLEN, DIE_CPUS_FMT,
@@ -187,7 +188,7 @@ struct cpu_topology *cpu_topology__new(void)
struct perf_cpu_map *map;
bool has_die = has_die_topology();
- ncpus = cpu__max_present_cpu();
+ ncpus = cpu__max_present_cpu().cpu;
/* build online CPU map */
map = perf_cpu_map__new(NULL);
@@ -218,7 +219,7 @@ struct cpu_topology *cpu_topology__new(void)
tp->core_cpus_list = addr;
for (i = 0; i < nr; i++) {
- if (!cpu_map__has(map, i))
+ if (!perf_cpu_map__has(map, (struct perf_cpu){ .cpu = i }))
continue;
ret = build_cpu_topology(tp, i);
@@ -324,7 +325,7 @@ struct numa_topology *numa_topology__new(void)
if (!node_map)
goto out;
- nr = (u32) node_map->nr;
+ nr = (u32) perf_cpu_map__nr(node_map);
tp = zalloc(sizeof(*tp) + sizeof(tp->nodes[0])*nr);
if (!tp)
@@ -333,7 +334,7 @@ struct numa_topology *numa_topology__new(void)
tp->nr = nr;
for (i = 0; i < nr; i++) {
- if (load_numa_node(&tp->nodes[i], node_map->map[i])) {
+ if (load_numa_node(&tp->nodes[i], perf_cpu_map__cpu(node_map, i).cpu)) {
numa_topology__delete(tp);
tp = NULL;
break;
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 4f672f7d008c..8b95fb3c4d7b 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -50,8 +50,6 @@ struct cs_etm_auxtrace {
u8 timeless_decoding;
u8 snapshot_mode;
u8 data_queued;
- u8 sample_branches;
- u8 sample_instructions;
int num_cpu;
u64 latest_kernel_timestamp;
@@ -410,8 +408,8 @@ static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm,
{
struct cs_etm_packet *tmp;
- if (etm->sample_branches || etm->synth_opts.last_branch ||
- etm->sample_instructions) {
+ if (etm->synth_opts.branches || etm->synth_opts.last_branch ||
+ etm->synth_opts.instructions) {
/*
* Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
* the next incoming packet.
@@ -1365,7 +1363,6 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
err = cs_etm__synth_event(session, &attr, id);
if (err)
return err;
- etm->sample_branches = true;
etm->branches_sample_type = attr.sample_type;
etm->branches_id = id;
id += 1;
@@ -1389,7 +1386,6 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
err = cs_etm__synth_event(session, &attr, id);
if (err)
return err;
- etm->sample_instructions = true;
etm->instructions_sample_type = attr.sample_type;
etm->instructions_id = id;
id += 1;
@@ -1420,7 +1416,7 @@ static int cs_etm__sample(struct cs_etm_queue *etmq,
tidq->prev_packet->last_instr_taken_branch)
cs_etm__update_last_branch_rb(etmq, tidq);
- if (etm->sample_instructions &&
+ if (etm->synth_opts.instructions &&
tidq->period_instructions >= etm->instructions_sample_period) {
/*
* Emit instruction sample periodically
@@ -1503,7 +1499,7 @@ static int cs_etm__sample(struct cs_etm_queue *etmq,
}
}
- if (etm->sample_branches) {
+ if (etm->synth_opts.branches) {
bool generate_sample = false;
/* Generate sample for tracing on packet */
@@ -1557,6 +1553,7 @@ static int cs_etm__flush(struct cs_etm_queue *etmq,
goto swap_packet;
if (etmq->etm->synth_opts.last_branch &&
+ etmq->etm->synth_opts.instructions &&
tidq->prev_packet->sample_type == CS_ETM_RANGE) {
u64 addr;
@@ -1582,7 +1579,7 @@ static int cs_etm__flush(struct cs_etm_queue *etmq,
}
- if (etm->sample_branches &&
+ if (etm->synth_opts.branches &&
tidq->prev_packet->sample_type == CS_ETM_RANGE) {
err = cs_etm__synth_branch_sample(etmq, tidq);
if (err)
@@ -1614,6 +1611,7 @@ static int cs_etm__end_block(struct cs_etm_queue *etmq,
* the trace.
*/
if (etmq->etm->synth_opts.last_branch &&
+ etmq->etm->synth_opts.instructions &&
tidq->prev_packet->sample_type == CS_ETM_RANGE) {
u64 addr;
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index 8f7705bbc2da..9e0aee276df8 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -318,6 +318,8 @@ static int add_tracepoint_field_value(struct ctf_writer *cw,
offset = tmp_val;
len = offset >> 16;
offset &= 0xffff;
+ if (flags & TEP_FIELD_IS_RELATIVE)
+ offset += fmtf->offset + fmtf->size;
}
if (flags & TEP_FIELD_IS_ARRAY) {
diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c
index f5d260b1df4d..caabeac24c69 100644
--- a/tools/perf/util/data.c
+++ b/tools/perf/util/data.c
@@ -44,24 +44,27 @@ int perf_data__create_dir(struct perf_data *data, int nr)
if (!files)
return -ENOMEM;
- data->dir.version = PERF_DIR_VERSION;
- data->dir.files = files;
- data->dir.nr = nr;
-
for (i = 0; i < nr; i++) {
struct perf_data_file *file = &files[i];
ret = asprintf(&file->path, "%s/data.%d", data->path, i);
- if (ret < 0)
+ if (ret < 0) {
+ ret = -ENOMEM;
goto out_err;
+ }
ret = open(file->path, O_RDWR|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR);
- if (ret < 0)
+ if (ret < 0) {
+ ret = -errno;
goto out_err;
+ }
file->fd = ret;
}
+ data->dir.version = PERF_DIR_VERSION;
+ data->dir.files = files;
+ data->dir.nr = nr;
return 0;
out_err:
@@ -476,6 +479,20 @@ int perf_data__make_kcore_dir(struct perf_data *data, char *buf, size_t buf_sz)
return mkdir(buf, S_IRWXU);
}
+bool has_kcore_dir(const char *path)
+{
+ char *kcore_dir;
+ int ret;
+
+ if (asprintf(&kcore_dir, "%s/kcore_dir", path) < 0)
+ return false;
+
+ ret = access(kcore_dir, F_OK);
+
+ free(kcore_dir);
+ return !ret;
+}
+
char *perf_data__kallsyms_name(struct perf_data *data)
{
char *kallsyms_name;
diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h
index c9de82af5584..7de53d6e2d7f 100644
--- a/tools/perf/util/data.h
+++ b/tools/perf/util/data.h
@@ -4,6 +4,7 @@
#include <stdio.h>
#include <stdbool.h>
+#include <linux/types.h>
enum perf_data_mode {
PERF_DATA_MODE_WRITE,
@@ -98,6 +99,7 @@ void perf_data__close_dir(struct perf_data *data);
int perf_data__update_dir(struct perf_data *data);
unsigned long perf_data__size(struct perf_data *data);
int perf_data__make_kcore_dir(struct perf_data *data, char *buf, size_t buf_sz);
+bool has_kcore_dir(const char *path);
char *perf_data__kallsyms_name(struct perf_data *data);
bool is_perf_data(const char *path);
#endif /* __PERF_DATA_H */
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index 2c06abf6dcd2..65e6c22f38e4 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -179,7 +179,7 @@ static int trace_event_printer(enum binary_printer_ops op,
break;
case BINARY_PRINT_CHAR_DATA:
printed += color_fprintf(fp, color, "%c",
- isprint(ch) ? ch : '.');
+ isprint(ch) && isascii(ch) ? ch : '.');
break;
case BINARY_PRINT_CHAR_PAD:
printed += color_fprintf(fp, color, " ");
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 9cc8a1772b4b..5ac13958d1bd 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -508,8 +508,19 @@ static int __open_dso(struct dso *dso, struct machine *machine)
root_dir, name, PATH_MAX))
goto out;
- if (!is_regular_file(name))
- goto out;
+ if (!is_regular_file(name)) {
+ char *new_name;
+
+ if (errno != ENOENT || dso->nsinfo == NULL)
+ goto out;
+
+ new_name = filename_with_chroot(dso->nsinfo->pid, name);
+ if (!new_name)
+ goto out;
+
+ free(name);
+ name = new_name;
+ }
if (dso__needs_decompress(dso)) {
char newpath[KMOD_DECOMP_LEN];
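The fallback above retries a path that is missing on the host underneath the recorded process's mount namespace root. A rough sketch of the idea, assuming filename_with_chroot() simply prefixes /proc/<pid>/root; the helper below is illustrative, not the real implementation:

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>

/* Illustrative: rebuild a DSO path relative to a process's root directory. */
static char *path_in_pid_root(int pid, const char *path)
{
	char *new_path = NULL;

	if (asprintf(&new_path, "/proc/%d/root%s", pid, path) < 0)
		return NULL;
	return new_path;	/* caller frees */
}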
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index 011da3924fc1..97047a11282b 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -167,6 +167,7 @@ struct dso {
enum dso_load_errno load_errno;
u8 adjust_symbols:1;
u8 has_build_id:1;
+ u8 header_build_id:1;
u8 has_srcline:1;
u8 hit:1;
u8 annotate_warned:1;
@@ -195,7 +196,9 @@ struct dso {
u32 status_seen;
u64 file_size;
struct list_head open_entry;
+ u64 elf_base_addr;
u64 debug_frame_offset;
+ u64 eh_frame_hdr_addr;
u64 eh_frame_hdr_offset;
} data;
/* bpf prog information */
diff --git a/tools/perf/util/dsos.c b/tools/perf/util/dsos.c
index 183a81d5b2f9..b97366f77bbf 100644
--- a/tools/perf/util/dsos.c
+++ b/tools/perf/util/dsos.c
@@ -2,12 +2,15 @@
#include "debug.h"
#include "dsos.h"
#include "dso.h"
+#include "util.h"
#include "vdso.h"
#include "namespaces.h"
+#include <errno.h>
#include <libgen.h>
#include <stdlib.h>
#include <string.h>
#include <symbol.h> // filename__read_build_id
+#include <unistd.h>
static int __dso_id__cmp(struct dso_id *a, struct dso_id *b)
{
@@ -76,6 +79,16 @@ bool __dsos__read_build_ids(struct list_head *head, bool with_hits)
if (filename__read_build_id(pos->long_name, &pos->bid) > 0) {
have_build_id = true;
pos->has_build_id = true;
+ } else if (errno == ENOENT && pos->nsinfo) {
+ char *new_name = filename_with_chroot(pos->nsinfo->pid,
+ pos->long_name);
+
+ if (new_name && filename__read_build_id(new_name,
+ &pos->bid) > 0) {
+ have_build_id = true;
+ pos->has_build_id = true;
+ }
+ free(new_name);
}
nsinfo__mountns_exit(&nsc);
}
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index b9904896eb97..579e44c59914 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -285,13 +285,13 @@ out_enomem:
int perf_env__read_cpu_topology_map(struct perf_env *env)
{
- int cpu, nr_cpus;
+ int idx, nr_cpus;
if (env->cpu != NULL)
return 0;
if (env->nr_cpus_avail == 0)
- env->nr_cpus_avail = cpu__max_present_cpu();
+ env->nr_cpus_avail = cpu__max_present_cpu().cpu;
nr_cpus = env->nr_cpus_avail;
if (nr_cpus == -1)
@@ -301,10 +301,12 @@ int perf_env__read_cpu_topology_map(struct perf_env *env)
if (env->cpu == NULL)
return -ENOMEM;
- for (cpu = 0; cpu < nr_cpus; ++cpu) {
- env->cpu[cpu].core_id = cpu_map__get_core_id(cpu);
- env->cpu[cpu].socket_id = cpu_map__get_socket_id(cpu);
- env->cpu[cpu].die_id = cpu_map__get_die_id(cpu);
+ for (idx = 0; idx < nr_cpus; ++idx) {
+ struct perf_cpu cpu = { .cpu = idx };
+
+ env->cpu[idx].core_id = cpu__get_core_id(cpu);
+ env->cpu[idx].socket_id = cpu__get_socket_id(cpu);
+ env->cpu[idx].die_id = cpu__get_die_id(cpu);
}
env->nr_cpus_avail = nr_cpus;
@@ -381,7 +383,7 @@ static int perf_env__read_arch(struct perf_env *env)
static int perf_env__read_nr_cpus_avail(struct perf_env *env)
{
if (env->nr_cpus_avail == 0)
- env->nr_cpus_avail = cpu__max_present_cpu();
+ env->nr_cpus_avail = cpu__max_present_cpu().cpu;
return env->nr_cpus_avail ? 0 : -ENOENT;
}
@@ -487,7 +489,7 @@ const char *perf_env__pmu_mappings(struct perf_env *env)
return env->pmu_mappings;
}
-int perf_env__numa_node(struct perf_env *env, int cpu)
+int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu)
{
if (!env->nr_numa_map) {
struct numa_node *nn;
@@ -495,7 +497,7 @@ int perf_env__numa_node(struct perf_env *env, int cpu)
for (i = 0; i < env->nr_numa_nodes; i++) {
nn = &env->numa_nodes[i];
- nr = max(nr, perf_cpu_map__max(nn->map));
+ nr = max(nr, perf_cpu_map__max(nn->map).cpu);
}
nr++;
@@ -514,13 +516,14 @@ int perf_env__numa_node(struct perf_env *env, int cpu)
env->nr_numa_map = nr;
for (i = 0; i < env->nr_numa_nodes; i++) {
- int tmp, j;
+ struct perf_cpu tmp;
+ int j;
nn = &env->numa_nodes[i];
- perf_cpu_map__for_each_cpu(j, tmp, nn->map)
- env->numa_map[j] = i;
+ perf_cpu_map__for_each_cpu(tmp, j, nn->map)
+ env->numa_map[tmp.cpu] = i;
}
}
- return cpu >= 0 && cpu < env->nr_numa_map ? env->numa_map[cpu] : -1;
+ return cpu.cpu >= 0 && cpu.cpu < env->nr_numa_map ? env->numa_map[cpu.cpu] : -1;
}
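perf_env__numa_node() now takes a struct perf_cpu, so callers holding a plain CPU number wrap it first. A minimal caller-side sketch; the wrapper function is illustrative:

#include "util/cpumap.h"
#include "util/env.h"

/* Illustrative: map a raw CPU number to its NUMA node, -1 if unknown. */
static int numa_node_of(struct perf_env *env, int cpu_nr)
{
	struct perf_cpu cpu = { .cpu = cpu_nr };

	return perf_env__numa_node(env, cpu);
}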
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index 163e5ec503a2..a3541f98e1fc 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -4,6 +4,7 @@
#include <linux/types.h>
#include <linux/rbtree.h>
+#include "cpumap.h"
#include "rwsem.h"
struct perf_cpu_map;
@@ -170,5 +171,5 @@ struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env,
bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node);
struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id);
-int perf_env__numa_node(struct perf_env *env, int cpu);
+int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu);
#endif /* __PERF_ENV_H */
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index fe24801f8e9f..0476bb3a4188 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -484,7 +484,7 @@ size_t perf_event__fprintf_text_poke(union perf_event *event, struct machine *ma
if (machine) {
struct addr_location al;
- al.map = maps__find(&machine->kmaps, tp->addr);
+ al.map = maps__find(machine__kernel_maps(machine), tp->addr);
if (al.map && map__load(al.map) >= 0) {
al.addr = al.map->map_ip(al.map, tp->addr);
al.sym = map__find_symbol(al.map, al.addr);
@@ -587,13 +587,13 @@ struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr,
if (cpumode == PERF_RECORD_MISC_KERNEL && perf_host) {
al->level = 'k';
- al->maps = maps = &machine->kmaps;
+ al->maps = maps = machine__kernel_maps(machine);
load_map = true;
} else if (cpumode == PERF_RECORD_MISC_USER && perf_host) {
al->level = '.';
} else if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL && perf_guest) {
al->level = 'g';
- al->maps = maps = &machine->kmaps;
+ al->maps = maps = machine__kernel_maps(machine);
load_map = true;
} else if (cpumode == PERF_RECORD_MISC_GUEST_USER && perf_guest) {
al->level = 'u';
@@ -683,9 +683,12 @@ static bool check_address_range(struct intlist *addr_list, int addr_range,
int machine__resolve(struct machine *machine, struct addr_location *al,
struct perf_sample *sample)
{
- struct thread *thread = machine__findnew_thread(machine, sample->pid,
- sample->tid);
+ struct thread *thread;
+ if (symbol_conf.guest_code && !machine__is_host(machine))
+ thread = machine__findnew_guest_code(machine, sample->pid);
+ else
+ thread = machine__findnew_thread(machine, sample->pid, sample->tid);
if (thread == NULL)
return -1;
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index c59331eea1d9..cdd72e05fd28 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -101,9 +101,11 @@ enum {
PERF_IP_FLAG_IN_TX = 1ULL << 10,
PERF_IP_FLAG_VMENTRY = 1ULL << 11,
PERF_IP_FLAG_VMEXIT = 1ULL << 12,
+ PERF_IP_FLAG_INTR_DISABLE = 1ULL << 13,
+ PERF_IP_FLAG_INTR_TOGGLE = 1ULL << 14,
};
-#define PERF_IP_FLAG_CHARS "bcrosyiABExgh"
+#define PERF_IP_FLAG_CHARS "bcrosyiABExghDt"
#define PERF_BRANCH_MASK (\
PERF_IP_FLAG_BRANCH |\
@@ -182,6 +184,8 @@ enum perf_synth_id {
PERF_SYNTH_INTEL_PWRX,
PERF_SYNTH_INTEL_CBR,
PERF_SYNTH_INTEL_PSB,
+ PERF_SYNTH_INTEL_EVT,
+ PERF_SYNTH_INTEL_IFLAG_CHG,
};
/*
@@ -280,6 +284,45 @@ struct perf_synth_intel_psb {
u64 offset;
};
+struct perf_synth_intel_evd {
+ union {
+ struct {
+ u8 evd_type;
+ u8 reserved[7];
+ };
+ u64 et;
+ };
+ u64 payload;
+};
+
+/* Intel PT Event Trace */
+struct perf_synth_intel_evt {
+ u32 padding;
+ union {
+ struct {
+ u32 type : 5,
+ reserved : 2,
+ ip : 1,
+ vector : 8,
+ evd_cnt : 16;
+ };
+ u32 cfe;
+ };
+ struct perf_synth_intel_evd evd[0];
+};
+
+struct perf_synth_intel_iflag_chg {
+ u32 padding;
+ union {
+ struct {
+ u32 iflag : 1,
+ via_branch : 1;
+ };
+ u32 flags;
+ };
+ u64 branch_ip; /* If via_branch */
+};
+
/*
* raw_data is always 4 bytes from an 8-byte boundary, so subtract 4 to get
* 8-byte alignment.
diff --git a/tools/perf/util/evlist-hybrid.c b/tools/perf/util/evlist-hybrid.c
index 7c554234b43d..57f02beef023 100644
--- a/tools/perf/util/evlist-hybrid.c
+++ b/tools/perf/util/evlist-hybrid.c
@@ -124,22 +124,23 @@ int evlist__fix_hybrid_cpus(struct evlist *evlist, const char *cpu_list)
events_nr++;
- if (matched_cpus->nr > 0 && (unmatched_cpus->nr > 0 ||
- matched_cpus->nr < cpus->nr ||
- matched_cpus->nr < pmu->cpus->nr)) {
+ if (perf_cpu_map__nr(matched_cpus) > 0 &&
+ (perf_cpu_map__nr(unmatched_cpus) > 0 ||
+ perf_cpu_map__nr(matched_cpus) < perf_cpu_map__nr(cpus) ||
+ perf_cpu_map__nr(matched_cpus) < perf_cpu_map__nr(pmu->cpus))) {
perf_cpu_map__put(evsel->core.cpus);
perf_cpu_map__put(evsel->core.own_cpus);
evsel->core.cpus = perf_cpu_map__get(matched_cpus);
evsel->core.own_cpus = perf_cpu_map__get(matched_cpus);
- if (unmatched_cpus->nr > 0) {
+ if (perf_cpu_map__nr(unmatched_cpus) > 0) {
cpu_map__snprint(matched_cpus, buf1, sizeof(buf1));
pr_warning("WARNING: use %s in '%s' for '%s', skip other cpus in list.\n",
buf1, pmu->name, evsel->name);
}
}
- if (matched_cpus->nr == 0) {
+ if (perf_cpu_map__nr(matched_cpus) == 0) {
evlist__remove(evlist, evsel);
evsel__delete(evsel);
@@ -153,8 +154,8 @@ int evlist__fix_hybrid_cpus(struct evlist *evlist, const char *cpu_list)
perf_cpu_map__put(matched_cpus);
perf_cpu_map__put(unmatched_cpus);
}
-
- ret = (unmatched_count == events_nr) ? -1 : 0;
+ if (events_nr)
+ ret = (unmatched_count == events_nr) ? -1 : 0;
out:
perf_cpu_map__put(cpus);
return ret;
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 5f92319ce258..48af7d379d82 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -242,14 +242,20 @@ int __evlist__add_default(struct evlist *evlist, bool precise)
return 0;
}
-int evlist__add_dummy(struct evlist *evlist)
+static struct evsel *evlist__dummy_event(struct evlist *evlist)
{
struct perf_event_attr attr = {
.type = PERF_TYPE_SOFTWARE,
.config = PERF_COUNT_SW_DUMMY,
.size = sizeof(attr), /* to capture ABI version */
};
- struct evsel *evsel = evsel__new_idx(&attr, evlist->core.nr_entries);
+
+ return evsel__new_idx(&attr, evlist->core.nr_entries);
+}
+
+int evlist__add_dummy(struct evlist *evlist)
+{
+ struct evsel *evsel = evlist__dummy_event(evlist);
if (evsel == NULL)
return -ENOMEM;
@@ -258,6 +264,51 @@ int evlist__add_dummy(struct evlist *evlist)
return 0;
}
+static void evlist__add_on_all_cpus(struct evlist *evlist, struct evsel *evsel)
+{
+ evsel->core.system_wide = true;
+
+ /*
+ * All CPUs.
+ *
+ * Note perf_event_open() does not accept CPUs that are not online, so
+ * in fact this CPU list will include only all online CPUs.
+ */
+ perf_cpu_map__put(evsel->core.own_cpus);
+ evsel->core.own_cpus = perf_cpu_map__new(NULL);
+ perf_cpu_map__put(evsel->core.cpus);
+ evsel->core.cpus = perf_cpu_map__get(evsel->core.own_cpus);
+
+ /* No threads */
+ perf_thread_map__put(evsel->core.threads);
+ evsel->core.threads = perf_thread_map__new_dummy();
+
+ evlist__add(evlist, evsel);
+}
+
+struct evsel *evlist__add_aux_dummy(struct evlist *evlist, bool system_wide)
+{
+ struct evsel *evsel = evlist__dummy_event(evlist);
+
+ if (!evsel)
+ return NULL;
+
+ evsel->core.attr.exclude_kernel = 1;
+ evsel->core.attr.exclude_guest = 1;
+ evsel->core.attr.exclude_hv = 1;
+ evsel->core.attr.freq = 0;
+ evsel->core.attr.sample_period = 1;
+ evsel->no_aux_samples = true;
+ evsel->name = strdup("dummy:u");
+
+ if (system_wide)
+ evlist__add_on_all_cpus(evlist, evsel);
+ else
+ evlist__add(evlist, evsel);
+
+ return evsel;
+}
+
static int evlist__add_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs)
{
struct evsel *evsel, *n;
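evlist__add_aux_dummy() is aimed at tools that need a dummy software event to carry side-band data; with system_wide it is opened on all online CPUs against a dummy thread map. A hedged usage sketch; the wrapper and the tracking-flag usage are illustrative:

#include <errno.h>
#include "util/evlist.h"
#include "util/evsel.h"

/* Illustrative: attach a system-wide tracking dummy to an evlist. */
static int add_tracking_dummy(struct evlist *evlist)
{
	struct evsel *evsel = evlist__add_dummy_on_all_cpus(evlist);

	if (!evsel)
		return -ENOMEM;
	evsel->tracking = true;	/* receive mmap/comm/task side-band events */
	return 0;
}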
@@ -334,44 +385,71 @@ int evlist__add_newtp(struct evlist *evlist, const char *sys, const char *name,
return 0;
}
-static int evlist__nr_threads(struct evlist *evlist, struct evsel *evsel)
-{
- if (evsel->core.system_wide)
- return 1;
- else
- return perf_thread_map__nr(evlist->core.threads);
-}
-
-void evlist__cpu_iter_start(struct evlist *evlist)
+struct evlist_cpu_iterator evlist__cpu_begin(struct evlist *evlist, struct affinity *affinity)
{
- struct evsel *pos;
-
- /*
- * Reset the per evsel cpu_iter. This is needed because
- * each evsel's cpumap may have a different index space,
- * and some operations need the index to modify
- * the FD xyarray (e.g. open, close)
- */
- evlist__for_each_entry(evlist, pos)
- pos->cpu_iter = 0;
-}
+ struct evlist_cpu_iterator itr = {
+ .container = evlist,
+ .evsel = NULL,
+ .cpu_map_idx = 0,
+ .evlist_cpu_map_idx = 0,
+ .evlist_cpu_map_nr = perf_cpu_map__nr(evlist->core.all_cpus),
+ .cpu = (struct perf_cpu){ .cpu = -1},
+ .affinity = affinity,
+ };
-bool evsel__cpu_iter_skip_no_inc(struct evsel *ev, int cpu)
-{
- if (ev->cpu_iter >= ev->core.cpus->nr)
- return true;
- if (cpu >= 0 && ev->core.cpus->map[ev->cpu_iter] != cpu)
- return true;
- return false;
+ if (evlist__empty(evlist)) {
+ /* Ensure the empty list doesn't iterate. */
+ itr.evlist_cpu_map_idx = itr.evlist_cpu_map_nr;
+ } else {
+ itr.evsel = evlist__first(evlist);
+ if (itr.affinity) {
+ itr.cpu = perf_cpu_map__cpu(evlist->core.all_cpus, 0);
+ affinity__set(itr.affinity, itr.cpu.cpu);
+ itr.cpu_map_idx = perf_cpu_map__idx(itr.evsel->core.cpus, itr.cpu);
+ /*
+ * If this CPU isn't in the evsel's cpu map then advance
+ * through the list.
+ */
+ if (itr.cpu_map_idx == -1)
+ evlist_cpu_iterator__next(&itr);
+ }
+ }
+ return itr;
+}
+
+void evlist_cpu_iterator__next(struct evlist_cpu_iterator *evlist_cpu_itr)
+{
+ while (evlist_cpu_itr->evsel != evlist__last(evlist_cpu_itr->container)) {
+ evlist_cpu_itr->evsel = evsel__next(evlist_cpu_itr->evsel);
+ evlist_cpu_itr->cpu_map_idx =
+ perf_cpu_map__idx(evlist_cpu_itr->evsel->core.cpus,
+ evlist_cpu_itr->cpu);
+ if (evlist_cpu_itr->cpu_map_idx != -1)
+ return;
+ }
+ evlist_cpu_itr->evlist_cpu_map_idx++;
+ if (evlist_cpu_itr->evlist_cpu_map_idx < evlist_cpu_itr->evlist_cpu_map_nr) {
+ evlist_cpu_itr->evsel = evlist__first(evlist_cpu_itr->container);
+ evlist_cpu_itr->cpu =
+ perf_cpu_map__cpu(evlist_cpu_itr->container->core.all_cpus,
+ evlist_cpu_itr->evlist_cpu_map_idx);
+ if (evlist_cpu_itr->affinity)
+ affinity__set(evlist_cpu_itr->affinity, evlist_cpu_itr->cpu.cpu);
+ evlist_cpu_itr->cpu_map_idx =
+ perf_cpu_map__idx(evlist_cpu_itr->evsel->core.cpus,
+ evlist_cpu_itr->cpu);
+ /*
+ * If this CPU isn't in the evsel's cpu map then advance through
+ * the list.
+ */
+ if (evlist_cpu_itr->cpu_map_idx == -1)
+ evlist_cpu_iterator__next(evlist_cpu_itr);
+ }
}
-bool evsel__cpu_iter_skip(struct evsel *ev, int cpu)
+bool evlist_cpu_iterator__end(const struct evlist_cpu_iterator *evlist_cpu_itr)
{
- if (!evsel__cpu_iter_skip_no_inc(ev, cpu)) {
- ev->cpu_iter++;
- return false;
- }
- return true;
+ return evlist_cpu_itr->evlist_cpu_map_idx >= evlist_cpu_itr->evlist_cpu_map_nr;
}
static int evsel__strcmp(struct evsel *pos, char *evsel_name)
@@ -400,37 +478,36 @@ static int evlist__is_enabled(struct evlist *evlist)
static void __evlist__disable(struct evlist *evlist, char *evsel_name)
{
struct evsel *pos;
- struct affinity affinity;
- int cpu, i, imm = 0;
+ struct evlist_cpu_iterator evlist_cpu_itr;
+ struct affinity saved_affinity, *affinity = NULL;
bool has_imm = false;
- if (affinity__setup(&affinity) < 0)
- return;
+ // See explanation in evlist__close()
+ if (!cpu_map__is_dummy(evlist->core.user_requested_cpus)) {
+ if (affinity__setup(&saved_affinity) < 0)
+ return;
+ affinity = &saved_affinity;
+ }
/* Disable 'immediate' events last */
- for (imm = 0; imm <= 1; imm++) {
- evlist__for_each_cpu(evlist, i, cpu) {
- affinity__set(&affinity, cpu);
-
- evlist__for_each_entry(evlist, pos) {
- if (evsel__strcmp(pos, evsel_name))
- continue;
- if (evsel__cpu_iter_skip(pos, cpu))
- continue;
- if (pos->disabled || !evsel__is_group_leader(pos) || !pos->core.fd)
- continue;
- if (pos->immediate)
- has_imm = true;
- if (pos->immediate != imm)
- continue;
- evsel__disable_cpu(pos, pos->cpu_iter - 1);
- }
+ for (int imm = 0; imm <= 1; imm++) {
+ evlist__for_each_cpu(evlist_cpu_itr, evlist, affinity) {
+ pos = evlist_cpu_itr.evsel;
+ if (evsel__strcmp(pos, evsel_name))
+ continue;
+ if (pos->disabled || !evsel__is_group_leader(pos) || !pos->core.fd)
+ continue;
+ if (pos->immediate)
+ has_imm = true;
+ if (pos->immediate != imm)
+ continue;
+ evsel__disable_cpu(pos, evlist_cpu_itr.cpu_map_idx);
}
if (!has_imm)
break;
}
- affinity__cleanup(&affinity);
+ affinity__cleanup(affinity);
evlist__for_each_entry(evlist, pos) {
if (evsel__strcmp(pos, evsel_name))
continue;
@@ -462,26 +539,25 @@ void evlist__disable_evsel(struct evlist *evlist, char *evsel_name)
static void __evlist__enable(struct evlist *evlist, char *evsel_name)
{
struct evsel *pos;
- struct affinity affinity;
- int cpu, i;
-
- if (affinity__setup(&affinity) < 0)
- return;
+ struct evlist_cpu_iterator evlist_cpu_itr;
+ struct affinity saved_affinity, *affinity = NULL;
- evlist__for_each_cpu(evlist, i, cpu) {
- affinity__set(&affinity, cpu);
+ // See explanation in evlist__close()
+ if (!cpu_map__is_dummy(evlist->core.user_requested_cpus)) {
+ if (affinity__setup(&saved_affinity) < 0)
+ return;
+ affinity = &saved_affinity;
+ }
- evlist__for_each_entry(evlist, pos) {
- if (evsel__strcmp(pos, evsel_name))
- continue;
- if (evsel__cpu_iter_skip(pos, cpu))
- continue;
- if (!evsel__is_group_leader(pos) || !pos->core.fd)
- continue;
- evsel__enable_cpu(pos, pos->cpu_iter - 1);
- }
+ evlist__for_each_cpu(evlist_cpu_itr, evlist, affinity) {
+ pos = evlist_cpu_itr.evsel;
+ if (evsel__strcmp(pos, evsel_name))
+ continue;
+ if (!evsel__is_group_leader(pos) || !pos->core.fd)
+ continue;
+ evsel__enable_cpu(pos, evlist_cpu_itr.cpu_map_idx);
}
- affinity__cleanup(&affinity);
+ affinity__cleanup(affinity);
evlist__for_each_entry(evlist, pos) {
if (evsel__strcmp(pos, evsel_name))
continue;
@@ -513,48 +589,6 @@ void evlist__toggle_enable(struct evlist *evlist)
(evlist->enabled ? evlist__disable : evlist__enable)(evlist);
}
-static int evlist__enable_event_cpu(struct evlist *evlist, struct evsel *evsel, int cpu)
-{
- int thread;
- int nr_threads = evlist__nr_threads(evlist, evsel);
-
- if (!evsel->core.fd)
- return -EINVAL;
-
- for (thread = 0; thread < nr_threads; thread++) {
- int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
- if (err)
- return err;
- }
- return 0;
-}
-
-static int evlist__enable_event_thread(struct evlist *evlist, struct evsel *evsel, int thread)
-{
- int cpu;
- int nr_cpus = perf_cpu_map__nr(evlist->core.cpus);
-
- if (!evsel->core.fd)
- return -EINVAL;
-
- for (cpu = 0; cpu < nr_cpus; cpu++) {
- int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
- if (err)
- return err;
- }
- return 0;
-}
-
-int evlist__enable_event_idx(struct evlist *evlist, struct evsel *evsel, int idx)
-{
- bool per_cpu_mmaps = !perf_cpu_map__empty(evlist->core.cpus);
-
- if (per_cpu_mmaps)
- return evlist__enable_event_cpu(evlist, evsel, idx);
-
- return evlist__enable_event_thread(evlist, evsel, idx);
-}
-
int evlist__add_pollfd(struct evlist *evlist, int fd)
{
return perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN, fdarray_flag__default);
@@ -764,13 +798,15 @@ static struct mmap *evlist__alloc_mmap(struct evlist *evlist,
static void
perf_evlist__mmap_cb_idx(struct perf_evlist *_evlist,
+ struct perf_evsel *_evsel,
struct perf_mmap_param *_mp,
- int idx, bool per_cpu)
+ int idx)
{
struct evlist *evlist = container_of(_evlist, struct evlist, core);
struct mmap_params *mp = container_of(_mp, struct mmap_params, core);
+ struct evsel *evsel = container_of(_evsel, struct evsel, core);
- auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, idx, per_cpu);
+ auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, evsel, idx);
}
static struct perf_mmap*
@@ -800,7 +836,7 @@ perf_evlist__mmap_cb_get(struct perf_evlist *_evlist, bool overwrite, int idx)
static int
perf_evlist__mmap_cb_mmap(struct perf_mmap *_map, struct perf_mmap_param *_mp,
- int output, int cpu)
+ int output, struct perf_cpu cpu)
{
struct mmap *map = container_of(_map, struct mmap, core);
struct mmap_params *mp = container_of(_mp, struct mmap_params, core);
@@ -1264,14 +1300,15 @@ void evlist__set_selected(struct evlist *evlist, struct evsel *evsel)
void evlist__close(struct evlist *evlist)
{
struct evsel *evsel;
+ struct evlist_cpu_iterator evlist_cpu_itr;
struct affinity affinity;
- int cpu, i;
/*
- * With perf record core.cpus is usually NULL.
+ * With perf record core.user_requested_cpus is usually NULL.
* Use the old method to handle this for now.
*/
- if (!evlist->core.cpus) {
+ if (!evlist->core.user_requested_cpus ||
+ cpu_map__is_dummy(evlist->core.user_requested_cpus)) {
evlist__for_each_entry_reverse(evlist, evsel)
evsel__close(evsel);
return;
@@ -1279,15 +1316,12 @@ void evlist__close(struct evlist *evlist)
if (affinity__setup(&affinity) < 0)
return;
- evlist__for_each_cpu(evlist, i, cpu) {
- affinity__set(&affinity, cpu);
- evlist__for_each_entry_reverse(evlist, evsel) {
- if (evsel__cpu_iter_skip(evsel, cpu))
- continue;
- perf_evsel__close_cpu(&evsel->core, evsel->cpu_iter - 1);
- }
+ evlist__for_each_cpu(evlist_cpu_itr, evlist, &affinity) {
+ perf_evsel__close_cpu(&evlist_cpu_itr.evsel->core,
+ evlist_cpu_itr.cpu_map_idx);
}
+
affinity__cleanup(&affinity);
evlist__for_each_entry_reverse(evlist, evsel) {
perf_evsel__free_fd(&evsel->core);
@@ -1300,7 +1334,6 @@ static int evlist__create_syswide_maps(struct evlist *evlist)
{
struct perf_cpu_map *cpus;
struct perf_thread_map *threads;
- int err = -ENOMEM;
/*
* Try reading /sys/devices/system/cpu/online to get
@@ -1325,7 +1358,7 @@ static int evlist__create_syswide_maps(struct evlist *evlist)
out_put:
perf_cpu_map__put(cpus);
out:
- return err;
+ return -ENOMEM;
}
int evlist__open(struct evlist *evlist)
@@ -1337,7 +1370,7 @@ int evlist__open(struct evlist *evlist)
* Default: one fd per CPU, all threads, aka systemwide
* as sys_perf_event_open(cpu = -1, thread = -1) is EINVAL
*/
- if (evlist->core.threads == NULL && evlist->core.cpus == NULL) {
+ if (evlist->core.threads == NULL && evlist->core.user_requested_cpus == NULL) {
err = evlist__create_syswide_maps(evlist);
if (err < 0)
goto out_err;
@@ -1760,8 +1793,13 @@ struct evsel *evlist__reset_weak_group(struct evlist *evsel_list, struct evsel *
if (evsel__has_leader(c2, leader)) {
if (is_open && close)
perf_evsel__close(&c2->core);
- evsel__set_leader(c2, c2);
- c2->core.nr_members = 0;
+ /*
+ * We want to close all members of the group and reopen
+ * them. Some events, like Intel topdown, require being
+ * in a group and so keep these in the group.
+ */
+ evsel__remove_from_group(c2, leader);
+
/*
* Set this for all former members of the group
* to indicate they get reopened.
@@ -1769,6 +1807,9 @@ struct evsel *evlist__reset_weak_group(struct evlist *evsel_list, struct evsel *
c2->reset_group = true;
}
}
+ /* Reset the leader count if all entries were removed. */
+ if (leader->core.nr_members == 1)
+ leader->core.nr_members = 0;
return leader;
}
@@ -2115,6 +2156,22 @@ int evlist__ctlfd_process(struct evlist *evlist, enum evlist_ctl_cmd *cmd)
return err;
}
+int evlist__ctlfd_update(struct evlist *evlist, struct pollfd *update)
+{
+ int ctlfd_pos = evlist->ctl_fd.pos;
+ struct pollfd *entries = evlist->core.pollfd.entries;
+
+ if (!evlist__ctlfd_initialized(evlist))
+ return 0;
+
+ if (entries[ctlfd_pos].fd != update->fd ||
+ entries[ctlfd_pos].events != update->events)
+ return -1;
+
+ entries[ctlfd_pos].revents = update->revents;
+ return 0;
+}
+
struct evsel *evlist__find_evsel(struct evlist *evlist, int idx)
{
struct evsel *evsel;
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 97bfb8d0be4f..1bde9ccf4e7d 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -64,6 +64,7 @@ struct evlist {
struct evsel *selected;
struct events_stats stats;
struct perf_env *env;
+ const char *hybrid_pmu_name;
void (*trace_event_sample_raw)(struct evlist *evlist,
union perf_event *event,
struct perf_sample *sample);
@@ -110,8 +111,14 @@ int __evlist__add_default_attrs(struct evlist *evlist,
__evlist__add_default_attrs(evlist, array, ARRAY_SIZE(array))
int arch_evlist__add_default_attrs(struct evlist *evlist);
+struct evsel *arch_evlist__leader(struct list_head *list);
int evlist__add_dummy(struct evlist *evlist);
+struct evsel *evlist__add_aux_dummy(struct evlist *evlist, bool system_wide);
+static inline struct evsel *evlist__add_dummy_on_all_cpus(struct evlist *evlist)
+{
+ return evlist__add_aux_dummy(evlist, true);
+}
int evlist__add_sb_event(struct evlist *evlist, struct perf_event_attr *attr,
evsel__sb_cb_t cb, void *data);
@@ -194,8 +201,6 @@ void evlist__toggle_enable(struct evlist *evlist);
void evlist__disable_evsel(struct evlist *evlist, char *evsel_name);
void evlist__enable_evsel(struct evlist *evlist, char *evsel_name);
-int evlist__enable_event_idx(struct evlist *evlist, struct evsel *evsel, int idx);
-
void evlist__set_selected(struct evlist *evlist, struct evsel *evsel);
int evlist__create_maps(struct evlist *evlist, struct target *target);
@@ -325,17 +330,53 @@ void evlist__to_front(struct evlist *evlist, struct evsel *move_evsel);
#define evlist__for_each_entry_safe(evlist, tmp, evsel) \
__evlist__for_each_entry_safe(&(evlist)->core.entries, tmp, evsel)
-#define evlist__for_each_cpu(evlist, index, cpu) \
- evlist__cpu_iter_start(evlist); \
- perf_cpu_map__for_each_cpu (cpu, index, (evlist)->core.all_cpus)
+/** Iterator state for evlist__for_each_cpu */
+struct evlist_cpu_iterator {
+ /** The list being iterated through. */
+ struct evlist *container;
+ /** The current evsel of the iterator. */
+ struct evsel *evsel;
+ /** The CPU map index corresponding to the evsel->core.cpus for the current CPU. */
+ int cpu_map_idx;
+ /**
+ * The CPU map index corresponding to evlist->core.all_cpus for the
+ * current CPU. Distinct from cpu_map_idx as the evsel's cpu map may
+ * contain fewer entries.
+ */
+ int evlist_cpu_map_idx;
+ /** The number of CPU map entries in evlist->core.all_cpus. */
+ int evlist_cpu_map_nr;
+ /** The current CPU of the iterator. */
+ struct perf_cpu cpu;
+ /** If present, used to set the affinity when switching between CPUs. */
+ struct affinity *affinity;
+};
+
+/**
+ * evlist__for_each_cpu - without affinity, iterate over the evlist. With
+ * affinity, iterate over all CPUs and then the evlist
+ * for each evsel on that CPU. When switching between
+ * CPUs the affinity is set to the CPU to avoid IPIs
+ * during syscalls.
+ * @evlist_cpu_itr: the iterator instance.
+ * @evlist: evlist instance to iterate.
+ * @affinity: NULL or used to set the affinity to the current CPU.
+ */
+#define evlist__for_each_cpu(evlist_cpu_itr, evlist, affinity) \
+ for ((evlist_cpu_itr) = evlist__cpu_begin(evlist, affinity); \
+ !evlist_cpu_iterator__end(&evlist_cpu_itr); \
+ evlist_cpu_iterator__next(&evlist_cpu_itr))
+
+/** Returns an iterator set to the first CPU/evsel of evlist. */
+struct evlist_cpu_iterator evlist__cpu_begin(struct evlist *evlist, struct affinity *affinity);
+/** Move to next element in iterator, updating CPU, evsel and the affinity. */
+void evlist_cpu_iterator__next(struct evlist_cpu_iterator *evlist_cpu_itr);
+/** Returns true when iterator is at the end of the CPUs and evlist. */
+bool evlist_cpu_iterator__end(const struct evlist_cpu_iterator *evlist_cpu_itr);
struct evsel *evlist__get_tracking_event(struct evlist *evlist);
void evlist__set_tracking_event(struct evlist *evlist, struct evsel *tracking_evsel);
-void evlist__cpu_iter_start(struct evlist *evlist);
-bool evsel__cpu_iter_skip(struct evsel *ev, int cpu);
-bool evsel__cpu_iter_skip_no_inc(struct evsel *ev, int cpu);
-
struct evsel *evlist__find_evsel_by_str(struct evlist *evlist, const char *str);
struct evsel *evlist__event2evsel(struct evlist *evlist, union perf_event *event);
@@ -372,6 +413,7 @@ void evlist__close_control(int ctl_fd, int ctl_fd_ack, bool *ctl_fd_close);
int evlist__initialize_ctlfd(struct evlist *evlist, int ctl_fd, int ctl_fd_ack);
int evlist__finalize_ctlfd(struct evlist *evlist);
bool evlist__ctlfd_initialized(struct evlist *evlist);
+int evlist__ctlfd_update(struct evlist *evlist, struct pollfd *update);
int evlist__ctlfd_process(struct evlist *evlist, enum evlist_ctl_cmd *cmd);
int evlist__ctlfd_ack(struct evlist *evlist);
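The iterator above replaces the old per-evsel cpu_iter bookkeeping; when an affinity is passed, the thread is pinned to each CPU before that CPU's events are touched, so the per-event syscalls avoid IPIs. A minimal sketch of a caller, mirroring the enable path; the function itself is illustrative:

#include "util/affinity.h"
#include "util/evlist.h"
#include "util/evsel.h"

/* Illustrative: enable every event CPU by CPU with affinity pinning. */
static void enable_all(struct evlist *evlist)
{
	struct evlist_cpu_iterator itr;
	struct affinity affinity;

	if (affinity__setup(&affinity) < 0)
		return;
	evlist__for_each_cpu(itr, evlist, &affinity) {
		/* itr.cpu_map_idx indexes itr.evsel->core.cpus for itr.cpu */
		evsel__enable_cpu(itr.evsel, itr.cpu_map_idx);
	}
	affinity__cleanup(&affinity);
}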
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index ac0127be0459..ce499c5da8d7 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -59,6 +59,33 @@ struct perf_missing_features perf_missing_features;
static clockid_t clockid;
+static const char *const perf_tool_event__tool_names[PERF_TOOL_MAX] = {
+ NULL,
+ "duration_time",
+ "user_time",
+ "system_time",
+};
+
+const char *perf_tool_event__to_str(enum perf_tool_event ev)
+{
+ if (ev > PERF_TOOL_NONE && ev < PERF_TOOL_MAX)
+ return perf_tool_event__tool_names[ev];
+
+ return NULL;
+}
+
+enum perf_tool_event perf_tool_event__from_str(const char *str)
+{
+ int i;
+
+ perf_tool_event__for_each_event(i) {
+ if (!strcmp(str, perf_tool_event__tool_names[i]))
+ return i;
+ }
+ return PERF_TOOL_NONE;
+}
+
+
static int evsel__no_extra_init(struct evsel *evsel __maybe_unused)
{
return 0;
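The name table plus the to_str/from_str pair lets event parsing and printing round-trip the tool-event names. A short hedged sketch; the helper is illustrative:

#include <stdio.h>
#include "util/evsel.h"

/* Illustrative: round-trip a tool event name through the new helpers. */
static void show_tool_event(const char *name)
{
	enum perf_tool_event ev = perf_tool_event__from_str(name);

	if (ev == PERF_TOOL_NONE)
		printf("%s: not a tool event\n", name);
	else
		printf("tool event %d: %s\n", ev, perf_tool_event__to_str(ev));
}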
@@ -269,8 +296,8 @@ struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx)
return NULL;
evsel__init(evsel, attr, idx);
- if (evsel__is_bpf_output(evsel)) {
- evsel->core.attr.sample_type |= (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
+ if (evsel__is_bpf_output(evsel) && !attr->sample_type) {
+ evsel->core.attr.sample_type = (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD),
evsel->core.attr.sample_period = 1;
}
@@ -382,6 +409,7 @@ struct evsel *evsel__clone(struct evsel *orig)
evsel->core.threads = perf_thread_map__get(orig->core.threads);
evsel->core.nr_members = orig->core.nr_members;
evsel->core.system_wide = orig->core.system_wide;
+ evsel->core.requires_cpu = orig->core.requires_cpu;
if (orig->name) {
evsel->name = strdup(orig->name);
@@ -486,7 +514,7 @@ out_err:
return ERR_PTR(err);
}
-const char *evsel__hw_names[PERF_COUNT_HW_MAX] = {
+const char *const evsel__hw_names[PERF_COUNT_HW_MAX] = {
"cycles",
"instructions",
"cache-references",
@@ -571,7 +599,7 @@ static int evsel__hw_name(struct evsel *evsel, char *bf, size_t size)
return r + evsel__add_modifiers(evsel, bf + r, size - r);
}
-const char *evsel__sw_names[PERF_COUNT_SW_MAX] = {
+const char *const evsel__sw_names[PERF_COUNT_SW_MAX] = {
"cpu-clock",
"task-clock",
"page-faults",
@@ -597,6 +625,11 @@ static int evsel__sw_name(struct evsel *evsel, char *bf, size_t size)
return r + evsel__add_modifiers(evsel, bf + r, size - r);
}
+static int evsel__tool_name(enum perf_tool_event ev, char *bf, size_t size)
+{
+ return scnprintf(bf, size, "%s", perf_tool_event__to_str(ev));
+}
+
static int __evsel__bp_name(char *bf, size_t size, u64 addr, u64 type)
{
int r;
@@ -622,7 +655,7 @@ static int evsel__bp_name(struct evsel *evsel, char *bf, size_t size)
return r + evsel__add_modifiers(evsel, bf + r, size - r);
}
-const char *evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX][EVSEL__MAX_ALIASES] = {
+const char *const evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX][EVSEL__MAX_ALIASES] = {
{ "L1-dcache", "l1-d", "l1d", "L1-data", },
{ "L1-icache", "l1-i", "l1i", "L1-instruction", },
{ "LLC", "L2", },
@@ -632,13 +665,13 @@ const char *evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX][EVSEL__MAX_ALIASES] = {
{ "node", },
};
-const char *evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX][EVSEL__MAX_ALIASES] = {
+const char *const evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX][EVSEL__MAX_ALIASES] = {
{ "load", "loads", "read", },
{ "store", "stores", "write", },
{ "prefetch", "prefetches", "speculative-read", "speculative-load", },
};
-const char *evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX][EVSEL__MAX_ALIASES] = {
+const char *const evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX][EVSEL__MAX_ALIASES] = {
{ "refs", "Reference", "ops", "access", },
{ "misses", "miss", },
};
@@ -654,7 +687,7 @@ const char *evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX][EVSEL__MAX_AL
* L1I : Read and prefetch only
* ITLB and BPU : Read-only
*/
-static unsigned long evsel__hw_cache_stat[C(MAX)] = {
+static const unsigned long evsel__hw_cache_stat[C(MAX)] = {
[C(L1D)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
[C(L1I)] = (CACHE_READ | CACHE_PREFETCH),
[C(LL)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
@@ -723,12 +756,6 @@ static int evsel__raw_name(struct evsel *evsel, char *bf, size_t size)
return ret + evsel__add_modifiers(evsel, bf + ret, size - ret);
}
-static int evsel__tool_name(char *bf, size_t size)
-{
- int ret = scnprintf(bf, size, "duration_time");
- return ret;
-}
-
const char *evsel__name(struct evsel *evsel)
{
char bf[128];
@@ -753,8 +780,8 @@ const char *evsel__name(struct evsel *evsel)
break;
case PERF_TYPE_SOFTWARE:
- if (evsel->tool_event)
- evsel__tool_name(bf, sizeof(bf));
+ if (evsel__is_tool(evsel))
+ evsel__tool_name(evsel->tool_event, bf, sizeof(bf));
else
evsel__sw_name(evsel, bf, sizeof(bf));
break;
@@ -786,8 +813,8 @@ const char *evsel__metric_id(const struct evsel *evsel)
if (evsel->metric_id)
return evsel->metric_id;
- if (evsel->core.attr.type == PERF_TYPE_SOFTWARE && evsel->tool_event)
- return "duration_time";
+ if (evsel__is_tool(evsel))
+ return perf_tool_event__to_str(evsel->tool_event);
return "unknown";
}
@@ -870,7 +897,7 @@ static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *o
"specifying a subset with --user-regs may render DWARF unwinding unreliable, "
"so the minimal registers set (IP, SP) is explicitly forced.\n");
} else {
- attr->sample_regs_user |= PERF_REGS_MASK;
+ attr->sample_regs_user |= arch__user_reg_mask();
}
attr->sample_stack_user = param->dump_size;
attr->exclude_callchain_user = 1;
@@ -1064,6 +1091,17 @@ void __weak arch_evsel__fixup_new_cycles(struct perf_event_attr *attr __maybe_un
{
}
+static void evsel__set_default_freq_period(struct record_opts *opts,
+ struct perf_event_attr *attr)
+{
+ if (opts->freq) {
+ attr->freq = 1;
+ attr->sample_freq = opts->freq;
+ } else {
+ attr->sample_period = opts->default_interval;
+ }
+}
+
/*
* The enable_on_exec/disabled value strategy:
*
@@ -1130,14 +1168,12 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts,
* We default some events to have a default interval. But keep
* it a weak assumption overridable by the user.
*/
- if (!attr->sample_period) {
- if (opts->freq) {
- attr->freq = 1;
- attr->sample_freq = opts->freq;
- } else {
- attr->sample_period = opts->default_interval;
- }
- }
+ if ((evsel->is_libpfm_event && !attr->sample_period) ||
+ (!evsel->is_libpfm_event && (!attr->sample_period ||
+ opts->user_freq != UINT_MAX ||
+ opts->user_interval != ULLONG_MAX)))
+ evsel__set_default_freq_period(opts, attr);
+
/*
* If attr->freq was set (here or earlier), ask for period
* to be sampled.
@@ -1372,9 +1408,9 @@ int evsel__append_addr_filter(struct evsel *evsel, const char *filter)
}
/* Caller has to clear disabled after going through all CPUs. */
-int evsel__enable_cpu(struct evsel *evsel, int cpu)
+int evsel__enable_cpu(struct evsel *evsel, int cpu_map_idx)
{
- return perf_evsel__enable_cpu(&evsel->core, cpu);
+ return perf_evsel__enable_cpu(&evsel->core, cpu_map_idx);
}
int evsel__enable(struct evsel *evsel)
@@ -1387,9 +1423,9 @@ int evsel__enable(struct evsel *evsel)
}
/* Caller has to set disabled after going through all CPUs. */
-int evsel__disable_cpu(struct evsel *evsel, int cpu)
+int evsel__disable_cpu(struct evsel *evsel, int cpu_map_idx)
{
- return perf_evsel__disable_cpu(&evsel->core, cpu);
+ return perf_evsel__disable_cpu(&evsel->core, cpu_map_idx);
}
int evsel__disable(struct evsel *evsel)
@@ -1455,7 +1491,7 @@ void evsel__delete(struct evsel *evsel)
free(evsel);
}
-void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread,
+void evsel__compute_deltas(struct evsel *evsel, int cpu_map_idx, int thread,
struct perf_counts_values *count)
{
struct perf_counts_values tmp;
@@ -1463,12 +1499,12 @@ void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread,
if (!evsel->prev_raw_counts)
return;
- if (cpu == -1) {
+ if (cpu_map_idx == -1) {
tmp = evsel->prev_raw_counts->aggr;
evsel->prev_raw_counts->aggr = *count;
} else {
- tmp = *perf_counts(evsel->prev_raw_counts, cpu, thread);
- *perf_counts(evsel->prev_raw_counts, cpu, thread) = *count;
+ tmp = *perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread);
+ *perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread) = *count;
}
count->val = count->val - tmp.val;
@@ -1476,46 +1512,28 @@ void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread,
count->run = count->run - tmp.run;
}
-void perf_counts_values__scale(struct perf_counts_values *count,
- bool scale, s8 *pscaled)
+static int evsel__read_one(struct evsel *evsel, int cpu_map_idx, int thread)
{
- s8 scaled = 0;
+ struct perf_counts_values *count = perf_counts(evsel->counts, cpu_map_idx, thread);
- if (scale) {
- if (count->run == 0) {
- scaled = -1;
- count->val = 0;
- } else if (count->run < count->ena) {
- scaled = 1;
- count->val = (u64)((double) count->val * count->ena / count->run);
- }
- }
-
- if (pscaled)
- *pscaled = scaled;
-}
-
-static int evsel__read_one(struct evsel *evsel, int cpu, int thread)
-{
- struct perf_counts_values *count = perf_counts(evsel->counts, cpu, thread);
-
- return perf_evsel__read(&evsel->core, cpu, thread, count);
+ return perf_evsel__read(&evsel->core, cpu_map_idx, thread, count);
}
-static void evsel__set_count(struct evsel *counter, int cpu, int thread, u64 val, u64 ena, u64 run)
+static void evsel__set_count(struct evsel *counter, int cpu_map_idx, int thread,
+ u64 val, u64 ena, u64 run)
{
struct perf_counts_values *count;
- count = perf_counts(counter->counts, cpu, thread);
+ count = perf_counts(counter->counts, cpu_map_idx, thread);
count->val = val;
count->ena = ena;
count->run = run;
- perf_counts__set_loaded(counter->counts, cpu, thread, true);
+ perf_counts__set_loaded(counter->counts, cpu_map_idx, thread, true);
}
-static int evsel__process_group_data(struct evsel *leader, int cpu, int thread, u64 *data)
+static int evsel__process_group_data(struct evsel *leader, int cpu_map_idx, int thread, u64 *data)
{
u64 read_format = leader->core.attr.read_format;
struct sample_read_value *v;
@@ -1534,7 +1552,7 @@ static int evsel__process_group_data(struct evsel *leader, int cpu, int thread,
v = (struct sample_read_value *) data;
- evsel__set_count(leader, cpu, thread, v[0].value, ena, run);
+ evsel__set_count(leader, cpu_map_idx, thread, v[0].value, ena, run);
for (i = 1; i < nr; i++) {
struct evsel *counter;
@@ -1543,13 +1561,13 @@ static int evsel__process_group_data(struct evsel *leader, int cpu, int thread,
if (!counter)
return -EINVAL;
- evsel__set_count(counter, cpu, thread, v[i].value, ena, run);
+ evsel__set_count(counter, cpu_map_idx, thread, v[i].value, ena, run);
}
return 0;
}
-static int evsel__read_group(struct evsel *leader, int cpu, int thread)
+static int evsel__read_group(struct evsel *leader, int cpu_map_idx, int thread)
{
struct perf_stat_evsel *ps = leader->stats;
u64 read_format = leader->core.attr.read_format;
@@ -1570,67 +1588,67 @@ static int evsel__read_group(struct evsel *leader, int cpu, int thread)
ps->group_data = data;
}
- if (FD(leader, cpu, thread) < 0)
+ if (FD(leader, cpu_map_idx, thread) < 0)
return -EINVAL;
- if (readn(FD(leader, cpu, thread), data, size) <= 0)
+ if (readn(FD(leader, cpu_map_idx, thread), data, size) <= 0)
return -errno;
- return evsel__process_group_data(leader, cpu, thread, data);
+ return evsel__process_group_data(leader, cpu_map_idx, thread, data);
}
-int evsel__read_counter(struct evsel *evsel, int cpu, int thread)
+int evsel__read_counter(struct evsel *evsel, int cpu_map_idx, int thread)
{
u64 read_format = evsel->core.attr.read_format;
if (read_format & PERF_FORMAT_GROUP)
- return evsel__read_group(evsel, cpu, thread);
+ return evsel__read_group(evsel, cpu_map_idx, thread);
- return evsel__read_one(evsel, cpu, thread);
+ return evsel__read_one(evsel, cpu_map_idx, thread);
}
-int __evsel__read_on_cpu(struct evsel *evsel, int cpu, int thread, bool scale)
+int __evsel__read_on_cpu(struct evsel *evsel, int cpu_map_idx, int thread, bool scale)
{
struct perf_counts_values count;
size_t nv = scale ? 3 : 1;
- if (FD(evsel, cpu, thread) < 0)
+ if (FD(evsel, cpu_map_idx, thread) < 0)
return -EINVAL;
- if (evsel->counts == NULL && evsel__alloc_counts(evsel, cpu + 1, thread + 1) < 0)
+ if (evsel->counts == NULL && evsel__alloc_counts(evsel) < 0)
return -ENOMEM;
- if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) <= 0)
+ if (readn(FD(evsel, cpu_map_idx, thread), &count, nv * sizeof(u64)) <= 0)
return -errno;
- evsel__compute_deltas(evsel, cpu, thread, &count);
+ evsel__compute_deltas(evsel, cpu_map_idx, thread, &count);
perf_counts_values__scale(&count, scale, NULL);
- *perf_counts(evsel->counts, cpu, thread) = count;
+ *perf_counts(evsel->counts, cpu_map_idx, thread) = count;
return 0;
}
static int evsel__match_other_cpu(struct evsel *evsel, struct evsel *other,
- int cpu)
+ int cpu_map_idx)
{
- int cpuid;
+ struct perf_cpu cpu;
- cpuid = perf_cpu_map__cpu(evsel->core.cpus, cpu);
- return perf_cpu_map__idx(other->core.cpus, cpuid);
+ cpu = perf_cpu_map__cpu(evsel->core.cpus, cpu_map_idx);
+ return perf_cpu_map__idx(other->core.cpus, cpu);
}
-static int evsel__hybrid_group_cpu(struct evsel *evsel, int cpu)
+static int evsel__hybrid_group_cpu_map_idx(struct evsel *evsel, int cpu_map_idx)
{
struct evsel *leader = evsel__leader(evsel);
if ((evsel__is_hybrid(evsel) && !evsel__is_hybrid(leader)) ||
(!evsel__is_hybrid(evsel) && evsel__is_hybrid(leader))) {
- return evsel__match_other_cpu(evsel, leader, cpu);
+ return evsel__match_other_cpu(evsel, leader, cpu_map_idx);
}
- return cpu;
+ return cpu_map_idx;
}
-static int get_group_fd(struct evsel *evsel, int cpu, int thread)
+static int get_group_fd(struct evsel *evsel, int cpu_map_idx, int thread)
{
struct evsel *leader = evsel__leader(evsel);
int fd;
@@ -1644,11 +1662,11 @@ static int get_group_fd(struct evsel *evsel, int cpu, int thread)
*/
BUG_ON(!leader->core.fd);
- cpu = evsel__hybrid_group_cpu(evsel, cpu);
- if (cpu == -1)
+ cpu_map_idx = evsel__hybrid_group_cpu_map_idx(evsel, cpu_map_idx);
+ if (cpu_map_idx == -1)
return -1;
- fd = FD(leader, cpu, thread);
+ fd = FD(leader, cpu_map_idx, thread);
BUG_ON(fd == -1);
return fd;
@@ -1662,16 +1680,16 @@ static void evsel__remove_fd(struct evsel *pos, int nr_cpus, int nr_threads, int
}
static int update_fds(struct evsel *evsel,
- int nr_cpus, int cpu_idx,
+ int nr_cpus, int cpu_map_idx,
int nr_threads, int thread_idx)
{
struct evsel *pos;
- if (cpu_idx >= nr_cpus || thread_idx >= nr_threads)
+ if (cpu_map_idx >= nr_cpus || thread_idx >= nr_threads)
return -EINVAL;
evlist__for_each_entry(evsel->evlist, pos) {
- nr_cpus = pos != evsel ? nr_cpus : cpu_idx;
+ nr_cpus = pos != evsel ? nr_cpus : cpu_map_idx;
evsel__remove_fd(pos, nr_cpus, nr_threads, thread_idx);
@@ -1685,10 +1703,10 @@ static int update_fds(struct evsel *evsel,
return 0;
}
-bool evsel__ignore_missing_thread(struct evsel *evsel,
- int nr_cpus, int cpu,
- struct perf_thread_map *threads,
- int thread, int err)
+static bool evsel__ignore_missing_thread(struct evsel *evsel,
+ int nr_cpus, int cpu_map_idx,
+ struct perf_thread_map *threads,
+ int thread, int err)
{
pid_t ignore_pid = perf_thread_map__pid(threads, thread);
@@ -1711,7 +1729,7 @@ bool evsel__ignore_missing_thread(struct evsel *evsel,
* We should remove fd for missing_thread first
* because thread_map__remove() will decrease threads->nr.
*/
- if (update_fds(evsel, nr_cpus, cpu, threads->nr, thread))
+ if (update_fds(evsel, nr_cpus, cpu_map_idx, threads->nr, thread))
return false;
if (thread_map__remove(threads, thread))
@@ -1800,7 +1818,7 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
nthreads = threads->nr;
if (evsel->core.fd == NULL &&
- perf_evsel__alloc_fd(&evsel->core, cpus->nr, nthreads) < 0)
+ perf_evsel__alloc_fd(&evsel->core, perf_cpu_map__nr(cpus), nthreads) < 0)
return -ENOMEM;
evsel->open_flags = PERF_FLAG_FD_CLOEXEC;
@@ -1993,9 +2011,9 @@ bool evsel__increase_rlimit(enum rlimit_action *set_rlimit)
static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus,
struct perf_thread_map *threads,
- int start_cpu, int end_cpu)
+ int start_cpu_map_idx, int end_cpu_map_idx)
{
- int cpu, thread, nthreads;
+ int idx, thread, nthreads;
int pid = -1, err, old_errno;
enum rlimit_action set_rlimit = NO_CHANGE;
@@ -2022,7 +2040,7 @@ fallback_missing_features:
display_attr(&evsel->core.attr);
- for (cpu = start_cpu; cpu < end_cpu; cpu++) {
+ for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) {
for (thread = 0; thread < nthreads; thread++) {
int fd, group_fd;
@@ -2033,17 +2051,18 @@ retry_open:
if (!evsel->cgrp && !evsel->core.system_wide)
pid = perf_thread_map__pid(threads, thread);
- group_fd = get_group_fd(evsel, cpu, thread);
+ group_fd = get_group_fd(evsel, idx, thread);
test_attr__ready();
pr_debug2_peo("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx",
- pid, cpus->map[cpu], group_fd, evsel->open_flags);
+ pid, perf_cpu_map__cpu(cpus, idx).cpu, group_fd, evsel->open_flags);
- fd = sys_perf_event_open(&evsel->core.attr, pid, cpus->map[cpu],
+ fd = sys_perf_event_open(&evsel->core.attr, pid,
+ perf_cpu_map__cpu(cpus, idx).cpu,
group_fd, evsel->open_flags);
- FD(evsel, cpu, thread) = fd;
+ FD(evsel, idx, thread) = fd;
if (fd < 0) {
err = -errno;
@@ -2053,10 +2072,11 @@ retry_open:
goto try_fallback;
}
- bpf_counter__install_pe(evsel, cpu, fd);
+ bpf_counter__install_pe(evsel, idx, fd);
if (unlikely(test_attr__enabled)) {
- test_attr__open(&evsel->core.attr, pid, cpus->map[cpu],
+ test_attr__open(&evsel->core.attr, pid,
+ perf_cpu_map__cpu(cpus, idx),
fd, group_fd, evsel->open_flags);
}
@@ -2097,7 +2117,8 @@ try_fallback:
if (evsel__precise_ip_fallback(evsel))
goto retry_open;
- if (evsel__ignore_missing_thread(evsel, cpus->nr, cpu, threads, thread, err)) {
+ if (evsel__ignore_missing_thread(evsel, perf_cpu_map__nr(cpus),
+ idx, threads, thread, err)) {
/* We just removed 1 thread, so lower the upper nthreads limit. */
nthreads--;
@@ -2112,7 +2133,7 @@ try_fallback:
if (err == -EMFILE && evsel__increase_rlimit(&set_rlimit))
goto retry_open;
- if (err != -EINVAL || cpu > 0 || thread > 0)
+ if (err != -EINVAL || idx > 0 || thread > 0)
goto out_close;
if (evsel__detect_missing_features(evsel))
@@ -2124,12 +2145,12 @@ out_close:
old_errno = errno;
do {
while (--thread >= 0) {
- if (FD(evsel, cpu, thread) >= 0)
- close(FD(evsel, cpu, thread));
- FD(evsel, cpu, thread) = -1;
+ if (FD(evsel, idx, thread) >= 0)
+ close(FD(evsel, idx, thread));
+ FD(evsel, idx, thread) = -1;
}
thread = nthreads;
- } while (--cpu >= 0);
+ } while (--idx >= 0);
errno = old_errno;
return err;
}
@@ -2137,7 +2158,7 @@ out_close:
int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus,
struct perf_thread_map *threads)
{
- return evsel__open_cpu(evsel, cpus, threads, 0, cpus ? cpus->nr : 1);
+ return evsel__open_cpu(evsel, cpus, threads, 0, perf_cpu_map__nr(cpus));
}
void evsel__close(struct evsel *evsel)
@@ -2146,13 +2167,12 @@ void evsel__close(struct evsel *evsel)
perf_evsel__free_id(&evsel->core);
}
-int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu)
+int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu_map_idx)
{
- if (cpu == -1)
- return evsel__open_cpu(evsel, cpus, NULL, 0,
- cpus ? cpus->nr : 1);
+ if (cpu_map_idx == -1)
+ return evsel__open_cpu(evsel, cpus, NULL, 0, perf_cpu_map__nr(cpus));
- return evsel__open_cpu(evsel, cpus, NULL, cpu, cpu + 1);
+ return evsel__open_cpu(evsel, cpus, NULL, cpu_map_idx, cpu_map_idx + 1);
}
int evsel__open_per_thread(struct evsel *evsel, struct perf_thread_map *threads)
@@ -2706,6 +2726,8 @@ void *evsel__rawptr(struct evsel *evsel, struct perf_sample *sample, const char
if (field->flags & TEP_FIELD_IS_DYNAMIC) {
offset = *(int *)(sample->raw_data + field->offset);
offset &= 0xffff;
+ if (field->flags & TEP_FIELD_IS_RELATIVE)
+ offset += field->offset + field->size;
}
return sample->raw_data + offset;
@@ -2852,9 +2874,23 @@ static bool find_process(const char *name)
return ret ? false : true;
}
+static bool is_amd(const char *arch, const char *cpuid)
+{
+ return arch && !strcmp("x86", arch) && cpuid && strstarts(cpuid, "AuthenticAMD");
+}
+
+static bool is_amd_ibs(struct evsel *evsel)
+{
+ return evsel->core.attr.precise_ip
+ || (evsel->pmu_name && !strncmp(evsel->pmu_name, "ibs", 3));
+}
+
int evsel__open_strerror(struct evsel *evsel, struct target *target,
int err, char *msg, size_t size)
{
+ struct perf_env *env = evsel__env(evsel);
+ const char *arch = perf_env__arch(env);
+ const char *cpuid = perf_env__cpuid(env);
char sbuf[STRERR_BUFSIZE];
int printed = 0, enforced = 0;
@@ -2914,6 +2950,10 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target,
"No such device - did you specify an out-of-range profile CPU?");
break;
case EOPNOTSUPP:
+ if (evsel->core.attr.sample_type & PERF_SAMPLE_BRANCH_STACK)
+ return scnprintf(msg, size,
+ "%s: PMU Hardware or event type doesn't support branch stack sampling.",
+ evsel__name(evsel));
if (evsel->core.attr.aux_output)
return scnprintf(msg, size,
"%s: PMU Hardware doesn't support 'aux_output' feature",
@@ -2950,6 +2990,21 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target,
return scnprintf(msg, size, "wrong clockid (%d).", clockid);
if (perf_missing_features.aux_output)
return scnprintf(msg, size, "The 'aux_output' feature is not supported, update the kernel.");
+ if (!target__has_cpu(target))
+ return scnprintf(msg, size,
+ "Invalid event (%s) in per-thread mode, enable system wide with '-a'.",
+ evsel__name(evsel));
+ if (is_amd(arch, cpuid)) {
+ if (is_amd_ibs(evsel)) {
+ if (evsel->core.attr.exclude_kernel)
+ return scnprintf(msg, size,
+ "AMD IBS can't exclude kernel events. Try running at a higher privilege level.");
+ if (!evsel->core.system_wide)
+ return scnprintf(msg, size,
+ "AMD IBS may only be available in system-wide/per-cpu mode. Try using -a, or -C and workload affinity");
+ }
+ }
+
break;
case ENODATA:
return scnprintf(msg, size, "Cannot collect data source with the load latency event alone. "
@@ -2966,22 +3021,22 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target,
struct perf_env *evsel__env(struct evsel *evsel)
{
- if (evsel && evsel->evlist)
+ if (evsel && evsel->evlist && evsel->evlist->env)
return evsel->evlist->env;
return &perf_env;
}
static int store_evsel_ids(struct evsel *evsel, struct evlist *evlist)
{
- int cpu, thread;
+ int cpu_map_idx, thread;
- for (cpu = 0; cpu < xyarray__max_x(evsel->core.fd); cpu++) {
+ for (cpu_map_idx = 0; cpu_map_idx < xyarray__max_x(evsel->core.fd); cpu_map_idx++) {
for (thread = 0; thread < xyarray__max_y(evsel->core.fd);
thread++) {
- int fd = FD(evsel, cpu, thread);
+ int fd = FD(evsel, cpu_map_idx, thread);
if (perf_evlist__id_add_fd(&evlist->core, &evsel->core,
- cpu, thread, fd) < 0)
+ cpu_map_idx, thread, fd) < 0)
return -1;
}
}
@@ -2994,7 +3049,7 @@ int evsel__store_ids(struct evsel *evsel, struct evlist *evlist)
struct perf_cpu_map *cpus = evsel->core.cpus;
struct perf_thread_map *threads = evsel->core.threads;
- if (perf_evsel__alloc_id(&evsel->core, cpus->nr, threads->nr))
+ if (perf_evsel__alloc_id(&evsel->core, perf_cpu_map__nr(cpus), threads->nr))
return -ENOMEM;
return store_evsel_ids(evsel, evlist);
@@ -3049,3 +3104,22 @@ int evsel__source_count(const struct evsel *evsel)
}
return count;
}
+
+bool __weak arch_evsel__must_be_in_group(const struct evsel *evsel __maybe_unused)
+{
+ return false;
+}
+
+/*
+ * Remove an event from a given group (leader).
+ * Some events, e.g., perf metrics Topdown events,
+ * must always be grouped. Ignore the events.
+ */
+void evsel__remove_from_group(struct evsel *evsel, struct evsel *leader)
+{
+ if (!arch_evsel__must_be_in_group(evsel) && evsel != leader) {
+ evsel__set_leader(evsel, evsel);
+ evsel->core.nr_members = 0;
+ leader->core.nr_members--;
+ }
+}
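The weak arch_evsel__must_be_in_group() hook lets an architecture keep selected members grouped when a weak group is broken up (on x86 this covers the topdown/slots perf-metrics events). A hedged sketch of what an override could look like; the name test below is purely illustrative and simpler than the real x86 logic:

#include <string.h>
#include "util/evsel.h"

/* Illustrative override: keep events whose name starts with "topdown". */
bool arch_evsel__must_be_in_group(const struct evsel *evsel)
{
	return evsel->name && !strncmp(evsel->name, "topdown", 7);
}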
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 29d49a8c1e92..73ea48e94079 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -11,6 +11,7 @@
#include <perf/evsel.h>
#include "symbol_conf.h"
#include <internal/cpumap.h>
+#include <perf/cpumap.h>
struct bpf_object;
struct cgroup;
@@ -29,8 +30,18 @@ typedef int (evsel__sb_cb_t)(union perf_event *event, void *data);
enum perf_tool_event {
PERF_TOOL_NONE = 0,
PERF_TOOL_DURATION_TIME = 1,
+ PERF_TOOL_USER_TIME = 2,
+ PERF_TOOL_SYSTEM_TIME = 3,
+
+ PERF_TOOL_MAX,
};
+const char *perf_tool_event__to_str(enum perf_tool_event ev);
+enum perf_tool_event perf_tool_event__from_str(const char *str);
+
+#define perf_tool_event__for_each_event(ev) \
+ for ((ev) = PERF_TOOL_DURATION_TIME; (ev) < PERF_TOOL_MAX; ev++)
+
/** struct evsel - event selector
*
* @evlist - evlist this evsel is in, if it is in one.
@@ -119,9 +130,9 @@ struct evsel {
bool merged_stat;
bool reset_group;
bool errored;
+ bool needs_auxtrace_mmap;
struct hashmap *per_pkg_mask;
int err;
- int cpu_iter;
struct {
evsel__sb_cb_t *cb;
void *data;
@@ -192,12 +203,9 @@ static inline struct perf_cpu_map *evsel__cpus(struct evsel *evsel)
static inline int evsel__nr_cpus(struct evsel *evsel)
{
- return evsel__cpus(evsel)->nr;
+ return perf_cpu_map__nr(evsel__cpus(evsel));
}
-void perf_counts_values__scale(struct perf_counts_values *count,
- bool scale, s8 *pscaled);
-
void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread,
struct perf_counts_values *count);
@@ -256,11 +264,11 @@ static inline bool evsel__is_bpf(struct evsel *evsel)
#define EVSEL__MAX_ALIASES 8
-extern const char *evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX][EVSEL__MAX_ALIASES];
-extern const char *evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX][EVSEL__MAX_ALIASES];
-extern const char *evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX][EVSEL__MAX_ALIASES];
-extern const char *evsel__hw_names[PERF_COUNT_HW_MAX];
-extern const char *evsel__sw_names[PERF_COUNT_SW_MAX];
+extern const char *const evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX][EVSEL__MAX_ALIASES];
+extern const char *const evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX][EVSEL__MAX_ALIASES];
+extern const char *const evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX][EVSEL__MAX_ALIASES];
+extern const char *const evsel__hw_names[PERF_COUNT_HW_MAX];
+extern const char *const evsel__sw_names[PERF_COUNT_SW_MAX];
extern char *evsel__bpf_counter_events;
bool evsel__match_bpf_counter_events(const char *name);
@@ -268,6 +276,11 @@ int __evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, char *bf, size
const char *evsel__name(struct evsel *evsel);
const char *evsel__metric_id(const struct evsel *evsel);
+static inline bool evsel__is_tool(const struct evsel *evsel)
+{
+ return evsel->tool_event != PERF_TOOL_NONE;
+}
+
const char *evsel__group_name(struct evsel *evsel);
int evsel__group_desc(struct evsel *evsel, char *buf, size_t size);
@@ -288,12 +301,12 @@ void arch_evsel__fixup_new_cycles(struct perf_event_attr *attr);
int evsel__set_filter(struct evsel *evsel, const char *filter);
int evsel__append_tp_filter(struct evsel *evsel, const char *filter);
int evsel__append_addr_filter(struct evsel *evsel, const char *filter);
-int evsel__enable_cpu(struct evsel *evsel, int cpu);
+int evsel__enable_cpu(struct evsel *evsel, int cpu_map_idx);
int evsel__enable(struct evsel *evsel);
int evsel__disable(struct evsel *evsel);
-int evsel__disable_cpu(struct evsel *evsel, int cpu);
+int evsel__disable_cpu(struct evsel *evsel, int cpu_map_idx);
-int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu);
+int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu_map_idx);
int evsel__open_per_thread(struct evsel *evsel, struct perf_thread_map *threads);
int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus,
struct perf_thread_map *threads);
@@ -305,10 +318,6 @@ bool evsel__detect_missing_features(struct evsel *evsel);
enum rlimit_action { NO_CHANGE, SET_TO_MAX, INCREASED_MAX };
bool evsel__increase_rlimit(enum rlimit_action *set_rlimit);
-bool evsel__ignore_missing_thread(struct evsel *evsel,
- int nr_cpus, int cpu,
- struct perf_thread_map *threads,
- int thread, int err);
bool evsel__precise_ip_fallback(struct evsel *evsel);
struct perf_sample;
@@ -337,32 +346,32 @@ static inline bool evsel__match2(struct evsel *e1, struct evsel *e2)
(e1->core.attr.config == e2->core.attr.config);
}
-int evsel__read_counter(struct evsel *evsel, int cpu, int thread);
+int evsel__read_counter(struct evsel *evsel, int cpu_map_idx, int thread);
-int __evsel__read_on_cpu(struct evsel *evsel, int cpu, int thread, bool scale);
+int __evsel__read_on_cpu(struct evsel *evsel, int cpu_map_idx, int thread, bool scale);
/**
* evsel__read_on_cpu - Read out the results on a CPU and thread
*
* @evsel - event selector to read value
- * @cpu - CPU of interest
+ * @cpu_map_idx - CPU map index of interest
* @thread - thread of interest
*/
-static inline int evsel__read_on_cpu(struct evsel *evsel, int cpu, int thread)
+static inline int evsel__read_on_cpu(struct evsel *evsel, int cpu_map_idx, int thread)
{
- return __evsel__read_on_cpu(evsel, cpu, thread, false);
+ return __evsel__read_on_cpu(evsel, cpu_map_idx, thread, false);
}
/**
* evsel__read_on_cpu_scaled - Read out the results on a CPU and thread, scaled
*
* @evsel - event selector to read value
- * @cpu - CPU of interest
+ * @cpu_map_idx - CPU map index of interest
* @thread - thread of interest
*/
-static inline int evsel__read_on_cpu_scaled(struct evsel *evsel, int cpu, int thread)
+static inline int evsel__read_on_cpu_scaled(struct evsel *evsel, int cpu_map_idx, int thread)
{
- return __evsel__read_on_cpu(evsel, cpu, thread, true);
+ return __evsel__read_on_cpu(evsel, cpu_map_idx, thread, true);
}
int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
@@ -490,6 +499,9 @@ bool evsel__has_leader(struct evsel *evsel, struct evsel *leader);
bool evsel__is_leader(struct evsel *evsel);
void evsel__set_leader(struct evsel *evsel, struct evsel *leader);
int evsel__source_count(const struct evsel *evsel);
+void evsel__remove_from_group(struct evsel *evsel, struct evsel *leader);
+
+bool arch_evsel__must_be_in_group(const struct evsel *evsel);
/*
 * Macro to swap the bit-field position and size.
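
The hunks above rename per-CPU arguments from cpu to cpu_map_idx, making explicit that callers pass an index into the evsel's own CPU map rather than a logical CPU number. A minimal sketch of a caller using the renamed API; the read_all_cpu_map_indexes() helper is illustrative and not part of this patch, only evsel__nr_cpus() and evsel__read_on_cpu() come from the header above:

	/* Read thread 0 of every CPU map index owned by the evsel. */
	static int read_all_cpu_map_indexes(struct evsel *evsel)
	{
		int idx;

		for (idx = 0; idx < evsel__nr_cpus(evsel); idx++) {
			/* idx is a CPU map index, not a system CPU number */
			if (evsel__read_on_cpu(evsel, idx, 0) < 0)
				return -1;
		}
		return 0;
	}
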
diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c
index 666b59baeb70..675f318ce7c1 100644
--- a/tools/perf/util/expr.c
+++ b/tools/perf/util/expr.c
@@ -405,12 +405,17 @@ double expr_id_data__source_count(const struct expr_id_data *data)
double expr__get_literal(const char *literal)
{
static struct cpu_topology *topology;
+ double result = NAN;
- if (!strcmp("#smt_on", literal))
- return smt_on() > 0 ? 1.0 : 0.0;
+ if (!strcasecmp("#smt_on", literal)) {
+ result = smt_on() > 0 ? 1.0 : 0.0;
+ goto out;
+ }
- if (!strcmp("#num_cpus", literal))
- return cpu__max_present_cpu();
+ if (!strcmp("#num_cpus", literal)) {
+ result = cpu__max_present_cpu().cpu;
+ goto out;
+ }
/*
* Assume that topology strings are consistent, such as CPUs "0-1"
@@ -422,16 +427,24 @@ double expr__get_literal(const char *literal)
topology = cpu_topology__new();
if (!topology) {
pr_err("Error creating CPU topology");
- return NAN;
+ goto out;
}
}
- if (!strcmp("#num_packages", literal))
- return topology->package_cpus_lists;
- if (!strcmp("#num_dies", literal))
- return topology->die_cpus_lists;
- if (!strcmp("#num_cores", literal))
- return topology->core_cpus_lists;
+ if (!strcmp("#num_packages", literal)) {
+ result = topology->package_cpus_lists;
+ goto out;
+ }
+ if (!strcmp("#num_dies", literal)) {
+ result = topology->die_cpus_lists;
+ goto out;
+ }
+ if (!strcmp("#num_cores", literal)) {
+ result = topology->core_cpus_lists;
+ goto out;
+ }
pr_err("Unrecognized literal '%s'", literal);
- return NAN;
+out:
+ pr_debug2("literal: %s = %f\n", literal, result);
+ return result;
}
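
With the rewrite above, every literal path funnels through the out label and failures come back as NAN. A hedged sketch of a caller that treats NAN as "literal unavailable", assuming expr.h is included; the literal_or() wrapper is illustrative only:

	#include <math.h>

	/* Fall back to a default when expr__get_literal() cannot resolve a name. */
	static double literal_or(const char *name, double fallback)
	{
		double v = expr__get_literal(name);

		return isnan(v) ? fallback : v;
	}

	/* e.g. literal_or("#num_cpus", 1.0) or literal_or("#smt_on", 0.0) */
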
diff --git a/tools/perf/util/expr.l b/tools/perf/util/expr.l
index 0a13eb20c814..4dc8edbfd9ce 100644
--- a/tools/perf/util/expr.l
+++ b/tools/perf/util/expr.l
@@ -91,7 +91,7 @@ static int literal(yyscan_t scanner)
}
%}
-number ([0-9]+\.?[0-9]*|[0-9]*\.?[0-9]+)
+number ([0-9]+\.?[0-9]*|[0-9]*\.?[0-9]+)(e-?[0-9]+)?
sch [-,=]
spec \\{sch}
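
The widened {number} pattern above additionally accepts a scientific-notation suffix such as 1e-9 or 2.5e3. For illustration only, a standalone check with an equivalent POSIX extended regular expression (not part of the lexer):

	#include <regex.h>
	#include <stdio.h>

	int main(void)
	{
		/* ERE equivalent of the flex pattern above */
		const char *pat = "^([0-9]+\\.?[0-9]*|[0-9]*\\.?[0-9]+)(e-?[0-9]+)?$";
		const char *samples[] = { "15", "0.75", "1e-9", "2.5e3" };
		regex_t re;
		unsigned int i;

		if (regcomp(&re, pat, REG_EXTENDED | REG_NOSUB))
			return 1;
		for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
			printf("%-6s %s\n", samples[i],
			       regexec(&re, samples[i], 0, NULL, 0) ? "no match" : "match");
		regfree(&re);
		return 0;
	}
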
diff --git a/tools/perf/util/ftrace.h b/tools/perf/util/ftrace.h
new file mode 100644
index 000000000000..a34cd15733b8
--- /dev/null
+++ b/tools/perf/util/ftrace.h
@@ -0,0 +1,82 @@
+#ifndef __PERF_FTRACE_H__
+#define __PERF_FTRACE_H__
+
+#include <linux/list.h>
+
+#include "target.h"
+
+struct evlist;
+
+struct perf_ftrace {
+ struct evlist *evlist;
+ struct target target;
+ const char *tracer;
+ struct list_head filters;
+ struct list_head notrace;
+ struct list_head graph_funcs;
+ struct list_head nograph_funcs;
+ unsigned long percpu_buffer_size;
+ bool inherit;
+ bool use_nsec;
+ int graph_depth;
+ int func_stack_trace;
+ int func_irq_info;
+ int graph_nosleep_time;
+ int graph_noirqs;
+ int graph_verbose;
+ int graph_thresh;
+ unsigned int initial_delay;
+};
+
+struct filter_entry {
+ struct list_head list;
+ char name[];
+};
+
+#define NUM_BUCKET  22  /* 20 + 2 (for outliers in both directions) */
+
+#ifdef HAVE_BPF_SKEL
+
+int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace);
+int perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace);
+int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace);
+int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace,
+ int buckets[]);
+int perf_ftrace__latency_cleanup_bpf(struct perf_ftrace *ftrace);
+
+#else /* !HAVE_BPF_SKEL */
+
+static inline int
+perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace __maybe_unused)
+{
+ return -1;
+}
+
+static inline int
+perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace __maybe_unused)
+{
+ return -1;
+}
+
+static inline int
+perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused)
+{
+ return -1;
+}
+
+static inline int
+perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused,
+ int buckets[] __maybe_unused)
+{
+ return -1;
+}
+
+static inline int
+perf_ftrace__latency_cleanup_bpf(struct perf_ftrace *ftrace __maybe_unused)
+{
+ return -1;
+}
+
+#endif /* HAVE_BPF_SKEL */
+
+#endif /* __PERF_FTRACE_H__ */
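
The BPF-based latency helpers declared above follow a prepare/start/stop/read/cleanup lifecycle (the !HAVE_BPF_SKEL stubs simply fail). A minimal sketch of that sequence, assuming the perf_ftrace instance already has its target and filter lists populated; error handling is trimmed and the workload step is only indicated by a comment:

	static int latency_histogram(struct perf_ftrace *ftrace, int buckets[NUM_BUCKET])
	{
		if (perf_ftrace__latency_prepare_bpf(ftrace) < 0)
			return -1;
		if (perf_ftrace__latency_start_bpf(ftrace) < 0)
			goto out;

		/* ... run or wait for the workload here ... */

		perf_ftrace__latency_stop_bpf(ftrace);
		perf_ftrace__latency_read_bpf(ftrace, buckets);
	out:
		return perf_ftrace__latency_cleanup_bpf(ftrace);
	}
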
diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h
index 3db3293213a9..ae138afe6c56 100644
--- a/tools/perf/util/genelf.h
+++ b/tools/perf/util/genelf.h
@@ -38,6 +38,9 @@ int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_ent
#elif defined(__s390x__)
#define GEN_ELF_ARCH EM_S390
#define GEN_ELF_CLASS ELFCLASS64
+#elif defined(__riscv) && __riscv_xlen == 64
+#define GEN_ELF_ARCH EM_RISCV
+#define GEN_ELF_CLASS ELFCLASS64
#else
#error "unsupported architecture"
#endif
diff --git a/tools/perf/util/hashmap.c b/tools/perf/util/hashmap.c
index 3c20b126d60d..aeb09c288716 100644
--- a/tools/perf/util/hashmap.c
+++ b/tools/perf/util/hashmap.c
@@ -75,7 +75,7 @@ void hashmap__clear(struct hashmap *map)
void hashmap__free(struct hashmap *map)
{
- if (!map)
+ if (IS_ERR_OR_NULL(map))
return;
hashmap__clear(map);
@@ -238,4 +238,3 @@ bool hashmap__delete(struct hashmap *map, const void *key,
return true;
}
-
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index e3c1a532d059..6ad629db63b7 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -472,7 +472,7 @@ static int write_nrcpus(struct feat_fd *ff,
u32 nrc, nra;
int ret;
- nrc = cpu__max_present_cpu();
+ nrc = cpu__max_present_cpu().cpu;
nr = sysconf(_SC_NPROCESSORS_ONLN);
if (nr < 0)
@@ -983,6 +983,57 @@ static int write_dir_format(struct feat_fd *ff,
return do_write(ff, &data->dir.version, sizeof(data->dir.version));
}
+/*
+ * Check whether a CPU is online
+ *
+ * Returns:
+ * 1 -> if CPU is online
+ * 0 -> if CPU is offline
+ * -1 -> error case
+ */
+int is_cpu_online(unsigned int cpu)
+{
+ char *str;
+ size_t strlen;
+ char buf[256];
+ int status = -1;
+ struct stat statbuf;
+
+ snprintf(buf, sizeof(buf),
+ "/sys/devices/system/cpu/cpu%d", cpu);
+ if (stat(buf, &statbuf) != 0)
+ return 0;
+
+ /*
+ * Check if /sys/devices/system/cpu/cpux/online file
+	 * exists. In some cases cpu0 won't have an online file since
+	 * it is generally not expected to be turned off.
+	 * In kernels without CONFIG_HOTPLUG_CPU, this
+	 * file won't exist.
+ */
+ snprintf(buf, sizeof(buf),
+ "/sys/devices/system/cpu/cpu%d/online", cpu);
+ if (stat(buf, &statbuf) != 0)
+ return 1;
+
+ /*
+ * Read online file using sysfs__read_str.
+ * If read or open fails, return -1.
+	 * If read succeeds, return the value read from the file,
+	 * which gets stored in "str".
+ */
+ snprintf(buf, sizeof(buf),
+ "devices/system/cpu/cpu%d/online", cpu);
+
+ if (sysfs__read_str(buf, &str, &strlen) < 0)
+ return status;
+
+ status = atoi(str);
+
+ free(str);
+ return status;
+}
+
#ifdef HAVE_LIBBPF_SUPPORT
static int write_bpf_prog_info(struct feat_fd *ff,
struct evlist *evlist __maybe_unused)
@@ -1163,7 +1214,7 @@ static int build_caches(struct cpu_cache_level caches[], u32 *cntp)
u32 nr, cpu;
u16 level;
- nr = cpu__max_cpu();
+ nr = cpu__max_cpu().cpu;
for (cpu = 0; cpu < nr; cpu++) {
for (level = 0; level < MAX_CACHE_LVL; level++) {
@@ -1195,7 +1246,7 @@ static int build_caches(struct cpu_cache_level caches[], u32 *cntp)
static int write_cache(struct feat_fd *ff,
struct evlist *evlist __maybe_unused)
{
- u32 max_caches = cpu__max_cpu() * MAX_CACHE_LVL;
+ u32 max_caches = cpu__max_cpu().cpu * MAX_CACHE_LVL;
struct cpu_cache_level caches[max_caches];
u32 cnt = 0, i, version = 1;
int ret;
@@ -1335,7 +1386,7 @@ static int build_mem_topology(struct memory_node *nodes, u64 size, u64 *cntp)
dir = opendir(path);
if (!dir) {
- pr_debug2("%s: could't read %s, does this arch have topology information?\n",
+ pr_debug2("%s: couldn't read %s, does this arch have topology information?\n",
__func__, path);
return -1;
}
@@ -2200,6 +2251,7 @@ static int __event_process_build_id(struct perf_record_header_build_id *bev,
build_id__init(&bid, bev->data, size);
dso__set_build_id(dso, &bid);
+ dso->header_build_id = 1;
if (dso_space != DSO_SPACE__USER) {
struct kmod_path m = { .name = NULL, };
@@ -3410,9 +3462,22 @@ int perf_header__fprintf_info(struct perf_session *session, FILE *fp, bool full)
return 0;
}
+struct header_fw {
+ struct feat_writer fw;
+ struct feat_fd *ff;
+};
+
+static int feat_writer_cb(struct feat_writer *fw, void *buf, size_t sz)
+{
+ struct header_fw *h = container_of(fw, struct header_fw, fw);
+
+ return do_write(h->ff, buf, sz);
+}
+
static int do_write_feat(struct feat_fd *ff, int type,
struct perf_file_section **p,
- struct evlist *evlist)
+ struct evlist *evlist,
+ struct feat_copier *fc)
{
int err;
int ret = 0;
@@ -3426,7 +3491,23 @@ static int do_write_feat(struct feat_fd *ff, int type,
(*p)->offset = lseek(ff->fd, 0, SEEK_CUR);
- err = feat_ops[type].write(ff, evlist);
+ /*
+ * Hook to let perf inject copy features sections from the input
+ * file.
+ */
+ if (fc && fc->copy) {
+ struct header_fw h = {
+ .fw.write = feat_writer_cb,
+ .ff = ff,
+ };
+
+ /* ->copy() returns 0 if the feature was not copied */
+ err = fc->copy(fc, type, &h.fw);
+ } else {
+ err = 0;
+ }
+ if (!err)
+ err = feat_ops[type].write(ff, evlist);
if (err < 0) {
pr_debug("failed to write feature %s\n", feat_ops[type].name);
@@ -3442,7 +3523,8 @@ static int do_write_feat(struct feat_fd *ff, int type,
}
static int perf_header__adds_write(struct perf_header *header,
- struct evlist *evlist, int fd)
+ struct evlist *evlist, int fd,
+ struct feat_copier *fc)
{
int nr_sections;
struct feat_fd ff;
@@ -3471,7 +3553,7 @@ static int perf_header__adds_write(struct perf_header *header,
lseek(fd, sec_start + sec_size, SEEK_SET);
for_each_set_bit(feat, header->adds_features, HEADER_FEAT_BITS) {
- if (do_write_feat(&ff, feat, &p, evlist))
+ if (do_write_feat(&ff, feat, &p, evlist, fc))
perf_header__clear_feat(header, feat);
}
@@ -3509,9 +3591,10 @@ int perf_header__write_pipe(int fd)
return 0;
}
-int perf_session__write_header(struct perf_session *session,
- struct evlist *evlist,
- int fd, bool at_exit)
+static int perf_session__do_write_header(struct perf_session *session,
+ struct evlist *evlist,
+ int fd, bool at_exit,
+ struct feat_copier *fc)
{
struct perf_file_header f_header;
struct perf_file_attr f_attr;
@@ -3563,7 +3646,7 @@ int perf_session__write_header(struct perf_session *session,
header->feat_offset = header->data_offset + header->data_size;
if (at_exit) {
- err = perf_header__adds_write(header, evlist, fd);
+ err = perf_header__adds_write(header, evlist, fd, fc);
if (err < 0)
return err;
}
@@ -3596,6 +3679,35 @@ int perf_session__write_header(struct perf_session *session,
return 0;
}
+int perf_session__write_header(struct perf_session *session,
+ struct evlist *evlist,
+ int fd, bool at_exit)
+{
+ return perf_session__do_write_header(session, evlist, fd, at_exit, NULL);
+}
+
+size_t perf_session__data_offset(const struct evlist *evlist)
+{
+ struct evsel *evsel;
+ size_t data_offset;
+
+ data_offset = sizeof(struct perf_file_header);
+ evlist__for_each_entry(evlist, evsel) {
+ data_offset += evsel->core.ids * sizeof(u64);
+ }
+ data_offset += evlist->core.nr_entries * sizeof(struct perf_file_attr);
+
+ return data_offset;
+}
+
+int perf_session__inject_header(struct perf_session *session,
+ struct evlist *evlist,
+ int fd,
+ struct feat_copier *fc)
+{
+ return perf_session__do_write_header(session, evlist, fd, true, fc);
+}
+
static int perf_header__getbuffer64(struct perf_header *header,
int fd, void *buf, size_t size)
{
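
is_cpu_online() added above reports 1 for online, 0 for offline and -1 on error. A hedged usage sketch; the count_online_cpus() helper is hypothetical, while cpu__max_present_cpu().cpu matches how the rest of this patch obtains the CPU count:

	static int count_online_cpus(void)
	{
		int cpu, nr = 0;

		for (cpu = 0; cpu < cpu__max_present_cpu().cpu; cpu++) {
			/* treat the -1 error case as offline */
			if (is_cpu_online(cpu) == 1)
				nr++;
		}
		return nr;
	}
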
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index c9e3265832d9..56916dabce7b 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -121,6 +121,23 @@ int perf_session__write_header(struct perf_session *session,
int fd, bool at_exit);
int perf_header__write_pipe(int fd);
+/* feat_writer writes a feature section to output */
+struct feat_writer {
+ int (*write)(struct feat_writer *fw, void *buf, size_t sz);
+};
+
+/* feat_copier copies a feature section using feat_writer to output */
+struct feat_copier {
+ int (*copy)(struct feat_copier *fc, int feat, struct feat_writer *fw);
+};
+
+int perf_session__inject_header(struct perf_session *session,
+ struct evlist *evlist,
+ int fd,
+ struct feat_copier *fc);
+
+size_t perf_session__data_offset(const struct evlist *evlist);
+
void perf_header__set_feat(struct perf_header *header, int feat);
void perf_header__clear_feat(struct perf_header *header, int feat);
bool perf_header__has_feat(const struct perf_header *header, int feat);
@@ -158,6 +175,7 @@ int do_write(struct feat_fd *fd, const void *buf, size_t size);
int write_padded(struct feat_fd *fd, const void *bf,
size_t count, size_t count_aligned);
+int is_cpu_online(unsigned int cpu);
/*
* arch specific callback
*/
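
The feat_writer/feat_copier pair declared above is the hook perf inject uses to carry feature sections over verbatim: ->copy() returns 0 to fall back to the normal ->write() path, a negative value on error, or a positive value once it has pushed the bytes through the supplied feat_writer. A minimal sketch of a copier, assuming the caller has stashed raw section bytes per feature; the my_copier structure and its fields are hypothetical:

	struct my_copier {
		struct feat_copier fc;
		void *sec_buf[HEADER_FEAT_BITS];	/* raw section data, if captured */
		size_t sec_sz[HEADER_FEAT_BITS];
	};

	static int my_copy(struct feat_copier *fc, int feat, struct feat_writer *fw)
	{
		struct my_copier *c = container_of(fc, struct my_copier, fc);

		if (!c->sec_buf[feat])
			return 0;	/* not copied: let the normal writer run */

		return fw->write(fw, c->sec_buf[feat], c->sec_sz[feat]) < 0 ? -1 : 1;
	}

	/* then: perf_session__inject_header(session, evlist, fd, &c->fc); */
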
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index b776465e04ef..1c085ab56534 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -124,6 +124,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
} else {
symlen = unresolved_col_width + 4 + 2;
hists__new_col_len(hists, HISTC_SYMBOL_FROM, symlen);
+ hists__new_col_len(hists, HISTC_ADDR_FROM, symlen);
hists__set_unres_dso_col_len(hists, HISTC_DSO_FROM);
}
@@ -138,6 +139,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
} else {
symlen = unresolved_col_width + 4 + 2;
hists__new_col_len(hists, HISTC_SYMBOL_TO, symlen);
+ hists__new_col_len(hists, HISTC_ADDR_TO, symlen);
hists__set_unres_dso_col_len(hists, HISTC_DSO_TO);
}
@@ -211,7 +213,9 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
hists__new_col_len(hists, HISTC_MEM_BLOCKED, 10);
hists__new_col_len(hists, HISTC_LOCAL_INS_LAT, 13);
hists__new_col_len(hists, HISTC_GLOBAL_INS_LAT, 13);
- hists__new_col_len(hists, HISTC_P_STAGE_CYC, 13);
+ hists__new_col_len(hists, HISTC_LOCAL_P_STAGE_CYC, 13);
+ hists__new_col_len(hists, HISTC_GLOBAL_P_STAGE_CYC, 13);
+
if (symbol_conf.nanosecs)
hists__new_col_len(hists, HISTC_TIME, 16);
else
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 621f35ae1efa..7ed4648d2fc2 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -75,7 +75,10 @@ enum hist_column {
HISTC_MEM_BLOCKED,
HISTC_LOCAL_INS_LAT,
HISTC_GLOBAL_INS_LAT,
- HISTC_P_STAGE_CYC,
+ HISTC_LOCAL_P_STAGE_CYC,
+ HISTC_GLOBAL_P_STAGE_CYC,
+ HISTC_ADDR_FROM,
+ HISTC_ADDR_TO,
HISTC_NR_COLS, /* Last entry */
};
diff --git a/tools/perf/util/include/linux/linkage.h b/tools/perf/util/include/linux/linkage.h
index 5acf053fca7d..aa0c5179836d 100644
--- a/tools/perf/util/include/linux/linkage.h
+++ b/tools/perf/util/include/linux/linkage.h
@@ -50,41 +50,32 @@
#ifndef SYM_END
#define SYM_END(name, sym_type) \
.type name sym_type ASM_NL \
+ .set .L__sym_size_##name, .-name ASM_NL \
.size name, .-name
#endif
-/*
- * SYM_FUNC_START_ALIAS -- use where there are two global names for one
- * function
- */
-#ifndef SYM_FUNC_START_ALIAS
-#define SYM_FUNC_START_ALIAS(name) \
- SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)
+/* SYM_ALIAS -- use only if you have to */
+#ifndef SYM_ALIAS
+#define SYM_ALIAS(alias, name, sym_type, linkage) \
+ linkage(alias) ASM_NL \
+ .set alias, name ASM_NL \
+ .type alias sym_type ASM_NL \
+ .set .L__sym_size_##alias, .L__sym_size_##name ASM_NL \
+ .size alias, .L__sym_size_##alias
#endif
/* SYM_FUNC_START -- use for global functions */
#ifndef SYM_FUNC_START
-/*
- * The same as SYM_FUNC_START_ALIAS, but we will need to distinguish these two
- * later.
- */
#define SYM_FUNC_START(name) \
SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)
#endif
/* SYM_FUNC_START_LOCAL -- use for local functions */
#ifndef SYM_FUNC_START_LOCAL
-/* the same as SYM_FUNC_START_LOCAL_ALIAS, see comment near SYM_FUNC_START */
#define SYM_FUNC_START_LOCAL(name) \
SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN)
#endif
-/* SYM_FUNC_END_ALIAS -- the end of LOCAL_ALIASed or ALIASed function */
-#ifndef SYM_FUNC_END_ALIAS
-#define SYM_FUNC_END_ALIAS(name) \
- SYM_END(name, SYM_T_FUNC)
-#endif
-
/* SYM_FUNC_START_WEAK -- use for weak functions */
#ifndef SYM_FUNC_START_WEAK
#define SYM_FUNC_START_WEAK(name) \
@@ -96,9 +87,32 @@
* SYM_FUNC_START_WEAK, ...
*/
#ifndef SYM_FUNC_END
-/* the same as SYM_FUNC_END_ALIAS, see comment near SYM_FUNC_START */
#define SYM_FUNC_END(name) \
SYM_END(name, SYM_T_FUNC)
#endif
+/*
+ * SYM_FUNC_ALIAS -- define a global alias for an existing function
+ */
+#ifndef SYM_FUNC_ALIAS
+#define SYM_FUNC_ALIAS(alias, name) \
+ SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_GLOBAL)
+#endif
+
+/*
+ * SYM_FUNC_ALIAS_LOCAL -- define a local alias for an existing function
+ */
+#ifndef SYM_FUNC_ALIAS_LOCAL
+#define SYM_FUNC_ALIAS_LOCAL(alias, name) \
+ SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_LOCAL)
+#endif
+
+/*
+ * SYM_FUNC_ALIAS_WEAK -- define a weak global alias for an existing function
+ */
+#ifndef SYM_FUNC_ALIAS_WEAK
+#define SYM_FUNC_ALIAS_WEAK(alias, name) \
+ SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_WEAK)
+#endif
+
#endif /* PERF_LINUX_LINKAGE_H_ */
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index 0e013c2d9eb4..0ac860c8dd2b 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -137,6 +137,7 @@ struct intel_pt_decoder {
bool in_psb;
bool hop;
bool leap;
+ bool emulated_ptwrite;
bool vm_time_correlation;
bool vm_tm_corr_dry_run;
bool vm_tm_corr_reliable;
@@ -144,6 +145,8 @@ struct intel_pt_decoder {
bool vm_tm_corr_continuous;
bool nr;
bool next_nr;
+ bool iflag;
+ bool next_iflag;
enum intel_pt_param_flags flags;
uint64_t pos;
uint64_t last_ip;
@@ -213,6 +216,9 @@ struct intel_pt_decoder {
bool set_fup_pwre;
bool set_fup_exstop;
bool set_fup_bep;
+ bool set_fup_cfe_ip;
+ bool set_fup_cfe;
+ bool set_fup_mode_exec;
bool sample_cyc;
unsigned int fup_tx_flags;
unsigned int tx_flags;
@@ -223,6 +229,7 @@ struct intel_pt_decoder {
uint64_t timestamp_insn_cnt;
uint64_t sample_insn_cnt;
uint64_t stuck_ip;
+ struct intel_pt_pkt fup_cfe_pkt;
int max_loops;
int no_progress;
int stuck_ip_prd;
@@ -231,6 +238,8 @@ struct intel_pt_decoder {
const unsigned char *next_buf;
size_t next_len;
unsigned char temp_buf[INTEL_PT_PKT_MAX_SZ];
+ int evd_cnt;
+ struct intel_pt_evd evd[INTEL_PT_MAX_EVDS];
};
static uint64_t intel_pt_lower_power_of_2(uint64_t x)
@@ -473,6 +482,8 @@ static int intel_pt_ext_err(int code)
return INTEL_PT_ERR_LOST;
case -ELOOP:
return INTEL_PT_ERR_NELOOP;
+ case -ECONNRESET:
+ return INTEL_PT_ERR_EPTW;
default:
return INTEL_PT_ERR_UNK;
}
@@ -489,6 +500,7 @@ static const char *intel_pt_err_msgs[] = {
[INTEL_PT_ERR_LOST] = "Lost trace data",
[INTEL_PT_ERR_UNK] = "Unknown error!",
[INTEL_PT_ERR_NELOOP] = "Never-ending loop (refer perf config intel-pt.max-loops)",
+ [INTEL_PT_ERR_EPTW] = "Broken emulated ptwrite",
};
int intel_pt__strerror(int code, char *buf, size_t buflen)
@@ -820,6 +832,9 @@ static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info)
case INTEL_PT_BIP:
case INTEL_PT_BEP:
case INTEL_PT_BEP_IP:
+ case INTEL_PT_CFE:
+ case INTEL_PT_CFE_IP:
+ case INTEL_PT_EVD:
return 0;
case INTEL_PT_MTC:
@@ -1094,6 +1109,52 @@ static void intel_pt_sample_insn(struct intel_pt_decoder *decoder)
decoder->state.type |= INTEL_PT_INSTRUCTION;
}
+/*
+ * Sample FUP instruction at the same time as reporting the FUP event, so the
+ * instruction sample gets the same flags as the FUP event.
+ */
+static void intel_pt_sample_fup_insn(struct intel_pt_decoder *decoder)
+{
+ struct intel_pt_insn intel_pt_insn;
+ uint64_t max_insn_cnt, insn_cnt = 0;
+ int err;
+
+ decoder->state.insn_op = INTEL_PT_OP_OTHER;
+ decoder->state.insn_len = 0;
+
+ if (!decoder->branch_enable || !decoder->pge || decoder->hop ||
+ decoder->ip != decoder->last_ip)
+ return;
+
+ if (!decoder->mtc_insn)
+ decoder->mtc_insn = true;
+
+ max_insn_cnt = intel_pt_next_sample(decoder);
+ if (max_insn_cnt != 1)
+ return;
+
+ err = decoder->walk_insn(&intel_pt_insn, &insn_cnt, &decoder->ip,
+ 0, max_insn_cnt, decoder->data);
+ /* Ignore error, it will be reported next walk anyway */
+ if (err)
+ return;
+
+ if (intel_pt_insn.branch != INTEL_PT_BR_NO_BRANCH) {
+ intel_pt_log_at("ERROR: Unexpected branch at FUP instruction", decoder->ip);
+ return;
+ }
+
+ decoder->tot_insn_cnt += insn_cnt;
+ decoder->timestamp_insn_cnt += insn_cnt;
+ decoder->sample_insn_cnt += insn_cnt;
+ decoder->period_insn_cnt += insn_cnt;
+
+ intel_pt_sample_insn(decoder);
+
+ decoder->state.type |= INTEL_PT_INSTRUCTION;
+ decoder->ip += intel_pt_insn.length;
+}
+
static int intel_pt_walk_insn(struct intel_pt_decoder *decoder,
struct intel_pt_insn *intel_pt_insn, uint64_t ip)
{
@@ -1203,13 +1264,85 @@ out_no_progress:
return err;
}
-static bool intel_pt_fup_event(struct intel_pt_decoder *decoder)
+static void intel_pt_mode_exec_status(struct intel_pt_decoder *decoder)
+{
+ bool iflag = decoder->packet.count & INTEL_PT_IFLAG;
+
+ decoder->exec_mode = decoder->packet.payload;
+ decoder->iflag = iflag;
+ decoder->next_iflag = iflag;
+ decoder->state.from_iflag = iflag;
+ decoder->state.to_iflag = iflag;
+}
+
+static void intel_pt_mode_exec(struct intel_pt_decoder *decoder)
+{
+ bool iflag = decoder->packet.count & INTEL_PT_IFLAG;
+
+ decoder->exec_mode = decoder->packet.payload;
+ decoder->next_iflag = iflag;
+}
+
+static void intel_pt_sample_iflag(struct intel_pt_decoder *decoder)
+{
+ decoder->state.type |= INTEL_PT_IFLAG_CHG;
+ decoder->state.from_iflag = decoder->iflag;
+ decoder->state.to_iflag = decoder->next_iflag;
+ decoder->iflag = decoder->next_iflag;
+}
+
+static void intel_pt_sample_iflag_chg(struct intel_pt_decoder *decoder)
+{
+ if (decoder->iflag != decoder->next_iflag)
+ intel_pt_sample_iflag(decoder);
+}
+
+static void intel_pt_clear_fup_event(struct intel_pt_decoder *decoder)
+{
+ decoder->set_fup_tx_flags = false;
+ decoder->set_fup_ptw = false;
+ decoder->set_fup_mwait = false;
+ decoder->set_fup_pwre = false;
+ decoder->set_fup_exstop = false;
+ decoder->set_fup_bep = false;
+ decoder->set_fup_cfe_ip = false;
+ decoder->set_fup_cfe = false;
+ decoder->evd_cnt = 0;
+ decoder->set_fup_mode_exec = false;
+ decoder->iflag = decoder->next_iflag;
+}
+
+static bool intel_pt_fup_event(struct intel_pt_decoder *decoder, bool no_tip)
{
enum intel_pt_sample_type type = decoder->state.type;
+ bool sample_fup_insn = false;
bool ret = false;
decoder->state.type &= ~INTEL_PT_BRANCH;
+ if (decoder->set_fup_cfe_ip || decoder->set_fup_cfe) {
+ bool ip = decoder->set_fup_cfe_ip;
+
+ decoder->set_fup_cfe_ip = false;
+ decoder->set_fup_cfe = false;
+ decoder->state.type |= INTEL_PT_EVT;
+ if (!ip && decoder->pge)
+ decoder->state.type |= INTEL_PT_BRANCH;
+ decoder->state.cfe_type = decoder->fup_cfe_pkt.count;
+ decoder->state.cfe_vector = decoder->fup_cfe_pkt.payload;
+ decoder->state.evd_cnt = decoder->evd_cnt;
+ decoder->state.evd = decoder->evd;
+ decoder->evd_cnt = 0;
+ if (ip || decoder->pge)
+ decoder->state.flags |= INTEL_PT_FUP_IP;
+ ret = true;
+ }
+ if (decoder->set_fup_mode_exec) {
+ decoder->set_fup_mode_exec = false;
+ intel_pt_sample_iflag(decoder);
+ sample_fup_insn = no_tip;
+ ret = true;
+ }
if (decoder->set_fup_tx_flags) {
decoder->set_fup_tx_flags = false;
decoder->tx_flags = decoder->fup_tx_flags;
@@ -1266,6 +1399,8 @@ static bool intel_pt_fup_event(struct intel_pt_decoder *decoder)
if (ret) {
decoder->state.from_ip = decoder->ip;
decoder->state.to_ip = 0;
+ if (sample_fup_insn)
+ intel_pt_sample_fup_insn(decoder);
} else {
decoder->state.type = type;
}
@@ -1298,7 +1433,7 @@ static int intel_pt_walk_fup(struct intel_pt_decoder *decoder)
bool no_tip = decoder->pkt_state != INTEL_PT_STATE_FUP;
decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
- if (intel_pt_fup_event(decoder) && no_tip)
+ if (intel_pt_fup_event(decoder, no_tip) && no_tip)
return 0;
return -EAGAIN;
}
@@ -1350,6 +1485,7 @@ static int intel_pt_walk_tip(struct intel_pt_decoder *decoder)
return err;
intel_pt_update_nr(decoder);
+ intel_pt_sample_iflag_chg(decoder);
if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) {
if (decoder->pkt_state == INTEL_PT_STATE_TIP_PGD) {
@@ -1403,17 +1539,108 @@ static int intel_pt_walk_tip(struct intel_pt_decoder *decoder)
return intel_pt_bug(decoder);
}
+struct eptw_data {
+ int bit_countdown;
+ uint64_t payload;
+};
+
+static int intel_pt_eptw_lookahead_cb(struct intel_pt_pkt_info *pkt_info)
+{
+ struct eptw_data *data = pkt_info->data;
+ int nr_bits;
+
+ switch (pkt_info->packet.type) {
+ case INTEL_PT_PAD:
+ case INTEL_PT_MNT:
+ case INTEL_PT_MODE_EXEC:
+ case INTEL_PT_MODE_TSX:
+ case INTEL_PT_MTC:
+ case INTEL_PT_FUP:
+ case INTEL_PT_CYC:
+ case INTEL_PT_CBR:
+ case INTEL_PT_TSC:
+ case INTEL_PT_TMA:
+ case INTEL_PT_PIP:
+ case INTEL_PT_VMCS:
+ case INTEL_PT_PSB:
+ case INTEL_PT_PSBEND:
+ case INTEL_PT_PTWRITE:
+ case INTEL_PT_PTWRITE_IP:
+ case INTEL_PT_EXSTOP:
+ case INTEL_PT_EXSTOP_IP:
+ case INTEL_PT_MWAIT:
+ case INTEL_PT_PWRE:
+ case INTEL_PT_PWRX:
+ case INTEL_PT_BBP:
+ case INTEL_PT_BIP:
+ case INTEL_PT_BEP:
+ case INTEL_PT_BEP_IP:
+ case INTEL_PT_CFE:
+ case INTEL_PT_CFE_IP:
+ case INTEL_PT_EVD:
+ break;
+
+ case INTEL_PT_TNT:
+ nr_bits = data->bit_countdown;
+ if (nr_bits > pkt_info->packet.count)
+ nr_bits = pkt_info->packet.count;
+ data->payload <<= nr_bits;
+ data->payload |= pkt_info->packet.payload >> (64 - nr_bits);
+ data->bit_countdown -= nr_bits;
+ return !data->bit_countdown;
+
+ case INTEL_PT_TIP_PGE:
+ case INTEL_PT_TIP_PGD:
+ case INTEL_PT_TIP:
+ case INTEL_PT_BAD:
+ case INTEL_PT_OVF:
+ case INTEL_PT_TRACESTOP:
+ default:
+ return 1;
+ }
+
+ return 0;
+}
+
+static int intel_pt_emulated_ptwrite(struct intel_pt_decoder *decoder)
+{
+ int n = 64 - decoder->tnt.count;
+ struct eptw_data data = {
+ .bit_countdown = n,
+ .payload = decoder->tnt.payload >> n,
+ };
+
+ decoder->emulated_ptwrite = false;
+ intel_pt_log("Emulated ptwrite detected\n");
+
+ intel_pt_pkt_lookahead(decoder, intel_pt_eptw_lookahead_cb, &data);
+ if (data.bit_countdown)
+ return -ECONNRESET;
+
+ decoder->state.type = INTEL_PT_PTW;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ decoder->state.ptw_payload = data.payload;
+ return 0;
+}
+
static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder)
{
struct intel_pt_insn intel_pt_insn;
int err;
while (1) {
+ if (decoder->emulated_ptwrite)
+ return intel_pt_emulated_ptwrite(decoder);
err = intel_pt_walk_insn(decoder, &intel_pt_insn, 0);
- if (err == INTEL_PT_RETURN)
+ if (err == INTEL_PT_RETURN) {
+ decoder->emulated_ptwrite = intel_pt_insn.emulated_ptwrite;
return 0;
- if (err)
+ }
+ if (err) {
+ decoder->emulated_ptwrite = false;
return err;
+ }
if (intel_pt_insn.op == INTEL_PT_OP_RET) {
if (!decoder->return_compression) {
@@ -1464,6 +1691,7 @@ static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder)
decoder->state.to_ip = decoder->last_ip;
decoder->ip = decoder->last_ip;
intel_pt_update_nr(decoder);
+ intel_pt_sample_iflag_chg(decoder);
return 0;
}
@@ -1527,6 +1755,19 @@ static int intel_pt_mode_tsx(struct intel_pt_decoder *decoder, bool *no_tip)
return 0;
}
+static int intel_pt_evd(struct intel_pt_decoder *decoder)
+{
+ if (decoder->evd_cnt >= INTEL_PT_MAX_EVDS) {
+ intel_pt_log_at("ERROR: Too many EVD packets", decoder->pos);
+ return -ENOSYS;
+ }
+ decoder->evd[decoder->evd_cnt++] = (struct intel_pt_evd){
+ .type = decoder->packet.count,
+ .payload = decoder->packet.payload,
+ };
+ return 0;
+}
+
static uint64_t intel_pt_8b_tsc(uint64_t timestamp, uint64_t ref_timestamp)
{
timestamp |= (ref_timestamp & (0xffULL << 56));
@@ -1620,12 +1861,7 @@ static int intel_pt_overflow(struct intel_pt_decoder *decoder)
decoder->state.from_ip = decoder->ip;
decoder->ip = 0;
decoder->pge = false;
- decoder->set_fup_tx_flags = false;
- decoder->set_fup_ptw = false;
- decoder->set_fup_mwait = false;
- decoder->set_fup_pwre = false;
- decoder->set_fup_exstop = false;
- decoder->set_fup_bep = false;
+ intel_pt_clear_fup_event(decoder);
decoder->overflow = true;
return -EOVERFLOW;
}
@@ -1873,6 +2109,9 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
case INTEL_PT_BIP:
case INTEL_PT_BEP:
case INTEL_PT_BEP_IP:
+ case INTEL_PT_CFE:
+ case INTEL_PT_CFE_IP:
+ case INTEL_PT_EVD:
decoder->have_tma = false;
intel_pt_log("ERROR: Unexpected packet\n");
err = -EAGAIN;
@@ -1895,7 +2134,7 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
break;
case INTEL_PT_MODE_EXEC:
- decoder->exec_mode = decoder->packet.payload;
+ intel_pt_mode_exec_status(decoder);
break;
case INTEL_PT_PIP:
@@ -1975,6 +2214,9 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
case INTEL_PT_BIP:
case INTEL_PT_BEP:
case INTEL_PT_BEP_IP:
+ case INTEL_PT_CFE:
+ case INTEL_PT_CFE_IP:
+ case INTEL_PT_EVD:
intel_pt_log("ERROR: Missing TIP after FUP\n");
decoder->pkt_state = INTEL_PT_STATE_ERR3;
decoder->pkt_step = 0;
@@ -2026,6 +2268,7 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
decoder->state.to_ip = decoder->ip;
}
intel_pt_update_nr(decoder);
+ intel_pt_sample_iflag_chg(decoder);
return 0;
case INTEL_PT_PIP:
@@ -2043,7 +2286,7 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
break;
case INTEL_PT_MODE_EXEC:
- decoder->exec_mode = decoder->packet.payload;
+ intel_pt_mode_exec(decoder);
break;
case INTEL_PT_VMCS:
@@ -2134,6 +2377,9 @@ static int intel_pt_vm_psb_lookahead_cb(struct intel_pt_pkt_info *pkt_info)
case INTEL_PT_TIP:
case INTEL_PT_PSB:
case INTEL_PT_TRACESTOP:
+ case INTEL_PT_CFE:
+ case INTEL_PT_CFE_IP:
+ case INTEL_PT_EVD:
default:
return 1;
}
@@ -2653,6 +2899,9 @@ static int intel_pt_vm_time_correlation(struct intel_pt_decoder *decoder)
decoder->blk_type = 0;
break;
+ case INTEL_PT_CFE:
+ case INTEL_PT_CFE_IP:
+ case INTEL_PT_EVD:
case INTEL_PT_MODE_EXEC:
case INTEL_PT_MODE_TSX:
case INTEL_PT_MNT:
@@ -2719,6 +2968,7 @@ static int intel_pt_hop_trace(struct intel_pt_decoder *decoder, bool *no_tip, in
decoder->state.from_ip = decoder->ip;
decoder->state.to_ip = 0;
intel_pt_update_nr(decoder);
+ intel_pt_sample_iflag_chg(decoder);
return HOP_RETURN;
case INTEL_PT_FUP:
@@ -2733,10 +2983,10 @@ static int intel_pt_hop_trace(struct intel_pt_decoder *decoder, bool *no_tip, in
decoder->state.type = INTEL_PT_INSTRUCTION;
decoder->state.from_ip = decoder->ip;
decoder->state.to_ip = 0;
- intel_pt_fup_event(decoder);
+ intel_pt_fup_event(decoder, *no_tip);
return HOP_RETURN;
}
- intel_pt_fup_event(decoder);
+ intel_pt_fup_event(decoder, *no_tip);
decoder->state.type |= INTEL_PT_INSTRUCTION | INTEL_PT_BRANCH;
*err = intel_pt_walk_fup_tip(decoder);
if (!*err && decoder->state.to_ip)
@@ -2789,6 +3039,9 @@ static int intel_pt_hop_trace(struct intel_pt_decoder *decoder, bool *no_tip, in
case INTEL_PT_BIP:
case INTEL_PT_BEP:
case INTEL_PT_BEP_IP:
+ case INTEL_PT_CFE:
+ case INTEL_PT_CFE_IP:
+ case INTEL_PT_EVD:
default:
return HOP_PROCESS;
}
@@ -2857,6 +3110,9 @@ static int intel_pt_psb_lookahead_cb(struct intel_pt_pkt_info *pkt_info)
case INTEL_PT_BIP:
case INTEL_PT_BEP:
case INTEL_PT_BEP_IP:
+ case INTEL_PT_CFE:
+ case INTEL_PT_CFE_IP:
+ case INTEL_PT_EVD:
if (data->after_psbend) {
data->after_psbend -= 1;
if (!data->after_psbend)
@@ -2994,6 +3250,7 @@ next:
decoder->pos);
break;
}
+ intel_pt_sample_iflag_chg(decoder);
intel_pt_set_ip(decoder);
decoder->state.from_ip = 0;
decoder->state.to_ip = decoder->ip;
@@ -3026,7 +3283,7 @@ next:
intel_pt_set_last_ip(decoder);
if (!decoder->branch_enable || !decoder->pge) {
decoder->ip = decoder->last_ip;
- if (intel_pt_fup_event(decoder))
+ if (intel_pt_fup_event(decoder, no_tip))
return 0;
no_tip = false;
break;
@@ -3108,8 +3365,15 @@ next:
break;
case INTEL_PT_MODE_EXEC:
- decoder->exec_mode = decoder->packet.payload;
- break;
+ intel_pt_mode_exec(decoder);
+ err = intel_pt_get_next_packet(decoder);
+ if (err)
+ return err;
+ if (decoder->packet.type == INTEL_PT_FUP) {
+ decoder->set_fup_mode_exec = true;
+ no_tip = true;
+ }
+ goto next;
case INTEL_PT_MODE_TSX:
/* MODE_TSX need not be followed by FUP */
@@ -3223,6 +3487,35 @@ next:
}
goto next;
+ case INTEL_PT_CFE:
+ decoder->fup_cfe_pkt = decoder->packet;
+ decoder->set_fup_cfe = true;
+ if (!decoder->pge) {
+ intel_pt_fup_event(decoder, true);
+ return 0;
+ }
+ break;
+
+ case INTEL_PT_CFE_IP:
+ decoder->fup_cfe_pkt = decoder->packet;
+ err = intel_pt_get_next_packet(decoder);
+ if (err)
+ return err;
+ if (decoder->packet.type == INTEL_PT_FUP) {
+ decoder->set_fup_cfe_ip = true;
+ no_tip = true;
+ } else {
+ intel_pt_log_at("ERROR: Missing FUP after CFE",
+ decoder->pos);
+ }
+ goto next;
+
+ case INTEL_PT_EVD:
+ err = intel_pt_evd(decoder);
+ if (err)
+ return err;
+ break;
+
default:
return intel_pt_bug(decoder);
}
@@ -3265,6 +3558,9 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
case INTEL_PT_BIP:
case INTEL_PT_BEP:
case INTEL_PT_BEP_IP:
+ case INTEL_PT_CFE:
+ case INTEL_PT_CFE_IP:
+ case INTEL_PT_EVD:
intel_pt_log("ERROR: Unexpected packet\n");
err = -ENOENT;
goto out;
@@ -3307,7 +3603,7 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
break;
case INTEL_PT_MODE_EXEC:
- decoder->exec_mode = decoder->packet.payload;
+ intel_pt_mode_exec_status(decoder);
break;
case INTEL_PT_MODE_TSX:
@@ -3426,7 +3722,7 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
break;
case INTEL_PT_MODE_EXEC:
- decoder->exec_mode = decoder->packet.payload;
+ intel_pt_mode_exec_status(decoder);
break;
case INTEL_PT_MODE_TSX:
@@ -3476,6 +3772,9 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
case INTEL_PT_BIP:
case INTEL_PT_BEP:
case INTEL_PT_BEP_IP:
+ case INTEL_PT_CFE:
+ case INTEL_PT_CFE_IP:
+ case INTEL_PT_EVD:
default:
break;
}
@@ -3486,12 +3785,7 @@ static int intel_pt_sync_ip(struct intel_pt_decoder *decoder)
{
int err;
- decoder->set_fup_tx_flags = false;
- decoder->set_fup_ptw = false;
- decoder->set_fup_mwait = false;
- decoder->set_fup_pwre = false;
- decoder->set_fup_exstop = false;
- decoder->set_fup_bep = false;
+ intel_pt_clear_fup_event(decoder);
decoder->overflow = false;
if (!decoder->branch_enable) {
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
index 8fd68f7a0963..c773028df80e 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
@@ -17,6 +17,7 @@
#define INTEL_PT_IN_TX (1 << 0)
#define INTEL_PT_ABORT_TX (1 << 1)
+#define INTEL_PT_IFLAG (1 << 2)
#define INTEL_PT_ASYNC (1 << 2)
#define INTEL_PT_FUP_IP (1 << 3)
#define INTEL_PT_SAMPLE_IPC (1 << 4)
@@ -35,6 +36,8 @@ enum intel_pt_sample_type {
INTEL_PT_TRACE_END = 1 << 10,
INTEL_PT_BLK_ITEMS = 1 << 11,
INTEL_PT_PSB_EVT = 1 << 12,
+ INTEL_PT_EVT = 1 << 13,
+ INTEL_PT_IFLAG_CHG = 1 << 14,
};
enum intel_pt_period_type {
@@ -55,6 +58,7 @@ enum {
INTEL_PT_ERR_LOST,
INTEL_PT_ERR_UNK,
INTEL_PT_ERR_NELOOP,
+ INTEL_PT_ERR_EPTW,
INTEL_PT_ERR_MAX,
};
@@ -209,10 +213,24 @@ struct intel_pt_vmcs_info {
bool error_printed;
};
+/*
+ * Maximum number of event trace data items in one go, assuming at most 1 per type
+ * and 6 bits of type in the EVD packet.
+ */
+#define INTEL_PT_MAX_EVDS 64
+
+/* Event trace data from EVD packet */
+struct intel_pt_evd {
+ int type;
+ uint64_t payload;
+};
+
struct intel_pt_state {
enum intel_pt_sample_type type;
bool from_nr;
bool to_nr;
+ bool from_iflag;
+ bool to_iflag;
int err;
uint64_t from_ip;
uint64_t to_ip;
@@ -234,6 +252,10 @@ struct intel_pt_state {
int insn_len;
char insn[INTEL_PT_INSN_BUF_SZ];
struct intel_pt_blk_items items;
+ int cfe_type;
+ int cfe_vector;
+ int evd_cnt;
+ struct intel_pt_evd *evd;
};
struct intel_pt_insn;
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
index 9d5e65cec89b..1376077183f7 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
@@ -32,6 +32,7 @@ static void intel_pt_insn_decoder(struct insn *insn,
int ext;
intel_pt_insn->rel = 0;
+ intel_pt_insn->emulated_ptwrite = false;
if (insn_is_avx(insn)) {
intel_pt_insn->op = INTEL_PT_OP_OTHER;
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h
index c2861cfdd768..e3338b56a75f 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h
@@ -37,6 +37,7 @@ enum intel_pt_insn_branch {
struct intel_pt_insn {
enum intel_pt_insn_op op;
enum intel_pt_insn_branch branch;
+ bool emulated_ptwrite;
int length;
int32_t rel;
unsigned char buf[INTEL_PT_INSN_BUF_SZ];
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
index 4bd154848cad..18f97f43e01a 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
@@ -64,6 +64,9 @@ static const char * const packet_name[] = {
[INTEL_PT_BIP] = "BIP",
[INTEL_PT_BEP] = "BEP",
[INTEL_PT_BEP_IP] = "BEP",
+ [INTEL_PT_CFE] = "CFE",
+ [INTEL_PT_CFE_IP] = "CFE",
+ [INTEL_PT_EVD] = "EVD",
};
const char *intel_pt_pkt_name(enum intel_pt_pkt_type type)
@@ -197,8 +200,7 @@ static int intel_pt_get_mnt(const unsigned char *buf, size_t len,
return INTEL_PT_NEED_MORE_BYTES;
packet->type = INTEL_PT_MNT;
memcpy_le64(&packet->payload, buf + 3, 8);
- return 11
-;
+ return 11;
}
static int intel_pt_get_3byte(const unsigned char *buf, size_t len,
@@ -329,6 +331,29 @@ static int intel_pt_get_bep_ip(size_t len, struct intel_pt_pkt *packet)
return 2;
}
+static int intel_pt_get_cfe(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 4)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = buf[2] & 0x80 ? INTEL_PT_CFE_IP : INTEL_PT_CFE;
+ packet->count = buf[2] & 0x1f;
+ packet->payload = buf[3];
+ return 4;
+}
+
+static int intel_pt_get_evd(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 11)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_EVD;
+ packet->count = buf[2] & 0x3f;
+ packet->payload = buf[3];
+ memcpy_le64(&packet->payload, buf + 3, 8);
+ return 11;
+}
+
static int intel_pt_get_ext(const unsigned char *buf, size_t len,
struct intel_pt_pkt *packet)
{
@@ -375,6 +400,10 @@ static int intel_pt_get_ext(const unsigned char *buf, size_t len,
return intel_pt_get_bep(len, packet);
case 0xb3: /* BEP with IP */
return intel_pt_get_bep_ip(len, packet);
+ case 0x13: /* CFE */
+ return intel_pt_get_cfe(buf, len, packet);
+ case 0x53: /* EVD */
+ return intel_pt_get_evd(buf, len, packet);
default:
return INTEL_PT_BAD_PACKET;
}
@@ -475,6 +504,7 @@ static int intel_pt_get_mode(const unsigned char *buf, size_t len,
switch (buf[1] >> 5) {
case 0:
packet->type = INTEL_PT_MODE_EXEC;
+ packet->count = buf[1];
switch (buf[1] & 3) {
case 0:
packet->payload = 16;
@@ -624,6 +654,9 @@ void intel_pt_upd_pkt_ctx(const struct intel_pt_pkt *packet,
case INTEL_PT_MWAIT:
case INTEL_PT_BEP:
case INTEL_PT_BEP_IP:
+ case INTEL_PT_CFE:
+ case INTEL_PT_CFE_IP:
+ case INTEL_PT_EVD:
*ctx = INTEL_PT_NO_CTX;
break;
case INTEL_PT_BBP:
@@ -709,7 +742,8 @@ int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf,
return snprintf(buf, buf_len, "%s CTC 0x%x FC 0x%x", name,
(unsigned)payload, packet->count);
case INTEL_PT_MODE_EXEC:
- return snprintf(buf, buf_len, "%s %lld", name, payload);
+ return snprintf(buf, buf_len, "%s IF:%d %lld",
+ name, !!(packet->count & 4), payload);
case INTEL_PT_MODE_TSX:
return snprintf(buf, buf_len, "%s TXAbort:%u InTX:%u",
name, (unsigned)(payload >> 1) & 1,
@@ -751,6 +785,13 @@ int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf,
case INTEL_PT_BIP:
return snprintf(buf, buf_len, "%s ID 0x%02x Value 0x%llx",
name, packet->count, payload);
+ case INTEL_PT_CFE:
+ case INTEL_PT_CFE_IP:
+ return snprintf(buf, buf_len, "%s IP:%d Type 0x%02x Vector 0x%llx",
+ name, packet->type == INTEL_PT_CFE_IP, packet->count, payload);
+ case INTEL_PT_EVD:
+ return snprintf(buf, buf_len, "%s Type 0x%02x Payload 0x%llx",
+ name, packet->count, payload);
default:
break;
}
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
index 996090cb84f6..496ba4be875c 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
@@ -56,6 +56,9 @@ enum intel_pt_pkt_type {
INTEL_PT_BIP,
INTEL_PT_BEP,
INTEL_PT_BEP_IP,
+ INTEL_PT_CFE,
+ INTEL_PT_CFE_IP,
+ INTEL_PT_EVD,
};
struct intel_pt_pkt {
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index e8613cbda331..62b2f375a94d 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -46,6 +46,12 @@
#define MAX_TIMESTAMP (~0ULL)
+#define INTEL_PT_CFG_PASS_THRU BIT_ULL(0)
+#define INTEL_PT_CFG_PWR_EVT_EN BIT_ULL(4)
+#define INTEL_PT_CFG_BRANCH_EN BIT_ULL(13)
+#define INTEL_PT_CFG_EVT_EN BIT_ULL(31)
+#define INTEL_PT_CFG_TNT_DIS BIT_ULL(55)
+
struct range {
u64 start;
u64 end;
@@ -71,6 +77,7 @@ struct intel_pt {
bool mispred_all;
bool use_thread_stack;
bool callstack;
+ bool cap_event_trace;
unsigned int br_stack_sz;
unsigned int br_stack_sz_plus;
int have_sched_switch;
@@ -115,6 +122,12 @@ struct intel_pt {
bool sample_pebs;
struct evsel *pebs_evsel;
+ u64 evt_sample_type;
+ u64 evt_id;
+
+ u64 iflag_chg_sample_type;
+ u64 iflag_chg_id;
+
u64 tsc_bit;
u64 mtc_bit;
u64 mtc_freq_bits;
@@ -179,6 +192,7 @@ struct intel_pt_queue {
pid_t next_tid;
struct thread *thread;
struct machine *guest_machine;
+ struct thread *guest_thread;
struct thread *unknown_guest_thread;
pid_t guest_machine_pid;
bool exclude_kernel;
@@ -517,6 +531,7 @@ struct intel_pt_cache_entry {
u64 byte_cnt;
enum intel_pt_insn_op op;
enum intel_pt_insn_branch branch;
+ bool emulated_ptwrite;
int length;
int32_t rel;
char insn[INTEL_PT_INSN_BUF_SZ];
@@ -603,6 +618,7 @@ static int intel_pt_cache_add(struct dso *dso, struct machine *machine,
e->byte_cnt = byte_cnt;
e->op = intel_pt_insn->op;
e->branch = intel_pt_insn->branch;
+ e->emulated_ptwrite = intel_pt_insn->emulated_ptwrite;
e->length = intel_pt_insn->length;
e->rel = intel_pt_insn->rel;
memcpy(e->insn, intel_pt_insn->buf, INTEL_PT_INSN_BUF_SZ);
@@ -675,6 +691,11 @@ static int intel_pt_get_guest(struct intel_pt_queue *ptq)
ptq->guest_machine = NULL;
thread__zput(ptq->unknown_guest_thread);
+ if (symbol_conf.guest_code) {
+ thread__zput(ptq->guest_thread);
+ ptq->guest_thread = machines__findnew_guest_code(machines, pid);
+ }
+
machine = machines__find_guest(machines, pid);
if (!machine)
return -1;
@@ -689,6 +710,28 @@ static int intel_pt_get_guest(struct intel_pt_queue *ptq)
return 0;
}
+static inline bool intel_pt_jmp_16(struct intel_pt_insn *intel_pt_insn)
+{
+ return intel_pt_insn->rel == 16 && intel_pt_insn->branch == INTEL_PT_BR_UNCONDITIONAL;
+}
+
+#define PTWRITE_MAGIC "\x0f\x0bperf,ptwrite "
+#define PTWRITE_MAGIC_LEN 16
+
+static bool intel_pt_emulated_ptwrite(struct dso *dso, struct machine *machine, u64 offset)
+{
+ unsigned char buf[PTWRITE_MAGIC_LEN];
+ ssize_t len;
+
+ len = dso__data_read_offset(dso, machine, offset, buf, PTWRITE_MAGIC_LEN);
+ if (len == PTWRITE_MAGIC_LEN && !memcmp(buf, PTWRITE_MAGIC, PTWRITE_MAGIC_LEN)) {
+ intel_pt_log("Emulated ptwrite signature found\n");
+ return true;
+ }
+ intel_pt_log("Emulated ptwrite signature not found\n");
+ return false;
+}
+
static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
uint64_t *insn_cnt_ptr, uint64_t *ip,
uint64_t to_ip, uint64_t max_insn_cnt,
@@ -716,11 +759,16 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
cpumode = intel_pt_nr_cpumode(ptq, *ip, nr);
if (nr) {
- if (cpumode != PERF_RECORD_MISC_GUEST_KERNEL ||
+ if ((!symbol_conf.guest_code && cpumode != PERF_RECORD_MISC_GUEST_KERNEL) ||
intel_pt_get_guest(ptq))
return -EINVAL;
machine = ptq->guest_machine;
- thread = ptq->unknown_guest_thread;
+ thread = ptq->guest_thread;
+ if (!thread) {
+ if (cpumode != PERF_RECORD_MISC_GUEST_KERNEL)
+ return -EINVAL;
+ thread = ptq->unknown_guest_thread;
+ }
} else {
thread = ptq->thread;
if (!thread) {
@@ -751,6 +799,7 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
*ip += e->byte_cnt;
intel_pt_insn->op = e->op;
intel_pt_insn->branch = e->branch;
+ intel_pt_insn->emulated_ptwrite = e->emulated_ptwrite;
intel_pt_insn->length = e->length;
intel_pt_insn->rel = e->rel;
memcpy(intel_pt_insn->buf, e->insn,
@@ -782,8 +831,18 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
insn_cnt += 1;
- if (intel_pt_insn->branch != INTEL_PT_BR_NO_BRANCH)
+ if (intel_pt_insn->branch != INTEL_PT_BR_NO_BRANCH) {
+ bool eptw;
+ u64 offs;
+
+ if (!intel_pt_jmp_16(intel_pt_insn))
+ goto out;
+ /* Check for emulated ptwrite */
+ offs = offset + intel_pt_insn->length;
+ eptw = intel_pt_emulated_ptwrite(al.map->dso, machine, offs);
+ intel_pt_insn->emulated_ptwrite = eptw;
goto out;
+ }
if (max_insn_cnt && insn_cnt >= max_insn_cnt)
goto out_no_cache;
@@ -953,12 +1012,26 @@ static bool intel_pt_branch_enable(struct intel_pt *pt)
evlist__for_each_entry(pt->session->evlist, evsel) {
if (intel_pt_get_config(pt, &evsel->core.attr, &config) &&
- (config & 1) && !(config & 0x2000))
+ (config & INTEL_PT_CFG_PASS_THRU) &&
+ !(config & INTEL_PT_CFG_BRANCH_EN))
return false;
}
return true;
}
+static bool intel_pt_disabled_tnt(struct intel_pt *pt)
+{
+ struct evsel *evsel;
+ u64 config;
+
+ evlist__for_each_entry(pt->session->evlist, evsel) {
+ if (intel_pt_get_config(pt, &evsel->core.attr, &config) &&
+ config & INTEL_PT_CFG_TNT_DIS)
+ return true;
+ }
+ return false;
+}
+
static unsigned int intel_pt_mtc_period(struct intel_pt *pt)
{
struct evsel *evsel;
@@ -1214,6 +1287,10 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
params.first_timestamp = pt->first_timestamp;
params.max_loops = pt->max_loops;
+ /* Cannot walk code without TNT, so force 'quick' mode */
+ if (params.branch_enable && intel_pt_disabled_tnt(pt) && !params.quick)
+ params.quick = 1;
+
if (pt->filts.cnt > 0)
params.pgd_ip = intel_pt_pgd_ip;
@@ -1269,6 +1346,7 @@ static void intel_pt_free_queue(void *priv)
if (!ptq)
return;
thread__zput(ptq->thread);
+ thread__zput(ptq->guest_thread);
thread__zput(ptq->unknown_guest_thread);
intel_pt_decoder_free(ptq->decoder);
zfree(&ptq->event_buf);
@@ -1316,6 +1394,8 @@ static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt,
static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
{
+ struct intel_pt *pt = ptq->pt;
+
ptq->insn_len = 0;
if (ptq->state->flags & INTEL_PT_ABORT_TX) {
ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT;
@@ -1346,6 +1426,17 @@ static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
ptq->flags |= PERF_IP_FLAG_TRACE_BEGIN;
if (ptq->state->type & INTEL_PT_TRACE_END)
ptq->flags |= PERF_IP_FLAG_TRACE_END;
+
+ if (pt->cap_event_trace) {
+ if (ptq->state->type & INTEL_PT_IFLAG_CHG) {
+ if (!ptq->state->from_iflag)
+ ptq->flags |= PERF_IP_FLAG_INTR_DISABLE;
+ if (ptq->state->from_iflag != ptq->state->to_iflag)
+ ptq->flags |= PERF_IP_FLAG_INTR_TOGGLE;
+ } else if (!ptq->state->to_iflag) {
+ ptq->flags |= PERF_IP_FLAG_INTR_DISABLE;
+ }
+ }
}
static void intel_pt_setup_time_range(struct intel_pt *pt,
@@ -2160,6 +2251,78 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq)
return err;
}
+static int intel_pt_synth_events_sample(struct intel_pt_queue *ptq)
+{
+ struct intel_pt *pt = ptq->pt;
+ union perf_event *event = ptq->event_buf;
+ struct perf_sample sample = { .ip = 0, };
+ struct {
+ struct perf_synth_intel_evt cfe;
+ struct perf_synth_intel_evd evd[INTEL_PT_MAX_EVDS];
+ } raw;
+ int i;
+
+ if (intel_pt_skip_event(pt))
+ return 0;
+
+ intel_pt_prep_p_sample(pt, ptq, event, &sample);
+
+ sample.id = ptq->pt->evt_id;
+ sample.stream_id = ptq->pt->evt_id;
+
+ raw.cfe.type = ptq->state->cfe_type;
+ raw.cfe.reserved = 0;
+ raw.cfe.ip = !!(ptq->state->flags & INTEL_PT_FUP_IP);
+ raw.cfe.vector = ptq->state->cfe_vector;
+ raw.cfe.evd_cnt = ptq->state->evd_cnt;
+
+ for (i = 0; i < ptq->state->evd_cnt; i++) {
+ raw.evd[i].et = 0;
+ raw.evd[i].evd_type = ptq->state->evd[i].type;
+ raw.evd[i].payload = ptq->state->evd[i].payload;
+ }
+
+ sample.raw_size = perf_synth__raw_size(raw) +
+ ptq->state->evd_cnt * sizeof(struct perf_synth_intel_evd);
+ sample.raw_data = perf_synth__raw_data(&raw);
+
+ return intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->evt_sample_type);
+}
+
+static int intel_pt_synth_iflag_chg_sample(struct intel_pt_queue *ptq)
+{
+ struct intel_pt *pt = ptq->pt;
+ union perf_event *event = ptq->event_buf;
+ struct perf_sample sample = { .ip = 0, };
+ struct perf_synth_intel_iflag_chg raw;
+
+ if (intel_pt_skip_event(pt))
+ return 0;
+
+ intel_pt_prep_p_sample(pt, ptq, event, &sample);
+
+ sample.id = ptq->pt->iflag_chg_id;
+ sample.stream_id = ptq->pt->iflag_chg_id;
+
+ raw.flags = 0;
+ raw.iflag = ptq->state->to_iflag;
+
+ if (ptq->state->type & INTEL_PT_BRANCH) {
+ raw.via_branch = 1;
+ raw.branch_ip = ptq->state->to_ip;
+ } else {
+ sample.addr = 0;
+ }
+ sample.flags = ptq->flags;
+
+ sample.raw_size = perf_synth__raw_size(raw);
+ sample.raw_data = perf_synth__raw_data(&raw);
+
+ return intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->iflag_chg_sample_type);
+}
+
static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
pid_t pid, pid_t tid, u64 ip, u64 timestamp)
{
@@ -2256,6 +2419,10 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
ptq->sample_ipc = ptq->state->flags & INTEL_PT_SAMPLE_IPC;
}
+ /* Ensure guest code maps are set up */
+ if (symbol_conf.guest_code && (state->from_nr || state->to_nr))
+ intel_pt_get_guest(ptq);
+
/*
* Do PEBS first to allow for the possibility that the PEBS timestamp
* precedes the current timestamp.
@@ -2266,6 +2433,19 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
return err;
}
+ if (pt->synth_opts.intr_events) {
+ if (state->type & INTEL_PT_EVT) {
+ err = intel_pt_synth_events_sample(ptq);
+ if (err)
+ return err;
+ }
+ if (state->type & INTEL_PT_IFLAG_CHG) {
+ err = intel_pt_synth_iflag_chg_sample(ptq);
+ if (err)
+ return err;
+ }
+ }
+
if (pt->sample_pwr_events) {
if (state->type & INTEL_PT_PSB_EVT) {
err = intel_pt_synth_psb_sample(ptq);
@@ -3429,7 +3609,7 @@ static int intel_pt_synth_events(struct intel_pt *pt,
id += 1;
}
- if (pt->synth_opts.pwr_events && (evsel->core.attr.config & 0x10)) {
+ if (pt->synth_opts.pwr_events && (evsel->core.attr.config & INTEL_PT_CFG_PWR_EVT_EN)) {
attr.config = PERF_SYNTH_INTEL_MWAIT;
err = intel_pt_synth_event(session, "mwait", &attr, id);
if (err)
@@ -3463,6 +3643,28 @@ static int intel_pt_synth_events(struct intel_pt *pt,
id += 1;
}
+ if (pt->synth_opts.intr_events && (evsel->core.attr.config & INTEL_PT_CFG_EVT_EN)) {
+ attr.config = PERF_SYNTH_INTEL_EVT;
+ err = intel_pt_synth_event(session, "evt", &attr, id);
+ if (err)
+ return err;
+ pt->evt_sample_type = attr.sample_type;
+ pt->evt_id = id;
+ intel_pt_set_event_name(evlist, id, "evt");
+ id += 1;
+ }
+
+ if (pt->synth_opts.intr_events && pt->cap_event_trace) {
+ attr.config = PERF_SYNTH_INTEL_IFLAG_CHG;
+ err = intel_pt_synth_event(session, "iflag", &attr, id);
+ if (err)
+ return err;
+ pt->iflag_chg_sample_type = attr.sample_type;
+ pt->iflag_chg_id = id;
+ intel_pt_set_event_name(evlist, id, "iflag");
+ id += 1;
+ }
+
return 0;
}
@@ -3790,7 +3992,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
}
info = &auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] + 1;
- info_end = (void *)info + auxtrace_info->header.size;
+ info_end = (void *)auxtrace_info + auxtrace_info->header.size;
if (intel_pt_has(auxtrace_info, INTEL_PT_FILTER_STR_LEN)) {
size_t len;
@@ -3829,6 +4031,13 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
intel_pt_print_info_str("Filter string", pt->filter);
}
+ if ((void *)info < info_end) {
+ pt->cap_event_trace = *info++;
+ if (dump_trace)
+ fprintf(stdout, " Cap Event Trace %d\n",
+ pt->cap_event_trace);
+ }
+
pt->timeless_decoding = intel_pt_timeless_decoding(pt);
if (pt->timeless_decoding && !pt->tc.time_mult)
pt->tc.time_mult = 1;
diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c
index 917a9c707371..a23255773c60 100644
--- a/tools/perf/util/jitdump.c
+++ b/tools/perf/util/jitdump.c
@@ -382,15 +382,15 @@ jit_inject_event(struct jit_buf_desc *jd, union perf_event *event)
static pid_t jr_entry_pid(struct jit_buf_desc *jd, union jr_entry *jr)
{
- if (jd->nsi && jd->nsi->in_pidns)
- return jd->nsi->tgid;
+ if (jd->nsi && nsinfo__in_pidns(jd->nsi))
+ return nsinfo__tgid(jd->nsi);
return jr->load.pid;
}
static pid_t jr_entry_tid(struct jit_buf_desc *jd, union jr_entry *jr)
{
- if (jd->nsi && jd->nsi->in_pidns)
- return jd->nsi->pid;
+ if (jd->nsi && nsinfo__in_pidns(jd->nsi))
+ return nsinfo__pid(jd->nsi);
return jr->load.tid;
}
@@ -779,7 +779,7 @@ jit_detect(char *mmap_name, pid_t pid, struct nsinfo *nsi)
* pid does not match mmap pid
* pid==0 in system-wide mode (synthesized)
*/
- if (pid && pid2 != nsi->nstgid)
+ if (pid && pid2 != nsinfo__nstgid(nsi))
return -1;
/*
* validate suffix
diff --git a/tools/perf/util/libunwind/arm64.c b/tools/perf/util/libunwind/arm64.c
index c397be0c2e32..014d82159656 100644
--- a/tools/perf/util/libunwind/arm64.c
+++ b/tools/perf/util/libunwind/arm64.c
@@ -23,7 +23,9 @@
#include "unwind.h"
#include "libunwind-aarch64.h"
-#include <../../../../arch/arm64/include/uapi/asm/perf_regs.h>
+#define perf_event_arm_regs perf_event_arm64_regs
+#include <../../../arch/arm64/include/uapi/asm/perf_regs.h>
+#undef perf_event_arm_regs
#include "../../arch/arm64/util/unwind-libunwind.c"
/* NO_LIBUNWIND_DEBUG_FRAME is a feature flag for local libunwind,
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index fb8496df8432..009061852808 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -16,6 +16,7 @@
#include "map_symbol.h"
#include "branch.h"
#include "mem-events.h"
+#include "path.h"
#include "srcline.h"
#include "symbol.h"
#include "sort.h"
@@ -34,6 +35,7 @@
#include "bpf-event.h"
#include <internal/lib.h> // page_size
#include "cgroup.h"
+#include "arm64-frame-pointer-unwind-support.h"
#include <linux/ctype.h>
#include <symbol/kallsyms.h>
@@ -82,12 +84,23 @@ static int machine__set_mmap_name(struct machine *machine)
return machine->mmap_name ? 0 : -ENOMEM;
}
+static void thread__set_guest_comm(struct thread *thread, pid_t pid)
+{
+ char comm[64];
+
+ snprintf(comm, sizeof(comm), "[guest/%d]", pid);
+ thread__set_comm(thread, comm, 0);
+}
+
int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
{
int err = -ENOMEM;
memset(machine, 0, sizeof(*machine));
- maps__init(&machine->kmaps, machine);
+ machine->kmaps = maps__new(machine);
+ if (machine->kmaps == NULL)
+ return -ENOMEM;
+
RB_CLEAR_NODE(&machine->rb_node);
dsos__init(&machine->dsos);
@@ -106,7 +119,7 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
machine->root_dir = strdup(root_dir);
if (machine->root_dir == NULL)
- return -ENOMEM;
+ goto out;
if (machine__set_mmap_name(machine))
goto out;
@@ -114,13 +127,11 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
if (pid != HOST_KERNEL_ID) {
struct thread *thread = machine__findnew_thread(machine, -1,
pid);
- char comm[64];
if (thread == NULL)
goto out;
- snprintf(comm, sizeof(comm), "[guest/%d]", pid);
- thread__set_comm(thread, comm, 0);
+ thread__set_guest_comm(thread, pid);
thread__put(thread);
}
@@ -129,6 +140,7 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
out:
if (err) {
+ zfree(&machine->kmaps);
zfree(&machine->root_dir);
zfree(&machine->mmap_name);
}
@@ -218,7 +230,7 @@ void machine__exit(struct machine *machine)
return;
machine__destroy_kernel_maps(machine);
- maps__exit(&machine->kmaps);
+ maps__delete(machine->kmaps);
dsos__exit(&machine->dsos);
machine__exit_vdso(machine);
zfree(&machine->root_dir);
@@ -293,6 +305,8 @@ struct machine *machines__add(struct machines *machines, pid_t pid,
rb_link_node(&machine->rb_node, parent, p);
rb_insert_color_cached(&machine->rb_node, &machines->guests, leftmost);
+ machine->machines = machines;
+
return machine;
}
@@ -378,6 +392,93 @@ struct machine *machines__find_guest(struct machines *machines, pid_t pid)
return machine;
}
+/*
+ * A common case for KVM test programs is that the test program acts as the
+ * hypervisor, creating, running and destroying the virtual machine, and
+ * providing the guest object code from its own object code. In this case,
+ * the VM is not running an OS, but only the functions loaded into it by the
+ * hypervisor test program, and conveniently, loaded at the same virtual
+ * addresses.
+ *
+ * Normally to resolve addresses, MMAP events are needed to map addresses
+ * back to the object code and debug symbols for that object code.
+ *
+ * Currently, there is no way to get such mapping information from guests
+ * but, in the scenario described above, the guest has the same mappings
+ * as the hypervisor, so support for that scenario can be achieved.
+ *
+ * To support that, copy the host thread's maps to the guest thread's maps.
+ * Note, we do not discover the guest until we encounter a guest event,
+ * which works well because it is not until then that we know that the host
+ * thread's maps have been set up.
+ *
+ * This function returns the guest thread. Apart from keeping the data
+ * structures sane, using a thread belonging to the guest machine, instead
+ * of the host thread, allows it to have its own comm (refer to
+ * thread__set_guest_comm()).
+ */
+static struct thread *findnew_guest_code(struct machine *machine,
+ struct machine *host_machine,
+ pid_t pid)
+{
+ struct thread *host_thread;
+ struct thread *thread;
+ int err;
+
+ if (!machine)
+ return NULL;
+
+ thread = machine__findnew_thread(machine, -1, pid);
+ if (!thread)
+ return NULL;
+
+ /* Assume maps are set up if there are any */
+ if (thread->maps->nr_maps)
+ return thread;
+
+ host_thread = machine__find_thread(host_machine, -1, pid);
+ if (!host_thread)
+ goto out_err;
+
+ thread__set_guest_comm(thread, pid);
+
+ /*
+ * Guest code can be found in hypervisor process at the same address
+ * so copy host maps.
+ */
+ err = maps__clone(thread, host_thread->maps);
+ thread__put(host_thread);
+ if (err)
+ goto out_err;
+
+ return thread;
+
+out_err:
+ thread__zput(thread);
+ return NULL;
+}
+
+struct thread *machines__findnew_guest_code(struct machines *machines, pid_t pid)
+{
+ struct machine *host_machine = machines__find(machines, HOST_KERNEL_ID);
+ struct machine *machine = machines__findnew(machines, pid);
+
+ return findnew_guest_code(machine, host_machine, pid);
+}
+
+struct thread *machine__findnew_guest_code(struct machine *machine, pid_t pid)
+{
+ struct machines *machines = machine->machines;
+ struct machine *host_machine;
+
+ if (!machines)
+ return NULL;
+
+ host_machine = machines__find(machines, HOST_KERNEL_ID);
+
+ return findnew_guest_code(machine, host_machine, pid);
+}
+
void machines__process_guests(struct machines *machines,
machine__process_t process, void *data)
{
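
As a rough illustration of how the new guest-code helpers are meant to be consumed, the sketch below is not part of the patch; the resolve_guest_ip() wrapper and its arguments are hypothetical. It looks up the guest thread and resolves a guest ip against the maps cloned from the hypervisor thread:

    static struct symbol *resolve_guest_ip(struct machines *machines,
                                           pid_t guest_pid, u64 ip)
    {
            struct thread *thread = machines__findnew_guest_code(machines, guest_pid);
            struct symbol *sym;

            if (thread == NULL)
                    return NULL;

            /* The guest thread's maps are a clone of the hypervisor thread's
             * maps, so an address valid in the hypervisor resolves here too. */
            sym = maps__find_symbol(thread->maps, ip, /*mapp=*/NULL);
            thread__put(thread);
            return sym;
    }
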
@@ -776,7 +877,7 @@ static int machine__process_ksymbol_register(struct machine *machine,
struct perf_sample *sample __maybe_unused)
{
struct symbol *sym;
- struct map *map = maps__find(&machine->kmaps, event->ksymbol.addr);
+ struct map *map = maps__find(machine__kernel_maps(machine), event->ksymbol.addr);
if (!map) {
struct dso *dso = dso__new(event->ksymbol.name);
@@ -799,7 +900,7 @@ static int machine__process_ksymbol_register(struct machine *machine,
map->start = event->ksymbol.addr;
map->end = map->start + event->ksymbol.len;
- maps__insert(&machine->kmaps, map);
+ maps__insert(machine__kernel_maps(machine), map);
map__put(map);
dso__set_loaded(dso);
@@ -825,12 +926,12 @@ static int machine__process_ksymbol_unregister(struct machine *machine,
struct symbol *sym;
struct map *map;
- map = maps__find(&machine->kmaps, event->ksymbol.addr);
+ map = maps__find(machine__kernel_maps(machine), event->ksymbol.addr);
if (!map)
return 0;
if (map != machine->vmlinux_map)
- maps__remove(&machine->kmaps, map);
+ maps__remove(machine__kernel_maps(machine), map);
else {
sym = dso__find_symbol(map->dso, map->map_ip(map, map->start));
if (sym)
@@ -856,7 +957,7 @@ int machine__process_ksymbol(struct machine *machine __maybe_unused,
int machine__process_text_poke(struct machine *machine, union perf_event *event,
struct perf_sample *sample __maybe_unused)
{
- struct map *map = maps__find(&machine->kmaps, event->text_poke.addr);
+ struct map *map = maps__find(machine__kernel_maps(machine), event->text_poke.addr);
u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
if (dump_trace)
@@ -912,7 +1013,7 @@ static struct map *machine__addnew_module_map(struct machine *machine, u64 start
if (map == NULL)
goto out;
- maps__insert(&machine->kmaps, map);
+ maps__insert(machine__kernel_maps(machine), map);
/* Put the map here because maps__insert already got it */
map__put(map);
@@ -1098,7 +1199,7 @@ int machine__create_extra_kernel_map(struct machine *machine,
strlcpy(kmap->name, xm->name, KMAP_NAME_LEN);
- maps__insert(&machine->kmaps, map);
+ maps__insert(machine__kernel_maps(machine), map);
pr_debug2("Added extra kernel map %s %" PRIx64 "-%" PRIx64 "\n",
kmap->name, map->start, map->end);
@@ -1143,7 +1244,7 @@ static u64 find_entry_trampoline(struct dso *dso)
int machine__map_x86_64_entry_trampolines(struct machine *machine,
struct dso *kernel)
{
- struct maps *kmaps = &machine->kmaps;
+ struct maps *kmaps = machine__kernel_maps(machine);
int nr_cpus_avail, cpu;
bool found = false;
struct map *map;
@@ -1213,7 +1314,7 @@ __machine__create_kernel_maps(struct machine *machine, struct dso *kernel)
return -1;
machine->vmlinux_map->map_ip = machine->vmlinux_map->unmap_ip = identity__map_ip;
- maps__insert(&machine->kmaps, machine->vmlinux_map);
+ maps__insert(machine__kernel_maps(machine), machine->vmlinux_map);
return 0;
}
@@ -1226,7 +1327,7 @@ void machine__destroy_kernel_maps(struct machine *machine)
return;
kmap = map__kmap(map);
- maps__remove(&machine->kmaps, map);
+ maps__remove(machine__kernel_maps(machine), map);
if (kmap && kmap->ref_reloc_sym) {
zfree((char **)&kmap->ref_reloc_sym->name);
zfree(&kmap->ref_reloc_sym);
@@ -1321,7 +1422,7 @@ int machine__load_kallsyms(struct machine *machine, const char *filename)
* kernel, with modules between them, fixup the end of all
* sections.
*/
- maps__fixup_end(&machine->kmaps);
+ maps__fixup_end(machine__kernel_maps(machine));
}
return ret;
@@ -1415,7 +1516,7 @@ static int maps__set_modules_path_dir(struct maps *maps, const char *dir_name, i
struct stat st;
/*sshfs might return bad dent->d_type, so we have to stat*/
- snprintf(path, sizeof(path), "%s/%s", dir_name, dent->d_name);
+ path__join(path, sizeof(path), dir_name, dent->d_name);
if (stat(path, &st))
continue;
@@ -1469,7 +1570,7 @@ static int machine__set_modules_path(struct machine *machine)
machine->root_dir, version);
free(version);
- return maps__set_modules_path_dir(&machine->kmaps, modules_path, 0);
+ return maps__set_modules_path_dir(machine__kernel_maps(machine), modules_path, 0);
}
int __weak arch__fix_module_text_start(u64 *start __maybe_unused,
u64 *size __maybe_unused,
@@ -1542,11 +1643,11 @@ static void machine__update_kernel_mmap(struct machine *machine,
struct map *map = machine__kernel_map(machine);
map__get(map);
- maps__remove(&machine->kmaps, map);
+ maps__remove(machine__kernel_maps(machine), map);
machine__set_kernel_mmap(machine, start, end);
- maps__insert(&machine->kmaps, map);
+ maps__insert(machine__kernel_maps(machine), map);
map__put(map);
}
@@ -2071,6 +2172,7 @@ static void ip__resolve_ams(struct thread *thread,
ams->addr = ip;
ams->al_addr = al.addr;
+ ams->al_level = al.level;
ams->ms.maps = al.maps;
ams->ms.sym = al.sym;
ams->ms.map = al.map;
@@ -2090,6 +2192,7 @@ static void ip__resolve_data(struct thread *thread,
ams->addr = addr;
ams->al_addr = al.addr;
+ ams->al_level = al.level;
ams->ms.maps = al.maps;
ams->ms.sym = al.sym;
ams->ms.map = al.map;
@@ -2710,6 +2813,15 @@ static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread,
return err;
}
+static u64 get_leaf_frame_caller(struct perf_sample *sample,
+ struct thread *thread, int usr_idx)
+{
+ if (machine__normalized_is(thread->maps->machine, "arm64"))
+ return get_leaf_frame_caller_aarch64(sample, thread, usr_idx);
+ else
+ return 0;
+}
+
static int thread__resolve_callchain_sample(struct thread *thread,
struct callchain_cursor *cursor,
struct evsel *evsel,
@@ -2723,9 +2835,10 @@ static int thread__resolve_callchain_sample(struct thread *thread,
struct ip_callchain *chain = sample->callchain;
int chain_nr = 0;
u8 cpumode = PERF_RECORD_MISC_USER;
- int i, j, err, nr_entries;
+ int i, j, err, nr_entries, usr_idx;
int skip_idx = -1;
int first_call = 0;
+ u64 leaf_frame_caller;
if (chain)
chain_nr = chain->nr;
@@ -2850,6 +2963,34 @@ check_calls:
continue;
}
+ /*
+ * PERF_CONTEXT_USER allows us to locate where the user stack ends.
+ * Depending on callchain_param.order and the position of PERF_CONTEXT_USER,
+ * the index will be different in order to add the missing frame
+ * at the right place.
+ */
+
+ usr_idx = callchain_param.order == ORDER_CALLEE ? j-2 : j-1;
+
+ if (usr_idx >= 0 && chain->ips[usr_idx] == PERF_CONTEXT_USER) {
+
+ leaf_frame_caller = get_leaf_frame_caller(sample, thread, usr_idx);
+
+ /*
+ * Check that leaf_frame_caller != ip so the same
+ * value is not added twice.
+ */
+
+ if (leaf_frame_caller && leaf_frame_caller != ip) {
+
+ err = add_callchain_ip(thread, cursor, parent,
+ root_al, &cpumode, leaf_frame_caller,
+ false, NULL, NULL, 0);
+ if (err)
+ return (err < 0) ? err : 0;
+ }
+ }
+
err = add_callchain_ip(thread, cursor, parent,
root_al, &cpumode, ip,
false, NULL, NULL, 0);
@@ -2941,7 +3082,7 @@ static int thread__resolve_callchain_unwind(struct thread *thread,
return 0;
return unwind__get_entries(unwind_entry, cursor,
- thread, sample, max_stack);
+ thread, sample, max_stack, false);
}
int thread__resolve_callchain(struct thread *thread,
@@ -3079,14 +3220,19 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid,
}
/*
- * Compares the raw arch string. N.B. see instead perf_env__arch() if a
- * normalized arch is needed.
+ * Compares the raw arch string. N.B. see instead perf_env__arch() or
+ * machine__normalized_is() if a normalized arch is needed.
*/
bool machine__is(struct machine *machine, const char *arch)
{
return machine && !strcmp(perf_env__raw_arch(machine->env), arch);
}
+bool machine__normalized_is(struct machine *machine, const char *arch)
+{
+ return machine && !strcmp(perf_env__arch(machine->env), arch);
+}
+
int machine__nr_cpus_avail(struct machine *machine)
{
return machine ? perf_env__nr_cpus_avail(machine->env) : 0;
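
The two predicates differ only in which arch string they compare against; an illustrative pair of calls on an arm64 host (the strings are examples, the normalized form is whatever perf_env__arch() produces):

    machine__is(machine, "aarch64");           /* raw arch string, as reported by the kernel */
    machine__normalized_is(machine, "arm64");  /* normalized arch string */
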
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index a143087eeb47..5d7daf7cb7bc 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -18,6 +18,7 @@ struct symbol;
struct target;
struct thread;
union perf_event;
+struct machines;
/* Native host kernel uses -1 as pid index in machine */
#define HOST_KERNEL_ID (-1)
@@ -51,7 +52,7 @@ struct machine {
struct vdso_info *vdso_info;
struct perf_env *env;
struct dsos dsos;
- struct maps kmaps;
+ struct maps *kmaps;
struct map *vmlinux_map;
u64 kernel_start;
pid_t *current_tid;
@@ -59,6 +60,7 @@ struct machine {
void *priv;
u64 db_id;
};
+ struct machines *machines;
bool trampolines_mapped;
};
@@ -83,7 +85,7 @@ struct map *machine__kernel_map(struct machine *machine)
static inline
struct maps *machine__kernel_maps(struct machine *machine)
{
- return &machine->kmaps;
+ return machine->kmaps;
}
int machine__get_kernel_start(struct machine *machine);
@@ -162,10 +164,11 @@ void machines__process_guests(struct machines *machines,
struct machine *machines__add(struct machines *machines, pid_t pid,
const char *root_dir);
-struct machine *machines__find_host(struct machines *machines);
struct machine *machines__find(struct machines *machines, pid_t pid);
struct machine *machines__findnew(struct machines *machines, pid_t pid);
struct machine *machines__find_guest(struct machines *machines, pid_t pid);
+struct thread *machines__findnew_guest_code(struct machines *machines, pid_t pid);
+struct thread *machine__findnew_guest_code(struct machine *machine, pid_t pid);
void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size);
void machines__set_comm_exec(struct machines *machines, bool comm_exec);
@@ -208,6 +211,7 @@ static inline bool machine__is_host(struct machine *machine)
}
bool machine__is(struct machine *machine, const char *arch);
+bool machine__normalized_is(struct machine *machine, const char *arch);
int machine__nr_cpus_avail(struct machine *machine);
struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid);
@@ -222,7 +226,7 @@ static inline
struct symbol *machine__find_kernel_symbol(struct machine *machine, u64 addr,
struct map **mapp)
{
- return maps__find_symbol(&machine->kmaps, addr, mapp);
+ return maps__find_symbol(machine->kmaps, addr, mapp);
}
static inline
@@ -230,7 +234,7 @@ struct symbol *machine__find_kernel_symbol_by_name(struct machine *machine,
const char *name,
struct map **mapp)
{
- return maps__find_symbol_by_name(&machine->kmaps, name, mapp);
+ return maps__find_symbol_by_name(machine->kmaps, name, mapp);
}
int arch__fix_module_text_start(u64 *start, u64 *size, const char *name);
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 8af693d9678c..e0aa4a254583 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -1,31 +1,20 @@
// SPDX-License-Identifier: GPL-2.0
-#include "symbol.h"
-#include <assert.h>
-#include <errno.h>
#include <inttypes.h>
#include <limits.h>
+#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <stdio.h>
-#include <unistd.h>
+#include <linux/string.h>
+#include <linux/zalloc.h>
#include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */
+#include "debug.h"
#include "dso.h"
#include "map.h"
-#include "map_symbol.h"
+#include "namespaces.h"
+#include "srcline.h"
+#include "symbol.h"
#include "thread.h"
#include "vdso.h"
-#include "build-id.h"
-#include "debug.h"
-#include "machine.h"
-#include <linux/string.h>
-#include <linux/zalloc.h>
-#include "srcline.h"
-#include "namespaces.h"
-#include "unwind.h"
-#include "srccode.h"
-#include "ui/ui.h"
-
-static void __maps__insert(struct maps *maps, struct map *map);
static inline int is_android_lib(const char *filename)
{
@@ -138,7 +127,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
if (map != NULL) {
char newfilename[PATH_MAX];
- struct dso *dso;
+ struct dso *dso, *header_bid_dso;
int anon, no_dso, vdso, android;
android = is_android_lib(filename);
@@ -151,7 +140,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
if ((anon || no_dso) && nsi && (prot & PROT_EXEC)) {
snprintf(newfilename, sizeof(newfilename),
- "/tmp/perf-%d.map", nsi->pid);
+ "/tmp/perf-%d.map", nsinfo__pid(nsi));
filename = newfilename;
}
@@ -168,7 +157,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
nnsi = nsinfo__copy(nsi);
if (nnsi) {
nsinfo__put(nsi);
- nnsi->need_setns = false;
+ nsinfo__clear_need_setns(nnsi);
nsi = nnsi;
}
pgoff = 0;
@@ -194,9 +183,23 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
}
dso->nsinfo = nsi;
- if (build_id__is_defined(bid))
+ if (build_id__is_defined(bid)) {
dso__set_build_id(dso, bid);
-
+ } else {
+ /*
+ * If the mmap event had no build ID, search for an existing dso from the
+ * build ID header by name. Otherwise only the dso loaded at the time of
+ * reading the header will have the build ID set and all future mmaps will
+ * have it missing.
+ */
+ down_read(&machine->dsos.lock);
+ header_bid_dso = __dsos__find(&machine->dsos, filename, false);
+ up_read(&machine->dsos.lock);
+ if (header_bid_dso && header_bid_dso->header_build_id) {
+ dso__set_build_id(dso, &header_bid_dso->bid);
+ dso->header_build_id = 1;
+ }
+ }
dso__put(dso);
}
return map;
@@ -524,403 +527,13 @@ u64 map__objdump_2mem(struct map *map, u64 ip)
return ip + map->reloc;
}
-void maps__init(struct maps *maps, struct machine *machine)
-{
- maps->entries = RB_ROOT;
- init_rwsem(&maps->lock);
- maps->machine = machine;
- maps->last_search_by_name = NULL;
- maps->nr_maps = 0;
- maps->maps_by_name = NULL;
- refcount_set(&maps->refcnt, 1);
-}
-
-static void __maps__free_maps_by_name(struct maps *maps)
-{
- /*
- * Free everything to try to do it from the rbtree in the next search
- */
- zfree(&maps->maps_by_name);
- maps->nr_maps_allocated = 0;
-}
-
-void maps__insert(struct maps *maps, struct map *map)
-{
- down_write(&maps->lock);
- __maps__insert(maps, map);
- ++maps->nr_maps;
-
- if (map->dso && map->dso->kernel) {
- struct kmap *kmap = map__kmap(map);
-
- if (kmap)
- kmap->kmaps = maps;
- else
- pr_err("Internal error: kernel dso with non kernel map\n");
- }
-
-
- /*
- * If we already performed some search by name, then we need to add the just
- * inserted map and resort.
- */
- if (maps->maps_by_name) {
- if (maps->nr_maps > maps->nr_maps_allocated) {
- int nr_allocate = maps->nr_maps * 2;
- struct map **maps_by_name = realloc(maps->maps_by_name, nr_allocate * sizeof(map));
-
- if (maps_by_name == NULL) {
- __maps__free_maps_by_name(maps);
- up_write(&maps->lock);
- return;
- }
-
- maps->maps_by_name = maps_by_name;
- maps->nr_maps_allocated = nr_allocate;
- }
- maps->maps_by_name[maps->nr_maps - 1] = map;
- __maps__sort_by_name(maps);
- }
- up_write(&maps->lock);
-}
-
-static void __maps__remove(struct maps *maps, struct map *map)
-{
- rb_erase_init(&map->rb_node, &maps->entries);
- map__put(map);
-}
-
-void maps__remove(struct maps *maps, struct map *map)
-{
- down_write(&maps->lock);
- if (maps->last_search_by_name == map)
- maps->last_search_by_name = NULL;
-
- __maps__remove(maps, map);
- --maps->nr_maps;
- if (maps->maps_by_name)
- __maps__free_maps_by_name(maps);
- up_write(&maps->lock);
-}
-
-static void __maps__purge(struct maps *maps)
-{
- struct map *pos, *next;
-
- maps__for_each_entry_safe(maps, pos, next) {
- rb_erase_init(&pos->rb_node, &maps->entries);
- map__put(pos);
- }
-}
-
-void maps__exit(struct maps *maps)
-{
- down_write(&maps->lock);
- __maps__purge(maps);
- up_write(&maps->lock);
-}
-
-bool maps__empty(struct maps *maps)
-{
- return !maps__first(maps);
-}
-
-struct maps *maps__new(struct machine *machine)
-{
- struct maps *maps = zalloc(sizeof(*maps));
-
- if (maps != NULL)
- maps__init(maps, machine);
-
- return maps;
-}
-
-void maps__delete(struct maps *maps)
-{
- maps__exit(maps);
- unwind__finish_access(maps);
- free(maps);
-}
-
-void maps__put(struct maps *maps)
-{
- if (maps && refcount_dec_and_test(&maps->refcnt))
- maps__delete(maps);
-}
-
-struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp)
-{
- struct map *map = maps__find(maps, addr);
-
- /* Ensure map is loaded before using map->map_ip */
- if (map != NULL && map__load(map) >= 0) {
- if (mapp != NULL)
- *mapp = map;
- return map__find_symbol(map, map->map_ip(map, addr));
- }
-
- return NULL;
-}
-
-static bool map__contains_symbol(struct map *map, struct symbol *sym)
+bool map__contains_symbol(const struct map *map, const struct symbol *sym)
{
u64 ip = map->unmap_ip(map, sym->start);
return ip >= map->start && ip < map->end;
}
-struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp)
-{
- struct symbol *sym;
- struct map *pos;
-
- down_read(&maps->lock);
-
- maps__for_each_entry(maps, pos) {
- sym = map__find_symbol_by_name(pos, name);
-
- if (sym == NULL)
- continue;
- if (!map__contains_symbol(pos, sym)) {
- sym = NULL;
- continue;
- }
- if (mapp != NULL)
- *mapp = pos;
- goto out;
- }
-
- sym = NULL;
-out:
- up_read(&maps->lock);
- return sym;
-}
-
-int maps__find_ams(struct maps *maps, struct addr_map_symbol *ams)
-{
- if (ams->addr < ams->ms.map->start || ams->addr >= ams->ms.map->end) {
- if (maps == NULL)
- return -1;
- ams->ms.map = maps__find(maps, ams->addr);
- if (ams->ms.map == NULL)
- return -1;
- }
-
- ams->al_addr = ams->ms.map->map_ip(ams->ms.map, ams->addr);
- ams->ms.sym = map__find_symbol(ams->ms.map, ams->al_addr);
-
- return ams->ms.sym ? 0 : -1;
-}
-
-size_t maps__fprintf(struct maps *maps, FILE *fp)
-{
- size_t printed = 0;
- struct map *pos;
-
- down_read(&maps->lock);
-
- maps__for_each_entry(maps, pos) {
- printed += fprintf(fp, "Map:");
- printed += map__fprintf(pos, fp);
- if (verbose > 2) {
- printed += dso__fprintf(pos->dso, fp);
- printed += fprintf(fp, "--\n");
- }
- }
-
- up_read(&maps->lock);
-
- return printed;
-}
-
-int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp)
-{
- struct rb_root *root;
- struct rb_node *next, *first;
- int err = 0;
-
- down_write(&maps->lock);
-
- root = &maps->entries;
-
- /*
- * Find first map where end > map->start.
- * Same as find_vma() in kernel.
- */
- next = root->rb_node;
- first = NULL;
- while (next) {
- struct map *pos = rb_entry(next, struct map, rb_node);
-
- if (pos->end > map->start) {
- first = next;
- if (pos->start <= map->start)
- break;
- next = next->rb_left;
- } else
- next = next->rb_right;
- }
-
- next = first;
- while (next) {
- struct map *pos = rb_entry(next, struct map, rb_node);
- next = rb_next(&pos->rb_node);
-
- /*
- * Stop if current map starts after map->end.
- * Maps are ordered by start: next will not overlap for sure.
- */
- if (pos->start >= map->end)
- break;
-
- if (verbose >= 2) {
-
- if (use_browser) {
- pr_debug("overlapping maps in %s (disable tui for more info)\n",
- map->dso->name);
- } else {
- fputs("overlapping maps:\n", fp);
- map__fprintf(map, fp);
- map__fprintf(pos, fp);
- }
- }
-
- rb_erase_init(&pos->rb_node, root);
- /*
- * Now check if we need to create new maps for areas not
- * overlapped by the new map:
- */
- if (map->start > pos->start) {
- struct map *before = map__clone(pos);
-
- if (before == NULL) {
- err = -ENOMEM;
- goto put_map;
- }
-
- before->end = map->start;
- __maps__insert(maps, before);
- if (verbose >= 2 && !use_browser)
- map__fprintf(before, fp);
- map__put(before);
- }
-
- if (map->end < pos->end) {
- struct map *after = map__clone(pos);
-
- if (after == NULL) {
- err = -ENOMEM;
- goto put_map;
- }
-
- after->start = map->end;
- after->pgoff += map->end - pos->start;
- assert(pos->map_ip(pos, map->end) == after->map_ip(after, map->end));
- __maps__insert(maps, after);
- if (verbose >= 2 && !use_browser)
- map__fprintf(after, fp);
- map__put(after);
- }
-put_map:
- map__put(pos);
-
- if (err)
- goto out;
- }
-
- err = 0;
-out:
- up_write(&maps->lock);
- return err;
-}
-
-/*
- * XXX This should not really _copy_ te maps, but refcount them.
- */
-int maps__clone(struct thread *thread, struct maps *parent)
-{
- struct maps *maps = thread->maps;
- int err;
- struct map *map;
-
- down_read(&parent->lock);
-
- maps__for_each_entry(parent, map) {
- struct map *new = map__clone(map);
-
- if (new == NULL) {
- err = -ENOMEM;
- goto out_unlock;
- }
-
- err = unwind__prepare_access(maps, new, NULL);
- if (err)
- goto out_unlock;
-
- maps__insert(maps, new);
- map__put(new);
- }
-
- err = 0;
-out_unlock:
- up_read(&parent->lock);
- return err;
-}
-
-static void __maps__insert(struct maps *maps, struct map *map)
-{
- struct rb_node **p = &maps->entries.rb_node;
- struct rb_node *parent = NULL;
- const u64 ip = map->start;
- struct map *m;
-
- while (*p != NULL) {
- parent = *p;
- m = rb_entry(parent, struct map, rb_node);
- if (ip < m->start)
- p = &(*p)->rb_left;
- else
- p = &(*p)->rb_right;
- }
-
- rb_link_node(&map->rb_node, parent, p);
- rb_insert_color(&map->rb_node, &maps->entries);
- map__get(map);
-}
-
-struct map *maps__find(struct maps *maps, u64 ip)
-{
- struct rb_node *p;
- struct map *m;
-
- down_read(&maps->lock);
-
- p = maps->entries.rb_node;
- while (p != NULL) {
- m = rb_entry(p, struct map, rb_node);
- if (ip < m->start)
- p = p->rb_left;
- else if (ip >= m->end)
- p = p->rb_right;
- else
- goto out;
- }
-
- m = NULL;
-out:
- up_read(&maps->lock);
- return m;
-}
-
-struct map *maps__first(struct maps *maps)
-{
- struct rb_node *first = rb_first(&maps->entries);
-
- if (first)
- return rb_entry(first, struct map, rb_node);
- return NULL;
-}
-
static struct map *__map__next(struct map *map)
{
struct rb_node *next = rb_next(&map->rb_node);
@@ -961,3 +574,18 @@ struct maps *map__kmaps(struct map *map)
}
return kmap->kmaps;
}
+
+u64 map__map_ip(const struct map *map, u64 ip)
+{
+ return ip - map->start + map->pgoff;
+}
+
+u64 map__unmap_ip(const struct map *map, u64 ip)
+{
+ return ip + map->start - map->pgoff;
+}
+
+u64 identity__map_ip(const struct map *map __maybe_unused, u64 ip)
+{
+ return ip;
+}
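
The ip translation these now out-of-line helpers perform is plain offset arithmetic. A standalone sketch with a simplified stand-in struct (deliberately not the real struct map) shows the round trip:

    #include <assert.h>
    #include <stdint.h>

    /* Simplified stand-in for struct map: only the two fields the translation uses. */
    struct toy_map {
            uint64_t start;  /* first mapped virtual address */
            uint64_t pgoff;  /* dso/file offset mapped at that address */
    };

    static uint64_t toy_map_ip(const struct toy_map *m, uint64_t ip)
    {
            return ip - m->start + m->pgoff;   /* virtual address -> dso rip */
    }

    static uint64_t toy_unmap_ip(const struct toy_map *m, uint64_t rip)
    {
            return rip + m->start - m->pgoff;  /* dso rip -> virtual address */
    }

    int main(void)
    {
            struct toy_map m = { .start = 0x400000, .pgoff = 0x1000 };

            assert(toy_map_ip(&m, 0x401234) == 0x2234);
            assert(toy_unmap_ip(&m, 0x2234) == 0x401234);  /* round trip */
            return 0;
    }
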
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index d32f5b28c1fb..3dcfe06db6b3 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -29,9 +29,9 @@ struct map {
u64 reloc;
/* ip -> dso rip */
- u64 (*map_ip)(struct map *, u64);
+ u64 (*map_ip)(const struct map *, u64);
/* dso rip -> ip */
- u64 (*unmap_ip)(struct map *, u64);
+ u64 (*unmap_ip)(const struct map *, u64);
struct dso *dso;
refcount_t refcnt;
@@ -44,20 +44,12 @@ struct kmap *__map__kmap(struct map *map);
struct kmap *map__kmap(struct map *map);
struct maps *map__kmaps(struct map *map);
-static inline u64 map__map_ip(struct map *map, u64 ip)
-{
- return ip - map->start + map->pgoff;
-}
-
-static inline u64 map__unmap_ip(struct map *map, u64 ip)
-{
- return ip + map->start - map->pgoff;
-}
-
-static inline u64 identity__map_ip(struct map *map __maybe_unused, u64 ip)
-{
- return ip;
-}
+/* ip -> dso rip */
+u64 map__map_ip(const struct map *map, u64 ip);
+/* dso rip -> ip */
+u64 map__unmap_ip(const struct map *map, u64 ip);
+/* Returns ip */
+u64 identity__map_ip(const struct map *map __maybe_unused, u64 ip);
static inline size_t map__size(const struct map *map)
{
@@ -160,6 +152,8 @@ static inline bool __map__is_kmodule(const struct map *map)
bool map__has_symbols(const struct map *map);
+bool map__contains_symbol(const struct map *map, const struct symbol *sym);
+
#define ENTRY_TRAMPOLINE_NAME "__entry_SYSCALL_64_trampoline"
static inline bool is_entry_trampoline(const char *name)
diff --git a/tools/perf/util/map_symbol.h b/tools/perf/util/map_symbol.h
index 7d22ade082c8..e08817b0c30f 100644
--- a/tools/perf/util/map_symbol.h
+++ b/tools/perf/util/map_symbol.h
@@ -18,6 +18,7 @@ struct addr_map_symbol {
struct map_symbol ms;
u64 addr;
u64 al_addr;
+ char al_level;
u64 phys_addr;
u64 data_page_size;
};
diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c
new file mode 100644
index 000000000000..37bd5b40000d
--- /dev/null
+++ b/tools/perf/util/maps.c
@@ -0,0 +1,403 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <stdlib.h>
+#include <linux/zalloc.h>
+#include "debug.h"
+#include "dso.h"
+#include "map.h"
+#include "maps.h"
+#include "thread.h"
+#include "ui/ui.h"
+#include "unwind.h"
+
+static void __maps__insert(struct maps *maps, struct map *map);
+
+static void maps__init(struct maps *maps, struct machine *machine)
+{
+ maps->entries = RB_ROOT;
+ init_rwsem(&maps->lock);
+ maps->machine = machine;
+ maps->last_search_by_name = NULL;
+ maps->nr_maps = 0;
+ maps->maps_by_name = NULL;
+ refcount_set(&maps->refcnt, 1);
+}
+
+static void __maps__free_maps_by_name(struct maps *maps)
+{
+ /*
+ * Free everything so the next search by name falls back to the rbtree
+ */
+ zfree(&maps->maps_by_name);
+ maps->nr_maps_allocated = 0;
+}
+
+void maps__insert(struct maps *maps, struct map *map)
+{
+ down_write(&maps->lock);
+ __maps__insert(maps, map);
+ ++maps->nr_maps;
+
+ if (map->dso && map->dso->kernel) {
+ struct kmap *kmap = map__kmap(map);
+
+ if (kmap)
+ kmap->kmaps = maps;
+ else
+ pr_err("Internal error: kernel dso with non kernel map\n");
+ }
+
+
+ /*
+ * If we already performed some search by name, then we need to add the just
+ * inserted map and resort.
+ */
+ if (maps->maps_by_name) {
+ if (maps->nr_maps > maps->nr_maps_allocated) {
+ int nr_allocate = maps->nr_maps * 2;
+ struct map **maps_by_name = realloc(maps->maps_by_name, nr_allocate * sizeof(map));
+
+ if (maps_by_name == NULL) {
+ __maps__free_maps_by_name(maps);
+ up_write(&maps->lock);
+ return;
+ }
+
+ maps->maps_by_name = maps_by_name;
+ maps->nr_maps_allocated = nr_allocate;
+ }
+ maps->maps_by_name[maps->nr_maps - 1] = map;
+ __maps__sort_by_name(maps);
+ }
+ up_write(&maps->lock);
+}
+
+static void __maps__remove(struct maps *maps, struct map *map)
+{
+ rb_erase_init(&map->rb_node, &maps->entries);
+ map__put(map);
+}
+
+void maps__remove(struct maps *maps, struct map *map)
+{
+ down_write(&maps->lock);
+ if (maps->last_search_by_name == map)
+ maps->last_search_by_name = NULL;
+
+ __maps__remove(maps, map);
+ --maps->nr_maps;
+ if (maps->maps_by_name)
+ __maps__free_maps_by_name(maps);
+ up_write(&maps->lock);
+}
+
+static void __maps__purge(struct maps *maps)
+{
+ struct map *pos, *next;
+
+ maps__for_each_entry_safe(maps, pos, next) {
+ rb_erase_init(&pos->rb_node, &maps->entries);
+ map__put(pos);
+ }
+}
+
+static void maps__exit(struct maps *maps)
+{
+ down_write(&maps->lock);
+ __maps__purge(maps);
+ up_write(&maps->lock);
+}
+
+bool maps__empty(struct maps *maps)
+{
+ return !maps__first(maps);
+}
+
+struct maps *maps__new(struct machine *machine)
+{
+ struct maps *maps = zalloc(sizeof(*maps));
+
+ if (maps != NULL)
+ maps__init(maps, machine);
+
+ return maps;
+}
+
+void maps__delete(struct maps *maps)
+{
+ maps__exit(maps);
+ unwind__finish_access(maps);
+ free(maps);
+}
+
+void maps__put(struct maps *maps)
+{
+ if (maps && refcount_dec_and_test(&maps->refcnt))
+ maps__delete(maps);
+}
+
+struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp)
+{
+ struct map *map = maps__find(maps, addr);
+
+ /* Ensure map is loaded before using map->map_ip */
+ if (map != NULL && map__load(map) >= 0) {
+ if (mapp != NULL)
+ *mapp = map;
+ return map__find_symbol(map, map->map_ip(map, addr));
+ }
+
+ return NULL;
+}
+
+struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp)
+{
+ struct symbol *sym;
+ struct map *pos;
+
+ down_read(&maps->lock);
+
+ maps__for_each_entry(maps, pos) {
+ sym = map__find_symbol_by_name(pos, name);
+
+ if (sym == NULL)
+ continue;
+ if (!map__contains_symbol(pos, sym)) {
+ sym = NULL;
+ continue;
+ }
+ if (mapp != NULL)
+ *mapp = pos;
+ goto out;
+ }
+
+ sym = NULL;
+out:
+ up_read(&maps->lock);
+ return sym;
+}
+
+int maps__find_ams(struct maps *maps, struct addr_map_symbol *ams)
+{
+ if (ams->addr < ams->ms.map->start || ams->addr >= ams->ms.map->end) {
+ if (maps == NULL)
+ return -1;
+ ams->ms.map = maps__find(maps, ams->addr);
+ if (ams->ms.map == NULL)
+ return -1;
+ }
+
+ ams->al_addr = ams->ms.map->map_ip(ams->ms.map, ams->addr);
+ ams->ms.sym = map__find_symbol(ams->ms.map, ams->al_addr);
+
+ return ams->ms.sym ? 0 : -1;
+}
+
+size_t maps__fprintf(struct maps *maps, FILE *fp)
+{
+ size_t printed = 0;
+ struct map *pos;
+
+ down_read(&maps->lock);
+
+ maps__for_each_entry(maps, pos) {
+ printed += fprintf(fp, "Map:");
+ printed += map__fprintf(pos, fp);
+ if (verbose > 2) {
+ printed += dso__fprintf(pos->dso, fp);
+ printed += fprintf(fp, "--\n");
+ }
+ }
+
+ up_read(&maps->lock);
+
+ return printed;
+}
+
+int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp)
+{
+ struct rb_root *root;
+ struct rb_node *next, *first;
+ int err = 0;
+
+ down_write(&maps->lock);
+
+ root = &maps->entries;
+
+ /*
+ * Find first map where end > map->start.
+ * Same as find_vma() in kernel.
+ */
+ next = root->rb_node;
+ first = NULL;
+ while (next) {
+ struct map *pos = rb_entry(next, struct map, rb_node);
+
+ if (pos->end > map->start) {
+ first = next;
+ if (pos->start <= map->start)
+ break;
+ next = next->rb_left;
+ } else
+ next = next->rb_right;
+ }
+
+ next = first;
+ while (next) {
+ struct map *pos = rb_entry(next, struct map, rb_node);
+ next = rb_next(&pos->rb_node);
+
+ /*
+ * Stop if current map starts after map->end.
+ * Maps are ordered by start: next will not overlap for sure.
+ */
+ if (pos->start >= map->end)
+ break;
+
+ if (verbose >= 2) {
+
+ if (use_browser) {
+ pr_debug("overlapping maps in %s (disable tui for more info)\n",
+ map->dso->name);
+ } else {
+ fputs("overlapping maps:\n", fp);
+ map__fprintf(map, fp);
+ map__fprintf(pos, fp);
+ }
+ }
+
+ rb_erase_init(&pos->rb_node, root);
+ /*
+ * Now check if we need to create new maps for areas not
+ * overlapped by the new map:
+ */
+ if (map->start > pos->start) {
+ struct map *before = map__clone(pos);
+
+ if (before == NULL) {
+ err = -ENOMEM;
+ goto put_map;
+ }
+
+ before->end = map->start;
+ __maps__insert(maps, before);
+ if (verbose >= 2 && !use_browser)
+ map__fprintf(before, fp);
+ map__put(before);
+ }
+
+ if (map->end < pos->end) {
+ struct map *after = map__clone(pos);
+
+ if (after == NULL) {
+ err = -ENOMEM;
+ goto put_map;
+ }
+
+ after->start = map->end;
+ after->pgoff += map->end - pos->start;
+ assert(pos->map_ip(pos, map->end) == after->map_ip(after, map->end));
+ __maps__insert(maps, after);
+ if (verbose >= 2 && !use_browser)
+ map__fprintf(after, fp);
+ map__put(after);
+ }
+put_map:
+ map__put(pos);
+
+ if (err)
+ goto out;
+ }
+
+ err = 0;
+out:
+ up_write(&maps->lock);
+ return err;
+}
+
+/*
+ * XXX This should not really _copy_ the maps, but refcount them.
+ */
+int maps__clone(struct thread *thread, struct maps *parent)
+{
+ struct maps *maps = thread->maps;
+ int err;
+ struct map *map;
+
+ down_read(&parent->lock);
+
+ maps__for_each_entry(parent, map) {
+ struct map *new = map__clone(map);
+
+ if (new == NULL) {
+ err = -ENOMEM;
+ goto out_unlock;
+ }
+
+ err = unwind__prepare_access(maps, new, NULL);
+ if (err)
+ goto out_unlock;
+
+ maps__insert(maps, new);
+ map__put(new);
+ }
+
+ err = 0;
+out_unlock:
+ up_read(&parent->lock);
+ return err;
+}
+
+static void __maps__insert(struct maps *maps, struct map *map)
+{
+ struct rb_node **p = &maps->entries.rb_node;
+ struct rb_node *parent = NULL;
+ const u64 ip = map->start;
+ struct map *m;
+
+ while (*p != NULL) {
+ parent = *p;
+ m = rb_entry(parent, struct map, rb_node);
+ if (ip < m->start)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+
+ rb_link_node(&map->rb_node, parent, p);
+ rb_insert_color(&map->rb_node, &maps->entries);
+ map__get(map);
+}
+
+struct map *maps__find(struct maps *maps, u64 ip)
+{
+ struct rb_node *p;
+ struct map *m;
+
+ down_read(&maps->lock);
+
+ p = maps->entries.rb_node;
+ while (p != NULL) {
+ m = rb_entry(p, struct map, rb_node);
+ if (ip < m->start)
+ p = p->rb_left;
+ else if (ip >= m->end)
+ p = p->rb_right;
+ else
+ goto out;
+ }
+
+ m = NULL;
+out:
+ up_read(&maps->lock);
+ return m;
+}
+
+struct map *maps__first(struct maps *maps)
+{
+ struct rb_node *first = rb_first(&maps->entries);
+
+ if (first)
+ return rb_entry(first, struct map, rb_node);
+ return NULL;
+}
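
A hedged sketch of the lifecycle the new file now owns, using only functions defined above (error handling trimmed; machine and map are assumed to exist already):

    struct maps *maps = maps__new(machine);            /* refcount starts at 1 */

    maps__insert(maps, map);                           /* maps takes its own reference on map */
    struct map *hit = maps__find(maps, 0x401234ULL);   /* NULL unless a map covers the address */

    maps__put(maps);                                   /* the last put runs maps__delete() */
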
diff --git a/tools/perf/util/maps.h b/tools/perf/util/maps.h
index 3dd000ddf925..7e729ff42749 100644
--- a/tools/perf/util/maps.h
+++ b/tools/perf/util/maps.h
@@ -60,8 +60,6 @@ static inline struct maps *maps__get(struct maps *maps)
}
void maps__put(struct maps *maps);
-void maps__init(struct maps *maps, struct machine *machine);
-void maps__exit(struct maps *maps);
int maps__clone(struct thread *thread, struct maps *parent);
size_t maps__fprintf(struct maps *maps, FILE *fp);
diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
index 3167b4628b6d..c3c21a9c350b 100644
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -309,14 +309,41 @@ static const char * const mem_hops[] = {
* to be set with mem_hops field.
*/
"core, same node",
+ "node, same socket",
+ "socket, same board",
+ "board",
};
+static int perf_mem__op_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
+{
+ u64 op = PERF_MEM_LOCK_NA;
+ int l;
+
+ if (mem_info)
+ op = mem_info->data_src.mem_op;
+
+ if (op & PERF_MEM_OP_NA)
+ l = scnprintf(out, sz, "N/A");
+ else if (op & PERF_MEM_OP_LOAD)
+ l = scnprintf(out, sz, "LOAD");
+ else if (op & PERF_MEM_OP_STORE)
+ l = scnprintf(out, sz, "STORE");
+ else if (op & PERF_MEM_OP_PFETCH)
+ l = scnprintf(out, sz, "PFETCH");
+ else if (op & PERF_MEM_OP_EXEC)
+ l = scnprintf(out, sz, "EXEC");
+ else
+ l = scnprintf(out, sz, "No");
+
+ return l;
+}
+
int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
{
size_t i, l = 0;
u64 m = PERF_MEM_LVL_NA;
u64 hit, miss;
- int printed;
+ int printed = 0;
if (mem_info)
m = mem_info->data_src.mem_lvl;
@@ -335,18 +362,22 @@ int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
l += 7;
}
- if (mem_info && mem_info->data_src.mem_hops)
+ /*
+ * Incase mem_hops field is set, we can skip printing data source via
+ * PERF_MEM_LVL namespace.
+ */
+ if (mem_info && mem_info->data_src.mem_hops) {
l += scnprintf(out + l, sz - l, "%s ", mem_hops[mem_info->data_src.mem_hops]);
-
- printed = 0;
- for (i = 0; m && i < ARRAY_SIZE(mem_lvl); i++, m >>= 1) {
- if (!(m & 0x1))
- continue;
- if (printed++) {
- strcat(out, " or ");
- l += 4;
+ } else {
+ for (i = 0; m && i < ARRAY_SIZE(mem_lvl); i++, m >>= 1) {
+ if (!(m & 0x1))
+ continue;
+ if (printed++) {
+ strcat(out, " or ");
+ l += 4;
+ }
+ l += scnprintf(out + l, sz - l, mem_lvl[i]);
}
- l += scnprintf(out + l, sz - l, mem_lvl[i]);
}
if (mem_info && mem_info->data_src.mem_lvl_num) {
@@ -459,7 +490,10 @@ int perf_script__meminfo_scnprintf(char *out, size_t sz, struct mem_info *mem_in
{
int i = 0;
- i += perf_mem__lvl_scnprintf(out, sz, mem_info);
+ i += scnprintf(out, sz, "|OP ");
+ i += perf_mem__op_scnprintf(out + i, sz - i, mem_info);
+ i += scnprintf(out + i, sz - i, "|LVL ");
+ i += perf_mem__lvl_scnprintf(out + i, sz - i, mem_info);
i += scnprintf(out + i, sz - i, "|SNP ");
i += perf_mem__snp_scnprintf(out + i, sz - i, mem_info);
i += scnprintf(out + i, sz - i, "|TLB ");
@@ -575,6 +609,8 @@ do { \
}
if (lvl & P(LVL, MISS))
if (lvl & P(LVL, L1)) stats->st_l1miss++;
+ if (lvl & P(LVL, NA))
+ stats->st_na++;
} else {
/* unparsable data_src? */
stats->noparse++;
@@ -601,6 +637,7 @@ void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add)
stats->st_noadrs += add->st_noadrs;
stats->st_l1hit += add->st_l1hit;
stats->st_l1miss += add->st_l1miss;
+ stats->st_na += add->st_na;
stats->load += add->load;
stats->ld_excl += add->ld_excl;
stats->ld_shared += add->ld_shared;
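
perf_mem__op_scnprintf() is file-local, so the snippet below is only a behavioral illustration of the new op decoding (the buffer size and the zero-initialized mem_info are illustrative):

    char out[64];
    struct mem_info mi = { 0 };

    mi.data_src.mem_op = PERF_MEM_OP_LOAD;
    perf_mem__op_scnprintf(out, sizeof(out), &mi);   /* out == "LOAD" */

    mi.data_src.mem_op = PERF_MEM_OP_NA;
    perf_mem__op_scnprintf(out, sizeof(out), &mi);   /* out == "N/A" */
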
diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h
index 916242f8020a..8a8b568baeee 100644
--- a/tools/perf/util/mem-events.h
+++ b/tools/perf/util/mem-events.h
@@ -63,6 +63,7 @@ struct c2c_stats {
u32 st_noadrs; /* cacheable store with no address */
u32 st_l1hit; /* count of stores that hit L1D */
u32 st_l1miss; /* count of stores that miss L1D */
+ u32 st_na; /* count of stores with memory level is not available */
u32 load; /* count of all loads in trace */
u32 ld_excl; /* exclusive loads, rmt/lcl DRAM - snp none/miss */
u32 ld_shared; /* shared loads, rmt/lcl DRAM - snp hit */
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index fffe02aae3ed..8f7baeabc5cf 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -209,8 +209,8 @@ static struct metric *metric__new(const struct pmu_event *pe,
m->metric_name = pe->metric_name;
m->modifier = modifier ? strdup(modifier) : NULL;
if (modifier && !m->modifier) {
- free(m);
expr__ctx_free(m->pctx);
+ free(m);
return NULL;
}
m->metric_expr = pe->metric_expr;
@@ -314,7 +314,7 @@ static int setup_metric_events(struct hashmap *ids,
*/
metric_id = evsel__metric_id(ev);
evlist__for_each_entry_continue(metric_evlist, ev) {
- if (!strcmp(evsel__metric_id(metric_events[i]), metric_id))
+ if (!strcmp(evsel__metric_id(ev), metric_id))
ev->metric_leader = metric_events[i];
}
}
@@ -728,22 +728,23 @@ static int metricgroup__build_event_string(struct strbuf *events,
{
struct hashmap_entry *cur;
size_t bkt;
- bool no_group = true, has_duration = false;
+ bool no_group = true, has_tool_events = false;
+ bool tool_events[PERF_TOOL_MAX] = {false};
int ret = 0;
#define RETURN_IF_NON_ZERO(x) do { if (x) return x; } while (0)
hashmap__for_each_entry(ctx->ids, cur, bkt) {
const char *sep, *rsep, *id = cur->key;
+ enum perf_tool_event ev;
pr_debug("found event %s\n", id);
- /*
- * Duration time maps to a software event and can make
- * groups not count. Always use it outside a
- * group.
- */
- if (!strcmp(id, "duration_time")) {
- has_duration = true;
+
+ /* Always move tool events outside of the group. */
+ ev = perf_tool_event__from_str(id);
+ if (ev != PERF_TOOL_NONE) {
+ has_tool_events = true;
+ tool_events[ev] = true;
continue;
}
/* Separate events with commas and open the group if necessary. */
@@ -802,16 +803,25 @@ static int metricgroup__build_event_string(struct strbuf *events,
RETURN_IF_NON_ZERO(ret);
}
}
- if (has_duration) {
- if (no_group) {
- /* Strange case of a metric of just duration_time. */
- ret = strbuf_addf(events, "duration_time");
- } else if (!has_constraint)
- ret = strbuf_addf(events, "}:W,duration_time");
- else
- ret = strbuf_addf(events, ",duration_time");
- } else if (!no_group && !has_constraint)
+ if (!no_group && !has_constraint) {
ret = strbuf_addf(events, "}:W");
+ RETURN_IF_NON_ZERO(ret);
+ }
+ if (has_tool_events) {
+ int i;
+
+ perf_tool_event__for_each_event(i) {
+ if (tool_events[i]) {
+ if (!no_group) {
+ ret = strbuf_addch(events, ',');
+ RETURN_IF_NON_ZERO(ret);
+ }
+ no_group = false;
+ ret = strbuf_addstr(events, perf_tool_event__to_str(i));
+ RETURN_IF_NON_ZERO(ret);
+ }
+ }
+ }
return ret;
#undef RETURN_IF_NON_ZERO
@@ -1115,13 +1125,31 @@ out:
return ret;
}
+/**
+ * metric_list_cmp - list_sort comparator that sorts metrics with more events to
+ * the front. Tool events are excluded from the count.
+ */
static int metric_list_cmp(void *priv __maybe_unused, const struct list_head *l,
const struct list_head *r)
{
const struct metric *left = container_of(l, struct metric, nd);
const struct metric *right = container_of(r, struct metric, nd);
+ struct expr_id_data *data;
+ int i, left_count, right_count;
- return hashmap__size(right->pctx->ids) - hashmap__size(left->pctx->ids);
+ left_count = hashmap__size(left->pctx->ids);
+ perf_tool_event__for_each_event(i) {
+ if (!expr__get_id(left->pctx, perf_tool_event__to_str(i), &data))
+ left_count--;
+ }
+
+ right_count = hashmap__size(right->pctx->ids);
+ perf_tool_event__for_each_event(i) {
+ if (!expr__get_id(right->pctx, perf_tool_event__to_str(i), &data))
+ right_count--;
+ }
+
+ return right_count - left_count;
}
/**
@@ -1256,6 +1284,30 @@ static void metricgroup__free_metrics(struct list_head *metric_list)
}
/**
+ * find_tool_events - Search for the presence of tool events in metric_list.
+ * @metric_list: List to take metrics from.
+ * @tool_events: Array of false values, indices corresponding to tool events set
+ * to true if tool event is found.
+ */
+static void find_tool_events(const struct list_head *metric_list,
+ bool tool_events[PERF_TOOL_MAX])
+{
+ struct metric *m;
+
+ list_for_each_entry(m, metric_list, nd) {
+ int i;
+
+ perf_tool_event__for_each_event(i) {
+ struct expr_id_data *data;
+
+ if (!tool_events[i] &&
+ !expr__get_id(m->pctx, perf_tool_event__to_str(i), &data))
+ tool_events[i] = true;
+ }
+ }
+}
+
+/**
* build_combined_expr_ctx - Make an expr_parse_ctx with all has_constraint
* metric IDs, as the IDs are held in a set,
* duplicates will be removed.
@@ -1299,14 +1351,19 @@ err_out:
/**
* parse_ids - Build the event string for the ids and parse them creating an
* evlist. The encoded metric_ids are decoded.
+ * @metric_no_merge: is metric sharing explicitly disabled.
* @fake_pmu: used when testing metrics not supported by the current CPU.
* @ids: the event identifiers parsed from a metric.
* @modifier: any modifiers added to the events.
* @has_constraint: false if events should be placed in a weak group.
+ * @tool_events: entries set true if the tool event of index could be present in
+ * the overall list of metrics.
* @out_evlist: the created list of events.
*/
-static int parse_ids(struct perf_pmu *fake_pmu, struct expr_parse_ctx *ids,
- const char *modifier, bool has_constraint, struct evlist **out_evlist)
+static int parse_ids(bool metric_no_merge, struct perf_pmu *fake_pmu,
+ struct expr_parse_ctx *ids, const char *modifier,
+ bool has_constraint, const bool tool_events[PERF_TOOL_MAX],
+ struct evlist **out_evlist)
{
struct parse_events_error parse_error;
struct evlist *parsed_evlist;
@@ -1314,20 +1371,39 @@ static int parse_ids(struct perf_pmu *fake_pmu, struct expr_parse_ctx *ids,
int ret;
*out_evlist = NULL;
- if (hashmap__size(ids->ids) == 0) {
- char *tmp;
+ if (!metric_no_merge || hashmap__size(ids->ids) == 0) {
+ bool added_event = false;
+ int i;
/*
- * No ids/events in the expression parsing context. Events may
- * have been removed because of constant evaluation, e.g.:
- * event1 if #smt_on else 0
- * Add a duration_time event to avoid a parse error on an empty
- * string.
+ * We may fail to share events between metrics because a tool
+ * event isn't present in one metric. For example, a ratio of
+ * cache misses doesn't need duration_time but the same events
+ * may be used for a misses-per-second metric. Events without sharing
+ * imply multiplexing, which is best avoided, so place
+ * all tool events in every group.
+ *
+ * Also, there may be no ids/events in the expression parsing
+ * context because of constant evaluation, e.g.:
+ * event1 if #smt_on else 0
+ * Add a tool event to avoid a parse error on an empty string.
*/
- tmp = strdup("duration_time");
- if (!tmp)
- return -ENOMEM;
+ perf_tool_event__for_each_event(i) {
+ if (tool_events[i]) {
+ char *tmp = strdup(perf_tool_event__to_str(i));
+
+ if (!tmp)
+ return -ENOMEM;
+ ids__insert(ids->ids, tmp);
+ added_event = true;
+ }
+ }
+ if (!added_event && hashmap__size(ids->ids) == 0) {
+ char *tmp = strdup("duration_time");
- ids__insert(ids->ids, tmp);
+ if (!tmp)
+ return -ENOMEM;
+ ids__insert(ids->ids, tmp);
+ }
}
ret = metricgroup__build_event_string(&events, ids, modifier,
has_constraint);
@@ -1369,6 +1445,7 @@ static int parse_groups(struct evlist *perf_evlist, const char *str,
struct evlist *combined_evlist = NULL;
LIST_HEAD(metric_list);
struct metric *m;
+ bool tool_events[PERF_TOOL_MAX] = {false};
int ret;
if (metric_events_list->nr_entries == 0)
@@ -1384,11 +1461,15 @@ static int parse_groups(struct evlist *perf_evlist, const char *str,
if (!metric_no_merge) {
struct expr_parse_ctx *combined = NULL;
+ find_tool_events(&metric_list, tool_events);
+
ret = build_combined_expr_ctx(&metric_list, &combined);
if (!ret && combined && hashmap__size(combined->ids)) {
- ret = parse_ids(fake_pmu, combined, /*modifier=*/NULL,
+ ret = parse_ids(metric_no_merge, fake_pmu, combined,
+ /*modifier=*/NULL,
/*has_constraint=*/true,
+ tool_events,
&combined_evlist);
}
if (combined)
@@ -1435,8 +1516,8 @@ static int parse_groups(struct evlist *perf_evlist, const char *str,
}
}
if (!metric_evlist) {
- ret = parse_ids(fake_pmu, m->pctx, m->modifier,
- m->has_constraint, &m->evlist);
+ ret = parse_ids(metric_no_merge, fake_pmu, m->pctx, m->modifier,
+ m->has_constraint, tool_events, &m->evlist);
if (ret)
goto out;
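
The practical effect on the built event string: for a metric that combines hardware events with tool events, the result is now roughly of the form below (metric-id terms and modifiers omitted for brevity; previously only duration_time received this treatment):

    {instructions,cycles}:W,duration_time
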
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index 23ecdba9e670..a4dff881be39 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -62,8 +62,8 @@ void __weak auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp __maybe_u
void __weak auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp __maybe_unused,
struct evlist *evlist __maybe_unused,
- int idx __maybe_unused,
- bool per_cpu __maybe_unused)
+ struct evsel *evsel __maybe_unused,
+ int idx __maybe_unused)
{
}
@@ -94,7 +94,7 @@ static void perf_mmap__aio_free(struct mmap *map, int idx)
}
}
-static int perf_mmap__aio_bind(struct mmap *map, int idx, int cpu, int affinity)
+static int perf_mmap__aio_bind(struct mmap *map, int idx, struct perf_cpu cpu, int affinity)
{
void *data;
size_t mmap_len;
@@ -138,7 +138,7 @@ static void perf_mmap__aio_free(struct mmap *map, int idx)
}
static int perf_mmap__aio_bind(struct mmap *map __maybe_unused, int idx __maybe_unused,
- int cpu __maybe_unused, int affinity __maybe_unused)
+ struct perf_cpu cpu __maybe_unused, int affinity __maybe_unused)
{
return 0;
}
@@ -230,6 +230,10 @@ void mmap__munmap(struct mmap *map)
{
bitmap_free(map->affinity_mask.bits);
+#ifndef PYTHON_PERF
+ zstd_fini(&map->zstd_data);
+#endif
+
perf_mmap__aio_munmap(map);
if (map->data != NULL) {
munmap(map->data, mmap__mmap_len(map));
@@ -240,7 +244,8 @@ void mmap__munmap(struct mmap *map)
static void build_node_mask(int node, struct mmap_cpu_mask *mask)
{
- int c, cpu, nr_cpus;
+ int idx, nr_cpus;
+ struct perf_cpu cpu;
const struct perf_cpu_map *cpu_map = NULL;
cpu_map = cpu_map__online();
@@ -248,16 +253,16 @@ static void build_node_mask(int node, struct mmap_cpu_mask *mask)
return;
nr_cpus = perf_cpu_map__nr(cpu_map);
- for (c = 0; c < nr_cpus; c++) {
- cpu = cpu_map->map[c]; /* map c index to online cpu index */
+ for (idx = 0; idx < nr_cpus; idx++) {
+ cpu = perf_cpu_map__cpu(cpu_map, idx); /* map idx to an online cpu */
if (cpu__get_node(cpu) == node)
- set_bit(cpu, mask->bits);
+ set_bit(cpu.cpu, mask->bits);
}
}
static int perf_mmap__setup_affinity_mask(struct mmap *map, struct mmap_params *mp)
{
- map->affinity_mask.nbits = cpu__max_cpu();
+ map->affinity_mask.nbits = cpu__max_cpu().cpu;
map->affinity_mask.bits = bitmap_zalloc(map->affinity_mask.nbits);
if (!map->affinity_mask.bits)
return -1;
@@ -265,12 +270,12 @@ static int perf_mmap__setup_affinity_mask(struct mmap *map, struct mmap_params *
if (mp->affinity == PERF_AFFINITY_NODE && cpu__max_node() > 1)
build_node_mask(cpu__get_node(map->core.cpu), &map->affinity_mask);
else if (mp->affinity == PERF_AFFINITY_CPU)
- set_bit(map->core.cpu, map->affinity_mask.bits);
+ set_bit(map->core.cpu.cpu, map->affinity_mask.bits);
return 0;
}
-int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu)
+int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, struct perf_cpu cpu)
{
if (perf_mmap__mmap(&map->core, &mp->core, fd, cpu)) {
pr_debug2("failed to mmap perf event ring buffer, error %d\n",
@@ -291,6 +296,12 @@ int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu)
map->core.flush = mp->flush;
map->comp_level = mp->comp_level;
+#ifndef PYTHON_PERF
+ if (zstd_init(&map->zstd_data, map->comp_level)) {
+ pr_debug2("failed to init mmap compressor, error %d\n", errno);
+ return -1;
+ }
+#endif
if (map->comp_level && !perf_mmap__aio_enabled(map)) {
map->data = mmap(NULL, mmap__mmap_len(map), PROT_READ|PROT_WRITE,
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index 8e259b9610f8..cd8b0777473b 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -7,6 +7,7 @@
#include <linux/types.h>
#include <linux/ring_buffer.h>
#include <linux/bitops.h>
+#include <perf/cpumap.h>
#include <stdbool.h>
#include <pthread.h> // for cpu_set_t
#ifdef HAVE_AIO_SUPPORT
@@ -14,6 +15,7 @@
#endif
#include "auxtrace.h"
#include "event.h"
+#include "util/compress.h"
struct aiocb;
@@ -44,6 +46,8 @@ struct mmap {
struct mmap_cpu_mask affinity_mask;
void *data;
int comp_level;
+ struct perf_data_file *file;
+ struct zstd_data zstd_data;
};
struct mmap_params {
@@ -52,7 +56,7 @@ struct mmap_params {
struct auxtrace_mmap_params auxtrace_mp;
};
-int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu);
+int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, struct perf_cpu cpu);
void mmap__munmap(struct mmap *map);
union perf_event *perf_mmap__read_forward(struct mmap *map);
diff --git a/tools/perf/util/namespaces.c b/tools/perf/util/namespaces.c
index 608b20c72a5c..dd536220cdb9 100644
--- a/tools/perf/util/namespaces.c
+++ b/tools/perf/util/namespaces.c
@@ -60,23 +60,55 @@ void namespaces__free(struct namespaces *namespaces)
free(namespaces);
}
+static int nsinfo__get_nspid(struct nsinfo *nsi, const char *path)
+{
+ FILE *f = NULL;
+ char *statln = NULL;
+ size_t linesz = 0;
+ char *nspid;
+
+ f = fopen(path, "r");
+ if (f == NULL)
+ return -1;
+
+ while (getline(&statln, &linesz, f) != -1) {
+ /* Use tgid if CONFIG_PID_NS is not defined. */
+ if (strstr(statln, "Tgid:") != NULL) {
+ nsi->tgid = (pid_t)strtol(strrchr(statln, '\t'),
+ NULL, 10);
+ nsi->nstgid = nsinfo__tgid(nsi);
+ }
+
+ if (strstr(statln, "NStgid:") != NULL) {
+ nspid = strrchr(statln, '\t');
+ nsi->nstgid = (pid_t)strtol(nspid, NULL, 10);
+ /*
+ * If innermost tgid is not the first, process is in a different
+ * PID namespace.
+ */
+ nsi->in_pidns = (statln + sizeof("NStgid:") - 1) != nspid;
+ break;
+ }
+ }
+
+ fclose(f);
+ free(statln);
+ return 0;
+}
+
int nsinfo__init(struct nsinfo *nsi)
{
char oldns[PATH_MAX];
char spath[PATH_MAX];
char *newns = NULL;
- char *statln = NULL;
- char *nspid;
struct stat old_stat;
struct stat new_stat;
- FILE *f = NULL;
- size_t linesz = 0;
int rv = -1;
if (snprintf(oldns, PATH_MAX, "/proc/self/ns/mnt") >= PATH_MAX)
return rv;
- if (asprintf(&newns, "/proc/%d/ns/mnt", nsi->pid) == -1)
+ if (asprintf(&newns, "/proc/%d/ns/mnt", nsinfo__pid(nsi)) == -1)
return rv;
if (stat(oldns, &old_stat) < 0)
@@ -97,37 +129,12 @@ int nsinfo__init(struct nsinfo *nsi)
/* If we're dealing with a process that is in a different PID namespace,
* attempt to work out the innermost tgid for the process.
*/
- if (snprintf(spath, PATH_MAX, "/proc/%d/status", nsi->pid) >= PATH_MAX)
+ if (snprintf(spath, PATH_MAX, "/proc/%d/status", nsinfo__pid(nsi)) >= PATH_MAX)
goto out;
- f = fopen(spath, "r");
- if (f == NULL)
- goto out;
-
- while (getline(&statln, &linesz, f) != -1) {
- /* Use tgid if CONFIG_PID_NS is not defined. */
- if (strstr(statln, "Tgid:") != NULL) {
- nsi->tgid = (pid_t)strtol(strrchr(statln, '\t'),
- NULL, 10);
- nsi->nstgid = nsi->tgid;
- }
-
- if (strstr(statln, "NStgid:") != NULL) {
- nspid = strrchr(statln, '\t');
- nsi->nstgid = (pid_t)strtol(nspid, NULL, 10);
- /* If innermost tgid is not the first, process is in a different
- * PID namespace.
- */
- nsi->in_pidns = (statln + sizeof("NStgid:") - 1) != nspid;
- break;
- }
- }
- rv = 0;
+ rv = nsinfo__get_nspid(nsi, spath);
out:
- if (f != NULL)
- (void) fclose(f);
- free(statln);
free(newns);
return rv;
}
@@ -159,7 +166,7 @@ struct nsinfo *nsinfo__new(pid_t pid)
return nsi;
}
-struct nsinfo *nsinfo__copy(struct nsinfo *nsi)
+struct nsinfo *nsinfo__copy(const struct nsinfo *nsi)
{
struct nsinfo *nnsi;
@@ -168,11 +175,11 @@ struct nsinfo *nsinfo__copy(struct nsinfo *nsi)
nnsi = calloc(1, sizeof(*nnsi));
if (nnsi != NULL) {
- nnsi->pid = nsi->pid;
- nnsi->tgid = nsi->tgid;
- nnsi->nstgid = nsi->nstgid;
- nnsi->need_setns = nsi->need_setns;
- nnsi->in_pidns = nsi->in_pidns;
+ nnsi->pid = nsinfo__pid(nsi);
+ nnsi->tgid = nsinfo__tgid(nsi);
+ nnsi->nstgid = nsinfo__nstgid(nsi);
+ nnsi->need_setns = nsinfo__need_setns(nsi);
+ nnsi->in_pidns = nsinfo__in_pidns(nsi);
if (nsi->mntns_path) {
nnsi->mntns_path = strdup(nsi->mntns_path);
if (!nnsi->mntns_path) {
@@ -186,7 +193,7 @@ struct nsinfo *nsinfo__copy(struct nsinfo *nsi)
return nnsi;
}
-void nsinfo__delete(struct nsinfo *nsi)
+static void nsinfo__delete(struct nsinfo *nsi)
{
zfree(&nsi->mntns_path);
free(nsi);
@@ -205,6 +212,36 @@ void nsinfo__put(struct nsinfo *nsi)
nsinfo__delete(nsi);
}
+bool nsinfo__need_setns(const struct nsinfo *nsi)
+{
+ return nsi->need_setns;
+}
+
+void nsinfo__clear_need_setns(struct nsinfo *nsi)
+{
+ nsi->need_setns = false;
+}
+
+pid_t nsinfo__tgid(const struct nsinfo *nsi)
+{
+ return nsi->tgid;
+}
+
+pid_t nsinfo__nstgid(const struct nsinfo *nsi)
+{
+ return nsi->nstgid;
+}
+
+pid_t nsinfo__pid(const struct nsinfo *nsi)
+{
+ return nsi->pid;
+}
+
+pid_t nsinfo__in_pidns(const struct nsinfo *nsi)
+{
+ return nsi->in_pidns;
+}
+
void nsinfo__mountns_enter(struct nsinfo *nsi,
struct nscookie *nc)
{
@@ -299,3 +336,12 @@ int nsinfo__stat(const char *filename, struct stat *st, struct nsinfo *nsi)
return ret;
}
+
+bool nsinfo__is_in_root_namespace(void)
+{
+ struct nsinfo nsi;
+
+ memset(&nsi, 0x0, sizeof(nsi));
+ nsinfo__get_nspid(&nsi, "/proc/self/status");
+ return !nsi.in_pidns;
+}
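For reference, the NStgid parsing that nsinfo__get_nspid() and nsinfo__is_in_root_namespace() rely on can be tried outside perf; a minimal standalone sketch (in_root_pid_namespace() is an illustrative name, not a perf function) that reads /proc/self/status the same way:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>

/* Return true when the innermost NStgid equals the first one, i.e. the
 * process is not running inside a nested PID namespace. */
static bool in_root_pid_namespace(void)
{
	FILE *f = fopen("/proc/self/status", "r");
	char *line = NULL;
	size_t sz = 0;
	bool root = true;

	if (!f)
		return true;

	while (getline(&line, &sz, f) != -1) {
		if (strstr(line, "NStgid:") != NULL) {
			/* The last tab-separated value is the innermost tgid. */
			char *nspid = strrchr(line, '\t');

			if (nspid)
				root = (line + sizeof("NStgid:") - 1) == nspid;
			break;
		}
	}
	fclose(f);
	free(line);
	return root;
}

int main(void)
{
	printf("in root PID namespace: %s\n", in_root_pid_namespace() ? "yes" : "no");
	return 0;
}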
diff --git a/tools/perf/util/namespaces.h b/tools/perf/util/namespaces.h
index ad9775db7b9c..567829262c42 100644
--- a/tools/perf/util/namespaces.h
+++ b/tools/perf/util/namespaces.h
@@ -47,18 +47,26 @@ struct nscookie {
int nsinfo__init(struct nsinfo *nsi);
struct nsinfo *nsinfo__new(pid_t pid);
-struct nsinfo *nsinfo__copy(struct nsinfo *nsi);
-void nsinfo__delete(struct nsinfo *nsi);
+struct nsinfo *nsinfo__copy(const struct nsinfo *nsi);
struct nsinfo *nsinfo__get(struct nsinfo *nsi);
void nsinfo__put(struct nsinfo *nsi);
+bool nsinfo__need_setns(const struct nsinfo *nsi);
+void nsinfo__clear_need_setns(struct nsinfo *nsi);
+pid_t nsinfo__tgid(const struct nsinfo *nsi);
+pid_t nsinfo__nstgid(const struct nsinfo *nsi);
+pid_t nsinfo__pid(const struct nsinfo *nsi);
+bool nsinfo__in_pidns(const struct nsinfo *nsi);
+
void nsinfo__mountns_enter(struct nsinfo *nsi, struct nscookie *nc);
void nsinfo__mountns_exit(struct nscookie *nc);
char *nsinfo__realpath(const char *path, struct nsinfo *nsi);
int nsinfo__stat(const char *filename, struct stat *st, struct nsinfo *nsi);
+bool nsinfo__is_in_root_namespace(void);
+
static inline void __nsinfo__zput(struct nsinfo **nsip)
{
if (nsip) {
diff --git a/tools/perf/util/off_cpu.h b/tools/perf/util/off_cpu.h
new file mode 100644
index 000000000000..548008f74d42
--- /dev/null
+++ b/tools/perf/util/off_cpu.h
@@ -0,0 +1,29 @@
+#ifndef PERF_UTIL_OFF_CPU_H
+#define PERF_UTIL_OFF_CPU_H
+
+struct evlist;
+struct target;
+struct perf_session;
+struct record_opts;
+
+#define OFFCPU_EVENT "offcpu-time"
+
+#ifdef HAVE_BPF_SKEL
+int off_cpu_prepare(struct evlist *evlist, struct target *target,
+ struct record_opts *opts);
+int off_cpu_write(struct perf_session *session);
+#else
+static inline int off_cpu_prepare(struct evlist *evlist __maybe_unused,
+ struct target *target __maybe_unused,
+ struct record_opts *opts __maybe_unused)
+{
+ return -1;
+}
+
+static inline int off_cpu_write(struct perf_session *session __maybe_unused)
+{
+ return -1;
+}
+#endif
+
+#endif /* PERF_UTIL_OFF_CPU_H */
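off_cpu.h follows the usual perf pattern for optional BPF features: with HAVE_BPF_SKEL the real off_cpu_prepare()/off_cpu_write() are linked in, otherwise inline stubs return -1 so callers stay free of #ifdefs. A minimal standalone sketch of that pattern (HAVE_EXAMPLE_FEATURE and example_prepare() are made-up names):

#include <stdio.h>

/* Hypothetical feature guard mirroring the HAVE_BPF_SKEL stub pattern:
 * the real helper exists only when the feature is compiled in, otherwise
 * an inline stub reports failure and callers need no conditionals. */
#ifdef HAVE_EXAMPLE_FEATURE
int example_prepare(void);
#else
static inline int example_prepare(void)
{
	return -1;
}
#endif

int main(void)
{
	if (example_prepare() < 0)
		printf("feature not built in, falling back\n");
	return 0;
}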
diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c
index 48c8f609441b..b887dfeea673 100644
--- a/tools/perf/util/ordered-events.c
+++ b/tools/perf/util/ordered-events.c
@@ -192,7 +192,7 @@ void ordered_events__delete(struct ordered_events *oe, struct ordered_event *eve
}
int ordered_events__queue(struct ordered_events *oe, union perf_event *event,
- u64 timestamp, u64 file_offset)
+ u64 timestamp, u64 file_offset, const char *file_path)
{
struct ordered_event *oevent;
@@ -217,6 +217,7 @@ int ordered_events__queue(struct ordered_events *oe, union perf_event *event,
return -ENOMEM;
oevent->file_offset = file_offset;
+ oevent->file_path = file_path;
return 0;
}
diff --git a/tools/perf/util/ordered-events.h b/tools/perf/util/ordered-events.h
index 75345946c4b9..0b05c3c0aeaa 100644
--- a/tools/perf/util/ordered-events.h
+++ b/tools/perf/util/ordered-events.h
@@ -9,6 +9,7 @@ struct perf_sample;
struct ordered_event {
u64 timestamp;
u64 file_offset;
+ const char *file_path;
union perf_event *event;
struct list_head list;
};
@@ -53,7 +54,7 @@ struct ordered_events {
};
int ordered_events__queue(struct ordered_events *oe, union perf_event *event,
- u64 timestamp, u64 file_offset);
+ u64 timestamp, u64 file_offset, const char *file_path);
void ordered_events__delete(struct ordered_events *oe, struct ordered_event *event);
int ordered_events__flush(struct ordered_events *oe, enum oe_flush how);
int ordered_events__flush_time(struct ordered_events *oe, u64 timestamp);
diff --git a/tools/perf/util/parse-events-hybrid.c b/tools/perf/util/parse-events-hybrid.c
index 9fc86971027b..284f8eabd3b9 100644
--- a/tools/perf/util/parse-events-hybrid.c
+++ b/tools/perf/util/parse-events-hybrid.c
@@ -63,10 +63,13 @@ static int create_event_hybrid(__u32 config_type, int *idx,
static int pmu_cmp(struct parse_events_state *parse_state,
struct perf_pmu *pmu)
{
- if (!parse_state->hybrid_pmu_name)
- return 0;
+ if (parse_state->evlist && parse_state->evlist->hybrid_pmu_name)
+ return strcmp(parse_state->evlist->hybrid_pmu_name, pmu->name);
+
+ if (parse_state->hybrid_pmu_name)
+ return strcmp(parse_state->hybrid_pmu_name, pmu->name);
- return strcmp(parse_state->hybrid_pmu_name, pmu->name);
+ return 0;
}
static int add_hw_hybrid(struct parse_events_state *parse_state,
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index ba74fdf74af9..7ed235740431 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -154,6 +154,21 @@ struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = {
},
};
+struct event_symbol event_symbols_tool[PERF_TOOL_MAX] = {
+ [PERF_TOOL_DURATION_TIME] = {
+ .symbol = "duration_time",
+ .alias = "",
+ },
+ [PERF_TOOL_USER_TIME] = {
+ .symbol = "user_time",
+ .alias = "",
+ },
+ [PERF_TOOL_SYSTEM_TIME] = {
+ .symbol = "system_time",
+ .alias = "",
+ },
+};
+
#define __PERF_EVENT_FIELD(config, name) \
((config & PERF_EVENT_##name##_MASK) >> PERF_EVENT_##name##_SHIFT)
@@ -350,7 +365,7 @@ __add_event(struct list_head *list, int *idx,
(*idx)++;
evsel->core.cpus = cpus;
evsel->core.own_cpus = perf_cpu_map__get(cpus);
- evsel->core.system_wide = pmu ? pmu->is_uncore : false;
+ evsel->core.requires_cpu = pmu ? pmu->is_uncore : false;
evsel->auto_merge_stats = auto_merge_stats;
if (name)
@@ -402,14 +417,16 @@ static int add_event_tool(struct list_head *list, int *idx,
if (!evsel)
return -ENOMEM;
evsel->tool_event = tool_event;
- if (tool_event == PERF_TOOL_DURATION_TIME) {
+ if (tool_event == PERF_TOOL_DURATION_TIME
+ || tool_event == PERF_TOOL_USER_TIME
+ || tool_event == PERF_TOOL_SYSTEM_TIME) {
free((char *)evsel->unit);
evsel->unit = strdup("ns");
}
return 0;
}
-static int parse_aliases(char *str, const char *names[][EVSEL__MAX_ALIASES], int size)
+static int parse_aliases(char *str, const char *const names[][EVSEL__MAX_ALIASES], int size)
{
int i, j;
int n, longest = -1;
@@ -1523,7 +1540,9 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
bool use_uncore_alias;
LIST_HEAD(config_terms);
- if (verbose > 1) {
+ pmu = parse_state->fake_pmu ?: perf_pmu__find(name);
+
+ if (verbose > 1 && !(pmu && pmu->selectable)) {
fprintf(stderr, "Attempting to add event pmu '%s' with '",
name);
if (head_config) {
@@ -1536,7 +1555,6 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
fprintf(stderr, "' that may result in non-fatal errors\n");
}
- pmu = parse_state->fake_pmu ?: perf_pmu__find(name);
if (!pmu) {
char *err_str;
@@ -1648,6 +1666,7 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
{
struct parse_events_term *term;
struct list_head *list = NULL;
+ struct list_head *orig_head = NULL;
struct perf_pmu *pmu = NULL;
int ok = 0;
char *config;
@@ -1674,7 +1693,6 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
}
list_add_tail(&term->list, head);
-
/* Add it for all PMUs that support the alias */
list = malloc(sizeof(struct list_head));
if (!list)
@@ -1687,16 +1705,27 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
list_for_each_entry(alias, &pmu->aliases, list) {
if (!strcasecmp(alias->name, str)) {
+ parse_events_copy_term_list(head, &orig_head);
if (!parse_events_add_pmu(parse_state, list,
- pmu->name, head,
+ pmu->name, orig_head,
true, true)) {
pr_debug("%s -> %s/%s/\n", str,
pmu->name, alias->str);
ok++;
}
+ parse_events_terms__delete(orig_head);
}
}
}
+
+ if (parse_state->fake_pmu) {
+ if (!parse_events_add_pmu(parse_state, list, str, head,
+ true, true)) {
+ pr_debug("%s -> %s/%s/\n", str, "fake_pmu", str);
+ ok++;
+ }
+ }
+
out_err:
if (ok)
*listp = list;
@@ -1824,6 +1853,11 @@ out:
return ret;
}
+__weak struct evsel *arch_evlist__leader(struct list_head *list)
+{
+ return list_first_entry(list, struct evsel, core.node);
+}
+
void parse_events__set_leader(char *name, struct list_head *list,
struct parse_events_state *parse_state)
{
@@ -1837,9 +1871,10 @@ void parse_events__set_leader(char *name, struct list_head *list,
if (parse_events__set_leader_for_uncore_aliase(name, list, parse_state))
return;
- __perf_evlist__set_leader(list);
- leader = list_entry(list->next, struct evsel, core.node);
+ leader = arch_evlist__leader(list);
+ __perf_evlist__set_leader(list, &leader->core);
leader->group_name = name ? strdup(name) : NULL;
+ list_move(&leader->core.node, list);
}
/* list_event is assumed to point to malloc'ed memory */
@@ -2092,8 +2127,17 @@ static void perf_pmu__parse_init(void)
pmu = NULL;
while ((pmu = perf_pmu__scan(pmu)) != NULL) {
list_for_each_entry(alias, &pmu->aliases, list) {
- if (strchr(alias->name, '-'))
+ char *tmp = strchr(alias->name, '-');
+
+ if (tmp) {
+ char *tmp2 = NULL;
+
+ tmp2 = strchr(tmp + 1, '-');
len++;
+ if (tmp2)
+ len++;
+ }
+
len++;
}
}
@@ -2113,8 +2157,20 @@ static void perf_pmu__parse_init(void)
list_for_each_entry(alias, &pmu->aliases, list) {
struct perf_pmu_event_symbol *p = perf_pmu_events_list + len;
char *tmp = strchr(alias->name, '-');
+ char *tmp2 = NULL;
- if (tmp != NULL) {
+ if (tmp)
+ tmp2 = strchr(tmp + 1, '-');
+ if (tmp2) {
+ SET_SYMBOL(strndup(alias->name, tmp - alias->name),
+ PMU_EVENT_SYMBOL_PREFIX);
+ p++;
+ tmp++;
+ SET_SYMBOL(strndup(tmp, tmp2 - tmp), PMU_EVENT_SYMBOL_SUFFIX);
+ p++;
+ SET_SYMBOL(strdup(++tmp2), PMU_EVENT_SYMBOL_SUFFIX2);
+ len += 3;
+ } else if (tmp) {
SET_SYMBOL(strndup(alias->name, tmp - alias->name),
PMU_EVENT_SYMBOL_PREFIX);
p++;
@@ -2141,23 +2197,38 @@ err:
*/
int perf_pmu__test_parse_init(void)
{
- struct perf_pmu_event_symbol *list;
+ struct perf_pmu_event_symbol *list, *tmp, symbols[] = {
+ {(char *)"read", PMU_EVENT_SYMBOL},
+ {(char *)"event", PMU_EVENT_SYMBOL_PREFIX},
+ {(char *)"two", PMU_EVENT_SYMBOL_SUFFIX},
+ {(char *)"hyphen", PMU_EVENT_SYMBOL_SUFFIX},
+ {(char *)"hyph", PMU_EVENT_SYMBOL_SUFFIX2},
+ };
+ unsigned long i, j;
- list = malloc(sizeof(*list) * 1);
+ tmp = list = malloc(sizeof(*list) * ARRAY_SIZE(symbols));
if (!list)
return -ENOMEM;
- list->type = PMU_EVENT_SYMBOL;
- list->symbol = strdup("read");
-
- if (!list->symbol) {
- free(list);
- return -ENOMEM;
+ for (i = 0; i < ARRAY_SIZE(symbols); i++, tmp++) {
+ tmp->type = symbols[i].type;
+ tmp->symbol = strdup(symbols[i].symbol);
+ if (!tmp->symbol)
+ goto err_free;
}
perf_pmu_events_list = list;
- perf_pmu_events_list_num = 1;
+ perf_pmu_events_list_num = ARRAY_SIZE(symbols);
+
+ qsort(perf_pmu_events_list, ARRAY_SIZE(symbols),
+ sizeof(struct perf_pmu_event_symbol), comp_pmu);
return 0;
+
+err_free:
+ for (j = 0, tmp = list; j < i; j++, tmp++)
+ free(tmp->symbol);
+ free(list);
+ return -ENOMEM;
}
enum perf_pmu_event_symbol_type
@@ -3002,21 +3073,34 @@ out_enomem:
return evt_num;
}
-static void print_tool_event(const char *name, const char *event_glob,
+static void print_tool_event(const struct event_symbol *syms, const char *event_glob,
bool name_only)
{
- if (event_glob && !strglobmatch(name, event_glob))
+ if (syms->symbol == NULL)
+ return;
+
+ if (event_glob && !(strglobmatch(syms->symbol, event_glob) ||
+ (syms->alias && strglobmatch(syms->alias, event_glob))))
return;
+
if (name_only)
- printf("%s ", name);
- else
+ printf("%s ", syms->symbol);
+ else {
+ char name[MAX_NAME_LEN];
+ if (syms->alias && strlen(syms->alias))
+ snprintf(name, MAX_NAME_LEN, "%s OR %s", syms->symbol, syms->alias);
+ else
+ strlcpy(name, syms->symbol, MAX_NAME_LEN);
printf(" %-50s [%s]\n", name, "Tool event");
-
+ }
}
void print_tool_events(const char *event_glob, bool name_only)
{
- print_tool_event("duration_time", event_glob, name_only);
+ // Start at 1 because the first enum entry denotes no tool event
+ for (int i = 1; i < PERF_TOOL_MAX; ++i) {
+ print_tool_event(event_symbols_tool + i, event_glob, name_only);
+ }
if (pager_in_use())
printf("\n");
}
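user_time and system_time join duration_time as tool events: values perf computes itself and reports in nanoseconds (hence the "ns" unit above) rather than counts read from a PMU. A rough standalone illustration of that kind of accounting with getrusage(); this is only a sketch of the idea, not how perf implements these events:

#include <stdio.h>
#include <sys/types.h>
#include <sys/resource.h>
#include <sys/wait.h>
#include <unistd.h>

/* Run a trivial child workload and print its user and system CPU time
 * in nanoseconds, converted from the timeval values of getrusage(). */
int main(void)
{
	struct rusage ru;
	pid_t pid = fork();

	if (pid == 0) {
		execlp("true", "true", (char *)NULL);
		_exit(127);
	}
	waitpid(pid, NULL, 0);
	getrusage(RUSAGE_CHILDREN, &ru);

	printf("user_time:   %lld ns\n",
	       (long long)ru.ru_utime.tv_sec * 1000000000LL + ru.ru_utime.tv_usec * 1000LL);
	printf("system_time: %lld ns\n",
	       (long long)ru.ru_stime.tv_sec * 1000000000LL + ru.ru_stime.tv_usec * 1000LL);
	return 0;
}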
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index c7fc93f54577..a38b8b160e80 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -53,6 +53,7 @@ enum perf_pmu_event_symbol_type {
PMU_EVENT_SYMBOL, /* normal style PMU event */
PMU_EVENT_SYMBOL_PREFIX, /* prefix of pre-suf style event */
PMU_EVENT_SYMBOL_SUFFIX, /* suffix of pre-suf style event */
+ PMU_EVENT_SYMBOL_SUFFIX2, /* suffix of pre-suf2 style event */
};
struct perf_pmu_event_symbol {
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 4efe9872c667..3a9ce96c8bce 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -149,6 +149,8 @@ static int pmu_str_check(yyscan_t scanner, struct parse_events_state *parse_stat
return PE_PMU_EVENT_PRE;
case PMU_EVENT_SYMBOL_SUFFIX:
return PE_PMU_EVENT_SUF;
+ case PMU_EVENT_SYMBOL_SUFFIX2:
+ return PE_PMU_EVENT_SUF2;
case PMU_EVENT_SYMBOL:
return parse_state->fake_pmu
? PE_PMU_EVENT_FAKE : PE_KERNEL_PMU_EVENT;
@@ -351,6 +353,8 @@ alignment-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_AL
emulation-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS); }
dummy { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); }
duration_time { return tool(yyscanner, PERF_TOOL_DURATION_TIME); }
+user_time { return tool(yyscanner, PERF_TOOL_USER_TIME); }
+system_time { return tool(yyscanner, PERF_TOOL_SYSTEM_TIME); }
bpf-output { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); }
cgroup-switches { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CGROUP_SWITCHES); }
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index 174158982fae..be8c51770051 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -69,7 +69,7 @@ static void inc_group_count(struct list_head *list,
%token PE_NAME_CACHE_TYPE PE_NAME_CACHE_OP_RESULT
%token PE_PREFIX_MEM PE_PREFIX_RAW PE_PREFIX_GROUP
%token PE_ERROR
-%token PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE
+%token PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_PMU_EVENT_SUF2 PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE
%token PE_ARRAY_ALL PE_ARRAY_RANGE
%token PE_DRV_CFG_TERM
%type <num> PE_VALUE
@@ -87,7 +87,7 @@ static void inc_group_count(struct list_head *list,
%type <str> PE_MODIFIER_EVENT
%type <str> PE_MODIFIER_BP
%type <str> PE_EVENT_NAME
-%type <str> PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE
+%type <str> PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_PMU_EVENT_SUF2 PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE
%type <str> PE_DRV_CFG_TERM
%type <str> event_pmu_name
%destructor { free ($$); } <str>
@@ -372,6 +372,19 @@ PE_KERNEL_PMU_EVENT opt_pmu_config
$$ = list;
}
|
+PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF '-' PE_PMU_EVENT_SUF2 sep_dc
+{
+ struct list_head *list;
+ char pmu_name[128];
+ snprintf(pmu_name, sizeof(pmu_name), "%s-%s-%s", $1, $3, $5);
+ free($1);
+ free($3);
+ free($5);
+ if (parse_events_multi_pmu_add(_parse_state, pmu_name, NULL, &list) < 0)
+ YYABORT;
+ $$ = list;
+}
+|
PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF sep_dc
{
struct list_head *list;
diff --git a/tools/perf/util/path.c b/tools/perf/util/path.c
index caed0336429f..ce80b79be103 100644
--- a/tools/perf/util/path.c
+++ b/tools/perf/util/path.c
@@ -86,9 +86,21 @@ bool is_directory(const char *base_path, const struct dirent *dent)
char path[PATH_MAX];
struct stat st;
- sprintf(path, "%s/%s", base_path, dent->d_name);
+ snprintf(path, sizeof(path), "%s/%s", base_path, dent->d_name);
if (stat(path, &st))
return false;
return S_ISDIR(st.st_mode);
}
+
+bool is_executable_file(const char *base_path, const struct dirent *dent)
+{
+ char path[PATH_MAX];
+ struct stat st;
+
+ snprintf(path, sizeof(path), "%s/%s", base_path, dent->d_name);
+ if (stat(path, &st))
+ return false;
+
+ return !S_ISDIR(st.st_mode) && (st.st_mode & S_IXUSR);
+}
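A hypothetical caller of the new helper, listing the executable entries of a directory; the check is copied inline so the sketch builds on its own (is_executable_file() itself is only declared in path.h below):

#include <dirent.h>
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>
#include <sys/stat.h>

/* Same check as the helper above, duplicated so the sketch is standalone. */
static bool executable_file(const char *base_path, const struct dirent *dent)
{
	char path[PATH_MAX];
	struct stat st;

	snprintf(path, sizeof(path), "%s/%s", base_path, dent->d_name);
	if (stat(path, &st))
		return false;

	return !S_ISDIR(st.st_mode) && (st.st_mode & S_IXUSR);
}

int main(void)
{
	DIR *dir = opendir("/usr/bin");
	struct dirent *dent;

	if (!dir)
		return 1;
	while ((dent = readdir(dir)) != NULL) {
		if (executable_file("/usr/bin", dent))
			printf("%s\n", dent->d_name);
	}
	closedir(dir);
	return 0;
}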
diff --git a/tools/perf/util/path.h b/tools/perf/util/path.h
index 083429b7efa3..d94902c22222 100644
--- a/tools/perf/util/path.h
+++ b/tools/perf/util/path.h
@@ -12,5 +12,6 @@ int path__join3(char *bf, size_t size, const char *path1, const char *path2, con
bool is_regular_file(const char *file);
bool is_directory(const char *base_path, const struct dirent *dent);
+bool is_executable_file(const char *base_path, const struct dirent *dent);
#endif /* _PERF_PATH_H */
diff --git a/tools/perf/util/perf_api_probe.c b/tools/perf/util/perf_api_probe.c
index 020411682a3c..c28dd50bd571 100644
--- a/tools/perf/util/perf_api_probe.c
+++ b/tools/perf/util/perf_api_probe.c
@@ -11,7 +11,7 @@
typedef void (*setup_probe_fn_t)(struct evsel *evsel);
-static int perf_do_probe_api(setup_probe_fn_t fn, int cpu, const char *str)
+static int perf_do_probe_api(setup_probe_fn_t fn, struct perf_cpu cpu, const char *str)
{
struct evlist *evlist;
struct evsel *evsel;
@@ -29,7 +29,7 @@ static int perf_do_probe_api(setup_probe_fn_t fn, int cpu, const char *str)
evsel = evlist__first(evlist);
while (1) {
- fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags);
+ fd = sys_perf_event_open(&evsel->core.attr, pid, cpu.cpu, -1, flags);
if (fd < 0) {
if (pid == -1 && errno == EACCES) {
pid = 0;
@@ -43,7 +43,7 @@ static int perf_do_probe_api(setup_probe_fn_t fn, int cpu, const char *str)
fn(evsel);
- fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags);
+ fd = sys_perf_event_open(&evsel->core.attr, pid, cpu.cpu, -1, flags);
if (fd < 0) {
if (errno == EINVAL)
err = -EINVAL;
@@ -61,12 +61,13 @@ static bool perf_probe_api(setup_probe_fn_t fn)
{
const char *try[] = {"cycles:u", "instructions:u", "cpu-clock:u", NULL};
struct perf_cpu_map *cpus;
- int cpu, ret, i = 0;
+ struct perf_cpu cpu;
+ int ret, i = 0;
cpus = perf_cpu_map__new(NULL);
if (!cpus)
return false;
- cpu = cpus->map[0];
+ cpu = perf_cpu_map__cpu(cpus, 0);
perf_cpu_map__put(cpus);
do {
@@ -136,15 +137,17 @@ bool perf_can_record_cpu_wide(void)
.exclude_kernel = 1,
};
struct perf_cpu_map *cpus;
- int cpu, fd;
+ struct perf_cpu cpu;
+ int fd;
cpus = perf_cpu_map__new(NULL);
if (!cpus)
return false;
- cpu = cpus->map[0];
+
+ cpu = perf_cpu_map__cpu(cpus, 0);
perf_cpu_map__put(cpus);
- fd = sys_perf_event_open(&attr, -1, cpu, -1, 0);
+ fd = sys_perf_event_open(&attr, -1, cpu.cpu, -1, 0);
if (fd < 0)
return false;
close(fd);
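The perf_api_probe.c hunks above are part of a wider conversion from plain `int cpu` to `struct perf_cpu`, whose single `.cpu` member carries the logical CPU number; wrapping the int lets the compiler catch places where a CPU number and a CPU-map index get confused. A toy sketch of the idea (the struct names here are simplified stand-ins, not libperf's exact API):

#include <stdio.h>

/* Simplified stand-ins: an index into a CPU map and a logical CPU number
 * become distinct struct types, so passing one where the other is
 * expected is a compile-time type error instead of a silent bug. */
struct perf_cpu { int cpu; };
struct perf_cpu_map_idx { int idx; };

static struct perf_cpu map_cpu(const int *map, struct perf_cpu_map_idx i)
{
	struct perf_cpu c = { .cpu = map[i.idx] };
	return c;
}

int main(void)
{
	int map[] = { 0, 2, 4, 6 };	/* e.g. one socket's CPUs */
	struct perf_cpu_map_idx i = { .idx = 1 };
	struct perf_cpu c = map_cpu(map, i);

	printf("index %d -> cpu %d\n", i.idx, c.cpu);
	return 0;
}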
diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c
index 47b7531f51da..98af3fa4ea35 100644
--- a/tools/perf/util/perf_event_attr_fprintf.c
+++ b/tools/perf/util/perf_event_attr_fprintf.c
@@ -52,7 +52,7 @@ static void __p_branch_sample_type(char *buf, size_t size, u64 value)
bit_name(ABORT_TX), bit_name(IN_TX), bit_name(NO_TX),
bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP),
bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES),
- bit_name(HW_INDEX),
+ bit_name(TYPE_SAVE), bit_name(HW_INDEX),
{ .name = NULL, }
};
#undef bit_name
diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c
index 06a7461ba864..872dd3d38782 100644
--- a/tools/perf/util/perf_regs.c
+++ b/tools/perf/util/perf_regs.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <errno.h>
+#include <string.h>
#include "perf_regs.h"
#include "event.h"
@@ -20,6 +21,673 @@ uint64_t __weak arch__user_reg_mask(void)
}
#ifdef HAVE_PERF_REGS_SUPPORT
+
+#define perf_event_arm_regs perf_event_arm64_regs
+#include "../../arch/arm64/include/uapi/asm/perf_regs.h"
+#undef perf_event_arm_regs
+
+#include "../../arch/arm/include/uapi/asm/perf_regs.h"
+#include "../../arch/csky/include/uapi/asm/perf_regs.h"
+#include "../../arch/mips/include/uapi/asm/perf_regs.h"
+#include "../../arch/powerpc/include/uapi/asm/perf_regs.h"
+#include "../../arch/riscv/include/uapi/asm/perf_regs.h"
+#include "../../arch/s390/include/uapi/asm/perf_regs.h"
+#include "../../arch/x86/include/uapi/asm/perf_regs.h"
+
+static const char *__perf_reg_name_arm64(int id)
+{
+ switch (id) {
+ case PERF_REG_ARM64_X0:
+ return "x0";
+ case PERF_REG_ARM64_X1:
+ return "x1";
+ case PERF_REG_ARM64_X2:
+ return "x2";
+ case PERF_REG_ARM64_X3:
+ return "x3";
+ case PERF_REG_ARM64_X4:
+ return "x4";
+ case PERF_REG_ARM64_X5:
+ return "x5";
+ case PERF_REG_ARM64_X6:
+ return "x6";
+ case PERF_REG_ARM64_X7:
+ return "x7";
+ case PERF_REG_ARM64_X8:
+ return "x8";
+ case PERF_REG_ARM64_X9:
+ return "x9";
+ case PERF_REG_ARM64_X10:
+ return "x10";
+ case PERF_REG_ARM64_X11:
+ return "x11";
+ case PERF_REG_ARM64_X12:
+ return "x12";
+ case PERF_REG_ARM64_X13:
+ return "x13";
+ case PERF_REG_ARM64_X14:
+ return "x14";
+ case PERF_REG_ARM64_X15:
+ return "x15";
+ case PERF_REG_ARM64_X16:
+ return "x16";
+ case PERF_REG_ARM64_X17:
+ return "x17";
+ case PERF_REG_ARM64_X18:
+ return "x18";
+ case PERF_REG_ARM64_X19:
+ return "x19";
+ case PERF_REG_ARM64_X20:
+ return "x20";
+ case PERF_REG_ARM64_X21:
+ return "x21";
+ case PERF_REG_ARM64_X22:
+ return "x22";
+ case PERF_REG_ARM64_X23:
+ return "x23";
+ case PERF_REG_ARM64_X24:
+ return "x24";
+ case PERF_REG_ARM64_X25:
+ return "x25";
+ case PERF_REG_ARM64_X26:
+ return "x26";
+ case PERF_REG_ARM64_X27:
+ return "x27";
+ case PERF_REG_ARM64_X28:
+ return "x28";
+ case PERF_REG_ARM64_X29:
+ return "x29";
+ case PERF_REG_ARM64_SP:
+ return "sp";
+ case PERF_REG_ARM64_LR:
+ return "lr";
+ case PERF_REG_ARM64_PC:
+ return "pc";
+ case PERF_REG_ARM64_VG:
+ return "vg";
+ default:
+ return NULL;
+ }
+
+ return NULL;
+}
+
+static const char *__perf_reg_name_arm(int id)
+{
+ switch (id) {
+ case PERF_REG_ARM_R0:
+ return "r0";
+ case PERF_REG_ARM_R1:
+ return "r1";
+ case PERF_REG_ARM_R2:
+ return "r2";
+ case PERF_REG_ARM_R3:
+ return "r3";
+ case PERF_REG_ARM_R4:
+ return "r4";
+ case PERF_REG_ARM_R5:
+ return "r5";
+ case PERF_REG_ARM_R6:
+ return "r6";
+ case PERF_REG_ARM_R7:
+ return "r7";
+ case PERF_REG_ARM_R8:
+ return "r8";
+ case PERF_REG_ARM_R9:
+ return "r9";
+ case PERF_REG_ARM_R10:
+ return "r10";
+ case PERF_REG_ARM_FP:
+ return "fp";
+ case PERF_REG_ARM_IP:
+ return "ip";
+ case PERF_REG_ARM_SP:
+ return "sp";
+ case PERF_REG_ARM_LR:
+ return "lr";
+ case PERF_REG_ARM_PC:
+ return "pc";
+ default:
+ return NULL;
+ }
+
+ return NULL;
+}
+
+static const char *__perf_reg_name_csky(int id)
+{
+ switch (id) {
+ case PERF_REG_CSKY_A0:
+ return "a0";
+ case PERF_REG_CSKY_A1:
+ return "a1";
+ case PERF_REG_CSKY_A2:
+ return "a2";
+ case PERF_REG_CSKY_A3:
+ return "a3";
+ case PERF_REG_CSKY_REGS0:
+ return "regs0";
+ case PERF_REG_CSKY_REGS1:
+ return "regs1";
+ case PERF_REG_CSKY_REGS2:
+ return "regs2";
+ case PERF_REG_CSKY_REGS3:
+ return "regs3";
+ case PERF_REG_CSKY_REGS4:
+ return "regs4";
+ case PERF_REG_CSKY_REGS5:
+ return "regs5";
+ case PERF_REG_CSKY_REGS6:
+ return "regs6";
+ case PERF_REG_CSKY_REGS7:
+ return "regs7";
+ case PERF_REG_CSKY_REGS8:
+ return "regs8";
+ case PERF_REG_CSKY_REGS9:
+ return "regs9";
+ case PERF_REG_CSKY_SP:
+ return "sp";
+ case PERF_REG_CSKY_LR:
+ return "lr";
+ case PERF_REG_CSKY_PC:
+ return "pc";
+#if defined(__CSKYABIV2__)
+ case PERF_REG_CSKY_EXREGS0:
+ return "exregs0";
+ case PERF_REG_CSKY_EXREGS1:
+ return "exregs1";
+ case PERF_REG_CSKY_EXREGS2:
+ return "exregs2";
+ case PERF_REG_CSKY_EXREGS3:
+ return "exregs3";
+ case PERF_REG_CSKY_EXREGS4:
+ return "exregs4";
+ case PERF_REG_CSKY_EXREGS5:
+ return "exregs5";
+ case PERF_REG_CSKY_EXREGS6:
+ return "exregs6";
+ case PERF_REG_CSKY_EXREGS7:
+ return "exregs7";
+ case PERF_REG_CSKY_EXREGS8:
+ return "exregs8";
+ case PERF_REG_CSKY_EXREGS9:
+ return "exregs9";
+ case PERF_REG_CSKY_EXREGS10:
+ return "exregs10";
+ case PERF_REG_CSKY_EXREGS11:
+ return "exregs11";
+ case PERF_REG_CSKY_EXREGS12:
+ return "exregs12";
+ case PERF_REG_CSKY_EXREGS13:
+ return "exregs13";
+ case PERF_REG_CSKY_EXREGS14:
+ return "exregs14";
+ case PERF_REG_CSKY_TLS:
+ return "tls";
+ case PERF_REG_CSKY_HI:
+ return "hi";
+ case PERF_REG_CSKY_LO:
+ return "lo";
+#endif
+ default:
+ return NULL;
+ }
+
+ return NULL;
+}
+
+static const char *__perf_reg_name_mips(int id)
+{
+ switch (id) {
+ case PERF_REG_MIPS_PC:
+ return "PC";
+ case PERF_REG_MIPS_R1:
+ return "$1";
+ case PERF_REG_MIPS_R2:
+ return "$2";
+ case PERF_REG_MIPS_R3:
+ return "$3";
+ case PERF_REG_MIPS_R4:
+ return "$4";
+ case PERF_REG_MIPS_R5:
+ return "$5";
+ case PERF_REG_MIPS_R6:
+ return "$6";
+ case PERF_REG_MIPS_R7:
+ return "$7";
+ case PERF_REG_MIPS_R8:
+ return "$8";
+ case PERF_REG_MIPS_R9:
+ return "$9";
+ case PERF_REG_MIPS_R10:
+ return "$10";
+ case PERF_REG_MIPS_R11:
+ return "$11";
+ case PERF_REG_MIPS_R12:
+ return "$12";
+ case PERF_REG_MIPS_R13:
+ return "$13";
+ case PERF_REG_MIPS_R14:
+ return "$14";
+ case PERF_REG_MIPS_R15:
+ return "$15";
+ case PERF_REG_MIPS_R16:
+ return "$16";
+ case PERF_REG_MIPS_R17:
+ return "$17";
+ case PERF_REG_MIPS_R18:
+ return "$18";
+ case PERF_REG_MIPS_R19:
+ return "$19";
+ case PERF_REG_MIPS_R20:
+ return "$20";
+ case PERF_REG_MIPS_R21:
+ return "$21";
+ case PERF_REG_MIPS_R22:
+ return "$22";
+ case PERF_REG_MIPS_R23:
+ return "$23";
+ case PERF_REG_MIPS_R24:
+ return "$24";
+ case PERF_REG_MIPS_R25:
+ return "$25";
+ case PERF_REG_MIPS_R28:
+ return "$28";
+ case PERF_REG_MIPS_R29:
+ return "$29";
+ case PERF_REG_MIPS_R30:
+ return "$30";
+ case PERF_REG_MIPS_R31:
+ return "$31";
+ default:
+ break;
+ }
+ return NULL;
+}
+
+static const char *__perf_reg_name_powerpc(int id)
+{
+ switch (id) {
+ case PERF_REG_POWERPC_R0:
+ return "r0";
+ case PERF_REG_POWERPC_R1:
+ return "r1";
+ case PERF_REG_POWERPC_R2:
+ return "r2";
+ case PERF_REG_POWERPC_R3:
+ return "r3";
+ case PERF_REG_POWERPC_R4:
+ return "r4";
+ case PERF_REG_POWERPC_R5:
+ return "r5";
+ case PERF_REG_POWERPC_R6:
+ return "r6";
+ case PERF_REG_POWERPC_R7:
+ return "r7";
+ case PERF_REG_POWERPC_R8:
+ return "r8";
+ case PERF_REG_POWERPC_R9:
+ return "r9";
+ case PERF_REG_POWERPC_R10:
+ return "r10";
+ case PERF_REG_POWERPC_R11:
+ return "r11";
+ case PERF_REG_POWERPC_R12:
+ return "r12";
+ case PERF_REG_POWERPC_R13:
+ return "r13";
+ case PERF_REG_POWERPC_R14:
+ return "r14";
+ case PERF_REG_POWERPC_R15:
+ return "r15";
+ case PERF_REG_POWERPC_R16:
+ return "r16";
+ case PERF_REG_POWERPC_R17:
+ return "r17";
+ case PERF_REG_POWERPC_R18:
+ return "r18";
+ case PERF_REG_POWERPC_R19:
+ return "r19";
+ case PERF_REG_POWERPC_R20:
+ return "r20";
+ case PERF_REG_POWERPC_R21:
+ return "r21";
+ case PERF_REG_POWERPC_R22:
+ return "r22";
+ case PERF_REG_POWERPC_R23:
+ return "r23";
+ case PERF_REG_POWERPC_R24:
+ return "r24";
+ case PERF_REG_POWERPC_R25:
+ return "r25";
+ case PERF_REG_POWERPC_R26:
+ return "r26";
+ case PERF_REG_POWERPC_R27:
+ return "r27";
+ case PERF_REG_POWERPC_R28:
+ return "r28";
+ case PERF_REG_POWERPC_R29:
+ return "r29";
+ case PERF_REG_POWERPC_R30:
+ return "r30";
+ case PERF_REG_POWERPC_R31:
+ return "r31";
+ case PERF_REG_POWERPC_NIP:
+ return "nip";
+ case PERF_REG_POWERPC_MSR:
+ return "msr";
+ case PERF_REG_POWERPC_ORIG_R3:
+ return "orig_r3";
+ case PERF_REG_POWERPC_CTR:
+ return "ctr";
+ case PERF_REG_POWERPC_LINK:
+ return "link";
+ case PERF_REG_POWERPC_XER:
+ return "xer";
+ case PERF_REG_POWERPC_CCR:
+ return "ccr";
+ case PERF_REG_POWERPC_SOFTE:
+ return "softe";
+ case PERF_REG_POWERPC_TRAP:
+ return "trap";
+ case PERF_REG_POWERPC_DAR:
+ return "dar";
+ case PERF_REG_POWERPC_DSISR:
+ return "dsisr";
+ case PERF_REG_POWERPC_SIER:
+ return "sier";
+ case PERF_REG_POWERPC_MMCRA:
+ return "mmcra";
+ case PERF_REG_POWERPC_MMCR0:
+ return "mmcr0";
+ case PERF_REG_POWERPC_MMCR1:
+ return "mmcr1";
+ case PERF_REG_POWERPC_MMCR2:
+ return "mmcr2";
+ case PERF_REG_POWERPC_MMCR3:
+ return "mmcr3";
+ case PERF_REG_POWERPC_SIER2:
+ return "sier2";
+ case PERF_REG_POWERPC_SIER3:
+ return "sier3";
+ case PERF_REG_POWERPC_PMC1:
+ return "pmc1";
+ case PERF_REG_POWERPC_PMC2:
+ return "pmc2";
+ case PERF_REG_POWERPC_PMC3:
+ return "pmc3";
+ case PERF_REG_POWERPC_PMC4:
+ return "pmc4";
+ case PERF_REG_POWERPC_PMC5:
+ return "pmc5";
+ case PERF_REG_POWERPC_PMC6:
+ return "pmc6";
+ case PERF_REG_POWERPC_SDAR:
+ return "sdar";
+ case PERF_REG_POWERPC_SIAR:
+ return "siar";
+ default:
+ break;
+ }
+ return NULL;
+}
+
+static const char *__perf_reg_name_riscv(int id)
+{
+ switch (id) {
+ case PERF_REG_RISCV_PC:
+ return "pc";
+ case PERF_REG_RISCV_RA:
+ return "ra";
+ case PERF_REG_RISCV_SP:
+ return "sp";
+ case PERF_REG_RISCV_GP:
+ return "gp";
+ case PERF_REG_RISCV_TP:
+ return "tp";
+ case PERF_REG_RISCV_T0:
+ return "t0";
+ case PERF_REG_RISCV_T1:
+ return "t1";
+ case PERF_REG_RISCV_T2:
+ return "t2";
+ case PERF_REG_RISCV_S0:
+ return "s0";
+ case PERF_REG_RISCV_S1:
+ return "s1";
+ case PERF_REG_RISCV_A0:
+ return "a0";
+ case PERF_REG_RISCV_A1:
+ return "a1";
+ case PERF_REG_RISCV_A2:
+ return "a2";
+ case PERF_REG_RISCV_A3:
+ return "a3";
+ case PERF_REG_RISCV_A4:
+ return "a4";
+ case PERF_REG_RISCV_A5:
+ return "a5";
+ case PERF_REG_RISCV_A6:
+ return "a6";
+ case PERF_REG_RISCV_A7:
+ return "a7";
+ case PERF_REG_RISCV_S2:
+ return "s2";
+ case PERF_REG_RISCV_S3:
+ return "s3";
+ case PERF_REG_RISCV_S4:
+ return "s4";
+ case PERF_REG_RISCV_S5:
+ return "s5";
+ case PERF_REG_RISCV_S6:
+ return "s6";
+ case PERF_REG_RISCV_S7:
+ return "s7";
+ case PERF_REG_RISCV_S8:
+ return "s8";
+ case PERF_REG_RISCV_S9:
+ return "s9";
+ case PERF_REG_RISCV_S10:
+ return "s10";
+ case PERF_REG_RISCV_S11:
+ return "s11";
+ case PERF_REG_RISCV_T3:
+ return "t3";
+ case PERF_REG_RISCV_T4:
+ return "t4";
+ case PERF_REG_RISCV_T5:
+ return "t5";
+ case PERF_REG_RISCV_T6:
+ return "t6";
+ default:
+ return NULL;
+ }
+
+ return NULL;
+}
+
+static const char *__perf_reg_name_s390(int id)
+{
+ switch (id) {
+ case PERF_REG_S390_R0:
+ return "R0";
+ case PERF_REG_S390_R1:
+ return "R1";
+ case PERF_REG_S390_R2:
+ return "R2";
+ case PERF_REG_S390_R3:
+ return "R3";
+ case PERF_REG_S390_R4:
+ return "R4";
+ case PERF_REG_S390_R5:
+ return "R5";
+ case PERF_REG_S390_R6:
+ return "R6";
+ case PERF_REG_S390_R7:
+ return "R7";
+ case PERF_REG_S390_R8:
+ return "R8";
+ case PERF_REG_S390_R9:
+ return "R9";
+ case PERF_REG_S390_R10:
+ return "R10";
+ case PERF_REG_S390_R11:
+ return "R11";
+ case PERF_REG_S390_R12:
+ return "R12";
+ case PERF_REG_S390_R13:
+ return "R13";
+ case PERF_REG_S390_R14:
+ return "R14";
+ case PERF_REG_S390_R15:
+ return "R15";
+ case PERF_REG_S390_FP0:
+ return "FP0";
+ case PERF_REG_S390_FP1:
+ return "FP1";
+ case PERF_REG_S390_FP2:
+ return "FP2";
+ case PERF_REG_S390_FP3:
+ return "FP3";
+ case PERF_REG_S390_FP4:
+ return "FP4";
+ case PERF_REG_S390_FP5:
+ return "FP5";
+ case PERF_REG_S390_FP6:
+ return "FP6";
+ case PERF_REG_S390_FP7:
+ return "FP7";
+ case PERF_REG_S390_FP8:
+ return "FP8";
+ case PERF_REG_S390_FP9:
+ return "FP9";
+ case PERF_REG_S390_FP10:
+ return "FP10";
+ case PERF_REG_S390_FP11:
+ return "FP11";
+ case PERF_REG_S390_FP12:
+ return "FP12";
+ case PERF_REG_S390_FP13:
+ return "FP13";
+ case PERF_REG_S390_FP14:
+ return "FP14";
+ case PERF_REG_S390_FP15:
+ return "FP15";
+ case PERF_REG_S390_MASK:
+ return "MASK";
+ case PERF_REG_S390_PC:
+ return "PC";
+ default:
+ return NULL;
+ }
+
+ return NULL;
+}
+
+static const char *__perf_reg_name_x86(int id)
+{
+ switch (id) {
+ case PERF_REG_X86_AX:
+ return "AX";
+ case PERF_REG_X86_BX:
+ return "BX";
+ case PERF_REG_X86_CX:
+ return "CX";
+ case PERF_REG_X86_DX:
+ return "DX";
+ case PERF_REG_X86_SI:
+ return "SI";
+ case PERF_REG_X86_DI:
+ return "DI";
+ case PERF_REG_X86_BP:
+ return "BP";
+ case PERF_REG_X86_SP:
+ return "SP";
+ case PERF_REG_X86_IP:
+ return "IP";
+ case PERF_REG_X86_FLAGS:
+ return "FLAGS";
+ case PERF_REG_X86_CS:
+ return "CS";
+ case PERF_REG_X86_SS:
+ return "SS";
+ case PERF_REG_X86_DS:
+ return "DS";
+ case PERF_REG_X86_ES:
+ return "ES";
+ case PERF_REG_X86_FS:
+ return "FS";
+ case PERF_REG_X86_GS:
+ return "GS";
+ case PERF_REG_X86_R8:
+ return "R8";
+ case PERF_REG_X86_R9:
+ return "R9";
+ case PERF_REG_X86_R10:
+ return "R10";
+ case PERF_REG_X86_R11:
+ return "R11";
+ case PERF_REG_X86_R12:
+ return "R12";
+ case PERF_REG_X86_R13:
+ return "R13";
+ case PERF_REG_X86_R14:
+ return "R14";
+ case PERF_REG_X86_R15:
+ return "R15";
+
+#define XMM(x) \
+ case PERF_REG_X86_XMM ## x: \
+ case PERF_REG_X86_XMM ## x + 1: \
+ return "XMM" #x;
+ XMM(0)
+ XMM(1)
+ XMM(2)
+ XMM(3)
+ XMM(4)
+ XMM(5)
+ XMM(6)
+ XMM(7)
+ XMM(8)
+ XMM(9)
+ XMM(10)
+ XMM(11)
+ XMM(12)
+ XMM(13)
+ XMM(14)
+ XMM(15)
+#undef XMM
+ default:
+ return NULL;
+ }
+
+ return NULL;
+}
+
+const char *perf_reg_name(int id, const char *arch)
+{
+ const char *reg_name = NULL;
+
+ if (!strcmp(arch, "csky"))
+ reg_name = __perf_reg_name_csky(id);
+ else if (!strcmp(arch, "mips"))
+ reg_name = __perf_reg_name_mips(id);
+ else if (!strcmp(arch, "powerpc"))
+ reg_name = __perf_reg_name_powerpc(id);
+ else if (!strcmp(arch, "riscv"))
+ reg_name = __perf_reg_name_riscv(id);
+ else if (!strcmp(arch, "s390"))
+ reg_name = __perf_reg_name_s390(id);
+ else if (!strcmp(arch, "x86"))
+ reg_name = __perf_reg_name_x86(id);
+ else if (!strcmp(arch, "arm"))
+ reg_name = __perf_reg_name_arm(id);
+ else if (!strcmp(arch, "arm64"))
+ reg_name = __perf_reg_name_arm64(id);
+
+ return reg_name ?: "unknown";
+}
+
int perf_reg_value(u64 *valp, struct regs_dump *regs, int id)
{
int i, idx = 0;
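With the per-architecture tables above, perf_reg_name() now selects register names from the architecture string recorded in the perf.data environment rather than from compile-time #ifdefs, so cross-architecture reports print the right names. A toy sketch of the same dispatch shape (x86_reg_name() and reg_name() are illustrative, not perf functions):

#include <stdio.h>
#include <string.h>

/* Toy version of the arch-string dispatch used by perf_reg_name():
 * the caller passes the recorded architecture, not the build host's. */
static const char *x86_reg_name(int id)
{
	static const char *names[] = { "AX", "BX", "CX", "DX" };

	return (id >= 0 && id < 4) ? names[id] : NULL;
}

static const char *reg_name(int id, const char *arch)
{
	const char *name = NULL;

	if (!strcmp(arch, "x86"))
		name = x86_reg_name(id);
	/* other architectures would be dispatched here */

	return name ?: "unknown";
}

int main(void)
{
	printf("%s %s\n", reg_name(2, "x86"), reg_name(2, "s390"));
	return 0;
}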
diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h
index eeac181ebccf..ce1127af05e4 100644
--- a/tools/perf/util/perf_regs.h
+++ b/tools/perf/util/perf_regs.h
@@ -11,8 +11,11 @@ struct sample_reg {
const char *name;
uint64_t mask;
};
-#define SMPL_REG(n, b) { .name = #n, .mask = 1ULL << (b) }
-#define SMPL_REG2(n, b) { .name = #n, .mask = 3ULL << (b) }
+
+#define SMPL_REG_MASK(b) (1ULL << (b))
+#define SMPL_REG(n, b) { .name = #n, .mask = SMPL_REG_MASK(b) }
+#define SMPL_REG2_MASK(b) (3ULL << (b))
+#define SMPL_REG2(n, b) { .name = #n, .mask = SMPL_REG2_MASK(b) }
#define SMPL_REG_END { .name = NULL }
enum {
@@ -31,22 +34,16 @@ extern const struct sample_reg sample_reg_masks[];
#define DWARF_MINIMAL_REGS ((1ULL << PERF_REG_IP) | (1ULL << PERF_REG_SP))
+const char *perf_reg_name(int id, const char *arch);
int perf_reg_value(u64 *valp, struct regs_dump *regs, int id);
-static inline const char *perf_reg_name(int id)
-{
- const char *reg_name = __perf_reg_name(id);
-
- return reg_name ?: "unknown";
-}
-
#else
#define PERF_REGS_MASK 0
#define PERF_REGS_MAX 0
#define DWARF_MINIMAL_REGS PERF_REGS_MASK
-static inline const char *perf_reg_name(int id __maybe_unused)
+static inline const char *perf_reg_name(int id __maybe_unused, const char *arch __maybe_unused)
{
return "unknown";
}
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 8dfbba15aeb8..9a1c7e63e663 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -1998,7 +1998,8 @@ int perf_pmu__cpus_match(struct perf_pmu *pmu, struct perf_cpu_map *cpus,
{
struct perf_cpu_map *pmu_cpus = pmu->cpus;
struct perf_cpu_map *matched_cpus, *unmatched_cpus;
- int matched_nr = 0, unmatched_nr = 0;
+ struct perf_cpu cpu;
+ int i, matched_nr = 0, unmatched_nr = 0;
matched_cpus = perf_cpu_map__default_new();
if (!matched_cpus)
@@ -2010,14 +2011,11 @@ int perf_pmu__cpus_match(struct perf_pmu *pmu, struct perf_cpu_map *cpus,
return -1;
}
- for (int i = 0; i < cpus->nr; i++) {
- int cpu;
-
- cpu = perf_cpu_map__idx(pmu_cpus, cpus->map[i]);
- if (cpu == -1)
- unmatched_cpus->map[unmatched_nr++] = cpus->map[i];
+ perf_cpu_map__for_each_cpu(cpu, i, cpus) {
+ if (!perf_cpu_map__has(pmu_cpus, cpu))
+ unmatched_cpus->map[unmatched_nr++] = cpu;
else
- matched_cpus->map[matched_nr++] = cpus->map[i];
+ matched_cpus->map[matched_nr++] = cpu;
}
unmatched_cpus->nr = unmatched_nr;
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index b2a02c9ab8ea..062b5cbe67af 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -332,7 +332,7 @@ static int kernel_get_module_dso(const char *module, struct dso **pdso)
char module_name[128];
snprintf(module_name, sizeof(module_name), "[%s]", module);
- map = maps__find_by_name(&host_machine->kmaps, module_name);
+ map = maps__find_by_name(machine__kernel_maps(host_machine), module_name);
if (map) {
dso = map->dso;
goto found;
@@ -3083,6 +3083,9 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
for (j = 0; j < num_matched_functions; j++) {
sym = syms[j];
+ if (sym->type != STT_FUNC)
+ continue;
+
/* There can be duplicated symbols in the map */
for (i = 0; i < j; i++)
if (sym->start == syms[i]->start) {
diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources
index a685d20165f7..aa5156c2bcff 100644
--- a/tools/perf/util/python-ext-sources
+++ b/tools/perf/util/python-ext-sources
@@ -38,5 +38,6 @@ util/units.c
util/affinity.c
util/rwsem.c
util/hashmap.c
+util/perf_regs.c
util/pmu-hybrid.c
util/fncache.c
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 7f782a31bda3..5be5fa2391de 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -58,10 +58,21 @@ int parse_callchain_record(const char *arg __maybe_unused,
}
/*
- * Add this one here not to drag util/env.c
+ * Add these not to drag util/env.c
*/
struct perf_env perf_env;
+const char *perf_env__cpuid(struct perf_env *env __maybe_unused)
+{
+ return NULL;
+}
+
+// This one is a bit easier, wouldn't drag too much, but leave it as a stub we need it here
+const char *perf_env__arch(struct perf_env *env __maybe_unused)
+{
+ return NULL;
+}
+
/*
* Add this one here not to drag util/stat-shadow.c
*/
@@ -428,6 +439,8 @@ tracepoint_field(struct pyrf_event *pe, struct tep_format_field *field)
offset = val;
len = offset >> 16;
offset &= 0xffff;
+ if (field->flags & TEP_FIELD_IS_RELATIVE)
+ offset += field->offset + field->size;
}
if (field->flags & TEP_FIELD_IS_STRING &&
is_printable_array(data + offset, len)) {
@@ -636,17 +649,17 @@ static Py_ssize_t pyrf_cpu_map__length(PyObject *obj)
{
struct pyrf_cpu_map *pcpus = (void *)obj;
- return pcpus->cpus->nr;
+ return perf_cpu_map__nr(pcpus->cpus);
}
static PyObject *pyrf_cpu_map__item(PyObject *obj, Py_ssize_t i)
{
struct pyrf_cpu_map *pcpus = (void *)obj;
- if (i >= pcpus->cpus->nr)
+ if (i >= perf_cpu_map__nr(pcpus->cpus))
return NULL;
- return Py_BuildValue("i", pcpus->cpus->map[i]);
+ return Py_BuildValue("i", perf_cpu_map__cpu(pcpus->cpus, i).cpu);
}
static PySequenceMethods pyrf_cpu_map__sequence_methods = {
@@ -1057,7 +1070,7 @@ static struct mmap *get_md(struct evlist *evlist, int cpu)
for (i = 0; i < evlist->core.nr_mmaps; i++) {
struct mmap *md = &evlist->mmap[i];
- if (md->core.cpu == cpu)
+ if (md->core.cpu.cpu == cpu)
return md;
}
@@ -1443,7 +1456,7 @@ error:
* Dummy, to avoid dragging all the test_attr infrastructure in the python
* binding.
*/
-void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu,
+void test_attr__open(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu,
int fd, int group_fd, unsigned long flags)
{
}
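The TEP_FIELD_IS_RELATIVE adjustment added above (and repeated in the perl and python scripting engines further down) handles tracepoint dynamic arrays whose recorded offset is relative to the end of the descriptor field, so the field's own offset and size must be added back. A small standalone sketch of the decode step (struct dyn_field and the sample values are made up):

#include <stdio.h>

/* Decode a tracepoint dynamic-array descriptor: the 32-bit value packs
 * the length in the high 16 bits and the offset in the low 16 bits.
 * For a "relative" field the offset counts from the end of the
 * descriptor itself, so its own offset and size are added back. */
struct dyn_field {
	unsigned int offset;	/* offset of the descriptor in the record */
	unsigned int size;	/* size of the descriptor (4 bytes) */
	int is_relative;	/* stand-in for TEP_FIELD_IS_RELATIVE */
};

static unsigned int dyn_data_offset(unsigned int val, const struct dyn_field *f)
{
	unsigned int offset = val & 0xffff;

	if (f->is_relative)
		offset += f->offset + f->size;
	return offset;
}

int main(void)
{
	/* Hypothetical descriptor at offset 8, 4 bytes, length 16, offset 12. */
	struct dyn_field f = { .offset = 8, .size = 4, .is_relative = 1 };
	unsigned int val = (16u << 16) | 12u;

	printf("len=%u data offset=%u\n", val >> 16, dyn_data_offset(val, &f));
	return 0;
}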
diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c
index bff669b615ee..5b09ecbb05dc 100644
--- a/tools/perf/util/record.c
+++ b/tools/perf/util/record.c
@@ -106,7 +106,7 @@ void evlist__config(struct evlist *evlist, struct record_opts *opts, struct call
if (opts->group)
evlist__set_leader(evlist);
- if (evlist->core.cpus->map[0] < 0)
+ if (perf_cpu_map__cpu(evlist->core.user_requested_cpus, 0).cpu < 0)
opts->no_inherit = true;
use_comm_exec = perf_can_comm_exec();
@@ -229,7 +229,8 @@ bool evlist__can_select_event(struct evlist *evlist, const char *str)
{
struct evlist *temp_evlist;
struct evsel *evsel;
- int err, fd, cpu;
+ int err, fd;
+ struct perf_cpu cpu = { .cpu = 0 };
bool ret = false;
pid_t pid = -1;
@@ -243,17 +244,19 @@ bool evlist__can_select_event(struct evlist *evlist, const char *str)
evsel = evlist__last(temp_evlist);
- if (!evlist || perf_cpu_map__empty(evlist->core.cpus)) {
+ if (!evlist || perf_cpu_map__empty(evlist->core.user_requested_cpus)) {
struct perf_cpu_map *cpus = perf_cpu_map__new(NULL);
- cpu = cpus ? cpus->map[0] : 0;
+ if (cpus)
+ cpu = perf_cpu_map__cpu(cpus, 0);
+
perf_cpu_map__put(cpus);
} else {
- cpu = evlist->core.cpus->map[0];
+ cpu = perf_cpu_map__cpu(evlist->core.user_requested_cpus, 0);
}
while (1) {
- fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1,
+ fd = sys_perf_event_open(&evsel->core.attr, pid, cpu.cpu, -1,
perf_event_open_cloexec_flag());
if (fd < 0) {
if (pid == -1 && errno == EACCES) {
diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h
index ef6c2715fdd9..be9a957501f4 100644
--- a/tools/perf/util/record.h
+++ b/tools/perf/util/record.h
@@ -78,6 +78,8 @@ struct record_opts {
int ctl_fd_ack;
bool ctl_fd_close;
int synth;
+ int threads_spec;
+ const char *threads_user_spec;
};
extern const char * const *record_usage;
diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c
index 32a721b3e9a5..a5d945415bbc 100644
--- a/tools/perf/util/scripting-engines/trace-event-perl.c
+++ b/tools/perf/util/scripting-engines/trace-event-perl.c
@@ -392,6 +392,8 @@ static void perl_process_tracepoint(struct perf_sample *sample,
if (field->flags & TEP_FIELD_IS_DYNAMIC) {
offset = *(int *)(data + field->offset);
offset &= 0xffff;
+ if (field->flags & TEP_FIELD_IS_RELATIVE)
+ offset += field->offset + field->size;
} else
offset = field->offset;
XPUSHs(sv_2mortal(newSVpv((char *)data + offset, 0)));
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index c0c010350bc2..adba01b7d9dd 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -36,6 +36,7 @@
#include "../debug.h"
#include "../dso.h"
#include "../callchain.h"
+#include "../env.h"
#include "../evsel.h"
#include "../event.h"
#include "../thread.h"
@@ -391,6 +392,18 @@ static const char *get_dsoname(struct map *map)
return dsoname;
}
+static unsigned long get_offset(struct symbol *sym, struct addr_location *al)
+{
+ unsigned long offset;
+
+ if (al->addr < sym->end)
+ offset = al->addr - sym->start;
+ else
+ offset = al->addr - al->map->start - sym->start;
+
+ return offset;
+}
+
static PyObject *python_process_callchain(struct perf_sample *sample,
struct evsel *evsel,
struct addr_location *al)
@@ -442,6 +455,25 @@ static PyObject *python_process_callchain(struct perf_sample *sample,
_PyUnicode_FromStringAndSize(node->ms.sym->name,
node->ms.sym->namelen));
pydict_set_item_string_decref(pyelem, "sym", pysym);
+
+ if (node->ms.map) {
+ struct map *map = node->ms.map;
+ struct addr_location node_al;
+ unsigned long offset;
+
+ node_al.addr = map->map_ip(map, node->ip);
+ node_al.map = map;
+ offset = get_offset(node->ms.sym, &node_al);
+
+ pydict_set_item_string_decref(
+ pyelem, "sym_off",
+ PyLong_FromUnsignedLongLong(offset));
+ }
+ if (node->srcline && strcmp(":0", node->srcline)) {
+ pydict_set_item_string_decref(
+ pyelem, "sym_srcline",
+ _PyUnicode_FromString(node->srcline));
+ }
}
if (node->ms.map) {
@@ -519,18 +551,6 @@ exit:
return pylist;
}
-static unsigned long get_offset(struct symbol *sym, struct addr_location *al)
-{
- unsigned long offset;
-
- if (al->addr < sym->end)
- offset = al->addr - sym->start;
- else
- offset = al->addr - al->map->start - sym->start;
-
- return offset;
-}
-
static int get_symoff(struct symbol *sym, struct addr_location *al,
bool print_off, char *bf, int size)
{
@@ -687,7 +707,7 @@ static void set_sample_datasrc_in_dict(PyObject *dict,
_PyUnicode_FromString(decode));
}
-static void regs_map(struct regs_dump *regs, uint64_t mask, char *bf, int size)
+static void regs_map(struct regs_dump *regs, uint64_t mask, const char *arch, char *bf, int size)
{
unsigned int i = 0, r;
int printed = 0;
@@ -702,7 +722,7 @@ static void regs_map(struct regs_dump *regs, uint64_t mask, char *bf, int size)
printed += scnprintf(bf + printed, size - printed,
"%5s:0x%" PRIx64 " ",
- perf_reg_name(r), val);
+ perf_reg_name(r, arch), val);
}
}
@@ -711,6 +731,7 @@ static void set_regs_in_dict(PyObject *dict,
struct evsel *evsel)
{
struct perf_event_attr *attr = &evsel->core.attr;
+ const char *arch = perf_env__arch(evsel__env(evsel));
/*
* Here value 28 is a constant size which can be used to print
@@ -722,24 +743,34 @@ static void set_regs_in_dict(PyObject *dict,
int size = __sw_hweight64(attr->sample_regs_intr) * 28;
char bf[size];
- regs_map(&sample->intr_regs, attr->sample_regs_intr, bf, sizeof(bf));
+ regs_map(&sample->intr_regs, attr->sample_regs_intr, arch, bf, sizeof(bf));
pydict_set_item_string_decref(dict, "iregs",
_PyUnicode_FromString(bf));
- regs_map(&sample->user_regs, attr->sample_regs_user, bf, sizeof(bf));
+ regs_map(&sample->user_regs, attr->sample_regs_user, arch, bf, sizeof(bf));
pydict_set_item_string_decref(dict, "uregs",
_PyUnicode_FromString(bf));
}
static void set_sym_in_dict(PyObject *dict, struct addr_location *al,
- const char *dso_field, const char *sym_field,
- const char *symoff_field)
+ const char *dso_field, const char *dso_bid_field,
+ const char *dso_map_start, const char *dso_map_end,
+ const char *sym_field, const char *symoff_field)
{
+ char sbuild_id[SBUILD_ID_SIZE];
+
if (al->map) {
pydict_set_item_string_decref(dict, dso_field,
_PyUnicode_FromString(al->map->dso->name));
+ build_id__sprintf(&al->map->dso->bid, sbuild_id);
+ pydict_set_item_string_decref(dict, dso_bid_field,
+ _PyUnicode_FromString(sbuild_id));
+ pydict_set_item_string_decref(dict, dso_map_start,
+ PyLong_FromUnsignedLong(al->map->start));
+ pydict_set_item_string_decref(dict, dso_map_end,
+ PyLong_FromUnsignedLong(al->map->end));
}
if (al->sym) {
pydict_set_item_string_decref(dict, sym_field,
@@ -819,7 +850,8 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample,
(const char *)sample->raw_data, sample->raw_size));
pydict_set_item_string_decref(dict, "comm",
_PyUnicode_FromString(thread__comm_str(al->thread)));
- set_sym_in_dict(dict, al, "dso", "symbol", "symoff");
+ set_sym_in_dict(dict, al, "dso", "dso_bid", "dso_map_start", "dso_map_end",
+ "symbol", "symoff");
pydict_set_item_string_decref(dict, "callchain", callchain);
@@ -835,7 +867,9 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample,
if (addr_al) {
pydict_set_item_string_decref(dict_sample, "addr_correlates_sym",
PyBool_FromLong(1));
- set_sym_in_dict(dict_sample, addr_al, "addr_dso", "addr_symbol", "addr_symoff");
+ set_sym_in_dict(dict_sample, addr_al, "addr_dso", "addr_dso_bid",
+ "addr_dso_map_start", "addr_dso_map_end",
+ "addr_symbol", "addr_symoff");
}
if (sample->flags)
@@ -942,6 +976,8 @@ static void python_process_tracepoint(struct perf_sample *sample,
offset = val;
len = offset >> 16;
offset &= 0xffff;
+ if (field->flags & TEP_FIELD_IS_RELATIVE)
+ offset += field->offset + field->size;
}
if (field->flags & TEP_FIELD_IS_STRING &&
is_printable_array(data + offset, len)) {
@@ -1212,7 +1248,7 @@ static void python_export_sample_table(struct db_export *dbe,
struct tables *tables = container_of(dbe, struct tables, dbe);
PyObject *t;
- t = tuple_new(24);
+ t = tuple_new(25);
tuple_set_d64(t, 0, es->db_id);
tuple_set_d64(t, 1, es->evsel->db_id);
@@ -1238,6 +1274,7 @@ static void python_export_sample_table(struct db_export *dbe,
tuple_set_d64(t, 21, es->call_path_id);
tuple_set_d64(t, 22, es->sample->insn_cnt);
tuple_set_d64(t, 23, es->sample->cyc_cnt);
+ tuple_set_s32(t, 24, es->sample->flags);
call_object(tables->sample_handler, t, "sample_table");
@@ -1553,7 +1590,7 @@ static void get_handler_name(char *str, size_t size,
}
static void
-process_stat(struct evsel *counter, int cpu, int thread, u64 tstamp,
+process_stat(struct evsel *counter, struct perf_cpu cpu, int thread, u64 tstamp,
struct perf_counts_values *count)
{
PyObject *handler, *t;
@@ -1573,7 +1610,7 @@ process_stat(struct evsel *counter, int cpu, int thread, u64 tstamp,
return;
}
- PyTuple_SetItem(t, n++, _PyLong_FromLong(cpu));
+ PyTuple_SetItem(t, n++, _PyLong_FromLong(cpu.cpu));
PyTuple_SetItem(t, n++, _PyLong_FromLong(thread));
tuple_set_u64(t, n++, tstamp);
@@ -1597,14 +1634,14 @@ static void python_process_stat(struct perf_stat_config *config,
int cpu, thread;
if (config->aggr_mode == AGGR_GLOBAL) {
- process_stat(counter, -1, -1, tstamp,
+ process_stat(counter, (struct perf_cpu){ .cpu = -1 }, -1, tstamp,
&counter->counts->aggr);
return;
}
for (thread = 0; thread < threads->nr; thread++) {
- for (cpu = 0; cpu < cpus->nr; cpu++) {
- process_stat(counter, cpus->map[cpu],
+ for (cpu = 0; cpu < perf_cpu_map__nr(cpus); cpu++) {
+ process_stat(counter, perf_cpu_map__cpu(cpus, cpu),
perf_thread_map__pid(threads, thread), tstamp,
perf_counts(counter->counts, cpu, thread));
}
@@ -2069,7 +2106,11 @@ static int python_generate_script(struct tep_handle *pevent, const char *outfile
fprintf(ofp, "\t\tfor node in common_callchain:");
fprintf(ofp, "\n\t\t\tif 'sym' in node:");
- fprintf(ofp, "\n\t\t\t\tprint(\"\\t[%%x] %%s\" %% (node['ip'], node['sym']['name']))");
+ fprintf(ofp, "\n\t\t\t\tprint(\"\t[%%x] %%s%%s%%s%%s\" %% (");
+ fprintf(ofp, "\n\t\t\t\t\tnode['ip'], node['sym']['name'],");
+ fprintf(ofp, "\n\t\t\t\t\t\"+0x{:x}\".format(node['sym_off']) if 'sym_off' in node else \"\",");
+ fprintf(ofp, "\n\t\t\t\t\t\" ({})\".format(node['dso']) if 'dso' in node else \"\",");
+ fprintf(ofp, "\n\t\t\t\t\t\" \" + node['sym_srcline'] if 'sym_srcline' in node else \"\"))");
fprintf(ofp, "\n\t\t\telse:");
fprintf(ofp, "\n\t\t\t\tprint(\"\t[%%x]\" %% (node['ip']))\n\n");
fprintf(ofp, "\t\tprint()\n\n");
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index d8857d1b6d7c..0aa818977d2b 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -15,6 +15,7 @@
#include "map_symbol.h"
#include "branch.h"
#include "debug.h"
+#include "env.h"
#include "evlist.h"
#include "evsel.h"
#include "memswap.h"
@@ -38,7 +39,8 @@
#ifdef HAVE_ZSTD_SUPPORT
static int perf_session__process_compressed_event(struct perf_session *session,
- union perf_event *event, u64 file_offset)
+ union perf_event *event, u64 file_offset,
+ const char *file_path)
{
void *src;
size_t decomp_size, src_size;
@@ -60,6 +62,7 @@ static int perf_session__process_compressed_event(struct perf_session *session,
}
decomp->file_pos = file_offset;
+ decomp->file_path = file_path;
decomp->mmap_len = mmap_len;
decomp->head = 0;
@@ -99,7 +102,8 @@ static int perf_session__process_compressed_event(struct perf_session *session,
static int perf_session__deliver_event(struct perf_session *session,
union perf_event *event,
struct perf_tool *tool,
- u64 file_offset);
+ u64 file_offset,
+ const char *file_path);
static int perf_session__open(struct perf_session *session, int repipe_fd)
{
@@ -181,7 +185,8 @@ static int ordered_events__deliver_event(struct ordered_events *oe,
ordered_events);
return perf_session__deliver_event(session, event->event,
- session->tool, event->file_offset);
+ session->tool, event->file_offset,
+ event->file_path);
}
struct perf_session *__perf_session__new(struct perf_data *data,
@@ -470,7 +475,8 @@ static int process_event_time_conv_stub(struct perf_session *perf_session __mayb
static int perf_session__process_compressed_event_stub(struct perf_session *session __maybe_unused,
union perf_event *event __maybe_unused,
- u64 file_offset __maybe_unused)
+ u64 file_offset __maybe_unused,
+ const char *file_path __maybe_unused)
{
dump_printf(": unhandled!\n");
return 0;
@@ -1071,9 +1077,9 @@ static int process_finished_round(struct perf_tool *tool __maybe_unused,
}
int perf_session__queue_event(struct perf_session *s, union perf_event *event,
- u64 timestamp, u64 file_offset)
+ u64 timestamp, u64 file_offset, const char *file_path)
{
- return ordered_events__queue(&s->ordered_events, event, timestamp, file_offset);
+ return ordered_events__queue(&s->ordered_events, event, timestamp, file_offset, file_path);
}
static void callchain__lbr_callstack_printf(struct perf_sample *sample)
@@ -1145,30 +1151,47 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack)
struct branch_entry *entries = perf_sample__branch_entries(sample);
uint64_t i;
- printf("%s: nr:%" PRIu64 "\n",
- !callstack ? "... branch stack" : "... branch callstack",
- sample->branch_stack->nr);
+ if (!callstack) {
+ printf("%s: nr:%" PRIu64 "\n", "... branch stack", sample->branch_stack->nr);
+ } else {
+ /* Add 1 to nr because expanding the branch stack generates
+ * nr + 1 callstack records. e.g.,
+ * B()->C()
+ * A()->B()
+ * the final callstack should be:
+ * C()
+ * B()
+ * A()
+ */
+ printf("%s: nr:%" PRIu64 "\n", "... branch callstack", sample->branch_stack->nr+1);
+ }
for (i = 0; i < sample->branch_stack->nr; i++) {
struct branch_entry *e = &entries[i];
if (!callstack) {
- printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x\n",
+ printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x %s\n",
i, e->from, e->to,
(unsigned short)e->flags.cycles,
e->flags.mispred ? "M" : " ",
e->flags.predicted ? "P" : " ",
e->flags.abort ? "A" : " ",
e->flags.in_tx ? "T" : " ",
- (unsigned)e->flags.reserved);
+ (unsigned)e->flags.reserved,
+ e->flags.type ? branch_type_name(e->flags.type) : "");
} else {
- printf("..... %2"PRIu64": %016" PRIx64 "\n",
- i, i > 0 ? e->from : e->to);
+ if (i == 0) {
+ printf("..... %2"PRIu64": %016" PRIx64 "\n"
+ "..... %2"PRIu64": %016" PRIx64 "\n",
+ i, e->to, i+1, e->from);
+ } else {
+ printf("..... %2"PRIu64": %016" PRIx64 "\n", i+1, e->from);
+ }
}
}
}
-static void regs_dump__printf(u64 mask, u64 *regs)
+static void regs_dump__printf(u64 mask, u64 *regs, const char *arch)
{
unsigned rid, i = 0;
@@ -1176,7 +1199,7 @@ static void regs_dump__printf(u64 mask, u64 *regs)
u64 val = regs[i++];
printf(".... %-5s 0x%016" PRIx64 "\n",
- perf_reg_name(rid), val);
+ perf_reg_name(rid, arch), val);
}
}
@@ -1194,7 +1217,7 @@ static inline const char *regs_dump_abi(struct regs_dump *d)
return regs_abi[d->abi];
}
-static void regs__printf(const char *type, struct regs_dump *regs)
+static void regs__printf(const char *type, struct regs_dump *regs, const char *arch)
{
u64 mask = regs->mask;
@@ -1203,23 +1226,23 @@ static void regs__printf(const char *type, struct regs_dump *regs)
mask,
regs_dump_abi(regs));
- regs_dump__printf(mask, regs->regs);
+ regs_dump__printf(mask, regs->regs, arch);
}
-static void regs_user__printf(struct perf_sample *sample)
+static void regs_user__printf(struct perf_sample *sample, const char *arch)
{
struct regs_dump *user_regs = &sample->user_regs;
if (user_regs->regs)
- regs__printf("user", user_regs);
+ regs__printf("user", user_regs, arch);
}
-static void regs_intr__printf(struct perf_sample *sample)
+static void regs_intr__printf(struct perf_sample *sample, const char *arch)
{
struct regs_dump *intr_regs = &sample->intr_regs;
if (intr_regs->regs)
- regs__printf("intr", intr_regs);
+ regs__printf("intr", intr_regs, arch);
}
static void stack_user__printf(struct stack_dump *dump)
@@ -1276,13 +1299,14 @@ static void sample_read__printf(struct perf_sample *sample, u64 read_format)
}
static void dump_event(struct evlist *evlist, union perf_event *event,
- u64 file_offset, struct perf_sample *sample)
+ u64 file_offset, struct perf_sample *sample,
+ const char *file_path)
{
if (!dump_trace)
return;
- printf("\n%#" PRIx64 " [%#x]: event: %d\n",
- file_offset, event->header.size, event->header.type);
+ printf("\n%#" PRIx64 "@%s [%#x]: event: %d\n",
+ file_offset, file_path, event->header.size, event->header.type);
trace_event(event);
if (event->header.type == PERF_RECORD_SAMPLE && evlist->trace_event_sample_raw)
@@ -1304,7 +1328,7 @@ char *get_page_size_name(u64 size, char *str)
}
static void dump_sample(struct evsel *evsel, union perf_event *event,
- struct perf_sample *sample)
+ struct perf_sample *sample, const char *arch)
{
u64 sample_type;
char str[PAGE_SIZE_NAME_LEN];
@@ -1325,10 +1349,10 @@ static void dump_sample(struct evsel *evsel, union perf_event *event,
branch_stack__printf(sample, evsel__has_branch_callstack(evsel));
if (sample_type & PERF_SAMPLE_REGS_USER)
- regs_user__printf(sample);
+ regs_user__printf(sample, arch);
if (sample_type & PERF_SAMPLE_REGS_INTR)
- regs_intr__printf(sample);
+ regs_intr__printf(sample, arch);
if (sample_type & PERF_SAMPLE_STACK_USER)
stack_user__printf(&sample->user_stack);
@@ -1402,6 +1426,13 @@ static struct machine *machines__find_for_cpumode(struct machines *machines,
else
pid = sample->pid;
+ /*
+ * Guest code machine is created as needed and does not use
+ * DEFAULT_GUEST_KERNEL_ID.
+ */
+ if (symbol_conf.guest_code)
+ return machines__findnew(machines, pid);
+
return machines__find_guest(machines, pid);
}
@@ -1485,12 +1516,13 @@ static int machines__deliver_event(struct machines *machines,
struct evlist *evlist,
union perf_event *event,
struct perf_sample *sample,
- struct perf_tool *tool, u64 file_offset)
+ struct perf_tool *tool, u64 file_offset,
+ const char *file_path)
{
struct evsel *evsel;
struct machine *machine;
- dump_event(evlist, event, file_offset, sample);
+ dump_event(evlist, event, file_offset, sample, file_path);
evsel = evlist__id2evsel(evlist, sample->id);
@@ -1502,11 +1534,12 @@ static int machines__deliver_event(struct machines *machines,
++evlist->stats.nr_unknown_id;
return 0;
}
- dump_sample(evsel, event, sample);
if (machine == NULL) {
++evlist->stats.nr_unprocessable_samples;
+ dump_sample(evsel, event, sample, perf_env__arch(NULL));
return 0;
}
+ dump_sample(evsel, event, sample, perf_env__arch(machine->env));
return evlist__deliver_sample(evlist, tool, event, sample, evsel, machine);
case PERF_RECORD_MMAP:
return tool->mmap(tool, event, sample, machine);
@@ -1571,7 +1604,8 @@ static int machines__deliver_event(struct machines *machines,
static int perf_session__deliver_event(struct perf_session *session,
union perf_event *event,
struct perf_tool *tool,
- u64 file_offset)
+ u64 file_offset,
+ const char *file_path)
{
struct perf_sample sample;
int ret = evlist__parse_sample(session->evlist, event, &sample);
@@ -1588,7 +1622,7 @@ static int perf_session__deliver_event(struct perf_session *session,
return 0;
ret = machines__deliver_event(&session->machines, session->evlist,
- event, &sample, tool, file_offset);
+ event, &sample, tool, file_offset, file_path);
if (dump_trace && sample.aux_sample.size)
auxtrace__dump_auxtrace_sample(session, &sample);
@@ -1598,7 +1632,8 @@ static int perf_session__deliver_event(struct perf_session *session,
static s64 perf_session__process_user_event(struct perf_session *session,
union perf_event *event,
- u64 file_offset)
+ u64 file_offset,
+ const char *file_path)
{
struct ordered_events *oe = &session->ordered_events;
struct perf_tool *tool = session->tool;
@@ -1608,7 +1643,7 @@ static s64 perf_session__process_user_event(struct perf_session *session,
if (event->header.type != PERF_RECORD_COMPRESSED ||
tool->compressed == perf_session__process_compressed_event_stub)
- dump_event(session->evlist, event, file_offset, &sample);
+ dump_event(session->evlist, event, file_offset, &sample, file_path);
/* These events are processed right away */
switch (event->header.type) {
@@ -1667,9 +1702,9 @@ static s64 perf_session__process_user_event(struct perf_session *session,
case PERF_RECORD_HEADER_FEATURE:
return tool->feature(session, event);
case PERF_RECORD_COMPRESSED:
- err = tool->compressed(session, event, file_offset);
+ err = tool->compressed(session, event, file_offset, file_path);
if (err)
- dump_event(session->evlist, event, file_offset, &sample);
+ dump_event(session->evlist, event, file_offset, &sample, file_path);
return err;
default:
return -EINVAL;
@@ -1686,9 +1721,9 @@ int perf_session__deliver_synth_event(struct perf_session *session,
events_stats__inc(&evlist->stats, event->header.type);
if (event->header.type >= PERF_RECORD_USER_TYPE_START)
- return perf_session__process_user_event(session, event, 0);
+ return perf_session__process_user_event(session, event, 0, NULL);
- return machines__deliver_event(&session->machines, evlist, event, sample, tool, 0);
+ return machines__deliver_event(&session->machines, evlist, event, sample, tool, 0, NULL);
}
static void event_swap(union perf_event *event, bool sample_id_all)
@@ -1785,7 +1820,8 @@ int perf_session__peek_events(struct perf_session *session, u64 offset,
}
static s64 perf_session__process_event(struct perf_session *session,
- union perf_event *event, u64 file_offset)
+ union perf_event *event, u64 file_offset,
+ const char *file_path)
{
struct evlist *evlist = session->evlist;
struct perf_tool *tool = session->tool;
@@ -1800,7 +1836,7 @@ static s64 perf_session__process_event(struct perf_session *session,
events_stats__inc(&evlist->stats, event->header.type);
if (event->header.type >= PERF_RECORD_USER_TYPE_START)
- return perf_session__process_user_event(session, event, file_offset);
+ return perf_session__process_user_event(session, event, file_offset, file_path);
if (tool->ordered_events) {
u64 timestamp = -1ULL;
@@ -1809,12 +1845,12 @@ static s64 perf_session__process_event(struct perf_session *session,
if (ret && ret != -1)
return ret;
- ret = perf_session__queue_event(session, event, timestamp, file_offset);
+ ret = perf_session__queue_event(session, event, timestamp, file_offset, file_path);
if (ret != -ETIME)
return ret;
}
- return perf_session__deliver_event(session, event, tool, file_offset);
+ return perf_session__deliver_event(session, event, tool, file_offset, file_path);
}
void perf_event_header__bswap(struct perf_event_header *hdr)
@@ -2041,7 +2077,7 @@ more:
}
}
- if ((skip = perf_session__process_event(session, event, head)) < 0) {
+ if ((skip = perf_session__process_event(session, event, head, "pipe")) < 0) {
pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n",
head, event->header.size, event->header.type);
err = -EINVAL;
@@ -2082,6 +2118,7 @@ prefetch_event(char *buf, u64 head, size_t mmap_size,
bool needs_swap, union perf_event *error)
{
union perf_event *event;
+ u16 event_size;
/*
* Ensure we have enough space remaining to read
@@ -2094,15 +2131,23 @@ prefetch_event(char *buf, u64 head, size_t mmap_size,
if (needs_swap)
perf_event_header__bswap(&event->header);
- if (head + event->header.size <= mmap_size)
+ event_size = event->header.size;
+ if (head + event_size <= mmap_size)
return event;
/* We're not fetching the event so swap back again */
if (needs_swap)
perf_event_header__bswap(&event->header);
- pr_debug("%s: head=%#" PRIx64 " event->header_size=%#x, mmap_size=%#zx:"
- " fuzzed or compressed perf.data?\n",__func__, head, event->header.size, mmap_size);
+ /* Check if the event fits into the next mmapped buf. */
+ if (event_size <= mmap_size - head % page_size) {
+ /* Remap buf and fetch again. */
+ return NULL;
+ }
+
+ /* Invalid input. Event size should never exceed mmap_size. */
+ pr_debug("%s: head=%#" PRIx64 " event->header.size=%#x, mmap_size=%#zx:"
+ " fuzzed or compressed perf.data?\n", __func__, head, event_size, mmap_size);
return error;
}
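
With the change above, prefetch_event() now has a three-way contract rather than two. The sketch below is illustrative only; the helper names are placeholders, not code from this patch:

	union perf_event *ev = prefetch_event(buf, head, mmap_size, needs_swap, error);

	if (ev == error)
		return -EINVAL;		/* size exceeds mmap_size: fuzzed or corrupt data */
	else if (ev == NULL)
		remap_and_retry();	/* event straddles this mmap window: remap, then re-read */
	else
		deliver(ev);		/* the whole event is mapped: process it normally */
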
@@ -2138,7 +2183,8 @@ static int __perf_session__process_decomp_events(struct perf_session *session)
size = event->header.size;
if (size < sizeof(struct perf_event_header) ||
- (skip = perf_session__process_event(session, event, decomp->file_pos)) < 0) {
+ (skip = perf_session__process_event(session, event, decomp->file_pos,
+ decomp->file_path)) < 0) {
pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n",
decomp->file_pos + decomp->head, event->header.size, event->header.type);
return -EINVAL;
@@ -2169,10 +2215,12 @@ struct reader;
typedef s64 (*reader_cb_t)(struct perf_session *session,
union perf_event *event,
- u64 file_offset);
+ u64 file_offset,
+ const char *file_path);
struct reader {
int fd;
+ const char *path;
u64 data_size;
u64 data_offset;
reader_cb_t process;
@@ -2184,6 +2232,8 @@ struct reader {
u64 file_pos;
u64 file_offset;
u64 head;
+ u64 size;
+ bool done;
struct zstd_data zstd_data;
struct decomp_data decomp_data;
};
@@ -2290,7 +2340,7 @@ reader__read_event(struct reader *rd, struct perf_session *session,
skip = -EINVAL;
if (size < sizeof(struct perf_event_header) ||
- (skip = rd->process(session, event, rd->file_pos)) < 0) {
+ (skip = rd->process(session, event, rd->file_pos, rd->path)) < 0) {
pr_err("%#" PRIx64 " [%#x]: failed to process type: %d [%s]\n",
rd->file_offset + rd->head, event->header.size,
event->header.type, strerror(-skip));
@@ -2301,6 +2351,7 @@ reader__read_event(struct reader *rd, struct perf_session *session,
if (skip)
size += skip;
+ rd->size += size;
rd->head += size;
rd->file_pos += size;
@@ -2357,15 +2408,17 @@ out:
static s64 process_simple(struct perf_session *session,
union perf_event *event,
- u64 file_offset)
+ u64 file_offset,
+ const char *file_path)
{
- return perf_session__process_event(session, event, file_offset);
+ return perf_session__process_event(session, event, file_offset, file_path);
}
static int __perf_session__process_events(struct perf_session *session)
{
struct reader rd = {
.fd = perf_data__fd(session->data),
+ .path = session->data->file.path,
.data_size = session->header.data_size,
.data_offset = session->header.data_offset,
.process = process_simple,
@@ -2409,6 +2462,135 @@ out_err:
return err;
}
+/*
+ * Processing 2 MB of data from each reader in sequence,
+ * because that's the way the ordered events sorting works
+ * most efficiently.
+ */
+#define READER_MAX_SIZE (2 * 1024 * 1024)
+
+/*
+ * This function reads, merges and processes directory data.
+ * It assumes version 1 of directory data, where each data
+ * file holds per-cpu data, already sorted by the kernel.
+ */
+static int __perf_session__process_dir_events(struct perf_session *session)
+{
+ struct perf_data *data = session->data;
+ struct perf_tool *tool = session->tool;
+ int i, ret, readers, nr_readers;
+ struct ui_progress prog;
+ u64 total_size = perf_data__size(session->data);
+ struct reader *rd;
+
+ perf_tool__fill_defaults(tool);
+
+ ui_progress__init_size(&prog, total_size, "Sorting events...");
+
+ nr_readers = 1;
+ for (i = 0; i < data->dir.nr; i++) {
+ if (data->dir.files[i].size)
+ nr_readers++;
+ }
+
+ rd = zalloc(nr_readers * sizeof(struct reader));
+ if (!rd)
+ return -ENOMEM;
+
+ rd[0] = (struct reader) {
+ .fd = perf_data__fd(session->data),
+ .path = session->data->file.path,
+ .data_size = session->header.data_size,
+ .data_offset = session->header.data_offset,
+ .process = process_simple,
+ .in_place_update = session->data->in_place_update,
+ };
+ ret = reader__init(&rd[0], NULL);
+ if (ret)
+ goto out_err;
+ ret = reader__mmap(&rd[0], session);
+ if (ret)
+ goto out_err;
+ readers = 1;
+
+ for (i = 0; i < data->dir.nr; i++) {
+ if (!data->dir.files[i].size)
+ continue;
+ rd[readers] = (struct reader) {
+ .fd = data->dir.files[i].fd,
+ .path = data->dir.files[i].path,
+ .data_size = data->dir.files[i].size,
+ .data_offset = 0,
+ .process = process_simple,
+ .in_place_update = session->data->in_place_update,
+ };
+ ret = reader__init(&rd[readers], NULL);
+ if (ret)
+ goto out_err;
+ ret = reader__mmap(&rd[readers], session);
+ if (ret)
+ goto out_err;
+ readers++;
+ }
+
+ i = 0;
+ while (readers) {
+ if (session_done())
+ break;
+
+ if (rd[i].done) {
+ i = (i + 1) % nr_readers;
+ continue;
+ }
+ if (reader__eof(&rd[i])) {
+ rd[i].done = true;
+ readers--;
+ continue;
+ }
+
+ session->active_decomp = &rd[i].decomp_data;
+ ret = reader__read_event(&rd[i], session, &prog);
+ if (ret < 0) {
+ goto out_err;
+ } else if (ret == READER_NODATA) {
+ ret = reader__mmap(&rd[i], session);
+ if (ret)
+ goto out_err;
+ }
+
+ if (rd[i].size >= READER_MAX_SIZE) {
+ rd[i].size = 0;
+ i = (i + 1) % nr_readers;
+ }
+ }
+
+ ret = ordered_events__flush(&session->ordered_events, OE_FLUSH__FINAL);
+ if (ret)
+ goto out_err;
+
+ ret = perf_session__flush_thread_stacks(session);
+out_err:
+ ui_progress__finish();
+
+ if (!tool->no_warn)
+ perf_session__warn_about_errors(session);
+
+ /*
+ * We may be switching perf.data output; make ordered_events
+ * reusable.
+ */
+ ordered_events__reinit(&session->ordered_events);
+
+ session->one_mmap = false;
+
+ session->active_decomp = &session->decomp_data;
+ for (i = 0; i < nr_readers; i++)
+ reader__release_decomp(&rd[i]);
+ zfree(&rd);
+
+ return ret;
+}
+
int perf_session__process_events(struct perf_session *session)
{
if (perf_session__register_idle_thread(session) < 0)
@@ -2417,6 +2599,9 @@ int perf_session__process_events(struct perf_session *session)
if (perf_data__is_pipe(session->data))
return __perf_session__process_pipe_events(session);
+ if (perf_data__is_dir(session->data) && session->data->dir.nr)
+ return __perf_session__process_dir_events(session);
+
return __perf_session__process_events(session);
}
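
As a toy model of the scheduling implemented above (self-contained and illustrative only, not code from this patch): each per-cpu file is drained in READER_MAX_SIZE slices before handing over to the next, which is what keeps the ordered-events sorting cheap.

	#include <stdio.h>

	#define READER_MAX_SIZE (2 * 1024 * 1024)

	int main(void)
	{
		long remaining[3] = { 5 << 20, 1 << 20, 3 << 20 };	/* fake per-cpu file sizes */
		int nr = 3, live = 3, i = 0;

		while (live) {
			if (remaining[i] <= 0) {		/* this file is already drained */
				i = (i + 1) % nr;
				continue;
			}
			long slice = remaining[i] < READER_MAX_SIZE ? remaining[i] : READER_MAX_SIZE;
			printf("reader %d: consume %ld bytes\n", i, slice);
			remaining[i] -= slice;
			if (remaining[i] <= 0)
				live--;
			i = (i + 1) % nr;			/* hand over after each slice */
		}
		return 0;
	}
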
@@ -2536,16 +2721,16 @@ int perf_session__cpu_bitmap(struct perf_session *session,
return -1;
}
- for (i = 0; i < map->nr; i++) {
- int cpu = map->map[i];
+ for (i = 0; i < perf_cpu_map__nr(map); i++) {
+ struct perf_cpu cpu = perf_cpu_map__cpu(map, i);
- if (cpu >= nr_cpus) {
+ if (cpu.cpu >= nr_cpus) {
pr_err("Requested CPU %d too large. "
- "Consider raising MAX_NR_CPUS\n", cpu);
+ "Consider raising MAX_NR_CPUS\n", cpu.cpu);
goto out_delete_map;
}
- set_bit(cpu, cpu_bitmap);
+ set_bit(cpu.cpu, cpu_bitmap);
}
err = 0;
@@ -2597,7 +2782,7 @@ int perf_event__process_id_index(struct perf_session *session,
if (!sid)
return -ENOENT;
sid->idx = e->idx;
- sid->cpu = e->cpu;
+ sid->cpu.cpu = e->cpu;
sid->tid = e->tid;
}
return 0;
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 46c854292ad6..34500a3da735 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -52,6 +52,7 @@ struct perf_session {
struct decomp {
struct decomp *next;
u64 file_pos;
+ const char *file_path;
size_t mmap_len;
u64 head;
size_t size;
@@ -87,7 +88,7 @@ int perf_session__peek_events(struct perf_session *session, u64 offset,
int perf_session__process_events(struct perf_session *session);
int perf_session__queue_event(struct perf_session *s, union perf_event *event,
- u64 timestamp, u64 file_offset);
+ u64 timestamp, u64 file_offset, const char *file_path);
void perf_tool__fill_defaults(struct perf_tool *tool);
diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py
index 483f05004e68..c255a2c90cd6 100644
--- a/tools/perf/util/setup.py
+++ b/tools/perf/util/setup.py
@@ -1,12 +1,14 @@
-from os import getenv
+from os import getenv, path
from subprocess import Popen, PIPE
from re import sub
cc = getenv("CC")
cc_is_clang = b"clang version" in Popen([cc.split()[0], "-v"], stderr=PIPE).stderr.readline()
+src_feature_tests = getenv('srctree') + '/tools/build/feature'
def clang_has_option(option):
- return [o for o in Popen([cc, option], stderr=PIPE).stderr.readlines() if b"unknown argument" in o] == [ ]
+ cc_output = Popen([cc, option, path.join(src_feature_tests, "test-hello.c") ], stderr=PIPE).stderr.readlines()
+ return [o for o in cc_output if ((b"unknown argument" in o) or (b"is not supported" in o))] == [ ]
if cc_is_clang:
from distutils.sysconfig import get_config_vars
@@ -23,6 +25,8 @@ if cc_is_clang:
vars[var] = sub("-fstack-protector-strong", "", vars[var])
if not clang_has_option("-fno-semantic-interposition"):
vars[var] = sub("-fno-semantic-interposition", "", vars[var])
+ if not clang_has_option("-ffat-lto-objects"):
+ vars[var] = sub("-ffat-lto-objects", "", vars[var])
from distutils.core import setup, Extension
diff --git a/tools/perf/util/sideband_evlist.c b/tools/perf/util/sideband_evlist.c
index 748371ac22be..388846f17bc1 100644
--- a/tools/perf/util/sideband_evlist.c
+++ b/tools/perf/util/sideband_evlist.c
@@ -114,7 +114,8 @@ int evlist__start_sb_thread(struct evlist *evlist, struct target *target)
}
evlist__for_each_entry(evlist, counter) {
- if (evsel__open(counter, evlist->core.cpus, evlist->core.threads) < 0)
+ if (evsel__open(counter, evlist->core.user_requested_cpus,
+ evlist->core.threads) < 0)
goto out_delete_evlist;
}
diff --git a/tools/perf/util/smt.c b/tools/perf/util/smt.c
index 34f1b1b1176c..2b0a36ebf27a 100644
--- a/tools/perf/util/smt.c
+++ b/tools/perf/util/smt.c
@@ -5,6 +5,56 @@
#include "api/fs/fs.h"
#include "smt.h"
+/**
+ * hweight_str - Returns the number of bits set in the hex string str. Skips
+ * ',' separators and stops at the first character that is neither a hex
+ * digit nor ','.
+ */
+static int hweight_str(char *str)
+{
+ int result = 0;
+
+ while (*str) {
+ switch (*str++) {
+ case '0':
+ case ',':
+ break;
+ case '1':
+ case '2':
+ case '4':
+ case '8':
+ result++;
+ break;
+ case '3':
+ case '5':
+ case '6':
+ case '9':
+ case 'a':
+ case 'A':
+ case 'c':
+ case 'C':
+ result += 2;
+ break;
+ case '7':
+ case 'b':
+ case 'B':
+ case 'd':
+ case 'D':
+ case 'e':
+ case 'E':
+ result += 3;
+ break;
+ case 'f':
+ case 'F':
+ result += 4;
+ break;
+ default:
+ goto done;
+ }
+ }
+done:
+ return result;
+}
+
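
A quick illustrative check of hweight_str() (values chosen here, not taken from the patch): every hex digit contributes its popcount and ',' separators contribute nothing, so a whole comma-separated sysfs mask is counted rather than only its first 64-bit chunk.

	char mask[] = "ff,00000003";		/* 'f' = 4 bits each, ',' = 0, '3' = 2 bits */

	printf("%d\n", hweight_str(mask));	/* prints 10: 4 + 4 + 0 + 2 */
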
int smt_on(void)
{
static bool cached;
@@ -15,9 +65,12 @@ int smt_on(void)
if (cached)
return cached_result;
- if (sysfs__read_int("devices/system/cpu/smt/active", &cached_result) >= 0)
- goto done;
+ if (sysfs__read_int("devices/system/cpu/smt/active", &cached_result) >= 0) {
+ cached = true;
+ return cached_result;
+ }
+ cached_result = 0;
ncpu = sysconf(_SC_NPROCESSORS_CONF);
for (cpu = 0; cpu < ncpu; cpu++) {
unsigned long long siblings;
@@ -26,27 +79,21 @@ int smt_on(void)
char fn[256];
snprintf(fn, sizeof fn,
- "devices/system/cpu/cpu%d/topology/core_cpus", cpu);
+ "devices/system/cpu/cpu%d/topology/thread_siblings", cpu);
if (sysfs__read_str(fn, &str, &strlen) < 0) {
snprintf(fn, sizeof fn,
- "devices/system/cpu/cpu%d/topology/thread_siblings",
- cpu);
+ "devices/system/cpu/cpu%d/topology/core_cpus", cpu);
if (sysfs__read_str(fn, &str, &strlen) < 0)
continue;
}
/* Entry is hex, but does not have 0x, so need custom parser */
- siblings = strtoull(str, NULL, 16);
+ siblings = hweight_str(str);
free(str);
- if (hweight64(siblings) > 1) {
+ if (siblings > 1) {
cached_result = 1;
- cached = true;
break;
}
}
- if (!cached) {
- cached_result = 0;
-done:
- cached = true;
- }
+ cached = true;
return cached_result;
}
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index a111065b484e..6d5588e80935 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -37,7 +37,7 @@ const char default_parent_pattern[] = "^sys_|^do_page_fault";
const char *parent_pattern = default_parent_pattern;
const char *default_sort_order = "comm,dso,symbol";
const char default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles";
-const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,p_stage_cyc";
+const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,local_p_stage_cyc";
const char default_top_sort_order[] = "dso,symbol";
const char default_diff_sort_order[] = "dso,symbol";
const char default_tracepoint_sort_order[] = "trace";
@@ -46,8 +46,8 @@ const char *field_order;
regex_t ignore_callees_regex;
int have_ignore_callees = 0;
enum sort_mode sort__mode = SORT_MODE__NORMAL;
-const char *dynamic_headers[] = {"local_ins_lat", "p_stage_cyc"};
-const char *arch_specific_sort_keys[] = {"p_stage_cyc"};
+static const char *const dynamic_headers[] = {"local_ins_lat", "ins_lat", "local_p_stage_cyc", "p_stage_cyc"};
+static const char *const arch_specific_sort_keys[] = {"local_p_stage_cyc", "p_stage_cyc"};
/*
* Replaces all occurrences of a char used with the:
@@ -915,7 +915,7 @@ static int hist_entry__sym_from_snprintf(struct hist_entry *he, char *bf,
struct addr_map_symbol *from = &he->branch_info->from;
return _hist_entry__sym_snprintf(&from->ms, from->al_addr,
- he->level, bf, size, width);
+ from->al_level, bf, size, width);
}
return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
@@ -928,7 +928,7 @@ static int hist_entry__sym_to_snprintf(struct hist_entry *he, char *bf,
struct addr_map_symbol *to = &he->branch_info->to;
return _hist_entry__sym_snprintf(&to->ms, to->al_addr,
- he->level, bf, size, width);
+ to->al_level, bf, size, width);
}
return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
@@ -990,6 +990,128 @@ struct sort_entry sort_sym_to = {
.se_width_idx = HISTC_SYMBOL_TO,
};
+static int _hist_entry__addr_snprintf(struct map_symbol *ms,
+ u64 ip, char level, char *bf, size_t size,
+ unsigned int width)
+{
+ struct symbol *sym = ms->sym;
+ struct map *map = ms->map;
+ size_t ret = 0, offs;
+
+ ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", level);
+ if (sym && map) {
+ if (sym->type == STT_OBJECT) {
+ ret += repsep_snprintf(bf + ret, size - ret, "%s", sym->name);
+ ret += repsep_snprintf(bf + ret, size - ret, "+0x%llx",
+ ip - map->unmap_ip(map, sym->start));
+ } else {
+ ret += repsep_snprintf(bf + ret, size - ret, "%.*s",
+ width - ret,
+ sym->name);
+ offs = ip - sym->start;
+ if (offs)
+ ret += repsep_snprintf(bf + ret, size - ret, "+0x%llx", offs);
+ }
+ } else {
+ size_t len = BITS_PER_LONG / 4;
+ ret += repsep_snprintf(bf + ret, size - ret, "%-#.*llx",
+ len, ip);
+ }
+
+ return ret;
+}
+
+static int hist_entry__addr_from_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ if (he->branch_info) {
+ struct addr_map_symbol *from = &he->branch_info->from;
+
+ return _hist_entry__addr_snprintf(&from->ms, from->al_addr,
+ he->level, bf, size, width);
+ }
+
+ return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
+}
+
+static int hist_entry__addr_to_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ if (he->branch_info) {
+ struct addr_map_symbol *to = &he->branch_info->to;
+
+ return _hist_entry__addr_snprintf(&to->ms, to->al_addr,
+ he->level, bf, size, width);
+ }
+
+ return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
+}
+
+static int64_t
+sort__addr_from_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ struct addr_map_symbol *from_l;
+ struct addr_map_symbol *from_r;
+ int64_t ret;
+
+ if (!left->branch_info || !right->branch_info)
+ return cmp_null(left->branch_info, right->branch_info);
+
+ from_l = &left->branch_info->from;
+ from_r = &right->branch_info->from;
+
+ /*
+ * comparing symbol address alone is not enough since it's a
+ * relative address within a dso.
+ */
+ ret = _sort__dso_cmp(from_l->ms.map, from_r->ms.map);
+ if (ret != 0)
+ return ret;
+
+ return _sort__addr_cmp(from_l->addr, from_r->addr);
+}
+
+static int64_t
+sort__addr_to_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ struct addr_map_symbol *to_l;
+ struct addr_map_symbol *to_r;
+ int64_t ret;
+
+ if (!left->branch_info || !right->branch_info)
+ return cmp_null(left->branch_info, right->branch_info);
+
+ to_l = &left->branch_info->to;
+ to_r = &right->branch_info->to;
+
+ /*
+ * comparing symbol address alone is not enough since it's a
+ * relative address within a dso.
+ */
+ ret = _sort__dso_cmp(to_l->ms.map, to_r->ms.map);
+ if (ret != 0)
+ return ret;
+
+ return _sort__addr_cmp(to_l->addr, to_r->addr);
+}
+
+struct sort_entry sort_addr_from = {
+ .se_header = "Source Address",
+ .se_cmp = sort__addr_from_cmp,
+ .se_snprintf = hist_entry__addr_from_snprintf,
+ .se_filter = hist_entry__sym_from_filter, /* shared with sym_from */
+ .se_width_idx = HISTC_ADDR_FROM,
+};
+
+struct sort_entry sort_addr_to = {
+ .se_header = "Target Address",
+ .se_cmp = sort__addr_to_cmp,
+ .se_snprintf = hist_entry__addr_to_snprintf,
+ .se_filter = hist_entry__sym_to_filter, /* shared with sym_to */
+ .se_width_idx = HISTC_ADDR_TO,
+};
+
+
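
The two sort entries above are wired into the branch-stack dimensions later in this patch, so an invocation along these lines should exercise them (illustrative command line, assuming perf.data was recorded with branch stacks):

	perf record -b -a -- sleep 1
	perf report --sort=addr_from,addr_to
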
static int64_t
sort__mispredict_cmp(struct hist_entry *left, struct hist_entry *right)
{
@@ -1392,22 +1514,37 @@ struct sort_entry sort_global_ins_lat = {
};
static int64_t
-sort__global_p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right)
+sort__p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right)
{
return left->p_stage_cyc - right->p_stage_cyc;
}
+static int hist_entry__global_p_stage_cyc_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ return repsep_snprintf(bf, size, "%-*u", width,
+ he->p_stage_cyc * he->stat.nr_events);
+}
+
+
static int hist_entry__p_stage_cyc_snprintf(struct hist_entry *he, char *bf,
size_t size, unsigned int width)
{
return repsep_snprintf(bf, size, "%-*u", width, he->p_stage_cyc);
}
-struct sort_entry sort_p_stage_cyc = {
- .se_header = "Pipeline Stage Cycle",
- .se_cmp = sort__global_p_stage_cyc_cmp,
+struct sort_entry sort_local_p_stage_cyc = {
+ .se_header = "Local Pipeline Stage Cycle",
+ .se_cmp = sort__p_stage_cyc_cmp,
.se_snprintf = hist_entry__p_stage_cyc_snprintf,
- .se_width_idx = HISTC_P_STAGE_CYC,
+ .se_width_idx = HISTC_LOCAL_P_STAGE_CYC,
+};
+
+struct sort_entry sort_global_p_stage_cyc = {
+ .se_header = "Pipeline Stage Cycle",
+ .se_cmp = sort__p_stage_cyc_cmp,
+ .se_snprintf = hist_entry__global_p_stage_cyc_snprintf,
+ .se_width_idx = HISTC_GLOBAL_P_STAGE_CYC,
};
struct sort_entry sort_mem_daddr_sym = {
@@ -1858,7 +1995,8 @@ static struct sort_dimension common_sort_dimensions[] = {
DIM(SORT_CODE_PAGE_SIZE, "code_page_size", sort_code_page_size),
DIM(SORT_LOCAL_INS_LAT, "local_ins_lat", sort_local_ins_lat),
DIM(SORT_GLOBAL_INS_LAT, "ins_lat", sort_global_ins_lat),
- DIM(SORT_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_p_stage_cyc),
+ DIM(SORT_LOCAL_PIPELINE_STAGE_CYC, "local_p_stage_cyc", sort_local_p_stage_cyc),
+ DIM(SORT_GLOBAL_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_global_p_stage_cyc),
};
#undef DIM
@@ -1877,6 +2015,8 @@ static struct sort_dimension bstack_sort_dimensions[] = {
DIM(SORT_SRCLINE_FROM, "srcline_from", sort_srcline_from),
DIM(SORT_SRCLINE_TO, "srcline_to", sort_srcline_to),
DIM(SORT_SYM_IPC, "ipc_lbr", sort_sym_ipc),
+ DIM(SORT_ADDR_FROM, "addr_from", sort_addr_from),
+ DIM(SORT_ADDR_TO, "addr_to", sort_addr_to),
};
#undef DIM
@@ -2365,6 +2505,8 @@ static int64_t __sort__hde_cmp(struct perf_hpp_fmt *fmt,
tep_read_number_field(field, a->raw_data, &dyn);
offset = dyn & 0xffff;
size = (dyn >> 16) & 0xffff;
+ if (field->flags & TEP_FIELD_IS_RELATIVE)
+ offset += field->offset + field->size;
/* record max width for output */
if (size > hde->dynamic_len)
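
A worked example of the relative-offset adjustment above, with made-up numbers:

	/*
	 * For a TEP_FIELD_IS_RELATIVE field at offset 8 with size 4, a stored
	 * dyn word of 0x0006000c decodes to size 6 and relative offset 0xc, so
	 * the payload starts at 8 + 4 + 0xc = 24 bytes into the record: the
	 * offset is taken relative to the end of the location field itself.
	 */
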
@@ -3108,6 +3250,10 @@ static bool get_elide(int idx, FILE *output)
return __get_elide(symbol_conf.dso_from_list, "dso_from", output);
case HISTC_DSO_TO:
return __get_elide(symbol_conf.dso_to_list, "dso_to", output);
+ case HISTC_ADDR_FROM:
+ return __get_elide(symbol_conf.sym_from_list, "addr_from", output);
+ case HISTC_ADDR_TO:
+ return __get_elide(symbol_conf.sym_to_list, "addr_to", output);
default:
break;
}
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 7b7145501933..2ddc00d1c464 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -235,7 +235,8 @@ enum sort_type {
SORT_CODE_PAGE_SIZE,
SORT_LOCAL_INS_LAT,
SORT_GLOBAL_INS_LAT,
- SORT_PIPELINE_STAGE_CYC,
+ SORT_LOCAL_PIPELINE_STAGE_CYC,
+ SORT_GLOBAL_PIPELINE_STAGE_CYC,
/* branch stack specific sort keys */
__SORT_BRANCH_STACK,
@@ -250,6 +251,8 @@ enum sort_type {
SORT_SRCLINE_FROM,
SORT_SRCLINE_TO,
SORT_SYM_IPC,
+ SORT_ADDR_FROM,
+ SORT_ADDR_TO,
/* memory mode specific sort keys */
__SORT_MEMORY_MODE,
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 588601000f3f..606f09b09226 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -4,6 +4,7 @@
#include <linux/string.h>
#include <linux/time64.h>
#include <math.h>
+#include <perf/cpumap.h>
#include "color.h"
#include "counts.h"
#include "evlist.h"
@@ -120,11 +121,10 @@ static void aggr_printout(struct perf_stat_config *config,
id.die,
config->csv_output ? 0 : -3,
id.core, config->csv_sep);
- } else if (id.core > -1) {
+ } else if (id.cpu.cpu > -1) {
fprintf(config->output, "CPU%*d%s",
config->csv_output ? 0 : -7,
- evsel__cpus(evsel)->map[id.core],
- config->csv_sep);
+ id.cpu.cpu, config->csv_sep);
}
break;
case AGGR_THREAD:
@@ -327,26 +327,24 @@ static void print_metric_header(struct perf_stat_config *config,
fprintf(os->fh, "%*s ", config->metric_only_len, unit);
}
-static int first_shadow_cpu(struct perf_stat_config *config,
- struct evsel *evsel, struct aggr_cpu_id id)
+static int first_shadow_cpu_map_idx(struct perf_stat_config *config,
+ struct evsel *evsel, const struct aggr_cpu_id *id)
{
- struct evlist *evlist = evsel->evlist;
- int i;
+ struct perf_cpu_map *cpus = evsel__cpus(evsel);
+ struct perf_cpu cpu;
+ int idx;
if (config->aggr_mode == AGGR_NONE)
- return id.core;
+ return perf_cpu_map__idx(cpus, id->cpu);
if (!config->aggr_get_id)
return 0;
- for (i = 0; i < evsel__nr_cpus(evsel); i++) {
- int cpu2 = evsel__cpus(evsel)->map[i];
+ perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
+ struct aggr_cpu_id cpu_id = config->aggr_get_id(config, cpu);
- if (cpu_map__compare_aggr_cpu_id(
- config->aggr_get_id(config, evlist->core.cpus, cpu2),
- id)) {
- return cpu2;
- }
+ if (aggr_cpu_id__equal(&cpu_id, id))
+ return idx;
}
return 0;
}
@@ -505,7 +503,7 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int
}
perf_stat__print_shadow_stats(config, counter, uval,
- first_shadow_cpu(config, counter, id),
+ first_shadow_cpu_map_idx(config, counter, &id),
&out, &config->metric_events, st);
if (!config->csv_output && !config->metric_only) {
print_noise(config, counter, noise);
@@ -516,23 +514,26 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int
static void aggr_update_shadow(struct perf_stat_config *config,
struct evlist *evlist)
{
- int cpu, s;
+ int idx, s;
+ struct perf_cpu cpu;
struct aggr_cpu_id s2, id;
u64 val;
struct evsel *counter;
+ struct perf_cpu_map *cpus;
for (s = 0; s < config->aggr_map->nr; s++) {
id = config->aggr_map->map[s];
evlist__for_each_entry(evlist, counter) {
+ cpus = evsel__cpus(counter);
val = 0;
- for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) {
- s2 = config->aggr_get_id(config, evlist->core.cpus, cpu);
- if (!cpu_map__compare_aggr_cpu_id(s2, id))
+ perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
+ s2 = config->aggr_get_id(config, cpu);
+ if (!aggr_cpu_id__equal(&s2, &id))
continue;
- val += perf_counts(counter->counts, cpu, 0)->val;
+ val += perf_counts(counter->counts, idx, 0)->val;
}
perf_stat__update_shadow_stats(counter, val,
- first_shadow_cpu(config, counter, id),
+ first_shadow_cpu_map_idx(config, counter, &id),
&rt_stat);
}
}
@@ -584,15 +585,16 @@ static void collect_all_aliases(struct perf_stat_config *config, struct evsel *c
alias = list_prepare_entry(counter, &(evlist->core.entries), core.node);
list_for_each_entry_continue (alias, &evlist->core.entries, core.node) {
- if (strcmp(evsel__name(alias), evsel__name(counter)) ||
- alias->scale != counter->scale ||
- alias->cgrp != counter->cgrp ||
- strcmp(alias->unit, counter->unit) ||
- evsel__is_clock(alias) != evsel__is_clock(counter) ||
- !strcmp(alias->pmu_name, counter->pmu_name))
- break;
- alias->merged_stat = true;
- cb(config, alias, data, false);
+ /* Merge events with the same name, etc. but on different PMUs. */
+ if (!strcmp(evsel__name(alias), evsel__name(counter)) &&
+ alias->scale == counter->scale &&
+ alias->cgrp == counter->cgrp &&
+ !strcmp(alias->unit, counter->unit) &&
+ evsel__is_clock(alias) == evsel__is_clock(counter) &&
+ strcmp(alias->pmu_name, counter->pmu_name)) {
+ alias->merged_stat = true;
+ cb(config, alias, data, false);
+ }
}
}
@@ -608,6 +610,19 @@ static bool hybrid_uniquify(struct evsel *evsel)
return perf_pmu__has_hybrid() && !is_uncore(evsel);
}
+static bool hybrid_merge(struct evsel *counter, struct perf_stat_config *config,
+ bool check)
+{
+ if (hybrid_uniquify(counter)) {
+ if (check)
+ return config && config->hybrid_merge;
+ else
+ return config && !config->hybrid_merge;
+ }
+
+ return false;
+}
+
static bool collect_data(struct perf_stat_config *config, struct evsel *counter,
void (*cb)(struct perf_stat_config *config, struct evsel *counter, void *data,
bool first),
@@ -616,9 +631,9 @@ static bool collect_data(struct perf_stat_config *config, struct evsel *counter,
if (counter->merged_stat)
return false;
cb(config, counter, data, true);
- if (config->no_merge || hybrid_uniquify(counter))
+ if (config->no_merge || hybrid_merge(counter, config, false))
uniquify_event_name(counter);
- else if (counter->auto_merge_stats)
+ else if (counter->auto_merge_stats || hybrid_merge(counter, config, true))
collect_all_aliases(config, counter, cb, data);
return true;
}
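
Summarising the two hybrid_merge() call sites above (illustrative table; it only applies to hybrid, non-uncore events, for which hybrid_uniquify() is true - otherwise both calls return false):

	/*
	 *  config->hybrid_merge | hybrid_merge(c, cfg, false) | hybrid_merge(c, cfg, true)
	 *  ---------------------+-----------------------------+---------------------------
	 *  false                | true  -> uniquify name      | false
	 *  true                 | false                       | true  -> collect aliases
	 */
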
@@ -627,25 +642,28 @@ struct aggr_data {
u64 ena, run, val;
struct aggr_cpu_id id;
int nr;
- int cpu;
+ int cpu_map_idx;
};
static void aggr_cb(struct perf_stat_config *config,
struct evsel *counter, void *data, bool first)
{
struct aggr_data *ad = data;
- int cpu;
+ int idx;
+ struct perf_cpu cpu;
+ struct perf_cpu_map *cpus;
struct aggr_cpu_id s2;
- for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) {
+ cpus = evsel__cpus(counter);
+ perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
struct perf_counts_values *counts;
- s2 = config->aggr_get_id(config, evsel__cpus(counter), cpu);
- if (!cpu_map__compare_aggr_cpu_id(s2, ad->id))
+ s2 = config->aggr_get_id(config, cpu);
+ if (!aggr_cpu_id__equal(&s2, &ad->id))
continue;
if (first)
ad->nr++;
- counts = perf_counts(counter->counts, cpu, 0);
+ counts = perf_counts(counter->counts, idx, 0);
/*
* When any result is bad, make them all to give
* consistent output in interval mode.
@@ -665,7 +683,7 @@ static void aggr_cb(struct perf_stat_config *config,
static void print_counter_aggrdata(struct perf_stat_config *config,
struct evsel *counter, int s,
char *prefix, bool metric_only,
- bool *first, int cpu)
+ bool *first, struct perf_cpu cpu)
{
struct aggr_data ad;
FILE *output = config->output;
@@ -695,10 +713,9 @@ static void print_counter_aggrdata(struct perf_stat_config *config,
fprintf(output, "%s", prefix);
uval = val * counter->scale;
- if (cpu != -1) {
- id = cpu_map__empty_aggr_cpu_id();
- id.core = cpu;
- }
+ if (cpu.cpu != -1)
+ id = aggr_cpu_id__cpu(cpu, /*data=*/NULL);
+
printout(config, id, nr, counter, uval,
prefix, run, ena, 1.0, &rt_stat);
if (!metric_only)
@@ -731,8 +748,8 @@ static void print_aggr(struct perf_stat_config *config,
first = true;
evlist__for_each_entry(evlist, counter) {
print_counter_aggrdata(config, counter, s,
- prefix, metric_only,
- &first, -1);
+ prefix, metric_only,
+ &first, (struct perf_cpu){ .cpu = -1 });
}
if (metric_only)
fputc('\n', output);
@@ -747,11 +764,11 @@ static int cmp_val(const void *a, const void *b)
static struct perf_aggr_thread_value *sort_aggr_thread(
struct evsel *counter,
- int nthreads, int ncpus,
int *ret,
struct target *_target)
{
- int cpu, thread, i = 0;
+ int nthreads = perf_thread_map__nr(counter->core.threads);
+ int i = 0;
double uval;
struct perf_aggr_thread_value *buf;
@@ -759,13 +776,17 @@ static struct perf_aggr_thread_value *sort_aggr_thread(
if (!buf)
return NULL;
- for (thread = 0; thread < nthreads; thread++) {
+ for (int thread = 0; thread < nthreads; thread++) {
+ int idx;
u64 ena = 0, run = 0, val = 0;
- for (cpu = 0; cpu < ncpus; cpu++) {
- val += perf_counts(counter->counts, cpu, thread)->val;
- ena += perf_counts(counter->counts, cpu, thread)->ena;
- run += perf_counts(counter->counts, cpu, thread)->run;
+ perf_cpu_map__for_each_idx(idx, evsel__cpus(counter)) {
+ struct perf_counts_values *counts =
+ perf_counts(counter->counts, idx, thread);
+
+ val += counts->val;
+ ena += counts->ena;
+ run += counts->run;
}
uval = val * counter->scale;
@@ -778,7 +799,7 @@ static struct perf_aggr_thread_value *sort_aggr_thread(
continue;
buf[i].counter = counter;
- buf[i].id = cpu_map__empty_aggr_cpu_id();
+ buf[i].id = aggr_cpu_id__empty();
buf[i].id.thread = thread;
buf[i].uval = uval;
buf[i].val = val;
@@ -800,13 +821,11 @@ static void print_aggr_thread(struct perf_stat_config *config,
struct evsel *counter, char *prefix)
{
FILE *output = config->output;
- int nthreads = perf_thread_map__nr(counter->core.threads);
- int ncpus = perf_cpu_map__nr(counter->core.cpus);
int thread, sorted_threads;
struct aggr_cpu_id id;
struct perf_aggr_thread_value *buf;
- buf = sort_aggr_thread(counter, nthreads, ncpus, &sorted_threads, _target);
+ buf = sort_aggr_thread(counter, &sorted_threads, _target);
if (!buf) {
perror("cannot sort aggr thread");
return;
@@ -866,7 +885,7 @@ static void print_counter_aggr(struct perf_stat_config *config,
fprintf(output, "%s", prefix);
uval = cd.avg * counter->scale;
- printout(config, cpu_map__empty_aggr_cpu_id(), 0, counter, uval, prefix, cd.avg_running,
+ printout(config, aggr_cpu_id__empty(), 0, counter, uval, prefix, cd.avg_running,
cd.avg_enabled, cd.avg, &rt_stat);
if (!metric_only)
fprintf(output, "\n");
@@ -878,9 +897,9 @@ static void counter_cb(struct perf_stat_config *config __maybe_unused,
{
struct aggr_data *ad = data;
- ad->val += perf_counts(counter->counts, ad->cpu, 0)->val;
- ad->ena += perf_counts(counter->counts, ad->cpu, 0)->ena;
- ad->run += perf_counts(counter->counts, ad->cpu, 0)->run;
+ ad->val += perf_counts(counter->counts, ad->cpu_map_idx, 0)->val;
+ ad->ena += perf_counts(counter->counts, ad->cpu_map_idx, 0)->ena;
+ ad->run += perf_counts(counter->counts, ad->cpu_map_idx, 0)->run;
}
/*
@@ -893,11 +912,12 @@ static void print_counter(struct perf_stat_config *config,
FILE *output = config->output;
u64 ena, run, val;
double uval;
- int cpu;
+ int idx;
+ struct perf_cpu cpu;
struct aggr_cpu_id id;
- for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) {
- struct aggr_data ad = { .cpu = cpu };
+ perf_cpu_map__for_each_cpu(cpu, idx, evsel__cpus(counter)) {
+ struct aggr_data ad = { .cpu_map_idx = idx };
if (!collect_data(config, counter, counter_cb, &ad))
return;
@@ -909,8 +929,7 @@ static void print_counter(struct perf_stat_config *config,
fprintf(output, "%s", prefix);
uval = val * counter->scale;
- id = cpu_map__empty_aggr_cpu_id();
- id.core = cpu;
+ id = aggr_cpu_id__cpu(cpu, /*data=*/NULL);
printout(config, id, 0, counter, uval, prefix,
run, ena, 1.0, &rt_stat);
@@ -922,35 +941,39 @@ static void print_no_aggr_metric(struct perf_stat_config *config,
struct evlist *evlist,
char *prefix)
{
- int cpu;
- int nrcpus = 0;
- struct evsel *counter;
- u64 ena, run, val;
- double uval;
- struct aggr_cpu_id id;
+ int all_idx;
+ struct perf_cpu cpu;
- nrcpus = evlist->core.cpus->nr;
- for (cpu = 0; cpu < nrcpus; cpu++) {
+ perf_cpu_map__for_each_cpu(cpu, all_idx, evlist->core.user_requested_cpus) {
+ struct evsel *counter;
bool first = true;
- if (prefix)
- fputs(prefix, config->output);
evlist__for_each_entry(evlist, counter) {
- id = cpu_map__empty_aggr_cpu_id();
- id.core = cpu;
+ u64 ena, run, val;
+ double uval;
+ struct aggr_cpu_id id;
+ int counter_idx = perf_cpu_map__idx(evsel__cpus(counter), cpu);
+
+ if (counter_idx < 0)
+ continue;
+
+ id = aggr_cpu_id__cpu(cpu, /*data=*/NULL);
if (first) {
+ if (prefix)
+ fputs(prefix, config->output);
aggr_printout(config, counter, id, 0);
first = false;
}
- val = perf_counts(counter->counts, cpu, 0)->val;
- ena = perf_counts(counter->counts, cpu, 0)->ena;
- run = perf_counts(counter->counts, cpu, 0)->run;
+ val = perf_counts(counter->counts, counter_idx, 0)->val;
+ ena = perf_counts(counter->counts, counter_idx, 0)->ena;
+ run = perf_counts(counter->counts, counter_idx, 0)->run;
uval = val * counter->scale;
printout(config, id, 0, counter, uval, prefix,
run, ena, 1.0, &rt_stat);
}
- fputc('\n', config->output);
+ if (!first)
+ fputc('\n', config->output);
}
}
@@ -1208,19 +1231,23 @@ static void print_percore_thread(struct perf_stat_config *config,
{
int s;
struct aggr_cpu_id s2, id;
+ struct perf_cpu_map *cpus;
bool first = true;
+ int idx;
+ struct perf_cpu cpu;
- for (int i = 0; i < evsel__nr_cpus(counter); i++) {
- s2 = config->aggr_get_id(config, evsel__cpus(counter), i);
+ cpus = evsel__cpus(counter);
+ perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
+ s2 = config->aggr_get_id(config, cpu);
for (s = 0; s < config->aggr_map->nr; s++) {
id = config->aggr_map->map[s];
- if (cpu_map__compare_aggr_cpu_id(s2, id))
+ if (aggr_cpu_id__equal(&s2, &id))
break;
}
print_counter_aggrdata(config, counter, s,
prefix, false,
- &first, i);
+ &first, cpu);
}
}
@@ -1243,8 +1270,8 @@ static void print_percore(struct perf_stat_config *config,
fprintf(output, "%s", prefix);
print_counter_aggrdata(config, counter, s,
- prefix, metric_only,
- &first, -1);
+ prefix, metric_only,
+ &first, (struct perf_cpu){ .cpu = -1 });
}
if (metric_only)
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 5c7308efa768..979c8cb918f7 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -26,13 +26,14 @@
struct runtime_stat rt_stat;
struct stats walltime_nsecs_stats;
+struct rusage_stats ru_stats;
struct saved_value {
struct rb_node rb_node;
struct evsel *evsel;
enum stat_type type;
int ctx;
- int cpu;
+ int cpu_map_idx;
struct cgroup *cgrp;
struct runtime_stat *stat;
struct stats stats;
@@ -47,8 +48,8 @@ static int saved_value_cmp(struct rb_node *rb_node, const void *entry)
rb_node);
const struct saved_value *b = entry;
- if (a->cpu != b->cpu)
- return a->cpu - b->cpu;
+ if (a->cpu_map_idx != b->cpu_map_idx)
+ return a->cpu_map_idx - b->cpu_map_idx;
/*
* Previously the rbtree was used to link generic metrics.
@@ -105,7 +106,7 @@ static void saved_value_delete(struct rblist *rblist __maybe_unused,
}
static struct saved_value *saved_value_lookup(struct evsel *evsel,
- int cpu,
+ int cpu_map_idx,
bool create,
enum stat_type type,
int ctx,
@@ -115,7 +116,7 @@ static struct saved_value *saved_value_lookup(struct evsel *evsel,
struct rblist *rblist;
struct rb_node *nd;
struct saved_value dm = {
- .cpu = cpu,
+ .cpu_map_idx = cpu_map_idx,
.evsel = evsel,
.type = type,
.ctx = ctx,
@@ -199,6 +200,7 @@ void perf_stat__reset_shadow_stats(void)
{
reset_stat(&rt_stat);
memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
+ memset(&ru_stats, 0, sizeof(ru_stats));
}
void perf_stat__reset_shadow_per_stat(struct runtime_stat *st)
@@ -213,10 +215,10 @@ struct runtime_stat_data {
static void update_runtime_stat(struct runtime_stat *st,
enum stat_type type,
- int cpu, u64 count,
+ int cpu_map_idx, u64 count,
struct runtime_stat_data *rsd)
{
- struct saved_value *v = saved_value_lookup(NULL, cpu, true, type,
+ struct saved_value *v = saved_value_lookup(NULL, cpu_map_idx, true, type,
rsd->ctx, st, rsd->cgrp);
if (v)
@@ -229,7 +231,7 @@ static void update_runtime_stat(struct runtime_stat *st,
* instruction rates, etc:
*/
void perf_stat__update_shadow_stats(struct evsel *counter, u64 count,
- int cpu, struct runtime_stat *st)
+ int cpu_map_idx, struct runtime_stat *st)
{
u64 count_ns = count;
struct saved_value *v;
@@ -241,88 +243,88 @@ void perf_stat__update_shadow_stats(struct evsel *counter, u64 count,
count *= counter->scale;
if (evsel__is_clock(counter))
- update_runtime_stat(st, STAT_NSECS, cpu, count_ns, &rsd);
+ update_runtime_stat(st, STAT_NSECS, cpu_map_idx, count_ns, &rsd);
else if (evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
- update_runtime_stat(st, STAT_CYCLES, cpu, count, &rsd);
+ update_runtime_stat(st, STAT_CYCLES, cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
- update_runtime_stat(st, STAT_CYCLES_IN_TX, cpu, count, &rsd);
+ update_runtime_stat(st, STAT_CYCLES_IN_TX, cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TRANSACTION_START))
- update_runtime_stat(st, STAT_TRANSACTION, cpu, count, &rsd);
+ update_runtime_stat(st, STAT_TRANSACTION, cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, ELISION_START))
- update_runtime_stat(st, STAT_ELISION, cpu, count, &rsd);
+ update_runtime_stat(st, STAT_ELISION, cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS))
update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS,
- cpu, count, &rsd);
+ cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED))
update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED,
- cpu, count, &rsd);
+ cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED))
update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED,
- cpu, count, &rsd);
+ cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES))
update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES,
- cpu, count, &rsd);
+ cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES,
- cpu, count, &rsd);
+ cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_RETIRING))
update_runtime_stat(st, STAT_TOPDOWN_RETIRING,
- cpu, count, &rsd);
+ cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_BAD_SPEC))
update_runtime_stat(st, STAT_TOPDOWN_BAD_SPEC,
- cpu, count, &rsd);
+ cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_FE_BOUND))
update_runtime_stat(st, STAT_TOPDOWN_FE_BOUND,
- cpu, count, &rsd);
+ cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_BE_BOUND))
update_runtime_stat(st, STAT_TOPDOWN_BE_BOUND,
- cpu, count, &rsd);
+ cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_HEAVY_OPS))
update_runtime_stat(st, STAT_TOPDOWN_HEAVY_OPS,
- cpu, count, &rsd);
+ cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_BR_MISPREDICT))
update_runtime_stat(st, STAT_TOPDOWN_BR_MISPREDICT,
- cpu, count, &rsd);
+ cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_LAT))
update_runtime_stat(st, STAT_TOPDOWN_FETCH_LAT,
- cpu, count, &rsd);
+ cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_MEM_BOUND))
update_runtime_stat(st, STAT_TOPDOWN_MEM_BOUND,
- cpu, count, &rsd);
+ cpu_map_idx, count, &rsd);
else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT,
- cpu, count, &rsd);
+ cpu_map_idx, count, &rsd);
else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
update_runtime_stat(st, STAT_STALLED_CYCLES_BACK,
- cpu, count, &rsd);
+ cpu_map_idx, count, &rsd);
else if (evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
- update_runtime_stat(st, STAT_BRANCHES, cpu, count, &rsd);
+ update_runtime_stat(st, STAT_BRANCHES, cpu_map_idx, count, &rsd);
else if (evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
- update_runtime_stat(st, STAT_CACHEREFS, cpu, count, &rsd);
+ update_runtime_stat(st, STAT_CACHEREFS, cpu_map_idx, count, &rsd);
else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
- update_runtime_stat(st, STAT_L1_DCACHE, cpu, count, &rsd);
+ update_runtime_stat(st, STAT_L1_DCACHE, cpu_map_idx, count, &rsd);
else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
- update_runtime_stat(st, STAT_L1_ICACHE, cpu, count, &rsd);
+ update_runtime_stat(st, STAT_L1_ICACHE, cpu_map_idx, count, &rsd);
else if (evsel__match(counter, HW_CACHE, HW_CACHE_LL))
- update_runtime_stat(st, STAT_LL_CACHE, cpu, count, &rsd);
+ update_runtime_stat(st, STAT_LL_CACHE, cpu_map_idx, count, &rsd);
else if (evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
- update_runtime_stat(st, STAT_DTLB_CACHE, cpu, count, &rsd);
+ update_runtime_stat(st, STAT_DTLB_CACHE, cpu_map_idx, count, &rsd);
else if (evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
- update_runtime_stat(st, STAT_ITLB_CACHE, cpu, count, &rsd);
+ update_runtime_stat(st, STAT_ITLB_CACHE, cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, SMI_NUM))
- update_runtime_stat(st, STAT_SMI_NUM, cpu, count, &rsd);
+ update_runtime_stat(st, STAT_SMI_NUM, cpu_map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, APERF))
- update_runtime_stat(st, STAT_APERF, cpu, count, &rsd);
+ update_runtime_stat(st, STAT_APERF, cpu_map_idx, count, &rsd);
if (counter->collect_stat) {
- v = saved_value_lookup(counter, cpu, true, STAT_NONE, 0, st,
+ v = saved_value_lookup(counter, cpu_map_idx, true, STAT_NONE, 0, st,
rsd.cgrp);
update_stats(&v->stats, count);
if (counter->metric_leader)
v->metric_total += count;
} else if (counter->metric_leader) {
v = saved_value_lookup(counter->metric_leader,
- cpu, true, STAT_NONE, 0, st, rsd.cgrp);
+ cpu_map_idx, true, STAT_NONE, 0, st, rsd.cgrp);
v->metric_total += count;
v->metric_other++;
}
@@ -464,12 +466,12 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list)
}
static double runtime_stat_avg(struct runtime_stat *st,
- enum stat_type type, int cpu,
+ enum stat_type type, int cpu_map_idx,
struct runtime_stat_data *rsd)
{
struct saved_value *v;
- v = saved_value_lookup(NULL, cpu, false, type, rsd->ctx, st, rsd->cgrp);
+ v = saved_value_lookup(NULL, cpu_map_idx, false, type, rsd->ctx, st, rsd->cgrp);
if (!v)
return 0.0;
@@ -477,12 +479,12 @@ static double runtime_stat_avg(struct runtime_stat *st,
}
static double runtime_stat_n(struct runtime_stat *st,
- enum stat_type type, int cpu,
+ enum stat_type type, int cpu_map_idx,
struct runtime_stat_data *rsd)
{
struct saved_value *v;
- v = saved_value_lookup(NULL, cpu, false, type, rsd->ctx, st, rsd->cgrp);
+ v = saved_value_lookup(NULL, cpu_map_idx, false, type, rsd->ctx, st, rsd->cgrp);
if (!v)
return 0.0;
@@ -490,7 +492,7 @@ static double runtime_stat_n(struct runtime_stat *st,
}
static void print_stalled_cycles_frontend(struct perf_stat_config *config,
- int cpu, double avg,
+ int cpu_map_idx, double avg,
struct perf_stat_output_ctx *out,
struct runtime_stat *st,
struct runtime_stat_data *rsd)
@@ -498,7 +500,7 @@ static void print_stalled_cycles_frontend(struct perf_stat_config *config,
double total, ratio = 0.0;
const char *color;
- total = runtime_stat_avg(st, STAT_CYCLES, cpu, rsd);
+ total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, rsd);
if (total)
ratio = avg / total * 100.0;
@@ -513,7 +515,7 @@ static void print_stalled_cycles_frontend(struct perf_stat_config *config,
}
static void print_stalled_cycles_backend(struct perf_stat_config *config,
- int cpu, double avg,
+ int cpu_map_idx, double avg,
struct perf_stat_output_ctx *out,
struct runtime_stat *st,
struct runtime_stat_data *rsd)
@@ -521,7 +523,7 @@ static void print_stalled_cycles_backend(struct perf_stat_config *config,
double total, ratio = 0.0;
const char *color;
- total = runtime_stat_avg(st, STAT_CYCLES, cpu, rsd);
+ total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, rsd);
if (total)
ratio = avg / total * 100.0;
@@ -532,7 +534,7 @@ static void print_stalled_cycles_backend(struct perf_stat_config *config,
}
static void print_branch_misses(struct perf_stat_config *config,
- int cpu, double avg,
+ int cpu_map_idx, double avg,
struct perf_stat_output_ctx *out,
struct runtime_stat *st,
struct runtime_stat_data *rsd)
@@ -540,7 +542,7 @@ static void print_branch_misses(struct perf_stat_config *config,
double total, ratio = 0.0;
const char *color;
- total = runtime_stat_avg(st, STAT_BRANCHES, cpu, rsd);
+ total = runtime_stat_avg(st, STAT_BRANCHES, cpu_map_idx, rsd);
if (total)
ratio = avg / total * 100.0;
@@ -551,7 +553,7 @@ static void print_branch_misses(struct perf_stat_config *config,
}
static void print_l1_dcache_misses(struct perf_stat_config *config,
- int cpu, double avg,
+ int cpu_map_idx, double avg,
struct perf_stat_output_ctx *out,
struct runtime_stat *st,
struct runtime_stat_data *rsd)
@@ -559,7 +561,7 @@ static void print_l1_dcache_misses(struct perf_stat_config *config,
double total, ratio = 0.0;
const char *color;
- total = runtime_stat_avg(st, STAT_L1_DCACHE, cpu, rsd);
+ total = runtime_stat_avg(st, STAT_L1_DCACHE, cpu_map_idx, rsd);
if (total)
ratio = avg / total * 100.0;
@@ -570,7 +572,7 @@ static void print_l1_dcache_misses(struct perf_stat_config *config,
}
static void print_l1_icache_misses(struct perf_stat_config *config,
- int cpu, double avg,
+ int cpu_map_idx, double avg,
struct perf_stat_output_ctx *out,
struct runtime_stat *st,
struct runtime_stat_data *rsd)
@@ -578,7 +580,7 @@ static void print_l1_icache_misses(struct perf_stat_config *config,
double total, ratio = 0.0;
const char *color;
- total = runtime_stat_avg(st, STAT_L1_ICACHE, cpu, rsd);
+ total = runtime_stat_avg(st, STAT_L1_ICACHE, cpu_map_idx, rsd);
if (total)
ratio = avg / total * 100.0;
@@ -588,7 +590,7 @@ static void print_l1_icache_misses(struct perf_stat_config *config,
}
static void print_dtlb_cache_misses(struct perf_stat_config *config,
- int cpu, double avg,
+ int cpu_map_idx, double avg,
struct perf_stat_output_ctx *out,
struct runtime_stat *st,
struct runtime_stat_data *rsd)
@@ -596,7 +598,7 @@ static void print_dtlb_cache_misses(struct perf_stat_config *config,
double total, ratio = 0.0;
const char *color;
- total = runtime_stat_avg(st, STAT_DTLB_CACHE, cpu, rsd);
+ total = runtime_stat_avg(st, STAT_DTLB_CACHE, cpu_map_idx, rsd);
if (total)
ratio = avg / total * 100.0;
@@ -606,7 +608,7 @@ static void print_dtlb_cache_misses(struct perf_stat_config *config,
}
static void print_itlb_cache_misses(struct perf_stat_config *config,
- int cpu, double avg,
+ int cpu_map_idx, double avg,
struct perf_stat_output_ctx *out,
struct runtime_stat *st,
struct runtime_stat_data *rsd)
@@ -614,7 +616,7 @@ static void print_itlb_cache_misses(struct perf_stat_config *config,
double total, ratio = 0.0;
const char *color;
- total = runtime_stat_avg(st, STAT_ITLB_CACHE, cpu, rsd);
+ total = runtime_stat_avg(st, STAT_ITLB_CACHE, cpu_map_idx, rsd);
if (total)
ratio = avg / total * 100.0;
@@ -624,7 +626,7 @@ static void print_itlb_cache_misses(struct perf_stat_config *config,
}
static void print_ll_cache_misses(struct perf_stat_config *config,
- int cpu, double avg,
+ int cpu_map_idx, double avg,
struct perf_stat_output_ctx *out,
struct runtime_stat *st,
struct runtime_stat_data *rsd)
@@ -632,7 +634,7 @@ static void print_ll_cache_misses(struct perf_stat_config *config,
double total, ratio = 0.0;
const char *color;
- total = runtime_stat_avg(st, STAT_LL_CACHE, cpu, rsd);
+ total = runtime_stat_avg(st, STAT_LL_CACHE, cpu_map_idx, rsd);
if (total)
ratio = avg / total * 100.0;
@@ -690,61 +692,61 @@ static double sanitize_val(double x)
return x;
}
-static double td_total_slots(int cpu, struct runtime_stat *st,
+static double td_total_slots(int cpu_map_idx, struct runtime_stat *st,
struct runtime_stat_data *rsd)
{
- return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, cpu, rsd);
+ return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, cpu_map_idx, rsd);
}
-static double td_bad_spec(int cpu, struct runtime_stat *st,
+static double td_bad_spec(int cpu_map_idx, struct runtime_stat *st,
struct runtime_stat_data *rsd)
{
double bad_spec = 0;
double total_slots;
double total;
- total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, cpu, rsd) -
- runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, cpu, rsd) +
- runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, cpu, rsd);
+ total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, cpu_map_idx, rsd) -
+ runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, cpu_map_idx, rsd) +
+ runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, cpu_map_idx, rsd);
- total_slots = td_total_slots(cpu, st, rsd);
+ total_slots = td_total_slots(cpu_map_idx, st, rsd);
if (total_slots)
bad_spec = total / total_slots;
return sanitize_val(bad_spec);
}
-static double td_retiring(int cpu, struct runtime_stat *st,
+static double td_retiring(int cpu_map_idx, struct runtime_stat *st,
struct runtime_stat_data *rsd)
{
double retiring = 0;
- double total_slots = td_total_slots(cpu, st, rsd);
+ double total_slots = td_total_slots(cpu_map_idx, st, rsd);
double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED,
- cpu, rsd);
+ cpu_map_idx, rsd);
if (total_slots)
retiring = ret_slots / total_slots;
return retiring;
}
-static double td_fe_bound(int cpu, struct runtime_stat *st,
+static double td_fe_bound(int cpu_map_idx, struct runtime_stat *st,
struct runtime_stat_data *rsd)
{
double fe_bound = 0;
- double total_slots = td_total_slots(cpu, st, rsd);
+ double total_slots = td_total_slots(cpu_map_idx, st, rsd);
double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES,
- cpu, rsd);
+ cpu_map_idx, rsd);
if (total_slots)
fe_bound = fetch_bub / total_slots;
return fe_bound;
}
-static double td_be_bound(int cpu, struct runtime_stat *st,
+static double td_be_bound(int cpu_map_idx, struct runtime_stat *st,
struct runtime_stat_data *rsd)
{
- double sum = (td_fe_bound(cpu, st, rsd) +
- td_bad_spec(cpu, st, rsd) +
- td_retiring(cpu, st, rsd));
+ double sum = (td_fe_bound(cpu_map_idx, st, rsd) +
+ td_bad_spec(cpu_map_idx, st, rsd) +
+ td_retiring(cpu_map_idx, st, rsd));
if (sum == 0)
return 0;
return sanitize_val(1.0 - sum);
@@ -755,15 +757,15 @@ static double td_be_bound(int cpu, struct runtime_stat *st,
* the ratios we need to recreate the sum.
*/
-static double td_metric_ratio(int cpu, enum stat_type type,
+static double td_metric_ratio(int cpu_map_idx, enum stat_type type,
struct runtime_stat *stat,
struct runtime_stat_data *rsd)
{
- double sum = runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu, rsd) +
- runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu, rsd) +
- runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu, rsd) +
- runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu, rsd);
- double d = runtime_stat_avg(stat, type, cpu, rsd);
+ double sum = runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu_map_idx, rsd) +
+ runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu_map_idx, rsd) +
+ runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu_map_idx, rsd) +
+ runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu_map_idx, rsd);
+ double d = runtime_stat_avg(stat, type, cpu_map_idx, rsd);
if (sum)
return d / sum;
@@ -775,23 +777,23 @@ static double td_metric_ratio(int cpu, enum stat_type type,
* We allow two missing.
*/
-static bool full_td(int cpu, struct runtime_stat *stat,
+static bool full_td(int cpu_map_idx, struct runtime_stat *stat,
struct runtime_stat_data *rsd)
{
int c = 0;
- if (runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu, rsd) > 0)
+ if (runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu_map_idx, rsd) > 0)
c++;
- if (runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu, rsd) > 0)
+ if (runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu_map_idx, rsd) > 0)
c++;
- if (runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu, rsd) > 0)
+ if (runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu_map_idx, rsd) > 0)
c++;
- if (runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu, rsd) > 0)
+ if (runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu_map_idx, rsd) > 0)
c++;
return c >= 2;
}
-static void print_smi_cost(struct perf_stat_config *config, int cpu,
+static void print_smi_cost(struct perf_stat_config *config, int cpu_map_idx,
struct perf_stat_output_ctx *out,
struct runtime_stat *st,
struct runtime_stat_data *rsd)
@@ -799,9 +801,9 @@ static void print_smi_cost(struct perf_stat_config *config, int cpu,
double smi_num, aperf, cycles, cost = 0.0;
const char *color = NULL;
- smi_num = runtime_stat_avg(st, STAT_SMI_NUM, cpu, rsd);
- aperf = runtime_stat_avg(st, STAT_APERF, cpu, rsd);
- cycles = runtime_stat_avg(st, STAT_CYCLES, cpu, rsd);
+ smi_num = runtime_stat_avg(st, STAT_SMI_NUM, cpu_map_idx, rsd);
+ aperf = runtime_stat_avg(st, STAT_APERF, cpu_map_idx, rsd);
+ cycles = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, rsd);
if ((cycles == 0) || (aperf == 0))
return;
@@ -818,7 +820,7 @@ static void print_smi_cost(struct perf_stat_config *config, int cpu,
static int prepare_metric(struct evsel **metric_events,
struct metric_ref *metric_refs,
struct expr_parse_ctx *pctx,
- int cpu,
+ int cpu_map_idx,
struct runtime_stat *st)
{
double scale;
@@ -831,12 +833,33 @@ static int prepare_metric(struct evsel **metric_events,
u64 metric_total = 0;
int source_count;
- if (!strcmp(metric_events[i]->name, "duration_time")) {
- stats = &walltime_nsecs_stats;
- scale = 1e-9;
+ if (evsel__is_tool(metric_events[i])) {
source_count = 1;
+ switch (metric_events[i]->tool_event) {
+ case PERF_TOOL_DURATION_TIME:
+ stats = &walltime_nsecs_stats;
+ scale = 1e-9;
+ break;
+ case PERF_TOOL_USER_TIME:
+ stats = &ru_stats.ru_utime_usec_stat;
+ scale = 1e-6;
+ break;
+ case PERF_TOOL_SYSTEM_TIME:
+ stats = &ru_stats.ru_stime_usec_stat;
+ scale = 1e-6;
+ break;
+ case PERF_TOOL_NONE:
+ pr_err("Invalid tool event 'none'");
+ abort();
+ case PERF_TOOL_MAX:
+ pr_err("Invalid tool event 'max'");
+ abort();
+ default:
+ pr_err("Unknown tool event '%s'", evsel__name(metric_events[i]));
+ abort();
+ }
} else {
- v = saved_value_lookup(metric_events[i], cpu, false,
+ v = saved_value_lookup(metric_events[i], cpu_map_idx, false,
STAT_NONE, 0, st,
metric_events[i]->cgrp);
if (!v)
@@ -874,7 +897,7 @@ static void generic_metric(struct perf_stat_config *config,
const char *metric_name,
const char *metric_unit,
int runtime,
- int cpu,
+ int cpu_map_idx,
struct perf_stat_output_ctx *out,
struct runtime_stat *st)
{
@@ -889,7 +912,7 @@ static void generic_metric(struct perf_stat_config *config,
return;
pctx->runtime = runtime;
- i = prepare_metric(metric_events, metric_refs, pctx, cpu, st);
+ i = prepare_metric(metric_events, metric_refs, pctx, cpu_map_idx, st);
if (i < 0) {
expr__ctx_free(pctx);
return;
@@ -934,7 +957,7 @@ static void generic_metric(struct perf_stat_config *config,
expr__ctx_free(pctx);
}
-double test_generic_metric(struct metric_expr *mexp, int cpu, struct runtime_stat *st)
+double test_generic_metric(struct metric_expr *mexp, int cpu_map_idx, struct runtime_stat *st)
{
struct expr_parse_ctx *pctx;
double ratio = 0.0;
@@ -943,7 +966,7 @@ double test_generic_metric(struct metric_expr *mexp, int cpu, struct runtime_sta
if (!pctx)
return NAN;
- if (prepare_metric(mexp->metric_events, mexp->metric_refs, pctx, cpu, st) < 0)
+ if (prepare_metric(mexp->metric_events, mexp->metric_refs, pctx, cpu_map_idx, st) < 0)
goto out;
if (expr__parse(&ratio, pctx, mexp->metric_expr))
@@ -956,7 +979,7 @@ out:
void perf_stat__print_shadow_stats(struct perf_stat_config *config,
struct evsel *evsel,
- double avg, int cpu,
+ double avg, int cpu_map_idx,
struct perf_stat_output_ctx *out,
struct rblist *metric_events,
struct runtime_stat *st)
@@ -975,7 +998,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
if (config->iostat_run) {
iostat_print_metric(config, evsel, out);
} else if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
- total = runtime_stat_avg(st, STAT_CYCLES, cpu, &rsd);
+ total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, &rsd);
if (total) {
ratio = avg / total;
@@ -985,11 +1008,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0);
}
- total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT, cpu, &rsd);
+ total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT, cpu_map_idx, &rsd);
total = max(total, runtime_stat_avg(st,
STAT_STALLED_CYCLES_BACK,
- cpu, &rsd));
+ cpu_map_idx, &rsd));
if (total && avg) {
out->new_line(config, ctxp);
@@ -999,8 +1022,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
ratio);
}
} else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
- if (runtime_stat_n(st, STAT_BRANCHES, cpu, &rsd) != 0)
- print_branch_misses(config, cpu, avg, out, st, &rsd);
+ if (runtime_stat_n(st, STAT_BRANCHES, cpu_map_idx, &rsd) != 0)
+ print_branch_misses(config, cpu_map_idx, avg, out, st, &rsd);
else
print_metric(config, ctxp, NULL, NULL, "of all branches", 0);
} else if (
@@ -1009,8 +1032,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
- if (runtime_stat_n(st, STAT_L1_DCACHE, cpu, &rsd) != 0)
- print_l1_dcache_misses(config, cpu, avg, out, st, &rsd);
+ if (runtime_stat_n(st, STAT_L1_DCACHE, cpu_map_idx, &rsd) != 0)
+ print_l1_dcache_misses(config, cpu_map_idx, avg, out, st, &rsd);
else
print_metric(config, ctxp, NULL, NULL, "of all L1-dcache accesses", 0);
} else if (
@@ -1019,8 +1042,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
- if (runtime_stat_n(st, STAT_L1_ICACHE, cpu, &rsd) != 0)
- print_l1_icache_misses(config, cpu, avg, out, st, &rsd);
+ if (runtime_stat_n(st, STAT_L1_ICACHE, cpu_map_idx, &rsd) != 0)
+ print_l1_icache_misses(config, cpu_map_idx, avg, out, st, &rsd);
else
print_metric(config, ctxp, NULL, NULL, "of all L1-icache accesses", 0);
} else if (
@@ -1029,8 +1052,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
- if (runtime_stat_n(st, STAT_DTLB_CACHE, cpu, &rsd) != 0)
- print_dtlb_cache_misses(config, cpu, avg, out, st, &rsd);
+ if (runtime_stat_n(st, STAT_DTLB_CACHE, cpu_map_idx, &rsd) != 0)
+ print_dtlb_cache_misses(config, cpu_map_idx, avg, out, st, &rsd);
else
print_metric(config, ctxp, NULL, NULL, "of all dTLB cache accesses", 0);
} else if (
@@ -1039,8 +1062,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
- if (runtime_stat_n(st, STAT_ITLB_CACHE, cpu, &rsd) != 0)
- print_itlb_cache_misses(config, cpu, avg, out, st, &rsd);
+ if (runtime_stat_n(st, STAT_ITLB_CACHE, cpu_map_idx, &rsd) != 0)
+ print_itlb_cache_misses(config, cpu_map_idx, avg, out, st, &rsd);
else
print_metric(config, ctxp, NULL, NULL, "of all iTLB cache accesses", 0);
} else if (
@@ -1049,27 +1072,27 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
- if (runtime_stat_n(st, STAT_LL_CACHE, cpu, &rsd) != 0)
- print_ll_cache_misses(config, cpu, avg, out, st, &rsd);
+ if (runtime_stat_n(st, STAT_LL_CACHE, cpu_map_idx, &rsd) != 0)
+ print_ll_cache_misses(config, cpu_map_idx, avg, out, st, &rsd);
else
print_metric(config, ctxp, NULL, NULL, "of all LL-cache accesses", 0);
} else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
- total = runtime_stat_avg(st, STAT_CACHEREFS, cpu, &rsd);
+ total = runtime_stat_avg(st, STAT_CACHEREFS, cpu_map_idx, &rsd);
if (total)
ratio = avg * 100 / total;
- if (runtime_stat_n(st, STAT_CACHEREFS, cpu, &rsd) != 0)
+ if (runtime_stat_n(st, STAT_CACHEREFS, cpu_map_idx, &rsd) != 0)
print_metric(config, ctxp, NULL, "%8.3f %%",
"of all cache refs", ratio);
else
print_metric(config, ctxp, NULL, NULL, "of all cache refs", 0);
} else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
- print_stalled_cycles_frontend(config, cpu, avg, out, st, &rsd);
+ print_stalled_cycles_frontend(config, cpu_map_idx, avg, out, st, &rsd);
} else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
- print_stalled_cycles_backend(config, cpu, avg, out, st, &rsd);
+ print_stalled_cycles_backend(config, cpu_map_idx, avg, out, st, &rsd);
} else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
- total = runtime_stat_avg(st, STAT_NSECS, cpu, &rsd);
+ total = runtime_stat_avg(st, STAT_NSECS, cpu_map_idx, &rsd);
if (total) {
ratio = avg / total;
@@ -1078,7 +1101,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, NULL, NULL, "Ghz", 0);
}
} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
- total = runtime_stat_avg(st, STAT_CYCLES, cpu, &rsd);
+ total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, &rsd);
if (total)
print_metric(config, ctxp, NULL,
@@ -1088,8 +1111,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, NULL, NULL, "transactional cycles",
0);
} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
- total = runtime_stat_avg(st, STAT_CYCLES, cpu, &rsd);
- total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu, &rsd);
+ total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, &rsd);
+ total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu_map_idx, &rsd);
if (total2 < avg)
total2 = avg;
@@ -1099,19 +1122,19 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
else
print_metric(config, ctxp, NULL, NULL, "aborted cycles", 0);
} else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) {
- total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu, &rsd);
+ total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu_map_idx, &rsd);
if (avg)
ratio = total / avg;
- if (runtime_stat_n(st, STAT_CYCLES_IN_TX, cpu, &rsd) != 0)
+ if (runtime_stat_n(st, STAT_CYCLES_IN_TX, cpu_map_idx, &rsd) != 0)
print_metric(config, ctxp, NULL, "%8.0f",
"cycles / transaction", ratio);
else
print_metric(config, ctxp, NULL, NULL, "cycles / transaction",
0);
} else if (perf_stat_evsel__is(evsel, ELISION_START)) {
- total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu, &rsd);
+ total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu_map_idx, &rsd);
if (avg)
ratio = total / avg;
@@ -1124,28 +1147,28 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
else
print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) {
- double fe_bound = td_fe_bound(cpu, st, &rsd);
+ double fe_bound = td_fe_bound(cpu_map_idx, st, &rsd);
if (fe_bound > 0.2)
color = PERF_COLOR_RED;
print_metric(config, ctxp, color, "%8.1f%%", "frontend bound",
fe_bound * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) {
- double retiring = td_retiring(cpu, st, &rsd);
+ double retiring = td_retiring(cpu_map_idx, st, &rsd);
if (retiring > 0.7)
color = PERF_COLOR_GREEN;
print_metric(config, ctxp, color, "%8.1f%%", "retiring",
retiring * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) {
- double bad_spec = td_bad_spec(cpu, st, &rsd);
+ double bad_spec = td_bad_spec(cpu_map_idx, st, &rsd);
if (bad_spec > 0.1)
color = PERF_COLOR_RED;
print_metric(config, ctxp, color, "%8.1f%%", "bad speculation",
bad_spec * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) {
- double be_bound = td_be_bound(cpu, st, &rsd);
+ double be_bound = td_be_bound(cpu_map_idx, st, &rsd);
const char *name = "backend bound";
static int have_recovery_bubbles = -1;
@@ -1158,14 +1181,14 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
if (be_bound > 0.2)
color = PERF_COLOR_RED;
- if (td_total_slots(cpu, st, &rsd) > 0)
+ if (td_total_slots(cpu_map_idx, st, &rsd) > 0)
print_metric(config, ctxp, color, "%8.1f%%", name,
be_bound * 100.);
else
print_metric(config, ctxp, NULL, NULL, name, 0);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_RETIRING) &&
- full_td(cpu, st, &rsd)) {
- double retiring = td_metric_ratio(cpu,
+ full_td(cpu_map_idx, st, &rsd)) {
+ double retiring = td_metric_ratio(cpu_map_idx,
STAT_TOPDOWN_RETIRING, st,
&rsd);
if (retiring > 0.7)
@@ -1173,8 +1196,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, color, "%8.1f%%", "retiring",
retiring * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_FE_BOUND) &&
- full_td(cpu, st, &rsd)) {
- double fe_bound = td_metric_ratio(cpu,
+ full_td(cpu_map_idx, st, &rsd)) {
+ double fe_bound = td_metric_ratio(cpu_map_idx,
STAT_TOPDOWN_FE_BOUND, st,
&rsd);
if (fe_bound > 0.2)
@@ -1182,8 +1205,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, color, "%8.1f%%", "frontend bound",
fe_bound * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_BE_BOUND) &&
- full_td(cpu, st, &rsd)) {
- double be_bound = td_metric_ratio(cpu,
+ full_td(cpu_map_idx, st, &rsd)) {
+ double be_bound = td_metric_ratio(cpu_map_idx,
STAT_TOPDOWN_BE_BOUND, st,
&rsd);
if (be_bound > 0.2)
@@ -1191,8 +1214,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, color, "%8.1f%%", "backend bound",
be_bound * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_BAD_SPEC) &&
- full_td(cpu, st, &rsd)) {
- double bad_spec = td_metric_ratio(cpu,
+ full_td(cpu_map_idx, st, &rsd)) {
+ double bad_spec = td_metric_ratio(cpu_map_idx,
STAT_TOPDOWN_BAD_SPEC, st,
&rsd);
if (bad_spec > 0.1)
@@ -1200,11 +1223,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, color, "%8.1f%%", "bad speculation",
bad_spec * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_HEAVY_OPS) &&
- full_td(cpu, st, &rsd) && (config->topdown_level > 1)) {
- double retiring = td_metric_ratio(cpu,
+ full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) {
+ double retiring = td_metric_ratio(cpu_map_idx,
STAT_TOPDOWN_RETIRING, st,
&rsd);
- double heavy_ops = td_metric_ratio(cpu,
+ double heavy_ops = td_metric_ratio(cpu_map_idx,
STAT_TOPDOWN_HEAVY_OPS, st,
&rsd);
double light_ops = retiring - heavy_ops;
@@ -1220,11 +1243,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, color, "%8.1f%%", "light operations",
light_ops * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_BR_MISPREDICT) &&
- full_td(cpu, st, &rsd) && (config->topdown_level > 1)) {
- double bad_spec = td_metric_ratio(cpu,
+ full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) {
+ double bad_spec = td_metric_ratio(cpu_map_idx,
STAT_TOPDOWN_BAD_SPEC, st,
&rsd);
- double br_mis = td_metric_ratio(cpu,
+ double br_mis = td_metric_ratio(cpu_map_idx,
STAT_TOPDOWN_BR_MISPREDICT, st,
&rsd);
double m_clears = bad_spec - br_mis;
@@ -1240,11 +1263,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, color, "%8.1f%%", "machine clears",
m_clears * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_LAT) &&
- full_td(cpu, st, &rsd) && (config->topdown_level > 1)) {
- double fe_bound = td_metric_ratio(cpu,
+ full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) {
+ double fe_bound = td_metric_ratio(cpu_map_idx,
STAT_TOPDOWN_FE_BOUND, st,
&rsd);
- double fetch_lat = td_metric_ratio(cpu,
+ double fetch_lat = td_metric_ratio(cpu_map_idx,
STAT_TOPDOWN_FETCH_LAT, st,
&rsd);
double fetch_bw = fe_bound - fetch_lat;
@@ -1260,11 +1283,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, color, "%8.1f%%", "fetch bandwidth",
fetch_bw * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_MEM_BOUND) &&
- full_td(cpu, st, &rsd) && (config->topdown_level > 1)) {
- double be_bound = td_metric_ratio(cpu,
+ full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) {
+ double be_bound = td_metric_ratio(cpu_map_idx,
STAT_TOPDOWN_BE_BOUND, st,
&rsd);
- double mem_bound = td_metric_ratio(cpu,
+ double mem_bound = td_metric_ratio(cpu_map_idx,
STAT_TOPDOWN_MEM_BOUND, st,
&rsd);
double core_bound = be_bound - mem_bound;
@@ -1281,12 +1304,12 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
core_bound * 100.);
} else if (evsel->metric_expr) {
generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL,
- evsel->name, evsel->metric_name, NULL, 1, cpu, out, st);
- } else if (runtime_stat_n(st, STAT_NSECS, cpu, &rsd) != 0) {
+ evsel->name, evsel->metric_name, NULL, 1, cpu_map_idx, out, st);
+ } else if (runtime_stat_n(st, STAT_NSECS, cpu_map_idx, &rsd) != 0) {
char unit = ' ';
char unit_buf[10] = "/sec";
- total = runtime_stat_avg(st, STAT_NSECS, cpu, &rsd);
+ total = runtime_stat_avg(st, STAT_NSECS, cpu_map_idx, &rsd);
if (total)
ratio = convert_unit_double(1000000000.0 * avg / total, &unit);
@@ -1294,7 +1317,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio);
} else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
- print_smi_cost(config, cpu, out, st, &rsd);
+ print_smi_cost(config, cpu_map_idx, out, st, &rsd);
} else {
num = 0;
}
@@ -1307,7 +1330,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
out->new_line(config, ctxp);
generic_metric(config, mexp->metric_expr, mexp->metric_events,
mexp->metric_refs, evsel->name, mexp->metric_name,
- mexp->metric_unit, mexp->runtime, cpu, out, st);
+ mexp->metric_unit, mexp->runtime, cpu_map_idx, out, st);
}
}
if (num == 0)
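
For reference, the level-1 topdown ratios used by the shadow-stat code above boil down to a few divisions over total slots. A minimal standalone sketch, with hypothetical counter values (not part of the patch), mirroring td_retiring()/td_bad_spec()/td_fe_bound()/td_be_bound():

#include <stdio.h>

/* Hypothetical raw counter values; the real ones come from runtime_stat_avg(). */
int main(void)
{
	double total_slots   = 1000000.0; /* td_total_slots()              */
	double slots_issued  =  700000.0; /* STAT_TOPDOWN_SLOTS_ISSUED     */
	double slots_retired =  600000.0; /* STAT_TOPDOWN_SLOTS_RETIRED    */
	double recovery_bub  =   50000.0; /* STAT_TOPDOWN_RECOVERY_BUBBLES */
	double fetch_bub     =  150000.0; /* STAT_TOPDOWN_FETCH_BUBBLES    */

	double bad_spec = (slots_issued - slots_retired + recovery_bub) / total_slots;
	double retiring = slots_retired / total_slots;
	double fe_bound = fetch_bub / total_slots;
	double be_bound = 1.0 - (fe_bound + bad_spec + retiring);

	printf("retiring %.1f%% bad_spec %.1f%% fe_bound %.1f%% be_bound %.1f%%\n",
	       retiring * 100., bad_spec * 100., fe_bound * 100., be_bound * 100.);
	return 0;
}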
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 09ea334586f2..37ea2d044708 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <errno.h>
+#include <linux/err.h>
#include <inttypes.h>
#include <math.h>
#include <string.h>
@@ -116,7 +117,9 @@ static void perf_stat_evsel_id_init(struct evsel *evsel)
/* ps->id is 0 hence PERF_STAT_EVSEL_ID__NONE by default */
for (i = 0; i < PERF_STAT_EVSEL_ID__MAX; i++) {
- if (!strcmp(evsel__name(evsel), id_str[i])) {
+ if (!strcmp(evsel__name(evsel), id_str[i]) ||
+ (strstr(evsel__name(evsel), id_str[i]) && evsel->pmu_name
+ && strstr(evsel__name(evsel), evsel->pmu_name))) {
ps->id = i;
break;
}
@@ -152,11 +155,13 @@ static void evsel__free_stat_priv(struct evsel *evsel)
zfree(&evsel->stats);
}
-static int evsel__alloc_prev_raw_counts(struct evsel *evsel, int ncpus, int nthreads)
+static int evsel__alloc_prev_raw_counts(struct evsel *evsel)
{
+ int cpu_map_nr = evsel__nr_cpus(evsel);
+ int nthreads = perf_thread_map__nr(evsel->core.threads);
struct perf_counts *counts;
- counts = perf_counts__new(ncpus, nthreads);
+ counts = perf_counts__new(cpu_map_nr, nthreads);
if (counts)
evsel->prev_raw_counts = counts;
@@ -177,12 +182,9 @@ static void evsel__reset_prev_raw_counts(struct evsel *evsel)
static int evsel__alloc_stats(struct evsel *evsel, bool alloc_raw)
{
- int ncpus = evsel__nr_cpus(evsel);
- int nthreads = perf_thread_map__nr(evsel->core.threads);
-
if (evsel__alloc_stat_priv(evsel) < 0 ||
- evsel__alloc_counts(evsel, ncpus, nthreads) < 0 ||
- (alloc_raw && evsel__alloc_prev_raw_counts(evsel, ncpus, nthreads) < 0))
+ evsel__alloc_counts(evsel) < 0 ||
+ (alloc_raw && evsel__alloc_prev_raw_counts(evsel) < 0))
return -ENOMEM;
return 0;
@@ -235,14 +237,12 @@ void evlist__reset_prev_raw_counts(struct evlist *evlist)
static void evsel__copy_prev_raw_counts(struct evsel *evsel)
{
- int ncpus = evsel__nr_cpus(evsel);
- int nthreads = perf_thread_map__nr(evsel->core.threads);
+ int idx, nthreads = perf_thread_map__nr(evsel->core.threads);
for (int thread = 0; thread < nthreads; thread++) {
- for (int cpu = 0; cpu < ncpus; cpu++) {
- *perf_counts(evsel->counts, cpu, thread) =
- *perf_counts(evsel->prev_raw_counts, cpu,
- thread);
+ perf_cpu_map__for_each_idx(idx, evsel__cpus(evsel)) {
+ *perf_counts(evsel->counts, idx, thread) =
+ *perf_counts(evsel->prev_raw_counts, idx, thread);
}
}
@@ -293,11 +293,12 @@ static bool pkg_id_equal(const void *__key1, const void *__key2,
return *key1 == *key2;
}
-static int check_per_pkg(struct evsel *counter,
- struct perf_counts_values *vals, int cpu, bool *skip)
+static int check_per_pkg(struct evsel *counter, struct perf_counts_values *vals,
+ int cpu_map_idx, bool *skip)
{
struct hashmap *mask = counter->per_pkg_mask;
struct perf_cpu_map *cpus = evsel__cpus(counter);
+ struct perf_cpu cpu = perf_cpu_map__cpu(cpus, cpu_map_idx);
int s, d, ret = 0;
uint64_t *key;
@@ -311,7 +312,7 @@ static int check_per_pkg(struct evsel *counter,
if (!mask) {
mask = hashmap__new(pkg_id_hash, pkg_id_equal, NULL);
- if (!mask)
+ if (IS_ERR(mask))
return -ENOMEM;
counter->per_pkg_mask = mask;
@@ -328,7 +329,7 @@ static int check_per_pkg(struct evsel *counter,
if (!(vals->run && vals->ena))
return 0;
- s = cpu_map__get_socket(cpus, cpu, NULL).socket;
+ s = cpu__get_socket_id(cpu);
if (s < 0)
return -1;
@@ -336,7 +337,7 @@ static int check_per_pkg(struct evsel *counter,
* On multi-die system, die_id > 0. On no-die system, die_id = 0.
* We use hashmap(socket, die) to check the used socket+die pair.
*/
- d = cpu_map__get_die(cpus, cpu, NULL).die;
+ d = cpu__get_die_id(cpu);
if (d < 0)
return -1;
@@ -345,9 +346,10 @@ static int check_per_pkg(struct evsel *counter,
return -ENOMEM;
*key = (uint64_t)d << 32 | s;
- if (hashmap__find(mask, (void *)key, NULL))
+ if (hashmap__find(mask, (void *)key, NULL)) {
*skip = true;
- else
+ free(key);
+ } else
ret = hashmap__add(mask, (void *)key, (void *)1);
return ret;
@@ -355,14 +357,14 @@ static int check_per_pkg(struct evsel *counter,
static int
process_counter_values(struct perf_stat_config *config, struct evsel *evsel,
- int cpu, int thread,
+ int cpu_map_idx, int thread,
struct perf_counts_values *count)
{
struct perf_counts_values *aggr = &evsel->counts->aggr;
static struct perf_counts_values zero;
bool skip = false;
- if (check_per_pkg(evsel, count, cpu, &skip)) {
+ if (check_per_pkg(evsel, count, cpu_map_idx, &skip)) {
pr_err("failed to read per-pkg counter\n");
return -1;
}
@@ -378,11 +380,11 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel,
case AGGR_NODE:
case AGGR_NONE:
if (!evsel->snapshot)
- evsel__compute_deltas(evsel, cpu, thread, count);
+ evsel__compute_deltas(evsel, cpu_map_idx, thread, count);
perf_counts_values__scale(count, config->scale, NULL);
if ((config->aggr_mode == AGGR_NONE) && (!evsel->percore)) {
perf_stat__update_shadow_stats(evsel, count->val,
- cpu, &rt_stat);
+ cpu_map_idx, &rt_stat);
}
if (config->aggr_mode == AGGR_THREAD) {
@@ -411,15 +413,15 @@ static int process_counter_maps(struct perf_stat_config *config,
{
int nthreads = perf_thread_map__nr(counter->core.threads);
int ncpus = evsel__nr_cpus(counter);
- int cpu, thread;
+ int idx, thread;
if (counter->core.system_wide)
nthreads = 1;
for (thread = 0; thread < nthreads; thread++) {
- for (cpu = 0; cpu < ncpus; cpu++) {
- if (process_counter_values(config, counter, cpu, thread,
- perf_counts(counter->counts, cpu, thread)))
+ for (idx = 0; idx < ncpus; idx++) {
+ if (process_counter_values(config, counter, idx, thread,
+ perf_counts(counter->counts, idx, thread)))
return -1;
}
}
@@ -470,9 +472,10 @@ int perf_stat_process_counter(struct perf_stat_config *config,
int perf_event__process_stat_event(struct perf_session *session,
union perf_event *event)
{
- struct perf_counts_values count;
+ struct perf_counts_values count, *ptr;
struct perf_record_stat *st = &event->stat;
struct evsel *counter;
+ int cpu_map_idx;
count.val = st->val;
count.ena = st->ena;
@@ -483,8 +486,18 @@ int perf_event__process_stat_event(struct perf_session *session,
pr_err("Failed to resolve counter for stat event.\n");
return -EINVAL;
}
-
- *perf_counts(counter->counts, st->cpu, st->thread) = count;
+ cpu_map_idx = perf_cpu_map__idx(evsel__cpus(counter), (struct perf_cpu){.cpu = st->cpu});
+ if (cpu_map_idx == -1) {
+ pr_err("Invalid CPU %d for event %s.\n", st->cpu, evsel__name(counter));
+ return -EINVAL;
+ }
+ ptr = perf_counts(counter->counts, cpu_map_idx, st->thread);
+ if (ptr == NULL) {
+ pr_err("Failed to find perf count for CPU %d thread %d on event %s.\n",
+ st->cpu, st->thread, evsel__name(counter));
+ return -EINVAL;
+ }
+ *ptr = count;
counter->supported = true;
return 0;
}
@@ -531,7 +544,7 @@ size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp)
int create_perf_stat_counter(struct evsel *evsel,
struct perf_stat_config *config,
struct target *target,
- int cpu)
+ int cpu_map_idx)
{
struct perf_event_attr *attr = &evsel->core.attr;
struct evsel *leader = evsel__leader(evsel);
@@ -585,7 +598,7 @@ int create_perf_stat_counter(struct evsel *evsel,
}
if (target__has_cpu(target) && !target__has_per_thread(target))
- return evsel__open_per_cpu(evsel, evsel__cpus(evsel), cpu);
+ return evsel__open_per_cpu(evsel, evsel__cpus(evsel), cpu_map_idx);
return evsel__open_per_thread(evsel, evsel->core.threads);
}
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 32c8527de347..b5aeb8e6d34b 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -108,8 +108,12 @@ struct runtime_stat {
struct rblist value_list;
};
-typedef struct aggr_cpu_id (*aggr_get_id_t)(struct perf_stat_config *config,
- struct perf_cpu_map *m, int cpu);
+struct rusage_stats {
+ struct stats ru_utime_usec_stat;
+ struct stats ru_stime_usec_stat;
+};
+
+typedef struct aggr_cpu_id (*aggr_get_id_t)(struct perf_stat_config *config, struct perf_cpu cpu);
struct perf_stat_config {
enum aggr_mode aggr_mode;
@@ -123,6 +127,7 @@ struct perf_stat_config {
bool ru_display;
bool big_num;
bool no_merge;
+ bool hybrid_merge;
bool walltime_run_table;
bool all_kernel;
bool all_user;
@@ -149,6 +154,7 @@ struct perf_stat_config {
const char *csv_sep;
struct stats *walltime_nsecs_stats;
struct rusage ru_data;
+ struct rusage_stats *ru_stats;
struct cpu_aggr_map *aggr_map;
aggr_get_id_t aggr_get_id;
struct cpu_aggr_map *cpus_aggr_map;
@@ -178,6 +184,20 @@ static inline void init_stats(struct stats *stats)
stats->max = 0;
}
+static inline void init_rusage_stats(struct rusage_stats *ru_stats) {
+ init_stats(&ru_stats->ru_utime_usec_stat);
+ init_stats(&ru_stats->ru_stime_usec_stat);
+}
+
+static inline void update_rusage_stats(struct rusage_stats *ru_stats, struct rusage* rusage) {
+ const u64 us_to_ns = 1000;
+ const u64 s_to_ns = 1000000000;
+ update_stats(&ru_stats->ru_utime_usec_stat,
+ (rusage->ru_utime.tv_usec * us_to_ns + rusage->ru_utime.tv_sec * s_to_ns));
+ update_stats(&ru_stats->ru_stime_usec_stat,
+ (rusage->ru_stime.tv_usec * us_to_ns + rusage->ru_stime.tv_sec * s_to_ns));
+}
+
struct evsel;
struct evlist;
@@ -197,6 +217,7 @@ bool __perf_stat_evsel__is(struct evsel *evsel, enum perf_stat_evsel_id id);
extern struct runtime_stat rt_stat;
extern struct stats walltime_nsecs_stats;
+extern struct rusage_stats ru_stats;
typedef void (*print_metric_t)(struct perf_stat_config *config,
void *ctx, const char *color, const char *unit,
@@ -209,7 +230,7 @@ void perf_stat__init_shadow_stats(void);
void perf_stat__reset_shadow_stats(void);
void perf_stat__reset_shadow_per_stat(struct runtime_stat *st);
void perf_stat__update_shadow_stats(struct evsel *counter, u64 count,
- int cpu, struct runtime_stat *st);
+ int cpu_map_idx, struct runtime_stat *st);
struct perf_stat_output_ctx {
void *ctx;
print_metric_t print_metric;
@@ -249,10 +270,10 @@ size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp);
int create_perf_stat_counter(struct evsel *evsel,
struct perf_stat_config *config,
struct target *target,
- int cpu);
+ int cpu_map_idx);
void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *config,
struct target *_target, struct timespec *ts, int argc, const char **argv);
struct metric_expr;
-double test_generic_metric(struct metric_expr *mexp, int cpu, struct runtime_stat *st);
+double test_generic_metric(struct metric_expr *mexp, int cpu_map_idx, struct runtime_stat *st);
#endif
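
A standalone sketch of the timeval-to-nanoseconds conversion that update_rusage_stats() performs; getrusage(RUSAGE_CHILDREN) is used here only as a stand-in for the rusage data perf stat collects for the workload:

#include <stdio.h>
#include <sys/resource.h>
#include <sys/time.h>

int main(void)
{
	struct rusage ru;
	const unsigned long long us_to_ns = 1000ULL;
	const unsigned long long s_to_ns  = 1000000000ULL;

	/* Stand-in for the rusage perf stat records into ru_data. */
	if (getrusage(RUSAGE_CHILDREN, &ru))
		return 1;

	/* Same conversion as update_rusage_stats(): seconds/microseconds -> ns. */
	unsigned long long utime_ns = ru.ru_utime.tv_sec * s_to_ns +
				      ru.ru_utime.tv_usec * us_to_ns;
	unsigned long long stime_ns = ru.ru_stime.tv_sec * s_to_ns +
				      ru.ru_stime.tv_usec * us_to_ns;

	printf("user %llu ns, system %llu ns\n", utime_ns, stime_ns);
	return 0;
}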
diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c
index 96f941e01681..1e0c731fc539 100644
--- a/tools/perf/util/svghelper.c
+++ b/tools/perf/util/svghelper.c
@@ -728,20 +728,20 @@ static int str_to_bitmap(char *s, cpumask_t *b, int nr_cpus)
int i;
int ret = 0;
struct perf_cpu_map *m;
- int c;
+ struct perf_cpu c;
m = perf_cpu_map__new(s);
if (!m)
return -1;
- for (i = 0; i < m->nr; i++) {
- c = m->map[i];
- if (c >= nr_cpus) {
+ for (i = 0; i < perf_cpu_map__nr(m); i++) {
+ c = perf_cpu_map__cpu(m, i);
+ if (c.cpu >= nr_cpus) {
ret = -1;
break;
}
- set_bit(c, cpumask_bits(b));
+ set_bit(c.cpu, cpumask_bits(b));
}
perf_cpu_map__put(m);
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 31cd59a2b66e..ecd377938eea 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -1290,7 +1290,7 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss,
* For misannotated, zeroed, ASM function sizes.
*/
if (nr > 0) {
- symbols__fixup_end(&dso->symbols);
+ symbols__fixup_end(&dso->symbols, false);
symbols__fixup_duplicate(&dso->symbols);
if (kmap) {
/*
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index b2ed3140a1fa..f72baf636724 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -101,11 +101,6 @@ static int prefix_underscores_count(const char *str)
return tail - str;
}
-void __weak arch__symbols__fixup_end(struct symbol *p, struct symbol *c)
-{
- p->end = c->start;
-}
-
const char * __weak arch__normalize_symbol_name(const char *name)
{
return name;
@@ -217,7 +212,8 @@ again:
}
}
-void symbols__fixup_end(struct rb_root_cached *symbols)
+/* Update zero-sized symbols using the address of the next symbol */
+void symbols__fixup_end(struct rb_root_cached *symbols, bool is_kallsyms)
{
struct rb_node *nd, *prevnd = rb_first_cached(symbols);
struct symbol *curr, *prev;
@@ -231,8 +227,29 @@ void symbols__fixup_end(struct rb_root_cached *symbols)
prev = curr;
curr = rb_entry(nd, struct symbol, rb_node);
- if (prev->end == prev->start && prev->end != curr->start)
- arch__symbols__fixup_end(prev, curr);
+ /*
+ * On some architectures the kernel text segment starts at a low
+ * memory address, while modules are located at high memory
+ * addresses (or vice versa). The gap between the end of the kernel
+ * text segment and the beginning of the first module's text segment
+ * is very big. Therefore do not fill this gap and do not assign it
+ * to the kernel dso map (kallsyms).
+ *
+ * kallsyms marks module symbols with a '[' character, as in:
+ * ffffffffc1937000 T hdmi_driver_init [snd_hda_codec_hdmi]
+ */
+ if (prev->end == prev->start) {
+ /* Last kernel/module symbol mapped to end of page */
+ if (is_kallsyms && (!strchr(prev->name, '[') !=
+ !strchr(curr->name, '[')))
+ prev->end = roundup(prev->end + 4096, 4096);
+ else
+ prev->end = curr->start;
+
+ pr_debug4("%s sym:%s end:%#" PRIx64 "\n",
+ __func__, prev->name, prev->end);
+ }
}
/* Last entry */
@@ -1467,7 +1484,7 @@ int __dso__load_kallsyms(struct dso *dso, const char *filename,
if (kallsyms__delta(kmap, filename, &delta))
return -1;
- symbols__fixup_end(&dso->symbols);
+ symbols__fixup_end(&dso->symbols, true);
symbols__fixup_duplicate(&dso->symbols);
if (dso->kernel == DSO_SPACE__KERNEL_GUEST)
@@ -1659,7 +1676,7 @@ int dso__load_bfd_symbols(struct dso *dso, const char *debugfile)
#undef bfd_asymbol_section
#endif
- symbols__fixup_end(&dso->symbols);
+ symbols__fixup_end(&dso->symbols, false);
symbols__fixup_duplicate(&dso->symbols);
dso->adjust_symbols = 1;
@@ -1735,8 +1752,8 @@ static int dso__find_perf_map(char *filebuf, size_t bufsz,
nsi = *nsip;
- if (nsi->need_setns) {
- snprintf(filebuf, bufsz, "/tmp/perf-%d.map", nsi->nstgid);
+ if (nsinfo__need_setns(nsi)) {
+ snprintf(filebuf, bufsz, "/tmp/perf-%d.map", nsinfo__nstgid(nsi));
nsinfo__mountns_enter(nsi, &nsc);
rc = access(filebuf, R_OK);
nsinfo__mountns_exit(&nsc);
@@ -1748,8 +1765,8 @@ static int dso__find_perf_map(char *filebuf, size_t bufsz,
if (nnsi) {
nsinfo__put(nsi);
- nnsi->need_setns = false;
- snprintf(filebuf, bufsz, "/tmp/perf-%d.map", nnsi->tgid);
+ nsinfo__clear_need_setns(nnsi);
+ snprintf(filebuf, bufsz, "/tmp/perf-%d.map", nsinfo__tgid(nnsi));
*nsip = nnsi;
rc = 0;
}
@@ -1864,6 +1881,16 @@ int dso__load(struct dso *dso, struct map *map)
nsinfo__mountns_exit(&nsc);
is_reg = is_regular_file(name);
+ if (!is_reg && errno == ENOENT && dso->nsinfo) {
+ char *new_name = filename_with_chroot(dso->nsinfo->pid,
+ name);
+ if (new_name) {
+ is_reg = is_regular_file(new_name);
+ strlcpy(name, new_name, PATH_MAX);
+ free(new_name);
+ }
+ }
+
#ifdef HAVE_LIBBFD_SUPPORT
if (is_reg)
bfdrc = dso__load_bfd_symbols(dso, name);
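
A simplified, standalone restatement of the new symbols__fixup_end() rule, using the module symbol from the comment above and an otherwise hypothetical preceding kernel symbol; this is not the perf code itself:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

static uint64_t fixup_end(uint64_t prev_start, uint64_t prev_end, const char *prev_name,
			  uint64_t curr_start, const char *curr_name, int is_kallsyms)
{
	if (prev_end != prev_start)
		return prev_end;	/* symbol already has a size */

	/* Kernel/module boundary in kallsyms: end the symbol on the next page. */
	if (is_kallsyms && (!strchr(prev_name, '[') != !strchr(curr_name, '[')))
		return (prev_end + 4096 + 4095) & ~4095ULL; /* roundup(prev_end + 4096, 4096) */

	return curr_start;	/* otherwise extend up to the next symbol */
}

int main(void)
{
	/* Hypothetical last core-kernel symbol followed by the first module symbol. */
	uint64_t end = fixup_end(0xffffffff9b54b4d0ULL, 0xffffffff9b54b4d0ULL, "_einittext",
				 0xffffffffc1937000ULL, "hdmi_driver_init [snd_hda_codec_hdmi]", 1);

	/* Prints a page past the symbol, not the distant module start. */
	printf("end = %#llx\n", (unsigned long long)end);
	return 0;
}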
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index fbf866d82dcc..0b893dcc8ea6 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -203,7 +203,7 @@ void __symbols__insert(struct rb_root_cached *symbols, struct symbol *sym,
bool kernel);
void symbols__insert(struct rb_root_cached *symbols, struct symbol *sym);
void symbols__fixup_duplicate(struct rb_root_cached *symbols);
-void symbols__fixup_end(struct rb_root_cached *symbols);
+void symbols__fixup_end(struct rb_root_cached *symbols, bool is_kallsyms);
void maps__fixup_end(struct maps *maps);
typedef int (*mapfn_t)(u64 start, u64 len, u64 pgoff, void *data);
@@ -241,7 +241,6 @@ const char *arch__normalize_symbol_name(const char *name);
#define SYMBOL_A 0
#define SYMBOL_B 1
-void arch__symbols__fixup_end(struct symbol *p, struct symbol *c);
int arch__compare_symbol_names(const char *namea, const char *nameb);
int arch__compare_symbol_names_n(const char *namea, const char *nameb,
unsigned int n);
diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h
index a70b3ec09dac..bc3d046fbb63 100644
--- a/tools/perf/util/symbol_conf.h
+++ b/tools/perf/util/symbol_conf.h
@@ -43,7 +43,8 @@ struct symbol_conf {
report_individual_block,
inline_name,
disable_add2line_warn,
- buildid_mmap2;
+ buildid_mmap2,
+ guest_code;
const char *vmlinux_name,
*kallsyms_name,
*source_prefix,
diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
index 198982109f0f..27acdc5e5723 100644
--- a/tools/perf/util/synthetic-events.c
+++ b/tools/perf/util/synthetic-events.c
@@ -1186,12 +1186,12 @@ int perf_event__synthesize_thread_map2(struct perf_tool *tool,
static void synthesize_cpus(struct cpu_map_entries *cpus,
struct perf_cpu_map *map)
{
- int i;
+ int i, map_nr = perf_cpu_map__nr(map);
- cpus->nr = map->nr;
+ cpus->nr = map_nr;
- for (i = 0; i < map->nr; i++)
- cpus->cpu[i] = map->map[i];
+ for (i = 0; i < map_nr; i++)
+ cpus->cpu[i] = perf_cpu_map__cpu(map, i).cpu;
}
static void synthesize_mask(struct perf_record_record_cpu_map *mask,
@@ -1202,13 +1202,13 @@ static void synthesize_mask(struct perf_record_record_cpu_map *mask,
mask->nr = BITS_TO_LONGS(max);
mask->long_size = sizeof(long);
- for (i = 0; i < map->nr; i++)
- set_bit(map->map[i], mask->mask);
+ for (i = 0; i < perf_cpu_map__nr(map); i++)
+ set_bit(perf_cpu_map__cpu(map, i).cpu, mask->mask);
}
static size_t cpus_size(struct perf_cpu_map *map)
{
- return sizeof(struct cpu_map_entries) + map->nr * sizeof(u16);
+ return sizeof(struct cpu_map_entries) + perf_cpu_map__nr(map) * sizeof(u16);
}
static size_t mask_size(struct perf_cpu_map *map, int *max)
@@ -1217,9 +1217,9 @@ static size_t mask_size(struct perf_cpu_map *map, int *max)
*max = 0;
- for (i = 0; i < map->nr; i++) {
+ for (i = 0; i < perf_cpu_map__nr(map); i++) {
/* bit position of the cpu is + 1 */
- int bit = map->map[i] + 1;
+ int bit = perf_cpu_map__cpu(map, i).cpu + 1;
if (bit > *max)
*max = bit;
@@ -1354,7 +1354,7 @@ int perf_event__synthesize_stat_config(struct perf_tool *tool,
}
int perf_event__synthesize_stat(struct perf_tool *tool,
- u32 cpu, u32 thread, u64 id,
+ struct perf_cpu cpu, u32 thread, u64 id,
struct perf_counts_values *count,
perf_event__handler_t process,
struct machine *machine)
@@ -1366,7 +1366,7 @@ int perf_event__synthesize_stat(struct perf_tool *tool,
event.header.misc = 0;
event.id = id;
- event.cpu = cpu;
+ event.cpu = cpu.cpu;
event.thread = thread;
event.val = count->val;
event.ena = count->ena;
@@ -1763,7 +1763,7 @@ int perf_event__synthesize_id_index(struct perf_tool *tool, perf_event__handler_
}
e->idx = sid->idx;
- e->cpu = sid->cpu;
+ e->cpu = sid->cpu.cpu;
e->tid = sid->tid;
}
}
@@ -1784,6 +1784,25 @@ int __machine__synthesize_threads(struct machine *machine, struct perf_tool *too
perf_event__handler_t process, bool needs_mmap,
bool data_mmap, unsigned int nr_threads_synthesize)
{
+ /*
+ * When perf runs in a non-root PID namespace, and the namespace's
+ * proc FS is not mounted, nsinfo__is_in_root_namespace() returns
+ * false. In this case, the proc FS is coming from the parent
+ * namespace, thus the perf tool will wrongly gather process info
+ * from its parent PID namespace.
+ *
+ * To avoid the confusion that the perf tool runs in a child PID
+ * namespace but synthesizes thread info from its parent PID
+ * namespace, return failure with a warning.
+ */
+ if (!nsinfo__is_in_root_namespace()) {
+ pr_err("Perf runs in non-root PID namespace but it tries to ");
+ pr_err("gather process info from its parent PID namespace.\n");
+ pr_err("Please mount the proc file system properly, e.g. ");
+ pr_err("add the option '--mount-proc' for unshare command.\n");
+ return -EPERM;
+ }
+
if (target__has_task(target))
return perf_event__synthesize_thread_map(tool, threads, process, machine,
needs_mmap, data_mmap);
@@ -2108,7 +2127,7 @@ int perf_event__synthesize_stat_events(struct perf_stat_config *config, struct p
return err;
}
- err = perf_event__synthesize_cpu_map(tool, evlist->core.cpus, process, NULL);
+ err = perf_event__synthesize_cpu_map(tool, evlist->core.user_requested_cpus, process, NULL);
if (err < 0) {
pr_err("Couldn't synthesize thread map.\n");
return err;
diff --git a/tools/perf/util/synthetic-events.h b/tools/perf/util/synthetic-events.h
index c931433bacbf..78a0450db164 100644
--- a/tools/perf/util/synthetic-events.h
+++ b/tools/perf/util/synthetic-events.h
@@ -6,6 +6,7 @@
#include <sys/types.h> // pid_t
#include <linux/compiler.h>
#include <linux/types.h>
+#include <perf/cpumap.h>
struct auxtrace_record;
struct dso;
@@ -63,7 +64,7 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo
int perf_event__synthesize_stat_config(struct perf_tool *tool, struct perf_stat_config *config, perf_event__handler_t process, struct machine *machine);
int perf_event__synthesize_stat_events(struct perf_stat_config *config, struct perf_tool *tool, struct evlist *evlist, perf_event__handler_t process, bool attrs);
int perf_event__synthesize_stat_round(struct perf_tool *tool, u64 time, u64 type, perf_event__handler_t process, struct machine *machine);
-int perf_event__synthesize_stat(struct perf_tool *tool, u32 cpu, u32 thread, u64 id, struct perf_counts_values *count, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_stat(struct perf_tool *tool, struct perf_cpu cpu, u32 thread, u64 id, struct perf_counts_values *count, perf_event__handler_t process, struct machine *machine);
int perf_event__synthesize_thread_map2(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine);
int perf_event__synthesize_thread_map(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine, bool needs_mmap, bool mmap_data);
int perf_event__synthesize_threads(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine, bool needs_mmap, bool mmap_data, unsigned int nr_threads_synthesize);
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
index ef873f2cc38f..f2352dba1875 100644
--- a/tools/perf/util/tool.h
+++ b/tools/perf/util/tool.h
@@ -28,7 +28,8 @@ typedef int (*event_attr_op)(struct perf_tool *tool,
typedef int (*event_op2)(struct perf_session *session, union perf_event *event);
typedef s64 (*event_op3)(struct perf_session *session, union perf_event *event);
-typedef int (*event_op4)(struct perf_session *session, union perf_event *event, u64 data);
+typedef int (*event_op4)(struct perf_session *session, union perf_event *event, u64 data,
+ const char *str);
typedef int (*event_oe)(struct perf_tool *tool, union perf_event *event,
struct ordered_events *oe);
diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c
index 27945eeb0cb5..b8b32431d2f7 100644
--- a/tools/perf/util/top.c
+++ b/tools/perf/util/top.c
@@ -95,15 +95,17 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size)
if (target->cpu_list)
ret += SNPRINTF(bf + ret, size - ret, ", CPU%s: %s)",
- top->evlist->core.cpus->nr > 1 ? "s" : "",
+ perf_cpu_map__nr(top->evlist->core.user_requested_cpus) > 1
+ ? "s" : "",
target->cpu_list);
else {
if (target->tid)
ret += SNPRINTF(bf + ret, size - ret, ")");
else
ret += SNPRINTF(bf + ret, size - ret, ", %d CPU%s)",
- top->evlist->core.cpus->nr,
- top->evlist->core.cpus->nr > 1 ? "s" : "");
+ perf_cpu_map__nr(top->evlist->core.user_requested_cpus),
+ perf_cpu_map__nr(top->evlist->core.user_requested_cpus) > 1
+ ? "s" : "");
}
perf_top__reset_sample_counters(top);
diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h
index ff8391208ecd..1c2c0a838430 100644
--- a/tools/perf/util/top.h
+++ b/tools/perf/util/top.h
@@ -33,7 +33,10 @@ struct perf_top {
int print_entries, count_filter, delay_secs;
int max_stack;
bool hide_kernel_symbols, hide_user_symbols, zero;
- bool use_tui, use_stdio;
+#ifdef HAVE_SLANG_SUPPORT
+ bool use_tui;
+#endif
+ bool use_stdio;
bool vmlinux_warned;
bool dump_symtab;
bool stitch_lbr;
diff --git a/tools/perf/util/topdown.c b/tools/perf/util/topdown.c
index 1081b20f9891..a369f84ceb6a 100644
--- a/tools/perf/util/topdown.c
+++ b/tools/perf/util/topdown.c
@@ -1,18 +1,24 @@
// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
#include "pmu.h"
+#include "pmu-hybrid.h"
#include "topdown.h"
-int topdown_filter_events(const char **attr, char **str, bool use_group)
+int topdown_filter_events(const char **attr, char **str, bool use_group,
+ const char *pmu_name)
{
int off = 0;
int i;
int len = 0;
char *s;
+ bool is_hybrid = perf_pmu__is_hybrid(pmu_name);
for (i = 0; attr[i]; i++) {
- if (pmu_have_event("cpu", attr[i])) {
- len += strlen(attr[i]) + 1;
+ if (pmu_have_event(pmu_name, attr[i])) {
+ if (is_hybrid)
+ len += strlen(attr[i]) + strlen(pmu_name) + 3;
+ else
+ len += strlen(attr[i]) + 1;
attr[i - off] = attr[i];
} else
off++;
@@ -30,7 +36,10 @@ int topdown_filter_events(const char **attr, char **str, bool use_group)
if (use_group)
*s++ = '{';
for (i = 0; attr[i]; i++) {
- strcpy(s, attr[i]);
+ if (!is_hybrid)
+ strcpy(s, attr[i]);
+ else
+ sprintf(s, "%s/%s/", pmu_name, attr[i]);
s += strlen(s);
*s++ = ',';
}
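
A minimal standalone sketch (hypothetical hybrid PMU and event names, trailing-comma handling simplified) of the event-group string topdown_filter_events() now builds when the PMU is hybrid:

#include <stdio.h>
#include <string.h>

int main(void)
{
	/* Hypothetical hybrid PMU and topdown event names. */
	const char *pmu = "cpu_core";
	const char *attr[] = { "slots", "topdown-retiring", "topdown-bad-spec", NULL };
	char buf[256];
	char *s = buf;

	*s++ = '{';
	for (int i = 0; attr[i]; i++)
		s += sprintf(s, "%s/%s/,", pmu, attr[i]);	/* pmu/event/ per event */

	/* Close the group over the trailing comma (simplified). */
	s[-1] = '}';
	*s = '\0';

	/* {cpu_core/slots/,cpu_core/topdown-retiring/,cpu_core/topdown-bad-spec/} */
	printf("%s\n", buf);
	return 0;
}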
diff --git a/tools/perf/util/topdown.h b/tools/perf/util/topdown.h
index 2f0d0b887639..118e75281f93 100644
--- a/tools/perf/util/topdown.h
+++ b/tools/perf/util/topdown.h
@@ -7,6 +7,7 @@ bool arch_topdown_check_group(bool *warn);
void arch_topdown_group_warn(void);
bool arch_topdown_sample_read(struct evsel *leader);
-int topdown_filter_events(const char **attr, char **str, bool use_group);
+int topdown_filter_events(const char **attr, char **str, bool use_group,
+ const char *pmu_name);
#endif
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index 9634f0ae57be..c9c83a40647c 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -206,7 +206,7 @@ unsigned long long eval_flag(const char *flag)
if (isdigit(flag[0]))
return strtoull(flag, NULL, 0);
- for (i = 0; i < (int)(sizeof(flags)/sizeof(flags[0])); i++)
+ for (i = 0; i < (int)(ARRAY_SIZE(flags)); i++)
if (strcmp(flags[i].name, flag) == 0)
return flags[i].value;
diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c
index a74b517f7497..94aa40f6e348 100644
--- a/tools/perf/util/unwind-libdw.c
+++ b/tools/perf/util/unwind-libdw.c
@@ -200,7 +200,8 @@ frame_callback(Dwfl_Frame *state, void *arg)
bool isactivation;
if (!dwfl_frame_pc(state, &pc, NULL)) {
- pr_err("%s", dwfl_errmsg(-1));
+ if (!ui->best_effort)
+ pr_err("%s", dwfl_errmsg(-1));
return DWARF_CB_ABORT;
}
@@ -208,7 +209,8 @@ frame_callback(Dwfl_Frame *state, void *arg)
report_module(pc, ui);
if (!dwfl_frame_pc(state, &pc, &isactivation)) {
- pr_err("%s", dwfl_errmsg(-1));
+ if (!ui->best_effort)
+ pr_err("%s", dwfl_errmsg(-1));
return DWARF_CB_ABORT;
}
@@ -222,7 +224,8 @@ frame_callback(Dwfl_Frame *state, void *arg)
int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
struct thread *thread,
struct perf_sample *data,
- int max_stack)
+ int max_stack,
+ bool best_effort)
{
struct unwind_info *ui, ui_buf = {
.sample = data,
@@ -231,6 +234,7 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
.cb = cb,
.arg = arg,
.max_stack = max_stack,
+ .best_effort = best_effort
};
Dwarf_Word ip;
int err = -EINVAL, i;
diff --git a/tools/perf/util/unwind-libdw.h b/tools/perf/util/unwind-libdw.h
index 0cbd2650e280..8c88bc4f2304 100644
--- a/tools/perf/util/unwind-libdw.h
+++ b/tools/perf/util/unwind-libdw.h
@@ -20,6 +20,7 @@ struct unwind_info {
void *arg;
int max_stack;
int idx;
+ bool best_effort;
struct unwind_entry entries[];
};
diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c
index 71a353349181..6e5b8cce47bf 100644
--- a/tools/perf/util/unwind-libunwind-local.c
+++ b/tools/perf/util/unwind-libunwind-local.c
@@ -96,6 +96,7 @@ struct unwind_info {
struct perf_sample *sample;
struct machine *machine;
struct thread *thread;
+ bool best_effort;
};
#define dw_read(ptr, type, end) ({ \
@@ -168,29 +169,63 @@ static int __dw_read_encoded_value(u8 **p, u8 *end, u64 *val,
__v; \
})
-static u64 elf_section_offset(int fd, const char *name)
+static int elf_section_address_and_offset(int fd, const char *name, u64 *address, u64 *offset)
{
Elf *elf;
GElf_Ehdr ehdr;
GElf_Shdr shdr;
- u64 offset = 0;
+ int ret = -1;
elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
if (elf == NULL)
+ return -1;
+
+ if (gelf_getehdr(elf, &ehdr) == NULL)
+ goto out_err;
+
+ if (!elf_section_by_name(elf, &ehdr, &shdr, name, NULL))
+ goto out_err;
+
+ *address = shdr.sh_addr;
+ *offset = shdr.sh_offset;
+ ret = 0;
+out_err:
+ elf_end(elf);
+ return ret;
+}
+
+#ifndef NO_LIBUNWIND_DEBUG_FRAME
+static u64 elf_section_offset(int fd, const char *name)
+{
+ u64 address, offset;
+
+ if (elf_section_address_and_offset(fd, name, &address, &offset))
return 0;
- do {
- if (gelf_getehdr(elf, &ehdr) == NULL)
- break;
+ return offset;
+}
+#endif
- if (!elf_section_by_name(elf, &ehdr, &shdr, name, NULL))
- break;
+static u64 elf_base_address(int fd)
+{
+ Elf *elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+ GElf_Phdr phdr;
+ u64 retval = 0;
+ size_t i, phdrnum = 0;
- offset = shdr.sh_offset;
- } while (0);
+ if (elf == NULL)
+ return 0;
+ (void)elf_getphdrnum(elf, &phdrnum);
+ /* PT_LOAD segments are sorted by p_vaddr, so the first has the minimum p_vaddr. */
+ for (i = 0; i < phdrnum; i++) {
+ if (gelf_getphdr(elf, i, &phdr) && phdr.p_type == PT_LOAD) {
+ retval = phdr.p_vaddr & -getpagesize();
+ break;
+ }
+ }
elf_end(elf);
- return offset;
+ return retval;
}
#ifndef NO_LIBUNWIND_DEBUG_FRAME
@@ -247,8 +282,7 @@ struct eh_frame_hdr {
} __packed;
static int unwind_spec_ehframe(struct dso *dso, struct machine *machine,
- u64 offset, u64 *table_data, u64 *segbase,
- u64 *fde_count)
+ u64 offset, u64 *table_data_offset, u64 *fde_count)
{
struct eh_frame_hdr hdr;
u8 *enc = (u8 *) &hdr.enc;
@@ -264,35 +298,47 @@ static int unwind_spec_ehframe(struct dso *dso, struct machine *machine,
dw_read_encoded_value(enc, end, hdr.eh_frame_ptr_enc);
*fde_count = dw_read_encoded_value(enc, end, hdr.fde_count_enc);
- *segbase = offset;
- *table_data = (enc - (u8 *) &hdr) + offset;
+ *table_data_offset = enc - (u8 *) &hdr;
return 0;
}
-static int read_unwind_spec_eh_frame(struct dso *dso, struct machine *machine,
+static int read_unwind_spec_eh_frame(struct dso *dso, struct unwind_info *ui,
u64 *table_data, u64 *segbase,
u64 *fde_count)
{
- int ret = -EINVAL, fd;
- u64 offset = dso->data.eh_frame_hdr_offset;
+ struct map *map;
+ u64 base_addr = UINT64_MAX;
+ int ret, fd;
- if (offset == 0) {
- fd = dso__data_get_fd(dso, machine);
+ if (dso->data.eh_frame_hdr_offset == 0) {
+ fd = dso__data_get_fd(dso, ui->machine);
if (fd < 0)
return -EINVAL;
/* Check the .eh_frame section for unwinding info */
- offset = elf_section_offset(fd, ".eh_frame_hdr");
- dso->data.eh_frame_hdr_offset = offset;
+ ret = elf_section_address_and_offset(fd, ".eh_frame_hdr",
+ &dso->data.eh_frame_hdr_addr,
+ &dso->data.eh_frame_hdr_offset);
+ dso->data.elf_base_addr = elf_base_address(fd);
dso__data_put_fd(dso);
+ if (ret || dso->data.eh_frame_hdr_offset == 0)
+ return -EINVAL;
}
- if (offset)
- ret = unwind_spec_ehframe(dso, machine, offset,
- table_data, segbase,
- fde_count);
-
- return ret;
+ maps__for_each_entry(ui->thread->maps, map) {
+ if (map->dso == dso && map->start < base_addr)
+ base_addr = map->start;
+ }
+ base_addr -= dso->data.elf_base_addr;
+ /* Address of .eh_frame_hdr */
+ *segbase = base_addr + dso->data.eh_frame_hdr_addr;
+ ret = unwind_spec_ehframe(dso, ui->machine, dso->data.eh_frame_hdr_offset,
+ table_data, fde_count);
+ if (ret)
+ return ret;
+ /* binary_search_table offset plus .eh_frame_hdr address */
+ *table_data += *segbase;
+ return 0;
}
#ifndef NO_LIBUNWIND_DEBUG_FRAME
@@ -387,14 +433,14 @@ find_proc_info(unw_addr_space_t as, unw_word_t ip, unw_proc_info_t *pi,
pr_debug("unwind: find_proc_info dso %s\n", map->dso->name);
/* Check the .eh_frame section for unwinding info */
- if (!read_unwind_spec_eh_frame(map->dso, ui->machine,
+ if (!read_unwind_spec_eh_frame(map->dso, ui,
&table_data, &segbase, &fde_count)) {
memset(&di, 0, sizeof(di));
di.format = UNW_INFO_FORMAT_REMOTE_TABLE;
di.start_ip = map->start;
di.end_ip = map->end;
- di.u.rti.segbase = map->start + segbase - map->pgoff;
- di.u.rti.table_data = map->start + table_data - map->pgoff;
+ di.u.rti.segbase = segbase;
+ di.u.rti.table_data = table_data;
di.u.rti.table_len = fde_count * sizeof(struct table_entry)
/ sizeof(unw_word_t);
ret = dwarf_search_unwind_table(as, ip, &di, pi,
@@ -553,7 +599,8 @@ static int access_reg(unw_addr_space_t __maybe_unused as,
ret = perf_reg_value(&val, &ui->sample->user_regs, id);
if (ret) {
- pr_err("unwind: can't read reg %d\n", regnum);
+ if (!ui->best_effort)
+ pr_err("unwind: can't read reg %d\n", regnum);
return ret;
}
@@ -666,7 +713,7 @@ static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb,
return -1;
ret = unw_init_remote(&c, addr_space, ui);
- if (ret)
+ if (ret && !ui->best_effort)
display_error(ret);
while (!ret && (unw_step(&c) > 0) && i < max_stack) {
@@ -704,12 +751,14 @@ static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb,
static int _unwind__get_entries(unwind_entry_cb_t cb, void *arg,
struct thread *thread,
- struct perf_sample *data, int max_stack)
+ struct perf_sample *data, int max_stack,
+ bool best_effort)
{
struct unwind_info ui = {
.sample = data,
.thread = thread,
.machine = thread->maps->machine,
+ .best_effort = best_effort
};
if (!data->user_regs.regs)
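
A standalone sketch, with entirely hypothetical addresses, of the arithmetic read_unwind_spec_eh_frame() now does: the DSO's runtime base is the lowest mapping start minus the page-aligned minimum PT_LOAD p_vaddr, and segbase/table_data become absolute runtime addresses from there:

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

int main(void)
{
	/* Hypothetical values for a shared library. */
	uint64_t lowest_map_start  = 0x7f1234500000ULL; /* min map->start for the dso          */
	uint64_t elf_base_addr     = 0x0ULL;            /* min PT_LOAD p_vaddr, page-aligned   */
	uint64_t eh_frame_hdr_addr = 0x2d48ULL;         /* .eh_frame_hdr sh_addr               */
	uint64_t table_data_offset = 12;                /* search-table offset inside the hdr  */

	uint64_t base_addr  = lowest_map_start - elf_base_addr;
	uint64_t segbase    = base_addr + eh_frame_hdr_addr;	/* address of .eh_frame_hdr  */
	uint64_t table_data = segbase + table_data_offset;	/* binary search table       */

	printf("segbase    %#" PRIx64 "\n", segbase);
	printf("table_data %#" PRIx64 "\n", table_data);
	return 0;
}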
diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c
index e89a5479b361..509c287ee762 100644
--- a/tools/perf/util/unwind-libunwind.c
+++ b/tools/perf/util/unwind-libunwind.c
@@ -80,9 +80,11 @@ void unwind__finish_access(struct maps *maps)
int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
struct thread *thread,
- struct perf_sample *data, int max_stack)
+ struct perf_sample *data, int max_stack,
+ bool best_effort)
{
if (thread->maps->unwind_libunwind_ops)
- return thread->maps->unwind_libunwind_ops->get_entries(cb, arg, thread, data, max_stack);
+ return thread->maps->unwind_libunwind_ops->get_entries(cb, arg, thread, data,
+ max_stack, best_effort);
return 0;
}
diff --git a/tools/perf/util/unwind.h b/tools/perf/util/unwind.h
index ab8ad469c8de..b2a03fa5289b 100644
--- a/tools/perf/util/unwind.h
+++ b/tools/perf/util/unwind.h
@@ -23,13 +23,19 @@ struct unwind_libunwind_ops {
void (*finish_access)(struct maps *maps);
int (*get_entries)(unwind_entry_cb_t cb, void *arg,
struct thread *thread,
- struct perf_sample *data, int max_stack);
+ struct perf_sample *data, int max_stack, bool best_effort);
};
#ifdef HAVE_DWARF_UNWIND_SUPPORT
+/*
+ * When best_effort is set, don't report errors and fail silently. This could
+ * be expanded in the future to be more permissive about things other than
+ * error messages.
+ */
int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
struct thread *thread,
- struct perf_sample *data, int max_stack);
+ struct perf_sample *data, int max_stack,
+ bool best_effort);
/* libunwind specific */
#ifdef HAVE_LIBUNWIND_SUPPORT
#ifndef LIBUNWIND__ARCH_REG_ID
@@ -65,7 +71,8 @@ unwind__get_entries(unwind_entry_cb_t cb __maybe_unused,
void *arg __maybe_unused,
struct thread *thread __maybe_unused,
struct perf_sample *data __maybe_unused,
- int max_stack __maybe_unused)
+ int max_stack __maybe_unused,
+ bool best_effort __maybe_unused)
{
return 0;
}
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index df3c4671be72..eeb83c80f458 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -416,3 +416,54 @@ char *perf_exe(char *buf, int len)
}
return strcpy(buf, "perf");
}
+
+void perf_debuginfod_setup(struct perf_debuginfod *di)
+{
+ /*
+ * By default (when '!di->set') we clear DEBUGINFOD_URLS, so debuginfod
+ * processing is not triggered; otherwise we set it to the 'di->urls'
+ * value. If 'di->urls' is "system" we keep the DEBUGINFOD_URLS value as-is.
+ */
+ if (!di->set)
+ setenv("DEBUGINFOD_URLS", "", 1);
+ else if (di->urls && strcmp(di->urls, "system"))
+ setenv("DEBUGINFOD_URLS", di->urls, 1);
+
+ pr_debug("DEBUGINFOD_URLS=%s\n", getenv("DEBUGINFOD_URLS"));
+
+#ifndef HAVE_DEBUGINFOD_SUPPORT
+ if (di->set)
+ pr_warning("WARNING: debuginfod support requested, but perf is not built with it\n");
+#endif
+}
+
+/*
+ * Return a new filename prepended with task's root directory if it's in
+ * a chroot. Callers should free the returned string.
+ */
+char *filename_with_chroot(int pid, const char *filename)
+{
+ char buf[PATH_MAX];
+ char proc_root[32];
+ char *new_name = NULL;
+ int ret;
+
+ scnprintf(proc_root, sizeof(proc_root), "/proc/%d/root", pid);
+ ret = readlink(proc_root, buf, sizeof(buf) - 1);
+ if (ret <= 0)
+ return NULL;
+
+ /* readlink(2) does not append a null byte to buf */
+ buf[ret] = '\0';
+
+ if (!strcmp(buf, "/"))
+ return NULL;
+
+ if (strstr(buf, "(deleted)"))
+ return NULL;
+
+ if (asprintf(&new_name, "%s/%s", buf, filename) < 0)
+ return NULL;
+
+ return new_name;
+}
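
A standalone sketch of the DEBUGINFOD_URLS handling in perf_debuginfod_setup() above, expressed with plain setenv()/getenv() and a hypothetical URL:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct perf_debuginfod_sketch {
	const char *urls;
	int set;
};

static void setup(struct perf_debuginfod_sketch *di)
{
	if (!di->set)
		setenv("DEBUGINFOD_URLS", "", 1);	/* disable debuginfod processing */
	else if (di->urls && strcmp(di->urls, "system"))
		setenv("DEBUGINFOD_URLS", di->urls, 1);	/* explicit URL list             */
	/* "system": keep whatever DEBUGINFOD_URLS already holds. */
}

int main(void)
{
	struct perf_debuginfod_sketch di = {
		.urls = "https://debuginfod.example.org/",	/* hypothetical URL */
		.set  = 1,
	};

	setup(&di);
	printf("DEBUGINFOD_URLS=%s\n", getenv("DEBUGINFOD_URLS"));
	return 0;
}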
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 9f0d36ba77f2..0f78f1e7782d 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -11,6 +11,9 @@
#include <stddef.h>
#include <linux/compiler.h>
#include <sys/types.h>
+#ifndef __cplusplus
+#include <internal/cpumap.h>
+#endif
/* General helper functions */
void usage(const char *err) __noreturn;
@@ -66,6 +69,14 @@ extern bool test_attr__enabled;
void test_attr__ready(void);
void test_attr__init(void);
struct perf_event_attr;
-void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu,
+void test_attr__open(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu,
int fd, int group_fd, unsigned long flags);
+
+struct perf_debuginfod {
+ const char *urls;
+ bool set;
+};
+void perf_debuginfod_setup(struct perf_debuginfod *di);
+
+char *filename_with_chroot(int pid, const char *filename);
#endif /* GIT_COMPAT_UTIL_H */