author    Linus Torvalds <torvalds@linux-foundation.org> 2022-10-11 15:02:25 -0700
committer Linus Torvalds <torvalds@linux-foundation.org> 2022-10-11 15:02:25 -0700
commit    d465bff130bf4ca17b6980abe51164ace1e0cba4 (patch)
tree      cf63ff5514126f777e8c1f727bb832b33d07505e /tools/perf/util
parent    Merge tag 'pci-v6.1-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci (diff)
parent    perf script: Add missing fields in usage hint (diff)
Merge tag 'perf-tools-for-v6.1-1-2022-10-07' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux
Pull perf tools updates from Arnaldo Carvalho de Melo:

 - Add support for AMD on 'perf mem' and 'perf c2c'; the kernel
   enablement patches went via tip. Example:

     $ sudo perf mem record -- -c 10000
     ^C[ perf record: Woken up 227 times to write data ]
     [ perf record: Captured and wrote 58.760 MB perf.data (836978 samples) ]
     $ sudo perf mem report -F mem,sample,snoop
     Samples: 836K of event 'ibs_op//', Event count (approx.): 8418762
     Memory access              Samples  Snoop
     N/A                        700620   N/A
     L1 hit                     126675   N/A
     L2 hit                     424      N/A
     L3 hit                     664      HitM
     L3 hit                     10       N/A
     Local RAM hit              2        N/A
     Remote RAM (1 hop) hit     8558     N/A
     Remote Cache (1 hop) hit   3        N/A
     Remote Cache (1 hop) hit   2        HitM
     Remote Cache (2 hops) hit  10       HitM
     Remote Cache (2 hops) hit  6        N/A
     Uncached hit               4        N/A
     $

 - "perf lock" improvements:

     - Add the -E/--entries option to limit the number of entries to
       display, say, to ask for just the top 5 contended locks.

     - Add the -q/--quiet option to suppress header and debug messages.

     - Add a 'perf test' kernel lock contention entry to test
       'perf lock'.

 - "perf lock contention" improvements:

     - Ask BPF's bpf_get_stackid() to skip some callchain entries. The
       ones closer to the tooling are BPF related and not that
       interesting; the ones calling the locking function are the ones
       we're interested in. Example of a full, unskipped callstack:

         1  10.74 us  10.74 us  10.74 us  spinlock  __bpf_trace_contention_begin+0xb
            0xffffffffc03b5c47 bpf_prog_bf07ae9e2cbd02c5_contention_begin+0x117
            0xffffffffc03b5c47 bpf_prog_bf07ae9e2cbd02c5_contention_begin+0x117
            0xffffffffbb8b8e75 bpf_trace_run2+0x35
            0xffffffffbb7eab9b __bpf_trace_contention_begin+0xb
            0xffffffffbb7ebe75 queued_spin_lock_slowpath+0x1f5
            0xffffffffbc1c26ff _raw_spin_lock+0x1f
            0xffffffffbb841015 tick_do_update_jiffies64+0x25
            0xffffffffbb8409ee tick_irq_enter+0x9e

     - Allow changing the callstack depth and the number of entries to
       skip.

     - Show the full callstack in verbose mode (-v option); sometimes
       this is desirable instead of showing just one callstack entry.

 - Allow multiple time ranges in 'perf record --delay' to help reduce
   the amount of data collected from hardware tracing (Intel PT, etc.)
   when there is a rough idea of the periods of time where events of
   interest take place.

 - Add an Intel PT option to record decoder debug messages only when an
   error happens.

 - Improve the layout of the Intel PT man page.

 - Add new branch types: alignment, data and inst faults, and arch
   specific ones such as fiq, debug_halt, debug_exit, debug_inst and
   debug_data on arm64. Kernel enablement went thru the tip tree.

 - Fix the 'perf probe' error log check in 'perf test' when no
   debuginfo is available.

 - Fix 'perf stat' aggregation mode logic; it should be looking at the
   CPU, not at the core number.

 - Fix flags parsing in 'perf trace' filters.

 - Introduce a compact encoding of CPU ranges in perf.data, to avoid
   having a bitmap with all the CPUs.

 - Improvements to the 'perf stat' metrics, including adding
   "core_wide" and computing "smt" from the CPU topology.

 - Add support for the new PERF_FORMAT_LOST perf_event_attr.read_format
   bit, which allows tooling to ask for the precise number of lost
   samples for a given event.

 - Add an 'addr' sort key to see just the address of sampled
   instructions:

     $ perf record -o- true | perf report -i- -s addr
     [ perf record: Woken up 1 times to write data ]
     [ perf record: Captured and wrote 0.000 MB - ]
     # Samples: 12 of event 'cycles:u'
     # Event count (approx.): 252512
     #
     # Overhead  Address
     # ........  ..................
        42.96%  0x7f96f08443d7
        29.55%  0x7f96f0859b50
        14.76%  0x7f96f0852e02
         8.30%  0x7f96f0855028
         4.43%  0xffffffff8de01087

 - perf annotate: toggle full address <-> offset display. Add the 'f'
   hotkey to the 'perf annotate' TUI interface when in 'disassembler
   output' mode ('o' hotkey) to toggle between showing the full virtual
   address and just the offset.

 - Cache DSO build-ids when synthesizing PERF_RECORD_MMAP records for
   pre-existing threads at the start of a 'perf record' session,
   speeding up that record startup phase.

 - Add a command line option to specify build ids in 'perf inject'.

 - Update JSON event files for the Intel alderlake, broadwell,
   broadwellde, broadwellx, cascadelakex, haswell, haswellx, icelake,
   icelakex, ivybridge, ivytown, jaketown, sandybridge, sapphirerapids,
   skylake, skylakex, and tigerlake processors.

 - Update vendor JSON event files for the ARM Neoverse V1 and E1
   platforms.

 - Add a 'perf test' entry for 'perf mem' where a struct has false
   sharing and this gets detected in the 'perf mem' output, tested with
   Intel, AMD and ARM64 systems.

 - Add a 'perf test' entry to test the resolution of java symbols,
   where output like this is expected:

     8.18%  jshell    jitted-50116-29.so    [.] Interpreter
     0.75%  Thread-1  jitted-83602-1670.so  [.] jdk.internal.jimage.BasicImageReader.getString(int)

 - Add tests for the ARM64 CoreSight hardware tracing feature, with
   specially crafted pureloop, memcpy, thread loop and unroll thread
   workloads that then get traced and the output compared with the
   expected output. Documentation explaining it is also included.

 - Add a per thread Intel PT 'perf test' entry to check that
   PERF_RECORD_TEXT_POKE events are recorded per CPU, resulting in a
   mixture of per thread and per CPU events and mmaps, and verify that
   this all gets recorded correctly.

 - Introduce pthread mutex wrappers to allow for building with clang's
   -Wthread-safety, i.e. using the "guarded_by", "pt_guarded_by",
   "lockable", "exclusive_lock_function", "exclusive_trylock_function",
   "exclusive_locks_required", and "no_thread_safety_analysis" compiler
   function attributes.

 - Fix an empty version number when building outside of a git repo.

 - Improve the feature detection display when multiple versions of a
   feature are present, such as for binutils' libbfd, which has a mix
   of possible ways to be detected depending on the Linux distribution.
   Previously in some cases we had:

     Auto-detecting system features
     <SNIP>
     ...                    libbfd: [ on ]
     ...            libbfd-liberty: [ on ]
     ...          libbfd-liberty-z: [ on ]
     <SNIP>

   Now for this case we show just the main feature:

     Auto-detecting system features
     <SNIP>
     ...                    libbfd: [ on ]
     <SNIP>

 - Remove some unused structs, variables, macros, function prototypes
   and includes from various places.
* tag 'perf-tools-for-v6.1-1-2022-10-07' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: (169 commits)
  perf script: Add missing fields in usage hint
  perf mem: Print "LFB/MAB" for PERF_MEM_LVLNUM_LFB
  perf mem/c2c: Avoid printing empty lines for unsupported events
  perf mem/c2c: Add load store event mappings for AMD
  perf mem/c2c: Set PERF_SAMPLE_WEIGHT for LOAD_STORE events
  perf mem: Add support for printing PERF_MEM_LVLNUM_{CXL|IO}
  perf amd ibs: Sync arch/x86/include/asm/amd-ibs.h header with the kernel
  tools headers UAPI: Sync include/uapi/linux/perf_event.h header with the kernel
  perf stat: Fix cpu check to use id.cpu.cpu in aggr_printout()
  perf test coresight: Add relevant documentation about ARM64 CoreSight testing
  perf test: Add git ignore for tmp and output files of ARM CoreSight tests
  perf test coresight: Add unroll thread test shell script
  perf test coresight: Add unroll thread test tool
  perf test coresight: Add thread loop test shell scripts
  perf test coresight: Add thread loop test tool
  perf test coresight: Add memcpy thread test shell script
  perf test coresight: Add memcpy thread test tool
  perf test: Add git ignore for perf data generated by the ARM CoreSight tests
  perf test: Add arm64 asm pureloop test shell script
  perf test: Add asm pureloop test tool
  ...
Diffstat (limited to 'tools/perf/util')
-rw-r--r--  tools/perf/util/Build                            |   1
-rwxr-xr-x  tools/perf/util/PERF-VERSION-GEN                 |  10
-rw-r--r--  tools/perf/util/annotate.c                       |  34
-rw-r--r--  tools/perf/util/annotate.h                       |   8
-rw-r--r--  tools/perf/util/auxtrace.c                       |  13
-rw-r--r--  tools/perf/util/auxtrace.h                       |   3
-rw-r--r--  tools/perf/util/bpf-event.h                      |   1
-rw-r--r--  tools/perf/util/bpf-loader.c                     |   6
-rw-r--r--  tools/perf/util/bpf_lock_contention.c            |  26
-rw-r--r--  tools/perf/util/bpf_skel/lock_contention.bpf.c   |   5
-rw-r--r--  tools/perf/util/branch.c                         |  70
-rw-r--r--  tools/perf/util/branch.h                         |   7
-rw-r--r--  tools/perf/util/build-id.c                       |  12
-rw-r--r--  tools/perf/util/callchain.c                      |  12
-rw-r--r--  tools/perf/util/config.c                         |  31
-rw-r--r--  tools/perf/util/config.h                         |   1
-rw-r--r--  tools/perf/util/cpumap.c                         |  39
-rw-r--r--  tools/perf/util/cpumap.h                         |   2
-rw-r--r--  tools/perf/util/cputopo.c                        |  61
-rw-r--r--  tools/perf/util/cputopo.h                        |   5
-rw-r--r--  tools/perf/util/dso.c                            |  19
-rw-r--r--  tools/perf/util/dso.h                            |   4
-rw-r--r--  tools/perf/util/events_stats.h                   |   1
-rw-r--r--  tools/perf/util/evlist.c                         | 316
-rw-r--r--  tools/perf/util/evlist.h                         |  13
-rw-r--r--  tools/perf/util/evsel.c                          |  30
-rw-r--r--  tools/perf/util/evsel.h                          |   1
-rw-r--r--  tools/perf/util/expr.c                           |  40
-rw-r--r--  tools/perf/util/expr.h                           |  25
-rw-r--r--  tools/perf/util/expr.l                           |   6
-rw-r--r--  tools/perf/util/expr.y                           |   2
-rw-r--r--  tools/perf/util/genelf.c                         |   1
-rw-r--r--  tools/perf/util/header.c                         |  24
-rw-r--r--  tools/perf/util/hist.c                           |  22
-rw-r--r--  tools/perf/util/hist.h                           |   6
-rw-r--r--  tools/perf/util/intel-pt-decoder/intel-pt-log.c  | 117
-rw-r--r--  tools/perf/util/intel-pt-decoder/intel-pt-log.h  |   3
-rw-r--r--  tools/perf/util/intel-pt.c                       |  23
-rw-r--r--  tools/perf/util/jitdump.c                        |   7
-rw-r--r--  tools/perf/util/lock-contention.h                |   5
-rw-r--r--  tools/perf/util/machine.c                        |   4
-rw-r--r--  tools/perf/util/map.c                            |   3
-rw-r--r--  tools/perf/util/mem-events.c                     |  17
-rw-r--r--  tools/perf/util/metricgroup.c                    | 145
-rw-r--r--  tools/perf/util/metricgroup.h                    |   4
-rw-r--r--  tools/perf/util/mmap.h                           |   1
-rw-r--r--  tools/perf/util/mutex.c                          | 119
-rw-r--r--  tools/perf/util/mutex.h                          | 108
-rw-r--r--  tools/perf/util/parse-branch-options.c           |   1
-rw-r--r--  tools/perf/util/parse-events.c                   |   8
-rw-r--r--  tools/perf/util/perf_event_attr_fprintf.c        |   4
-rw-r--r--  tools/perf/util/pmu.c                            |   2
-rw-r--r--  tools/perf/util/pmu.y                            |   2
-rw-r--r--  tools/perf/util/probe-event.c                    |   3
-rw-r--r--  tools/perf/util/session.c                        |   7
-rw-r--r--  tools/perf/util/smt.c                            | 110
-rw-r--r--  tools/perf/util/smt.h                            |  19
-rw-r--r--  tools/perf/util/sort.c                           |  38
-rw-r--r--  tools/perf/util/sort.h                           |   3
-rw-r--r--  tools/perf/util/stat-display.c                   |  36
-rw-r--r--  tools/perf/util/stat-shadow.c                    | 338
-rw-r--r--  tools/perf/util/stat.c                           |  29
-rw-r--r--  tools/perf/util/stat.h                           |  12
-rw-r--r--  tools/perf/util/string.c                         |   1
-rw-r--r--  tools/perf/util/symbol.c                         |   4
-rw-r--r--  tools/perf/util/synthetic-events.c               | 184
-rw-r--r--  tools/perf/util/top.h                            |   5
67 files changed, 1557 insertions, 662 deletions
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 485e1a343165..815d235466d0 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -143,6 +143,7 @@ perf-y += branch.o
perf-y += mem2node.o
perf-y += clockid.o
perf-y += list_sort.o
+perf-y += mutex.o
perf-$(CONFIG_LIBBPF) += bpf-loader.o
perf-$(CONFIG_LIBBPF) += bpf_map.o
diff --git a/tools/perf/util/PERF-VERSION-GEN b/tools/perf/util/PERF-VERSION-GEN
index 0ee5af529238..3cc42821d9b3 100755
--- a/tools/perf/util/PERF-VERSION-GEN
+++ b/tools/perf/util/PERF-VERSION-GEN
@@ -11,7 +11,8 @@ LF='
'
#
-# Always try first to get the version from the kernel Makefile
+# Use version from kernel Makefile unless not in a git repository and
+# PERF-VERSION-FILE exists
#
CID=
TAG=
@@ -19,9 +20,14 @@ if test -d ../../.git -o -f ../../.git
then
TAG=$(MAKEFLAGS= make -sC ../.. kernelversion)
CID=$(git log -1 --abbrev=12 --pretty=format:"%h" 2>/dev/null) && CID="-g$CID"
-else
+elif test -f ../../PERF-VERSION-FILE
+then
TAG=$(cut -d' ' -f3 ../../PERF-VERSION-FILE | sed -e 's/\"//g')
fi
+if test -z "$TAG"
+then
+ TAG=$(MAKEFLAGS= make -sC ../.. kernelversion)
+fi
VN="$TAG$CID"
if test -n "$CID"
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 2c6a485c3de5..db475e44f42f 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -35,7 +35,6 @@
#include "arch/common.h"
#include "namespaces.h"
#include <regex.h>
-#include <pthread.h>
#include <linux/bitops.h>
#include <linux/kernel.h>
#include <linux/string.h>
@@ -821,7 +820,7 @@ void symbol__annotate_zero_histograms(struct symbol *sym)
{
struct annotation *notes = symbol__annotation(sym);
- pthread_mutex_lock(&notes->lock);
+ mutex_lock(&notes->lock);
if (notes->src != NULL) {
memset(notes->src->histograms, 0,
notes->src->nr_histograms * notes->src->sizeof_sym_hist);
@@ -829,7 +828,7 @@ void symbol__annotate_zero_histograms(struct symbol *sym)
memset(notes->src->cycles_hist, 0,
symbol__size(sym) * sizeof(struct cyc_hist));
}
- pthread_mutex_unlock(&notes->lock);
+ mutex_unlock(&notes->lock);
}
static int __symbol__account_cycles(struct cyc_hist *ch,
@@ -1086,7 +1085,7 @@ void annotation__compute_ipc(struct annotation *notes, size_t size)
notes->hit_insn = 0;
notes->cover_insn = 0;
- pthread_mutex_lock(&notes->lock);
+ mutex_lock(&notes->lock);
for (offset = size - 1; offset >= 0; --offset) {
struct cyc_hist *ch;
@@ -1105,7 +1104,7 @@ void annotation__compute_ipc(struct annotation *notes, size_t size)
notes->have_cycles = true;
}
}
- pthread_mutex_unlock(&notes->lock);
+ mutex_unlock(&notes->lock);
}
int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, struct perf_sample *sample,
@@ -1258,13 +1257,13 @@ int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool r
void annotation__init(struct annotation *notes)
{
- pthread_mutex_init(&notes->lock, NULL);
+ mutex_init(&notes->lock);
}
void annotation__exit(struct annotation *notes)
{
annotated_source__delete(notes->src);
- pthread_mutex_destroy(&notes->lock);
+ mutex_destroy(&notes->lock);
}
static void annotation_line__add(struct annotation_line *al, struct list_head *head)
@@ -1698,6 +1697,7 @@ fallback:
*/
__symbol__join_symfs(filename, filename_size, dso->long_name);
+ mutex_lock(&dso->lock);
if (access(filename, R_OK) && errno == ENOENT && dso->nsinfo) {
char *new_name = filename_with_chroot(dso->nsinfo->pid,
filename);
@@ -1706,6 +1706,7 @@ fallback:
free(new_name);
}
}
+ mutex_unlock(&dso->lock);
}
free(build_id_path);
@@ -2238,7 +2239,10 @@ int symbol__annotate(struct map_symbol *ms, struct evsel *evsel,
}
args.ms = *ms;
- notes->start = map__rip_2objdump(ms->map, sym->start);
+ if (notes->options && notes->options->full_addr)
+ notes->start = map__objdump_2mem(ms->map, ms->sym->start);
+ else
+ notes->start = map__rip_2objdump(ms->map, ms->sym->start);
return symbol__disassemble(sym, &args);
}
@@ -2761,6 +2765,8 @@ void annotation__update_column_widths(struct annotation *notes)
{
if (notes->options->use_offset)
notes->widths.target = notes->widths.min_addr;
+ else if (notes->options->full_addr)
+ notes->widths.target = BITS_PER_LONG / 4;
else
notes->widths.target = notes->widths.max_addr;
@@ -2770,6 +2776,18 @@ void annotation__update_column_widths(struct annotation *notes)
notes->widths.addr += notes->widths.jumps + 1;
}
+void annotation__toggle_full_addr(struct annotation *notes, struct map_symbol *ms)
+{
+ notes->options->full_addr = !notes->options->full_addr;
+
+ if (notes->options->full_addr)
+ notes->start = map__objdump_2mem(ms->map, ms->sym->start);
+ else
+ notes->start = map__rip_2objdump(ms->map, ms->sym->start);
+
+ annotation__update_column_widths(notes);
+}
+
static void annotation__calc_lines(struct annotation *notes, struct map *map,
struct rb_root *root,
struct annotation_options *opts)
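For context, a minimal sketch of how a caller would use the new helper; the
actual 'f' hotkey handler lives in tools/perf/ui/browsers/annotate.c, outside
this diffstat, so the function name below is hypothetical:

    #include "util/annotate.h"

    /* Hypothetical hotkey handler: annotation__toggle_full_addr() flips
     * options->full_addr, rebases notes->start via map__objdump_2mem()
     * or map__rip_2objdump(), and resizes the target column to
     * BITS_PER_LONG / 4 hex digits when full addresses are shown. */
    static void handle_full_addr_hotkey(struct annotation *notes,
                                        struct map_symbol *ms)
    {
        annotation__toggle_full_addr(notes, ms);
    }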
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index 986f2bbe4870..8934072c39e6 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -8,9 +8,9 @@
#include <linux/types.h>
#include <linux/list.h>
#include <linux/rbtree.h>
-#include <pthread.h>
#include <asm/bug.h>
#include "symbol_conf.h"
+#include "mutex.h"
#include "spark.h"
struct hist_browser_timer;
@@ -88,7 +88,8 @@ struct annotation_options {
show_nr_jumps,
show_minmax_cycle,
show_asm_raw,
- annotate_src;
+ annotate_src,
+ full_addr;
u8 offset_level;
int min_pcnt;
int max_lines;
@@ -273,7 +274,7 @@ struct annotated_source {
};
struct annotation {
- pthread_mutex_t lock;
+ struct mutex lock;
u64 max_coverage;
u64 start;
u64 hit_cycles;
@@ -325,6 +326,7 @@ void annotation__compute_ipc(struct annotation *notes, size_t size);
void annotation__mark_jump_targets(struct annotation *notes, struct symbol *sym);
void annotation__update_column_widths(struct annotation *notes);
void annotation__init_column_widths(struct annotation *notes, struct symbol *sym);
+void annotation__toggle_full_addr(struct annotation *notes, struct map_symbol *ms);
static inline struct sym_hist *annotated_source__histogram(struct annotated_source *src, int idx)
{
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 6edab8a16de6..b59c278fe9ed 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -26,6 +26,7 @@
#include <linux/list.h>
#include <linux/zalloc.h>
+#include "config.h"
#include "evlist.h"
#include "dso.h"
#include "map.h"
@@ -1434,6 +1435,16 @@ static int get_flags(const char **ptr, unsigned int *plus_flags, unsigned int *m
}
}
+#define ITRACE_DFLT_LOG_ON_ERROR_SZ 16384
+
+static unsigned int itrace_log_on_error_size(void)
+{
+ unsigned int sz = 0;
+
+ perf_config_scan("itrace.debug-log-buffer-size", "%u", &sz);
+ return sz ?: ITRACE_DFLT_LOG_ON_ERROR_SZ;
+}
+
/*
* Please check tools/perf/Documentation/perf-script.txt for information
* about the options parsed here, which is introduced after this cset,
@@ -1532,6 +1543,8 @@ int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts,
if (get_flags(&p, &synth_opts->log_plus_flags,
&synth_opts->log_minus_flags))
goto out_err;
+ if (synth_opts->log_plus_flags & AUXTRACE_LOG_FLG_ON_ERROR)
+ synth_opts->log_on_error_size = itrace_log_on_error_size();
break;
case 'c':
synth_opts->branches = true;
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index 6a4fbfd34c6b..cb8e0a01abb6 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -60,6 +60,7 @@ enum itrace_period_type {
#define AUXTRACE_ERR_FLG_DATA_LOST (1 << ('l' - 'a'))
#define AUXTRACE_LOG_FLG_ALL_PERF_EVTS (1 << ('a' - 'a'))
+#define AUXTRACE_LOG_FLG_ON_ERROR (1 << ('e' - 'a'))
#define AUXTRACE_LOG_FLG_USE_STDOUT (1 << ('o' - 'a'))
/**
@@ -110,6 +111,7 @@ enum itrace_period_type {
* @log_plus_flags: flags to affect what is logged
* @log_minus_flags: flags to affect what is logged
* @quick: quicker (less detailed) decoding
+ * @log_on_error_size: size of log to keep for outputting log only on errors
*/
struct itrace_synth_opts {
bool set;
@@ -155,6 +157,7 @@ struct itrace_synth_opts {
unsigned int log_plus_flags;
unsigned int log_minus_flags;
unsigned int quick;
+ unsigned int log_on_error_size;
};
/**
diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h
index 144a8a24cc69..1bcbd4fb6c66 100644
--- a/tools/perf/util/bpf-event.h
+++ b/tools/perf/util/bpf-event.h
@@ -4,7 +4,6 @@
#include <linux/compiler.h>
#include <linux/rbtree.h>
-#include <pthread.h>
#include <api/fd/array.h>
#include <stdio.h>
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index e2052f4fed33..d657594894cf 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -27,7 +27,11 @@
#include "util.h"
#include "llvm-utils.h"
#include "c++/clang-c.h"
-#include "hashmap.h"
+#ifdef HAVE_LIBBPF_SUPPORT
+#include <bpf/hashmap.h>
+#else
+#include "util/hashmap.h"
+#endif
#include "asm/bug.h"
#include <internal/xyarray.h>
diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c
index c591a66733ef..fc4d613cb979 100644
--- a/tools/perf/util/bpf_lock_contention.c
+++ b/tools/perf/util/bpf_lock_contention.c
@@ -8,17 +8,13 @@
#include "util/thread_map.h"
#include "util/lock-contention.h"
#include <linux/zalloc.h>
+#include <linux/string.h>
#include <bpf/bpf.h>
#include "bpf_skel/lock_contention.skel.h"
static struct lock_contention_bpf *skel;
-/* should be same as bpf_skel/lock_contention.bpf.c */
-struct lock_contention_key {
- s32 stack_id;
-};
-
struct lock_contention_data {
u64 total_time;
u64 min_time;
@@ -40,6 +36,7 @@ int lock_contention_prepare(struct lock_contention *con)
return -1;
}
+ bpf_map__set_value_size(skel->maps.stacks, con->max_stack * sizeof(u64));
bpf_map__set_max_entries(skel->maps.stacks, con->map_nr_entries);
bpf_map__set_max_entries(skel->maps.lock_stat, con->map_nr_entries);
@@ -91,6 +88,8 @@ int lock_contention_prepare(struct lock_contention *con)
bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
}
+ skel->bss->stack_skip = con->stack_skip;
+
lock_contention_bpf__attach(skel);
return 0;
}
@@ -114,7 +113,7 @@ int lock_contention_read(struct lock_contention *con)
struct lock_contention_data data;
struct lock_stat *st;
struct machine *machine = con->machine;
- u64 stack_trace[CONTENTION_STACK_DEPTH];
+ u64 stack_trace[con->max_stack];
fd = bpf_map__fd(skel->maps.lock_stat);
stack = bpf_map__fd(skel->maps.stacks);
@@ -125,7 +124,7 @@ int lock_contention_read(struct lock_contention *con)
while (!bpf_map_get_next_key(fd, &prev_key, &key)) {
struct map *kmap;
struct symbol *sym;
- int idx;
+ int idx = 0;
bpf_map_lookup_elem(fd, &key, &data);
st = zalloc(sizeof(*st));
@@ -144,10 +143,9 @@ int lock_contention_read(struct lock_contention *con)
bpf_map_lookup_elem(stack, &key, stack_trace);
- /* skip BPF + lock internal functions */
- idx = CONTENTION_STACK_SKIP;
+ /* skip lock internal functions */
while (is_lock_function(machine, stack_trace[idx]) &&
- idx < CONTENTION_STACK_DEPTH - 1)
+ idx < con->max_stack - 1)
idx++;
st->addr = stack_trace[idx];
@@ -171,6 +169,14 @@ int lock_contention_read(struct lock_contention *con)
return -1;
}
+ if (verbose) {
+ st->callstack = memdup(stack_trace, sizeof(stack_trace));
+ if (st->callstack == NULL) {
+ free(st);
+ return -1;
+ }
+ }
+
hlist_add_head(&st->hash_entry, con->result);
prev_key = key;
}
diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c
index 9e8b94eb6320..1bb8628e7c9f 100644
--- a/tools/perf/util/bpf_skel/lock_contention.bpf.c
+++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c
@@ -72,9 +72,10 @@ struct {
int enabled;
int has_cpu;
int has_task;
+int stack_skip;
/* error stat */
-unsigned long lost;
+int lost;
static inline int can_record(void)
{
@@ -117,7 +118,7 @@ int contention_begin(u64 *ctx)
pelem->timestamp = bpf_ktime_get_ns();
pelem->lock = (__u64)ctx[0];
pelem->flags = (__u32)ctx[1];
- pelem->stack_id = bpf_get_stackid(ctx, &stacks, BPF_F_FAST_STACK_CMP);
+ pelem->stack_id = bpf_get_stackid(ctx, &stacks, BPF_F_FAST_STACK_CMP | stack_skip);
if (pelem->stack_id < 0)
lost++;
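A sketch of how the skip count composes with the flags, assuming stack_skip
stays below 256: the low 8 bits of bpf_get_stackid()'s flags argument select
how many leading frames to drop before the stack is hashed, which is why a
plain OR with BPF_F_FAST_STACK_CMP works:

    /* stack_skip < 256: the low byte of the flags is the skip count */
    __u64 flags = BPF_F_FAST_STACK_CMP | ((__u64)stack_skip & 0xff);
    int stack_id = bpf_get_stackid(ctx, &stacks, flags);

    if (stack_id < 0)   /* stack map full or lookup failure */
        lost++;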
diff --git a/tools/perf/util/branch.c b/tools/perf/util/branch.c
index a9a909db8cc7..6d38238481d3 100644
--- a/tools/perf/util/branch.c
+++ b/tools/perf/util/branch.c
@@ -21,7 +21,10 @@ void branch_type_count(struct branch_type_stat *st, struct branch_flags *flags,
if (flags->type == PERF_BR_UNKNOWN || from == 0)
return;
- st->counts[flags->type]++;
+ if (flags->type == PERF_BR_EXTEND_ABI)
+ st->new_counts[flags->new_type]++;
+ else
+ st->counts[flags->type]++;
if (flags->type == PERF_BR_COND) {
if (to > from)
@@ -36,6 +39,38 @@ void branch_type_count(struct branch_type_stat *st, struct branch_flags *flags,
st->cross_4k++;
}
+const char *branch_new_type_name(int new_type)
+{
+ const char *branch_new_names[PERF_BR_NEW_MAX] = {
+ "FAULT_ALGN",
+ "FAULT_DATA",
+ "FAULT_INST",
+/*
+ * TODO: This switch should happen on 'session->header.env.arch'
+ * instead, because an arm64 platform perf recording could be
+ * opened for analysis on other platforms as well.
+ */
+#ifdef __aarch64__
+ "ARM64_FIQ",
+ "ARM64_DEBUG_HALT",
+ "ARM64_DEBUG_EXIT",
+ "ARM64_DEBUG_INST",
+ "ARM64_DEBUG_DATA"
+#else
+ "ARCH_1",
+ "ARCH_2",
+ "ARCH_3",
+ "ARCH_4",
+ "ARCH_5"
+#endif
+ };
+
+ if (new_type >= 0 && new_type < PERF_BR_NEW_MAX)
+ return branch_new_names[new_type];
+
+ return NULL;
+}
+
const char *branch_type_name(int type)
{
const char *branch_names[PERF_BR_MAX] = {
@@ -51,7 +86,10 @@ const char *branch_type_name(int type)
"COND_CALL",
"COND_RET",
"ERET",
- "IRQ"
+ "IRQ",
+ "SERROR",
+ "NO_TX",
+ "", // Needed for PERF_BR_EXTEND_ABI that ends up triggering some compiler warnings about NULL deref
};
if (type >= 0 && type < PERF_BR_MAX)
@@ -60,6 +98,17 @@ const char *branch_type_name(int type)
return NULL;
}
+const char *get_branch_type(struct branch_entry *e)
+{
+ if (e->flags.type == PERF_BR_UNKNOWN)
+ return "";
+
+ if (e->flags.type == PERF_BR_EXTEND_ABI)
+ return branch_new_type_name(e->flags.new_type);
+
+ return branch_type_name(e->flags.type);
+}
+
void branch_type_stat_display(FILE *fp, struct branch_type_stat *st)
{
u64 total = 0;
@@ -106,6 +155,15 @@ void branch_type_stat_display(FILE *fp, struct branch_type_stat *st)
100.0 *
(double)st->counts[i] / (double)total);
}
+
+ for (i = 0; i < PERF_BR_NEW_MAX; i++) {
+ if (st->new_counts[i] > 0)
+ fprintf(fp, "\n%8s: %5.1f%%",
+ branch_new_type_name(i),
+ 100.0 *
+ (double)st->new_counts[i] / (double)total);
+ }
+
}
static int count_str_scnprintf(int idx, const char *str, char *bf, int size)
@@ -121,6 +179,9 @@ int branch_type_str(struct branch_type_stat *st, char *bf, int size)
for (i = 0; i < PERF_BR_MAX; i++)
total += st->counts[i];
+ for (i = 0; i < PERF_BR_NEW_MAX; i++)
+ total += st->new_counts[i];
+
if (total == 0)
return 0;
@@ -138,6 +199,11 @@ int branch_type_str(struct branch_type_stat *st, char *bf, int size)
printed += count_str_scnprintf(j++, branch_type_name(i), bf + printed, size - printed);
}
+ for (i = 0; i < PERF_BR_NEW_MAX; i++) {
+ if (st->new_counts[i] > 0)
+ printed += count_str_scnprintf(j++, branch_new_type_name(i), bf + printed, size - printed);
+ }
+
if (st->cross_4k > 0)
printed += count_str_scnprintf(j++, "CROSS_4K", bf + printed, size - printed);
diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h
index 17b2ccc61094..f838b23db180 100644
--- a/tools/perf/util/branch.h
+++ b/tools/perf/util/branch.h
@@ -24,7 +24,9 @@ struct branch_flags {
u64 abort:1;
u64 cycles:16;
u64 type:4;
- u64 reserved:40;
+ u64 new_type:4;
+ u64 priv:3;
+ u64 reserved:33;
};
};
};
@@ -72,6 +74,7 @@ static inline struct branch_entry *perf_sample__branch_entries(struct perf_sampl
struct branch_type_stat {
bool branch_to;
u64 counts[PERF_BR_MAX];
+ u64 new_counts[PERF_BR_NEW_MAX];
u64 cond_fwd;
u64 cond_bwd;
u64 cross_4k;
@@ -82,6 +85,8 @@ void branch_type_count(struct branch_type_stat *st, struct branch_flags *flags,
u64 from, u64 to);
const char *branch_type_name(int type);
+const char *branch_new_type_name(int new_type);
+const char *get_branch_type(struct branch_entry *e);
void branch_type_stat_display(FILE *fp, struct branch_type_stat *st);
int branch_type_str(struct branch_type_stat *st, char *bf, int bfsize);
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index ec18ed5caf3e..a839b30c981b 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -898,11 +898,15 @@ static int filename__read_build_id_ns(const char *filename,
static bool dso__build_id_mismatch(struct dso *dso, const char *name)
{
struct build_id bid;
+ bool ret = false;
- if (filename__read_build_id_ns(name, &bid, dso->nsinfo) < 0)
- return false;
+ mutex_lock(&dso->lock);
+ if (filename__read_build_id_ns(name, &bid, dso->nsinfo) >= 0)
+ ret = !dso__build_id_equal(dso, &bid);
- return !dso__build_id_equal(dso, &bid);
+ mutex_unlock(&dso->lock);
+
+ return ret;
}
static int dso__cache_build_id(struct dso *dso, struct machine *machine,
@@ -941,8 +945,10 @@ static int dso__cache_build_id(struct dso *dso, struct machine *machine,
if (!is_kallsyms && dso__build_id_mismatch(dso, name))
goto out_free;
+ mutex_lock(&dso->lock);
ret = build_id_cache__add_b(&dso->bid, name, dso->nsinfo,
is_kallsyms, is_vdso, proper_name, root_dir);
+ mutex_unlock(&dso->lock);
out_free:
free(allocated_name);
return ret;
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 7e663673f79f..a093a15f048f 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -1307,24 +1307,16 @@ int callchain_branch_counts(struct callchain_root *root,
static int count_pri64_printf(int idx, const char *str, u64 value, char *bf, int bfsize)
{
- int printed;
-
- printed = scnprintf(bf, bfsize, "%s%s:%" PRId64 "", (idx) ? " " : " (", str, value);
-
- return printed;
+ return scnprintf(bf, bfsize, "%s%s:%" PRId64 "", (idx) ? " " : " (", str, value);
}
static int count_float_printf(int idx, const char *str, float value,
char *bf, int bfsize, float threshold)
{
- int printed;
-
if (threshold != 0.0 && value < threshold)
return 0;
- printed = scnprintf(bf, bfsize, "%s%s:%.1f%%", (idx) ? " " : " (", str, value);
-
- return printed;
+ return scnprintf(bf, bfsize, "%s%s:%.1f%%", (idx) ? " " : " (", str, value);
}
static int branch_to_str(char *bf, int bfsize,
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index 60ce5908c664..3f2ae19a1dd4 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -908,3 +908,34 @@ void set_buildid_dir(const char *dir)
/* for communicating with external commands */
setenv("PERF_BUILDID_DIR", buildid_dir, 1);
}
+
+struct perf_config_scan_data {
+ const char *name;
+ const char *fmt;
+ va_list args;
+ int ret;
+};
+
+static int perf_config_scan_cb(const char *var, const char *value, void *data)
+{
+ struct perf_config_scan_data *d = data;
+
+ if (!strcmp(var, d->name))
+ d->ret = vsscanf(value, d->fmt, d->args);
+
+ return 0;
+}
+
+int perf_config_scan(const char *name, const char *fmt, ...)
+{
+ struct perf_config_scan_data d = {
+ .name = name,
+ .fmt = fmt,
+ };
+
+ va_start(d.args, fmt);
+ perf_config(perf_config_scan_cb, &d);
+ va_end(d.args);
+
+ return d.ret;
+}
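A minimal usage sketch of the new API: "section.key" names map to a [section]
block in ~/.perfconfig, and the return value is what vsscanf() converted
(0 when the key is absent). The key below is hypothetical, not one perf
defines:

    unsigned int width = 80;

    if (perf_config_scan("report.column-width", "%u", &width) != 1)
        width = 80; /* unset or unparsable: keep the default */

The auxtrace.c caller added earlier in this diff uses the terser
'sz ?: default' form of the same pattern.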
diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h
index 2fd77aaff4d2..2e5e808928a5 100644
--- a/tools/perf/util/config.h
+++ b/tools/perf/util/config.h
@@ -29,6 +29,7 @@ typedef int (*config_fn_t)(const char *, const char *, void *);
int perf_default_config(const char *, const char *, void *);
int perf_config(config_fn_t fn, void *);
+int perf_config_scan(const char *name, const char *fmt, ...) __scanf(2, 3);
int perf_config_set(struct perf_config_set *set,
config_fn_t fn, void *data);
int perf_config_int(int *dest, const char *, const char *);
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index ae43fb88f444..8486ca3bec75 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -112,12 +112,39 @@ static struct perf_cpu_map *cpu_map__from_mask(const struct perf_record_cpu_map_
}
+static struct perf_cpu_map *cpu_map__from_range(const struct perf_record_cpu_map_data *data)
+{
+ struct perf_cpu_map *map;
+ unsigned int i = 0;
+
+ map = perf_cpu_map__empty_new(data->range_cpu_data.end_cpu -
+ data->range_cpu_data.start_cpu + 1 + data->range_cpu_data.any_cpu);
+ if (!map)
+ return NULL;
+
+ if (data->range_cpu_data.any_cpu)
+ map->map[i++].cpu = -1;
+
+ for (int cpu = data->range_cpu_data.start_cpu; cpu <= data->range_cpu_data.end_cpu;
+ i++, cpu++)
+ map->map[i].cpu = cpu;
+
+ return map;
+}
+
struct perf_cpu_map *cpu_map__new_data(const struct perf_record_cpu_map_data *data)
{
- if (data->type == PERF_CPU_MAP__CPUS)
+ switch (data->type) {
+ case PERF_CPU_MAP__CPUS:
return cpu_map__from_entries(data);
- else
+ case PERF_CPU_MAP__MASK:
return cpu_map__from_mask(data);
+ case PERF_CPU_MAP__RANGE_CPUS:
+ return cpu_map__from_range(data);
+ default:
+ pr_err("cpu_map__new_data unknown type %d\n", data->type);
+ return NULL;
+ }
}
size_t cpu_map__fprintf(struct perf_cpu_map *map, FILE *fp)
@@ -202,7 +229,7 @@ static int aggr_cpu_id__cmp(const void *a_pointer, const void *b_pointer)
else if (a->core != b->core)
return a->core - b->core;
else
- return a->thread - b->thread;
+ return a->thread_idx - b->thread_idx;
}
struct cpu_aggr_map *cpu_aggr_map__new(const struct perf_cpu_map *cpus,
@@ -640,7 +667,7 @@ const struct perf_cpu_map *cpu_map__online(void) /* thread unsafe */
bool aggr_cpu_id__equal(const struct aggr_cpu_id *a, const struct aggr_cpu_id *b)
{
- return a->thread == b->thread &&
+ return a->thread_idx == b->thread_idx &&
a->node == b->node &&
a->socket == b->socket &&
a->die == b->die &&
@@ -650,7 +677,7 @@ bool aggr_cpu_id__equal(const struct aggr_cpu_id *a, const struct aggr_cpu_id *b
bool aggr_cpu_id__is_empty(const struct aggr_cpu_id *a)
{
- return a->thread == -1 &&
+ return a->thread_idx == -1 &&
a->node == -1 &&
a->socket == -1 &&
a->die == -1 &&
@@ -661,7 +688,7 @@ bool aggr_cpu_id__is_empty(const struct aggr_cpu_id *a)
struct aggr_cpu_id aggr_cpu_id__empty(void)
{
struct aggr_cpu_id ret = {
- .thread = -1,
+ .thread_idx = -1,
.node = -1,
.socket = -1,
.die = -1,
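A decoding sketch for the new compact form, with the fields exactly as
cpu_map__from_range() consumes them above; one start/end pair (plus an
optional leading -1 "any CPU" entry) stands in for a full CPU list or bitmap
in perf.data:

    struct perf_record_cpu_map_data data = {
        .type = PERF_CPU_MAP__RANGE_CPUS,
        .range_cpu_data = {
            .any_cpu   = 0,  /* 1 would prepend the -1 "any" entry */
            .start_cpu = 0,
            .end_cpu   = 15,
        },
    };
    struct perf_cpu_map *map = cpu_map__new_data(&data); /* CPUs 0-15 */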
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index fa8a5acdcae1..4a6d029576ee 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -10,7 +10,7 @@
/** Identify where counts are aggregated, -1 implies not to aggregate. */
struct aggr_cpu_id {
/** A value in the range 0 to number of threads. */
- int thread;
+ int thread_idx;
/** The numa node X as read from /sys/devices/system/node/nodeX. */
int node;
/**
diff --git a/tools/perf/util/cputopo.c b/tools/perf/util/cputopo.c
index d275d843c155..1a3ff6449158 100644
--- a/tools/perf/util/cputopo.c
+++ b/tools/perf/util/cputopo.c
@@ -157,6 +157,67 @@ void cpu_topology__delete(struct cpu_topology *tp)
free(tp);
}
+bool cpu_topology__smt_on(const struct cpu_topology *topology)
+{
+ for (u32 i = 0; i < topology->core_cpus_lists; i++) {
+ const char *cpu_list = topology->core_cpus_list[i];
+
+ /*
+ * If there is a need to separate siblings in a core then SMT is
+ * enabled.
+ */
+ if (strchr(cpu_list, ',') || strchr(cpu_list, '-'))
+ return true;
+ }
+ return false;
+}
+
+bool cpu_topology__core_wide(const struct cpu_topology *topology,
+ const char *user_requested_cpu_list)
+{
+ struct perf_cpu_map *user_requested_cpus;
+
+ /*
+ * If user_requested_cpu_list is empty then all CPUs are recorded and so
+ * core_wide is true.
+ */
+ if (!user_requested_cpu_list)
+ return true;
+
+ user_requested_cpus = perf_cpu_map__new(user_requested_cpu_list);
+ /* Check that every user requested CPU is the complete set of SMT threads on a core. */
+ for (u32 i = 0; i < topology->core_cpus_lists; i++) {
+ const char *core_cpu_list = topology->core_cpus_list[i];
+ struct perf_cpu_map *core_cpus = perf_cpu_map__new(core_cpu_list);
+ struct perf_cpu cpu;
+ int idx;
+ bool has_first, first = true;
+
+ perf_cpu_map__for_each_cpu(cpu, idx, core_cpus) {
+ if (first) {
+ has_first = perf_cpu_map__has(user_requested_cpus, cpu);
+ first = false;
+ } else {
+ /*
+ * If the first core CPU is user requested then
+ * all subsequent CPUs in the core must be user
+ * requested too. If the first CPU isn't user
+ * requested then none of the others must be
+ * too.
+ */
+ if (perf_cpu_map__has(user_requested_cpus, cpu) != has_first) {
+ perf_cpu_map__put(core_cpus);
+ perf_cpu_map__put(user_requested_cpus);
+ return false;
+ }
+ }
+ }
+ perf_cpu_map__put(core_cpus);
+ }
+ perf_cpu_map__put(user_requested_cpus);
+ return true;
+}
+
static bool has_die_topology(void)
{
char filename[MAXPATHLEN];
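A usage sketch of the two new topology queries; the "0-3" list is a
hypothetical user request (e.g. from 'perf stat -C 0-3'):

    #include <stdio.h>
    #include "util/cputopo.h"

    void topology_example(void)
    {
        struct cpu_topology *tp = cpu_topology__new();

        if (!tp)
            return;
        if (cpu_topology__smt_on(tp))
            puts("at least one core has multiple SMT threads");
        if (!cpu_topology__core_wide(tp, "0-3"))
            puts("CPUs 0-3 split the SMT siblings of some core");
        cpu_topology__delete(tp);
    }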
diff --git a/tools/perf/util/cputopo.h b/tools/perf/util/cputopo.h
index 854e18f9041e..969e5920a00e 100644
--- a/tools/perf/util/cputopo.h
+++ b/tools/perf/util/cputopo.h
@@ -58,6 +58,11 @@ struct hybrid_topology {
struct cpu_topology *cpu_topology__new(void);
void cpu_topology__delete(struct cpu_topology *tp);
+/* Determine from the core list whether SMT was enabled. */
+bool cpu_topology__smt_on(const struct cpu_topology *topology);
+/* Are the sets of SMT siblings all enabled or all disabled in user_requested_cpus. */
+bool cpu_topology__core_wide(const struct cpu_topology *topology,
+ const char *user_requested_cpu_list);
struct numa_topology *numa_topology__new(void);
void numa_topology__delete(struct numa_topology *tp);
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 5ac13958d1bd..f1a14c0ad26d 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -501,6 +501,7 @@ static int __open_dso(struct dso *dso, struct machine *machine)
if (!name)
return -ENOMEM;
+ mutex_lock(&dso->lock);
if (machine)
root_dir = machine->root_dir;
@@ -541,6 +542,7 @@ static int __open_dso(struct dso *dso, struct machine *machine)
unlink(name);
out:
+ mutex_unlock(&dso->lock);
free(name);
return fd;
}
@@ -559,8 +561,11 @@ static int open_dso(struct dso *dso, struct machine *machine)
int fd;
struct nscookie nsc;
- if (dso->binary_type != DSO_BINARY_TYPE__BUILD_ID_CACHE)
+ if (dso->binary_type != DSO_BINARY_TYPE__BUILD_ID_CACHE) {
+ mutex_lock(&dso->lock);
nsinfo__mountns_enter(dso->nsinfo, &nsc);
+ mutex_unlock(&dso->lock);
+ }
fd = __open_dso(dso, machine);
if (dso->binary_type != DSO_BINARY_TYPE__BUILD_ID_CACHE)
nsinfo__mountns_exit(&nsc);
@@ -795,7 +800,7 @@ dso_cache__free(struct dso *dso)
struct rb_root *root = &dso->data.cache;
struct rb_node *next = rb_first(root);
- pthread_mutex_lock(&dso->lock);
+ mutex_lock(&dso->lock);
while (next) {
struct dso_cache *cache;
@@ -804,7 +809,7 @@ dso_cache__free(struct dso *dso)
rb_erase(&cache->rb_node, root);
free(cache);
}
- pthread_mutex_unlock(&dso->lock);
+ mutex_unlock(&dso->lock);
}
static struct dso_cache *__dso_cache__find(struct dso *dso, u64 offset)
@@ -841,7 +846,7 @@ dso_cache__insert(struct dso *dso, struct dso_cache *new)
struct dso_cache *cache;
u64 offset = new->offset;
- pthread_mutex_lock(&dso->lock);
+ mutex_lock(&dso->lock);
while (*p != NULL) {
u64 end;
@@ -862,7 +867,7 @@ dso_cache__insert(struct dso *dso, struct dso_cache *new)
cache = NULL;
out:
- pthread_mutex_unlock(&dso->lock);
+ mutex_unlock(&dso->lock);
return cache;
}
@@ -1297,7 +1302,7 @@ struct dso *dso__new_id(const char *name, struct dso_id *id)
dso->root = NULL;
INIT_LIST_HEAD(&dso->node);
INIT_LIST_HEAD(&dso->data.open_entry);
- pthread_mutex_init(&dso->lock, NULL);
+ mutex_init(&dso->lock);
refcount_set(&dso->refcnt, 1);
}
@@ -1336,7 +1341,7 @@ void dso__delete(struct dso *dso)
dso__free_a2l(dso);
zfree(&dso->symsrc_filename);
nsinfo__zput(dso->nsinfo);
- pthread_mutex_destroy(&dso->lock);
+ mutex_destroy(&dso->lock);
free(dso);
}
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index 66981c7a9a18..58d94175e714 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -2,7 +2,6 @@
#ifndef __PERF_DSO
#define __PERF_DSO
-#include <pthread.h>
#include <linux/refcount.h>
#include <linux/types.h>
#include <linux/rbtree.h>
@@ -11,6 +10,7 @@
#include <stdio.h>
#include <linux/bitops.h>
#include "build-id.h"
+#include "mutex.h"
struct machine;
struct map;
@@ -145,7 +145,7 @@ struct dso_cache {
struct auxtrace_cache;
struct dso {
- pthread_mutex_t lock;
+ struct mutex lock;
struct list_head node;
struct rb_node rb_node; /* rbtree node sorted by long name */
struct rb_root *root; /* root of rbtree that rb_node is in */
diff --git a/tools/perf/util/events_stats.h b/tools/perf/util/events_stats.h
index 040ab9d0a803..8fecc9fbaecc 100644
--- a/tools/perf/util/events_stats.h
+++ b/tools/perf/util/events_stats.h
@@ -47,6 +47,7 @@ struct hists_stats {
u64 total_non_filtered_period;
u32 nr_samples;
u32 nr_non_filtered_samples;
+ u32 nr_lost_samples;
};
void events_stats__inc(struct events_stats *stats, u32 type);
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 48167f3941a6..6612b00949e7 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -15,6 +15,7 @@
#include "target.h"
#include "evlist.h"
#include "evsel.h"
+#include "record.h"
#include "debug.h"
#include "units.h"
#include "bpf_counter.h"
@@ -40,12 +41,14 @@
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/prctl.h>
+#include <sys/timerfd.h>
#include <linux/bitops.h>
#include <linux/hash.h>
#include <linux/log2.h>
#include <linux/err.h>
#include <linux/string.h>
+#include <linux/time64.h>
#include <linux/zalloc.h>
#include <perf/evlist.h>
#include <perf/evsel.h>
@@ -147,6 +150,7 @@ static void evlist__purge(struct evlist *evlist)
void evlist__exit(struct evlist *evlist)
{
+ event_enable_timer__exit(&evlist->eet);
zfree(&evlist->mmap);
zfree(&evlist->overwrite_mmap);
perf_evlist__exit(&evlist->core);
@@ -264,28 +268,6 @@ int evlist__add_dummy(struct evlist *evlist)
return 0;
}
-static void evlist__add_on_all_cpus(struct evlist *evlist, struct evsel *evsel)
-{
- evsel->core.system_wide = true;
-
- /*
- * All CPUs.
- *
- * Note perf_event_open() does not accept CPUs that are not online, so
- * in fact this CPU list will include only all online CPUs.
- */
- perf_cpu_map__put(evsel->core.own_cpus);
- evsel->core.own_cpus = perf_cpu_map__new(NULL);
- perf_cpu_map__put(evsel->core.cpus);
- evsel->core.cpus = perf_cpu_map__get(evsel->core.own_cpus);
-
- /* No threads */
- perf_thread_map__put(evsel->core.threads);
- evsel->core.threads = perf_thread_map__new_dummy();
-
- evlist__add(evlist, evsel);
-}
-
struct evsel *evlist__add_aux_dummy(struct evlist *evlist, bool system_wide)
{
struct evsel *evsel = evlist__dummy_event(evlist);
@@ -298,17 +280,31 @@ struct evsel *evlist__add_aux_dummy(struct evlist *evlist, bool system_wide)
evsel->core.attr.exclude_hv = 1;
evsel->core.attr.freq = 0;
evsel->core.attr.sample_period = 1;
+ evsel->core.system_wide = system_wide;
evsel->no_aux_samples = true;
evsel->name = strdup("dummy:u");
- if (system_wide)
- evlist__add_on_all_cpus(evlist, evsel);
- else
- evlist__add(evlist, evsel);
-
+ evlist__add(evlist, evsel);
return evsel;
}
+struct evsel *evlist__add_sched_switch(struct evlist *evlist, bool system_wide)
+{
+ struct evsel *evsel = evsel__newtp_idx("sched", "sched_switch", 0);
+
+ if (IS_ERR(evsel))
+ return evsel;
+
+ evsel__set_sample_bit(evsel, CPU);
+ evsel__set_sample_bit(evsel, TIME);
+
+ evsel->core.system_wide = system_wide;
+ evsel->no_aux_samples = true;
+
+ evlist__add(evlist, evsel);
+ return evsel;
+};
+
int evlist__add_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs)
{
struct evsel *evsel, *n;
@@ -480,7 +476,7 @@ static int evlist__is_enabled(struct evlist *evlist)
return false;
}
-static void __evlist__disable(struct evlist *evlist, char *evsel_name)
+static void __evlist__disable(struct evlist *evlist, char *evsel_name, bool excl_dummy)
{
struct evsel *pos;
struct evlist_cpu_iterator evlist_cpu_itr;
@@ -502,6 +498,8 @@ static void __evlist__disable(struct evlist *evlist, char *evsel_name)
continue;
if (pos->disabled || !evsel__is_group_leader(pos) || !pos->core.fd)
continue;
+ if (excl_dummy && evsel__is_dummy_event(pos))
+ continue;
if (pos->immediate)
has_imm = true;
if (pos->immediate != imm)
@@ -518,6 +516,8 @@ static void __evlist__disable(struct evlist *evlist, char *evsel_name)
continue;
if (!evsel__is_group_leader(pos) || !pos->core.fd)
continue;
+ if (excl_dummy && evsel__is_dummy_event(pos))
+ continue;
pos->disabled = true;
}
@@ -533,15 +533,20 @@ static void __evlist__disable(struct evlist *evlist, char *evsel_name)
void evlist__disable(struct evlist *evlist)
{
- __evlist__disable(evlist, NULL);
+ __evlist__disable(evlist, NULL, false);
+}
+
+void evlist__disable_non_dummy(struct evlist *evlist)
+{
+ __evlist__disable(evlist, NULL, true);
}
void evlist__disable_evsel(struct evlist *evlist, char *evsel_name)
{
- __evlist__disable(evlist, evsel_name);
+ __evlist__disable(evlist, evsel_name, false);
}
-static void __evlist__enable(struct evlist *evlist, char *evsel_name)
+static void __evlist__enable(struct evlist *evlist, char *evsel_name, bool excl_dummy)
{
struct evsel *pos;
struct evlist_cpu_iterator evlist_cpu_itr;
@@ -560,6 +565,8 @@ static void __evlist__enable(struct evlist *evlist, char *evsel_name)
continue;
if (!evsel__is_group_leader(pos) || !pos->core.fd)
continue;
+ if (excl_dummy && evsel__is_dummy_event(pos))
+ continue;
evsel__enable_cpu(pos, evlist_cpu_itr.cpu_map_idx);
}
affinity__cleanup(affinity);
@@ -568,6 +575,8 @@ static void __evlist__enable(struct evlist *evlist, char *evsel_name)
continue;
if (!evsel__is_group_leader(pos) || !pos->core.fd)
continue;
+ if (excl_dummy && evsel__is_dummy_event(pos))
+ continue;
pos->disabled = false;
}
@@ -581,12 +590,17 @@ static void __evlist__enable(struct evlist *evlist, char *evsel_name)
void evlist__enable(struct evlist *evlist)
{
- __evlist__enable(evlist, NULL);
+ __evlist__enable(evlist, NULL, false);
+}
+
+void evlist__enable_non_dummy(struct evlist *evlist)
+{
+ __evlist__enable(evlist, NULL, true);
}
void evlist__enable_evsel(struct evlist *evlist, char *evsel_name)
{
- __evlist__enable(evlist, evsel_name);
+ __evlist__enable(evlist, evsel_name, false);
}
void evlist__toggle_enable(struct evlist *evlist)
@@ -608,7 +622,8 @@ int evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask)
int evlist__add_wakeup_eventfd(struct evlist *evlist, int fd)
{
return perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN,
- fdarray_flag__nonfilterable);
+ fdarray_flag__nonfilterable |
+ fdarray_flag__non_perf_event);
}
#endif
@@ -1897,7 +1912,8 @@ int evlist__initialize_ctlfd(struct evlist *evlist, int fd, int ack)
}
evlist->ctl_fd.pos = perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN,
- fdarray_flag__nonfilterable);
+ fdarray_flag__nonfilterable |
+ fdarray_flag__non_perf_event);
if (evlist->ctl_fd.pos < 0) {
evlist->ctl_fd.pos = -1;
pr_err("Failed to add ctl fd entry: %m\n");
@@ -2147,20 +2163,234 @@ int evlist__ctlfd_process(struct evlist *evlist, enum evlist_ctl_cmd *cmd)
return err;
}
-int evlist__ctlfd_update(struct evlist *evlist, struct pollfd *update)
+/**
+ * struct event_enable_time - perf record -D/--delay single time range.
+ * @start: start of time range to enable events in milliseconds
+ * @end: end of time range to enable events in milliseconds
+ *
+ * N.B. this structure is also accessed as an array of int.
+ */
+struct event_enable_time {
+ int start;
+ int end;
+};
+
+static int parse_event_enable_time(const char *str, struct event_enable_time *range, bool first)
{
- int ctlfd_pos = evlist->ctl_fd.pos;
- struct pollfd *entries = evlist->core.pollfd.entries;
+ const char *fmt = first ? "%u - %u %n" : " , %u - %u %n";
+ int ret, start, end, n;
- if (!evlist__ctlfd_initialized(evlist))
+ ret = sscanf(str, fmt, &start, &end, &n);
+ if (ret != 2 || end <= start)
+ return -EINVAL;
+ if (range) {
+ range->start = start;
+ range->end = end;
+ }
+ return n;
+}
+
+static ssize_t parse_event_enable_times(const char *str, struct event_enable_time *range)
+{
+ int incr = !!range;
+ bool first = true;
+ ssize_t ret, cnt;
+
+ for (cnt = 0; *str; cnt++) {
+ ret = parse_event_enable_time(str, range, first);
+ if (ret < 0)
+ return ret;
+ /* Check no overlap */
+ if (!first && range && range->start <= range[-1].end)
+ return -EINVAL;
+ str += ret;
+ range += incr;
+ first = false;
+ }
+ return cnt;
+}
+
+/**
+ * struct event_enable_timer - control structure for perf record -D/--delay.
+ * @evlist: event list
+ * @times: time ranges that events are enabled (N.B. this is also accessed as an
+ * array of int)
+ * @times_cnt: number of time ranges
+ * @timerfd: timer file descriptor
+ * @pollfd_pos: position in @evlist array of file descriptors to poll (fdarray)
+ * @times_step: current position in (int *)@times)[],
+ * refer event_enable_timer__process()
+ *
+ * Note, this structure is only used when there are time ranges, not when there
+ * is only an initial delay.
+ */
+struct event_enable_timer {
+ struct evlist *evlist;
+ struct event_enable_time *times;
+ size_t times_cnt;
+ int timerfd;
+ int pollfd_pos;
+ size_t times_step;
+};
+
+static int str_to_delay(const char *str)
+{
+ char *endptr;
+ long d;
+
+ d = strtol(str, &endptr, 10);
+ if (*endptr || d > INT_MAX || d < -1)
return 0;
+ return d;
+}
- if (entries[ctlfd_pos].fd != update->fd ||
- entries[ctlfd_pos].events != update->events)
- return -1;
+int evlist__parse_event_enable_time(struct evlist *evlist, struct record_opts *opts,
+ const char *str, int unset)
+{
+ enum fdarray_flags flags = fdarray_flag__nonfilterable | fdarray_flag__non_perf_event;
+ struct event_enable_timer *eet;
+ ssize_t times_cnt;
+ ssize_t ret;
+ int err;
+
+ if (unset)
+ return 0;
+
+ opts->initial_delay = str_to_delay(str);
+ if (opts->initial_delay)
+ return 0;
+
+ ret = parse_event_enable_times(str, NULL);
+ if (ret < 0)
+ return ret;
+
+ times_cnt = ret;
+ if (times_cnt == 0)
+ return -EINVAL;
+
+ eet = zalloc(sizeof(*eet));
+ if (!eet)
+ return -ENOMEM;
+
+ eet->times = calloc(times_cnt, sizeof(*eet->times));
+ if (!eet->times) {
+ err = -ENOMEM;
+ goto free_eet;
+ }
+
+ if (parse_event_enable_times(str, eet->times) != times_cnt) {
+ err = -EINVAL;
+ goto free_eet_times;
+ }
+
+ eet->times_cnt = times_cnt;
+
+ eet->timerfd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC);
+ if (eet->timerfd == -1) {
+ err = -errno;
+ pr_err("timerfd_create failed: %s\n", strerror(errno));
+ goto free_eet_times;
+ }
+
+ eet->pollfd_pos = perf_evlist__add_pollfd(&evlist->core, eet->timerfd, NULL, POLLIN, flags);
+ if (eet->pollfd_pos < 0) {
+ err = eet->pollfd_pos;
+ goto close_timerfd;
+ }
+
+ eet->evlist = evlist;
+ evlist->eet = eet;
+ opts->initial_delay = eet->times[0].start;
- entries[ctlfd_pos].revents = update->revents;
return 0;
+
+close_timerfd:
+ close(eet->timerfd);
+free_eet_times:
+ free(eet->times);
+free_eet:
+ free(eet);
+ return err;
+}
+
+static int event_enable_timer__set_timer(struct event_enable_timer *eet, int ms)
+{
+ struct itimerspec its = {
+ .it_value.tv_sec = ms / MSEC_PER_SEC,
+ .it_value.tv_nsec = (ms % MSEC_PER_SEC) * NSEC_PER_MSEC,
+ };
+ int err = 0;
+
+ if (timerfd_settime(eet->timerfd, 0, &its, NULL) < 0) {
+ err = -errno;
+ pr_err("timerfd_settime failed: %s\n", strerror(errno));
+ }
+ return err;
+}
+
+int event_enable_timer__start(struct event_enable_timer *eet)
+{
+ int ms;
+
+ if (!eet)
+ return 0;
+
+ ms = eet->times[0].end - eet->times[0].start;
+ eet->times_step = 1;
+
+ return event_enable_timer__set_timer(eet, ms);
+}
+
+int event_enable_timer__process(struct event_enable_timer *eet)
+{
+ struct pollfd *entries;
+ short revents;
+
+ if (!eet)
+ return 0;
+
+ entries = eet->evlist->core.pollfd.entries;
+ revents = entries[eet->pollfd_pos].revents;
+ entries[eet->pollfd_pos].revents = 0;
+
+ if (revents & POLLIN) {
+ size_t step = eet->times_step;
+ size_t pos = step / 2;
+
+ if (step & 1) {
+ evlist__disable_non_dummy(eet->evlist);
+ pr_info(EVLIST_DISABLED_MSG);
+ if (pos >= eet->times_cnt - 1) {
+ /* Disarm timer */
+ event_enable_timer__set_timer(eet, 0);
+ return 1; /* Stop */
+ }
+ } else {
+ evlist__enable_non_dummy(eet->evlist);
+ pr_info(EVLIST_ENABLED_MSG);
+ }
+
+ step += 1;
+ pos = step / 2;
+
+ if (pos < eet->times_cnt) {
+ int *times = (int *)eet->times; /* Accessing 'times' as array of int */
+ int ms = times[step] - times[step - 1];
+
+ eet->times_step = step;
+ return event_enable_timer__set_timer(eet, ms);
+ }
+ }
+
+ return 0;
+}
+
+void event_enable_timer__exit(struct event_enable_timer **ep)
+{
+ if (!ep || !*ep)
+ return;
+ free((*ep)->times);
+ zfree(ep);
}
struct evsel *evlist__find_evsel(struct evlist *evlist, int idx)
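A sketch of what the new -D/--delay syntax parses to (values in
milliseconds), using the two-pass pattern from
evlist__parse_event_enable_time() above, where a NULL range pointer just
counts the entries:

    struct event_enable_time times[2];
    ssize_t cnt = parse_event_enable_times("10-20,30-40", NULL); /* 2 */

    if (cnt == 2 && parse_event_enable_times("10-20,30-40", times) == cnt) {
        /* times[0] = {10, 20}, times[1] = {30, 40}: events are enabled
         * 10ms-20ms and 30ms-40ms after start. Ranges must ascend and
         * not overlap, otherwise -EINVAL. Viewed as the flat int array
         * the timer code uses, the boundaries 10,20,30,40 are consumed
         * one timerfd expiry at a time, alternating enable/disable. */
    }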
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 351ba2887a79..16734c6756b3 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -48,6 +48,8 @@ enum bkw_mmap_state {
BKW_MMAP_EMPTY,
};
+struct event_enable_timer;
+
struct evlist {
struct perf_evlist core;
bool enabled;
@@ -79,6 +81,7 @@ struct evlist {
int ack; /* ack file descriptor for control commands */
int pos; /* index at evlist core object to check signals */
} ctl_fd;
+ struct event_enable_timer *eet;
};
struct evsel_str_handler {
@@ -124,6 +127,7 @@ static inline struct evsel *evlist__add_dummy_on_all_cpus(struct evlist *evlist)
{
return evlist__add_aux_dummy(evlist, true);
}
+struct evsel *evlist__add_sched_switch(struct evlist *evlist, bool system_wide);
int evlist__add_sb_event(struct evlist *evlist, struct perf_event_attr *attr,
evsel__sb_cb_t cb, void *data);
@@ -205,6 +209,8 @@ void evlist__enable(struct evlist *evlist);
void evlist__toggle_enable(struct evlist *evlist);
void evlist__disable_evsel(struct evlist *evlist, char *evsel_name);
void evlist__enable_evsel(struct evlist *evlist, char *evsel_name);
+void evlist__disable_non_dummy(struct evlist *evlist);
+void evlist__enable_non_dummy(struct evlist *evlist);
void evlist__set_selected(struct evlist *evlist, struct evsel *evsel);
@@ -418,13 +424,18 @@ void evlist__close_control(int ctl_fd, int ctl_fd_ack, bool *ctl_fd_close);
int evlist__initialize_ctlfd(struct evlist *evlist, int ctl_fd, int ctl_fd_ack);
int evlist__finalize_ctlfd(struct evlist *evlist);
bool evlist__ctlfd_initialized(struct evlist *evlist);
-int evlist__ctlfd_update(struct evlist *evlist, struct pollfd *update);
int evlist__ctlfd_process(struct evlist *evlist, enum evlist_ctl_cmd *cmd);
int evlist__ctlfd_ack(struct evlist *evlist);
#define EVLIST_ENABLED_MSG "Events enabled\n"
#define EVLIST_DISABLED_MSG "Events disabled\n"
+int evlist__parse_event_enable_time(struct evlist *evlist, struct record_opts *opts,
+ const char *str, int unset);
+int event_enable_timer__start(struct event_enable_timer *eet);
+void event_enable_timer__exit(struct event_enable_timer **ep);
+int event_enable_timer__process(struct event_enable_timer *eet);
+
struct evsel *evlist__find_evsel(struct evlist *evlist, int idx);
int evlist__scnprintf_evsels(struct evlist *evlist, size_t size, char *bf);
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 18c3eb864d55..76605fde3507 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -46,7 +46,11 @@
#include "string2.h"
#include "memswap.h"
#include "util.h"
-#include "hashmap.h"
+#ifdef HAVE_LIBBPF_SUPPORT
+#include <bpf/hashmap.h>
+#else
+#include "util/hashmap.h"
+#endif
#include "pmu-hybrid.h"
#include "off_cpu.h"
#include "../perf-sys.h"
@@ -1157,6 +1161,7 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts,
attr->sample_id_all = perf_missing_features.sample_id_all ? 0 : 1;
attr->inherit = !opts->no_inherit;
attr->write_backward = opts->overwrite ? 1 : 0;
+ attr->read_format = PERF_FORMAT_LOST;
evsel__set_sample_bit(evsel, IP);
evsel__set_sample_bit(evsel, TID);
@@ -1808,7 +1813,7 @@ static struct perf_thread_map *empty_thread_map;
static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
struct perf_thread_map *threads)
{
- int nthreads;
+ int nthreads = perf_thread_map__nr(threads);
if ((perf_missing_features.write_backward && evsel->core.attr.write_backward) ||
(perf_missing_features.aux_output && evsel->core.attr.aux_output))
@@ -1834,11 +1839,6 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
threads = empty_thread_map;
}
- if (evsel->core.system_wide)
- nthreads = 1;
- else
- nthreads = threads->nr;
-
if (evsel->core.fd == NULL &&
perf_evsel__alloc_fd(&evsel->core, perf_cpu_map__nr(cpus), nthreads) < 0)
return -ENOMEM;
@@ -1852,6 +1852,8 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
static void evsel__disable_missing_features(struct evsel *evsel)
{
+ if (perf_missing_features.read_lost)
+ evsel->core.attr.read_format &= ~PERF_FORMAT_LOST;
if (perf_missing_features.weight_struct) {
evsel__set_sample_bit(evsel, WEIGHT);
evsel__reset_sample_bit(evsel, WEIGHT_STRUCT);
@@ -1903,7 +1905,12 @@ bool evsel__detect_missing_features(struct evsel *evsel)
* Must probe features in the order they were added to the
* perf_event_attr interface.
*/
- if (!perf_missing_features.weight_struct &&
+ if (!perf_missing_features.read_lost &&
+ (evsel->core.attr.read_format & PERF_FORMAT_LOST)) {
+ perf_missing_features.read_lost = true;
+ pr_debug2("switching off PERF_FORMAT_LOST support\n");
+ return true;
+ } else if (!perf_missing_features.weight_struct &&
(evsel->core.attr.sample_type & PERF_SAMPLE_WEIGHT_STRUCT)) {
perf_missing_features.weight_struct = true;
pr_debug2("switching off weight struct support\n");
@@ -2049,10 +2056,7 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus,
if (threads == NULL)
threads = empty_thread_map;
- if (evsel->core.system_wide)
- nthreads = 1;
- else
- nthreads = threads->nr;
+ nthreads = perf_thread_map__nr(threads);
if (evsel->cgrp)
pid = evsel->cgrp->fd;
@@ -2077,6 +2081,7 @@ retry_open:
test_attr__ready();
+ /* Debug message used by test scripts */
pr_debug2_peo("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx",
pid, perf_cpu_map__cpu(cpus, idx).cpu, group_fd, evsel->open_flags);
@@ -2102,6 +2107,7 @@ retry_open:
fd, group_fd, evsel->open_flags);
}
+ /* Debug message used by test scripts */
pr_debug2_peo(" = %d\n", fd);
if (evsel->bpf_fd >= 0) {
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index d927713b513e..989865e16aad 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -188,6 +188,7 @@ struct perf_missing_features {
bool data_page_size;
bool code_page_size;
bool weight_struct;
+ bool read_lost;
};
extern struct perf_missing_features perf_missing_features;
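Context for the PERF_FORMAT_LOST plumbing above: the bit asks the kernel to append a lost-sample count to what read(2) returns for the event, and the new read_lost missing-feature fallback clears the bit on older kernels that reject it. A minimal sketch of the resulting read layout for a non-grouped event (struct name illustrative; field order per perf_event_open(2)):

    /* Sketch: read(2) layout for a single event whose
     * attr.read_format is PERF_FORMAT_ID | PERF_FORMAT_LOST. */
    struct read_format_one {
            __u64 value;    /* counter value */
            __u64 id;       /* PERF_FORMAT_ID */
            __u64 lost;     /* PERF_FORMAT_LOST: records lost on this fd */
    };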
diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c
index c15a9852fa41..aaacf514dc09 100644
--- a/tools/perf/util/expr.c
+++ b/tools/perf/util/expr.c
@@ -182,7 +182,7 @@ int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref)
{
struct expr_id_data *data_ptr = NULL, *old_data = NULL;
char *old_key = NULL;
- char *name, *p;
+ char *name;
int ret;
data_ptr = zalloc(sizeof(*data_ptr));
@@ -196,15 +196,6 @@ int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref)
}
/*
- * The jevents tool converts all metric expressions
- * to lowercase, including metric references, hence
- * we need to add lowercase name for metric, so it's
- * properly found.
- */
- for (p = name; *p; p++)
- *p = tolower(*p);
-
- /*
* Intentionally passing just const char pointers,
* originally from 'struct pmu_event' object.
* We don't need to change them, so there's no
@@ -310,7 +301,9 @@ struct expr_parse_ctx *expr__ctx_new(void)
free(ctx);
return NULL;
}
- ctx->runtime = 0;
+ ctx->sctx.user_requested_cpu_list = NULL;
+ ctx->sctx.runtime = 0;
+ ctx->sctx.system_wide = false;
return ctx;
}
@@ -332,6 +325,10 @@ void expr__ctx_free(struct expr_parse_ctx *ctx)
struct hashmap_entry *cur;
size_t bkt;
+ if (!ctx)
+ return;
+
+ free(ctx->sctx.user_requested_cpu_list);
hashmap__for_each_entry(ctx->ids, cur, bkt) {
free((char *)cur->key);
free(cur->value);
@@ -344,16 +341,13 @@ static int
__expr__parse(double *val, struct expr_parse_ctx *ctx, const char *expr,
bool compute_ids)
{
- struct expr_scanner_ctx scanner_ctx = {
- .runtime = ctx->runtime,
- };
YY_BUFFER_STATE buffer;
void *scanner;
int ret;
pr_debug2("parsing metric: %s\n", expr);
- ret = expr_lex_init_extra(&scanner_ctx, &scanner);
+ ret = expr_lex_init_extra(&ctx->sctx, &scanner);
if (ret)
return ret;
@@ -410,16 +404,11 @@ double arch_get_tsc_freq(void)
}
#endif
-double expr__get_literal(const char *literal)
+double expr__get_literal(const char *literal, const struct expr_scanner_ctx *ctx)
{
static struct cpu_topology *topology;
double result = NAN;
- if (!strcasecmp("#smt_on", literal)) {
- result = smt_on() > 0 ? 1.0 : 0.0;
- goto out;
- }
-
if (!strcmp("#num_cpus", literal)) {
result = cpu__max_present_cpu().cpu;
goto out;
@@ -443,6 +432,15 @@ double expr__get_literal(const char *literal)
goto out;
}
}
+ if (!strcasecmp("#smt_on", literal)) {
+ result = smt_on(topology) ? 1.0 : 0.0;
+ goto out;
+ }
+ if (!strcmp("#core_wide", literal)) {
+ result = core_wide(ctx->system_wide, ctx->user_requested_cpu_list, topology)
+ ? 1.0 : 0.0;
+ goto out;
+ }
if (!strcmp("#num_packages", literal)) {
result = topology->package_cpus_lists;
goto out;
diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h
index bd2116983bbb..d6c1668dc1a0 100644
--- a/tools/perf/util/expr.h
+++ b/tools/perf/util/expr.h
@@ -2,28 +2,27 @@
#ifndef PARSE_CTX_H
#define PARSE_CTX_H 1
-// There are fixes that need to land upstream before we can use libbpf's headers,
-// for now use our copy unconditionally, since the data structures at this point
-// are exactly the same, no problem.
-//#ifdef HAVE_LIBBPF_SUPPORT
-//#include <bpf/hashmap.h>
-//#else
+#ifdef HAVE_LIBBPF_SUPPORT
+#include <bpf/hashmap.h>
+#else
#include "util/hashmap.h"
-//#endif
+#endif
struct metric_ref;
+struct expr_scanner_ctx {
+ char *user_requested_cpu_list;
+ int runtime;
+ bool system_wide;
+};
+
struct expr_parse_ctx {
struct hashmap *ids;
- int runtime;
+ struct expr_scanner_ctx sctx;
};
struct expr_id_data;
-struct expr_scanner_ctx {
- int runtime;
-};
-
struct hashmap *ids__new(void);
void ids__free(struct hashmap *ids);
int ids__insert(struct hashmap *ids, const char *id);
@@ -58,6 +57,6 @@ int expr__find_ids(const char *expr, const char *one,
double expr_id_data__value(const struct expr_id_data *data);
double expr_id_data__source_count(const struct expr_id_data *data);
-double expr__get_literal(const char *literal);
+double expr__get_literal(const char *literal, const struct expr_scanner_ctx *ctx);
#endif
diff --git a/tools/perf/util/expr.l b/tools/perf/util/expr.l
index 4dc8edbfd9ce..0168a9637330 100644
--- a/tools/perf/util/expr.l
+++ b/tools/perf/util/expr.l
@@ -79,11 +79,11 @@ static int str(yyscan_t scanner, int token, int runtime)
return token;
}
-static int literal(yyscan_t scanner)
+static int literal(yyscan_t scanner, const struct expr_scanner_ctx *sctx)
{
YYSTYPE *yylval = expr_get_lval(scanner);
- yylval->num = expr__get_literal(expr_get_text(scanner));
+ yylval->num = expr__get_literal(expr_get_text(scanner), sctx);
if (isnan(yylval->num))
return EXPR_ERROR;
@@ -108,7 +108,7 @@ min { return MIN; }
if { return IF; }
else { return ELSE; }
source_count { return SOURCE_COUNT; }
-{literal} { return literal(yyscanner); }
+{literal} { return literal(yyscanner, sctx); }
{number} { return value(yyscanner); }
{symbol} { return str(yyscanner, ID, sctx->runtime); }
"|" { return '|'; }
diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y
index a30b825adb7b..635e562350c5 100644
--- a/tools/perf/util/expr.y
+++ b/tools/perf/util/expr.y
@@ -156,7 +156,7 @@ start: if_expr
}
;
-if_expr: expr IF expr ELSE expr
+if_expr: expr IF expr ELSE if_expr
{
if (fpclassify($3.val) == FP_ZERO) {
/*
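The if_expr change above makes the else branch right-recursive, so metric expressions can chain conditionals ("a if c1 else b if c2 else d") without parentheses. A hedged sketch exercising it together with the relocated scanner context, assuming the public expr__parse() wrapper keeps its 0-on-success convention:

    struct expr_parse_ctx *ctx = expr__ctx_new();
    double val;

    if (ctx) {
            /* Fields that used to live on the parse ctx now sit in ctx->sctx. */
            ctx->sctx.system_wide = true;

            /* Chained conditional via the right-recursive if_expr rule. */
            if (expr__parse(&val, ctx, "1 if #smt_on else 2 if #core_wide else 3") == 0)
                    pr_debug("metric value: %f\n", val);
    }
    expr__ctx_free(ctx);    /* NULL-safe after this series */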
diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c
index d81b54563e96..fefc72066c4e 100644
--- a/tools/perf/util/genelf.c
+++ b/tools/perf/util/genelf.c
@@ -345,6 +345,7 @@ jit_write_elf(int fd, uint64_t load_addr, const char *sym,
eh_frame_base_offset);
if (retval)
goto error;
+ retval = -1;
}
/*
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index c30c29c51410..98dfaf84bd13 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -4295,8 +4295,6 @@ out:
size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp)
{
struct perf_record_event_update *ev = &event->event_update;
- struct perf_record_event_update_scale *ev_scale;
- struct perf_record_event_update_cpus *ev_cpus;
struct perf_cpu_map *map;
size_t ret;
@@ -4304,20 +4302,18 @@ size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp)
switch (ev->type) {
case PERF_EVENT_UPDATE__SCALE:
- ev_scale = (struct perf_record_event_update_scale *)ev->data;
- ret += fprintf(fp, "... scale: %f\n", ev_scale->scale);
+ ret += fprintf(fp, "... scale: %f\n", ev->scale.scale);
break;
case PERF_EVENT_UPDATE__UNIT:
- ret += fprintf(fp, "... unit: %s\n", ev->data);
+ ret += fprintf(fp, "... unit: %s\n", ev->unit);
break;
case PERF_EVENT_UPDATE__NAME:
- ret += fprintf(fp, "... name: %s\n", ev->data);
+ ret += fprintf(fp, "... name: %s\n", ev->name);
break;
case PERF_EVENT_UPDATE__CPUS:
- ev_cpus = (struct perf_record_event_update_cpus *)ev->data;
ret += fprintf(fp, "... ");
- map = cpu_map__new_data(&ev_cpus->cpus);
+ map = cpu_map__new_data(&ev->cpus.cpus);
if (map)
ret += cpu_map__fprintf(map, fp);
else
@@ -4374,8 +4370,6 @@ int perf_event__process_event_update(struct perf_tool *tool __maybe_unused,
struct evlist **pevlist)
{
struct perf_record_event_update *ev = &event->event_update;
- struct perf_record_event_update_scale *ev_scale;
- struct perf_record_event_update_cpus *ev_cpus;
struct evlist *evlist;
struct evsel *evsel;
struct perf_cpu_map *map;
@@ -4395,19 +4389,17 @@ int perf_event__process_event_update(struct perf_tool *tool __maybe_unused,
switch (ev->type) {
case PERF_EVENT_UPDATE__UNIT:
free((char *)evsel->unit);
- evsel->unit = strdup(ev->data);
+ evsel->unit = strdup(ev->unit);
break;
case PERF_EVENT_UPDATE__NAME:
free(evsel->name);
- evsel->name = strdup(ev->data);
+ evsel->name = strdup(ev->name);
break;
case PERF_EVENT_UPDATE__SCALE:
- ev_scale = (struct perf_record_event_update_scale *)ev->data;
- evsel->scale = ev_scale->scale;
+ evsel->scale = ev->scale.scale;
break;
case PERF_EVENT_UPDATE__CPUS:
- ev_cpus = (struct perf_record_event_update_cpus *)ev->data;
- map = cpu_map__new_data(&ev_cpus->cpus);
+ map = cpu_map__new_data(&ev->cpus.cpus);
if (map) {
perf_cpu_map__put(evsel->core.own_cpus);
evsel->core.own_cpus = map;
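The ev->scale.scale, ev->unit, ev->name and ev->cpus accesses above assume libperf's perf_record_event_update gained a union over its trailing data in a companion change, roughly as follows (sketch; the authoritative definition lives in tools/lib/perf/include/perf/event.h):

    struct perf_record_event_update {
            struct perf_event_header        header;
            __u64                           type;
            __u64                           id;
            union {
                    /* Used when type == PERF_EVENT_UPDATE__SCALE. */
                    struct perf_record_event_update_scale scale;
                    /* Used when type == PERF_EVENT_UPDATE__UNIT. */
                    char unit[0];
                    /* Used when type == PERF_EVENT_UPDATE__NAME. */
                    char name[0];
                    /* Used when type == PERF_EVENT_UPDATE__CPUS. */
                    struct perf_record_event_update_cpus cpus;
            };
    };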
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 1c085ab56534..17a05e943b44 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -215,6 +215,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
hists__new_col_len(hists, HISTC_GLOBAL_INS_LAT, 13);
hists__new_col_len(hists, HISTC_LOCAL_P_STAGE_CYC, 13);
hists__new_col_len(hists, HISTC_GLOBAL_P_STAGE_CYC, 13);
+ hists__new_col_len(hists, HISTC_ADDR, BITS_PER_LONG / 4 + 2);
if (symbol_conf.nanosecs)
hists__new_col_len(hists, HISTC_TIME, 16);
@@ -1622,13 +1623,13 @@ struct rb_root_cached *hists__get_rotate_entries_in(struct hists *hists)
{
struct rb_root_cached *root;
- pthread_mutex_lock(&hists->lock);
+ mutex_lock(&hists->lock);
root = hists->entries_in;
if (++hists->entries_in > &hists->entries_in_array[1])
hists->entries_in = &hists->entries_in_array[0];
- pthread_mutex_unlock(&hists->lock);
+ mutex_unlock(&hists->lock);
return root;
}
@@ -2335,6 +2336,11 @@ void hists__inc_nr_samples(struct hists *hists, bool filtered)
hists->stats.nr_non_filtered_samples++;
}
+void hists__inc_nr_lost_samples(struct hists *hists, u32 lost)
+{
+ hists->stats.nr_lost_samples += lost;
+}
+
static struct hist_entry *hists__add_dummy_entry(struct hists *hists,
struct hist_entry *pair)
{
@@ -2678,12 +2684,16 @@ size_t evlist__fprintf_nr_events(struct evlist *evlist, FILE *fp,
evlist__for_each_entry(evlist, pos) {
struct hists *hists = evsel__hists(pos);
- if (skip_empty && !hists->stats.nr_samples)
+ if (skip_empty && !hists->stats.nr_samples && !hists->stats.nr_lost_samples)
continue;
ret += fprintf(fp, "%s stats:\n", evsel__name(pos));
- ret += fprintf(fp, "%16s events: %10d\n",
- "SAMPLE", hists->stats.nr_samples);
+ if (hists->stats.nr_samples)
+ ret += fprintf(fp, "%16s events: %10d\n",
+ "SAMPLE", hists->stats.nr_samples);
+ if (hists->stats.nr_lost_samples)
+ ret += fprintf(fp, "%16s events: %10d\n",
+ "LOST_SAMPLES", hists->stats.nr_lost_samples);
}
return ret;
@@ -2805,7 +2815,7 @@ int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list)
hists->entries_in = &hists->entries_in_array[0];
hists->entries_collapsed = RB_ROOT_CACHED;
hists->entries = RB_ROOT_CACHED;
- pthread_mutex_init(&hists->lock, NULL);
+ mutex_init(&hists->lock);
hists->socket_filter = -1;
hists->hpp_list = hpp_list;
INIT_LIST_HEAD(&hists->hpp_formats);
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 7ed4648d2fc2..ebd8a8f783ee 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -4,10 +4,10 @@
#include <linux/rbtree.h>
#include <linux/types.h>
-#include <pthread.h>
#include "evsel.h"
#include "color.h"
#include "events_stats.h"
+#include "mutex.h"
struct hist_entry;
struct hist_entry_ops;
@@ -79,6 +79,7 @@ enum hist_column {
HISTC_GLOBAL_P_STAGE_CYC,
HISTC_ADDR_FROM,
HISTC_ADDR_TO,
+ HISTC_ADDR,
HISTC_NR_COLS, /* Last entry */
};
@@ -98,7 +99,7 @@ struct hists {
const struct dso *dso_filter;
const char *uid_filter_str;
const char *symbol_filter_str;
- pthread_mutex_t lock;
+ struct mutex lock;
struct hists_stats stats;
u64 event_stream;
u16 col_len[HISTC_NR_COLS];
@@ -201,6 +202,7 @@ void hists__reset_stats(struct hists *hists);
void hists__inc_stats(struct hists *hists, struct hist_entry *h);
void hists__inc_nr_events(struct hists *hists);
void hists__inc_nr_samples(struct hists *hists, bool filtered);
+void hists__inc_nr_lost_samples(struct hists *hists, u32 lost);
size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
int max_cols, float min_pcnt, FILE *fp,
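The new nr_lost_samples counter is what lets evlist__fprintf_nr_events() print the LOST_SAMPLES line next to SAMPLE above. A sketch of the accounting call a PERF_RECORD_LOST_SAMPLES handler would make (evsel resolution from the event elided):

    /* Sketch: called from a tool's PERF_RECORD_LOST_SAMPLES handler. */
    static void account_lost_samples(struct evsel *evsel, u32 lost)
    {
            hists__inc_nr_lost_samples(evsel__hists(evsel), lost);
    }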
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.c b/tools/perf/util/intel-pt-decoder/intel-pt-log.c
index 5f5dfc8753f3..ef55d6232cf0 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-log.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.c
@@ -5,12 +5,16 @@
*/
#include <stdio.h>
+#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>
#include <stdarg.h>
#include <stdbool.h>
#include <string.h>
+#include <linux/zalloc.h>
+#include <linux/kernel.h>
+
#include "intel-pt-log.h"
#include "intel-pt-insn-decoder.h"
@@ -18,18 +22,33 @@
#define MAX_LOG_NAME 256
+#define DFLT_BUF_SZ (16 * 1024)
+
+struct log_buf {
+ char *buf;
+ size_t buf_sz;
+ size_t head;
+ bool wrapped;
+ FILE *backend;
+};
+
static FILE *f;
static char log_name[MAX_LOG_NAME];
bool intel_pt_enable_logging;
+static bool intel_pt_dump_log_on_error;
+static unsigned int intel_pt_log_on_error_size;
+static struct log_buf log_buf;
void *intel_pt_log_fp(void)
{
return f;
}
-void intel_pt_log_enable(void)
+void intel_pt_log_enable(bool dump_log_on_error, unsigned int log_on_error_size)
{
intel_pt_enable_logging = true;
+ intel_pt_dump_log_on_error = dump_log_on_error;
+ intel_pt_log_on_error_size = log_on_error_size;
}
void intel_pt_log_disable(void)
@@ -74,6 +93,100 @@ static void intel_pt_print_no_data(uint64_t pos, int indent)
fprintf(f, " ");
}
+static ssize_t log_buf__write(void *cookie, const char *buf, size_t size)
+{
+ struct log_buf *b = cookie;
+ size_t sz = size;
+
+ if (!b->buf)
+ return size;
+
+ while (sz) {
+ size_t space = b->buf_sz - b->head;
+ size_t n = min(space, sz);
+
+ memcpy(b->buf + b->head, buf, n);
+ sz -= n;
+ buf += n;
+ b->head += n;
+ if (sz && b->head >= b->buf_sz) {
+ b->head = 0;
+ b->wrapped = true;
+ }
+ }
+ return size;
+}
+
+static int log_buf__close(void *cookie)
+{
+ struct log_buf *b = cookie;
+
+ zfree(&b->buf);
+ return 0;
+}
+
+static FILE *log_buf__open(struct log_buf *b, FILE *backend, unsigned int sz)
+{
+ cookie_io_functions_t fns = {
+ .write = log_buf__write,
+ .close = log_buf__close,
+ };
+ FILE *file;
+
+ memset(b, 0, sizeof(*b));
+ b->buf_sz = sz;
+ b->buf = malloc(b->buf_sz);
+ b->backend = backend;
+ file = fopencookie(b, "a", fns);
+ if (!file)
+ zfree(&b->buf);
+ return file;
+}
+
+static bool remove_first_line(const char **p, size_t *n)
+{
+ for (; *n && **p != '\n'; ++*p, --*n)
+ ;
+ if (*n) {
+ *p += 1;
+ *n -= 1;
+ return true;
+ }
+ return false;
+}
+
+static void write_lines(const char *p, size_t n, FILE *fp, bool *remove_first)
+{
+ if (*remove_first)
+ *remove_first = !remove_first_line(&p, &n);
+ fwrite(p, n, 1, fp);
+}
+
+static void log_buf__dump(struct log_buf *b)
+{
+ bool remove_first = false;
+
+ if (!b->buf)
+ return;
+
+ fflush(f); /* Could update b->head and b->wrapped */
+ fprintf(b->backend, "Dumping debug log buffer\n");
+ if (b->wrapped) {
+ remove_first = true;
+ write_lines(b->buf + b->head, b->buf_sz - b->head, b->backend, &remove_first);
+ }
+ write_lines(b->buf, b->head, b->backend, &remove_first);
+ fprintf(b->backend, "End of debug log buffer dump\n");
+
+ b->head = 0;
+ b->wrapped = false;
+}
+
+void intel_pt_log_dump_buf(void)
+{
+ log_buf__dump(&log_buf);
+}
+
static int intel_pt_log_open(void)
{
if (!intel_pt_enable_logging)
@@ -86,6 +199,8 @@ static int intel_pt_log_open(void)
f = fopen(log_name, "w+");
else
f = stdout;
+ if (f && intel_pt_dump_log_on_error)
+ f = log_buf__open(&log_buf, f, intel_pt_log_on_error_size);
if (!f) {
intel_pt_enable_logging = false;
return -1;
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.h b/tools/perf/util/intel-pt-decoder/intel-pt-log.h
index d900aab24b21..354d7d23fc81 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-log.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.h
@@ -14,9 +14,10 @@
struct intel_pt_pkt;
void *intel_pt_log_fp(void);
-void intel_pt_log_enable(void);
+void intel_pt_log_enable(bool dump_log_on_error, unsigned int log_on_error_size);
void intel_pt_log_disable(void);
void intel_pt_log_set_name(const char *name);
+void intel_pt_log_dump_buf(void);
void __intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len,
uint64_t pos, const unsigned char *buf);
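The log_buf machinery above rides on glibc's fopencookie(3), which redirects stdio writes into a caller-supplied sink. The same wrap-around idea in isolation, as a standalone demo rather than perf code:

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <sys/types.h>

    struct ring { char buf[32]; size_t head; int wrapped; };

    static ssize_t ring_write(void *cookie, const char *data, size_t size)
    {
            struct ring *r = cookie;

            for (size_t i = 0; i < size; i++) {
                    r->buf[r->head] = data[i];
                    if (++r->head == sizeof(r->buf)) {
                            r->head = 0;    /* wrap: oldest bytes get overwritten */
                            r->wrapped = 1;
                    }
            }
            return size;
    }

    int main(void)
    {
            struct ring r = { .head = 0 };
            cookie_io_functions_t fns = { .write = ring_write };
            FILE *f = fopencookie(&r, "a", fns);

            if (!f)
                    return 1;
            setvbuf(f, NULL, _IONBF, 0);            /* write through immediately */
            for (int i = 0; i < 10; i++)
                    fprintf(f, "line %d\n", i);     /* only the tail survives */
            fclose(f);
            printf("wrapped=%d head=%zu\n", r.wrapped, r.head);
            return 0;
    }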
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index d5e9fc8106dd..b34cb3dec1aa 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -842,7 +842,8 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
offset, buf,
INTEL_PT_INSN_BUF_SZ);
if (len <= 0) {
- intel_pt_log("ERROR: failed to read at %" PRIu64 " ", offset);
+ intel_pt_log("ERROR: failed to read at offset %#" PRIx64 " ",
+ offset);
if (intel_pt_enable_logging)
dso__fprintf(al.map->dso, intel_pt_log_fp());
return -EINVAL;
@@ -2418,6 +2419,8 @@ static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
pid_t pid, pid_t tid, u64 ip, u64 timestamp,
pid_t machine_pid, int vcpu)
{
+ bool dump_log_on_error = pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_ON_ERROR;
+ bool log_on_stdout = pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_USE_STDOUT;
union perf_event event;
char msg[MAX_AUXTRACE_ERROR_MSG];
int err;
@@ -2437,6 +2440,16 @@ static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
code, cpu, pid, tid, ip, msg, timestamp,
machine_pid, vcpu);
+ if (intel_pt_enable_logging && !log_on_stdout) {
+ FILE *fp = intel_pt_log_fp();
+
+ if (fp)
+ perf_event__fprintf_auxtrace_error(&event, fp);
+ }
+
+ if (code != INTEL_PT_ERR_LOST && dump_log_on_error)
+ intel_pt_log_dump_buf();
+
err = perf_session__deliver_synth_event(pt->session, &event, NULL);
if (err)
pr_err("Intel Processor Trace: failed to deliver error event, error %d\n",
@@ -4271,8 +4284,12 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
goto err_delete_thread;
}
- if (pt->synth_opts.log)
- intel_pt_log_enable();
+ if (pt->synth_opts.log) {
+ bool log_on_error = pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_ON_ERROR;
+ unsigned int log_on_error_size = pt->synth_opts.log_on_error_size;
+
+ intel_pt_log_enable(log_on_error, log_on_error_size);
+ }
/* Maximum non-turbo ratio is TSC freq / 100 MHz */
if (pt->tc.time_mult) {
diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c
index 4e6632203704..0e033278fa12 100644
--- a/tools/perf/util/jitdump.c
+++ b/tools/perf/util/jitdump.c
@@ -56,13 +56,6 @@ struct jit_buf_desc {
char dir[PATH_MAX];
};
-struct debug_line_info {
- unsigned long vma;
- unsigned int lineno;
- /* The filename format is unspecified, absolute path, relative etc. */
- char const filename[];
-};
-
struct jit_tool {
struct perf_tool tool;
struct perf_data output;
diff --git a/tools/perf/util/lock-contention.h b/tools/perf/util/lock-contention.h
index 2146efc33396..b8cb8830b7bc 100644
--- a/tools/perf/util/lock-contention.h
+++ b/tools/perf/util/lock-contention.h
@@ -11,6 +11,7 @@ struct lock_stat {
u64 addr; /* address of lockdep_map, used as ID */
char *name; /* for strcpy(), we cannot use const */
+ u64 *callstack;
unsigned int nr_acquire;
unsigned int nr_acquired;
@@ -113,7 +114,9 @@ struct lock_contention {
struct machine *machine;
struct hlist_head *result;
unsigned long map_nr_entries;
- unsigned long lost;
+ int lost;
+ int max_stack;
+ int stack_skip;
};
#ifdef HAVE_BPF_SKEL
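The new max_stack and stack_skip fields feed the BPF collector, where bpf_get_stackid() encodes the number of leading frames to drop in the low byte of its flags argument, so the bpf/tracepoint glue frames never reach the report. A hedged sketch of the BPF-side call (map and variable names illustrative, not the exact skeleton source):

    /* BPF side (sketch): drop 'stack_skip' glue frames so the stored
     * callstack starts at the caller of the locking function. */
    __u64 flags = BPF_F_FAST_STACK_CMP | (stack_skip & BPF_F_SKIP_FIELD_MASK);
    long stack_id = bpf_get_stackid(ctx, &stacks, flags);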
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 2a16cae28407..76316e459c3d 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1128,10 +1128,6 @@ static struct dso *machine__get_kernel(struct machine *machine)
return kernel;
}
-struct process_args {
- u64 start;
-};
-
void machine__get_kallsyms_filename(struct machine *machine, char *buf,
size_t bufsz)
{
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index e0aa4a254583..f3a3d9b3a40d 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -181,7 +181,10 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
if (!(prot & PROT_EXEC))
dso__set_loaded(dso);
}
+ mutex_lock(&dso->lock);
+ nsinfo__put(dso->nsinfo);
dso->nsinfo = nsi;
+ mutex_unlock(&dso->lock);
if (build_id__is_defined(bid)) {
dso__set_build_id(dso, bid);
diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
index 764883183519..b3a91093069a 100644
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -156,11 +156,12 @@ void perf_mem_events__list(void)
for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
struct perf_mem_event *e = perf_mem_events__ptr(j);
- fprintf(stderr, "%-13s%-*s%s\n",
- e->tag ?: "",
- verbose > 0 ? 25 : 0,
- verbose > 0 ? perf_mem_events__name(j, NULL) : "",
- e->supported ? ": available" : "");
+ fprintf(stderr, "%-*s%-*s%s",
+ e->tag ? 13 : 0,
+ e->tag ? : "",
+ e->tag && verbose > 0 ? 25 : 0,
+ e->tag && verbose > 0 ? perf_mem_events__name(j, NULL) : "",
+ e->supported ? ": available\n" : "");
}
}
@@ -281,7 +282,7 @@ static const char * const mem_lvl[] = {
"HIT",
"MISS",
"L1",
- "LFB",
+ "LFB/MAB",
"L2",
"L3",
"Local RAM",
@@ -294,8 +295,10 @@ static const char * const mem_lvl[] = {
};
static const char * const mem_lvlnum[] = {
+ [PERF_MEM_LVLNUM_CXL] = "CXL",
+ [PERF_MEM_LVLNUM_IO] = "I/O",
[PERF_MEM_LVLNUM_ANY_CACHE] = "Any cache",
- [PERF_MEM_LVLNUM_LFB] = "LFB",
+ [PERF_MEM_LVLNUM_LFB] = "LFB/MAB",
[PERF_MEM_LVLNUM_RAM] = "RAM",
[PERF_MEM_LVLNUM_PMEM] = "PMEM",
[PERF_MEM_LVLNUM_NA] = "N/A",
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index c93bcaf6d55d..4c98ac29ee13 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -22,6 +22,7 @@
#include <linux/list_sort.h>
#include <linux/string.h>
#include <linux/zalloc.h>
+#include <perf/cpumap.h>
#include <subcmd/parse-options.h>
#include <api/fs/fs.h>
#include "util.h"
@@ -108,17 +109,6 @@ void metricgroup__rblist_exit(struct rblist *metric_events)
rblist__exit(metric_events);
}
-/*
- * A node in the list of referenced metrics. metric_expr
- * is held as a convenience to avoid a search through the
- * metric list.
- */
-struct metric_ref_node {
- const char *metric_name;
- const char *metric_expr;
- struct list_head list;
-};
-
/**
* The metric under construction. The data held here will be placed in a
* metric_expr.
@@ -189,10 +179,24 @@ static bool metricgroup__has_constraint(const struct pmu_event *pe)
return false;
}
+static void metric__free(struct metric *m)
+{
+ if (!m)
+ return;
+
+ free(m->metric_refs);
+ expr__ctx_free(m->pctx);
+ free((char *)m->modifier);
+ evlist__delete(m->evlist);
+ free(m);
+}
+
static struct metric *metric__new(const struct pmu_event *pe,
const char *modifier,
bool metric_no_group,
- int runtime)
+ int runtime,
+ const char *user_requested_cpu_list,
+ bool system_wide)
{
struct metric *m;
@@ -201,35 +205,34 @@ static struct metric *metric__new(const struct pmu_event *pe,
return NULL;
m->pctx = expr__ctx_new();
- if (!m->pctx) {
- free(m);
- return NULL;
- }
+ if (!m->pctx)
+ goto out_err;
m->metric_name = pe->metric_name;
- m->modifier = modifier ? strdup(modifier) : NULL;
- if (modifier && !m->modifier) {
- expr__ctx_free(m->pctx);
- free(m);
- return NULL;
+ m->modifier = NULL;
+ if (modifier) {
+ m->modifier = strdup(modifier);
+ if (!m->modifier)
+ goto out_err;
}
m->metric_expr = pe->metric_expr;
m->metric_unit = pe->unit;
- m->pctx->runtime = runtime;
+ m->pctx->sctx.user_requested_cpu_list = NULL;
+ if (user_requested_cpu_list) {
+ m->pctx->sctx.user_requested_cpu_list = strdup(user_requested_cpu_list);
+ if (!m->pctx->sctx.user_requested_cpu_list)
+ goto out_err;
+ }
+ m->pctx->sctx.runtime = runtime;
+ m->pctx->sctx.system_wide = system_wide;
m->has_constraint = metric_no_group || metricgroup__has_constraint(pe);
m->metric_refs = NULL;
m->evlist = NULL;
return m;
-}
-
-static void metric__free(struct metric *m)
-{
- free(m->metric_refs);
- expr__ctx_free(m->pctx);
- free((char *)m->modifier);
- evlist__delete(m->evlist);
- free(m);
+out_err:
+ metric__free(m);
+ return NULL;
}
static bool contains_metric_id(struct evsel **metric_events, int num_events,
@@ -874,6 +877,8 @@ struct metricgroup_add_iter_data {
int *ret;
bool *has_match;
bool metric_no_group;
+ const char *user_requested_cpu_list;
+ bool system_wide;
struct metric *root_metric;
const struct visited_metric *visited;
const struct pmu_events_table *table;
@@ -887,6 +892,8 @@ static int add_metric(struct list_head *metric_list,
const struct pmu_event *pe,
const char *modifier,
bool metric_no_group,
+ const char *user_requested_cpu_list,
+ bool system_wide,
struct metric *root_metric,
const struct visited_metric *visited,
const struct pmu_events_table *table);
@@ -899,6 +906,8 @@ static int add_metric(struct list_head *metric_list,
* @metric_no_group: Should events written to events be grouped "{}" or
* global. Grouping is the default but due to multiplexing the
* user may override.
+ * @user_requested_cpu_list: Command line specified CPUs to record on.
+ * @system_wide: Are events for all processes recorded.
* @root_metric: Metrics may reference other metrics to form a tree. In this
* case the root_metric holds all the IDs and a list of referenced
* metrics. When adding a root this argument is NULL.
@@ -910,6 +919,8 @@ static int add_metric(struct list_head *metric_list,
static int resolve_metric(struct list_head *metric_list,
const char *modifier,
bool metric_no_group,
+ const char *user_requested_cpu_list,
+ bool system_wide,
struct metric *root_metric,
const struct visited_metric *visited,
const struct pmu_events_table *table)
@@ -956,7 +967,8 @@ static int resolve_metric(struct list_head *metric_list,
*/
for (i = 0; i < pending_cnt; i++) {
ret = add_metric(metric_list, &pending[i].pe, modifier, metric_no_group,
- root_metric, visited, table);
+ user_requested_cpu_list, system_wide, root_metric, visited,
+ table);
if (ret)
break;
}
@@ -974,6 +986,8 @@ static int resolve_metric(struct list_head *metric_list,
* global. Grouping is the default but due to multiplexing the
* user may override.
* @runtime: A special argument for the parser only known at runtime.
+ * @user_requested_cpu_list: Command line specified CPUs to record on.
+ * @system_wide: Are events for all processes recorded.
* @root_metric: Metrics may reference other metrics to form a tree. In this
* case the root_metric holds all the IDs and a list of referenced
* metrics. When adding a root this argument is NULL.
@@ -987,6 +1001,8 @@ static int __add_metric(struct list_head *metric_list,
const char *modifier,
bool metric_no_group,
int runtime,
+ const char *user_requested_cpu_list,
+ bool system_wide,
struct metric *root_metric,
const struct visited_metric *visited,
const struct pmu_events_table *table)
@@ -1011,7 +1027,8 @@ static int __add_metric(struct list_head *metric_list,
* This metric is the root of a tree and may reference other
* metrics that are added recursively.
*/
- root_metric = metric__new(pe, modifier, metric_no_group, runtime);
+ root_metric = metric__new(pe, modifier, metric_no_group, runtime,
+ user_requested_cpu_list, system_wide);
if (!root_metric)
return -ENOMEM;
@@ -1060,8 +1077,9 @@ static int __add_metric(struct list_head *metric_list,
ret = -EINVAL;
} else {
/* Resolve referenced metrics. */
- ret = resolve_metric(metric_list, modifier, metric_no_group, root_metric,
- &visited_node, table);
+ ret = resolve_metric(metric_list, modifier, metric_no_group,
+ user_requested_cpu_list, system_wide,
+ root_metric, &visited_node, table);
}
if (ret) {
@@ -1109,6 +1127,8 @@ static int add_metric(struct list_head *metric_list,
const struct pmu_event *pe,
const char *modifier,
bool metric_no_group,
+ const char *user_requested_cpu_list,
+ bool system_wide,
struct metric *root_metric,
const struct visited_metric *visited,
const struct pmu_events_table *table)
@@ -1119,7 +1139,8 @@ static int add_metric(struct list_head *metric_list,
if (!strstr(pe->metric_expr, "?")) {
ret = __add_metric(metric_list, pe, modifier, metric_no_group, 0,
- root_metric, visited, table);
+ user_requested_cpu_list, system_wide, root_metric,
+ visited, table);
} else {
int j, count;
@@ -1132,7 +1153,8 @@ static int add_metric(struct list_head *metric_list,
for (j = 0; j < count && !ret; j++)
ret = __add_metric(metric_list, pe, modifier, metric_no_group, j,
- root_metric, visited, table);
+ user_requested_cpu_list, system_wide,
+ root_metric, visited, table);
}
return ret;
@@ -1149,6 +1171,7 @@ static int metricgroup__add_metric_sys_event_iter(const struct pmu_event *pe,
return 0;
ret = add_metric(d->metric_list, pe, d->modifier, d->metric_no_group,
+ d->user_requested_cpu_list, d->system_wide,
d->root_metric, d->visited, d->table);
if (ret)
goto out;
@@ -1191,7 +1214,9 @@ struct metricgroup__add_metric_data {
struct list_head *list;
const char *metric_name;
const char *modifier;
+ const char *user_requested_cpu_list;
bool metric_no_group;
+ bool system_wide;
bool has_match;
};
@@ -1208,8 +1233,8 @@ static int metricgroup__add_metric_callback(const struct pmu_event *pe,
data->has_match = true;
ret = add_metric(data->list, pe, data->modifier, data->metric_no_group,
- /*root_metric=*/NULL,
- /*visited_metrics=*/NULL, table);
+ data->user_requested_cpu_list, data->system_wide,
+ /*root_metric=*/NULL, /*visited_metrics=*/NULL, table);
}
return ret;
}
@@ -1223,12 +1248,16 @@ static int metricgroup__add_metric_callback(const struct pmu_event *pe,
* @metric_no_group: Should events written to events be grouped "{}" or
* global. Grouping is the default but due to multiplexing the
* user may override.
+ * @user_requested_cpu_list: Command line specified CPUs to record on.
+ * @system_wide: Are events for all processes recorded.
* @metric_list: The list that the metric or metric group are added to.
* @table: The table that is searched for metrics, most commonly the table for the
* architecture perf is running upon.
*/
static int metricgroup__add_metric(const char *metric_name, const char *modifier,
bool metric_no_group,
+ const char *user_requested_cpu_list,
+ bool system_wide,
struct list_head *metric_list,
const struct pmu_events_table *table)
{
@@ -1242,6 +1271,8 @@ static int metricgroup__add_metric(const char *metric_name, const char *modifier
.metric_name = metric_name,
.modifier = modifier,
.metric_no_group = metric_no_group,
+ .user_requested_cpu_list = user_requested_cpu_list,
+ .system_wide = system_wide,
.has_match = false,
};
/*
@@ -1263,6 +1294,8 @@ static int metricgroup__add_metric(const char *metric_name, const char *modifier
.metric_name = metric_name,
.modifier = modifier,
.metric_no_group = metric_no_group,
+ .user_requested_cpu_list = user_requested_cpu_list,
+ .system_wide = system_wide,
.has_match = &has_match,
.ret = &ret,
.table = table,
@@ -1293,12 +1326,15 @@ out:
* @metric_no_group: Should events written to events be grouped "{}" or
* global. Grouping is the default but due to multiplexing the
* user may override.
+ * @user_requested_cpu_list: Command line specified CPUs to record on.
+ * @system_wide: Are events for all processes recorded.
* @metric_list: The list that metrics are added to.
* @table: The table that is searched for metrics, most commonly the table for the
* architecture perf is running upon.
*/
static int metricgroup__add_metric_list(const char *list, bool metric_no_group,
- struct list_head *metric_list,
+ const char *user_requested_cpu_list,
+ bool system_wide, struct list_head *metric_list,
const struct pmu_events_table *table)
{
char *list_itr, *list_copy, *metric_name, *modifier;
@@ -1315,8 +1351,8 @@ static int metricgroup__add_metric_list(const char *list, bool metric_no_group,
*modifier++ = '\0';
ret = metricgroup__add_metric(metric_name, modifier,
- metric_no_group, metric_list,
- table);
+ metric_no_group, user_requested_cpu_list,
+ system_wide, metric_list, table);
if (ret == -EINVAL)
pr_err("Cannot find metric or group `%s'\n", metric_name);
@@ -1505,6 +1541,8 @@ err_out:
static int parse_groups(struct evlist *perf_evlist, const char *str,
bool metric_no_group,
bool metric_no_merge,
+ const char *user_requested_cpu_list,
+ bool system_wide,
struct perf_pmu *fake_pmu,
struct rblist *metric_events_list,
const struct pmu_events_table *table)
@@ -1518,7 +1556,8 @@ static int parse_groups(struct evlist *perf_evlist, const char *str,
if (metric_events_list->nr_entries == 0)
metricgroup__rblist_init(metric_events_list);
ret = metricgroup__add_metric_list(str, metric_no_group,
- &metric_list, table);
+ user_requested_cpu_list,
+ system_wide, &metric_list, table);
if (ret)
goto out;
@@ -1626,7 +1665,7 @@ static int parse_groups(struct evlist *perf_evlist, const char *str,
}
expr->metric_unit = m->metric_unit;
expr->metric_events = metric_events;
- expr->runtime = m->pctx->runtime;
+ expr->runtime = m->pctx->sctx.runtime;
list_add(&expr->nd, &me->head);
}
@@ -1646,20 +1685,22 @@ out:
return ret;
}
-int metricgroup__parse_groups(const struct option *opt,
+int metricgroup__parse_groups(struct evlist *perf_evlist,
const char *str,
bool metric_no_group,
bool metric_no_merge,
+ const char *user_requested_cpu_list,
+ bool system_wide,
struct rblist *metric_events)
{
- struct evlist *perf_evlist = *(struct evlist **)opt->value;
const struct pmu_events_table *table = pmu_events_table__find();
if (!table)
return -EINVAL;
- return parse_groups(perf_evlist, str, metric_no_group,
- metric_no_merge, NULL, metric_events, table);
+ return parse_groups(perf_evlist, str, metric_no_group, metric_no_merge,
+ user_requested_cpu_list, system_wide,
+ /*fake_pmu=*/NULL, metric_events, table);
}
int metricgroup__parse_groups_test(struct evlist *evlist,
@@ -1669,8 +1710,10 @@ int metricgroup__parse_groups_test(struct evlist *evlist,
bool metric_no_merge,
struct rblist *metric_events)
{
- return parse_groups(evlist, str, metric_no_group,
- metric_no_merge, &perf_pmu__fake, metric_events, table);
+ return parse_groups(evlist, str, metric_no_group, metric_no_merge,
+ /*user_requested_cpu_list=*/NULL,
+ /*system_wide=*/false,
+ &perf_pmu__fake, metric_events, table);
}
static int metricgroup__has_metric_callback(const struct pmu_event *pe,
@@ -1703,7 +1746,7 @@ int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp,
struct rblist *new_metric_events,
struct rblist *old_metric_events)
{
- unsigned i;
+ unsigned int i;
for (i = 0; i < rblist__nr_entries(old_metric_events); i++) {
struct rb_node *nd;
diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h
index 016b3b1a289a..732d3a0d3334 100644
--- a/tools/perf/util/metricgroup.h
+++ b/tools/perf/util/metricgroup.h
@@ -64,10 +64,12 @@ struct metric_expr {
struct metric_event *metricgroup__lookup(struct rblist *metric_events,
struct evsel *evsel,
bool create);
-int metricgroup__parse_groups(const struct option *opt,
+int metricgroup__parse_groups(struct evlist *perf_evlist,
const char *str,
bool metric_no_group,
bool metric_no_merge,
+ const char *user_requested_cpu_list,
+ bool system_wide,
struct rblist *metric_events);
int metricgroup__parse_groups_test(struct evlist *evlist,
const struct pmu_events_table *table,
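With the struct option indirection gone, callers hand over the evlist plus the new CPU-list/system-wide context directly. A sketch of what an updated 'perf stat' call site plausibly looks like (the stat_config field names here are assumptions from this series, not verified):

    ret = metricgroup__parse_groups(evsel_list, str,
                                    stat_config.metric_no_group,
                                    stat_config.metric_no_merge,
                                    stat_config.user_requested_cpu_list,
                                    stat_config.system_wide,
                                    &stat_config.metric_events);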
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index cd8b0777473b..cd4ccec7f361 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -9,7 +9,6 @@
#include <linux/bitops.h>
#include <perf/cpumap.h>
#include <stdbool.h>
-#include <pthread.h> // for cpu_set_t
#ifdef HAVE_AIO_SUPPORT
#include <aio.h>
#endif
diff --git a/tools/perf/util/mutex.c b/tools/perf/util/mutex.c
new file mode 100644
index 000000000000..bca7f0717f35
--- /dev/null
+++ b/tools/perf/util/mutex.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "mutex.h"
+
+#include "debug.h"
+#include <linux/string.h>
+#include <errno.h>
+
+static void check_err(const char *fn, int err)
+{
+ char sbuf[STRERR_BUFSIZE];
+
+ if (err == 0)
+ return;
+
+ pr_err("%s error: '%s'\n", fn, str_error_r(err, sbuf, sizeof(sbuf)));
+}
+
+#define CHECK_ERR(err) check_err(__func__, err)
+
+static void __mutex_init(struct mutex *mtx, bool pshared)
+{
+ pthread_mutexattr_t attr;
+
+ CHECK_ERR(pthread_mutexattr_init(&attr));
+
+#ifndef NDEBUG
+ /* In normal builds enable error checking, such as recursive usage. */
+ CHECK_ERR(pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK));
+#endif
+ if (pshared)
+ CHECK_ERR(pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED));
+
+ CHECK_ERR(pthread_mutex_init(&mtx->lock, &attr));
+ CHECK_ERR(pthread_mutexattr_destroy(&attr));
+}
+
+void mutex_init(struct mutex *mtx)
+{
+ __mutex_init(mtx, /*pshared=*/false);
+}
+
+void mutex_init_pshared(struct mutex *mtx)
+{
+ __mutex_init(mtx, /*pshared=*/true);
+}
+
+void mutex_destroy(struct mutex *mtx)
+{
+ CHECK_ERR(pthread_mutex_destroy(&mtx->lock));
+}
+
+void mutex_lock(struct mutex *mtx)
+ NO_THREAD_SAFETY_ANALYSIS
+{
+ CHECK_ERR(pthread_mutex_lock(&mtx->lock));
+}
+
+void mutex_unlock(struct mutex *mtx)
+ NO_THREAD_SAFETY_ANALYSIS
+{
+ CHECK_ERR(pthread_mutex_unlock(&mtx->lock));
+}
+
+bool mutex_trylock(struct mutex *mtx)
+{
+ int ret = pthread_mutex_trylock(&mtx->lock);
+
+ if (ret == 0)
+ return true; /* Lock acquired. */
+
+ if (ret == EBUSY)
+ return false; /* Lock busy. */
+
+ /* Print error. */
+ CHECK_ERR(ret);
+ return false;
+}
+
+static void __cond_init(struct cond *cnd, bool pshared)
+{
+ pthread_condattr_t attr;
+
+ CHECK_ERR(pthread_condattr_init(&attr));
+ if (pshared)
+ CHECK_ERR(pthread_condattr_setpshared(&attr, PTHREAD_PROCESS_SHARED));
+
+ CHECK_ERR(pthread_cond_init(&cnd->cond, &attr));
+ CHECK_ERR(pthread_condattr_destroy(&attr));
+}
+
+void cond_init(struct cond *cnd)
+{
+ __cond_init(cnd, /*pshared=*/false);
+}
+
+void cond_init_pshared(struct cond *cnd)
+{
+ __cond_init(cnd, /*pshared=*/true);
+}
+
+void cond_destroy(struct cond *cnd)
+{
+ CHECK_ERR(pthread_cond_destroy(&cnd->cond));
+}
+
+void cond_wait(struct cond *cnd, struct mutex *mtx)
+{
+ CHECK_ERR(pthread_cond_wait(&cnd->cond, &mtx->lock));
+}
+
+void cond_signal(struct cond *cnd)
+{
+ CHECK_ERR(pthread_cond_signal(&cnd->cond));
+}
+
+void cond_broadcast(struct cond *cnd)
+{
+ CHECK_ERR(pthread_cond_broadcast(&cnd->cond));
+}
diff --git a/tools/perf/util/mutex.h b/tools/perf/util/mutex.h
new file mode 100644
index 000000000000..40661120cacc
--- /dev/null
+++ b/tools/perf/util/mutex.h
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_MUTEX_H
+#define __PERF_MUTEX_H
+
+#include <pthread.h>
+#include <stdbool.h>
+
+/*
+ * A function-like feature checking macro that is a wrapper around
+ * `__has_attribute`, which is defined by GCC 5+ and Clang and evaluates to a
+ * nonzero constant integer if the attribute is supported or 0 if not.
+ */
+#ifdef __has_attribute
+#define HAVE_ATTRIBUTE(x) __has_attribute(x)
+#else
+#define HAVE_ATTRIBUTE(x) 0
+#endif
+
+#if HAVE_ATTRIBUTE(guarded_by) && HAVE_ATTRIBUTE(pt_guarded_by) && \
+ HAVE_ATTRIBUTE(lockable) && HAVE_ATTRIBUTE(exclusive_lock_function) && \
+ HAVE_ATTRIBUTE(exclusive_trylock_function) && HAVE_ATTRIBUTE(exclusive_locks_required) && \
+ HAVE_ATTRIBUTE(no_thread_safety_analysis)
+
+/* Documents if a shared field or global variable needs to be protected by a mutex. */
+#define GUARDED_BY(x) __attribute__((guarded_by(x)))
+
+/*
+ * Documents if the memory location pointed to by a pointer should be guarded by
+ * a mutex when dereferencing the pointer.
+ */
+#define PT_GUARDED_BY(x) __attribute__((pt_guarded_by(x)))
+
+/* Documents if a type is a lockable type. */
+#define LOCKABLE __attribute__((lockable))
+
+/* Documents functions that acquire a lock in the body of a function, and do not release it. */
+#define EXCLUSIVE_LOCK_FUNCTION(...) __attribute__((exclusive_lock_function(__VA_ARGS__)))
+
+/*
+ * Documents functions that expect a lock to be held on entry to the function,
+ * and release it in the body of the function.
+ */
+#define UNLOCK_FUNCTION(...) __attribute__((unlock_function(__VA_ARGS__)))
+
+/* Documents functions that try to acquire a lock, and return success or failure. */
+#define EXCLUSIVE_TRYLOCK_FUNCTION(...) \
+ __attribute__((exclusive_trylock_function(__VA_ARGS__)))
+
+/* Documents a function that expects a mutex to be held prior to entry. */
+#define EXCLUSIVE_LOCKS_REQUIRED(...) __attribute__((exclusive_locks_required(__VA_ARGS__)))
+
+/* Turns off thread safety checking within the body of a particular function. */
+#define NO_THREAD_SAFETY_ANALYSIS __attribute__((no_thread_safety_analysis))
+
+#else
+
+#define GUARDED_BY(x)
+#define PT_GUARDED_BY(x)
+#define LOCKABLE
+#define EXCLUSIVE_LOCK_FUNCTION(...)
+#define UNLOCK_FUNCTION(...)
+#define EXCLUSIVE_TRYLOCK_FUNCTION(...)
+#define EXCLUSIVE_LOCKS_REQUIRED(...)
+#define NO_THREAD_SAFETY_ANALYSIS
+
+#endif
+
+/*
+ * A wrapper around the mutex implementation that allows perf to error check
+ * usage, etc.
+ */
+struct LOCKABLE mutex {
+ pthread_mutex_t lock;
+};
+
+/* A wrapper around the condition variable implementation. */
+struct cond {
+ pthread_cond_t cond;
+};
+
+/* Default initialize the mtx struct. */
+void mutex_init(struct mutex *mtx);
+/*
+ * Initialize the mtx struct and set the process-shared rather than default
+ * process-private attribute.
+ */
+void mutex_init_pshared(struct mutex *mtx);
+void mutex_destroy(struct mutex *mtx);
+
+void mutex_lock(struct mutex *mtx) EXCLUSIVE_LOCK_FUNCTION(*mtx);
+void mutex_unlock(struct mutex *mtx) UNLOCK_FUNCTION(*mtx);
+/* Tries to acquire the lock and returns true on success. */
+bool mutex_trylock(struct mutex *mtx) EXCLUSIVE_TRYLOCK_FUNCTION(true, *mtx);
+
+/* Default initialize the cond struct. */
+void cond_init(struct cond *cnd);
+/*
+ * Initialize the cond struct and specify the process-shared rather than default
+ * process-private attribute.
+ */
+void cond_init_pshared(struct cond *cnd);
+void cond_destroy(struct cond *cnd);
+
+void cond_wait(struct cond *cnd, struct mutex *mtx) EXCLUSIVE_LOCKS_REQUIRED(mtx);
+void cond_signal(struct cond *cnd);
+void cond_broadcast(struct cond *cnd);
+
+#endif /* __PERF_MUTEX_H */
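A minimal usage sketch of the new wrappers; mutex_init()/cond_init() must have run once beforehand, and in non-NDEBUG builds the ERRORCHECK attribute turns misuse such as recursive locking into a printed error instead of a silent deadlock:

    static struct mutex lock;
    static struct cond cond;
    static bool ready GUARDED_BY(lock);

    void producer(void)
    {
            mutex_lock(&lock);
            ready = true;
            cond_signal(&cond);
            mutex_unlock(&lock);
    }

    void consumer(void)
    {
            mutex_lock(&lock);
            while (!ready)
                    cond_wait(&cond, &lock);        /* re-acquires lock on return */
            mutex_unlock(&lock);
    }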
diff --git a/tools/perf/util/parse-branch-options.c b/tools/perf/util/parse-branch-options.c
index bb4aa88c50a8..00588b9db474 100644
--- a/tools/perf/util/parse-branch-options.c
+++ b/tools/perf/util/parse-branch-options.c
@@ -32,6 +32,7 @@ static const struct branch_mode branch_modes[] = {
BRANCH_OPT("call", PERF_SAMPLE_BRANCH_CALL),
BRANCH_OPT("save_type", PERF_SAMPLE_BRANCH_TYPE_SAVE),
BRANCH_OPT("stack", PERF_SAMPLE_BRANCH_CALL_STACK),
+ BRANCH_OPT("priv", PERF_SAMPLE_BRANCH_PRIV_SAVE),
BRANCH_END
};
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index f3b2c2a87456..437389dacf48 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -150,14 +150,6 @@ struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = {
},
};
-#define __PERF_EVENT_FIELD(config, name) \
- ((config & PERF_EVENT_##name##_MASK) >> PERF_EVENT_##name##_SHIFT)
-
-#define PERF_EVENT_RAW(config) __PERF_EVENT_FIELD(config, RAW)
-#define PERF_EVENT_CONFIG(config) __PERF_EVENT_FIELD(config, CONFIG)
-#define PERF_EVENT_TYPE(config) __PERF_EVENT_FIELD(config, TYPE)
-#define PERF_EVENT_ID(config) __PERF_EVENT_FIELD(config, EVENT)
-
bool is_event_supported(u8 type, u64 config)
{
bool ret = true;
diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c
index 98af3fa4ea35..7e5e7b30510d 100644
--- a/tools/perf/util/perf_event_attr_fprintf.c
+++ b/tools/perf/util/perf_event_attr_fprintf.c
@@ -52,7 +52,7 @@ static void __p_branch_sample_type(char *buf, size_t size, u64 value)
bit_name(ABORT_TX), bit_name(IN_TX), bit_name(NO_TX),
bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP),
bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES),
- bit_name(TYPE_SAVE), bit_name(HW_INDEX),
+ bit_name(TYPE_SAVE), bit_name(HW_INDEX), bit_name(PRIV_SAVE),
{ .name = NULL, }
};
#undef bit_name
@@ -64,7 +64,7 @@ static void __p_read_format(char *buf, size_t size, u64 value)
#define bit_name(n) { PERF_FORMAT_##n, #n }
struct bit_names bits[] = {
bit_name(TOTAL_TIME_ENABLED), bit_name(TOTAL_TIME_RUNNING),
- bit_name(ID), bit_name(GROUP),
+ bit_name(ID), bit_name(GROUP), bit_name(LOST),
{ .name = NULL, }
};
#undef bit_name
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 89655d53117a..74a2cafb4e8d 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -1182,7 +1182,7 @@ static char *pmu_formats_string(struct list_head *formats)
struct perf_pmu_format *format;
char *str = NULL;
struct strbuf buf = STRBUF_INIT;
- unsigned i = 0;
+ unsigned int i = 0;
if (!formats)
return NULL;
diff --git a/tools/perf/util/pmu.y b/tools/perf/util/pmu.y
index bfd7e8509869..0dab0ec2eff7 100644
--- a/tools/perf/util/pmu.y
+++ b/tools/perf/util/pmu.y
@@ -10,8 +10,6 @@
#include <string.h>
#include "pmu.h"
-extern int perf_pmu_lex (void);
-
#define ABORT_ON(val) \
do { \
if (val) \
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 785246ff4179..0c24bc7afbca 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -29,6 +29,7 @@
#include "color.h"
#include "map.h"
#include "maps.h"
+#include "mutex.h"
#include "symbol.h"
#include <api/fs/fs.h>
#include "trace-event.h" /* For __maybe_unused */
@@ -180,8 +181,10 @@ struct map *get_target_map(const char *target, struct nsinfo *nsi, bool user)
map = dso__new_map(target);
if (map && map->dso) {
+ mutex_lock(&map->dso->lock);
nsinfo__put(map->dso->nsinfo);
map->dso->nsinfo = nsinfo__get(nsi);
+ mutex_unlock(&map->dso->lock);
}
return map;
} else {
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 192c9274f7ad..1a4f10de29ff 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -943,6 +943,11 @@ static void perf_event__cpu_map_swap(union perf_event *event,
default:
pr_err("cpu_map swap: unsupported long size\n");
}
+ break;
+ case PERF_CPU_MAP__RANGE_CPUS:
+ data->range_cpu_data.start_cpu = bswap_16(data->range_cpu_data.start_cpu);
+ data->range_cpu_data.end_cpu = bswap_16(data->range_cpu_data.end_cpu);
+ break;
default:
break;
}
@@ -1180,7 +1185,7 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack)
e->flags.abort ? "A" : " ",
e->flags.in_tx ? "T" : " ",
(unsigned)e->flags.reserved,
- e->flags.type ? branch_type_name(e->flags.type) : "");
+ get_branch_type(e));
} else {
if (i == 0) {
printf("..... %2"PRIu64": %016" PRIx64 "\n"
diff --git a/tools/perf/util/smt.c b/tools/perf/util/smt.c
index 2b0a36ebf27a..994e9e418227 100644
--- a/tools/perf/util/smt.c
+++ b/tools/perf/util/smt.c
@@ -1,99 +1,37 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <linux/bitops.h>
+// SPDX-License-Identifier: GPL-2.0-only
+#include <string.h>
#include "api/fs/fs.h"
+#include "cputopo.h"
#include "smt.h"
-/**
- * hweight_str - Returns the number of bits set in str. Stops at first non-hex
- * or ',' character.
- */
-static int hweight_str(char *str)
-{
- int result = 0;
-
- while (*str) {
- switch (*str++) {
- case '0':
- case ',':
- break;
- case '1':
- case '2':
- case '4':
- case '8':
- result++;
- break;
- case '3':
- case '5':
- case '6':
- case '9':
- case 'a':
- case 'A':
- case 'c':
- case 'C':
- result += 2;
- break;
- case '7':
- case 'b':
- case 'B':
- case 'd':
- case 'D':
- case 'e':
- case 'E':
- result += 3;
- break;
- case 'f':
- case 'F':
- result += 4;
- break;
- default:
- goto done;
- }
- }
-done:
- return result;
-}
-
-int smt_on(void)
+bool smt_on(const struct cpu_topology *topology)
{
static bool cached;
- static int cached_result;
- int cpu;
- int ncpu;
+ static bool cached_result;
+ int fs_value;
if (cached)
return cached_result;
- if (sysfs__read_int("devices/system/cpu/smt/active", &cached_result) >= 0) {
- cached = true;
- return cached_result;
- }
-
- cached_result = 0;
- ncpu = sysconf(_SC_NPROCESSORS_CONF);
- for (cpu = 0; cpu < ncpu; cpu++) {
- unsigned long long siblings;
- char *str;
- size_t strlen;
- char fn[256];
+ if (sysfs__read_int("devices/system/cpu/smt/active", &fs_value) >= 0)
+ cached_result = (fs_value == 1);
+ else
+ cached_result = cpu_topology__smt_on(topology);
- snprintf(fn, sizeof fn,
- "devices/system/cpu/cpu%d/topology/thread_siblings", cpu);
- if (sysfs__read_str(fn, &str, &strlen) < 0) {
- snprintf(fn, sizeof fn,
- "devices/system/cpu/cpu%d/topology/core_cpus", cpu);
- if (sysfs__read_str(fn, &str, &strlen) < 0)
- continue;
- }
- /* Entry is hex, but does not have 0x, so need custom parser */
- siblings = hweight_str(str);
- free(str);
- if (siblings > 1) {
- cached_result = 1;
- break;
- }
- }
cached = true;
return cached_result;
}
+
+bool core_wide(bool system_wide, const char *user_requested_cpu_list,
+ const struct cpu_topology *topology)
+{
+ /* If not everything running on a core is being recorded then we can't use core_wide. */
+ if (!system_wide)
+ return false;
+
+ /* Cheap case that SMT is disabled and therefore we're inherently core_wide. */
+ if (!smt_on(topology))
+ return true;
+
+ return cpu_topology__core_wide(topology, user_requested_cpu_list);
+}
diff --git a/tools/perf/util/smt.h b/tools/perf/util/smt.h
index b8414b7bcbc8..ae9095f2c38c 100644
--- a/tools/perf/util/smt.h
+++ b/tools/perf/util/smt.h
@@ -1,6 +1,17 @@
-#ifndef SMT_H
-#define SMT_H 1
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SMT_H
+#define __SMT_H 1
-int smt_on(void);
+struct cpu_topology;
-#endif
+/* Returns true if SMT (aka hyperthreading) is enabled. */
+bool smt_on(const struct cpu_topology *topology);
+
+/*
+ * Returns true when system wide and all SMT threads for a core are in the
+ * user_requested_cpus map.
+ */
+bool core_wide(bool system_wide, const char *user_requested_cpu_list,
+ const struct cpu_topology *topology);
+
+#endif /* __SMT_H */
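A hedged usage sketch tying the two helpers together (cpu_topology__new() and cpu_topology__delete() assumed from cputopo.h):

    struct cpu_topology *topo = cpu_topology__new();

    if (topo) {
            /* '#smt_on': is hyperthreading enabled at all? */
            bool smt = smt_on(topo);
            /* '#core_wide': system wide, and every SMT sibling of each
             * requested core is being counted. */
            bool cw = core_wide(/*system_wide=*/true, "0-7", topo);

            pr_debug("smt=%d core_wide=%d\n", smt, cw);
            cpu_topology__delete(topo);
    }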
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 6d5588e80935..2e7330867e2e 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -1948,6 +1948,43 @@ struct sort_entry sort_dso_size = {
.se_width_idx = HISTC_DSO_SIZE,
};
+/* --sort addr */
+
+static int64_t
+sort__addr_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ u64 left_ip = left->ip;
+ u64 right_ip = right->ip;
+ struct map *left_map = left->ms.map;
+ struct map *right_map = right->ms.map;
+
+ if (left_map)
+ left_ip = left_map->unmap_ip(left_map, left_ip);
+ if (right_map)
+ right_ip = right_map->unmap_ip(right_map, right_ip);
+
+ return _sort__addr_cmp(left_ip, right_ip);
+}
+
+static int hist_entry__addr_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ u64 ip = he->ip;
+ struct map *map = he->ms.map;
+
+ if (map)
+ ip = map->unmap_ip(map, ip);
+
+ return repsep_snprintf(bf, size, "%-#*llx", width, ip);
+}
+
+struct sort_entry sort_addr = {
+ .se_header = "Address",
+ .se_cmp = sort__addr_cmp,
+ .se_snprintf = hist_entry__addr_snprintf,
+ .se_width_idx = HISTC_ADDR,
+};
+
struct sort_dimension {
const char *name;
@@ -1997,6 +2034,7 @@ static struct sort_dimension common_sort_dimensions[] = {
DIM(SORT_GLOBAL_INS_LAT, "ins_lat", sort_global_ins_lat),
DIM(SORT_LOCAL_PIPELINE_STAGE_CYC, "local_p_stage_cyc", sort_local_p_stage_cyc),
DIM(SORT_GLOBAL_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_global_p_stage_cyc),
+ DIM(SORT_ADDR, "addr", sort_addr),
};
#undef DIM
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 2ddc00d1c464..04ff8b61a2a7 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -34,7 +34,6 @@ extern struct sort_entry sort_dso_to;
extern struct sort_entry sort_sym_from;
extern struct sort_entry sort_sym_to;
extern struct sort_entry sort_srcline;
-extern enum sort_type sort__first_dimension;
extern const char default_mem_sort_order[];
struct res_sample {
@@ -237,6 +236,7 @@ enum sort_type {
SORT_GLOBAL_INS_LAT,
SORT_LOCAL_PIPELINE_STAGE_CYC,
SORT_GLOBAL_PIPELINE_STAGE_CYC,
+ SORT_ADDR,
/* branch stack specific sort keys */
__SORT_BRANCH_STACK,
@@ -295,7 +295,6 @@ struct block_hist {
};
extern struct sort_entry sort_thread;
-extern struct list_head hist_entry__sort_list;
struct evlist;
struct tep_handle;
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index b82844cb0ce7..5c47ee9963a7 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -67,7 +67,7 @@ static void print_noise(struct perf_stat_config *config,
return;
ps = evsel->stats;
- print_noise_pct(config, stddev_stats(&ps->res_stats[0]), avg);
+ print_noise_pct(config, stddev_stats(&ps->res_stats), avg);
}
static void print_cgroup(struct perf_stat_config *config, struct evsel *evsel)
@@ -168,7 +168,7 @@ static void aggr_printout(struct perf_stat_config *config,
id.socket,
id.die,
id.core);
- } else if (id.core > -1) {
+ } else if (id.cpu.cpu > -1) {
fprintf(config->output, "\"cpu\" : \"%d\", ",
id.cpu.cpu);
}
@@ -179,7 +179,7 @@ static void aggr_printout(struct perf_stat_config *config,
id.die,
config->csv_output ? 0 : -3,
id.core, config->csv_sep);
- } else if (id.core > -1) {
+ } else if (id.cpu.cpu > -1) {
fprintf(config->output, "CPU%*d%s",
config->csv_output ? 0 : -7,
id.cpu.cpu, config->csv_sep);
@@ -189,14 +189,14 @@ static void aggr_printout(struct perf_stat_config *config,
case AGGR_THREAD:
if (config->json_output) {
fprintf(config->output, "\"thread\" : \"%s-%d\", ",
- perf_thread_map__comm(evsel->core.threads, id.thread),
- perf_thread_map__pid(evsel->core.threads, id.thread));
+ perf_thread_map__comm(evsel->core.threads, id.thread_idx),
+ perf_thread_map__pid(evsel->core.threads, id.thread_idx));
} else {
fprintf(config->output, "%*s-%*d%s",
config->csv_output ? 0 : 16,
- perf_thread_map__comm(evsel->core.threads, id.thread),
+ perf_thread_map__comm(evsel->core.threads, id.thread_idx),
config->csv_output ? 0 : -8,
- perf_thread_map__pid(evsel->core.threads, id.thread),
+ perf_thread_map__pid(evsel->core.threads, id.thread_idx),
config->csv_sep);
}
break;
@@ -442,7 +442,7 @@ static void print_metric_header(struct perf_stat_config *config,
fprintf(os->fh, "%*s ", config->metric_only_len, unit);
}
-static int first_shadow_cpu_map_idx(struct perf_stat_config *config,
+static int first_shadow_map_idx(struct perf_stat_config *config,
struct evsel *evsel, const struct aggr_cpu_id *id)
{
struct perf_cpu_map *cpus = evsel__cpus(evsel);
@@ -452,6 +452,9 @@ static int first_shadow_cpu_map_idx(struct perf_stat_config *config,
if (config->aggr_mode == AGGR_NONE)
return perf_cpu_map__idx(cpus, id->cpu);
+ if (config->aggr_mode == AGGR_THREAD)
+ return id->thread_idx;
+
if (!config->aggr_get_id)
return 0;
@@ -646,7 +649,7 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int
}
perf_stat__print_shadow_stats(config, counter, uval,
- first_shadow_cpu_map_idx(config, counter, &id),
+ first_shadow_map_idx(config, counter, &id),
&out, &config->metric_events, st);
if (!config->csv_output && !config->metric_only && !config->json_output) {
print_noise(config, counter, noise);
@@ -676,7 +679,7 @@ static void aggr_update_shadow(struct perf_stat_config *config,
val += perf_counts(counter->counts, idx, 0)->val;
}
perf_stat__update_shadow_stats(counter, val,
- first_shadow_cpu_map_idx(config, counter, &id),
+ first_shadow_map_idx(config, counter, &id),
&rt_stat);
}
}
@@ -943,7 +946,7 @@ static struct perf_aggr_thread_value *sort_aggr_thread(
buf[i].counter = counter;
buf[i].id = aggr_cpu_id__empty();
- buf[i].id.thread = thread;
+ buf[i].id.thread_idx = thread;
buf[i].uval = uval;
buf[i].val = val;
buf[i].run = run;
@@ -979,14 +982,9 @@ static void print_aggr_thread(struct perf_stat_config *config,
fprintf(output, "%s", prefix);
id = buf[thread].id;
- if (config->stats)
- printout(config, id, 0, buf[thread].counter, buf[thread].uval,
- prefix, buf[thread].run, buf[thread].ena, 1.0,
- &config->stats[id.thread]);
- else
- printout(config, id, 0, buf[thread].counter, buf[thread].uval,
- prefix, buf[thread].run, buf[thread].ena, 1.0,
- &rt_stat);
+ printout(config, id, 0, buf[thread].counter, buf[thread].uval,
+ prefix, buf[thread].run, buf[thread].ena, 1.0,
+ &rt_stat);
fputc('\n', output);
}
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 788ce5e46470..07b29fe272c7 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -33,9 +33,8 @@ struct saved_value {
struct evsel *evsel;
enum stat_type type;
int ctx;
- int cpu_map_idx;
+ int map_idx; /* cpu or thread map index */
struct cgroup *cgrp;
- struct runtime_stat *stat;
struct stats stats;
u64 metric_total;
int metric_other;
@@ -48,8 +47,8 @@ static int saved_value_cmp(struct rb_node *rb_node, const void *entry)
rb_node);
const struct saved_value *b = entry;
- if (a->cpu_map_idx != b->cpu_map_idx)
- return a->cpu_map_idx - b->cpu_map_idx;
+ if (a->map_idx != b->map_idx)
+ return a->map_idx - b->map_idx;
/*
* Previously the rbtree was used to link generic metrics.
@@ -67,16 +66,6 @@ static int saved_value_cmp(struct rb_node *rb_node, const void *entry)
if (a->cgrp != b->cgrp)
return (char *)a->cgrp < (char *)b->cgrp ? -1 : +1;
- if (a->evsel == NULL && b->evsel == NULL) {
- if (a->stat == b->stat)
- return 0;
-
- if ((char *)a->stat < (char *)b->stat)
- return -1;
-
- return 1;
- }
-
if (a->evsel == b->evsel)
return 0;
if ((char *)a->evsel < (char *)b->evsel)
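
With the per-thread runtime_stat instances going away (see the stat.c and stat.h hunks further down), each runtime_stat owns exactly one rblist, so the old tie-break on the 'stat' pointer no longer distinguishes anything. The surviving ordering, reduced to a self-contained sketch with simplified stand-in types:

  struct key {
      int map_idx;        /* cpu or thread map index */
      int type;           /* stand-in for enum stat_type */
      int ctx;
      const void *cgrp;
      const void *evsel;
  };

  /* Total order: numeric fields first, then bare pointer identity. */
  static int key_cmp(const struct key *a, const struct key *b)
  {
      if (a->map_idx != b->map_idx)
          return a->map_idx - b->map_idx;
      if (a->type != b->type)
          return a->type - b->type;
      if (a->ctx != b->ctx)
          return a->ctx - b->ctx;
      if (a->cgrp != b->cgrp)
          return a->cgrp < b->cgrp ? -1 : 1;
      if (a->evsel == b->evsel)
          return 0;
      return a->evsel < b->evsel ? -1 : 1;
  }
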
@@ -106,7 +95,7 @@ static void saved_value_delete(struct rblist *rblist __maybe_unused,
}
static struct saved_value *saved_value_lookup(struct evsel *evsel,
- int cpu_map_idx,
+ int map_idx,
bool create,
enum stat_type type,
int ctx,
@@ -116,11 +105,10 @@ static struct saved_value *saved_value_lookup(struct evsel *evsel,
struct rblist *rblist;
struct rb_node *nd;
struct saved_value dm = {
- .cpu_map_idx = cpu_map_idx,
+ .map_idx = map_idx,
.evsel = evsel,
.type = type,
.ctx = ctx,
- .stat = st,
.cgrp = cgrp,
};
@@ -215,10 +203,10 @@ struct runtime_stat_data {
static void update_runtime_stat(struct runtime_stat *st,
enum stat_type type,
- int cpu_map_idx, u64 count,
+ int map_idx, u64 count,
struct runtime_stat_data *rsd)
{
- struct saved_value *v = saved_value_lookup(NULL, cpu_map_idx, true, type,
+ struct saved_value *v = saved_value_lookup(NULL, map_idx, true, type,
rsd->ctx, st, rsd->cgrp);
if (v)
@@ -231,7 +219,7 @@ static void update_runtime_stat(struct runtime_stat *st,
* instruction rates, etc:
*/
void perf_stat__update_shadow_stats(struct evsel *counter, u64 count,
- int cpu_map_idx, struct runtime_stat *st)
+ int map_idx, struct runtime_stat *st)
{
u64 count_ns = count;
struct saved_value *v;
@@ -243,88 +231,88 @@ void perf_stat__update_shadow_stats(struct evsel *counter, u64 count,
count *= counter->scale;
if (evsel__is_clock(counter))
- update_runtime_stat(st, STAT_NSECS, cpu_map_idx, count_ns, &rsd);
+ update_runtime_stat(st, STAT_NSECS, map_idx, count_ns, &rsd);
else if (evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
- update_runtime_stat(st, STAT_CYCLES, cpu_map_idx, count, &rsd);
+ update_runtime_stat(st, STAT_CYCLES, map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
- update_runtime_stat(st, STAT_CYCLES_IN_TX, cpu_map_idx, count, &rsd);
+ update_runtime_stat(st, STAT_CYCLES_IN_TX, map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TRANSACTION_START))
- update_runtime_stat(st, STAT_TRANSACTION, cpu_map_idx, count, &rsd);
+ update_runtime_stat(st, STAT_TRANSACTION, map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, ELISION_START))
- update_runtime_stat(st, STAT_ELISION, cpu_map_idx, count, &rsd);
+ update_runtime_stat(st, STAT_ELISION, map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS))
update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS,
- cpu_map_idx, count, &rsd);
+ map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED))
update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED,
- cpu_map_idx, count, &rsd);
+ map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED))
update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED,
- cpu_map_idx, count, &rsd);
+ map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES))
update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES,
- cpu_map_idx, count, &rsd);
+ map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES,
- cpu_map_idx, count, &rsd);
+ map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_RETIRING))
update_runtime_stat(st, STAT_TOPDOWN_RETIRING,
- cpu_map_idx, count, &rsd);
+ map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_BAD_SPEC))
update_runtime_stat(st, STAT_TOPDOWN_BAD_SPEC,
- cpu_map_idx, count, &rsd);
+ map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_FE_BOUND))
update_runtime_stat(st, STAT_TOPDOWN_FE_BOUND,
- cpu_map_idx, count, &rsd);
+ map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_BE_BOUND))
update_runtime_stat(st, STAT_TOPDOWN_BE_BOUND,
- cpu_map_idx, count, &rsd);
+ map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_HEAVY_OPS))
update_runtime_stat(st, STAT_TOPDOWN_HEAVY_OPS,
- cpu_map_idx, count, &rsd);
+ map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_BR_MISPREDICT))
update_runtime_stat(st, STAT_TOPDOWN_BR_MISPREDICT,
- cpu_map_idx, count, &rsd);
+ map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_LAT))
update_runtime_stat(st, STAT_TOPDOWN_FETCH_LAT,
- cpu_map_idx, count, &rsd);
+ map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_MEM_BOUND))
update_runtime_stat(st, STAT_TOPDOWN_MEM_BOUND,
- cpu_map_idx, count, &rsd);
+ map_idx, count, &rsd);
else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT,
- cpu_map_idx, count, &rsd);
+ map_idx, count, &rsd);
else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
update_runtime_stat(st, STAT_STALLED_CYCLES_BACK,
- cpu_map_idx, count, &rsd);
+ map_idx, count, &rsd);
else if (evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
- update_runtime_stat(st, STAT_BRANCHES, cpu_map_idx, count, &rsd);
+ update_runtime_stat(st, STAT_BRANCHES, map_idx, count, &rsd);
else if (evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
- update_runtime_stat(st, STAT_CACHEREFS, cpu_map_idx, count, &rsd);
+ update_runtime_stat(st, STAT_CACHEREFS, map_idx, count, &rsd);
else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
- update_runtime_stat(st, STAT_L1_DCACHE, cpu_map_idx, count, &rsd);
+ update_runtime_stat(st, STAT_L1_DCACHE, map_idx, count, &rsd);
else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
- update_runtime_stat(st, STAT_L1_ICACHE, cpu_map_idx, count, &rsd);
+ update_runtime_stat(st, STAT_L1_ICACHE, map_idx, count, &rsd);
else if (evsel__match(counter, HW_CACHE, HW_CACHE_LL))
- update_runtime_stat(st, STAT_LL_CACHE, cpu_map_idx, count, &rsd);
+ update_runtime_stat(st, STAT_LL_CACHE, map_idx, count, &rsd);
else if (evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
- update_runtime_stat(st, STAT_DTLB_CACHE, cpu_map_idx, count, &rsd);
+ update_runtime_stat(st, STAT_DTLB_CACHE, map_idx, count, &rsd);
else if (evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
- update_runtime_stat(st, STAT_ITLB_CACHE, cpu_map_idx, count, &rsd);
+ update_runtime_stat(st, STAT_ITLB_CACHE, map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, SMI_NUM))
- update_runtime_stat(st, STAT_SMI_NUM, cpu_map_idx, count, &rsd);
+ update_runtime_stat(st, STAT_SMI_NUM, map_idx, count, &rsd);
else if (perf_stat_evsel__is(counter, APERF))
- update_runtime_stat(st, STAT_APERF, cpu_map_idx, count, &rsd);
+ update_runtime_stat(st, STAT_APERF, map_idx, count, &rsd);
if (counter->collect_stat) {
- v = saved_value_lookup(counter, cpu_map_idx, true, STAT_NONE, 0, st,
+ v = saved_value_lookup(counter, map_idx, true, STAT_NONE, 0, st,
rsd.cgrp);
update_stats(&v->stats, count);
if (counter->metric_leader)
v->metric_total += count;
} else if (counter->metric_leader) {
v = saved_value_lookup(counter->metric_leader,
- cpu_map_idx, true, STAT_NONE, 0, st, rsd.cgrp);
+ map_idx, true, STAT_NONE, 0, st, rsd.cgrp);
v->metric_total += count;
v->metric_other++;
}
@@ -466,12 +454,12 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list)
}
static double runtime_stat_avg(struct runtime_stat *st,
- enum stat_type type, int cpu_map_idx,
+ enum stat_type type, int map_idx,
struct runtime_stat_data *rsd)
{
struct saved_value *v;
- v = saved_value_lookup(NULL, cpu_map_idx, false, type, rsd->ctx, st, rsd->cgrp);
+ v = saved_value_lookup(NULL, map_idx, false, type, rsd->ctx, st, rsd->cgrp);
if (!v)
return 0.0;
@@ -479,12 +467,12 @@ static double runtime_stat_avg(struct runtime_stat *st,
}
static double runtime_stat_n(struct runtime_stat *st,
- enum stat_type type, int cpu_map_idx,
+ enum stat_type type, int map_idx,
struct runtime_stat_data *rsd)
{
struct saved_value *v;
- v = saved_value_lookup(NULL, cpu_map_idx, false, type, rsd->ctx, st, rsd->cgrp);
+ v = saved_value_lookup(NULL, map_idx, false, type, rsd->ctx, st, rsd->cgrp);
if (!v)
return 0.0;
@@ -492,7 +480,7 @@ static double runtime_stat_n(struct runtime_stat *st,
}
static void print_stalled_cycles_frontend(struct perf_stat_config *config,
- int cpu_map_idx, double avg,
+ int map_idx, double avg,
struct perf_stat_output_ctx *out,
struct runtime_stat *st,
struct runtime_stat_data *rsd)
@@ -500,7 +488,7 @@ static void print_stalled_cycles_frontend(struct perf_stat_config *config,
double total, ratio = 0.0;
const char *color;
- total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, rsd);
+ total = runtime_stat_avg(st, STAT_CYCLES, map_idx, rsd);
if (total)
ratio = avg / total * 100.0;
@@ -515,7 +503,7 @@ static void print_stalled_cycles_frontend(struct perf_stat_config *config,
}
static void print_stalled_cycles_backend(struct perf_stat_config *config,
- int cpu_map_idx, double avg,
+ int map_idx, double avg,
struct perf_stat_output_ctx *out,
struct runtime_stat *st,
struct runtime_stat_data *rsd)
@@ -523,7 +511,7 @@ static void print_stalled_cycles_backend(struct perf_stat_config *config,
double total, ratio = 0.0;
const char *color;
- total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, rsd);
+ total = runtime_stat_avg(st, STAT_CYCLES, map_idx, rsd);
if (total)
ratio = avg / total * 100.0;
@@ -534,7 +522,7 @@ static void print_stalled_cycles_backend(struct perf_stat_config *config,
}
static void print_branch_misses(struct perf_stat_config *config,
- int cpu_map_idx, double avg,
+ int map_idx, double avg,
struct perf_stat_output_ctx *out,
struct runtime_stat *st,
struct runtime_stat_data *rsd)
@@ -542,7 +530,7 @@ static void print_branch_misses(struct perf_stat_config *config,
double total, ratio = 0.0;
const char *color;
- total = runtime_stat_avg(st, STAT_BRANCHES, cpu_map_idx, rsd);
+ total = runtime_stat_avg(st, STAT_BRANCHES, map_idx, rsd);
if (total)
ratio = avg / total * 100.0;
@@ -553,7 +541,7 @@ static void print_branch_misses(struct perf_stat_config *config,
}
static void print_l1_dcache_misses(struct perf_stat_config *config,
- int cpu_map_idx, double avg,
+ int map_idx, double avg,
struct perf_stat_output_ctx *out,
struct runtime_stat *st,
struct runtime_stat_data *rsd)
@@ -561,7 +549,7 @@ static void print_l1_dcache_misses(struct perf_stat_config *config,
double total, ratio = 0.0;
const char *color;
- total = runtime_stat_avg(st, STAT_L1_DCACHE, cpu_map_idx, rsd);
+ total = runtime_stat_avg(st, STAT_L1_DCACHE, map_idx, rsd);
if (total)
ratio = avg / total * 100.0;
@@ -572,7 +560,7 @@ static void print_l1_dcache_misses(struct perf_stat_config *config,
}
static void print_l1_icache_misses(struct perf_stat_config *config,
- int cpu_map_idx, double avg,
+ int map_idx, double avg,
struct perf_stat_output_ctx *out,
struct runtime_stat *st,
struct runtime_stat_data *rsd)
@@ -580,7 +568,7 @@ static void print_l1_icache_misses(struct perf_stat_config *config,
double total, ratio = 0.0;
const char *color;
- total = runtime_stat_avg(st, STAT_L1_ICACHE, cpu_map_idx, rsd);
+ total = runtime_stat_avg(st, STAT_L1_ICACHE, map_idx, rsd);
if (total)
ratio = avg / total * 100.0;
@@ -590,7 +578,7 @@ static void print_l1_icache_misses(struct perf_stat_config *config,
}
static void print_dtlb_cache_misses(struct perf_stat_config *config,
- int cpu_map_idx, double avg,
+ int map_idx, double avg,
struct perf_stat_output_ctx *out,
struct runtime_stat *st,
struct runtime_stat_data *rsd)
@@ -598,7 +586,7 @@ static void print_dtlb_cache_misses(struct perf_stat_config *config,
double total, ratio = 0.0;
const char *color;
- total = runtime_stat_avg(st, STAT_DTLB_CACHE, cpu_map_idx, rsd);
+ total = runtime_stat_avg(st, STAT_DTLB_CACHE, map_idx, rsd);
if (total)
ratio = avg / total * 100.0;
@@ -608,7 +596,7 @@ static void print_dtlb_cache_misses(struct perf_stat_config *config,
}
static void print_itlb_cache_misses(struct perf_stat_config *config,
- int cpu_map_idx, double avg,
+ int map_idx, double avg,
struct perf_stat_output_ctx *out,
struct runtime_stat *st,
struct runtime_stat_data *rsd)
@@ -616,7 +604,7 @@ static void print_itlb_cache_misses(struct perf_stat_config *config,
double total, ratio = 0.0;
const char *color;
- total = runtime_stat_avg(st, STAT_ITLB_CACHE, cpu_map_idx, rsd);
+ total = runtime_stat_avg(st, STAT_ITLB_CACHE, map_idx, rsd);
if (total)
ratio = avg / total * 100.0;
@@ -626,7 +614,7 @@ static void print_itlb_cache_misses(struct perf_stat_config *config,
}
static void print_ll_cache_misses(struct perf_stat_config *config,
- int cpu_map_idx, double avg,
+ int map_idx, double avg,
struct perf_stat_output_ctx *out,
struct runtime_stat *st,
struct runtime_stat_data *rsd)
@@ -634,7 +622,7 @@ static void print_ll_cache_misses(struct perf_stat_config *config,
double total, ratio = 0.0;
const char *color;
- total = runtime_stat_avg(st, STAT_LL_CACHE, cpu_map_idx, rsd);
+ total = runtime_stat_avg(st, STAT_LL_CACHE, map_idx, rsd);
if (total)
ratio = avg / total * 100.0;
@@ -692,61 +680,61 @@ static double sanitize_val(double x)
return x;
}
-static double td_total_slots(int cpu_map_idx, struct runtime_stat *st,
+static double td_total_slots(int map_idx, struct runtime_stat *st,
struct runtime_stat_data *rsd)
{
- return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, cpu_map_idx, rsd);
+ return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, map_idx, rsd);
}
-static double td_bad_spec(int cpu_map_idx, struct runtime_stat *st,
+static double td_bad_spec(int map_idx, struct runtime_stat *st,
struct runtime_stat_data *rsd)
{
double bad_spec = 0;
double total_slots;
double total;
- total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, cpu_map_idx, rsd) -
- runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, cpu_map_idx, rsd) +
- runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, cpu_map_idx, rsd);
+ total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, map_idx, rsd) -
+ runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, map_idx, rsd) +
+ runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, map_idx, rsd);
- total_slots = td_total_slots(cpu_map_idx, st, rsd);
+ total_slots = td_total_slots(map_idx, st, rsd);
if (total_slots)
bad_spec = total / total_slots;
return sanitize_val(bad_spec);
}
-static double td_retiring(int cpu_map_idx, struct runtime_stat *st,
+static double td_retiring(int map_idx, struct runtime_stat *st,
struct runtime_stat_data *rsd)
{
double retiring = 0;
- double total_slots = td_total_slots(cpu_map_idx, st, rsd);
+ double total_slots = td_total_slots(map_idx, st, rsd);
double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED,
- cpu_map_idx, rsd);
+ map_idx, rsd);
if (total_slots)
retiring = ret_slots / total_slots;
return retiring;
}
-static double td_fe_bound(int cpu_map_idx, struct runtime_stat *st,
+static double td_fe_bound(int map_idx, struct runtime_stat *st,
struct runtime_stat_data *rsd)
{
double fe_bound = 0;
- double total_slots = td_total_slots(cpu_map_idx, st, rsd);
+ double total_slots = td_total_slots(map_idx, st, rsd);
double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES,
- cpu_map_idx, rsd);
+ map_idx, rsd);
if (total_slots)
fe_bound = fetch_bub / total_slots;
return fe_bound;
}
-static double td_be_bound(int cpu_map_idx, struct runtime_stat *st,
+static double td_be_bound(int map_idx, struct runtime_stat *st,
struct runtime_stat_data *rsd)
{
- double sum = (td_fe_bound(cpu_map_idx, st, rsd) +
- td_bad_spec(cpu_map_idx, st, rsd) +
- td_retiring(cpu_map_idx, st, rsd));
+ double sum = (td_fe_bound(map_idx, st, rsd) +
+ td_bad_spec(map_idx, st, rsd) +
+ td_retiring(map_idx, st, rsd));
if (sum == 0)
return 0;
return sanitize_val(1.0 - sum);
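
To make the level-1 topdown arithmetic above concrete, a standalone computation with invented counter values:

  #include <stdio.h>

  int main(void)
  {
      /* All numbers are hypothetical, purely for illustration. */
      double total_slots = 2000, slots_issued = 1000, slots_retired = 800;
      double recovery_bubbles = 50, fetch_bubbles = 400;

      double bad_spec = (slots_issued - slots_retired + recovery_bubbles) / total_slots;
      double retiring = slots_retired / total_slots;
      double fe_bound = fetch_bubbles / total_slots;
      double be_bound = 1.0 - (fe_bound + bad_spec + retiring);

      /* bad_spec=0.125 retiring=0.400 fe_bound=0.200 be_bound=0.275 */
      printf("%.3f %.3f %.3f %.3f\n", bad_spec, retiring, fe_bound, be_bound);
      return 0;
  }
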
@@ -757,15 +745,15 @@ static double td_be_bound(int cpu_map_idx, struct runtime_stat *st,
* the ratios we need to recreate the sum.
*/
-static double td_metric_ratio(int cpu_map_idx, enum stat_type type,
+static double td_metric_ratio(int map_idx, enum stat_type type,
struct runtime_stat *stat,
struct runtime_stat_data *rsd)
{
- double sum = runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu_map_idx, rsd) +
- runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu_map_idx, rsd) +
- runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu_map_idx, rsd) +
- runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu_map_idx, rsd);
- double d = runtime_stat_avg(stat, type, cpu_map_idx, rsd);
+ double sum = runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, map_idx, rsd) +
+ runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, map_idx, rsd) +
+ runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, map_idx, rsd) +
+ runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, map_idx, rsd);
+ double d = runtime_stat_avg(stat, type, map_idx, rsd);
if (sum)
return d / sum;
@@ -777,23 +765,23 @@ static double td_metric_ratio(int cpu_map_idx, enum stat_type type,
* We allow two missing.
*/
-static bool full_td(int cpu_map_idx, struct runtime_stat *stat,
+static bool full_td(int map_idx, struct runtime_stat *stat,
struct runtime_stat_data *rsd)
{
int c = 0;
- if (runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu_map_idx, rsd) > 0)
+ if (runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, map_idx, rsd) > 0)
c++;
- if (runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu_map_idx, rsd) > 0)
+ if (runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, map_idx, rsd) > 0)
c++;
- if (runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu_map_idx, rsd) > 0)
+ if (runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, map_idx, rsd) > 0)
c++;
- if (runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu_map_idx, rsd) > 0)
+ if (runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, map_idx, rsd) > 0)
c++;
return c >= 2;
}
-static void print_smi_cost(struct perf_stat_config *config, int cpu_map_idx,
+static void print_smi_cost(struct perf_stat_config *config, int map_idx,
struct perf_stat_output_ctx *out,
struct runtime_stat *st,
struct runtime_stat_data *rsd)
@@ -801,9 +789,9 @@ static void print_smi_cost(struct perf_stat_config *config, int cpu_map_idx,
double smi_num, aperf, cycles, cost = 0.0;
const char *color = NULL;
- smi_num = runtime_stat_avg(st, STAT_SMI_NUM, cpu_map_idx, rsd);
- aperf = runtime_stat_avg(st, STAT_APERF, cpu_map_idx, rsd);
- cycles = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, rsd);
+ smi_num = runtime_stat_avg(st, STAT_SMI_NUM, map_idx, rsd);
+ aperf = runtime_stat_avg(st, STAT_APERF, map_idx, rsd);
+ cycles = runtime_stat_avg(st, STAT_CYCLES, map_idx, rsd);
if ((cycles == 0) || (aperf == 0))
return;
@@ -820,7 +808,7 @@ static void print_smi_cost(struct perf_stat_config *config, int cpu_map_idx,
static int prepare_metric(struct evsel **metric_events,
struct metric_ref *metric_refs,
struct expr_parse_ctx *pctx,
- int cpu_map_idx,
+ int map_idx,
struct runtime_stat *st)
{
double scale;
@@ -859,17 +847,22 @@ static int prepare_metric(struct evsel **metric_events,
abort();
}
} else {
- v = saved_value_lookup(metric_events[i], cpu_map_idx, false,
+ v = saved_value_lookup(metric_events[i], map_idx, false,
STAT_NONE, 0, st,
metric_events[i]->cgrp);
if (!v)
break;
stats = &v->stats;
- scale = 1.0;
+ /*
+ * If an event was scaled during stat gathering, reverse
+ * the scale before computing the metric.
+ */
+ scale = 1.0 / metric_events[i]->scale;
+
source_count = evsel__source_count(metric_events[i]);
if (v->metric_other)
- metric_total = v->metric_total;
+ metric_total = v->metric_total * scale;
}
n = strdup(evsel__metric_id(metric_events[i]));
if (!n)
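
The effect of the new scale handling, with an invented example: a counter read with scale 1e-6 stores its totals in scaled units, so multiplying by 1.0/scale recovers raw counts before the metric expression consumes them:

  #include <stdio.h>

  int main(void)
  {
      /* Hypothetical: scale 1e-6 applied at read time, so a stored
       * total of 12.5 corresponds to 12,500,000 raw events. */
      double evsel_scale = 1e-6;
      double stored_total = 12.5;
      double raw_total = stored_total * (1.0 / evsel_scale);

      printf("raw total = %.0f\n", raw_total);  /* 12500000 */
      return 0;
  }
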
@@ -897,7 +890,7 @@ static void generic_metric(struct perf_stat_config *config,
const char *metric_name,
const char *metric_unit,
int runtime,
- int cpu_map_idx,
+ int map_idx,
struct perf_stat_output_ctx *out,
struct runtime_stat *st)
{
@@ -911,8 +904,11 @@ static void generic_metric(struct perf_stat_config *config,
if (!pctx)
return;
- pctx->runtime = runtime;
- i = prepare_metric(metric_events, metric_refs, pctx, cpu_map_idx, st);
+ if (config->user_requested_cpu_list)
+ pctx->sctx.user_requested_cpu_list = strdup(config->user_requested_cpu_list);
+ pctx->sctx.runtime = runtime;
+ pctx->sctx.system_wide = config->system_wide;
+ i = prepare_metric(metric_events, metric_refs, pctx, map_idx, st);
if (i < 0) {
expr__ctx_free(pctx);
return;
@@ -957,7 +953,7 @@ static void generic_metric(struct perf_stat_config *config,
expr__ctx_free(pctx);
}
-double test_generic_metric(struct metric_expr *mexp, int cpu_map_idx, struct runtime_stat *st)
+double test_generic_metric(struct metric_expr *mexp, int map_idx, struct runtime_stat *st)
{
struct expr_parse_ctx *pctx;
double ratio = 0.0;
@@ -966,7 +962,7 @@ double test_generic_metric(struct metric_expr *mexp, int cpu_map_idx, struct run
if (!pctx)
return NAN;
- if (prepare_metric(mexp->metric_events, mexp->metric_refs, pctx, cpu_map_idx, st) < 0)
+ if (prepare_metric(mexp->metric_events, mexp->metric_refs, pctx, map_idx, st) < 0)
goto out;
if (expr__parse(&ratio, pctx, mexp->metric_expr))
@@ -979,7 +975,7 @@ out:
void perf_stat__print_shadow_stats(struct perf_stat_config *config,
struct evsel *evsel,
- double avg, int cpu_map_idx,
+ double avg, int map_idx,
struct perf_stat_output_ctx *out,
struct rblist *metric_events,
struct runtime_stat *st)
@@ -998,7 +994,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
if (config->iostat_run) {
iostat_print_metric(config, evsel, out);
} else if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
- total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, &rsd);
+ total = runtime_stat_avg(st, STAT_CYCLES, map_idx, &rsd);
if (total) {
ratio = avg / total;
@@ -1008,11 +1004,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0);
}
- total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT, cpu_map_idx, &rsd);
+ total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT, map_idx, &rsd);
total = max(total, runtime_stat_avg(st,
STAT_STALLED_CYCLES_BACK,
- cpu_map_idx, &rsd));
+ map_idx, &rsd));
if (total && avg) {
out->new_line(config, ctxp);
@@ -1022,8 +1018,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
ratio);
}
} else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
- if (runtime_stat_n(st, STAT_BRANCHES, cpu_map_idx, &rsd) != 0)
- print_branch_misses(config, cpu_map_idx, avg, out, st, &rsd);
+ if (runtime_stat_n(st, STAT_BRANCHES, map_idx, &rsd) != 0)
+ print_branch_misses(config, map_idx, avg, out, st, &rsd);
else
print_metric(config, ctxp, NULL, NULL, "of all branches", 0);
} else if (
@@ -1032,8 +1028,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
- if (runtime_stat_n(st, STAT_L1_DCACHE, cpu_map_idx, &rsd) != 0)
- print_l1_dcache_misses(config, cpu_map_idx, avg, out, st, &rsd);
+ if (runtime_stat_n(st, STAT_L1_DCACHE, map_idx, &rsd) != 0)
+ print_l1_dcache_misses(config, map_idx, avg, out, st, &rsd);
else
print_metric(config, ctxp, NULL, NULL, "of all L1-dcache accesses", 0);
} else if (
@@ -1042,8 +1038,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
- if (runtime_stat_n(st, STAT_L1_ICACHE, cpu_map_idx, &rsd) != 0)
- print_l1_icache_misses(config, cpu_map_idx, avg, out, st, &rsd);
+ if (runtime_stat_n(st, STAT_L1_ICACHE, map_idx, &rsd) != 0)
+ print_l1_icache_misses(config, map_idx, avg, out, st, &rsd);
else
print_metric(config, ctxp, NULL, NULL, "of all L1-icache accesses", 0);
} else if (
@@ -1052,8 +1048,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
- if (runtime_stat_n(st, STAT_DTLB_CACHE, cpu_map_idx, &rsd) != 0)
- print_dtlb_cache_misses(config, cpu_map_idx, avg, out, st, &rsd);
+ if (runtime_stat_n(st, STAT_DTLB_CACHE, map_idx, &rsd) != 0)
+ print_dtlb_cache_misses(config, map_idx, avg, out, st, &rsd);
else
print_metric(config, ctxp, NULL, NULL, "of all dTLB cache accesses", 0);
} else if (
@@ -1062,8 +1058,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
- if (runtime_stat_n(st, STAT_ITLB_CACHE, cpu_map_idx, &rsd) != 0)
- print_itlb_cache_misses(config, cpu_map_idx, avg, out, st, &rsd);
+ if (runtime_stat_n(st, STAT_ITLB_CACHE, map_idx, &rsd) != 0)
+ print_itlb_cache_misses(config, map_idx, avg, out, st, &rsd);
else
print_metric(config, ctxp, NULL, NULL, "of all iTLB cache accesses", 0);
} else if (
@@ -1072,27 +1068,27 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
- if (runtime_stat_n(st, STAT_LL_CACHE, cpu_map_idx, &rsd) != 0)
- print_ll_cache_misses(config, cpu_map_idx, avg, out, st, &rsd);
+ if (runtime_stat_n(st, STAT_LL_CACHE, map_idx, &rsd) != 0)
+ print_ll_cache_misses(config, map_idx, avg, out, st, &rsd);
else
print_metric(config, ctxp, NULL, NULL, "of all LL-cache accesses", 0);
} else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
- total = runtime_stat_avg(st, STAT_CACHEREFS, cpu_map_idx, &rsd);
+ total = runtime_stat_avg(st, STAT_CACHEREFS, map_idx, &rsd);
if (total)
ratio = avg * 100 / total;
- if (runtime_stat_n(st, STAT_CACHEREFS, cpu_map_idx, &rsd) != 0)
+ if (runtime_stat_n(st, STAT_CACHEREFS, map_idx, &rsd) != 0)
print_metric(config, ctxp, NULL, "%8.3f %%",
"of all cache refs", ratio);
else
print_metric(config, ctxp, NULL, NULL, "of all cache refs", 0);
} else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
- print_stalled_cycles_frontend(config, cpu_map_idx, avg, out, st, &rsd);
+ print_stalled_cycles_frontend(config, map_idx, avg, out, st, &rsd);
} else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
- print_stalled_cycles_backend(config, cpu_map_idx, avg, out, st, &rsd);
+ print_stalled_cycles_backend(config, map_idx, avg, out, st, &rsd);
} else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
- total = runtime_stat_avg(st, STAT_NSECS, cpu_map_idx, &rsd);
+ total = runtime_stat_avg(st, STAT_NSECS, map_idx, &rsd);
if (total) {
ratio = avg / total;
@@ -1101,7 +1097,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, NULL, NULL, "Ghz", 0);
}
} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
- total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, &rsd);
+ total = runtime_stat_avg(st, STAT_CYCLES, map_idx, &rsd);
if (total)
print_metric(config, ctxp, NULL,
@@ -1111,8 +1107,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, NULL, NULL, "transactional cycles",
0);
} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
- total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, &rsd);
- total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu_map_idx, &rsd);
+ total = runtime_stat_avg(st, STAT_CYCLES, map_idx, &rsd);
+ total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, map_idx, &rsd);
if (total2 < avg)
total2 = avg;
@@ -1122,19 +1118,19 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
else
print_metric(config, ctxp, NULL, NULL, "aborted cycles", 0);
} else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) {
- total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu_map_idx, &rsd);
+ total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, map_idx, &rsd);
if (avg)
ratio = total / avg;
- if (runtime_stat_n(st, STAT_CYCLES_IN_TX, cpu_map_idx, &rsd) != 0)
+ if (runtime_stat_n(st, STAT_CYCLES_IN_TX, map_idx, &rsd) != 0)
print_metric(config, ctxp, NULL, "%8.0f",
"cycles / transaction", ratio);
else
print_metric(config, ctxp, NULL, NULL, "cycles / transaction",
0);
} else if (perf_stat_evsel__is(evsel, ELISION_START)) {
- total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu_map_idx, &rsd);
+ total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, map_idx, &rsd);
if (avg)
ratio = total / avg;
@@ -1147,28 +1143,28 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
else
print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) {
- double fe_bound = td_fe_bound(cpu_map_idx, st, &rsd);
+ double fe_bound = td_fe_bound(map_idx, st, &rsd);
if (fe_bound > 0.2)
color = PERF_COLOR_RED;
print_metric(config, ctxp, color, "%8.1f%%", "frontend bound",
fe_bound * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) {
- double retiring = td_retiring(cpu_map_idx, st, &rsd);
+ double retiring = td_retiring(map_idx, st, &rsd);
if (retiring > 0.7)
color = PERF_COLOR_GREEN;
print_metric(config, ctxp, color, "%8.1f%%", "retiring",
retiring * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) {
- double bad_spec = td_bad_spec(cpu_map_idx, st, &rsd);
+ double bad_spec = td_bad_spec(map_idx, st, &rsd);
if (bad_spec > 0.1)
color = PERF_COLOR_RED;
print_metric(config, ctxp, color, "%8.1f%%", "bad speculation",
bad_spec * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) {
- double be_bound = td_be_bound(cpu_map_idx, st, &rsd);
+ double be_bound = td_be_bound(map_idx, st, &rsd);
const char *name = "backend bound";
static int have_recovery_bubbles = -1;
@@ -1181,14 +1177,14 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
if (be_bound > 0.2)
color = PERF_COLOR_RED;
- if (td_total_slots(cpu_map_idx, st, &rsd) > 0)
+ if (td_total_slots(map_idx, st, &rsd) > 0)
print_metric(config, ctxp, color, "%8.1f%%", name,
be_bound * 100.);
else
print_metric(config, ctxp, NULL, NULL, name, 0);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_RETIRING) &&
- full_td(cpu_map_idx, st, &rsd)) {
- double retiring = td_metric_ratio(cpu_map_idx,
+ full_td(map_idx, st, &rsd)) {
+ double retiring = td_metric_ratio(map_idx,
STAT_TOPDOWN_RETIRING, st,
&rsd);
if (retiring > 0.7)
@@ -1196,8 +1192,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, color, "%8.1f%%", "Retiring",
retiring * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_FE_BOUND) &&
- full_td(cpu_map_idx, st, &rsd)) {
- double fe_bound = td_metric_ratio(cpu_map_idx,
+ full_td(map_idx, st, &rsd)) {
+ double fe_bound = td_metric_ratio(map_idx,
STAT_TOPDOWN_FE_BOUND, st,
&rsd);
if (fe_bound > 0.2)
@@ -1205,8 +1201,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, color, "%8.1f%%", "Frontend Bound",
fe_bound * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_BE_BOUND) &&
- full_td(cpu_map_idx, st, &rsd)) {
- double be_bound = td_metric_ratio(cpu_map_idx,
+ full_td(map_idx, st, &rsd)) {
+ double be_bound = td_metric_ratio(map_idx,
STAT_TOPDOWN_BE_BOUND, st,
&rsd);
if (be_bound > 0.2)
@@ -1214,8 +1210,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, color, "%8.1f%%", "Backend Bound",
be_bound * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_BAD_SPEC) &&
- full_td(cpu_map_idx, st, &rsd)) {
- double bad_spec = td_metric_ratio(cpu_map_idx,
+ full_td(map_idx, st, &rsd)) {
+ double bad_spec = td_metric_ratio(map_idx,
STAT_TOPDOWN_BAD_SPEC, st,
&rsd);
if (bad_spec > 0.1)
@@ -1223,11 +1219,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, color, "%8.1f%%", "Bad Speculation",
bad_spec * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_HEAVY_OPS) &&
- full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) {
- double retiring = td_metric_ratio(cpu_map_idx,
+ full_td(map_idx, st, &rsd) && (config->topdown_level > 1)) {
+ double retiring = td_metric_ratio(map_idx,
STAT_TOPDOWN_RETIRING, st,
&rsd);
- double heavy_ops = td_metric_ratio(cpu_map_idx,
+ double heavy_ops = td_metric_ratio(map_idx,
STAT_TOPDOWN_HEAVY_OPS, st,
&rsd);
double light_ops = retiring - heavy_ops;
@@ -1243,11 +1239,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, color, "%8.1f%%", "Light Operations",
light_ops * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_BR_MISPREDICT) &&
- full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) {
- double bad_spec = td_metric_ratio(cpu_map_idx,
+ full_td(map_idx, st, &rsd) && (config->topdown_level > 1)) {
+ double bad_spec = td_metric_ratio(map_idx,
STAT_TOPDOWN_BAD_SPEC, st,
&rsd);
- double br_mis = td_metric_ratio(cpu_map_idx,
+ double br_mis = td_metric_ratio(map_idx,
STAT_TOPDOWN_BR_MISPREDICT, st,
&rsd);
double m_clears = bad_spec - br_mis;
@@ -1263,11 +1259,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, color, "%8.1f%%", "Machine Clears",
m_clears * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_LAT) &&
- full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) {
- double fe_bound = td_metric_ratio(cpu_map_idx,
+ full_td(map_idx, st, &rsd) && (config->topdown_level > 1)) {
+ double fe_bound = td_metric_ratio(map_idx,
STAT_TOPDOWN_FE_BOUND, st,
&rsd);
- double fetch_lat = td_metric_ratio(cpu_map_idx,
+ double fetch_lat = td_metric_ratio(map_idx,
STAT_TOPDOWN_FETCH_LAT, st,
&rsd);
double fetch_bw = fe_bound - fetch_lat;
@@ -1283,11 +1279,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, color, "%8.1f%%", "Fetch Bandwidth",
fetch_bw * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_MEM_BOUND) &&
- full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) {
- double be_bound = td_metric_ratio(cpu_map_idx,
+ full_td(map_idx, st, &rsd) && (config->topdown_level > 1)) {
+ double be_bound = td_metric_ratio(map_idx,
STAT_TOPDOWN_BE_BOUND, st,
&rsd);
- double mem_bound = td_metric_ratio(cpu_map_idx,
+ double mem_bound = td_metric_ratio(map_idx,
STAT_TOPDOWN_MEM_BOUND, st,
&rsd);
double core_bound = be_bound - mem_bound;
@@ -1304,12 +1300,13 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
core_bound * 100.);
} else if (evsel->metric_expr) {
generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL,
- evsel->name, evsel->metric_name, NULL, 1, cpu_map_idx, out, st);
- } else if (runtime_stat_n(st, STAT_NSECS, cpu_map_idx, &rsd) != 0) {
+ evsel->name, evsel->metric_name, NULL, 1,
+ map_idx, out, st);
+ } else if (runtime_stat_n(st, STAT_NSECS, map_idx, &rsd) != 0) {
char unit = ' ';
char unit_buf[10] = "/sec";
- total = runtime_stat_avg(st, STAT_NSECS, cpu_map_idx, &rsd);
+ total = runtime_stat_avg(st, STAT_NSECS, map_idx, &rsd);
if (total)
ratio = convert_unit_double(1000000000.0 * avg / total, &unit);
@@ -1317,7 +1314,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio);
} else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
- print_smi_cost(config, cpu_map_idx, out, st, &rsd);
+ print_smi_cost(config, map_idx, out, st, &rsd);
} else {
num = 0;
}
@@ -1329,8 +1326,9 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
if (num++ > 0)
out->new_line(config, ctxp);
generic_metric(config, mexp->metric_expr, mexp->metric_events,
- mexp->metric_refs, evsel->name, mexp->metric_name,
- mexp->metric_unit, mexp->runtime, cpu_map_idx, out, st);
+ mexp->metric_refs, evsel->name, mexp->metric_name,
+ mexp->metric_unit, mexp->runtime,
+ map_idx, out, st);
}
}
if (num == 0)
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 0882b4754fcf..8ec8bb4a9912 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -14,7 +14,11 @@
#include "evlist.h"
#include "evsel.h"
#include "thread_map.h"
-#include "hashmap.h"
+#ifdef HAVE_LIBBPF_SUPPORT
+#include <bpf/hashmap.h>
+#else
+#include "util/hashmap.h"
+#endif
#include <linux/zalloc.h>
void update_stats(struct stats *stats, u64 val)
@@ -128,13 +132,9 @@ static void perf_stat_evsel_id_init(struct evsel *evsel)
static void evsel__reset_stat_priv(struct evsel *evsel)
{
- int i;
struct perf_stat_evsel *ps = evsel->stats;
- for (i = 0; i < 3; i++)
- init_stats(&ps->res_stats[i]);
-
- perf_stat_evsel_id_init(evsel);
+ init_stats(&ps->res_stats);
}
static int evsel__alloc_stat_priv(struct evsel *evsel)
@@ -142,6 +142,7 @@ static int evsel__alloc_stat_priv(struct evsel *evsel)
evsel->stats = zalloc(sizeof(struct perf_stat_evsel));
if (evsel->stats == NULL)
return -ENOMEM;
+ perf_stat_evsel_id_init(evsel);
evsel__reset_stat_priv(evsel);
return 0;
}
@@ -388,12 +389,8 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel,
}
if (config->aggr_mode == AGGR_THREAD) {
- if (config->stats)
- perf_stat__update_shadow_stats(evsel,
- count->val, 0, &config->stats[thread]);
- else
- perf_stat__update_shadow_stats(evsel,
- count->val, 0, &rt_stat);
+ perf_stat__update_shadow_stats(evsel, count->val,
+ thread, &rt_stat);
}
break;
case AGGR_GLOBAL:
@@ -416,9 +413,6 @@ static int process_counter_maps(struct perf_stat_config *config,
int ncpus = evsel__nr_cpus(counter);
int idx, thread;
- if (counter->core.system_wide)
- nthreads = 1;
-
for (thread = 0; thread < nthreads; thread++) {
for (idx = 0; idx < ncpus; idx++) {
if (process_counter_values(config, counter, idx, thread,
@@ -436,7 +430,7 @@ int perf_stat_process_counter(struct perf_stat_config *config,
struct perf_counts_values *aggr = &counter->counts->aggr;
struct perf_stat_evsel *ps = counter->stats;
u64 *count = counter->counts->aggr.values;
- int i, ret;
+ int ret;
aggr->val = aggr->ena = aggr->run = 0;
@@ -454,8 +448,7 @@ int perf_stat_process_counter(struct perf_stat_config *config,
evsel__compute_deltas(counter, -1, -1, aggr);
perf_counts_values__scale(aggr, config->scale, &counter->counts->scaled);
- for (i = 0; i < 3; i++)
- update_stats(&ps->res_stats[i], count[i]);
+ update_stats(&ps->res_stats, *count);
if (verbose > 0) {
fprintf(config->output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
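
Collapsing res_stats[3] to a single struct stats follows from the hunk above: only the aggregated value (*count, i.e. count[0]) ever feeds the across-run statistics, so the slots for the enabled/running times were dead weight. For reference, a self-contained sketch of the kind of running-statistics update perf's update_stats() performs (Welford's online algorithm), with simplified types:

  #include <stdio.h>
  #include <stdint.h>

  struct stats {
      double n, mean, M2;
  };

  /* One-pass, numerically stable mean/variance update. */
  static void update_stats(struct stats *s, uint64_t val)
  {
      double delta;

      s->n += 1;
      delta = val - s->mean;
      s->mean += delta / s->n;
      s->M2 += delta * (val - s->mean);
  }

  int main(void)
  {
      struct stats s = { 0 };
      uint64_t runs[] = { 100, 104, 98 };  /* invented per-run counts */

      for (size_t i = 0; i < sizeof(runs) / sizeof(runs[0]); i++)
          update_stats(&s, runs[i]);

      printf("mean=%.2f variance=%.2f\n", s.mean, s.M2 / (s.n - 1));
      return 0;
  }
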
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 668250022f8c..b0899c6e002f 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -43,7 +43,7 @@ enum perf_stat_evsel_id {
};
struct perf_stat_evsel {
- struct stats res_stats[3];
+ struct stats res_stats;
enum perf_stat_evsel_id id;
u64 *group_data;
};
@@ -141,6 +141,8 @@ struct perf_stat_config {
bool stop_read_counter;
bool quiet;
bool iostat_run;
+ char *user_requested_cpu_list;
+ bool system_wide;
FILE *output;
unsigned int interval;
unsigned int timeout;
@@ -151,8 +153,6 @@ struct perf_stat_config {
int run_count;
int print_free_counters_hint;
int print_mixed_hw_group_error;
- struct runtime_stat *stats;
- int stats_num;
const char *csv_sep;
struct stats *walltime_nsecs_stats;
struct rusage ru_data;
@@ -232,7 +232,7 @@ void perf_stat__init_shadow_stats(void);
void perf_stat__reset_shadow_stats(void);
void perf_stat__reset_shadow_per_stat(struct runtime_stat *st);
void perf_stat__update_shadow_stats(struct evsel *counter, u64 count,
- int cpu_map_idx, struct runtime_stat *st);
+ int map_idx, struct runtime_stat *st);
struct perf_stat_output_ctx {
void *ctx;
print_metric_t print_metric;
@@ -242,7 +242,7 @@ struct perf_stat_output_ctx {
void perf_stat__print_shadow_stats(struct perf_stat_config *config,
struct evsel *evsel,
- double avg, int cpu,
+ double avg, int map_idx,
struct perf_stat_output_ctx *out,
struct rblist *metric_events,
struct runtime_stat *st);
@@ -277,5 +277,5 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf
struct target *_target, struct timespec *ts, int argc, const char **argv);
struct metric_expr;
-double test_generic_metric(struct metric_expr *mexp, int cpu_map_idx, struct runtime_stat *st);
+double test_generic_metric(struct metric_expr *mexp, int map_idx, struct runtime_stat *st);
#endif
diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c
index f6d90cdd9225..4f12a96f33cc 100644
--- a/tools/perf/util/string.c
+++ b/tools/perf/util/string.c
@@ -15,7 +15,6 @@ const char *dots =
"....................................................................."
".....................................................................";
-#define K 1024LL
/*
* perf_atoll()
* Parse (\d+)(b|B|kb|KB|mb|MB|gb|GB|tb|TB) (e.g. "256MB")
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index a4b22caa7c24..a3a165ae933a 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1791,6 +1791,7 @@ int dso__load(struct dso *dso, struct map *map)
char newmapname[PATH_MAX];
const char *map_path = dso->long_name;
+ mutex_lock(&dso->lock);
perfmap = strncmp(dso->name, "/tmp/perf-", 10) == 0;
if (perfmap) {
if (dso->nsinfo && (dso__find_perf_map(newmapname,
@@ -1800,7 +1801,6 @@ int dso__load(struct dso *dso, struct map *map)
}
nsinfo__mountns_enter(dso->nsinfo, &nsc);
- pthread_mutex_lock(&dso->lock);
/* check again under the dso->lock */
if (dso__loaded(dso)) {
@@ -1964,7 +1964,7 @@ out_free:
ret = 0;
out:
dso__set_loaded(dso);
- pthread_mutex_unlock(&dso->lock);
+ mutex_unlock(&dso->lock);
nsinfo__mountns_exit(&nsc);
return ret;
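
Two things happen here: dso->lock becomes one of perf's own mutex wrappers instead of a bare pthread_mutex_t, and the lock is now taken before the /tmp/perf- map lookup so that path is covered as well. A minimal sketch of what such a wrapper looks like, with names merely modeled on tools/perf/util/mutex.h rather than quoted from it:

  #include <pthread.h>

  struct mutex {
      pthread_mutex_t lock;
  };

  static inline void mutex_init(struct mutex *mtx)
  {
      pthread_mutex_init(&mtx->lock, NULL);
  }

  static inline void mutex_lock(struct mutex *mtx)
  {
      pthread_mutex_lock(&mtx->lock);
  }

  static inline void mutex_unlock(struct mutex *mtx)
  {
      pthread_mutex_unlock(&mtx->lock);
  }

The value of the wrapper is a single place to hang error checking and static-analysis annotations, while call sites read the same as kernel code.
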
diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
index 538790758e24..cccd293b5312 100644
--- a/tools/perf/util/synthetic-events.c
+++ b/tools/perf/util/synthetic-events.c
@@ -364,11 +364,14 @@ static bool read_proc_maps_line(struct io *io, __u64 *start, __u64 *end,
}
static void perf_record_mmap2__read_build_id(struct perf_record_mmap2 *event,
+ struct machine *machine,
bool is_kernel)
{
struct build_id bid;
struct nsinfo *nsi;
struct nscookie nc;
+ struct dso *dso = NULL;
+ struct dso_id id;
int rc;
if (is_kernel) {
@@ -376,6 +379,18 @@ static void perf_record_mmap2__read_build_id(struct perf_record_mmap2 *event,
goto out;
}
+ id.maj = event->maj;
+ id.min = event->min;
+ id.ino = event->ino;
+ id.ino_generation = event->ino_generation;
+
+ dso = dsos__findnew_id(&machine->dsos, event->filename, &id);
+ if (dso && dso->has_build_id) {
+ bid = dso->bid;
+ rc = 0;
+ goto out;
+ }
+
nsi = nsinfo__new(event->pid);
nsinfo__mountns_enter(nsi, &nc);
@@ -391,12 +406,16 @@ out:
event->header.misc |= PERF_RECORD_MISC_MMAP_BUILD_ID;
event->__reserved_1 = 0;
event->__reserved_2 = 0;
+
+ if (dso && !dso->has_build_id)
+ dso__set_build_id(dso, &bid);
} else {
if (event->filename[0] == '/') {
pr_debug2("Failed to read build ID for %s\n",
event->filename);
}
}
+ dso__put(dso);
}
int perf_event__synthesize_mmap_events(struct perf_tool *tool,
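
The hunk above is a plain find-or-compute-then-cache: look the dso up by its dso_id, reuse its build ID when one is already attached, otherwise fall back to the slow filesystem read and backfill the dso for the next caller. Reduced to a hedged, generic sketch (all names invented):

  #include <stdbool.h>
  #include <stdio.h>

  struct entry {
      bool has_value;
      char value[64];
  };

  /* Hypothetical slow path standing in for the real build-id read. */
  static int compute_value(char *buf, size_t len)
  {
      snprintf(buf, len, "deadbeef");
      return 0;
  }

  /* Return the cached value if present, otherwise compute and cache it. */
  static int get_or_compute(struct entry *e, char *out, size_t len)
  {
      if (!e->has_value) {
          if (compute_value(e->value, sizeof(e->value)))
              return -1;
          e->has_value = true;
      }
      snprintf(out, len, "%s", e->value);
      return 0;
  }
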
@@ -507,7 +526,7 @@ out:
event->mmap2.tid = pid;
if (symbol_conf.buildid_mmap2)
- perf_record_mmap2__read_build_id(&event->mmap2, false);
+ perf_record_mmap2__read_build_id(&event->mmap2, machine, false);
if (perf_tool__process_synth_event(tool, event, machine, process) != 0) {
rc = -1;
@@ -690,7 +709,7 @@ int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t
memcpy(event->mmap2.filename, pos->dso->long_name,
pos->dso->long_name_len + 1);
- perf_record_mmap2__read_build_id(&event->mmap2, false);
+ perf_record_mmap2__read_build_id(&event->mmap2, machine, false);
} else {
size = PERF_ALIGN(pos->dso->long_name_len + 1, sizeof(u64));
event->mmap.header.type = PERF_RECORD_MMAP;
@@ -1126,7 +1145,7 @@ static int __perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
event->mmap2.len = map->end - event->mmap.start;
event->mmap2.pid = machine->pid;
- perf_record_mmap2__read_build_id(&event->mmap2, true);
+ perf_record_mmap2__read_build_id(&event->mmap2, machine, true);
} else {
size = snprintf(event->mmap.filename, sizeof(event->mmap.filename),
"%s%s", machine->mmap_name, kmap->ref_reloc_sym->name) + 1;
@@ -1195,93 +1214,97 @@ int perf_event__synthesize_thread_map2(struct perf_tool *tool,
return err;
}
-static void synthesize_cpus(struct perf_record_cpu_map_data *data,
- const struct perf_cpu_map *map)
-{
- int i, map_nr = perf_cpu_map__nr(map);
-
- data->cpus_data.nr = map_nr;
+struct synthesize_cpu_map_data {
+ const struct perf_cpu_map *map;
+ int nr;
+ int min_cpu;
+ int max_cpu;
+ int has_any_cpu;
+ int type;
+ size_t size;
+ struct perf_record_cpu_map_data *data;
+};
- for (i = 0; i < map_nr; i++)
- data->cpus_data.cpu[i] = perf_cpu_map__cpu(map, i).cpu;
+static void synthesize_cpus(struct synthesize_cpu_map_data *data)
+{
+ data->data->type = PERF_CPU_MAP__CPUS;
+ data->data->cpus_data.nr = data->nr;
+ for (int i = 0; i < data->nr; i++)
+ data->data->cpus_data.cpu[i] = perf_cpu_map__cpu(data->map, i).cpu;
}
-static void synthesize_mask(struct perf_record_cpu_map_data *data,
- const struct perf_cpu_map *map, int max)
+static void synthesize_mask(struct synthesize_cpu_map_data *data)
{
int idx;
struct perf_cpu cpu;
/* Due to padding, the 4bytes per entry mask variant is always smaller. */
- data->mask32_data.nr = BITS_TO_U32(max);
- data->mask32_data.long_size = 4;
+ data->data->type = PERF_CPU_MAP__MASK;
+ data->data->mask32_data.nr = BITS_TO_U32(data->max_cpu);
+ data->data->mask32_data.long_size = 4;
- perf_cpu_map__for_each_cpu(cpu, idx, map) {
+ perf_cpu_map__for_each_cpu(cpu, idx, data->map) {
int bit_word = cpu.cpu / 32;
- __u32 bit_mask = 1U << (cpu.cpu & 31);
+ u32 bit_mask = 1U << (cpu.cpu & 31);
- data->mask32_data.mask[bit_word] |= bit_mask;
+ data->data->mask32_data.mask[bit_word] |= bit_mask;
}
}
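
Worked example of the 32-bit mask packing above (the CPU number is invented): CPU 37 lands in word 37 / 32 = 1 with bit 37 & 31 = 5, i.e. mask[1] |= 0x20:

  #include <stdio.h>

  int main(void)
  {
      unsigned int cpu = 37;                     /* hypothetical CPU */
      unsigned int bit_word = cpu / 32;          /* word 1 */
      unsigned int bit_mask = 1U << (cpu & 31);  /* bit 5 -> 0x20 */

      printf("cpu %u -> mask[%u] |= 0x%x\n", cpu, bit_word, bit_mask);
      return 0;
  }
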
-static size_t cpus_size(const struct perf_cpu_map *map)
-{
- return sizeof(struct cpu_map_entries) + perf_cpu_map__nr(map) * sizeof(u16);
-}
-
-static size_t mask_size(const struct perf_cpu_map *map, int *max)
+static void synthesize_range_cpus(struct synthesize_cpu_map_data *data)
{
- *max = perf_cpu_map__max(map).cpu;
- return sizeof(struct perf_record_mask_cpu_map32) + BITS_TO_U32(*max) * sizeof(__u32);
+ data->data->type = PERF_CPU_MAP__RANGE_CPUS;
+ data->data->range_cpu_data.any_cpu = data->has_any_cpu;
+ data->data->range_cpu_data.start_cpu = data->min_cpu;
+ data->data->range_cpu_data.end_cpu = data->max_cpu;
}
-static void *cpu_map_data__alloc(const struct perf_cpu_map *map, size_t *size,
- u16 *type, int *max)
+static void *cpu_map_data__alloc(struct synthesize_cpu_map_data *syn_data,
+ size_t header_size)
{
size_t size_cpus, size_mask;
- bool is_dummy = perf_cpu_map__empty(map);
- /*
- * Both array and mask data have variable size based
- * on the number of cpus and their actual values.
- * The size of the 'struct perf_record_cpu_map_data' is:
- *
- * array = size of 'struct cpu_map_entries' +
- * number of cpus * sizeof(u64)
- *
- * mask = size of 'struct perf_record_record_cpu_map' +
- * maximum cpu bit converted to size of longs
- *
- * and finally + the size of 'struct perf_record_cpu_map_data'.
- */
- size_cpus = cpus_size(map);
- size_mask = mask_size(map, max);
+ syn_data->nr = perf_cpu_map__nr(syn_data->map);
+ syn_data->has_any_cpu = (perf_cpu_map__cpu(syn_data->map, 0).cpu == -1) ? 1 : 0;
- if (is_dummy || (size_cpus < size_mask)) {
- *size += size_cpus;
- *type = PERF_CPU_MAP__CPUS;
- } else {
- *size += size_mask;
- *type = PERF_CPU_MAP__MASK;
+ syn_data->min_cpu = perf_cpu_map__cpu(syn_data->map, syn_data->has_any_cpu).cpu;
+ syn_data->max_cpu = perf_cpu_map__max(syn_data->map).cpu;
+ if (syn_data->max_cpu - syn_data->min_cpu + 1 == syn_data->nr - syn_data->has_any_cpu) {
+ /* A consecutive range of CPUs can be encoded using a range. */
+ assert(sizeof(u16) + sizeof(struct perf_record_range_cpu_map) == sizeof(u64));
+ syn_data->type = PERF_CPU_MAP__RANGE_CPUS;
+ syn_data->size = header_size + sizeof(u64);
+ return zalloc(syn_data->size);
}
- *size += sizeof(__u16); /* For perf_record_cpu_map_data.type. */
- *size = PERF_ALIGN(*size, sizeof(u64));
- return zalloc(*size);
+ size_cpus = sizeof(u16) + sizeof(struct cpu_map_entries) + syn_data->nr * sizeof(u16);
+ /* Due to padding, the 4bytes per entry mask variant is always smaller. */
+ size_mask = sizeof(u16) + sizeof(struct perf_record_mask_cpu_map32) +
+ BITS_TO_U32(syn_data->max_cpu) * sizeof(__u32);
+ if (syn_data->has_any_cpu || size_cpus < size_mask) {
+ /* Follow the CPU map encoding. */
+ syn_data->type = PERF_CPU_MAP__CPUS;
+ syn_data->size = header_size + PERF_ALIGN(size_cpus, sizeof(u64));
+ return zalloc(syn_data->size);
+ }
+ /* Encode using a bitmask. */
+ syn_data->type = PERF_CPU_MAP__MASK;
+ syn_data->size = header_size + PERF_ALIGN(size_mask, sizeof(u64));
+ return zalloc(syn_data->size);
}
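
The payoff of the range encoding: any consecutive block of CPUs costs header_size + 8 bytes, independent of how many CPUs it covers, while the u16 list grows with the CPU count and the mask with the maximum CPU number. A standalone version of the size arithmetic, with the real struct sizes replaced by hypothetical constants:

  #include <stdio.h>

  #define HDR_CPUS 4U   /* stand-in for sizeof(struct cpu_map_entries) */
  #define HDR_MASK 8U   /* stand-in for sizeof(struct perf_record_mask_cpu_map32) */
  #define BITS_TO_U32(n) (((n) + 31) / 32)

  int main(void)
  {
      unsigned int nr = 256, max_cpu = 255;  /* CPUs 0-255, consecutive */
      unsigned int size_range = 8;                               /* one u64 */
      unsigned int size_cpus = 2 + HDR_CPUS + nr * 2;            /* u16 per cpu */
      unsigned int size_mask = 2 + HDR_MASK + BITS_TO_U32(max_cpu) * 4;

      /* range=8 cpus=518 mask=42 bytes (before u64 alignment) */
      printf("range=%u cpus=%u mask=%u\n", size_range, size_cpus, size_mask);
      return 0;
  }
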
-static void cpu_map_data__synthesize(struct perf_record_cpu_map_data *data,
- const struct perf_cpu_map *map,
- u16 type, int max)
+static void cpu_map_data__synthesize(struct synthesize_cpu_map_data *data)
{
- data->type = type;
-
- switch (type) {
+ switch (data->type) {
case PERF_CPU_MAP__CPUS:
- synthesize_cpus(data, map);
+ synthesize_cpus(data);
break;
case PERF_CPU_MAP__MASK:
- synthesize_mask(data, map, max);
+ synthesize_mask(data);
+ break;
+ case PERF_CPU_MAP__RANGE_CPUS:
+ synthesize_range_cpus(data);
+ break;
default:
break;
}
@@ -1289,23 +1312,22 @@ static void cpu_map_data__synthesize(struct perf_record_cpu_map_data *data,
static struct perf_record_cpu_map *cpu_map_event__new(const struct perf_cpu_map *map)
{
- size_t size = sizeof(struct perf_event_header);
+ struct synthesize_cpu_map_data syn_data = { .map = map };
struct perf_record_cpu_map *event;
- int max;
- u16 type;
- event = cpu_map_data__alloc(map, &size, &type, &max);
+
+ event = cpu_map_data__alloc(&syn_data, sizeof(struct perf_event_header));
if (!event)
return NULL;
+ syn_data.data = &event->data;
event->header.type = PERF_RECORD_CPU_MAP;
- event->header.size = size;
- event->data.type = type;
-
- cpu_map_data__synthesize(&event->data, map, type, max);
+ event->header.size = syn_data.size;
+ cpu_map_data__synthesize(&syn_data);
return event;
}
+
int perf_event__synthesize_cpu_map(struct perf_tool *tool,
const struct perf_cpu_map *map,
perf_event__handler_t process,
@@ -1955,7 +1977,7 @@ int perf_event__synthesize_event_update_unit(struct perf_tool *tool, struct evse
if (ev == NULL)
return -ENOMEM;
- strlcpy(ev->data, evsel->unit, size + 1);
+ strlcpy(ev->unit, evsel->unit, size + 1);
err = process(tool, (union perf_event *)ev, NULL, NULL);
free(ev);
return err;
@@ -1972,8 +1994,7 @@ int perf_event__synthesize_event_update_scale(struct perf_tool *tool, struct evs
if (ev == NULL)
return -ENOMEM;
- ev_data = (struct perf_record_event_update_scale *)ev->data;
- ev_data->scale = evsel->scale;
+ ev->scale.scale = evsel->scale;
err = process(tool, (union perf_event *)ev, NULL, NULL);
free(ev);
return err;
@@ -1990,7 +2011,7 @@ int perf_event__synthesize_event_update_name(struct perf_tool *tool, struct evse
if (ev == NULL)
return -ENOMEM;
- strlcpy(ev->data, evsel->name, len + 1);
+ strlcpy(ev->name, evsel->name, len + 1);
err = process(tool, (union perf_event *)ev, NULL, NULL);
free(ev);
return err;
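
The strlcpy() targets changed because perf_record_event_update grew named members (unit, name, scale, cpus) in place of a raw data[] that every caller had to cast. The idea, as a minimal sketch with invented types:

  #include <stdio.h>

  /* Before: one opaque payload, every user casts. */
  struct rec_old {
      unsigned short type;
      char data[32];
  };

  /* After: each payload variant is spelled out. */
  struct rec_new {
      unsigned short type;
      union {
          char unit[32];
          char name[32];
          struct { double scale; } scale;
      };
  };

  int main(void)
  {
      struct rec_new ev = { .type = 1 };

      ev.scale.scale = 1e-6;  /* no cast through a char buffer */
      printf("scale=%g\n", ev.scale.scale);
      return 0;
  }
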
@@ -1999,25 +2020,20 @@ int perf_event__synthesize_event_update_name(struct perf_tool *tool, struct evse
int perf_event__synthesize_event_update_cpus(struct perf_tool *tool, struct evsel *evsel,
perf_event__handler_t process)
{
- size_t size = sizeof(struct perf_record_event_update);
+ struct synthesize_cpu_map_data syn_data = { .map = evsel->core.own_cpus };
struct perf_record_event_update *ev;
- int max, err;
- u16 type;
-
- if (!evsel->core.own_cpus)
- return 0;
+ int err;
- ev = cpu_map_data__alloc(evsel->core.own_cpus, &size, &type, &max);
+ ev = cpu_map_data__alloc(&syn_data, sizeof(struct perf_event_header) + 2 * sizeof(u64));
if (!ev)
return -ENOMEM;
+ syn_data.data = &ev->cpus.cpus;
ev->header.type = PERF_RECORD_EVENT_UPDATE;
- ev->header.size = (u16)size;
+ ev->header.size = (u16)syn_data.size;
ev->type = PERF_EVENT_UPDATE__CPUS;
ev->id = evsel->core.id[0];
-
- cpu_map_data__synthesize((struct perf_record_cpu_map_data *)ev->data,
- evsel->core.own_cpus, type, max);
+ cpu_map_data__synthesize(&syn_data);
err = process(tool, (union perf_event *)ev, NULL, NULL);
free(ev);
diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h
index 1c2c0a838430..a8b0d79bd96c 100644
--- a/tools/perf/util/top.h
+++ b/tools/perf/util/top.h
@@ -5,6 +5,7 @@
#include "tool.h"
#include "evswitch.h"
#include "annotate.h"
+#include "mutex.h"
#include "ordered-events.h"
#include "record.h"
#include <linux/types.h>
@@ -53,8 +54,8 @@ struct perf_top {
struct ordered_events *in;
struct ordered_events data[2];
bool rotate;
- pthread_mutex_t mutex;
- pthread_cond_t cond;
+ struct mutex mutex;
+ struct cond cond;
} qe;
};
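
perf_top's queue now pairs the mutex wrapper with a matching struct cond. A hedged sketch of the wait/signal protocol those two express, written against bare pthreads so it stands alone (the rotate flag mirrors the one in the struct above; everything else is invented):

  #include <pthread.h>
  #include <stdbool.h>

  static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
  static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
  static bool rotate;

  /* Consumer: sleep until a buffer rotation is requested. */
  static void wait_for_rotate(void)
  {
      pthread_mutex_lock(&lock);
      while (!rotate)
          pthread_cond_wait(&cond, &lock);
      rotate = false;
      pthread_mutex_unlock(&lock);
  }

  /* Producer: request a rotation and wake the consumer. */
  static void request_rotate(void)
  {
      pthread_mutex_lock(&lock);
      rotate = true;
      pthread_cond_signal(&cond);
      pthread_mutex_unlock(&lock);
  }

  int main(void)
  {
      request_rotate();
      wait_for_rotate();  /* returns immediately: rotate was set */
      return 0;
  }
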