aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf/util
diff options
context:
space:
mode:
Diffstat (limited to 'tools/perf/util')
-rw-r--r--tools/perf/util/annotate.c10
-rw-r--r--tools/perf/util/annotate.h4
-rw-r--r--tools/perf/util/auxtrace.c18
-rw-r--r--tools/perf/util/auxtrace.h6
-rw-r--r--tools/perf/util/cache.h3
-rw-r--r--tools/perf/util/config.c43
-rw-r--r--tools/perf/util/config.h4
-rw-r--r--tools/perf/util/data-convert-bt.c6
-rw-r--r--tools/perf/util/debug.h11
-rw-r--r--tools/perf/util/event.h121
-rw-r--r--tools/perf/util/evlist.h3
-rw-r--r--tools/perf/util/evsel.c42
-rw-r--r--tools/perf/util/genelf_debug.c5
-rw-r--r--tools/perf/util/header.c3
-rw-r--r--tools/perf/util/help-unknown-cmd.c2
-rw-r--r--tools/perf/util/intel-bts.c2
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-decoder.c304
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-decoder.h13
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-log.h4
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c110
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h7
-rw-r--r--tools/perf/util/intel-pt-decoder/x86-opcode-map.txt2
-rw-r--r--tools/perf/util/intel-pt.c642
-rw-r--r--tools/perf/util/machine.c10
-rw-r--r--tools/perf/util/pmu.h4
-rw-r--r--tools/perf/util/probe-event.h4
-rw-r--r--tools/perf/util/scripting-engines/trace-event-python.c3
-rw-r--r--tools/perf/util/session.c2
-rw-r--r--tools/perf/util/sort.c22
-rw-r--r--tools/perf/util/stat-shadow.c33
-rw-r--r--tools/perf/util/stat.c2
-rw-r--r--tools/perf/util/stat.h2
-rw-r--r--tools/perf/util/strbuf.h4
-rw-r--r--tools/perf/util/trace-event-parse.c4
-rw-r--r--tools/perf/util/usage.c62
-rw-r--r--tools/perf/util/util.c52
-rw-r--r--tools/perf/util/util.h21
37 files changed, 1189 insertions, 401 deletions
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index ddbd56df9187..be1caabb9290 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -1379,7 +1379,9 @@ static const char *annotate__norm_arch(const char *arch_name)
return normalize_arch((char *)arch_name);
}
-int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_name, size_t privsize)
+int symbol__disassemble(struct symbol *sym, struct map *map,
+ const char *arch_name, size_t privsize,
+ struct arch **parch)
{
struct dso *dso = map->dso;
char command[PATH_MAX * 2];
@@ -1405,6 +1407,9 @@ int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_na
if (arch == NULL)
return -ENOTSUP;
+ if (parch)
+ *parch = arch;
+
if (arch->init) {
err = arch->init(arch);
if (err) {
@@ -1901,7 +1906,8 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map,
struct rb_root source_line = RB_ROOT;
u64 len;
- if (symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), 0) < 0)
+ if (symbol__disassemble(sym, map, perf_evsel__env_arch(evsel),
+ 0, NULL) < 0)
return -1;
len = symbol__size(sym);
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index 948aa8e6fd39..21055034aedd 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -158,7 +158,9 @@ int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 addr);
int symbol__alloc_hist(struct symbol *sym);
void symbol__annotate_zero_histograms(struct symbol *sym);
-int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_name, size_t privsize);
+int symbol__disassemble(struct symbol *sym, struct map *map,
+ const char *arch_name, size_t privsize,
+ struct arch **parch);
enum symbol_disassemble_errno {
SYMBOL_ANNOTATE_ERRNO__SUCCESS = 0,
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 0daf63b9ee3e..5547457566a7 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -322,6 +322,13 @@ static int auxtrace_queues__add_event_buffer(struct auxtrace_queues *queues,
return auxtrace_queues__add_buffer(queues, idx, buffer);
}
+static bool filter_cpu(struct perf_session *session, int cpu)
+{
+ unsigned long *cpu_bitmap = session->itrace_synth_opts->cpu_bitmap;
+
+ return cpu_bitmap && cpu != -1 && !test_bit(cpu, cpu_bitmap);
+}
+
int auxtrace_queues__add_event(struct auxtrace_queues *queues,
struct perf_session *session,
union perf_event *event, off_t data_offset,
@@ -331,6 +338,9 @@ int auxtrace_queues__add_event(struct auxtrace_queues *queues,
unsigned int idx;
int err;
+ if (filter_cpu(session, event->auxtrace.cpu))
+ return 0;
+
buffer = zalloc(sizeof(struct auxtrace_buffer));
if (!buffer)
return -ENOMEM;
@@ -947,6 +957,8 @@ void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts)
synth_opts->instructions = true;
synth_opts->branches = true;
synth_opts->transactions = true;
+ synth_opts->ptwrites = true;
+ synth_opts->pwr_events = true;
synth_opts->errors = true;
synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE;
synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
@@ -1030,6 +1042,12 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
case 'x':
synth_opts->transactions = true;
break;
+ case 'w':
+ synth_opts->ptwrites = true;
+ break;
+ case 'p':
+ synth_opts->pwr_events = true;
+ break;
case 'e':
synth_opts->errors = true;
break;
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index 9f0de72d58e2..33b5e6cdf38c 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -59,6 +59,8 @@ enum itrace_period_type {
* @instructions: whether to synthesize 'instructions' events
* @branches: whether to synthesize 'branches' events
* @transactions: whether to synthesize events for transactions
+ * @ptwrites: whether to synthesize events for ptwrites
+ * @pwr_events: whether to synthesize power events
* @errors: whether to synthesize decoder error events
* @dont_decode: whether to skip decoding entirely
* @log: write a decoding log
@@ -72,6 +74,7 @@ enum itrace_period_type {
* @period: 'instructions' events period
* @period_type: 'instructions' events period type
* @initial_skip: skip N events at the beginning.
+ * @cpu_bitmap: CPUs for which to synthesize events, or NULL for all
*/
struct itrace_synth_opts {
bool set;
@@ -79,6 +82,8 @@ struct itrace_synth_opts {
bool instructions;
bool branches;
bool transactions;
+ bool ptwrites;
+ bool pwr_events;
bool errors;
bool dont_decode;
bool log;
@@ -92,6 +97,7 @@ struct itrace_synth_opts {
unsigned long long period;
enum itrace_period_type period_type;
unsigned long initial_skip;
+ unsigned long *cpu_bitmap;
};
/**
diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h
index 0328f297a748..0175765c05b9 100644
--- a/tools/perf/util/cache.h
+++ b/tools/perf/util/cache.h
@@ -5,6 +5,7 @@
#include <subcmd/pager.h>
#include "../ui/ui.h"
+#include <linux/compiler.h>
#include <linux/string.h>
#define CMD_EXEC_PATH "--exec-path"
@@ -24,6 +25,6 @@ static inline int is_absolute_path(const char *path)
return path[0] == '/';
}
-char *mkpath(const char *fmt, ...) __attribute__((format (printf, 1, 2)));
+char *mkpath(const char *fmt, ...) __printf(1, 2);
#endif /* __PERF_CACHE_H */
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index 8d724f0fa5a8..31a7dea248d0 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -335,32 +335,42 @@ static int perf_parse_long(const char *value, long *ret)
return 0;
}
-static void die_bad_config(const char *name)
+static void bad_config(const char *name)
{
if (config_file_name)
- die("bad config value for '%s' in %s", name, config_file_name);
- die("bad config value for '%s'", name);
+ pr_warning("bad config value for '%s' in %s, ignoring...\n", name, config_file_name);
+ else
+ pr_warning("bad config value for '%s', ignoring...\n", name);
}
-u64 perf_config_u64(const char *name, const char *value)
+int perf_config_u64(u64 *dest, const char *name, const char *value)
{
long long ret = 0;
- if (!perf_parse_llong(value, &ret))
- die_bad_config(name);
- return (u64) ret;
+ if (!perf_parse_llong(value, &ret)) {
+ bad_config(name);
+ return -1;
+ }
+
+ *dest = ret;
+ return 0;
}
-int perf_config_int(const char *name, const char *value)
+int perf_config_int(int *dest, const char *name, const char *value)
{
long ret = 0;
- if (!perf_parse_long(value, &ret))
- die_bad_config(name);
- return ret;
+ if (!perf_parse_long(value, &ret)) {
+ bad_config(name);
+ return -1;
+ }
+ *dest = ret;
+ return 0;
}
static int perf_config_bool_or_int(const char *name, const char *value, int *is_bool)
{
+ int ret;
+
*is_bool = 1;
if (!value)
return 1;
@@ -371,7 +381,7 @@ static int perf_config_bool_or_int(const char *name, const char *value, int *is_
if (!strcasecmp(value, "false") || !strcasecmp(value, "no") || !strcasecmp(value, "off"))
return 0;
*is_bool = 0;
- return perf_config_int(name, value);
+ return perf_config_int(&ret, name, value) < 0 ? -1 : ret;
}
int perf_config_bool(const char *name, const char *value)
@@ -657,8 +667,7 @@ static int perf_config_set__init(struct perf_config_set *set)
user_config = strdup(mkpath("%s/.perfconfig", home));
if (user_config == NULL) {
- warning("Not enough memory to process %s/.perfconfig, "
- "ignoring it.", home);
+ pr_warning("Not enough memory to process %s/.perfconfig, ignoring it.", home);
goto out;
}
@@ -671,8 +680,7 @@ static int perf_config_set__init(struct perf_config_set *set)
ret = 0;
if (st.st_uid && (st.st_uid != geteuid())) {
- warning("File %s not owned by current user or root, "
- "ignoring it.", user_config);
+ pr_warning("File %s not owned by current user or root, ignoring it.", user_config);
goto out_free;
}
@@ -795,7 +803,8 @@ void perf_config_set__delete(struct perf_config_set *set)
*/
int config_error_nonbool(const char *var)
{
- return error("Missing value for '%s'", var);
+ pr_err("Missing value for '%s'", var);
+ return -1;
}
void set_buildid_dir(const char *dir)
diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h
index 1a59a6b43f8b..b6bb11f3f165 100644
--- a/tools/perf/util/config.h
+++ b/tools/perf/util/config.h
@@ -27,8 +27,8 @@ extern const char *config_exclusive_filename;
typedef int (*config_fn_t)(const char *, const char *, void *);
int perf_default_config(const char *, const char *, void *);
int perf_config(config_fn_t fn, void *);
-int perf_config_int(const char *, const char *);
-u64 perf_config_u64(const char *, const char *);
+int perf_config_int(int *dest, const char *, const char *);
+int perf_config_u64(u64 *dest, const char *, const char *);
int perf_config_bool(const char *, const char *);
int config_error_nonbool(const char *);
const char *perf_etc_perfconfig(void);
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index 89d50318833d..3149b70799fd 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -1444,10 +1444,8 @@ static int convert__config(const char *var, const char *value, void *cb)
{
struct convert *c = cb;
- if (!strcmp(var, "convert.queue-size")) {
- c->queue_size = perf_config_u64(var, value);
- return 0;
- }
+ if (!strcmp(var, "convert.queue-size"))
+ return perf_config_u64(&c->queue_size, var, value);
return 0;
}
diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h
index 8a23ea1a71c7..c818bdb1c1ab 100644
--- a/tools/perf/util/debug.h
+++ b/tools/perf/util/debug.h
@@ -4,6 +4,7 @@
#include <stdbool.h>
#include <string.h>
+#include <linux/compiler.h>
#include "event.h"
#include "../ui/helpline.h"
#include "../ui/progress.h"
@@ -40,16 +41,16 @@ extern int debug_data_convert;
#define STRERR_BUFSIZE 128 /* For the buffer size of str_error_r */
-int dump_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2)));
+int dump_printf(const char *fmt, ...) __printf(1, 2);
void trace_event(union perf_event *event);
-int ui__error(const char *format, ...) __attribute__((format(printf, 1, 2)));
-int ui__warning(const char *format, ...) __attribute__((format(printf, 1, 2)));
+int ui__error(const char *format, ...) __printf(1, 2);
+int ui__warning(const char *format, ...) __printf(1, 2);
void pr_stat(const char *fmt, ...);
-int eprintf(int level, int var, const char *fmt, ...) __attribute__((format(printf, 3, 4)));
-int eprintf_time(int level, int var, u64 t, const char *fmt, ...) __attribute__((format(printf, 4, 5)));
+int eprintf(int level, int var, const char *fmt, ...) __printf(3, 4);
+int eprintf_time(int level, int var, u64 t, const char *fmt, ...) __printf(4, 5);
int veprintf(int level, int var, const char *fmt, va_list args);
int perf_debug_option(const char *str);
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 7c3fa1c8cbcd..9967c87af7a6 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -252,6 +252,127 @@ enum auxtrace_error_type {
PERF_AUXTRACE_ERROR_MAX
};
+/* Attribute type for custom synthesized events */
+#define PERF_TYPE_SYNTH (INT_MAX + 1U)
+
+/* Attribute config for custom synthesized events */
+enum perf_synth_id {
+ PERF_SYNTH_INTEL_PTWRITE,
+ PERF_SYNTH_INTEL_MWAIT,
+ PERF_SYNTH_INTEL_PWRE,
+ PERF_SYNTH_INTEL_EXSTOP,
+ PERF_SYNTH_INTEL_PWRX,
+ PERF_SYNTH_INTEL_CBR,
+};
+
+/*
+ * Raw data formats for synthesized events. Note that 4 bytes of padding are
+ * present to match the 'size' member of PERF_SAMPLE_RAW data which is always
+ * 8-byte aligned. That means we must dereference raw_data with an offset of 4.
+ * Refer perf_sample__synth_ptr() and perf_synth__raw_data(). It also means the
+ * structure sizes are 4 bytes bigger than the raw_size, refer
+ * perf_synth__raw_size().
+ */
+
+struct perf_synth_intel_ptwrite {
+ u32 padding;
+ union {
+ struct {
+ u32 ip : 1,
+ reserved : 31;
+ };
+ u32 flags;
+ };
+ u64 payload;
+};
+
+struct perf_synth_intel_mwait {
+ u32 padding;
+ u32 reserved;
+ union {
+ struct {
+ u64 hints : 8,
+ reserved1 : 24,
+ extensions : 2,
+ reserved2 : 30;
+ };
+ u64 payload;
+ };
+};
+
+struct perf_synth_intel_pwre {
+ u32 padding;
+ u32 reserved;
+ union {
+ struct {
+ u64 reserved1 : 7,
+ hw : 1,
+ subcstate : 4,
+ cstate : 4,
+ reserved2 : 48;
+ };
+ u64 payload;
+ };
+};
+
+struct perf_synth_intel_exstop {
+ u32 padding;
+ union {
+ struct {
+ u32 ip : 1,
+ reserved : 31;
+ };
+ u32 flags;
+ };
+};
+
+struct perf_synth_intel_pwrx {
+ u32 padding;
+ u32 reserved;
+ union {
+ struct {
+ u64 deepest_cstate : 4,
+ last_cstate : 4,
+ wake_reason : 4,
+ reserved1 : 52;
+ };
+ u64 payload;
+ };
+};
+
+struct perf_synth_intel_cbr {
+ u32 padding;
+ union {
+ struct {
+ u32 cbr : 8,
+ reserved1 : 8,
+ max_nonturbo : 8,
+ reserved2 : 8;
+ };
+ u32 flags;
+ };
+ u32 freq;
+ u32 reserved3;
+};
+
+/*
+ * raw_data is always 4 bytes from an 8-byte boundary, so subtract 4 to get
+ * 8-byte alignment.
+ */
+static inline void *perf_sample__synth_ptr(struct perf_sample *sample)
+{
+ return sample->raw_data - 4;
+}
+
+static inline void *perf_synth__raw_data(void *p)
+{
+ return p + 4;
+}
+
+#define perf_synth__raw_size(d) (sizeof(d) - 4)
+
+#define perf_sample__bad_synth_size(s, d) ((s)->raw_size < sizeof(d) - 4)
+
/*
* The kernel collects the number of events it couldn't send in a stretch and
* when possible sends this number in a PERF_RECORD_LOST event. The number of
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 94cea4398a13..8d601fbdd8d6 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -1,6 +1,7 @@
#ifndef __PERF_EVLIST_H
#define __PERF_EVLIST_H 1
+#include <linux/compiler.h>
#include <linux/kernel.h>
#include <linux/refcount.h>
#include <linux/list.h>
@@ -34,7 +35,7 @@ struct perf_mmap {
refcount_t refcnt;
u64 prev;
struct auxtrace_mmap auxtrace_mmap;
- char event_copy[PERF_SAMPLE_MAX_SIZE] __attribute__((aligned(8)));
+ char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8);
};
static inline size_t
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index cda44b0e821c..6f4882f8d61f 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -11,13 +11,17 @@
#include <errno.h>
#include <inttypes.h>
#include <linux/bitops.h>
+#include <api/fs/fs.h>
#include <api/fs/tracing_path.h>
#include <traceevent/event-parse.h>
#include <linux/hw_breakpoint.h>
#include <linux/perf_event.h>
+#include <linux/compiler.h>
#include <linux/err.h>
#include <sys/ioctl.h>
#include <sys/resource.h>
+#include <sys/types.h>
+#include <dirent.h>
#include "asm/bug.h"
#include "callchain.h"
#include "cgroup.h"
@@ -1441,7 +1445,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
}
static int __open_attr__fprintf(FILE *fp, const char *name, const char *val,
- void *priv __attribute__((unused)))
+ void *priv __maybe_unused)
{
return fprintf(fp, " %-32s %s\n", name, val);
}
@@ -2471,6 +2475,42 @@ bool perf_evsel__fallback(struct perf_evsel *evsel, int err,
return false;
}
+static bool find_process(const char *name)
+{
+ size_t len = strlen(name);
+ DIR *dir;
+ struct dirent *d;
+ int ret = -1;
+
+ dir = opendir(procfs__mountpoint());
+ if (!dir)
+ return false;
+
+ /* Walk through the directory. */
+ while (ret && (d = readdir(dir)) != NULL) {
+ char path[PATH_MAX];
+ char *data;
+ size_t size;
+
+ if ((d->d_type != DT_DIR) ||
+ !strcmp(".", d->d_name) ||
+ !strcmp("..", d->d_name))
+ continue;
+
+ scnprintf(path, sizeof(path), "%s/%s/comm",
+ procfs__mountpoint(), d->d_name);
+
+ if (filename__read_str(path, &data, &size))
+ continue;
+
+ ret = strncmp(name, data, len);
+ free(data);
+ }
+
+ closedir(dir);
+ return ret ? false : true;
+}
+
int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target,
int err, char *msg, size_t size)
{
diff --git a/tools/perf/util/genelf_debug.c b/tools/perf/util/genelf_debug.c
index 5980f7d256b1..40789d8603d0 100644
--- a/tools/perf/util/genelf_debug.c
+++ b/tools/perf/util/genelf_debug.c
@@ -11,6 +11,7 @@
* @remark Copyright 2007 OProfile authors
* @author Philippe Elie
*/
+#include <linux/compiler.h>
#include <sys/types.h>
#include <stdio.h>
#include <getopt.h>
@@ -125,7 +126,7 @@ struct debug_line_header {
* and filesize, last entry is followed by en empty string.
*/
/* follow the first program statement */
-} __attribute__((packed));
+} __packed;
/* DWARF 2 spec talk only about one possible compilation unit header while
* binutils can handle two flavours of dwarf 2, 32 and 64 bits, this is not
@@ -138,7 +139,7 @@ struct compilation_unit_header {
uhalf version;
uword debug_abbrev_offset;
ubyte pointer_size;
-} __attribute__((packed));
+} __packed;
#define DW_LNS_num_opcode (DW_LNS_set_isa + 1)
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index b5baff3007bb..76ed7d03e500 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -8,6 +8,7 @@
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
+#include <linux/compiler.h>
#include <linux/list.h>
#include <linux/kernel.h>
#include <linux/bitops.h>
@@ -1274,7 +1275,7 @@ error:
}
static int __desc_attr__fprintf(FILE *fp, const char *name, const char *val,
- void *priv __attribute__((unused)))
+ void *priv __maybe_unused)
{
return fprintf(fp, ", %s = %s", name, val);
}
diff --git a/tools/perf/util/help-unknown-cmd.c b/tools/perf/util/help-unknown-cmd.c
index 1c88ad6425b8..15b95300d7f3 100644
--- a/tools/perf/util/help-unknown-cmd.c
+++ b/tools/perf/util/help-unknown-cmd.c
@@ -12,7 +12,7 @@ static int perf_unknown_cmd_config(const char *var, const char *value,
void *cb __maybe_unused)
{
if (!strcmp(var, "help.autocorrect"))
- autocorrect = perf_config_int(var,value);
+ return perf_config_int(&autocorrect, var,value);
return 0;
}
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
index b2834ac7b1f5..218ee2bac9a5 100644
--- a/tools/perf/util/intel-bts.c
+++ b/tools/perf/util/intel-bts.c
@@ -866,8 +866,6 @@ static void intel_bts_print_info(u64 *arr, int start, int finish)
fprintf(stdout, intel_bts_info_fmts[i], arr[i]);
}
-u64 intel_bts_auxtrace_info_priv[INTEL_BTS_AUXTRACE_PRIV_SIZE];
-
int intel_bts_process_auxtrace_info(union perf_event *event,
struct perf_session *session)
{
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index 7cf7f7aca4d2..aa1593ce551d 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -64,6 +64,25 @@ enum intel_pt_pkt_state {
INTEL_PT_STATE_FUP_NO_TIP,
};
+static inline bool intel_pt_sample_time(enum intel_pt_pkt_state pkt_state)
+{
+ switch (pkt_state) {
+ case INTEL_PT_STATE_NO_PSB:
+ case INTEL_PT_STATE_NO_IP:
+ case INTEL_PT_STATE_ERR_RESYNC:
+ case INTEL_PT_STATE_IN_SYNC:
+ case INTEL_PT_STATE_TNT:
+ return true;
+ case INTEL_PT_STATE_TIP:
+ case INTEL_PT_STATE_TIP_PGD:
+ case INTEL_PT_STATE_FUP:
+ case INTEL_PT_STATE_FUP_NO_TIP:
+ return false;
+ default:
+ return true;
+ };
+}
+
#ifdef INTEL_PT_STRICT
#define INTEL_PT_STATE_ERR1 INTEL_PT_STATE_NO_PSB
#define INTEL_PT_STATE_ERR2 INTEL_PT_STATE_NO_PSB
@@ -87,11 +106,13 @@ struct intel_pt_decoder {
const unsigned char *buf;
size_t len;
bool return_compression;
+ bool branch_enable;
bool mtc_insn;
bool pge;
bool have_tma;
bool have_cyc;
bool fixup_last_mtc;
+ bool have_last_ip;
uint64_t pos;
uint64_t last_ip;
uint64_t ip;
@@ -99,6 +120,7 @@ struct intel_pt_decoder {
uint64_t timestamp;
uint64_t tsc_timestamp;
uint64_t ref_timestamp;
+ uint64_t sample_timestamp;
uint64_t ret_addr;
uint64_t ctc_timestamp;
uint64_t ctc_delta;
@@ -119,6 +141,7 @@ struct intel_pt_decoder {
int pkt_len;
int last_packet_type;
unsigned int cbr;
+ unsigned int cbr_seen;
unsigned int max_non_turbo_ratio;
double max_non_turbo_ratio_fp;
double cbr_cyc_to_tsc;
@@ -136,9 +159,18 @@ struct intel_pt_decoder {
bool continuous_period;
bool overflow;
bool set_fup_tx_flags;
+ bool set_fup_ptw;
+ bool set_fup_mwait;
+ bool set_fup_pwre;
+ bool set_fup_exstop;
unsigned int fup_tx_flags;
unsigned int tx_flags;
+ uint64_t fup_ptw_payload;
+ uint64_t fup_mwait_payload;
+ uint64_t fup_pwre_payload;
+ uint64_t cbr_payload;
uint64_t timestamp_insn_cnt;
+ uint64_t sample_insn_cnt;
uint64_t stuck_ip;
int no_progress;
int stuck_ip_prd;
@@ -192,6 +224,7 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params)
decoder->pgd_ip = params->pgd_ip;
decoder->data = params->data;
decoder->return_compression = params->return_compression;
+ decoder->branch_enable = params->branch_enable;
decoder->period = params->period;
decoder->period_type = params->period_type;
@@ -398,6 +431,7 @@ static uint64_t intel_pt_calc_ip(const struct intel_pt_pkt *packet,
static inline void intel_pt_set_last_ip(struct intel_pt_decoder *decoder)
{
decoder->last_ip = intel_pt_calc_ip(&decoder->packet, decoder->last_ip);
+ decoder->have_last_ip = true;
}
static inline void intel_pt_set_ip(struct intel_pt_decoder *decoder)
@@ -635,6 +669,8 @@ static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info)
case INTEL_PT_PAD:
case INTEL_PT_VMCS:
case INTEL_PT_MNT:
+ case INTEL_PT_PTWRITE:
+ case INTEL_PT_PTWRITE_IP:
return 0;
case INTEL_PT_MTC:
@@ -675,6 +711,12 @@ static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info)
break;
case INTEL_PT_TSC:
+ /*
+ * For now, do not support using TSC packets - refer
+ * intel_pt_calc_cyc_to_tsc().
+ */
+ if (data->from_mtc)
+ return 1;
timestamp = pkt_info->packet.payload |
(data->timestamp & (0xffULL << 56));
if (data->from_mtc && timestamp < data->timestamp &&
@@ -733,6 +775,11 @@ static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info)
case INTEL_PT_TIP_PGD:
case INTEL_PT_TRACESTOP:
+ case INTEL_PT_EXSTOP:
+ case INTEL_PT_EXSTOP_IP:
+ case INTEL_PT_MWAIT:
+ case INTEL_PT_PWRE:
+ case INTEL_PT_PWRX:
case INTEL_PT_OVF:
case INTEL_PT_BAD: /* Does not happen */
default:
@@ -787,6 +834,14 @@ static void intel_pt_calc_cyc_to_tsc(struct intel_pt_decoder *decoder,
.cbr_cyc_to_tsc = 0,
};
+ /*
+ * For now, do not support using TSC packets for at least the reasons:
+ * 1) timing might have stopped
+ * 2) TSC packets within PSB+ can slip against CYC packets
+ */
+ if (!from_mtc)
+ return;
+
intel_pt_pkt_lookahead(decoder, intel_pt_calc_cyc_cb, &data);
}
@@ -898,6 +953,7 @@ static int intel_pt_walk_insn(struct intel_pt_decoder *decoder,
decoder->tot_insn_cnt += insn_cnt;
decoder->timestamp_insn_cnt += insn_cnt;
+ decoder->sample_insn_cnt += insn_cnt;
decoder->period_insn_cnt += insn_cnt;
if (err) {
@@ -990,6 +1046,57 @@ out_no_progress:
return err;
}
+static bool intel_pt_fup_event(struct intel_pt_decoder *decoder)
+{
+ bool ret = false;
+
+ if (decoder->set_fup_tx_flags) {
+ decoder->set_fup_tx_flags = false;
+ decoder->tx_flags = decoder->fup_tx_flags;
+ decoder->state.type = INTEL_PT_TRANSACTION;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ decoder->state.flags = decoder->fup_tx_flags;
+ return true;
+ }
+ if (decoder->set_fup_ptw) {
+ decoder->set_fup_ptw = false;
+ decoder->state.type = INTEL_PT_PTW;
+ decoder->state.flags |= INTEL_PT_FUP_IP;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ decoder->state.ptw_payload = decoder->fup_ptw_payload;
+ return true;
+ }
+ if (decoder->set_fup_mwait) {
+ decoder->set_fup_mwait = false;
+ decoder->state.type = INTEL_PT_MWAIT_OP;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ decoder->state.mwait_payload = decoder->fup_mwait_payload;
+ ret = true;
+ }
+ if (decoder->set_fup_pwre) {
+ decoder->set_fup_pwre = false;
+ decoder->state.type |= INTEL_PT_PWR_ENTRY;
+ decoder->state.type &= ~INTEL_PT_BRANCH;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ decoder->state.pwre_payload = decoder->fup_pwre_payload;
+ ret = true;
+ }
+ if (decoder->set_fup_exstop) {
+ decoder->set_fup_exstop = false;
+ decoder->state.type |= INTEL_PT_EX_STOP;
+ decoder->state.type &= ~INTEL_PT_BRANCH;
+ decoder->state.flags |= INTEL_PT_FUP_IP;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ ret = true;
+ }
+ return ret;
+}
+
static int intel_pt_walk_fup(struct intel_pt_decoder *decoder)
{
struct intel_pt_insn intel_pt_insn;
@@ -1003,15 +1110,8 @@ static int intel_pt_walk_fup(struct intel_pt_decoder *decoder)
if (err == INTEL_PT_RETURN)
return 0;
if (err == -EAGAIN) {
- if (decoder->set_fup_tx_flags) {
- decoder->set_fup_tx_flags = false;
- decoder->tx_flags = decoder->fup_tx_flags;
- decoder->state.type = INTEL_PT_TRANSACTION;
- decoder->state.from_ip = decoder->ip;
- decoder->state.to_ip = 0;
- decoder->state.flags = decoder->fup_tx_flags;
+ if (intel_pt_fup_event(decoder))
return 0;
- }
return err;
}
decoder->set_fup_tx_flags = false;
@@ -1360,7 +1460,9 @@ static void intel_pt_calc_mtc_timestamp(struct intel_pt_decoder *decoder)
static void intel_pt_calc_cbr(struct intel_pt_decoder *decoder)
{
- unsigned int cbr = decoder->packet.payload;
+ unsigned int cbr = decoder->packet.payload & 0xff;
+
+ decoder->cbr_payload = decoder->packet.payload;
if (decoder->cbr == cbr)
return;
@@ -1417,6 +1519,13 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
case INTEL_PT_TRACESTOP:
case INTEL_PT_BAD:
case INTEL_PT_PSB:
+ case INTEL_PT_PTWRITE:
+ case INTEL_PT_PTWRITE_IP:
+ case INTEL_PT_EXSTOP:
+ case INTEL_PT_EXSTOP_IP:
+ case INTEL_PT_MWAIT:
+ case INTEL_PT_PWRE:
+ case INTEL_PT_PWRX:
decoder->have_tma = false;
intel_pt_log("ERROR: Unexpected packet\n");
return -EAGAIN;
@@ -1446,7 +1555,8 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
case INTEL_PT_FUP:
decoder->pge = true;
- intel_pt_set_last_ip(decoder);
+ if (decoder->packet.count)
+ intel_pt_set_last_ip(decoder);
break;
case INTEL_PT_MODE_TSX:
@@ -1497,6 +1607,13 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
case INTEL_PT_MODE_TSX:
case INTEL_PT_BAD:
case INTEL_PT_PSBEND:
+ case INTEL_PT_PTWRITE:
+ case INTEL_PT_PTWRITE_IP:
+ case INTEL_PT_EXSTOP:
+ case INTEL_PT_EXSTOP_IP:
+ case INTEL_PT_MWAIT:
+ case INTEL_PT_PWRE:
+ case INTEL_PT_PWRX:
intel_pt_log("ERROR: Missing TIP after FUP\n");
decoder->pkt_state = INTEL_PT_STATE_ERR3;
return -ENOENT;
@@ -1625,6 +1742,15 @@ next:
break;
}
intel_pt_set_last_ip(decoder);
+ if (!decoder->branch_enable) {
+ decoder->ip = decoder->last_ip;
+ if (intel_pt_fup_event(decoder))
+ return 0;
+ no_tip = false;
+ break;
+ }
+ if (decoder->set_fup_mwait)
+ no_tip = true;
err = intel_pt_walk_fup(decoder);
if (err != -EAGAIN) {
if (err)
@@ -1650,6 +1776,8 @@ next:
break;
case INTEL_PT_PSB:
+ decoder->last_ip = 0;
+ decoder->have_last_ip = true;
intel_pt_clear_stack(&decoder->stack);
err = intel_pt_walk_psbend(decoder);
if (err == -EAGAIN)
@@ -1696,6 +1824,16 @@ next:
case INTEL_PT_CBR:
intel_pt_calc_cbr(decoder);
+ if (!decoder->branch_enable &&
+ decoder->cbr != decoder->cbr_seen) {
+ decoder->cbr_seen = decoder->cbr;
+ decoder->state.type = INTEL_PT_CBR_CHG;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ decoder->state.cbr_payload =
+ decoder->packet.payload;
+ return 0;
+ }
break;
case INTEL_PT_MODE_EXEC:
@@ -1722,6 +1860,71 @@ next:
case INTEL_PT_PAD:
break;
+ case INTEL_PT_PTWRITE_IP:
+ decoder->fup_ptw_payload = decoder->packet.payload;
+ err = intel_pt_get_next_packet(decoder);
+ if (err)
+ return err;
+ if (decoder->packet.type == INTEL_PT_FUP) {
+ decoder->set_fup_ptw = true;
+ no_tip = true;
+ } else {
+ intel_pt_log_at("ERROR: Missing FUP after PTWRITE",
+ decoder->pos);
+ }
+ goto next;
+
+ case INTEL_PT_PTWRITE:
+ decoder->state.type = INTEL_PT_PTW;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ decoder->state.ptw_payload = decoder->packet.payload;
+ return 0;
+
+ case INTEL_PT_MWAIT:
+ decoder->fup_mwait_payload = decoder->packet.payload;
+ decoder->set_fup_mwait = true;
+ break;
+
+ case INTEL_PT_PWRE:
+ if (decoder->set_fup_mwait) {
+ decoder->fup_pwre_payload =
+ decoder->packet.payload;
+ decoder->set_fup_pwre = true;
+ break;
+ }
+ decoder->state.type = INTEL_PT_PWR_ENTRY;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ decoder->state.pwrx_payload = decoder->packet.payload;
+ return 0;
+
+ case INTEL_PT_EXSTOP_IP:
+ err = intel_pt_get_next_packet(decoder);
+ if (err)
+ return err;
+ if (decoder->packet.type == INTEL_PT_FUP) {
+ decoder->set_fup_exstop = true;
+ no_tip = true;
+ } else {
+ intel_pt_log_at("ERROR: Missing FUP after EXSTOP",
+ decoder->pos);
+ }
+ goto next;
+
+ case INTEL_PT_EXSTOP:
+ decoder->state.type = INTEL_PT_EX_STOP;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ return 0;
+
+ case INTEL_PT_PWRX:
+ decoder->state.type = INTEL_PT_PWR_EXIT;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ decoder->state.pwrx_payload = decoder->packet.payload;
+ return 0;
+
default:
return intel_pt_bug(decoder);
}
@@ -1730,8 +1933,9 @@ next:
static inline bool intel_pt_have_ip(struct intel_pt_decoder *decoder)
{
- return decoder->last_ip || decoder->packet.count == 0 ||
- decoder->packet.count == 3 || decoder->packet.count == 6;
+ return decoder->packet.count &&
+ (decoder->have_last_ip || decoder->packet.count == 3 ||
+ decoder->packet.count == 6);
}
/* Walk PSB+ packets to get in sync. */
@@ -1750,6 +1954,13 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
__fallthrough;
case INTEL_PT_TIP_PGE:
case INTEL_PT_TIP:
+ case INTEL_PT_PTWRITE:
+ case INTEL_PT_PTWRITE_IP:
+ case INTEL_PT_EXSTOP:
+ case INTEL_PT_EXSTOP_IP:
+ case INTEL_PT_MWAIT:
+ case INTEL_PT_PWRE:
+ case INTEL_PT_PWRX:
intel_pt_log("ERROR: Unexpected packet\n");
return -ENOENT;
@@ -1854,14 +2065,10 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
break;
case INTEL_PT_FUP:
- if (decoder->overflow) {
- if (intel_pt_have_ip(decoder))
- intel_pt_set_ip(decoder);
- if (decoder->ip)
- return 0;
- }
- if (decoder->packet.count)
- intel_pt_set_last_ip(decoder);
+ if (intel_pt_have_ip(decoder))
+ intel_pt_set_ip(decoder);
+ if (decoder->ip)
+ return 0;
break;
case INTEL_PT_MTC:
@@ -1910,6 +2117,9 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
break;
case INTEL_PT_PSB:
+ decoder->last_ip = 0;
+ decoder->have_last_ip = true;
+ intel_pt_clear_stack(&decoder->stack);
err = intel_pt_walk_psb(decoder);
if (err)
return err;
@@ -1925,6 +2135,13 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
case INTEL_PT_VMCS:
case INTEL_PT_MNT:
case INTEL_PT_PAD:
+ case INTEL_PT_PTWRITE:
+ case INTEL_PT_PTWRITE_IP:
+ case INTEL_PT_EXSTOP:
+ case INTEL_PT_EXSTOP_IP:
+ case INTEL_PT_MWAIT:
+ case INTEL_PT_PWRE:
+ case INTEL_PT_PWRX:
default:
break;
}
@@ -1935,6 +2152,19 @@ static int intel_pt_sync_ip(struct intel_pt_decoder *decoder)
{
int err;
+ decoder->set_fup_tx_flags = false;
+ decoder->set_fup_ptw = false;
+ decoder->set_fup_mwait = false;
+ decoder->set_fup_pwre = false;
+ decoder->set_fup_exstop = false;
+
+ if (!decoder->branch_enable) {
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ decoder->overflow = false;
+ decoder->state.type = 0; /* Do not have a sample */
+ return 0;
+ }
+
intel_pt_log("Scanning for full IP\n");
err = intel_pt_walk_to_ip(decoder);
if (err)
@@ -2043,6 +2273,7 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder)
decoder->pge = false;
decoder->continuous_period = false;
+ decoder->have_last_ip = false;
decoder->last_ip = 0;
decoder->ip = 0;
intel_pt_clear_stack(&decoder->stack);
@@ -2051,6 +2282,7 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder)
if (err)
return err;
+ decoder->have_last_ip = true;
decoder->pkt_state = INTEL_PT_STATE_NO_IP;
err = intel_pt_walk_psb(decoder);
@@ -2069,7 +2301,7 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder)
static uint64_t intel_pt_est_timestamp(struct intel_pt_decoder *decoder)
{
- uint64_t est = decoder->timestamp_insn_cnt << 1;
+ uint64_t est = decoder->sample_insn_cnt << 1;
if (!decoder->cbr || !decoder->max_non_turbo_ratio)
goto out;
@@ -2077,7 +2309,7 @@ static uint64_t intel_pt_est_timestamp(struct intel_pt_decoder *decoder)
est *= decoder->max_non_turbo_ratio;
est /= decoder->cbr;
out:
- return decoder->timestamp + est;
+ return decoder->sample_timestamp + est;
}
const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
@@ -2093,8 +2325,10 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
err = intel_pt_sync(decoder);
break;
case INTEL_PT_STATE_NO_IP:
+ decoder->have_last_ip = false;
decoder->last_ip = 0;
- /* Fall through */
+ decoder->ip = 0;
+ __fallthrough;
case INTEL_PT_STATE_ERR_RESYNC:
err = intel_pt_sync_ip(decoder);
break;
@@ -2130,15 +2364,29 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
}
} while (err == -ENOLINK);
- decoder->state.err = err ? intel_pt_ext_err(err) : 0;
- decoder->state.timestamp = decoder->timestamp;
+ if (err) {
+ decoder->state.err = intel_pt_ext_err(err);
+ decoder->state.from_ip = decoder->ip;
+ decoder->sample_timestamp = decoder->timestamp;
+ decoder->sample_insn_cnt = decoder->timestamp_insn_cnt;
+ } else {
+ decoder->state.err = 0;
+ if (decoder->cbr != decoder->cbr_seen && decoder->state.type) {
+ decoder->cbr_seen = decoder->cbr;
+ decoder->state.type |= INTEL_PT_CBR_CHG;
+ decoder->state.cbr_payload = decoder->cbr_payload;
+ }
+ if (intel_pt_sample_time(decoder->pkt_state)) {
+ decoder->sample_timestamp = decoder->timestamp;
+ decoder->sample_insn_cnt = decoder->timestamp_insn_cnt;
+ }
+ }
+
+ decoder->state.timestamp = decoder->sample_timestamp;
decoder->state.est_timestamp = intel_pt_est_timestamp(decoder);
decoder->state.cr3 = decoder->cr3;
decoder->state.tot_insn_cnt = decoder->tot_insn_cnt;
- if (err)
- decoder->state.from_ip = decoder->ip;
-
return &decoder->state;
}
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
index e90619a43c0c..921b22e8ca0e 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
@@ -25,11 +25,18 @@
#define INTEL_PT_IN_TX (1 << 0)
#define INTEL_PT_ABORT_TX (1 << 1)
#define INTEL_PT_ASYNC (1 << 2)
+#define INTEL_PT_FUP_IP (1 << 3)
enum intel_pt_sample_type {
INTEL_PT_BRANCH = 1 << 0,
INTEL_PT_INSTRUCTION = 1 << 1,
INTEL_PT_TRANSACTION = 1 << 2,
+ INTEL_PT_PTW = 1 << 3,
+ INTEL_PT_MWAIT_OP = 1 << 4,
+ INTEL_PT_PWR_ENTRY = 1 << 5,
+ INTEL_PT_EX_STOP = 1 << 6,
+ INTEL_PT_PWR_EXIT = 1 << 7,
+ INTEL_PT_CBR_CHG = 1 << 8,
};
enum intel_pt_period_type {
@@ -63,6 +70,11 @@ struct intel_pt_state {
uint64_t timestamp;
uint64_t est_timestamp;
uint64_t trace_nr;
+ uint64_t ptw_payload;
+ uint64_t mwait_payload;
+ uint64_t pwre_payload;
+ uint64_t pwrx_payload;
+ uint64_t cbr_payload;
uint32_t flags;
enum intel_pt_insn_op insn_op;
int insn_len;
@@ -87,6 +99,7 @@ struct intel_pt_params {
bool (*pgd_ip)(uint64_t ip, void *data);
void *data;
bool return_compression;
+ bool branch_enable;
uint64_t period;
enum intel_pt_period_type period_type;
unsigned max_non_turbo_ratio;
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.h b/tools/perf/util/intel-pt-decoder/intel-pt-log.h
index debe751dc3d6..45b64f93f358 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-log.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.h
@@ -16,6 +16,7 @@
#ifndef INCLUDE__INTEL_PT_LOG_H__
#define INCLUDE__INTEL_PT_LOG_H__
+#include <linux/compiler.h>
#include <stdint.h>
#include <inttypes.h>
@@ -34,8 +35,7 @@ void __intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip);
void __intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn,
uint64_t ip);
-__attribute__((format(printf, 1, 2)))
-void __intel_pt_log(const char *fmt, ...);
+void __intel_pt_log(const char *fmt, ...) __printf(1, 2);
#define intel_pt_log(fmt, ...) \
do { \
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
index 7528ae4f7e28..ba4c9dd18643 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
@@ -64,6 +64,13 @@ static const char * const packet_name[] = {
[INTEL_PT_PIP] = "PIP",
[INTEL_PT_OVF] = "OVF",
[INTEL_PT_MNT] = "MNT",
+ [INTEL_PT_PTWRITE] = "PTWRITE",
+ [INTEL_PT_PTWRITE_IP] = "PTWRITE",
+ [INTEL_PT_EXSTOP] = "EXSTOP",
+ [INTEL_PT_EXSTOP_IP] = "EXSTOP",
+ [INTEL_PT_MWAIT] = "MWAIT",
+ [INTEL_PT_PWRE] = "PWRE",
+ [INTEL_PT_PWRX] = "PWRX",
};
const char *intel_pt_pkt_name(enum intel_pt_pkt_type type)
@@ -123,7 +130,7 @@ static int intel_pt_get_cbr(const unsigned char *buf, size_t len,
if (len < 4)
return INTEL_PT_NEED_MORE_BYTES;
packet->type = INTEL_PT_CBR;
- packet->payload = buf[2];
+ packet->payload = le16_to_cpu(*(uint16_t *)(buf + 2));
return 4;
}
@@ -217,12 +224,80 @@ static int intel_pt_get_3byte(const unsigned char *buf, size_t len,
}
}
+static int intel_pt_get_ptwrite(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ packet->count = (buf[1] >> 5) & 0x3;
+ packet->type = buf[1] & BIT(7) ? INTEL_PT_PTWRITE_IP :
+ INTEL_PT_PTWRITE;
+
+ switch (packet->count) {
+ case 0:
+ if (len < 6)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->payload = le32_to_cpu(*(uint32_t *)(buf + 2));
+ return 6;
+ case 1:
+ if (len < 10)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->payload = le64_to_cpu(*(uint64_t *)(buf + 2));
+ return 10;
+ default:
+ return INTEL_PT_BAD_PACKET;
+ }
+}
+
+static int intel_pt_get_exstop(struct intel_pt_pkt *packet)
+{
+ packet->type = INTEL_PT_EXSTOP;
+ return 2;
+}
+
+static int intel_pt_get_exstop_ip(struct intel_pt_pkt *packet)
+{
+ packet->type = INTEL_PT_EXSTOP_IP;
+ return 2;
+}
+
+static int intel_pt_get_mwait(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 10)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_MWAIT;
+ packet->payload = le64_to_cpu(*(uint64_t *)(buf + 2));
+ return 10;
+}
+
+static int intel_pt_get_pwre(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 4)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_PWRE;
+ memcpy_le64(&packet->payload, buf + 2, 2);
+ return 4;
+}
+
+static int intel_pt_get_pwrx(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 7)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_PWRX;
+ memcpy_le64(&packet->payload, buf + 2, 5);
+ return 7;
+}
+
static int intel_pt_get_ext(const unsigned char *buf, size_t len,
struct intel_pt_pkt *packet)
{
if (len < 2)
return INTEL_PT_NEED_MORE_BYTES;
+ if ((buf[1] & 0x1f) == 0x12)
+ return intel_pt_get_ptwrite(buf, len, packet);
+
switch (buf[1]) {
case 0xa3: /* Long TNT */
return intel_pt_get_long_tnt(buf, len, packet);
@@ -244,6 +319,16 @@ static int intel_pt_get_ext(const unsigned char *buf, size_t len,
return intel_pt_get_tma(buf, len, packet);
case 0xC3: /* 3-byte header */
return intel_pt_get_3byte(buf, len, packet);
+ case 0x62: /* EXSTOP no IP */
+ return intel_pt_get_exstop(packet);
+ case 0xE2: /* EXSTOP with IP */
+ return intel_pt_get_exstop_ip(packet);
+ case 0xC2: /* MWAIT */
+ return intel_pt_get_mwait(buf, len, packet);
+ case 0x22: /* PWRE */
+ return intel_pt_get_pwre(buf, len, packet);
+ case 0xA2: /* PWRX */
+ return intel_pt_get_pwrx(buf, len, packet);
default:
return INTEL_PT_BAD_PACKET;
}
@@ -522,6 +607,29 @@ int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf,
ret = snprintf(buf, buf_len, "%s 0x%llx (NR=%d)",
name, payload, nr);
return ret;
+ case INTEL_PT_PTWRITE:
+ return snprintf(buf, buf_len, "%s 0x%llx IP:0", name, payload);
+ case INTEL_PT_PTWRITE_IP:
+ return snprintf(buf, buf_len, "%s 0x%llx IP:1", name, payload);
+ case INTEL_PT_EXSTOP:
+ return snprintf(buf, buf_len, "%s IP:0", name);
+ case INTEL_PT_EXSTOP_IP:
+ return snprintf(buf, buf_len, "%s IP:1", name);
+ case INTEL_PT_MWAIT:
+ return snprintf(buf, buf_len, "%s 0x%llx Hints 0x%x Extensions 0x%x",
+ name, payload, (unsigned int)(payload & 0xff),
+ (unsigned int)((payload >> 32) & 0x3));
+ case INTEL_PT_PWRE:
+ return snprintf(buf, buf_len, "%s 0x%llx HW:%u CState:%u Sub-CState:%u",
+ name, payload, !!(payload & 0x80),
+ (unsigned int)((payload >> 12) & 0xf),
+ (unsigned int)((payload >> 8) & 0xf));
+ case INTEL_PT_PWRX:
+ return snprintf(buf, buf_len, "%s 0x%llx Last CState:%u Deepest CState:%u Wake Reason 0x%x",
+ name, payload,
+ (unsigned int)((payload >> 4) & 0xf),
+ (unsigned int)(payload & 0xf),
+ (unsigned int)((payload >> 8) & 0xf));
default:
break;
}
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
index 781bb79883bd..73ddc3a88d07 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
@@ -52,6 +52,13 @@ enum intel_pt_pkt_type {
INTEL_PT_PIP,
INTEL_PT_OVF,
INTEL_PT_MNT,
+ INTEL_PT_PTWRITE,
+ INTEL_PT_PTWRITE_IP,
+ INTEL_PT_EXSTOP,
+ INTEL_PT_EXSTOP_IP,
+ INTEL_PT_MWAIT,
+ INTEL_PT_PWRE,
+ INTEL_PT_PWRX,
};
struct intel_pt_pkt {
diff --git a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
index 767be7c76034..12e377184ee4 100644
--- a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
+++ b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
@@ -1009,7 +1009,7 @@ GrpTable: Grp15
1: fxstor | RDGSBASE Ry (F3),(11B)
2: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B)
3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B)
-4: XSAVE
+4: XSAVE | ptwrite Ey (F3),(11B)
5: XRSTOR | lfence (11B)
6: XSAVEOPT | clwb (66) | mfence (11B)
7: clflush | clflushopt (66) | sfence (11B)
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 4c7718f87a08..b58f9fd1e2ee 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -81,7 +81,6 @@ struct intel_pt {
bool sample_instructions;
u64 instructions_sample_type;
- u64 instructions_sample_period;
u64 instructions_id;
bool sample_branches;
@@ -93,6 +92,18 @@ struct intel_pt {
u64 transactions_sample_type;
u64 transactions_id;
+ bool sample_ptwrites;
+ u64 ptwrites_sample_type;
+ u64 ptwrites_id;
+
+ bool sample_pwr_events;
+ u64 pwr_events_sample_type;
+ u64 mwait_id;
+ u64 pwre_id;
+ u64 exstop_id;
+ u64 pwrx_id;
+ u64 cbr_id;
+
bool synth_needs_swap;
u64 tsc_bit;
@@ -103,6 +114,7 @@ struct intel_pt {
u64 cyc_bit;
u64 noretcomp_bit;
unsigned max_non_turbo_ratio;
+ unsigned cbr2khz;
unsigned long num_events;
@@ -668,6 +680,19 @@ static bool intel_pt_return_compression(struct intel_pt *pt)
return true;
}
+static bool intel_pt_branch_enable(struct intel_pt *pt)
+{
+ struct perf_evsel *evsel;
+ u64 config;
+
+ evlist__for_each_entry(pt->session->evlist, evsel) {
+ if (intel_pt_get_config(pt, &evsel->attr, &config) &&
+ (config & 1) && !(config & 0x2000))
+ return false;
+ }
+ return true;
+}
+
static unsigned int intel_pt_mtc_period(struct intel_pt *pt)
{
struct perf_evsel *evsel;
@@ -799,6 +824,7 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
params.walk_insn = intel_pt_walk_next_insn;
params.data = ptq;
params.return_compression = intel_pt_return_compression(pt);
+ params.branch_enable = intel_pt_branch_enable(pt);
params.max_non_turbo_ratio = pt->max_non_turbo_ratio;
params.mtc_period = intel_pt_mtc_period(pt);
params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n;
@@ -1044,6 +1070,36 @@ static void intel_pt_update_last_branch_rb(struct intel_pt_queue *ptq)
bs->nr += 1;
}
+static inline bool intel_pt_skip_event(struct intel_pt *pt)
+{
+ return pt->synth_opts.initial_skip &&
+ pt->num_events++ < pt->synth_opts.initial_skip;
+}
+
+static void intel_pt_prep_b_sample(struct intel_pt *pt,
+ struct intel_pt_queue *ptq,
+ union perf_event *event,
+ struct perf_sample *sample)
+{
+ event->sample.header.type = PERF_RECORD_SAMPLE;
+ event->sample.header.misc = PERF_RECORD_MISC_USER;
+ event->sample.header.size = sizeof(struct perf_event_header);
+
+ if (!pt->timeless_decoding)
+ sample->time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
+
+ sample->cpumode = PERF_RECORD_MISC_USER;
+ sample->ip = ptq->state->from_ip;
+ sample->pid = ptq->pid;
+ sample->tid = ptq->tid;
+ sample->addr = ptq->state->to_ip;
+ sample->period = 1;
+ sample->cpu = ptq->cpu;
+ sample->flags = ptq->flags;
+ sample->insn_len = ptq->insn_len;
+ memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
+}
+
static int intel_pt_inject_event(union perf_event *event,
struct perf_sample *sample, u64 type,
bool swapped)
@@ -1052,9 +1108,35 @@ static int intel_pt_inject_event(union perf_event *event,
return perf_event__synthesize_sample(event, type, 0, sample, swapped);
}
-static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
+static inline int intel_pt_opt_inject(struct intel_pt *pt,
+ union perf_event *event,
+ struct perf_sample *sample, u64 type)
+{
+ if (!pt->synth_opts.inject)
+ return 0;
+
+ return intel_pt_inject_event(event, sample, type, pt->synth_needs_swap);
+}
+
+static int intel_pt_deliver_synth_b_event(struct intel_pt *pt,
+ union perf_event *event,
+ struct perf_sample *sample, u64 type)
{
int ret;
+
+ ret = intel_pt_opt_inject(pt, event, sample, type);
+ if (ret)
+ return ret;
+
+ ret = perf_session__deliver_synth_event(pt->session, event, sample);
+ if (ret)
+ pr_err("Intel PT: failed to deliver event, error %d\n", ret);
+
+ return ret;
+}
+
+static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
+{
struct intel_pt *pt = ptq->pt;
union perf_event *event = ptq->event_buf;
struct perf_sample sample = { .ip = 0, };
@@ -1066,29 +1148,13 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
if (pt->branches_filter && !(pt->branches_filter & ptq->flags))
return 0;
- if (pt->synth_opts.initial_skip &&
- pt->num_events++ < pt->synth_opts.initial_skip)
+ if (intel_pt_skip_event(pt))
return 0;
- event->sample.header.type = PERF_RECORD_SAMPLE;
- event->sample.header.misc = PERF_RECORD_MISC_USER;
- event->sample.header.size = sizeof(struct perf_event_header);
+ intel_pt_prep_b_sample(pt, ptq, event, &sample);
- if (!pt->timeless_decoding)
- sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
-
- sample.cpumode = PERF_RECORD_MISC_USER;
- sample.ip = ptq->state->from_ip;
- sample.pid = ptq->pid;
- sample.tid = ptq->tid;
- sample.addr = ptq->state->to_ip;
sample.id = ptq->pt->branches_id;
sample.stream_id = ptq->pt->branches_id;
- sample.period = 1;
- sample.cpu = ptq->cpu;
- sample.flags = ptq->flags;
- sample.insn_len = ptq->insn_len;
- memcpy(sample.insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
/*
* perf report cannot handle events without a branch stack when using
@@ -1105,144 +1171,251 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
sample.branch_stack = (struct branch_stack *)&dummy_bs;
}
- if (pt->synth_opts.inject) {
- ret = intel_pt_inject_event(event, &sample,
- pt->branches_sample_type,
- pt->synth_needs_swap);
- if (ret)
- return ret;
+ return intel_pt_deliver_synth_b_event(pt, event, &sample,
+ pt->branches_sample_type);
+}
+
+static void intel_pt_prep_sample(struct intel_pt *pt,
+ struct intel_pt_queue *ptq,
+ union perf_event *event,
+ struct perf_sample *sample)
+{
+ intel_pt_prep_b_sample(pt, ptq, event, sample);
+
+ if (pt->synth_opts.callchain) {
+ thread_stack__sample(ptq->thread, ptq->chain,
+ pt->synth_opts.callchain_sz, sample->ip);
+ sample->callchain = ptq->chain;
}
- ret = perf_session__deliver_synth_event(pt->session, event, &sample);
- if (ret)
- pr_err("Intel Processor Trace: failed to deliver branch event, error %d\n",
- ret);
+ if (pt->synth_opts.last_branch) {
+ intel_pt_copy_last_branch_rb(ptq);
+ sample->branch_stack = ptq->last_branch;
+ }
+}
+
+static inline int intel_pt_deliver_synth_event(struct intel_pt *pt,
+ struct intel_pt_queue *ptq,
+ union perf_event *event,
+ struct perf_sample *sample,
+ u64 type)
+{
+ int ret;
+
+ ret = intel_pt_deliver_synth_b_event(pt, event, sample, type);
+
+ if (pt->synth_opts.last_branch)
+ intel_pt_reset_last_branch_rb(ptq);
return ret;
}
static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
{
- int ret;
struct intel_pt *pt = ptq->pt;
union perf_event *event = ptq->event_buf;
struct perf_sample sample = { .ip = 0, };
- if (pt->synth_opts.initial_skip &&
- pt->num_events++ < pt->synth_opts.initial_skip)
+ if (intel_pt_skip_event(pt))
return 0;
- event->sample.header.type = PERF_RECORD_SAMPLE;
- event->sample.header.misc = PERF_RECORD_MISC_USER;
- event->sample.header.size = sizeof(struct perf_event_header);
-
- if (!pt->timeless_decoding)
- sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
+ intel_pt_prep_sample(pt, ptq, event, &sample);
- sample.cpumode = PERF_RECORD_MISC_USER;
- sample.ip = ptq->state->from_ip;
- sample.pid = ptq->pid;
- sample.tid = ptq->tid;
- sample.addr = ptq->state->to_ip;
sample.id = ptq->pt->instructions_id;
sample.stream_id = ptq->pt->instructions_id;
sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt;
- sample.cpu = ptq->cpu;
- sample.flags = ptq->flags;
- sample.insn_len = ptq->insn_len;
- memcpy(sample.insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
ptq->last_insn_cnt = ptq->state->tot_insn_cnt;
- if (pt->synth_opts.callchain) {
- thread_stack__sample(ptq->thread, ptq->chain,
- pt->synth_opts.callchain_sz, sample.ip);
- sample.callchain = ptq->chain;
- }
+ return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+ pt->instructions_sample_type);
+}
- if (pt->synth_opts.last_branch) {
- intel_pt_copy_last_branch_rb(ptq);
- sample.branch_stack = ptq->last_branch;
- }
+static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
+{
+ struct intel_pt *pt = ptq->pt;
+ union perf_event *event = ptq->event_buf;
+ struct perf_sample sample = { .ip = 0, };
- if (pt->synth_opts.inject) {
- ret = intel_pt_inject_event(event, &sample,
- pt->instructions_sample_type,
- pt->synth_needs_swap);
- if (ret)
- return ret;
- }
+ if (intel_pt_skip_event(pt))
+ return 0;
- ret = perf_session__deliver_synth_event(pt->session, event, &sample);
- if (ret)
- pr_err("Intel Processor Trace: failed to deliver instruction event, error %d\n",
- ret);
+ intel_pt_prep_sample(pt, ptq, event, &sample);
- if (pt->synth_opts.last_branch)
- intel_pt_reset_last_branch_rb(ptq);
+ sample.id = ptq->pt->transactions_id;
+ sample.stream_id = ptq->pt->transactions_id;
- return ret;
+ return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+ pt->transactions_sample_type);
}
-static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
+static void intel_pt_prep_p_sample(struct intel_pt *pt,
+ struct intel_pt_queue *ptq,
+ union perf_event *event,
+ struct perf_sample *sample)
+{
+ intel_pt_prep_sample(pt, ptq, event, sample);
+
+ /*
+ * Zero IP is used to mean "trace start" but that is not the case for
+ * power or PTWRITE events with no IP, so clear the flags.
+ */
+ if (!sample->ip)
+ sample->flags = 0;
+}
+
+static int intel_pt_synth_ptwrite_sample(struct intel_pt_queue *ptq)
{
- int ret;
struct intel_pt *pt = ptq->pt;
union perf_event *event = ptq->event_buf;
struct perf_sample sample = { .ip = 0, };
+ struct perf_synth_intel_ptwrite raw;
- if (pt->synth_opts.initial_skip &&
- pt->num_events++ < pt->synth_opts.initial_skip)
+ if (intel_pt_skip_event(pt))
return 0;
- event->sample.header.type = PERF_RECORD_SAMPLE;
- event->sample.header.misc = PERF_RECORD_MISC_USER;
- event->sample.header.size = sizeof(struct perf_event_header);
+ intel_pt_prep_p_sample(pt, ptq, event, &sample);
- if (!pt->timeless_decoding)
- sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
+ sample.id = ptq->pt->ptwrites_id;
+ sample.stream_id = ptq->pt->ptwrites_id;
- sample.cpumode = PERF_RECORD_MISC_USER;
- sample.ip = ptq->state->from_ip;
- sample.pid = ptq->pid;
- sample.tid = ptq->tid;
- sample.addr = ptq->state->to_ip;
- sample.id = ptq->pt->transactions_id;
- sample.stream_id = ptq->pt->transactions_id;
- sample.period = 1;
- sample.cpu = ptq->cpu;
- sample.flags = ptq->flags;
- sample.insn_len = ptq->insn_len;
- memcpy(sample.insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
+ raw.flags = 0;
+ raw.ip = !!(ptq->state->flags & INTEL_PT_FUP_IP);
+ raw.payload = cpu_to_le64(ptq->state->ptw_payload);
- if (pt->synth_opts.callchain) {
- thread_stack__sample(ptq->thread, ptq->chain,
- pt->synth_opts.callchain_sz, sample.ip);
- sample.callchain = ptq->chain;
- }
+ sample.raw_size = perf_synth__raw_size(raw);
+ sample.raw_data = perf_synth__raw_data(&raw);
- if (pt->synth_opts.last_branch) {
- intel_pt_copy_last_branch_rb(ptq);
- sample.branch_stack = ptq->last_branch;
- }
+ return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+ pt->ptwrites_sample_type);
+}
- if (pt->synth_opts.inject) {
- ret = intel_pt_inject_event(event, &sample,
- pt->transactions_sample_type,
- pt->synth_needs_swap);
- if (ret)
- return ret;
- }
+static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq)
+{
+ struct intel_pt *pt = ptq->pt;
+ union perf_event *event = ptq->event_buf;
+ struct perf_sample sample = { .ip = 0, };
+ struct perf_synth_intel_cbr raw;
+ u32 flags;
- ret = perf_session__deliver_synth_event(pt->session, event, &sample);
- if (ret)
- pr_err("Intel Processor Trace: failed to deliver transaction event, error %d\n",
- ret);
+ if (intel_pt_skip_event(pt))
+ return 0;
- if (pt->synth_opts.last_branch)
- intel_pt_reset_last_branch_rb(ptq);
+ intel_pt_prep_p_sample(pt, ptq, event, &sample);
- return ret;
+ sample.id = ptq->pt->cbr_id;
+ sample.stream_id = ptq->pt->cbr_id;
+
+ flags = (u16)ptq->state->cbr_payload | (pt->max_non_turbo_ratio << 16);
+ raw.flags = cpu_to_le32(flags);
+ raw.freq = cpu_to_le32(raw.cbr * pt->cbr2khz);
+ raw.reserved3 = 0;
+
+ sample.raw_size = perf_synth__raw_size(raw);
+ sample.raw_data = perf_synth__raw_data(&raw);
+
+ return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+ pt->pwr_events_sample_type);
+}
+
+static int intel_pt_synth_mwait_sample(struct intel_pt_queue *ptq)
+{
+ struct intel_pt *pt = ptq->pt;
+ union perf_event *event = ptq->event_buf;
+ struct perf_sample sample = { .ip = 0, };
+ struct perf_synth_intel_mwait raw;
+
+ if (intel_pt_skip_event(pt))
+ return 0;
+
+ intel_pt_prep_p_sample(pt, ptq, event, &sample);
+
+ sample.id = ptq->pt->mwait_id;
+ sample.stream_id = ptq->pt->mwait_id;
+
+ raw.reserved = 0;
+ raw.payload = cpu_to_le64(ptq->state->mwait_payload);
+
+ sample.raw_size = perf_synth__raw_size(raw);
+ sample.raw_data = perf_synth__raw_data(&raw);
+
+ return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+ pt->pwr_events_sample_type);
+}
+
+static int intel_pt_synth_pwre_sample(struct intel_pt_queue *ptq)
+{
+ struct intel_pt *pt = ptq->pt;
+ union perf_event *event = ptq->event_buf;
+ struct perf_sample sample = { .ip = 0, };
+ struct perf_synth_intel_pwre raw;
+
+ if (intel_pt_skip_event(pt))
+ return 0;
+
+ intel_pt_prep_p_sample(pt, ptq, event, &sample);
+
+ sample.id = ptq->pt->pwre_id;
+ sample.stream_id = ptq->pt->pwre_id;
+
+ raw.reserved = 0;
+ raw.payload = cpu_to_le64(ptq->state->pwre_payload);
+
+ sample.raw_size = perf_synth__raw_size(raw);
+ sample.raw_data = perf_synth__raw_data(&raw);
+
+ return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+ pt->pwr_events_sample_type);
+}
+
+static int intel_pt_synth_exstop_sample(struct intel_pt_queue *ptq)
+{
+ struct intel_pt *pt = ptq->pt;
+ union perf_event *event = ptq->event_buf;
+ struct perf_sample sample = { .ip = 0, };
+ struct perf_synth_intel_exstop raw;
+
+ if (intel_pt_skip_event(pt))
+ return 0;
+
+ intel_pt_prep_p_sample(pt, ptq, event, &sample);
+
+ sample.id = ptq->pt->exstop_id;
+ sample.stream_id = ptq->pt->exstop_id;
+
+ raw.flags = 0;
+ raw.ip = !!(ptq->state->flags & INTEL_PT_FUP_IP);
+
+ sample.raw_size = perf_synth__raw_size(raw);
+ sample.raw_data = perf_synth__raw_data(&raw);
+
+ return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+ pt->pwr_events_sample_type);
+}
+
+static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq)
+{
+ struct intel_pt *pt = ptq->pt;
+ union perf_event *event = ptq->event_buf;
+ struct perf_sample sample = { .ip = 0, };
+ struct perf_synth_intel_pwrx raw;
+
+ if (intel_pt_skip_event(pt))
+ return 0;
+
+ intel_pt_prep_p_sample(pt, ptq, event, &sample);
+
+ sample.id = ptq->pt->pwrx_id;
+ sample.stream_id = ptq->pt->pwrx_id;
+
+ raw.reserved = 0;
+ raw.payload = cpu_to_le64(ptq->state->pwrx_payload);
+
+ sample.raw_size = perf_synth__raw_size(raw);
+ sample.raw_data = perf_synth__raw_data(&raw);
+
+ return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+ pt->pwr_events_sample_type);
}
static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
@@ -1296,6 +1469,10 @@ static inline bool intel_pt_is_switch_ip(struct intel_pt_queue *ptq, u64 ip)
PERF_IP_FLAG_INTERRUPT | PERF_IP_FLAG_TX_ABORT));
}
+#define INTEL_PT_PWR_EVT (INTEL_PT_MWAIT_OP | INTEL_PT_PWR_ENTRY | \
+ INTEL_PT_EX_STOP | INTEL_PT_PWR_EXIT | \
+ INTEL_PT_CBR_CHG)
+
static int intel_pt_sample(struct intel_pt_queue *ptq)
{
const struct intel_pt_state *state = ptq->state;
@@ -1307,24 +1484,52 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
ptq->have_sample = false;
- if (pt->sample_instructions &&
- (state->type & INTEL_PT_INSTRUCTION) &&
- (!pt->synth_opts.initial_skip ||
- pt->num_events++ >= pt->synth_opts.initial_skip)) {
+ if (pt->sample_pwr_events && (state->type & INTEL_PT_PWR_EVT)) {
+ if (state->type & INTEL_PT_CBR_CHG) {
+ err = intel_pt_synth_cbr_sample(ptq);
+ if (err)
+ return err;
+ }
+ if (state->type & INTEL_PT_MWAIT_OP) {
+ err = intel_pt_synth_mwait_sample(ptq);
+ if (err)
+ return err;
+ }
+ if (state->type & INTEL_PT_PWR_ENTRY) {
+ err = intel_pt_synth_pwre_sample(ptq);
+ if (err)
+ return err;
+ }
+ if (state->type & INTEL_PT_EX_STOP) {
+ err = intel_pt_synth_exstop_sample(ptq);
+ if (err)
+ return err;
+ }
+ if (state->type & INTEL_PT_PWR_EXIT) {
+ err = intel_pt_synth_pwrx_sample(ptq);
+ if (err)
+ return err;
+ }
+ }
+
+ if (pt->sample_instructions && (state->type & INTEL_PT_INSTRUCTION)) {
err = intel_pt_synth_instruction_sample(ptq);
if (err)
return err;
}
- if (pt->sample_transactions &&
- (state->type & INTEL_PT_TRANSACTION) &&
- (!pt->synth_opts.initial_skip ||
- pt->num_events++ >= pt->synth_opts.initial_skip)) {
+ if (pt->sample_transactions && (state->type & INTEL_PT_TRANSACTION)) {
err = intel_pt_synth_transaction_sample(ptq);
if (err)
return err;
}
+ if (pt->sample_ptwrites && (state->type & INTEL_PT_PTW)) {
+ err = intel_pt_synth_ptwrite_sample(ptq);
+ if (err)
+ return err;
+ }
+
if (!(state->type & INTEL_PT_BRANCH))
return 0;
@@ -1925,36 +2130,65 @@ static int intel_pt_event_synth(struct perf_tool *tool,
NULL);
}
-static int intel_pt_synth_event(struct perf_session *session,
+static int intel_pt_synth_event(struct perf_session *session, const char *name,
struct perf_event_attr *attr, u64 id)
{
struct intel_pt_synth intel_pt_synth;
+ int err;
+
+ pr_debug("Synthesizing '%s' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
+ name, id, (u64)attr->sample_type);
memset(&intel_pt_synth, 0, sizeof(struct intel_pt_synth));
intel_pt_synth.session = session;
- return perf_event__synthesize_attr(&intel_pt_synth.dummy_tool, attr, 1,
- &id, intel_pt_event_synth);
+ err = perf_event__synthesize_attr(&intel_pt_synth.dummy_tool, attr, 1,
+ &id, intel_pt_event_synth);
+ if (err)
+ pr_err("%s: failed to synthesize '%s' event type\n",
+ __func__, name);
+
+ return err;
}
-static int intel_pt_synth_events(struct intel_pt *pt,
- struct perf_session *session)
+static void intel_pt_set_event_name(struct perf_evlist *evlist, u64 id,
+ const char *name)
{
- struct perf_evlist *evlist = session->evlist;
struct perf_evsel *evsel;
- struct perf_event_attr attr;
- bool found = false;
- u64 id;
- int err;
evlist__for_each_entry(evlist, evsel) {
- if (evsel->attr.type == pt->pmu_type && evsel->ids) {
- found = true;
+ if (evsel->id && evsel->id[0] == id) {
+ if (evsel->name)
+ zfree(&evsel->name);
+ evsel->name = strdup(name);
break;
}
}
+}
- if (!found) {
+static struct perf_evsel *intel_pt_evsel(struct intel_pt *pt,
+ struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->attr.type == pt->pmu_type && evsel->ids)
+ return evsel;
+ }
+
+ return NULL;
+}
+
+static int intel_pt_synth_events(struct intel_pt *pt,
+ struct perf_session *session)
+{
+ struct perf_evlist *evlist = session->evlist;
+ struct perf_evsel *evsel = intel_pt_evsel(pt, evlist);
+ struct perf_event_attr attr;
+ u64 id;
+ int err;
+
+ if (!evsel) {
pr_debug("There are no selected events with Intel Processor Trace data\n");
return 0;
}
@@ -1983,6 +2217,25 @@ static int intel_pt_synth_events(struct intel_pt *pt,
if (!id)
id = 1;
+ if (pt->synth_opts.branches) {
+ attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
+ attr.sample_period = 1;
+ attr.sample_type |= PERF_SAMPLE_ADDR;
+ err = intel_pt_synth_event(session, "branches", &attr, id);
+ if (err)
+ return err;
+ pt->sample_branches = true;
+ pt->branches_sample_type = attr.sample_type;
+ pt->branches_id = id;
+ id += 1;
+ attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
+ }
+
+ if (pt->synth_opts.callchain)
+ attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
+ if (pt->synth_opts.last_branch)
+ attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
+
if (pt->synth_opts.instructions) {
attr.config = PERF_COUNT_HW_INSTRUCTIONS;
if (pt->synth_opts.period_type == PERF_ITRACE_PERIOD_NANOSECS)
@@ -1990,70 +2243,90 @@ static int intel_pt_synth_events(struct intel_pt *pt,
intel_pt_ns_to_ticks(pt, pt->synth_opts.period);
else
attr.sample_period = pt->synth_opts.period;
- pt->instructions_sample_period = attr.sample_period;
- if (pt->synth_opts.callchain)
- attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
- if (pt->synth_opts.last_branch)
- attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
- pr_debug("Synthesizing 'instructions' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
- id, (u64)attr.sample_type);
- err = intel_pt_synth_event(session, &attr, id);
- if (err) {
- pr_err("%s: failed to synthesize 'instructions' event type\n",
- __func__);
+ err = intel_pt_synth_event(session, "instructions", &attr, id);
+ if (err)
return err;
- }
pt->sample_instructions = true;
pt->instructions_sample_type = attr.sample_type;
pt->instructions_id = id;
id += 1;
}
+ attr.sample_type &= ~(u64)PERF_SAMPLE_PERIOD;
+ attr.sample_period = 1;
+
if (pt->synth_opts.transactions) {
attr.config = PERF_COUNT_HW_INSTRUCTIONS;
- attr.sample_period = 1;
- if (pt->synth_opts.callchain)
- attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
- if (pt->synth_opts.last_branch)
- attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
- pr_debug("Synthesizing 'transactions' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
- id, (u64)attr.sample_type);
- err = intel_pt_synth_event(session, &attr, id);
- if (err) {
- pr_err("%s: failed to synthesize 'transactions' event type\n",
- __func__);
+ err = intel_pt_synth_event(session, "transactions", &attr, id);
+ if (err)
return err;
- }
pt->sample_transactions = true;
+ pt->transactions_sample_type = attr.sample_type;
pt->transactions_id = id;
+ intel_pt_set_event_name(evlist, id, "transactions");
id += 1;
- evlist__for_each_entry(evlist, evsel) {
- if (evsel->id && evsel->id[0] == pt->transactions_id) {
- if (evsel->name)
- zfree(&evsel->name);
- evsel->name = strdup("transactions");
- break;
- }
- }
}
- if (pt->synth_opts.branches) {
- attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
- attr.sample_period = 1;
- attr.sample_type |= PERF_SAMPLE_ADDR;
- attr.sample_type &= ~(u64)PERF_SAMPLE_CALLCHAIN;
- attr.sample_type &= ~(u64)PERF_SAMPLE_BRANCH_STACK;
- pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
- id, (u64)attr.sample_type);
- err = intel_pt_synth_event(session, &attr, id);
- if (err) {
- pr_err("%s: failed to synthesize 'branches' event type\n",
- __func__);
+ attr.type = PERF_TYPE_SYNTH;
+ attr.sample_type |= PERF_SAMPLE_RAW;
+
+ if (pt->synth_opts.ptwrites) {
+ attr.config = PERF_SYNTH_INTEL_PTWRITE;
+ err = intel_pt_synth_event(session, "ptwrite", &attr, id);
+ if (err)
return err;
- }
- pt->sample_branches = true;
- pt->branches_sample_type = attr.sample_type;
- pt->branches_id = id;
+ pt->sample_ptwrites = true;
+ pt->ptwrites_sample_type = attr.sample_type;
+ pt->ptwrites_id = id;
+ intel_pt_set_event_name(evlist, id, "ptwrite");
+ id += 1;
+ }
+
+ if (pt->synth_opts.pwr_events) {
+ pt->sample_pwr_events = true;
+ pt->pwr_events_sample_type = attr.sample_type;
+
+ attr.config = PERF_SYNTH_INTEL_CBR;
+ err = intel_pt_synth_event(session, "cbr", &attr, id);
+ if (err)
+ return err;
+ pt->cbr_id = id;
+ intel_pt_set_event_name(evlist, id, "cbr");
+ id += 1;
+ }
+
+ if (pt->synth_opts.pwr_events && (evsel->attr.config & 0x10)) {
+ attr.config = PERF_SYNTH_INTEL_MWAIT;
+ err = intel_pt_synth_event(session, "mwait", &attr, id);
+ if (err)
+ return err;
+ pt->mwait_id = id;
+ intel_pt_set_event_name(evlist, id, "mwait");
+ id += 1;
+
+ attr.config = PERF_SYNTH_INTEL_PWRE;
+ err = intel_pt_synth_event(session, "pwre", &attr, id);
+ if (err)
+ return err;
+ pt->pwre_id = id;
+ intel_pt_set_event_name(evlist, id, "pwre");
+ id += 1;
+
+ attr.config = PERF_SYNTH_INTEL_EXSTOP;
+ err = intel_pt_synth_event(session, "exstop", &attr, id);
+ if (err)
+ return err;
+ pt->exstop_id = id;
+ intel_pt_set_event_name(evlist, id, "exstop");
+ id += 1;
+
+ attr.config = PERF_SYNTH_INTEL_PWRX;
+ err = intel_pt_synth_event(session, "pwrx", &attr, id);
+ if (err)
+ return err;
+ pt->pwrx_id = id;
+ intel_pt_set_event_name(evlist, id, "pwrx");
+ id += 1;
}
pt->synth_needs_swap = evsel->needs_swap;
@@ -2322,6 +2595,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
intel_pt_log("TSC frequency %"PRIu64"\n", tsc_freq);
intel_pt_log("Maximum non-turbo ratio %u\n",
pt->max_non_turbo_ratio);
+ pt->cbr2khz = tsc_freq / pt->max_non_turbo_ratio / 1000;
}
if (pt->synth_opts.calls)
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index d7f31cb0a4cb..5de2b86b9880 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1209,10 +1209,12 @@ int machine__create_kernel_maps(struct machine *machine)
*/
map_groups__fixup_end(&machine->kmaps);
- if (machine__get_running_kernel_start(machine, &name, &addr)) {
- } else if (maps__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, name, addr)) {
- machine__destroy_kernel_maps(machine);
- return -1;
+ if (!machine__get_running_kernel_start(machine, &name, &addr)) {
+ if (name &&
+ maps__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, name, addr)) {
+ machine__destroy_kernel_maps(machine);
+ return -1;
+ }
}
return 0;
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index ea7f450dc609..389e9729331f 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -2,6 +2,7 @@
#define __PMU_H
#include <linux/bitmap.h>
+#include <linux/compiler.h>
#include <linux/perf_event.h>
#include <stdbool.h>
#include "evsel.h"
@@ -83,8 +84,7 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet,
bool long_desc, bool details_flag);
bool pmu_have_event(const char *pname, const char *name);
-int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt,
- ...) __attribute__((format(scanf, 3, 4)));
+int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt, ...) __scanf(3, 4);
int perf_pmu__test(void);
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index 373842656fb6..5812947418dd 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -1,6 +1,7 @@
#ifndef _PROBE_EVENT_H
#define _PROBE_EVENT_H
+#include <linux/compiler.h>
#include <stdbool.h>
#include "intlist.h"
@@ -171,8 +172,7 @@ void arch__fix_tev_from_maps(struct perf_probe_event *pev,
struct symbol *sym);
/* If there is no space to write, returns -E2BIG. */
-int e_snprintf(char *str, size_t size, const char *format, ...)
- __attribute__((format(printf, 3, 4)));
+int e_snprintf(char *str, size_t size, const char *format, ...) __printf(3, 4);
/* Maximum index number of event-name postfix */
#define MAX_EVENT_INDEX 1024
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 40de3cb40d21..57b7a00e6f16 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -28,6 +28,7 @@
#include <stdbool.h>
#include <errno.h>
#include <linux/bitmap.h>
+#include <linux/compiler.h>
#include <linux/time64.h>
#include "../../perf.h"
@@ -84,7 +85,7 @@ struct tables {
static struct tables tables_global;
-static void handler_call_die(const char *handler_name) NORETURN;
+static void handler_call_die(const char *handler_name) __noreturn;
static void handler_call_die(const char *handler_name)
{
PyErr_Print();
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 7dc1096264c5..d19c40a81040 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -2035,7 +2035,7 @@ int perf_session__cpu_bitmap(struct perf_session *session,
if (!(evsel->attr.sample_type & PERF_SAMPLE_CPU)) {
pr_err("File does not contain CPU events. "
- "Remove -c option to proceed.\n");
+ "Remove -C option to proceed.\n");
return -1;
}
}
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 5762ae4e9e91..8b327c955a4f 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -2532,12 +2532,12 @@ static int setup_sort_list(struct perf_hpp_list *list, char *str,
ret = sort_dimension__add(list, tok, evlist, level);
if (ret == -EINVAL) {
if (!cacheline_size && !strncasecmp(tok, "dcacheline", strlen(tok)))
- error("The \"dcacheline\" --sort key needs to know the cacheline size and it couldn't be determined on this system");
+ pr_err("The \"dcacheline\" --sort key needs to know the cacheline size and it couldn't be determined on this system");
else
- error("Invalid --sort key: `%s'", tok);
+ pr_err("Invalid --sort key: `%s'", tok);
break;
} else if (ret == -ESRCH) {
- error("Unknown --sort key: `%s'", tok);
+ pr_err("Unknown --sort key: `%s'", tok);
break;
}
}
@@ -2594,7 +2594,7 @@ static int setup_sort_order(struct perf_evlist *evlist)
return 0;
if (sort_order[1] == '\0') {
- error("Invalid --sort key: `+'");
+ pr_err("Invalid --sort key: `+'");
return -EINVAL;
}
@@ -2604,7 +2604,7 @@ static int setup_sort_order(struct perf_evlist *evlist)
*/
if (asprintf(&new_sort_order, "%s,%s",
get_default_sort_order(evlist), sort_order + 1) < 0) {
- error("Not enough memory to set up --sort");
+ pr_err("Not enough memory to set up --sort");
return -ENOMEM;
}
@@ -2668,7 +2668,7 @@ static int __setup_sorting(struct perf_evlist *evlist)
str = strdup(sort_keys);
if (str == NULL) {
- error("Not enough memory to setup sort keys");
+ pr_err("Not enough memory to setup sort keys");
return -ENOMEM;
}
@@ -2678,7 +2678,7 @@ static int __setup_sorting(struct perf_evlist *evlist)
if (!is_strict_order(field_order)) {
str = setup_overhead(str);
if (str == NULL) {
- error("Not enough memory to setup overhead keys");
+ pr_err("Not enough memory to setup overhead keys");
return -ENOMEM;
}
}
@@ -2834,10 +2834,10 @@ static int setup_output_list(struct perf_hpp_list *list, char *str)
tok; tok = strtok_r(NULL, ", ", &tmp)) {
ret = output_field_add(list, tok);
if (ret == -EINVAL) {
- error("Invalid --fields key: `%s'", tok);
+ pr_err("Invalid --fields key: `%s'", tok);
break;
} else if (ret == -ESRCH) {
- error("Unknown --fields key: `%s'", tok);
+ pr_err("Unknown --fields key: `%s'", tok);
break;
}
}
@@ -2877,7 +2877,7 @@ static int __setup_output_field(void)
strp = str = strdup(field_order);
if (str == NULL) {
- error("Not enough memory to setup output fields");
+ pr_err("Not enough memory to setup output fields");
return -ENOMEM;
}
@@ -2885,7 +2885,7 @@ static int __setup_output_field(void)
strp++;
if (!strlen(strp)) {
- error("Invalid --fields key: `+'");
+ pr_err("Invalid --fields key: `+'");
goto out;
}
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index ac10cc675d39..719d6cb86952 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -44,6 +44,8 @@ static struct stats runtime_topdown_slots_issued[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_topdown_slots_retired[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_topdown_fetch_bubbles[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_topdown_recovery_bubbles[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_smi_num_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_aperf_stats[NUM_CTX][MAX_NR_CPUS];
static struct rblist runtime_saved_values;
static bool have_frontend_stalled;
@@ -157,6 +159,8 @@ void perf_stat__reset_shadow_stats(void)
memset(runtime_topdown_slots_issued, 0, sizeof(runtime_topdown_slots_issued));
memset(runtime_topdown_fetch_bubbles, 0, sizeof(runtime_topdown_fetch_bubbles));
memset(runtime_topdown_recovery_bubbles, 0, sizeof(runtime_topdown_recovery_bubbles));
+ memset(runtime_smi_num_stats, 0, sizeof(runtime_smi_num_stats));
+ memset(runtime_aperf_stats, 0, sizeof(runtime_aperf_stats));
next = rb_first(&runtime_saved_values.entries);
while (next) {
@@ -217,6 +221,10 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]);
else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]);
+ else if (perf_stat_evsel__is(counter, SMI_NUM))
+ update_stats(&runtime_smi_num_stats[ctx][cpu], count[0]);
+ else if (perf_stat_evsel__is(counter, APERF))
+ update_stats(&runtime_aperf_stats[ctx][cpu], count[0]);
if (counter->collect_stat) {
struct saved_value *v = saved_value_lookup(counter, cpu, ctx,
@@ -592,6 +600,29 @@ static double td_be_bound(int ctx, int cpu)
return sanitize_val(1.0 - sum);
}
+static void print_smi_cost(int cpu, struct perf_evsel *evsel,
+ struct perf_stat_output_ctx *out)
+{
+ double smi_num, aperf, cycles, cost = 0.0;
+ int ctx = evsel_context(evsel);
+ const char *color = NULL;
+
+ smi_num = avg_stats(&runtime_smi_num_stats[ctx][cpu]);
+ aperf = avg_stats(&runtime_aperf_stats[ctx][cpu]);
+ cycles = avg_stats(&runtime_cycles_stats[ctx][cpu]);
+
+ if ((cycles == 0) || (aperf == 0))
+ return;
+
+ if (smi_num)
+ cost = (aperf - cycles) / aperf * 100.00;
+
+ if (cost > 10)
+ color = PERF_COLOR_RED;
+ out->print_metric(out->ctx, color, "%8.1f%%", "SMI cycles%", cost);
+ out->print_metric(out->ctx, NULL, "%4.0f", "SMI#", smi_num);
+}
+
void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
double avg, int cpu,
struct perf_stat_output_ctx *out)
@@ -825,6 +856,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
}
snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio);
+ } else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
+ print_smi_cost(cpu, evsel, out);
} else {
print_metric(ctxp, NULL, NULL, NULL, 0);
}
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index c58174443dc1..53b9a994a3dc 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -86,6 +86,8 @@ static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = {
ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired),
ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles),
ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles),
+ ID(SMI_NUM, msr/smi/),
+ ID(APERF, msr/aperf/),
};
#undef ID
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 0a65ae23f495..7522bf10b03e 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -22,6 +22,8 @@ enum perf_stat_evsel_id {
PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_RETIRED,
PERF_STAT_EVSEL_ID__TOPDOWN_FETCH_BUBBLES,
PERF_STAT_EVSEL_ID__TOPDOWN_RECOVERY_BUBBLES,
+ PERF_STAT_EVSEL_ID__SMI_NUM,
+ PERF_STAT_EVSEL_ID__APERF,
PERF_STAT_EVSEL_ID__MAX,
};
diff --git a/tools/perf/util/strbuf.h b/tools/perf/util/strbuf.h
index 318424ea561d..802d743378af 100644
--- a/tools/perf/util/strbuf.h
+++ b/tools/perf/util/strbuf.h
@@ -42,6 +42,7 @@
#include <stdarg.h>
#include <stddef.h>
#include <string.h>
+#include <linux/compiler.h>
#include <sys/types.h>
extern char strbuf_slopbuf[];
@@ -85,8 +86,7 @@ static inline int strbuf_addstr(struct strbuf *sb, const char *s) {
return strbuf_add(sb, s, strlen(s));
}
-__attribute__((format(printf,2,3)))
-int strbuf_addf(struct strbuf *sb, const char *fmt, ...);
+int strbuf_addf(struct strbuf *sb, const char *fmt, ...) __printf(2, 3);
/* XXX: if read fails, any partial read is undone */
ssize_t strbuf_read(struct strbuf *, int fd, ssize_t hint);
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index 746bbee645d9..e0a6e9a6a053 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -24,7 +24,7 @@
#include <errno.h>
#include "../perf.h"
-#include "util.h"
+#include "debug.h"
#include "trace-event.h"
#include "sane_ctype.h"
@@ -150,7 +150,7 @@ void parse_ftrace_printk(struct pevent *pevent,
while (line) {
addr_str = strtok_r(line, ":", &fmt);
if (!addr_str) {
- warning("printk format with empty entry");
+ pr_warning("printk format with empty entry");
break;
}
addr = strtoull(addr_str, NULL, 16);
diff --git a/tools/perf/util/usage.c b/tools/perf/util/usage.c
index 996046a66fe5..6cc9d9888ce0 100644
--- a/tools/perf/util/usage.c
+++ b/tools/perf/util/usage.c
@@ -9,75 +9,17 @@
#include "util.h"
#include "debug.h"
-static void report(const char *prefix, const char *err, va_list params)
-{
- char msg[1024];
- vsnprintf(msg, sizeof(msg), err, params);
- fprintf(stderr, " %s%s\n", prefix, msg);
-}
-
-static NORETURN void usage_builtin(const char *err)
+static __noreturn void usage_builtin(const char *err)
{
fprintf(stderr, "\n Usage: %s\n", err);
exit(129);
}
-static NORETURN void die_builtin(const char *err, va_list params)
-{
- report(" Fatal: ", err, params);
- exit(128);
-}
-
-static void error_builtin(const char *err, va_list params)
-{
- report(" Error: ", err, params);
-}
-
-static void warn_builtin(const char *warn, va_list params)
-{
- report(" Warning: ", warn, params);
-}
-
/* If we are in a dlopen()ed .so write to a global variable would segfault
* (ugh), so keep things static. */
-static void (*usage_routine)(const char *err) NORETURN = usage_builtin;
-static void (*error_routine)(const char *err, va_list params) = error_builtin;
-static void (*warn_routine)(const char *err, va_list params) = warn_builtin;
-
-void set_warning_routine(void (*routine)(const char *err, va_list params))
-{
- warn_routine = routine;
-}
+static void (*usage_routine)(const char *err) __noreturn = usage_builtin;
void usage(const char *err)
{
usage_routine(err);
}
-
-void die(const char *err, ...)
-{
- va_list params;
-
- va_start(params, err);
- die_builtin(err, params);
- va_end(params);
-}
-
-int error(const char *err, ...)
-{
- va_list params;
-
- va_start(params, err);
- error_routine(err, params);
- va_end(params);
- return -1;
-}
-
-void warning(const char *warn, ...)
-{
- va_list params;
-
- va_start(params, warn);
- warn_routine(warn, params);
- va_end(params);
-}
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 28c9f335006c..988111e0bab5 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -343,43 +343,6 @@ int perf_event_paranoid(void)
return value;
}
-
-bool find_process(const char *name)
-{
- size_t len = strlen(name);
- DIR *dir;
- struct dirent *d;
- int ret = -1;
-
- dir = opendir(procfs__mountpoint());
- if (!dir)
- return false;
-
- /* Walk through the directory. */
- while (ret && (d = readdir(dir)) != NULL) {
- char path[PATH_MAX];
- char *data;
- size_t size;
-
- if ((d->d_type != DT_DIR) ||
- !strcmp(".", d->d_name) ||
- !strcmp("..", d->d_name))
- continue;
-
- scnprintf(path, sizeof(path), "%s/%s/comm",
- procfs__mountpoint(), d->d_name);
-
- if (filename__read_str(path, &data, &size))
- continue;
-
- ret = strncmp(name, data, len);
- free(data);
- }
-
- closedir(dir);
- return ret ? false : true;
-}
-
static int
fetch_ubuntu_kernel_version(unsigned int *puint)
{
@@ -387,8 +350,12 @@ fetch_ubuntu_kernel_version(unsigned int *puint)
size_t line_len = 0;
char *ptr, *line = NULL;
int version, patchlevel, sublevel, err;
- FILE *vsig = fopen("/proc/version_signature", "r");
+ FILE *vsig;
+
+ if (!puint)
+ return 0;
+ vsig = fopen("/proc/version_signature", "r");
if (!vsig) {
pr_debug("Open /proc/version_signature failed: %s\n",
strerror(errno));
@@ -418,8 +385,7 @@ fetch_ubuntu_kernel_version(unsigned int *puint)
goto errout;
}
- if (puint)
- *puint = (version << 16) + (patchlevel << 8) + sublevel;
+ *puint = (version << 16) + (patchlevel << 8) + sublevel;
err = 0;
errout:
free(line);
@@ -446,6 +412,9 @@ fetch_kernel_version(unsigned int *puint, char *str,
str[str_size - 1] = '\0';
}
+ if (!puint || int_ver_ready)
+ return 0;
+
err = sscanf(utsname.release, "%d.%d.%d",
&version, &patchlevel, &sublevel);
@@ -455,8 +424,7 @@ fetch_kernel_version(unsigned int *puint, char *str,
return -1;
}
- if (puint && !int_ver_ready)
- *puint = (version << 16) + (patchlevel << 8) + sublevel;
+ *puint = (version << 16) + (patchlevel << 8) + sublevel;
return 0;
}
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 5dfb9bb6482d..2c9e58a45310 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -1,7 +1,6 @@
#ifndef GIT_COMPAT_UTIL_H
#define GIT_COMPAT_UTIL_H
-#define _ALL_SOURCE 1
#define _BSD_SOURCE 1
/* glibc 2.20 deprecates _BSD_SOURCE in favour of _DEFAULT_SOURCE */
#define _DEFAULT_SOURCE 1
@@ -11,24 +10,12 @@
#include <stddef.h>
#include <stdlib.h>
#include <stdarg.h>
+#include <linux/compiler.h>
#include <linux/types.h>
-#ifdef __GNUC__
-#define NORETURN __attribute__((__noreturn__))
-#else
-#define NORETURN
-#ifndef __attribute__
-#define __attribute__(x)
-#endif
-#endif
-
/* General helper functions */
-void usage(const char *err) NORETURN;
-void die(const char *err, ...) NORETURN __attribute__((format (printf, 1, 2)));
-int error(const char *err, ...) __attribute__((format (printf, 1, 2)));
-void warning(const char *err, ...) __attribute__((format (printf, 1, 2)));
-
-void set_warning_routine(void (*routine)(const char *err, va_list params));
+void usage(const char *err) __noreturn;
+void die(const char *err, ...) __noreturn __printf(1, 2);
static inline void *zalloc(size_t size)
{
@@ -57,8 +44,6 @@ int hex2u64(const char *ptr, u64 *val);
extern unsigned int page_size;
extern int cacheline_size;
-bool find_process(const char *name);
-
int fetch_kernel_version(unsigned int *puint,
char *str, size_t str_sz);
#define KVER_VERSION(x) (((x) >> 16) & 0xff)