aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf/util
diff options
context:
space:
mode:
Diffstat (limited to 'tools/perf/util')
-rwxr-xr-xtools/perf/util/PERF-VERSION-GEN2
-rw-r--r--tools/perf/util/annotate.c5
-rw-r--r--tools/perf/util/auxtrace.c3
-rw-r--r--tools/perf/util/auxtrace.h34
-rw-r--r--tools/perf/util/config.c8
-rw-r--r--tools/perf/util/cpumap.c64
-rw-r--r--tools/perf/util/cpumap.h10
-rw-r--r--tools/perf/util/cputopo.c84
-rw-r--r--tools/perf/util/cputopo.h2
-rw-r--r--tools/perf/util/cs-etm-decoder/cs-etm-decoder.c268
-rw-r--r--tools/perf/util/cs-etm-decoder/cs-etm-decoder.h39
-rw-r--r--tools/perf/util/cs-etm.c1026
-rw-r--r--tools/perf/util/cs-etm.h94
-rw-r--r--tools/perf/util/dso.c125
-rw-r--r--tools/perf/util/env.c1
-rw-r--r--tools/perf/util/env.h3
-rw-r--r--tools/perf/util/event.c4
-rw-r--r--tools/perf/util/event.h2
-rw-r--r--tools/perf/util/evsel.c24
-rw-r--r--tools/perf/util/header.c96
-rw-r--r--tools/perf/util/hist.c2
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-decoder.c443
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-decoder.h143
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c140
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h21
-rw-r--r--tools/perf/util/intel-pt.c697
-rw-r--r--tools/perf/util/machine.c8
-rw-r--r--tools/perf/util/map.c6
-rw-r--r--tools/perf/util/map_groups.h2
-rw-r--r--tools/perf/util/perf_regs.h4
-rw-r--r--tools/perf/util/pmu.c28
-rw-r--r--tools/perf/util/s390-cpumsf.c96
-rw-r--r--tools/perf/util/scripting-engines/trace-event-python.c8
-rw-r--r--tools/perf/util/setup.py2
-rw-r--r--tools/perf/util/smt.c8
-rw-r--r--tools/perf/util/stat-display.c29
-rw-r--r--tools/perf/util/stat-shadow.c1
-rw-r--r--tools/perf/util/stat.c1
-rw-r--r--tools/perf/util/stat.h1
-rw-r--r--tools/perf/util/symbol-elf.c3
-rw-r--r--tools/perf/util/symbol.c97
-rw-r--r--tools/perf/util/symbol_conf.h1
-rw-r--r--tools/perf/util/thread-stack.c14
-rw-r--r--tools/perf/util/thread-stack.h4
-rw-r--r--tools/perf/util/thread.c12
-rw-r--r--tools/perf/util/thread.h4
-rw-r--r--tools/perf/util/time-utils.c132
47 files changed, 3193 insertions, 608 deletions
diff --git a/tools/perf/util/PERF-VERSION-GEN b/tools/perf/util/PERF-VERSION-GEN
index 3802cee5e188..59241ff342be 100755
--- a/tools/perf/util/PERF-VERSION-GEN
+++ b/tools/perf/util/PERF-VERSION-GEN
@@ -19,7 +19,7 @@ TAG=
if test -d ../../.git -o -f ../../.git
then
TAG=$(git describe --abbrev=0 --match "v[0-9].[0-9]*" 2>/dev/null )
- CID=$(git log -1 --abbrev=4 --pretty=format:"%h" 2>/dev/null) && CID="-g$CID"
+ CID=$(git log -1 --abbrev=12 --pretty=format:"%h" 2>/dev/null) && CID="-g$CID"
elif test -f ../../PERF-VERSION-FILE
then
TAG=$(cut -d' ' -f3 ../../PERF-VERSION-FILE | sed -e 's/\"//g')
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 79db038b56f2..c8ce13419d9b 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -931,9 +931,8 @@ static int symbol__inc_addr_samples(struct symbol *sym, struct map *map,
if (sym == NULL)
return 0;
src = symbol__hists(sym, evsel->evlist->nr_entries);
- if (src == NULL)
- return -ENOMEM;
- return __symbol__inc_addr_samples(sym, map, src, evsel->idx, addr, sample);
+ return (src) ? __symbol__inc_addr_samples(sym, map, src, evsel->idx,
+ addr, sample) : 0;
}
static int symbol__account_cycles(u64 addr, u64 start,
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 66e82bd0683e..cfdbf65f1e02 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -1001,7 +1001,8 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
}
if (!str) {
- itrace_synth_opts__set_default(synth_opts, false);
+ itrace_synth_opts__set_default(synth_opts,
+ synth_opts->default_no_sample);
return 0;
}
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index d62f60eb5df4..e9b4c5edf78b 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -74,6 +74,8 @@ enum itrace_period_type {
* @period_type: 'instructions' events period type
* @initial_skip: skip N events at the beginning.
* @cpu_bitmap: CPUs for which to synthesize events, or NULL for all
+ * @ptime_range: time intervals to trace or NULL
+ * @range_num: number of time intervals to trace
*/
struct itrace_synth_opts {
bool set;
@@ -98,6 +100,8 @@ struct itrace_synth_opts {
enum itrace_period_type period_type;
unsigned long initial_skip;
unsigned long *cpu_bitmap;
+ struct perf_time_interval *ptime_range;
+ int range_num;
};
/**
@@ -590,6 +594,21 @@ static inline void auxtrace__free(struct perf_session *session)
" PERIOD[ns|us|ms|i|t]: specify period to sample stream\n" \
" concatenate multiple options. Default is ibxwpe or cewp\n"
+static inline
+void itrace_synth_opts__set_time_range(struct itrace_synth_opts *opts,
+ struct perf_time_interval *ptime_range,
+ int range_num)
+{
+ opts->ptime_range = ptime_range;
+ opts->range_num = range_num;
+}
+
+static inline
+void itrace_synth_opts__clear_time_range(struct itrace_synth_opts *opts)
+{
+ opts->ptime_range = NULL;
+ opts->range_num = 0;
+}
#else
@@ -733,6 +752,21 @@ void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
#define ITRACE_HELP ""
+static inline
+void itrace_synth_opts__set_time_range(struct itrace_synth_opts *opts
+ __maybe_unused,
+ struct perf_time_interval *ptime_range
+ __maybe_unused,
+ int range_num __maybe_unused)
+{
+}
+
+static inline
+void itrace_synth_opts__clear_time_range(struct itrace_synth_opts *opts
+ __maybe_unused)
+{
+}
+
#endif
#endif
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index 7e3c1b60120c..e7d2c08d263a 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -739,11 +739,15 @@ int perf_config(config_fn_t fn, void *data)
if (ret < 0) {
pr_err("Error: wrong config key-value pair %s=%s\n",
key, value);
- break;
+ /*
+ * Can't be just a 'break', as perf_config_set__for_each_entry()
+ * expands to two nested for() loops.
+ */
+ goto out;
}
}
}
-
+out:
return ret;
}
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 0b599229bc7e..c11a459ca582 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -373,6 +373,46 @@ int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res,
return 0;
}
+int cpu_map__get_die_id(int cpu)
+{
+ int value, ret = cpu__get_topology_int(cpu, "die_id", &value);
+
+ return ret ?: value;
+}
+
+int cpu_map__get_die(struct cpu_map *map, int idx, void *data)
+{
+ int cpu, die_id, s;
+
+ if (idx > map->nr)
+ return -1;
+
+ cpu = map->map[idx];
+
+ die_id = cpu_map__get_die_id(cpu);
+ /* There is no die_id on legacy system. */
+ if (die_id == -1)
+ die_id = 0;
+
+ s = cpu_map__get_socket(map, idx, data);
+ if (s == -1)
+ return -1;
+
+ /*
+ * Encode socket in bit range 15:8
+ * die_id is relative to socket, and
+ * we need a global id. So we combine
+ * socket + die id
+ */
+ if (WARN_ONCE(die_id >> 8, "The die id number is too big.\n"))
+ return -1;
+
+ if (WARN_ONCE(s >> 8, "The socket id number is too big.\n"))
+ return -1;
+
+ return (s << 8) | (die_id & 0xff);
+}
+
int cpu_map__get_core_id(int cpu)
{
int value, ret = cpu__get_topology_int(cpu, "core_id", &value);
@@ -381,7 +421,7 @@ int cpu_map__get_core_id(int cpu)
int cpu_map__get_core(struct cpu_map *map, int idx, void *data)
{
- int cpu, s;
+ int cpu, s_die;
if (idx > map->nr)
return -1;
@@ -390,17 +430,22 @@ int cpu_map__get_core(struct cpu_map *map, int idx, void *data)
cpu = cpu_map__get_core_id(cpu);
- s = cpu_map__get_socket(map, idx, data);
- if (s == -1)
+ /* s_die is the combination of socket + die id */
+ s_die = cpu_map__get_die(map, idx, data);
+ if (s_die == -1)
return -1;
/*
- * encode socket in upper 16 bits
- * core_id is relative to socket, and
+ * encode socket in bit range 31:24
+ * encode die id in bit range 23:16
+ * core_id is relative to socket and die,
* we need a global id. So we combine
- * socket+ core id
+ * socket + die id + core id
*/
- return (s << 16) | (cpu & 0xffff);
+ if (WARN_ONCE(cpu >> 16, "The core id number is too big.\n"))
+ return -1;
+
+ return (s_die << 16) | (cpu & 0xffff);
}
int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp)
@@ -408,6 +453,11 @@ int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp)
return cpu_map__build_map(cpus, sockp, cpu_map__get_socket, NULL);
}
+int cpu_map__build_die_map(struct cpu_map *cpus, struct cpu_map **diep)
+{
+ return cpu_map__build_map(cpus, diep, cpu_map__get_die, NULL);
+}
+
int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep)
{
return cpu_map__build_map(cpus, corep, cpu_map__get_core, NULL);
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index f00ce624b9f7..1265f0e33920 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -25,9 +25,12 @@ size_t cpu_map__snprint_mask(struct cpu_map *map, char *buf, size_t size);
size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp);
int cpu_map__get_socket_id(int cpu);
int cpu_map__get_socket(struct cpu_map *map, int idx, void *data);
+int cpu_map__get_die_id(int cpu);
+int cpu_map__get_die(struct cpu_map *map, int idx, void *data);
int cpu_map__get_core_id(int cpu);
int cpu_map__get_core(struct cpu_map *map, int idx, void *data);
int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp);
+int cpu_map__build_die_map(struct cpu_map *cpus, struct cpu_map **diep);
int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep);
const struct cpu_map *cpu_map__online(void); /* thread unsafe */
@@ -43,7 +46,12 @@ static inline int cpu_map__socket(struct cpu_map *sock, int s)
static inline int cpu_map__id_to_socket(int id)
{
- return id >> 16;
+ return id >> 24;
+}
+
+static inline int cpu_map__id_to_die(int id)
+{
+ return (id >> 16) & 0xff;
}
static inline int cpu_map__id_to_cpu(int id)
diff --git a/tools/perf/util/cputopo.c b/tools/perf/util/cputopo.c
index ece0710249d4..26e73a4bd4fe 100644
--- a/tools/perf/util/cputopo.c
+++ b/tools/perf/util/cputopo.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <sys/param.h>
+#include <sys/utsname.h>
#include <inttypes.h>
#include <api/fs/fs.h>
@@ -8,11 +9,14 @@
#include "util.h"
#include "env.h"
-
#define CORE_SIB_FMT \
"%s/devices/system/cpu/cpu%d/topology/core_siblings_list"
+#define DIE_SIB_FMT \
+ "%s/devices/system/cpu/cpu%d/topology/die_cpus_list"
#define THRD_SIB_FMT \
"%s/devices/system/cpu/cpu%d/topology/thread_siblings_list"
+#define THRD_SIB_FMT_NEW \
+ "%s/devices/system/cpu/cpu%d/topology/core_cpus_list"
#define NODE_ONLINE_FMT \
"%s/devices/system/node/online"
#define NODE_MEMINFO_FMT \
@@ -34,12 +38,12 @@ static int build_cpu_topology(struct cpu_topology *tp, int cpu)
sysfs__mountpoint(), cpu);
fp = fopen(filename, "r");
if (!fp)
- goto try_threads;
+ goto try_dies;
sret = getline(&buf, &len, fp);
fclose(fp);
if (sret <= 0)
- goto try_threads;
+ goto try_dies;
p = strchr(buf, '\n');
if (p)
@@ -57,9 +61,44 @@ static int build_cpu_topology(struct cpu_topology *tp, int cpu)
}
ret = 0;
+try_dies:
+ if (!tp->die_siblings)
+ goto try_threads;
+
+ scnprintf(filename, MAXPATHLEN, DIE_SIB_FMT,
+ sysfs__mountpoint(), cpu);
+ fp = fopen(filename, "r");
+ if (!fp)
+ goto try_threads;
+
+ sret = getline(&buf, &len, fp);
+ fclose(fp);
+ if (sret <= 0)
+ goto try_threads;
+
+ p = strchr(buf, '\n');
+ if (p)
+ *p = '\0';
+
+ for (i = 0; i < tp->die_sib; i++) {
+ if (!strcmp(buf, tp->die_siblings[i]))
+ break;
+ }
+ if (i == tp->die_sib) {
+ tp->die_siblings[i] = buf;
+ tp->die_sib++;
+ buf = NULL;
+ len = 0;
+ }
+ ret = 0;
+
try_threads:
- scnprintf(filename, MAXPATHLEN, THRD_SIB_FMT,
+ scnprintf(filename, MAXPATHLEN, THRD_SIB_FMT_NEW,
sysfs__mountpoint(), cpu);
+ if (access(filename, F_OK) == -1) {
+ scnprintf(filename, MAXPATHLEN, THRD_SIB_FMT,
+ sysfs__mountpoint(), cpu);
+ }
fp = fopen(filename, "r");
if (!fp)
goto done;
@@ -98,21 +137,46 @@ void cpu_topology__delete(struct cpu_topology *tp)
for (i = 0 ; i < tp->core_sib; i++)
zfree(&tp->core_siblings[i]);
+ if (tp->die_sib) {
+ for (i = 0 ; i < tp->die_sib; i++)
+ zfree(&tp->die_siblings[i]);
+ }
+
for (i = 0 ; i < tp->thread_sib; i++)
zfree(&tp->thread_siblings[i]);
free(tp);
}
+static bool has_die_topology(void)
+{
+ char filename[MAXPATHLEN];
+ struct utsname uts;
+
+ if (uname(&uts) < 0)
+ return false;
+
+ if (strncmp(uts.machine, "x86_64", 6))
+ return false;
+
+ scnprintf(filename, MAXPATHLEN, DIE_SIB_FMT,
+ sysfs__mountpoint(), 0);
+ if (access(filename, F_OK) == -1)
+ return false;
+
+ return true;
+}
+
struct cpu_topology *cpu_topology__new(void)
{
struct cpu_topology *tp = NULL;
void *addr;
- u32 nr, i;
+ u32 nr, i, nr_addr;
size_t sz;
long ncpus;
int ret = -1;
struct cpu_map *map;
+ bool has_die = has_die_topology();
ncpus = cpu__max_present_cpu();
@@ -126,7 +190,11 @@ struct cpu_topology *cpu_topology__new(void)
nr = (u32)(ncpus & UINT_MAX);
sz = nr * sizeof(char *);
- addr = calloc(1, sizeof(*tp) + 2 * sz);
+ if (has_die)
+ nr_addr = 3;
+ else
+ nr_addr = 2;
+ addr = calloc(1, sizeof(*tp) + nr_addr * sz);
if (!addr)
goto out_free;
@@ -134,6 +202,10 @@ struct cpu_topology *cpu_topology__new(void)
addr += sizeof(*tp);
tp->core_siblings = addr;
addr += sz;
+ if (has_die) {
+ tp->die_siblings = addr;
+ addr += sz;
+ }
tp->thread_siblings = addr;
for (i = 0; i < nr; i++) {
diff --git a/tools/perf/util/cputopo.h b/tools/perf/util/cputopo.h
index 47a97e71acdf..bae2f1d41856 100644
--- a/tools/perf/util/cputopo.h
+++ b/tools/perf/util/cputopo.h
@@ -7,8 +7,10 @@
struct cpu_topology {
u32 core_sib;
+ u32 die_sib;
u32 thread_sib;
char **core_siblings;
+ char **die_siblings;
char **thread_siblings;
};
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
index 39fe21e1cf93..bb45e23018ee 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -18,8 +18,6 @@
#include "intlist.h"
#include "util.h"
-#define MAX_BUFFER 1024
-
/* use raw logging */
#ifdef CS_DEBUG_RAW
#define CS_LOG_RAW_FRAMES
@@ -31,33 +29,26 @@
#endif
#endif
-#define CS_ETM_INVAL_ADDR 0xdeadbeefdeadbeefUL
-
struct cs_etm_decoder {
void *data;
void (*packet_printer)(const char *msg);
dcd_tree_handle_t dcd_tree;
cs_etm_mem_cb_type mem_access;
ocsd_datapath_resp_t prev_return;
- u32 packet_count;
- u32 head;
- u32 tail;
- struct cs_etm_packet packet_buffer[MAX_BUFFER];
};
static u32
cs_etm_decoder__mem_access(const void *context,
const ocsd_vaddr_t address,
const ocsd_mem_space_acc_t mem_space __maybe_unused,
+ const u8 trace_chan_id,
const u32 req_size,
u8 *buffer)
{
struct cs_etm_decoder *decoder = (struct cs_etm_decoder *) context;
- return decoder->mem_access(decoder->data,
- address,
- req_size,
- buffer);
+ return decoder->mem_access(decoder->data, trace_chan_id,
+ address, req_size, buffer);
}
int cs_etm_decoder__add_mem_access_cb(struct cs_etm_decoder *decoder,
@@ -66,9 +57,10 @@ int cs_etm_decoder__add_mem_access_cb(struct cs_etm_decoder *decoder,
{
decoder->mem_access = cb_func;
- if (ocsd_dt_add_callback_mem_acc(decoder->dcd_tree, start, end,
- OCSD_MEM_SPACE_ANY,
- cs_etm_decoder__mem_access, decoder))
+ if (ocsd_dt_add_callback_trcid_mem_acc(decoder->dcd_tree, start, end,
+ OCSD_MEM_SPACE_ANY,
+ cs_etm_decoder__mem_access,
+ decoder))
return -1;
return 0;
@@ -88,14 +80,14 @@ int cs_etm_decoder__reset(struct cs_etm_decoder *decoder)
return 0;
}
-int cs_etm_decoder__get_packet(struct cs_etm_decoder *decoder,
+int cs_etm_decoder__get_packet(struct cs_etm_packet_queue *packet_queue,
struct cs_etm_packet *packet)
{
- if (!decoder || !packet)
+ if (!packet_queue || !packet)
return -EINVAL;
/* Nothing to do, might as well just return */
- if (decoder->packet_count == 0)
+ if (packet_queue->packet_count == 0)
return 0;
/*
* The queueing process in function cs_etm_decoder__buffer_packet()
@@ -106,11 +98,12 @@ int cs_etm_decoder__get_packet(struct cs_etm_decoder *decoder,
* value. Otherwise the first element of the packet queue is not
* used.
*/
- decoder->head = (decoder->head + 1) & (MAX_BUFFER - 1);
+ packet_queue->head = (packet_queue->head + 1) &
+ (CS_ETM_PACKET_MAX_BUFFER - 1);
- *packet = decoder->packet_buffer[decoder->head];
+ *packet = packet_queue->packet_buffer[packet_queue->head];
- decoder->packet_count--;
+ packet_queue->packet_count--;
return 1;
}
@@ -276,84 +269,130 @@ cs_etm_decoder__create_etm_packet_printer(struct cs_etm_trace_params *t_params,
trace_config);
}
-static void cs_etm_decoder__clear_buffer(struct cs_etm_decoder *decoder)
+static ocsd_datapath_resp_t
+cs_etm_decoder__do_soft_timestamp(struct cs_etm_queue *etmq,
+ struct cs_etm_packet_queue *packet_queue,
+ const uint8_t trace_chan_id)
{
- int i;
-
- decoder->head = 0;
- decoder->tail = 0;
- decoder->packet_count = 0;
- for (i = 0; i < MAX_BUFFER; i++) {
- decoder->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN;
- decoder->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR;
- decoder->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR;
- decoder->packet_buffer[i].instr_count = 0;
- decoder->packet_buffer[i].last_instr_taken_branch = false;
- decoder->packet_buffer[i].last_instr_size = 0;
- decoder->packet_buffer[i].last_instr_type = 0;
- decoder->packet_buffer[i].last_instr_subtype = 0;
- decoder->packet_buffer[i].last_instr_cond = 0;
- decoder->packet_buffer[i].flags = 0;
- decoder->packet_buffer[i].exception_number = UINT32_MAX;
- decoder->packet_buffer[i].trace_chan_id = UINT8_MAX;
- decoder->packet_buffer[i].cpu = INT_MIN;
+ /* No timestamp packet has been received, nothing to do */
+ if (!packet_queue->timestamp)
+ return OCSD_RESP_CONT;
+
+ packet_queue->timestamp = packet_queue->next_timestamp;
+
+ /* Estimate the timestamp for the next range packet */
+ packet_queue->next_timestamp += packet_queue->instr_count;
+ packet_queue->instr_count = 0;
+
+ /* Tell the front end which traceid_queue needs attention */
+ cs_etm__etmq_set_traceid_queue_timestamp(etmq, trace_chan_id);
+
+ return OCSD_RESP_WAIT;
+}
+
+static ocsd_datapath_resp_t
+cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq,
+ const ocsd_generic_trace_elem *elem,
+ const uint8_t trace_chan_id)
+{
+ struct cs_etm_packet_queue *packet_queue;
+
+ /* First get the packet queue for this traceID */
+ packet_queue = cs_etm__etmq_get_packet_queue(etmq, trace_chan_id);
+ if (!packet_queue)
+ return OCSD_RESP_FATAL_SYS_ERR;
+
+ /*
+ * We've seen a timestamp packet before - simply record the new value.
+ * Function do_soft_timestamp() will report the value to the front end,
+ * hence asking the decoder to keep decoding rather than stopping.
+ */
+ if (packet_queue->timestamp) {
+ packet_queue->next_timestamp = elem->timestamp;
+ return OCSD_RESP_CONT;
}
+
+ /*
+ * This is the first timestamp we've seen since the beginning of traces
+ * or a discontinuity. Since timestamps packets are generated *after*
+ * range packets have been generated, we need to estimate the time at
+ * which instructions started by substracting the number of instructions
+ * executed to the timestamp.
+ */
+ packet_queue->timestamp = elem->timestamp - packet_queue->instr_count;
+ packet_queue->next_timestamp = elem->timestamp;
+ packet_queue->instr_count = 0;
+
+ /* Tell the front end which traceid_queue needs attention */
+ cs_etm__etmq_set_traceid_queue_timestamp(etmq, trace_chan_id);
+
+ /* Halt processing until we are being told to proceed */
+ return OCSD_RESP_WAIT;
+}
+
+static void
+cs_etm_decoder__reset_timestamp(struct cs_etm_packet_queue *packet_queue)
+{
+ packet_queue->timestamp = 0;
+ packet_queue->next_timestamp = 0;
+ packet_queue->instr_count = 0;
}
static ocsd_datapath_resp_t
-cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder,
+cs_etm_decoder__buffer_packet(struct cs_etm_packet_queue *packet_queue,
const u8 trace_chan_id,
enum cs_etm_sample_type sample_type)
{
u32 et = 0;
int cpu;
- if (decoder->packet_count >= MAX_BUFFER - 1)
+ if (packet_queue->packet_count >= CS_ETM_PACKET_MAX_BUFFER - 1)
return OCSD_RESP_FATAL_SYS_ERR;
if (cs_etm__get_cpu(trace_chan_id, &cpu) < 0)
return OCSD_RESP_FATAL_SYS_ERR;
- et = decoder->tail;
- et = (et + 1) & (MAX_BUFFER - 1);
- decoder->tail = et;
- decoder->packet_count++;
-
- decoder->packet_buffer[et].sample_type = sample_type;
- decoder->packet_buffer[et].isa = CS_ETM_ISA_UNKNOWN;
- decoder->packet_buffer[et].cpu = cpu;
- decoder->packet_buffer[et].start_addr = CS_ETM_INVAL_ADDR;
- decoder->packet_buffer[et].end_addr = CS_ETM_INVAL_ADDR;
- decoder->packet_buffer[et].instr_count = 0;
- decoder->packet_buffer[et].last_instr_taken_branch = false;
- decoder->packet_buffer[et].last_instr_size = 0;
- decoder->packet_buffer[et].last_instr_type = 0;
- decoder->packet_buffer[et].last_instr_subtype = 0;
- decoder->packet_buffer[et].last_instr_cond = 0;
- decoder->packet_buffer[et].flags = 0;
- decoder->packet_buffer[et].exception_number = UINT32_MAX;
- decoder->packet_buffer[et].trace_chan_id = trace_chan_id;
-
- if (decoder->packet_count == MAX_BUFFER - 1)
+ et = packet_queue->tail;
+ et = (et + 1) & (CS_ETM_PACKET_MAX_BUFFER - 1);
+ packet_queue->tail = et;
+ packet_queue->packet_count++;
+
+ packet_queue->packet_buffer[et].sample_type = sample_type;
+ packet_queue->packet_buffer[et].isa = CS_ETM_ISA_UNKNOWN;
+ packet_queue->packet_buffer[et].cpu = cpu;
+ packet_queue->packet_buffer[et].start_addr = CS_ETM_INVAL_ADDR;
+ packet_queue->packet_buffer[et].end_addr = CS_ETM_INVAL_ADDR;
+ packet_queue->packet_buffer[et].instr_count = 0;
+ packet_queue->packet_buffer[et].last_instr_taken_branch = false;
+ packet_queue->packet_buffer[et].last_instr_size = 0;
+ packet_queue->packet_buffer[et].last_instr_type = 0;
+ packet_queue->packet_buffer[et].last_instr_subtype = 0;
+ packet_queue->packet_buffer[et].last_instr_cond = 0;
+ packet_queue->packet_buffer[et].flags = 0;
+ packet_queue->packet_buffer[et].exception_number = UINT32_MAX;
+ packet_queue->packet_buffer[et].trace_chan_id = trace_chan_id;
+
+ if (packet_queue->packet_count == CS_ETM_PACKET_MAX_BUFFER - 1)
return OCSD_RESP_WAIT;
return OCSD_RESP_CONT;
}
static ocsd_datapath_resp_t
-cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder,
+cs_etm_decoder__buffer_range(struct cs_etm_queue *etmq,
+ struct cs_etm_packet_queue *packet_queue,
const ocsd_generic_trace_elem *elem,
const uint8_t trace_chan_id)
{
int ret = 0;
struct cs_etm_packet *packet;
- ret = cs_etm_decoder__buffer_packet(decoder, trace_chan_id,
+ ret = cs_etm_decoder__buffer_packet(packet_queue, trace_chan_id,
CS_ETM_RANGE);
if (ret != OCSD_RESP_CONT && ret != OCSD_RESP_WAIT)
return ret;
- packet = &decoder->packet_buffer[decoder->tail];
+ packet = &packet_queue->packet_buffer[packet_queue->tail];
switch (elem->isa) {
case ocsd_isa_aarch64:
@@ -396,43 +435,90 @@ cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder,
packet->last_instr_size = elem->last_instr_sz;
+ /* per-thread scenario, no need to generate a timestamp */
+ if (cs_etm__etmq_is_timeless(etmq))
+ goto out;
+
+ /*
+ * The packet queue is full and we haven't seen a timestamp (had we
+ * seen one the packet queue wouldn't be full). Let the front end
+ * deal with it.
+ */
+ if (ret == OCSD_RESP_WAIT)
+ goto out;
+
+ packet_queue->instr_count += elem->num_instr_range;
+ /* Tell the front end we have a new timestamp to process */
+ ret = cs_etm_decoder__do_soft_timestamp(etmq, packet_queue,
+ trace_chan_id);
+out:
return ret;
}
static ocsd_datapath_resp_t
-cs_etm_decoder__buffer_discontinuity(struct cs_etm_decoder *decoder,
- const uint8_t trace_chan_id)
+cs_etm_decoder__buffer_discontinuity(struct cs_etm_packet_queue *queue,
+ const uint8_t trace_chan_id)
{
- return cs_etm_decoder__buffer_packet(decoder, trace_chan_id,
+ /*
+ * Something happened and who knows when we'll get new traces so
+ * reset time statistics.
+ */
+ cs_etm_decoder__reset_timestamp(queue);
+ return cs_etm_decoder__buffer_packet(queue, trace_chan_id,
CS_ETM_DISCONTINUITY);
}
static ocsd_datapath_resp_t
-cs_etm_decoder__buffer_exception(struct cs_etm_decoder *decoder,
+cs_etm_decoder__buffer_exception(struct cs_etm_packet_queue *queue,
const ocsd_generic_trace_elem *elem,
const uint8_t trace_chan_id)
{ int ret = 0;
struct cs_etm_packet *packet;
- ret = cs_etm_decoder__buffer_packet(decoder, trace_chan_id,
+ ret = cs_etm_decoder__buffer_packet(queue, trace_chan_id,
CS_ETM_EXCEPTION);
if (ret != OCSD_RESP_CONT && ret != OCSD_RESP_WAIT)
return ret;
- packet = &decoder->packet_buffer[decoder->tail];
+ packet = &queue->packet_buffer[queue->tail];
packet->exception_number = elem->exception_number;
return ret;
}
static ocsd_datapath_resp_t
-cs_etm_decoder__buffer_exception_ret(struct cs_etm_decoder *decoder,
+cs_etm_decoder__buffer_exception_ret(struct cs_etm_packet_queue *queue,
const uint8_t trace_chan_id)
{
- return cs_etm_decoder__buffer_packet(decoder, trace_chan_id,
+ return cs_etm_decoder__buffer_packet(queue, trace_chan_id,
CS_ETM_EXCEPTION_RET);
}
+static ocsd_datapath_resp_t
+cs_etm_decoder__set_tid(struct cs_etm_queue *etmq,
+ struct cs_etm_packet_queue *packet_queue,
+ const ocsd_generic_trace_elem *elem,
+ const uint8_t trace_chan_id)
+{
+ pid_t tid;
+
+ /* Ignore PE_CONTEXT packets that don't have a valid contextID */
+ if (!elem->context.ctxt_id_valid)
+ return OCSD_RESP_CONT;
+
+ tid = elem->context.context_id;
+ if (cs_etm__etmq_set_tid(etmq, tid, trace_chan_id))
+ return OCSD_RESP_FATAL_SYS_ERR;
+
+ /*
+ * A timestamp is generated after a PE_CONTEXT element so make sure
+ * to rely on that coming one.
+ */
+ cs_etm_decoder__reset_timestamp(packet_queue);
+
+ return OCSD_RESP_CONT;
+}
+
static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
const void *context,
const ocsd_trc_index_t indx __maybe_unused,
@@ -441,6 +527,13 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
{
ocsd_datapath_resp_t resp = OCSD_RESP_CONT;
struct cs_etm_decoder *decoder = (struct cs_etm_decoder *) context;
+ struct cs_etm_queue *etmq = decoder->data;
+ struct cs_etm_packet_queue *packet_queue;
+
+ /* First get the packet queue for this traceID */
+ packet_queue = cs_etm__etmq_get_packet_queue(etmq, trace_chan_id);
+ if (!packet_queue)
+ return OCSD_RESP_FATAL_SYS_ERR;
switch (elem->elem_type) {
case OCSD_GEN_TRC_ELEM_UNKNOWN:
@@ -448,24 +541,30 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
case OCSD_GEN_TRC_ELEM_EO_TRACE:
case OCSD_GEN_TRC_ELEM_NO_SYNC:
case OCSD_GEN_TRC_ELEM_TRACE_ON:
- resp = cs_etm_decoder__buffer_discontinuity(decoder,
+ resp = cs_etm_decoder__buffer_discontinuity(packet_queue,
trace_chan_id);
break;
case OCSD_GEN_TRC_ELEM_INSTR_RANGE:
- resp = cs_etm_decoder__buffer_range(decoder, elem,
+ resp = cs_etm_decoder__buffer_range(etmq, packet_queue, elem,
trace_chan_id);
break;
case OCSD_GEN_TRC_ELEM_EXCEPTION:
- resp = cs_etm_decoder__buffer_exception(decoder, elem,
+ resp = cs_etm_decoder__buffer_exception(packet_queue, elem,
trace_chan_id);
break;
case OCSD_GEN_TRC_ELEM_EXCEPTION_RET:
- resp = cs_etm_decoder__buffer_exception_ret(decoder,
+ resp = cs_etm_decoder__buffer_exception_ret(packet_queue,
trace_chan_id);
break;
+ case OCSD_GEN_TRC_ELEM_TIMESTAMP:
+ resp = cs_etm_decoder__do_hard_timestamp(etmq, elem,
+ trace_chan_id);
+ break;
case OCSD_GEN_TRC_ELEM_PE_CONTEXT:
+ resp = cs_etm_decoder__set_tid(etmq, packet_queue,
+ elem, trace_chan_id);
+ break;
case OCSD_GEN_TRC_ELEM_ADDR_NACC:
- case OCSD_GEN_TRC_ELEM_TIMESTAMP:
case OCSD_GEN_TRC_ELEM_CYCLE_COUNT:
case OCSD_GEN_TRC_ELEM_ADDR_UNKNOWN:
case OCSD_GEN_TRC_ELEM_EVENT:
@@ -554,7 +653,6 @@ cs_etm_decoder__new(int num_cpu, struct cs_etm_decoder_params *d_params,
decoder->data = d_params->data;
decoder->prev_return = OCSD_RESP_CONT;
- cs_etm_decoder__clear_buffer(decoder);
format = (d_params->formatted ? OCSD_TRC_SRC_FRAME_FORMATTED :
OCSD_TRC_SRC_SINGLE);
flags = 0;
@@ -577,7 +675,7 @@ cs_etm_decoder__new(int num_cpu, struct cs_etm_decoder_params *d_params,
/* init library print logging support */
ret = cs_etm_decoder__init_def_logger_printing(d_params, decoder);
if (ret != 0)
- goto err_free_decoder_tree;
+ goto err_free_decoder;
/* init raw frame logging if required */
cs_etm_decoder__init_raw_frame_logging(d_params, decoder);
@@ -587,15 +685,13 @@ cs_etm_decoder__new(int num_cpu, struct cs_etm_decoder_params *d_params,
&t_params[i],
decoder);
if (ret != 0)
- goto err_free_decoder_tree;
+ goto err_free_decoder;
}
return decoder;
-err_free_decoder_tree:
- ocsd_destroy_dcd_tree(decoder->dcd_tree);
err_free_decoder:
- free(decoder);
+ cs_etm_decoder__free(decoder);
return NULL;
}
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
index 3ab11dfa92ae..11f3391d06f2 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
@@ -14,43 +14,12 @@
#include <stdio.h>
struct cs_etm_decoder;
-
-enum cs_etm_sample_type {
- CS_ETM_EMPTY,
- CS_ETM_RANGE,
- CS_ETM_DISCONTINUITY,
- CS_ETM_EXCEPTION,
- CS_ETM_EXCEPTION_RET,
-};
-
-enum cs_etm_isa {
- CS_ETM_ISA_UNKNOWN,
- CS_ETM_ISA_A64,
- CS_ETM_ISA_A32,
- CS_ETM_ISA_T32,
-};
-
-struct cs_etm_packet {
- enum cs_etm_sample_type sample_type;
- enum cs_etm_isa isa;
- u64 start_addr;
- u64 end_addr;
- u32 instr_count;
- u32 last_instr_type;
- u32 last_instr_subtype;
- u32 flags;
- u32 exception_number;
- u8 last_instr_cond;
- u8 last_instr_taken_branch;
- u8 last_instr_size;
- u8 trace_chan_id;
- int cpu;
-};
+struct cs_etm_packet;
+struct cs_etm_packet_queue;
struct cs_etm_queue;
-typedef u32 (*cs_etm_mem_cb_type)(struct cs_etm_queue *, u64,
- size_t, u8 *);
+typedef u32 (*cs_etm_mem_cb_type)(struct cs_etm_queue *, u8, u64, size_t, u8 *);
struct cs_etmv3_trace_params {
u32 reg_ctrl;
@@ -119,7 +88,7 @@ int cs_etm_decoder__add_mem_access_cb(struct cs_etm_decoder *decoder,
u64 start, u64 end,
cs_etm_mem_cb_type cb_func);
-int cs_etm_decoder__get_packet(struct cs_etm_decoder *decoder,
+int cs_etm_decoder__get_packet(struct cs_etm_packet_queue *packet_queue,
struct cs_etm_packet *packet);
int cs_etm_decoder__reset(struct cs_etm_decoder *decoder);
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index de488b43f440..0c7776b51045 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -29,6 +29,7 @@
#include "thread.h"
#include "thread_map.h"
#include "thread-stack.h"
+#include <tools/libc_compat.h>
#include "util.h"
#define MAX_TIMESTAMP (~0ULL)
@@ -60,33 +61,55 @@ struct cs_etm_auxtrace {
unsigned int pmu_type;
};
-struct cs_etm_queue {
- struct cs_etm_auxtrace *etm;
- struct thread *thread;
- struct cs_etm_decoder *decoder;
- struct auxtrace_buffer *buffer;
- union perf_event *event_buf;
- unsigned int queue_nr;
+struct cs_etm_traceid_queue {
+ u8 trace_chan_id;
pid_t pid, tid;
- int cpu;
- u64 offset;
u64 period_instructions;
+ size_t last_branch_pos;
+ union perf_event *event_buf;
+ struct thread *thread;
struct branch_stack *last_branch;
struct branch_stack *last_branch_rb;
- size_t last_branch_pos;
struct cs_etm_packet *prev_packet;
struct cs_etm_packet *packet;
+ struct cs_etm_packet_queue packet_queue;
+};
+
+struct cs_etm_queue {
+ struct cs_etm_auxtrace *etm;
+ struct cs_etm_decoder *decoder;
+ struct auxtrace_buffer *buffer;
+ unsigned int queue_nr;
+ u8 pending_timestamp;
+ u64 offset;
const unsigned char *buf;
size_t buf_len, buf_used;
+ /* Conversion between traceID and index in traceid_queues array */
+ struct intlist *traceid_queues_list;
+ struct cs_etm_traceid_queue **traceid_queues;
};
static int cs_etm__update_queues(struct cs_etm_auxtrace *etm);
+static int cs_etm__process_queues(struct cs_etm_auxtrace *etm);
static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
pid_t tid);
+static int cs_etm__get_data_block(struct cs_etm_queue *etmq);
+static int cs_etm__decode_data_block(struct cs_etm_queue *etmq);
/* PTMs ETMIDR [11:8] set to b0011 */
#define ETMIDR_PTM_VERSION 0x00000300
+/*
+ * A struct auxtrace_heap_item only has a queue_nr and a timestamp to
+ * work with. One option is to modify to auxtrace_heap_XYZ() API or simply
+ * encode the etm queue number as the upper 16 bit and the channel as
+ * the lower 16 bit.
+ */
+#define TO_CS_QUEUE_NR(queue_nr, trace_id_chan) \
+ (queue_nr << 16 | trace_chan_id)
+#define TO_QUEUE_NR(cs_queue_nr) (cs_queue_nr >> 16)
+#define TO_TRACE_CHAN_ID(cs_queue_nr) (cs_queue_nr & 0x0000ffff)
+
static u32 cs_etm__get_v7_protocol_version(u32 etmidr)
{
etmidr &= ETMIDR_PTM_VERSION;
@@ -125,6 +148,216 @@ int cs_etm__get_cpu(u8 trace_chan_id, int *cpu)
return 0;
}
+void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
+ u8 trace_chan_id)
+{
+ /*
+ * Wnen a timestamp packet is encountered the backend code
+ * is stopped so that the front end has time to process packets
+ * that were accumulated in the traceID queue. Since there can
+ * be more than one channel per cs_etm_queue, we need to specify
+ * what traceID queue needs servicing.
+ */
+ etmq->pending_timestamp = trace_chan_id;
+}
+
+static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq,
+ u8 *trace_chan_id)
+{
+ struct cs_etm_packet_queue *packet_queue;
+
+ if (!etmq->pending_timestamp)
+ return 0;
+
+ if (trace_chan_id)
+ *trace_chan_id = etmq->pending_timestamp;
+
+ packet_queue = cs_etm__etmq_get_packet_queue(etmq,
+ etmq->pending_timestamp);
+ if (!packet_queue)
+ return 0;
+
+ /* Acknowledge pending status */
+ etmq->pending_timestamp = 0;
+
+ /* See function cs_etm_decoder__do_{hard|soft}_timestamp() */
+ return packet_queue->timestamp;
+}
+
+static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue)
+{
+ int i;
+
+ queue->head = 0;
+ queue->tail = 0;
+ queue->packet_count = 0;
+ for (i = 0; i < CS_ETM_PACKET_MAX_BUFFER; i++) {
+ queue->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN;
+ queue->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR;
+ queue->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR;
+ queue->packet_buffer[i].instr_count = 0;
+ queue->packet_buffer[i].last_instr_taken_branch = false;
+ queue->packet_buffer[i].last_instr_size = 0;
+ queue->packet_buffer[i].last_instr_type = 0;
+ queue->packet_buffer[i].last_instr_subtype = 0;
+ queue->packet_buffer[i].last_instr_cond = 0;
+ queue->packet_buffer[i].flags = 0;
+ queue->packet_buffer[i].exception_number = UINT32_MAX;
+ queue->packet_buffer[i].trace_chan_id = UINT8_MAX;
+ queue->packet_buffer[i].cpu = INT_MIN;
+ }
+}
+
+static void cs_etm__clear_all_packet_queues(struct cs_etm_queue *etmq)
+{
+ int idx;
+ struct int_node *inode;
+ struct cs_etm_traceid_queue *tidq;
+ struct intlist *traceid_queues_list = etmq->traceid_queues_list;
+
+ intlist__for_each_entry(inode, traceid_queues_list) {
+ idx = (int)(intptr_t)inode->priv;
+ tidq = etmq->traceid_queues[idx];
+ cs_etm__clear_packet_queue(&tidq->packet_queue);
+ }
+}
+
+static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq,
+ struct cs_etm_traceid_queue *tidq,
+ u8 trace_chan_id)
+{
+ int rc = -ENOMEM;
+ struct auxtrace_queue *queue;
+ struct cs_etm_auxtrace *etm = etmq->etm;
+
+ cs_etm__clear_packet_queue(&tidq->packet_queue);
+
+ queue = &etmq->etm->queues.queue_array[etmq->queue_nr];
+ tidq->tid = queue->tid;
+ tidq->pid = -1;
+ tidq->trace_chan_id = trace_chan_id;
+
+ tidq->packet = zalloc(sizeof(struct cs_etm_packet));
+ if (!tidq->packet)
+ goto out;
+
+ tidq->prev_packet = zalloc(sizeof(struct cs_etm_packet));
+ if (!tidq->prev_packet)
+ goto out_free;
+
+ if (etm->synth_opts.last_branch) {
+ size_t sz = sizeof(struct branch_stack);
+
+ sz += etm->synth_opts.last_branch_sz *
+ sizeof(struct branch_entry);
+ tidq->last_branch = zalloc(sz);
+ if (!tidq->last_branch)
+ goto out_free;
+ tidq->last_branch_rb = zalloc(sz);
+ if (!tidq->last_branch_rb)
+ goto out_free;
+ }
+
+ tidq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
+ if (!tidq->event_buf)
+ goto out_free;
+
+ return 0;
+
+out_free:
+ zfree(&tidq->last_branch_rb);
+ zfree(&tidq->last_branch);
+ zfree(&tidq->prev_packet);
+ zfree(&tidq->packet);
+out:
+ return rc;
+}
+
+static struct cs_etm_traceid_queue
+*cs_etm__etmq_get_traceid_queue(struct cs_etm_queue *etmq, u8 trace_chan_id)
+{
+ int idx;
+ struct int_node *inode;
+ struct intlist *traceid_queues_list;
+ struct cs_etm_traceid_queue *tidq, **traceid_queues;
+ struct cs_etm_auxtrace *etm = etmq->etm;
+
+ if (etm->timeless_decoding)
+ trace_chan_id = CS_ETM_PER_THREAD_TRACEID;
+
+ traceid_queues_list = etmq->traceid_queues_list;
+
+ /*
+ * Check if the traceid_queue exist for this traceID by looking
+ * in the queue list.
+ */
+ inode = intlist__find(traceid_queues_list, trace_chan_id);
+ if (inode) {
+ idx = (int)(intptr_t)inode->priv;
+ return etmq->traceid_queues[idx];
+ }
+
+ /* We couldn't find a traceid_queue for this traceID, allocate one */
+ tidq = malloc(sizeof(*tidq));
+ if (!tidq)
+ return NULL;
+
+ memset(tidq, 0, sizeof(*tidq));
+
+ /* Get a valid index for the new traceid_queue */
+ idx = intlist__nr_entries(traceid_queues_list);
+ /* Memory for the inode is free'ed in cs_etm_free_traceid_queues () */
+ inode = intlist__findnew(traceid_queues_list, trace_chan_id);
+ if (!inode)
+ goto out_free;
+
+ /* Associate this traceID with this index */
+ inode->priv = (void *)(intptr_t)idx;
+
+ if (cs_etm__init_traceid_queue(etmq, tidq, trace_chan_id))
+ goto out_free;
+
+ /* Grow the traceid_queues array by one unit */
+ traceid_queues = etmq->traceid_queues;
+ traceid_queues = reallocarray(traceid_queues,
+ idx + 1,
+ sizeof(*traceid_queues));
+
+ /*
+ * On failure reallocarray() returns NULL and the original block of
+ * memory is left untouched.
+ */
+ if (!traceid_queues)
+ goto out_free;
+
+ traceid_queues[idx] = tidq;
+ etmq->traceid_queues = traceid_queues;
+
+ return etmq->traceid_queues[idx];
+
+out_free:
+ /*
+ * Function intlist__remove() removes the inode from the list
+ * and delete the memory associated to it.
+ */
+ intlist__remove(traceid_queues_list, inode);
+ free(tidq);
+
+ return NULL;
+}
+
+struct cs_etm_packet_queue
+*cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id)
+{
+ struct cs_etm_traceid_queue *tidq;
+
+ tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
+ if (tidq)
+ return &tidq->packet_queue;
+
+ return NULL;
+}
+
static void cs_etm__packet_dump(const char *pkt_string)
{
const char *color = PERF_COLOR_BLUE;
@@ -276,15 +509,53 @@ static int cs_etm__flush_events(struct perf_session *session,
if (!tool->ordered_events)
return -EINVAL;
- if (!etm->timeless_decoding)
- return -EINVAL;
-
ret = cs_etm__update_queues(etm);
if (ret < 0)
return ret;
- return cs_etm__process_timeless_queues(etm, -1);
+ if (etm->timeless_decoding)
+ return cs_etm__process_timeless_queues(etm, -1);
+
+ return cs_etm__process_queues(etm);
+}
+
+static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq)
+{
+ int idx;
+ uintptr_t priv;
+ struct int_node *inode, *tmp;
+ struct cs_etm_traceid_queue *tidq;
+ struct intlist *traceid_queues_list = etmq->traceid_queues_list;
+
+ intlist__for_each_entry_safe(inode, tmp, traceid_queues_list) {
+ priv = (uintptr_t)inode->priv;
+ idx = priv;
+
+ /* Free this traceid_queue from the array */
+ tidq = etmq->traceid_queues[idx];
+ thread__zput(tidq->thread);
+ zfree(&tidq->event_buf);
+ zfree(&tidq->last_branch);
+ zfree(&tidq->last_branch_rb);
+ zfree(&tidq->prev_packet);
+ zfree(&tidq->packet);
+ zfree(&tidq);
+
+ /*
+ * Function intlist__remove() removes the inode from the list
+ * and delete the memory associated to it.
+ */
+ intlist__remove(traceid_queues_list, inode);
+ }
+
+ /* Then the RB tree itself */
+ intlist__delete(traceid_queues_list);
+ etmq->traceid_queues_list = NULL;
+
+ /* finally free the traceid_queues array */
+ free(etmq->traceid_queues);
+ etmq->traceid_queues = NULL;
}
static void cs_etm__free_queue(void *priv)
@@ -294,13 +565,8 @@ static void cs_etm__free_queue(void *priv)
if (!etmq)
return;
- thread__zput(etmq->thread);
cs_etm_decoder__free(etmq->decoder);
- zfree(&etmq->event_buf);
- zfree(&etmq->last_branch);
- zfree(&etmq->last_branch_rb);
- zfree(&etmq->prev_packet);
- zfree(&etmq->packet);
+ cs_etm__free_traceid_queues(etmq);
free(etmq);
}
@@ -365,23 +631,27 @@ static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address)
}
}
-static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address,
- size_t size, u8 *buffer)
+static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id,
+ u64 address, size_t size, u8 *buffer)
{
u8 cpumode;
u64 offset;
int len;
- struct thread *thread;
- struct machine *machine;
- struct addr_location al;
+ struct thread *thread;
+ struct machine *machine;
+ struct addr_location al;
+ struct cs_etm_traceid_queue *tidq;
if (!etmq)
return 0;
machine = etmq->etm->machine;
cpumode = cs_etm__cpu_mode(etmq, address);
+ tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
+ if (!tidq)
+ return 0;
- thread = etmq->thread;
+ thread = tidq->thread;
if (!thread) {
if (cpumode != PERF_RECORD_MISC_KERNEL)
return 0;
@@ -412,35 +682,13 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm)
struct cs_etm_decoder_params d_params;
struct cs_etm_trace_params *t_params = NULL;
struct cs_etm_queue *etmq;
- size_t szp = sizeof(struct cs_etm_packet);
etmq = zalloc(sizeof(*etmq));
if (!etmq)
return NULL;
- etmq->packet = zalloc(szp);
- if (!etmq->packet)
- goto out_free;
-
- etmq->prev_packet = zalloc(szp);
- if (!etmq->prev_packet)
- goto out_free;
-
- if (etm->synth_opts.last_branch) {
- size_t sz = sizeof(struct branch_stack);
-
- sz += etm->synth_opts.last_branch_sz *
- sizeof(struct branch_entry);
- etmq->last_branch = zalloc(sz);
- if (!etmq->last_branch)
- goto out_free;
- etmq->last_branch_rb = zalloc(sz);
- if (!etmq->last_branch_rb)
- goto out_free;
- }
-
- etmq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
- if (!etmq->event_buf)
+ etmq->traceid_queues_list = intlist__new(NULL);
+ if (!etmq->traceid_queues_list)
goto out_free;
/* Use metadata to fill in trace parameters for trace decoder */
@@ -477,12 +725,7 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm)
out_free_decoder:
cs_etm_decoder__free(etmq->decoder);
out_free:
- zfree(&t_params);
- zfree(&etmq->event_buf);
- zfree(&etmq->last_branch);
- zfree(&etmq->last_branch_rb);
- zfree(&etmq->prev_packet);
- zfree(&etmq->packet);
+ intlist__delete(etmq->traceid_queues_list);
free(etmq);
return NULL;
@@ -493,6 +736,9 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
unsigned int queue_nr)
{
int ret = 0;
+ unsigned int cs_queue_nr;
+ u8 trace_chan_id;
+ u64 timestamp;
struct cs_etm_queue *etmq = queue->priv;
if (list_empty(&queue->head) || etmq)
@@ -508,12 +754,69 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
queue->priv = etmq;
etmq->etm = etm;
etmq->queue_nr = queue_nr;
- etmq->cpu = queue->cpu;
- etmq->tid = queue->tid;
- etmq->pid = -1;
etmq->offset = 0;
- etmq->period_instructions = 0;
+ if (etm->timeless_decoding)
+ goto out;
+
+ /*
+ * We are under a CPU-wide trace scenario. As such we need to know
+ * when the code that generated the traces started to execute so that
+ * it can be correlated with execution on other CPUs. So we get a
+ * handle on the beginning of traces and decode until we find a
+ * timestamp. The timestamp is then added to the auxtrace min heap
+ * in order to know what nibble (of all the etmqs) to decode first.
+ */
+ while (1) {
+ /*
+ * Fetch an aux_buffer from this etmq. Bail if no more
+ * blocks or an error has been encountered.
+ */
+ ret = cs_etm__get_data_block(etmq);
+ if (ret <= 0)
+ goto out;
+
+ /*
+ * Run decoder on the trace block. The decoder will stop when
+ * encountering a timestamp, a full packet queue or the end of
+ * trace for that block.
+ */
+ ret = cs_etm__decode_data_block(etmq);
+ if (ret)
+ goto out;
+
+ /*
+ * Function cs_etm_decoder__do_{hard|soft}_timestamp() does all
+ * the timestamp calculation for us.
+ */
+ timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
+
+ /* We found a timestamp, no need to continue. */
+ if (timestamp)
+ break;
+
+ /*
+ * We didn't find a timestamp so empty all the traceid packet
+ * queues before looking for another timestamp packet, either
+ * in the current data block or a new one. Packets that were
+ * just decoded are useless since no timestamp has been
+ * associated with them. As such simply discard them.
+ */
+ cs_etm__clear_all_packet_queues(etmq);
+ }
+
+ /*
+ * We have a timestamp. Add it to the min heap to reflect when
+ * instructions conveyed by the range packets of this traceID queue
+ * started to execute. Once the same has been done for all the traceID
+ * queues of each etmq, redenring and decoding can start in
+ * chronological order.
+ *
+ * Note that packets decoded above are still in the traceID's packet
+ * queue and will be processed in cs_etm__process_queues().
+ */
+ cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_id_chan);
+ ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, timestamp);
out:
return ret;
}
@@ -545,10 +848,12 @@ static int cs_etm__update_queues(struct cs_etm_auxtrace *etm)
return 0;
}
-static inline void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq)
+static inline
+void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq,
+ struct cs_etm_traceid_queue *tidq)
{
- struct branch_stack *bs_src = etmq->last_branch_rb;
- struct branch_stack *bs_dst = etmq->last_branch;
+ struct branch_stack *bs_src = tidq->last_branch_rb;
+ struct branch_stack *bs_dst = tidq->last_branch;
size_t nr = 0;
/*
@@ -568,9 +873,9 @@ static inline void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq)
* two steps. First, copy the branches from the most recently inserted
* branch ->last_branch_pos until the end of bs_src->entries buffer.
*/
- nr = etmq->etm->synth_opts.last_branch_sz - etmq->last_branch_pos;
+ nr = etmq->etm->synth_opts.last_branch_sz - tidq->last_branch_pos;
memcpy(&bs_dst->entries[0],
- &bs_src->entries[etmq->last_branch_pos],
+ &bs_src->entries[tidq->last_branch_pos],
sizeof(struct branch_entry) * nr);
/*
@@ -583,21 +888,24 @@ static inline void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq)
if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) {
memcpy(&bs_dst->entries[nr],
&bs_src->entries[0],
- sizeof(struct branch_entry) * etmq->last_branch_pos);
+ sizeof(struct branch_entry) * tidq->last_branch_pos);
}
}
-static inline void cs_etm__reset_last_branch_rb(struct cs_etm_queue *etmq)
+static inline
+void cs_etm__reset_last_branch_rb(struct cs_etm_traceid_queue *tidq)
{
- etmq->last_branch_pos = 0;
- etmq->last_branch_rb->nr = 0;
+ tidq->last_branch_pos = 0;
+ tidq->last_branch_rb->nr = 0;
}
static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq,
- u64 addr) {
+ u8 trace_chan_id, u64 addr)
+{
u8 instrBytes[2];
- cs_etm__mem_access(etmq, addr, ARRAY_SIZE(instrBytes), instrBytes);
+ cs_etm__mem_access(etmq, trace_chan_id, addr,
+ ARRAY_SIZE(instrBytes), instrBytes);
/*
* T32 instruction size is indicated by bits[15:11] of the first
* 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111
@@ -626,6 +934,7 @@ u64 cs_etm__last_executed_instr(const struct cs_etm_packet *packet)
}
static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq,
+ u64 trace_chan_id,
const struct cs_etm_packet *packet,
u64 offset)
{
@@ -633,7 +942,8 @@ static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq,
u64 addr = packet->start_addr;
while (offset > 0) {
- addr += cs_etm__t32_instr_size(etmq, addr);
+ addr += cs_etm__t32_instr_size(etmq,
+ trace_chan_id, addr);
offset--;
}
return addr;
@@ -643,9 +953,10 @@ static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq,
return packet->start_addr + offset * 4;
}
-static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq)
+static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq,
+ struct cs_etm_traceid_queue *tidq)
{
- struct branch_stack *bs = etmq->last_branch_rb;
+ struct branch_stack *bs = tidq->last_branch_rb;
struct branch_entry *be;
/*
@@ -654,14 +965,14 @@ static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq)
* buffer down. After writing the first element of the stack, move the
* insert position back to the end of the buffer.
*/
- if (!etmq->last_branch_pos)
- etmq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz;
+ if (!tidq->last_branch_pos)
+ tidq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz;
- etmq->last_branch_pos -= 1;
+ tidq->last_branch_pos -= 1;
- be = &bs->entries[etmq->last_branch_pos];
- be->from = cs_etm__last_executed_instr(etmq->prev_packet);
- be->to = cs_etm__first_executed_instr(etmq->packet);
+ be = &bs->entries[tidq->last_branch_pos];
+ be->from = cs_etm__last_executed_instr(tidq->prev_packet);
+ be->to = cs_etm__first_executed_instr(tidq->packet);
/* No support for mispredict */
be->flags.mispred = 0;
be->flags.predicted = 1;
@@ -725,31 +1036,53 @@ cs_etm__get_trace(struct cs_etm_queue *etmq)
}
static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm,
- struct auxtrace_queue *queue)
+ struct cs_etm_traceid_queue *tidq)
{
- struct cs_etm_queue *etmq = queue->priv;
+ if ((!tidq->thread) && (tidq->tid != -1))
+ tidq->thread = machine__find_thread(etm->machine, -1,
+ tidq->tid);
- /* CPU-wide tracing isn't supported yet */
- if (queue->tid == -1)
- return;
+ if (tidq->thread)
+ tidq->pid = tidq->thread->pid_;
+}
- if ((!etmq->thread) && (etmq->tid != -1))
- etmq->thread = machine__find_thread(etm->machine, -1,
- etmq->tid);
+int cs_etm__etmq_set_tid(struct cs_etm_queue *etmq,
+ pid_t tid, u8 trace_chan_id)
+{
+ int cpu, err = -EINVAL;
+ struct cs_etm_auxtrace *etm = etmq->etm;
+ struct cs_etm_traceid_queue *tidq;
- if (etmq->thread) {
- etmq->pid = etmq->thread->pid_;
- if (queue->cpu == -1)
- etmq->cpu = etmq->thread->cpu;
- }
+ tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
+ if (!tidq)
+ return err;
+
+ if (cs_etm__get_cpu(trace_chan_id, &cpu) < 0)
+ return err;
+
+ err = machine__set_current_tid(etm->machine, cpu, tid, tid);
+ if (err)
+ return err;
+
+ tidq->tid = tid;
+ thread__zput(tidq->thread);
+
+ cs_etm__set_pid_tid_cpu(etm, tidq);
+ return 0;
+}
+
+bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq)
+{
+ return !!etmq->etm->timeless_decoding;
}
static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
+ struct cs_etm_traceid_queue *tidq,
u64 addr, u64 period)
{
int ret = 0;
struct cs_etm_auxtrace *etm = etmq->etm;
- union perf_event *event = etmq->event_buf;
+ union perf_event *event = tidq->event_buf;
struct perf_sample sample = {.ip = 0,};
event->sample.header.type = PERF_RECORD_SAMPLE;
@@ -757,19 +1090,19 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
event->sample.header.size = sizeof(struct perf_event_header);
sample.ip = addr;
- sample.pid = etmq->pid;
- sample.tid = etmq->tid;
+ sample.pid = tidq->pid;
+ sample.tid = tidq->tid;
sample.id = etmq->etm->instructions_id;
sample.stream_id = etmq->etm->instructions_id;
sample.period = period;
- sample.cpu = etmq->packet->cpu;
- sample.flags = etmq->prev_packet->flags;
+ sample.cpu = tidq->packet->cpu;
+ sample.flags = tidq->prev_packet->flags;
sample.insn_len = 1;
sample.cpumode = event->sample.header.misc;
if (etm->synth_opts.last_branch) {
- cs_etm__copy_last_branch_rb(etmq);
- sample.branch_stack = etmq->last_branch;
+ cs_etm__copy_last_branch_rb(etmq, tidq);
+ sample.branch_stack = tidq->last_branch;
}
if (etm->synth_opts.inject) {
@@ -787,7 +1120,7 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
ret);
if (etm->synth_opts.last_branch)
- cs_etm__reset_last_branch_rb(etmq);
+ cs_etm__reset_last_branch_rb(tidq);
return ret;
}
@@ -796,33 +1129,34 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
* The cs etm packet encodes an instruction range between a branch target
* and the next taken branch. Generate sample accordingly.
*/
-static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq)
+static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
+ struct cs_etm_traceid_queue *tidq)
{
int ret = 0;
struct cs_etm_auxtrace *etm = etmq->etm;
struct perf_sample sample = {.ip = 0,};
- union perf_event *event = etmq->event_buf;
+ union perf_event *event = tidq->event_buf;
struct dummy_branch_stack {
u64 nr;
struct branch_entry entries;
} dummy_bs;
u64 ip;
- ip = cs_etm__last_executed_instr(etmq->prev_packet);
+ ip = cs_etm__last_executed_instr(tidq->prev_packet);
event->sample.header.type = PERF_RECORD_SAMPLE;
event->sample.header.misc = cs_etm__cpu_mode(etmq, ip);
event->sample.header.size = sizeof(struct perf_event_header);
sample.ip = ip;
- sample.pid = etmq->pid;
- sample.tid = etmq->tid;
- sample.addr = cs_etm__first_executed_instr(etmq->packet);
+ sample.pid = tidq->pid;
+ sample.tid = tidq->tid;
+ sample.addr = cs_etm__first_executed_instr(tidq->packet);
sample.id = etmq->etm->branches_id;
sample.stream_id = etmq->etm->branches_id;
sample.period = 1;
- sample.cpu = etmq->packet->cpu;
- sample.flags = etmq->prev_packet->flags;
+ sample.cpu = tidq->packet->cpu;
+ sample.flags = tidq->prev_packet->flags;
sample.cpumode = event->sample.header.misc;
/*
@@ -965,33 +1299,35 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
return 0;
}
-static int cs_etm__sample(struct cs_etm_queue *etmq)
+static int cs_etm__sample(struct cs_etm_queue *etmq,
+ struct cs_etm_traceid_queue *tidq)
{
struct cs_etm_auxtrace *etm = etmq->etm;
struct cs_etm_packet *tmp;
int ret;
- u64 instrs_executed = etmq->packet->instr_count;
+ u8 trace_chan_id = tidq->trace_chan_id;
+ u64 instrs_executed = tidq->packet->instr_count;
- etmq->period_instructions += instrs_executed;
+ tidq->period_instructions += instrs_executed;
/*
* Record a branch when the last instruction in
* PREV_PACKET is a branch.
*/
if (etm->synth_opts.last_branch &&
- etmq->prev_packet->sample_type == CS_ETM_RANGE &&
- etmq->prev_packet->last_instr_taken_branch)
- cs_etm__update_last_branch_rb(etmq);
+ tidq->prev_packet->sample_type == CS_ETM_RANGE &&
+ tidq->prev_packet->last_instr_taken_branch)
+ cs_etm__update_last_branch_rb(etmq, tidq);
if (etm->sample_instructions &&
- etmq->period_instructions >= etm->instructions_sample_period) {
+ tidq->period_instructions >= etm->instructions_sample_period) {
/*
* Emit instruction sample periodically
* TODO: allow period to be defined in cycles and clock time
*/
/* Get number of instructions executed after the sample point */
- u64 instrs_over = etmq->period_instructions -
+ u64 instrs_over = tidq->period_instructions -
etm->instructions_sample_period;
/*
@@ -1000,31 +1336,32 @@ static int cs_etm__sample(struct cs_etm_queue *etmq)
* executed, but PC has not advanced to next instruction)
*/
u64 offset = (instrs_executed - instrs_over - 1);
- u64 addr = cs_etm__instr_addr(etmq, etmq->packet, offset);
+ u64 addr = cs_etm__instr_addr(etmq, trace_chan_id,
+ tidq->packet, offset);
ret = cs_etm__synth_instruction_sample(
- etmq, addr, etm->instructions_sample_period);
+ etmq, tidq, addr, etm->instructions_sample_period);
if (ret)
return ret;
/* Carry remaining instructions into next sample period */
- etmq->period_instructions = instrs_over;
+ tidq->period_instructions = instrs_over;
}
if (etm->sample_branches) {
bool generate_sample = false;
/* Generate sample for tracing on packet */
- if (etmq->prev_packet->sample_type == CS_ETM_DISCONTINUITY)
+ if (tidq->prev_packet->sample_type == CS_ETM_DISCONTINUITY)
generate_sample = true;
/* Generate sample for branch taken packet */
- if (etmq->prev_packet->sample_type == CS_ETM_RANGE &&
- etmq->prev_packet->last_instr_taken_branch)
+ if (tidq->prev_packet->sample_type == CS_ETM_RANGE &&
+ tidq->prev_packet->last_instr_taken_branch)
generate_sample = true;
if (generate_sample) {
- ret = cs_etm__synth_branch_sample(etmq);
+ ret = cs_etm__synth_branch_sample(etmq, tidq);
if (ret)
return ret;
}
@@ -1035,15 +1372,15 @@ static int cs_etm__sample(struct cs_etm_queue *etmq)
* Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
* the next incoming packet.
*/
- tmp = etmq->packet;
- etmq->packet = etmq->prev_packet;
- etmq->prev_packet = tmp;
+ tmp = tidq->packet;
+ tidq->packet = tidq->prev_packet;
+ tidq->prev_packet = tmp;
}
return 0;
}
-static int cs_etm__exception(struct cs_etm_queue *etmq)
+static int cs_etm__exception(struct cs_etm_traceid_queue *tidq)
{
/*
* When the exception packet is inserted, whether the last instruction
@@ -1056,24 +1393,25 @@ static int cs_etm__exception(struct cs_etm_queue *etmq)
* swap PACKET with PREV_PACKET. This keeps PREV_PACKET to be useful
* for generating instruction and branch samples.
*/
- if (etmq->prev_packet->sample_type == CS_ETM_RANGE)
- etmq->prev_packet->last_instr_taken_branch = true;
+ if (tidq->prev_packet->sample_type == CS_ETM_RANGE)
+ tidq->prev_packet->last_instr_taken_branch = true;
return 0;
}
-static int cs_etm__flush(struct cs_etm_queue *etmq)
+static int cs_etm__flush(struct cs_etm_queue *etmq,
+ struct cs_etm_traceid_queue *tidq)
{
int err = 0;
struct cs_etm_auxtrace *etm = etmq->etm;
struct cs_etm_packet *tmp;
/* Handle start tracing packet */
- if (etmq->prev_packet->sample_type == CS_ETM_EMPTY)
+ if (tidq->prev_packet->sample_type == CS_ETM_EMPTY)
goto swap_packet;
if (etmq->etm->synth_opts.last_branch &&
- etmq->prev_packet->sample_type == CS_ETM_RANGE) {
+ tidq->prev_packet->sample_type == CS_ETM_RANGE) {
/*
* Generate a last branch event for the branches left in the
* circular buffer at the end of the trace.
@@ -1081,21 +1419,21 @@ static int cs_etm__flush(struct cs_etm_queue *etmq)
* Use the address of the end of the last reported execution
* range
*/
- u64 addr = cs_etm__last_executed_instr(etmq->prev_packet);
+ u64 addr = cs_etm__last_executed_instr(tidq->prev_packet);
err = cs_etm__synth_instruction_sample(
- etmq, addr,
- etmq->period_instructions);
+ etmq, tidq, addr,
+ tidq->period_instructions);
if (err)
return err;
- etmq->period_instructions = 0;
+ tidq->period_instructions = 0;
}
if (etm->sample_branches &&
- etmq->prev_packet->sample_type == CS_ETM_RANGE) {
- err = cs_etm__synth_branch_sample(etmq);
+ tidq->prev_packet->sample_type == CS_ETM_RANGE) {
+ err = cs_etm__synth_branch_sample(etmq, tidq);
if (err)
return err;
}
@@ -1106,15 +1444,16 @@ swap_packet:
* Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
* the next incoming packet.
*/
- tmp = etmq->packet;
- etmq->packet = etmq->prev_packet;
- etmq->prev_packet = tmp;
+ tmp = tidq->packet;
+ tidq->packet = tidq->prev_packet;
+ tidq->prev_packet = tmp;
}
return err;
}
-static int cs_etm__end_block(struct cs_etm_queue *etmq)
+static int cs_etm__end_block(struct cs_etm_queue *etmq,
+ struct cs_etm_traceid_queue *tidq)
{
int err;
@@ -1128,20 +1467,20 @@ static int cs_etm__end_block(struct cs_etm_queue *etmq)
* the trace.
*/
if (etmq->etm->synth_opts.last_branch &&
- etmq->prev_packet->sample_type == CS_ETM_RANGE) {
+ tidq->prev_packet->sample_type == CS_ETM_RANGE) {
/*
* Use the address of the end of the last reported execution
* range.
*/
- u64 addr = cs_etm__last_executed_instr(etmq->prev_packet);
+ u64 addr = cs_etm__last_executed_instr(tidq->prev_packet);
err = cs_etm__synth_instruction_sample(
- etmq, addr,
- etmq->period_instructions);
+ etmq, tidq, addr,
+ tidq->period_instructions);
if (err)
return err;
- etmq->period_instructions = 0;
+ tidq->period_instructions = 0;
}
return 0;
@@ -1173,12 +1512,13 @@ static int cs_etm__get_data_block(struct cs_etm_queue *etmq)
return etmq->buf_len;
}
-static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq,
+static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, u8 trace_chan_id,
struct cs_etm_packet *packet,
u64 end_addr)
{
- u16 instr16;
- u32 instr32;
+ /* Initialise to keep compiler happy */
+ u16 instr16 = 0;
+ u32 instr32 = 0;
u64 addr;
switch (packet->isa) {
@@ -1196,7 +1536,8 @@ static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq,
* so below only read 2 bytes as instruction size for T32.
*/
addr = end_addr - 2;
- cs_etm__mem_access(etmq, addr, sizeof(instr16), (u8 *)&instr16);
+ cs_etm__mem_access(etmq, trace_chan_id, addr,
+ sizeof(instr16), (u8 *)&instr16);
if ((instr16 & 0xFF00) == 0xDF00)
return true;
@@ -1211,7 +1552,8 @@ static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq,
* +---------+---------+-------------------------+
*/
addr = end_addr - 4;
- cs_etm__mem_access(etmq, addr, sizeof(instr32), (u8 *)&instr32);
+ cs_etm__mem_access(etmq, trace_chan_id, addr,
+ sizeof(instr32), (u8 *)&instr32);
if ((instr32 & 0x0F000000) == 0x0F000000 &&
(instr32 & 0xF0000000) != 0xF0000000)
return true;
@@ -1227,7 +1569,8 @@ static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq,
* +-----------------------+---------+-----------+
*/
addr = end_addr - 4;
- cs_etm__mem_access(etmq, addr, sizeof(instr32), (u8 *)&instr32);
+ cs_etm__mem_access(etmq, trace_chan_id, addr,
+ sizeof(instr32), (u8 *)&instr32);
if ((instr32 & 0xFFE0001F) == 0xd4000001)
return true;
@@ -1240,10 +1583,12 @@ static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq,
return false;
}
-static bool cs_etm__is_syscall(struct cs_etm_queue *etmq, u64 magic)
+static bool cs_etm__is_syscall(struct cs_etm_queue *etmq,
+ struct cs_etm_traceid_queue *tidq, u64 magic)
{
- struct cs_etm_packet *packet = etmq->packet;
- struct cs_etm_packet *prev_packet = etmq->prev_packet;
+ u8 trace_chan_id = tidq->trace_chan_id;
+ struct cs_etm_packet *packet = tidq->packet;
+ struct cs_etm_packet *prev_packet = tidq->prev_packet;
if (magic == __perf_cs_etmv3_magic)
if (packet->exception_number == CS_ETMV3_EXC_SVC)
@@ -1256,7 +1601,7 @@ static bool cs_etm__is_syscall(struct cs_etm_queue *etmq, u64 magic)
*/
if (magic == __perf_cs_etmv4_magic) {
if (packet->exception_number == CS_ETMV4_EXC_CALL &&
- cs_etm__is_svc_instr(etmq, prev_packet,
+ cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet,
prev_packet->end_addr))
return true;
}
@@ -1264,9 +1609,10 @@ static bool cs_etm__is_syscall(struct cs_etm_queue *etmq, u64 magic)
return false;
}
-static bool cs_etm__is_async_exception(struct cs_etm_queue *etmq, u64 magic)
+static bool cs_etm__is_async_exception(struct cs_etm_traceid_queue *tidq,
+ u64 magic)
{
- struct cs_etm_packet *packet = etmq->packet;
+ struct cs_etm_packet *packet = tidq->packet;
if (magic == __perf_cs_etmv3_magic)
if (packet->exception_number == CS_ETMV3_EXC_DEBUG_HALT ||
@@ -1289,10 +1635,13 @@ static bool cs_etm__is_async_exception(struct cs_etm_queue *etmq, u64 magic)
return false;
}
-static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq, u64 magic)
+static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq,
+ struct cs_etm_traceid_queue *tidq,
+ u64 magic)
{
- struct cs_etm_packet *packet = etmq->packet;
- struct cs_etm_packet *prev_packet = etmq->prev_packet;
+ u8 trace_chan_id = tidq->trace_chan_id;
+ struct cs_etm_packet *packet = tidq->packet;
+ struct cs_etm_packet *prev_packet = tidq->prev_packet;
if (magic == __perf_cs_etmv3_magic)
if (packet->exception_number == CS_ETMV3_EXC_SMC ||
@@ -1316,7 +1665,7 @@ static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq, u64 magic)
* (SMC, HVC) are taken as sync exceptions.
*/
if (packet->exception_number == CS_ETMV4_EXC_CALL &&
- !cs_etm__is_svc_instr(etmq, prev_packet,
+ !cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet,
prev_packet->end_addr))
return true;
@@ -1335,10 +1684,12 @@ static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq, u64 magic)
return false;
}
-static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq)
+static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq,
+ struct cs_etm_traceid_queue *tidq)
{
- struct cs_etm_packet *packet = etmq->packet;
- struct cs_etm_packet *prev_packet = etmq->prev_packet;
+ struct cs_etm_packet *packet = tidq->packet;
+ struct cs_etm_packet *prev_packet = tidq->prev_packet;
+ u8 trace_chan_id = tidq->trace_chan_id;
u64 magic;
int ret;
@@ -1419,7 +1770,8 @@ static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq)
if (prev_packet->flags == (PERF_IP_FLAG_BRANCH |
PERF_IP_FLAG_RETURN |
PERF_IP_FLAG_INTERRUPT) &&
- cs_etm__is_svc_instr(etmq, packet, packet->start_addr))
+ cs_etm__is_svc_instr(etmq, trace_chan_id,
+ packet, packet->start_addr))
prev_packet->flags = PERF_IP_FLAG_BRANCH |
PERF_IP_FLAG_RETURN |
PERF_IP_FLAG_SYSCALLRET;
@@ -1440,7 +1792,7 @@ static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq)
return ret;
/* The exception is for system call. */
- if (cs_etm__is_syscall(etmq, magic))
+ if (cs_etm__is_syscall(etmq, tidq, magic))
packet->flags = PERF_IP_FLAG_BRANCH |
PERF_IP_FLAG_CALL |
PERF_IP_FLAG_SYSCALLRET;
@@ -1448,7 +1800,7 @@ static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq)
* The exceptions are triggered by external signals from bus,
* interrupt controller, debug module, PE reset or halt.
*/
- else if (cs_etm__is_async_exception(etmq, magic))
+ else if (cs_etm__is_async_exception(tidq, magic))
packet->flags = PERF_IP_FLAG_BRANCH |
PERF_IP_FLAG_CALL |
PERF_IP_FLAG_ASYNC |
@@ -1457,7 +1809,7 @@ static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq)
* Otherwise, exception is caused by trap, instruction &
* data fault, or alignment errors.
*/
- else if (cs_etm__is_sync_exception(etmq, magic))
+ else if (cs_etm__is_sync_exception(etmq, tidq, magic))
packet->flags = PERF_IP_FLAG_BRANCH |
PERF_IP_FLAG_CALL |
PERF_IP_FLAG_INTERRUPT;
@@ -1539,75 +1891,106 @@ out:
return ret;
}
-static int cs_etm__process_decoder_queue(struct cs_etm_queue *etmq)
+static int cs_etm__process_traceid_queue(struct cs_etm_queue *etmq,
+ struct cs_etm_traceid_queue *tidq)
{
int ret;
+ struct cs_etm_packet_queue *packet_queue;
- /* Process each packet in this chunk */
- while (1) {
- ret = cs_etm_decoder__get_packet(etmq->decoder,
- etmq->packet);
- if (ret <= 0)
- /*
- * Stop processing this chunk on
- * end of data or error
- */
- break;
+ packet_queue = &tidq->packet_queue;
+ /* Process each packet in this chunk */
+ while (1) {
+ ret = cs_etm_decoder__get_packet(packet_queue,
+ tidq->packet);
+ if (ret <= 0)
/*
- * Since packet addresses are swapped in packet
- * handling within below switch() statements,
- * thus setting sample flags must be called
- * prior to switch() statement to use address
- * information before packets swapping.
+ * Stop processing this chunk on
+ * end of data or error
*/
- ret = cs_etm__set_sample_flags(etmq);
- if (ret < 0)
- break;
-
- switch (etmq->packet->sample_type) {
- case CS_ETM_RANGE:
- /*
- * If the packet contains an instruction
- * range, generate instruction sequence
- * events.
- */
- cs_etm__sample(etmq);
- break;
- case CS_ETM_EXCEPTION:
- case CS_ETM_EXCEPTION_RET:
- /*
- * If the exception packet is coming,
- * make sure the previous instruction
- * range packet to be handled properly.
- */
- cs_etm__exception(etmq);
- break;
- case CS_ETM_DISCONTINUITY:
- /*
- * Discontinuity in trace, flush
- * previous branch stack
- */
- cs_etm__flush(etmq);
- break;
- case CS_ETM_EMPTY:
- /*
- * Should not receive empty packet,
- * report error.
- */
- pr_err("CS ETM Trace: empty packet\n");
- return -EINVAL;
- default:
- break;
- }
+ break;
+
+ /*
+ * Since packet addresses are swapped in packet
+ * handling within below switch() statements,
+ * thus setting sample flags must be called
+ * prior to switch() statement to use address
+ * information before packets swapping.
+ */
+ ret = cs_etm__set_sample_flags(etmq, tidq);
+ if (ret < 0)
+ break;
+
+ switch (tidq->packet->sample_type) {
+ case CS_ETM_RANGE:
+ /*
+ * If the packet contains an instruction
+ * range, generate instruction sequence
+ * events.
+ */
+ cs_etm__sample(etmq, tidq);
+ break;
+ case CS_ETM_EXCEPTION:
+ case CS_ETM_EXCEPTION_RET:
+ /*
+ * If the exception packet is coming,
+ * make sure the previous instruction
+ * range packet to be handled properly.
+ */
+ cs_etm__exception(tidq);
+ break;
+ case CS_ETM_DISCONTINUITY:
+ /*
+ * Discontinuity in trace, flush
+ * previous branch stack
+ */
+ cs_etm__flush(etmq, tidq);
+ break;
+ case CS_ETM_EMPTY:
+ /*
+ * Should not receive empty packet,
+ * report error.
+ */
+ pr_err("CS ETM Trace: empty packet\n");
+ return -EINVAL;
+ default:
+ break;
}
+ }
return ret;
}
+static void cs_etm__clear_all_traceid_queues(struct cs_etm_queue *etmq)
+{
+ int idx;
+ struct int_node *inode;
+ struct cs_etm_traceid_queue *tidq;
+ struct intlist *traceid_queues_list = etmq->traceid_queues_list;
+
+ intlist__for_each_entry(inode, traceid_queues_list) {
+ idx = (int)(intptr_t)inode->priv;
+ tidq = etmq->traceid_queues[idx];
+
+ /* Ignore return value */
+ cs_etm__process_traceid_queue(etmq, tidq);
+
+ /*
+ * Generate an instruction sample with the remaining
+ * branchstack entries.
+ */
+ cs_etm__flush(etmq, tidq);
+ }
+}
+
static int cs_etm__run_decoder(struct cs_etm_queue *etmq)
{
int err = 0;
+ struct cs_etm_traceid_queue *tidq;
+
+ tidq = cs_etm__etmq_get_traceid_queue(etmq, CS_ETM_PER_THREAD_TRACEID);
+ if (!tidq)
+ return -EINVAL;
/* Go through each buffer in the queue and decode them one by one */
while (1) {
@@ -1626,13 +2009,13 @@ static int cs_etm__run_decoder(struct cs_etm_queue *etmq)
* an error occurs other than hoping the next one will
* be better.
*/
- err = cs_etm__process_decoder_queue(etmq);
+ err = cs_etm__process_traceid_queue(etmq, tidq);
} while (etmq->buf_len);
if (err == 0)
/* Flush any remaining branch stack entries */
- err = cs_etm__end_block(etmq);
+ err = cs_etm__end_block(etmq, tidq);
}
return err;
@@ -1647,9 +2030,19 @@ static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
for (i = 0; i < queues->nr_queues; i++) {
struct auxtrace_queue *queue = &etm->queues.queue_array[i];
struct cs_etm_queue *etmq = queue->priv;
+ struct cs_etm_traceid_queue *tidq;
+
+ if (!etmq)
+ continue;
+
+ tidq = cs_etm__etmq_get_traceid_queue(etmq,
+ CS_ETM_PER_THREAD_TRACEID);
+
+ if (!tidq)
+ continue;
- if (etmq && ((tid == -1) || (etmq->tid == tid))) {
- cs_etm__set_pid_tid_cpu(etm, queue);
+ if ((tid == -1) || (tidq->tid == tid)) {
+ cs_etm__set_pid_tid_cpu(etm, tidq);
cs_etm__run_decoder(etmq);
}
}
@@ -1657,6 +2050,164 @@ static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
return 0;
}
+static int cs_etm__process_queues(struct cs_etm_auxtrace *etm)
+{
+ int ret = 0;
+ unsigned int cs_queue_nr, queue_nr;
+ u8 trace_chan_id;
+ u64 timestamp;
+ struct auxtrace_queue *queue;
+ struct cs_etm_queue *etmq;
+ struct cs_etm_traceid_queue *tidq;
+
+ while (1) {
+ if (!etm->heap.heap_cnt)
+ goto out;
+
+ /* Take the entry at the top of the min heap */
+ cs_queue_nr = etm->heap.heap_array[0].queue_nr;
+ queue_nr = TO_QUEUE_NR(cs_queue_nr);
+ trace_chan_id = TO_TRACE_CHAN_ID(cs_queue_nr);
+ queue = &etm->queues.queue_array[queue_nr];
+ etmq = queue->priv;
+
+ /*
+ * Remove the top entry from the heap since we are about
+ * to process it.
+ */
+ auxtrace_heap__pop(&etm->heap);
+
+ tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
+ if (!tidq) {
+ /*
+ * No traceID queue has been allocated for this traceID,
+ * which means something somewhere went very wrong. No
+ * other choice than simply exit.
+ */
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /*
+ * Packets associated with this timestamp are already in
+ * the etmq's traceID queue, so process them.
+ */
+ ret = cs_etm__process_traceid_queue(etmq, tidq);
+ if (ret < 0)
+ goto out;
+
+ /*
+ * Packets for this timestamp have been processed, time to
+ * move on to the next timestamp, fetching a new auxtrace_buffer
+ * if need be.
+ */
+refetch:
+ ret = cs_etm__get_data_block(etmq);
+ if (ret < 0)
+ goto out;
+
+ /*
+ * No more auxtrace_buffers to process in this etmq, simply
+ * move on to another entry in the auxtrace_heap.
+ */
+ if (!ret)
+ continue;
+
+ ret = cs_etm__decode_data_block(etmq);
+ if (ret)
+ goto out;
+
+ timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
+
+ if (!timestamp) {
+ /*
+ * Function cs_etm__decode_data_block() returns when
+ * there is no more traces to decode in the current
+ * auxtrace_buffer OR when a timestamp has been
+ * encountered on any of the traceID queues. Since we
+ * did not get a timestamp, there is no more traces to
+ * process in this auxtrace_buffer. As such empty and
+ * flush all traceID queues.
+ */
+ cs_etm__clear_all_traceid_queues(etmq);
+
+ /* Fetch another auxtrace_buffer for this etmq */
+ goto refetch;
+ }
+
+ /*
+ * Add to the min heap the timestamp for packets that have
+ * just been decoded. They will be processed and synthesized
+ * during the next call to cs_etm__process_traceid_queue() for
+ * this queue/traceID.
+ */
+ cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
+ ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, timestamp);
+ }
+
+out:
+ return ret;
+}
+
+static int cs_etm__process_itrace_start(struct cs_etm_auxtrace *etm,
+ union perf_event *event)
+{
+ struct thread *th;
+
+ if (etm->timeless_decoding)
+ return 0;
+
+ /*
+ * Add the tid/pid to the log so that we can get a match when
+ * we get a contextID from the decoder.
+ */
+ th = machine__findnew_thread(etm->machine,
+ event->itrace_start.pid,
+ event->itrace_start.tid);
+ if (!th)
+ return -ENOMEM;
+
+ thread__put(th);
+
+ return 0;
+}
+
+static int cs_etm__process_switch_cpu_wide(struct cs_etm_auxtrace *etm,
+ union perf_event *event)
+{
+ struct thread *th;
+ bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
+
+ /*
+ * Context switch in per-thread mode are irrelevant since perf
+ * will start/stop tracing as the process is scheduled.
+ */
+ if (etm->timeless_decoding)
+ return 0;
+
+ /*
+ * SWITCH_IN events carry the next process to be switched out while
+ * SWITCH_OUT events carry the process to be switched in. As such
+ * we don't care about IN events.
+ */
+ if (!out)
+ return 0;
+
+ /*
+ * Add the tid/pid to the log so that we can get a match when
+ * we get a contextID from the decoder.
+ */
+ th = machine__findnew_thread(etm->machine,
+ event->context_switch.next_prev_pid,
+ event->context_switch.next_prev_tid);
+ if (!th)
+ return -ENOMEM;
+
+ thread__put(th);
+
+ return 0;
+}
+
static int cs_etm__process_event(struct perf_session *session,
union perf_event *event,
struct perf_sample *sample,
@@ -1676,9 +2227,6 @@ static int cs_etm__process_event(struct perf_session *session,
return -EINVAL;
}
- if (!etm->timeless_decoding)
- return -EINVAL;
-
if (sample->time && (sample->time != (u64) -1))
timestamp = sample->time;
else
@@ -1690,10 +2238,20 @@ static int cs_etm__process_event(struct perf_session *session,
return err;
}
- if (event->header.type == PERF_RECORD_EXIT)
+ if (etm->timeless_decoding &&
+ event->header.type == PERF_RECORD_EXIT)
return cs_etm__process_timeless_queues(etm,
event->fork.tid);
+ if (event->header.type == PERF_RECORD_ITRACE_START)
+ return cs_etm__process_itrace_start(etm, event);
+ else if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
+ return cs_etm__process_switch_cpu_wide(etm, event);
+
+ if (!etm->timeless_decoding &&
+ event->header.type == PERF_RECORD_AUX)
+ return cs_etm__process_queues(etm);
+
return 0;
}
diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h
index 0e97c196147a..bc848fd095f4 100644
--- a/tools/perf/util/cs-etm.h
+++ b/tools/perf/util/cs-etm.h
@@ -9,6 +9,7 @@
#include "util/event.h"
#include "util/session.h"
+#include <linux/bits.h>
/* Versionning header in case things need tro change in the future. That way
* decoding of old snapshot is still possible.
@@ -97,12 +98,72 @@ enum {
CS_ETMV4_EXC_END = 31,
};
+enum cs_etm_sample_type {
+ CS_ETM_EMPTY,
+ CS_ETM_RANGE,
+ CS_ETM_DISCONTINUITY,
+ CS_ETM_EXCEPTION,
+ CS_ETM_EXCEPTION_RET,
+};
+
+enum cs_etm_isa {
+ CS_ETM_ISA_UNKNOWN,
+ CS_ETM_ISA_A64,
+ CS_ETM_ISA_A32,
+ CS_ETM_ISA_T32,
+};
+
/* RB tree for quick conversion between traceID and metadata pointers */
struct intlist *traceid_list;
+struct cs_etm_queue;
+
+struct cs_etm_packet {
+ enum cs_etm_sample_type sample_type;
+ enum cs_etm_isa isa;
+ u64 start_addr;
+ u64 end_addr;
+ u32 instr_count;
+ u32 last_instr_type;
+ u32 last_instr_subtype;
+ u32 flags;
+ u32 exception_number;
+ u8 last_instr_cond;
+ u8 last_instr_taken_branch;
+ u8 last_instr_size;
+ u8 trace_chan_id;
+ int cpu;
+};
+
+#define CS_ETM_PACKET_MAX_BUFFER 1024
+
+/*
+ * When working with per-thread scenarios the process under trace can
+ * be scheduled on any CPU and as such, more than one traceID may be
+ * associated with the same process. Since a traceID of '0' is illegal
+ * as per the CoreSight architecture, use that specific value to
+ * identify the queue where all packets (with any traceID) are
+ * aggregated.
+ */
+#define CS_ETM_PER_THREAD_TRACEID 0
+
+struct cs_etm_packet_queue {
+ u32 packet_count;
+ u32 head;
+ u32 tail;
+ u32 instr_count;
+ u64 timestamp;
+ u64 next_timestamp;
+ struct cs_etm_packet packet_buffer[CS_ETM_PACKET_MAX_BUFFER];
+};
+
#define KiB(x) ((x) * 1024)
#define MiB(x) ((x) * 1024 * 1024)
+#define CS_ETM_INVAL_ADDR 0xdeadbeefdeadbeefUL
+
+#define BMVAL(val, lsb, msb) ((val & GENMASK(msb, lsb)) >> lsb)
+
#define CS_ETM_HEADER_SIZE (CS_HEADER_VERSION_0_MAX * sizeof(u64))
#define __perf_cs_etmv3_magic 0x3030303030303030ULL
@@ -114,6 +175,13 @@ struct intlist *traceid_list;
int cs_etm__process_auxtrace_info(union perf_event *event,
struct perf_session *session);
int cs_etm__get_cpu(u8 trace_chan_id, int *cpu);
+int cs_etm__etmq_set_tid(struct cs_etm_queue *etmq,
+ pid_t tid, u8 trace_chan_id);
+bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq);
+void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
+ u8 trace_chan_id);
+struct cs_etm_packet_queue
+*cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id);
#else
static inline int
cs_etm__process_auxtrace_info(union perf_event *event __maybe_unused,
@@ -127,6 +195,32 @@ static inline int cs_etm__get_cpu(u8 trace_chan_id __maybe_unused,
{
return -1;
}
+
+static inline int cs_etm__etmq_set_tid(
+ struct cs_etm_queue *etmq __maybe_unused,
+ pid_t tid __maybe_unused,
+ u8 trace_chan_id __maybe_unused)
+{
+ return -1;
+}
+
+static inline bool cs_etm__etmq_is_timeless(
+ struct cs_etm_queue *etmq __maybe_unused)
+{
+ /* What else to return? */
+ return true;
+}
+
+static inline void cs_etm__etmq_set_traceid_queue_timestamp(
+ struct cs_etm_queue *etmq __maybe_unused,
+ u8 trace_chan_id __maybe_unused) {}
+
+static inline struct cs_etm_packet_queue *cs_etm__etmq_get_packet_queue(
+ struct cs_etm_queue *etmq __maybe_unused,
+ u8 trace_chan_id __maybe_unused)
+{
+ return NULL;
+}
#endif
#endif
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index e059976d9d93..1fb18292c2d3 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -9,6 +9,8 @@
#include <errno.h>
#include <fcntl.h>
#include <libgen.h>
+#include <bpf/libbpf.h>
+#include "bpf-event.h"
#include "compress.h"
#include "namespaces.h"
#include "path.h"
@@ -706,6 +708,44 @@ bool dso__data_status_seen(struct dso *dso, enum dso_data_status_seen by)
return false;
}
+static ssize_t bpf_read(struct dso *dso, u64 offset, char *data)
+{
+ struct bpf_prog_info_node *node;
+ ssize_t size = DSO__DATA_CACHE_SIZE;
+ u64 len;
+ u8 *buf;
+
+ node = perf_env__find_bpf_prog_info(dso->bpf_prog.env, dso->bpf_prog.id);
+ if (!node || !node->info_linear) {
+ dso->data.status = DSO_DATA_STATUS_ERROR;
+ return -1;
+ }
+
+ len = node->info_linear->info.jited_prog_len;
+ buf = (u8 *)(uintptr_t)node->info_linear->info.jited_prog_insns;
+
+ if (offset >= len)
+ return -1;
+
+ size = (ssize_t)min(len - offset, (u64)size);
+ memcpy(data, buf + offset, size);
+ return size;
+}
+
+static int bpf_size(struct dso *dso)
+{
+ struct bpf_prog_info_node *node;
+
+ node = perf_env__find_bpf_prog_info(dso->bpf_prog.env, dso->bpf_prog.id);
+ if (!node || !node->info_linear) {
+ dso->data.status = DSO_DATA_STATUS_ERROR;
+ return -1;
+ }
+
+ dso->data.file_size = node->info_linear->info.jited_prog_len;
+ return 0;
+}
+
static void
dso_cache__free(struct dso *dso)
{
@@ -794,48 +834,53 @@ dso_cache__memcpy(struct dso_cache *cache, u64 offset,
return cache_size;
}
-static ssize_t
-dso_cache__read(struct dso *dso, struct machine *machine,
- u64 offset, u8 *data, ssize_t size)
+static ssize_t file_read(struct dso *dso, struct machine *machine,
+ u64 offset, char *data)
{
- struct dso_cache *cache;
- struct dso_cache *old;
ssize_t ret;
- do {
- u64 cache_offset;
+ pthread_mutex_lock(&dso__data_open_lock);
- cache = zalloc(sizeof(*cache) + DSO__DATA_CACHE_SIZE);
- if (!cache)
- return -ENOMEM;
+ /*
+ * dso->data.fd might be closed if other thread opened another
+ * file (dso) due to open file limit (RLIMIT_NOFILE).
+ */
+ try_to_open_dso(dso, machine);
- pthread_mutex_lock(&dso__data_open_lock);
+ if (dso->data.fd < 0) {
+ dso->data.status = DSO_DATA_STATUS_ERROR;
+ ret = -errno;
+ goto out;
+ }
- /*
- * dso->data.fd might be closed if other thread opened another
- * file (dso) due to open file limit (RLIMIT_NOFILE).
- */
- try_to_open_dso(dso, machine);
+ ret = pread(dso->data.fd, data, DSO__DATA_CACHE_SIZE, offset);
+out:
+ pthread_mutex_unlock(&dso__data_open_lock);
+ return ret;
+}
- if (dso->data.fd < 0) {
- ret = -errno;
- dso->data.status = DSO_DATA_STATUS_ERROR;
- break;
- }
+static ssize_t
+dso_cache__read(struct dso *dso, struct machine *machine,
+ u64 offset, u8 *data, ssize_t size)
+{
+ u64 cache_offset = offset & DSO__DATA_CACHE_MASK;
+ struct dso_cache *cache;
+ struct dso_cache *old;
+ ssize_t ret;
- cache_offset = offset & DSO__DATA_CACHE_MASK;
+ cache = zalloc(sizeof(*cache) + DSO__DATA_CACHE_SIZE);
+ if (!cache)
+ return -ENOMEM;
- ret = pread(dso->data.fd, cache->data, DSO__DATA_CACHE_SIZE, cache_offset);
- if (ret <= 0)
- break;
+ if (dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO)
+ ret = bpf_read(dso, cache_offset, cache->data);
+ else
+ ret = file_read(dso, machine, cache_offset, cache->data);
+ if (ret > 0) {
cache->offset = cache_offset;
cache->size = ret;
- } while (0);
-
- pthread_mutex_unlock(&dso__data_open_lock);
- if (ret > 0) {
old = dso_cache__insert(dso, cache);
if (old) {
/* we lose the race */
@@ -898,18 +943,12 @@ static ssize_t cached_read(struct dso *dso, struct machine *machine,
return r;
}
-int dso__data_file_size(struct dso *dso, struct machine *machine)
+static int file_size(struct dso *dso, struct machine *machine)
{
int ret = 0;
struct stat st;
char sbuf[STRERR_BUFSIZE];
- if (dso->data.file_size)
- return 0;
-
- if (dso->data.status == DSO_DATA_STATUS_ERROR)
- return -1;
-
pthread_mutex_lock(&dso__data_open_lock);
/*
@@ -938,6 +977,20 @@ out:
return ret;
}
+int dso__data_file_size(struct dso *dso, struct machine *machine)
+{
+ if (dso->data.file_size)
+ return 0;
+
+ if (dso->data.status == DSO_DATA_STATUS_ERROR)
+ return -1;
+
+ if (dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO)
+ return bpf_size(dso);
+
+ return file_size(dso, machine);
+}
+
/**
* dso__data_size - Return dso data size
* @dso: dso object
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index 6a3eaf7d9353..1cc7a1837822 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -246,6 +246,7 @@ int perf_env__read_cpu_topology_map(struct perf_env *env)
for (cpu = 0; cpu < nr_cpus; ++cpu) {
env->cpu[cpu].core_id = cpu_map__get_core_id(cpu);
env->cpu[cpu].socket_id = cpu_map__get_socket_id(cpu);
+ env->cpu[cpu].die_id = cpu_map__get_die_id(cpu);
}
env->nr_cpus_avail = nr_cpus;
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index 271a90b326c4..d5d9865aa812 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -9,6 +9,7 @@
struct cpu_topology_map {
int socket_id;
+ int die_id;
int core_id;
};
@@ -49,6 +50,7 @@ struct perf_env {
int nr_cmdline;
int nr_sibling_cores;
+ int nr_sibling_dies;
int nr_sibling_threads;
int nr_numa_nodes;
int nr_memory_nodes;
@@ -57,6 +59,7 @@ struct perf_env {
char *cmdline;
const char **cmdline_argv;
char *sibling_cores;
+ char *sibling_dies;
char *sibling_threads;
char *pmu_mappings;
struct cpu_topology_map *cpu;
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index d1ad6c419724..c9c6857360e4 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -1486,7 +1486,7 @@ static size_t perf_event__fprintf_lost(union perf_event *event, FILE *fp)
size_t perf_event__fprintf_ksymbol(union perf_event *event, FILE *fp)
{
- return fprintf(fp, " ksymbol event with addr %" PRIx64 " len %u type %u flags 0x%x name %s\n",
+ return fprintf(fp, " addr %" PRIx64 " len %u type %u flags 0x%x name %s\n",
event->ksymbol_event.addr, event->ksymbol_event.len,
event->ksymbol_event.ksym_type,
event->ksymbol_event.flags, event->ksymbol_event.name);
@@ -1494,7 +1494,7 @@ size_t perf_event__fprintf_ksymbol(union perf_event *event, FILE *fp)
size_t perf_event__fprintf_bpf_event(union perf_event *event, FILE *fp)
{
- return fprintf(fp, " bpf event with type %u, flags %u, id %u\n",
+ return fprintf(fp, " type %u, flags %u, id %u\n",
event->bpf_event.type, event->bpf_event.flags,
event->bpf_event.id);
}
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 9e999550f247..1f1da6082806 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -204,6 +204,8 @@ struct perf_sample {
u64 period;
u64 weight;
u64 transaction;
+ u64 insn_cnt;
+ u64 cyc_cnt;
u32 cpu;
u32 raw_size;
u64 data_src;
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 4a5947625c5c..04c4ed1573cb 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -589,6 +589,9 @@ const char *perf_evsel__name(struct perf_evsel *evsel)
{
char bf[128];
+ if (!evsel)
+ goto out_unknown;
+
if (evsel->name)
return evsel->name;
@@ -628,7 +631,10 @@ const char *perf_evsel__name(struct perf_evsel *evsel)
evsel->name = strdup(bf);
- return evsel->name ?: "unknown";
+ if (evsel->name)
+ return evsel->name;
+out_unknown:
+ return "unknown";
}
const char *perf_evsel__group_name(struct perf_evsel *evsel)
@@ -679,6 +685,10 @@ static void __perf_evsel__config_callchain(struct perf_evsel *evsel,
attr->sample_max_stack = param->max_stack;
+ if (opts->kernel_callchains)
+ attr->exclude_callchain_user = 1;
+ if (opts->user_callchains)
+ attr->exclude_callchain_kernel = 1;
if (param->record_mode == CALLCHAIN_LBR) {
if (!opts->branch_stack) {
if (attr->exclude_user) {
@@ -701,7 +711,14 @@ static void __perf_evsel__config_callchain(struct perf_evsel *evsel,
if (!function) {
perf_evsel__set_sample_bit(evsel, REGS_USER);
perf_evsel__set_sample_bit(evsel, STACK_USER);
- attr->sample_regs_user |= PERF_REGS_MASK;
+ if (opts->sample_user_regs && DWARF_MINIMAL_REGS != PERF_REGS_MASK) {
+ attr->sample_regs_user |= DWARF_MINIMAL_REGS;
+ pr_warning("WARNING: The use of --call-graph=dwarf may require all the user registers, "
+ "specifying a subset with --user-regs may render DWARF unwinding unreliable, "
+ "so the minimal registers set (IP, SP) is explicitly forced.\n");
+ } else {
+ attr->sample_regs_user |= PERF_REGS_MASK;
+ }
attr->sample_stack_user = param->dump_size;
attr->exclude_callchain_user = 1;
} else {
@@ -1136,9 +1153,6 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,
static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
{
- if (evsel->system_wide)
- nthreads = 1;
-
evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int));
if (evsel->fd) {
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 847ae51a524b..06ddb6618ef3 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -599,6 +599,27 @@ static int write_cpu_topology(struct feat_fd *ff,
if (ret < 0)
return ret;
}
+
+ if (!tp->die_sib)
+ goto done;
+
+ ret = do_write(ff, &tp->die_sib, sizeof(tp->die_sib));
+ if (ret < 0)
+ goto done;
+
+ for (i = 0; i < tp->die_sib; i++) {
+ ret = do_write_string(ff, tp->die_siblings[i]);
+ if (ret < 0)
+ goto done;
+ }
+
+ for (j = 0; j < perf_env.nr_cpus_avail; j++) {
+ ret = do_write(ff, &perf_env.cpu[j].die_id,
+ sizeof(perf_env.cpu[j].die_id));
+ if (ret < 0)
+ return ret;
+ }
+
done:
cpu_topology__delete(tp);
return ret;
@@ -1439,10 +1460,20 @@ static void print_cpu_topology(struct feat_fd *ff, FILE *fp)
str = ph->env.sibling_cores;
for (i = 0; i < nr; i++) {
- fprintf(fp, "# sibling cores : %s\n", str);
+ fprintf(fp, "# sibling sockets : %s\n", str);
str += strlen(str) + 1;
}
+ if (ph->env.nr_sibling_dies) {
+ nr = ph->env.nr_sibling_dies;
+ str = ph->env.sibling_dies;
+
+ for (i = 0; i < nr; i++) {
+ fprintf(fp, "# sibling dies : %s\n", str);
+ str += strlen(str) + 1;
+ }
+ }
+
nr = ph->env.nr_sibling_threads;
str = ph->env.sibling_threads;
@@ -1451,12 +1482,28 @@ static void print_cpu_topology(struct feat_fd *ff, FILE *fp)
str += strlen(str) + 1;
}
- if (ph->env.cpu != NULL) {
- for (i = 0; i < cpu_nr; i++)
- fprintf(fp, "# CPU %d: Core ID %d, Socket ID %d\n", i,
- ph->env.cpu[i].core_id, ph->env.cpu[i].socket_id);
- } else
- fprintf(fp, "# Core ID and Socket ID information is not available\n");
+ if (ph->env.nr_sibling_dies) {
+ if (ph->env.cpu != NULL) {
+ for (i = 0; i < cpu_nr; i++)
+ fprintf(fp, "# CPU %d: Core ID %d, "
+ "Die ID %d, Socket ID %d\n",
+ i, ph->env.cpu[i].core_id,
+ ph->env.cpu[i].die_id,
+ ph->env.cpu[i].socket_id);
+ } else
+ fprintf(fp, "# Core ID, Die ID and Socket ID "
+ "information is not available\n");
+ } else {
+ if (ph->env.cpu != NULL) {
+ for (i = 0; i < cpu_nr; i++)
+ fprintf(fp, "# CPU %d: Core ID %d, "
+ "Socket ID %d\n",
+ i, ph->env.cpu[i].core_id,
+ ph->env.cpu[i].socket_id);
+ } else
+ fprintf(fp, "# Core ID and Socket ID "
+ "information is not available\n");
+ }
}
static void print_clockid(struct feat_fd *ff, FILE *fp)
@@ -2214,6 +2261,7 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused)
goto free_cpu;
ph->env.cpu[i].core_id = nr;
+ size += sizeof(u32);
if (do_read_u32(ff, &nr))
goto free_cpu;
@@ -2225,6 +2273,40 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused)
}
ph->env.cpu[i].socket_id = nr;
+ size += sizeof(u32);
+ }
+
+ /*
+ * The header may be from old perf,
+ * which doesn't include die information.
+ */
+ if (ff->size <= size)
+ return 0;
+
+ if (do_read_u32(ff, &nr))
+ return -1;
+
+ ph->env.nr_sibling_dies = nr;
+ size += sizeof(u32);
+
+ for (i = 0; i < nr; i++) {
+ str = do_read_string(ff);
+ if (!str)
+ goto error;
+
+ /* include a NULL character at the end */
+ if (strbuf_add(&sb, str, strlen(str) + 1) < 0)
+ goto error;
+ size += string_size(str);
+ free(str);
+ }
+ ph->env.sibling_dies = strbuf_detach(&sb, NULL);
+
+ for (i = 0; i < (u32)cpu_nr; i++) {
+ if (do_read_u32(ff, &nr))
+ goto free_cpu;
+
+ ph->env.cpu[i].die_id = nr;
}
return 0;
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 7ace7a10054d..fb3271fd420c 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -2561,7 +2561,7 @@ int __hists__scnprintf_title(struct hists *hists, char *bf, size_t size, bool sh
char unit;
int printed;
const struct dso *dso = hists->dso_filter;
- const struct thread *thread = hists->thread_filter;
+ struct thread *thread = hists->thread_filter;
int socket_id = hists->socket_filter;
unsigned long nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE];
u64 nr_events = hists->stats.total_period;
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index 9d189e90fbdc..f8b71bf2bb4c 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -95,6 +95,7 @@ struct intel_pt_decoder {
uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip,
uint64_t max_insn_cnt, void *data);
bool (*pgd_ip)(uint64_t ip, void *data);
+ int (*lookahead)(void *data, intel_pt_lookahead_cb_t cb, void *cb_data);
void *data;
struct intel_pt_state state;
const unsigned char *buf;
@@ -107,6 +108,7 @@ struct intel_pt_decoder {
bool have_cyc;
bool fixup_last_mtc;
bool have_last_ip;
+ bool in_psb;
enum intel_pt_param_flags flags;
uint64_t pos;
uint64_t last_ip;
@@ -115,6 +117,7 @@ struct intel_pt_decoder {
uint64_t timestamp;
uint64_t tsc_timestamp;
uint64_t ref_timestamp;
+ uint64_t buf_timestamp;
uint64_t sample_timestamp;
uint64_t ret_addr;
uint64_t ctc_timestamp;
@@ -130,6 +133,10 @@ struct intel_pt_decoder {
int mtc_shift;
struct intel_pt_stack stack;
enum intel_pt_pkt_state pkt_state;
+ enum intel_pt_pkt_ctx pkt_ctx;
+ enum intel_pt_pkt_ctx prev_pkt_ctx;
+ enum intel_pt_blk_type blk_type;
+ int blk_type_pos;
struct intel_pt_pkt packet;
struct intel_pt_pkt tnt;
int pkt_step;
@@ -151,6 +158,11 @@ struct intel_pt_decoder {
uint64_t period_mask;
uint64_t period_ticks;
uint64_t last_masked_timestamp;
+ uint64_t tot_cyc_cnt;
+ uint64_t sample_tot_cyc_cnt;
+ uint64_t base_cyc_cnt;
+ uint64_t cyc_cnt_timestamp;
+ double tsc_to_cyc;
bool continuous_period;
bool overflow;
bool set_fup_tx_flags;
@@ -158,6 +170,8 @@ struct intel_pt_decoder {
bool set_fup_mwait;
bool set_fup_pwre;
bool set_fup_exstop;
+ bool set_fup_bep;
+ bool sample_cyc;
unsigned int fup_tx_flags;
unsigned int tx_flags;
uint64_t fup_ptw_payload;
@@ -217,6 +231,7 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params)
decoder->get_trace = params->get_trace;
decoder->walk_insn = params->walk_insn;
decoder->pgd_ip = params->pgd_ip;
+ decoder->lookahead = params->lookahead;
decoder->data = params->data;
decoder->return_compression = params->return_compression;
decoder->branch_enable = params->branch_enable;
@@ -470,7 +485,21 @@ static int intel_pt_bad_packet(struct intel_pt_decoder *decoder)
return -EBADMSG;
}
-static int intel_pt_get_data(struct intel_pt_decoder *decoder)
+static inline void intel_pt_update_sample_time(struct intel_pt_decoder *decoder)
+{
+ decoder->sample_timestamp = decoder->timestamp;
+ decoder->sample_insn_cnt = decoder->timestamp_insn_cnt;
+}
+
+static void intel_pt_reposition(struct intel_pt_decoder *decoder)
+{
+ decoder->ip = 0;
+ decoder->pkt_state = INTEL_PT_STATE_NO_PSB;
+ decoder->timestamp = 0;
+ decoder->have_tma = false;
+}
+
+static int intel_pt_get_data(struct intel_pt_decoder *decoder, bool reposition)
{
struct intel_pt_buffer buffer = { .buf = 0, };
int ret;
@@ -487,12 +516,10 @@ static int intel_pt_get_data(struct intel_pt_decoder *decoder)
intel_pt_log("No more data\n");
return -ENODATA;
}
- if (!buffer.consecutive) {
- decoder->ip = 0;
- decoder->pkt_state = INTEL_PT_STATE_NO_PSB;
+ decoder->buf_timestamp = buffer.ref_timestamp;
+ if (!buffer.consecutive || reposition) {
+ intel_pt_reposition(decoder);
decoder->ref_timestamp = buffer.ref_timestamp;
- decoder->timestamp = 0;
- decoder->have_tma = false;
decoder->state.trace_nr = buffer.trace_nr;
intel_pt_log("Reference timestamp 0x%" PRIx64 "\n",
decoder->ref_timestamp);
@@ -502,10 +529,11 @@ static int intel_pt_get_data(struct intel_pt_decoder *decoder)
return 0;
}
-static int intel_pt_get_next_data(struct intel_pt_decoder *decoder)
+static int intel_pt_get_next_data(struct intel_pt_decoder *decoder,
+ bool reposition)
{
if (!decoder->next_buf)
- return intel_pt_get_data(decoder);
+ return intel_pt_get_data(decoder, reposition);
decoder->buf = decoder->next_buf;
decoder->len = decoder->next_len;
@@ -524,7 +552,7 @@ static int intel_pt_get_split_packet(struct intel_pt_decoder *decoder)
len = decoder->len;
memcpy(buf, decoder->buf, len);
- ret = intel_pt_get_data(decoder);
+ ret = intel_pt_get_data(decoder, false);
if (ret) {
decoder->pos += old_len;
return ret < 0 ? ret : -EINVAL;
@@ -536,7 +564,8 @@ static int intel_pt_get_split_packet(struct intel_pt_decoder *decoder)
memcpy(buf + len, decoder->buf, n);
len += n;
- ret = intel_pt_get_packet(buf, len, &decoder->packet);
+ decoder->prev_pkt_ctx = decoder->pkt_ctx;
+ ret = intel_pt_get_packet(buf, len, &decoder->packet, &decoder->pkt_ctx);
if (ret < (int)old_len) {
decoder->next_buf = decoder->buf;
decoder->next_len = decoder->len;
@@ -571,6 +600,7 @@ static int intel_pt_pkt_lookahead(struct intel_pt_decoder *decoder,
{
struct intel_pt_pkt_info pkt_info;
const unsigned char *buf = decoder->buf;
+ enum intel_pt_pkt_ctx pkt_ctx = decoder->pkt_ctx;
size_t len = decoder->len;
int ret;
@@ -589,7 +619,8 @@ static int intel_pt_pkt_lookahead(struct intel_pt_decoder *decoder,
if (!len)
return INTEL_PT_NEED_MORE_BYTES;
- ret = intel_pt_get_packet(buf, len, &pkt_info.packet);
+ ret = intel_pt_get_packet(buf, len, &pkt_info.packet,
+ &pkt_ctx);
if (!ret)
return INTEL_PT_NEED_MORE_BYTES;
if (ret < 0)
@@ -664,6 +695,10 @@ static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info)
case INTEL_PT_MNT:
case INTEL_PT_PTWRITE:
case INTEL_PT_PTWRITE_IP:
+ case INTEL_PT_BBP:
+ case INTEL_PT_BIP:
+ case INTEL_PT_BEP:
+ case INTEL_PT_BEP_IP:
return 0;
case INTEL_PT_MTC:
@@ -850,13 +885,14 @@ static int intel_pt_get_next_packet(struct intel_pt_decoder *decoder)
decoder->len -= decoder->pkt_step;
if (!decoder->len) {
- ret = intel_pt_get_next_data(decoder);
+ ret = intel_pt_get_next_data(decoder, false);
if (ret)
return ret;
}
+ decoder->prev_pkt_ctx = decoder->pkt_ctx;
ret = intel_pt_get_packet(decoder->buf, decoder->len,
- &decoder->packet);
+ &decoder->packet, &decoder->pkt_ctx);
if (ret == INTEL_PT_NEED_MORE_BYTES && BITS_PER_LONG == 32 &&
decoder->len < INTEL_PT_PKT_MAX_SZ && !decoder->next_buf) {
ret = intel_pt_get_split_packet(decoder);
@@ -1094,6 +1130,14 @@ static bool intel_pt_fup_event(struct intel_pt_decoder *decoder)
decoder->state.to_ip = 0;
ret = true;
}
+ if (decoder->set_fup_bep) {
+ decoder->set_fup_bep = false;
+ decoder->state.type |= INTEL_PT_BLK_ITEMS;
+ decoder->state.type &= ~INTEL_PT_BRANCH;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ ret = true;
+ }
return ret;
}
@@ -1308,10 +1352,10 @@ static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder)
decoder->ip += intel_pt_insn.length;
return 0;
}
+ decoder->sample_cyc = false;
decoder->ip += intel_pt_insn.length;
if (!decoder->tnt.count) {
- decoder->sample_timestamp = decoder->timestamp;
- decoder->sample_insn_cnt = decoder->timestamp_insn_cnt;
+ intel_pt_update_sample_time(decoder);
return -EAGAIN;
}
decoder->tnt.payload <<= 1;
@@ -1345,6 +1389,21 @@ static int intel_pt_mode_tsx(struct intel_pt_decoder *decoder, bool *no_tip)
return 0;
}
+static uint64_t intel_pt_8b_tsc(uint64_t timestamp, uint64_t ref_timestamp)
+{
+ timestamp |= (ref_timestamp & (0xffULL << 56));
+
+ if (timestamp < ref_timestamp) {
+ if (ref_timestamp - timestamp > (1ULL << 55))
+ timestamp += (1ULL << 56);
+ } else {
+ if (timestamp - ref_timestamp > (1ULL << 55))
+ timestamp -= (1ULL << 56);
+ }
+
+ return timestamp;
+}
+
static void intel_pt_calc_tsc_timestamp(struct intel_pt_decoder *decoder)
{
uint64_t timestamp;
@@ -1352,15 +1411,8 @@ static void intel_pt_calc_tsc_timestamp(struct intel_pt_decoder *decoder)
decoder->have_tma = false;
if (decoder->ref_timestamp) {
- timestamp = decoder->packet.payload |
- (decoder->ref_timestamp & (0xffULL << 56));
- if (timestamp < decoder->ref_timestamp) {
- if (decoder->ref_timestamp - timestamp > (1ULL << 55))
- timestamp += (1ULL << 56);
- } else {
- if (timestamp - decoder->ref_timestamp > (1ULL << 55))
- timestamp -= (1ULL << 56);
- }
+ timestamp = intel_pt_8b_tsc(decoder->packet.payload,
+ decoder->ref_timestamp);
decoder->tsc_timestamp = timestamp;
decoder->timestamp = timestamp;
decoder->ref_timestamp = 0;
@@ -1404,6 +1456,42 @@ static int intel_pt_overflow(struct intel_pt_decoder *decoder)
return -EOVERFLOW;
}
+static inline void intel_pt_mtc_cyc_cnt_pge(struct intel_pt_decoder *decoder)
+{
+ if (decoder->have_cyc)
+ return;
+
+ decoder->cyc_cnt_timestamp = decoder->timestamp;
+ decoder->base_cyc_cnt = decoder->tot_cyc_cnt;
+}
+
+static inline void intel_pt_mtc_cyc_cnt_cbr(struct intel_pt_decoder *decoder)
+{
+ decoder->tsc_to_cyc = decoder->cbr / decoder->max_non_turbo_ratio_fp;
+
+ if (decoder->pge)
+ intel_pt_mtc_cyc_cnt_pge(decoder);
+}
+
+static inline void intel_pt_mtc_cyc_cnt_upd(struct intel_pt_decoder *decoder)
+{
+ uint64_t tot_cyc_cnt, tsc_delta;
+
+ if (decoder->have_cyc)
+ return;
+
+ decoder->sample_cyc = true;
+
+ if (!decoder->pge || decoder->timestamp <= decoder->cyc_cnt_timestamp)
+ return;
+
+ tsc_delta = decoder->timestamp - decoder->cyc_cnt_timestamp;
+ tot_cyc_cnt = tsc_delta * decoder->tsc_to_cyc + decoder->base_cyc_cnt;
+
+ if (tot_cyc_cnt > decoder->tot_cyc_cnt)
+ decoder->tot_cyc_cnt = tot_cyc_cnt;
+}
+
static void intel_pt_calc_tma(struct intel_pt_decoder *decoder)
{
uint32_t ctc = decoder->packet.payload;
@@ -1413,6 +1501,11 @@ static void intel_pt_calc_tma(struct intel_pt_decoder *decoder)
if (!decoder->tsc_ctc_ratio_d)
return;
+ if (decoder->pge && !decoder->in_psb)
+ intel_pt_mtc_cyc_cnt_pge(decoder);
+ else
+ intel_pt_mtc_cyc_cnt_upd(decoder);
+
decoder->last_mtc = (ctc >> decoder->mtc_shift) & 0xff;
decoder->ctc_timestamp = decoder->tsc_timestamp - fc;
if (decoder->tsc_ctc_mult) {
@@ -1468,6 +1561,8 @@ static void intel_pt_calc_mtc_timestamp(struct intel_pt_decoder *decoder)
else
decoder->timestamp = timestamp;
+ intel_pt_mtc_cyc_cnt_upd(decoder);
+
decoder->timestamp_insn_cnt = 0;
decoder->last_mtc = mtc;
@@ -1492,6 +1587,8 @@ static void intel_pt_calc_cbr(struct intel_pt_decoder *decoder)
decoder->cbr = cbr;
decoder->cbr_cyc_to_tsc = decoder->max_non_turbo_ratio_fp / cbr;
+
+ intel_pt_mtc_cyc_cnt_cbr(decoder);
}
static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder)
@@ -1501,6 +1598,9 @@ static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder)
decoder->have_cyc = true;
decoder->cycle_cnt += decoder->packet.payload;
+ if (decoder->pge)
+ decoder->tot_cyc_cnt += decoder->packet.payload;
+ decoder->sample_cyc = true;
if (!decoder->cyc_ref_timestamp)
return;
@@ -1523,19 +1623,62 @@ static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder)
intel_pt_log_to("Setting timestamp", decoder->timestamp);
}
+static void intel_pt_bbp(struct intel_pt_decoder *decoder)
+{
+ if (decoder->prev_pkt_ctx == INTEL_PT_NO_CTX) {
+ memset(decoder->state.items.mask, 0, sizeof(decoder->state.items.mask));
+ decoder->state.items.is_32_bit = false;
+ }
+ decoder->blk_type = decoder->packet.payload;
+ decoder->blk_type_pos = intel_pt_blk_type_pos(decoder->blk_type);
+ if (decoder->blk_type == INTEL_PT_GP_REGS)
+ decoder->state.items.is_32_bit = decoder->packet.count;
+ if (decoder->blk_type_pos < 0) {
+ intel_pt_log("WARNING: Unknown block type %u\n",
+ decoder->blk_type);
+ } else if (decoder->state.items.mask[decoder->blk_type_pos]) {
+ intel_pt_log("WARNING: Duplicate block type %u\n",
+ decoder->blk_type);
+ }
+}
+
+static void intel_pt_bip(struct intel_pt_decoder *decoder)
+{
+ uint32_t id = decoder->packet.count;
+ uint32_t bit = 1 << id;
+ int pos = decoder->blk_type_pos;
+
+ if (pos < 0 || id >= INTEL_PT_BLK_ITEM_ID_CNT) {
+ intel_pt_log("WARNING: Unknown block item %u type %d\n",
+ id, decoder->blk_type);
+ return;
+ }
+
+ if (decoder->state.items.mask[pos] & bit) {
+ intel_pt_log("WARNING: Duplicate block item %u type %d\n",
+ id, decoder->blk_type);
+ }
+
+ decoder->state.items.mask[pos] |= bit;
+ decoder->state.items.val[pos][id] = decoder->packet.payload;
+}
+
/* Walk PSB+ packets when already in sync. */
static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
{
int err;
+ decoder->in_psb = true;
+
while (1) {
err = intel_pt_get_next_packet(decoder);
if (err)
- return err;
+ goto out;
switch (decoder->packet.type) {
case INTEL_PT_PSBEND:
- return 0;
+ err = 0;
+ goto out;
case INTEL_PT_TIP_PGD:
case INTEL_PT_TIP_PGE:
@@ -1551,12 +1694,18 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
case INTEL_PT_MWAIT:
case INTEL_PT_PWRE:
case INTEL_PT_PWRX:
+ case INTEL_PT_BBP:
+ case INTEL_PT_BIP:
+ case INTEL_PT_BEP:
+ case INTEL_PT_BEP_IP:
decoder->have_tma = false;
intel_pt_log("ERROR: Unexpected packet\n");
- return -EAGAIN;
+ err = -EAGAIN;
+ goto out;
case INTEL_PT_OVF:
- return intel_pt_overflow(decoder);
+ err = intel_pt_overflow(decoder);
+ goto out;
case INTEL_PT_TSC:
intel_pt_calc_tsc_timestamp(decoder);
@@ -1602,6 +1751,10 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
break;
}
}
+out:
+ decoder->in_psb = false;
+
+ return err;
}
static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
@@ -1638,6 +1791,10 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
case INTEL_PT_MWAIT:
case INTEL_PT_PWRE:
case INTEL_PT_PWRX:
+ case INTEL_PT_BBP:
+ case INTEL_PT_BIP:
+ case INTEL_PT_BEP:
+ case INTEL_PT_BEP_IP:
intel_pt_log("ERROR: Missing TIP after FUP\n");
decoder->pkt_state = INTEL_PT_STATE_ERR3;
decoder->pkt_step = 0;
@@ -1675,6 +1832,7 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
decoder->state.to_ip = decoder->ip;
}
decoder->state.type |= INTEL_PT_TRACE_BEGIN;
+ intel_pt_mtc_cyc_cnt_pge(decoder);
return 0;
case INTEL_PT_TIP:
@@ -1745,6 +1903,7 @@ next:
case INTEL_PT_TIP_PGE: {
decoder->pge = true;
+ intel_pt_mtc_cyc_cnt_pge(decoder);
if (decoder->packet.count == 0) {
intel_pt_log_at("Skipping zero TIP.PGE",
decoder->pos);
@@ -1957,6 +2116,33 @@ next:
decoder->state.pwrx_payload = decoder->packet.payload;
return 0;
+ case INTEL_PT_BBP:
+ intel_pt_bbp(decoder);
+ break;
+
+ case INTEL_PT_BIP:
+ intel_pt_bip(decoder);
+ break;
+
+ case INTEL_PT_BEP:
+ decoder->state.type = INTEL_PT_BLK_ITEMS;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ return 0;
+
+ case INTEL_PT_BEP_IP:
+ err = intel_pt_get_next_packet(decoder);
+ if (err)
+ return err;
+ if (decoder->packet.type == INTEL_PT_FUP) {
+ decoder->set_fup_bep = true;
+ no_tip = true;
+ } else {
+ intel_pt_log_at("ERROR: Missing FUP after BEP",
+ decoder->pos);
+ }
+ goto next;
+
default:
return intel_pt_bug(decoder);
}
@@ -1975,10 +2161,12 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
{
int err;
+ decoder->in_psb = true;
+
while (1) {
err = intel_pt_get_next_packet(decoder);
if (err)
- return err;
+ goto out;
switch (decoder->packet.type) {
case INTEL_PT_TIP_PGD:
@@ -1993,8 +2181,13 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
case INTEL_PT_MWAIT:
case INTEL_PT_PWRE:
case INTEL_PT_PWRX:
+ case INTEL_PT_BBP:
+ case INTEL_PT_BIP:
+ case INTEL_PT_BEP:
+ case INTEL_PT_BEP_IP:
intel_pt_log("ERROR: Unexpected packet\n");
- return -ENOENT;
+ err = -ENOENT;
+ goto out;
case INTEL_PT_FUP:
decoder->pge = true;
@@ -2053,16 +2246,20 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
decoder->pkt_state = INTEL_PT_STATE_ERR4;
else
decoder->pkt_state = INTEL_PT_STATE_ERR3;
- return -ENOENT;
+ err = -ENOENT;
+ goto out;
case INTEL_PT_BAD: /* Does not happen */
- return intel_pt_bug(decoder);
+ err = intel_pt_bug(decoder);
+ goto out;
case INTEL_PT_OVF:
- return intel_pt_overflow(decoder);
+ err = intel_pt_overflow(decoder);
+ goto out;
case INTEL_PT_PSBEND:
- return 0;
+ err = 0;
+ goto out;
case INTEL_PT_PSB:
case INTEL_PT_VMCS:
@@ -2072,6 +2269,10 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
break;
}
}
+out:
+ decoder->in_psb = false;
+
+ return err;
}
static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
@@ -2086,18 +2287,30 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
switch (decoder->packet.type) {
case INTEL_PT_TIP_PGD:
decoder->continuous_period = false;
- __fallthrough;
+ decoder->pge = false;
+ if (intel_pt_have_ip(decoder))
+ intel_pt_set_ip(decoder);
+ if (!decoder->ip)
+ break;
+ decoder->state.type |= INTEL_PT_TRACE_END;
+ return 0;
+
case INTEL_PT_TIP_PGE:
+ decoder->pge = true;
+ intel_pt_mtc_cyc_cnt_pge(decoder);
+ if (intel_pt_have_ip(decoder))
+ intel_pt_set_ip(decoder);
+ if (!decoder->ip)
+ break;
+ decoder->state.type |= INTEL_PT_TRACE_BEGIN;
+ return 0;
+
case INTEL_PT_TIP:
- decoder->pge = decoder->packet.type != INTEL_PT_TIP_PGD;
+ decoder->pge = true;
if (intel_pt_have_ip(decoder))
intel_pt_set_ip(decoder);
if (!decoder->ip)
break;
- if (decoder->packet.type == INTEL_PT_TIP_PGE)
- decoder->state.type |= INTEL_PT_TRACE_BEGIN;
- if (decoder->packet.type == INTEL_PT_TIP_PGD)
- decoder->state.type |= INTEL_PT_TRACE_END;
return 0;
case INTEL_PT_FUP:
@@ -2178,6 +2391,10 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
case INTEL_PT_MWAIT:
case INTEL_PT_PWRE:
case INTEL_PT_PWRX:
+ case INTEL_PT_BBP:
+ case INTEL_PT_BIP:
+ case INTEL_PT_BEP:
+ case INTEL_PT_BEP_IP:
default:
break;
}
@@ -2193,6 +2410,7 @@ static int intel_pt_sync_ip(struct intel_pt_decoder *decoder)
decoder->set_fup_mwait = false;
decoder->set_fup_pwre = false;
decoder->set_fup_exstop = false;
+ decoder->set_fup_bep = false;
if (!decoder->branch_enable) {
decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
@@ -2250,7 +2468,7 @@ static int intel_pt_get_split_psb(struct intel_pt_decoder *decoder,
decoder->pos += decoder->len;
decoder->len = 0;
- ret = intel_pt_get_next_data(decoder);
+ ret = intel_pt_get_next_data(decoder, false);
if (ret)
return ret;
@@ -2276,7 +2494,7 @@ static int intel_pt_scan_for_psb(struct intel_pt_decoder *decoder)
intel_pt_log("Scanning for PSB\n");
while (1) {
if (!decoder->len) {
- ret = intel_pt_get_next_data(decoder);
+ ret = intel_pt_get_next_data(decoder, false);
if (ret)
return ret;
}
@@ -2404,8 +2622,8 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
if (err) {
decoder->state.err = intel_pt_ext_err(err);
decoder->state.from_ip = decoder->ip;
- decoder->sample_timestamp = decoder->timestamp;
- decoder->sample_insn_cnt = decoder->timestamp_insn_cnt;
+ intel_pt_update_sample_time(decoder);
+ decoder->sample_tot_cyc_cnt = decoder->tot_cyc_cnt;
} else {
decoder->state.err = 0;
if (decoder->cbr != decoder->cbr_seen && decoder->state.type) {
@@ -2414,8 +2632,9 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
decoder->state.cbr_payload = decoder->cbr_payload;
}
if (intel_pt_sample_time(decoder->pkt_state)) {
- decoder->sample_timestamp = decoder->timestamp;
- decoder->sample_insn_cnt = decoder->timestamp_insn_cnt;
+ intel_pt_update_sample_time(decoder);
+ if (decoder->sample_cyc)
+ decoder->sample_tot_cyc_cnt = decoder->tot_cyc_cnt;
}
}
@@ -2423,6 +2642,7 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
decoder->state.est_timestamp = intel_pt_est_timestamp(decoder);
decoder->state.cr3 = decoder->cr3;
decoder->state.tot_insn_cnt = decoder->tot_insn_cnt;
+ decoder->state.tot_cyc_cnt = decoder->sample_tot_cyc_cnt;
return &decoder->state;
}
@@ -2526,11 +2746,12 @@ static unsigned char *intel_pt_last_psb(unsigned char *buf, size_t len)
static bool intel_pt_next_tsc(unsigned char *buf, size_t len, uint64_t *tsc,
size_t *rem)
{
+ enum intel_pt_pkt_ctx ctx = INTEL_PT_NO_CTX;
struct intel_pt_pkt packet;
int ret;
while (len) {
- ret = intel_pt_get_packet(buf, len, &packet);
+ ret = intel_pt_get_packet(buf, len, &packet, &ctx);
if (ret <= 0)
return false;
if (packet.type == INTEL_PT_TSC) {
@@ -2732,3 +2953,131 @@ unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
return buf_b; /* No overlap */
}
}
+
+/**
+ * struct fast_forward_data - data used by intel_pt_ff_cb().
+ * @timestamp: timestamp to fast forward towards
+ * @buf_timestamp: buffer timestamp of last buffer with trace data earlier than
+ * the fast forward timestamp.
+ */
+struct fast_forward_data {
+ uint64_t timestamp;
+ uint64_t buf_timestamp;
+};
+
+/**
+ * intel_pt_ff_cb - fast forward lookahead callback.
+ * @buffer: Intel PT trace buffer
+ * @data: opaque pointer to fast forward data (struct fast_forward_data)
+ *
+ * Determine if @buffer trace is past the fast forward timestamp.
+ *
+ * Return: 1 (stop lookahead) if @buffer trace is past the fast forward
+ * timestamp, and 0 otherwise.
+ */
+static int intel_pt_ff_cb(struct intel_pt_buffer *buffer, void *data)
+{
+ struct fast_forward_data *d = data;
+ unsigned char *buf;
+ uint64_t tsc;
+ size_t rem;
+ size_t len;
+
+ buf = (unsigned char *)buffer->buf;
+ len = buffer->len;
+
+ if (!intel_pt_next_psb(&buf, &len) ||
+ !intel_pt_next_tsc(buf, len, &tsc, &rem))
+ return 0;
+
+ tsc = intel_pt_8b_tsc(tsc, buffer->ref_timestamp);
+
+ intel_pt_log("Buffer 1st timestamp " x64_fmt " ref timestamp " x64_fmt "\n",
+ tsc, buffer->ref_timestamp);
+
+ /*
+ * If the buffer contains a timestamp earlier that the fast forward
+ * timestamp, then record it, else stop.
+ */
+ if (tsc < d->timestamp)
+ d->buf_timestamp = buffer->ref_timestamp;
+ else
+ return 1;
+
+ return 0;
+}
+
+/**
+ * intel_pt_fast_forward - reposition decoder forwards.
+ * @decoder: Intel PT decoder
+ * @timestamp: timestamp to fast forward towards
+ *
+ * Reposition decoder at the last PSB with a timestamp earlier than @timestamp.
+ *
+ * Return: 0 on success or negative error code on failure.
+ */
+int intel_pt_fast_forward(struct intel_pt_decoder *decoder, uint64_t timestamp)
+{
+ struct fast_forward_data d = { .timestamp = timestamp };
+ unsigned char *buf;
+ size_t len;
+ int err;
+
+ intel_pt_log("Fast forward towards timestamp " x64_fmt "\n", timestamp);
+
+ /* Find buffer timestamp of buffer to fast forward to */
+ err = decoder->lookahead(decoder->data, intel_pt_ff_cb, &d);
+ if (err < 0)
+ return err;
+
+ /* Walk to buffer with same buffer timestamp */
+ if (d.buf_timestamp) {
+ do {
+ decoder->pos += decoder->len;
+ decoder->len = 0;
+ err = intel_pt_get_next_data(decoder, true);
+ /* -ENOLINK means non-consecutive trace */
+ if (err && err != -ENOLINK)
+ return err;
+ } while (decoder->buf_timestamp != d.buf_timestamp);
+ }
+
+ if (!decoder->buf)
+ return 0;
+
+ buf = (unsigned char *)decoder->buf;
+ len = decoder->len;
+
+ if (!intel_pt_next_psb(&buf, &len))
+ return 0;
+
+ /*
+ * Walk PSBs while the PSB timestamp is less than the fast forward
+ * timestamp.
+ */
+ do {
+ uint64_t tsc;
+ size_t rem;
+
+ if (!intel_pt_next_tsc(buf, len, &tsc, &rem))
+ break;
+ tsc = intel_pt_8b_tsc(tsc, decoder->buf_timestamp);
+ /*
+ * A TSC packet can slip past MTC packets but, after fast
+ * forward, decoding starts at the TSC timestamp. That means
+ * the timestamps may not be exactly the same as the timestamps
+ * that would have been decoded without fast forward.
+ */
+ if (tsc < timestamp) {
+ intel_pt_log("Fast forward to next PSB timestamp " x64_fmt "\n", tsc);
+ decoder->pos += decoder->len - len;
+ decoder->buf = buf;
+ decoder->len = len;
+ intel_pt_reposition(decoder);
+ } else {
+ break;
+ }
+ } while (intel_pt_step_psb(&buf, &len));
+
+ return 0;
+}
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
index 1e8cfdc7bfab..9957f2ccdca8 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
@@ -30,6 +30,7 @@ enum intel_pt_sample_type {
INTEL_PT_CBR_CHG = 1 << 8,
INTEL_PT_TRACE_BEGIN = 1 << 9,
INTEL_PT_TRACE_END = 1 << 10,
+ INTEL_PT_BLK_ITEMS = 1 << 11,
};
enum intel_pt_period_type {
@@ -61,6 +62,141 @@ enum intel_pt_param_flags {
INTEL_PT_FUP_WITH_NLIP = 1 << 0,
};
+enum intel_pt_blk_type {
+ INTEL_PT_GP_REGS = 1,
+ INTEL_PT_PEBS_BASIC = 4,
+ INTEL_PT_PEBS_MEM = 5,
+ INTEL_PT_LBR_0 = 8,
+ INTEL_PT_LBR_1 = 9,
+ INTEL_PT_LBR_2 = 10,
+ INTEL_PT_XMM = 16,
+ INTEL_PT_BLK_TYPE_MAX
+};
+
+/*
+ * The block type numbers are not sequential but here they are given sequential
+ * positions to avoid wasting space for array placement.
+ */
+enum intel_pt_blk_type_pos {
+ INTEL_PT_GP_REGS_POS,
+ INTEL_PT_PEBS_BASIC_POS,
+ INTEL_PT_PEBS_MEM_POS,
+ INTEL_PT_LBR_0_POS,
+ INTEL_PT_LBR_1_POS,
+ INTEL_PT_LBR_2_POS,
+ INTEL_PT_XMM_POS,
+ INTEL_PT_BLK_TYPE_CNT
+};
+
+/* Get the array position for a block type */
+static inline int intel_pt_blk_type_pos(enum intel_pt_blk_type blk_type)
+{
+#define BLK_TYPE(bt) [INTEL_PT_##bt] = INTEL_PT_##bt##_POS + 1
+ const int map[INTEL_PT_BLK_TYPE_MAX] = {
+ BLK_TYPE(GP_REGS),
+ BLK_TYPE(PEBS_BASIC),
+ BLK_TYPE(PEBS_MEM),
+ BLK_TYPE(LBR_0),
+ BLK_TYPE(LBR_1),
+ BLK_TYPE(LBR_2),
+ BLK_TYPE(XMM),
+ };
+#undef BLK_TYPE
+
+ return blk_type < INTEL_PT_BLK_TYPE_MAX ? map[blk_type] - 1 : -1;
+}
+
+#define INTEL_PT_BLK_ITEM_ID_CNT 32
+
+/*
+ * Use unions so that the block items can be accessed by name or by array index.
+ * There is an array of 32-bit masks for each block type, which indicate which
+ * values are present. Then arrays of 32 64-bit values for each block type.
+ */
+struct intel_pt_blk_items {
+ union {
+ uint32_t mask[INTEL_PT_BLK_TYPE_CNT];
+ struct {
+ uint32_t has_rflags:1;
+ uint32_t has_rip:1;
+ uint32_t has_rax:1;
+ uint32_t has_rcx:1;
+ uint32_t has_rdx:1;
+ uint32_t has_rbx:1;
+ uint32_t has_rsp:1;
+ uint32_t has_rbp:1;
+ uint32_t has_rsi:1;
+ uint32_t has_rdi:1;
+ uint32_t has_r8:1;
+ uint32_t has_r9:1;
+ uint32_t has_r10:1;
+ uint32_t has_r11:1;
+ uint32_t has_r12:1;
+ uint32_t has_r13:1;
+ uint32_t has_r14:1;
+ uint32_t has_r15:1;
+ uint32_t has_unused_0:14;
+ uint32_t has_ip:1;
+ uint32_t has_applicable_counters:1;
+ uint32_t has_timestamp:1;
+ uint32_t has_unused_1:29;
+ uint32_t has_mem_access_address:1;
+ uint32_t has_mem_aux_info:1;
+ uint32_t has_mem_access_latency:1;
+ uint32_t has_tsx_aux_info:1;
+ uint32_t has_unused_2:28;
+ uint32_t has_lbr_0;
+ uint32_t has_lbr_1;
+ uint32_t has_lbr_2;
+ uint32_t has_xmm;
+ };
+ };
+ union {
+ uint64_t val[INTEL_PT_BLK_TYPE_CNT][INTEL_PT_BLK_ITEM_ID_CNT];
+ struct {
+ struct {
+ uint64_t rflags;
+ uint64_t rip;
+ uint64_t rax;
+ uint64_t rcx;
+ uint64_t rdx;
+ uint64_t rbx;
+ uint64_t rsp;
+ uint64_t rbp;
+ uint64_t rsi;
+ uint64_t rdi;
+ uint64_t r8;
+ uint64_t r9;
+ uint64_t r10;
+ uint64_t r11;
+ uint64_t r12;
+ uint64_t r13;
+ uint64_t r14;
+ uint64_t r15;
+ uint64_t unused_0[INTEL_PT_BLK_ITEM_ID_CNT - 18];
+ };
+ struct {
+ uint64_t ip;
+ uint64_t applicable_counters;
+ uint64_t timestamp;
+ uint64_t unused_1[INTEL_PT_BLK_ITEM_ID_CNT - 3];
+ };
+ struct {
+ uint64_t mem_access_address;
+ uint64_t mem_aux_info;
+ uint64_t mem_access_latency;
+ uint64_t tsx_aux_info;
+ uint64_t unused_2[INTEL_PT_BLK_ITEM_ID_CNT - 4];
+ };
+ uint64_t lbr_0[INTEL_PT_BLK_ITEM_ID_CNT];
+ uint64_t lbr_1[INTEL_PT_BLK_ITEM_ID_CNT];
+ uint64_t lbr_2[INTEL_PT_BLK_ITEM_ID_CNT];
+ uint64_t xmm[INTEL_PT_BLK_ITEM_ID_CNT];
+ };
+ };
+ bool is_32_bit;
+};
+
struct intel_pt_state {
enum intel_pt_sample_type type;
int err;
@@ -68,6 +204,7 @@ struct intel_pt_state {
uint64_t to_ip;
uint64_t cr3;
uint64_t tot_insn_cnt;
+ uint64_t tot_cyc_cnt;
uint64_t timestamp;
uint64_t est_timestamp;
uint64_t trace_nr;
@@ -80,6 +217,7 @@ struct intel_pt_state {
enum intel_pt_insn_op insn_op;
int insn_len;
char insn[INTEL_PT_INSN_BUF_SZ];
+ struct intel_pt_blk_items items;
};
struct intel_pt_insn;
@@ -92,12 +230,15 @@ struct intel_pt_buffer {
uint64_t trace_nr;
};
+typedef int (*intel_pt_lookahead_cb_t)(struct intel_pt_buffer *, void *);
+
struct intel_pt_params {
int (*get_trace)(struct intel_pt_buffer *buffer, void *data);
int (*walk_insn)(struct intel_pt_insn *intel_pt_insn,
uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip,
uint64_t max_insn_cnt, void *data);
bool (*pgd_ip)(uint64_t ip, void *data);
+ int (*lookahead)(void *data, intel_pt_lookahead_cb_t cb, void *cb_data);
void *data;
bool return_compression;
bool branch_enable;
@@ -117,6 +258,8 @@ void intel_pt_decoder_free(struct intel_pt_decoder *decoder);
const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder);
+int intel_pt_fast_forward(struct intel_pt_decoder *decoder, uint64_t timestamp);
+
unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
unsigned char *buf_b, size_t len_b,
bool have_tsc, bool *consecutive);
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
index 605fce537d80..0ccf10a0bf44 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
@@ -62,6 +62,10 @@ static const char * const packet_name[] = {
[INTEL_PT_MWAIT] = "MWAIT",
[INTEL_PT_PWRE] = "PWRE",
[INTEL_PT_PWRX] = "PWRX",
+ [INTEL_PT_BBP] = "BBP",
+ [INTEL_PT_BIP] = "BIP",
+ [INTEL_PT_BEP] = "BEP",
+ [INTEL_PT_BEP_IP] = "BEP",
};
const char *intel_pt_pkt_name(enum intel_pt_pkt_type type)
@@ -280,6 +284,55 @@ static int intel_pt_get_pwrx(const unsigned char *buf, size_t len,
return 7;
}
+static int intel_pt_get_bbp(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 3)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_BBP;
+ packet->count = buf[2] >> 7;
+ packet->payload = buf[2] & 0x1f;
+ return 3;
+}
+
+static int intel_pt_get_bip_4(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 5)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_BIP;
+ packet->count = buf[0] >> 3;
+ memcpy_le64(&packet->payload, buf + 1, 4);
+ return 5;
+}
+
+static int intel_pt_get_bip_8(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 9)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_BIP;
+ packet->count = buf[0] >> 3;
+ memcpy_le64(&packet->payload, buf + 1, 8);
+ return 9;
+}
+
+static int intel_pt_get_bep(size_t len, struct intel_pt_pkt *packet)
+{
+ if (len < 2)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_BEP;
+ return 2;
+}
+
+static int intel_pt_get_bep_ip(size_t len, struct intel_pt_pkt *packet)
+{
+ if (len < 2)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_BEP_IP;
+ return 2;
+}
+
static int intel_pt_get_ext(const unsigned char *buf, size_t len,
struct intel_pt_pkt *packet)
{
@@ -320,6 +373,12 @@ static int intel_pt_get_ext(const unsigned char *buf, size_t len,
return intel_pt_get_pwre(buf, len, packet);
case 0xA2: /* PWRX */
return intel_pt_get_pwrx(buf, len, packet);
+ case 0x63: /* BBP */
+ return intel_pt_get_bbp(buf, len, packet);
+ case 0x33: /* BEP no IP */
+ return intel_pt_get_bep(len, packet);
+ case 0xb3: /* BEP with IP */
+ return intel_pt_get_bep_ip(len, packet);
default:
return INTEL_PT_BAD_PACKET;
}
@@ -468,7 +527,8 @@ static int intel_pt_get_mtc(const unsigned char *buf, size_t len,
}
static int intel_pt_do_get_packet(const unsigned char *buf, size_t len,
- struct intel_pt_pkt *packet)
+ struct intel_pt_pkt *packet,
+ enum intel_pt_pkt_ctx ctx)
{
unsigned int byte;
@@ -478,6 +538,22 @@ static int intel_pt_do_get_packet(const unsigned char *buf, size_t len,
return INTEL_PT_NEED_MORE_BYTES;
byte = buf[0];
+
+ switch (ctx) {
+ case INTEL_PT_NO_CTX:
+ break;
+ case INTEL_PT_BLK_4_CTX:
+ if ((byte & 0x7) == 4)
+ return intel_pt_get_bip_4(buf, len, packet);
+ break;
+ case INTEL_PT_BLK_8_CTX:
+ if ((byte & 0x7) == 4)
+ return intel_pt_get_bip_8(buf, len, packet);
+ break;
+ default:
+ break;
+ };
+
if (!(byte & BIT(0))) {
if (byte == 0)
return intel_pt_get_pad(packet);
@@ -516,15 +592,65 @@ static int intel_pt_do_get_packet(const unsigned char *buf, size_t len,
}
}
+void intel_pt_upd_pkt_ctx(const struct intel_pt_pkt *packet,
+ enum intel_pt_pkt_ctx *ctx)
+{
+ switch (packet->type) {
+ case INTEL_PT_BAD:
+ case INTEL_PT_PAD:
+ case INTEL_PT_TSC:
+ case INTEL_PT_TMA:
+ case INTEL_PT_MTC:
+ case INTEL_PT_FUP:
+ case INTEL_PT_CYC:
+ case INTEL_PT_CBR:
+ case INTEL_PT_MNT:
+ case INTEL_PT_EXSTOP:
+ case INTEL_PT_EXSTOP_IP:
+ case INTEL_PT_PWRE:
+ case INTEL_PT_PWRX:
+ case INTEL_PT_BIP:
+ break;
+ case INTEL_PT_TNT:
+ case INTEL_PT_TIP:
+ case INTEL_PT_TIP_PGD:
+ case INTEL_PT_TIP_PGE:
+ case INTEL_PT_MODE_EXEC:
+ case INTEL_PT_MODE_TSX:
+ case INTEL_PT_PIP:
+ case INTEL_PT_OVF:
+ case INTEL_PT_VMCS:
+ case INTEL_PT_TRACESTOP:
+ case INTEL_PT_PSB:
+ case INTEL_PT_PSBEND:
+ case INTEL_PT_PTWRITE:
+ case INTEL_PT_PTWRITE_IP:
+ case INTEL_PT_MWAIT:
+ case INTEL_PT_BEP:
+ case INTEL_PT_BEP_IP:
+ *ctx = INTEL_PT_NO_CTX;
+ break;
+ case INTEL_PT_BBP:
+ if (packet->count)
+ *ctx = INTEL_PT_BLK_4_CTX;
+ else
+ *ctx = INTEL_PT_BLK_8_CTX;
+ break;
+ default:
+ break;
+ }
+}
+
int intel_pt_get_packet(const unsigned char *buf, size_t len,
- struct intel_pt_pkt *packet)
+ struct intel_pt_pkt *packet, enum intel_pt_pkt_ctx *ctx)
{
int ret;
- ret = intel_pt_do_get_packet(buf, len, packet);
+ ret = intel_pt_do_get_packet(buf, len, packet, *ctx);
if (ret > 0) {
while (ret < 8 && len > (size_t)ret && !buf[ret])
ret += 1;
+ intel_pt_upd_pkt_ctx(packet, ctx);
}
return ret;
}
@@ -602,8 +728,10 @@ int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf,
return snprintf(buf, buf_len, "%s 0x%llx IP:0", name, payload);
case INTEL_PT_PTWRITE_IP:
return snprintf(buf, buf_len, "%s 0x%llx IP:1", name, payload);
+ case INTEL_PT_BEP:
case INTEL_PT_EXSTOP:
return snprintf(buf, buf_len, "%s IP:0", name);
+ case INTEL_PT_BEP_IP:
case INTEL_PT_EXSTOP_IP:
return snprintf(buf, buf_len, "%s IP:1", name);
case INTEL_PT_MWAIT:
@@ -621,6 +749,12 @@ int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf,
(unsigned int)((payload >> 4) & 0xf),
(unsigned int)(payload & 0xf),
(unsigned int)((payload >> 8) & 0xf));
+ case INTEL_PT_BBP:
+ return snprintf(buf, buf_len, "%s SZ %s-byte Type 0x%llx",
+ name, packet->count ? "4" : "8", payload);
+ case INTEL_PT_BIP:
+ return snprintf(buf, buf_len, "%s ID 0x%02x Value 0x%llx",
+ name, packet->count, payload);
default:
break;
}
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
index a7aefaa08588..17ca9b56d72f 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
@@ -50,6 +50,10 @@ enum intel_pt_pkt_type {
INTEL_PT_MWAIT,
INTEL_PT_PWRE,
INTEL_PT_PWRX,
+ INTEL_PT_BBP,
+ INTEL_PT_BIP,
+ INTEL_PT_BEP,
+ INTEL_PT_BEP_IP,
};
struct intel_pt_pkt {
@@ -58,10 +62,25 @@ struct intel_pt_pkt {
uint64_t payload;
};
+/*
+ * Decoding of BIP packets conflicts with single-byte TNT packets. Since BIP
+ * packets only occur in the context of a block (i.e. between BBP and BEP), that
+ * context must be recorded and passed to the packet decoder.
+ */
+enum intel_pt_pkt_ctx {
+ INTEL_PT_NO_CTX, /* BIP packets are invalid */
+ INTEL_PT_BLK_4_CTX, /* 4-byte BIP packets */
+ INTEL_PT_BLK_8_CTX, /* 8-byte BIP packets */
+};
+
const char *intel_pt_pkt_name(enum intel_pt_pkt_type);
int intel_pt_get_packet(const unsigned char *buf, size_t len,
- struct intel_pt_pkt *packet);
+ struct intel_pt_pkt *packet,
+ enum intel_pt_pkt_ctx *ctx);
+
+void intel_pt_upd_pkt_ctx(const struct intel_pt_pkt *packet,
+ enum intel_pt_pkt_ctx *ctx);
int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf, size_t len);
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index d6f1b2a03f9b..550db6e77968 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -33,6 +33,9 @@
#include "tsc.h"
#include "intel-pt.h"
#include "config.h"
+#include "time-utils.h"
+
+#include "../arch/x86/include/uapi/asm/perf_regs.h"
#include "intel-pt-decoder/intel-pt-log.h"
#include "intel-pt-decoder/intel-pt-decoder.h"
@@ -41,6 +44,11 @@
#define MAX_TIMESTAMP (~0ULL)
+struct range {
+ u64 start;
+ u64 end;
+};
+
struct intel_pt {
struct auxtrace auxtrace;
struct auxtrace_queues queues;
@@ -95,6 +103,9 @@ struct intel_pt {
u64 pwrx_id;
u64 cbr_id;
+ bool sample_pebs;
+ struct perf_evsel *pebs_evsel;
+
u64 tsc_bit;
u64 mtc_bit;
u64 mtc_freq_bits;
@@ -109,6 +120,9 @@ struct intel_pt {
char *filter;
struct addr_filters filts;
+
+ struct range *time_ranges;
+ unsigned int range_cnt;
};
enum switch_state {
@@ -145,9 +159,18 @@ struct intel_pt_queue {
bool have_sample;
u64 time;
u64 timestamp;
+ u64 sel_timestamp;
+ bool sel_start;
+ unsigned int sel_idx;
u32 flags;
u16 insn_len;
u64 last_insn_cnt;
+ u64 ipc_insn_cnt;
+ u64 ipc_cyc_cnt;
+ u64 last_in_insn_cnt;
+ u64 last_in_cyc_cnt;
+ u64 last_br_insn_cnt;
+ u64 last_br_cyc_cnt;
char insn[INTEL_PT_INSN_BUF_SZ];
};
@@ -159,13 +182,14 @@ static void intel_pt_dump(struct intel_pt *pt __maybe_unused,
int ret, pkt_len, i;
char desc[INTEL_PT_PKT_DESC_MAX];
const char *color = PERF_COLOR_BLUE;
+ enum intel_pt_pkt_ctx ctx = INTEL_PT_NO_CTX;
color_fprintf(stdout, color,
". ... Intel Processor Trace data: size %zu bytes\n",
len);
while (len) {
- ret = intel_pt_get_packet(buf, len, &packet);
+ ret = intel_pt_get_packet(buf, len, &packet, &ctx);
if (ret > 0)
pkt_len = ret;
else
@@ -224,32 +248,13 @@ static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *
return 0;
}
-/* This function assumes data is processed sequentially only */
-static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data)
+static int intel_pt_get_buffer(struct intel_pt_queue *ptq,
+ struct auxtrace_buffer *buffer,
+ struct auxtrace_buffer *old_buffer,
+ struct intel_pt_buffer *b)
{
- struct intel_pt_queue *ptq = data;
- struct auxtrace_buffer *buffer = ptq->buffer;
- struct auxtrace_buffer *old_buffer = ptq->old_buffer;
- struct auxtrace_queue *queue;
bool might_overlap;
- if (ptq->stop) {
- b->len = 0;
- return 0;
- }
-
- queue = &ptq->pt->queues.queue_array[ptq->queue_nr];
-
- buffer = auxtrace_buffer__next(queue, buffer);
- if (!buffer) {
- if (old_buffer)
- auxtrace_buffer__drop_data(old_buffer);
- b->len = 0;
- return 0;
- }
-
- ptq->buffer = buffer;
-
if (!buffer->data) {
int fd = perf_data__fd(ptq->pt->session->data);
@@ -279,6 +284,95 @@ static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data)
b->consecutive = true;
}
+ return 0;
+}
+
+/* Do not drop buffers with references - refer intel_pt_get_trace() */
+static void intel_pt_lookahead_drop_buffer(struct intel_pt_queue *ptq,
+ struct auxtrace_buffer *buffer)
+{
+ if (!buffer || buffer == ptq->buffer || buffer == ptq->old_buffer)
+ return;
+
+ auxtrace_buffer__drop_data(buffer);
+}
+
+/* Must be serialized with respect to intel_pt_get_trace() */
+static int intel_pt_lookahead(void *data, intel_pt_lookahead_cb_t cb,
+ void *cb_data)
+{
+ struct intel_pt_queue *ptq = data;
+ struct auxtrace_buffer *buffer = ptq->buffer;
+ struct auxtrace_buffer *old_buffer = ptq->old_buffer;
+ struct auxtrace_queue *queue;
+ int err = 0;
+
+ queue = &ptq->pt->queues.queue_array[ptq->queue_nr];
+
+ while (1) {
+ struct intel_pt_buffer b = { .len = 0 };
+
+ buffer = auxtrace_buffer__next(queue, buffer);
+ if (!buffer)
+ break;
+
+ err = intel_pt_get_buffer(ptq, buffer, old_buffer, &b);
+ if (err)
+ break;
+
+ if (b.len) {
+ intel_pt_lookahead_drop_buffer(ptq, old_buffer);
+ old_buffer = buffer;
+ } else {
+ intel_pt_lookahead_drop_buffer(ptq, buffer);
+ continue;
+ }
+
+ err = cb(&b, cb_data);
+ if (err)
+ break;
+ }
+
+ if (buffer != old_buffer)
+ intel_pt_lookahead_drop_buffer(ptq, buffer);
+ intel_pt_lookahead_drop_buffer(ptq, old_buffer);
+
+ return err;
+}
+
+/*
+ * This function assumes data is processed sequentially only.
+ * Must be serialized with respect to intel_pt_lookahead()
+ */
+static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data)
+{
+ struct intel_pt_queue *ptq = data;
+ struct auxtrace_buffer *buffer = ptq->buffer;
+ struct auxtrace_buffer *old_buffer = ptq->old_buffer;
+ struct auxtrace_queue *queue;
+ int err;
+
+ if (ptq->stop) {
+ b->len = 0;
+ return 0;
+ }
+
+ queue = &ptq->pt->queues.queue_array[ptq->queue_nr];
+
+ buffer = auxtrace_buffer__next(queue, buffer);
+ if (!buffer) {
+ if (old_buffer)
+ auxtrace_buffer__drop_data(old_buffer);
+ b->len = 0;
+ return 0;
+ }
+
+ ptq->buffer = buffer;
+
+ err = intel_pt_get_buffer(ptq, buffer, old_buffer, b);
+ if (err)
+ return err;
+
if (ptq->step_through_buffers)
ptq->stop = true;
@@ -798,6 +892,7 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
params.get_trace = intel_pt_get_trace;
params.walk_insn = intel_pt_walk_next_insn;
+ params.lookahead = intel_pt_lookahead;
params.data = ptq;
params.return_compression = intel_pt_return_compression(pt);
params.branch_enable = intel_pt_branch_enable(pt);
@@ -921,6 +1016,23 @@ static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
ptq->flags |= PERF_IP_FLAG_TRACE_END;
}
+static void intel_pt_setup_time_range(struct intel_pt *pt,
+ struct intel_pt_queue *ptq)
+{
+ if (!pt->range_cnt)
+ return;
+
+ ptq->sel_timestamp = pt->time_ranges[0].start;
+ ptq->sel_idx = 0;
+
+ if (ptq->sel_timestamp) {
+ ptq->sel_start = true;
+ } else {
+ ptq->sel_timestamp = pt->time_ranges[0].end;
+ ptq->sel_start = false;
+ }
+}
+
static int intel_pt_setup_queue(struct intel_pt *pt,
struct auxtrace_queue *queue,
unsigned int queue_nr)
@@ -945,6 +1057,8 @@ static int intel_pt_setup_queue(struct intel_pt *pt,
ptq->step_through_buffers = true;
ptq->sync_switch = pt->sync_switch;
+
+ intel_pt_setup_time_range(pt, ptq);
}
if (!ptq->on_heap &&
@@ -959,6 +1073,14 @@ static int intel_pt_setup_queue(struct intel_pt *pt,
intel_pt_log("queue %u getting timestamp\n", queue_nr);
intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
queue_nr, ptq->cpu, ptq->pid, ptq->tid);
+
+ if (ptq->sel_start && ptq->sel_timestamp) {
+ ret = intel_pt_fast_forward(ptq->decoder,
+ ptq->sel_timestamp);
+ if (ret)
+ return ret;
+ }
+
while (1) {
state = intel_pt_decode(ptq->decoder);
if (state->err) {
@@ -978,6 +1100,9 @@ static int intel_pt_setup_queue(struct intel_pt *pt,
queue_nr, ptq->timestamp);
ptq->state = state;
ptq->have_sample = true;
+ if (ptq->sel_start && ptq->sel_timestamp &&
+ ptq->timestamp < ptq->sel_timestamp)
+ ptq->have_sample = false;
intel_pt_sample_flags(ptq);
ret = auxtrace_heap__add(&pt->heap, queue_nr, ptq->timestamp);
if (ret)
@@ -1059,28 +1184,37 @@ static inline bool intel_pt_skip_event(struct intel_pt *pt)
pt->num_events++ < pt->synth_opts.initial_skip;
}
+static void intel_pt_prep_a_sample(struct intel_pt_queue *ptq,
+ union perf_event *event,
+ struct perf_sample *sample)
+{
+ event->sample.header.type = PERF_RECORD_SAMPLE;
+ event->sample.header.size = sizeof(struct perf_event_header);
+
+ sample->pid = ptq->pid;
+ sample->tid = ptq->tid;
+ sample->cpu = ptq->cpu;
+ sample->insn_len = ptq->insn_len;
+ memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
+}
+
static void intel_pt_prep_b_sample(struct intel_pt *pt,
struct intel_pt_queue *ptq,
union perf_event *event,
struct perf_sample *sample)
{
+ intel_pt_prep_a_sample(ptq, event, sample);
+
if (!pt->timeless_decoding)
sample->time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
sample->ip = ptq->state->from_ip;
sample->cpumode = intel_pt_cpumode(pt, sample->ip);
- sample->pid = ptq->pid;
- sample->tid = ptq->tid;
sample->addr = ptq->state->to_ip;
sample->period = 1;
- sample->cpu = ptq->cpu;
sample->flags = ptq->flags;
- sample->insn_len = ptq->insn_len;
- memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
- event->sample.header.type = PERF_RECORD_SAMPLE;
event->sample.header.misc = sample->cpumode;
- event->sample.header.size = sizeof(struct perf_event_header);
}
static int intel_pt_inject_event(union perf_event *event,
@@ -1153,6 +1287,13 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
sample.branch_stack = (struct branch_stack *)&dummy_bs;
}
+ sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_br_cyc_cnt;
+ if (sample.cyc_cnt) {
+ sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_br_insn_cnt;
+ ptq->last_br_insn_cnt = ptq->ipc_insn_cnt;
+ ptq->last_br_cyc_cnt = ptq->ipc_cyc_cnt;
+ }
+
return intel_pt_deliver_synth_b_event(pt, event, &sample,
pt->branches_sample_type);
}
@@ -1208,6 +1349,13 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
sample.stream_id = ptq->pt->instructions_id;
sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt;
+ sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt;
+ if (sample.cyc_cnt) {
+ sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_in_insn_cnt;
+ ptq->last_in_insn_cnt = ptq->ipc_insn_cnt;
+ ptq->last_in_cyc_cnt = ptq->ipc_cyc_cnt;
+ }
+
ptq->last_insn_cnt = ptq->state->tot_insn_cnt;
return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
@@ -1401,6 +1549,261 @@ static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq)
pt->pwr_events_sample_type);
}
+/*
+ * PEBS gp_regs array indexes plus 1 so that 0 means not present. Refer
+ * intel_pt_add_gp_regs().
+ */
+static const int pebs_gp_regs[] = {
+ [PERF_REG_X86_FLAGS] = 1,
+ [PERF_REG_X86_IP] = 2,
+ [PERF_REG_X86_AX] = 3,
+ [PERF_REG_X86_CX] = 4,
+ [PERF_REG_X86_DX] = 5,
+ [PERF_REG_X86_BX] = 6,
+ [PERF_REG_X86_SP] = 7,
+ [PERF_REG_X86_BP] = 8,
+ [PERF_REG_X86_SI] = 9,
+ [PERF_REG_X86_DI] = 10,
+ [PERF_REG_X86_R8] = 11,
+ [PERF_REG_X86_R9] = 12,
+ [PERF_REG_X86_R10] = 13,
+ [PERF_REG_X86_R11] = 14,
+ [PERF_REG_X86_R12] = 15,
+ [PERF_REG_X86_R13] = 16,
+ [PERF_REG_X86_R14] = 17,
+ [PERF_REG_X86_R15] = 18,
+};
+
+static u64 *intel_pt_add_gp_regs(struct regs_dump *intr_regs, u64 *pos,
+ const struct intel_pt_blk_items *items,
+ u64 regs_mask)
+{
+ const u64 *gp_regs = items->val[INTEL_PT_GP_REGS_POS];
+ u32 mask = items->mask[INTEL_PT_GP_REGS_POS];
+ u32 bit;
+ int i;
+
+ for (i = 0, bit = 1; i < PERF_REG_X86_64_MAX; i++, bit <<= 1) {
+ /* Get the PEBS gp_regs array index */
+ int n = pebs_gp_regs[i] - 1;
+
+ if (n < 0)
+ continue;
+ /*
+ * Add only registers that were requested (i.e. 'regs_mask') and
+ * that were provided (i.e. 'mask'), and update the resulting
+ * mask (i.e. 'intr_regs->mask') accordingly.
+ */
+ if (mask & 1 << n && regs_mask & bit) {
+ intr_regs->mask |= bit;
+ *pos++ = gp_regs[n];
+ }
+ }
+
+ return pos;
+}
+
+#ifndef PERF_REG_X86_XMM0
+#define PERF_REG_X86_XMM0 32
+#endif
+
+static void intel_pt_add_xmm(struct regs_dump *intr_regs, u64 *pos,
+ const struct intel_pt_blk_items *items,
+ u64 regs_mask)
+{
+ u32 mask = items->has_xmm & (regs_mask >> PERF_REG_X86_XMM0);
+ const u64 *xmm = items->xmm;
+
+ /*
+ * If there are any XMM registers, then there should be all of them.
+ * Nevertheless, follow the logic to add only registers that were
+ * requested (i.e. 'regs_mask') and that were provided (i.e. 'mask'),
+ * and update the resulting mask (i.e. 'intr_regs->mask') accordingly.
+ */
+ intr_regs->mask |= (u64)mask << PERF_REG_X86_XMM0;
+
+ for (; mask; mask >>= 1, xmm++) {
+ if (mask & 1)
+ *pos++ = *xmm;
+ }
+}
+
+#define LBR_INFO_MISPRED (1ULL << 63)
+#define LBR_INFO_IN_TX (1ULL << 62)
+#define LBR_INFO_ABORT (1ULL << 61)
+#define LBR_INFO_CYCLES 0xffff
+
+/* Refer kernel's intel_pmu_store_pebs_lbrs() */
+static u64 intel_pt_lbr_flags(u64 info)
+{
+ union {
+ struct branch_flags flags;
+ u64 result;
+ } u = {
+ .flags = {
+ .mispred = !!(info & LBR_INFO_MISPRED),
+ .predicted = !(info & LBR_INFO_MISPRED),
+ .in_tx = !!(info & LBR_INFO_IN_TX),
+ .abort = !!(info & LBR_INFO_ABORT),
+ .cycles = info & LBR_INFO_CYCLES,
+ }
+ };
+
+ return u.result;
+}
+
+static void intel_pt_add_lbrs(struct branch_stack *br_stack,
+ const struct intel_pt_blk_items *items)
+{
+ u64 *to;
+ int i;
+
+ br_stack->nr = 0;
+
+ to = &br_stack->entries[0].from;
+
+ for (i = INTEL_PT_LBR_0_POS; i <= INTEL_PT_LBR_2_POS; i++) {
+ u32 mask = items->mask[i];
+ const u64 *from = items->val[i];
+
+ for (; mask; mask >>= 3, from += 3) {
+ if ((mask & 7) == 7) {
+ *to++ = from[0];
+ *to++ = from[1];
+ *to++ = intel_pt_lbr_flags(from[2]);
+ br_stack->nr += 1;
+ }
+ }
+ }
+}
+
+/* INTEL_PT_LBR_0, INTEL_PT_LBR_1 and INTEL_PT_LBR_2 */
+#define LBRS_MAX (INTEL_PT_BLK_ITEM_ID_CNT * 3)
+
+static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq)
+{
+ const struct intel_pt_blk_items *items = &ptq->state->items;
+ struct perf_sample sample = { .ip = 0, };
+ union perf_event *event = ptq->event_buf;
+ struct intel_pt *pt = ptq->pt;
+ struct perf_evsel *evsel = pt->pebs_evsel;
+ u64 sample_type = evsel->attr.sample_type;
+ u64 id = evsel->id[0];
+ u8 cpumode;
+
+ if (intel_pt_skip_event(pt))
+ return 0;
+
+ intel_pt_prep_a_sample(ptq, event, &sample);
+
+ sample.id = id;
+ sample.stream_id = id;
+
+ if (!evsel->attr.freq)
+ sample.period = evsel->attr.sample_period;
+
+ /* No support for non-zero CS base */
+ if (items->has_ip)
+ sample.ip = items->ip;
+ else if (items->has_rip)
+ sample.ip = items->rip;
+ else
+ sample.ip = ptq->state->from_ip;
+
+ /* No support for guest mode at this time */
+ cpumode = sample.ip < ptq->pt->kernel_start ?
+ PERF_RECORD_MISC_USER :
+ PERF_RECORD_MISC_KERNEL;
+
+ event->sample.header.misc = cpumode | PERF_RECORD_MISC_EXACT_IP;
+
+ sample.cpumode = cpumode;
+
+ if (sample_type & PERF_SAMPLE_TIME) {
+ u64 timestamp = 0;
+
+ if (items->has_timestamp)
+ timestamp = items->timestamp;
+ else if (!pt->timeless_decoding)
+ timestamp = ptq->timestamp;
+ if (timestamp)
+ sample.time = tsc_to_perf_time(timestamp, &pt->tc);
+ }
+
+ if (sample_type & PERF_SAMPLE_CALLCHAIN &&
+ pt->synth_opts.callchain) {
+ thread_stack__sample(ptq->thread, ptq->cpu, ptq->chain,
+ pt->synth_opts.callchain_sz, sample.ip,
+ pt->kernel_start);
+ sample.callchain = ptq->chain;
+ }
+
+ if (sample_type & PERF_SAMPLE_REGS_INTR &&
+ items->mask[INTEL_PT_GP_REGS_POS]) {
+ u64 regs[sizeof(sample.intr_regs.mask)];
+ u64 regs_mask = evsel->attr.sample_regs_intr;
+ u64 *pos;
+
+ sample.intr_regs.abi = items->is_32_bit ?
+ PERF_SAMPLE_REGS_ABI_32 :
+ PERF_SAMPLE_REGS_ABI_64;
+ sample.intr_regs.regs = regs;
+
+ pos = intel_pt_add_gp_regs(&sample.intr_regs, regs, items, regs_mask);
+
+ intel_pt_add_xmm(&sample.intr_regs, pos, items, regs_mask);
+ }
+
+ if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
+ struct {
+ struct branch_stack br_stack;
+ struct branch_entry entries[LBRS_MAX];
+ } br;
+
+ if (items->mask[INTEL_PT_LBR_0_POS] ||
+ items->mask[INTEL_PT_LBR_1_POS] ||
+ items->mask[INTEL_PT_LBR_2_POS]) {
+ intel_pt_add_lbrs(&br.br_stack, items);
+ sample.branch_stack = &br.br_stack;
+ } else if (pt->synth_opts.last_branch) {
+ intel_pt_copy_last_branch_rb(ptq);
+ sample.branch_stack = ptq->last_branch;
+ } else {
+ br.br_stack.nr = 0;
+ sample.branch_stack = &br.br_stack;
+ }
+ }
+
+ if (sample_type & PERF_SAMPLE_ADDR && items->has_mem_access_address)
+ sample.addr = items->mem_access_address;
+
+ if (sample_type & PERF_SAMPLE_WEIGHT) {
+ /*
+ * Refer kernel's setup_pebs_adaptive_sample_data() and
+ * intel_hsw_weight().
+ */
+ if (items->has_mem_access_latency)
+ sample.weight = items->mem_access_latency;
+ if (!sample.weight && items->has_tsx_aux_info) {
+ /* Cycles last block */
+ sample.weight = (u32)items->tsx_aux_info;
+ }
+ }
+
+ if (sample_type & PERF_SAMPLE_TRANSACTION && items->has_tsx_aux_info) {
+ u64 ax = items->has_rax ? items->rax : 0;
+ /* Refer kernel's intel_hsw_transaction() */
+ u64 txn = (u8)(items->tsx_aux_info >> 32);
+
+ /* For RTM XABORTs also log the abort code from AX */
+ if (txn & PERF_TXN_TRANSACTION && ax & 1)
+ txn |= ((ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
+ sample.transaction = txn;
+ }
+
+ return intel_pt_deliver_synth_event(pt, ptq, event, &sample, sample_type);
+}
+
static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
pid_t pid, pid_t tid, u64 ip, u64 timestamp)
{
@@ -1479,6 +1882,25 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
ptq->have_sample = false;
+ if (ptq->state->tot_cyc_cnt > ptq->ipc_cyc_cnt) {
+ /*
+ * Cycle count and instruction count only go together to create
+ * a valid IPC ratio when the cycle count changes.
+ */
+ ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt;
+ ptq->ipc_cyc_cnt = ptq->state->tot_cyc_cnt;
+ }
+
+ /*
+ * Do PEBS first to allow for the possibility that the PEBS timestamp
+ * precedes the current timestamp.
+ */
+ if (pt->sample_pebs && state->type & INTEL_PT_BLK_ITEMS) {
+ err = intel_pt_synth_pebs_sample(ptq);
+ if (err)
+ return err;
+ }
+
if (pt->sample_pwr_events && (state->type & INTEL_PT_PWR_EVT)) {
if (state->type & INTEL_PT_CBR_CHG) {
err = intel_pt_synth_cbr_sample(ptq);
@@ -1641,10 +2063,83 @@ static void intel_pt_enable_sync_switch(struct intel_pt *pt)
}
}
+/*
+ * To filter against time ranges, it is only necessary to look at the next start
+ * or end time.
+ */
+static bool intel_pt_next_time(struct intel_pt_queue *ptq)
+{
+ struct intel_pt *pt = ptq->pt;
+
+ if (ptq->sel_start) {
+ /* Next time is an end time */
+ ptq->sel_start = false;
+ ptq->sel_timestamp = pt->time_ranges[ptq->sel_idx].end;
+ return true;
+ } else if (ptq->sel_idx + 1 < pt->range_cnt) {
+ /* Next time is a start time */
+ ptq->sel_start = true;
+ ptq->sel_idx += 1;
+ ptq->sel_timestamp = pt->time_ranges[ptq->sel_idx].start;
+ return true;
+ }
+
+ /* No next time */
+ return false;
+}
+
+static int intel_pt_time_filter(struct intel_pt_queue *ptq, u64 *ff_timestamp)
+{
+ int err;
+
+ while (1) {
+ if (ptq->sel_start) {
+ if (ptq->timestamp >= ptq->sel_timestamp) {
+ /* After start time, so consider next time */
+ intel_pt_next_time(ptq);
+ if (!ptq->sel_timestamp) {
+ /* No end time */
+ return 0;
+ }
+ /* Check against end time */
+ continue;
+ }
+ /* Before start time, so fast forward */
+ ptq->have_sample = false;
+ if (ptq->sel_timestamp > *ff_timestamp) {
+ if (ptq->sync_switch) {
+ intel_pt_next_tid(ptq->pt, ptq);
+ ptq->switch_state = INTEL_PT_SS_UNKNOWN;
+ }
+ *ff_timestamp = ptq->sel_timestamp;
+ err = intel_pt_fast_forward(ptq->decoder,
+ ptq->sel_timestamp);
+ if (err)
+ return err;
+ }
+ return 0;
+ } else if (ptq->timestamp > ptq->sel_timestamp) {
+ /* After end time, so consider next time */
+ if (!intel_pt_next_time(ptq)) {
+ /* No next time range, so stop decoding */
+ ptq->have_sample = false;
+ ptq->switch_state = INTEL_PT_SS_NOT_TRACING;
+ return 1;
+ }
+ /* Check against next start time */
+ continue;
+ } else {
+ /* Before end time */
+ return 0;
+ }
+ }
+}
+
static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
{
const struct intel_pt_state *state = ptq->state;
struct intel_pt *pt = ptq->pt;
+ u64 ff_timestamp = 0;
int err;
if (!pt->kernel_start) {
@@ -1709,6 +2204,12 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
ptq->timestamp = state->timestamp;
}
+ if (ptq->sel_timestamp) {
+ err = intel_pt_time_filter(ptq, &ff_timestamp);
+ if (err)
+ return err;
+ }
+
if (!pt->timeless_decoding && ptq->timestamp >= *timestamp) {
*timestamp = ptq->timestamp;
return 0;
@@ -1850,7 +2351,6 @@ static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid,
switch (ptq->switch_state) {
case INTEL_PT_SS_NOT_TRACING:
- ptq->next_tid = -1;
break;
case INTEL_PT_SS_UNKNOWN:
case INTEL_PT_SS_TRACING:
@@ -1870,13 +2370,14 @@ static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid,
ptq->switch_state = INTEL_PT_SS_TRACING;
break;
case INTEL_PT_SS_EXPECTING_SWITCH_IP:
- ptq->next_tid = tid;
intel_pt_log("ERROR: cpu %d expecting switch ip\n", cpu);
break;
default:
break;
}
+ ptq->next_tid = -1;
+
return 1;
}
@@ -1905,6 +2406,44 @@ static int intel_pt_process_switch(struct intel_pt *pt,
return machine__set_current_tid(pt->machine, cpu, -1, tid);
}
+static int intel_pt_context_switch_in(struct intel_pt *pt,
+ struct perf_sample *sample)
+{
+ pid_t pid = sample->pid;
+ pid_t tid = sample->tid;
+ int cpu = sample->cpu;
+
+ if (pt->sync_switch) {
+ struct intel_pt_queue *ptq;
+
+ ptq = intel_pt_cpu_to_ptq(pt, cpu);
+ if (ptq && ptq->sync_switch) {
+ ptq->next_tid = -1;
+ switch (ptq->switch_state) {
+ case INTEL_PT_SS_NOT_TRACING:
+ case INTEL_PT_SS_UNKNOWN:
+ case INTEL_PT_SS_TRACING:
+ break;
+ case INTEL_PT_SS_EXPECTING_SWITCH_EVENT:
+ case INTEL_PT_SS_EXPECTING_SWITCH_IP:
+ ptq->switch_state = INTEL_PT_SS_TRACING;
+ break;
+ default:
+ break;
+ }
+ }
+ }
+
+ /*
+ * If the current tid has not been updated yet, ensure it is now that
+ * a "switch in" event has occurred.
+ */
+ if (machine__get_current_tid(pt->machine, cpu) == tid)
+ return 0;
+
+ return machine__set_current_tid(pt->machine, cpu, pid, tid);
+}
+
static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event,
struct perf_sample *sample)
{
@@ -1916,7 +2455,7 @@ static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event,
if (pt->have_sched_switch == 3) {
if (!out)
- return 0;
+ return intel_pt_context_switch_in(pt, sample);
if (event->header.type != PERF_RECORD_SWITCH_CPU_WIDE) {
pr_err("Expecting CPU-wide context switch event\n");
return -EINVAL;
@@ -2076,6 +2615,7 @@ static void intel_pt_free(struct perf_session *session)
thread__put(pt->unknown_thread);
addr_filters__exit(&pt->filts);
zfree(&pt->filter);
+ zfree(&pt->time_ranges);
free(pt);
}
@@ -2373,6 +2913,85 @@ static int intel_pt_perf_config(const char *var, const char *value, void *data)
return 0;
}
+/* Find least TSC which converts to ns or later */
+static u64 intel_pt_tsc_start(u64 ns, struct intel_pt *pt)
+{
+ u64 tsc, tm;
+
+ tsc = perf_time_to_tsc(ns, &pt->tc);
+
+ while (1) {
+ tm = tsc_to_perf_time(tsc, &pt->tc);
+ if (tm < ns)
+ break;
+ tsc -= 1;
+ }
+
+ while (tm < ns)
+ tm = tsc_to_perf_time(++tsc, &pt->tc);
+
+ return tsc;
+}
+
+/* Find greatest TSC which converts to ns or earlier */
+static u64 intel_pt_tsc_end(u64 ns, struct intel_pt *pt)
+{
+ u64 tsc, tm;
+
+ tsc = perf_time_to_tsc(ns, &pt->tc);
+
+ while (1) {
+ tm = tsc_to_perf_time(tsc, &pt->tc);
+ if (tm > ns)
+ break;
+ tsc += 1;
+ }
+
+ while (tm > ns)
+ tm = tsc_to_perf_time(--tsc, &pt->tc);
+
+ return tsc;
+}
+
+static int intel_pt_setup_time_ranges(struct intel_pt *pt,
+ struct itrace_synth_opts *opts)
+{
+ struct perf_time_interval *p = opts->ptime_range;
+ int n = opts->range_num;
+ int i;
+
+ if (!n || !p || pt->timeless_decoding)
+ return 0;
+
+ pt->time_ranges = calloc(n, sizeof(struct range));
+ if (!pt->time_ranges)
+ return -ENOMEM;
+
+ pt->range_cnt = n;
+
+ intel_pt_log("%s: %u range(s)\n", __func__, n);
+
+ for (i = 0; i < n; i++) {
+ struct range *r = &pt->time_ranges[i];
+ u64 ts = p[i].start;
+ u64 te = p[i].end;
+
+ /*
+ * Take care to ensure the TSC range matches the perf-time range
+ * when converted back to perf-time.
+ */
+ r->start = ts ? intel_pt_tsc_start(ts, pt) : 0;
+ r->end = te ? intel_pt_tsc_end(te, pt) : 0;
+
+ intel_pt_log("range %d: perf time interval: %"PRIu64" to %"PRIu64"\n",
+ i, ts, te);
+ intel_pt_log("range %d: TSC time interval: %#"PRIx64" to %#"PRIx64"\n",
+ i, r->start, r->end);
+ }
+
+ return 0;
+}
+
static const char * const intel_pt_info_fmts[] = {
[INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n",
[INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n",
@@ -2579,7 +3198,8 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
} else {
itrace_synth_opts__set_default(&pt->synth_opts,
session->itrace_synth_opts->default_no_sample);
- if (use_browser != -1) {
+ if (!session->itrace_synth_opts->default_no_sample &&
+ !session->itrace_synth_opts->inject) {
pt->synth_opts.branches = false;
pt->synth_opts.callchain = true;
}
@@ -2604,6 +3224,12 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
pt->cbr2khz = tsc_freq / pt->max_non_turbo_ratio / 1000;
}
+ if (session->itrace_synth_opts) {
+ err = intel_pt_setup_time_ranges(pt, session->itrace_synth_opts);
+ if (err)
+ goto err_delete_thread;
+ }
+
if (pt->synth_opts.calls)
pt->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
PERF_IP_FLAG_TRACE_END;
@@ -2644,6 +3270,7 @@ err_free_queues:
err_free:
addr_filters__exit(&pt->filts);
zfree(&pt->filter);
+ zfree(&pt->time_ranges);
free(pt);
return err;
}
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index dc7aafe45a2b..17eec39e775e 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -704,12 +704,12 @@ static int machine__process_ksymbol_register(struct machine *machine,
return -ENOMEM;
map->start = event->ksymbol_event.addr;
- map->pgoff = map->start;
map->end = map->start + event->ksymbol_event.len;
map_groups__insert(&machine->kmaps, map);
}
- sym = symbol__new(event->ksymbol_event.addr, event->ksymbol_event.len,
+ sym = symbol__new(map->map_ip(map, map->start),
+ event->ksymbol_event.len,
0, 0, event->ksymbol_event.name);
if (!sym)
return -ENOMEM;
@@ -1241,9 +1241,9 @@ static char *get_kernel_version(const char *root_dir)
return NULL;
tmp = fgets(version, sizeof(version), file);
- if (!tmp)
- *version = '\0';
fclose(file);
+ if (!tmp)
+ return NULL;
name = strstr(version, prefix);
if (!name)
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index ee71efb9db62..6fce983c6115 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -405,6 +405,7 @@ size_t map__fprintf(struct map *map, FILE *fp)
size_t map__fprintf_dsoname(struct map *map, FILE *fp)
{
+ char buf[symbol_conf.pad_output_len_dso + 1];
const char *dsoname = "[unknown]";
if (map && map->dso) {
@@ -414,6 +415,11 @@ size_t map__fprintf_dsoname(struct map *map, FILE *fp)
dsoname = map->dso->name;
}
+ if (symbol_conf.pad_output_len_dso) {
+ scnprintf_pad(buf, symbol_conf.pad_output_len_dso, "%s", dsoname);
+ dsoname = buf;
+ }
+
return fprintf(fp, "%s", dsoname);
}
diff --git a/tools/perf/util/map_groups.h b/tools/perf/util/map_groups.h
index 4dcda33e0fdf..5f25efa6d6bc 100644
--- a/tools/perf/util/map_groups.h
+++ b/tools/perf/util/map_groups.h
@@ -88,4 +88,6 @@ int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map, FILE
struct map *map_groups__find_by_name(struct map_groups *mg, const char *name);
+int map_groups__merge_in(struct map_groups *kmaps, struct map *new_map);
+
#endif // __PERF_MAP_GROUPS_H
diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h
index cb9c246c8962..47fe34e5f7d5 100644
--- a/tools/perf/util/perf_regs.h
+++ b/tools/perf/util/perf_regs.h
@@ -29,12 +29,16 @@ uint64_t arch__user_reg_mask(void);
#ifdef HAVE_PERF_REGS_SUPPORT
#include <perf_regs.h>
+#define DWARF_MINIMAL_REGS ((1ULL << PERF_REG_IP) | (1ULL << PERF_REG_SP))
+
int perf_reg_value(u64 *valp, struct regs_dump *regs, int id);
#else
#define PERF_REGS_MASK 0
#define PERF_REGS_MAX 0
+#define DWARF_MINIMAL_REGS PERF_REGS_MASK
+
static inline const char *perf_reg_name(int id __maybe_unused)
{
return NULL;
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index e0429f4ef335..faa8eb231e1b 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -709,9 +709,7 @@ static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu)
{
int i;
struct pmu_events_map *map;
- struct pmu_event *pe;
const char *name = pmu->name;
- const char *pname;
map = perf_pmu__find_map(pmu);
if (!map)
@@ -722,28 +720,26 @@ static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu)
*/
i = 0;
while (1) {
+ const char *cpu_name = is_arm_pmu_core(name) ? name : "cpu";
+ struct pmu_event *pe = &map->table[i++];
+ const char *pname = pe->pmu ? pe->pmu : cpu_name;
- pe = &map->table[i++];
if (!pe->name) {
if (pe->metric_group || pe->metric_name)
continue;
break;
}
- if (!is_arm_pmu_core(name)) {
- pname = pe->pmu ? pe->pmu : "cpu";
-
- /*
- * uncore alias may be from different PMU
- * with common prefix
- */
- if (pmu_is_uncore(name) &&
- !strncmp(pname, name, strlen(pname)))
- goto new_alias;
+ /*
+ * uncore alias may be from different PMU
+ * with common prefix
+ */
+ if (pmu_is_uncore(name) &&
+ !strncmp(pname, name, strlen(pname)))
+ goto new_alias;
- if (strcmp(pname, name))
- continue;
- }
+ if (strcmp(pname, name))
+ continue;
new_alias:
/* need type casts to override 'const' */
diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c
index c215704931dc..10d36d9b7909 100644
--- a/tools/perf/util/s390-cpumsf.c
+++ b/tools/perf/util/s390-cpumsf.c
@@ -17,8 +17,8 @@
* see Documentation/perf.data-file-format.txt.
* PERF_RECORD_AUXTRACE_INFO:
* Defines a table of contains for PERF_RECORD_AUXTRACE records. This
- * record is generated during 'perf record' command. Each record contains up
- * to 256 entries describing offset and size of the AUXTRACE data in the
+ * record is generated during 'perf record' command. Each record contains
+ * up to 256 entries describing offset and size of the AUXTRACE data in the
* perf.data file.
* PERF_RECORD_AUXTRACE_ERROR:
* Indicates an error during AUXTRACE collection such as buffer overflow.
@@ -237,10 +237,33 @@ static int s390_cpumcf_dumpctr(struct s390_cpumsf *sf,
return rc;
}
-/* Display s390 CPU measurement facility basic-sampling data entry */
+/* Display s390 CPU measurement facility basic-sampling data entry
+ * Data written on s390 in big endian byte order and contains bit
+ * fields across byte boundaries.
+ */
static bool s390_cpumsf_basic_show(const char *color, size_t pos,
- struct hws_basic_entry *basic)
+ struct hws_basic_entry *basicp)
{
+ struct hws_basic_entry *basic = basicp;
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ struct hws_basic_entry local;
+ unsigned long long word = be64toh(*(unsigned long long *)basicp);
+
+ memset(&local, 0, sizeof(local));
+ local.def = be16toh(basicp->def);
+ local.prim_asn = word & 0xffff;
+ local.CL = word >> 30 & 0x3;
+ local.I = word >> 32 & 0x1;
+ local.AS = word >> 33 & 0x3;
+ local.P = word >> 35 & 0x1;
+ local.W = word >> 36 & 0x1;
+ local.T = word >> 37 & 0x1;
+ local.U = word >> 40 & 0xf;
+ local.ia = be64toh(basicp->ia);
+ local.gpp = be64toh(basicp->gpp);
+ local.hpp = be64toh(basicp->hpp);
+ basic = &local;
+#endif
if (basic->def != 1) {
pr_err("Invalid AUX trace basic entry [%#08zx]\n", pos);
return false;
@@ -258,10 +281,22 @@ static bool s390_cpumsf_basic_show(const char *color, size_t pos,
return true;
}
-/* Display s390 CPU measurement facility diagnostic-sampling data entry */
+/* Display s390 CPU measurement facility diagnostic-sampling data entry.
+ * Data written on s390 in big endian byte order and contains bit
+ * fields across byte boundaries.
+ */
static bool s390_cpumsf_diag_show(const char *color, size_t pos,
- struct hws_diag_entry *diag)
+ struct hws_diag_entry *diagp)
{
+ struct hws_diag_entry *diag = diagp;
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ struct hws_diag_entry local;
+ unsigned long long word = be64toh(*(unsigned long long *)diagp);
+
+ local.def = be16toh(diagp->def);
+ local.I = word >> 32 & 0x1;
+ diag = &local;
+#endif
if (diag->def < S390_CPUMSF_DIAG_DEF_FIRST) {
pr_err("Invalid AUX trace diagnostic entry [%#08zx]\n", pos);
return false;
@@ -272,35 +307,52 @@ static bool s390_cpumsf_diag_show(const char *color, size_t pos,
}
/* Return TOD timestamp contained in an trailer entry */
-static unsigned long long trailer_timestamp(struct hws_trailer_entry *te)
+static unsigned long long trailer_timestamp(struct hws_trailer_entry *te,
+ int idx)
{
/* te->t set: TOD in STCKE format, bytes 8-15
* to->t not set: TOD in STCK format, bytes 0-7
*/
unsigned long long ts;
- memcpy(&ts, &te->timestamp[te->t], sizeof(ts));
- return ts;
+ memcpy(&ts, &te->timestamp[idx], sizeof(ts));
+ return be64toh(ts);
}
/* Display s390 CPU measurement facility trailer entry */
static bool s390_cpumsf_trailer_show(const char *color, size_t pos,
struct hws_trailer_entry *te)
{
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ struct hws_trailer_entry local;
+ const unsigned long long flags = be64toh(te->flags);
+
+ memset(&local, 0, sizeof(local));
+ local.f = flags >> 63 & 0x1;
+ local.a = flags >> 62 & 0x1;
+ local.t = flags >> 61 & 0x1;
+ local.bsdes = be16toh((flags >> 16 & 0xffff));
+ local.dsdes = be16toh((flags & 0xffff));
+ memcpy(&local.timestamp, te->timestamp, sizeof(te->timestamp));
+ local.overflow = be64toh(te->overflow);
+ local.clock_base = be64toh(te->progusage[0]) >> 63 & 1;
+ local.progusage2 = be64toh(te->progusage2);
+ te = &local;
+#endif
if (te->bsdes != sizeof(struct hws_basic_entry)) {
pr_err("Invalid AUX trace trailer entry [%#08zx]\n", pos);
return false;
}
color_fprintf(stdout, color, " [%#08zx] Trailer %c%c%c bsdes:%d"
" dsdes:%d Overflow:%lld Time:%#llx\n"
- "\t\tC:%d TOD:%#lx 1:%#llx 2:%#llx\n",
+ "\t\tC:%d TOD:%#lx\n",
pos,
te->f ? 'F' : ' ',
te->a ? 'A' : ' ',
te->t ? 'T' : ' ',
te->bsdes, te->dsdes, te->overflow,
- trailer_timestamp(te), te->clock_base, te->progusage2,
- te->progusage[0], te->progusage[1]);
+ trailer_timestamp(te, te->clock_base),
+ te->clock_base, te->progusage2);
return true;
}
@@ -327,13 +379,13 @@ static bool s390_cpumsf_validate(int machine_type,
*dsdes = *bsdes = 0;
if (len & (S390_CPUMSF_PAGESZ - 1)) /* Illegal size */
return false;
- if (basic->def != 1) /* No basic set entry, must be first */
+ if (be16toh(basic->def) != 1) /* No basic set entry, must be first */
return false;
/* Check for trailer entry at end of SDB */
te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ
- sizeof(*te));
- *bsdes = te->bsdes;
- *dsdes = te->dsdes;
+ *bsdes = be16toh(te->bsdes);
+ *dsdes = be16toh(te->dsdes);
if (!te->bsdes && !te->dsdes) {
/* Very old hardware, use CPUID */
switch (machine_type) {
@@ -495,19 +547,27 @@ static bool s390_cpumsf_make_event(size_t pos,
static unsigned long long get_trailer_time(const unsigned char *buf)
{
struct hws_trailer_entry *te;
- unsigned long long aux_time;
+ unsigned long long aux_time, progusage2;
+ bool clock_base;
te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ
- sizeof(*te));
- if (!te->clock_base) /* TOD_CLOCK_BASE value missing */
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ clock_base = be64toh(te->progusage[0]) >> 63 & 0x1;
+ progusage2 = be64toh(te->progusage[1]);
+#else
+ clock_base = te->clock_base;
+ progusage2 = te->progusage2;
+#endif
+ if (!clock_base) /* TOD_CLOCK_BASE value missing */
return 0;
/* Correct calculation to convert time stamp in trailer entry to
* nano seconds (taken from arch/s390 function tod_to_ns()).
* TOD_CLOCK_BASE is stored in trailer entry member progusage2.
*/
- aux_time = trailer_timestamp(te) - te->progusage2;
+ aux_time = trailer_timestamp(te, clock_base) - progusage2;
aux_time = (aux_time >> 9) * 125 + (((aux_time & 0x1ff) * 125) >> 9);
return aux_time;
}
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 22f52b669871..6acb379b53ec 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -1111,7 +1111,7 @@ static int python_export_sample(struct db_export *dbe,
struct tables *tables = container_of(dbe, struct tables, dbe);
PyObject *t;
- t = tuple_new(22);
+ t = tuple_new(24);
tuple_set_u64(t, 0, es->db_id);
tuple_set_u64(t, 1, es->evsel->db_id);
@@ -1135,6 +1135,8 @@ static int python_export_sample(struct db_export *dbe,
tuple_set_s32(t, 19, es->sample->flags & PERF_BRANCH_MASK);
tuple_set_s32(t, 20, !!(es->sample->flags & PERF_IP_FLAG_IN_TX));
tuple_set_u64(t, 21, es->call_path_id);
+ tuple_set_u64(t, 22, es->sample->insn_cnt);
+ tuple_set_u64(t, 23, es->sample->cyc_cnt);
call_object(tables->sample_handler, t, "sample_table");
@@ -1173,7 +1175,7 @@ static int python_export_call_return(struct db_export *dbe,
u64 comm_db_id = cr->comm ? cr->comm->db_id : 0;
PyObject *t;
- t = tuple_new(12);
+ t = tuple_new(14);
tuple_set_u64(t, 0, cr->db_id);
tuple_set_u64(t, 1, cr->thread->db_id);
@@ -1187,6 +1189,8 @@ static int python_export_call_return(struct db_export *dbe,
tuple_set_u64(t, 9, cr->cp->parent->db_id);
tuple_set_s32(t, 10, cr->flags);
tuple_set_u64(t, 11, cr->parent_db_id);
+ tuple_set_u64(t, 12, cr->insn_count);
+ tuple_set_u64(t, 13, cr->cyc_count);
call_object(tables->call_return_handler, t, "call_return_table");
diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py
index 5b5a167b43ce..a1a68a2fa917 100644
--- a/tools/perf/util/setup.py
+++ b/tools/perf/util/setup.py
@@ -17,6 +17,8 @@ if cc == "clang":
vars[var] = sub("-fcf-protection", "", vars[var])
if not clang_has_option("-fstack-clash-protection"):
vars[var] = sub("-fstack-clash-protection", "", vars[var])
+ if not clang_has_option("-fstack-protector-strong"):
+ vars[var] = sub("-fstack-protector-strong", "", vars[var])
from distutils.core import setup, Extension
diff --git a/tools/perf/util/smt.c b/tools/perf/util/smt.c
index 453f6f6f29f3..3b791ef2cd50 100644
--- a/tools/perf/util/smt.c
+++ b/tools/perf/util/smt.c
@@ -23,8 +23,12 @@ int smt_on(void)
char fn[256];
snprintf(fn, sizeof fn,
- "devices/system/cpu/cpu%d/topology/thread_siblings",
- cpu);
+ "devices/system/cpu/cpu%d/topology/core_cpus", cpu);
+ if (access(fn, F_OK) == -1) {
+ snprintf(fn, sizeof fn,
+ "devices/system/cpu/cpu%d/topology/thread_siblings",
+ cpu);
+ }
if (sysfs__read_str(fn, &str, &strlen) < 0)
continue;
/* Entry is hex, but does not have 0x, so need custom parser */
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 4c53bae5644b..a6b9de3e83fc 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -69,8 +69,9 @@ static void aggr_printout(struct perf_stat_config *config,
{
switch (config->aggr_mode) {
case AGGR_CORE:
- fprintf(config->output, "S%d-C%*d%s%*d%s",
+ fprintf(config->output, "S%d-D%d-C%*d%s%*d%s",
cpu_map__id_to_socket(id),
+ cpu_map__id_to_die(id),
config->csv_output ? 0 : -8,
cpu_map__id_to_cpu(id),
config->csv_sep,
@@ -78,6 +79,16 @@ static void aggr_printout(struct perf_stat_config *config,
nr,
config->csv_sep);
break;
+ case AGGR_DIE:
+ fprintf(config->output, "S%d-D%*d%s%*d%s",
+ cpu_map__id_to_socket(id << 16),
+ config->csv_output ? 0 : -8,
+ cpu_map__id_to_die(id << 16),
+ config->csv_sep,
+ config->csv_output ? 0 : 4,
+ nr,
+ config->csv_sep);
+ break;
case AGGR_SOCKET:
fprintf(config->output, "S%*d%s%*d%s",
config->csv_output ? 0 : -5,
@@ -89,8 +100,9 @@ static void aggr_printout(struct perf_stat_config *config,
break;
case AGGR_NONE:
if (evsel->percore) {
- fprintf(config->output, "S%d-C%*d%s",
+ fprintf(config->output, "S%d-D%d-C%*d%s",
cpu_map__id_to_socket(id),
+ cpu_map__id_to_die(id),
config->csv_output ? 0 : -5,
cpu_map__id_to_cpu(id), config->csv_sep);
} else {
@@ -407,6 +419,7 @@ static void printout(struct perf_stat_config *config, int id, int nr,
[AGGR_THREAD] = 1,
[AGGR_NONE] = 1,
[AGGR_SOCKET] = 2,
+ [AGGR_DIE] = 2,
[AGGR_CORE] = 2,
};
@@ -879,7 +892,8 @@ static void print_no_aggr_metric(struct perf_stat_config *config,
}
static int aggr_header_lens[] = {
- [AGGR_CORE] = 18,
+ [AGGR_CORE] = 24,
+ [AGGR_DIE] = 18,
[AGGR_SOCKET] = 12,
[AGGR_NONE] = 6,
[AGGR_THREAD] = 24,
@@ -888,6 +902,7 @@ static int aggr_header_lens[] = {
static const char *aggr_header_csv[] = {
[AGGR_CORE] = "core,cpus,",
+ [AGGR_DIE] = "die,cpus",
[AGGR_SOCKET] = "socket,cpus",
[AGGR_NONE] = "cpu,",
[AGGR_THREAD] = "comm-pid,",
@@ -954,8 +969,13 @@ static void print_interval(struct perf_stat_config *config,
if (!metric_only)
fprintf(output, " counts %*s events\n", unit_width, "unit");
break;
+ case AGGR_DIE:
+ fprintf(output, "# time die cpus");
+ if (!metric_only)
+ fprintf(output, " counts %*s events\n", unit_width, "unit");
+ break;
case AGGR_CORE:
- fprintf(output, "# time core cpus");
+ fprintf(output, "# time core cpus");
if (!metric_only)
fprintf(output, " counts %*s events\n", unit_width, "unit");
break;
@@ -1165,6 +1185,7 @@ perf_evlist__print_counters(struct perf_evlist *evlist,
switch (config->aggr_mode) {
case AGGR_CORE:
+ case AGGR_DIE:
case AGGR_SOCKET:
print_aggr(config, evlist, prefix);
break;
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 83d8094be4fe..027b09aaa4cf 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -12,6 +12,7 @@
/*
* AGGR_GLOBAL: Use CPU 0
* AGGR_SOCKET: Use first CPU of socket
+ * AGGR_DIE: Use first CPU of die
* AGGR_CORE: Use first CPU of core
* AGGR_NONE: Use matching CPU
* AGGR_THREAD: Not supported?
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index c3115d939b0b..d91fe754b6d2 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -272,6 +272,7 @@ process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel
switch (config->aggr_mode) {
case AGGR_THREAD:
case AGGR_CORE:
+ case AGGR_DIE:
case AGGR_SOCKET:
case AGGR_NONE:
if (!evsel->snapshot)
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 2f9c9159a364..7032dd1eeac2 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -44,6 +44,7 @@ enum aggr_mode {
AGGR_NONE,
AGGR_GLOBAL,
AGGR_SOCKET,
+ AGGR_DIE,
AGGR_CORE,
AGGR_THREAD,
AGGR_UNSET,
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 4ad106a5f2c0..fdc5bd7dbb90 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -699,7 +699,6 @@ bool __weak elf__needs_adjust_symbols(GElf_Ehdr ehdr)
int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name,
enum dso_binary_type type)
{
- int err = -1;
GElf_Ehdr ehdr;
Elf *elf;
int fd;
@@ -793,7 +792,7 @@ out_elf_end:
elf_end(elf);
out_close:
close(fd);
- return err;
+ return -1;
}
/**
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 5cbad55cd99d..f4540f8bbed1 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1166,6 +1166,85 @@ static int kcore_mapfn(u64 start, u64 len, u64 pgoff, void *data)
return 0;
}
+/*
+ * Merges map into map_groups by splitting the new map
+ * within the existing map regions.
+ */
+int map_groups__merge_in(struct map_groups *kmaps, struct map *new_map)
+{
+ struct map *old_map;
+ LIST_HEAD(merged);
+
+ for (old_map = map_groups__first(kmaps); old_map;
+ old_map = map_groups__next(old_map)) {
+
+ /* no overload with this one */
+ if (new_map->end < old_map->start ||
+ new_map->start >= old_map->end)
+ continue;
+
+ if (new_map->start < old_map->start) {
+ /*
+ * |new......
+ * |old....
+ */
+ if (new_map->end < old_map->end) {
+ /*
+ * |new......| -> |new..|
+ * |old....| -> |old....|
+ */
+ new_map->end = old_map->start;
+ } else {
+ /*
+ * |new.............| -> |new..| |new..|
+ * |old....| -> |old....|
+ */
+ struct map *m = map__clone(new_map);
+
+ if (!m)
+ return -ENOMEM;
+
+ m->end = old_map->start;
+ list_add_tail(&m->node, &merged);
+ new_map->start = old_map->end;
+ }
+ } else {
+ /*
+ * |new......
+ * |old....
+ */
+ if (new_map->end < old_map->end) {
+ /*
+ * |new..| -> x
+ * |old.........| -> |old.........|
+ */
+ map__put(new_map);
+ new_map = NULL;
+ break;
+ } else {
+ /*
+ * |new......| -> |new...|
+ * |old....| -> |old....|
+ */
+ new_map->start = old_map->end;
+ }
+ }
+ }
+
+ while (!list_empty(&merged)) {
+ old_map = list_entry(merged.next, struct map, node);
+ list_del_init(&old_map->node);
+ map_groups__insert(kmaps, old_map);
+ map__put(old_map);
+ }
+
+ if (new_map) {
+ map_groups__insert(kmaps, new_map);
+ map__put(new_map);
+ }
+ return 0;
+}
+
static int dso__load_kcore(struct dso *dso, struct map *map,
const char *kallsyms_filename)
{
@@ -1222,7 +1301,12 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
while (old_map) {
struct map *next = map_groups__next(old_map);
- if (old_map != map)
+ /*
+ * We need to preserve eBPF maps even if they are
+ * covered by kcore, because we need to access
+ * eBPF dso for source data.
+ */
+ if (old_map != map && !__map__is_bpf_prog(old_map))
map_groups__remove(kmaps, old_map);
old_map = next;
}
@@ -1256,11 +1340,16 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
map_groups__remove(kmaps, map);
map_groups__insert(kmaps, map);
map__put(map);
+ map__put(new_map);
} else {
- map_groups__insert(kmaps, new_map);
+ /*
+ * Merge kcore map into existing maps,
+ * and ensure that current maps (eBPF)
+ * stay intact.
+ */
+ if (map_groups__merge_in(kmaps, new_map))
+ goto out_err;
}
-
- map__put(new_map);
}
if (machine__is(machine, "x86_64")) {
diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h
index 6c55fa6fccec..382ba63fc554 100644
--- a/tools/perf/util/symbol_conf.h
+++ b/tools/perf/util/symbol_conf.h
@@ -69,6 +69,7 @@ struct symbol_conf {
*tid_list;
const char *symfs;
int res_sample;
+ int pad_output_len_dso;
};
extern struct symbol_conf symbol_conf;
diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c
index 4ba9e866b076..c485186a8b6d 100644
--- a/tools/perf/util/thread-stack.c
+++ b/tools/perf/util/thread-stack.c
@@ -40,6 +40,8 @@ enum retpoline_state_t {
* @timestamp: timestamp (if known)
* @ref: external reference (e.g. db_id of sample)
* @branch_count: the branch count when the entry was created
+ * @insn_count: the instruction count when the entry was created
+ * @cyc_count the cycle count when the entry was created
* @db_id: id used for db-export
* @cp: call path
* @no_call: a 'call' was not seen
@@ -51,6 +53,8 @@ struct thread_stack_entry {
u64 timestamp;
u64 ref;
u64 branch_count;
+ u64 insn_count;
+ u64 cyc_count;
u64 db_id;
struct call_path *cp;
bool no_call;
@@ -66,6 +70,8 @@ struct thread_stack_entry {
* @sz: current maximum stack size
* @trace_nr: current trace number
* @branch_count: running branch count
+ * @insn_count: running instruction count
+ * @cyc_count running cycle count
* @kernel_start: kernel start address
* @last_time: last timestamp
* @crp: call/return processor
@@ -79,6 +85,8 @@ struct thread_stack {
size_t sz;
u64 trace_nr;
u64 branch_count;
+ u64 insn_count;
+ u64 cyc_count;
u64 kernel_start;
u64 last_time;
struct call_return_processor *crp;
@@ -280,6 +288,8 @@ static int thread_stack__call_return(struct thread *thread,
cr.call_time = tse->timestamp;
cr.return_time = timestamp;
cr.branch_count = ts->branch_count - tse->branch_count;
+ cr.insn_count = ts->insn_count - tse->insn_count;
+ cr.cyc_count = ts->cyc_count - tse->cyc_count;
cr.db_id = tse->db_id;
cr.call_ref = tse->ref;
cr.return_ref = ref;
@@ -535,6 +545,8 @@ static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr,
tse->timestamp = timestamp;
tse->ref = ref;
tse->branch_count = ts->branch_count;
+ tse->insn_count = ts->insn_count;
+ tse->cyc_count = ts->cyc_count;
tse->cp = cp;
tse->no_call = no_call;
tse->trace_end = trace_end;
@@ -865,6 +877,8 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
}
ts->branch_count += 1;
+ ts->insn_count += sample->insn_cnt;
+ ts->cyc_count += sample->cyc_cnt;
ts->last_time = sample->time;
if (sample->flags & PERF_IP_FLAG_CALL) {
diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h
index 71e15d4ec533..e1ec5a58f1b2 100644
--- a/tools/perf/util/thread-stack.h
+++ b/tools/perf/util/thread-stack.h
@@ -43,6 +43,8 @@ enum {
* @call_time: timestamp of call (if known)
* @return_time: timestamp of return (if known)
* @branch_count: number of branches seen between call and return
+ * @insn_count: approx. number of instructions between call and return
+ * @cyc_count: approx. number of cycles between call and return
* @call_ref: external reference to 'call' sample (e.g. db_id)
* @return_ref: external reference to 'return' sample (e.g. db_id)
* @db_id: id used for db-export
@@ -56,6 +58,8 @@ struct call_return {
u64 call_time;
u64 return_time;
u64 branch_count;
+ u64 insn_count;
+ u64 cyc_count;
u64 call_ref;
u64 return_ref;
u64 db_id;
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index b413ba5b9835..aab7807d445f 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -141,13 +141,13 @@ static struct namespaces *__thread__namespaces(const struct thread *thread)
return list_first_entry(&thread->namespaces_list, struct namespaces, list);
}
-struct namespaces *thread__namespaces(const struct thread *thread)
+struct namespaces *thread__namespaces(struct thread *thread)
{
struct namespaces *ns;
- down_read((struct rw_semaphore *)&thread->namespaces_lock);
+ down_read(&thread->namespaces_lock);
ns = __thread__namespaces(thread);
- up_read((struct rw_semaphore *)&thread->namespaces_lock);
+ up_read(&thread->namespaces_lock);
return ns;
}
@@ -271,13 +271,13 @@ static const char *__thread__comm_str(const struct thread *thread)
return comm__str(comm);
}
-const char *thread__comm_str(const struct thread *thread)
+const char *thread__comm_str(struct thread *thread)
{
const char *str;
- down_read((struct rw_semaphore *)&thread->comm_lock);
+ down_read(&thread->comm_lock);
str = __thread__comm_str(thread);
- up_read((struct rw_semaphore *)&thread->comm_lock);
+ up_read(&thread->comm_lock);
return str;
}
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index cf8375c017a0..e97ef6977eb9 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -76,7 +76,7 @@ static inline void thread__exited(struct thread *thread)
thread->dead = true;
}
-struct namespaces *thread__namespaces(const struct thread *thread);
+struct namespaces *thread__namespaces(struct thread *thread);
int thread__set_namespaces(struct thread *thread, u64 timestamp,
struct namespaces_event *event);
@@ -93,7 +93,7 @@ int thread__set_comm_from_proc(struct thread *thread);
int thread__comm_len(struct thread *thread);
struct comm *thread__comm(const struct thread *thread);
struct comm *thread__exec_comm(const struct thread *thread);
-const char *thread__comm_str(const struct thread *thread);
+const char *thread__comm_str(struct thread *thread);
int thread__insert_map(struct thread *thread, struct map *map);
int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bool do_maps_clone);
size_t thread__fprintf(struct thread *thread, FILE *fp);
diff --git a/tools/perf/util/time-utils.c b/tools/perf/util/time-utils.c
index 20663a460df3..2b48816a2d2e 100644
--- a/tools/perf/util/time-utils.c
+++ b/tools/perf/util/time-utils.c
@@ -7,6 +7,7 @@
#include <errno.h>
#include <inttypes.h>
#include <math.h>
+#include <ctype.h>
#include "perf.h"
#include "debug.h"
@@ -116,6 +117,69 @@ int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr)
return rc;
}
+static int perf_time__parse_strs(struct perf_time_interval *ptime,
+ const char *ostr, int size)
+{
+ const char *cp;
+ char *str, *arg, *p;
+ int i, num = 0, rc = 0;
+
+ /* Count the commas */
+ for (cp = ostr; *cp; cp++)
+ num += !!(*cp == ',');
+
+ if (!num)
+ return -EINVAL;
+
+ BUG_ON(num > size);
+
+ str = strdup(ostr);
+ if (!str)
+ return -ENOMEM;
+
+ /* Split the string and parse each piece, except the last */
+ for (i = 0, p = str; i < num - 1; i++) {
+ arg = p;
+ /* Find next comma, there must be one */
+ p = strchr(p, ',') + 1;
+ /* Skip white space */
+ while (isspace(*p))
+ p++;
+ /* Skip the value, must not contain space or comma */
+ while (*p && !isspace(*p)) {
+ if (*p++ == ',') {
+ rc = -EINVAL;
+ goto out;
+ }
+ }
+ /* Split and parse */
+ if (*p)
+ *p++ = 0;
+ rc = perf_time__parse_str(ptime + i, arg);
+ if (rc < 0)
+ goto out;
+ }
+
+ /* Parse the last piece */
+ rc = perf_time__parse_str(ptime + i, p);
+ if (rc < 0)
+ goto out;
+
+ /* Check there is no overlap */
+ for (i = 0; i < num - 1; i++) {
+ if (ptime[i].end >= ptime[i + 1].start) {
+ rc = -EINVAL;
+ goto out;
+ }
+ }
+
+ rc = num;
+out:
+ free(str);
+
+ return rc;
+}
+
static int parse_percent(double *pcnt, char *str)
{
char *c, *endptr;
@@ -135,12 +199,30 @@ static int parse_percent(double *pcnt, char *str)
return 0;
}
+static int set_percent_time(struct perf_time_interval *ptime, double start_pcnt,
+ double end_pcnt, u64 start, u64 end)
+{
+ u64 total = end - start;
+
+ if (start_pcnt < 0.0 || start_pcnt > 1.0 ||
+ end_pcnt < 0.0 || end_pcnt > 1.0) {
+ return -1;
+ }
+
+ ptime->start = start + round(start_pcnt * total);
+ ptime->end = start + round(end_pcnt * total);
+
+ if (ptime->end > ptime->start && ptime->end != end)
+ ptime->end -= 1;
+
+ return 0;
+}
+
static int percent_slash_split(char *str, struct perf_time_interval *ptime,
u64 start, u64 end)
{
char *p, *end_str;
double pcnt, start_pcnt, end_pcnt;
- u64 total = end - start;
int i;
/*
@@ -168,15 +250,7 @@ static int percent_slash_split(char *str, struct perf_time_interval *ptime,
start_pcnt = pcnt * (i - 1);
end_pcnt = pcnt * i;
- if (start_pcnt < 0.0 || start_pcnt > 1.0 ||
- end_pcnt < 0.0 || end_pcnt > 1.0) {
- return -1;
- }
-
- ptime->start = start + round(start_pcnt * total);
- ptime->end = start + round(end_pcnt * total);
-
- return 0;
+ return set_percent_time(ptime, start_pcnt, end_pcnt, start, end);
}
static int percent_dash_split(char *str, struct perf_time_interval *ptime,
@@ -184,7 +258,6 @@ static int percent_dash_split(char *str, struct perf_time_interval *ptime,
{
char *start_str = NULL, *end_str;
double start_pcnt, end_pcnt;
- u64 total = end - start;
int ret;
/*
@@ -203,16 +276,7 @@ static int percent_dash_split(char *str, struct perf_time_interval *ptime,
free(start_str);
- if (start_pcnt < 0.0 || start_pcnt > 1.0 ||
- end_pcnt < 0.0 || end_pcnt > 1.0 ||
- start_pcnt > end_pcnt) {
- return -1;
- }
-
- ptime->start = start + round(start_pcnt * total);
- ptime->end = start + round(end_pcnt * total);
-
- return 0;
+ return set_percent_time(ptime, start_pcnt, end_pcnt, start, end);
}
typedef int (*time_pecent_split)(char *, struct perf_time_interval *,
@@ -389,13 +453,12 @@ bool perf_time__ranges_skip_sample(struct perf_time_interval *ptime_buf,
ptime = &ptime_buf[i];
if (timestamp >= ptime->start &&
- ((timestamp < ptime->end && i < num - 1) ||
- (timestamp <= ptime->end && i == num - 1))) {
- break;
+ (timestamp <= ptime->end || !ptime->end)) {
+ return false;
}
}
- return (i == num) ? true : false;
+ return true;
}
int perf_time__parse_for_ranges(const char *time_str,
@@ -403,20 +466,20 @@ int perf_time__parse_for_ranges(const char *time_str,
struct perf_time_interval **ranges,
int *range_size, int *range_num)
{
+ bool has_percent = strchr(time_str, '%');
struct perf_time_interval *ptime_range;
- int size, num, ret;
+ int size, num, ret = -EINVAL;
ptime_range = perf_time__range_alloc(time_str, &size);
if (!ptime_range)
return -ENOMEM;
- if (perf_time__parse_str(ptime_range, time_str) != 0) {
+ if (has_percent) {
if (session->evlist->first_sample_time == 0 &&
session->evlist->last_sample_time == 0) {
pr_err("HINT: no first/last sample time found in perf data.\n"
"Please use latest perf binary to execute 'perf record'\n"
"(if '--buildid-all' is enabled, please set '--timestamp-boundary').\n");
- ret = -EINVAL;
goto error;
}
@@ -425,21 +488,20 @@ int perf_time__parse_for_ranges(const char *time_str,
time_str,
session->evlist->first_sample_time,
session->evlist->last_sample_time);
-
- if (num < 0) {
- pr_err("Invalid time string\n");
- ret = -EINVAL;
- goto error;
- }
} else {
- num = 1;
+ num = perf_time__parse_strs(ptime_range, time_str, size);
}
+ if (num < 0)
+ goto error_invalid;
+
*range_size = size;
*range_num = num;
*ranges = ptime_range;
return 0;
+error_invalid:
+ pr_err("Invalid time string\n");
error:
free(ptime_range);
return ret;