aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/tools/perf/util/intel-pt.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--tools/perf/util/intel-pt.c1528
1 files changed, 1335 insertions, 193 deletions
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 3a0348caec7d..9b1011fe4826 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -5,6 +5,7 @@
*/
#include <inttypes.h>
+#include <linux/perf_event.h>
#include <stdio.h>
#include <stdbool.h>
#include <errno.h>
@@ -46,6 +47,12 @@
#define MAX_TIMESTAMP (~0ULL)
+#define INTEL_PT_CFG_PASS_THRU BIT_ULL(0)
+#define INTEL_PT_CFG_PWR_EVT_EN BIT_ULL(4)
+#define INTEL_PT_CFG_BRANCH_EN BIT_ULL(13)
+#define INTEL_PT_CFG_EVT_EN BIT_ULL(31)
+#define INTEL_PT_CFG_TNT_DIS BIT_ULL(55)
+
struct range {
u64 start;
u64 end;
@@ -68,9 +75,12 @@ struct intel_pt {
bool data_queued;
bool est_tsc;
bool sync_switch;
+ bool sync_switch_not_supported;
bool mispred_all;
bool use_thread_stack;
bool callstack;
+ bool cap_event_trace;
+ bool have_guest_sideband;
unsigned int br_stack_sz;
unsigned int br_stack_sz_plus;
int have_sched_switch;
@@ -78,6 +88,7 @@ struct intel_pt {
u64 kernel_start;
u64 switch_ip;
u64 ptss_ip;
+ u64 first_timestamp;
struct perf_tsc_conversion tc;
bool cap_user_time_zero;
@@ -88,6 +99,10 @@ struct intel_pt {
u64 instructions_sample_type;
u64 instructions_id;
+ bool sample_cycles;
+ u64 cycles_sample_type;
+ u64 cycles_id;
+
bool sample_branches;
u32 branches_filter;
u64 branches_sample_type;
@@ -108,10 +123,19 @@ struct intel_pt {
u64 exstop_id;
u64 pwrx_id;
u64 cbr_id;
+ u64 psb_id;
+ bool single_pebs;
bool sample_pebs;
+ int pebs_data_src_fmt;
struct evsel *pebs_evsel;
+ u64 evt_sample_type;
+ u64 evt_id;
+
+ u64 iflag_chg_sample_type;
+ u64 iflag_chg_id;
+
u64 tsc_bit;
u64 mtc_bit;
u64 mtc_freq_bits;
@@ -121,6 +145,7 @@ struct intel_pt {
u64 noretcomp_bit;
unsigned max_non_turbo_ratio;
unsigned cbr2khz;
+ int max_loops;
unsigned long num_events;
@@ -132,6 +157,9 @@ struct intel_pt {
struct ip_callchain *chain;
struct branch_stack *br_stack;
+
+ u64 dflt_tsc_offset;
+ struct rb_root vmcs_info;
};
enum switch_state {
@@ -142,6 +170,15 @@ enum switch_state {
INTEL_PT_SS_EXPECTING_SWITCH_IP,
};
+/* applicable_counters is 64-bits */
+#define INTEL_PT_MAX_PEBS 64
+
+struct intel_pt_pebs_event {
+ struct evsel *evsel;
+ u64 id;
+ int data_src_fmt;
+};
+
struct intel_pt_queue {
struct intel_pt *pt;
unsigned int queue_nr;
@@ -157,11 +194,19 @@ struct intel_pt_queue {
bool step_through_buffers;
bool use_buffer_pid_tid;
bool sync_switch;
+ bool sample_ipc;
pid_t pid, tid;
int cpu;
int switch_state;
pid_t next_tid;
struct thread *thread;
+ struct machine *guest_machine;
+ struct thread *guest_thread;
+ struct thread *unknown_guest_thread;
+ pid_t guest_machine_pid;
+ pid_t guest_pid;
+ pid_t guest_tid;
+ int vcpu;
bool exclude_kernel;
bool have_sample;
u64 time;
@@ -176,10 +221,13 @@ struct intel_pt_queue {
u64 ipc_cyc_cnt;
u64 last_in_insn_cnt;
u64 last_in_cyc_cnt;
+ u64 last_cy_insn_cnt;
+ u64 last_cy_cyc_cnt;
u64 last_br_insn_cnt;
u64 last_br_cyc_cnt;
unsigned int cbr_seen;
char insn[INTEL_PT_INSN_BUF_SZ];
+ struct intel_pt_pebs_event pebs[INTEL_PT_MAX_PEBS];
};
static void intel_pt_dump(struct intel_pt *pt __maybe_unused,
@@ -203,7 +251,7 @@ static void intel_pt_dump(struct intel_pt *pt __maybe_unused,
else
pkt_len = 1;
printf(".");
- color_fprintf(stdout, color, " %08x: ", pos);
+ color_fprintf(stdout, color, " %08zx: ", pos);
for (i = 0; i < pkt_len; i++)
color_fprintf(stdout, color, " %02x", buf[i]);
for (; i < 16; i++)
@@ -267,6 +315,65 @@ static bool intel_pt_log_events(struct intel_pt *pt, u64 tm)
return !n || !perf_time__ranges_skip_sample(range, n, tm);
}
+static struct intel_pt_vmcs_info *intel_pt_findnew_vmcs(struct rb_root *rb_root,
+ u64 vmcs,
+ u64 dflt_tsc_offset)
+{
+ struct rb_node **p = &rb_root->rb_node;
+ struct rb_node *parent = NULL;
+ struct intel_pt_vmcs_info *v;
+
+ while (*p) {
+ parent = *p;
+ v = rb_entry(parent, struct intel_pt_vmcs_info, rb_node);
+
+ if (v->vmcs == vmcs)
+ return v;
+
+ if (vmcs < v->vmcs)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+
+ v = zalloc(sizeof(*v));
+ if (v) {
+ v->vmcs = vmcs;
+ v->tsc_offset = dflt_tsc_offset;
+ v->reliable = dflt_tsc_offset;
+
+ rb_link_node(&v->rb_node, parent, p);
+ rb_insert_color(&v->rb_node, rb_root);
+ }
+
+ return v;
+}
+
+static struct intel_pt_vmcs_info *intel_pt_findnew_vmcs_info(void *data, uint64_t vmcs)
+{
+ struct intel_pt_queue *ptq = data;
+ struct intel_pt *pt = ptq->pt;
+
+ if (!vmcs && !pt->dflt_tsc_offset)
+ return NULL;
+
+ return intel_pt_findnew_vmcs(&pt->vmcs_info, vmcs, pt->dflt_tsc_offset);
+}
+
+static void intel_pt_free_vmcs_info(struct intel_pt *pt)
+{
+ struct intel_pt_vmcs_info *v;
+ struct rb_node *n;
+
+ n = rb_first(&pt->vmcs_info);
+ while (n) {
+ v = rb_entry(n, struct intel_pt_vmcs_info, rb_node);
+ n = rb_next(n);
+ rb_erase(&v->rb_node, &pt->vmcs_info);
+ free(v);
+ }
+}
+
static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a,
struct auxtrace_buffer *b)
{
@@ -274,9 +381,17 @@ static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *
void *start;
start = intel_pt_find_overlap(a->data, a->size, b->data, b->size,
- pt->have_tsc, &consecutive);
+ pt->have_tsc, &consecutive,
+ pt->synth_opts.vm_time_correlation);
if (!start)
return -EINVAL;
+ /*
+ * In the case of vm_time_correlation, the overlap might contain TSC
+ * packets that will not be fixed, and that will then no longer work for
+ * overlap detection. Avoid that by zeroing out the overlap.
+ */
+ if (pt->synth_opts.vm_time_correlation)
+ memset(b->data, 0, start - b->data);
b->use_size = b->data + b->size - start;
b->use_data = start;
if (b->use_size && consecutive)
@@ -430,6 +545,7 @@ struct intel_pt_cache_entry {
u64 byte_cnt;
enum intel_pt_insn_op op;
enum intel_pt_insn_branch branch;
+ bool emulated_ptwrite;
int length;
int32_t rel;
char insn[INTEL_PT_INSN_BUF_SZ];
@@ -484,15 +600,15 @@ static struct auxtrace_cache *intel_pt_cache(struct dso *dso,
struct auxtrace_cache *c;
unsigned int bits;
- if (dso->auxtrace_cache)
- return dso->auxtrace_cache;
+ if (dso__auxtrace_cache(dso))
+ return dso__auxtrace_cache(dso);
bits = intel_pt_cache_size(dso, machine);
/* Ignoring cache creation failure */
c = auxtrace_cache__new(bits, sizeof(struct intel_pt_cache_entry), 200);
- dso->auxtrace_cache = c;
+ dso__set_auxtrace_cache(dso, c);
return c;
}
@@ -516,6 +632,7 @@ static int intel_pt_cache_add(struct dso *dso, struct machine *machine,
e->byte_cnt = byte_cnt;
e->op = intel_pt_insn->op;
e->branch = intel_pt_insn->branch;
+ e->emulated_ptwrite = intel_pt_insn->emulated_ptwrite;
e->length = intel_pt_insn->length;
e->rel = intel_pt_insn->rel;
memcpy(e->insn, intel_pt_insn->buf, INTEL_PT_INSN_BUF_SZ);
@@ -535,7 +652,7 @@ intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset)
if (!c)
return NULL;
- return auxtrace_cache__lookup(dso->auxtrace_cache, offset);
+ return auxtrace_cache__lookup(dso__auxtrace_cache(dso), offset);
}
static void intel_pt_cache_invalidate(struct dso *dso, struct machine *machine,
@@ -546,16 +663,88 @@ static void intel_pt_cache_invalidate(struct dso *dso, struct machine *machine,
if (!c)
return;
- auxtrace_cache__remove(dso->auxtrace_cache, offset);
+ auxtrace_cache__remove(dso__auxtrace_cache(dso), offset);
}
-static inline u8 intel_pt_cpumode(struct intel_pt *pt, uint64_t ip)
+static inline bool intel_pt_guest_kernel_ip(uint64_t ip)
{
- return ip >= pt->kernel_start ?
+ /* Assumes 64-bit kernel */
+ return ip & (1ULL << 63);
+}
+
+static inline u8 intel_pt_nr_cpumode(struct intel_pt_queue *ptq, uint64_t ip, bool nr)
+{
+ if (nr) {
+ return intel_pt_guest_kernel_ip(ip) ?
+ PERF_RECORD_MISC_GUEST_KERNEL :
+ PERF_RECORD_MISC_GUEST_USER;
+ }
+
+ return ip >= ptq->pt->kernel_start ?
PERF_RECORD_MISC_KERNEL :
PERF_RECORD_MISC_USER;
}
+static inline u8 intel_pt_cpumode(struct intel_pt_queue *ptq, uint64_t from_ip, uint64_t to_ip)
+{
+ /* No support for non-zero CS base */
+ if (from_ip)
+ return intel_pt_nr_cpumode(ptq, from_ip, ptq->state->from_nr);
+ return intel_pt_nr_cpumode(ptq, to_ip, ptq->state->to_nr);
+}
+
+static int intel_pt_get_guest(struct intel_pt_queue *ptq)
+{
+ struct machines *machines = &ptq->pt->session->machines;
+ struct machine *machine;
+ pid_t pid = ptq->pid <= 0 ? DEFAULT_GUEST_KERNEL_ID : ptq->pid;
+
+ if (ptq->guest_machine && pid == ptq->guest_machine->pid)
+ return 0;
+
+ ptq->guest_machine = NULL;
+ thread__zput(ptq->unknown_guest_thread);
+
+ if (symbol_conf.guest_code) {
+ thread__zput(ptq->guest_thread);
+ ptq->guest_thread = machines__findnew_guest_code(machines, pid);
+ }
+
+ machine = machines__find_guest(machines, pid);
+ if (!machine)
+ return -1;
+
+ ptq->unknown_guest_thread = machine__idle_thread(machine);
+ if (!ptq->unknown_guest_thread)
+ return -1;
+
+ ptq->guest_machine = machine;
+
+ return 0;
+}
+
+static inline bool intel_pt_jmp_16(struct intel_pt_insn *intel_pt_insn)
+{
+ return intel_pt_insn->rel == 16 && intel_pt_insn->branch == INTEL_PT_BR_UNCONDITIONAL;
+}
+
+#define PTWRITE_MAGIC "\x0f\x0bperf,ptwrite "
+#define PTWRITE_MAGIC_LEN 16
+
+static bool intel_pt_emulated_ptwrite(struct dso *dso, struct machine *machine, u64 offset)
+{
+ unsigned char buf[PTWRITE_MAGIC_LEN];
+ ssize_t len;
+
+ len = dso__data_read_offset(dso, machine, offset, buf, PTWRITE_MAGIC_LEN);
+ if (len == PTWRITE_MAGIC_LEN && !memcmp(buf, PTWRITE_MAGIC, PTWRITE_MAGIC_LEN)) {
+ intel_pt_log("Emulated ptwrite signature found\n");
+ return true;
+ }
+ intel_pt_log("Emulated ptwrite signature not found\n");
+ return false;
+}
+
static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
uint64_t *insn_cnt_ptr, uint64_t *ip,
uint64_t to_ip, uint64_t max_insn_cnt,
@@ -567,53 +756,98 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
struct addr_location al;
unsigned char buf[INTEL_PT_INSN_BUF_SZ];
ssize_t len;
- int x86_64;
+ int x86_64, ret = 0;
u8 cpumode;
u64 offset, start_offset, start_ip;
u64 insn_cnt = 0;
bool one_map = true;
+ bool nr;
+
+ addr_location__init(&al);
intel_pt_insn->length = 0;
+ intel_pt_insn->op = INTEL_PT_OP_OTHER;
if (to_ip && *ip == to_ip)
goto out_no_cache;
- cpumode = intel_pt_cpumode(ptq->pt, *ip);
+ nr = ptq->state->to_nr;
+ cpumode = intel_pt_nr_cpumode(ptq, *ip, nr);
- thread = ptq->thread;
- if (!thread) {
- if (cpumode != PERF_RECORD_MISC_KERNEL)
- return -EINVAL;
- thread = ptq->pt->unknown_thread;
+ if (nr) {
+ if (ptq->pt->have_guest_sideband) {
+ if (!ptq->guest_machine || ptq->guest_machine_pid != ptq->pid) {
+ intel_pt_log("ERROR: guest sideband but no guest machine\n");
+ ret = -EINVAL;
+ goto out_ret;
+ }
+ } else if ((!symbol_conf.guest_code && cpumode != PERF_RECORD_MISC_GUEST_KERNEL) ||
+ intel_pt_get_guest(ptq)) {
+ intel_pt_log("ERROR: no guest machine\n");
+ ret = -EINVAL;
+ goto out_ret;
+ }
+ machine = ptq->guest_machine;
+ thread = ptq->guest_thread;
+ if (!thread) {
+ if (cpumode != PERF_RECORD_MISC_GUEST_KERNEL) {
+ intel_pt_log("ERROR: no guest thread\n");
+ ret = -EINVAL;
+ goto out_ret;
+ }
+ thread = ptq->unknown_guest_thread;
+ }
+ } else {
+ thread = ptq->thread;
+ if (!thread) {
+ if (cpumode != PERF_RECORD_MISC_KERNEL) {
+ intel_pt_log("ERROR: no thread\n");
+ ret = -EINVAL;
+ goto out_ret;
+ }
+ thread = ptq->pt->unknown_thread;
+ }
}
while (1) {
- if (!thread__find_map(thread, cpumode, *ip, &al) || !al.map->dso)
- return -EINVAL;
+ struct dso *dso;
- if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR &&
- dso__data_status_seen(al.map->dso,
- DSO_DATA_STATUS_SEEN_ITRACE))
- return -ENOENT;
+ if (!thread__find_map(thread, cpumode, *ip, &al) || !map__dso(al.map)) {
+ if (al.map)
+ intel_pt_log("ERROR: thread has no dso for %#" PRIx64 "\n", *ip);
+ else
+ intel_pt_log("ERROR: thread has no map for %#" PRIx64 "\n", *ip);
+ addr_location__exit(&al);
+ ret = -EINVAL;
+ goto out_ret;
+ }
+ dso = map__dso(al.map);
- offset = al.map->map_ip(al.map, *ip);
+ if (dso__data(dso)->status == DSO_DATA_STATUS_ERROR &&
+ dso__data_status_seen(dso, DSO_DATA_STATUS_SEEN_ITRACE)) {
+ ret = -ENOENT;
+ goto out_ret;
+ }
+
+ offset = map__map_ip(al.map, *ip);
if (!to_ip && one_map) {
struct intel_pt_cache_entry *e;
- e = intel_pt_cache_lookup(al.map->dso, machine, offset);
+ e = intel_pt_cache_lookup(dso, machine, offset);
if (e &&
(!max_insn_cnt || e->insn_cnt <= max_insn_cnt)) {
*insn_cnt_ptr = e->insn_cnt;
*ip += e->byte_cnt;
intel_pt_insn->op = e->op;
intel_pt_insn->branch = e->branch;
+ intel_pt_insn->emulated_ptwrite = e->emulated_ptwrite;
intel_pt_insn->length = e->length;
intel_pt_insn->rel = e->rel;
- memcpy(intel_pt_insn->buf, e->insn,
- INTEL_PT_INSN_BUF_SZ);
+ memcpy(intel_pt_insn->buf, e->insn, INTEL_PT_INSN_BUF_SZ);
intel_pt_log_insn_no_data(intel_pt_insn, *ip);
- return 0;
+ ret = 0;
+ goto out_ret;
}
}
@@ -623,34 +857,55 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
/* Load maps to ensure dso->is_64_bit has been updated */
map__load(al.map);
- x86_64 = al.map->dso->is_64_bit;
+ x86_64 = dso__is_64_bit(dso);
while (1) {
- len = dso__data_read_offset(al.map->dso, machine,
+ len = dso__data_read_offset(dso, machine,
offset, buf,
INTEL_PT_INSN_BUF_SZ);
- if (len <= 0)
- return -EINVAL;
+ if (len <= 0) {
+ intel_pt_log("ERROR: failed to read at offset %#" PRIx64 " ",
+ offset);
+ if (intel_pt_enable_logging)
+ dso__fprintf(dso, intel_pt_log_fp());
+ ret = -EINVAL;
+ goto out_ret;
+ }
- if (intel_pt_get_insn(buf, len, x86_64, intel_pt_insn))
- return -EINVAL;
+ if (intel_pt_get_insn(buf, len, x86_64, intel_pt_insn)) {
+ ret = -EINVAL;
+ goto out_ret;
+ }
intel_pt_log_insn(intel_pt_insn, *ip);
insn_cnt += 1;
- if (intel_pt_insn->branch != INTEL_PT_BR_NO_BRANCH)
+ if (intel_pt_insn->branch != INTEL_PT_BR_NO_BRANCH) {
+ bool eptw;
+ u64 offs;
+
+ if (!intel_pt_jmp_16(intel_pt_insn))
+ goto out;
+ /* Check for emulated ptwrite */
+ offs = offset + intel_pt_insn->length;
+ eptw = intel_pt_emulated_ptwrite(dso, machine, offs);
+ intel_pt_insn->emulated_ptwrite = eptw;
goto out;
+ }
if (max_insn_cnt && insn_cnt >= max_insn_cnt)
goto out_no_cache;
*ip += intel_pt_insn->length;
- if (to_ip && *ip == to_ip)
+ if (to_ip && *ip == to_ip) {
+ intel_pt_insn->length = 0;
+ intel_pt_insn->op = INTEL_PT_OP_OTHER;
goto out_no_cache;
+ }
- if (*ip >= al.map->end)
+ if (*ip >= map__end(al.map))
break;
offset += intel_pt_insn->length;
@@ -670,19 +925,22 @@ out:
if (to_ip) {
struct intel_pt_cache_entry *e;
- e = intel_pt_cache_lookup(al.map->dso, machine, start_offset);
+ e = intel_pt_cache_lookup(map__dso(al.map), machine, start_offset);
if (e)
- return 0;
+ goto out_ret;
}
/* Ignore cache errors */
- intel_pt_cache_add(al.map->dso, machine, start_offset, insn_cnt,
+ intel_pt_cache_add(map__dso(al.map), machine, start_offset, insn_cnt,
*ip - start_ip, intel_pt_insn);
- return 0;
+out_ret:
+ addr_location__exit(&al);
+ return ret;
out_no_cache:
*insn_cnt_ptr = insn_cnt;
+ addr_location__exit(&al);
return 0;
}
@@ -731,9 +989,16 @@ static int __intel_pt_pgd_ip(uint64_t ip, void *data)
struct addr_location al;
u8 cpumode;
u64 offset;
+ int res;
- if (ip >= ptq->pt->kernel_start)
+ if (ptq->state->to_nr) {
+ if (intel_pt_guest_kernel_ip(ip))
+ return intel_pt_match_pgd_ip(ptq->pt, ip, ip, NULL);
+ /* No support for decoding guest user space */
+ return -EINVAL;
+ } else if (ip >= ptq->pt->kernel_start) {
return intel_pt_match_pgd_ip(ptq->pt, ip, ip, NULL);
+ }
cpumode = PERF_RECORD_MISC_USER;
@@ -741,13 +1006,15 @@ static int __intel_pt_pgd_ip(uint64_t ip, void *data)
if (!thread)
return -EINVAL;
- if (!thread__find_map(thread, cpumode, ip, &al) || !al.map->dso)
+ addr_location__init(&al);
+ if (!thread__find_map(thread, cpumode, ip, &al) || !map__dso(al.map))
return -EINVAL;
- offset = al.map->map_ip(al.map, ip);
+ offset = map__map_ip(al.map, ip);
- return intel_pt_match_pgd_ip(ptq->pt, ip, offset,
- al.map->dso->long_name);
+ res = intel_pt_match_pgd_ip(ptq->pt, ip, offset, dso__long_name(map__dso(al.map)));
+ addr_location__exit(&al);
+ return res;
}
static bool intel_pt_pgd_ip(uint64_t ip, void *data)
@@ -802,12 +1069,26 @@ static bool intel_pt_branch_enable(struct intel_pt *pt)
evlist__for_each_entry(pt->session->evlist, evsel) {
if (intel_pt_get_config(pt, &evsel->core.attr, &config) &&
- (config & 1) && !(config & 0x2000))
+ (config & INTEL_PT_CFG_PASS_THRU) &&
+ !(config & INTEL_PT_CFG_BRANCH_EN))
return false;
}
return true;
}
+static bool intel_pt_disabled_tnt(struct intel_pt *pt)
+{
+ struct evsel *evsel;
+ u64 config;
+
+ evlist__for_each_entry(pt->session->evlist, evsel) {
+ if (intel_pt_get_config(pt, &evsel->core.attr, &config) &&
+ config & INTEL_PT_CFG_TNT_DIS)
+ return true;
+ }
+ return false;
+}
+
static unsigned int intel_pt_mtc_period(struct intel_pt *pt)
{
struct evsel *evsel;
@@ -833,7 +1114,7 @@ static bool intel_pt_timeless_decoding(struct intel_pt *pt)
bool timeless_decoding = true;
u64 config;
- if (!pt->tsc_bit || !pt->cap_user_time_zero)
+ if (!pt->tsc_bit || !pt->cap_user_time_zero || pt->synth_opts.timeless_decoding)
return true;
evlist__for_each_entry(pt->session->evlist, evsel) {
@@ -881,6 +1162,19 @@ static bool intel_pt_have_tsc(struct intel_pt *pt)
return have_tsc;
}
+static bool intel_pt_have_mtc(struct intel_pt *pt)
+{
+ struct evsel *evsel;
+ u64 config;
+
+ evlist__for_each_entry(pt->session->evlist, evsel) {
+ if (intel_pt_get_config(pt, &evsel->core.attr, &config) &&
+ (config & pt->mtc_bit))
+ return true;
+ }
+ return false;
+}
+
static bool intel_pt_sampling_mode(struct intel_pt *pt)
{
struct evsel *evsel;
@@ -893,6 +1187,18 @@ static bool intel_pt_sampling_mode(struct intel_pt *pt)
return false;
}
+static u64 intel_pt_ctl(struct intel_pt *pt)
+{
+ struct evsel *evsel;
+ u64 config;
+
+ evlist__for_each_entry(pt->session->evlist, evsel) {
+ if (intel_pt_get_config(pt, &evsel->core.attr, &config))
+ return config;
+ }
+ return 0;
+}
+
static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns)
{
u64 quot, rem;
@@ -978,6 +1284,7 @@ static void intel_pt_add_br_stack(struct intel_pt *pt,
pt->kernel_start);
sample->branch_stack = pt->br_stack;
+ thread__put(thread);
}
/* INTEL_PT_LBR_0, INTEL_PT_LBR_1 and INTEL_PT_LBR_2 */
@@ -1023,19 +1330,29 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
params.get_trace = intel_pt_get_trace;
params.walk_insn = intel_pt_walk_next_insn;
params.lookahead = intel_pt_lookahead;
+ params.findnew_vmcs_info = intel_pt_findnew_vmcs_info;
params.data = ptq;
params.return_compression = intel_pt_return_compression(pt);
params.branch_enable = intel_pt_branch_enable(pt);
+ params.ctl = intel_pt_ctl(pt);
params.max_non_turbo_ratio = pt->max_non_turbo_ratio;
params.mtc_period = intel_pt_mtc_period(pt);
params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n;
params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d;
params.quick = pt->synth_opts.quick;
+ params.vm_time_correlation = pt->synth_opts.vm_time_correlation;
+ params.vm_tm_corr_dry_run = pt->synth_opts.vm_tm_corr_dry_run;
+ params.first_timestamp = pt->first_timestamp;
+ params.max_loops = pt->max_loops;
+
+ /* Cannot walk code without TNT, so force 'quick' mode */
+ if (params.branch_enable && intel_pt_disabled_tnt(pt) && !params.quick)
+ params.quick = 1;
if (pt->filts.cnt > 0)
params.pgd_ip = intel_pt_pgd_ip;
- if (pt->synth_opts.instructions) {
+ if (pt->synth_opts.instructions || pt->synth_opts.cycles) {
if (pt->synth_opts.period) {
switch (pt->synth_opts.period_type) {
case PERF_ITRACE_PERIOD_INSTRUCTIONS:
@@ -1087,6 +1404,8 @@ static void intel_pt_free_queue(void *priv)
if (!ptq)
return;
thread__zput(ptq->thread);
+ thread__zput(ptq->guest_thread);
+ thread__zput(ptq->unknown_guest_thread);
intel_pt_decoder_free(ptq->decoder);
zfree(&ptq->event_buf);
zfree(&ptq->last_branch);
@@ -1094,6 +1413,70 @@ static void intel_pt_free_queue(void *priv)
free(ptq);
}
+static void intel_pt_first_timestamp(struct intel_pt *pt, u64 timestamp)
+{
+ unsigned int i;
+
+ pt->first_timestamp = timestamp;
+
+ for (i = 0; i < pt->queues.nr_queues; i++) {
+ struct auxtrace_queue *queue = &pt->queues.queue_array[i];
+ struct intel_pt_queue *ptq = queue->priv;
+
+ if (ptq && ptq->decoder)
+ intel_pt_set_first_timestamp(ptq->decoder, timestamp);
+ }
+}
+
+static int intel_pt_get_guest_from_sideband(struct intel_pt_queue *ptq)
+{
+ struct machines *machines = &ptq->pt->session->machines;
+ struct machine *machine;
+ pid_t machine_pid = ptq->pid;
+ pid_t tid;
+ int vcpu;
+
+ if (machine_pid <= 0)
+ return 0; /* Not a guest machine */
+
+ machine = machines__find(machines, machine_pid);
+ if (!machine)
+ return 0; /* Not a guest machine */
+
+ if (ptq->guest_machine != machine) {
+ ptq->guest_machine = NULL;
+ thread__zput(ptq->guest_thread);
+ thread__zput(ptq->unknown_guest_thread);
+
+ ptq->unknown_guest_thread = machine__find_thread(machine, 0, 0);
+ if (!ptq->unknown_guest_thread)
+ return -1;
+ ptq->guest_machine = machine;
+ }
+
+ vcpu = ptq->thread ? thread__guest_cpu(ptq->thread) : -1;
+ if (vcpu < 0)
+ return -1;
+
+ tid = machine__get_current_tid(machine, vcpu);
+
+ if (ptq->guest_thread && thread__tid(ptq->guest_thread) != tid)
+ thread__zput(ptq->guest_thread);
+
+ if (!ptq->guest_thread) {
+ ptq->guest_thread = machine__find_thread(machine, -1, tid);
+ if (!ptq->guest_thread)
+ return -1;
+ }
+
+ ptq->guest_machine_pid = machine_pid;
+ ptq->guest_pid = thread__pid(ptq->guest_thread);
+ ptq->guest_tid = tid;
+ ptq->vcpu = vcpu;
+
+ return 0;
+}
+
static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt,
struct auxtrace_queue *queue)
{
@@ -1110,25 +1493,39 @@ static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt,
ptq->thread = machine__find_thread(pt->machine, -1, ptq->tid);
if (ptq->thread) {
- ptq->pid = ptq->thread->pid_;
+ ptq->pid = thread__pid(ptq->thread);
if (queue->cpu == -1)
- ptq->cpu = ptq->thread->cpu;
+ ptq->cpu = thread__cpu(ptq->thread);
+ }
+
+ if (pt->have_guest_sideband && intel_pt_get_guest_from_sideband(ptq)) {
+ ptq->guest_machine_pid = 0;
+ ptq->guest_pid = -1;
+ ptq->guest_tid = -1;
+ ptq->vcpu = -1;
}
}
static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
{
+ struct intel_pt *pt = ptq->pt;
+
+ ptq->insn_len = 0;
if (ptq->state->flags & INTEL_PT_ABORT_TX) {
ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT;
} else if (ptq->state->flags & INTEL_PT_ASYNC) {
- if (ptq->state->to_ip)
+ if (!ptq->state->to_ip)
+ ptq->flags = PERF_IP_FLAG_BRANCH |
+ PERF_IP_FLAG_ASYNC |
+ PERF_IP_FLAG_TRACE_END;
+ else if (ptq->state->from_nr && !ptq->state->to_nr)
ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
PERF_IP_FLAG_ASYNC |
- PERF_IP_FLAG_INTERRUPT;
+ PERF_IP_FLAG_VMEXIT;
else
- ptq->flags = PERF_IP_FLAG_BRANCH |
- PERF_IP_FLAG_TRACE_END;
- ptq->insn_len = 0;
+ ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
+ PERF_IP_FLAG_ASYNC |
+ PERF_IP_FLAG_INTERRUPT;
} else {
if (ptq->state->from_ip)
ptq->flags = intel_pt_insn_type(ptq->state->insn_op);
@@ -1145,6 +1542,17 @@ static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
ptq->flags |= PERF_IP_FLAG_TRACE_BEGIN;
if (ptq->state->type & INTEL_PT_TRACE_END)
ptq->flags |= PERF_IP_FLAG_TRACE_END;
+
+ if (pt->cap_event_trace) {
+ if (ptq->state->type & INTEL_PT_IFLAG_CHG) {
+ if (!ptq->state->from_iflag)
+ ptq->flags |= PERF_IP_FLAG_INTR_DISABLE;
+ if (ptq->state->from_iflag != ptq->state->to_iflag)
+ ptq->flags |= PERF_IP_FLAG_INTR_TOGGLE;
+ } else if (!ptq->state->to_iflag) {
+ ptq->flags |= PERF_IP_FLAG_INTR_DISABLE;
+ }
+ }
}
static void intel_pt_setup_time_range(struct intel_pt *pt,
@@ -1285,6 +1693,17 @@ static void intel_pt_prep_a_sample(struct intel_pt_queue *ptq,
sample->pid = ptq->pid;
sample->tid = ptq->tid;
+
+ if (ptq->pt->have_guest_sideband) {
+ if ((ptq->state->from_ip && ptq->state->from_nr) ||
+ (ptq->state->to_ip && ptq->state->to_nr)) {
+ sample->pid = ptq->guest_pid;
+ sample->tid = ptq->guest_tid;
+ sample->machine_pid = ptq->guest_machine_pid;
+ sample->vcpu = ptq->vcpu;
+ }
+ }
+
sample->cpu = ptq->cpu;
sample->insn_len = ptq->insn_len;
memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
@@ -1301,8 +1720,8 @@ static void intel_pt_prep_b_sample(struct intel_pt *pt,
sample->time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
sample->ip = ptq->state->from_ip;
- sample->cpumode = intel_pt_cpumode(pt, sample->ip);
sample->addr = ptq->state->to_ip;
+ sample->cpumode = intel_pt_cpumode(ptq, sample->ip, sample->addr);
sample->period = 1;
sample->flags = ptq->flags;
@@ -1347,12 +1766,13 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
{
struct intel_pt *pt = ptq->pt;
union perf_event *event = ptq->event_buf;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
struct dummy_branch_stack {
u64 nr;
u64 hw_idx;
struct branch_entry entries;
} dummy_bs;
+ int ret;
if (pt->branches_filter && !(pt->branches_filter & ptq->flags))
return 0;
@@ -1360,6 +1780,7 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
if (intel_pt_skip_event(pt))
return 0;
+ perf_sample__init(&sample, /*all=*/true);
intel_pt_prep_b_sample(pt, ptq, event, &sample);
sample.id = ptq->pt->branches_id;
@@ -1381,15 +1802,18 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
sample.branch_stack = (struct branch_stack *)&dummy_bs;
}
- sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_br_cyc_cnt;
+ if (ptq->sample_ipc)
+ sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_br_cyc_cnt;
if (sample.cyc_cnt) {
sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_br_insn_cnt;
ptq->last_br_insn_cnt = ptq->ipc_insn_cnt;
ptq->last_br_cyc_cnt = ptq->ipc_cyc_cnt;
}
- return intel_pt_deliver_synth_event(pt, event, &sample,
+ perf_sample__exit(&sample);
+ ret = intel_pt_deliver_synth_event(pt, event, &sample,
pt->branches_sample_type);
+ return ret;
}
static void intel_pt_prep_sample(struct intel_pt *pt,
@@ -1417,11 +1841,13 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
{
struct intel_pt *pt = ptq->pt;
union perf_event *event = ptq->event_buf;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
+ int ret;
if (intel_pt_skip_event(pt))
return 0;
+ perf_sample__init(&sample, /*all=*/true);
intel_pt_prep_sample(pt, ptq, event, &sample);
sample.id = ptq->pt->instructions_id;
@@ -1431,7 +1857,8 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
else
sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt;
- sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt;
+ if (ptq->sample_ipc)
+ sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt;
if (sample.cyc_cnt) {
sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_in_insn_cnt;
ptq->last_in_insn_cnt = ptq->ipc_insn_cnt;
@@ -1440,26 +1867,63 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
ptq->last_insn_cnt = ptq->state->tot_insn_cnt;
- return intel_pt_deliver_synth_event(pt, event, &sample,
- pt->instructions_sample_type);
+ ret = intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->instructions_sample_type);
+ perf_sample__exit(&sample);
+ return ret;
+}
+
+static int intel_pt_synth_cycle_sample(struct intel_pt_queue *ptq)
+{
+ struct intel_pt *pt = ptq->pt;
+ union perf_event *event = ptq->event_buf;
+ struct perf_sample sample;
+ u64 period = 0;
+ int ret;
+
+ if (ptq->sample_ipc)
+ period = ptq->ipc_cyc_cnt - ptq->last_cy_cyc_cnt;
+
+ if (!period || intel_pt_skip_event(pt))
+ return 0;
+
+ perf_sample__init(&sample, /*all=*/true);
+ intel_pt_prep_sample(pt, ptq, event, &sample);
+
+ sample.id = ptq->pt->cycles_id;
+ sample.stream_id = ptq->pt->cycles_id;
+ sample.period = period;
+
+ sample.cyc_cnt = period;
+ sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_cy_insn_cnt;
+ ptq->last_cy_insn_cnt = ptq->ipc_insn_cnt;
+ ptq->last_cy_cyc_cnt = ptq->ipc_cyc_cnt;
+
+ ret = intel_pt_deliver_synth_event(pt, event, &sample, pt->cycles_sample_type);
+ perf_sample__exit(&sample);
+ return ret;
}
static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
{
struct intel_pt *pt = ptq->pt;
union perf_event *event = ptq->event_buf;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
+ int ret;
if (intel_pt_skip_event(pt))
return 0;
+ perf_sample__init(&sample, /*all=*/true);
intel_pt_prep_sample(pt, ptq, event, &sample);
sample.id = ptq->pt->transactions_id;
sample.stream_id = ptq->pt->transactions_id;
- return intel_pt_deliver_synth_event(pt, event, &sample,
- pt->transactions_sample_type);
+ ret = intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->transactions_sample_type);
+ perf_sample__exit(&sample);
+ return ret;
}
static void intel_pt_prep_p_sample(struct intel_pt *pt,
@@ -1507,15 +1971,17 @@ static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq)
{
struct intel_pt *pt = ptq->pt;
union perf_event *event = ptq->event_buf;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
struct perf_synth_intel_cbr raw;
u32 flags;
+ int ret;
if (intel_pt_skip_cbr_event(pt))
return 0;
ptq->cbr_seen = ptq->state->cbr;
+ perf_sample__init(&sample, /*all=*/true);
intel_pt_prep_p_sample(pt, ptq, event, &sample);
sample.id = ptq->pt->cbr_id;
@@ -1529,20 +1995,54 @@ static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq)
sample.raw_size = perf_synth__raw_size(raw);
sample.raw_data = perf_synth__raw_data(&raw);
- return intel_pt_deliver_synth_event(pt, event, &sample,
- pt->pwr_events_sample_type);
+ ret = intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->pwr_events_sample_type);
+ perf_sample__exit(&sample);
+ return ret;
+}
+
+static int intel_pt_synth_psb_sample(struct intel_pt_queue *ptq)
+{
+ struct intel_pt *pt = ptq->pt;
+ union perf_event *event = ptq->event_buf;
+ struct perf_sample sample;
+ struct perf_synth_intel_psb raw;
+ int ret;
+
+ if (intel_pt_skip_event(pt))
+ return 0;
+
+ perf_sample__init(&sample, /*all=*/true);
+ intel_pt_prep_p_sample(pt, ptq, event, &sample);
+
+ sample.id = ptq->pt->psb_id;
+ sample.stream_id = ptq->pt->psb_id;
+ sample.flags = 0;
+
+ raw.reserved = 0;
+ raw.offset = ptq->state->psb_offset;
+
+ sample.raw_size = perf_synth__raw_size(raw);
+ sample.raw_data = perf_synth__raw_data(&raw);
+
+ ret = intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->pwr_events_sample_type);
+ perf_sample__exit(&sample);
+ return ret;
}
static int intel_pt_synth_mwait_sample(struct intel_pt_queue *ptq)
{
struct intel_pt *pt = ptq->pt;
union perf_event *event = ptq->event_buf;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
struct perf_synth_intel_mwait raw;
+ int ret;
if (intel_pt_skip_event(pt))
return 0;
+ perf_sample__init(&sample, /*all=*/true);
intel_pt_prep_p_sample(pt, ptq, event, &sample);
sample.id = ptq->pt->mwait_id;
@@ -1554,20 +2054,24 @@ static int intel_pt_synth_mwait_sample(struct intel_pt_queue *ptq)
sample.raw_size = perf_synth__raw_size(raw);
sample.raw_data = perf_synth__raw_data(&raw);
- return intel_pt_deliver_synth_event(pt, event, &sample,
- pt->pwr_events_sample_type);
+ ret = intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->pwr_events_sample_type);
+ perf_sample__exit(&sample);
+ return ret;
}
static int intel_pt_synth_pwre_sample(struct intel_pt_queue *ptq)
{
struct intel_pt *pt = ptq->pt;
union perf_event *event = ptq->event_buf;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
struct perf_synth_intel_pwre raw;
+ int ret;
if (intel_pt_skip_event(pt))
return 0;
+ perf_sample__init(&sample, /*all=*/true);
intel_pt_prep_p_sample(pt, ptq, event, &sample);
sample.id = ptq->pt->pwre_id;
@@ -1579,20 +2083,24 @@ static int intel_pt_synth_pwre_sample(struct intel_pt_queue *ptq)
sample.raw_size = perf_synth__raw_size(raw);
sample.raw_data = perf_synth__raw_data(&raw);
- return intel_pt_deliver_synth_event(pt, event, &sample,
- pt->pwr_events_sample_type);
+ ret = intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->pwr_events_sample_type);
+ perf_sample__exit(&sample);
+ return ret;
}
static int intel_pt_synth_exstop_sample(struct intel_pt_queue *ptq)
{
struct intel_pt *pt = ptq->pt;
union perf_event *event = ptq->event_buf;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
struct perf_synth_intel_exstop raw;
+ int ret;
if (intel_pt_skip_event(pt))
return 0;
+ perf_sample__init(&sample, /*all=*/true);
intel_pt_prep_p_sample(pt, ptq, event, &sample);
sample.id = ptq->pt->exstop_id;
@@ -1604,20 +2112,24 @@ static int intel_pt_synth_exstop_sample(struct intel_pt_queue *ptq)
sample.raw_size = perf_synth__raw_size(raw);
sample.raw_data = perf_synth__raw_data(&raw);
- return intel_pt_deliver_synth_event(pt, event, &sample,
- pt->pwr_events_sample_type);
+ ret = intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->pwr_events_sample_type);
+ perf_sample__exit(&sample);
+ return ret;
}
static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq)
{
struct intel_pt *pt = ptq->pt;
union perf_event *event = ptq->event_buf;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
struct perf_synth_intel_pwrx raw;
+ int ret;
if (intel_pt_skip_event(pt))
return 0;
+ perf_sample__init(&sample, /*all=*/true);
intel_pt_prep_p_sample(pt, ptq, event, &sample);
sample.id = ptq->pt->pwrx_id;
@@ -1629,8 +2141,10 @@ static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq)
sample.raw_size = perf_synth__raw_size(raw);
sample.raw_data = perf_synth__raw_data(&raw);
- return intel_pt_deliver_synth_event(pt, event, &sample,
- pt->pwr_events_sample_type);
+ ret = intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->pwr_events_sample_type);
+ perf_sample__exit(&sample);
+ return ret;
}
/*
@@ -1760,21 +2274,160 @@ static void intel_pt_add_lbrs(struct branch_stack *br_stack,
}
}
-static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq)
+#define P(a, b) PERF_MEM_S(a, b)
+#define OP_LH (P(OP, LOAD) | P(LVL, HIT))
+#define LEVEL(x) P(LVLNUM, x)
+#define REM P(REMOTE, REMOTE)
+#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))
+
+#define PERF_PEBS_DATA_SOURCE_GRT_MAX 0x10
+#define PERF_PEBS_DATA_SOURCE_GRT_MASK (PERF_PEBS_DATA_SOURCE_GRT_MAX - 1)
+
+/* Based on kernel __intel_pmu_pebs_data_source_grt() and pebs_data_source */
+static const u64 pebs_data_source_grt[PERF_PEBS_DATA_SOURCE_GRT_MAX] = {
+ P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA), /* L3 miss|SNP N/A */
+ OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* L1 hit|SNP None */
+ OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* LFB/MAB hit|SNP None */
+ OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, NONE), /* L2 hit|SNP None */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, NONE), /* L3 hit|SNP None */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT), /* L3 hit|SNP Hit */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* L3 hit|SNP HitM */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* L3 hit|SNP HitM */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD), /* L3 hit|SNP Fwd */
+ OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HITM), /* Remote L3 hit|SNP HitM */
+ OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, HIT), /* RAM hit|SNP Hit */
+ OP_LH | P(LVL, REM_RAM1) | REM | LEVEL(L3) | P(SNOOP, HIT), /* Remote L3 hit|SNP Hit */
+ OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | SNOOP_NONE_MISS, /* RAM hit|SNP None or Miss */
+ OP_LH | P(LVL, REM_RAM1) | LEVEL(RAM) | REM | SNOOP_NONE_MISS, /* Remote RAM hit|SNP None or Miss */
+ OP_LH | P(LVL, IO) | LEVEL(NA) | P(SNOOP, NONE), /* I/O hit|SNP None */
+ OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* Uncached hit|SNP None */
+};
+
+/* Based on kernel __intel_pmu_pebs_data_source_cmt() and pebs_data_source */
+static const u64 pebs_data_source_cmt[PERF_PEBS_DATA_SOURCE_GRT_MAX] = {
+ P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA), /* L3 miss|SNP N/A */
+ OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* L1 hit|SNP None */
+ OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* LFB/MAB hit|SNP None */
+ OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, NONE), /* L2 hit|SNP None */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, NONE), /* L3 hit|SNP None */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, MISS), /* L3 hit|SNP Hit */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT), /* L3 hit|SNP HitM */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD), /* L3 hit|SNP HitM */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* L3 hit|SNP Fwd */
+ OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HITM), /* Remote L3 hit|SNP HitM */
+ OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, NONE), /* RAM hit|SNP Hit */
+ OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE), /* Remote L3 hit|SNP Hit */
+ OP_LH | LEVEL(RAM) | REM | P(SNOOPX, FWD), /* RAM hit|SNP None or Miss */
+ OP_LH | LEVEL(RAM) | REM | P(SNOOP, HITM), /* Remote RAM hit|SNP None or Miss */
+ OP_LH | P(LVL, IO) | LEVEL(NA) | P(SNOOP, NONE), /* I/O hit|SNP None */
+ OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* Uncached hit|SNP None */
+};
+
+/* Based on kernel pebs_set_tlb_lock() */
+static inline void pebs_set_tlb_lock(u64 *val, bool tlb, bool lock)
+{
+ /*
+ * TLB access
+ * 0 = did not miss 2nd level TLB
+ * 1 = missed 2nd level TLB
+ */
+ if (tlb)
+ *val |= P(TLB, MISS) | P(TLB, L2);
+ else
+ *val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
+
+ /* locked prefix */
+ if (lock)
+ *val |= P(LOCK, LOCKED);
+}
+
+/* Based on kernel __grt_latency_data() */
+static u64 intel_pt_grt_latency_data(u8 dse, bool tlb, bool lock, bool blk,
+ const u64 *pebs_data_source)
+{
+ u64 val;
+
+ dse &= PERF_PEBS_DATA_SOURCE_GRT_MASK;
+ val = pebs_data_source[dse];
+
+ pebs_set_tlb_lock(&val, tlb, lock);
+
+ if (blk)
+ val |= P(BLK, DATA);
+ else
+ val |= P(BLK, NA);
+
+ return val;
+}
+
+/* Default value for data source */
+#define PERF_MEM_NA (PERF_MEM_S(OP, NA) |\
+ PERF_MEM_S(LVL, NA) |\
+ PERF_MEM_S(SNOOP, NA) |\
+ PERF_MEM_S(LOCK, NA) |\
+ PERF_MEM_S(TLB, NA) |\
+ PERF_MEM_S(LVLNUM, NA))
+
+enum DATA_SRC_FORMAT {
+ DATA_SRC_FORMAT_ERR = -1,
+ DATA_SRC_FORMAT_NA = 0,
+ DATA_SRC_FORMAT_GRT = 1,
+ DATA_SRC_FORMAT_CMT = 2,
+};
+
+/* Based on kernel grt_latency_data() and cmt_latency_data */
+static u64 intel_pt_get_data_src(u64 mem_aux_info, int data_src_fmt)
+{
+ switch (data_src_fmt) {
+ case DATA_SRC_FORMAT_GRT: {
+ union {
+ u64 val;
+ struct {
+ unsigned int dse:4;
+ unsigned int locked:1;
+ unsigned int stlb_miss:1;
+ unsigned int fwd_blk:1;
+ unsigned int reserved:25;
+ };
+ } x = {.val = mem_aux_info};
+ return intel_pt_grt_latency_data(x.dse, x.stlb_miss, x.locked, x.fwd_blk,
+ pebs_data_source_grt);
+ }
+ case DATA_SRC_FORMAT_CMT: {
+ union {
+ u64 val;
+ struct {
+ unsigned int dse:5;
+ unsigned int locked:1;
+ unsigned int stlb_miss:1;
+ unsigned int fwd_blk:1;
+ unsigned int reserved:24;
+ };
+ } x = {.val = mem_aux_info};
+ return intel_pt_grt_latency_data(x.dse, x.stlb_miss, x.locked, x.fwd_blk,
+ pebs_data_source_cmt);
+ }
+ default:
+ return PERF_MEM_NA;
+ }
+}
+
+static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evsel *evsel,
+ u64 id, int data_src_fmt)
{
const struct intel_pt_blk_items *items = &ptq->state->items;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
union perf_event *event = ptq->event_buf;
struct intel_pt *pt = ptq->pt;
- struct evsel *evsel = pt->pebs_evsel;
u64 sample_type = evsel->core.attr.sample_type;
- u64 id = evsel->core.id[0];
u8 cpumode;
- u64 regs[8 * sizeof(sample.intr_regs.mask)];
+ u64 regs[8 * sizeof(sample.intr_regs->mask)];
+ int ret;
if (intel_pt_skip_event(pt))
return 0;
+ perf_sample__init(&sample, /*all=*/true);
intel_pt_prep_a_sample(ptq, event, &sample);
sample.id = id;
@@ -1791,10 +2444,7 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq)
else
sample.ip = ptq->state->from_ip;
- /* No support for guest mode at this time */
- cpumode = sample.ip < ptq->pt->kernel_start ?
- PERF_RECORD_MISC_USER :
- PERF_RECORD_MISC_KERNEL;
+ cpumode = intel_pt_cpumode(ptq, sample.ip, 0);
event->sample.header.misc = cpumode | PERF_RECORD_MISC_EXACT_IP;
@@ -1824,15 +2474,16 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq)
items->mask[INTEL_PT_XMM_POS])) {
u64 regs_mask = evsel->core.attr.sample_regs_intr;
u64 *pos;
+ struct regs_dump *intr_regs = perf_sample__intr_regs(&sample);
- sample.intr_regs.abi = items->is_32_bit ?
+ intr_regs->abi = items->is_32_bit ?
PERF_SAMPLE_REGS_ABI_32 :
PERF_SAMPLE_REGS_ABI_64;
- sample.intr_regs.regs = regs;
+ intr_regs->regs = regs;
- pos = intel_pt_add_gp_regs(&sample.intr_regs, regs, items, regs_mask);
+ pos = intel_pt_add_gp_regs(intr_regs, regs, items, regs_mask);
- intel_pt_add_xmm(&sample.intr_regs, pos, items, regs_mask);
+ intel_pt_add_xmm(intr_regs, pos, items, regs_mask);
}
if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
@@ -1853,19 +2504,48 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq)
if (sample_type & PERF_SAMPLE_ADDR && items->has_mem_access_address)
sample.addr = items->mem_access_address;
- if (sample_type & PERF_SAMPLE_WEIGHT) {
+ if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
/*
* Refer kernel's setup_pebs_adaptive_sample_data() and
* intel_hsw_weight().
*/
- if (items->has_mem_access_latency)
- sample.weight = items->mem_access_latency;
+ if (items->has_mem_access_latency) {
+ u64 weight = items->mem_access_latency >> 32;
+
+ /*
+ * Starts from SPR, the mem access latency field
+ * contains both cache latency [47:32] and instruction
+ * latency [15:0]. The cache latency is the same as the
+ * mem access latency on previous platforms.
+ *
+ * In practice, no memory access could last than 4G
+ * cycles. Use latency >> 32 to distinguish the
+ * different format of the mem access latency field.
+ */
+ if (weight > 0) {
+ sample.weight = weight & 0xffff;
+ sample.ins_lat = items->mem_access_latency & 0xffff;
+ } else
+ sample.weight = items->mem_access_latency;
+ }
if (!sample.weight && items->has_tsx_aux_info) {
/* Cycles last block */
sample.weight = (u32)items->tsx_aux_info;
}
}
+ if (sample_type & PERF_SAMPLE_DATA_SRC) {
+ if (items->has_mem_aux_info && data_src_fmt) {
+ if (data_src_fmt < 0) {
+ pr_err("Intel PT missing data_src info\n");
+ return -1;
+ }
+ sample.data_src = intel_pt_get_data_src(items->mem_aux_info, data_src_fmt);
+ } else {
+ sample.data_src = PERF_MEM_NA;
+ }
+ }
+
if (sample_type & PERF_SAMPLE_TRANSACTION && items->has_tsx_aux_info) {
u64 ax = items->has_rax ? items->rax : 0;
/* Refer kernel's intel_hsw_transaction() */
@@ -1877,12 +2557,136 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq)
sample.transaction = txn;
}
- return intel_pt_deliver_synth_event(pt, event, &sample, sample_type);
+ ret = intel_pt_deliver_synth_event(pt, event, &sample, sample_type);
+ perf_sample__exit(&sample);
+ return ret;
+}
+
+static int intel_pt_synth_single_pebs_sample(struct intel_pt_queue *ptq)
+{
+ struct intel_pt *pt = ptq->pt;
+ struct evsel *evsel = pt->pebs_evsel;
+ int data_src_fmt = pt->pebs_data_src_fmt;
+ u64 id = evsel->core.id[0];
+
+ return intel_pt_do_synth_pebs_sample(ptq, evsel, id, data_src_fmt);
+}
+
+static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq)
+{
+ const struct intel_pt_blk_items *items = &ptq->state->items;
+ struct intel_pt_pebs_event *pe;
+ struct intel_pt *pt = ptq->pt;
+ int err = -EINVAL;
+ int hw_id;
+
+ if (!items->has_applicable_counters || !items->applicable_counters) {
+ if (!pt->single_pebs)
+ pr_err("PEBS-via-PT record with no applicable_counters\n");
+ return intel_pt_synth_single_pebs_sample(ptq);
+ }
+
+ for_each_set_bit(hw_id, (unsigned long *)&items->applicable_counters, INTEL_PT_MAX_PEBS) {
+ pe = &ptq->pebs[hw_id];
+ if (!pe->evsel) {
+ if (!pt->single_pebs)
+ pr_err("PEBS-via-PT record with no matching event, hw_id %d\n",
+ hw_id);
+ return intel_pt_synth_single_pebs_sample(ptq);
+ }
+ err = intel_pt_do_synth_pebs_sample(ptq, pe->evsel, pe->id, pe->data_src_fmt);
+ if (err)
+ return err;
+ }
+
+ return err;
+}
+
+static int intel_pt_synth_events_sample(struct intel_pt_queue *ptq)
+{
+ struct intel_pt *pt = ptq->pt;
+ union perf_event *event = ptq->event_buf;
+ struct perf_sample sample;
+ struct {
+ struct perf_synth_intel_evt cfe;
+ struct perf_synth_intel_evd evd[INTEL_PT_MAX_EVDS];
+ } raw;
+ int i, ret;
+
+ if (intel_pt_skip_event(pt))
+ return 0;
+
+ perf_sample__init(&sample, /*all=*/true);
+ intel_pt_prep_p_sample(pt, ptq, event, &sample);
+
+ sample.id = ptq->pt->evt_id;
+ sample.stream_id = ptq->pt->evt_id;
+
+ raw.cfe.type = ptq->state->cfe_type;
+ raw.cfe.reserved = 0;
+ raw.cfe.ip = !!(ptq->state->flags & INTEL_PT_FUP_IP);
+ raw.cfe.vector = ptq->state->cfe_vector;
+ raw.cfe.evd_cnt = ptq->state->evd_cnt;
+
+ for (i = 0; i < ptq->state->evd_cnt; i++) {
+ raw.evd[i].et = 0;
+ raw.evd[i].evd_type = ptq->state->evd[i].type;
+ raw.evd[i].payload = ptq->state->evd[i].payload;
+ }
+
+ sample.raw_size = perf_synth__raw_size(raw) +
+ ptq->state->evd_cnt * sizeof(struct perf_synth_intel_evd);
+ sample.raw_data = perf_synth__raw_data(&raw);
+
+ ret = intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->evt_sample_type);
+ perf_sample__exit(&sample);
+ return ret;
+}
+
+static int intel_pt_synth_iflag_chg_sample(struct intel_pt_queue *ptq)
+{
+ struct intel_pt *pt = ptq->pt;
+ union perf_event *event = ptq->event_buf;
+ struct perf_sample sample;
+ struct perf_synth_intel_iflag_chg raw;
+ int ret;
+
+ if (intel_pt_skip_event(pt))
+ return 0;
+
+ perf_sample__init(&sample, /*all=*/true);
+ intel_pt_prep_p_sample(pt, ptq, event, &sample);
+
+ sample.id = ptq->pt->iflag_chg_id;
+ sample.stream_id = ptq->pt->iflag_chg_id;
+
+ raw.flags = 0;
+ raw.iflag = ptq->state->to_iflag;
+
+ if (ptq->state->type & INTEL_PT_BRANCH) {
+ raw.via_branch = 1;
+ raw.branch_ip = ptq->state->to_ip;
+ } else {
+ sample.addr = 0;
+ }
+ sample.flags = ptq->flags;
+
+ sample.raw_size = perf_synth__raw_size(raw);
+ sample.raw_data = perf_synth__raw_data(&raw);
+
+ ret = intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->iflag_chg_sample_type);
+ perf_sample__exit(&sample);
+ return ret;
}
static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
- pid_t pid, pid_t tid, u64 ip, u64 timestamp)
+ pid_t pid, pid_t tid, u64 ip, u64 timestamp,
+ pid_t machine_pid, int vcpu)
{
+ bool dump_log_on_error = pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_ON_ERROR;
+ bool log_on_stdout = pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_USE_STDOUT;
union perf_event event;
char msg[MAX_AUXTRACE_ERROR_MSG];
int err;
@@ -1898,8 +2702,19 @@ static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
intel_pt__strerror(code, msg, MAX_AUXTRACE_ERROR_MSG);
- auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
- code, cpu, pid, tid, ip, msg, timestamp);
+ auxtrace_synth_guest_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
+ code, cpu, pid, tid, ip, msg, timestamp,
+ machine_pid, vcpu);
+
+ if (intel_pt_enable_logging && !log_on_stdout) {
+ FILE *fp = intel_pt_log_fp();
+
+ if (fp)
+ perf_event__fprintf_auxtrace_error(&event, fp);
+ }
+
+ if (code != INTEL_PT_ERR_LOST && dump_log_on_error)
+ intel_pt_log_dump_buf();
err = perf_session__deliver_synth_event(pt->session, &event, NULL);
if (err)
@@ -1914,11 +2729,22 @@ static int intel_ptq_synth_error(struct intel_pt_queue *ptq,
{
struct intel_pt *pt = ptq->pt;
u64 tm = ptq->timestamp;
+ pid_t machine_pid = 0;
+ pid_t pid = ptq->pid;
+ pid_t tid = ptq->tid;
+ int vcpu = -1;
tm = pt->timeless_decoding ? 0 : tsc_to_perf_time(tm, &pt->tc);
- return intel_pt_synth_error(pt, state->err, ptq->cpu, ptq->pid,
- ptq->tid, state->from_ip, tm);
+ if (pt->have_guest_sideband && state->from_nr) {
+ machine_pid = ptq->guest_machine_pid;
+ vcpu = ptq->vcpu;
+ pid = ptq->guest_pid;
+ tid = ptq->guest_tid;
+ }
+
+ return intel_pt_synth_error(pt, state->err, ptq->cpu, pid, tid,
+ state->from_ip, tm, machine_pid, vcpu);
}
static int intel_pt_next_tid(struct intel_pt *pt, struct intel_pt_queue *ptq)
@@ -1966,15 +2792,20 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
ptq->have_sample = false;
- if (ptq->state->tot_cyc_cnt > ptq->ipc_cyc_cnt) {
- /*
- * Cycle count and instruction count only go together to create
- * a valid IPC ratio when the cycle count changes.
- */
+ if (pt->synth_opts.approx_ipc) {
+ ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt;
+ ptq->ipc_cyc_cnt = ptq->state->cycles;
+ ptq->sample_ipc = true;
+ } else {
ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt;
ptq->ipc_cyc_cnt = ptq->state->tot_cyc_cnt;
+ ptq->sample_ipc = ptq->state->flags & INTEL_PT_SAMPLE_IPC;
}
+ /* Ensure guest code maps are set up */
+ if (symbol_conf.guest_code && (state->from_nr || state->to_nr))
+ intel_pt_get_guest(ptq);
+
/*
* Do PEBS first to allow for the possibility that the PEBS timestamp
* precedes the current timestamp.
@@ -1985,7 +2816,25 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
return err;
}
+ if (pt->synth_opts.intr_events) {
+ if (state->type & INTEL_PT_EVT) {
+ err = intel_pt_synth_events_sample(ptq);
+ if (err)
+ return err;
+ }
+ if (state->type & INTEL_PT_IFLAG_CHG) {
+ err = intel_pt_synth_iflag_chg_sample(ptq);
+ if (err)
+ return err;
+ }
+ }
+
if (pt->sample_pwr_events) {
+ if (state->type & INTEL_PT_PSB_EVT) {
+ err = intel_pt_synth_psb_sample(ptq);
+ if (err)
+ return err;
+ }
if (ptq->state->cbr != ptq->cbr_seen) {
err = intel_pt_synth_cbr_sample(ptq);
if (err)
@@ -2015,10 +2864,17 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
}
}
- if (pt->sample_instructions && (state->type & INTEL_PT_INSTRUCTION)) {
- err = intel_pt_synth_instruction_sample(ptq);
- if (err)
- return err;
+ if (state->type & INTEL_PT_INSTRUCTION) {
+ if (pt->sample_instructions) {
+ err = intel_pt_synth_instruction_sample(ptq);
+ if (err)
+ return err;
+ }
+ if (pt->sample_cycles) {
+ err = intel_pt_synth_cycle_sample(ptq);
+ if (err)
+ return err;
+ }
}
if (pt->sample_transactions && (state->type & INTEL_PT_TRANSACTION)) {
@@ -2047,7 +2903,27 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
}
if (pt->sample_branches) {
- err = intel_pt_synth_branch_sample(ptq);
+ if (state->from_nr != state->to_nr &&
+ state->from_ip && state->to_ip) {
+ struct intel_pt_state *st = (struct intel_pt_state *)state;
+ u64 to_ip = st->to_ip;
+ u64 from_ip = st->from_ip;
+
+ /*
+ * perf cannot handle having different machines for ip
+ * and addr, so create 2 branches.
+ */
+ st->to_ip = 0;
+ err = intel_pt_synth_branch_sample(ptq);
+ if (err)
+ return err;
+ st->from_ip = 0;
+ st->to_ip = to_ip;
+ err = intel_pt_synth_branch_sample(ptq);
+ st->from_ip = from_ip;
+ } else {
+ err = intel_pt_synth_branch_sample(ptq);
+ }
if (err)
return err;
}
@@ -2100,13 +2976,13 @@ static u64 intel_pt_switch_ip(struct intel_pt *pt, u64 *ptss_ip)
if (map__load(map))
return 0;
- start = dso__first_symbol(map->dso);
+ start = dso__first_symbol(map__dso(map));
for (sym = start; sym; sym = dso__next_symbol(sym)) {
if (sym->binding == STB_GLOBAL &&
!strcmp(sym->name, "__switch_to")) {
- ip = map->unmap_ip(map, sym->start);
- if (ip >= map->start && ip < map->end) {
+ ip = map__unmap_ip(map, sym->start);
+ if (ip >= map__start(map) && ip < map__end(map)) {
switch_ip = ip;
break;
}
@@ -2123,8 +2999,8 @@ static u64 intel_pt_switch_ip(struct intel_pt *pt, u64 *ptss_ip)
for (sym = start; sym; sym = dso__next_symbol(sym)) {
if (!strcmp(sym->name, ptss)) {
- ip = map->unmap_ip(map, sym->start);
- if (ip >= map->start && ip < map->end) {
+ ip = map__unmap_ip(map, sym->start);
+ if (ip >= map__start(map) && ip < map__end(map)) {
*ptss_ip = ip;
break;
}
@@ -2138,6 +3014,9 @@ static void intel_pt_enable_sync_switch(struct intel_pt *pt)
{
unsigned int i;
+ if (pt->sync_switch_not_supported)
+ return;
+
pt->sync_switch = true;
for (i = 0; i < pt->queues.nr_queues; i++) {
@@ -2149,6 +3028,23 @@ static void intel_pt_enable_sync_switch(struct intel_pt *pt)
}
}
+static void intel_pt_disable_sync_switch(struct intel_pt *pt)
+{
+ unsigned int i;
+
+ pt->sync_switch = false;
+
+ for (i = 0; i < pt->queues.nr_queues; i++) {
+ struct auxtrace_queue *queue = &pt->queues.queue_array[i];
+ struct intel_pt_queue *ptq = queue->priv;
+
+ if (ptq) {
+ ptq->sync_switch = false;
+ intel_pt_next_tid(pt, ptq);
+ }
+ }
+}
+
/*
* To filter against time ranges, it is only necessary to look at the next start
* or end time.
@@ -2233,7 +3129,7 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
if (pt->per_cpu_mmaps &&
(pt->have_sched_switch == 1 || pt->have_sched_switch == 3) &&
!pt->timeless_decoding && intel_pt_tracing_kernel(pt) &&
- !pt->sampling_mode) {
+ !pt->sampling_mode && !pt->synth_opts.vm_time_correlation) {
pt->switch_ip = intel_pt_switch_ip(pt, &pt->ptss_ip);
if (pt->switch_ip) {
intel_pt_log("switch_ip: %"PRIx64" ptss_ip: %"PRIx64"\n",
@@ -2259,6 +3155,7 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
ptq->sync_switch = false;
intel_pt_next_tid(pt, ptq);
}
+ ptq->timestamp = state->est_timestamp;
if (pt->synth_opts.errors) {
err = intel_ptq_synth_error(ptq, state);
if (err)
@@ -2409,7 +3306,7 @@ static void intel_pt_sample_set_pid_tid_cpu(struct intel_pt_queue *ptq,
if (ptq->pid == -1) {
ptq->thread = machine__find_thread(m, -1, ptq->tid);
if (ptq->thread)
- ptq->pid = ptq->thread->pid_;
+ ptq->pid = thread__pid(ptq->thread);
return;
}
@@ -2441,7 +3338,8 @@ static int intel_pt_process_timeless_sample(struct intel_pt *pt,
static int intel_pt_lost(struct intel_pt *pt, struct perf_sample *sample)
{
return intel_pt_synth_error(pt, INTEL_PT_ERR_LOST, sample->cpu,
- sample->pid, sample->tid, 0, sample->time);
+ sample->pid, sample->tid, 0, sample->time,
+ sample->machine_pid, sample->vcpu);
}
static struct intel_pt_queue *intel_pt_cpu_to_ptq(struct intel_pt *pt, int cpu)
@@ -2517,14 +3415,14 @@ static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid,
return 1;
}
+#ifdef HAVE_LIBTRACEEVENT
static int intel_pt_process_switch(struct intel_pt *pt,
struct perf_sample *sample)
{
- struct evsel *evsel;
pid_t tid;
int cpu, ret;
+ struct evsel *evsel = evlist__id2evsel(pt->session->evlist, sample->id);
- evsel = perf_evlist__id2evsel(pt->session->evlist, sample->id);
if (evsel != pt->switch_evsel)
return 0;
@@ -2541,6 +3439,7 @@ static int intel_pt_process_switch(struct intel_pt *pt,
return machine__set_current_tid(pt->machine, cpu, -1, tid);
}
+#endif /* HAVE_LIBTRACEEVENT */
static int intel_pt_context_switch_in(struct intel_pt *pt,
struct perf_sample *sample)
@@ -2580,6 +3479,33 @@ static int intel_pt_context_switch_in(struct intel_pt *pt,
return machine__set_current_tid(pt->machine, cpu, pid, tid);
}
+static int intel_pt_guest_context_switch(struct intel_pt *pt,
+ union perf_event *event,
+ struct perf_sample *sample)
+{
+ bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
+ struct machines *machines = &pt->session->machines;
+ struct machine *machine = machines__find(machines, sample->machine_pid);
+
+ pt->have_guest_sideband = true;
+
+ /*
+ * sync_switch cannot handle guest machines at present, so just disable
+ * it.
+ */
+ pt->sync_switch_not_supported = true;
+ if (pt->sync_switch)
+ intel_pt_disable_sync_switch(pt);
+
+ if (out)
+ return 0;
+
+ if (!machine)
+ return -EINVAL;
+
+ return machine__set_current_tid(machine, sample->vcpu, sample->pid, sample->tid);
+}
+
static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event,
struct perf_sample *sample)
{
@@ -2587,6 +3513,9 @@ static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event,
pid_t pid, tid;
int cpu, ret;
+ if (perf_event__is_guest(event))
+ return intel_pt_guest_context_switch(pt, event, sample);
+
cpu = sample->cpu;
if (pt->have_sched_switch == 3) {
@@ -2632,10 +3561,78 @@ static int intel_pt_process_itrace_start(struct intel_pt *pt,
event->itrace_start.tid);
}
+/*
+ * Events with data_src are identified by L1_Hit_Indication
+ * refer https://github.com/intel/perfmon
+ */
+static int intel_pt_data_src_fmt(struct intel_pt *pt, struct evsel *evsel)
+{
+ struct perf_env *env = pt->machine->env;
+ int fmt = DATA_SRC_FORMAT_NA;
+
+ if (!env->cpuid)
+ return DATA_SRC_FORMAT_ERR;
+
+ /*
+ * PEBS-via-PT is only supported on E-core non-hybrid. Of those only
+ * Gracemont and Crestmont have data_src. Check for:
+ * Alderlake N (Gracemont)
+ * Sierra Forest (Crestmont)
+ * Grand Ridge (Crestmont)
+ */
+
+ if (!strncmp(env->cpuid, "GenuineIntel,6,190,", 19))
+ fmt = DATA_SRC_FORMAT_GRT;
+
+ if (!strncmp(env->cpuid, "GenuineIntel,6,175,", 19) ||
+ !strncmp(env->cpuid, "GenuineIntel,6,182,", 19))
+ fmt = DATA_SRC_FORMAT_CMT;
+
+ if (fmt == DATA_SRC_FORMAT_NA)
+ return fmt;
+
+ /*
+ * Only data_src events are:
+ * mem-loads event=0xd0,umask=0x5
+ * mem-stores event=0xd0,umask=0x6
+ */
+ if (evsel->core.attr.type == PERF_TYPE_RAW &&
+ ((evsel->core.attr.config & 0xffff) == 0x5d0 ||
+ (evsel->core.attr.config & 0xffff) == 0x6d0))
+ return fmt;
+
+ return DATA_SRC_FORMAT_NA;
+}
+
+static int intel_pt_process_aux_output_hw_id(struct intel_pt *pt,
+ union perf_event *event,
+ struct perf_sample *sample)
+{
+ u64 hw_id = event->aux_output_hw_id.hw_id;
+ struct auxtrace_queue *queue;
+ struct intel_pt_queue *ptq;
+ struct evsel *evsel;
+
+ queue = auxtrace_queues__sample_queue(&pt->queues, sample, pt->session);
+ evsel = evlist__id2evsel_strict(pt->session->evlist, sample->id);
+ if (!queue || !queue->priv || !evsel || hw_id > INTEL_PT_MAX_PEBS) {
+ pr_err("Bad AUX output hardware ID\n");
+ return -EINVAL;
+ }
+
+ ptq = queue->priv;
+
+ ptq->pebs[hw_id].evsel = evsel;
+ ptq->pebs[hw_id].id = sample->id;
+ ptq->pebs[hw_id].data_src_fmt = intel_pt_data_src_fmt(pt, evsel);
+
+ return 0;
+}
+
static int intel_pt_find_map(struct thread *thread, u8 cpumode, u64 addr,
struct addr_location *al)
{
- if (!al->map || addr < al->map->start || addr >= al->map->end) {
+ if (!al->map || addr < map__start(al->map) || addr >= map__end(al->map)) {
if (!thread__find_map(thread, cpumode, addr, al))
return -1;
}
@@ -2651,27 +3648,32 @@ static int intel_pt_text_poke(struct intel_pt *pt, union perf_event *event)
/* Assume text poke begins in a basic block no more than 4096 bytes */
int cnt = 4096 + event->text_poke.new_len;
struct thread *thread = pt->unknown_thread;
- struct addr_location al = { .map = NULL };
+ struct addr_location al;
struct machine *machine = pt->machine;
struct intel_pt_cache_entry *e;
u64 offset;
+ int ret = 0;
+ addr_location__init(&al);
if (!event->text_poke.new_len)
- return 0;
+ goto out;
for (; cnt; cnt--, addr--) {
+ struct dso *dso;
+
if (intel_pt_find_map(thread, cpumode, addr, &al)) {
if (addr < event->text_poke.addr)
- return 0;
+ goto out;
continue;
}
- if (!al.map->dso || !al.map->dso->auxtrace_cache)
+ dso = map__dso(al.map);
+ if (!dso || !dso__auxtrace_cache(dso))
continue;
- offset = al.map->map_ip(al.map, addr);
+ offset = map__map_ip(al.map, addr);
- e = intel_pt_cache_lookup(al.map->dso, machine, offset);
+ e = intel_pt_cache_lookup(dso, machine, offset);
if (!e)
continue;
@@ -2682,21 +3684,22 @@ static int intel_pt_text_poke(struct intel_pt *pt, union perf_event *event)
* branch instruction before the text poke address.
*/
if (e->branch != INTEL_PT_BR_NO_BRANCH)
- return 0;
+ goto out;
} else {
- intel_pt_cache_invalidate(al.map->dso, machine, offset);
+ intel_pt_cache_invalidate(dso, machine, offset);
intel_pt_log("Invalidated instruction cache for %s at %#"PRIx64"\n",
- al.map->dso->long_name, addr);
+ dso__long_name(dso), addr);
}
}
-
- return 0;
+out:
+ addr_location__exit(&al);
+ return ret;
}
static int intel_pt_process_event(struct perf_session *session,
union perf_event *event,
struct perf_sample *sample,
- struct perf_tool *tool)
+ const struct perf_tool *tool)
{
struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
auxtrace);
@@ -2733,6 +3736,8 @@ static int intel_pt_process_event(struct perf_session *session,
sample->time);
}
} else if (timestamp) {
+ if (!pt->first_timestamp)
+ intel_pt_first_timestamp(pt, timestamp);
err = intel_pt_process_queues(pt, timestamp);
}
if (err)
@@ -2753,10 +3758,15 @@ static int intel_pt_process_event(struct perf_session *session,
return err;
}
+#ifdef HAVE_LIBTRACEEVENT
if (pt->switch_evsel && event->header.type == PERF_RECORD_SAMPLE)
err = intel_pt_process_switch(pt, sample);
- else if (event->header.type == PERF_RECORD_ITRACE_START)
+ else
+#endif
+ if (event->header.type == PERF_RECORD_ITRACE_START)
err = intel_pt_process_itrace_start(pt, event, sample);
+ else if (event->header.type == PERF_RECORD_AUX_OUTPUT_HW_ID)
+ err = intel_pt_process_aux_output_hw_id(pt, event, sample);
else if (event->header.type == PERF_RECORD_SWITCH ||
event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
err = intel_pt_context_switch(pt, event, sample);
@@ -2773,7 +3783,7 @@ static int intel_pt_process_event(struct perf_session *session,
return err;
}
-static int intel_pt_flush(struct perf_session *session, struct perf_tool *tool)
+static int intel_pt_flush(struct perf_session *session, const struct perf_tool *tool)
{
struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
auxtrace);
@@ -2819,11 +3829,13 @@ static void intel_pt_free(struct perf_session *session)
auxtrace_heap__free(&pt->heap);
intel_pt_free_events(session);
session->auxtrace = NULL;
+ intel_pt_free_vmcs_info(pt);
thread__put(pt->unknown_thread);
addr_filters__exit(&pt->filts);
zfree(&pt->chain);
zfree(&pt->filter);
zfree(&pt->time_ranges);
+ zfree(&pt->br_stack);
free(pt);
}
@@ -2838,7 +3850,7 @@ static bool intel_pt_evsel_is_auxtrace(struct perf_session *session,
static int intel_pt_process_auxtrace_event(struct perf_session *session,
union perf_event *event,
- struct perf_tool *tool __maybe_unused)
+ const struct perf_tool *tool __maybe_unused)
{
struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
auxtrace);
@@ -2897,37 +3909,15 @@ static int intel_pt_queue_data(struct perf_session *session,
data_offset, timestamp);
}
-struct intel_pt_synth {
- struct perf_tool dummy_tool;
- struct perf_session *session;
-};
-
-static int intel_pt_event_synth(struct perf_tool *tool,
- union perf_event *event,
- struct perf_sample *sample __maybe_unused,
- struct machine *machine __maybe_unused)
-{
- struct intel_pt_synth *intel_pt_synth =
- container_of(tool, struct intel_pt_synth, dummy_tool);
-
- return perf_session__deliver_synth_event(intel_pt_synth->session, event,
- NULL);
-}
-
static int intel_pt_synth_event(struct perf_session *session, const char *name,
struct perf_event_attr *attr, u64 id)
{
- struct intel_pt_synth intel_pt_synth;
int err;
pr_debug("Synthesizing '%s' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
name, id, (u64)attr->sample_type);
- memset(&intel_pt_synth, 0, sizeof(struct intel_pt_synth));
- intel_pt_synth.session = session;
-
- err = perf_event__synthesize_attr(&intel_pt_synth.dummy_tool, attr, 1,
- &id, intel_pt_event_synth);
+ err = perf_session__deliver_synth_attr_event(session, attr, id);
if (err)
pr_err("%s: failed to synthesize '%s' event type\n",
__func__, name);
@@ -3043,6 +4033,22 @@ static int intel_pt_synth_events(struct intel_pt *pt,
id += 1;
}
+ if (pt->synth_opts.cycles) {
+ attr.config = PERF_COUNT_HW_CPU_CYCLES;
+ if (pt->synth_opts.period_type == PERF_ITRACE_PERIOD_NANOSECS)
+ attr.sample_period =
+ intel_pt_ns_to_ticks(pt, pt->synth_opts.period);
+ else
+ attr.sample_period = pt->synth_opts.period;
+ err = intel_pt_synth_event(session, "cycles", &attr, id);
+ if (err)
+ return err;
+ pt->sample_cycles = true;
+ pt->cycles_sample_type = attr.sample_type;
+ pt->cycles_id = id;
+ id += 1;
+ }
+
attr.sample_type &= ~(u64)PERF_SAMPLE_PERIOD;
attr.sample_period = 1;
@@ -3084,9 +4090,17 @@ static int intel_pt_synth_events(struct intel_pt *pt,
pt->cbr_id = id;
intel_pt_set_event_name(evlist, id, "cbr");
id += 1;
+
+ attr.config = PERF_SYNTH_INTEL_PSB;
+ err = intel_pt_synth_event(session, "psb", &attr, id);
+ if (err)
+ return err;
+ pt->psb_id = id;
+ intel_pt_set_event_name(evlist, id, "psb");
+ id += 1;
}
- if (pt->synth_opts.pwr_events && (evsel->core.attr.config & 0x10)) {
+ if (pt->synth_opts.pwr_events && (evsel->core.attr.config & INTEL_PT_CFG_PWR_EVT_EN)) {
attr.config = PERF_SYNTH_INTEL_MWAIT;
err = intel_pt_synth_event(session, "mwait", &attr, id);
if (err)
@@ -3120,6 +4134,28 @@ static int intel_pt_synth_events(struct intel_pt *pt,
id += 1;
}
+ if (pt->synth_opts.intr_events && (evsel->core.attr.config & INTEL_PT_CFG_EVT_EN)) {
+ attr.config = PERF_SYNTH_INTEL_EVT;
+ err = intel_pt_synth_event(session, "evt", &attr, id);
+ if (err)
+ return err;
+ pt->evt_sample_type = attr.sample_type;
+ pt->evt_id = id;
+ intel_pt_set_event_name(evlist, id, "evt");
+ id += 1;
+ }
+
+ if (pt->synth_opts.intr_events && pt->cap_event_trace) {
+ attr.config = PERF_SYNTH_INTEL_IFLAG_CHG;
+ err = intel_pt_synth_event(session, "iflag", &attr, id);
+ if (err)
+ return err;
+ pt->iflag_chg_sample_type = attr.sample_type;
+ pt->iflag_chg_id = id;
+ intel_pt_set_event_name(evlist, id, "iflag");
+ id += 1;
+ }
+
return 0;
}
@@ -3132,9 +4168,14 @@ static void intel_pt_setup_pebs_events(struct intel_pt *pt)
evlist__for_each_entry(pt->session->evlist, evsel) {
if (evsel->core.attr.aux_output && evsel->core.id) {
+ if (pt->single_pebs) {
+ pt->single_pebs = false;
+ return;
+ }
+ pt->single_pebs = true;
pt->sample_pebs = true;
+ pt->pebs_data_src_fmt = intel_pt_data_src_fmt(pt, evsel);
pt->pebs_evsel = evsel;
- return;
}
}
}
@@ -3172,6 +4213,9 @@ static int intel_pt_perf_config(const char *var, const char *value, void *data)
if (!strcmp(var, "intel-pt.mispred-all"))
pt->mispred_all = perf_config_bool(var, value);
+ if (!strcmp(var, "intel-pt.max-loops"))
+ perf_config_int(&pt->max_loops, var, value);
+
return 0;
}
@@ -3254,10 +4298,70 @@ static int intel_pt_setup_time_ranges(struct intel_pt *pt,
return 0;
}
+static int intel_pt_parse_vm_tm_corr_arg(struct intel_pt *pt, char **args)
+{
+ struct intel_pt_vmcs_info *vmcs_info;
+ u64 tsc_offset, vmcs;
+ char *p = *args;
+
+ errno = 0;
+
+ p = skip_spaces(p);
+ if (!*p)
+ return 1;
+
+ tsc_offset = strtoull(p, &p, 0);
+ if (errno)
+ return -errno;
+ p = skip_spaces(p);
+ if (*p != ':') {
+ pt->dflt_tsc_offset = tsc_offset;
+ *args = p;
+ return 0;
+ }
+ p += 1;
+ while (1) {
+ vmcs = strtoull(p, &p, 0);
+ if (errno)
+ return -errno;
+ if (!vmcs)
+ return -EINVAL;
+ vmcs_info = intel_pt_findnew_vmcs(&pt->vmcs_info, vmcs, tsc_offset);
+ if (!vmcs_info)
+ return -ENOMEM;
+ p = skip_spaces(p);
+ if (*p != ',')
+ break;
+ p += 1;
+ }
+ *args = p;
+ return 0;
+}
+
+static int intel_pt_parse_vm_tm_corr_args(struct intel_pt *pt)
+{
+ char *args = pt->synth_opts.vm_tm_corr_args;
+ int ret;
+
+ if (!args)
+ return 0;
+
+ do {
+ ret = intel_pt_parse_vm_tm_corr_arg(pt, &args);
+ } while (!ret);
+
+ if (ret < 0) {
+ pr_err("Failed to parse VM Time Correlation options\n");
+ return ret;
+ }
+
+ return 0;
+}
+
static const char * const intel_pt_info_fmts[] = {
[INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n",
[INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n",
- [INTEL_PT_TIME_MULT] = " Time Muliplier %"PRIu64"\n",
+ [INTEL_PT_TIME_MULT] = " Time Multiplier %"PRIu64"\n",
[INTEL_PT_TIME_ZERO] = " Time Zero %"PRIu64"\n",
[INTEL_PT_CAP_USER_TIME_ZERO] = " Cap Time Zero %"PRId64"\n",
[INTEL_PT_TSC_BIT] = " TSC bit %#"PRIx64"\n",
@@ -3266,6 +4370,7 @@ static const char * const intel_pt_info_fmts[] = {
[INTEL_PT_SNAPSHOT_MODE] = " Snapshot mode %"PRId64"\n",
[INTEL_PT_PER_CPU_MMAPS] = " Per-cpu maps %"PRId64"\n",
[INTEL_PT_MTC_BIT] = " MTC bit %#"PRIx64"\n",
+ [INTEL_PT_MTC_FREQ_BITS] = " MTC freq bits %#"PRIx64"\n",
[INTEL_PT_TSC_CTC_N] = " TSC:CTC numerator %"PRIu64"\n",
[INTEL_PT_TSC_CTC_D] = " TSC:CTC denominator %"PRIu64"\n",
[INTEL_PT_CYC_BIT] = " CYC bit %#"PRIx64"\n",
@@ -3280,8 +4385,12 @@ static void intel_pt_print_info(__u64 *arr, int start, int finish)
if (!dump_trace)
return;
- for (i = start; i <= finish; i++)
- fprintf(stdout, intel_pt_info_fmts[i], arr[i]);
+ for (i = start; i <= finish; i++) {
+ const char *fmt = intel_pt_info_fmts[i];
+
+ if (fmt)
+ fprintf(stdout, fmt, arr[i]);
+ }
}
static void intel_pt_print_info_str(const char *name, const char *str)
@@ -3316,6 +4425,8 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
if (!pt)
return -ENOMEM;
+ pt->vmcs_info = RB_ROOT;
+
addr_filters__init(&pt->filts);
err = perf_config(intel_pt_perf_config, pt);
@@ -3326,7 +4437,22 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
if (err)
goto err_free;
- intel_pt_log_set_name(INTEL_PT_PMU_NAME);
+ if (session->itrace_synth_opts->set) {
+ pt->synth_opts = *session->itrace_synth_opts;
+ } else {
+ struct itrace_synth_opts *opts = session->itrace_synth_opts;
+
+ itrace_synth_opts__set_default(&pt->synth_opts, opts->default_no_sample);
+ if (!opts->default_no_sample && !opts->inject) {
+ pt->synth_opts.branches = false;
+ pt->synth_opts.callchain = true;
+ pt->synth_opts.add_callchain = true;
+ }
+ pt->synth_opts.thread_stack = opts->thread_stack;
+ }
+
+ if (!(pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_USE_STDOUT))
+ intel_pt_log_set_name(INTEL_PT_PMU_NAME);
pt->session = session;
pt->machine = &session->machines.host; /* No kvm support */
@@ -3363,7 +4489,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
}
info = &auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] + 1;
- info_end = (void *)info + auxtrace_info->header.size;
+ info_end = (void *)auxtrace_info + auxtrace_info->header.size;
if (intel_pt_has(auxtrace_info, INTEL_PT_FILTER_STR_LEN)) {
size_t len;
@@ -3402,6 +4528,13 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
intel_pt_print_info_str("Filter string", pt->filter);
}
+ if ((void *)info < info_end) {
+ pt->cap_event_trace = *info++;
+ if (dump_trace)
+ fprintf(stdout, " Cap Event Trace %d\n",
+ pt->cap_event_trace);
+ }
+
pt->timeless_decoding = intel_pt_timeless_decoding(pt);
if (pt->timeless_decoding && !pt->tc.time_mult)
pt->tc.time_mult = 1;
@@ -3409,20 +4542,34 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
pt->sampling_mode = intel_pt_sampling_mode(pt);
pt->est_tsc = !pt->timeless_decoding;
+ if (pt->synth_opts.vm_time_correlation) {
+ if (pt->timeless_decoding) {
+ pr_err("Intel PT has no time information for VM Time Correlation\n");
+ err = -EINVAL;
+ goto err_free_queues;
+ }
+ if (session->itrace_synth_opts->ptime_range) {
+ pr_err("Time ranges cannot be specified with VM Time Correlation\n");
+ err = -EINVAL;
+ goto err_free_queues;
+ }
+ /* Currently TSC Offset is calculated using MTC packets */
+ if (!intel_pt_have_mtc(pt)) {
+ pr_err("MTC packets must have been enabled for VM Time Correlation\n");
+ err = -EINVAL;
+ goto err_free_queues;
+ }
+ err = intel_pt_parse_vm_tm_corr_args(pt);
+ if (err)
+ goto err_free_queues;
+ }
+
pt->unknown_thread = thread__new(999999999, 999999999);
if (!pt->unknown_thread) {
err = -ENOMEM;
goto err_free_queues;
}
- /*
- * Since this thread will not be kept in any rbtree not in a
- * list, initialize its list node so that at thread__put() the
- * current thread lifetime assuption is kept and we don't segfault
- * at list_del_init().
- */
- INIT_LIST_HEAD(&pt->unknown_thread->node);
-
err = thread__set_comm(pt->unknown_thread, "unknown", 0);
if (err)
goto err_delete_thread;
@@ -3458,23 +4605,12 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
goto err_delete_thread;
}
- if (session->itrace_synth_opts->set) {
- pt->synth_opts = *session->itrace_synth_opts;
- } else {
- itrace_synth_opts__set_default(&pt->synth_opts,
- session->itrace_synth_opts->default_no_sample);
- if (!session->itrace_synth_opts->default_no_sample &&
- !session->itrace_synth_opts->inject) {
- pt->synth_opts.branches = false;
- pt->synth_opts.callchain = true;
- pt->synth_opts.add_callchain = true;
- }
- pt->synth_opts.thread_stack =
- session->itrace_synth_opts->thread_stack;
- }
+ if (pt->synth_opts.log) {
+ bool log_on_error = pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_ON_ERROR;
+ unsigned int log_on_error_size = pt->synth_opts.log_on_error_size;
- if (pt->synth_opts.log)
- intel_pt_log_enable();
+ intel_pt_log_enable(log_on_error, log_on_error_size);
+ }
/* Maximum non-turbo ratio is TSC freq / 100 MHz */
if (pt->tc.time_mult) {
@@ -3554,6 +4690,12 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
intel_pt_setup_pebs_events(pt);
+ if (perf_data__is_pipe(session->data)) {
+ pr_warning("WARNING: Intel PT with pipe mode is not recommended.\n"
+ " The output cannot relied upon. In particular,\n"
+ " timestamps and the order of events may be incorrect.\n");
+ }
+
if (pt->sampling_mode || list_empty(&session->auxtrace_index))
err = auxtrace_queue_data(session, true, true);
else