diff options
Diffstat (limited to '')
-rw-r--r-- | tools/perf/util/intel-pt.c | 1528 |
1 files changed, 1335 insertions, 193 deletions
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 3a0348caec7d..9b1011fe4826 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -5,6 +5,7 @@ */ #include <inttypes.h> +#include <linux/perf_event.h> #include <stdio.h> #include <stdbool.h> #include <errno.h> @@ -46,6 +47,12 @@ #define MAX_TIMESTAMP (~0ULL) +#define INTEL_PT_CFG_PASS_THRU BIT_ULL(0) +#define INTEL_PT_CFG_PWR_EVT_EN BIT_ULL(4) +#define INTEL_PT_CFG_BRANCH_EN BIT_ULL(13) +#define INTEL_PT_CFG_EVT_EN BIT_ULL(31) +#define INTEL_PT_CFG_TNT_DIS BIT_ULL(55) + struct range { u64 start; u64 end; @@ -68,9 +75,12 @@ struct intel_pt { bool data_queued; bool est_tsc; bool sync_switch; + bool sync_switch_not_supported; bool mispred_all; bool use_thread_stack; bool callstack; + bool cap_event_trace; + bool have_guest_sideband; unsigned int br_stack_sz; unsigned int br_stack_sz_plus; int have_sched_switch; @@ -78,6 +88,7 @@ struct intel_pt { u64 kernel_start; u64 switch_ip; u64 ptss_ip; + u64 first_timestamp; struct perf_tsc_conversion tc; bool cap_user_time_zero; @@ -88,6 +99,10 @@ struct intel_pt { u64 instructions_sample_type; u64 instructions_id; + bool sample_cycles; + u64 cycles_sample_type; + u64 cycles_id; + bool sample_branches; u32 branches_filter; u64 branches_sample_type; @@ -108,10 +123,19 @@ struct intel_pt { u64 exstop_id; u64 pwrx_id; u64 cbr_id; + u64 psb_id; + bool single_pebs; bool sample_pebs; + int pebs_data_src_fmt; struct evsel *pebs_evsel; + u64 evt_sample_type; + u64 evt_id; + + u64 iflag_chg_sample_type; + u64 iflag_chg_id; + u64 tsc_bit; u64 mtc_bit; u64 mtc_freq_bits; @@ -121,6 +145,7 @@ struct intel_pt { u64 noretcomp_bit; unsigned max_non_turbo_ratio; unsigned cbr2khz; + int max_loops; unsigned long num_events; @@ -132,6 +157,9 @@ struct intel_pt { struct ip_callchain *chain; struct branch_stack *br_stack; + + u64 dflt_tsc_offset; + struct rb_root vmcs_info; }; enum switch_state { @@ -142,6 +170,15 @@ enum switch_state { INTEL_PT_SS_EXPECTING_SWITCH_IP, }; +/* applicable_counters is 64-bits */ +#define INTEL_PT_MAX_PEBS 64 + +struct intel_pt_pebs_event { + struct evsel *evsel; + u64 id; + int data_src_fmt; +}; + struct intel_pt_queue { struct intel_pt *pt; unsigned int queue_nr; @@ -157,11 +194,19 @@ struct intel_pt_queue { bool step_through_buffers; bool use_buffer_pid_tid; bool sync_switch; + bool sample_ipc; pid_t pid, tid; int cpu; int switch_state; pid_t next_tid; struct thread *thread; + struct machine *guest_machine; + struct thread *guest_thread; + struct thread *unknown_guest_thread; + pid_t guest_machine_pid; + pid_t guest_pid; + pid_t guest_tid; + int vcpu; bool exclude_kernel; bool have_sample; u64 time; @@ -176,10 +221,13 @@ struct intel_pt_queue { u64 ipc_cyc_cnt; u64 last_in_insn_cnt; u64 last_in_cyc_cnt; + u64 last_cy_insn_cnt; + u64 last_cy_cyc_cnt; u64 last_br_insn_cnt; u64 last_br_cyc_cnt; unsigned int cbr_seen; char insn[INTEL_PT_INSN_BUF_SZ]; + struct intel_pt_pebs_event pebs[INTEL_PT_MAX_PEBS]; }; static void intel_pt_dump(struct intel_pt *pt __maybe_unused, @@ -203,7 +251,7 @@ static void intel_pt_dump(struct intel_pt *pt __maybe_unused, else pkt_len = 1; printf("."); - color_fprintf(stdout, color, " %08x: ", pos); + color_fprintf(stdout, color, " %08zx: ", pos); for (i = 0; i < pkt_len; i++) color_fprintf(stdout, color, " %02x", buf[i]); for (; i < 16; i++) @@ -267,6 +315,65 @@ static bool intel_pt_log_events(struct intel_pt *pt, u64 tm) return !n || !perf_time__ranges_skip_sample(range, n, tm); } +static struct intel_pt_vmcs_info *intel_pt_findnew_vmcs(struct rb_root *rb_root, + u64 vmcs, + u64 dflt_tsc_offset) +{ + struct rb_node **p = &rb_root->rb_node; + struct rb_node *parent = NULL; + struct intel_pt_vmcs_info *v; + + while (*p) { + parent = *p; + v = rb_entry(parent, struct intel_pt_vmcs_info, rb_node); + + if (v->vmcs == vmcs) + return v; + + if (vmcs < v->vmcs) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + + v = zalloc(sizeof(*v)); + if (v) { + v->vmcs = vmcs; + v->tsc_offset = dflt_tsc_offset; + v->reliable = dflt_tsc_offset; + + rb_link_node(&v->rb_node, parent, p); + rb_insert_color(&v->rb_node, rb_root); + } + + return v; +} + +static struct intel_pt_vmcs_info *intel_pt_findnew_vmcs_info(void *data, uint64_t vmcs) +{ + struct intel_pt_queue *ptq = data; + struct intel_pt *pt = ptq->pt; + + if (!vmcs && !pt->dflt_tsc_offset) + return NULL; + + return intel_pt_findnew_vmcs(&pt->vmcs_info, vmcs, pt->dflt_tsc_offset); +} + +static void intel_pt_free_vmcs_info(struct intel_pt *pt) +{ + struct intel_pt_vmcs_info *v; + struct rb_node *n; + + n = rb_first(&pt->vmcs_info); + while (n) { + v = rb_entry(n, struct intel_pt_vmcs_info, rb_node); + n = rb_next(n); + rb_erase(&v->rb_node, &pt->vmcs_info); + free(v); + } +} + static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a, struct auxtrace_buffer *b) { @@ -274,9 +381,17 @@ static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer * void *start; start = intel_pt_find_overlap(a->data, a->size, b->data, b->size, - pt->have_tsc, &consecutive); + pt->have_tsc, &consecutive, + pt->synth_opts.vm_time_correlation); if (!start) return -EINVAL; + /* + * In the case of vm_time_correlation, the overlap might contain TSC + * packets that will not be fixed, and that will then no longer work for + * overlap detection. Avoid that by zeroing out the overlap. + */ + if (pt->synth_opts.vm_time_correlation) + memset(b->data, 0, start - b->data); b->use_size = b->data + b->size - start; b->use_data = start; if (b->use_size && consecutive) @@ -430,6 +545,7 @@ struct intel_pt_cache_entry { u64 byte_cnt; enum intel_pt_insn_op op; enum intel_pt_insn_branch branch; + bool emulated_ptwrite; int length; int32_t rel; char insn[INTEL_PT_INSN_BUF_SZ]; @@ -484,15 +600,15 @@ static struct auxtrace_cache *intel_pt_cache(struct dso *dso, struct auxtrace_cache *c; unsigned int bits; - if (dso->auxtrace_cache) - return dso->auxtrace_cache; + if (dso__auxtrace_cache(dso)) + return dso__auxtrace_cache(dso); bits = intel_pt_cache_size(dso, machine); /* Ignoring cache creation failure */ c = auxtrace_cache__new(bits, sizeof(struct intel_pt_cache_entry), 200); - dso->auxtrace_cache = c; + dso__set_auxtrace_cache(dso, c); return c; } @@ -516,6 +632,7 @@ static int intel_pt_cache_add(struct dso *dso, struct machine *machine, e->byte_cnt = byte_cnt; e->op = intel_pt_insn->op; e->branch = intel_pt_insn->branch; + e->emulated_ptwrite = intel_pt_insn->emulated_ptwrite; e->length = intel_pt_insn->length; e->rel = intel_pt_insn->rel; memcpy(e->insn, intel_pt_insn->buf, INTEL_PT_INSN_BUF_SZ); @@ -535,7 +652,7 @@ intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset) if (!c) return NULL; - return auxtrace_cache__lookup(dso->auxtrace_cache, offset); + return auxtrace_cache__lookup(dso__auxtrace_cache(dso), offset); } static void intel_pt_cache_invalidate(struct dso *dso, struct machine *machine, @@ -546,16 +663,88 @@ static void intel_pt_cache_invalidate(struct dso *dso, struct machine *machine, if (!c) return; - auxtrace_cache__remove(dso->auxtrace_cache, offset); + auxtrace_cache__remove(dso__auxtrace_cache(dso), offset); } -static inline u8 intel_pt_cpumode(struct intel_pt *pt, uint64_t ip) +static inline bool intel_pt_guest_kernel_ip(uint64_t ip) { - return ip >= pt->kernel_start ? + /* Assumes 64-bit kernel */ + return ip & (1ULL << 63); +} + +static inline u8 intel_pt_nr_cpumode(struct intel_pt_queue *ptq, uint64_t ip, bool nr) +{ + if (nr) { + return intel_pt_guest_kernel_ip(ip) ? + PERF_RECORD_MISC_GUEST_KERNEL : + PERF_RECORD_MISC_GUEST_USER; + } + + return ip >= ptq->pt->kernel_start ? PERF_RECORD_MISC_KERNEL : PERF_RECORD_MISC_USER; } +static inline u8 intel_pt_cpumode(struct intel_pt_queue *ptq, uint64_t from_ip, uint64_t to_ip) +{ + /* No support for non-zero CS base */ + if (from_ip) + return intel_pt_nr_cpumode(ptq, from_ip, ptq->state->from_nr); + return intel_pt_nr_cpumode(ptq, to_ip, ptq->state->to_nr); +} + +static int intel_pt_get_guest(struct intel_pt_queue *ptq) +{ + struct machines *machines = &ptq->pt->session->machines; + struct machine *machine; + pid_t pid = ptq->pid <= 0 ? DEFAULT_GUEST_KERNEL_ID : ptq->pid; + + if (ptq->guest_machine && pid == ptq->guest_machine->pid) + return 0; + + ptq->guest_machine = NULL; + thread__zput(ptq->unknown_guest_thread); + + if (symbol_conf.guest_code) { + thread__zput(ptq->guest_thread); + ptq->guest_thread = machines__findnew_guest_code(machines, pid); + } + + machine = machines__find_guest(machines, pid); + if (!machine) + return -1; + + ptq->unknown_guest_thread = machine__idle_thread(machine); + if (!ptq->unknown_guest_thread) + return -1; + + ptq->guest_machine = machine; + + return 0; +} + +static inline bool intel_pt_jmp_16(struct intel_pt_insn *intel_pt_insn) +{ + return intel_pt_insn->rel == 16 && intel_pt_insn->branch == INTEL_PT_BR_UNCONDITIONAL; +} + +#define PTWRITE_MAGIC "\x0f\x0bperf,ptwrite " +#define PTWRITE_MAGIC_LEN 16 + +static bool intel_pt_emulated_ptwrite(struct dso *dso, struct machine *machine, u64 offset) +{ + unsigned char buf[PTWRITE_MAGIC_LEN]; + ssize_t len; + + len = dso__data_read_offset(dso, machine, offset, buf, PTWRITE_MAGIC_LEN); + if (len == PTWRITE_MAGIC_LEN && !memcmp(buf, PTWRITE_MAGIC, PTWRITE_MAGIC_LEN)) { + intel_pt_log("Emulated ptwrite signature found\n"); + return true; + } + intel_pt_log("Emulated ptwrite signature not found\n"); + return false; +} + static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip, uint64_t max_insn_cnt, @@ -567,53 +756,98 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, struct addr_location al; unsigned char buf[INTEL_PT_INSN_BUF_SZ]; ssize_t len; - int x86_64; + int x86_64, ret = 0; u8 cpumode; u64 offset, start_offset, start_ip; u64 insn_cnt = 0; bool one_map = true; + bool nr; + + addr_location__init(&al); intel_pt_insn->length = 0; + intel_pt_insn->op = INTEL_PT_OP_OTHER; if (to_ip && *ip == to_ip) goto out_no_cache; - cpumode = intel_pt_cpumode(ptq->pt, *ip); + nr = ptq->state->to_nr; + cpumode = intel_pt_nr_cpumode(ptq, *ip, nr); - thread = ptq->thread; - if (!thread) { - if (cpumode != PERF_RECORD_MISC_KERNEL) - return -EINVAL; - thread = ptq->pt->unknown_thread; + if (nr) { + if (ptq->pt->have_guest_sideband) { + if (!ptq->guest_machine || ptq->guest_machine_pid != ptq->pid) { + intel_pt_log("ERROR: guest sideband but no guest machine\n"); + ret = -EINVAL; + goto out_ret; + } + } else if ((!symbol_conf.guest_code && cpumode != PERF_RECORD_MISC_GUEST_KERNEL) || + intel_pt_get_guest(ptq)) { + intel_pt_log("ERROR: no guest machine\n"); + ret = -EINVAL; + goto out_ret; + } + machine = ptq->guest_machine; + thread = ptq->guest_thread; + if (!thread) { + if (cpumode != PERF_RECORD_MISC_GUEST_KERNEL) { + intel_pt_log("ERROR: no guest thread\n"); + ret = -EINVAL; + goto out_ret; + } + thread = ptq->unknown_guest_thread; + } + } else { + thread = ptq->thread; + if (!thread) { + if (cpumode != PERF_RECORD_MISC_KERNEL) { + intel_pt_log("ERROR: no thread\n"); + ret = -EINVAL; + goto out_ret; + } + thread = ptq->pt->unknown_thread; + } } while (1) { - if (!thread__find_map(thread, cpumode, *ip, &al) || !al.map->dso) - return -EINVAL; + struct dso *dso; - if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR && - dso__data_status_seen(al.map->dso, - DSO_DATA_STATUS_SEEN_ITRACE)) - return -ENOENT; + if (!thread__find_map(thread, cpumode, *ip, &al) || !map__dso(al.map)) { + if (al.map) + intel_pt_log("ERROR: thread has no dso for %#" PRIx64 "\n", *ip); + else + intel_pt_log("ERROR: thread has no map for %#" PRIx64 "\n", *ip); + addr_location__exit(&al); + ret = -EINVAL; + goto out_ret; + } + dso = map__dso(al.map); - offset = al.map->map_ip(al.map, *ip); + if (dso__data(dso)->status == DSO_DATA_STATUS_ERROR && + dso__data_status_seen(dso, DSO_DATA_STATUS_SEEN_ITRACE)) { + ret = -ENOENT; + goto out_ret; + } + + offset = map__map_ip(al.map, *ip); if (!to_ip && one_map) { struct intel_pt_cache_entry *e; - e = intel_pt_cache_lookup(al.map->dso, machine, offset); + e = intel_pt_cache_lookup(dso, machine, offset); if (e && (!max_insn_cnt || e->insn_cnt <= max_insn_cnt)) { *insn_cnt_ptr = e->insn_cnt; *ip += e->byte_cnt; intel_pt_insn->op = e->op; intel_pt_insn->branch = e->branch; + intel_pt_insn->emulated_ptwrite = e->emulated_ptwrite; intel_pt_insn->length = e->length; intel_pt_insn->rel = e->rel; - memcpy(intel_pt_insn->buf, e->insn, - INTEL_PT_INSN_BUF_SZ); + memcpy(intel_pt_insn->buf, e->insn, INTEL_PT_INSN_BUF_SZ); intel_pt_log_insn_no_data(intel_pt_insn, *ip); - return 0; + ret = 0; + goto out_ret; } } @@ -623,34 +857,55 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, /* Load maps to ensure dso->is_64_bit has been updated */ map__load(al.map); - x86_64 = al.map->dso->is_64_bit; + x86_64 = dso__is_64_bit(dso); while (1) { - len = dso__data_read_offset(al.map->dso, machine, + len = dso__data_read_offset(dso, machine, offset, buf, INTEL_PT_INSN_BUF_SZ); - if (len <= 0) - return -EINVAL; + if (len <= 0) { + intel_pt_log("ERROR: failed to read at offset %#" PRIx64 " ", + offset); + if (intel_pt_enable_logging) + dso__fprintf(dso, intel_pt_log_fp()); + ret = -EINVAL; + goto out_ret; + } - if (intel_pt_get_insn(buf, len, x86_64, intel_pt_insn)) - return -EINVAL; + if (intel_pt_get_insn(buf, len, x86_64, intel_pt_insn)) { + ret = -EINVAL; + goto out_ret; + } intel_pt_log_insn(intel_pt_insn, *ip); insn_cnt += 1; - if (intel_pt_insn->branch != INTEL_PT_BR_NO_BRANCH) + if (intel_pt_insn->branch != INTEL_PT_BR_NO_BRANCH) { + bool eptw; + u64 offs; + + if (!intel_pt_jmp_16(intel_pt_insn)) + goto out; + /* Check for emulated ptwrite */ + offs = offset + intel_pt_insn->length; + eptw = intel_pt_emulated_ptwrite(dso, machine, offs); + intel_pt_insn->emulated_ptwrite = eptw; goto out; + } if (max_insn_cnt && insn_cnt >= max_insn_cnt) goto out_no_cache; *ip += intel_pt_insn->length; - if (to_ip && *ip == to_ip) + if (to_ip && *ip == to_ip) { + intel_pt_insn->length = 0; + intel_pt_insn->op = INTEL_PT_OP_OTHER; goto out_no_cache; + } - if (*ip >= al.map->end) + if (*ip >= map__end(al.map)) break; offset += intel_pt_insn->length; @@ -670,19 +925,22 @@ out: if (to_ip) { struct intel_pt_cache_entry *e; - e = intel_pt_cache_lookup(al.map->dso, machine, start_offset); + e = intel_pt_cache_lookup(map__dso(al.map), machine, start_offset); if (e) - return 0; + goto out_ret; } /* Ignore cache errors */ - intel_pt_cache_add(al.map->dso, machine, start_offset, insn_cnt, + intel_pt_cache_add(map__dso(al.map), machine, start_offset, insn_cnt, *ip - start_ip, intel_pt_insn); - return 0; +out_ret: + addr_location__exit(&al); + return ret; out_no_cache: *insn_cnt_ptr = insn_cnt; + addr_location__exit(&al); return 0; } @@ -731,9 +989,16 @@ static int __intel_pt_pgd_ip(uint64_t ip, void *data) struct addr_location al; u8 cpumode; u64 offset; + int res; - if (ip >= ptq->pt->kernel_start) + if (ptq->state->to_nr) { + if (intel_pt_guest_kernel_ip(ip)) + return intel_pt_match_pgd_ip(ptq->pt, ip, ip, NULL); + /* No support for decoding guest user space */ + return -EINVAL; + } else if (ip >= ptq->pt->kernel_start) { return intel_pt_match_pgd_ip(ptq->pt, ip, ip, NULL); + } cpumode = PERF_RECORD_MISC_USER; @@ -741,13 +1006,15 @@ static int __intel_pt_pgd_ip(uint64_t ip, void *data) if (!thread) return -EINVAL; - if (!thread__find_map(thread, cpumode, ip, &al) || !al.map->dso) + addr_location__init(&al); + if (!thread__find_map(thread, cpumode, ip, &al) || !map__dso(al.map)) return -EINVAL; - offset = al.map->map_ip(al.map, ip); + offset = map__map_ip(al.map, ip); - return intel_pt_match_pgd_ip(ptq->pt, ip, offset, - al.map->dso->long_name); + res = intel_pt_match_pgd_ip(ptq->pt, ip, offset, dso__long_name(map__dso(al.map))); + addr_location__exit(&al); + return res; } static bool intel_pt_pgd_ip(uint64_t ip, void *data) @@ -802,12 +1069,26 @@ static bool intel_pt_branch_enable(struct intel_pt *pt) evlist__for_each_entry(pt->session->evlist, evsel) { if (intel_pt_get_config(pt, &evsel->core.attr, &config) && - (config & 1) && !(config & 0x2000)) + (config & INTEL_PT_CFG_PASS_THRU) && + !(config & INTEL_PT_CFG_BRANCH_EN)) return false; } return true; } +static bool intel_pt_disabled_tnt(struct intel_pt *pt) +{ + struct evsel *evsel; + u64 config; + + evlist__for_each_entry(pt->session->evlist, evsel) { + if (intel_pt_get_config(pt, &evsel->core.attr, &config) && + config & INTEL_PT_CFG_TNT_DIS) + return true; + } + return false; +} + static unsigned int intel_pt_mtc_period(struct intel_pt *pt) { struct evsel *evsel; @@ -833,7 +1114,7 @@ static bool intel_pt_timeless_decoding(struct intel_pt *pt) bool timeless_decoding = true; u64 config; - if (!pt->tsc_bit || !pt->cap_user_time_zero) + if (!pt->tsc_bit || !pt->cap_user_time_zero || pt->synth_opts.timeless_decoding) return true; evlist__for_each_entry(pt->session->evlist, evsel) { @@ -881,6 +1162,19 @@ static bool intel_pt_have_tsc(struct intel_pt *pt) return have_tsc; } +static bool intel_pt_have_mtc(struct intel_pt *pt) +{ + struct evsel *evsel; + u64 config; + + evlist__for_each_entry(pt->session->evlist, evsel) { + if (intel_pt_get_config(pt, &evsel->core.attr, &config) && + (config & pt->mtc_bit)) + return true; + } + return false; +} + static bool intel_pt_sampling_mode(struct intel_pt *pt) { struct evsel *evsel; @@ -893,6 +1187,18 @@ static bool intel_pt_sampling_mode(struct intel_pt *pt) return false; } +static u64 intel_pt_ctl(struct intel_pt *pt) +{ + struct evsel *evsel; + u64 config; + + evlist__for_each_entry(pt->session->evlist, evsel) { + if (intel_pt_get_config(pt, &evsel->core.attr, &config)) + return config; + } + return 0; +} + static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns) { u64 quot, rem; @@ -978,6 +1284,7 @@ static void intel_pt_add_br_stack(struct intel_pt *pt, pt->kernel_start); sample->branch_stack = pt->br_stack; + thread__put(thread); } /* INTEL_PT_LBR_0, INTEL_PT_LBR_1 and INTEL_PT_LBR_2 */ @@ -1023,19 +1330,29 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, params.get_trace = intel_pt_get_trace; params.walk_insn = intel_pt_walk_next_insn; params.lookahead = intel_pt_lookahead; + params.findnew_vmcs_info = intel_pt_findnew_vmcs_info; params.data = ptq; params.return_compression = intel_pt_return_compression(pt); params.branch_enable = intel_pt_branch_enable(pt); + params.ctl = intel_pt_ctl(pt); params.max_non_turbo_ratio = pt->max_non_turbo_ratio; params.mtc_period = intel_pt_mtc_period(pt); params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n; params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d; params.quick = pt->synth_opts.quick; + params.vm_time_correlation = pt->synth_opts.vm_time_correlation; + params.vm_tm_corr_dry_run = pt->synth_opts.vm_tm_corr_dry_run; + params.first_timestamp = pt->first_timestamp; + params.max_loops = pt->max_loops; + + /* Cannot walk code without TNT, so force 'quick' mode */ + if (params.branch_enable && intel_pt_disabled_tnt(pt) && !params.quick) + params.quick = 1; if (pt->filts.cnt > 0) params.pgd_ip = intel_pt_pgd_ip; - if (pt->synth_opts.instructions) { + if (pt->synth_opts.instructions || pt->synth_opts.cycles) { if (pt->synth_opts.period) { switch (pt->synth_opts.period_type) { case PERF_ITRACE_PERIOD_INSTRUCTIONS: @@ -1087,6 +1404,8 @@ static void intel_pt_free_queue(void *priv) if (!ptq) return; thread__zput(ptq->thread); + thread__zput(ptq->guest_thread); + thread__zput(ptq->unknown_guest_thread); intel_pt_decoder_free(ptq->decoder); zfree(&ptq->event_buf); zfree(&ptq->last_branch); @@ -1094,6 +1413,70 @@ static void intel_pt_free_queue(void *priv) free(ptq); } +static void intel_pt_first_timestamp(struct intel_pt *pt, u64 timestamp) +{ + unsigned int i; + + pt->first_timestamp = timestamp; + + for (i = 0; i < pt->queues.nr_queues; i++) { + struct auxtrace_queue *queue = &pt->queues.queue_array[i]; + struct intel_pt_queue *ptq = queue->priv; + + if (ptq && ptq->decoder) + intel_pt_set_first_timestamp(ptq->decoder, timestamp); + } +} + +static int intel_pt_get_guest_from_sideband(struct intel_pt_queue *ptq) +{ + struct machines *machines = &ptq->pt->session->machines; + struct machine *machine; + pid_t machine_pid = ptq->pid; + pid_t tid; + int vcpu; + + if (machine_pid <= 0) + return 0; /* Not a guest machine */ + + machine = machines__find(machines, machine_pid); + if (!machine) + return 0; /* Not a guest machine */ + + if (ptq->guest_machine != machine) { + ptq->guest_machine = NULL; + thread__zput(ptq->guest_thread); + thread__zput(ptq->unknown_guest_thread); + + ptq->unknown_guest_thread = machine__find_thread(machine, 0, 0); + if (!ptq->unknown_guest_thread) + return -1; + ptq->guest_machine = machine; + } + + vcpu = ptq->thread ? thread__guest_cpu(ptq->thread) : -1; + if (vcpu < 0) + return -1; + + tid = machine__get_current_tid(machine, vcpu); + + if (ptq->guest_thread && thread__tid(ptq->guest_thread) != tid) + thread__zput(ptq->guest_thread); + + if (!ptq->guest_thread) { + ptq->guest_thread = machine__find_thread(machine, -1, tid); + if (!ptq->guest_thread) + return -1; + } + + ptq->guest_machine_pid = machine_pid; + ptq->guest_pid = thread__pid(ptq->guest_thread); + ptq->guest_tid = tid; + ptq->vcpu = vcpu; + + return 0; +} + static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt, struct auxtrace_queue *queue) { @@ -1110,25 +1493,39 @@ static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt, ptq->thread = machine__find_thread(pt->machine, -1, ptq->tid); if (ptq->thread) { - ptq->pid = ptq->thread->pid_; + ptq->pid = thread__pid(ptq->thread); if (queue->cpu == -1) - ptq->cpu = ptq->thread->cpu; + ptq->cpu = thread__cpu(ptq->thread); + } + + if (pt->have_guest_sideband && intel_pt_get_guest_from_sideband(ptq)) { + ptq->guest_machine_pid = 0; + ptq->guest_pid = -1; + ptq->guest_tid = -1; + ptq->vcpu = -1; } } static void intel_pt_sample_flags(struct intel_pt_queue *ptq) { + struct intel_pt *pt = ptq->pt; + + ptq->insn_len = 0; if (ptq->state->flags & INTEL_PT_ABORT_TX) { ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT; } else if (ptq->state->flags & INTEL_PT_ASYNC) { - if (ptq->state->to_ip) + if (!ptq->state->to_ip) + ptq->flags = PERF_IP_FLAG_BRANCH | + PERF_IP_FLAG_ASYNC | + PERF_IP_FLAG_TRACE_END; + else if (ptq->state->from_nr && !ptq->state->to_nr) ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC | - PERF_IP_FLAG_INTERRUPT; + PERF_IP_FLAG_VMEXIT; else - ptq->flags = PERF_IP_FLAG_BRANCH | - PERF_IP_FLAG_TRACE_END; - ptq->insn_len = 0; + ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | + PERF_IP_FLAG_ASYNC | + PERF_IP_FLAG_INTERRUPT; } else { if (ptq->state->from_ip) ptq->flags = intel_pt_insn_type(ptq->state->insn_op); @@ -1145,6 +1542,17 @@ static void intel_pt_sample_flags(struct intel_pt_queue *ptq) ptq->flags |= PERF_IP_FLAG_TRACE_BEGIN; if (ptq->state->type & INTEL_PT_TRACE_END) ptq->flags |= PERF_IP_FLAG_TRACE_END; + + if (pt->cap_event_trace) { + if (ptq->state->type & INTEL_PT_IFLAG_CHG) { + if (!ptq->state->from_iflag) + ptq->flags |= PERF_IP_FLAG_INTR_DISABLE; + if (ptq->state->from_iflag != ptq->state->to_iflag) + ptq->flags |= PERF_IP_FLAG_INTR_TOGGLE; + } else if (!ptq->state->to_iflag) { + ptq->flags |= PERF_IP_FLAG_INTR_DISABLE; + } + } } static void intel_pt_setup_time_range(struct intel_pt *pt, @@ -1285,6 +1693,17 @@ static void intel_pt_prep_a_sample(struct intel_pt_queue *ptq, sample->pid = ptq->pid; sample->tid = ptq->tid; + + if (ptq->pt->have_guest_sideband) { + if ((ptq->state->from_ip && ptq->state->from_nr) || + (ptq->state->to_ip && ptq->state->to_nr)) { + sample->pid = ptq->guest_pid; + sample->tid = ptq->guest_tid; + sample->machine_pid = ptq->guest_machine_pid; + sample->vcpu = ptq->vcpu; + } + } + sample->cpu = ptq->cpu; sample->insn_len = ptq->insn_len; memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ); @@ -1301,8 +1720,8 @@ static void intel_pt_prep_b_sample(struct intel_pt *pt, sample->time = tsc_to_perf_time(ptq->timestamp, &pt->tc); sample->ip = ptq->state->from_ip; - sample->cpumode = intel_pt_cpumode(pt, sample->ip); sample->addr = ptq->state->to_ip; + sample->cpumode = intel_pt_cpumode(ptq, sample->ip, sample->addr); sample->period = 1; sample->flags = ptq->flags; @@ -1347,12 +1766,13 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; struct dummy_branch_stack { u64 nr; u64 hw_idx; struct branch_entry entries; } dummy_bs; + int ret; if (pt->branches_filter && !(pt->branches_filter & ptq->flags)) return 0; @@ -1360,6 +1780,7 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) if (intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_b_sample(pt, ptq, event, &sample); sample.id = ptq->pt->branches_id; @@ -1381,15 +1802,18 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) sample.branch_stack = (struct branch_stack *)&dummy_bs; } - sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_br_cyc_cnt; + if (ptq->sample_ipc) + sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_br_cyc_cnt; if (sample.cyc_cnt) { sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_br_insn_cnt; ptq->last_br_insn_cnt = ptq->ipc_insn_cnt; ptq->last_br_cyc_cnt = ptq->ipc_cyc_cnt; } - return intel_pt_deliver_synth_event(pt, event, &sample, + perf_sample__exit(&sample); + ret = intel_pt_deliver_synth_event(pt, event, &sample, pt->branches_sample_type); + return ret; } static void intel_pt_prep_sample(struct intel_pt *pt, @@ -1417,11 +1841,13 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; + int ret; if (intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_sample(pt, ptq, event, &sample); sample.id = ptq->pt->instructions_id; @@ -1431,7 +1857,8 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) else sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt; - sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt; + if (ptq->sample_ipc) + sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt; if (sample.cyc_cnt) { sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_in_insn_cnt; ptq->last_in_insn_cnt = ptq->ipc_insn_cnt; @@ -1440,26 +1867,63 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) ptq->last_insn_cnt = ptq->state->tot_insn_cnt; - return intel_pt_deliver_synth_event(pt, event, &sample, - pt->instructions_sample_type); + ret = intel_pt_deliver_synth_event(pt, event, &sample, + pt->instructions_sample_type); + perf_sample__exit(&sample); + return ret; +} + +static int intel_pt_synth_cycle_sample(struct intel_pt_queue *ptq) +{ + struct intel_pt *pt = ptq->pt; + union perf_event *event = ptq->event_buf; + struct perf_sample sample; + u64 period = 0; + int ret; + + if (ptq->sample_ipc) + period = ptq->ipc_cyc_cnt - ptq->last_cy_cyc_cnt; + + if (!period || intel_pt_skip_event(pt)) + return 0; + + perf_sample__init(&sample, /*all=*/true); + intel_pt_prep_sample(pt, ptq, event, &sample); + + sample.id = ptq->pt->cycles_id; + sample.stream_id = ptq->pt->cycles_id; + sample.period = period; + + sample.cyc_cnt = period; + sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_cy_insn_cnt; + ptq->last_cy_insn_cnt = ptq->ipc_insn_cnt; + ptq->last_cy_cyc_cnt = ptq->ipc_cyc_cnt; + + ret = intel_pt_deliver_synth_event(pt, event, &sample, pt->cycles_sample_type); + perf_sample__exit(&sample); + return ret; } static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; + int ret; if (intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_sample(pt, ptq, event, &sample); sample.id = ptq->pt->transactions_id; sample.stream_id = ptq->pt->transactions_id; - return intel_pt_deliver_synth_event(pt, event, &sample, - pt->transactions_sample_type); + ret = intel_pt_deliver_synth_event(pt, event, &sample, + pt->transactions_sample_type); + perf_sample__exit(&sample); + return ret; } static void intel_pt_prep_p_sample(struct intel_pt *pt, @@ -1507,15 +1971,17 @@ static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; struct perf_synth_intel_cbr raw; u32 flags; + int ret; if (intel_pt_skip_cbr_event(pt)) return 0; ptq->cbr_seen = ptq->state->cbr; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_p_sample(pt, ptq, event, &sample); sample.id = ptq->pt->cbr_id; @@ -1529,20 +1995,54 @@ static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, event, &sample, - pt->pwr_events_sample_type); + ret = intel_pt_deliver_synth_event(pt, event, &sample, + pt->pwr_events_sample_type); + perf_sample__exit(&sample); + return ret; +} + +static int intel_pt_synth_psb_sample(struct intel_pt_queue *ptq) +{ + struct intel_pt *pt = ptq->pt; + union perf_event *event = ptq->event_buf; + struct perf_sample sample; + struct perf_synth_intel_psb raw; + int ret; + + if (intel_pt_skip_event(pt)) + return 0; + + perf_sample__init(&sample, /*all=*/true); + intel_pt_prep_p_sample(pt, ptq, event, &sample); + + sample.id = ptq->pt->psb_id; + sample.stream_id = ptq->pt->psb_id; + sample.flags = 0; + + raw.reserved = 0; + raw.offset = ptq->state->psb_offset; + + sample.raw_size = perf_synth__raw_size(raw); + sample.raw_data = perf_synth__raw_data(&raw); + + ret = intel_pt_deliver_synth_event(pt, event, &sample, + pt->pwr_events_sample_type); + perf_sample__exit(&sample); + return ret; } static int intel_pt_synth_mwait_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; struct perf_synth_intel_mwait raw; + int ret; if (intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_p_sample(pt, ptq, event, &sample); sample.id = ptq->pt->mwait_id; @@ -1554,20 +2054,24 @@ static int intel_pt_synth_mwait_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, event, &sample, - pt->pwr_events_sample_type); + ret = intel_pt_deliver_synth_event(pt, event, &sample, + pt->pwr_events_sample_type); + perf_sample__exit(&sample); + return ret; } static int intel_pt_synth_pwre_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; struct perf_synth_intel_pwre raw; + int ret; if (intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_p_sample(pt, ptq, event, &sample); sample.id = ptq->pt->pwre_id; @@ -1579,20 +2083,24 @@ static int intel_pt_synth_pwre_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, event, &sample, - pt->pwr_events_sample_type); + ret = intel_pt_deliver_synth_event(pt, event, &sample, + pt->pwr_events_sample_type); + perf_sample__exit(&sample); + return ret; } static int intel_pt_synth_exstop_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; struct perf_synth_intel_exstop raw; + int ret; if (intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_p_sample(pt, ptq, event, &sample); sample.id = ptq->pt->exstop_id; @@ -1604,20 +2112,24 @@ static int intel_pt_synth_exstop_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, event, &sample, - pt->pwr_events_sample_type); + ret = intel_pt_deliver_synth_event(pt, event, &sample, + pt->pwr_events_sample_type); + perf_sample__exit(&sample); + return ret; } static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; struct perf_synth_intel_pwrx raw; + int ret; if (intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_p_sample(pt, ptq, event, &sample); sample.id = ptq->pt->pwrx_id; @@ -1629,8 +2141,10 @@ static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, event, &sample, - pt->pwr_events_sample_type); + ret = intel_pt_deliver_synth_event(pt, event, &sample, + pt->pwr_events_sample_type); + perf_sample__exit(&sample); + return ret; } /* @@ -1760,21 +2274,160 @@ static void intel_pt_add_lbrs(struct branch_stack *br_stack, } } -static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) +#define P(a, b) PERF_MEM_S(a, b) +#define OP_LH (P(OP, LOAD) | P(LVL, HIT)) +#define LEVEL(x) P(LVLNUM, x) +#define REM P(REMOTE, REMOTE) +#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS)) + +#define PERF_PEBS_DATA_SOURCE_GRT_MAX 0x10 +#define PERF_PEBS_DATA_SOURCE_GRT_MASK (PERF_PEBS_DATA_SOURCE_GRT_MAX - 1) + +/* Based on kernel __intel_pmu_pebs_data_source_grt() and pebs_data_source */ +static const u64 pebs_data_source_grt[PERF_PEBS_DATA_SOURCE_GRT_MAX] = { + P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA), /* L3 miss|SNP N/A */ + OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* L1 hit|SNP None */ + OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* LFB/MAB hit|SNP None */ + OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, NONE), /* L2 hit|SNP None */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, NONE), /* L3 hit|SNP None */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT), /* L3 hit|SNP Hit */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* L3 hit|SNP HitM */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* L3 hit|SNP HitM */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD), /* L3 hit|SNP Fwd */ + OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HITM), /* Remote L3 hit|SNP HitM */ + OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, HIT), /* RAM hit|SNP Hit */ + OP_LH | P(LVL, REM_RAM1) | REM | LEVEL(L3) | P(SNOOP, HIT), /* Remote L3 hit|SNP Hit */ + OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | SNOOP_NONE_MISS, /* RAM hit|SNP None or Miss */ + OP_LH | P(LVL, REM_RAM1) | LEVEL(RAM) | REM | SNOOP_NONE_MISS, /* Remote RAM hit|SNP None or Miss */ + OP_LH | P(LVL, IO) | LEVEL(NA) | P(SNOOP, NONE), /* I/O hit|SNP None */ + OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* Uncached hit|SNP None */ +}; + +/* Based on kernel __intel_pmu_pebs_data_source_cmt() and pebs_data_source */ +static const u64 pebs_data_source_cmt[PERF_PEBS_DATA_SOURCE_GRT_MAX] = { + P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA), /* L3 miss|SNP N/A */ + OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* L1 hit|SNP None */ + OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* LFB/MAB hit|SNP None */ + OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, NONE), /* L2 hit|SNP None */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, NONE), /* L3 hit|SNP None */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, MISS), /* L3 hit|SNP Hit */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT), /* L3 hit|SNP HitM */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD), /* L3 hit|SNP HitM */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* L3 hit|SNP Fwd */ + OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HITM), /* Remote L3 hit|SNP HitM */ + OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, NONE), /* RAM hit|SNP Hit */ + OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE), /* Remote L3 hit|SNP Hit */ + OP_LH | LEVEL(RAM) | REM | P(SNOOPX, FWD), /* RAM hit|SNP None or Miss */ + OP_LH | LEVEL(RAM) | REM | P(SNOOP, HITM), /* Remote RAM hit|SNP None or Miss */ + OP_LH | P(LVL, IO) | LEVEL(NA) | P(SNOOP, NONE), /* I/O hit|SNP None */ + OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* Uncached hit|SNP None */ +}; + +/* Based on kernel pebs_set_tlb_lock() */ +static inline void pebs_set_tlb_lock(u64 *val, bool tlb, bool lock) +{ + /* + * TLB access + * 0 = did not miss 2nd level TLB + * 1 = missed 2nd level TLB + */ + if (tlb) + *val |= P(TLB, MISS) | P(TLB, L2); + else + *val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2); + + /* locked prefix */ + if (lock) + *val |= P(LOCK, LOCKED); +} + +/* Based on kernel __grt_latency_data() */ +static u64 intel_pt_grt_latency_data(u8 dse, bool tlb, bool lock, bool blk, + const u64 *pebs_data_source) +{ + u64 val; + + dse &= PERF_PEBS_DATA_SOURCE_GRT_MASK; + val = pebs_data_source[dse]; + + pebs_set_tlb_lock(&val, tlb, lock); + + if (blk) + val |= P(BLK, DATA); + else + val |= P(BLK, NA); + + return val; +} + +/* Default value for data source */ +#define PERF_MEM_NA (PERF_MEM_S(OP, NA) |\ + PERF_MEM_S(LVL, NA) |\ + PERF_MEM_S(SNOOP, NA) |\ + PERF_MEM_S(LOCK, NA) |\ + PERF_MEM_S(TLB, NA) |\ + PERF_MEM_S(LVLNUM, NA)) + +enum DATA_SRC_FORMAT { + DATA_SRC_FORMAT_ERR = -1, + DATA_SRC_FORMAT_NA = 0, + DATA_SRC_FORMAT_GRT = 1, + DATA_SRC_FORMAT_CMT = 2, +}; + +/* Based on kernel grt_latency_data() and cmt_latency_data */ +static u64 intel_pt_get_data_src(u64 mem_aux_info, int data_src_fmt) +{ + switch (data_src_fmt) { + case DATA_SRC_FORMAT_GRT: { + union { + u64 val; + struct { + unsigned int dse:4; + unsigned int locked:1; + unsigned int stlb_miss:1; + unsigned int fwd_blk:1; + unsigned int reserved:25; + }; + } x = {.val = mem_aux_info}; + return intel_pt_grt_latency_data(x.dse, x.stlb_miss, x.locked, x.fwd_blk, + pebs_data_source_grt); + } + case DATA_SRC_FORMAT_CMT: { + union { + u64 val; + struct { + unsigned int dse:5; + unsigned int locked:1; + unsigned int stlb_miss:1; + unsigned int fwd_blk:1; + unsigned int reserved:24; + }; + } x = {.val = mem_aux_info}; + return intel_pt_grt_latency_data(x.dse, x.stlb_miss, x.locked, x.fwd_blk, + pebs_data_source_cmt); + } + default: + return PERF_MEM_NA; + } +} + +static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evsel *evsel, + u64 id, int data_src_fmt) { const struct intel_pt_blk_items *items = &ptq->state->items; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; union perf_event *event = ptq->event_buf; struct intel_pt *pt = ptq->pt; - struct evsel *evsel = pt->pebs_evsel; u64 sample_type = evsel->core.attr.sample_type; - u64 id = evsel->core.id[0]; u8 cpumode; - u64 regs[8 * sizeof(sample.intr_regs.mask)]; + u64 regs[8 * sizeof(sample.intr_regs->mask)]; + int ret; if (intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_a_sample(ptq, event, &sample); sample.id = id; @@ -1791,10 +2444,7 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) else sample.ip = ptq->state->from_ip; - /* No support for guest mode at this time */ - cpumode = sample.ip < ptq->pt->kernel_start ? - PERF_RECORD_MISC_USER : - PERF_RECORD_MISC_KERNEL; + cpumode = intel_pt_cpumode(ptq, sample.ip, 0); event->sample.header.misc = cpumode | PERF_RECORD_MISC_EXACT_IP; @@ -1824,15 +2474,16 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) items->mask[INTEL_PT_XMM_POS])) { u64 regs_mask = evsel->core.attr.sample_regs_intr; u64 *pos; + struct regs_dump *intr_regs = perf_sample__intr_regs(&sample); - sample.intr_regs.abi = items->is_32_bit ? + intr_regs->abi = items->is_32_bit ? PERF_SAMPLE_REGS_ABI_32 : PERF_SAMPLE_REGS_ABI_64; - sample.intr_regs.regs = regs; + intr_regs->regs = regs; - pos = intel_pt_add_gp_regs(&sample.intr_regs, regs, items, regs_mask); + pos = intel_pt_add_gp_regs(intr_regs, regs, items, regs_mask); - intel_pt_add_xmm(&sample.intr_regs, pos, items, regs_mask); + intel_pt_add_xmm(intr_regs, pos, items, regs_mask); } if (sample_type & PERF_SAMPLE_BRANCH_STACK) { @@ -1853,19 +2504,48 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) if (sample_type & PERF_SAMPLE_ADDR && items->has_mem_access_address) sample.addr = items->mem_access_address; - if (sample_type & PERF_SAMPLE_WEIGHT) { + if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) { /* * Refer kernel's setup_pebs_adaptive_sample_data() and * intel_hsw_weight(). */ - if (items->has_mem_access_latency) - sample.weight = items->mem_access_latency; + if (items->has_mem_access_latency) { + u64 weight = items->mem_access_latency >> 32; + + /* + * Starts from SPR, the mem access latency field + * contains both cache latency [47:32] and instruction + * latency [15:0]. The cache latency is the same as the + * mem access latency on previous platforms. + * + * In practice, no memory access could last than 4G + * cycles. Use latency >> 32 to distinguish the + * different format of the mem access latency field. + */ + if (weight > 0) { + sample.weight = weight & 0xffff; + sample.ins_lat = items->mem_access_latency & 0xffff; + } else + sample.weight = items->mem_access_latency; + } if (!sample.weight && items->has_tsx_aux_info) { /* Cycles last block */ sample.weight = (u32)items->tsx_aux_info; } } + if (sample_type & PERF_SAMPLE_DATA_SRC) { + if (items->has_mem_aux_info && data_src_fmt) { + if (data_src_fmt < 0) { + pr_err("Intel PT missing data_src info\n"); + return -1; + } + sample.data_src = intel_pt_get_data_src(items->mem_aux_info, data_src_fmt); + } else { + sample.data_src = PERF_MEM_NA; + } + } + if (sample_type & PERF_SAMPLE_TRANSACTION && items->has_tsx_aux_info) { u64 ax = items->has_rax ? items->rax : 0; /* Refer kernel's intel_hsw_transaction() */ @@ -1877,12 +2557,136 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) sample.transaction = txn; } - return intel_pt_deliver_synth_event(pt, event, &sample, sample_type); + ret = intel_pt_deliver_synth_event(pt, event, &sample, sample_type); + perf_sample__exit(&sample); + return ret; +} + +static int intel_pt_synth_single_pebs_sample(struct intel_pt_queue *ptq) +{ + struct intel_pt *pt = ptq->pt; + struct evsel *evsel = pt->pebs_evsel; + int data_src_fmt = pt->pebs_data_src_fmt; + u64 id = evsel->core.id[0]; + + return intel_pt_do_synth_pebs_sample(ptq, evsel, id, data_src_fmt); +} + +static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) +{ + const struct intel_pt_blk_items *items = &ptq->state->items; + struct intel_pt_pebs_event *pe; + struct intel_pt *pt = ptq->pt; + int err = -EINVAL; + int hw_id; + + if (!items->has_applicable_counters || !items->applicable_counters) { + if (!pt->single_pebs) + pr_err("PEBS-via-PT record with no applicable_counters\n"); + return intel_pt_synth_single_pebs_sample(ptq); + } + + for_each_set_bit(hw_id, (unsigned long *)&items->applicable_counters, INTEL_PT_MAX_PEBS) { + pe = &ptq->pebs[hw_id]; + if (!pe->evsel) { + if (!pt->single_pebs) + pr_err("PEBS-via-PT record with no matching event, hw_id %d\n", + hw_id); + return intel_pt_synth_single_pebs_sample(ptq); + } + err = intel_pt_do_synth_pebs_sample(ptq, pe->evsel, pe->id, pe->data_src_fmt); + if (err) + return err; + } + + return err; +} + +static int intel_pt_synth_events_sample(struct intel_pt_queue *ptq) +{ + struct intel_pt *pt = ptq->pt; + union perf_event *event = ptq->event_buf; + struct perf_sample sample; + struct { + struct perf_synth_intel_evt cfe; + struct perf_synth_intel_evd evd[INTEL_PT_MAX_EVDS]; + } raw; + int i, ret; + + if (intel_pt_skip_event(pt)) + return 0; + + perf_sample__init(&sample, /*all=*/true); + intel_pt_prep_p_sample(pt, ptq, event, &sample); + + sample.id = ptq->pt->evt_id; + sample.stream_id = ptq->pt->evt_id; + + raw.cfe.type = ptq->state->cfe_type; + raw.cfe.reserved = 0; + raw.cfe.ip = !!(ptq->state->flags & INTEL_PT_FUP_IP); + raw.cfe.vector = ptq->state->cfe_vector; + raw.cfe.evd_cnt = ptq->state->evd_cnt; + + for (i = 0; i < ptq->state->evd_cnt; i++) { + raw.evd[i].et = 0; + raw.evd[i].evd_type = ptq->state->evd[i].type; + raw.evd[i].payload = ptq->state->evd[i].payload; + } + + sample.raw_size = perf_synth__raw_size(raw) + + ptq->state->evd_cnt * sizeof(struct perf_synth_intel_evd); + sample.raw_data = perf_synth__raw_data(&raw); + + ret = intel_pt_deliver_synth_event(pt, event, &sample, + pt->evt_sample_type); + perf_sample__exit(&sample); + return ret; +} + +static int intel_pt_synth_iflag_chg_sample(struct intel_pt_queue *ptq) +{ + struct intel_pt *pt = ptq->pt; + union perf_event *event = ptq->event_buf; + struct perf_sample sample; + struct perf_synth_intel_iflag_chg raw; + int ret; + + if (intel_pt_skip_event(pt)) + return 0; + + perf_sample__init(&sample, /*all=*/true); + intel_pt_prep_p_sample(pt, ptq, event, &sample); + + sample.id = ptq->pt->iflag_chg_id; + sample.stream_id = ptq->pt->iflag_chg_id; + + raw.flags = 0; + raw.iflag = ptq->state->to_iflag; + + if (ptq->state->type & INTEL_PT_BRANCH) { + raw.via_branch = 1; + raw.branch_ip = ptq->state->to_ip; + } else { + sample.addr = 0; + } + sample.flags = ptq->flags; + + sample.raw_size = perf_synth__raw_size(raw); + sample.raw_data = perf_synth__raw_data(&raw); + + ret = intel_pt_deliver_synth_event(pt, event, &sample, + pt->iflag_chg_sample_type); + perf_sample__exit(&sample); + return ret; } static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu, - pid_t pid, pid_t tid, u64 ip, u64 timestamp) + pid_t pid, pid_t tid, u64 ip, u64 timestamp, + pid_t machine_pid, int vcpu) { + bool dump_log_on_error = pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_ON_ERROR; + bool log_on_stdout = pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_USE_STDOUT; union perf_event event; char msg[MAX_AUXTRACE_ERROR_MSG]; int err; @@ -1898,8 +2702,19 @@ static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu, intel_pt__strerror(code, msg, MAX_AUXTRACE_ERROR_MSG); - auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, - code, cpu, pid, tid, ip, msg, timestamp); + auxtrace_synth_guest_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, + code, cpu, pid, tid, ip, msg, timestamp, + machine_pid, vcpu); + + if (intel_pt_enable_logging && !log_on_stdout) { + FILE *fp = intel_pt_log_fp(); + + if (fp) + perf_event__fprintf_auxtrace_error(&event, fp); + } + + if (code != INTEL_PT_ERR_LOST && dump_log_on_error) + intel_pt_log_dump_buf(); err = perf_session__deliver_synth_event(pt->session, &event, NULL); if (err) @@ -1914,11 +2729,22 @@ static int intel_ptq_synth_error(struct intel_pt_queue *ptq, { struct intel_pt *pt = ptq->pt; u64 tm = ptq->timestamp; + pid_t machine_pid = 0; + pid_t pid = ptq->pid; + pid_t tid = ptq->tid; + int vcpu = -1; tm = pt->timeless_decoding ? 0 : tsc_to_perf_time(tm, &pt->tc); - return intel_pt_synth_error(pt, state->err, ptq->cpu, ptq->pid, - ptq->tid, state->from_ip, tm); + if (pt->have_guest_sideband && state->from_nr) { + machine_pid = ptq->guest_machine_pid; + vcpu = ptq->vcpu; + pid = ptq->guest_pid; + tid = ptq->guest_tid; + } + + return intel_pt_synth_error(pt, state->err, ptq->cpu, pid, tid, + state->from_ip, tm, machine_pid, vcpu); } static int intel_pt_next_tid(struct intel_pt *pt, struct intel_pt_queue *ptq) @@ -1966,15 +2792,20 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) ptq->have_sample = false; - if (ptq->state->tot_cyc_cnt > ptq->ipc_cyc_cnt) { - /* - * Cycle count and instruction count only go together to create - * a valid IPC ratio when the cycle count changes. - */ + if (pt->synth_opts.approx_ipc) { + ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt; + ptq->ipc_cyc_cnt = ptq->state->cycles; + ptq->sample_ipc = true; + } else { ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt; ptq->ipc_cyc_cnt = ptq->state->tot_cyc_cnt; + ptq->sample_ipc = ptq->state->flags & INTEL_PT_SAMPLE_IPC; } + /* Ensure guest code maps are set up */ + if (symbol_conf.guest_code && (state->from_nr || state->to_nr)) + intel_pt_get_guest(ptq); + /* * Do PEBS first to allow for the possibility that the PEBS timestamp * precedes the current timestamp. @@ -1985,7 +2816,25 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) return err; } + if (pt->synth_opts.intr_events) { + if (state->type & INTEL_PT_EVT) { + err = intel_pt_synth_events_sample(ptq); + if (err) + return err; + } + if (state->type & INTEL_PT_IFLAG_CHG) { + err = intel_pt_synth_iflag_chg_sample(ptq); + if (err) + return err; + } + } + if (pt->sample_pwr_events) { + if (state->type & INTEL_PT_PSB_EVT) { + err = intel_pt_synth_psb_sample(ptq); + if (err) + return err; + } if (ptq->state->cbr != ptq->cbr_seen) { err = intel_pt_synth_cbr_sample(ptq); if (err) @@ -2015,10 +2864,17 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) } } - if (pt->sample_instructions && (state->type & INTEL_PT_INSTRUCTION)) { - err = intel_pt_synth_instruction_sample(ptq); - if (err) - return err; + if (state->type & INTEL_PT_INSTRUCTION) { + if (pt->sample_instructions) { + err = intel_pt_synth_instruction_sample(ptq); + if (err) + return err; + } + if (pt->sample_cycles) { + err = intel_pt_synth_cycle_sample(ptq); + if (err) + return err; + } } if (pt->sample_transactions && (state->type & INTEL_PT_TRANSACTION)) { @@ -2047,7 +2903,27 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) } if (pt->sample_branches) { - err = intel_pt_synth_branch_sample(ptq); + if (state->from_nr != state->to_nr && + state->from_ip && state->to_ip) { + struct intel_pt_state *st = (struct intel_pt_state *)state; + u64 to_ip = st->to_ip; + u64 from_ip = st->from_ip; + + /* + * perf cannot handle having different machines for ip + * and addr, so create 2 branches. + */ + st->to_ip = 0; + err = intel_pt_synth_branch_sample(ptq); + if (err) + return err; + st->from_ip = 0; + st->to_ip = to_ip; + err = intel_pt_synth_branch_sample(ptq); + st->from_ip = from_ip; + } else { + err = intel_pt_synth_branch_sample(ptq); + } if (err) return err; } @@ -2100,13 +2976,13 @@ static u64 intel_pt_switch_ip(struct intel_pt *pt, u64 *ptss_ip) if (map__load(map)) return 0; - start = dso__first_symbol(map->dso); + start = dso__first_symbol(map__dso(map)); for (sym = start; sym; sym = dso__next_symbol(sym)) { if (sym->binding == STB_GLOBAL && !strcmp(sym->name, "__switch_to")) { - ip = map->unmap_ip(map, sym->start); - if (ip >= map->start && ip < map->end) { + ip = map__unmap_ip(map, sym->start); + if (ip >= map__start(map) && ip < map__end(map)) { switch_ip = ip; break; } @@ -2123,8 +2999,8 @@ static u64 intel_pt_switch_ip(struct intel_pt *pt, u64 *ptss_ip) for (sym = start; sym; sym = dso__next_symbol(sym)) { if (!strcmp(sym->name, ptss)) { - ip = map->unmap_ip(map, sym->start); - if (ip >= map->start && ip < map->end) { + ip = map__unmap_ip(map, sym->start); + if (ip >= map__start(map) && ip < map__end(map)) { *ptss_ip = ip; break; } @@ -2138,6 +3014,9 @@ static void intel_pt_enable_sync_switch(struct intel_pt *pt) { unsigned int i; + if (pt->sync_switch_not_supported) + return; + pt->sync_switch = true; for (i = 0; i < pt->queues.nr_queues; i++) { @@ -2149,6 +3028,23 @@ static void intel_pt_enable_sync_switch(struct intel_pt *pt) } } +static void intel_pt_disable_sync_switch(struct intel_pt *pt) +{ + unsigned int i; + + pt->sync_switch = false; + + for (i = 0; i < pt->queues.nr_queues; i++) { + struct auxtrace_queue *queue = &pt->queues.queue_array[i]; + struct intel_pt_queue *ptq = queue->priv; + + if (ptq) { + ptq->sync_switch = false; + intel_pt_next_tid(pt, ptq); + } + } +} + /* * To filter against time ranges, it is only necessary to look at the next start * or end time. @@ -2233,7 +3129,7 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp) if (pt->per_cpu_mmaps && (pt->have_sched_switch == 1 || pt->have_sched_switch == 3) && !pt->timeless_decoding && intel_pt_tracing_kernel(pt) && - !pt->sampling_mode) { + !pt->sampling_mode && !pt->synth_opts.vm_time_correlation) { pt->switch_ip = intel_pt_switch_ip(pt, &pt->ptss_ip); if (pt->switch_ip) { intel_pt_log("switch_ip: %"PRIx64" ptss_ip: %"PRIx64"\n", @@ -2259,6 +3155,7 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp) ptq->sync_switch = false; intel_pt_next_tid(pt, ptq); } + ptq->timestamp = state->est_timestamp; if (pt->synth_opts.errors) { err = intel_ptq_synth_error(ptq, state); if (err) @@ -2409,7 +3306,7 @@ static void intel_pt_sample_set_pid_tid_cpu(struct intel_pt_queue *ptq, if (ptq->pid == -1) { ptq->thread = machine__find_thread(m, -1, ptq->tid); if (ptq->thread) - ptq->pid = ptq->thread->pid_; + ptq->pid = thread__pid(ptq->thread); return; } @@ -2441,7 +3338,8 @@ static int intel_pt_process_timeless_sample(struct intel_pt *pt, static int intel_pt_lost(struct intel_pt *pt, struct perf_sample *sample) { return intel_pt_synth_error(pt, INTEL_PT_ERR_LOST, sample->cpu, - sample->pid, sample->tid, 0, sample->time); + sample->pid, sample->tid, 0, sample->time, + sample->machine_pid, sample->vcpu); } static struct intel_pt_queue *intel_pt_cpu_to_ptq(struct intel_pt *pt, int cpu) @@ -2517,14 +3415,14 @@ static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid, return 1; } +#ifdef HAVE_LIBTRACEEVENT static int intel_pt_process_switch(struct intel_pt *pt, struct perf_sample *sample) { - struct evsel *evsel; pid_t tid; int cpu, ret; + struct evsel *evsel = evlist__id2evsel(pt->session->evlist, sample->id); - evsel = perf_evlist__id2evsel(pt->session->evlist, sample->id); if (evsel != pt->switch_evsel) return 0; @@ -2541,6 +3439,7 @@ static int intel_pt_process_switch(struct intel_pt *pt, return machine__set_current_tid(pt->machine, cpu, -1, tid); } +#endif /* HAVE_LIBTRACEEVENT */ static int intel_pt_context_switch_in(struct intel_pt *pt, struct perf_sample *sample) @@ -2580,6 +3479,33 @@ static int intel_pt_context_switch_in(struct intel_pt *pt, return machine__set_current_tid(pt->machine, cpu, pid, tid); } +static int intel_pt_guest_context_switch(struct intel_pt *pt, + union perf_event *event, + struct perf_sample *sample) +{ + bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT; + struct machines *machines = &pt->session->machines; + struct machine *machine = machines__find(machines, sample->machine_pid); + + pt->have_guest_sideband = true; + + /* + * sync_switch cannot handle guest machines at present, so just disable + * it. + */ + pt->sync_switch_not_supported = true; + if (pt->sync_switch) + intel_pt_disable_sync_switch(pt); + + if (out) + return 0; + + if (!machine) + return -EINVAL; + + return machine__set_current_tid(machine, sample->vcpu, sample->pid, sample->tid); +} + static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event, struct perf_sample *sample) { @@ -2587,6 +3513,9 @@ static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event, pid_t pid, tid; int cpu, ret; + if (perf_event__is_guest(event)) + return intel_pt_guest_context_switch(pt, event, sample); + cpu = sample->cpu; if (pt->have_sched_switch == 3) { @@ -2632,10 +3561,78 @@ static int intel_pt_process_itrace_start(struct intel_pt *pt, event->itrace_start.tid); } +/* + * Events with data_src are identified by L1_Hit_Indication + * refer https://github.com/intel/perfmon + */ +static int intel_pt_data_src_fmt(struct intel_pt *pt, struct evsel *evsel) +{ + struct perf_env *env = pt->machine->env; + int fmt = DATA_SRC_FORMAT_NA; + + if (!env->cpuid) + return DATA_SRC_FORMAT_ERR; + + /* + * PEBS-via-PT is only supported on E-core non-hybrid. Of those only + * Gracemont and Crestmont have data_src. Check for: + * Alderlake N (Gracemont) + * Sierra Forest (Crestmont) + * Grand Ridge (Crestmont) + */ + + if (!strncmp(env->cpuid, "GenuineIntel,6,190,", 19)) + fmt = DATA_SRC_FORMAT_GRT; + + if (!strncmp(env->cpuid, "GenuineIntel,6,175,", 19) || + !strncmp(env->cpuid, "GenuineIntel,6,182,", 19)) + fmt = DATA_SRC_FORMAT_CMT; + + if (fmt == DATA_SRC_FORMAT_NA) + return fmt; + + /* + * Only data_src events are: + * mem-loads event=0xd0,umask=0x5 + * mem-stores event=0xd0,umask=0x6 + */ + if (evsel->core.attr.type == PERF_TYPE_RAW && + ((evsel->core.attr.config & 0xffff) == 0x5d0 || + (evsel->core.attr.config & 0xffff) == 0x6d0)) + return fmt; + + return DATA_SRC_FORMAT_NA; +} + +static int intel_pt_process_aux_output_hw_id(struct intel_pt *pt, + union perf_event *event, + struct perf_sample *sample) +{ + u64 hw_id = event->aux_output_hw_id.hw_id; + struct auxtrace_queue *queue; + struct intel_pt_queue *ptq; + struct evsel *evsel; + + queue = auxtrace_queues__sample_queue(&pt->queues, sample, pt->session); + evsel = evlist__id2evsel_strict(pt->session->evlist, sample->id); + if (!queue || !queue->priv || !evsel || hw_id > INTEL_PT_MAX_PEBS) { + pr_err("Bad AUX output hardware ID\n"); + return -EINVAL; + } + + ptq = queue->priv; + + ptq->pebs[hw_id].evsel = evsel; + ptq->pebs[hw_id].id = sample->id; + ptq->pebs[hw_id].data_src_fmt = intel_pt_data_src_fmt(pt, evsel); + + return 0; +} + static int intel_pt_find_map(struct thread *thread, u8 cpumode, u64 addr, struct addr_location *al) { - if (!al->map || addr < al->map->start || addr >= al->map->end) { + if (!al->map || addr < map__start(al->map) || addr >= map__end(al->map)) { if (!thread__find_map(thread, cpumode, addr, al)) return -1; } @@ -2651,27 +3648,32 @@ static int intel_pt_text_poke(struct intel_pt *pt, union perf_event *event) /* Assume text poke begins in a basic block no more than 4096 bytes */ int cnt = 4096 + event->text_poke.new_len; struct thread *thread = pt->unknown_thread; - struct addr_location al = { .map = NULL }; + struct addr_location al; struct machine *machine = pt->machine; struct intel_pt_cache_entry *e; u64 offset; + int ret = 0; + addr_location__init(&al); if (!event->text_poke.new_len) - return 0; + goto out; for (; cnt; cnt--, addr--) { + struct dso *dso; + if (intel_pt_find_map(thread, cpumode, addr, &al)) { if (addr < event->text_poke.addr) - return 0; + goto out; continue; } - if (!al.map->dso || !al.map->dso->auxtrace_cache) + dso = map__dso(al.map); + if (!dso || !dso__auxtrace_cache(dso)) continue; - offset = al.map->map_ip(al.map, addr); + offset = map__map_ip(al.map, addr); - e = intel_pt_cache_lookup(al.map->dso, machine, offset); + e = intel_pt_cache_lookup(dso, machine, offset); if (!e) continue; @@ -2682,21 +3684,22 @@ static int intel_pt_text_poke(struct intel_pt *pt, union perf_event *event) * branch instruction before the text poke address. */ if (e->branch != INTEL_PT_BR_NO_BRANCH) - return 0; + goto out; } else { - intel_pt_cache_invalidate(al.map->dso, machine, offset); + intel_pt_cache_invalidate(dso, machine, offset); intel_pt_log("Invalidated instruction cache for %s at %#"PRIx64"\n", - al.map->dso->long_name, addr); + dso__long_name(dso), addr); } } - - return 0; +out: + addr_location__exit(&al); + return ret; } static int intel_pt_process_event(struct perf_session *session, union perf_event *event, struct perf_sample *sample, - struct perf_tool *tool) + const struct perf_tool *tool) { struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, auxtrace); @@ -2733,6 +3736,8 @@ static int intel_pt_process_event(struct perf_session *session, sample->time); } } else if (timestamp) { + if (!pt->first_timestamp) + intel_pt_first_timestamp(pt, timestamp); err = intel_pt_process_queues(pt, timestamp); } if (err) @@ -2753,10 +3758,15 @@ static int intel_pt_process_event(struct perf_session *session, return err; } +#ifdef HAVE_LIBTRACEEVENT if (pt->switch_evsel && event->header.type == PERF_RECORD_SAMPLE) err = intel_pt_process_switch(pt, sample); - else if (event->header.type == PERF_RECORD_ITRACE_START) + else +#endif + if (event->header.type == PERF_RECORD_ITRACE_START) err = intel_pt_process_itrace_start(pt, event, sample); + else if (event->header.type == PERF_RECORD_AUX_OUTPUT_HW_ID) + err = intel_pt_process_aux_output_hw_id(pt, event, sample); else if (event->header.type == PERF_RECORD_SWITCH || event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) err = intel_pt_context_switch(pt, event, sample); @@ -2773,7 +3783,7 @@ static int intel_pt_process_event(struct perf_session *session, return err; } -static int intel_pt_flush(struct perf_session *session, struct perf_tool *tool) +static int intel_pt_flush(struct perf_session *session, const struct perf_tool *tool) { struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, auxtrace); @@ -2819,11 +3829,13 @@ static void intel_pt_free(struct perf_session *session) auxtrace_heap__free(&pt->heap); intel_pt_free_events(session); session->auxtrace = NULL; + intel_pt_free_vmcs_info(pt); thread__put(pt->unknown_thread); addr_filters__exit(&pt->filts); zfree(&pt->chain); zfree(&pt->filter); zfree(&pt->time_ranges); + zfree(&pt->br_stack); free(pt); } @@ -2838,7 +3850,7 @@ static bool intel_pt_evsel_is_auxtrace(struct perf_session *session, static int intel_pt_process_auxtrace_event(struct perf_session *session, union perf_event *event, - struct perf_tool *tool __maybe_unused) + const struct perf_tool *tool __maybe_unused) { struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, auxtrace); @@ -2897,37 +3909,15 @@ static int intel_pt_queue_data(struct perf_session *session, data_offset, timestamp); } -struct intel_pt_synth { - struct perf_tool dummy_tool; - struct perf_session *session; -}; - -static int intel_pt_event_synth(struct perf_tool *tool, - union perf_event *event, - struct perf_sample *sample __maybe_unused, - struct machine *machine __maybe_unused) -{ - struct intel_pt_synth *intel_pt_synth = - container_of(tool, struct intel_pt_synth, dummy_tool); - - return perf_session__deliver_synth_event(intel_pt_synth->session, event, - NULL); -} - static int intel_pt_synth_event(struct perf_session *session, const char *name, struct perf_event_attr *attr, u64 id) { - struct intel_pt_synth intel_pt_synth; int err; pr_debug("Synthesizing '%s' event with id %" PRIu64 " sample type %#" PRIx64 "\n", name, id, (u64)attr->sample_type); - memset(&intel_pt_synth, 0, sizeof(struct intel_pt_synth)); - intel_pt_synth.session = session; - - err = perf_event__synthesize_attr(&intel_pt_synth.dummy_tool, attr, 1, - &id, intel_pt_event_synth); + err = perf_session__deliver_synth_attr_event(session, attr, id); if (err) pr_err("%s: failed to synthesize '%s' event type\n", __func__, name); @@ -3043,6 +4033,22 @@ static int intel_pt_synth_events(struct intel_pt *pt, id += 1; } + if (pt->synth_opts.cycles) { + attr.config = PERF_COUNT_HW_CPU_CYCLES; + if (pt->synth_opts.period_type == PERF_ITRACE_PERIOD_NANOSECS) + attr.sample_period = + intel_pt_ns_to_ticks(pt, pt->synth_opts.period); + else + attr.sample_period = pt->synth_opts.period; + err = intel_pt_synth_event(session, "cycles", &attr, id); + if (err) + return err; + pt->sample_cycles = true; + pt->cycles_sample_type = attr.sample_type; + pt->cycles_id = id; + id += 1; + } + attr.sample_type &= ~(u64)PERF_SAMPLE_PERIOD; attr.sample_period = 1; @@ -3084,9 +4090,17 @@ static int intel_pt_synth_events(struct intel_pt *pt, pt->cbr_id = id; intel_pt_set_event_name(evlist, id, "cbr"); id += 1; + + attr.config = PERF_SYNTH_INTEL_PSB; + err = intel_pt_synth_event(session, "psb", &attr, id); + if (err) + return err; + pt->psb_id = id; + intel_pt_set_event_name(evlist, id, "psb"); + id += 1; } - if (pt->synth_opts.pwr_events && (evsel->core.attr.config & 0x10)) { + if (pt->synth_opts.pwr_events && (evsel->core.attr.config & INTEL_PT_CFG_PWR_EVT_EN)) { attr.config = PERF_SYNTH_INTEL_MWAIT; err = intel_pt_synth_event(session, "mwait", &attr, id); if (err) @@ -3120,6 +4134,28 @@ static int intel_pt_synth_events(struct intel_pt *pt, id += 1; } + if (pt->synth_opts.intr_events && (evsel->core.attr.config & INTEL_PT_CFG_EVT_EN)) { + attr.config = PERF_SYNTH_INTEL_EVT; + err = intel_pt_synth_event(session, "evt", &attr, id); + if (err) + return err; + pt->evt_sample_type = attr.sample_type; + pt->evt_id = id; + intel_pt_set_event_name(evlist, id, "evt"); + id += 1; + } + + if (pt->synth_opts.intr_events && pt->cap_event_trace) { + attr.config = PERF_SYNTH_INTEL_IFLAG_CHG; + err = intel_pt_synth_event(session, "iflag", &attr, id); + if (err) + return err; + pt->iflag_chg_sample_type = attr.sample_type; + pt->iflag_chg_id = id; + intel_pt_set_event_name(evlist, id, "iflag"); + id += 1; + } + return 0; } @@ -3132,9 +4168,14 @@ static void intel_pt_setup_pebs_events(struct intel_pt *pt) evlist__for_each_entry(pt->session->evlist, evsel) { if (evsel->core.attr.aux_output && evsel->core.id) { + if (pt->single_pebs) { + pt->single_pebs = false; + return; + } + pt->single_pebs = true; pt->sample_pebs = true; + pt->pebs_data_src_fmt = intel_pt_data_src_fmt(pt, evsel); pt->pebs_evsel = evsel; - return; } } } @@ -3172,6 +4213,9 @@ static int intel_pt_perf_config(const char *var, const char *value, void *data) if (!strcmp(var, "intel-pt.mispred-all")) pt->mispred_all = perf_config_bool(var, value); + if (!strcmp(var, "intel-pt.max-loops")) + perf_config_int(&pt->max_loops, var, value); + return 0; } @@ -3254,10 +4298,70 @@ static int intel_pt_setup_time_ranges(struct intel_pt *pt, return 0; } +static int intel_pt_parse_vm_tm_corr_arg(struct intel_pt *pt, char **args) +{ + struct intel_pt_vmcs_info *vmcs_info; + u64 tsc_offset, vmcs; + char *p = *args; + + errno = 0; + + p = skip_spaces(p); + if (!*p) + return 1; + + tsc_offset = strtoull(p, &p, 0); + if (errno) + return -errno; + p = skip_spaces(p); + if (*p != ':') { + pt->dflt_tsc_offset = tsc_offset; + *args = p; + return 0; + } + p += 1; + while (1) { + vmcs = strtoull(p, &p, 0); + if (errno) + return -errno; + if (!vmcs) + return -EINVAL; + vmcs_info = intel_pt_findnew_vmcs(&pt->vmcs_info, vmcs, tsc_offset); + if (!vmcs_info) + return -ENOMEM; + p = skip_spaces(p); + if (*p != ',') + break; + p += 1; + } + *args = p; + return 0; +} + +static int intel_pt_parse_vm_tm_corr_args(struct intel_pt *pt) +{ + char *args = pt->synth_opts.vm_tm_corr_args; + int ret; + + if (!args) + return 0; + + do { + ret = intel_pt_parse_vm_tm_corr_arg(pt, &args); + } while (!ret); + + if (ret < 0) { + pr_err("Failed to parse VM Time Correlation options\n"); + return ret; + } + + return 0; +} + static const char * const intel_pt_info_fmts[] = { [INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n", [INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n", - [INTEL_PT_TIME_MULT] = " Time Muliplier %"PRIu64"\n", + [INTEL_PT_TIME_MULT] = " Time Multiplier %"PRIu64"\n", [INTEL_PT_TIME_ZERO] = " Time Zero %"PRIu64"\n", [INTEL_PT_CAP_USER_TIME_ZERO] = " Cap Time Zero %"PRId64"\n", [INTEL_PT_TSC_BIT] = " TSC bit %#"PRIx64"\n", @@ -3266,6 +4370,7 @@ static const char * const intel_pt_info_fmts[] = { [INTEL_PT_SNAPSHOT_MODE] = " Snapshot mode %"PRId64"\n", [INTEL_PT_PER_CPU_MMAPS] = " Per-cpu maps %"PRId64"\n", [INTEL_PT_MTC_BIT] = " MTC bit %#"PRIx64"\n", + [INTEL_PT_MTC_FREQ_BITS] = " MTC freq bits %#"PRIx64"\n", [INTEL_PT_TSC_CTC_N] = " TSC:CTC numerator %"PRIu64"\n", [INTEL_PT_TSC_CTC_D] = " TSC:CTC denominator %"PRIu64"\n", [INTEL_PT_CYC_BIT] = " CYC bit %#"PRIx64"\n", @@ -3280,8 +4385,12 @@ static void intel_pt_print_info(__u64 *arr, int start, int finish) if (!dump_trace) return; - for (i = start; i <= finish; i++) - fprintf(stdout, intel_pt_info_fmts[i], arr[i]); + for (i = start; i <= finish; i++) { + const char *fmt = intel_pt_info_fmts[i]; + + if (fmt) + fprintf(stdout, fmt, arr[i]); + } } static void intel_pt_print_info_str(const char *name, const char *str) @@ -3316,6 +4425,8 @@ int intel_pt_process_auxtrace_info(union perf_event *event, if (!pt) return -ENOMEM; + pt->vmcs_info = RB_ROOT; + addr_filters__init(&pt->filts); err = perf_config(intel_pt_perf_config, pt); @@ -3326,7 +4437,22 @@ int intel_pt_process_auxtrace_info(union perf_event *event, if (err) goto err_free; - intel_pt_log_set_name(INTEL_PT_PMU_NAME); + if (session->itrace_synth_opts->set) { + pt->synth_opts = *session->itrace_synth_opts; + } else { + struct itrace_synth_opts *opts = session->itrace_synth_opts; + + itrace_synth_opts__set_default(&pt->synth_opts, opts->default_no_sample); + if (!opts->default_no_sample && !opts->inject) { + pt->synth_opts.branches = false; + pt->synth_opts.callchain = true; + pt->synth_opts.add_callchain = true; + } + pt->synth_opts.thread_stack = opts->thread_stack; + } + + if (!(pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_USE_STDOUT)) + intel_pt_log_set_name(INTEL_PT_PMU_NAME); pt->session = session; pt->machine = &session->machines.host; /* No kvm support */ @@ -3363,7 +4489,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event, } info = &auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] + 1; - info_end = (void *)info + auxtrace_info->header.size; + info_end = (void *)auxtrace_info + auxtrace_info->header.size; if (intel_pt_has(auxtrace_info, INTEL_PT_FILTER_STR_LEN)) { size_t len; @@ -3402,6 +4528,13 @@ int intel_pt_process_auxtrace_info(union perf_event *event, intel_pt_print_info_str("Filter string", pt->filter); } + if ((void *)info < info_end) { + pt->cap_event_trace = *info++; + if (dump_trace) + fprintf(stdout, " Cap Event Trace %d\n", + pt->cap_event_trace); + } + pt->timeless_decoding = intel_pt_timeless_decoding(pt); if (pt->timeless_decoding && !pt->tc.time_mult) pt->tc.time_mult = 1; @@ -3409,20 +4542,34 @@ int intel_pt_process_auxtrace_info(union perf_event *event, pt->sampling_mode = intel_pt_sampling_mode(pt); pt->est_tsc = !pt->timeless_decoding; + if (pt->synth_opts.vm_time_correlation) { + if (pt->timeless_decoding) { + pr_err("Intel PT has no time information for VM Time Correlation\n"); + err = -EINVAL; + goto err_free_queues; + } + if (session->itrace_synth_opts->ptime_range) { + pr_err("Time ranges cannot be specified with VM Time Correlation\n"); + err = -EINVAL; + goto err_free_queues; + } + /* Currently TSC Offset is calculated using MTC packets */ + if (!intel_pt_have_mtc(pt)) { + pr_err("MTC packets must have been enabled for VM Time Correlation\n"); + err = -EINVAL; + goto err_free_queues; + } + err = intel_pt_parse_vm_tm_corr_args(pt); + if (err) + goto err_free_queues; + } + pt->unknown_thread = thread__new(999999999, 999999999); if (!pt->unknown_thread) { err = -ENOMEM; goto err_free_queues; } - /* - * Since this thread will not be kept in any rbtree not in a - * list, initialize its list node so that at thread__put() the - * current thread lifetime assuption is kept and we don't segfault - * at list_del_init(). - */ - INIT_LIST_HEAD(&pt->unknown_thread->node); - err = thread__set_comm(pt->unknown_thread, "unknown", 0); if (err) goto err_delete_thread; @@ -3458,23 +4605,12 @@ int intel_pt_process_auxtrace_info(union perf_event *event, goto err_delete_thread; } - if (session->itrace_synth_opts->set) { - pt->synth_opts = *session->itrace_synth_opts; - } else { - itrace_synth_opts__set_default(&pt->synth_opts, - session->itrace_synth_opts->default_no_sample); - if (!session->itrace_synth_opts->default_no_sample && - !session->itrace_synth_opts->inject) { - pt->synth_opts.branches = false; - pt->synth_opts.callchain = true; - pt->synth_opts.add_callchain = true; - } - pt->synth_opts.thread_stack = - session->itrace_synth_opts->thread_stack; - } + if (pt->synth_opts.log) { + bool log_on_error = pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_ON_ERROR; + unsigned int log_on_error_size = pt->synth_opts.log_on_error_size; - if (pt->synth_opts.log) - intel_pt_log_enable(); + intel_pt_log_enable(log_on_error, log_on_error_size); + } /* Maximum non-turbo ratio is TSC freq / 100 MHz */ if (pt->tc.time_mult) { @@ -3554,6 +4690,12 @@ int intel_pt_process_auxtrace_info(union perf_event *event, intel_pt_setup_pebs_events(pt); + if (perf_data__is_pipe(session->data)) { + pr_warning("WARNING: Intel PT with pipe mode is not recommended.\n" + " The output cannot relied upon. In particular,\n" + " timestamps and the order of events may be incorrect.\n"); + } + if (pt->sampling_mode || list_empty(&session->auxtrace_index)) err = auxtrace_queue_data(session, true, true); else |