diff options
Diffstat (limited to 'tools/perf/util/intel-pt.c')
-rw-r--r-- | tools/perf/util/intel-pt.c | 1433 |
1 files changed, 1213 insertions, 220 deletions
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 33cf8928cf05..e3548ddef254 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -33,6 +33,7 @@ #include "tsc.h" #include "intel-pt.h" #include "config.h" +#include "util/perf_api_probe.h" #include "util/synthetic-events.h" #include "time-utils.h" @@ -45,6 +46,12 @@ #define MAX_TIMESTAMP (~0ULL) +#define INTEL_PT_CFG_PASS_THRU BIT_ULL(0) +#define INTEL_PT_CFG_PWR_EVT_EN BIT_ULL(4) +#define INTEL_PT_CFG_BRANCH_EN BIT_ULL(13) +#define INTEL_PT_CFG_EVT_EN BIT_ULL(31) +#define INTEL_PT_CFG_TNT_DIS BIT_ULL(55) + struct range { u64 start; u64 end; @@ -67,12 +74,20 @@ struct intel_pt { bool data_queued; bool est_tsc; bool sync_switch; + bool sync_switch_not_supported; bool mispred_all; + bool use_thread_stack; + bool callstack; + bool cap_event_trace; + bool have_guest_sideband; + unsigned int br_stack_sz; + unsigned int br_stack_sz_plus; int have_sched_switch; u32 pmu_type; u64 kernel_start; u64 switch_ip; u64 ptss_ip; + u64 first_timestamp; struct perf_tsc_conversion tc; bool cap_user_time_zero; @@ -103,10 +118,18 @@ struct intel_pt { u64 exstop_id; u64 pwrx_id; u64 cbr_id; + u64 psb_id; + bool single_pebs; bool sample_pebs; struct evsel *pebs_evsel; + u64 evt_sample_type; + u64 evt_id; + + u64 iflag_chg_sample_type; + u64 iflag_chg_id; + u64 tsc_bit; u64 mtc_bit; u64 mtc_freq_bits; @@ -116,6 +139,7 @@ struct intel_pt { u64 noretcomp_bit; unsigned max_non_turbo_ratio; unsigned cbr2khz; + int max_loops; unsigned long num_events; @@ -124,6 +148,12 @@ struct intel_pt { struct range *time_ranges; unsigned int range_cnt; + + struct ip_callchain *chain; + struct branch_stack *br_stack; + + u64 dflt_tsc_offset; + struct rb_root vmcs_info; }; enum switch_state { @@ -134,6 +164,14 @@ enum switch_state { INTEL_PT_SS_EXPECTING_SWITCH_IP, }; +/* applicable_counters is 64-bits */ +#define INTEL_PT_MAX_PEBS 64 + +struct intel_pt_pebs_event { + struct evsel *evsel; + u64 id; +}; + struct intel_pt_queue { struct intel_pt *pt; unsigned int queue_nr; @@ -143,19 +181,25 @@ struct intel_pt_queue { const struct intel_pt_state *state; struct ip_callchain *chain; struct branch_stack *last_branch; - struct branch_stack *last_branch_rb; - size_t last_branch_pos; union perf_event *event_buf; bool on_heap; bool stop; bool step_through_buffers; bool use_buffer_pid_tid; bool sync_switch; + bool sample_ipc; pid_t pid, tid; int cpu; int switch_state; pid_t next_tid; struct thread *thread; + struct machine *guest_machine; + struct thread *guest_thread; + struct thread *unknown_guest_thread; + pid_t guest_machine_pid; + pid_t guest_pid; + pid_t guest_tid; + int vcpu; bool exclude_kernel; bool have_sample; u64 time; @@ -174,6 +218,7 @@ struct intel_pt_queue { u64 last_br_cyc_cnt; unsigned int cbr_seen; char insn[INTEL_PT_INSN_BUF_SZ]; + struct intel_pt_pebs_event pebs[INTEL_PT_MAX_PEBS]; }; static void intel_pt_dump(struct intel_pt *pt __maybe_unused, @@ -230,7 +275,7 @@ static void intel_pt_log_event(union perf_event *event) if (!intel_pt_enable_logging || !f) return; - perf_event__fprintf(event, f); + perf_event__fprintf(event, NULL, f); } static void intel_pt_dump_sample(struct perf_session *session, @@ -243,6 +288,83 @@ static void intel_pt_dump_sample(struct perf_session *session, intel_pt_dump(pt, sample->aux_sample.data, sample->aux_sample.size); } +static bool intel_pt_log_events(struct intel_pt *pt, u64 tm) +{ + struct perf_time_interval *range = pt->synth_opts.ptime_range; + int n = pt->synth_opts.range_num; + + if (pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_ALL_PERF_EVTS) + return true; + + if (pt->synth_opts.log_minus_flags & AUXTRACE_LOG_FLG_ALL_PERF_EVTS) + return false; + + /* perf_time__ranges_skip_sample does not work if time is zero */ + if (!tm) + tm = 1; + + return !n || !perf_time__ranges_skip_sample(range, n, tm); +} + +static struct intel_pt_vmcs_info *intel_pt_findnew_vmcs(struct rb_root *rb_root, + u64 vmcs, + u64 dflt_tsc_offset) +{ + struct rb_node **p = &rb_root->rb_node; + struct rb_node *parent = NULL; + struct intel_pt_vmcs_info *v; + + while (*p) { + parent = *p; + v = rb_entry(parent, struct intel_pt_vmcs_info, rb_node); + + if (v->vmcs == vmcs) + return v; + + if (vmcs < v->vmcs) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + + v = zalloc(sizeof(*v)); + if (v) { + v->vmcs = vmcs; + v->tsc_offset = dflt_tsc_offset; + v->reliable = dflt_tsc_offset; + + rb_link_node(&v->rb_node, parent, p); + rb_insert_color(&v->rb_node, rb_root); + } + + return v; +} + +static struct intel_pt_vmcs_info *intel_pt_findnew_vmcs_info(void *data, uint64_t vmcs) +{ + struct intel_pt_queue *ptq = data; + struct intel_pt *pt = ptq->pt; + + if (!vmcs && !pt->dflt_tsc_offset) + return NULL; + + return intel_pt_findnew_vmcs(&pt->vmcs_info, vmcs, pt->dflt_tsc_offset); +} + +static void intel_pt_free_vmcs_info(struct intel_pt *pt) +{ + struct intel_pt_vmcs_info *v; + struct rb_node *n; + + n = rb_first(&pt->vmcs_info); + while (n) { + v = rb_entry(n, struct intel_pt_vmcs_info, rb_node); + n = rb_next(n); + rb_erase(&v->rb_node, &pt->vmcs_info); + free(v); + } +} + static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a, struct auxtrace_buffer *b) { @@ -250,9 +372,17 @@ static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer * void *start; start = intel_pt_find_overlap(a->data, a->size, b->data, b->size, - pt->have_tsc, &consecutive); + pt->have_tsc, &consecutive, + pt->synth_opts.vm_time_correlation); if (!start) return -EINVAL; + /* + * In the case of vm_time_correlation, the overlap might contain TSC + * packets that will not be fixed, and that will then no longer work for + * overlap detection. Avoid that by zeroing out the overlap. + */ + if (pt->synth_opts.vm_time_correlation) + memset(b->data, 0, start - b->data); b->use_size = b->data + b->size - start; b->use_data = start; if (b->use_size && consecutive) @@ -406,6 +536,7 @@ struct intel_pt_cache_entry { u64 byte_cnt; enum intel_pt_insn_op op; enum intel_pt_insn_branch branch; + bool emulated_ptwrite; int length; int32_t rel; char insn[INTEL_PT_INSN_BUF_SZ]; @@ -492,6 +623,7 @@ static int intel_pt_cache_add(struct dso *dso, struct machine *machine, e->byte_cnt = byte_cnt; e->op = intel_pt_insn->op; e->branch = intel_pt_insn->branch; + e->emulated_ptwrite = intel_pt_insn->emulated_ptwrite; e->length = intel_pt_insn->length; e->rel = intel_pt_insn->rel; memcpy(e->insn, intel_pt_insn->buf, INTEL_PT_INSN_BUF_SZ); @@ -514,13 +646,96 @@ intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset) return auxtrace_cache__lookup(dso->auxtrace_cache, offset); } -static inline u8 intel_pt_cpumode(struct intel_pt *pt, uint64_t ip) +static void intel_pt_cache_invalidate(struct dso *dso, struct machine *machine, + u64 offset) +{ + struct auxtrace_cache *c = intel_pt_cache(dso, machine); + + if (!c) + return; + + auxtrace_cache__remove(dso->auxtrace_cache, offset); +} + +static inline bool intel_pt_guest_kernel_ip(uint64_t ip) { - return ip >= pt->kernel_start ? + /* Assumes 64-bit kernel */ + return ip & (1ULL << 63); +} + +static inline u8 intel_pt_nr_cpumode(struct intel_pt_queue *ptq, uint64_t ip, bool nr) +{ + if (nr) { + return intel_pt_guest_kernel_ip(ip) ? + PERF_RECORD_MISC_GUEST_KERNEL : + PERF_RECORD_MISC_GUEST_USER; + } + + return ip >= ptq->pt->kernel_start ? PERF_RECORD_MISC_KERNEL : PERF_RECORD_MISC_USER; } +static inline u8 intel_pt_cpumode(struct intel_pt_queue *ptq, uint64_t from_ip, uint64_t to_ip) +{ + /* No support for non-zero CS base */ + if (from_ip) + return intel_pt_nr_cpumode(ptq, from_ip, ptq->state->from_nr); + return intel_pt_nr_cpumode(ptq, to_ip, ptq->state->to_nr); +} + +static int intel_pt_get_guest(struct intel_pt_queue *ptq) +{ + struct machines *machines = &ptq->pt->session->machines; + struct machine *machine; + pid_t pid = ptq->pid <= 0 ? DEFAULT_GUEST_KERNEL_ID : ptq->pid; + + if (ptq->guest_machine && pid == ptq->guest_machine->pid) + return 0; + + ptq->guest_machine = NULL; + thread__zput(ptq->unknown_guest_thread); + + if (symbol_conf.guest_code) { + thread__zput(ptq->guest_thread); + ptq->guest_thread = machines__findnew_guest_code(machines, pid); + } + + machine = machines__find_guest(machines, pid); + if (!machine) + return -1; + + ptq->unknown_guest_thread = machine__idle_thread(machine); + if (!ptq->unknown_guest_thread) + return -1; + + ptq->guest_machine = machine; + + return 0; +} + +static inline bool intel_pt_jmp_16(struct intel_pt_insn *intel_pt_insn) +{ + return intel_pt_insn->rel == 16 && intel_pt_insn->branch == INTEL_PT_BR_UNCONDITIONAL; +} + +#define PTWRITE_MAGIC "\x0f\x0bperf,ptwrite " +#define PTWRITE_MAGIC_LEN 16 + +static bool intel_pt_emulated_ptwrite(struct dso *dso, struct machine *machine, u64 offset) +{ + unsigned char buf[PTWRITE_MAGIC_LEN]; + ssize_t len; + + len = dso__data_read_offset(dso, machine, offset, buf, PTWRITE_MAGIC_LEN); + if (len == PTWRITE_MAGIC_LEN && !memcmp(buf, PTWRITE_MAGIC, PTWRITE_MAGIC_LEN)) { + intel_pt_log("Emulated ptwrite signature found\n"); + return true; + } + intel_pt_log("Emulated ptwrite signature not found\n"); + return false; +} + static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip, uint64_t max_insn_cnt, @@ -537,24 +752,55 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, u64 offset, start_offset, start_ip; u64 insn_cnt = 0; bool one_map = true; + bool nr; intel_pt_insn->length = 0; if (to_ip && *ip == to_ip) goto out_no_cache; - cpumode = intel_pt_cpumode(ptq->pt, *ip); + nr = ptq->state->to_nr; + cpumode = intel_pt_nr_cpumode(ptq, *ip, nr); - thread = ptq->thread; - if (!thread) { - if (cpumode != PERF_RECORD_MISC_KERNEL) + if (nr) { + if (ptq->pt->have_guest_sideband) { + if (!ptq->guest_machine || ptq->guest_machine_pid != ptq->pid) { + intel_pt_log("ERROR: guest sideband but no guest machine\n"); + return -EINVAL; + } + } else if ((!symbol_conf.guest_code && cpumode != PERF_RECORD_MISC_GUEST_KERNEL) || + intel_pt_get_guest(ptq)) { + intel_pt_log("ERROR: no guest machine\n"); return -EINVAL; - thread = ptq->pt->unknown_thread; + } + machine = ptq->guest_machine; + thread = ptq->guest_thread; + if (!thread) { + if (cpumode != PERF_RECORD_MISC_GUEST_KERNEL) { + intel_pt_log("ERROR: no guest thread\n"); + return -EINVAL; + } + thread = ptq->unknown_guest_thread; + } + } else { + thread = ptq->thread; + if (!thread) { + if (cpumode != PERF_RECORD_MISC_KERNEL) { + intel_pt_log("ERROR: no thread\n"); + return -EINVAL; + } + thread = ptq->pt->unknown_thread; + } } while (1) { - if (!thread__find_map(thread, cpumode, *ip, &al) || !al.map->dso) + if (!thread__find_map(thread, cpumode, *ip, &al) || !al.map->dso) { + if (al.map) + intel_pt_log("ERROR: thread has no dso for %#" PRIx64 "\n", *ip); + else + intel_pt_log("ERROR: thread has no map for %#" PRIx64 "\n", *ip); return -EINVAL; + } if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR && dso__data_status_seen(al.map->dso, @@ -573,6 +819,7 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, *ip += e->byte_cnt; intel_pt_insn->op = e->op; intel_pt_insn->branch = e->branch; + intel_pt_insn->emulated_ptwrite = e->emulated_ptwrite; intel_pt_insn->length = e->length; intel_pt_insn->rel = e->rel; memcpy(intel_pt_insn->buf, e->insn, @@ -594,8 +841,13 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, len = dso__data_read_offset(al.map->dso, machine, offset, buf, INTEL_PT_INSN_BUF_SZ); - if (len <= 0) + if (len <= 0) { + intel_pt_log("ERROR: failed to read at offset %#" PRIx64 " ", + offset); + if (intel_pt_enable_logging) + dso__fprintf(al.map->dso, intel_pt_log_fp()); return -EINVAL; + } if (intel_pt_get_insn(buf, len, x86_64, intel_pt_insn)) return -EINVAL; @@ -604,16 +856,28 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, insn_cnt += 1; - if (intel_pt_insn->branch != INTEL_PT_BR_NO_BRANCH) + if (intel_pt_insn->branch != INTEL_PT_BR_NO_BRANCH) { + bool eptw; + u64 offs; + + if (!intel_pt_jmp_16(intel_pt_insn)) + goto out; + /* Check for emulated ptwrite */ + offs = offset + intel_pt_insn->length; + eptw = intel_pt_emulated_ptwrite(al.map->dso, machine, offs); + intel_pt_insn->emulated_ptwrite = eptw; goto out; + } if (max_insn_cnt && insn_cnt >= max_insn_cnt) goto out_no_cache; *ip += intel_pt_insn->length; - if (to_ip && *ip == to_ip) + if (to_ip && *ip == to_ip) { + intel_pt_insn->length = 0; goto out_no_cache; + } if (*ip >= al.map->end) break; @@ -697,8 +961,14 @@ static int __intel_pt_pgd_ip(uint64_t ip, void *data) u8 cpumode; u64 offset; - if (ip >= ptq->pt->kernel_start) + if (ptq->state->to_nr) { + if (intel_pt_guest_kernel_ip(ip)) + return intel_pt_match_pgd_ip(ptq->pt, ip, ip, NULL); + /* No support for decoding guest user space */ + return -EINVAL; + } else if (ip >= ptq->pt->kernel_start) { return intel_pt_match_pgd_ip(ptq->pt, ip, ip, NULL); + } cpumode = PERF_RECORD_MISC_USER; @@ -767,12 +1037,26 @@ static bool intel_pt_branch_enable(struct intel_pt *pt) evlist__for_each_entry(pt->session->evlist, evsel) { if (intel_pt_get_config(pt, &evsel->core.attr, &config) && - (config & 1) && !(config & 0x2000)) + (config & INTEL_PT_CFG_PASS_THRU) && + !(config & INTEL_PT_CFG_BRANCH_EN)) return false; } return true; } +static bool intel_pt_disabled_tnt(struct intel_pt *pt) +{ + struct evsel *evsel; + u64 config; + + evlist__for_each_entry(pt->session->evlist, evsel) { + if (intel_pt_get_config(pt, &evsel->core.attr, &config) && + config & INTEL_PT_CFG_TNT_DIS) + return true; + } + return false; +} + static unsigned int intel_pt_mtc_period(struct intel_pt *pt) { struct evsel *evsel; @@ -798,7 +1082,7 @@ static bool intel_pt_timeless_decoding(struct intel_pt *pt) bool timeless_decoding = true; u64 config; - if (!pt->tsc_bit || !pt->cap_user_time_zero) + if (!pt->tsc_bit || !pt->cap_user_time_zero || pt->synth_opts.timeless_decoding) return true; evlist__for_each_entry(pt->session->evlist, evsel) { @@ -846,6 +1130,19 @@ static bool intel_pt_have_tsc(struct intel_pt *pt) return have_tsc; } +static bool intel_pt_have_mtc(struct intel_pt *pt) +{ + struct evsel *evsel; + u64 config; + + evlist__for_each_entry(pt->session->evlist, evsel) { + if (intel_pt_get_config(pt, &evsel->core.attr, &config) && + (config & pt->mtc_bit)) + return true; + } + return false; +} + static bool intel_pt_sampling_mode(struct intel_pt *pt) { struct evsel *evsel; @@ -858,6 +1155,18 @@ static bool intel_pt_sampling_mode(struct intel_pt *pt) return false; } +static u64 intel_pt_ctl(struct intel_pt *pt) +{ + struct evsel *evsel; + u64 config; + + evlist__for_each_entry(pt->session->evlist, evsel) { + if (intel_pt_get_config(pt, &evsel->core.attr, &config)) + return config; + } + return 0; +} + static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns) { u64 quot, rem; @@ -868,6 +1177,86 @@ static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns) pt->tc.time_mult; } +static struct ip_callchain *intel_pt_alloc_chain(struct intel_pt *pt) +{ + size_t sz = sizeof(struct ip_callchain); + + /* Add 1 to callchain_sz for callchain context */ + sz += (pt->synth_opts.callchain_sz + 1) * sizeof(u64); + return zalloc(sz); +} + +static int intel_pt_callchain_init(struct intel_pt *pt) +{ + struct evsel *evsel; + + evlist__for_each_entry(pt->session->evlist, evsel) { + if (!(evsel->core.attr.sample_type & PERF_SAMPLE_CALLCHAIN)) + evsel->synth_sample_type |= PERF_SAMPLE_CALLCHAIN; + } + + pt->chain = intel_pt_alloc_chain(pt); + if (!pt->chain) + return -ENOMEM; + + return 0; +} + +static void intel_pt_add_callchain(struct intel_pt *pt, + struct perf_sample *sample) +{ + struct thread *thread = machine__findnew_thread(pt->machine, + sample->pid, + sample->tid); + + thread_stack__sample_late(thread, sample->cpu, pt->chain, + pt->synth_opts.callchain_sz + 1, sample->ip, + pt->kernel_start); + + sample->callchain = pt->chain; +} + +static struct branch_stack *intel_pt_alloc_br_stack(unsigned int entry_cnt) +{ + size_t sz = sizeof(struct branch_stack); + + sz += entry_cnt * sizeof(struct branch_entry); + return zalloc(sz); +} + +static int intel_pt_br_stack_init(struct intel_pt *pt) +{ + struct evsel *evsel; + + evlist__for_each_entry(pt->session->evlist, evsel) { + if (!(evsel->core.attr.sample_type & PERF_SAMPLE_BRANCH_STACK)) + evsel->synth_sample_type |= PERF_SAMPLE_BRANCH_STACK; + } + + pt->br_stack = intel_pt_alloc_br_stack(pt->br_stack_sz); + if (!pt->br_stack) + return -ENOMEM; + + return 0; +} + +static void intel_pt_add_br_stack(struct intel_pt *pt, + struct perf_sample *sample) +{ + struct thread *thread = machine__findnew_thread(pt->machine, + sample->pid, + sample->tid); + + thread_stack__br_sample_late(thread, sample->cpu, pt->br_stack, + pt->br_stack_sz, sample->ip, + pt->kernel_start); + + sample->branch_stack = pt->br_stack; +} + +/* INTEL_PT_LBR_0, INTEL_PT_LBR_1 and INTEL_PT_LBR_2 */ +#define LBRS_MAX (INTEL_PT_BLK_ITEM_ID_CNT * 3U) + static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, unsigned int queue_nr) { @@ -880,26 +1269,17 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, return NULL; if (pt->synth_opts.callchain) { - size_t sz = sizeof(struct ip_callchain); - - /* Add 1 to callchain_sz for callchain context */ - sz += (pt->synth_opts.callchain_sz + 1) * sizeof(u64); - ptq->chain = zalloc(sz); + ptq->chain = intel_pt_alloc_chain(pt); if (!ptq->chain) goto out_free; } - if (pt->synth_opts.last_branch) { - size_t sz = sizeof(struct branch_stack); + if (pt->synth_opts.last_branch || pt->synth_opts.other_events) { + unsigned int entry_cnt = max(LBRS_MAX, pt->br_stack_sz); - sz += pt->synth_opts.last_branch_sz * - sizeof(struct branch_entry); - ptq->last_branch = zalloc(sz); + ptq->last_branch = intel_pt_alloc_br_stack(entry_cnt); if (!ptq->last_branch) goto out_free; - ptq->last_branch_rb = zalloc(sz); - if (!ptq->last_branch_rb) - goto out_free; } ptq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE); @@ -917,13 +1297,24 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, params.get_trace = intel_pt_get_trace; params.walk_insn = intel_pt_walk_next_insn; params.lookahead = intel_pt_lookahead; + params.findnew_vmcs_info = intel_pt_findnew_vmcs_info; params.data = ptq; params.return_compression = intel_pt_return_compression(pt); params.branch_enable = intel_pt_branch_enable(pt); + params.ctl = intel_pt_ctl(pt); params.max_non_turbo_ratio = pt->max_non_turbo_ratio; params.mtc_period = intel_pt_mtc_period(pt); params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n; params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d; + params.quick = pt->synth_opts.quick; + params.vm_time_correlation = pt->synth_opts.vm_time_correlation; + params.vm_tm_corr_dry_run = pt->synth_opts.vm_tm_corr_dry_run; + params.first_timestamp = pt->first_timestamp; + params.max_loops = pt->max_loops; + + /* Cannot walk code without TNT, so force 'quick' mode */ + if (params.branch_enable && intel_pt_disabled_tnt(pt) && !params.quick) + params.quick = 1; if (pt->filts.cnt > 0) params.pgd_ip = intel_pt_pgd_ip; @@ -968,7 +1359,6 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, out_free: zfree(&ptq->event_buf); zfree(&ptq->last_branch); - zfree(&ptq->last_branch_rb); zfree(&ptq->chain); free(ptq); return NULL; @@ -981,14 +1371,79 @@ static void intel_pt_free_queue(void *priv) if (!ptq) return; thread__zput(ptq->thread); + thread__zput(ptq->guest_thread); + thread__zput(ptq->unknown_guest_thread); intel_pt_decoder_free(ptq->decoder); zfree(&ptq->event_buf); zfree(&ptq->last_branch); - zfree(&ptq->last_branch_rb); zfree(&ptq->chain); free(ptq); } +static void intel_pt_first_timestamp(struct intel_pt *pt, u64 timestamp) +{ + unsigned int i; + + pt->first_timestamp = timestamp; + + for (i = 0; i < pt->queues.nr_queues; i++) { + struct auxtrace_queue *queue = &pt->queues.queue_array[i]; + struct intel_pt_queue *ptq = queue->priv; + + if (ptq && ptq->decoder) + intel_pt_set_first_timestamp(ptq->decoder, timestamp); + } +} + +static int intel_pt_get_guest_from_sideband(struct intel_pt_queue *ptq) +{ + struct machines *machines = &ptq->pt->session->machines; + struct machine *machine; + pid_t machine_pid = ptq->pid; + pid_t tid; + int vcpu; + + if (machine_pid <= 0) + return 0; /* Not a guest machine */ + + machine = machines__find(machines, machine_pid); + if (!machine) + return 0; /* Not a guest machine */ + + if (ptq->guest_machine != machine) { + ptq->guest_machine = NULL; + thread__zput(ptq->guest_thread); + thread__zput(ptq->unknown_guest_thread); + + ptq->unknown_guest_thread = machine__find_thread(machine, 0, 0); + if (!ptq->unknown_guest_thread) + return -1; + ptq->guest_machine = machine; + } + + vcpu = ptq->thread ? ptq->thread->guest_cpu : -1; + if (vcpu < 0) + return -1; + + tid = machine__get_current_tid(machine, vcpu); + + if (ptq->guest_thread && ptq->guest_thread->tid != tid) + thread__zput(ptq->guest_thread); + + if (!ptq->guest_thread) { + ptq->guest_thread = machine__find_thread(machine, -1, tid); + if (!ptq->guest_thread) + return -1; + } + + ptq->guest_machine_pid = machine_pid; + ptq->guest_pid = ptq->guest_thread->pid_; + ptq->guest_tid = tid; + ptq->vcpu = vcpu; + + return 0; +} + static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt, struct auxtrace_queue *queue) { @@ -996,6 +1451,8 @@ static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt, if (queue->tid == -1 || pt->have_sched_switch) { ptq->tid = machine__get_current_tid(pt->machine, ptq->cpu); + if (ptq->tid == -1) + ptq->pid = -1; thread__zput(ptq->thread); } @@ -1007,21 +1464,33 @@ static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt, if (queue->cpu == -1) ptq->cpu = ptq->thread->cpu; } + + if (pt->have_guest_sideband && intel_pt_get_guest_from_sideband(ptq)) { + ptq->guest_machine_pid = 0; + ptq->guest_pid = -1; + ptq->guest_tid = -1; + ptq->vcpu = -1; + } } static void intel_pt_sample_flags(struct intel_pt_queue *ptq) { + struct intel_pt *pt = ptq->pt; + + ptq->insn_len = 0; if (ptq->state->flags & INTEL_PT_ABORT_TX) { ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT; } else if (ptq->state->flags & INTEL_PT_ASYNC) { - if (ptq->state->to_ip) + if (!ptq->state->to_ip) + ptq->flags = PERF_IP_FLAG_BRANCH | + PERF_IP_FLAG_TRACE_END; + else if (ptq->state->from_nr && !ptq->state->to_nr) + ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | + PERF_IP_FLAG_VMEXIT; + else ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC | PERF_IP_FLAG_INTERRUPT; - else - ptq->flags = PERF_IP_FLAG_BRANCH | - PERF_IP_FLAG_TRACE_END; - ptq->insn_len = 0; } else { if (ptq->state->from_ip) ptq->flags = intel_pt_insn_type(ptq->state->insn_op); @@ -1038,6 +1507,17 @@ static void intel_pt_sample_flags(struct intel_pt_queue *ptq) ptq->flags |= PERF_IP_FLAG_TRACE_BEGIN; if (ptq->state->type & INTEL_PT_TRACE_END) ptq->flags |= PERF_IP_FLAG_TRACE_END; + + if (pt->cap_event_trace) { + if (ptq->state->type & INTEL_PT_IFLAG_CHG) { + if (!ptq->state->from_iflag) + ptq->flags |= PERF_IP_FLAG_INTR_DISABLE; + if (ptq->state->from_iflag != ptq->state->to_iflag) + ptq->flags |= PERF_IP_FLAG_INTR_TOGGLE; + } else if (!ptq->state->to_iflag) { + ptq->flags |= PERF_IP_FLAG_INTR_DISABLE; + } + } } static void intel_pt_setup_time_range(struct intel_pt *pt, @@ -1152,58 +1632,6 @@ static int intel_pt_setup_queues(struct intel_pt *pt) return 0; } -static inline void intel_pt_copy_last_branch_rb(struct intel_pt_queue *ptq) -{ - struct branch_stack *bs_src = ptq->last_branch_rb; - struct branch_stack *bs_dst = ptq->last_branch; - size_t nr = 0; - - bs_dst->nr = bs_src->nr; - - if (!bs_src->nr) - return; - - nr = ptq->pt->synth_opts.last_branch_sz - ptq->last_branch_pos; - memcpy(&bs_dst->entries[0], - &bs_src->entries[ptq->last_branch_pos], - sizeof(struct branch_entry) * nr); - - if (bs_src->nr >= ptq->pt->synth_opts.last_branch_sz) { - memcpy(&bs_dst->entries[nr], - &bs_src->entries[0], - sizeof(struct branch_entry) * ptq->last_branch_pos); - } -} - -static inline void intel_pt_reset_last_branch_rb(struct intel_pt_queue *ptq) -{ - ptq->last_branch_pos = 0; - ptq->last_branch_rb->nr = 0; -} - -static void intel_pt_update_last_branch_rb(struct intel_pt_queue *ptq) -{ - const struct intel_pt_state *state = ptq->state; - struct branch_stack *bs = ptq->last_branch_rb; - struct branch_entry *be; - - if (!ptq->last_branch_pos) - ptq->last_branch_pos = ptq->pt->synth_opts.last_branch_sz; - - ptq->last_branch_pos -= 1; - - be = &bs->entries[ptq->last_branch_pos]; - be->from = state->from_ip; - be->to = state->to_ip; - be->flags.abort = !!(state->flags & INTEL_PT_ABORT_TX); - be->flags.in_tx = !!(state->flags & INTEL_PT_IN_TX); - /* No support for mispredict */ - be->flags.mispred = ptq->pt->mispred_all; - - if (bs->nr < ptq->pt->synth_opts.last_branch_sz) - bs->nr += 1; -} - static inline bool intel_pt_skip_event(struct intel_pt *pt) { return pt->synth_opts.initial_skip && @@ -1230,6 +1658,17 @@ static void intel_pt_prep_a_sample(struct intel_pt_queue *ptq, sample->pid = ptq->pid; sample->tid = ptq->tid; + + if (ptq->pt->have_guest_sideband) { + if ((ptq->state->from_ip && ptq->state->from_nr) || + (ptq->state->to_ip && ptq->state->to_nr)) { + sample->pid = ptq->guest_pid; + sample->tid = ptq->guest_tid; + sample->machine_pid = ptq->guest_machine_pid; + sample->vcpu = ptq->vcpu; + } + } + sample->cpu = ptq->cpu; sample->insn_len = ptq->insn_len; memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ); @@ -1246,8 +1685,8 @@ static void intel_pt_prep_b_sample(struct intel_pt *pt, sample->time = tsc_to_perf_time(ptq->timestamp, &pt->tc); sample->ip = ptq->state->from_ip; - sample->cpumode = intel_pt_cpumode(pt, sample->ip); sample->addr = ptq->state->to_ip; + sample->cpumode = intel_pt_cpumode(ptq, sample->ip, sample->addr); sample->period = 1; sample->flags = ptq->flags; @@ -1271,9 +1710,9 @@ static inline int intel_pt_opt_inject(struct intel_pt *pt, return intel_pt_inject_event(event, sample, type); } -static int intel_pt_deliver_synth_b_event(struct intel_pt *pt, - union perf_event *event, - struct perf_sample *sample, u64 type) +static int intel_pt_deliver_synth_event(struct intel_pt *pt, + union perf_event *event, + struct perf_sample *sample, u64 type) { int ret; @@ -1295,6 +1734,7 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) struct perf_sample sample = { .ip = 0, }; struct dummy_branch_stack { u64 nr; + u64 hw_idx; struct branch_entry entries; } dummy_bs; @@ -1316,6 +1756,7 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) if (pt->synth_opts.last_branch && sort__mode == SORT_MODE__BRANCH) { dummy_bs = (struct dummy_branch_stack){ .nr = 1, + .hw_idx = -1ULL, .entries = { .from = sample.ip, .to = sample.addr, @@ -1324,15 +1765,16 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) sample.branch_stack = (struct branch_stack *)&dummy_bs; } - sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_br_cyc_cnt; + if (ptq->sample_ipc) + sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_br_cyc_cnt; if (sample.cyc_cnt) { sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_br_insn_cnt; ptq->last_br_insn_cnt = ptq->ipc_insn_cnt; ptq->last_br_cyc_cnt = ptq->ipc_cyc_cnt; } - return intel_pt_deliver_synth_b_event(pt, event, &sample, - pt->branches_sample_type); + return intel_pt_deliver_synth_event(pt, event, &sample, + pt->branches_sample_type); } static void intel_pt_prep_sample(struct intel_pt *pt, @@ -1350,27 +1792,12 @@ static void intel_pt_prep_sample(struct intel_pt *pt, } if (pt->synth_opts.last_branch) { - intel_pt_copy_last_branch_rb(ptq); + thread_stack__br_sample(ptq->thread, ptq->cpu, ptq->last_branch, + pt->br_stack_sz); sample->branch_stack = ptq->last_branch; } } -static inline int intel_pt_deliver_synth_event(struct intel_pt *pt, - struct intel_pt_queue *ptq, - union perf_event *event, - struct perf_sample *sample, - u64 type) -{ - int ret; - - ret = intel_pt_deliver_synth_b_event(pt, event, sample, type); - - if (pt->synth_opts.last_branch) - intel_pt_reset_last_branch_rb(ptq); - - return ret; -} - static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; @@ -1384,9 +1811,13 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) sample.id = ptq->pt->instructions_id; sample.stream_id = ptq->pt->instructions_id; - sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt; + if (pt->synth_opts.quick) + sample.period = 1; + else + sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt; - sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt; + if (ptq->sample_ipc) + sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt; if (sample.cyc_cnt) { sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_in_insn_cnt; ptq->last_in_insn_cnt = ptq->ipc_insn_cnt; @@ -1395,7 +1826,7 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) ptq->last_insn_cnt = ptq->state->tot_insn_cnt; - return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + return intel_pt_deliver_synth_event(pt, event, &sample, pt->instructions_sample_type); } @@ -1413,7 +1844,7 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq) sample.id = ptq->pt->transactions_id; sample.stream_id = ptq->pt->transactions_id; - return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + return intel_pt_deliver_synth_event(pt, event, &sample, pt->transactions_sample_type); } @@ -1454,7 +1885,7 @@ static int intel_pt_synth_ptwrite_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + return intel_pt_deliver_synth_event(pt, event, &sample, pt->ptwrites_sample_type); } @@ -1484,7 +1915,33 @@ static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + return intel_pt_deliver_synth_event(pt, event, &sample, + pt->pwr_events_sample_type); +} + +static int intel_pt_synth_psb_sample(struct intel_pt_queue *ptq) +{ + struct intel_pt *pt = ptq->pt; + union perf_event *event = ptq->event_buf; + struct perf_sample sample = { .ip = 0, }; + struct perf_synth_intel_psb raw; + + if (intel_pt_skip_event(pt)) + return 0; + + intel_pt_prep_p_sample(pt, ptq, event, &sample); + + sample.id = ptq->pt->psb_id; + sample.stream_id = ptq->pt->psb_id; + sample.flags = 0; + + raw.reserved = 0; + raw.offset = ptq->state->psb_offset; + + sample.raw_size = perf_synth__raw_size(raw); + sample.raw_data = perf_synth__raw_data(&raw); + + return intel_pt_deliver_synth_event(pt, event, &sample, pt->pwr_events_sample_type); } @@ -1509,7 +1966,7 @@ static int intel_pt_synth_mwait_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + return intel_pt_deliver_synth_event(pt, event, &sample, pt->pwr_events_sample_type); } @@ -1534,7 +1991,7 @@ static int intel_pt_synth_pwre_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + return intel_pt_deliver_synth_event(pt, event, &sample, pt->pwr_events_sample_type); } @@ -1559,7 +2016,7 @@ static int intel_pt_synth_exstop_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + return intel_pt_deliver_synth_event(pt, event, &sample, pt->pwr_events_sample_type); } @@ -1584,7 +2041,7 @@ static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + return intel_pt_deliver_synth_event(pt, event, &sample, pt->pwr_events_sample_type); } @@ -1678,15 +2135,14 @@ static u64 intel_pt_lbr_flags(u64 info) union { struct branch_flags flags; u64 result; - } u = { - .flags = { - .mispred = !!(info & LBR_INFO_MISPRED), - .predicted = !(info & LBR_INFO_MISPRED), - .in_tx = !!(info & LBR_INFO_IN_TX), - .abort = !!(info & LBR_INFO_ABORT), - .cycles = info & LBR_INFO_CYCLES, - } - }; + } u; + + u.result = 0; + u.flags.mispred = !!(info & LBR_INFO_MISPRED); + u.flags.predicted = !(info & LBR_INFO_MISPRED); + u.flags.in_tx = !!(info & LBR_INFO_IN_TX); + u.flags.abort = !!(info & LBR_INFO_ABORT); + u.flags.cycles = info & LBR_INFO_CYCLES; return u.result; } @@ -1716,19 +2172,15 @@ static void intel_pt_add_lbrs(struct branch_stack *br_stack, } } -/* INTEL_PT_LBR_0, INTEL_PT_LBR_1 and INTEL_PT_LBR_2 */ -#define LBRS_MAX (INTEL_PT_BLK_ITEM_ID_CNT * 3) - -static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) +static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evsel *evsel, u64 id) { const struct intel_pt_blk_items *items = &ptq->state->items; struct perf_sample sample = { .ip = 0, }; union perf_event *event = ptq->event_buf; struct intel_pt *pt = ptq->pt; - struct evsel *evsel = pt->pebs_evsel; u64 sample_type = evsel->core.attr.sample_type; - u64 id = evsel->core.id[0]; u8 cpumode; + u64 regs[8 * sizeof(sample.intr_regs.mask)]; if (intel_pt_skip_event(pt)) return 0; @@ -1749,10 +2201,7 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) else sample.ip = ptq->state->from_ip; - /* No support for guest mode at this time */ - cpumode = sample.ip < ptq->pt->kernel_start ? - PERF_RECORD_MISC_USER : - PERF_RECORD_MISC_KERNEL; + cpumode = intel_pt_cpumode(ptq, sample.ip, 0); event->sample.header.misc = cpumode | PERF_RECORD_MISC_EXACT_IP; @@ -1778,8 +2227,8 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) } if (sample_type & PERF_SAMPLE_REGS_INTR && - items->mask[INTEL_PT_GP_REGS_POS]) { - u64 regs[sizeof(sample.intr_regs.mask)]; + (items->mask[INTEL_PT_GP_REGS_POS] || + items->mask[INTEL_PT_XMM_POS])) { u64 regs_mask = evsel->core.attr.sample_regs_intr; u64 *pos; @@ -1794,35 +2243,47 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) } if (sample_type & PERF_SAMPLE_BRANCH_STACK) { - struct { - struct branch_stack br_stack; - struct branch_entry entries[LBRS_MAX]; - } br; - if (items->mask[INTEL_PT_LBR_0_POS] || items->mask[INTEL_PT_LBR_1_POS] || items->mask[INTEL_PT_LBR_2_POS]) { - intel_pt_add_lbrs(&br.br_stack, items); - sample.branch_stack = &br.br_stack; + intel_pt_add_lbrs(ptq->last_branch, items); } else if (pt->synth_opts.last_branch) { - intel_pt_copy_last_branch_rb(ptq); - sample.branch_stack = ptq->last_branch; + thread_stack__br_sample(ptq->thread, ptq->cpu, + ptq->last_branch, + pt->br_stack_sz); } else { - br.br_stack.nr = 0; - sample.branch_stack = &br.br_stack; + ptq->last_branch->nr = 0; } + sample.branch_stack = ptq->last_branch; } if (sample_type & PERF_SAMPLE_ADDR && items->has_mem_access_address) sample.addr = items->mem_access_address; - if (sample_type & PERF_SAMPLE_WEIGHT) { + if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) { /* * Refer kernel's setup_pebs_adaptive_sample_data() and * intel_hsw_weight(). */ - if (items->has_mem_access_latency) - sample.weight = items->mem_access_latency; + if (items->has_mem_access_latency) { + u64 weight = items->mem_access_latency >> 32; + + /* + * Starts from SPR, the mem access latency field + * contains both cache latency [47:32] and instruction + * latency [15:0]. The cache latency is the same as the + * mem access latency on previous platforms. + * + * In practice, no memory access could last than 4G + * cycles. Use latency >> 32 to distinguish the + * different format of the mem access latency field. + */ + if (weight > 0) { + sample.weight = weight & 0xffff; + sample.ins_lat = items->mem_access_latency & 0xffff; + } else + sample.weight = items->mem_access_latency; + } if (!sample.weight && items->has_tsx_aux_info) { /* Cycles last block */ sample.weight = (u32)items->tsx_aux_info; @@ -1840,20 +2301,154 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) sample.transaction = txn; } - return intel_pt_deliver_synth_event(pt, ptq, event, &sample, sample_type); + return intel_pt_deliver_synth_event(pt, event, &sample, sample_type); +} + +static int intel_pt_synth_single_pebs_sample(struct intel_pt_queue *ptq) +{ + struct intel_pt *pt = ptq->pt; + struct evsel *evsel = pt->pebs_evsel; + u64 id = evsel->core.id[0]; + + return intel_pt_do_synth_pebs_sample(ptq, evsel, id); +} + +static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) +{ + const struct intel_pt_blk_items *items = &ptq->state->items; + struct intel_pt_pebs_event *pe; + struct intel_pt *pt = ptq->pt; + int err = -EINVAL; + int hw_id; + + if (!items->has_applicable_counters || !items->applicable_counters) { + if (!pt->single_pebs) + pr_err("PEBS-via-PT record with no applicable_counters\n"); + return intel_pt_synth_single_pebs_sample(ptq); + } + + for_each_set_bit(hw_id, (unsigned long *)&items->applicable_counters, INTEL_PT_MAX_PEBS) { + pe = &ptq->pebs[hw_id]; + if (!pe->evsel) { + if (!pt->single_pebs) + pr_err("PEBS-via-PT record with no matching event, hw_id %d\n", + hw_id); + return intel_pt_synth_single_pebs_sample(ptq); + } + err = intel_pt_do_synth_pebs_sample(ptq, pe->evsel, pe->id); + if (err) + return err; + } + + return err; +} + +static int intel_pt_synth_events_sample(struct intel_pt_queue *ptq) +{ + struct intel_pt *pt = ptq->pt; + union perf_event *event = ptq->event_buf; + struct perf_sample sample = { .ip = 0, }; + struct { + struct perf_synth_intel_evt cfe; + struct perf_synth_intel_evd evd[INTEL_PT_MAX_EVDS]; + } raw; + int i; + + if (intel_pt_skip_event(pt)) + return 0; + + intel_pt_prep_p_sample(pt, ptq, event, &sample); + + sample.id = ptq->pt->evt_id; + sample.stream_id = ptq->pt->evt_id; + + raw.cfe.type = ptq->state->cfe_type; + raw.cfe.reserved = 0; + raw.cfe.ip = !!(ptq->state->flags & INTEL_PT_FUP_IP); + raw.cfe.vector = ptq->state->cfe_vector; + raw.cfe.evd_cnt = ptq->state->evd_cnt; + + for (i = 0; i < ptq->state->evd_cnt; i++) { + raw.evd[i].et = 0; + raw.evd[i].evd_type = ptq->state->evd[i].type; + raw.evd[i].payload = ptq->state->evd[i].payload; + } + + sample.raw_size = perf_synth__raw_size(raw) + + ptq->state->evd_cnt * sizeof(struct perf_synth_intel_evd); + sample.raw_data = perf_synth__raw_data(&raw); + + return intel_pt_deliver_synth_event(pt, event, &sample, + pt->evt_sample_type); +} + +static int intel_pt_synth_iflag_chg_sample(struct intel_pt_queue *ptq) +{ + struct intel_pt *pt = ptq->pt; + union perf_event *event = ptq->event_buf; + struct perf_sample sample = { .ip = 0, }; + struct perf_synth_intel_iflag_chg raw; + + if (intel_pt_skip_event(pt)) + return 0; + + intel_pt_prep_p_sample(pt, ptq, event, &sample); + + sample.id = ptq->pt->iflag_chg_id; + sample.stream_id = ptq->pt->iflag_chg_id; + + raw.flags = 0; + raw.iflag = ptq->state->to_iflag; + + if (ptq->state->type & INTEL_PT_BRANCH) { + raw.via_branch = 1; + raw.branch_ip = ptq->state->to_ip; + } else { + sample.addr = 0; + } + sample.flags = ptq->flags; + + sample.raw_size = perf_synth__raw_size(raw); + sample.raw_data = perf_synth__raw_data(&raw); + + return intel_pt_deliver_synth_event(pt, event, &sample, + pt->iflag_chg_sample_type); } static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu, - pid_t pid, pid_t tid, u64 ip, u64 timestamp) + pid_t pid, pid_t tid, u64 ip, u64 timestamp, + pid_t machine_pid, int vcpu) { + bool dump_log_on_error = pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_ON_ERROR; + bool log_on_stdout = pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_USE_STDOUT; union perf_event event; char msg[MAX_AUXTRACE_ERROR_MSG]; int err; + if (pt->synth_opts.error_minus_flags) { + if (code == INTEL_PT_ERR_OVR && + pt->synth_opts.error_minus_flags & AUXTRACE_ERR_FLG_OVERFLOW) + return 0; + if (code == INTEL_PT_ERR_LOST && + pt->synth_opts.error_minus_flags & AUXTRACE_ERR_FLG_DATA_LOST) + return 0; + } + intel_pt__strerror(code, msg, MAX_AUXTRACE_ERROR_MSG); - auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, - code, cpu, pid, tid, ip, msg, timestamp); + auxtrace_synth_guest_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, + code, cpu, pid, tid, ip, msg, timestamp, + machine_pid, vcpu); + + if (intel_pt_enable_logging && !log_on_stdout) { + FILE *fp = intel_pt_log_fp(); + + if (fp) + perf_event__fprintf_auxtrace_error(&event, fp); + } + + if (code != INTEL_PT_ERR_LOST && dump_log_on_error) + intel_pt_log_dump_buf(); err = perf_session__deliver_synth_event(pt->session, &event, NULL); if (err) @@ -1868,11 +2463,22 @@ static int intel_ptq_synth_error(struct intel_pt_queue *ptq, { struct intel_pt *pt = ptq->pt; u64 tm = ptq->timestamp; + pid_t machine_pid = 0; + pid_t pid = ptq->pid; + pid_t tid = ptq->tid; + int vcpu = -1; tm = pt->timeless_decoding ? 0 : tsc_to_perf_time(tm, &pt->tc); - return intel_pt_synth_error(pt, state->err, ptq->cpu, ptq->pid, - ptq->tid, state->from_ip, tm); + if (pt->have_guest_sideband && state->from_nr) { + machine_pid = ptq->guest_machine_pid; + vcpu = ptq->vcpu; + pid = ptq->guest_pid; + tid = ptq->guest_tid; + } + + return intel_pt_synth_error(pt, state->err, ptq->cpu, pid, tid, + state->from_ip, tm, machine_pid, vcpu); } static int intel_pt_next_tid(struct intel_pt *pt, struct intel_pt_queue *ptq) @@ -1920,15 +2526,20 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) ptq->have_sample = false; - if (ptq->state->tot_cyc_cnt > ptq->ipc_cyc_cnt) { - /* - * Cycle count and instruction count only go together to create - * a valid IPC ratio when the cycle count changes. - */ + if (pt->synth_opts.approx_ipc) { + ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt; + ptq->ipc_cyc_cnt = ptq->state->cycles; + ptq->sample_ipc = true; + } else { ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt; ptq->ipc_cyc_cnt = ptq->state->tot_cyc_cnt; + ptq->sample_ipc = ptq->state->flags & INTEL_PT_SAMPLE_IPC; } + /* Ensure guest code maps are set up */ + if (symbol_conf.guest_code && (state->from_nr || state->to_nr)) + intel_pt_get_guest(ptq); + /* * Do PEBS first to allow for the possibility that the PEBS timestamp * precedes the current timestamp. @@ -1939,7 +2550,25 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) return err; } + if (pt->synth_opts.intr_events) { + if (state->type & INTEL_PT_EVT) { + err = intel_pt_synth_events_sample(ptq); + if (err) + return err; + } + if (state->type & INTEL_PT_IFLAG_CHG) { + err = intel_pt_synth_iflag_chg_sample(ptq); + if (err) + return err; + } + } + if (pt->sample_pwr_events) { + if (state->type & INTEL_PT_PSB_EVT) { + err = intel_pt_synth_psb_sample(ptq); + if (err) + return err; + } if (ptq->state->cbr != ptq->cbr_seen) { err = intel_pt_synth_cbr_sample(ptq); if (err) @@ -1990,22 +2619,42 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) if (!(state->type & INTEL_PT_BRANCH)) return 0; - if (pt->synth_opts.callchain || pt->synth_opts.thread_stack) - thread_stack__event(ptq->thread, ptq->cpu, ptq->flags, state->from_ip, - state->to_ip, ptq->insn_len, - state->trace_nr); - else + if (pt->use_thread_stack) { + thread_stack__event(ptq->thread, ptq->cpu, ptq->flags, + state->from_ip, state->to_ip, ptq->insn_len, + state->trace_nr, pt->callstack, + pt->br_stack_sz_plus, + pt->mispred_all); + } else { thread_stack__set_trace_nr(ptq->thread, ptq->cpu, state->trace_nr); + } if (pt->sample_branches) { - err = intel_pt_synth_branch_sample(ptq); + if (state->from_nr != state->to_nr && + state->from_ip && state->to_ip) { + struct intel_pt_state *st = (struct intel_pt_state *)state; + u64 to_ip = st->to_ip; + u64 from_ip = st->from_ip; + + /* + * perf cannot handle having different machines for ip + * and addr, so create 2 branches. + */ + st->to_ip = 0; + err = intel_pt_synth_branch_sample(ptq); + if (err) + return err; + st->from_ip = 0; + st->to_ip = to_ip; + err = intel_pt_synth_branch_sample(ptq); + st->from_ip = from_ip; + } else { + err = intel_pt_synth_branch_sample(ptq); + } if (err) return err; } - if (pt->synth_opts.last_branch) - intel_pt_update_last_branch_rb(ptq); - if (!ptq->sync_switch) return 0; @@ -2092,6 +2741,9 @@ static void intel_pt_enable_sync_switch(struct intel_pt *pt) { unsigned int i; + if (pt->sync_switch_not_supported) + return; + pt->sync_switch = true; for (i = 0; i < pt->queues.nr_queues; i++) { @@ -2103,6 +2755,23 @@ static void intel_pt_enable_sync_switch(struct intel_pt *pt) } } +static void intel_pt_disable_sync_switch(struct intel_pt *pt) +{ + unsigned int i; + + pt->sync_switch = false; + + for (i = 0; i < pt->queues.nr_queues; i++) { + struct auxtrace_queue *queue = &pt->queues.queue_array[i]; + struct intel_pt_queue *ptq = queue->priv; + + if (ptq) { + ptq->sync_switch = false; + intel_pt_next_tid(pt, ptq); + } + } +} + /* * To filter against time ranges, it is only necessary to look at the next start * or end time. @@ -2187,7 +2856,7 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp) if (pt->per_cpu_mmaps && (pt->have_sched_switch == 1 || pt->have_sched_switch == 3) && !pt->timeless_decoding && intel_pt_tracing_kernel(pt) && - !pt->sampling_mode) { + !pt->sampling_mode && !pt->synth_opts.vm_time_correlation) { pt->switch_ip = intel_pt_switch_ip(pt, &pt->ptss_ip); if (pt->switch_ip) { intel_pt_log("switch_ip: %"PRIx64" ptss_ip: %"PRIx64"\n", @@ -2213,6 +2882,7 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp) ptq->sync_switch = false; intel_pt_next_tid(pt, ptq); } + ptq->timestamp = state->est_timestamp; if (pt->synth_opts.errors) { err = intel_ptq_synth_error(ptq, state); if (err) @@ -2395,7 +3065,8 @@ static int intel_pt_process_timeless_sample(struct intel_pt *pt, static int intel_pt_lost(struct intel_pt *pt, struct perf_sample *sample) { return intel_pt_synth_error(pt, INTEL_PT_ERR_LOST, sample->cpu, - sample->pid, sample->tid, 0, sample->time); + sample->pid, sample->tid, 0, sample->time, + sample->machine_pid, sample->vcpu); } static struct intel_pt_queue *intel_pt_cpu_to_ptq(struct intel_pt *pt, int cpu) @@ -2474,15 +3145,14 @@ static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid, static int intel_pt_process_switch(struct intel_pt *pt, struct perf_sample *sample) { - struct evsel *evsel; pid_t tid; int cpu, ret; + struct evsel *evsel = evlist__id2evsel(pt->session->evlist, sample->id); - evsel = perf_evlist__id2evsel(pt->session->evlist, sample->id); if (evsel != pt->switch_evsel) return 0; - tid = perf_evsel__intval(evsel, sample, "next_pid"); + tid = evsel__intval(evsel, sample, "next_pid"); cpu = sample->cpu; intel_pt_log("sched_switch: cpu %d tid %d time %"PRIu64" tsc %#"PRIx64"\n", @@ -2534,6 +3204,33 @@ static int intel_pt_context_switch_in(struct intel_pt *pt, return machine__set_current_tid(pt->machine, cpu, pid, tid); } +static int intel_pt_guest_context_switch(struct intel_pt *pt, + union perf_event *event, + struct perf_sample *sample) +{ + bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT; + struct machines *machines = &pt->session->machines; + struct machine *machine = machines__find(machines, sample->machine_pid); + + pt->have_guest_sideband = true; + + /* + * sync_switch cannot handle guest machines at present, so just disable + * it. + */ + pt->sync_switch_not_supported = true; + if (pt->sync_switch) + intel_pt_disable_sync_switch(pt); + + if (out) + return 0; + + if (!machine) + return -EINVAL; + + return machine__set_current_tid(machine, sample->vcpu, sample->pid, sample->tid); +} + static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event, struct perf_sample *sample) { @@ -2541,6 +3238,9 @@ static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event, pid_t pid, tid; int cpu, ret; + if (perf_event__is_guest(event)) + return intel_pt_guest_context_switch(pt, event, sample); + cpu = sample->cpu; if (pt->have_sched_switch == 3) { @@ -2559,14 +3259,8 @@ static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event, tid = sample->tid; } - if (tid == -1) { - pr_err("context_switch event has no tid\n"); - return -EINVAL; - } - - intel_pt_log("context_switch: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n", - cpu, pid, tid, sample->time, perf_time_to_tsc(sample->time, - &pt->tc)); + if (tid == -1) + intel_pt_log("context_switch event has no tid\n"); ret = intel_pt_sync_switch(pt, cpu, tid, sample->time); if (ret <= 0) @@ -2592,6 +3286,91 @@ static int intel_pt_process_itrace_start(struct intel_pt *pt, event->itrace_start.tid); } +static int intel_pt_process_aux_output_hw_id(struct intel_pt *pt, + union perf_event *event, + struct perf_sample *sample) +{ + u64 hw_id = event->aux_output_hw_id.hw_id; + struct auxtrace_queue *queue; + struct intel_pt_queue *ptq; + struct evsel *evsel; + + queue = auxtrace_queues__sample_queue(&pt->queues, sample, pt->session); + evsel = evlist__id2evsel_strict(pt->session->evlist, sample->id); + if (!queue || !queue->priv || !evsel || hw_id > INTEL_PT_MAX_PEBS) { + pr_err("Bad AUX output hardware ID\n"); + return -EINVAL; + } + + ptq = queue->priv; + + ptq->pebs[hw_id].evsel = evsel; + ptq->pebs[hw_id].id = sample->id; + + return 0; +} + +static int intel_pt_find_map(struct thread *thread, u8 cpumode, u64 addr, + struct addr_location *al) +{ + if (!al->map || addr < al->map->start || addr >= al->map->end) { + if (!thread__find_map(thread, cpumode, addr, al)) + return -1; + } + + return 0; +} + +/* Invalidate all instruction cache entries that overlap the text poke */ +static int intel_pt_text_poke(struct intel_pt *pt, union perf_event *event) +{ + u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + u64 addr = event->text_poke.addr + event->text_poke.new_len - 1; + /* Assume text poke begins in a basic block no more than 4096 bytes */ + int cnt = 4096 + event->text_poke.new_len; + struct thread *thread = pt->unknown_thread; + struct addr_location al = { .map = NULL }; + struct machine *machine = pt->machine; + struct intel_pt_cache_entry *e; + u64 offset; + + if (!event->text_poke.new_len) + return 0; + + for (; cnt; cnt--, addr--) { + if (intel_pt_find_map(thread, cpumode, addr, &al)) { + if (addr < event->text_poke.addr) + return 0; + continue; + } + + if (!al.map->dso || !al.map->dso->auxtrace_cache) + continue; + + offset = al.map->map_ip(al.map, addr); + + e = intel_pt_cache_lookup(al.map->dso, machine, offset); + if (!e) + continue; + + if (addr + e->byte_cnt + e->length <= event->text_poke.addr) { + /* + * No overlap. Working backwards there cannot be another + * basic block that overlaps the text poke if there is a + * branch instruction before the text poke address. + */ + if (e->branch != INTEL_PT_BR_NO_BRANCH) + return 0; + } else { + intel_pt_cache_invalidate(al.map->dso, machine, offset); + intel_pt_log("Invalidated instruction cache for %s at %#"PRIx64"\n", + al.map->dso->long_name, addr); + } + } + + return 0; +} + static int intel_pt_process_event(struct perf_session *session, union perf_event *event, struct perf_sample *sample, @@ -2632,11 +3411,20 @@ static int intel_pt_process_event(struct perf_session *session, sample->time); } } else if (timestamp) { + if (!pt->first_timestamp) + intel_pt_first_timestamp(pt, timestamp); err = intel_pt_process_queues(pt, timestamp); } if (err) return err; + if (event->header.type == PERF_RECORD_SAMPLE) { + if (pt->synth_opts.add_callchain && !sample->callchain) + intel_pt_add_callchain(pt, sample); + if (pt->synth_opts.add_last_branch && !sample->branch_stack) + intel_pt_add_br_stack(pt, sample); + } + if (event->header.type == PERF_RECORD_AUX && (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) && pt->synth_opts.errors) { @@ -2649,13 +3437,20 @@ static int intel_pt_process_event(struct perf_session *session, err = intel_pt_process_switch(pt, sample); else if (event->header.type == PERF_RECORD_ITRACE_START) err = intel_pt_process_itrace_start(pt, event, sample); + else if (event->header.type == PERF_RECORD_AUX_OUTPUT_HW_ID) + err = intel_pt_process_aux_output_hw_id(pt, event, sample); else if (event->header.type == PERF_RECORD_SWITCH || event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) err = intel_pt_context_switch(pt, event, sample); - intel_pt_log("event %u: cpu %d time %"PRIu64" tsc %#"PRIx64" ", - event->header.type, sample->cpu, sample->time, timestamp); - intel_pt_log_event(event); + if (!err && event->header.type == PERF_RECORD_TEXT_POKE) + err = intel_pt_text_poke(pt, event); + + if (intel_pt_enable_logging && intel_pt_log_events(pt, sample->time)) { + intel_pt_log("event %u: cpu %d time %"PRIu64" tsc %#"PRIx64" ", + event->header.type, sample->cpu, sample->time, timestamp); + intel_pt_log_event(event); + } return err; } @@ -2706,13 +3501,24 @@ static void intel_pt_free(struct perf_session *session) auxtrace_heap__free(&pt->heap); intel_pt_free_events(session); session->auxtrace = NULL; + intel_pt_free_vmcs_info(pt); thread__put(pt->unknown_thread); addr_filters__exit(&pt->filts); + zfree(&pt->chain); zfree(&pt->filter); zfree(&pt->time_ranges); free(pt); } +static bool intel_pt_evsel_is_auxtrace(struct perf_session *session, + struct evsel *evsel) +{ + struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, + auxtrace); + + return evsel->core.attr.type == pt->pmu_type; +} + static int intel_pt_process_auxtrace_event(struct perf_session *session, union perf_event *event, struct perf_tool *tool __maybe_unused) @@ -2894,8 +3700,15 @@ static int intel_pt_synth_events(struct intel_pt *pt, if (pt->synth_opts.callchain) attr.sample_type |= PERF_SAMPLE_CALLCHAIN; - if (pt->synth_opts.last_branch) + if (pt->synth_opts.last_branch) { attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; + /* + * We don't use the hardware index, but the sample generation + * code uses the new format branch_stack with this field, + * so the event attributes must indicate that it's present. + */ + attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX; + } if (pt->synth_opts.instructions) { attr.config = PERF_COUNT_HW_INSTRUCTIONS; @@ -2954,9 +3767,17 @@ static int intel_pt_synth_events(struct intel_pt *pt, pt->cbr_id = id; intel_pt_set_event_name(evlist, id, "cbr"); id += 1; + + attr.config = PERF_SYNTH_INTEL_PSB; + err = intel_pt_synth_event(session, "psb", &attr, id); + if (err) + return err; + pt->psb_id = id; + intel_pt_set_event_name(evlist, id, "psb"); + id += 1; } - if (pt->synth_opts.pwr_events && (evsel->core.attr.config & 0x10)) { + if (pt->synth_opts.pwr_events && (evsel->core.attr.config & INTEL_PT_CFG_PWR_EVT_EN)) { attr.config = PERF_SYNTH_INTEL_MWAIT; err = intel_pt_synth_event(session, "mwait", &attr, id); if (err) @@ -2990,6 +3811,28 @@ static int intel_pt_synth_events(struct intel_pt *pt, id += 1; } + if (pt->synth_opts.intr_events && (evsel->core.attr.config & INTEL_PT_CFG_EVT_EN)) { + attr.config = PERF_SYNTH_INTEL_EVT; + err = intel_pt_synth_event(session, "evt", &attr, id); + if (err) + return err; + pt->evt_sample_type = attr.sample_type; + pt->evt_id = id; + intel_pt_set_event_name(evlist, id, "evt"); + id += 1; + } + + if (pt->synth_opts.intr_events && pt->cap_event_trace) { + attr.config = PERF_SYNTH_INTEL_IFLAG_CHG; + err = intel_pt_synth_event(session, "iflag", &attr, id); + if (err) + return err; + pt->iflag_chg_sample_type = attr.sample_type; + pt->iflag_chg_id = id; + intel_pt_set_event_name(evlist, id, "iflag"); + id += 1; + } + return 0; } @@ -3002,9 +3845,13 @@ static void intel_pt_setup_pebs_events(struct intel_pt *pt) evlist__for_each_entry(pt->session->evlist, evsel) { if (evsel->core.attr.aux_output && evsel->core.id) { + if (pt->single_pebs) { + pt->single_pebs = false; + return; + } + pt->single_pebs = true; pt->sample_pebs = true; pt->pebs_evsel = evsel; - return; } } } @@ -3014,7 +3861,7 @@ static struct evsel *intel_pt_find_sched_switch(struct evlist *evlist) struct evsel *evsel; evlist__for_each_entry_reverse(evlist, evsel) { - const char *name = perf_evsel__name(evsel); + const char *name = evsel__name(evsel); if (!strcmp(name, "sched:sched_switch")) return evsel; @@ -3042,6 +3889,9 @@ static int intel_pt_perf_config(const char *var, const char *value, void *data) if (!strcmp(var, "intel-pt.mispred-all")) pt->mispred_all = perf_config_bool(var, value); + if (!strcmp(var, "intel-pt.max-loops")) + perf_config_int(&pt->max_loops, var, value); + return 0; } @@ -3124,6 +3974,66 @@ static int intel_pt_setup_time_ranges(struct intel_pt *pt, return 0; } +static int intel_pt_parse_vm_tm_corr_arg(struct intel_pt *pt, char **args) +{ + struct intel_pt_vmcs_info *vmcs_info; + u64 tsc_offset, vmcs; + char *p = *args; + + errno = 0; + + p = skip_spaces(p); + if (!*p) + return 1; + + tsc_offset = strtoull(p, &p, 0); + if (errno) + return -errno; + p = skip_spaces(p); + if (*p != ':') { + pt->dflt_tsc_offset = tsc_offset; + *args = p; + return 0; + } + p += 1; + while (1) { + vmcs = strtoull(p, &p, 0); + if (errno) + return -errno; + if (!vmcs) + return -EINVAL; + vmcs_info = intel_pt_findnew_vmcs(&pt->vmcs_info, vmcs, tsc_offset); + if (!vmcs_info) + return -ENOMEM; + p = skip_spaces(p); + if (*p != ',') + break; + p += 1; + } + *args = p; + return 0; +} + +static int intel_pt_parse_vm_tm_corr_args(struct intel_pt *pt) +{ + char *args = pt->synth_opts.vm_tm_corr_args; + int ret; + + if (!args) + return 0; + + do { + ret = intel_pt_parse_vm_tm_corr_arg(pt, &args); + } while (!ret); + + if (ret < 0) { + pr_err("Failed to parse VM Time Correlation options\n"); + return ret; + } + + return 0; +} + static const char * const intel_pt_info_fmts[] = { [INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n", [INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n", @@ -3136,6 +4046,7 @@ static const char * const intel_pt_info_fmts[] = { [INTEL_PT_SNAPSHOT_MODE] = " Snapshot mode %"PRId64"\n", [INTEL_PT_PER_CPU_MMAPS] = " Per-cpu maps %"PRId64"\n", [INTEL_PT_MTC_BIT] = " MTC bit %#"PRIx64"\n", + [INTEL_PT_MTC_FREQ_BITS] = " MTC freq bits %#"PRIx64"\n", [INTEL_PT_TSC_CTC_N] = " TSC:CTC numerator %"PRIu64"\n", [INTEL_PT_TSC_CTC_D] = " TSC:CTC denominator %"PRIu64"\n", [INTEL_PT_CYC_BIT] = " CYC bit %#"PRIx64"\n", @@ -3150,8 +4061,12 @@ static void intel_pt_print_info(__u64 *arr, int start, int finish) if (!dump_trace) return; - for (i = start; i <= finish; i++) - fprintf(stdout, intel_pt_info_fmts[i], arr[i]); + for (i = start; i <= finish; i++) { + const char *fmt = intel_pt_info_fmts[i]; + + if (fmt) + fprintf(stdout, fmt, arr[i]); + } } static void intel_pt_print_info_str(const char *name, const char *str) @@ -3186,6 +4101,8 @@ int intel_pt_process_auxtrace_info(union perf_event *event, if (!pt) return -ENOMEM; + pt->vmcs_info = RB_ROOT; + addr_filters__init(&pt->filts); err = perf_config(intel_pt_perf_config, pt); @@ -3196,7 +4113,22 @@ int intel_pt_process_auxtrace_info(union perf_event *event, if (err) goto err_free; - intel_pt_log_set_name(INTEL_PT_PMU_NAME); + if (session->itrace_synth_opts->set) { + pt->synth_opts = *session->itrace_synth_opts; + } else { + struct itrace_synth_opts *opts = session->itrace_synth_opts; + + itrace_synth_opts__set_default(&pt->synth_opts, opts->default_no_sample); + if (!opts->default_no_sample && !opts->inject) { + pt->synth_opts.branches = false; + pt->synth_opts.callchain = true; + pt->synth_opts.add_callchain = true; + } + pt->synth_opts.thread_stack = opts->thread_stack; + } + + if (!(pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_USE_STDOUT)) + intel_pt_log_set_name(INTEL_PT_PMU_NAME); pt->session = session; pt->machine = &session->machines.host; /* No kvm support */ @@ -3233,7 +4165,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event, } info = &auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] + 1; - info_end = (void *)info + auxtrace_info->header.size; + info_end = (void *)auxtrace_info + auxtrace_info->header.size; if (intel_pt_has(auxtrace_info, INTEL_PT_FILTER_STR_LEN)) { size_t len; @@ -3272,6 +4204,13 @@ int intel_pt_process_auxtrace_info(union perf_event *event, intel_pt_print_info_str("Filter string", pt->filter); } + if ((void *)info < info_end) { + pt->cap_event_trace = *info++; + if (dump_trace) + fprintf(stdout, " Cap Event Trace %d\n", + pt->cap_event_trace); + } + pt->timeless_decoding = intel_pt_timeless_decoding(pt); if (pt->timeless_decoding && !pt->tc.time_mult) pt->tc.time_mult = 1; @@ -3279,6 +4218,28 @@ int intel_pt_process_auxtrace_info(union perf_event *event, pt->sampling_mode = intel_pt_sampling_mode(pt); pt->est_tsc = !pt->timeless_decoding; + if (pt->synth_opts.vm_time_correlation) { + if (pt->timeless_decoding) { + pr_err("Intel PT has no time information for VM Time Correlation\n"); + err = -EINVAL; + goto err_free_queues; + } + if (session->itrace_synth_opts->ptime_range) { + pr_err("Time ranges cannot be specified with VM Time Correlation\n"); + err = -EINVAL; + goto err_free_queues; + } + /* Currently TSC Offset is calculated using MTC packets */ + if (!intel_pt_have_mtc(pt)) { + pr_err("MTC packets must have been enabled for VM Time Correlation\n"); + err = -EINVAL; + goto err_free_queues; + } + err = intel_pt_parse_vm_tm_corr_args(pt); + if (err) + goto err_free_queues; + } + pt->unknown_thread = thread__new(999999999, 999999999); if (!pt->unknown_thread) { err = -ENOMEM; @@ -3288,7 +4249,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event, /* * Since this thread will not be kept in any rbtree not in a * list, initialize its list node so that at thread__put() the - * current thread lifetime assuption is kept and we don't segfault + * current thread lifetime assumption is kept and we don't segfault * at list_del_init(). */ INIT_LIST_HEAD(&pt->unknown_thread->node); @@ -3308,6 +4269,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event, pt->auxtrace.flush_events = intel_pt_flush; pt->auxtrace.free_events = intel_pt_free_events; pt->auxtrace.free = intel_pt_free; + pt->auxtrace.evsel_is_auxtrace = intel_pt_evsel_is_auxtrace; session->auxtrace = &pt->auxtrace; if (dump_trace) @@ -3327,22 +4289,12 @@ int intel_pt_process_auxtrace_info(union perf_event *event, goto err_delete_thread; } - if (session->itrace_synth_opts->set) { - pt->synth_opts = *session->itrace_synth_opts; - } else { - itrace_synth_opts__set_default(&pt->synth_opts, - session->itrace_synth_opts->default_no_sample); - if (!session->itrace_synth_opts->default_no_sample && - !session->itrace_synth_opts->inject) { - pt->synth_opts.branches = false; - pt->synth_opts.callchain = true; - } - pt->synth_opts.thread_stack = - session->itrace_synth_opts->thread_stack; - } + if (pt->synth_opts.log) { + bool log_on_error = pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_ON_ERROR; + unsigned int log_on_error_size = pt->synth_opts.log_on_error_size; - if (pt->synth_opts.log) - intel_pt_log_enable(); + intel_pt_log_enable(log_on_error, log_on_error_size); + } /* Maximum non-turbo ratio is TSC freq / 100 MHz */ if (pt->tc.time_mult) { @@ -3368,14 +4320,54 @@ int intel_pt_process_auxtrace_info(union perf_event *event, pt->branches_filter |= PERF_IP_FLAG_RETURN | PERF_IP_FLAG_TRACE_BEGIN; - if (pt->synth_opts.callchain && !symbol_conf.use_callchain) { + if ((pt->synth_opts.callchain || pt->synth_opts.add_callchain) && + !symbol_conf.use_callchain) { symbol_conf.use_callchain = true; if (callchain_register_param(&callchain_param) < 0) { symbol_conf.use_callchain = false; pt->synth_opts.callchain = false; + pt->synth_opts.add_callchain = false; } } + if (pt->synth_opts.add_callchain) { + err = intel_pt_callchain_init(pt); + if (err) + goto err_delete_thread; + } + + if (pt->synth_opts.last_branch || pt->synth_opts.add_last_branch) { + pt->br_stack_sz = pt->synth_opts.last_branch_sz; + pt->br_stack_sz_plus = pt->br_stack_sz; + } + + if (pt->synth_opts.add_last_branch) { + err = intel_pt_br_stack_init(pt); + if (err) + goto err_delete_thread; + /* + * Additional branch stack size to cater for tracing from the + * actual sample ip to where the sample time is recorded. + * Measured at about 200 branches, but generously set to 1024. + * If kernel space is not being traced, then add just 1 for the + * branch to kernel space. + */ + if (intel_pt_tracing_kernel(pt)) + pt->br_stack_sz_plus += 1024; + else + pt->br_stack_sz_plus += 1; + } + + pt->use_thread_stack = pt->synth_opts.callchain || + pt->synth_opts.add_callchain || + pt->synth_opts.thread_stack || + pt->synth_opts.last_branch || + pt->synth_opts.add_last_branch; + + pt->callstack = pt->synth_opts.callchain || + pt->synth_opts.add_callchain || + pt->synth_opts.thread_stack; + err = intel_pt_synth_events(pt, session); if (err) goto err_delete_thread; @@ -3398,6 +4390,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event, return 0; err_delete_thread: + zfree(&pt->chain); thread__zput(pt->unknown_thread); err_free_queues: intel_pt_log_disable(); |