diff options
Diffstat (limited to '')
-rw-r--r-- | tools/perf/util/cs-etm.c | 974 |
1 files changed, 743 insertions, 231 deletions
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 5471045ebf5c..16db965ac995 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -7,6 +7,7 @@ */ #include <linux/bitops.h> +#include <linux/coresight-pmu.h> #include <linux/err.h> #include <linux/kernel.h> #include <linux/log2.h> @@ -37,8 +38,6 @@ #include <tools/libc_compat.h> #include "util/synthetic-events.h" -#define MAX_TIMESTAMP (~0ULL) - struct cs_etm_auxtrace { struct auxtrace auxtrace; struct auxtrace_queues queues; @@ -51,10 +50,9 @@ struct cs_etm_auxtrace { u8 timeless_decoding; u8 snapshot_mode; u8 data_queued; - u8 sample_branches; - u8 sample_instructions; int num_cpu; + u64 latest_kernel_timestamp; u32 auxtrace_type; u64 branches_sample_type; u64 branches_id; @@ -62,7 +60,6 @@ struct cs_etm_auxtrace { u64 instructions_sample_period; u64 instructions_id; u64 **metadata; - u64 kernel_start; unsigned int pmu_type; }; @@ -85,7 +82,7 @@ struct cs_etm_queue { struct cs_etm_decoder *decoder; struct auxtrace_buffer *buffer; unsigned int queue_nr; - u8 pending_timestamp; + u8 pending_timestamp_chan_id; u64 offset; const unsigned char *buf; size_t buf_len, buf_used; @@ -94,7 +91,9 @@ struct cs_etm_queue { struct cs_etm_traceid_queue **traceid_queues; }; -static int cs_etm__update_queues(struct cs_etm_auxtrace *etm); +/* RB tree for quick conversion between traceID and metadata pointers */ +static struct intlist *traceid_list; + static int cs_etm__process_queues(struct cs_etm_auxtrace *etm); static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, pid_t tid); @@ -153,17 +152,58 @@ int cs_etm__get_cpu(u8 trace_chan_id, int *cpu) return 0; } +/* + * The returned PID format is presented by two bits: + * + * Bit ETM_OPT_CTXTID: CONTEXTIDR or CONTEXTIDR_EL1 is traced; + * Bit ETM_OPT_CTXTID2: CONTEXTIDR_EL2 is traced. + * + * It's possible that the two bits ETM_OPT_CTXTID and ETM_OPT_CTXTID2 + * are enabled at the same time when the session runs on an EL2 kernel. + * This means the CONTEXTIDR_EL1 and CONTEXTIDR_EL2 both will be + * recorded in the trace data, the tool will selectively use + * CONTEXTIDR_EL2 as PID. + */ +int cs_etm__get_pid_fmt(u8 trace_chan_id, u64 *pid_fmt) +{ + struct int_node *inode; + u64 *metadata, val; + + inode = intlist__find(traceid_list, trace_chan_id); + if (!inode) + return -EINVAL; + + metadata = inode->priv; + + if (metadata[CS_ETM_MAGIC] == __perf_cs_etmv3_magic) { + val = metadata[CS_ETM_ETMCR]; + /* CONTEXTIDR is traced */ + if (val & BIT(ETM_OPT_CTXTID)) + *pid_fmt = BIT(ETM_OPT_CTXTID); + } else { + val = metadata[CS_ETMV4_TRCCONFIGR]; + /* CONTEXTIDR_EL2 is traced */ + if (val & (BIT(ETM4_CFG_BIT_VMID) | BIT(ETM4_CFG_BIT_VMID_OPT))) + *pid_fmt = BIT(ETM_OPT_CTXTID2); + /* CONTEXTIDR_EL1 is traced */ + else if (val & BIT(ETM4_CFG_BIT_CTXTID)) + *pid_fmt = BIT(ETM_OPT_CTXTID); + } + + return 0; +} + void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq, u8 trace_chan_id) { /* - * Wnen a timestamp packet is encountered the backend code + * When a timestamp packet is encountered the backend code * is stopped so that the front end has time to process packets * that were accumulated in the traceID queue. Since there can * be more than one channel per cs_etm_queue, we need to specify * what traceID queue needs servicing. */ - etmq->pending_timestamp = trace_chan_id; + etmq->pending_timestamp_chan_id = trace_chan_id; } static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq, @@ -171,22 +211,22 @@ static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq, { struct cs_etm_packet_queue *packet_queue; - if (!etmq->pending_timestamp) + if (!etmq->pending_timestamp_chan_id) return 0; if (trace_chan_id) - *trace_chan_id = etmq->pending_timestamp; + *trace_chan_id = etmq->pending_timestamp_chan_id; packet_queue = cs_etm__etmq_get_packet_queue(etmq, - etmq->pending_timestamp); + etmq->pending_timestamp_chan_id); if (!packet_queue) return 0; /* Acknowledge pending status */ - etmq->pending_timestamp = 0; + etmq->pending_timestamp_chan_id = 0; /* See function cs_etm_decoder__do_{hard|soft}_timestamp() */ - return packet_queue->timestamp; + return packet_queue->cs_timestamp; } static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue) @@ -363,6 +403,23 @@ struct cs_etm_packet_queue return NULL; } +static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm, + struct cs_etm_traceid_queue *tidq) +{ + struct cs_etm_packet *tmp; + + if (etm->synth_opts.branches || etm->synth_opts.last_branch || + etm->synth_opts.instructions) { + /* + * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for + * the next incoming packet. + */ + tmp = tidq->packet; + tidq->packet = tidq->prev_packet; + tidq->prev_packet = tmp; + } +} + static void cs_etm__packet_dump(const char *pkt_string) { const char *color = PERF_COLOR_BLUE; @@ -401,14 +458,30 @@ static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params, t_params[idx].etmv4.reg_traceidr = metadata[idx][CS_ETMV4_TRCTRACEIDR]; } +static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params, + struct cs_etm_auxtrace *etm, int idx) +{ + u64 **metadata = etm->metadata; + + t_params[idx].protocol = CS_ETM_PROTO_ETE; + t_params[idx].ete.reg_idr0 = metadata[idx][CS_ETMV4_TRCIDR0]; + t_params[idx].ete.reg_idr1 = metadata[idx][CS_ETMV4_TRCIDR1]; + t_params[idx].ete.reg_idr2 = metadata[idx][CS_ETMV4_TRCIDR2]; + t_params[idx].ete.reg_idr8 = metadata[idx][CS_ETMV4_TRCIDR8]; + t_params[idx].ete.reg_configr = metadata[idx][CS_ETMV4_TRCCONFIGR]; + t_params[idx].ete.reg_traceidr = metadata[idx][CS_ETMV4_TRCTRACEIDR]; + t_params[idx].ete.reg_devarch = metadata[idx][CS_ETE_TRCDEVARCH]; +} + static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params, - struct cs_etm_auxtrace *etm) + struct cs_etm_auxtrace *etm, + int decoders) { int i; u32 etmidr; u64 architecture; - for (i = 0; i < etm->num_cpu; i++) { + for (i = 0; i < decoders; i++) { architecture = etm->metadata[i][CS_ETM_MAGIC]; switch (architecture) { @@ -419,6 +492,9 @@ static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params, case __perf_cs_etmv4_magic: cs_etm__set_trace_param_etmv4(t_params, etm, i); break; + case __perf_cs_ete_magic: + cs_etm__set_trace_param_ete(t_params, etm, i); + break; default: return -EINVAL; } @@ -429,7 +505,8 @@ static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params, static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params, struct cs_etm_queue *etmq, - enum cs_etm_decoder_operation mode) + enum cs_etm_decoder_operation mode, + bool formatted) { int ret = -EINVAL; @@ -439,7 +516,7 @@ static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params, d_params->packet_printer = cs_etm__packet_dump; d_params->operation = mode; d_params->data = etmq; - d_params->formatted = true; + d_params->formatted = formatted; d_params->fsyncs = false; d_params->hsyncs = false; d_params->frame_aligned = true; @@ -449,44 +526,23 @@ out: return ret; } -static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, +static void cs_etm__dump_event(struct cs_etm_queue *etmq, struct auxtrace_buffer *buffer) { int ret; const char *color = PERF_COLOR_BLUE; - struct cs_etm_decoder_params d_params; - struct cs_etm_trace_params *t_params; - struct cs_etm_decoder *decoder; size_t buffer_used = 0; fprintf(stdout, "\n"); color_fprintf(stdout, color, - ". ... CoreSight ETM Trace data: size %zu bytes\n", - buffer->size); - - /* Use metadata to fill in trace parameters for trace decoder */ - t_params = zalloc(sizeof(*t_params) * etm->num_cpu); + ". ... CoreSight %s Trace data: size %#zx bytes\n", + cs_etm_decoder__get_name(etmq->decoder), buffer->size); - if (!t_params) - return; - - if (cs_etm__init_trace_params(t_params, etm)) - goto out_free; - - /* Set decoder parameters to simply print the trace packets */ - if (cs_etm__init_decoder_params(&d_params, NULL, - CS_ETM_OPERATION_PRINT)) - goto out_free; - - decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params); - - if (!decoder) - goto out_free; do { size_t consumed; ret = cs_etm_decoder__process_data_block( - decoder, buffer->offset, + etmq->decoder, buffer->offset, &((u8 *)buffer->data)[buffer_used], buffer->size - buffer_used, &consumed); if (ret) @@ -495,16 +551,12 @@ static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, buffer_used += consumed; } while (buffer_used < buffer->size); - cs_etm_decoder__free(decoder); - -out_free: - zfree(&t_params); + cs_etm_decoder__reset(etmq->decoder); } static int cs_etm__flush_events(struct perf_session *session, struct perf_tool *tool) { - int ret; struct cs_etm_auxtrace *etm = container_of(session->auxtrace, struct cs_etm_auxtrace, auxtrace); @@ -514,11 +566,6 @@ static int cs_etm__flush_events(struct perf_session *session, if (!tool->ordered_events) return -EINVAL; - ret = cs_etm__update_queues(etm); - - if (ret < 0) - return ret; - if (etm->timeless_decoding) return cs_etm__process_timeless_queues(etm, -1); @@ -614,13 +661,23 @@ static void cs_etm__free(struct perf_session *session) zfree(&aux); } +static bool cs_etm__evsel_is_auxtrace(struct perf_session *session, + struct evsel *evsel) +{ + struct cs_etm_auxtrace *aux = container_of(session->auxtrace, + struct cs_etm_auxtrace, + auxtrace); + + return evsel->core.attr.type == aux->pmu_type; +} + static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address) { struct machine *machine; machine = etmq->etm->machine; - if (address >= etmq->etm->kernel_start) { + if (address >= machine__kernel_start(machine)) { if (machine__is_host(machine)) return PERF_RECORD_MISC_KERNEL; else @@ -675,17 +732,32 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id, len = dso__data_read_offset(al.map->dso, machine, offset, buffer, size); - if (len <= 0) + if (len <= 0) { + ui__warning_once("CS ETM Trace: Missing DSO. Use 'perf archive' or debuginfod to export data from the traced system.\n" + " Enable CONFIG_PROC_KCORE or use option '-k /path/to/vmlinux' for kernel symbols.\n"); + if (!al.map->dso->auxtrace_warned) { + pr_err("CS ETM Trace: Debug data not found for address %#"PRIx64" in %s\n", + address, + al.map->dso->long_name ? al.map->dso->long_name : "Unknown"); + al.map->dso->auxtrace_warned = true; + } return 0; + } return len; } -static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm) +static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, + bool formatted) { struct cs_etm_decoder_params d_params; struct cs_etm_trace_params *t_params = NULL; struct cs_etm_queue *etmq; + /* + * Each queue can only contain data from one CPU when unformatted, so only one decoder is + * needed. + */ + int decoders = formatted ? etm->num_cpu : 1; etmq = zalloc(sizeof(*etmq)); if (!etmq) @@ -696,20 +768,23 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm) goto out_free; /* Use metadata to fill in trace parameters for trace decoder */ - t_params = zalloc(sizeof(*t_params) * etm->num_cpu); + t_params = zalloc(sizeof(*t_params) * decoders); if (!t_params) goto out_free; - if (cs_etm__init_trace_params(t_params, etm)) + if (cs_etm__init_trace_params(t_params, etm, decoders)) goto out_free; /* Set decoder parameters to decode trace packets */ if (cs_etm__init_decoder_params(&d_params, etmq, - CS_ETM_OPERATION_DECODE)) + dump_trace ? CS_ETM_OPERATION_PRINT : + CS_ETM_OPERATION_DECODE, + formatted)) goto out_free; - etmq->decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params); + etmq->decoder = cs_etm_decoder__new(decoders, &d_params, + t_params); if (!etmq->decoder) goto out_free; @@ -737,31 +812,35 @@ out_free: static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, struct auxtrace_queue *queue, - unsigned int queue_nr) + unsigned int queue_nr, + bool formatted) { - int ret = 0; - unsigned int cs_queue_nr; - u8 trace_chan_id; - u64 timestamp; struct cs_etm_queue *etmq = queue->priv; if (list_empty(&queue->head) || etmq) - goto out; + return 0; - etmq = cs_etm__alloc_queue(etm); + etmq = cs_etm__alloc_queue(etm, formatted); - if (!etmq) { - ret = -ENOMEM; - goto out; - } + if (!etmq) + return -ENOMEM; queue->priv = etmq; etmq->etm = etm; etmq->queue_nr = queue_nr; etmq->offset = 0; - if (etm->timeless_decoding) - goto out; + return 0; +} + +static int cs_etm__queue_first_cs_timestamp(struct cs_etm_auxtrace *etm, + struct cs_etm_queue *etmq, + unsigned int queue_nr) +{ + int ret = 0; + unsigned int cs_queue_nr; + u8 trace_chan_id; + u64 cs_timestamp; /* * We are under a CPU-wide trace scenario. As such we need to know @@ -782,7 +861,7 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, /* * Run decoder on the trace block. The decoder will stop when - * encountering a timestamp, a full packet queue or the end of + * encountering a CS timestamp, a full packet queue or the end of * trace for that block. */ ret = cs_etm__decode_data_block(etmq); @@ -793,10 +872,10 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, * Function cs_etm_decoder__do_{hard|soft}_timestamp() does all * the timestamp calculation for us. */ - timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id); + cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id); /* We found a timestamp, no need to continue. */ - if (timestamp) + if (cs_timestamp) break; /* @@ -820,38 +899,11 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, * queue and will be processed in cs_etm__process_queues(). */ cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id); - ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, timestamp); + ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp); out: return ret; } -static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm) -{ - unsigned int i; - int ret; - - if (!etm->kernel_start) - etm->kernel_start = machine__kernel_start(etm->machine); - - for (i = 0; i < etm->queues.nr_queues; i++) { - ret = cs_etm__setup_queue(etm, &etm->queues.queue_array[i], i); - if (ret) - return ret; - } - - return 0; -} - -static int cs_etm__update_queues(struct cs_etm_auxtrace *etm) -{ - if (etm->queues.new_data) { - etm->queues.new_data = false; - return cs_etm__setup_queues(etm); - } - - return 0; -} - static inline void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq, struct cs_etm_traceid_queue *tidq) @@ -945,7 +997,7 @@ static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq, if (packet->isa == CS_ETM_ISA_T32) { u64 addr = packet->start_addr; - while (offset > 0) { + while (offset) { addr += cs_etm__t32_instr_size(etmq, trace_chan_id, addr); offset--; @@ -1122,6 +1174,8 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, event->sample.header.misc = cs_etm__cpu_mode(etmq, addr); event->sample.header.size = sizeof(struct perf_event_header); + if (!etm->timeless_decoding) + sample.time = etm->latest_kernel_timestamp; sample.ip = addr; sample.pid = tidq->pid; sample.tid = tidq->tid; @@ -1134,10 +1188,8 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->packet, &sample); - if (etm->synth_opts.last_branch) { - cs_etm__copy_last_branch_rb(etmq, tidq); + if (etm->synth_opts.last_branch) sample.branch_stack = tidq->last_branch; - } if (etm->synth_opts.inject) { ret = cs_etm__inject_event(event, &sample, @@ -1153,9 +1205,6 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, "CS ETM Trace: failed to deliver instruction event, error %d\n", ret); - if (etm->synth_opts.last_branch) - cs_etm__reset_last_branch_rb(tidq); - return ret; } @@ -1172,6 +1221,7 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq, union perf_event *event = tidq->event_buf; struct dummy_branch_stack { u64 nr; + u64 hw_idx; struct branch_entry entries; } dummy_bs; u64 ip; @@ -1182,6 +1232,8 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq, event->sample.header.misc = cs_etm__cpu_mode(etmq, ip); event->sample.header.size = sizeof(struct perf_event_header); + if (!etm->timeless_decoding) + sample.time = etm->latest_kernel_timestamp; sample.ip = ip; sample.pid = tidq->pid; sample.tid = tidq->tid; @@ -1202,6 +1254,7 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq, if (etm->synth_opts.last_branch) { dummy_bs = (struct dummy_branch_stack){ .nr = 1, + .hw_idx = -1ULL, .entries = { .from = sample.ip, .to = sample.addr, @@ -1310,15 +1363,21 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm, err = cs_etm__synth_event(session, &attr, id); if (err) return err; - etm->sample_branches = true; etm->branches_sample_type = attr.sample_type; etm->branches_id = id; id += 1; attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR; } - if (etm->synth_opts.last_branch) + if (etm->synth_opts.last_branch) { attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; + /* + * We don't use the hardware index, but the sample generation + * code uses the new format branch_stack with this field, + * so the event attributes must indicate that it's present. + */ + attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX; + } if (etm->synth_opts.instructions) { attr.config = PERF_COUNT_HW_INSTRUCTIONS; @@ -1327,7 +1386,6 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm, err = cs_etm__synth_event(session, &attr, id); if (err) return err; - etm->sample_instructions = true; etm->instructions_sample_type = attr.sample_type; etm->instructions_id = id; id += 1; @@ -1340,12 +1398,14 @@ static int cs_etm__sample(struct cs_etm_queue *etmq, struct cs_etm_traceid_queue *tidq) { struct cs_etm_auxtrace *etm = etmq->etm; - struct cs_etm_packet *tmp; int ret; u8 trace_chan_id = tidq->trace_chan_id; - u64 instrs_executed = tidq->packet->instr_count; + u64 instrs_prev; - tidq->period_instructions += instrs_executed; + /* Get instructions remainder from previous packet */ + instrs_prev = tidq->period_instructions; + + tidq->period_instructions += tidq->packet->instr_count; /* * Record a branch when the last instruction in @@ -1356,36 +1416,90 @@ static int cs_etm__sample(struct cs_etm_queue *etmq, tidq->prev_packet->last_instr_taken_branch) cs_etm__update_last_branch_rb(etmq, tidq); - if (etm->sample_instructions && + if (etm->synth_opts.instructions && tidq->period_instructions >= etm->instructions_sample_period) { /* * Emit instruction sample periodically * TODO: allow period to be defined in cycles and clock time */ - /* Get number of instructions executed after the sample point */ - u64 instrs_over = tidq->period_instructions - - etm->instructions_sample_period; + /* + * Below diagram demonstrates the instruction samples + * generation flows: + * + * Instrs Instrs Instrs Instrs + * Sample(n) Sample(n+1) Sample(n+2) Sample(n+3) + * | | | | + * V V V V + * -------------------------------------------------- + * ^ ^ + * | | + * Period Period + * instructions(Pi) instructions(Pi') + * + * | | + * \---------------- -----------------/ + * V + * tidq->packet->instr_count + * + * Instrs Sample(n...) are the synthesised samples occurring + * every etm->instructions_sample_period instructions - as + * defined on the perf command line. Sample(n) is being the + * last sample before the current etm packet, n+1 to n+3 + * samples are generated from the current etm packet. + * + * tidq->packet->instr_count represents the number of + * instructions in the current etm packet. + * + * Period instructions (Pi) contains the number of + * instructions executed after the sample point(n) from the + * previous etm packet. This will always be less than + * etm->instructions_sample_period. + * + * When generate new samples, it combines with two parts + * instructions, one is the tail of the old packet and another + * is the head of the new coming packet, to generate + * sample(n+1); sample(n+2) and sample(n+3) consume the + * instructions with sample period. After sample(n+3), the rest + * instructions will be used by later packet and it is assigned + * to tidq->period_instructions for next round calculation. + */ /* - * Calculate the address of the sampled instruction (-1 as - * sample is reported as though instruction has just been - * executed, but PC has not advanced to next instruction) + * Get the initial offset into the current packet instructions; + * entry conditions ensure that instrs_prev is less than + * etm->instructions_sample_period. */ - u64 offset = (instrs_executed - instrs_over - 1); - u64 addr = cs_etm__instr_addr(etmq, trace_chan_id, - tidq->packet, offset); + u64 offset = etm->instructions_sample_period - instrs_prev; + u64 addr; - ret = cs_etm__synth_instruction_sample( - etmq, tidq, addr, etm->instructions_sample_period); - if (ret) - return ret; + /* Prepare last branches for instruction sample */ + if (etm->synth_opts.last_branch) + cs_etm__copy_last_branch_rb(etmq, tidq); + + while (tidq->period_instructions >= + etm->instructions_sample_period) { + /* + * Calculate the address of the sampled instruction (-1 + * as sample is reported as though instruction has just + * been executed, but PC has not advanced to next + * instruction) + */ + addr = cs_etm__instr_addr(etmq, trace_chan_id, + tidq->packet, offset - 1); + ret = cs_etm__synth_instruction_sample( + etmq, tidq, addr, + etm->instructions_sample_period); + if (ret) + return ret; - /* Carry remaining instructions into next sample period */ - tidq->period_instructions = instrs_over; + offset += etm->instructions_sample_period; + tidq->period_instructions -= + etm->instructions_sample_period; + } } - if (etm->sample_branches) { + if (etm->synth_opts.branches) { bool generate_sample = false; /* Generate sample for tracing on packet */ @@ -1404,15 +1518,7 @@ static int cs_etm__sample(struct cs_etm_queue *etmq, } } - if (etm->sample_branches || etm->synth_opts.last_branch) { - /* - * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for - * the next incoming packet. - */ - tmp = tidq->packet; - tidq->packet = tidq->prev_packet; - tidq->prev_packet = tmp; - } + cs_etm__packet_swap(etm, tidq); return 0; } @@ -1441,14 +1547,19 @@ static int cs_etm__flush(struct cs_etm_queue *etmq, { int err = 0; struct cs_etm_auxtrace *etm = etmq->etm; - struct cs_etm_packet *tmp; /* Handle start tracing packet */ if (tidq->prev_packet->sample_type == CS_ETM_EMPTY) goto swap_packet; if (etmq->etm->synth_opts.last_branch && + etmq->etm->synth_opts.instructions && tidq->prev_packet->sample_type == CS_ETM_RANGE) { + u64 addr; + + /* Prepare last branches for instruction sample */ + cs_etm__copy_last_branch_rb(etmq, tidq); + /* * Generate a last branch event for the branches left in the * circular buffer at the end of the trace. @@ -1456,7 +1567,7 @@ static int cs_etm__flush(struct cs_etm_queue *etmq, * Use the address of the end of the last reported execution * range */ - u64 addr = cs_etm__last_executed_instr(tidq->prev_packet); + addr = cs_etm__last_executed_instr(tidq->prev_packet); err = cs_etm__synth_instruction_sample( etmq, tidq, addr, @@ -1468,7 +1579,7 @@ static int cs_etm__flush(struct cs_etm_queue *etmq, } - if (etm->sample_branches && + if (etm->synth_opts.branches && tidq->prev_packet->sample_type == CS_ETM_RANGE) { err = cs_etm__synth_branch_sample(etmq, tidq); if (err) @@ -1476,15 +1587,11 @@ static int cs_etm__flush(struct cs_etm_queue *etmq, } swap_packet: - if (etm->sample_branches || etm->synth_opts.last_branch) { - /* - * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for - * the next incoming packet. - */ - tmp = tidq->packet; - tidq->packet = tidq->prev_packet; - tidq->prev_packet = tmp; - } + cs_etm__packet_swap(etm, tidq); + + /* Reset last branches after flush the trace */ + if (etm->synth_opts.last_branch) + cs_etm__reset_last_branch_rb(tidq); return err; } @@ -1504,12 +1611,18 @@ static int cs_etm__end_block(struct cs_etm_queue *etmq, * the trace. */ if (etmq->etm->synth_opts.last_branch && + etmq->etm->synth_opts.instructions && tidq->prev_packet->sample_type == CS_ETM_RANGE) { + u64 addr; + + /* Prepare last branches for instruction sample */ + cs_etm__copy_last_branch_rb(etmq, tidq); + /* * Use the address of the end of the last reported execution * range. */ - u64 addr = cs_etm__last_executed_instr(tidq->prev_packet); + addr = cs_etm__last_executed_instr(tidq->prev_packet); err = cs_etm__synth_instruction_sample( etmq, tidq, addr, @@ -1568,7 +1681,7 @@ static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, u8 trace_chan_id, * | 1 1 0 1 1 1 1 1 | imm8 | * +-----------------+--------+ * - * According to the specifiction, it only defines SVC for T32 + * According to the specification, it only defines SVC for T32 * with 16 bits instruction and has no definition for 32bits; * so below only read 2 bytes as instruction size for T32. */ @@ -1800,7 +1913,7 @@ static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq, /* * If the previous packet is an exception return packet - * and the return address just follows SVC instuction, + * and the return address just follows SVC instruction, * it needs to calibrate the previous packet sample flags * as PERF_IP_FLAG_SYSCALLRET. */ @@ -1874,7 +1987,7 @@ static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq, * contain exception type related info so we cannot decide * the exception type purely based on exception return packet. * If we record the exception number from exception packet and - * reuse it for excpetion return packet, this is not reliable + * reuse it for exception return packet, this is not reliable * due the trace can be discontinuity or the interrupt can * be nested, thus the recorded exception number cannot be * used for exception return packet for these two cases. @@ -2090,13 +2203,27 @@ static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, static int cs_etm__process_queues(struct cs_etm_auxtrace *etm) { int ret = 0; - unsigned int cs_queue_nr, queue_nr; + unsigned int cs_queue_nr, queue_nr, i; u8 trace_chan_id; - u64 timestamp; + u64 cs_timestamp; struct auxtrace_queue *queue; struct cs_etm_queue *etmq; struct cs_etm_traceid_queue *tidq; + /* + * Pre-populate the heap with one entry from each queue so that we can + * start processing in time order across all queues. + */ + for (i = 0; i < etm->queues.nr_queues; i++) { + etmq = etm->queues.queue_array[i].priv; + if (!etmq) + continue; + + ret = cs_etm__queue_first_cs_timestamp(etm, etmq, i); + if (ret) + return ret; + } + while (1) { if (!etm->heap.heap_cnt) goto out; @@ -2154,9 +2281,9 @@ refetch: if (ret) goto out; - timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id); + cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id); - if (!timestamp) { + if (!cs_timestamp) { /* * Function cs_etm__decode_data_block() returns when * there is no more traces to decode in the current @@ -2179,7 +2306,7 @@ refetch: * this queue/traceID. */ cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id); - ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, timestamp); + ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp); } out: @@ -2250,8 +2377,7 @@ static int cs_etm__process_event(struct perf_session *session, struct perf_sample *sample, struct perf_tool *tool) { - int err = 0; - u64 timestamp; + u64 sample_kernel_timestamp; struct cs_etm_auxtrace *etm = container_of(session->auxtrace, struct cs_etm_auxtrace, auxtrace); @@ -2265,16 +2391,15 @@ static int cs_etm__process_event(struct perf_session *session, } if (sample->time && (sample->time != (u64) -1)) - timestamp = sample->time; + sample_kernel_timestamp = sample->time; else - timestamp = 0; - - if (timestamp || etm->timeless_decoding) { - err = cs_etm__update_queues(etm); - if (err) - return err; - } + sample_kernel_timestamp = 0; + /* + * Don't wait for cs_etm__flush_events() in per-thread/timeless mode to start the decode. We + * need the tid of the PERF_RECORD_EXIT event to assign to the synthesised samples because + * ETM_OPT_CTXTID is not enabled. + */ if (etm->timeless_decoding && event->header.type == PERF_RECORD_EXIT) return cs_etm__process_timeless_queues(etm, @@ -2285,13 +2410,34 @@ static int cs_etm__process_event(struct perf_session *session, else if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) return cs_etm__process_switch_cpu_wide(etm, event); - if (!etm->timeless_decoding && - event->header.type == PERF_RECORD_AUX) - return cs_etm__process_queues(etm); + if (!etm->timeless_decoding && event->header.type == PERF_RECORD_AUX) { + /* + * Record the latest kernel timestamp available in the header + * for samples so that synthesised samples occur from this point + * onwards. + */ + etm->latest_kernel_timestamp = sample_kernel_timestamp; + } return 0; } +static void dump_queued_data(struct cs_etm_auxtrace *etm, + struct perf_record_auxtrace *event) +{ + struct auxtrace_buffer *buf; + unsigned int i; + /* + * Find all buffers with same reference in the queues and dump them. + * This is because the queues can contain multiple entries of the same + * buffer that were split on aux records. + */ + for (i = 0; i < etm->queues.nr_queues; ++i) + list_for_each_entry(buf, &etm->queues.queue_array[i].head, list) + if (buf->reference == event->reference) + cs_etm__dump_event(etm->queues.queue_array[i].priv, buf); +} + static int cs_etm__process_auxtrace_event(struct perf_session *session, union perf_event *event, struct perf_tool *tool __maybe_unused) @@ -2305,6 +2451,7 @@ static int cs_etm__process_auxtrace_event(struct perf_session *session, int fd = perf_data__fd(session->data); bool is_pipe = perf_data__is_pipe(session->data); int err; + int idx = event->auxtrace.idx; if (is_pipe) data_offset = 0; @@ -2319,12 +2466,24 @@ static int cs_etm__process_auxtrace_event(struct perf_session *session, if (err) return err; + /* + * Knowing if the trace is formatted or not requires a lookup of + * the aux record so only works in non-piped mode where data is + * queued in cs_etm__queue_aux_records(). Always assume + * formatted in piped mode (true). + */ + err = cs_etm__setup_queue(etm, &etm->queues.queue_array[idx], + idx, true); + if (err) + return err; + if (dump_trace) if (auxtrace_buffer__get_data(buffer, fd)) { - cs_etm__dump_event(etm, buffer); + cs_etm__dump_event(etm->queues.queue_array[idx].priv, buffer); auxtrace_buffer__put_data(buffer); } - } + } else if (dump_trace) + dump_queued_data(etm, &event->auxtrace); return 0; } @@ -2335,6 +2494,10 @@ static bool cs_etm__is_timeless_decoding(struct cs_etm_auxtrace *etm) struct evlist *evlist = etm->session->evlist; bool timeless_decoding = true; + /* Override timeless mode with user input from --itrace=Z */ + if (etm->synth_opts.timeless_decoding) + return true; + /* * Circle through the list of event and complain if we find one * with the time bit set. @@ -2348,7 +2511,7 @@ static bool cs_etm__is_timeless_decoding(struct cs_etm_auxtrace *etm) } static const char * const cs_etm_global_header_fmts[] = { - [CS_HEADER_VERSION_0] = " Header version %llx\n", + [CS_HEADER_VERSION] = " Header version %llx\n", [CS_PMU_TYPE_CPUS] = " PMU type/num cpus %llx\n", [CS_ETM_SNAPSHOT] = " Snapshot %llx\n", }; @@ -2356,6 +2519,7 @@ static const char * const cs_etm_global_header_fmts[] = { static const char * const cs_etm_priv_fmts[] = { [CS_ETM_MAGIC] = " Magic number %llx\n", [CS_ETM_CPU] = " CPU %lld\n", + [CS_ETM_NR_TRC_PARAMS] = " NR_TRC_PARAMS %llx\n", [CS_ETM_ETMCR] = " ETMCR %llx\n", [CS_ETM_ETMTRACEIDR] = " ETMTRACEIDR %llx\n", [CS_ETM_ETMCCER] = " ETMCCER %llx\n", @@ -2365,6 +2529,7 @@ static const char * const cs_etm_priv_fmts[] = { static const char * const cs_etmv4_priv_fmts[] = { [CS_ETM_MAGIC] = " Magic number %llx\n", [CS_ETM_CPU] = " CPU %lld\n", + [CS_ETM_NR_TRC_PARAMS] = " NR_TRC_PARAMS %llx\n", [CS_ETMV4_TRCCONFIGR] = " TRCCONFIGR %llx\n", [CS_ETMV4_TRCTRACEIDR] = " TRCTRACEIDR %llx\n", [CS_ETMV4_TRCIDR0] = " TRCIDR0 %llx\n", @@ -2372,28 +2537,350 @@ static const char * const cs_etmv4_priv_fmts[] = { [CS_ETMV4_TRCIDR2] = " TRCIDR2 %llx\n", [CS_ETMV4_TRCIDR8] = " TRCIDR8 %llx\n", [CS_ETMV4_TRCAUTHSTATUS] = " TRCAUTHSTATUS %llx\n", + [CS_ETE_TRCDEVARCH] = " TRCDEVARCH %llx\n" }; -static void cs_etm__print_auxtrace_info(__u64 *val, int num) +static const char * const param_unk_fmt = + " Unknown parameter [%d] %llx\n"; +static const char * const magic_unk_fmt = + " Magic number Unknown %llx\n"; + +static int cs_etm__print_cpu_metadata_v0(__u64 *val, int *offset) { - int i, j, cpu = 0; + int i = *offset, j, nr_params = 0, fmt_offset; + __u64 magic; - for (i = 0; i < CS_HEADER_VERSION_0_MAX; i++) - fprintf(stdout, cs_etm_global_header_fmts[i], val[i]); + /* check magic value */ + magic = val[i + CS_ETM_MAGIC]; + if ((magic != __perf_cs_etmv3_magic) && + (magic != __perf_cs_etmv4_magic)) { + /* failure - note bad magic value */ + fprintf(stdout, magic_unk_fmt, magic); + return -EINVAL; + } + + /* print common header block */ + fprintf(stdout, cs_etm_priv_fmts[CS_ETM_MAGIC], val[i++]); + fprintf(stdout, cs_etm_priv_fmts[CS_ETM_CPU], val[i++]); + + if (magic == __perf_cs_etmv3_magic) { + nr_params = CS_ETM_NR_TRC_PARAMS_V0; + fmt_offset = CS_ETM_ETMCR; + /* after common block, offset format index past NR_PARAMS */ + for (j = fmt_offset; j < nr_params + fmt_offset; j++, i++) + fprintf(stdout, cs_etm_priv_fmts[j], val[i]); + } else if (magic == __perf_cs_etmv4_magic) { + nr_params = CS_ETMV4_NR_TRC_PARAMS_V0; + fmt_offset = CS_ETMV4_TRCCONFIGR; + /* after common block, offset format index past NR_PARAMS */ + for (j = fmt_offset; j < nr_params + fmt_offset; j++, i++) + fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]); + } + *offset = i; + return 0; +} - for (i = CS_HEADER_VERSION_0_MAX; cpu < num; cpu++) { - if (val[i] == __perf_cs_etmv3_magic) - for (j = 0; j < CS_ETM_PRIV_MAX; j++, i++) +static int cs_etm__print_cpu_metadata_v1(__u64 *val, int *offset) +{ + int i = *offset, j, total_params = 0; + __u64 magic; + + magic = val[i + CS_ETM_MAGIC]; + /* total params to print is NR_PARAMS + common block size for v1 */ + total_params = val[i + CS_ETM_NR_TRC_PARAMS] + CS_ETM_COMMON_BLK_MAX_V1; + + if (magic == __perf_cs_etmv3_magic) { + for (j = 0; j < total_params; j++, i++) { + /* if newer record - could be excess params */ + if (j >= CS_ETM_PRIV_MAX) + fprintf(stdout, param_unk_fmt, j, val[i]); + else fprintf(stdout, cs_etm_priv_fmts[j], val[i]); - else if (val[i] == __perf_cs_etmv4_magic) - for (j = 0; j < CS_ETMV4_PRIV_MAX; j++, i++) + } + } else if (magic == __perf_cs_etmv4_magic || magic == __perf_cs_ete_magic) { + /* + * ETE and ETMv4 can be printed in the same block because the number of parameters + * is saved and they share the list of parameter names. ETE is also only supported + * in V1 files. + */ + for (j = 0; j < total_params; j++, i++) { + /* if newer record - could be excess params */ + if (j >= CS_ETE_PRIV_MAX) + fprintf(stdout, param_unk_fmt, j, val[i]); + else fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]); - else - /* failure.. return */ + } + } else { + /* failure - note bad magic value and error out */ + fprintf(stdout, magic_unk_fmt, magic); + return -EINVAL; + } + *offset = i; + return 0; +} + +static void cs_etm__print_auxtrace_info(__u64 *val, int num) +{ + int i, cpu = 0, version, err; + + /* bail out early on bad header version */ + version = val[0]; + if (version > CS_HEADER_CURRENT_VERSION) { + /* failure.. return */ + fprintf(stdout, " Unknown Header Version = %x, ", version); + fprintf(stdout, "Version supported <= %x\n", CS_HEADER_CURRENT_VERSION); + return; + } + + for (i = 0; i < CS_HEADER_VERSION_MAX; i++) + fprintf(stdout, cs_etm_global_header_fmts[i], val[i]); + + for (i = CS_HEADER_VERSION_MAX; cpu < num; cpu++) { + if (version == 0) + err = cs_etm__print_cpu_metadata_v0(val, &i); + else if (version == 1) + err = cs_etm__print_cpu_metadata_v1(val, &i); + if (err) return; } } +/* + * Read a single cpu parameter block from the auxtrace_info priv block. + * + * For version 1 there is a per cpu nr_params entry. If we are handling + * version 1 file, then there may be less, the same, or more params + * indicated by this value than the compile time number we understand. + * + * For a version 0 info block, there are a fixed number, and we need to + * fill out the nr_param value in the metadata we create. + */ +static u64 *cs_etm__create_meta_blk(u64 *buff_in, int *buff_in_offset, + int out_blk_size, int nr_params_v0) +{ + u64 *metadata = NULL; + int hdr_version; + int nr_in_params, nr_out_params, nr_cmn_params; + int i, k; + + metadata = zalloc(sizeof(*metadata) * out_blk_size); + if (!metadata) + return NULL; + + /* read block current index & version */ + i = *buff_in_offset; + hdr_version = buff_in[CS_HEADER_VERSION]; + + if (!hdr_version) { + /* read version 0 info block into a version 1 metadata block */ + nr_in_params = nr_params_v0; + metadata[CS_ETM_MAGIC] = buff_in[i + CS_ETM_MAGIC]; + metadata[CS_ETM_CPU] = buff_in[i + CS_ETM_CPU]; + metadata[CS_ETM_NR_TRC_PARAMS] = nr_in_params; + /* remaining block params at offset +1 from source */ + for (k = CS_ETM_COMMON_BLK_MAX_V1 - 1; k < nr_in_params; k++) + metadata[k + 1] = buff_in[i + k]; + /* version 0 has 2 common params */ + nr_cmn_params = 2; + } else { + /* read version 1 info block - input and output nr_params may differ */ + /* version 1 has 3 common params */ + nr_cmn_params = 3; + nr_in_params = buff_in[i + CS_ETM_NR_TRC_PARAMS]; + + /* if input has more params than output - skip excess */ + nr_out_params = nr_in_params + nr_cmn_params; + if (nr_out_params > out_blk_size) + nr_out_params = out_blk_size; + + for (k = CS_ETM_MAGIC; k < nr_out_params; k++) + metadata[k] = buff_in[i + k]; + + /* record the actual nr params we copied */ + metadata[CS_ETM_NR_TRC_PARAMS] = nr_out_params - nr_cmn_params; + } + + /* adjust in offset by number of in params used */ + i += nr_in_params + nr_cmn_params; + *buff_in_offset = i; + return metadata; +} + +/** + * Puts a fragment of an auxtrace buffer into the auxtrace queues based + * on the bounds of aux_event, if it matches with the buffer that's at + * file_offset. + * + * Normally, whole auxtrace buffers would be added to the queue. But we + * want to reset the decoder for every PERF_RECORD_AUX event, and the decoder + * is reset across each buffer, so splitting the buffers up in advance has + * the same effect. + */ +static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_offset, size_t sz, + struct perf_record_aux *aux_event, struct perf_sample *sample) +{ + int err; + char buf[PERF_SAMPLE_MAX_SIZE]; + union perf_event *auxtrace_event_union; + struct perf_record_auxtrace *auxtrace_event; + union perf_event auxtrace_fragment; + __u64 aux_offset, aux_size; + __u32 idx; + bool formatted; + + struct cs_etm_auxtrace *etm = container_of(session->auxtrace, + struct cs_etm_auxtrace, + auxtrace); + + /* + * There should be a PERF_RECORD_AUXTRACE event at the file_offset that we got + * from looping through the auxtrace index. + */ + err = perf_session__peek_event(session, file_offset, buf, + PERF_SAMPLE_MAX_SIZE, &auxtrace_event_union, NULL); + if (err) + return err; + auxtrace_event = &auxtrace_event_union->auxtrace; + if (auxtrace_event->header.type != PERF_RECORD_AUXTRACE) + return -EINVAL; + + if (auxtrace_event->header.size < sizeof(struct perf_record_auxtrace) || + auxtrace_event->header.size != sz) { + return -EINVAL; + } + + /* + * In per-thread mode, CPU is set to -1, but TID will be set instead. See + * auxtrace_mmap_params__set_idx(). Return 'not found' if neither CPU nor TID match. + */ + if ((auxtrace_event->cpu == (__u32) -1 && auxtrace_event->tid != sample->tid) || + auxtrace_event->cpu != sample->cpu) + return 1; + + if (aux_event->flags & PERF_AUX_FLAG_OVERWRITE) { + /* + * Clamp size in snapshot mode. The buffer size is clamped in + * __auxtrace_mmap__read() for snapshots, so the aux record size doesn't reflect + * the buffer size. + */ + aux_size = min(aux_event->aux_size, auxtrace_event->size); + + /* + * In this mode, the head also points to the end of the buffer so aux_offset + * needs to have the size subtracted so it points to the beginning as in normal mode + */ + aux_offset = aux_event->aux_offset - aux_size; + } else { + aux_size = aux_event->aux_size; + aux_offset = aux_event->aux_offset; + } + + if (aux_offset >= auxtrace_event->offset && + aux_offset + aux_size <= auxtrace_event->offset + auxtrace_event->size) { + /* + * If this AUX event was inside this buffer somewhere, create a new auxtrace event + * based on the sizes of the aux event, and queue that fragment. + */ + auxtrace_fragment.auxtrace = *auxtrace_event; + auxtrace_fragment.auxtrace.size = aux_size; + auxtrace_fragment.auxtrace.offset = aux_offset; + file_offset += aux_offset - auxtrace_event->offset + auxtrace_event->header.size; + + pr_debug3("CS ETM: Queue buffer size: %#"PRI_lx64" offset: %#"PRI_lx64 + " tid: %d cpu: %d\n", aux_size, aux_offset, sample->tid, sample->cpu); + err = auxtrace_queues__add_event(&etm->queues, session, &auxtrace_fragment, + file_offset, NULL); + if (err) + return err; + + idx = auxtrace_event->idx; + formatted = !(aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW); + return cs_etm__setup_queue(etm, &etm->queues.queue_array[idx], + idx, formatted); + } + + /* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */ + return 1; +} + +static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf_event *event, + u64 offset __maybe_unused, void *data __maybe_unused) +{ + struct perf_sample sample; + int ret; + struct auxtrace_index_entry *ent; + struct auxtrace_index *auxtrace_index; + struct evsel *evsel; + size_t i; + + /* Don't care about any other events, we're only queuing buffers for AUX events */ + if (event->header.type != PERF_RECORD_AUX) + return 0; + + if (event->header.size < sizeof(struct perf_record_aux)) + return -EINVAL; + + /* Truncated Aux records can have 0 size and shouldn't result in anything being queued. */ + if (!event->aux.aux_size) + return 0; + + /* + * Parse the sample, we need the sample_id_all data that comes after the event so that the + * CPU or PID can be matched to an AUXTRACE buffer's CPU or PID. + */ + evsel = evlist__event2evsel(session->evlist, event); + if (!evsel) + return -EINVAL; + ret = evsel__parse_sample(evsel, event, &sample); + if (ret) + return ret; + + /* + * Loop through the auxtrace index to find the buffer that matches up with this aux event. + */ + list_for_each_entry(auxtrace_index, &session->auxtrace_index, list) { + for (i = 0; i < auxtrace_index->nr; i++) { + ent = &auxtrace_index->entries[i]; + ret = cs_etm__queue_aux_fragment(session, ent->file_offset, + ent->sz, &event->aux, &sample); + /* + * Stop search on error or successful values. Continue search on + * 1 ('not found') + */ + if (ret != 1) + return ret; + } + } + + /* + * Couldn't find the buffer corresponding to this aux record, something went wrong. Warn but + * don't exit with an error because it will still be possible to decode other aux records. + */ + pr_err("CS ETM: Couldn't find auxtrace buffer for aux_offset: %#"PRI_lx64 + " tid: %d cpu: %d\n", event->aux.aux_offset, sample.tid, sample.cpu); + return 0; +} + +static int cs_etm__queue_aux_records(struct perf_session *session) +{ + struct auxtrace_index *index = list_first_entry_or_null(&session->auxtrace_index, + struct auxtrace_index, list); + if (index && index->nr > 0) + return perf_session__peek_events(session, session->header.data_offset, + session->header.data_size, + cs_etm__queue_aux_records_cb, NULL); + + /* + * We would get here if there are no entries in the index (either no auxtrace + * buffers or no index at all). Fail silently as there is the possibility of + * queueing them in cs_etm__process_auxtrace_event() if etm->data_queued is still + * false. + * + * In that scenario, buffers will not be split by AUX records. + */ + return 0; +} + int cs_etm__process_auxtrace_info(union perf_event *event, struct perf_session *session) { @@ -2405,11 +2892,12 @@ int cs_etm__process_auxtrace_info(union perf_event *event, int info_header_size; int total_size = auxtrace_info->header.size; int priv_size = 0; - int num_cpu; - int err = 0, idx = -1; - int i, j, k; + int num_cpu, trcidr_idx; + int err = 0; + int i, j; u64 *ptr, *hdr = NULL; u64 **metadata = NULL; + u64 hdr_version; /* * sizeof(auxtrace_info_event::type) + @@ -2425,16 +2913,21 @@ int cs_etm__process_auxtrace_info(union perf_event *event, /* First the global part */ ptr = (u64 *) auxtrace_info->priv; - /* Look for version '0' of the header */ - if (ptr[0] != 0) + /* Look for version of the header */ + hdr_version = ptr[0]; + if (hdr_version > CS_HEADER_CURRENT_VERSION) { + /* print routine will print an error on bad version */ + if (dump_trace) + cs_etm__print_auxtrace_info(auxtrace_info->priv, 0); return -EINVAL; + } - hdr = zalloc(sizeof(*hdr) * CS_HEADER_VERSION_0_MAX); + hdr = zalloc(sizeof(*hdr) * CS_HEADER_VERSION_MAX); if (!hdr) return -ENOMEM; /* Extract header information - see cs-etm.h for format */ - for (i = 0; i < CS_HEADER_VERSION_0_MAX; i++) + for (i = 0; i < CS_HEADER_VERSION_MAX; i++) hdr[i] = ptr[i]; num_cpu = hdr[CS_PMU_TYPE_CPUS] & 0xffffffff; pmu_type = (unsigned int) ((hdr[CS_PMU_TYPE_CPUS] >> 32) & @@ -2465,35 +2958,41 @@ int cs_etm__process_auxtrace_info(union perf_event *event, */ for (j = 0; j < num_cpu; j++) { if (ptr[i] == __perf_cs_etmv3_magic) { - metadata[j] = zalloc(sizeof(*metadata[j]) * - CS_ETM_PRIV_MAX); - if (!metadata[j]) { - err = -ENOMEM; - goto err_free_metadata; - } - for (k = 0; k < CS_ETM_PRIV_MAX; k++) - metadata[j][k] = ptr[i + k]; + metadata[j] = + cs_etm__create_meta_blk(ptr, &i, + CS_ETM_PRIV_MAX, + CS_ETM_NR_TRC_PARAMS_V0); /* The traceID is our handle */ - idx = metadata[j][CS_ETM_ETMTRACEIDR]; - i += CS_ETM_PRIV_MAX; + trcidr_idx = CS_ETM_ETMTRACEIDR; + } else if (ptr[i] == __perf_cs_etmv4_magic) { - metadata[j] = zalloc(sizeof(*metadata[j]) * - CS_ETMV4_PRIV_MAX); - if (!metadata[j]) { - err = -ENOMEM; - goto err_free_metadata; - } - for (k = 0; k < CS_ETMV4_PRIV_MAX; k++) - metadata[j][k] = ptr[i + k]; + metadata[j] = + cs_etm__create_meta_blk(ptr, &i, + CS_ETMV4_PRIV_MAX, + CS_ETMV4_NR_TRC_PARAMS_V0); /* The traceID is our handle */ - idx = metadata[j][CS_ETMV4_TRCTRACEIDR]; - i += CS_ETMV4_PRIV_MAX; + trcidr_idx = CS_ETMV4_TRCTRACEIDR; + } else if (ptr[i] == __perf_cs_ete_magic) { + metadata[j] = cs_etm__create_meta_blk(ptr, &i, CS_ETE_PRIV_MAX, -1); + + /* ETE shares first part of metadata with ETMv4 */ + trcidr_idx = CS_ETMV4_TRCTRACEIDR; + } else { + ui__error("CS ETM Trace: Unrecognised magic number %#"PRIx64". File could be from a newer version of perf.\n", + ptr[i]); + err = -EINVAL; + goto err_free_metadata; + } + + if (!metadata[j]) { + err = -ENOMEM; + goto err_free_metadata; } /* Get an RB node for this CPU */ - inode = intlist__findnew(traceid_list, idx); + inode = intlist__findnew(traceid_list, metadata[j][trcidr_idx]); /* Something went wrong, no need to continue */ if (!inode) { @@ -2514,7 +3013,7 @@ int cs_etm__process_auxtrace_info(union perf_event *event, } /* - * Each of CS_HEADER_VERSION_0_MAX, CS_ETM_PRIV_MAX and + * Each of CS_HEADER_VERSION_MAX, CS_ETM_PRIV_MAX and * CS_ETMV4_PRIV_MAX mark how many double words are in the * global metadata, and each cpu's metadata respectively. * The following tests if the correct number of double words was @@ -2536,6 +3035,14 @@ int cs_etm__process_auxtrace_info(union perf_event *event, if (err) goto err_free_etm; + if (session->itrace_synth_opts->set) { + etm->synth_opts = *session->itrace_synth_opts; + } else { + itrace_synth_opts__set_default(&etm->synth_opts, + session->itrace_synth_opts->default_no_sample); + etm->synth_opts.callchain = false; + } + etm->session = session; etm->machine = &session->machines.host; @@ -2551,6 +3058,7 @@ int cs_etm__process_auxtrace_info(union perf_event *event, etm->auxtrace.flush_events = cs_etm__flush_events; etm->auxtrace.free_events = cs_etm__free_events; etm->auxtrace.free = cs_etm__free; + etm->auxtrace.evsel_is_auxtrace = cs_etm__evsel_is_auxtrace; session->auxtrace = &etm->auxtrace; etm->unknown_thread = thread__new(999999999, 999999999); @@ -2576,26 +3084,24 @@ int cs_etm__process_auxtrace_info(union perf_event *event, if (dump_trace) { cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu); - return 0; - } - - if (session->itrace_synth_opts->set) { - etm->synth_opts = *session->itrace_synth_opts; - } else { - itrace_synth_opts__set_default(&etm->synth_opts, - session->itrace_synth_opts->default_no_sample); - etm->synth_opts.callchain = false; } err = cs_etm__synth_events(etm, session); if (err) goto err_delete_thread; - err = auxtrace_queues__process_index(&etm->queues, session); + err = cs_etm__queue_aux_records(session); if (err) goto err_delete_thread; etm->data_queued = etm->queues.populated; + /* + * Print warning in pipe mode, see cs_etm__process_auxtrace_event() and + * cs_etm__queue_aux_fragment() for details relating to limitations. + */ + if (!etm->data_queued) + pr_warning("CS ETM warning: Coresight decode and TRBE support requires random file access.\n" + "Continuing with best effort decoding in piped mode.\n\n"); return 0; @@ -2615,6 +3121,12 @@ err_free_traceid_list: intlist__delete(traceid_list); err_free_hdr: zfree(&hdr); - + /* + * At this point, as a minimum we have valid header. Dump the rest of + * the info section - the print routines will error out on structural + * issues. + */ + if (dump_trace) + cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu); return err; } |