diff options
Diffstat (limited to '')
-rw-r--r-- | tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 333 |
1 files changed, 207 insertions, 126 deletions
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index cd007cc9c283..e917985bbbe6 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -6,16 +6,17 @@ * Author: Mathieu Poirier <mathieu.poirier@linaro.org> */ +#include <asm/bug.h> +#include <linux/coresight-pmu.h> #include <linux/err.h> #include <linux/list.h> #include <linux/zalloc.h> #include <stdlib.h> #include <opencsd/c_api/opencsd_c_api.h> -#include <opencsd/etmv4/trc_pkt_types_etmv4.h> -#include <opencsd/ocsd_if_types.h> #include "cs-etm.h" #include "cs-etm-decoder.h" +#include "debug.h" #include "intlist.h" /* use raw logging */ @@ -29,26 +30,37 @@ #endif #endif +/* + * Assume a maximum of 0.1ns elapsed per instruction. This would be the + * case with a theoretical 10GHz core executing 1 instruction per cycle. + * Used to estimate the sample time for synthesized instructions because + * Coresight only emits a timestamp for a range of instructions rather + * than per instruction. + */ +const u32 INSTR_PER_NS = 10; + struct cs_etm_decoder { void *data; void (*packet_printer)(const char *msg); + bool suppress_printing; dcd_tree_handle_t dcd_tree; cs_etm_mem_cb_type mem_access; ocsd_datapath_resp_t prev_return; + const char *decoder_name; }; static u32 cs_etm_decoder__mem_access(const void *context, const ocsd_vaddr_t address, - const ocsd_mem_space_acc_t mem_space __maybe_unused, + const ocsd_mem_space_acc_t mem_space, const u8 trace_chan_id, const u32 req_size, u8 *buffer) { struct cs_etm_decoder *decoder = (struct cs_etm_decoder *) context; - return decoder->mem_access(decoder->data, trace_chan_id, - address, req_size, buffer); + return decoder->mem_access(decoder->data, trace_chan_id, address, + req_size, buffer, mem_space); } int cs_etm_decoder__add_mem_access_cb(struct cs_etm_decoder *decoder, @@ -71,9 +83,10 @@ int cs_etm_decoder__reset(struct cs_etm_decoder *decoder) ocsd_datapath_resp_t dp_ret; decoder->prev_return = OCSD_RESP_CONT; - + decoder->suppress_printing = true; dp_ret = ocsd_dt_process_data(decoder->dcd_tree, OCSD_OP_RESET, 0, 0, NULL, NULL); + decoder->suppress_printing = false; if (OCSD_DATA_RESP_IS_FATAL(dp_ret)) return -1; @@ -108,6 +121,20 @@ int cs_etm_decoder__get_packet(struct cs_etm_packet_queue *packet_queue, return 1; } +/* + * Calculate the number of nanoseconds elapsed. + * + * instr_count is updated in place with the remainder of the instructions + * which didn't make up a whole nanosecond. + */ +static u32 cs_etm_decoder__dec_instr_count_to_ns(u32 *instr_count) +{ + const u32 instr_copy = *instr_count; + + *instr_count %= INSTR_PER_NS; + return instr_copy / INSTR_PER_NS; +} + static int cs_etm_decoder__gen_etmv3_config(struct cs_etm_trace_params *params, ocsd_etmv3_cfg *config) { @@ -121,6 +148,21 @@ static int cs_etm_decoder__gen_etmv3_config(struct cs_etm_trace_params *params, return 0; } +#define TRCIDR1_TRCARCHMIN_SHIFT 4 +#define TRCIDR1_TRCARCHMIN_MASK GENMASK(7, 4) +#define TRCIDR1_TRCARCHMIN(x) (((x) & TRCIDR1_TRCARCHMIN_MASK) >> TRCIDR1_TRCARCHMIN_SHIFT) + +static enum _ocsd_arch_version cs_etm_decoder__get_etmv4_arch_ver(u32 reg_idr1) +{ + /* + * For ETMv4 if the trace minor version is 4 or more then we can assume + * the architecture is ARCH_AA64 rather than just V8. + * ARCH_V8 = V8 architecture + * ARCH_AA64 = Min v8r3 plus additional AA64 PE features + */ + return TRCIDR1_TRCARCHMIN(reg_idr1) >= 4 ? ARCH_AA64 : ARCH_V8; +} + static void cs_etm_decoder__gen_etmv4_config(struct cs_etm_trace_params *params, ocsd_etmv4_cfg *config) { @@ -135,7 +177,21 @@ static void cs_etm_decoder__gen_etmv4_config(struct cs_etm_trace_params *params, config->reg_idr11 = 0; config->reg_idr12 = 0; config->reg_idr13 = 0; - config->arch_ver = ARCH_V8; + config->arch_ver = cs_etm_decoder__get_etmv4_arch_ver(params->etmv4.reg_idr1); + config->core_prof = profile_CortexA; +} + +static void cs_etm_decoder__gen_ete_config(struct cs_etm_trace_params *params, + ocsd_ete_cfg *config) +{ + config->reg_configr = params->ete.reg_configr; + config->reg_traceidr = params->ete.reg_traceidr; + config->reg_idr0 = params->ete.reg_idr0; + config->reg_idr1 = params->ete.reg_idr1; + config->reg_idr2 = params->ete.reg_idr2; + config->reg_idr8 = params->ete.reg_idr8; + config->reg_devarch = params->ete.reg_devarch; + config->arch_ver = ARCH_AA64; config->core_prof = profile_CortexA; } @@ -143,8 +199,10 @@ static void cs_etm_decoder__print_str_cb(const void *p_context, const char *msg, const int str_len) { - if (p_context && str_len) - ((struct cs_etm_decoder *)p_context)->packet_printer(msg); + const struct cs_etm_decoder *decoder = p_context; + + if (p_context && str_len && !decoder->suppress_printing) + decoder->packet_printer(msg); } static int @@ -220,69 +278,22 @@ cs_etm_decoder__init_raw_frame_logging( } #endif -static int cs_etm_decoder__create_packet_printer(struct cs_etm_decoder *decoder, - const char *decoder_name, - void *trace_config) -{ - u8 csid; - - if (ocsd_dt_create_decoder(decoder->dcd_tree, decoder_name, - OCSD_CREATE_FLG_PACKET_PROC, - trace_config, &csid)) - return -1; - - if (ocsd_dt_set_pkt_protocol_printer(decoder->dcd_tree, csid, 0)) - return -1; - - return 0; -} - -static int -cs_etm_decoder__create_etm_packet_printer(struct cs_etm_trace_params *t_params, - struct cs_etm_decoder *decoder) -{ - const char *decoder_name; - ocsd_etmv3_cfg config_etmv3; - ocsd_etmv4_cfg trace_config_etmv4; - void *trace_config; - - switch (t_params->protocol) { - case CS_ETM_PROTO_ETMV3: - case CS_ETM_PROTO_PTM: - cs_etm_decoder__gen_etmv3_config(t_params, &config_etmv3); - decoder_name = (t_params->protocol == CS_ETM_PROTO_ETMV3) ? - OCSD_BUILTIN_DCD_ETMV3 : - OCSD_BUILTIN_DCD_PTM; - trace_config = &config_etmv3; - break; - case CS_ETM_PROTO_ETMV4i: - cs_etm_decoder__gen_etmv4_config(t_params, &trace_config_etmv4); - decoder_name = OCSD_BUILTIN_DCD_ETMV4I; - trace_config = &trace_config_etmv4; - break; - default: - return -1; - } - - return cs_etm_decoder__create_packet_printer(decoder, - decoder_name, - trace_config); -} - static ocsd_datapath_resp_t cs_etm_decoder__do_soft_timestamp(struct cs_etm_queue *etmq, struct cs_etm_packet_queue *packet_queue, const uint8_t trace_chan_id) { + u64 estimated_ts; + /* No timestamp packet has been received, nothing to do */ - if (!packet_queue->timestamp) + if (!packet_queue->next_cs_timestamp) return OCSD_RESP_CONT; - packet_queue->timestamp = packet_queue->next_timestamp; + estimated_ts = packet_queue->cs_timestamp + + cs_etm_decoder__dec_instr_count_to_ns(&packet_queue->instr_count); - /* Estimate the timestamp for the next range packet */ - packet_queue->next_timestamp += packet_queue->instr_count; - packet_queue->instr_count = 0; + /* Estimated TS can never be higher than the next real one in the trace */ + packet_queue->cs_timestamp = min(packet_queue->next_cs_timestamp, estimated_ts); /* Tell the front end which traceid_queue needs attention */ cs_etm__etmq_set_traceid_queue_timestamp(etmq, trace_chan_id); @@ -293,9 +304,12 @@ cs_etm_decoder__do_soft_timestamp(struct cs_etm_queue *etmq, static ocsd_datapath_resp_t cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq, const ocsd_generic_trace_elem *elem, - const uint8_t trace_chan_id) + const uint8_t trace_chan_id, + const ocsd_trc_index_t indx) { struct cs_etm_packet_queue *packet_queue; + u64 converted_timestamp; + u64 estimated_first_ts; /* First get the packet queue for this traceID */ packet_queue = cs_etm__etmq_get_packet_queue(etmq, trace_chan_id); @@ -303,24 +317,60 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq, return OCSD_RESP_FATAL_SYS_ERR; /* + * Coresight timestamps are raw timer values which need to be scaled to ns. Assume + * 0 is a bad value so don't try to convert it. + */ + converted_timestamp = elem->timestamp ? + cs_etm__convert_sample_time(etmq, elem->timestamp) : 0; + + /* * We've seen a timestamp packet before - simply record the new value. * Function do_soft_timestamp() will report the value to the front end, * hence asking the decoder to keep decoding rather than stopping. */ - if (packet_queue->timestamp) { - packet_queue->next_timestamp = elem->timestamp; + if (packet_queue->next_cs_timestamp) { + /* + * What was next is now where new ranges start from, overwriting + * any previous estimate in cs_timestamp + */ + packet_queue->cs_timestamp = packet_queue->next_cs_timestamp; + packet_queue->next_cs_timestamp = converted_timestamp; return OCSD_RESP_CONT; } - /* - * This is the first timestamp we've seen since the beginning of traces - * or a discontinuity. Since timestamps packets are generated *after* - * range packets have been generated, we need to estimate the time at - * which instructions started by substracting the number of instructions - * executed to the timestamp. - */ - packet_queue->timestamp = elem->timestamp - packet_queue->instr_count; - packet_queue->next_timestamp = elem->timestamp; + if (!converted_timestamp) { + /* + * Zero timestamps can be seen due to misconfiguration or hardware bugs. + * Warn once, and don't try to subtract instr_count as it would result in an + * underflow. + */ + packet_queue->cs_timestamp = 0; + if (!cs_etm__etmq_is_timeless(etmq)) + pr_warning_once("Zero Coresight timestamp found at Idx:%" OCSD_TRC_IDX_STR + ". Decoding may be improved by prepending 'Z' to your current --itrace arguments.\n", + indx); + + } else if (packet_queue->instr_count / INSTR_PER_NS > converted_timestamp) { + /* + * Sanity check that the elem->timestamp - packet_queue->instr_count would not + * result in an underflow. Warn and clamp at 0 if it would. + */ + packet_queue->cs_timestamp = 0; + pr_err("Timestamp calculation underflow at Idx:%" OCSD_TRC_IDX_STR "\n", indx); + } else { + /* + * This is the first timestamp we've seen since the beginning of traces + * or a discontinuity. Since timestamps packets are generated *after* + * range packets have been generated, we need to estimate the time at + * which instructions started by subtracting the number of instructions + * executed to the timestamp. Don't estimate earlier than the last used + * timestamp though. + */ + estimated_first_ts = converted_timestamp - + (packet_queue->instr_count / INSTR_PER_NS); + packet_queue->cs_timestamp = max(packet_queue->cs_timestamp, estimated_first_ts); + } + packet_queue->next_cs_timestamp = converted_timestamp; packet_queue->instr_count = 0; /* Tell the front end which traceid_queue needs attention */ @@ -333,8 +383,7 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq, static void cs_etm_decoder__reset_timestamp(struct cs_etm_packet_queue *packet_queue) { - packet_queue->timestamp = 0; - packet_queue->next_timestamp = 0; + packet_queue->next_cs_timestamp = 0; packet_queue->instr_count = 0; } @@ -419,19 +468,10 @@ cs_etm_decoder__buffer_range(struct cs_etm_queue *etmq, packet->last_instr_subtype = elem->last_i_subtype; packet->last_instr_cond = elem->last_instr_cond; - switch (elem->last_i_type) { - case OCSD_INSTR_BR: - case OCSD_INSTR_BR_INDIRECT: + if (elem->last_i_type == OCSD_INSTR_BR || elem->last_i_type == OCSD_INSTR_BR_INDIRECT) packet->last_instr_taken_branch = elem->last_instr_exec; - break; - case OCSD_INSTR_ISB: - case OCSD_INSTR_DSB_DMB: - case OCSD_INSTR_WFI_WFE: - case OCSD_INSTR_OTHER: - default: + else packet->last_instr_taken_branch = false; - break; - } packet->last_instr_size = elem->last_instr_sz; @@ -500,16 +540,34 @@ cs_etm_decoder__set_tid(struct cs_etm_queue *etmq, const ocsd_generic_trace_elem *elem, const uint8_t trace_chan_id) { - pid_t tid; + pid_t tid = -1; - /* Ignore PE_CONTEXT packets that don't have a valid contextID */ - if (!elem->context.ctxt_id_valid) - return OCSD_RESP_CONT; + /* + * Process the PE_CONTEXT packets if we have a valid contextID or VMID. + * If the kernel is running at EL2, the PID is traced in CONTEXTIDR_EL2 + * as VMID, Bit ETM_OPT_CTXTID2 is set in this case. + */ + switch (cs_etm__get_pid_fmt(etmq)) { + case CS_ETM_PIDFMT_CTXTID: + if (elem->context.ctxt_id_valid) + tid = elem->context.context_id; + break; + case CS_ETM_PIDFMT_CTXTID2: + if (elem->context.vmid_valid) + tid = elem->context.vmid; + break; + case CS_ETM_PIDFMT_NONE: + default: + break; + } - tid = elem->context.context_id; - if (cs_etm__etmq_set_tid(etmq, tid, trace_chan_id)) + if (cs_etm__etmq_set_tid_el(etmq, tid, trace_chan_id, + elem->context.exception_level)) return OCSD_RESP_FATAL_SYS_ERR; + if (tid == -1) + return OCSD_RESP_CONT; + /* * A timestamp is generated after a PE_CONTEXT element so make sure * to rely on that coming one. @@ -521,7 +579,7 @@ cs_etm_decoder__set_tid(struct cs_etm_queue *etmq, static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( const void *context, - const ocsd_trc_index_t indx __maybe_unused, + const ocsd_trc_index_t indx, const u8 trace_chan_id __maybe_unused, const ocsd_generic_trace_elem *elem) { @@ -558,7 +616,8 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( break; case OCSD_GEN_TRC_ELEM_TIMESTAMP: resp = cs_etm_decoder__do_hard_timestamp(etmq, elem, - trace_chan_id); + trace_chan_id, + indx); break; case OCSD_GEN_TRC_ELEM_PE_CONTEXT: resp = cs_etm_decoder__set_tid(etmq, packet_queue, @@ -572,6 +631,11 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( case OCSD_GEN_TRC_ELEM_EVENT: case OCSD_GEN_TRC_ELEM_SWTRACE: case OCSD_GEN_TRC_ELEM_CUSTOM: + case OCSD_GEN_TRC_ELEM_SYNC_MARKER: + case OCSD_GEN_TRC_ELEM_MEMTRANS: +#if (OCSD_VER_NUM >= 0x010400) + case OCSD_GEN_TRC_ELEM_INSTRUMENTATION: +#endif default: break; } @@ -579,65 +643,77 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( return resp; } -static int cs_etm_decoder__create_etm_packet_decoder( - struct cs_etm_trace_params *t_params, - struct cs_etm_decoder *decoder) +static int +cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params, + struct cs_etm_trace_params *t_params, + struct cs_etm_decoder *decoder) { - const char *decoder_name; ocsd_etmv3_cfg config_etmv3; ocsd_etmv4_cfg trace_config_etmv4; + ocsd_ete_cfg trace_config_ete; void *trace_config; u8 csid; switch (t_params->protocol) { case CS_ETM_PROTO_ETMV3: case CS_ETM_PROTO_PTM: + csid = (t_params->etmv3.reg_idr & CORESIGHT_TRACE_ID_VAL_MASK); cs_etm_decoder__gen_etmv3_config(t_params, &config_etmv3); - decoder_name = (t_params->protocol == CS_ETM_PROTO_ETMV3) ? + decoder->decoder_name = (t_params->protocol == CS_ETM_PROTO_ETMV3) ? OCSD_BUILTIN_DCD_ETMV3 : OCSD_BUILTIN_DCD_PTM; trace_config = &config_etmv3; break; case CS_ETM_PROTO_ETMV4i: + csid = (t_params->etmv4.reg_traceidr & CORESIGHT_TRACE_ID_VAL_MASK); cs_etm_decoder__gen_etmv4_config(t_params, &trace_config_etmv4); - decoder_name = OCSD_BUILTIN_DCD_ETMV4I; + decoder->decoder_name = OCSD_BUILTIN_DCD_ETMV4I; trace_config = &trace_config_etmv4; break; + case CS_ETM_PROTO_ETE: + csid = (t_params->ete.reg_traceidr & CORESIGHT_TRACE_ID_VAL_MASK); + cs_etm_decoder__gen_ete_config(t_params, &trace_config_ete); + decoder->decoder_name = OCSD_BUILTIN_DCD_ETE; + trace_config = &trace_config_ete; + break; default: return -1; } - if (ocsd_dt_create_decoder(decoder->dcd_tree, - decoder_name, - OCSD_CREATE_FLG_FULL_DECODER, - trace_config, &csid)) - return -1; + /* if the CPU has no trace ID associated, no decoder needed */ + if (csid == CORESIGHT_TRACE_ID_UNUSED_VAL) + return 0; - if (ocsd_dt_set_gen_elem_outfn(decoder->dcd_tree, - cs_etm_decoder__gen_trace_elem_printer, - decoder)) - return -1; + if (d_params->operation == CS_ETM_OPERATION_DECODE) { + if (ocsd_dt_create_decoder(decoder->dcd_tree, + decoder->decoder_name, + OCSD_CREATE_FLG_FULL_DECODER, + trace_config, &csid)) + return -1; - return 0; -} + if (ocsd_dt_set_gen_elem_outfn(decoder->dcd_tree, + cs_etm_decoder__gen_trace_elem_printer, + decoder)) + return -1; -static int -cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params, - struct cs_etm_trace_params *t_params, - struct cs_etm_decoder *decoder) -{ - if (d_params->operation == CS_ETM_OPERATION_PRINT) - return cs_etm_decoder__create_etm_packet_printer(t_params, - decoder); - else if (d_params->operation == CS_ETM_OPERATION_DECODE) - return cs_etm_decoder__create_etm_packet_decoder(t_params, - decoder); + return 0; + } else if (d_params->operation == CS_ETM_OPERATION_PRINT) { + if (ocsd_dt_create_decoder(decoder->dcd_tree, decoder->decoder_name, + OCSD_CREATE_FLG_PACKET_PROC, + trace_config, &csid)) + return -1; + + if (ocsd_dt_set_pkt_protocol_printer(decoder->dcd_tree, csid, 0)) + return -1; + + return 0; + } return -1; } struct cs_etm_decoder * -cs_etm_decoder__new(int num_cpu, struct cs_etm_decoder_params *d_params, +cs_etm_decoder__new(int decoders, struct cs_etm_decoder_params *d_params, struct cs_etm_trace_params t_params[]) { struct cs_etm_decoder *decoder; @@ -682,7 +758,7 @@ cs_etm_decoder__new(int num_cpu, struct cs_etm_decoder_params *d_params, /* init raw frame logging if required */ cs_etm_decoder__init_raw_frame_logging(d_params, decoder); - for (i = 0; i < num_cpu; i++) { + for (i = 0; i < decoders; i++) { ret = cs_etm_decoder__create_etm_decoder(d_params, &t_params[i], decoder); @@ -754,3 +830,8 @@ void cs_etm_decoder__free(struct cs_etm_decoder *decoder) decoder->dcd_tree = NULL; free(decoder); } + +const char *cs_etm_decoder__get_name(struct cs_etm_decoder *decoder) +{ + return decoder->decoder_name; +} |