aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf/util
diff options
context:
space:
mode:
author Ingo Molnar <mingo@kernel.org> 2019-06-22 08:27:05 +0200
committer Ingo Molnar <mingo@kernel.org> 2019-06-22 08:27:05 +0200
commit a3664a74a0aa0b11d8d4ade04984965b77d14d44 (patch)
tree 38d4309390ad01026ff59c2c9bd6441a9bde1363 /tools/perf/util
parent Merge tag 'perf-core-for-mingo-5.3-20190611' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core (diff)
parent tools build: Fix the zstd test in the test-all.c common case feature test (diff)
download linux-dev-a3664a74a0aa0b11d8d4ade04984965b77d14d44.tar.xz
linux-dev-a3664a74a0aa0b11d8d4ade04984965b77d14d44.zip
Merge tag 'perf-core-for-mingo-5.3-20190621' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo: perf trace: Arnaldo Carvalho de Melo: - Fix exclusion of not available syscall names from selector list. - Fixup pointer arithmetic when consuming augmented syscall args. Intel PT: Adrian Hunter: - Add support for decoding PEBS via PT packets. See: https://software.intel.com/en-us/articles/intel-sdm May 2019 version: Vol. 3B 18.5.5.2 PEBS output to Intel® Processor Trace for more details about it. ARM64: John Garry: - Fix uncore PMU alias list for ARM64 Raphael Gault: - Compile tests unconditionally. cs-etm: Mathieu Poirier: - Optimize option setup for CPU-wide sessions. build: Florian Fainelli: - Don't hardcode host include path for libslang, fixing up building with it in cross build environments. Arnaldo Carvalho de Melo: - Check if gettid() is available before providing helper, fixing the build when using the latest glibc version, where a helper for gettid() is finally present. - Fix building with libslang in systems where it is located in slang/slang.h. - Fix fast path test for zstd library. Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'tools/perf/util')
-rw-r--r-- tools/perf/util/evsel.c 8
-rw-r--r-- tools/perf/util/intel-pt-decoder/intel-pt-decoder.c 114
-rw-r--r-- tools/perf/util/intel-pt-decoder/intel-pt-decoder.h 137
-rw-r--r-- tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c 140
-rw-r--r-- tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h 21
-rw-r--r-- tools/perf/util/intel-pt.c 296
-rw-r--r-- tools/perf/util/pmu.c 28
7 files changed, 711 insertions, 33 deletions
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 0f506f10ecf0..04c4ed1573cb 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -589,6 +589,9 @@ const char *perf_evsel__name(struct perf_evsel *evsel)
{
char bf[128];
+ if (!evsel)
+ goto out_unknown;
+
if (evsel->name)
return evsel->name;
@@ -628,7 +631,10 @@ const char *perf_evsel__name(struct perf_evsel *evsel)
evsel->name = strdup(bf);
- return evsel->name ?: "unknown";
+ if (evsel->name)
+ return evsel->name;
+out_unknown:
+ return "unknown";
}
const char *perf_evsel__group_name(struct perf_evsel *evsel)
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index f001f4ec4ddf..f8b71bf2bb4c 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -133,6 +133,10 @@ struct intel_pt_decoder {
int mtc_shift;
struct intel_pt_stack stack;
enum intel_pt_pkt_state pkt_state;
+ enum intel_pt_pkt_ctx pkt_ctx;
+ enum intel_pt_pkt_ctx prev_pkt_ctx;
+ enum intel_pt_blk_type blk_type;
+ int blk_type_pos;
struct intel_pt_pkt packet;
struct intel_pt_pkt tnt;
int pkt_step;
@@ -166,6 +170,7 @@ struct intel_pt_decoder {
bool set_fup_mwait;
bool set_fup_pwre;
bool set_fup_exstop;
+ bool set_fup_bep;
bool sample_cyc;
unsigned int fup_tx_flags;
unsigned int tx_flags;
@@ -559,7 +564,8 @@ static int intel_pt_get_split_packet(struct intel_pt_decoder *decoder)
memcpy(buf + len, decoder->buf, n);
len += n;
- ret = intel_pt_get_packet(buf, len, &decoder->packet);
+ decoder->prev_pkt_ctx = decoder->pkt_ctx;
+ ret = intel_pt_get_packet(buf, len, &decoder->packet, &decoder->pkt_ctx);
if (ret < (int)old_len) {
decoder->next_buf = decoder->buf;
decoder->next_len = decoder->len;
@@ -594,6 +600,7 @@ static int intel_pt_pkt_lookahead(struct intel_pt_decoder *decoder,
{
struct intel_pt_pkt_info pkt_info;
const unsigned char *buf = decoder->buf;
+ enum intel_pt_pkt_ctx pkt_ctx = decoder->pkt_ctx;
size_t len = decoder->len;
int ret;
@@ -612,7 +619,8 @@ static int intel_pt_pkt_lookahead(struct intel_pt_decoder *decoder,
if (!len)
return INTEL_PT_NEED_MORE_BYTES;
- ret = intel_pt_get_packet(buf, len, &pkt_info.packet);
+ ret = intel_pt_get_packet(buf, len, &pkt_info.packet,
+ &pkt_ctx);
if (!ret)
return INTEL_PT_NEED_MORE_BYTES;
if (ret < 0)
@@ -687,6 +695,10 @@ static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info)
case INTEL_PT_MNT:
case INTEL_PT_PTWRITE:
case INTEL_PT_PTWRITE_IP:
+ case INTEL_PT_BBP:
+ case INTEL_PT_BIP:
+ case INTEL_PT_BEP:
+ case INTEL_PT_BEP_IP:
return 0;
case INTEL_PT_MTC:
@@ -878,8 +890,9 @@ static int intel_pt_get_next_packet(struct intel_pt_decoder *decoder)
return ret;
}
+ decoder->prev_pkt_ctx = decoder->pkt_ctx;
ret = intel_pt_get_packet(decoder->buf, decoder->len,
- &decoder->packet);
+ &decoder->packet, &decoder->pkt_ctx);
if (ret == INTEL_PT_NEED_MORE_BYTES && BITS_PER_LONG == 32 &&
decoder->len < INTEL_PT_PKT_MAX_SZ && !decoder->next_buf) {
ret = intel_pt_get_split_packet(decoder);
@@ -1117,6 +1130,14 @@ static bool intel_pt_fup_event(struct intel_pt_decoder *decoder)
decoder->state.to_ip = 0;
ret = true;
}
+ if (decoder->set_fup_bep) {
+ decoder->set_fup_bep = false;
+ decoder->state.type |= INTEL_PT_BLK_ITEMS;
+ decoder->state.type &= ~INTEL_PT_BRANCH;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ ret = true;
+ }
return ret;
}
@@ -1602,6 +1623,46 @@ static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder)
intel_pt_log_to("Setting timestamp", decoder->timestamp);
}
+static void intel_pt_bbp(struct intel_pt_decoder *decoder)
+{
+ if (decoder->prev_pkt_ctx == INTEL_PT_NO_CTX) {
+ memset(decoder->state.items.mask, 0, sizeof(decoder->state.items.mask));
+ decoder->state.items.is_32_bit = false;
+ }
+ decoder->blk_type = decoder->packet.payload;
+ decoder->blk_type_pos = intel_pt_blk_type_pos(decoder->blk_type);
+ if (decoder->blk_type == INTEL_PT_GP_REGS)
+ decoder->state.items.is_32_bit = decoder->packet.count;
+ if (decoder->blk_type_pos < 0) {
+ intel_pt_log("WARNING: Unknown block type %u\n",
+ decoder->blk_type);
+ } else if (decoder->state.items.mask[decoder->blk_type_pos]) {
+ intel_pt_log("WARNING: Duplicate block type %u\n",
+ decoder->blk_type);
+ }
+}
+
+/*
+ * Store the value carried by a BIP packet into the decoder's block items.
+ *
+ * The packet 'count' is used as the item id within the current block type
+ * (decoder->blk_type_pos) and the packet 'payload' is the item value. Items
+ * for an unknown block type, or with an id outside the items array, are
+ * logged and dropped. A duplicate item is logged and overwrites the previous
+ * value.
+ */
+static void intel_pt_bip(struct intel_pt_decoder *decoder)
+{
+	uint32_t id = decoder->packet.count;
+	int pos = decoder->blk_type_pos;
+	uint32_t bit;
+
+	if (pos < 0 || id >= INTEL_PT_BLK_ITEM_ID_CNT) {
+		intel_pt_log("WARNING: Unknown block item %u type %d\n",
+			     id, decoder->blk_type);
+		return;
+	}
+
+	/*
+	 * Shift only after 'id' has been validated, and shift an unsigned
+	 * constant so that id 31 does not overflow a signed int.
+	 */
+	bit = 1U << id;
+
+	if (decoder->state.items.mask[pos] & bit) {
+		intel_pt_log("WARNING: Duplicate block item %u type %d\n",
+			     id, decoder->blk_type);
+	}
+
+	decoder->state.items.mask[pos] |= bit;
+	decoder->state.items.val[pos][id] = decoder->packet.payload;
+}
+
/* Walk PSB+ packets when already in sync. */
static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
{
@@ -1633,6 +1694,10 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
case INTEL_PT_MWAIT:
case INTEL_PT_PWRE:
case INTEL_PT_PWRX:
+ case INTEL_PT_BBP:
+ case INTEL_PT_BIP:
+ case INTEL_PT_BEP:
+ case INTEL_PT_BEP_IP:
decoder->have_tma = false;
intel_pt_log("ERROR: Unexpected packet\n");
err = -EAGAIN;
@@ -1726,6 +1791,10 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
case INTEL_PT_MWAIT:
case INTEL_PT_PWRE:
case INTEL_PT_PWRX:
+ case INTEL_PT_BBP:
+ case INTEL_PT_BIP:
+ case INTEL_PT_BEP:
+ case INTEL_PT_BEP_IP:
intel_pt_log("ERROR: Missing TIP after FUP\n");
decoder->pkt_state = INTEL_PT_STATE_ERR3;
decoder->pkt_step = 0;
@@ -2047,6 +2116,33 @@ next:
decoder->state.pwrx_payload = decoder->packet.payload;
return 0;
+ case INTEL_PT_BBP:
+ intel_pt_bbp(decoder);
+ break;
+
+ case INTEL_PT_BIP:
+ intel_pt_bip(decoder);
+ break;
+
+ case INTEL_PT_BEP:
+ decoder->state.type = INTEL_PT_BLK_ITEMS;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ return 0;
+
+ case INTEL_PT_BEP_IP:
+ err = intel_pt_get_next_packet(decoder);
+ if (err)
+ return err;
+ if (decoder->packet.type == INTEL_PT_FUP) {
+ decoder->set_fup_bep = true;
+ no_tip = true;
+ } else {
+ intel_pt_log_at("ERROR: Missing FUP after BEP",
+ decoder->pos);
+ }
+ goto next;
+
default:
return intel_pt_bug(decoder);
}
@@ -2085,6 +2181,10 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
case INTEL_PT_MWAIT:
case INTEL_PT_PWRE:
case INTEL_PT_PWRX:
+ case INTEL_PT_BBP:
+ case INTEL_PT_BIP:
+ case INTEL_PT_BEP:
+ case INTEL_PT_BEP_IP:
intel_pt_log("ERROR: Unexpected packet\n");
err = -ENOENT;
goto out;
@@ -2291,6 +2391,10 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
case INTEL_PT_MWAIT:
case INTEL_PT_PWRE:
case INTEL_PT_PWRX:
+ case INTEL_PT_BBP:
+ case INTEL_PT_BIP:
+ case INTEL_PT_BEP:
+ case INTEL_PT_BEP_IP:
default:
break;
}
@@ -2306,6 +2410,7 @@ static int intel_pt_sync_ip(struct intel_pt_decoder *decoder)
decoder->set_fup_mwait = false;
decoder->set_fup_pwre = false;
decoder->set_fup_exstop = false;
+ decoder->set_fup_bep = false;
if (!decoder->branch_enable) {
decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
@@ -2641,11 +2746,12 @@ static unsigned char *intel_pt_last_psb(unsigned char *buf, size_t len)
static bool intel_pt_next_tsc(unsigned char *buf, size_t len, uint64_t *tsc,
size_t *rem)
{
+ enum intel_pt_pkt_ctx ctx = INTEL_PT_NO_CTX;
struct intel_pt_pkt packet;
int ret;
while (len) {
- ret = intel_pt_get_packet(buf, len, &packet);
+ ret = intel_pt_get_packet(buf, len, &packet, &ctx);
if (ret <= 0)
return false;
if (packet.type == INTEL_PT_TSC) {
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
index 754efa8b501f..9957f2ccdca8 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
@@ -30,6 +30,7 @@ enum intel_pt_sample_type {
INTEL_PT_CBR_CHG = 1 << 8,
INTEL_PT_TRACE_BEGIN = 1 << 9,
INTEL_PT_TRACE_END = 1 << 10,
+ INTEL_PT_BLK_ITEMS = 1 << 11,
};
enum intel_pt_period_type {
@@ -61,6 +62,141 @@ enum intel_pt_param_flags {
INTEL_PT_FUP_WITH_NLIP = 1 << 0,
};
+enum intel_pt_blk_type {
+ INTEL_PT_GP_REGS = 1,
+ INTEL_PT_PEBS_BASIC = 4,
+ INTEL_PT_PEBS_MEM = 5,
+ INTEL_PT_LBR_0 = 8,
+ INTEL_PT_LBR_1 = 9,
+ INTEL_PT_LBR_2 = 10,
+ INTEL_PT_XMM = 16,
+ INTEL_PT_BLK_TYPE_MAX
+};
+
+/*
+ * The block type numbers are not sequential but here they are given sequential
+ * positions to avoid wasting space for array placement.
+ */
+enum intel_pt_blk_type_pos {
+ INTEL_PT_GP_REGS_POS,
+ INTEL_PT_PEBS_BASIC_POS,
+ INTEL_PT_PEBS_MEM_POS,
+ INTEL_PT_LBR_0_POS,
+ INTEL_PT_LBR_1_POS,
+ INTEL_PT_LBR_2_POS,
+ INTEL_PT_XMM_POS,
+ INTEL_PT_BLK_TYPE_CNT
+};
+
+/*
+ * Get the array position for a block type.
+ *
+ * Returns the INTEL_PT_*_POS value that corresponds to 'blk_type', or -1 if
+ * 'blk_type' is out of range or is not one of the known block types.
+ */
+static inline int intel_pt_blk_type_pos(enum intel_pt_blk_type blk_type)
+{
+	/*
+	 * Entries hold position + 1 so that slots left zero by the designated
+	 * initializers mean "unknown". The table is static so that it is not
+	 * rebuilt on every call.
+	 */
+#define BLK_TYPE(bt) [INTEL_PT_##bt] = INTEL_PT_##bt##_POS + 1
+	static const int map[INTEL_PT_BLK_TYPE_MAX] = {
+		BLK_TYPE(GP_REGS),
+		BLK_TYPE(PEBS_BASIC),
+		BLK_TYPE(PEBS_MEM),
+		BLK_TYPE(LBR_0),
+		BLK_TYPE(LBR_1),
+		BLK_TYPE(LBR_2),
+		BLK_TYPE(XMM),
+	};
+#undef BLK_TYPE
+
+	/* Compare as unsigned so the check does not depend on enum signedness */
+	return (unsigned int)blk_type < INTEL_PT_BLK_TYPE_MAX ?
+	       map[blk_type] - 1 : -1;
+}
+
+#define INTEL_PT_BLK_ITEM_ID_CNT 32
+
+/*
+ * Use unions so that the block items can be accessed by name or by array index.
+ * There is an array of 32-bit masks for each block type, which indicate which
+ * values are present. Then arrays of 32 64-bit values for each block type.
+ */
+struct intel_pt_blk_items {
+ union {
+ uint32_t mask[INTEL_PT_BLK_TYPE_CNT];
+ struct {
+ uint32_t has_rflags:1;
+ uint32_t has_rip:1;
+ uint32_t has_rax:1;
+ uint32_t has_rcx:1;
+ uint32_t has_rdx:1;
+ uint32_t has_rbx:1;
+ uint32_t has_rsp:1;
+ uint32_t has_rbp:1;
+ uint32_t has_rsi:1;
+ uint32_t has_rdi:1;
+ uint32_t has_r8:1;
+ uint32_t has_r9:1;
+ uint32_t has_r10:1;
+ uint32_t has_r11:1;
+ uint32_t has_r12:1;
+ uint32_t has_r13:1;
+ uint32_t has_r14:1;
+ uint32_t has_r15:1;
+ uint32_t has_unused_0:14;
+ uint32_t has_ip:1;
+ uint32_t has_applicable_counters:1;
+ uint32_t has_timestamp:1;
+ uint32_t has_unused_1:29;
+ uint32_t has_mem_access_address:1;
+ uint32_t has_mem_aux_info:1;
+ uint32_t has_mem_access_latency:1;
+ uint32_t has_tsx_aux_info:1;
+ uint32_t has_unused_2:28;
+ uint32_t has_lbr_0;
+ uint32_t has_lbr_1;
+ uint32_t has_lbr_2;
+ uint32_t has_xmm;
+ };
+ };
+ union {
+ uint64_t val[INTEL_PT_BLK_TYPE_CNT][INTEL_PT_BLK_ITEM_ID_CNT];
+ struct {
+ struct {
+ uint64_t rflags;
+ uint64_t rip;
+ uint64_t rax;
+ uint64_t rcx;
+ uint64_t rdx;
+ uint64_t rbx;
+ uint64_t rsp;
+ uint64_t rbp;
+ uint64_t rsi;
+ uint64_t rdi;
+ uint64_t r8;
+ uint64_t r9;
+ uint64_t r10;
+ uint64_t r11;
+ uint64_t r12;
+ uint64_t r13;
+ uint64_t r14;
+ uint64_t r15;
+ uint64_t unused_0[INTEL_PT_BLK_ITEM_ID_CNT - 18];
+ };
+ struct {
+ uint64_t ip;
+ uint64_t applicable_counters;
+ uint64_t timestamp;
+ uint64_t unused_1[INTEL_PT_BLK_ITEM_ID_CNT - 3];
+ };
+ struct {
+ uint64_t mem_access_address;
+ uint64_t mem_aux_info;
+ uint64_t mem_access_latency;
+ uint64_t tsx_aux_info;
+ uint64_t unused_2[INTEL_PT_BLK_ITEM_ID_CNT - 4];
+ };
+ uint64_t lbr_0[INTEL_PT_BLK_ITEM_ID_CNT];
+ uint64_t lbr_1[INTEL_PT_BLK_ITEM_ID_CNT];
+ uint64_t lbr_2[INTEL_PT_BLK_ITEM_ID_CNT];
+ uint64_t xmm[INTEL_PT_BLK_ITEM_ID_CNT];
+ };
+ };
+ bool is_32_bit;
+};
+
struct intel_pt_state {
enum intel_pt_sample_type type;
int err;
@@ -81,6 +217,7 @@ struct intel_pt_state {
enum intel_pt_insn_op insn_op;
int insn_len;
char insn[INTEL_PT_INSN_BUF_SZ];
+ struct intel_pt_blk_items items;
};
struct intel_pt_insn;
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
index 605fce537d80..0ccf10a0bf44 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
@@ -62,6 +62,10 @@ static const char * const packet_name[] = {
[INTEL_PT_MWAIT] = "MWAIT",
[INTEL_PT_PWRE] = "PWRE",
[INTEL_PT_PWRX] = "PWRX",
+ [INTEL_PT_BBP] = "BBP",
+ [INTEL_PT_BIP] = "BIP",
+ [INTEL_PT_BEP] = "BEP",
+ [INTEL_PT_BEP_IP] = "BEP",
};
const char *intel_pt_pkt_name(enum intel_pt_pkt_type type)
@@ -280,6 +284,55 @@ static int intel_pt_get_pwrx(const unsigned char *buf, size_t len,
return 7;
}
+static int intel_pt_get_bbp(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 3)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_BBP;
+ packet->count = buf[2] >> 7;
+ packet->payload = buf[2] & 0x1f;
+ return 3;
+}
+
+static int intel_pt_get_bip_4(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 5)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_BIP;
+ packet->count = buf[0] >> 3;
+ memcpy_le64(&packet->payload, buf + 1, 4);
+ return 5;
+}
+
+static int intel_pt_get_bip_8(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 9)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_BIP;
+ packet->count = buf[0] >> 3;
+ memcpy_le64(&packet->payload, buf + 1, 8);
+ return 9;
+}
+
+static int intel_pt_get_bep(size_t len, struct intel_pt_pkt *packet)
+{
+ if (len < 2)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_BEP;
+ return 2;
+}
+
+static int intel_pt_get_bep_ip(size_t len, struct intel_pt_pkt *packet)
+{
+ if (len < 2)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_BEP_IP;
+ return 2;
+}
+
static int intel_pt_get_ext(const unsigned char *buf, size_t len,
struct intel_pt_pkt *packet)
{
@@ -320,6 +373,12 @@ static int intel_pt_get_ext(const unsigned char *buf, size_t len,
return intel_pt_get_pwre(buf, len, packet);
case 0xA2: /* PWRX */
return intel_pt_get_pwrx(buf, len, packet);
+ case 0x63: /* BBP */
+ return intel_pt_get_bbp(buf, len, packet);
+ case 0x33: /* BEP no IP */
+ return intel_pt_get_bep(len, packet);
+ case 0xb3: /* BEP with IP */
+ return intel_pt_get_bep_ip(len, packet);
default:
return INTEL_PT_BAD_PACKET;
}
@@ -468,7 +527,8 @@ static int intel_pt_get_mtc(const unsigned char *buf, size_t len,
}
static int intel_pt_do_get_packet(const unsigned char *buf, size_t len,
- struct intel_pt_pkt *packet)
+ struct intel_pt_pkt *packet,
+ enum intel_pt_pkt_ctx ctx)
{
unsigned int byte;
@@ -478,6 +538,22 @@ static int intel_pt_do_get_packet(const unsigned char *buf, size_t len,
return INTEL_PT_NEED_MORE_BYTES;
byte = buf[0];
+
+ switch (ctx) {
+ case INTEL_PT_NO_CTX:
+ break;
+ case INTEL_PT_BLK_4_CTX:
+ if ((byte & 0x7) == 4)
+ return intel_pt_get_bip_4(buf, len, packet);
+ break;
+ case INTEL_PT_BLK_8_CTX:
+ if ((byte & 0x7) == 4)
+ return intel_pt_get_bip_8(buf, len, packet);
+ break;
+ default:
+ break;
+ };
+
if (!(byte & BIT(0))) {
if (byte == 0)
return intel_pt_get_pad(packet);
@@ -516,15 +592,65 @@ static int intel_pt_do_get_packet(const unsigned char *buf, size_t len,
}
}
+void intel_pt_upd_pkt_ctx(const struct intel_pt_pkt *packet,
+ enum intel_pt_pkt_ctx *ctx)
+{
+ switch (packet->type) {
+ case INTEL_PT_BAD:
+ case INTEL_PT_PAD:
+ case INTEL_PT_TSC:
+ case INTEL_PT_TMA:
+ case INTEL_PT_MTC:
+ case INTEL_PT_FUP:
+ case INTEL_PT_CYC:
+ case INTEL_PT_CBR:
+ case INTEL_PT_MNT:
+ case INTEL_PT_EXSTOP:
+ case INTEL_PT_EXSTOP_IP:
+ case INTEL_PT_PWRE:
+ case INTEL_PT_PWRX:
+ case INTEL_PT_BIP:
+ break;
+ case INTEL_PT_TNT:
+ case INTEL_PT_TIP:
+ case INTEL_PT_TIP_PGD:
+ case INTEL_PT_TIP_PGE:
+ case INTEL_PT_MODE_EXEC:
+ case INTEL_PT_MODE_TSX:
+ case INTEL_PT_PIP:
+ case INTEL_PT_OVF:
+ case INTEL_PT_VMCS:
+ case INTEL_PT_TRACESTOP:
+ case INTEL_PT_PSB:
+ case INTEL_PT_PSBEND:
+ case INTEL_PT_PTWRITE:
+ case INTEL_PT_PTWRITE_IP:
+ case INTEL_PT_MWAIT:
+ case INTEL_PT_BEP:
+ case INTEL_PT_BEP_IP:
+ *ctx = INTEL_PT_NO_CTX;
+ break;
+ case INTEL_PT_BBP:
+ if (packet->count)
+ *ctx = INTEL_PT_BLK_4_CTX;
+ else
+ *ctx = INTEL_PT_BLK_8_CTX;
+ break;
+ default:
+ break;
+ }
+}
+
int intel_pt_get_packet(const unsigned char *buf, size_t len,
- struct intel_pt_pkt *packet)
+ struct intel_pt_pkt *packet, enum intel_pt_pkt_ctx *ctx)
{
int ret;
- ret = intel_pt_do_get_packet(buf, len, packet);
+ ret = intel_pt_do_get_packet(buf, len, packet, *ctx);
if (ret > 0) {
while (ret < 8 && len > (size_t)ret && !buf[ret])
ret += 1;
+ intel_pt_upd_pkt_ctx(packet, ctx);
}
return ret;
}
@@ -602,8 +728,10 @@ int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf,
return snprintf(buf, buf_len, "%s 0x%llx IP:0", name, payload);
case INTEL_PT_PTWRITE_IP:
return snprintf(buf, buf_len, "%s 0x%llx IP:1", name, payload);
+ case INTEL_PT_BEP:
case INTEL_PT_EXSTOP:
return snprintf(buf, buf_len, "%s IP:0", name);
+ case INTEL_PT_BEP_IP:
case INTEL_PT_EXSTOP_IP:
return snprintf(buf, buf_len, "%s IP:1", name);
case INTEL_PT_MWAIT:
@@ -621,6 +749,12 @@ int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf,
(unsigned int)((payload >> 4) & 0xf),
(unsigned int)(payload & 0xf),
(unsigned int)((payload >> 8) & 0xf));
+ case INTEL_PT_BBP:
+ return snprintf(buf, buf_len, "%s SZ %s-byte Type 0x%llx",
+ name, packet->count ? "4" : "8", payload);
+ case INTEL_PT_BIP:
+ return snprintf(buf, buf_len, "%s ID 0x%02x Value 0x%llx",
+ name, packet->count, payload);
default:
break;
}
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
index a7aefaa08588..17ca9b56d72f 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
@@ -50,6 +50,10 @@ enum intel_pt_pkt_type {
INTEL_PT_MWAIT,
INTEL_PT_PWRE,
INTEL_PT_PWRX,
+ INTEL_PT_BBP,
+ INTEL_PT_BIP,
+ INTEL_PT_BEP,
+ INTEL_PT_BEP_IP,
};
struct intel_pt_pkt {
@@ -58,10 +62,25 @@ struct intel_pt_pkt {
uint64_t payload;
};
+/*
+ * Decoding of BIP packets conflicts with single-byte TNT packets. Since BIP
+ * packets only occur in the context of a block (i.e. between BBP and BEP), that
+ * context must be recorded and passed to the packet decoder.
+ */
+enum intel_pt_pkt_ctx {
+ INTEL_PT_NO_CTX, /* BIP packets are invalid */
+ INTEL_PT_BLK_4_CTX, /* 4-byte BIP packets */
+ INTEL_PT_BLK_8_CTX, /* 8-byte BIP packets */
+};
+
const char *intel_pt_pkt_name(enum intel_pt_pkt_type);
int intel_pt_get_packet(const unsigned char *buf, size_t len,
- struct intel_pt_pkt *packet);
+ struct intel_pt_pkt *packet,
+ enum intel_pt_pkt_ctx *ctx);
+
+void intel_pt_upd_pkt_ctx(const struct intel_pt_pkt *packet,
+ enum intel_pt_pkt_ctx *ctx);
int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf, size_t len);
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 8ed51f4e9e30..550db6e77968 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -35,6 +35,8 @@
#include "config.h"
#include "time-utils.h"
+#include "../arch/x86/include/uapi/asm/perf_regs.h"
+
#include "intel-pt-decoder/intel-pt-log.h"
#include "intel-pt-decoder/intel-pt-decoder.h"
#include "intel-pt-decoder/intel-pt-insn-decoder.h"
@@ -101,6 +103,9 @@ struct intel_pt {
u64 pwrx_id;
u64 cbr_id;
+ bool sample_pebs;
+ struct perf_evsel *pebs_evsel;
+
u64 tsc_bit;
u64 mtc_bit;
u64 mtc_freq_bits;
@@ -177,13 +182,14 @@ static void intel_pt_dump(struct intel_pt *pt __maybe_unused,
int ret, pkt_len, i;
char desc[INTEL_PT_PKT_DESC_MAX];
const char *color = PERF_COLOR_BLUE;
+ enum intel_pt_pkt_ctx ctx = INTEL_PT_NO_CTX;
color_fprintf(stdout, color,
". ... Intel Processor Trace data: size %zu bytes\n",
len);
while (len) {
- ret = intel_pt_get_packet(buf, len, &packet);
+ ret = intel_pt_get_packet(buf, len, &packet, &ctx);
if (ret > 0)
pkt_len = ret;
else
@@ -1178,28 +1184,37 @@ static inline bool intel_pt_skip_event(struct intel_pt *pt)
pt->num_events++ < pt->synth_opts.initial_skip;
}
+static void intel_pt_prep_a_sample(struct intel_pt_queue *ptq,
+ union perf_event *event,
+ struct perf_sample *sample)
+{
+ event->sample.header.type = PERF_RECORD_SAMPLE;
+ event->sample.header.size = sizeof(struct perf_event_header);
+
+ sample->pid = ptq->pid;
+ sample->tid = ptq->tid;
+ sample->cpu = ptq->cpu;
+ sample->insn_len = ptq->insn_len;
+ memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
+}
+
static void intel_pt_prep_b_sample(struct intel_pt *pt,
struct intel_pt_queue *ptq,
union perf_event *event,
struct perf_sample *sample)
{
+ intel_pt_prep_a_sample(ptq, event, sample);
+
if (!pt->timeless_decoding)
sample->time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
sample->ip = ptq->state->from_ip;
sample->cpumode = intel_pt_cpumode(pt, sample->ip);
- sample->pid = ptq->pid;
- sample->tid = ptq->tid;
sample->addr = ptq->state->to_ip;
sample->period = 1;
- sample->cpu = ptq->cpu;
sample->flags = ptq->flags;
- sample->insn_len = ptq->insn_len;
- memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
- event->sample.header.type = PERF_RECORD_SAMPLE;
event->sample.header.misc = sample->cpumode;
- event->sample.header.size = sizeof(struct perf_event_header);
}
static int intel_pt_inject_event(union perf_event *event,
@@ -1534,6 +1549,261 @@ static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq)
pt->pwr_events_sample_type);
}
+/*
+ * PEBS gp_regs array indexes plus 1 so that 0 means not present. Refer
+ * intel_pt_add_gp_regs().
+ */
+static const int pebs_gp_regs[] = {
+ [PERF_REG_X86_FLAGS] = 1,
+ [PERF_REG_X86_IP] = 2,
+ [PERF_REG_X86_AX] = 3,
+ [PERF_REG_X86_CX] = 4,
+ [PERF_REG_X86_DX] = 5,
+ [PERF_REG_X86_BX] = 6,
+ [PERF_REG_X86_SP] = 7,
+ [PERF_REG_X86_BP] = 8,
+ [PERF_REG_X86_SI] = 9,
+ [PERF_REG_X86_DI] = 10,
+ [PERF_REG_X86_R8] = 11,
+ [PERF_REG_X86_R9] = 12,
+ [PERF_REG_X86_R10] = 13,
+ [PERF_REG_X86_R11] = 14,
+ [PERF_REG_X86_R12] = 15,
+ [PERF_REG_X86_R13] = 16,
+ [PERF_REG_X86_R14] = 17,
+ [PERF_REG_X86_R15] = 18,
+};
+
+static u64 *intel_pt_add_gp_regs(struct regs_dump *intr_regs, u64 *pos,
+ const struct intel_pt_blk_items *items,
+ u64 regs_mask)
+{
+ const u64 *gp_regs = items->val[INTEL_PT_GP_REGS_POS];
+ u32 mask = items->mask[INTEL_PT_GP_REGS_POS];
+ u32 bit;
+ int i;
+
+ for (i = 0, bit = 1; i < PERF_REG_X86_64_MAX; i++, bit <<= 1) {
+ /* Get the PEBS gp_regs array index */
+ int n = pebs_gp_regs[i] - 1;
+
+ if (n < 0)
+ continue;
+ /*
+ * Add only registers that were requested (i.e. 'regs_mask') and
+ * that were provided (i.e. 'mask'), and update the resulting
+ * mask (i.e. 'intr_regs->mask') accordingly.
+ */
+ if (mask & 1 << n && regs_mask & bit) {
+ intr_regs->mask |= bit;
+ *pos++ = gp_regs[n];
+ }
+ }
+
+ return pos;
+}
+
+#ifndef PERF_REG_X86_XMM0
+#define PERF_REG_X86_XMM0 32
+#endif
+
+static void intel_pt_add_xmm(struct regs_dump *intr_regs, u64 *pos,
+ const struct intel_pt_blk_items *items,
+ u64 regs_mask)
+{
+ u32 mask = items->has_xmm & (regs_mask >> PERF_REG_X86_XMM0);
+ const u64 *xmm = items->xmm;
+
+ /*
+ * If there are any XMM registers, then there should be all of them.
+ * Nevertheless, follow the logic to add only registers that were
+ * requested (i.e. 'regs_mask') and that were provided (i.e. 'mask'),
+ * and update the resulting mask (i.e. 'intr_regs->mask') accordingly.
+ */
+ intr_regs->mask |= (u64)mask << PERF_REG_X86_XMM0;
+
+ for (; mask; mask >>= 1, xmm++) {
+ if (mask & 1)
+ *pos++ = *xmm;
+ }
+}
+
+#define LBR_INFO_MISPRED (1ULL << 63)
+#define LBR_INFO_IN_TX (1ULL << 62)
+#define LBR_INFO_ABORT (1ULL << 61)
+#define LBR_INFO_CYCLES 0xffff
+
+/* Refer kernel's intel_pmu_store_pebs_lbrs() */
+static u64 intel_pt_lbr_flags(u64 info)
+{
+ union {
+ struct branch_flags flags;
+ u64 result;
+ } u = {
+ .flags = {
+ .mispred = !!(info & LBR_INFO_MISPRED),
+ .predicted = !(info & LBR_INFO_MISPRED),
+ .in_tx = !!(info & LBR_INFO_IN_TX),
+ .abort = !!(info & LBR_INFO_ABORT),
+ .cycles = info & LBR_INFO_CYCLES,
+ }
+ };
+
+ return u.result;
+}
+
+static void intel_pt_add_lbrs(struct branch_stack *br_stack,
+ const struct intel_pt_blk_items *items)
+{
+ u64 *to;
+ int i;
+
+ br_stack->nr = 0;
+
+ to = &br_stack->entries[0].from;
+
+ for (i = INTEL_PT_LBR_0_POS; i <= INTEL_PT_LBR_2_POS; i++) {
+ u32 mask = items->mask[i];
+ const u64 *from = items->val[i];
+
+ for (; mask; mask >>= 3, from += 3) {
+ if ((mask & 7) == 7) {
+ *to++ = from[0];
+ *to++ = from[1];
+ *to++ = intel_pt_lbr_flags(from[2]);
+ br_stack->nr += 1;
+ }
+ }
+ }
+}
+
+/* INTEL_PT_LBR_0, INTEL_PT_LBR_1 and INTEL_PT_LBR_2 */
+#define LBRS_MAX (INTEL_PT_BLK_ITEM_ID_CNT * 3)
+
+/*
+ * Synthesize a sample for pt->pebs_evsel from the block items held in
+ * ptq->state->items and deliver it.
+ *
+ * IP, time, interrupt registers, branch stack, address, weight and
+ * transaction are filled in from the block items that are present, subject to
+ * the evsel's sample_type.
+ *
+ * Returns 0 if the event is skipped (see intel_pt_skip_event()), otherwise
+ * the result of intel_pt_deliver_synth_event().
+ */
+static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq)
+{
+	const struct intel_pt_blk_items *items = &ptq->state->items;
+	struct perf_sample sample = { .ip = 0, };
+	union perf_event *event = ptq->event_buf;
+	struct intel_pt *pt = ptq->pt;
+	struct perf_evsel *evsel = pt->pebs_evsel;
+	u64 sample_type = evsel->attr.sample_type;
+	u64 id = evsel->id[0];
+	u8 cpumode;
+	/*
+	 * 'sample' points at the two buffers below until it is delivered at
+	 * the end of this function, so they need function scope. 'regs' needs
+	 * one slot per bit of the register mask (not one per byte), because
+	 * intel_pt_add_gp_regs() and intel_pt_add_xmm() may store one value
+	 * for each bit that is set.
+	 */
+	u64 regs[8 * sizeof(sample.intr_regs.mask)];
+	struct {
+		struct branch_stack br_stack;
+		struct branch_entry entries[LBRS_MAX];
+	} br;
+
+	if (intel_pt_skip_event(pt))
+		return 0;
+
+	intel_pt_prep_a_sample(ptq, event, &sample);
+
+	sample.id = id;
+	sample.stream_id = id;
+
+	if (!evsel->attr.freq)
+		sample.period = evsel->attr.sample_period;
+
+	/* No support for non-zero CS base */
+	if (items->has_ip)
+		sample.ip = items->ip;
+	else if (items->has_rip)
+		sample.ip = items->rip;
+	else
+		sample.ip = ptq->state->from_ip;
+
+	/* No support for guest mode at this time */
+	cpumode = sample.ip < ptq->pt->kernel_start ?
+		  PERF_RECORD_MISC_USER :
+		  PERF_RECORD_MISC_KERNEL;
+
+	event->sample.header.misc = cpumode | PERF_RECORD_MISC_EXACT_IP;
+
+	sample.cpumode = cpumode;
+
+	if (sample_type & PERF_SAMPLE_TIME) {
+		u64 timestamp = 0;
+
+		/* Prefer the timestamp recorded in the block items */
+		if (items->has_timestamp)
+			timestamp = items->timestamp;
+		else if (!pt->timeless_decoding)
+			timestamp = ptq->timestamp;
+		if (timestamp)
+			sample.time = tsc_to_perf_time(timestamp, &pt->tc);
+	}
+
+	if (sample_type & PERF_SAMPLE_CALLCHAIN &&
+	    pt->synth_opts.callchain) {
+		thread_stack__sample(ptq->thread, ptq->cpu, ptq->chain,
+				     pt->synth_opts.callchain_sz, sample.ip,
+				     pt->kernel_start);
+		sample.callchain = ptq->chain;
+	}
+
+	if (sample_type & PERF_SAMPLE_REGS_INTR &&
+	    items->mask[INTEL_PT_GP_REGS_POS]) {
+		u64 regs_mask = evsel->attr.sample_regs_intr;
+		u64 *pos;
+
+		sample.intr_regs.abi = items->is_32_bit ?
+				       PERF_SAMPLE_REGS_ABI_32 :
+				       PERF_SAMPLE_REGS_ABI_64;
+		sample.intr_regs.regs = regs;
+
+		pos = intel_pt_add_gp_regs(&sample.intr_regs, regs, items, regs_mask);
+
+		intel_pt_add_xmm(&sample.intr_regs, pos, items, regs_mask);
+	}
+
+	if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
+		if (items->mask[INTEL_PT_LBR_0_POS] ||
+		    items->mask[INTEL_PT_LBR_1_POS] ||
+		    items->mask[INTEL_PT_LBR_2_POS]) {
+			intel_pt_add_lbrs(&br.br_stack, items);
+			sample.branch_stack = &br.br_stack;
+		} else if (pt->synth_opts.last_branch) {
+			intel_pt_copy_last_branch_rb(ptq);
+			sample.branch_stack = ptq->last_branch;
+		} else {
+			/* Nothing available: provide an empty branch stack */
+			br.br_stack.nr = 0;
+			sample.branch_stack = &br.br_stack;
+		}
+	}
+
+	if (sample_type & PERF_SAMPLE_ADDR && items->has_mem_access_address)
+		sample.addr = items->mem_access_address;
+
+	if (sample_type & PERF_SAMPLE_WEIGHT) {
+		/*
+		 * Refer kernel's setup_pebs_adaptive_sample_data() and
+		 * intel_hsw_weight().
+		 */
+		if (items->has_mem_access_latency)
+			sample.weight = items->mem_access_latency;
+		if (!sample.weight && items->has_tsx_aux_info) {
+			/* Cycles last block */
+			sample.weight = (u32)items->tsx_aux_info;
+		}
+	}
+
+	if (sample_type & PERF_SAMPLE_TRANSACTION && items->has_tsx_aux_info) {
+		u64 ax = items->has_rax ? items->rax : 0;
+		/* Refer kernel's intel_hsw_transaction() */
+		u64 txn = (u8)(items->tsx_aux_info >> 32);
+
+		/* For RTM XABORTs also log the abort code from AX */
+		if (txn & PERF_TXN_TRANSACTION && ax & 1)
+			txn |= ((ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
+		sample.transaction = txn;
+	}
+
+	return intel_pt_deliver_synth_event(pt, ptq, event, &sample, sample_type);
+}
+
static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
pid_t pid, pid_t tid, u64 ip, u64 timestamp)
{
@@ -1621,6 +1891,16 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
ptq->ipc_cyc_cnt = ptq->state->tot_cyc_cnt;
}
+ /*
+ * Do PEBS first to allow for the possibility that the PEBS timestamp
+ * precedes the current timestamp.
+ */
+ if (pt->sample_pebs && state->type & INTEL_PT_BLK_ITEMS) {
+ err = intel_pt_synth_pebs_sample(ptq);
+ if (err)
+ return err;
+ }
+
if (pt->sample_pwr_events && (state->type & INTEL_PT_PWR_EVT)) {
if (state->type & INTEL_PT_CBR_CHG) {
err = intel_pt_synth_cbr_sample(ptq);
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index e0429f4ef335..faa8eb231e1b 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -709,9 +709,7 @@ static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu)
{
int i;
struct pmu_events_map *map;
- struct pmu_event *pe;
const char *name = pmu->name;
- const char *pname;
map = perf_pmu__find_map(pmu);
if (!map)
@@ -722,28 +720,26 @@ static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu)
*/
i = 0;
while (1) {
+ const char *cpu_name = is_arm_pmu_core(name) ? name : "cpu";
+ struct pmu_event *pe = &map->table[i++];
+ const char *pname = pe->pmu ? pe->pmu : cpu_name;
- pe = &map->table[i++];
if (!pe->name) {
if (pe->metric_group || pe->metric_name)
continue;
break;
}
- if (!is_arm_pmu_core(name)) {
- pname = pe->pmu ? pe->pmu : "cpu";
-
- /*
- * uncore alias may be from different PMU
- * with common prefix
- */
- if (pmu_is_uncore(name) &&
- !strncmp(pname, name, strlen(pname)))
- goto new_alias;
+ /*
+ * uncore alias may be from different PMU
+ * with common prefix
+ */
+ if (pmu_is_uncore(name) &&
+ !strncmp(pname, name, strlen(pname)))
+ goto new_alias;
- if (strcmp(pname, name))
- continue;
- }
+ if (strcmp(pname, name))
+ continue;
new_alias:
/* need type casts to override 'const' */