author    Linus Torvalds <torvalds@linux-foundation.org>  2022-10-10 09:27:46 -0700
committer Linus Torvalds <torvalds@linux-foundation.org>  2022-10-10 09:27:46 -0700
commit    3871d93b82a4a6c1f4308064f046a544f16ada21 (patch)
tree      99ef06b536798100cd7abb4753263505cc465215 /arch/x86/events/amd/ibs.c
parent    Merge tag 'sched-core-2022-10-07' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip (diff)
parent    perf/hw_breakpoint: Annotate tsk->perf_event_mutex vs ctx->mutex (diff)
download  linux-dev-3871d93b82a4a6c1f4308064f046a544f16ada21.tar.xz
          linux-dev-3871d93b82a4a6c1f4308064f046a544f16ada21.zip
Merge tag 'perf-core-2022-10-07' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf events updates from Ingo Molnar:

 "PMU driver updates:

   - Add AMD Last Branch Record Extension Version 2 (LbrExtV2) feature
     support for Zen 4 processors.

   - Extend the perf ABI to provide branch speculation information, if
     available, and use this on CPUs that have it (eg. LbrExtV2).

   - Improve Intel PEBS TSC timestamp handling & integration.

   - Add Intel Raptor Lake S CPU support.

   - Add 'perf mem' and 'perf c2c' memory profiling support on AMD CPUs
     by utilizing IBS tagged load/store samples.

   - Clean up & optimize various x86 PMU details.

  HW breakpoints:

   - Big rework to optimize the code for systems with hundreds of CPUs
     and thousands of breakpoints:

      - Replace the nr_bp_mutex global mutex with the bp_cpuinfo_sem
        per-CPU rwsem that is read-locked during most of the key
        operations.

      - Improve the O(#cpus * #tasks) logic in toggle_bp_slot() and
        fetch_bp_busy_slots().

      - Apply micro-optimizations & cleanups.

  - Misc cleanups & enhancements"

* tag 'perf-core-2022-10-07' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (75 commits)
  perf/hw_breakpoint: Annotate tsk->perf_event_mutex vs ctx->mutex
  perf: Fix pmu_filter_match()
  perf: Fix lockdep_assert_event_ctx()
  perf/x86/amd/lbr: Adjust LBR regardless of filtering
  perf/x86/utils: Fix uninitialized var in get_branch_type()
  perf/uapi: Define PERF_MEM_SNOOPX_PEER in kernel header file
  perf/x86/amd: Support PERF_SAMPLE_PHY_ADDR
  perf/x86/amd: Support PERF_SAMPLE_ADDR
  perf/x86/amd: Support PERF_SAMPLE_{WEIGHT|WEIGHT_STRUCT}
  perf/x86/amd: Support PERF_SAMPLE_DATA_SRC
  perf/x86/amd: Add IBS OP_DATA2 DataSrc bit definitions
  perf/mem: Introduce PERF_MEM_LVLNUM_{EXTN_MEM|IO}
  perf/x86/uncore: Add new Raptor Lake S support
  perf/x86/cstate: Add new Raptor Lake S support
  perf/x86/msr: Add new Raptor Lake S support
  perf/x86: Add new Raptor Lake S support
  bpf: Check flags for branch stack in bpf_read_branch_records helper
  perf, hw_breakpoint: Fix use-after-free if perf_event_open() fails
  perf: Use sample_flags for raw_data
  perf: Use sample_flags for addr
  ...
Diffstat (limited to 'arch/x86/events/amd/ibs.c')
-rw-r--r--  arch/x86/events/amd/ibs.c | 360
1 file changed, 343 insertions(+), 17 deletions(-)
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index c251bc44c088..3271735f0070 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -300,16 +300,6 @@ static int perf_ibs_init(struct perf_event *event)
hwc->config_base = perf_ibs->msr;
hwc->config = config;
- /*
- * rip recorded by IbsOpRip will not be consistent with rsp and rbp
- * recorded as part of interrupt regs. Thus we need to use rip from
- * interrupt regs while unwinding call stack. Setting _EARLY flag
- * makes sure we unwind call-stack before perf sample rip is set to
- * IbsOpRip.
- */
- if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
- event->attr.sample_type |= __PERF_SAMPLE_CALLCHAIN_EARLY;
-
return 0;
}
@@ -688,6 +678,339 @@ static struct perf_ibs perf_ibs_op = {
.get_count = get_ibs_op_count,
};
+static void perf_ibs_get_mem_op(union ibs_op_data3 *op_data3,
+ struct perf_sample_data *data)
+{
+ union perf_mem_data_src *data_src = &data->data_src;
+
+ data_src->mem_op = PERF_MEM_OP_NA;
+
+ if (op_data3->ld_op)
+ data_src->mem_op = PERF_MEM_OP_LOAD;
+ else if (op_data3->st_op)
+ data_src->mem_op = PERF_MEM_OP_STORE;
+}
+
+/*
+ * Processors with CPUID_Fn8000001B_EAX[11], aka IBS_CAPS_ZEN4, have
+ * finer-grained DataSrc encodings. Others have coarse ones.
+ */
+static u8 perf_ibs_data_src(union ibs_op_data2 *op_data2)
+{
+ if (ibs_caps & IBS_CAPS_ZEN4)
+ return (op_data2->data_src_hi << 3) | op_data2->data_src_lo;
+
+ return op_data2->data_src_lo;
+}
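/*
 * [Editorial sketch, not part of the patch] A worked example of the Zen4
 * combination above, with hypothetical field values: data_src_lo carries
 * the low 3 bits of DataSrc and data_src_hi the bits above them, hence
 * the shift by 3.
 */
static u8 example_zen4_data_src_value(void)
{
	u8 data_src_lo = 0x5;	/* hypothetical OP_DATA2 low field */
	u8 data_src_hi = 0x2;	/* hypothetical OP_DATA2 high field */

	return (data_src_hi << 3) | data_src_lo;	/* (0x2 << 3) | 0x5 == 0x15 */
}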
+
+static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
+ union ibs_op_data3 *op_data3,
+ struct perf_sample_data *data)
+{
+ union perf_mem_data_src *data_src = &data->data_src;
+ u8 ibs_data_src = perf_ibs_data_src(op_data2);
+
+ data_src->mem_lvl = 0;
+
+	/*
+	 * DcMiss, L2Miss, DataSrc, DcMissLat, etc. are all invalid for
+	 * uncached memory accesses, so check the DcUcMemAcc bit early.
+	 */
+ if (op_data3->dc_uc_mem_acc && ibs_data_src != IBS_DATA_SRC_EXT_IO) {
+ data_src->mem_lvl = PERF_MEM_LVL_UNC | PERF_MEM_LVL_HIT;
+ return;
+ }
+
+ /* L1 Hit */
+ if (op_data3->dc_miss == 0) {
+ data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
+ return;
+ }
+
+ /* L2 Hit */
+ if (op_data3->l2_miss == 0) {
+ /* Erratum #1293 */
+ if (boot_cpu_data.x86 != 0x19 || boot_cpu_data.x86_model > 0xF ||
+ !(op_data3->sw_pf || op_data3->dc_miss_no_mab_alloc)) {
+ data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
+ return;
+ }
+ }
+
+	/*
+	 * OP_DATA2 is valid only for load ops. Skip all checks that
+	 * use OP_DATA2[DataSrc].
+	 */
+ if (data_src->mem_op != PERF_MEM_OP_LOAD)
+ goto check_mab;
+
+ /* L3 Hit */
+ if (ibs_caps & IBS_CAPS_ZEN4) {
+ if (ibs_data_src == IBS_DATA_SRC_EXT_LOC_CACHE) {
+ data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
+ return;
+ }
+ } else {
+ if (ibs_data_src == IBS_DATA_SRC_LOC_CACHE) {
+ data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_REM_CCE1 |
+ PERF_MEM_LVL_HIT;
+ return;
+ }
+ }
+
+ /* A peer cache in a near CCX */
+ if (ibs_caps & IBS_CAPS_ZEN4 &&
+ ibs_data_src == IBS_DATA_SRC_EXT_NEAR_CCX_CACHE) {
+ data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
+ return;
+ }
+
+ /* A peer cache in a far CCX */
+ if (ibs_caps & IBS_CAPS_ZEN4) {
+ if (ibs_data_src == IBS_DATA_SRC_EXT_FAR_CCX_CACHE) {
+ data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2 | PERF_MEM_LVL_HIT;
+ return;
+ }
+ } else {
+ if (ibs_data_src == IBS_DATA_SRC_REM_CACHE) {
+ data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2 | PERF_MEM_LVL_HIT;
+ return;
+ }
+ }
+
+ /* DRAM */
+ if (ibs_data_src == IBS_DATA_SRC_EXT_DRAM) {
+ if (op_data2->rmt_node == 0)
+ data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
+ else
+ data_src->mem_lvl = PERF_MEM_LVL_REM_RAM1 | PERF_MEM_LVL_HIT;
+ return;
+ }
+
+ /* PMEM */
+ if (ibs_caps & IBS_CAPS_ZEN4 && ibs_data_src == IBS_DATA_SRC_EXT_PMEM) {
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_PMEM;
+ if (op_data2->rmt_node) {
+ data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
+ /* IBS doesn't provide Remote socket detail */
+ data_src->mem_hops = PERF_MEM_HOPS_1;
+ }
+ return;
+ }
+
+ /* Extension Memory */
+ if (ibs_caps & IBS_CAPS_ZEN4 &&
+ ibs_data_src == IBS_DATA_SRC_EXT_EXT_MEM) {
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_EXTN_MEM;
+ if (op_data2->rmt_node) {
+ data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
+ /* IBS doesn't provide Remote socket detail */
+ data_src->mem_hops = PERF_MEM_HOPS_1;
+ }
+ return;
+ }
+
+ /* IO */
+ if (ibs_data_src == IBS_DATA_SRC_EXT_IO) {
+ data_src->mem_lvl = PERF_MEM_LVL_IO;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_IO;
+ if (op_data2->rmt_node) {
+ data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
+ /* IBS doesn't provide Remote socket detail */
+ data_src->mem_hops = PERF_MEM_HOPS_1;
+ }
+ return;
+ }
+
+check_mab:
+	/*
+	 * MAB (Miss Address Buffer) Hit. The MAB keeps track of outstanding
+	 * DC misses, so such data may come from any level in the memory
+	 * hierarchy. IBS reports both the MAB hit and the actual DataSrc
+	 * simultaneously. Prioritize DataSrc over MAB, i.e. set MAB only
+	 * when IBS fails to provide a DataSrc.
+	 */
+ if (op_data3->dc_miss_no_mab_alloc) {
+ data_src->mem_lvl = PERF_MEM_LVL_LFB | PERF_MEM_LVL_HIT;
+ return;
+ }
+
+ data_src->mem_lvl = PERF_MEM_LVL_NA;
+}
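/*
 * [Editorial sketch, not part of the patch] The classification above is
 * strictly ordered: uncached first (the other fields are invalid for UC
 * accesses), then the L1/L2 hit bits, then DataSrc-based levels for
 * loads, and finally the MAB fallback. A simplified, self-contained
 * mirror of that precedence, using plain flags instead of the real IBS
 * bitfields (and omitting the Zen4/pre-Zen4 split and erratum #1293
 * gating that the real function also handles):
 */
enum example_lvl { EX_UNC, EX_L1, EX_L2, EX_DATA_SRC, EX_LFB, EX_NA };

static enum example_lvl example_classify(bool uc_acc, bool dc_miss,
					 bool l2_miss, bool is_load,
					 bool data_src_valid,
					 bool no_mab_alloc)
{
	if (uc_acc)
		return EX_UNC;		/* DcMiss/L2Miss/DataSrc invalid */
	if (!dc_miss)
		return EX_L1;		/* L1 hit */
	if (!l2_miss)
		return EX_L2;		/* L2 hit */
	if (is_load && data_src_valid)
		return EX_DATA_SRC;	/* L3/CCX/DRAM/PMEM/IO from DataSrc */
	if (no_mab_alloc)
		return EX_LFB;		/* MAB hit; origin level unknown */
	return EX_NA;
}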
+
+static bool perf_ibs_cache_hit_st_valid(void)
+{
+ /* 0: Uninitialized, 1: Valid, -1: Invalid */
+ static int cache_hit_st_valid;
+
+ if (unlikely(!cache_hit_st_valid)) {
+ if (boot_cpu_data.x86 == 0x19 &&
+ (boot_cpu_data.x86_model <= 0xF ||
+ (boot_cpu_data.x86_model >= 0x20 &&
+ boot_cpu_data.x86_model <= 0x5F))) {
+ cache_hit_st_valid = -1;
+ } else {
+ cache_hit_st_valid = 1;
+ }
+ }
+
+ return cache_hit_st_valid == 1;
+}
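/*
 * [Editorial sketch, not part of the patch] perf_ibs_cache_hit_st_valid()
 * above uses a tri-state static as a lazy cache: 0 means "not yet
 * computed", so the family/model check runs once and the result (1 or -1)
 * is reused on every later call. The race on first use is benign because
 * every caller computes the same value. The same idiom in generic form,
 * with a hypothetical predicate:
 */
static bool example_cached_check(void)
{
	static int state;	/* 0: uninitialized, 1: valid, -1: invalid */

	if (unlikely(!state))
		state = example_expensive_check() ? 1 : -1;	/* hypothetical */

	return state == 1;
}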
+
+static void perf_ibs_get_mem_snoop(union ibs_op_data2 *op_data2,
+ struct perf_sample_data *data)
+{
+ union perf_mem_data_src *data_src = &data->data_src;
+ u8 ibs_data_src;
+
+ data_src->mem_snoop = PERF_MEM_SNOOP_NA;
+
+ if (!perf_ibs_cache_hit_st_valid() ||
+ data_src->mem_op != PERF_MEM_OP_LOAD ||
+ data_src->mem_lvl & PERF_MEM_LVL_L1 ||
+ data_src->mem_lvl & PERF_MEM_LVL_L2 ||
+ op_data2->cache_hit_st)
+ return;
+
+ ibs_data_src = perf_ibs_data_src(op_data2);
+
+ if (ibs_caps & IBS_CAPS_ZEN4) {
+ if (ibs_data_src == IBS_DATA_SRC_EXT_LOC_CACHE ||
+ ibs_data_src == IBS_DATA_SRC_EXT_NEAR_CCX_CACHE ||
+ ibs_data_src == IBS_DATA_SRC_EXT_FAR_CCX_CACHE)
+ data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
+ } else if (ibs_data_src == IBS_DATA_SRC_LOC_CACHE) {
+ data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
+ }
+}
+
+static void perf_ibs_get_tlb_lvl(union ibs_op_data3 *op_data3,
+ struct perf_sample_data *data)
+{
+ union perf_mem_data_src *data_src = &data->data_src;
+
+ data_src->mem_dtlb = PERF_MEM_TLB_NA;
+
+ if (!op_data3->dc_lin_addr_valid)
+ return;
+
+ if (!op_data3->dc_l1tlb_miss) {
+ data_src->mem_dtlb = PERF_MEM_TLB_L1 | PERF_MEM_TLB_HIT;
+ return;
+ }
+
+ if (!op_data3->dc_l2tlb_miss) {
+ data_src->mem_dtlb = PERF_MEM_TLB_L2 | PERF_MEM_TLB_HIT;
+ return;
+ }
+
+ data_src->mem_dtlb = PERF_MEM_TLB_L2 | PERF_MEM_TLB_MISS;
+}
+
+static void perf_ibs_get_mem_lock(union ibs_op_data3 *op_data3,
+ struct perf_sample_data *data)
+{
+ union perf_mem_data_src *data_src = &data->data_src;
+
+ data_src->mem_lock = PERF_MEM_LOCK_NA;
+
+ if (op_data3->dc_locked_op)
+ data_src->mem_lock = PERF_MEM_LOCK_LOCKED;
+}
+
+#define ibs_op_msr_idx(msr) (msr - MSR_AMD64_IBSOPCTL)
+
+static void perf_ibs_get_data_src(struct perf_ibs_data *ibs_data,
+ struct perf_sample_data *data,
+ union ibs_op_data2 *op_data2,
+ union ibs_op_data3 *op_data3)
+{
+ perf_ibs_get_mem_lvl(op_data2, op_data3, data);
+ perf_ibs_get_mem_snoop(op_data2, data);
+ perf_ibs_get_tlb_lvl(op_data3, data);
+ perf_ibs_get_mem_lock(op_data3, data);
+}
+
+static __u64 perf_ibs_get_op_data2(struct perf_ibs_data *ibs_data,
+ union ibs_op_data3 *op_data3)
+{
+ __u64 val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA2)];
+
+ /* Erratum #1293 */
+ if (boot_cpu_data.x86 == 0x19 && boot_cpu_data.x86_model <= 0xF &&
+ (op_data3->sw_pf || op_data3->dc_miss_no_mab_alloc)) {
+ /*
+ * OP_DATA2 has only two fields on Zen3: DataSrc and RmtNode.
+ * DataSrc=0 is 'No valid status' and RmtNode is invalid when
+ * DataSrc=0.
+ */
+ val = 0;
+ }
+ return val;
+}
+
+static void perf_ibs_parse_ld_st_data(__u64 sample_type,
+ struct perf_ibs_data *ibs_data,
+ struct perf_sample_data *data)
+{
+ union ibs_op_data3 op_data3;
+ union ibs_op_data2 op_data2;
+ union ibs_op_data op_data;
+
+ data->data_src.val = PERF_MEM_NA;
+ op_data3.val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA3)];
+
+ perf_ibs_get_mem_op(&op_data3, data);
+ if (data->data_src.mem_op != PERF_MEM_OP_LOAD &&
+ data->data_src.mem_op != PERF_MEM_OP_STORE)
+ return;
+
+ op_data2.val = perf_ibs_get_op_data2(ibs_data, &op_data3);
+
+ if (sample_type & PERF_SAMPLE_DATA_SRC) {
+ perf_ibs_get_data_src(ibs_data, data, &op_data2, &op_data3);
+ data->sample_flags |= PERF_SAMPLE_DATA_SRC;
+ }
+
+ if (sample_type & PERF_SAMPLE_WEIGHT_TYPE && op_data3.dc_miss &&
+ data->data_src.mem_op == PERF_MEM_OP_LOAD) {
+ op_data.val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA)];
+
+ if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) {
+ data->weight.var1_dw = op_data3.dc_miss_lat;
+ data->weight.var2_w = op_data.tag_to_ret_ctr;
+ } else if (sample_type & PERF_SAMPLE_WEIGHT) {
+ data->weight.full = op_data3.dc_miss_lat;
+ }
+ data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
+ }
+
+ if (sample_type & PERF_SAMPLE_ADDR && op_data3.dc_lin_addr_valid) {
+ data->addr = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSDCLINAD)];
+ data->sample_flags |= PERF_SAMPLE_ADDR;
+ }
+
+ if (sample_type & PERF_SAMPLE_PHYS_ADDR && op_data3.dc_phy_addr_valid) {
+ data->phys_addr = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSDCPHYSAD)];
+ data->sample_flags |= PERF_SAMPLE_PHYS_ADDR;
+ }
+}
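/*
 * [Editorial sketch, not part of the patch] Userspace view of the
 * sample_type gating in perf_ibs_parse_ld_st_data() above: an event
 * opened on the dynamic ibs_op PMU with these sample bits set gets
 * data_src, weight, and the virtual/physical data addresses filled in
 * from the IBS op MSRs. The PMU type must be read from
 * /sys/bus/event_source/devices/ibs_op/type; the sample period here is
 * an arbitrary example value.
 */
#include <linux/perf_event.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int example_open_ibs_op(unsigned int ibs_op_pmu_type)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = ibs_op_pmu_type;
	attr.sample_period = 100000;
	attr.sample_type = PERF_SAMPLE_DATA_SRC |
			   PERF_SAMPLE_WEIGHT_STRUCT |
			   PERF_SAMPLE_ADDR |
			   PERF_SAMPLE_PHYS_ADDR;

	/* pid == 0, cpu == -1: profile the calling thread on any CPU */
	return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
}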
+
+static int perf_ibs_get_offset_max(struct perf_ibs *perf_ibs, u64 sample_type,
+ int check_rip)
+{
+ if (sample_type & PERF_SAMPLE_RAW ||
+ (perf_ibs == &perf_ibs_op &&
+ (sample_type & PERF_SAMPLE_DATA_SRC ||
+ sample_type & PERF_SAMPLE_WEIGHT_TYPE ||
+ sample_type & PERF_SAMPLE_ADDR ||
+ sample_type & PERF_SAMPLE_PHYS_ADDR)))
+ return perf_ibs->offset_max;
+ else if (check_rip)
+ return 3;
+ return 1;
+}
+
static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
{
struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
@@ -735,12 +1058,9 @@ fail:
size = 1;
offset = 1;
check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK));
- if (event->attr.sample_type & PERF_SAMPLE_RAW)
- offset_max = perf_ibs->offset_max;
- else if (check_rip)
- offset_max = 3;
- else
- offset_max = 1;
+
+ offset_max = perf_ibs_get_offset_max(perf_ibs, event->attr.sample_type, check_rip);
+
do {
rdmsrl(msr + offset, *buf++);
size++;
@@ -791,15 +1111,21 @@ fail:
},
};
data.raw = &raw;
+ data.sample_flags |= PERF_SAMPLE_RAW;
}
+ if (perf_ibs == &perf_ibs_op)
+ perf_ibs_parse_ld_st_data(event->attr.sample_type, &ibs_data, &data);
+
/*
* rip recorded by IbsOpRip will not be consistent with rsp and rbp
* recorded as part of interrupt regs. Thus we need to use rip from
* interrupt regs while unwinding call stack.
*/
- if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
+ if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) {
data.callchain = perf_callchain(event, iregs);
+ data.sample_flags |= PERF_SAMPLE_CALLCHAIN;
+ }
throttle = perf_event_overflow(event, &data, &regs);
out: