diff options
Diffstat (limited to 'arch/x86/events/intel')
-rw-r--r-- | arch/x86/events/intel/Makefile | 4 | ||||
-rw-r--r-- | arch/x86/events/intel/bts.c | 10 | ||||
-rw-r--r-- | arch/x86/events/intel/core.c | 2173 | ||||
-rw-r--r-- | arch/x86/events/intel/cstate.c | 177 | ||||
-rw-r--r-- | arch/x86/events/intel/ds.c | 467 | ||||
-rw-r--r-- | arch/x86/events/intel/lbr.c | 1198 | ||||
-rw-r--r-- | arch/x86/events/intel/p4.c | 59 | ||||
-rw-r--r-- | arch/x86/events/intel/pt.c | 86 | ||||
-rw-r--r-- | arch/x86/events/intel/rapl.c | 804 | ||||
-rw-r--r-- | arch/x86/events/intel/uncore.c | 616 | ||||
-rw-r--r-- | arch/x86/events/intel/uncore.h | 92 | ||||
-rw-r--r-- | arch/x86/events/intel/uncore_discovery.c | 630 | ||||
-rw-r--r-- | arch/x86/events/intel/uncore_discovery.h | 152 | ||||
-rw-r--r-- | arch/x86/events/intel/uncore_snb.c | 887 | ||||
-rw-r--r-- | arch/x86/events/intel/uncore_snbep.c | 1701 |
15 files changed, 6786 insertions, 2270 deletions
diff --git a/arch/x86/events/intel/Makefile b/arch/x86/events/intel/Makefile index 3468b0c1dc7c..10bde6c5abb2 100644 --- a/arch/x86/events/intel/Makefile +++ b/arch/x86/events/intel/Makefile @@ -2,9 +2,7 @@ obj-$(CONFIG_CPU_SUP_INTEL) += core.o bts.o obj-$(CONFIG_CPU_SUP_INTEL) += ds.o knc.o obj-$(CONFIG_CPU_SUP_INTEL) += lbr.o p4.o p6.o pt.o -obj-$(CONFIG_PERF_EVENTS_INTEL_RAPL) += intel-rapl-perf.o -intel-rapl-perf-objs := rapl.o obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel-uncore.o -intel-uncore-objs := uncore.o uncore_nhmex.o uncore_snb.o uncore_snbep.o +intel-uncore-objs := uncore.o uncore_nhmex.o uncore_snb.o uncore_snbep.o uncore_discovery.o obj-$(CONFIG_PERF_EVENTS_INTEL_CSTATE) += intel-cstate.o intel-cstate-objs := cstate.o diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index 6a3b599ee0fe..974e917e65b2 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -58,7 +58,7 @@ struct bts_buffer { local_t head; unsigned long end; void **data_pages; - struct bts_phys buf[0]; + struct bts_phys buf[]; }; static struct pmu bts_pmu; @@ -209,6 +209,12 @@ static void bts_update(struct bts_ctx *bts) } else { local_set(&buf->data_size, head); } + + /* + * Since BTS is coherent, just add compiler barrier to ensure + * BTS updating is ordered against bts::handle::event. + */ + barrier(); } static int @@ -594,7 +600,7 @@ static __init int bts_init(void) * we cannot use the user mapping since it will not be available * if we're not running the owning process. * - * With PTI we can't use the kernal map either, because its not + * With PTI we can't use the kernel map either, because its not * there when we run userspace. * * For now, disable this driver when using PTI. diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index dff6623804c2..1b92bf05fd65 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -14,6 +14,7 @@ #include <linux/slab.h> #include <linux/export.h> #include <linux/nmi.h> +#include <linux/kvm_host.h> #include <asm/cpufeature.h> #include <asm/hardirq.h> @@ -137,7 +138,7 @@ static struct event_constraint intel_ivb_event_constraints[] __read_mostly = FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ INTEL_UEVENT_CONSTRAINT(0x0148, 0x4), /* L1D_PEND_MISS.PENDING */ - INTEL_UEVENT_CONSTRAINT(0x0279, 0xf), /* IDQ.EMTPY */ + INTEL_UEVENT_CONSTRAINT(0x0279, 0xf), /* IDQ.EMPTY */ INTEL_UEVENT_CONSTRAINT(0x019c, 0xf), /* IDQ_UOPS_NOT_DELIVERED.CORE */ INTEL_UEVENT_CONSTRAINT(0x02a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_LDM_PENDING */ INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */ @@ -181,6 +182,27 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly = EVENT_CONSTRAINT_END }; +static struct event_constraint intel_v5_gen_event_constraints[] __read_mostly = +{ + FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ + FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ + FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ + FIXED_EVENT_CONSTRAINT(0x0400, 3), /* SLOTS */ + FIXED_EVENT_CONSTRAINT(0x0500, 4), + FIXED_EVENT_CONSTRAINT(0x0600, 5), + FIXED_EVENT_CONSTRAINT(0x0700, 6), + FIXED_EVENT_CONSTRAINT(0x0800, 7), + FIXED_EVENT_CONSTRAINT(0x0900, 8), + FIXED_EVENT_CONSTRAINT(0x0a00, 9), + FIXED_EVENT_CONSTRAINT(0x0b00, 10), + FIXED_EVENT_CONSTRAINT(0x0c00, 11), + FIXED_EVENT_CONSTRAINT(0x0d00, 12), + FIXED_EVENT_CONSTRAINT(0x0e00, 13), + FIXED_EVENT_CONSTRAINT(0x0f00, 14), + FIXED_EVENT_CONSTRAINT(0x1000, 15), + EVENT_CONSTRAINT_END +}; + static struct event_constraint intel_slm_event_constraints[] __read_mostly = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ @@ -243,21 +265,28 @@ static struct extra_reg intel_skl_extra_regs[] __read_mostly = { static struct event_constraint intel_icl_event_constraints[] = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ - INTEL_UEVENT_CONSTRAINT(0x1c0, 0), /* INST_RETIRED.PREC_DIST */ + FIXED_EVENT_CONSTRAINT(0x01c0, 0), /* old INST_RETIRED.PREC_DIST */ + FIXED_EVENT_CONSTRAINT(0x0100, 0), /* INST_RETIRED.PREC_DIST */ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ FIXED_EVENT_CONSTRAINT(0x0400, 3), /* SLOTS */ + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_RETIRING, 0), + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BAD_SPEC, 1), + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FE_BOUND, 2), + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BE_BOUND, 3), INTEL_EVENT_CONSTRAINT_RANGE(0x03, 0x0a, 0xf), INTEL_EVENT_CONSTRAINT_RANGE(0x1f, 0x28, 0xf), INTEL_EVENT_CONSTRAINT(0x32, 0xf), /* SW_PREFETCH_ACCESS.* */ - INTEL_EVENT_CONSTRAINT_RANGE(0x48, 0x54, 0xf), + INTEL_EVENT_CONSTRAINT_RANGE(0x48, 0x56, 0xf), INTEL_EVENT_CONSTRAINT_RANGE(0x60, 0x8b, 0xf), INTEL_UEVENT_CONSTRAINT(0x04a3, 0xff), /* CYCLE_ACTIVITY.STALLS_TOTAL */ - INTEL_UEVENT_CONSTRAINT(0x10a3, 0xff), /* CYCLE_ACTIVITY.STALLS_MEM_ANY */ + INTEL_UEVENT_CONSTRAINT(0x10a3, 0xff), /* CYCLE_ACTIVITY.CYCLES_MEM_ANY */ + INTEL_UEVENT_CONSTRAINT(0x14a3, 0xff), /* CYCLE_ACTIVITY.STALLS_MEM_ANY */ INTEL_EVENT_CONSTRAINT(0xa3, 0xf), /* CYCLE_ACTIVITY.* */ INTEL_EVENT_CONSTRAINT_RANGE(0xa8, 0xb0, 0xf), INTEL_EVENT_CONSTRAINT_RANGE(0xb7, 0xbd, 0xf), INTEL_EVENT_CONSTRAINT_RANGE(0xd0, 0xe6, 0xf), + INTEL_EVENT_CONSTRAINT(0xef, 0xf), INTEL_EVENT_CONSTRAINT_RANGE(0xf0, 0xf4, 0xf), EVENT_CONSTRAINT_END }; @@ -270,6 +299,57 @@ static struct extra_reg intel_icl_extra_regs[] __read_mostly = { EVENT_EXTRA_END }; +static struct extra_reg intel_spr_extra_regs[] __read_mostly = { + INTEL_UEVENT_EXTRA_REG(0x012a, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1), + INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), + INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff1f, FE), + INTEL_UEVENT_EXTRA_REG(0x40ad, MSR_PEBS_FRONTEND, 0x7, FE), + INTEL_UEVENT_EXTRA_REG(0x04c2, MSR_PEBS_FRONTEND, 0x8, FE), + EVENT_EXTRA_END +}; + +static struct event_constraint intel_spr_event_constraints[] = { + FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ + FIXED_EVENT_CONSTRAINT(0x0100, 0), /* INST_RETIRED.PREC_DIST */ + FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ + FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ + FIXED_EVENT_CONSTRAINT(0x0400, 3), /* SLOTS */ + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_RETIRING, 0), + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BAD_SPEC, 1), + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FE_BOUND, 2), + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BE_BOUND, 3), + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_HEAVY_OPS, 4), + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BR_MISPREDICT, 5), + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FETCH_LAT, 6), + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_MEM_BOUND, 7), + + INTEL_EVENT_CONSTRAINT(0x2e, 0xff), + INTEL_EVENT_CONSTRAINT(0x3c, 0xff), + /* + * Generally event codes < 0x90 are restricted to counters 0-3. + * The 0x2E and 0x3C are exception, which has no restriction. + */ + INTEL_EVENT_CONSTRAINT_RANGE(0x01, 0x8f, 0xf), + + INTEL_UEVENT_CONSTRAINT(0x01a3, 0xf), + INTEL_UEVENT_CONSTRAINT(0x02a3, 0xf), + INTEL_UEVENT_CONSTRAINT(0x08a3, 0xf), + INTEL_UEVENT_CONSTRAINT(0x04a4, 0x1), + INTEL_UEVENT_CONSTRAINT(0x08a4, 0x1), + INTEL_UEVENT_CONSTRAINT(0x02cd, 0x1), + INTEL_EVENT_CONSTRAINT(0xce, 0x1), + INTEL_EVENT_CONSTRAINT_RANGE(0xd0, 0xdf, 0xf), + /* + * Generally event codes >= 0x90 are likely to have no restrictions. + * The exception are defined as above. + */ + INTEL_EVENT_CONSTRAINT_RANGE(0x90, 0xfe, 0xff), + + EVENT_CONSTRAINT_END +}; + + EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3"); EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3"); EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2"); @@ -309,6 +389,16 @@ EVENT_ATTR_STR_HT(topdown-recovery-bubbles, td_recovery_bubbles, EVENT_ATTR_STR_HT(topdown-recovery-bubbles.scale, td_recovery_bubbles_scale, "4", "2"); +EVENT_ATTR_STR(slots, slots, "event=0x00,umask=0x4"); +EVENT_ATTR_STR(topdown-retiring, td_retiring, "event=0x00,umask=0x80"); +EVENT_ATTR_STR(topdown-bad-spec, td_bad_spec, "event=0x00,umask=0x81"); +EVENT_ATTR_STR(topdown-fe-bound, td_fe_bound, "event=0x00,umask=0x82"); +EVENT_ATTR_STR(topdown-be-bound, td_be_bound, "event=0x00,umask=0x83"); +EVENT_ATTR_STR(topdown-heavy-ops, td_heavy_ops, "event=0x00,umask=0x84"); +EVENT_ATTR_STR(topdown-br-mispredict, td_br_mispredict, "event=0x00,umask=0x85"); +EVENT_ATTR_STR(topdown-fetch-lat, td_fetch_lat, "event=0x00,umask=0x86"); +EVENT_ATTR_STR(topdown-mem-bound, td_mem_bound, "event=0x00,umask=0x87"); + static struct attribute *snb_events_attrs[] = { EVENT_PTR(td_slots_issued), EVENT_PTR(td_slots_retired), @@ -373,6 +463,108 @@ static u64 intel_pmu_event_map(int hw_event) return intel_perfmon_event_map[hw_event]; } +static __initconst const u64 spr_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x81d0, + [ C(RESULT_MISS) ] = 0xe124, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x82d0, + }, + }, + [ C(L1I ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_MISS) ] = 0xe424, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x12a, + [ C(RESULT_MISS) ] = 0x12a, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x12a, + [ C(RESULT_MISS) ] = 0x12a, + }, + }, + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x81d0, + [ C(RESULT_MISS) ] = 0xe12, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x82d0, + [ C(RESULT_MISS) ] = 0xe13, + }, + }, + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = 0xe11, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(BPU ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x4c4, + [ C(RESULT_MISS) ] = 0x4c5, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(NODE) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x12a, + [ C(RESULT_MISS) ] = 0x12a, + }, + }, +}; + +static __initconst const u64 spr_hw_cache_extra_regs + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x10001, + [ C(RESULT_MISS) ] = 0x3fbfc00001, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x3f3ffc0002, + [ C(RESULT_MISS) ] = 0x3f3fc00002, + }, + }, + [ C(NODE) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x10c000001, + [ C(RESULT_MISS) ] = 0x3fb3000001, + }, + }, +}; + /* * Notes on the events: * - data reads do not include code reads (comparable to earlier tables) @@ -1890,10 +2082,40 @@ static __initconst const u64 tnt_hw_cache_extra_regs }, }; +EVENT_ATTR_STR(topdown-fe-bound, td_fe_bound_tnt, "event=0x71,umask=0x0"); +EVENT_ATTR_STR(topdown-retiring, td_retiring_tnt, "event=0xc2,umask=0x0"); +EVENT_ATTR_STR(topdown-bad-spec, td_bad_spec_tnt, "event=0x73,umask=0x6"); +EVENT_ATTR_STR(topdown-be-bound, td_be_bound_tnt, "event=0x74,umask=0x0"); + +static struct attribute *tnt_events_attrs[] = { + EVENT_PTR(td_fe_bound_tnt), + EVENT_PTR(td_retiring_tnt), + EVENT_PTR(td_bad_spec_tnt), + EVENT_PTR(td_be_bound_tnt), + NULL, +}; + static struct extra_reg intel_tnt_extra_regs[] __read_mostly = { /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ - INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffffff9fffull, RSP_0), - INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0xffffff9fffull, RSP_1), + INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x800ff0ffffff9fffull, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0xff0ffffff9fffull, RSP_1), + EVENT_EXTRA_END +}; + +EVENT_ATTR_STR(mem-loads, mem_ld_grt, "event=0xd0,umask=0x5,ldlat=3"); +EVENT_ATTR_STR(mem-stores, mem_st_grt, "event=0xd0,umask=0x6"); + +static struct attribute *grt_mem_attrs[] = { + EVENT_PTR(mem_ld_grt), + EVENT_PTR(mem_st_grt), + NULL +}; + +static struct extra_reg intel_grt_extra_regs[] __read_mostly = { + /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ + INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1), + INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x5d0), EVENT_EXTRA_END }; @@ -1945,33 +2167,46 @@ static __initconst const u64 knl_hw_cache_extra_regs * intel_bts events don't coexist with intel PMU's BTS events because of * x86_add_exclusive(x86_lbr_exclusive_lbr); there's no need to keep them * disabled around intel PMU's event batching etc, only inside the PMI handler. + * + * Avoid PEBS_ENABLE MSR access in PMIs. + * The GLOBAL_CTRL has been disabled. All the counters do not count anymore. + * It doesn't matter if the PEBS is enabled or not. + * Usually, the PEBS status are not changed in PMIs. It's unnecessary to + * access PEBS_ENABLE MSR in disable_all()/enable_all(). + * However, there are some cases which may change PEBS status, e.g. PMI + * throttle. The PEBS_ENABLE should be updated where the status changes. */ -static void __intel_pmu_disable_all(void) +static __always_inline void __intel_pmu_disable_all(bool bts) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); - if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) + if (bts && test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) intel_pmu_disable_bts(); - - intel_pmu_pebs_disable_all(); } -static void intel_pmu_disable_all(void) +static __always_inline void intel_pmu_disable_all(void) { - __intel_pmu_disable_all(); + __intel_pmu_disable_all(true); + intel_pmu_pebs_disable_all(); intel_pmu_lbr_disable_all(); } static void __intel_pmu_enable_all(int added, bool pmi) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + u64 intel_ctrl = hybrid(cpuc->pmu, intel_ctrl); - intel_pmu_pebs_enable_all(); intel_pmu_lbr_enable_all(pmi); + + if (cpuc->fixed_ctrl_val != cpuc->active_fixed_ctrl_val) { + wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, cpuc->fixed_ctrl_val); + cpuc->active_fixed_ctrl_val = cpuc->fixed_ctrl_val; + } + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, - x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask); + intel_ctrl & ~cpuc->intel_ctrl_guest_mask); if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { struct perf_event *event = @@ -1986,9 +2221,51 @@ static void __intel_pmu_enable_all(int added, bool pmi) static void intel_pmu_enable_all(int added) { + intel_pmu_pebs_enable_all(); __intel_pmu_enable_all(added, false); } +static noinline int +__intel_pmu_snapshot_branch_stack(struct perf_branch_entry *entries, + unsigned int cnt, unsigned long flags) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + intel_pmu_lbr_read(); + cnt = min_t(unsigned int, cnt, x86_pmu.lbr_nr); + + memcpy(entries, cpuc->lbr_entries, sizeof(struct perf_branch_entry) * cnt); + intel_pmu_enable_all(0); + local_irq_restore(flags); + return cnt; +} + +static int +intel_pmu_snapshot_branch_stack(struct perf_branch_entry *entries, unsigned int cnt) +{ + unsigned long flags; + + /* must not have branches... */ + local_irq_save(flags); + __intel_pmu_disable_all(false); /* we don't care about BTS */ + __intel_pmu_lbr_disable(); + /* ... until here */ + return __intel_pmu_snapshot_branch_stack(entries, cnt, flags); +} + +static int +intel_pmu_snapshot_arch_branch_stack(struct perf_branch_entry *entries, unsigned int cnt) +{ + unsigned long flags; + + /* must not have branches... */ + local_irq_save(flags); + __intel_pmu_disable_all(false); /* we don't care about BTS */ + __intel_pmu_arch_lbr_disable(); + /* ... until here */ + return __intel_pmu_snapshot_branch_stack(entries, cnt, flags); +} + /* * Workaround for: * Intel Errata AAK100 (model 26) @@ -2000,7 +2277,7 @@ static void intel_pmu_enable_all(int added) * magic three (non-counting) events 0x4300B5, 0x4300D2, and 0x4300B1 either * in sequence on the same PMC or on different PMCs. * - * In practise it appears some of these events do in fact count, and + * In practice it appears some of these events do in fact count, and * we need to program all 4 events. */ static void intel_pmu_nhm_workaround(void) @@ -2040,7 +2317,7 @@ static void intel_pmu_nhm_workaround(void) for (i = 0; i < 4; i++) { event = cpuc->events[i]; if (event) - x86_perf_event_update(event); + static_call(x86_pmu_update)(event); } for (i = 0; i < 4; i++) { @@ -2055,7 +2332,7 @@ static void intel_pmu_nhm_workaround(void) event = cpuc->events[i]; if (event) { - x86_perf_event_set_period(event); + static_call(x86_pmu_set_period)(event); __x86_pmu_enable_event(&event->hw, ARCH_PERFMON_EVENTSEL_ENABLE); } else @@ -2103,18 +2380,6 @@ static void intel_tfa_pmu_enable_all(int added) intel_pmu_enable_all(added); } -static void enable_counter_freeze(void) -{ - update_debugctlmsr(get_debugctlmsr() | - DEBUGCTLMSR_FREEZE_PERFMON_ON_PMI); -} - -static void disable_counter_freeze(void) -{ - update_debugctlmsr(get_debugctlmsr() & - ~DEBUGCTLMSR_FREEZE_PERFMON_ON_PMI); -} - static inline u64 intel_pmu_get_status(void) { u64 status; @@ -2129,43 +2394,85 @@ static inline void intel_pmu_ack_status(u64 ack) wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); } -static void intel_pmu_disable_fixed(struct hw_perf_event *hwc) +static inline bool event_is_checkpointed(struct perf_event *event) { - int idx = hwc->idx - INTEL_PMC_IDX_FIXED; - u64 ctrl_val, mask; + return unlikely(event->hw.config & HSW_IN_TX_CHECKPOINTED) != 0; +} - mask = 0xfULL << (idx * 4); +static inline void intel_set_masks(struct perf_event *event, int idx) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); - rdmsrl(hwc->config_base, ctrl_val); - ctrl_val &= ~mask; - wrmsrl(hwc->config_base, ctrl_val); + if (event->attr.exclude_host) + __set_bit(idx, (unsigned long *)&cpuc->intel_ctrl_guest_mask); + if (event->attr.exclude_guest) + __set_bit(idx, (unsigned long *)&cpuc->intel_ctrl_host_mask); + if (event_is_checkpointed(event)) + __set_bit(idx, (unsigned long *)&cpuc->intel_cp_status); } -static inline bool event_is_checkpointed(struct perf_event *event) +static inline void intel_clear_masks(struct perf_event *event, int idx) { - return (event->hw.config & HSW_IN_TX_CHECKPOINTED) != 0; + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + __clear_bit(idx, (unsigned long *)&cpuc->intel_ctrl_guest_mask); + __clear_bit(idx, (unsigned long *)&cpuc->intel_ctrl_host_mask); + __clear_bit(idx, (unsigned long *)&cpuc->intel_cp_status); +} + +static void intel_pmu_disable_fixed(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + u64 mask; + + if (is_topdown_idx(idx)) { + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + /* + * When there are other active TopDown events, + * don't disable the fixed counter 3. + */ + if (*(u64 *)cpuc->active_mask & INTEL_PMC_OTHER_TOPDOWN_BITS(idx)) + return; + idx = INTEL_PMC_IDX_FIXED_SLOTS; + } + + intel_clear_masks(event, idx); + + mask = 0xfULL << ((idx - INTEL_PMC_IDX_FIXED) * 4); + cpuc->fixed_ctrl_val &= ~mask; } static void intel_pmu_disable_event(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; - struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + int idx = hwc->idx; - if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) { + switch (idx) { + case 0 ... INTEL_PMC_IDX_FIXED - 1: + intel_clear_masks(event, idx); + x86_pmu_disable_event(event); + break; + case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1: + case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END: + intel_pmu_disable_fixed(event); + break; + case INTEL_PMC_IDX_FIXED_BTS: intel_pmu_disable_bts(); intel_pmu_drain_bts_buffer(); return; + case INTEL_PMC_IDX_FIXED_VLBR: + intel_clear_masks(event, idx); + break; + default: + intel_clear_masks(event, idx); + pr_warn("Failed to disable the event with invalid index %d\n", + idx); + return; } - cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx); - cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx); - cpuc->intel_cp_status &= ~(1ull << hwc->idx); - - if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) - intel_pmu_disable_fixed(hwc); - else - x86_pmu_disable_event(event); - /* * Needs to be called after x86_pmu_disable_event, * so we don't trigger the event without PEBS bit set. @@ -2174,6 +2481,12 @@ static void intel_pmu_disable_event(struct perf_event *event) intel_pmu_pebs_disable(event); } +static void intel_pmu_assign_event(struct perf_event *event, int idx) +{ + if (is_pebs_pt(event)) + perf_report_aux_output_id(event, idx); +} + static void intel_pmu_del_event(struct perf_event *event) { if (needs_branch_stack(event)) @@ -2182,19 +2495,244 @@ static void intel_pmu_del_event(struct perf_event *event) intel_pmu_pebs_del(event); } +static int icl_set_topdown_event_period(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + s64 left = local64_read(&hwc->period_left); + + /* + * The values in PERF_METRICS MSR are derived from fixed counter 3. + * Software should start both registers, PERF_METRICS and fixed + * counter 3, from zero. + * Clear PERF_METRICS and Fixed counter 3 in initialization. + * After that, both MSRs will be cleared for each read. + * Don't need to clear them again. + */ + if (left == x86_pmu.max_period) { + wrmsrl(MSR_CORE_PERF_FIXED_CTR3, 0); + wrmsrl(MSR_PERF_METRICS, 0); + hwc->saved_slots = 0; + hwc->saved_metric = 0; + } + + if ((hwc->saved_slots) && is_slots_event(event)) { + wrmsrl(MSR_CORE_PERF_FIXED_CTR3, hwc->saved_slots); + wrmsrl(MSR_PERF_METRICS, hwc->saved_metric); + } + + perf_event_update_userpage(event); + + return 0; +} + +static int adl_set_topdown_event_period(struct perf_event *event) +{ + struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); + + if (pmu->cpu_type != hybrid_big) + return 0; + + return icl_set_topdown_event_period(event); +} + +DEFINE_STATIC_CALL(intel_pmu_set_topdown_event_period, x86_perf_event_set_period); + +static inline u64 icl_get_metrics_event_value(u64 metric, u64 slots, int idx) +{ + u32 val; + + /* + * The metric is reported as an 8bit integer fraction + * summing up to 0xff. + * slots-in-metric = (Metric / 0xff) * slots + */ + val = (metric >> ((idx - INTEL_PMC_IDX_METRIC_BASE) * 8)) & 0xff; + return mul_u64_u32_div(slots, val, 0xff); +} + +static u64 icl_get_topdown_value(struct perf_event *event, + u64 slots, u64 metrics) +{ + int idx = event->hw.idx; + u64 delta; + + if (is_metric_idx(idx)) + delta = icl_get_metrics_event_value(metrics, slots, idx); + else + delta = slots; + + return delta; +} + +static void __icl_update_topdown_event(struct perf_event *event, + u64 slots, u64 metrics, + u64 last_slots, u64 last_metrics) +{ + u64 delta, last = 0; + + delta = icl_get_topdown_value(event, slots, metrics); + if (last_slots) + last = icl_get_topdown_value(event, last_slots, last_metrics); + + /* + * The 8bit integer fraction of metric may be not accurate, + * especially when the changes is very small. + * For example, if only a few bad_spec happens, the fraction + * may be reduced from 1 to 0. If so, the bad_spec event value + * will be 0 which is definitely less than the last value. + * Avoid update event->count for this case. + */ + if (delta > last) { + delta -= last; + local64_add(delta, &event->count); + } +} + +static void update_saved_topdown_regs(struct perf_event *event, u64 slots, + u64 metrics, int metric_end) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct perf_event *other; + int idx; + + event->hw.saved_slots = slots; + event->hw.saved_metric = metrics; + + for_each_set_bit(idx, cpuc->active_mask, metric_end + 1) { + if (!is_topdown_idx(idx)) + continue; + other = cpuc->events[idx]; + other->hw.saved_slots = slots; + other->hw.saved_metric = metrics; + } +} + +/* + * Update all active Topdown events. + * + * The PERF_METRICS and Fixed counter 3 are read separately. The values may be + * modify by a NMI. PMU has to be disabled before calling this function. + */ + +static u64 intel_update_topdown_event(struct perf_event *event, int metric_end) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct perf_event *other; + u64 slots, metrics; + bool reset = true; + int idx; + + /* read Fixed counter 3 */ + rdpmcl((3 | INTEL_PMC_FIXED_RDPMC_BASE), slots); + if (!slots) + return 0; + + /* read PERF_METRICS */ + rdpmcl(INTEL_PMC_FIXED_RDPMC_METRICS, metrics); + + for_each_set_bit(idx, cpuc->active_mask, metric_end + 1) { + if (!is_topdown_idx(idx)) + continue; + other = cpuc->events[idx]; + __icl_update_topdown_event(other, slots, metrics, + event ? event->hw.saved_slots : 0, + event ? event->hw.saved_metric : 0); + } + + /* + * Check and update this event, which may have been cleared + * in active_mask e.g. x86_pmu_stop() + */ + if (event && !test_bit(event->hw.idx, cpuc->active_mask)) { + __icl_update_topdown_event(event, slots, metrics, + event->hw.saved_slots, + event->hw.saved_metric); + + /* + * In x86_pmu_stop(), the event is cleared in active_mask first, + * then drain the delta, which indicates context switch for + * counting. + * Save metric and slots for context switch. + * Don't need to reset the PERF_METRICS and Fixed counter 3. + * Because the values will be restored in next schedule in. + */ + update_saved_topdown_regs(event, slots, metrics, metric_end); + reset = false; + } + + if (reset) { + /* The fixed counter 3 has to be written before the PERF_METRICS. */ + wrmsrl(MSR_CORE_PERF_FIXED_CTR3, 0); + wrmsrl(MSR_PERF_METRICS, 0); + if (event) + update_saved_topdown_regs(event, 0, 0, metric_end); + } + + return slots; +} + +static u64 icl_update_topdown_event(struct perf_event *event) +{ + return intel_update_topdown_event(event, INTEL_PMC_IDX_METRIC_BASE + + x86_pmu.num_topdown_events - 1); +} + +static u64 adl_update_topdown_event(struct perf_event *event) +{ + struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); + + if (pmu->cpu_type != hybrid_big) + return 0; + + return icl_update_topdown_event(event); +} + +DEFINE_STATIC_CALL(intel_pmu_update_topdown_event, x86_perf_event_update); + +static void intel_pmu_read_topdown_event(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + /* Only need to call update_topdown_event() once for group read. */ + if ((cpuc->txn_flags & PERF_PMU_TXN_READ) && + !is_slots_event(event)) + return; + + perf_pmu_disable(event->pmu); + static_call(intel_pmu_update_topdown_event)(event); + perf_pmu_enable(event->pmu); +} + static void intel_pmu_read_event(struct perf_event *event) { if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD) intel_pmu_auto_reload_read(event); + else if (is_topdown_count(event)) + intel_pmu_read_topdown_event(event); else x86_perf_event_update(event); } static void intel_pmu_enable_fixed(struct perf_event *event) { + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx - INTEL_PMC_IDX_FIXED; - u64 ctrl_val, mask, bits = 0; + u64 mask, bits = 0; + int idx = hwc->idx; + + if (is_topdown_idx(idx)) { + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + /* + * When there are other active TopDown events, + * don't enable the fixed counter 3 again. + */ + if (*(u64 *)cpuc->active_mask & INTEL_PMC_OTHER_TOPDOWN_BITS(idx)) + return; + + idx = INTEL_PMC_IDX_FIXED_SLOTS; + } + + intel_set_masks(event, idx); /* * Enable IRQ generation (0x8), if not PEBS, @@ -2214,6 +2752,7 @@ static void intel_pmu_enable_fixed(struct perf_event *event) if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY) bits |= 0x4; + idx -= INTEL_PMC_IDX_FIXED; bits <<= (idx * 4); mask = 0xfULL << (idx * 4); @@ -2222,42 +2761,39 @@ static void intel_pmu_enable_fixed(struct perf_event *event) mask |= ICL_FIXED_0_ADAPTIVE << (idx * 4); } - rdmsrl(hwc->config_base, ctrl_val); - ctrl_val &= ~mask; - ctrl_val |= bits; - wrmsrl(hwc->config_base, ctrl_val); + cpuc->fixed_ctrl_val &= ~mask; + cpuc->fixed_ctrl_val |= bits; } static void intel_pmu_enable_event(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; - struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); - - if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) { - if (!__this_cpu_read(cpu_hw_events.enabled)) - return; - - intel_pmu_enable_bts(hwc->config); - return; - } - - if (event->attr.exclude_host) - cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx); - if (event->attr.exclude_guest) - cpuc->intel_ctrl_host_mask |= (1ull << hwc->idx); - - if (unlikely(event_is_checkpointed(event))) - cpuc->intel_cp_status |= (1ull << hwc->idx); + int idx = hwc->idx; if (unlikely(event->attr.precise_ip)) intel_pmu_pebs_enable(event); - if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { + switch (idx) { + case 0 ... INTEL_PMC_IDX_FIXED - 1: + intel_set_masks(event, idx); + __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); + break; + case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1: + case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END: intel_pmu_enable_fixed(event); - return; + break; + case INTEL_PMC_IDX_FIXED_BTS: + if (!__this_cpu_read(cpu_hw_events.enabled)) + return; + intel_pmu_enable_bts(hwc->config); + break; + case INTEL_PMC_IDX_FIXED_VLBR: + intel_set_masks(event, idx); + break; + default: + pr_warn("Failed to enable the event with invalid index %d\n", + idx); } - - __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); } static void intel_pmu_add_event(struct perf_event *event) @@ -2274,7 +2810,7 @@ static void intel_pmu_add_event(struct perf_event *event) */ int intel_pmu_save_and_restart(struct perf_event *event) { - x86_perf_event_update(event); + static_call(x86_pmu_update)(event); /* * For a checkpointed counter always reset back to 0. This * avoids a situation where the counter overflows, aborts the @@ -2286,28 +2822,50 @@ int intel_pmu_save_and_restart(struct perf_event *event) wrmsrl(event->hw.event_base, 0); local64_set(&event->hw.prev_count, 0); } + return static_call(x86_pmu_set_period)(event); +} + +static int intel_pmu_set_period(struct perf_event *event) +{ + if (unlikely(is_topdown_count(event))) + return static_call(intel_pmu_set_topdown_event_period)(event); + return x86_perf_event_set_period(event); } +static u64 intel_pmu_update(struct perf_event *event) +{ + if (unlikely(is_topdown_count(event))) + return static_call(intel_pmu_update_topdown_event)(event); + + return x86_perf_event_update(event); +} + static void intel_pmu_reset(void) { struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed); + int num_counters = hybrid(cpuc->pmu, num_counters); unsigned long flags; int idx; - if (!x86_pmu.num_counters) + if (!num_counters) return; local_irq_save(flags); pr_info("clearing PMU state on CPU#%d\n", smp_processor_id()); - for (idx = 0; idx < x86_pmu.num_counters; idx++) { + for (idx = 0; idx < num_counters; idx++) { wrmsrl_safe(x86_pmu_config_addr(idx), 0ull); wrmsrl_safe(x86_pmu_event_addr(idx), 0ull); } - for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) + for (idx = 0; idx < num_counters_fixed; idx++) { + if (fixed_counter_disabled(idx, cpuc->pmu)) + continue; wrmsrl_safe(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); + } if (ds) ds->bts_index = ds->bts_buffer_base; @@ -2327,12 +2885,54 @@ static void intel_pmu_reset(void) local_irq_restore(flags); } +/* + * We may be running with guest PEBS events created by KVM, and the + * PEBS records are logged into the guest's DS and invisible to host. + * + * In the case of guest PEBS overflow, we only trigger a fake event + * to emulate the PEBS overflow PMI for guest PEBS counters in KVM. + * The guest will then vm-entry and check the guest DS area to read + * the guest PEBS records. + * + * The contents and other behavior of the guest event do not matter. + */ +static void x86_pmu_handle_guest_pebs(struct pt_regs *regs, + struct perf_sample_data *data) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + u64 guest_pebs_idxs = cpuc->pebs_enabled & ~cpuc->intel_ctrl_host_mask; + struct perf_event *event = NULL; + int bit; + + if (!unlikely(perf_guest_state())) + return; + + if (!x86_pmu.pebs_ept || !x86_pmu.pebs_active || + !guest_pebs_idxs) + return; + + for_each_set_bit(bit, (unsigned long *)&guest_pebs_idxs, + INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed) { + event = cpuc->events[bit]; + if (!event->attr.precise_ip) + continue; + + perf_sample_data_init(data, 0, event->hw.last_period); + if (perf_event_overflow(event, data, regs)) + x86_pmu_stop(event, 0); + + /* Inject one fake event is enough. */ + break; + } +} + static int handle_pmi_common(struct pt_regs *regs, u64 status) { struct perf_sample_data data; struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int bit; int handled = 0; + u64 intel_ctrl = hybrid(cpuc->pmu, intel_ctrl); inc_irq_stat(apic_perf_irqs); @@ -2356,7 +2956,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) * processing loop coming after that the function, otherwise * phony regular samples may be generated in the sampling buffer * not marked with the EXACT tag. Another possibility is to have - * one PEBS event and at least one non-PEBS event whic hoverflows + * one PEBS event and at least one non-PEBS event which overflows * while PEBS has armed. In this case, bit 62 of GLOBAL_STATUS will * not be set, yet the overflow status bit for the PEBS counter will * be on Skylake. @@ -2365,33 +2965,48 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) * counters from the GLOBAL_STATUS mask and we always process PEBS * events via drain_pebs(). */ - if (x86_pmu.flags & PMU_FL_PEBS_ALL) - status &= ~cpuc->pebs_enabled; - else - status &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK); + status &= ~(cpuc->pebs_enabled & x86_pmu.pebs_capable); /* * PEBS overflow sets bit 62 in the global status register */ - if (__test_and_clear_bit(62, (unsigned long *)&status)) { + if (__test_and_clear_bit(GLOBAL_STATUS_BUFFER_OVF_BIT, (unsigned long *)&status)) { + u64 pebs_enabled = cpuc->pebs_enabled; + handled++; - x86_pmu.drain_pebs(regs); - status &= x86_pmu.intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI; + x86_pmu_handle_guest_pebs(regs, &data); + x86_pmu.drain_pebs(regs, &data); + status &= intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI; + + /* + * PMI throttle may be triggered, which stops the PEBS event. + * Although cpuc->pebs_enabled is updated accordingly, the + * MSR_IA32_PEBS_ENABLE is not updated. Because the + * cpuc->enabled has been forced to 0 in PMI. + * Update the MSR if pebs_enabled is changed. + */ + if (pebs_enabled != cpuc->pebs_enabled) + wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); } /* * Intel PT */ - if (__test_and_clear_bit(55, (unsigned long *)&status)) { + if (__test_and_clear_bit(GLOBAL_STATUS_TRACE_TOPAPMI_BIT, (unsigned long *)&status)) { handled++; - if (unlikely(perf_guest_cbs && perf_guest_cbs->is_in_guest() && - perf_guest_cbs->handle_intel_pt_intr)) - perf_guest_cbs->handle_intel_pt_intr(); - else + if (!perf_guest_handle_intel_pt_intr()) intel_pt_interrupt(); } /* + * Intel Perf metrics + */ + if (__test_and_clear_bit(GLOBAL_STATUS_PERF_METRICS_OVF_BIT, (unsigned long *)&status)) { + handled++; + static_call(intel_pmu_update_topdown_event)(NULL); + } + + /* * Checkpointed counters can lead to 'spurious' PMIs because the * rollback caused by the PMI will have cleared the overflow status * bit. Therefore always force probe these counters. @@ -2411,8 +3026,10 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) perf_sample_data_init(&data, 0, event->hw.last_period); - if (has_branch_stack(event)) + if (has_branch_stack(event)) { data.br_stack = &cpuc->lbr_stack; + data.sample_flags |= PERF_SAMPLE_BRANCH_STACK; + } if (perf_event_overflow(event, &data, regs)) x86_pmu_stop(event, 0); @@ -2421,123 +3038,38 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) return handled; } -static bool disable_counter_freezing = true; -static int __init intel_perf_counter_freezing_setup(char *s) -{ - bool res; - - if (kstrtobool(s, &res)) - return -EINVAL; - - disable_counter_freezing = !res; - return 1; -} -__setup("perf_v4_pmi=", intel_perf_counter_freezing_setup); - -/* - * Simplified handler for Arch Perfmon v4: - * - We rely on counter freezing/unfreezing to enable/disable the PMU. - * This is done automatically on PMU ack. - * - Ack the PMU only after the APIC. - */ - -static int intel_pmu_handle_irq_v4(struct pt_regs *regs) -{ - struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); - int handled = 0; - bool bts = false; - u64 status; - int pmu_enabled = cpuc->enabled; - int loops = 0; - - /* PMU has been disabled because of counter freezing */ - cpuc->enabled = 0; - if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { - bts = true; - intel_bts_disable_local(); - handled = intel_pmu_drain_bts_buffer(); - handled += intel_bts_interrupt(); - } - status = intel_pmu_get_status(); - if (!status) - goto done; -again: - intel_pmu_lbr_read(); - if (++loops > 100) { - static bool warned; - - if (!warned) { - WARN(1, "perfevents: irq loop stuck!\n"); - perf_event_print_debug(); - warned = true; - } - intel_pmu_reset(); - goto done; - } - - - handled += handle_pmi_common(regs, status); -done: - /* Ack the PMI in the APIC */ - apic_write(APIC_LVTPC, APIC_DM_NMI); - - /* - * The counters start counting immediately while ack the status. - * Make it as close as possible to IRET. This avoids bogus - * freezing on Skylake CPUs. - */ - if (status) { - intel_pmu_ack_status(status); - } else { - /* - * CPU may issues two PMIs very close to each other. - * When the PMI handler services the first one, the - * GLOBAL_STATUS is already updated to reflect both. - * When it IRETs, the second PMI is immediately - * handled and it sees clear status. At the meantime, - * there may be a third PMI, because the freezing bit - * isn't set since the ack in first PMI handlers. - * Double check if there is more work to be done. - */ - status = intel_pmu_get_status(); - if (status) - goto again; - } - - if (bts) - intel_bts_enable_local(); - cpuc->enabled = pmu_enabled; - return handled; -} - /* * This handler is triggered by the local APIC, so the APIC IRQ handling * rules apply: */ static int intel_pmu_handle_irq(struct pt_regs *regs) { - struct cpu_hw_events *cpuc; + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + bool late_ack = hybrid_bit(cpuc->pmu, late_ack); + bool mid_ack = hybrid_bit(cpuc->pmu, mid_ack); int loops; u64 status; int handled; int pmu_enabled; - cpuc = this_cpu_ptr(&cpu_hw_events); - /* * Save the PMU state. * It needs to be restored when leaving the handler. */ pmu_enabled = cpuc->enabled; /* - * No known reason to not always do late ACK, - * but just in case do it opt-in. + * In general, the early ACK is only applied for old platforms. + * For the big core starts from Haswell, the late ACK should be + * applied. + * For the small core after Tremont, we have to do the ACK right + * before re-enabling counters, which is in the middle of the + * NMI handler. */ - if (!x86_pmu.late_ack) + if (!late_ack && !mid_ack) apic_write(APIC_LVTPC, APIC_DM_NMI); intel_bts_disable_local(); cpuc->enabled = 0; - __intel_pmu_disable_all(); + __intel_pmu_disable_all(true); handled = intel_pmu_drain_bts_buffer(); handled += intel_bts_interrupt(); status = intel_pmu_get_status(); @@ -2570,6 +3102,8 @@ again: goto again; done: + if (mid_ack) + apic_write(APIC_LVTPC, APIC_DM_NMI); /* Only restore PMU state when it's active. See x86_pmu_disable(). */ cpuc->enabled = pmu_enabled; if (pmu_enabled) @@ -2581,7 +3115,7 @@ done: * have been reset. This avoids spurious NMIs on * Haswell CPUs. */ - if (x86_pmu.late_ack) + if (late_ack) apic_write(APIC_LVTPC, APIC_DM_NMI); return handled; } @@ -2595,8 +3129,26 @@ intel_bts_constraints(struct perf_event *event) return NULL; } -static int intel_alt_er(int idx, u64 config) +/* + * Note: matches a fake event, like Fixed2. + */ +static struct event_constraint * +intel_vlbr_constraints(struct perf_event *event) +{ + struct event_constraint *c = &vlbr_constraint; + + if (unlikely(constraint_match(c, event->hw.config))) { + event->hw.flags |= c->flags; + return c; + } + + return NULL; +} + +static int intel_alt_er(struct cpu_hw_events *cpuc, + int idx, u64 config) { + struct extra_reg *extra_regs = hybrid(cpuc->pmu, extra_regs); int alt_idx = idx; if (!(x86_pmu.flags & PMU_FL_HAS_RSP_1)) @@ -2608,7 +3160,7 @@ static int intel_alt_er(int idx, u64 config) if (idx == EXTRA_REG_RSP_1) alt_idx = EXTRA_REG_RSP_0; - if (config & ~x86_pmu.extra_regs[alt_idx].valid_mask) + if (config & ~extra_regs[alt_idx].valid_mask) return idx; return alt_idx; @@ -2616,15 +3168,16 @@ static int intel_alt_er(int idx, u64 config) static void intel_fixup_er(struct perf_event *event, int idx) { + struct extra_reg *extra_regs = hybrid(event->pmu, extra_regs); event->hw.extra_reg.idx = idx; if (idx == EXTRA_REG_RSP_0) { event->hw.config &= ~INTEL_ARCH_EVENT_MASK; - event->hw.config |= x86_pmu.extra_regs[EXTRA_REG_RSP_0].event; + event->hw.config |= extra_regs[EXTRA_REG_RSP_0].event; event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0; } else if (idx == EXTRA_REG_RSP_1) { event->hw.config &= ~INTEL_ARCH_EVENT_MASK; - event->hw.config |= x86_pmu.extra_regs[EXTRA_REG_RSP_1].event; + event->hw.config |= extra_regs[EXTRA_REG_RSP_1].event; event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1; } } @@ -2700,7 +3253,7 @@ again: */ c = NULL; } else { - idx = intel_alt_er(idx, reg->config); + idx = intel_alt_er(cpuc, idx, reg->config); if (idx != reg->idx) { raw_spin_unlock_irqrestore(&era->lock, flags); goto again; @@ -2765,10 +3318,11 @@ struct event_constraint * x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx, struct perf_event *event) { + struct event_constraint *event_constraints = hybrid(cpuc->pmu, event_constraints); struct event_constraint *c; - if (x86_pmu.event_constraints) { - for_each_event_constraint(c, x86_pmu.event_constraints) { + if (event_constraints) { + for_each_event_constraint(c, event_constraints) { if (constraint_match(c, event->hw.config)) { event->hw.flags |= c->flags; return c; @@ -2776,7 +3330,7 @@ x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx, } } - return &unconstrained; + return &hybrid_var(cpuc->pmu, unconstrained); } static struct event_constraint * @@ -2785,6 +3339,10 @@ __intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx, { struct event_constraint *c; + c = intel_vlbr_constraints(event); + if (c) + return c; + c = intel_bts_constraints(event); if (c) return c; @@ -3257,6 +3815,43 @@ static int core_pmu_hw_config(struct perf_event *event) return intel_pmu_bts_config(event); } +#define INTEL_TD_METRIC_AVAILABLE_MAX (INTEL_TD_METRIC_RETIRING + \ + ((x86_pmu.num_topdown_events - 1) << 8)) + +static bool is_available_metric_event(struct perf_event *event) +{ + return is_metric_event(event) && + event->attr.config <= INTEL_TD_METRIC_AVAILABLE_MAX; +} + +static inline bool is_mem_loads_event(struct perf_event *event) +{ + return (event->attr.config & INTEL_ARCH_EVENT_MASK) == X86_CONFIG(.event=0xcd, .umask=0x01); +} + +static inline bool is_mem_loads_aux_event(struct perf_event *event) +{ + return (event->attr.config & INTEL_ARCH_EVENT_MASK) == X86_CONFIG(.event=0x03, .umask=0x82); +} + +static inline bool require_mem_loads_aux_event(struct perf_event *event) +{ + if (!(x86_pmu.flags & PMU_FL_MEM_LOADS_AUX)) + return false; + + if (is_hybrid()) + return hybrid_pmu(event->pmu)->cpu_type == hybrid_big; + + return true; +} + +static inline bool intel_pmu_has_cap(struct perf_event *event, int idx) +{ + union perf_capabilities *intel_cap = &hybrid(event->pmu, intel_cap); + + return test_bit(idx, (unsigned long *)&intel_cap->capabilities); +} + static int intel_pmu_hw_config(struct perf_event *event) { int ret = x86_pmu_hw_config(event); @@ -3269,23 +3864,26 @@ static int intel_pmu_hw_config(struct perf_event *event) return ret; if (event->attr.precise_ip) { + if ((event->attr.config & INTEL_ARCH_EVENT_MASK) == INTEL_FIXED_VLBR_EVENT) + return -EINVAL; + if (!(event->attr.freq || (event->attr.wakeup_events && !event->attr.watermark))) { event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD; if (!(event->attr.sample_type & - ~intel_pmu_large_pebs_flags(event))) + ~intel_pmu_large_pebs_flags(event))) { event->hw.flags |= PERF_X86_EVENT_LARGE_PEBS; + event->attach_state |= PERF_ATTACH_SCHED_CB; + } } if (x86_pmu.pebs_aliases) x86_pmu.pebs_aliases(event); - - if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) - event->attr.sample_type |= __PERF_SAMPLE_CALLCHAIN_EARLY; } if (needs_branch_stack(event)) { ret = intel_pmu_setup_lbr_filter(event); if (ret) return ret; + event->attach_state |= PERF_ATTACH_SCHED_CB; /* * BTS is set up earlier in this path, so don't account twice @@ -3306,9 +3904,87 @@ static int intel_pmu_hw_config(struct perf_event *event) event->hw.flags |= PERF_X86_EVENT_PEBS_VIA_PT; } - if (event->attr.type != PERF_TYPE_RAW) + if ((event->attr.type == PERF_TYPE_HARDWARE) || + (event->attr.type == PERF_TYPE_HW_CACHE)) return 0; + /* + * Config Topdown slots and metric events + * + * The slots event on Fixed Counter 3 can support sampling, + * which will be handled normally in x86_perf_event_update(). + * + * Metric events don't support sampling and require being paired + * with a slots event as group leader. When the slots event + * is used in a metrics group, it too cannot support sampling. + */ + if (intel_pmu_has_cap(event, PERF_CAP_METRICS_IDX) && is_topdown_event(event)) { + if (event->attr.config1 || event->attr.config2) + return -EINVAL; + + /* + * The TopDown metrics events and slots event don't + * support any filters. + */ + if (event->attr.config & X86_ALL_EVENT_FLAGS) + return -EINVAL; + + if (is_available_metric_event(event)) { + struct perf_event *leader = event->group_leader; + + /* The metric events don't support sampling. */ + if (is_sampling_event(event)) + return -EINVAL; + + /* The metric events require a slots group leader. */ + if (!is_slots_event(leader)) + return -EINVAL; + + /* + * The leader/SLOTS must not be a sampling event for + * metric use; hardware requires it starts at 0 when used + * in conjunction with MSR_PERF_METRICS. + */ + if (is_sampling_event(leader)) + return -EINVAL; + + event->event_caps |= PERF_EV_CAP_SIBLING; + /* + * Only once we have a METRICs sibling do we + * need TopDown magic. + */ + leader->hw.flags |= PERF_X86_EVENT_TOPDOWN; + event->hw.flags |= PERF_X86_EVENT_TOPDOWN; + } + } + + /* + * The load latency event X86_CONFIG(.event=0xcd, .umask=0x01) on SPR + * doesn't function quite right. As a work-around it needs to always be + * co-scheduled with a auxiliary event X86_CONFIG(.event=0x03, .umask=0x82). + * The actual count of this second event is irrelevant it just needs + * to be active to make the first event function correctly. + * + * In a group, the auxiliary event must be in front of the load latency + * event. The rule is to simplify the implementation of the check. + * That's because perf cannot have a complete group at the moment. + */ + if (require_mem_loads_aux_event(event) && + (event->attr.sample_type & PERF_SAMPLE_DATA_SRC) && + is_mem_loads_event(event)) { + struct perf_event *leader = event->group_leader; + struct perf_event *sibling = NULL; + + if (!is_mem_loads_aux_event(leader)) { + for_each_sibling_event(sibling, leader) { + if (is_mem_loads_aux_event(sibling)) + break; + } + if (list_entry_is_head(sibling, &leader->sibling_list, sibling_list)) + return -ENODATA; + } + } + if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY)) return 0; @@ -3324,59 +4000,99 @@ static int intel_pmu_hw_config(struct perf_event *event) return 0; } -#ifdef CONFIG_RETPOLINE -static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr); -static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr); -#endif - -struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr) -{ -#ifdef CONFIG_RETPOLINE - if (x86_pmu.guest_get_msrs == intel_guest_get_msrs) - return intel_guest_get_msrs(nr); - else if (x86_pmu.guest_get_msrs == core_guest_get_msrs) - return core_guest_get_msrs(nr); -#endif - if (x86_pmu.guest_get_msrs) - return x86_pmu.guest_get_msrs(nr); - *nr = 0; - return NULL; -} -EXPORT_SYMBOL_GPL(perf_guest_get_msrs); - -static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr) +/* + * Currently, the only caller of this function is the atomic_switch_perf_msrs(). + * The host perf conext helps to prepare the values of the real hardware for + * a set of msrs that need to be switched atomically in a vmx transaction. + * + * For example, the pseudocode needed to add a new msr should look like: + * + * arr[(*nr)++] = (struct perf_guest_switch_msr){ + * .msr = the hardware msr address, + * .host = the value the hardware has when it doesn't run a guest, + * .guest = the value the hardware has when it runs a guest, + * }; + * + * These values have nothing to do with the emulated values the guest sees + * when it uses {RD,WR}MSR, which should be handled by the KVM context, + * specifically in the intel_pmu_{get,set}_msr(). + */ +static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs; + struct kvm_pmu *kvm_pmu = (struct kvm_pmu *)data; + u64 intel_ctrl = hybrid(cpuc->pmu, intel_ctrl); + u64 pebs_mask = cpuc->pebs_enabled & x86_pmu.pebs_capable; + int global_ctrl, pebs_enable; - arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL; - arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask; - arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask; - if (x86_pmu.flags & PMU_FL_PEBS_ALL) - arr[0].guest &= ~cpuc->pebs_enabled; - else - arr[0].guest &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK); - *nr = 1; + *nr = 0; + global_ctrl = (*nr)++; + arr[global_ctrl] = (struct perf_guest_switch_msr){ + .msr = MSR_CORE_PERF_GLOBAL_CTRL, + .host = intel_ctrl & ~cpuc->intel_ctrl_guest_mask, + .guest = intel_ctrl & (~cpuc->intel_ctrl_host_mask | ~pebs_mask), + }; - if (x86_pmu.pebs && x86_pmu.pebs_no_isolation) { - /* - * If PMU counter has PEBS enabled it is not enough to - * disable counter on a guest entry since PEBS memory - * write can overshoot guest entry and corrupt guest - * memory. Disabling PEBS solves the problem. - * - * Don't do this if the CPU already enforces it. - */ - arr[1].msr = MSR_IA32_PEBS_ENABLE; - arr[1].host = cpuc->pebs_enabled; - arr[1].guest = 0; - *nr = 2; + if (!x86_pmu.pebs) + return arr; + + /* + * If PMU counter has PEBS enabled it is not enough to + * disable counter on a guest entry since PEBS memory + * write can overshoot guest entry and corrupt guest + * memory. Disabling PEBS solves the problem. + * + * Don't do this if the CPU already enforces it. + */ + if (x86_pmu.pebs_no_isolation) { + arr[(*nr)++] = (struct perf_guest_switch_msr){ + .msr = MSR_IA32_PEBS_ENABLE, + .host = cpuc->pebs_enabled, + .guest = 0, + }; + return arr; + } + + if (!kvm_pmu || !x86_pmu.pebs_ept) + return arr; + + arr[(*nr)++] = (struct perf_guest_switch_msr){ + .msr = MSR_IA32_DS_AREA, + .host = (unsigned long)cpuc->ds, + .guest = kvm_pmu->ds_area, + }; + + if (x86_pmu.intel_cap.pebs_baseline) { + arr[(*nr)++] = (struct perf_guest_switch_msr){ + .msr = MSR_PEBS_DATA_CFG, + .host = cpuc->pebs_data_cfg, + .guest = kvm_pmu->pebs_data_cfg, + }; + } + + pebs_enable = (*nr)++; + arr[pebs_enable] = (struct perf_guest_switch_msr){ + .msr = MSR_IA32_PEBS_ENABLE, + .host = cpuc->pebs_enabled & ~cpuc->intel_ctrl_guest_mask, + .guest = pebs_mask & ~cpuc->intel_ctrl_host_mask, + }; + + if (arr[pebs_enable].host) { + /* Disable guest PEBS if host PEBS is enabled. */ + arr[pebs_enable].guest = 0; + } else { + /* Disable guest PEBS thoroughly for cross-mapped PEBS counters. */ + arr[pebs_enable].guest &= ~kvm_pmu->host_cross_mapped_mask; + arr[global_ctrl].guest &= ~kvm_pmu->host_cross_mapped_mask; + /* Set hw GLOBAL_CTRL bits for PEBS counter when it runs for guest */ + arr[global_ctrl].guest |= arr[pebs_enable].guest; } return arr; } -static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr) +static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr, void *data) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs; @@ -3509,6 +4225,31 @@ icl_get_event_constraints(struct cpu_hw_events *cpuc, int idx, } static struct event_constraint * +spr_get_event_constraints(struct cpu_hw_events *cpuc, int idx, + struct perf_event *event) +{ + struct event_constraint *c; + + c = icl_get_event_constraints(cpuc, idx, event); + + /* + * The :ppp indicates the Precise Distribution (PDist) facility, which + * is only supported on the GP counter 0. If a :ppp event which is not + * available on the GP counter 0, error out. + * Exception: Instruction PDIR is only available on the fixed counter 0. + */ + if ((event->attr.precise_ip == 3) && + !constraint_match(&fixed0_constraint, event->hw.config)) { + if (c->idxmsk64 & BIT_ULL(0)) + return &counter0_constraint; + + return &emptyconstraint; + } + + return c; +} + +static struct event_constraint * glp_get_event_constraints(struct cpu_hw_events *cpuc, int idx, struct perf_event *event) { @@ -3529,6 +4270,8 @@ tnt_get_event_constraints(struct cpu_hw_events *cpuc, int idx, { struct event_constraint *c; + c = intel_get_event_constraints(cpuc, idx, event); + /* * :ppp means to do reduced skid PEBS, * which is available on PMC0 and fixed counter 0. @@ -3541,8 +4284,6 @@ tnt_get_event_constraints(struct cpu_hw_events *cpuc, int idx, return &counter0_constraint; } - c = intel_get_event_constraints(cpuc, idx, event); - return c; } @@ -3566,6 +4307,39 @@ tfa_get_event_constraints(struct cpu_hw_events *cpuc, int idx, return c; } +static struct event_constraint * +adl_get_event_constraints(struct cpu_hw_events *cpuc, int idx, + struct perf_event *event) +{ + struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); + + if (pmu->cpu_type == hybrid_big) + return spr_get_event_constraints(cpuc, idx, event); + else if (pmu->cpu_type == hybrid_small) + return tnt_get_event_constraints(cpuc, idx, event); + + WARN_ON(1); + return &emptyconstraint; +} + +static int adl_hw_config(struct perf_event *event) +{ + struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); + + if (pmu->cpu_type == hybrid_big) + return hsw_hw_config(event); + else if (pmu->cpu_type == hybrid_small) + return intel_pmu_hw_config(event); + + WARN_ON(1); + return -EOPNOTSUPP; +} + +static u8 adl_get_hybrid_cpu_type(void) +{ + return hybrid_big; +} + /* * Broadwell: * @@ -3581,20 +4355,25 @@ tfa_get_event_constraints(struct cpu_hw_events *cpuc, int idx, * Therefore the effective (average) period matches the requested period, * despite coarser hardware granularity. */ -static u64 bdw_limit_period(struct perf_event *event, u64 left) +static void bdw_limit_period(struct perf_event *event, s64 *left) { if ((event->hw.config & INTEL_ARCH_EVENT_MASK) == X86_CONFIG(.event=0xc0, .umask=0x01)) { - if (left < 128) - left = 128; - left &= ~0x3fULL; + if (*left < 128) + *left = 128; + *left &= ~0x3fULL; } - return left; } -static u64 nhm_limit_period(struct perf_event *event, u64 left) +static void nhm_limit_period(struct perf_event *event, s64 *left) { - return max(left, 32ULL); + *left = max(*left, 32LL); +} + +static void spr_limit_period(struct perf_event *event, s64 *left) +{ + if (event->attr.precise_ip == 3) + *left = max(*left, 128LL); } PMU_FORMAT_ATTR(event, "config:0-7" ); @@ -3661,7 +4440,7 @@ int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu) { cpuc->pebs_record_size = x86_pmu.pebs_record_size; - if (x86_pmu.extra_regs || x86_pmu.lbr_sel_map) { + if (is_hybrid() || x86_pmu.extra_regs || x86_pmu.lbr_sel_map) { cpuc->shared_regs = allocate_shared_regs(cpu); if (!cpuc->shared_regs) goto err; @@ -3715,12 +4494,62 @@ static void flip_smm_bit(void *data) } } +static bool init_hybrid_pmu(int cpu) +{ + struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); + u8 cpu_type = get_this_hybrid_cpu_type(); + struct x86_hybrid_pmu *pmu = NULL; + int i; + + if (!cpu_type && x86_pmu.get_hybrid_cpu_type) + cpu_type = x86_pmu.get_hybrid_cpu_type(); + + for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) { + if (x86_pmu.hybrid_pmu[i].cpu_type == cpu_type) { + pmu = &x86_pmu.hybrid_pmu[i]; + break; + } + } + if (WARN_ON_ONCE(!pmu || (pmu->pmu.type == -1))) { + cpuc->pmu = NULL; + return false; + } + + /* Only check and dump the PMU information for the first CPU */ + if (!cpumask_empty(&pmu->supported_cpus)) + goto end; + + if (!check_hw_exists(&pmu->pmu, pmu->num_counters, pmu->num_counters_fixed)) + return false; + + pr_info("%s PMU driver: ", pmu->name); + + if (pmu->intel_cap.pebs_output_pt_available) + pr_cont("PEBS-via-PT "); + + pr_cont("\n"); + + x86_pmu_show_pmu_cap(pmu->num_counters, pmu->num_counters_fixed, + pmu->intel_ctrl); + +end: + cpumask_set_cpu(cpu, &pmu->supported_cpus); + cpuc->pmu = &pmu->pmu; + + x86_pmu_update_cpu_context(&pmu->pmu, cpu); + + return true; +} + static void intel_pmu_cpu_starting(int cpu) { struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); int core_id = topology_core_id(cpu); int i; + if (is_hybrid() && !init_hybrid_pmu(cpu)) + return; + init_debug_store_on_cpu(cpu); /* * Deal with CPUs that don't clear their LBRs on power-up. @@ -3738,8 +4567,24 @@ static void intel_pmu_cpu_starting(int cpu) if (x86_pmu.version > 1) flip_smm_bit(&x86_pmu.attr_freeze_on_smi); - if (x86_pmu.counter_freezing) - enable_counter_freeze(); + /* + * Disable perf metrics if any added CPU doesn't support it. + * + * Turn off the check for a hybrid architecture, because the + * architecture MSR, MSR_IA32_PERF_CAPABILITIES, only indicate + * the architecture features. The perf metrics is a model-specific + * feature for now. The corresponding bit should always be 0 on + * a hybrid platform, e.g., Alder Lake. + */ + if (!is_hybrid() && x86_pmu.intel_cap.perf_metrics) { + union perf_capabilities perf_cap; + + rdmsrl(MSR_IA32_PERF_CAPABILITIES, perf_cap.capabilities); + if (!perf_cap.perf_metrics) { + x86_pmu.intel_cap.perf_metrics = 0; + x86_pmu.intel_ctrl &= ~(1ULL << GLOBAL_CTRL_EN_PERF_METRICS); + } + } if (!cpuc->shared_regs) return; @@ -3800,9 +4645,6 @@ static void free_excl_cntrs(struct cpu_hw_events *cpuc) static void intel_pmu_cpu_dying(int cpu) { fini_debug_store_on_cpu(cpu); - - if (x86_pmu.counter_freezing) - disable_counter_freeze(); } void intel_cpuc_finish(struct cpu_hw_events *cpuc) @@ -3821,7 +4663,12 @@ void intel_cpuc_finish(struct cpu_hw_events *cpuc) static void intel_pmu_cpu_dead(int cpu) { - intel_cpuc_finish(&per_cpu(cpu_hw_events, cpu)); + struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); + + intel_cpuc_finish(cpuc); + + if (is_hybrid() && cpuc->pmu) + cpumask_clear_cpu(cpu, &hybrid_pmu(cpuc->pmu)->supported_cpus); } static void intel_pmu_sched_task(struct perf_event_context *ctx, @@ -3842,14 +4689,30 @@ static int intel_pmu_check_period(struct perf_event *event, u64 value) return intel_pmu_has_bts_period(event, value) ? -EINVAL : 0; } +static void intel_aux_output_init(void) +{ + /* Refer also intel_pmu_aux_output_match() */ + if (x86_pmu.intel_cap.pebs_output_pt_available) + x86_pmu.assign = intel_pmu_assign_event; +} + static int intel_pmu_aux_output_match(struct perf_event *event) { + /* intel_pmu_assign_event() is needed, refer intel_aux_output_init() */ if (!x86_pmu.intel_cap.pebs_output_pt_available) return 0; return is_intel_pt_event(event); } +static int intel_pmu_filter_match(struct perf_event *event) +{ + struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); + unsigned int cpu = smp_processor_id(); + + return cpumask_test_cpu(cpu, &pmu->supported_cpus); +} + PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63"); PMU_FORMAT_ATTR(ldlat, "config1:0-15"); @@ -3932,6 +4795,11 @@ static __initconst const struct x86_pmu core_pmu = { .cpu_dead = intel_pmu_cpu_dead, .check_period = intel_pmu_check_period, + + .lbr_reset = intel_pmu_lbr_reset_64, + .lbr_read = intel_pmu_lbr_read_64, + .lbr_save = intel_pmu_lbr_save, + .lbr_restore = intel_pmu_lbr_restore, }; static __initconst const struct x86_pmu intel_pmu = { @@ -3944,6 +4812,8 @@ static __initconst const struct x86_pmu intel_pmu = { .add = intel_pmu_add_event, .del = intel_pmu_del_event, .read = intel_pmu_read_event, + .set_period = intel_pmu_set_period, + .update = intel_pmu_update, .hw_config = intel_pmu_hw_config, .schedule_events = x86_schedule_events, .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, @@ -3977,6 +4847,24 @@ static __initconst const struct x86_pmu intel_pmu = { .check_period = intel_pmu_check_period, .aux_output_match = intel_pmu_aux_output_match, + + .lbr_reset = intel_pmu_lbr_reset_64, + .lbr_read = intel_pmu_lbr_read_64, + .lbr_save = intel_pmu_lbr_save, + .lbr_restore = intel_pmu_lbr_restore, + + /* + * SMM has access to all 4 rings and while traditionally SMM code only + * ran in CPL0, 2021-era firmware is starting to make use of CPL3 in SMM. + * + * Since the EVENTSEL.{USR,OS} CPL filtering makes no distinction + * between SMM or not, this results in what should be pure userspace + * counters including SMM data. + * + * This is a clear privilege issue, therefore globally disable + * counting SMM by default. + */ + .attr_freeze_on_smi = 1, }; static __init void intel_clovertown_quirk(void) @@ -4017,9 +4905,13 @@ static const struct x86_cpu_desc isolation_ucodes[] = { INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D, 3, 0x07000009), INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D, 4, 0x0f000009), INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D, 5, 0x0e000002), - INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_X, 2, 0x0b000014), + INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_X, 1, 0x0b000014), INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 3, 0x00000021), INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 4, 0x00000000), + INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 5, 0x00000000), + INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 6, 0x00000000), + INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 7, 0x00000000), + INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 11, 0x00000000), INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_L, 3, 0x0000007c), INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE, 3, 0x0000007c), INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE, 9, 0x0000004e), @@ -4092,7 +4984,7 @@ static bool check_msr(unsigned long msr, u64 mask) /* * Disable the check for real HW, so we don't - * mess with potentionaly enabled registers: + * mess with potentially enabled registers: */ if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) return true; @@ -4157,7 +5049,7 @@ static __init void intel_arch_events_quirk(void) { int bit; - /* disable event that reported as not presend by cpuid */ + /* disable event that reported as not present by cpuid */ for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) { intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0; pr_warn("CPUID marked event: \'%s\' unavailable\n", @@ -4184,39 +5076,6 @@ static __init void intel_nehalem_quirk(void) } } -static const struct x86_cpu_desc counter_freezing_ucodes[] = { - INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT, 2, 0x0000000e), - INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT, 9, 0x0000002e), - INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT, 10, 0x00000008), - INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT_D, 1, 0x00000028), - INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT_PLUS, 1, 0x00000028), - INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT_PLUS, 8, 0x00000006), - {} -}; - -static bool intel_counter_freezing_broken(void) -{ - return !x86_cpu_has_min_microcode_rev(counter_freezing_ucodes); -} - -static __init void intel_counter_freezing_quirk(void) -{ - /* Check if it's already disabled */ - if (disable_counter_freezing) - return; - - /* - * If the system starts with the wrong ucode, leave the - * counter-freezing feature permanently disabled. - */ - if (intel_counter_freezing_broken()) { - pr_info("PMU counter freezing disabled due to CPU errata," - "please upgrade microcode\n"); - x86_pmu.counter_freezing = false; - x86_pmu.handle_irq = intel_pmu_handle_irq; - } -} - /* * enable software workaround for errata: * SNB: BJ122 @@ -4299,6 +5158,15 @@ static struct attribute *icl_events_attrs[] = { NULL, }; +static struct attribute *icl_td_events_attrs[] = { + EVENT_PTR(slots), + EVENT_PTR(td_retiring), + EVENT_PTR(td_bad_spec), + EVENT_PTR(td_fe_bound), + EVENT_PTR(td_be_bound), + NULL, +}; + static struct attribute *icl_tsx_events_attrs[] = { EVENT_PTR(tx_start), EVENT_PTR(tx_abort), @@ -4317,6 +5185,42 @@ static struct attribute *icl_tsx_events_attrs[] = { NULL, }; + +EVENT_ATTR_STR(mem-stores, mem_st_spr, "event=0xcd,umask=0x2"); +EVENT_ATTR_STR(mem-loads-aux, mem_ld_aux, "event=0x03,umask=0x82"); + +static struct attribute *spr_events_attrs[] = { + EVENT_PTR(mem_ld_hsw), + EVENT_PTR(mem_st_spr), + EVENT_PTR(mem_ld_aux), + NULL, +}; + +static struct attribute *spr_td_events_attrs[] = { + EVENT_PTR(slots), + EVENT_PTR(td_retiring), + EVENT_PTR(td_bad_spec), + EVENT_PTR(td_fe_bound), + EVENT_PTR(td_be_bound), + EVENT_PTR(td_heavy_ops), + EVENT_PTR(td_br_mispredict), + EVENT_PTR(td_fetch_lat), + EVENT_PTR(td_mem_bound), + NULL, +}; + +static struct attribute *spr_tsx_events_attrs[] = { + EVENT_PTR(tx_start), + EVENT_PTR(tx_abort), + EVENT_PTR(tx_commit), + EVENT_PTR(tx_capacity_read), + EVENT_PTR(tx_capacity_write), + EVENT_PTR(tx_conflict), + EVENT_PTR(cycles_t), + EVENT_PTR(cycles_ct), + NULL, +}; + static ssize_t freeze_on_smi_show(struct device *cdev, struct device_attribute *attr, char *buf) @@ -4347,9 +5251,9 @@ static ssize_t freeze_on_smi_store(struct device *cdev, x86_pmu.attr_freeze_on_smi = val; - get_online_cpus(); + cpus_read_lock(); on_each_cpu(flip_smm_bit, &val, 1); - put_online_cpus(); + cpus_read_unlock(); done: mutex_unlock(&freeze_on_smi_mutex); @@ -4365,7 +5269,7 @@ static void update_tfa_sched(void *ignored) * and if so force schedule out for all event types all contexts */ if (test_bit(3, cpuc->active_mask)) - perf_pmu_resched(x86_get_pmu()); + perf_pmu_resched(x86_get_pmu(smp_processor_id())); } static ssize_t show_sysctl_tfa(struct device *cdev, @@ -4392,9 +5296,9 @@ static ssize_t set_sysctl_tfa(struct device *cdev, allow_tsx_force_abort = val; - get_online_cpus(); + cpus_read_lock(); on_each_cpu(update_tfa_sched, NULL, 1); - put_online_cpus(); + cpus_read_unlock(); return count; } @@ -4527,8 +5431,303 @@ static const struct attribute_group *attr_update[] = { NULL, }; +EVENT_ATTR_STR_HYBRID(slots, slots_adl, "event=0x00,umask=0x4", hybrid_big); +EVENT_ATTR_STR_HYBRID(topdown-retiring, td_retiring_adl, "event=0xc2,umask=0x0;event=0x00,umask=0x80", hybrid_big_small); +EVENT_ATTR_STR_HYBRID(topdown-bad-spec, td_bad_spec_adl, "event=0x73,umask=0x0;event=0x00,umask=0x81", hybrid_big_small); +EVENT_ATTR_STR_HYBRID(topdown-fe-bound, td_fe_bound_adl, "event=0x71,umask=0x0;event=0x00,umask=0x82", hybrid_big_small); +EVENT_ATTR_STR_HYBRID(topdown-be-bound, td_be_bound_adl, "event=0x74,umask=0x0;event=0x00,umask=0x83", hybrid_big_small); +EVENT_ATTR_STR_HYBRID(topdown-heavy-ops, td_heavy_ops_adl, "event=0x00,umask=0x84", hybrid_big); +EVENT_ATTR_STR_HYBRID(topdown-br-mispredict, td_br_mis_adl, "event=0x00,umask=0x85", hybrid_big); +EVENT_ATTR_STR_HYBRID(topdown-fetch-lat, td_fetch_lat_adl, "event=0x00,umask=0x86", hybrid_big); +EVENT_ATTR_STR_HYBRID(topdown-mem-bound, td_mem_bound_adl, "event=0x00,umask=0x87", hybrid_big); + +static struct attribute *adl_hybrid_events_attrs[] = { + EVENT_PTR(slots_adl), + EVENT_PTR(td_retiring_adl), + EVENT_PTR(td_bad_spec_adl), + EVENT_PTR(td_fe_bound_adl), + EVENT_PTR(td_be_bound_adl), + EVENT_PTR(td_heavy_ops_adl), + EVENT_PTR(td_br_mis_adl), + EVENT_PTR(td_fetch_lat_adl), + EVENT_PTR(td_mem_bound_adl), + NULL, +}; + +/* Must be in IDX order */ +EVENT_ATTR_STR_HYBRID(mem-loads, mem_ld_adl, "event=0xd0,umask=0x5,ldlat=3;event=0xcd,umask=0x1,ldlat=3", hybrid_big_small); +EVENT_ATTR_STR_HYBRID(mem-stores, mem_st_adl, "event=0xd0,umask=0x6;event=0xcd,umask=0x2", hybrid_big_small); +EVENT_ATTR_STR_HYBRID(mem-loads-aux, mem_ld_aux_adl, "event=0x03,umask=0x82", hybrid_big); + +static struct attribute *adl_hybrid_mem_attrs[] = { + EVENT_PTR(mem_ld_adl), + EVENT_PTR(mem_st_adl), + EVENT_PTR(mem_ld_aux_adl), + NULL, +}; + +EVENT_ATTR_STR_HYBRID(tx-start, tx_start_adl, "event=0xc9,umask=0x1", hybrid_big); +EVENT_ATTR_STR_HYBRID(tx-commit, tx_commit_adl, "event=0xc9,umask=0x2", hybrid_big); +EVENT_ATTR_STR_HYBRID(tx-abort, tx_abort_adl, "event=0xc9,umask=0x4", hybrid_big); +EVENT_ATTR_STR_HYBRID(tx-conflict, tx_conflict_adl, "event=0x54,umask=0x1", hybrid_big); +EVENT_ATTR_STR_HYBRID(cycles-t, cycles_t_adl, "event=0x3c,in_tx=1", hybrid_big); +EVENT_ATTR_STR_HYBRID(cycles-ct, cycles_ct_adl, "event=0x3c,in_tx=1,in_tx_cp=1", hybrid_big); +EVENT_ATTR_STR_HYBRID(tx-capacity-read, tx_capacity_read_adl, "event=0x54,umask=0x80", hybrid_big); +EVENT_ATTR_STR_HYBRID(tx-capacity-write, tx_capacity_write_adl, "event=0x54,umask=0x2", hybrid_big); + +static struct attribute *adl_hybrid_tsx_attrs[] = { + EVENT_PTR(tx_start_adl), + EVENT_PTR(tx_abort_adl), + EVENT_PTR(tx_commit_adl), + EVENT_PTR(tx_capacity_read_adl), + EVENT_PTR(tx_capacity_write_adl), + EVENT_PTR(tx_conflict_adl), + EVENT_PTR(cycles_t_adl), + EVENT_PTR(cycles_ct_adl), + NULL, +}; + +FORMAT_ATTR_HYBRID(in_tx, hybrid_big); +FORMAT_ATTR_HYBRID(in_tx_cp, hybrid_big); +FORMAT_ATTR_HYBRID(offcore_rsp, hybrid_big_small); +FORMAT_ATTR_HYBRID(ldlat, hybrid_big_small); +FORMAT_ATTR_HYBRID(frontend, hybrid_big); + +static struct attribute *adl_hybrid_extra_attr_rtm[] = { + FORMAT_HYBRID_PTR(in_tx), + FORMAT_HYBRID_PTR(in_tx_cp), + FORMAT_HYBRID_PTR(offcore_rsp), + FORMAT_HYBRID_PTR(ldlat), + FORMAT_HYBRID_PTR(frontend), + NULL, +}; + +static struct attribute *adl_hybrid_extra_attr[] = { + FORMAT_HYBRID_PTR(offcore_rsp), + FORMAT_HYBRID_PTR(ldlat), + FORMAT_HYBRID_PTR(frontend), + NULL, +}; + +static bool is_attr_for_this_pmu(struct kobject *kobj, struct attribute *attr) +{ + struct device *dev = kobj_to_dev(kobj); + struct x86_hybrid_pmu *pmu = + container_of(dev_get_drvdata(dev), struct x86_hybrid_pmu, pmu); + struct perf_pmu_events_hybrid_attr *pmu_attr = + container_of(attr, struct perf_pmu_events_hybrid_attr, attr.attr); + + return pmu->cpu_type & pmu_attr->pmu_type; +} + +static umode_t hybrid_events_is_visible(struct kobject *kobj, + struct attribute *attr, int i) +{ + return is_attr_for_this_pmu(kobj, attr) ? attr->mode : 0; +} + +static inline int hybrid_find_supported_cpu(struct x86_hybrid_pmu *pmu) +{ + int cpu = cpumask_first(&pmu->supported_cpus); + + return (cpu >= nr_cpu_ids) ? -1 : cpu; +} + +static umode_t hybrid_tsx_is_visible(struct kobject *kobj, + struct attribute *attr, int i) +{ + struct device *dev = kobj_to_dev(kobj); + struct x86_hybrid_pmu *pmu = + container_of(dev_get_drvdata(dev), struct x86_hybrid_pmu, pmu); + int cpu = hybrid_find_supported_cpu(pmu); + + return (cpu >= 0) && is_attr_for_this_pmu(kobj, attr) && cpu_has(&cpu_data(cpu), X86_FEATURE_RTM) ? attr->mode : 0; +} + +static umode_t hybrid_format_is_visible(struct kobject *kobj, + struct attribute *attr, int i) +{ + struct device *dev = kobj_to_dev(kobj); + struct x86_hybrid_pmu *pmu = + container_of(dev_get_drvdata(dev), struct x86_hybrid_pmu, pmu); + struct perf_pmu_format_hybrid_attr *pmu_attr = + container_of(attr, struct perf_pmu_format_hybrid_attr, attr.attr); + int cpu = hybrid_find_supported_cpu(pmu); + + return (cpu >= 0) && (pmu->cpu_type & pmu_attr->pmu_type) ? attr->mode : 0; +} + +static struct attribute_group hybrid_group_events_td = { + .name = "events", + .is_visible = hybrid_events_is_visible, +}; + +static struct attribute_group hybrid_group_events_mem = { + .name = "events", + .is_visible = hybrid_events_is_visible, +}; + +static struct attribute_group hybrid_group_events_tsx = { + .name = "events", + .is_visible = hybrid_tsx_is_visible, +}; + +static struct attribute_group hybrid_group_format_extra = { + .name = "format", + .is_visible = hybrid_format_is_visible, +}; + +static ssize_t intel_hybrid_get_attr_cpus(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct x86_hybrid_pmu *pmu = + container_of(dev_get_drvdata(dev), struct x86_hybrid_pmu, pmu); + + return cpumap_print_to_pagebuf(true, buf, &pmu->supported_cpus); +} + +static DEVICE_ATTR(cpus, S_IRUGO, intel_hybrid_get_attr_cpus, NULL); +static struct attribute *intel_hybrid_cpus_attrs[] = { + &dev_attr_cpus.attr, + NULL, +}; + +static struct attribute_group hybrid_group_cpus = { + .attrs = intel_hybrid_cpus_attrs, +}; + +static const struct attribute_group *hybrid_attr_update[] = { + &hybrid_group_events_td, + &hybrid_group_events_mem, + &hybrid_group_events_tsx, + &group_caps_gen, + &group_caps_lbr, + &hybrid_group_format_extra, + &group_default, + &hybrid_group_cpus, + NULL, +}; + static struct attribute *empty_attrs; +static void intel_pmu_check_num_counters(int *num_counters, + int *num_counters_fixed, + u64 *intel_ctrl, u64 fixed_mask) +{ + if (*num_counters > INTEL_PMC_MAX_GENERIC) { + WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", + *num_counters, INTEL_PMC_MAX_GENERIC); + *num_counters = INTEL_PMC_MAX_GENERIC; + } + *intel_ctrl = (1ULL << *num_counters) - 1; + + if (*num_counters_fixed > INTEL_PMC_MAX_FIXED) { + WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", + *num_counters_fixed, INTEL_PMC_MAX_FIXED); + *num_counters_fixed = INTEL_PMC_MAX_FIXED; + } + + *intel_ctrl |= fixed_mask << INTEL_PMC_IDX_FIXED; +} + +static void intel_pmu_check_event_constraints(struct event_constraint *event_constraints, + int num_counters, + int num_counters_fixed, + u64 intel_ctrl) +{ + struct event_constraint *c; + + if (!event_constraints) + return; + + /* + * event on fixed counter2 (REF_CYCLES) only works on this + * counter, so do not extend mask to generic counters + */ + for_each_event_constraint(c, event_constraints) { + /* + * Don't extend the topdown slots and metrics + * events to the generic counters. + */ + if (c->idxmsk64 & INTEL_PMC_MSK_TOPDOWN) { + /* + * Disable topdown slots and metrics events, + * if slots event is not in CPUID. + */ + if (!(INTEL_PMC_MSK_FIXED_SLOTS & intel_ctrl)) + c->idxmsk64 = 0; + c->weight = hweight64(c->idxmsk64); + continue; + } + + if (c->cmask == FIXED_EVENT_FLAGS) { + /* Disabled fixed counters which are not in CPUID */ + c->idxmsk64 &= intel_ctrl; + + /* + * Don't extend the pseudo-encoding to the + * generic counters + */ + if (!use_fixed_pseudo_encoding(c->code)) + c->idxmsk64 |= (1ULL << num_counters) - 1; + } + c->idxmsk64 &= + ~(~0ULL << (INTEL_PMC_IDX_FIXED + num_counters_fixed)); + c->weight = hweight64(c->idxmsk64); + } +} + +static void intel_pmu_check_extra_regs(struct extra_reg *extra_regs) +{ + struct extra_reg *er; + + /* + * Access extra MSR may cause #GP under certain circumstances. + * E.g. KVM doesn't support offcore event + * Check all extra_regs here. + */ + if (!extra_regs) + return; + + for (er = extra_regs; er->msr; er++) { + er->extra_msr_access = check_msr(er->msr, 0x11UL); + /* Disable LBR select mapping */ + if ((er->idx == EXTRA_REG_LBR) && !er->extra_msr_access) + x86_pmu.lbr_sel_map = NULL; + } +} + +static void intel_pmu_check_hybrid_pmus(u64 fixed_mask) +{ + struct x86_hybrid_pmu *pmu; + int i; + + for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) { + pmu = &x86_pmu.hybrid_pmu[i]; + + intel_pmu_check_num_counters(&pmu->num_counters, + &pmu->num_counters_fixed, + &pmu->intel_ctrl, + fixed_mask); + + if (pmu->intel_cap.perf_metrics) { + pmu->intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS; + pmu->intel_ctrl |= INTEL_PMC_MSK_FIXED_SLOTS; + } + + if (pmu->intel_cap.pebs_output_pt_available) + pmu->pmu.capabilities |= PERF_PMU_CAP_AUX_OUTPUT; + + intel_pmu_check_event_constraints(pmu->event_constraints, + pmu->num_counters, + pmu->num_counters_fixed, + pmu->intel_ctrl); + + intel_pmu_check_extra_regs(pmu->extra_regs); + } +} + __init int intel_pmu_init(void) { struct attribute **extra_skl_attr = &empty_attrs; @@ -4539,12 +5738,11 @@ __init int intel_pmu_init(void) union cpuid10_edx edx; union cpuid10_eax eax; union cpuid10_ebx ebx; - struct event_constraint *c; - unsigned int unused; - struct extra_reg *er; + unsigned int fixed_mask; bool pmem = false; int version, i; char *name; + struct x86_hybrid_pmu *pmu; if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { switch (boot_cpu_data.x86) { @@ -4562,7 +5760,7 @@ __init int intel_pmu_init(void) * Check whether the Architectural PerfMon supports * Branch Misses Retired hw_event or not. */ - cpuid(10, &eax.full, &ebx.full, &unused, &edx.full); + cpuid(10, &eax.full, &ebx.full, &fixed_mask, &edx.full); if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT) return -ENODEV; @@ -4581,20 +5779,21 @@ __init int intel_pmu_init(void) x86_pmu.events_mask_len = eax.split.mask_length; x86_pmu.max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, x86_pmu.num_counters); + x86_pmu.pebs_capable = PEBS_COUNTER_MASK; /* * Quirk: v2 perfmon does not report fixed-purpose events, so * assume at least 3 events, when not running in a hypervisor: */ - if (version > 1) { + if (version > 1 && version < 5) { int assume = 3 * !boot_cpu_has(X86_FEATURE_HYPERVISOR); x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, assume); - } - if (version >= 4) - x86_pmu.counter_freezing = !disable_counter_freezing; + fixed_mask = (1L << x86_pmu.num_counters_fixed) - 1; + } else if (version >= 5) + x86_pmu.num_counters_fixed = fls(fixed_mask); if (boot_cpu_has(X86_FEATURE_PDCM)) { u64 capabilities; @@ -4603,10 +5802,24 @@ __init int intel_pmu_init(void) x86_pmu.intel_cap.capabilities = capabilities; } + if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32) { + x86_pmu.lbr_reset = intel_pmu_lbr_reset_32; + x86_pmu.lbr_read = intel_pmu_lbr_read_32; + } + + if (boot_cpu_has(X86_FEATURE_ARCH_LBR)) + intel_pmu_arch_lbr_init(); + intel_ds_init(); x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */ + if (version >= 5) { + x86_pmu.intel_cap.anythread_deprecated = edx.split.anythread_deprecated; + if (x86_pmu.intel_cap.anythread_deprecated) + pr_cont(" AnyThread deprecated, "); + } + /* * Install the hw-cache-events table: */ @@ -4618,7 +5831,7 @@ __init int intel_pmu_init(void) case INTEL_FAM6_CORE2_MEROM: x86_add_quirk(intel_clovertown_quirk); - /* fall through */ + fallthrough; case INTEL_FAM6_CORE2_MEROM_L: case INTEL_FAM6_CORE2_PENRYN: @@ -4709,7 +5922,6 @@ __init int intel_pmu_init(void) case INTEL_FAM6_ATOM_GOLDMONT: case INTEL_FAM6_ATOM_GOLDMONT_D: - x86_add_quirk(intel_counter_freezing_quirk); memcpy(hw_cache_event_ids, glm_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs, @@ -4736,7 +5948,6 @@ __init int intel_pmu_init(void) break; case INTEL_FAM6_ATOM_GOLDMONT_PLUS: - x86_add_quirk(intel_counter_freezing_quirk); memcpy(hw_cache_event_ids, glp_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, glp_hw_cache_extra_regs, @@ -4753,6 +5964,7 @@ __init int intel_pmu_init(void) x86_pmu.pebs_aliases = NULL; x86_pmu.pebs_prec_dist = true; x86_pmu.lbr_pt_coexist = true; + x86_pmu.pebs_capable = ~0ULL; x86_pmu.flags |= PMU_FL_HAS_RSP_1; x86_pmu.flags |= PMU_FL_PEBS_ALL; x86_pmu.get_event_constraints = glp_get_event_constraints; @@ -4766,6 +5978,7 @@ __init int intel_pmu_init(void) case INTEL_FAM6_ATOM_TREMONT_D: case INTEL_FAM6_ATOM_TREMONT: + case INTEL_FAM6_ATOM_TREMONT_L: x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, glp_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -4786,11 +5999,42 @@ __init int intel_pmu_init(void) x86_pmu.lbr_pt_coexist = true; x86_pmu.flags |= PMU_FL_HAS_RSP_1; x86_pmu.get_event_constraints = tnt_get_event_constraints; + td_attr = tnt_events_attrs; extra_attr = slm_format_attr; pr_cont("Tremont events, "); name = "Tremont"; break; + case INTEL_FAM6_ALDERLAKE_N: + x86_pmu.mid_ack = true; + memcpy(hw_cache_event_ids, glp_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, tnt_hw_cache_extra_regs, + sizeof(hw_cache_extra_regs)); + hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1; + + x86_pmu.event_constraints = intel_slm_event_constraints; + x86_pmu.pebs_constraints = intel_grt_pebs_event_constraints; + x86_pmu.extra_regs = intel_grt_extra_regs; + + x86_pmu.pebs_aliases = NULL; + x86_pmu.pebs_prec_dist = true; + x86_pmu.pebs_block = true; + x86_pmu.lbr_pt_coexist = true; + x86_pmu.flags |= PMU_FL_HAS_RSP_1; + x86_pmu.flags |= PMU_FL_INSTR_LATENCY; + + intel_pmu_pebs_data_source_grt(); + x86_pmu.pebs_latency_data = adl_latency_data_small; + x86_pmu.get_event_constraints = tnt_get_event_constraints; + x86_pmu.limit_period = spr_limit_period; + td_attr = tnt_events_attrs; + mem_attr = grt_mem_attrs; + extra_attr = nhm_format_attr; + pr_cont("Gracemont events, "); + name = "gracemont"; + break; + case INTEL_FAM6_WESTMERE: case INTEL_FAM6_WESTMERE_EP: case INTEL_FAM6_WESTMERE_EX: @@ -4998,7 +6242,7 @@ __init int intel_pmu_init(void) case INTEL_FAM6_SKYLAKE_X: pmem = true; - /* fall through */ + fallthrough; case INTEL_FAM6_SKYLAKE_L: case INTEL_FAM6_SKYLAKE: case INTEL_FAM6_KABYLAKE_L: @@ -5036,7 +6280,13 @@ __init int intel_pmu_init(void) tsx_attr = hsw_tsx_events_attrs; intel_pmu_pebs_data_source_skl(pmem); - if (boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT)) { + /* + * Processors with CPUID.RTM_ALWAYS_ABORT have TSX deprecated by default. + * TSX force abort hooks are not required on these systems. Only deploy + * workaround when microcode has not enabled X86_FEATURE_RTM_ALWAYS_ABORT. + */ + if (boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT) && + !boot_cpu_has(X86_FEATURE_RTM_ALWAYS_ABORT)) { x86_pmu.flags |= PMU_FL_TFA; x86_pmu.get_event_constraints = tfa_get_event_constraints; x86_pmu.enable_all = intel_tfa_pmu_enable_all; @@ -5049,12 +6299,14 @@ __init int intel_pmu_init(void) case INTEL_FAM6_ICELAKE_X: case INTEL_FAM6_ICELAKE_D: + x86_pmu.pebs_ept = 1; pmem = true; - /* fall through */ + fallthrough; case INTEL_FAM6_ICELAKE_L: case INTEL_FAM6_ICELAKE: case INTEL_FAM6_TIGERLAKE_L: case INTEL_FAM6_TIGERLAKE: + case INTEL_FAM6_ROCKETLAKE: x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); @@ -5075,14 +6327,175 @@ __init int intel_pmu_init(void) hsw_format_attr : nhm_format_attr; extra_skl_attr = skl_format_attr; mem_attr = icl_events_attrs; + td_attr = icl_td_events_attrs; tsx_attr = icl_tsx_events_attrs; - x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xca, .umask=0x02); + x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xc9, .umask=0x04); x86_pmu.lbr_pt_coexist = true; intel_pmu_pebs_data_source_skl(pmem); + x86_pmu.num_topdown_events = 4; + static_call_update(intel_pmu_update_topdown_event, + &icl_update_topdown_event); + static_call_update(intel_pmu_set_topdown_event_period, + &icl_set_topdown_event_period); pr_cont("Icelake events, "); name = "icelake"; break; + case INTEL_FAM6_SAPPHIRERAPIDS_X: + pmem = true; + x86_pmu.late_ack = true; + memcpy(hw_cache_event_ids, spr_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, spr_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); + + x86_pmu.event_constraints = intel_spr_event_constraints; + x86_pmu.pebs_constraints = intel_spr_pebs_event_constraints; + x86_pmu.extra_regs = intel_spr_extra_regs; + x86_pmu.limit_period = spr_limit_period; + x86_pmu.pebs_aliases = NULL; + x86_pmu.pebs_prec_dist = true; + x86_pmu.pebs_block = true; + x86_pmu.flags |= PMU_FL_HAS_RSP_1; + x86_pmu.flags |= PMU_FL_NO_HT_SHARING; + x86_pmu.flags |= PMU_FL_INSTR_LATENCY; + x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX; + + x86_pmu.hw_config = hsw_hw_config; + x86_pmu.get_event_constraints = spr_get_event_constraints; + extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? + hsw_format_attr : nhm_format_attr; + extra_skl_attr = skl_format_attr; + mem_attr = spr_events_attrs; + td_attr = spr_td_events_attrs; + tsx_attr = spr_tsx_events_attrs; + x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xc9, .umask=0x04); + x86_pmu.lbr_pt_coexist = true; + intel_pmu_pebs_data_source_skl(pmem); + x86_pmu.num_topdown_events = 8; + static_call_update(intel_pmu_update_topdown_event, + &icl_update_topdown_event); + static_call_update(intel_pmu_set_topdown_event_period, + &icl_set_topdown_event_period); + pr_cont("Sapphire Rapids events, "); + name = "sapphire_rapids"; + break; + + case INTEL_FAM6_ALDERLAKE: + case INTEL_FAM6_ALDERLAKE_L: + case INTEL_FAM6_RAPTORLAKE: + case INTEL_FAM6_RAPTORLAKE_P: + case INTEL_FAM6_RAPTORLAKE_S: + /* + * Alder Lake has 2 types of CPU, core and atom. + * + * Initialize the common PerfMon capabilities here. + */ + x86_pmu.hybrid_pmu = kcalloc(X86_HYBRID_NUM_PMUS, + sizeof(struct x86_hybrid_pmu), + GFP_KERNEL); + if (!x86_pmu.hybrid_pmu) + return -ENOMEM; + static_branch_enable(&perf_is_hybrid); + x86_pmu.num_hybrid_pmus = X86_HYBRID_NUM_PMUS; + + x86_pmu.pebs_aliases = NULL; + x86_pmu.pebs_prec_dist = true; + x86_pmu.pebs_block = true; + x86_pmu.flags |= PMU_FL_HAS_RSP_1; + x86_pmu.flags |= PMU_FL_NO_HT_SHARING; + x86_pmu.flags |= PMU_FL_INSTR_LATENCY; + x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX; + x86_pmu.lbr_pt_coexist = true; + intel_pmu_pebs_data_source_adl(); + x86_pmu.pebs_latency_data = adl_latency_data_small; + x86_pmu.num_topdown_events = 8; + static_call_update(intel_pmu_update_topdown_event, + &adl_update_topdown_event); + static_call_update(intel_pmu_set_topdown_event_period, + &adl_set_topdown_event_period); + + x86_pmu.filter_match = intel_pmu_filter_match; + x86_pmu.get_event_constraints = adl_get_event_constraints; + x86_pmu.hw_config = adl_hw_config; + x86_pmu.limit_period = spr_limit_period; + x86_pmu.get_hybrid_cpu_type = adl_get_hybrid_cpu_type; + /* + * The rtm_abort_event is used to check whether to enable GPRs + * for the RTM abort event. Atom doesn't have the RTM abort + * event. There is no harmful to set it in the common + * x86_pmu.rtm_abort_event. + */ + x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xc9, .umask=0x04); + + td_attr = adl_hybrid_events_attrs; + mem_attr = adl_hybrid_mem_attrs; + tsx_attr = adl_hybrid_tsx_attrs; + extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? + adl_hybrid_extra_attr_rtm : adl_hybrid_extra_attr; + + /* Initialize big core specific PerfMon capabilities.*/ + pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX]; + pmu->name = "cpu_core"; + pmu->cpu_type = hybrid_big; + pmu->late_ack = true; + if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) { + pmu->num_counters = x86_pmu.num_counters + 2; + pmu->num_counters_fixed = x86_pmu.num_counters_fixed + 1; + } else { + pmu->num_counters = x86_pmu.num_counters; + pmu->num_counters_fixed = x86_pmu.num_counters_fixed; + } + + /* + * Quirk: For some Alder Lake machine, when all E-cores are disabled in + * a BIOS, the leaf 0xA will enumerate all counters of P-cores. However, + * the X86_FEATURE_HYBRID_CPU is still set. The above codes will + * mistakenly add extra counters for P-cores. Correct the number of + * counters here. + */ + if ((pmu->num_counters > 8) || (pmu->num_counters_fixed > 4)) { + pmu->num_counters = x86_pmu.num_counters; + pmu->num_counters_fixed = x86_pmu.num_counters_fixed; + } + + pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters); + pmu->unconstrained = (struct event_constraint) + __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1, + 0, pmu->num_counters, 0, 0); + pmu->intel_cap.capabilities = x86_pmu.intel_cap.capabilities; + pmu->intel_cap.perf_metrics = 1; + pmu->intel_cap.pebs_output_pt_available = 0; + + memcpy(pmu->hw_cache_event_ids, spr_hw_cache_event_ids, sizeof(pmu->hw_cache_event_ids)); + memcpy(pmu->hw_cache_extra_regs, spr_hw_cache_extra_regs, sizeof(pmu->hw_cache_extra_regs)); + pmu->event_constraints = intel_spr_event_constraints; + pmu->pebs_constraints = intel_spr_pebs_event_constraints; + pmu->extra_regs = intel_spr_extra_regs; + + /* Initialize Atom core specific PerfMon capabilities.*/ + pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX]; + pmu->name = "cpu_atom"; + pmu->cpu_type = hybrid_small; + pmu->mid_ack = true; + pmu->num_counters = x86_pmu.num_counters; + pmu->num_counters_fixed = x86_pmu.num_counters_fixed; + pmu->max_pebs_events = x86_pmu.max_pebs_events; + pmu->unconstrained = (struct event_constraint) + __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1, + 0, pmu->num_counters, 0, 0); + pmu->intel_cap.capabilities = x86_pmu.intel_cap.capabilities; + pmu->intel_cap.perf_metrics = 0; + pmu->intel_cap.pebs_output_pt_available = 1; + + memcpy(pmu->hw_cache_event_ids, glp_hw_cache_event_ids, sizeof(pmu->hw_cache_event_ids)); + memcpy(pmu->hw_cache_extra_regs, tnt_hw_cache_extra_regs, sizeof(pmu->hw_cache_extra_regs)); + pmu->hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1; + pmu->event_constraints = intel_slm_event_constraints; + pmu->pebs_constraints = intel_grt_pebs_event_constraints; + pmu->extra_regs = intel_grt_extra_regs; + pr_cont("Alderlake Hybrid events, "); + name = "alderlake_hybrid"; + break; + default: switch (x86_pmu.version) { case 1: @@ -5090,7 +6503,9 @@ __init int intel_pmu_init(void) pr_cont("generic architected perfmon v1, "); name = "generic_arch_v1"; break; - default: + case 2: + case 3: + case 4: /* * default constraints for v2 and up */ @@ -5098,59 +6513,62 @@ __init int intel_pmu_init(void) pr_cont("generic architected perfmon, "); name = "generic_arch_v2+"; break; + default: + /* + * The default constraints for v5 and up can support up to + * 16 fixed counters. For the fixed counters 4 and later, + * the pseudo-encoding is applied. + * The constraints may be cut according to the CPUID enumeration + * by inserting the EVENT_CONSTRAINT_END. + */ + if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED) + x86_pmu.num_counters_fixed = INTEL_PMC_MAX_FIXED; + intel_v5_gen_event_constraints[x86_pmu.num_counters_fixed].weight = -1; + x86_pmu.event_constraints = intel_v5_gen_event_constraints; + pr_cont("generic architected perfmon, "); + name = "generic_arch_v5+"; + break; } } snprintf(pmu_name_str, sizeof(pmu_name_str), "%s", name); + if (!is_hybrid()) { + group_events_td.attrs = td_attr; + group_events_mem.attrs = mem_attr; + group_events_tsx.attrs = tsx_attr; + group_format_extra.attrs = extra_attr; + group_format_extra_skl.attrs = extra_skl_attr; - group_events_td.attrs = td_attr; - group_events_mem.attrs = mem_attr; - group_events_tsx.attrs = tsx_attr; - group_format_extra.attrs = extra_attr; - group_format_extra_skl.attrs = extra_skl_attr; - - x86_pmu.attr_update = attr_update; - - if (x86_pmu.num_counters > INTEL_PMC_MAX_GENERIC) { - WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", - x86_pmu.num_counters, INTEL_PMC_MAX_GENERIC); - x86_pmu.num_counters = INTEL_PMC_MAX_GENERIC; - } - x86_pmu.intel_ctrl = (1ULL << x86_pmu.num_counters) - 1; + x86_pmu.attr_update = attr_update; + } else { + hybrid_group_events_td.attrs = td_attr; + hybrid_group_events_mem.attrs = mem_attr; + hybrid_group_events_tsx.attrs = tsx_attr; + hybrid_group_format_extra.attrs = extra_attr; - if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED) { - WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", - x86_pmu.num_counters_fixed, INTEL_PMC_MAX_FIXED); - x86_pmu.num_counters_fixed = INTEL_PMC_MAX_FIXED; + x86_pmu.attr_update = hybrid_attr_update; } - x86_pmu.intel_ctrl |= - ((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED; + intel_pmu_check_num_counters(&x86_pmu.num_counters, + &x86_pmu.num_counters_fixed, + &x86_pmu.intel_ctrl, + (u64)fixed_mask); - if (x86_pmu.event_constraints) { - /* - * event on fixed counter2 (REF_CYCLES) only works on this - * counter, so do not extend mask to generic counters - */ - for_each_event_constraint(c, x86_pmu.event_constraints) { - if (c->cmask == FIXED_EVENT_FLAGS - && c->idxmsk64 != INTEL_PMC_MSK_FIXED_REF_CYCLES) { - c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; - } - c->idxmsk64 &= - ~(~0ULL << (INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed)); - c->weight = hweight64(c->idxmsk64); - } - } + /* AnyThread may be deprecated on arch perfmon v5 or later */ + if (x86_pmu.intel_cap.anythread_deprecated) + x86_pmu.format_attrs = intel_arch_formats_attr; + intel_pmu_check_event_constraints(x86_pmu.event_constraints, + x86_pmu.num_counters, + x86_pmu.num_counters_fixed, + x86_pmu.intel_ctrl); /* * Access LBR MSR may cause #GP under certain circumstances. - * E.g. KVM doesn't support LBR MSR - * Check all LBT MSR here. + * Check all LBR MSR here. * Disable LBR access if any LBR MSRs can not be accessed. */ - if (x86_pmu.lbr_nr && !check_msr(x86_pmu.lbr_tos, 0x3UL)) + if (x86_pmu.lbr_tos && !check_msr(x86_pmu.lbr_tos, 0x3UL)) x86_pmu.lbr_nr = 0; for (i = 0; i < x86_pmu.lbr_nr; i++) { if (!(check_msr(x86_pmu.lbr_from + i, 0xffffUL) && @@ -5158,23 +6576,25 @@ __init int intel_pmu_init(void) x86_pmu.lbr_nr = 0; } - if (x86_pmu.lbr_nr) + if (x86_pmu.lbr_nr) { + intel_pmu_lbr_init(); + pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr); - /* - * Access extra MSR may cause #GP under certain circumstances. - * E.g. KVM doesn't support offcore event - * Check all extra_regs here. - */ - if (x86_pmu.extra_regs) { - for (er = x86_pmu.extra_regs; er->msr; er++) { - er->extra_msr_access = check_msr(er->msr, 0x11UL); - /* Disable LBR select mapping */ - if ((er->idx == EXTRA_REG_LBR) && !er->extra_msr_access) - x86_pmu.lbr_sel_map = NULL; + /* only support branch_stack snapshot for perfmon >= v2 */ + if (x86_pmu.disable_all == intel_pmu_disable_all) { + if (boot_cpu_has(X86_FEATURE_ARCH_LBR)) { + static_call_update(perf_snapshot_branch_stack, + intel_pmu_snapshot_arch_branch_stack); + } else { + static_call_update(perf_snapshot_branch_stack, + intel_pmu_snapshot_branch_stack); + } } } + intel_pmu_check_extra_regs(x86_pmu.extra_regs); + /* Support full width counters using alternative MSR range */ if (x86_pmu.intel_cap.full_width_write) { x86_pmu.max_period = x86_pmu.cntval_mask >> 1; @@ -5182,12 +6602,13 @@ __init int intel_pmu_init(void) pr_cont("full-width counters, "); } - /* - * For arch perfmon 4 use counter freezing to avoid - * several MSR accesses in the PMI. - */ - if (x86_pmu.counter_freezing) - x86_pmu.handle_irq = intel_pmu_handle_irq_v4; + if (!is_hybrid() && x86_pmu.intel_cap.perf_metrics) + x86_pmu.intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS; + + if (is_hybrid()) + intel_pmu_check_hybrid_pmus((u64)fixed_mask); + + intel_aux_output_init(); return 0; } diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 4814c964692c..a2834bc93149 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -40,7 +40,7 @@ * Model specific counters: * MSR_CORE_C1_RES: CORE C1 Residency Counter * perf code: 0x00 - * Available model: SLM,AMT,GLM,CNL,TNT + * Available model: SLM,AMT,GLM,CNL,ICX,TNT,ADL,RPL * Scope: Core (each processor core has a MSR) * MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter * perf code: 0x01 @@ -50,47 +50,51 @@ * MSR_CORE_C6_RESIDENCY: CORE C6 Residency Counter * perf code: 0x02 * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW, - * SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL, - * TNT + * SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX, + * TGL,TNT,RKL,ADL,RPL,SPR * Scope: Core * MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter * perf code: 0x03 * Available model: SNB,IVB,HSW,BDW,SKL,CNL,KBL,CML, - * ICL,TGL + * ICL,TGL,RKL,ADL,RPL * Scope: Core * MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter. * perf code: 0x00 * Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL, - * KBL,CML,ICL,TGL,TNT + * KBL,CML,ICL,ICX,TGL,TNT,RKL,ADL, + * RPL,SPR * Scope: Package (physical package) * MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter. * perf code: 0x01 * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL, - * GLM,CNL,KBL,CML,ICL,TGL,TNT + * GLM,CNL,KBL,CML,ICL,TGL,TNT,RKL, + * ADL,RPL * Scope: Package (physical package) * MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter. * perf code: 0x02 * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW, - * SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL, - * TNT + * SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX, + * TGL,TNT,RKL,ADL,RPL,SPR * Scope: Package (physical package) * MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter. * perf code: 0x03 * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,CNL, - * KBL,CML,ICL,TGL + * KBL,CML,ICL,TGL,RKL,ADL,RPL * Scope: Package (physical package) * MSR_PKG_C8_RESIDENCY: Package C8 Residency Counter. * perf code: 0x04 - * Available model: HSW ULT,KBL,CNL,CML,ICL,TGL + * Available model: HSW ULT,KBL,CNL,CML,ICL,TGL,RKL, + * ADL,RPL * Scope: Package (physical package) * MSR_PKG_C9_RESIDENCY: Package C9 Residency Counter. * perf code: 0x05 - * Available model: HSW ULT,KBL,CNL,CML,ICL,TGL + * Available model: HSW ULT,KBL,CNL,CML,ICL,TGL,RKL, + * ADL,RPL * Scope: Package (physical package) * MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter. * perf code: 0x06 * Available model: HSW ULT,KBL,GLM,CNL,CML,ICL,TGL, - * TNT + * TNT,RKL,ADL,RPL * Scope: Package (physical package) * */ @@ -107,14 +111,14 @@ MODULE_LICENSE("GPL"); #define DEFINE_CSTATE_FORMAT_ATTR(_var, _name, _format) \ -static ssize_t __cstate_##_var##_show(struct kobject *kobj, \ - struct kobj_attribute *attr, \ +static ssize_t __cstate_##_var##_show(struct device *dev, \ + struct device_attribute *attr, \ char *page) \ { \ BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \ return sprintf(page, _format "\n"); \ } \ -static struct kobj_attribute format_attr_##_var = \ +static struct device_attribute format_attr_##_var = \ __ATTR(_name, 0444, __cstate_##_var##_show, NULL) static ssize_t cstate_get_attr_cpumask(struct device *dev, @@ -563,6 +567,28 @@ static const struct cstate_model icl_cstates __initconst = { BIT(PERF_CSTATE_PKG_C10_RES), }; +static const struct cstate_model icx_cstates __initconst = { + .core_events = BIT(PERF_CSTATE_CORE_C1_RES) | + BIT(PERF_CSTATE_CORE_C6_RES), + + .pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) | + BIT(PERF_CSTATE_PKG_C6_RES), +}; + +static const struct cstate_model adl_cstates __initconst = { + .core_events = BIT(PERF_CSTATE_CORE_C1_RES) | + BIT(PERF_CSTATE_CORE_C6_RES) | + BIT(PERF_CSTATE_CORE_C7_RES), + + .pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) | + BIT(PERF_CSTATE_PKG_C3_RES) | + BIT(PERF_CSTATE_PKG_C6_RES) | + BIT(PERF_CSTATE_PKG_C7_RES) | + BIT(PERF_CSTATE_PKG_C8_RES) | + BIT(PERF_CSTATE_PKG_C9_RES) | + BIT(PERF_CSTATE_PKG_C10_RES), +}; + static const struct cstate_model slm_cstates __initconst = { .core_events = BIT(PERF_CSTATE_CORE_C1_RES) | BIT(PERF_CSTATE_CORE_C6_RES), @@ -594,63 +620,72 @@ static const struct cstate_model glm_cstates __initconst = { }; -#define X86_CSTATES_MODEL(model, states) \ - { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long) &(states) } - static const struct x86_cpu_id intel_cstates_match[] __initconst = { - X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM, nhm_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM_EP, nhm_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM_EX, nhm_cstates), - - X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE, nhm_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE_EP, nhm_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE_EX, nhm_cstates), - - X86_CSTATES_MODEL(INTEL_FAM6_SANDYBRIDGE, snb_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_SANDYBRIDGE_X, snb_cstates), - - X86_CSTATES_MODEL(INTEL_FAM6_IVYBRIDGE, snb_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_IVYBRIDGE_X, snb_cstates), - - X86_CSTATES_MODEL(INTEL_FAM6_HASWELL, snb_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_X, snb_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_G, snb_cstates), - - X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_L, hswult_cstates), - - X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT, slm_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT_D, slm_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_ATOM_AIRMONT, slm_cstates), - - X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL, snb_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_D, snb_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_G, snb_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_X, snb_cstates), - - X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_L, snb_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE, snb_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_X, snb_cstates), - - X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_L, hswult_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE, hswult_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_COMETLAKE_L, hswult_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_COMETLAKE, hswult_cstates), - - X86_CSTATES_MODEL(INTEL_FAM6_CANNONLAKE_L, cnl_cstates), - - X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNL, knl_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNM, knl_cstates), - - X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT, glm_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_D, glm_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_PLUS, glm_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_ATOM_TREMONT_D, glm_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_ATOM_TREMONT, glm_cstates), - - X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE_L, icl_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE, icl_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_TIGERLAKE_L, icl_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_TIGERLAKE, icl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(NEHALEM, &nhm_cstates), + X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP, &nhm_cstates), + X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX, &nhm_cstates), + + X86_MATCH_INTEL_FAM6_MODEL(WESTMERE, &nhm_cstates), + X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP, &nhm_cstates), + X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX, &nhm_cstates), + + X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE, &snb_cstates), + X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &snb_cstates), + + X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE, &snb_cstates), + X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &snb_cstates), + + X86_MATCH_INTEL_FAM6_MODEL(HASWELL, &snb_cstates), + X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &snb_cstates), + X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G, &snb_cstates), + + X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L, &hswult_cstates), + + X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, &slm_cstates), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_D, &slm_cstates), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT, &slm_cstates), + + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, &snb_cstates), + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &snb_cstates), + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G, &snb_cstates), + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &snb_cstates), + + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, &snb_cstates), + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, &snb_cstates), + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &snb_cstates), + + X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, &hswult_cstates), + X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &hswult_cstates), + X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, &hswult_cstates), + X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &hswult_cstates), + + X86_MATCH_INTEL_FAM6_MODEL(CANNONLAKE_L, &cnl_cstates), + + X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &knl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &knl_cstates), + + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &glm_cstates), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D, &glm_cstates), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &glm_cstates), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &glm_cstates), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT, &glm_cstates), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L, &glm_cstates), + + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, &icl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, &icl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &icx_cstates), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &icx_cstates), + X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &icx_cstates), + + X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &icl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &icl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE, &icl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &adl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &adl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &adl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &adl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &adl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &adl_cstates), { }, }; MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match); diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index dc43cc124e09..446d2833efa7 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -36,7 +36,9 @@ union intel_x86_pebs_dse { unsigned int ld_dse:4; unsigned int ld_stlb_miss:1; unsigned int ld_locked:1; - unsigned int ld_reserved:26; + unsigned int ld_data_blk:1; + unsigned int ld_addr_blk:1; + unsigned int ld_reserved:24; }; struct { unsigned int st_l1d_hit:1; @@ -45,6 +47,12 @@ union intel_x86_pebs_dse { unsigned int st_locked:1; unsigned int st_reserved2:26; }; + struct { + unsigned int st_lat_dse:4; + unsigned int st_lat_stlb_miss:1; + unsigned int st_lat_locked:1; + unsigned int ld_reserved3:26; + }; }; @@ -86,15 +94,45 @@ void __init intel_pmu_pebs_data_source_nhm(void) pebs_data_source[0x07] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM); } -void __init intel_pmu_pebs_data_source_skl(bool pmem) +static void __init __intel_pmu_pebs_data_source_skl(bool pmem, u64 *data_source) { u64 pmem_or_l4 = pmem ? LEVEL(PMEM) : LEVEL(L4); - pebs_data_source[0x08] = OP_LH | pmem_or_l4 | P(SNOOP, HIT); - pebs_data_source[0x09] = OP_LH | pmem_or_l4 | REM | P(SNOOP, HIT); - pebs_data_source[0x0b] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE); - pebs_data_source[0x0c] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOPX, FWD); - pebs_data_source[0x0d] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOP, HITM); + data_source[0x08] = OP_LH | pmem_or_l4 | P(SNOOP, HIT); + data_source[0x09] = OP_LH | pmem_or_l4 | REM | P(SNOOP, HIT); + data_source[0x0b] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE); + data_source[0x0c] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOPX, FWD); + data_source[0x0d] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOP, HITM); +} + +void __init intel_pmu_pebs_data_source_skl(bool pmem) +{ + __intel_pmu_pebs_data_source_skl(pmem, pebs_data_source); +} + +static void __init __intel_pmu_pebs_data_source_grt(u64 *data_source) +{ + data_source[0x05] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT); + data_source[0x06] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM); + data_source[0x08] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD); +} + +void __init intel_pmu_pebs_data_source_grt(void) +{ + __intel_pmu_pebs_data_source_grt(pebs_data_source); +} + +void __init intel_pmu_pebs_data_source_adl(void) +{ + u64 *data_source; + + data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].pebs_data_source; + memcpy(data_source, pebs_data_source, sizeof(pebs_data_source)); + __intel_pmu_pebs_data_source_skl(false, data_source); + + data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX].pebs_data_source; + memcpy(data_source, pebs_data_source, sizeof(pebs_data_source)); + __intel_pmu_pebs_data_source_grt(data_source); } static u64 precise_store_data(u64 status) @@ -163,7 +201,50 @@ static u64 precise_datala_hsw(struct perf_event *event, u64 status) return dse.val; } -static u64 load_latency_data(u64 status) +static inline void pebs_set_tlb_lock(u64 *val, bool tlb, bool lock) +{ + /* + * TLB access + * 0 = did not miss 2nd level TLB + * 1 = missed 2nd level TLB + */ + if (tlb) + *val |= P(TLB, MISS) | P(TLB, L2); + else + *val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2); + + /* locked prefix */ + if (lock) + *val |= P(LOCK, LOCKED); +} + +/* Retrieve the latency data for e-core of ADL */ +u64 adl_latency_data_small(struct perf_event *event, u64 status) +{ + union intel_x86_pebs_dse dse; + u64 val; + + WARN_ON_ONCE(hybrid_pmu(event->pmu)->cpu_type == hybrid_big); + + dse.val = status; + + val = hybrid_var(event->pmu, pebs_data_source)[dse.ld_dse]; + + /* + * For the atom core on ADL, + * bit 4: lock, bit 5: TLB access. + */ + pebs_set_tlb_lock(&val, dse.ld_locked, dse.ld_stlb_miss); + + if (dse.ld_data_blk) + val |= P(BLK, DATA); + else + val |= P(BLK, NA); + + return val; +} + +static u64 load_latency_data(struct perf_event *event, u64 status) { union intel_x86_pebs_dse dse; u64 val; @@ -173,7 +254,7 @@ static u64 load_latency_data(u64 status) /* * use the mapping table for bit 0-3 */ - val = pebs_data_source[dse.ld_dse]; + val = hybrid_var(event->pmu, pebs_data_source)[dse.ld_dse]; /* * Nehalem models do not support TLB, Lock infos @@ -182,25 +263,63 @@ static u64 load_latency_data(u64 status) val |= P(TLB, NA) | P(LOCK, NA); return val; } + + pebs_set_tlb_lock(&val, dse.ld_stlb_miss, dse.ld_locked); + /* - * bit 4: TLB access - * 0 = did not miss 2nd level TLB - * 1 = missed 2nd level TLB + * Ice Lake and earlier models do not support block infos. */ - if (dse.ld_stlb_miss) - val |= P(TLB, MISS) | P(TLB, L2); - else - val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2); + if (!x86_pmu.pebs_block) { + val |= P(BLK, NA); + return val; + } + /* + * bit 6: load was blocked since its data could not be forwarded + * from a preceding store + */ + if (dse.ld_data_blk) + val |= P(BLK, DATA); /* - * bit 5: locked prefix + * bit 7: load was blocked due to potential address conflict with + * a preceding store */ - if (dse.ld_locked) - val |= P(LOCK, LOCKED); + if (dse.ld_addr_blk) + val |= P(BLK, ADDR); + + if (!dse.ld_data_blk && !dse.ld_addr_blk) + val |= P(BLK, NA); return val; } +static u64 store_latency_data(struct perf_event *event, u64 status) +{ + union intel_x86_pebs_dse dse; + union perf_mem_data_src src; + u64 val; + + dse.val = status; + + /* + * use the mapping table for bit 0-3 + */ + val = hybrid_var(event->pmu, pebs_data_source)[dse.st_lat_dse]; + + pebs_set_tlb_lock(&val, dse.st_lat_stlb_miss, dse.st_lat_locked); + + val |= P(BLK, NA); + + /* + * the pebs_data_source table is only for loads + * so override the mem_op to say STORE instead + */ + src.val = val; + src.mem_op = P(OP,STORE); + + return src.val; +} + struct pebs_record_core { u64 flags, ip; u64 ax, bx, cx, dx; @@ -642,8 +761,8 @@ int intel_pmu_drain_bts_buffer(void) rcu_read_lock(); perf_prepare_sample(&header, &data, event, ®s); - if (perf_output_begin(&handle, event, header.size * - (top - base - skip))) + if (perf_output_begin(&handle, &data, event, + header.size * (top - base - skip))) goto unlock; for (at = base; at < top; at++) { @@ -670,9 +789,9 @@ unlock: static inline void intel_pmu_drain_pebs_buffer(void) { - struct pt_regs regs; + struct perf_sample_data data; - x86_pmu.drain_pebs(®s); + x86_pmu.drain_pebs(NULL, &data); } /* @@ -714,6 +833,13 @@ struct event_constraint intel_glm_pebs_event_constraints[] = { EVENT_CONSTRAINT_END }; +struct event_constraint intel_grt_pebs_event_constraints[] = { + /* Allow all events as PEBS with no flags */ + INTEL_HYBRID_LAT_CONSTRAINT(0x5d0, 0x3), + INTEL_HYBRID_LAT_CONSTRAINT(0x6d0, 0xf), + EVENT_CONSTRAINT_END +}; + struct event_constraint intel_nehalem_pebs_event_constraints[] = { INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */ INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */ @@ -851,12 +977,18 @@ struct event_constraint intel_skl_pebs_event_constraints[] = { }; struct event_constraint intel_icl_pebs_event_constraints[] = { - INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */ + INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x100000000ULL), /* old INST_RETIRED.PREC_DIST */ + INTEL_FLAGS_UEVENT_CONSTRAINT(0x0100, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */ INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL), /* SLOTS */ INTEL_PLD_CONSTRAINT(0x1cd, 0xff), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ - INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x1d0, 0xf), /* MEM_INST_RETIRED.LOAD */ - INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x2d0, 0xf), /* MEM_INST_RETIRED.STORE */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */ INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf), /* MEM_LOAD_*_RETIRED.* */ @@ -870,15 +1002,43 @@ struct event_constraint intel_icl_pebs_event_constraints[] = { EVENT_CONSTRAINT_END }; +struct event_constraint intel_spr_pebs_event_constraints[] = { + INTEL_FLAGS_UEVENT_CONSTRAINT(0x100, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */ + INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL), + + INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xfe), + INTEL_PLD_CONSTRAINT(0x1cd, 0xfe), + INTEL_PSD_CONSTRAINT(0x2cd, 0x1), + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */ + + INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf), + + INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf), + + /* + * Everything else is handled by PMU_FL_PEBS_ALL, because we + * need the full constraints from the main table. + */ + + EVENT_CONSTRAINT_END +}; + struct event_constraint *intel_pebs_constraints(struct perf_event *event) { + struct event_constraint *pebs_constraints = hybrid(event->pmu, pebs_constraints); struct event_constraint *c; if (!event->attr.precise_ip) return NULL; - if (x86_pmu.pebs_constraints) { - for_each_event_constraint(c, x86_pmu.pebs_constraints) { + if (pebs_constraints) { + for_each_event_constraint(c, pebs_constraints) { if (constraint_match(c, event->hw.config)) { event->hw.flags |= c->flags; return c; @@ -920,6 +1080,8 @@ void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in) static inline void pebs_update_threshold(struct cpu_hw_events *cpuc) { struct debug_store *ds = cpuc->ds; + int max_pebs_events = hybrid(cpuc->pmu, max_pebs_events); + int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed); u64 threshold; int reserved; @@ -927,9 +1089,9 @@ static inline void pebs_update_threshold(struct cpu_hw_events *cpuc) return; if (x86_pmu.flags & PMU_FL_PEBS_ALL) - reserved = x86_pmu.max_pebs_events + x86_pmu.num_counters_fixed; + reserved = max_pebs_events + num_counters_fixed; else - reserved = x86_pmu.max_pebs_events; + reserved = max_pebs_events; if (cpuc->n_pebs == cpuc->n_large_pebs) { threshold = ds->pebs_absolute_maximum - @@ -954,14 +1116,16 @@ static void adaptive_pebs_record_size_update(void) if (pebs_data_cfg & PEBS_DATACFG_XMMS) sz += sizeof(struct pebs_xmm); if (pebs_data_cfg & PEBS_DATACFG_LBRS) - sz += x86_pmu.lbr_nr * sizeof(struct pebs_lbr_entry); + sz += x86_pmu.lbr_nr * sizeof(struct lbr_entry); cpuc->pebs_record_size = sz; } #define PERF_PEBS_MEMINFO_TYPE (PERF_SAMPLE_ADDR | PERF_SAMPLE_DATA_SRC | \ - PERF_SAMPLE_PHYS_ADDR | PERF_SAMPLE_WEIGHT | \ - PERF_SAMPLE_TRANSACTION) + PERF_SAMPLE_PHYS_ADDR | \ + PERF_SAMPLE_WEIGHT_TYPE | \ + PERF_SAMPLE_TRANSACTION | \ + PERF_SAMPLE_DATA_PAGE_SIZE) static u64 pebs_update_adaptive_cfg(struct perf_event *event) { @@ -986,7 +1150,7 @@ static u64 pebs_update_adaptive_cfg(struct perf_event *event) gprs = (sample_type & PERF_SAMPLE_REGS_INTR) && (attr->sample_regs_intr & PEBS_GP_REGS); - tsx_weight = (sample_type & PERF_SAMPLE_WEIGHT) && + tsx_weight = (sample_type & PERF_SAMPLE_WEIGHT_TYPE) && ((attr->config & INTEL_ARCH_EVENT_MASK) == x86_pmu.rtm_abort_event); @@ -1088,6 +1252,9 @@ static void intel_pmu_pebs_via_pt_enable(struct perf_event *event) struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; struct debug_store *ds = cpuc->ds; + u64 value = ds->pebs_event_reset[hwc->idx]; + u32 base = MSR_RELOAD_PMC0; + unsigned int idx = hwc->idx; if (!is_pebs_pt(event)) return; @@ -1097,7 +1264,15 @@ static void intel_pmu_pebs_via_pt_enable(struct perf_event *event) cpuc->pebs_enabled |= PEBS_OUTPUT_PT; - wrmsrl(MSR_RELOAD_PMC0 + hwc->idx, ds->pebs_event_reset[hwc->idx]); + if (hwc->idx >= INTEL_PMC_IDX_FIXED) { + base = MSR_RELOAD_FIXED_CTR0; + idx = hwc->idx - INTEL_PMC_IDX_FIXED; + if (x86_pmu.intel_cap.pebs_format < 5) + value = ds->pebs_event_reset[MAX_PEBS_EVENTS_FMT4 + idx]; + else + value = ds->pebs_event_reset[MAX_PEBS_EVENTS + idx]; + } + wrmsrl(base + idx, value); } void intel_pmu_pebs_enable(struct perf_event *event) @@ -1105,6 +1280,7 @@ void intel_pmu_pebs_enable(struct perf_event *event) struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; struct debug_store *ds = cpuc->ds; + unsigned int idx = hwc->idx; hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; @@ -1123,19 +1299,22 @@ void intel_pmu_pebs_enable(struct perf_event *event) } } + if (idx >= INTEL_PMC_IDX_FIXED) { + if (x86_pmu.intel_cap.pebs_format < 5) + idx = MAX_PEBS_EVENTS_FMT4 + (idx - INTEL_PMC_IDX_FIXED); + else + idx = MAX_PEBS_EVENTS + (idx - INTEL_PMC_IDX_FIXED); + } + /* * Use auto-reload if possible to save a MSR write in the PMI. * This must be done in pmu::start(), because PERF_EVENT_IOC_PERIOD. */ if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) { - unsigned int idx = hwc->idx; - - if (idx >= INTEL_PMC_IDX_FIXED) - idx = MAX_PEBS_EVENTS + (idx - INTEL_PMC_IDX_FIXED); ds->pebs_event_reset[idx] = (u64)(-hwc->sample_period) & x86_pmu.cntval_mask; } else { - ds->pebs_event_reset[hwc->idx] = 0; + ds->pebs_event_reset[idx] = 0; } intel_pmu_pebs_via_pt_enable(event); @@ -1194,7 +1373,7 @@ void intel_pmu_pebs_disable_all(void) struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); if (cpuc->pebs_enabled) - wrmsrl(MSR_IA32_PEBS_ENABLE, 0); + __intel_pmu_pebs_disable_all(); } static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) @@ -1261,17 +1440,16 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) old_to = to; #ifdef CONFIG_X86_64 - is_64bit = kernel_ip(to) || !test_thread_flag(TIF_IA32); + is_64bit = kernel_ip(to) || any_64bit_mode(regs); #endif insn_init(&insn, kaddr, size, is_64bit); - insn_get_length(&insn); + /* - * Make sure there was not a problem decoding the - * instruction and getting the length. This is - * doubly important because we have an infinite - * loop if insn.length=0. + * Make sure there was not a problem decoding the instruction. + * This is doubly important because we have an infinite loop if + * insn.length=0. */ - if (!insn.length) + if (insn_get_length(&insn)) break; to += insn.length; @@ -1329,7 +1507,11 @@ static u64 get_data_src(struct perf_event *event, u64 aux) bool fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC); if (fl & PERF_X86_EVENT_PEBS_LDLAT) - val = load_latency_data(aux); + val = load_latency_data(event, aux); + else if (fl & PERF_X86_EVENT_PEBS_STLAT) + val = store_latency_data(event, aux); + else if (fl & PERF_X86_EVENT_PEBS_LAT_HYBRID) + val = x86_pmu.pebs_latency_data(event, aux); else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC)) val = precise_datala_hsw(event, aux); else if (fst) @@ -1337,6 +1519,10 @@ static u64 get_data_src(struct perf_event *event, u64 aux) return val; } +#define PERF_SAMPLE_ADDR_TYPE (PERF_SAMPLE_ADDR | \ + PERF_SAMPLE_PHYS_ADDR | \ + PERF_SAMPLE_DATA_PAGE_SIZE) + static void setup_pebs_fixed_sample_data(struct perf_event *event, struct pt_regs *iregs, void *__pebs, struct perf_sample_data *data, @@ -1364,14 +1550,18 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event, /* * Use latency for weight (only avail with PEBS-LL) */ - if (fll && (sample_type & PERF_SAMPLE_WEIGHT)) - data->weight = pebs->lat; + if (fll && (sample_type & PERF_SAMPLE_WEIGHT_TYPE)) { + data->weight.full = pebs->lat; + data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE; + } /* * data.data_src encodes the data source */ - if (sample_type & PERF_SAMPLE_DATA_SRC) + if (sample_type & PERF_SAMPLE_DATA_SRC) { data->data_src.val = get_data_src(event, pebs->dse); + data->sample_flags |= PERF_SAMPLE_DATA_SRC; + } /* * We must however always use iregs for the unwinder to stay sane; the @@ -1379,8 +1569,10 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event, * previous PMI context or an (I)RET happened between the record and * PMI. */ - if (sample_type & PERF_SAMPLE_CALLCHAIN) + if (sample_type & PERF_SAMPLE_CALLCHAIN) { data->callchain = perf_callchain(event, iregs); + data->sample_flags |= PERF_SAMPLE_CALLCHAIN; + } /* * We use the interrupt regs as a base because the PEBS record does not @@ -1451,18 +1643,23 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event, } - if ((sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR)) && - x86_pmu.intel_cap.pebs_format >= 1) + if ((sample_type & PERF_SAMPLE_ADDR_TYPE) && + x86_pmu.intel_cap.pebs_format >= 1) { data->addr = pebs->dla; + data->sample_flags |= PERF_SAMPLE_ADDR; + } if (x86_pmu.intel_cap.pebs_format >= 2) { /* Only set the TSX weight when no memory weight. */ - if ((sample_type & PERF_SAMPLE_WEIGHT) && !fll) - data->weight = intel_get_tsx_weight(pebs->tsx_tuning); - - if (sample_type & PERF_SAMPLE_TRANSACTION) + if ((sample_type & PERF_SAMPLE_WEIGHT_TYPE) && !fll) { + data->weight.full = intel_get_tsx_weight(pebs->tsx_tuning); + data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE; + } + if (sample_type & PERF_SAMPLE_TRANSACTION) { data->txn = intel_get_tsx_transaction(pebs->tsx_tuning, pebs->ax); + data->sample_flags |= PERF_SAMPLE_TRANSACTION; + } } /* @@ -1472,11 +1669,15 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event, * We can only do this for the default trace clock. */ if (x86_pmu.intel_cap.pebs_format >= 3 && - event->attr.use_clockid == 0) + event->attr.use_clockid == 0) { data->time = native_sched_clock_from_tsc(pebs->tsc); + data->sample_flags |= PERF_SAMPLE_TIME; + } - if (has_branch_stack(event)) + if (has_branch_stack(event)) { data->br_stack = &cpuc->lbr_stack; + data->sample_flags |= PERF_SAMPLE_BRANCH_STACK; + } } static void adaptive_pebs_save_regs(struct pt_regs *regs, @@ -1502,6 +1703,9 @@ static void adaptive_pebs_save_regs(struct pt_regs *regs, #endif } +#define PEBS_LATENCY_MASK 0xffff +#define PEBS_CACHE_LATENCY_OFFSET 32 + /* * With adaptive PEBS the layout depends on what fields are configured. */ @@ -1531,8 +1735,10 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, perf_sample_data_init(data, 0, event->hw.last_period); data->period = event->hw.last_period; - if (event->attr.use_clockid == 0) + if (event->attr.use_clockid == 0) { data->time = native_sched_clock_from_tsc(basic->tsc); + data->sample_flags |= PERF_SAMPLE_TIME; + } /* * We must however always use iregs for the unwinder to stay sane; the @@ -1540,8 +1746,10 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, * previous PMI context or an (I)RET happened between the record and * PMI. */ - if (sample_type & PERF_SAMPLE_CALLCHAIN) + if (sample_type & PERF_SAMPLE_CALLCHAIN) { data->callchain = perf_callchain(event, iregs); + data->sample_flags |= PERF_SAMPLE_CALLCHAIN; + } *regs = *iregs; /* The ip in basic is EventingIP */ @@ -1572,19 +1780,44 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, } if (format_size & PEBS_DATACFG_MEMINFO) { - if (sample_type & PERF_SAMPLE_WEIGHT) - data->weight = meminfo->latency ?: - intel_get_tsx_weight(meminfo->tsx_tuning); + if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) { + u64 weight = meminfo->latency; + + if (x86_pmu.flags & PMU_FL_INSTR_LATENCY) { + data->weight.var2_w = weight & PEBS_LATENCY_MASK; + weight >>= PEBS_CACHE_LATENCY_OFFSET; + } - if (sample_type & PERF_SAMPLE_DATA_SRC) + /* + * Although meminfo::latency is defined as a u64, + * only the lower 32 bits include the valid data + * in practice on Ice Lake and earlier platforms. + */ + if (sample_type & PERF_SAMPLE_WEIGHT) { + data->weight.full = weight ?: + intel_get_tsx_weight(meminfo->tsx_tuning); + } else { + data->weight.var1_dw = (u32)(weight & PEBS_LATENCY_MASK) ?: + intel_get_tsx_weight(meminfo->tsx_tuning); + } + data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE; + } + + if (sample_type & PERF_SAMPLE_DATA_SRC) { data->data_src.val = get_data_src(event, meminfo->aux); + data->sample_flags |= PERF_SAMPLE_DATA_SRC; + } - if (sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR)) + if (sample_type & PERF_SAMPLE_ADDR_TYPE) { data->addr = meminfo->address; + data->sample_flags |= PERF_SAMPLE_ADDR; + } - if (sample_type & PERF_SAMPLE_TRANSACTION) + if (sample_type & PERF_SAMPLE_TRANSACTION) { data->txn = intel_get_tsx_transaction(meminfo->tsx_tuning, gprs ? gprs->ax : 0); + data->sample_flags |= PERF_SAMPLE_TRANSACTION; + } } if (format_size & PEBS_DATACFG_XMMS) { @@ -1595,14 +1828,15 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, } if (format_size & PEBS_DATACFG_LBRS) { - struct pebs_lbr *lbr = next_record; + struct lbr_entry *lbr = next_record; int num_lbr = ((format_size >> PEBS_DATACFG_LBR_SHIFT) & 0xff) + 1; - next_record = next_record + num_lbr*sizeof(struct pebs_lbr_entry); + next_record = next_record + num_lbr * sizeof(struct lbr_entry); if (has_branch_stack(event)) { intel_pmu_store_pebs_lbrs(lbr); data->br_stack = &cpuc->lbr_stack; + data->sample_flags |= PERF_SAMPLE_BRANCH_STACK; } } @@ -1689,7 +1923,7 @@ intel_pmu_save_and_restart_reload(struct perf_event *event, int count) * * [-period, 0] * - * the difference between two consequtive reads is: + * the difference between two consecutive reads is: * * A) value2 - value1; * when no overflows have happened in between, @@ -1721,22 +1955,24 @@ intel_pmu_save_and_restart_reload(struct perf_event *event, int count) return 0; } -static void __intel_pmu_pebs_event(struct perf_event *event, - struct pt_regs *iregs, - void *base, void *top, - int bit, int count, - void (*setup_sample)(struct perf_event *, - struct pt_regs *, - void *, - struct perf_sample_data *, - struct pt_regs *)) +static __always_inline void +__intel_pmu_pebs_event(struct perf_event *event, + struct pt_regs *iregs, + struct perf_sample_data *data, + void *base, void *top, + int bit, int count, + void (*setup_sample)(struct perf_event *, + struct pt_regs *, + void *, + struct perf_sample_data *, + struct pt_regs *)) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; - struct perf_sample_data data; struct x86_perf_regs perf_regs; struct pt_regs *regs = &perf_regs.regs; void *at = get_next_pebs_record_by_bit(base, top, bit); + static struct pt_regs dummy_iregs; if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) { /* @@ -1749,28 +1985,37 @@ static void __intel_pmu_pebs_event(struct perf_event *event, } else if (!intel_pmu_save_and_restart(event)) return; + if (!iregs) + iregs = &dummy_iregs; + while (count > 1) { - setup_sample(event, iregs, at, &data, regs); - perf_event_output(event, &data, regs); + setup_sample(event, iregs, at, data, regs); + perf_event_output(event, data, regs); at += cpuc->pebs_record_size; at = get_next_pebs_record_by_bit(at, top, bit); count--; } - setup_sample(event, iregs, at, &data, regs); - - /* - * All but the last records are processed. - * The last one is left to be able to call the overflow handler. - */ - if (perf_event_overflow(event, &data, regs)) { - x86_pmu_stop(event, 0); - return; + setup_sample(event, iregs, at, data, regs); + if (iregs == &dummy_iregs) { + /* + * The PEBS records may be drained in the non-overflow context, + * e.g., large PEBS + context switch. Perf should treat the + * last record the same as other PEBS records, and doesn't + * invoke the generic overflow handler. + */ + perf_event_output(event, data, regs); + } else { + /* + * All but the last records are processed. + * The last one is left to be able to call the overflow handler. + */ + if (perf_event_overflow(event, data, regs)) + x86_pmu_stop(event, 0); } - } -static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) +static void intel_pmu_drain_pebs_core(struct pt_regs *iregs, struct perf_sample_data *data) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct debug_store *ds = cpuc->ds; @@ -1804,7 +2049,7 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) return; } - __intel_pmu_pebs_event(event, iregs, at, top, 0, n, + __intel_pmu_pebs_event(event, iregs, data, at, top, 0, n, setup_pebs_fixed_sample_data); } @@ -1827,7 +2072,7 @@ static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, int } } -static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) +static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_data *data) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct debug_store *ds = cpuc->ds; @@ -1883,7 +2128,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) */ if (!pebs_status && cpuc->pebs_enabled && !(cpuc->pebs_enabled & (cpuc->pebs_enabled-1))) - pebs_status = cpuc->pebs_enabled; + pebs_status = p->status = cpuc->pebs_enabled; bit = find_first_bit((unsigned long *)&pebs_status, x86_pmu.max_pebs_events); @@ -1905,7 +2150,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) * that caused the PEBS record. It's called collision. * If collision happened, the record will be dropped. */ - if (p->status != (1ULL << bit)) { + if (pebs_status != (1ULL << bit)) { for_each_set_bit(i, (unsigned long *)&pebs_status, size) error[i]++; continue; @@ -1929,22 +2174,24 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) if (error[bit]) { perf_log_lost_samples(event, error[bit]); - if (perf_event_account_interrupt(event)) + if (iregs && perf_event_account_interrupt(event)) x86_pmu_stop(event, 0); } if (counts[bit]) { - __intel_pmu_pebs_event(event, iregs, base, + __intel_pmu_pebs_event(event, iregs, data, base, top, bit, counts[bit], setup_pebs_fixed_sample_data); } } } -static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs) +static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_data *data) { short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {}; struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + int max_pebs_events = hybrid(cpuc->pmu, max_pebs_events); + int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed); struct debug_store *ds = cpuc->ds; struct perf_event *event; void *base, *at, *top; @@ -1959,9 +2206,9 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs) ds->pebs_index = ds->pebs_buffer_base; - mask = ((1ULL << x86_pmu.max_pebs_events) - 1) | - (((1ULL << x86_pmu.num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED); - size = INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed; + mask = ((1ULL << max_pebs_events) - 1) | + (((1ULL << num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED); + size = INTEL_PMC_IDX_FIXED + num_counters_fixed; if (unlikely(base >= top)) { intel_pmu_pebs_event_update_no_drain(cpuc, size); @@ -1989,7 +2236,7 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs) if (WARN_ON_ONCE(!event->attr.precise_ip)) continue; - __intel_pmu_pebs_event(event, iregs, base, + __intel_pmu_pebs_event(event, iregs, data, base, top, bit, counts[bit], setup_pebs_adaptive_sample_data); } @@ -2057,6 +2304,7 @@ void __init intel_ds_init(void) break; case 4: + case 5: x86_pmu.drain_pebs = intel_pmu_drain_pebs_icl; x86_pmu.pebs_record_size = sizeof(struct pebs_basic); if (x86_pmu.intel_cap.pebs_baseline) { @@ -2064,8 +2312,9 @@ void __init intel_ds_init(void) PERF_SAMPLE_BRANCH_STACK | PERF_SAMPLE_TIME; x86_pmu.flags |= PMU_FL_PEBS_ALL; + x86_pmu.pebs_capable = ~0ULL; pebs_qual = "-baseline"; - x86_get_pmu()->capabilities |= PERF_PMU_CAP_EXTENDED_REGS; + x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_EXTENDED_REGS; } else { /* Only basic record supported */ x86_pmu.large_pebs_flags &= @@ -2078,9 +2327,9 @@ void __init intel_ds_init(void) } pr_cont("PEBS fmt4%c%s, ", pebs_type, pebs_qual); - if (x86_pmu.intel_cap.pebs_output_pt_available) { + if (!is_hybrid() && x86_pmu.intel_cap.pebs_output_pt_available) { pr_cont("PEBS-via-PT, "); - x86_get_pmu()->capabilities |= PERF_PMU_CAP_AUX_OUTPUT; + x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_AUX_OUTPUT; } break; diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 534c76606049..8259d725054d 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -4,29 +4,9 @@ #include <asm/perf_event.h> #include <asm/msr.h> -#include <asm/insn.h> #include "../perf_event.h" -enum { - LBR_FORMAT_32 = 0x00, - LBR_FORMAT_LIP = 0x01, - LBR_FORMAT_EIP = 0x02, - LBR_FORMAT_EIP_FLAGS = 0x03, - LBR_FORMAT_EIP_FLAGS2 = 0x04, - LBR_FORMAT_INFO = 0x05, - LBR_FORMAT_TIME = 0x06, - LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_TIME, -}; - -static const enum { - LBR_EIP_FLAGS = 1, - LBR_TSX = 2, -} lbr_desc[LBR_FORMAT_MAX_KNOWN + 1] = { - [LBR_FORMAT_EIP_FLAGS] = LBR_EIP_FLAGS, - [LBR_FORMAT_EIP_FLAGS2] = LBR_EIP_FLAGS | LBR_TSX, -}; - /* * Intel LBR_SELECT bits * Intel Vol3a, April 2011, Section 16.7 Table 16-10 @@ -85,65 +65,52 @@ static const enum { #define LBR_FROM_SIGNEXT_2MSB (BIT_ULL(60) | BIT_ULL(59)) /* - * x86control flow change classification - * x86control flow changes include branches, interrupts, traps, faults + * Intel LBR_CTL bits + * + * Hardware branch filter for Arch LBR */ -enum { - X86_BR_NONE = 0, /* unknown */ - - X86_BR_USER = 1 << 0, /* branch target is user */ - X86_BR_KERNEL = 1 << 1, /* branch target is kernel */ - - X86_BR_CALL = 1 << 2, /* call */ - X86_BR_RET = 1 << 3, /* return */ - X86_BR_SYSCALL = 1 << 4, /* syscall */ - X86_BR_SYSRET = 1 << 5, /* syscall return */ - X86_BR_INT = 1 << 6, /* sw interrupt */ - X86_BR_IRET = 1 << 7, /* return from interrupt */ - X86_BR_JCC = 1 << 8, /* conditional */ - X86_BR_JMP = 1 << 9, /* jump */ - X86_BR_IRQ = 1 << 10,/* hw interrupt or trap or fault */ - X86_BR_IND_CALL = 1 << 11,/* indirect calls */ - X86_BR_ABORT = 1 << 12,/* transaction abort */ - X86_BR_IN_TX = 1 << 13,/* in transaction */ - X86_BR_NO_TX = 1 << 14,/* not in transaction */ - X86_BR_ZERO_CALL = 1 << 15,/* zero length call */ - X86_BR_CALL_STACK = 1 << 16,/* call stack */ - X86_BR_IND_JMP = 1 << 17,/* indirect jump */ - - X86_BR_TYPE_SAVE = 1 << 18,/* indicate to save branch type */ +#define ARCH_LBR_KERNEL_BIT 1 /* capture at ring0 */ +#define ARCH_LBR_USER_BIT 2 /* capture at ring > 0 */ +#define ARCH_LBR_CALL_STACK_BIT 3 /* enable call stack */ +#define ARCH_LBR_JCC_BIT 16 /* capture conditional branches */ +#define ARCH_LBR_REL_JMP_BIT 17 /* capture relative jumps */ +#define ARCH_LBR_IND_JMP_BIT 18 /* capture indirect jumps */ +#define ARCH_LBR_REL_CALL_BIT 19 /* capture relative calls */ +#define ARCH_LBR_IND_CALL_BIT 20 /* capture indirect calls */ +#define ARCH_LBR_RETURN_BIT 21 /* capture near returns */ +#define ARCH_LBR_OTHER_BRANCH_BIT 22 /* capture other branches */ + +#define ARCH_LBR_KERNEL (1ULL << ARCH_LBR_KERNEL_BIT) +#define ARCH_LBR_USER (1ULL << ARCH_LBR_USER_BIT) +#define ARCH_LBR_CALL_STACK (1ULL << ARCH_LBR_CALL_STACK_BIT) +#define ARCH_LBR_JCC (1ULL << ARCH_LBR_JCC_BIT) +#define ARCH_LBR_REL_JMP (1ULL << ARCH_LBR_REL_JMP_BIT) +#define ARCH_LBR_IND_JMP (1ULL << ARCH_LBR_IND_JMP_BIT) +#define ARCH_LBR_REL_CALL (1ULL << ARCH_LBR_REL_CALL_BIT) +#define ARCH_LBR_IND_CALL (1ULL << ARCH_LBR_IND_CALL_BIT) +#define ARCH_LBR_RETURN (1ULL << ARCH_LBR_RETURN_BIT) +#define ARCH_LBR_OTHER_BRANCH (1ULL << ARCH_LBR_OTHER_BRANCH_BIT) + +#define ARCH_LBR_ANY \ + (ARCH_LBR_JCC |\ + ARCH_LBR_REL_JMP |\ + ARCH_LBR_IND_JMP |\ + ARCH_LBR_REL_CALL |\ + ARCH_LBR_IND_CALL |\ + ARCH_LBR_RETURN |\ + ARCH_LBR_OTHER_BRANCH) + +#define ARCH_LBR_CTL_MASK 0x7f000e -}; +static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc); -#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL) -#define X86_BR_ANYTX (X86_BR_NO_TX | X86_BR_IN_TX) - -#define X86_BR_ANY \ - (X86_BR_CALL |\ - X86_BR_RET |\ - X86_BR_SYSCALL |\ - X86_BR_SYSRET |\ - X86_BR_INT |\ - X86_BR_IRET |\ - X86_BR_JCC |\ - X86_BR_JMP |\ - X86_BR_IRQ |\ - X86_BR_ABORT |\ - X86_BR_IND_CALL |\ - X86_BR_IND_JMP |\ - X86_BR_ZERO_CALL) - -#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY) - -#define X86_BR_ANY_CALL \ - (X86_BR_CALL |\ - X86_BR_IND_CALL |\ - X86_BR_ZERO_CALL |\ - X86_BR_SYSCALL |\ - X86_BR_IRQ |\ - X86_BR_INT) +static __always_inline bool is_lbr_call_stack_bit_set(u64 config) +{ + if (static_cpu_has(X86_FEATURE_ARCH_LBR)) + return !!(config & ARCH_LBR_CALL_STACK); -static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc); + return !!(config & LBR_CALL_STACK); +} /* * We only support LBR implementations that have FREEZE_LBRS_ON_PMI @@ -168,33 +135,32 @@ static void __intel_pmu_lbr_enable(bool pmi) */ if (cpuc->lbr_sel) lbr_select = cpuc->lbr_sel->config & x86_pmu.lbr_sel_mask; - if (!pmi && cpuc->lbr_sel) + if (!static_cpu_has(X86_FEATURE_ARCH_LBR) && !pmi && cpuc->lbr_sel) wrmsrl(MSR_LBR_SELECT, lbr_select); rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); orig_debugctl = debugctl; - debugctl |= DEBUGCTLMSR_LBR; + + if (!static_cpu_has(X86_FEATURE_ARCH_LBR)) + debugctl |= DEBUGCTLMSR_LBR; /* * LBR callstack does not work well with FREEZE_LBRS_ON_PMI. * If FREEZE_LBRS_ON_PMI is set, PMI near call/return instructions * may cause superfluous increase/decrease of LBR_TOS. */ - if (!(lbr_select & LBR_CALL_STACK)) + if (is_lbr_call_stack_bit_set(lbr_select)) + debugctl &= ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI; + else debugctl |= DEBUGCTLMSR_FREEZE_LBRS_ON_PMI; + if (orig_debugctl != debugctl) wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); -} -static void __intel_pmu_lbr_disable(void) -{ - u64 debugctl; - - rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); - debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); - wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); + if (static_cpu_has(X86_FEATURE_ARCH_LBR)) + wrmsrl(MSR_ARCH_LBR_CTL, lbr_select | ARCH_LBR_CTL_LBREN); } -static void intel_pmu_lbr_reset_32(void) +void intel_pmu_lbr_reset_32(void) { int i; @@ -202,18 +168,24 @@ static void intel_pmu_lbr_reset_32(void) wrmsrl(x86_pmu.lbr_from + i, 0); } -static void intel_pmu_lbr_reset_64(void) +void intel_pmu_lbr_reset_64(void) { int i; for (i = 0; i < x86_pmu.lbr_nr; i++) { wrmsrl(x86_pmu.lbr_from + i, 0); wrmsrl(x86_pmu.lbr_to + i, 0); - if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO) - wrmsrl(MSR_LBR_INFO_0 + i, 0); + if (x86_pmu.lbr_has_info) + wrmsrl(x86_pmu.lbr_info + i, 0); } } +static void intel_pmu_arch_lbr_reset(void) +{ + /* Write to ARCH_LBR_DEPTH MSR, all LBR entries are reset to 0 */ + wrmsrl(MSR_ARCH_LBR_DEPTH, x86_pmu.lbr_nr); +} + void intel_pmu_lbr_reset(void) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); @@ -221,13 +193,12 @@ void intel_pmu_lbr_reset(void) if (!x86_pmu.lbr_nr) return; - if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32) - intel_pmu_lbr_reset_32(); - else - intel_pmu_lbr_reset_64(); + x86_pmu.lbr_reset(); cpuc->last_task_ctx = NULL; cpuc->last_log_id = 0; + if (!static_cpu_has(X86_FEATURE_ARCH_LBR) && cpuc->lbr_select) + wrmsrl(MSR_LBR_SELECT, 0); } /* @@ -247,9 +218,9 @@ enum { }; /* - * For formats with LBR_TSX flags (e.g. LBR_FORMAT_EIP_FLAGS2), bits 61:62 in - * MSR_LAST_BRANCH_FROM_x are the TSX flags when TSX is supported, but when - * TSX is not supported they have no consistent behavior: + * For format LBR_FORMAT_EIP_FLAGS2, bits 61:62 in MSR_LAST_BRANCH_FROM_x + * are the TSX flags when TSX is supported, but when TSX is not supported + * they have no consistent behavior: * * - For wrmsr(), bits 61:62 are considered part of the sign extension. * - For HW updates (branch captures) bits 61:62 are always OFF and are not @@ -257,7 +228,7 @@ enum { * * Therefore, if: * - * 1) LBR has TSX format + * 1) LBR format LBR_FORMAT_EIP_FLAGS2 * 2) CPU has no TSX support enabled * * ... then any value passed to wrmsr() must be sign extended to 63 bits and any @@ -266,11 +237,10 @@ enum { */ static inline bool lbr_from_signext_quirk_needed(void) { - int lbr_format = x86_pmu.intel_cap.lbr_format; bool tsx_support = boot_cpu_has(X86_FEATURE_HLE) || boot_cpu_has(X86_FEATURE_RTM); - return !tsx_support && (lbr_desc[lbr_format] & LBR_TSX); + return !tsx_support; } static DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key); @@ -308,119 +278,247 @@ static u64 lbr_from_signext_quirk_rd(u64 val) return val; } -static inline void wrlbr_from(unsigned int idx, u64 val) +static __always_inline void wrlbr_from(unsigned int idx, u64 val) { val = lbr_from_signext_quirk_wr(val); wrmsrl(x86_pmu.lbr_from + idx, val); } -static inline void wrlbr_to(unsigned int idx, u64 val) +static __always_inline void wrlbr_to(unsigned int idx, u64 val) { wrmsrl(x86_pmu.lbr_to + idx, val); } -static inline u64 rdlbr_from(unsigned int idx) +static __always_inline void wrlbr_info(unsigned int idx, u64 val) +{ + wrmsrl(x86_pmu.lbr_info + idx, val); +} + +static __always_inline u64 rdlbr_from(unsigned int idx, struct lbr_entry *lbr) { u64 val; + if (lbr) + return lbr->from; + rdmsrl(x86_pmu.lbr_from + idx, val); return lbr_from_signext_quirk_rd(val); } -static inline u64 rdlbr_to(unsigned int idx) +static __always_inline u64 rdlbr_to(unsigned int idx, struct lbr_entry *lbr) { u64 val; + if (lbr) + return lbr->to; + rdmsrl(x86_pmu.lbr_to + idx, val); return val; } -static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx) +static __always_inline u64 rdlbr_info(unsigned int idx, struct lbr_entry *lbr) { - struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); - int i; - unsigned lbr_idx, mask; - u64 tos; + u64 val; - if (task_ctx->lbr_callstack_users == 0 || - task_ctx->lbr_stack_state == LBR_NONE) { - intel_pmu_lbr_reset(); - return; - } + if (lbr) + return lbr->info; - tos = task_ctx->tos; - /* - * Does not restore the LBR registers, if - * - No one else touched them, and - * - Did not enter C6 - */ - if ((task_ctx == cpuc->last_task_ctx) && - (task_ctx->log_id == cpuc->last_log_id) && - rdlbr_from(tos)) { - task_ctx->lbr_stack_state = LBR_NONE; - return; - } + rdmsrl(x86_pmu.lbr_info + idx, val); + + return val; +} + +static inline void +wrlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info) +{ + wrlbr_from(idx, lbr->from); + wrlbr_to(idx, lbr->to); + if (need_info) + wrlbr_info(idx, lbr->info); +} + +static inline bool +rdlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info) +{ + u64 from = rdlbr_from(idx, NULL); + + /* Don't read invalid entry */ + if (!from) + return false; + + lbr->from = from; + lbr->to = rdlbr_to(idx, NULL); + if (need_info) + lbr->info = rdlbr_info(idx, NULL); + + return true; +} + +void intel_pmu_lbr_restore(void *ctx) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct x86_perf_task_context *task_ctx = ctx; + bool need_info = x86_pmu.lbr_has_info; + u64 tos = task_ctx->tos; + unsigned lbr_idx, mask; + int i; mask = x86_pmu.lbr_nr - 1; for (i = 0; i < task_ctx->valid_lbrs; i++) { lbr_idx = (tos - i) & mask; - wrlbr_from(lbr_idx, task_ctx->lbr_from[i]); - wrlbr_to (lbr_idx, task_ctx->lbr_to[i]); - - if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO) - wrmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]); + wrlbr_all(&task_ctx->lbr[i], lbr_idx, need_info); } for (; i < x86_pmu.lbr_nr; i++) { lbr_idx = (tos - i) & mask; wrlbr_from(lbr_idx, 0); wrlbr_to(lbr_idx, 0); - if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO) - wrmsrl(MSR_LBR_INFO_0 + lbr_idx, 0); + if (need_info) + wrlbr_info(lbr_idx, 0); } wrmsrl(x86_pmu.lbr_tos, tos); - task_ctx->lbr_stack_state = LBR_NONE; + + if (cpuc->lbr_select) + wrmsrl(MSR_LBR_SELECT, task_ctx->lbr_sel); } -static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx) +static void intel_pmu_arch_lbr_restore(void *ctx) { - struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); - unsigned lbr_idx, mask; - u64 tos, from; + struct x86_perf_task_context_arch_lbr *task_ctx = ctx; + struct lbr_entry *entries = task_ctx->entries; int i; - if (task_ctx->lbr_callstack_users == 0) { - task_ctx->lbr_stack_state = LBR_NONE; + /* Fast reset the LBRs before restore if the call stack is not full. */ + if (!entries[x86_pmu.lbr_nr - 1].from) + intel_pmu_arch_lbr_reset(); + + for (i = 0; i < x86_pmu.lbr_nr; i++) { + if (!entries[i].from) + break; + wrlbr_all(&entries[i], i, true); + } +} + +/* + * Restore the Architecture LBR state from the xsave area in the perf + * context data for the task via the XRSTORS instruction. + */ +static void intel_pmu_arch_lbr_xrstors(void *ctx) +{ + struct x86_perf_task_context_arch_lbr_xsave *task_ctx = ctx; + + xrstors(&task_ctx->xsave, XFEATURE_MASK_LBR); +} + +static __always_inline bool lbr_is_reset_in_cstate(void *ctx) +{ + if (static_cpu_has(X86_FEATURE_ARCH_LBR)) + return x86_pmu.lbr_deep_c_reset && !rdlbr_from(0, NULL); + + return !rdlbr_from(((struct x86_perf_task_context *)ctx)->tos, NULL); +} + +static void __intel_pmu_lbr_restore(void *ctx) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + if (task_context_opt(ctx)->lbr_callstack_users == 0 || + task_context_opt(ctx)->lbr_stack_state == LBR_NONE) { + intel_pmu_lbr_reset(); + return; + } + + /* + * Does not restore the LBR registers, if + * - No one else touched them, and + * - Was not cleared in Cstate + */ + if ((ctx == cpuc->last_task_ctx) && + (task_context_opt(ctx)->log_id == cpuc->last_log_id) && + !lbr_is_reset_in_cstate(ctx)) { + task_context_opt(ctx)->lbr_stack_state = LBR_NONE; return; } + x86_pmu.lbr_restore(ctx); + + task_context_opt(ctx)->lbr_stack_state = LBR_NONE; +} + +void intel_pmu_lbr_save(void *ctx) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct x86_perf_task_context *task_ctx = ctx; + bool need_info = x86_pmu.lbr_has_info; + unsigned lbr_idx, mask; + u64 tos; + int i; + mask = x86_pmu.lbr_nr - 1; tos = intel_pmu_lbr_tos(); for (i = 0; i < x86_pmu.lbr_nr; i++) { lbr_idx = (tos - i) & mask; - from = rdlbr_from(lbr_idx); - if (!from) + if (!rdlbr_all(&task_ctx->lbr[i], lbr_idx, need_info)) break; - task_ctx->lbr_from[i] = from; - task_ctx->lbr_to[i] = rdlbr_to(lbr_idx); - if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO) - rdmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]); } task_ctx->valid_lbrs = i; task_ctx->tos = tos; - task_ctx->lbr_stack_state = LBR_VALID; - cpuc->last_task_ctx = task_ctx; - cpuc->last_log_id = ++task_ctx->log_id; + if (cpuc->lbr_select) + rdmsrl(MSR_LBR_SELECT, task_ctx->lbr_sel); +} + +static void intel_pmu_arch_lbr_save(void *ctx) +{ + struct x86_perf_task_context_arch_lbr *task_ctx = ctx; + struct lbr_entry *entries = task_ctx->entries; + int i; + + for (i = 0; i < x86_pmu.lbr_nr; i++) { + if (!rdlbr_all(&entries[i], i, true)) + break; + } + + /* LBR call stack is not full. Reset is required in restore. */ + if (i < x86_pmu.lbr_nr) + entries[x86_pmu.lbr_nr - 1].from = 0; +} + +/* + * Save the Architecture LBR state to the xsave area in the perf + * context data for the task via the XSAVES instruction. + */ +static void intel_pmu_arch_lbr_xsaves(void *ctx) +{ + struct x86_perf_task_context_arch_lbr_xsave *task_ctx = ctx; + + xsaves(&task_ctx->xsave, XFEATURE_MASK_LBR); +} + +static void __intel_pmu_lbr_save(void *ctx) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + if (task_context_opt(ctx)->lbr_callstack_users == 0) { + task_context_opt(ctx)->lbr_stack_state = LBR_NONE; + return; + } + + x86_pmu.lbr_save(ctx); + + task_context_opt(ctx)->lbr_stack_state = LBR_VALID; + + cpuc->last_task_ctx = ctx; + cpuc->last_log_id = ++task_context_opt(ctx)->log_id; } void intel_pmu_lbr_swap_task_ctx(struct perf_event_context *prev, struct perf_event_context *next) { - struct x86_perf_task_context *prev_ctx_data, *next_ctx_data; + void *prev_ctx_data, *next_ctx_data; swap(prev->task_ctx_data, next->task_ctx_data); @@ -436,14 +534,14 @@ void intel_pmu_lbr_swap_task_ctx(struct perf_event_context *prev, if (!prev_ctx_data || !next_ctx_data) return; - swap(prev_ctx_data->lbr_callstack_users, - next_ctx_data->lbr_callstack_users); + swap(task_context_opt(prev_ctx_data)->lbr_callstack_users, + task_context_opt(next_ctx_data)->lbr_callstack_users); } void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); - struct x86_perf_task_context *task_ctx; + void *task_ctx; if (!cpuc->lbr_users) return; @@ -480,17 +578,17 @@ static inline bool branch_user_callstack(unsigned br_sel) void intel_pmu_lbr_add(struct perf_event *event) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); - struct x86_perf_task_context *task_ctx; if (!x86_pmu.lbr_nr) return; + if (event->hw.flags & PERF_X86_EVENT_LBR_SELECT) + cpuc->lbr_select = 1; + cpuc->br_sel = event->hw.branch_reg.reg; - if (branch_user_callstack(cpuc->br_sel) && event->ctx->task_ctx_data) { - task_ctx = event->ctx->task_ctx_data; - task_ctx->lbr_callstack_users++; - } + if (branch_user_callstack(cpuc->br_sel) && event->ctx->task_ctx_data) + task_context_opt(event->ctx->task_ctx_data)->lbr_callstack_users++; /* * Request pmu::sched_task() callback, which will fire inside the @@ -518,19 +616,59 @@ void intel_pmu_lbr_add(struct perf_event *event) intel_pmu_lbr_reset(); } +void release_lbr_buffers(void) +{ + struct kmem_cache *kmem_cache; + struct cpu_hw_events *cpuc; + int cpu; + + if (!static_cpu_has(X86_FEATURE_ARCH_LBR)) + return; + + for_each_possible_cpu(cpu) { + cpuc = per_cpu_ptr(&cpu_hw_events, cpu); + kmem_cache = x86_get_pmu(cpu)->task_ctx_cache; + if (kmem_cache && cpuc->lbr_xsave) { + kmem_cache_free(kmem_cache, cpuc->lbr_xsave); + cpuc->lbr_xsave = NULL; + } + } +} + +void reserve_lbr_buffers(void) +{ + struct kmem_cache *kmem_cache; + struct cpu_hw_events *cpuc; + int cpu; + + if (!static_cpu_has(X86_FEATURE_ARCH_LBR)) + return; + + for_each_possible_cpu(cpu) { + cpuc = per_cpu_ptr(&cpu_hw_events, cpu); + kmem_cache = x86_get_pmu(cpu)->task_ctx_cache; + if (!kmem_cache || cpuc->lbr_xsave) + continue; + + cpuc->lbr_xsave = kmem_cache_alloc_node(kmem_cache, + GFP_KERNEL | __GFP_ZERO, + cpu_to_node(cpu)); + } +} + void intel_pmu_lbr_del(struct perf_event *event) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); - struct x86_perf_task_context *task_ctx; if (!x86_pmu.lbr_nr) return; if (branch_user_callstack(cpuc->br_sel) && - event->ctx->task_ctx_data) { - task_ctx = event->ctx->task_ctx_data; - task_ctx->lbr_callstack_users--; - } + event->ctx->task_ctx_data) + task_context_opt(event->ctx->task_ctx_data)->lbr_callstack_users--; + + if (event->hw.flags & PERF_X86_EVENT_LBR_SELECT) + cpuc->lbr_select = 0; if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip > 0) cpuc->lbr_pebs_users--; @@ -540,11 +678,19 @@ void intel_pmu_lbr_del(struct perf_event *event) perf_sched_cb_dec(event->ctx->pmu); } +static inline bool vlbr_exclude_host(void) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + return test_bit(INTEL_PMC_IDX_FIXED_VLBR, + (unsigned long *)&cpuc->intel_ctrl_guest_mask); +} + void intel_pmu_lbr_enable_all(bool pmi) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); - if (cpuc->lbr_users) + if (cpuc->lbr_users && !vlbr_exclude_host()) __intel_pmu_lbr_enable(pmi); } @@ -552,13 +698,18 @@ void intel_pmu_lbr_disable_all(void) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); - if (cpuc->lbr_users) + if (cpuc->lbr_users && !vlbr_exclude_host()) { + if (static_cpu_has(X86_FEATURE_ARCH_LBR)) + return __intel_pmu_arch_lbr_disable(); + __intel_pmu_lbr_disable(); + } } -static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc) +void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc) { unsigned long mask = x86_pmu.lbr_nr - 1; + struct perf_branch_entry *br = cpuc->lbr_entries; u64 tos = intel_pmu_lbr_tos(); int i; @@ -574,17 +725,14 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc) rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr); - cpuc->lbr_entries[i].from = msr_lastbranch.from; - cpuc->lbr_entries[i].to = msr_lastbranch.to; - cpuc->lbr_entries[i].mispred = 0; - cpuc->lbr_entries[i].predicted = 0; - cpuc->lbr_entries[i].in_tx = 0; - cpuc->lbr_entries[i].abort = 0; - cpuc->lbr_entries[i].cycles = 0; - cpuc->lbr_entries[i].type = 0; - cpuc->lbr_entries[i].reserved = 0; + perf_clear_branch_entry_bitfields(br); + + br->from = msr_lastbranch.from; + br->to = msr_lastbranch.to; + br++; } cpuc->lbr_stack.nr = i; + cpuc->lbr_stack.hw_idx = tos; } /* @@ -592,11 +740,11 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc) * is the same as the linear address, allowing us to merge the LIP and EIP * LBR formats. */ -static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) +void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) { bool need_info = false, call_stack = false; unsigned long mask = x86_pmu.lbr_nr - 1; - int lbr_format = x86_pmu.intel_cap.lbr_format; + struct perf_branch_entry *br = cpuc->lbr_entries; u64 tos = intel_pmu_lbr_tos(); int i; int out = 0; @@ -611,12 +759,10 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) for (i = 0; i < num; i++) { unsigned long lbr_idx = (tos - i) & mask; u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0; - int skip = 0; u16 cycles = 0; - int lbr_flags = lbr_desc[lbr_format]; - from = rdlbr_from(lbr_idx); - to = rdlbr_to(lbr_idx); + from = rdlbr_from(lbr_idx, NULL); + to = rdlbr_to(lbr_idx, NULL); /* * Read LBR call stack entries @@ -625,37 +771,39 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) if (call_stack && !from) break; - if (lbr_format == LBR_FORMAT_INFO && need_info) { - u64 info; - - rdmsrl(MSR_LBR_INFO_0 + lbr_idx, info); - mis = !!(info & LBR_INFO_MISPRED); - pred = !mis; - in_tx = !!(info & LBR_INFO_IN_TX); - abort = !!(info & LBR_INFO_ABORT); - cycles = (info & LBR_INFO_CYCLES); - } - - if (lbr_format == LBR_FORMAT_TIME) { - mis = !!(from & LBR_FROM_FLAG_MISPRED); - pred = !mis; - skip = 1; - cycles = ((to >> 48) & LBR_INFO_CYCLES); - - to = (u64)((((s64)to) << 16) >> 16); - } - - if (lbr_flags & LBR_EIP_FLAGS) { - mis = !!(from & LBR_FROM_FLAG_MISPRED); - pred = !mis; - skip = 1; - } - if (lbr_flags & LBR_TSX) { - in_tx = !!(from & LBR_FROM_FLAG_IN_TX); - abort = !!(from & LBR_FROM_FLAG_ABORT); - skip = 3; + if (x86_pmu.lbr_has_info) { + if (need_info) { + u64 info; + + info = rdlbr_info(lbr_idx, NULL); + mis = !!(info & LBR_INFO_MISPRED); + pred = !mis; + cycles = (info & LBR_INFO_CYCLES); + if (x86_pmu.lbr_has_tsx) { + in_tx = !!(info & LBR_INFO_IN_TX); + abort = !!(info & LBR_INFO_ABORT); + } + } + } else { + int skip = 0; + + if (x86_pmu.lbr_from_flags) { + mis = !!(from & LBR_FROM_FLAG_MISPRED); + pred = !mis; + skip = 1; + } + if (x86_pmu.lbr_has_tsx) { + in_tx = !!(from & LBR_FROM_FLAG_IN_TX); + abort = !!(from & LBR_FROM_FLAG_ABORT); + skip = 3; + } + from = (u64)((((s64)from) << skip) >> skip); + + if (x86_pmu.lbr_to_cycles) { + cycles = ((to >> 48) & LBR_INFO_CYCLES); + to = (u64)((((s64)to) << 16) >> 16); + } } - from = (u64)((((s64)from) << skip) >> skip); /* * Some CPUs report duplicated abort records, @@ -668,18 +816,109 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) if (abort && x86_pmu.lbr_double_abort && out > 0) out--; - cpuc->lbr_entries[out].from = from; - cpuc->lbr_entries[out].to = to; - cpuc->lbr_entries[out].mispred = mis; - cpuc->lbr_entries[out].predicted = pred; - cpuc->lbr_entries[out].in_tx = in_tx; - cpuc->lbr_entries[out].abort = abort; - cpuc->lbr_entries[out].cycles = cycles; - cpuc->lbr_entries[out].type = 0; - cpuc->lbr_entries[out].reserved = 0; + perf_clear_branch_entry_bitfields(br+out); + br[out].from = from; + br[out].to = to; + br[out].mispred = mis; + br[out].predicted = pred; + br[out].in_tx = in_tx; + br[out].abort = abort; + br[out].cycles = cycles; out++; } cpuc->lbr_stack.nr = out; + cpuc->lbr_stack.hw_idx = tos; +} + +static DEFINE_STATIC_KEY_FALSE(x86_lbr_mispred); +static DEFINE_STATIC_KEY_FALSE(x86_lbr_cycles); +static DEFINE_STATIC_KEY_FALSE(x86_lbr_type); + +static __always_inline int get_lbr_br_type(u64 info) +{ + int type = 0; + + if (static_branch_likely(&x86_lbr_type)) + type = (info & LBR_INFO_BR_TYPE) >> LBR_INFO_BR_TYPE_OFFSET; + + return type; +} + +static __always_inline bool get_lbr_mispred(u64 info) +{ + bool mispred = 0; + + if (static_branch_likely(&x86_lbr_mispred)) + mispred = !!(info & LBR_INFO_MISPRED); + + return mispred; +} + +static __always_inline u16 get_lbr_cycles(u64 info) +{ + u16 cycles = info & LBR_INFO_CYCLES; + + if (static_cpu_has(X86_FEATURE_ARCH_LBR) && + (!static_branch_likely(&x86_lbr_cycles) || + !(info & LBR_INFO_CYC_CNT_VALID))) + cycles = 0; + + return cycles; +} + +static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc, + struct lbr_entry *entries) +{ + struct perf_branch_entry *e; + struct lbr_entry *lbr; + u64 from, to, info; + int i; + + for (i = 0; i < x86_pmu.lbr_nr; i++) { + lbr = entries ? &entries[i] : NULL; + e = &cpuc->lbr_entries[i]; + + from = rdlbr_from(i, lbr); + /* + * Read LBR entries until invalid entry (0s) is detected. + */ + if (!from) + break; + + to = rdlbr_to(i, lbr); + info = rdlbr_info(i, lbr); + + perf_clear_branch_entry_bitfields(e); + + e->from = from; + e->to = to; + e->mispred = get_lbr_mispred(info); + e->predicted = !e->mispred; + e->in_tx = !!(info & LBR_INFO_IN_TX); + e->abort = !!(info & LBR_INFO_ABORT); + e->cycles = get_lbr_cycles(info); + e->type = get_lbr_br_type(info); + } + + cpuc->lbr_stack.nr = i; +} + +static void intel_pmu_arch_lbr_read(struct cpu_hw_events *cpuc) +{ + intel_pmu_store_lbr(cpuc, NULL); +} + +static void intel_pmu_arch_lbr_read_xsave(struct cpu_hw_events *cpuc) +{ + struct x86_perf_task_context_arch_lbr_xsave *xsave = cpuc->lbr_xsave; + + if (!xsave) { + intel_pmu_store_lbr(cpuc, NULL); + return; + } + xsaves(&xsave->xsave, XFEATURE_MASK_LBR); + + intel_pmu_store_lbr(cpuc, xsave->lbr.entries); } void intel_pmu_lbr_read(void) @@ -692,13 +931,11 @@ void intel_pmu_lbr_read(void) * This could be smarter and actually check the event, * but this simple approach seems to work for now. */ - if (!cpuc->lbr_users || cpuc->lbr_users == cpuc->lbr_pebs_users) + if (!cpuc->lbr_users || vlbr_exclude_host() || + cpuc->lbr_users == cpuc->lbr_pebs_users) return; - if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32) - intel_pmu_lbr_read_32(cpuc); - else - intel_pmu_lbr_read_64(cpuc); + x86_pmu.lbr_read(cpuc); intel_pmu_lbr_filter(cpuc); } @@ -798,6 +1035,19 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event) reg = &event->hw.branch_reg; reg->idx = EXTRA_REG_LBR; + if (static_cpu_has(X86_FEATURE_ARCH_LBR)) { + reg->config = mask; + + /* + * The Arch LBR HW can retrieve the common branch types + * from the LBR_INFO. It doesn't require the high overhead + * SW disassemble. + * Enable the branch type by default for the Arch LBR. + */ + reg->reg |= X86_BR_TYPE_SAVE; + return 0; + } + /* * The first 9 bits (LBR_SEL_MASK) in LBR_SELECT operate * in suppress mode. So LBR_SELECT should be set to @@ -809,7 +1059,7 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event) if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) && (br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) && - (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)) + x86_pmu.lbr_has_info) reg->config |= LBR_NO_INFO; return 0; @@ -841,218 +1091,26 @@ int intel_pmu_setup_lbr_filter(struct perf_event *event) return ret; } -/* - * return the type of control flow change at address "from" - * instruction is not necessarily a branch (in case of interrupt). - * - * The branch type returned also includes the priv level of the - * target of the control flow change (X86_BR_USER, X86_BR_KERNEL). - * - * If a branch type is unknown OR the instruction cannot be - * decoded (e.g., text page not present), then X86_BR_NONE is - * returned. - */ -static int branch_type(unsigned long from, unsigned long to, int abort) -{ - struct insn insn; - void *addr; - int bytes_read, bytes_left; - int ret = X86_BR_NONE; - int ext, to_plm, from_plm; - u8 buf[MAX_INSN_SIZE]; - int is64 = 0; - - to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER; - from_plm = kernel_ip(from) ? X86_BR_KERNEL : X86_BR_USER; - - /* - * maybe zero if lbr did not fill up after a reset by the time - * we get a PMU interrupt - */ - if (from == 0 || to == 0) - return X86_BR_NONE; - - if (abort) - return X86_BR_ABORT | to_plm; - - if (from_plm == X86_BR_USER) { - /* - * can happen if measuring at the user level only - * and we interrupt in a kernel thread, e.g., idle. - */ - if (!current->mm) - return X86_BR_NONE; - - /* may fail if text not present */ - bytes_left = copy_from_user_nmi(buf, (void __user *)from, - MAX_INSN_SIZE); - bytes_read = MAX_INSN_SIZE - bytes_left; - if (!bytes_read) - return X86_BR_NONE; - - addr = buf; - } else { - /* - * The LBR logs any address in the IP, even if the IP just - * faulted. This means userspace can control the from address. - * Ensure we don't blindy read any address by validating it is - * a known text address. - */ - if (kernel_text_address(from)) { - addr = (void *)from; - /* - * Assume we can get the maximum possible size - * when grabbing kernel data. This is not - * _strictly_ true since we could possibly be - * executing up next to a memory hole, but - * it is very unlikely to be a problem. - */ - bytes_read = MAX_INSN_SIZE; - } else { - return X86_BR_NONE; - } - } - - /* - * decoder needs to know the ABI especially - * on 64-bit systems running 32-bit apps - */ -#ifdef CONFIG_X86_64 - is64 = kernel_ip((unsigned long)addr) || !test_thread_flag(TIF_IA32); -#endif - insn_init(&insn, addr, bytes_read, is64); - insn_get_opcode(&insn); - if (!insn.opcode.got) - return X86_BR_ABORT; - - switch (insn.opcode.bytes[0]) { - case 0xf: - switch (insn.opcode.bytes[1]) { - case 0x05: /* syscall */ - case 0x34: /* sysenter */ - ret = X86_BR_SYSCALL; - break; - case 0x07: /* sysret */ - case 0x35: /* sysexit */ - ret = X86_BR_SYSRET; - break; - case 0x80 ... 0x8f: /* conditional */ - ret = X86_BR_JCC; - break; - default: - ret = X86_BR_NONE; - } - break; - case 0x70 ... 0x7f: /* conditional */ - ret = X86_BR_JCC; - break; - case 0xc2: /* near ret */ - case 0xc3: /* near ret */ - case 0xca: /* far ret */ - case 0xcb: /* far ret */ - ret = X86_BR_RET; - break; - case 0xcf: /* iret */ - ret = X86_BR_IRET; - break; - case 0xcc ... 0xce: /* int */ - ret = X86_BR_INT; - break; - case 0xe8: /* call near rel */ - insn_get_immediate(&insn); - if (insn.immediate1.value == 0) { - /* zero length call */ - ret = X86_BR_ZERO_CALL; - break; - } - /* fall through */ - case 0x9a: /* call far absolute */ - ret = X86_BR_CALL; - break; - case 0xe0 ... 0xe3: /* loop jmp */ - ret = X86_BR_JCC; - break; - case 0xe9 ... 0xeb: /* jmp */ - ret = X86_BR_JMP; - break; - case 0xff: /* call near absolute, call far absolute ind */ - insn_get_modrm(&insn); - ext = (insn.modrm.bytes[0] >> 3) & 0x7; - switch (ext) { - case 2: /* near ind call */ - case 3: /* far ind call */ - ret = X86_BR_IND_CALL; - break; - case 4: - case 5: - ret = X86_BR_IND_JMP; - break; - } - break; - default: - ret = X86_BR_NONE; - } - /* - * interrupts, traps, faults (and thus ring transition) may - * occur on any instructions. Thus, to classify them correctly, - * we need to first look at the from and to priv levels. If they - * are different and to is in the kernel, then it indicates - * a ring transition. If the from instruction is not a ring - * transition instr (syscall, systenter, int), then it means - * it was a irq, trap or fault. - * - * we have no way of detecting kernel to kernel faults. - */ - if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL - && ret != X86_BR_SYSCALL && ret != X86_BR_INT) - ret = X86_BR_IRQ; - - /* - * branch priv level determined by target as - * is done by HW when LBR_SELECT is implemented - */ - if (ret != X86_BR_NONE) - ret |= to_plm; - - return ret; -} - -#define X86_BR_TYPE_MAP_MAX 16 - -static int branch_map[X86_BR_TYPE_MAP_MAX] = { - PERF_BR_CALL, /* X86_BR_CALL */ - PERF_BR_RET, /* X86_BR_RET */ - PERF_BR_SYSCALL, /* X86_BR_SYSCALL */ - PERF_BR_SYSRET, /* X86_BR_SYSRET */ - PERF_BR_UNKNOWN, /* X86_BR_INT */ - PERF_BR_UNKNOWN, /* X86_BR_IRET */ - PERF_BR_COND, /* X86_BR_JCC */ - PERF_BR_UNCOND, /* X86_BR_JMP */ - PERF_BR_UNKNOWN, /* X86_BR_IRQ */ - PERF_BR_IND_CALL, /* X86_BR_IND_CALL */ - PERF_BR_UNKNOWN, /* X86_BR_ABORT */ - PERF_BR_UNKNOWN, /* X86_BR_IN_TX */ - PERF_BR_UNKNOWN, /* X86_BR_NO_TX */ - PERF_BR_CALL, /* X86_BR_ZERO_CALL */ - PERF_BR_UNKNOWN, /* X86_BR_CALL_STACK */ - PERF_BR_IND, /* X86_BR_IND_JMP */ +enum { + ARCH_LBR_BR_TYPE_JCC = 0, + ARCH_LBR_BR_TYPE_NEAR_IND_JMP = 1, + ARCH_LBR_BR_TYPE_NEAR_REL_JMP = 2, + ARCH_LBR_BR_TYPE_NEAR_IND_CALL = 3, + ARCH_LBR_BR_TYPE_NEAR_REL_CALL = 4, + ARCH_LBR_BR_TYPE_NEAR_RET = 5, + ARCH_LBR_BR_TYPE_KNOWN_MAX = ARCH_LBR_BR_TYPE_NEAR_RET, + + ARCH_LBR_BR_TYPE_MAP_MAX = 16, }; -static int -common_branch_type(int type) -{ - int i; - - type >>= 2; /* skip X86_BR_USER and X86_BR_KERNEL */ - - if (type) { - i = __ffs(type); - if (i < X86_BR_TYPE_MAP_MAX) - return branch_map[i]; - } - - return PERF_BR_UNKNOWN; -} +static const int arch_lbr_br_type_map[ARCH_LBR_BR_TYPE_MAP_MAX] = { + [ARCH_LBR_BR_TYPE_JCC] = X86_BR_JCC, + [ARCH_LBR_BR_TYPE_NEAR_IND_JMP] = X86_BR_IND_JMP, + [ARCH_LBR_BR_TYPE_NEAR_REL_JMP] = X86_BR_JMP, + [ARCH_LBR_BR_TYPE_NEAR_IND_CALL] = X86_BR_IND_CALL, + [ARCH_LBR_BR_TYPE_NEAR_REL_CALL] = X86_BR_CALL, + [ARCH_LBR_BR_TYPE_NEAR_RET] = X86_BR_RET, +}; /* * implement actual branch filter based on user demand. @@ -1066,7 +1124,7 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc) { u64 from, to; int br_sel = cpuc->br_sel; - int i, j, type; + int i, j, type, to_plm; bool compress = false; /* if sampling all branches, then nothing to filter */ @@ -1078,8 +1136,19 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc) from = cpuc->lbr_entries[i].from; to = cpuc->lbr_entries[i].to; + type = cpuc->lbr_entries[i].type; - type = branch_type(from, to, cpuc->lbr_entries[i].abort); + /* + * Parse the branch type recorded in LBR_x_INFO MSR. + * Doesn't support OTHER_BRANCH decoding for now. + * OTHER_BRANCH branch type still rely on software decoding. + */ + if (static_cpu_has(X86_FEATURE_ARCH_LBR) && + type <= ARCH_LBR_BR_TYPE_KNOWN_MAX) { + to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER; + type = arch_lbr_br_type_map[type] | to_plm; + } else + type = branch_type(from, to, cpuc->lbr_entries[i].abort); if (type != X86_BR_NONE && (br_sel & X86_BR_ANYTX)) { if (cpuc->lbr_entries[i].in_tx) type |= X86_BR_IN_TX; @@ -1114,25 +1183,18 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc) } } -void intel_pmu_store_pebs_lbrs(struct pebs_lbr *lbr) +void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); - int i; - cpuc->lbr_stack.nr = x86_pmu.lbr_nr; - for (i = 0; i < x86_pmu.lbr_nr; i++) { - u64 info = lbr->lbr[i].info; - struct perf_branch_entry *e = &cpuc->lbr_entries[i]; + /* Cannot get TOS for large PEBS and Arch LBR */ + if (static_cpu_has(X86_FEATURE_ARCH_LBR) || + (cpuc->n_pebs == cpuc->n_large_pebs)) + cpuc->lbr_stack.hw_idx = -1ULL; + else + cpuc->lbr_stack.hw_idx = intel_pmu_lbr_tos(); - e->from = lbr->lbr[i].from; - e->to = lbr->lbr[i].to; - e->mispred = !!(info & LBR_INFO_MISPRED); - e->predicted = !(info & LBR_INFO_MISPRED); - e->in_tx = !!(info & LBR_INFO_IN_TX); - e->abort = !!(info & LBR_INFO_ABORT); - e->cycles = info & LBR_INFO_CYCLES; - e->reserved = 0; - } + intel_pmu_store_lbr(cpuc, lbr); intel_pmu_lbr_filter(cpuc); } @@ -1189,6 +1251,26 @@ static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = { [PERF_SAMPLE_BRANCH_CALL_SHIFT] = LBR_REL_CALL, }; +static int arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = { + [PERF_SAMPLE_BRANCH_ANY_SHIFT] = ARCH_LBR_ANY, + [PERF_SAMPLE_BRANCH_USER_SHIFT] = ARCH_LBR_USER, + [PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = ARCH_LBR_KERNEL, + [PERF_SAMPLE_BRANCH_HV_SHIFT] = LBR_IGN, + [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = ARCH_LBR_RETURN | + ARCH_LBR_OTHER_BRANCH, + [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = ARCH_LBR_REL_CALL | + ARCH_LBR_IND_CALL | + ARCH_LBR_OTHER_BRANCH, + [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = ARCH_LBR_IND_CALL, + [PERF_SAMPLE_BRANCH_COND_SHIFT] = ARCH_LBR_JCC, + [PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] = ARCH_LBR_REL_CALL | + ARCH_LBR_IND_CALL | + ARCH_LBR_RETURN | + ARCH_LBR_CALL_STACK, + [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = ARCH_LBR_IND_JMP, + [PERF_SAMPLE_BRANCH_CALL_SHIFT] = ARCH_LBR_REL_CALL, +}; + /* core */ void __init intel_pmu_lbr_init_core(void) { @@ -1242,9 +1324,17 @@ void __init intel_pmu_lbr_init_snb(void) */ } +static inline struct kmem_cache * +create_lbr_kmem_cache(size_t size, size_t align) +{ + return kmem_cache_create("x86_lbr", size, align, 0, NULL); +} + /* haswell */ void intel_pmu_lbr_init_hsw(void) { + size_t size = sizeof(struct x86_perf_task_context); + x86_pmu.lbr_nr = 16; x86_pmu.lbr_tos = MSR_LBR_TOS; x86_pmu.lbr_from = MSR_LBR_NHM_FROM; @@ -1253,21 +1343,25 @@ void intel_pmu_lbr_init_hsw(void) x86_pmu.lbr_sel_mask = LBR_SEL_MASK; x86_pmu.lbr_sel_map = hsw_lbr_sel_map; - if (lbr_from_signext_quirk_needed()) - static_branch_enable(&lbr_from_quirk_key); + x86_get_pmu(smp_processor_id())->task_ctx_cache = create_lbr_kmem_cache(size, 0); } /* skylake */ __init void intel_pmu_lbr_init_skl(void) { + size_t size = sizeof(struct x86_perf_task_context); + x86_pmu.lbr_nr = 32; x86_pmu.lbr_tos = MSR_LBR_TOS; x86_pmu.lbr_from = MSR_LBR_NHM_FROM; x86_pmu.lbr_to = MSR_LBR_NHM_TO; + x86_pmu.lbr_info = MSR_LBR_INFO_0; x86_pmu.lbr_sel_mask = LBR_SEL_MASK; x86_pmu.lbr_sel_map = hsw_lbr_sel_map; + x86_get_pmu(smp_processor_id())->task_ctx_cache = create_lbr_kmem_cache(size, 0); + /* * SW branch filter usage: * - support syscall, sysret capture. @@ -1334,3 +1428,197 @@ void intel_pmu_lbr_init_knl(void) if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_LIP) x86_pmu.intel_cap.lbr_format = LBR_FORMAT_EIP_FLAGS; } + +void intel_pmu_lbr_init(void) +{ + switch (x86_pmu.intel_cap.lbr_format) { + case LBR_FORMAT_EIP_FLAGS2: + x86_pmu.lbr_has_tsx = 1; + x86_pmu.lbr_from_flags = 1; + if (lbr_from_signext_quirk_needed()) + static_branch_enable(&lbr_from_quirk_key); + break; + + case LBR_FORMAT_EIP_FLAGS: + x86_pmu.lbr_from_flags = 1; + break; + + case LBR_FORMAT_INFO: + x86_pmu.lbr_has_tsx = 1; + fallthrough; + case LBR_FORMAT_INFO2: + x86_pmu.lbr_has_info = 1; + break; + + case LBR_FORMAT_TIME: + x86_pmu.lbr_from_flags = 1; + x86_pmu.lbr_to_cycles = 1; + break; + } + + if (x86_pmu.lbr_has_info) { + /* + * Only used in combination with baseline pebs. + */ + static_branch_enable(&x86_lbr_mispred); + static_branch_enable(&x86_lbr_cycles); + } +} + +/* + * LBR state size is variable based on the max number of registers. + * This calculates the expected state size, which should match + * what the hardware enumerates for the size of XFEATURE_LBR. + */ +static inline unsigned int get_lbr_state_size(void) +{ + return sizeof(struct arch_lbr_state) + + x86_pmu.lbr_nr * sizeof(struct lbr_entry); +} + +static bool is_arch_lbr_xsave_available(void) +{ + if (!boot_cpu_has(X86_FEATURE_XSAVES)) + return false; + + /* + * Check the LBR state with the corresponding software structure. + * Disable LBR XSAVES support if the size doesn't match. + */ + if (xfeature_size(XFEATURE_LBR) == 0) + return false; + + if (WARN_ON(xfeature_size(XFEATURE_LBR) != get_lbr_state_size())) + return false; + + return true; +} + +void __init intel_pmu_arch_lbr_init(void) +{ + struct pmu *pmu = x86_get_pmu(smp_processor_id()); + union cpuid28_eax eax; + union cpuid28_ebx ebx; + union cpuid28_ecx ecx; + unsigned int unused_edx; + bool arch_lbr_xsave; + size_t size; + u64 lbr_nr; + + /* Arch LBR Capabilities */ + cpuid(28, &eax.full, &ebx.full, &ecx.full, &unused_edx); + + lbr_nr = fls(eax.split.lbr_depth_mask) * 8; + if (!lbr_nr) + goto clear_arch_lbr; + + /* Apply the max depth of Arch LBR */ + if (wrmsrl_safe(MSR_ARCH_LBR_DEPTH, lbr_nr)) + goto clear_arch_lbr; + + x86_pmu.lbr_depth_mask = eax.split.lbr_depth_mask; + x86_pmu.lbr_deep_c_reset = eax.split.lbr_deep_c_reset; + x86_pmu.lbr_lip = eax.split.lbr_lip; + x86_pmu.lbr_cpl = ebx.split.lbr_cpl; + x86_pmu.lbr_filter = ebx.split.lbr_filter; + x86_pmu.lbr_call_stack = ebx.split.lbr_call_stack; + x86_pmu.lbr_mispred = ecx.split.lbr_mispred; + x86_pmu.lbr_timed_lbr = ecx.split.lbr_timed_lbr; + x86_pmu.lbr_br_type = ecx.split.lbr_br_type; + x86_pmu.lbr_nr = lbr_nr; + + if (x86_pmu.lbr_mispred) + static_branch_enable(&x86_lbr_mispred); + if (x86_pmu.lbr_timed_lbr) + static_branch_enable(&x86_lbr_cycles); + if (x86_pmu.lbr_br_type) + static_branch_enable(&x86_lbr_type); + + arch_lbr_xsave = is_arch_lbr_xsave_available(); + if (arch_lbr_xsave) { + size = sizeof(struct x86_perf_task_context_arch_lbr_xsave) + + get_lbr_state_size(); + pmu->task_ctx_cache = create_lbr_kmem_cache(size, + XSAVE_ALIGNMENT); + } + + if (!pmu->task_ctx_cache) { + arch_lbr_xsave = false; + + size = sizeof(struct x86_perf_task_context_arch_lbr) + + lbr_nr * sizeof(struct lbr_entry); + pmu->task_ctx_cache = create_lbr_kmem_cache(size, 0); + } + + x86_pmu.lbr_from = MSR_ARCH_LBR_FROM_0; + x86_pmu.lbr_to = MSR_ARCH_LBR_TO_0; + x86_pmu.lbr_info = MSR_ARCH_LBR_INFO_0; + + /* LBR callstack requires both CPL and Branch Filtering support */ + if (!x86_pmu.lbr_cpl || + !x86_pmu.lbr_filter || + !x86_pmu.lbr_call_stack) + arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] = LBR_NOT_SUPP; + + if (!x86_pmu.lbr_cpl) { + arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_NOT_SUPP; + arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_NOT_SUPP; + } else if (!x86_pmu.lbr_filter) { + arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_NOT_SUPP; + arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_NOT_SUPP; + arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_NOT_SUPP; + arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_NOT_SUPP; + arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_NOT_SUPP; + arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_NOT_SUPP; + arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_CALL_SHIFT] = LBR_NOT_SUPP; + } + + x86_pmu.lbr_ctl_mask = ARCH_LBR_CTL_MASK; + x86_pmu.lbr_ctl_map = arch_lbr_ctl_map; + + if (!x86_pmu.lbr_cpl && !x86_pmu.lbr_filter) + x86_pmu.lbr_ctl_map = NULL; + + x86_pmu.lbr_reset = intel_pmu_arch_lbr_reset; + if (arch_lbr_xsave) { + x86_pmu.lbr_save = intel_pmu_arch_lbr_xsaves; + x86_pmu.lbr_restore = intel_pmu_arch_lbr_xrstors; + x86_pmu.lbr_read = intel_pmu_arch_lbr_read_xsave; + pr_cont("XSAVE "); + } else { + x86_pmu.lbr_save = intel_pmu_arch_lbr_save; + x86_pmu.lbr_restore = intel_pmu_arch_lbr_restore; + x86_pmu.lbr_read = intel_pmu_arch_lbr_read; + } + + pr_cont("Architectural LBR, "); + + return; + +clear_arch_lbr: + setup_clear_cpu_cap(X86_FEATURE_ARCH_LBR); +} + +/** + * x86_perf_get_lbr - get the LBR records information + * + * @lbr: the caller's memory to store the LBR records information + * + * Returns: 0 indicates the LBR info has been successfully obtained + */ +int x86_perf_get_lbr(struct x86_pmu_lbr *lbr) +{ + int lbr_fmt = x86_pmu.intel_cap.lbr_format; + + lbr->nr = x86_pmu.lbr_nr; + lbr->from = x86_pmu.lbr_from; + lbr->to = x86_pmu.lbr_to; + lbr->info = (lbr_fmt == LBR_FORMAT_INFO) ? x86_pmu.lbr_info : 0; + + return 0; +} +EXPORT_SYMBOL_GPL(x86_perf_get_lbr); + +struct event_constraint vlbr_constraint = + __EVENT_CONSTRAINT(INTEL_FIXED_VLBR_EVENT, (1ULL << INTEL_PMC_IDX_FIXED_VLBR), + FIXED_EVENT_FLAGS, 1, 0, PERF_X86_EVENT_LBR_SELECT); diff --git a/arch/x86/events/intel/p4.c b/arch/x86/events/intel/p4.c index a4cc66005ce8..03bbcc2fa2ff 100644 --- a/arch/x86/events/intel/p4.c +++ b/arch/x86/events/intel/p4.c @@ -24,7 +24,7 @@ struct p4_event_bind { unsigned int escr_msr[2]; /* ESCR MSR for this event */ unsigned int escr_emask; /* valid ESCR EventMask bits */ unsigned int shared; /* event is shared across threads */ - char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on abscence */ + char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on absence */ }; struct p4_pebs_bind { @@ -45,7 +45,7 @@ struct p4_pebs_bind { * it's needed for mapping P4_PEBS_CONFIG_METRIC_MASK bits of * event configuration to find out which values are to be * written into MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT - * resgisters + * registers */ static struct p4_pebs_bind p4_pebs_bind_map[] = { P4_GEN_PEBS_BIND(1stl_cache_load_miss_retired, 0x0000001, 0x0000001), @@ -947,7 +947,7 @@ static void p4_pmu_enable_pebs(u64 config) (void)wrmsrl_safe(MSR_P4_PEBS_MATRIX_VERT, (u64)bind->metric_vert); } -static void p4_pmu_enable_event(struct perf_event *event) +static void __p4_pmu_enable_event(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; int thread = p4_ht_config_thread(hwc->config); @@ -983,6 +983,16 @@ static void p4_pmu_enable_event(struct perf_event *event) (cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE); } +static DEFINE_PER_CPU(unsigned long [BITS_TO_LONGS(X86_PMC_IDX_MAX)], p4_running); + +static void p4_pmu_enable_event(struct perf_event *event) +{ + int idx = event->hw.idx; + + __set_bit(idx, per_cpu(p4_running, smp_processor_id())); + __p4_pmu_enable_event(event); +} + static void p4_pmu_enable_all(int added) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); @@ -992,10 +1002,33 @@ static void p4_pmu_enable_all(int added) struct perf_event *event = cpuc->events[idx]; if (!test_bit(idx, cpuc->active_mask)) continue; - p4_pmu_enable_event(event); + __p4_pmu_enable_event(event); } } +static int p4_pmu_set_period(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + s64 left = this_cpu_read(pmc_prev_left[hwc->idx]); + int ret; + + ret = x86_perf_event_set_period(event); + + if (hwc->event_base) { + /* + * This handles erratum N15 in intel doc 249199-029, + * the counter may not be updated correctly on write + * so we need a second write operation to do the trick + * (the official workaround didn't work) + * + * the former idea is taken from OProfile code + */ + wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask); + } + + return ret; +} + static int p4_pmu_handle_irq(struct pt_regs *regs) { struct perf_sample_data data; @@ -1012,7 +1045,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs) if (!test_bit(idx, cpuc->active_mask)) { /* catch in-flight IRQs */ - if (__test_and_clear_bit(idx, cpuc->running)) + if (__test_and_clear_bit(idx, per_cpu(p4_running, smp_processor_id()))) handled++; continue; } @@ -1034,7 +1067,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs) /* event overflow for sure */ perf_sample_data_init(&data, 0, hwc->last_period); - if (!x86_perf_event_set_period(event)) + if (!static_call(x86_pmu_set_period)(event)) continue; @@ -1306,6 +1339,9 @@ static __initconst const struct x86_pmu p4_pmu = { .enable_all = p4_pmu_enable_all, .enable = p4_pmu_enable_event, .disable = p4_pmu_disable_event, + + .set_period = p4_pmu_set_period, + .eventsel = MSR_P4_BPU_CCCR0, .perfctr = MSR_P4_BPU_PERFCTR0, .event_map = p4_pmu_event_map, @@ -1313,7 +1349,7 @@ static __initconst const struct x86_pmu p4_pmu = { .get_event_constraints = x86_get_event_constraints, /* * IF HT disabled we may need to use all - * ARCH_P4_MAX_CCCR counters simulaneously + * ARCH_P4_MAX_CCCR counters simultaneously * though leave it restricted at moment assuming * HT is on */ @@ -1324,15 +1360,6 @@ static __initconst const struct x86_pmu p4_pmu = { .max_period = (1ULL << (ARCH_P4_CNTRVAL_BITS - 1)) - 1, .hw_config = p4_hw_config, .schedule_events = p4_pmu_schedule_events, - /* - * This handles erratum N15 in intel doc 249199-029, - * the counter may not be updated correctly on write - * so we need a second write operation to do the trick - * (the official workaround didn't work) - * - * the former idea is taken from OProfile code - */ - .perfctr_second_write = 1, .format_attrs = intel_p4_formats_attr, }; diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 1db7a51d9792..82ef87e9a897 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -13,6 +13,8 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/types.h> +#include <linux/bits.h> +#include <linux/limits.h> #include <linux/slab.h> #include <linux/device.h> @@ -57,12 +59,14 @@ static struct pt_cap_desc { PT_CAP(mtc, 0, CPUID_EBX, BIT(3)), PT_CAP(ptwrite, 0, CPUID_EBX, BIT(4)), PT_CAP(power_event_trace, 0, CPUID_EBX, BIT(5)), + PT_CAP(event_trace, 0, CPUID_EBX, BIT(7)), + PT_CAP(tnt_disable, 0, CPUID_EBX, BIT(8)), PT_CAP(topa_output, 0, CPUID_ECX, BIT(0)), PT_CAP(topa_multiple_entries, 0, CPUID_ECX, BIT(1)), PT_CAP(single_range_output, 0, CPUID_ECX, BIT(2)), PT_CAP(output_subsys, 0, CPUID_ECX, BIT(3)), PT_CAP(payloads_lip, 0, CPUID_ECX, BIT(31)), - PT_CAP(num_address_ranges, 1, CPUID_EAX, 0x3), + PT_CAP(num_address_ranges, 1, CPUID_EAX, 0x7), PT_CAP(mtc_periods, 1, CPUID_EAX, 0xffff0000), PT_CAP(cycle_thresholds, 1, CPUID_EBX, 0xffff), PT_CAP(psb_periods, 1, CPUID_EBX, 0xffff0000), @@ -108,6 +112,8 @@ PMU_FORMAT_ATTR(tsc, "config:10" ); PMU_FORMAT_ATTR(noretcomp, "config:11" ); PMU_FORMAT_ATTR(ptw, "config:12" ); PMU_FORMAT_ATTR(branch, "config:13" ); +PMU_FORMAT_ATTR(event, "config:31" ); +PMU_FORMAT_ATTR(notnt, "config:55" ); PMU_FORMAT_ATTR(mtc_period, "config:14-17" ); PMU_FORMAT_ATTR(cyc_thresh, "config:19-22" ); PMU_FORMAT_ATTR(psb_period, "config:24-27" ); @@ -116,6 +122,8 @@ static struct attribute *pt_formats_attr[] = { &format_attr_pt.attr, &format_attr_cyc.attr, &format_attr_pwr_evt.attr, + &format_attr_event.attr, + &format_attr_notnt.attr, &format_attr_fup_on_ptw.attr, &format_attr_mtc.attr, &format_attr_tsc.attr, @@ -226,8 +234,6 @@ static int __init pt_pmu_hw_init(void) pt_pmu.vmx = true; } - attrs = NULL; - for (i = 0; i < PT_CPUID_LEAVES; i++) { cpuid_count(20, i, &pt_pmu.caps[CPUID_EAX + i*PT_CPUID_REGS_NUM], @@ -298,6 +304,8 @@ fail: RTIT_CTL_CYC_PSB | \ RTIT_CTL_MTC | \ RTIT_CTL_PWR_EVT_EN | \ + RTIT_CTL_EVENT_EN | \ + RTIT_CTL_NOTNT | \ RTIT_CTL_FUP_ON_PTW | \ RTIT_CTL_PTW_EN) @@ -352,6 +360,14 @@ static bool pt_event_valid(struct perf_event *event) !intel_pt_validate_hw_cap(PT_CAP_power_event_trace)) return false; + if (config & RTIT_CTL_EVENT_EN && + !intel_pt_validate_hw_cap(PT_CAP_event_trace)) + return false; + + if (config & RTIT_CTL_NOTNT && + !intel_pt_validate_hw_cap(PT_CAP_tnt_disable)) + return false; + if (config & RTIT_CTL_PTW) { if (!intel_pt_validate_hw_cap(PT_CAP_ptwrite)) return false; @@ -364,7 +380,7 @@ static bool pt_event_valid(struct perf_event *event) /* * Setting bit 0 (TraceEn in RTIT_CTL MSR) in the attr.config - * clears the assomption that BranchEn must always be enabled, + * clears the assumption that BranchEn must always be enabled, * as was the case with the first implementation of PT. * If this bit is not set, the legacy behavior is preserved * for compatibility with the older userspace. @@ -474,7 +490,7 @@ static u64 pt_config_filters(struct perf_event *event) pt->filters.filter[range].msr_b = filter->msr_b; } - rtit_ctl |= filter->config << pt_address_ranges[range].reg_off; + rtit_ctl |= (u64)filter->config << pt_address_ranges[range].reg_off; } return rtit_ctl; @@ -899,8 +915,9 @@ static void pt_handle_status(struct pt *pt) * means we are already losing data; need to let the decoder * know. */ - if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries) || - buf->output_off == pt_buffer_region_size(buf)) { + if (!buf->single && + (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries) || + buf->output_off == pt_buffer_region_size(buf))) { perf_aux_output_flag(&pt->handle, PERF_AUX_FLAG_TRUNCATED); advance++; @@ -1349,11 +1366,27 @@ static void pt_addr_filters_fini(struct perf_event *event) event->hw.addr_filters = NULL; } -static inline bool valid_kernel_ip(unsigned long ip) +#ifdef CONFIG_X86_64 +/* Clamp to a canonical address greater-than-or-equal-to the address given */ +static u64 clamp_to_ge_canonical_addr(u64 vaddr, u8 vaddr_bits) { - return virt_addr_valid(ip) && kernel_ip(ip); + return __is_canonical_address(vaddr, vaddr_bits) ? + vaddr : + -BIT_ULL(vaddr_bits - 1); } +/* Clamp to a canonical address less-than-or-equal-to the address given */ +static u64 clamp_to_le_canonical_addr(u64 vaddr, u8 vaddr_bits) +{ + return __is_canonical_address(vaddr, vaddr_bits) ? + vaddr : + BIT_ULL(vaddr_bits - 1) - 1; +} +#else +#define clamp_to_ge_canonical_addr(x, y) (x) +#define clamp_to_le_canonical_addr(x, y) (x) +#endif + static int pt_event_addr_filters_validate(struct list_head *filters) { struct perf_addr_filter *filter; @@ -1368,14 +1401,6 @@ static int pt_event_addr_filters_validate(struct list_head *filters) filter->action == PERF_ADDR_FILTER_ACTION_START) return -EOPNOTSUPP; - if (!filter->path.dentry) { - if (!valid_kernel_ip(filter->offset)) - return -EINVAL; - - if (!valid_kernel_ip(filter->offset + filter->size)) - return -EINVAL; - } - if (++range > intel_pt_validate_hw_cap(PT_CAP_num_address_ranges)) return -EOPNOTSUPP; } @@ -1399,9 +1424,26 @@ static void pt_event_addr_filters_sync(struct perf_event *event) if (filter->path.dentry && !fr[range].start) { msr_a = msr_b = 0; } else { - /* apply the offset */ - msr_a = fr[range].start; - msr_b = msr_a + fr[range].size - 1; + unsigned long n = fr[range].size - 1; + unsigned long a = fr[range].start; + unsigned long b; + + if (a > ULONG_MAX - n) + b = ULONG_MAX; + else + b = a + n; + /* + * Apply the offset. 64-bit addresses written to the + * MSRs must be canonical, but the range can encompass + * non-canonical addresses. Since software cannot + * execute at non-canonical addresses, adjusting to + * canonical addresses does not affect the result of the + * address filter. + */ + msr_a = clamp_to_ge_canonical_addr(a, boot_cpu_data.x86_virt_bits); + msr_b = clamp_to_le_canonical_addr(b, boot_cpu_data.x86_virt_bits); + if (msr_b < msr_a) + msr_a = msr_b = 0; } filters->filter[range].msr_a = msr_a; @@ -1710,7 +1752,7 @@ static __init int pt_init(void) if (!boot_cpu_has(X86_FEATURE_INTEL_PT)) return -ENODEV; - get_online_cpus(); + cpus_read_lock(); for_each_online_cpu(cpu) { u64 ctl; @@ -1718,7 +1760,7 @@ static __init int pt_init(void) if (!ret && (ctl & RTIT_CTL_TRACEEN)) prior_warn++; } - put_online_cpus(); + cpus_read_unlock(); if (prior_warn) { x86_add_exclusive(x86_lbr_exclusive_pt); diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c deleted file mode 100644 index 09913121e726..000000000000 --- a/arch/x86/events/intel/rapl.c +++ /dev/null @@ -1,804 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Support Intel RAPL energy consumption counters - * Copyright (C) 2013 Google, Inc., Stephane Eranian - * - * Intel RAPL interface is specified in the IA-32 Manual Vol3b - * section 14.7.1 (September 2013) - * - * RAPL provides more controls than just reporting energy consumption - * however here we only expose the 3 energy consumption free running - * counters (pp0, pkg, dram). - * - * Each of those counters increments in a power unit defined by the - * RAPL_POWER_UNIT MSR. On SandyBridge, this unit is 1/(2^16) Joules - * but it can vary. - * - * Counter to rapl events mappings: - * - * pp0 counter: consumption of all physical cores (power plane 0) - * event: rapl_energy_cores - * perf code: 0x1 - * - * pkg counter: consumption of the whole processor package - * event: rapl_energy_pkg - * perf code: 0x2 - * - * dram counter: consumption of the dram domain (servers only) - * event: rapl_energy_dram - * perf code: 0x3 - * - * gpu counter: consumption of the builtin-gpu domain (client only) - * event: rapl_energy_gpu - * perf code: 0x4 - * - * psys counter: consumption of the builtin-psys domain (client only) - * event: rapl_energy_psys - * perf code: 0x5 - * - * We manage those counters as free running (read-only). They may be - * use simultaneously by other tools, such as turbostat. - * - * The events only support system-wide mode counting. There is no - * sampling support because it does not make sense and is not - * supported by the RAPL hardware. - * - * Because we want to avoid floating-point operations in the kernel, - * the events are all reported in fixed point arithmetic (32.32). - * Tools must adjust the counts to convert them to Watts using - * the duration of the measurement. Tools may use a function such as - * ldexp(raw_count, -32); - */ - -#define pr_fmt(fmt) "RAPL PMU: " fmt - -#include <linux/module.h> -#include <linux/slab.h> -#include <linux/perf_event.h> -#include <linux/nospec.h> -#include <asm/cpu_device_id.h> -#include <asm/intel-family.h> -#include "../perf_event.h" -#include "../probe.h" - -MODULE_LICENSE("GPL"); - -/* - * RAPL energy status counters - */ -enum perf_rapl_events { - PERF_RAPL_PP0 = 0, /* all cores */ - PERF_RAPL_PKG, /* entire package */ - PERF_RAPL_RAM, /* DRAM */ - PERF_RAPL_PP1, /* gpu */ - PERF_RAPL_PSYS, /* psys */ - - PERF_RAPL_MAX, - NR_RAPL_DOMAINS = PERF_RAPL_MAX, -}; - -static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = { - "pp0-core", - "package", - "dram", - "pp1-gpu", - "psys", -}; - -/* - * event code: LSB 8 bits, passed in attr->config - * any other bit is reserved - */ -#define RAPL_EVENT_MASK 0xFFULL - -#define DEFINE_RAPL_FORMAT_ATTR(_var, _name, _format) \ -static ssize_t __rapl_##_var##_show(struct kobject *kobj, \ - struct kobj_attribute *attr, \ - char *page) \ -{ \ - BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \ - return sprintf(page, _format "\n"); \ -} \ -static struct kobj_attribute format_attr_##_var = \ - __ATTR(_name, 0444, __rapl_##_var##_show, NULL) - -#define RAPL_CNTR_WIDTH 32 - -#define RAPL_EVENT_ATTR_STR(_name, v, str) \ -static struct perf_pmu_events_attr event_attr_##v = { \ - .attr = __ATTR(_name, 0444, perf_event_sysfs_show, NULL), \ - .id = 0, \ - .event_str = str, \ -}; - -struct rapl_pmu { - raw_spinlock_t lock; - int n_active; - int cpu; - struct list_head active_list; - struct pmu *pmu; - ktime_t timer_interval; - struct hrtimer hrtimer; -}; - -struct rapl_pmus { - struct pmu pmu; - unsigned int maxdie; - struct rapl_pmu *pmus[]; -}; - -struct rapl_model { - unsigned long events; - bool apply_quirk; -}; - - /* 1/2^hw_unit Joule */ -static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly; -static struct rapl_pmus *rapl_pmus; -static cpumask_t rapl_cpu_mask; -static unsigned int rapl_cntr_mask; -static u64 rapl_timer_ms; -static struct perf_msr rapl_msrs[]; - -static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu) -{ - unsigned int dieid = topology_logical_die_id(cpu); - - /* - * The unsigned check also catches the '-1' return value for non - * existent mappings in the topology map. - */ - return dieid < rapl_pmus->maxdie ? rapl_pmus->pmus[dieid] : NULL; -} - -static inline u64 rapl_read_counter(struct perf_event *event) -{ - u64 raw; - rdmsrl(event->hw.event_base, raw); - return raw; -} - -static inline u64 rapl_scale(u64 v, int cfg) -{ - if (cfg > NR_RAPL_DOMAINS) { - pr_warn("Invalid domain %d, failed to scale data\n", cfg); - return v; - } - /* - * scale delta to smallest unit (1/2^32) - * users must then scale back: count * 1/(1e9*2^32) to get Joules - * or use ldexp(count, -32). - * Watts = Joules/Time delta - */ - return v << (32 - rapl_hw_unit[cfg - 1]); -} - -static u64 rapl_event_update(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - u64 prev_raw_count, new_raw_count; - s64 delta, sdelta; - int shift = RAPL_CNTR_WIDTH; - -again: - prev_raw_count = local64_read(&hwc->prev_count); - rdmsrl(event->hw.event_base, new_raw_count); - - if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, - new_raw_count) != prev_raw_count) { - cpu_relax(); - goto again; - } - - /* - * Now we have the new raw value and have updated the prev - * timestamp already. We can now calculate the elapsed delta - * (event-)time and add that to the generic event. - * - * Careful, not all hw sign-extends above the physical width - * of the count. - */ - delta = (new_raw_count << shift) - (prev_raw_count << shift); - delta >>= shift; - - sdelta = rapl_scale(delta, event->hw.config); - - local64_add(sdelta, &event->count); - - return new_raw_count; -} - -static void rapl_start_hrtimer(struct rapl_pmu *pmu) -{ - hrtimer_start(&pmu->hrtimer, pmu->timer_interval, - HRTIMER_MODE_REL_PINNED); -} - -static enum hrtimer_restart rapl_hrtimer_handle(struct hrtimer *hrtimer) -{ - struct rapl_pmu *pmu = container_of(hrtimer, struct rapl_pmu, hrtimer); - struct perf_event *event; - unsigned long flags; - - if (!pmu->n_active) - return HRTIMER_NORESTART; - - raw_spin_lock_irqsave(&pmu->lock, flags); - - list_for_each_entry(event, &pmu->active_list, active_entry) - rapl_event_update(event); - - raw_spin_unlock_irqrestore(&pmu->lock, flags); - - hrtimer_forward_now(hrtimer, pmu->timer_interval); - - return HRTIMER_RESTART; -} - -static void rapl_hrtimer_init(struct rapl_pmu *pmu) -{ - struct hrtimer *hr = &pmu->hrtimer; - - hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - hr->function = rapl_hrtimer_handle; -} - -static void __rapl_pmu_event_start(struct rapl_pmu *pmu, - struct perf_event *event) -{ - if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) - return; - - event->hw.state = 0; - - list_add_tail(&event->active_entry, &pmu->active_list); - - local64_set(&event->hw.prev_count, rapl_read_counter(event)); - - pmu->n_active++; - if (pmu->n_active == 1) - rapl_start_hrtimer(pmu); -} - -static void rapl_pmu_event_start(struct perf_event *event, int mode) -{ - struct rapl_pmu *pmu = event->pmu_private; - unsigned long flags; - - raw_spin_lock_irqsave(&pmu->lock, flags); - __rapl_pmu_event_start(pmu, event); - raw_spin_unlock_irqrestore(&pmu->lock, flags); -} - -static void rapl_pmu_event_stop(struct perf_event *event, int mode) -{ - struct rapl_pmu *pmu = event->pmu_private; - struct hw_perf_event *hwc = &event->hw; - unsigned long flags; - - raw_spin_lock_irqsave(&pmu->lock, flags); - - /* mark event as deactivated and stopped */ - if (!(hwc->state & PERF_HES_STOPPED)) { - WARN_ON_ONCE(pmu->n_active <= 0); - pmu->n_active--; - if (pmu->n_active == 0) - hrtimer_cancel(&pmu->hrtimer); - - list_del(&event->active_entry); - - WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); - hwc->state |= PERF_HES_STOPPED; - } - - /* check if update of sw counter is necessary */ - if ((mode & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { - /* - * Drain the remaining delta count out of a event - * that we are disabling: - */ - rapl_event_update(event); - hwc->state |= PERF_HES_UPTODATE; - } - - raw_spin_unlock_irqrestore(&pmu->lock, flags); -} - -static int rapl_pmu_event_add(struct perf_event *event, int mode) -{ - struct rapl_pmu *pmu = event->pmu_private; - struct hw_perf_event *hwc = &event->hw; - unsigned long flags; - - raw_spin_lock_irqsave(&pmu->lock, flags); - - hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; - - if (mode & PERF_EF_START) - __rapl_pmu_event_start(pmu, event); - - raw_spin_unlock_irqrestore(&pmu->lock, flags); - - return 0; -} - -static void rapl_pmu_event_del(struct perf_event *event, int flags) -{ - rapl_pmu_event_stop(event, PERF_EF_UPDATE); -} - -static int rapl_pmu_event_init(struct perf_event *event) -{ - u64 cfg = event->attr.config & RAPL_EVENT_MASK; - int bit, ret = 0; - struct rapl_pmu *pmu; - - /* only look at RAPL events */ - if (event->attr.type != rapl_pmus->pmu.type) - return -ENOENT; - - /* check only supported bits are set */ - if (event->attr.config & ~RAPL_EVENT_MASK) - return -EINVAL; - - if (event->cpu < 0) - return -EINVAL; - - event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG; - - if (!cfg || cfg >= NR_RAPL_DOMAINS + 1) - return -EINVAL; - - cfg = array_index_nospec((long)cfg, NR_RAPL_DOMAINS + 1); - bit = cfg - 1; - - /* check event supported */ - if (!(rapl_cntr_mask & (1 << bit))) - return -EINVAL; - - /* unsupported modes and filters */ - if (event->attr.sample_period) /* no sampling */ - return -EINVAL; - - /* must be done before validate_group */ - pmu = cpu_to_rapl_pmu(event->cpu); - if (!pmu) - return -EINVAL; - event->cpu = pmu->cpu; - event->pmu_private = pmu; - event->hw.event_base = rapl_msrs[bit].msr; - event->hw.config = cfg; - event->hw.idx = bit; - - return ret; -} - -static void rapl_pmu_event_read(struct perf_event *event) -{ - rapl_event_update(event); -} - -static ssize_t rapl_get_attr_cpumask(struct device *dev, - struct device_attribute *attr, char *buf) -{ - return cpumap_print_to_pagebuf(true, buf, &rapl_cpu_mask); -} - -static DEVICE_ATTR(cpumask, S_IRUGO, rapl_get_attr_cpumask, NULL); - -static struct attribute *rapl_pmu_attrs[] = { - &dev_attr_cpumask.attr, - NULL, -}; - -static struct attribute_group rapl_pmu_attr_group = { - .attrs = rapl_pmu_attrs, -}; - -RAPL_EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01"); -RAPL_EVENT_ATTR_STR(energy-pkg , rapl_pkg, "event=0x02"); -RAPL_EVENT_ATTR_STR(energy-ram , rapl_ram, "event=0x03"); -RAPL_EVENT_ATTR_STR(energy-gpu , rapl_gpu, "event=0x04"); -RAPL_EVENT_ATTR_STR(energy-psys, rapl_psys, "event=0x05"); - -RAPL_EVENT_ATTR_STR(energy-cores.unit, rapl_cores_unit, "Joules"); -RAPL_EVENT_ATTR_STR(energy-pkg.unit , rapl_pkg_unit, "Joules"); -RAPL_EVENT_ATTR_STR(energy-ram.unit , rapl_ram_unit, "Joules"); -RAPL_EVENT_ATTR_STR(energy-gpu.unit , rapl_gpu_unit, "Joules"); -RAPL_EVENT_ATTR_STR(energy-psys.unit, rapl_psys_unit, "Joules"); - -/* - * we compute in 0.23 nJ increments regardless of MSR - */ -RAPL_EVENT_ATTR_STR(energy-cores.scale, rapl_cores_scale, "2.3283064365386962890625e-10"); -RAPL_EVENT_ATTR_STR(energy-pkg.scale, rapl_pkg_scale, "2.3283064365386962890625e-10"); -RAPL_EVENT_ATTR_STR(energy-ram.scale, rapl_ram_scale, "2.3283064365386962890625e-10"); -RAPL_EVENT_ATTR_STR(energy-gpu.scale, rapl_gpu_scale, "2.3283064365386962890625e-10"); -RAPL_EVENT_ATTR_STR(energy-psys.scale, rapl_psys_scale, "2.3283064365386962890625e-10"); - -/* - * There are no default events, but we need to create - * "events" group (with empty attrs) before updating - * it with detected events. - */ -static struct attribute *attrs_empty[] = { - NULL, -}; - -static struct attribute_group rapl_pmu_events_group = { - .name = "events", - .attrs = attrs_empty, -}; - -DEFINE_RAPL_FORMAT_ATTR(event, event, "config:0-7"); -static struct attribute *rapl_formats_attr[] = { - &format_attr_event.attr, - NULL, -}; - -static struct attribute_group rapl_pmu_format_group = { - .name = "format", - .attrs = rapl_formats_attr, -}; - -static const struct attribute_group *rapl_attr_groups[] = { - &rapl_pmu_attr_group, - &rapl_pmu_format_group, - &rapl_pmu_events_group, - NULL, -}; - -static struct attribute *rapl_events_cores[] = { - EVENT_PTR(rapl_cores), - EVENT_PTR(rapl_cores_unit), - EVENT_PTR(rapl_cores_scale), - NULL, -}; - -static struct attribute_group rapl_events_cores_group = { - .name = "events", - .attrs = rapl_events_cores, -}; - -static struct attribute *rapl_events_pkg[] = { - EVENT_PTR(rapl_pkg), - EVENT_PTR(rapl_pkg_unit), - EVENT_PTR(rapl_pkg_scale), - NULL, -}; - -static struct attribute_group rapl_events_pkg_group = { - .name = "events", - .attrs = rapl_events_pkg, -}; - -static struct attribute *rapl_events_ram[] = { - EVENT_PTR(rapl_ram), - EVENT_PTR(rapl_ram_unit), - EVENT_PTR(rapl_ram_scale), - NULL, -}; - -static struct attribute_group rapl_events_ram_group = { - .name = "events", - .attrs = rapl_events_ram, -}; - -static struct attribute *rapl_events_gpu[] = { - EVENT_PTR(rapl_gpu), - EVENT_PTR(rapl_gpu_unit), - EVENT_PTR(rapl_gpu_scale), - NULL, -}; - -static struct attribute_group rapl_events_gpu_group = { - .name = "events", - .attrs = rapl_events_gpu, -}; - -static struct attribute *rapl_events_psys[] = { - EVENT_PTR(rapl_psys), - EVENT_PTR(rapl_psys_unit), - EVENT_PTR(rapl_psys_scale), - NULL, -}; - -static struct attribute_group rapl_events_psys_group = { - .name = "events", - .attrs = rapl_events_psys, -}; - -static bool test_msr(int idx, void *data) -{ - return test_bit(idx, (unsigned long *) data); -} - -static struct perf_msr rapl_msrs[] = { - [PERF_RAPL_PP0] = { MSR_PP0_ENERGY_STATUS, &rapl_events_cores_group, test_msr }, - [PERF_RAPL_PKG] = { MSR_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr }, - [PERF_RAPL_RAM] = { MSR_DRAM_ENERGY_STATUS, &rapl_events_ram_group, test_msr }, - [PERF_RAPL_PP1] = { MSR_PP1_ENERGY_STATUS, &rapl_events_gpu_group, test_msr }, - [PERF_RAPL_PSYS] = { MSR_PLATFORM_ENERGY_STATUS, &rapl_events_psys_group, test_msr }, -}; - -static int rapl_cpu_offline(unsigned int cpu) -{ - struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu); - int target; - - /* Check if exiting cpu is used for collecting rapl events */ - if (!cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask)) - return 0; - - pmu->cpu = -1; - /* Find a new cpu to collect rapl events */ - target = cpumask_any_but(topology_die_cpumask(cpu), cpu); - - /* Migrate rapl events to the new target */ - if (target < nr_cpu_ids) { - cpumask_set_cpu(target, &rapl_cpu_mask); - pmu->cpu = target; - perf_pmu_migrate_context(pmu->pmu, cpu, target); - } - return 0; -} - -static int rapl_cpu_online(unsigned int cpu) -{ - struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu); - int target; - - if (!pmu) { - pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu)); - if (!pmu) - return -ENOMEM; - - raw_spin_lock_init(&pmu->lock); - INIT_LIST_HEAD(&pmu->active_list); - pmu->pmu = &rapl_pmus->pmu; - pmu->timer_interval = ms_to_ktime(rapl_timer_ms); - rapl_hrtimer_init(pmu); - - rapl_pmus->pmus[topology_logical_die_id(cpu)] = pmu; - } - - /* - * Check if there is an online cpu in the package which collects rapl - * events already. - */ - target = cpumask_any_and(&rapl_cpu_mask, topology_die_cpumask(cpu)); - if (target < nr_cpu_ids) - return 0; - - cpumask_set_cpu(cpu, &rapl_cpu_mask); - pmu->cpu = cpu; - return 0; -} - -static int rapl_check_hw_unit(bool apply_quirk) -{ - u64 msr_rapl_power_unit_bits; - int i; - - /* protect rdmsrl() to handle virtualization */ - if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &msr_rapl_power_unit_bits)) - return -1; - for (i = 0; i < NR_RAPL_DOMAINS; i++) - rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL; - - /* - * DRAM domain on HSW server and KNL has fixed energy unit which can be - * different than the unit from power unit MSR. See - * "Intel Xeon Processor E5-1600 and E5-2600 v3 Product Families, V2 - * of 2. Datasheet, September 2014, Reference Number: 330784-001 " - */ - if (apply_quirk) - rapl_hw_unit[PERF_RAPL_RAM] = 16; - - /* - * Calculate the timer rate: - * Use reference of 200W for scaling the timeout to avoid counter - * overflows. 200W = 200 Joules/sec - * Divide interval by 2 to avoid lockstep (2 * 100) - * if hw unit is 32, then we use 2 ms 1/200/2 - */ - rapl_timer_ms = 2; - if (rapl_hw_unit[0] < 32) { - rapl_timer_ms = (1000 / (2 * 100)); - rapl_timer_ms *= (1ULL << (32 - rapl_hw_unit[0] - 1)); - } - return 0; -} - -static void __init rapl_advertise(void) -{ - int i; - - pr_info("API unit is 2^-32 Joules, %d fixed counters, %llu ms ovfl timer\n", - hweight32(rapl_cntr_mask), rapl_timer_ms); - - for (i = 0; i < NR_RAPL_DOMAINS; i++) { - if (rapl_cntr_mask & (1 << i)) { - pr_info("hw unit of domain %s 2^-%d Joules\n", - rapl_domain_names[i], rapl_hw_unit[i]); - } - } -} - -static void cleanup_rapl_pmus(void) -{ - int i; - - for (i = 0; i < rapl_pmus->maxdie; i++) - kfree(rapl_pmus->pmus[i]); - kfree(rapl_pmus); -} - -static const struct attribute_group *rapl_attr_update[] = { - &rapl_events_cores_group, - &rapl_events_pkg_group, - &rapl_events_ram_group, - &rapl_events_gpu_group, - &rapl_events_gpu_group, - NULL, -}; - -static int __init init_rapl_pmus(void) -{ - int maxdie = topology_max_packages() * topology_max_die_per_package(); - size_t size; - - size = sizeof(*rapl_pmus) + maxdie * sizeof(struct rapl_pmu *); - rapl_pmus = kzalloc(size, GFP_KERNEL); - if (!rapl_pmus) - return -ENOMEM; - - rapl_pmus->maxdie = maxdie; - rapl_pmus->pmu.attr_groups = rapl_attr_groups; - rapl_pmus->pmu.attr_update = rapl_attr_update; - rapl_pmus->pmu.task_ctx_nr = perf_invalid_context; - rapl_pmus->pmu.event_init = rapl_pmu_event_init; - rapl_pmus->pmu.add = rapl_pmu_event_add; - rapl_pmus->pmu.del = rapl_pmu_event_del; - rapl_pmus->pmu.start = rapl_pmu_event_start; - rapl_pmus->pmu.stop = rapl_pmu_event_stop; - rapl_pmus->pmu.read = rapl_pmu_event_read; - rapl_pmus->pmu.module = THIS_MODULE; - rapl_pmus->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE; - return 0; -} - -#define X86_RAPL_MODEL_MATCH(model, init) \ - { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&init } - -static struct rapl_model model_snb = { - .events = BIT(PERF_RAPL_PP0) | - BIT(PERF_RAPL_PKG) | - BIT(PERF_RAPL_PP1), - .apply_quirk = false, -}; - -static struct rapl_model model_snbep = { - .events = BIT(PERF_RAPL_PP0) | - BIT(PERF_RAPL_PKG) | - BIT(PERF_RAPL_RAM), - .apply_quirk = false, -}; - -static struct rapl_model model_hsw = { - .events = BIT(PERF_RAPL_PP0) | - BIT(PERF_RAPL_PKG) | - BIT(PERF_RAPL_RAM) | - BIT(PERF_RAPL_PP1), - .apply_quirk = false, -}; - -static struct rapl_model model_hsx = { - .events = BIT(PERF_RAPL_PP0) | - BIT(PERF_RAPL_PKG) | - BIT(PERF_RAPL_RAM), - .apply_quirk = true, -}; - -static struct rapl_model model_knl = { - .events = BIT(PERF_RAPL_PKG) | - BIT(PERF_RAPL_RAM), - .apply_quirk = true, -}; - -static struct rapl_model model_skl = { - .events = BIT(PERF_RAPL_PP0) | - BIT(PERF_RAPL_PKG) | - BIT(PERF_RAPL_RAM) | - BIT(PERF_RAPL_PP1) | - BIT(PERF_RAPL_PSYS), - .apply_quirk = false, -}; - -static const struct x86_cpu_id rapl_model_match[] __initconst = { - X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE, model_snb), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X, model_snbep), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE, model_snb), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, model_snbep), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL, model_hsw), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_X, model_hsx), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_L, model_hsw), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_G, model_hsw), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL, model_hsw), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_G, model_hsw), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, model_hsx), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_D, model_hsx), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, model_knl), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNM, model_knl), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_L, model_skl), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE, model_skl), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, model_hsx), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_L, model_skl), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE, model_skl), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_CANNONLAKE_L, model_skl), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT, model_hsw), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_D, model_hsw), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_PLUS, model_hsw), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_ICELAKE_L, model_skl), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_ICELAKE, model_skl), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_COMETLAKE_L, model_skl), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_COMETLAKE, model_skl), - {}, -}; - -MODULE_DEVICE_TABLE(x86cpu, rapl_model_match); - -static int __init rapl_pmu_init(void) -{ - const struct x86_cpu_id *id; - struct rapl_model *rm; - int ret; - - id = x86_match_cpu(rapl_model_match); - if (!id) - return -ENODEV; - - rm = (struct rapl_model *) id->driver_data; - rapl_cntr_mask = perf_msr_probe(rapl_msrs, PERF_RAPL_MAX, - false, (void *) &rm->events); - - ret = rapl_check_hw_unit(rm->apply_quirk); - if (ret) - return ret; - - ret = init_rapl_pmus(); - if (ret) - return ret; - - /* - * Install callbacks. Core will call them for each online cpu. - */ - ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_RAPL_ONLINE, - "perf/x86/rapl:online", - rapl_cpu_online, rapl_cpu_offline); - if (ret) - goto out; - - ret = perf_pmu_register(&rapl_pmus->pmu, "power", -1); - if (ret) - goto out1; - - rapl_advertise(); - return 0; - -out1: - cpuhp_remove_state(CPUHP_AP_PERF_X86_RAPL_ONLINE); -out: - pr_warn("Initialization failed (%d), disabled\n", ret); - cleanup_rapl_pmus(); - return ret; -} -module_init(rapl_pmu_init); - -static void __exit intel_rapl_exit(void) -{ - cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_RAPL_ONLINE); - perf_pmu_unregister(&rapl_pmus->pmu); - cleanup_rapl_pmus(); -} -module_exit(intel_rapl_exit); diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 86467f85c383..6f1ccc57a692 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -4,19 +4,26 @@ #include <asm/cpu_device_id.h> #include <asm/intel-family.h> #include "uncore.h" +#include "uncore_discovery.h" -static struct intel_uncore_type *empty_uncore[] = { NULL, }; +static bool uncore_no_discover; +module_param(uncore_no_discover, bool, 0); +MODULE_PARM_DESC(uncore_no_discover, "Don't enable the Intel uncore PerfMon discovery mechanism " + "(default: enable the discovery mechanism)."); +struct intel_uncore_type *empty_uncore[] = { NULL, }; struct intel_uncore_type **uncore_msr_uncores = empty_uncore; struct intel_uncore_type **uncore_pci_uncores = empty_uncore; struct intel_uncore_type **uncore_mmio_uncores = empty_uncore; static bool pcidrv_registered; struct pci_driver *uncore_pci_driver; +/* The PCI driver for the device which the uncore doesn't own. */ +struct pci_driver *uncore_pci_sub_driver; /* pci bus to socket mapping */ DEFINE_RAW_SPINLOCK(pci2phy_map_lock); struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head); struct pci_extra_dev *uncore_extra_pci_dev; -static int max_dies; +int __uncore_max_dies; /* mask of cpus that collect uncore events */ static cpumask_t uncore_cpu_mask; @@ -29,21 +36,33 @@ struct event_constraint uncore_constraint_empty = MODULE_LICENSE("GPL"); -int uncore_pcibus_to_physid(struct pci_bus *bus) +int uncore_pcibus_to_dieid(struct pci_bus *bus) { struct pci2phy_map *map; - int phys_id = -1; + int die_id = -1; raw_spin_lock(&pci2phy_map_lock); list_for_each_entry(map, &pci2phy_map_head, list) { if (map->segment == pci_domain_nr(bus)) { - phys_id = map->pbus_to_physid[bus->number]; + die_id = map->pbus_to_dieid[bus->number]; break; } } raw_spin_unlock(&pci2phy_map_lock); - return phys_id; + return die_id; +} + +int uncore_die_to_segment(int die) +{ + struct pci_bus *bus = NULL; + + /* Find first pci bus which attributes to specified die. */ + while ((bus = pci_find_next_bus(bus)) && + (die != uncore_pcibus_to_dieid(bus))) + ; + + return bus ? pci_domain_nr(bus) : -EINVAL; } static void uncore_free_pcibus_map(void) @@ -84,7 +103,7 @@ lookup: alloc = NULL; map->segment = segment; for (i = 0; i < 256; i++) - map->pbus_to_physid[i] = -1; + map->pbus_to_dieid[i] = -1; list_add_tail(&map->list, &pci2phy_map_head); end: @@ -92,8 +111,8 @@ end: return map; } -ssize_t uncore_event_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) +ssize_t uncore_event_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct uncore_event_desc *event = container_of(attr, struct uncore_event_desc, attr); @@ -108,7 +127,7 @@ struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu * The unsigned check also catches the '-1' return value for non * existent mappings in the topology map. */ - return dieid < max_dies ? pmu->boxes[dieid] : NULL; + return dieid < uncore_max_dies() ? pmu->boxes[dieid] : NULL; } u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event) @@ -132,6 +151,9 @@ u64 uncore_mmio_read_counter(struct intel_uncore_box *box, if (!box->io_addr) return 0; + if (!uncore_mmio_is_valid_offset(box, event->hw.event_base)) + return 0; + return readq(box->io_addr + event->hw.event_base); } @@ -327,7 +349,6 @@ static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, uncore_pmu_init_hrtimer(box); box->cpu = -1; - box->pci_phys_id = -1; box->dieid = -1; /* set default hrtimer timeout */ @@ -780,8 +801,6 @@ static void uncore_pmu_enable(struct pmu *pmu) struct intel_uncore_box *box; uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu); - if (!uncore_pmu) - return; box = uncore_pmu_to_box(uncore_pmu, smp_processor_id()); if (!box) @@ -797,8 +816,6 @@ static void uncore_pmu_disable(struct pmu *pmu) struct intel_uncore_box *box; uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu); - if (!uncore_pmu) - return; box = uncore_pmu_to_box(uncore_pmu, smp_processor_id()); if (!box) @@ -825,6 +842,45 @@ static const struct attribute_group uncore_pmu_attr_group = { .attrs = uncore_pmu_attrs, }; +void uncore_get_alias_name(char *pmu_name, struct intel_uncore_pmu *pmu) +{ + struct intel_uncore_type *type = pmu->type; + + if (type->num_boxes == 1) + sprintf(pmu_name, "uncore_type_%u", type->type_id); + else { + sprintf(pmu_name, "uncore_type_%u_%d", + type->type_id, type->box_ids[pmu->pmu_idx]); + } +} + +static void uncore_get_pmu_name(struct intel_uncore_pmu *pmu) +{ + struct intel_uncore_type *type = pmu->type; + + /* + * No uncore block name in discovery table. + * Use uncore_type_&typeid_&boxid as name. + */ + if (!type->name) { + uncore_get_alias_name(pmu->name, pmu); + return; + } + + if (type->num_boxes == 1) { + if (strlen(type->name) > 0) + sprintf(pmu->name, "uncore_%s", type->name); + else + sprintf(pmu->name, "uncore"); + } else { + /* + * Use the box ID from the discovery table if applicable. + */ + sprintf(pmu->name, "uncore_%s_%d", type->name, + type->box_ids ? type->box_ids[pmu->pmu_idx] : pmu->pmu_idx); + } +} + static int uncore_pmu_register(struct intel_uncore_pmu *pmu) { int ret; @@ -843,21 +899,15 @@ static int uncore_pmu_register(struct intel_uncore_pmu *pmu) .read = uncore_pmu_event_read, .module = THIS_MODULE, .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + .attr_update = pmu->type->attr_update, }; } else { pmu->pmu = *pmu->type->pmu; pmu->pmu.attr_groups = pmu->type->attr_groups; + pmu->pmu.attr_update = pmu->type->attr_update; } - if (pmu->type->num_boxes == 1) { - if (strlen(pmu->type->name) > 0) - sprintf(pmu->name, "uncore_%s", pmu->type->name); - else - sprintf(pmu->name, "uncore"); - } else { - sprintf(pmu->name, "uncore_%s_%d", pmu->type->name, - pmu->pmu_idx); - } + uncore_get_pmu_name(pmu); ret = perf_pmu_register(&pmu->pmu, pmu->name, -1); if (!ret) @@ -877,7 +927,7 @@ static void uncore_free_boxes(struct intel_uncore_pmu *pmu) { int die; - for (die = 0; die < max_dies; die++) + for (die = 0; die < uncore_max_dies(); die++) kfree(pmu->boxes[die]); kfree(pmu->boxes); } @@ -887,6 +937,9 @@ static void uncore_type_exit(struct intel_uncore_type *type) struct intel_uncore_pmu *pmu = type->pmus; int i; + if (type->cleanup_mapping) + type->cleanup_mapping(type); + if (pmu) { for (i = 0; i < type->num_boxes; i++, pmu++) { uncore_pmu_unregister(pmu); @@ -895,6 +948,10 @@ static void uncore_type_exit(struct intel_uncore_type *type) kfree(type->pmus); type->pmus = NULL; } + if (type->box_ids) { + kfree(type->box_ids); + type->box_ids = NULL; + } kfree(type->events_group); type->events_group = NULL; } @@ -915,7 +972,7 @@ static int __init uncore_type_init(struct intel_uncore_type *type, bool setid) if (!pmus) return -ENOMEM; - size = max_dies * sizeof(struct intel_uncore_box *); + size = uncore_max_dies() * sizeof(struct intel_uncore_box *); for (i = 0; i < type->num_boxes; i++) { pmus[i].func_id = setid ? i : -1; @@ -954,6 +1011,9 @@ static int __init uncore_type_init(struct intel_uncore_type *type, bool setid) type->pmu_group = &uncore_pmu_attr_group; + if (type->set_mapping) + type->set_mapping(type); + return 0; err: @@ -978,65 +1038,93 @@ uncore_types_init(struct intel_uncore_type **types, bool setid) } /* - * add a pci uncore device + * Get the die information of a PCI device. + * @pdev: The PCI device. + * @die: The die id which the device maps to. */ -static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) +static int uncore_pci_get_dev_die_info(struct pci_dev *pdev, int *die) { - struct intel_uncore_type *type; - struct intel_uncore_pmu *pmu = NULL; - struct intel_uncore_box *box; - int phys_id, die, ret; - - phys_id = uncore_pcibus_to_physid(pdev->bus); - if (phys_id < 0) - return -ENODEV; - - die = (topology_max_die_per_package() > 1) ? phys_id : - topology_phys_to_logical_pkg(phys_id); - if (die < 0) + *die = uncore_pcibus_to_dieid(pdev->bus); + if (*die < 0) return -EINVAL; - if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) { - int idx = UNCORE_PCI_DEV_IDX(id->driver_data); + return 0; +} - uncore_extra_pci_dev[die].dev[idx] = pdev; - pci_set_drvdata(pdev, NULL); - return 0; +static struct intel_uncore_pmu * +uncore_pci_find_dev_pmu_from_types(struct pci_dev *pdev) +{ + struct intel_uncore_type **types = uncore_pci_uncores; + struct intel_uncore_type *type; + u64 box_ctl; + int i, die; + + for (; *types; types++) { + type = *types; + for (die = 0; die < __uncore_max_dies; die++) { + for (i = 0; i < type->num_boxes; i++) { + if (!type->box_ctls[die]) + continue; + box_ctl = type->box_ctls[die] + type->pci_offsets[i]; + if (pdev->devfn == UNCORE_DISCOVERY_PCI_DEVFN(box_ctl) && + pdev->bus->number == UNCORE_DISCOVERY_PCI_BUS(box_ctl) && + pci_domain_nr(pdev->bus) == UNCORE_DISCOVERY_PCI_DOMAIN(box_ctl)) + return &type->pmus[i]; + } + } } - type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)]; + return NULL; +} - /* - * Some platforms, e.g. Knights Landing, use a common PCI device ID - * for multiple instances of an uncore PMU device type. We should check - * PCI slot and func to indicate the uncore box. - */ - if (id->driver_data & ~0xffff) { - struct pci_driver *pci_drv = pdev->driver; - const struct pci_device_id *ids = pci_drv->id_table; - unsigned int devfn; - - while (ids && ids->vendor) { - if ((ids->vendor == pdev->vendor) && - (ids->device == pdev->device)) { - devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(ids->driver_data), - UNCORE_PCI_DEV_FUNC(ids->driver_data)); - if (devfn == pdev->devfn) { - pmu = &type->pmus[UNCORE_PCI_DEV_IDX(ids->driver_data)]; - break; - } +/* + * Find the PMU of a PCI device. + * @pdev: The PCI device. + * @ids: The ID table of the available PCI devices with a PMU. + * If NULL, search the whole uncore_pci_uncores. + */ +static struct intel_uncore_pmu * +uncore_pci_find_dev_pmu(struct pci_dev *pdev, const struct pci_device_id *ids) +{ + struct intel_uncore_pmu *pmu = NULL; + struct intel_uncore_type *type; + kernel_ulong_t data; + unsigned int devfn; + + if (!ids) + return uncore_pci_find_dev_pmu_from_types(pdev); + + while (ids && ids->vendor) { + if ((ids->vendor == pdev->vendor) && + (ids->device == pdev->device)) { + data = ids->driver_data; + devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(data), + UNCORE_PCI_DEV_FUNC(data)); + if (devfn == pdev->devfn) { + type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(data)]; + pmu = &type->pmus[UNCORE_PCI_DEV_IDX(data)]; + break; } - ids++; } - if (pmu == NULL) - return -ENODEV; - } else { - /* - * for performance monitoring unit with multiple boxes, - * each box has a different function id. - */ - pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)]; + ids++; } + return pmu; +} + +/* + * Register the PMU for a PCI device + * @pdev: The PCI device. + * @type: The corresponding PMU type of the device. + * @pmu: The corresponding PMU of the device. + * @die: The die id which the device maps to. + */ +static int uncore_pci_pmu_register(struct pci_dev *pdev, + struct intel_uncore_type *type, + struct intel_uncore_pmu *pmu, + int die) +{ + struct intel_uncore_box *box; + int ret; if (WARN_ON_ONCE(pmu->boxes[die] != NULL)) return -EINVAL; @@ -1051,12 +1139,10 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id WARN_ON_ONCE(pmu->func_id != pdev->devfn); atomic_inc(&box->refcnt); - box->pci_phys_id = phys_id; box->dieid = die; box->pci_dev = pdev; box->pmu = pmu; uncore_box_init(box); - pci_set_drvdata(pdev, box); pmu->boxes[die] = box; if (atomic_inc_return(&pmu->activeboxes) > 1) @@ -1065,7 +1151,6 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id /* First active box registers the pmu */ ret = uncore_pmu_register(pmu); if (ret) { - pci_set_drvdata(pdev, NULL); pmu->boxes[die] = NULL; uncore_box_exit(box); kfree(box); @@ -1073,18 +1158,82 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id return ret; } +/* + * add a pci uncore device + */ +static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct intel_uncore_type *type; + struct intel_uncore_pmu *pmu = NULL; + int die, ret; + + ret = uncore_pci_get_dev_die_info(pdev, &die); + if (ret) + return ret; + + if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) { + int idx = UNCORE_PCI_DEV_IDX(id->driver_data); + + uncore_extra_pci_dev[die].dev[idx] = pdev; + pci_set_drvdata(pdev, NULL); + return 0; + } + + type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)]; + + /* + * Some platforms, e.g. Knights Landing, use a common PCI device ID + * for multiple instances of an uncore PMU device type. We should check + * PCI slot and func to indicate the uncore box. + */ + if (id->driver_data & ~0xffff) { + struct pci_driver *pci_drv = to_pci_driver(pdev->dev.driver); + + pmu = uncore_pci_find_dev_pmu(pdev, pci_drv->id_table); + if (pmu == NULL) + return -ENODEV; + } else { + /* + * for performance monitoring unit with multiple boxes, + * each box has a different function id. + */ + pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)]; + } + + ret = uncore_pci_pmu_register(pdev, type, pmu, die); + + pci_set_drvdata(pdev, pmu->boxes[die]); + + return ret; +} + +/* + * Unregister the PMU of a PCI device + * @pmu: The corresponding PMU is unregistered. + * @die: The die id which the device maps to. + */ +static void uncore_pci_pmu_unregister(struct intel_uncore_pmu *pmu, int die) +{ + struct intel_uncore_box *box = pmu->boxes[die]; + + pmu->boxes[die] = NULL; + if (atomic_dec_return(&pmu->activeboxes) == 0) + uncore_pmu_unregister(pmu); + uncore_box_exit(box); + kfree(box); +} + static void uncore_pci_remove(struct pci_dev *pdev) { struct intel_uncore_box *box; struct intel_uncore_pmu *pmu; - int i, phys_id, die; + int i, die; - phys_id = uncore_pcibus_to_physid(pdev->bus); + if (uncore_pci_get_dev_die_info(pdev, &die)) + return; box = pci_get_drvdata(pdev); if (!box) { - die = (topology_max_die_per_package() > 1) ? phys_id : - topology_phys_to_logical_pkg(phys_id); for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) { if (uncore_extra_pci_dev[die].dev[i] == pdev) { uncore_extra_pci_dev[die].dev[i] = NULL; @@ -1096,15 +1245,133 @@ static void uncore_pci_remove(struct pci_dev *pdev) } pmu = box->pmu; - if (WARN_ON_ONCE(phys_id != box->pci_phys_id)) - return; pci_set_drvdata(pdev, NULL); - pmu->boxes[box->dieid] = NULL; - if (atomic_dec_return(&pmu->activeboxes) == 0) - uncore_pmu_unregister(pmu); - uncore_box_exit(box); - kfree(box); + + uncore_pci_pmu_unregister(pmu, die); +} + +static int uncore_bus_notify(struct notifier_block *nb, + unsigned long action, void *data, + const struct pci_device_id *ids) +{ + struct device *dev = data; + struct pci_dev *pdev = to_pci_dev(dev); + struct intel_uncore_pmu *pmu; + int die; + + /* Unregister the PMU when the device is going to be deleted. */ + if (action != BUS_NOTIFY_DEL_DEVICE) + return NOTIFY_DONE; + + pmu = uncore_pci_find_dev_pmu(pdev, ids); + if (!pmu) + return NOTIFY_DONE; + + if (uncore_pci_get_dev_die_info(pdev, &die)) + return NOTIFY_DONE; + + uncore_pci_pmu_unregister(pmu, die); + + return NOTIFY_OK; +} + +static int uncore_pci_sub_bus_notify(struct notifier_block *nb, + unsigned long action, void *data) +{ + return uncore_bus_notify(nb, action, data, + uncore_pci_sub_driver->id_table); +} + +static struct notifier_block uncore_pci_sub_notifier = { + .notifier_call = uncore_pci_sub_bus_notify, +}; + +static void uncore_pci_sub_driver_init(void) +{ + const struct pci_device_id *ids = uncore_pci_sub_driver->id_table; + struct intel_uncore_type *type; + struct intel_uncore_pmu *pmu; + struct pci_dev *pci_sub_dev; + bool notify = false; + unsigned int devfn; + int die; + + while (ids && ids->vendor) { + pci_sub_dev = NULL; + type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(ids->driver_data)]; + /* + * Search the available device, and register the + * corresponding PMU. + */ + while ((pci_sub_dev = pci_get_device(PCI_VENDOR_ID_INTEL, + ids->device, pci_sub_dev))) { + devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(ids->driver_data), + UNCORE_PCI_DEV_FUNC(ids->driver_data)); + if (devfn != pci_sub_dev->devfn) + continue; + + pmu = &type->pmus[UNCORE_PCI_DEV_IDX(ids->driver_data)]; + if (!pmu) + continue; + + if (uncore_pci_get_dev_die_info(pci_sub_dev, &die)) + continue; + + if (!uncore_pci_pmu_register(pci_sub_dev, type, pmu, + die)) + notify = true; + } + ids++; + } + + if (notify && bus_register_notifier(&pci_bus_type, &uncore_pci_sub_notifier)) + notify = false; + + if (!notify) + uncore_pci_sub_driver = NULL; +} + +static int uncore_pci_bus_notify(struct notifier_block *nb, + unsigned long action, void *data) +{ + return uncore_bus_notify(nb, action, data, NULL); +} + +static struct notifier_block uncore_pci_notifier = { + .notifier_call = uncore_pci_bus_notify, +}; + + +static void uncore_pci_pmus_register(void) +{ + struct intel_uncore_type **types = uncore_pci_uncores; + struct intel_uncore_type *type; + struct intel_uncore_pmu *pmu; + struct pci_dev *pdev; + u64 box_ctl; + int i, die; + + for (; *types; types++) { + type = *types; + for (die = 0; die < __uncore_max_dies; die++) { + for (i = 0; i < type->num_boxes; i++) { + if (!type->box_ctls[die]) + continue; + box_ctl = type->box_ctls[die] + type->pci_offsets[i]; + pdev = pci_get_domain_bus_and_slot(UNCORE_DISCOVERY_PCI_DOMAIN(box_ctl), + UNCORE_DISCOVERY_PCI_BUS(box_ctl), + UNCORE_DISCOVERY_PCI_DEVFN(box_ctl)); + if (!pdev) + continue; + pmu = &type->pmus[i]; + + uncore_pci_pmu_register(pdev, type, pmu, die); + } + } + } + + bus_register_notifier(&pci_bus_type, &uncore_pci_notifier); } static int __init uncore_pci_init(void) @@ -1112,7 +1379,7 @@ static int __init uncore_pci_init(void) size_t size; int ret; - size = max_dies * sizeof(struct pci_extra_dev); + size = uncore_max_dies() * sizeof(struct pci_extra_dev); uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL); if (!uncore_extra_pci_dev) { ret = -ENOMEM; @@ -1123,12 +1390,18 @@ static int __init uncore_pci_init(void) if (ret) goto errtype; - uncore_pci_driver->probe = uncore_pci_probe; - uncore_pci_driver->remove = uncore_pci_remove; + if (uncore_pci_driver) { + uncore_pci_driver->probe = uncore_pci_probe; + uncore_pci_driver->remove = uncore_pci_remove; - ret = pci_register_driver(uncore_pci_driver); - if (ret) - goto errtype; + ret = pci_register_driver(uncore_pci_driver); + if (ret) + goto errtype; + } else + uncore_pci_pmus_register(); + + if (uncore_pci_sub_driver) + uncore_pci_sub_driver_init(); pcidrv_registered = true; return 0; @@ -1147,7 +1420,12 @@ static void uncore_pci_exit(void) { if (pcidrv_registered) { pcidrv_registered = false; - pci_unregister_driver(uncore_pci_driver); + if (uncore_pci_sub_driver) + bus_unregister_notifier(&pci_bus_type, &uncore_pci_sub_notifier); + if (uncore_pci_driver) + pci_unregister_driver(uncore_pci_driver); + else + bus_unregister_notifier(&pci_bus_type, &uncore_pci_notifier); uncore_types_exit(uncore_pci_uncores); kfree(uncore_extra_pci_dev); uncore_free_pcibus_map(); @@ -1392,14 +1670,11 @@ err: return ret; } - -#define X86_UNCORE_MODEL_MATCH(model, init) \ - { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&init } - struct intel_uncore_init_fun { void (*cpu_init)(void); int (*pci_init)(void); void (*mmio_init)(void); + bool use_discovery; }; static const struct intel_uncore_init_fun nhm_uncore_init __initconst = { @@ -1470,45 +1745,97 @@ static const struct intel_uncore_init_fun icl_uncore_init __initconst = { .pci_init = skl_uncore_pci_init, }; +static const struct intel_uncore_init_fun tgl_uncore_init __initconst = { + .cpu_init = tgl_uncore_cpu_init, + .mmio_init = tgl_uncore_mmio_init, +}; + +static const struct intel_uncore_init_fun tgl_l_uncore_init __initconst = { + .cpu_init = tgl_uncore_cpu_init, + .mmio_init = tgl_l_uncore_mmio_init, +}; + +static const struct intel_uncore_init_fun rkl_uncore_init __initconst = { + .cpu_init = tgl_uncore_cpu_init, + .pci_init = skl_uncore_pci_init, +}; + +static const struct intel_uncore_init_fun adl_uncore_init __initconst = { + .cpu_init = adl_uncore_cpu_init, + .mmio_init = adl_uncore_mmio_init, +}; + +static const struct intel_uncore_init_fun icx_uncore_init __initconst = { + .cpu_init = icx_uncore_cpu_init, + .pci_init = icx_uncore_pci_init, + .mmio_init = icx_uncore_mmio_init, +}; + static const struct intel_uncore_init_fun snr_uncore_init __initconst = { .cpu_init = snr_uncore_cpu_init, .pci_init = snr_uncore_pci_init, .mmio_init = snr_uncore_mmio_init, }; +static const struct intel_uncore_init_fun spr_uncore_init __initconst = { + .cpu_init = spr_uncore_cpu_init, + .pci_init = spr_uncore_pci_init, + .mmio_init = spr_uncore_mmio_init, + .use_discovery = true, +}; + +static const struct intel_uncore_init_fun generic_uncore_init __initconst = { + .cpu_init = intel_uncore_generic_uncore_cpu_init, + .pci_init = intel_uncore_generic_uncore_pci_init, + .mmio_init = intel_uncore_generic_uncore_mmio_init, +}; + static const struct x86_cpu_id intel_uncore_match[] __initconst = { - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EP, nhm_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM, nhm_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE, nhm_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE_EP, nhm_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE, snb_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE, ivb_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL, hsw_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_L, hsw_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_G, hsw_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL, bdw_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_G, bdw_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X, snbep_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EX, nhmex_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE_EX, nhmex_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, ivbep_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_X, hswep_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, bdx_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_D, bdx_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNM, knl_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE, skl_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_L, skl_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, skx_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_KABYLAKE_L, skl_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_KABYLAKE, skl_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE_L, icl_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE_NNPI, icl_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE, icl_uncore_init), - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ATOM_TREMONT_D, snr_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP, &nhm_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(NEHALEM, &nhm_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(WESTMERE, &nhm_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP, &nhm_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE, &snb_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE, &ivb_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(HASWELL, &hsw_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L, &hsw_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G, &hsw_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, &bdw_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G, &bdw_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &snbep_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX, &nhmex_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX, &nhmex_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &ivbep_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &hswep_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &bdx_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &bdx_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &knl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &knl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, &skl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, &skl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &skx_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, &skl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &skl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, &skl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &skl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, &icl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_NNPI, &icl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, &icl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &icx_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &icx_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &tgl_l_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &tgl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE, &rkl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &adl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &adl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &adl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &adl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &adl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &adl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &spr_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &snr_uncore_init), {}, }; - MODULE_DEVICE_TABLE(x86cpu, intel_uncore_match); static int __init intel_uncore_init(void) @@ -1517,16 +1844,26 @@ static int __init intel_uncore_init(void) struct intel_uncore_init_fun *uncore_init; int pret = 0, cret = 0, mret = 0, ret; - id = x86_match_cpu(intel_uncore_match); - if (!id) - return -ENODEV; - if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) return -ENODEV; - max_dies = topology_max_packages() * topology_max_die_per_package(); + __uncore_max_dies = + topology_max_packages() * topology_max_die_per_package(); + + id = x86_match_cpu(intel_uncore_match); + if (!id) { + if (!uncore_no_discover && intel_uncore_has_discovery_tables()) + uncore_init = (struct intel_uncore_init_fun *)&generic_uncore_init; + else + return -ENODEV; + } else { + uncore_init = (struct intel_uncore_init_fun *)id->driver_data; + if (uncore_no_discover && uncore_init->use_discovery) + return -ENODEV; + if (uncore_init->use_discovery && !intel_uncore_has_discovery_tables()) + return -ENODEV; + } - uncore_init = (struct intel_uncore_init_fun *)id->driver_data; if (uncore_init->pci_init) { pret = uncore_init->pci_init(); if (!pret) @@ -1543,8 +1880,10 @@ static int __init intel_uncore_init(void) mret = uncore_mmio_init(); } - if (cret && pret && mret) - return -ENODEV; + if (cret && pret && mret) { + ret = -ENODEV; + goto free_discovery; + } /* Install hotplug callbacks to setup the targets for each package */ ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE, @@ -1559,6 +1898,8 @@ err: uncore_types_exit(uncore_msr_uncores); uncore_types_exit(uncore_mmio_uncores); uncore_pci_exit(); +free_discovery: + intel_uncore_clear_discovery_tables(); return ret; } module_init(intel_uncore_init); @@ -1569,5 +1910,6 @@ static void __exit intel_uncore_exit(void) uncore_types_exit(uncore_msr_uncores); uncore_types_exit(uncore_mmio_uncores); uncore_pci_exit(); + intel_uncore_clear_discovery_tables(); } module_exit(intel_uncore_exit); diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index bbfdaa720b45..2adeaf4de4df 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -42,6 +42,7 @@ struct intel_uncore_pmu; struct intel_uncore_box; struct uncore_event_desc; struct freerunning_counters; +struct intel_uncore_topology; struct intel_uncore_type { const char *name; @@ -50,6 +51,7 @@ struct intel_uncore_type { int perf_ctr_bits; int fixed_ctr_bits; int num_freerunning_types; + int type_id; unsigned perf_ctr; unsigned event_ctl; unsigned event_mask; @@ -57,14 +59,21 @@ struct intel_uncore_type { unsigned fixed_ctr; unsigned fixed_ctl; unsigned box_ctl; + u64 *box_ctls; /* Unit ctrl addr of the first box of each die */ union { unsigned msr_offset; unsigned mmio_offset; }; + unsigned mmio_map_size; unsigned num_shared_regs:8; unsigned single_fixed:1; unsigned pair_ctr_ctl:1; - unsigned *msr_offsets; + union { + unsigned *msr_offsets; + unsigned *pci_offsets; + unsigned *mmio_offsets; + }; + unsigned *box_ids; struct event_constraint unconstrainted; struct event_constraint *constraints; struct intel_uncore_pmu *pmus; @@ -72,7 +81,20 @@ struct intel_uncore_type { struct uncore_event_desc *event_descs; struct freerunning_counters *freerunning; const struct attribute_group *attr_groups[4]; + const struct attribute_group **attr_update; struct pmu *pmu; /* for custom pmu ops */ + /* + * Uncore PMU would store relevant platform topology configuration here + * to identify which platform component each PMON block of that type is + * supposed to monitor. + */ + struct intel_uncore_topology *topology; + /* + * Optional callbacks for managing mapping of Uncore units to PMONs + */ + int (*get_topology)(struct intel_uncore_type *type); + int (*set_mapping)(struct intel_uncore_type *type); + void (*cleanup_mapping)(struct intel_uncore_type *type); }; #define pmu_group attr_groups[0] @@ -111,7 +133,6 @@ struct intel_uncore_extra_reg { }; struct intel_uncore_box { - int pci_phys_id; int dieid; /* Logical die ID */ int n_active; /* number of active events */ int n_events; @@ -130,7 +151,7 @@ struct intel_uncore_box { struct list_head list; struct list_head active_list; void __iomem *io_addr; - struct intel_uncore_extra_reg shared_regs[0]; + struct intel_uncore_extra_reg shared_regs[]; }; /* CFL uncore 8th cbox MSRs */ @@ -144,7 +165,7 @@ struct intel_uncore_box { #define UNCORE_BOX_FLAG_CFL8_CBOX_MSR_OFFS 2 struct uncore_event_desc { - struct kobj_attribute attr; + struct device_attribute attr; const char *config; }; @@ -154,19 +175,38 @@ struct freerunning_counters { unsigned int box_offset; unsigned int num_counters; unsigned int bits; + unsigned *box_offsets; +}; + +struct intel_uncore_topology { + u64 configuration; + int segment; }; struct pci2phy_map { struct list_head list; int segment; - int pbus_to_physid[256]; + int pbus_to_dieid[256]; }; struct pci2phy_map *__find_pci2phy_map(int segment); -int uncore_pcibus_to_physid(struct pci_bus *bus); +int uncore_pcibus_to_dieid(struct pci_bus *bus); +int uncore_die_to_segment(int die); + +ssize_t uncore_event_show(struct device *dev, + struct device_attribute *attr, char *buf); -ssize_t uncore_event_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf); +static inline struct intel_uncore_pmu *dev_to_uncore_pmu(struct device *dev) +{ + return container_of(dev_get_drvdata(dev), struct intel_uncore_pmu, pmu); +} + +#define to_device_attribute(n) container_of(n, struct device_attribute, attr) +#define to_dev_ext_attribute(n) container_of(n, struct dev_ext_attribute, attr) +#define attr_to_ext_attr(n) to_dev_ext_attribute(to_device_attribute(n)) + +extern int __uncore_max_dies; +#define uncore_max_dies() (__uncore_max_dies) #define INTEL_UNCORE_EVENT_DESC(_name, _config) \ { \ @@ -175,14 +215,14 @@ ssize_t uncore_event_show(struct kobject *kobj, } #define DEFINE_UNCORE_FORMAT_ATTR(_var, _name, _format) \ -static ssize_t __uncore_##_var##_show(struct kobject *kobj, \ - struct kobj_attribute *attr, \ +static ssize_t __uncore_##_var##_show(struct device *dev, \ + struct device_attribute *attr, \ char *page) \ { \ BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \ return sprintf(page, _format "\n"); \ } \ -static struct kobj_attribute format_attr_##_var = \ +static struct device_attribute format_attr_##_var = \ __ATTR(_name, 0444, __uncore_##_var##_show, NULL) static inline bool uncore_pmc_fixed(int idx) @@ -195,6 +235,18 @@ static inline bool uncore_pmc_freerunning(int idx) return idx == UNCORE_PMC_IDX_FREERUNNING; } +static inline bool uncore_mmio_is_valid_offset(struct intel_uncore_box *box, + unsigned long offset) +{ + if (offset < box->pmu->type->mmio_map_size) + return true; + + pr_warn_once("perf uncore: Invalid offset 0x%lx exceeds mapped area of %s.\n", + offset, box->pmu->type->name); + + return false; +} + static inline unsigned int uncore_mmio_box_ctl(struct intel_uncore_box *box) { @@ -310,7 +362,9 @@ unsigned int uncore_freerunning_counter(struct intel_uncore_box *box, return pmu->type->freerunning[type].counter_base + pmu->type->freerunning[type].counter_offset * idx + - pmu->type->freerunning[type].box_offset * pmu->pmu_idx; + (pmu->type->freerunning[type].box_offsets ? + pmu->type->freerunning[type].box_offsets[pmu->pmu_idx] : + pmu->type->freerunning[type].box_offset * pmu->pmu_idx); } static inline @@ -507,11 +561,14 @@ struct event_constraint * uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event); void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event); u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx); +void uncore_get_alias_name(char *pmu_name, struct intel_uncore_pmu *pmu); +extern struct intel_uncore_type *empty_uncore[]; extern struct intel_uncore_type **uncore_msr_uncores; extern struct intel_uncore_type **uncore_pci_uncores; extern struct intel_uncore_type **uncore_mmio_uncores; extern struct pci_driver *uncore_pci_driver; +extern struct pci_driver *uncore_pci_sub_driver; extern raw_spinlock_t pci2phy_map_lock; extern struct list_head pci2phy_map_head; extern struct pci_extra_dev *uncore_extra_pci_dev; @@ -527,6 +584,11 @@ void snb_uncore_cpu_init(void); void nhm_uncore_cpu_init(void); void skl_uncore_cpu_init(void); void icl_uncore_cpu_init(void); +void tgl_uncore_cpu_init(void); +void adl_uncore_cpu_init(void); +void tgl_uncore_mmio_init(void); +void tgl_l_uncore_mmio_init(void); +void adl_uncore_mmio_init(void); int snb_pci2phy_map_init(int devid); /* uncore_snbep.c */ @@ -545,6 +607,12 @@ void skx_uncore_cpu_init(void); int snr_uncore_pci_init(void); void snr_uncore_cpu_init(void); void snr_uncore_mmio_init(void); +int icx_uncore_pci_init(void); +void icx_uncore_cpu_init(void); +void icx_uncore_mmio_init(void); +int spr_uncore_pci_init(void); +void spr_uncore_cpu_init(void); +void spr_uncore_mmio_init(void); /* uncore_nhmex.c */ void nhmex_uncore_cpu_init(void); diff --git a/arch/x86/events/intel/uncore_discovery.c b/arch/x86/events/intel/uncore_discovery.c new file mode 100644 index 000000000000..5fd72d4b8bbb --- /dev/null +++ b/arch/x86/events/intel/uncore_discovery.c @@ -0,0 +1,630 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Support Intel uncore PerfMon discovery mechanism. + * Copyright(c) 2021 Intel Corporation. + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include "uncore.h" +#include "uncore_discovery.h" + +static struct rb_root discovery_tables = RB_ROOT; +static int num_discovered_types[UNCORE_ACCESS_MAX]; + +static bool has_generic_discovery_table(void) +{ + struct pci_dev *dev; + int dvsec; + + dev = pci_get_device(PCI_VENDOR_ID_INTEL, UNCORE_DISCOVERY_TABLE_DEVICE, NULL); + if (!dev) + return false; + + /* A discovery table device has the unique capability ID. */ + dvsec = pci_find_next_ext_capability(dev, 0, UNCORE_EXT_CAP_ID_DISCOVERY); + pci_dev_put(dev); + if (dvsec) + return true; + + return false; +} + +static int logical_die_id; + +static int get_device_die_id(struct pci_dev *dev) +{ + int cpu, node = pcibus_to_node(dev->bus); + + /* + * If the NUMA info is not available, assume that the logical die id is + * continuous in the order in which the discovery table devices are + * detected. + */ + if (node < 0) + return logical_die_id++; + + for_each_cpu(cpu, cpumask_of_node(node)) { + struct cpuinfo_x86 *c = &cpu_data(cpu); + + if (c->initialized && cpu_to_node(cpu) == node) + return c->logical_die_id; + } + + /* + * All CPUs of a node may be offlined. For this case, + * the PCI and MMIO type of uncore blocks which are + * enumerated by the device will be unavailable. + */ + return -1; +} + +#define __node_2_type(cur) \ + rb_entry((cur), struct intel_uncore_discovery_type, node) + +static inline int __type_cmp(const void *key, const struct rb_node *b) +{ + struct intel_uncore_discovery_type *type_b = __node_2_type(b); + const u16 *type_id = key; + + if (type_b->type > *type_id) + return -1; + else if (type_b->type < *type_id) + return 1; + + return 0; +} + +static inline struct intel_uncore_discovery_type * +search_uncore_discovery_type(u16 type_id) +{ + struct rb_node *node = rb_find(&type_id, &discovery_tables, __type_cmp); + + return (node) ? __node_2_type(node) : NULL; +} + +static inline bool __type_less(struct rb_node *a, const struct rb_node *b) +{ + return (__node_2_type(a)->type < __node_2_type(b)->type); +} + +static struct intel_uncore_discovery_type * +add_uncore_discovery_type(struct uncore_unit_discovery *unit) +{ + struct intel_uncore_discovery_type *type; + + if (unit->access_type >= UNCORE_ACCESS_MAX) { + pr_warn("Unsupported access type %d\n", unit->access_type); + return NULL; + } + + type = kzalloc(sizeof(struct intel_uncore_discovery_type), GFP_KERNEL); + if (!type) + return NULL; + + type->box_ctrl_die = kcalloc(__uncore_max_dies, sizeof(u64), GFP_KERNEL); + if (!type->box_ctrl_die) + goto free_type; + + type->access_type = unit->access_type; + num_discovered_types[type->access_type]++; + type->type = unit->box_type; + + rb_add(&type->node, &discovery_tables, __type_less); + + return type; + +free_type: + kfree(type); + + return NULL; + +} + +static struct intel_uncore_discovery_type * +get_uncore_discovery_type(struct uncore_unit_discovery *unit) +{ + struct intel_uncore_discovery_type *type; + + type = search_uncore_discovery_type(unit->box_type); + if (type) + return type; + + return add_uncore_discovery_type(unit); +} + +static void +uncore_insert_box_info(struct uncore_unit_discovery *unit, + int die, bool parsed) +{ + struct intel_uncore_discovery_type *type; + unsigned int *box_offset, *ids; + int i; + + if (WARN_ON_ONCE(!unit->ctl || !unit->ctl_offset || !unit->ctr_offset)) + return; + + if (parsed) { + type = search_uncore_discovery_type(unit->box_type); + if (WARN_ON_ONCE(!type)) + return; + /* Store the first box of each die */ + if (!type->box_ctrl_die[die]) + type->box_ctrl_die[die] = unit->ctl; + return; + } + + type = get_uncore_discovery_type(unit); + if (!type) + return; + + box_offset = kcalloc(type->num_boxes + 1, sizeof(unsigned int), GFP_KERNEL); + if (!box_offset) + return; + + ids = kcalloc(type->num_boxes + 1, sizeof(unsigned int), GFP_KERNEL); + if (!ids) + goto free_box_offset; + + /* Store generic information for the first box */ + if (!type->num_boxes) { + type->box_ctrl = unit->ctl; + type->box_ctrl_die[die] = unit->ctl; + type->num_counters = unit->num_regs; + type->counter_width = unit->bit_width; + type->ctl_offset = unit->ctl_offset; + type->ctr_offset = unit->ctr_offset; + *ids = unit->box_id; + goto end; + } + + for (i = 0; i < type->num_boxes; i++) { + ids[i] = type->ids[i]; + box_offset[i] = type->box_offset[i]; + + if (WARN_ON_ONCE(unit->box_id == ids[i])) + goto free_ids; + } + ids[i] = unit->box_id; + box_offset[i] = unit->ctl - type->box_ctrl; + kfree(type->ids); + kfree(type->box_offset); +end: + type->ids = ids; + type->box_offset = box_offset; + type->num_boxes++; + return; + +free_ids: + kfree(ids); + +free_box_offset: + kfree(box_offset); + +} + +static int parse_discovery_table(struct pci_dev *dev, int die, + u32 bar_offset, bool *parsed) +{ + struct uncore_global_discovery global; + struct uncore_unit_discovery unit; + void __iomem *io_addr; + resource_size_t addr; + unsigned long size; + u32 val; + int i; + + pci_read_config_dword(dev, bar_offset, &val); + + if (val & ~PCI_BASE_ADDRESS_MEM_MASK & ~PCI_BASE_ADDRESS_MEM_TYPE_64) + return -EINVAL; + + addr = (resource_size_t)(val & PCI_BASE_ADDRESS_MEM_MASK); +#ifdef CONFIG_PHYS_ADDR_T_64BIT + if ((val & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == PCI_BASE_ADDRESS_MEM_TYPE_64) { + u32 val2; + + pci_read_config_dword(dev, bar_offset + 4, &val2); + addr |= ((resource_size_t)val2) << 32; + } +#endif + size = UNCORE_DISCOVERY_GLOBAL_MAP_SIZE; + io_addr = ioremap(addr, size); + if (!io_addr) + return -ENOMEM; + + /* Read Global Discovery State */ + memcpy_fromio(&global, io_addr, sizeof(struct uncore_global_discovery)); + if (uncore_discovery_invalid_unit(global)) { + pr_info("Invalid Global Discovery State: 0x%llx 0x%llx 0x%llx\n", + global.table1, global.ctl, global.table3); + iounmap(io_addr); + return -EINVAL; + } + iounmap(io_addr); + + size = (1 + global.max_units) * global.stride * 8; + io_addr = ioremap(addr, size); + if (!io_addr) + return -ENOMEM; + + /* Parsing Unit Discovery State */ + for (i = 0; i < global.max_units; i++) { + memcpy_fromio(&unit, io_addr + (i + 1) * (global.stride * 8), + sizeof(struct uncore_unit_discovery)); + + if (uncore_discovery_invalid_unit(unit)) + continue; + + if (unit.access_type >= UNCORE_ACCESS_MAX) + continue; + + uncore_insert_box_info(&unit, die, *parsed); + } + + *parsed = true; + iounmap(io_addr); + return 0; +} + +bool intel_uncore_has_discovery_tables(void) +{ + u32 device, val, entry_id, bar_offset; + int die, dvsec = 0, ret = true; + struct pci_dev *dev = NULL; + bool parsed = false; + + if (has_generic_discovery_table()) + device = UNCORE_DISCOVERY_TABLE_DEVICE; + else + device = PCI_ANY_ID; + + /* + * Start a new search and iterates through the list of + * the discovery table devices. + */ + while ((dev = pci_get_device(PCI_VENDOR_ID_INTEL, device, dev)) != NULL) { + while ((dvsec = pci_find_next_ext_capability(dev, dvsec, UNCORE_EXT_CAP_ID_DISCOVERY))) { + pci_read_config_dword(dev, dvsec + UNCORE_DISCOVERY_DVSEC_OFFSET, &val); + entry_id = val & UNCORE_DISCOVERY_DVSEC_ID_MASK; + if (entry_id != UNCORE_DISCOVERY_DVSEC_ID_PMON) + continue; + + pci_read_config_dword(dev, dvsec + UNCORE_DISCOVERY_DVSEC2_OFFSET, &val); + + if (val & ~UNCORE_DISCOVERY_DVSEC2_BIR_MASK) { + ret = false; + goto err; + } + bar_offset = UNCORE_DISCOVERY_BIR_BASE + + (val & UNCORE_DISCOVERY_DVSEC2_BIR_MASK) * UNCORE_DISCOVERY_BIR_STEP; + + die = get_device_die_id(dev); + if (die < 0) + continue; + + parse_discovery_table(dev, die, bar_offset, &parsed); + } + } + + /* None of the discovery tables are available */ + if (!parsed) + ret = false; +err: + pci_dev_put(dev); + + return ret; +} + +void intel_uncore_clear_discovery_tables(void) +{ + struct intel_uncore_discovery_type *type, *next; + + rbtree_postorder_for_each_entry_safe(type, next, &discovery_tables, node) { + kfree(type->box_ctrl_die); + kfree(type); + } +} + +DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7"); +DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15"); +DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); +DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23"); +DEFINE_UNCORE_FORMAT_ATTR(thresh, thresh, "config:24-31"); + +static struct attribute *generic_uncore_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh.attr, + NULL, +}; + +static const struct attribute_group generic_uncore_format_group = { + .name = "format", + .attrs = generic_uncore_formats_attr, +}; + +void intel_generic_uncore_msr_init_box(struct intel_uncore_box *box) +{ + wrmsrl(uncore_msr_box_ctl(box), GENERIC_PMON_BOX_CTL_INT); +} + +void intel_generic_uncore_msr_disable_box(struct intel_uncore_box *box) +{ + wrmsrl(uncore_msr_box_ctl(box), GENERIC_PMON_BOX_CTL_FRZ); +} + +void intel_generic_uncore_msr_enable_box(struct intel_uncore_box *box) +{ + wrmsrl(uncore_msr_box_ctl(box), 0); +} + +static void intel_generic_uncore_msr_enable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + wrmsrl(hwc->config_base, hwc->config); +} + +static void intel_generic_uncore_msr_disable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + wrmsrl(hwc->config_base, 0); +} + +static struct intel_uncore_ops generic_uncore_msr_ops = { + .init_box = intel_generic_uncore_msr_init_box, + .disable_box = intel_generic_uncore_msr_disable_box, + .enable_box = intel_generic_uncore_msr_enable_box, + .disable_event = intel_generic_uncore_msr_disable_event, + .enable_event = intel_generic_uncore_msr_enable_event, + .read_counter = uncore_msr_read_counter, +}; + +void intel_generic_uncore_pci_init_box(struct intel_uncore_box *box) +{ + struct pci_dev *pdev = box->pci_dev; + int box_ctl = uncore_pci_box_ctl(box); + + __set_bit(UNCORE_BOX_FLAG_CTL_OFFS8, &box->flags); + pci_write_config_dword(pdev, box_ctl, GENERIC_PMON_BOX_CTL_INT); +} + +void intel_generic_uncore_pci_disable_box(struct intel_uncore_box *box) +{ + struct pci_dev *pdev = box->pci_dev; + int box_ctl = uncore_pci_box_ctl(box); + + pci_write_config_dword(pdev, box_ctl, GENERIC_PMON_BOX_CTL_FRZ); +} + +void intel_generic_uncore_pci_enable_box(struct intel_uncore_box *box) +{ + struct pci_dev *pdev = box->pci_dev; + int box_ctl = uncore_pci_box_ctl(box); + + pci_write_config_dword(pdev, box_ctl, 0); +} + +static void intel_generic_uncore_pci_enable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct pci_dev *pdev = box->pci_dev; + struct hw_perf_event *hwc = &event->hw; + + pci_write_config_dword(pdev, hwc->config_base, hwc->config); +} + +void intel_generic_uncore_pci_disable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct pci_dev *pdev = box->pci_dev; + struct hw_perf_event *hwc = &event->hw; + + pci_write_config_dword(pdev, hwc->config_base, 0); +} + +u64 intel_generic_uncore_pci_read_counter(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct pci_dev *pdev = box->pci_dev; + struct hw_perf_event *hwc = &event->hw; + u64 count = 0; + + pci_read_config_dword(pdev, hwc->event_base, (u32 *)&count); + pci_read_config_dword(pdev, hwc->event_base + 4, (u32 *)&count + 1); + + return count; +} + +static struct intel_uncore_ops generic_uncore_pci_ops = { + .init_box = intel_generic_uncore_pci_init_box, + .disable_box = intel_generic_uncore_pci_disable_box, + .enable_box = intel_generic_uncore_pci_enable_box, + .disable_event = intel_generic_uncore_pci_disable_event, + .enable_event = intel_generic_uncore_pci_enable_event, + .read_counter = intel_generic_uncore_pci_read_counter, +}; + +#define UNCORE_GENERIC_MMIO_SIZE 0x4000 + +static u64 generic_uncore_mmio_box_ctl(struct intel_uncore_box *box) +{ + struct intel_uncore_type *type = box->pmu->type; + + if (!type->box_ctls || !type->box_ctls[box->dieid] || !type->mmio_offsets) + return 0; + + return type->box_ctls[box->dieid] + type->mmio_offsets[box->pmu->pmu_idx]; +} + +void intel_generic_uncore_mmio_init_box(struct intel_uncore_box *box) +{ + u64 box_ctl = generic_uncore_mmio_box_ctl(box); + struct intel_uncore_type *type = box->pmu->type; + resource_size_t addr; + + if (!box_ctl) { + pr_warn("Uncore type %d box %d: Invalid box control address.\n", + type->type_id, type->box_ids[box->pmu->pmu_idx]); + return; + } + + addr = box_ctl; + box->io_addr = ioremap(addr, UNCORE_GENERIC_MMIO_SIZE); + if (!box->io_addr) { + pr_warn("Uncore type %d box %d: ioremap error for 0x%llx.\n", + type->type_id, type->box_ids[box->pmu->pmu_idx], + (unsigned long long)addr); + return; + } + + writel(GENERIC_PMON_BOX_CTL_INT, box->io_addr); +} + +void intel_generic_uncore_mmio_disable_box(struct intel_uncore_box *box) +{ + if (!box->io_addr) + return; + + writel(GENERIC_PMON_BOX_CTL_FRZ, box->io_addr); +} + +void intel_generic_uncore_mmio_enable_box(struct intel_uncore_box *box) +{ + if (!box->io_addr) + return; + + writel(0, box->io_addr); +} + +void intel_generic_uncore_mmio_enable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (!box->io_addr) + return; + + writel(hwc->config, box->io_addr + hwc->config_base); +} + +void intel_generic_uncore_mmio_disable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (!box->io_addr) + return; + + writel(0, box->io_addr + hwc->config_base); +} + +static struct intel_uncore_ops generic_uncore_mmio_ops = { + .init_box = intel_generic_uncore_mmio_init_box, + .exit_box = uncore_mmio_exit_box, + .disable_box = intel_generic_uncore_mmio_disable_box, + .enable_box = intel_generic_uncore_mmio_enable_box, + .disable_event = intel_generic_uncore_mmio_disable_event, + .enable_event = intel_generic_uncore_mmio_enable_event, + .read_counter = uncore_mmio_read_counter, +}; + +static bool uncore_update_uncore_type(enum uncore_access_type type_id, + struct intel_uncore_type *uncore, + struct intel_uncore_discovery_type *type) +{ + uncore->type_id = type->type; + uncore->num_boxes = type->num_boxes; + uncore->num_counters = type->num_counters; + uncore->perf_ctr_bits = type->counter_width; + uncore->box_ids = type->ids; + + switch (type_id) { + case UNCORE_ACCESS_MSR: + uncore->ops = &generic_uncore_msr_ops; + uncore->perf_ctr = (unsigned int)type->box_ctrl + type->ctr_offset; + uncore->event_ctl = (unsigned int)type->box_ctrl + type->ctl_offset; + uncore->box_ctl = (unsigned int)type->box_ctrl; + uncore->msr_offsets = type->box_offset; + break; + case UNCORE_ACCESS_PCI: + uncore->ops = &generic_uncore_pci_ops; + uncore->perf_ctr = (unsigned int)UNCORE_DISCOVERY_PCI_BOX_CTRL(type->box_ctrl) + type->ctr_offset; + uncore->event_ctl = (unsigned int)UNCORE_DISCOVERY_PCI_BOX_CTRL(type->box_ctrl) + type->ctl_offset; + uncore->box_ctl = (unsigned int)UNCORE_DISCOVERY_PCI_BOX_CTRL(type->box_ctrl); + uncore->box_ctls = type->box_ctrl_die; + uncore->pci_offsets = type->box_offset; + break; + case UNCORE_ACCESS_MMIO: + uncore->ops = &generic_uncore_mmio_ops; + uncore->perf_ctr = (unsigned int)type->ctr_offset; + uncore->event_ctl = (unsigned int)type->ctl_offset; + uncore->box_ctl = (unsigned int)type->box_ctrl; + uncore->box_ctls = type->box_ctrl_die; + uncore->mmio_offsets = type->box_offset; + uncore->mmio_map_size = UNCORE_GENERIC_MMIO_SIZE; + break; + default: + return false; + } + + return true; +} + +struct intel_uncore_type ** +intel_uncore_generic_init_uncores(enum uncore_access_type type_id, int num_extra) +{ + struct intel_uncore_discovery_type *type; + struct intel_uncore_type **uncores; + struct intel_uncore_type *uncore; + struct rb_node *node; + int i = 0; + + uncores = kcalloc(num_discovered_types[type_id] + num_extra + 1, + sizeof(struct intel_uncore_type *), GFP_KERNEL); + if (!uncores) + return empty_uncore; + + for (node = rb_first(&discovery_tables); node; node = rb_next(node)) { + type = rb_entry(node, struct intel_uncore_discovery_type, node); + if (type->access_type != type_id) + continue; + + uncore = kzalloc(sizeof(struct intel_uncore_type), GFP_KERNEL); + if (!uncore) + break; + + uncore->event_mask = GENERIC_PMON_RAW_EVENT_MASK; + uncore->format_group = &generic_uncore_format_group; + + if (!uncore_update_uncore_type(type_id, uncore, type)) { + kfree(uncore); + continue; + } + uncores[i++] = uncore; + } + + return uncores; +} + +void intel_uncore_generic_uncore_cpu_init(void) +{ + uncore_msr_uncores = intel_uncore_generic_init_uncores(UNCORE_ACCESS_MSR, 0); +} + +int intel_uncore_generic_uncore_pci_init(void) +{ + uncore_pci_uncores = intel_uncore_generic_init_uncores(UNCORE_ACCESS_PCI, 0); + + return 0; +} + +void intel_uncore_generic_uncore_mmio_init(void) +{ + uncore_mmio_uncores = intel_uncore_generic_init_uncores(UNCORE_ACCESS_MMIO, 0); +} diff --git a/arch/x86/events/intel/uncore_discovery.h b/arch/x86/events/intel/uncore_discovery.h new file mode 100644 index 000000000000..f4439357779a --- /dev/null +++ b/arch/x86/events/intel/uncore_discovery.h @@ -0,0 +1,152 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +/* Generic device ID of a discovery table device */ +#define UNCORE_DISCOVERY_TABLE_DEVICE 0x09a7 +/* Capability ID for a discovery table device */ +#define UNCORE_EXT_CAP_ID_DISCOVERY 0x23 +/* First DVSEC offset */ +#define UNCORE_DISCOVERY_DVSEC_OFFSET 0x8 +/* Mask of the supported discovery entry type */ +#define UNCORE_DISCOVERY_DVSEC_ID_MASK 0xffff +/* PMON discovery entry type ID */ +#define UNCORE_DISCOVERY_DVSEC_ID_PMON 0x1 +/* Second DVSEC offset */ +#define UNCORE_DISCOVERY_DVSEC2_OFFSET 0xc +/* Mask of the discovery table BAR offset */ +#define UNCORE_DISCOVERY_DVSEC2_BIR_MASK 0x7 +/* Discovery table BAR base offset */ +#define UNCORE_DISCOVERY_BIR_BASE 0x10 +/* Discovery table BAR step */ +#define UNCORE_DISCOVERY_BIR_STEP 0x4 +/* Global discovery table size */ +#define UNCORE_DISCOVERY_GLOBAL_MAP_SIZE 0x20 + +#define UNCORE_DISCOVERY_PCI_DOMAIN(data) ((data >> 28) & 0x7) +#define UNCORE_DISCOVERY_PCI_BUS(data) ((data >> 20) & 0xff) +#define UNCORE_DISCOVERY_PCI_DEVFN(data) ((data >> 12) & 0xff) +#define UNCORE_DISCOVERY_PCI_BOX_CTRL(data) (data & 0xfff) + + +#define uncore_discovery_invalid_unit(unit) \ + (!unit.table1 || !unit.ctl || \ + unit.table1 == -1ULL || unit.ctl == -1ULL || \ + unit.table3 == -1ULL) + +#define GENERIC_PMON_CTL_EV_SEL_MASK 0x000000ff +#define GENERIC_PMON_CTL_UMASK_MASK 0x0000ff00 +#define GENERIC_PMON_CTL_EDGE_DET (1 << 18) +#define GENERIC_PMON_CTL_INVERT (1 << 23) +#define GENERIC_PMON_CTL_TRESH_MASK 0xff000000 +#define GENERIC_PMON_RAW_EVENT_MASK (GENERIC_PMON_CTL_EV_SEL_MASK | \ + GENERIC_PMON_CTL_UMASK_MASK | \ + GENERIC_PMON_CTL_EDGE_DET | \ + GENERIC_PMON_CTL_INVERT | \ + GENERIC_PMON_CTL_TRESH_MASK) + +#define GENERIC_PMON_BOX_CTL_FRZ (1 << 0) +#define GENERIC_PMON_BOX_CTL_RST_CTRL (1 << 8) +#define GENERIC_PMON_BOX_CTL_RST_CTRS (1 << 9) +#define GENERIC_PMON_BOX_CTL_INT (GENERIC_PMON_BOX_CTL_RST_CTRL | \ + GENERIC_PMON_BOX_CTL_RST_CTRS) + +enum uncore_access_type { + UNCORE_ACCESS_MSR = 0, + UNCORE_ACCESS_MMIO, + UNCORE_ACCESS_PCI, + + UNCORE_ACCESS_MAX, +}; + +struct uncore_global_discovery { + union { + u64 table1; + struct { + u64 type : 8, + stride : 8, + max_units : 10, + __reserved_1 : 36, + access_type : 2; + }; + }; + + u64 ctl; /* Global Control Address */ + + union { + u64 table3; + struct { + u64 status_offset : 8, + num_status : 16, + __reserved_2 : 40; + }; + }; +}; + +struct uncore_unit_discovery { + union { + u64 table1; + struct { + u64 num_regs : 8, + ctl_offset : 8, + bit_width : 8, + ctr_offset : 8, + status_offset : 8, + __reserved_1 : 22, + access_type : 2; + }; + }; + + u64 ctl; /* Unit Control Address */ + + union { + u64 table3; + struct { + u64 box_type : 16, + box_id : 16, + __reserved_2 : 32; + }; + }; +}; + +struct intel_uncore_discovery_type { + struct rb_node node; + enum uncore_access_type access_type; + u64 box_ctrl; /* Unit ctrl addr of the first box */ + u64 *box_ctrl_die; /* Unit ctrl addr of the first box of each die */ + u16 type; /* Type ID of the uncore block */ + u8 num_counters; + u8 counter_width; + u8 ctl_offset; /* Counter Control 0 offset */ + u8 ctr_offset; /* Counter 0 offset */ + u16 num_boxes; /* number of boxes for the uncore block */ + unsigned int *ids; /* Box IDs */ + unsigned int *box_offset; /* Box offset */ +}; + +bool intel_uncore_has_discovery_tables(void); +void intel_uncore_clear_discovery_tables(void); +void intel_uncore_generic_uncore_cpu_init(void); +int intel_uncore_generic_uncore_pci_init(void); +void intel_uncore_generic_uncore_mmio_init(void); + +void intel_generic_uncore_msr_init_box(struct intel_uncore_box *box); +void intel_generic_uncore_msr_disable_box(struct intel_uncore_box *box); +void intel_generic_uncore_msr_enable_box(struct intel_uncore_box *box); + +void intel_generic_uncore_mmio_init_box(struct intel_uncore_box *box); +void intel_generic_uncore_mmio_disable_box(struct intel_uncore_box *box); +void intel_generic_uncore_mmio_enable_box(struct intel_uncore_box *box); +void intel_generic_uncore_mmio_disable_event(struct intel_uncore_box *box, + struct perf_event *event); +void intel_generic_uncore_mmio_enable_event(struct intel_uncore_box *box, + struct perf_event *event); + +void intel_generic_uncore_pci_init_box(struct intel_uncore_box *box); +void intel_generic_uncore_pci_disable_box(struct intel_uncore_box *box); +void intel_generic_uncore_pci_enable_box(struct intel_uncore_box *box); +void intel_generic_uncore_pci_disable_event(struct intel_uncore_box *box, + struct perf_event *event); +u64 intel_generic_uncore_pci_read_counter(struct intel_uncore_box *box, + struct perf_event *event); + +struct intel_uncore_type ** +intel_uncore_generic_init_uncores(enum uncore_access_type type_id, int num_extra); diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index c37cb12d0ef6..1ef4f7861e2e 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Nehalem/SandBridge/Haswell/Broadwell/Skylake uncore support */ #include "uncore.h" +#include "uncore_discovery.h" /* Uncore IMC PCI IDs */ #define PCI_DEVICE_ID_INTEL_SNB_IMC 0x0100 @@ -42,9 +43,79 @@ #define PCI_DEVICE_ID_INTEL_WHL_UQ_IMC 0x3ed0 #define PCI_DEVICE_ID_INTEL_WHL_4_UQ_IMC 0x3e34 #define PCI_DEVICE_ID_INTEL_WHL_UD_IMC 0x3e35 +#define PCI_DEVICE_ID_INTEL_CML_H1_IMC 0x9b44 +#define PCI_DEVICE_ID_INTEL_CML_H2_IMC 0x9b54 +#define PCI_DEVICE_ID_INTEL_CML_H3_IMC 0x9b64 +#define PCI_DEVICE_ID_INTEL_CML_U1_IMC 0x9b51 +#define PCI_DEVICE_ID_INTEL_CML_U2_IMC 0x9b61 +#define PCI_DEVICE_ID_INTEL_CML_U3_IMC 0x9b71 +#define PCI_DEVICE_ID_INTEL_CML_S1_IMC 0x9b33 +#define PCI_DEVICE_ID_INTEL_CML_S2_IMC 0x9b43 +#define PCI_DEVICE_ID_INTEL_CML_S3_IMC 0x9b53 +#define PCI_DEVICE_ID_INTEL_CML_S4_IMC 0x9b63 +#define PCI_DEVICE_ID_INTEL_CML_S5_IMC 0x9b73 #define PCI_DEVICE_ID_INTEL_ICL_U_IMC 0x8a02 #define PCI_DEVICE_ID_INTEL_ICL_U2_IMC 0x8a12 - +#define PCI_DEVICE_ID_INTEL_TGL_U1_IMC 0x9a02 +#define PCI_DEVICE_ID_INTEL_TGL_U2_IMC 0x9a04 +#define PCI_DEVICE_ID_INTEL_TGL_U3_IMC 0x9a12 +#define PCI_DEVICE_ID_INTEL_TGL_U4_IMC 0x9a14 +#define PCI_DEVICE_ID_INTEL_TGL_H_IMC 0x9a36 +#define PCI_DEVICE_ID_INTEL_RKL_1_IMC 0x4c43 +#define PCI_DEVICE_ID_INTEL_RKL_2_IMC 0x4c53 +#define PCI_DEVICE_ID_INTEL_ADL_1_IMC 0x4660 +#define PCI_DEVICE_ID_INTEL_ADL_2_IMC 0x4641 +#define PCI_DEVICE_ID_INTEL_ADL_3_IMC 0x4601 +#define PCI_DEVICE_ID_INTEL_ADL_4_IMC 0x4602 +#define PCI_DEVICE_ID_INTEL_ADL_5_IMC 0x4609 +#define PCI_DEVICE_ID_INTEL_ADL_6_IMC 0x460a +#define PCI_DEVICE_ID_INTEL_ADL_7_IMC 0x4621 +#define PCI_DEVICE_ID_INTEL_ADL_8_IMC 0x4623 +#define PCI_DEVICE_ID_INTEL_ADL_9_IMC 0x4629 +#define PCI_DEVICE_ID_INTEL_ADL_10_IMC 0x4637 +#define PCI_DEVICE_ID_INTEL_ADL_11_IMC 0x463b +#define PCI_DEVICE_ID_INTEL_ADL_12_IMC 0x4648 +#define PCI_DEVICE_ID_INTEL_ADL_13_IMC 0x4649 +#define PCI_DEVICE_ID_INTEL_ADL_14_IMC 0x4650 +#define PCI_DEVICE_ID_INTEL_ADL_15_IMC 0x4668 +#define PCI_DEVICE_ID_INTEL_ADL_16_IMC 0x4670 +#define PCI_DEVICE_ID_INTEL_ADL_17_IMC 0x4614 +#define PCI_DEVICE_ID_INTEL_ADL_18_IMC 0x4617 +#define PCI_DEVICE_ID_INTEL_ADL_19_IMC 0x4618 +#define PCI_DEVICE_ID_INTEL_ADL_20_IMC 0x461B +#define PCI_DEVICE_ID_INTEL_ADL_21_IMC 0x461C +#define PCI_DEVICE_ID_INTEL_RPL_1_IMC 0xA700 +#define PCI_DEVICE_ID_INTEL_RPL_2_IMC 0xA702 +#define PCI_DEVICE_ID_INTEL_RPL_3_IMC 0xA706 +#define PCI_DEVICE_ID_INTEL_RPL_4_IMC 0xA709 +#define PCI_DEVICE_ID_INTEL_RPL_5_IMC 0xA701 +#define PCI_DEVICE_ID_INTEL_RPL_6_IMC 0xA703 +#define PCI_DEVICE_ID_INTEL_RPL_7_IMC 0xA704 +#define PCI_DEVICE_ID_INTEL_RPL_8_IMC 0xA705 +#define PCI_DEVICE_ID_INTEL_RPL_9_IMC 0xA706 +#define PCI_DEVICE_ID_INTEL_RPL_10_IMC 0xA707 +#define PCI_DEVICE_ID_INTEL_RPL_11_IMC 0xA708 +#define PCI_DEVICE_ID_INTEL_RPL_12_IMC 0xA709 +#define PCI_DEVICE_ID_INTEL_RPL_13_IMC 0xA70a +#define PCI_DEVICE_ID_INTEL_RPL_14_IMC 0xA70b +#define PCI_DEVICE_ID_INTEL_RPL_15_IMC 0xA715 +#define PCI_DEVICE_ID_INTEL_RPL_16_IMC 0xA716 +#define PCI_DEVICE_ID_INTEL_RPL_17_IMC 0xA717 +#define PCI_DEVICE_ID_INTEL_RPL_18_IMC 0xA718 +#define PCI_DEVICE_ID_INTEL_RPL_19_IMC 0xA719 +#define PCI_DEVICE_ID_INTEL_RPL_20_IMC 0xA71A +#define PCI_DEVICE_ID_INTEL_RPL_21_IMC 0xA71B +#define PCI_DEVICE_ID_INTEL_RPL_22_IMC 0xA71C +#define PCI_DEVICE_ID_INTEL_RPL_23_IMC 0xA728 +#define PCI_DEVICE_ID_INTEL_RPL_24_IMC 0xA729 +#define PCI_DEVICE_ID_INTEL_RPL_25_IMC 0xA72A + + +#define IMC_UNCORE_DEV(a) \ +{ \ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_##a##_IMC), \ + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), \ +} /* SNB event control */ #define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff @@ -110,12 +181,38 @@ #define ICL_UNC_CBO_0_PER_CTR0 0x702 #define ICL_UNC_CBO_MSR_OFFSET 0x8 +/* ICL ARB register */ +#define ICL_UNC_ARB_PER_CTR 0x3b1 +#define ICL_UNC_ARB_PERFEVTSEL 0x3b3 + +/* ADL uncore global control */ +#define ADL_UNC_PERF_GLOBAL_CTL 0x2ff0 +#define ADL_UNC_FIXED_CTR_CTRL 0x2fde +#define ADL_UNC_FIXED_CTR 0x2fdf + +/* ADL Cbo register */ +#define ADL_UNC_CBO_0_PER_CTR0 0x2002 +#define ADL_UNC_CBO_0_PERFEVTSEL0 0x2000 +#define ADL_UNC_CTL_THRESHOLD 0x3f000000 +#define ADL_UNC_RAW_EVENT_MASK (SNB_UNC_CTL_EV_SEL_MASK | \ + SNB_UNC_CTL_UMASK_MASK | \ + SNB_UNC_CTL_EDGE_DET | \ + SNB_UNC_CTL_INVERT | \ + ADL_UNC_CTL_THRESHOLD) + +/* ADL ARB register */ +#define ADL_UNC_ARB_PER_CTR0 0x2FD2 +#define ADL_UNC_ARB_PERFEVTSEL0 0x2FD0 +#define ADL_UNC_ARB_MSR_OFFSET 0x8 + DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7"); DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15"); +DEFINE_UNCORE_FORMAT_ATTR(chmask, chmask, "config:8-11"); DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23"); DEFINE_UNCORE_FORMAT_ATTR(cmask5, cmask, "config:24-28"); DEFINE_UNCORE_FORMAT_ATTR(cmask8, cmask, "config:24-31"); +DEFINE_UNCORE_FORMAT_ATTR(threshold, threshold, "config:24-29"); /* Sandy Bridge uncore support */ static void snb_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) @@ -297,15 +394,21 @@ void skl_uncore_cpu_init(void) snb_uncore_arb.ops = &skl_uncore_msr_ops; } +static struct intel_uncore_ops icl_uncore_msr_ops = { + .disable_event = snb_uncore_msr_disable_event, + .enable_event = snb_uncore_msr_enable_event, + .read_counter = uncore_msr_read_counter, +}; + static struct intel_uncore_type icl_uncore_cbox = { .name = "cbox", - .num_counters = 4, + .num_counters = 2, .perf_ctr_bits = 44, .perf_ctr = ICL_UNC_CBO_0_PER_CTR0, .event_ctl = SNB_UNC_CBO_0_PERFEVTSEL0, .event_mask = SNB_UNC_RAW_EVENT_MASK, .msr_offset = ICL_UNC_CBO_MSR_OFFSET, - .ops = &skl_uncore_msr_ops, + .ops = &icl_uncore_msr_ops, .format_group = &snb_uncore_format_group, }; @@ -334,13 +437,25 @@ static struct intel_uncore_type icl_uncore_clockbox = { .single_fixed = 1, .event_mask = SNB_UNC_CTL_EV_SEL_MASK, .format_group = &icl_uncore_clock_format_group, - .ops = &skl_uncore_msr_ops, + .ops = &icl_uncore_msr_ops, .event_descs = icl_uncore_events, }; +static struct intel_uncore_type icl_uncore_arb = { + .name = "arb", + .num_counters = 1, + .num_boxes = 1, + .perf_ctr_bits = 44, + .perf_ctr = ICL_UNC_ARB_PER_CTR, + .event_ctl = ICL_UNC_ARB_PERFEVTSEL, + .event_mask = SNB_UNC_RAW_EVENT_MASK, + .ops = &icl_uncore_msr_ops, + .format_group = &snb_uncore_format_group, +}; + static struct intel_uncore_type *icl_msr_uncores[] = { &icl_uncore_cbox, - &snb_uncore_arb, + &icl_uncore_arb, &icl_uncore_clockbox, NULL, }; @@ -358,7 +473,129 @@ void icl_uncore_cpu_init(void) { uncore_msr_uncores = icl_msr_uncores; icl_uncore_cbox.num_boxes = icl_get_cbox_num(); +} + +static struct intel_uncore_type *tgl_msr_uncores[] = { + &icl_uncore_cbox, + &snb_uncore_arb, + &icl_uncore_clockbox, + NULL, +}; + +static void rkl_uncore_msr_init_box(struct intel_uncore_box *box) +{ + if (box->pmu->pmu_idx == 0) + wrmsrl(SKL_UNC_PERF_GLOBAL_CTL, SNB_UNC_GLOBAL_CTL_EN); +} + +void tgl_uncore_cpu_init(void) +{ + uncore_msr_uncores = tgl_msr_uncores; + icl_uncore_cbox.num_boxes = icl_get_cbox_num(); + icl_uncore_cbox.ops = &skl_uncore_msr_ops; + icl_uncore_clockbox.ops = &skl_uncore_msr_ops; snb_uncore_arb.ops = &skl_uncore_msr_ops; + skl_uncore_msr_ops.init_box = rkl_uncore_msr_init_box; +} + +static void adl_uncore_msr_init_box(struct intel_uncore_box *box) +{ + if (box->pmu->pmu_idx == 0) + wrmsrl(ADL_UNC_PERF_GLOBAL_CTL, SNB_UNC_GLOBAL_CTL_EN); +} + +static void adl_uncore_msr_enable_box(struct intel_uncore_box *box) +{ + wrmsrl(ADL_UNC_PERF_GLOBAL_CTL, SNB_UNC_GLOBAL_CTL_EN); +} + +static void adl_uncore_msr_disable_box(struct intel_uncore_box *box) +{ + if (box->pmu->pmu_idx == 0) + wrmsrl(ADL_UNC_PERF_GLOBAL_CTL, 0); +} + +static void adl_uncore_msr_exit_box(struct intel_uncore_box *box) +{ + if (box->pmu->pmu_idx == 0) + wrmsrl(ADL_UNC_PERF_GLOBAL_CTL, 0); +} + +static struct intel_uncore_ops adl_uncore_msr_ops = { + .init_box = adl_uncore_msr_init_box, + .enable_box = adl_uncore_msr_enable_box, + .disable_box = adl_uncore_msr_disable_box, + .exit_box = adl_uncore_msr_exit_box, + .disable_event = snb_uncore_msr_disable_event, + .enable_event = snb_uncore_msr_enable_event, + .read_counter = uncore_msr_read_counter, +}; + +static struct attribute *adl_uncore_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_threshold.attr, + NULL, +}; + +static const struct attribute_group adl_uncore_format_group = { + .name = "format", + .attrs = adl_uncore_formats_attr, +}; + +static struct intel_uncore_type adl_uncore_cbox = { + .name = "cbox", + .num_counters = 2, + .perf_ctr_bits = 44, + .perf_ctr = ADL_UNC_CBO_0_PER_CTR0, + .event_ctl = ADL_UNC_CBO_0_PERFEVTSEL0, + .event_mask = ADL_UNC_RAW_EVENT_MASK, + .msr_offset = ICL_UNC_CBO_MSR_OFFSET, + .ops = &adl_uncore_msr_ops, + .format_group = &adl_uncore_format_group, +}; + +static struct intel_uncore_type adl_uncore_arb = { + .name = "arb", + .num_counters = 2, + .num_boxes = 2, + .perf_ctr_bits = 44, + .perf_ctr = ADL_UNC_ARB_PER_CTR0, + .event_ctl = ADL_UNC_ARB_PERFEVTSEL0, + .event_mask = SNB_UNC_RAW_EVENT_MASK, + .msr_offset = ADL_UNC_ARB_MSR_OFFSET, + .constraints = snb_uncore_arb_constraints, + .ops = &adl_uncore_msr_ops, + .format_group = &snb_uncore_format_group, +}; + +static struct intel_uncore_type adl_uncore_clockbox = { + .name = "clock", + .num_counters = 1, + .num_boxes = 1, + .fixed_ctr_bits = 48, + .fixed_ctr = ADL_UNC_FIXED_CTR, + .fixed_ctl = ADL_UNC_FIXED_CTR_CTRL, + .single_fixed = 1, + .event_mask = SNB_UNC_CTL_EV_SEL_MASK, + .format_group = &icl_uncore_clock_format_group, + .ops = &adl_uncore_msr_ops, + .event_descs = icl_uncore_events, +}; + +static struct intel_uncore_type *adl_msr_uncores[] = { + &adl_uncore_cbox, + &adl_uncore_arb, + &adl_uncore_clockbox, + NULL, +}; + +void adl_uncore_cpu_init(void) +{ + adl_uncore_cbox.num_boxes = icl_get_cbox_num(); + uncore_msr_uncores = adl_msr_uncores; } enum { @@ -374,6 +611,18 @@ static struct uncore_event_desc snb_uncore_imc_events[] = { INTEL_UNCORE_EVENT_DESC(data_writes.scale, "6.103515625e-5"), INTEL_UNCORE_EVENT_DESC(data_writes.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(gt_requests, "event=0x03"), + INTEL_UNCORE_EVENT_DESC(gt_requests.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(gt_requests.unit, "MiB"), + + INTEL_UNCORE_EVENT_DESC(ia_requests, "event=0x04"), + INTEL_UNCORE_EVENT_DESC(ia_requests.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(ia_requests.unit, "MiB"), + + INTEL_UNCORE_EVENT_DESC(io_requests, "event=0x05"), + INTEL_UNCORE_EVENT_DESC(io_requests.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(io_requests.unit, "MiB"), + { /* end: all zeroes */ }, }; @@ -389,13 +638,35 @@ static struct uncore_event_desc snb_uncore_imc_events[] = { #define SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE 0x5054 #define SNB_UNCORE_PCI_IMC_CTR_BASE SNB_UNCORE_PCI_IMC_DATA_READS_BASE +/* BW break down- legacy counters */ +#define SNB_UNCORE_PCI_IMC_GT_REQUESTS 0x3 +#define SNB_UNCORE_PCI_IMC_GT_REQUESTS_BASE 0x5040 +#define SNB_UNCORE_PCI_IMC_IA_REQUESTS 0x4 +#define SNB_UNCORE_PCI_IMC_IA_REQUESTS_BASE 0x5044 +#define SNB_UNCORE_PCI_IMC_IO_REQUESTS 0x5 +#define SNB_UNCORE_PCI_IMC_IO_REQUESTS_BASE 0x5048 + enum perf_snb_uncore_imc_freerunning_types { - SNB_PCI_UNCORE_IMC_DATA = 0, + SNB_PCI_UNCORE_IMC_DATA_READS = 0, + SNB_PCI_UNCORE_IMC_DATA_WRITES, + SNB_PCI_UNCORE_IMC_GT_REQUESTS, + SNB_PCI_UNCORE_IMC_IA_REQUESTS, + SNB_PCI_UNCORE_IMC_IO_REQUESTS, + SNB_PCI_UNCORE_IMC_FREERUNNING_TYPE_MAX, }; static struct freerunning_counters snb_uncore_imc_freerunning[] = { - [SNB_PCI_UNCORE_IMC_DATA] = { SNB_UNCORE_PCI_IMC_DATA_READS_BASE, 0x4, 0x0, 2, 32 }, + [SNB_PCI_UNCORE_IMC_DATA_READS] = { SNB_UNCORE_PCI_IMC_DATA_READS_BASE, + 0x0, 0x0, 1, 32 }, + [SNB_PCI_UNCORE_IMC_DATA_WRITES] = { SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE, + 0x0, 0x0, 1, 32 }, + [SNB_PCI_UNCORE_IMC_GT_REQUESTS] = { SNB_UNCORE_PCI_IMC_GT_REQUESTS_BASE, + 0x0, 0x0, 1, 32 }, + [SNB_PCI_UNCORE_IMC_IA_REQUESTS] = { SNB_UNCORE_PCI_IMC_IA_REQUESTS_BASE, + 0x0, 0x0, 1, 32 }, + [SNB_PCI_UNCORE_IMC_IO_REQUESTS] = { SNB_UNCORE_PCI_IMC_IO_REQUESTS_BASE, + 0x0, 0x0, 1, 32 }, }; static struct attribute *snb_uncore_imc_formats_attr[] = { @@ -410,6 +681,7 @@ static const struct attribute_group snb_uncore_imc_format_group = { static void snb_uncore_imc_init_box(struct intel_uncore_box *box) { + struct intel_uncore_type *type = box->pmu->type; struct pci_dev *pdev = box->pci_dev; int where = SNB_UNCORE_PCI_IMC_BAR_OFFSET; resource_size_t addr; @@ -425,7 +697,10 @@ static void snb_uncore_imc_init_box(struct intel_uncore_box *box) addr &= ~(PAGE_SIZE - 1); - box->io_addr = ioremap(addr, SNB_UNCORE_PCI_IMC_MAP_SIZE); + box->io_addr = ioremap(addr, type->mmio_map_size); + if (!box->io_addr) + pr_warn("perf uncore: Failed to ioremap for %s.\n", type->name); + box->hrtimer_duration = UNCORE_SNB_IMC_HRTIMER_INTERVAL; } @@ -505,6 +780,18 @@ static int snb_uncore_imc_event_init(struct perf_event *event) base = SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE; idx = UNCORE_PMC_IDX_FREERUNNING; break; + case SNB_UNCORE_PCI_IMC_GT_REQUESTS: + base = SNB_UNCORE_PCI_IMC_GT_REQUESTS_BASE; + idx = UNCORE_PMC_IDX_FREERUNNING; + break; + case SNB_UNCORE_PCI_IMC_IA_REQUESTS: + base = SNB_UNCORE_PCI_IMC_IA_REQUESTS_BASE; + idx = UNCORE_PMC_IDX_FREERUNNING; + break; + case SNB_UNCORE_PCI_IMC_IO_REQUESTS: + base = SNB_UNCORE_PCI_IMC_IO_REQUESTS_BASE; + idx = UNCORE_PMC_IDX_FREERUNNING; + break; default: return -EINVAL; } @@ -546,7 +833,7 @@ int snb_pci2phy_map_init(int devid) pci_dev_put(dev); return -ENOMEM; } - map->pbus_to_physid[bus] = 0; + map->pbus_to_dieid[bus] = 0; raw_spin_unlock(&pci2phy_map_lock); pci_dev_put(dev); @@ -554,6 +841,22 @@ int snb_pci2phy_map_init(int devid) return 0; } +static u64 snb_uncore_imc_read_counter(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + /* + * SNB IMC counters are 32-bit and are laid out back to back + * in MMIO space. Therefore we must use a 32-bit accessor function + * using readq() from uncore_mmio_read_counter() causes problems + * because it is reading 64-bit at a time. This is okay for the + * uncore_perf_event_update() function because it drops the upper + * 32-bits but not okay for plain uncore_read_counter() as invoked + * in uncore_pmu_event_start(). + */ + return (u64)readl(box->io_addr + hwc->event_base); +} + static struct pmu snb_uncore_imc_pmu = { .task_ctx_nr = perf_invalid_context, .event_init = snb_uncore_imc_event_init, @@ -573,14 +876,15 @@ static struct intel_uncore_ops snb_uncore_imc_ops = { .disable_event = snb_uncore_imc_disable_event, .enable_event = snb_uncore_imc_enable_event, .hw_config = snb_uncore_imc_hw_config, - .read_counter = uncore_mmio_read_counter, + .read_counter = snb_uncore_imc_read_counter, }; static struct intel_uncore_type snb_uncore_imc = { .name = "imc", - .num_counters = 2, + .num_counters = 5, .num_boxes = 1, .num_freerunning_types = SNB_PCI_UNCORE_IMC_FREERUNNING_TYPE_MAX, + .mmio_map_size = SNB_UNCORE_PCI_IMC_MAP_SIZE, .freerunning = snb_uncore_imc_freerunning, .event_descs = snb_uncore_imc_events, .format_group = &snb_uncore_imc_format_group, @@ -594,190 +898,80 @@ static struct intel_uncore_type *snb_pci_uncores[] = { }; static const struct pci_device_id snb_uncore_pci_ids[] = { - { /* IMC */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SNB_IMC), - .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), - }, + IMC_UNCORE_DEV(SNB), { /* end: all zeroes */ }, }; static const struct pci_device_id ivb_uncore_pci_ids[] = { - { /* IMC */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IVB_IMC), - .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), - }, - { /* IMC */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IVB_E3_IMC), - .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), - }, + IMC_UNCORE_DEV(IVB), + IMC_UNCORE_DEV(IVB_E3), { /* end: all zeroes */ }, }; static const struct pci_device_id hsw_uncore_pci_ids[] = { - { /* IMC */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_IMC), - .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), - }, - { /* IMC */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_U_IMC), - .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), - }, + IMC_UNCORE_DEV(HSW), + IMC_UNCORE_DEV(HSW_U), { /* end: all zeroes */ }, }; static const struct pci_device_id bdw_uncore_pci_ids[] = { - { /* IMC */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BDW_IMC), - .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), - }, + IMC_UNCORE_DEV(BDW), { /* end: all zeroes */ }, }; static const struct pci_device_id skl_uncore_pci_ids[] = { - { /* IMC */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_Y_IMC), - .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), - }, - { /* IMC */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_U_IMC), - .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), - }, - { /* IMC */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_HD_IMC), - .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), - }, - { /* IMC */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_HQ_IMC), - .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), - }, - { /* IMC */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_SD_IMC), - .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), - }, - { /* IMC */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_SQ_IMC), - .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), - }, - { /* IMC */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_E3_IMC), - .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), - }, - { /* IMC */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_Y_IMC), - .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), - }, - { /* IMC */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_U_IMC), - .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), - }, - { /* IMC */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_UQ_IMC), - .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), - }, - { /* IMC */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_SD_IMC), - .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), - }, - { /* IMC */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_SQ_IMC), - .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), - }, - { /* IMC */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_HQ_IMC), - .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), - }, - { /* IMC */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_WQ_IMC), - .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), - }, - { /* IMC */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_2U_IMC), - .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), - }, - { /* IMC */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4U_IMC), - .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), - }, - { /* IMC */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4H_IMC), - .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), - }, - { /* IMC */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6H_IMC), - .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), - }, - { /* IMC */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_2S_D_IMC), - .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), - }, - { /* IMC */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4S_D_IMC), - .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), - }, - { /* IMC */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6S_D_IMC), - .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), - }, - { /* IMC */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_8S_D_IMC), - .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), - }, - { /* IMC */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4S_W_IMC), - .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), - }, - { /* IMC */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6S_W_IMC), - .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), - }, - { /* IMC */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_8S_W_IMC), - .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), - }, - { /* IMC */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4S_S_IMC), - .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), - }, - { /* IMC */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6S_S_IMC), - .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), - }, - { /* IMC */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_8S_S_IMC), - .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), - }, - { /* IMC */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_AML_YD_IMC), - .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), - }, - { /* IMC */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_AML_YQ_IMC), - .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), - }, - { /* IMC */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_WHL_UQ_IMC), - .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), - }, - { /* IMC */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_WHL_4_UQ_IMC), - .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), - }, - { /* IMC */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_WHL_UD_IMC), - .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), - }, + IMC_UNCORE_DEV(SKL_Y), + IMC_UNCORE_DEV(SKL_U), + IMC_UNCORE_DEV(SKL_HD), + IMC_UNCORE_DEV(SKL_HQ), + IMC_UNCORE_DEV(SKL_SD), + IMC_UNCORE_DEV(SKL_SQ), + IMC_UNCORE_DEV(SKL_E3), + IMC_UNCORE_DEV(KBL_Y), + IMC_UNCORE_DEV(KBL_U), + IMC_UNCORE_DEV(KBL_UQ), + IMC_UNCORE_DEV(KBL_SD), + IMC_UNCORE_DEV(KBL_SQ), + IMC_UNCORE_DEV(KBL_HQ), + IMC_UNCORE_DEV(KBL_WQ), + IMC_UNCORE_DEV(CFL_2U), + IMC_UNCORE_DEV(CFL_4U), + IMC_UNCORE_DEV(CFL_4H), + IMC_UNCORE_DEV(CFL_6H), + IMC_UNCORE_DEV(CFL_2S_D), + IMC_UNCORE_DEV(CFL_4S_D), + IMC_UNCORE_DEV(CFL_6S_D), + IMC_UNCORE_DEV(CFL_8S_D), + IMC_UNCORE_DEV(CFL_4S_W), + IMC_UNCORE_DEV(CFL_6S_W), + IMC_UNCORE_DEV(CFL_8S_W), + IMC_UNCORE_DEV(CFL_4S_S), + IMC_UNCORE_DEV(CFL_6S_S), + IMC_UNCORE_DEV(CFL_8S_S), + IMC_UNCORE_DEV(AML_YD), + IMC_UNCORE_DEV(AML_YQ), + IMC_UNCORE_DEV(WHL_UQ), + IMC_UNCORE_DEV(WHL_4_UQ), + IMC_UNCORE_DEV(WHL_UD), + IMC_UNCORE_DEV(CML_H1), + IMC_UNCORE_DEV(CML_H2), + IMC_UNCORE_DEV(CML_H3), + IMC_UNCORE_DEV(CML_U1), + IMC_UNCORE_DEV(CML_U2), + IMC_UNCORE_DEV(CML_U3), + IMC_UNCORE_DEV(CML_S1), + IMC_UNCORE_DEV(CML_S2), + IMC_UNCORE_DEV(CML_S3), + IMC_UNCORE_DEV(CML_S4), + IMC_UNCORE_DEV(CML_S5), { /* end: all zeroes */ }, }; static const struct pci_device_id icl_uncore_pci_ids[] = { - { /* IMC */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICL_U_IMC), - .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), - }, - { /* IMC */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICL_U2_IMC), - .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), - }, + IMC_UNCORE_DEV(ICL_U), + IMC_UNCORE_DEV(ICL_U2), + IMC_UNCORE_DEV(RKL_1), + IMC_UNCORE_DEV(RKL_2), { /* end: all zeroes */ }, }; @@ -858,8 +1052,21 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = { IMC_DEV(WHL_UQ_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U Mobile Quad Core */ IMC_DEV(WHL_4_UQ_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U Mobile Quad Core */ IMC_DEV(WHL_UD_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U Mobile Dual Core */ + IMC_DEV(CML_H1_IMC, &skl_uncore_pci_driver), + IMC_DEV(CML_H2_IMC, &skl_uncore_pci_driver), + IMC_DEV(CML_H3_IMC, &skl_uncore_pci_driver), + IMC_DEV(CML_U1_IMC, &skl_uncore_pci_driver), + IMC_DEV(CML_U2_IMC, &skl_uncore_pci_driver), + IMC_DEV(CML_U3_IMC, &skl_uncore_pci_driver), + IMC_DEV(CML_S1_IMC, &skl_uncore_pci_driver), + IMC_DEV(CML_S2_IMC, &skl_uncore_pci_driver), + IMC_DEV(CML_S3_IMC, &skl_uncore_pci_driver), + IMC_DEV(CML_S4_IMC, &skl_uncore_pci_driver), + IMC_DEV(CML_S5_IMC, &skl_uncore_pci_driver), IMC_DEV(ICL_U_IMC, &icl_uncore_pci_driver), /* 10th Gen Core Mobile */ IMC_DEV(ICL_U2_IMC, &icl_uncore_pci_driver), /* 10th Gen Core Mobile */ + IMC_DEV(RKL_1_IMC, &icl_uncore_pci_driver), + IMC_DEV(RKL_2_IMC, &icl_uncore_pci_driver), { /* end marker */ } }; @@ -1002,3 +1209,333 @@ void nhm_uncore_cpu_init(void) } /* end of Nehalem uncore support */ + +/* Tiger Lake MMIO uncore support */ + +static const struct pci_device_id tgl_uncore_pci_ids[] = { + IMC_UNCORE_DEV(TGL_U1), + IMC_UNCORE_DEV(TGL_U2), + IMC_UNCORE_DEV(TGL_U3), + IMC_UNCORE_DEV(TGL_U4), + IMC_UNCORE_DEV(TGL_H), + IMC_UNCORE_DEV(ADL_1), + IMC_UNCORE_DEV(ADL_2), + IMC_UNCORE_DEV(ADL_3), + IMC_UNCORE_DEV(ADL_4), + IMC_UNCORE_DEV(ADL_5), + IMC_UNCORE_DEV(ADL_6), + IMC_UNCORE_DEV(ADL_7), + IMC_UNCORE_DEV(ADL_8), + IMC_UNCORE_DEV(ADL_9), + IMC_UNCORE_DEV(ADL_10), + IMC_UNCORE_DEV(ADL_11), + IMC_UNCORE_DEV(ADL_12), + IMC_UNCORE_DEV(ADL_13), + IMC_UNCORE_DEV(ADL_14), + IMC_UNCORE_DEV(ADL_15), + IMC_UNCORE_DEV(ADL_16), + IMC_UNCORE_DEV(ADL_17), + IMC_UNCORE_DEV(ADL_18), + IMC_UNCORE_DEV(ADL_19), + IMC_UNCORE_DEV(ADL_20), + IMC_UNCORE_DEV(ADL_21), + IMC_UNCORE_DEV(RPL_1), + IMC_UNCORE_DEV(RPL_2), + IMC_UNCORE_DEV(RPL_3), + IMC_UNCORE_DEV(RPL_4), + IMC_UNCORE_DEV(RPL_5), + IMC_UNCORE_DEV(RPL_6), + IMC_UNCORE_DEV(RPL_7), + IMC_UNCORE_DEV(RPL_8), + IMC_UNCORE_DEV(RPL_9), + IMC_UNCORE_DEV(RPL_10), + IMC_UNCORE_DEV(RPL_11), + IMC_UNCORE_DEV(RPL_12), + IMC_UNCORE_DEV(RPL_13), + IMC_UNCORE_DEV(RPL_14), + IMC_UNCORE_DEV(RPL_15), + IMC_UNCORE_DEV(RPL_16), + IMC_UNCORE_DEV(RPL_17), + IMC_UNCORE_DEV(RPL_18), + IMC_UNCORE_DEV(RPL_19), + IMC_UNCORE_DEV(RPL_20), + IMC_UNCORE_DEV(RPL_21), + IMC_UNCORE_DEV(RPL_22), + IMC_UNCORE_DEV(RPL_23), + IMC_UNCORE_DEV(RPL_24), + IMC_UNCORE_DEV(RPL_25), + { /* end: all zeroes */ } +}; + +enum perf_tgl_uncore_imc_freerunning_types { + TGL_MMIO_UNCORE_IMC_DATA_TOTAL, + TGL_MMIO_UNCORE_IMC_DATA_READ, + TGL_MMIO_UNCORE_IMC_DATA_WRITE, + TGL_MMIO_UNCORE_IMC_FREERUNNING_TYPE_MAX +}; + +static struct freerunning_counters tgl_l_uncore_imc_freerunning[] = { + [TGL_MMIO_UNCORE_IMC_DATA_TOTAL] = { 0x5040, 0x0, 0x0, 1, 64 }, + [TGL_MMIO_UNCORE_IMC_DATA_READ] = { 0x5058, 0x0, 0x0, 1, 64 }, + [TGL_MMIO_UNCORE_IMC_DATA_WRITE] = { 0x50A0, 0x0, 0x0, 1, 64 }, +}; + +static struct freerunning_counters tgl_uncore_imc_freerunning[] = { + [TGL_MMIO_UNCORE_IMC_DATA_TOTAL] = { 0xd840, 0x0, 0x0, 1, 64 }, + [TGL_MMIO_UNCORE_IMC_DATA_READ] = { 0xd858, 0x0, 0x0, 1, 64 }, + [TGL_MMIO_UNCORE_IMC_DATA_WRITE] = { 0xd8A0, 0x0, 0x0, 1, 64 }, +}; + +static struct uncore_event_desc tgl_uncore_imc_events[] = { + INTEL_UNCORE_EVENT_DESC(data_total, "event=0xff,umask=0x10"), + INTEL_UNCORE_EVENT_DESC(data_total.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(data_total.unit, "MiB"), + + INTEL_UNCORE_EVENT_DESC(data_read, "event=0xff,umask=0x20"), + INTEL_UNCORE_EVENT_DESC(data_read.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(data_read.unit, "MiB"), + + INTEL_UNCORE_EVENT_DESC(data_write, "event=0xff,umask=0x30"), + INTEL_UNCORE_EVENT_DESC(data_write.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(data_write.unit, "MiB"), + + { /* end: all zeroes */ } +}; + +static struct pci_dev *tgl_uncore_get_mc_dev(void) +{ + const struct pci_device_id *ids = tgl_uncore_pci_ids; + struct pci_dev *mc_dev = NULL; + + while (ids && ids->vendor) { + mc_dev = pci_get_device(PCI_VENDOR_ID_INTEL, ids->device, NULL); + if (mc_dev) + return mc_dev; + ids++; + } + + return mc_dev; +} + +#define TGL_UNCORE_MMIO_IMC_MEM_OFFSET 0x10000 +#define TGL_UNCORE_PCI_IMC_MAP_SIZE 0xe000 + +static void __uncore_imc_init_box(struct intel_uncore_box *box, + unsigned int base_offset) +{ + struct pci_dev *pdev = tgl_uncore_get_mc_dev(); + struct intel_uncore_pmu *pmu = box->pmu; + struct intel_uncore_type *type = pmu->type; + resource_size_t addr; + u32 mch_bar; + + if (!pdev) { + pr_warn("perf uncore: Cannot find matched IMC device.\n"); + return; + } + + pci_read_config_dword(pdev, SNB_UNCORE_PCI_IMC_BAR_OFFSET, &mch_bar); + /* MCHBAR is disabled */ + if (!(mch_bar & BIT(0))) { + pr_warn("perf uncore: MCHBAR is disabled. Failed to map IMC free-running counters.\n"); + return; + } + mch_bar &= ~BIT(0); + addr = (resource_size_t)(mch_bar + TGL_UNCORE_MMIO_IMC_MEM_OFFSET * pmu->pmu_idx); + +#ifdef CONFIG_PHYS_ADDR_T_64BIT + pci_read_config_dword(pdev, SNB_UNCORE_PCI_IMC_BAR_OFFSET + 4, &mch_bar); + addr |= ((resource_size_t)mch_bar << 32); +#endif + + addr += base_offset; + box->io_addr = ioremap(addr, type->mmio_map_size); + if (!box->io_addr) + pr_warn("perf uncore: Failed to ioremap for %s.\n", type->name); +} + +static void tgl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box) +{ + __uncore_imc_init_box(box, 0); +} + +static struct intel_uncore_ops tgl_uncore_imc_freerunning_ops = { + .init_box = tgl_uncore_imc_freerunning_init_box, + .exit_box = uncore_mmio_exit_box, + .read_counter = uncore_mmio_read_counter, + .hw_config = uncore_freerunning_hw_config, +}; + +static struct attribute *tgl_uncore_imc_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + NULL +}; + +static const struct attribute_group tgl_uncore_imc_format_group = { + .name = "format", + .attrs = tgl_uncore_imc_formats_attr, +}; + +static struct intel_uncore_type tgl_uncore_imc_free_running = { + .name = "imc_free_running", + .num_counters = 3, + .num_boxes = 2, + .num_freerunning_types = TGL_MMIO_UNCORE_IMC_FREERUNNING_TYPE_MAX, + .mmio_map_size = TGL_UNCORE_PCI_IMC_MAP_SIZE, + .freerunning = tgl_uncore_imc_freerunning, + .ops = &tgl_uncore_imc_freerunning_ops, + .event_descs = tgl_uncore_imc_events, + .format_group = &tgl_uncore_imc_format_group, +}; + +static struct intel_uncore_type *tgl_mmio_uncores[] = { + &tgl_uncore_imc_free_running, + NULL +}; + +void tgl_l_uncore_mmio_init(void) +{ + tgl_uncore_imc_free_running.freerunning = tgl_l_uncore_imc_freerunning; + uncore_mmio_uncores = tgl_mmio_uncores; +} + +void tgl_uncore_mmio_init(void) +{ + uncore_mmio_uncores = tgl_mmio_uncores; +} + +/* end of Tiger Lake MMIO uncore support */ + +/* Alder Lake MMIO uncore support */ +#define ADL_UNCORE_IMC_BASE 0xd900 +#define ADL_UNCORE_IMC_MAP_SIZE 0x200 +#define ADL_UNCORE_IMC_CTR 0xe8 +#define ADL_UNCORE_IMC_CTRL 0xd0 +#define ADL_UNCORE_IMC_GLOBAL_CTL 0xc0 +#define ADL_UNCORE_IMC_BOX_CTL 0xc4 +#define ADL_UNCORE_IMC_FREERUNNING_BASE 0xd800 +#define ADL_UNCORE_IMC_FREERUNNING_MAP_SIZE 0x100 + +#define ADL_UNCORE_IMC_CTL_FRZ (1 << 0) +#define ADL_UNCORE_IMC_CTL_RST_CTRL (1 << 1) +#define ADL_UNCORE_IMC_CTL_RST_CTRS (1 << 2) +#define ADL_UNCORE_IMC_CTL_INT (ADL_UNCORE_IMC_CTL_RST_CTRL | \ + ADL_UNCORE_IMC_CTL_RST_CTRS) + +static void adl_uncore_imc_init_box(struct intel_uncore_box *box) +{ + __uncore_imc_init_box(box, ADL_UNCORE_IMC_BASE); + + /* The global control in MC1 can control both MCs. */ + if (box->io_addr && (box->pmu->pmu_idx == 1)) + writel(ADL_UNCORE_IMC_CTL_INT, box->io_addr + ADL_UNCORE_IMC_GLOBAL_CTL); +} + +static void adl_uncore_mmio_disable_box(struct intel_uncore_box *box) +{ + if (!box->io_addr) + return; + + writel(ADL_UNCORE_IMC_CTL_FRZ, box->io_addr + uncore_mmio_box_ctl(box)); +} + +static void adl_uncore_mmio_enable_box(struct intel_uncore_box *box) +{ + if (!box->io_addr) + return; + + writel(0, box->io_addr + uncore_mmio_box_ctl(box)); +} + +static struct intel_uncore_ops adl_uncore_mmio_ops = { + .init_box = adl_uncore_imc_init_box, + .exit_box = uncore_mmio_exit_box, + .disable_box = adl_uncore_mmio_disable_box, + .enable_box = adl_uncore_mmio_enable_box, + .disable_event = intel_generic_uncore_mmio_disable_event, + .enable_event = intel_generic_uncore_mmio_enable_event, + .read_counter = uncore_mmio_read_counter, +}; + +#define ADL_UNC_CTL_CHMASK_MASK 0x00000f00 +#define ADL_UNC_IMC_EVENT_MASK (SNB_UNC_CTL_EV_SEL_MASK | \ + ADL_UNC_CTL_CHMASK_MASK | \ + SNB_UNC_CTL_EDGE_DET) + +static struct attribute *adl_uncore_imc_formats_attr[] = { + &format_attr_event.attr, + &format_attr_chmask.attr, + &format_attr_edge.attr, + NULL, +}; + +static const struct attribute_group adl_uncore_imc_format_group = { + .name = "format", + .attrs = adl_uncore_imc_formats_attr, +}; + +static struct intel_uncore_type adl_uncore_imc = { + .name = "imc", + .num_counters = 5, + .num_boxes = 2, + .perf_ctr_bits = 64, + .perf_ctr = ADL_UNCORE_IMC_CTR, + .event_ctl = ADL_UNCORE_IMC_CTRL, + .event_mask = ADL_UNC_IMC_EVENT_MASK, + .box_ctl = ADL_UNCORE_IMC_BOX_CTL, + .mmio_offset = 0, + .mmio_map_size = ADL_UNCORE_IMC_MAP_SIZE, + .ops = &adl_uncore_mmio_ops, + .format_group = &adl_uncore_imc_format_group, +}; + +enum perf_adl_uncore_imc_freerunning_types { + ADL_MMIO_UNCORE_IMC_DATA_TOTAL, + ADL_MMIO_UNCORE_IMC_DATA_READ, + ADL_MMIO_UNCORE_IMC_DATA_WRITE, + ADL_MMIO_UNCORE_IMC_FREERUNNING_TYPE_MAX +}; + +static struct freerunning_counters adl_uncore_imc_freerunning[] = { + [ADL_MMIO_UNCORE_IMC_DATA_TOTAL] = { 0x40, 0x0, 0x0, 1, 64 }, + [ADL_MMIO_UNCORE_IMC_DATA_READ] = { 0x58, 0x0, 0x0, 1, 64 }, + [ADL_MMIO_UNCORE_IMC_DATA_WRITE] = { 0xA0, 0x0, 0x0, 1, 64 }, +}; + +static void adl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box) +{ + __uncore_imc_init_box(box, ADL_UNCORE_IMC_FREERUNNING_BASE); +} + +static struct intel_uncore_ops adl_uncore_imc_freerunning_ops = { + .init_box = adl_uncore_imc_freerunning_init_box, + .exit_box = uncore_mmio_exit_box, + .read_counter = uncore_mmio_read_counter, + .hw_config = uncore_freerunning_hw_config, +}; + +static struct intel_uncore_type adl_uncore_imc_free_running = { + .name = "imc_free_running", + .num_counters = 3, + .num_boxes = 2, + .num_freerunning_types = ADL_MMIO_UNCORE_IMC_FREERUNNING_TYPE_MAX, + .mmio_map_size = ADL_UNCORE_IMC_FREERUNNING_MAP_SIZE, + .freerunning = adl_uncore_imc_freerunning, + .ops = &adl_uncore_imc_freerunning_ops, + .event_descs = tgl_uncore_imc_events, + .format_group = &tgl_uncore_imc_format_group, +}; + +static struct intel_uncore_type *adl_mmio_uncores[] = { + &adl_uncore_imc, + &adl_uncore_imc_free_running, + NULL +}; + +void adl_uncore_mmio_init(void) +{ + uncore_mmio_uncores = adl_mmio_uncores; +} + +/* end of Alder Lake MMIO uncore support */ diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index ad20220af303..ed869443efb2 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* SandyBridge-EP/IvyTown uncore support */ #include "uncore.h" +#include "uncore_discovery.h" /* SNB-EP pci bus to socket mapping */ #define SNBEP_CPUNODEID 0x40 @@ -273,6 +274,30 @@ #define SKX_CPUNODEID 0xc0 #define SKX_GIDNIDMAP 0xd4 +/* + * The CPU_BUS_NUMBER MSR returns the values of the respective CPUBUSNO CSR + * that BIOS programmed. MSR has package scope. + * | Bit | Default | Description + * | [63] | 00h | VALID - When set, indicates the CPU bus + * numbers have been initialized. (RO) + * |[62:48]| --- | Reserved + * |[47:40]| 00h | BUS_NUM_5 - Return the bus number BIOS assigned + * CPUBUSNO(5). (RO) + * |[39:32]| 00h | BUS_NUM_4 - Return the bus number BIOS assigned + * CPUBUSNO(4). (RO) + * |[31:24]| 00h | BUS_NUM_3 - Return the bus number BIOS assigned + * CPUBUSNO(3). (RO) + * |[23:16]| 00h | BUS_NUM_2 - Return the bus number BIOS assigned + * CPUBUSNO(2). (RO) + * |[15:8] | 00h | BUS_NUM_1 - Return the bus number BIOS assigned + * CPUBUSNO(1). (RO) + * | [7:0] | 00h | BUS_NUM_0 - Return the bus number BIOS assigned + * CPUBUSNO(0). (RO) + */ +#define SKX_MSR_CPU_BUS_NUMBER 0x300 +#define SKX_MSR_CPU_BUS_VALID_BIT (1ULL << 63) +#define BUS_NUM_STRIDE 8 + /* SKX CHA */ #define SKX_CHA_MSR_PMON_BOX_FILTER_TID (0x1ffULL << 0) #define SKX_CHA_MSR_PMON_BOX_FILTER_LINK (0xfULL << 9) @@ -324,6 +349,13 @@ #define SKX_M2M_PCI_PMON_CTR0 0x200 #define SKX_M2M_PCI_PMON_BOX_CTL 0x258 +/* Memory Map registers device ID */ +#define SNR_ICX_MESH2IIO_MMAP_DID 0x9a2 +#define SNR_ICX_SAD_CONTROL_CFG 0x3f4 + +/* Getting I/O stack id in SAD_COTROL_CFG notation */ +#define SAD_CONTROL_STACK_ID(data) (((data) >> 4) & 0x7) + /* SNR Ubox */ #define SNR_U_MSR_PMON_CTR0 0x1f98 #define SNR_U_MSR_PMON_CTL0 0x1f91 @@ -369,6 +401,11 @@ #define SNR_M2M_PCI_PMON_BOX_CTL 0x438 #define SNR_M2M_PCI_PMON_UMASK_EXT 0xff +/* SNR PCIE3 */ +#define SNR_PCIE3_PCI_PMON_CTL0 0x508 +#define SNR_PCIE3_PCI_PMON_CTR0 0x4e8 +#define SNR_PCIE3_PCI_PMON_BOX_CTL 0x4e0 + /* SNR IMC */ #define SNR_IMC_MMIO_PMON_FIXED_CTL 0x54 #define SNR_IMC_MMIO_PMON_FIXED_CTR 0x38 @@ -382,6 +419,53 @@ #define SNR_IMC_MMIO_MEM0_OFFSET 0xd8 #define SNR_IMC_MMIO_MEM0_MASK 0x7FF +/* ICX CHA */ +#define ICX_C34_MSR_PMON_CTR0 0xb68 +#define ICX_C34_MSR_PMON_CTL0 0xb61 +#define ICX_C34_MSR_PMON_BOX_CTL 0xb60 +#define ICX_C34_MSR_PMON_BOX_FILTER0 0xb65 + +/* ICX IIO */ +#define ICX_IIO_MSR_PMON_CTL0 0xa58 +#define ICX_IIO_MSR_PMON_CTR0 0xa51 +#define ICX_IIO_MSR_PMON_BOX_CTL 0xa50 + +/* ICX IRP */ +#define ICX_IRP0_MSR_PMON_CTL0 0xa4d +#define ICX_IRP0_MSR_PMON_CTR0 0xa4b +#define ICX_IRP0_MSR_PMON_BOX_CTL 0xa4a + +/* ICX M2PCIE */ +#define ICX_M2PCIE_MSR_PMON_CTL0 0xa46 +#define ICX_M2PCIE_MSR_PMON_CTR0 0xa41 +#define ICX_M2PCIE_MSR_PMON_BOX_CTL 0xa40 + +/* ICX UPI */ +#define ICX_UPI_PCI_PMON_CTL0 0x350 +#define ICX_UPI_PCI_PMON_CTR0 0x320 +#define ICX_UPI_PCI_PMON_BOX_CTL 0x318 +#define ICX_UPI_CTL_UMASK_EXT 0xffffff + +/* ICX M3UPI*/ +#define ICX_M3UPI_PCI_PMON_CTL0 0xd8 +#define ICX_M3UPI_PCI_PMON_CTR0 0xa8 +#define ICX_M3UPI_PCI_PMON_BOX_CTL 0xa0 + +/* ICX IMC */ +#define ICX_NUMBER_IMC_CHN 3 +#define ICX_IMC_MEM_STRIDE 0x4 + +/* SPR */ +#define SPR_RAW_EVENT_MASK_EXT 0xffffff + +/* SPR CHA */ +#define SPR_CHA_PMON_CTL_TID_EN (1 << 16) +#define SPR_CHA_PMON_EVENT_MASK (SNBEP_PMON_RAW_EVENT_MASK | \ + SPR_CHA_PMON_CTL_TID_EN) +#define SPR_CHA_PMON_BOX_FILTER_TID 0x3ff + +#define SPR_C0_MSR_PMON_BOX_FILTER0 0x200e + DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7"); DEFINE_UNCORE_FORMAT_ATTR(event2, event, "config:0-6"); DEFINE_UNCORE_FORMAT_ATTR(event_ext, event, "config:0-7,21"); @@ -390,9 +474,11 @@ DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15"); DEFINE_UNCORE_FORMAT_ATTR(umask_ext, umask, "config:8-15,32-43,45-55"); DEFINE_UNCORE_FORMAT_ATTR(umask_ext2, umask, "config:8-15,32-57"); DEFINE_UNCORE_FORMAT_ATTR(umask_ext3, umask, "config:8-15,32-39"); +DEFINE_UNCORE_FORMAT_ATTR(umask_ext4, umask, "config:8-15,32-55"); DEFINE_UNCORE_FORMAT_ATTR(qor, qor, "config:16"); DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19"); +DEFINE_UNCORE_FORMAT_ATTR(tid_en2, tid_en, "config:16"); DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23"); DEFINE_UNCORE_FORMAT_ATTR(thresh9, thresh, "config:24-35"); DEFINE_UNCORE_FORMAT_ATTR(thresh8, thresh, "config:24-31"); @@ -1093,7 +1179,6 @@ enum { SNBEP_PCI_QPI_PORT0_FILTER, SNBEP_PCI_QPI_PORT1_FILTER, BDX_PCI_QPI_PORT2_FILTER, - HSWEP_PCI_PCU_3, }; static int snbep_qpi_hw_config(struct intel_uncore_box *box, struct perf_event *event) @@ -1293,7 +1378,7 @@ static struct pci_driver snbep_uncore_pci_driver = { static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool reverse) { struct pci_dev *ubox_dev = NULL; - int i, bus, nodeid, segment; + int i, bus, nodeid, segment, die_id; struct pci2phy_map *map; int err = 0; u32 config = 0; @@ -1304,36 +1389,79 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool if (!ubox_dev) break; bus = ubox_dev->bus->number; - /* get the Node ID of the local register */ - err = pci_read_config_dword(ubox_dev, nodeid_loc, &config); - if (err) - break; - nodeid = config & NODE_ID_MASK; - /* get the Node ID mapping */ - err = pci_read_config_dword(ubox_dev, idmap_loc, &config); - if (err) - break; + /* + * The nodeid and idmap registers only contain enough + * information to handle 8 nodes. On systems with more + * than 8 nodes, we need to rely on NUMA information, + * filled in from BIOS supplied information, to determine + * the topology. + */ + if (nr_node_ids <= 8) { + /* get the Node ID of the local register */ + err = pci_read_config_dword(ubox_dev, nodeid_loc, &config); + if (err) + break; + nodeid = config & NODE_ID_MASK; + /* get the Node ID mapping */ + err = pci_read_config_dword(ubox_dev, idmap_loc, &config); + if (err) + break; - segment = pci_domain_nr(ubox_dev->bus); - raw_spin_lock(&pci2phy_map_lock); - map = __find_pci2phy_map(segment); - if (!map) { + segment = pci_domain_nr(ubox_dev->bus); + raw_spin_lock(&pci2phy_map_lock); + map = __find_pci2phy_map(segment); + if (!map) { + raw_spin_unlock(&pci2phy_map_lock); + err = -ENOMEM; + break; + } + + /* + * every three bits in the Node ID mapping register maps + * to a particular node. + */ + for (i = 0; i < 8; i++) { + if (nodeid == ((config >> (3 * i)) & 0x7)) { + if (topology_max_die_per_package() > 1) + die_id = i; + else + die_id = topology_phys_to_logical_pkg(i); + if (die_id < 0) + die_id = -ENODEV; + map->pbus_to_dieid[bus] = die_id; + break; + } + } raw_spin_unlock(&pci2phy_map_lock); - err = -ENOMEM; - break; - } + } else { + int node = pcibus_to_node(ubox_dev->bus); + int cpu; + + segment = pci_domain_nr(ubox_dev->bus); + raw_spin_lock(&pci2phy_map_lock); + map = __find_pci2phy_map(segment); + if (!map) { + raw_spin_unlock(&pci2phy_map_lock); + err = -ENOMEM; + break; + } - /* - * every three bits in the Node ID mapping register maps - * to a particular node. - */ - for (i = 0; i < 8; i++) { - if (nodeid == ((config >> (3 * i)) & 0x7)) { - map->pbus_to_physid[bus] = i; + die_id = -1; + for_each_cpu(cpu, cpumask_of_pcibus(ubox_dev->bus)) { + struct cpuinfo_x86 *c = &cpu_data(cpu); + + if (c->initialized && cpu_to_node(cpu) == node) { + map->pbus_to_dieid[bus] = die_id = c->logical_die_id; + break; + } + } + raw_spin_unlock(&pci2phy_map_lock); + + if (WARN_ON_ONCE(die_id == -1)) { + err = -EINVAL; break; } } - raw_spin_unlock(&pci2phy_map_lock); } if (!err) { @@ -1346,17 +1474,17 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool i = -1; if (reverse) { for (bus = 255; bus >= 0; bus--) { - if (map->pbus_to_physid[bus] >= 0) - i = map->pbus_to_physid[bus]; + if (map->pbus_to_dieid[bus] != -1) + i = map->pbus_to_dieid[bus]; else - map->pbus_to_physid[bus] = i; + map->pbus_to_dieid[bus] = i; } } else { for (bus = 0; bus <= 255; bus++) { - if (map->pbus_to_physid[bus] >= 0) - i = map->pbus_to_physid[bus]; + if (map->pbus_to_dieid[bus] != -1) + i = map->pbus_to_dieid[bus]; else - map->pbus_to_physid[bus] = i; + map->pbus_to_dieid[bus] = i; } } } @@ -2750,22 +2878,33 @@ static struct intel_uncore_type *hswep_msr_uncores[] = { NULL, }; -void hswep_uncore_cpu_init(void) +#define HSWEP_PCU_DID 0x2fc0 +#define HSWEP_PCU_CAPID4_OFFET 0x94 +#define hswep_get_chop(_cap) (((_cap) >> 6) & 0x3) + +static bool hswep_has_limit_sbox(unsigned int device) { - int pkg = boot_cpu_data.logical_proc_id; + struct pci_dev *dev = pci_get_device(PCI_VENDOR_ID_INTEL, device, NULL); + u32 capid4; + + if (!dev) + return false; + + pci_read_config_dword(dev, HSWEP_PCU_CAPID4_OFFET, &capid4); + if (!hswep_get_chop(capid4)) + return true; + + return false; +} +void hswep_uncore_cpu_init(void) +{ if (hswep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) hswep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; /* Detect 6-8 core systems with only two SBOXes */ - if (uncore_extra_pci_dev[pkg].dev[HSWEP_PCI_PCU_3]) { - u32 capid4; - - pci_read_config_dword(uncore_extra_pci_dev[pkg].dev[HSWEP_PCI_PCU_3], - 0x94, &capid4); - if (((capid4 >> 6) & 0x3) == 0) - hswep_uncore_sbox.num_boxes = 2; - } + if (hswep_has_limit_sbox(HSWEP_PCU_DID)) + hswep_uncore_sbox.num_boxes = 2; uncore_msr_uncores = hswep_msr_uncores; } @@ -3028,11 +3167,6 @@ static const struct pci_device_id hswep_uncore_pci_ids[] = { .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, SNBEP_PCI_QPI_PORT1_FILTER), }, - { /* PCU.3 (for Capability registers) */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fc0), - .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, - HSWEP_PCI_PCU_3), - }, { /* end: all zeroes */ } }; @@ -3124,27 +3258,18 @@ static struct event_constraint bdx_uncore_pcu_constraints[] = { EVENT_CONSTRAINT_END }; +#define BDX_PCU_DID 0x6fc0 + void bdx_uncore_cpu_init(void) { - int pkg = topology_phys_to_logical_pkg(boot_cpu_data.phys_proc_id); - if (bdx_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; uncore_msr_uncores = bdx_msr_uncores; - /* BDX-DE doesn't have SBOX */ - if (boot_cpu_data.x86_model == 86) { - uncore_msr_uncores[BDX_MSR_UNCORE_SBOX] = NULL; /* Detect systems with no SBOXes */ - } else if (uncore_extra_pci_dev[pkg].dev[HSWEP_PCI_PCU_3]) { - struct pci_dev *pdev; - u32 capid4; - - pdev = uncore_extra_pci_dev[pkg].dev[HSWEP_PCI_PCU_3]; - pci_read_config_dword(pdev, 0x94, &capid4); - if (((capid4 >> 6) & 0x3) == 0) - bdx_msr_uncores[BDX_MSR_UNCORE_SBOX] = NULL; - } + if ((boot_cpu_data.x86_model == 86) || hswep_has_limit_sbox(BDX_PCU_DID)) + uncore_msr_uncores[BDX_MSR_UNCORE_SBOX] = NULL; + hswep_uncore_pcu.constraints = bdx_uncore_pcu_constraints; } @@ -3365,11 +3490,6 @@ static const struct pci_device_id bdx_uncore_pci_ids[] = { .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, BDX_PCI_QPI_PORT2_FILTER), }, - { /* PCU.3 (for Capability registers) */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fc0), - .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, - HSWEP_PCI_PCU_3), - }, { /* end: all zeroes */ } }; @@ -3488,6 +3608,9 @@ static int skx_cha_hw_config(struct intel_uncore_box *box, struct perf_event *ev struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; struct extra_reg *er; int idx = 0; + /* Any of the CHA events may be filtered by Thread/Core-ID.*/ + if (event->hw.config & SNBEP_CBO_PMON_CTL_TID_EN) + idx = SKX_CHA_MSR_PMON_BOX_FILTER_TID; for (er = skx_uncore_cha_extra_regs; er->msr; er++) { if (er->event != (event->hw.config & er->config_mask)) @@ -3555,6 +3678,7 @@ static struct event_constraint skx_uncore_iio_constraints[] = { UNCORE_EVENT_CONSTRAINT(0xc0, 0xc), UNCORE_EVENT_CONSTRAINT(0xc5, 0xc), UNCORE_EVENT_CONSTRAINT(0xd4, 0xc), + UNCORE_EVENT_CONSTRAINT(0xd5, 0xc), EVENT_CONSTRAINT_END }; @@ -3575,6 +3699,188 @@ static struct intel_uncore_ops skx_uncore_iio_ops = { .read_counter = uncore_msr_read_counter, }; +static inline u8 skx_iio_stack(struct intel_uncore_pmu *pmu, int die) +{ + return pmu->type->topology[die].configuration >> + (pmu->pmu_idx * BUS_NUM_STRIDE); +} + +static umode_t +pmu_iio_mapping_visible(struct kobject *kobj, struct attribute *attr, + int die, int zero_bus_pmu) +{ + struct intel_uncore_pmu *pmu = dev_to_uncore_pmu(kobj_to_dev(kobj)); + + return (!skx_iio_stack(pmu, die) && pmu->pmu_idx != zero_bus_pmu) ? 0 : attr->mode; +} + +static umode_t +skx_iio_mapping_visible(struct kobject *kobj, struct attribute *attr, int die) +{ + /* Root bus 0x00 is valid only for pmu_idx = 0. */ + return pmu_iio_mapping_visible(kobj, attr, die, 0); +} + +static ssize_t skx_iio_mapping_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct intel_uncore_pmu *pmu = dev_to_uncore_pmu(dev); + struct dev_ext_attribute *ea = to_dev_ext_attribute(attr); + long die = (long)ea->var; + + return sprintf(buf, "%04x:%02x\n", pmu->type->topology[die].segment, + skx_iio_stack(pmu, die)); +} + +static int skx_msr_cpu_bus_read(int cpu, u64 *topology) +{ + u64 msr_value; + + if (rdmsrl_on_cpu(cpu, SKX_MSR_CPU_BUS_NUMBER, &msr_value) || + !(msr_value & SKX_MSR_CPU_BUS_VALID_BIT)) + return -ENXIO; + + *topology = msr_value; + + return 0; +} + +static int die_to_cpu(int die) +{ + int res = 0, cpu, current_die; + /* + * Using cpus_read_lock() to ensure cpu is not going down between + * looking at cpu_online_mask. + */ + cpus_read_lock(); + for_each_online_cpu(cpu) { + current_die = topology_logical_die_id(cpu); + if (current_die == die) { + res = cpu; + break; + } + } + cpus_read_unlock(); + return res; +} + +static int skx_iio_get_topology(struct intel_uncore_type *type) +{ + int die, ret = -EPERM; + + type->topology = kcalloc(uncore_max_dies(), sizeof(*type->topology), + GFP_KERNEL); + if (!type->topology) + return -ENOMEM; + + for (die = 0; die < uncore_max_dies(); die++) { + ret = skx_msr_cpu_bus_read(die_to_cpu(die), + &type->topology[die].configuration); + if (ret) + break; + + ret = uncore_die_to_segment(die); + if (ret < 0) + break; + + type->topology[die].segment = ret; + } + + if (ret < 0) { + kfree(type->topology); + type->topology = NULL; + } + + return ret; +} + +static struct attribute_group skx_iio_mapping_group = { + .is_visible = skx_iio_mapping_visible, +}; + +static const struct attribute_group *skx_iio_attr_update[] = { + &skx_iio_mapping_group, + NULL, +}; + +static int +pmu_iio_set_mapping(struct intel_uncore_type *type, struct attribute_group *ag) +{ + char buf[64]; + int ret; + long die = -1; + struct attribute **attrs = NULL; + struct dev_ext_attribute *eas = NULL; + + ret = type->get_topology(type); + if (ret < 0) + goto clear_attr_update; + + ret = -ENOMEM; + + /* One more for NULL. */ + attrs = kcalloc((uncore_max_dies() + 1), sizeof(*attrs), GFP_KERNEL); + if (!attrs) + goto clear_topology; + + eas = kcalloc(uncore_max_dies(), sizeof(*eas), GFP_KERNEL); + if (!eas) + goto clear_attrs; + + for (die = 0; die < uncore_max_dies(); die++) { + sprintf(buf, "die%ld", die); + sysfs_attr_init(&eas[die].attr.attr); + eas[die].attr.attr.name = kstrdup(buf, GFP_KERNEL); + if (!eas[die].attr.attr.name) + goto err; + eas[die].attr.attr.mode = 0444; + eas[die].attr.show = skx_iio_mapping_show; + eas[die].attr.store = NULL; + eas[die].var = (void *)die; + attrs[die] = &eas[die].attr.attr; + } + ag->attrs = attrs; + + return 0; +err: + for (; die >= 0; die--) + kfree(eas[die].attr.attr.name); + kfree(eas); +clear_attrs: + kfree(attrs); +clear_topology: + kfree(type->topology); +clear_attr_update: + type->attr_update = NULL; + return ret; +} + +static void +pmu_iio_cleanup_mapping(struct intel_uncore_type *type, struct attribute_group *ag) +{ + struct attribute **attr = ag->attrs; + + if (!attr) + return; + + for (; *attr; attr++) + kfree((*attr)->name); + kfree(attr_to_ext_attr(*ag->attrs)); + kfree(ag->attrs); + ag->attrs = NULL; + kfree(type->topology); +} + +static int skx_iio_set_mapping(struct intel_uncore_type *type) +{ + return pmu_iio_set_mapping(type, &skx_iio_mapping_group); +} + +static void skx_iio_cleanup_mapping(struct intel_uncore_type *type) +{ + pmu_iio_cleanup_mapping(type, &skx_iio_mapping_group); +} + static struct intel_uncore_type skx_uncore_iio = { .name = "iio", .num_counters = 4, @@ -3589,6 +3895,10 @@ static struct intel_uncore_type skx_uncore_iio = { .constraints = skx_uncore_iio_constraints, .ops = &skx_uncore_iio_ops, .format_group = &skx_uncore_iio_format_group, + .attr_update = skx_iio_attr_update, + .get_topology = skx_iio_get_topology, + .set_mapping = skx_iio_set_mapping, + .cleanup_mapping = skx_iio_cleanup_mapping, }; enum perf_uncore_iio_freerunning_type_id { @@ -4129,6 +4439,103 @@ static const struct attribute_group snr_uncore_iio_format_group = { .attrs = snr_uncore_iio_formats_attr, }; +static umode_t +snr_iio_mapping_visible(struct kobject *kobj, struct attribute *attr, int die) +{ + /* Root bus 0x00 is valid only for pmu_idx = 1. */ + return pmu_iio_mapping_visible(kobj, attr, die, 1); +} + +static struct attribute_group snr_iio_mapping_group = { + .is_visible = snr_iio_mapping_visible, +}; + +static const struct attribute_group *snr_iio_attr_update[] = { + &snr_iio_mapping_group, + NULL, +}; + +static int sad_cfg_iio_topology(struct intel_uncore_type *type, u8 *sad_pmon_mapping) +{ + u32 sad_cfg; + int die, stack_id, ret = -EPERM; + struct pci_dev *dev = NULL; + + type->topology = kcalloc(uncore_max_dies(), sizeof(*type->topology), + GFP_KERNEL); + if (!type->topology) + return -ENOMEM; + + while ((dev = pci_get_device(PCI_VENDOR_ID_INTEL, SNR_ICX_MESH2IIO_MMAP_DID, dev))) { + ret = pci_read_config_dword(dev, SNR_ICX_SAD_CONTROL_CFG, &sad_cfg); + if (ret) { + ret = pcibios_err_to_errno(ret); + break; + } + + die = uncore_pcibus_to_dieid(dev->bus); + stack_id = SAD_CONTROL_STACK_ID(sad_cfg); + if (die < 0 || stack_id >= type->num_boxes) { + ret = -EPERM; + break; + } + + /* Convert stack id from SAD_CONTROL to PMON notation. */ + stack_id = sad_pmon_mapping[stack_id]; + + ((u8 *)&(type->topology[die].configuration))[stack_id] = dev->bus->number; + type->topology[die].segment = pci_domain_nr(dev->bus); + } + + if (ret) { + kfree(type->topology); + type->topology = NULL; + } + + return ret; +} + +/* + * SNR has a static mapping of stack IDs from SAD_CONTROL_CFG notation to PMON + */ +enum { + SNR_QAT_PMON_ID, + SNR_CBDMA_DMI_PMON_ID, + SNR_NIS_PMON_ID, + SNR_DLB_PMON_ID, + SNR_PCIE_GEN3_PMON_ID +}; + +static u8 snr_sad_pmon_mapping[] = { + SNR_CBDMA_DMI_PMON_ID, + SNR_PCIE_GEN3_PMON_ID, + SNR_DLB_PMON_ID, + SNR_NIS_PMON_ID, + SNR_QAT_PMON_ID +}; + +static int snr_iio_get_topology(struct intel_uncore_type *type) +{ + return sad_cfg_iio_topology(type, snr_sad_pmon_mapping); +} + +static int snr_iio_set_mapping(struct intel_uncore_type *type) +{ + return pmu_iio_set_mapping(type, &snr_iio_mapping_group); +} + +static void snr_iio_cleanup_mapping(struct intel_uncore_type *type) +{ + pmu_iio_cleanup_mapping(type, &snr_iio_mapping_group); +} + +static struct event_constraint snr_uncore_iio_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x83, 0x3), + UNCORE_EVENT_CONSTRAINT(0xc0, 0xc), + UNCORE_EVENT_CONSTRAINT(0xd5, 0xc), + EVENT_CONSTRAINT_END +}; + static struct intel_uncore_type snr_uncore_iio = { .name = "iio", .num_counters = 4, @@ -4140,8 +4547,13 @@ static struct intel_uncore_type snr_uncore_iio = { .event_mask_ext = SNR_IIO_PMON_RAW_EVENT_MASK_EXT, .box_ctl = SNR_IIO_MSR_PMON_BOX_CTL, .msr_offset = SNR_IIO_MSR_OFFSET, + .constraints = snr_uncore_iio_constraints, .ops = &ivbep_uncore_msr_ops, .format_group = &snr_uncore_iio_format_group, + .attr_update = snr_iio_attr_update, + .get_topology = snr_iio_get_topology, + .set_mapping = snr_iio_set_mapping, + .cleanup_mapping = snr_iio_cleanup_mapping, }; static struct intel_uncore_type snr_uncore_irp = { @@ -4323,12 +4735,46 @@ static struct intel_uncore_type snr_uncore_m2m = { .format_group = &snr_m2m_uncore_format_group, }; +static void snr_uncore_pci_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct pci_dev *pdev = box->pci_dev; + struct hw_perf_event *hwc = &event->hw; + + pci_write_config_dword(pdev, hwc->config_base, (u32)(hwc->config | SNBEP_PMON_CTL_EN)); + pci_write_config_dword(pdev, hwc->config_base + 4, (u32)(hwc->config >> 32)); +} + +static struct intel_uncore_ops snr_pcie3_uncore_pci_ops = { + .init_box = snr_m2m_uncore_pci_init_box, + .disable_box = snbep_uncore_pci_disable_box, + .enable_box = snbep_uncore_pci_enable_box, + .disable_event = snbep_uncore_pci_disable_event, + .enable_event = snr_uncore_pci_enable_event, + .read_counter = snbep_uncore_pci_read_counter, +}; + +static struct intel_uncore_type snr_uncore_pcie3 = { + .name = "pcie3", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .perf_ctr = SNR_PCIE3_PCI_PMON_CTR0, + .event_ctl = SNR_PCIE3_PCI_PMON_CTL0, + .event_mask = SKX_IIO_PMON_RAW_EVENT_MASK, + .event_mask_ext = SKX_IIO_PMON_RAW_EVENT_MASK_EXT, + .box_ctl = SNR_PCIE3_PCI_PMON_BOX_CTL, + .ops = &snr_pcie3_uncore_pci_ops, + .format_group = &skx_uncore_iio_format_group, +}; + enum { SNR_PCI_UNCORE_M2M, + SNR_PCI_UNCORE_PCIE3, }; static struct intel_uncore_type *snr_pci_uncores[] = { [SNR_PCI_UNCORE_M2M] = &snr_uncore_m2m, + [SNR_PCI_UNCORE_PCIE3] = &snr_uncore_pcie3, NULL, }; @@ -4345,6 +4791,19 @@ static struct pci_driver snr_uncore_pci_driver = { .id_table = snr_uncore_pci_ids, }; +static const struct pci_device_id snr_uncore_pci_sub_ids[] = { + { /* PCIe3 RP */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x334a), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(4, 0, SNR_PCI_UNCORE_PCIE3, 0), + }, + { /* end: all zeroes */ } +}; + +static struct pci_driver snr_uncore_pci_sub_driver = { + .name = "snr_uncore_sub", + .id_table = snr_uncore_pci_sub_ids, +}; + int snr_uncore_pci_init(void) { /* SNR UBOX DID */ @@ -4356,53 +4815,70 @@ int snr_uncore_pci_init(void) uncore_pci_uncores = snr_pci_uncores; uncore_pci_driver = &snr_uncore_pci_driver; + uncore_pci_sub_driver = &snr_uncore_pci_sub_driver; return 0; } -static struct pci_dev *snr_uncore_get_mc_dev(int id) +#define SNR_MC_DEVICE_ID 0x3451 + +static struct pci_dev *snr_uncore_get_mc_dev(unsigned int device, int id) { struct pci_dev *mc_dev = NULL; - int phys_id, pkg; + int pkg; while (1) { - mc_dev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3451, mc_dev); + mc_dev = pci_get_device(PCI_VENDOR_ID_INTEL, device, mc_dev); if (!mc_dev) break; - phys_id = uncore_pcibus_to_physid(mc_dev->bus); - if (phys_id < 0) - continue; - pkg = topology_phys_to_logical_pkg(phys_id); - if (pkg < 0) - continue; - else if (pkg == id) + pkg = uncore_pcibus_to_dieid(mc_dev->bus); + if (pkg == id) break; } return mc_dev; } -static void snr_uncore_mmio_init_box(struct intel_uncore_box *box) +static int snr_uncore_mmio_map(struct intel_uncore_box *box, + unsigned int box_ctl, int mem_offset, + unsigned int device) { - struct pci_dev *pdev = snr_uncore_get_mc_dev(box->dieid); - unsigned int box_ctl = uncore_mmio_box_ctl(box); + struct pci_dev *pdev = snr_uncore_get_mc_dev(device, box->dieid); + struct intel_uncore_type *type = box->pmu->type; resource_size_t addr; u32 pci_dword; if (!pdev) - return; + return -ENODEV; pci_read_config_dword(pdev, SNR_IMC_MMIO_BASE_OFFSET, &pci_dword); - addr = (pci_dword & SNR_IMC_MMIO_BASE_MASK) << 23; + addr = ((resource_size_t)pci_dword & SNR_IMC_MMIO_BASE_MASK) << 23; - pci_read_config_dword(pdev, SNR_IMC_MMIO_MEM0_OFFSET, &pci_dword); + pci_read_config_dword(pdev, mem_offset, &pci_dword); addr |= (pci_dword & SNR_IMC_MMIO_MEM0_MASK) << 12; addr += box_ctl; - box->io_addr = ioremap(addr, SNR_IMC_MMIO_SIZE); - if (!box->io_addr) - return; + box->io_addr = ioremap(addr, type->mmio_map_size); + if (!box->io_addr) { + pr_warn("perf uncore: Failed to ioremap for %s.\n", type->name); + return -EINVAL; + } + + return 0; +} + +static void __snr_uncore_mmio_init_box(struct intel_uncore_box *box, + unsigned int box_ctl, int mem_offset, + unsigned int device) +{ + if (!snr_uncore_mmio_map(box, box_ctl, mem_offset, device)) + writel(IVBEP_PMON_BOX_CTL_INT, box->io_addr); +} - writel(IVBEP_PMON_BOX_CTL_INT, box->io_addr); +static void snr_uncore_mmio_init_box(struct intel_uncore_box *box) +{ + __snr_uncore_mmio_init_box(box, uncore_mmio_box_ctl(box), + SNR_IMC_MMIO_MEM0_OFFSET, + SNR_MC_DEVICE_ID); } static void snr_uncore_mmio_disable_box(struct intel_uncore_box *box) @@ -4437,6 +4913,9 @@ static void snr_uncore_mmio_enable_event(struct intel_uncore_box *box, if (!box->io_addr) return; + if (!uncore_mmio_is_valid_offset(box, hwc->config_base)) + return; + writel(hwc->config | SNBEP_PMON_CTL_EN, box->io_addr + hwc->config_base); } @@ -4449,6 +4928,9 @@ static void snr_uncore_mmio_disable_event(struct intel_uncore_box *box, if (!box->io_addr) return; + if (!uncore_mmio_is_valid_offset(box, hwc->config_base)) + return; + writel(hwc->config, box->io_addr + hwc->config_base); } @@ -4487,6 +4969,7 @@ static struct intel_uncore_type snr_uncore_imc = { .event_mask = SNBEP_PMON_RAW_EVENT_MASK, .box_ctl = SNR_IMC_MMIO_PMON_BOX_CTL, .mmio_offset = SNR_IMC_MMIO_OFFSET, + .mmio_map_size = SNR_IMC_MMIO_SIZE, .ops = &snr_uncore_mmio_ops, .format_group = &skx_uncore_format_group, }; @@ -4507,10 +4990,10 @@ static struct uncore_event_desc snr_uncore_imc_freerunning_events[] = { INTEL_UNCORE_EVENT_DESC(dclk, "event=0xff,umask=0x10"), INTEL_UNCORE_EVENT_DESC(read, "event=0xff,umask=0x20"), - INTEL_UNCORE_EVENT_DESC(read.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(read.scale, "6.103515625e-5"), INTEL_UNCORE_EVENT_DESC(read.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(write, "event=0xff,umask=0x21"), - INTEL_UNCORE_EVENT_DESC(write.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(write.scale, "6.103515625e-5"), INTEL_UNCORE_EVENT_DESC(write.unit, "MiB"), { /* end: all zeroes */ }, }; @@ -4527,6 +5010,7 @@ static struct intel_uncore_type snr_uncore_imc_free_running = { .num_counters = 3, .num_boxes = 1, .num_freerunning_types = SNR_IMC_FREERUNNING_TYPE_MAX, + .mmio_map_size = SNR_IMC_MMIO_SIZE, .freerunning = snr_imc_freerunning, .ops = &snr_uncore_imc_freerunning_ops, .event_descs = snr_uncore_imc_freerunning_events, @@ -4545,3 +5029,1044 @@ void snr_uncore_mmio_init(void) } /* end of SNR uncore support */ + +/* ICX uncore support */ + +static unsigned icx_cha_msr_offsets[] = { + 0x2a0, 0x2ae, 0x2bc, 0x2ca, 0x2d8, 0x2e6, 0x2f4, 0x302, 0x310, + 0x31e, 0x32c, 0x33a, 0x348, 0x356, 0x364, 0x372, 0x380, 0x38e, + 0x3aa, 0x3b8, 0x3c6, 0x3d4, 0x3e2, 0x3f0, 0x3fe, 0x40c, 0x41a, + 0x428, 0x436, 0x444, 0x452, 0x460, 0x46e, 0x47c, 0x0, 0xe, + 0x1c, 0x2a, 0x38, 0x46, +}; + +static int icx_cha_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + bool tie_en = !!(event->hw.config & SNBEP_CBO_PMON_CTL_TID_EN); + + if (tie_en) { + reg1->reg = ICX_C34_MSR_PMON_BOX_FILTER0 + + icx_cha_msr_offsets[box->pmu->pmu_idx]; + reg1->config = event->attr.config1 & SKX_CHA_MSR_PMON_BOX_FILTER_TID; + reg1->idx = 0; + } + + return 0; +} + +static struct intel_uncore_ops icx_uncore_chabox_ops = { + .init_box = ivbep_uncore_msr_init_box, + .disable_box = snbep_uncore_msr_disable_box, + .enable_box = snbep_uncore_msr_enable_box, + .disable_event = snbep_uncore_msr_disable_event, + .enable_event = snr_cha_enable_event, + .read_counter = uncore_msr_read_counter, + .hw_config = icx_cha_hw_config, +}; + +static struct intel_uncore_type icx_uncore_chabox = { + .name = "cha", + .num_counters = 4, + .perf_ctr_bits = 48, + .event_ctl = ICX_C34_MSR_PMON_CTL0, + .perf_ctr = ICX_C34_MSR_PMON_CTR0, + .box_ctl = ICX_C34_MSR_PMON_BOX_CTL, + .msr_offsets = icx_cha_msr_offsets, + .event_mask = HSWEP_S_MSR_PMON_RAW_EVENT_MASK, + .event_mask_ext = SNR_CHA_RAW_EVENT_MASK_EXT, + .constraints = skx_uncore_chabox_constraints, + .ops = &icx_uncore_chabox_ops, + .format_group = &snr_uncore_chabox_format_group, +}; + +static unsigned icx_msr_offsets[] = { + 0x0, 0x20, 0x40, 0x90, 0xb0, 0xd0, +}; + +static struct event_constraint icx_uncore_iio_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x02, 0x3), + UNCORE_EVENT_CONSTRAINT(0x03, 0x3), + UNCORE_EVENT_CONSTRAINT(0x83, 0x3), + UNCORE_EVENT_CONSTRAINT(0x88, 0xc), + UNCORE_EVENT_CONSTRAINT(0xc0, 0xc), + UNCORE_EVENT_CONSTRAINT(0xc5, 0xc), + UNCORE_EVENT_CONSTRAINT(0xd5, 0xc), + EVENT_CONSTRAINT_END +}; + +static umode_t +icx_iio_mapping_visible(struct kobject *kobj, struct attribute *attr, int die) +{ + /* Root bus 0x00 is valid only for pmu_idx = 5. */ + return pmu_iio_mapping_visible(kobj, attr, die, 5); +} + +static struct attribute_group icx_iio_mapping_group = { + .is_visible = icx_iio_mapping_visible, +}; + +static const struct attribute_group *icx_iio_attr_update[] = { + &icx_iio_mapping_group, + NULL, +}; + +/* + * ICX has a static mapping of stack IDs from SAD_CONTROL_CFG notation to PMON + */ +enum { + ICX_PCIE1_PMON_ID, + ICX_PCIE2_PMON_ID, + ICX_PCIE3_PMON_ID, + ICX_PCIE4_PMON_ID, + ICX_PCIE5_PMON_ID, + ICX_CBDMA_DMI_PMON_ID +}; + +static u8 icx_sad_pmon_mapping[] = { + ICX_CBDMA_DMI_PMON_ID, + ICX_PCIE1_PMON_ID, + ICX_PCIE2_PMON_ID, + ICX_PCIE3_PMON_ID, + ICX_PCIE4_PMON_ID, + ICX_PCIE5_PMON_ID, +}; + +static int icx_iio_get_topology(struct intel_uncore_type *type) +{ + return sad_cfg_iio_topology(type, icx_sad_pmon_mapping); +} + +static int icx_iio_set_mapping(struct intel_uncore_type *type) +{ + return pmu_iio_set_mapping(type, &icx_iio_mapping_group); +} + +static void icx_iio_cleanup_mapping(struct intel_uncore_type *type) +{ + pmu_iio_cleanup_mapping(type, &icx_iio_mapping_group); +} + +static struct intel_uncore_type icx_uncore_iio = { + .name = "iio", + .num_counters = 4, + .num_boxes = 6, + .perf_ctr_bits = 48, + .event_ctl = ICX_IIO_MSR_PMON_CTL0, + .perf_ctr = ICX_IIO_MSR_PMON_CTR0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .event_mask_ext = SNR_IIO_PMON_RAW_EVENT_MASK_EXT, + .box_ctl = ICX_IIO_MSR_PMON_BOX_CTL, + .msr_offsets = icx_msr_offsets, + .constraints = icx_uncore_iio_constraints, + .ops = &skx_uncore_iio_ops, + .format_group = &snr_uncore_iio_format_group, + .attr_update = icx_iio_attr_update, + .get_topology = icx_iio_get_topology, + .set_mapping = icx_iio_set_mapping, + .cleanup_mapping = icx_iio_cleanup_mapping, +}; + +static struct intel_uncore_type icx_uncore_irp = { + .name = "irp", + .num_counters = 2, + .num_boxes = 6, + .perf_ctr_bits = 48, + .event_ctl = ICX_IRP0_MSR_PMON_CTL0, + .perf_ctr = ICX_IRP0_MSR_PMON_CTR0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .box_ctl = ICX_IRP0_MSR_PMON_BOX_CTL, + .msr_offsets = icx_msr_offsets, + .ops = &ivbep_uncore_msr_ops, + .format_group = &ivbep_uncore_format_group, +}; + +static struct event_constraint icx_uncore_m2pcie_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x14, 0x3), + UNCORE_EVENT_CONSTRAINT(0x23, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2d, 0x3), + EVENT_CONSTRAINT_END +}; + +static struct intel_uncore_type icx_uncore_m2pcie = { + .name = "m2pcie", + .num_counters = 4, + .num_boxes = 6, + .perf_ctr_bits = 48, + .event_ctl = ICX_M2PCIE_MSR_PMON_CTL0, + .perf_ctr = ICX_M2PCIE_MSR_PMON_CTR0, + .box_ctl = ICX_M2PCIE_MSR_PMON_BOX_CTL, + .msr_offsets = icx_msr_offsets, + .constraints = icx_uncore_m2pcie_constraints, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .ops = &ivbep_uncore_msr_ops, + .format_group = &ivbep_uncore_format_group, +}; + +enum perf_uncore_icx_iio_freerunning_type_id { + ICX_IIO_MSR_IOCLK, + ICX_IIO_MSR_BW_IN, + + ICX_IIO_FREERUNNING_TYPE_MAX, +}; + +static unsigned icx_iio_clk_freerunning_box_offsets[] = { + 0x0, 0x20, 0x40, 0x90, 0xb0, 0xd0, +}; + +static unsigned icx_iio_bw_freerunning_box_offsets[] = { + 0x0, 0x10, 0x20, 0x90, 0xa0, 0xb0, +}; + +static struct freerunning_counters icx_iio_freerunning[] = { + [ICX_IIO_MSR_IOCLK] = { 0xa55, 0x1, 0x20, 1, 48, icx_iio_clk_freerunning_box_offsets }, + [ICX_IIO_MSR_BW_IN] = { 0xaa0, 0x1, 0x10, 8, 48, icx_iio_bw_freerunning_box_offsets }, +}; + +static struct uncore_event_desc icx_uncore_iio_freerunning_events[] = { + /* Free-Running IIO CLOCKS Counter */ + INTEL_UNCORE_EVENT_DESC(ioclk, "event=0xff,umask=0x10"), + /* Free-Running IIO BANDWIDTH IN Counters */ + INTEL_UNCORE_EVENT_DESC(bw_in_port0, "event=0xff,umask=0x20"), + INTEL_UNCORE_EVENT_DESC(bw_in_port0.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port0.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port1, "event=0xff,umask=0x21"), + INTEL_UNCORE_EVENT_DESC(bw_in_port1.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port1.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port2, "event=0xff,umask=0x22"), + INTEL_UNCORE_EVENT_DESC(bw_in_port2.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port2.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port3, "event=0xff,umask=0x23"), + INTEL_UNCORE_EVENT_DESC(bw_in_port3.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port3.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port4, "event=0xff,umask=0x24"), + INTEL_UNCORE_EVENT_DESC(bw_in_port4.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port4.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port5, "event=0xff,umask=0x25"), + INTEL_UNCORE_EVENT_DESC(bw_in_port5.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port5.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port6, "event=0xff,umask=0x26"), + INTEL_UNCORE_EVENT_DESC(bw_in_port6.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port6.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port7, "event=0xff,umask=0x27"), + INTEL_UNCORE_EVENT_DESC(bw_in_port7.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port7.unit, "MiB"), + { /* end: all zeroes */ }, +}; + +static struct intel_uncore_type icx_uncore_iio_free_running = { + .name = "iio_free_running", + .num_counters = 9, + .num_boxes = 6, + .num_freerunning_types = ICX_IIO_FREERUNNING_TYPE_MAX, + .freerunning = icx_iio_freerunning, + .ops = &skx_uncore_iio_freerunning_ops, + .event_descs = icx_uncore_iio_freerunning_events, + .format_group = &skx_uncore_iio_freerunning_format_group, +}; + +static struct intel_uncore_type *icx_msr_uncores[] = { + &skx_uncore_ubox, + &icx_uncore_chabox, + &icx_uncore_iio, + &icx_uncore_irp, + &icx_uncore_m2pcie, + &skx_uncore_pcu, + &icx_uncore_iio_free_running, + NULL, +}; + +/* + * To determine the number of CHAs, it should read CAPID6(Low) and CAPID7 (High) + * registers which located at Device 30, Function 3 + */ +#define ICX_CAPID6 0x9c +#define ICX_CAPID7 0xa0 + +static u64 icx_count_chabox(void) +{ + struct pci_dev *dev = NULL; + u64 caps = 0; + + dev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x345b, dev); + if (!dev) + goto out; + + pci_read_config_dword(dev, ICX_CAPID6, (u32 *)&caps); + pci_read_config_dword(dev, ICX_CAPID7, (u32 *)&caps + 1); +out: + pci_dev_put(dev); + return hweight64(caps); +} + +void icx_uncore_cpu_init(void) +{ + u64 num_boxes = icx_count_chabox(); + + if (WARN_ON(num_boxes > ARRAY_SIZE(icx_cha_msr_offsets))) + return; + icx_uncore_chabox.num_boxes = num_boxes; + uncore_msr_uncores = icx_msr_uncores; +} + +static struct intel_uncore_type icx_uncore_m2m = { + .name = "m2m", + .num_counters = 4, + .num_boxes = 4, + .perf_ctr_bits = 48, + .perf_ctr = SNR_M2M_PCI_PMON_CTR0, + .event_ctl = SNR_M2M_PCI_PMON_CTL0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .event_mask_ext = SNR_M2M_PCI_PMON_UMASK_EXT, + .box_ctl = SNR_M2M_PCI_PMON_BOX_CTL, + .ops = &snr_m2m_uncore_pci_ops, + .format_group = &snr_m2m_uncore_format_group, +}; + +static struct attribute *icx_upi_uncore_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask_ext4.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + NULL, +}; + +static const struct attribute_group icx_upi_uncore_format_group = { + .name = "format", + .attrs = icx_upi_uncore_formats_attr, +}; + +static struct intel_uncore_type icx_uncore_upi = { + .name = "upi", + .num_counters = 4, + .num_boxes = 3, + .perf_ctr_bits = 48, + .perf_ctr = ICX_UPI_PCI_PMON_CTR0, + .event_ctl = ICX_UPI_PCI_PMON_CTL0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .event_mask_ext = ICX_UPI_CTL_UMASK_EXT, + .box_ctl = ICX_UPI_PCI_PMON_BOX_CTL, + .ops = &skx_upi_uncore_pci_ops, + .format_group = &icx_upi_uncore_format_group, +}; + +static struct event_constraint icx_uncore_m3upi_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x1c, 0x1), + UNCORE_EVENT_CONSTRAINT(0x1d, 0x1), + UNCORE_EVENT_CONSTRAINT(0x1e, 0x1), + UNCORE_EVENT_CONSTRAINT(0x1f, 0x1), + UNCORE_EVENT_CONSTRAINT(0x40, 0x7), + UNCORE_EVENT_CONSTRAINT(0x4e, 0x7), + UNCORE_EVENT_CONSTRAINT(0x4f, 0x7), + UNCORE_EVENT_CONSTRAINT(0x50, 0x7), + EVENT_CONSTRAINT_END +}; + +static struct intel_uncore_type icx_uncore_m3upi = { + .name = "m3upi", + .num_counters = 4, + .num_boxes = 3, + .perf_ctr_bits = 48, + .perf_ctr = ICX_M3UPI_PCI_PMON_CTR0, + .event_ctl = ICX_M3UPI_PCI_PMON_CTL0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .box_ctl = ICX_M3UPI_PCI_PMON_BOX_CTL, + .constraints = icx_uncore_m3upi_constraints, + .ops = &ivbep_uncore_pci_ops, + .format_group = &skx_uncore_format_group, +}; + +enum { + ICX_PCI_UNCORE_M2M, + ICX_PCI_UNCORE_UPI, + ICX_PCI_UNCORE_M3UPI, +}; + +static struct intel_uncore_type *icx_pci_uncores[] = { + [ICX_PCI_UNCORE_M2M] = &icx_uncore_m2m, + [ICX_PCI_UNCORE_UPI] = &icx_uncore_upi, + [ICX_PCI_UNCORE_M3UPI] = &icx_uncore_m3upi, + NULL, +}; + +static const struct pci_device_id icx_uncore_pci_ids[] = { + { /* M2M 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x344a), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(12, 0, ICX_PCI_UNCORE_M2M, 0), + }, + { /* M2M 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x344a), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(13, 0, ICX_PCI_UNCORE_M2M, 1), + }, + { /* M2M 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x344a), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(14, 0, ICX_PCI_UNCORE_M2M, 2), + }, + { /* M2M 3 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x344a), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(15, 0, ICX_PCI_UNCORE_M2M, 3), + }, + { /* UPI Link 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3441), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(2, 1, ICX_PCI_UNCORE_UPI, 0), + }, + { /* UPI Link 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3441), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(3, 1, ICX_PCI_UNCORE_UPI, 1), + }, + { /* UPI Link 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3441), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(4, 1, ICX_PCI_UNCORE_UPI, 2), + }, + { /* M3UPI Link 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3446), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(5, 1, ICX_PCI_UNCORE_M3UPI, 0), + }, + { /* M3UPI Link 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3446), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(6, 1, ICX_PCI_UNCORE_M3UPI, 1), + }, + { /* M3UPI Link 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3446), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(7, 1, ICX_PCI_UNCORE_M3UPI, 2), + }, + { /* end: all zeroes */ } +}; + +static struct pci_driver icx_uncore_pci_driver = { + .name = "icx_uncore", + .id_table = icx_uncore_pci_ids, +}; + +int icx_uncore_pci_init(void) +{ + /* ICX UBOX DID */ + int ret = snbep_pci2phy_map_init(0x3450, SKX_CPUNODEID, + SKX_GIDNIDMAP, true); + + if (ret) + return ret; + + uncore_pci_uncores = icx_pci_uncores; + uncore_pci_driver = &icx_uncore_pci_driver; + return 0; +} + +static void icx_uncore_imc_init_box(struct intel_uncore_box *box) +{ + unsigned int box_ctl = box->pmu->type->box_ctl + + box->pmu->type->mmio_offset * (box->pmu->pmu_idx % ICX_NUMBER_IMC_CHN); + int mem_offset = (box->pmu->pmu_idx / ICX_NUMBER_IMC_CHN) * ICX_IMC_MEM_STRIDE + + SNR_IMC_MMIO_MEM0_OFFSET; + + __snr_uncore_mmio_init_box(box, box_ctl, mem_offset, + SNR_MC_DEVICE_ID); +} + +static struct intel_uncore_ops icx_uncore_mmio_ops = { + .init_box = icx_uncore_imc_init_box, + .exit_box = uncore_mmio_exit_box, + .disable_box = snr_uncore_mmio_disable_box, + .enable_box = snr_uncore_mmio_enable_box, + .disable_event = snr_uncore_mmio_disable_event, + .enable_event = snr_uncore_mmio_enable_event, + .read_counter = uncore_mmio_read_counter, +}; + +static struct intel_uncore_type icx_uncore_imc = { + .name = "imc", + .num_counters = 4, + .num_boxes = 12, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .fixed_ctr = SNR_IMC_MMIO_PMON_FIXED_CTR, + .fixed_ctl = SNR_IMC_MMIO_PMON_FIXED_CTL, + .event_descs = snr_uncore_imc_events, + .perf_ctr = SNR_IMC_MMIO_PMON_CTR0, + .event_ctl = SNR_IMC_MMIO_PMON_CTL0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .box_ctl = SNR_IMC_MMIO_PMON_BOX_CTL, + .mmio_offset = SNR_IMC_MMIO_OFFSET, + .mmio_map_size = SNR_IMC_MMIO_SIZE, + .ops = &icx_uncore_mmio_ops, + .format_group = &skx_uncore_format_group, +}; + +enum perf_uncore_icx_imc_freerunning_type_id { + ICX_IMC_DCLK, + ICX_IMC_DDR, + ICX_IMC_DDRT, + + ICX_IMC_FREERUNNING_TYPE_MAX, +}; + +static struct freerunning_counters icx_imc_freerunning[] = { + [ICX_IMC_DCLK] = { 0x22b0, 0x0, 0, 1, 48 }, + [ICX_IMC_DDR] = { 0x2290, 0x8, 0, 2, 48 }, + [ICX_IMC_DDRT] = { 0x22a0, 0x8, 0, 2, 48 }, +}; + +static struct uncore_event_desc icx_uncore_imc_freerunning_events[] = { + INTEL_UNCORE_EVENT_DESC(dclk, "event=0xff,umask=0x10"), + + INTEL_UNCORE_EVENT_DESC(read, "event=0xff,umask=0x20"), + INTEL_UNCORE_EVENT_DESC(read.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(read.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(write, "event=0xff,umask=0x21"), + INTEL_UNCORE_EVENT_DESC(write.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(write.unit, "MiB"), + + INTEL_UNCORE_EVENT_DESC(ddrt_read, "event=0xff,umask=0x30"), + INTEL_UNCORE_EVENT_DESC(ddrt_read.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(ddrt_read.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(ddrt_write, "event=0xff,umask=0x31"), + INTEL_UNCORE_EVENT_DESC(ddrt_write.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(ddrt_write.unit, "MiB"), + { /* end: all zeroes */ }, +}; + +static void icx_uncore_imc_freerunning_init_box(struct intel_uncore_box *box) +{ + int mem_offset = box->pmu->pmu_idx * ICX_IMC_MEM_STRIDE + + SNR_IMC_MMIO_MEM0_OFFSET; + + snr_uncore_mmio_map(box, uncore_mmio_box_ctl(box), + mem_offset, SNR_MC_DEVICE_ID); +} + +static struct intel_uncore_ops icx_uncore_imc_freerunning_ops = { + .init_box = icx_uncore_imc_freerunning_init_box, + .exit_box = uncore_mmio_exit_box, + .read_counter = uncore_mmio_read_counter, + .hw_config = uncore_freerunning_hw_config, +}; + +static struct intel_uncore_type icx_uncore_imc_free_running = { + .name = "imc_free_running", + .num_counters = 5, + .num_boxes = 4, + .num_freerunning_types = ICX_IMC_FREERUNNING_TYPE_MAX, + .mmio_map_size = SNR_IMC_MMIO_SIZE, + .freerunning = icx_imc_freerunning, + .ops = &icx_uncore_imc_freerunning_ops, + .event_descs = icx_uncore_imc_freerunning_events, + .format_group = &skx_uncore_iio_freerunning_format_group, +}; + +static struct intel_uncore_type *icx_mmio_uncores[] = { + &icx_uncore_imc, + &icx_uncore_imc_free_running, + NULL, +}; + +void icx_uncore_mmio_init(void) +{ + uncore_mmio_uncores = icx_mmio_uncores; +} + +/* end of ICX uncore support */ + +/* SPR uncore support */ + +static void spr_uncore_msr_enable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + + if (reg1->idx != EXTRA_REG_NONE) + wrmsrl(reg1->reg, reg1->config); + + wrmsrl(hwc->config_base, hwc->config); +} + +static void spr_uncore_msr_disable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + + if (reg1->idx != EXTRA_REG_NONE) + wrmsrl(reg1->reg, 0); + + wrmsrl(hwc->config_base, 0); +} + +static int spr_cha_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + bool tie_en = !!(event->hw.config & SPR_CHA_PMON_CTL_TID_EN); + struct intel_uncore_type *type = box->pmu->type; + + if (tie_en) { + reg1->reg = SPR_C0_MSR_PMON_BOX_FILTER0 + + HSWEP_CBO_MSR_OFFSET * type->box_ids[box->pmu->pmu_idx]; + reg1->config = event->attr.config1 & SPR_CHA_PMON_BOX_FILTER_TID; + reg1->idx = 0; + } + + return 0; +} + +static struct intel_uncore_ops spr_uncore_chabox_ops = { + .init_box = intel_generic_uncore_msr_init_box, + .disable_box = intel_generic_uncore_msr_disable_box, + .enable_box = intel_generic_uncore_msr_enable_box, + .disable_event = spr_uncore_msr_disable_event, + .enable_event = spr_uncore_msr_enable_event, + .read_counter = uncore_msr_read_counter, + .hw_config = spr_cha_hw_config, + .get_constraint = uncore_get_constraint, + .put_constraint = uncore_put_constraint, +}; + +static struct attribute *spr_uncore_cha_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask_ext4.attr, + &format_attr_tid_en2.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + &format_attr_filter_tid5.attr, + NULL, +}; +static const struct attribute_group spr_uncore_chabox_format_group = { + .name = "format", + .attrs = spr_uncore_cha_formats_attr, +}; + +static ssize_t alias_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct intel_uncore_pmu *pmu = dev_to_uncore_pmu(dev); + char pmu_name[UNCORE_PMU_NAME_LEN]; + + uncore_get_alias_name(pmu_name, pmu); + return sysfs_emit(buf, "%s\n", pmu_name); +} + +static DEVICE_ATTR_RO(alias); + +static struct attribute *uncore_alias_attrs[] = { + &dev_attr_alias.attr, + NULL +}; + +ATTRIBUTE_GROUPS(uncore_alias); + +static struct intel_uncore_type spr_uncore_chabox = { + .name = "cha", + .event_mask = SPR_CHA_PMON_EVENT_MASK, + .event_mask_ext = SPR_RAW_EVENT_MASK_EXT, + .num_shared_regs = 1, + .constraints = skx_uncore_chabox_constraints, + .ops = &spr_uncore_chabox_ops, + .format_group = &spr_uncore_chabox_format_group, + .attr_update = uncore_alias_groups, +}; + +static struct intel_uncore_type spr_uncore_iio = { + .name = "iio", + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .event_mask_ext = SNR_IIO_PMON_RAW_EVENT_MASK_EXT, + .format_group = &snr_uncore_iio_format_group, + .attr_update = uncore_alias_groups, + .constraints = icx_uncore_iio_constraints, +}; + +static struct attribute *spr_uncore_raw_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask_ext4.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + NULL, +}; + +static const struct attribute_group spr_uncore_raw_format_group = { + .name = "format", + .attrs = spr_uncore_raw_formats_attr, +}; + +#define SPR_UNCORE_COMMON_FORMAT() \ + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, \ + .event_mask_ext = SPR_RAW_EVENT_MASK_EXT, \ + .format_group = &spr_uncore_raw_format_group, \ + .attr_update = uncore_alias_groups + +static struct intel_uncore_type spr_uncore_irp = { + SPR_UNCORE_COMMON_FORMAT(), + .name = "irp", + +}; + +static struct event_constraint spr_uncore_m2pcie_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x14, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2d, 0x3), + EVENT_CONSTRAINT_END +}; + +static struct intel_uncore_type spr_uncore_m2pcie = { + SPR_UNCORE_COMMON_FORMAT(), + .name = "m2pcie", + .constraints = spr_uncore_m2pcie_constraints, +}; + +static struct intel_uncore_type spr_uncore_pcu = { + .name = "pcu", + .attr_update = uncore_alias_groups, +}; + +static void spr_uncore_mmio_enable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (!box->io_addr) + return; + + if (uncore_pmc_fixed(hwc->idx)) + writel(SNBEP_PMON_CTL_EN, box->io_addr + hwc->config_base); + else + writel(hwc->config, box->io_addr + hwc->config_base); +} + +static struct intel_uncore_ops spr_uncore_mmio_ops = { + .init_box = intel_generic_uncore_mmio_init_box, + .exit_box = uncore_mmio_exit_box, + .disable_box = intel_generic_uncore_mmio_disable_box, + .enable_box = intel_generic_uncore_mmio_enable_box, + .disable_event = intel_generic_uncore_mmio_disable_event, + .enable_event = spr_uncore_mmio_enable_event, + .read_counter = uncore_mmio_read_counter, +}; + +static struct intel_uncore_type spr_uncore_imc = { + SPR_UNCORE_COMMON_FORMAT(), + .name = "imc", + .fixed_ctr_bits = 48, + .fixed_ctr = SNR_IMC_MMIO_PMON_FIXED_CTR, + .fixed_ctl = SNR_IMC_MMIO_PMON_FIXED_CTL, + .ops = &spr_uncore_mmio_ops, +}; + +static void spr_uncore_pci_enable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct pci_dev *pdev = box->pci_dev; + struct hw_perf_event *hwc = &event->hw; + + pci_write_config_dword(pdev, hwc->config_base + 4, (u32)(hwc->config >> 32)); + pci_write_config_dword(pdev, hwc->config_base, (u32)hwc->config); +} + +static struct intel_uncore_ops spr_uncore_pci_ops = { + .init_box = intel_generic_uncore_pci_init_box, + .disable_box = intel_generic_uncore_pci_disable_box, + .enable_box = intel_generic_uncore_pci_enable_box, + .disable_event = intel_generic_uncore_pci_disable_event, + .enable_event = spr_uncore_pci_enable_event, + .read_counter = intel_generic_uncore_pci_read_counter, +}; + +#define SPR_UNCORE_PCI_COMMON_FORMAT() \ + SPR_UNCORE_COMMON_FORMAT(), \ + .ops = &spr_uncore_pci_ops + +static struct intel_uncore_type spr_uncore_m2m = { + SPR_UNCORE_PCI_COMMON_FORMAT(), + .name = "m2m", +}; + +static struct intel_uncore_type spr_uncore_upi = { + SPR_UNCORE_PCI_COMMON_FORMAT(), + .name = "upi", +}; + +static struct intel_uncore_type spr_uncore_m3upi = { + SPR_UNCORE_PCI_COMMON_FORMAT(), + .name = "m3upi", + .constraints = icx_uncore_m3upi_constraints, +}; + +static struct intel_uncore_type spr_uncore_mdf = { + SPR_UNCORE_COMMON_FORMAT(), + .name = "mdf", +}; + +#define UNCORE_SPR_NUM_UNCORE_TYPES 12 +#define UNCORE_SPR_IIO 1 +#define UNCORE_SPR_IMC 6 + +static struct intel_uncore_type *spr_uncores[UNCORE_SPR_NUM_UNCORE_TYPES] = { + &spr_uncore_chabox, + &spr_uncore_iio, + &spr_uncore_irp, + &spr_uncore_m2pcie, + &spr_uncore_pcu, + NULL, + &spr_uncore_imc, + &spr_uncore_m2m, + &spr_uncore_upi, + &spr_uncore_m3upi, + NULL, + &spr_uncore_mdf, +}; + +enum perf_uncore_spr_iio_freerunning_type_id { + SPR_IIO_MSR_IOCLK, + SPR_IIO_MSR_BW_IN, + SPR_IIO_MSR_BW_OUT, + + SPR_IIO_FREERUNNING_TYPE_MAX, +}; + +static struct freerunning_counters spr_iio_freerunning[] = { + [SPR_IIO_MSR_IOCLK] = { 0x340e, 0x1, 0x10, 1, 48 }, + [SPR_IIO_MSR_BW_IN] = { 0x3800, 0x1, 0x10, 8, 48 }, + [SPR_IIO_MSR_BW_OUT] = { 0x3808, 0x1, 0x10, 8, 48 }, +}; + +static struct uncore_event_desc spr_uncore_iio_freerunning_events[] = { + /* Free-Running IIO CLOCKS Counter */ + INTEL_UNCORE_EVENT_DESC(ioclk, "event=0xff,umask=0x10"), + /* Free-Running IIO BANDWIDTH IN Counters */ + INTEL_UNCORE_EVENT_DESC(bw_in_port0, "event=0xff,umask=0x20"), + INTEL_UNCORE_EVENT_DESC(bw_in_port0.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port0.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port1, "event=0xff,umask=0x21"), + INTEL_UNCORE_EVENT_DESC(bw_in_port1.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port1.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port2, "event=0xff,umask=0x22"), + INTEL_UNCORE_EVENT_DESC(bw_in_port2.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port2.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port3, "event=0xff,umask=0x23"), + INTEL_UNCORE_EVENT_DESC(bw_in_port3.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port3.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port4, "event=0xff,umask=0x24"), + INTEL_UNCORE_EVENT_DESC(bw_in_port4.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port4.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port5, "event=0xff,umask=0x25"), + INTEL_UNCORE_EVENT_DESC(bw_in_port5.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port5.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port6, "event=0xff,umask=0x26"), + INTEL_UNCORE_EVENT_DESC(bw_in_port6.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port6.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port7, "event=0xff,umask=0x27"), + INTEL_UNCORE_EVENT_DESC(bw_in_port7.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port7.unit, "MiB"), + /* Free-Running IIO BANDWIDTH OUT Counters */ + INTEL_UNCORE_EVENT_DESC(bw_out_port0, "event=0xff,umask=0x30"), + INTEL_UNCORE_EVENT_DESC(bw_out_port0.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_out_port0.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_out_port1, "event=0xff,umask=0x31"), + INTEL_UNCORE_EVENT_DESC(bw_out_port1.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_out_port1.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_out_port2, "event=0xff,umask=0x32"), + INTEL_UNCORE_EVENT_DESC(bw_out_port2.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_out_port2.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_out_port3, "event=0xff,umask=0x33"), + INTEL_UNCORE_EVENT_DESC(bw_out_port3.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_out_port3.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_out_port4, "event=0xff,umask=0x34"), + INTEL_UNCORE_EVENT_DESC(bw_out_port4.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_out_port4.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_out_port5, "event=0xff,umask=0x35"), + INTEL_UNCORE_EVENT_DESC(bw_out_port5.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_out_port5.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_out_port6, "event=0xff,umask=0x36"), + INTEL_UNCORE_EVENT_DESC(bw_out_port6.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_out_port6.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_out_port7, "event=0xff,umask=0x37"), + INTEL_UNCORE_EVENT_DESC(bw_out_port7.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_out_port7.unit, "MiB"), + { /* end: all zeroes */ }, +}; + +static struct intel_uncore_type spr_uncore_iio_free_running = { + .name = "iio_free_running", + .num_counters = 17, + .num_freerunning_types = SPR_IIO_FREERUNNING_TYPE_MAX, + .freerunning = spr_iio_freerunning, + .ops = &skx_uncore_iio_freerunning_ops, + .event_descs = spr_uncore_iio_freerunning_events, + .format_group = &skx_uncore_iio_freerunning_format_group, +}; + +enum perf_uncore_spr_imc_freerunning_type_id { + SPR_IMC_DCLK, + SPR_IMC_PQ_CYCLES, + + SPR_IMC_FREERUNNING_TYPE_MAX, +}; + +static struct freerunning_counters spr_imc_freerunning[] = { + [SPR_IMC_DCLK] = { 0x22b0, 0x0, 0, 1, 48 }, + [SPR_IMC_PQ_CYCLES] = { 0x2318, 0x8, 0, 2, 48 }, +}; + +static struct uncore_event_desc spr_uncore_imc_freerunning_events[] = { + INTEL_UNCORE_EVENT_DESC(dclk, "event=0xff,umask=0x10"), + + INTEL_UNCORE_EVENT_DESC(rpq_cycles, "event=0xff,umask=0x20"), + INTEL_UNCORE_EVENT_DESC(wpq_cycles, "event=0xff,umask=0x21"), + { /* end: all zeroes */ }, +}; + +#define SPR_MC_DEVICE_ID 0x3251 + +static void spr_uncore_imc_freerunning_init_box(struct intel_uncore_box *box) +{ + int mem_offset = box->pmu->pmu_idx * ICX_IMC_MEM_STRIDE + SNR_IMC_MMIO_MEM0_OFFSET; + + snr_uncore_mmio_map(box, uncore_mmio_box_ctl(box), + mem_offset, SPR_MC_DEVICE_ID); +} + +static struct intel_uncore_ops spr_uncore_imc_freerunning_ops = { + .init_box = spr_uncore_imc_freerunning_init_box, + .exit_box = uncore_mmio_exit_box, + .read_counter = uncore_mmio_read_counter, + .hw_config = uncore_freerunning_hw_config, +}; + +static struct intel_uncore_type spr_uncore_imc_free_running = { + .name = "imc_free_running", + .num_counters = 3, + .mmio_map_size = SNR_IMC_MMIO_SIZE, + .num_freerunning_types = SPR_IMC_FREERUNNING_TYPE_MAX, + .freerunning = spr_imc_freerunning, + .ops = &spr_uncore_imc_freerunning_ops, + .event_descs = spr_uncore_imc_freerunning_events, + .format_group = &skx_uncore_iio_freerunning_format_group, +}; + +#define UNCORE_SPR_MSR_EXTRA_UNCORES 1 +#define UNCORE_SPR_MMIO_EXTRA_UNCORES 1 + +static struct intel_uncore_type *spr_msr_uncores[UNCORE_SPR_MSR_EXTRA_UNCORES] = { + &spr_uncore_iio_free_running, +}; + +static struct intel_uncore_type *spr_mmio_uncores[UNCORE_SPR_MMIO_EXTRA_UNCORES] = { + &spr_uncore_imc_free_running, +}; + +static void uncore_type_customized_copy(struct intel_uncore_type *to_type, + struct intel_uncore_type *from_type) +{ + if (!to_type || !from_type) + return; + + if (from_type->name) + to_type->name = from_type->name; + if (from_type->fixed_ctr_bits) + to_type->fixed_ctr_bits = from_type->fixed_ctr_bits; + if (from_type->event_mask) + to_type->event_mask = from_type->event_mask; + if (from_type->event_mask_ext) + to_type->event_mask_ext = from_type->event_mask_ext; + if (from_type->fixed_ctr) + to_type->fixed_ctr = from_type->fixed_ctr; + if (from_type->fixed_ctl) + to_type->fixed_ctl = from_type->fixed_ctl; + if (from_type->fixed_ctr_bits) + to_type->fixed_ctr_bits = from_type->fixed_ctr_bits; + if (from_type->num_shared_regs) + to_type->num_shared_regs = from_type->num_shared_regs; + if (from_type->constraints) + to_type->constraints = from_type->constraints; + if (from_type->ops) + to_type->ops = from_type->ops; + if (from_type->event_descs) + to_type->event_descs = from_type->event_descs; + if (from_type->format_group) + to_type->format_group = from_type->format_group; + if (from_type->attr_update) + to_type->attr_update = from_type->attr_update; +} + +static struct intel_uncore_type ** +uncore_get_uncores(enum uncore_access_type type_id, int num_extra, + struct intel_uncore_type **extra) +{ + struct intel_uncore_type **types, **start_types; + int i; + + start_types = types = intel_uncore_generic_init_uncores(type_id, num_extra); + + /* Only copy the customized features */ + for (; *types; types++) { + if ((*types)->type_id >= UNCORE_SPR_NUM_UNCORE_TYPES) + continue; + uncore_type_customized_copy(*types, spr_uncores[(*types)->type_id]); + } + + for (i = 0; i < num_extra; i++, types++) + *types = extra[i]; + + return start_types; +} + +static struct intel_uncore_type * +uncore_find_type_by_id(struct intel_uncore_type **types, int type_id) +{ + for (; *types; types++) { + if (type_id == (*types)->type_id) + return *types; + } + + return NULL; +} + +static int uncore_type_max_boxes(struct intel_uncore_type **types, + int type_id) +{ + struct intel_uncore_type *type; + int i, max = 0; + + type = uncore_find_type_by_id(types, type_id); + if (!type) + return 0; + + for (i = 0; i < type->num_boxes; i++) { + if (type->box_ids[i] > max) + max = type->box_ids[i]; + } + + return max + 1; +} + +void spr_uncore_cpu_init(void) +{ + uncore_msr_uncores = uncore_get_uncores(UNCORE_ACCESS_MSR, + UNCORE_SPR_MSR_EXTRA_UNCORES, + spr_msr_uncores); + + spr_uncore_iio_free_running.num_boxes = uncore_type_max_boxes(uncore_msr_uncores, UNCORE_SPR_IIO); +} + +int spr_uncore_pci_init(void) +{ + uncore_pci_uncores = uncore_get_uncores(UNCORE_ACCESS_PCI, 0, NULL); + return 0; +} + +void spr_uncore_mmio_init(void) +{ + int ret = snbep_pci2phy_map_init(0x3250, SKX_CPUNODEID, SKX_GIDNIDMAP, true); + + if (ret) + uncore_mmio_uncores = uncore_get_uncores(UNCORE_ACCESS_MMIO, 0, NULL); + else { + uncore_mmio_uncores = uncore_get_uncores(UNCORE_ACCESS_MMIO, + UNCORE_SPR_MMIO_EXTRA_UNCORES, + spr_mmio_uncores); + + spr_uncore_imc_free_running.num_boxes = uncore_type_max_boxes(uncore_mmio_uncores, UNCORE_SPR_IMC) / 2; + } +} + +/* end of SPR uncore support */ |