aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/events
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/events')
-rw-r--r--arch/x86/events/Kconfig20
-rw-r--r--arch/x86/events/Makefile2
-rw-r--r--arch/x86/events/amd/Makefile3
-rw-r--r--arch/x86/events/amd/brs.c434
-rw-r--r--arch/x86/events/amd/core.c509
-rw-r--r--arch/x86/events/amd/ibs.c547
-rw-r--r--arch/x86/events/amd/lbr.c439
-rw-r--r--arch/x86/events/amd/uncore.c146
-rw-r--r--arch/x86/events/core.c114
-rw-r--r--arch/x86/events/intel/core.c394
-rw-r--r--arch/x86/events/intel/cstate.c26
-rw-r--r--arch/x86/events/intel/ds.c230
-rw-r--r--arch/x86/events/intel/lbr.c502
-rw-r--r--arch/x86/events/intel/p4.c37
-rw-r--r--arch/x86/events/intel/pt.c76
-rw-r--r--arch/x86/events/intel/uncore.c6
-rw-r--r--arch/x86/events/intel/uncore.h3
-rw-r--r--arch/x86/events/intel/uncore_discovery.c20
-rw-r--r--arch/x86/events/intel/uncore_discovery.h4
-rw-r--r--arch/x86/events/intel/uncore_snb.c562
-rw-r--r--arch/x86/events/intel/uncore_snbep.c2
-rw-r--r--arch/x86/events/msr.c4
-rw-r--r--arch/x86/events/perf_event.h250
-rw-r--r--arch/x86/events/perf_event_flags.h22
-rw-r--r--arch/x86/events/rapl.c19
-rw-r--r--arch/x86/events/utils.c251
26 files changed, 3647 insertions, 975 deletions
diff --git a/arch/x86/events/Kconfig b/arch/x86/events/Kconfig
index d6cdfe631674..dabdf3d7bf84 100644
--- a/arch/x86/events/Kconfig
+++ b/arch/x86/events/Kconfig
@@ -6,24 +6,24 @@ config PERF_EVENTS_INTEL_UNCORE
depends on PERF_EVENTS && CPU_SUP_INTEL && PCI
default y
help
- Include support for Intel uncore performance events. These are
- available on NehalemEX and more modern processors.
+ Include support for Intel uncore performance events. These are
+ available on NehalemEX and more modern processors.
config PERF_EVENTS_INTEL_RAPL
tristate "Intel/AMD rapl performance events"
depends on PERF_EVENTS && (CPU_SUP_INTEL || CPU_SUP_AMD) && PCI
default y
help
- Include support for Intel and AMD rapl performance events for power
- monitoring on modern processors.
+ Include support for Intel and AMD rapl performance events for power
+ monitoring on modern processors.
config PERF_EVENTS_INTEL_CSTATE
tristate "Intel cstate performance events"
depends on PERF_EVENTS && CPU_SUP_INTEL && PCI
default y
help
- Include support for Intel cstate performance events for power
- monitoring on modern processors.
+ Include support for Intel cstate performance events for power
+ monitoring on modern processors.
config PERF_EVENTS_AMD_POWER
depends on PERF_EVENTS && CPU_SUP_AMD
@@ -44,4 +44,12 @@ config PERF_EVENTS_AMD_UNCORE
To compile this driver as a module, choose M here: the
module will be called 'amd-uncore'.
+
+config PERF_EVENTS_AMD_BRS
+ depends on PERF_EVENTS && CPU_SUP_AMD
+ bool "AMD Zen3 Branch Sampling support"
+ help
+ Enable AMD Zen3 branch sampling support (BRS) which samples up to
+ 16 consecutive taken branches in registers.
+
endmenu
diff --git a/arch/x86/events/Makefile b/arch/x86/events/Makefile
index 9933c0e8e97a..86a76efa8bb6 100644
--- a/arch/x86/events/Makefile
+++ b/arch/x86/events/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
-obj-y += core.o probe.o
+obj-y += core.o probe.o utils.o
obj-$(CONFIG_PERF_EVENTS_INTEL_RAPL) += rapl.o
obj-y += amd/
obj-$(CONFIG_X86_LOCAL_APIC) += msr.o
diff --git a/arch/x86/events/amd/Makefile b/arch/x86/events/amd/Makefile
index 6cbe38d5fd9d..527d947eb76b 100644
--- a/arch/x86/events/amd/Makefile
+++ b/arch/x86/events/amd/Makefile
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_CPU_SUP_AMD) += core.o
+obj-$(CONFIG_CPU_SUP_AMD) += core.o lbr.o
+obj-$(CONFIG_PERF_EVENTS_AMD_BRS) += brs.o
obj-$(CONFIG_PERF_EVENTS_AMD_POWER) += power.o
obj-$(CONFIG_X86_LOCAL_APIC) += ibs.o
obj-$(CONFIG_PERF_EVENTS_AMD_UNCORE) += amd-uncore.o
diff --git a/arch/x86/events/amd/brs.c b/arch/x86/events/amd/brs.c
new file mode 100644
index 000000000000..f1bff153d945
--- /dev/null
+++ b/arch/x86/events/amd/brs.c
@@ -0,0 +1,434 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Implement support for AMD Fam19h Branch Sampling feature
+ * Based on specifications published in AMD PPR Fam19 Model 01
+ *
+ * Copyright 2021 Google LLC
+ * Contributed by Stephane Eranian <eranian@google.com>
+ */
+#include <linux/kernel.h>
+#include <linux/jump_label.h>
+#include <asm/msr.h>
+#include <asm/cpufeature.h>
+
+#include "../perf_event.h"
+
+#define BRS_POISON 0xFFFFFFFFFFFFFFFEULL /* mark limit of valid entries */
+
+/* Debug Extension Configuration register layout */
+union amd_debug_extn_cfg {
+ __u64 val;
+ struct {
+ __u64 rsvd0:2, /* reserved */
+ brsmen:1, /* branch sample enable */
+ rsvd4_3:2,/* reserved - must be 0x3 */
+ vb:1, /* valid branches recorded */
+ rsvd2:10, /* reserved */
+ msroff:4, /* index of next entry to write */
+ rsvd3:4, /* reserved */
+ pmc:3, /* #PMC holding the sampling event */
+ rsvd4:37; /* reserved */
+ };
+};
+
+static inline unsigned int brs_from(int idx)
+{
+ return MSR_AMD_SAMP_BR_FROM + 2 * idx;
+}
+
+static inline unsigned int brs_to(int idx)
+{
+ return MSR_AMD_SAMP_BR_FROM + 2 * idx + 1;
+}
+
+static inline void set_debug_extn_cfg(u64 val)
+{
+ /* bits[4:3] must always be set to 11b */
+ wrmsrl(MSR_AMD_DBG_EXTN_CFG, val | 3ULL << 3);
+}
+
+static inline u64 get_debug_extn_cfg(void)
+{
+ u64 val;
+
+ rdmsrl(MSR_AMD_DBG_EXTN_CFG, val);
+ return val;
+}
+
+static bool __init amd_brs_detect(void)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_BRS))
+ return false;
+
+ switch (boot_cpu_data.x86) {
+ case 0x19: /* AMD Fam19h (Zen3) */
+ x86_pmu.lbr_nr = 16;
+
+ /* No hardware filtering supported */
+ x86_pmu.lbr_sel_map = NULL;
+ x86_pmu.lbr_sel_mask = 0;
+ break;
+ default:
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * Current BRS implementation does not support branch type or privilege level
+ * filtering. Therefore, this function simply enforces these limitations. No need for
+ * a br_sel_map. Software filtering is not supported because it would not correlate well
+ * with a sampling period.
+ */
+static int amd_brs_setup_filter(struct perf_event *event)
+{
+ u64 type = event->attr.branch_sample_type;
+
+ /* No BRS support */
+ if (!x86_pmu.lbr_nr)
+ return -EOPNOTSUPP;
+
+ /* Can only capture all branches, i.e., no filtering */
+ if ((type & ~PERF_SAMPLE_BRANCH_PLM_ALL) != PERF_SAMPLE_BRANCH_ANY)
+ return -EINVAL;
+
+ return 0;
+}
+
+static inline int amd_is_brs_event(struct perf_event *e)
+{
+ return (e->hw.config & AMD64_RAW_EVENT_MASK) == AMD_FAM19H_BRS_EVENT;
+}
+
+int amd_brs_hw_config(struct perf_event *event)
+{
+ int ret = 0;
+
+ /*
+ * Due to interrupt holding, BRS is not recommended in
+ * counting mode.
+ */
+ if (!is_sampling_event(event))
+ return -EINVAL;
+
+ /*
+ * Due to the way BRS operates by holding the interrupt until
+ * lbr_nr entries have been captured, it does not make sense
+ * to allow sampling on BRS with an event that does not match
+ * what BRS is capturing, i.e., retired taken branches.
+ * Otherwise the correlation with the event's period is even
+ * more loose:
+ *
+ * With retired taken branch:
+ * Effective P = P + 16 + X
+ * With any other event:
+ * Effective P = P + Y + X
+ *
+ * Where X is the number of taken branches due to interrupt
+ * skid. Skid is large.
+ *
+ * Where Y is the occurences of the event while BRS is
+ * capturing the lbr_nr entries.
+ *
+ * By using retired taken branches, we limit the impact on the
+ * Y variable. We know it cannot be more than the depth of
+ * BRS.
+ */
+ if (!amd_is_brs_event(event))
+ return -EINVAL;
+
+ /*
+ * BRS implementation does not work with frequency mode
+ * reprogramming of the period.
+ */
+ if (event->attr.freq)
+ return -EINVAL;
+ /*
+ * The kernel subtracts BRS depth from period, so it must
+ * be big enough.
+ */
+ if (event->attr.sample_period <= x86_pmu.lbr_nr)
+ return -EINVAL;
+
+ /*
+ * Check if we can allow PERF_SAMPLE_BRANCH_STACK
+ */
+ ret = amd_brs_setup_filter(event);
+
+ /* only set in case of success */
+ if (!ret)
+ event->hw.flags |= PERF_X86_EVENT_AMD_BRS;
+
+ return ret;
+}
+
+/* tos = top of stack, i.e., last valid entry written */
+static inline int amd_brs_get_tos(union amd_debug_extn_cfg *cfg)
+{
+ /*
+ * msroff: index of next entry to write so top-of-stack is one off
+ * if BRS is full then msroff is set back to 0.
+ */
+ return (cfg->msroff ? cfg->msroff : x86_pmu.lbr_nr) - 1;
+}
+
+/*
+ * make sure we have a sane BRS offset to begin with
+ * especially with kexec
+ */
+void amd_brs_reset(void)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_BRS))
+ return;
+
+ /*
+ * Reset config
+ */
+ set_debug_extn_cfg(0);
+
+ /*
+ * Mark first entry as poisoned
+ */
+ wrmsrl(brs_to(0), BRS_POISON);
+}
+
+int __init amd_brs_init(void)
+{
+ if (!amd_brs_detect())
+ return -EOPNOTSUPP;
+
+ pr_cont("%d-deep BRS, ", x86_pmu.lbr_nr);
+
+ return 0;
+}
+
+void amd_brs_enable(void)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ union amd_debug_extn_cfg cfg;
+
+ /* Activate only on first user */
+ if (++cpuc->brs_active > 1)
+ return;
+
+ cfg.val = 0; /* reset all fields */
+ cfg.brsmen = 1; /* enable branch sampling */
+
+ /* Set enable bit */
+ set_debug_extn_cfg(cfg.val);
+}
+
+void amd_brs_enable_all(void)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ if (cpuc->lbr_users)
+ amd_brs_enable();
+}
+
+void amd_brs_disable(void)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ union amd_debug_extn_cfg cfg;
+
+ /* Check if active (could be disabled via x86_pmu_disable_all()) */
+ if (!cpuc->brs_active)
+ return;
+
+ /* Only disable for last user */
+ if (--cpuc->brs_active)
+ return;
+
+ /*
+ * Clear the brsmen bit but preserve the others as they contain
+ * useful state such as vb and msroff
+ */
+ cfg.val = get_debug_extn_cfg();
+
+ /*
+ * When coming in on interrupt and BRS is full, then hw will have
+ * already stopped BRS, no need to issue wrmsr again
+ */
+ if (cfg.brsmen) {
+ cfg.brsmen = 0;
+ set_debug_extn_cfg(cfg.val);
+ }
+}
+
+void amd_brs_disable_all(void)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ if (cpuc->lbr_users)
+ amd_brs_disable();
+}
+
+static bool amd_brs_match_plm(struct perf_event *event, u64 to)
+{
+ int type = event->attr.branch_sample_type;
+ int plm_k = PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_HV;
+ int plm_u = PERF_SAMPLE_BRANCH_USER;
+
+ if (!(type & plm_k) && kernel_ip(to))
+ return 0;
+
+ if (!(type & plm_u) && !kernel_ip(to))
+ return 0;
+
+ return 1;
+}
+
+/*
+ * Caller must ensure amd_brs_inuse() is true before calling
+ * return:
+ */
+void amd_brs_drain(void)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct perf_event *event = cpuc->events[0];
+ struct perf_branch_entry *br = cpuc->lbr_entries;
+ union amd_debug_extn_cfg cfg;
+ u32 i, nr = 0, num, tos, start;
+ u32 shift = 64 - boot_cpu_data.x86_virt_bits;
+
+ /*
+ * BRS event forced on PMC0,
+ * so check if there is an event.
+ * It is possible to have lbr_users > 0 but the event
+ * not yet scheduled due to long latency PMU irq
+ */
+ if (!event)
+ goto empty;
+
+ cfg.val = get_debug_extn_cfg();
+
+ /* Sanity check [0-x86_pmu.lbr_nr] */
+ if (WARN_ON_ONCE(cfg.msroff >= x86_pmu.lbr_nr))
+ goto empty;
+
+ /* No valid branch */
+ if (cfg.vb == 0)
+ goto empty;
+
+ /*
+ * msr.off points to next entry to be written
+ * tos = most recent entry index = msr.off - 1
+ * BRS register buffer saturates, so we know we have
+ * start < tos and that we have to read from start to tos
+ */
+ start = 0;
+ tos = amd_brs_get_tos(&cfg);
+
+ num = tos - start + 1;
+
+ /*
+ * BRS is only one pass (saturation) from MSROFF to depth-1
+ * MSROFF wraps to zero when buffer is full
+ */
+ for (i = 0; i < num; i++) {
+ u32 brs_idx = tos - i;
+ u64 from, to;
+
+ rdmsrl(brs_to(brs_idx), to);
+
+ /* Entry does not belong to us (as marked by kernel) */
+ if (to == BRS_POISON)
+ break;
+
+ /*
+ * Sign-extend SAMP_BR_TO to 64 bits, bits 61-63 are reserved.
+ * Necessary to generate proper virtual addresses suitable for
+ * symbolization
+ */
+ to = (u64)(((s64)to << shift) >> shift);
+
+ if (!amd_brs_match_plm(event, to))
+ continue;
+
+ rdmsrl(brs_from(brs_idx), from);
+
+ perf_clear_branch_entry_bitfields(br+nr);
+
+ br[nr].from = from;
+ br[nr].to = to;
+
+ nr++;
+ }
+empty:
+ /* Record number of sampled branches */
+ cpuc->lbr_stack.nr = nr;
+}
+
+/*
+ * Poison most recent entry to prevent reuse by next task
+ * required because BRS entry are not tagged by PID
+ */
+static void amd_brs_poison_buffer(void)
+{
+ union amd_debug_extn_cfg cfg;
+ unsigned int idx;
+
+ /* Get current state */
+ cfg.val = get_debug_extn_cfg();
+
+ /* idx is most recently written entry */
+ idx = amd_brs_get_tos(&cfg);
+
+ /* Poison target of entry */
+ wrmsrl(brs_to(idx), BRS_POISON);
+}
+
+/*
+ * On context switch in, we need to make sure no samples from previous user
+ * are left in the BRS.
+ *
+ * On ctxswin, sched_in = true, called after the PMU has started
+ * On ctxswout, sched_in = false, called before the PMU is stopped
+ */
+void amd_pmu_brs_sched_task(struct perf_event_context *ctx, bool sched_in)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ /* no active users */
+ if (!cpuc->lbr_users)
+ return;
+
+ /*
+ * On context switch in, we need to ensure we do not use entries
+ * from previous BRS user on that CPU, so we poison the buffer as
+ * a faster way compared to resetting all entries.
+ */
+ if (sched_in)
+ amd_brs_poison_buffer();
+}
+
+/*
+ * called from ACPI processor_idle.c or acpi_pad.c
+ * with interrupts disabled
+ */
+void perf_amd_brs_lopwr_cb(bool lopwr_in)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ union amd_debug_extn_cfg cfg;
+
+ /*
+ * on mwait in, we may end up in non C0 state.
+ * we must disable branch sampling to avoid holding the NMI
+ * for too long. We disable it in hardware but we
+ * keep the state in cpuc, so we can re-enable.
+ *
+ * The hardware will deliver the NMI if needed when brsmen cleared
+ */
+ if (cpuc->brs_active) {
+ cfg.val = get_debug_extn_cfg();
+ cfg.brsmen = !lopwr_in;
+ set_debug_extn_cfg(cfg.val);
+ }
+}
+
+DEFINE_STATIC_CALL_NULL(perf_lopwr_cb, perf_amd_brs_lopwr_cb);
+EXPORT_STATIC_CALL_TRAMP_GPL(perf_lopwr_cb);
+
+void __init amd_brs_lopwr_init(void)
+{
+ static_call_update(perf_lopwr_cb, perf_amd_brs_lopwr_cb);
+}
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index 9687a8aef01c..8b70237c33f7 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/perf_event.h>
+#include <linux/jump_label.h>
#include <linux/export.h>
#include <linux/types.h>
#include <linux/init.h>
@@ -7,6 +8,7 @@
#include <linux/delay.h>
#include <linux/jiffies.h>
#include <asm/apicdef.h>
+#include <asm/apic.h>
#include <asm/nmi.h>
#include "../perf_event.h"
@@ -18,6 +20,9 @@ static unsigned long perf_nmi_window;
#define AMD_MERGE_EVENT ((0xFULL << 32) | 0xFFULL)
#define AMD_MERGE_EVENT_ENABLE (AMD_MERGE_EVENT | ARCH_PERFMON_EVENTSEL_ENABLE)
+/* PMC Enable and Overflow bits for PerfCntrGlobal* registers */
+static u64 amd_pmu_global_cntr_mask __read_mostly;
+
static __initconst const u64 amd_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
@@ -325,6 +330,8 @@ static inline bool amd_is_pair_event_code(struct hw_perf_event *hwc)
}
}
+DEFINE_STATIC_CALL_RET0(amd_pmu_branch_hw_config, *x86_pmu.hw_config);
+
static int amd_core_hw_config(struct perf_event *event)
{
if (event->attr.exclude_host && event->attr.exclude_guest)
@@ -343,6 +350,9 @@ static int amd_core_hw_config(struct perf_event *event)
if ((x86_pmu.flags & PMU_FL_PAIR) && amd_is_pair_event_code(&event->hw))
event->hw.flags |= PERF_X86_EVENT_PAIR;
+ if (has_branch_stack(event))
+ return static_call(amd_pmu_branch_hw_config)(event);
+
return 0;
}
@@ -366,7 +376,7 @@ static int amd_pmu_hw_config(struct perf_event *event)
if (event->attr.precise_ip && get_ibs_caps())
return -ENOENT;
- if (has_branch_stack(event))
+ if (has_branch_stack(event) && !x86_pmu.lbr_nr)
return -EOPNOTSUPP;
ret = x86_pmu_hw_config(event);
@@ -510,20 +520,46 @@ static struct amd_nb *amd_alloc_nb(int cpu)
return nb;
}
+typedef void (amd_pmu_branch_reset_t)(void);
+DEFINE_STATIC_CALL_NULL(amd_pmu_branch_reset, amd_pmu_branch_reset_t);
+
+static void amd_pmu_cpu_reset(int cpu)
+{
+ if (x86_pmu.lbr_nr)
+ static_call(amd_pmu_branch_reset)();
+
+ if (x86_pmu.version < 2)
+ return;
+
+ /* Clear enable bits i.e. PerfCntrGlobalCtl.PerfCntrEn */
+ wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, 0);
+
+ /* Clear overflow bits i.e. PerfCntrGLobalStatus.PerfCntrOvfl */
+ wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, amd_pmu_global_cntr_mask);
+}
+
static int amd_pmu_cpu_prepare(int cpu)
{
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+ cpuc->lbr_sel = kzalloc_node(sizeof(struct er_account), GFP_KERNEL,
+ cpu_to_node(cpu));
+ if (!cpuc->lbr_sel)
+ return -ENOMEM;
+
WARN_ON_ONCE(cpuc->amd_nb);
if (!x86_pmu.amd_nb_constraints)
return 0;
cpuc->amd_nb = amd_alloc_nb(cpu);
- if (!cpuc->amd_nb)
- return -ENOMEM;
+ if (cpuc->amd_nb)
+ return 0;
- return 0;
+ kfree(cpuc->lbr_sel);
+ cpuc->lbr_sel = NULL;
+
+ return -ENOMEM;
}
static void amd_pmu_cpu_starting(int cpu)
@@ -555,17 +591,20 @@ static void amd_pmu_cpu_starting(int cpu)
cpuc->amd_nb->nb_id = nb_id;
cpuc->amd_nb->refcnt++;
+
+ amd_pmu_cpu_reset(cpu);
}
static void amd_pmu_cpu_dead(int cpu)
{
- struct cpu_hw_events *cpuhw;
+ struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
+
+ kfree(cpuhw->lbr_sel);
+ cpuhw->lbr_sel = NULL;
if (!x86_pmu.amd_nb_constraints)
return;
- cpuhw = &per_cpu(cpu_hw_events, cpu);
-
if (cpuhw->amd_nb) {
struct amd_nb *nb = cpuhw->amd_nb;
@@ -574,8 +613,52 @@ static void amd_pmu_cpu_dead(int cpu)
cpuhw->amd_nb = NULL;
}
+
+ amd_pmu_cpu_reset(cpu);
+}
+
+static inline void amd_pmu_set_global_ctl(u64 ctl)
+{
+ wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, ctl);
+}
+
+static inline u64 amd_pmu_get_global_status(void)
+{
+ u64 status;
+
+ /* PerfCntrGlobalStatus is read-only */
+ rdmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS, status);
+
+ return status;
+}
+
+static inline void amd_pmu_ack_global_status(u64 status)
+{
+ /*
+ * PerfCntrGlobalStatus is read-only but an overflow acknowledgment
+ * mechanism exists; writing 1 to a bit in PerfCntrGlobalStatusClr
+ * clears the same bit in PerfCntrGlobalStatus
+ */
+
+ wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, status);
+}
+
+static bool amd_pmu_test_overflow_topbit(int idx)
+{
+ u64 counter;
+
+ rdmsrl(x86_pmu_event_addr(idx), counter);
+
+ return !(counter & BIT_ULL(x86_pmu.cntval_bits - 1));
+}
+
+static bool amd_pmu_test_overflow_status(int idx)
+{
+ return amd_pmu_get_global_status() & BIT_ULL(idx);
}
+DEFINE_STATIC_CALL(amd_pmu_test_overflow, amd_pmu_test_overflow_topbit);
+
/*
* When a PMC counter overflows, an NMI is used to process the event and
* reset the counter. NMI latency can result in the counter being updated
@@ -588,7 +671,6 @@ static void amd_pmu_cpu_dead(int cpu)
static void amd_pmu_wait_on_overflow(int idx)
{
unsigned int i;
- u64 counter;
/*
* Wait for the counter to be reset if it has overflowed. This loop
@@ -596,8 +678,7 @@ static void amd_pmu_wait_on_overflow(int idx)
* forever...
*/
for (i = 0; i < OVERFLOW_WAIT_COUNT; i++) {
- rdmsrl(x86_pmu_event_addr(idx), counter);
- if (counter & (1ULL << (x86_pmu.cntval_bits - 1)))
+ if (!static_call(amd_pmu_test_overflow)(idx))
break;
/* Might be in IRQ context, so can't sleep */
@@ -605,13 +686,11 @@ static void amd_pmu_wait_on_overflow(int idx)
}
}
-static void amd_pmu_disable_all(void)
+static void amd_pmu_check_overflow(void)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int idx;
- x86_pmu_disable_all();
-
/*
* This shouldn't be called from NMI context, but add a safeguard here
* to return, since if we're in NMI context we can't wait for an NMI
@@ -634,6 +713,53 @@ static void amd_pmu_disable_all(void)
}
}
+static void amd_pmu_enable_event(struct perf_event *event)
+{
+ x86_pmu_enable_event(event);
+}
+
+static void amd_pmu_enable_all(int added)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ int idx;
+
+ amd_brs_enable_all();
+
+ for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ /* only activate events which are marked as active */
+ if (!test_bit(idx, cpuc->active_mask))
+ continue;
+
+ amd_pmu_enable_event(cpuc->events[idx]);
+ }
+}
+
+static void amd_pmu_v2_enable_event(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ /*
+ * Testing cpu_hw_events.enabled should be skipped in this case unlike
+ * in x86_pmu_enable_event().
+ *
+ * Since cpu_hw_events.enabled is set only after returning from
+ * x86_pmu_start(), the PMCs must be programmed and kept ready.
+ * Counting starts only after x86_pmu_enable_all() is called.
+ */
+ __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
+}
+
+static __always_inline void amd_pmu_core_enable_all(void)
+{
+ amd_pmu_set_global_ctl(amd_pmu_global_cntr_mask);
+}
+
+static void amd_pmu_v2_enable_all(int added)
+{
+ amd_pmu_lbr_enable_all();
+ amd_pmu_core_enable_all();
+}
+
static void amd_pmu_disable_event(struct perf_event *event)
{
x86_pmu_disable_event(event);
@@ -651,6 +777,41 @@ static void amd_pmu_disable_event(struct perf_event *event)
amd_pmu_wait_on_overflow(event->hw.idx);
}
+static void amd_pmu_disable_all(void)
+{
+ amd_brs_disable_all();
+ x86_pmu_disable_all();
+ amd_pmu_check_overflow();
+}
+
+static __always_inline void amd_pmu_core_disable_all(void)
+{
+ amd_pmu_set_global_ctl(0);
+}
+
+static void amd_pmu_v2_disable_all(void)
+{
+ amd_pmu_core_disable_all();
+ amd_pmu_lbr_disable_all();
+ amd_pmu_check_overflow();
+}
+
+DEFINE_STATIC_CALL_NULL(amd_pmu_branch_add, *x86_pmu.add);
+
+static void amd_pmu_add_event(struct perf_event *event)
+{
+ if (needs_branch_stack(event))
+ static_call(amd_pmu_branch_add)(event);
+}
+
+DEFINE_STATIC_CALL_NULL(amd_pmu_branch_del, *x86_pmu.del);
+
+static void amd_pmu_del_event(struct perf_event *event)
+{
+ if (needs_branch_stack(event))
+ static_call(amd_pmu_branch_del)(event);
+}
+
/*
* Because of NMI latency, if multiple PMC counters are active or other sources
* of NMIs are received, the perf NMI handler can handle one or more overflowed
@@ -669,13 +830,8 @@ static void amd_pmu_disable_event(struct perf_event *event)
* handled a counter. When an un-handled NMI is received, it will be claimed
* only if arriving within that window.
*/
-static int amd_pmu_handle_irq(struct pt_regs *regs)
+static inline int amd_pmu_adjust_nmi_window(int handled)
{
- int handled;
-
- /* Process any counter overflows */
- handled = x86_pmu_handle_irq(regs);
-
/*
* If a counter was handled, record a timestamp such that un-handled
* NMIs will be claimed if arriving within that window.
@@ -692,6 +848,124 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
return NMI_HANDLED;
}
+static int amd_pmu_handle_irq(struct pt_regs *regs)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ int handled;
+ int pmu_enabled;
+
+ /*
+ * Save the PMU state.
+ * It needs to be restored when leaving the handler.
+ */
+ pmu_enabled = cpuc->enabled;
+ cpuc->enabled = 0;
+
+ /* stop everything (includes BRS) */
+ amd_pmu_disable_all();
+
+ /* Drain BRS is in use (could be inactive) */
+ if (cpuc->lbr_users)
+ amd_brs_drain();
+
+ /* Process any counter overflows */
+ handled = x86_pmu_handle_irq(regs);
+
+ cpuc->enabled = pmu_enabled;
+ if (pmu_enabled)
+ amd_pmu_enable_all(0);
+
+ return amd_pmu_adjust_nmi_window(handled);
+}
+
+static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct perf_sample_data data;
+ struct hw_perf_event *hwc;
+ struct perf_event *event;
+ int handled = 0, idx;
+ u64 status, mask;
+ bool pmu_enabled;
+
+ /*
+ * Save the PMU state as it needs to be restored when leaving the
+ * handler
+ */
+ pmu_enabled = cpuc->enabled;
+ cpuc->enabled = 0;
+
+ /* Stop counting but do not disable LBR */
+ amd_pmu_core_disable_all();
+
+ status = amd_pmu_get_global_status();
+
+ /* Check if any overflows are pending */
+ if (!status)
+ goto done;
+
+ /* Read branch records before unfreezing */
+ if (status & GLOBAL_STATUS_LBRS_FROZEN) {
+ amd_pmu_lbr_read();
+ status &= ~GLOBAL_STATUS_LBRS_FROZEN;
+ }
+
+ for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ if (!test_bit(idx, cpuc->active_mask))
+ continue;
+
+ event = cpuc->events[idx];
+ hwc = &event->hw;
+ x86_perf_event_update(event);
+ mask = BIT_ULL(idx);
+
+ if (!(status & mask))
+ continue;
+
+ /* Event overflow */
+ handled++;
+ perf_sample_data_init(&data, 0, hwc->last_period);
+
+ if (!x86_perf_event_set_period(event))
+ continue;
+
+ if (has_branch_stack(event)) {
+ data.br_stack = &cpuc->lbr_stack;
+ data.sample_flags |= PERF_SAMPLE_BRANCH_STACK;
+ }
+
+ if (perf_event_overflow(event, &data, regs))
+ x86_pmu_stop(event, 0);
+
+ status &= ~mask;
+ }
+
+ /*
+ * It should never be the case that some overflows are not handled as
+ * the corresponding PMCs are expected to be inactive according to the
+ * active_mask
+ */
+ WARN_ON(status > 0);
+
+ /* Clear overflow and freeze bits */
+ amd_pmu_ack_global_status(~status);
+
+ /*
+ * Unmasking the LVTPC is not required as the Mask (M) bit of the LVT
+ * PMI entry is not set by the local APIC when a PMC overflow occurs
+ */
+ inc_irq_stat(apic_perf_irqs);
+
+done:
+ cpuc->enabled = pmu_enabled;
+
+ /* Resume counting only if PMU is active */
+ if (pmu_enabled)
+ amd_pmu_core_enable_all();
+
+ return amd_pmu_adjust_nmi_window(handled);
+}
+
static struct event_constraint *
amd_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
struct perf_event *event)
@@ -897,6 +1171,51 @@ static void amd_put_event_constraints_f17h(struct cpu_hw_events *cpuc,
--cpuc->n_pair;
}
+/*
+ * Because of the way BRS operates with an inactive and active phases, and
+ * the link to one counter, it is not possible to have two events using BRS
+ * scheduled at the same time. There would be an issue with enforcing the
+ * period of each one and given that the BRS saturates, it would not be possible
+ * to guarantee correlated content for all events. Therefore, in situations
+ * where multiple events want to use BRS, the kernel enforces mutual exclusion.
+ * Exclusion is enforced by chosing only one counter for events using BRS.
+ * The event scheduling logic will then automatically multiplex the
+ * events and ensure that at most one event is actively using BRS.
+ *
+ * The BRS counter could be any counter, but there is no constraint on Fam19h,
+ * therefore all counters are equal and thus we pick the first one: PMC0
+ */
+static struct event_constraint amd_fam19h_brs_cntr0_constraint =
+ EVENT_CONSTRAINT(0, 0x1, AMD64_RAW_EVENT_MASK);
+
+static struct event_constraint amd_fam19h_brs_pair_cntr0_constraint =
+ __EVENT_CONSTRAINT(0, 0x1, AMD64_RAW_EVENT_MASK, 1, 0, PERF_X86_EVENT_PAIR);
+
+static struct event_constraint *
+amd_get_event_constraints_f19h(struct cpu_hw_events *cpuc, int idx,
+ struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ bool has_brs = has_amd_brs(hwc);
+
+ /*
+ * In case BRS is used with an event requiring a counter pair,
+ * the kernel allows it but only on counter 0 & 1 to enforce
+ * multiplexing requiring to protect BRS in case of multiple
+ * BRS users
+ */
+ if (amd_is_pair_event_code(hwc)) {
+ return has_brs ? &amd_fam19h_brs_pair_cntr0_constraint
+ : &pair_constraint;
+ }
+
+ if (has_brs)
+ return &amd_fam19h_brs_cntr0_constraint;
+
+ return &unconstrained;
+}
+
+
static ssize_t amd_event_sysfs_show(char *page, u64 config)
{
u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT) |
@@ -905,12 +1224,22 @@ static ssize_t amd_event_sysfs_show(char *page, u64 config)
return x86_event_sysfs_show(page, config, event);
}
+static void amd_pmu_limit_period(struct perf_event *event, s64 *left)
+{
+ /*
+ * Decrease period by the depth of the BRS feature to get the last N
+ * taken branches and approximate the desired period
+ */
+ if (has_branch_stack(event) && *left > x86_pmu.lbr_nr)
+ *left -= x86_pmu.lbr_nr;
+}
+
static __initconst const struct x86_pmu amd_pmu = {
.name = "AMD",
.handle_irq = amd_pmu_handle_irq,
.disable_all = amd_pmu_disable_all,
- .enable_all = x86_pmu_enable_all,
- .enable = x86_pmu_enable_event,
+ .enable_all = amd_pmu_enable_all,
+ .enable = amd_pmu_enable_event,
.disable = amd_pmu_disable_event,
.hw_config = amd_pmu_hw_config,
.schedule_events = x86_schedule_events,
@@ -920,6 +1249,8 @@ static __initconst const struct x86_pmu amd_pmu = {
.event_map = amd_pmu_event_map,
.max_events = ARRAY_SIZE(amd_perfmon_event_map),
.num_counters = AMD64_NUM_COUNTERS,
+ .add = amd_pmu_add_event,
+ .del = amd_pmu_del_event,
.cntval_bits = 48,
.cntval_mask = (1ULL << 48) - 1,
.apic = 1,
@@ -938,8 +1269,68 @@ static __initconst const struct x86_pmu amd_pmu = {
.amd_nb_constraints = 1,
};
+static ssize_t branches_show(struct device *cdev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%d\n", x86_pmu.lbr_nr);
+}
+
+static DEVICE_ATTR_RO(branches);
+
+static struct attribute *amd_pmu_branches_attrs[] = {
+ &dev_attr_branches.attr,
+ NULL,
+};
+
+static umode_t
+amd_branches_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ return x86_pmu.lbr_nr ? attr->mode : 0;
+}
+
+static struct attribute_group group_caps_amd_branches = {
+ .name = "caps",
+ .attrs = amd_pmu_branches_attrs,
+ .is_visible = amd_branches_is_visible,
+};
+
+#ifdef CONFIG_PERF_EVENTS_AMD_BRS
+
+EVENT_ATTR_STR(branch-brs, amd_branch_brs,
+ "event=" __stringify(AMD_FAM19H_BRS_EVENT)"\n");
+
+static struct attribute *amd_brs_events_attrs[] = {
+ EVENT_PTR(amd_branch_brs),
+ NULL,
+};
+
+static umode_t
+amd_brs_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ return static_cpu_has(X86_FEATURE_BRS) && x86_pmu.lbr_nr ?
+ attr->mode : 0;
+}
+
+static struct attribute_group group_events_amd_brs = {
+ .name = "events",
+ .attrs = amd_brs_events_attrs,
+ .is_visible = amd_brs_is_visible,
+};
+
+#endif /* CONFIG_PERF_EVENTS_AMD_BRS */
+
+static const struct attribute_group *amd_attr_update[] = {
+ &group_caps_amd_branches,
+#ifdef CONFIG_PERF_EVENTS_AMD_BRS
+ &group_events_amd_brs,
+#endif
+ NULL,
+};
+
static int __init amd_core_pmu_init(void)
{
+ union cpuid_0x80000022_ebx ebx;
u64 even_ctr_mask = 0ULL;
int i;
@@ -957,6 +1348,27 @@ static int __init amd_core_pmu_init(void)
x86_pmu.eventsel = MSR_F15H_PERF_CTL;
x86_pmu.perfctr = MSR_F15H_PERF_CTR;
x86_pmu.num_counters = AMD64_NUM_COUNTERS_CORE;
+
+ /* Check for Performance Monitoring v2 support */
+ if (boot_cpu_has(X86_FEATURE_PERFMON_V2)) {
+ ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES);
+
+ /* Update PMU version for later usage */
+ x86_pmu.version = 2;
+
+ /* Find the number of available Core PMCs */
+ x86_pmu.num_counters = ebx.split.num_core_pmc;
+
+ amd_pmu_global_cntr_mask = (1ULL << x86_pmu.num_counters) - 1;
+
+ /* Update PMC handling functions */
+ x86_pmu.enable_all = amd_pmu_v2_enable_all;
+ x86_pmu.disable_all = amd_pmu_v2_disable_all;
+ x86_pmu.enable = amd_pmu_v2_enable_event;
+ x86_pmu.handle_irq = amd_pmu_v2_handle_irq;
+ static_call_update(amd_pmu_test_overflow, amd_pmu_test_overflow_status);
+ }
+
/*
* AMD Core perfctr has separate MSRs for the NB events, see
* the amd/uncore.c driver.
@@ -989,6 +1401,37 @@ static int __init amd_core_pmu_init(void)
x86_pmu.flags |= PMU_FL_PAIR;
}
+ /* LBR and BRS are mutually exclusive features */
+ if (!amd_pmu_lbr_init()) {
+ /* LBR requires flushing on context switch */
+ x86_pmu.sched_task = amd_pmu_lbr_sched_task;
+ static_call_update(amd_pmu_branch_hw_config, amd_pmu_lbr_hw_config);
+ static_call_update(amd_pmu_branch_reset, amd_pmu_lbr_reset);
+ static_call_update(amd_pmu_branch_add, amd_pmu_lbr_add);
+ static_call_update(amd_pmu_branch_del, amd_pmu_lbr_del);
+ } else if (!amd_brs_init()) {
+ /*
+ * BRS requires special event constraints and flushing on ctxsw.
+ */
+ x86_pmu.get_event_constraints = amd_get_event_constraints_f19h;
+ x86_pmu.sched_task = amd_pmu_brs_sched_task;
+ x86_pmu.limit_period = amd_pmu_limit_period;
+
+ static_call_update(amd_pmu_branch_hw_config, amd_brs_hw_config);
+ static_call_update(amd_pmu_branch_reset, amd_brs_reset);
+ static_call_update(amd_pmu_branch_add, amd_pmu_brs_add);
+ static_call_update(amd_pmu_branch_del, amd_pmu_brs_del);
+
+ /*
+ * put_event_constraints callback same as Fam17h, set above
+ */
+
+ /* branch sampling must be stopped when entering low power */
+ amd_brs_lopwr_init();
+ }
+
+ x86_pmu.attr_update = amd_attr_update;
+
pr_cont("core perfctr, ");
return 0;
}
@@ -1023,6 +1466,24 @@ __init int amd_pmu_init(void)
return 0;
}
+static inline void amd_pmu_reload_virt(void)
+{
+ if (x86_pmu.version >= 2) {
+ /*
+ * Clear global enable bits, reprogram the PERF_CTL
+ * registers with updated perf_ctr_virt_mask and then
+ * set global enable bits once again
+ */
+ amd_pmu_v2_disable_all();
+ amd_pmu_enable_all(0);
+ amd_pmu_v2_enable_all(0);
+ return;
+ }
+
+ amd_pmu_disable_all();
+ amd_pmu_enable_all(0);
+}
+
void amd_pmu_enable_virt(void)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
@@ -1030,8 +1491,7 @@ void amd_pmu_enable_virt(void)
cpuc->perf_ctr_virt_mask = 0;
/* Reload all events */
- amd_pmu_disable_all();
- x86_pmu_enable_all(0);
+ amd_pmu_reload_virt();
}
EXPORT_SYMBOL_GPL(amd_pmu_enable_virt);
@@ -1048,7 +1508,6 @@ void amd_pmu_disable_virt(void)
cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
/* Reload all events */
- amd_pmu_disable_all();
- x86_pmu_enable_all(0);
+ amd_pmu_reload_virt();
}
EXPORT_SYMBOL_GPL(amd_pmu_disable_virt);
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index 9739019d4b67..4cb710efbdd9 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -94,10 +94,6 @@ struct perf_ibs {
unsigned int fetch_ignore_if_zero_rip : 1;
struct cpu_perf_ibs __percpu *pcpu;
- struct attribute **format_attrs;
- struct attribute_group format_group;
- const struct attribute_group *attr_groups[2];
-
u64 (*get_count)(u64 config);
};
@@ -518,16 +514,118 @@ static void perf_ibs_del(struct perf_event *event, int flags)
static void perf_ibs_read(struct perf_event *event) { }
+/*
+ * We need to initialize with empty group if all attributes in the
+ * group are dynamic.
+ */
+static struct attribute *attrs_empty[] = {
+ NULL,
+};
+
+static struct attribute_group empty_format_group = {
+ .name = "format",
+ .attrs = attrs_empty,
+};
+
+static struct attribute_group empty_caps_group = {
+ .name = "caps",
+ .attrs = attrs_empty,
+};
+
+static const struct attribute_group *empty_attr_groups[] = {
+ &empty_format_group,
+ &empty_caps_group,
+ NULL,
+};
+
PMU_FORMAT_ATTR(rand_en, "config:57");
PMU_FORMAT_ATTR(cnt_ctl, "config:19");
+PMU_EVENT_ATTR_STRING(l3missonly, fetch_l3missonly, "config:59");
+PMU_EVENT_ATTR_STRING(l3missonly, op_l3missonly, "config:16");
+PMU_EVENT_ATTR_STRING(zen4_ibs_extensions, zen4_ibs_extensions, "1");
-static struct attribute *ibs_fetch_format_attrs[] = {
+static umode_t
+zen4_ibs_extensions_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ return ibs_caps & IBS_CAPS_ZEN4 ? attr->mode : 0;
+}
+
+static struct attribute *rand_en_attrs[] = {
&format_attr_rand_en.attr,
NULL,
};
-static struct attribute *ibs_op_format_attrs[] = {
- NULL, /* &format_attr_cnt_ctl.attr if IBS_CAPS_OPCNT */
+static struct attribute *fetch_l3missonly_attrs[] = {
+ &fetch_l3missonly.attr.attr,
+ NULL,
+};
+
+static struct attribute *zen4_ibs_extensions_attrs[] = {
+ &zen4_ibs_extensions.attr.attr,
+ NULL,
+};
+
+static struct attribute_group group_rand_en = {
+ .name = "format",
+ .attrs = rand_en_attrs,
+};
+
+static struct attribute_group group_fetch_l3missonly = {
+ .name = "format",
+ .attrs = fetch_l3missonly_attrs,
+ .is_visible = zen4_ibs_extensions_is_visible,
+};
+
+static struct attribute_group group_zen4_ibs_extensions = {
+ .name = "caps",
+ .attrs = zen4_ibs_extensions_attrs,
+ .is_visible = zen4_ibs_extensions_is_visible,
+};
+
+static const struct attribute_group *fetch_attr_groups[] = {
+ &group_rand_en,
+ &empty_caps_group,
+ NULL,
+};
+
+static const struct attribute_group *fetch_attr_update[] = {
+ &group_fetch_l3missonly,
+ &group_zen4_ibs_extensions,
+ NULL,
+};
+
+static umode_t
+cnt_ctl_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ return ibs_caps & IBS_CAPS_OPCNT ? attr->mode : 0;
+}
+
+static struct attribute *cnt_ctl_attrs[] = {
+ &format_attr_cnt_ctl.attr,
+ NULL,
+};
+
+static struct attribute *op_l3missonly_attrs[] = {
+ &op_l3missonly.attr.attr,
+ NULL,
+};
+
+static struct attribute_group group_cnt_ctl = {
+ .name = "format",
+ .attrs = cnt_ctl_attrs,
+ .is_visible = cnt_ctl_is_visible,
+};
+
+static struct attribute_group group_op_l3missonly = {
+ .name = "format",
+ .attrs = op_l3missonly_attrs,
+ .is_visible = zen4_ibs_extensions_is_visible,
+};
+
+static const struct attribute_group *op_attr_update[] = {
+ &group_cnt_ctl,
+ &group_op_l3missonly,
+ &group_zen4_ibs_extensions,
NULL,
};
@@ -551,7 +649,6 @@ static struct perf_ibs perf_ibs_fetch = {
.max_period = IBS_FETCH_MAX_CNT << 4,
.offset_mask = { MSR_AMD64_IBSFETCH_REG_MASK },
.offset_max = MSR_AMD64_IBSFETCH_REG_COUNT,
- .format_attrs = ibs_fetch_format_attrs,
.get_count = get_ibs_fetch_count,
};
@@ -577,11 +674,343 @@ static struct perf_ibs perf_ibs_op = {
.max_period = IBS_OP_MAX_CNT << 4,
.offset_mask = { MSR_AMD64_IBSOP_REG_MASK },
.offset_max = MSR_AMD64_IBSOP_REG_COUNT,
- .format_attrs = ibs_op_format_attrs,
.get_count = get_ibs_op_count,
};
+static void perf_ibs_get_mem_op(union ibs_op_data3 *op_data3,
+ struct perf_sample_data *data)
+{
+ union perf_mem_data_src *data_src = &data->data_src;
+
+ data_src->mem_op = PERF_MEM_OP_NA;
+
+ if (op_data3->ld_op)
+ data_src->mem_op = PERF_MEM_OP_LOAD;
+ else if (op_data3->st_op)
+ data_src->mem_op = PERF_MEM_OP_STORE;
+}
+
+/*
+ * Processors having CPUID_Fn8000001B_EAX[11] aka IBS_CAPS_ZEN4 has
+ * more fine granular DataSrc encodings. Others have coarse.
+ */
+static u8 perf_ibs_data_src(union ibs_op_data2 *op_data2)
+{
+ if (ibs_caps & IBS_CAPS_ZEN4)
+ return (op_data2->data_src_hi << 3) | op_data2->data_src_lo;
+
+ return op_data2->data_src_lo;
+}
+
+static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
+ union ibs_op_data3 *op_data3,
+ struct perf_sample_data *data)
+{
+ union perf_mem_data_src *data_src = &data->data_src;
+ u8 ibs_data_src = perf_ibs_data_src(op_data2);
+
+ data_src->mem_lvl = 0;
+
+ /*
+ * DcMiss, L2Miss, DataSrc, DcMissLat etc. are all invalid for Uncached
+ * memory accesses. So, check DcUcMemAcc bit early.
+ */
+ if (op_data3->dc_uc_mem_acc && ibs_data_src != IBS_DATA_SRC_EXT_IO) {
+ data_src->mem_lvl = PERF_MEM_LVL_UNC | PERF_MEM_LVL_HIT;
+ return;
+ }
+
+ /* L1 Hit */
+ if (op_data3->dc_miss == 0) {
+ data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
+ return;
+ }
+
+ /* L2 Hit */
+ if (op_data3->l2_miss == 0) {
+ /* Erratum #1293 */
+ if (boot_cpu_data.x86 != 0x19 || boot_cpu_data.x86_model > 0xF ||
+ !(op_data3->sw_pf || op_data3->dc_miss_no_mab_alloc)) {
+ data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
+ return;
+ }
+ }
+
+ /*
+ * OP_DATA2 is valid only for load ops. Skip all checks which
+ * uses OP_DATA2[DataSrc].
+ */
+ if (data_src->mem_op != PERF_MEM_OP_LOAD)
+ goto check_mab;
+
+ /* L3 Hit */
+ if (ibs_caps & IBS_CAPS_ZEN4) {
+ if (ibs_data_src == IBS_DATA_SRC_EXT_LOC_CACHE) {
+ data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
+ return;
+ }
+ } else {
+ if (ibs_data_src == IBS_DATA_SRC_LOC_CACHE) {
+ data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_REM_CCE1 |
+ PERF_MEM_LVL_HIT;
+ return;
+ }
+ }
+
+ /* A peer cache in a near CCX */
+ if (ibs_caps & IBS_CAPS_ZEN4 &&
+ ibs_data_src == IBS_DATA_SRC_EXT_NEAR_CCX_CACHE) {
+ data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
+ return;
+ }
+
+ /* A peer cache in a far CCX */
+ if (ibs_caps & IBS_CAPS_ZEN4) {
+ if (ibs_data_src == IBS_DATA_SRC_EXT_FAR_CCX_CACHE) {
+ data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2 | PERF_MEM_LVL_HIT;
+ return;
+ }
+ } else {
+ if (ibs_data_src == IBS_DATA_SRC_REM_CACHE) {
+ data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2 | PERF_MEM_LVL_HIT;
+ return;
+ }
+ }
+
+ /* DRAM */
+ if (ibs_data_src == IBS_DATA_SRC_EXT_DRAM) {
+ if (op_data2->rmt_node == 0)
+ data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
+ else
+ data_src->mem_lvl = PERF_MEM_LVL_REM_RAM1 | PERF_MEM_LVL_HIT;
+ return;
+ }
+
+ /* PMEM */
+ if (ibs_caps & IBS_CAPS_ZEN4 && ibs_data_src == IBS_DATA_SRC_EXT_PMEM) {
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_PMEM;
+ if (op_data2->rmt_node) {
+ data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
+ /* IBS doesn't provide Remote socket detail */
+ data_src->mem_hops = PERF_MEM_HOPS_1;
+ }
+ return;
+ }
+
+ /* Extension Memory */
+ if (ibs_caps & IBS_CAPS_ZEN4 &&
+ ibs_data_src == IBS_DATA_SRC_EXT_EXT_MEM) {
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_CXL;
+ if (op_data2->rmt_node) {
+ data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
+ /* IBS doesn't provide Remote socket detail */
+ data_src->mem_hops = PERF_MEM_HOPS_1;
+ }
+ return;
+ }
+
+ /* IO */
+ if (ibs_data_src == IBS_DATA_SRC_EXT_IO) {
+ data_src->mem_lvl = PERF_MEM_LVL_IO;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_IO;
+ if (op_data2->rmt_node) {
+ data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
+ /* IBS doesn't provide Remote socket detail */
+ data_src->mem_hops = PERF_MEM_HOPS_1;
+ }
+ return;
+ }
+
+check_mab:
+ /*
+ * MAB (Miss Address Buffer) Hit. MAB keeps track of outstanding
+ * DC misses. However, such data may come from any level in mem
+ * hierarchy. IBS provides detail about both MAB as well as actual
+ * DataSrc simultaneously. Prioritize DataSrc over MAB, i.e. set
+ * MAB only when IBS fails to provide DataSrc.
+ */
+ if (op_data3->dc_miss_no_mab_alloc) {
+ data_src->mem_lvl = PERF_MEM_LVL_LFB | PERF_MEM_LVL_HIT;
+ return;
+ }
+
+ data_src->mem_lvl = PERF_MEM_LVL_NA;
+}
+
+static bool perf_ibs_cache_hit_st_valid(void)
+{
+ /* 0: Uninitialized, 1: Valid, -1: Invalid */
+ static int cache_hit_st_valid;
+
+ if (unlikely(!cache_hit_st_valid)) {
+ if (boot_cpu_data.x86 == 0x19 &&
+ (boot_cpu_data.x86_model <= 0xF ||
+ (boot_cpu_data.x86_model >= 0x20 &&
+ boot_cpu_data.x86_model <= 0x5F))) {
+ cache_hit_st_valid = -1;
+ } else {
+ cache_hit_st_valid = 1;
+ }
+ }
+
+ return cache_hit_st_valid == 1;
+}
+
+static void perf_ibs_get_mem_snoop(union ibs_op_data2 *op_data2,
+ struct perf_sample_data *data)
+{
+ union perf_mem_data_src *data_src = &data->data_src;
+ u8 ibs_data_src;
+
+ data_src->mem_snoop = PERF_MEM_SNOOP_NA;
+
+ if (!perf_ibs_cache_hit_st_valid() ||
+ data_src->mem_op != PERF_MEM_OP_LOAD ||
+ data_src->mem_lvl & PERF_MEM_LVL_L1 ||
+ data_src->mem_lvl & PERF_MEM_LVL_L2 ||
+ op_data2->cache_hit_st)
+ return;
+
+ ibs_data_src = perf_ibs_data_src(op_data2);
+
+ if (ibs_caps & IBS_CAPS_ZEN4) {
+ if (ibs_data_src == IBS_DATA_SRC_EXT_LOC_CACHE ||
+ ibs_data_src == IBS_DATA_SRC_EXT_NEAR_CCX_CACHE ||
+ ibs_data_src == IBS_DATA_SRC_EXT_FAR_CCX_CACHE)
+ data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
+ } else if (ibs_data_src == IBS_DATA_SRC_LOC_CACHE) {
+ data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
+ }
+}
+
+static void perf_ibs_get_tlb_lvl(union ibs_op_data3 *op_data3,
+ struct perf_sample_data *data)
+{
+ union perf_mem_data_src *data_src = &data->data_src;
+
+ data_src->mem_dtlb = PERF_MEM_TLB_NA;
+
+ if (!op_data3->dc_lin_addr_valid)
+ return;
+
+ if (!op_data3->dc_l1tlb_miss) {
+ data_src->mem_dtlb = PERF_MEM_TLB_L1 | PERF_MEM_TLB_HIT;
+ return;
+ }
+
+ if (!op_data3->dc_l2tlb_miss) {
+ data_src->mem_dtlb = PERF_MEM_TLB_L2 | PERF_MEM_TLB_HIT;
+ return;
+ }
+
+ data_src->mem_dtlb = PERF_MEM_TLB_L2 | PERF_MEM_TLB_MISS;
+}
+
+static void perf_ibs_get_mem_lock(union ibs_op_data3 *op_data3,
+ struct perf_sample_data *data)
+{
+ union perf_mem_data_src *data_src = &data->data_src;
+
+ data_src->mem_lock = PERF_MEM_LOCK_NA;
+
+ if (op_data3->dc_locked_op)
+ data_src->mem_lock = PERF_MEM_LOCK_LOCKED;
+}
+
+#define ibs_op_msr_idx(msr) (msr - MSR_AMD64_IBSOPCTL)
+
+static void perf_ibs_get_data_src(struct perf_ibs_data *ibs_data,
+ struct perf_sample_data *data,
+ union ibs_op_data2 *op_data2,
+ union ibs_op_data3 *op_data3)
+{
+ perf_ibs_get_mem_lvl(op_data2, op_data3, data);
+ perf_ibs_get_mem_snoop(op_data2, data);
+ perf_ibs_get_tlb_lvl(op_data3, data);
+ perf_ibs_get_mem_lock(op_data3, data);
+}
+
+static __u64 perf_ibs_get_op_data2(struct perf_ibs_data *ibs_data,
+ union ibs_op_data3 *op_data3)
+{
+ __u64 val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA2)];
+
+ /* Erratum #1293 */
+ if (boot_cpu_data.x86 == 0x19 && boot_cpu_data.x86_model <= 0xF &&
+ (op_data3->sw_pf || op_data3->dc_miss_no_mab_alloc)) {
+ /*
+ * OP_DATA2 has only two fields on Zen3: DataSrc and RmtNode.
+ * DataSrc=0 is 'No valid status' and RmtNode is invalid when
+ * DataSrc=0.
+ */
+ val = 0;
+ }
+ return val;
+}
+
+static void perf_ibs_parse_ld_st_data(__u64 sample_type,
+ struct perf_ibs_data *ibs_data,
+ struct perf_sample_data *data)
+{
+ union ibs_op_data3 op_data3;
+ union ibs_op_data2 op_data2;
+ union ibs_op_data op_data;
+
+ data->data_src.val = PERF_MEM_NA;
+ op_data3.val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA3)];
+
+ perf_ibs_get_mem_op(&op_data3, data);
+ if (data->data_src.mem_op != PERF_MEM_OP_LOAD &&
+ data->data_src.mem_op != PERF_MEM_OP_STORE)
+ return;
+
+ op_data2.val = perf_ibs_get_op_data2(ibs_data, &op_data3);
+
+ if (sample_type & PERF_SAMPLE_DATA_SRC) {
+ perf_ibs_get_data_src(ibs_data, data, &op_data2, &op_data3);
+ data->sample_flags |= PERF_SAMPLE_DATA_SRC;
+ }
+
+ if (sample_type & PERF_SAMPLE_WEIGHT_TYPE && op_data3.dc_miss &&
+ data->data_src.mem_op == PERF_MEM_OP_LOAD) {
+ op_data.val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA)];
+
+ if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) {
+ data->weight.var1_dw = op_data3.dc_miss_lat;
+ data->weight.var2_w = op_data.tag_to_ret_ctr;
+ } else if (sample_type & PERF_SAMPLE_WEIGHT) {
+ data->weight.full = op_data3.dc_miss_lat;
+ }
+ data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
+ }
+
+ if (sample_type & PERF_SAMPLE_ADDR && op_data3.dc_lin_addr_valid) {
+ data->addr = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSDCLINAD)];
+ data->sample_flags |= PERF_SAMPLE_ADDR;
+ }
+
+ if (sample_type & PERF_SAMPLE_PHYS_ADDR && op_data3.dc_phy_addr_valid) {
+ data->phys_addr = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSDCPHYSAD)];
+ data->sample_flags |= PERF_SAMPLE_PHYS_ADDR;
+ }
+}
+
+static int perf_ibs_get_offset_max(struct perf_ibs *perf_ibs, u64 sample_type,
+ int check_rip)
+{
+ if (sample_type & PERF_SAMPLE_RAW ||
+ (perf_ibs == &perf_ibs_op &&
+ (sample_type & PERF_SAMPLE_DATA_SRC ||
+ sample_type & PERF_SAMPLE_WEIGHT_TYPE ||
+ sample_type & PERF_SAMPLE_ADDR ||
+ sample_type & PERF_SAMPLE_PHYS_ADDR)))
+ return perf_ibs->offset_max;
+ else if (check_rip)
+ return 3;
+ return 1;
+}
+
static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
{
struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
@@ -629,12 +1058,9 @@ fail:
size = 1;
offset = 1;
check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK));
- if (event->attr.sample_type & PERF_SAMPLE_RAW)
- offset_max = perf_ibs->offset_max;
- else if (check_rip)
- offset_max = 3;
- else
- offset_max = 1;
+
+ offset_max = perf_ibs_get_offset_max(perf_ibs, event->attr.sample_type, check_rip);
+
do {
rdmsrl(msr + offset, *buf++);
size++;
@@ -685,6 +1111,20 @@ fail:
},
};
data.raw = &raw;
+ data.sample_flags |= PERF_SAMPLE_RAW;
+ }
+
+ if (perf_ibs == &perf_ibs_op)
+ perf_ibs_parse_ld_st_data(event->attr.sample_type, &ibs_data, &data);
+
+ /*
+ * rip recorded by IbsOpRip will not be consistent with rsp and rbp
+ * recorded as part of interrupt regs. Thus we need to use rip from
+ * interrupt regs while unwinding call stack.
+ */
+ if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) {
+ data.callchain = perf_callchain(event, iregs);
+ data.sample_flags |= PERF_SAMPLE_CALLCHAIN;
}
throttle = perf_event_overflow(event, &data, &regs);
@@ -739,17 +1179,6 @@ static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
perf_ibs->pcpu = pcpu;
- /* register attributes */
- if (perf_ibs->format_attrs[0]) {
- memset(&perf_ibs->format_group, 0, sizeof(perf_ibs->format_group));
- perf_ibs->format_group.name = "format";
- perf_ibs->format_group.attrs = perf_ibs->format_attrs;
-
- memset(&perf_ibs->attr_groups, 0, sizeof(perf_ibs->attr_groups));
- perf_ibs->attr_groups[0] = &perf_ibs->format_group;
- perf_ibs->pmu.attr_groups = perf_ibs->attr_groups;
- }
-
ret = perf_pmu_register(&perf_ibs->pmu, name, -1);
if (ret) {
perf_ibs->pcpu = NULL;
@@ -759,10 +1188,8 @@ static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
return ret;
}
-static __init void perf_event_ibs_init(void)
+static __init int perf_ibs_fetch_init(void)
{
- struct attribute **attr = ibs_op_format_attrs;
-
/*
* Some chips fail to reset the fetch count when it is written; instead
* they need a 0-1 transition of IbsFetchEn.
@@ -773,12 +1200,19 @@ static __init void perf_event_ibs_init(void)
if (boot_cpu_data.x86 == 0x19 && boot_cpu_data.x86_model < 0x10)
perf_ibs_fetch.fetch_ignore_if_zero_rip = 1;
- perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
+ if (ibs_caps & IBS_CAPS_ZEN4)
+ perf_ibs_fetch.config_mask |= IBS_FETCH_L3MISSONLY;
- if (ibs_caps & IBS_CAPS_OPCNT) {
+ perf_ibs_fetch.pmu.attr_groups = fetch_attr_groups;
+ perf_ibs_fetch.pmu.attr_update = fetch_attr_update;
+
+ return perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
+}
+
+static __init int perf_ibs_op_init(void)
+{
+ if (ibs_caps & IBS_CAPS_OPCNT)
perf_ibs_op.config_mask |= IBS_OP_CNT_CTL;
- *attr++ = &format_attr_cnt_ctl.attr;
- }
if (ibs_caps & IBS_CAPS_OPCNTEXT) {
perf_ibs_op.max_period |= IBS_OP_MAX_CNT_EXT_MASK;
@@ -786,15 +1220,52 @@ static __init void perf_event_ibs_init(void)
perf_ibs_op.cnt_mask |= IBS_OP_MAX_CNT_EXT_MASK;
}
- perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
+ if (ibs_caps & IBS_CAPS_ZEN4)
+ perf_ibs_op.config_mask |= IBS_OP_L3MISSONLY;
+
+ perf_ibs_op.pmu.attr_groups = empty_attr_groups;
+ perf_ibs_op.pmu.attr_update = op_attr_update;
+
+ return perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
+}
+
+static __init int perf_event_ibs_init(void)
+{
+ int ret;
+
+ ret = perf_ibs_fetch_init();
+ if (ret)
+ return ret;
+
+ ret = perf_ibs_op_init();
+ if (ret)
+ goto err_op;
+
+ ret = register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
+ if (ret)
+ goto err_nmi;
- register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
pr_info("perf: AMD IBS detected (0x%08x)\n", ibs_caps);
+ return 0;
+
+err_nmi:
+ perf_pmu_unregister(&perf_ibs_op.pmu);
+ free_percpu(perf_ibs_op.pcpu);
+ perf_ibs_op.pcpu = NULL;
+err_op:
+ perf_pmu_unregister(&perf_ibs_fetch.pmu);
+ free_percpu(perf_ibs_fetch.pcpu);
+ perf_ibs_fetch.pcpu = NULL;
+
+ return ret;
}
#else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */
-static __init void perf_event_ibs_init(void) { }
+static __init int perf_event_ibs_init(void)
+{
+ return 0;
+}
#endif
@@ -1064,9 +1535,7 @@ static __init int amd_ibs_init(void)
x86_pmu_amd_ibs_starting_cpu,
x86_pmu_amd_ibs_dying_cpu);
- perf_event_ibs_init();
-
- return 0;
+ return perf_event_ibs_init();
}
/* Since we need the pci subsystem to init ibs we can't do this earlier: */
diff --git a/arch/x86/events/amd/lbr.c b/arch/x86/events/amd/lbr.c
new file mode 100644
index 000000000000..38a75216c12c
--- /dev/null
+++ b/arch/x86/events/amd/lbr.c
@@ -0,0 +1,439 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/perf_event.h>
+#include <asm/perf_event.h>
+
+#include "../perf_event.h"
+
+/* LBR Branch Select valid bits */
+#define LBR_SELECT_MASK 0x1ff
+
+/*
+ * LBR Branch Select filter bits which when set, ensures that the
+ * corresponding type of branches are not recorded
+ */
+#define LBR_SELECT_KERNEL 0 /* Branches ending in CPL = 0 */
+#define LBR_SELECT_USER 1 /* Branches ending in CPL > 0 */
+#define LBR_SELECT_JCC 2 /* Conditional branches */
+#define LBR_SELECT_CALL_NEAR_REL 3 /* Near relative calls */
+#define LBR_SELECT_CALL_NEAR_IND 4 /* Indirect relative calls */
+#define LBR_SELECT_RET_NEAR 5 /* Near returns */
+#define LBR_SELECT_JMP_NEAR_IND 6 /* Near indirect jumps (excl. calls and returns) */
+#define LBR_SELECT_JMP_NEAR_REL 7 /* Near relative jumps (excl. calls) */
+#define LBR_SELECT_FAR_BRANCH 8 /* Far branches */
+
+#define LBR_KERNEL BIT(LBR_SELECT_KERNEL)
+#define LBR_USER BIT(LBR_SELECT_USER)
+#define LBR_JCC BIT(LBR_SELECT_JCC)
+#define LBR_REL_CALL BIT(LBR_SELECT_CALL_NEAR_REL)
+#define LBR_IND_CALL BIT(LBR_SELECT_CALL_NEAR_IND)
+#define LBR_RETURN BIT(LBR_SELECT_RET_NEAR)
+#define LBR_REL_JMP BIT(LBR_SELECT_JMP_NEAR_REL)
+#define LBR_IND_JMP BIT(LBR_SELECT_JMP_NEAR_IND)
+#define LBR_FAR BIT(LBR_SELECT_FAR_BRANCH)
+#define LBR_NOT_SUPP -1 /* unsupported filter */
+#define LBR_IGNORE 0
+
+#define LBR_ANY \
+ (LBR_JCC | LBR_REL_CALL | LBR_IND_CALL | LBR_RETURN | \
+ LBR_REL_JMP | LBR_IND_JMP | LBR_FAR)
+
+struct branch_entry {
+ union {
+ struct {
+ u64 ip:58;
+ u64 ip_sign_ext:5;
+ u64 mispredict:1;
+ } split;
+ u64 full;
+ } from;
+
+ union {
+ struct {
+ u64 ip:58;
+ u64 ip_sign_ext:3;
+ u64 reserved:1;
+ u64 spec:1;
+ u64 valid:1;
+ } split;
+ u64 full;
+ } to;
+};
+
+static __always_inline void amd_pmu_lbr_set_from(unsigned int idx, u64 val)
+{
+ wrmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2, val);
+}
+
+static __always_inline void amd_pmu_lbr_set_to(unsigned int idx, u64 val)
+{
+ wrmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val);
+}
+
+static __always_inline u64 amd_pmu_lbr_get_from(unsigned int idx)
+{
+ u64 val;
+
+ rdmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2, val);
+
+ return val;
+}
+
+static __always_inline u64 amd_pmu_lbr_get_to(unsigned int idx)
+{
+ u64 val;
+
+ rdmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val);
+
+ return val;
+}
+
+static __always_inline u64 sign_ext_branch_ip(u64 ip)
+{
+ u32 shift = 64 - boot_cpu_data.x86_virt_bits;
+
+ return (u64)(((s64)ip << shift) >> shift);
+}
+
+static void amd_pmu_lbr_filter(void)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ int br_sel = cpuc->br_sel, offset, type, i, j;
+ bool compress = false;
+ bool fused_only = false;
+ u64 from, to;
+
+ /* If sampling all branches, there is nothing to filter */
+ if (((br_sel & X86_BR_ALL) == X86_BR_ALL) &&
+ ((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE))
+ fused_only = true;
+
+ for (i = 0; i < cpuc->lbr_stack.nr; i++) {
+ from = cpuc->lbr_entries[i].from;
+ to = cpuc->lbr_entries[i].to;
+ type = branch_type_fused(from, to, 0, &offset);
+
+ /*
+ * Adjust the branch from address in case of instruction
+ * fusion where it points to an instruction preceding the
+ * actual branch
+ */
+ if (offset) {
+ cpuc->lbr_entries[i].from += offset;
+ if (fused_only)
+ continue;
+ }
+
+ /* If type does not correspond, then discard */
+ if (type == X86_BR_NONE || (br_sel & type) != type) {
+ cpuc->lbr_entries[i].from = 0; /* mark invalid */
+ compress = true;
+ }
+
+ if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE)
+ cpuc->lbr_entries[i].type = common_branch_type(type);
+ }
+
+ if (!compress)
+ return;
+
+ /* Remove all invalid entries */
+ for (i = 0; i < cpuc->lbr_stack.nr; ) {
+ if (!cpuc->lbr_entries[i].from) {
+ j = i;
+ while (++j < cpuc->lbr_stack.nr)
+ cpuc->lbr_entries[j - 1] = cpuc->lbr_entries[j];
+ cpuc->lbr_stack.nr--;
+ if (!cpuc->lbr_entries[i].from)
+ continue;
+ }
+ i++;
+ }
+}
+
+static const int lbr_spec_map[PERF_BR_SPEC_MAX] = {
+ PERF_BR_SPEC_NA,
+ PERF_BR_SPEC_WRONG_PATH,
+ PERF_BR_NON_SPEC_CORRECT_PATH,
+ PERF_BR_SPEC_CORRECT_PATH,
+};
+
+void amd_pmu_lbr_read(void)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct perf_branch_entry *br = cpuc->lbr_entries;
+ struct branch_entry entry;
+ int out = 0, idx, i;
+
+ if (!cpuc->lbr_users)
+ return;
+
+ for (i = 0; i < x86_pmu.lbr_nr; i++) {
+ entry.from.full = amd_pmu_lbr_get_from(i);
+ entry.to.full = amd_pmu_lbr_get_to(i);
+
+ /*
+ * Check if a branch has been logged; if valid = 0, spec = 0
+ * then no branch was recorded
+ */
+ if (!entry.to.split.valid && !entry.to.split.spec)
+ continue;
+
+ perf_clear_branch_entry_bitfields(br + out);
+
+ br[out].from = sign_ext_branch_ip(entry.from.split.ip);
+ br[out].to = sign_ext_branch_ip(entry.to.split.ip);
+ br[out].mispred = entry.from.split.mispredict;
+ br[out].predicted = !br[out].mispred;
+
+ /*
+ * Set branch speculation information using the status of
+ * the valid and spec bits.
+ *
+ * When valid = 0, spec = 0, no branch was recorded and the
+ * entry is discarded as seen above.
+ *
+ * When valid = 0, spec = 1, the recorded branch was
+ * speculative but took the wrong path.
+ *
+ * When valid = 1, spec = 0, the recorded branch was
+ * non-speculative but took the correct path.
+ *
+ * When valid = 1, spec = 1, the recorded branch was
+ * speculative and took the correct path
+ */
+ idx = (entry.to.split.valid << 1) | entry.to.split.spec;
+ br[out].spec = lbr_spec_map[idx];
+ out++;
+ }
+
+ cpuc->lbr_stack.nr = out;
+
+ /*
+ * Internal register renaming always ensures that LBR From[0] and
+ * LBR To[0] always represent the TOS
+ */
+ cpuc->lbr_stack.hw_idx = 0;
+
+ /* Perform further software filtering */
+ amd_pmu_lbr_filter();
+}
+
+static const int lbr_select_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
+ [PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_USER,
+ [PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_KERNEL,
+ [PERF_SAMPLE_BRANCH_HV_SHIFT] = LBR_IGNORE,
+
+ [PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_ANY,
+ [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_REL_CALL | LBR_IND_CALL | LBR_FAR,
+ [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_RETURN | LBR_FAR,
+ [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL,
+ [PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT] = LBR_NOT_SUPP,
+ [PERF_SAMPLE_BRANCH_IN_TX_SHIFT] = LBR_NOT_SUPP,
+ [PERF_SAMPLE_BRANCH_NO_TX_SHIFT] = LBR_NOT_SUPP,
+ [PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC,
+
+ [PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] = LBR_NOT_SUPP,
+ [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP,
+ [PERF_SAMPLE_BRANCH_CALL_SHIFT] = LBR_REL_CALL,
+
+ [PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT] = LBR_NOT_SUPP,
+ [PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT] = LBR_NOT_SUPP,
+};
+
+static int amd_pmu_lbr_setup_filter(struct perf_event *event)
+{
+ struct hw_perf_event_extra *reg = &event->hw.branch_reg;
+ u64 br_type = event->attr.branch_sample_type;
+ u64 mask = 0, v;
+ int i;
+
+ /* No LBR support */
+ if (!x86_pmu.lbr_nr)
+ return -EOPNOTSUPP;
+
+ if (br_type & PERF_SAMPLE_BRANCH_USER)
+ mask |= X86_BR_USER;
+
+ if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
+ mask |= X86_BR_KERNEL;
+
+ /* Ignore BRANCH_HV here */
+
+ if (br_type & PERF_SAMPLE_BRANCH_ANY)
+ mask |= X86_BR_ANY;
+
+ if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
+ mask |= X86_BR_ANY_CALL;
+
+ if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
+ mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;
+
+ if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
+ mask |= X86_BR_IND_CALL;
+
+ if (br_type & PERF_SAMPLE_BRANCH_COND)
+ mask |= X86_BR_JCC;
+
+ if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP)
+ mask |= X86_BR_IND_JMP;
+
+ if (br_type & PERF_SAMPLE_BRANCH_CALL)
+ mask |= X86_BR_CALL | X86_BR_ZERO_CALL;
+
+ if (br_type & PERF_SAMPLE_BRANCH_TYPE_SAVE)
+ mask |= X86_BR_TYPE_SAVE;
+
+ reg->reg = mask;
+ mask = 0;
+
+ for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) {
+ if (!(br_type & BIT_ULL(i)))
+ continue;
+
+ v = lbr_select_map[i];
+ if (v == LBR_NOT_SUPP)
+ return -EOPNOTSUPP;
+
+ if (v != LBR_IGNORE)
+ mask |= v;
+ }
+
+ /* Filter bits operate in suppress mode */
+ reg->config = mask ^ LBR_SELECT_MASK;
+
+ return 0;
+}
+
+int amd_pmu_lbr_hw_config(struct perf_event *event)
+{
+ int ret = 0;
+
+ /* LBR is not recommended in counting mode */
+ if (!is_sampling_event(event))
+ return -EINVAL;
+
+ ret = amd_pmu_lbr_setup_filter(event);
+ if (!ret)
+ event->attach_state |= PERF_ATTACH_SCHED_CB;
+
+ return ret;
+}
+
+void amd_pmu_lbr_reset(void)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ int i;
+
+ if (!x86_pmu.lbr_nr)
+ return;
+
+ /* Reset all branch records individually */
+ for (i = 0; i < x86_pmu.lbr_nr; i++) {
+ amd_pmu_lbr_set_from(i, 0);
+ amd_pmu_lbr_set_to(i, 0);
+ }
+
+ cpuc->last_task_ctx = NULL;
+ cpuc->last_log_id = 0;
+ wrmsrl(MSR_AMD64_LBR_SELECT, 0);
+}
+
+void amd_pmu_lbr_add(struct perf_event *event)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct hw_perf_event_extra *reg = &event->hw.branch_reg;
+
+ if (!x86_pmu.lbr_nr)
+ return;
+
+ if (has_branch_stack(event)) {
+ cpuc->lbr_select = 1;
+ cpuc->lbr_sel->config = reg->config;
+ cpuc->br_sel = reg->reg;
+ }
+
+ perf_sched_cb_inc(event->ctx->pmu);
+
+ if (!cpuc->lbr_users++ && !event->total_time_running)
+ amd_pmu_lbr_reset();
+}
+
+void amd_pmu_lbr_del(struct perf_event *event)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ if (!x86_pmu.lbr_nr)
+ return;
+
+ if (has_branch_stack(event))
+ cpuc->lbr_select = 0;
+
+ cpuc->lbr_users--;
+ WARN_ON_ONCE(cpuc->lbr_users < 0);
+ perf_sched_cb_dec(event->ctx->pmu);
+}
+
+void amd_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ /*
+ * A context switch can flip the address space and LBR entries are
+ * not tagged with an identifier. Hence, branches cannot be resolved
+ * from the old address space and the LBR records should be wiped.
+ */
+ if (cpuc->lbr_users && sched_in)
+ amd_pmu_lbr_reset();
+}
+
+void amd_pmu_lbr_enable_all(void)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ u64 lbr_select, dbg_ctl, dbg_extn_cfg;
+
+ if (!cpuc->lbr_users || !x86_pmu.lbr_nr)
+ return;
+
+ /* Set hardware branch filter */
+ if (cpuc->lbr_select) {
+ lbr_select = cpuc->lbr_sel->config & LBR_SELECT_MASK;
+ wrmsrl(MSR_AMD64_LBR_SELECT, lbr_select);
+ }
+
+ rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
+ rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
+
+ wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+ wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg | DBG_EXTN_CFG_LBRV2EN);
+}
+
+void amd_pmu_lbr_disable_all(void)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ u64 dbg_ctl, dbg_extn_cfg;
+
+ if (!cpuc->lbr_users || !x86_pmu.lbr_nr)
+ return;
+
+ rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
+ rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
+
+ wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN);
+ wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+}
+
+__init int amd_pmu_lbr_init(void)
+{
+ union cpuid_0x80000022_ebx ebx;
+
+ if (x86_pmu.version < 2 || !boot_cpu_has(X86_FEATURE_AMD_LBR_V2))
+ return -EOPNOTSUPP;
+
+ /* Set number of entries */
+ ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES);
+ x86_pmu.lbr_nr = ebx.split.lbr_v2_stack_sz;
+
+ pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr);
+
+ return 0;
+}
diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
index 0d04414b97d2..d568afc705d2 100644
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -21,7 +21,6 @@
#define NUM_COUNTERS_NB 4
#define NUM_COUNTERS_L2 4
#define NUM_COUNTERS_L3 6
-#define MAX_COUNTERS 6
#define RDPMC_BASE_NB 6
#define RDPMC_BASE_LLC 10
@@ -31,6 +30,7 @@
#undef pr_fmt
#define pr_fmt(fmt) "amd_uncore: " fmt
+static int pmu_version;
static int num_counters_llc;
static int num_counters_nb;
static bool l3_mask;
@@ -46,7 +46,7 @@ struct amd_uncore {
u32 msr_base;
cpumask_t *active_mask;
struct pmu *pmu;
- struct perf_event *events[MAX_COUNTERS];
+ struct perf_event **events;
struct hlist_node node;
};
@@ -158,6 +158,16 @@ out:
hwc->event_base_rdpmc = uncore->rdpmc_base + hwc->idx;
hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+ /*
+ * The first four DF counters are accessible via RDPMC index 6 to 9
+ * followed by the L3 counters from index 10 to 15. For processors
+ * with more than four DF counters, the DF RDPMC assignments become
+ * discontiguous as the additional counters are accessible starting
+ * from index 16.
+ */
+ if (is_nb_event(event) && hwc->idx >= NUM_COUNTERS_NB)
+ hwc->event_base_rdpmc += NUM_COUNTERS_L3;
+
if (flags & PERF_EF_START)
amd_uncore_start(event, PERF_EF_RELOAD);
@@ -209,10 +219,14 @@ static int amd_uncore_event_init(struct perf_event *event)
{
struct amd_uncore *uncore;
struct hw_perf_event *hwc = &event->hw;
+ u64 event_mask = AMD64_RAW_EVENT_MASK_NB;
if (event->attr.type != event->pmu->type)
return -ENOENT;
+ if (pmu_version >= 2 && is_nb_event(event))
+ event_mask = AMD64_PERFMON_V2_RAW_EVENT_MASK_NB;
+
/*
* NB and Last level cache counters (MSRs) are shared across all cores
* that share the same NB / Last level cache. On family 16h and below,
@@ -221,7 +235,7 @@ static int amd_uncore_event_init(struct perf_event *event)
* out. So we do not support sampling and per-thread events via
* CAP_NO_INTERRUPT, and we do not enable counter overflow interrupts:
*/
- hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB;
+ hwc->config = event->attr.config & event_mask;
hwc->idx = -1;
if (event->cpu < 0)
@@ -247,6 +261,19 @@ static int amd_uncore_event_init(struct perf_event *event)
return 0;
}
+static umode_t
+amd_f17h_uncore_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ return boot_cpu_data.x86 >= 0x17 && boot_cpu_data.x86 < 0x19 ?
+ attr->mode : 0;
+}
+
+static umode_t
+amd_f19h_uncore_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ return boot_cpu_data.x86 >= 0x19 ? attr->mode : 0;
+}
+
static ssize_t amd_uncore_attr_show_cpumask(struct device *dev,
struct device_attribute *attr,
char *buf)
@@ -287,8 +314,10 @@ static struct device_attribute format_attr_##_var = \
DEFINE_UNCORE_FORMAT_ATTR(event12, event, "config:0-7,32-35");
DEFINE_UNCORE_FORMAT_ATTR(event14, event, "config:0-7,32-35,59-60"); /* F17h+ DF */
+DEFINE_UNCORE_FORMAT_ATTR(event14v2, event, "config:0-7,32-37"); /* PerfMonV2 DF */
DEFINE_UNCORE_FORMAT_ATTR(event8, event, "config:0-7"); /* F17h+ L3 */
-DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
+DEFINE_UNCORE_FORMAT_ATTR(umask8, umask, "config:8-15");
+DEFINE_UNCORE_FORMAT_ATTR(umask12, umask, "config:8-15,24-27"); /* PerfMonV2 DF */
DEFINE_UNCORE_FORMAT_ATTR(coreid, coreid, "config:42-44"); /* F19h L3 */
DEFINE_UNCORE_FORMAT_ATTR(slicemask, slicemask, "config:48-51"); /* F17h L3 */
DEFINE_UNCORE_FORMAT_ATTR(threadmask8, threadmask, "config:56-63"); /* F17h L3 */
@@ -297,20 +326,33 @@ DEFINE_UNCORE_FORMAT_ATTR(enallslices, enallslices, "config:46"); /* F19h L3
DEFINE_UNCORE_FORMAT_ATTR(enallcores, enallcores, "config:47"); /* F19h L3 */
DEFINE_UNCORE_FORMAT_ATTR(sliceid, sliceid, "config:48-50"); /* F19h L3 */
+/* Common DF and NB attributes */
static struct attribute *amd_uncore_df_format_attr[] = {
- &format_attr_event12.attr, /* event14 if F17h+ */
- &format_attr_umask.attr,
+ &format_attr_event12.attr, /* event */
+ &format_attr_umask8.attr, /* umask */
NULL,
};
+/* Common L2 and L3 attributes */
static struct attribute *amd_uncore_l3_format_attr[] = {
- &format_attr_event12.attr, /* event8 if F17h+ */
- &format_attr_umask.attr,
- NULL, /* slicemask if F17h, coreid if F19h */
- NULL, /* threadmask8 if F17h, enallslices if F19h */
- NULL, /* enallcores if F19h */
- NULL, /* sliceid if F19h */
- NULL, /* threadmask2 if F19h */
+ &format_attr_event12.attr, /* event */
+ &format_attr_umask8.attr, /* umask */
+ NULL, /* threadmask */
+ NULL,
+};
+
+/* F17h unique L3 attributes */
+static struct attribute *amd_f17h_uncore_l3_format_attr[] = {
+ &format_attr_slicemask.attr, /* slicemask */
+ NULL,
+};
+
+/* F19h unique L3 attributes */
+static struct attribute *amd_f19h_uncore_l3_format_attr[] = {
+ &format_attr_coreid.attr, /* coreid */
+ &format_attr_enallslices.attr, /* enallslices */
+ &format_attr_enallcores.attr, /* enallcores */
+ &format_attr_sliceid.attr, /* sliceid */
NULL,
};
@@ -324,6 +366,18 @@ static struct attribute_group amd_uncore_l3_format_group = {
.attrs = amd_uncore_l3_format_attr,
};
+static struct attribute_group amd_f17h_uncore_l3_format_group = {
+ .name = "format",
+ .attrs = amd_f17h_uncore_l3_format_attr,
+ .is_visible = amd_f17h_uncore_is_visible,
+};
+
+static struct attribute_group amd_f19h_uncore_l3_format_group = {
+ .name = "format",
+ .attrs = amd_f19h_uncore_l3_format_attr,
+ .is_visible = amd_f19h_uncore_is_visible,
+};
+
static const struct attribute_group *amd_uncore_df_attr_groups[] = {
&amd_uncore_attr_group,
&amd_uncore_df_format_group,
@@ -336,6 +390,12 @@ static const struct attribute_group *amd_uncore_l3_attr_groups[] = {
NULL,
};
+static const struct attribute_group *amd_uncore_l3_attr_update[] = {
+ &amd_f17h_uncore_l3_format_group,
+ &amd_f19h_uncore_l3_format_group,
+ NULL,
+};
+
static struct pmu amd_nb_pmu = {
.task_ctx_nr = perf_invalid_context,
.attr_groups = amd_uncore_df_attr_groups,
@@ -353,6 +413,7 @@ static struct pmu amd_nb_pmu = {
static struct pmu amd_llc_pmu = {
.task_ctx_nr = perf_invalid_context,
.attr_groups = amd_uncore_l3_attr_groups,
+ .attr_update = amd_uncore_l3_attr_update,
.name = "amd_l2",
.event_init = amd_uncore_event_init,
.add = amd_uncore_add,
@@ -370,11 +431,19 @@ static struct amd_uncore *amd_uncore_alloc(unsigned int cpu)
cpu_to_node(cpu));
}
+static inline struct perf_event **
+amd_uncore_events_alloc(unsigned int num, unsigned int cpu)
+{
+ return kzalloc_node(sizeof(struct perf_event *) * num, GFP_KERNEL,
+ cpu_to_node(cpu));
+}
+
static int amd_uncore_cpu_up_prepare(unsigned int cpu)
{
- struct amd_uncore *uncore_nb = NULL, *uncore_llc;
+ struct amd_uncore *uncore_nb = NULL, *uncore_llc = NULL;
if (amd_uncore_nb) {
+ *per_cpu_ptr(amd_uncore_nb, cpu) = NULL;
uncore_nb = amd_uncore_alloc(cpu);
if (!uncore_nb)
goto fail;
@@ -384,11 +453,15 @@ static int amd_uncore_cpu_up_prepare(unsigned int cpu)
uncore_nb->msr_base = MSR_F15H_NB_PERF_CTL;
uncore_nb->active_mask = &amd_nb_active_mask;
uncore_nb->pmu = &amd_nb_pmu;
+ uncore_nb->events = amd_uncore_events_alloc(num_counters_nb, cpu);
+ if (!uncore_nb->events)
+ goto fail;
uncore_nb->id = -1;
*per_cpu_ptr(amd_uncore_nb, cpu) = uncore_nb;
}
if (amd_uncore_llc) {
+ *per_cpu_ptr(amd_uncore_llc, cpu) = NULL;
uncore_llc = amd_uncore_alloc(cpu);
if (!uncore_llc)
goto fail;
@@ -398,6 +471,9 @@ static int amd_uncore_cpu_up_prepare(unsigned int cpu)
uncore_llc->msr_base = MSR_F16H_L2I_PERF_CTL;
uncore_llc->active_mask = &amd_llc_active_mask;
uncore_llc->pmu = &amd_llc_pmu;
+ uncore_llc->events = amd_uncore_events_alloc(num_counters_llc, cpu);
+ if (!uncore_llc->events)
+ goto fail;
uncore_llc->id = -1;
*per_cpu_ptr(amd_uncore_llc, cpu) = uncore_llc;
}
@@ -405,9 +481,16 @@ static int amd_uncore_cpu_up_prepare(unsigned int cpu)
return 0;
fail:
- if (amd_uncore_nb)
- *per_cpu_ptr(amd_uncore_nb, cpu) = NULL;
- kfree(uncore_nb);
+ if (uncore_nb) {
+ kfree(uncore_nb->events);
+ kfree(uncore_nb);
+ }
+
+ if (uncore_llc) {
+ kfree(uncore_llc->events);
+ kfree(uncore_llc);
+ }
+
return -ENOMEM;
}
@@ -540,8 +623,11 @@ static void uncore_dead(unsigned int cpu, struct amd_uncore * __percpu *uncores)
if (cpu == uncore->cpu)
cpumask_clear_cpu(cpu, uncore->active_mask);
- if (!--uncore->refcnt)
+ if (!--uncore->refcnt) {
+ kfree(uncore->events);
kfree(uncore);
+ }
+
*per_cpu_ptr(uncores, cpu) = NULL;
}
@@ -560,6 +646,7 @@ static int __init amd_uncore_init(void)
{
struct attribute **df_attr = amd_uncore_df_format_attr;
struct attribute **l3_attr = amd_uncore_l3_format_attr;
+ union cpuid_0x80000022_ebx ebx;
int ret = -ENODEV;
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
@@ -569,6 +656,9 @@ static int __init amd_uncore_init(void)
if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
return -ENODEV;
+ if (boot_cpu_has(X86_FEATURE_PERFMON_V2))
+ pmu_version = 2;
+
num_counters_nb = NUM_COUNTERS_NB;
num_counters_llc = NUM_COUNTERS_L2;
if (boot_cpu_data.x86 >= 0x17) {
@@ -585,8 +675,12 @@ static int __init amd_uncore_init(void)
}
if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) {
- if (boot_cpu_data.x86 >= 0x17)
+ if (pmu_version >= 2) {
+ *df_attr++ = &format_attr_event14v2.attr;
+ *df_attr++ = &format_attr_umask12.attr;
+ } else if (boot_cpu_data.x86 >= 0x17) {
*df_attr = &format_attr_event14.attr;
+ }
amd_uncore_nb = alloc_percpu(struct amd_uncore *);
if (!amd_uncore_nb) {
@@ -597,6 +691,11 @@ static int __init amd_uncore_init(void)
if (ret)
goto fail_nb;
+ if (pmu_version >= 2) {
+ ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES);
+ num_counters_nb = ebx.split.num_df_pmc;
+ }
+
pr_info("%d %s %s counters detected\n", num_counters_nb,
boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON" : "",
amd_nb_pmu.name);
@@ -607,16 +706,11 @@ static int __init amd_uncore_init(void)
if (boot_cpu_has(X86_FEATURE_PERFCTR_LLC)) {
if (boot_cpu_data.x86 >= 0x19) {
*l3_attr++ = &format_attr_event8.attr;
- *l3_attr++ = &format_attr_umask.attr;
- *l3_attr++ = &format_attr_coreid.attr;
- *l3_attr++ = &format_attr_enallslices.attr;
- *l3_attr++ = &format_attr_enallcores.attr;
- *l3_attr++ = &format_attr_sliceid.attr;
+ *l3_attr++ = &format_attr_umask8.attr;
*l3_attr++ = &format_attr_threadmask2.attr;
} else if (boot_cpu_data.x86 >= 0x17) {
*l3_attr++ = &format_attr_event8.attr;
- *l3_attr++ = &format_attr_umask.attr;
- *l3_attr++ = &format_attr_slicemask.attr;
+ *l3_attr++ = &format_attr_umask8.attr;
*l3_attr++ = &format_attr_threadmask8.attr;
}
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 68dea7ce6a22..b30b8bbcd1e2 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -72,6 +72,10 @@ DEFINE_STATIC_CALL_NULL(x86_pmu_add, *x86_pmu.add);
DEFINE_STATIC_CALL_NULL(x86_pmu_del, *x86_pmu.del);
DEFINE_STATIC_CALL_NULL(x86_pmu_read, *x86_pmu.read);
+DEFINE_STATIC_CALL_NULL(x86_pmu_set_period, *x86_pmu.set_period);
+DEFINE_STATIC_CALL_NULL(x86_pmu_update, *x86_pmu.update);
+DEFINE_STATIC_CALL_NULL(x86_pmu_limit_period, *x86_pmu.limit_period);
+
DEFINE_STATIC_CALL_NULL(x86_pmu_schedule_events, *x86_pmu.schedule_events);
DEFINE_STATIC_CALL_NULL(x86_pmu_get_event_constraints, *x86_pmu.get_event_constraints);
DEFINE_STATIC_CALL_NULL(x86_pmu_put_event_constraints, *x86_pmu.put_event_constraints);
@@ -116,9 +120,6 @@ u64 x86_perf_event_update(struct perf_event *event)
if (unlikely(!hwc->event_base))
return 0;
- if (unlikely(is_topdown_count(event)) && x86_pmu.update_topdown_event)
- return x86_pmu.update_topdown_event(event);
-
/*
* Careful: an NMI might modify the previous event value.
*
@@ -621,8 +622,9 @@ int x86_pmu_hw_config(struct perf_event *event)
event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK;
if (event->attr.sample_period && x86_pmu.limit_period) {
- if (x86_pmu.limit_period(event, event->attr.sample_period) >
- event->attr.sample_period)
+ s64 left = event->attr.sample_period;
+ x86_pmu.limit_period(event, &left);
+ if (left > event->attr.sample_period)
return -EINVAL;
}
@@ -693,9 +695,9 @@ void x86_pmu_disable_all(void)
}
}
-struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
+struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr, void *data)
{
- return static_call(x86_pmu_guest_get_msrs)(nr);
+ return static_call(x86_pmu_guest_get_msrs)(nr, data);
}
EXPORT_SYMBOL_GPL(perf_guest_get_msrs);
@@ -1338,6 +1340,10 @@ static void x86_pmu_enable(struct pmu *pmu)
if (hwc->state & PERF_HES_ARCH)
continue;
+ /*
+ * if cpuc->enabled = 0, then no wrmsr as
+ * per x86_pmu_enable_event()
+ */
x86_pmu_start(event, PERF_EF_RELOAD);
}
cpuc->n_added = 0;
@@ -1350,7 +1356,7 @@ static void x86_pmu_enable(struct pmu *pmu)
static_call(x86_pmu_enable_all)(added);
}
-static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
+DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
/*
* Set the next IRQ period, based on the hwc->period_left value.
@@ -1366,10 +1372,6 @@ int x86_perf_event_set_period(struct perf_event *event)
if (unlikely(!hwc->event_base))
return 0;
- if (unlikely(is_topdown_count(event)) &&
- x86_pmu.set_topdown_event_period)
- return x86_pmu.set_topdown_event_period(event);
-
/*
* If we are way outside a reasonable range then just skip forward:
*/
@@ -1395,10 +1397,9 @@ int x86_perf_event_set_period(struct perf_event *event)
if (left > x86_pmu.max_period)
left = x86_pmu.max_period;
- if (x86_pmu.limit_period)
- left = x86_pmu.limit_period(event, left);
+ static_call_cond(x86_pmu_limit_period)(event, &left);
- per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;
+ this_cpu_write(pmc_prev_left[idx], left);
/*
* The hw event starts counting from this event offset,
@@ -1415,16 +1416,6 @@ int x86_perf_event_set_period(struct perf_event *event)
if (is_counter_pair(hwc))
wrmsrl(x86_pmu_event_addr(idx + 1), 0xffff);
- /*
- * Due to erratum on certan cpu we need
- * a second write to be sure the register
- * is updated properly
- */
- if (x86_pmu.perfctr_second_write) {
- wrmsrl(hwc->event_base,
- (u64)(-left) & x86_pmu.cntval_mask);
- }
-
perf_event_update_userpage(event);
return ret;
@@ -1514,7 +1505,7 @@ static void x86_pmu_start(struct perf_event *event, int flags)
if (flags & PERF_EF_RELOAD) {
WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
- x86_perf_event_set_period(event);
+ static_call(x86_pmu_set_period)(event);
}
event->hw.state = 0;
@@ -1606,7 +1597,7 @@ void x86_pmu_stop(struct perf_event *event, int flags)
* Drain the remaining delta count out of a event
* that we are disabling:
*/
- x86_perf_event_update(event);
+ static_call(x86_pmu_update)(event);
hwc->state |= PERF_HES_UPTODATE;
}
}
@@ -1696,7 +1687,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
event = cpuc->events[idx];
- val = x86_perf_event_update(event);
+ val = static_call(x86_pmu_update)(event);
if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
continue;
@@ -1704,11 +1695,17 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
* event overflow
*/
handled++;
- perf_sample_data_init(&data, 0, event->hw.last_period);
- if (!x86_perf_event_set_period(event))
+ if (!static_call(x86_pmu_set_period)(event))
continue;
+ perf_sample_data_init(&data, 0, event->hw.last_period);
+
+ if (has_branch_stack(event)) {
+ data.br_stack = &cpuc->lbr_stack;
+ data.sample_flags |= PERF_SAMPLE_BRANCH_STACK;
+ }
+
if (perf_event_overflow(event, &data, regs))
x86_pmu_stop(event, 0);
}
@@ -1837,7 +1834,7 @@ ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, cha
/* string trumps id */
if (pmu_attr->event_str)
- return sprintf(page, "%s", pmu_attr->event_str);
+ return sprintf(page, "%s\n", pmu_attr->event_str);
return x86_pmu.events_sysfs_show(page, config);
}
@@ -2015,6 +2012,10 @@ static void x86_pmu_static_call_update(void)
static_call_update(x86_pmu_del, x86_pmu.del);
static_call_update(x86_pmu_read, x86_pmu.read);
+ static_call_update(x86_pmu_set_period, x86_pmu.set_period);
+ static_call_update(x86_pmu_update, x86_pmu.update);
+ static_call_update(x86_pmu_limit_period, x86_pmu.limit_period);
+
static_call_update(x86_pmu_schedule_events, x86_pmu.schedule_events);
static_call_update(x86_pmu_get_event_constraints, x86_pmu.get_event_constraints);
static_call_update(x86_pmu_put_event_constraints, x86_pmu.put_event_constraints);
@@ -2034,7 +2035,7 @@ static void x86_pmu_static_call_update(void)
static void _x86_pmu_read(struct perf_event *event)
{
- x86_perf_event_update(event);
+ static_call(x86_pmu_update)(event);
}
void x86_pmu_show_pmu_cap(int num_counters, int num_counters_fixed,
@@ -2095,14 +2096,15 @@ static int __init init_hw_perf_events(void)
}
if (err != 0) {
pr_cont("no PMU driver, software events only.\n");
- return 0;
+ err = 0;
+ goto out_bad_pmu;
}
pmu_check_apic();
/* sanity check that the hardware exists or is emulated */
if (!check_hw_exists(&pmu, x86_pmu.num_counters, x86_pmu.num_counters_fixed))
- return 0;
+ goto out_bad_pmu;
pr_cont("%s PMU driver.\n", x86_pmu.name);
@@ -2140,6 +2142,12 @@ static int __init init_hw_perf_events(void)
if (!x86_pmu.guest_get_msrs)
x86_pmu.guest_get_msrs = (void *)&__static_call_return0;
+ if (!x86_pmu.set_period)
+ x86_pmu.set_period = x86_perf_event_set_period;
+
+ if (!x86_pmu.update)
+ x86_pmu.update = x86_perf_event_update;
+
x86_pmu_static_call_update();
/*
@@ -2211,6 +2219,8 @@ out1:
cpuhp_remove_state(CPUHP_AP_PERF_X86_STARTING);
out:
cpuhp_remove_state(CPUHP_PERF_X86_PREPARE);
+out_bad_pmu:
+ memset(&x86_pmu, 0, sizeof(x86_pmu));
return err;
}
early_initcall(init_hw_perf_events);
@@ -2659,7 +2669,9 @@ static int x86_pmu_check_period(struct perf_event *event, u64 value)
return -EINVAL;
if (value && x86_pmu.limit_period) {
- if (x86_pmu.limit_period(event, value) > value)
+ s64 left = value;
+ x86_pmu.limit_period(event, &left);
+ if (left > value)
return -EINVAL;
}
@@ -2771,7 +2783,7 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re
struct unwind_state state;
unsigned long addr;
- if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+ if (perf_guest_state()) {
/* TODO: We don't support guest os callchain now */
return;
}
@@ -2794,7 +2806,7 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re
static inline int
valid_user_frame(const void __user *fp, unsigned long size)
{
- return (__range_not_ok(fp, size, TASK_SIZE) == 0);
+ return __access_ok(fp, size);
}
static unsigned long get_segment_base(unsigned int segment)
@@ -2874,7 +2886,7 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs
struct stack_frame frame;
const struct stack_frame __user *fp;
- if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+ if (perf_guest_state()) {
/* TODO: We don't support guest os callchain now */
return;
}
@@ -2951,18 +2963,19 @@ static unsigned long code_segment_base(struct pt_regs *regs)
unsigned long perf_instruction_pointer(struct pt_regs *regs)
{
- if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
- return perf_guest_cbs->get_guest_ip();
+ if (perf_guest_state())
+ return perf_guest_get_ip();
return regs->ip + code_segment_base(regs);
}
unsigned long perf_misc_flags(struct pt_regs *regs)
{
+ unsigned int guest_state = perf_guest_state();
int misc = 0;
- if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
- if (perf_guest_cbs->is_user_mode())
+ if (guest_state) {
+ if (guest_state & PERF_GUEST_USER)
misc |= PERF_RECORD_MISC_GUEST_USER;
else
misc |= PERF_RECORD_MISC_GUEST_KERNEL;
@@ -2981,6 +2994,11 @@ unsigned long perf_misc_flags(struct pt_regs *regs)
void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
{
+ if (!x86_pmu_initialized()) {
+ memset(cap, 0, sizeof(*cap));
+ return;
+ }
+
cap->version = x86_pmu.version;
/*
* KVM doesn't support the hybrid PMU yet.
@@ -2993,5 +3011,17 @@ void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
cap->bit_width_fixed = x86_pmu.cntval_bits;
cap->events_mask = (unsigned int)x86_pmu.events_maskl;
cap->events_mask_len = x86_pmu.events_mask_len;
+ cap->pebs_ept = x86_pmu.pebs_ept;
}
EXPORT_SYMBOL_GPL(perf_get_x86_pmu_capability);
+
+u64 perf_get_hw_event_config(int hw_event)
+{
+ int max = x86_pmu.max_events;
+
+ if (hw_event < max)
+ return x86_pmu.event_map(array_index_nospec(hw_event, max));
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(perf_get_hw_event_config);
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index ec6444f2c9dc..1b92bf05fd65 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -14,6 +14,7 @@
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/nmi.h>
+#include <linux/kvm_host.h>
#include <asm/cpufeature.h>
#include <asm/hardirq.h>
@@ -181,6 +182,27 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly =
EVENT_CONSTRAINT_END
};
+static struct event_constraint intel_v5_gen_event_constraints[] __read_mostly =
+{
+ FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+ FIXED_EVENT_CONSTRAINT(0x0400, 3), /* SLOTS */
+ FIXED_EVENT_CONSTRAINT(0x0500, 4),
+ FIXED_EVENT_CONSTRAINT(0x0600, 5),
+ FIXED_EVENT_CONSTRAINT(0x0700, 6),
+ FIXED_EVENT_CONSTRAINT(0x0800, 7),
+ FIXED_EVENT_CONSTRAINT(0x0900, 8),
+ FIXED_EVENT_CONSTRAINT(0x0a00, 9),
+ FIXED_EVENT_CONSTRAINT(0x0b00, 10),
+ FIXED_EVENT_CONSTRAINT(0x0c00, 11),
+ FIXED_EVENT_CONSTRAINT(0x0d00, 12),
+ FIXED_EVENT_CONSTRAINT(0x0e00, 13),
+ FIXED_EVENT_CONSTRAINT(0x0f00, 14),
+ FIXED_EVENT_CONSTRAINT(0x1000, 15),
+ EVENT_CONSTRAINT_END
+};
+
static struct event_constraint intel_slm_event_constraints[] __read_mostly =
{
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
@@ -255,7 +277,7 @@ static struct event_constraint intel_icl_event_constraints[] = {
INTEL_EVENT_CONSTRAINT_RANGE(0x03, 0x0a, 0xf),
INTEL_EVENT_CONSTRAINT_RANGE(0x1f, 0x28, 0xf),
INTEL_EVENT_CONSTRAINT(0x32, 0xf), /* SW_PREFETCH_ACCESS.* */
- INTEL_EVENT_CONSTRAINT_RANGE(0x48, 0x54, 0xf),
+ INTEL_EVENT_CONSTRAINT_RANGE(0x48, 0x56, 0xf),
INTEL_EVENT_CONSTRAINT_RANGE(0x60, 0x8b, 0xf),
INTEL_UEVENT_CONSTRAINT(0x04a3, 0xff), /* CYCLE_ACTIVITY.STALLS_TOTAL */
INTEL_UEVENT_CONSTRAINT(0x10a3, 0xff), /* CYCLE_ACTIVITY.CYCLES_MEM_ANY */
@@ -281,7 +303,7 @@ static struct extra_reg intel_spr_extra_regs[] __read_mostly = {
INTEL_UEVENT_EXTRA_REG(0x012a, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0),
INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1),
INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
- INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE),
+ INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff1f, FE),
INTEL_UEVENT_EXTRA_REG(0x40ad, MSR_PEBS_FRONTEND, 0x7, FE),
INTEL_UEVENT_EXTRA_REG(0x04c2, MSR_PEBS_FRONTEND, 0x8, FE),
EVENT_EXTRA_END
@@ -2080,6 +2102,15 @@ static struct extra_reg intel_tnt_extra_regs[] __read_mostly = {
EVENT_EXTRA_END
};
+EVENT_ATTR_STR(mem-loads, mem_ld_grt, "event=0xd0,umask=0x5,ldlat=3");
+EVENT_ATTR_STR(mem-stores, mem_st_grt, "event=0xd0,umask=0x6");
+
+static struct attribute *grt_mem_attrs[] = {
+ EVENT_PTR(mem_ld_grt),
+ EVENT_PTR(mem_st_grt),
+ NULL
+};
+
static struct extra_reg intel_grt_extra_regs[] __read_mostly = {
/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0),
@@ -2168,6 +2199,12 @@ static void __intel_pmu_enable_all(int added, bool pmi)
u64 intel_ctrl = hybrid(cpuc->pmu, intel_ctrl);
intel_pmu_lbr_enable_all(pmi);
+
+ if (cpuc->fixed_ctrl_val != cpuc->active_fixed_ctrl_val) {
+ wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, cpuc->fixed_ctrl_val);
+ cpuc->active_fixed_ctrl_val = cpuc->fixed_ctrl_val;
+ }
+
wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL,
intel_ctrl & ~cpuc->intel_ctrl_guest_mask);
@@ -2280,7 +2317,7 @@ static void intel_pmu_nhm_workaround(void)
for (i = 0; i < 4; i++) {
event = cpuc->events[i];
if (event)
- x86_perf_event_update(event);
+ static_call(x86_pmu_update)(event);
}
for (i = 0; i < 4; i++) {
@@ -2295,7 +2332,7 @@ static void intel_pmu_nhm_workaround(void)
event = cpuc->events[i];
if (event) {
- x86_perf_event_set_period(event);
+ static_call(x86_pmu_set_period)(event);
__x86_pmu_enable_event(&event->hw,
ARCH_PERFMON_EVENTSEL_ENABLE);
} else
@@ -2385,9 +2422,10 @@ static inline void intel_clear_masks(struct perf_event *event, int idx)
static void intel_pmu_disable_fixed(struct perf_event *event)
{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
- u64 ctrl_val, mask;
int idx = hwc->idx;
+ u64 mask;
if (is_topdown_idx(idx)) {
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
@@ -2404,9 +2442,7 @@ static void intel_pmu_disable_fixed(struct perf_event *event)
intel_clear_masks(event, idx);
mask = 0xfULL << ((idx - INTEL_PMC_IDX_FIXED) * 4);
- rdmsrl(hwc->config_base, ctrl_val);
- ctrl_val &= ~mask;
- wrmsrl(hwc->config_base, ctrl_val);
+ cpuc->fixed_ctrl_val &= ~mask;
}
static void intel_pmu_disable_event(struct perf_event *event)
@@ -2499,6 +2535,8 @@ static int adl_set_topdown_event_period(struct perf_event *event)
return icl_set_topdown_event_period(event);
}
+DEFINE_STATIC_CALL(intel_pmu_set_topdown_event_period, x86_perf_event_set_period);
+
static inline u64 icl_get_metrics_event_value(u64 metric, u64 slots, int idx)
{
u32 val;
@@ -2649,6 +2687,7 @@ static u64 adl_update_topdown_event(struct perf_event *event)
return icl_update_topdown_event(event);
}
+DEFINE_STATIC_CALL(intel_pmu_update_topdown_event, x86_perf_event_update);
static void intel_pmu_read_topdown_event(struct perf_event *event)
{
@@ -2660,7 +2699,7 @@ static void intel_pmu_read_topdown_event(struct perf_event *event)
return;
perf_pmu_disable(event->pmu);
- x86_pmu.update_topdown_event(event);
+ static_call(intel_pmu_update_topdown_event)(event);
perf_pmu_enable(event->pmu);
}
@@ -2668,7 +2707,7 @@ static void intel_pmu_read_event(struct perf_event *event)
{
if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
intel_pmu_auto_reload_read(event);
- else if (is_topdown_count(event) && x86_pmu.update_topdown_event)
+ else if (is_topdown_count(event))
intel_pmu_read_topdown_event(event);
else
x86_perf_event_update(event);
@@ -2676,8 +2715,9 @@ static void intel_pmu_read_event(struct perf_event *event)
static void intel_pmu_enable_fixed(struct perf_event *event)
{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
- u64 ctrl_val, mask, bits = 0;
+ u64 mask, bits = 0;
int idx = hwc->idx;
if (is_topdown_idx(idx)) {
@@ -2721,10 +2761,8 @@ static void intel_pmu_enable_fixed(struct perf_event *event)
mask |= ICL_FIXED_0_ADAPTIVE << (idx * 4);
}
- rdmsrl(hwc->config_base, ctrl_val);
- ctrl_val &= ~mask;
- ctrl_val |= bits;
- wrmsrl(hwc->config_base, ctrl_val);
+ cpuc->fixed_ctrl_val &= ~mask;
+ cpuc->fixed_ctrl_val |= bits;
}
static void intel_pmu_enable_event(struct perf_event *event)
@@ -2772,7 +2810,7 @@ static void intel_pmu_add_event(struct perf_event *event)
*/
int intel_pmu_save_and_restart(struct perf_event *event)
{
- x86_perf_event_update(event);
+ static_call(x86_pmu_update)(event);
/*
* For a checkpointed counter always reset back to 0. This
* avoids a situation where the counter overflows, aborts the
@@ -2784,9 +2822,25 @@ int intel_pmu_save_and_restart(struct perf_event *event)
wrmsrl(event->hw.event_base, 0);
local64_set(&event->hw.prev_count, 0);
}
+ return static_call(x86_pmu_set_period)(event);
+}
+
+static int intel_pmu_set_period(struct perf_event *event)
+{
+ if (unlikely(is_topdown_count(event)))
+ return static_call(intel_pmu_set_topdown_event_period)(event);
+
return x86_perf_event_set_period(event);
}
+static u64 intel_pmu_update(struct perf_event *event)
+{
+ if (unlikely(is_topdown_count(event)))
+ return static_call(intel_pmu_update_topdown_event)(event);
+
+ return x86_perf_event_update(event);
+}
+
static void intel_pmu_reset(void)
{
struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
@@ -2831,6 +2885,47 @@ static void intel_pmu_reset(void)
local_irq_restore(flags);
}
+/*
+ * We may be running with guest PEBS events created by KVM, and the
+ * PEBS records are logged into the guest's DS and invisible to host.
+ *
+ * In the case of guest PEBS overflow, we only trigger a fake event
+ * to emulate the PEBS overflow PMI for guest PEBS counters in KVM.
+ * The guest will then vm-entry and check the guest DS area to read
+ * the guest PEBS records.
+ *
+ * The contents and other behavior of the guest event do not matter.
+ */
+static void x86_pmu_handle_guest_pebs(struct pt_regs *regs,
+ struct perf_sample_data *data)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ u64 guest_pebs_idxs = cpuc->pebs_enabled & ~cpuc->intel_ctrl_host_mask;
+ struct perf_event *event = NULL;
+ int bit;
+
+ if (!unlikely(perf_guest_state()))
+ return;
+
+ if (!x86_pmu.pebs_ept || !x86_pmu.pebs_active ||
+ !guest_pebs_idxs)
+ return;
+
+ for_each_set_bit(bit, (unsigned long *)&guest_pebs_idxs,
+ INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed) {
+ event = cpuc->events[bit];
+ if (!event->attr.precise_ip)
+ continue;
+
+ perf_sample_data_init(data, 0, event->hw.last_period);
+ if (perf_event_overflow(event, data, regs))
+ x86_pmu_stop(event, 0);
+
+ /* Inject one fake event is enough. */
+ break;
+ }
+}
+
static int handle_pmi_common(struct pt_regs *regs, u64 status)
{
struct perf_sample_data data;
@@ -2870,10 +2965,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
* counters from the GLOBAL_STATUS mask and we always process PEBS
* events via drain_pebs().
*/
- if (x86_pmu.flags & PMU_FL_PEBS_ALL)
- status &= ~cpuc->pebs_enabled;
- else
- status &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK);
+ status &= ~(cpuc->pebs_enabled & x86_pmu.pebs_capable);
/*
* PEBS overflow sets bit 62 in the global status register
@@ -2882,6 +2974,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
u64 pebs_enabled = cpuc->pebs_enabled;
handled++;
+ x86_pmu_handle_guest_pebs(regs, &data);
x86_pmu.drain_pebs(regs, &data);
status &= intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI;
@@ -2901,10 +2994,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
*/
if (__test_and_clear_bit(GLOBAL_STATUS_TRACE_TOPAPMI_BIT, (unsigned long *)&status)) {
handled++;
- if (unlikely(perf_guest_cbs && perf_guest_cbs->is_in_guest() &&
- perf_guest_cbs->handle_intel_pt_intr))
- perf_guest_cbs->handle_intel_pt_intr();
- else
+ if (!perf_guest_handle_intel_pt_intr())
intel_pt_interrupt();
}
@@ -2913,8 +3003,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
*/
if (__test_and_clear_bit(GLOBAL_STATUS_PERF_METRICS_OVF_BIT, (unsigned long *)&status)) {
handled++;
- if (x86_pmu.update_topdown_event)
- x86_pmu.update_topdown_event(NULL);
+ static_call(intel_pmu_update_topdown_event)(NULL);
}
/*
@@ -2937,8 +3026,10 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
perf_sample_data_init(&data, 0, event->hw.last_period);
- if (has_branch_stack(event))
+ if (has_branch_stack(event)) {
data.br_stack = &cpuc->lbr_stack;
+ data.sample_flags |= PERF_SAMPLE_BRANCH_STACK;
+ }
if (perf_event_overflow(event, &data, regs))
x86_pmu_stop(event, 0);
@@ -3786,9 +3877,6 @@ static int intel_pmu_hw_config(struct perf_event *event)
}
if (x86_pmu.pebs_aliases)
x86_pmu.pebs_aliases(event);
-
- if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
- event->attr.sample_type |= __PERF_SAMPLE_CALLCHAIN_EARLY;
}
if (needs_branch_stack(event)) {
@@ -3912,40 +4000,99 @@ static int intel_pmu_hw_config(struct perf_event *event)
return 0;
}
-static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
+/*
+ * Currently, the only caller of this function is the atomic_switch_perf_msrs().
+ * The host perf conext helps to prepare the values of the real hardware for
+ * a set of msrs that need to be switched atomically in a vmx transaction.
+ *
+ * For example, the pseudocode needed to add a new msr should look like:
+ *
+ * arr[(*nr)++] = (struct perf_guest_switch_msr){
+ * .msr = the hardware msr address,
+ * .host = the value the hardware has when it doesn't run a guest,
+ * .guest = the value the hardware has when it runs a guest,
+ * };
+ *
+ * These values have nothing to do with the emulated values the guest sees
+ * when it uses {RD,WR}MSR, which should be handled by the KVM context,
+ * specifically in the intel_pmu_{get,set}_msr().
+ */
+static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
+ struct kvm_pmu *kvm_pmu = (struct kvm_pmu *)data;
u64 intel_ctrl = hybrid(cpuc->pmu, intel_ctrl);
+ u64 pebs_mask = cpuc->pebs_enabled & x86_pmu.pebs_capable;
+ int global_ctrl, pebs_enable;
+
+ *nr = 0;
+ global_ctrl = (*nr)++;
+ arr[global_ctrl] = (struct perf_guest_switch_msr){
+ .msr = MSR_CORE_PERF_GLOBAL_CTRL,
+ .host = intel_ctrl & ~cpuc->intel_ctrl_guest_mask,
+ .guest = intel_ctrl & (~cpuc->intel_ctrl_host_mask | ~pebs_mask),
+ };
- arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL;
- arr[0].host = intel_ctrl & ~cpuc->intel_ctrl_guest_mask;
- arr[0].guest = intel_ctrl & ~cpuc->intel_ctrl_host_mask;
- if (x86_pmu.flags & PMU_FL_PEBS_ALL)
- arr[0].guest &= ~cpuc->pebs_enabled;
- else
- arr[0].guest &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK);
- *nr = 1;
+ if (!x86_pmu.pebs)
+ return arr;
- if (x86_pmu.pebs && x86_pmu.pebs_no_isolation) {
- /*
- * If PMU counter has PEBS enabled it is not enough to
- * disable counter on a guest entry since PEBS memory
- * write can overshoot guest entry and corrupt guest
- * memory. Disabling PEBS solves the problem.
- *
- * Don't do this if the CPU already enforces it.
- */
- arr[1].msr = MSR_IA32_PEBS_ENABLE;
- arr[1].host = cpuc->pebs_enabled;
- arr[1].guest = 0;
- *nr = 2;
+ /*
+ * If PMU counter has PEBS enabled it is not enough to
+ * disable counter on a guest entry since PEBS memory
+ * write can overshoot guest entry and corrupt guest
+ * memory. Disabling PEBS solves the problem.
+ *
+ * Don't do this if the CPU already enforces it.
+ */
+ if (x86_pmu.pebs_no_isolation) {
+ arr[(*nr)++] = (struct perf_guest_switch_msr){
+ .msr = MSR_IA32_PEBS_ENABLE,
+ .host = cpuc->pebs_enabled,
+ .guest = 0,
+ };
+ return arr;
+ }
+
+ if (!kvm_pmu || !x86_pmu.pebs_ept)
+ return arr;
+
+ arr[(*nr)++] = (struct perf_guest_switch_msr){
+ .msr = MSR_IA32_DS_AREA,
+ .host = (unsigned long)cpuc->ds,
+ .guest = kvm_pmu->ds_area,
+ };
+
+ if (x86_pmu.intel_cap.pebs_baseline) {
+ arr[(*nr)++] = (struct perf_guest_switch_msr){
+ .msr = MSR_PEBS_DATA_CFG,
+ .host = cpuc->pebs_data_cfg,
+ .guest = kvm_pmu->pebs_data_cfg,
+ };
+ }
+
+ pebs_enable = (*nr)++;
+ arr[pebs_enable] = (struct perf_guest_switch_msr){
+ .msr = MSR_IA32_PEBS_ENABLE,
+ .host = cpuc->pebs_enabled & ~cpuc->intel_ctrl_guest_mask,
+ .guest = pebs_mask & ~cpuc->intel_ctrl_host_mask,
+ };
+
+ if (arr[pebs_enable].host) {
+ /* Disable guest PEBS if host PEBS is enabled. */
+ arr[pebs_enable].guest = 0;
+ } else {
+ /* Disable guest PEBS thoroughly for cross-mapped PEBS counters. */
+ arr[pebs_enable].guest &= ~kvm_pmu->host_cross_mapped_mask;
+ arr[global_ctrl].guest &= ~kvm_pmu->host_cross_mapped_mask;
+ /* Set hw GLOBAL_CTRL bits for PEBS counter when it runs for guest */
+ arr[global_ctrl].guest |= arr[pebs_enable].guest;
}
return arr;
}
-static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr)
+static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr, void *data)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
@@ -4123,6 +4270,8 @@ tnt_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
{
struct event_constraint *c;
+ c = intel_get_event_constraints(cpuc, idx, event);
+
/*
* :ppp means to do reduced skid PEBS,
* which is available on PMC0 and fixed counter 0.
@@ -4135,8 +4284,6 @@ tnt_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
return &counter0_constraint;
}
- c = intel_get_event_constraints(cpuc, idx, event);
-
return c;
}
@@ -4208,28 +4355,25 @@ static u8 adl_get_hybrid_cpu_type(void)
* Therefore the effective (average) period matches the requested period,
* despite coarser hardware granularity.
*/
-static u64 bdw_limit_period(struct perf_event *event, u64 left)
+static void bdw_limit_period(struct perf_event *event, s64 *left)
{
if ((event->hw.config & INTEL_ARCH_EVENT_MASK) ==
X86_CONFIG(.event=0xc0, .umask=0x01)) {
- if (left < 128)
- left = 128;
- left &= ~0x3fULL;
+ if (*left < 128)
+ *left = 128;
+ *left &= ~0x3fULL;
}
- return left;
}
-static u64 nhm_limit_period(struct perf_event *event, u64 left)
+static void nhm_limit_period(struct perf_event *event, s64 *left)
{
- return max(left, 32ULL);
+ *left = max(*left, 32LL);
}
-static u64 spr_limit_period(struct perf_event *event, u64 left)
+static void spr_limit_period(struct perf_event *event, s64 *left)
{
if (event->attr.precise_ip == 3)
- return max(left, 128ULL);
-
- return left;
+ *left = max(*left, 128LL);
}
PMU_FORMAT_ATTR(event, "config:0-7" );
@@ -4668,6 +4812,8 @@ static __initconst const struct x86_pmu intel_pmu = {
.add = intel_pmu_add_event,
.del = intel_pmu_del_event,
.read = intel_pmu_read_event,
+ .set_period = intel_pmu_set_period,
+ .update = intel_pmu_update,
.hw_config = intel_pmu_hw_config,
.schedule_events = x86_schedule_events,
.eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
@@ -4706,6 +4852,19 @@ static __initconst const struct x86_pmu intel_pmu = {
.lbr_read = intel_pmu_lbr_read_64,
.lbr_save = intel_pmu_lbr_save,
.lbr_restore = intel_pmu_lbr_restore,
+
+ /*
+ * SMM has access to all 4 rings and while traditionally SMM code only
+ * ran in CPL0, 2021-era firmware is starting to make use of CPL3 in SMM.
+ *
+ * Since the EVENTSEL.{USR,OS} CPL filtering makes no distinction
+ * between SMM or not, this results in what should be pure userspace
+ * counters including SMM data.
+ *
+ * This is a clear privilege issue, therefore globally disable
+ * counting SMM by default.
+ */
+ .attr_freeze_on_smi = 1,
};
static __init void intel_clovertown_quirk(void)
@@ -4752,6 +4911,7 @@ static const struct x86_cpu_desc isolation_ucodes[] = {
INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 5, 0x00000000),
INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 6, 0x00000000),
INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 7, 0x00000000),
+ INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 11, 0x00000000),
INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_L, 3, 0x0000007c),
INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE, 3, 0x0000007c),
INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE, 9, 0x0000004e),
@@ -5505,7 +5665,11 @@ static void intel_pmu_check_event_constraints(struct event_constraint *event_con
/* Disabled fixed counters which are not in CPUID */
c->idxmsk64 &= intel_ctrl;
- if (c->idxmsk64 != INTEL_PMC_MSK_FIXED_REF_CYCLES)
+ /*
+ * Don't extend the pseudo-encoding to the
+ * generic counters
+ */
+ if (!use_fixed_pseudo_encoding(c->code))
c->idxmsk64 |= (1ULL << num_counters) - 1;
}
c->idxmsk64 &=
@@ -5615,6 +5779,7 @@ __init int intel_pmu_init(void)
x86_pmu.events_mask_len = eax.split.mask_length;
x86_pmu.max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, x86_pmu.num_counters);
+ x86_pmu.pebs_capable = PEBS_COUNTER_MASK;
/*
* Quirk: v2 perfmon does not report fixed-purpose events, so
@@ -5799,6 +5964,7 @@ __init int intel_pmu_init(void)
x86_pmu.pebs_aliases = NULL;
x86_pmu.pebs_prec_dist = true;
x86_pmu.lbr_pt_coexist = true;
+ x86_pmu.pebs_capable = ~0ULL;
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.flags |= PMU_FL_PEBS_ALL;
x86_pmu.get_event_constraints = glp_get_event_constraints;
@@ -5839,6 +6005,36 @@ __init int intel_pmu_init(void)
name = "Tremont";
break;
+ case INTEL_FAM6_ALDERLAKE_N:
+ x86_pmu.mid_ack = true;
+ memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
+ sizeof(hw_cache_event_ids));
+ memcpy(hw_cache_extra_regs, tnt_hw_cache_extra_regs,
+ sizeof(hw_cache_extra_regs));
+ hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1;
+
+ x86_pmu.event_constraints = intel_slm_event_constraints;
+ x86_pmu.pebs_constraints = intel_grt_pebs_event_constraints;
+ x86_pmu.extra_regs = intel_grt_extra_regs;
+
+ x86_pmu.pebs_aliases = NULL;
+ x86_pmu.pebs_prec_dist = true;
+ x86_pmu.pebs_block = true;
+ x86_pmu.lbr_pt_coexist = true;
+ x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+ x86_pmu.flags |= PMU_FL_INSTR_LATENCY;
+
+ intel_pmu_pebs_data_source_grt();
+ x86_pmu.pebs_latency_data = adl_latency_data_small;
+ x86_pmu.get_event_constraints = tnt_get_event_constraints;
+ x86_pmu.limit_period = spr_limit_period;
+ td_attr = tnt_events_attrs;
+ mem_attr = grt_mem_attrs;
+ extra_attr = nhm_format_attr;
+ pr_cont("Gracemont events, ");
+ name = "gracemont";
+ break;
+
case INTEL_FAM6_WESTMERE:
case INTEL_FAM6_WESTMERE_EP:
case INTEL_FAM6_WESTMERE_EX:
@@ -6103,6 +6299,7 @@ __init int intel_pmu_init(void)
case INTEL_FAM6_ICELAKE_X:
case INTEL_FAM6_ICELAKE_D:
+ x86_pmu.pebs_ept = 1;
pmem = true;
fallthrough;
case INTEL_FAM6_ICELAKE_L:
@@ -6136,8 +6333,10 @@ __init int intel_pmu_init(void)
x86_pmu.lbr_pt_coexist = true;
intel_pmu_pebs_data_source_skl(pmem);
x86_pmu.num_topdown_events = 4;
- x86_pmu.update_topdown_event = icl_update_topdown_event;
- x86_pmu.set_topdown_event_period = icl_set_topdown_event_period;
+ static_call_update(intel_pmu_update_topdown_event,
+ &icl_update_topdown_event);
+ static_call_update(intel_pmu_set_topdown_event_period,
+ &icl_set_topdown_event_period);
pr_cont("Icelake events, ");
name = "icelake";
break;
@@ -6157,7 +6356,6 @@ __init int intel_pmu_init(void)
x86_pmu.pebs_block = true;
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
- x86_pmu.flags |= PMU_FL_PEBS_ALL;
x86_pmu.flags |= PMU_FL_INSTR_LATENCY;
x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX;
@@ -6173,14 +6371,19 @@ __init int intel_pmu_init(void)
x86_pmu.lbr_pt_coexist = true;
intel_pmu_pebs_data_source_skl(pmem);
x86_pmu.num_topdown_events = 8;
- x86_pmu.update_topdown_event = icl_update_topdown_event;
- x86_pmu.set_topdown_event_period = icl_set_topdown_event_period;
+ static_call_update(intel_pmu_update_topdown_event,
+ &icl_update_topdown_event);
+ static_call_update(intel_pmu_set_topdown_event_period,
+ &icl_set_topdown_event_period);
pr_cont("Sapphire Rapids events, ");
name = "sapphire_rapids";
break;
case INTEL_FAM6_ALDERLAKE:
case INTEL_FAM6_ALDERLAKE_L:
+ case INTEL_FAM6_RAPTORLAKE:
+ case INTEL_FAM6_RAPTORLAKE_P:
+ case INTEL_FAM6_RAPTORLAKE_S:
/*
* Alder Lake has 2 types of CPU, core and atom.
*
@@ -6199,14 +6402,16 @@ __init int intel_pmu_init(void)
x86_pmu.pebs_block = true;
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
- x86_pmu.flags |= PMU_FL_PEBS_ALL;
x86_pmu.flags |= PMU_FL_INSTR_LATENCY;
x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX;
x86_pmu.lbr_pt_coexist = true;
- intel_pmu_pebs_data_source_skl(false);
+ intel_pmu_pebs_data_source_adl();
+ x86_pmu.pebs_latency_data = adl_latency_data_small;
x86_pmu.num_topdown_events = 8;
- x86_pmu.update_topdown_event = adl_update_topdown_event;
- x86_pmu.set_topdown_event_period = adl_set_topdown_event_period;
+ static_call_update(intel_pmu_update_topdown_event,
+ &adl_update_topdown_event);
+ static_call_update(intel_pmu_set_topdown_event_period,
+ &adl_set_topdown_event_period);
x86_pmu.filter_match = intel_pmu_filter_match;
x86_pmu.get_event_constraints = adl_get_event_constraints;
@@ -6239,6 +6444,19 @@ __init int intel_pmu_init(void)
pmu->num_counters = x86_pmu.num_counters;
pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
}
+
+ /*
+ * Quirk: For some Alder Lake machine, when all E-cores are disabled in
+ * a BIOS, the leaf 0xA will enumerate all counters of P-cores. However,
+ * the X86_FEATURE_HYBRID_CPU is still set. The above codes will
+ * mistakenly add extra counters for P-cores. Correct the number of
+ * counters here.
+ */
+ if ((pmu->num_counters > 8) || (pmu->num_counters_fixed > 4)) {
+ pmu->num_counters = x86_pmu.num_counters;
+ pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
+ }
+
pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters);
pmu->unconstrained = (struct event_constraint)
__EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1,
@@ -6285,7 +6503,9 @@ __init int intel_pmu_init(void)
pr_cont("generic architected perfmon v1, ");
name = "generic_arch_v1";
break;
- default:
+ case 2:
+ case 3:
+ case 4:
/*
* default constraints for v2 and up
*/
@@ -6293,6 +6513,21 @@ __init int intel_pmu_init(void)
pr_cont("generic architected perfmon, ");
name = "generic_arch_v2+";
break;
+ default:
+ /*
+ * The default constraints for v5 and up can support up to
+ * 16 fixed counters. For the fixed counters 4 and later,
+ * the pseudo-encoding is applied.
+ * The constraints may be cut according to the CPUID enumeration
+ * by inserting the EVENT_CONSTRAINT_END.
+ */
+ if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED)
+ x86_pmu.num_counters_fixed = INTEL_PMC_MAX_FIXED;
+ intel_v5_gen_event_constraints[x86_pmu.num_counters_fixed].weight = -1;
+ x86_pmu.event_constraints = intel_v5_gen_event_constraints;
+ pr_cont("generic architected perfmon, ");
+ name = "generic_arch_v5+";
+ break;
}
}
@@ -6330,8 +6565,7 @@ __init int intel_pmu_init(void)
x86_pmu.intel_ctrl);
/*
* Access LBR MSR may cause #GP under certain circumstances.
- * E.g. KVM doesn't support LBR MSR
- * Check all LBT MSR here.
+ * Check all LBR MSR here.
* Disable LBR access if any LBR MSRs can not be accessed.
*/
if (x86_pmu.lbr_tos && !check_msr(x86_pmu.lbr_tos, 0x3UL))
@@ -6343,6 +6577,8 @@ __init int intel_pmu_init(void)
}
if (x86_pmu.lbr_nr) {
+ intel_pmu_lbr_init();
+
pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr);
/* only support branch_stack snapshot for perfmon >= v2 */
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index c6262b154c3a..a2834bc93149 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -40,7 +40,7 @@
* Model specific counters:
* MSR_CORE_C1_RES: CORE C1 Residency Counter
* perf code: 0x00
- * Available model: SLM,AMT,GLM,CNL,ICX,TNT,ADL
+ * Available model: SLM,AMT,GLM,CNL,ICX,TNT,ADL,RPL
* Scope: Core (each processor core has a MSR)
* MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter
* perf code: 0x01
@@ -51,49 +51,50 @@
* perf code: 0x02
* Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,
* SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX,
- * TGL,TNT,RKL,ADL
+ * TGL,TNT,RKL,ADL,RPL,SPR
* Scope: Core
* MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter
* perf code: 0x03
* Available model: SNB,IVB,HSW,BDW,SKL,CNL,KBL,CML,
- * ICL,TGL,RKL,ADL
+ * ICL,TGL,RKL,ADL,RPL
* Scope: Core
* MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter.
* perf code: 0x00
* Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL,
- * KBL,CML,ICL,ICX,TGL,TNT,RKL,ADL
+ * KBL,CML,ICL,ICX,TGL,TNT,RKL,ADL,
+ * RPL,SPR
* Scope: Package (physical package)
* MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter.
* perf code: 0x01
* Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL,
* GLM,CNL,KBL,CML,ICL,TGL,TNT,RKL,
- * ADL
+ * ADL,RPL
* Scope: Package (physical package)
* MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter.
* perf code: 0x02
* Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,
* SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX,
- * TGL,TNT,RKL,ADL
+ * TGL,TNT,RKL,ADL,RPL,SPR
* Scope: Package (physical package)
* MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter.
* perf code: 0x03
* Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,CNL,
- * KBL,CML,ICL,TGL,RKL,ADL
+ * KBL,CML,ICL,TGL,RKL,ADL,RPL
* Scope: Package (physical package)
* MSR_PKG_C8_RESIDENCY: Package C8 Residency Counter.
* perf code: 0x04
* Available model: HSW ULT,KBL,CNL,CML,ICL,TGL,RKL,
- * ADL
+ * ADL,RPL
* Scope: Package (physical package)
* MSR_PKG_C9_RESIDENCY: Package C9 Residency Counter.
* perf code: 0x05
* Available model: HSW ULT,KBL,CNL,CML,ICL,TGL,RKL,
- * ADL
+ * ADL,RPL
* Scope: Package (physical package)
* MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter.
* perf code: 0x06
* Available model: HSW ULT,KBL,GLM,CNL,CML,ICL,TGL,
- * TNT,RKL,ADL
+ * TNT,RKL,ADL,RPL
* Scope: Package (physical package)
*
*/
@@ -674,12 +675,17 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, &icl_cstates),
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &icx_cstates),
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &icx_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &icx_cstates),
X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &icl_cstates),
X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &icl_cstates),
X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE, &icl_cstates),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &adl_cstates),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &adl_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &adl_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &adl_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &adl_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &adl_cstates),
{ },
};
MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 2e215369df4a..446d2833efa7 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -94,15 +94,45 @@ void __init intel_pmu_pebs_data_source_nhm(void)
pebs_data_source[0x07] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
}
-void __init intel_pmu_pebs_data_source_skl(bool pmem)
+static void __init __intel_pmu_pebs_data_source_skl(bool pmem, u64 *data_source)
{
u64 pmem_or_l4 = pmem ? LEVEL(PMEM) : LEVEL(L4);
- pebs_data_source[0x08] = OP_LH | pmem_or_l4 | P(SNOOP, HIT);
- pebs_data_source[0x09] = OP_LH | pmem_or_l4 | REM | P(SNOOP, HIT);
- pebs_data_source[0x0b] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE);
- pebs_data_source[0x0c] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOPX, FWD);
- pebs_data_source[0x0d] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOP, HITM);
+ data_source[0x08] = OP_LH | pmem_or_l4 | P(SNOOP, HIT);
+ data_source[0x09] = OP_LH | pmem_or_l4 | REM | P(SNOOP, HIT);
+ data_source[0x0b] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE);
+ data_source[0x0c] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOPX, FWD);
+ data_source[0x0d] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOP, HITM);
+}
+
+void __init intel_pmu_pebs_data_source_skl(bool pmem)
+{
+ __intel_pmu_pebs_data_source_skl(pmem, pebs_data_source);
+}
+
+static void __init __intel_pmu_pebs_data_source_grt(u64 *data_source)
+{
+ data_source[0x05] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT);
+ data_source[0x06] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
+ data_source[0x08] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD);
+}
+
+void __init intel_pmu_pebs_data_source_grt(void)
+{
+ __intel_pmu_pebs_data_source_grt(pebs_data_source);
+}
+
+void __init intel_pmu_pebs_data_source_adl(void)
+{
+ u64 *data_source;
+
+ data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].pebs_data_source;
+ memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
+ __intel_pmu_pebs_data_source_skl(false, data_source);
+
+ data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX].pebs_data_source;
+ memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
+ __intel_pmu_pebs_data_source_grt(data_source);
}
static u64 precise_store_data(u64 status)
@@ -171,7 +201,50 @@ static u64 precise_datala_hsw(struct perf_event *event, u64 status)
return dse.val;
}
-static u64 load_latency_data(u64 status)
+static inline void pebs_set_tlb_lock(u64 *val, bool tlb, bool lock)
+{
+ /*
+ * TLB access
+ * 0 = did not miss 2nd level TLB
+ * 1 = missed 2nd level TLB
+ */
+ if (tlb)
+ *val |= P(TLB, MISS) | P(TLB, L2);
+ else
+ *val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
+
+ /* locked prefix */
+ if (lock)
+ *val |= P(LOCK, LOCKED);
+}
+
+/* Retrieve the latency data for e-core of ADL */
+u64 adl_latency_data_small(struct perf_event *event, u64 status)
+{
+ union intel_x86_pebs_dse dse;
+ u64 val;
+
+ WARN_ON_ONCE(hybrid_pmu(event->pmu)->cpu_type == hybrid_big);
+
+ dse.val = status;
+
+ val = hybrid_var(event->pmu, pebs_data_source)[dse.ld_dse];
+
+ /*
+ * For the atom core on ADL,
+ * bit 4: lock, bit 5: TLB access.
+ */
+ pebs_set_tlb_lock(&val, dse.ld_locked, dse.ld_stlb_miss);
+
+ if (dse.ld_data_blk)
+ val |= P(BLK, DATA);
+ else
+ val |= P(BLK, NA);
+
+ return val;
+}
+
+static u64 load_latency_data(struct perf_event *event, u64 status)
{
union intel_x86_pebs_dse dse;
u64 val;
@@ -181,7 +254,7 @@ static u64 load_latency_data(u64 status)
/*
* use the mapping table for bit 0-3
*/
- val = pebs_data_source[dse.ld_dse];
+ val = hybrid_var(event->pmu, pebs_data_source)[dse.ld_dse];
/*
* Nehalem models do not support TLB, Lock infos
@@ -190,21 +263,8 @@ static u64 load_latency_data(u64 status)
val |= P(TLB, NA) | P(LOCK, NA);
return val;
}
- /*
- * bit 4: TLB access
- * 0 = did not miss 2nd level TLB
- * 1 = missed 2nd level TLB
- */
- if (dse.ld_stlb_miss)
- val |= P(TLB, MISS) | P(TLB, L2);
- else
- val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
- /*
- * bit 5: locked prefix
- */
- if (dse.ld_locked)
- val |= P(LOCK, LOCKED);
+ pebs_set_tlb_lock(&val, dse.ld_stlb_miss, dse.ld_locked);
/*
* Ice Lake and earlier models do not support block infos.
@@ -233,9 +293,10 @@ static u64 load_latency_data(u64 status)
return val;
}
-static u64 store_latency_data(u64 status)
+static u64 store_latency_data(struct perf_event *event, u64 status)
{
union intel_x86_pebs_dse dse;
+ union perf_mem_data_src src;
u64 val;
dse.val = status;
@@ -243,27 +304,20 @@ static u64 store_latency_data(u64 status)
/*
* use the mapping table for bit 0-3
*/
- val = pebs_data_source[dse.st_lat_dse];
+ val = hybrid_var(event->pmu, pebs_data_source)[dse.st_lat_dse];
- /*
- * bit 4: TLB access
- * 0 = did not miss 2nd level TLB
- * 1 = missed 2nd level TLB
- */
- if (dse.st_lat_stlb_miss)
- val |= P(TLB, MISS) | P(TLB, L2);
- else
- val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
+ pebs_set_tlb_lock(&val, dse.st_lat_stlb_miss, dse.st_lat_locked);
+
+ val |= P(BLK, NA);
/*
- * bit 5: locked prefix
+ * the pebs_data_source table is only for loads
+ * so override the mem_op to say STORE instead
*/
- if (dse.st_lat_locked)
- val |= P(LOCK, LOCKED);
+ src.val = val;
+ src.mem_op = P(OP,STORE);
- val |= P(BLK, NA);
-
- return val;
+ return src.val;
}
struct pebs_record_core {
@@ -781,8 +835,8 @@ struct event_constraint intel_glm_pebs_event_constraints[] = {
struct event_constraint intel_grt_pebs_event_constraints[] = {
/* Allow all events as PEBS with no flags */
- INTEL_PLD_CONSTRAINT(0x5d0, 0xf),
- INTEL_PSD_CONSTRAINT(0x6d0, 0xf),
+ INTEL_HYBRID_LAT_CONSTRAINT(0x5d0, 0x3),
+ INTEL_HYBRID_LAT_CONSTRAINT(0x6d0, 0xf),
EVENT_CONSTRAINT_END
};
@@ -928,8 +982,13 @@ struct event_constraint intel_icl_pebs_event_constraints[] = {
INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL), /* SLOTS */
INTEL_PLD_CONSTRAINT(0x1cd, 0xff), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
- INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x1d0, 0xf), /* MEM_INST_RETIRED.LOAD */
- INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x2d0, 0xf), /* MEM_INST_RETIRED.STORE */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */
INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf), /* MEM_LOAD_*_RETIRED.* */
@@ -950,8 +1009,13 @@ struct event_constraint intel_spr_pebs_event_constraints[] = {
INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xfe),
INTEL_PLD_CONSTRAINT(0x1cd, 0xfe),
INTEL_PSD_CONSTRAINT(0x2cd, 0x1),
- INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x1d0, 0xf),
- INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x2d0, 0xf),
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */
INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf),
@@ -1203,7 +1267,10 @@ static void intel_pmu_pebs_via_pt_enable(struct perf_event *event)
if (hwc->idx >= INTEL_PMC_IDX_FIXED) {
base = MSR_RELOAD_FIXED_CTR0;
idx = hwc->idx - INTEL_PMC_IDX_FIXED;
- value = ds->pebs_event_reset[MAX_PEBS_EVENTS + idx];
+ if (x86_pmu.intel_cap.pebs_format < 5)
+ value = ds->pebs_event_reset[MAX_PEBS_EVENTS_FMT4 + idx];
+ else
+ value = ds->pebs_event_reset[MAX_PEBS_EVENTS + idx];
}
wrmsrl(base + idx, value);
}
@@ -1232,8 +1299,12 @@ void intel_pmu_pebs_enable(struct perf_event *event)
}
}
- if (idx >= INTEL_PMC_IDX_FIXED)
- idx = MAX_PEBS_EVENTS + (idx - INTEL_PMC_IDX_FIXED);
+ if (idx >= INTEL_PMC_IDX_FIXED) {
+ if (x86_pmu.intel_cap.pebs_format < 5)
+ idx = MAX_PEBS_EVENTS_FMT4 + (idx - INTEL_PMC_IDX_FIXED);
+ else
+ idx = MAX_PEBS_EVENTS + (idx - INTEL_PMC_IDX_FIXED);
+ }
/*
* Use auto-reload if possible to save a MSR write in the PMI.
@@ -1436,9 +1507,11 @@ static u64 get_data_src(struct perf_event *event, u64 aux)
bool fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC);
if (fl & PERF_X86_EVENT_PEBS_LDLAT)
- val = load_latency_data(aux);
+ val = load_latency_data(event, aux);
else if (fl & PERF_X86_EVENT_PEBS_STLAT)
- val = store_latency_data(aux);
+ val = store_latency_data(event, aux);
+ else if (fl & PERF_X86_EVENT_PEBS_LAT_HYBRID)
+ val = x86_pmu.pebs_latency_data(event, aux);
else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC))
val = precise_datala_hsw(event, aux);
else if (fst)
@@ -1477,14 +1550,18 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
/*
* Use latency for weight (only avail with PEBS-LL)
*/
- if (fll && (sample_type & PERF_SAMPLE_WEIGHT_TYPE))
+ if (fll && (sample_type & PERF_SAMPLE_WEIGHT_TYPE)) {
data->weight.full = pebs->lat;
+ data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
+ }
/*
* data.data_src encodes the data source
*/
- if (sample_type & PERF_SAMPLE_DATA_SRC)
+ if (sample_type & PERF_SAMPLE_DATA_SRC) {
data->data_src.val = get_data_src(event, pebs->dse);
+ data->sample_flags |= PERF_SAMPLE_DATA_SRC;
+ }
/*
* We must however always use iregs for the unwinder to stay sane; the
@@ -1492,8 +1569,10 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
* previous PMI context or an (I)RET happened between the record and
* PMI.
*/
- if (sample_type & PERF_SAMPLE_CALLCHAIN)
+ if (sample_type & PERF_SAMPLE_CALLCHAIN) {
data->callchain = perf_callchain(event, iregs);
+ data->sample_flags |= PERF_SAMPLE_CALLCHAIN;
+ }
/*
* We use the interrupt regs as a base because the PEBS record does not
@@ -1565,17 +1644,22 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
if ((sample_type & PERF_SAMPLE_ADDR_TYPE) &&
- x86_pmu.intel_cap.pebs_format >= 1)
+ x86_pmu.intel_cap.pebs_format >= 1) {
data->addr = pebs->dla;
+ data->sample_flags |= PERF_SAMPLE_ADDR;
+ }
if (x86_pmu.intel_cap.pebs_format >= 2) {
/* Only set the TSX weight when no memory weight. */
- if ((sample_type & PERF_SAMPLE_WEIGHT_TYPE) && !fll)
+ if ((sample_type & PERF_SAMPLE_WEIGHT_TYPE) && !fll) {
data->weight.full = intel_get_tsx_weight(pebs->tsx_tuning);
-
- if (sample_type & PERF_SAMPLE_TRANSACTION)
+ data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
+ }
+ if (sample_type & PERF_SAMPLE_TRANSACTION) {
data->txn = intel_get_tsx_transaction(pebs->tsx_tuning,
pebs->ax);
+ data->sample_flags |= PERF_SAMPLE_TRANSACTION;
+ }
}
/*
@@ -1585,11 +1669,15 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
* We can only do this for the default trace clock.
*/
if (x86_pmu.intel_cap.pebs_format >= 3 &&
- event->attr.use_clockid == 0)
+ event->attr.use_clockid == 0) {
data->time = native_sched_clock_from_tsc(pebs->tsc);
+ data->sample_flags |= PERF_SAMPLE_TIME;
+ }
- if (has_branch_stack(event))
+ if (has_branch_stack(event)) {
data->br_stack = &cpuc->lbr_stack;
+ data->sample_flags |= PERF_SAMPLE_BRANCH_STACK;
+ }
}
static void adaptive_pebs_save_regs(struct pt_regs *regs,
@@ -1647,8 +1735,10 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
perf_sample_data_init(data, 0, event->hw.last_period);
data->period = event->hw.last_period;
- if (event->attr.use_clockid == 0)
+ if (event->attr.use_clockid == 0) {
data->time = native_sched_clock_from_tsc(basic->tsc);
+ data->sample_flags |= PERF_SAMPLE_TIME;
+ }
/*
* We must however always use iregs for the unwinder to stay sane; the
@@ -1656,8 +1746,10 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
* previous PMI context or an (I)RET happened between the record and
* PMI.
*/
- if (sample_type & PERF_SAMPLE_CALLCHAIN)
+ if (sample_type & PERF_SAMPLE_CALLCHAIN) {
data->callchain = perf_callchain(event, iregs);
+ data->sample_flags |= PERF_SAMPLE_CALLCHAIN;
+ }
*regs = *iregs;
/* The ip in basic is EventingIP */
@@ -1708,17 +1800,24 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
data->weight.var1_dw = (u32)(weight & PEBS_LATENCY_MASK) ?:
intel_get_tsx_weight(meminfo->tsx_tuning);
}
+ data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
}
- if (sample_type & PERF_SAMPLE_DATA_SRC)
+ if (sample_type & PERF_SAMPLE_DATA_SRC) {
data->data_src.val = get_data_src(event, meminfo->aux);
+ data->sample_flags |= PERF_SAMPLE_DATA_SRC;
+ }
- if (sample_type & PERF_SAMPLE_ADDR_TYPE)
+ if (sample_type & PERF_SAMPLE_ADDR_TYPE) {
data->addr = meminfo->address;
+ data->sample_flags |= PERF_SAMPLE_ADDR;
+ }
- if (sample_type & PERF_SAMPLE_TRANSACTION)
+ if (sample_type & PERF_SAMPLE_TRANSACTION) {
data->txn = intel_get_tsx_transaction(meminfo->tsx_tuning,
gprs ? gprs->ax : 0);
+ data->sample_flags |= PERF_SAMPLE_TRANSACTION;
+ }
}
if (format_size & PEBS_DATACFG_XMMS) {
@@ -1737,6 +1836,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
if (has_branch_stack(event)) {
intel_pmu_store_pebs_lbrs(lbr);
data->br_stack = &cpuc->lbr_stack;
+ data->sample_flags |= PERF_SAMPLE_BRANCH_STACK;
}
}
@@ -2204,6 +2304,7 @@ void __init intel_ds_init(void)
break;
case 4:
+ case 5:
x86_pmu.drain_pebs = intel_pmu_drain_pebs_icl;
x86_pmu.pebs_record_size = sizeof(struct pebs_basic);
if (x86_pmu.intel_cap.pebs_baseline) {
@@ -2211,6 +2312,7 @@ void __init intel_ds_init(void)
PERF_SAMPLE_BRANCH_STACK |
PERF_SAMPLE_TIME;
x86_pmu.flags |= PMU_FL_PEBS_ALL;
+ x86_pmu.pebs_capable = ~0ULL;
pebs_qual = "-baseline";
x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_EXTENDED_REGS;
} else {
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 8043213b75a5..8259d725054d 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -4,18 +4,9 @@
#include <asm/perf_event.h>
#include <asm/msr.h>
-#include <asm/insn.h>
#include "../perf_event.h"
-static const enum {
- LBR_EIP_FLAGS = 1,
- LBR_TSX = 2,
-} lbr_desc[LBR_FORMAT_MAX_KNOWN + 1] = {
- [LBR_FORMAT_EIP_FLAGS] = LBR_EIP_FLAGS,
- [LBR_FORMAT_EIP_FLAGS2] = LBR_EIP_FLAGS | LBR_TSX,
-};
-
/*
* Intel LBR_SELECT bits
* Intel Vol3a, April 2011, Section 16.7 Table 16-10
@@ -74,65 +65,6 @@ static const enum {
#define LBR_FROM_SIGNEXT_2MSB (BIT_ULL(60) | BIT_ULL(59))
/*
- * x86control flow change classification
- * x86control flow changes include branches, interrupts, traps, faults
- */
-enum {
- X86_BR_NONE = 0, /* unknown */
-
- X86_BR_USER = 1 << 0, /* branch target is user */
- X86_BR_KERNEL = 1 << 1, /* branch target is kernel */
-
- X86_BR_CALL = 1 << 2, /* call */
- X86_BR_RET = 1 << 3, /* return */
- X86_BR_SYSCALL = 1 << 4, /* syscall */
- X86_BR_SYSRET = 1 << 5, /* syscall return */
- X86_BR_INT = 1 << 6, /* sw interrupt */
- X86_BR_IRET = 1 << 7, /* return from interrupt */
- X86_BR_JCC = 1 << 8, /* conditional */
- X86_BR_JMP = 1 << 9, /* jump */
- X86_BR_IRQ = 1 << 10,/* hw interrupt or trap or fault */
- X86_BR_IND_CALL = 1 << 11,/* indirect calls */
- X86_BR_ABORT = 1 << 12,/* transaction abort */
- X86_BR_IN_TX = 1 << 13,/* in transaction */
- X86_BR_NO_TX = 1 << 14,/* not in transaction */
- X86_BR_ZERO_CALL = 1 << 15,/* zero length call */
- X86_BR_CALL_STACK = 1 << 16,/* call stack */
- X86_BR_IND_JMP = 1 << 17,/* indirect jump */
-
- X86_BR_TYPE_SAVE = 1 << 18,/* indicate to save branch type */
-
-};
-
-#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
-#define X86_BR_ANYTX (X86_BR_NO_TX | X86_BR_IN_TX)
-
-#define X86_BR_ANY \
- (X86_BR_CALL |\
- X86_BR_RET |\
- X86_BR_SYSCALL |\
- X86_BR_SYSRET |\
- X86_BR_INT |\
- X86_BR_IRET |\
- X86_BR_JCC |\
- X86_BR_JMP |\
- X86_BR_IRQ |\
- X86_BR_ABORT |\
- X86_BR_IND_CALL |\
- X86_BR_IND_JMP |\
- X86_BR_ZERO_CALL)
-
-#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
-
-#define X86_BR_ANY_CALL \
- (X86_BR_CALL |\
- X86_BR_IND_CALL |\
- X86_BR_ZERO_CALL |\
- X86_BR_SYSCALL |\
- X86_BR_IRQ |\
- X86_BR_INT)
-
-/*
* Intel LBR_CTL bits
*
* Hardware branch filter for Arch LBR
@@ -243,7 +175,7 @@ void intel_pmu_lbr_reset_64(void)
for (i = 0; i < x86_pmu.lbr_nr; i++) {
wrmsrl(x86_pmu.lbr_from + i, 0);
wrmsrl(x86_pmu.lbr_to + i, 0);
- if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
+ if (x86_pmu.lbr_has_info)
wrmsrl(x86_pmu.lbr_info + i, 0);
}
}
@@ -286,9 +218,9 @@ enum {
};
/*
- * For formats with LBR_TSX flags (e.g. LBR_FORMAT_EIP_FLAGS2), bits 61:62 in
- * MSR_LAST_BRANCH_FROM_x are the TSX flags when TSX is supported, but when
- * TSX is not supported they have no consistent behavior:
+ * For format LBR_FORMAT_EIP_FLAGS2, bits 61:62 in MSR_LAST_BRANCH_FROM_x
+ * are the TSX flags when TSX is supported, but when TSX is not supported
+ * they have no consistent behavior:
*
* - For wrmsr(), bits 61:62 are considered part of the sign extension.
* - For HW updates (branch captures) bits 61:62 are always OFF and are not
@@ -296,7 +228,7 @@ enum {
*
* Therefore, if:
*
- * 1) LBR has TSX format
+ * 1) LBR format LBR_FORMAT_EIP_FLAGS2
* 2) CPU has no TSX support enabled
*
* ... then any value passed to wrmsr() must be sign extended to 63 bits and any
@@ -305,11 +237,10 @@ enum {
*/
static inline bool lbr_from_signext_quirk_needed(void)
{
- int lbr_format = x86_pmu.intel_cap.lbr_format;
bool tsx_support = boot_cpu_has(X86_FEATURE_HLE) ||
boot_cpu_has(X86_FEATURE_RTM);
- return !tsx_support && (lbr_desc[lbr_format] & LBR_TSX);
+ return !tsx_support;
}
static DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key);
@@ -427,12 +358,12 @@ rdlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info)
void intel_pmu_lbr_restore(void *ctx)
{
- bool need_info = x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO;
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct x86_perf_task_context *task_ctx = ctx;
- int i;
- unsigned lbr_idx, mask;
+ bool need_info = x86_pmu.lbr_has_info;
u64 tos = task_ctx->tos;
+ unsigned lbr_idx, mask;
+ int i;
mask = x86_pmu.lbr_nr - 1;
for (i = 0; i < task_ctx->valid_lbrs; i++) {
@@ -444,7 +375,7 @@ void intel_pmu_lbr_restore(void *ctx)
lbr_idx = (tos - i) & mask;
wrlbr_from(lbr_idx, 0);
wrlbr_to(lbr_idx, 0);
- if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
+ if (need_info)
wrlbr_info(lbr_idx, 0);
}
@@ -519,9 +450,9 @@ static void __intel_pmu_lbr_restore(void *ctx)
void intel_pmu_lbr_save(void *ctx)
{
- bool need_info = x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO;
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct x86_perf_task_context *task_ctx = ctx;
+ bool need_info = x86_pmu.lbr_has_info;
unsigned lbr_idx, mask;
u64 tos;
int i;
@@ -778,6 +709,7 @@ void intel_pmu_lbr_disable_all(void)
void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
{
unsigned long mask = x86_pmu.lbr_nr - 1;
+ struct perf_branch_entry *br = cpuc->lbr_entries;
u64 tos = intel_pmu_lbr_tos();
int i;
@@ -793,15 +725,11 @@ void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);
- cpuc->lbr_entries[i].from = msr_lastbranch.from;
- cpuc->lbr_entries[i].to = msr_lastbranch.to;
- cpuc->lbr_entries[i].mispred = 0;
- cpuc->lbr_entries[i].predicted = 0;
- cpuc->lbr_entries[i].in_tx = 0;
- cpuc->lbr_entries[i].abort = 0;
- cpuc->lbr_entries[i].cycles = 0;
- cpuc->lbr_entries[i].type = 0;
- cpuc->lbr_entries[i].reserved = 0;
+ perf_clear_branch_entry_bitfields(br);
+
+ br->from = msr_lastbranch.from;
+ br->to = msr_lastbranch.to;
+ br++;
}
cpuc->lbr_stack.nr = i;
cpuc->lbr_stack.hw_idx = tos;
@@ -816,7 +744,7 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
{
bool need_info = false, call_stack = false;
unsigned long mask = x86_pmu.lbr_nr - 1;
- int lbr_format = x86_pmu.intel_cap.lbr_format;
+ struct perf_branch_entry *br = cpuc->lbr_entries;
u64 tos = intel_pmu_lbr_tos();
int i;
int out = 0;
@@ -831,9 +759,7 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
for (i = 0; i < num; i++) {
unsigned long lbr_idx = (tos - i) & mask;
u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
- int skip = 0;
u16 cycles = 0;
- int lbr_flags = lbr_desc[lbr_format];
from = rdlbr_from(lbr_idx, NULL);
to = rdlbr_to(lbr_idx, NULL);
@@ -845,37 +771,39 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
if (call_stack && !from)
break;
- if (lbr_format == LBR_FORMAT_INFO && need_info) {
- u64 info;
-
- info = rdlbr_info(lbr_idx, NULL);
- mis = !!(info & LBR_INFO_MISPRED);
- pred = !mis;
- in_tx = !!(info & LBR_INFO_IN_TX);
- abort = !!(info & LBR_INFO_ABORT);
- cycles = (info & LBR_INFO_CYCLES);
- }
-
- if (lbr_format == LBR_FORMAT_TIME) {
- mis = !!(from & LBR_FROM_FLAG_MISPRED);
- pred = !mis;
- skip = 1;
- cycles = ((to >> 48) & LBR_INFO_CYCLES);
-
- to = (u64)((((s64)to) << 16) >> 16);
- }
-
- if (lbr_flags & LBR_EIP_FLAGS) {
- mis = !!(from & LBR_FROM_FLAG_MISPRED);
- pred = !mis;
- skip = 1;
- }
- if (lbr_flags & LBR_TSX) {
- in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
- abort = !!(from & LBR_FROM_FLAG_ABORT);
- skip = 3;
+ if (x86_pmu.lbr_has_info) {
+ if (need_info) {
+ u64 info;
+
+ info = rdlbr_info(lbr_idx, NULL);
+ mis = !!(info & LBR_INFO_MISPRED);
+ pred = !mis;
+ cycles = (info & LBR_INFO_CYCLES);
+ if (x86_pmu.lbr_has_tsx) {
+ in_tx = !!(info & LBR_INFO_IN_TX);
+ abort = !!(info & LBR_INFO_ABORT);
+ }
+ }
+ } else {
+ int skip = 0;
+
+ if (x86_pmu.lbr_from_flags) {
+ mis = !!(from & LBR_FROM_FLAG_MISPRED);
+ pred = !mis;
+ skip = 1;
+ }
+ if (x86_pmu.lbr_has_tsx) {
+ in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
+ abort = !!(from & LBR_FROM_FLAG_ABORT);
+ skip = 3;
+ }
+ from = (u64)((((s64)from) << skip) >> skip);
+
+ if (x86_pmu.lbr_to_cycles) {
+ cycles = ((to >> 48) & LBR_INFO_CYCLES);
+ to = (u64)((((s64)to) << 16) >> 16);
+ }
}
- from = (u64)((((s64)from) << skip) >> skip);
/*
* Some CPUs report duplicated abort records,
@@ -888,52 +816,54 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
if (abort && x86_pmu.lbr_double_abort && out > 0)
out--;
- cpuc->lbr_entries[out].from = from;
- cpuc->lbr_entries[out].to = to;
- cpuc->lbr_entries[out].mispred = mis;
- cpuc->lbr_entries[out].predicted = pred;
- cpuc->lbr_entries[out].in_tx = in_tx;
- cpuc->lbr_entries[out].abort = abort;
- cpuc->lbr_entries[out].cycles = cycles;
- cpuc->lbr_entries[out].type = 0;
- cpuc->lbr_entries[out].reserved = 0;
+ perf_clear_branch_entry_bitfields(br+out);
+ br[out].from = from;
+ br[out].to = to;
+ br[out].mispred = mis;
+ br[out].predicted = pred;
+ br[out].in_tx = in_tx;
+ br[out].abort = abort;
+ br[out].cycles = cycles;
out++;
}
cpuc->lbr_stack.nr = out;
cpuc->lbr_stack.hw_idx = tos;
}
+static DEFINE_STATIC_KEY_FALSE(x86_lbr_mispred);
+static DEFINE_STATIC_KEY_FALSE(x86_lbr_cycles);
+static DEFINE_STATIC_KEY_FALSE(x86_lbr_type);
+
static __always_inline int get_lbr_br_type(u64 info)
{
- if (!static_cpu_has(X86_FEATURE_ARCH_LBR) || !x86_pmu.lbr_br_type)
- return 0;
+ int type = 0;
+
+ if (static_branch_likely(&x86_lbr_type))
+ type = (info & LBR_INFO_BR_TYPE) >> LBR_INFO_BR_TYPE_OFFSET;
- return (info & LBR_INFO_BR_TYPE) >> LBR_INFO_BR_TYPE_OFFSET;
+ return type;
}
static __always_inline bool get_lbr_mispred(u64 info)
{
- if (static_cpu_has(X86_FEATURE_ARCH_LBR) && !x86_pmu.lbr_mispred)
- return 0;
+ bool mispred = 0;
- return !!(info & LBR_INFO_MISPRED);
-}
-
-static __always_inline bool get_lbr_predicted(u64 info)
-{
- if (static_cpu_has(X86_FEATURE_ARCH_LBR) && !x86_pmu.lbr_mispred)
- return 0;
+ if (static_branch_likely(&x86_lbr_mispred))
+ mispred = !!(info & LBR_INFO_MISPRED);
- return !(info & LBR_INFO_MISPRED);
+ return mispred;
}
static __always_inline u16 get_lbr_cycles(u64 info)
{
+ u16 cycles = info & LBR_INFO_CYCLES;
+
if (static_cpu_has(X86_FEATURE_ARCH_LBR) &&
- !(x86_pmu.lbr_timed_lbr && info & LBR_INFO_CYC_CNT_VALID))
- return 0;
+ (!static_branch_likely(&x86_lbr_cycles) ||
+ !(info & LBR_INFO_CYC_CNT_VALID)))
+ cycles = 0;
- return info & LBR_INFO_CYCLES;
+ return cycles;
}
static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
@@ -958,15 +888,16 @@ static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
to = rdlbr_to(i, lbr);
info = rdlbr_info(i, lbr);
+ perf_clear_branch_entry_bitfields(e);
+
e->from = from;
e->to = to;
e->mispred = get_lbr_mispred(info);
- e->predicted = get_lbr_predicted(info);
+ e->predicted = !e->mispred;
e->in_tx = !!(info & LBR_INFO_IN_TX);
e->abort = !!(info & LBR_INFO_ABORT);
e->cycles = get_lbr_cycles(info);
e->type = get_lbr_br_type(info);
- e->reserved = 0;
}
cpuc->lbr_stack.nr = i;
@@ -1106,6 +1037,14 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
if (static_cpu_has(X86_FEATURE_ARCH_LBR)) {
reg->config = mask;
+
+ /*
+ * The Arch LBR HW can retrieve the common branch types
+ * from the LBR_INFO. It doesn't require the high overhead
+ * SW disassemble.
+ * Enable the branch type by default for the Arch LBR.
+ */
+ reg->reg |= X86_BR_TYPE_SAVE;
return 0;
}
@@ -1120,7 +1059,7 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) &&
(br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) &&
- (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO))
+ x86_pmu.lbr_has_info)
reg->config |= LBR_NO_INFO;
return 0;
@@ -1152,219 +1091,6 @@ int intel_pmu_setup_lbr_filter(struct perf_event *event)
return ret;
}
-/*
- * return the type of control flow change at address "from"
- * instruction is not necessarily a branch (in case of interrupt).
- *
- * The branch type returned also includes the priv level of the
- * target of the control flow change (X86_BR_USER, X86_BR_KERNEL).
- *
- * If a branch type is unknown OR the instruction cannot be
- * decoded (e.g., text page not present), then X86_BR_NONE is
- * returned.
- */
-static int branch_type(unsigned long from, unsigned long to, int abort)
-{
- struct insn insn;
- void *addr;
- int bytes_read, bytes_left;
- int ret = X86_BR_NONE;
- int ext, to_plm, from_plm;
- u8 buf[MAX_INSN_SIZE];
- int is64 = 0;
-
- to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
- from_plm = kernel_ip(from) ? X86_BR_KERNEL : X86_BR_USER;
-
- /*
- * maybe zero if lbr did not fill up after a reset by the time
- * we get a PMU interrupt
- */
- if (from == 0 || to == 0)
- return X86_BR_NONE;
-
- if (abort)
- return X86_BR_ABORT | to_plm;
-
- if (from_plm == X86_BR_USER) {
- /*
- * can happen if measuring at the user level only
- * and we interrupt in a kernel thread, e.g., idle.
- */
- if (!current->mm)
- return X86_BR_NONE;
-
- /* may fail if text not present */
- bytes_left = copy_from_user_nmi(buf, (void __user *)from,
- MAX_INSN_SIZE);
- bytes_read = MAX_INSN_SIZE - bytes_left;
- if (!bytes_read)
- return X86_BR_NONE;
-
- addr = buf;
- } else {
- /*
- * The LBR logs any address in the IP, even if the IP just
- * faulted. This means userspace can control the from address.
- * Ensure we don't blindly read any address by validating it is
- * a known text address.
- */
- if (kernel_text_address(from)) {
- addr = (void *)from;
- /*
- * Assume we can get the maximum possible size
- * when grabbing kernel data. This is not
- * _strictly_ true since we could possibly be
- * executing up next to a memory hole, but
- * it is very unlikely to be a problem.
- */
- bytes_read = MAX_INSN_SIZE;
- } else {
- return X86_BR_NONE;
- }
- }
-
- /*
- * decoder needs to know the ABI especially
- * on 64-bit systems running 32-bit apps
- */
-#ifdef CONFIG_X86_64
- is64 = kernel_ip((unsigned long)addr) || any_64bit_mode(current_pt_regs());
-#endif
- insn_init(&insn, addr, bytes_read, is64);
- if (insn_get_opcode(&insn))
- return X86_BR_ABORT;
-
- switch (insn.opcode.bytes[0]) {
- case 0xf:
- switch (insn.opcode.bytes[1]) {
- case 0x05: /* syscall */
- case 0x34: /* sysenter */
- ret = X86_BR_SYSCALL;
- break;
- case 0x07: /* sysret */
- case 0x35: /* sysexit */
- ret = X86_BR_SYSRET;
- break;
- case 0x80 ... 0x8f: /* conditional */
- ret = X86_BR_JCC;
- break;
- default:
- ret = X86_BR_NONE;
- }
- break;
- case 0x70 ... 0x7f: /* conditional */
- ret = X86_BR_JCC;
- break;
- case 0xc2: /* near ret */
- case 0xc3: /* near ret */
- case 0xca: /* far ret */
- case 0xcb: /* far ret */
- ret = X86_BR_RET;
- break;
- case 0xcf: /* iret */
- ret = X86_BR_IRET;
- break;
- case 0xcc ... 0xce: /* int */
- ret = X86_BR_INT;
- break;
- case 0xe8: /* call near rel */
- if (insn_get_immediate(&insn) || insn.immediate1.value == 0) {
- /* zero length call */
- ret = X86_BR_ZERO_CALL;
- break;
- }
- fallthrough;
- case 0x9a: /* call far absolute */
- ret = X86_BR_CALL;
- break;
- case 0xe0 ... 0xe3: /* loop jmp */
- ret = X86_BR_JCC;
- break;
- case 0xe9 ... 0xeb: /* jmp */
- ret = X86_BR_JMP;
- break;
- case 0xff: /* call near absolute, call far absolute ind */
- if (insn_get_modrm(&insn))
- return X86_BR_ABORT;
-
- ext = (insn.modrm.bytes[0] >> 3) & 0x7;
- switch (ext) {
- case 2: /* near ind call */
- case 3: /* far ind call */
- ret = X86_BR_IND_CALL;
- break;
- case 4:
- case 5:
- ret = X86_BR_IND_JMP;
- break;
- }
- break;
- default:
- ret = X86_BR_NONE;
- }
- /*
- * interrupts, traps, faults (and thus ring transition) may
- * occur on any instructions. Thus, to classify them correctly,
- * we need to first look at the from and to priv levels. If they
- * are different and to is in the kernel, then it indicates
- * a ring transition. If the from instruction is not a ring
- * transition instr (syscall, systenter, int), then it means
- * it was a irq, trap or fault.
- *
- * we have no way of detecting kernel to kernel faults.
- */
- if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL
- && ret != X86_BR_SYSCALL && ret != X86_BR_INT)
- ret = X86_BR_IRQ;
-
- /*
- * branch priv level determined by target as
- * is done by HW when LBR_SELECT is implemented
- */
- if (ret != X86_BR_NONE)
- ret |= to_plm;
-
- return ret;
-}
-
-#define X86_BR_TYPE_MAP_MAX 16
-
-static int branch_map[X86_BR_TYPE_MAP_MAX] = {
- PERF_BR_CALL, /* X86_BR_CALL */
- PERF_BR_RET, /* X86_BR_RET */
- PERF_BR_SYSCALL, /* X86_BR_SYSCALL */
- PERF_BR_SYSRET, /* X86_BR_SYSRET */
- PERF_BR_UNKNOWN, /* X86_BR_INT */
- PERF_BR_UNKNOWN, /* X86_BR_IRET */
- PERF_BR_COND, /* X86_BR_JCC */
- PERF_BR_UNCOND, /* X86_BR_JMP */
- PERF_BR_UNKNOWN, /* X86_BR_IRQ */
- PERF_BR_IND_CALL, /* X86_BR_IND_CALL */
- PERF_BR_UNKNOWN, /* X86_BR_ABORT */
- PERF_BR_UNKNOWN, /* X86_BR_IN_TX */
- PERF_BR_UNKNOWN, /* X86_BR_NO_TX */
- PERF_BR_CALL, /* X86_BR_ZERO_CALL */
- PERF_BR_UNKNOWN, /* X86_BR_CALL_STACK */
- PERF_BR_IND, /* X86_BR_IND_JMP */
-};
-
-static int
-common_branch_type(int type)
-{
- int i;
-
- type >>= 2; /* skip X86_BR_USER and X86_BR_KERNEL */
-
- if (type) {
- i = __ffs(type);
- if (i < X86_BR_TYPE_MAP_MAX)
- return branch_map[i];
- }
-
- return PERF_BR_UNKNOWN;
-}
-
enum {
ARCH_LBR_BR_TYPE_JCC = 0,
ARCH_LBR_BR_TYPE_NEAR_IND_JMP = 1,
@@ -1618,9 +1344,6 @@ void intel_pmu_lbr_init_hsw(void)
x86_pmu.lbr_sel_map = hsw_lbr_sel_map;
x86_get_pmu(smp_processor_id())->task_ctx_cache = create_lbr_kmem_cache(size, 0);
-
- if (lbr_from_signext_quirk_needed())
- static_branch_enable(&lbr_from_quirk_key);
}
/* skylake */
@@ -1706,6 +1429,42 @@ void intel_pmu_lbr_init_knl(void)
x86_pmu.intel_cap.lbr_format = LBR_FORMAT_EIP_FLAGS;
}
+void intel_pmu_lbr_init(void)
+{
+ switch (x86_pmu.intel_cap.lbr_format) {
+ case LBR_FORMAT_EIP_FLAGS2:
+ x86_pmu.lbr_has_tsx = 1;
+ x86_pmu.lbr_from_flags = 1;
+ if (lbr_from_signext_quirk_needed())
+ static_branch_enable(&lbr_from_quirk_key);
+ break;
+
+ case LBR_FORMAT_EIP_FLAGS:
+ x86_pmu.lbr_from_flags = 1;
+ break;
+
+ case LBR_FORMAT_INFO:
+ x86_pmu.lbr_has_tsx = 1;
+ fallthrough;
+ case LBR_FORMAT_INFO2:
+ x86_pmu.lbr_has_info = 1;
+ break;
+
+ case LBR_FORMAT_TIME:
+ x86_pmu.lbr_from_flags = 1;
+ x86_pmu.lbr_to_cycles = 1;
+ break;
+ }
+
+ if (x86_pmu.lbr_has_info) {
+ /*
+ * Only used in combination with baseline pebs.
+ */
+ static_branch_enable(&x86_lbr_mispred);
+ static_branch_enable(&x86_lbr_cycles);
+ }
+}
+
/*
* LBR state size is variable based on the max number of registers.
* This calculates the expected state size, which should match
@@ -1726,6 +1485,9 @@ static bool is_arch_lbr_xsave_available(void)
* Check the LBR state with the corresponding software structure.
* Disable LBR XSAVES support if the size doesn't match.
*/
+ if (xfeature_size(XFEATURE_LBR) == 0)
+ return false;
+
if (WARN_ON(xfeature_size(XFEATURE_LBR) != get_lbr_state_size()))
return false;
@@ -1765,6 +1527,12 @@ void __init intel_pmu_arch_lbr_init(void)
x86_pmu.lbr_br_type = ecx.split.lbr_br_type;
x86_pmu.lbr_nr = lbr_nr;
+ if (x86_pmu.lbr_mispred)
+ static_branch_enable(&x86_lbr_mispred);
+ if (x86_pmu.lbr_timed_lbr)
+ static_branch_enable(&x86_lbr_cycles);
+ if (x86_pmu.lbr_br_type)
+ static_branch_enable(&x86_lbr_type);
arch_lbr_xsave = is_arch_lbr_xsave_available();
if (arch_lbr_xsave) {
@@ -1828,7 +1596,7 @@ void __init intel_pmu_arch_lbr_init(void)
return;
clear_arch_lbr:
- clear_cpu_cap(&boot_cpu_data, X86_FEATURE_ARCH_LBR);
+ setup_clear_cpu_cap(X86_FEATURE_ARCH_LBR);
}
/**
diff --git a/arch/x86/events/intel/p4.c b/arch/x86/events/intel/p4.c
index 7951a5dc73b6..03bbcc2fa2ff 100644
--- a/arch/x86/events/intel/p4.c
+++ b/arch/x86/events/intel/p4.c
@@ -1006,6 +1006,29 @@ static void p4_pmu_enable_all(int added)
}
}
+static int p4_pmu_set_period(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ s64 left = this_cpu_read(pmc_prev_left[hwc->idx]);
+ int ret;
+
+ ret = x86_perf_event_set_period(event);
+
+ if (hwc->event_base) {
+ /*
+ * This handles erratum N15 in intel doc 249199-029,
+ * the counter may not be updated correctly on write
+ * so we need a second write operation to do the trick
+ * (the official workaround didn't work)
+ *
+ * the former idea is taken from OProfile code
+ */
+ wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
+ }
+
+ return ret;
+}
+
static int p4_pmu_handle_irq(struct pt_regs *regs)
{
struct perf_sample_data data;
@@ -1044,7 +1067,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
/* event overflow for sure */
perf_sample_data_init(&data, 0, hwc->last_period);
- if (!x86_perf_event_set_period(event))
+ if (!static_call(x86_pmu_set_period)(event))
continue;
@@ -1316,6 +1339,9 @@ static __initconst const struct x86_pmu p4_pmu = {
.enable_all = p4_pmu_enable_all,
.enable = p4_pmu_enable_event,
.disable = p4_pmu_disable_event,
+
+ .set_period = p4_pmu_set_period,
+
.eventsel = MSR_P4_BPU_CCCR0,
.perfctr = MSR_P4_BPU_PERFCTR0,
.event_map = p4_pmu_event_map,
@@ -1334,15 +1360,6 @@ static __initconst const struct x86_pmu p4_pmu = {
.max_period = (1ULL << (ARCH_P4_CNTRVAL_BITS - 1)) - 1,
.hw_config = p4_hw_config,
.schedule_events = p4_pmu_schedule_events,
- /*
- * This handles erratum N15 in intel doc 249199-029,
- * the counter may not be updated correctly on write
- * so we need a second write operation to do the trick
- * (the official workaround didn't work)
- *
- * the former idea is taken from OProfile code
- */
- .perfctr_second_write = 1,
.format_attrs = intel_p4_formats_attr,
};
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 7f406c14715f..82ef87e9a897 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -13,6 +13,8 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/types.h>
+#include <linux/bits.h>
+#include <linux/limits.h>
#include <linux/slab.h>
#include <linux/device.h>
@@ -57,6 +59,8 @@ static struct pt_cap_desc {
PT_CAP(mtc, 0, CPUID_EBX, BIT(3)),
PT_CAP(ptwrite, 0, CPUID_EBX, BIT(4)),
PT_CAP(power_event_trace, 0, CPUID_EBX, BIT(5)),
+ PT_CAP(event_trace, 0, CPUID_EBX, BIT(7)),
+ PT_CAP(tnt_disable, 0, CPUID_EBX, BIT(8)),
PT_CAP(topa_output, 0, CPUID_ECX, BIT(0)),
PT_CAP(topa_multiple_entries, 0, CPUID_ECX, BIT(1)),
PT_CAP(single_range_output, 0, CPUID_ECX, BIT(2)),
@@ -108,6 +112,8 @@ PMU_FORMAT_ATTR(tsc, "config:10" );
PMU_FORMAT_ATTR(noretcomp, "config:11" );
PMU_FORMAT_ATTR(ptw, "config:12" );
PMU_FORMAT_ATTR(branch, "config:13" );
+PMU_FORMAT_ATTR(event, "config:31" );
+PMU_FORMAT_ATTR(notnt, "config:55" );
PMU_FORMAT_ATTR(mtc_period, "config:14-17" );
PMU_FORMAT_ATTR(cyc_thresh, "config:19-22" );
PMU_FORMAT_ATTR(psb_period, "config:24-27" );
@@ -116,6 +122,8 @@ static struct attribute *pt_formats_attr[] = {
&format_attr_pt.attr,
&format_attr_cyc.attr,
&format_attr_pwr_evt.attr,
+ &format_attr_event.attr,
+ &format_attr_notnt.attr,
&format_attr_fup_on_ptw.attr,
&format_attr_mtc.attr,
&format_attr_tsc.attr,
@@ -296,6 +304,8 @@ fail:
RTIT_CTL_CYC_PSB | \
RTIT_CTL_MTC | \
RTIT_CTL_PWR_EVT_EN | \
+ RTIT_CTL_EVENT_EN | \
+ RTIT_CTL_NOTNT | \
RTIT_CTL_FUP_ON_PTW | \
RTIT_CTL_PTW_EN)
@@ -350,6 +360,14 @@ static bool pt_event_valid(struct perf_event *event)
!intel_pt_validate_hw_cap(PT_CAP_power_event_trace))
return false;
+ if (config & RTIT_CTL_EVENT_EN &&
+ !intel_pt_validate_hw_cap(PT_CAP_event_trace))
+ return false;
+
+ if (config & RTIT_CTL_NOTNT &&
+ !intel_pt_validate_hw_cap(PT_CAP_tnt_disable))
+ return false;
+
if (config & RTIT_CTL_PTW) {
if (!intel_pt_validate_hw_cap(PT_CAP_ptwrite))
return false;
@@ -472,7 +490,7 @@ static u64 pt_config_filters(struct perf_event *event)
pt->filters.filter[range].msr_b = filter->msr_b;
}
- rtit_ctl |= filter->config << pt_address_ranges[range].reg_off;
+ rtit_ctl |= (u64)filter->config << pt_address_ranges[range].reg_off;
}
return rtit_ctl;
@@ -897,8 +915,9 @@ static void pt_handle_status(struct pt *pt)
* means we are already losing data; need to let the decoder
* know.
*/
- if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries) ||
- buf->output_off == pt_buffer_region_size(buf)) {
+ if (!buf->single &&
+ (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries) ||
+ buf->output_off == pt_buffer_region_size(buf))) {
perf_aux_output_flag(&pt->handle,
PERF_AUX_FLAG_TRUNCATED);
advance++;
@@ -1347,10 +1366,26 @@ static void pt_addr_filters_fini(struct perf_event *event)
event->hw.addr_filters = NULL;
}
-static inline bool valid_kernel_ip(unsigned long ip)
+#ifdef CONFIG_X86_64
+/* Clamp to a canonical address greater-than-or-equal-to the address given */
+static u64 clamp_to_ge_canonical_addr(u64 vaddr, u8 vaddr_bits)
+{
+ return __is_canonical_address(vaddr, vaddr_bits) ?
+ vaddr :
+ -BIT_ULL(vaddr_bits - 1);
+}
+
+/* Clamp to a canonical address less-than-or-equal-to the address given */
+static u64 clamp_to_le_canonical_addr(u64 vaddr, u8 vaddr_bits)
{
- return virt_addr_valid(ip) && kernel_ip(ip);
+ return __is_canonical_address(vaddr, vaddr_bits) ?
+ vaddr :
+ BIT_ULL(vaddr_bits - 1) - 1;
}
+#else
+#define clamp_to_ge_canonical_addr(x, y) (x)
+#define clamp_to_le_canonical_addr(x, y) (x)
+#endif
static int pt_event_addr_filters_validate(struct list_head *filters)
{
@@ -1366,14 +1401,6 @@ static int pt_event_addr_filters_validate(struct list_head *filters)
filter->action == PERF_ADDR_FILTER_ACTION_START)
return -EOPNOTSUPP;
- if (!filter->path.dentry) {
- if (!valid_kernel_ip(filter->offset))
- return -EINVAL;
-
- if (!valid_kernel_ip(filter->offset + filter->size))
- return -EINVAL;
- }
-
if (++range > intel_pt_validate_hw_cap(PT_CAP_num_address_ranges))
return -EOPNOTSUPP;
}
@@ -1397,9 +1424,26 @@ static void pt_event_addr_filters_sync(struct perf_event *event)
if (filter->path.dentry && !fr[range].start) {
msr_a = msr_b = 0;
} else {
- /* apply the offset */
- msr_a = fr[range].start;
- msr_b = msr_a + fr[range].size - 1;
+ unsigned long n = fr[range].size - 1;
+ unsigned long a = fr[range].start;
+ unsigned long b;
+
+ if (a > ULONG_MAX - n)
+ b = ULONG_MAX;
+ else
+ b = a + n;
+ /*
+ * Apply the offset. 64-bit addresses written to the
+ * MSRs must be canonical, but the range can encompass
+ * non-canonical addresses. Since software cannot
+ * execute at non-canonical addresses, adjusting to
+ * canonical addresses does not affect the result of the
+ * address filter.
+ */
+ msr_a = clamp_to_ge_canonical_addr(a, boot_cpu_data.x86_virt_bits);
+ msr_b = clamp_to_le_canonical_addr(b, boot_cpu_data.x86_virt_bits);
+ if (msr_b < msr_a)
+ msr_a = msr_b = 0;
}
filters->filter[range].msr_a = msr_a;
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index f1ba6ab2e97e..6f1ccc57a692 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -1762,7 +1762,7 @@ static const struct intel_uncore_init_fun rkl_uncore_init __initconst = {
static const struct intel_uncore_init_fun adl_uncore_init __initconst = {
.cpu_init = adl_uncore_cpu_init,
- .mmio_init = tgl_uncore_mmio_init,
+ .mmio_init = adl_uncore_mmio_init,
};
static const struct intel_uncore_init_fun icx_uncore_init __initconst = {
@@ -1828,6 +1828,10 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE, &rkl_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &adl_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &adl_uncore_init),
+ X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &adl_uncore_init),
+ X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &adl_uncore_init),
+ X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &adl_uncore_init),
+ X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &adl_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &spr_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &snr_uncore_init),
{},
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index b9687980aab6..2adeaf4de4df 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -584,10 +584,11 @@ void snb_uncore_cpu_init(void);
void nhm_uncore_cpu_init(void);
void skl_uncore_cpu_init(void);
void icl_uncore_cpu_init(void);
-void adl_uncore_cpu_init(void);
void tgl_uncore_cpu_init(void);
+void adl_uncore_cpu_init(void);
void tgl_uncore_mmio_init(void);
void tgl_l_uncore_mmio_init(void);
+void adl_uncore_mmio_init(void);
int snb_pci2phy_map_init(int devid);
/* uncore_snbep.c */
diff --git a/arch/x86/events/intel/uncore_discovery.c b/arch/x86/events/intel/uncore_discovery.c
index 3049c646fa20..5fd72d4b8bbb 100644
--- a/arch/x86/events/intel/uncore_discovery.c
+++ b/arch/x86/events/intel/uncore_discovery.c
@@ -215,10 +215,18 @@ static int parse_discovery_table(struct pci_dev *dev, int die,
pci_read_config_dword(dev, bar_offset, &val);
- if (val & UNCORE_DISCOVERY_MASK)
+ if (val & ~PCI_BASE_ADDRESS_MEM_MASK & ~PCI_BASE_ADDRESS_MEM_TYPE_64)
return -EINVAL;
- addr = (resource_size_t)(val & ~UNCORE_DISCOVERY_MASK);
+ addr = (resource_size_t)(val & PCI_BASE_ADDRESS_MEM_MASK);
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+ if ((val & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == PCI_BASE_ADDRESS_MEM_TYPE_64) {
+ u32 val2;
+
+ pci_read_config_dword(dev, bar_offset + 4, &val2);
+ addr |= ((resource_size_t)val2) << 32;
+ }
+#endif
size = UNCORE_DISCOVERY_GLOBAL_MAP_SIZE;
io_addr = ioremap(addr, size);
if (!io_addr)
@@ -444,7 +452,7 @@ static struct intel_uncore_ops generic_uncore_pci_ops = {
#define UNCORE_GENERIC_MMIO_SIZE 0x4000
-static unsigned int generic_uncore_mmio_box_ctl(struct intel_uncore_box *box)
+static u64 generic_uncore_mmio_box_ctl(struct intel_uncore_box *box)
{
struct intel_uncore_type *type = box->pmu->type;
@@ -456,7 +464,7 @@ static unsigned int generic_uncore_mmio_box_ctl(struct intel_uncore_box *box)
void intel_generic_uncore_mmio_init_box(struct intel_uncore_box *box)
{
- unsigned int box_ctl = generic_uncore_mmio_box_ctl(box);
+ u64 box_ctl = generic_uncore_mmio_box_ctl(box);
struct intel_uncore_type *type = box->pmu->type;
resource_size_t addr;
@@ -494,8 +502,8 @@ void intel_generic_uncore_mmio_enable_box(struct intel_uncore_box *box)
writel(0, box->io_addr);
}
-static void intel_generic_uncore_mmio_enable_event(struct intel_uncore_box *box,
- struct perf_event *event)
+void intel_generic_uncore_mmio_enable_event(struct intel_uncore_box *box,
+ struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
diff --git a/arch/x86/events/intel/uncore_discovery.h b/arch/x86/events/intel/uncore_discovery.h
index 6d735611c281..f4439357779a 100644
--- a/arch/x86/events/intel/uncore_discovery.h
+++ b/arch/x86/events/intel/uncore_discovery.h
@@ -18,8 +18,6 @@
#define UNCORE_DISCOVERY_BIR_BASE 0x10
/* Discovery table BAR step */
#define UNCORE_DISCOVERY_BIR_STEP 0x4
-/* Mask of the discovery table offset */
-#define UNCORE_DISCOVERY_MASK 0xf
/* Global discovery table size */
#define UNCORE_DISCOVERY_GLOBAL_MAP_SIZE 0x20
@@ -139,6 +137,8 @@ void intel_generic_uncore_mmio_disable_box(struct intel_uncore_box *box);
void intel_generic_uncore_mmio_enable_box(struct intel_uncore_box *box);
void intel_generic_uncore_mmio_disable_event(struct intel_uncore_box *box,
struct perf_event *event);
+void intel_generic_uncore_mmio_enable_event(struct intel_uncore_box *box,
+ struct perf_event *event);
void intel_generic_uncore_pci_init_box(struct intel_uncore_box *box);
void intel_generic_uncore_pci_disable_box(struct intel_uncore_box *box);
diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
index 0f63706cdadf..1ef4f7861e2e 100644
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Nehalem/SandBridge/Haswell/Broadwell/Skylake uncore support */
#include "uncore.h"
+#include "uncore_discovery.h"
/* Uncore IMC PCI IDs */
#define PCI_DEVICE_ID_INTEL_SNB_IMC 0x0100
@@ -64,6 +65,57 @@
#define PCI_DEVICE_ID_INTEL_RKL_2_IMC 0x4c53
#define PCI_DEVICE_ID_INTEL_ADL_1_IMC 0x4660
#define PCI_DEVICE_ID_INTEL_ADL_2_IMC 0x4641
+#define PCI_DEVICE_ID_INTEL_ADL_3_IMC 0x4601
+#define PCI_DEVICE_ID_INTEL_ADL_4_IMC 0x4602
+#define PCI_DEVICE_ID_INTEL_ADL_5_IMC 0x4609
+#define PCI_DEVICE_ID_INTEL_ADL_6_IMC 0x460a
+#define PCI_DEVICE_ID_INTEL_ADL_7_IMC 0x4621
+#define PCI_DEVICE_ID_INTEL_ADL_8_IMC 0x4623
+#define PCI_DEVICE_ID_INTEL_ADL_9_IMC 0x4629
+#define PCI_DEVICE_ID_INTEL_ADL_10_IMC 0x4637
+#define PCI_DEVICE_ID_INTEL_ADL_11_IMC 0x463b
+#define PCI_DEVICE_ID_INTEL_ADL_12_IMC 0x4648
+#define PCI_DEVICE_ID_INTEL_ADL_13_IMC 0x4649
+#define PCI_DEVICE_ID_INTEL_ADL_14_IMC 0x4650
+#define PCI_DEVICE_ID_INTEL_ADL_15_IMC 0x4668
+#define PCI_DEVICE_ID_INTEL_ADL_16_IMC 0x4670
+#define PCI_DEVICE_ID_INTEL_ADL_17_IMC 0x4614
+#define PCI_DEVICE_ID_INTEL_ADL_18_IMC 0x4617
+#define PCI_DEVICE_ID_INTEL_ADL_19_IMC 0x4618
+#define PCI_DEVICE_ID_INTEL_ADL_20_IMC 0x461B
+#define PCI_DEVICE_ID_INTEL_ADL_21_IMC 0x461C
+#define PCI_DEVICE_ID_INTEL_RPL_1_IMC 0xA700
+#define PCI_DEVICE_ID_INTEL_RPL_2_IMC 0xA702
+#define PCI_DEVICE_ID_INTEL_RPL_3_IMC 0xA706
+#define PCI_DEVICE_ID_INTEL_RPL_4_IMC 0xA709
+#define PCI_DEVICE_ID_INTEL_RPL_5_IMC 0xA701
+#define PCI_DEVICE_ID_INTEL_RPL_6_IMC 0xA703
+#define PCI_DEVICE_ID_INTEL_RPL_7_IMC 0xA704
+#define PCI_DEVICE_ID_INTEL_RPL_8_IMC 0xA705
+#define PCI_DEVICE_ID_INTEL_RPL_9_IMC 0xA706
+#define PCI_DEVICE_ID_INTEL_RPL_10_IMC 0xA707
+#define PCI_DEVICE_ID_INTEL_RPL_11_IMC 0xA708
+#define PCI_DEVICE_ID_INTEL_RPL_12_IMC 0xA709
+#define PCI_DEVICE_ID_INTEL_RPL_13_IMC 0xA70a
+#define PCI_DEVICE_ID_INTEL_RPL_14_IMC 0xA70b
+#define PCI_DEVICE_ID_INTEL_RPL_15_IMC 0xA715
+#define PCI_DEVICE_ID_INTEL_RPL_16_IMC 0xA716
+#define PCI_DEVICE_ID_INTEL_RPL_17_IMC 0xA717
+#define PCI_DEVICE_ID_INTEL_RPL_18_IMC 0xA718
+#define PCI_DEVICE_ID_INTEL_RPL_19_IMC 0xA719
+#define PCI_DEVICE_ID_INTEL_RPL_20_IMC 0xA71A
+#define PCI_DEVICE_ID_INTEL_RPL_21_IMC 0xA71B
+#define PCI_DEVICE_ID_INTEL_RPL_22_IMC 0xA71C
+#define PCI_DEVICE_ID_INTEL_RPL_23_IMC 0xA728
+#define PCI_DEVICE_ID_INTEL_RPL_24_IMC 0xA729
+#define PCI_DEVICE_ID_INTEL_RPL_25_IMC 0xA72A
+
+
+#define IMC_UNCORE_DEV(a) \
+{ \
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_##a##_IMC), \
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), \
+}
/* SNB event control */
#define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff
@@ -155,6 +207,7 @@
DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
+DEFINE_UNCORE_FORMAT_ATTR(chmask, chmask, "config:8-11");
DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23");
DEFINE_UNCORE_FORMAT_ATTR(cmask5, cmask, "config:24-28");
@@ -788,6 +841,22 @@ int snb_pci2phy_map_init(int devid)
return 0;
}
+static u64 snb_uncore_imc_read_counter(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ /*
+ * SNB IMC counters are 32-bit and are laid out back to back
+ * in MMIO space. Therefore we must use a 32-bit accessor function
+ * using readq() from uncore_mmio_read_counter() causes problems
+ * because it is reading 64-bit at a time. This is okay for the
+ * uncore_perf_event_update() function because it drops the upper
+ * 32-bits but not okay for plain uncore_read_counter() as invoked
+ * in uncore_pmu_event_start().
+ */
+ return (u64)readl(box->io_addr + hwc->event_base);
+}
+
static struct pmu snb_uncore_imc_pmu = {
.task_ctx_nr = perf_invalid_context,
.event_init = snb_uncore_imc_event_init,
@@ -807,7 +876,7 @@ static struct intel_uncore_ops snb_uncore_imc_ops = {
.disable_event = snb_uncore_imc_disable_event,
.enable_event = snb_uncore_imc_enable_event,
.hw_config = snb_uncore_imc_hw_config,
- .read_counter = uncore_mmio_read_counter,
+ .read_counter = snb_uncore_imc_read_counter,
};
static struct intel_uncore_type snb_uncore_imc = {
@@ -829,242 +898,80 @@ static struct intel_uncore_type *snb_pci_uncores[] = {
};
static const struct pci_device_id snb_uncore_pci_ids[] = {
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SNB_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
+ IMC_UNCORE_DEV(SNB),
{ /* end: all zeroes */ },
};
static const struct pci_device_id ivb_uncore_pci_ids[] = {
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IVB_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IVB_E3_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
+ IMC_UNCORE_DEV(IVB),
+ IMC_UNCORE_DEV(IVB_E3),
{ /* end: all zeroes */ },
};
static const struct pci_device_id hsw_uncore_pci_ids[] = {
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_U_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
+ IMC_UNCORE_DEV(HSW),
+ IMC_UNCORE_DEV(HSW_U),
{ /* end: all zeroes */ },
};
static const struct pci_device_id bdw_uncore_pci_ids[] = {
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BDW_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
+ IMC_UNCORE_DEV(BDW),
{ /* end: all zeroes */ },
};
static const struct pci_device_id skl_uncore_pci_ids[] = {
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_Y_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_U_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_HD_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_HQ_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_SD_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_SQ_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_E3_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_Y_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_U_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_UQ_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_SD_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_SQ_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_HQ_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_WQ_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_2U_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4U_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4H_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6H_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_2S_D_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4S_D_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6S_D_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_8S_D_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4S_W_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6S_W_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_8S_W_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4S_S_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6S_S_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_8S_S_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_AML_YD_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_AML_YQ_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_WHL_UQ_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_WHL_4_UQ_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_WHL_UD_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_H1_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_H2_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_H3_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_U1_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_U2_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_U3_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_S1_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_S2_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_S3_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_S4_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_S5_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
+ IMC_UNCORE_DEV(SKL_Y),
+ IMC_UNCORE_DEV(SKL_U),
+ IMC_UNCORE_DEV(SKL_HD),
+ IMC_UNCORE_DEV(SKL_HQ),
+ IMC_UNCORE_DEV(SKL_SD),
+ IMC_UNCORE_DEV(SKL_SQ),
+ IMC_UNCORE_DEV(SKL_E3),
+ IMC_UNCORE_DEV(KBL_Y),
+ IMC_UNCORE_DEV(KBL_U),
+ IMC_UNCORE_DEV(KBL_UQ),
+ IMC_UNCORE_DEV(KBL_SD),
+ IMC_UNCORE_DEV(KBL_SQ),
+ IMC_UNCORE_DEV(KBL_HQ),
+ IMC_UNCORE_DEV(KBL_WQ),
+ IMC_UNCORE_DEV(CFL_2U),
+ IMC_UNCORE_DEV(CFL_4U),
+ IMC_UNCORE_DEV(CFL_4H),
+ IMC_UNCORE_DEV(CFL_6H),
+ IMC_UNCORE_DEV(CFL_2S_D),
+ IMC_UNCORE_DEV(CFL_4S_D),
+ IMC_UNCORE_DEV(CFL_6S_D),
+ IMC_UNCORE_DEV(CFL_8S_D),
+ IMC_UNCORE_DEV(CFL_4S_W),
+ IMC_UNCORE_DEV(CFL_6S_W),
+ IMC_UNCORE_DEV(CFL_8S_W),
+ IMC_UNCORE_DEV(CFL_4S_S),
+ IMC_UNCORE_DEV(CFL_6S_S),
+ IMC_UNCORE_DEV(CFL_8S_S),
+ IMC_UNCORE_DEV(AML_YD),
+ IMC_UNCORE_DEV(AML_YQ),
+ IMC_UNCORE_DEV(WHL_UQ),
+ IMC_UNCORE_DEV(WHL_4_UQ),
+ IMC_UNCORE_DEV(WHL_UD),
+ IMC_UNCORE_DEV(CML_H1),
+ IMC_UNCORE_DEV(CML_H2),
+ IMC_UNCORE_DEV(CML_H3),
+ IMC_UNCORE_DEV(CML_U1),
+ IMC_UNCORE_DEV(CML_U2),
+ IMC_UNCORE_DEV(CML_U3),
+ IMC_UNCORE_DEV(CML_S1),
+ IMC_UNCORE_DEV(CML_S2),
+ IMC_UNCORE_DEV(CML_S3),
+ IMC_UNCORE_DEV(CML_S4),
+ IMC_UNCORE_DEV(CML_S5),
{ /* end: all zeroes */ },
};
static const struct pci_device_id icl_uncore_pci_ids[] = {
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICL_U_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICL_U2_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_RKL_1_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_RKL_2_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
+ IMC_UNCORE_DEV(ICL_U),
+ IMC_UNCORE_DEV(ICL_U2),
+ IMC_UNCORE_DEV(RKL_1),
+ IMC_UNCORE_DEV(RKL_2),
{ /* end: all zeroes */ },
};
@@ -1306,34 +1213,57 @@ void nhm_uncore_cpu_init(void)
/* Tiger Lake MMIO uncore support */
static const struct pci_device_id tgl_uncore_pci_ids[] = {
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TGL_U1_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TGL_U2_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TGL_U3_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TGL_U4_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TGL_H_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_1_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
- { /* IMC */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_2_IMC),
- .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
- },
+ IMC_UNCORE_DEV(TGL_U1),
+ IMC_UNCORE_DEV(TGL_U2),
+ IMC_UNCORE_DEV(TGL_U3),
+ IMC_UNCORE_DEV(TGL_U4),
+ IMC_UNCORE_DEV(TGL_H),
+ IMC_UNCORE_DEV(ADL_1),
+ IMC_UNCORE_DEV(ADL_2),
+ IMC_UNCORE_DEV(ADL_3),
+ IMC_UNCORE_DEV(ADL_4),
+ IMC_UNCORE_DEV(ADL_5),
+ IMC_UNCORE_DEV(ADL_6),
+ IMC_UNCORE_DEV(ADL_7),
+ IMC_UNCORE_DEV(ADL_8),
+ IMC_UNCORE_DEV(ADL_9),
+ IMC_UNCORE_DEV(ADL_10),
+ IMC_UNCORE_DEV(ADL_11),
+ IMC_UNCORE_DEV(ADL_12),
+ IMC_UNCORE_DEV(ADL_13),
+ IMC_UNCORE_DEV(ADL_14),
+ IMC_UNCORE_DEV(ADL_15),
+ IMC_UNCORE_DEV(ADL_16),
+ IMC_UNCORE_DEV(ADL_17),
+ IMC_UNCORE_DEV(ADL_18),
+ IMC_UNCORE_DEV(ADL_19),
+ IMC_UNCORE_DEV(ADL_20),
+ IMC_UNCORE_DEV(ADL_21),
+ IMC_UNCORE_DEV(RPL_1),
+ IMC_UNCORE_DEV(RPL_2),
+ IMC_UNCORE_DEV(RPL_3),
+ IMC_UNCORE_DEV(RPL_4),
+ IMC_UNCORE_DEV(RPL_5),
+ IMC_UNCORE_DEV(RPL_6),
+ IMC_UNCORE_DEV(RPL_7),
+ IMC_UNCORE_DEV(RPL_8),
+ IMC_UNCORE_DEV(RPL_9),
+ IMC_UNCORE_DEV(RPL_10),
+ IMC_UNCORE_DEV(RPL_11),
+ IMC_UNCORE_DEV(RPL_12),
+ IMC_UNCORE_DEV(RPL_13),
+ IMC_UNCORE_DEV(RPL_14),
+ IMC_UNCORE_DEV(RPL_15),
+ IMC_UNCORE_DEV(RPL_16),
+ IMC_UNCORE_DEV(RPL_17),
+ IMC_UNCORE_DEV(RPL_18),
+ IMC_UNCORE_DEV(RPL_19),
+ IMC_UNCORE_DEV(RPL_20),
+ IMC_UNCORE_DEV(RPL_21),
+ IMC_UNCORE_DEV(RPL_22),
+ IMC_UNCORE_DEV(RPL_23),
+ IMC_UNCORE_DEV(RPL_24),
+ IMC_UNCORE_DEV(RPL_25),
{ /* end: all zeroes */ }
};
@@ -1390,7 +1320,8 @@ static struct pci_dev *tgl_uncore_get_mc_dev(void)
#define TGL_UNCORE_MMIO_IMC_MEM_OFFSET 0x10000
#define TGL_UNCORE_PCI_IMC_MAP_SIZE 0xe000
-static void tgl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box)
+static void __uncore_imc_init_box(struct intel_uncore_box *box,
+ unsigned int base_offset)
{
struct pci_dev *pdev = tgl_uncore_get_mc_dev();
struct intel_uncore_pmu *pmu = box->pmu;
@@ -1417,11 +1348,17 @@ static void tgl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box)
addr |= ((resource_size_t)mch_bar << 32);
#endif
+ addr += base_offset;
box->io_addr = ioremap(addr, type->mmio_map_size);
if (!box->io_addr)
pr_warn("perf uncore: Failed to ioremap for %s.\n", type->name);
}
+static void tgl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box)
+{
+ __uncore_imc_init_box(box, 0);
+}
+
static struct intel_uncore_ops tgl_uncore_imc_freerunning_ops = {
.init_box = tgl_uncore_imc_freerunning_init_box,
.exit_box = uncore_mmio_exit_box,
@@ -1469,3 +1406,136 @@ void tgl_uncore_mmio_init(void)
}
/* end of Tiger Lake MMIO uncore support */
+
+/* Alder Lake MMIO uncore support */
+#define ADL_UNCORE_IMC_BASE 0xd900
+#define ADL_UNCORE_IMC_MAP_SIZE 0x200
+#define ADL_UNCORE_IMC_CTR 0xe8
+#define ADL_UNCORE_IMC_CTRL 0xd0
+#define ADL_UNCORE_IMC_GLOBAL_CTL 0xc0
+#define ADL_UNCORE_IMC_BOX_CTL 0xc4
+#define ADL_UNCORE_IMC_FREERUNNING_BASE 0xd800
+#define ADL_UNCORE_IMC_FREERUNNING_MAP_SIZE 0x100
+
+#define ADL_UNCORE_IMC_CTL_FRZ (1 << 0)
+#define ADL_UNCORE_IMC_CTL_RST_CTRL (1 << 1)
+#define ADL_UNCORE_IMC_CTL_RST_CTRS (1 << 2)
+#define ADL_UNCORE_IMC_CTL_INT (ADL_UNCORE_IMC_CTL_RST_CTRL | \
+ ADL_UNCORE_IMC_CTL_RST_CTRS)
+
+static void adl_uncore_imc_init_box(struct intel_uncore_box *box)
+{
+ __uncore_imc_init_box(box, ADL_UNCORE_IMC_BASE);
+
+ /* The global control in MC1 can control both MCs. */
+ if (box->io_addr && (box->pmu->pmu_idx == 1))
+ writel(ADL_UNCORE_IMC_CTL_INT, box->io_addr + ADL_UNCORE_IMC_GLOBAL_CTL);
+}
+
+static void adl_uncore_mmio_disable_box(struct intel_uncore_box *box)
+{
+ if (!box->io_addr)
+ return;
+
+ writel(ADL_UNCORE_IMC_CTL_FRZ, box->io_addr + uncore_mmio_box_ctl(box));
+}
+
+static void adl_uncore_mmio_enable_box(struct intel_uncore_box *box)
+{
+ if (!box->io_addr)
+ return;
+
+ writel(0, box->io_addr + uncore_mmio_box_ctl(box));
+}
+
+static struct intel_uncore_ops adl_uncore_mmio_ops = {
+ .init_box = adl_uncore_imc_init_box,
+ .exit_box = uncore_mmio_exit_box,
+ .disable_box = adl_uncore_mmio_disable_box,
+ .enable_box = adl_uncore_mmio_enable_box,
+ .disable_event = intel_generic_uncore_mmio_disable_event,
+ .enable_event = intel_generic_uncore_mmio_enable_event,
+ .read_counter = uncore_mmio_read_counter,
+};
+
+#define ADL_UNC_CTL_CHMASK_MASK 0x00000f00
+#define ADL_UNC_IMC_EVENT_MASK (SNB_UNC_CTL_EV_SEL_MASK | \
+ ADL_UNC_CTL_CHMASK_MASK | \
+ SNB_UNC_CTL_EDGE_DET)
+
+static struct attribute *adl_uncore_imc_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_chmask.attr,
+ &format_attr_edge.attr,
+ NULL,
+};
+
+static const struct attribute_group adl_uncore_imc_format_group = {
+ .name = "format",
+ .attrs = adl_uncore_imc_formats_attr,
+};
+
+static struct intel_uncore_type adl_uncore_imc = {
+ .name = "imc",
+ .num_counters = 5,
+ .num_boxes = 2,
+ .perf_ctr_bits = 64,
+ .perf_ctr = ADL_UNCORE_IMC_CTR,
+ .event_ctl = ADL_UNCORE_IMC_CTRL,
+ .event_mask = ADL_UNC_IMC_EVENT_MASK,
+ .box_ctl = ADL_UNCORE_IMC_BOX_CTL,
+ .mmio_offset = 0,
+ .mmio_map_size = ADL_UNCORE_IMC_MAP_SIZE,
+ .ops = &adl_uncore_mmio_ops,
+ .format_group = &adl_uncore_imc_format_group,
+};
+
+enum perf_adl_uncore_imc_freerunning_types {
+ ADL_MMIO_UNCORE_IMC_DATA_TOTAL,
+ ADL_MMIO_UNCORE_IMC_DATA_READ,
+ ADL_MMIO_UNCORE_IMC_DATA_WRITE,
+ ADL_MMIO_UNCORE_IMC_FREERUNNING_TYPE_MAX
+};
+
+static struct freerunning_counters adl_uncore_imc_freerunning[] = {
+ [ADL_MMIO_UNCORE_IMC_DATA_TOTAL] = { 0x40, 0x0, 0x0, 1, 64 },
+ [ADL_MMIO_UNCORE_IMC_DATA_READ] = { 0x58, 0x0, 0x0, 1, 64 },
+ [ADL_MMIO_UNCORE_IMC_DATA_WRITE] = { 0xA0, 0x0, 0x0, 1, 64 },
+};
+
+static void adl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box)
+{
+ __uncore_imc_init_box(box, ADL_UNCORE_IMC_FREERUNNING_BASE);
+}
+
+static struct intel_uncore_ops adl_uncore_imc_freerunning_ops = {
+ .init_box = adl_uncore_imc_freerunning_init_box,
+ .exit_box = uncore_mmio_exit_box,
+ .read_counter = uncore_mmio_read_counter,
+ .hw_config = uncore_freerunning_hw_config,
+};
+
+static struct intel_uncore_type adl_uncore_imc_free_running = {
+ .name = "imc_free_running",
+ .num_counters = 3,
+ .num_boxes = 2,
+ .num_freerunning_types = ADL_MMIO_UNCORE_IMC_FREERUNNING_TYPE_MAX,
+ .mmio_map_size = ADL_UNCORE_IMC_FREERUNNING_MAP_SIZE,
+ .freerunning = adl_uncore_imc_freerunning,
+ .ops = &adl_uncore_imc_freerunning_ops,
+ .event_descs = tgl_uncore_imc_events,
+ .format_group = &tgl_uncore_imc_format_group,
+};
+
+static struct intel_uncore_type *adl_mmio_uncores[] = {
+ &adl_uncore_imc,
+ &adl_uncore_imc_free_running,
+ NULL
+};
+
+void adl_uncore_mmio_init(void)
+{
+ uncore_mmio_uncores = adl_mmio_uncores;
+}
+
+/* end of Alder Lake MMIO uncore support */
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 3660f698fb2a..ed869443efb2 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -5482,7 +5482,7 @@ static struct intel_uncore_type icx_uncore_imc = {
.fixed_ctr_bits = 48,
.fixed_ctr = SNR_IMC_MMIO_PMON_FIXED_CTR,
.fixed_ctl = SNR_IMC_MMIO_PMON_FIXED_CTL,
- .event_descs = hswep_uncore_imc_events,
+ .event_descs = snr_uncore_imc_events,
.perf_ctr = SNR_IMC_MMIO_PMON_CTR0,
.event_ctl = SNR_IMC_MMIO_PMON_CTL0,
.event_mask = SNBEP_PMON_RAW_EVENT_MASK,
diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
index 96c775abe31f..ecced3a52668 100644
--- a/arch/x86/events/msr.c
+++ b/arch/x86/events/msr.c
@@ -103,6 +103,10 @@ static bool test_intel(int idx, void *data)
case INTEL_FAM6_ROCKETLAKE:
case INTEL_FAM6_ALDERLAKE:
case INTEL_FAM6_ALDERLAKE_L:
+ case INTEL_FAM6_ALDERLAKE_N:
+ case INTEL_FAM6_RAPTORLAKE:
+ case INTEL_FAM6_RAPTORLAKE_P:
+ case INTEL_FAM6_RAPTORLAKE_S:
if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF)
return true;
break;
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 9d376e528dfc..332d2e6d8ae4 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -64,25 +64,25 @@ static inline bool constraint_match(struct event_constraint *c, u64 ecode)
return ((ecode & c->cmask) - c->code) <= (u64)c->size;
}
+#define PERF_ARCH(name, val) \
+ PERF_X86_EVENT_##name = val,
+
/*
* struct hw_perf_event.flags flags
*/
-#define PERF_X86_EVENT_PEBS_LDLAT 0x0001 /* ld+ldlat data address sampling */
-#define PERF_X86_EVENT_PEBS_ST 0x0002 /* st data address sampling */
-#define PERF_X86_EVENT_PEBS_ST_HSW 0x0004 /* haswell style datala, store */
-#define PERF_X86_EVENT_PEBS_LD_HSW 0x0008 /* haswell style datala, load */
-#define PERF_X86_EVENT_PEBS_NA_HSW 0x0010 /* haswell style datala, unknown */
-#define PERF_X86_EVENT_EXCL 0x0020 /* HT exclusivity on counter */
-#define PERF_X86_EVENT_DYNAMIC 0x0040 /* dynamic alloc'd constraint */
-
-#define PERF_X86_EVENT_EXCL_ACCT 0x0100 /* accounted EXCL event */
-#define PERF_X86_EVENT_AUTO_RELOAD 0x0200 /* use PEBS auto-reload */
-#define PERF_X86_EVENT_LARGE_PEBS 0x0400 /* use large PEBS */
-#define PERF_X86_EVENT_PEBS_VIA_PT 0x0800 /* use PT buffer for PEBS */
-#define PERF_X86_EVENT_PAIR 0x1000 /* Large Increment per Cycle */
-#define PERF_X86_EVENT_LBR_SELECT 0x2000 /* Save/Restore MSR_LBR_SELECT */
-#define PERF_X86_EVENT_TOPDOWN 0x4000 /* Count Topdown slots/metrics events */
-#define PERF_X86_EVENT_PEBS_STLAT 0x8000 /* st+stlat data address sampling */
+enum {
+#include "perf_event_flags.h"
+};
+
+#undef PERF_ARCH
+
+#define PERF_ARCH(name, val) \
+ static_assert((PERF_X86_EVENT_##name & PERF_EVENT_FLAG_ARCH) == \
+ PERF_X86_EVENT_##name);
+
+#include "perf_event_flags.h"
+
+#undef PERF_ARCH
static inline bool is_topdown_count(struct perf_event *event)
{
@@ -135,7 +135,8 @@ struct amd_nb {
PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \
PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR | \
PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER | \
- PERF_SAMPLE_PERIOD | PERF_SAMPLE_CODE_PAGE_SIZE)
+ PERF_SAMPLE_PERIOD | PERF_SAMPLE_CODE_PAGE_SIZE | \
+ PERF_SAMPLE_WEIGHT_TYPE)
#define PEBS_GP_REGS \
((1ULL << PERF_REG_X86_AX) | \
@@ -215,7 +216,8 @@ enum {
LBR_FORMAT_EIP_FLAGS2 = 0x04,
LBR_FORMAT_INFO = 0x05,
LBR_FORMAT_TIME = 0x06,
- LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_TIME,
+ LBR_FORMAT_INFO2 = 0x07,
+ LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_INFO2,
};
enum {
@@ -268,6 +270,10 @@ struct cpu_hw_events {
u64 active_pebs_data_cfg;
int pebs_record_size;
+ /* Intel Fixed counter configuration */
+ u64 fixed_ctrl_val;
+ u64 active_fixed_ctrl_val;
+
/*
* Intel LBR bits
*/
@@ -324,6 +330,8 @@ struct cpu_hw_events {
* AMD specific bits
*/
struct amd_nb *amd_nb;
+ int brs_active; /* BRS is enabled */
+
/* Inverted mask of bits to clear in the perf_ctr ctrl registers */
u64 perf_ctr_virt_mask;
int n_pair; /* Large increment events */
@@ -456,6 +464,10 @@ struct cpu_hw_events {
__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
+#define INTEL_HYBRID_LAT_CONSTRAINT(c, n) \
+ __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
+ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LAT_HYBRID)
+
/* Event constraint, but match on all event flags too. */
#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \
EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS)
@@ -634,6 +646,8 @@ enum {
x86_lbr_exclusive_max,
};
+#define PERF_PEBS_DATA_SOURCE_MAX 0x10
+
struct x86_hybrid_pmu {
struct pmu pmu;
const char *name;
@@ -661,6 +675,8 @@ struct x86_hybrid_pmu {
unsigned int late_ack :1,
mid_ack :1,
enabled_ack :1;
+
+ u64 pebs_data_source[PERF_PEBS_DATA_SOURCE_MAX];
};
static __always_inline struct x86_hybrid_pmu *hybrid_pmu(struct pmu *pmu)
@@ -731,6 +747,8 @@ struct x86_pmu {
void (*add)(struct perf_event *);
void (*del)(struct perf_event *);
void (*read)(struct perf_event *event);
+ int (*set_period)(struct perf_event *event);
+ u64 (*update)(struct perf_event *event);
int (*hw_config)(struct perf_event *event);
int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
unsigned eventsel;
@@ -766,8 +784,7 @@ struct x86_pmu {
struct event_constraint *event_constraints;
struct x86_pmu_quirk *quirks;
- int perfctr_second_write;
- u64 (*limit_period)(struct perf_event *event, u64 l);
+ void (*limit_period)(struct perf_event *event, s64 *l);
/* PMI handler bits */
unsigned int late_ack :1,
@@ -814,15 +831,18 @@ struct x86_pmu {
pebs_prec_dist :1,
pebs_no_tlb :1,
pebs_no_isolation :1,
- pebs_block :1;
+ pebs_block :1,
+ pebs_ept :1;
int pebs_record_size;
int pebs_buffer_size;
int max_pebs_events;
void (*drain_pebs)(struct pt_regs *regs, struct perf_sample_data *data);
struct event_constraint *pebs_constraints;
void (*pebs_aliases)(struct perf_event *event);
+ u64 (*pebs_latency_data)(struct perf_event *event, u64 status);
unsigned long large_pebs_flags;
u64 rtm_abort_event;
+ u64 pebs_capable;
/*
* Intel LBR
@@ -840,6 +860,11 @@ struct x86_pmu {
bool lbr_double_abort; /* duplicated lbr aborts */
bool lbr_pt_coexist; /* (LBR|BTS) may coexist with PT */
+ unsigned int lbr_has_info:1;
+ unsigned int lbr_has_tsx:1;
+ unsigned int lbr_from_flags:1;
+ unsigned int lbr_to_cycles:1;
+
/*
* Intel Architectural LBR CPUID Enumeration
*/
@@ -867,8 +892,6 @@ struct x86_pmu {
* Intel perf metrics
*/
int num_topdown_events;
- u64 (*update_topdown_event)(struct perf_event *event);
- int (*set_topdown_event_period)(struct perf_event *event);
/*
* perf task context (i.e. struct perf_event_context::task_ctx_data)
@@ -893,7 +916,7 @@ struct x86_pmu {
/*
* Intel host/guest support (KVM)
*/
- struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr);
+ struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr, void *data);
/*
* Check period value for PERF_EVENT_IOC_PERIOD ioctl.
@@ -1022,6 +1045,9 @@ static struct perf_pmu_format_hybrid_attr format_attr_hybrid_##_name = {\
struct pmu *x86_get_pmu(unsigned int cpu);
extern struct x86_pmu x86_pmu __read_mostly;
+DECLARE_STATIC_CALL(x86_pmu_set_period, *x86_pmu.set_period);
+DECLARE_STATIC_CALL(x86_pmu_update, *x86_pmu.update);
+
static __always_inline struct x86_perf_task_context_opt *task_context_opt(void *ctx)
{
if (static_cpu_has(X86_FEATURE_ARCH_LBR))
@@ -1037,6 +1063,7 @@ static inline bool x86_pmu_has_lbr_callstack(void)
}
DECLARE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
+DECLARE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
int x86_perf_event_set_period(struct perf_event *event);
@@ -1099,6 +1126,11 @@ int x86_pmu_hw_config(struct perf_event *event);
void x86_pmu_disable_all(void);
+static inline bool has_amd_brs(struct hw_perf_event *hwc)
+{
+ return hwc->flags & PERF_X86_EVENT_AMD_BRS;
+}
+
static inline bool is_counter_pair(struct hw_perf_event *hwc)
{
return hwc->flags & PERF_X86_EVENT_PAIR;
@@ -1183,6 +1215,70 @@ static inline void set_linear_ip(struct pt_regs *regs, unsigned long ip)
regs->ip = ip;
}
+/*
+ * x86control flow change classification
+ * x86control flow changes include branches, interrupts, traps, faults
+ */
+enum {
+ X86_BR_NONE = 0, /* unknown */
+
+ X86_BR_USER = 1 << 0, /* branch target is user */
+ X86_BR_KERNEL = 1 << 1, /* branch target is kernel */
+
+ X86_BR_CALL = 1 << 2, /* call */
+ X86_BR_RET = 1 << 3, /* return */
+ X86_BR_SYSCALL = 1 << 4, /* syscall */
+ X86_BR_SYSRET = 1 << 5, /* syscall return */
+ X86_BR_INT = 1 << 6, /* sw interrupt */
+ X86_BR_IRET = 1 << 7, /* return from interrupt */
+ X86_BR_JCC = 1 << 8, /* conditional */
+ X86_BR_JMP = 1 << 9, /* jump */
+ X86_BR_IRQ = 1 << 10,/* hw interrupt or trap or fault */
+ X86_BR_IND_CALL = 1 << 11,/* indirect calls */
+ X86_BR_ABORT = 1 << 12,/* transaction abort */
+ X86_BR_IN_TX = 1 << 13,/* in transaction */
+ X86_BR_NO_TX = 1 << 14,/* not in transaction */
+ X86_BR_ZERO_CALL = 1 << 15,/* zero length call */
+ X86_BR_CALL_STACK = 1 << 16,/* call stack */
+ X86_BR_IND_JMP = 1 << 17,/* indirect jump */
+
+ X86_BR_TYPE_SAVE = 1 << 18,/* indicate to save branch type */
+
+};
+
+#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
+#define X86_BR_ANYTX (X86_BR_NO_TX | X86_BR_IN_TX)
+
+#define X86_BR_ANY \
+ (X86_BR_CALL |\
+ X86_BR_RET |\
+ X86_BR_SYSCALL |\
+ X86_BR_SYSRET |\
+ X86_BR_INT |\
+ X86_BR_IRET |\
+ X86_BR_JCC |\
+ X86_BR_JMP |\
+ X86_BR_IRQ |\
+ X86_BR_ABORT |\
+ X86_BR_IND_CALL |\
+ X86_BR_IND_JMP |\
+ X86_BR_ZERO_CALL)
+
+#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
+
+#define X86_BR_ANY_CALL \
+ (X86_BR_CALL |\
+ X86_BR_IND_CALL |\
+ X86_BR_ZERO_CALL |\
+ X86_BR_SYSCALL |\
+ X86_BR_IRQ |\
+ X86_BR_INT)
+
+int common_branch_type(int type);
+int branch_type(unsigned long from, unsigned long to, int abort);
+int branch_type_fused(unsigned long from, unsigned long to, int abort,
+ int *offset);
+
ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event);
ssize_t intel_event_sysfs_show(char *page, u64 config);
@@ -1205,6 +1301,88 @@ static inline bool fixed_counter_disabled(int i, struct pmu *pmu)
int amd_pmu_init(void);
+int amd_pmu_lbr_init(void);
+void amd_pmu_lbr_reset(void);
+void amd_pmu_lbr_read(void);
+void amd_pmu_lbr_add(struct perf_event *event);
+void amd_pmu_lbr_del(struct perf_event *event);
+void amd_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);
+void amd_pmu_lbr_enable_all(void);
+void amd_pmu_lbr_disable_all(void);
+int amd_pmu_lbr_hw_config(struct perf_event *event);
+
+#ifdef CONFIG_PERF_EVENTS_AMD_BRS
+
+#define AMD_FAM19H_BRS_EVENT 0xc4 /* RETIRED_TAKEN_BRANCH_INSTRUCTIONS */
+
+int amd_brs_init(void);
+void amd_brs_disable(void);
+void amd_brs_enable(void);
+void amd_brs_enable_all(void);
+void amd_brs_disable_all(void);
+void amd_brs_drain(void);
+void amd_brs_lopwr_init(void);
+void amd_brs_disable_all(void);
+int amd_brs_hw_config(struct perf_event *event);
+void amd_brs_reset(void);
+
+static inline void amd_pmu_brs_add(struct perf_event *event)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ perf_sched_cb_inc(event->ctx->pmu);
+ cpuc->lbr_users++;
+ /*
+ * No need to reset BRS because it is reset
+ * on brs_enable() and it is saturating
+ */
+}
+
+static inline void amd_pmu_brs_del(struct perf_event *event)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ cpuc->lbr_users--;
+ WARN_ON_ONCE(cpuc->lbr_users < 0);
+
+ perf_sched_cb_dec(event->ctx->pmu);
+}
+
+void amd_pmu_brs_sched_task(struct perf_event_context *ctx, bool sched_in);
+#else
+static inline int amd_brs_init(void)
+{
+ return 0;
+}
+static inline void amd_brs_disable(void) {}
+static inline void amd_brs_enable(void) {}
+static inline void amd_brs_drain(void) {}
+static inline void amd_brs_lopwr_init(void) {}
+static inline void amd_brs_disable_all(void) {}
+static inline int amd_brs_hw_config(struct perf_event *event)
+{
+ return 0;
+}
+static inline void amd_brs_reset(void) {}
+
+static inline void amd_pmu_brs_add(struct perf_event *event)
+{
+}
+
+static inline void amd_pmu_brs_del(struct perf_event *event)
+{
+}
+
+static inline void amd_pmu_brs_sched_task(struct perf_event_context *ctx, bool sched_in)
+{
+}
+
+static inline void amd_brs_enable_all(void)
+{
+}
+
+#endif
+
#else /* CONFIG_CPU_SUP_AMD */
static inline int amd_pmu_init(void)
@@ -1212,6 +1390,22 @@ static inline int amd_pmu_init(void)
return 0;
}
+static inline int amd_brs_init(void)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void amd_brs_drain(void)
+{
+}
+
+static inline void amd_brs_enable_all(void)
+{
+}
+
+static inline void amd_brs_disable_all(void)
+{
+}
#endif /* CONFIG_CPU_SUP_AMD */
static inline int is_pebs_pt(struct perf_event *event)
@@ -1293,6 +1487,8 @@ void intel_pmu_disable_bts(void);
int intel_pmu_drain_bts_buffer(void);
+u64 adl_latency_data_small(struct perf_event *event, u64 status);
+
extern struct event_constraint intel_core2_pebs_event_constraints[];
extern struct event_constraint intel_atom_pebs_event_constraints[];
@@ -1392,12 +1588,18 @@ void intel_pmu_lbr_init_skl(void);
void intel_pmu_lbr_init_knl(void);
+void intel_pmu_lbr_init(void);
+
void intel_pmu_arch_lbr_init(void);
void intel_pmu_pebs_data_source_nhm(void);
void intel_pmu_pebs_data_source_skl(bool pmem);
+void intel_pmu_pebs_data_source_adl(void);
+
+void intel_pmu_pebs_data_source_grt(void);
+
int intel_pmu_setup_lbr_filter(struct perf_event *event);
void intel_pt_interrupt(void);
diff --git a/arch/x86/events/perf_event_flags.h b/arch/x86/events/perf_event_flags.h
new file mode 100644
index 000000000000..1dc19b9b4426
--- /dev/null
+++ b/arch/x86/events/perf_event_flags.h
@@ -0,0 +1,22 @@
+
+/*
+ * struct hw_perf_event.flags flags
+ */
+PERF_ARCH(PEBS_LDLAT, 0x00001) /* ld+ldlat data address sampling */
+PERF_ARCH(PEBS_ST, 0x00002) /* st data address sampling */
+PERF_ARCH(PEBS_ST_HSW, 0x00004) /* haswell style datala, store */
+PERF_ARCH(PEBS_LD_HSW, 0x00008) /* haswell style datala, load */
+PERF_ARCH(PEBS_NA_HSW, 0x00010) /* haswell style datala, unknown */
+PERF_ARCH(EXCL, 0x00020) /* HT exclusivity on counter */
+PERF_ARCH(DYNAMIC, 0x00040) /* dynamic alloc'd constraint */
+ /* 0x00080 */
+PERF_ARCH(EXCL_ACCT, 0x00100) /* accounted EXCL event */
+PERF_ARCH(AUTO_RELOAD, 0x00200) /* use PEBS auto-reload */
+PERF_ARCH(LARGE_PEBS, 0x00400) /* use large PEBS */
+PERF_ARCH(PEBS_VIA_PT, 0x00800) /* use PT buffer for PEBS */
+PERF_ARCH(PAIR, 0x01000) /* Large Increment per Cycle */
+PERF_ARCH(LBR_SELECT, 0x02000) /* Save/Restore MSR_LBR_SELECT */
+PERF_ARCH(TOPDOWN, 0x04000) /* Count Topdown slots/metrics events */
+PERF_ARCH(PEBS_STLAT, 0x08000) /* st+stlat data address sampling */
+PERF_ARCH(AMD_BRS, 0x10000) /* AMD Branch Sampling */
+PERF_ARCH(PEBS_LAT_HYBRID, 0x20000) /* ld and st lat for hybrid */
diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c
index 85feafacc445..a829492bca4c 100644
--- a/arch/x86/events/rapl.c
+++ b/arch/x86/events/rapl.c
@@ -536,11 +536,14 @@ static struct perf_msr intel_rapl_spr_msrs[] = {
* - perf_msr_probe(PERF_RAPL_MAX)
* - want to use same event codes across both architectures
*/
-static struct perf_msr amd_rapl_msrs[PERF_RAPL_MAX] = {
- [PERF_RAPL_PKG] = { MSR_AMD_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr },
+static struct perf_msr amd_rapl_msrs[] = {
+ [PERF_RAPL_PP0] = { 0, &rapl_events_cores_group, 0, false, 0 },
+ [PERF_RAPL_PKG] = { MSR_AMD_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr, false, RAPL_MSR_MASK },
+ [PERF_RAPL_RAM] = { 0, &rapl_events_ram_group, 0, false, 0 },
+ [PERF_RAPL_PP1] = { 0, &rapl_events_gpu_group, 0, false, 0 },
+ [PERF_RAPL_PSYS] = { 0, &rapl_events_psys_group, 0, false, 0 },
};
-
static int rapl_cpu_offline(unsigned int cpu)
{
struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
@@ -616,12 +619,8 @@ static int rapl_check_hw_unit(struct rapl_model *rm)
case RAPL_UNIT_QUIRK_INTEL_HSW:
rapl_hw_unit[PERF_RAPL_RAM] = 16;
break;
- /*
- * SPR shares the same DRAM domain energy unit as HSW, plus it
- * also has a fixed energy unit for Psys domain.
- */
+ /* SPR uses a fixed energy unit for Psys domain. */
case RAPL_UNIT_QUIRK_INTEL_SPR:
- rapl_hw_unit[PERF_RAPL_RAM] = 16;
rapl_hw_unit[PERF_RAPL_PSYS] = 0;
break;
default:
@@ -803,7 +802,11 @@ static const struct x86_cpu_id rapl_model_match[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &model_skl),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &model_skl),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &model_skl),
+ X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &model_skl),
X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &model_spr),
+ X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &model_skl),
+ X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &model_skl),
+ X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &model_skl),
{},
};
MODULE_DEVICE_TABLE(x86cpu, rapl_model_match);
diff --git a/arch/x86/events/utils.c b/arch/x86/events/utils.c
new file mode 100644
index 000000000000..76b1f8bb0fd5
--- /dev/null
+++ b/arch/x86/events/utils.c
@@ -0,0 +1,251 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <asm/insn.h>
+
+#include "perf_event.h"
+
+static int decode_branch_type(struct insn *insn)
+{
+ int ext;
+
+ if (insn_get_opcode(insn))
+ return X86_BR_ABORT;
+
+ switch (insn->opcode.bytes[0]) {
+ case 0xf:
+ switch (insn->opcode.bytes[1]) {
+ case 0x05: /* syscall */
+ case 0x34: /* sysenter */
+ return X86_BR_SYSCALL;
+ case 0x07: /* sysret */
+ case 0x35: /* sysexit */
+ return X86_BR_SYSRET;
+ case 0x80 ... 0x8f: /* conditional */
+ return X86_BR_JCC;
+ }
+ return X86_BR_NONE;
+ case 0x70 ... 0x7f: /* conditional */
+ return X86_BR_JCC;
+ case 0xc2: /* near ret */
+ case 0xc3: /* near ret */
+ case 0xca: /* far ret */
+ case 0xcb: /* far ret */
+ return X86_BR_RET;
+ case 0xcf: /* iret */
+ return X86_BR_IRET;
+ case 0xcc ... 0xce: /* int */
+ return X86_BR_INT;
+ case 0xe8: /* call near rel */
+ if (insn_get_immediate(insn) || insn->immediate1.value == 0) {
+ /* zero length call */
+ return X86_BR_ZERO_CALL;
+ }
+ fallthrough;
+ case 0x9a: /* call far absolute */
+ return X86_BR_CALL;
+ case 0xe0 ... 0xe3: /* loop jmp */
+ return X86_BR_JCC;
+ case 0xe9 ... 0xeb: /* jmp */
+ return X86_BR_JMP;
+ case 0xff: /* call near absolute, call far absolute ind */
+ if (insn_get_modrm(insn))
+ return X86_BR_ABORT;
+
+ ext = (insn->modrm.bytes[0] >> 3) & 0x7;
+ switch (ext) {
+ case 2: /* near ind call */
+ case 3: /* far ind call */
+ return X86_BR_IND_CALL;
+ case 4:
+ case 5:
+ return X86_BR_IND_JMP;
+ }
+ return X86_BR_NONE;
+ }
+
+ return X86_BR_NONE;
+}
+
+/*
+ * return the type of control flow change at address "from"
+ * instruction is not necessarily a branch (in case of interrupt).
+ *
+ * The branch type returned also includes the priv level of the
+ * target of the control flow change (X86_BR_USER, X86_BR_KERNEL).
+ *
+ * If a branch type is unknown OR the instruction cannot be
+ * decoded (e.g., text page not present), then X86_BR_NONE is
+ * returned.
+ *
+ * While recording branches, some processors can report the "from"
+ * address to be that of an instruction preceding the actual branch
+ * when instruction fusion occurs. If fusion is expected, attempt to
+ * find the type of the first branch instruction within the next
+ * MAX_INSN_SIZE bytes and if found, provide the offset between the
+ * reported "from" address and the actual branch instruction address.
+ */
+static int get_branch_type(unsigned long from, unsigned long to, int abort,
+ bool fused, int *offset)
+{
+ struct insn insn;
+ void *addr;
+ int bytes_read, bytes_left, insn_offset;
+ int ret = X86_BR_NONE;
+ int to_plm, from_plm;
+ u8 buf[MAX_INSN_SIZE];
+ int is64 = 0;
+
+ /* make sure we initialize offset */
+ if (offset)
+ *offset = 0;
+
+ to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
+ from_plm = kernel_ip(from) ? X86_BR_KERNEL : X86_BR_USER;
+
+ /*
+ * maybe zero if lbr did not fill up after a reset by the time
+ * we get a PMU interrupt
+ */
+ if (from == 0 || to == 0)
+ return X86_BR_NONE;
+
+ if (abort)
+ return X86_BR_ABORT | to_plm;
+
+ if (from_plm == X86_BR_USER) {
+ /*
+ * can happen if measuring at the user level only
+ * and we interrupt in a kernel thread, e.g., idle.
+ */
+ if (!current->mm)
+ return X86_BR_NONE;
+
+ /* may fail if text not present */
+ bytes_left = copy_from_user_nmi(buf, (void __user *)from,
+ MAX_INSN_SIZE);
+ bytes_read = MAX_INSN_SIZE - bytes_left;
+ if (!bytes_read)
+ return X86_BR_NONE;
+
+ addr = buf;
+ } else {
+ /*
+ * The LBR logs any address in the IP, even if the IP just
+ * faulted. This means userspace can control the from address.
+ * Ensure we don't blindly read any address by validating it is
+ * a known text address.
+ */
+ if (kernel_text_address(from)) {
+ addr = (void *)from;
+ /*
+ * Assume we can get the maximum possible size
+ * when grabbing kernel data. This is not
+ * _strictly_ true since we could possibly be
+ * executing up next to a memory hole, but
+ * it is very unlikely to be a problem.
+ */
+ bytes_read = MAX_INSN_SIZE;
+ } else {
+ return X86_BR_NONE;
+ }
+ }
+
+ /*
+ * decoder needs to know the ABI especially
+ * on 64-bit systems running 32-bit apps
+ */
+#ifdef CONFIG_X86_64
+ is64 = kernel_ip((unsigned long)addr) || any_64bit_mode(current_pt_regs());
+#endif
+ insn_init(&insn, addr, bytes_read, is64);
+ ret = decode_branch_type(&insn);
+ insn_offset = 0;
+
+ /* Check for the possibility of branch fusion */
+ while (fused && ret == X86_BR_NONE) {
+ /* Check for decoding errors */
+ if (insn_get_length(&insn) || !insn.length)
+ break;
+
+ insn_offset += insn.length;
+ bytes_read -= insn.length;
+ if (bytes_read < 0)
+ break;
+
+ insn_init(&insn, addr + insn_offset, bytes_read, is64);
+ ret = decode_branch_type(&insn);
+ }
+
+ if (offset)
+ *offset = insn_offset;
+
+ /*
+ * interrupts, traps, faults (and thus ring transition) may
+ * occur on any instructions. Thus, to classify them correctly,
+ * we need to first look at the from and to priv levels. If they
+ * are different and to is in the kernel, then it indicates
+ * a ring transition. If the from instruction is not a ring
+ * transition instr (syscall, systenter, int), then it means
+ * it was a irq, trap or fault.
+ *
+ * we have no way of detecting kernel to kernel faults.
+ */
+ if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL
+ && ret != X86_BR_SYSCALL && ret != X86_BR_INT)
+ ret = X86_BR_IRQ;
+
+ /*
+ * branch priv level determined by target as
+ * is done by HW when LBR_SELECT is implemented
+ */
+ if (ret != X86_BR_NONE)
+ ret |= to_plm;
+
+ return ret;
+}
+
+int branch_type(unsigned long from, unsigned long to, int abort)
+{
+ return get_branch_type(from, to, abort, false, NULL);
+}
+
+int branch_type_fused(unsigned long from, unsigned long to, int abort,
+ int *offset)
+{
+ return get_branch_type(from, to, abort, true, offset);
+}
+
+#define X86_BR_TYPE_MAP_MAX 16
+
+static int branch_map[X86_BR_TYPE_MAP_MAX] = {
+ PERF_BR_CALL, /* X86_BR_CALL */
+ PERF_BR_RET, /* X86_BR_RET */
+ PERF_BR_SYSCALL, /* X86_BR_SYSCALL */
+ PERF_BR_SYSRET, /* X86_BR_SYSRET */
+ PERF_BR_UNKNOWN, /* X86_BR_INT */
+ PERF_BR_ERET, /* X86_BR_IRET */
+ PERF_BR_COND, /* X86_BR_JCC */
+ PERF_BR_UNCOND, /* X86_BR_JMP */
+ PERF_BR_IRQ, /* X86_BR_IRQ */
+ PERF_BR_IND_CALL, /* X86_BR_IND_CALL */
+ PERF_BR_UNKNOWN, /* X86_BR_ABORT */
+ PERF_BR_UNKNOWN, /* X86_BR_IN_TX */
+ PERF_BR_NO_TX, /* X86_BR_NO_TX */
+ PERF_BR_CALL, /* X86_BR_ZERO_CALL */
+ PERF_BR_UNKNOWN, /* X86_BR_CALL_STACK */
+ PERF_BR_IND, /* X86_BR_IND_JMP */
+};
+
+int common_branch_type(int type)
+{
+ int i;
+
+ type >>= 2; /* skip X86_BR_USER and X86_BR_KERNEL */
+
+ if (type) {
+ i = __ffs(type);
+ if (i < X86_BR_TYPE_MAP_MAX)
+ return branch_map[i];
+ }
+
+ return PERF_BR_UNKNOWN;
+}