aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/arch
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-05-26 15:40:23 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2025-05-26 15:40:23 -0700
commitddddf9d64f7361323da663637adb4a02466bfc99 (patch)
tree5fa3fc48db5d0ed98d0386754cf435fc6cd67cbb /arch
parentMerge tag 'sched-core-2025-05-25' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip (diff)
parentperf/headers: Clean up <linux/perf_event.h> a bit (diff)
downloadwireguard-linux-ddddf9d64f7361323da663637adb4a02466bfc99.tar.xz
wireguard-linux-ddddf9d64f7361323da663637adb4a02466bfc99.zip
Merge tag 'perf-core-2025-05-25' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf events updates from Ingo Molnar: "Core & generic-arch updates: - Add support for dynamic constraints and propagate it to the Intel driver (Kan Liang) - Fix & enhance driver-specific throttling support (Kan Liang) - Record sample last_period before updating on the x86 and PowerPC platforms (Mark Barnett) - Make perf_pmu_unregister() usable (Peter Zijlstra) - Unify perf_event_free_task() / perf_event_exit_task_context() (Peter Zijlstra) - Simplify perf_event_release_kernel() and perf_event_free_task() (Peter Zijlstra) - Allocate non-contiguous AUX pages by default (Yabin Cui) Uprobes updates: - Add support to emulate NOP instructions (Jiri Olsa) - selftests/bpf: Add 5-byte NOP uprobe trigger benchmark (Jiri Olsa) x86 Intel PMU enhancements: - Support Intel Auto Counter Reload [ACR] (Kan Liang) - Add PMU support for Clearwater Forest (Dapeng Mi) - Arch-PEBS preparatory changes: (Dapeng Mi) - Parse CPUID archPerfmonExt leaves for non-hybrid CPUs - Decouple BTS initialization from PEBS initialization - Introduce pairs of PEBS static calls x86 AMD PMU enhancements: - Use hrtimer for handling overflows in the AMD uncore driver (Sandipan Das) - Prevent UMC counters from saturating (Sandipan Das) Fixes and cleanups: - Fix put_ctx() ordering (Frederic Weisbecker) - Fix irq work dereferencing garbage (Frederic Weisbecker) - Misc fixes and cleanups (Changbin Du, Frederic Weisbecker, Ian Rogers, Ingo Molnar, Kan Liang, Peter Zijlstra, Qing Wang, Sandipan Das, Thorsten Blum)" * tag 'perf-core-2025-05-25' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (60 commits) perf/headers: Clean up <linux/perf_event.h> a bit perf/uapi: Clean up <uapi/linux/perf_event.h> a bit perf/uapi: Fix PERF_RECORD_SAMPLE comments in <uapi/linux/perf_event.h> mips/perf: Remove driver-specific throttle support xtensa/perf: Remove driver-specific throttle support sparc/perf: Remove driver-specific throttle support loongarch/perf: Remove driver-specific throttle support csky/perf: Remove driver-specific throttle support arc/perf: Remove driver-specific throttle support alpha/perf: Remove driver-specific throttle support perf/apple_m1: Remove driver-specific throttle support perf/arm: Remove driver-specific throttle support s390/perf: Remove driver-specific throttle support powerpc/perf: Remove driver-specific throttle support perf/x86/zhaoxin: Remove driver-specific throttle support perf/x86/amd: Remove driver-specific throttle support perf/x86/intel: Remove driver-specific throttle support perf: Only dump the throttle log for the leader perf: Fix the throttle logic for a group perf/core: Add the is_event_in_freq_mode() helper to simplify the code ...
Diffstat (limited to 'arch')
-rw-r--r--arch/alpha/kernel/perf_event.c11
-rw-r--r--arch/arc/kernel/perf_event.c6
-rw-r--r--arch/csky/kernel/perf_event.c3
-rw-r--r--arch/loongarch/kernel/perf_event.c3
-rw-r--r--arch/mips/kernel/perf_event_mipsxx.c3
-rw-r--r--arch/powerpc/perf/core-book3s.c9
-rw-r--r--arch/powerpc/perf/core-fsl-emb.c6
-rw-r--r--arch/s390/kernel/perf_cpum_cf.c2
-rw-r--r--arch/s390/kernel/perf_cpum_sf.c5
-rw-r--r--arch/sparc/kernel/perf_event.c3
-rw-r--r--arch/x86/events/amd/core.c3
-rw-r--r--arch/x86/events/amd/ibs.c4
-rw-r--r--arch/x86/events/amd/uncore.c103
-rw-r--r--arch/x86/events/core.c37
-rw-r--r--arch/x86/events/intel/bts.c150
-rw-r--r--arch/x86/events/intel/core.c345
-rw-r--r--arch/x86/events/intel/ds.c55
-rw-r--r--arch/x86/events/intel/knc.c7
-rw-r--r--arch/x86/events/intel/lbr.c2
-rw-r--r--arch/x86/events/intel/p4.c3
-rw-r--r--arch/x86/events/intel/pt.c2
-rw-r--r--arch/x86/events/intel/uncore.c12
-rw-r--r--arch/x86/events/perf_event.h45
-rw-r--r--arch/x86/events/perf_event_flags.h41
-rw-r--r--arch/x86/events/zhaoxin/core.c3
-rw-r--r--arch/x86/include/asm/msr-index.h4
-rw-r--r--arch/x86/include/asm/perf_event.h1
-rw-r--r--arch/x86/kernel/uprobes.c5
-rw-r--r--arch/xtensa/kernel/perf_event.c3
29 files changed, 656 insertions, 220 deletions
diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c
index 1f0eb4f25c0f..a3eaab094ece 100644
--- a/arch/alpha/kernel/perf_event.c
+++ b/arch/alpha/kernel/perf_event.c
@@ -852,14 +852,9 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr,
alpha_perf_event_update(event, hwc, idx, alpha_pmu->pmc_max_period[idx]+1);
perf_sample_data_init(&data, 0, hwc->last_period);
- if (alpha_perf_event_set_period(event, hwc, idx)) {
- if (perf_event_overflow(event, &data, regs)) {
- /* Interrupts coming too quickly; "throttle" the
- * counter, i.e., disable it for a little while.
- */
- alpha_pmu_stop(event, 0);
- }
- }
+ if (alpha_perf_event_set_period(event, hwc, idx))
+ perf_event_overflow(event, &data, regs);
+
wrperfmon(PERFMON_CMD_ENABLE, cpuc->idx_mask);
return;
diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c
index 6e5a651cd75c..ed6d4f0cd621 100644
--- a/arch/arc/kernel/perf_event.c
+++ b/arch/arc/kernel/perf_event.c
@@ -599,10 +599,8 @@ static irqreturn_t arc_pmu_intr(int irq, void *dev)
arc_perf_event_update(event, &event->hw, event->hw.idx);
perf_sample_data_init(&data, 0, hwc->last_period);
- if (arc_pmu_event_set_period(event)) {
- if (perf_event_overflow(event, &data, regs))
- arc_pmu_stop(event, 0);
- }
+ if (arc_pmu_event_set_period(event))
+ perf_event_overflow(event, &data, regs);
active_ints &= ~BIT(idx);
} while (active_ints);
diff --git a/arch/csky/kernel/perf_event.c b/arch/csky/kernel/perf_event.c
index e5f18420ce64..e0a36acd265b 100644
--- a/arch/csky/kernel/perf_event.c
+++ b/arch/csky/kernel/perf_event.c
@@ -1139,8 +1139,7 @@ static irqreturn_t csky_pmu_handle_irq(int irq_num, void *dev)
perf_sample_data_init(&data, 0, hwc->last_period);
csky_pmu_event_set_period(event);
- if (perf_event_overflow(event, &data, regs))
- csky_pmu_stop_event(event);
+ perf_event_overflow(event, &data, regs);
}
csky_pmu_enable(&csky_pmu.pmu);
diff --git a/arch/loongarch/kernel/perf_event.c b/arch/loongarch/kernel/perf_event.c
index f86a4b838dd7..8ad098703488 100644
--- a/arch/loongarch/kernel/perf_event.c
+++ b/arch/loongarch/kernel/perf_event.c
@@ -479,8 +479,7 @@ static void handle_associated_event(struct cpu_hw_events *cpuc, int idx,
if (!loongarch_pmu_event_set_period(event, hwc, idx))
return;
- if (perf_event_overflow(event, data, regs))
- loongarch_pmu_disable_event(idx);
+ perf_event_overflow(event, data, regs);
}
static irqreturn_t pmu_handle_irq(int irq, void *dev)
diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c
index c4d6b09136b1..196a070349b0 100644
--- a/arch/mips/kernel/perf_event_mipsxx.c
+++ b/arch/mips/kernel/perf_event_mipsxx.c
@@ -791,8 +791,7 @@ static void handle_associated_event(struct cpu_hw_events *cpuc,
if (!mipspmu_event_set_period(event, hwc, idx))
return;
- if (perf_event_overflow(event, data, regs))
- mipsxx_pmu_disable_event(idx);
+ perf_event_overflow(event, data, regs);
}
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index b906d28f74fd..8b0081441f85 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -2239,6 +2239,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
struct pt_regs *regs)
{
u64 period = event->hw.sample_period;
+ const u64 last_period = event->hw.last_period;
s64 prev, delta, left;
int record = 0;
@@ -2320,7 +2321,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
if (record) {
struct perf_sample_data data;
- perf_sample_data_init(&data, ~0ULL, event->hw.last_period);
+ perf_sample_data_init(&data, ~0ULL, last_period);
if (event->attr.sample_type & PERF_SAMPLE_ADDR_TYPE)
perf_get_data_addr(event, regs, &data.addr);
@@ -2343,12 +2344,10 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
ppmu->get_mem_weight(&data.weight.full, event->attr.sample_type);
data.sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
}
- if (perf_event_overflow(event, &data, regs))
- power_pmu_stop(event, 0);
+ perf_event_overflow(event, &data, regs);
} else if (period) {
/* Account for interrupt in case of invalid SIAR */
- if (perf_event_account_interrupt(event))
- power_pmu_stop(event, 0);
+ perf_event_account_interrupt(event);
}
}
diff --git a/arch/powerpc/perf/core-fsl-emb.c b/arch/powerpc/perf/core-fsl-emb.c
index 1a53ab08447c..7120ab20cbfe 100644
--- a/arch/powerpc/perf/core-fsl-emb.c
+++ b/arch/powerpc/perf/core-fsl-emb.c
@@ -590,6 +590,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
struct pt_regs *regs)
{
u64 period = event->hw.sample_period;
+ const u64 last_period = event->hw.last_period;
s64 prev, delta, left;
int record = 0;
@@ -632,10 +633,9 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
if (record) {
struct perf_sample_data data;
- perf_sample_data_init(&data, 0, event->hw.last_period);
+ perf_sample_data_init(&data, 0, last_period);
- if (perf_event_overflow(event, &data, regs))
- fsl_emb_pmu_stop(event, 0);
+ perf_event_overflow(event, &data, regs);
}
}
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index e657fad7e376..6a262e198e35 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -980,8 +980,6 @@ static int cfdiag_push_sample(struct perf_event *event,
}
overflow = perf_event_overflow(event, &data, &regs);
- if (overflow)
- event->pmu->stop(event, 0);
perf_event_update_userpage(event);
return overflow;
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index ad22799d8a7d..91469401f2c9 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -1072,10 +1072,7 @@ static int perf_push_sample(struct perf_event *event,
overflow = 0;
if (perf_event_exclude(event, &regs, sde_regs))
goto out;
- if (perf_event_overflow(event, &data, &regs)) {
- overflow = 1;
- event->pmu->stop(event, 0);
- }
+ overflow = perf_event_overflow(event, &data, &regs);
perf_event_update_userpage(event);
out:
return overflow;
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index f02a283a8e8f..cae4d33002a5 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -1668,8 +1668,7 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
if (!sparc_perf_event_set_period(event, hwc, idx))
continue;
- if (perf_event_overflow(event, &data, regs))
- sparc_pmu_stop(event, 0);
+ perf_event_overflow(event, &data, regs);
}
finish_clock = sched_clock();
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index 30d6ceb4c8ad..5e64283b9bf2 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -1003,8 +1003,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);
- if (perf_event_overflow(event, &data, regs))
- x86_pmu_stop(event, 0);
+ perf_event_overflow(event, &data, regs);
}
/*
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index 0252b7ea8bca..4bbbca02aeb1 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -1373,9 +1373,7 @@ fail:
hwc->sample_period = perf_ibs->min_period;
out:
- if (throttle) {
- perf_ibs_stop(event, 0);
- } else {
+ if (!throttle) {
if (perf_ibs == &perf_ibs_op) {
if (ibs_caps & IBS_CAPS_OPCNTEXT) {
new_config = period & IBS_OP_MAX_CNT_EXT_MASK;
diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
index 49c26ce2b115..d328de166481 100644
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -21,6 +21,7 @@
#define NUM_COUNTERS_NB 4
#define NUM_COUNTERS_L2 4
#define NUM_COUNTERS_L3 6
+#define NUM_COUNTERS_MAX 64
#define RDPMC_BASE_NB 6
#define RDPMC_BASE_LLC 10
@@ -38,7 +39,10 @@ struct amd_uncore_ctx {
int refcnt;
int cpu;
struct perf_event **events;
- struct hlist_node node;
+ unsigned long active_mask[BITS_TO_LONGS(NUM_COUNTERS_MAX)];
+ int nr_active;
+ struct hrtimer hrtimer;
+ u64 hrtimer_duration;
};
struct amd_uncore_pmu {
@@ -83,11 +87,51 @@ struct amd_uncore {
static struct amd_uncore uncores[UNCORE_TYPE_MAX];
+/* Interval for hrtimer, defaults to 60000 milliseconds */
+static unsigned int update_interval = 60 * MSEC_PER_SEC;
+module_param(update_interval, uint, 0444);
+
static struct amd_uncore_pmu *event_to_amd_uncore_pmu(struct perf_event *event)
{
return container_of(event->pmu, struct amd_uncore_pmu, pmu);
}
+static enum hrtimer_restart amd_uncore_hrtimer(struct hrtimer *hrtimer)
+{
+ struct amd_uncore_ctx *ctx;
+ struct perf_event *event;
+ int bit;
+
+ ctx = container_of(hrtimer, struct amd_uncore_ctx, hrtimer);
+
+ if (!ctx->nr_active || ctx->cpu != smp_processor_id())
+ return HRTIMER_NORESTART;
+
+ for_each_set_bit(bit, ctx->active_mask, NUM_COUNTERS_MAX) {
+ event = ctx->events[bit];
+ event->pmu->read(event);
+ }
+
+ hrtimer_forward_now(hrtimer, ns_to_ktime(ctx->hrtimer_duration));
+ return HRTIMER_RESTART;
+}
+
+static void amd_uncore_start_hrtimer(struct amd_uncore_ctx *ctx)
+{
+ hrtimer_start(&ctx->hrtimer, ns_to_ktime(ctx->hrtimer_duration),
+ HRTIMER_MODE_REL_PINNED_HARD);
+}
+
+static void amd_uncore_cancel_hrtimer(struct amd_uncore_ctx *ctx)
+{
+ hrtimer_cancel(&ctx->hrtimer);
+}
+
+static void amd_uncore_init_hrtimer(struct amd_uncore_ctx *ctx)
+{
+ hrtimer_setup(&ctx->hrtimer, amd_uncore_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
+}
+
static void amd_uncore_read(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
@@ -118,18 +162,26 @@ static void amd_uncore_read(struct perf_event *event)
static void amd_uncore_start(struct perf_event *event, int flags)
{
+ struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event);
+ struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
struct hw_perf_event *hwc = &event->hw;
+ if (!ctx->nr_active++)
+ amd_uncore_start_hrtimer(ctx);
+
if (flags & PERF_EF_RELOAD)
wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count));
hwc->state = 0;
+ __set_bit(hwc->idx, ctx->active_mask);
wrmsrl(hwc->config_base, (hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE));
perf_event_update_userpage(event);
}
static void amd_uncore_stop(struct perf_event *event, int flags)
{
+ struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event);
+ struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
struct hw_perf_event *hwc = &event->hw;
wrmsrl(hwc->config_base, hwc->config);
@@ -139,6 +191,11 @@ static void amd_uncore_stop(struct perf_event *event, int flags)
event->pmu->read(event);
hwc->state |= PERF_HES_UPTODATE;
}
+
+ if (!--ctx->nr_active)
+ amd_uncore_cancel_hrtimer(ctx);
+
+ __clear_bit(hwc->idx, ctx->active_mask);
}
static int amd_uncore_add(struct perf_event *event, int flags)
@@ -491,6 +548,9 @@ static int amd_uncore_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
goto fail;
}
+ amd_uncore_init_hrtimer(curr);
+ curr->hrtimer_duration = (u64)update_interval * NSEC_PER_MSEC;
+
cpumask_set_cpu(cpu, &pmu->active_mask);
}
@@ -880,16 +940,55 @@ static int amd_uncore_umc_event_init(struct perf_event *event)
static void amd_uncore_umc_start(struct perf_event *event, int flags)
{
+ struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event);
+ struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
struct hw_perf_event *hwc = &event->hw;
+ if (!ctx->nr_active++)
+ amd_uncore_start_hrtimer(ctx);
+
if (flags & PERF_EF_RELOAD)
wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count));
hwc->state = 0;
+ __set_bit(hwc->idx, ctx->active_mask);
wrmsrl(hwc->config_base, (hwc->config | AMD64_PERFMON_V2_ENABLE_UMC));
perf_event_update_userpage(event);
}
+static void amd_uncore_umc_read(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ u64 prev, new, shift;
+ s64 delta;
+
+ shift = COUNTER_SHIFT + 1;
+ prev = local64_read(&hwc->prev_count);
+
+ /*
+ * UMC counters do not have RDPMC assignments. Read counts directly
+ * from the corresponding PERF_CTR.
+ */
+ rdmsrl(hwc->event_base, new);
+
+ /*
+ * Unlike the other uncore counters, UMC counters saturate and set the
+ * Overflow bit (bit 48) on overflow. Since they do not roll over,
+ * proactively reset the corresponding PERF_CTR when bit 47 is set so
+ * that the counter never gets a chance to saturate.
+ */
+ if (new & BIT_ULL(63 - COUNTER_SHIFT)) {
+ wrmsrl(hwc->event_base, 0);
+ local64_set(&hwc->prev_count, 0);
+ } else {
+ local64_set(&hwc->prev_count, new);
+ }
+
+ delta = (new << shift) - (prev << shift);
+ delta >>= shift;
+ local64_add(delta, &event->count);
+}
+
static
void amd_uncore_umc_ctx_scan(struct amd_uncore *uncore, unsigned int cpu)
{
@@ -968,7 +1067,7 @@ int amd_uncore_umc_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
.del = amd_uncore_del,
.start = amd_uncore_umc_start,
.stop = amd_uncore_stop,
- .read = amd_uncore_read,
+ .read = amd_uncore_umc_read,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
.module = THIS_MODULE,
};
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 139ad80d1df3..4c49eef8052e 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -95,6 +95,11 @@ DEFINE_STATIC_CALL_NULL(x86_pmu_filter, *x86_pmu.filter);
DEFINE_STATIC_CALL_NULL(x86_pmu_late_setup, *x86_pmu.late_setup);
+DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_enable, *x86_pmu.pebs_enable);
+DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_disable, *x86_pmu.pebs_disable);
+DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_enable_all, *x86_pmu.pebs_enable_all);
+DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_disable_all, *x86_pmu.pebs_disable_all);
+
/*
* This one is magic, it will get called even when PMU init fails (because
* there is no PMU), in which case it should simply return NULL.
@@ -674,6 +679,7 @@ static int __x86_pmu_event_init(struct perf_event *event)
event->hw.idx = -1;
event->hw.last_cpu = -1;
event->hw.last_tag = ~0ULL;
+ event->hw.dyn_constraint = ~0ULL;
/* mark unused */
event->hw.extra_reg.idx = EXTRA_REG_NONE;
@@ -756,15 +762,16 @@ void x86_pmu_enable_all(int added)
int is_x86_event(struct perf_event *event)
{
- int i;
-
- if (!is_hybrid())
- return event->pmu == &pmu;
-
- for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) {
- if (event->pmu == &x86_pmu.hybrid_pmu[i].pmu)
- return true;
- }
+ /*
+ * For a non-hybrid platforms, the type of X86 pmu is
+ * always PERF_TYPE_RAW.
+ * For a hybrid platform, the PERF_PMU_CAP_EXTENDED_HW_TYPE
+ * is a unique capability for the X86 PMU.
+ * Use them to detect a X86 event.
+ */
+ if (event->pmu->type == PERF_TYPE_RAW ||
+ event->pmu->capabilities & PERF_PMU_CAP_EXTENDED_HW_TYPE)
+ return true;
return false;
}
@@ -1683,6 +1690,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
struct cpu_hw_events *cpuc;
struct perf_event *event;
int idx, handled = 0;
+ u64 last_period;
u64 val;
cpuc = this_cpu_ptr(&cpu_hw_events);
@@ -1702,6 +1710,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
continue;
event = cpuc->events[idx];
+ last_period = event->hw.last_period;
val = static_call(x86_pmu_update)(event);
if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
@@ -1715,12 +1724,11 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
if (!static_call(x86_pmu_set_period)(event))
continue;
- perf_sample_data_init(&data, 0, event->hw.last_period);
+ perf_sample_data_init(&data, 0, last_period);
perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);
- if (perf_event_overflow(event, &data, regs))
- x86_pmu_stop(event, 0);
+ perf_event_overflow(event, &data, regs);
}
if (handled)
@@ -2046,6 +2054,11 @@ static void x86_pmu_static_call_update(void)
static_call_update(x86_pmu_filter, x86_pmu.filter);
static_call_update(x86_pmu_late_setup, x86_pmu.late_setup);
+
+ static_call_update(x86_pmu_pebs_enable, x86_pmu.pebs_enable);
+ static_call_update(x86_pmu_pebs_disable, x86_pmu.pebs_disable);
+ static_call_update(x86_pmu_pebs_enable_all, x86_pmu.pebs_enable_all);
+ static_call_update(x86_pmu_pebs_disable_all, x86_pmu.pebs_disable_all);
}
static void _x86_pmu_read(struct perf_event *event)
diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c
index a95e6c91c4d7..9560f693fac0 100644
--- a/arch/x86/events/intel/bts.c
+++ b/arch/x86/events/intel/bts.c
@@ -80,54 +80,54 @@ static void *
bts_buffer_setup_aux(struct perf_event *event, void **pages,
int nr_pages, bool overwrite)
{
- struct bts_buffer *buf;
+ struct bts_buffer *bb;
struct page *page;
int cpu = event->cpu;
int node = (cpu == -1) ? cpu : cpu_to_node(cpu);
unsigned long offset;
size_t size = nr_pages << PAGE_SHIFT;
- int pg, nbuf, pad;
+ int pg, nr_buf, pad;
/* count all the high order buffers */
- for (pg = 0, nbuf = 0; pg < nr_pages;) {
+ for (pg = 0, nr_buf = 0; pg < nr_pages;) {
page = virt_to_page(pages[pg]);
pg += buf_nr_pages(page);
- nbuf++;
+ nr_buf++;
}
/*
* to avoid interrupts in overwrite mode, only allow one physical
*/
- if (overwrite && nbuf > 1)
+ if (overwrite && nr_buf > 1)
return NULL;
- buf = kzalloc_node(offsetof(struct bts_buffer, buf[nbuf]), GFP_KERNEL, node);
- if (!buf)
+ bb = kzalloc_node(struct_size(bb, buf, nr_buf), GFP_KERNEL, node);
+ if (!bb)
return NULL;
- buf->nr_pages = nr_pages;
- buf->nr_bufs = nbuf;
- buf->snapshot = overwrite;
- buf->data_pages = pages;
- buf->real_size = size - size % BTS_RECORD_SIZE;
+ bb->nr_pages = nr_pages;
+ bb->nr_bufs = nr_buf;
+ bb->snapshot = overwrite;
+ bb->data_pages = pages;
+ bb->real_size = size - size % BTS_RECORD_SIZE;
- for (pg = 0, nbuf = 0, offset = 0, pad = 0; nbuf < buf->nr_bufs; nbuf++) {
+ for (pg = 0, nr_buf = 0, offset = 0, pad = 0; nr_buf < bb->nr_bufs; nr_buf++) {
unsigned int __nr_pages;
page = virt_to_page(pages[pg]);
__nr_pages = buf_nr_pages(page);
- buf->buf[nbuf].page = page;
- buf->buf[nbuf].offset = offset;
- buf->buf[nbuf].displacement = (pad ? BTS_RECORD_SIZE - pad : 0);
- buf->buf[nbuf].size = buf_size(page) - buf->buf[nbuf].displacement;
- pad = buf->buf[nbuf].size % BTS_RECORD_SIZE;
- buf->buf[nbuf].size -= pad;
+ bb->buf[nr_buf].page = page;
+ bb->buf[nr_buf].offset = offset;
+ bb->buf[nr_buf].displacement = (pad ? BTS_RECORD_SIZE - pad : 0);
+ bb->buf[nr_buf].size = buf_size(page) - bb->buf[nr_buf].displacement;
+ pad = bb->buf[nr_buf].size % BTS_RECORD_SIZE;
+ bb->buf[nr_buf].size -= pad;
pg += __nr_pages;
offset += __nr_pages << PAGE_SHIFT;
}
- return buf;
+ return bb;
}
static void bts_buffer_free_aux(void *data)
@@ -135,25 +135,25 @@ static void bts_buffer_free_aux(void *data)
kfree(data);
}
-static unsigned long bts_buffer_offset(struct bts_buffer *buf, unsigned int idx)
+static unsigned long bts_buffer_offset(struct bts_buffer *bb, unsigned int idx)
{
- return buf->buf[idx].offset + buf->buf[idx].displacement;
+ return bb->buf[idx].offset + bb->buf[idx].displacement;
}
static void
-bts_config_buffer(struct bts_buffer *buf)
+bts_config_buffer(struct bts_buffer *bb)
{
int cpu = raw_smp_processor_id();
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
- struct bts_phys *phys = &buf->buf[buf->cur_buf];
+ struct bts_phys *phys = &bb->buf[bb->cur_buf];
unsigned long index, thresh = 0, end = phys->size;
struct page *page = phys->page;
- index = local_read(&buf->head);
+ index = local_read(&bb->head);
- if (!buf->snapshot) {
- if (buf->end < phys->offset + buf_size(page))
- end = buf->end - phys->offset - phys->displacement;
+ if (!bb->snapshot) {
+ if (bb->end < phys->offset + buf_size(page))
+ end = bb->end - phys->offset - phys->displacement;
index -= phys->offset + phys->displacement;
@@ -168,7 +168,7 @@ bts_config_buffer(struct bts_buffer *buf)
ds->bts_buffer_base = (u64)(long)page_address(page) + phys->displacement;
ds->bts_index = ds->bts_buffer_base + index;
ds->bts_absolute_maximum = ds->bts_buffer_base + end;
- ds->bts_interrupt_threshold = !buf->snapshot
+ ds->bts_interrupt_threshold = !bb->snapshot
? ds->bts_buffer_base + thresh
: ds->bts_absolute_maximum + BTS_RECORD_SIZE;
}
@@ -184,16 +184,16 @@ static void bts_update(struct bts_ctx *bts)
{
int cpu = raw_smp_processor_id();
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
- struct bts_buffer *buf = perf_get_aux(&bts->handle);
+ struct bts_buffer *bb = perf_get_aux(&bts->handle);
unsigned long index = ds->bts_index - ds->bts_buffer_base, old, head;
- if (!buf)
+ if (!bb)
return;
- head = index + bts_buffer_offset(buf, buf->cur_buf);
- old = local_xchg(&buf->head, head);
+ head = index + bts_buffer_offset(bb, bb->cur_buf);
+ old = local_xchg(&bb->head, head);
- if (!buf->snapshot) {
+ if (!bb->snapshot) {
if (old == head)
return;
@@ -205,9 +205,9 @@ static void bts_update(struct bts_ctx *bts)
* old and head are always in the same physical buffer, so we
* can subtract them to get the data size.
*/
- local_add(head - old, &buf->data_size);
+ local_add(head - old, &bb->data_size);
} else {
- local_set(&buf->data_size, head);
+ local_set(&bb->data_size, head);
}
/*
@@ -218,7 +218,7 @@ static void bts_update(struct bts_ctx *bts)
}
static int
-bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle);
+bts_buffer_reset(struct bts_buffer *bb, struct perf_output_handle *handle);
/*
* Ordering PMU callbacks wrt themselves and the PMI is done by means
@@ -232,17 +232,17 @@ bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle);
static void __bts_event_start(struct perf_event *event)
{
struct bts_ctx *bts = this_cpu_ptr(bts_ctx);
- struct bts_buffer *buf = perf_get_aux(&bts->handle);
+ struct bts_buffer *bb = perf_get_aux(&bts->handle);
u64 config = 0;
- if (!buf->snapshot)
+ if (!bb->snapshot)
config |= ARCH_PERFMON_EVENTSEL_INT;
if (!event->attr.exclude_kernel)
config |= ARCH_PERFMON_EVENTSEL_OS;
if (!event->attr.exclude_user)
config |= ARCH_PERFMON_EVENTSEL_USR;
- bts_config_buffer(buf);
+ bts_config_buffer(bb);
/*
* local barrier to make sure that ds configuration made it
@@ -261,13 +261,13 @@ static void bts_event_start(struct perf_event *event, int flags)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct bts_ctx *bts = this_cpu_ptr(bts_ctx);
- struct bts_buffer *buf;
+ struct bts_buffer *bb;
- buf = perf_aux_output_begin(&bts->handle, event);
- if (!buf)
+ bb = perf_aux_output_begin(&bts->handle, event);
+ if (!bb)
goto fail_stop;
- if (bts_buffer_reset(buf, &bts->handle))
+ if (bts_buffer_reset(bb, &bts->handle))
goto fail_end_stop;
bts->ds_back.bts_buffer_base = cpuc->ds->bts_buffer_base;
@@ -306,27 +306,27 @@ static void bts_event_stop(struct perf_event *event, int flags)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct bts_ctx *bts = this_cpu_ptr(bts_ctx);
- struct bts_buffer *buf = NULL;
+ struct bts_buffer *bb = NULL;
int state = READ_ONCE(bts->state);
if (state == BTS_STATE_ACTIVE)
__bts_event_stop(event, BTS_STATE_STOPPED);
if (state != BTS_STATE_STOPPED)
- buf = perf_get_aux(&bts->handle);
+ bb = perf_get_aux(&bts->handle);
event->hw.state |= PERF_HES_STOPPED;
if (flags & PERF_EF_UPDATE) {
bts_update(bts);
- if (buf) {
- if (buf->snapshot)
+ if (bb) {
+ if (bb->snapshot)
bts->handle.head =
- local_xchg(&buf->data_size,
- buf->nr_pages << PAGE_SHIFT);
+ local_xchg(&bb->data_size,
+ bb->nr_pages << PAGE_SHIFT);
perf_aux_output_end(&bts->handle,
- local_xchg(&buf->data_size, 0));
+ local_xchg(&bb->data_size, 0));
}
cpuc->ds->bts_index = bts->ds_back.bts_buffer_base;
@@ -382,19 +382,19 @@ void intel_bts_disable_local(void)
}
static int
-bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle)
+bts_buffer_reset(struct bts_buffer *bb, struct perf_output_handle *handle)
{
unsigned long head, space, next_space, pad, gap, skip, wakeup;
unsigned int next_buf;
struct bts_phys *phys, *next_phys;
int ret;
- if (buf->snapshot)
+ if (bb->snapshot)
return 0;
- head = handle->head & ((buf->nr_pages << PAGE_SHIFT) - 1);
+ head = handle->head & ((bb->nr_pages << PAGE_SHIFT) - 1);
- phys = &buf->buf[buf->cur_buf];
+ phys = &bb->buf[bb->cur_buf];
space = phys->offset + phys->displacement + phys->size - head;
pad = space;
if (space > handle->size) {
@@ -403,10 +403,10 @@ bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle)
}
if (space <= BTS_SAFETY_MARGIN) {
/* See if next phys buffer has more space */
- next_buf = buf->cur_buf + 1;
- if (next_buf >= buf->nr_bufs)
+ next_buf = bb->cur_buf + 1;
+ if (next_buf >= bb->nr_bufs)
next_buf = 0;
- next_phys = &buf->buf[next_buf];
+ next_phys = &bb->buf[next_buf];
gap = buf_size(phys->page) - phys->displacement - phys->size +
next_phys->displacement;
skip = pad + gap;
@@ -431,8 +431,8 @@ bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle)
* anymore, so we must not be racing with
* bts_update().
*/
- buf->cur_buf = next_buf;
- local_set(&buf->head, head);
+ bb->cur_buf = next_buf;
+ local_set(&bb->head, head);
}
}
}
@@ -445,7 +445,7 @@ bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle)
space -= space % BTS_RECORD_SIZE;
}
- buf->end = head + space;
+ bb->end = head + space;
/*
* If we have no space, the lost notification would have been sent when
@@ -462,7 +462,7 @@ int intel_bts_interrupt(void)
struct debug_store *ds = this_cpu_ptr(&cpu_hw_events)->ds;
struct bts_ctx *bts;
struct perf_event *event;
- struct bts_buffer *buf;
+ struct bts_buffer *bb;
s64 old_head;
int err = -ENOSPC, handled = 0;
@@ -485,8 +485,8 @@ int intel_bts_interrupt(void)
if (READ_ONCE(bts->state) == BTS_STATE_STOPPED)
return handled;
- buf = perf_get_aux(&bts->handle);
- if (!buf)
+ bb = perf_get_aux(&bts->handle);
+ if (!bb)
return handled;
/*
@@ -494,26 +494,26 @@ int intel_bts_interrupt(void)
* there's no other way of telling, because the pointer will
* keep moving
*/
- if (buf->snapshot)
+ if (bb->snapshot)
return 0;
- old_head = local_read(&buf->head);
+ old_head = local_read(&bb->head);
bts_update(bts);
/* no new data */
- if (old_head == local_read(&buf->head))
+ if (old_head == local_read(&bb->head))
return handled;
- perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0));
+ perf_aux_output_end(&bts->handle, local_xchg(&bb->data_size, 0));
- buf = perf_aux_output_begin(&bts->handle, event);
- if (buf)
- err = bts_buffer_reset(buf, &bts->handle);
+ bb = perf_aux_output_begin(&bts->handle, event);
+ if (bb)
+ err = bts_buffer_reset(bb, &bts->handle);
if (err) {
WRITE_ONCE(bts->state, BTS_STATE_STOPPED);
- if (buf) {
+ if (bb) {
/*
* BTS_STATE_STOPPED should be visible before
* cleared handle::event
@@ -599,7 +599,11 @@ static void bts_event_read(struct perf_event *event)
static __init int bts_init(void)
{
- if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts)
+ if (!boot_cpu_has(X86_FEATURE_DTES64))
+ return -ENODEV;
+
+ x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS);
+ if (!x86_pmu.bts)
return -ENODEV;
if (boot_cpu_has(X86_FEATURE_PTI)) {
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index c5f385413392..3204591249e8 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2224,6 +2224,18 @@ static struct extra_reg intel_cmt_extra_regs[] __read_mostly = {
EVENT_EXTRA_END
};
+EVENT_ATTR_STR(topdown-fe-bound, td_fe_bound_skt, "event=0x9c,umask=0x01");
+EVENT_ATTR_STR(topdown-retiring, td_retiring_skt, "event=0xc2,umask=0x02");
+EVENT_ATTR_STR(topdown-be-bound, td_be_bound_skt, "event=0xa4,umask=0x02");
+
+static struct attribute *skt_events_attrs[] = {
+ EVENT_PTR(td_fe_bound_skt),
+ EVENT_PTR(td_retiring_skt),
+ EVENT_PTR(td_bad_spec_cmt),
+ EVENT_PTR(td_be_bound_skt),
+ NULL,
+};
+
#define KNL_OT_L2_HITE BIT_ULL(19) /* Other Tile L2 Hit */
#define KNL_OT_L2_HITF BIT_ULL(20) /* Other Tile L2 Hit */
#define KNL_MCDRAM_LOCAL BIT_ULL(21)
@@ -2294,7 +2306,7 @@ static __always_inline void __intel_pmu_disable_all(bool bts)
static __always_inline void intel_pmu_disable_all(void)
{
__intel_pmu_disable_all(true);
- intel_pmu_pebs_disable_all();
+ static_call_cond(x86_pmu_pebs_disable_all)();
intel_pmu_lbr_disable_all();
}
@@ -2326,7 +2338,7 @@ static void __intel_pmu_enable_all(int added, bool pmi)
static void intel_pmu_enable_all(int added)
{
- intel_pmu_pebs_enable_all();
+ static_call_cond(x86_pmu_pebs_enable_all)();
__intel_pmu_enable_all(added, false);
}
@@ -2583,7 +2595,7 @@ static void intel_pmu_disable_event(struct perf_event *event)
* so we don't trigger the event without PEBS bit set.
*/
if (unlikely(event->attr.precise_ip))
- intel_pmu_pebs_disable(event);
+ static_call(x86_pmu_pebs_disable)(event);
}
static void intel_pmu_assign_event(struct perf_event *event, int idx)
@@ -2603,6 +2615,9 @@ static void intel_pmu_del_event(struct perf_event *event)
intel_pmu_lbr_del(event);
if (event->attr.precise_ip)
intel_pmu_pebs_del(event);
+ if (is_pebs_counter_event_group(event) ||
+ is_acr_event_group(event))
+ this_cpu_ptr(&cpu_hw_events)->n_late_setup--;
}
static int icl_set_topdown_event_period(struct perf_event *event)
@@ -2880,6 +2895,52 @@ static void intel_pmu_enable_fixed(struct perf_event *event)
cpuc->fixed_ctrl_val |= bits;
}
+static void intel_pmu_config_acr(int idx, u64 mask, u32 reload)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ int msr_b, msr_c;
+
+ if (!mask && !cpuc->acr_cfg_b[idx])
+ return;
+
+ if (idx < INTEL_PMC_IDX_FIXED) {
+ msr_b = MSR_IA32_PMC_V6_GP0_CFG_B;
+ msr_c = MSR_IA32_PMC_V6_GP0_CFG_C;
+ } else {
+ msr_b = MSR_IA32_PMC_V6_FX0_CFG_B;
+ msr_c = MSR_IA32_PMC_V6_FX0_CFG_C;
+ idx -= INTEL_PMC_IDX_FIXED;
+ }
+
+ if (cpuc->acr_cfg_b[idx] != mask) {
+ wrmsrl(msr_b + x86_pmu.addr_offset(idx, false), mask);
+ cpuc->acr_cfg_b[idx] = mask;
+ }
+ /* Only need to update the reload value when there is a valid config value. */
+ if (mask && cpuc->acr_cfg_c[idx] != reload) {
+ wrmsrl(msr_c + x86_pmu.addr_offset(idx, false), reload);
+ cpuc->acr_cfg_c[idx] = reload;
+ }
+}
+
+static void intel_pmu_enable_acr(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (!is_acr_event_group(event) || !event->attr.config2) {
+ /*
+ * The disable doesn't clear the ACR CFG register.
+ * Check and clear the ACR CFG register.
+ */
+ intel_pmu_config_acr(hwc->idx, 0, 0);
+ return;
+ }
+
+ intel_pmu_config_acr(hwc->idx, hwc->config1, -hwc->sample_period);
+}
+
+DEFINE_STATIC_CALL_NULL(intel_pmu_enable_acr_event, intel_pmu_enable_acr);
+
static void intel_pmu_enable_event(struct perf_event *event)
{
u64 enable_mask = ARCH_PERFMON_EVENTSEL_ENABLE;
@@ -2887,16 +2948,19 @@ static void intel_pmu_enable_event(struct perf_event *event)
int idx = hwc->idx;
if (unlikely(event->attr.precise_ip))
- intel_pmu_pebs_enable(event);
+ static_call(x86_pmu_pebs_enable)(event);
switch (idx) {
case 0 ... INTEL_PMC_IDX_FIXED - 1:
if (branch_sample_counters(event))
enable_mask |= ARCH_PERFMON_EVENTSEL_BR_CNTR;
intel_set_masks(event, idx);
+ static_call_cond(intel_pmu_enable_acr_event)(event);
__x86_pmu_enable_event(hwc, enable_mask);
break;
case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1:
+ static_call_cond(intel_pmu_enable_acr_event)(event);
+ fallthrough;
case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
intel_pmu_enable_fixed(event);
break;
@@ -2914,12 +2978,51 @@ static void intel_pmu_enable_event(struct perf_event *event)
}
}
+static void intel_pmu_acr_late_setup(struct cpu_hw_events *cpuc)
+{
+ struct perf_event *event, *leader;
+ int i, j, idx;
+
+ for (i = 0; i < cpuc->n_events; i++) {
+ leader = cpuc->event_list[i];
+ if (!is_acr_event_group(leader))
+ continue;
+
+ /* The ACR events must be contiguous. */
+ for (j = i; j < cpuc->n_events; j++) {
+ event = cpuc->event_list[j];
+ if (event->group_leader != leader->group_leader)
+ break;
+ for_each_set_bit(idx, (unsigned long *)&event->attr.config2, X86_PMC_IDX_MAX) {
+ if (WARN_ON_ONCE(i + idx > cpuc->n_events))
+ return;
+ __set_bit(cpuc->assign[i + idx], (unsigned long *)&event->hw.config1);
+ }
+ }
+ i = j - 1;
+ }
+}
+
+void intel_pmu_late_setup(void)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ if (!cpuc->n_late_setup)
+ return;
+
+ intel_pmu_pebs_late_setup(cpuc);
+ intel_pmu_acr_late_setup(cpuc);
+}
+
static void intel_pmu_add_event(struct perf_event *event)
{
if (event->attr.precise_ip)
intel_pmu_pebs_add(event);
if (intel_pmu_needs_branch_stack(event))
intel_pmu_lbr_add(event);
+ if (is_pebs_counter_event_group(event) ||
+ is_acr_event_group(event))
+ this_cpu_ptr(&cpu_hw_events)->n_late_setup++;
}
/*
@@ -3035,8 +3138,7 @@ static void x86_pmu_handle_guest_pebs(struct pt_regs *regs,
continue;
perf_sample_data_init(data, 0, event->hw.last_period);
- if (perf_event_overflow(event, data, regs))
- x86_pmu_stop(event, 0);
+ perf_event_overflow(event, data, regs);
/* Inject one fake event is enough. */
break;
@@ -3141,6 +3243,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
struct perf_event *event = cpuc->events[bit];
+ u64 last_period;
handled++;
@@ -3168,16 +3271,17 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
if (is_pebs_counter_event_group(event))
x86_pmu.drain_pebs(regs, &data);
+ last_period = event->hw.last_period;
+
if (!intel_pmu_save_and_restart(event))
continue;
- perf_sample_data_init(&data, 0, event->hw.last_period);
+ perf_sample_data_init(&data, 0, last_period);
if (has_branch_stack(event))
intel_pmu_lbr_save_brstack(&data, cpuc, event);
- if (perf_event_overflow(event, &data, regs))
- x86_pmu_stop(event, 0);
+ perf_event_overflow(event, &data, regs);
}
return handled;
@@ -3739,10 +3843,9 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
if (cpuc->excl_cntrs)
return intel_get_excl_constraints(cpuc, event, idx, c2);
- /* Not all counters support the branch counter feature. */
- if (branch_sample_counters(event)) {
+ if (event->hw.dyn_constraint != ~0ULL) {
c2 = dyn_constraint(cpuc, c2, idx);
- c2->idxmsk64 &= x86_pmu.lbr_counters;
+ c2->idxmsk64 &= event->hw.dyn_constraint;
c2->weight = hweight64(c2->idxmsk64);
}
@@ -4083,6 +4186,39 @@ end:
return start;
}
+static inline bool intel_pmu_has_acr(struct pmu *pmu)
+{
+ return !!hybrid(pmu, acr_cause_mask64);
+}
+
+static bool intel_pmu_is_acr_group(struct perf_event *event)
+{
+ /* The group leader has the ACR flag set */
+ if (is_acr_event_group(event))
+ return true;
+
+ /* The acr_mask is set */
+ if (event->attr.config2)
+ return true;
+
+ return false;
+}
+
+static inline void intel_pmu_set_acr_cntr_constr(struct perf_event *event,
+ u64 *cause_mask, int *num)
+{
+ event->hw.dyn_constraint &= hybrid(event->pmu, acr_cntr_mask64);
+ *cause_mask |= event->attr.config2;
+ *num += 1;
+}
+
+static inline void intel_pmu_set_acr_caused_constr(struct perf_event *event,
+ int idx, u64 cause_mask)
+{
+ if (test_bit(idx, (unsigned long *)&cause_mask))
+ event->hw.dyn_constraint &= hybrid(event->pmu, acr_cause_mask64);
+}
+
static int intel_pmu_hw_config(struct perf_event *event)
{
int ret = x86_pmu_hw_config(event);
@@ -4144,15 +4280,19 @@ static int intel_pmu_hw_config(struct perf_event *event)
leader = event->group_leader;
if (branch_sample_call_stack(leader))
return -EINVAL;
- if (branch_sample_counters(leader))
+ if (branch_sample_counters(leader)) {
num++;
+ leader->hw.dyn_constraint &= x86_pmu.lbr_counters;
+ }
leader->hw.flags |= PERF_X86_EVENT_BRANCH_COUNTERS;
for_each_sibling_event(sibling, leader) {
if (branch_sample_call_stack(sibling))
return -EINVAL;
- if (branch_sample_counters(sibling))
+ if (branch_sample_counters(sibling)) {
num++;
+ sibling->hw.dyn_constraint &= x86_pmu.lbr_counters;
+ }
}
if (num > fls(x86_pmu.lbr_counters))
@@ -4207,6 +4347,94 @@ static int intel_pmu_hw_config(struct perf_event *event)
event->attr.precise_ip)
event->group_leader->hw.flags |= PERF_X86_EVENT_PEBS_CNTR;
+ if (intel_pmu_has_acr(event->pmu) && intel_pmu_is_acr_group(event)) {
+ struct perf_event *sibling, *leader = event->group_leader;
+ struct pmu *pmu = event->pmu;
+ bool has_sw_event = false;
+ int num = 0, idx = 0;
+ u64 cause_mask = 0;
+
+ /* Not support perf metrics */
+ if (is_metric_event(event))
+ return -EINVAL;
+
+ /* Not support freq mode */
+ if (event->attr.freq)
+ return -EINVAL;
+
+ /* PDist is not supported */
+ if (event->attr.config2 && event->attr.precise_ip > 2)
+ return -EINVAL;
+
+ /* The reload value cannot exceeds the max period */
+ if (event->attr.sample_period > x86_pmu.max_period)
+ return -EINVAL;
+ /*
+ * The counter-constraints of each event cannot be finalized
+ * unless the whole group is scanned. However, it's hard
+ * to know whether the event is the last one of the group.
+ * Recalculate the counter-constraints for each event when
+ * adding a new event.
+ *
+ * The group is traversed twice, which may be optimized later.
+ * In the first round,
+ * - Find all events which do reload when other events
+ * overflow and set the corresponding counter-constraints
+ * - Add all events, which can cause other events reload,
+ * in the cause_mask
+ * - Error out if the number of events exceeds the HW limit
+ * - The ACR events must be contiguous.
+ * Error out if there are non-X86 events between ACR events.
+ * This is not a HW limit, but a SW limit.
+ * With the assumption, the intel_pmu_acr_late_setup() can
+ * easily convert the event idx to counter idx without
+ * traversing the whole event list.
+ */
+ if (!is_x86_event(leader))
+ return -EINVAL;
+
+ if (leader->attr.config2)
+ intel_pmu_set_acr_cntr_constr(leader, &cause_mask, &num);
+
+ if (leader->nr_siblings) {
+ for_each_sibling_event(sibling, leader) {
+ if (!is_x86_event(sibling)) {
+ has_sw_event = true;
+ continue;
+ }
+ if (!sibling->attr.config2)
+ continue;
+ if (has_sw_event)
+ return -EINVAL;
+ intel_pmu_set_acr_cntr_constr(sibling, &cause_mask, &num);
+ }
+ }
+ if (leader != event && event->attr.config2) {
+ if (has_sw_event)
+ return -EINVAL;
+ intel_pmu_set_acr_cntr_constr(event, &cause_mask, &num);
+ }
+
+ if (hweight64(cause_mask) > hweight64(hybrid(pmu, acr_cause_mask64)) ||
+ num > hweight64(hybrid(event->pmu, acr_cntr_mask64)))
+ return -EINVAL;
+ /*
+ * In the second round, apply the counter-constraints for
+ * the events which can cause other events reload.
+ */
+ intel_pmu_set_acr_caused_constr(leader, idx++, cause_mask);
+
+ if (leader->nr_siblings) {
+ for_each_sibling_event(sibling, leader)
+ intel_pmu_set_acr_caused_constr(sibling, idx++, cause_mask);
+ }
+
+ if (leader != event)
+ intel_pmu_set_acr_caused_constr(event, idx, cause_mask);
+
+ leader->hw.flags |= PERF_X86_EVENT_ACR;
+ }
+
if ((event->attr.type == PERF_TYPE_HARDWARE) ||
(event->attr.type == PERF_TYPE_HW_CACHE))
return 0;
@@ -4354,7 +4582,7 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data)
.guest = intel_ctrl & ~cpuc->intel_ctrl_host_mask & ~pebs_mask,
};
- if (!x86_pmu.pebs)
+ if (!x86_pmu.ds_pebs)
return arr;
/*
@@ -4952,7 +5180,7 @@ int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu)
goto err;
}
- if (x86_pmu.flags & (PMU_FL_EXCL_CNTRS | PMU_FL_TFA | PMU_FL_BR_CNTR)) {
+ if (x86_pmu.flags & (PMU_FL_EXCL_CNTRS | PMU_FL_TFA | PMU_FL_DYN_CONSTRAINT)) {
size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint);
cpuc->constraint_list = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu));
@@ -5041,7 +5269,7 @@ static inline bool intel_pmu_broken_perf_cap(void)
return false;
}
-static void update_pmu_cap(struct x86_hybrid_pmu *pmu)
+static void update_pmu_cap(struct pmu *pmu)
{
unsigned int cntr, fixed_cntr, ecx, edx;
union cpuid35_eax eax;
@@ -5050,20 +5278,30 @@ static void update_pmu_cap(struct x86_hybrid_pmu *pmu)
cpuid(ARCH_PERFMON_EXT_LEAF, &eax.full, &ebx.full, &ecx, &edx);
if (ebx.split.umask2)
- pmu->config_mask |= ARCH_PERFMON_EVENTSEL_UMASK2;
+ hybrid(pmu, config_mask) |= ARCH_PERFMON_EVENTSEL_UMASK2;
if (ebx.split.eq)
- pmu->config_mask |= ARCH_PERFMON_EVENTSEL_EQ;
+ hybrid(pmu, config_mask) |= ARCH_PERFMON_EVENTSEL_EQ;
if (eax.split.cntr_subleaf) {
cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_NUM_COUNTER_LEAF,
&cntr, &fixed_cntr, &ecx, &edx);
- pmu->cntr_mask64 = cntr;
- pmu->fixed_cntr_mask64 = fixed_cntr;
+ hybrid(pmu, cntr_mask64) = cntr;
+ hybrid(pmu, fixed_cntr_mask64) = fixed_cntr;
+ }
+
+ if (eax.split.acr_subleaf) {
+ cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_ACR_LEAF,
+ &cntr, &fixed_cntr, &ecx, &edx);
+ /* The mask of the counters which can be reloaded */
+ hybrid(pmu, acr_cntr_mask64) = cntr | ((u64)fixed_cntr << INTEL_PMC_IDX_FIXED);
+
+ /* The mask of the counters which can cause a reload of reloadable counters */
+ hybrid(pmu, acr_cause_mask64) = ecx | ((u64)edx << INTEL_PMC_IDX_FIXED);
}
if (!intel_pmu_broken_perf_cap()) {
/* Perf Metric (Bit 15) and PEBS via PT (Bit 16) are hybrid enumeration */
- rdmsrl(MSR_IA32_PERF_CAPABILITIES, pmu->intel_cap.capabilities);
+ rdmsrl(MSR_IA32_PERF_CAPABILITIES, hybrid(pmu, intel_cap).capabilities);
}
}
@@ -5150,7 +5388,7 @@ static bool init_hybrid_pmu(int cpu)
goto end;
if (this_cpu_has(X86_FEATURE_ARCH_PERFMON_EXT))
- update_pmu_cap(pmu);
+ update_pmu_cap(&pmu->pmu);
intel_pmu_check_hybrid_pmus(pmu);
@@ -5524,7 +5762,7 @@ static __init void intel_clovertown_quirk(void)
* these chips.
*/
pr_warn("PEBS disabled due to CPU errata\n");
- x86_pmu.pebs = 0;
+ x86_pmu.ds_pebs = 0;
x86_pmu.pebs_constraints = NULL;
}
@@ -6012,7 +6250,7 @@ tsx_is_visible(struct kobject *kobj, struct attribute *attr, int i)
static umode_t
pebs_is_visible(struct kobject *kobj, struct attribute *attr, int i)
{
- return x86_pmu.pebs ? attr->mode : 0;
+ return x86_pmu.ds_pebs ? attr->mode : 0;
}
static umode_t
@@ -6043,6 +6281,21 @@ td_is_visible(struct kobject *kobj, struct attribute *attr, int i)
return attr->mode;
}
+PMU_FORMAT_ATTR(acr_mask, "config2:0-63");
+
+static struct attribute *format_acr_attrs[] = {
+ &format_attr_acr_mask.attr,
+ NULL
+};
+
+static umode_t
+acr_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ struct device *dev = kobj_to_dev(kobj);
+
+ return intel_pmu_has_acr(dev_get_drvdata(dev)) ? attr->mode : 0;
+}
+
static struct attribute_group group_events_td = {
.name = "events",
.is_visible = td_is_visible,
@@ -6085,6 +6338,12 @@ static struct attribute_group group_format_evtsel_ext = {
.is_visible = evtsel_ext_is_visible,
};
+static struct attribute_group group_format_acr = {
+ .name = "format",
+ .attrs = format_acr_attrs,
+ .is_visible = acr_is_visible,
+};
+
static struct attribute_group group_default = {
.attrs = intel_pmu_attrs,
.is_visible = default_is_visible,
@@ -6099,6 +6358,7 @@ static const struct attribute_group *attr_update[] = {
&group_format_extra,
&group_format_extra_skl,
&group_format_evtsel_ext,
+ &group_format_acr,
&group_default,
NULL,
};
@@ -6383,6 +6643,7 @@ static const struct attribute_group *hybrid_attr_update[] = {
&group_caps_lbr,
&hybrid_group_format_extra,
&group_format_evtsel_ext,
+ &group_format_acr,
&group_default,
&hybrid_group_cpus,
NULL,
@@ -6575,6 +6836,7 @@ static __always_inline void intel_pmu_init_skt(struct pmu *pmu)
intel_pmu_init_grt(pmu);
hybrid(pmu, event_constraints) = intel_skt_event_constraints;
hybrid(pmu, extra_regs) = intel_cmt_extra_regs;
+ static_call_update(intel_pmu_enable_acr_event, intel_pmu_enable_acr);
}
__init int intel_pmu_init(void)
@@ -6635,6 +6897,7 @@ __init int intel_pmu_init(void)
x86_pmu.pebs_events_mask = intel_pmu_pebs_mask(x86_pmu.cntr_mask64);
x86_pmu.pebs_capable = PEBS_COUNTER_MASK;
+ x86_pmu.config_mask = X86_RAW_EVENT_MASK;
/*
* Quirk: v2 perfmon does not report fixed-purpose events, so
@@ -6663,7 +6926,7 @@ __init int intel_pmu_init(void)
if (boot_cpu_has(X86_FEATURE_ARCH_LBR))
intel_pmu_arch_lbr_init();
- intel_ds_init();
+ intel_pebs_init();
x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */
@@ -6674,6 +6937,12 @@ __init int intel_pmu_init(void)
}
/*
+ * Many features on and after V6 require dynamic constraint,
+ * e.g., Arch PEBS, ACR.
+ */
+ if (version >= 6)
+ x86_pmu.flags |= PMU_FL_DYN_CONSTRAINT;
+ /*
* Install the hw-cache-events table:
*/
switch (boot_cpu_data.x86_vfm) {
@@ -6884,6 +7153,18 @@ __init int intel_pmu_init(void)
name = "crestmont";
break;
+ case INTEL_ATOM_DARKMONT_X:
+ intel_pmu_init_skt(NULL);
+ intel_pmu_pebs_data_source_cmt();
+ x86_pmu.pebs_latency_data = cmt_latency_data;
+ x86_pmu.get_event_constraints = cmt_get_event_constraints;
+ td_attr = skt_events_attrs;
+ mem_attr = grt_mem_attrs;
+ extra_attr = cmt_format_attr;
+ pr_cont("Darkmont events, ");
+ name = "darkmont";
+ break;
+
case INTEL_WESTMERE:
case INTEL_WESTMERE_EP:
case INTEL_WESTMERE_EX:
@@ -7433,6 +7714,18 @@ __init int intel_pmu_init(void)
x86_pmu.attr_update = hybrid_attr_update;
}
+ /*
+ * The archPerfmonExt (0x23) includes an enhanced enumeration of
+ * PMU architectural features with a per-core view. For non-hybrid,
+ * each core has the same PMU capabilities. It's good enough to
+ * update the x86_pmu from the booting CPU. For hybrid, the x86_pmu
+ * is used to keep the common capabilities. Still keep the values
+ * from the leaf 0xa. The core specific update will be done later
+ * when a new type is online.
+ */
+ if (!is_hybrid() && boot_cpu_has(X86_FEATURE_ARCH_PERFMON_EXT))
+ update_pmu_cap(NULL);
+
intel_pmu_check_counters_mask(&x86_pmu.cntr_mask64,
&x86_pmu.fixed_cntr_mask64,
&x86_pmu.intel_ctrl);
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 8d86e91bd5e5..e2ea4fdedea9 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -624,7 +624,7 @@ static int alloc_pebs_buffer(int cpu)
int max, node = cpu_to_node(cpu);
void *buffer, *insn_buff, *cea;
- if (!x86_pmu.pebs)
+ if (!x86_pmu.ds_pebs)
return 0;
buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu);
@@ -659,7 +659,7 @@ static void release_pebs_buffer(int cpu)
struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
void *cea;
- if (!x86_pmu.pebs)
+ if (!x86_pmu.ds_pebs)
return;
kfree(per_cpu(insn_buffer, cpu));
@@ -734,7 +734,7 @@ void release_ds_buffers(void)
{
int cpu;
- if (!x86_pmu.bts && !x86_pmu.pebs)
+ if (!x86_pmu.bts && !x86_pmu.ds_pebs)
return;
for_each_possible_cpu(cpu)
@@ -750,7 +750,8 @@ void release_ds_buffers(void)
}
for_each_possible_cpu(cpu) {
- release_pebs_buffer(cpu);
+ if (x86_pmu.ds_pebs)
+ release_pebs_buffer(cpu);
release_bts_buffer(cpu);
}
}
@@ -761,15 +762,17 @@ void reserve_ds_buffers(void)
int cpu;
x86_pmu.bts_active = 0;
- x86_pmu.pebs_active = 0;
- if (!x86_pmu.bts && !x86_pmu.pebs)
+ if (x86_pmu.ds_pebs)
+ x86_pmu.pebs_active = 0;
+
+ if (!x86_pmu.bts && !x86_pmu.ds_pebs)
return;
if (!x86_pmu.bts)
bts_err = 1;
- if (!x86_pmu.pebs)
+ if (!x86_pmu.ds_pebs)
pebs_err = 1;
for_each_possible_cpu(cpu) {
@@ -781,7 +784,8 @@ void reserve_ds_buffers(void)
if (!bts_err && alloc_bts_buffer(cpu))
bts_err = 1;
- if (!pebs_err && alloc_pebs_buffer(cpu))
+ if (x86_pmu.ds_pebs && !pebs_err &&
+ alloc_pebs_buffer(cpu))
pebs_err = 1;
if (bts_err && pebs_err)
@@ -793,7 +797,7 @@ void reserve_ds_buffers(void)
release_bts_buffer(cpu);
}
- if (pebs_err) {
+ if (x86_pmu.ds_pebs && pebs_err) {
for_each_possible_cpu(cpu)
release_pebs_buffer(cpu);
}
@@ -805,7 +809,7 @@ void reserve_ds_buffers(void)
if (x86_pmu.bts && !bts_err)
x86_pmu.bts_active = 1;
- if (x86_pmu.pebs && !pebs_err)
+ if (x86_pmu.ds_pebs && !pebs_err)
x86_pmu.pebs_active = 1;
for_each_possible_cpu(cpu) {
@@ -1355,9 +1359,8 @@ static void __intel_pmu_pebs_update_cfg(struct perf_event *event,
}
-static void intel_pmu_late_setup(void)
+void intel_pmu_pebs_late_setup(struct cpu_hw_events *cpuc)
{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct perf_event *event;
u64 pebs_data_cfg = 0;
int i;
@@ -1828,8 +1831,6 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
perf_sample_data_init(data, 0, event->hw.last_period);
- data->period = event->hw.last_period;
-
/*
* Use latency for weight (only avail with PEBS-LL)
*/
@@ -2082,7 +2083,6 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
sample_type = event->attr.sample_type;
format_group = basic->format_group;
perf_sample_data_init(data, 0, event->hw.last_period);
- data->period = event->hw.last_period;
setup_pebs_time(event, data, basic->tsc);
@@ -2359,8 +2359,7 @@ __intel_pmu_pebs_last_event(struct perf_event *event,
* All but the last records are processed.
* The last one is left to be able to call the overflow handler.
*/
- if (perf_event_overflow(event, data, regs))
- x86_pmu_stop(event, 0);
+ perf_event_overflow(event, data, regs);
}
if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
@@ -2589,8 +2588,8 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
if (error[bit]) {
perf_log_lost_samples(event, error[bit]);
- if (iregs && perf_event_account_interrupt(event))
- x86_pmu_stop(event, 0);
+ if (iregs)
+ perf_event_account_interrupt(event);
}
if (counts[bit]) {
@@ -2670,10 +2669,10 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d
}
/*
- * BTS, PEBS probe and setup
+ * PEBS probe and setup
*/
-void __init intel_ds_init(void)
+void __init intel_pebs_init(void)
{
/*
* No support for 32bit formats
@@ -2681,13 +2680,12 @@ void __init intel_ds_init(void)
if (!boot_cpu_has(X86_FEATURE_DTES64))
return;
- x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS);
- x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
+ x86_pmu.ds_pebs = boot_cpu_has(X86_FEATURE_PEBS);
x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
if (x86_pmu.version <= 4)
x86_pmu.pebs_no_isolation = 1;
- if (x86_pmu.pebs) {
+ if (x86_pmu.ds_pebs) {
char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-';
char *pebs_qual = "";
int format = x86_pmu.intel_cap.pebs_format;
@@ -2695,6 +2693,11 @@ void __init intel_ds_init(void)
if (format < 4)
x86_pmu.intel_cap.pebs_baseline = 0;
+ x86_pmu.pebs_enable = intel_pmu_pebs_enable;
+ x86_pmu.pebs_disable = intel_pmu_pebs_disable;
+ x86_pmu.pebs_enable_all = intel_pmu_pebs_enable_all;
+ x86_pmu.pebs_disable_all = intel_pmu_pebs_disable_all;
+
switch (format) {
case 0:
pr_cont("PEBS fmt0%c, ", pebs_type);
@@ -2779,7 +2782,7 @@ void __init intel_ds_init(void)
default:
pr_cont("no PEBS fmt%d%c, ", format, pebs_type);
- x86_pmu.pebs = 0;
+ x86_pmu.ds_pebs = 0;
}
}
}
@@ -2788,7 +2791,7 @@ void perf_restore_debug_store(void)
{
struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
- if (!x86_pmu.bts && !x86_pmu.pebs)
+ if (!x86_pmu.bts && !x86_pmu.ds_pebs)
return;
wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ds);
diff --git a/arch/x86/events/intel/knc.c b/arch/x86/events/intel/knc.c
index 034a1f6a457c..384589168c1a 100644
--- a/arch/x86/events/intel/knc.c
+++ b/arch/x86/events/intel/knc.c
@@ -241,19 +241,20 @@ again:
for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
struct perf_event *event = cpuc->events[bit];
+ u64 last_period;
handled++;
if (!test_bit(bit, cpuc->active_mask))
continue;
+ last_period = event->hw.last_period;
if (!intel_pmu_save_and_restart(event))
continue;
- perf_sample_data_init(&data, 0, event->hw.last_period);
+ perf_sample_data_init(&data, 0, last_period);
- if (perf_event_overflow(event, &data, regs))
- x86_pmu_stop(event, 0);
+ perf_event_overflow(event, &data, regs);
}
/*
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index f44c3d866f24..05acd6449ceb 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -1618,7 +1618,7 @@ void __init intel_pmu_arch_lbr_init(void)
x86_pmu.lbr_nr = lbr_nr;
if (!!x86_pmu.lbr_counters)
- x86_pmu.flags |= PMU_FL_BR_CNTR;
+ x86_pmu.flags |= PMU_FL_BR_CNTR | PMU_FL_DYN_CONSTRAINT;
if (x86_pmu.lbr_mispred)
static_branch_enable(&x86_lbr_mispred);
diff --git a/arch/x86/events/intel/p4.c b/arch/x86/events/intel/p4.c
index c85a9fc44355..126d5ae264cb 100644
--- a/arch/x86/events/intel/p4.c
+++ b/arch/x86/events/intel/p4.c
@@ -1072,8 +1072,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
continue;
- if (perf_event_overflow(event, &data, regs))
- x86_pmu_stop(event, 0);
+ perf_event_overflow(event, &data, regs);
}
if (handled)
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index fa37565f6418..25ead919fc48 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -1863,6 +1863,8 @@ static __init int pt_init(void)
if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries))
pt_pmu.pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG;
+ else
+ pt_pmu.pmu.capabilities = PERF_PMU_CAP_AUX_PREFER_LARGE;
pt_pmu.pmu.capabilities |= PERF_PMU_CAP_EXCLUSIVE |
PERF_PMU_CAP_ITRACE |
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index a34e50fc4a8f..5811e172f721 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -305,17 +305,11 @@ static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
{
struct intel_uncore_box *box;
struct perf_event *event;
- unsigned long flags;
int bit;
box = container_of(hrtimer, struct intel_uncore_box, hrtimer);
if (!box->n_active || box->cpu != smp_processor_id())
return HRTIMER_NORESTART;
- /*
- * disable local interrupt to prevent uncore_pmu_event_start/stop
- * to interrupt the update process
- */
- local_irq_save(flags);
/*
* handle boxes with an active event list as opposed to active
@@ -328,8 +322,6 @@ static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX)
uncore_perf_event_update(box, box->events[bit]);
- local_irq_restore(flags);
-
hrtimer_forward_now(hrtimer, ns_to_ktime(box->hrtimer_duration));
return HRTIMER_RESTART;
}
@@ -337,7 +329,7 @@ static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
void uncore_pmu_start_hrtimer(struct intel_uncore_box *box)
{
hrtimer_start(&box->hrtimer, ns_to_ktime(box->hrtimer_duration),
- HRTIMER_MODE_REL_PINNED);
+ HRTIMER_MODE_REL_PINNED_HARD);
}
void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)
@@ -347,7 +339,7 @@ void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)
static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
{
- hrtimer_setup(&box->hrtimer, uncore_pmu_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer_setup(&box->hrtimer, uncore_pmu_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
}
static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 46d120597bab..e8bce89821be 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -127,6 +127,11 @@ static inline bool is_pebs_counter_event_group(struct perf_event *event)
return check_leader_group(event->group_leader, PERF_X86_EVENT_PEBS_CNTR);
}
+static inline bool is_acr_event_group(struct perf_event *event)
+{
+ return check_leader_group(event->group_leader, PERF_X86_EVENT_ACR);
+}
+
struct amd_nb {
int nb_id; /* NorthBridge id */
int refcnt; /* reference count */
@@ -268,6 +273,7 @@ struct cpu_hw_events {
struct event_constraint *event_constraint[X86_PMC_IDX_MAX];
int n_excl; /* the number of exclusive events */
+ int n_late_setup; /* the num of events needs late setup */
unsigned int txn_flags;
int is_fake;
@@ -293,6 +299,10 @@ struct cpu_hw_events {
u64 fixed_ctrl_val;
u64 active_fixed_ctrl_val;
+ /* Intel ACR configuration */
+ u64 acr_cfg_b[X86_PMC_IDX_MAX];
+ u64 acr_cfg_c[X86_PMC_IDX_MAX];
+
/*
* Intel LBR bits
*/
@@ -714,6 +724,15 @@ struct x86_hybrid_pmu {
u64 fixed_cntr_mask64;
unsigned long fixed_cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
};
+
+ union {
+ u64 acr_cntr_mask64;
+ unsigned long acr_cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ };
+ union {
+ u64 acr_cause_mask64;
+ unsigned long acr_cause_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ };
struct event_constraint unconstrained;
u64 hw_cache_event_ids
@@ -796,6 +815,10 @@ struct x86_pmu {
int (*hw_config)(struct perf_event *event);
int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
void (*late_setup)(void);
+ void (*pebs_enable)(struct perf_event *event);
+ void (*pebs_disable)(struct perf_event *event);
+ void (*pebs_enable_all)(void);
+ void (*pebs_disable_all)(void);
unsigned eventsel;
unsigned perfctr;
unsigned fixedctr;
@@ -812,6 +835,14 @@ struct x86_pmu {
u64 fixed_cntr_mask64;
unsigned long fixed_cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
};
+ union {
+ u64 acr_cntr_mask64;
+ unsigned long acr_cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ };
+ union {
+ u64 acr_cause_mask64;
+ unsigned long acr_cause_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ };
int cntval_bits;
u64 cntval_mask;
union {
@@ -878,7 +909,7 @@ struct x86_pmu {
*/
unsigned int bts :1,
bts_active :1,
- pebs :1,
+ ds_pebs :1,
pebs_active :1,
pebs_broken :1,
pebs_prec_dist :1,
@@ -1049,6 +1080,7 @@ do { \
#define PMU_FL_MEM_LOADS_AUX 0x100 /* Require an auxiliary event for the complete memory info */
#define PMU_FL_RETIRE_LATENCY 0x200 /* Support Retire Latency in PEBS */
#define PMU_FL_BR_CNTR 0x400 /* Support branch counter logging */
+#define PMU_FL_DYN_CONSTRAINT 0x800 /* Needs dynamic constraint */
#define EVENT_VAR(_id) event_attr_##_id
#define EVENT_PTR(_id) &event_attr_##_id.attr.attr
@@ -1091,6 +1123,7 @@ static struct perf_pmu_format_hybrid_attr format_attr_hybrid_##_name = {\
.pmu_type = _pmu, \
}
+int is_x86_event(struct perf_event *event);
struct pmu *x86_get_pmu(unsigned int cpu);
extern struct x86_pmu x86_pmu __read_mostly;
@@ -1098,6 +1131,10 @@ DECLARE_STATIC_CALL(x86_pmu_set_period, *x86_pmu.set_period);
DECLARE_STATIC_CALL(x86_pmu_update, *x86_pmu.update);
DECLARE_STATIC_CALL(x86_pmu_drain_pebs, *x86_pmu.drain_pebs);
DECLARE_STATIC_CALL(x86_pmu_late_setup, *x86_pmu.late_setup);
+DECLARE_STATIC_CALL(x86_pmu_pebs_enable, *x86_pmu.pebs_enable);
+DECLARE_STATIC_CALL(x86_pmu_pebs_disable, *x86_pmu.pebs_disable);
+DECLARE_STATIC_CALL(x86_pmu_pebs_enable_all, *x86_pmu.pebs_enable_all);
+DECLARE_STATIC_CALL(x86_pmu_pebs_disable_all, *x86_pmu.pebs_disable_all);
static __always_inline struct x86_perf_task_context_opt *task_context_opt(void *ctx)
{
@@ -1587,6 +1624,8 @@ void intel_pmu_disable_bts(void);
int intel_pmu_drain_bts_buffer(void);
+void intel_pmu_late_setup(void);
+
u64 grt_latency_data(struct perf_event *event, u64 status);
u64 cmt_latency_data(struct perf_event *event, u64 status);
@@ -1643,11 +1682,13 @@ void intel_pmu_pebs_disable_all(void);
void intel_pmu_pebs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in);
+void intel_pmu_pebs_late_setup(struct cpu_hw_events *cpuc);
+
void intel_pmu_drain_pebs_buffer(void);
void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr);
-void intel_ds_init(void);
+void intel_pebs_init(void);
void intel_pmu_lbr_save_brstack(struct perf_sample_data *data,
struct cpu_hw_events *cpuc,
diff --git a/arch/x86/events/perf_event_flags.h b/arch/x86/events/perf_event_flags.h
index 1d9e385649b5..70078334e4a3 100644
--- a/arch/x86/events/perf_event_flags.h
+++ b/arch/x86/events/perf_event_flags.h
@@ -2,23 +2,24 @@
/*
* struct hw_perf_event.flags flags
*/
-PERF_ARCH(PEBS_LDLAT, 0x00001) /* ld+ldlat data address sampling */
-PERF_ARCH(PEBS_ST, 0x00002) /* st data address sampling */
-PERF_ARCH(PEBS_ST_HSW, 0x00004) /* haswell style datala, store */
-PERF_ARCH(PEBS_LD_HSW, 0x00008) /* haswell style datala, load */
-PERF_ARCH(PEBS_NA_HSW, 0x00010) /* haswell style datala, unknown */
-PERF_ARCH(EXCL, 0x00020) /* HT exclusivity on counter */
-PERF_ARCH(DYNAMIC, 0x00040) /* dynamic alloc'd constraint */
-PERF_ARCH(PEBS_CNTR, 0x00080) /* PEBS counters snapshot */
-PERF_ARCH(EXCL_ACCT, 0x00100) /* accounted EXCL event */
-PERF_ARCH(AUTO_RELOAD, 0x00200) /* use PEBS auto-reload */
-PERF_ARCH(LARGE_PEBS, 0x00400) /* use large PEBS */
-PERF_ARCH(PEBS_VIA_PT, 0x00800) /* use PT buffer for PEBS */
-PERF_ARCH(PAIR, 0x01000) /* Large Increment per Cycle */
-PERF_ARCH(LBR_SELECT, 0x02000) /* Save/Restore MSR_LBR_SELECT */
-PERF_ARCH(TOPDOWN, 0x04000) /* Count Topdown slots/metrics events */
-PERF_ARCH(PEBS_STLAT, 0x08000) /* st+stlat data address sampling */
-PERF_ARCH(AMD_BRS, 0x10000) /* AMD Branch Sampling */
-PERF_ARCH(PEBS_LAT_HYBRID, 0x20000) /* ld and st lat for hybrid */
-PERF_ARCH(NEEDS_BRANCH_STACK, 0x40000) /* require branch stack setup */
-PERF_ARCH(BRANCH_COUNTERS, 0x80000) /* logs the counters in the extra space of each branch */
+PERF_ARCH(PEBS_LDLAT, 0x0000001) /* ld+ldlat data address sampling */
+PERF_ARCH(PEBS_ST, 0x0000002) /* st data address sampling */
+PERF_ARCH(PEBS_ST_HSW, 0x0000004) /* haswell style datala, store */
+PERF_ARCH(PEBS_LD_HSW, 0x0000008) /* haswell style datala, load */
+PERF_ARCH(PEBS_NA_HSW, 0x0000010) /* haswell style datala, unknown */
+PERF_ARCH(EXCL, 0x0000020) /* HT exclusivity on counter */
+PERF_ARCH(DYNAMIC, 0x0000040) /* dynamic alloc'd constraint */
+PERF_ARCH(PEBS_CNTR, 0x0000080) /* PEBS counters snapshot */
+PERF_ARCH(EXCL_ACCT, 0x0000100) /* accounted EXCL event */
+PERF_ARCH(AUTO_RELOAD, 0x0000200) /* use PEBS auto-reload */
+PERF_ARCH(LARGE_PEBS, 0x0000400) /* use large PEBS */
+PERF_ARCH(PEBS_VIA_PT, 0x0000800) /* use PT buffer for PEBS */
+PERF_ARCH(PAIR, 0x0001000) /* Large Increment per Cycle */
+PERF_ARCH(LBR_SELECT, 0x0002000) /* Save/Restore MSR_LBR_SELECT */
+PERF_ARCH(TOPDOWN, 0x0004000) /* Count Topdown slots/metrics events */
+PERF_ARCH(PEBS_STLAT, 0x0008000) /* st+stlat data address sampling */
+PERF_ARCH(AMD_BRS, 0x0010000) /* AMD Branch Sampling */
+PERF_ARCH(PEBS_LAT_HYBRID, 0x0020000) /* ld and st lat for hybrid */
+PERF_ARCH(NEEDS_BRANCH_STACK, 0x0040000) /* require branch stack setup */
+PERF_ARCH(BRANCH_COUNTERS, 0x0080000) /* logs the counters in the extra space of each branch */
+PERF_ARCH(ACR, 0x0100000) /* Auto counter reload */
diff --git a/arch/x86/events/zhaoxin/core.c b/arch/x86/events/zhaoxin/core.c
index 2fd9b0cf9a5e..49a5944fac63 100644
--- a/arch/x86/events/zhaoxin/core.c
+++ b/arch/x86/events/zhaoxin/core.c
@@ -397,8 +397,7 @@ again:
if (!x86_perf_event_set_period(event))
continue;
- if (perf_event_overflow(event, &data, regs))
- x86_pmu_stop(event, 0);
+ perf_event_overflow(event, &data, regs);
}
/*
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index e7d2f460fcc6..7c3f5ef1a169 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -602,7 +602,11 @@
/* V6 PMON MSR range */
#define MSR_IA32_PMC_V6_GP0_CTR 0x1900
#define MSR_IA32_PMC_V6_GP0_CFG_A 0x1901
+#define MSR_IA32_PMC_V6_GP0_CFG_B 0x1902
+#define MSR_IA32_PMC_V6_GP0_CFG_C 0x1903
#define MSR_IA32_PMC_V6_FX0_CTR 0x1980
+#define MSR_IA32_PMC_V6_FX0_CFG_B 0x1982
+#define MSR_IA32_PMC_V6_FX0_CFG_C 0x1983
#define MSR_IA32_PMC_V6_STEP 4
/* KeyID partitioning between MKTME and TDX */
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 812dac3f79f0..70d1d94aca7e 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -195,6 +195,7 @@ union cpuid10_edx {
*/
#define ARCH_PERFMON_EXT_LEAF 0x00000023
#define ARCH_PERFMON_NUM_COUNTER_LEAF 0x1
+#define ARCH_PERFMON_ACR_LEAF 0x2
union cpuid35_eax {
struct {
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 9194695662b2..6d383839e839 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -840,6 +840,11 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
insn_byte_t p;
int i;
+ /* x86_nops[insn->length]; same as jmp with .offs = 0 */
+ if (insn->length <= ASM_NOP_MAX &&
+ !memcmp(insn->kaddr, x86_nops[insn->length], insn->length))
+ goto setup;
+
switch (opc1) {
case 0xeb: /* jmp 8 */
case 0xe9: /* jmp 32 */
diff --git a/arch/xtensa/kernel/perf_event.c b/arch/xtensa/kernel/perf_event.c
index 183618090d05..223f1d452310 100644
--- a/arch/xtensa/kernel/perf_event.c
+++ b/arch/xtensa/kernel/perf_event.c
@@ -388,8 +388,7 @@ irqreturn_t xtensa_pmu_irq_handler(int irq, void *dev_id)
struct pt_regs *regs = get_irq_regs();
perf_sample_data_init(&data, 0, last_period);
- if (perf_event_overflow(event, &data, regs))
- xtensa_pmu_stop(event, 0);
+ perf_event_overflow(event, &data, regs);
}
rc = IRQ_HANDLED;