diff options
author | 2025-05-26 15:40:23 -0700 | |
---|---|---|
committer | 2025-05-26 15:40:23 -0700 | |
commit | ddddf9d64f7361323da663637adb4a02466bfc99 (patch) | |
tree | 5fa3fc48db5d0ed98d0386754cf435fc6cd67cbb /arch | |
parent | Merge tag 'sched-core-2025-05-25' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip (diff) | |
parent | perf/headers: Clean up <linux/perf_event.h> a bit (diff) | |
download | wireguard-linux-ddddf9d64f7361323da663637adb4a02466bfc99.tar.xz wireguard-linux-ddddf9d64f7361323da663637adb4a02466bfc99.zip |
Merge tag 'perf-core-2025-05-25' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf events updates from Ingo Molnar:
"Core & generic-arch updates:
- Add support for dynamic constraints and propagate it to the Intel
driver (Kan Liang)
- Fix & enhance driver-specific throttling support (Kan Liang)
- Record sample last_period before updating on the x86 and PowerPC
platforms (Mark Barnett)
- Make perf_pmu_unregister() usable (Peter Zijlstra)
- Unify perf_event_free_task() / perf_event_exit_task_context()
(Peter Zijlstra)
- Simplify perf_event_release_kernel() and perf_event_free_task()
(Peter Zijlstra)
- Allocate non-contiguous AUX pages by default (Yabin Cui)
Uprobes updates:
- Add support to emulate NOP instructions (Jiri Olsa)
- selftests/bpf: Add 5-byte NOP uprobe trigger benchmark (Jiri Olsa)
x86 Intel PMU enhancements:
- Support Intel Auto Counter Reload [ACR] (Kan Liang)
- Add PMU support for Clearwater Forest (Dapeng Mi)
- Arch-PEBS preparatory changes: (Dapeng Mi)
- Parse CPUID archPerfmonExt leaves for non-hybrid CPUs
- Decouple BTS initialization from PEBS initialization
- Introduce pairs of PEBS static calls
x86 AMD PMU enhancements:
- Use hrtimer for handling overflows in the AMD uncore driver
(Sandipan Das)
- Prevent UMC counters from saturating (Sandipan Das)
Fixes and cleanups:
- Fix put_ctx() ordering (Frederic Weisbecker)
- Fix irq work dereferencing garbage (Frederic Weisbecker)
- Misc fixes and cleanups (Changbin Du, Frederic Weisbecker, Ian
Rogers, Ingo Molnar, Kan Liang, Peter Zijlstra, Qing Wang, Sandipan
Das, Thorsten Blum)"
* tag 'perf-core-2025-05-25' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (60 commits)
perf/headers: Clean up <linux/perf_event.h> a bit
perf/uapi: Clean up <uapi/linux/perf_event.h> a bit
perf/uapi: Fix PERF_RECORD_SAMPLE comments in <uapi/linux/perf_event.h>
mips/perf: Remove driver-specific throttle support
xtensa/perf: Remove driver-specific throttle support
sparc/perf: Remove driver-specific throttle support
loongarch/perf: Remove driver-specific throttle support
csky/perf: Remove driver-specific throttle support
arc/perf: Remove driver-specific throttle support
alpha/perf: Remove driver-specific throttle support
perf/apple_m1: Remove driver-specific throttle support
perf/arm: Remove driver-specific throttle support
s390/perf: Remove driver-specific throttle support
powerpc/perf: Remove driver-specific throttle support
perf/x86/zhaoxin: Remove driver-specific throttle support
perf/x86/amd: Remove driver-specific throttle support
perf/x86/intel: Remove driver-specific throttle support
perf: Only dump the throttle log for the leader
perf: Fix the throttle logic for a group
perf/core: Add the is_event_in_freq_mode() helper to simplify the code
...
Diffstat (limited to 'arch')
29 files changed, 656 insertions, 220 deletions
diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c index 1f0eb4f25c0f..a3eaab094ece 100644 --- a/arch/alpha/kernel/perf_event.c +++ b/arch/alpha/kernel/perf_event.c @@ -852,14 +852,9 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr, alpha_perf_event_update(event, hwc, idx, alpha_pmu->pmc_max_period[idx]+1); perf_sample_data_init(&data, 0, hwc->last_period); - if (alpha_perf_event_set_period(event, hwc, idx)) { - if (perf_event_overflow(event, &data, regs)) { - /* Interrupts coming too quickly; "throttle" the - * counter, i.e., disable it for a little while. - */ - alpha_pmu_stop(event, 0); - } - } + if (alpha_perf_event_set_period(event, hwc, idx)) + perf_event_overflow(event, &data, regs); + wrperfmon(PERFMON_CMD_ENABLE, cpuc->idx_mask); return; diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c index 6e5a651cd75c..ed6d4f0cd621 100644 --- a/arch/arc/kernel/perf_event.c +++ b/arch/arc/kernel/perf_event.c @@ -599,10 +599,8 @@ static irqreturn_t arc_pmu_intr(int irq, void *dev) arc_perf_event_update(event, &event->hw, event->hw.idx); perf_sample_data_init(&data, 0, hwc->last_period); - if (arc_pmu_event_set_period(event)) { - if (perf_event_overflow(event, &data, regs)) - arc_pmu_stop(event, 0); - } + if (arc_pmu_event_set_period(event)) + perf_event_overflow(event, &data, regs); active_ints &= ~BIT(idx); } while (active_ints); diff --git a/arch/csky/kernel/perf_event.c b/arch/csky/kernel/perf_event.c index e5f18420ce64..e0a36acd265b 100644 --- a/arch/csky/kernel/perf_event.c +++ b/arch/csky/kernel/perf_event.c @@ -1139,8 +1139,7 @@ static irqreturn_t csky_pmu_handle_irq(int irq_num, void *dev) perf_sample_data_init(&data, 0, hwc->last_period); csky_pmu_event_set_period(event); - if (perf_event_overflow(event, &data, regs)) - csky_pmu_stop_event(event); + perf_event_overflow(event, &data, regs); } csky_pmu_enable(&csky_pmu.pmu); diff --git a/arch/loongarch/kernel/perf_event.c b/arch/loongarch/kernel/perf_event.c index f86a4b838dd7..8ad098703488 100644 --- a/arch/loongarch/kernel/perf_event.c +++ b/arch/loongarch/kernel/perf_event.c @@ -479,8 +479,7 @@ static void handle_associated_event(struct cpu_hw_events *cpuc, int idx, if (!loongarch_pmu_event_set_period(event, hwc, idx)) return; - if (perf_event_overflow(event, data, regs)) - loongarch_pmu_disable_event(idx); + perf_event_overflow(event, data, regs); } static irqreturn_t pmu_handle_irq(int irq, void *dev) diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c index c4d6b09136b1..196a070349b0 100644 --- a/arch/mips/kernel/perf_event_mipsxx.c +++ b/arch/mips/kernel/perf_event_mipsxx.c @@ -791,8 +791,7 @@ static void handle_associated_event(struct cpu_hw_events *cpuc, if (!mipspmu_event_set_period(event, hwc, idx)) return; - if (perf_event_overflow(event, data, regs)) - mipsxx_pmu_disable_event(idx); + perf_event_overflow(event, data, regs); } diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index b906d28f74fd..8b0081441f85 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -2239,6 +2239,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val, struct pt_regs *regs) { u64 period = event->hw.sample_period; + const u64 last_period = event->hw.last_period; s64 prev, delta, left; int record = 0; @@ -2320,7 +2321,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val, if (record) { struct perf_sample_data data; - perf_sample_data_init(&data, ~0ULL, event->hw.last_period); + perf_sample_data_init(&data, ~0ULL, last_period); if (event->attr.sample_type & PERF_SAMPLE_ADDR_TYPE) perf_get_data_addr(event, regs, &data.addr); @@ -2343,12 +2344,10 @@ static void record_and_restart(struct perf_event *event, unsigned long val, ppmu->get_mem_weight(&data.weight.full, event->attr.sample_type); data.sample_flags |= PERF_SAMPLE_WEIGHT_TYPE; } - if (perf_event_overflow(event, &data, regs)) - power_pmu_stop(event, 0); + perf_event_overflow(event, &data, regs); } else if (period) { /* Account for interrupt in case of invalid SIAR */ - if (perf_event_account_interrupt(event)) - power_pmu_stop(event, 0); + perf_event_account_interrupt(event); } } diff --git a/arch/powerpc/perf/core-fsl-emb.c b/arch/powerpc/perf/core-fsl-emb.c index 1a53ab08447c..7120ab20cbfe 100644 --- a/arch/powerpc/perf/core-fsl-emb.c +++ b/arch/powerpc/perf/core-fsl-emb.c @@ -590,6 +590,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val, struct pt_regs *regs) { u64 period = event->hw.sample_period; + const u64 last_period = event->hw.last_period; s64 prev, delta, left; int record = 0; @@ -632,10 +633,9 @@ static void record_and_restart(struct perf_event *event, unsigned long val, if (record) { struct perf_sample_data data; - perf_sample_data_init(&data, 0, event->hw.last_period); + perf_sample_data_init(&data, 0, last_period); - if (perf_event_overflow(event, &data, regs)) - fsl_emb_pmu_stop(event, 0); + perf_event_overflow(event, &data, regs); } } diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index e657fad7e376..6a262e198e35 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -980,8 +980,6 @@ static int cfdiag_push_sample(struct perf_event *event, } overflow = perf_event_overflow(event, &data, ®s); - if (overflow) - event->pmu->stop(event, 0); perf_event_update_userpage(event); return overflow; diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index ad22799d8a7d..91469401f2c9 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -1072,10 +1072,7 @@ static int perf_push_sample(struct perf_event *event, overflow = 0; if (perf_event_exclude(event, ®s, sde_regs)) goto out; - if (perf_event_overflow(event, &data, ®s)) { - overflow = 1; - event->pmu->stop(event, 0); - } + overflow = perf_event_overflow(event, &data, ®s); perf_event_update_userpage(event); out: return overflow; diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index f02a283a8e8f..cae4d33002a5 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -1668,8 +1668,7 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, if (!sparc_perf_event_set_period(event, hwc, idx)) continue; - if (perf_event_overflow(event, &data, regs)) - sparc_pmu_stop(event, 0); + perf_event_overflow(event, &data, regs); } finish_clock = sched_clock(); diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index 30d6ceb4c8ad..5e64283b9bf2 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -1003,8 +1003,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs) perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL); - if (perf_event_overflow(event, &data, regs)) - x86_pmu_stop(event, 0); + perf_event_overflow(event, &data, regs); } /* diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index 0252b7ea8bca..4bbbca02aeb1 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -1373,9 +1373,7 @@ fail: hwc->sample_period = perf_ibs->min_period; out: - if (throttle) { - perf_ibs_stop(event, 0); - } else { + if (!throttle) { if (perf_ibs == &perf_ibs_op) { if (ibs_caps & IBS_CAPS_OPCNTEXT) { new_config = period & IBS_OP_MAX_CNT_EXT_MASK; diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index 49c26ce2b115..d328de166481 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -21,6 +21,7 @@ #define NUM_COUNTERS_NB 4 #define NUM_COUNTERS_L2 4 #define NUM_COUNTERS_L3 6 +#define NUM_COUNTERS_MAX 64 #define RDPMC_BASE_NB 6 #define RDPMC_BASE_LLC 10 @@ -38,7 +39,10 @@ struct amd_uncore_ctx { int refcnt; int cpu; struct perf_event **events; - struct hlist_node node; + unsigned long active_mask[BITS_TO_LONGS(NUM_COUNTERS_MAX)]; + int nr_active; + struct hrtimer hrtimer; + u64 hrtimer_duration; }; struct amd_uncore_pmu { @@ -83,11 +87,51 @@ struct amd_uncore { static struct amd_uncore uncores[UNCORE_TYPE_MAX]; +/* Interval for hrtimer, defaults to 60000 milliseconds */ +static unsigned int update_interval = 60 * MSEC_PER_SEC; +module_param(update_interval, uint, 0444); + static struct amd_uncore_pmu *event_to_amd_uncore_pmu(struct perf_event *event) { return container_of(event->pmu, struct amd_uncore_pmu, pmu); } +static enum hrtimer_restart amd_uncore_hrtimer(struct hrtimer *hrtimer) +{ + struct amd_uncore_ctx *ctx; + struct perf_event *event; + int bit; + + ctx = container_of(hrtimer, struct amd_uncore_ctx, hrtimer); + + if (!ctx->nr_active || ctx->cpu != smp_processor_id()) + return HRTIMER_NORESTART; + + for_each_set_bit(bit, ctx->active_mask, NUM_COUNTERS_MAX) { + event = ctx->events[bit]; + event->pmu->read(event); + } + + hrtimer_forward_now(hrtimer, ns_to_ktime(ctx->hrtimer_duration)); + return HRTIMER_RESTART; +} + +static void amd_uncore_start_hrtimer(struct amd_uncore_ctx *ctx) +{ + hrtimer_start(&ctx->hrtimer, ns_to_ktime(ctx->hrtimer_duration), + HRTIMER_MODE_REL_PINNED_HARD); +} + +static void amd_uncore_cancel_hrtimer(struct amd_uncore_ctx *ctx) +{ + hrtimer_cancel(&ctx->hrtimer); +} + +static void amd_uncore_init_hrtimer(struct amd_uncore_ctx *ctx) +{ + hrtimer_setup(&ctx->hrtimer, amd_uncore_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); +} + static void amd_uncore_read(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; @@ -118,18 +162,26 @@ static void amd_uncore_read(struct perf_event *event) static void amd_uncore_start(struct perf_event *event, int flags) { + struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event); + struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu); struct hw_perf_event *hwc = &event->hw; + if (!ctx->nr_active++) + amd_uncore_start_hrtimer(ctx); + if (flags & PERF_EF_RELOAD) wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count)); hwc->state = 0; + __set_bit(hwc->idx, ctx->active_mask); wrmsrl(hwc->config_base, (hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE)); perf_event_update_userpage(event); } static void amd_uncore_stop(struct perf_event *event, int flags) { + struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event); + struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu); struct hw_perf_event *hwc = &event->hw; wrmsrl(hwc->config_base, hwc->config); @@ -139,6 +191,11 @@ static void amd_uncore_stop(struct perf_event *event, int flags) event->pmu->read(event); hwc->state |= PERF_HES_UPTODATE; } + + if (!--ctx->nr_active) + amd_uncore_cancel_hrtimer(ctx); + + __clear_bit(hwc->idx, ctx->active_mask); } static int amd_uncore_add(struct perf_event *event, int flags) @@ -491,6 +548,9 @@ static int amd_uncore_ctx_init(struct amd_uncore *uncore, unsigned int cpu) goto fail; } + amd_uncore_init_hrtimer(curr); + curr->hrtimer_duration = (u64)update_interval * NSEC_PER_MSEC; + cpumask_set_cpu(cpu, &pmu->active_mask); } @@ -880,16 +940,55 @@ static int amd_uncore_umc_event_init(struct perf_event *event) static void amd_uncore_umc_start(struct perf_event *event, int flags) { + struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event); + struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu); struct hw_perf_event *hwc = &event->hw; + if (!ctx->nr_active++) + amd_uncore_start_hrtimer(ctx); + if (flags & PERF_EF_RELOAD) wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count)); hwc->state = 0; + __set_bit(hwc->idx, ctx->active_mask); wrmsrl(hwc->config_base, (hwc->config | AMD64_PERFMON_V2_ENABLE_UMC)); perf_event_update_userpage(event); } +static void amd_uncore_umc_read(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u64 prev, new, shift; + s64 delta; + + shift = COUNTER_SHIFT + 1; + prev = local64_read(&hwc->prev_count); + + /* + * UMC counters do not have RDPMC assignments. Read counts directly + * from the corresponding PERF_CTR. + */ + rdmsrl(hwc->event_base, new); + + /* + * Unlike the other uncore counters, UMC counters saturate and set the + * Overflow bit (bit 48) on overflow. Since they do not roll over, + * proactively reset the corresponding PERF_CTR when bit 47 is set so + * that the counter never gets a chance to saturate. + */ + if (new & BIT_ULL(63 - COUNTER_SHIFT)) { + wrmsrl(hwc->event_base, 0); + local64_set(&hwc->prev_count, 0); + } else { + local64_set(&hwc->prev_count, new); + } + + delta = (new << shift) - (prev << shift); + delta >>= shift; + local64_add(delta, &event->count); +} + static void amd_uncore_umc_ctx_scan(struct amd_uncore *uncore, unsigned int cpu) { @@ -968,7 +1067,7 @@ int amd_uncore_umc_ctx_init(struct amd_uncore *uncore, unsigned int cpu) .del = amd_uncore_del, .start = amd_uncore_umc_start, .stop = amd_uncore_stop, - .read = amd_uncore_read, + .read = amd_uncore_umc_read, .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT, .module = THIS_MODULE, }; diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 139ad80d1df3..4c49eef8052e 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -95,6 +95,11 @@ DEFINE_STATIC_CALL_NULL(x86_pmu_filter, *x86_pmu.filter); DEFINE_STATIC_CALL_NULL(x86_pmu_late_setup, *x86_pmu.late_setup); +DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_enable, *x86_pmu.pebs_enable); +DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_disable, *x86_pmu.pebs_disable); +DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_enable_all, *x86_pmu.pebs_enable_all); +DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_disable_all, *x86_pmu.pebs_disable_all); + /* * This one is magic, it will get called even when PMU init fails (because * there is no PMU), in which case it should simply return NULL. @@ -674,6 +679,7 @@ static int __x86_pmu_event_init(struct perf_event *event) event->hw.idx = -1; event->hw.last_cpu = -1; event->hw.last_tag = ~0ULL; + event->hw.dyn_constraint = ~0ULL; /* mark unused */ event->hw.extra_reg.idx = EXTRA_REG_NONE; @@ -756,15 +762,16 @@ void x86_pmu_enable_all(int added) int is_x86_event(struct perf_event *event) { - int i; - - if (!is_hybrid()) - return event->pmu == &pmu; - - for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) { - if (event->pmu == &x86_pmu.hybrid_pmu[i].pmu) - return true; - } + /* + * For a non-hybrid platforms, the type of X86 pmu is + * always PERF_TYPE_RAW. + * For a hybrid platform, the PERF_PMU_CAP_EXTENDED_HW_TYPE + * is a unique capability for the X86 PMU. + * Use them to detect a X86 event. + */ + if (event->pmu->type == PERF_TYPE_RAW || + event->pmu->capabilities & PERF_PMU_CAP_EXTENDED_HW_TYPE) + return true; return false; } @@ -1683,6 +1690,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs) struct cpu_hw_events *cpuc; struct perf_event *event; int idx, handled = 0; + u64 last_period; u64 val; cpuc = this_cpu_ptr(&cpu_hw_events); @@ -1702,6 +1710,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs) continue; event = cpuc->events[idx]; + last_period = event->hw.last_period; val = static_call(x86_pmu_update)(event); if (val & (1ULL << (x86_pmu.cntval_bits - 1))) @@ -1715,12 +1724,11 @@ int x86_pmu_handle_irq(struct pt_regs *regs) if (!static_call(x86_pmu_set_period)(event)) continue; - perf_sample_data_init(&data, 0, event->hw.last_period); + perf_sample_data_init(&data, 0, last_period); perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL); - if (perf_event_overflow(event, &data, regs)) - x86_pmu_stop(event, 0); + perf_event_overflow(event, &data, regs); } if (handled) @@ -2046,6 +2054,11 @@ static void x86_pmu_static_call_update(void) static_call_update(x86_pmu_filter, x86_pmu.filter); static_call_update(x86_pmu_late_setup, x86_pmu.late_setup); + + static_call_update(x86_pmu_pebs_enable, x86_pmu.pebs_enable); + static_call_update(x86_pmu_pebs_disable, x86_pmu.pebs_disable); + static_call_update(x86_pmu_pebs_enable_all, x86_pmu.pebs_enable_all); + static_call_update(x86_pmu_pebs_disable_all, x86_pmu.pebs_disable_all); } static void _x86_pmu_read(struct perf_event *event) diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index a95e6c91c4d7..9560f693fac0 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -80,54 +80,54 @@ static void * bts_buffer_setup_aux(struct perf_event *event, void **pages, int nr_pages, bool overwrite) { - struct bts_buffer *buf; + struct bts_buffer *bb; struct page *page; int cpu = event->cpu; int node = (cpu == -1) ? cpu : cpu_to_node(cpu); unsigned long offset; size_t size = nr_pages << PAGE_SHIFT; - int pg, nbuf, pad; + int pg, nr_buf, pad; /* count all the high order buffers */ - for (pg = 0, nbuf = 0; pg < nr_pages;) { + for (pg = 0, nr_buf = 0; pg < nr_pages;) { page = virt_to_page(pages[pg]); pg += buf_nr_pages(page); - nbuf++; + nr_buf++; } /* * to avoid interrupts in overwrite mode, only allow one physical */ - if (overwrite && nbuf > 1) + if (overwrite && nr_buf > 1) return NULL; - buf = kzalloc_node(offsetof(struct bts_buffer, buf[nbuf]), GFP_KERNEL, node); - if (!buf) + bb = kzalloc_node(struct_size(bb, buf, nr_buf), GFP_KERNEL, node); + if (!bb) return NULL; - buf->nr_pages = nr_pages; - buf->nr_bufs = nbuf; - buf->snapshot = overwrite; - buf->data_pages = pages; - buf->real_size = size - size % BTS_RECORD_SIZE; + bb->nr_pages = nr_pages; + bb->nr_bufs = nr_buf; + bb->snapshot = overwrite; + bb->data_pages = pages; + bb->real_size = size - size % BTS_RECORD_SIZE; - for (pg = 0, nbuf = 0, offset = 0, pad = 0; nbuf < buf->nr_bufs; nbuf++) { + for (pg = 0, nr_buf = 0, offset = 0, pad = 0; nr_buf < bb->nr_bufs; nr_buf++) { unsigned int __nr_pages; page = virt_to_page(pages[pg]); __nr_pages = buf_nr_pages(page); - buf->buf[nbuf].page = page; - buf->buf[nbuf].offset = offset; - buf->buf[nbuf].displacement = (pad ? BTS_RECORD_SIZE - pad : 0); - buf->buf[nbuf].size = buf_size(page) - buf->buf[nbuf].displacement; - pad = buf->buf[nbuf].size % BTS_RECORD_SIZE; - buf->buf[nbuf].size -= pad; + bb->buf[nr_buf].page = page; + bb->buf[nr_buf].offset = offset; + bb->buf[nr_buf].displacement = (pad ? BTS_RECORD_SIZE - pad : 0); + bb->buf[nr_buf].size = buf_size(page) - bb->buf[nr_buf].displacement; + pad = bb->buf[nr_buf].size % BTS_RECORD_SIZE; + bb->buf[nr_buf].size -= pad; pg += __nr_pages; offset += __nr_pages << PAGE_SHIFT; } - return buf; + return bb; } static void bts_buffer_free_aux(void *data) @@ -135,25 +135,25 @@ static void bts_buffer_free_aux(void *data) kfree(data); } -static unsigned long bts_buffer_offset(struct bts_buffer *buf, unsigned int idx) +static unsigned long bts_buffer_offset(struct bts_buffer *bb, unsigned int idx) { - return buf->buf[idx].offset + buf->buf[idx].displacement; + return bb->buf[idx].offset + bb->buf[idx].displacement; } static void -bts_config_buffer(struct bts_buffer *buf) +bts_config_buffer(struct bts_buffer *bb) { int cpu = raw_smp_processor_id(); struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; - struct bts_phys *phys = &buf->buf[buf->cur_buf]; + struct bts_phys *phys = &bb->buf[bb->cur_buf]; unsigned long index, thresh = 0, end = phys->size; struct page *page = phys->page; - index = local_read(&buf->head); + index = local_read(&bb->head); - if (!buf->snapshot) { - if (buf->end < phys->offset + buf_size(page)) - end = buf->end - phys->offset - phys->displacement; + if (!bb->snapshot) { + if (bb->end < phys->offset + buf_size(page)) + end = bb->end - phys->offset - phys->displacement; index -= phys->offset + phys->displacement; @@ -168,7 +168,7 @@ bts_config_buffer(struct bts_buffer *buf) ds->bts_buffer_base = (u64)(long)page_address(page) + phys->displacement; ds->bts_index = ds->bts_buffer_base + index; ds->bts_absolute_maximum = ds->bts_buffer_base + end; - ds->bts_interrupt_threshold = !buf->snapshot + ds->bts_interrupt_threshold = !bb->snapshot ? ds->bts_buffer_base + thresh : ds->bts_absolute_maximum + BTS_RECORD_SIZE; } @@ -184,16 +184,16 @@ static void bts_update(struct bts_ctx *bts) { int cpu = raw_smp_processor_id(); struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; - struct bts_buffer *buf = perf_get_aux(&bts->handle); + struct bts_buffer *bb = perf_get_aux(&bts->handle); unsigned long index = ds->bts_index - ds->bts_buffer_base, old, head; - if (!buf) + if (!bb) return; - head = index + bts_buffer_offset(buf, buf->cur_buf); - old = local_xchg(&buf->head, head); + head = index + bts_buffer_offset(bb, bb->cur_buf); + old = local_xchg(&bb->head, head); - if (!buf->snapshot) { + if (!bb->snapshot) { if (old == head) return; @@ -205,9 +205,9 @@ static void bts_update(struct bts_ctx *bts) * old and head are always in the same physical buffer, so we * can subtract them to get the data size. */ - local_add(head - old, &buf->data_size); + local_add(head - old, &bb->data_size); } else { - local_set(&buf->data_size, head); + local_set(&bb->data_size, head); } /* @@ -218,7 +218,7 @@ static void bts_update(struct bts_ctx *bts) } static int -bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle); +bts_buffer_reset(struct bts_buffer *bb, struct perf_output_handle *handle); /* * Ordering PMU callbacks wrt themselves and the PMI is done by means @@ -232,17 +232,17 @@ bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle); static void __bts_event_start(struct perf_event *event) { struct bts_ctx *bts = this_cpu_ptr(bts_ctx); - struct bts_buffer *buf = perf_get_aux(&bts->handle); + struct bts_buffer *bb = perf_get_aux(&bts->handle); u64 config = 0; - if (!buf->snapshot) + if (!bb->snapshot) config |= ARCH_PERFMON_EVENTSEL_INT; if (!event->attr.exclude_kernel) config |= ARCH_PERFMON_EVENTSEL_OS; if (!event->attr.exclude_user) config |= ARCH_PERFMON_EVENTSEL_USR; - bts_config_buffer(buf); + bts_config_buffer(bb); /* * local barrier to make sure that ds configuration made it @@ -261,13 +261,13 @@ static void bts_event_start(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct bts_ctx *bts = this_cpu_ptr(bts_ctx); - struct bts_buffer *buf; + struct bts_buffer *bb; - buf = perf_aux_output_begin(&bts->handle, event); - if (!buf) + bb = perf_aux_output_begin(&bts->handle, event); + if (!bb) goto fail_stop; - if (bts_buffer_reset(buf, &bts->handle)) + if (bts_buffer_reset(bb, &bts->handle)) goto fail_end_stop; bts->ds_back.bts_buffer_base = cpuc->ds->bts_buffer_base; @@ -306,27 +306,27 @@ static void bts_event_stop(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct bts_ctx *bts = this_cpu_ptr(bts_ctx); - struct bts_buffer *buf = NULL; + struct bts_buffer *bb = NULL; int state = READ_ONCE(bts->state); if (state == BTS_STATE_ACTIVE) __bts_event_stop(event, BTS_STATE_STOPPED); if (state != BTS_STATE_STOPPED) - buf = perf_get_aux(&bts->handle); + bb = perf_get_aux(&bts->handle); event->hw.state |= PERF_HES_STOPPED; if (flags & PERF_EF_UPDATE) { bts_update(bts); - if (buf) { - if (buf->snapshot) + if (bb) { + if (bb->snapshot) bts->handle.head = - local_xchg(&buf->data_size, - buf->nr_pages << PAGE_SHIFT); + local_xchg(&bb->data_size, + bb->nr_pages << PAGE_SHIFT); perf_aux_output_end(&bts->handle, - local_xchg(&buf->data_size, 0)); + local_xchg(&bb->data_size, 0)); } cpuc->ds->bts_index = bts->ds_back.bts_buffer_base; @@ -382,19 +382,19 @@ void intel_bts_disable_local(void) } static int -bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle) +bts_buffer_reset(struct bts_buffer *bb, struct perf_output_handle *handle) { unsigned long head, space, next_space, pad, gap, skip, wakeup; unsigned int next_buf; struct bts_phys *phys, *next_phys; int ret; - if (buf->snapshot) + if (bb->snapshot) return 0; - head = handle->head & ((buf->nr_pages << PAGE_SHIFT) - 1); + head = handle->head & ((bb->nr_pages << PAGE_SHIFT) - 1); - phys = &buf->buf[buf->cur_buf]; + phys = &bb->buf[bb->cur_buf]; space = phys->offset + phys->displacement + phys->size - head; pad = space; if (space > handle->size) { @@ -403,10 +403,10 @@ bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle) } if (space <= BTS_SAFETY_MARGIN) { /* See if next phys buffer has more space */ - next_buf = buf->cur_buf + 1; - if (next_buf >= buf->nr_bufs) + next_buf = bb->cur_buf + 1; + if (next_buf >= bb->nr_bufs) next_buf = 0; - next_phys = &buf->buf[next_buf]; + next_phys = &bb->buf[next_buf]; gap = buf_size(phys->page) - phys->displacement - phys->size + next_phys->displacement; skip = pad + gap; @@ -431,8 +431,8 @@ bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle) * anymore, so we must not be racing with * bts_update(). */ - buf->cur_buf = next_buf; - local_set(&buf->head, head); + bb->cur_buf = next_buf; + local_set(&bb->head, head); } } } @@ -445,7 +445,7 @@ bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle) space -= space % BTS_RECORD_SIZE; } - buf->end = head + space; + bb->end = head + space; /* * If we have no space, the lost notification would have been sent when @@ -462,7 +462,7 @@ int intel_bts_interrupt(void) struct debug_store *ds = this_cpu_ptr(&cpu_hw_events)->ds; struct bts_ctx *bts; struct perf_event *event; - struct bts_buffer *buf; + struct bts_buffer *bb; s64 old_head; int err = -ENOSPC, handled = 0; @@ -485,8 +485,8 @@ int intel_bts_interrupt(void) if (READ_ONCE(bts->state) == BTS_STATE_STOPPED) return handled; - buf = perf_get_aux(&bts->handle); - if (!buf) + bb = perf_get_aux(&bts->handle); + if (!bb) return handled; /* @@ -494,26 +494,26 @@ int intel_bts_interrupt(void) * there's no other way of telling, because the pointer will * keep moving */ - if (buf->snapshot) + if (bb->snapshot) return 0; - old_head = local_read(&buf->head); + old_head = local_read(&bb->head); bts_update(bts); /* no new data */ - if (old_head == local_read(&buf->head)) + if (old_head == local_read(&bb->head)) return handled; - perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0)); + perf_aux_output_end(&bts->handle, local_xchg(&bb->data_size, 0)); - buf = perf_aux_output_begin(&bts->handle, event); - if (buf) - err = bts_buffer_reset(buf, &bts->handle); + bb = perf_aux_output_begin(&bts->handle, event); + if (bb) + err = bts_buffer_reset(bb, &bts->handle); if (err) { WRITE_ONCE(bts->state, BTS_STATE_STOPPED); - if (buf) { + if (bb) { /* * BTS_STATE_STOPPED should be visible before * cleared handle::event @@ -599,7 +599,11 @@ static void bts_event_read(struct perf_event *event) static __init int bts_init(void) { - if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts) + if (!boot_cpu_has(X86_FEATURE_DTES64)) + return -ENODEV; + + x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS); + if (!x86_pmu.bts) return -ENODEV; if (boot_cpu_has(X86_FEATURE_PTI)) { diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index c5f385413392..3204591249e8 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2224,6 +2224,18 @@ static struct extra_reg intel_cmt_extra_regs[] __read_mostly = { EVENT_EXTRA_END }; +EVENT_ATTR_STR(topdown-fe-bound, td_fe_bound_skt, "event=0x9c,umask=0x01"); +EVENT_ATTR_STR(topdown-retiring, td_retiring_skt, "event=0xc2,umask=0x02"); +EVENT_ATTR_STR(topdown-be-bound, td_be_bound_skt, "event=0xa4,umask=0x02"); + +static struct attribute *skt_events_attrs[] = { + EVENT_PTR(td_fe_bound_skt), + EVENT_PTR(td_retiring_skt), + EVENT_PTR(td_bad_spec_cmt), + EVENT_PTR(td_be_bound_skt), + NULL, +}; + #define KNL_OT_L2_HITE BIT_ULL(19) /* Other Tile L2 Hit */ #define KNL_OT_L2_HITF BIT_ULL(20) /* Other Tile L2 Hit */ #define KNL_MCDRAM_LOCAL BIT_ULL(21) @@ -2294,7 +2306,7 @@ static __always_inline void __intel_pmu_disable_all(bool bts) static __always_inline void intel_pmu_disable_all(void) { __intel_pmu_disable_all(true); - intel_pmu_pebs_disable_all(); + static_call_cond(x86_pmu_pebs_disable_all)(); intel_pmu_lbr_disable_all(); } @@ -2326,7 +2338,7 @@ static void __intel_pmu_enable_all(int added, bool pmi) static void intel_pmu_enable_all(int added) { - intel_pmu_pebs_enable_all(); + static_call_cond(x86_pmu_pebs_enable_all)(); __intel_pmu_enable_all(added, false); } @@ -2583,7 +2595,7 @@ static void intel_pmu_disable_event(struct perf_event *event) * so we don't trigger the event without PEBS bit set. */ if (unlikely(event->attr.precise_ip)) - intel_pmu_pebs_disable(event); + static_call(x86_pmu_pebs_disable)(event); } static void intel_pmu_assign_event(struct perf_event *event, int idx) @@ -2603,6 +2615,9 @@ static void intel_pmu_del_event(struct perf_event *event) intel_pmu_lbr_del(event); if (event->attr.precise_ip) intel_pmu_pebs_del(event); + if (is_pebs_counter_event_group(event) || + is_acr_event_group(event)) + this_cpu_ptr(&cpu_hw_events)->n_late_setup--; } static int icl_set_topdown_event_period(struct perf_event *event) @@ -2880,6 +2895,52 @@ static void intel_pmu_enable_fixed(struct perf_event *event) cpuc->fixed_ctrl_val |= bits; } +static void intel_pmu_config_acr(int idx, u64 mask, u32 reload) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + int msr_b, msr_c; + + if (!mask && !cpuc->acr_cfg_b[idx]) + return; + + if (idx < INTEL_PMC_IDX_FIXED) { + msr_b = MSR_IA32_PMC_V6_GP0_CFG_B; + msr_c = MSR_IA32_PMC_V6_GP0_CFG_C; + } else { + msr_b = MSR_IA32_PMC_V6_FX0_CFG_B; + msr_c = MSR_IA32_PMC_V6_FX0_CFG_C; + idx -= INTEL_PMC_IDX_FIXED; + } + + if (cpuc->acr_cfg_b[idx] != mask) { + wrmsrl(msr_b + x86_pmu.addr_offset(idx, false), mask); + cpuc->acr_cfg_b[idx] = mask; + } + /* Only need to update the reload value when there is a valid config value. */ + if (mask && cpuc->acr_cfg_c[idx] != reload) { + wrmsrl(msr_c + x86_pmu.addr_offset(idx, false), reload); + cpuc->acr_cfg_c[idx] = reload; + } +} + +static void intel_pmu_enable_acr(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (!is_acr_event_group(event) || !event->attr.config2) { + /* + * The disable doesn't clear the ACR CFG register. + * Check and clear the ACR CFG register. + */ + intel_pmu_config_acr(hwc->idx, 0, 0); + return; + } + + intel_pmu_config_acr(hwc->idx, hwc->config1, -hwc->sample_period); +} + +DEFINE_STATIC_CALL_NULL(intel_pmu_enable_acr_event, intel_pmu_enable_acr); + static void intel_pmu_enable_event(struct perf_event *event) { u64 enable_mask = ARCH_PERFMON_EVENTSEL_ENABLE; @@ -2887,16 +2948,19 @@ static void intel_pmu_enable_event(struct perf_event *event) int idx = hwc->idx; if (unlikely(event->attr.precise_ip)) - intel_pmu_pebs_enable(event); + static_call(x86_pmu_pebs_enable)(event); switch (idx) { case 0 ... INTEL_PMC_IDX_FIXED - 1: if (branch_sample_counters(event)) enable_mask |= ARCH_PERFMON_EVENTSEL_BR_CNTR; intel_set_masks(event, idx); + static_call_cond(intel_pmu_enable_acr_event)(event); __x86_pmu_enable_event(hwc, enable_mask); break; case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1: + static_call_cond(intel_pmu_enable_acr_event)(event); + fallthrough; case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END: intel_pmu_enable_fixed(event); break; @@ -2914,12 +2978,51 @@ static void intel_pmu_enable_event(struct perf_event *event) } } +static void intel_pmu_acr_late_setup(struct cpu_hw_events *cpuc) +{ + struct perf_event *event, *leader; + int i, j, idx; + + for (i = 0; i < cpuc->n_events; i++) { + leader = cpuc->event_list[i]; + if (!is_acr_event_group(leader)) + continue; + + /* The ACR events must be contiguous. */ + for (j = i; j < cpuc->n_events; j++) { + event = cpuc->event_list[j]; + if (event->group_leader != leader->group_leader) + break; + for_each_set_bit(idx, (unsigned long *)&event->attr.config2, X86_PMC_IDX_MAX) { + if (WARN_ON_ONCE(i + idx > cpuc->n_events)) + return; + __set_bit(cpuc->assign[i + idx], (unsigned long *)&event->hw.config1); + } + } + i = j - 1; + } +} + +void intel_pmu_late_setup(void) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + if (!cpuc->n_late_setup) + return; + + intel_pmu_pebs_late_setup(cpuc); + intel_pmu_acr_late_setup(cpuc); +} + static void intel_pmu_add_event(struct perf_event *event) { if (event->attr.precise_ip) intel_pmu_pebs_add(event); if (intel_pmu_needs_branch_stack(event)) intel_pmu_lbr_add(event); + if (is_pebs_counter_event_group(event) || + is_acr_event_group(event)) + this_cpu_ptr(&cpu_hw_events)->n_late_setup++; } /* @@ -3035,8 +3138,7 @@ static void x86_pmu_handle_guest_pebs(struct pt_regs *regs, continue; perf_sample_data_init(data, 0, event->hw.last_period); - if (perf_event_overflow(event, data, regs)) - x86_pmu_stop(event, 0); + perf_event_overflow(event, data, regs); /* Inject one fake event is enough. */ break; @@ -3141,6 +3243,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { struct perf_event *event = cpuc->events[bit]; + u64 last_period; handled++; @@ -3168,16 +3271,17 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) if (is_pebs_counter_event_group(event)) x86_pmu.drain_pebs(regs, &data); + last_period = event->hw.last_period; + if (!intel_pmu_save_and_restart(event)) continue; - perf_sample_data_init(&data, 0, event->hw.last_period); + perf_sample_data_init(&data, 0, last_period); if (has_branch_stack(event)) intel_pmu_lbr_save_brstack(&data, cpuc, event); - if (perf_event_overflow(event, &data, regs)) - x86_pmu_stop(event, 0); + perf_event_overflow(event, &data, regs); } return handled; @@ -3739,10 +3843,9 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx, if (cpuc->excl_cntrs) return intel_get_excl_constraints(cpuc, event, idx, c2); - /* Not all counters support the branch counter feature. */ - if (branch_sample_counters(event)) { + if (event->hw.dyn_constraint != ~0ULL) { c2 = dyn_constraint(cpuc, c2, idx); - c2->idxmsk64 &= x86_pmu.lbr_counters; + c2->idxmsk64 &= event->hw.dyn_constraint; c2->weight = hweight64(c2->idxmsk64); } @@ -4083,6 +4186,39 @@ end: return start; } +static inline bool intel_pmu_has_acr(struct pmu *pmu) +{ + return !!hybrid(pmu, acr_cause_mask64); +} + +static bool intel_pmu_is_acr_group(struct perf_event *event) +{ + /* The group leader has the ACR flag set */ + if (is_acr_event_group(event)) + return true; + + /* The acr_mask is set */ + if (event->attr.config2) + return true; + + return false; +} + +static inline void intel_pmu_set_acr_cntr_constr(struct perf_event *event, + u64 *cause_mask, int *num) +{ + event->hw.dyn_constraint &= hybrid(event->pmu, acr_cntr_mask64); + *cause_mask |= event->attr.config2; + *num += 1; +} + +static inline void intel_pmu_set_acr_caused_constr(struct perf_event *event, + int idx, u64 cause_mask) +{ + if (test_bit(idx, (unsigned long *)&cause_mask)) + event->hw.dyn_constraint &= hybrid(event->pmu, acr_cause_mask64); +} + static int intel_pmu_hw_config(struct perf_event *event) { int ret = x86_pmu_hw_config(event); @@ -4144,15 +4280,19 @@ static int intel_pmu_hw_config(struct perf_event *event) leader = event->group_leader; if (branch_sample_call_stack(leader)) return -EINVAL; - if (branch_sample_counters(leader)) + if (branch_sample_counters(leader)) { num++; + leader->hw.dyn_constraint &= x86_pmu.lbr_counters; + } leader->hw.flags |= PERF_X86_EVENT_BRANCH_COUNTERS; for_each_sibling_event(sibling, leader) { if (branch_sample_call_stack(sibling)) return -EINVAL; - if (branch_sample_counters(sibling)) + if (branch_sample_counters(sibling)) { num++; + sibling->hw.dyn_constraint &= x86_pmu.lbr_counters; + } } if (num > fls(x86_pmu.lbr_counters)) @@ -4207,6 +4347,94 @@ static int intel_pmu_hw_config(struct perf_event *event) event->attr.precise_ip) event->group_leader->hw.flags |= PERF_X86_EVENT_PEBS_CNTR; + if (intel_pmu_has_acr(event->pmu) && intel_pmu_is_acr_group(event)) { + struct perf_event *sibling, *leader = event->group_leader; + struct pmu *pmu = event->pmu; + bool has_sw_event = false; + int num = 0, idx = 0; + u64 cause_mask = 0; + + /* Not support perf metrics */ + if (is_metric_event(event)) + return -EINVAL; + + /* Not support freq mode */ + if (event->attr.freq) + return -EINVAL; + + /* PDist is not supported */ + if (event->attr.config2 && event->attr.precise_ip > 2) + return -EINVAL; + + /* The reload value cannot exceeds the max period */ + if (event->attr.sample_period > x86_pmu.max_period) + return -EINVAL; + /* + * The counter-constraints of each event cannot be finalized + * unless the whole group is scanned. However, it's hard + * to know whether the event is the last one of the group. + * Recalculate the counter-constraints for each event when + * adding a new event. + * + * The group is traversed twice, which may be optimized later. + * In the first round, + * - Find all events which do reload when other events + * overflow and set the corresponding counter-constraints + * - Add all events, which can cause other events reload, + * in the cause_mask + * - Error out if the number of events exceeds the HW limit + * - The ACR events must be contiguous. + * Error out if there are non-X86 events between ACR events. + * This is not a HW limit, but a SW limit. + * With the assumption, the intel_pmu_acr_late_setup() can + * easily convert the event idx to counter idx without + * traversing the whole event list. + */ + if (!is_x86_event(leader)) + return -EINVAL; + + if (leader->attr.config2) + intel_pmu_set_acr_cntr_constr(leader, &cause_mask, &num); + + if (leader->nr_siblings) { + for_each_sibling_event(sibling, leader) { + if (!is_x86_event(sibling)) { + has_sw_event = true; + continue; + } + if (!sibling->attr.config2) + continue; + if (has_sw_event) + return -EINVAL; + intel_pmu_set_acr_cntr_constr(sibling, &cause_mask, &num); + } + } + if (leader != event && event->attr.config2) { + if (has_sw_event) + return -EINVAL; + intel_pmu_set_acr_cntr_constr(event, &cause_mask, &num); + } + + if (hweight64(cause_mask) > hweight64(hybrid(pmu, acr_cause_mask64)) || + num > hweight64(hybrid(event->pmu, acr_cntr_mask64))) + return -EINVAL; + /* + * In the second round, apply the counter-constraints for + * the events which can cause other events reload. + */ + intel_pmu_set_acr_caused_constr(leader, idx++, cause_mask); + + if (leader->nr_siblings) { + for_each_sibling_event(sibling, leader) + intel_pmu_set_acr_caused_constr(sibling, idx++, cause_mask); + } + + if (leader != event) + intel_pmu_set_acr_caused_constr(event, idx, cause_mask); + + leader->hw.flags |= PERF_X86_EVENT_ACR; + } + if ((event->attr.type == PERF_TYPE_HARDWARE) || (event->attr.type == PERF_TYPE_HW_CACHE)) return 0; @@ -4354,7 +4582,7 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data) .guest = intel_ctrl & ~cpuc->intel_ctrl_host_mask & ~pebs_mask, }; - if (!x86_pmu.pebs) + if (!x86_pmu.ds_pebs) return arr; /* @@ -4952,7 +5180,7 @@ int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu) goto err; } - if (x86_pmu.flags & (PMU_FL_EXCL_CNTRS | PMU_FL_TFA | PMU_FL_BR_CNTR)) { + if (x86_pmu.flags & (PMU_FL_EXCL_CNTRS | PMU_FL_TFA | PMU_FL_DYN_CONSTRAINT)) { size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint); cpuc->constraint_list = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu)); @@ -5041,7 +5269,7 @@ static inline bool intel_pmu_broken_perf_cap(void) return false; } -static void update_pmu_cap(struct x86_hybrid_pmu *pmu) +static void update_pmu_cap(struct pmu *pmu) { unsigned int cntr, fixed_cntr, ecx, edx; union cpuid35_eax eax; @@ -5050,20 +5278,30 @@ static void update_pmu_cap(struct x86_hybrid_pmu *pmu) cpuid(ARCH_PERFMON_EXT_LEAF, &eax.full, &ebx.full, &ecx, &edx); if (ebx.split.umask2) - pmu->config_mask |= ARCH_PERFMON_EVENTSEL_UMASK2; + hybrid(pmu, config_mask) |= ARCH_PERFMON_EVENTSEL_UMASK2; if (ebx.split.eq) - pmu->config_mask |= ARCH_PERFMON_EVENTSEL_EQ; + hybrid(pmu, config_mask) |= ARCH_PERFMON_EVENTSEL_EQ; if (eax.split.cntr_subleaf) { cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_NUM_COUNTER_LEAF, &cntr, &fixed_cntr, &ecx, &edx); - pmu->cntr_mask64 = cntr; - pmu->fixed_cntr_mask64 = fixed_cntr; + hybrid(pmu, cntr_mask64) = cntr; + hybrid(pmu, fixed_cntr_mask64) = fixed_cntr; + } + + if (eax.split.acr_subleaf) { + cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_ACR_LEAF, + &cntr, &fixed_cntr, &ecx, &edx); + /* The mask of the counters which can be reloaded */ + hybrid(pmu, acr_cntr_mask64) = cntr | ((u64)fixed_cntr << INTEL_PMC_IDX_FIXED); + + /* The mask of the counters which can cause a reload of reloadable counters */ + hybrid(pmu, acr_cause_mask64) = ecx | ((u64)edx << INTEL_PMC_IDX_FIXED); } if (!intel_pmu_broken_perf_cap()) { /* Perf Metric (Bit 15) and PEBS via PT (Bit 16) are hybrid enumeration */ - rdmsrl(MSR_IA32_PERF_CAPABILITIES, pmu->intel_cap.capabilities); + rdmsrl(MSR_IA32_PERF_CAPABILITIES, hybrid(pmu, intel_cap).capabilities); } } @@ -5150,7 +5388,7 @@ static bool init_hybrid_pmu(int cpu) goto end; if (this_cpu_has(X86_FEATURE_ARCH_PERFMON_EXT)) - update_pmu_cap(pmu); + update_pmu_cap(&pmu->pmu); intel_pmu_check_hybrid_pmus(pmu); @@ -5524,7 +5762,7 @@ static __init void intel_clovertown_quirk(void) * these chips. */ pr_warn("PEBS disabled due to CPU errata\n"); - x86_pmu.pebs = 0; + x86_pmu.ds_pebs = 0; x86_pmu.pebs_constraints = NULL; } @@ -6012,7 +6250,7 @@ tsx_is_visible(struct kobject *kobj, struct attribute *attr, int i) static umode_t pebs_is_visible(struct kobject *kobj, struct attribute *attr, int i) { - return x86_pmu.pebs ? attr->mode : 0; + return x86_pmu.ds_pebs ? attr->mode : 0; } static umode_t @@ -6043,6 +6281,21 @@ td_is_visible(struct kobject *kobj, struct attribute *attr, int i) return attr->mode; } +PMU_FORMAT_ATTR(acr_mask, "config2:0-63"); + +static struct attribute *format_acr_attrs[] = { + &format_attr_acr_mask.attr, + NULL +}; + +static umode_t +acr_is_visible(struct kobject *kobj, struct attribute *attr, int i) +{ + struct device *dev = kobj_to_dev(kobj); + + return intel_pmu_has_acr(dev_get_drvdata(dev)) ? attr->mode : 0; +} + static struct attribute_group group_events_td = { .name = "events", .is_visible = td_is_visible, @@ -6085,6 +6338,12 @@ static struct attribute_group group_format_evtsel_ext = { .is_visible = evtsel_ext_is_visible, }; +static struct attribute_group group_format_acr = { + .name = "format", + .attrs = format_acr_attrs, + .is_visible = acr_is_visible, +}; + static struct attribute_group group_default = { .attrs = intel_pmu_attrs, .is_visible = default_is_visible, @@ -6099,6 +6358,7 @@ static const struct attribute_group *attr_update[] = { &group_format_extra, &group_format_extra_skl, &group_format_evtsel_ext, + &group_format_acr, &group_default, NULL, }; @@ -6383,6 +6643,7 @@ static const struct attribute_group *hybrid_attr_update[] = { &group_caps_lbr, &hybrid_group_format_extra, &group_format_evtsel_ext, + &group_format_acr, &group_default, &hybrid_group_cpus, NULL, @@ -6575,6 +6836,7 @@ static __always_inline void intel_pmu_init_skt(struct pmu *pmu) intel_pmu_init_grt(pmu); hybrid(pmu, event_constraints) = intel_skt_event_constraints; hybrid(pmu, extra_regs) = intel_cmt_extra_regs; + static_call_update(intel_pmu_enable_acr_event, intel_pmu_enable_acr); } __init int intel_pmu_init(void) @@ -6635,6 +6897,7 @@ __init int intel_pmu_init(void) x86_pmu.pebs_events_mask = intel_pmu_pebs_mask(x86_pmu.cntr_mask64); x86_pmu.pebs_capable = PEBS_COUNTER_MASK; + x86_pmu.config_mask = X86_RAW_EVENT_MASK; /* * Quirk: v2 perfmon does not report fixed-purpose events, so @@ -6663,7 +6926,7 @@ __init int intel_pmu_init(void) if (boot_cpu_has(X86_FEATURE_ARCH_LBR)) intel_pmu_arch_lbr_init(); - intel_ds_init(); + intel_pebs_init(); x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */ @@ -6674,6 +6937,12 @@ __init int intel_pmu_init(void) } /* + * Many features on and after V6 require dynamic constraint, + * e.g., Arch PEBS, ACR. + */ + if (version >= 6) + x86_pmu.flags |= PMU_FL_DYN_CONSTRAINT; + /* * Install the hw-cache-events table: */ switch (boot_cpu_data.x86_vfm) { @@ -6884,6 +7153,18 @@ __init int intel_pmu_init(void) name = "crestmont"; break; + case INTEL_ATOM_DARKMONT_X: + intel_pmu_init_skt(NULL); + intel_pmu_pebs_data_source_cmt(); + x86_pmu.pebs_latency_data = cmt_latency_data; + x86_pmu.get_event_constraints = cmt_get_event_constraints; + td_attr = skt_events_attrs; + mem_attr = grt_mem_attrs; + extra_attr = cmt_format_attr; + pr_cont("Darkmont events, "); + name = "darkmont"; + break; + case INTEL_WESTMERE: case INTEL_WESTMERE_EP: case INTEL_WESTMERE_EX: @@ -7433,6 +7714,18 @@ __init int intel_pmu_init(void) x86_pmu.attr_update = hybrid_attr_update; } + /* + * The archPerfmonExt (0x23) includes an enhanced enumeration of + * PMU architectural features with a per-core view. For non-hybrid, + * each core has the same PMU capabilities. It's good enough to + * update the x86_pmu from the booting CPU. For hybrid, the x86_pmu + * is used to keep the common capabilities. Still keep the values + * from the leaf 0xa. The core specific update will be done later + * when a new type is online. + */ + if (!is_hybrid() && boot_cpu_has(X86_FEATURE_ARCH_PERFMON_EXT)) + update_pmu_cap(NULL); + intel_pmu_check_counters_mask(&x86_pmu.cntr_mask64, &x86_pmu.fixed_cntr_mask64, &x86_pmu.intel_ctrl); diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 8d86e91bd5e5..e2ea4fdedea9 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -624,7 +624,7 @@ static int alloc_pebs_buffer(int cpu) int max, node = cpu_to_node(cpu); void *buffer, *insn_buff, *cea; - if (!x86_pmu.pebs) + if (!x86_pmu.ds_pebs) return 0; buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu); @@ -659,7 +659,7 @@ static void release_pebs_buffer(int cpu) struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu); void *cea; - if (!x86_pmu.pebs) + if (!x86_pmu.ds_pebs) return; kfree(per_cpu(insn_buffer, cpu)); @@ -734,7 +734,7 @@ void release_ds_buffers(void) { int cpu; - if (!x86_pmu.bts && !x86_pmu.pebs) + if (!x86_pmu.bts && !x86_pmu.ds_pebs) return; for_each_possible_cpu(cpu) @@ -750,7 +750,8 @@ void release_ds_buffers(void) } for_each_possible_cpu(cpu) { - release_pebs_buffer(cpu); + if (x86_pmu.ds_pebs) + release_pebs_buffer(cpu); release_bts_buffer(cpu); } } @@ -761,15 +762,17 @@ void reserve_ds_buffers(void) int cpu; x86_pmu.bts_active = 0; - x86_pmu.pebs_active = 0; - if (!x86_pmu.bts && !x86_pmu.pebs) + if (x86_pmu.ds_pebs) + x86_pmu.pebs_active = 0; + + if (!x86_pmu.bts && !x86_pmu.ds_pebs) return; if (!x86_pmu.bts) bts_err = 1; - if (!x86_pmu.pebs) + if (!x86_pmu.ds_pebs) pebs_err = 1; for_each_possible_cpu(cpu) { @@ -781,7 +784,8 @@ void reserve_ds_buffers(void) if (!bts_err && alloc_bts_buffer(cpu)) bts_err = 1; - if (!pebs_err && alloc_pebs_buffer(cpu)) + if (x86_pmu.ds_pebs && !pebs_err && + alloc_pebs_buffer(cpu)) pebs_err = 1; if (bts_err && pebs_err) @@ -793,7 +797,7 @@ void reserve_ds_buffers(void) release_bts_buffer(cpu); } - if (pebs_err) { + if (x86_pmu.ds_pebs && pebs_err) { for_each_possible_cpu(cpu) release_pebs_buffer(cpu); } @@ -805,7 +809,7 @@ void reserve_ds_buffers(void) if (x86_pmu.bts && !bts_err) x86_pmu.bts_active = 1; - if (x86_pmu.pebs && !pebs_err) + if (x86_pmu.ds_pebs && !pebs_err) x86_pmu.pebs_active = 1; for_each_possible_cpu(cpu) { @@ -1355,9 +1359,8 @@ static void __intel_pmu_pebs_update_cfg(struct perf_event *event, } -static void intel_pmu_late_setup(void) +void intel_pmu_pebs_late_setup(struct cpu_hw_events *cpuc) { - struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct perf_event *event; u64 pebs_data_cfg = 0; int i; @@ -1828,8 +1831,6 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event, perf_sample_data_init(data, 0, event->hw.last_period); - data->period = event->hw.last_period; - /* * Use latency for weight (only avail with PEBS-LL) */ @@ -2082,7 +2083,6 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, sample_type = event->attr.sample_type; format_group = basic->format_group; perf_sample_data_init(data, 0, event->hw.last_period); - data->period = event->hw.last_period; setup_pebs_time(event, data, basic->tsc); @@ -2359,8 +2359,7 @@ __intel_pmu_pebs_last_event(struct perf_event *event, * All but the last records are processed. * The last one is left to be able to call the overflow handler. */ - if (perf_event_overflow(event, data, regs)) - x86_pmu_stop(event, 0); + perf_event_overflow(event, data, regs); } if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) { @@ -2589,8 +2588,8 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d if (error[bit]) { perf_log_lost_samples(event, error[bit]); - if (iregs && perf_event_account_interrupt(event)) - x86_pmu_stop(event, 0); + if (iregs) + perf_event_account_interrupt(event); } if (counts[bit]) { @@ -2670,10 +2669,10 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d } /* - * BTS, PEBS probe and setup + * PEBS probe and setup */ -void __init intel_ds_init(void) +void __init intel_pebs_init(void) { /* * No support for 32bit formats @@ -2681,13 +2680,12 @@ void __init intel_ds_init(void) if (!boot_cpu_has(X86_FEATURE_DTES64)) return; - x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS); - x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS); + x86_pmu.ds_pebs = boot_cpu_has(X86_FEATURE_PEBS); x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE; if (x86_pmu.version <= 4) x86_pmu.pebs_no_isolation = 1; - if (x86_pmu.pebs) { + if (x86_pmu.ds_pebs) { char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-'; char *pebs_qual = ""; int format = x86_pmu.intel_cap.pebs_format; @@ -2695,6 +2693,11 @@ void __init intel_ds_init(void) if (format < 4) x86_pmu.intel_cap.pebs_baseline = 0; + x86_pmu.pebs_enable = intel_pmu_pebs_enable; + x86_pmu.pebs_disable = intel_pmu_pebs_disable; + x86_pmu.pebs_enable_all = intel_pmu_pebs_enable_all; + x86_pmu.pebs_disable_all = intel_pmu_pebs_disable_all; + switch (format) { case 0: pr_cont("PEBS fmt0%c, ", pebs_type); @@ -2779,7 +2782,7 @@ void __init intel_ds_init(void) default: pr_cont("no PEBS fmt%d%c, ", format, pebs_type); - x86_pmu.pebs = 0; + x86_pmu.ds_pebs = 0; } } } @@ -2788,7 +2791,7 @@ void perf_restore_debug_store(void) { struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds); - if (!x86_pmu.bts && !x86_pmu.pebs) + if (!x86_pmu.bts && !x86_pmu.ds_pebs) return; wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ds); diff --git a/arch/x86/events/intel/knc.c b/arch/x86/events/intel/knc.c index 034a1f6a457c..384589168c1a 100644 --- a/arch/x86/events/intel/knc.c +++ b/arch/x86/events/intel/knc.c @@ -241,19 +241,20 @@ again: for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { struct perf_event *event = cpuc->events[bit]; + u64 last_period; handled++; if (!test_bit(bit, cpuc->active_mask)) continue; + last_period = event->hw.last_period; if (!intel_pmu_save_and_restart(event)) continue; - perf_sample_data_init(&data, 0, event->hw.last_period); + perf_sample_data_init(&data, 0, last_period); - if (perf_event_overflow(event, &data, regs)) - x86_pmu_stop(event, 0); + perf_event_overflow(event, &data, regs); } /* diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index f44c3d866f24..05acd6449ceb 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -1618,7 +1618,7 @@ void __init intel_pmu_arch_lbr_init(void) x86_pmu.lbr_nr = lbr_nr; if (!!x86_pmu.lbr_counters) - x86_pmu.flags |= PMU_FL_BR_CNTR; + x86_pmu.flags |= PMU_FL_BR_CNTR | PMU_FL_DYN_CONSTRAINT; if (x86_pmu.lbr_mispred) static_branch_enable(&x86_lbr_mispred); diff --git a/arch/x86/events/intel/p4.c b/arch/x86/events/intel/p4.c index c85a9fc44355..126d5ae264cb 100644 --- a/arch/x86/events/intel/p4.c +++ b/arch/x86/events/intel/p4.c @@ -1072,8 +1072,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs) continue; - if (perf_event_overflow(event, &data, regs)) - x86_pmu_stop(event, 0); + perf_event_overflow(event, &data, regs); } if (handled) diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index fa37565f6418..25ead919fc48 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -1863,6 +1863,8 @@ static __init int pt_init(void) if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries)) pt_pmu.pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG; + else + pt_pmu.pmu.capabilities = PERF_PMU_CAP_AUX_PREFER_LARGE; pt_pmu.pmu.capabilities |= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE | diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index a34e50fc4a8f..5811e172f721 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -305,17 +305,11 @@ static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer) { struct intel_uncore_box *box; struct perf_event *event; - unsigned long flags; int bit; box = container_of(hrtimer, struct intel_uncore_box, hrtimer); if (!box->n_active || box->cpu != smp_processor_id()) return HRTIMER_NORESTART; - /* - * disable local interrupt to prevent uncore_pmu_event_start/stop - * to interrupt the update process - */ - local_irq_save(flags); /* * handle boxes with an active event list as opposed to active @@ -328,8 +322,6 @@ static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer) for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX) uncore_perf_event_update(box, box->events[bit]); - local_irq_restore(flags); - hrtimer_forward_now(hrtimer, ns_to_ktime(box->hrtimer_duration)); return HRTIMER_RESTART; } @@ -337,7 +329,7 @@ static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer) void uncore_pmu_start_hrtimer(struct intel_uncore_box *box) { hrtimer_start(&box->hrtimer, ns_to_ktime(box->hrtimer_duration), - HRTIMER_MODE_REL_PINNED); + HRTIMER_MODE_REL_PINNED_HARD); } void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box) @@ -347,7 +339,7 @@ void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box) static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box) { - hrtimer_setup(&box->hrtimer, uncore_pmu_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hrtimer_setup(&box->hrtimer, uncore_pmu_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); } static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 46d120597bab..e8bce89821be 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -127,6 +127,11 @@ static inline bool is_pebs_counter_event_group(struct perf_event *event) return check_leader_group(event->group_leader, PERF_X86_EVENT_PEBS_CNTR); } +static inline bool is_acr_event_group(struct perf_event *event) +{ + return check_leader_group(event->group_leader, PERF_X86_EVENT_ACR); +} + struct amd_nb { int nb_id; /* NorthBridge id */ int refcnt; /* reference count */ @@ -268,6 +273,7 @@ struct cpu_hw_events { struct event_constraint *event_constraint[X86_PMC_IDX_MAX]; int n_excl; /* the number of exclusive events */ + int n_late_setup; /* the num of events needs late setup */ unsigned int txn_flags; int is_fake; @@ -293,6 +299,10 @@ struct cpu_hw_events { u64 fixed_ctrl_val; u64 active_fixed_ctrl_val; + /* Intel ACR configuration */ + u64 acr_cfg_b[X86_PMC_IDX_MAX]; + u64 acr_cfg_c[X86_PMC_IDX_MAX]; + /* * Intel LBR bits */ @@ -714,6 +724,15 @@ struct x86_hybrid_pmu { u64 fixed_cntr_mask64; unsigned long fixed_cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; }; + + union { + u64 acr_cntr_mask64; + unsigned long acr_cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + }; + union { + u64 acr_cause_mask64; + unsigned long acr_cause_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + }; struct event_constraint unconstrained; u64 hw_cache_event_ids @@ -796,6 +815,10 @@ struct x86_pmu { int (*hw_config)(struct perf_event *event); int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign); void (*late_setup)(void); + void (*pebs_enable)(struct perf_event *event); + void (*pebs_disable)(struct perf_event *event); + void (*pebs_enable_all)(void); + void (*pebs_disable_all)(void); unsigned eventsel; unsigned perfctr; unsigned fixedctr; @@ -812,6 +835,14 @@ struct x86_pmu { u64 fixed_cntr_mask64; unsigned long fixed_cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; }; + union { + u64 acr_cntr_mask64; + unsigned long acr_cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + }; + union { + u64 acr_cause_mask64; + unsigned long acr_cause_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + }; int cntval_bits; u64 cntval_mask; union { @@ -878,7 +909,7 @@ struct x86_pmu { */ unsigned int bts :1, bts_active :1, - pebs :1, + ds_pebs :1, pebs_active :1, pebs_broken :1, pebs_prec_dist :1, @@ -1049,6 +1080,7 @@ do { \ #define PMU_FL_MEM_LOADS_AUX 0x100 /* Require an auxiliary event for the complete memory info */ #define PMU_FL_RETIRE_LATENCY 0x200 /* Support Retire Latency in PEBS */ #define PMU_FL_BR_CNTR 0x400 /* Support branch counter logging */ +#define PMU_FL_DYN_CONSTRAINT 0x800 /* Needs dynamic constraint */ #define EVENT_VAR(_id) event_attr_##_id #define EVENT_PTR(_id) &event_attr_##_id.attr.attr @@ -1091,6 +1123,7 @@ static struct perf_pmu_format_hybrid_attr format_attr_hybrid_##_name = {\ .pmu_type = _pmu, \ } +int is_x86_event(struct perf_event *event); struct pmu *x86_get_pmu(unsigned int cpu); extern struct x86_pmu x86_pmu __read_mostly; @@ -1098,6 +1131,10 @@ DECLARE_STATIC_CALL(x86_pmu_set_period, *x86_pmu.set_period); DECLARE_STATIC_CALL(x86_pmu_update, *x86_pmu.update); DECLARE_STATIC_CALL(x86_pmu_drain_pebs, *x86_pmu.drain_pebs); DECLARE_STATIC_CALL(x86_pmu_late_setup, *x86_pmu.late_setup); +DECLARE_STATIC_CALL(x86_pmu_pebs_enable, *x86_pmu.pebs_enable); +DECLARE_STATIC_CALL(x86_pmu_pebs_disable, *x86_pmu.pebs_disable); +DECLARE_STATIC_CALL(x86_pmu_pebs_enable_all, *x86_pmu.pebs_enable_all); +DECLARE_STATIC_CALL(x86_pmu_pebs_disable_all, *x86_pmu.pebs_disable_all); static __always_inline struct x86_perf_task_context_opt *task_context_opt(void *ctx) { @@ -1587,6 +1624,8 @@ void intel_pmu_disable_bts(void); int intel_pmu_drain_bts_buffer(void); +void intel_pmu_late_setup(void); + u64 grt_latency_data(struct perf_event *event, u64 status); u64 cmt_latency_data(struct perf_event *event, u64 status); @@ -1643,11 +1682,13 @@ void intel_pmu_pebs_disable_all(void); void intel_pmu_pebs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in); +void intel_pmu_pebs_late_setup(struct cpu_hw_events *cpuc); + void intel_pmu_drain_pebs_buffer(void); void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr); -void intel_ds_init(void); +void intel_pebs_init(void); void intel_pmu_lbr_save_brstack(struct perf_sample_data *data, struct cpu_hw_events *cpuc, diff --git a/arch/x86/events/perf_event_flags.h b/arch/x86/events/perf_event_flags.h index 1d9e385649b5..70078334e4a3 100644 --- a/arch/x86/events/perf_event_flags.h +++ b/arch/x86/events/perf_event_flags.h @@ -2,23 +2,24 @@ /* * struct hw_perf_event.flags flags */ -PERF_ARCH(PEBS_LDLAT, 0x00001) /* ld+ldlat data address sampling */ -PERF_ARCH(PEBS_ST, 0x00002) /* st data address sampling */ -PERF_ARCH(PEBS_ST_HSW, 0x00004) /* haswell style datala, store */ -PERF_ARCH(PEBS_LD_HSW, 0x00008) /* haswell style datala, load */ -PERF_ARCH(PEBS_NA_HSW, 0x00010) /* haswell style datala, unknown */ -PERF_ARCH(EXCL, 0x00020) /* HT exclusivity on counter */ -PERF_ARCH(DYNAMIC, 0x00040) /* dynamic alloc'd constraint */ -PERF_ARCH(PEBS_CNTR, 0x00080) /* PEBS counters snapshot */ -PERF_ARCH(EXCL_ACCT, 0x00100) /* accounted EXCL event */ -PERF_ARCH(AUTO_RELOAD, 0x00200) /* use PEBS auto-reload */ -PERF_ARCH(LARGE_PEBS, 0x00400) /* use large PEBS */ -PERF_ARCH(PEBS_VIA_PT, 0x00800) /* use PT buffer for PEBS */ -PERF_ARCH(PAIR, 0x01000) /* Large Increment per Cycle */ -PERF_ARCH(LBR_SELECT, 0x02000) /* Save/Restore MSR_LBR_SELECT */ -PERF_ARCH(TOPDOWN, 0x04000) /* Count Topdown slots/metrics events */ -PERF_ARCH(PEBS_STLAT, 0x08000) /* st+stlat data address sampling */ -PERF_ARCH(AMD_BRS, 0x10000) /* AMD Branch Sampling */ -PERF_ARCH(PEBS_LAT_HYBRID, 0x20000) /* ld and st lat for hybrid */ -PERF_ARCH(NEEDS_BRANCH_STACK, 0x40000) /* require branch stack setup */ -PERF_ARCH(BRANCH_COUNTERS, 0x80000) /* logs the counters in the extra space of each branch */ +PERF_ARCH(PEBS_LDLAT, 0x0000001) /* ld+ldlat data address sampling */ +PERF_ARCH(PEBS_ST, 0x0000002) /* st data address sampling */ +PERF_ARCH(PEBS_ST_HSW, 0x0000004) /* haswell style datala, store */ +PERF_ARCH(PEBS_LD_HSW, 0x0000008) /* haswell style datala, load */ +PERF_ARCH(PEBS_NA_HSW, 0x0000010) /* haswell style datala, unknown */ +PERF_ARCH(EXCL, 0x0000020) /* HT exclusivity on counter */ +PERF_ARCH(DYNAMIC, 0x0000040) /* dynamic alloc'd constraint */ +PERF_ARCH(PEBS_CNTR, 0x0000080) /* PEBS counters snapshot */ +PERF_ARCH(EXCL_ACCT, 0x0000100) /* accounted EXCL event */ +PERF_ARCH(AUTO_RELOAD, 0x0000200) /* use PEBS auto-reload */ +PERF_ARCH(LARGE_PEBS, 0x0000400) /* use large PEBS */ +PERF_ARCH(PEBS_VIA_PT, 0x0000800) /* use PT buffer for PEBS */ +PERF_ARCH(PAIR, 0x0001000) /* Large Increment per Cycle */ +PERF_ARCH(LBR_SELECT, 0x0002000) /* Save/Restore MSR_LBR_SELECT */ +PERF_ARCH(TOPDOWN, 0x0004000) /* Count Topdown slots/metrics events */ +PERF_ARCH(PEBS_STLAT, 0x0008000) /* st+stlat data address sampling */ +PERF_ARCH(AMD_BRS, 0x0010000) /* AMD Branch Sampling */ +PERF_ARCH(PEBS_LAT_HYBRID, 0x0020000) /* ld and st lat for hybrid */ +PERF_ARCH(NEEDS_BRANCH_STACK, 0x0040000) /* require branch stack setup */ +PERF_ARCH(BRANCH_COUNTERS, 0x0080000) /* logs the counters in the extra space of each branch */ +PERF_ARCH(ACR, 0x0100000) /* Auto counter reload */ diff --git a/arch/x86/events/zhaoxin/core.c b/arch/x86/events/zhaoxin/core.c index 2fd9b0cf9a5e..49a5944fac63 100644 --- a/arch/x86/events/zhaoxin/core.c +++ b/arch/x86/events/zhaoxin/core.c @@ -397,8 +397,7 @@ again: if (!x86_perf_event_set_period(event)) continue; - if (perf_event_overflow(event, &data, regs)) - x86_pmu_stop(event, 0); + perf_event_overflow(event, &data, regs); } /* diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index e7d2f460fcc6..7c3f5ef1a169 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -602,7 +602,11 @@ /* V6 PMON MSR range */ #define MSR_IA32_PMC_V6_GP0_CTR 0x1900 #define MSR_IA32_PMC_V6_GP0_CFG_A 0x1901 +#define MSR_IA32_PMC_V6_GP0_CFG_B 0x1902 +#define MSR_IA32_PMC_V6_GP0_CFG_C 0x1903 #define MSR_IA32_PMC_V6_FX0_CTR 0x1980 +#define MSR_IA32_PMC_V6_FX0_CFG_B 0x1982 +#define MSR_IA32_PMC_V6_FX0_CFG_C 0x1983 #define MSR_IA32_PMC_V6_STEP 4 /* KeyID partitioning between MKTME and TDX */ diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 812dac3f79f0..70d1d94aca7e 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -195,6 +195,7 @@ union cpuid10_edx { */ #define ARCH_PERFMON_EXT_LEAF 0x00000023 #define ARCH_PERFMON_NUM_COUNTER_LEAF 0x1 +#define ARCH_PERFMON_ACR_LEAF 0x2 union cpuid35_eax { struct { diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 9194695662b2..6d383839e839 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -840,6 +840,11 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) insn_byte_t p; int i; + /* x86_nops[insn->length]; same as jmp with .offs = 0 */ + if (insn->length <= ASM_NOP_MAX && + !memcmp(insn->kaddr, x86_nops[insn->length], insn->length)) + goto setup; + switch (opc1) { case 0xeb: /* jmp 8 */ case 0xe9: /* jmp 32 */ diff --git a/arch/xtensa/kernel/perf_event.c b/arch/xtensa/kernel/perf_event.c index 183618090d05..223f1d452310 100644 --- a/arch/xtensa/kernel/perf_event.c +++ b/arch/xtensa/kernel/perf_event.c @@ -388,8 +388,7 @@ irqreturn_t xtensa_pmu_irq_handler(int irq, void *dev_id) struct pt_regs *regs = get_irq_regs(); perf_sample_data_init(&data, 0, last_period); - if (perf_event_overflow(event, &data, regs)) - xtensa_pmu_stop(event, 0); + perf_event_overflow(event, &data, regs); } rc = IRQ_HANDLED; |