| field | value | date |
|---|---|---|
| author | Paolo Bonzini <pbonzini@redhat.com> | 2022-06-09 09:45:00 -0400 |
| committer | Paolo Bonzini <pbonzini@redhat.com> | 2022-06-09 09:45:00 -0400 |
| commit | 66da65005aa819e0b8d3a08f5ec1491b7690cb67 (patch) | |
| tree | 001471fc80938e6e13c6ecf356503d4c65043d5a /arch/x86/events/amd/core.c | |
| parent | KVM: x86: do not report a vCPU as preempted outside instruction boundaries (diff) | |
| parent | MAINTAINERS: Limit KVM RISC-V entry to existing selftests (diff) | |
Merge tag 'kvm-riscv-fixes-5.19-1' of https://github.com/kvm-riscv/linux into HEAD
KVM/riscv fixes for 5.19, take #1
- Typo fix in arch/riscv/kvm/vmid.c
- Remove broken reference pattern from MAINTAINERS entry
Diffstat (limited to 'arch/x86/events/amd/core.c')
-rw-r--r-- | arch/x86/events/amd/core.c | 505 |
1 file changed, 485 insertions(+), 20 deletions(-)
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index 9687a8aef01c..9ac3718410ce 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 #include <linux/perf_event.h>
+#include <linux/jump_label.h>
 #include <linux/export.h>
 #include <linux/types.h>
 #include <linux/init.h>
@@ -7,6 +8,7 @@
 #include <linux/delay.h>
 #include <linux/jiffies.h>
 #include <asm/apicdef.h>
+#include <asm/apic.h>
 #include <asm/nmi.h>
 
 #include "../perf_event.h"
@@ -18,6 +20,9 @@ static unsigned long perf_nmi_window;
 #define AMD_MERGE_EVENT ((0xFULL << 32) | 0xFFULL)
 #define AMD_MERGE_EVENT_ENABLE (AMD_MERGE_EVENT | ARCH_PERFMON_EVENTSEL_ENABLE)
 
+/* PMC Enable and Overflow bits for PerfCntrGlobal* registers */
+static u64 amd_pmu_global_cntr_mask __read_mostly;
+
 static __initconst const u64 amd_hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_MAX]
 				[PERF_COUNT_HW_CACHE_OP_MAX]
@@ -325,8 +330,16 @@ static inline bool amd_is_pair_event_code(struct hw_perf_event *hwc)
 	}
 }
 
+#define AMD_FAM19H_BRS_EVENT 0xc4 /* RETIRED_TAKEN_BRANCH_INSTRUCTIONS */
+static inline int amd_is_brs_event(struct perf_event *e)
+{
+	return (e->hw.config & AMD64_RAW_EVENT_MASK) == AMD_FAM19H_BRS_EVENT;
+}
+
 static int amd_core_hw_config(struct perf_event *event)
 {
+	int ret = 0;
+
 	if (event->attr.exclude_host && event->attr.exclude_guest)
 		/*
 		 * When HO == GO == 1 the hardware treats that as GO == HO == 0
@@ -343,7 +356,66 @@ static int amd_core_hw_config(struct perf_event *event)
 	if ((x86_pmu.flags & PMU_FL_PAIR) && amd_is_pair_event_code(&event->hw))
 		event->hw.flags |= PERF_X86_EVENT_PAIR;
 
-	return 0;
+	/*
+	 * if branch stack is requested
+	 */
+	if (has_branch_stack(event)) {
+		/*
+		 * Due to interrupt holding, BRS is not recommended in
+		 * counting mode.
+		 */
+		if (!is_sampling_event(event))
+			return -EINVAL;
+
+		/*
+		 * Due to the way BRS operates by holding the interrupt until
+		 * lbr_nr entries have been captured, it does not make sense
+		 * to allow sampling on BRS with an event that does not match
+		 * what BRS is capturing, i.e., retired taken branches.
+		 * Otherwise the correlation with the event's period is even
+		 * more loose:
+		 *
+		 * With retired taken branch:
+		 *   Effective P = P + 16 + X
+		 * With any other event:
+		 *   Effective P = P + Y + X
+		 *
+		 * Where X is the number of taken branches due to interrupt
+		 * skid. Skid is large.
+		 *
+		 * Where Y is the occurrences of the event while BRS is
+		 * capturing the lbr_nr entries.
+		 *
+		 * By using retired taken branches, we limit the impact on the
+		 * Y variable. We know it cannot be more than the depth of
+		 * BRS.
+		 */
+		if (!amd_is_brs_event(event))
+			return -EINVAL;
+
+		/*
+		 * BRS implementation does not work with frequency mode
+		 * reprogramming of the period.
+		 */
+		if (event->attr.freq)
+			return -EINVAL;
+		/*
+		 * The kernel subtracts BRS depth from period, so it must
+		 * be big enough.
+		 */
+		if (event->attr.sample_period <= x86_pmu.lbr_nr)
+			return -EINVAL;
+
+		/*
+		 * Check if we can allow PERF_SAMPLE_BRANCH_STACK
+		 */
+		ret = amd_brs_setup_filter(event);
+
+		/* only set in case of success */
+		if (!ret)
+			event->hw.flags |= PERF_X86_EVENT_AMD_BRS;
+	}
+	return ret;
 }
 
 static inline int amd_is_nb_event(struct hw_perf_event *hwc)
@@ -366,7 +438,7 @@ static int amd_pmu_hw_config(struct perf_event *event)
 	if (event->attr.precise_ip && get_ibs_caps())
 		return -ENOENT;
 
-	if (has_branch_stack(event))
+	if (has_branch_stack(event) && !x86_pmu.lbr_nr)
 		return -EOPNOTSUPP;
 
 	ret = x86_pmu_hw_config(event);
@@ -510,6 +582,18 @@ static struct amd_nb *amd_alloc_nb(int cpu)
 	return nb;
 }
 
+static void amd_pmu_cpu_reset(int cpu)
+{
+	if (x86_pmu.version < 2)
+		return;
+
+	/* Clear enable bits i.e. PerfCntrGlobalCtl.PerfCntrEn */
+	wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, 0);
+
+	/* Clear overflow bits i.e. PerfCntrGlobalStatus.PerfCntrOvfl */
+	wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, amd_pmu_global_cntr_mask);
+}
+
 static int amd_pmu_cpu_prepare(int cpu)
 {
 	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
@@ -555,6 +639,9 @@ static void amd_pmu_cpu_starting(int cpu)
 
 	cpuc->amd_nb->nb_id = nb_id;
 	cpuc->amd_nb->refcnt++;
+
+	amd_brs_reset();
+	amd_pmu_cpu_reset(cpu);
 }
 
 static void amd_pmu_cpu_dead(int cpu)
@@ -574,8 +661,54 @@ static void amd_pmu_cpu_dead(int cpu)
 
 		cpuhw->amd_nb = NULL;
 	}
+
+	amd_pmu_cpu_reset(cpu);
+}
+
+static inline void amd_pmu_set_global_ctl(u64 ctl)
+{
+	wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, ctl);
 }
 
+static inline u64 amd_pmu_get_global_status(void)
+{
+	u64 status;
+
+	/* PerfCntrGlobalStatus is read-only */
+	rdmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS, status);
+
+	return status & amd_pmu_global_cntr_mask;
+}
+
+static inline void amd_pmu_ack_global_status(u64 status)
+{
+	/*
+	 * PerfCntrGlobalStatus is read-only but an overflow acknowledgment
+	 * mechanism exists; writing 1 to a bit in PerfCntrGlobalStatusClr
+	 * clears the same bit in PerfCntrGlobalStatus
+	 */
+
+	/* Only allow modifications to PerfCntrGlobalStatus.PerfCntrOvfl */
+	status &= amd_pmu_global_cntr_mask;
+	wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, status);
+}
+
+static bool amd_pmu_test_overflow_topbit(int idx)
+{
+	u64 counter;
+
+	rdmsrl(x86_pmu_event_addr(idx), counter);
+
+	return !(counter & BIT_ULL(x86_pmu.cntval_bits - 1));
+}
+
+static bool amd_pmu_test_overflow_status(int idx)
+{
+	return amd_pmu_get_global_status() & BIT_ULL(idx);
+}
+
+DEFINE_STATIC_CALL(amd_pmu_test_overflow, amd_pmu_test_overflow_topbit);
+
 /*
  * When a PMC counter overflows, an NMI is used to process the event and
  * reset the counter. NMI latency can result in the counter being updated
@@ -588,7 +721,6 @@ static void amd_pmu_cpu_dead(int cpu)
 static void amd_pmu_wait_on_overflow(int idx)
 {
 	unsigned int i;
-	u64 counter;
 
 	/*
 	 * Wait for the counter to be reset if it has overflowed. This loop
@@ -596,8 +728,7 @@
 	 * forever...
 	 */
 	for (i = 0; i < OVERFLOW_WAIT_COUNT; i++) {
-		rdmsrl(x86_pmu_event_addr(idx), counter);
-		if (counter & (1ULL << (x86_pmu.cntval_bits - 1)))
+		if (!static_call(amd_pmu_test_overflow)(idx))
 			break;
 
 		/* Might be in IRQ context, so can't sleep */
@@ -605,13 +736,11 @@
 	}
 }
 
-static void amd_pmu_disable_all(void)
+static void amd_pmu_check_overflow(void)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	int idx;
 
-	x86_pmu_disable_all();
-
 	/*
 	 * This shouldn't be called from NMI context, but add a safeguard here
 	 * to return, since if we're in NMI context we can't wait for an NMI
@@ -634,6 +763,47 @@
 	}
 }
 
+static void amd_pmu_enable_event(struct perf_event *event)
+{
+	x86_pmu_enable_event(event);
+}
+
+static void amd_pmu_enable_all(int added)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	int idx;
+
+	amd_brs_enable_all();
+
+	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+		/* only activate events which are marked as active */
+		if (!test_bit(idx, cpuc->active_mask))
+			continue;
+
+		amd_pmu_enable_event(cpuc->events[idx]);
+	}
+}
+
+static void amd_pmu_v2_enable_event(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	/*
+	 * Testing cpu_hw_events.enabled should be skipped in this case unlike
+	 * in x86_pmu_enable_event().
+	 *
+	 * Since cpu_hw_events.enabled is set only after returning from
+	 * x86_pmu_start(), the PMCs must be programmed and kept ready.
+	 * Counting starts only after x86_pmu_enable_all() is called.
+	 */
+	__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
+}
+
+static void amd_pmu_v2_enable_all(int added)
+{
+	amd_pmu_set_global_ctl(amd_pmu_global_cntr_mask);
+}
+
 static void amd_pmu_disable_event(struct perf_event *event)
 {
 	x86_pmu_disable_event(event);
@@ -651,6 +821,32 @@
 	amd_pmu_wait_on_overflow(event->hw.idx);
 }
 
+static void amd_pmu_disable_all(void)
+{
+	amd_brs_disable_all();
+	x86_pmu_disable_all();
+	amd_pmu_check_overflow();
+}
+
+static void amd_pmu_v2_disable_all(void)
+{
+	/* Disable all PMCs */
+	amd_pmu_set_global_ctl(0);
+	amd_pmu_check_overflow();
+}
+
+static void amd_pmu_add_event(struct perf_event *event)
+{
+	if (needs_branch_stack(event))
+		amd_pmu_brs_add(event);
+}
+
+static void amd_pmu_del_event(struct perf_event *event)
+{
+	if (needs_branch_stack(event))
+		amd_pmu_brs_del(event);
+}
+
 /*
  * Because of NMI latency, if multiple PMC counters are active or other sources
  * of NMIs are received, the perf NMI handler can handle one or more overflowed
@@ -669,13 +865,8 @@
  * handled a counter. When an un-handled NMI is received, it will be claimed
  * only if arriving within that window.
  */
-static int amd_pmu_handle_irq(struct pt_regs *regs)
+static inline int amd_pmu_adjust_nmi_window(int handled)
 {
-	int handled;
-
-	/* Process any counter overflows */
-	handled = x86_pmu_handle_irq(regs);
-
 	/*
 	 * If a counter was handled, record a timestamp such that un-handled
 	 * NMIs will be claimed if arriving within that window.
@@ -692,6 +883,113 @@
 	return NMI_HANDLED;
 }
 
+static int amd_pmu_handle_irq(struct pt_regs *regs)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	int handled;
+	int pmu_enabled;
+
+	/*
+	 * Save the PMU state.
+	 * It needs to be restored when leaving the handler.
+	 */
+	pmu_enabled = cpuc->enabled;
+	cpuc->enabled = 0;
+
+	/* stop everything (includes BRS) */
+	amd_pmu_disable_all();
+
+	/* Drain BRS if in use (could be inactive) */
+	if (cpuc->lbr_users)
+		amd_brs_drain();
+
+	/* Process any counter overflows */
+	handled = x86_pmu_handle_irq(regs);
+
+	cpuc->enabled = pmu_enabled;
+	if (pmu_enabled)
+		amd_pmu_enable_all(0);
+
+	return amd_pmu_adjust_nmi_window(handled);
+}
+
+static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	struct perf_sample_data data;
+	struct hw_perf_event *hwc;
+	struct perf_event *event;
+	int handled = 0, idx;
+	u64 status, mask;
+	bool pmu_enabled;
+
+	/*
+	 * Save the PMU state as it needs to be restored when leaving the
+	 * handler
+	 */
+	pmu_enabled = cpuc->enabled;
+	cpuc->enabled = 0;
+
+	/* Stop counting */
+	amd_pmu_v2_disable_all();
+
+	status = amd_pmu_get_global_status();
+
+	/* Check if any overflows are pending */
+	if (!status)
+		goto done;
+
+	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+		if (!test_bit(idx, cpuc->active_mask))
+			continue;
+
+		event = cpuc->events[idx];
+		hwc = &event->hw;
+		x86_perf_event_update(event);
+		mask = BIT_ULL(idx);
+
+		if (!(status & mask))
+			continue;
+
+		/* Event overflow */
+		handled++;
+		perf_sample_data_init(&data, 0, hwc->last_period);
+
+		if (!x86_perf_event_set_period(event))
+			continue;
+
+		if (perf_event_overflow(event, &data, regs))
+			x86_pmu_stop(event, 0);
+
+		status &= ~mask;
+	}
+
+	/*
+	 * It should never be the case that some overflows are not handled as
+	 * the corresponding PMCs are expected to be inactive according to the
+	 * active_mask
+	 */
+	WARN_ON(status > 0);
+
+	/* Clear overflow bits */
+	amd_pmu_ack_global_status(~status);
+
+	/*
+	 * Unmasking the LVTPC is not required as the Mask (M) bit of the LVT
+	 * PMI entry is not set by the local APIC when a PMC overflow occurs
+	 */
+	inc_irq_stat(apic_perf_irqs);
+
+done:
+	cpuc->enabled = pmu_enabled;
+
+	/* Resume counting only if PMU is active */
+	if (pmu_enabled)
+		amd_pmu_v2_enable_all(0);
+
+	return amd_pmu_adjust_nmi_window(handled);
+}
+
 static struct event_constraint *
 amd_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
 			  struct perf_event *event)
@@ -897,6 +1195,51 @@ static void amd_put_event_constraints_f17h(struct cpu_hw_events *cpuc,
 	--cpuc->n_pair;
 }
 
+/*
+ * Because of the way BRS operates with inactive and active phases, and
+ * the link to one counter, it is not possible to have two events using BRS
+ * scheduled at the same time. There would be an issue with enforcing the
+ * period of each one and given that the BRS saturates, it would not be possible
+ * to guarantee correlated content for all events. Therefore, in situations
+ * where multiple events want to use BRS, the kernel enforces mutual exclusion.
+ * Exclusion is enforced by choosing only one counter for events using BRS.
+ * The event scheduling logic will then automatically multiplex the
+ * events and ensure that at most one event is actively using BRS.
+ *
+ * The BRS counter could be any counter, but there is no constraint on Fam19h,
+ * therefore all counters are equal and thus we pick the first one: PMC0
+ */
+static struct event_constraint amd_fam19h_brs_cntr0_constraint =
+	EVENT_CONSTRAINT(0, 0x1, AMD64_RAW_EVENT_MASK);
+
+static struct event_constraint amd_fam19h_brs_pair_cntr0_constraint =
+	__EVENT_CONSTRAINT(0, 0x1, AMD64_RAW_EVENT_MASK, 1, 0, PERF_X86_EVENT_PAIR);
+
+static struct event_constraint *
+amd_get_event_constraints_f19h(struct cpu_hw_events *cpuc, int idx,
+			       struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	bool has_brs = has_amd_brs(hwc);
+
+	/*
+	 * In case BRS is used with an event requiring a counter pair,
+	 * the kernel allows it but only on counters 0 & 1 to enforce
+	 * multiplexing, which is required to protect BRS in case of
+	 * multiple BRS users
+	 */
+	if (amd_is_pair_event_code(hwc)) {
+		return has_brs ? &amd_fam19h_brs_pair_cntr0_constraint
+			       : &pair_constraint;
+	}
+
+	if (has_brs)
+		return &amd_fam19h_brs_cntr0_constraint;
+
+	return &unconstrained;
+}
+
+
 static ssize_t amd_event_sysfs_show(char *page, u64 config)
 {
 	u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT) |
@@ -905,12 +1248,31 @@ static ssize_t amd_event_sysfs_show(char *page, u64 config)
 	return x86_event_sysfs_show(page, config, event);
 }
 
+static void amd_pmu_sched_task(struct perf_event_context *ctx,
+			       bool sched_in)
+{
+	if (sched_in && x86_pmu.lbr_nr)
+		amd_pmu_brs_sched_task(ctx, sched_in);
+}
+
+static u64 amd_pmu_limit_period(struct perf_event *event, u64 left)
+{
+	/*
+	 * Decrease period by the depth of the BRS feature to get the last N
+	 * taken branches and approximate the desired period
+	 */
+	if (has_branch_stack(event) && left > x86_pmu.lbr_nr)
+		left -= x86_pmu.lbr_nr;
+
+	return left;
+}
+
 static __initconst const struct x86_pmu amd_pmu = {
 	.name			= "AMD",
 	.handle_irq		= amd_pmu_handle_irq,
 	.disable_all		= amd_pmu_disable_all,
-	.enable_all		= x86_pmu_enable_all,
-	.enable			= x86_pmu_enable_event,
+	.enable_all		= amd_pmu_enable_all,
+	.enable			= amd_pmu_enable_event,
 	.disable		= amd_pmu_disable_event,
 	.hw_config		= amd_pmu_hw_config,
 	.schedule_events	= x86_schedule_events,
@@ -920,6 +1282,8 @@ static __initconst const struct x86_pmu amd_pmu = {
 	.event_map		= amd_pmu_event_map,
 	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
 	.num_counters		= AMD64_NUM_COUNTERS,
+	.add			= amd_pmu_add_event,
+	.del			= amd_pmu_del_event,
 	.cntval_bits		= 48,
 	.cntval_mask		= (1ULL << 48) - 1,
 	.apic			= 1,
@@ -938,8 +1302,55 @@ static __initconst const struct x86_pmu amd_pmu = {
 	.amd_nb_constraints	= 1,
 };
 
+static ssize_t branches_show(struct device *cdev,
+			     struct device_attribute *attr,
+			     char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%d\n", x86_pmu.lbr_nr);
+}
+
+static DEVICE_ATTR_RO(branches);
+
+static struct attribute *amd_pmu_brs_attrs[] = {
+	&dev_attr_branches.attr,
+	NULL,
+};
+
+static umode_t
+amd_brs_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+	return x86_pmu.lbr_nr ? attr->mode : 0;
+}
+
+static struct attribute_group group_caps_amd_brs = {
+	.name  = "caps",
+	.attrs = amd_pmu_brs_attrs,
+	.is_visible = amd_brs_is_visible,
+};
+
+EVENT_ATTR_STR(branch-brs, amd_branch_brs,
+	       "event=" __stringify(AMD_FAM19H_BRS_EVENT)"\n");
+
+static struct attribute *amd_brs_events_attrs[] = {
+	EVENT_PTR(amd_branch_brs),
+	NULL,
+};
+
+static struct attribute_group group_events_amd_brs = {
+	.name  = "events",
+	.attrs = amd_brs_events_attrs,
+	.is_visible = amd_brs_is_visible,
+};
+
+static const struct attribute_group *amd_attr_update[] = {
+	&group_caps_amd_brs,
+	&group_events_amd_brs,
+	NULL,
+};
+
 static int __init amd_core_pmu_init(void)
 {
+	union cpuid_0x80000022_ebx ebx;
 	u64 even_ctr_mask = 0ULL;
 	int i;
@@ -957,6 +1368,27 @@
 	x86_pmu.eventsel	= MSR_F15H_PERF_CTL;
 	x86_pmu.perfctr		= MSR_F15H_PERF_CTR;
 	x86_pmu.num_counters	= AMD64_NUM_COUNTERS_CORE;
+
+	/* Check for Performance Monitoring v2 support */
+	if (boot_cpu_has(X86_FEATURE_PERFMON_V2)) {
+		ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES);
+
+		/* Update PMU version for later usage */
+		x86_pmu.version = 2;
+
+		/* Find the number of available Core PMCs */
+		x86_pmu.num_counters = ebx.split.num_core_pmc;
+
+		amd_pmu_global_cntr_mask = (1ULL << x86_pmu.num_counters) - 1;
+
+		/* Update PMC handling functions */
+		x86_pmu.enable_all = amd_pmu_v2_enable_all;
+		x86_pmu.disable_all = amd_pmu_v2_disable_all;
+		x86_pmu.enable = amd_pmu_v2_enable_event;
+		x86_pmu.handle_irq = amd_pmu_v2_handle_irq;
+		static_call_update(amd_pmu_test_overflow, amd_pmu_test_overflow_status);
+	}
+
 	/*
 	 * AMD Core perfctr has separate MSRs for the NB events, see
 	 * the amd/uncore.c driver.
@@ -989,6 +1421,23 @@
 		x86_pmu.flags |= PMU_FL_PAIR;
 	}
 
+	/*
+	 * BRS requires special event constraints and flushing on ctxsw.
+	 */
+	if (boot_cpu_data.x86 >= 0x19 && !amd_brs_init()) {
+		x86_pmu.get_event_constraints = amd_get_event_constraints_f19h;
+		x86_pmu.sched_task = amd_pmu_sched_task;
+		x86_pmu.limit_period = amd_pmu_limit_period;
+		/*
+		 * put_event_constraints callback same as Fam17h, set above
+		 */
+
+		/* branch sampling must be stopped when entering low power */
+		amd_brs_lopwr_init();
+	}
+
+	x86_pmu.attr_update = amd_attr_update;
+
 	pr_cont("core perfctr, ");
 	return 0;
 }
@@ -1023,6 +1472,24 @@ __init int amd_pmu_init(void)
 	return 0;
 }
 
+static inline void amd_pmu_reload_virt(void)
+{
+	if (x86_pmu.version >= 2) {
+		/*
+		 * Clear global enable bits, reprogram the PERF_CTL
+		 * registers with updated perf_ctr_virt_mask and then
+		 * set global enable bits once again
+		 */
+		amd_pmu_v2_disable_all();
+		amd_pmu_enable_all(0);
+		amd_pmu_v2_enable_all(0);
+		return;
+	}
+
+	amd_pmu_disable_all();
+	amd_pmu_enable_all(0);
+}
+
 void amd_pmu_enable_virt(void)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
@@ -1030,8 +1497,7 @@ void amd_pmu_enable_virt(void)
 	cpuc->perf_ctr_virt_mask = 0;
 
 	/* Reload all events */
-	amd_pmu_disable_all();
-	x86_pmu_enable_all(0);
+	amd_pmu_reload_virt();
 }
 EXPORT_SYMBOL_GPL(amd_pmu_enable_virt);
@@ -1048,7 +1514,6 @@
 	cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
 
 	/* Reload all events */
-	amd_pmu_disable_all();
-	x86_pmu_enable_all(0);
+	amd_pmu_reload_virt();
 }
EXPORT_SYMBOL_GPL(amd_pmu_disable_virt);
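Taken together, the amd_core_hw_config() checks in this patch define a narrow contract for a userspace consumer of BRS: the event must be the retired-taken-branches event (raw code 0xc4), it must be a sampling event, frequency mode is rejected, and the fixed sample period must exceed the BRS depth (16 entries on Fam19h, per the "Effective P = P + 16 + X" comment). The following is a minimal sketch of a conforming perf_event_open() call; it is not part of the patch, and the period value, privilege settings, and omitted ring-buffer plumbing are illustrative assumptions only.

```c
/* Sketch: open a BRS-compatible sampling event on AMD Fam19h. */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>

int main(void)
{
	struct perf_event_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_RAW;
	attr.config = 0xc4;	/* AMD_FAM19H_BRS_EVENT: retired taken branches */

	/* Request branch stack samples; BRS supports only the ANY filter */
	attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK;
	attr.branch_sample_type = PERF_SAMPLE_BRANCH_ANY;

	/*
	 * Fixed period only: attr.freq is rejected with -EINVAL, and the
	 * period must exceed x86_pmu.lbr_nr (16) because the kernel
	 * subtracts the BRS depth from it. The value here is arbitrary.
	 */
	attr.sample_period = 100003;
	attr.freq = 0;

	attr.exclude_kernel = 1;	/* keep the example unprivileged-friendly */

	/* pid = 0, cpu = -1: profile the calling thread on any CPU */
	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	/*
	 * Samples, each carrying up to 16 branch entries, would now be
	 * consumed from an mmap'd ring buffer (omitted in this sketch).
	 */
	close(fd);
	return 0;
}
```

The events/branch-brs alias registered above encodes the same raw event, so with the perf tool an equivalent invocation would be along the lines of `perf record -b -e cpu/branch-brs/ -c 100003 -- <workload>`, with the period again being illustrative.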