diff options
Diffstat (limited to 'arch/x86/events/core.c')
-rw-r--r-- | arch/x86/events/core.c | 787 |
1 files changed, 577 insertions, 210 deletions
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 3bb738f5a472..b30b8bbcd1e2 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -28,6 +28,7 @@ #include <linux/bitops.h> #include <linux/device.h> #include <linux/nospec.h> +#include <linux/static_call.h> #include <asm/apic.h> #include <asm/stacktrace.h> @@ -44,13 +45,56 @@ #include "perf_event.h" struct x86_pmu x86_pmu __read_mostly; +static struct pmu pmu; DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, + .pmu = &pmu, }; DEFINE_STATIC_KEY_FALSE(rdpmc_never_available_key); DEFINE_STATIC_KEY_FALSE(rdpmc_always_available_key); +DEFINE_STATIC_KEY_FALSE(perf_is_hybrid); + +/* + * This here uses DEFINE_STATIC_CALL_NULL() to get a static_call defined + * from just a typename, as opposed to an actual function. + */ +DEFINE_STATIC_CALL_NULL(x86_pmu_handle_irq, *x86_pmu.handle_irq); +DEFINE_STATIC_CALL_NULL(x86_pmu_disable_all, *x86_pmu.disable_all); +DEFINE_STATIC_CALL_NULL(x86_pmu_enable_all, *x86_pmu.enable_all); +DEFINE_STATIC_CALL_NULL(x86_pmu_enable, *x86_pmu.enable); +DEFINE_STATIC_CALL_NULL(x86_pmu_disable, *x86_pmu.disable); + +DEFINE_STATIC_CALL_NULL(x86_pmu_assign, *x86_pmu.assign); + +DEFINE_STATIC_CALL_NULL(x86_pmu_add, *x86_pmu.add); +DEFINE_STATIC_CALL_NULL(x86_pmu_del, *x86_pmu.del); +DEFINE_STATIC_CALL_NULL(x86_pmu_read, *x86_pmu.read); + +DEFINE_STATIC_CALL_NULL(x86_pmu_set_period, *x86_pmu.set_period); +DEFINE_STATIC_CALL_NULL(x86_pmu_update, *x86_pmu.update); +DEFINE_STATIC_CALL_NULL(x86_pmu_limit_period, *x86_pmu.limit_period); + +DEFINE_STATIC_CALL_NULL(x86_pmu_schedule_events, *x86_pmu.schedule_events); +DEFINE_STATIC_CALL_NULL(x86_pmu_get_event_constraints, *x86_pmu.get_event_constraints); +DEFINE_STATIC_CALL_NULL(x86_pmu_put_event_constraints, *x86_pmu.put_event_constraints); + +DEFINE_STATIC_CALL_NULL(x86_pmu_start_scheduling, *x86_pmu.start_scheduling); +DEFINE_STATIC_CALL_NULL(x86_pmu_commit_scheduling, *x86_pmu.commit_scheduling); +DEFINE_STATIC_CALL_NULL(x86_pmu_stop_scheduling, *x86_pmu.stop_scheduling); + +DEFINE_STATIC_CALL_NULL(x86_pmu_sched_task, *x86_pmu.sched_task); +DEFINE_STATIC_CALL_NULL(x86_pmu_swap_task_ctx, *x86_pmu.swap_task_ctx); + +DEFINE_STATIC_CALL_NULL(x86_pmu_drain_pebs, *x86_pmu.drain_pebs); +DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_aliases, *x86_pmu.pebs_aliases); + +/* + * This one is magic, it will get called even when PMU init fails (because + * there is no PMU), in which case it should simply return NULL. + */ +DEFINE_STATIC_CALL_RET0(x86_pmu_guest_get_msrs, *x86_pmu.guest_get_msrs); u64 __read_mostly hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] @@ -71,10 +115,9 @@ u64 x86_perf_event_update(struct perf_event *event) struct hw_perf_event *hwc = &event->hw; int shift = 64 - x86_pmu.cntval_bits; u64 prev_raw_count, new_raw_count; - int idx = hwc->idx; u64 delta; - if (idx == INTEL_PMC_IDX_FIXED_BTS) + if (unlikely(!hwc->event_base)) return 0; /* @@ -114,15 +157,16 @@ again: */ static int x86_pmu_extra_regs(u64 config, struct perf_event *event) { + struct extra_reg *extra_regs = hybrid(event->pmu, extra_regs); struct hw_perf_event_extra *reg; struct extra_reg *er; reg = &event->hw.extra_reg; - if (!x86_pmu.extra_regs) + if (!extra_regs) return 0; - for (er = x86_pmu.extra_regs; er->msr; er++) { + for (er = extra_regs; er->msr; er++) { if (er->event != (config & er->config_mask)) continue; if (event->attr.config1 & ~er->valid_mask) @@ -145,16 +189,29 @@ static DEFINE_MUTEX(pmc_reserve_mutex); #ifdef CONFIG_X86_LOCAL_APIC +static inline int get_possible_num_counters(void) +{ + int i, num_counters = x86_pmu.num_counters; + + if (!is_hybrid()) + return num_counters; + + for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) + num_counters = max_t(int, num_counters, x86_pmu.hybrid_pmu[i].num_counters); + + return num_counters; +} + static bool reserve_pmc_hardware(void) { - int i; + int i, num_counters = get_possible_num_counters(); - for (i = 0; i < x86_pmu.num_counters; i++) { + for (i = 0; i < num_counters; i++) { if (!reserve_perfctr_nmi(x86_pmu_event_addr(i))) goto perfctr_fail; } - for (i = 0; i < x86_pmu.num_counters; i++) { + for (i = 0; i < num_counters; i++) { if (!reserve_evntsel_nmi(x86_pmu_config_addr(i))) goto eventsel_fail; } @@ -165,7 +222,7 @@ eventsel_fail: for (i--; i >= 0; i--) release_evntsel_nmi(x86_pmu_config_addr(i)); - i = x86_pmu.num_counters; + i = num_counters; perfctr_fail: for (i--; i >= 0; i--) @@ -176,9 +233,9 @@ perfctr_fail: static void release_pmc_hardware(void) { - int i; + int i, num_counters = get_possible_num_counters(); - for (i = 0; i < x86_pmu.num_counters; i++) { + for (i = 0; i < num_counters; i++) { release_perfctr_nmi(x86_pmu_event_addr(i)); release_evntsel_nmi(x86_pmu_config_addr(i)); } @@ -191,7 +248,7 @@ static void release_pmc_hardware(void) {} #endif -static bool check_hw_exists(void) +bool check_hw_exists(struct pmu *pmu, int num_counters, int num_counters_fixed) { u64 val, val_fail = -1, val_new= ~0; int i, reg, reg_fail = -1, ret = 0; @@ -202,7 +259,7 @@ static bool check_hw_exists(void) * Check to see if the BIOS enabled any of the counters, if so * complain and bail. */ - for (i = 0; i < x86_pmu.num_counters; i++) { + for (i = 0; i < num_counters; i++) { reg = x86_pmu_config_addr(i); ret = rdmsrl_safe(reg, &val); if (ret) @@ -216,13 +273,15 @@ static bool check_hw_exists(void) } } - if (x86_pmu.num_counters_fixed) { + if (num_counters_fixed) { reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; ret = rdmsrl_safe(reg, &val); if (ret) goto msr_fail; - for (i = 0; i < x86_pmu.num_counters_fixed; i++) { - if (val & (0x03 << i*4)) { + for (i = 0; i < num_counters_fixed; i++) { + if (fixed_counter_disabled(i, pmu)) + continue; + if (val & (0x03ULL << i*4)) { bios_fail = 1; val_fail = val; reg_fail = reg; @@ -321,8 +380,7 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event) return -EINVAL; cache_result = array_index_nospec(cache_result, PERF_COUNT_HW_CACHE_RESULT_MAX); - val = hw_cache_event_ids[cache_type][cache_op][cache_result]; - + val = hybrid_var(event->pmu, hw_cache_event_ids)[cache_type][cache_op][cache_result]; if (val == 0) return -ENOENT; @@ -330,7 +388,7 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event) return -EINVAL; hwc->config |= val; - attr->config1 = hw_cache_extra_regs[cache_type][cache_op][cache_result]; + attr->config1 = hybrid_var(event->pmu, hw_cache_extra_regs)[cache_type][cache_op][cache_result]; return x86_pmu_extra_regs(val, event); } @@ -341,10 +399,12 @@ int x86_reserve_hardware(void) if (!atomic_inc_not_zero(&pmc_refcount)) { mutex_lock(&pmc_reserve_mutex); if (atomic_read(&pmc_refcount) == 0) { - if (!reserve_pmc_hardware()) + if (!reserve_pmc_hardware()) { err = -EBUSY; - else + } else { reserve_ds_buffers(); + reserve_lbr_buffers(); + } } if (!err) atomic_inc(&pmc_refcount); @@ -359,6 +419,7 @@ void x86_release_hardware(void) if (atomic_dec_and_mutex_lock(&pmc_refcount, &pmc_reserve_mutex)) { release_pmc_hardware(); release_ds_buffers(); + release_lbr_buffers(); mutex_unlock(&pmc_reserve_mutex); } } @@ -422,7 +483,7 @@ int x86_setup_perfctr(struct perf_event *event) local64_set(&hwc->period_left, hwc->sample_period); } - if (attr->type == PERF_TYPE_RAW) + if (attr->type == event->pmu->type) return x86_pmu_extra_regs(event->attr.config, event); if (attr->type == PERF_TYPE_HW_CACHE) @@ -557,12 +618,13 @@ int x86_pmu_hw_config(struct perf_event *event) if (!event->attr.exclude_kernel) event->hw.config |= ARCH_PERFMON_EVENTSEL_OS; - if (event->attr.type == PERF_TYPE_RAW) + if (event->attr.type == event->pmu->type) event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK; if (event->attr.sample_period && x86_pmu.limit_period) { - if (x86_pmu.limit_period(event, event->attr.sample_period) > - event->attr.sample_period) + s64 left = event->attr.sample_period; + x86_pmu.limit_period(event, &left); + if (left > event->attr.sample_period) return -EINVAL; } @@ -633,6 +695,12 @@ void x86_pmu_disable_all(void) } } +struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr, void *data) +{ + return static_call(x86_pmu_guest_get_msrs)(nr, data); +} +EXPORT_SYMBOL_GPL(perf_guest_get_msrs); + /* * There may be PMI landing after enabled=0. The PMI hitting could be before or * after disable_all. @@ -660,7 +728,7 @@ static void x86_pmu_disable(struct pmu *pmu) cpuc->enabled = 0; barrier(); - x86_pmu.disable_all(); + static_call(x86_pmu_disable_all)(); } void x86_pmu_enable_all(int added) @@ -678,16 +746,33 @@ void x86_pmu_enable_all(int added) } } -static struct pmu pmu; - static inline int is_x86_event(struct perf_event *event) { - return event->pmu == &pmu; + int i; + + if (!is_hybrid()) + return event->pmu == &pmu; + + for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) { + if (event->pmu == &x86_pmu.hybrid_pmu[i].pmu) + return true; + } + + return false; } -struct pmu *x86_get_pmu(void) +struct pmu *x86_get_pmu(unsigned int cpu) { - return &pmu; + struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); + + /* + * All CPUs of the hybrid type have been offline. + * The x86_get_pmu() should not be invoked. + */ + if (WARN_ON_ONCE(!cpuc->pmu)) + return &pmu; + + return cpuc->pmu; } /* * Event scheduler state: @@ -719,7 +804,7 @@ struct perf_sched { }; /* - * Initialize interator that runs through all events and counters. + * Initialize iterator that runs through all events and counters. */ static void perf_sched_init(struct perf_sched *sched, struct event_constraint **constraints, int num, int wmin, int wmax, int gpmax) @@ -890,6 +975,7 @@ EXPORT_SYMBOL_GPL(perf_assign_events); int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) { + int num_counters = hybrid(cpuc->pmu, num_counters); struct event_constraint *c; struct perf_event *e; int n0, i, wmin, wmax, unsched = 0; @@ -907,8 +993,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) if (cpuc->txn_flags & PERF_PMU_TXN_ADD) n0 -= cpuc->n_txn; - if (x86_pmu.start_scheduling) - x86_pmu.start_scheduling(cpuc); + static_call_cond(x86_pmu_start_scheduling)(cpuc); for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) { c = cpuc->event_constraint[i]; @@ -925,7 +1010,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) * change due to external factors (sibling state, allow_tfa). */ if (!c || (c->flags & PERF_X86_EVENT_DYNAMIC)) { - c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]); + c = static_call(x86_pmu_get_event_constraints)(cpuc, i, cpuc->event_list[i]); cpuc->event_constraint[i] = c; } @@ -966,7 +1051,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) /* slow path */ if (i != n) { - int gpmax = x86_pmu.num_counters; + int gpmax = num_counters; /* * Do not allow scheduling of more than half the available @@ -987,7 +1072,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) * the extra Merge events needed by large increment events. */ if (x86_pmu.flags & PMU_FL_PAIR) { - gpmax = x86_pmu.num_counters - cpuc->n_pair; + gpmax = num_counters - cpuc->n_pair; WARN_ON(gpmax <= 0); } @@ -1006,11 +1091,8 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) * validate an event group (assign == NULL) */ if (!unsched && assign) { - for (i = 0; i < n; i++) { - e = cpuc->event_list[i]; - if (x86_pmu.commit_scheduling) - x86_pmu.commit_scheduling(cpuc, i, assign[i]); - } + for (i = 0; i < n; i++) + static_call_cond(x86_pmu_commit_scheduling)(cpuc, i, assign[i]); } else { for (i = n0; i < n; i++) { e = cpuc->event_list[i]; @@ -1018,29 +1100,69 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) /* * release events that failed scheduling */ - if (x86_pmu.put_event_constraints) - x86_pmu.put_event_constraints(cpuc, e); + static_call_cond(x86_pmu_put_event_constraints)(cpuc, e); cpuc->event_constraint[i] = NULL; } } - if (x86_pmu.stop_scheduling) - x86_pmu.stop_scheduling(cpuc); + static_call_cond(x86_pmu_stop_scheduling)(cpuc); return unsched ? -EINVAL : 0; } +static int add_nr_metric_event(struct cpu_hw_events *cpuc, + struct perf_event *event) +{ + if (is_metric_event(event)) { + if (cpuc->n_metric == INTEL_TD_METRIC_NUM) + return -EINVAL; + cpuc->n_metric++; + cpuc->n_txn_metric++; + } + + return 0; +} + +static void del_nr_metric_event(struct cpu_hw_events *cpuc, + struct perf_event *event) +{ + if (is_metric_event(event)) + cpuc->n_metric--; +} + +static int collect_event(struct cpu_hw_events *cpuc, struct perf_event *event, + int max_count, int n) +{ + union perf_capabilities intel_cap = hybrid(cpuc->pmu, intel_cap); + + if (intel_cap.perf_metrics && add_nr_metric_event(cpuc, event)) + return -EINVAL; + + if (n >= max_count + cpuc->n_metric) + return -EINVAL; + + cpuc->event_list[n] = event; + if (is_counter_pair(&event->hw)) { + cpuc->n_pair++; + cpuc->n_txn_pair++; + } + + return 0; +} + /* * dogrp: true if must collect siblings events (group) * returns total number of events and error code */ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp) { + int num_counters = hybrid(cpuc->pmu, num_counters); + int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed); struct perf_event *event; int n, max_count; - max_count = x86_pmu.num_counters + x86_pmu.num_counters_fixed; + max_count = num_counters + num_counters_fixed; /* current number of events already accepted */ n = cpuc->n_events; @@ -1067,28 +1189,22 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, } if (is_x86_event(leader)) { - if (n >= max_count) + if (collect_event(cpuc, leader, max_count, n)) return -EINVAL; - cpuc->event_list[n] = leader; n++; - if (is_counter_pair(&leader->hw)) - cpuc->n_pair++; } + if (!dogrp) return n; for_each_sibling_event(event, leader) { - if (!is_x86_event(event) || - event->state <= PERF_EVENT_STATE_OFF) + if (!is_x86_event(event) || event->state <= PERF_EVENT_STATE_OFF) continue; - if (n >= max_count) + if (collect_event(cpuc, event, max_count, n)) return -EINVAL; - cpuc->event_list[n] = event; n++; - if (is_counter_pair(&event->hw)) - cpuc->n_pair++; } return n; } @@ -1097,22 +1213,38 @@ static inline void x86_assign_hw_event(struct perf_event *event, struct cpu_hw_events *cpuc, int i) { struct hw_perf_event *hwc = &event->hw; + int idx; - hwc->idx = cpuc->assign[i]; + idx = hwc->idx = cpuc->assign[i]; hwc->last_cpu = smp_processor_id(); hwc->last_tag = ++cpuc->tags[i]; - if (hwc->idx == INTEL_PMC_IDX_FIXED_BTS) { + static_call_cond(x86_pmu_assign)(event, idx); + + switch (hwc->idx) { + case INTEL_PMC_IDX_FIXED_BTS: + case INTEL_PMC_IDX_FIXED_VLBR: hwc->config_base = 0; hwc->event_base = 0; - } else if (hwc->idx >= INTEL_PMC_IDX_FIXED) { + break; + + case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END: + /* All the metric events are mapped onto the fixed counter 3. */ + idx = INTEL_PMC_IDX_FIXED_SLOTS; + fallthrough; + case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS-1: hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; - hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - INTEL_PMC_IDX_FIXED); - hwc->event_base_rdpmc = (hwc->idx - INTEL_PMC_IDX_FIXED) | 1<<30; - } else { + hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + + (idx - INTEL_PMC_IDX_FIXED); + hwc->event_base_rdpmc = (idx - INTEL_PMC_IDX_FIXED) | + INTEL_PMC_FIXED_RDPMC_BASE; + break; + + default: hwc->config_base = x86_pmu_config_addr(hwc->idx); hwc->event_base = x86_pmu_event_addr(hwc->idx); hwc->event_base_rdpmc = x86_pmu_rdpmc_index(hwc->idx); + break; } } @@ -1208,6 +1340,10 @@ static void x86_pmu_enable(struct pmu *pmu) if (hwc->state & PERF_HES_ARCH) continue; + /* + * if cpuc->enabled = 0, then no wrmsr as + * per x86_pmu_enable_event() + */ x86_pmu_start(event, PERF_EF_RELOAD); } cpuc->n_added = 0; @@ -1217,10 +1353,10 @@ static void x86_pmu_enable(struct pmu *pmu) cpuc->enabled = 1; barrier(); - x86_pmu.enable_all(added); + static_call(x86_pmu_enable_all)(added); } -static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); +DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); /* * Set the next IRQ period, based on the hwc->period_left value. @@ -1233,7 +1369,7 @@ int x86_perf_event_set_period(struct perf_event *event) s64 period = hwc->sample_period; int ret = 0, idx = hwc->idx; - if (idx == INTEL_PMC_IDX_FIXED_BTS) + if (unlikely(!hwc->event_base)) return 0; /* @@ -1261,10 +1397,9 @@ int x86_perf_event_set_period(struct perf_event *event) if (left > x86_pmu.max_period) left = x86_pmu.max_period; - if (x86_pmu.limit_period) - left = x86_pmu.limit_period(event, left); + static_call_cond(x86_pmu_limit_period)(event, &left); - per_cpu(pmc_prev_left[idx], smp_processor_id()) = left; + this_cpu_write(pmc_prev_left[idx], left); /* * The hw event starts counting from this event offset, @@ -1275,21 +1410,11 @@ int x86_perf_event_set_period(struct perf_event *event) wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask); /* - * Clear the Merge event counter's upper 16 bits since + * Sign extend the Merge event counter's upper 16 bits since * we currently declare a 48-bit counter width */ if (is_counter_pair(hwc)) - wrmsrl(x86_pmu_event_addr(idx + 1), 0); - - /* - * Due to erratum on certan cpu we need - * a second write to be sure the register - * is updated properly - */ - if (x86_pmu.perfctr_second_write) { - wrmsrl(hwc->event_base, - (u64)(-left) & x86_pmu.cntval_mask); - } + wrmsrl(x86_pmu_event_addr(idx + 1), 0xffff); perf_event_update_userpage(event); @@ -1338,7 +1463,7 @@ static int x86_pmu_add(struct perf_event *event, int flags) if (cpuc->txn_flags & PERF_PMU_TXN_ADD) goto done_collect; - ret = x86_pmu.schedule_events(cpuc, n, assign); + ret = static_call(x86_pmu_schedule_events)(cpuc, n, assign); if (ret) goto out; /* @@ -1356,13 +1481,11 @@ done_collect: cpuc->n_added += n - n0; cpuc->n_txn += n - n0; - if (x86_pmu.add) { - /* - * This is before x86_pmu_enable() will call x86_pmu_start(), - * so we enable LBRs before an event needs them etc.. - */ - x86_pmu.add(event); - } + /* + * This is before x86_pmu_enable() will call x86_pmu_start(), + * so we enable LBRs before an event needs them etc.. + */ + static_call_cond(x86_pmu_add)(event); ret = 0; out: @@ -1382,15 +1505,14 @@ static void x86_pmu_start(struct perf_event *event, int flags) if (flags & PERF_EF_RELOAD) { WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); - x86_perf_event_set_period(event); + static_call(x86_pmu_set_period)(event); } event->hw.state = 0; cpuc->events[idx] = event; __set_bit(idx, cpuc->active_mask); - __set_bit(idx, cpuc->running); - x86_pmu.enable(event); + static_call(x86_pmu_enable)(event); perf_event_update_userpage(event); } @@ -1398,18 +1520,19 @@ void perf_event_print_debug(void) { u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; u64 pebs, debugctl; - struct cpu_hw_events *cpuc; + int cpu = smp_processor_id(); + struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); + int num_counters = hybrid(cpuc->pmu, num_counters); + int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed); + struct event_constraint *pebs_constraints = hybrid(cpuc->pmu, pebs_constraints); unsigned long flags; - int cpu, idx; + int idx; - if (!x86_pmu.num_counters) + if (!num_counters) return; local_irq_save(flags); - cpu = smp_processor_id(); - cpuc = &per_cpu(cpu_hw_events, cpu); - if (x86_pmu.version >= 2) { rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); @@ -1421,7 +1544,7 @@ void perf_event_print_debug(void) pr_info("CPU#%d: status: %016llx\n", cpu, status); pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); - if (x86_pmu.pebs_constraints) { + if (pebs_constraints) { rdmsrl(MSR_IA32_PEBS_ENABLE, pebs); pr_info("CPU#%d: pebs: %016llx\n", cpu, pebs); } @@ -1432,7 +1555,7 @@ void perf_event_print_debug(void) } pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); - for (idx = 0; idx < x86_pmu.num_counters; idx++) { + for (idx = 0; idx < num_counters; idx++) { rdmsrl(x86_pmu_config_addr(idx), pmc_ctrl); rdmsrl(x86_pmu_event_addr(idx), pmc_count); @@ -1445,7 +1568,9 @@ void perf_event_print_debug(void) pr_info("CPU#%d: gen-PMC%d left: %016llx\n", cpu, idx, prev_left); } - for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) { + for (idx = 0; idx < num_counters_fixed; idx++) { + if (fixed_counter_disabled(idx, cpuc->pmu)) + continue; rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count); pr_info("CPU#%d: fixed-PMC%d count: %016llx\n", @@ -1460,7 +1585,7 @@ void x86_pmu_stop(struct perf_event *event, int flags) struct hw_perf_event *hwc = &event->hw; if (test_bit(hwc->idx, cpuc->active_mask)) { - x86_pmu.disable(event); + static_call(x86_pmu_disable)(event); __clear_bit(hwc->idx, cpuc->active_mask); cpuc->events[hwc->idx] = NULL; WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); @@ -1472,7 +1597,7 @@ void x86_pmu_stop(struct perf_event *event, int flags) * Drain the remaining delta count out of a event * that we are disabling: */ - x86_perf_event_update(event); + static_call(x86_pmu_update)(event); hwc->state |= PERF_HES_UPTODATE; } } @@ -1480,6 +1605,7 @@ void x86_pmu_stop(struct perf_event *event, int flags) static void x86_pmu_del(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + union perf_capabilities intel_cap = hybrid(cpuc->pmu, intel_cap); int i; /* @@ -1493,6 +1619,8 @@ static void x86_pmu_del(struct perf_event *event, int flags) if (cpuc->txn_flags & PERF_PMU_TXN_ADD) goto do_del; + __set_bit(event->hw.idx, cpuc->dirty); + /* * Not a TXN, therefore cleanup properly. */ @@ -1510,8 +1638,7 @@ static void x86_pmu_del(struct perf_event *event, int flags) if (i >= cpuc->n_events - cpuc->n_added) --cpuc->n_added; - if (x86_pmu.put_event_constraints) - x86_pmu.put_event_constraints(cpuc, event); + static_call_cond(x86_pmu_put_event_constraints)(cpuc, event); /* Delete the array entry. */ while (++i < cpuc->n_events) { @@ -1520,17 +1647,18 @@ static void x86_pmu_del(struct perf_event *event, int flags) } cpuc->event_constraint[i-1] = NULL; --cpuc->n_events; + if (intel_cap.perf_metrics) + del_nr_metric_event(cpuc, event); perf_event_update_userpage(event); do_del: - if (x86_pmu.del) { - /* - * This is after x86_pmu_stop(); so we disable LBRs after any - * event can need them etc.. - */ - x86_pmu.del(event); - } + + /* + * This is after x86_pmu_stop(); so we disable LBRs after any + * event can need them etc.. + */ + static_call_cond(x86_pmu_del)(event); } int x86_pmu_handle_irq(struct pt_regs *regs) @@ -1559,7 +1687,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs) event = cpuc->events[idx]; - val = x86_perf_event_update(event); + val = static_call(x86_pmu_update)(event); if (val & (1ULL << (x86_pmu.cntval_bits - 1))) continue; @@ -1567,11 +1695,17 @@ int x86_pmu_handle_irq(struct pt_regs *regs) * event overflow */ handled++; - perf_sample_data_init(&data, 0, event->hw.last_period); - if (!x86_perf_event_set_period(event)) + if (!static_call(x86_pmu_set_period)(event)) continue; + perf_sample_data_init(&data, 0, event->hw.last_period); + + if (has_branch_stack(event)) { + data.br_stack = &cpuc->lbr_stack; + data.sample_flags |= PERF_SAMPLE_BRANCH_STACK; + } + if (perf_event_overflow(event, &data, regs)) x86_pmu_stop(event, 0); } @@ -1608,7 +1742,7 @@ perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs) return NMI_DONE; start_clock = sched_clock(); - ret = x86_pmu.handle_irq(regs); + ret = static_call(x86_pmu_handle_irq)(regs); finish_clock = sched_clock(); perf_sample_event_took(finish_clock - start_clock); @@ -1700,7 +1834,7 @@ ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, cha /* string trumps id */ if (pmu_attr->event_str) - return sprintf(page, "%s", pmu_attr->event_str); + return sprintf(page, "%s\n", pmu_attr->event_str); return x86_pmu.events_sysfs_show(page, config); } @@ -1729,6 +1863,49 @@ ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr, pmu_attr->event_str_noht); } +ssize_t events_hybrid_sysfs_show(struct device *dev, + struct device_attribute *attr, + char *page) +{ + struct perf_pmu_events_hybrid_attr *pmu_attr = + container_of(attr, struct perf_pmu_events_hybrid_attr, attr); + struct x86_hybrid_pmu *pmu; + const char *str, *next_str; + int i; + + if (hweight64(pmu_attr->pmu_type) == 1) + return sprintf(page, "%s", pmu_attr->event_str); + + /* + * Hybrid PMUs may support the same event name, but with different + * event encoding, e.g., the mem-loads event on an Atom PMU has + * different event encoding from a Core PMU. + * + * The event_str includes all event encodings. Each event encoding + * is divided by ";". The order of the event encodings must follow + * the order of the hybrid PMU index. + */ + pmu = container_of(dev_get_drvdata(dev), struct x86_hybrid_pmu, pmu); + + str = pmu_attr->event_str; + for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) { + if (!(x86_pmu.hybrid_pmu[i].cpu_type & pmu_attr->pmu_type)) + continue; + if (x86_pmu.hybrid_pmu[i].cpu_type & pmu->cpu_type) { + next_str = strchr(str, ';'); + if (next_str) + return snprintf(page, next_str - str + 1, "%s", str); + else + return sprintf(page, "%s", str); + } + str = strchr(str, ';'); + str++; + } + + return 0; +} +EXPORT_SYMBOL_GPL(events_hybrid_sysfs_show); + EVENT_ATTR(cpu-cycles, CPU_CYCLES ); EVENT_ATTR(instructions, INSTRUCTIONS ); EVENT_ATTR(cache-references, CACHE_REFERENCES ); @@ -1821,6 +1998,77 @@ ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event) static struct attribute_group x86_pmu_attr_group; static struct attribute_group x86_pmu_caps_group; +static void x86_pmu_static_call_update(void) +{ + static_call_update(x86_pmu_handle_irq, x86_pmu.handle_irq); + static_call_update(x86_pmu_disable_all, x86_pmu.disable_all); + static_call_update(x86_pmu_enable_all, x86_pmu.enable_all); + static_call_update(x86_pmu_enable, x86_pmu.enable); + static_call_update(x86_pmu_disable, x86_pmu.disable); + + static_call_update(x86_pmu_assign, x86_pmu.assign); + + static_call_update(x86_pmu_add, x86_pmu.add); + static_call_update(x86_pmu_del, x86_pmu.del); + static_call_update(x86_pmu_read, x86_pmu.read); + + static_call_update(x86_pmu_set_period, x86_pmu.set_period); + static_call_update(x86_pmu_update, x86_pmu.update); + static_call_update(x86_pmu_limit_period, x86_pmu.limit_period); + + static_call_update(x86_pmu_schedule_events, x86_pmu.schedule_events); + static_call_update(x86_pmu_get_event_constraints, x86_pmu.get_event_constraints); + static_call_update(x86_pmu_put_event_constraints, x86_pmu.put_event_constraints); + + static_call_update(x86_pmu_start_scheduling, x86_pmu.start_scheduling); + static_call_update(x86_pmu_commit_scheduling, x86_pmu.commit_scheduling); + static_call_update(x86_pmu_stop_scheduling, x86_pmu.stop_scheduling); + + static_call_update(x86_pmu_sched_task, x86_pmu.sched_task); + static_call_update(x86_pmu_swap_task_ctx, x86_pmu.swap_task_ctx); + + static_call_update(x86_pmu_drain_pebs, x86_pmu.drain_pebs); + static_call_update(x86_pmu_pebs_aliases, x86_pmu.pebs_aliases); + + static_call_update(x86_pmu_guest_get_msrs, x86_pmu.guest_get_msrs); +} + +static void _x86_pmu_read(struct perf_event *event) +{ + static_call(x86_pmu_update)(event); +} + +void x86_pmu_show_pmu_cap(int num_counters, int num_counters_fixed, + u64 intel_ctrl) +{ + pr_info("... version: %d\n", x86_pmu.version); + pr_info("... bit width: %d\n", x86_pmu.cntval_bits); + pr_info("... generic registers: %d\n", num_counters); + pr_info("... value mask: %016Lx\n", x86_pmu.cntval_mask); + pr_info("... max period: %016Lx\n", x86_pmu.max_period); + pr_info("... fixed-purpose events: %lu\n", + hweight64((((1ULL << num_counters_fixed) - 1) + << INTEL_PMC_IDX_FIXED) & intel_ctrl)); + pr_info("... event mask: %016Lx\n", intel_ctrl); +} + +/* + * The generic code is not hybrid friendly. The hybrid_pmu->pmu + * of the first registered PMU is unconditionally assigned to + * each possible cpuctx->ctx.pmu. + * Update the correct hybrid PMU to the cpuctx->ctx.pmu. + */ +void x86_pmu_update_cpu_context(struct pmu *pmu, int cpu) +{ + struct perf_cpu_context *cpuctx; + + if (!pmu->pmu_cpu_context) + return; + + cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); + cpuctx->ctx.pmu = pmu; +} + static int __init init_hw_perf_events(void) { struct x86_pmu_quirk *quirk; @@ -1839,19 +2087,24 @@ static int __init init_hw_perf_events(void) err = amd_pmu_init(); x86_pmu.name = "HYGON"; break; + case X86_VENDOR_ZHAOXIN: + case X86_VENDOR_CENTAUR: + err = zhaoxin_pmu_init(); + break; default: err = -ENOTSUPP; } if (err != 0) { pr_cont("no PMU driver, software events only.\n"); - return 0; + err = 0; + goto out_bad_pmu; } pmu_check_apic(); /* sanity check that the hardware exists or is emulated */ - if (!check_hw_exists()) - return 0; + if (!check_hw_exists(&pmu, x86_pmu.num_counters, x86_pmu.num_counters_fixed)) + goto out_bad_pmu; pr_cont("%s PMU driver.\n", x86_pmu.name); @@ -1877,13 +2130,25 @@ static int __init init_hw_perf_events(void) pmu.attr_update = x86_pmu.attr_update; - pr_info("... version: %d\n", x86_pmu.version); - pr_info("... bit width: %d\n", x86_pmu.cntval_bits); - pr_info("... generic registers: %d\n", x86_pmu.num_counters); - pr_info("... value mask: %016Lx\n", x86_pmu.cntval_mask); - pr_info("... max period: %016Lx\n", x86_pmu.max_period); - pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); - pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); + if (!is_hybrid()) { + x86_pmu_show_pmu_cap(x86_pmu.num_counters, + x86_pmu.num_counters_fixed, + x86_pmu.intel_ctrl); + } + + if (!x86_pmu.read) + x86_pmu.read = _x86_pmu_read; + + if (!x86_pmu.guest_get_msrs) + x86_pmu.guest_get_msrs = (void *)&__static_call_return0; + + if (!x86_pmu.set_period) + x86_pmu.set_period = x86_perf_event_set_period; + + if (!x86_pmu.update) + x86_pmu.update = x86_perf_event_update; + + x86_pmu_static_call_update(); /* * Install callbacks. Core will call them for each online @@ -1905,9 +2170,46 @@ static int __init init_hw_perf_events(void) if (err) goto out1; - err = perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW); - if (err) - goto out2; + if (!is_hybrid()) { + err = perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW); + if (err) + goto out2; + } else { + u8 cpu_type = get_this_hybrid_cpu_type(); + struct x86_hybrid_pmu *hybrid_pmu; + int i, j; + + if (!cpu_type && x86_pmu.get_hybrid_cpu_type) + cpu_type = x86_pmu.get_hybrid_cpu_type(); + + for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) { + hybrid_pmu = &x86_pmu.hybrid_pmu[i]; + + hybrid_pmu->pmu = pmu; + hybrid_pmu->pmu.type = -1; + hybrid_pmu->pmu.attr_update = x86_pmu.attr_update; + hybrid_pmu->pmu.capabilities |= PERF_PMU_CAP_HETEROGENEOUS_CPUS; + hybrid_pmu->pmu.capabilities |= PERF_PMU_CAP_EXTENDED_HW_TYPE; + + err = perf_pmu_register(&hybrid_pmu->pmu, hybrid_pmu->name, + (hybrid_pmu->cpu_type == hybrid_big) ? PERF_TYPE_RAW : -1); + if (err) + break; + + if (cpu_type == hybrid_pmu->cpu_type) + x86_pmu_update_cpu_context(&hybrid_pmu->pmu, raw_smp_processor_id()); + } + + if (i < x86_pmu.num_hybrid_pmus) { + for (j = 0; j < i; j++) + perf_pmu_unregister(&x86_pmu.hybrid_pmu[j].pmu); + pr_warn("Failed to register hybrid PMUs\n"); + kfree(x86_pmu.hybrid_pmu); + x86_pmu.hybrid_pmu = NULL; + x86_pmu.num_hybrid_pmus = 0; + goto out2; + } + } return 0; @@ -1917,15 +2219,15 @@ out1: cpuhp_remove_state(CPUHP_AP_PERF_X86_STARTING); out: cpuhp_remove_state(CPUHP_PERF_X86_PREPARE); +out_bad_pmu: + memset(&x86_pmu, 0, sizeof(x86_pmu)); return err; } early_initcall(init_hw_perf_events); -static inline void x86_pmu_read(struct perf_event *event) +static void x86_pmu_read(struct perf_event *event) { - if (x86_pmu.read) - return x86_pmu.read(event); - x86_perf_event_update(event); + static_call(x86_pmu_read)(event); } /* @@ -1949,6 +2251,8 @@ static void x86_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags) perf_pmu_disable(pmu); __this_cpu_write(cpu_hw_events.n_txn, 0); + __this_cpu_write(cpu_hw_events.n_txn_pair, 0); + __this_cpu_write(cpu_hw_events.n_txn_metric, 0); } /* @@ -1974,6 +2278,8 @@ static void x86_pmu_cancel_txn(struct pmu *pmu) */ __this_cpu_sub(cpu_hw_events.n_added, __this_cpu_read(cpu_hw_events.n_txn)); __this_cpu_sub(cpu_hw_events.n_events, __this_cpu_read(cpu_hw_events.n_txn)); + __this_cpu_sub(cpu_hw_events.n_pair, __this_cpu_read(cpu_hw_events.n_txn_pair)); + __this_cpu_sub(cpu_hw_events.n_metric, __this_cpu_read(cpu_hw_events.n_txn_metric)); perf_pmu_enable(pmu); } @@ -2002,7 +2308,7 @@ static int x86_pmu_commit_txn(struct pmu *pmu) if (!x86_pmu_initialized()) return -EAGAIN; - ret = x86_pmu.schedule_events(cpuc, n, assign); + ret = static_call(x86_pmu_schedule_events)(cpuc, n, assign); if (ret) return ret; @@ -2030,16 +2336,27 @@ static void free_fake_cpuc(struct cpu_hw_events *cpuc) kfree(cpuc); } -static struct cpu_hw_events *allocate_fake_cpuc(void) +static struct cpu_hw_events *allocate_fake_cpuc(struct pmu *event_pmu) { struct cpu_hw_events *cpuc; - int cpu = raw_smp_processor_id(); + int cpu; cpuc = kzalloc(sizeof(*cpuc), GFP_KERNEL); if (!cpuc) return ERR_PTR(-ENOMEM); cpuc->is_fake = 1; + if (is_hybrid()) { + struct x86_hybrid_pmu *h_pmu; + + h_pmu = hybrid_pmu(event_pmu); + if (cpumask_empty(&h_pmu->supported_cpus)) + goto error; + cpu = cpumask_first(&h_pmu->supported_cpus); + } else + cpu = raw_smp_processor_id(); + cpuc->pmu = event_pmu; + if (intel_cpuc_prepare(cpuc, cpu)) goto error; @@ -2058,7 +2375,7 @@ static int validate_event(struct perf_event *event) struct event_constraint *c; int ret = 0; - fake_cpuc = allocate_fake_cpuc(); + fake_cpuc = allocate_fake_cpuc(event->pmu); if (IS_ERR(fake_cpuc)) return PTR_ERR(fake_cpuc); @@ -2092,7 +2409,27 @@ static int validate_group(struct perf_event *event) struct cpu_hw_events *fake_cpuc; int ret = -EINVAL, n; - fake_cpuc = allocate_fake_cpuc(); + /* + * Reject events from different hybrid PMUs. + */ + if (is_hybrid()) { + struct perf_event *sibling; + struct pmu *pmu = NULL; + + if (is_x86_event(leader)) + pmu = leader->pmu; + + for_each_sibling_event(sibling, leader) { + if (!is_x86_event(sibling)) + continue; + if (!pmu) + pmu = sibling->pmu; + else if (pmu != sibling->pmu) + return ret; + } + } + + fake_cpuc = allocate_fake_cpuc(event->pmu); if (IS_ERR(fake_cpuc)) return PTR_ERR(fake_cpuc); /* @@ -2120,56 +2457,70 @@ out: static int x86_pmu_event_init(struct perf_event *event) { - struct pmu *tmp; + struct x86_hybrid_pmu *pmu = NULL; int err; - switch (event->attr.type) { - case PERF_TYPE_RAW: - case PERF_TYPE_HARDWARE: - case PERF_TYPE_HW_CACHE: - break; - - default: + if ((event->attr.type != event->pmu->type) && + (event->attr.type != PERF_TYPE_HARDWARE) && + (event->attr.type != PERF_TYPE_HW_CACHE)) return -ENOENT; + + if (is_hybrid() && (event->cpu != -1)) { + pmu = hybrid_pmu(event->pmu); + if (!cpumask_test_cpu(event->cpu, &pmu->supported_cpus)) + return -ENOENT; } err = __x86_pmu_event_init(event); if (!err) { - /* - * we temporarily connect event to its pmu - * such that validate_group() can classify - * it as an x86 event using is_x86_event() - */ - tmp = event->pmu; - event->pmu = &pmu; - if (event->group_leader != event) err = validate_group(event); else err = validate_event(event); - - event->pmu = tmp; } if (err) { if (event->destroy) event->destroy(event); + event->destroy = NULL; } if (READ_ONCE(x86_pmu.attr_rdpmc) && !(event->hw.flags & PERF_X86_EVENT_LARGE_PEBS)) - event->hw.flags |= PERF_X86_EVENT_RDPMC_ALLOWED; + event->hw.flags |= PERF_EVENT_FLAG_USER_READ_CNT; return err; } -static void refresh_pce(void *ignored) +void perf_clear_dirty_counters(void) { - load_mm_cr4_irqsoff(this_cpu_read(cpu_tlbstate.loaded_mm)); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + int i; + + /* Don't need to clear the assigned counter. */ + for (i = 0; i < cpuc->n_events; i++) + __clear_bit(cpuc->assign[i], cpuc->dirty); + + if (bitmap_empty(cpuc->dirty, X86_PMC_IDX_MAX)) + return; + + for_each_set_bit(i, cpuc->dirty, X86_PMC_IDX_MAX) { + if (i >= INTEL_PMC_IDX_FIXED) { + /* Metrics and fake events don't have corresponding HW counters. */ + if ((i - INTEL_PMC_IDX_FIXED) >= hybrid(cpuc->pmu, num_counters_fixed)) + continue; + + wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + (i - INTEL_PMC_IDX_FIXED), 0); + } else { + wrmsrl(x86_pmu_event_addr(i), 0); + } + } + + bitmap_zero(cpuc->dirty, X86_PMC_IDX_MAX); } static void x86_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm) { - if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED)) + if (!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT)) return; /* @@ -2179,38 +2530,35 @@ static void x86_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm) * userspace with CR4.PCE clear while another task is still * doing on_each_cpu_mask() to propagate CR4.PCE. * - * For now, this can't happen because all callers hold mmap_sem + * For now, this can't happen because all callers hold mmap_lock * for write. If this changes, we'll need a different solution. */ - lockdep_assert_held_write(&mm->mmap_sem); + mmap_assert_write_locked(mm); if (atomic_inc_return(&mm->context.perf_rdpmc_allowed) == 1) - on_each_cpu_mask(mm_cpumask(mm), refresh_pce, NULL, 1); + on_each_cpu_mask(mm_cpumask(mm), cr4_update_pce, NULL, 1); } static void x86_pmu_event_unmapped(struct perf_event *event, struct mm_struct *mm) { - - if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED)) + if (!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT)) return; if (atomic_dec_and_test(&mm->context.perf_rdpmc_allowed)) - on_each_cpu_mask(mm_cpumask(mm), refresh_pce, NULL, 1); + on_each_cpu_mask(mm_cpumask(mm), cr4_update_pce, NULL, 1); } static int x86_pmu_event_idx(struct perf_event *event) { - int idx = event->hw.idx; + struct hw_perf_event *hwc = &event->hw; - if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED)) + if (!(hwc->flags & PERF_EVENT_FLAG_USER_READ_CNT)) return 0; - if (x86_pmu.num_counters_fixed && idx >= INTEL_PMC_IDX_FIXED) { - idx -= INTEL_PMC_IDX_FIXED; - idx |= 1 << 30; - } - - return idx + 1; + if (is_metric_idx(hwc->idx)) + return INTEL_PMC_FIXED_RDPMC_METRICS + 1; + else + return hwc->event_base_rdpmc + 1; } static ssize_t get_attr_rdpmc(struct device *cdev, @@ -2253,7 +2601,7 @@ static ssize_t set_attr_rdpmc(struct device *cdev, else if (x86_pmu.attr_rdpmc == 2) static_branch_dec(&rdpmc_always_available_key); - on_each_cpu(refresh_pce, NULL, 1); + on_each_cpu(cr4_update_pce, NULL, 1); x86_pmu.attr_rdpmc = val; } @@ -2300,15 +2648,13 @@ static const struct attribute_group *x86_pmu_attr_groups[] = { static void x86_pmu_sched_task(struct perf_event_context *ctx, bool sched_in) { - if (x86_pmu.sched_task) - x86_pmu.sched_task(ctx, sched_in); + static_call_cond(x86_pmu_sched_task)(ctx, sched_in); } static void x86_pmu_swap_task_ctx(struct perf_event_context *prev, struct perf_event_context *next) { - if (x86_pmu.swap_task_ctx) - x86_pmu.swap_task_ctx(prev, next); + static_call_cond(x86_pmu_swap_task_ctx)(prev, next); } void perf_check_microcode(void) @@ -2323,7 +2669,9 @@ static int x86_pmu_check_period(struct perf_event *event, u64 value) return -EINVAL; if (value && x86_pmu.limit_period) { - if (x86_pmu.limit_period(event, value) > value) + s64 left = value; + x86_pmu.limit_period(event, &left); + if (left > value) return -EINVAL; } @@ -2341,6 +2689,14 @@ static int x86_pmu_aux_output_match(struct perf_event *event) return 0; } +static int x86_pmu_filter_match(struct perf_event *event) +{ + if (x86_pmu.filter_match) + return x86_pmu.filter_match(event); + + return 1; +} + static struct pmu pmu = { .pmu_enable = x86_pmu_enable, .pmu_disable = x86_pmu_disable, @@ -2364,11 +2720,12 @@ static struct pmu pmu = { .event_idx = x86_pmu_event_idx, .sched_task = x86_pmu_sched_task, - .task_ctx_size = sizeof(struct x86_perf_task_context), .swap_task_ctx = x86_pmu_swap_task_ctx, .check_period = x86_pmu_check_period, .aux_output_match = x86_pmu_aux_output_match, + + .filter_match = x86_pmu_filter_match, }; void arch_perf_update_userpage(struct perf_event *event, @@ -2380,7 +2737,7 @@ void arch_perf_update_userpage(struct perf_event *event, userpg->cap_user_time = 0; userpg->cap_user_time_zero = 0; userpg->cap_user_rdpmc = - !!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED); + !!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT); userpg->pmc_width = x86_pmu.cntval_bits; if (!using_native_sched_clock() || !sched_clock_stable()) @@ -2426,7 +2783,7 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re struct unwind_state state; unsigned long addr; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + if (perf_guest_state()) { /* TODO: We don't support guest os callchain now */ return; } @@ -2449,7 +2806,7 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re static inline int valid_user_frame(const void __user *fp, unsigned long size) { - return (__range_not_ok(fp, size, TASK_SIZE) == 0); + return __access_ok(fp, size); } static unsigned long get_segment_base(unsigned int segment) @@ -2490,9 +2847,9 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *ent /* 32-bit process in 64-bit kernel. */ unsigned long ss_base, cs_base; struct stack_frame_ia32 frame; - const void __user *fp; + const struct stack_frame_ia32 __user *fp; - if (!test_thread_flag(TIF_IA32)) + if (user_64bit_mode(regs)) return 0; cs_base = get_segment_base(regs->cs); @@ -2501,18 +2858,12 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *ent fp = compat_ptr(ss_base + regs->bp); pagefault_disable(); while (entry->nr < entry->max_stack) { - unsigned long bytes; - frame.next_frame = 0; - frame.return_address = 0; - if (!valid_user_frame(fp, sizeof(frame))) break; - bytes = __copy_from_user_nmi(&frame.next_frame, fp, 4); - if (bytes != 0) + if (__get_user(frame.next_frame, &fp->next_frame)) break; - bytes = __copy_from_user_nmi(&frame.return_address, fp+4, 4); - if (bytes != 0) + if (__get_user(frame.return_address, &fp->return_address)) break; perf_callchain_store(entry, cs_base + frame.return_address); @@ -2533,9 +2884,9 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { struct stack_frame frame; - const unsigned long __user *fp; + const struct stack_frame __user *fp; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + if (perf_guest_state()) { /* TODO: We don't support guest os callchain now */ return; } @@ -2546,7 +2897,7 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs if (regs->flags & (X86_VM_MASK | PERF_EFLAGS_VM)) return; - fp = (unsigned long __user *)regs->bp; + fp = (void __user *)regs->bp; perf_callchain_store(entry, regs->ip); @@ -2558,19 +2909,12 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs pagefault_disable(); while (entry->nr < entry->max_stack) { - unsigned long bytes; - - frame.next_frame = NULL; - frame.return_address = 0; - if (!valid_user_frame(fp, sizeof(frame))) break; - bytes = __copy_from_user_nmi(&frame.next_frame, fp, sizeof(*fp)); - if (bytes != 0) + if (__get_user(frame.next_frame, &fp->next_frame)) break; - bytes = __copy_from_user_nmi(&frame.return_address, fp + 1, sizeof(*fp)); - if (bytes != 0) + if (__get_user(frame.return_address, &fp->return_address)) break; perf_callchain_store(entry, frame.return_address); @@ -2619,18 +2963,19 @@ static unsigned long code_segment_base(struct pt_regs *regs) unsigned long perf_instruction_pointer(struct pt_regs *regs) { - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) - return perf_guest_cbs->get_guest_ip(); + if (perf_guest_state()) + return perf_guest_get_ip(); return regs->ip + code_segment_base(regs); } unsigned long perf_misc_flags(struct pt_regs *regs) { + unsigned int guest_state = perf_guest_state(); int misc = 0; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { - if (perf_guest_cbs->is_user_mode()) + if (guest_state) { + if (guest_state & PERF_GUEST_USER) misc |= PERF_RECORD_MISC_GUEST_USER; else misc |= PERF_RECORD_MISC_GUEST_KERNEL; @@ -2649,12 +2994,34 @@ unsigned long perf_misc_flags(struct pt_regs *regs) void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap) { + if (!x86_pmu_initialized()) { + memset(cap, 0, sizeof(*cap)); + return; + } + cap->version = x86_pmu.version; + /* + * KVM doesn't support the hybrid PMU yet. + * Return the common value in global x86_pmu, + * which available for all cores. + */ cap->num_counters_gp = x86_pmu.num_counters; cap->num_counters_fixed = x86_pmu.num_counters_fixed; cap->bit_width_gp = x86_pmu.cntval_bits; cap->bit_width_fixed = x86_pmu.cntval_bits; cap->events_mask = (unsigned int)x86_pmu.events_maskl; cap->events_mask_len = x86_pmu.events_mask_len; + cap->pebs_ept = x86_pmu.pebs_ept; } EXPORT_SYMBOL_GPL(perf_get_x86_pmu_capability); + +u64 perf_get_hw_event_config(int hw_event) +{ + int max = x86_pmu.max_events; + + if (hw_event < max) + return x86_pmu.event_map(array_index_nospec(hw_event, max)); + + return 0; +} +EXPORT_SYMBOL_GPL(perf_get_hw_event_config); |