diff options
Diffstat (limited to 'arch/powerpc/perf/core-book3s.c')
-rw-r--r-- | arch/powerpc/perf/core-book3s.c | 192 |
1 files changed, 185 insertions, 7 deletions
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 73e62e9b179b..942aa830e110 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -17,6 +17,7 @@ #include <asm/firmware.h> #include <asm/ptrace.h> #include <asm/code-patching.h> +#include <asm/hw_irq.h> #include <asm/interrupt.h> #ifdef CONFIG_PPC64 @@ -775,6 +776,34 @@ static void pmao_restore_workaround(bool ebb) mtspr(SPRN_PMC6, pmcs[5]); } +/* + * If the perf subsystem wants performance monitor interrupts as soon as + * possible (e.g., to sample the instruction address and stack chain), + * this should return true. The IRQ masking code can then enable MSR[EE] + * in some places (e.g., interrupt handlers) that allows PMI interrupts + * through to improve accuracy of profiles, at the cost of some performance. + * + * The PMU counters can be enabled by other means (e.g., sysfs raw SPR + * access), but in that case there is no need for prompt PMI handling. + * + * This currently returns true if any perf counter is being used. It + * could possibly return false if only events are being counted rather than + * samples being taken, but for now this is good enough. + */ +bool power_pmu_wants_prompt_pmi(void) +{ + struct cpu_hw_events *cpuhw; + + /* + * This could simply test local_paca->pmcregs_in_use if that were not + * under ifdef KVM. + */ + if (!ppmu) + return false; + + cpuhw = this_cpu_ptr(&cpu_hw_events); + return cpuhw->n_events; +} #endif /* CONFIG_PPC64 */ static void perf_event_interrupt(struct pt_regs *regs); @@ -857,6 +886,19 @@ static void write_pmc(int idx, unsigned long val) } } +static int any_pmc_overflown(struct cpu_hw_events *cpuhw) +{ + int i, idx; + + for (i = 0; i < cpuhw->n_events; i++) { + idx = cpuhw->event[i]->hw.idx; + if ((idx) && ((int)read_pmc(idx) < 0)) + return idx; + } + + return 0; +} + /* Called from sysrq_handle_showregs() */ void perf_event_print_debug(void) { @@ -1100,7 +1142,7 @@ static u64 check_and_compute_delta(u64 prev, u64 val) /* * POWER7 can roll back counter values, if the new value is smaller * than the previous value it will cause the delta and the counter to - * have bogus values unless we rolled a counter over. If a coutner is + * have bogus values unless we rolled a counter over. If a counter is * rolled back, it will be smaller, but within 256, which is the maximum * number of events to rollback at once. If we detect a rollback * return 0. This can lead to a small lack of precision in the @@ -1281,11 +1323,13 @@ static void power_pmu_disable(struct pmu *pmu) /* * Set the 'freeze counters' bit, clear EBE/BHRBA/PMCC/PMAO/FC56 + * Also clear PMXE to disable PMI's getting triggered in some + * corner cases during PMU disable. */ val = mmcr0 = mfspr(SPRN_MMCR0); val |= MMCR0_FC; val &= ~(MMCR0_EBE | MMCR0_BHRBA | MMCR0_PMCC | MMCR0_PMAO | - MMCR0_FC56); + MMCR0_PMXE | MMCR0_FC56); /* Set mmcr0 PMCCEXT for p10 */ if (ppmu->flags & PPMU_ARCH_31) val |= MMCR0_PMCCEXT; @@ -1299,6 +1343,29 @@ static void power_pmu_disable(struct pmu *pmu) mb(); isync(); + /* + * Some corner cases could clear the PMU counter overflow + * while a masked PMI is pending. One such case is when + * a PMI happens during interrupt replay and perf counter + * values are cleared by PMU callbacks before replay. + * + * Disable the interrupt by clearing the paca bit for PMI + * since we are disabling the PMU now. Otherwise provide a + * warning if there is PMI pending, but no counter is found + * overflown. + * + * Since power_pmu_disable runs under local_irq_save, it + * could happen that code hits a PMC overflow without PMI + * pending in paca. Hence only clear PMI pending if it was + * set. + * + * If a PMI is pending, then MSR[EE] must be disabled (because + * the masked PMI handler disabling EE). So it is safe to + * call clear_pmi_irq_pending(). + */ + if (pmi_irq_pending()) + clear_pmi_irq_pending(); + val = mmcra = cpuhw->mmcr.mmcra; /* @@ -1390,6 +1457,15 @@ static void power_pmu_enable(struct pmu *pmu) * (possibly updated for removal of events). */ if (!cpuhw->n_added) { + /* + * If there is any active event with an overflown PMC + * value, set back PACA_IRQ_PMI which would have been + * cleared in power_pmu_disable(). + */ + hard_irq_disable(); + if (any_pmc_overflown(cpuhw)) + set_pmi_irq_pending(); + mtspr(SPRN_MMCRA, cpuhw->mmcr.mmcra & ~MMCRA_SAMPLE_ENABLE); mtspr(SPRN_MMCR1, cpuhw->mmcr.mmcr1); if (ppmu->flags & PPMU_ARCH_31) @@ -1976,7 +2052,7 @@ static int power_pmu_event_init(struct perf_event *event) /* * PMU config registers have fields that are * reserved and some specific values for bit fields are reserved. - * For ex., MMCRA[61:62] is Randome Sampling Mode (SM) + * For ex., MMCRA[61:62] is Random Sampling Mode (SM) * and value of 0b11 to this field is reserved. * Check for invalid values in attr.config. */ @@ -2055,6 +2131,23 @@ static int power_pmu_event_init(struct perf_event *event) if (has_branch_stack(event)) { u64 bhrb_filter = -1; + /* + * Currently no PMU supports having multiple branch filters + * at the same time. Branch filters are set via MMCRA IFM[32:33] + * bits for Power8 and above. Return EOPNOTSUPP when multiple + * branch filters are requested in the event attr. + * + * When opening event via perf_event_open(), branch_sample_type + * gets adjusted in perf_copy_attr(). Kernel will automatically + * adjust the branch_sample_type based on the event modifier + * settings to include PERF_SAMPLE_BRANCH_PLM_ALL. Hence drop + * the check for PERF_SAMPLE_BRANCH_PLM_ALL. + */ + if (hweight64(event->attr.branch_sample_type & ~PERF_SAMPLE_BRANCH_PLM_ALL) > 1) { + local_irq_restore(irq_flags); + return -EOPNOTSUPP; + } + if (ppmu->bhrb_filter_map) bhrb_filter = ppmu->bhrb_filter_map( event->attr.branch_sample_type); @@ -2221,16 +2314,20 @@ static void record_and_restart(struct perf_event *event, unsigned long val, cpuhw = this_cpu_ptr(&cpu_hw_events); power_pmu_bhrb_read(event, cpuhw); data.br_stack = &cpuhw->bhrb_stack; + data.sample_flags |= PERF_SAMPLE_BRANCH_STACK; } if (event->attr.sample_type & PERF_SAMPLE_DATA_SRC && - ppmu->get_mem_data_src) + ppmu->get_mem_data_src) { ppmu->get_mem_data_src(&data.data_src, ppmu->flags, regs); + data.sample_flags |= PERF_SAMPLE_DATA_SRC; + } if (event->attr.sample_type & PERF_SAMPLE_WEIGHT_TYPE && - ppmu->get_mem_weight) + ppmu->get_mem_weight) { ppmu->get_mem_weight(&data.weight.full, event->attr.sample_type); - + data.sample_flags |= PERF_SAMPLE_WEIGHT_TYPE; + } if (perf_event_overflow(event, &data, regs)) power_pmu_stop(event, 0); } else if (period) { @@ -2337,6 +2434,14 @@ static void __perf_event_interrupt(struct pt_regs *regs) break; } } + + /* + * Clear PACA_IRQ_PMI in case it was set by + * set_pmi_irq_pending() when PMU was enabled + * after accounting for interrupts. + */ + clear_pmi_irq_pending(); + if (!active) /* reset non active counters that have overflowed */ write_pmc(i + 1, 0); @@ -2356,6 +2461,13 @@ static void __perf_event_interrupt(struct pt_regs *regs) } } } + + /* + * During system wide profiling or while specific CPU is monitored for an + * event, some corner cases could cause PMC to overflow in idle path. This + * will trigger a PMI after waking up from idle. Since counter values are _not_ + * saved/restored in idle path, can lead to below "Can't find PMC" message. + */ if (unlikely(!found) && !arch_irq_disabled_regs(regs)) printk_ratelimited(KERN_WARNING "Can't find PMC that caused IRQ\n"); @@ -2392,7 +2504,34 @@ static int power_pmu_prepare_cpu(unsigned int cpu) return 0; } -int register_power_pmu(struct power_pmu *pmu) +static ssize_t pmu_name_show(struct device *cdev, + struct device_attribute *attr, + char *buf) +{ + if (ppmu) + return sysfs_emit(buf, "%s", ppmu->name); + + return 0; +} + +static DEVICE_ATTR_RO(pmu_name); + +static struct attribute *pmu_caps_attrs[] = { + &dev_attr_pmu_name.attr, + NULL +}; + +static const struct attribute_group pmu_caps_group = { + .name = "caps", + .attrs = pmu_caps_attrs, +}; + +static const struct attribute_group *pmu_caps_groups[] = { + &pmu_caps_group, + NULL, +}; + +int __init register_power_pmu(struct power_pmu *pmu) { if (ppmu) return -EBUSY; /* something's already registered */ @@ -2402,6 +2541,10 @@ int register_power_pmu(struct power_pmu *pmu) pmu->name); power_pmu.attr_groups = ppmu->attr_groups; + + if (ppmu->flags & PPMU_ARCH_207S) + power_pmu.attr_update = pmu_caps_groups; + power_pmu.capabilities |= (ppmu->capabilities & PERF_PMU_CAP_EXTENDED_REGS); #ifdef MSR_HV @@ -2419,8 +2562,24 @@ int register_power_pmu(struct power_pmu *pmu) } #ifdef CONFIG_PPC64 +static bool pmu_override = false; +static unsigned long pmu_override_val; +static void do_pmu_override(void *data) +{ + ppc_set_pmu_inuse(1); + if (pmu_override_val) + mtspr(SPRN_MMCR1, pmu_override_val); + mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_FC); +} + static int __init init_ppc64_pmu(void) { + if (cpu_has_feature(CPU_FTR_HVMODE) && pmu_override) { + pr_warn("disabling perf due to pmu_override= command line option.\n"); + on_each_cpu(do_pmu_override, NULL, 1); + return 0; + } + /* run through all the pmu drivers one at a time */ if (!init_power5_pmu()) return 0; @@ -2442,4 +2601,23 @@ static int __init init_ppc64_pmu(void) return init_generic_compat_pmu(); } early_initcall(init_ppc64_pmu); + +static int __init pmu_setup(char *str) +{ + unsigned long val; + + if (!early_cpu_has_feature(CPU_FTR_HVMODE)) + return 0; + + pmu_override = true; + + if (kstrtoul(str, 0, &val)) + val = 0; + + pmu_override_val = val; + + return 1; +} +__setup("pmu_override=", pmu_setup); + #endif |