From cdd6c482c9ff9c55475ee7392ec8f672eddb7be6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 21 Sep 2009 12:02:48 +0200 Subject: perf: Do the big rename: Performance Counters -> Performance Events Bye-bye Performance Counters, welcome Performance Events! In the past few months the perfcounters subsystem has grown out its initial role of counting hardware events, and has become (and is becoming) a much broader generic event enumeration, reporting, logging, monitoring, analysis facility. Naming its core object 'perf_counter' and naming the subsystem 'perfcounters' has become more and more of a misnomer. With pending code like hw-breakpoints support the 'counter' name is less and less appropriate. All in one, we've decided to rename the subsystem to 'performance events' and to propagate this rename through all fields, variables and API names. (in an ABI compatible fashion) The word 'event' is also a bit shorter than 'counter' - which makes it slightly more convenient to write/handle as well. Thanks goes to Stephane Eranian who first observed this misnomer and suggested a rename. User-space tooling and ABI compatibility is not affected - this patch should be function-invariant. (Also, defconfigs were not touched to keep the size down.) This patch has been generated via the following script: FILES=$(find * -type f | grep -vE 'oprofile|[^K]config') sed -i \ -e 's/PERF_EVENT_/PERF_RECORD_/g' \ -e 's/PERF_COUNTER/PERF_EVENT/g' \ -e 's/perf_counter/perf_event/g' \ -e 's/nb_counters/nb_events/g' \ -e 's/swcounter/swevent/g' \ -e 's/tpcounter_event/tp_event/g' \ $FILES for N in $(find . -name perf_counter.[ch]); do M=$(echo $N | sed 's/perf_counter/perf_event/g') mv $N $M done FILES=$(find . -name perf_event.*) sed -i \ -e 's/COUNTER_MASK/REG_MASK/g' \ -e 's/COUNTER/EVENT/g' \ -e 's/\/event_id/g' \ -e 's/counter/event/g' \ -e 's/Counter/Event/g' \ $FILES ... to keep it as correct as possible. This script can also be used by anyone who has pending perfcounters patches - it converts a Linux kernel tree over to the new naming. We tried to time this change to the point in time where the amount of pending patches is the smallest: the end of the merge window. Namespace clashes were fixed up in a preparatory patch - and some stylistic fallout will be fixed up in a subsequent patch. ( NOTE: 'counters' are still the proper terminology when we deal with hardware registers - and these sed scripts are a bit over-eager in renaming them. I've undone some of that, but in case there's something left where 'counter' would be better than 'event' we can undo that on an individual basis instead of touching an otherwise nicely automated patch. ) Suggested-by: Stephane Eranian Acked-by: Peter Zijlstra Acked-by: Paul Mackerras Reviewed-by: Arjan van de Ven Cc: Mike Galbraith Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Steven Rostedt Cc: Benjamin Herrenschmidt Cc: David Howells Cc: Kyle McMartin Cc: Martin Schwidefsky Cc: "David S. Miller" Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: LKML-Reference: Signed-off-by: Ingo Molnar --- arch/sparc/kernel/perf_event.c | 556 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 556 insertions(+) create mode 100644 arch/sparc/kernel/perf_event.c (limited to 'arch/sparc/kernel/perf_event.c') diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c new file mode 100644 index 000000000000..2d6a1b10c81d --- /dev/null +++ b/arch/sparc/kernel/perf_event.c @@ -0,0 +1,556 @@ +/* Performance event support for sparc64. + * + * Copyright (C) 2009 David S. Miller + * + * This code is based almost entirely upon the x86 perf event + * code, which is: + * + * Copyright (C) 2008 Thomas Gleixner + * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar + * Copyright (C) 2009 Jaswinder Singh Rajput + * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter + * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +/* Sparc64 chips have two performance counters, 32-bits each, with + * overflow interrupts generated on transition from 0xffffffff to 0. + * The counters are accessed in one go using a 64-bit register. + * + * Both counters are controlled using a single control register. The + * only way to stop all sampling is to clear all of the context (user, + * supervisor, hypervisor) sampling enable bits. But these bits apply + * to both counters, thus the two counters can't be enabled/disabled + * individually. + * + * The control register has two event fields, one for each of the two + * counters. It's thus nearly impossible to have one counter going + * while keeping the other one stopped. Therefore it is possible to + * get overflow interrupts for counters not currently "in use" and + * that condition must be checked in the overflow interrupt handler. + * + * So we use a hack, in that we program inactive counters with the + * "sw_count0" and "sw_count1" events. These count how many times + * the instruction "sethi %hi(0xfc000), %g0" is executed. It's an + * unusual way to encode a NOP and therefore will not trigger in + * normal code. + */ + +#define MAX_HWEVENTS 2 +#define MAX_PERIOD ((1UL << 32) - 1) + +#define PIC_UPPER_INDEX 0 +#define PIC_LOWER_INDEX 1 + +struct cpu_hw_events { + struct perf_event *events[MAX_HWEVENTS]; + unsigned long used_mask[BITS_TO_LONGS(MAX_HWEVENTS)]; + unsigned long active_mask[BITS_TO_LONGS(MAX_HWEVENTS)]; + int enabled; +}; +DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, }; + +struct perf_event_map { + u16 encoding; + u8 pic_mask; +#define PIC_NONE 0x00 +#define PIC_UPPER 0x01 +#define PIC_LOWER 0x02 +}; + +struct sparc_pmu { + const struct perf_event_map *(*event_map)(int); + int max_events; + int upper_shift; + int lower_shift; + int event_mask; + int hv_bit; + int irq_bit; + int upper_nop; + int lower_nop; +}; + +static const struct perf_event_map ultra3i_perfmon_event_map[] = { + [PERF_COUNT_HW_CPU_CYCLES] = { 0x0000, PIC_UPPER | PIC_LOWER }, + [PERF_COUNT_HW_INSTRUCTIONS] = { 0x0001, PIC_UPPER | PIC_LOWER }, + [PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0009, PIC_LOWER }, + [PERF_COUNT_HW_CACHE_MISSES] = { 0x0009, PIC_UPPER }, +}; + +static const struct perf_event_map *ultra3i_event_map(int event_id) +{ + return &ultra3i_perfmon_event_map[event_id]; +} + +static const struct sparc_pmu ultra3i_pmu = { + .event_map = ultra3i_event_map, + .max_events = ARRAY_SIZE(ultra3i_perfmon_event_map), + .upper_shift = 11, + .lower_shift = 4, + .event_mask = 0x3f, + .upper_nop = 0x1c, + .lower_nop = 0x14, +}; + +static const struct perf_event_map niagara2_perfmon_event_map[] = { + [PERF_COUNT_HW_CPU_CYCLES] = { 0x02ff, PIC_UPPER | PIC_LOWER }, + [PERF_COUNT_HW_INSTRUCTIONS] = { 0x02ff, PIC_UPPER | PIC_LOWER }, + [PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0208, PIC_UPPER | PIC_LOWER }, + [PERF_COUNT_HW_CACHE_MISSES] = { 0x0302, PIC_UPPER | PIC_LOWER }, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x0201, PIC_UPPER | PIC_LOWER }, + [PERF_COUNT_HW_BRANCH_MISSES] = { 0x0202, PIC_UPPER | PIC_LOWER }, +}; + +static const struct perf_event_map *niagara2_event_map(int event_id) +{ + return &niagara2_perfmon_event_map[event_id]; +} + +static const struct sparc_pmu niagara2_pmu = { + .event_map = niagara2_event_map, + .max_events = ARRAY_SIZE(niagara2_perfmon_event_map), + .upper_shift = 19, + .lower_shift = 6, + .event_mask = 0xfff, + .hv_bit = 0x8, + .irq_bit = 0x03, + .upper_nop = 0x220, + .lower_nop = 0x220, +}; + +static const struct sparc_pmu *sparc_pmu __read_mostly; + +static u64 event_encoding(u64 event_id, int idx) +{ + if (idx == PIC_UPPER_INDEX) + event_id <<= sparc_pmu->upper_shift; + else + event_id <<= sparc_pmu->lower_shift; + return event_id; +} + +static u64 mask_for_index(int idx) +{ + return event_encoding(sparc_pmu->event_mask, idx); +} + +static u64 nop_for_index(int idx) +{ + return event_encoding(idx == PIC_UPPER_INDEX ? + sparc_pmu->upper_nop : + sparc_pmu->lower_nop, idx); +} + +static inline void sparc_pmu_enable_event(struct hw_perf_event *hwc, + int idx) +{ + u64 val, mask = mask_for_index(idx); + + val = pcr_ops->read(); + pcr_ops->write((val & ~mask) | hwc->config); +} + +static inline void sparc_pmu_disable_event(struct hw_perf_event *hwc, + int idx) +{ + u64 mask = mask_for_index(idx); + u64 nop = nop_for_index(idx); + u64 val = pcr_ops->read(); + + pcr_ops->write((val & ~mask) | nop); +} + +void hw_perf_enable(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + u64 val; + int i; + + if (cpuc->enabled) + return; + + cpuc->enabled = 1; + barrier(); + + val = pcr_ops->read(); + + for (i = 0; i < MAX_HWEVENTS; i++) { + struct perf_event *cp = cpuc->events[i]; + struct hw_perf_event *hwc; + + if (!cp) + continue; + hwc = &cp->hw; + val |= hwc->config_base; + } + + pcr_ops->write(val); +} + +void hw_perf_disable(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + u64 val; + + if (!cpuc->enabled) + return; + + cpuc->enabled = 0; + + val = pcr_ops->read(); + val &= ~(PCR_UTRACE | PCR_STRACE | + sparc_pmu->hv_bit | sparc_pmu->irq_bit); + pcr_ops->write(val); +} + +static u32 read_pmc(int idx) +{ + u64 val; + + read_pic(val); + if (idx == PIC_UPPER_INDEX) + val >>= 32; + + return val & 0xffffffff; +} + +static void write_pmc(int idx, u64 val) +{ + u64 shift, mask, pic; + + shift = 0; + if (idx == PIC_UPPER_INDEX) + shift = 32; + + mask = ((u64) 0xffffffff) << shift; + val <<= shift; + + read_pic(pic); + pic &= ~mask; + pic |= val; + write_pic(pic); +} + +static int sparc_perf_event_set_period(struct perf_event *event, + struct hw_perf_event *hwc, int idx) +{ + s64 left = atomic64_read(&hwc->period_left); + s64 period = hwc->sample_period; + int ret = 0; + + if (unlikely(left <= -period)) { + left = period; + atomic64_set(&hwc->period_left, left); + hwc->last_period = period; + ret = 1; + } + + if (unlikely(left <= 0)) { + left += period; + atomic64_set(&hwc->period_left, left); + hwc->last_period = period; + ret = 1; + } + if (left > MAX_PERIOD) + left = MAX_PERIOD; + + atomic64_set(&hwc->prev_count, (u64)-left); + + write_pmc(idx, (u64)(-left) & 0xffffffff); + + perf_event_update_userpage(event); + + return ret; +} + +static int sparc_pmu_enable(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + if (test_and_set_bit(idx, cpuc->used_mask)) + return -EAGAIN; + + sparc_pmu_disable_event(hwc, idx); + + cpuc->events[idx] = event; + set_bit(idx, cpuc->active_mask); + + sparc_perf_event_set_period(event, hwc, idx); + sparc_pmu_enable_event(hwc, idx); + perf_event_update_userpage(event); + return 0; +} + +static u64 sparc_perf_event_update(struct perf_event *event, + struct hw_perf_event *hwc, int idx) +{ + int shift = 64 - 32; + u64 prev_raw_count, new_raw_count; + s64 delta; + +again: + prev_raw_count = atomic64_read(&hwc->prev_count); + new_raw_count = read_pmc(idx); + + if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, + new_raw_count) != prev_raw_count) + goto again; + + delta = (new_raw_count << shift) - (prev_raw_count << shift); + delta >>= shift; + + atomic64_add(delta, &event->count); + atomic64_sub(delta, &hwc->period_left); + + return new_raw_count; +} + +static void sparc_pmu_disable(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + clear_bit(idx, cpuc->active_mask); + sparc_pmu_disable_event(hwc, idx); + + barrier(); + + sparc_perf_event_update(event, hwc, idx); + cpuc->events[idx] = NULL; + clear_bit(idx, cpuc->used_mask); + + perf_event_update_userpage(event); +} + +static void sparc_pmu_read(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + sparc_perf_event_update(event, hwc, hwc->idx); +} + +static void sparc_pmu_unthrottle(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + sparc_pmu_enable_event(hwc, hwc->idx); +} + +static atomic_t active_events = ATOMIC_INIT(0); +static DEFINE_MUTEX(pmc_grab_mutex); + +void perf_event_grab_pmc(void) +{ + if (atomic_inc_not_zero(&active_events)) + return; + + mutex_lock(&pmc_grab_mutex); + if (atomic_read(&active_events) == 0) { + if (atomic_read(&nmi_active) > 0) { + on_each_cpu(stop_nmi_watchdog, NULL, 1); + BUG_ON(atomic_read(&nmi_active) != 0); + } + atomic_inc(&active_events); + } + mutex_unlock(&pmc_grab_mutex); +} + +void perf_event_release_pmc(void) +{ + if (atomic_dec_and_mutex_lock(&active_events, &pmc_grab_mutex)) { + if (atomic_read(&nmi_active) == 0) + on_each_cpu(start_nmi_watchdog, NULL, 1); + mutex_unlock(&pmc_grab_mutex); + } +} + +static void hw_perf_event_destroy(struct perf_event *event) +{ + perf_event_release_pmc(); +} + +static int __hw_perf_event_init(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + struct hw_perf_event *hwc = &event->hw; + const struct perf_event_map *pmap; + u64 enc; + + if (atomic_read(&nmi_active) < 0) + return -ENODEV; + + if (attr->type != PERF_TYPE_HARDWARE) + return -EOPNOTSUPP; + + if (attr->config >= sparc_pmu->max_events) + return -EINVAL; + + perf_event_grab_pmc(); + event->destroy = hw_perf_event_destroy; + + /* We save the enable bits in the config_base. So to + * turn off sampling just write 'config', and to enable + * things write 'config | config_base'. + */ + hwc->config_base = sparc_pmu->irq_bit; + if (!attr->exclude_user) + hwc->config_base |= PCR_UTRACE; + if (!attr->exclude_kernel) + hwc->config_base |= PCR_STRACE; + if (!attr->exclude_hv) + hwc->config_base |= sparc_pmu->hv_bit; + + if (!hwc->sample_period) { + hwc->sample_period = MAX_PERIOD; + hwc->last_period = hwc->sample_period; + atomic64_set(&hwc->period_left, hwc->sample_period); + } + + pmap = sparc_pmu->event_map(attr->config); + + enc = pmap->encoding; + if (pmap->pic_mask & PIC_UPPER) { + hwc->idx = PIC_UPPER_INDEX; + enc <<= sparc_pmu->upper_shift; + } else { + hwc->idx = PIC_LOWER_INDEX; + enc <<= sparc_pmu->lower_shift; + } + + hwc->config |= enc; + return 0; +} + +static const struct pmu pmu = { + .enable = sparc_pmu_enable, + .disable = sparc_pmu_disable, + .read = sparc_pmu_read, + .unthrottle = sparc_pmu_unthrottle, +}; + +const struct pmu *hw_perf_event_init(struct perf_event *event) +{ + int err = __hw_perf_event_init(event); + + if (err) + return ERR_PTR(err); + return &pmu; +} + +void perf_event_print_debug(void) +{ + unsigned long flags; + u64 pcr, pic; + int cpu; + + if (!sparc_pmu) + return; + + local_irq_save(flags); + + cpu = smp_processor_id(); + + pcr = pcr_ops->read(); + read_pic(pic); + + pr_info("\n"); + pr_info("CPU#%d: PCR[%016llx] PIC[%016llx]\n", + cpu, pcr, pic); + + local_irq_restore(flags); +} + +static int __kprobes perf_event_nmi_handler(struct notifier_block *self, + unsigned long cmd, void *__args) +{ + struct die_args *args = __args; + struct perf_sample_data data; + struct cpu_hw_events *cpuc; + struct pt_regs *regs; + int idx; + + if (!atomic_read(&active_events)) + return NOTIFY_DONE; + + switch (cmd) { + case DIE_NMI: + break; + + default: + return NOTIFY_DONE; + } + + regs = args->regs; + + data.addr = 0; + + cpuc = &__get_cpu_var(cpu_hw_events); + for (idx = 0; idx < MAX_HWEVENTS; idx++) { + struct perf_event *event = cpuc->events[idx]; + struct hw_perf_event *hwc; + u64 val; + + if (!test_bit(idx, cpuc->active_mask)) + continue; + hwc = &event->hw; + val = sparc_perf_event_update(event, hwc, idx); + if (val & (1ULL << 31)) + continue; + + data.period = event->hw.last_period; + if (!sparc_perf_event_set_period(event, hwc, idx)) + continue; + + if (perf_event_overflow(event, 1, &data, regs)) + sparc_pmu_disable_event(hwc, idx); + } + + return NOTIFY_STOP; +} + +static __read_mostly struct notifier_block perf_event_nmi_notifier = { + .notifier_call = perf_event_nmi_handler, +}; + +static bool __init supported_pmu(void) +{ + if (!strcmp(sparc_pmu_type, "ultra3i")) { + sparc_pmu = &ultra3i_pmu; + return true; + } + if (!strcmp(sparc_pmu_type, "niagara2")) { + sparc_pmu = &niagara2_pmu; + return true; + } + return false; +} + +void __init init_hw_perf_events(void) +{ + pr_info("Performance events: "); + + if (!supported_pmu()) { + pr_cont("No support for PMU type '%s'\n", sparc_pmu_type); + return; + } + + pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type); + + /* All sparc64 PMUs currently have 2 events. But this simple + * driver only supports one active event at a time. + */ + perf_max_events = 1; + + register_die_notifier(&perf_event_nmi_notifier); +} -- cgit v1.2.3-59-g8ed1b From 2ce4da2efcaca0dcbfed7a1f24177f18e75e0e89 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 26 Sep 2009 20:42:10 -0700 Subject: sparc: Support HW cache events. First supported chip for HW cache events is Ultra-IIIi. Signed-off-by: David S. Miller --- arch/sparc/kernel/perf_event.c | 145 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 139 insertions(+), 6 deletions(-) (limited to 'arch/sparc/kernel/perf_event.c') diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 2d6a1b10c81d..48375f694673 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -68,8 +68,19 @@ struct perf_event_map { #define PIC_LOWER 0x02 }; +#define C(x) PERF_COUNT_HW_CACHE_##x + +#define CACHE_OP_UNSUPPORTED 0xfffe +#define CACHE_OP_NONSENSE 0xffff + +typedef struct perf_event_map cache_map_t + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX]; + struct sparc_pmu { const struct perf_event_map *(*event_map)(int); + const cache_map_t *cache_map; int max_events; int upper_shift; int lower_shift; @@ -92,8 +103,96 @@ static const struct perf_event_map *ultra3i_event_map(int event_id) return &ultra3i_perfmon_event_map[event_id]; } +static const cache_map_t ultra3i_cache_map = { +[C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x09, PIC_LOWER, }, + [C(RESULT_MISS)] = { 0x09, PIC_UPPER, }, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = { 0x0a, PIC_LOWER }, + [C(RESULT_MISS)] = { 0x0a, PIC_UPPER }, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x09, PIC_LOWER, }, + [C(RESULT_MISS)] = { 0x09, PIC_UPPER, }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE }, + [ C(RESULT_MISS) ] = { CACHE_OP_NONSENSE }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x0c, PIC_LOWER, }, + [C(RESULT_MISS)] = { 0x0c, PIC_UPPER, }, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = { 0x0c, PIC_LOWER }, + [C(RESULT_MISS)] = { 0x0c, PIC_UPPER }, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { 0x12, PIC_UPPER, }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { 0x11, PIC_UPPER, }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +}; + static const struct sparc_pmu ultra3i_pmu = { .event_map = ultra3i_event_map, + .cache_map = &ultra3i_cache_map, .max_events = ARRAY_SIZE(ultra3i_perfmon_event_map), .upper_shift = 11, .lower_shift = 4, @@ -375,6 +474,37 @@ void perf_event_release_pmc(void) } } +static const struct perf_event_map *sparc_map_cache_event(u64 config) +{ + unsigned int cache_type, cache_op, cache_result; + const struct perf_event_map *pmap; + + if (!sparc_pmu->cache_map) + return ERR_PTR(-ENOENT); + + cache_type = (config >> 0) & 0xff; + if (cache_type >= PERF_COUNT_HW_CACHE_MAX) + return ERR_PTR(-EINVAL); + + cache_op = (config >> 8) & 0xff; + if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) + return ERR_PTR(-EINVAL); + + cache_result = (config >> 16) & 0xff; + if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) + return ERR_PTR(-EINVAL); + + pmap = &((*sparc_pmu->cache_map)[cache_type][cache_op][cache_result]); + + if (pmap->encoding == CACHE_OP_UNSUPPORTED) + return ERR_PTR(-ENOENT); + + if (pmap->encoding == CACHE_OP_NONSENSE) + return ERR_PTR(-EINVAL); + + return pmap; +} + static void hw_perf_event_destroy(struct perf_event *event) { perf_event_release_pmc(); @@ -390,12 +520,17 @@ static int __hw_perf_event_init(struct perf_event *event) if (atomic_read(&nmi_active) < 0) return -ENODEV; - if (attr->type != PERF_TYPE_HARDWARE) + if (attr->type == PERF_TYPE_HARDWARE) { + if (attr->config >= sparc_pmu->max_events) + return -EINVAL; + pmap = sparc_pmu->event_map(attr->config); + } else if (attr->type == PERF_TYPE_HW_CACHE) { + pmap = sparc_map_cache_event(attr->config); + if (IS_ERR(pmap)) + return PTR_ERR(pmap); + } else return -EOPNOTSUPP; - if (attr->config >= sparc_pmu->max_events) - return -EINVAL; - perf_event_grab_pmc(); event->destroy = hw_perf_event_destroy; @@ -417,8 +552,6 @@ static int __hw_perf_event_init(struct perf_event *event) atomic64_set(&hwc->period_left, hwc->sample_period); } - pmap = sparc_pmu->event_map(attr->config); - enc = pmap->encoding; if (pmap->pic_mask & PIC_UPPER) { hwc->idx = PIC_UPPER_INDEX; -- cgit v1.2.3-59-g8ed1b From 28e8f9bead060aafc630a4256d23e2a55fb8b97d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 26 Sep 2009 20:54:22 -0700 Subject: sparc: Support all ultra3 and ultra4 derivatives. For the generic events we support, all of these chips have the same encodings as ultra3i. Signed-off-by: David S. Miller --- arch/sparc/kernel/perf_event.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'arch/sparc/kernel/perf_event.c') diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 48375f694673..8abdc4d1baa5 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -91,19 +91,19 @@ struct sparc_pmu { int lower_nop; }; -static const struct perf_event_map ultra3i_perfmon_event_map[] = { +static const struct perf_event_map ultra3_perfmon_event_map[] = { [PERF_COUNT_HW_CPU_CYCLES] = { 0x0000, PIC_UPPER | PIC_LOWER }, [PERF_COUNT_HW_INSTRUCTIONS] = { 0x0001, PIC_UPPER | PIC_LOWER }, [PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0009, PIC_LOWER }, [PERF_COUNT_HW_CACHE_MISSES] = { 0x0009, PIC_UPPER }, }; -static const struct perf_event_map *ultra3i_event_map(int event_id) +static const struct perf_event_map *ultra3_event_map(int event_id) { - return &ultra3i_perfmon_event_map[event_id]; + return &ultra3_perfmon_event_map[event_id]; } -static const cache_map_t ultra3i_cache_map = { +static const cache_map_t ultra3_cache_map = { [C(L1D)] = { [C(OP_READ)] = { [C(RESULT_ACCESS)] = { 0x09, PIC_LOWER, }, @@ -190,10 +190,10 @@ static const cache_map_t ultra3i_cache_map = { }, }; -static const struct sparc_pmu ultra3i_pmu = { - .event_map = ultra3i_event_map, - .cache_map = &ultra3i_cache_map, - .max_events = ARRAY_SIZE(ultra3i_perfmon_event_map), +static const struct sparc_pmu ultra3_pmu = { + .event_map = ultra3_event_map, + .cache_map = &ultra3_cache_map, + .max_events = ARRAY_SIZE(ultra3_perfmon_event_map), .upper_shift = 11, .lower_shift = 4, .event_mask = 0x3f, @@ -658,8 +658,11 @@ static __read_mostly struct notifier_block perf_event_nmi_notifier = { static bool __init supported_pmu(void) { - if (!strcmp(sparc_pmu_type, "ultra3i")) { - sparc_pmu = &ultra3i_pmu; + if (!strcmp(sparc_pmu_type, "ultra3") || + !strcmp(sparc_pmu_type, "ultra3+") || + !strcmp(sparc_pmu_type, "ultra3i") || + !strcmp(sparc_pmu_type, "ultra4+")) { + sparc_pmu = &ultra3_pmu; return true; } if (!strcmp(sparc_pmu_type, "niagara2")) { -- cgit v1.2.3-59-g8ed1b From d0b86480f5b33f4a86d7c106706d6e0dcd1935ce Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 26 Sep 2009 21:04:16 -0700 Subject: sparc: Add Niagara2 HW cache event support. Signed-off-by: David S. Miller --- arch/sparc/kernel/perf_event.c | 88 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) (limited to 'arch/sparc/kernel/perf_event.c') diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 8abdc4d1baa5..6f01e04cc323 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -215,8 +215,96 @@ static const struct perf_event_map *niagara2_event_map(int event_id) return &niagara2_perfmon_event_map[event_id]; } +static const cache_map_t niagara2_cache_map = { +[C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x0208, PIC_UPPER | PIC_LOWER, }, + [C(RESULT_MISS)] = { 0x0302, PIC_UPPER | PIC_LOWER, }, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = { 0x0210, PIC_UPPER | PIC_LOWER, }, + [C(RESULT_MISS)] = { 0x0302, PIC_UPPER | PIC_LOWER, }, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x02ff, PIC_UPPER | PIC_LOWER, }, + [C(RESULT_MISS)] = { 0x0301, PIC_UPPER | PIC_LOWER, }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE }, + [ C(RESULT_MISS) ] = { CACHE_OP_NONSENSE }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x0208, PIC_UPPER | PIC_LOWER, }, + [C(RESULT_MISS)] = { 0x0330, PIC_UPPER | PIC_LOWER, }, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = { 0x0210, PIC_UPPER | PIC_LOWER, }, + [C(RESULT_MISS)] = { 0x0320, PIC_UPPER | PIC_LOWER, }, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { 0x0b08, PIC_UPPER | PIC_LOWER, }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { 0xb04, PIC_UPPER | PIC_LOWER, }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +}; + static const struct sparc_pmu niagara2_pmu = { .event_map = niagara2_event_map, + .cache_map = &niagara2_cache_map, .max_events = ARRAY_SIZE(niagara2_perfmon_event_map), .upper_shift = 19, .lower_shift = 6, -- cgit v1.2.3-59-g8ed1b From 7eebda60d57a0862a410f45122c73b8bbe6e260c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 26 Sep 2009 21:23:41 -0700 Subject: sparc: Niagara1 perf event support. This chip is extremely limited, and many of the events supported are approximations at best. Signed-off-by: David S. Miller --- arch/sparc/kernel/perf_event.c | 119 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 119 insertions(+) (limited to 'arch/sparc/kernel/perf_event.c') diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 6f01e04cc323..9541b456c3ee 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -201,6 +201,121 @@ static const struct sparc_pmu ultra3_pmu = { .lower_nop = 0x14, }; +/* Niagara1 is very limited. The upper PIC is hard-locked to count + * only instructions, so it is free running which creates all kinds of + * problems. Some hardware designs make one wonder if the creastor + * even looked at how this stuff gets used by software. + */ +static const struct perf_event_map niagara1_perfmon_event_map[] = { + [PERF_COUNT_HW_CPU_CYCLES] = { 0x00, PIC_UPPER }, + [PERF_COUNT_HW_INSTRUCTIONS] = { 0x00, PIC_UPPER }, + [PERF_COUNT_HW_CACHE_REFERENCES] = { 0, PIC_NONE }, + [PERF_COUNT_HW_CACHE_MISSES] = { 0x03, PIC_LOWER }, +}; + +static const struct perf_event_map *niagara1_event_map(int event_id) +{ + return &niagara1_perfmon_event_map[event_id]; +} + +static const cache_map_t niagara1_cache_map = { +[C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { 0x03, PIC_LOWER, }, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { 0x03, PIC_LOWER, }, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x00, PIC_UPPER }, + [C(RESULT_MISS)] = { 0x02, PIC_LOWER, }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE }, + [ C(RESULT_MISS) ] = { CACHE_OP_NONSENSE }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { 0x07, PIC_LOWER, }, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { 0x07, PIC_LOWER, }, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { 0x05, PIC_LOWER, }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { 0x04, PIC_LOWER, }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +}; + +static const struct sparc_pmu niagara1_pmu = { + .event_map = niagara1_event_map, + .cache_map = &niagara1_cache_map, + .max_events = ARRAY_SIZE(niagara1_perfmon_event_map), + .upper_shift = 0, + .lower_shift = 4, + .event_mask = 0x7, + .upper_nop = 0x0, + .lower_nop = 0x0, +}; + static const struct perf_event_map niagara2_perfmon_event_map[] = { [PERF_COUNT_HW_CPU_CYCLES] = { 0x02ff, PIC_UPPER | PIC_LOWER }, [PERF_COUNT_HW_INSTRUCTIONS] = { 0x02ff, PIC_UPPER | PIC_LOWER }, @@ -753,6 +868,10 @@ static bool __init supported_pmu(void) sparc_pmu = &ultra3_pmu; return true; } + if (!strcmp(sparc_pmu_type, "niagara")) { + sparc_pmu = &niagara1_pmu; + return true; + } if (!strcmp(sparc_pmu_type, "niagara2")) { sparc_pmu = &niagara2_pmu; return true; -- cgit v1.2.3-59-g8ed1b From 01552f765cae873d0ea3cca1e64e41dfd62659e6 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 27 Sep 2009 20:43:07 -0700 Subject: sparc64: Add initial perf event conflict resolution and checks. Cribbed from powerpc code, as usual. :-) Currently it is only used to validate that all counters have the same user/kernel/hv attributes. Signed-off-by: David S. Miller --- arch/sparc/kernel/perf_event.c | 82 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 77 insertions(+), 5 deletions(-) (limited to 'arch/sparc/kernel/perf_event.c') diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 9541b456c3ee..919952498155 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -713,12 +713,66 @@ static void hw_perf_event_destroy(struct perf_event *event) perf_event_release_pmc(); } +static int check_excludes(struct perf_event **evts, int n_prev, int n_new) +{ + int eu = 0, ek = 0, eh = 0; + struct perf_event *event; + int i, n, first; + + n = n_prev + n_new; + if (n <= 1) + return 0; + + first = 1; + for (i = 0; i < n; i++) { + event = evts[i]; + if (first) { + eu = event->attr.exclude_user; + ek = event->attr.exclude_kernel; + eh = event->attr.exclude_hv; + first = 0; + } else if (event->attr.exclude_user != eu || + event->attr.exclude_kernel != ek || + event->attr.exclude_hv != eh) { + return -EAGAIN; + } + } + + return 0; +} + +static int collect_events(struct perf_event *group, int max_count, + struct perf_event *evts[], u64 *events) +{ + struct perf_event *event; + int n = 0; + + if (!is_software_event(group)) { + if (n >= max_count) + return -1; + evts[n] = group; + events[n++] = group->hw.config; + } + list_for_each_entry(event, &group->sibling_list, group_entry) { + if (!is_software_event(event) && + event->state != PERF_EVENT_STATE_OFF) { + if (n >= max_count) + return -1; + evts[n] = event; + events[n++] = event->hw.config; + } + } + return n; +} + static int __hw_perf_event_init(struct perf_event *event) { struct perf_event_attr *attr = &event->attr; + struct perf_event *evts[MAX_HWEVENTS]; struct hw_perf_event *hwc = &event->hw; const struct perf_event_map *pmap; - u64 enc; + u64 enc, events[MAX_HWEVENTS]; + int n; if (atomic_read(&nmi_active) < 0) return -ENODEV; @@ -734,9 +788,6 @@ static int __hw_perf_event_init(struct perf_event *event) } else return -EOPNOTSUPP; - perf_event_grab_pmc(); - event->destroy = hw_perf_event_destroy; - /* We save the enable bits in the config_base. So to * turn off sampling just write 'config', and to enable * things write 'config | config_base'. @@ -749,13 +800,34 @@ static int __hw_perf_event_init(struct perf_event *event) if (!attr->exclude_hv) hwc->config_base |= sparc_pmu->hv_bit; + enc = pmap->encoding; + + n = 0; + if (event->group_leader != event) { + n = collect_events(event->group_leader, + perf_max_events - 1, + evts, events); + if (n < 0) + return -EINVAL; + } + events[n] = enc; + evts[n] = event; + + if (check_excludes(evts, n, 1)) + return -EINVAL; + + /* Try to do all error checking before this point, as unwinding + * state after grabbing the PMC is difficult. + */ + perf_event_grab_pmc(); + event->destroy = hw_perf_event_destroy; + if (!hwc->sample_period) { hwc->sample_period = MAX_PERIOD; hwc->last_period = hwc->sample_period; atomic64_set(&hwc->period_left, hwc->sample_period); } - enc = pmap->encoding; if (pmap->pic_mask & PIC_UPPER) { hwc->idx = PIC_UPPER_INDEX; enc <<= sparc_pmu->upper_shift; -- cgit v1.2.3-59-g8ed1b From a72a8a5f2ea32074e98803d4b15d0e093c5a9e4d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 28 Sep 2009 17:35:20 -0700 Subject: sparc64: Add a basic conflict engine in preparation for multi-counter support. The hardware counter ->event_base state records and encoding of the "struct perf_event_map" entry used for the event. We use this to make sure that when we have more than 1 event, both can be scheduled into the hardware at the same time. As usual, structure of code is largely cribbed from powerpc. Signed-off-by: David S. Miller --- arch/sparc/kernel/perf_event.c | 69 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 64 insertions(+), 5 deletions(-) (limited to 'arch/sparc/kernel/perf_event.c') diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 919952498155..2b7743466ae4 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -68,6 +68,17 @@ struct perf_event_map { #define PIC_LOWER 0x02 }; +static unsigned long perf_event_encode(const struct perf_event_map *pmap) +{ + return ((unsigned long) pmap->encoding << 16) | pmap->pic_mask; +} + +static void perf_event_decode(unsigned long val, u16 *enc, u8 *msk) +{ + *msk = val & 0xff; + *enc = val >> 16; +} + #define C(x) PERF_COUNT_HW_CACHE_##x #define CACHE_OP_UNSUPPORTED 0xfffe @@ -713,6 +724,48 @@ static void hw_perf_event_destroy(struct perf_event *event) perf_event_release_pmc(); } +/* Make sure all events can be scheduled into the hardware at + * the same time. This is simplified by the fact that we only + * need to support 2 simultaneous HW events. + */ +static int sparc_check_constraints(unsigned long *events, int n_ev) +{ + if (n_ev <= perf_max_events) { + u8 msk1, msk2; + u16 dummy; + + if (n_ev == 1) + return 0; + BUG_ON(n_ev != 2); + perf_event_decode(events[0], &dummy, &msk1); + perf_event_decode(events[1], &dummy, &msk2); + + /* If both events can go on any counter, OK. */ + if (msk1 == (PIC_UPPER | PIC_LOWER) && + msk2 == (PIC_UPPER | PIC_LOWER)) + return 0; + + /* If one event is limited to a specific counter, + * and the other can go on both, OK. + */ + if ((msk1 == PIC_UPPER || msk1 == PIC_LOWER) && + msk2 == (PIC_UPPER | PIC_LOWER)) + return 0; + if ((msk2 == PIC_UPPER || msk2 == PIC_LOWER) && + msk1 == (PIC_UPPER | PIC_LOWER)) + return 0; + + /* If the events are fixed to different counters, OK. */ + if ((msk1 == PIC_UPPER && msk2 == PIC_LOWER) || + (msk1 == PIC_LOWER && msk2 == PIC_UPPER)) + return 0; + + /* Otherwise, there is a conflict. */ + } + + return -1; +} + static int check_excludes(struct perf_event **evts, int n_prev, int n_new) { int eu = 0, ek = 0, eh = 0; @@ -742,7 +795,7 @@ static int check_excludes(struct perf_event **evts, int n_prev, int n_new) } static int collect_events(struct perf_event *group, int max_count, - struct perf_event *evts[], u64 *events) + struct perf_event *evts[], unsigned long *events) { struct perf_event *event; int n = 0; @@ -751,7 +804,7 @@ static int collect_events(struct perf_event *group, int max_count, if (n >= max_count) return -1; evts[n] = group; - events[n++] = group->hw.config; + events[n++] = group->hw.event_base; } list_for_each_entry(event, &group->sibling_list, group_entry) { if (!is_software_event(event) && @@ -759,7 +812,7 @@ static int collect_events(struct perf_event *group, int max_count, if (n >= max_count) return -1; evts[n] = event; - events[n++] = event->hw.config; + events[n++] = event->hw.event_base; } } return n; @@ -770,8 +823,9 @@ static int __hw_perf_event_init(struct perf_event *event) struct perf_event_attr *attr = &event->attr; struct perf_event *evts[MAX_HWEVENTS]; struct hw_perf_event *hwc = &event->hw; + unsigned long events[MAX_HWEVENTS]; const struct perf_event_map *pmap; - u64 enc, events[MAX_HWEVENTS]; + u64 enc; int n; if (atomic_read(&nmi_active) < 0) @@ -800,6 +854,8 @@ static int __hw_perf_event_init(struct perf_event *event) if (!attr->exclude_hv) hwc->config_base |= sparc_pmu->hv_bit; + hwc->event_base = perf_event_encode(pmap); + enc = pmap->encoding; n = 0; @@ -810,12 +866,15 @@ static int __hw_perf_event_init(struct perf_event *event) if (n < 0) return -EINVAL; } - events[n] = enc; + events[n] = hwc->event_base; evts[n] = event; if (check_excludes(evts, n, 1)) return -EINVAL; + if (sparc_check_constraints(events, n + 1)) + return -EINVAL; + /* Try to do all error checking before this point, as unwinding * state after grabbing the PMC is difficult. */ -- cgit v1.2.3-59-g8ed1b From d29862f03575cdfa8819f78b0f3f78eec3b44629 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 28 Sep 2009 17:37:12 -0700 Subject: sparc64: Minor coding style fixups in perf code. These got introduced during the counter --> event tree-wide renaming. Signed-off-by: David S. Miller --- arch/sparc/kernel/perf_event.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'arch/sparc/kernel/perf_event.c') diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 2b7743466ae4..03b041c4d95c 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -464,8 +464,7 @@ static u64 nop_for_index(int idx) sparc_pmu->lower_nop, idx); } -static inline void sparc_pmu_enable_event(struct hw_perf_event *hwc, - int idx) +static inline void sparc_pmu_enable_event(struct hw_perf_event *hwc, int idx) { u64 val, mask = mask_for_index(idx); @@ -473,8 +472,7 @@ static inline void sparc_pmu_enable_event(struct hw_perf_event *hwc, pcr_ops->write((val & ~mask) | hwc->config); } -static inline void sparc_pmu_disable_event(struct hw_perf_event *hwc, - int idx) +static inline void sparc_pmu_disable_event(struct hw_perf_event *hwc, int idx) { u64 mask = mask_for_index(idx); u64 nop = nop_for_index(idx); @@ -555,7 +553,7 @@ static void write_pmc(int idx, u64 val) } static int sparc_perf_event_set_period(struct perf_event *event, - struct hw_perf_event *hwc, int idx) + struct hw_perf_event *hwc, int idx) { s64 left = atomic64_read(&hwc->period_left); s64 period = hwc->sample_period; @@ -607,7 +605,7 @@ static int sparc_pmu_enable(struct perf_event *event) } static u64 sparc_perf_event_update(struct perf_event *event, - struct hw_perf_event *hwc, int idx) + struct hw_perf_event *hwc, int idx) { int shift = 64 - 32; u64 prev_raw_count, new_raw_count; @@ -939,7 +937,7 @@ void perf_event_print_debug(void) } static int __kprobes perf_event_nmi_handler(struct notifier_block *self, - unsigned long cmd, void *__args) + unsigned long cmd, void *__args) { struct die_args *args = __args; struct perf_sample_data data; -- cgit v1.2.3-59-g8ed1b From 6e804251d119bbd5522d76bdb0f48f5c9a7abf51 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 29 Sep 2009 15:10:23 -0700 Subject: sparc64: Fix comment typo in perf_event.c Signed-off-by: David S. Miller --- arch/sparc/kernel/perf_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/sparc/kernel/perf_event.c') diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 03b041c4d95c..32fc974bf8b5 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -214,7 +214,7 @@ static const struct sparc_pmu ultra3_pmu = { /* Niagara1 is very limited. The upper PIC is hard-locked to count * only instructions, so it is free running which creates all kinds of - * problems. Some hardware designs make one wonder if the creastor + * problems. Some hardware designs make one wonder if the creator * even looked at how this stuff gets used by software. */ static const struct perf_event_map niagara1_perfmon_event_map[] = { -- cgit v1.2.3-59-g8ed1b From d17513889a8b754c5872b6b46e6f7822338a0b79 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 29 Sep 2009 21:27:06 -0700 Subject: sparc64: Cache per-cpu %pcr register value in perf code. Signed-off-by: David S. Miller --- arch/sparc/kernel/perf_event.c | 59 ++++++++++++++++++++++++++++++------------ 1 file changed, 42 insertions(+), 17 deletions(-) (limited to 'arch/sparc/kernel/perf_event.c') diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 32fc974bf8b5..04db92743896 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -56,7 +56,8 @@ struct cpu_hw_events { struct perf_event *events[MAX_HWEVENTS]; unsigned long used_mask[BITS_TO_LONGS(MAX_HWEVENTS)]; unsigned long active_mask[BITS_TO_LONGS(MAX_HWEVENTS)]; - int enabled; + u64 pcr; + int enabled; }; DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, }; @@ -464,21 +465,30 @@ static u64 nop_for_index(int idx) sparc_pmu->lower_nop, idx); } -static inline void sparc_pmu_enable_event(struct hw_perf_event *hwc, int idx) +static inline void sparc_pmu_enable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx) { u64 val, mask = mask_for_index(idx); - val = pcr_ops->read(); - pcr_ops->write((val & ~mask) | hwc->config); + val = cpuc->pcr; + val &= ~mask; + val |= hwc->config; + cpuc->pcr = val; + + pcr_ops->write(cpuc->pcr); } -static inline void sparc_pmu_disable_event(struct hw_perf_event *hwc, int idx) +static inline void sparc_pmu_disable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx) { u64 mask = mask_for_index(idx); u64 nop = nop_for_index(idx); - u64 val = pcr_ops->read(); + u64 val; - pcr_ops->write((val & ~mask) | nop); + val = cpuc->pcr; + val &= ~mask; + val |= nop; + cpuc->pcr = val; + + pcr_ops->write(cpuc->pcr); } void hw_perf_enable(void) @@ -493,7 +503,7 @@ void hw_perf_enable(void) cpuc->enabled = 1; barrier(); - val = pcr_ops->read(); + val = cpuc->pcr; for (i = 0; i < MAX_HWEVENTS; i++) { struct perf_event *cp = cpuc->events[i]; @@ -505,7 +515,9 @@ void hw_perf_enable(void) val |= hwc->config_base; } - pcr_ops->write(val); + cpuc->pcr = val; + + pcr_ops->write(cpuc->pcr); } void hw_perf_disable(void) @@ -518,10 +530,12 @@ void hw_perf_disable(void) cpuc->enabled = 0; - val = pcr_ops->read(); + val = cpuc->pcr; val &= ~(PCR_UTRACE | PCR_STRACE | sparc_pmu->hv_bit | sparc_pmu->irq_bit); - pcr_ops->write(val); + cpuc->pcr = val; + + pcr_ops->write(cpuc->pcr); } static u32 read_pmc(int idx) @@ -593,13 +607,13 @@ static int sparc_pmu_enable(struct perf_event *event) if (test_and_set_bit(idx, cpuc->used_mask)) return -EAGAIN; - sparc_pmu_disable_event(hwc, idx); + sparc_pmu_disable_event(cpuc, hwc, idx); cpuc->events[idx] = event; set_bit(idx, cpuc->active_mask); sparc_perf_event_set_period(event, hwc, idx); - sparc_pmu_enable_event(hwc, idx); + sparc_pmu_enable_event(cpuc, hwc, idx); perf_event_update_userpage(event); return 0; } @@ -635,7 +649,7 @@ static void sparc_pmu_disable(struct perf_event *event) int idx = hwc->idx; clear_bit(idx, cpuc->active_mask); - sparc_pmu_disable_event(hwc, idx); + sparc_pmu_disable_event(cpuc, hwc, idx); barrier(); @@ -649,18 +663,29 @@ static void sparc_pmu_disable(struct perf_event *event) static void sparc_pmu_read(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; + sparc_perf_event_update(event, hwc, hwc->idx); } static void sparc_pmu_unthrottle(struct perf_event *event) { + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; - sparc_pmu_enable_event(hwc, hwc->idx); + + sparc_pmu_enable_event(cpuc, hwc, hwc->idx); } static atomic_t active_events = ATOMIC_INIT(0); static DEFINE_MUTEX(pmc_grab_mutex); +static void perf_stop_nmi_watchdog(void *unused) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + stop_nmi_watchdog(NULL); + cpuc->pcr = pcr_ops->read(); +} + void perf_event_grab_pmc(void) { if (atomic_inc_not_zero(&active_events)) @@ -669,7 +694,7 @@ void perf_event_grab_pmc(void) mutex_lock(&pmc_grab_mutex); if (atomic_read(&active_events) == 0) { if (atomic_read(&nmi_active) > 0) { - on_each_cpu(stop_nmi_watchdog, NULL, 1); + on_each_cpu(perf_stop_nmi_watchdog, NULL, 1); BUG_ON(atomic_read(&nmi_active) != 0); } atomic_inc(&active_events); @@ -978,7 +1003,7 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, continue; if (perf_event_overflow(event, 1, &data, regs)) - sparc_pmu_disable_event(hwc, idx); + sparc_pmu_disable_event(cpuc, hwc, idx); } return NOTIFY_STOP; -- cgit v1.2.3-59-g8ed1b From de23cf3c42618998a7165364f987267ac9b298f0 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 9 Oct 2009 00:42:40 -0700 Subject: sparc64: Fix niagara2 perf IRQ bits. Signed-off-by: David S. Miller --- arch/sparc/kernel/perf_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/sparc/kernel/perf_event.c') diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 04db92743896..fa5936e1c3b9 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -437,7 +437,7 @@ static const struct sparc_pmu niagara2_pmu = { .lower_shift = 6, .event_mask = 0xfff, .hv_bit = 0x8, - .irq_bit = 0x03, + .irq_bit = 0x30, .upper_nop = 0x220, .lower_nop = 0x220, }; -- cgit v1.2.3-59-g8ed1b