From 0bab20ba4c95f56355c24a0b9f03eb486c2a267d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 16 Aug 2012 21:16:22 -0700 Subject: sparc64: Add 'reg_num' argument to pcr_ops methods. SPARC-T4 and later have multiple PCR registers, one for each PIC counter. Signed-off-by: David S. Miller --- arch/sparc/kernel/perf_event.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/sparc/kernel/perf_event.c') diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 5713957dcb8a..e1c9848c39cb 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -564,7 +564,7 @@ static inline void sparc_pmu_enable_event(struct cpu_hw_events *cpuc, struct hw_ val |= hwc->config; cpuc->pcr = val; - pcr_ops->write(cpuc->pcr); + pcr_ops->write(0, cpuc->pcr); } static inline void sparc_pmu_disable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx) @@ -578,7 +578,7 @@ static inline void sparc_pmu_disable_event(struct cpu_hw_events *cpuc, struct hw val |= nop; cpuc->pcr = val; - pcr_ops->write(cpuc->pcr); + pcr_ops->write(0, cpuc->pcr); } static u32 read_pmc(int idx) @@ -736,7 +736,7 @@ static void sparc_pmu_enable(struct pmu *pmu) cpuc->pcr = pcr | cpuc->event[0]->hw.config_base; } - pcr_ops->write(cpuc->pcr); + pcr_ops->write(0, cpuc->pcr); } static void sparc_pmu_disable(struct pmu *pmu) @@ -755,7 +755,7 @@ static void sparc_pmu_disable(struct pmu *pmu) sparc_pmu->hv_bit | sparc_pmu->irq_bit); cpuc->pcr = val; - pcr_ops->write(cpuc->pcr); + pcr_ops->write(0, cpuc->pcr); } static int active_event_index(struct cpu_hw_events *cpuc, @@ -856,7 +856,7 @@ static void perf_stop_nmi_watchdog(void *unused) struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); stop_nmi_watchdog(NULL); - cpuc->pcr = pcr_ops->read(); + cpuc->pcr = pcr_ops->read(0); } void perf_event_grab_pmc(void) @@ -1264,7 +1264,7 @@ void perf_event_print_debug(void) cpu = smp_processor_id(); - pcr = pcr_ops->read(); + pcr = pcr_ops->read(0); read_pic(pic); pr_info("\n"); @@ -1306,7 +1306,7 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, * overflow so we don't lose any events. */ if (sparc_pmu->irq_bit) - pcr_ops->write(cpuc->pcr); + pcr_ops->write(0, cpuc->pcr); for (i = 0; i < cpuc->n_events; i++) { struct perf_event *event = cpuc->event[i]; -- cgit v1.2.3-59-g8ed1b From 09d053c797f4a559af0647e4283b9b9ec0682d10 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 16 Aug 2012 23:19:32 -0700 Subject: sparc64: Abstract away PIC register accesses. And, like for the PCR, allow indexing of different PIC register numbers. This also removes all of the non-__KERNEL__ bits from asm/perfctr.h, nothing kernel side should include it any more. Signed-off-by: David S. 
Miller --- arch/sparc/include/asm/pcr.h | 6 ++++-- arch/sparc/include/asm/perfctr.h | 30 --------------------------- arch/sparc/kernel/nmi.c | 21 +++++++++---------- arch/sparc/kernel/pcr.c | 45 ++++++++++++++++++++++++++++++++-------- arch/sparc/kernel/perf_event.c | 23 ++++++++++---------- 5 files changed, 61 insertions(+), 64 deletions(-) (limited to 'arch/sparc/kernel/perf_event.c') diff --git a/arch/sparc/include/asm/pcr.h b/arch/sparc/include/asm/pcr.h index 55e23416eefc..d53abf75198f 100644 --- a/arch/sparc/include/asm/pcr.h +++ b/arch/sparc/include/asm/pcr.h @@ -2,8 +2,10 @@ #define __PCR_H struct pcr_ops { - u64 (*read)(unsigned long); - void (*write)(unsigned long, u64); + u64 (*read_pcr)(unsigned long); + void (*write_pcr)(unsigned long, u64); + u64 (*read_pic)(unsigned long); + void (*write_pic)(unsigned long, u64); }; extern const struct pcr_ops *pcr_ops; diff --git a/arch/sparc/include/asm/perfctr.h b/arch/sparc/include/asm/perfctr.h index 3332d2cba6c1..214feefa577c 100644 --- a/arch/sparc/include/asm/perfctr.h +++ b/arch/sparc/include/asm/perfctr.h @@ -54,11 +54,6 @@ enum perfctr_opcode { PERFCTR_GETPCR }; -/* I don't want the kernel's namespace to be polluted with this - * stuff when this file is included. --DaveM - */ -#ifndef __KERNEL__ - #define PRIV 0x00000001 #define SYS 0x00000002 #define USR 0x00000004 @@ -168,29 +163,4 @@ struct vcounter_struct { unsigned long long vcnt1; }; -#else /* !(__KERNEL__) */ - -#ifndef CONFIG_SPARC32 - -/* Performance counter register access. */ -#define read_pcr(__p) __asm__ __volatile__("rd %%pcr, %0" : "=r" (__p)) -#define write_pcr(__p) __asm__ __volatile__("wr %0, 0x0, %%pcr" : : "r" (__p)) -#define read_pic(__p) __asm__ __volatile__("rd %%pic, %0" : "=r" (__p)) - -/* Blackbird errata workaround. See commentary in - * arch/sparc64/kernel/smp.c:smp_percpu_timer_interrupt() - * for more information. 
- */ -#define write_pic(__p) \ - __asm__ __volatile__("ba,pt %%xcc, 99f\n\t" \ - " nop\n\t" \ - ".align 64\n" \ - "99:wr %0, 0x0, %%pic\n\t" \ - "rd %%pic, %%g0" : : "r" (__p)) -#define reset_pic() write_pic(0) - -#endif /* !CONFIG_SPARC32 */ - -#endif /* !(__KERNEL__) */ - #endif /* !(PERF_COUNTER_API) */ diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c index 95df720a14a9..ef22b9bacf1d 100644 --- a/arch/sparc/kernel/nmi.c +++ b/arch/sparc/kernel/nmi.c @@ -22,7 +22,6 @@ #include #include #include -#include #include "kstack.h" @@ -109,7 +108,7 @@ notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs) pt_regs_trap_type(regs), SIGINT) == NOTIFY_STOP) touched = 1; else - pcr_ops->write(0, PCR_PIC_PRIV); + pcr_ops->write_pcr(0, PCR_PIC_PRIV); sum = local_cpu_data().irq0_irqs; if (__get_cpu_var(nmi_touch)) { @@ -126,8 +125,8 @@ notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs) __this_cpu_write(alert_counter, 0); } if (__get_cpu_var(wd_enabled)) { - write_pic(picl_value(nmi_hz)); - pcr_ops->write(0, pcr_enable); + pcr_ops->write_pic(0, picl_value(nmi_hz)); + pcr_ops->write_pcr(0, pcr_enable); } restore_hardirq_stack(orig_sp); @@ -166,7 +165,7 @@ static void report_broken_nmi(int cpu, int *prev_nmi_count) void stop_nmi_watchdog(void *unused) { - pcr_ops->write(0, PCR_PIC_PRIV); + pcr_ops->write_pcr(0, PCR_PIC_PRIV); __get_cpu_var(wd_enabled) = 0; atomic_dec(&nmi_active); } @@ -223,10 +222,10 @@ void start_nmi_watchdog(void *unused) __get_cpu_var(wd_enabled) = 1; atomic_inc(&nmi_active); - pcr_ops->write(0, PCR_PIC_PRIV); - write_pic(picl_value(nmi_hz)); + pcr_ops->write_pcr(0, PCR_PIC_PRIV); + pcr_ops->write_pic(0, picl_value(nmi_hz)); - pcr_ops->write(0, pcr_enable); + pcr_ops->write_pcr(0, pcr_enable); } static void nmi_adjust_hz_one(void *unused) @@ -234,10 +233,10 @@ static void nmi_adjust_hz_one(void *unused) if (!__get_cpu_var(wd_enabled)) return; - pcr_ops->write(0, PCR_PIC_PRIV); - write_pic(picl_value(nmi_hz)); + pcr_ops->write_pcr(0, PCR_PIC_PRIV); + pcr_ops->write_pic(0, picl_value(nmi_hz)); - pcr_ops->write(0, pcr_enable); + pcr_ops->write_pcr(0, pcr_enable); } void nmi_adjust_hz(unsigned int new_hz) diff --git a/arch/sparc/kernel/pcr.c b/arch/sparc/kernel/pcr.c index 3d9ab5be23d8..e82ae89666f0 100644 --- a/arch/sparc/kernel/pcr.c +++ b/arch/sparc/kernel/pcr.c @@ -14,7 +14,6 @@ #include #include #include -#include /* This code is shared between various users of the performance * counters. Users will be oprofile, pseudo-NMI watchdog, and the @@ -65,19 +64,45 @@ static u64 direct_pcr_read(unsigned long reg_num) u64 val; WARN_ON_ONCE(reg_num != 0); - read_pcr(val); + __asm__ __volatile__("rd %%pcr, %0" : "=r" (val)); return val; } static void direct_pcr_write(unsigned long reg_num, u64 val) { WARN_ON_ONCE(reg_num != 0); - write_pcr(val); + __asm__ __volatile__("wr %0, 0x0, %%pcr" : : "r" (val)); +} + +static u64 direct_pic_read(unsigned long reg_num) +{ + u64 val; + + WARN_ON_ONCE(reg_num != 0); + __asm__ __volatile__("rd %%pic, %0" : "=r" (val)); + return val; +} + +static void direct_pic_write(unsigned long reg_num, u64 val) +{ + WARN_ON_ONCE(reg_num != 0); + + /* Blackbird errata workaround. See commentary in + * arch/sparc64/kernel/smp.c:smp_percpu_timer_interrupt() + * for more information. 
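+ * The sequence below branches to a %pic write placed on a
+ * 64-byte aligned boundary and then immediately reads %pic back
+ * into %g0, so the write and a dummy read execute back to back.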
+ */ + __asm__ __volatile__("ba,pt %%xcc, 99f\n\t" + " nop\n\t" + ".align 64\n" + "99:wr %0, 0x0, %%pic\n\t" + "rd %%pic, %%g0" : : "r" (val)); } static const struct pcr_ops direct_pcr_ops = { - .read = direct_pcr_read, - .write = direct_pcr_write, + .read_pcr = direct_pcr_read, + .write_pcr = direct_pcr_write, + .read_pic = direct_pic_read, + .write_pic = direct_pic_write, }; static void n2_pcr_write(unsigned long reg_num, u64 val) @@ -88,14 +113,16 @@ static void n2_pcr_write(unsigned long reg_num, u64 val) if (val & PCR_N2_HTRACE) { ret = sun4v_niagara2_setperf(HV_N2_PERF_SPARC_CTL, val); if (ret != HV_EOK) - write_pcr(val); + direct_pcr_write(reg_num, val); } else - write_pcr(val); + direct_pcr_write(reg_num, val); } static const struct pcr_ops n2_pcr_ops = { - .read = direct_pcr_read, - .write = n2_pcr_write, + .read_pcr = direct_pcr_read, + .write_pcr = n2_pcr_write, + .read_pic = direct_pic_read, + .write_pic = direct_pic_write, }; static unsigned long perf_hsvc_group; diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index e1c9848c39cb..dd12aa35805d 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include "kernel.h" @@ -564,7 +563,7 @@ static inline void sparc_pmu_enable_event(struct cpu_hw_events *cpuc, struct hw_ val |= hwc->config; cpuc->pcr = val; - pcr_ops->write(0, cpuc->pcr); + pcr_ops->write_pcr(0, cpuc->pcr); } static inline void sparc_pmu_disable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx) @@ -578,14 +577,14 @@ static inline void sparc_pmu_disable_event(struct cpu_hw_events *cpuc, struct hw val |= nop; cpuc->pcr = val; - pcr_ops->write(0, cpuc->pcr); + pcr_ops->write_pcr(0, cpuc->pcr); } static u32 read_pmc(int idx) { u64 val; - read_pic(val); + val = pcr_ops->read_pic(0); if (idx == PIC_UPPER_INDEX) val >>= 32; @@ -603,10 +602,10 @@ static void write_pmc(int idx, u64 val) mask = ((u64) 0xffffffff) << shift; val <<= shift; - read_pic(pic); + pic = pcr_ops->read_pic(0); pic &= ~mask; pic |= val; - write_pic(pic); + pcr_ops->write_pic(0, pic); } static u64 sparc_perf_event_update(struct perf_event *event, @@ -736,7 +735,7 @@ static void sparc_pmu_enable(struct pmu *pmu) cpuc->pcr = pcr | cpuc->event[0]->hw.config_base; } - pcr_ops->write(0, cpuc->pcr); + pcr_ops->write_pcr(0, cpuc->pcr); } static void sparc_pmu_disable(struct pmu *pmu) @@ -755,7 +754,7 @@ static void sparc_pmu_disable(struct pmu *pmu) sparc_pmu->hv_bit | sparc_pmu->irq_bit); cpuc->pcr = val; - pcr_ops->write(0, cpuc->pcr); + pcr_ops->write_pcr(0, cpuc->pcr); } static int active_event_index(struct cpu_hw_events *cpuc, @@ -856,7 +855,7 @@ static void perf_stop_nmi_watchdog(void *unused) struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); stop_nmi_watchdog(NULL); - cpuc->pcr = pcr_ops->read(0); + cpuc->pcr = pcr_ops->read_pcr(0); } void perf_event_grab_pmc(void) @@ -1264,8 +1263,8 @@ void perf_event_print_debug(void) cpu = smp_processor_id(); - pcr = pcr_ops->read(0); - read_pic(pic); + pcr = pcr_ops->read_pcr(0); + pic = pcr_ops->read_pic(0); pr_info("\n"); pr_info("CPU#%d: PCR[%016llx] PIC[%016llx]\n", @@ -1306,7 +1305,7 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, * overflow so we don't lose any events. 
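 * (The PCR write below presumably also clears the pending
 * overflow state on chips that have the irq_bit, which is why
 * it must happen before the counters are read.)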
*/ if (sparc_pmu->irq_bit) - pcr_ops->write(0, cpuc->pcr); + pcr_ops->write_pcr(0, cpuc->pcr); for (i = 0; i < cpuc->n_events; i++) { struct perf_event *event = cpuc->event[i]; -- cgit v1.2.3-59-g8ed1b From b38e99f5bdf62f37d7552311fef1bff00bec6308 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 17 Aug 2012 02:31:10 -0700 Subject: sparc64: Add perf_event abstractions for orthogonal PMUs. Starting with SPARC-T4 we have a separate PCR control register for each performance counter, and there are absolutely no restrictions on what events can run on which counters. Add flags that we can use to elide the conflict and dependency logic used to handle older chips. Signed-off-by: David S. Miller --- arch/sparc/kernel/perf_event.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'arch/sparc/kernel/perf_event.c') diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index dd12aa35805d..11b424bb0b2b 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -148,6 +148,9 @@ struct sparc_pmu { int irq_bit; int upper_nop; int lower_nop; + unsigned int flags; +#define SPARC_PMU_ALL_EXCLUDES_SAME 0x00000001 +#define SPARC_PMU_HAS_CONFLICTS 0x00000002 }; static const struct perf_event_map ultra3_perfmon_event_map[] = { @@ -272,6 +275,8 @@ static const struct sparc_pmu ultra3_pmu = { .event_mask = 0x3f, .upper_nop = 0x1c, .lower_nop = 0x14, + .flags = (SPARC_PMU_ALL_EXCLUDES_SAME | + SPARC_PMU_HAS_CONFLICTS), }; /* Niagara1 is very limited. The upper PIC is hard-locked to count @@ -401,6 +406,8 @@ static const struct sparc_pmu niagara1_pmu = { .event_mask = 0x7, .upper_nop = 0x0, .lower_nop = 0x0, + .flags = (SPARC_PMU_ALL_EXCLUDES_SAME | + SPARC_PMU_HAS_CONFLICTS), }; static const struct perf_event_map niagara2_perfmon_event_map[] = { @@ -529,6 +536,8 @@ static const struct sparc_pmu niagara2_pmu = { .irq_bit = 0x30, .upper_nop = 0x220, .lower_nop = 0x220, + .flags = (SPARC_PMU_ALL_EXCLUDES_SAME | + SPARC_PMU_HAS_CONFLICTS), }; static const struct sparc_pmu *sparc_pmu __read_mostly; @@ -944,6 +953,14 @@ static int sparc_check_constraints(struct perf_event **evts, if (n_ev > MAX_HWEVENTS) return -1; + if (!(sparc_pmu->flags & SPARC_PMU_HAS_CONFLICTS)) { + int i; + + for (i = 0; i < n_ev; i++) + evts[i]->hw.idx = i; + return 0; + } + msk0 = perf_event_get_msk(events[0]); if (n_ev == 1) { if (msk0 & PIC_LOWER) @@ -999,6 +1016,9 @@ static int check_excludes(struct perf_event **evts, int n_prev, int n_new) struct perf_event *event; int i, n, first; + if (!(sparc_pmu->flags & SPARC_PMU_ALL_EXCLUDES_SAME)) + return 0; + n = n_prev + n_new; if (n <= 1) return 0; -- cgit v1.2.3-59-g8ed1b From 59660495e80e7eabc726c301ddc46afd2ce1bcac Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 17 Aug 2012 02:33:44 -0700 Subject: sparc64: Allow max hw perf events to be variable. Now specified in sparc_pmu descriptor. Signed-off-by: David S.
Miller --- arch/sparc/kernel/perf_event.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'arch/sparc/kernel/perf_event.c') diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 11b424bb0b2b..f7b9ae39c264 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -151,6 +151,7 @@ struct sparc_pmu { unsigned int flags; #define SPARC_PMU_ALL_EXCLUDES_SAME 0x00000001 #define SPARC_PMU_HAS_CONFLICTS 0x00000002 + int max_hw_events; }; static const struct perf_event_map ultra3_perfmon_event_map[] = { @@ -277,6 +278,7 @@ static const struct sparc_pmu ultra3_pmu = { .lower_nop = 0x14, .flags = (SPARC_PMU_ALL_EXCLUDES_SAME | SPARC_PMU_HAS_CONFLICTS), + .max_hw_events = 2, }; /* Niagara1 is very limited. The upper PIC is hard-locked to count @@ -408,6 +410,7 @@ static const struct sparc_pmu niagara1_pmu = { .lower_nop = 0x0, .flags = (SPARC_PMU_ALL_EXCLUDES_SAME | SPARC_PMU_HAS_CONFLICTS), + .max_hw_events = 2, }; static const struct perf_event_map niagara2_perfmon_event_map[] = { @@ -538,6 +541,7 @@ static const struct sparc_pmu niagara2_pmu = { .lower_nop = 0x220, .flags = (SPARC_PMU_ALL_EXCLUDES_SAME | SPARC_PMU_HAS_CONFLICTS), + .max_hw_events = 2, }; static const struct sparc_pmu *sparc_pmu __read_mostly; @@ -950,7 +954,7 @@ static int sparc_check_constraints(struct perf_event **evts, if (!n_ev) return 0; - if (n_ev > MAX_HWEVENTS) + if (n_ev > sparc_pmu->max_hw_events) return -1; if (!(sparc_pmu->flags & SPARC_PMU_HAS_CONFLICTS)) { @@ -1078,7 +1082,7 @@ static int sparc_pmu_add(struct perf_event *event, int ef_flags) perf_pmu_disable(event->pmu); n0 = cpuc->n_events; - if (n0 >= MAX_HWEVENTS) + if (n0 >= sparc_pmu->max_hw_events) goto out; cpuc->event[n0] = event; @@ -1174,7 +1178,7 @@ static int sparc_pmu_event_init(struct perf_event *event) n = 0; if (event->group_leader != event) { n = collect_events(event->group_leader, - MAX_HWEVENTS - 1, + sparc_pmu->max_hw_events - 1, evts, events, current_idx_dmy); if (n < 0) return -EINVAL; -- cgit v1.2.3-59-g8ed1b From 5344303ca8dad9881def6cfb45ad01201dba16de Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 17 Aug 2012 02:37:06 -0700 Subject: sparc64: Abstract PMC read/write behind sparc_pmu. Signed-off-by: David S. 
Miller --- arch/sparc/kernel/perf_event.c | 68 +++++++++++++++++++++++------------------- 1 file changed, 38 insertions(+), 30 deletions(-) (limited to 'arch/sparc/kernel/perf_event.c') diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index f7b9ae39c264..fbd80299a4bb 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -141,6 +141,8 @@ struct sparc_pmu { const struct perf_event_map *(*event_map)(int); const cache_map_t *cache_map; int max_events; + u32 (*read_pmc)(int); + void (*write_pmc)(int, u64); int upper_shift; int lower_shift; int event_mask; @@ -154,6 +156,34 @@ struct sparc_pmu { int max_hw_events; }; +static u32 sparc_default_read_pmc(int idx) +{ + u64 val; + + val = pcr_ops->read_pic(0); + if (idx == PIC_UPPER_INDEX) + val >>= 32; + + return val & 0xffffffff; +} + +static void sparc_default_write_pmc(int idx, u64 val) +{ + u64 shift, mask, pic; + + shift = 0; + if (idx == PIC_UPPER_INDEX) + shift = 32; + + mask = ((u64) 0xffffffff) << shift; + val <<= shift; + + pic = pcr_ops->read_pic(0); + pic &= ~mask; + pic |= val; + pcr_ops->write_pic(0, pic); +} + static const struct perf_event_map ultra3_perfmon_event_map[] = { [PERF_COUNT_HW_CPU_CYCLES] = { 0x0000, PIC_UPPER | PIC_LOWER }, [PERF_COUNT_HW_INSTRUCTIONS] = { 0x0001, PIC_UPPER | PIC_LOWER }, @@ -271,6 +301,8 @@ static const struct sparc_pmu ultra3_pmu = { .event_map = ultra3_event_map, .cache_map = &ultra3_cache_map, .max_events = ARRAY_SIZE(ultra3_perfmon_event_map), + .read_pmc = sparc_default_read_pmc, + .write_pmc = sparc_default_write_pmc, .upper_shift = 11, .lower_shift = 4, .event_mask = 0x3f, @@ -403,6 +435,8 @@ static const struct sparc_pmu niagara1_pmu = { .event_map = niagara1_event_map, .cache_map = &niagara1_cache_map, .max_events = ARRAY_SIZE(niagara1_perfmon_event_map), + .read_pmc = sparc_default_read_pmc, + .write_pmc = sparc_default_write_pmc, .upper_shift = 0, .lower_shift = 4, .event_mask = 0x7, @@ -532,6 +566,8 @@ static const struct sparc_pmu niagara2_pmu = { .event_map = niagara2_event_map, .cache_map = &niagara2_cache_map, .max_events = ARRAY_SIZE(niagara2_perfmon_event_map), + .read_pmc = sparc_default_read_pmc, + .write_pmc = sparc_default_write_pmc, .upper_shift = 19, .lower_shift = 6, .event_mask = 0xfff, @@ -593,34 +629,6 @@ static inline void sparc_pmu_disable_event(struct cpu_hw_events *cpuc, struct hw pcr_ops->write_pcr(0, cpuc->pcr); } -static u32 read_pmc(int idx) -{ - u64 val; - - val = pcr_ops->read_pic(0); - if (idx == PIC_UPPER_INDEX) - val >>= 32; - - return val & 0xffffffff; -} - -static void write_pmc(int idx, u64 val) -{ - u64 shift, mask, pic; - - shift = 0; - if (idx == PIC_UPPER_INDEX) - shift = 32; - - mask = ((u64) 0xffffffff) << shift; - val <<= shift; - - pic = pcr_ops->read_pic(0); - pic &= ~mask; - pic |= val; - pcr_ops->write_pic(0, pic); -} - static u64 sparc_perf_event_update(struct perf_event *event, struct hw_perf_event *hwc, int idx) { @@ -630,7 +638,7 @@ static u64 sparc_perf_event_update(struct perf_event *event, again: prev_raw_count = local64_read(&hwc->prev_count); - new_raw_count = read_pmc(idx); + new_raw_count = sparc_pmu->read_pmc(idx); if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, new_raw_count) != prev_raw_count) @@ -670,7 +678,7 @@ static int sparc_perf_event_set_period(struct perf_event *event, local64_set(&hwc->prev_count, (u64)-left); - write_pmc(idx, (u64)(-left) & 0xffffffff); + sparc_pmu->write_pmc(idx, (u64)(-left) & 0xffffffff); perf_event_update_userpage(event); -- cgit 
v1.2.3-59-g8ed1b From 7ac2ed286f9338ea6437831096cc36ce8395b6fc Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 17 Aug 2012 02:41:32 -0700 Subject: sparc64: Specify user and supervisor trace PCR bits in sparc_pmu. Signed-off-by: David S. Miller --- arch/sparc/kernel/perf_event.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'arch/sparc/kernel/perf_event.c') diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index fbd80299a4bb..1ab676bd13f0 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -146,6 +146,8 @@ struct sparc_pmu { int upper_shift; int lower_shift; int event_mask; + int user_bit; + int priv_bit; int hv_bit; int irq_bit; int upper_nop; @@ -306,6 +308,8 @@ static const struct sparc_pmu ultra3_pmu = { .upper_shift = 11, .lower_shift = 4, .event_mask = 0x3f, + .user_bit = PCR_UTRACE, + .priv_bit = PCR_STRACE, .upper_nop = 0x1c, .lower_nop = 0x14, .flags = (SPARC_PMU_ALL_EXCLUDES_SAME | @@ -440,6 +444,8 @@ static const struct sparc_pmu niagara1_pmu = { .upper_shift = 0, .lower_shift = 4, .event_mask = 0x7, + .user_bit = PCR_UTRACE, + .priv_bit = PCR_STRACE, .upper_nop = 0x0, .lower_nop = 0x0, .flags = (SPARC_PMU_ALL_EXCLUDES_SAME | @@ -571,7 +577,9 @@ static const struct sparc_pmu niagara2_pmu = { .upper_shift = 19, .lower_shift = 6, .event_mask = 0xfff, - .hv_bit = 0x8, + .user_bit = PCR_UTRACE, + .priv_bit = PCR_STRACE, + .hv_bit = PCR_N2_HTRACE, .irq_bit = 0x30, .upper_nop = 0x220, .lower_nop = 0x220, @@ -771,7 +779,7 @@ static void sparc_pmu_disable(struct pmu *pmu) cpuc->n_added = 0; val = cpuc->pcr; - val &= ~(PCR_UTRACE | PCR_STRACE | + val &= ~(sparc_pmu->user_bit | sparc_pmu->priv_bit | sparc_pmu->hv_bit | sparc_pmu->irq_bit); cpuc->pcr = val; @@ -1177,9 +1185,9 @@ static int sparc_pmu_event_init(struct perf_event *event) /* We save the enable bits in the config_base. */ hwc->config_base = sparc_pmu->irq_bit; if (!attr->exclude_user) - hwc->config_base |= PCR_UTRACE; + hwc->config_base |= sparc_pmu->user_bit; if (!attr->exclude_kernel) - hwc->config_base |= PCR_STRACE; + hwc->config_base |= sparc_pmu->priv_bit; if (!attr->exclude_hv) hwc->config_base |= sparc_pmu->hv_bit; -- cgit v1.2.3-59-g8ed1b From 3f1a20972239e3f66720c34d9009ae9cc9ddffba Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 17 Aug 2012 02:51:21 -0700 Subject: sparc64: Prepare perf event layer for handling multiple PCR registers. Make the per-cpu pcr save area an array instead of one u64. Describe how many PCR and PIC registers the chip has in the sparc_pmu descriptor. Signed-off-by: David S. Miller --- arch/sparc/kernel/perf_event.c | 72 ++++++++++++++++++++++++++---------------- 1 file changed, 45 insertions(+), 27 deletions(-) (limited to 'arch/sparc/kernel/perf_event.c') diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 1ab676bd13f0..9be089abb5d1 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -54,6 +54,7 @@ */ #define MAX_HWEVENTS 2 +#define MAX_PCRS 1 #define MAX_PERIOD ((1UL << 32) - 1) #define PIC_UPPER_INDEX 0 @@ -89,8 +90,8 @@ struct cpu_hw_events { */ int current_idx[MAX_HWEVENTS]; - /* Software copy of %pcr register on this cpu. */ - u64 pcr; + /* Software copy of %pcr register(s) on this cpu. */ + u64 pcr[MAX_HWEVENTS]; /* Enabled/disable state. 
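 * Set by sparc_pmu_enable() and cleared by sparc_pmu_disable();
 * both return early when the state is already as requested.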
*/ int enabled; @@ -156,6 +157,8 @@ struct sparc_pmu { #define SPARC_PMU_ALL_EXCLUDES_SAME 0x00000001 #define SPARC_PMU_HAS_CONFLICTS 0x00000002 int max_hw_events; + int num_pcrs; + int num_pic_regs; }; static u32 sparc_default_read_pmc(int idx) @@ -315,6 +318,8 @@ static const struct sparc_pmu ultra3_pmu = { .flags = (SPARC_PMU_ALL_EXCLUDES_SAME | SPARC_PMU_HAS_CONFLICTS), .max_hw_events = 2, + .num_pcrs = 1, + .num_pic_regs = 1, }; /* Niagara1 is very limited. The upper PIC is hard-locked to count @@ -451,6 +456,8 @@ static const struct sparc_pmu niagara1_pmu = { .flags = (SPARC_PMU_ALL_EXCLUDES_SAME | SPARC_PMU_HAS_CONFLICTS), .max_hw_events = 2, + .num_pcrs = 1, + .num_pic_regs = 1, }; static const struct perf_event_map niagara2_perfmon_event_map[] = { @@ -586,6 +593,8 @@ static const struct sparc_pmu niagara2_pmu = { .flags = (SPARC_PMU_ALL_EXCLUDES_SAME | SPARC_PMU_HAS_CONFLICTS), .max_hw_events = 2, + .num_pcrs = 1, + .num_pic_regs = 1, }; static const struct sparc_pmu *sparc_pmu __read_mostly; @@ -615,12 +624,12 @@ static inline void sparc_pmu_enable_event(struct cpu_hw_events *cpuc, struct hw_ { u64 val, mask = mask_for_index(idx); - val = cpuc->pcr; + val = cpuc->pcr[0]; val &= ~mask; val |= hwc->config; - cpuc->pcr = val; + cpuc->pcr[0] = val; - pcr_ops->write_pcr(0, cpuc->pcr); + pcr_ops->write_pcr(0, cpuc->pcr[0]); } static inline void sparc_pmu_disable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx) @@ -629,12 +638,12 @@ static inline void sparc_pmu_disable_event(struct cpu_hw_events *cpuc, struct hw u64 nop = nop_for_index(idx); u64 val; - val = cpuc->pcr; + val = cpuc->pcr[0]; val &= ~mask; val |= nop; - cpuc->pcr = val; + cpuc->pcr[0] = val; - pcr_ops->write_pcr(0, cpuc->pcr); + pcr_ops->write_pcr(0, cpuc->pcr[0]); } static u64 sparc_perf_event_update(struct perf_event *event, @@ -751,7 +760,7 @@ static void sparc_pmu_enable(struct pmu *pmu) cpuc->enabled = 1; barrier(); - pcr = cpuc->pcr; + pcr = cpuc->pcr[0]; if (!cpuc->n_events) { pcr = 0; } else { @@ -761,16 +770,16 @@ static void sparc_pmu_enable(struct pmu *pmu) * configuration, so just fetch the settings from the * first entry. 
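 * (The shared settings are the user/priv/hypervisor trace enable
 * bits that were saved into hw.config_base when the event was
 * initialized.)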
*/ - cpuc->pcr = pcr | cpuc->event[0]->hw.config_base; + cpuc->pcr[0] = pcr | cpuc->event[0]->hw.config_base; } - pcr_ops->write_pcr(0, cpuc->pcr); + pcr_ops->write_pcr(0, cpuc->pcr[0]); } static void sparc_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - u64 val; + int i; if (!cpuc->enabled) return; @@ -778,12 +787,14 @@ static void sparc_pmu_disable(struct pmu *pmu) cpuc->enabled = 0; cpuc->n_added = 0; - val = cpuc->pcr; - val &= ~(sparc_pmu->user_bit | sparc_pmu->priv_bit | - sparc_pmu->hv_bit | sparc_pmu->irq_bit); - cpuc->pcr = val; + for (i = 0; i < sparc_pmu->num_pcrs; i++) { + u64 val = cpuc->pcr[i]; - pcr_ops->write_pcr(0, cpuc->pcr); + val &= ~(sparc_pmu->user_bit | sparc_pmu->priv_bit | + sparc_pmu->hv_bit | sparc_pmu->irq_bit); + cpuc->pcr[i] = val; + pcr_ops->write_pcr(i, cpuc->pcr[i]); + } } static int active_event_index(struct cpu_hw_events *cpuc, @@ -882,9 +893,11 @@ static DEFINE_MUTEX(pmc_grab_mutex); static void perf_stop_nmi_watchdog(void *unused) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + int i; stop_nmi_watchdog(NULL); - cpuc->pcr = pcr_ops->read_pcr(0); + for (i = 0; i < sparc_pmu->num_pcrs; i++) + cpuc->pcr[i] = pcr_ops->read_pcr(i); } void perf_event_grab_pmc(void) @@ -1293,8 +1306,7 @@ static struct pmu pmu = { void perf_event_print_debug(void) { unsigned long flags; - u64 pcr, pic; - int cpu; + int cpu, i; if (!sparc_pmu) return; @@ -1303,12 +1315,13 @@ void perf_event_print_debug(void) cpu = smp_processor_id(); - pcr = pcr_ops->read_pcr(0); - pic = pcr_ops->read_pic(0); - pr_info("\n"); - pr_info("CPU#%d: PCR[%016llx] PIC[%016llx]\n", - cpu, pcr, pic); + for (i = 0; i < sparc_pmu->num_pcrs; i++) + pr_info("CPU#%d: PCR%d[%016llx]\n", + cpu, i, pcr_ops->read_pcr(i)); + for (i = 0; i < sparc_pmu->num_pic_regs; i++) + pr_info("CPU#%d: PIC%d[%016llx]\n", + cpu, i, pcr_ops->read_pic(i)); local_irq_restore(flags); } @@ -1344,8 +1357,9 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, * Do this before we peek at the counters to determine * overflow so we don't lose any events. */ - if (sparc_pmu->irq_bit) - pcr_ops->write_pcr(0, cpuc->pcr); + if (sparc_pmu->irq_bit && + sparc_pmu->num_pcrs == 1) + pcr_ops->write_pcr(0, cpuc->pcr[0]); for (i = 0; i < cpuc->n_events; i++) { struct perf_event *event = cpuc->event[i]; @@ -1353,6 +1367,10 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, struct hw_perf_event *hwc; u64 val; + if (sparc_pmu->irq_bit && + sparc_pmu->num_pcrs > 1) + pcr_ops->write_pcr(idx, cpuc->pcr[idx]); + hwc = &event->hw; val = sparc_perf_event_update(event, hwc, idx); if (val & (1ULL << 31)) -- cgit v1.2.3-59-g8ed1b From 5ab968413515e17788003c522f7ca40a07fae900 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 17 Aug 2012 03:09:39 -0700 Subject: sparc64: Rework sparc_pmu_enable() so that the side effects are clearer. When cpuc->n_events is zero, we actually don't do anything and we just write the cpuc->pcr[0] value as-is without any modifications. The "pcr = 0;" assignment there was just useless and confusing. Signed-off-by: David S. 
Miller --- arch/sparc/kernel/perf_event.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'arch/sparc/kernel/perf_event.c') diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 9be089abb5d1..197c79e9206a 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -752,7 +752,6 @@ out: static void sparc_pmu_enable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - u64 pcr; if (cpuc->enabled) return; @@ -760,11 +759,8 @@ static void sparc_pmu_enable(struct pmu *pmu) cpuc->enabled = 1; barrier(); - pcr = cpuc->pcr[0]; - if (!cpuc->n_events) { - pcr = 0; - } else { - pcr = maybe_change_configuration(cpuc, pcr); + if (cpuc->n_events) { + u64 pcr = maybe_change_configuration(cpuc, cpuc->pcr[0]); /* We require that all of the events have the same * configuration, so just fetch the settings from the -- cgit v1.2.3-59-g8ed1b From b4f061a4b8cbf947de4fa816a1cfc53960da218e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 17 Aug 2012 03:14:01 -0700 Subject: sparc64: Make sparc_pmu_{enable,disable}_event() multi-pcr aware. Signed-off-by: David S. Miller --- arch/sparc/kernel/perf_event.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'arch/sparc/kernel/perf_event.c') diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 197c79e9206a..c3ad63775ff9 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -623,27 +623,35 @@ static u64 nop_for_index(int idx) static inline void sparc_pmu_enable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx) { u64 val, mask = mask_for_index(idx); + int pcr_index = 0; - val = cpuc->pcr[0]; + if (sparc_pmu->num_pcrs > 1) + pcr_index = idx; + + val = cpuc->pcr[pcr_index]; val &= ~mask; val |= hwc->config; - cpuc->pcr[0] = val; + cpuc->pcr[pcr_index] = val; - pcr_ops->write_pcr(0, cpuc->pcr[0]); + pcr_ops->write_pcr(pcr_index, cpuc->pcr[pcr_index]); } static inline void sparc_pmu_disable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx) { u64 mask = mask_for_index(idx); u64 nop = nop_for_index(idx); + int pcr_index = 0; u64 val; - val = cpuc->pcr[0]; + if (sparc_pmu->num_pcrs > 1) + pcr_index = idx; + + val = cpuc->pcr[pcr_index]; val &= ~mask; val |= nop; - cpuc->pcr[0] = val; + cpuc->pcr[pcr_index] = val; - pcr_ops->write_pcr(0, cpuc->pcr[0]); + pcr_ops->write_pcr(pcr_index, cpuc->pcr[pcr_index]); } static u64 sparc_perf_event_update(struct perf_event *event, -- cgit v1.2.3-59-g8ed1b From 7a37a0b8f8df0872932cf8373b21b5e14a92a794 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 17 Aug 2012 03:29:05 -0700 Subject: sparc64: Support perf event encoding for multi-PCR PMUs. Signed-off-by: David S. Miller --- arch/sparc/kernel/perf_event.c | 98 ++++++++++++++++++++++++++++++++---------- 1 file changed, 75 insertions(+), 23 deletions(-) (limited to 'arch/sparc/kernel/perf_event.c') diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index c3ad63775ff9..64415922a28f 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -710,18 +710,10 @@ static int sparc_perf_event_set_period(struct perf_event *event, return ret; } -/* If performance event entries have been added, move existing - * events around (if necessary) and then assign new entries to - * counters. 
- */ -static u64 maybe_change_configuration(struct cpu_hw_events *cpuc, u64 pcr) +static void read_in_all_counters(struct cpu_hw_events *cpuc) { int i; - if (!cpuc->n_added) - goto out; - - /* Read in the counters which are moving. */ for (i = 0; i < cpuc->n_events; i++) { struct perf_event *cp = cpuc->event[i]; @@ -732,6 +724,20 @@ static u64 maybe_change_configuration(struct cpu_hw_events *cpuc, u64 pcr) cpuc->current_idx[i] = PIC_NO_INDEX; } } +} + +/* On this PMU all PICs are programmed using a single PCR. Calculate + * the combined control register value. + * + * For such chips we require that all of the events have the same + * configuration, so just fetch the settings from the first entry. + */ +static void calculate_single_pcr(struct cpu_hw_events *cpuc) +{ + int i; + + if (!cpuc->n_added) + goto out; /* Assign to counters all unassigned events. */ for (i = 0; i < cpuc->n_events; i++) { @@ -747,19 +753,71 @@ static u64 maybe_change_configuration(struct cpu_hw_events *cpuc, u64 pcr) cpuc->current_idx[i] = idx; enc = perf_event_get_enc(cpuc->events[i]); - pcr &= ~mask_for_index(idx); + cpuc->pcr[0] &= ~mask_for_index(idx); if (hwc->state & PERF_HES_STOPPED) - pcr |= nop_for_index(idx); + cpuc->pcr[0] |= nop_for_index(idx); else - pcr |= event_encoding(enc, idx); + cpuc->pcr[0] |= event_encoding(enc, idx); } out: - return pcr; + cpuc->pcr[0] |= cpuc->event[0]->hw.config_base; +} + +/* On this PMU each PIC has its own PCR control register. */ +static void calculate_multiple_pcrs(struct cpu_hw_events *cpuc) +{ + int i; + + if (!cpuc->n_added) + goto out; + + for (i = 0; i < cpuc->n_events; i++) { + struct perf_event *cp = cpuc->event[i]; + struct hw_perf_event *hwc = &cp->hw; + int idx = hwc->idx; + u64 enc; + + if (cpuc->current_idx[i] != PIC_NO_INDEX) + continue; + + sparc_perf_event_set_period(cp, hwc, idx); + cpuc->current_idx[i] = idx; + + enc = perf_event_get_enc(cpuc->events[i]); + cpuc->pcr[idx] &= ~mask_for_index(idx); + if (hwc->state & PERF_HES_STOPPED) + cpuc->pcr[idx] |= nop_for_index(idx); + else + cpuc->pcr[idx] |= event_encoding(enc, idx); + } +out: + for (i = 0; i < cpuc->n_events; i++) { + struct perf_event *cp = cpuc->event[i]; + int idx = cp->hw.idx; + + cpuc->pcr[idx] |= cp->hw.config_base; + } +} + +/* If performance event entries have been added, move existing events + * around (if necessary) and then assign new entries to counters. + */ +static void update_pcrs_for_enable(struct cpu_hw_events *cpuc) +{ + if (cpuc->n_added) + read_in_all_counters(cpuc); + + if (sparc_pmu->num_pcrs == 1) { + calculate_single_pcr(cpuc); + } else { + calculate_multiple_pcrs(cpuc); + } } static void sparc_pmu_enable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + int i; if (cpuc->enabled) return; @@ -767,17 +825,11 @@ static void sparc_pmu_enable(struct pmu *pmu) cpuc->enabled = 1; barrier(); - if (cpuc->n_events) { - u64 pcr = maybe_change_configuration(cpuc, cpuc->pcr[0]); - - /* We require that all of the events have the same - * configuration, so just fetch the settings from the - * first entry. - */ - cpuc->pcr[0] = pcr | cpuc->event[0]->hw.config_base; - } + if (cpuc->n_events) + update_pcrs_for_enable(cpuc); - pcr_ops->write_pcr(0, cpuc->pcr[0]); + for (i = 0; i < sparc_pmu->num_pcrs; i++) + pcr_ops->write_pcr(i, cpuc->pcr[i]); } static void sparc_pmu_disable(struct pmu *pmu) -- cgit v1.2.3-59-g8ed1b From 035ea28dde1802ad4cc570976da34f8b7c2ed515 Mon Sep 17 00:00:00 2001 From: "David S.
Miller" Date: Fri, 17 Aug 2012 23:06:09 -0700 Subject: sparc64: Add SPARC-T4 perf event support. Signed-off-by: David S. Miller --- arch/sparc/kernel/perf_event.c | 189 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 187 insertions(+), 2 deletions(-) (limited to 'arch/sparc/kernel/perf_event.c') diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 64415922a28f..18853705282b 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -53,8 +53,8 @@ * normal code. */ -#define MAX_HWEVENTS 2 -#define MAX_PCRS 1 +#define MAX_HWEVENTS 4 +#define MAX_PCRS 4 #define MAX_PERIOD ((1UL << 32) - 1) #define PIC_UPPER_INDEX 0 @@ -597,6 +597,187 @@ static const struct sparc_pmu niagara2_pmu = { .num_pic_regs = 1, }; +static const struct perf_event_map niagara4_perfmon_event_map[] = { + [PERF_COUNT_HW_CPU_CYCLES] = { (26 << 6) }, + [PERF_COUNT_HW_INSTRUCTIONS] = { (3 << 6) | 0x3f }, + [PERF_COUNT_HW_CACHE_REFERENCES] = { (3 << 6) | 0x04 }, + [PERF_COUNT_HW_CACHE_MISSES] = { (16 << 6) | 0x07 }, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { (4 << 6) | 0x01 }, + [PERF_COUNT_HW_BRANCH_MISSES] = { (25 << 6) | 0x0f }, +}; + +static const struct perf_event_map *niagara4_event_map(int event_id) +{ + return &niagara4_perfmon_event_map[event_id]; +} + +static const cache_map_t niagara4_cache_map = { +[C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { (3 << 6) | 0x04 }, + [C(RESULT_MISS)] = { (16 << 6) | 0x07 }, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = { (3 << 6) | 0x08 }, + [C(RESULT_MISS)] = { (16 << 6) | 0x07 }, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { (3 << 6) | 0x3f }, + [C(RESULT_MISS)] = { (11 << 6) | 0x03 }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE }, + [ C(RESULT_MISS) ] = { CACHE_OP_NONSENSE }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { (3 << 6) | 0x04 }, + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = { (3 << 6) | 0x08 }, + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { (17 << 6) | 0x3f }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { (6 << 6) | 0x3f }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_PREFETCH) ] = { + [ 
C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(NODE)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +}; + +static u32 sparc_vt_read_pmc(int idx) +{ + u64 val = pcr_ops->read_pic(idx); + + return val & 0xffffffff; +} + +static void sparc_vt_write_pmc(int idx, u64 val) +{ + u64 pcr; + + /* There seems to be an internal latch on the overflow event + * on SPARC-T4 that prevents it from triggering unless you + * update the PIC exactly as we do here. The requirement + * seems to be that you have to turn off event counting in the + * PCR around the PIC update. + * + * For example, after the following sequence: + * + * 1) set PIC to -1 + * 2) enable event counting and overflow reporting in PCR + * 3) overflow triggers, softint 15 handler invoked + * 4) clear OV bit in PCR + * 5) write PIC to -1 + * + * a subsequent overflow event will not trigger. This + * sequence works on SPARC-T3 and previous chips. + */ + pcr = pcr_ops->read_pcr(idx); + pcr_ops->write_pcr(idx, PCR_N4_PICNPT); + + pcr_ops->write_pic(idx, val & 0xffffffff); + + pcr_ops->write_pcr(idx, pcr); +} + +static const struct sparc_pmu niagara4_pmu = { + .event_map = niagara4_event_map, + .cache_map = &niagara4_cache_map, + .max_events = ARRAY_SIZE(niagara4_perfmon_event_map), + .read_pmc = sparc_vt_read_pmc, + .write_pmc = sparc_vt_write_pmc, + .upper_shift = 5, + .lower_shift = 5, + .event_mask = 0x7ff, + .user_bit = PCR_N4_UTRACE, + .priv_bit = PCR_N4_STRACE, + + /* We explicitly don't support hypervisor tracing. The T4 + * generates the overflow event for precise events via a trap + * which will not be generated (i.e. it's completely lost) if + * we happen to be in the hypervisor when the event triggers. + * Essentially, the overflow event reporting is completely + * unusable when you have hypervisor mode tracing enabled. + */ + .hv_bit = 0, + + .irq_bit = PCR_N4_TOE, + .upper_nop = 0, + .lower_nop = 0, + .flags = 0, + .max_hw_events = 4, + .num_pcrs = 4, + .num_pic_regs = 4, +}; + static const struct sparc_pmu *sparc_pmu __read_mostly; static u64 event_encoding(u64 event_id, int idx) @@ -1465,6 +1646,10 @@ static bool __init supported_pmu(void) sparc_pmu = &niagara2_pmu; return true; } + if (!strcmp(sparc_pmu_type, "niagara4")) { + sparc_pmu = &niagara4_pmu; + return true; + } return false; } -- cgit v1.2.3-59-g8ed1b From bab96bda4431602213deb53723d13f73f5308a20 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 18 Aug 2012 23:17:38 -0700 Subject: sparc64: Update generic comments in perf event code to match reality. Describe how we support two types of PMU setups, one with a single control register and two counters stored in a single register, and another with one control register per counter and each counter living in its own register. Signed-off-by: David S.
Miller --- arch/sparc/kernel/perf_event.c | 40 +++++++++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 13 deletions(-) (limited to 'arch/sparc/kernel/perf_event.c') diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 18853705282b..e48651dace1b 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -30,27 +30,39 @@ #include "kernel.h" #include "kstack.h" -/* Sparc64 chips have two performance counters, 32-bits each, with - * overflow interrupts generated on transition from 0xffffffff to 0. - * The counters are accessed in one go using a 64-bit register. +/* Two classes of sparc64 chips currently exist, all of which have + * 32-bit counters which can generate overflow interrupts on the + * transition from 0xffffffff to 0. * - * Both counters are controlled using a single control register. The - * only way to stop all sampling is to clear all of the context (user, - * supervisor, hypervisor) sampling enable bits. But these bits apply - * to both counters, thus the two counters can't be enabled/disabled - * individually. + * All chips up to and including SPARC-T3 have two performance + * counters. The two 32-bit counters are accessed in one go using a + * single 64-bit register. * - * The control register has two event fields, one for each of the two - * counters. It's thus nearly impossible to have one counter going - * while keeping the other one stopped. Therefore it is possible to - * get overflow interrupts for counters not currently "in use" and - * that condition must be checked in the overflow interrupt handler. + * On these older chips both counters are controlled using a single + * control register. The only way to stop all sampling is to clear + * all of the context (user, supervisor, hypervisor) sampling enable + * bits. But these bits apply to both counters, thus the two counters + * can't be enabled/disabled individually. + * + * Furthermore, the control register on these older chips has two + * event fields, one for each of the two counters. It's thus nearly + * impossible to have one counter going while keeping the other one + * stopped. Therefore it is possible to get overflow interrupts for + * counters not currently "in use" and that condition must be checked + * in the overflow interrupt handler. * * So we use a hack, in that we program inactive counters with the * "sw_count0" and "sw_count1" events. These count how many times * the instruction "sethi %hi(0xfc000), %g0" is executed. It's an * unusual way to encode a NOP and therefore will not trigger in * normal code. + * + * Starting with SPARC-T4 we have one control register per counter. + * And the counters are stored in individual registers. The registers + * for the counters are 64-bit but only a 32-bit counter is + * implemented. The event selections on SPARC-T4 lack any + * restrictions, therefore we can elide all of the complicated + * conflict resolution code we have for SPARC-T3 and earlier chips. */ #define MAX_HWEVENTS 4 @@ -103,6 +115,8 @@ DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, }; /* An event map describes the characteristics of a performance * counter event. In particular it gives the encoding as well as * a mask telling which counters the event can be measured on. + * + * The mask is unused on SPARC-T4 and later. */ struct perf_event_map { u16 encoding; -- cgit v1.2.3-59-g8ed1b From 08280e6c4c2e8049ac61d9e8e3536ec1df629c0d Mon Sep 17 00:00:00 2001 From: "David S.
Miller" Date: Sun, 14 Oct 2012 17:59:40 -0700 Subject: sparc64: Like x86 we should check current->mm during perf backtrace generation. If the MM is not active, only report the top-level PC. Do not try to access the address space. Signed-off-by: David S. Miller --- arch/sparc/kernel/perf_event.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/sparc/kernel/perf_event.c') diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index e48651dace1b..9e96f849a744 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -1738,8 +1738,6 @@ static void perf_callchain_user_64(struct perf_callchain_entry *entry, { unsigned long ufp; - perf_callchain_store(entry, regs->tpc); - ufp = regs->u_regs[UREG_I6] + STACK_BIAS; do { struct sparc_stackf *usf, sf; @@ -1760,8 +1758,6 @@ static void perf_callchain_user_32(struct perf_callchain_entry *entry, { unsigned long ufp; - perf_callchain_store(entry, regs->tpc); - ufp = regs->u_regs[UREG_I6] & 0xffffffffUL; do { struct sparc_stackf32 *usf, sf; @@ -1780,6 +1776,11 @@ static void perf_callchain_user_32(struct perf_callchain_entry *entry, void perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) { + perf_callchain_store(entry, regs->tpc); + + if (!current->mm) + return; + flushw_user(); if (test_thread_flag(TIF_32BIT)) perf_callchain_user_32(entry, regs); -- cgit v1.2.3-59-g8ed1b From e793d8c6740f8fe704fa216e95685f4d92c4c4b9 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 16 Oct 2012 13:05:25 -0700 Subject: sparc64: Fix bit twiddling in sparc_pmu_enable_event(). There was a serious disconnect in the logic happening in sparc_pmu_disable_event() vs. sparc_pmu_enable_event(). Event disable is implemented by programming a NOP event into the PCR. However, event enable was not reversing this operation. Instead, it was setting the User/Priv/Hypervisor trace enable bits. That's not sparc_pmu_enable_event()'s job, that's what sparc_pmu_enable() and sparc_pmu_disable() do . The intent of sparc_pmu_enable_event() is clear, since it first clear out the event type encoding field. So fix this by OR'ing in the event encoding rather than the trace enable bits. Signed-off-by: David S. Miller --- arch/sparc/kernel/perf_event.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/sparc/kernel/perf_event.c') diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 9e96f849a744..885a8af74064 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -817,15 +817,17 @@ static u64 nop_for_index(int idx) static inline void sparc_pmu_enable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx) { - u64 val, mask = mask_for_index(idx); + u64 enc, val, mask = mask_for_index(idx); int pcr_index = 0; if (sparc_pmu->num_pcrs > 1) pcr_index = idx; + enc = perf_event_get_enc(cpuc->events[idx]); + val = cpuc->pcr[pcr_index]; val &= ~mask; - val |= hwc->config; + val |= event_encoding(enc, idx); cpuc->pcr[pcr_index] = val; pcr_ops->write_pcr(pcr_index, cpuc->pcr[pcr_index]); -- cgit v1.2.3-59-g8ed1b From 517ffce4e1a03aea979fe3a18a3dd1761a24fafb Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 26 Oct 2012 15:18:37 -0700 Subject: sparc64: Make montmul/montsqr/mpmul usable in 32-bit threads. 
The Montgomery Multiply, Montgomery Square, and Multiple-Precision Multiply instructions work by loading a combination of the floating point and multiple register windows worth of integer registers with the inputs. These values are 64-bit. But for 32-bit userland processes we only save the low 32 bits of each integer register during a register spill. This is because the register window save area is in the user stack and has a fixed layout. Therefore, the only way to use these instructions in 32-bit mode is to perform the following sequence: 1) Load the top 32 bits of a chosen integer register with a sentinel, say "-1". This will be in the outer-most register window. The idea is that we're trying to see if the outer-most register window gets spilled, and thus the 64-bit values were truncated. 2) Load all the inputs for the montmul/montsqr/mpmul instruction, down to the inner-most register window. 3) Execute the opcode. 4) Traverse back up to the outer-most register window. 5) Check the sentinel; if it's still "-1", store the results. Otherwise retry the entire sequence. This retry is extremely troublesome. If you're just unlucky and an interrupt or other trap happens, it'll push that outer-most window to the stack and clear the sentinel when we restore it. We could retry forever and never make forward progress if interrupts arrive at a fast enough rate (consider perf events as one example). So we have to do limited retries and fall back to software, which is extremely non-deterministic. Luckily it's very straightforward to provide a mechanism to let 32-bit applications use a 64-bit stack. Stacks in 64-bit mode are biased by 2047 bytes, which means that the lowest bit is set in the actual %sp register value. So if we see bit zero set in a 32-bit application's stack we treat it like a 64-bit stack. Runtime detection of such a facility is tricky, and cumbersome at best. For example, just trying to use a biased stack and seeing if it works is hard to recover from (the signal handler will need to use an alt stack, plus something along the lines of longjmp). Therefore, we add a system call to report a bitmask of arch-specific features like this in a cheap and less hairy way. With help from Andy Polyakov. Signed-off-by: David S.
Miller --- arch/sparc/include/asm/compat.h | 5 ++-- arch/sparc/include/asm/thread_info_64.h | 5 ++++ arch/sparc/include/asm/ttable.h | 24 ++++++++++++------- arch/sparc/include/uapi/asm/unistd.h | 6 ++++- arch/sparc/kernel/perf_event.c | 22 ++++++++++++----- arch/sparc/kernel/process_64.c | 42 ++++++++++++++++++--------------- arch/sparc/kernel/ptrace_64.c | 4 ++-- arch/sparc/kernel/sys_sparc_64.c | 5 ++++ arch/sparc/kernel/systbls_64.S | 2 ++ arch/sparc/kernel/unaligned_64.c | 36 ++++++++++++++++++---------- arch/sparc/kernel/visemul.c | 23 +++++++++++------- arch/sparc/kernel/winfixup.S | 2 ++ arch/sparc/math-emu/math_64.c | 2 +- 13 files changed, 117 insertions(+), 61 deletions(-) (limited to 'arch/sparc/kernel/perf_event.c') diff --git a/arch/sparc/include/asm/compat.h b/arch/sparc/include/asm/compat.h index cef99fbc0a21..830502fe62b4 100644 --- a/arch/sparc/include/asm/compat.h +++ b/arch/sparc/include/asm/compat.h @@ -232,9 +232,10 @@ static inline void __user *arch_compat_alloc_user_space(long len) struct pt_regs *regs = current_thread_info()->kregs; unsigned long usp = regs->u_regs[UREG_I6]; - if (!(test_thread_flag(TIF_32BIT))) + if (test_thread_64bit_stack(usp)) usp += STACK_BIAS; - else + + if (test_thread_flag(TIF_32BIT)) usp &= 0xffffffffUL; usp -= len; diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h index 4e2276631081..a3fe4dcc0aa6 100644 --- a/arch/sparc/include/asm/thread_info_64.h +++ b/arch/sparc/include/asm/thread_info_64.h @@ -259,6 +259,11 @@ static inline bool test_and_clear_restore_sigmask(void) #define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG) +#define thread32_stack_is_64bit(__SP) (((__SP) & 0x1) != 0) +#define test_thread_64bit_stack(__SP) \ + ((test_thread_flag(TIF_32BIT) && !thread32_stack_is_64bit(__SP)) ? \ + false : true) + #endif /* !__ASSEMBLY__ */ #endif /* __KERNEL__ */ diff --git a/arch/sparc/include/asm/ttable.h b/arch/sparc/include/asm/ttable.h index 48f2807d3265..71b5a67522ab 100644 --- a/arch/sparc/include/asm/ttable.h +++ b/arch/sparc/include/asm/ttable.h @@ -372,7 +372,9 @@ etrap_spill_fixup_64bit: \ /* Normal 32bit spill */ #define SPILL_2_GENERIC(ASI) \ - srl %sp, 0, %sp; \ + and %sp, 1, %g3; \ + brnz,pn %g3, (. - (128 + 4)); \ + srl %sp, 0, %sp; \ stwa %l0, [%sp + %g0] ASI; \ mov 0x04, %g3; \ stwa %l1, [%sp + %g3] ASI; \ @@ -398,14 +400,16 @@ etrap_spill_fixup_64bit: \ stwa %i6, [%g1 + %g0] ASI; \ stwa %i7, [%g1 + %g3] ASI; \ saved; \ - retry; nop; nop; \ + retry; \ b,a,pt %xcc, spill_fixup_dax; \ b,a,pt %xcc, spill_fixup_mna; \ b,a,pt %xcc, spill_fixup; #define SPILL_2_GENERIC_ETRAP \ etrap_user_spill_32bit: \ - srl %sp, 0, %sp; \ + and %sp, 1, %g3; \ + brnz,pn %g3, etrap_user_spill_64bit; \ + srl %sp, 0, %sp; \ stwa %l0, [%sp + 0x00] %asi; \ stwa %l1, [%sp + 0x04] %asi; \ stwa %l2, [%sp + 0x08] %asi; \ @@ -427,7 +431,7 @@ etrap_user_spill_32bit: \ ba,pt %xcc, etrap_save; \ wrpr %g1, %cwp; \ nop; nop; nop; nop; \ - nop; nop; nop; nop; \ + nop; nop; \ ba,a,pt %xcc, etrap_spill_fixup_32bit; \ ba,a,pt %xcc, etrap_spill_fixup_32bit; \ ba,a,pt %xcc, etrap_spill_fixup_32bit; @@ -592,7 +596,9 @@ user_rtt_fill_64bit: \ /* Normal 32bit fill */ #define FILL_2_GENERIC(ASI) \ - srl %sp, 0, %sp; \ + and %sp, 1, %g3; \ + brnz,pn %g3, (. 
- (128 + 4)); \ + srl %sp, 0, %sp; \ lduwa [%sp + %g0] ASI, %l0; \ mov 0x04, %g2; \ mov 0x08, %g3; \ @@ -616,14 +622,16 @@ user_rtt_fill_64bit: \ lduwa [%g1 + %g3] ASI, %i6; \ lduwa [%g1 + %g5] ASI, %i7; \ restored; \ - retry; nop; nop; nop; nop; \ + retry; nop; nop; \ b,a,pt %xcc, fill_fixup_dax; \ b,a,pt %xcc, fill_fixup_mna; \ b,a,pt %xcc, fill_fixup; #define FILL_2_GENERIC_RTRAP \ user_rtt_fill_32bit: \ - srl %sp, 0, %sp; \ + and %sp, 1, %g3; \ + brnz,pn %g3, user_rtt_fill_64bit; \ + srl %sp, 0, %sp; \ lduwa [%sp + 0x00] %asi, %l0; \ lduwa [%sp + 0x04] %asi, %l1; \ lduwa [%sp + 0x08] %asi, %l2; \ @@ -643,7 +651,7 @@ user_rtt_fill_32bit: \ ba,pt %xcc, user_rtt_pre_restore; \ restored; \ nop; nop; nop; nop; nop; \ - nop; nop; nop; nop; nop; \ + nop; nop; nop; \ ba,a,pt %xcc, user_rtt_fill_fixup; \ ba,a,pt %xcc, user_rtt_fill_fixup; \ ba,a,pt %xcc, user_rtt_fill_fixup; diff --git a/arch/sparc/include/uapi/asm/unistd.h b/arch/sparc/include/uapi/asm/unistd.h index 8974ef7ae920..bed86a820d09 100644 --- a/arch/sparc/include/uapi/asm/unistd.h +++ b/arch/sparc/include/uapi/asm/unistd.h @@ -405,8 +405,12 @@ #define __NR_setns 337 #define __NR_process_vm_readv 338 #define __NR_process_vm_writev 339 +#define __NR_kern_features 340 -#define NR_syscalls 340 +#define NR_syscalls 341 + +/* Bitmask values returned from kern_features system call. */ +#define KERN_FEATURE_MIXED_MODE_STACK 0x00000001 #ifdef __32bit_syscall_numbers__ /* Sparc 32-bit only has the "setresuid32", "getresuid32" variants, diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 885a8af74064..b5c38faa4ead 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -1762,15 +1762,25 @@ static void perf_callchain_user_32(struct perf_callchain_entry *entry, ufp = regs->u_regs[UREG_I6] & 0xffffffffUL; do { - struct sparc_stackf32 *usf, sf; unsigned long pc; - usf = (struct sparc_stackf32 *) ufp; - if (__copy_from_user_inatomic(&sf, usf, sizeof(sf))) - break; + if (thread32_stack_is_64bit(ufp)) { + struct sparc_stackf *usf, sf; - pc = sf.callers_pc; - ufp = (unsigned long)sf.fp; + ufp += STACK_BIAS; + usf = (struct sparc_stackf *) ufp; + if (__copy_from_user_inatomic(&sf, usf, sizeof(sf))) + break; + pc = sf.callers_pc & 0xffffffff; + ufp = ((unsigned long) sf.fp) & 0xffffffff; + } else { + struct sparc_stackf32 *usf, sf; + usf = (struct sparc_stackf32 *) ufp; + if (__copy_from_user_inatomic(&sf, usf, sizeof(sf))) + break; + pc = sf.callers_pc; + ufp = (unsigned long)sf.fp; + } perf_callchain_store(entry, pc); } while (entry->nr < PERF_MAX_STACK_DEPTH); } diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index d778248ef3f8..c6e0c2910043 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -452,13 +452,16 @@ void flush_thread(void) /* It's a bit more tricky when 64-bit tasks are involved... 
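 * A 32-bit task may now run on either a 32-bit or a 64-bit
 * (biased) stack; bit zero of the saved stack pointer tells the
 * two layouts apart, which is what test_thread_64bit_stack()
 * checks below.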
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 885a8af74064..b5c38faa4ead 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -1762,15 +1762,25 @@ static void perf_callchain_user_32(struct perf_callchain_entry *entry,
 	ufp = regs->u_regs[UREG_I6] & 0xffffffffUL;
 	do {
-		struct sparc_stackf32 *usf, sf;
 		unsigned long pc;
 
-		usf = (struct sparc_stackf32 *) ufp;
-		if (__copy_from_user_inatomic(&sf, usf, sizeof(sf)))
-			break;
+		if (thread32_stack_is_64bit(ufp)) {
+			struct sparc_stackf *usf, sf;
 
-		pc = sf.callers_pc;
-		ufp = (unsigned long)sf.fp;
+			ufp += STACK_BIAS;
+			usf = (struct sparc_stackf *) ufp;
+			if (__copy_from_user_inatomic(&sf, usf, sizeof(sf)))
+				break;
+			pc = sf.callers_pc & 0xffffffff;
+			ufp = ((unsigned long) sf.fp) & 0xffffffff;
+		} else {
+			struct sparc_stackf32 *usf, sf;
+			usf = (struct sparc_stackf32 *) ufp;
+			if (__copy_from_user_inatomic(&sf, usf, sizeof(sf)))
+				break;
+			pc = sf.callers_pc;
+			ufp = (unsigned long)sf.fp;
+		}
 		perf_callchain_store(entry, pc);
 	} while (entry->nr < PERF_MAX_STACK_DEPTH);
 }
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index d778248ef3f8..c6e0c2910043 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -452,13 +452,16 @@ void flush_thread(void)
 
 /* It's a bit more tricky when 64-bit tasks are involved... */
 static unsigned long clone_stackframe(unsigned long csp, unsigned long psp)
 {
+	bool stack_64bit = test_thread_64bit_stack(psp);
 	unsigned long fp, distance, rval;
 
-	if (!(test_thread_flag(TIF_32BIT))) {
+	if (stack_64bit) {
 		csp += STACK_BIAS;
 		psp += STACK_BIAS;
 		__get_user(fp, &(((struct reg_window __user *)psp)->ins[6]));
 		fp += STACK_BIAS;
+		if (test_thread_flag(TIF_32BIT))
+			fp &= 0xffffffff;
 	} else
 		__get_user(fp, &(((struct reg_window32 __user *)psp)->ins[6]));
 
@@ -472,7 +475,7 @@ static unsigned long clone_stackframe(unsigned long csp, unsigned long psp)
 	rval = (csp - distance);
 	if (copy_in_user((void __user *) rval, (void __user *) psp, distance))
 		rval = 0;
-	else if (test_thread_flag(TIF_32BIT)) {
+	else if (!stack_64bit) {
 		if (put_user(((u32)csp),
 			     &(((struct reg_window32 __user *)rval)->ins[6])))
 			rval = 0;
@@ -507,18 +510,18 @@ void synchronize_user_stack(void)
 	flush_user_windows();
 	if ((window = get_thread_wsaved()) != 0) {
-		int winsize = sizeof(struct reg_window);
-		int bias = 0;
-
-		if (test_thread_flag(TIF_32BIT))
-			winsize = sizeof(struct reg_window32);
-		else
-			bias = STACK_BIAS;
-
 		window -= 1;
 		do {
-			unsigned long sp = (t->rwbuf_stkptrs[window] + bias);
 			struct reg_window *rwin = &t->reg_window[window];
+			int winsize = sizeof(struct reg_window);
+			unsigned long sp;
+
+			sp = t->rwbuf_stkptrs[window];
+
+			if (test_thread_64bit_stack(sp))
+				sp += STACK_BIAS;
+			else
+				winsize = sizeof(struct reg_window32);
 
 			if (!copy_to_user((char __user *)sp, rwin, winsize)) {
 				shift_window_buffer(window, get_thread_wsaved() - 1, t);
@@ -544,13 +547,6 @@ void fault_in_user_windows(void)
 {
 	struct thread_info *t = current_thread_info();
 	unsigned long window;
-	int winsize = sizeof(struct reg_window);
-	int bias = 0;
-
-	if (test_thread_flag(TIF_32BIT))
-		winsize = sizeof(struct reg_window32);
-	else
-		bias = STACK_BIAS;
 
 	flush_user_windows();
 	window = get_thread_wsaved();
@@ -558,8 +554,16 @@ void fault_in_user_windows(void)
 	if (likely(window != 0)) {
 		window -= 1;
 		do {
-			unsigned long sp = (t->rwbuf_stkptrs[window] + bias);
 			struct reg_window *rwin = &t->reg_window[window];
+			int winsize = sizeof(struct reg_window);
+			unsigned long sp;
+
+			sp = t->rwbuf_stkptrs[window];
+
+			if (test_thread_64bit_stack(sp))
+				sp += STACK_BIAS;
+			else
+				winsize = sizeof(struct reg_window32);
 
 			if (unlikely(sp & 0x7UL))
 				stack_unaligned(sp);
diff --git a/arch/sparc/kernel/ptrace_64.c b/arch/sparc/kernel/ptrace_64.c
index 484dabac7045..7ff45e4ba681 100644
--- a/arch/sparc/kernel/ptrace_64.c
+++ b/arch/sparc/kernel/ptrace_64.c
@@ -151,7 +151,7 @@ static int regwindow64_get(struct task_struct *target,
 {
 	unsigned long rw_addr = regs->u_regs[UREG_I6];
 
-	if (test_tsk_thread_flag(current, TIF_32BIT)) {
+	if (!test_thread_64bit_stack(rw_addr)) {
 		struct reg_window32 win32;
 		int i;
 
@@ -176,7 +176,7 @@ static int regwindow64_set(struct task_struct *target,
 {
 	unsigned long rw_addr = regs->u_regs[UREG_I6];
 
-	if (test_tsk_thread_flag(current, TIF_32BIT)) {
+	if (!test_thread_64bit_stack(rw_addr)) {
 		struct reg_window32 win32;
 		int i;
 
diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
index 11c6c9603e71..878ef3d5fec5 100644
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c
@@ -751,3 +751,8 @@ int kernel_execve(const char *filename,
 		: "cc");
 	return __res;
 }
+
+asmlinkage long sys_kern_features(void)
+{
+	return KERN_FEATURE_MIXED_MODE_STACK;
+}
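
The rewritten loops in synchronize_user_stack() and fault_in_user_windows()
above are why the winsize/bias computation moved inside the loop body: every
saved window records its own stack pointer, so a single task can have 32-bit
and 64-bit frames pending at once. The per-window decision, as a self-contained
sketch (struct layouts abbreviated; test_thread_64bit_stack_sp() and
copy_frame_to_user() are stand-ins for the real predicate and copy_to_user()):

    #include <stdbool.h>
    #include <stddef.h>

    #define STACK_BIAS	2047

    struct reg_window32 { unsigned int  locals[8], ins[8]; };
    struct reg_window   { unsigned long locals[8], ins[8]; };

    /* Stand-ins for the kernel predicate and user copy (assumptions). */
    extern bool test_thread_64bit_stack_sp(unsigned long sp);
    extern int copy_frame_to_user(unsigned long sp, const void *buf, size_t len);

    /* Flush one saved window, sizing the frame by the type of *that*
     * window's stack pointer, as the patched loops do.
     */
    static int flush_one_window(unsigned long sp, const struct reg_window *rwin)
    {
            size_t winsize = sizeof(struct reg_window);

            if (test_thread_64bit_stack_sp(sp))
                    sp += STACK_BIAS;	/* un-bias the 64-bit %sp */
            else
                    winsize = sizeof(struct reg_window32);	/* 32-bit layout */

            return copy_frame_to_user(sp, rwin, winsize);
    }
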
+++ b/arch/sparc/kernel/systbls_64.S
@@ -86,6 +86,7 @@ sys_call_table32:
 	.word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open, compat_sys_recvmmsg, sys_fanotify_init
/*330*/	.word sys32_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, compat_sys_open_by_handle_at, compat_sys_clock_adjtime
 	.word sys_syncfs, compat_sys_sendmmsg, sys_setns, compat_sys_process_vm_readv, compat_sys_process_vm_writev
+/*340*/	.word sys_kern_features
 
 #endif /* CONFIG_COMPAT */
 
@@ -163,3 +164,4 @@ sys_call_table:
 	.word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init
/*330*/	.word sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime
 	.word sys_syncfs, sys_sendmmsg, sys_setns, sys_process_vm_readv, sys_process_vm_writev
+/*340*/	.word sys_kern_features
diff --git a/arch/sparc/kernel/unaligned_64.c b/arch/sparc/kernel/unaligned_64.c
index f81d038f7340..8201c25e7669 100644
--- a/arch/sparc/kernel/unaligned_64.c
+++ b/arch/sparc/kernel/unaligned_64.c
@@ -113,21 +113,24 @@ static inline long sign_extend_imm13(long imm)
 
 static unsigned long fetch_reg(unsigned int reg, struct pt_regs *regs)
 {
-	unsigned long value;
+	unsigned long value, fp;
 
 	if (reg < 16)
 		return (!reg ? 0 : regs->u_regs[reg]);
+
+	fp = regs->u_regs[UREG_FP];
+
 	if (regs->tstate & TSTATE_PRIV) {
 		struct reg_window *win;
-		win = (struct reg_window *)(regs->u_regs[UREG_FP] + STACK_BIAS);
+		win = (struct reg_window *)(fp + STACK_BIAS);
 		value = win->locals[reg - 16];
-	} else if (test_thread_flag(TIF_32BIT)) {
+	} else if (!test_thread_64bit_stack(fp)) {
 		struct reg_window32 __user *win32;
-		win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP]));
+		win32 = (struct reg_window32 __user *)((unsigned long)((u32)fp));
 		get_user(value, &win32->locals[reg - 16]);
 	} else {
 		struct reg_window __user *win;
-		win = (struct reg_window __user *)(regs->u_regs[UREG_FP] + STACK_BIAS);
+		win = (struct reg_window __user *)(fp + STACK_BIAS);
 		get_user(value, &win->locals[reg - 16]);
 	}
 	return value;
@@ -135,19 +138,24 @@ static unsigned long fetch_reg(unsigned int reg, struct pt_regs *regs)
 
 static unsigned long *fetch_reg_addr(unsigned int reg, struct pt_regs *regs)
 {
+	unsigned long fp;
+
 	if (reg < 16)
 		return &regs->u_regs[reg];
+
+	fp = regs->u_regs[UREG_FP];
+
 	if (regs->tstate & TSTATE_PRIV) {
 		struct reg_window *win;
-		win = (struct reg_window *)(regs->u_regs[UREG_FP] + STACK_BIAS);
+		win = (struct reg_window *)(fp + STACK_BIAS);
 		return &win->locals[reg - 16];
-	} else if (test_thread_flag(TIF_32BIT)) {
+	} else if (!test_thread_64bit_stack(fp)) {
 		struct reg_window32 *win32;
-		win32 = (struct reg_window32 *)((unsigned long)((u32)regs->u_regs[UREG_FP]));
+		win32 = (struct reg_window32 *)((unsigned long)((u32)fp));
 		return (unsigned long *)&win32->locals[reg - 16];
 	} else {
 		struct reg_window *win;
-		win = (struct reg_window *)(regs->u_regs[UREG_FP] + STACK_BIAS);
+		win = (struct reg_window *)(fp + STACK_BIAS);
 		return &win->locals[reg - 16];
 	}
 }
@@ -392,13 +400,15 @@ int handle_popc(u32 insn, struct pt_regs *regs)
 		if (rd)
 			regs->u_regs[rd] = ret;
 	} else {
-		if (test_thread_flag(TIF_32BIT)) {
+		unsigned long fp = regs->u_regs[UREG_FP];
+
+		if (!test_thread_64bit_stack(fp)) {
 			struct reg_window32 __user *win32;
-			win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP]));
+			win32 = (struct reg_window32 __user *)((unsigned long)((u32)fp));
 			put_user(ret, &win32->locals[rd - 16]);
 		} else {
 			struct reg_window __user *win;
-			win = (struct reg_window __user *)(regs->u_regs[UREG_FP] + STACK_BIAS);
+			win = (struct reg_window __user *)(fp + STACK_BIAS);
 			put_user(ret, &win->locals[rd - 16]);
 		}
 	}
@@ -554,7 +564,7 @@ void handle_ld_nf(u32 insn, struct pt_regs *regs)
 		reg[0] = 0;
 		if ((insn & 0x780000) == 0x180000)
 			reg[1] = 0;
-	} else if (test_thread_flag(TIF_32BIT)) {
+	} else if (!test_thread_64bit_stack(regs->u_regs[UREG_FP])) {
 		put_user(0, (int __user *) reg);
 		if ((insn & 0x780000) == 0x180000)
 			put_user(0, ((int __user *) reg) + 1);
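
unaligned_64.c (and visemul.c below) now repeat one pattern: read the frame
pointer once, then let the frame type pick both the window layout and the
address transformation. Kernel and 64-bit user frames get STACK_BIAS added;
32-bit user frames get the 32-bit %sp zero-extended with no bias.
Schematically (a sketch only; STACK_BIAS is 2047 on sparc64):

    #define STACK_BIAS	2047

    enum frame_kind { FRAME_KERNEL, FRAME_USER64, FRAME_USER32 };

    static enum frame_kind classify_frame(int priv, int stack_64bit)
    {
            if (priv)
                    return FRAME_KERNEL;
            return stack_64bit ? FRAME_USER64 : FRAME_USER32;
    }

    /* Address of the register window save area for a given frame. */
    static unsigned long window_address(enum frame_kind kind, unsigned long fp)
    {
            if (kind == FRAME_USER32)
                    return (unsigned long)(unsigned int)fp;	/* zero-extend */
            return fp + STACK_BIAS;	/* kernel and 64-bit user frames */
    }
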
diff --git a/arch/sparc/kernel/visemul.c b/arch/sparc/kernel/visemul.c
index 08e074b7eb6a..c096c624ac4d 100644
--- a/arch/sparc/kernel/visemul.c
+++ b/arch/sparc/kernel/visemul.c
@@ -149,21 +149,24 @@ static inline void maybe_flush_windows(unsigned int rs1, unsigned int rs2,
 
 static unsigned long fetch_reg(unsigned int reg, struct pt_regs *regs)
 {
-	unsigned long value;
+	unsigned long value, fp;
 
 	if (reg < 16)
 		return (!reg ? 0 : regs->u_regs[reg]);
+
+	fp = regs->u_regs[UREG_FP];
+
 	if (regs->tstate & TSTATE_PRIV) {
 		struct reg_window *win;
-		win = (struct reg_window *)(regs->u_regs[UREG_FP] + STACK_BIAS);
+		win = (struct reg_window *)(fp + STACK_BIAS);
 		value = win->locals[reg - 16];
-	} else if (test_thread_flag(TIF_32BIT)) {
+	} else if (!test_thread_64bit_stack(fp)) {
 		struct reg_window32 __user *win32;
-		win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP]));
+		win32 = (struct reg_window32 __user *)((unsigned long)((u32)fp));
 		get_user(value, &win32->locals[reg - 16]);
 	} else {
 		struct reg_window __user *win;
-		win = (struct reg_window __user *)(regs->u_regs[UREG_FP] + STACK_BIAS);
+		win = (struct reg_window __user *)(fp + STACK_BIAS);
 		get_user(value, &win->locals[reg - 16]);
 	}
 	return value;
@@ -172,16 +175,18 @@ static unsigned long fetch_reg(unsigned int reg, struct pt_regs *regs)
 static inline unsigned long __user *__fetch_reg_addr_user(unsigned int reg,
 							  struct pt_regs *regs)
 {
+	unsigned long fp = regs->u_regs[UREG_FP];
+
 	BUG_ON(reg < 16);
 	BUG_ON(regs->tstate & TSTATE_PRIV);
 
-	if (test_thread_flag(TIF_32BIT)) {
+	if (!test_thread_64bit_stack(fp)) {
 		struct reg_window32 __user *win32;
-		win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP]));
+		win32 = (struct reg_window32 __user *)((unsigned long)((u32)fp));
 		return (unsigned long __user *)&win32->locals[reg - 16];
 	} else {
 		struct reg_window __user *win;
-		win = (struct reg_window __user *)(regs->u_regs[UREG_FP] + STACK_BIAS);
+		win = (struct reg_window __user *)(fp + STACK_BIAS);
 		return &win->locals[reg - 16];
 	}
 }
@@ -204,7 +209,7 @@ static void store_reg(struct pt_regs *regs, unsigned long val, unsigned long rd)
 	} else {
 		unsigned long __user *rd_user = __fetch_reg_addr_user(rd, regs);
 
-		if (test_thread_flag(TIF_32BIT))
+		if (!test_thread_64bit_stack(regs->u_regs[UREG_FP]))
 			__put_user((u32)val, (u32 __user *)rd_user);
 		else
 			__put_user(val, rd_user);
diff --git a/arch/sparc/kernel/winfixup.S b/arch/sparc/kernel/winfixup.S
index a6b0863c27df..1e67ce958369 100644
--- a/arch/sparc/kernel/winfixup.S
+++ b/arch/sparc/kernel/winfixup.S
@@ -43,6 +43,8 @@ spill_fixup_mna:
 spill_fixup_dax:
 	TRAP_LOAD_THREAD_REG(%g6, %g1)
 	ldx	[%g6 + TI_FLAGS], %g1
+	andcc	%sp, 0x1, %g0
+	movne	%icc, 0, %g1
 	andcc	%g1, _TIF_32BIT, %g0
 	ldub	[%g6 + TI_WSAVED], %g1
 	sll	%g1, 3, %g3
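
The two instructions added to the winfixup spill path fold the new stack test
into the existing flags check without an extra branch: when bit 0 of %sp is
set, the cached TI_FLAGS value is overwritten with zero, so the following
_TIF_32BIT test falls through to the 64-bit spill handling. The same logic in
C (illustrative; the _TIF_32BIT value here is a stand-in for the real
definition in thread_info_64.h):

    #define _TIF_32BIT	0x10UL	/* stand-in value; see thread_info_64.h */

    static int use_32bit_spill(unsigned long ti_flags, unsigned long sp)
    {
            unsigned long flags = ti_flags;	/* ldx   [%g6 + TI_FLAGS], %g1 */

            if (sp & 0x1UL)			/* andcc %sp, 0x1, %g0 */
                    flags = 0;		/* movne %icc, 0, %g1  */

            return (flags & _TIF_32BIT) != 0;	/* andcc %g1, _TIF_32BIT, %g0 */
    }
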
diff --git a/arch/sparc/math-emu/math_64.c b/arch/sparc/math-emu/math_64.c
index 1704068da928..034aadbff036 100644
--- a/arch/sparc/math-emu/math_64.c
+++ b/arch/sparc/math-emu/math_64.c
@@ -320,7 +320,7 @@ int do_mathemu(struct pt_regs *regs, struct fpustate *f, bool illegal_insn_trap)
 		XR = 0;
 	else if (freg < 16)
 		XR = regs->u_regs[freg];
-	else if (test_thread_flag(TIF_32BIT)) {
+	else if (!test_thread_64bit_stack(regs->u_regs[UREG_FP])) {
 		struct reg_window32 __user *win32;
 		flushw_user ();
 		win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP]));
--
cgit v1.2.3-59-g8ed1b