From 3d2606f42984613d324ad3047cf503bcddc3880a Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Fri, 20 May 2011 09:46:54 +0200 Subject: oprofile, x86: Enable preemption during pci device setup in IBS init IBS initialization is a mix of per-core register access and per-node pci device setup. Register access should be pinned to the cpu, but pci setup must run with preemption enabled. This patch better separates the code into non-/preemptible sections and fixes sleeping with preemption disabled. See bug message below. Fixes also freeing the eilvt entry by introducing put_eilvt(). BUG: sleeping function called from invalid context at mm/slub.c:824 in_atomic(): 1, irqs_disabled(): 0, pid: 32357, name: modprobe INFO: lockdep is turned off. Pid: 32357, comm: modprobe Not tainted 2.6.39-rc7+ #14 Call Trace: [] __might_sleep+0x112/0x117 [] kmem_cache_alloc_trace+0x4b/0xe7 [] kzalloc.constprop.0+0x29/0x2b [] pci_get_subsys+0x36/0x78 [] ? setup_APIC_eilvt+0xfb/0x139 [] pci_get_device+0x16/0x18 [] op_amd_init+0xd3/0x211 [oprofile] [] ? 0xffffffffa064cfff [] op_nmi_init+0x21e/0x26a [oprofile] [] oprofile_arch_init+0xe/0x26 [oprofile] [] oprofile_init+0x10/0x42 [oprofile] [] do_one_initcall+0x7f/0x13a [] sys_init_module+0x132/0x281 [] system_call_fastpath+0x16/0x1b Reported-by: Dave Jones Cc: [2.6.37.x] Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_amd.c | 95 +++++++++++++++++++++++----------------- 1 file changed, 54 insertions(+), 41 deletions(-) (limited to 'arch') diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index c3b8e24f2b16..9fd8a567fe1e 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -316,16 +316,23 @@ static void op_amd_stop_ibs(void) wrmsrl(MSR_AMD64_IBSOPCTL, 0); } -static inline int eilvt_is_available(int offset) +static inline int get_eilvt(int offset) { - /* check if we may assign a vector */ return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1); } +static inline int put_eilvt(int offset) +{ + return !setup_APIC_eilvt(offset, 0, 0, 1); +} + static inline int ibs_eilvt_valid(void) { int offset; u64 val; + int valid = 0; + + preempt_disable(); rdmsrl(MSR_AMD64_IBSCTL, val); offset = val & IBSCTL_LVT_OFFSET_MASK; @@ -333,16 +340,20 @@ static inline int ibs_eilvt_valid(void) if (!(val & IBSCTL_LVT_OFFSET_VALID)) { pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n", smp_processor_id(), offset, MSR_AMD64_IBSCTL, val); - return 0; + goto out; } - if (!eilvt_is_available(offset)) { + if (!get_eilvt(offset)) { pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n", smp_processor_id(), offset, MSR_AMD64_IBSCTL, val); - return 0; + goto out; } - return 1; + valid = 1; +out: + preempt_enable(); + + return valid; } static inline int get_ibs_offset(void) @@ -600,67 +611,69 @@ static int setup_ibs_ctl(int ibs_eilvt_off) static int force_ibs_eilvt_setup(void) { - int i; + int offset; int ret; - /* find the next free available EILVT entry */ - for (i = 1; i < 4; i++) { - if (!eilvt_is_available(i)) - continue; - ret = setup_ibs_ctl(i); - if (ret) - return ret; - pr_err(FW_BUG "using offset %d for IBS interrupts\n", i); - return 0; + /* + * find the next free available EILVT entry, skip offset 0, + * pin search to this cpu + */ + preempt_disable(); + for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) { + if (get_eilvt(offset)) + break; } + preempt_enable(); - printk(KERN_DEBUG "No EILVT entry available\n"); - - return -EBUSY; -} - -static int __init_ibs_nmi(void) -{ - int ret; - - if (ibs_eilvt_valid()) - return 0; + if (offset == APIC_EILVT_NR_MAX) { + printk(KERN_DEBUG "No EILVT entry available\n"); + return -EBUSY; + } - ret = force_ibs_eilvt_setup(); + ret = setup_ibs_ctl(offset); if (ret) - return ret; + goto out; - if (!ibs_eilvt_valid()) - return -EFAULT; + if (!ibs_eilvt_valid()) { + ret = -EFAULT; + goto out; + } + pr_err(FW_BUG "using offset %d for IBS interrupts\n", offset); pr_err(FW_BUG "workaround enabled for IBS LVT offset\n"); return 0; +out: + preempt_disable(); + put_eilvt(offset); + preempt_enable(); + return ret; } /* * check and reserve APIC extended interrupt LVT offset for IBS if * available - * - * init_ibs() preforms implicitly cpu-local operations, so pin this - * thread to its current CPU */ static void init_ibs(void) { - preempt_disable(); - ibs_caps = get_ibs_caps(); + if (!ibs_caps) + return; + + if (ibs_eilvt_valid()) goto out; - if (__init_ibs_nmi() < 0) - ibs_caps = 0; - else - printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n", ibs_caps); + if (!force_ibs_eilvt_setup()) + goto out; + + /* Failed to setup ibs */ + ibs_caps = 0; + return; out: - preempt_enable(); + printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n", ibs_caps); } static int (*create_arch_files)(struct super_block *sb, struct dentry *root); -- cgit v1.2.3-59-g8ed1b From d819437156fd99da61d4e1402b2dbfc5cc472265 Mon Sep 17 00:00:00 2001 From: Eric B Munson Date: Mon, 23 May 2011 10:22:40 -0400 Subject: oprofile, powerpc: Handle events that raise an exception without overflowing Commit 0837e3242c73566fc1c0196b4ec61779c25ffc93 fixes a situation on POWER7 where events can roll back if a specualtive event doesn't actually complete. This can raise a performance monitor exception. We need to catch this to ensure that we reset the PMC. In all cases the PMC will be less than 256 cycles from overflow. This patch lifts Anton's fix for the problem in perf and applies it to oprofile as well. Signed-off-by: Eric B Munson Cc: # as far back as it applies cleanly Tested-by: Maynard Johnson Signed-off-by: Robert Richter --- arch/powerpc/oprofile/op_model_power4.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/oprofile/op_model_power4.c b/arch/powerpc/oprofile/op_model_power4.c index 8ee51a252cf1..e6bec74be131 100644 --- a/arch/powerpc/oprofile/op_model_power4.c +++ b/arch/powerpc/oprofile/op_model_power4.c @@ -261,6 +261,28 @@ static int get_kernel(unsigned long pc, unsigned long mmcra) return is_kernel; } +static bool pmc_overflow(unsigned long val) +{ + if ((int)val < 0) + return true; + + /* + * Events on POWER7 can roll back if a speculative event doesn't + * eventually complete. Unfortunately in some rare cases they will + * raise a performance monitor exception. We need to catch this to + * ensure we reset the PMC. In all cases the PMC will be 256 or less + * cycles from overflow. + * + * We only do this if the first pass fails to find any overflowing + * PMCs because a user might set a period of less than 256 and we + * don't want to mistakenly reset them. + */ + if (__is_processor(PV_POWER7) && ((0x80000000 - val) <= 256)) + return true; + + return false; +} + static void power4_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr) { @@ -281,7 +303,7 @@ static void power4_handle_interrupt(struct pt_regs *regs, for (i = 0; i < cur_cpu_spec->num_pmcs; ++i) { val = classic_ctr_read(i); - if (val < 0) { + if (pmc_overflow(val)) { if (oprofile_running && ctr[i].enabled) { oprofile_add_ext_sample(pc, regs, i, is_kernel); classic_ctr_write(i, reset_value[i]); -- cgit v1.2.3-59-g8ed1b From f29c50419c8d1998edd759f1990c4243a248f469 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 19 May 2011 14:35:33 -0400 Subject: maccess,probe_kernel: Make write/read src const void * The functions probe_kernel_write() and probe_kernel_read() do not modify the src pointer. Allow const pointers to be passed in without the need of a typecast. Acked-by: Mike Frysinger Acked-by: Heiko Carstens Acked-by: Martin Schwidefsky Signed-off-by: Steven Rostedt Link: http://lkml.kernel.org/r/1305824936.1465.4.camel@gandalf.stny.rr.com --- arch/blackfin/mm/maccess.c | 4 ++-- arch/s390/mm/maccess.c | 4 ++-- include/linux/uaccess.h | 8 ++++---- mm/maccess.c | 8 ++++---- 4 files changed, 12 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/blackfin/mm/maccess.c b/arch/blackfin/mm/maccess.c index b71cebc1f8a3..e2532114c5fd 100644 --- a/arch/blackfin/mm/maccess.c +++ b/arch/blackfin/mm/maccess.c @@ -16,7 +16,7 @@ static int validate_memory_access_address(unsigned long addr, int size) return bfin_mem_access_type(addr, size); } -long probe_kernel_read(void *dst, void *src, size_t size) +long probe_kernel_read(void *dst, const void *src, size_t size) { unsigned long lsrc = (unsigned long)src; int mem_type; @@ -55,7 +55,7 @@ long probe_kernel_read(void *dst, void *src, size_t size) return -EFAULT; } -long probe_kernel_write(void *dst, void *src, size_t size) +long probe_kernel_write(void *dst, const void *src, size_t size) { unsigned long ldst = (unsigned long)dst; int mem_type; diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index 71a4b0d34be0..51e5cd9b906a 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -19,7 +19,7 @@ * using the stura instruction. * Returns the number of bytes copied or -EFAULT. */ -static long probe_kernel_write_odd(void *dst, void *src, size_t size) +static long probe_kernel_write_odd(void *dst, const void *src, size_t size) { unsigned long count, aligned; int offset, mask; @@ -45,7 +45,7 @@ static long probe_kernel_write_odd(void *dst, void *src, size_t size) return rc ? rc : count; } -long probe_kernel_write(void *dst, void *src, size_t size) +long probe_kernel_write(void *dst, const void *src, size_t size) { long copied = 0; diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index d512d98dfb7d..5ca0951e1855 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -93,8 +93,8 @@ static inline unsigned long __copy_from_user_nocache(void *to, * Safely read from address @src to the buffer at @dst. If a kernel fault * happens, handle that and return -EFAULT. */ -extern long probe_kernel_read(void *dst, void *src, size_t size); -extern long __probe_kernel_read(void *dst, void *src, size_t size); +extern long probe_kernel_read(void *dst, const void *src, size_t size); +extern long __probe_kernel_read(void *dst, const void *src, size_t size); /* * probe_kernel_write(): safely attempt to write to a location @@ -105,7 +105,7 @@ extern long __probe_kernel_read(void *dst, void *src, size_t size); * Safely write to address @dst from the buffer at @src. If a kernel fault * happens, handle that and return -EFAULT. */ -extern long notrace probe_kernel_write(void *dst, void *src, size_t size); -extern long notrace __probe_kernel_write(void *dst, void *src, size_t size); +extern long notrace probe_kernel_write(void *dst, const void *src, size_t size); +extern long notrace __probe_kernel_write(void *dst, const void *src, size_t size); #endif /* __LINUX_UACCESS_H__ */ diff --git a/mm/maccess.c b/mm/maccess.c index e2b6f5634e0d..4cee182ab5f3 100644 --- a/mm/maccess.c +++ b/mm/maccess.c @@ -15,10 +15,10 @@ * happens, handle that and return -EFAULT. */ -long __weak probe_kernel_read(void *dst, void *src, size_t size) +long __weak probe_kernel_read(void *dst, const void *src, size_t size) __attribute__((alias("__probe_kernel_read"))); -long __probe_kernel_read(void *dst, void *src, size_t size) +long __probe_kernel_read(void *dst, const void *src, size_t size) { long ret; mm_segment_t old_fs = get_fs(); @@ -43,10 +43,10 @@ EXPORT_SYMBOL_GPL(probe_kernel_read); * Safely write to address @dst from the buffer at @src. If a kernel fault * happens, handle that and return -EFAULT. */ -long __weak probe_kernel_write(void *dst, void *src, size_t size) +long __weak probe_kernel_write(void *dst, const void *src, size_t size) __attribute__((alias("__probe_kernel_write"))); -long __probe_kernel_write(void *dst, void *src, size_t size) +long __probe_kernel_write(void *dst, const void *src, size_t size) { long ret; mm_segment_t old_fs = get_fs(); -- cgit v1.2.3-59-g8ed1b From 0d098a7d1e39553e8a3f638b923551edec4868a7 Mon Sep 17 00:00:00 2001 From: Rakib Mullick Date: Thu, 12 May 2011 23:33:40 +0600 Subject: x86/ftrace: Fix compiler warning in ftrace.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Due to commit dc326fca2b64 (x86, cpu: Clean up and unify the NOP selection infrastructure), we get the following warning: arch/x86/kernel/ftrace.c: In function ‘ftrace_make_nop’: arch/x86/kernel/ftrace.c:308:6: warning: assignment discards qualifiers from pointer target type arch/x86/kernel/ftrace.c: In function ‘ftrace_make_call’: arch/x86/kernel/ftrace.c:318:6: warning: assignment discards qualifiers from pointer target type ftrace_nop_replace() now returns const unsigned char *, so change its associated function/variable to its compatible type to keep compiler clam. Signed-off-by: Rakib Mullick Link: http://lkml.kernel.org/r/1305221620.7986.4.camel@localhost.localdomain [ updated for change of const void *src in probe_kernel_write() ] Signed-off-by: Steven Rostedt --- arch/x86/kernel/ftrace.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 0ba15a6cc57e..c9a281f272fd 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -123,7 +123,7 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) static atomic_t nmi_running = ATOMIC_INIT(0); static int mod_code_status; /* holds return value of text write */ static void *mod_code_ip; /* holds the IP to write to */ -static void *mod_code_newcode; /* holds the text to write to the IP */ +static const void *mod_code_newcode; /* holds the text to write to the IP */ static unsigned nmi_wait_count; static atomic_t nmi_update_count = ATOMIC_INIT(0); @@ -225,7 +225,7 @@ within(unsigned long addr, unsigned long start, unsigned long end) } static int -do_ftrace_mod_code(unsigned long ip, void *new_code) +do_ftrace_mod_code(unsigned long ip, const void *new_code) { /* * On x86_64, kernel text mappings are mapped read-only with @@ -266,8 +266,8 @@ static const unsigned char *ftrace_nop_replace(void) } static int -ftrace_modify_code(unsigned long ip, unsigned char *old_code, - unsigned char *new_code) +ftrace_modify_code(unsigned long ip, unsigned const char *old_code, + unsigned const char *new_code) { unsigned char replaced[MCOUNT_INSN_SIZE]; @@ -301,7 +301,7 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { - unsigned char *new, *old; + unsigned const char *new, *old; unsigned long ip = rec->ip; old = ftrace_call_replace(ip, addr); @@ -312,7 +312,7 @@ int ftrace_make_nop(struct module *mod, int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { - unsigned char *new, *old; + unsigned const char *new, *old; unsigned long ip = rec->ip; old = ftrace_nop_replace(); -- cgit v1.2.3-59-g8ed1b