From 78d904b46a72fcf15ea6a39672bbef92953876b5 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 5 Feb 2009 18:43:07 -0500 Subject: ring-buffer: add NMI protection for spinlocks Impact: prevent deadlock in NMI The ring buffers are not yet totally lockless with writing to the buffer. When a writer crosses a page, it grabs a per cpu spinlock to protect against a reader. The spinlocks taken by a writer are not to protect against other writers, since a writer can only write to its own per cpu buffer. The spinlocks protect against readers that can touch any cpu buffer. The writers are made to be reentrant with the spinlocks disabling interrupts. The problem arises when an NMI writes to the buffer, and that write crosses a page boundary. If it grabs a spinlock, it can be racing with another writer (since disabling interrupts does not protect against NMIs) or with a reader on the same CPU. Luckily, most of the users are not reentrant and protects against this issue. But if a user of the ring buffer becomes reentrant (which is what the ring buffers do allow), if the NMI also writes to the ring buffer then we risk the chance of a deadlock. This patch moves the ftrace_nmi_enter called by nmi_enter() to the ring buffer code. It replaces the current ftrace_nmi_enter that is used by arch specific code to arch_ftrace_nmi_enter and updates the Kconfig to handle it. When an NMI is called, it will set a per cpu variable in the ring buffer code and will clear it when the NMI exits. If a write to the ring buffer crosses page boundaries inside an NMI, a trylock is used on the spin lock instead. If the spinlock fails to be acquired, then the entry is discarded. This bug appeared in the ftrace work in the RT tree, where event tracing is reentrant. This workaround solved the deadlocks that appeared there. Signed-off-by: Steven Rostedt --- include/linux/ftrace_irq.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux/ftrace_irq.h') diff --git a/include/linux/ftrace_irq.h b/include/linux/ftrace_irq.h index 366a054d0b05..29de6779a963 100644 --- a/include/linux/ftrace_irq.h +++ b/include/linux/ftrace_irq.h @@ -2,7 +2,15 @@ #define _LINUX_FTRACE_IRQ_H -#if defined(CONFIG_DYNAMIC_FTRACE) || defined(CONFIG_FUNCTION_GRAPH_TRACER) +#ifdef CONFIG_FTRACE_NMI_ENTER +extern void arch_ftrace_nmi_enter(void); +extern void arch_ftrace_nmi_exit(void); +#else +static inline void arch_ftrace_nmi_enter(void) { } +static inline void arch_ftrace_nmi_exit(void) { } +#endif + +#ifdef CONFIG_RING_BUFFER extern void ftrace_nmi_enter(void); extern void ftrace_nmi_exit(void); #else -- cgit v1.2.3-59-g8ed1b From a81bd80a0b0a405dc0483e2c428332d69da2c79f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 6 Feb 2009 01:45:16 -0500 Subject: ring-buffer: use generic version of in_nmi Impact: clean up Now that a generic in_nmi is available, this patch removes the special code in the ring_buffer and implements the in_nmi generic version instead. With this change, I was also able to rename the "arch_ftrace_nmi_enter" back to "ftrace_nmi_enter" and remove the code from the ring buffer. Signed-off-by: Steven Rostedt --- arch/x86/kernel/ftrace.c | 4 ++-- include/linux/ftrace_irq.h | 8 -------- kernel/trace/ring_buffer.c | 43 +++++++++++++------------------------------ 3 files changed, 15 insertions(+), 40 deletions(-) (limited to 'include/linux/ftrace_irq.h') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 918073c6681b..d74d75e0952d 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -113,7 +113,7 @@ static void ftrace_mod_code(void) MCOUNT_INSN_SIZE); } -void arch_ftrace_nmi_enter(void) +void ftrace_nmi_enter(void) { atomic_inc(&nmi_running); /* Must have nmi_running seen before reading write flag */ @@ -124,7 +124,7 @@ void arch_ftrace_nmi_enter(void) } } -void arch_ftrace_nmi_exit(void) +void ftrace_nmi_exit(void) { /* Finish all executions before clearing nmi_running */ smp_wmb(); diff --git a/include/linux/ftrace_irq.h b/include/linux/ftrace_irq.h index 29de6779a963..dca7bf8cffe2 100644 --- a/include/linux/ftrace_irq.h +++ b/include/linux/ftrace_irq.h @@ -3,14 +3,6 @@ #ifdef CONFIG_FTRACE_NMI_ENTER -extern void arch_ftrace_nmi_enter(void); -extern void arch_ftrace_nmi_exit(void); -#else -static inline void arch_ftrace_nmi_enter(void) { } -static inline void arch_ftrace_nmi_exit(void) { } -#endif - -#ifdef CONFIG_RING_BUFFER extern void ftrace_nmi_enter(void); extern void ftrace_nmi_exit(void); #else diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index a60a6a852f42..5ee344417cd5 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -19,35 +20,6 @@ #include "trace.h" -/* - * Since the write to the buffer is still not fully lockless, - * we must be careful with NMIs. The locks in the writers - * are taken when a write crosses to a new page. The locks - * protect against races with the readers (this will soon - * be fixed with a lockless solution). - * - * Because we can not protect against NMIs, and we want to - * keep traces reentrant, we need to manage what happens - * when we are in an NMI. - */ -static DEFINE_PER_CPU(int, rb_in_nmi); - -void ftrace_nmi_enter(void) -{ - __get_cpu_var(rb_in_nmi)++; - /* call arch specific handler too */ - arch_ftrace_nmi_enter(); -} - -void ftrace_nmi_exit(void) -{ - arch_ftrace_nmi_exit(); - __get_cpu_var(rb_in_nmi)--; - /* NMIs are not recursive */ - WARN_ON_ONCE(__get_cpu_var(rb_in_nmi)); -} - - /* * A fast way to enable or disable all ring buffers is to * call tracing_on or tracing_off. Turning off the ring buffers @@ -1027,12 +999,23 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, local_irq_save(flags); /* + * Since the write to the buffer is still not + * fully lockless, we must be careful with NMIs. + * The locks in the writers are taken when a write + * crosses to a new page. The locks protect against + * races with the readers (this will soon be fixed + * with a lockless solution). + * + * Because we can not protect against NMIs, and we + * want to keep traces reentrant, we need to manage + * what happens when we are in an NMI. + * * NMIs can happen after we take the lock. * If we are in an NMI, only take the lock * if it is not already taken. Otherwise * simply fail. */ - if (unlikely(__get_cpu_var(rb_in_nmi))) { + if (unlikely(in_nmi())) { if (!__raw_spin_trylock(&cpu_buffer->lock)) goto out_unlock; } else -- cgit v1.2.3-59-g8ed1b