From c8446b75be6f85b3d40066922876cb7adc948afb Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 30 Oct 2012 13:33:54 +0100 Subject: irq_work: Fix racy IRQ_WORK_BUSY flag setting The IRQ_WORK_BUSY flag is set right before we execute the work. Once this flag value is set, the work enters a claimable state again. So if we have specific data to compute in our work, we ensure it's either handled by another CPU or locally by enqueuing the work again. This state machine is guanranteed by atomic operations on the flags. So when we set IRQ_WORK_BUSY without using an xchg-like operation, we break this guarantee as in the following summarized scenario: CPU 1 CPU 2 ----- ----- (flags = 0) old_flags = flags; (flags = 0) cmpxchg(flags, old_flags, old_flags | IRQ_WORK_FLAGS) (flags = 3) [...] flags = IRQ_WORK_BUSY (flags = 2) func() (sees flags = 3) cmpxchg(flags, old_flags, old_flags | IRQ_WORK_FLAGS) (give up) cmpxchg(flags, 2, 0); (flags = 0) CPU 1 claims a work and executes it, so it sets IRQ_WORK_BUSY and the work is again in a claimable state. Now CPU 2 has new data to process and try to claim that work but it may see a stale value of the flags and think the work is still pending somewhere that will handle our data. This is because CPU 1 doesn't set IRQ_WORK_BUSY atomically. As a result, the data expected to be handle by CPU 2 won't get handled. To fix this, use xchg() to set IRQ_WORK_BUSY, this way we ensure the CPU 2 will see the correct value with cmpxchg() using the expected ordering. Changelog-heavily-inspired-by: Steven Rostedt Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Andrew Morton Cc: Paul Gortmaker Cc: Anish Kumar --- kernel/irq_work.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/irq_work.c b/kernel/irq_work.c index 1588e3b2871b..57be1a6cd8da 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -119,8 +119,11 @@ void irq_work_run(void) /* * Clear the PENDING bit, after this point the @work * can be re-used. + * Make it immediately visible so that other CPUs trying + * to claim that work don't rely on us to handle their data + * while we are in the middle of the func. */ - work->flags = IRQ_WORK_BUSY; + xchg(&work->flags, IRQ_WORK_BUSY); work->func(work); /* * Clear the BUSY bit and return to the free state if -- cgit v1.2.3-59-g8ed1b From e0bbe2d80c415bd4063d894ec2ccb336788af814 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 27 Oct 2012 15:21:36 +0200 Subject: irq_work: Fix racy check on work pending flag Work claiming wants to be SMP-safe. And by the time we try to claim a work, if it is already executing concurrently on another CPU, we want to succeed the claiming and queue the work again because the other CPU may have missed the data we wanted to handle in our work if it's about to complete there. This scenario is summarized below: CPU 1 CPU 2 ----- ----- (flags = 0) cmpxchg(flags, 0, IRQ_WORK_FLAGS) (flags = 3) [...] xchg(flags, IRQ_WORK_BUSY) (flags = 2) func() if (flags & IRQ_WORK_PENDING) (not true) cmpxchg(flags, flags, IRQ_WORK_FLAGS) (flags = 3) [...] cmpxchg(flags, IRQ_WORK_BUSY, 0); (fail, pending on CPU 2) This state machine is synchronized using [cmp]xchg() on the flags. As such, the early IRQ_WORK_PENDING check in CPU 2 above is racy. By the time we check it, we may be dealing with a stale value because we aren't using an atomic accessor. As a result, CPU 2 may "see" that the work is still pending on another CPU while it may be actually completing the work function exection already, leaving our data unprocessed. To fix this, we start by speculating about the value we wish to be in the work->flags but we only make any conclusion after the value returned by the cmpxchg() call that either claims the work or let the current owner handle the pending work for us. Changelog-heavily-inspired-by: Steven Rostedt Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Andrew Morton Cc: Paul Gortmaker Cc: Anish Kumar --- kernel/irq_work.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/kernel/irq_work.c b/kernel/irq_work.c index 57be1a6cd8da..64eddd59ed83 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -34,15 +34,21 @@ static DEFINE_PER_CPU(struct llist_head, irq_work_list); */ static bool irq_work_claim(struct irq_work *work) { - unsigned long flags, nflags; + unsigned long flags, oflags, nflags; + /* + * Start with our best wish as a premise but only trust any + * flag value after cmpxchg() result. + */ + flags = work->flags & ~IRQ_WORK_PENDING; for (;;) { - flags = work->flags; - if (flags & IRQ_WORK_PENDING) - return false; nflags = flags | IRQ_WORK_FLAGS; - if (cmpxchg(&work->flags, flags, nflags) == flags) + oflags = cmpxchg(&work->flags, flags, nflags); + if (oflags == flags) break; + if (oflags & IRQ_WORK_PENDING) + return false; + flags = oflags; cpu_relax(); } -- cgit v1.2.3-59-g8ed1b From 6147a9d8070e1c9d16d57eb53a14942b95b28dc4 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 19 Oct 2012 16:53:18 -0400 Subject: irq_work: Remove CONFIG_HAVE_IRQ_WORK irq work can run on any arch even without IPI support because of the hook on update_process_times(). So lets remove HAVE_IRQ_WORK because it doesn't reflect any backend requirement. Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Andrew Morton Cc: Paul Gortmaker --- arch/alpha/Kconfig | 1 - arch/arm/Kconfig | 1 - arch/arm64/Kconfig | 1 - arch/blackfin/Kconfig | 1 - arch/frv/Kconfig | 1 - arch/hexagon/Kconfig | 1 - arch/mips/Kconfig | 1 - arch/parisc/Kconfig | 1 - arch/powerpc/Kconfig | 1 - arch/s390/Kconfig | 1 - arch/sh/Kconfig | 1 - arch/sparc/Kconfig | 1 - arch/x86/Kconfig | 1 - drivers/staging/iio/trigger/Kconfig | 1 - init/Kconfig | 4 ---- 15 files changed, 18 deletions(-) diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 5dd7f5db24d4..e56c2d1dde70 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -5,7 +5,6 @@ config ALPHA select HAVE_IDE select HAVE_OPROFILE select HAVE_SYSCALL_WRAPPERS - select HAVE_IRQ_WORK select HAVE_PCSPKR_PLATFORM select HAVE_PERF_EVENTS select HAVE_DMA_ATTRS diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index ade7e924bef5..22d378be1f14 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -36,7 +36,6 @@ config ARM select HAVE_GENERIC_HARDIRQS select HAVE_HW_BREAKPOINT if (PERF_EVENTS && (CPU_V6 || CPU_V6K || CPU_V7)) select HAVE_IDE if PCI || ISA || PCMCIA - select HAVE_IRQ_WORK select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZMA select HAVE_KERNEL_LZO diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index ef54a59a9e89..dd50d72881c6 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -17,7 +17,6 @@ config ARM64 select HAVE_GENERIC_DMA_COHERENT select HAVE_GENERIC_HARDIRQS select HAVE_HW_BREAKPOINT if PERF_EVENTS - select HAVE_IRQ_WORK select HAVE_MEMBLOCK select HAVE_PERF_EVENTS select HAVE_SPARSE_IRQ diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig index b6f3ad5441c5..86f891f0ae48 100644 --- a/arch/blackfin/Kconfig +++ b/arch/blackfin/Kconfig @@ -24,7 +24,6 @@ config BLACKFIN select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_TRACE_MCOUNT_TEST select HAVE_IDE - select HAVE_IRQ_WORK select HAVE_KERNEL_GZIP if RAMKERNEL select HAVE_KERNEL_BZIP2 if RAMKERNEL select HAVE_KERNEL_LZMA if RAMKERNEL diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig index df2eb4bd9fa2..c44fd6eb0d78 100644 --- a/arch/frv/Kconfig +++ b/arch/frv/Kconfig @@ -3,7 +3,6 @@ config FRV default y select HAVE_IDE select HAVE_ARCH_TRACEHOOK - select HAVE_IRQ_WORK select HAVE_PERF_EVENTS select HAVE_UID16 select HAVE_GENERIC_HARDIRQS diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig index 0744f7d7b1fd..40a318594ae7 100644 --- a/arch/hexagon/Kconfig +++ b/arch/hexagon/Kconfig @@ -14,7 +14,6 @@ config HEXAGON # select HAVE_CLK # select IRQ_PER_CPU # select GENERIC_PENDING_IRQ if SMP - select HAVE_IRQ_WORK select GENERIC_ATOMIC64 select HAVE_PERF_EVENTS select HAVE_GENERIC_HARDIRQS diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index dba9390d37cf..3d86d699b9ad 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -4,7 +4,6 @@ config MIPS select HAVE_GENERIC_DMA_COHERENT select HAVE_IDE select HAVE_OPROFILE - select HAVE_IRQ_WORK select HAVE_PERF_EVENTS select PERF_USE_VMALLOC select HAVE_ARCH_KGDB diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 11def45b98c5..8f0df474deed 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -9,7 +9,6 @@ config PARISC select RTC_DRV_GENERIC select INIT_ALL_POSSIBLE select BUG - select HAVE_IRQ_WORK select HAVE_PERF_EVENTS select GENERIC_ATOMIC64 if !64BIT select HAVE_GENERIC_HARDIRQS diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index a902a5c1c76a..a90f0c9c3ec2 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -118,7 +118,6 @@ config PPC select HAVE_SYSCALL_WRAPPERS if PPC64 select GENERIC_ATOMIC64 if PPC32 select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE - select HAVE_IRQ_WORK select HAVE_PERF_EVENTS select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_HW_BREAKPOINT if PERF_EVENTS && PPC_BOOK3S_64 diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 5dba755a43e6..0816ff01f40c 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -78,7 +78,6 @@ config S390 select HAVE_KVM if 64BIT select HAVE_ARCH_TRACEHOOK select INIT_ALL_POSSIBLE - select HAVE_IRQ_WORK select HAVE_PERF_EVENTS select ARCH_HAVE_NMI_SAFE_CMPXCHG select HAVE_DEBUG_KMEMLEAK diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index babc2b826c5c..996e008aaa3e 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -11,7 +11,6 @@ config SUPERH select HAVE_ARCH_TRACEHOOK select HAVE_DMA_API_DEBUG select HAVE_DMA_ATTRS - select HAVE_IRQ_WORK select HAVE_PERF_EVENTS select HAVE_DEBUG_BUGVERBOSE select ARCH_HAVE_CUSTOM_GPIO_H diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index b6b442b0d793..05a478fa9143 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -22,7 +22,6 @@ config SPARC select ARCH_WANT_OPTIONAL_GPIOLIB select RTC_CLASS select RTC_DRV_M48T59 - select HAVE_IRQ_WORK select HAVE_DMA_ATTRS select HAVE_DMA_API_DEBUG select HAVE_ARCH_JUMP_LABEL diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 46c3bff3ced2..c13e07a8ebd3 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -26,7 +26,6 @@ config X86 select HAVE_OPROFILE select HAVE_PCSPKR_PLATFORM select HAVE_PERF_EVENTS - select HAVE_IRQ_WORK select HAVE_IOREMAP_PROT select HAVE_KPROBES select HAVE_MEMBLOCK diff --git a/drivers/staging/iio/trigger/Kconfig b/drivers/staging/iio/trigger/Kconfig index 7d3207559265..d44d3ad26fa5 100644 --- a/drivers/staging/iio/trigger/Kconfig +++ b/drivers/staging/iio/trigger/Kconfig @@ -21,7 +21,6 @@ config IIO_GPIO_TRIGGER config IIO_SYSFS_TRIGGER tristate "SYSFS trigger" depends on SYSFS - depends on HAVE_IRQ_WORK select IRQ_WORK help Provides support for using SYSFS entry as IIO triggers. diff --git a/init/Kconfig b/init/Kconfig index 6fdd6e339326..cdc152c75727 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -20,12 +20,8 @@ config CONSTRUCTORS bool depends on !UML -config HAVE_IRQ_WORK - bool - config IRQ_WORK bool - depends on HAVE_IRQ_WORK config BUILDTIME_EXTABLE_SORT bool -- cgit v1.2.3-59-g8ed1b From 33a5f6261a61af28f7b4c86f9f958da0f206c914 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 11 Oct 2012 17:52:56 +0200 Subject: nohz: Add API to check tick state We need some quick way to check if the CPU has stopped its tick. This will be useful to implement the printk tick using the irq work subsystem. Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Andrew Morton Cc: Paul Gortmaker --- include/linux/tick.h | 17 ++++++++++++++++- kernel/time/tick-sched.c | 2 +- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/include/linux/tick.h b/include/linux/tick.h index f37fceb69b73..2307dd31b966 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -8,6 +8,8 @@ #include #include +#include +#include #ifdef CONFIG_GENERIC_CLOCKEVENTS @@ -122,13 +124,26 @@ static inline int tick_oneshot_mode_active(void) { return 0; } #endif /* !CONFIG_GENERIC_CLOCKEVENTS */ # ifdef CONFIG_NO_HZ +DECLARE_PER_CPU(struct tick_sched, tick_cpu_sched); + +static inline int tick_nohz_tick_stopped(void) +{ + return __this_cpu_read(tick_cpu_sched.tick_stopped); +} + extern void tick_nohz_idle_enter(void); extern void tick_nohz_idle_exit(void); extern void tick_nohz_irq_exit(void); extern ktime_t tick_nohz_get_sleep_length(void); extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); -# else + +# else /* !CONFIG_NO_HZ */ +static inline int tick_nohz_tick_stopped(void) +{ + return 0; +} + static inline void tick_nohz_idle_enter(void) { } static inline void tick_nohz_idle_exit(void) { } diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index a40260885265..9e945aa090ac 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -28,7 +28,7 @@ /* * Per cpu nohz control structure */ -static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched); +DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched); /* * The time, when the last jiffy update happened. Protected by xtime_lock. -- cgit v1.2.3-59-g8ed1b From 00b42959106a9ca1c2899e591ae4e9a83ad6af05 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 7 Nov 2012 21:03:07 +0100 Subject: irq_work: Don't stop the tick with pending works Don't stop the tick if we have pending irq works on the queue, otherwise if the arch can't raise self-IPIs, we may not find an opportunity to execute the pending works for a while. Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Andrew Morton Cc: Paul Gortmaker --- include/linux/irq_work.h | 6 ++++++ kernel/irq_work.c | 11 +++++++++++ kernel/time/tick-sched.c | 3 ++- 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h index 6a9e8f5399e2..a69704f37204 100644 --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h @@ -20,4 +20,10 @@ bool irq_work_queue(struct irq_work *work); void irq_work_run(void); void irq_work_sync(struct irq_work *work); +#ifdef CONFIG_IRQ_WORK +bool irq_work_needs_cpu(void); +#else +static bool irq_work_needs_cpu(void) { return false; } +#endif + #endif /* _LINUX_IRQ_WORK_H */ diff --git a/kernel/irq_work.c b/kernel/irq_work.c index 64eddd59ed83..b3c113a14727 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -99,6 +99,17 @@ bool irq_work_queue(struct irq_work *work) } EXPORT_SYMBOL_GPL(irq_work_queue); +bool irq_work_needs_cpu(void) +{ + struct llist_head *this_list; + + this_list = &__get_cpu_var(irq_work_list); + if (llist_empty(this_list)) + return false; + + return true; +} + /* * Run the irq_work entries on this cpu. Requires to be ran from hardirq * context with local IRQs disabled. diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 9e945aa090ac..f249e8c3e58e 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -20,6 +20,7 @@ #include #include #include +#include #include @@ -289,7 +290,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, } while (read_seqretry(&xtime_lock, seq)); if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) || printk_needs_cpu(cpu) || - arch_needs_cpu(cpu)) { + arch_needs_cpu(cpu) || irq_work_needs_cpu()) { next_jiffies = last_jiffies + 1; delta_jiffies = 1; } else { -- cgit v1.2.3-59-g8ed1b From c0e980a4bd7fc5c9b748f2f0209d2a48c0fdf0ab Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 15 Nov 2012 11:34:21 -0500 Subject: irq_work: Flush work on CPU_DYING In order not to offline a CPU with pending irq works, flush the queue from CPU_DYING. The notifier is called by stop_machine on the CPU that is going down. The code will not be called from irq context (so things like get_irq_regs() wont work) but I'm not sure what the requirements are for irq_work in that regard (Peter?). But irqs are disabled and the CPU is about to go offline. Might as well flush the work. Signed-off-by: Steven Rostedt Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Andrew Morton Cc: Paul Gortmaker Signed-off-by: Frederic Weisbecker --- kernel/irq_work.c | 51 +++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 45 insertions(+), 6 deletions(-) diff --git a/kernel/irq_work.c b/kernel/irq_work.c index b3c113a14727..4ed17490f629 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include /* @@ -110,11 +112,7 @@ bool irq_work_needs_cpu(void) return true; } -/* - * Run the irq_work entries on this cpu. Requires to be ran from hardirq - * context with local IRQs disabled. - */ -void irq_work_run(void) +static void __irq_work_run(void) { struct irq_work *work; struct llist_head *this_list; @@ -124,7 +122,6 @@ void irq_work_run(void) if (llist_empty(this_list)) return; - BUG_ON(!in_irq()); BUG_ON(!irqs_disabled()); llnode = llist_del_all(this_list); @@ -149,6 +146,16 @@ void irq_work_run(void) (void)cmpxchg(&work->flags, IRQ_WORK_BUSY, 0); } } + +/* + * Run the irq_work entries on this cpu. Requires to be ran from hardirq + * context with local IRQs disabled. + */ +void irq_work_run(void) +{ + BUG_ON(!in_irq()); + __irq_work_run(); +} EXPORT_SYMBOL_GPL(irq_work_run); /* @@ -163,3 +170,35 @@ void irq_work_sync(struct irq_work *work) cpu_relax(); } EXPORT_SYMBOL_GPL(irq_work_sync); + +#ifdef CONFIG_HOTPLUG_CPU +static int irq_work_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + long cpu = (long)hcpu; + + switch (action) { + case CPU_DYING: + /* Called from stop_machine */ + if (WARN_ON_ONCE(cpu != smp_processor_id())) + break; + __irq_work_run(); + break; + default: + break; + } + return NOTIFY_OK; +} + +static struct notifier_block cpu_notify; + +static __init int irq_work_init_cpu_notifier(void) +{ + cpu_notify.notifier_call = irq_work_cpu_notify; + cpu_notify.priority = 0; + register_cpu_notifier(&cpu_notify); + return 0; +} +device_initcall(irq_work_init_cpu_notifier); + +#endif /* CONFIG_HOTPLUG_CPU */ -- cgit v1.2.3-59-g8ed1b From 8aa2accee41f7045dc904fa41d4475b2f6ffae3e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 15 Nov 2012 12:52:44 -0500 Subject: irq_work: Warn if there's still work on cpu_down If we are in nohz and there's still irq_work to be done when the idle task is about to go offline, give a nasty warning. Everything should have been flushed from the CPU_DYING notifier already. Further attempts to enqueue an irq_work are buggy because irqs are disabled by __cpu_disable(). The best we can do is to report the issue to the user. Signed-off-by: Steven Rostedt Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Andrew Morton Cc: Paul Gortmaker Signed-off-by: Frederic Weisbecker --- kernel/irq_work.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/irq_work.c b/kernel/irq_work.c index 4ed17490f629..480f74715ba9 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -109,6 +109,9 @@ bool irq_work_needs_cpu(void) if (llist_empty(this_list)) return false; + /* All work should have been flushed before going offline */ + WARN_ON_ONCE(cpu_is_offline(smp_processor_id())); + return true; } -- cgit v1.2.3-59-g8ed1b From bc6679aef673f9dcb8f718528fc3df49ff661af9 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 19 Oct 2012 16:43:41 -0400 Subject: irq_work: Make self-IPIs optable On irq work initialization, let the user choose to define it as "lazy" or not. "Lazy" means that we don't want to send an IPI (provided the arch can anyway) when we enqueue this work but we rather prefer to wait for the next timer tick to execute our work if possible. This is going to be a benefit for non-urgent enqueuers (like printk in the future) that may prefer not to raise an IPI storm in case of frequent enqueuing on short periods of time. Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Andrew Morton Cc: Paul Gortmaker --- include/linux/irq_work.h | 14 ++++++++++++++ kernel/irq_work.c | 47 +++++++++++++++++++++++++++-------------------- 2 files changed, 41 insertions(+), 20 deletions(-) diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h index a69704f37204..b28eb60c8bf6 100644 --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h @@ -3,6 +3,20 @@ #include +/* + * An entry can be in one of four states: + * + * free NULL, 0 -> {claimed} : free to be used + * claimed NULL, 3 -> {pending} : claimed to be enqueued + * pending next, 3 -> {busy} : queued, pending callback + * busy NULL, 2 -> {free, claimed} : callback in progress, can be claimed + */ + +#define IRQ_WORK_PENDING 1UL +#define IRQ_WORK_BUSY 2UL +#define IRQ_WORK_FLAGS 3UL +#define IRQ_WORK_LAZY 4UL /* Doesn't want IPI, wait for tick */ + struct irq_work { unsigned long flags; struct llist_node llnode; diff --git a/kernel/irq_work.c b/kernel/irq_work.c index 480f74715ba9..7f3a59bc8e3d 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -12,24 +12,15 @@ #include #include #include +#include +#include #include #include #include -/* - * An entry can be in one of four states: - * - * free NULL, 0 -> {claimed} : free to be used - * claimed NULL, 3 -> {pending} : claimed to be enqueued - * pending next, 3 -> {busy} : queued, pending callback - * busy NULL, 2 -> {free, claimed} : callback in progress, can be claimed - */ - -#define IRQ_WORK_PENDING 1UL -#define IRQ_WORK_BUSY 2UL -#define IRQ_WORK_FLAGS 3UL static DEFINE_PER_CPU(struct llist_head, irq_work_list); +static DEFINE_PER_CPU(int, irq_work_raised); /* * Claim the entry so that no one else will poke at it. @@ -69,14 +60,19 @@ void __weak arch_irq_work_raise(void) */ static void __irq_work_queue(struct irq_work *work) { - bool empty; - preempt_disable(); - empty = llist_add(&work->llnode, &__get_cpu_var(irq_work_list)); - /* The list was empty, raise self-interrupt to start processing. */ - if (empty) - arch_irq_work_raise(); + llist_add(&work->llnode, &__get_cpu_var(irq_work_list)); + + /* + * If the work is not "lazy" or the tick is stopped, raise the irq + * work interrupt (if supported by the arch), otherwise, just wait + * for the next tick. + */ + if (!(work->flags & IRQ_WORK_LAZY) || tick_nohz_tick_stopped()) { + if (!this_cpu_cmpxchg(irq_work_raised, 0, 1)) + arch_irq_work_raise(); + } preempt_enable(); } @@ -117,10 +113,19 @@ bool irq_work_needs_cpu(void) static void __irq_work_run(void) { + unsigned long flags; struct irq_work *work; struct llist_head *this_list; struct llist_node *llnode; + + /* + * Reset the "raised" state right before we check the list because + * an NMI may enqueue after we find the list empty from the runner. + */ + __this_cpu_write(irq_work_raised, 0); + barrier(); + this_list = &__get_cpu_var(irq_work_list); if (llist_empty(this_list)) return; @@ -140,13 +145,15 @@ static void __irq_work_run(void) * to claim that work don't rely on us to handle their data * while we are in the middle of the func. */ - xchg(&work->flags, IRQ_WORK_BUSY); + flags = work->flags & ~IRQ_WORK_PENDING; + xchg(&work->flags, flags); + work->func(work); /* * Clear the BUSY bit and return to the free state if * no-one else claimed it meanwhile. */ - (void)cmpxchg(&work->flags, IRQ_WORK_BUSY, 0); + (void)cmpxchg(&work->flags, flags, flags & ~IRQ_WORK_BUSY); } } -- cgit v1.2.3-59-g8ed1b From 74876a98a87a115254b3a66a14b27320b7f0acaa Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 12 Oct 2012 18:00:23 +0200 Subject: printk: Wake up klogd using irq_work klogd is woken up asynchronously from the tick in order to do it safely. However if printk is called when the tick is stopped, the reader won't be woken up until the next interrupt, which might not fire for a while. As a result, the user may miss some message. To fix this, lets implement the printk tick using a lazy irq work. This subsystem takes care of the timer tick state and can fix up accordingly. Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Andrew Morton Cc: Paul Gortmaker --- include/linux/printk.h | 3 --- init/Kconfig | 1 + kernel/printk.c | 36 ++++++++++++++++++++---------------- kernel/time/tick-sched.c | 2 +- kernel/timer.c | 1 - 5 files changed, 22 insertions(+), 21 deletions(-) diff --git a/include/linux/printk.h b/include/linux/printk.h index 9afc01e5a0a6..86c4b6294713 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -98,9 +98,6 @@ int no_printk(const char *fmt, ...) extern asmlinkage __printf(1, 2) void early_printk(const char *fmt, ...); -extern int printk_needs_cpu(int cpu); -extern void printk_tick(void); - #ifdef CONFIG_PRINTK asmlinkage __printf(5, 0) int vprintk_emit(int facility, int level, diff --git a/init/Kconfig b/init/Kconfig index cdc152c75727..c575566be47d 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1196,6 +1196,7 @@ config HOTPLUG config PRINTK default y bool "Enable support for printk" if EXPERT + select IRQ_WORK help This option enables normal printk support. Removing it eliminates most of the message strings from the kernel image diff --git a/kernel/printk.c b/kernel/printk.c index 2d607f4d1797..c9104feba5ec 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -42,6 +42,7 @@ #include #include #include +#include #include @@ -1955,30 +1956,32 @@ int is_console_locked(void) static DEFINE_PER_CPU(int, printk_pending); static DEFINE_PER_CPU(char [PRINTK_BUF_SIZE], printk_sched_buf); -void printk_tick(void) +static void wake_up_klogd_work_func(struct irq_work *irq_work) { - if (__this_cpu_read(printk_pending)) { - int pending = __this_cpu_xchg(printk_pending, 0); - if (pending & PRINTK_PENDING_SCHED) { - char *buf = __get_cpu_var(printk_sched_buf); - printk(KERN_WARNING "[sched_delayed] %s", buf); - } - if (pending & PRINTK_PENDING_WAKEUP) - wake_up_interruptible(&log_wait); + int pending = __this_cpu_xchg(printk_pending, 0); + + if (pending & PRINTK_PENDING_SCHED) { + char *buf = __get_cpu_var(printk_sched_buf); + printk(KERN_WARNING "[sched_delayed] %s", buf); } -} -int printk_needs_cpu(int cpu) -{ - if (cpu_is_offline(cpu)) - printk_tick(); - return __this_cpu_read(printk_pending); + if (pending & PRINTK_PENDING_WAKEUP) + wake_up_interruptible(&log_wait); } +static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = { + .func = wake_up_klogd_work_func, + .flags = IRQ_WORK_LAZY, +}; + void wake_up_klogd(void) { - if (waitqueue_active(&log_wait)) + preempt_disable(); + if (waitqueue_active(&log_wait)) { this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP); + irq_work_queue(&__get_cpu_var(wake_up_klogd_work)); + } + preempt_enable(); } static void console_cont_flush(char *text, size_t size) @@ -2458,6 +2461,7 @@ int printk_sched(const char *fmt, ...) va_end(args); __this_cpu_or(printk_pending, PRINTK_PENDING_SCHED); + irq_work_queue(&__get_cpu_var(wake_up_klogd_work)); local_irq_restore(flags); return r; diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index f249e8c3e58e..822d7572bf2d 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -289,7 +289,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, time_delta = timekeeping_max_deferment(); } while (read_seqretry(&xtime_lock, seq)); - if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) || printk_needs_cpu(cpu) || + if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) || arch_needs_cpu(cpu) || irq_work_needs_cpu()) { next_jiffies = last_jiffies + 1; delta_jiffies = 1; diff --git a/kernel/timer.c b/kernel/timer.c index 367d00858482..ff3b5165737b 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1351,7 +1351,6 @@ void update_process_times(int user_tick) account_process_tick(p, user_tick); run_local_timers(); rcu_check_callbacks(cpu, user_tick); - printk_tick(); #ifdef CONFIG_IRQ_WORK if (in_irq()) irq_work_run(); -- cgit v1.2.3-59-g8ed1b From e716efde75267eab919cdb2bef5b2cb77f305326 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 23 Nov 2012 10:08:44 +0100 Subject: genirq: Avoid deadlock in spurious handling commit 52553ddf(genirq: fix regression in irqfixup, irqpoll) introduced a potential deadlock by calling the action handler with the irq descriptor lock held. Remove the call and let the handling code run even for an interrupt where only a single action is registered. That matches the goal of the above commit and avoids the deadlock. Document the confusing action = desc->action reload in the handling loop while at it. Reported-and-tested-by: "Wang, Warner" Tested-by: Edward Donovan Cc: "Wang, Song-Bo (Stoney)" Cc: stable@vger.kernel.org Signed-off-by: Thomas Gleixner --- kernel/irq/spurious.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index 611cd6003c45..7b5f012bde9d 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c @@ -80,13 +80,11 @@ static int try_one_irq(int irq, struct irq_desc *desc, bool force) /* * All handlers must agree on IRQF_SHARED, so we test just the - * first. Check for action->next as well. + * first. */ action = desc->action; if (!action || !(action->flags & IRQF_SHARED) || - (action->flags & __IRQF_TIMER) || - (action->handler(irq, action->dev_id) == IRQ_HANDLED) || - !action->next) + (action->flags & __IRQF_TIMER)) goto out; /* Already running on another processor */ @@ -104,6 +102,7 @@ static int try_one_irq(int irq, struct irq_desc *desc, bool force) do { if (handle_irq_event(desc) == IRQ_HANDLED) ret = IRQ_HANDLED; + /* Make sure that there is still a valid action */ action = desc->action; } while ((desc->istate & IRQS_PENDING) && action); desc->istate &= ~IRQS_POLL_INPROGRESS; -- cgit v1.2.3-59-g8ed1b From c02cf5f8ed6137e2b3b2f10e0fca336e06e09ba4 Mon Sep 17 00:00:00 2001 From: anish kumar Date: Sun, 3 Feb 2013 22:08:23 +0100 Subject: irq_work: Remove return value from the irq_work_queue() function As no one is using the return value of irq_work_queue(), so it is better to just make it void. Signed-off-by: anish kumar Acked-by: Steven Rostedt [ Fix stale comments, remove now unnecessary __irq_work_queue() intermediate function ] Signed-off-by: Frederic Weisbecker Link: http://lkml.kernel.org/r/1359925703-24304-1-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/irq_work.h | 2 +- kernel/irq_work.c | 31 ++++++++++--------------------- 2 files changed, 11 insertions(+), 22 deletions(-) diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h index 6a9e8f5399e2..ce60c084635b 100644 --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h @@ -16,7 +16,7 @@ void init_irq_work(struct irq_work *work, void (*func)(struct irq_work *)) work->func = func; } -bool irq_work_queue(struct irq_work *work); +void irq_work_queue(struct irq_work *work); void irq_work_run(void); void irq_work_sync(struct irq_work *work); diff --git a/kernel/irq_work.c b/kernel/irq_work.c index 64eddd59ed83..c9d7478e4889 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -63,12 +63,20 @@ void __weak arch_irq_work_raise(void) } /* - * Queue the entry and raise the IPI if needed. + * Enqueue the irq_work @entry unless it's already pending + * somewhere. + * + * Can be re-enqueued while the callback is still in progress. */ -static void __irq_work_queue(struct irq_work *work) +void irq_work_queue(struct irq_work *work) { bool empty; + /* Only queue if not already pending */ + if (!irq_work_claim(work)) + return; + + /* Queue the entry and raise the IPI if needed. */ preempt_disable(); empty = llist_add(&work->llnode, &__get_cpu_var(irq_work_list)); @@ -78,25 +86,6 @@ static void __irq_work_queue(struct irq_work *work) preempt_enable(); } - -/* - * Enqueue the irq_work @entry, returns true on success, failure when the - * @entry was already enqueued by someone else. - * - * Can be re-enqueued while the callback is still in progress. - */ -bool irq_work_queue(struct irq_work *work) -{ - if (!irq_work_claim(work)) { - /* - * Already enqueued, can't do! - */ - return false; - } - - __irq_work_queue(work); - return true; -} EXPORT_SYMBOL_GPL(irq_work_queue); /* -- cgit v1.2.3-59-g8ed1b From f7c819c020db9796ae3a662b82a310617f92b15b Mon Sep 17 00:00:00 2001 From: James Hogan Date: Mon, 4 Feb 2013 10:09:43 +0000 Subject: arch Kconfig: Remove references to IRQ_PER_CPU The IRQ_PER_CPU Kconfig symbol was removed in the following commit: Commit 6a58fb3bad099076f36f0f30f44507bc3275cdb6 ("genirq: Remove CONFIG_IRQ_PER_CPU") merged in v2.6.39-rc1. But IRQ_PER_CPU wasn't removed from any of the architecture Kconfig files where it was defined or selected. It's completely unused so remove the remaining references. Signed-off-by: James Hogan Cc: Cc: Cc: Cc: Mike Frysinger Cc: Fenghua Yu Acked-by: Ralf Baechle Cc: James E.J. Bottomley Cc: Helge Deller Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Acked-by: Paul Mundt Acked-by: Tony Luck Acked-by: Richard Kuo Link: http://lkml.kernel.org/r/1359972583-17134-1-git-send-email-james.hogan@imgtec.com Signed-off-by: Thomas Gleixner --- arch/blackfin/Kconfig | 1 - arch/hexagon/Kconfig | 1 - arch/ia64/Kconfig | 1 - arch/mips/Kconfig | 1 - arch/parisc/Kconfig | 1 - arch/powerpc/Kconfig | 1 - arch/sh/Kconfig | 3 --- 7 files changed, 9 deletions(-) diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig index 86f891f0ae48..67e4aaad78f5 100644 --- a/arch/blackfin/Kconfig +++ b/arch/blackfin/Kconfig @@ -37,7 +37,6 @@ config BLACKFIN select HAVE_GENERIC_HARDIRQS select GENERIC_ATOMIC64 select GENERIC_IRQ_PROBE - select IRQ_PER_CPU if SMP select USE_GENERIC_SMP_HELPERS if SMP select HAVE_NMI_WATCHDOG if NMI_WATCHDOG select GENERIC_SMP_IDLE_THREAD diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig index 40a318594ae7..e4decc6b8947 100644 --- a/arch/hexagon/Kconfig +++ b/arch/hexagon/Kconfig @@ -12,7 +12,6 @@ config HEXAGON # select ARCH_WANT_OPTIONAL_GPIOLIB # select ARCH_REQUIRE_GPIOLIB # select HAVE_CLK - # select IRQ_PER_CPU # select GENERIC_PENDING_IRQ if SMP select GENERIC_ATOMIC64 select HAVE_PERF_EVENTS diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 3279646120e3..00c2e88f7755 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -29,7 +29,6 @@ config IA64 select ARCH_DISCARD_MEMBLOCK select GENERIC_IRQ_PROBE select GENERIC_PENDING_IRQ if SMP - select IRQ_PER_CPU select GENERIC_IRQ_SHOW select ARCH_WANT_OPTIONAL_GPIOLIB select ARCH_HAVE_NMI_SAFE_CMPXCHG diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 121ed51ac227..9becc44d9d7a 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -2160,7 +2160,6 @@ source "mm/Kconfig" config SMP bool "Multi-Processing support" depends on SYS_SUPPORTS_SMP - select IRQ_PER_CPU select USE_GENERIC_SMP_HELPERS help This enables support for systems with more than one CPU. If you have diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 9dd5c1888878..a32e34ecda9e 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -15,7 +15,6 @@ config PARISC select BROKEN_RODATA select GENERIC_IRQ_PROBE select GENERIC_PCI_IOMAP - select IRQ_PER_CPU select ARCH_HAVE_NMI_SAFE_CMPXCHG select GENERIC_SMP_IDLE_THREAD select GENERIC_STRNCPY_FROM_USER diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index d45edca88b57..561ccca7b1a7 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -124,7 +124,6 @@ config PPC select HAVE_GENERIC_HARDIRQS select ARCH_WANT_IPC_PARSE_VERSION select SPARSE_IRQ - select IRQ_PER_CPU select IRQ_DOMAIN select GENERIC_IRQ_SHOW select GENERIC_IRQ_SHOW_LEVEL diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 996e008aaa3e..9c833c585871 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -90,9 +90,6 @@ config GENERIC_CSUM config GENERIC_HWEIGHT def_bool y -config IRQ_PER_CPU - def_bool y - config GENERIC_GPIO def_bool n -- cgit v1.2.3-59-g8ed1b From 36a5df85e9a3c218b73f6cf80098016ca3f0410d Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Fri, 1 Feb 2013 15:04:26 -0500 Subject: genirq: Export enable/disable_percpu_irq() These functions are used by the tilegx onchip network driver, and it's useful to be able to load that driver as a module. Signed-off-by: Chris Metcalf Link: http://lkml.kernel.org/r/201302012043.r11KhNZF024371@farm-0021.internal.tilera.com Signed-off-by: Thomas Gleixner --- kernel/irq/manage.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index e49a288fa479..88e7bed62711 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1524,6 +1524,7 @@ void enable_percpu_irq(unsigned int irq, unsigned int type) out: irq_put_desc_unlock(desc, flags); } +EXPORT_SYMBOL_GPL(enable_percpu_irq); void disable_percpu_irq(unsigned int irq) { @@ -1537,6 +1538,7 @@ void disable_percpu_irq(unsigned int irq) irq_percpu_disable(desc, cpu); irq_put_desc_unlock(desc, flags); } +EXPORT_SYMBOL_GPL(disable_percpu_irq); /* * Internal function to unregister a percpu irqaction. -- cgit v1.2.3-59-g8ed1b