From 202461e2f3c15dbfb05825d29ace0d20cdf55fa4 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Mon, 13 Feb 2017 03:31:55 +0100 Subject: tick/broadcast: Prevent deadlock on tick_broadcast_lock tick_broadcast_lock is taken from interrupt context, but the following call chain takes the lock without disabling interrupts: [ 12.703736] _raw_spin_lock+0x3b/0x50 [ 12.703738] tick_broadcast_control+0x5a/0x1a0 [ 12.703742] intel_idle_cpu_online+0x22/0x100 [ 12.703744] cpuhp_invoke_callback+0x245/0x9d0 [ 12.703752] cpuhp_thread_fun+0x52/0x110 [ 12.703754] smpboot_thread_fn+0x276/0x320 So the following deadlock can happen: lock(tick_broadcast_lock); lock(tick_broadcast_lock); intel_idle_cpu_online() is the only place which violates the calling convention of tick_broadcast_control(). This was caused by the removal of the smp function call in course of the cpu hotplug rework. Instead of slapping local_irq_disable/enable() at the call site, we can relax the calling convention and handle it in the core code, which makes the whole machinery more robust. Fixes: 29d7bbada98e ("intel_idle: Remove superfluous SMP fuction call") Reported-by: Gabriel C Signed-off-by: Mike Galbraith Cc: Ruslan Ruslichenko Cc: Jiri Slaby Cc: Greg KH Cc: Borislav Petkov Cc: lwn@lwn.net Cc: Andrew Morton Cc: Linus Torvalds Cc: Anna-Maria Gleixner Cc: Sebastian Siewior Cc: stable Link: http://lkml.kernel.org/r/1486953115.5912.4.camel@gmx.de Signed-off-by: Thomas Gleixner --- kernel/time/tick-broadcast.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 3109204c87cc..17ac99b60ee5 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -347,17 +347,16 @@ static void tick_handle_periodic_broadcast(struct clock_event_device *dev) * * Called when the system enters a state where affected tick devices * might stop. Note: TICK_BROADCAST_FORCE cannot be undone. - * - * Called with interrupts disabled, so clockevents_lock is not - * required here because the local clock event device cannot go away - * under us. */ void tick_broadcast_control(enum tick_broadcast_mode mode) { struct clock_event_device *bc, *dev; struct tick_device *td; int cpu, bc_stopped; + unsigned long flags; + /* Protects also the local clockevent device. */ + raw_spin_lock_irqsave(&tick_broadcast_lock, flags); td = this_cpu_ptr(&tick_cpu_device); dev = td->evtdev; @@ -365,12 +364,11 @@ void tick_broadcast_control(enum tick_broadcast_mode mode) * Is the device not affected by the powerstate ? */ if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP)) - return; + goto out; if (!tick_device_is_functional(dev)) - return; + goto out; - raw_spin_lock(&tick_broadcast_lock); cpu = smp_processor_id(); bc = tick_broadcast_device.evtdev; bc_stopped = cpumask_empty(tick_broadcast_mask); @@ -420,7 +418,8 @@ void tick_broadcast_control(enum tick_broadcast_mode mode) tick_broadcast_setup_oneshot(bc); } } - raw_spin_unlock(&tick_broadcast_lock); +out: + raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } EXPORT_SYMBOL_GPL(tick_broadcast_control); -- cgit v1.2.3-59-g8ed1b From 25f71d1c3e98ef0e52371746220d66458eac75bc Mon Sep 17 00:00:00 2001 From: Yang Yang Date: Fri, 30 Dec 2016 16:17:55 +0800 Subject: futex: Move futex_init() to core_initcall The UEVENT user mode helper is enabled before the initcalls are executed and is available when the root filesystem has been mounted. The user mode helper is triggered by device init calls and the executable might use the futex syscall. futex_init() is marked __initcall which maps to device_initcall, but there is no guarantee that futex_init() is invoked _before_ the first device init call which triggers the UEVENT user mode helper. If the user mode helper uses the futex syscall before futex_init() then the syscall crashes with a NULL pointer dereference because the futex subsystem has not been initialized yet. Move futex_init() to core_initcall so futexes are initialized before the root filesystem is mounted and the usermode helper becomes available. [ tglx: Rewrote changelog ] Signed-off-by: Yang Yang Cc: jiang.biao2@zte.com.cn Cc: jiang.zhengxiong@zte.com.cn Cc: zhong.weidong@zte.com.cn Cc: deng.huali@zte.com.cn Cc: Peter Zijlstra Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1483085875-6130-1-git-send-email-yang.yang29@zte.com.cn Signed-off-by: Thomas Gleixner --- kernel/futex.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/futex.c b/kernel/futex.c index 0842c8ca534b..cdf365036141 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -3323,4 +3323,4 @@ static int __init futex_init(void) return 0; } -__initcall(futex_init); +core_initcall(futex_init); -- cgit v1.2.3-59-g8ed1b From f222449c9dfad7c9bb8cb53e64c5c407b172ebbc Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Wed, 15 Feb 2017 13:43:32 +0900 Subject: timekeeping: Use deferred printk() in debug code We cannot do printk() from tk_debug_account_sleep_time(), because tk_debug_account_sleep_time() is called under tk_core seq lock. The reason why printk() is unsafe there is that console_sem may invoke scheduler (up()->wake_up_process()->activate_task()), which, in turn, can return back to timekeeping code, for instance, via get_time()->ktime_get(), deadlocking the system on tk_core seq lock. [ 48.950592] ====================================================== [ 48.950622] [ INFO: possible circular locking dependency detected ] [ 48.950622] 4.10.0-rc7-next-20170213+ #101 Not tainted [ 48.950622] ------------------------------------------------------- [ 48.950622] kworker/0:0/3 is trying to acquire lock: [ 48.950653] (tk_core){----..}, at: [] retrigger_next_event+0x4c/0x90 [ 48.950683] but task is already holding lock: [ 48.950683] (hrtimer_bases.lock){-.-...}, at: [] retrigger_next_event+0x38/0x90 [ 48.950714] which lock already depends on the new lock. [ 48.950714] the existing dependency chain (in reverse order) is: [ 48.950714] -> #5 (hrtimer_bases.lock){-.-...}: [ 48.950744] _raw_spin_lock_irqsave+0x50/0x64 [ 48.950775] lock_hrtimer_base+0x28/0x58 [ 48.950775] hrtimer_start_range_ns+0x20/0x5c8 [ 48.950775] __enqueue_rt_entity+0x320/0x360 [ 48.950805] enqueue_rt_entity+0x2c/0x44 [ 48.950805] enqueue_task_rt+0x24/0x94 [ 48.950836] ttwu_do_activate+0x54/0xc0 [ 48.950836] try_to_wake_up+0x248/0x5c8 [ 48.950836] __setup_irq+0x420/0x5f0 [ 48.950836] request_threaded_irq+0xdc/0x184 [ 48.950866] devm_request_threaded_irq+0x58/0xa4 [ 48.950866] omap_i2c_probe+0x530/0x6a0 [ 48.950897] platform_drv_probe+0x50/0xb0 [ 48.950897] driver_probe_device+0x1f8/0x2cc [ 48.950897] __driver_attach+0xc0/0xc4 [ 48.950927] bus_for_each_dev+0x6c/0xa0 [ 48.950927] bus_add_driver+0x100/0x210 [ 48.950927] driver_register+0x78/0xf4 [ 48.950958] do_one_initcall+0x3c/0x16c [ 48.950958] kernel_init_freeable+0x20c/0x2d8 [ 48.950958] kernel_init+0x8/0x110 [ 48.950988] ret_from_fork+0x14/0x24 [ 48.950988] -> #4 (&rt_b->rt_runtime_lock){-.-...}: [ 48.951019] _raw_spin_lock+0x40/0x50 [ 48.951019] rq_offline_rt+0x9c/0x2bc [ 48.951019] set_rq_offline.part.2+0x2c/0x58 [ 48.951049] rq_attach_root+0x134/0x144 [ 48.951049] cpu_attach_domain+0x18c/0x6f4 [ 48.951049] build_sched_domains+0xba4/0xd80 [ 48.951080] sched_init_smp+0x68/0x10c [ 48.951080] kernel_init_freeable+0x160/0x2d8 [ 48.951080] kernel_init+0x8/0x110 [ 48.951080] ret_from_fork+0x14/0x24 [ 48.951110] -> #3 (&rq->lock){-.-.-.}: [ 48.951110] _raw_spin_lock+0x40/0x50 [ 48.951141] task_fork_fair+0x30/0x124 [ 48.951141] sched_fork+0x194/0x2e0 [ 48.951141] copy_process.part.5+0x448/0x1a20 [ 48.951171] _do_fork+0x98/0x7e8 [ 48.951171] kernel_thread+0x2c/0x34 [ 48.951171] rest_init+0x1c/0x18c [ 48.951202] start_kernel+0x35c/0x3d4 [ 48.951202] 0x8000807c [ 48.951202] -> #2 (&p->pi_lock){-.-.-.}: [ 48.951232] _raw_spin_lock_irqsave+0x50/0x64 [ 48.951232] try_to_wake_up+0x30/0x5c8 [ 48.951232] up+0x4c/0x60 [ 48.951263] __up_console_sem+0x2c/0x58 [ 48.951263] console_unlock+0x3b4/0x650 [ 48.951263] vprintk_emit+0x270/0x474 [ 48.951293] vprintk_default+0x20/0x28 [ 48.951293] printk+0x20/0x30 [ 48.951324] kauditd_hold_skb+0x94/0xb8 [ 48.951324] kauditd_thread+0x1a4/0x56c [ 48.951324] kthread+0x104/0x148 [ 48.951354] ret_from_fork+0x14/0x24 [ 48.951354] -> #1 ((console_sem).lock){-.....}: [ 48.951385] _raw_spin_lock_irqsave+0x50/0x64 [ 48.951385] down_trylock+0xc/0x2c [ 48.951385] __down_trylock_console_sem+0x24/0x80 [ 48.951385] console_trylock+0x10/0x8c [ 48.951416] vprintk_emit+0x264/0x474 [ 48.951416] vprintk_default+0x20/0x28 [ 48.951416] printk+0x20/0x30 [ 48.951446] tk_debug_account_sleep_time+0x5c/0x70 [ 48.951446] __timekeeping_inject_sleeptime.constprop.3+0x170/0x1a0 [ 48.951446] timekeeping_resume+0x218/0x23c [ 48.951477] syscore_resume+0x94/0x42c [ 48.951477] suspend_enter+0x554/0x9b4 [ 48.951477] suspend_devices_and_enter+0xd8/0x4b4 [ 48.951507] enter_state+0x934/0xbd4 [ 48.951507] pm_suspend+0x14/0x70 [ 48.951507] state_store+0x68/0xc8 [ 48.951538] kernfs_fop_write+0xf4/0x1f8 [ 48.951538] __vfs_write+0x1c/0x114 [ 48.951538] vfs_write+0xa0/0x168 [ 48.951568] SyS_write+0x3c/0x90 [ 48.951568] __sys_trace_return+0x0/0x10 [ 48.951568] -> #0 (tk_core){----..}: [ 48.951599] lock_acquire+0xe0/0x294 [ 48.951599] ktime_get_update_offsets_now+0x5c/0x1d4 [ 48.951629] retrigger_next_event+0x4c/0x90 [ 48.951629] on_each_cpu+0x40/0x7c [ 48.951629] clock_was_set_work+0x14/0x20 [ 48.951660] process_one_work+0x2b4/0x808 [ 48.951660] worker_thread+0x3c/0x550 [ 48.951660] kthread+0x104/0x148 [ 48.951690] ret_from_fork+0x14/0x24 [ 48.951690] other info that might help us debug this: [ 48.951690] Chain exists of: tk_core --> &rt_b->rt_runtime_lock --> hrtimer_bases.lock [ 48.951721] Possible unsafe locking scenario: [ 48.951721] CPU0 CPU1 [ 48.951721] ---- ---- [ 48.951721] lock(hrtimer_bases.lock); [ 48.951751] lock(&rt_b->rt_runtime_lock); [ 48.951751] lock(hrtimer_bases.lock); [ 48.951751] lock(tk_core); [ 48.951782] *** DEADLOCK *** [ 48.951782] 3 locks held by kworker/0:0/3: [ 48.951782] #0: ("events"){.+.+.+}, at: [] process_one_work+0x1f8/0x808 [ 48.951812] #1: (hrtimer_work){+.+...}, at: [] process_one_work+0x1f8/0x808 [ 48.951843] #2: (hrtimer_bases.lock){-.-...}, at: [] retrigger_next_event+0x38/0x90 [ 48.951843] stack backtrace: [ 48.951873] CPU: 0 PID: 3 Comm: kworker/0:0 Not tainted 4.10.0-rc7-next-20170213+ [ 48.951904] Workqueue: events clock_was_set_work [ 48.951904] [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [ 48.951934] [] (show_stack) from [] (dump_stack+0xac/0xe0) [ 48.951934] [] (dump_stack) from [] (print_circular_bug+0x1d0/0x308) [ 48.951965] [] (print_circular_bug) from [] (validate_chain+0xf50/0x1324) [ 48.951965] [] (validate_chain) from [] (__lock_acquire+0x468/0x7e8) [ 48.951995] [] (__lock_acquire) from [] (lock_acquire+0xe0/0x294) [ 48.951995] [] (lock_acquire) from [] (ktime_get_update_offsets_now+0x5c/0x1d4) [ 48.952026] [] (ktime_get_update_offsets_now) from [] (retrigger_next_event+0x4c/0x90) [ 48.952026] [] (retrigger_next_event) from [] (on_each_cpu+0x40/0x7c) [ 48.952056] [] (on_each_cpu) from [] (clock_was_set_work+0x14/0x20) [ 48.952056] [] (clock_was_set_work) from [] (process_one_work+0x2b4/0x808) [ 48.952087] [] (process_one_work) from [] (worker_thread+0x3c/0x550) [ 48.952087] [] (worker_thread) from [] (kthread+0x104/0x148) [ 48.952087] [] (kthread) from [] (ret_from_fork+0x14/0x24) Replace printk() with printk_deferred(), which does not call into the scheduler. Fixes: 0bf43f15db85 ("timekeeping: Prints the amounts of time spent during suspend") Reported-and-tested-by: Tony Lindgren Signed-off-by: Sergey Senozhatsky Cc: Petr Mladek Cc: Sergey Senozhatsky Cc: Peter Zijlstra Cc: "Rafael J . Wysocki" Cc: Steven Rostedt Cc: John Stultz Cc: "[4.9+]" Link: http://lkml.kernel.org/r/20170215044332.30449-1-sergey.senozhatsky@gmail.com Signed-off-by: Thomas Gleixner --- kernel/time/timekeeping_debug.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/time/timekeeping_debug.c b/kernel/time/timekeeping_debug.c index ca9fb800336b..38bc4d2208e8 100644 --- a/kernel/time/timekeeping_debug.c +++ b/kernel/time/timekeeping_debug.c @@ -75,7 +75,7 @@ void tk_debug_account_sleep_time(struct timespec64 *t) int bin = min(fls(t->tv_sec), NUM_BINS-1); sleep_time_bin[bin]++; - pr_info("Suspended for %lld.%03lu seconds\n", (s64)t->tv_sec, - t->tv_nsec / NSEC_PER_MSEC); + printk_deferred(KERN_INFO "Suspended for %lld.%03lu seconds\n", + (s64)t->tv_sec, t->tv_nsec / NSEC_PER_MSEC); } -- cgit v1.2.3-59-g8ed1b From 558e8e27e73f53f8a512485be538b07115fe5f3c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 16 Feb 2017 12:19:18 -0800 Subject: Revert "nohz: Fix collision between tick and other hrtimers" This reverts commit 24b91e360ef521a2808771633d76ebc68bd5604b and commit 7bdb59f1ad47 ("tick/nohz: Fix possible missing clock reprog after tick soft restart") that depends on it, Pavel reports that it causes occasional boot hangs for him that seem to depend on just how the machine was booted. In particular, his machine hangs at around the PCI fixups of the EHCI USB host controller, but only hangs from cold boot, not from a warm boot. Thomas Gleixner suspecs it's a CPU hotplug interaction, particularly since Pavel also saw suspend/resume issues that seem to be related. We're reverting for now while trying to figure out the root cause. Reported-bisected-and-tested-by: Pavel Machek Acked-by: Frederic Weisbecker Cc: Wanpeng Li Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: stable@kernel.org # reverted commits were marked for stable Signed-off-by: Linus Torvalds --- kernel/time/tick-sched.c | 14 ++------------ kernel/time/tick-sched.h | 2 -- 2 files changed, 2 insertions(+), 14 deletions(-) (limited to 'kernel') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index fc6f740d0277..2c115fdab397 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -725,11 +725,6 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, */ if (delta == 0) { tick_nohz_restart(ts, now); - /* - * Make sure next tick stop doesn't get fooled by past - * clock deadline - */ - ts->next_tick = 0; goto out; } } @@ -772,7 +767,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, tick = expires; /* Skip reprogram of event if its not changed */ - if (ts->tick_stopped && (expires == ts->next_tick)) + if (ts->tick_stopped && (expires == dev->next_event)) goto out; /* @@ -792,8 +787,6 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, trace_tick_stop(1, TICK_DEP_MASK_NONE); } - ts->next_tick = tick; - /* * If the expiration time == KTIME_MAX, then we simply stop * the tick timer. @@ -809,10 +802,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, else tick_program_event(tick, 1); out: - /* - * Update the estimated sleep length until the next timer - * (not only the tick). - */ + /* Update the estimated sleep length */ ts->sleep_length = ktime_sub(dev->next_event, now); return tick; } diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h index 075444e3d48e..bf38226e5c17 100644 --- a/kernel/time/tick-sched.h +++ b/kernel/time/tick-sched.h @@ -27,7 +27,6 @@ enum tick_nohz_mode { * timer is modified for nohz sleeps. This is necessary * to resume the tick timer operation in the timeline * when the CPU returns from nohz sleep. - * @next_tick: Next tick to be fired when in dynticks mode. * @tick_stopped: Indicator that the idle tick has been stopped * @idle_jiffies: jiffies at the entry to idle for idle time accounting * @idle_calls: Total number of idle calls @@ -45,7 +44,6 @@ struct tick_sched { unsigned long check_clocks; enum tick_nohz_mode nohz_mode; ktime_t last_tick; - ktime_t next_tick; int inidle; int tick_stopped; unsigned long idle_jiffies; -- cgit v1.2.3-59-g8ed1b From fc98c3c8c9dcafd67adcce69e6ce3191d5306c9c Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Sat, 18 Feb 2017 03:42:54 -0800 Subject: printk: use rcuidle console tracepoint Use rcuidle console tracepoint because, apparently, it may be issued from an idle CPU: hw-breakpoint: Failed to enable monitor mode on CPU 0. hw-breakpoint: CPU 0 failed to disable vector catch =============================== [ ERR: suspicious RCU usage. ] 4.10.0-rc8-next-20170215+ #119 Not tainted ------------------------------- ./include/trace/events/printk.h:32 suspicious rcu_dereference_check() usage! other info that might help us debug this: RCU used illegally from idle CPU! rcu_scheduler_active = 2, debug_locks = 0 RCU used illegally from extended quiescent state! 2 locks held by swapper/0/0: #0: (cpu_pm_notifier_lock){......}, at: [] cpu_pm_exit+0x10/0x54 #1: (console_lock){+.+.+.}, at: [] vprintk_emit+0x264/0x474 stack backtrace: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.10.0-rc8-next-20170215+ #119 Hardware name: Generic OMAP4 (Flattened Device Tree) console_unlock vprintk_emit vprintk_default printk reset_ctrl_regs dbg_cpu_pm_notify notifier_call_chain cpu_pm_exit omap_enter_idle_coupled cpuidle_enter_state cpuidle_enter_state_coupled do_idle cpu_startup_entry start_kernel This RCU warning, however, is suppressed by lockdep_off() in printk(). lockdep_off() increments the ->lockdep_recursion counter and thus disables RCU_LOCKDEP_WARN() and debug_lockdep_rcu_enabled(), which want lockdep to be enabled "current->lockdep_recursion == 0". Link: http://lkml.kernel.org/r/20170217015932.11898-1-sergey.senozhatsky@gmail.com Signed-off-by: Sergey Senozhatsky Reported-by: Tony Lindgren Tested-by: Tony Lindgren Acked-by: Paul E. McKenney Acked-by: Steven Rostedt (VMware) Cc: Petr Mladek Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Lindgren Cc: Russell King Cc: [3.4+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/printk/printk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 8b2696420abb..4ba3d34938c0 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1516,7 +1516,7 @@ static void call_console_drivers(int level, { struct console *con; - trace_console(text, len); + trace_console_rcuidle(text, len); if (!console_drivers) return; -- cgit v1.2.3-59-g8ed1b