From 77498617857f68496b360081dde1a492d40c28b2 Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Wed, 2 Feb 2022 09:18:18 -0800 Subject: printk: Add panic_in_progress helper This will be used help avoid deadlocks during panics. Although it would be better to include this in linux/panic.h, it would require that header to include linux/atomic.h as well. On some architectures, this results in a circular dependency as well. So instead add the helper directly to printk.c. Suggested-by: Petr Mladek Signed-off-by: Stephen Brennan Reviewed-by: Petr Mladek Reviewed-by: Sergey Senozhatsky Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20220202171821.179394-2-stephen.s.brennan@oracle.com --- kernel/printk/printk.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'kernel/printk') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 155229f0cf0f..f04bbed0aa79 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -256,6 +256,11 @@ static void __up_console_sem(unsigned long ip) } #define up_console_sem() __up_console_sem(_RET_IP_) +static bool panic_in_progress(void) +{ + return unlikely(atomic_read(&panic_cpu) != PANIC_CPU_INVALID); +} + /* * This is used for debugging the mess that is the VT code by * keeping track if we have the console semaphore held. It's -- cgit v1.2.3-59-g8ed1b From d51507098ff91e863b6e0a8047507741d59b8175 Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Wed, 2 Feb 2022 09:18:19 -0800 Subject: printk: disable optimistic spin during panic A CPU executing with console lock spinning enabled might be halted during a panic. Before the panicking CPU calls console_flush_on_panic(), it may call console_trylock(), which attempts to optimistically spin, deadlocking the panic CPU: CPU 0 (panic CPU) CPU 1 ----------------- ------ printk() { vprintk_func() { vprintk_default() { vprintk_emit() { console_unlock() { console_lock_spinning_enable(); ... printing to console ... panic() { crash_smp_send_stop() { NMI -------------------> HALT } atomic_notifier_call_chain() { printk() { ... console_trylock_spinnning() { // optimistic spin infinitely This hang during panic can be induced when a kdump kernel is loaded, and crash_kexec_post_notifiers=1 is present on the kernel command line. The following script which concurrently writes to /dev/kmsg, and triggers a panic, can result in this hang: #!/bin/bash date # 991 chars (based on log buffer size): chars="$(printf 'a%.0s' {1..991})" while :; do echo $chars > /dev/kmsg done & echo c > /proc/sysrq-trigger & date exit To avoid this deadlock, ensure that console_trylock_spinning() does not allow spinning once a panic has begun. Fixes: dbdda842fe96 ("printk: Add console owner and waiter logic to load balance console writes") Suggested-by: Petr Mladek Signed-off-by: Stephen Brennan Reviewed-by: Petr Mladek Reviewed-by: Sergey Senozhatsky Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20220202171821.179394-3-stephen.s.brennan@oracle.com --- kernel/printk/printk.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'kernel/printk') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index f04bbed0aa79..e83c12770104 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1847,6 +1847,16 @@ static int console_trylock_spinning(void) if (console_trylock()) return 1; + /* + * It's unsafe to spin once a panic has begun. If we are the + * panic CPU, we may have already halted the owner of the + * console_sem. If we are not the panic CPU, then we should + * avoid taking console_sem, so the panic CPU has a better + * chance of cleanly acquiring it later. + */ + if (panic_in_progress()) + return 0; + printk_safe_enter_irqsave(flags); raw_spin_lock(&console_owner_lock); -- cgit v1.2.3-59-g8ed1b From 13fb0f74d7029df3b8137f11ef955e578a4a4a60 Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Wed, 2 Feb 2022 09:18:20 -0800 Subject: printk: Avoid livelock with heavy printk during panic During panic(), if another CPU is writing heavily the kernel log (e.g. via /dev/kmsg), then the panic CPU may livelock writing out its messages to the console. Note when too many messages are dropped during panic and suppress further printk, except from the panic CPU. This could result in some important messages being dropped. However, messages are already being dropped, so this approach at least prevents a livelock. Reviewed-by: Petr Mladek Signed-off-by: Stephen Brennan Reviewed-by: Sergey Senozhatsky Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20220202171821.179394-4-stephen.s.brennan@oracle.com --- kernel/printk/printk.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'kernel/printk') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index e83c12770104..2ec6b547cda6 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -93,6 +93,12 @@ EXPORT_SYMBOL_GPL(console_drivers); */ int __read_mostly suppress_printk; +/* + * During panic, heavy printk by other CPUs can delay the + * panic and risk deadlock on console resources. + */ +int __read_mostly suppress_panic_printk; + #ifdef CONFIG_LOCKDEP static struct lockdep_map console_lock_dep_map = { .name = "console_lock" @@ -2232,6 +2238,10 @@ asmlinkage int vprintk_emit(int facility, int level, if (unlikely(suppress_printk)) return 0; + if (unlikely(suppress_panic_printk) && + atomic_read(&panic_cpu) != raw_smp_processor_id()) + return 0; + if (level == LOGLEVEL_SCHED) { level = LOGLEVEL_DEFAULT; in_sched = true; @@ -2617,6 +2627,7 @@ void console_unlock(void) { static char ext_text[CONSOLE_EXT_LOG_MAX]; static char text[CONSOLE_LOG_MAX]; + static int panic_console_dropped; unsigned long flags; bool do_cond_resched, retry; struct printk_info info; @@ -2671,6 +2682,10 @@ skip: if (console_seq != r.info->seq) { console_dropped += r.info->seq - console_seq; console_seq = r.info->seq; + if (panic_in_progress() && panic_console_dropped++ > 10) { + suppress_panic_printk = 1; + pr_warn_once("Too many dropped messages. Suppress messages on non-panic CPUs to prevent livelock.\n"); + } } if (suppress_message_printing(r.info->level)) { -- cgit v1.2.3-59-g8ed1b From 8ebc476fd51e6c0fd3174ec1959a20ba99d4c5e5 Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Wed, 2 Feb 2022 09:18:21 -0800 Subject: printk: Drop console_sem during panic If another CPU is in panic, we are about to be halted. Try to gracefully abandon the console_sem, leaving it free for the panic CPU to grab. Suggested-by: Petr Mladek Signed-off-by: Stephen Brennan Reviewed-by: Petr Mladek Reviewed-by: Sergey Senozhatsky Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20220202171821.179394-5-stephen.s.brennan@oracle.com --- kernel/printk/printk.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'kernel/printk') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 2ec6b547cda6..6a51907a33b9 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2597,6 +2597,25 @@ static int have_callable_console(void) return 0; } +/* + * Return true when this CPU should unlock console_sem without pushing all + * messages to the console. This reduces the chance that the console is + * locked when the panic CPU tries to use it. + */ +static bool abandon_console_lock_in_panic(void) +{ + if (!panic_in_progress()) + return false; + + /* + * We can use raw_smp_processor_id() here because it is impossible for + * the task to be migrated to the panic_cpu, or away from it. If + * panic_cpu has already been set, and we're not currently executing on + * that CPU, then we never will be. + */ + return atomic_read(&panic_cpu) != raw_smp_processor_id(); +} + /* * Can we actually use the console at this time on this cpu? * @@ -2745,6 +2764,10 @@ skip: if (handover) return; + /* Allow panic_cpu to take over the consoles safely */ + if (abandon_console_lock_in_panic()) + break; + if (do_cond_resched) cond_resched(); } @@ -2762,7 +2785,7 @@ skip: * flush, no worries. */ retry = prb_read_valid(prb, next_seq, NULL); - if (retry && console_trylock()) + if (retry && !abandon_console_lock_in_panic() && console_trylock()) goto again; } EXPORT_SYMBOL(console_unlock); -- cgit v1.2.3-59-g8ed1b From ce06e863f36f16cdc3b84c7206cd13d5f597d623 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Wed, 16 Feb 2022 11:19:57 +0800 Subject: printk: make suppress_panic_printk static This symbol is not used outside of printk.c, so marks it static. Fix the following sparse warning: kernel/printk/printk.c:100:19: warning: symbol 'suppress_panic_printk' was not declared. Should it be static? Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Reviewed-by: Sergey Senozhatsky Reviewed-by: Miguel Ojeda Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20220216031957.9761-1-jiapeng.chong@linux.alibaba.com --- kernel/printk/printk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/printk') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 6a51907a33b9..f9430ac4caca 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -97,7 +97,7 @@ int __read_mostly suppress_printk; * During panic, heavy printk by other CPUs can delay the * panic and risk deadlock on console resources. */ -int __read_mostly suppress_panic_printk; +static int __read_mostly suppress_panic_printk; #ifdef CONFIG_LOCKDEP static struct lockdep_map console_lock_dep_map = { -- cgit v1.2.3-59-g8ed1b