From 497957576cf8a2150d723aedd74ea60b5d498bfe Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Mon, 12 Dec 2016 16:45:47 -0800 Subject: printk/kdb: handle more message headers Commit 4bcc595ccd80 ("printk: reinstate KERN_CONT for printing continuation lines") allows to define more message headers for a single message. The motivation is that continuous lines might get mixed. Therefore it make sense to define the right log level for every piece of a cont line. This patch introduces printk_skip_headers() that will skip all headers and uses it in the kdb code instead of printk_skip_level(). This approach helps to fix other printk_skip_level() users independently. Link: http://lkml.kernel.org/r/1478695291-12169-3-git-send-email-pmladek@suse.com Signed-off-by: Petr Mladek Cc: Joe Perches Cc: Sergey Senozhatsky Cc: Steven Rostedt Cc: Jason Wessel Cc: Jaroslav Kysela Cc: Takashi Iwai Cc: Chris Mason Cc: Josef Bacik Cc: David Sterba Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/debug/kdb/kdb_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/debug') diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c index fc1ef736253c..98c9011eac78 100644 --- a/kernel/debug/kdb/kdb_io.c +++ b/kernel/debug/kdb/kdb_io.c @@ -697,7 +697,7 @@ kdb_printit: * Write to all consoles. */ retlen = strlen(kdb_buffer); - cp = (char *) printk_skip_level(kdb_buffer); + cp = (char *) printk_skip_headers(kdb_buffer); if (!dbg_kdb_mode && kgdb_connected) { gdbstub_msg_write(cp, retlen - (cp - kdb_buffer)); } else { -- cgit v1.3-7-g2ca7 From 2d13bb6494c807bcf3f78af0e96c0b8615a94385 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Wed, 14 Dec 2016 15:05:49 -0800 Subject: kernel/debug/debug_core.c: more properly delay for secondary CPUs We've got a delay loop waiting for secondary CPUs. That loop uses loops_per_jiffy. However, loops_per_jiffy doesn't actually mean how many tight loops make up a jiffy on all architectures. It is quite common to see things like this in the boot log: Calibrating delay loop (skipped), value calculated using timer frequency.. 48.00 BogoMIPS (lpj=24000) In my case I was seeing lots of cases where other CPUs timed out entering the debugger only to print their stack crawls shortly after the kdb> prompt was written. Elsewhere in kgdb we already use udelay(), so that should be safe enough to use to implement our timeout. We'll delay 1 ms for 1000 times, which should give us a full second of delay (just like the old code wanted) but allow us to notice that we're done every 1 ms. [akpm@linux-foundation.org: simplifications, per Daniel] Link: http://lkml.kernel.org/r/1477091361-2039-1-git-send-email-dianders@chromium.org Signed-off-by: Douglas Anderson Reviewed-by: Daniel Thompson Cc: Jason Wessel Cc: Brian Norris Cc: [4.0+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/debug/debug_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/debug') diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index 0874e2edd275..79517e5549f1 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -598,11 +598,11 @@ return_normal: /* * Wait for the other CPUs to be notified and be waiting for us: */ - time_left = loops_per_jiffy * HZ; + time_left = MSEC_PER_SEC; while (kgdb_do_roundup && --time_left && (atomic_read(&masters_in_kgdb) + atomic_read(&slaves_in_kgdb)) != online_cpus) - cpu_relax(); + udelay(1000); if (!time_left) pr_crit("Timed out waiting for secondary CPUs.\n"); -- cgit v1.3-7-g2ca7 From d1bd8ead126668a2d6c42d97cc3664e95b3fa1dc Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 14 Dec 2016 15:05:52 -0800 Subject: kdb: remove unused kdb_event handling kdb_event state variable is only set but never checked in the kernel code. http://www.spinics.net/lists/kdb/msg01733.html suggests that this variable affected WARN_CONSOLE_UNLOCKED() in the original implementation. But this check never went upstream. The semantic is unclear and racy. The value is updated after the kdb_printf_lock is acquired and after it is released. It should be symmetric at minimum. The value should be manipulated either inside or outside the locked area. Fortunately, it seems that the original function is gone and we could simply remove the state variable. Link: http://lkml.kernel.org/r/1480412276-16690-2-git-send-email-pmladek@suse.com Signed-off-by: Petr Mladek Suggested-by: Daniel Thompson Cc: Jason Wessel Cc: Peter Zijlstra Cc: Sergey Senozhatsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kdb.h | 1 - kernel/debug/kdb/kdb_io.c | 2 -- kernel/debug/kdb/kdb_main.c | 1 - 3 files changed, 4 deletions(-) (limited to 'kernel/debug') diff --git a/include/linux/kdb.h b/include/linux/kdb.h index 410decacff8f..eb706188dc23 100644 --- a/include/linux/kdb.h +++ b/include/linux/kdb.h @@ -77,7 +77,6 @@ extern int kdb_poll_idx; * number whenever the kernel debugger is entered. */ extern int kdb_initial_cpu; -extern atomic_t kdb_event; /* Types and messages used for dynamically added kdb shell commands */ diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c index 98c9011eac78..46f477bebe0c 100644 --- a/kernel/debug/kdb/kdb_io.c +++ b/kernel/debug/kdb/kdb_io.c @@ -576,7 +576,6 @@ int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap) KDB_STATE_SET(PRINTF_LOCK); spin_lock_irqsave(&kdb_printf_lock, flags); got_printf_lock = 1; - atomic_inc(&kdb_event); } else { __acquire(kdb_printf_lock); } @@ -851,7 +850,6 @@ kdb_print_out: got_printf_lock = 0; spin_unlock_irqrestore(&kdb_printf_lock, flags); KDB_STATE_CLEAR(PRINTF_LOCK); - atomic_dec(&kdb_event); } else { __release(kdb_printf_lock); } diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 2a20c0dfdafc..ca183919d302 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -60,7 +60,6 @@ int kdb_grep_trailing; * Kernel debugger state flags */ int kdb_flags; -atomic_t kdb_event; /* * kdb_lock protects updates to kdb_initial_cpu. Used to -- cgit v1.3-7-g2ca7 From d5d8d3d0d4adcc3aec6e2e0fb656165014a712b7 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 14 Dec 2016 15:05:55 -0800 Subject: kdb: properly synchronize vkdb_printf() calls with other CPUs kdb_printf_lock does not prevent other CPUs from entering the critical section because it is ignored when KDB_STATE_PRINTF_LOCK is set. The problematic situation might look like: CPU0 CPU1 vkdb_printf() if (!KDB_STATE(PRINTF_LOCK)) KDB_STATE_SET(PRINTF_LOCK); spin_lock_irqsave(&kdb_printf_lock, flags); vkdb_printf() if (!KDB_STATE(PRINTF_LOCK)) BANG: The PRINTF_LOCK state is set and CPU1 is entering the critical section without spinning on the lock. The problem is that the code tries to implement locking using two state variables that are not handled atomically. Well, we need a custom locking because we want to allow reentering the critical section on the very same CPU. Let's use solution from Petr Zijlstra that was proposed for a similar scenario, see https://lkml.kernel.org/r/20161018171513.734367391@infradead.org This patch uses the same trick with cmpxchg(). The only difference is that we want to handle only recursion from the same context and therefore we disable interrupts. In addition, KDB_STATE_PRINTF_LOCK is removed. In fact, we are not able to set it a non-racy way. Link: http://lkml.kernel.org/r/1480412276-16690-3-git-send-email-pmladek@suse.com Signed-off-by: Petr Mladek Reviewed-by: Daniel Thompson Cc: Jason Wessel Cc: Peter Zijlstra Cc: Sergey Senozhatsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/debug/kdb/kdb_io.c | 30 +++++++++++++----------------- kernel/debug/kdb/kdb_private.h | 1 - 2 files changed, 13 insertions(+), 18 deletions(-) (limited to 'kernel/debug') diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c index 46f477bebe0c..daa76154fb1b 100644 --- a/kernel/debug/kdb/kdb_io.c +++ b/kernel/debug/kdb/kdb_io.c @@ -555,16 +555,16 @@ int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap) int colcount; int logging, saved_loglevel = 0; int saved_trap_printk; - int got_printf_lock = 0; int retlen = 0; int fnd, len; + int this_cpu, old_cpu; + static int kdb_printf_cpu = -1; char *cp, *cp2, *cphold = NULL, replaced_byte = ' '; char *moreprompt = "more> "; struct console *c = console_drivers; - static DEFINE_SPINLOCK(kdb_printf_lock); unsigned long uninitialized_var(flags); - preempt_disable(); + local_irq_save(flags); saved_trap_printk = kdb_trap_printk; kdb_trap_printk = 0; @@ -572,12 +572,13 @@ int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap) * But if any cpu goes recursive in kdb, just print the output, * even if it is interleaved with any other text. */ - if (!KDB_STATE(PRINTF_LOCK)) { - KDB_STATE_SET(PRINTF_LOCK); - spin_lock_irqsave(&kdb_printf_lock, flags); - got_printf_lock = 1; - } else { - __acquire(kdb_printf_lock); + this_cpu = smp_processor_id(); + for (;;) { + old_cpu = cmpxchg(&kdb_printf_cpu, -1, this_cpu); + if (old_cpu == -1 || old_cpu == this_cpu) + break; + + cpu_relax(); } diag = kdbgetintenv("LINES", &linecount); @@ -846,15 +847,10 @@ kdb_print_out: suspend_grep = 0; /* end of what may have been a recursive call */ if (logging) console_loglevel = saved_loglevel; - if (KDB_STATE(PRINTF_LOCK) && got_printf_lock) { - got_printf_lock = 0; - spin_unlock_irqrestore(&kdb_printf_lock, flags); - KDB_STATE_CLEAR(PRINTF_LOCK); - } else { - __release(kdb_printf_lock); - } + /* kdb_printf_cpu locked the code above. */ + smp_store_release(&kdb_printf_cpu, old_cpu); kdb_trap_printk = saved_trap_printk; - preempt_enable(); + local_irq_restore(flags); return retlen; } diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h index 75014d7f4568..fc224fbcf954 100644 --- a/kernel/debug/kdb/kdb_private.h +++ b/kernel/debug/kdb/kdb_private.h @@ -132,7 +132,6 @@ extern int kdb_state; #define KDB_STATE_PAGER 0x00000400 /* pager is available */ #define KDB_STATE_GO_SWITCH 0x00000800 /* go is switching * back to initial cpu */ -#define KDB_STATE_PRINTF_LOCK 0x00001000 /* Holds kdb_printf lock */ #define KDB_STATE_WAIT_IPI 0x00002000 /* Waiting for kdb_ipi() NMI */ #define KDB_STATE_RECURSE 0x00004000 /* Recursive entry to kdb */ #define KDB_STATE_IP_ADJUSTED 0x00008000 /* Restart IP has been -- cgit v1.3-7-g2ca7 From 34aaff40b42148b23dcde40152480e25c7d2d759 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 14 Dec 2016 15:05:58 -0800 Subject: kdb: call vkdb_printf() from vprintk_default() only when wanted kdb_trap_printk allows to pass normal printk() messages to kdb via vkdb_printk(). For example, it is used to get backtrace using the classic show_stack(), see kdb_show_stack(). vkdb_printf() tries to avoid a potential infinite loop by disabling the trap. But this approach is racy, for example: CPU1 CPU2 vkdb_printf() // assume that kdb_trap_printk == 0 saved_trap_printk = kdb_trap_printk; kdb_trap_printk = 0; kdb_show_stack() kdb_trap_printk++; Problem1: Now, a nested printk() on CPU0 calls vkdb_printf() even when it should have been disabled. It will not cause a deadlock but... // using the outdated saved value: 0 kdb_trap_printk = saved_trap_printk; kdb_trap_printk--; Problem2: Now, kdb_trap_printk == -1 and will stay like this. It means that all messages will get passed to kdb from now on. This patch removes the racy saved_trap_printk handling. Instead, the recursion is prevented by a check for the locked CPU. The solution is still kind of racy. A non-related printk(), from another process, might get trapped by vkdb_printf(). And the wanted printk() might not get trapped because kdb_printf_cpu is assigned. But this problem existed even with the original code. A proper solution would be to get_cpu() before setting kdb_trap_printk and trap messages only from this CPU. I am not sure if it is worth the effort, though. In fact, the race is very theoretical. When kdb is running any of the commands that use kdb_trap_printk there is a single active CPU and the other CPUs should be in a holding pen inside kgdb_cpu_enter(). The only time this is violated is when there is a timeout waiting for the other CPUs to report to the holding pen. Finally, note that the situation is a bit schizophrenic. vkdb_printf() explicitly allows recursion but only from KDB code that calls kdb_printf() directly. On the other hand, the generic printk() recursion is not allowed because it might cause an infinite loop. This is why we could not hide the decision inside vkdb_printf() easily. Link: http://lkml.kernel.org/r/1480412276-16690-4-git-send-email-pmladek@suse.com Signed-off-by: Petr Mladek Cc: Daniel Thompson Cc: Jason Wessel Cc: Peter Zijlstra Cc: Sergey Senozhatsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kdb.h | 1 + kernel/debug/kdb/kdb_io.c | 9 ++------- kernel/printk/printk.c | 3 ++- 3 files changed, 5 insertions(+), 8 deletions(-) (limited to 'kernel/debug') diff --git a/include/linux/kdb.h b/include/linux/kdb.h index eb706188dc23..68bd88223417 100644 --- a/include/linux/kdb.h +++ b/include/linux/kdb.h @@ -161,6 +161,7 @@ enum kdb_msgsrc { }; extern int kdb_trap_printk; +extern int kdb_printf_cpu; extern __printf(2, 0) int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list args); extern __printf(1, 2) int kdb_printf(const char *, ...); diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c index daa76154fb1b..e74be38245ad 100644 --- a/kernel/debug/kdb/kdb_io.c +++ b/kernel/debug/kdb/kdb_io.c @@ -30,6 +30,7 @@ char kdb_prompt_str[CMD_BUFLEN]; int kdb_trap_printk; +int kdb_printf_cpu = -1; static int kgdb_transition_check(char *buffer) { @@ -554,24 +555,19 @@ int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap) int linecount; int colcount; int logging, saved_loglevel = 0; - int saved_trap_printk; int retlen = 0; int fnd, len; int this_cpu, old_cpu; - static int kdb_printf_cpu = -1; char *cp, *cp2, *cphold = NULL, replaced_byte = ' '; char *moreprompt = "more> "; struct console *c = console_drivers; unsigned long uninitialized_var(flags); - local_irq_save(flags); - saved_trap_printk = kdb_trap_printk; - kdb_trap_printk = 0; - /* Serialize kdb_printf if multiple cpus try to write at once. * But if any cpu goes recursive in kdb, just print the output, * even if it is interleaved with any other text. */ + local_irq_save(flags); this_cpu = smp_processor_id(); for (;;) { old_cpu = cmpxchg(&kdb_printf_cpu, -1, this_cpu); @@ -849,7 +845,6 @@ kdb_print_out: console_loglevel = saved_loglevel; /* kdb_printf_cpu locked the code above. */ smp_store_release(&kdb_printf_cpu, old_cpu); - kdb_trap_printk = saved_trap_printk; local_irq_restore(flags); return retlen; } diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 577f2288d19f..a3ce35e0fa1e 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1926,7 +1926,8 @@ int vprintk_default(const char *fmt, va_list args) int r; #ifdef CONFIG_KGDB_KDB - if (unlikely(kdb_trap_printk)) { + /* Allow to pass printk() to kdb but avoid a recursion. */ + if (unlikely(kdb_trap_printk && kdb_printf_cpu < 0)) { r = vkdb_printf(KDB_MSGSRC_PRINTK, fmt, args); return r; } -- cgit v1.3-7-g2ca7