From e245d99e6cc4a0b904b87b46b4f60d46fb405987 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Wed, 2 Nov 2016 09:36:33 -0700 Subject: lockdep: Limit static allocations if PROVE_LOCKING_SMALL is defined Reduce the size of data structure for lockdep entries by half if PROVE_LOCKING_SMALL if defined. This is used only for sparc. Signed-off-by: Babu Moger Acked-by: Sam Ravnborg Signed-off-by: David S. Miller --- kernel/locking/lockdep_internals.h | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h index 51c4b24b6328..c2b88490d857 100644 --- a/kernel/locking/lockdep_internals.h +++ b/kernel/locking/lockdep_internals.h @@ -45,6 +45,14 @@ enum { #define LOCKF_USED_IN_IRQ_READ \ (LOCKF_USED_IN_HARDIRQ_READ | LOCKF_USED_IN_SOFTIRQ_READ) +/* + * CONFIG_PROVE_LOCKING_SMALL is defined for sparc. Sparc requires .text, + * .data and .bss to fit in required 32MB limit for the kernel. With + * PROVE_LOCKING we could go over this limit and cause system boot-up problems. + * So, reduce the static allocations for lockdeps related structures so that + * everything fits in current required size limit. + */ +#ifdef CONFIG_PROVE_LOCKING_SMALL /* * MAX_LOCKDEP_ENTRIES is the maximum number of lock dependencies * we track. @@ -54,18 +62,24 @@ enum { * table (if it's not there yet), and we check it for lock order * conflicts and deadlocks. */ +#define MAX_LOCKDEP_ENTRIES 16384UL +#define MAX_LOCKDEP_CHAINS_BITS 15 +#define MAX_STACK_TRACE_ENTRIES 262144UL +#else #define MAX_LOCKDEP_ENTRIES 32768UL #define MAX_LOCKDEP_CHAINS_BITS 16 -#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS) - -#define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5) /* * Stack-trace: tightly packed array of stack backtrace * addresses. Protected by the hash_lock. */ #define MAX_STACK_TRACE_ENTRIES 524288UL +#endif + +#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS) + +#define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5) extern struct list_head all_lock_classes; extern struct lock_chain lock_chains[]; -- cgit v1.3-14-g43fede From dbb26055defd03d59f678cb5f2c992abe05b064a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 30 Nov 2016 21:04:41 +0000 Subject: locking/rtmutex: Prevent dequeue vs. unlock race David reported a futex/rtmutex state corruption. It's caused by the following problem: CPU0 CPU1 CPU2 l->owner=T1 rt_mutex_lock(l) lock(l->wait_lock) l->owner = T1 | HAS_WAITERS; enqueue(T2) boost() unlock(l->wait_lock) schedule() rt_mutex_lock(l) lock(l->wait_lock) l->owner = T1 | HAS_WAITERS; enqueue(T3) boost() unlock(l->wait_lock) schedule() signal(->T2) signal(->T3) lock(l->wait_lock) dequeue(T2) deboost() unlock(l->wait_lock) lock(l->wait_lock) dequeue(T3) ===> wait list is now empty deboost() unlock(l->wait_lock) lock(l->wait_lock) fixup_rt_mutex_waiters() if (wait_list_empty(l)) { owner = l->owner & ~HAS_WAITERS; l->owner = owner ==> l->owner = T1 } lock(l->wait_lock) rt_mutex_unlock(l) fixup_rt_mutex_waiters() if (wait_list_empty(l)) { owner = l->owner & ~HAS_WAITERS; cmpxchg(l->owner, T1, NULL) ===> Success (l->owner = NULL) l->owner = owner ==> l->owner = T1 } That means the problem is caused by fixup_rt_mutex_waiters() which does the RMW to clear the waiters bit unconditionally when there are no waiters in the rtmutexes rbtree. This can be fatal: A concurrent unlock can release the rtmutex in the fastpath because the waiters bit is not set. If the cmpxchg() gets in the middle of the RMW operation then the previous owner, which just unlocked the rtmutex is set as the owner again when the write takes place after the successfull cmpxchg(). The solution is rather trivial: verify that the owner member of the rtmutex has the waiters bit set before clearing it. This does not require a cmpxchg() or other atomic operations because the waiters bit can only be set and cleared with the rtmutex wait_lock held. It's also safe against the fast path unlock attempt. The unlock attempt via cmpxchg() will either see the bit set and take the slowpath or see the bit cleared and release it atomically in the fastpath. It's remarkable that the test program provided by David triggers on ARM64 and MIPS64 really quick, but it refuses to reproduce on x86-64, while the problem exists there as well. That refusal might explain that this got not discovered earlier despite the bug existing from day one of the rtmutex implementation more than 10 years ago. Thanks to David for meticulously instrumenting the code and providing the information which allowed to decode this subtle problem. Reported-by: David Daney Tested-by: David Daney Signed-off-by: Thomas Gleixner Reviewed-by: Steven Rostedt Acked-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mark Rutland Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Will Deacon Cc: stable@vger.kernel.org Fixes: 23f78d4a03c5 ("[PATCH] pi-futex: rt mutex core") Link: http://lkml.kernel.org/r/20161130210030.351136722@linutronix.de Signed-off-by: Ingo Molnar --- kernel/locking/rtmutex.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 66 insertions(+), 2 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 1ec0f48962b3..2c49d76f96c3 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -65,8 +65,72 @@ static inline void clear_rt_mutex_waiters(struct rt_mutex *lock) static void fixup_rt_mutex_waiters(struct rt_mutex *lock) { - if (!rt_mutex_has_waiters(lock)) - clear_rt_mutex_waiters(lock); + unsigned long owner, *p = (unsigned long *) &lock->owner; + + if (rt_mutex_has_waiters(lock)) + return; + + /* + * The rbtree has no waiters enqueued, now make sure that the + * lock->owner still has the waiters bit set, otherwise the + * following can happen: + * + * CPU 0 CPU 1 CPU2 + * l->owner=T1 + * rt_mutex_lock(l) + * lock(l->lock) + * l->owner = T1 | HAS_WAITERS; + * enqueue(T2) + * boost() + * unlock(l->lock) + * block() + * + * rt_mutex_lock(l) + * lock(l->lock) + * l->owner = T1 | HAS_WAITERS; + * enqueue(T3) + * boost() + * unlock(l->lock) + * block() + * signal(->T2) signal(->T3) + * lock(l->lock) + * dequeue(T2) + * deboost() + * unlock(l->lock) + * lock(l->lock) + * dequeue(T3) + * ==> wait list is empty + * deboost() + * unlock(l->lock) + * lock(l->lock) + * fixup_rt_mutex_waiters() + * if (wait_list_empty(l) { + * l->owner = owner + * owner = l->owner & ~HAS_WAITERS; + * ==> l->owner = T1 + * } + * lock(l->lock) + * rt_mutex_unlock(l) fixup_rt_mutex_waiters() + * if (wait_list_empty(l) { + * owner = l->owner & ~HAS_WAITERS; + * cmpxchg(l->owner, T1, NULL) + * ===> Success (l->owner = NULL) + * + * l->owner = owner + * ==> l->owner = T1 + * } + * + * With the check for the waiter bit in place T3 on CPU2 will not + * overwrite. All tasks fiddling with the waiters bit are + * serialized by l->lock, so nothing else can modify the waiters + * bit. If the bit is set then nothing can change l->owner either + * so the simple RMW is safe. The cmpxchg() will simply fail if it + * happens in the middle of the RMW because the waiters bit is + * still set. + */ + owner = READ_ONCE(*p); + if (owner & RT_MUTEX_HAS_WAITERS) + WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS); } /* -- cgit v1.3-14-g43fede From 1be5d4fa0af34fb7bafa205aeb59f5c7cc7a089d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 30 Nov 2016 21:04:42 +0000 Subject: locking/rtmutex: Use READ_ONCE() in rt_mutex_owner() While debugging the rtmutex unlock vs. dequeue race Will suggested to use READ_ONCE() in rt_mutex_owner() as it might race against the cmpxchg_release() in unlock_rt_mutex_safe(). Will: "It's a minor thing which will most likely not matter in practice" Careful search did not unearth an actual problem in todays code, but it's better to be safe than surprised. Suggested-by: Will Deacon Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Cc: David Daney Cc: Linus Torvalds Cc: Mark Rutland Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Steven Rostedt Cc: Link: http://lkml.kernel.org/r/20161130210030.431379999@linutronix.de Signed-off-by: Ingo Molnar --- kernel/locking/rtmutex_common.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h index 4f5f83c7d2d3..e317e1cbb3eb 100644 --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h @@ -75,8 +75,9 @@ task_top_pi_waiter(struct task_struct *p) static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock) { - return (struct task_struct *) - ((unsigned long)lock->owner & ~RT_MUTEX_OWNER_MASKALL); + unsigned long owner = (unsigned long) READ_ONCE(lock->owner); + + return (struct task_struct *) (owner & ~RT_MUTEX_OWNER_MASKALL); } /* -- cgit v1.3-14-g43fede From f943fe0faf27991d256e10b5a85f175385c64cdc Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Mon, 28 Nov 2016 15:24:43 +0100 Subject: lockdep: Fix report formatting Since commit: 4bcc595ccd80 ("printk: reinstate KERN_CONT for printing continuation lines") printk() requires KERN_CONT to continue log messages. Lots of printk() in lockdep.c and print_ip_sym() don't have it. As the result lockdep reports are completely messed up. Add missing KERN_CONT and inline print_ip_sym() where necessary. Example of a messed up report: 0-rc5+ #41 Not tainted ------------------------------------------------------- syz-executor0/5036 is trying to acquire lock: ( rtnl_mutex ){+.+.+.} , at: [] rtnl_lock+0x1c/0x20 but task is already holding lock: ( &net->packet.sklist_lock ){+.+...} , at: [] packet_diag_dump+0x1a6/0x1920 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #3 ( &net->packet.sklist_lock +.+...} ... Without this patch all scripts that parse kernel bug reports are broken. Signed-off-by: Dmitry Vyukov Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: andreyknvl@google.com Cc: aryabinin@virtuozzo.com Cc: joe@perches.com Cc: syzkaller@googlegroups.com Link: http://lkml.kernel.org/r/1480343083-48731-1-git-send-email-dvyukov@google.com Signed-off-by: Ingo Molnar --- kernel/locking/lockdep.c | 111 ++++++++++++++++++++++++----------------------- 1 file changed, 57 insertions(+), 54 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 589d763a49b3..4d7ffc0a0d00 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -506,13 +506,13 @@ static void __print_lock_name(struct lock_class *class) name = class->name; if (!name) { name = __get_key_name(class->key, str); - printk("%s", name); + printk(KERN_CONT "%s", name); } else { - printk("%s", name); + printk(KERN_CONT "%s", name); if (class->name_version > 1) - printk("#%d", class->name_version); + printk(KERN_CONT "#%d", class->name_version); if (class->subclass) - printk("/%d", class->subclass); + printk(KERN_CONT "/%d", class->subclass); } } @@ -522,9 +522,9 @@ static void print_lock_name(struct lock_class *class) get_usage_chars(class, usage); - printk(" ("); + printk(KERN_CONT " ("); __print_lock_name(class); - printk("){%s}", usage); + printk(KERN_CONT "){%s}", usage); } static void print_lockdep_cache(struct lockdep_map *lock) @@ -536,7 +536,7 @@ static void print_lockdep_cache(struct lockdep_map *lock) if (!name) name = __get_key_name(lock->key->subkeys, str); - printk("%s", name); + printk(KERN_CONT "%s", name); } static void print_lock(struct held_lock *hlock) @@ -551,13 +551,13 @@ static void print_lock(struct held_lock *hlock) barrier(); if (!class_idx || (class_idx - 1) >= MAX_LOCKDEP_KEYS) { - printk("\n"); + printk(KERN_CONT "\n"); return; } print_lock_name(lock_classes + class_idx - 1); - printk(", at: "); - print_ip_sym(hlock->acquire_ip); + printk(KERN_CONT ", at: [<%p>] %pS\n", + (void *)hlock->acquire_ip, (void *)hlock->acquire_ip); } static void lockdep_print_held_locks(struct task_struct *curr) @@ -792,8 +792,8 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) printk("\nnew class %p: %s", class->key, class->name); if (class->name_version > 1) - printk("#%d", class->name_version); - printk("\n"); + printk(KERN_CONT "#%d", class->name_version); + printk(KERN_CONT "\n"); dump_stack(); if (!graph_lock()) { @@ -1071,7 +1071,7 @@ print_circular_bug_entry(struct lock_list *target, int depth) return 0; printk("\n-> #%u", depth); print_lock_name(target->class); - printk(":\n"); + printk(KERN_CONT ":\n"); print_stack_trace(&target->trace, 6); return 0; @@ -1102,11 +1102,11 @@ print_circular_lock_scenario(struct held_lock *src, if (parent != source) { printk("Chain exists of:\n "); __print_lock_name(source); - printk(" --> "); + printk(KERN_CONT " --> "); __print_lock_name(parent); - printk(" --> "); + printk(KERN_CONT " --> "); __print_lock_name(target); - printk("\n\n"); + printk(KERN_CONT "\n\n"); } printk(" Possible unsafe locking scenario:\n\n"); @@ -1114,16 +1114,16 @@ print_circular_lock_scenario(struct held_lock *src, printk(" ---- ----\n"); printk(" lock("); __print_lock_name(target); - printk(");\n"); + printk(KERN_CONT ");\n"); printk(" lock("); __print_lock_name(parent); - printk(");\n"); + printk(KERN_CONT ");\n"); printk(" lock("); __print_lock_name(target); - printk(");\n"); + printk(KERN_CONT ");\n"); printk(" lock("); __print_lock_name(source); - printk(");\n"); + printk(KERN_CONT ");\n"); printk("\n *** DEADLOCK ***\n\n"); } @@ -1359,22 +1359,22 @@ static void print_lock_class_header(struct lock_class *class, int depth) printk("%*s->", depth, ""); print_lock_name(class); - printk(" ops: %lu", class->ops); - printk(" {\n"); + printk(KERN_CONT " ops: %lu", class->ops); + printk(KERN_CONT " {\n"); for (bit = 0; bit < LOCK_USAGE_STATES; bit++) { if (class->usage_mask & (1 << bit)) { int len = depth; len += printk("%*s %s", depth, "", usage_str[bit]); - len += printk(" at:\n"); + len += printk(KERN_CONT " at:\n"); print_stack_trace(class->usage_traces + bit, len); } } printk("%*s }\n", depth, ""); - printk("%*s ... key at: ",depth,""); - print_ip_sym((unsigned long)class->key); + printk("%*s ... key at: [<%p>] %pS\n", + depth, "", class->key, class->key); } /* @@ -1437,11 +1437,11 @@ print_irq_lock_scenario(struct lock_list *safe_entry, if (middle_class != unsafe_class) { printk("Chain exists of:\n "); __print_lock_name(safe_class); - printk(" --> "); + printk(KERN_CONT " --> "); __print_lock_name(middle_class); - printk(" --> "); + printk(KERN_CONT " --> "); __print_lock_name(unsafe_class); - printk("\n\n"); + printk(KERN_CONT "\n\n"); } printk(" Possible interrupt unsafe locking scenario:\n\n"); @@ -1449,18 +1449,18 @@ print_irq_lock_scenario(struct lock_list *safe_entry, printk(" ---- ----\n"); printk(" lock("); __print_lock_name(unsafe_class); - printk(");\n"); + printk(KERN_CONT ");\n"); printk(" local_irq_disable();\n"); printk(" lock("); __print_lock_name(safe_class); - printk(");\n"); + printk(KERN_CONT ");\n"); printk(" lock("); __print_lock_name(middle_class); - printk(");\n"); + printk(KERN_CONT ");\n"); printk(" \n"); printk(" lock("); __print_lock_name(safe_class); - printk(");\n"); + printk(KERN_CONT ");\n"); printk("\n *** DEADLOCK ***\n\n"); } @@ -1497,9 +1497,9 @@ print_bad_irq_dependency(struct task_struct *curr, print_lock(prev); printk("which would create a new lock dependency:\n"); print_lock_name(hlock_class(prev)); - printk(" ->"); + printk(KERN_CONT " ->"); print_lock_name(hlock_class(next)); - printk("\n"); + printk(KERN_CONT "\n"); printk("\nbut this new dependency connects a %s-irq-safe lock:\n", irqclass); @@ -1521,8 +1521,7 @@ print_bad_irq_dependency(struct task_struct *curr, lockdep_print_held_locks(curr); - printk("\nthe dependencies between %s-irq-safe lock", irqclass); - printk(" and the holding lock:\n"); + printk("\nthe dependencies between %s-irq-safe lock and the holding lock:\n", irqclass); if (!save_trace(&prev_root->trace)) return 0; print_shortest_lock_dependencies(backwards_entry, prev_root); @@ -1694,10 +1693,10 @@ print_deadlock_scenario(struct held_lock *nxt, printk(" ----\n"); printk(" lock("); __print_lock_name(prev); - printk(");\n"); + printk(KERN_CONT ");\n"); printk(" lock("); __print_lock_name(next); - printk(");\n"); + printk(KERN_CONT ");\n"); printk("\n *** DEADLOCK ***\n\n"); printk(" May be due to missing lock nesting notation\n\n"); } @@ -1891,9 +1890,9 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, graph_unlock(); printk("\n new dependency: "); print_lock_name(hlock_class(prev)); - printk(" => "); + printk(KERN_CONT " => "); print_lock_name(hlock_class(next)); - printk("\n"); + printk(KERN_CONT "\n"); dump_stack(); return graph_lock(); } @@ -2343,11 +2342,11 @@ print_usage_bug_scenario(struct held_lock *lock) printk(" ----\n"); printk(" lock("); __print_lock_name(class); - printk(");\n"); + printk(KERN_CONT ");\n"); printk(" \n"); printk(" lock("); __print_lock_name(class); - printk(");\n"); + printk(KERN_CONT ");\n"); printk("\n *** DEADLOCK ***\n\n"); } @@ -2522,14 +2521,18 @@ check_usage_backwards(struct task_struct *curr, struct held_lock *this, void print_irqtrace_events(struct task_struct *curr) { printk("irq event stamp: %u\n", curr->irq_events); - printk("hardirqs last enabled at (%u): ", curr->hardirq_enable_event); - print_ip_sym(curr->hardirq_enable_ip); - printk("hardirqs last disabled at (%u): ", curr->hardirq_disable_event); - print_ip_sym(curr->hardirq_disable_ip); - printk("softirqs last enabled at (%u): ", curr->softirq_enable_event); - print_ip_sym(curr->softirq_enable_ip); - printk("softirqs last disabled at (%u): ", curr->softirq_disable_event); - print_ip_sym(curr->softirq_disable_ip); + printk("hardirqs last enabled at (%u): [<%p>] %pS\n", + curr->hardirq_enable_event, (void *)curr->hardirq_enable_ip, + (void *)curr->hardirq_enable_ip); + printk("hardirqs last disabled at (%u): [<%p>] %pS\n", + curr->hardirq_disable_event, (void *)curr->hardirq_disable_ip, + (void *)curr->hardirq_disable_ip); + printk("softirqs last enabled at (%u): [<%p>] %pS\n", + curr->softirq_enable_event, (void *)curr->softirq_enable_ip, + (void *)curr->softirq_enable_ip); + printk("softirqs last disabled at (%u): [<%p>] %pS\n", + curr->softirq_disable_event, (void *)curr->softirq_disable_ip, + (void *)curr->softirq_disable_ip); } static int HARDIRQ_verbose(struct lock_class *class) @@ -3235,8 +3238,8 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, if (very_verbose(class)) { printk("\nacquire class [%p] %s", class->key, class->name); if (class->name_version > 1) - printk("#%d", class->name_version); - printk("\n"); + printk(KERN_CONT "#%d", class->name_version); + printk(KERN_CONT "\n"); dump_stack(); } @@ -3378,7 +3381,7 @@ print_unlock_imbalance_bug(struct task_struct *curr, struct lockdep_map *lock, printk("%s/%d is trying to release lock (", curr->comm, task_pid_nr(curr)); print_lockdep_cache(lock); - printk(") at:\n"); + printk(KERN_CONT ") at:\n"); print_ip_sym(ip); printk("but there are no more locks to release!\n"); printk("\nother info that might help us debug this:\n"); @@ -3871,7 +3874,7 @@ print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock, printk("%s/%d is trying to contend lock (", curr->comm, task_pid_nr(curr)); print_lockdep_cache(lock); - printk(") at:\n"); + printk(KERN_CONT ") at:\n"); print_ip_sym(ip); printk("but there are no locks held!\n"); printk("\nother info that might help us debug this:\n"); -- cgit v1.3-14-g43fede