From e72246748ff006ab928bc774e276e6ef5542f9c5 Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Tue, 21 Jan 2014 15:36:00 -0800 Subject: locking/mutexes/mcs: Restructure the MCS lock defines and locking code into its own file We will need the MCS lock code for doing optimistic spinning for rwsem and queued rwlock. Extracting the MCS code from mutex.c and put into its own file allow us to reuse this code easily. We also inline mcs_spin_lock and mcs_spin_unlock functions for better efficiency. Note that using the smp_load_acquire/smp_store_release pair used in mcs_lock and mcs_unlock is not sufficient to form a full memory barrier across cpus for many architectures (except x86). For applications that absolutely need a full barrier across multiple cpus with mcs_unlock and mcs_lock pair, smp_mb__after_unlock_lock() should be used after mcs_lock. Reviewed-by: Paul E. McKenney Signed-off-by: Tim Chen Signed-off-by: Davidlohr Bueso Signed-off-by: Peter Zijlstra Cc: Linus Torvalds Cc: Andrew Morton Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1390347360.3138.63.camel@schen9-DESK Signed-off-by: Ingo Molnar --- include/linux/mcs_spinlock.h | 77 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/mutex.h | 5 +-- 2 files changed, 80 insertions(+), 2 deletions(-) create mode 100644 include/linux/mcs_spinlock.h (limited to 'include/linux') diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h new file mode 100644 index 000000000000..9578ef81940b --- /dev/null +++ b/include/linux/mcs_spinlock.h @@ -0,0 +1,77 @@ +/* + * MCS lock defines + * + * This file contains the main data structure and API definitions of MCS lock. + * + * The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spin-lock + * with the desirable properties of being fair, and with each cpu trying + * to acquire the lock spinning on a local variable. + * It avoids expensive cache bouncings that common test-and-set spin-lock + * implementations incur. + */ +#ifndef __LINUX_MCS_SPINLOCK_H +#define __LINUX_MCS_SPINLOCK_H + +struct mcs_spinlock { + struct mcs_spinlock *next; + int locked; /* 1 if lock acquired */ +}; + +/* + * Note: the smp_load_acquire/smp_store_release pair is not + * sufficient to form a full memory barrier across + * cpus for many architectures (except x86) for mcs_unlock and mcs_lock. + * For applications that need a full barrier across multiple cpus + * with mcs_unlock and mcs_lock pair, smp_mb__after_unlock_lock() should be + * used after mcs_lock. + */ +static inline +void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) +{ + struct mcs_spinlock *prev; + + /* Init node */ + node->locked = 0; + node->next = NULL; + + prev = xchg(lock, node); + if (likely(prev == NULL)) { + /* Lock acquired */ + node->locked = 1; + return; + } + ACCESS_ONCE(prev->next) = node; + /* + * Wait until the lock holder passes the lock down. + * Using smp_load_acquire() provides a memory barrier that + * ensures subsequent operations happen after the lock is acquired. + */ + while (!(smp_load_acquire(&node->locked))) + arch_mutex_cpu_relax(); +} + +static inline +void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) +{ + struct mcs_spinlock *next = ACCESS_ONCE(node->next); + + if (likely(!next)) { + /* + * Release the lock by setting it to NULL + */ + if (cmpxchg(lock, node, NULL) == node) + return; + /* Wait until the next pointer is set */ + while (!(next = ACCESS_ONCE(node->next))) + arch_mutex_cpu_relax(); + } + /* + * Pass lock to next waiter. + * smp_store_release() provides a memory barrier to ensure + * all operations in the critical section has been completed + * before unlocking. + */ + smp_store_release(&next->locked, 1); +} + +#endif /* __LINUX_MCS_SPINLOCK_H */ diff --git a/include/linux/mutex.h b/include/linux/mutex.h index d3181936c138..c482e1d2cc49 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -46,6 +46,7 @@ * - detects multi-task circular deadlocks and prints out all affected * locks and tasks (and only those tasks) */ +struct mcs_spinlock; struct mutex { /* 1: unlocked, 0: locked, negative: locked, possible waiters */ atomic_t count; @@ -55,7 +56,7 @@ struct mutex { struct task_struct *owner; #endif #ifdef CONFIG_MUTEX_SPIN_ON_OWNER - void *spin_mlock; /* Spinner MCS lock */ + struct mcs_spinlock *mcs_lock; /* Spinner MCS lock */ #endif #ifdef CONFIG_DEBUG_MUTEXES const char *name; @@ -179,4 +180,4 @@ extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); # define arch_mutex_cpu_relax() cpu_relax() #endif -#endif +#endif /* __LINUX_MUTEX_H */ -- cgit v1.2.3-59-g8ed1b From 5faeb8adb956a5ad6579c4e309e8689943ad8294 Mon Sep 17 00:00:00 2001 From: Jason Low Date: Tue, 21 Jan 2014 15:36:05 -0800 Subject: locking/mcs: Micro-optimize the MCS code, add extra comments Remove unnecessary operation to assign locked status to 1 if lock is acquired without contention. Lock status will not be checked by lock holder again once it is acquired and any lock contenders will not be looking at the lock holder's lock status. Make the cmpxchg(lock, node, NULL) == node check in mcs_spin_unlock() likely() as it is likely that a race did not occur most of the time. Also add in more comments describing how the local node is used in MCS locks. Reviewed-by: Paul E. McKenney Reviewed-by: Tim Chen Signed-off-by: Jason Low Signed-off-by: Tim Chen Signed-off-by: Peter Zijlstra Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/1390347365.3138.64.camel@schen9-DESK Signed-off-by: Ingo Molnar --- include/linux/mcs_spinlock.h | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h index 9578ef81940b..143fa428a857 100644 --- a/include/linux/mcs_spinlock.h +++ b/include/linux/mcs_spinlock.h @@ -25,6 +25,17 @@ struct mcs_spinlock { * with mcs_unlock and mcs_lock pair, smp_mb__after_unlock_lock() should be * used after mcs_lock. */ + +/* + * In order to acquire the lock, the caller should declare a local node and + * pass a reference of the node to this function in addition to the lock. + * If the lock has already been acquired, then this will proceed to spin + * on this node->locked until the previous lock holder sets the node->locked + * in mcs_spin_unlock(). + * + * We don't inline mcs_spin_lock() so that perf can correctly account for the + * time spent in this lock function. + */ static inline void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) { @@ -36,8 +47,14 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) prev = xchg(lock, node); if (likely(prev == NULL)) { - /* Lock acquired */ - node->locked = 1; + /* + * Lock acquired, don't need to set node->locked to 1. Threads + * only spin on its own node->locked value for lock acquisition. + * However, since this thread can immediately acquire the lock + * and does not proceed to spin on its own node->locked, this + * value won't be used. If a debug mode is needed to + * audit lock status, then set node->locked value here. + */ return; } ACCESS_ONCE(prev->next) = node; @@ -50,6 +67,10 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) arch_mutex_cpu_relax(); } +/* + * Releases the lock. The caller should pass in the corresponding node that + * was used to acquire the lock. + */ static inline void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) { @@ -59,7 +80,7 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) /* * Release the lock by setting it to NULL */ - if (cmpxchg(lock, node, NULL) == node) + if (likely(cmpxchg(lock, node, NULL) == node)) return; /* Wait until the next pointer is set */ while (!(next = ACCESS_ONCE(node->next))) -- cgit v1.2.3-59-g8ed1b From e207552e64ea053a33e856828ad7915484911d06 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 21 Jan 2014 15:36:10 -0800 Subject: locking/mcs: Allow architectures to hook in to contended paths When contended, architectures may be able to reduce the polling overhead in ways which aren't expressible using a simple relax() primitive. This patch allows architectures to hook into the mcs_{lock,unlock} functions for the contended cases only. Reviewed-by: Paul E. McKenney Signed-off-by: Will Deacon Signed-off-by: Tim Chen Signed-off-by: Peter Zijlstra Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/1390347370.3138.65.camel@schen9-DESK Signed-off-by: Ingo Molnar --- include/linux/mcs_spinlock.h | 42 ++++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h index 143fa428a857..e9a4d74c63dc 100644 --- a/include/linux/mcs_spinlock.h +++ b/include/linux/mcs_spinlock.h @@ -17,6 +17,28 @@ struct mcs_spinlock { int locked; /* 1 if lock acquired */ }; +#ifndef arch_mcs_spin_lock_contended +/* + * Using smp_load_acquire() provides a memory barrier that ensures + * subsequent operations happen after the lock is acquired. + */ +#define arch_mcs_spin_lock_contended(l) \ +do { \ + while (!(smp_load_acquire(l))) \ + arch_mutex_cpu_relax(); \ +} while (0) +#endif + +#ifndef arch_mcs_spin_unlock_contended +/* + * smp_store_release() provides a memory barrier to ensure all + * operations in the critical section has been completed before + * unlocking. + */ +#define arch_mcs_spin_unlock_contended(l) \ + smp_store_release((l), 1) +#endif + /* * Note: the smp_load_acquire/smp_store_release pair is not * sufficient to form a full memory barrier across @@ -58,13 +80,9 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) return; } ACCESS_ONCE(prev->next) = node; - /* - * Wait until the lock holder passes the lock down. - * Using smp_load_acquire() provides a memory barrier that - * ensures subsequent operations happen after the lock is acquired. - */ - while (!(smp_load_acquire(&node->locked))) - arch_mutex_cpu_relax(); + + /* Wait until the lock holder passes the lock down. */ + arch_mcs_spin_lock_contended(&node->locked); } /* @@ -86,13 +104,9 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) while (!(next = ACCESS_ONCE(node->next))) arch_mutex_cpu_relax(); } - /* - * Pass lock to next waiter. - * smp_store_release() provides a memory barrier to ensure - * all operations in the critical section has been completed - * before unlocking. - */ - smp_store_release(&next->locked, 1); + + /* Pass lock to next waiter. */ + arch_mcs_spin_unlock_contended(&next->locked); } #endif /* __LINUX_MCS_SPINLOCK_H */ -- cgit v1.2.3-59-g8ed1b From 980f88e414418bf65569a3b62b08b07e6fc2f4c6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 6 Feb 2014 17:28:41 +0100 Subject: sched/wait: Suppress Sparse 'variable shadowing' warning This warning seems to show up a lot now, since ___wait_event() is (indirectly) used inside wait_event_timeout(), which also has a variable called __ret. Rename the one in ___wait_event() to ___ret (another leading underscore) to suppress the warning. Signed-off-by: Johannes Berg Signed-off-by: Peter Zijlstra Cc: Steven Rostedt Cc: Andrew Morton Link: http://lkml.kernel.org/r/1391704121.12789.20.camel@jlt4.sipsolutions.net Signed-off-by: Ingo Molnar --- include/linux/wait.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/wait.h b/include/linux/wait.h index 559044c79232..c55ea5c24404 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -195,7 +195,7 @@ wait_queue_head_t *bit_waitqueue(void *, int); ({ \ __label__ __out; \ wait_queue_t __wait; \ - long __ret = ret; \ + long ___ret = ret; \ \ INIT_LIST_HEAD(&__wait.task_list); \ if (exclusive) \ @@ -210,7 +210,7 @@ wait_queue_head_t *bit_waitqueue(void *, int); break; \ \ if (___wait_is_interruptible(state) && __int) { \ - __ret = __int; \ + ___ret = __int; \ if (exclusive) { \ abort_exclusive_wait(&wq, &__wait, \ state, NULL); \ @@ -222,7 +222,7 @@ wait_queue_head_t *bit_waitqueue(void *, int); cmd; \ } \ finish_wait(&wq, &__wait); \ -__out: __ret; \ +__out: ___ret; \ }) #define __wait_event(wq, condition) \ -- cgit v1.2.3-59-g8ed1b From ddf1d169c0a489d498c1799a7043904a43b0c159 Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Tue, 21 Jan 2014 15:36:22 -0800 Subject: locking/mcs: Allow architecture specific asm files to be used for contended case This patch allows each architecture to add its specific assembly optimized arch_mcs_spin_lock_contended and arch_mcs_spinlock_uncontended for MCS lock and unlock functions. Signed-off-by: Tim Chen Cc: Scott J Norton Cc: Raghavendra K T Cc: AswinChandramouleeswaran Cc: George Spelvin Cc: Rik vanRiel Cc: Andrea Arcangeli Cc: MichelLespinasse Cc: Peter Hurley Cc: Andi Kleen Cc: Alex Shi Cc: Dave Hansen Cc: Tim Chen Cc: Arnd Bergmann Cc: "Figo.zhang" Cc: "Paul E.McKenney" Cc: "H. Peter Anvin" Cc: Davidlohr Bueso Cc: Waiman Long Cc: Ingo Molnar Cc: Will Deacon Cc: Andrew Morton Cc: Linus Torvalds Cc: Matthew R Wilcox Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1390347382.3138.67.camel@schen9-DESK Signed-off-by: Ingo Molnar --- arch/alpha/include/asm/Kbuild | 1 + arch/arc/include/asm/Kbuild | 1 + arch/arm/include/asm/Kbuild | 1 + arch/arm64/include/asm/Kbuild | 1 + arch/avr32/include/asm/Kbuild | 1 + arch/blackfin/include/asm/Kbuild | 1 + arch/c6x/include/asm/Kbuild | 1 + arch/cris/include/asm/Kbuild | 1 + arch/frv/include/asm/Kbuild | 1 + arch/hexagon/include/asm/Kbuild | 1 + arch/ia64/include/asm/Kbuild | 1 + arch/m32r/include/asm/Kbuild | 1 + arch/m68k/include/asm/Kbuild | 1 + arch/metag/include/asm/Kbuild | 1 + arch/microblaze/include/asm/Kbuild | 1 + arch/mips/include/asm/Kbuild | 1 + arch/mn10300/include/asm/Kbuild | 1 + arch/openrisc/include/asm/Kbuild | 1 + arch/parisc/include/asm/Kbuild | 1 + arch/powerpc/include/asm/Kbuild | 1 + arch/s390/include/asm/Kbuild | 1 + arch/score/include/asm/Kbuild | 1 + arch/sh/include/asm/Kbuild | 1 + arch/sparc/include/asm/Kbuild | 1 + arch/tile/include/asm/Kbuild | 1 + arch/um/include/asm/Kbuild | 1 + arch/unicore32/include/asm/Kbuild | 1 + arch/x86/include/asm/Kbuild | 1 + arch/xtensa/include/asm/Kbuild | 1 + include/asm-generic/mcs_spinlock.h | 13 +++++++++++++ include/linux/mcs_spinlock.h | 2 ++ 31 files changed, 44 insertions(+) create mode 100644 include/asm-generic/mcs_spinlock.h (limited to 'include/linux') diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild index 55a2b0395d48..7736f426ff3b 100644 --- a/arch/alpha/include/asm/Kbuild +++ b/arch/alpha/include/asm/Kbuild @@ -3,5 +3,6 @@ generic-y += clkdev.h generic-y += exec.h generic-y += hash.h +generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += trace_clock.h diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild index 54ceb91c7ce0..e76fd79f32b0 100644 --- a/arch/arc/include/asm/Kbuild +++ b/arch/arc/include/asm/Kbuild @@ -22,6 +22,7 @@ generic-y += kmap_types.h generic-y += kvm_para.h generic-y += local.h generic-y += local64.h +generic-y += mcs_spinlock.h generic-y += mman.h generic-y += msgbuf.h generic-y += param.h diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index 3b2092049b51..23e728ecf8ab 100644 --- a/arch/arm/include/asm/Kbuild +++ b/arch/arm/include/asm/Kbuild @@ -14,6 +14,7 @@ generic-y += irq_regs.h generic-y += kdebug.h generic-y += local.h generic-y += local64.h +generic-y += mcs_spinlock.h generic-y += msgbuf.h generic-y += param.h generic-y += parport.h diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index 54038befa0a4..3bdfdda70567 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -23,6 +23,7 @@ generic-y += kmap_types.h generic-y += kvm_para.h generic-y += local.h generic-y += local64.h +generic-y += mcs_spinlock.h generic-y += mman.h generic-y += msgbuf.h generic-y += mutex.h diff --git a/arch/avr32/include/asm/Kbuild b/arch/avr32/include/asm/Kbuild index 064ca3c058ff..8b398ff96974 100644 --- a/arch/avr32/include/asm/Kbuild +++ b/arch/avr32/include/asm/Kbuild @@ -11,6 +11,7 @@ generic-y += hash.h generic-y += irq_regs.h generic-y += local.h generic-y += local64.h +generic-y += mcs_spinlock.h generic-y += param.h generic-y += percpu.h generic-y += preempt.h diff --git a/arch/blackfin/include/asm/Kbuild b/arch/blackfin/include/asm/Kbuild index ca30a3dc47e7..0d93b9a79ca9 100644 --- a/arch/blackfin/include/asm/Kbuild +++ b/arch/blackfin/include/asm/Kbuild @@ -20,6 +20,7 @@ generic-y += kmap_types.h generic-y += kvm_para.h generic-y += local.h generic-y += local64.h +generic-y += mcs_spinlock.h generic-y += mman.h generic-y += msgbuf.h generic-y += mutex.h diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild index dea0671524a1..8dbdce8421b0 100644 --- a/arch/c6x/include/asm/Kbuild +++ b/arch/c6x/include/asm/Kbuild @@ -25,6 +25,7 @@ generic-y += irq_regs.h generic-y += kdebug.h generic-y += kmap_types.h generic-y += local.h +generic-y += mcs_spinlock.h generic-y += mman.h generic-y += mmu.h generic-y += mmu_context.h diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild index 7f8d1b174a34..056027f38351 100644 --- a/arch/cris/include/asm/Kbuild +++ b/arch/cris/include/asm/Kbuild @@ -9,6 +9,7 @@ generic-y += exec.h generic-y += hash.h generic-y += kvm_para.h generic-y += linkage.h +generic-y += mcs_spinlock.h generic-y += module.h generic-y += preempt.h generic-y += trace_clock.h diff --git a/arch/frv/include/asm/Kbuild b/arch/frv/include/asm/Kbuild index 93428d035cfb..babb9338ebf8 100644 --- a/arch/frv/include/asm/Kbuild +++ b/arch/frv/include/asm/Kbuild @@ -2,5 +2,6 @@ generic-y += clkdev.h generic-y += exec.h generic-y += hash.h +generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += trace_clock.h diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild index 9f0eda48643a..eadcc118f950 100644 --- a/arch/hexagon/include/asm/Kbuild +++ b/arch/hexagon/include/asm/Kbuild @@ -27,6 +27,7 @@ generic-y += kdebug.h generic-y += kmap_types.h generic-y += local.h generic-y += local64.h +generic-y += mcs_spinlock.h generic-y += mman.h generic-y += msgbuf.h generic-y += pci.h diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild index 81ebef7f1a87..0da4aa2602ae 100644 --- a/arch/ia64/include/asm/Kbuild +++ b/arch/ia64/include/asm/Kbuild @@ -3,6 +3,7 @@ generic-y += clkdev.h generic-y += exec.h generic-y += hash.h generic-y += kvm_para.h +generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += trace_clock.h generic-y += vtime.h diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild index 87042215b543..5825a35b2c56 100644 --- a/arch/m32r/include/asm/Kbuild +++ b/arch/m32r/include/asm/Kbuild @@ -2,6 +2,7 @@ generic-y += clkdev.h generic-y += exec.h generic-y += hash.h +generic-y += mcs_spinlock.h generic-y += module.h generic-y += preempt.h generic-y += trace_clock.h diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild index f44ac8f9536a..86b4c178cc75 100644 --- a/arch/m68k/include/asm/Kbuild +++ b/arch/m68k/include/asm/Kbuild @@ -16,6 +16,7 @@ generic-y += kmap_types.h generic-y += kvm_para.h generic-y += local.h generic-y += local64.h +generic-y += mcs_spinlock.h generic-y += mman.h generic-y += mutex.h generic-y += percpu.h diff --git a/arch/metag/include/asm/Kbuild b/arch/metag/include/asm/Kbuild index ac22144e92a3..c29ead89a317 100644 --- a/arch/metag/include/asm/Kbuild +++ b/arch/metag/include/asm/Kbuild @@ -24,6 +24,7 @@ generic-y += kmap_types.h generic-y += kvm_para.h generic-y += local.h generic-y += local64.h +generic-y += mcs_spinlock.h generic-y += msgbuf.h generic-y += mutex.h generic-y += param.h diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild index 4b5f12259d81..1f590ab8f323 100644 --- a/arch/microblaze/include/asm/Kbuild +++ b/arch/microblaze/include/asm/Kbuild @@ -3,6 +3,7 @@ generic-y += barrier.h generic-y += clkdev.h generic-y += exec.h generic-y += hash.h +generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += syscalls.h generic-y += trace_clock.h diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild index 82ee17a68ae0..05439187891d 100644 --- a/arch/mips/include/asm/Kbuild +++ b/arch/mips/include/asm/Kbuild @@ -4,6 +4,7 @@ generic-y += current.h generic-y += emergency-restart.h generic-y += hash.h generic-y += local64.h +generic-y += mcs_spinlock.h generic-y += mutex.h generic-y += parport.h generic-y += percpu.h diff --git a/arch/mn10300/include/asm/Kbuild b/arch/mn10300/include/asm/Kbuild index da3b2f50c07e..cbc6b9bf45da 100644 --- a/arch/mn10300/include/asm/Kbuild +++ b/arch/mn10300/include/asm/Kbuild @@ -3,5 +3,6 @@ generic-y += barrier.h generic-y += clkdev.h generic-y += exec.h generic-y += hash.h +generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += trace_clock.h diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild index f62a67a15055..480af0d9c2f5 100644 --- a/arch/openrisc/include/asm/Kbuild +++ b/arch/openrisc/include/asm/Kbuild @@ -35,6 +35,7 @@ generic-y += kdebug.h generic-y += kmap_types.h generic-y += kvm_para.h generic-y += local.h +generic-y += mcs_spinlock.h generic-y += mman.h generic-y += module.h generic-y += msgbuf.h diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild index 958b79aac24f..ecf25e6678ad 100644 --- a/arch/parisc/include/asm/Kbuild +++ b/arch/parisc/include/asm/Kbuild @@ -14,6 +14,7 @@ generic-y += kdebug.h generic-y += kvm_para.h generic-y += local.h generic-y += local64.h +generic-y += mcs_spinlock.h generic-y += mutex.h generic-y += param.h generic-y += percpu.h diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index b618c416d3d9..3fb1bc432f4f 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -1,6 +1,7 @@ generic-y += clkdev.h generic-y += hash.h +generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += rwsem.h generic-y += trace_clock.h diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild index 482f492f0834..57892a8a9055 100644 --- a/arch/s390/include/asm/Kbuild +++ b/arch/s390/include/asm/Kbuild @@ -2,5 +2,6 @@ generic-y += clkdev.h generic-y += hash.h +generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += trace_clock.h diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild index e92319fb146d..4630cf217b5b 100644 --- a/arch/score/include/asm/Kbuild +++ b/arch/score/include/asm/Kbuild @@ -5,6 +5,7 @@ header-y += generic-y += barrier.h generic-y += clkdev.h generic-y += hash.h +generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += trace_clock.h generic-y += xor.h diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild index 85c214e456f9..c19e47dacb31 100644 --- a/arch/sh/include/asm/Kbuild +++ b/arch/sh/include/asm/Kbuild @@ -15,6 +15,7 @@ generic-y += irq_regs.h generic-y += kvm_para.h generic-y += local.h generic-y += local64.h +generic-y += mcs_spinlock.h generic-y += mman.h generic-y += msgbuf.h generic-y += param.h diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild index 82150287bfa4..a45821818003 100644 --- a/arch/sparc/include/asm/Kbuild +++ b/arch/sparc/include/asm/Kbuild @@ -11,6 +11,7 @@ generic-y += irq_regs.h generic-y += linkage.h generic-y += local.h generic-y += local64.h +generic-y += mcs_spinlock.h generic-y += module.h generic-y += mutex.h generic-y += preempt.h diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild index e0b1ed967737..0aa5675e7025 100644 --- a/arch/tile/include/asm/Kbuild +++ b/arch/tile/include/asm/Kbuild @@ -19,6 +19,7 @@ generic-y += ipcbuf.h generic-y += irq_regs.h generic-y += local.h generic-y += local64.h +generic-y += mcs_spinlock.h generic-y += msgbuf.h generic-y += mutex.h generic-y += param.h diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index 215e05d8d4b8..a5e4b6068213 100644 --- a/arch/um/include/asm/Kbuild +++ b/arch/um/include/asm/Kbuild @@ -15,6 +15,7 @@ generic-y += hw_irq.h generic-y += io.h generic-y += irq_regs.h generic-y += kdebug.h +generic-y += mcs_spinlock.h generic-y += mutex.h generic-y += param.h generic-y += pci.h diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild index ad2c737a9cab..1e5fb872a4aa 100644 --- a/arch/unicore32/include/asm/Kbuild +++ b/arch/unicore32/include/asm/Kbuild @@ -25,6 +25,7 @@ generic-y += irq_regs.h generic-y += kdebug.h generic-y += kmap_types.h generic-y += local.h +generic-y += mcs_spinlock.h generic-y += mman.h generic-y += module.h generic-y += msgbuf.h diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 7f669853317a..a8fee078b92f 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -5,3 +5,4 @@ genhdr-y += unistd_64.h genhdr-y += unistd_x32.h generic-y += clkdev.h +generic-y += mcs_spinlock.h diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild index 223280d8e611..c3d20ba6eb86 100644 --- a/arch/xtensa/include/asm/Kbuild +++ b/arch/xtensa/include/asm/Kbuild @@ -18,6 +18,7 @@ generic-y += kvm_para.h generic-y += linkage.h generic-y += local.h generic-y += local64.h +generic-y += mcs_spinlock.h generic-y += percpu.h generic-y += preempt.h generic-y += resource.h diff --git a/include/asm-generic/mcs_spinlock.h b/include/asm-generic/mcs_spinlock.h new file mode 100644 index 000000000000..10cd4ffc6ba2 --- /dev/null +++ b/include/asm-generic/mcs_spinlock.h @@ -0,0 +1,13 @@ +#ifndef __ASM_MCS_SPINLOCK_H +#define __ASM_MCS_SPINLOCK_H + +/* + * Architectures can define their own: + * + * arch_mcs_spin_lock_contended(l) + * arch_mcs_spin_unlock_contended(l) + * + * See kernel/locking/mcs_spinlock.c. + */ + +#endif /* __ASM_MCS_SPINLOCK_H */ diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h index e9a4d74c63dc..f2a5c6360083 100644 --- a/include/linux/mcs_spinlock.h +++ b/include/linux/mcs_spinlock.h @@ -12,6 +12,8 @@ #ifndef __LINUX_MCS_SPINLOCK_H #define __LINUX_MCS_SPINLOCK_H +#include + struct mcs_spinlock { struct mcs_spinlock *next; int locked; /* 1 if lock acquired */ -- cgit v1.2.3-59-g8ed1b From fb9edbe98493fcd9df66de926ae9157cbe0e4dcd Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 20 Jan 2014 19:20:06 +0100 Subject: lockdep: Make held_lock->check and "int check" argument bool The "int check" argument of lock_acquire() and held_lock->check are misleading. This is actually a boolean: 2 means "true", everything else is "false". And there is no need to pass 1 or 0 to lock_acquire() depending on CONFIG_PROVE_LOCKING, __lock_acquire() checks prove_locking at the start and clears "check" if !CONFIG_PROVE_LOCKING. Note: probably we can simply kill this member/arg. The only explicit user of check => 0 is rcu_lock_acquire(), perhaps we can change it to use lock_acquire(trylock =>, read => 2). __lockdep_no_validate means check => 0 implicitly, but we can change validate_chain() to check hlock->instance->key instead. Not to mention it would be nice to get rid of lockdep_set_novalidate_class(). Signed-off-by: Oleg Nesterov Cc: Dave Jones Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Paul McKenney Cc: Steven Rostedt Cc: Alan Stern Cc: Sasha Levin Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20140120182006.GA26495@redhat.com Signed-off-by: Ingo Molnar --- drivers/tty/tty_ldsem.c | 15 ++++----------- include/linux/lockdep.h | 25 +++++++++---------------- include/linux/rcupdate.h | 2 +- kernel/locking/lockdep.c | 11 ++++------- 4 files changed, 18 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/tty_ldsem.c b/drivers/tty/tty_ldsem.c index d8a55e87877f..0ffb0cbe2823 100644 --- a/drivers/tty/tty_ldsem.c +++ b/drivers/tty/tty_ldsem.c @@ -39,17 +39,10 @@ lock_acquire(&(l)->dep_map, s, t, r, c, n, i) # define __rel(l, n, i) \ lock_release(&(l)->dep_map, n, i) -# ifdef CONFIG_PROVE_LOCKING -# define lockdep_acquire(l, s, t, i) __acq(l, s, t, 0, 2, NULL, i) -# define lockdep_acquire_nest(l, s, t, n, i) __acq(l, s, t, 0, 2, n, i) -# define lockdep_acquire_read(l, s, t, i) __acq(l, s, t, 1, 2, NULL, i) -# define lockdep_release(l, n, i) __rel(l, n, i) -# else -# define lockdep_acquire(l, s, t, i) __acq(l, s, t, 0, 1, NULL, i) -# define lockdep_acquire_nest(l, s, t, n, i) __acq(l, s, t, 0, 1, n, i) -# define lockdep_acquire_read(l, s, t, i) __acq(l, s, t, 1, 1, NULL, i) -# define lockdep_release(l, n, i) __rel(l, n, i) -# endif +#define lockdep_acquire(l, s, t, i) __acq(l, s, t, 0, 1, NULL, i) +#define lockdep_acquire_nest(l, s, t, n, i) __acq(l, s, t, 0, 1, n, i) +#define lockdep_acquire_read(l, s, t, i) __acq(l, s, t, 1, 1, NULL, i) +#define lockdep_release(l, n, i) __rel(l, n, i) #else # define lockdep_acquire(l, s, t, i) do { } while (0) # define lockdep_acquire_nest(l, s, t, n, i) do { } while (0) diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 92b1bfc5da60..1626047c1f26 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -252,9 +252,9 @@ struct held_lock { unsigned int trylock:1; /* 16 bits */ unsigned int read:2; /* see lock_acquire() comment */ - unsigned int check:2; /* see lock_acquire() comment */ + unsigned int check:1; /* see lock_acquire() comment */ unsigned int hardirqs_off:1; - unsigned int references:11; /* 32 bits */ + unsigned int references:12; /* 32 bits */ }; /* @@ -326,9 +326,8 @@ static inline int lockdep_match_key(struct lockdep_map *lock, * * Values for check: * - * 0: disabled - * 1: simple checks (freeing, held-at-exit-time, etc.) - * 2: full validation + * 0: simple checks (freeing, held-at-exit-time, etc.) + * 1: full validation */ extern void lock_acquire(struct lockdep_map *lock, unsigned int subclass, int trylock, int read, int check, @@ -479,15 +478,9 @@ static inline void print_irqtrace_events(struct task_struct *curr) * on the per lock-class debug mode: */ -#ifdef CONFIG_PROVE_LOCKING - #define lock_acquire_exclusive(l, s, t, n, i) lock_acquire(l, s, t, 0, 2, n, i) - #define lock_acquire_shared(l, s, t, n, i) lock_acquire(l, s, t, 1, 2, n, i) - #define lock_acquire_shared_recursive(l, s, t, n, i) lock_acquire(l, s, t, 2, 2, n, i) -#else - #define lock_acquire_exclusive(l, s, t, n, i) lock_acquire(l, s, t, 0, 1, n, i) - #define lock_acquire_shared(l, s, t, n, i) lock_acquire(l, s, t, 1, 1, n, i) - #define lock_acquire_shared_recursive(l, s, t, n, i) lock_acquire(l, s, t, 2, 1, n, i) -#endif +#define lock_acquire_exclusive(l, s, t, n, i) lock_acquire(l, s, t, 0, 1, n, i) +#define lock_acquire_shared(l, s, t, n, i) lock_acquire(l, s, t, 1, 1, n, i) +#define lock_acquire_shared_recursive(l, s, t, n, i) lock_acquire(l, s, t, 2, 1, n, i) #define spin_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i) #define spin_acquire_nest(l, s, t, n, i) lock_acquire_exclusive(l, s, t, n, i) @@ -518,13 +511,13 @@ static inline void print_irqtrace_events(struct task_struct *curr) # define might_lock(lock) \ do { \ typecheck(struct lockdep_map *, &(lock)->dep_map); \ - lock_acquire(&(lock)->dep_map, 0, 0, 0, 2, NULL, _THIS_IP_); \ + lock_acquire(&(lock)->dep_map, 0, 0, 0, 1, NULL, _THIS_IP_); \ lock_release(&(lock)->dep_map, 0, _THIS_IP_); \ } while (0) # define might_lock_read(lock) \ do { \ typecheck(struct lockdep_map *, &(lock)->dep_map); \ - lock_acquire(&(lock)->dep_map, 0, 0, 1, 2, NULL, _THIS_IP_); \ + lock_acquire(&(lock)->dep_map, 0, 0, 1, 1, NULL, _THIS_IP_); \ lock_release(&(lock)->dep_map, 0, _THIS_IP_); \ } while (0) #else diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 72bf3a01a4ee..adff3c99dcaa 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -314,7 +314,7 @@ static inline bool rcu_lockdep_current_cpu_online(void) static inline void rcu_lock_acquire(struct lockdep_map *map) { - lock_acquire(map, 0, 0, 2, 1, NULL, _THIS_IP_); + lock_acquire(map, 0, 0, 2, 0, NULL, _THIS_IP_); } static inline void rcu_lock_release(struct lockdep_map *map) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index eb8a54783fa0..8c85a0da5a38 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -2098,7 +2098,7 @@ static int validate_chain(struct task_struct *curr, struct lockdep_map *lock, * (If lookup_chain_cache() returns with 1 it acquires * graph_lock for us) */ - if (!hlock->trylock && (hlock->check == 2) && + if (!hlock->trylock && hlock->check && lookup_chain_cache(curr, hlock, chain_key)) { /* * Check whether last held lock: @@ -3055,9 +3055,6 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, int class_idx; u64 chain_key; - if (!prove_locking) - check = 1; - if (unlikely(!debug_locks)) return 0; @@ -3069,8 +3066,8 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) return 0; - if (lock->key == &__lockdep_no_validate__) - check = 1; + if (!prove_locking || lock->key == &__lockdep_no_validate__) + check = 0; if (subclass < NR_LOCKDEP_CACHING_CLASSES) class = lock->class_cache[subclass]; @@ -3138,7 +3135,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, hlock->holdtime_stamp = lockstat_clock(); #endif - if (check == 2 && !mark_irqflags(curr, hlock)) + if (check && !mark_irqflags(curr, hlock)) return 0; /* mark it as used: */ -- cgit v1.2.3-59-g8ed1b From 47be1c1a0e188232b5e5962917b21750053cd3f8 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 20 Jan 2014 19:20:16 +0100 Subject: lockdep: Change lockdep_set_novalidate_class() to use _and_name Cosmetic. This doesn't really matter because a) device->mutex is the only user of __lockdep_no_validate__ and b) this class should be never reported as the source of problem, but if something goes wrong "&dev->mutex" looks better than "&__lockdep_no_validate__" as the name of the lock. Signed-off-by: Oleg Nesterov Cc: Dave Jones Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Paul McKenney Cc: Steven Rostedt Cc: Alan Stern Cc: Sasha Levin Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20140120182016.GA26512@redhat.com Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 1626047c1f26..060e5137fd80 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -303,7 +303,7 @@ extern void lockdep_init_map(struct lockdep_map *lock, const char *name, (lock)->dep_map.key, sub) #define lockdep_set_novalidate_class(lock) \ - lockdep_set_class(lock, &__lockdep_no_validate__) + lockdep_set_class_and_name(lock, &__lockdep_no_validate__, #lock) /* * Compare locking classes */ -- cgit v1.2.3-59-g8ed1b From f207dbe63c61158c234e2e8929a3725a7f6b2b9b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 27 Feb 2014 12:20:31 +0100 Subject: Revert "sched/wait: Suppress Sparse 'variable shadowing' warning" This reverts commit 980f88e414418bf65569a3b62b08b07e6fc2f4c6. This warning is actually useful, don't suppress it. We actually rely on the shadowing for ___wait_cond_timeout(). We further used the __ret variable in __wait_event_timeout()'s cmd argument: __ret = schedule_timeout(__ret). That now explicitly uses the wrong __ret. Reported-by: Gregory CLEMENT Requested-by: Andrew Morton Requested-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-Q5blhuqqzwgVwvjf1gszrdol@git.kernel.org Signed-off-by: Ingo Molnar --- include/linux/wait.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/wait.h b/include/linux/wait.h index c55ea5c24404..559044c79232 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -195,7 +195,7 @@ wait_queue_head_t *bit_waitqueue(void *, int); ({ \ __label__ __out; \ wait_queue_t __wait; \ - long ___ret = ret; \ + long __ret = ret; \ \ INIT_LIST_HEAD(&__wait.task_list); \ if (exclusive) \ @@ -210,7 +210,7 @@ wait_queue_head_t *bit_waitqueue(void *, int); break; \ \ if (___wait_is_interruptible(state) && __int) { \ - ___ret = __int; \ + __ret = __int; \ if (exclusive) { \ abort_exclusive_wait(&wq, &__wait, \ state, NULL); \ @@ -222,7 +222,7 @@ wait_queue_head_t *bit_waitqueue(void *, int); cmd; \ } \ finish_wait(&wq, &__wait); \ -__out: ___ret; \ +__out: __ret; \ }) #define __wait_event(wq, condition) \ -- cgit v1.2.3-59-g8ed1b From 03b8c7b623c80af264c4c8d6111e5c6289933666 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 2 Mar 2014 13:09:47 +0100 Subject: futex: Allow architectures to skip futex_atomic_cmpxchg_inatomic() test If an architecture has futex_atomic_cmpxchg_inatomic() implemented and there is no runtime check necessary, allow to skip the test within futex_init(). This allows to get rid of some code which would always give the same result, and also allows the compiler to optimize a couple of if statements away. Signed-off-by: Heiko Carstens Cc: Finn Thain Cc: Geert Uytterhoeven Link: http://lkml.kernel.org/r/20140302120947.GA3641@osiris Signed-off-by: Thomas Gleixner --- arch/s390/Kconfig | 1 + include/linux/futex.h | 4 ++++ init/Kconfig | 7 +++++++ kernel/futex.c | 37 ++++++++++++++++++++++++------------- 4 files changed, 36 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 65a07750f4f9..bb74b21f007a 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -117,6 +117,7 @@ config S390 select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_TRACE_MCOUNT_TEST + select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZ4 diff --git a/include/linux/futex.h b/include/linux/futex.h index b0d95cac826e..6435f46d6e13 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -55,7 +55,11 @@ union futex_key { #ifdef CONFIG_FUTEX extern void exit_robust_list(struct task_struct *curr); extern void exit_pi_state_list(struct task_struct *curr); +#ifdef CONFIG_HAVE_FUTEX_CMPXCHG +#define futex_cmpxchg_enabled 1 +#else extern int futex_cmpxchg_enabled; +#endif #else static inline void exit_robust_list(struct task_struct *curr) { diff --git a/init/Kconfig b/init/Kconfig index 009a797dd242..d56cb03c1b49 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1387,6 +1387,13 @@ config FUTEX support for "fast userspace mutexes". The resulting kernel may not run glibc-based applications correctly. +config HAVE_FUTEX_CMPXCHG + bool + help + Architectures should select this if futex_atomic_cmpxchg_inatomic() + is implemented and always working. This removes a couple of runtime + checks. + config EPOLL bool "Enable eventpoll support" if EXPERT default y diff --git a/kernel/futex.c b/kernel/futex.c index 44a1261cb9ff..5d17e3a83f8c 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -157,7 +157,9 @@ * enqueue. */ +#ifndef CONFIG_HAVE_FUTEX_CMPXCHG int __read_mostly futex_cmpxchg_enabled; +#endif /* * Futex flags used to encode options to functions and preserve them across @@ -2843,9 +2845,28 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); } -static int __init futex_init(void) +static void __init futex_detect_cmpxchg(void) { +#ifndef CONFIG_HAVE_FUTEX_CMPXCHG u32 curval; + + /* + * This will fail and we want it. Some arch implementations do + * runtime detection of the futex_atomic_cmpxchg_inatomic() + * functionality. We want to know that before we call in any + * of the complex code paths. Also we want to prevent + * registration of robust lists in that case. NULL is + * guaranteed to fault and we get -EFAULT on functional + * implementation, the non-functional ones will return + * -ENOSYS. + */ + if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT) + futex_cmpxchg_enabled = 1; +#endif +} + +static int __init futex_init(void) +{ unsigned int futex_shift; unsigned long i; @@ -2861,18 +2882,8 @@ static int __init futex_init(void) &futex_shift, NULL, futex_hashsize, futex_hashsize); futex_hashsize = 1UL << futex_shift; - /* - * This will fail and we want it. Some arch implementations do - * runtime detection of the futex_atomic_cmpxchg_inatomic() - * functionality. We want to know that before we call in any - * of the complex code paths. Also we want to prevent - * registration of robust lists in that case. NULL is - * guaranteed to fault and we get -EFAULT on functional - * implementation, the non-functional ones will return - * -ENOSYS. - */ - if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT) - futex_cmpxchg_enabled = 1; + + futex_detect_cmpxchg(); for (i = 0; i < futex_hashsize; i++) { plist_head_init(&futex_queues[i].chain); -- cgit v1.2.3-59-g8ed1b From c9122da1e2d29bd6a1475a0d1ce2aa6ac6ea25fa Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 3 Feb 2014 13:32:16 +0100 Subject: locking: Move mcs_spinlock.h into kernel/locking/ The mcs_spinlock code is not meant (or suitable) as a generic locking primitive, therefore take it away from the normal includes and place it in kernel/locking/. This way the locking primitives implemented there can use it as part of their implementation but we do not risk it getting used inapropriately. Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-byirmpamgr7h25m5kyavwpzx@git.kernel.org Signed-off-by: Ingo Molnar --- include/linux/mcs_spinlock.h | 114 ------------------------------------------ kernel/locking/mcs_spinlock.h | 114 ++++++++++++++++++++++++++++++++++++++++++ kernel/locking/mutex.c | 2 +- 3 files changed, 115 insertions(+), 115 deletions(-) delete mode 100644 include/linux/mcs_spinlock.h create mode 100644 kernel/locking/mcs_spinlock.h (limited to 'include/linux') diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h deleted file mode 100644 index f2a5c6360083..000000000000 --- a/include/linux/mcs_spinlock.h +++ /dev/null @@ -1,114 +0,0 @@ -/* - * MCS lock defines - * - * This file contains the main data structure and API definitions of MCS lock. - * - * The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spin-lock - * with the desirable properties of being fair, and with each cpu trying - * to acquire the lock spinning on a local variable. - * It avoids expensive cache bouncings that common test-and-set spin-lock - * implementations incur. - */ -#ifndef __LINUX_MCS_SPINLOCK_H -#define __LINUX_MCS_SPINLOCK_H - -#include - -struct mcs_spinlock { - struct mcs_spinlock *next; - int locked; /* 1 if lock acquired */ -}; - -#ifndef arch_mcs_spin_lock_contended -/* - * Using smp_load_acquire() provides a memory barrier that ensures - * subsequent operations happen after the lock is acquired. - */ -#define arch_mcs_spin_lock_contended(l) \ -do { \ - while (!(smp_load_acquire(l))) \ - arch_mutex_cpu_relax(); \ -} while (0) -#endif - -#ifndef arch_mcs_spin_unlock_contended -/* - * smp_store_release() provides a memory barrier to ensure all - * operations in the critical section has been completed before - * unlocking. - */ -#define arch_mcs_spin_unlock_contended(l) \ - smp_store_release((l), 1) -#endif - -/* - * Note: the smp_load_acquire/smp_store_release pair is not - * sufficient to form a full memory barrier across - * cpus for many architectures (except x86) for mcs_unlock and mcs_lock. - * For applications that need a full barrier across multiple cpus - * with mcs_unlock and mcs_lock pair, smp_mb__after_unlock_lock() should be - * used after mcs_lock. - */ - -/* - * In order to acquire the lock, the caller should declare a local node and - * pass a reference of the node to this function in addition to the lock. - * If the lock has already been acquired, then this will proceed to spin - * on this node->locked until the previous lock holder sets the node->locked - * in mcs_spin_unlock(). - * - * We don't inline mcs_spin_lock() so that perf can correctly account for the - * time spent in this lock function. - */ -static inline -void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) -{ - struct mcs_spinlock *prev; - - /* Init node */ - node->locked = 0; - node->next = NULL; - - prev = xchg(lock, node); - if (likely(prev == NULL)) { - /* - * Lock acquired, don't need to set node->locked to 1. Threads - * only spin on its own node->locked value for lock acquisition. - * However, since this thread can immediately acquire the lock - * and does not proceed to spin on its own node->locked, this - * value won't be used. If a debug mode is needed to - * audit lock status, then set node->locked value here. - */ - return; - } - ACCESS_ONCE(prev->next) = node; - - /* Wait until the lock holder passes the lock down. */ - arch_mcs_spin_lock_contended(&node->locked); -} - -/* - * Releases the lock. The caller should pass in the corresponding node that - * was used to acquire the lock. - */ -static inline -void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) -{ - struct mcs_spinlock *next = ACCESS_ONCE(node->next); - - if (likely(!next)) { - /* - * Release the lock by setting it to NULL - */ - if (likely(cmpxchg(lock, node, NULL) == node)) - return; - /* Wait until the next pointer is set */ - while (!(next = ACCESS_ONCE(node->next))) - arch_mutex_cpu_relax(); - } - - /* Pass lock to next waiter. */ - arch_mcs_spin_unlock_contended(&next->locked); -} - -#endif /* __LINUX_MCS_SPINLOCK_H */ diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h new file mode 100644 index 000000000000..f2a5c6360083 --- /dev/null +++ b/kernel/locking/mcs_spinlock.h @@ -0,0 +1,114 @@ +/* + * MCS lock defines + * + * This file contains the main data structure and API definitions of MCS lock. + * + * The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spin-lock + * with the desirable properties of being fair, and with each cpu trying + * to acquire the lock spinning on a local variable. + * It avoids expensive cache bouncings that common test-and-set spin-lock + * implementations incur. + */ +#ifndef __LINUX_MCS_SPINLOCK_H +#define __LINUX_MCS_SPINLOCK_H + +#include + +struct mcs_spinlock { + struct mcs_spinlock *next; + int locked; /* 1 if lock acquired */ +}; + +#ifndef arch_mcs_spin_lock_contended +/* + * Using smp_load_acquire() provides a memory barrier that ensures + * subsequent operations happen after the lock is acquired. + */ +#define arch_mcs_spin_lock_contended(l) \ +do { \ + while (!(smp_load_acquire(l))) \ + arch_mutex_cpu_relax(); \ +} while (0) +#endif + +#ifndef arch_mcs_spin_unlock_contended +/* + * smp_store_release() provides a memory barrier to ensure all + * operations in the critical section has been completed before + * unlocking. + */ +#define arch_mcs_spin_unlock_contended(l) \ + smp_store_release((l), 1) +#endif + +/* + * Note: the smp_load_acquire/smp_store_release pair is not + * sufficient to form a full memory barrier across + * cpus for many architectures (except x86) for mcs_unlock and mcs_lock. + * For applications that need a full barrier across multiple cpus + * with mcs_unlock and mcs_lock pair, smp_mb__after_unlock_lock() should be + * used after mcs_lock. + */ + +/* + * In order to acquire the lock, the caller should declare a local node and + * pass a reference of the node to this function in addition to the lock. + * If the lock has already been acquired, then this will proceed to spin + * on this node->locked until the previous lock holder sets the node->locked + * in mcs_spin_unlock(). + * + * We don't inline mcs_spin_lock() so that perf can correctly account for the + * time spent in this lock function. + */ +static inline +void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) +{ + struct mcs_spinlock *prev; + + /* Init node */ + node->locked = 0; + node->next = NULL; + + prev = xchg(lock, node); + if (likely(prev == NULL)) { + /* + * Lock acquired, don't need to set node->locked to 1. Threads + * only spin on its own node->locked value for lock acquisition. + * However, since this thread can immediately acquire the lock + * and does not proceed to spin on its own node->locked, this + * value won't be used. If a debug mode is needed to + * audit lock status, then set node->locked value here. + */ + return; + } + ACCESS_ONCE(prev->next) = node; + + /* Wait until the lock holder passes the lock down. */ + arch_mcs_spin_lock_contended(&node->locked); +} + +/* + * Releases the lock. The caller should pass in the corresponding node that + * was used to acquire the lock. + */ +static inline +void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) +{ + struct mcs_spinlock *next = ACCESS_ONCE(node->next); + + if (likely(!next)) { + /* + * Release the lock by setting it to NULL + */ + if (likely(cmpxchg(lock, node, NULL) == node)) + return; + /* Wait until the next pointer is set */ + while (!(next = ACCESS_ONCE(node->next))) + arch_mutex_cpu_relax(); + } + + /* Pass lock to next waiter. */ + arch_mcs_spin_unlock_contended(&next->locked); +} + +#endif /* __LINUX_MCS_SPINLOCK_H */ diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 45fe1b5293d6..4f408be39a07 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include "mcs_spinlock.h" /* * In the DEBUG case we are using the "NULL fastpath" for mutexes, -- cgit v1.2.3-59-g8ed1b From fb0527bd5ea99bfeb2dd91e3c1433ecf745d6b99 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 29 Jan 2014 12:51:42 +0100 Subject: locking/mutexes: Introduce cancelable MCS lock for adaptive spinning Since we want a task waiting for a mutex_lock() to go to sleep and reschedule on need_resched() we must be able to abort the mcs_spin_lock() around the adaptive spin. Therefore implement a cancelable mcs lock. Signed-off-by: Peter Zijlstra Cc: chegu_vinod@hp.com Cc: paulmck@linux.vnet.ibm.com Cc: Waiman.Long@hp.com Cc: torvalds@linux-foundation.org Cc: tglx@linutronix.de Cc: riel@redhat.com Cc: akpm@linux-foundation.org Cc: davidlohr@hp.com Cc: hpa@zytor.com Cc: andi@firstfloor.org Cc: aswin@hp.com Cc: scott.norton@hp.com Cc: Jason Low Link: http://lkml.kernel.org/n/tip-62hcl5wxydmjzd182zhvk89m@git.kernel.org Signed-off-by: Ingo Molnar --- include/linux/mutex.h | 4 +- kernel/locking/Makefile | 2 +- kernel/locking/mcs_spinlock.c | 178 ++++++++++++++++++++++++++++++++++++++++++ kernel/locking/mcs_spinlock.h | 15 ++++ kernel/locking/mutex.c | 10 ++- 5 files changed, 202 insertions(+), 7 deletions(-) create mode 100644 kernel/locking/mcs_spinlock.c (limited to 'include/linux') diff --git a/include/linux/mutex.h b/include/linux/mutex.h index c482e1d2cc49..11692dea18aa 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -46,7 +46,7 @@ * - detects multi-task circular deadlocks and prints out all affected * locks and tasks (and only those tasks) */ -struct mcs_spinlock; +struct optimistic_spin_queue; struct mutex { /* 1: unlocked, 0: locked, negative: locked, possible waiters */ atomic_t count; @@ -56,7 +56,7 @@ struct mutex { struct task_struct *owner; #endif #ifdef CONFIG_MUTEX_SPIN_ON_OWNER - struct mcs_spinlock *mcs_lock; /* Spinner MCS lock */ + struct optimistic_spin_queue *osq; /* Spinner MCS lock */ #endif #ifdef CONFIG_DEBUG_MUTEXES const char *name; diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile index baab8e5e7f66..2a9ee96ecf00 100644 --- a/kernel/locking/Makefile +++ b/kernel/locking/Makefile @@ -1,5 +1,5 @@ -obj-y += mutex.o semaphore.o rwsem.o lglock.o +obj-y += mutex.o semaphore.o rwsem.o lglock.o mcs_spinlock.o ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_lockdep.o = -pg diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c new file mode 100644 index 000000000000..838dc9e00669 --- /dev/null +++ b/kernel/locking/mcs_spinlock.c @@ -0,0 +1,178 @@ + +#include +#include +#include +#include "mcs_spinlock.h" + +#ifdef CONFIG_SMP + +/* + * An MCS like lock especially tailored for optimistic spinning for sleeping + * lock implementations (mutex, rwsem, etc). + * + * Using a single mcs node per CPU is safe because sleeping locks should not be + * called from interrupt context and we have preemption disabled while + * spinning. + */ +static DEFINE_PER_CPU_SHARED_ALIGNED(struct optimistic_spin_queue, osq_node); + +/* + * Get a stable @node->next pointer, either for unlock() or unqueue() purposes. + * Can return NULL in case we were the last queued and we updated @lock instead. + */ +static inline struct optimistic_spin_queue * +osq_wait_next(struct optimistic_spin_queue **lock, + struct optimistic_spin_queue *node, + struct optimistic_spin_queue *prev) +{ + struct optimistic_spin_queue *next = NULL; + + for (;;) { + if (*lock == node && cmpxchg(lock, node, prev) == node) { + /* + * We were the last queued, we moved @lock back. @prev + * will now observe @lock and will complete its + * unlock()/unqueue(). + */ + break; + } + + /* + * We must xchg() the @node->next value, because if we were to + * leave it in, a concurrent unlock()/unqueue() from + * @node->next might complete Step-A and think its @prev is + * still valid. + * + * If the concurrent unlock()/unqueue() wins the race, we'll + * wait for either @lock to point to us, through its Step-B, or + * wait for a new @node->next from its Step-C. + */ + if (node->next) { + next = xchg(&node->next, NULL); + if (next) + break; + } + + arch_mutex_cpu_relax(); + } + + return next; +} + +bool osq_lock(struct optimistic_spin_queue **lock) +{ + struct optimistic_spin_queue *node = this_cpu_ptr(&osq_node); + struct optimistic_spin_queue *prev, *next; + + node->locked = 0; + node->next = NULL; + + node->prev = prev = xchg(lock, node); + if (likely(prev == NULL)) + return true; + + ACCESS_ONCE(prev->next) = node; + + /* + * Normally @prev is untouchable after the above store; because at that + * moment unlock can proceed and wipe the node element from stack. + * + * However, since our nodes are static per-cpu storage, we're + * guaranteed their existence -- this allows us to apply + * cmpxchg in an attempt to undo our queueing. + */ + + while (!smp_load_acquire(&node->locked)) { + /* + * If we need to reschedule bail... so we can block. + */ + if (need_resched()) + goto unqueue; + + arch_mutex_cpu_relax(); + } + return true; + +unqueue: + /* + * Step - A -- stabilize @prev + * + * Undo our @prev->next assignment; this will make @prev's + * unlock()/unqueue() wait for a next pointer since @lock points to us + * (or later). + */ + + for (;;) { + if (prev->next == node && + cmpxchg(&prev->next, node, NULL) == node) + break; + + /* + * We can only fail the cmpxchg() racing against an unlock(), + * in which case we should observe @node->locked becomming + * true. + */ + if (smp_load_acquire(&node->locked)) + return true; + + arch_mutex_cpu_relax(); + + /* + * Or we race against a concurrent unqueue()'s step-B, in which + * case its step-C will write us a new @node->prev pointer. + */ + prev = ACCESS_ONCE(node->prev); + } + + /* + * Step - B -- stabilize @next + * + * Similar to unlock(), wait for @node->next or move @lock from @node + * back to @prev. + */ + + next = osq_wait_next(lock, node, prev); + if (!next) + return false; + + /* + * Step - C -- unlink + * + * @prev is stable because its still waiting for a new @prev->next + * pointer, @next is stable because our @node->next pointer is NULL and + * it will wait in Step-A. + */ + + ACCESS_ONCE(next->prev) = prev; + ACCESS_ONCE(prev->next) = next; + + return false; +} + +void osq_unlock(struct optimistic_spin_queue **lock) +{ + struct optimistic_spin_queue *node = this_cpu_ptr(&osq_node); + struct optimistic_spin_queue *next; + + /* + * Fast path for the uncontended case. + */ + if (likely(cmpxchg(lock, node, NULL) == node)) + return; + + /* + * Second most likely case. + */ + next = xchg(&node->next, NULL); + if (next) { + ACCESS_ONCE(next->locked) = 1; + return; + } + + next = osq_wait_next(lock, node, NULL); + if (next) + ACCESS_ONCE(next->locked) = 1; +} + +#endif + diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h index f2a5c6360083..a2dbac4aca6b 100644 --- a/kernel/locking/mcs_spinlock.h +++ b/kernel/locking/mcs_spinlock.h @@ -111,4 +111,19 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) arch_mcs_spin_unlock_contended(&next->locked); } +/* + * Cancellable version of the MCS lock above. + * + * Intended for adaptive spinning of sleeping locks: + * mutex_lock()/rwsem_down_{read,write}() etc. + */ + +struct optimistic_spin_queue { + struct optimistic_spin_queue *next, *prev; + int locked; /* 1 if lock acquired */ +}; + +extern bool osq_lock(struct optimistic_spin_queue **lock); +extern void osq_unlock(struct optimistic_spin_queue **lock); + #endif /* __LINUX_MCS_SPINLOCK_H */ diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index dc3d6f2bbe2a..2670b84067d6 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -53,7 +53,7 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) INIT_LIST_HEAD(&lock->wait_list); mutex_clear_owner(lock); #ifdef CONFIG_MUTEX_SPIN_ON_OWNER - lock->mcs_lock = NULL; + lock->osq = NULL; #endif debug_mutex_init(lock, name, key); @@ -403,7 +403,9 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, if (!mutex_can_spin_on_owner(lock)) goto slowpath; - mcs_spin_lock(&lock->mcs_lock, &node); + if (!osq_lock(&lock->osq)) + goto slowpath; + for (;;) { struct task_struct *owner; @@ -442,7 +444,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, } mutex_set_owner(lock); - mcs_spin_unlock(&lock->mcs_lock, &node); + osq_unlock(&lock->osq); preempt_enable(); return 0; } @@ -464,7 +466,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, */ arch_mutex_cpu_relax(); } - mcs_spin_unlock(&lock->mcs_lock, &node); + osq_unlock(&lock->osq); slowpath: #endif spin_lock_mutex(&lock->wait_lock, flags); -- cgit v1.2.3-59-g8ed1b