From b464d1270a8016edcf1fd20d77cefdecf9b0b73e Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 30 May 2016 14:32:04 +0200
Subject: locking/barriers, tile: Provide TILE specific
 smp_acquire__after_ctrl_dep()

Since TILE doesn't do read speculation, its control dependencies also
guarantee LOAD->LOAD order and we don't need the additional RMB
otherwise required to provide ACQUIRE semantics.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Chris Metcalf <cmetcalf@mellanox.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/tile/include/asm/barrier.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'arch/tile')

diff --git a/arch/tile/include/asm/barrier.h b/arch/tile/include/asm/barrier.h
index d55222806c2f..4c419ab95ab7 100644
--- a/arch/tile/include/asm/barrier.h
+++ b/arch/tile/include/asm/barrier.h
@@ -87,6 +87,13 @@ mb_incoherent(void)
 #define __smp_mb__after_atomic()	__smp_mb()
 #endif
 
+/*
+ * The TILE architecture does not do speculative reads; this ensures
+ * that a control dependency also orders against loads and already provides
+ * a LOAD->{LOAD,STORE} order and can forgo the additional RMB.
+ */
+#define smp_acquire__after_ctrl_dep()	barrier()
+
 #include <asm-generic/barrier.h>
 
 #endif /* !__ASSEMBLY__ */
-- 
cgit v1.2.3-59-g8ed1b


From 726328d92a42b6d4b76078e2659f43067f82c4e8 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 26 May 2016 10:35:03 +0200
Subject: locking/spinlock, arch: Update and fix spin_unlock_wait()
 implementations

This patch updates/fixes all spin_unlock_wait() implementations.

The update is in semantics; where it previously was only a control
dependency, we now upgrade to a full load-acquire to match the
store-release from the spin_unlock() we waited on. This ensures that
when spin_unlock_wait() returns, we're guaranteed to observe the full
critical section we waited on.

This fixes a number of spin_unlock_wait() users that (not
unreasonably) rely on this.

I also fixed a number of ticket lock versions to only wait on the
current lock holder, instead of for a full unlock, as this is
sufficient.

Furthermore; again for ticket locks; I added an smp_rmb() in between
the initial ticket load and the spin loop testing the current value
because I could not convince myself the address dependency is
sufficient, esp. if the loads are of different sizes.

I'm more than happy to remove this smp_rmb() again if people are
certain the address dependency does indeed work as expected.

Note: PPC32 will be fixed independently

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: chris@zankel.net
Cc: cmetcalf@mellanox.com
Cc: davem@davemloft.net
Cc: dhowells@redhat.com
Cc: james.hogan@imgtec.com
Cc: jejb@parisc-linux.org
Cc: linux@armlinux.org.uk
Cc: mpe@ellerman.id.au
Cc: ralf@linux-mips.org
Cc: realmz6@gmail.com
Cc: rkuo@codeaurora.org
Cc: rth@twiddle.net
Cc: schwidefsky@de.ibm.com
Cc: tony.luck@intel.com
Cc: vgupta@synopsys.com
Cc: ysato@users.sourceforge.jp
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/alpha/include/asm/spinlock.h    |  9 +++++++--
 arch/arc/include/asm/spinlock.h      |  7 +++++--
 arch/arm/include/asm/spinlock.h      | 19 +++++++++++++++++--
 arch/blackfin/include/asm/spinlock.h |  5 +++--
 arch/hexagon/include/asm/spinlock.h  | 10 ++++++++--
 arch/ia64/include/asm/spinlock.h     |  4 ++++
 arch/m32r/include/asm/spinlock.h     |  9 +++++++--
 arch/metag/include/asm/spinlock.h    | 14 ++++++++++++--
 arch/mips/include/asm/spinlock.h     | 19 +++++++++++++++++--
 arch/mn10300/include/asm/spinlock.h  |  8 +++++++-
 arch/parisc/include/asm/spinlock.h   |  9 +++++++--
 arch/s390/include/asm/spinlock.h     |  3 +++
 arch/sh/include/asm/spinlock.h       | 10 ++++++++--
 arch/sparc/include/asm/spinlock_32.h |  7 +++++--
 arch/sparc/include/asm/spinlock_64.h | 10 +++++++---
 arch/tile/lib/spinlock_32.c          |  6 ++++++
 arch/tile/lib/spinlock_64.c          |  6 ++++++
 arch/xtensa/include/asm/spinlock.h   | 10 ++++++++--
 include/asm-generic/barrier.h        |  2 +-
 include/linux/spinlock_up.h          | 10 +++++++---
 20 files changed, 145 insertions(+), 32 deletions(-)

(limited to 'arch/tile')

diff --git a/arch/alpha/include/asm/spinlock.h b/arch/alpha/include/asm/spinlock.h
index fed9c6f44c19..a40b9fc0c6c3 100644
--- a/arch/alpha/include/asm/spinlock.h
+++ b/arch/alpha/include/asm/spinlock.h
@@ -3,6 +3,8 @@
 
 #include <linux/kernel.h>
 #include <asm/current.h>
+#include <asm/barrier.h>
+#include <asm/processor.h>
 
 /*
  * Simple spin lock operations.  There are two variants, one clears IRQ's
@@ -13,8 +15,11 @@
 
 #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
 #define arch_spin_is_locked(x)	((x)->lock != 0)
-#define arch_spin_unlock_wait(x) \
-		do { cpu_relax(); } while ((x)->lock)
+
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+	smp_cond_load_acquire(&lock->lock, !VAL);
+}
 
 static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
 {
diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h
index cded4a9b5438..233d5ffe6ec7 100644
--- a/arch/arc/include/asm/spinlock.h
+++ b/arch/arc/include/asm/spinlock.h
@@ -15,8 +15,11 @@
 
 #define arch_spin_is_locked(x)	((x)->slock != __ARCH_SPIN_LOCK_UNLOCKED__)
 #define arch_spin_lock_flags(lock, flags)	arch_spin_lock(lock)
-#define arch_spin_unlock_wait(x) \
-	do { while (arch_spin_is_locked(x)) cpu_relax(); } while (0)
+
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+	smp_cond_load_acquire(&lock->slock, !VAL);
+}
 
 #ifdef CONFIG_ARC_HAS_LLSC
 
diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h
index 0fa418463f49..4bec45442072 100644
--- a/arch/arm/include/asm/spinlock.h
+++ b/arch/arm/include/asm/spinlock.h
@@ -6,6 +6,8 @@
 #endif
 
 #include <linux/prefetch.h>
+#include <asm/barrier.h>
+#include <asm/processor.h>
 
 /*
  * sev and wfe are ARMv6K extensions.  Uniprocessor ARMv6 may not have the K
@@ -50,8 +52,21 @@ static inline void dsb_sev(void)
  * memory.
  */
 
-#define arch_spin_unlock_wait(lock) \
-	do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+	u16 owner = READ_ONCE(lock->tickets.owner);
+
+	for (;;) {
+		arch_spinlock_t tmp = READ_ONCE(*lock);
+
+		if (tmp.tickets.owner == tmp.tickets.next ||
+		    tmp.tickets.owner != owner)
+			break;
+
+		wfe();
+	}
+	smp_acquire__after_ctrl_dep();
+}
 
 #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
 
diff --git a/arch/blackfin/include/asm/spinlock.h b/arch/blackfin/include/asm/spinlock.h
index 490c7caa02d9..c58f4a83ed6f 100644
--- a/arch/blackfin/include/asm/spinlock.h
+++ b/arch/blackfin/include/asm/spinlock.h
@@ -12,6 +12,8 @@
 #else
 
 #include <linux/atomic.h>
+#include <asm/processor.h>
+#include <asm/barrier.h>
 
 asmlinkage int __raw_spin_is_locked_asm(volatile int *ptr);
 asmlinkage void __raw_spin_lock_asm(volatile int *ptr);
@@ -48,8 +50,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
 
 static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
 {
-	while (arch_spin_is_locked(lock))
-		cpu_relax();
+	smp_cond_load_acquire(&lock->lock, !VAL);
 }
 
 static inline int arch_read_can_lock(arch_rwlock_t *rw)
diff --git a/arch/hexagon/include/asm/spinlock.h b/arch/hexagon/include/asm/spinlock.h
index 12ca4ebc0338..a1c55788c5d6 100644
--- a/arch/hexagon/include/asm/spinlock.h
+++ b/arch/hexagon/include/asm/spinlock.h
@@ -23,6 +23,8 @@
 #define _ASM_SPINLOCK_H
 
 #include <asm/irqflags.h>
+#include <asm/barrier.h>
+#include <asm/processor.h>
 
 /*
  * This file is pulled in for SMP builds.
@@ -176,8 +178,12 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock)
  * SMP spinlocks are intended to allow only a single CPU at the lock
  */
 #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
-#define arch_spin_unlock_wait(lock) \
-	do {while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)
+
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+	smp_cond_load_acquire(&lock->lock, !VAL);
+}
+
 #define arch_spin_is_locked(x) ((x)->lock != 0)
 
 #define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
diff --git a/arch/ia64/include/asm/spinlock.h b/arch/ia64/include/asm/spinlock.h
index 45698cd15b7b..ca9e76149a4a 100644
--- a/arch/ia64/include/asm/spinlock.h
+++ b/arch/ia64/include/asm/spinlock.h
@@ -15,6 +15,8 @@
 
 #include <linux/atomic.h>
 #include <asm/intrinsics.h>
+#include <asm/barrier.h>
+#include <asm/processor.h>
 
 #define arch_spin_lock_init(x)			((x)->lock = 0)
 
@@ -86,6 +88,8 @@ static __always_inline void __ticket_spin_unlock_wait(arch_spinlock_t *lock)
 			return;
 		cpu_relax();
 	}
+
+	smp_acquire__after_ctrl_dep();
 }
 
 static inline int __ticket_spin_is_locked(arch_spinlock_t *lock)
diff --git a/arch/m32r/include/asm/spinlock.h b/arch/m32r/include/asm/spinlock.h
index fa13694eaae3..323c7fc953cd 100644
--- a/arch/m32r/include/asm/spinlock.h
+++ b/arch/m32r/include/asm/spinlock.h
@@ -13,6 +13,8 @@
 #include <linux/atomic.h>
 #include <asm/dcache_clear.h>
 #include <asm/page.h>
+#include <asm/barrier.h>
+#include <asm/processor.h>
 
 /*
  * Your basic SMP spinlocks, allowing only a single CPU anywhere
@@ -27,8 +29,11 @@
 
 #define arch_spin_is_locked(x)		(*(volatile int *)(&(x)->slock) <= 0)
 #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
-#define arch_spin_unlock_wait(x) \
-		do { cpu_relax(); } while (arch_spin_is_locked(x))
+
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+	smp_cond_load_acquire(&lock->slock, VAL > 0);
+}
 
 /**
  * arch_spin_trylock - Try spin lock and return a result
diff --git a/arch/metag/include/asm/spinlock.h b/arch/metag/include/asm/spinlock.h
index 86a7cf3d1386..c0c7a22be1ae 100644
--- a/arch/metag/include/asm/spinlock.h
+++ b/arch/metag/include/asm/spinlock.h
@@ -1,14 +1,24 @@
 #ifndef __ASM_SPINLOCK_H
 #define __ASM_SPINLOCK_H
 
+#include <asm/barrier.h>
+#include <asm/processor.h>
+
 #ifdef CONFIG_METAG_ATOMICITY_LOCK1
 #include <asm/spinlock_lock1.h>
 #else
 #include <asm/spinlock_lnkget.h>
 #endif
 
-#define arch_spin_unlock_wait(lock) \
-	do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)
+/*
+ * both lock1 and lnkget are test-and-set spinlocks with 0 unlocked and 1
+ * locked.
+ */
+
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+	smp_cond_load_acquire(&lock->lock, !VAL);
+}
 
 #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
 
diff --git a/arch/mips/include/asm/spinlock.h b/arch/mips/include/asm/spinlock.h
index 40196bebe849..f485afe51514 100644
--- a/arch/mips/include/asm/spinlock.h
+++ b/arch/mips/include/asm/spinlock.h
@@ -12,6 +12,7 @@
 #include <linux/compiler.h>
 
 #include <asm/barrier.h>
+#include <asm/processor.h>
 #include <asm/compiler.h>
 #include <asm/war.h>
 
@@ -48,8 +49,22 @@ static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
 }
 
 #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
-#define arch_spin_unlock_wait(x) \
-	while (arch_spin_is_locked(x)) { cpu_relax(); }
+
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+	u16 owner = READ_ONCE(lock->h.serving_now);
+	smp_rmb();
+	for (;;) {
+		arch_spinlock_t tmp = READ_ONCE(*lock);
+
+		if (tmp.h.serving_now == tmp.h.ticket ||
+		    tmp.h.serving_now != owner)
+			break;
+
+		cpu_relax();
+	}
+	smp_acquire__after_ctrl_dep();
+}
 
 static inline int arch_spin_is_contended(arch_spinlock_t *lock)
 {
diff --git a/arch/mn10300/include/asm/spinlock.h b/arch/mn10300/include/asm/spinlock.h
index 1ae580f38933..9c7b8f7942d8 100644
--- a/arch/mn10300/include/asm/spinlock.h
+++ b/arch/mn10300/include/asm/spinlock.h
@@ -12,6 +12,8 @@
 #define _ASM_SPINLOCK_H
 
 #include <linux/atomic.h>
+#include <asm/barrier.h>
+#include <asm/processor.h>
 #include <asm/rwlock.h>
 #include <asm/page.h>
 
@@ -23,7 +25,11 @@
  */
 
 #define arch_spin_is_locked(x)	(*(volatile signed char *)(&(x)->slock) != 0)
-#define arch_spin_unlock_wait(x) do { barrier(); } while (arch_spin_is_locked(x))
+
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+	smp_cond_load_acquire(&lock->slock, !VAL);
+}
 
 static inline void arch_spin_unlock(arch_spinlock_t *lock)
 {
diff --git a/arch/parisc/include/asm/spinlock.h b/arch/parisc/include/asm/spinlock.h
index 64f2992e439f..e32936cd7f10 100644
--- a/arch/parisc/include/asm/spinlock.h
+++ b/arch/parisc/include/asm/spinlock.h
@@ -13,8 +13,13 @@ static inline int arch_spin_is_locked(arch_spinlock_t *x)
 }
 
 #define arch_spin_lock(lock) arch_spin_lock_flags(lock, 0)
-#define arch_spin_unlock_wait(x) \
-		do { cpu_relax(); } while (arch_spin_is_locked(x))
+
+static inline void arch_spin_unlock_wait(arch_spinlock_t *x)
+{
+	volatile unsigned int *a = __ldcw_align(x);
+
+	smp_cond_load_acquire(a, VAL);
+}
 
 static inline void arch_spin_lock_flags(arch_spinlock_t *x,
 					 unsigned long flags)
diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
index 63ebf37d3143..7e9e09f600fa 100644
--- a/arch/s390/include/asm/spinlock.h
+++ b/arch/s390/include/asm/spinlock.h
@@ -10,6 +10,8 @@
 #define __ASM_SPINLOCK_H
 
 #include <linux/smp.h>
+#include <asm/barrier.h>
+#include <asm/processor.h>
 
 #define SPINLOCK_LOCKVAL (S390_lowcore.spinlock_lockval)
 
@@ -97,6 +99,7 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
 {
 	while (arch_spin_is_locked(lock))
 		arch_spin_relax(lock);
+	smp_acquire__after_ctrl_dep();
 }
 
 /*
diff --git a/arch/sh/include/asm/spinlock.h b/arch/sh/include/asm/spinlock.h
index bdc0f3b6c56a..416834b60ad0 100644
--- a/arch/sh/include/asm/spinlock.h
+++ b/arch/sh/include/asm/spinlock.h
@@ -19,14 +19,20 @@
 #error "Need movli.l/movco.l for spinlocks"
 #endif
 
+#include <asm/barrier.h>
+#include <asm/processor.h>
+
 /*
  * Your basic SMP spinlocks, allowing only a single CPU anywhere
  */
 
 #define arch_spin_is_locked(x)		((x)->lock <= 0)
 #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
-#define arch_spin_unlock_wait(x) \
-	do { while (arch_spin_is_locked(x)) cpu_relax(); } while (0)
+
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+	smp_cond_load_acquire(&lock->lock, VAL > 0);
+}
 
 /*
  * Simple spin lock operations.  There are two variants, one clears IRQ's
diff --git a/arch/sparc/include/asm/spinlock_32.h b/arch/sparc/include/asm/spinlock_32.h
index bcc98fc35281..d9c5876c6121 100644
--- a/arch/sparc/include/asm/spinlock_32.h
+++ b/arch/sparc/include/asm/spinlock_32.h
@@ -9,12 +9,15 @@
 #ifndef __ASSEMBLY__
 
 #include <asm/psr.h>
+#include <asm/barrier.h>
 #include <asm/processor.h> /* for cpu_relax */
 
 #define arch_spin_is_locked(lock) (*((volatile unsigned char *)(lock)) != 0)
 
-#define arch_spin_unlock_wait(lock) \
-	do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+	smp_cond_load_acquire(&lock->lock, !VAL);
+}
 
 static inline void arch_spin_lock(arch_spinlock_t *lock)
 {
diff --git a/arch/sparc/include/asm/spinlock_64.h b/arch/sparc/include/asm/spinlock_64.h
index 968917694978..87990b7c6b0d 100644
--- a/arch/sparc/include/asm/spinlock_64.h
+++ b/arch/sparc/include/asm/spinlock_64.h
@@ -8,6 +8,9 @@
 
 #ifndef __ASSEMBLY__
 
+#include <asm/processor.h>
+#include <asm/barrier.h>
+
 /* To get debugging spinlocks which detect and catch
  * deadlock situations, set CONFIG_DEBUG_SPINLOCK
  * and rebuild your kernel.
@@ -23,9 +26,10 @@
 
 #define arch_spin_is_locked(lp)	((lp)->lock != 0)
 
-#define arch_spin_unlock_wait(lp)	\
-	do {	rmb();			\
-	} while((lp)->lock)
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+	smp_cond_load_acquire(&lock->lock, !VAL);
+}
 
 static inline void arch_spin_lock(arch_spinlock_t *lock)
 {
diff --git a/arch/tile/lib/spinlock_32.c b/arch/tile/lib/spinlock_32.c
index 88c2a53362e7..076c6cc43113 100644
--- a/arch/tile/lib/spinlock_32.c
+++ b/arch/tile/lib/spinlock_32.c
@@ -76,6 +76,12 @@ void arch_spin_unlock_wait(arch_spinlock_t *lock)
 	do {
 		delay_backoff(iterations++);
 	} while (READ_ONCE(lock->current_ticket) == curr);
+
+	/*
+	 * The TILE architecture doesn't do read speculation; therefore
+	 * a control dependency guarantees a LOAD->{LOAD,STORE} order.
+	 */
+	barrier();
 }
 EXPORT_SYMBOL(arch_spin_unlock_wait);
 
diff --git a/arch/tile/lib/spinlock_64.c b/arch/tile/lib/spinlock_64.c
index c8d1f94ff1fe..a4b5b2cbce93 100644
--- a/arch/tile/lib/spinlock_64.c
+++ b/arch/tile/lib/spinlock_64.c
@@ -76,6 +76,12 @@ void arch_spin_unlock_wait(arch_spinlock_t *lock)
 	do {
 		delay_backoff(iterations++);
 	} while (arch_spin_current(READ_ONCE(lock->lock)) == curr);
+
+	/*
+	 * The TILE architecture doesn't do read speculation; therefore
+	 * a control dependency guarantees a LOAD->{LOAD,STORE} order.
+	 */
+	barrier();
 }
 EXPORT_SYMBOL(arch_spin_unlock_wait);
 
diff --git a/arch/xtensa/include/asm/spinlock.h b/arch/xtensa/include/asm/spinlock.h
index 1d95fa5dcd10..a36221cf6363 100644
--- a/arch/xtensa/include/asm/spinlock.h
+++ b/arch/xtensa/include/asm/spinlock.h
@@ -11,6 +11,9 @@
 #ifndef _XTENSA_SPINLOCK_H
 #define _XTENSA_SPINLOCK_H
 
+#include <asm/barrier.h>
+#include <asm/processor.h>
+
 /*
  * spinlock
  *
@@ -29,8 +32,11 @@
  */
 
 #define arch_spin_is_locked(x) ((x)->slock != 0)
-#define arch_spin_unlock_wait(lock) \
-	do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)
+
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+	smp_cond_load_acquire(&lock->slock, !VAL);
+}
 
 #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
 
diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h
index ab7b0bd7d4dd..fe297b599b0a 100644
--- a/include/asm-generic/barrier.h
+++ b/include/asm-generic/barrier.h
@@ -194,7 +194,7 @@ do {									\
 })
 #endif
 
-#endif
+#endif	/* CONFIG_SMP */
 
 /* Barriers for virtual machine guests when talking to an SMP host */
 #define virt_mb() __smp_mb()
diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h
index 8b3ac0d718eb..0d9848de677d 100644
--- a/include/linux/spinlock_up.h
+++ b/include/linux/spinlock_up.h
@@ -6,6 +6,7 @@
 #endif
 
 #include <asm/processor.h>	/* for cpu_relax() */
+#include <asm/barrier.h>
 
 /*
  * include/linux/spinlock_up.h - UP-debug version of spinlocks.
@@ -25,6 +26,11 @@
 #ifdef CONFIG_DEBUG_SPINLOCK
 #define arch_spin_is_locked(x)		((x)->slock == 0)
 
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+	smp_cond_load_acquire(&lock->slock, VAL);
+}
+
 static inline void arch_spin_lock(arch_spinlock_t *lock)
 {
 	lock->slock = 0;
@@ -67,6 +73,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
 
 #else /* DEBUG_SPINLOCK */
 #define arch_spin_is_locked(lock)	((void)(lock), 0)
+#define arch_spin_unlock_wait(lock)	do { barrier(); (void)(lock); } while (0)
 /* for sched/core.c and kernel_lock.c: */
 # define arch_spin_lock(lock)		do { barrier(); (void)(lock); } while (0)
 # define arch_spin_lock_flags(lock, flags)	do { barrier(); (void)(lock); } while (0)
@@ -79,7 +86,4 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
 #define arch_read_can_lock(lock)	(((void)(lock), 1))
 #define arch_write_can_lock(lock)	(((void)(lock), 1))
 
-#define arch_spin_unlock_wait(lock) \
-		do { cpu_relax(); } while (arch_spin_is_locked(lock))
-
 #endif /* __LINUX_SPINLOCK_UP_H */
-- 
cgit v1.2.3-59-g8ed1b


From 1af5de9af138941fb8638cf126293b16f3387de4 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 18 Apr 2016 01:16:03 +0200
Subject: locking/atomic, arch/tile: Implement
 atomic{,64}_fetch_{add,sub,and,or,xor}()

Implement FETCH-OP atomic primitives, these are very similar to the
existing OP-RETURN primitives we already have, except they return the
value of the atomic variable _before_ modification.

This is especially useful for irreversible operations -- such as
bitops (because it becomes impossible to reconstruct the state prior
to modification).

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Chris Metcalf <cmetcalf@mellanox.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-arch@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/tile/include/asm/atomic.h    |   4 ++
 arch/tile/include/asm/atomic_32.h |  60 ++++++++++++++------
 arch/tile/include/asm/atomic_64.h | 115 +++++++++++++++++++++++++-------------
 arch/tile/include/asm/bitops_32.h |  18 +++---
 arch/tile/lib/atomic_32.c         |  42 +++++++-------
 arch/tile/lib/atomic_asm_32.S     |  14 ++---
 6 files changed, 159 insertions(+), 94 deletions(-)

(limited to 'arch/tile')

diff --git a/arch/tile/include/asm/atomic.h b/arch/tile/include/asm/atomic.h
index 9fc0107a9c5e..9807030557c4 100644
--- a/arch/tile/include/asm/atomic.h
+++ b/arch/tile/include/asm/atomic.h
@@ -46,6 +46,10 @@ static inline int atomic_read(const atomic_t *v)
  */
 #define atomic_sub_return(i, v)		atomic_add_return((int)(-(i)), (v))
 
+#define atomic_fetch_sub(i, v)		atomic_fetch_add(-(int)(i), (v))
+
+#define atomic_fetch_or atomic_fetch_or
+
 /**
  * atomic_sub - subtract integer from atomic variable
  * @i: integer value to subtract
diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h
index d320ce253d86..da8eb4ed3752 100644
--- a/arch/tile/include/asm/atomic_32.h
+++ b/arch/tile/include/asm/atomic_32.h
@@ -34,18 +34,29 @@ static inline void atomic_add(int i, atomic_t *v)
 	_atomic_xchg_add(&v->counter, i);
 }
 
-#define ATOMIC_OP(op)							\
-unsigned long _atomic_##op(volatile unsigned long *p, unsigned long mask); \
+#define ATOMIC_OPS(op)							\
+unsigned long _atomic_fetch_##op(volatile unsigned long *p, unsigned long mask); \
 static inline void atomic_##op(int i, atomic_t *v)			\
 {									\
-	_atomic_##op((unsigned long *)&v->counter, i);			\
+	_atomic_fetch_##op((unsigned long *)&v->counter, i);		\
+}									\
+static inline int atomic_fetch_##op(int i, atomic_t *v)			\
+{									\
+	smp_mb();							\
+	return _atomic_fetch_##op((unsigned long *)&v->counter, i);	\
 }
 
-ATOMIC_OP(and)
-ATOMIC_OP(or)
-ATOMIC_OP(xor)
+ATOMIC_OPS(and)
+ATOMIC_OPS(or)
+ATOMIC_OPS(xor)
+
+#undef ATOMIC_OPS
 
-#undef ATOMIC_OP
+static inline int atomic_fetch_add(int i, atomic_t *v)
+{
+	smp_mb();
+	return _atomic_xchg_add(&v->counter, i);
+}
 
 /**
  * atomic_add_return - add integer and return
@@ -126,17 +137,30 @@ static inline void atomic64_add(long long i, atomic64_t *v)
 	_atomic64_xchg_add(&v->counter, i);
 }
 
-#define ATOMIC64_OP(op)						\
-long long _atomic64_##op(long long *v, long long n);		\
+#define ATOMIC64_OPS(op)					\
+long long _atomic64_fetch_##op(long long *v, long long n);	\
+static inline void atomic64_##op(long long i, atomic64_t *v)	\
+{								\
+	_atomic64_fetch_##op(&v->counter, i);			\
+}								\
 static inline void atomic64_##op(long long i, atomic64_t *v)	\
 {								\
-	_atomic64_##op(&v->counter, i);				\
+	smp_mb();						\
+	return _atomic64_fetch_##op(&v->counter, i);		\
 }
 
 ATOMIC64_OP(and)
 ATOMIC64_OP(or)
 ATOMIC64_OP(xor)
 
+#undef ATOMIC64_OPS
+
+static inline long long atomic64_fetch_add(long long i, atomic64_t *v)
+{
+	smp_mb();
+	return _atomic64_xchg_add(&v->counter, i);
+}
+
 /**
  * atomic64_add_return - add integer and return
  * @v: pointer of type atomic64_t
@@ -186,6 +210,7 @@ static inline void atomic64_set(atomic64_t *v, long long n)
 #define atomic64_inc_return(v)		atomic64_add_return(1LL, (v))
 #define atomic64_inc_and_test(v)	(atomic64_inc_return(v) == 0)
 #define atomic64_sub_return(i, v)	atomic64_add_return(-(i), (v))
+#define atomic64_fetch_sub(i, v)	atomic64_fetch_add(-(i), (v))
 #define atomic64_sub_and_test(a, v)	(atomic64_sub_return((a), (v)) == 0)
 #define atomic64_sub(i, v)		atomic64_add(-(i), (v))
 #define atomic64_dec(v)			atomic64_sub(1LL, (v))
@@ -193,7 +218,6 @@ static inline void atomic64_set(atomic64_t *v, long long n)
 #define atomic64_dec_and_test(v)	(atomic64_dec_return((v)) == 0)
 #define atomic64_inc_not_zero(v)	atomic64_add_unless((v), 1LL, 0LL)
 
-
 #endif /* !__ASSEMBLY__ */
 
 /*
@@ -248,10 +272,10 @@ extern struct __get_user __atomic_xchg(volatile int *p, int *lock, int n);
 extern struct __get_user __atomic_xchg_add(volatile int *p, int *lock, int n);
 extern struct __get_user __atomic_xchg_add_unless(volatile int *p,
 						  int *lock, int o, int n);
-extern struct __get_user __atomic_or(volatile int *p, int *lock, int n);
-extern struct __get_user __atomic_and(volatile int *p, int *lock, int n);
-extern struct __get_user __atomic_andn(volatile int *p, int *lock, int n);
-extern struct __get_user __atomic_xor(volatile int *p, int *lock, int n);
+extern struct __get_user __atomic_fetch_or(volatile int *p, int *lock, int n);
+extern struct __get_user __atomic_fetch_and(volatile int *p, int *lock, int n);
+extern struct __get_user __atomic_fetch_andn(volatile int *p, int *lock, int n);
+extern struct __get_user __atomic_fetch_xor(volatile int *p, int *lock, int n);
 extern long long __atomic64_cmpxchg(volatile long long *p, int *lock,
 					long long o, long long n);
 extern long long __atomic64_xchg(volatile long long *p, int *lock, long long n);
@@ -259,9 +283,9 @@ extern long long __atomic64_xchg_add(volatile long long *p, int *lock,
 					long long n);
 extern long long __atomic64_xchg_add_unless(volatile long long *p,
 					int *lock, long long o, long long n);
-extern long long __atomic64_and(volatile long long *p, int *lock, long long n);
-extern long long __atomic64_or(volatile long long *p, int *lock, long long n);
-extern long long __atomic64_xor(volatile long long *p, int *lock, long long n);
+extern long long __atomic64_fetch_and(volatile long long *p, int *lock, long long n);
+extern long long __atomic64_fetch_or(volatile long long *p, int *lock, long long n);
+extern long long __atomic64_fetch_xor(volatile long long *p, int *lock, long long n);
 
 /* Return failure from the atomic wrappers. */
 struct __get_user __atomic_bad_address(int __user *addr);
diff --git a/arch/tile/include/asm/atomic_64.h b/arch/tile/include/asm/atomic_64.h
index b0531a623653..4cefa0c9fd81 100644
--- a/arch/tile/include/asm/atomic_64.h
+++ b/arch/tile/include/asm/atomic_64.h
@@ -32,11 +32,6 @@
  * on any routine which updates memory and returns a value.
  */
 
-static inline void atomic_add(int i, atomic_t *v)
-{
-	__insn_fetchadd4((void *)&v->counter, i);
-}
-
 /*
  * Note a subtlety of the locking here.  We are required to provide a
  * full memory barrier before and after the operation.  However, we
@@ -59,28 +54,39 @@ static inline int atomic_add_return(int i, atomic_t *v)
 	return val;
 }
 
-static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+#define ATOMIC_OPS(op)							\
+static inline int atomic_fetch_##op(int i, atomic_t *v)			\
+{									\
+	int val;							\
+	smp_mb();							\
+	val = __insn_fetch##op##4((void *)&v->counter, i);		\
+	smp_mb();							\
+	return val;							\
+}									\
+static inline void atomic_##op(int i, atomic_t *v)			\
+{									\
+	__insn_fetch##op##4((void *)&v->counter, i);			\
+}
+
+ATOMIC_OPS(add)
+ATOMIC_OPS(and)
+ATOMIC_OPS(or)
+
+#undef ATOMIC_OPS
+
+static inline int atomic_fetch_xor(int i, atomic_t *v)
 {
 	int guess, oldval = v->counter;
+	smp_mb();
 	do {
-		if (oldval == u)
-			break;
 		guess = oldval;
-		oldval = cmpxchg(&v->counter, guess, guess + a);
+		__insn_mtspr(SPR_CMPEXCH_VALUE, guess);
+		oldval = __insn_cmpexch4(&v->counter, guess ^ i);
 	} while (guess != oldval);
+	smp_mb();
 	return oldval;
 }
 
-static inline void atomic_and(int i, atomic_t *v)
-{
-	__insn_fetchand4((void *)&v->counter, i);
-}
-
-static inline void atomic_or(int i, atomic_t *v)
-{
-	__insn_fetchor4((void *)&v->counter, i);
-}
-
 static inline void atomic_xor(int i, atomic_t *v)
 {
 	int guess, oldval = v->counter;
@@ -91,6 +97,18 @@ static inline void atomic_xor(int i, atomic_t *v)
 	} while (guess != oldval);
 }
 
+static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+{
+	int guess, oldval = v->counter;
+	do {
+		if (oldval == u)
+			break;
+		guess = oldval;
+		oldval = cmpxchg(&v->counter, guess, guess + a);
+	} while (guess != oldval);
+	return oldval;
+}
+
 /* Now the true 64-bit operations. */
 
 #define ATOMIC64_INIT(i)	{ (i) }
@@ -98,11 +116,6 @@ static inline void atomic_xor(int i, atomic_t *v)
 #define atomic64_read(v)	READ_ONCE((v)->counter)
 #define atomic64_set(v, i)	WRITE_ONCE((v)->counter, (i))
 
-static inline void atomic64_add(long i, atomic64_t *v)
-{
-	__insn_fetchadd((void *)&v->counter, i);
-}
-
 static inline long atomic64_add_return(long i, atomic64_t *v)
 {
 	int val;
@@ -112,26 +125,37 @@ static inline long atomic64_add_return(long i, atomic64_t *v)
 	return val;
 }
 
-static inline long atomic64_add_unless(atomic64_t *v, long a, long u)
+#define ATOMIC64_OPS(op)						\
+static inline long atomic64_fetch_##op(long i, atomic64_t *v)		\
+{									\
+	long val;							\
+	smp_mb();							\
+	val = __insn_fetch##op((void *)&v->counter, i);			\
+	smp_mb();							\
+	return val;							\
+}									\
+static inline void atomic64_##op(long i, atomic64_t *v)			\
+{									\
+	__insn_fetch##op((void *)&v->counter, i);			\
+}
+
+ATOMIC64_OPS(add)
+ATOMIC64_OPS(and)
+ATOMIC64_OPS(or)
+
+#undef ATOMIC64_OPS
+
+static inline long atomic64_fetch_xor(long i, atomic64_t *v)
 {
 	long guess, oldval = v->counter;
+	smp_mb();
 	do {
-		if (oldval == u)
-			break;
 		guess = oldval;
-		oldval = cmpxchg(&v->counter, guess, guess + a);
+		__insn_mtspr(SPR_CMPEXCH_VALUE, guess);
+		oldval = __insn_cmpexch(&v->counter, guess ^ i);
 	} while (guess != oldval);
-	return oldval != u;
-}
-
-static inline void atomic64_and(long i, atomic64_t *v)
-{
-	__insn_fetchand((void *)&v->counter, i);
-}
-
-static inline void atomic64_or(long i, atomic64_t *v)
-{
-	__insn_fetchor((void *)&v->counter, i);
+	smp_mb();
+	return oldval;
 }
 
 static inline void atomic64_xor(long i, atomic64_t *v)
@@ -144,7 +168,20 @@ static inline void atomic64_xor(long i, atomic64_t *v)
 	} while (guess != oldval);
 }
 
+static inline long atomic64_add_unless(atomic64_t *v, long a, long u)
+{
+	long guess, oldval = v->counter;
+	do {
+		if (oldval == u)
+			break;
+		guess = oldval;
+		oldval = cmpxchg(&v->counter, guess, guess + a);
+	} while (guess != oldval);
+	return oldval != u;
+}
+
 #define atomic64_sub_return(i, v)	atomic64_add_return(-(i), (v))
+#define atomic64_fetch_sub(i, v)	atomic64_fetch_add(-(i), (v))
 #define atomic64_sub(i, v)		atomic64_add(-(i), (v))
 #define atomic64_inc_return(v)		atomic64_add_return(1, (v))
 #define atomic64_dec_return(v)		atomic64_sub_return(1, (v))
diff --git a/arch/tile/include/asm/bitops_32.h b/arch/tile/include/asm/bitops_32.h
index bbf7b666f21d..d1406a95f6b7 100644
--- a/arch/tile/include/asm/bitops_32.h
+++ b/arch/tile/include/asm/bitops_32.h
@@ -19,9 +19,9 @@
 #include <asm/barrier.h>
 
 /* Tile-specific routines to support <asm/bitops.h>. */
-unsigned long _atomic_or(volatile unsigned long *p, unsigned long mask);
-unsigned long _atomic_andn(volatile unsigned long *p, unsigned long mask);
-unsigned long _atomic_xor(volatile unsigned long *p, unsigned long mask);
+unsigned long _atomic_fetch_or(volatile unsigned long *p, unsigned long mask);
+unsigned long _atomic_fetch_andn(volatile unsigned long *p, unsigned long mask);
+unsigned long _atomic_fetch_xor(volatile unsigned long *p, unsigned long mask);
 
 /**
  * set_bit - Atomically set a bit in memory
@@ -35,7 +35,7 @@ unsigned long _atomic_xor(volatile unsigned long *p, unsigned long mask);
  */
 static inline void set_bit(unsigned nr, volatile unsigned long *addr)
 {
-	_atomic_or(addr + BIT_WORD(nr), BIT_MASK(nr));
+	_atomic_fetch_or(addr + BIT_WORD(nr), BIT_MASK(nr));
 }
 
 /**
@@ -54,7 +54,7 @@ static inline void set_bit(unsigned nr, volatile unsigned long *addr)
  */
 static inline void clear_bit(unsigned nr, volatile unsigned long *addr)
 {
-	_atomic_andn(addr + BIT_WORD(nr), BIT_MASK(nr));
+	_atomic_fetch_andn(addr + BIT_WORD(nr), BIT_MASK(nr));
 }
 
 /**
@@ -69,7 +69,7 @@ static inline void clear_bit(unsigned nr, volatile unsigned long *addr)
  */
 static inline void change_bit(unsigned nr, volatile unsigned long *addr)
 {
-	_atomic_xor(addr + BIT_WORD(nr), BIT_MASK(nr));
+	_atomic_fetch_xor(addr + BIT_WORD(nr), BIT_MASK(nr));
 }
 
 /**
@@ -85,7 +85,7 @@ static inline int test_and_set_bit(unsigned nr, volatile unsigned long *addr)
 	unsigned long mask = BIT_MASK(nr);
 	addr += BIT_WORD(nr);
 	smp_mb();  /* barrier for proper semantics */
-	return (_atomic_or(addr, mask) & mask) != 0;
+	return (_atomic_fetch_or(addr, mask) & mask) != 0;
 }
 
 /**
@@ -101,7 +101,7 @@ static inline int test_and_clear_bit(unsigned nr, volatile unsigned long *addr)
 	unsigned long mask = BIT_MASK(nr);
 	addr += BIT_WORD(nr);
 	smp_mb();  /* barrier for proper semantics */
-	return (_atomic_andn(addr, mask) & mask) != 0;
+	return (_atomic_fetch_andn(addr, mask) & mask) != 0;
 }
 
 /**
@@ -118,7 +118,7 @@ static inline int test_and_change_bit(unsigned nr,
 	unsigned long mask = BIT_MASK(nr);
 	addr += BIT_WORD(nr);
 	smp_mb();  /* barrier for proper semantics */
-	return (_atomic_xor(addr, mask) & mask) != 0;
+	return (_atomic_fetch_xor(addr, mask) & mask) != 0;
 }
 
 #include <asm-generic/bitops/ext2-atomic.h>
diff --git a/arch/tile/lib/atomic_32.c b/arch/tile/lib/atomic_32.c
index 298df1e9912a..5b6bd932c9c7 100644
--- a/arch/tile/lib/atomic_32.c
+++ b/arch/tile/lib/atomic_32.c
@@ -88,29 +88,29 @@ int _atomic_cmpxchg(int *v, int o, int n)
 }
 EXPORT_SYMBOL(_atomic_cmpxchg);
 
-unsigned long _atomic_or(volatile unsigned long *p, unsigned long mask)
+unsigned long _atomic_fetch_or(volatile unsigned long *p, unsigned long mask)
 {
-	return __atomic_or((int *)p, __atomic_setup(p), mask).val;
+	return __atomic_fetch_or((int *)p, __atomic_setup(p), mask).val;
 }
-EXPORT_SYMBOL(_atomic_or);
+EXPORT_SYMBOL(_atomic_fetch_or);
 
-unsigned long _atomic_and(volatile unsigned long *p, unsigned long mask)
+unsigned long _atomic_fetch_and(volatile unsigned long *p, unsigned long mask)
 {
-	return __atomic_and((int *)p, __atomic_setup(p), mask).val;
+	return __atomic_fetch_and((int *)p, __atomic_setup(p), mask).val;
 }
-EXPORT_SYMBOL(_atomic_and);
+EXPORT_SYMBOL(_atomic_fetch_and);
 
-unsigned long _atomic_andn(volatile unsigned long *p, unsigned long mask)
+unsigned long _atomic_fetch_andn(volatile unsigned long *p, unsigned long mask)
 {
-	return __atomic_andn((int *)p, __atomic_setup(p), mask).val;
+	return __atomic_fetch_andn((int *)p, __atomic_setup(p), mask).val;
 }
-EXPORT_SYMBOL(_atomic_andn);
+EXPORT_SYMBOL(_atomic_fetch_andn);
 
-unsigned long _atomic_xor(volatile unsigned long *p, unsigned long mask)
+unsigned long _atomic_fetch_xor(volatile unsigned long *p, unsigned long mask)
 {
-	return __atomic_xor((int *)p, __atomic_setup(p), mask).val;
+	return __atomic_fetch_xor((int *)p, __atomic_setup(p), mask).val;
 }
-EXPORT_SYMBOL(_atomic_xor);
+EXPORT_SYMBOL(_atomic_fetch_xor);
 
 
 long long _atomic64_xchg(long long *v, long long n)
@@ -142,23 +142,23 @@ long long _atomic64_cmpxchg(long long *v, long long o, long long n)
 }
 EXPORT_SYMBOL(_atomic64_cmpxchg);
 
-long long _atomic64_and(long long *v, long long n)
+long long _atomic64_fetch_and(long long *v, long long n)
 {
-	return __atomic64_and(v, __atomic_setup(v), n);
+	return __atomic64_fetch_and(v, __atomic_setup(v), n);
 }
-EXPORT_SYMBOL(_atomic64_and);
+EXPORT_SYMBOL(_atomic64_fetch_and);
 
-long long _atomic64_or(long long *v, long long n)
+long long _atomic64_fetch_or(long long *v, long long n)
 {
-	return __atomic64_or(v, __atomic_setup(v), n);
+	return __atomic64_fetch_or(v, __atomic_setup(v), n);
 }
-EXPORT_SYMBOL(_atomic64_or);
+EXPORT_SYMBOL(_atomic64_fetch_or);
 
-long long _atomic64_xor(long long *v, long long n)
+long long _atomic64_fetch_xor(long long *v, long long n)
 {
-	return __atomic64_xor(v, __atomic_setup(v), n);
+	return __atomic64_fetch_xor(v, __atomic_setup(v), n);
 }
-EXPORT_SYMBOL(_atomic64_xor);
+EXPORT_SYMBOL(_atomic64_fetch_xor);
 
 /*
  * If any of the atomic or futex routines hit a bad address (not in
diff --git a/arch/tile/lib/atomic_asm_32.S b/arch/tile/lib/atomic_asm_32.S
index f611265633d6..507abdd2bf9a 100644
--- a/arch/tile/lib/atomic_asm_32.S
+++ b/arch/tile/lib/atomic_asm_32.S
@@ -177,10 +177,10 @@ atomic_op _xchg, 32, "move r24, r2"
 atomic_op _xchg_add, 32, "add r24, r22, r2"
 atomic_op _xchg_add_unless, 32, \
 	"sne r26, r22, r2; { bbns r26, 3f; add r24, r22, r3 }"
-atomic_op _or, 32, "or r24, r22, r2"
-atomic_op _and, 32, "and r24, r22, r2"
-atomic_op _andn, 32, "nor r2, r2, zero; and r24, r22, r2"
-atomic_op _xor, 32, "xor r24, r22, r2"
+atomic_op _fetch_or, 32, "or r24, r22, r2"
+atomic_op _fetch_and, 32, "and r24, r22, r2"
+atomic_op _fetch_andn, 32, "nor r2, r2, zero; and r24, r22, r2"
+atomic_op _fetch_xor, 32, "xor r24, r22, r2"
 
 atomic_op 64_cmpxchg, 64, "{ seq r26, r22, r2; seq r27, r23, r3 }; \
 	{ bbns r26, 3f; move r24, r4 }; { bbns r27, 3f; move r25, r5 }"
@@ -192,9 +192,9 @@ atomic_op 64_xchg_add_unless, 64, \
 	{ bbns r26, 3f; add r24, r22, r4 }; \
 	{ bbns r27, 3f; add r25, r23, r5 }; \
 	slt_u r26, r24, r22; add r25, r25, r26"
-atomic_op 64_or, 64, "{ or r24, r22, r2; or r25, r23, r3 }"
-atomic_op 64_and, 64, "{ and r24, r22, r2; and r25, r23, r3 }"
-atomic_op 64_xor, 64, "{ xor r24, r22, r2; xor r25, r23, r3 }"
+atomic_op 64_fetch_or, 64, "{ or r24, r22, r2; or r25, r23, r3 }"
+atomic_op 64_fetch_and, 64, "{ and r24, r22, r2; and r25, r23, r3 }"
+atomic_op 64_fetch_xor, 64, "{ xor r24, r22, r2; xor r25, r23, r3 }"
 
 	jrp     lr              /* happy backtracer */
 
-- 
cgit v1.2.3-59-g8ed1b


From b53d6bedbe781974097fd8c38263f6cc78ff9ea7 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 18 Apr 2016 00:58:25 +0200
Subject: locking/atomic: Remove linux/atomic.h:atomic_fetch_or()

Since all architectures have this implemented now natively, remove this
dead code.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-arch@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/alpha/include/asm/atomic.h    |  2 --
 arch/arc/include/asm/atomic.h      |  2 --
 arch/arm/include/asm/atomic.h      |  2 --
 arch/arm64/include/asm/atomic.h    |  2 --
 arch/avr32/include/asm/atomic.h    |  2 --
 arch/frv/include/asm/atomic.h      |  2 --
 arch/h8300/include/asm/atomic.h    |  2 --
 arch/hexagon/include/asm/atomic.h  |  2 --
 arch/m32r/include/asm/atomic.h     |  2 --
 arch/m68k/include/asm/atomic.h     |  2 --
 arch/metag/include/asm/atomic.h    |  2 --
 arch/mips/include/asm/atomic.h     |  2 --
 arch/mn10300/include/asm/atomic.h  |  2 --
 arch/parisc/include/asm/atomic.h   |  2 --
 arch/s390/include/asm/atomic.h     |  2 --
 arch/sh/include/asm/atomic.h       |  2 --
 arch/sparc/include/asm/atomic.h    |  1 -
 arch/sparc/include/asm/atomic_32.h |  2 --
 arch/tile/include/asm/atomic.h     |  2 --
 arch/x86/include/asm/atomic.h      |  2 --
 arch/xtensa/include/asm/atomic.h   |  2 --
 include/asm-generic/atomic.h       |  2 --
 include/linux/atomic.h             | 21 ---------------------
 23 files changed, 64 deletions(-)

(limited to 'arch/tile')

diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h
index 8243f17999e3..5377ca8bb503 100644
--- a/arch/alpha/include/asm/atomic.h
+++ b/arch/alpha/include/asm/atomic.h
@@ -153,8 +153,6 @@ ATOMIC_OPS(sub)
 #define atomic_andnot atomic_andnot
 #define atomic64_andnot atomic64_andnot
 
-#define atomic_fetch_or atomic_fetch_or
-
 #undef ATOMIC_OPS
 #define ATOMIC_OPS(op, asm)						\
 	ATOMIC_OP(op, asm)						\
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index c066a21caaaf..bd9c51cb2bfd 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -189,8 +189,6 @@ ATOMIC_OPS(sub, -=, sub)
 
 #define atomic_andnot atomic_andnot
 
-#define atomic_fetch_or atomic_fetch_or
-
 #undef ATOMIC_OPS
 #define ATOMIC_OPS(op, c_op, asm_op)					\
 	ATOMIC_OP(op, c_op, asm_op)					\
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index 0feb110ec542..66d0e215a773 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -201,8 +201,6 @@ static inline int atomic_fetch_##op(int i, atomic_t *v)			\
 	return val;							\
 }
 
-#define atomic_fetch_or atomic_fetch_or
-
 static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 {
 	int ret;
diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h
index 3128c3d7c1ff..c0235e0ff849 100644
--- a/arch/arm64/include/asm/atomic.h
+++ b/arch/arm64/include/asm/atomic.h
@@ -128,8 +128,6 @@
 #define __atomic_add_unless(v, a, u)	___atomic_add_unless(v, a, u,)
 #define atomic_andnot			atomic_andnot
 
-#define atomic_fetch_or atomic_fetch_or
-
 /*
  * 64-bit atomic operations.
  */
diff --git a/arch/avr32/include/asm/atomic.h b/arch/avr32/include/asm/atomic.h
index b8681fd495ef..3d5ce38a6f0b 100644
--- a/arch/avr32/include/asm/atomic.h
+++ b/arch/avr32/include/asm/atomic.h
@@ -66,8 +66,6 @@ ATOMIC_OP_RETURN(add, add, r)
 ATOMIC_FETCH_OP (sub, sub, rKs21)
 ATOMIC_FETCH_OP (add, add, r)
 
-#define atomic_fetch_or atomic_fetch_or
-
 #define ATOMIC_OPS(op, asm_op)						\
 ATOMIC_OP_RETURN(op, asm_op, r)						\
 static inline void atomic_##op(int i, atomic_t *v)			\
diff --git a/arch/frv/include/asm/atomic.h b/arch/frv/include/asm/atomic.h
index e3e06da0cd59..1c2a5e264fc7 100644
--- a/arch/frv/include/asm/atomic.h
+++ b/arch/frv/include/asm/atomic.h
@@ -74,8 +74,6 @@ static inline void atomic_dec(atomic_t *v)
 #define atomic_dec_and_test(v)		(atomic_sub_return(1, (v)) == 0)
 #define atomic_inc_and_test(v)		(atomic_add_return(1, (v)) == 0)
 
-#define atomic_fetch_or atomic_fetch_or
-
 /*
  * 64-bit atomic ops
  */
diff --git a/arch/h8300/include/asm/atomic.h b/arch/h8300/include/asm/atomic.h
index 0961b618bdde..349a47a918db 100644
--- a/arch/h8300/include/asm/atomic.h
+++ b/arch/h8300/include/asm/atomic.h
@@ -54,8 +54,6 @@ static inline void atomic_##op(int i, atomic_t *v)		\
 ATOMIC_OP_RETURN(add, +=)
 ATOMIC_OP_RETURN(sub, -=)
 
-#define atomic_fetch_or atomic_fetch_or
-
 #define ATOMIC_OPS(op, c_op)					\
 	ATOMIC_OP(op, c_op)					\
 	ATOMIC_FETCH_OP(op, c_op)
diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h
index 07dbb3332b4a..a62ba368b27d 100644
--- a/arch/hexagon/include/asm/atomic.h
+++ b/arch/hexagon/include/asm/atomic.h
@@ -152,8 +152,6 @@ ATOMIC_OPS(sub)
 #undef ATOMIC_OPS
 #define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op)
 
-#define atomic_fetch_or atomic_fetch_or
-
 ATOMIC_OPS(and)
 ATOMIC_OPS(or)
 ATOMIC_OPS(xor)
diff --git a/arch/m32r/include/asm/atomic.h b/arch/m32r/include/asm/atomic.h
index 8ba8a0ab5d5d..640cc1c7099f 100644
--- a/arch/m32r/include/asm/atomic.h
+++ b/arch/m32r/include/asm/atomic.h
@@ -121,8 +121,6 @@ ATOMIC_OPS(sub)
 #undef ATOMIC_OPS
 #define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op)
 
-#define atomic_fetch_or atomic_fetch_or
-
 ATOMIC_OPS(and)
 ATOMIC_OPS(or)
 ATOMIC_OPS(xor)
diff --git a/arch/m68k/include/asm/atomic.h b/arch/m68k/include/asm/atomic.h
index 5cf9b3b1b6ac..3e03de7ae33b 100644
--- a/arch/m68k/include/asm/atomic.h
+++ b/arch/m68k/include/asm/atomic.h
@@ -119,8 +119,6 @@ ATOMIC_OPS(sub, -=, sub)
 	ATOMIC_OP(op, c_op, asm_op)					\
 	ATOMIC_FETCH_OP(op, c_op, asm_op)
 
-#define atomic_fetch_or atomic_fetch_or
-
 ATOMIC_OPS(and, &=, and)
 ATOMIC_OPS(or, |=, or)
 ATOMIC_OPS(xor, ^=, eor)
diff --git a/arch/metag/include/asm/atomic.h b/arch/metag/include/asm/atomic.h
index 6ca210de8a7d..470e365f04ea 100644
--- a/arch/metag/include/asm/atomic.h
+++ b/arch/metag/include/asm/atomic.h
@@ -17,8 +17,6 @@
 #include <asm/atomic_lnkget.h>
 #endif
 
-#define atomic_fetch_or atomic_fetch_or
-
 #define atomic_add_negative(a, v)       (atomic_add_return((a), (v)) < 0)
 
 #define atomic_dec_return(v) atomic_sub_return(1, (v))
diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h
index 431079f8e483..387ce288334e 100644
--- a/arch/mips/include/asm/atomic.h
+++ b/arch/mips/include/asm/atomic.h
@@ -194,8 +194,6 @@ ATOMIC_OPS(sub, -=, subu)
 	ATOMIC_OP(op, c_op, asm_op)					      \
 	ATOMIC_FETCH_OP(op, c_op, asm_op)
 
-#define atomic_fetch_or atomic_fetch_or
-
 ATOMIC_OPS(and, &=, and)
 ATOMIC_OPS(or, |=, or)
 ATOMIC_OPS(xor, ^=, xor)
diff --git a/arch/mn10300/include/asm/atomic.h b/arch/mn10300/include/asm/atomic.h
index 3580f789f3a6..36389efd45e8 100644
--- a/arch/mn10300/include/asm/atomic.h
+++ b/arch/mn10300/include/asm/atomic.h
@@ -113,8 +113,6 @@ ATOMIC_OPS(sub)
 #undef ATOMIC_OPS
 #define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op)
 
-#define atomic_fetch_or atomic_fetch_or
-
 ATOMIC_OPS(and)
 ATOMIC_OPS(or)
 ATOMIC_OPS(xor)
diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h
index 29df1f871910..5394b9c5f914 100644
--- a/arch/parisc/include/asm/atomic.h
+++ b/arch/parisc/include/asm/atomic.h
@@ -148,8 +148,6 @@ ATOMIC_OPS(sub, -=)
 	ATOMIC_OP(op, c_op)						\
 	ATOMIC_FETCH_OP(op, c_op)
 
-#define atomic_fetch_or atomic_fetch_or
-
 ATOMIC_OPS(and, &=)
 ATOMIC_OPS(or, |=)
 ATOMIC_OPS(xor, ^=)
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index 2324e759b544..d28cc2f5b7b2 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -135,8 +135,6 @@ static inline int atomic_fetch_##op(int i, atomic_t *v)			\
 	return __ATOMIC_LOOP(v, i, __ATOMIC_##OP, __ATOMIC_BARRIER);	\
 }
 
-#define atomic_fetch_or atomic_fetch_or
-
 ATOMIC_OPS(and, AND)
 ATOMIC_OPS(or, OR)
 ATOMIC_OPS(xor, XOR)
diff --git a/arch/sh/include/asm/atomic.h b/arch/sh/include/asm/atomic.h
index d93ed7ce1b2f..c399e1c55685 100644
--- a/arch/sh/include/asm/atomic.h
+++ b/arch/sh/include/asm/atomic.h
@@ -25,8 +25,6 @@
 #include <asm/atomic-irq.h>
 #endif
 
-#define atomic_fetch_or atomic_fetch_or
-
 #define atomic_add_negative(a, v)	(atomic_add_return((a), (v)) < 0)
 #define atomic_dec_return(v)		atomic_sub_return(1, (v))
 #define atomic_inc_return(v)		atomic_add_return(1, (v))
diff --git a/arch/sparc/include/asm/atomic.h b/arch/sparc/include/asm/atomic.h
index 1f741bcc73b7..8ff83d8cc33f 100644
--- a/arch/sparc/include/asm/atomic.h
+++ b/arch/sparc/include/asm/atomic.h
@@ -5,5 +5,4 @@
 #else
 #include <asm/atomic_32.h>
 #endif
-#define atomic_fetch_or atomic_fetch_or
 #endif
diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h
index 5cfb20a599d9..ee3f11c43cda 100644
--- a/arch/sparc/include/asm/atomic_32.h
+++ b/arch/sparc/include/asm/atomic_32.h
@@ -36,8 +36,6 @@ void atomic_set(atomic_t *, int);
 #define atomic_inc(v)		((void)atomic_add_return(        1, (v)))
 #define atomic_dec(v)		((void)atomic_add_return(       -1, (v)))
 
-#define atomic_fetch_or	atomic_fetch_or
-
 #define atomic_and(i, v)	((void)atomic_fetch_and((i), (v)))
 #define atomic_or(i, v)		((void)atomic_fetch_or((i), (v)))
 #define atomic_xor(i, v)	((void)atomic_fetch_xor((i), (v)))
diff --git a/arch/tile/include/asm/atomic.h b/arch/tile/include/asm/atomic.h
index 9807030557c4..8dda3c8ff5ab 100644
--- a/arch/tile/include/asm/atomic.h
+++ b/arch/tile/include/asm/atomic.h
@@ -48,8 +48,6 @@ static inline int atomic_read(const atomic_t *v)
 
 #define atomic_fetch_sub(i, v)		atomic_fetch_add(-(int)(i), (v))
 
-#define atomic_fetch_or atomic_fetch_or
-
 /**
  * atomic_sub - subtract integer from atomic variable
  * @i: integer value to subtract
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 73b8463b89e9..a58b99811105 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -217,8 +217,6 @@ static inline int atomic_fetch_##op(int i, atomic_t *v)		\
 	ATOMIC_OP(op)							\
 	ATOMIC_FETCH_OP(op, c_op)
 
-#define atomic_fetch_or atomic_fetch_or
-
 ATOMIC_OPS(and, &)
 ATOMIC_OPS(or , |)
 ATOMIC_OPS(xor, ^)
diff --git a/arch/xtensa/include/asm/atomic.h b/arch/xtensa/include/asm/atomic.h
index d95a8aa1a6d3..e7a23f2a519a 100644
--- a/arch/xtensa/include/asm/atomic.h
+++ b/arch/xtensa/include/asm/atomic.h
@@ -188,8 +188,6 @@ ATOMIC_OPS(sub)
 #undef ATOMIC_OPS
 #define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op)
 
-#define atomic_fetch_or atomic_fetch_or
-
 ATOMIC_OPS(and)
 ATOMIC_OPS(or)
 ATOMIC_OPS(xor)
diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h
index a2304ccf4ed0..9ed8b987185b 100644
--- a/include/asm-generic/atomic.h
+++ b/include/asm-generic/atomic.h
@@ -137,8 +137,6 @@ ATOMIC_FETCH_OP(and, &)
 #endif
 
 #ifndef atomic_fetch_or
-#define atomic_fetch_or atomic_fetch_or
-
 ATOMIC_FETCH_OP(or, |)
 #endif
 
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 2e6c013ac5a4..0b3802d33125 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -573,27 +573,6 @@ static inline int atomic_dec_if_positive(atomic_t *v)
 }
 #endif
 
-/**
- * atomic_fetch_or - perform *p |= mask and return old value of *p
- * @mask: mask to OR on the atomic_t
- * @p: pointer to atomic_t
- */
-#ifndef atomic_fetch_or
-static inline int atomic_fetch_or(int mask, atomic_t *p)
-{
-	int old, val = atomic_read(p);
-
-	for (;;) {
-		old = atomic_cmpxchg(p, val, val | mask);
-		if (old == val)
-			break;
-		val = old;
-	}
-
-	return old;
-}
-#endif
-
 #ifdef CONFIG_GENERIC_ATOMIC64
 #include <asm-generic/atomic64.h>
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From b7271b9f3e18181559b96a610f4e42bdb04b07f5 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 22 Jun 2016 11:16:49 +0200
Subject: locking/atomic, arch/tile: Fix tilepro build

The tilepro change wasn't ever compiled it seems (the 0day built bot
also doesn't have a toolchain for it).

Make it work.

The thing that makes the patch bigger than desired is namespace
collision with the C11 __atomic builtin functions. So rename the
tilepro functions to __atomic32.

Reported-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Chris Metcalf <cmetcalf@mellanox.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 1af5de9af138 ("locking/atomic, arch/tile: Implement atomic{,64}_fetch_{add,sub,and,or,xor}()")
Link: http://lkml.kernel.org/r/20160622091649.GB30154@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/tile/include/asm/atomic_32.h | 24 ++++++++++++------------
 arch/tile/include/asm/futex.h     | 14 +++++++-------
 arch/tile/lib/atomic_32.c         | 16 ++++++++--------
 arch/tile/lib/atomic_asm_32.S     | 21 +++++++++++++--------
 4 files changed, 40 insertions(+), 35 deletions(-)

(limited to 'arch/tile')

diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h
index da8eb4ed3752..a93774255136 100644
--- a/arch/tile/include/asm/atomic_32.h
+++ b/arch/tile/include/asm/atomic_32.h
@@ -143,15 +143,15 @@ static inline void atomic64_##op(long long i, atomic64_t *v)	\
 {								\
 	_atomic64_fetch_##op(&v->counter, i);			\
 }								\
-static inline void atomic64_##op(long long i, atomic64_t *v)	\
+static inline long long atomic64_fetch_##op(long long i, atomic64_t *v)	\
 {								\
 	smp_mb();						\
 	return _atomic64_fetch_##op(&v->counter, i);		\
 }
 
-ATOMIC64_OP(and)
-ATOMIC64_OP(or)
-ATOMIC64_OP(xor)
+ATOMIC64_OPS(and)
+ATOMIC64_OPS(or)
+ATOMIC64_OPS(xor)
 
 #undef ATOMIC64_OPS
 
@@ -266,16 +266,16 @@ struct __get_user {
 	unsigned long val;
 	int err;
 };
-extern struct __get_user __atomic_cmpxchg(volatile int *p,
+extern struct __get_user __atomic32_cmpxchg(volatile int *p,
 					  int *lock, int o, int n);
-extern struct __get_user __atomic_xchg(volatile int *p, int *lock, int n);
-extern struct __get_user __atomic_xchg_add(volatile int *p, int *lock, int n);
-extern struct __get_user __atomic_xchg_add_unless(volatile int *p,
+extern struct __get_user __atomic32_xchg(volatile int *p, int *lock, int n);
+extern struct __get_user __atomic32_xchg_add(volatile int *p, int *lock, int n);
+extern struct __get_user __atomic32_xchg_add_unless(volatile int *p,
 						  int *lock, int o, int n);
-extern struct __get_user __atomic_fetch_or(volatile int *p, int *lock, int n);
-extern struct __get_user __atomic_fetch_and(volatile int *p, int *lock, int n);
-extern struct __get_user __atomic_fetch_andn(volatile int *p, int *lock, int n);
-extern struct __get_user __atomic_fetch_xor(volatile int *p, int *lock, int n);
+extern struct __get_user __atomic32_fetch_or(volatile int *p, int *lock, int n);
+extern struct __get_user __atomic32_fetch_and(volatile int *p, int *lock, int n);
+extern struct __get_user __atomic32_fetch_andn(volatile int *p, int *lock, int n);
+extern struct __get_user __atomic32_fetch_xor(volatile int *p, int *lock, int n);
 extern long long __atomic64_cmpxchg(volatile long long *p, int *lock,
 					long long o, long long n);
 extern long long __atomic64_xchg(volatile long long *p, int *lock, long long n);
diff --git a/arch/tile/include/asm/futex.h b/arch/tile/include/asm/futex.h
index 1a6ef1b69cb1..e64a1b75fc38 100644
--- a/arch/tile/include/asm/futex.h
+++ b/arch/tile/include/asm/futex.h
@@ -80,16 +80,16 @@
 		ret = gu.err;						\
 	}
 
-#define __futex_set() __futex_call(__atomic_xchg)
-#define __futex_add() __futex_call(__atomic_xchg_add)
-#define __futex_or() __futex_call(__atomic_or)
-#define __futex_andn() __futex_call(__atomic_andn)
-#define __futex_xor() __futex_call(__atomic_xor)
+#define __futex_set() __futex_call(__atomic32_xchg)
+#define __futex_add() __futex_call(__atomic32_xchg_add)
+#define __futex_or() __futex_call(__atomic32_fetch_or)
+#define __futex_andn() __futex_call(__atomic32_fetch_andn)
+#define __futex_xor() __futex_call(__atomic32_fetch_xor)
 
 #define __futex_cmpxchg()						\
 	{								\
-		struct __get_user gu = __atomic_cmpxchg((u32 __force *)uaddr, \
-							lock, oldval, oparg); \
+		struct __get_user gu = __atomic32_cmpxchg((u32 __force *)uaddr, \
+							  lock, oldval, oparg); \
 		val = gu.val;						\
 		ret = gu.err;						\
 	}
diff --git a/arch/tile/lib/atomic_32.c b/arch/tile/lib/atomic_32.c
index 5b6bd932c9c7..f8128800dbf5 100644
--- a/arch/tile/lib/atomic_32.c
+++ b/arch/tile/lib/atomic_32.c
@@ -61,13 +61,13 @@ static inline int *__atomic_setup(volatile void *v)
 
 int _atomic_xchg(int *v, int n)
 {
-	return __atomic_xchg(v, __atomic_setup(v), n).val;
+	return __atomic32_xchg(v, __atomic_setup(v), n).val;
 }
 EXPORT_SYMBOL(_atomic_xchg);
 
 int _atomic_xchg_add(int *v, int i)
 {
-	return __atomic_xchg_add(v, __atomic_setup(v), i).val;
+	return __atomic32_xchg_add(v, __atomic_setup(v), i).val;
 }
 EXPORT_SYMBOL(_atomic_xchg_add);
 
@@ -78,37 +78,37 @@ int _atomic_xchg_add_unless(int *v, int a, int u)
 	 * to use the first argument consistently as the "old value"
 	 * in the assembly, as is done for _atomic_cmpxchg().
 	 */
-	return __atomic_xchg_add_unless(v, __atomic_setup(v), u, a).val;
+	return __atomic32_xchg_add_unless(v, __atomic_setup(v), u, a).val;
 }
 EXPORT_SYMBOL(_atomic_xchg_add_unless);
 
 int _atomic_cmpxchg(int *v, int o, int n)
 {
-	return __atomic_cmpxchg(v, __atomic_setup(v), o, n).val;
+	return __atomic32_cmpxchg(v, __atomic_setup(v), o, n).val;
 }
 EXPORT_SYMBOL(_atomic_cmpxchg);
 
 unsigned long _atomic_fetch_or(volatile unsigned long *p, unsigned long mask)
 {
-	return __atomic_fetch_or((int *)p, __atomic_setup(p), mask).val;
+	return __atomic32_fetch_or((int *)p, __atomic_setup(p), mask).val;
 }
 EXPORT_SYMBOL(_atomic_fetch_or);
 
 unsigned long _atomic_fetch_and(volatile unsigned long *p, unsigned long mask)
 {
-	return __atomic_fetch_and((int *)p, __atomic_setup(p), mask).val;
+	return __atomic32_fetch_and((int *)p, __atomic_setup(p), mask).val;
 }
 EXPORT_SYMBOL(_atomic_fetch_and);
 
 unsigned long _atomic_fetch_andn(volatile unsigned long *p, unsigned long mask)
 {
-	return __atomic_fetch_andn((int *)p, __atomic_setup(p), mask).val;
+	return __atomic32_fetch_andn((int *)p, __atomic_setup(p), mask).val;
 }
 EXPORT_SYMBOL(_atomic_fetch_andn);
 
 unsigned long _atomic_fetch_xor(volatile unsigned long *p, unsigned long mask)
 {
-	return __atomic_fetch_xor((int *)p, __atomic_setup(p), mask).val;
+	return __atomic32_fetch_xor((int *)p, __atomic_setup(p), mask).val;
 }
 EXPORT_SYMBOL(_atomic_fetch_xor);
 
diff --git a/arch/tile/lib/atomic_asm_32.S b/arch/tile/lib/atomic_asm_32.S
index 507abdd2bf9a..1a70e6c0f259 100644
--- a/arch/tile/lib/atomic_asm_32.S
+++ b/arch/tile/lib/atomic_asm_32.S
@@ -172,15 +172,20 @@ STD_ENTRY_SECTION(__atomic\name, .text.atomic)
 	.endif
 	.endm
 
-atomic_op _cmpxchg, 32, "seq r26, r22, r2; { bbns r26, 3f; move r24, r3 }"
-atomic_op _xchg, 32, "move r24, r2"
-atomic_op _xchg_add, 32, "add r24, r22, r2"
-atomic_op _xchg_add_unless, 32, \
+
+/*
+ * Use __atomic32 prefix to avoid collisions with GCC builtin __atomic functions.
+ */
+
+atomic_op 32_cmpxchg, 32, "seq r26, r22, r2; { bbns r26, 3f; move r24, r3 }"
+atomic_op 32_xchg, 32, "move r24, r2"
+atomic_op 32_xchg_add, 32, "add r24, r22, r2"
+atomic_op 32_xchg_add_unless, 32, \
 	"sne r26, r22, r2; { bbns r26, 3f; add r24, r22, r3 }"
-atomic_op _fetch_or, 32, "or r24, r22, r2"
-atomic_op _fetch_and, 32, "and r24, r22, r2"
-atomic_op _fetch_andn, 32, "nor r2, r2, zero; and r24, r22, r2"
-atomic_op _fetch_xor, 32, "xor r24, r22, r2"
+atomic_op 32_fetch_or, 32, "or r24, r22, r2"
+atomic_op 32_fetch_and, 32, "and r24, r22, r2"
+atomic_op 32_fetch_andn, 32, "nor r2, r2, zero; and r24, r22, r2"
+atomic_op 32_fetch_xor, 32, "xor r24, r22, r2"
 
 atomic_op 64_cmpxchg, 64, "{ seq r26, r22, r2; seq r27, r23, r3 }; \
 	{ bbns r26, 3f; move r24, r4 }; { bbns r27, 3f; move r25, r5 }"
-- 
cgit v1.2.3-59-g8ed1b