RISC-V Atomic Cleanups

This patch set is the result of some feedback that filtered through after our original patch set was reviewed, some of which was the result of me missing some email. It contains: * A new READ_ONCE in arch_spin_is_locked() * __test_and_op_bit_ord() is now actually ordered * Improvements to various comments * Removal of some dead code
author: Palmer Dabbelt <palmer@sifive.com> 2017-12-01 13:10:42 -0800
committer: Palmer Dabbelt <palmer@sifive.com> 2017-12-01 13:10:42 -0800
commit: f8182f613c9887744eb469b1a3352636481cb395 (patch)
tree: bb7872d1a46c98aa3fe9cd7c27f24bde1a4d1f90
parent: Linux 4.15-rc1 (diff)
parent: RISC-V: remove spin_unlock_wait() (diff)
download: linux-dev-f8182f613c9887744eb469b1a3352636481cb395.tar.xz
linux-dev-f8182f613c9887744eb469b1a3352636481cb395.zip
5 files changed, 60 insertions, 84 deletions
diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h
index e2e37c57cbeb..e65d1cd89e28 100644
--- a/arch/riscv/include/asm/atomic.h
+++ b/arch/riscv/include/asm/atomic.h
@@ -50,30 +50,30 @@ static __always_inline void atomic64_set(atomic64_t *v, long i)
  * have the AQ or RL bits set.  These don't return anything, so there's only
  * one version to worry about.
  */
-#define ATOMIC_OP(op, asm_op, c_op, I, asm_type, c_type, prefix)				\
-static __always_inline void atomic##prefix##_##op(c_type i, atomic##prefix##_t *v)		\
-{												\
-	__asm__ __volatile__ (									\
-		"amo" #asm_op "." #asm_type " zero, %1, %0"					\
-		: "+A" (v->counter)								\
-		: "r" (I)									\
-		: "memory");									\
+#define ATOMIC_OP(op, asm_op, I, asm_type, c_type, prefix)				\
+static __always_inline void atomic##prefix##_##op(c_type i, atomic##prefix##_t *v)	\
+{											\
+	__asm__ __volatile__ (								\
+		"amo" #asm_op "." #asm_type " zero, %1, %0"				\
+		: "+A" (v->counter)							\
+		: "r" (I)								\
+		: "memory");								\
 }
 
 #ifdef CONFIG_GENERIC_ATOMIC64
-#define ATOMIC_OPS(op, asm_op, c_op, I)			\
-        ATOMIC_OP (op, asm_op, c_op, I, w,  int,   )
+#define ATOMIC_OPS(op, asm_op, I)			\
+        ATOMIC_OP (op, asm_op, I, w,  int,   )
 #else
-#define ATOMIC_OPS(op, asm_op, c_op, I)			\
-        ATOMIC_OP (op, asm_op, c_op, I, w,  int,   )	\
-        ATOMIC_OP (op, asm_op, c_op, I, d, long, 64)
+#define ATOMIC_OPS(op, asm_op, I)			\
+        ATOMIC_OP (op, asm_op, I, w,  int,   )	\
+        ATOMIC_OP (op, asm_op, I, d, long, 64)
 #endif
 
-ATOMIC_OPS(add, add, +,  i)
-ATOMIC_OPS(sub, add, +, -i)
-ATOMIC_OPS(and, and, &,  i)
-ATOMIC_OPS( or,  or, |,  i)
-ATOMIC_OPS(xor, xor, ^,  i)
+ATOMIC_OPS(add, add,  i)
+ATOMIC_OPS(sub, add, -i)
+ATOMIC_OPS(and, and,  i)
+ATOMIC_OPS( or,  or,  i)
+ATOMIC_OPS(xor, xor,  i)
 
 #undef ATOMIC_OP
 #undef ATOMIC_OPS
@@ -83,7 +83,7 @@ ATOMIC_OPS(xor, xor, ^,  i)
  * There's two flavors of these: the arithmatic ops have both fetch and return
  * versions, while the logical ops only have fetch versions.
  */
-#define ATOMIC_FETCH_OP(op, asm_op, c_op, I, asm_or, c_or, asm_type, c_type, prefix)			\
+#define ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, asm_type, c_type, prefix)				\
 static __always_inline c_type atomic##prefix##_fetch_##op##c_or(c_type i, atomic##prefix##_t *v)	\
 {													\
 	register c_type ret;										\
@@ -103,13 +103,13 @@ static __always_inline c_type atomic##prefix##_##op##_return##c_or(c_type i, ato
 
 #ifdef CONFIG_GENERIC_ATOMIC64
 #define ATOMIC_OPS(op, asm_op, c_op, I, asm_or, c_or)				\
-        ATOMIC_FETCH_OP (op, asm_op, c_op, I, asm_or, c_or, w,  int,   )	\
+        ATOMIC_FETCH_OP (op, asm_op,       I, asm_or, c_or, w,  int,   )	\
         ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_or, c_or, w,  int,   )
 #else
 #define ATOMIC_OPS(op, asm_op, c_op, I, asm_or, c_or)				\
-        ATOMIC_FETCH_OP (op, asm_op, c_op, I, asm_or, c_or, w,  int,   )	\
+        ATOMIC_FETCH_OP (op, asm_op,       I, asm_or, c_or, w,  int,   )	\
         ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_or, c_or, w,  int,   )	\
-        ATOMIC_FETCH_OP (op, asm_op, c_op, I, asm_or, c_or, d, long, 64)	\
+        ATOMIC_FETCH_OP (op, asm_op,       I, asm_or, c_or, d, long, 64)	\
         ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_or, c_or, d, long, 64)
 #endif
 
@@ -126,28 +126,28 @@ ATOMIC_OPS(sub, add, +, -i, .aqrl,         )
 #undef ATOMIC_OPS
 
 #ifdef CONFIG_GENERIC_ATOMIC64
-#define ATOMIC_OPS(op, asm_op, c_op, I, asm_or, c_or)				\
-        ATOMIC_FETCH_OP(op, asm_op, c_op, I, asm_or, c_or, w,  int,   )
+#define ATOMIC_OPS(op, asm_op, I, asm_or, c_or)				\
+        ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, w,  int,   )
 #else
-#define ATOMIC_OPS(op, asm_op, c_op, I, asm_or, c_or)				\
-        ATOMIC_FETCH_OP(op, asm_op, c_op, I, asm_or, c_or, w,  int,   )		\
-        ATOMIC_FETCH_OP(op, asm_op, c_op, I, asm_or, c_or, d, long, 64)
+#define ATOMIC_OPS(op, asm_op, I, asm_or, c_or)				\
+        ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, w,  int,   )	\
+        ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, d, long, 64)
 #endif
 
-ATOMIC_OPS(and, and, &,  i,      , _relaxed)
-ATOMIC_OPS(and, and, &,  i, .aq  , _acquire)
-ATOMIC_OPS(and, and, &,  i, .rl  , _release)
-ATOMIC_OPS(and, and, &,  i, .aqrl,         )
+ATOMIC_OPS(and, and, i,      , _relaxed)
+ATOMIC_OPS(and, and, i, .aq  , _acquire)
+ATOMIC_OPS(and, and, i, .rl  , _release)
+ATOMIC_OPS(and, and, i, .aqrl,         )
 
-ATOMIC_OPS( or,  or, |,  i,      , _relaxed)
-ATOMIC_OPS( or,  or, |,  i, .aq  , _acquire)
-ATOMIC_OPS( or,  or, |,  i, .rl  , _release)
-ATOMIC_OPS( or,  or, |,  i, .aqrl,         )
+ATOMIC_OPS( or,  or, i,      , _relaxed)
+ATOMIC_OPS( or,  or, i, .aq  , _acquire)
+ATOMIC_OPS( or,  or, i, .rl  , _release)
+ATOMIC_OPS( or,  or, i, .aqrl,         )
 
-ATOMIC_OPS(xor, xor, ^,  i,      , _relaxed)
-ATOMIC_OPS(xor, xor, ^,  i, .aq  , _acquire)
-ATOMIC_OPS(xor, xor, ^,  i, .rl  , _release)
-ATOMIC_OPS(xor, xor, ^,  i, .aqrl,         )
+ATOMIC_OPS(xor, xor, i,      , _relaxed)
+ATOMIC_OPS(xor, xor, i, .aq  , _acquire)
+ATOMIC_OPS(xor, xor, i, .rl  , _release)
+ATOMIC_OPS(xor, xor, i, .aqrl,         )
 
 #undef ATOMIC_OPS
 
@@ -182,13 +182,13 @@ ATOMIC_OPS(add_negative, add,  <, 0)
 #undef ATOMIC_OP
 #undef ATOMIC_OPS
 
-#define ATOMIC_OP(op, func_op, c_op, I, c_type, prefix)				\
+#define ATOMIC_OP(op, func_op, I, c_type, prefix)				\
 static __always_inline void atomic##prefix##_##op(atomic##prefix##_t *v)	\
 {										\
 	atomic##prefix##_##func_op(I, v);					\
 }
 
-#define ATOMIC_FETCH_OP(op, func_op, c_op, I, c_type, prefix)				\
+#define ATOMIC_FETCH_OP(op, func_op, I, c_type, prefix)					\
 static __always_inline c_type atomic##prefix##_fetch_##op(atomic##prefix##_t *v)	\
 {											\
 	return atomic##prefix##_fetch_##func_op(I, v);					\
@@ -202,16 +202,16 @@ static __always_inline c_type atomic##prefix##_##op##_return(atomic##prefix##_t
 
 #ifdef CONFIG_GENERIC_ATOMIC64
 #define ATOMIC_OPS(op, asm_op, c_op, I)						\
-        ATOMIC_OP       (op, asm_op, c_op, I,  int,   )				\
-        ATOMIC_FETCH_OP (op, asm_op, c_op, I,  int,   )				\
+        ATOMIC_OP       (op, asm_op,       I,  int,   )				\
+        ATOMIC_FETCH_OP (op, asm_op,       I,  int,   )				\
         ATOMIC_OP_RETURN(op, asm_op, c_op, I,  int,   )
 #else
 #define ATOMIC_OPS(op, asm_op, c_op, I)						\
-        ATOMIC_OP       (op, asm_op, c_op, I,  int,   )				\
-        ATOMIC_FETCH_OP (op, asm_op, c_op, I,  int,   )				\
+        ATOMIC_OP       (op, asm_op,       I,  int,   )				\
+        ATOMIC_FETCH_OP (op, asm_op,       I,  int,   )				\
         ATOMIC_OP_RETURN(op, asm_op, c_op, I,  int,   )				\
-        ATOMIC_OP       (op, asm_op, c_op, I, long, 64)				\
-        ATOMIC_FETCH_OP (op, asm_op, c_op, I, long, 64)				\
+        ATOMIC_OP       (op, asm_op,       I, long, 64)				\
+        ATOMIC_FETCH_OP (op, asm_op,       I, long, 64)				\
         ATOMIC_OP_RETURN(op, asm_op, c_op, I, long, 64)
 #endif
 
@@ -300,8 +300,13 @@ static __always_inline long atomic64_inc_not_zero(atomic64_t *v)
 
 /*
  * atomic_{cmp,}xchg is required to have exactly the same ordering semantics as
- * {cmp,}xchg and the operations that return, so they need a barrier.  We just
- * use the other implementations directly.
+ * {cmp,}xchg and the operations that return, so they need a barrier.
+ */
+/*
+ * FIXME: atomic_cmpxchg_{acquire,release,relaxed} are all implemented by
+ * assigning the same barrier to both the LR and SC operations, but that might
+ * not make any sense.  We're waiting on a memory model specification to
+ * determine exactly what the right thing to do is here.
  */
 #define ATOMIC_OP(c_t, prefix, c_or, size, asm_or)						\
 static __always_inline c_t atomic##prefix##_cmpxchg##c_or(atomic##prefix##_t *v, c_t o, c_t n) 	\
diff --git a/arch/riscv/include/asm/barrier.h b/arch/riscv/include/asm/barrier.h
index 183534b7c39b..773c4e039cd7 100644
--- a/arch/riscv/include/asm/barrier.h
+++ b/arch/riscv/include/asm/barrier.h
@@ -38,29 +38,6 @@
 #define smp_rmb()	RISCV_FENCE(r,r)
 #define smp_wmb()	RISCV_FENCE(w,w)
 
-/*
- * These fences exist to enforce ordering around the relaxed AMOs.  The
- * documentation defines that
- * "
- *     atomic_fetch_add();
- *   is equivalent to:
- *     smp_mb__before_atomic();
- *     atomic_fetch_add_relaxed();
- *     smp_mb__after_atomic();
- * "
- * So we emit full fences on both sides.
- */
-#define __smb_mb__before_atomic()	smp_mb()
-#define __smb_mb__after_atomic()	smp_mb()
-
-/*
- * These barriers prevent accesses performed outside a spinlock from being moved
- * inside a spinlock.  Since RISC-V sets the aq/rl bits on our spinlock only
- * enforce release consistency, we need full fences here.
- */
-#define smb_mb__before_spinlock()	smp_mb()
-#define smb_mb__after_spinlock()	smp_mb()
-
 #include <asm-generic/barrier.h>
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h
index 7c281ef1d583..f30daf26f08f 100644
--- a/arch/riscv/include/asm/bitops.h
+++ b/arch/riscv/include/asm/bitops.h
@@ -67,7 +67,7 @@
 		: "memory");
 
 #define __test_and_op_bit(op, mod, nr, addr) 			\
-	__test_and_op_bit_ord(op, mod, nr, addr, )
+	__test_and_op_bit_ord(op, mod, nr, addr, .aqrl)
 #define __op_bit(op, mod, nr, addr)				\
 	__op_bit_ord(op, mod, nr, addr, )
 
diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
index 04c71d938afd..2fd27e8ef1fd 100644
--- a/arch/riscv/include/asm/spinlock.h
+++ b/arch/riscv/include/asm/spinlock.h
@@ -24,7 +24,7 @@
 
 /* FIXME: Replace this with a ticket lock, like MIPS. */
 
-#define arch_spin_is_locked(x)	((x)->lock != 0)
+#define arch_spin_is_locked(x)	(READ_ONCE((x)->lock) != 0)
 
 static inline void arch_spin_unlock(arch_spinlock_t *lock)
 {
@@ -58,15 +58,6 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
 	}
 }
 
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	smp_rmb();
-	do {
-		cpu_relax();
-	} while (arch_spin_is_locked(lock));
-	smp_acquire__after_ctrl_dep();
-}
-
 /***********************************************************/
 
 static inline void arch_read_lock(arch_rwlock_t *lock)
diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h
index 5ee4ae370b5e..c79fab3d377d 100644
--- a/arch/riscv/include/asm/tlbflush.h
+++ b/arch/riscv/include/asm/tlbflush.h
@@ -17,7 +17,10 @@
 
 #ifdef CONFIG_MMU
 
-/* Flush entire local TLB */
+/*
+ * Flush entire local TLB.  'sfence.vma' implicitly fences with the instruction
+ * cache as well, so a 'fence.i' is not necessary.
+ */
 static inline void local_flush_tlb_all(void)
 {
 	__asm__ __volatile__ ("sfence.vma" : : : "memory");
author	Palmer Dabbelt <palmer@sifive.com>	2017-12-01 13:10:42 -0800
committer	Palmer Dabbelt <palmer@sifive.com>	2017-12-01 13:10:42 -0800
commit	f8182f613c9887744eb469b1a3352636481cb395 (patch)
tree	bb7872d1a46c98aa3fe9cd7c27f24bde1a4d1f90
parent	Linux 4.15-rc1 (diff)
parent	RISC-V: remove spin_unlock_wait() (diff)
download	linux-dev-f8182f613c9887744eb469b1a3352636481cb395.tar.xz linux-dev-f8182f613c9887744eb469b1a3352636481cb395.zip