From 8a449718414ff10b9d5559ed3e8e09c7178774f2 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 11 Dec 2014 15:01:55 -0800 Subject: arch: Cleanup read_barrier_depends() and comments This patch is meant to cleanup the handling of read_barrier_depends and smp_read_barrier_depends. In multiple spots in the kernel headers read_barrier_depends is defined as "do {} while (0)", however we then go into the SMP vs non-SMP sections and have the SMP version reference read_barrier_depends, and the non-SMP define it as yet another empty do/while. With this commit I went through and cleaned out the duplicate definitions and reduced the number of definitions down to 2 per header. In addition I moved the 50 line comments for the macro from the x86 and mips headers that defined it as an empty do/while to those that were actually defining the macro, alpha and blackfin. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- arch/x86/include/asm/barrier.h | 59 +++--------------------------------------- arch/x86/um/asm/barrier.h | 7 +++-- 2 files changed, 6 insertions(+), 60 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index 0f4460b5636d..5238000285c1 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -24,60 +24,6 @@ #define wmb() asm volatile("sfence" ::: "memory") #endif -/** - * read_barrier_depends - Flush all pending reads that subsequents reads - * depend on. - * - * No data-dependent reads from memory-like regions are ever reordered - * over this barrier. All reads preceding this primitive are guaranteed - * to access memory (but not necessarily other CPUs' caches) before any - * reads following this primitive that depend on the data return by - * any of the preceding reads. This primitive is much lighter weight than - * rmb() on most CPUs, and is never heavier weight than is - * rmb(). - * - * These ordering constraints are respected by both the local CPU - * and the compiler. - * - * Ordering is not guaranteed by anything other than these primitives, - * not even by data dependencies. See the documentation for - * memory_barrier() for examples and URLs to more information. - * - * For example, the following code would force ordering (the initial - * value of "a" is zero, "b" is one, and "p" is "&a"): - * - * - * CPU 0 CPU 1 - * - * b = 2; - * memory_barrier(); - * p = &b; q = p; - * read_barrier_depends(); - * d = *q; - * - * - * because the read of "*q" depends on the read of "p" and these - * two reads are separated by a read_barrier_depends(). However, - * the following code, with the same initial values for "a" and "b": - * - * - * CPU 0 CPU 1 - * - * a = 2; - * memory_barrier(); - * b = 3; y = b; - * read_barrier_depends(); - * x = a; - * - * - * does not enforce ordering, since there is no data dependency between - * the read of "a" and the read of "b". Therefore, on some CPUs, such - * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb() - * in cases like this where there are no data dependencies. - **/ - -#define read_barrier_depends() do { } while (0) - #ifdef CONFIG_SMP #define smp_mb() mb() #ifdef CONFIG_X86_PPRO_FENCE @@ -86,16 +32,17 @@ # define smp_rmb() barrier() #endif #define smp_wmb() barrier() -#define smp_read_barrier_depends() read_barrier_depends() #define set_mb(var, value) do { (void)xchg(&var, value); } while (0) #else /* !SMP */ #define smp_mb() barrier() #define smp_rmb() barrier() #define smp_wmb() barrier() -#define smp_read_barrier_depends() do { } while (0) #define set_mb(var, value) do { var = value; barrier(); } while (0) #endif /* SMP */ +#define read_barrier_depends() do { } while (0) +#define smp_read_barrier_depends() do { } while (0) + #if defined(CONFIG_X86_PPRO_FENCE) /* diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h index cc04e67bfd05..d6511d954e2b 100644 --- a/arch/x86/um/asm/barrier.h +++ b/arch/x86/um/asm/barrier.h @@ -29,8 +29,6 @@ #endif /* CONFIG_X86_32 */ -#define read_barrier_depends() do { } while (0) - #ifdef CONFIG_SMP #define smp_mb() mb() @@ -42,7 +40,6 @@ #define smp_wmb() barrier() -#define smp_read_barrier_depends() read_barrier_depends() #define set_mb(var, value) do { (void)xchg(&var, value); } while (0) #else /* CONFIG_SMP */ @@ -50,11 +47,13 @@ #define smp_mb() barrier() #define smp_rmb() barrier() #define smp_wmb() barrier() -#define smp_read_barrier_depends() do { } while (0) #define set_mb(var, value) do { var = value; barrier(); } while (0) #endif /* CONFIG_SMP */ +#define read_barrier_depends() do { } while (0) +#define smp_read_barrier_depends() do { } while (0) + /* * Stop RDTSC speculation. This is needed when you need to use RDTSC * (or get_cycles or vread that possibly accesses the TSC) in a defined -- cgit v1.2.3-59-g8ed1b From 1077fa36f23e259858caf6f269a47393a5aff523 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 11 Dec 2014 15:02:06 -0800 Subject: arch: Add lightweight memory barriers dma_rmb() and dma_wmb() There are a number of situations where the mandatory barriers rmb() and wmb() are used to order memory/memory operations in the device drivers and those barriers are much heavier than they actually need to be. For example in the case of PowerPC wmb() calls the heavy-weight sync instruction when for coherent memory operations all that is really needed is an lsync or eieio instruction. This commit adds a coherent only version of the mandatory memory barriers rmb() and wmb(). In most cases this should result in the barrier being the same as the SMP barriers for the SMP case, however in some cases we use a barrier that is somewhere in between rmb() and smp_rmb(). For example on ARM the rmb barriers break down as follows: Barrier Call Explanation --------- -------- ---------------------------------- rmb() dsb() Data synchronization barrier - system dma_rmb() dmb(osh) data memory barrier - outer sharable smp_rmb() dmb(ish) data memory barrier - inner sharable These new barriers are not as safe as the standard rmb() and wmb(). Specifically they do not guarantee ordering between coherent and incoherent memories. The primary use case for these would be to enforce ordering of reads and writes when accessing coherent memory that is shared between the CPU and a device. It may also be noted that there is no dma_mb(). Most architectures don't provide a good mechanism for performing a coherent only full barrier without resorting to the same mechanism used in mb(). As such there isn't much to be gained in trying to define such a function. Cc: Frederic Weisbecker Cc: Mathieu Desnoyers Cc: Michael Ellerman Cc: Michael Neuling Cc: Russell King Cc: Geert Uytterhoeven Cc: Heiko Carstens Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Tony Luck Cc: Oleg Nesterov Cc: "Paul E. McKenney" Cc: Peter Zijlstra Cc: Ingo Molnar Cc: David Miller Acked-by: Benjamin Herrenschmidt Acked-by: Will Deacon Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- Documentation/memory-barriers.txt | 42 +++++++++++++++++++++++++++++++++++++ arch/arm/include/asm/barrier.h | 4 ++++ arch/arm64/include/asm/barrier.h | 3 +++ arch/ia64/include/asm/barrier.h | 3 +++ arch/metag/include/asm/barrier.h | 14 ++++++------- arch/mips/include/asm/barrier.h | 9 ++++---- arch/powerpc/include/asm/barrier.h | 13 +++++++----- arch/s390/include/asm/barrier.h | 2 ++ arch/sparc/include/asm/barrier_64.h | 3 +++ arch/x86/include/asm/barrier.h | 11 ++++++---- arch/x86/um/asm/barrier.h | 13 ++++++------ include/asm-generic/barrier.h | 8 +++++++ 12 files changed, 99 insertions(+), 26 deletions(-) (limited to 'arch/x86') diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index 7ee2ae6d5451..70a09f8a0383 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -1633,6 +1633,48 @@ There are some more advanced barrier functions: operations" subsection for information on where to use these. + (*) dma_wmb(); + (*) dma_rmb(); + + These are for use with consistent memory to guarantee the ordering + of writes or reads of shared memory accessible to both the CPU and a + DMA capable device. + + For example, consider a device driver that shares memory with a device + and uses a descriptor status value to indicate if the descriptor belongs + to the device or the CPU, and a doorbell to notify it when new + descriptors are available: + + if (desc->status != DEVICE_OWN) { + /* do not read data until we own descriptor */ + dma_rmb(); + + /* read/modify data */ + read_data = desc->data; + desc->data = write_data; + + /* flush modifications before status update */ + dma_wmb(); + + /* assign ownership */ + desc->status = DEVICE_OWN; + + /* force memory to sync before notifying device via MMIO */ + wmb(); + + /* notify device of new descriptors */ + writel(DESC_NOTIFY, doorbell); + } + + The dma_rmb() allows us guarantee the device has released ownership + before we read the data from the descriptor, and he dma_wmb() allows + us to guarantee the data is written to the descriptor before the device + can see it now has ownership. The wmb() is needed to guarantee that the + cache coherent memory writes have completed before attempting a write to + the cache incoherent MMIO region. + + See Documentation/DMA-API.txt for more information on consistent memory. + MMIO WRITE BARRIER ------------------ diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h index c6a3e73a6e24..d2f81e6b8c1c 100644 --- a/arch/arm/include/asm/barrier.h +++ b/arch/arm/include/asm/barrier.h @@ -43,10 +43,14 @@ #define mb() do { dsb(); outer_sync(); } while (0) #define rmb() dsb() #define wmb() do { dsb(st); outer_sync(); } while (0) +#define dma_rmb() dmb(osh) +#define dma_wmb() dmb(oshst) #else #define mb() barrier() #define rmb() barrier() #define wmb() barrier() +#define dma_rmb() barrier() +#define dma_wmb() barrier() #endif #ifndef CONFIG_SMP diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index 6389d60574d9..a5abb0062d6e 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -32,6 +32,9 @@ #define rmb() dsb(ld) #define wmb() dsb(st) +#define dma_rmb() dmb(oshld) +#define dma_wmb() dmb(oshst) + #ifndef CONFIG_SMP #define smp_mb() barrier() #define smp_rmb() barrier() diff --git a/arch/ia64/include/asm/barrier.h b/arch/ia64/include/asm/barrier.h index e8fffb03963c..f6769eb2bbf9 100644 --- a/arch/ia64/include/asm/barrier.h +++ b/arch/ia64/include/asm/barrier.h @@ -39,6 +39,9 @@ #define rmb() mb() #define wmb() mb() +#define dma_rmb() mb() +#define dma_wmb() mb() + #ifdef CONFIG_SMP # define smp_mb() mb() #else diff --git a/arch/metag/include/asm/barrier.h b/arch/metag/include/asm/barrier.h index 6d8b8c9b7c25..d703d8e26a65 100644 --- a/arch/metag/include/asm/barrier.h +++ b/arch/metag/include/asm/barrier.h @@ -4,8 +4,6 @@ #include #define nop() asm volatile ("NOP") -#define mb() wmb() -#define rmb() barrier() #ifdef CONFIG_METAG_META21 @@ -41,11 +39,13 @@ static inline void wr_fence(void) #endif /* !CONFIG_METAG_META21 */ -static inline void wmb(void) -{ - /* flush writes through the write combiner */ - wr_fence(); -} +/* flush writes through the write combiner */ +#define mb() wr_fence() +#define rmb() barrier() +#define wmb() mb() + +#define dma_rmb() rmb() +#define dma_wmb() wmb() #ifndef CONFIG_SMP #define fence() do { } while (0) diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h index 3d69aa829a76..2b8bbbcb9be0 100644 --- a/arch/mips/include/asm/barrier.h +++ b/arch/mips/include/asm/barrier.h @@ -75,20 +75,21 @@ #include -#define wmb() fast_wmb() -#define rmb() fast_rmb() #define mb() wbflush() #define iob() wbflush() #else /* !CONFIG_CPU_HAS_WB */ -#define wmb() fast_wmb() -#define rmb() fast_rmb() #define mb() fast_mb() #define iob() fast_iob() #endif /* !CONFIG_CPU_HAS_WB */ +#define wmb() fast_wmb() +#define rmb() fast_rmb() +#define dma_wmb() fast_wmb() +#define dma_rmb() fast_rmb() + #if defined(CONFIG_WEAK_ORDERING) && defined(CONFIG_SMP) # ifdef CONFIG_CPU_CAVIUM_OCTEON # define smp_mb() __sync() diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index cb6d66c6e3e1..a3bf5be111ff 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h @@ -36,8 +36,6 @@ #define set_mb(var, value) do { var = value; mb(); } while (0) -#ifdef CONFIG_SMP - #ifdef __SUBARCH_HAS_LWSYNC # define SMPWMB LWSYNC #else @@ -45,12 +43,17 @@ #endif #define __lwsync() __asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory") +#define dma_rmb() __lwsync() +#define dma_wmb() __asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory") + +#ifdef CONFIG_SMP +#define smp_lwsync() __lwsync() #define smp_mb() mb() #define smp_rmb() __lwsync() #define smp_wmb() __asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory") #else -#define __lwsync() barrier() +#define smp_lwsync() barrier() #define smp_mb() barrier() #define smp_rmb() barrier() @@ -72,7 +75,7 @@ #define smp_store_release(p, v) \ do { \ compiletime_assert_atomic_type(*p); \ - __lwsync(); \ + smp_lwsync(); \ ACCESS_ONCE(*p) = (v); \ } while (0) @@ -80,7 +83,7 @@ do { \ ({ \ typeof(*p) ___p1 = ACCESS_ONCE(*p); \ compiletime_assert_atomic_type(*p); \ - __lwsync(); \ + smp_lwsync(); \ ___p1; \ }) diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h index 33d191d295e4..8d724718ec21 100644 --- a/arch/s390/include/asm/barrier.h +++ b/arch/s390/include/asm/barrier.h @@ -24,6 +24,8 @@ #define rmb() mb() #define wmb() mb() +#define dma_rmb() rmb() +#define dma_wmb() wmb() #define smp_mb() mb() #define smp_rmb() rmb() #define smp_wmb() wmb() diff --git a/arch/sparc/include/asm/barrier_64.h b/arch/sparc/include/asm/barrier_64.h index 6c974c0977ad..76648941fea7 100644 --- a/arch/sparc/include/asm/barrier_64.h +++ b/arch/sparc/include/asm/barrier_64.h @@ -37,6 +37,9 @@ do { __asm__ __volatile__("ba,pt %%xcc, 1f\n\t" \ #define rmb() __asm__ __volatile__("":::"memory") #define wmb() __asm__ __volatile__("":::"memory") +#define dma_rmb() rmb() +#define dma_wmb() wmb() + #define set_mb(__var, __value) \ do { __var = __value; membar_safe("#StoreLoad"); } while(0) diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index 5238000285c1..2ab1eb33106e 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -24,13 +24,16 @@ #define wmb() asm volatile("sfence" ::: "memory") #endif -#ifdef CONFIG_SMP -#define smp_mb() mb() #ifdef CONFIG_X86_PPRO_FENCE -# define smp_rmb() rmb() +#define dma_rmb() rmb() #else -# define smp_rmb() barrier() +#define dma_rmb() barrier() #endif +#define dma_wmb() barrier() + +#ifdef CONFIG_SMP +#define smp_mb() mb() +#define smp_rmb() dma_rmb() #define smp_wmb() barrier() #define set_mb(var, value) do { (void)xchg(&var, value); } while (0) #else /* !SMP */ diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h index d6511d954e2b..2d7d9a1f5b53 100644 --- a/arch/x86/um/asm/barrier.h +++ b/arch/x86/um/asm/barrier.h @@ -29,17 +29,18 @@ #endif /* CONFIG_X86_32 */ -#ifdef CONFIG_SMP - -#define smp_mb() mb() #ifdef CONFIG_X86_PPRO_FENCE -#define smp_rmb() rmb() +#define dma_rmb() rmb() #else /* CONFIG_X86_PPRO_FENCE */ -#define smp_rmb() barrier() +#define dma_rmb() barrier() #endif /* CONFIG_X86_PPRO_FENCE */ +#define dma_wmb() barrier() -#define smp_wmb() barrier() +#ifdef CONFIG_SMP +#define smp_mb() mb() +#define smp_rmb() dma_rmb() +#define smp_wmb() barrier() #define set_mb(var, value) do { (void)xchg(&var, value); } while (0) #else /* CONFIG_SMP */ diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h index 1402fa855388..f5c40b0fadc2 100644 --- a/include/asm-generic/barrier.h +++ b/include/asm-generic/barrier.h @@ -42,6 +42,14 @@ #define wmb() mb() #endif +#ifndef dma_rmb +#define dma_rmb() rmb() +#endif + +#ifndef dma_wmb +#define dma_wmb() wmb() +#endif + #ifndef read_barrier_depends #define read_barrier_depends() do { } while (0) #endif -- cgit v1.2.3-59-g8ed1b