From c15ac4fd60d5ffdb151bb2c7805f377fd7f90363 Mon Sep 17 00:00:00 2001 From: Alan Kao Date: Tue, 13 Feb 2018 13:13:17 +0800 Subject: riscv/ftrace: Add dynamic function tracer support We now have dynamic ftrace with the following added items: * ftrace_make_call, ftrace_make_nop (in kernel/ftrace.c) The two functions turn each recorded call site of filtered functions into a call to ftrace_caller or nops * ftracce_update_ftrace_func (in kernel/ftrace.c) turns the nops at ftrace_call into a call to a generic entry for function tracers. * ftrace_caller (in kernel/mcount-dyn.S) The entry where each _mcount call sites calls to once they are filtered to be traced. Also, this patch fixes the semantic problems in mcount.S, which will be treated as only a reference implementation once we have the dynamic ftrace. Cc: Greentime Hu Signed-off-by: Alan Kao Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/ftrace.h | 54 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) (limited to 'arch/riscv/include') diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h index 66d4175eb13e..078743aacfd3 100644 --- a/arch/riscv/include/asm/ftrace.h +++ b/arch/riscv/include/asm/ftrace.h @@ -8,3 +8,57 @@ #if defined(CONFIG_FUNCTION_GRAPH_TRACER) && defined(CONFIG_FRAME_POINTER) #define HAVE_FUNCTION_GRAPH_FP_TEST #endif + +#ifndef __ASSEMBLY__ +void _mcount(void); +static inline unsigned long ftrace_call_adjust(unsigned long addr) +{ + return addr; +} + +struct dyn_arch_ftrace { +}; +#endif + +#ifdef CONFIG_DYNAMIC_FTRACE +/* + * A general call in RISC-V is a pair of insts: + * 1) auipc: setting high-20 pc-related bits to ra register + * 2) jalr: setting low-12 offset to ra, jump to ra, and set ra to + * return address (original pc + 4) + * + * Dynamic ftrace generates probes to call sites, so we must deal with + * both auipc and jalr at the same time. + */ + +#define MCOUNT_ADDR ((unsigned long)_mcount) +#define JALR_SIGN_MASK (0x00000800) +#define JALR_OFFSET_MASK (0x00000fff) +#define AUIPC_OFFSET_MASK (0xfffff000) +#define AUIPC_PAD (0x00001000) +#define JALR_SHIFT 20 +#define JALR_BASIC (0x000080e7) +#define AUIPC_BASIC (0x00000097) +#define NOP4 (0x00000013) + +#define make_call(caller, callee, call) \ +do { \ + call[0] = to_auipc_insn((unsigned int)((unsigned long)callee - \ + (unsigned long)caller)); \ + call[1] = to_jalr_insn((unsigned int)((unsigned long)callee - \ + (unsigned long)caller)); \ +} while (0) + +#define to_jalr_insn(offset) \ + (((offset & JALR_OFFSET_MASK) << JALR_SHIFT) | JALR_BASIC) + +#define to_auipc_insn(offset) \ + ((offset & JALR_SIGN_MASK) ? \ + (((offset & AUIPC_OFFSET_MASK) + AUIPC_PAD) | AUIPC_BASIC) : \ + ((offset & AUIPC_OFFSET_MASK) | AUIPC_BASIC)) + +/* + * Let auipc+jalr be the basic *mcount unit*, so we make it 8 bytes here. + */ +#define MCOUNT_INSN_SIZE 8 +#endif -- cgit v1.2.3-59-g8ed1b From 71e736a7d65551e49136c1efc4759e5902729cc2 Mon Sep 17 00:00:00 2001 From: Alan Kao Date: Tue, 13 Feb 2018 13:13:19 +0800 Subject: riscv/ftrace: Add ARCH_SUPPORTS_FTRACE_OPS support Cc: Greentime Hu Signed-off-by: Alan Kao Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/ftrace.h | 1 + arch/riscv/kernel/mcount-dyn.S | 3 +++ 2 files changed, 4 insertions(+) (limited to 'arch/riscv/include') diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h index 078743aacfd3..fedadc40e358 100644 --- a/arch/riscv/include/asm/ftrace.h +++ b/arch/riscv/include/asm/ftrace.h @@ -9,6 +9,7 @@ #define HAVE_FUNCTION_GRAPH_FP_TEST #endif +#define ARCH_SUPPORTS_FTRACE_OPS 1 #ifndef __ASSEMBLY__ void _mcount(void); static inline unsigned long ftrace_call_adjust(unsigned long addr) diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S index 739e07a6fd85..6bbc3f88fcb3 100644 --- a/arch/riscv/kernel/mcount-dyn.S +++ b/arch/riscv/kernel/mcount-dyn.S @@ -74,9 +74,12 @@ ENTRY(ftrace_caller) /* * a0: the address in the caller when calling ftrace_caller * a1: the caller's return address + * a2: the address of global variable function_trace_op */ ld a1, -8(s0) addi a0, ra, -MCOUNT_INSN_SIZE + la t5, function_trace_op + ld a2, 0(t5) #ifdef CONFIG_FUNCTION_GRAPH_TRACER /* -- cgit v1.2.3-59-g8ed1b From aea4c671fb985e6a9ffc365c43ea6f5e0d737fea Mon Sep 17 00:00:00 2001 From: Alan Kao Date: Tue, 13 Feb 2018 13:13:20 +0800 Subject: riscv/ftrace: Add DYNAMIC_FTRACE_WITH_REGS support Cc: Greentime Hu Signed-off-by: Alan Kao Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + arch/riscv/include/asm/ftrace.h | 1 + arch/riscv/kernel/ftrace.c | 17 ++++++ arch/riscv/kernel/mcount-dyn.S | 122 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 141 insertions(+) (limited to 'arch/riscv/include') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 1e9d878c1ac4..61dd82709898 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -116,6 +116,7 @@ config ARCH_RV64I select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FTRACE_MCOUNT_RECORD select HAVE_DYNAMIC_FTRACE + select HAVE_DYNAMIC_FTRACE_WITH_REGS endchoice diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h index fedadc40e358..c6dcc5291f97 100644 --- a/arch/riscv/include/asm/ftrace.h +++ b/arch/riscv/include/asm/ftrace.h @@ -8,6 +8,7 @@ #if defined(CONFIG_FUNCTION_GRAPH_TRACER) && defined(CONFIG_FRAME_POINTER) #define HAVE_FUNCTION_GRAPH_FP_TEST #endif +#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR #define ARCH_SUPPORTS_FTRACE_OPS 1 #ifndef __ASSEMBLY__ diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index 5bbe1afd9463..48b5353691c3 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -106,6 +106,23 @@ int __init ftrace_dyn_arch_init(void) } #endif +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, + unsigned long addr) +{ + unsigned int call[2]; + int ret; + + make_call(rec->ip, old_addr, call); + ret = ftrace_check_current_call(rec->ip, call); + + if (ret) + return ret; + + return __ftrace_modify_call(rec->ip, addr, true); +} +#endif + #ifdef CONFIG_FUNCTION_GRAPH_TRACER /* * Most of this function is copied from arm64. diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S index 6bbc3f88fcb3..35a6ed76cb8b 100644 --- a/arch/riscv/kernel/mcount-dyn.S +++ b/arch/riscv/kernel/mcount-dyn.S @@ -115,3 +115,125 @@ ftrace_call: RESTORE_ABI_STATE ret ENDPROC(ftrace_caller) + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS + .macro SAVE_ALL + addi sp, sp, -(PT_SIZE_ON_STACK+16) + sd s0, (PT_SIZE_ON_STACK)(sp) + sd ra, (PT_SIZE_ON_STACK+8)(sp) + addi s0, sp, (PT_SIZE_ON_STACK+16) + + sd x1, PT_RA(sp) + sd x2, PT_SP(sp) + sd x3, PT_GP(sp) + sd x4, PT_TP(sp) + sd x5, PT_T0(sp) + sd x6, PT_T1(sp) + sd x7, PT_T2(sp) + sd x8, PT_S0(sp) + sd x9, PT_S1(sp) + sd x10, PT_A0(sp) + sd x11, PT_A1(sp) + sd x12, PT_A2(sp) + sd x13, PT_A3(sp) + sd x14, PT_A4(sp) + sd x15, PT_A5(sp) + sd x16, PT_A6(sp) + sd x17, PT_A7(sp) + sd x18, PT_S2(sp) + sd x19, PT_S3(sp) + sd x20, PT_S4(sp) + sd x21, PT_S5(sp) + sd x22, PT_S6(sp) + sd x23, PT_S7(sp) + sd x24, PT_S8(sp) + sd x25, PT_S9(sp) + sd x26, PT_S10(sp) + sd x27, PT_S11(sp) + sd x28, PT_T3(sp) + sd x29, PT_T4(sp) + sd x30, PT_T5(sp) + sd x31, PT_T6(sp) + .endm + + .macro RESTORE_ALL + ld x1, PT_RA(sp) + ld x2, PT_SP(sp) + ld x3, PT_GP(sp) + ld x4, PT_TP(sp) + ld x5, PT_T0(sp) + ld x6, PT_T1(sp) + ld x7, PT_T2(sp) + ld x8, PT_S0(sp) + ld x9, PT_S1(sp) + ld x10, PT_A0(sp) + ld x11, PT_A1(sp) + ld x12, PT_A2(sp) + ld x13, PT_A3(sp) + ld x14, PT_A4(sp) + ld x15, PT_A5(sp) + ld x16, PT_A6(sp) + ld x17, PT_A7(sp) + ld x18, PT_S2(sp) + ld x19, PT_S3(sp) + ld x20, PT_S4(sp) + ld x21, PT_S5(sp) + ld x22, PT_S6(sp) + ld x23, PT_S7(sp) + ld x24, PT_S8(sp) + ld x25, PT_S9(sp) + ld x26, PT_S10(sp) + ld x27, PT_S11(sp) + ld x28, PT_T3(sp) + ld x29, PT_T4(sp) + ld x30, PT_T5(sp) + ld x31, PT_T6(sp) + + ld s0, (PT_SIZE_ON_STACK)(sp) + ld ra, (PT_SIZE_ON_STACK+8)(sp) + addi sp, sp, (PT_SIZE_ON_STACK+16) + .endm + + .macro RESTORE_GRAPH_REG_ARGS + ld a0, PT_T0(sp) + ld a1, PT_T1(sp) +#ifdef HAVE_FUNCTION_GRAPH_FP_TEST + ld a2, PT_T2(sp) +#endif + .endm + +/* + * Most of the contents are the same as ftrace_caller. + */ +ENTRY(ftrace_regs_caller) + /* + * a3: the address of all registers in the stack + */ + ld a1, -8(s0) + addi a0, ra, -MCOUNT_INSN_SIZE + la t5, function_trace_op + ld a2, 0(t5) + addi a3, sp, -(PT_SIZE_ON_STACK+16) + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + addi t0, s0, -8 + mv t1, a0 +#ifdef HAVE_FUNCTION_GRAPH_FP_TEST + ld t2, -16(s0) +#endif +#endif + SAVE_ALL + +ftrace_regs_call: + .global ftrace_regs_call + call ftrace_stub + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + RESTORE_GRAPH_REG_ARGS + call ftrace_graph_caller +#endif + + RESTORE_ALL + ret +ENDPROC(ftrace_regs_caller) +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ -- cgit v1.2.3-59-g8ed1b From 8d235b174af5d0af35ff206c15041fc2b02a0993 Mon Sep 17 00:00:00 2001 From: Andrea Parri Date: Tue, 27 Feb 2018 03:24:11 +0100 Subject: riscv/barrier: Define __smp_{store_release,load_acquire} Introduce __smp_{store_release,load_acquire}, and rely on the generic definitions for smp_{store_release,load_acquire}. This avoids the use of full ("rw,rw") fences on SMP. Signed-off-by: Andrea Parri Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/barrier.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'arch/riscv/include') diff --git a/arch/riscv/include/asm/barrier.h b/arch/riscv/include/asm/barrier.h index 5510366d169a..d4628e4b3a5e 100644 --- a/arch/riscv/include/asm/barrier.h +++ b/arch/riscv/include/asm/barrier.h @@ -38,6 +38,21 @@ #define __smp_rmb() RISCV_FENCE(r,r) #define __smp_wmb() RISCV_FENCE(w,w) +#define __smp_store_release(p, v) \ +do { \ + compiletime_assert_atomic_type(*p); \ + RISCV_FENCE(rw,w); \ + WRITE_ONCE(*p, v); \ +} while (0) + +#define __smp_load_acquire(p) \ +({ \ + typeof(*p) ___p1 = READ_ONCE(*p); \ + compiletime_assert_atomic_type(*p); \ + RISCV_FENCE(r,rw); \ + ___p1; \ +}) + /* * This is a very specific barrier: it's currently only used in two places in * the kernel, both in the scheduler. See include/linux/spinlock.h for the two -- cgit v1.2.3-59-g8ed1b From 0123f4d76ca63b7b895f40089be0ce4809e392d8 Mon Sep 17 00:00:00 2001 From: Andrea Parri Date: Fri, 9 Mar 2018 13:13:20 +0100 Subject: riscv/spinlock: Strengthen implementations with fences Current implementations map locking operations using .rl and .aq annotations. However, this mapping is unsound w.r.t. the kernel memory consistency model (LKMM) [1]: Referring to the "unlock-lock-read-ordering" test reported below, Daniel wrote: "I think an RCpc interpretation of .aq and .rl would in fact allow the two normal loads in P1 to be reordered [...] The intuition would be that the amoswap.w.aq can forward from the amoswap.w.rl while that's still in the store buffer, and then the lw x3,0(x4) can also perform while the amoswap.w.rl is still in the store buffer, all before the l1 x1,0(x2) executes. That's not forbidden unless the amoswaps are RCsc, unless I'm missing something. Likewise even if the unlock()/lock() is between two stores. A control dependency might originate from the load part of the amoswap.w.aq, but there still would have to be something to ensure that this load part in fact performs after the store part of the amoswap.w.rl performs globally, and that's not automatic under RCpc." Simulation of the RISC-V memory consistency model confirmed this expectation. In order to "synchronize" LKMM and RISC-V's implementation, this commit strengthens the implementations of the locking operations by replacing .rl and .aq with the use of ("lightweigth") fences, resp., "fence rw, w" and "fence r , rw". C unlock-lock-read-ordering {} /* s initially owned by P1 */ P0(int *x, int *y) { WRITE_ONCE(*x, 1); smp_wmb(); WRITE_ONCE(*y, 1); } P1(int *x, int *y, spinlock_t *s) { int r0; int r1; r0 = READ_ONCE(*y); spin_unlock(s); spin_lock(s); r1 = READ_ONCE(*x); } exists (1:r0=1 /\ 1:r1=0) [1] https://marc.info/?l=linux-kernel&m=151930201102853&w=2 https://groups.google.com/a/groups.riscv.org/forum/#!topic/isa-dev/hKywNHBkAXM https://marc.info/?l=linux-kernel&m=151633436614259&w=2 Signed-off-by: Andrea Parri Cc: Palmer Dabbelt Cc: Albert Ou Cc: Daniel Lustig Cc: Alan Stern Cc: Will Deacon Cc: Peter Zijlstra Cc: Boqun Feng Cc: Nicholas Piggin Cc: David Howells Cc: Jade Alglave Cc: Luc Maranget Cc: "Paul E. McKenney" Cc: Akira Yokosawa Cc: Ingo Molnar Cc: Linus Torvalds Cc: linux-riscv@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/fence.h | 12 ++++++++++++ arch/riscv/include/asm/spinlock.h | 29 +++++++++++++++-------------- 2 files changed, 27 insertions(+), 14 deletions(-) create mode 100644 arch/riscv/include/asm/fence.h (limited to 'arch/riscv/include') diff --git a/arch/riscv/include/asm/fence.h b/arch/riscv/include/asm/fence.h new file mode 100644 index 000000000000..2b443a3a487f --- /dev/null +++ b/arch/riscv/include/asm/fence.h @@ -0,0 +1,12 @@ +#ifndef _ASM_RISCV_FENCE_H +#define _ASM_RISCV_FENCE_H + +#ifdef CONFIG_SMP +#define RISCV_ACQUIRE_BARRIER "\tfence r , rw\n" +#define RISCV_RELEASE_BARRIER "\tfence rw, w\n" +#else +#define RISCV_ACQUIRE_BARRIER +#define RISCV_RELEASE_BARRIER +#endif + +#endif /* _ASM_RISCV_FENCE_H */ diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h index 2fd27e8ef1fd..8eb26d1ede81 100644 --- a/arch/riscv/include/asm/spinlock.h +++ b/arch/riscv/include/asm/spinlock.h @@ -17,6 +17,7 @@ #include #include +#include /* * Simple spin lock operations. These provide no fairness guarantees. @@ -28,10 +29,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) { - __asm__ __volatile__ ( - "amoswap.w.rl x0, x0, %0" - : "=A" (lock->lock) - :: "memory"); + smp_store_release(&lock->lock, 0); } static inline int arch_spin_trylock(arch_spinlock_t *lock) @@ -39,7 +37,8 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock) int tmp = 1, busy; __asm__ __volatile__ ( - "amoswap.w.aq %0, %2, %1" + " amoswap.w %0, %2, %1\n" + RISCV_ACQUIRE_BARRIER : "=r" (busy), "+A" (lock->lock) : "r" (tmp) : "memory"); @@ -68,8 +67,9 @@ static inline void arch_read_lock(arch_rwlock_t *lock) "1: lr.w %1, %0\n" " bltz %1, 1b\n" " addi %1, %1, 1\n" - " sc.w.aq %1, %1, %0\n" + " sc.w %1, %1, %0\n" " bnez %1, 1b\n" + RISCV_ACQUIRE_BARRIER : "+A" (lock->lock), "=&r" (tmp) :: "memory"); } @@ -82,8 +82,9 @@ static inline void arch_write_lock(arch_rwlock_t *lock) "1: lr.w %1, %0\n" " bnez %1, 1b\n" " li %1, -1\n" - " sc.w.aq %1, %1, %0\n" + " sc.w %1, %1, %0\n" " bnez %1, 1b\n" + RISCV_ACQUIRE_BARRIER : "+A" (lock->lock), "=&r" (tmp) :: "memory"); } @@ -96,8 +97,9 @@ static inline int arch_read_trylock(arch_rwlock_t *lock) "1: lr.w %1, %0\n" " bltz %1, 1f\n" " addi %1, %1, 1\n" - " sc.w.aq %1, %1, %0\n" + " sc.w %1, %1, %0\n" " bnez %1, 1b\n" + RISCV_ACQUIRE_BARRIER "1:\n" : "+A" (lock->lock), "=&r" (busy) :: "memory"); @@ -113,8 +115,9 @@ static inline int arch_write_trylock(arch_rwlock_t *lock) "1: lr.w %1, %0\n" " bnez %1, 1f\n" " li %1, -1\n" - " sc.w.aq %1, %1, %0\n" + " sc.w %1, %1, %0\n" " bnez %1, 1b\n" + RISCV_ACQUIRE_BARRIER "1:\n" : "+A" (lock->lock), "=&r" (busy) :: "memory"); @@ -125,7 +128,8 @@ static inline int arch_write_trylock(arch_rwlock_t *lock) static inline void arch_read_unlock(arch_rwlock_t *lock) { __asm__ __volatile__( - "amoadd.w.rl x0, %1, %0" + RISCV_RELEASE_BARRIER + " amoadd.w x0, %1, %0\n" : "+A" (lock->lock) : "r" (-1) : "memory"); @@ -133,10 +137,7 @@ static inline void arch_read_unlock(arch_rwlock_t *lock) static inline void arch_write_unlock(arch_rwlock_t *lock) { - __asm__ __volatile__ ( - "amoswap.w.rl x0, x0, %0" - : "=A" (lock->lock) - :: "memory"); + smp_store_release(&lock->lock, 0); } #endif /* _ASM_RISCV_SPINLOCK_H */ -- cgit v1.2.3-59-g8ed1b From 5ce6c1f3535fa8d2134468547377b7b737042834 Mon Sep 17 00:00:00 2001 From: Andrea Parri Date: Fri, 9 Mar 2018 13:13:40 +0100 Subject: riscv/atomic: Strengthen implementations with fences Atomics present the same issue with locking: release and acquire variants need to be strengthened to meet the constraints defined by the Linux-kernel memory consistency model [1]. Atomics present a further issue: implementations of atomics such as atomic_cmpxchg() and atomic_add_unless() rely on LR/SC pairs, which do not give full-ordering with .aqrl; for example, current implementations allow the "lr-sc-aqrl-pair-vs-full-barrier" test below to end up with the state indicated in the "exists" clause. In order to "synchronize" LKMM and RISC-V's implementation, this commit strengthens the implementations of the atomics operations by replacing .rl and .aq with the use of ("lightweigth") fences, and by replacing .aqrl LR/SC pairs in sequences such as: 0: lr.w.aqrl %0, %addr bne %0, %old, 1f ... sc.w.aqrl %1, %new, %addr bnez %1, 0b 1: with sequences of the form: 0: lr.w %0, %addr bne %0, %old, 1f ... sc.w.rl %1, %new, %addr /* SC-release */ bnez %1, 0b fence rw, rw /* "full" fence */ 1: following Daniel's suggestion. These modifications were validated with simulation of the RISC-V memory consistency model. C lr-sc-aqrl-pair-vs-full-barrier {} P0(int *x, int *y, atomic_t *u) { int r0; int r1; WRITE_ONCE(*x, 1); r0 = atomic_cmpxchg(u, 0, 1); r1 = READ_ONCE(*y); } P1(int *x, int *y, atomic_t *v) { int r0; int r1; WRITE_ONCE(*y, 1); r0 = atomic_cmpxchg(v, 0, 1); r1 = READ_ONCE(*x); } exists (u=1 /\ v=1 /\ 0:r1=0 /\ 1:r1=0) [1] https://marc.info/?l=linux-kernel&m=151930201102853&w=2 https://groups.google.com/a/groups.riscv.org/forum/#!topic/isa-dev/hKywNHBkAXM https://marc.info/?l=linux-kernel&m=151633436614259&w=2 Suggested-by: Daniel Lustig Signed-off-by: Andrea Parri Cc: Palmer Dabbelt Cc: Albert Ou Cc: Daniel Lustig Cc: Alan Stern Cc: Will Deacon Cc: Peter Zijlstra Cc: Boqun Feng Cc: Nicholas Piggin Cc: David Howells Cc: Jade Alglave Cc: Luc Maranget Cc: "Paul E. McKenney" Cc: Akira Yokosawa Cc: Ingo Molnar Cc: Linus Torvalds Cc: linux-riscv@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/atomic.h | 417 +++++++++++++++++++++++++-------------- arch/riscv/include/asm/cmpxchg.h | 391 +++++++++++++++++++++++++++++------- 2 files changed, 588 insertions(+), 220 deletions(-) (limited to 'arch/riscv/include') diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h index e65d1cd89e28..855115ace98c 100644 --- a/arch/riscv/include/asm/atomic.h +++ b/arch/riscv/include/asm/atomic.h @@ -24,6 +24,20 @@ #include #define ATOMIC_INIT(i) { (i) } + +#define __atomic_op_acquire(op, args...) \ +({ \ + typeof(op##_relaxed(args)) __ret = op##_relaxed(args); \ + __asm__ __volatile__(RISCV_ACQUIRE_BARRIER "" ::: "memory"); \ + __ret; \ +}) + +#define __atomic_op_release(op, args...) \ +({ \ + __asm__ __volatile__(RISCV_RELEASE_BARRIER "" ::: "memory"); \ + op##_relaxed(args); \ +}) + static __always_inline int atomic_read(const atomic_t *v) { return READ_ONCE(v->counter); @@ -50,22 +64,23 @@ static __always_inline void atomic64_set(atomic64_t *v, long i) * have the AQ or RL bits set. These don't return anything, so there's only * one version to worry about. */ -#define ATOMIC_OP(op, asm_op, I, asm_type, c_type, prefix) \ -static __always_inline void atomic##prefix##_##op(c_type i, atomic##prefix##_t *v) \ -{ \ - __asm__ __volatile__ ( \ - "amo" #asm_op "." #asm_type " zero, %1, %0" \ - : "+A" (v->counter) \ - : "r" (I) \ - : "memory"); \ -} +#define ATOMIC_OP(op, asm_op, I, asm_type, c_type, prefix) \ +static __always_inline \ +void atomic##prefix##_##op(c_type i, atomic##prefix##_t *v) \ +{ \ + __asm__ __volatile__ ( \ + " amo" #asm_op "." #asm_type " zero, %1, %0" \ + : "+A" (v->counter) \ + : "r" (I) \ + : "memory"); \ +} \ #ifdef CONFIG_GENERIC_ATOMIC64 -#define ATOMIC_OPS(op, asm_op, I) \ +#define ATOMIC_OPS(op, asm_op, I) \ ATOMIC_OP (op, asm_op, I, w, int, ) #else -#define ATOMIC_OPS(op, asm_op, I) \ - ATOMIC_OP (op, asm_op, I, w, int, ) \ +#define ATOMIC_OPS(op, asm_op, I) \ + ATOMIC_OP (op, asm_op, I, w, int, ) \ ATOMIC_OP (op, asm_op, I, d, long, 64) #endif @@ -79,75 +94,115 @@ ATOMIC_OPS(xor, xor, i) #undef ATOMIC_OPS /* - * Atomic ops that have ordered, relaxed, acquire, and relese variants. + * Atomic ops that have ordered, relaxed, acquire, and release variants. * There's two flavors of these: the arithmatic ops have both fetch and return * versions, while the logical ops only have fetch versions. */ -#define ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, asm_type, c_type, prefix) \ -static __always_inline c_type atomic##prefix##_fetch_##op##c_or(c_type i, atomic##prefix##_t *v) \ -{ \ - register c_type ret; \ - __asm__ __volatile__ ( \ - "amo" #asm_op "." #asm_type #asm_or " %1, %2, %0" \ - : "+A" (v->counter), "=r" (ret) \ - : "r" (I) \ - : "memory"); \ - return ret; \ +#define ATOMIC_FETCH_OP(op, asm_op, I, asm_type, c_type, prefix) \ +static __always_inline \ +c_type atomic##prefix##_fetch_##op##_relaxed(c_type i, \ + atomic##prefix##_t *v) \ +{ \ + register c_type ret; \ + __asm__ __volatile__ ( \ + " amo" #asm_op "." #asm_type " %1, %2, %0" \ + : "+A" (v->counter), "=r" (ret) \ + : "r" (I) \ + : "memory"); \ + return ret; \ +} \ +static __always_inline \ +c_type atomic##prefix##_fetch_##op(c_type i, atomic##prefix##_t *v) \ +{ \ + register c_type ret; \ + __asm__ __volatile__ ( \ + " amo" #asm_op "." #asm_type ".aqrl %1, %2, %0" \ + : "+A" (v->counter), "=r" (ret) \ + : "r" (I) \ + : "memory"); \ + return ret; \ } -#define ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_or, c_or, asm_type, c_type, prefix) \ -static __always_inline c_type atomic##prefix##_##op##_return##c_or(c_type i, atomic##prefix##_t *v) \ -{ \ - return atomic##prefix##_fetch_##op##c_or(i, v) c_op I; \ +#define ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_type, c_type, prefix) \ +static __always_inline \ +c_type atomic##prefix##_##op##_return_relaxed(c_type i, \ + atomic##prefix##_t *v) \ +{ \ + return atomic##prefix##_fetch_##op##_relaxed(i, v) c_op I; \ +} \ +static __always_inline \ +c_type atomic##prefix##_##op##_return(c_type i, atomic##prefix##_t *v) \ +{ \ + return atomic##prefix##_fetch_##op(i, v) c_op I; \ } #ifdef CONFIG_GENERIC_ATOMIC64 -#define ATOMIC_OPS(op, asm_op, c_op, I, asm_or, c_or) \ - ATOMIC_FETCH_OP (op, asm_op, I, asm_or, c_or, w, int, ) \ - ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_or, c_or, w, int, ) +#define ATOMIC_OPS(op, asm_op, c_op, I) \ + ATOMIC_FETCH_OP( op, asm_op, I, w, int, ) \ + ATOMIC_OP_RETURN(op, asm_op, c_op, I, w, int, ) #else -#define ATOMIC_OPS(op, asm_op, c_op, I, asm_or, c_or) \ - ATOMIC_FETCH_OP (op, asm_op, I, asm_or, c_or, w, int, ) \ - ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_or, c_or, w, int, ) \ - ATOMIC_FETCH_OP (op, asm_op, I, asm_or, c_or, d, long, 64) \ - ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_or, c_or, d, long, 64) +#define ATOMIC_OPS(op, asm_op, c_op, I) \ + ATOMIC_FETCH_OP( op, asm_op, I, w, int, ) \ + ATOMIC_OP_RETURN(op, asm_op, c_op, I, w, int, ) \ + ATOMIC_FETCH_OP( op, asm_op, I, d, long, 64) \ + ATOMIC_OP_RETURN(op, asm_op, c_op, I, d, long, 64) #endif -ATOMIC_OPS(add, add, +, i, , _relaxed) -ATOMIC_OPS(add, add, +, i, .aq , _acquire) -ATOMIC_OPS(add, add, +, i, .rl , _release) -ATOMIC_OPS(add, add, +, i, .aqrl, ) +ATOMIC_OPS(add, add, +, i) +ATOMIC_OPS(sub, add, +, -i) + +#define atomic_add_return_relaxed atomic_add_return_relaxed +#define atomic_sub_return_relaxed atomic_sub_return_relaxed +#define atomic_add_return atomic_add_return +#define atomic_sub_return atomic_sub_return -ATOMIC_OPS(sub, add, +, -i, , _relaxed) -ATOMIC_OPS(sub, add, +, -i, .aq , _acquire) -ATOMIC_OPS(sub, add, +, -i, .rl , _release) -ATOMIC_OPS(sub, add, +, -i, .aqrl, ) +#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed +#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed +#define atomic_fetch_add atomic_fetch_add +#define atomic_fetch_sub atomic_fetch_sub + +#ifndef CONFIG_GENERIC_ATOMIC64 +#define atomic64_add_return_relaxed atomic64_add_return_relaxed +#define atomic64_sub_return_relaxed atomic64_sub_return_relaxed +#define atomic64_add_return atomic64_add_return +#define atomic64_sub_return atomic64_sub_return + +#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed +#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed +#define atomic64_fetch_add atomic64_fetch_add +#define atomic64_fetch_sub atomic64_fetch_sub +#endif #undef ATOMIC_OPS #ifdef CONFIG_GENERIC_ATOMIC64 -#define ATOMIC_OPS(op, asm_op, I, asm_or, c_or) \ - ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, w, int, ) +#define ATOMIC_OPS(op, asm_op, I) \ + ATOMIC_FETCH_OP(op, asm_op, I, w, int, ) #else -#define ATOMIC_OPS(op, asm_op, I, asm_or, c_or) \ - ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, w, int, ) \ - ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, d, long, 64) +#define ATOMIC_OPS(op, asm_op, I) \ + ATOMIC_FETCH_OP(op, asm_op, I, w, int, ) \ + ATOMIC_FETCH_OP(op, asm_op, I, d, long, 64) #endif -ATOMIC_OPS(and, and, i, , _relaxed) -ATOMIC_OPS(and, and, i, .aq , _acquire) -ATOMIC_OPS(and, and, i, .rl , _release) -ATOMIC_OPS(and, and, i, .aqrl, ) +ATOMIC_OPS(and, and, i) +ATOMIC_OPS( or, or, i) +ATOMIC_OPS(xor, xor, i) -ATOMIC_OPS( or, or, i, , _relaxed) -ATOMIC_OPS( or, or, i, .aq , _acquire) -ATOMIC_OPS( or, or, i, .rl , _release) -ATOMIC_OPS( or, or, i, .aqrl, ) +#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed +#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed +#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed +#define atomic_fetch_and atomic_fetch_and +#define atomic_fetch_or atomic_fetch_or +#define atomic_fetch_xor atomic_fetch_xor -ATOMIC_OPS(xor, xor, i, , _relaxed) -ATOMIC_OPS(xor, xor, i, .aq , _acquire) -ATOMIC_OPS(xor, xor, i, .rl , _release) -ATOMIC_OPS(xor, xor, i, .aqrl, ) +#ifndef CONFIG_GENERIC_ATOMIC64 +#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed +#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed +#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed +#define atomic64_fetch_and atomic64_fetch_and +#define atomic64_fetch_or atomic64_fetch_or +#define atomic64_fetch_xor atomic64_fetch_xor +#endif #undef ATOMIC_OPS @@ -157,22 +212,24 @@ ATOMIC_OPS(xor, xor, i, .aqrl, ) /* * The extra atomic operations that are constructed from one of the core * AMO-based operations above (aside from sub, which is easier to fit above). - * These are required to perform a barrier, but they're OK this way because - * atomic_*_return is also required to perform a barrier. + * These are required to perform a full barrier, but they're OK this way + * because atomic_*_return is also required to perform a full barrier. + * */ -#define ATOMIC_OP(op, func_op, comp_op, I, c_type, prefix) \ -static __always_inline bool atomic##prefix##_##op(c_type i, atomic##prefix##_t *v) \ -{ \ - return atomic##prefix##_##func_op##_return(i, v) comp_op I; \ +#define ATOMIC_OP(op, func_op, comp_op, I, c_type, prefix) \ +static __always_inline \ +bool atomic##prefix##_##op(c_type i, atomic##prefix##_t *v) \ +{ \ + return atomic##prefix##_##func_op##_return(i, v) comp_op I; \ } #ifdef CONFIG_GENERIC_ATOMIC64 -#define ATOMIC_OPS(op, func_op, comp_op, I) \ - ATOMIC_OP (op, func_op, comp_op, I, int, ) +#define ATOMIC_OPS(op, func_op, comp_op, I) \ + ATOMIC_OP(op, func_op, comp_op, I, int, ) #else -#define ATOMIC_OPS(op, func_op, comp_op, I) \ - ATOMIC_OP (op, func_op, comp_op, I, int, ) \ - ATOMIC_OP (op, func_op, comp_op, I, long, 64) +#define ATOMIC_OPS(op, func_op, comp_op, I) \ + ATOMIC_OP(op, func_op, comp_op, I, int, ) \ + ATOMIC_OP(op, func_op, comp_op, I, long, 64) #endif ATOMIC_OPS(add_and_test, add, ==, 0) @@ -182,51 +239,87 @@ ATOMIC_OPS(add_negative, add, <, 0) #undef ATOMIC_OP #undef ATOMIC_OPS -#define ATOMIC_OP(op, func_op, I, c_type, prefix) \ -static __always_inline void atomic##prefix##_##op(atomic##prefix##_t *v) \ -{ \ - atomic##prefix##_##func_op(I, v); \ +#define ATOMIC_OP(op, func_op, I, c_type, prefix) \ +static __always_inline \ +void atomic##prefix##_##op(atomic##prefix##_t *v) \ +{ \ + atomic##prefix##_##func_op(I, v); \ } -#define ATOMIC_FETCH_OP(op, func_op, I, c_type, prefix) \ -static __always_inline c_type atomic##prefix##_fetch_##op(atomic##prefix##_t *v) \ -{ \ - return atomic##prefix##_fetch_##func_op(I, v); \ +#define ATOMIC_FETCH_OP(op, func_op, I, c_type, prefix) \ +static __always_inline \ +c_type atomic##prefix##_fetch_##op##_relaxed(atomic##prefix##_t *v) \ +{ \ + return atomic##prefix##_fetch_##func_op##_relaxed(I, v); \ +} \ +static __always_inline \ +c_type atomic##prefix##_fetch_##op(atomic##prefix##_t *v) \ +{ \ + return atomic##prefix##_fetch_##func_op(I, v); \ } -#define ATOMIC_OP_RETURN(op, asm_op, c_op, I, c_type, prefix) \ -static __always_inline c_type atomic##prefix##_##op##_return(atomic##prefix##_t *v) \ -{ \ - return atomic##prefix##_fetch_##op(v) c_op I; \ +#define ATOMIC_OP_RETURN(op, asm_op, c_op, I, c_type, prefix) \ +static __always_inline \ +c_type atomic##prefix##_##op##_return_relaxed(atomic##prefix##_t *v) \ +{ \ + return atomic##prefix##_fetch_##op##_relaxed(v) c_op I; \ +} \ +static __always_inline \ +c_type atomic##prefix##_##op##_return(atomic##prefix##_t *v) \ +{ \ + return atomic##prefix##_fetch_##op(v) c_op I; \ } #ifdef CONFIG_GENERIC_ATOMIC64 -#define ATOMIC_OPS(op, asm_op, c_op, I) \ - ATOMIC_OP (op, asm_op, I, int, ) \ - ATOMIC_FETCH_OP (op, asm_op, I, int, ) \ +#define ATOMIC_OPS(op, asm_op, c_op, I) \ + ATOMIC_OP( op, asm_op, I, int, ) \ + ATOMIC_FETCH_OP( op, asm_op, I, int, ) \ ATOMIC_OP_RETURN(op, asm_op, c_op, I, int, ) #else -#define ATOMIC_OPS(op, asm_op, c_op, I) \ - ATOMIC_OP (op, asm_op, I, int, ) \ - ATOMIC_FETCH_OP (op, asm_op, I, int, ) \ - ATOMIC_OP_RETURN(op, asm_op, c_op, I, int, ) \ - ATOMIC_OP (op, asm_op, I, long, 64) \ - ATOMIC_FETCH_OP (op, asm_op, I, long, 64) \ +#define ATOMIC_OPS(op, asm_op, c_op, I) \ + ATOMIC_OP( op, asm_op, I, int, ) \ + ATOMIC_FETCH_OP( op, asm_op, I, int, ) \ + ATOMIC_OP_RETURN(op, asm_op, c_op, I, int, ) \ + ATOMIC_OP( op, asm_op, I, long, 64) \ + ATOMIC_FETCH_OP( op, asm_op, I, long, 64) \ ATOMIC_OP_RETURN(op, asm_op, c_op, I, long, 64) #endif ATOMIC_OPS(inc, add, +, 1) ATOMIC_OPS(dec, add, +, -1) +#define atomic_inc_return_relaxed atomic_inc_return_relaxed +#define atomic_dec_return_relaxed atomic_dec_return_relaxed +#define atomic_inc_return atomic_inc_return +#define atomic_dec_return atomic_dec_return + +#define atomic_fetch_inc_relaxed atomic_fetch_inc_relaxed +#define atomic_fetch_dec_relaxed atomic_fetch_dec_relaxed +#define atomic_fetch_inc atomic_fetch_inc +#define atomic_fetch_dec atomic_fetch_dec + +#ifndef CONFIG_GENERIC_ATOMIC64 +#define atomic64_inc_return_relaxed atomic64_inc_return_relaxed +#define atomic64_dec_return_relaxed atomic64_dec_return_relaxed +#define atomic64_inc_return atomic64_inc_return +#define atomic64_dec_return atomic64_dec_return + +#define atomic64_fetch_inc_relaxed atomic64_fetch_inc_relaxed +#define atomic64_fetch_dec_relaxed atomic64_fetch_dec_relaxed +#define atomic64_fetch_inc atomic64_fetch_inc +#define atomic64_fetch_dec atomic64_fetch_dec +#endif + #undef ATOMIC_OPS #undef ATOMIC_OP #undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN -#define ATOMIC_OP(op, func_op, comp_op, I, prefix) \ -static __always_inline bool atomic##prefix##_##op(atomic##prefix##_t *v) \ -{ \ - return atomic##prefix##_##func_op##_return(v) comp_op I; \ +#define ATOMIC_OP(op, func_op, comp_op, I, prefix) \ +static __always_inline \ +bool atomic##prefix##_##op(atomic##prefix##_t *v) \ +{ \ + return atomic##prefix##_##func_op##_return(v) comp_op I; \ } ATOMIC_OP(inc_and_test, inc, ==, 0, ) @@ -238,19 +331,19 @@ ATOMIC_OP(dec_and_test, dec, ==, 0, 64) #undef ATOMIC_OP -/* This is required to provide a barrier on success. */ +/* This is required to provide a full barrier on success. */ static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u) { int prev, rc; __asm__ __volatile__ ( - "0:\n\t" - "lr.w.aqrl %[p], %[c]\n\t" - "beq %[p], %[u], 1f\n\t" - "add %[rc], %[p], %[a]\n\t" - "sc.w.aqrl %[rc], %[rc], %[c]\n\t" - "bnez %[rc], 0b\n\t" - "1:" + "0: lr.w %[p], %[c]\n" + " beq %[p], %[u], 1f\n" + " add %[rc], %[p], %[a]\n" + " sc.w.rl %[rc], %[rc], %[c]\n" + " bnez %[rc], 0b\n" + " fence rw, rw\n" + "1:\n" : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter) : [a]"r" (a), [u]"r" (u) : "memory"); @@ -263,13 +356,13 @@ static __always_inline long __atomic64_add_unless(atomic64_t *v, long a, long u) long prev, rc; __asm__ __volatile__ ( - "0:\n\t" - "lr.d.aqrl %[p], %[c]\n\t" - "beq %[p], %[u], 1f\n\t" - "add %[rc], %[p], %[a]\n\t" - "sc.d.aqrl %[rc], %[rc], %[c]\n\t" - "bnez %[rc], 0b\n\t" - "1:" + "0: lr.d %[p], %[c]\n" + " beq %[p], %[u], 1f\n" + " add %[rc], %[p], %[a]\n" + " sc.d.rl %[rc], %[rc], %[c]\n" + " bnez %[rc], 0b\n" + " fence rw, rw\n" + "1:\n" : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter) : [a]"r" (a), [u]"r" (u) : "memory"); @@ -300,37 +393,63 @@ static __always_inline long atomic64_inc_not_zero(atomic64_t *v) /* * atomic_{cmp,}xchg is required to have exactly the same ordering semantics as - * {cmp,}xchg and the operations that return, so they need a barrier. - */ -/* - * FIXME: atomic_cmpxchg_{acquire,release,relaxed} are all implemented by - * assigning the same barrier to both the LR and SC operations, but that might - * not make any sense. We're waiting on a memory model specification to - * determine exactly what the right thing to do is here. + * {cmp,}xchg and the operations that return, so they need a full barrier. */ -#define ATOMIC_OP(c_t, prefix, c_or, size, asm_or) \ -static __always_inline c_t atomic##prefix##_cmpxchg##c_or(atomic##prefix##_t *v, c_t o, c_t n) \ -{ \ - return __cmpxchg(&(v->counter), o, n, size, asm_or, asm_or); \ -} \ -static __always_inline c_t atomic##prefix##_xchg##c_or(atomic##prefix##_t *v, c_t n) \ -{ \ - return __xchg(n, &(v->counter), size, asm_or); \ +#define ATOMIC_OP(c_t, prefix, size) \ +static __always_inline \ +c_t atomic##prefix##_xchg_relaxed(atomic##prefix##_t *v, c_t n) \ +{ \ + return __xchg_relaxed(&(v->counter), n, size); \ +} \ +static __always_inline \ +c_t atomic##prefix##_xchg_acquire(atomic##prefix##_t *v, c_t n) \ +{ \ + return __xchg_acquire(&(v->counter), n, size); \ +} \ +static __always_inline \ +c_t atomic##prefix##_xchg_release(atomic##prefix##_t *v, c_t n) \ +{ \ + return __xchg_release(&(v->counter), n, size); \ +} \ +static __always_inline \ +c_t atomic##prefix##_xchg(atomic##prefix##_t *v, c_t n) \ +{ \ + return __xchg(&(v->counter), n, size); \ +} \ +static __always_inline \ +c_t atomic##prefix##_cmpxchg_relaxed(atomic##prefix##_t *v, \ + c_t o, c_t n) \ +{ \ + return __cmpxchg_relaxed(&(v->counter), o, n, size); \ +} \ +static __always_inline \ +c_t atomic##prefix##_cmpxchg_acquire(atomic##prefix##_t *v, \ + c_t o, c_t n) \ +{ \ + return __cmpxchg_acquire(&(v->counter), o, n, size); \ +} \ +static __always_inline \ +c_t atomic##prefix##_cmpxchg_release(atomic##prefix##_t *v, \ + c_t o, c_t n) \ +{ \ + return __cmpxchg_release(&(v->counter), o, n, size); \ +} \ +static __always_inline \ +c_t atomic##prefix##_cmpxchg(atomic##prefix##_t *v, c_t o, c_t n) \ +{ \ + return __cmpxchg(&(v->counter), o, n, size); \ } #ifdef CONFIG_GENERIC_ATOMIC64 -#define ATOMIC_OPS(c_or, asm_or) \ - ATOMIC_OP( int, , c_or, 4, asm_or) +#define ATOMIC_OPS() \ + ATOMIC_OP( int, , 4) #else -#define ATOMIC_OPS(c_or, asm_or) \ - ATOMIC_OP( int, , c_or, 4, asm_or) \ - ATOMIC_OP(long, 64, c_or, 8, asm_or) +#define ATOMIC_OPS() \ + ATOMIC_OP( int, , 4) \ + ATOMIC_OP(long, 64, 8) #endif -ATOMIC_OPS( , .aqrl) -ATOMIC_OPS(_acquire, .aq) -ATOMIC_OPS(_release, .rl) -ATOMIC_OPS(_relaxed, ) +ATOMIC_OPS() #undef ATOMIC_OPS #undef ATOMIC_OP @@ -340,13 +459,13 @@ static __always_inline int atomic_sub_if_positive(atomic_t *v, int offset) int prev, rc; __asm__ __volatile__ ( - "0:\n\t" - "lr.w.aqrl %[p], %[c]\n\t" - "sub %[rc], %[p], %[o]\n\t" - "bltz %[rc], 1f\n\t" - "sc.w.aqrl %[rc], %[rc], %[c]\n\t" - "bnez %[rc], 0b\n\t" - "1:" + "0: lr.w %[p], %[c]\n" + " sub %[rc], %[p], %[o]\n" + " bltz %[rc], 1f\n" + " sc.w.rl %[rc], %[rc], %[c]\n" + " bnez %[rc], 0b\n" + " fence rw, rw\n" + "1:\n" : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter) : [o]"r" (offset) : "memory"); @@ -361,13 +480,13 @@ static __always_inline long atomic64_sub_if_positive(atomic64_t *v, int offset) long prev, rc; __asm__ __volatile__ ( - "0:\n\t" - "lr.d.aqrl %[p], %[c]\n\t" - "sub %[rc], %[p], %[o]\n\t" - "bltz %[rc], 1f\n\t" - "sc.d.aqrl %[rc], %[rc], %[c]\n\t" - "bnez %[rc], 0b\n\t" - "1:" + "0: lr.d %[p], %[c]\n" + " sub %[rc], %[p], %[o]\n" + " bltz %[rc], 1f\n" + " sc.d.rl %[rc], %[rc], %[c]\n" + " bnez %[rc], 0b\n" + " fence rw, rw\n" + "1:\n" : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter) : [o]"r" (offset) : "memory"); diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h index db249dbc7b97..c12833f7b6bd 100644 --- a/arch/riscv/include/asm/cmpxchg.h +++ b/arch/riscv/include/asm/cmpxchg.h @@ -17,45 +17,153 @@ #include #include +#include -#define __xchg(new, ptr, size, asm_or) \ -({ \ - __typeof__(ptr) __ptr = (ptr); \ - __typeof__(new) __new = (new); \ - __typeof__(*(ptr)) __ret; \ - switch (size) { \ - case 4: \ - __asm__ __volatile__ ( \ - "amoswap.w" #asm_or " %0, %2, %1" \ - : "=r" (__ret), "+A" (*__ptr) \ - : "r" (__new) \ - : "memory"); \ - break; \ - case 8: \ - __asm__ __volatile__ ( \ - "amoswap.d" #asm_or " %0, %2, %1" \ - : "=r" (__ret), "+A" (*__ptr) \ - : "r" (__new) \ - : "memory"); \ - break; \ - default: \ - BUILD_BUG(); \ - } \ - __ret; \ -}) - -#define xchg(ptr, x) (__xchg((x), (ptr), sizeof(*(ptr)), .aqrl)) - -#define xchg32(ptr, x) \ -({ \ - BUILD_BUG_ON(sizeof(*(ptr)) != 4); \ - xchg((ptr), (x)); \ -}) - -#define xchg64(ptr, x) \ -({ \ - BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ - xchg((ptr), (x)); \ +#define __xchg_relaxed(ptr, new, size) \ +({ \ + __typeof__(ptr) __ptr = (ptr); \ + __typeof__(new) __new = (new); \ + __typeof__(*(ptr)) __ret; \ + switch (size) { \ + case 4: \ + __asm__ __volatile__ ( \ + " amoswap.w %0, %2, %1\n" \ + : "=r" (__ret), "+A" (*__ptr) \ + : "r" (__new) \ + : "memory"); \ + break; \ + case 8: \ + __asm__ __volatile__ ( \ + " amoswap.d %0, %2, %1\n" \ + : "=r" (__ret), "+A" (*__ptr) \ + : "r" (__new) \ + : "memory"); \ + break; \ + default: \ + BUILD_BUG(); \ + } \ + __ret; \ +}) + +#define xchg_relaxed(ptr, x) \ +({ \ + __typeof__(*(ptr)) _x_ = (x); \ + (__typeof__(*(ptr))) __xchg_relaxed((ptr), \ + _x_, sizeof(*(ptr))); \ +}) + +#define __xchg_acquire(ptr, new, size) \ +({ \ + __typeof__(ptr) __ptr = (ptr); \ + __typeof__(new) __new = (new); \ + __typeof__(*(ptr)) __ret; \ + switch (size) { \ + case 4: \ + __asm__ __volatile__ ( \ + " amoswap.w %0, %2, %1\n" \ + RISCV_ACQUIRE_BARRIER \ + : "=r" (__ret), "+A" (*__ptr) \ + : "r" (__new) \ + : "memory"); \ + break; \ + case 8: \ + __asm__ __volatile__ ( \ + " amoswap.d %0, %2, %1\n" \ + RISCV_ACQUIRE_BARRIER \ + : "=r" (__ret), "+A" (*__ptr) \ + : "r" (__new) \ + : "memory"); \ + break; \ + default: \ + BUILD_BUG(); \ + } \ + __ret; \ +}) + +#define xchg_acquire(ptr, x) \ +({ \ + __typeof__(*(ptr)) _x_ = (x); \ + (__typeof__(*(ptr))) __xchg_acquire((ptr), \ + _x_, sizeof(*(ptr))); \ +}) + +#define __xchg_release(ptr, new, size) \ +({ \ + __typeof__(ptr) __ptr = (ptr); \ + __typeof__(new) __new = (new); \ + __typeof__(*(ptr)) __ret; \ + switch (size) { \ + case 4: \ + __asm__ __volatile__ ( \ + RISCV_RELEASE_BARRIER \ + " amoswap.w %0, %2, %1\n" \ + : "=r" (__ret), "+A" (*__ptr) \ + : "r" (__new) \ + : "memory"); \ + break; \ + case 8: \ + __asm__ __volatile__ ( \ + RISCV_RELEASE_BARRIER \ + " amoswap.d %0, %2, %1\n" \ + : "=r" (__ret), "+A" (*__ptr) \ + : "r" (__new) \ + : "memory"); \ + break; \ + default: \ + BUILD_BUG(); \ + } \ + __ret; \ +}) + +#define xchg_release(ptr, x) \ +({ \ + __typeof__(*(ptr)) _x_ = (x); \ + (__typeof__(*(ptr))) __xchg_release((ptr), \ + _x_, sizeof(*(ptr))); \ +}) + +#define __xchg(ptr, new, size) \ +({ \ + __typeof__(ptr) __ptr = (ptr); \ + __typeof__(new) __new = (new); \ + __typeof__(*(ptr)) __ret; \ + switch (size) { \ + case 4: \ + __asm__ __volatile__ ( \ + " amoswap.w.aqrl %0, %2, %1\n" \ + : "=r" (__ret), "+A" (*__ptr) \ + : "r" (__new) \ + : "memory"); \ + break; \ + case 8: \ + __asm__ __volatile__ ( \ + " amoswap.d.aqrl %0, %2, %1\n" \ + : "=r" (__ret), "+A" (*__ptr) \ + : "r" (__new) \ + : "memory"); \ + break; \ + default: \ + BUILD_BUG(); \ + } \ + __ret; \ +}) + +#define xchg(ptr, x) \ +({ \ + __typeof__(*(ptr)) _x_ = (x); \ + (__typeof__(*(ptr))) __xchg((ptr), _x_, sizeof(*(ptr))); \ +}) + +#define xchg32(ptr, x) \ +({ \ + BUILD_BUG_ON(sizeof(*(ptr)) != 4); \ + xchg((ptr), (x)); \ +}) + +#define xchg64(ptr, x) \ +({ \ + BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ + xchg((ptr), (x)); \ }) /* @@ -63,7 +171,51 @@ * store NEW in MEM. Return the initial value in MEM. Success is * indicated by comparing RETURN with OLD. */ -#define __cmpxchg(ptr, old, new, size, lrb, scb) \ +#define __cmpxchg_relaxed(ptr, old, new, size) \ +({ \ + __typeof__(ptr) __ptr = (ptr); \ + __typeof__(*(ptr)) __old = (old); \ + __typeof__(*(ptr)) __new = (new); \ + __typeof__(*(ptr)) __ret; \ + register unsigned int __rc; \ + switch (size) { \ + case 4: \ + __asm__ __volatile__ ( \ + "0: lr.w %0, %2\n" \ + " bne %0, %z3, 1f\n" \ + " sc.w %1, %z4, %2\n" \ + " bnez %1, 0b\n" \ + "1:\n" \ + : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ + : "rJ" (__old), "rJ" (__new) \ + : "memory"); \ + break; \ + case 8: \ + __asm__ __volatile__ ( \ + "0: lr.d %0, %2\n" \ + " bne %0, %z3, 1f\n" \ + " sc.d %1, %z4, %2\n" \ + " bnez %1, 0b\n" \ + "1:\n" \ + : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ + : "rJ" (__old), "rJ" (__new) \ + : "memory"); \ + break; \ + default: \ + BUILD_BUG(); \ + } \ + __ret; \ +}) + +#define cmpxchg_relaxed(ptr, o, n) \ +({ \ + __typeof__(*(ptr)) _o_ = (o); \ + __typeof__(*(ptr)) _n_ = (n); \ + (__typeof__(*(ptr))) __cmpxchg_relaxed((ptr), \ + _o_, _n_, sizeof(*(ptr))); \ +}) + +#define __cmpxchg_acquire(ptr, old, new, size) \ ({ \ __typeof__(ptr) __ptr = (ptr); \ __typeof__(*(ptr)) __old = (old); \ @@ -73,24 +225,24 @@ switch (size) { \ case 4: \ __asm__ __volatile__ ( \ - "0:" \ - "lr.w" #scb " %0, %2\n" \ - "bne %0, %z3, 1f\n" \ - "sc.w" #lrb " %1, %z4, %2\n" \ - "bnez %1, 0b\n" \ - "1:" \ + "0: lr.w %0, %2\n" \ + " bne %0, %z3, 1f\n" \ + " sc.w %1, %z4, %2\n" \ + " bnez %1, 0b\n" \ + RISCV_ACQUIRE_BARRIER \ + "1:\n" \ : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ : "rJ" (__old), "rJ" (__new) \ : "memory"); \ break; \ case 8: \ __asm__ __volatile__ ( \ - "0:" \ - "lr.d" #scb " %0, %2\n" \ - "bne %0, %z3, 1f\n" \ - "sc.d" #lrb " %1, %z4, %2\n" \ - "bnez %1, 0b\n" \ - "1:" \ + "0: lr.d %0, %2\n" \ + " bne %0, %z3, 1f\n" \ + " sc.d %1, %z4, %2\n" \ + " bnez %1, 0b\n" \ + RISCV_ACQUIRE_BARRIER \ + "1:\n" \ : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ : "rJ" (__old), "rJ" (__new) \ : "memory"); \ @@ -101,34 +253,131 @@ __ret; \ }) -#define cmpxchg(ptr, o, n) \ - (__cmpxchg((ptr), (o), (n), sizeof(*(ptr)), .aqrl, .aqrl)) +#define cmpxchg_acquire(ptr, o, n) \ +({ \ + __typeof__(*(ptr)) _o_ = (o); \ + __typeof__(*(ptr)) _n_ = (n); \ + (__typeof__(*(ptr))) __cmpxchg_acquire((ptr), \ + _o_, _n_, sizeof(*(ptr))); \ +}) -#define cmpxchg_local(ptr, o, n) \ - (__cmpxchg((ptr), (o), (n), sizeof(*(ptr)), , )) +#define __cmpxchg_release(ptr, old, new, size) \ +({ \ + __typeof__(ptr) __ptr = (ptr); \ + __typeof__(*(ptr)) __old = (old); \ + __typeof__(*(ptr)) __new = (new); \ + __typeof__(*(ptr)) __ret; \ + register unsigned int __rc; \ + switch (size) { \ + case 4: \ + __asm__ __volatile__ ( \ + RISCV_RELEASE_BARRIER \ + "0: lr.w %0, %2\n" \ + " bne %0, %z3, 1f\n" \ + " sc.w %1, %z4, %2\n" \ + " bnez %1, 0b\n" \ + "1:\n" \ + : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ + : "rJ" (__old), "rJ" (__new) \ + : "memory"); \ + break; \ + case 8: \ + __asm__ __volatile__ ( \ + RISCV_RELEASE_BARRIER \ + "0: lr.d %0, %2\n" \ + " bne %0, %z3, 1f\n" \ + " sc.d %1, %z4, %2\n" \ + " bnez %1, 0b\n" \ + "1:\n" \ + : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ + : "rJ" (__old), "rJ" (__new) \ + : "memory"); \ + break; \ + default: \ + BUILD_BUG(); \ + } \ + __ret; \ +}) + +#define cmpxchg_release(ptr, o, n) \ +({ \ + __typeof__(*(ptr)) _o_ = (o); \ + __typeof__(*(ptr)) _n_ = (n); \ + (__typeof__(*(ptr))) __cmpxchg_release((ptr), \ + _o_, _n_, sizeof(*(ptr))); \ +}) + +#define __cmpxchg(ptr, old, new, size) \ +({ \ + __typeof__(ptr) __ptr = (ptr); \ + __typeof__(*(ptr)) __old = (old); \ + __typeof__(*(ptr)) __new = (new); \ + __typeof__(*(ptr)) __ret; \ + register unsigned int __rc; \ + switch (size) { \ + case 4: \ + __asm__ __volatile__ ( \ + "0: lr.w %0, %2\n" \ + " bne %0, %z3, 1f\n" \ + " sc.w.rl %1, %z4, %2\n" \ + " bnez %1, 0b\n" \ + " fence rw, rw\n" \ + "1:\n" \ + : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ + : "rJ" (__old), "rJ" (__new) \ + : "memory"); \ + break; \ + case 8: \ + __asm__ __volatile__ ( \ + "0: lr.d %0, %2\n" \ + " bne %0, %z3, 1f\n" \ + " sc.d.rl %1, %z4, %2\n" \ + " bnez %1, 0b\n" \ + " fence rw, rw\n" \ + "1:\n" \ + : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ + : "rJ" (__old), "rJ" (__new) \ + : "memory"); \ + break; \ + default: \ + BUILD_BUG(); \ + } \ + __ret; \ +}) -#define cmpxchg32(ptr, o, n) \ -({ \ - BUILD_BUG_ON(sizeof(*(ptr)) != 4); \ - cmpxchg((ptr), (o), (n)); \ +#define cmpxchg(ptr, o, n) \ +({ \ + __typeof__(*(ptr)) _o_ = (o); \ + __typeof__(*(ptr)) _n_ = (n); \ + (__typeof__(*(ptr))) __cmpxchg((ptr), \ + _o_, _n_, sizeof(*(ptr))); \ }) -#define cmpxchg32_local(ptr, o, n) \ -({ \ - BUILD_BUG_ON(sizeof(*(ptr)) != 4); \ - cmpxchg_local((ptr), (o), (n)); \ +#define cmpxchg_local(ptr, o, n) \ + (__cmpxchg_relaxed((ptr), (o), (n), sizeof(*(ptr)))) + +#define cmpxchg32(ptr, o, n) \ +({ \ + BUILD_BUG_ON(sizeof(*(ptr)) != 4); \ + cmpxchg((ptr), (o), (n)); \ }) -#define cmpxchg64(ptr, o, n) \ -({ \ - BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ - cmpxchg((ptr), (o), (n)); \ +#define cmpxchg32_local(ptr, o, n) \ +({ \ + BUILD_BUG_ON(sizeof(*(ptr)) != 4); \ + cmpxchg_relaxed((ptr), (o), (n)) \ }) -#define cmpxchg64_local(ptr, o, n) \ -({ \ - BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ - cmpxchg_local((ptr), (o), (n)); \ +#define cmpxchg64(ptr, o, n) \ +({ \ + BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ + cmpxchg((ptr), (o), (n)); \ +}) + +#define cmpxchg64_local(ptr, o, n) \ +({ \ + BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ + cmpxchg_relaxed((ptr), (o), (n)); \ }) #endif /* _ASM_RISCV_CMPXCHG_H */ -- cgit v1.2.3-59-g8ed1b From ab1ef68e54019937cf859f2c86c9ead6f3e62f19 Mon Sep 17 00:00:00 2001 From: Zong Li Date: Thu, 15 Mar 2018 16:50:41 +0800 Subject: RISC-V: Add sections of PLT and GOT for kernel module The address of external symbols will locate more than 32-bit offset in 64-bit kernel with sv39 or sv48 virtual addressing. Module loader emits the GOT and PLT entries for data symbols and function symbols respectively. The PLT entry is a trampoline code for jumping to the 64-bit real address. The GOT entry is just the data symbol address. Signed-off-by: Zong Li Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 5 ++ arch/riscv/Makefile | 5 ++ arch/riscv/include/asm/module.h | 103 ++++++++++++++++++++++++++ arch/riscv/kernel/Makefile | 1 + arch/riscv/kernel/module-sections.c | 139 ++++++++++++++++++++++++++++++++++++ arch/riscv/kernel/module.lds | 7 ++ 6 files changed, 260 insertions(+) create mode 100644 arch/riscv/include/asm/module.h create mode 100644 arch/riscv/kernel/module-sections.c create mode 100644 arch/riscv/kernel/module.lds (limited to 'arch/riscv/include') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 04807c7f64cc..90ff52059794 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -131,6 +131,10 @@ choice bool "medium any code model" endchoice +config MODULE_SECTIONS + bool + select HAVE_MOD_ARCH_SPECIFIC + choice prompt "Maximum Physical Memory" default MAXPHYSMEM_2GB if 32BIT @@ -141,6 +145,7 @@ choice bool "2GiB" config MAXPHYSMEM_128GB depends on 64BIT && CMODEL_MEDANY + select MODULE_SECTIONS if MODULES bool "128GiB" endchoice diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 6719dd30ec5b..c72d408c05c0 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -56,6 +56,11 @@ endif ifeq ($(CONFIG_CMODEL_MEDANY),y) KBUILD_CFLAGS += -mcmodel=medany endif +ifeq ($(CONFIG_MODULE_SECTIONS),y) + KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/riscv/kernel/module.lds +endif + +KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-relax) # GCC versions that support the "-mstrict-align" option default to allowing # unaligned accesses. While unaligned accesses are explicitly allowed in the diff --git a/arch/riscv/include/asm/module.h b/arch/riscv/include/asm/module.h new file mode 100644 index 000000000000..e61d73f82d4d --- /dev/null +++ b/arch/riscv/include/asm/module.h @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2017 Andes Technology Corporation */ + +#ifndef _ASM_RISCV_MODULE_H +#define _ASM_RISCV_MODULE_H + +#include + +#define MODULE_ARCH_VERMAGIC "riscv" + +u64 module_emit_got_entry(struct module *mod, u64 val); +u64 module_emit_plt_entry(struct module *mod, u64 val); + +#ifdef CONFIG_MODULE_SECTIONS +struct mod_section { + struct elf64_shdr *shdr; + int num_entries; + int max_entries; +}; + +struct mod_arch_specific { + struct mod_section got; + struct mod_section plt; +}; + +struct got_entry { + u64 symbol_addr; /* the real variable address */ +}; + +static inline struct got_entry emit_got_entry(u64 val) +{ + return (struct got_entry) {val}; +} + +static inline struct got_entry *get_got_entry(u64 val, + const struct mod_section *sec) +{ + struct got_entry *got = (struct got_entry *)sec->shdr->sh_addr; + int i; + for (i = 0; i < sec->num_entries; i++) { + if (got[i].symbol_addr == val) + return &got[i]; + } + return NULL; +} + +struct plt_entry { + /* + * Trampoline code to real target address. The return address + * should be the original (pc+4) before entring plt entry. + * For 8 byte alignment of symbol_addr, + * don't pack structure to remove the padding. + */ + u32 insn_auipc; /* auipc t0, 0x0 */ + u32 insn_ld; /* ld t1, 0x10(t0) */ + u32 insn_jr; /* jr t1 */ + u64 symbol_addr; /* the real jump target address */ +}; + +#define OPC_AUIPC 0x0017 +#define OPC_LD 0x3003 +#define OPC_JALR 0x0067 +#define REG_T0 0x5 +#define REG_T1 0x6 +#define IMM_OFFSET 0x10 + +static inline struct plt_entry emit_plt_entry(u64 val) +{ + /* + * U-Type encoding: + * +------------+----------+----------+ + * | imm[31:12] | rd[11:7] | opc[6:0] | + * +------------+----------+----------+ + * + * I-Type encoding: + * +------------+------------+--------+----------+----------+ + * | imm[31:20] | rs1[19:15] | funct3 | rd[11:7] | opc[6:0] | + * +------------+------------+--------+----------+----------+ + * + */ + return (struct plt_entry) { + OPC_AUIPC | (REG_T0 << 7), + OPC_LD | (IMM_OFFSET << 20) | (REG_T0 << 15) | (REG_T1 << 7), + OPC_JALR | (REG_T1 << 15), + val + }; +} + +static inline struct plt_entry *get_plt_entry(u64 val, + const struct mod_section *sec) +{ + struct plt_entry *plt = (struct plt_entry *)sec->shdr->sh_addr; + int i; + for (i = 0; i < sec->num_entries; i++) { + if (plt[i].symbol_addr == val) + return &plt[i]; + } + return NULL; +} + +#endif /* CONFIG_MODULE_SECTIONS */ + +#endif /* _ASM_RISCV_MODULE_H */ diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index 196f62ffc428..d355e3c18278 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -34,6 +34,7 @@ CFLAGS_setup.o := -mcmodel=medany obj-$(CONFIG_SMP) += smpboot.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_MODULES) += module.o +obj-$(CONFIG_MODULE_SECTIONS) += module-sections.o obj-$(CONFIG_FUNCTION_TRACER) += mcount.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o diff --git a/arch/riscv/kernel/module-sections.c b/arch/riscv/kernel/module-sections.c new file mode 100644 index 000000000000..94ba1551eac3 --- /dev/null +++ b/arch/riscv/kernel/module-sections.c @@ -0,0 +1,139 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright (C) 2014-2017 Linaro Ltd. + * + * Copyright (C) 2018 Andes Technology Corporation + */ + +#include +#include +#include + +u64 module_emit_got_entry(struct module *mod, u64 val) +{ + struct mod_section *got_sec = &mod->arch.got; + int i = got_sec->num_entries; + struct got_entry *got = get_got_entry(val, got_sec); + + if (got) + return (u64)got; + + /* There is no duplicate entry, create a new one */ + got = (struct got_entry *)got_sec->shdr->sh_addr; + got[i] = emit_got_entry(val); + + got_sec->num_entries++; + BUG_ON(got_sec->num_entries > got_sec->max_entries); + + return (u64)&got[i]; +} + +u64 module_emit_plt_entry(struct module *mod, u64 val) +{ + struct mod_section *plt_sec = &mod->arch.plt; + struct plt_entry *plt = get_plt_entry(val, plt_sec); + int i = plt_sec->num_entries; + + if (plt) + return (u64)plt; + + /* There is no duplicate entry, create a new one */ + plt = (struct plt_entry *)plt_sec->shdr->sh_addr; + plt[i] = emit_plt_entry(val); + + plt_sec->num_entries++; + BUG_ON(plt_sec->num_entries > plt_sec->max_entries); + + return (u64)&plt[i]; +} + +static int is_rela_equal(const Elf64_Rela *x, const Elf64_Rela *y) +{ + return x->r_info == y->r_info && x->r_addend == y->r_addend; +} + +static bool duplicate_rela(const Elf64_Rela *rela, int idx) +{ + int i; + for (i = 0; i < idx; i++) { + if (is_rela_equal(&rela[i], &rela[idx])) + return true; + } + return false; +} + +static void count_max_entries(Elf64_Rela *relas, int num, + unsigned int *plts, unsigned int *gots) +{ + unsigned int type, i; + + for (i = 0; i < num; i++) { + type = ELF64_R_TYPE(relas[i].r_info); + if (type == R_RISCV_CALL_PLT) { + if (!duplicate_rela(relas, i)) + (*plts)++; + } else if (type == R_RISCV_GOT_HI20) { + if (!duplicate_rela(relas, i)) + (*gots)++; + } + } +} + +int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, + char *secstrings, struct module *mod) +{ + unsigned int num_plts = 0; + unsigned int num_gots = 0; + int i; + + /* + * Find the empty .got and .plt sections. + */ + for (i = 0; i < ehdr->e_shnum; i++) { + if (!strcmp(secstrings + sechdrs[i].sh_name, ".plt")) + mod->arch.plt.shdr = sechdrs + i; + else if (!strcmp(secstrings + sechdrs[i].sh_name, ".got")) + mod->arch.got.shdr = sechdrs + i; + } + + if (!mod->arch.plt.shdr) { + pr_err("%s: module PLT section(s) missing\n", mod->name); + return -ENOEXEC; + } + if (!mod->arch.got.shdr) { + pr_err("%s: module GOT section(s) missing\n", mod->name); + return -ENOEXEC; + } + + /* Calculate the maxinum number of entries */ + for (i = 0; i < ehdr->e_shnum; i++) { + Elf64_Rela *relas = (void *)ehdr + sechdrs[i].sh_offset; + int num_rela = sechdrs[i].sh_size / sizeof(Elf64_Rela); + Elf64_Shdr *dst_sec = sechdrs + sechdrs[i].sh_info; + + if (sechdrs[i].sh_type != SHT_RELA) + continue; + + /* ignore relocations that operate on non-exec sections */ + if (!(dst_sec->sh_flags & SHF_EXECINSTR)) + continue; + + count_max_entries(relas, num_rela, &num_plts, &num_gots); + } + + mod->arch.plt.shdr->sh_type = SHT_NOBITS; + mod->arch.plt.shdr->sh_flags = SHF_EXECINSTR | SHF_ALLOC; + mod->arch.plt.shdr->sh_addralign = L1_CACHE_BYTES; + mod->arch.plt.shdr->sh_size = (num_plts + 1) * sizeof(struct plt_entry); + mod->arch.plt.num_entries = 0; + mod->arch.plt.max_entries = num_plts; + + mod->arch.got.shdr->sh_type = SHT_NOBITS; + mod->arch.got.shdr->sh_flags = SHF_ALLOC; + mod->arch.got.shdr->sh_addralign = L1_CACHE_BYTES; + mod->arch.got.shdr->sh_size = (num_gots + 1) * sizeof(struct got_entry); + mod->arch.got.num_entries = 0; + mod->arch.got.max_entries = num_gots; + + return 0; +} diff --git a/arch/riscv/kernel/module.lds b/arch/riscv/kernel/module.lds new file mode 100644 index 000000000000..7ef580e62883 --- /dev/null +++ b/arch/riscv/kernel/module.lds @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2017 Andes Technology Corporation */ + +SECTIONS { + .plt (NOLOAD) : { BYTE(0) } + .got (NOLOAD) : { BYTE(0) } +} -- cgit v1.2.3-59-g8ed1b From b8bde0ef12bd43f013d879973a1900930bfb95ee Mon Sep 17 00:00:00 2001 From: Zong Li Date: Thu, 15 Mar 2018 16:50:42 +0800 Subject: RISC-V: Add section of GOT.PLT for kernel module Separate the function symbol address from .plt to .got.plt section. The original plt entry has trampoline code with symbol address, there is a 32-bit padding bwtween jar instruction and symbol address. Extract the symbol address to .got.plt to reduce the module size. Signed-off-by: Zong Li Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/module.h | 40 +++++++++++++++++++++++-------------- arch/riscv/kernel/module-sections.c | 21 +++++++++++++++++-- arch/riscv/kernel/module.lds | 1 + 3 files changed, 45 insertions(+), 17 deletions(-) (limited to 'arch/riscv/include') diff --git a/arch/riscv/include/asm/module.h b/arch/riscv/include/asm/module.h index e61d73f82d4d..349df33808c4 100644 --- a/arch/riscv/include/asm/module.h +++ b/arch/riscv/include/asm/module.h @@ -21,6 +21,7 @@ struct mod_section { struct mod_arch_specific { struct mod_section got; struct mod_section plt; + struct mod_section got_plt; }; struct got_entry { @@ -48,13 +49,10 @@ struct plt_entry { /* * Trampoline code to real target address. The return address * should be the original (pc+4) before entring plt entry. - * For 8 byte alignment of symbol_addr, - * don't pack structure to remove the padding. */ u32 insn_auipc; /* auipc t0, 0x0 */ u32 insn_ld; /* ld t1, 0x10(t0) */ u32 insn_jr; /* jr t1 */ - u64 symbol_addr; /* the real jump target address */ }; #define OPC_AUIPC 0x0017 @@ -62,9 +60,8 @@ struct plt_entry { #define OPC_JALR 0x0067 #define REG_T0 0x5 #define REG_T1 0x6 -#define IMM_OFFSET 0x10 -static inline struct plt_entry emit_plt_entry(u64 val) +static inline struct plt_entry emit_plt_entry(u64 val, u64 plt, u64 got_plt) { /* * U-Type encoding: @@ -78,24 +75,37 @@ static inline struct plt_entry emit_plt_entry(u64 val) * +------------+------------+--------+----------+----------+ * */ + u64 offset = got_plt - plt; + u32 hi20 = (offset + 0x800) & 0xfffff000; + u32 lo12 = (offset - hi20); return (struct plt_entry) { - OPC_AUIPC | (REG_T0 << 7), - OPC_LD | (IMM_OFFSET << 20) | (REG_T0 << 15) | (REG_T1 << 7), - OPC_JALR | (REG_T1 << 15), - val + OPC_AUIPC | (REG_T0 << 7) | hi20, + OPC_LD | (lo12 << 20) | (REG_T0 << 15) | (REG_T1 << 7), + OPC_JALR | (REG_T1 << 15) }; } -static inline struct plt_entry *get_plt_entry(u64 val, - const struct mod_section *sec) +static inline int get_got_plt_idx(u64 val, const struct mod_section *sec) { - struct plt_entry *plt = (struct plt_entry *)sec->shdr->sh_addr; + struct got_entry *got_plt = (struct got_entry *)sec->shdr->sh_addr; int i; for (i = 0; i < sec->num_entries; i++) { - if (plt[i].symbol_addr == val) - return &plt[i]; + if (got_plt[i].symbol_addr == val) + return i; } - return NULL; + return -1; +} + +static inline struct plt_entry *get_plt_entry(u64 val, + const struct mod_section *sec_plt, + const struct mod_section *sec_got_plt) +{ + struct plt_entry *plt = (struct plt_entry *)sec_plt->shdr->sh_addr; + int got_plt_idx = get_got_plt_idx(val, sec_got_plt); + if (got_plt_idx >= 0) + return plt + got_plt_idx; + else + return NULL; } #endif /* CONFIG_MODULE_SECTIONS */ diff --git a/arch/riscv/kernel/module-sections.c b/arch/riscv/kernel/module-sections.c index 94ba1551eac3..bbbd26e19bfd 100644 --- a/arch/riscv/kernel/module-sections.c +++ b/arch/riscv/kernel/module-sections.c @@ -30,18 +30,23 @@ u64 module_emit_got_entry(struct module *mod, u64 val) u64 module_emit_plt_entry(struct module *mod, u64 val) { + struct mod_section *got_plt_sec = &mod->arch.got_plt; + struct got_entry *got_plt; struct mod_section *plt_sec = &mod->arch.plt; - struct plt_entry *plt = get_plt_entry(val, plt_sec); + struct plt_entry *plt = get_plt_entry(val, plt_sec, got_plt_sec); int i = plt_sec->num_entries; if (plt) return (u64)plt; /* There is no duplicate entry, create a new one */ + got_plt = (struct got_entry *)got_plt_sec->shdr->sh_addr; + got_plt[i] = emit_got_entry(val); plt = (struct plt_entry *)plt_sec->shdr->sh_addr; - plt[i] = emit_plt_entry(val); + plt[i] = emit_plt_entry(val, (u64)&plt[i], (u64)&got_plt[i]); plt_sec->num_entries++; + got_plt_sec->num_entries++; BUG_ON(plt_sec->num_entries > plt_sec->max_entries); return (u64)&plt[i]; @@ -94,6 +99,8 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, mod->arch.plt.shdr = sechdrs + i; else if (!strcmp(secstrings + sechdrs[i].sh_name, ".got")) mod->arch.got.shdr = sechdrs + i; + else if (!strcmp(secstrings + sechdrs[i].sh_name, ".got.plt")) + mod->arch.got_plt.shdr = sechdrs + i; } if (!mod->arch.plt.shdr) { @@ -104,6 +111,10 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, pr_err("%s: module GOT section(s) missing\n", mod->name); return -ENOEXEC; } + if (!mod->arch.got_plt.shdr) { + pr_err("%s: module GOT.PLT section(s) missing\n", mod->name); + return -ENOEXEC; + } /* Calculate the maxinum number of entries */ for (i = 0; i < ehdr->e_shnum; i++) { @@ -135,5 +146,11 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, mod->arch.got.num_entries = 0; mod->arch.got.max_entries = num_gots; + mod->arch.got_plt.shdr->sh_type = SHT_NOBITS; + mod->arch.got_plt.shdr->sh_flags = SHF_ALLOC; + mod->arch.got_plt.shdr->sh_addralign = L1_CACHE_BYTES; + mod->arch.got_plt.shdr->sh_size = (num_plts + 1) * sizeof(struct got_entry); + mod->arch.got_plt.num_entries = 0; + mod->arch.got_plt.max_entries = num_plts; return 0; } diff --git a/arch/riscv/kernel/module.lds b/arch/riscv/kernel/module.lds index 7ef580e62883..295ecfb341a2 100644 --- a/arch/riscv/kernel/module.lds +++ b/arch/riscv/kernel/module.lds @@ -4,4 +4,5 @@ SECTIONS { .plt (NOLOAD) : { BYTE(0) } .got (NOLOAD) : { BYTE(0) } + .got.plt (NOLOAD) : { BYTE(0) } } -- cgit v1.2.3-59-g8ed1b From e21d54219c7a698b10d5f1e6a1023ebde284cd7b Mon Sep 17 00:00:00 2001 From: Zong Li Date: Thu, 15 Mar 2018 16:50:51 +0800 Subject: RISC-V: Add definition of relocation types Signed-off-by: Zong Li Signed-off-by: Palmer Dabbelt --- arch/riscv/include/uapi/asm/elf.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/riscv/include') diff --git a/arch/riscv/include/uapi/asm/elf.h b/arch/riscv/include/uapi/asm/elf.h index a510edfa8226..5cae4c30cd8e 100644 --- a/arch/riscv/include/uapi/asm/elf.h +++ b/arch/riscv/include/uapi/asm/elf.h @@ -79,5 +79,12 @@ typedef union __riscv_fp_state elf_fpregset_t; #define R_RISCV_TPREL_I 49 #define R_RISCV_TPREL_S 50 #define R_RISCV_RELAX 51 +#define R_RISCV_SUB6 52 +#define R_RISCV_SET6 53 +#define R_RISCV_SET8 54 +#define R_RISCV_SET16 55 +#define R_RISCV_SET32 56 +#define R_RISCV_32_PCREL 57 + #endif /* _UAPI_ASM_ELF_H */ -- cgit v1.2.3-59-g8ed1b