author     2021-12-17 11:48:13 +0000
committer  2021-12-17 11:48:13 +0000
commit     9cf72c358a20b95e040e6a54a03baf6d264e0719 (patch)
tree       92146e58caa9f383a8a6894aeeaa54bb1c563f7f /arch/arm/kernel
parent     Linux 5.16-rc1 (diff)
parent     ARM: v7m: enable support for IRQ stacks (diff)
Merge tag 'arm-irq-and-vmap-stacks-for-rmk' of git://git.kernel.org/pub/scm/linux/kernel/git/ardb/linux into devel-stable
ARM: support for IRQ and vmap'ed stacks
This PR covers all the work related to implementing IRQ stacks and
vmap'ed stacks for all 32-bit ARM systems that are currently supported
by the Linux kernel, including RiscPC and Footbridge. It has been
submitted for review in three different waves:
- IRQ stacks support for v7 SMP systems [0],
- vmap'ed stacks support for v7 SMP systems [1],
- extending support for both IRQ stacks and vmap'ed stacks for all
remaining configurations, including v6/v7 SMP multiplatform kernels
and uniprocessor configurations including v7-M [2]
[0] https://lore.kernel.org/linux-arm-kernel/20211115084732.3704393-1-ardb@kernel.org/
[1] https://lore.kernel.org/linux-arm-kernel/20211122092816.2865873-1-ardb@kernel.org/
[2] https://lore.kernel.org/linux-arm-kernel/20211206164659.1495084-1-ardb@kernel.org/
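
For readers skimming the patch below, the core idea behind the IRQ stack support is compact: each CPU gets a dedicated stack for interrupt handling, published through a per-CPU pointer, and the low-level entry code switches SP to it on the way into the handler. The following is a condensed sketch of the allocation side, closely following the init_irq_stacks() routine this merge adds to arch/arm/kernel/irq.c (headers and error handling are simplified, so treat it as an illustration rather than the exact patch):

#include <linux/gfp.h>
#include <linux/percpu.h>
#include <linux/thread_info.h>
#include <linux/vmalloc.h>

/* One pointer per CPU, holding the top of that CPU's IRQ stack. */
asmlinkage DEFINE_PER_CPU_READ_MOSTLY(u8 *, irq_stack_ptr);

static void __init init_irq_stacks(void)
{
	u8 *stack;
	int cpu;

	for_each_possible_cpu(cpu) {
		if (!IS_ENABLED(CONFIG_VMAP_STACK))
			stack = (u8 *)__get_free_pages(GFP_KERNEL,
						       THREAD_SIZE_ORDER);
		else
			stack = __vmalloc_node(THREAD_SIZE, THREAD_ALIGN,
					       THREADINFO_GFP, NUMA_NO_NODE,
					       __builtin_return_address(0));
		if (WARN_ON(!stack))
			break;
		/* Stacks grow down, so record the high end of the buffer. */
		per_cpu(irq_stack_ptr, cpu) = &stack[THREAD_SIZE];
	}
}

The entry side (the irq_handler macro in entry-armv.S, plus the equivalent change in entry-v7m.S) loads this per-CPU pointer into SP, reverting to the original SP if the interrupt arrived while the CPU was already running on the IRQ stack.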
Diffstat (limited to 'arch/arm/kernel')
-rw-r--r--  arch/arm/kernel/asm-offsets.c   |   3
-rw-r--r--  arch/arm/kernel/entry-armv.S    | 209
-rw-r--r--  arch/arm/kernel/entry-common.S  |  16
-rw-r--r--  arch/arm/kernel/entry-header.S  |  48
-rw-r--r--  arch/arm/kernel/entry-v7m.S     |  39
-rw-r--r--  arch/arm/kernel/head-common.S   |   4
-rw-r--r--  arch/arm/kernel/irq.c           |  61
-rw-r--r--  arch/arm/kernel/module.c        |  85
-rw-r--r--  arch/arm/kernel/process.c       |   7
-rw-r--r--  arch/arm/kernel/setup.c         |   8
-rw-r--r--  arch/arm/kernel/sleep.S         |   6
-rw-r--r--  arch/arm/kernel/smp.c           |  16
-rw-r--r--  arch/arm/kernel/traps.c         | 107
-rw-r--r--  arch/arm/kernel/unwind.c        |  50
-rw-r--r--  arch/arm/kernel/vmlinux.lds.S   |   4
15 files changed, 551 insertions, 112 deletions
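
One detail worth flagging before the patch body: with CONFIG_VMAP_STACK enabled, the SVC entry path detects a stack overflow with a single TST instruction rather than comparing SP against explicit bounds. As the comment in the new do_overflow_check macro (entry-header.S, below) puts it, task and IRQ stacks are aligned so that SP & BIT(THREAD_SIZE_ORDER + PAGE_SHIFT) should always be zero; the series arranges this by aligning vmap'ed stacks to twice their size. A user-space model of that predicate, using the usual 32-bit ARM values for the constants (an assumption here, not taken from this diff):

#include <stdbool.h>
#include <stdint.h>

#define PAGE_SHIFT		12	/* 4 KiB pages  */
#define THREAD_SIZE_ORDER	1	/* 8 KiB stacks */
#define THREAD_SIZE		(1UL << (PAGE_SHIFT + THREAD_SIZE_ORDER))

/*
 * Stacks are THREAD_SIZE bytes and start on a 2 * THREAD_SIZE boundary,
 * so any SP inside the stack has the THREAD_SIZE bit clear, while an SP
 * that has just run off the low end of the stack has it set.
 */
static bool sp_overflowed(uintptr_t sp)
{
	return sp & (1UL << (THREAD_SIZE_ORDER + PAGE_SHIFT));
}

The assembly version additionally has to cope with running on the overflow stack itself, which lives in the linear map and does not obey this invariant; that is why the macro also checks whether SP points above high_memory at all before declaring an overflow.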
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index 645845e4982a..2c8d76fd7c66 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -43,9 +43,6 @@ int main(void) BLANK(); DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); -#ifndef CONFIG_THREAD_INFO_IN_TASK - DEFINE(TI_TASK, offsetof(struct thread_info, task)); -#endif DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); DEFINE(TI_CPU_DOMAIN, offsetof(struct thread_info, cpu_domain)); DEFINE(TI_CPU_SAVE, offsetof(struct thread_info, cpu_context)); diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index deff286eb5ea..b58bda51e4b8 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -19,9 +19,6 @@ #include <asm/glue-df.h> #include <asm/glue-pf.h> #include <asm/vfpmacros.h> -#ifndef CONFIG_GENERIC_IRQ_MULTI_HANDLER -#include <mach/entry-macro.S> -#endif #include <asm/thread_notify.h> #include <asm/unwind.h> #include <asm/unistd.h> @@ -30,19 +27,65 @@ #include <asm/uaccess-asm.h> #include "entry-header.S" -#include <asm/entry-macro-multi.S> #include <asm/probes.h> /* * Interrupt handling. */ - .macro irq_handler -#ifdef CONFIG_GENERIC_IRQ_MULTI_HANDLER + .macro irq_handler, from_user:req mov r0, sp - bl generic_handle_arch_irq +#ifdef CONFIG_IRQSTACKS +#ifdef CONFIG_UNWINDER_ARM + mov fpreg, sp @ Preserve original SP #else - arch_irq_handler_default + mov r8, fp @ Preserve original FP + mov r9, sp @ Preserve original SP +#endif + ldr_this_cpu sp, irq_stack_ptr, r2, r3 + + .if \from_user == 0 +UNWIND( .setfp fpreg, sp ) + @ + @ If we took the interrupt while running in the kernel, we may already + @ be using the IRQ stack, so revert to the original value in that case. + @ + subs r2, sp, r0 @ SP above bottom of IRQ stack? + rsbscs r2, r2, #THREAD_SIZE @ ... and below the top? +#ifdef CONFIG_VMAP_STACK + ldr_va r2, high_memory, cc @ End of the linear region + cmpcc r2, r0 @ Stack pointer was below it? #endif + movcs sp, r0 @ If so, revert to incoming SP + +#ifndef CONFIG_UNWINDER_ARM + @ + @ Inform the frame pointer unwinder where the next frame lives + @ + movcc lr, pc @ Make LR point into .entry.text so + @ that we will get a dump of the + @ exception stack for this frame. +#ifdef CONFIG_CC_IS_GCC + movcc ip, r0 @ Store the old SP in the frame record. + stmdbcc sp!, {fp, ip, lr, pc} @ Push frame record + addcc fp, sp, #12 +#else + stmdbcc sp!, {fp, lr} @ Push frame record + movcc fp, sp +#endif // CONFIG_CC_IS_GCC +#endif // CONFIG_UNWINDER_ARM + .endif +#endif // CONFIG_IRQSTACKS + + bl generic_handle_arch_irq + +#ifdef CONFIG_IRQSTACKS +#ifdef CONFIG_UNWINDER_ARM + mov sp, fpreg @ Restore original SP +#else + mov fp, r8 @ Restore original FP + mov sp, r9 @ Restore original SP +#endif // CONFIG_UNWINDER_ARM +#endif // CONFIG_IRQSTACKS .endm .macro pabt_helper @@ -140,27 +183,35 @@ ENDPROC(__und_invalid) #define SPFIX(code...) 
#endif - .macro svc_entry, stack_hole=0, trace=1, uaccess=1 + .macro svc_entry, stack_hole=0, trace=1, uaccess=1, overflow_check=1 UNWIND(.fnstart ) - UNWIND(.save {r0 - pc} ) - sub sp, sp, #(SVC_REGS_SIZE + \stack_hole - 4) + sub sp, sp, #(SVC_REGS_SIZE + \stack_hole) + THUMB( add sp, r1 ) @ get SP in a GPR without + THUMB( sub r1, sp, r1 ) @ using a temp register + + .if \overflow_check + UNWIND(.save {r0 - pc} ) + do_overflow_check (SVC_REGS_SIZE + \stack_hole) + .endif + #ifdef CONFIG_THUMB2_KERNEL - SPFIX( str r0, [sp] ) @ temporarily saved - SPFIX( mov r0, sp ) - SPFIX( tst r0, #4 ) @ test original stack alignment - SPFIX( ldr r0, [sp] ) @ restored + tst r1, #4 @ test stack pointer alignment + sub r1, sp, r1 @ restore original R1 + sub sp, r1 @ restore original SP #else SPFIX( tst sp, #4 ) #endif - SPFIX( subeq sp, sp, #4 ) - stmia sp, {r1 - r12} + SPFIX( subne sp, sp, #4 ) + + ARM( stmib sp, {r1 - r12} ) + THUMB( stmia sp, {r0 - r12} ) @ No STMIB in Thumb-2 ldmia r0, {r3 - r5} - add r7, sp, #S_SP - 4 @ here for interlock avoidance + add r7, sp, #S_SP @ here for interlock avoidance mov r6, #-1 @ "" "" "" "" - add r2, sp, #(SVC_REGS_SIZE + \stack_hole - 4) - SPFIX( addeq r2, r2, #4 ) - str r3, [sp, #-4]! @ save the "real" r0 copied + add r2, sp, #(SVC_REGS_SIZE + \stack_hole) + SPFIX( addne r2, r2, #4 ) + str r3, [sp] @ save the "real" r0 copied @ from the exception stack mov r3, lr @@ -199,7 +250,7 @@ ENDPROC(__dabt_svc) .align 5 __irq_svc: svc_entry - irq_handler + irq_handler from_user=0 #ifdef CONFIG_PREEMPTION ldr r8, [tsk, #TI_PREEMPT] @ get preempt count @@ -426,7 +477,7 @@ ENDPROC(__dabt_usr) __irq_usr: usr_entry kuser_cmpxchg_check - irq_handler + irq_handler from_user=1 get_thread_info tsk mov why, #0 b ret_to_user_from_irq @@ -754,16 +805,17 @@ ENTRY(__switch_to) ldr r6, [r2, #TI_CPU_DOMAIN] #endif switch_tls r1, r4, r5, r3, r7 -#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_SMP) - ldr r7, [r2, #TI_TASK] +#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_SMP) && \ + !defined(CONFIG_STACKPROTECTOR_PER_TASK) ldr r8, =__stack_chk_guard .if (TSK_STACK_CANARY > IMM12_MASK) - add r7, r7, #TSK_STACK_CANARY & ~IMM12_MASK + add r9, r2, #TSK_STACK_CANARY & ~IMM12_MASK + ldr r9, [r9, #TSK_STACK_CANARY & IMM12_MASK] + .else + ldr r9, [r2, #TSK_STACK_CANARY & IMM12_MASK] .endif - ldr r7, [r7, #TSK_STACK_CANARY & IMM12_MASK] -#elif defined(CONFIG_CURRENT_POINTER_IN_TPIDRURO) - mov r7, r2 @ Preserve 'next' #endif + mov r7, r2 @ Preserve 'next' #ifdef CONFIG_CPU_USE_DOMAINS mcr p15, 0, r6, c3, c0, 0 @ Set domain register #endif @@ -772,19 +824,102 @@ ENTRY(__switch_to) ldr r0, =thread_notify_head mov r1, #THREAD_NOTIFY_SWITCH bl atomic_notifier_call_chain -#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_SMP) - str r7, [r8] +#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_SMP) && \ + !defined(CONFIG_STACKPROTECTOR_PER_TASK) + str r9, [r8] #endif - THUMB( mov ip, r4 ) mov r0, r5 - set_current r7 - ARM( ldmia r4, {r4 - sl, fp, sp, pc} ) @ Load all regs saved previously - THUMB( ldmia ip!, {r4 - sl, fp} ) @ Load all regs saved previously - THUMB( ldr sp, [ip], #4 ) - THUMB( ldr pc, [ip] ) +#if !defined(CONFIG_THUMB2_KERNEL) && !defined(CONFIG_VMAP_STACK) + set_current r7, r8 + ldmia r4, {r4 - sl, fp, sp, pc} @ Load all regs saved previously +#else + mov r1, r7 + ldmia r4, {r4 - sl, fp, ip, lr} @ Load all regs saved previously +#ifdef CONFIG_VMAP_STACK + @ + @ Do a dummy read from the new stack while running from the old one so + @ that we can rely on 
do_translation_fault() to fix up any stale PMD + @ entries covering the vmalloc region. + @ + ldr r2, [ip] +#endif + + @ When CONFIG_THREAD_INFO_IN_TASK=n, the update of SP itself is what + @ effectuates the task switch, as that is what causes the observable + @ values of current and current_thread_info to change. When + @ CONFIG_THREAD_INFO_IN_TASK=y, setting current (and therefore + @ current_thread_info) is done explicitly, and the update of SP just + @ switches us to another stack, with few other side effects. In order + @ to prevent this distinction from causing any inconsistencies, let's + @ keep the 'set_current' call as close as we can to the update of SP. + set_current r1, r2 + mov sp, ip + ret lr +#endif UNWIND(.fnend ) ENDPROC(__switch_to) +#ifdef CONFIG_VMAP_STACK + .text + .align 2 +__bad_stack: + @ + @ We've just detected an overflow. We need to load the address of this + @ CPU's overflow stack into the stack pointer register. We have only one + @ scratch register so let's use a sequence of ADDs including one + @ involving the PC, and decorate them with PC-relative group + @ relocations. As these are ARM only, switch to ARM mode first. + @ + @ We enter here with IP clobbered and its value stashed on the mode + @ stack. + @ +THUMB( bx pc ) +THUMB( nop ) +THUMB( .arm ) + ldr_this_cpu_armv6 ip, overflow_stack_ptr + + str sp, [ip, #-4]! @ Preserve original SP value + mov sp, ip @ Switch to overflow stack + pop {ip} @ Original SP in IP + +#if defined(CONFIG_UNWINDER_FRAME_POINTER) && defined(CONFIG_CC_IS_GCC) + mov ip, ip @ mov expected by unwinder + push {fp, ip, lr, pc} @ GCC flavor frame record +#else + str ip, [sp, #-8]! @ store original SP + push {fpreg, lr} @ Clang flavor frame record +#endif +UNWIND( ldr ip, [r0, #4] ) @ load exception LR +UNWIND( str ip, [sp, #12] ) @ store in the frame record + ldr ip, [r0, #12] @ reload IP + + @ Store the original GPRs to the new stack. 
+ svc_entry uaccess=0, overflow_check=0 + +UNWIND( .save {sp, pc} ) +UNWIND( .save {fpreg, lr} ) +UNWIND( .setfp fpreg, sp ) + + ldr fpreg, [sp, #S_SP] @ Add our frame record + @ to the linked list +#if defined(CONFIG_UNWINDER_FRAME_POINTER) && defined(CONFIG_CC_IS_GCC) + ldr r1, [fp, #4] @ reload SP at entry + add fp, fp, #12 +#else + ldr r1, [fpreg, #8] +#endif + str r1, [sp, #S_SP] @ store in pt_regs + + @ Stash the regs for handle_bad_stack + mov r0, sp + + @ Time to die + bl handle_bad_stack + nop +UNWIND( .fnend ) +ENDPROC(__bad_stack) +#endif + __INIT /* diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index ac86c34682bb..c928d6b04cce 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -16,12 +16,14 @@ .equ NR_syscalls, __NR_syscalls -#ifdef CONFIG_NEED_RET_TO_USER -#include <mach/entry-macro.S> -#else - .macro arch_ret_to_user, tmp1, tmp2 - .endm + .macro arch_ret_to_user, tmp +#ifdef CONFIG_ARCH_IOP32X + mrc p15, 0, \tmp, c15, c1, 0 + tst \tmp, #(1 << 6) + bicne \tmp, \tmp, #(1 << 6) + mcrne p15, 0, \tmp, c15, c1, 0 @ Disable cp6 access #endif + .endm #include "entry-header.S" @@ -55,7 +57,7 @@ __ret_fast_syscall: /* perform architecture specific actions before user return */ - arch_ret_to_user r1, lr + arch_ret_to_user r1 restore_user_regs fast = 1, offset = S_OFF UNWIND(.fnend ) @@ -128,7 +130,7 @@ no_work_pending: asm_trace_hardirqs_on save = 0 /* perform architecture specific actions before user return */ - arch_ret_to_user r1, lr + arch_ret_to_user r1 ct_user_enter save = 0 restore_user_regs fast = 0, offset = 0 diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S index ae24dd54e9ef..cb82ff5adec1 100644 --- a/arch/arm/kernel/entry-header.S +++ b/arch/arm/kernel/entry-header.S @@ -292,12 +292,21 @@ .macro restore_user_regs, fast = 0, offset = 0 -#if defined(CONFIG_CPU_32v6K) && !defined(CONFIG_CPU_V6) +#if defined(CONFIG_CPU_32v6K) || defined(CONFIG_SMP) +#if defined(CONFIG_CPU_V6) && defined(CONFIG_SMP) +ALT_SMP(b .L1_\@ ) +ALT_UP( nop ) + ldr_va r1, elf_hwcap + tst r1, #HWCAP_TLS @ hardware TLS available? + beq .L2_\@ +.L1_\@: +#endif @ The TLS register update is deferred until return to user space so we @ can use it for other things while running in the kernel get_thread_info r1 ldr r1, [r1, #TI_TP_VALUE] mcr p15, 0, r1, c13, c0, 3 @ set TLS register +.L2_\@: #endif uaccess_enable r1, isb=0 @@ -423,3 +432,40 @@ scno .req r7 @ syscall number tbl .req r8 @ syscall table pointer why .req r8 @ Linux syscall (!= 0) tsk .req r9 @ current thread_info + + .macro do_overflow_check, frame_size:req +#ifdef CONFIG_VMAP_STACK + @ + @ Test whether the SP has overflowed. Task and IRQ stacks are aligned + @ so that SP & BIT(THREAD_SIZE_ORDER + PAGE_SHIFT) should always be + @ zero. + @ +ARM( tst sp, #1 << (THREAD_SIZE_ORDER + PAGE_SHIFT) ) +THUMB( tst r1, #1 << (THREAD_SIZE_ORDER + PAGE_SHIFT) ) +THUMB( it ne ) + bne .Lstack_overflow_check\@ + + .pushsection .text +.Lstack_overflow_check\@: + @ + @ The stack pointer is not pointing to a valid vmap'ed stack, but it + @ may be pointing into the linear map instead, which may happen if we + @ are already running from the overflow stack. We cannot detect overflow + @ in such cases so just carry on. + @ + str ip, [r0, #12] @ Stash IP on the mode stack + ldr_va ip, high_memory @ Start of VMALLOC space +ARM( cmp sp, ip ) @ SP in vmalloc space? 
+THUMB( cmp r1, ip ) +THUMB( itt lo ) + ldrlo ip, [r0, #12] @ Restore IP + blo .Lout\@ @ Carry on + +THUMB( sub r1, sp, r1 ) @ Restore original R1 +THUMB( sub sp, r1 ) @ Restore original SP + add sp, sp, #\frame_size @ Undo svc_entry's SP change + b __bad_stack @ Handle VMAP stack overflow + .popsection +.Lout\@: +#endif + .endm diff --git a/arch/arm/kernel/entry-v7m.S b/arch/arm/kernel/entry-v7m.S index 7bde93c10962..de8a60363c85 100644 --- a/arch/arm/kernel/entry-v7m.S +++ b/arch/arm/kernel/entry-v7m.S @@ -39,16 +39,25 @@ __irq_entry: @ @ Invoke the IRQ handler @ - mrs r0, ipsr - ldr r1, =V7M_xPSR_EXCEPTIONNO - and r0, r1 - sub r0, #16 - mov r1, sp - stmdb sp!, {lr} - @ routine called with r0 = irq number, r1 = struct pt_regs * - bl nvic_handle_irq - - pop {lr} + mov r0, sp + ldr_this_cpu sp, irq_stack_ptr, r1, r2 + + @ + @ If we took the interrupt while running in the kernel, we may already + @ be using the IRQ stack, so revert to the original value in that case. + @ + subs r2, sp, r0 @ SP above bottom of IRQ stack? + rsbscs r2, r2, #THREAD_SIZE @ ... and below the top? + movcs sp, r0 + + push {r0, lr} @ preserve LR and original SP + + @ routine called with r0 = struct pt_regs * + bl generic_handle_arch_irq + + pop {r0, lr} + mov sp, r0 + @ @ Check for any pending work if returning to user @ @@ -101,15 +110,17 @@ ENTRY(__switch_to) str sp, [ip], #4 str lr, [ip], #4 mov r5, r0 + mov r6, r2 @ Preserve 'next' add r4, r2, #TI_CPU_SAVE ldr r0, =thread_notify_head mov r1, #THREAD_NOTIFY_SWITCH bl atomic_notifier_call_chain - mov ip, r4 mov r0, r5 - ldmia ip!, {r4 - r11} @ Load all regs saved previously - ldr sp, [ip] - ldr pc, [ip, #4]! + mov r1, r6 + ldmia r4, {r4 - r12, lr} @ Load all regs saved previously + set_current r1, r2 + mov sp, ip + bx lr .fnend ENDPROC(__switch_to) diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S index da18e0a17dc2..42cae73fcc19 100644 --- a/arch/arm/kernel/head-common.S +++ b/arch/arm/kernel/head-common.S @@ -105,10 +105,8 @@ __mmap_switched: mov r1, #0 bl __memset @ clear .bss -#ifdef CONFIG_CURRENT_POINTER_IN_TPIDRURO adr_l r0, init_task @ get swapper task_struct - set_current r0 -#endif + set_current r0, r1 ldmia r4, {r0, r1, r2, r3} str r9, [r0] @ Save processor ID diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c index b79975bd988c..5c6f8d11a3ce 100644 --- a/arch/arm/kernel/irq.c +++ b/arch/arm/kernel/irq.c @@ -36,13 +36,53 @@ #include <asm/hardware/cache-l2x0.h> #include <asm/hardware/cache-uniphier.h> #include <asm/outercache.h> +#include <asm/softirq_stack.h> #include <asm/exception.h> #include <asm/mach/arch.h> #include <asm/mach/irq.h> #include <asm/mach/time.h> +#include "reboot.h" + unsigned long irq_err_count; +#ifdef CONFIG_IRQSTACKS + +asmlinkage DEFINE_PER_CPU_READ_MOSTLY(u8 *, irq_stack_ptr); + +static void __init init_irq_stacks(void) +{ + u8 *stack; + int cpu; + + for_each_possible_cpu(cpu) { + if (!IS_ENABLED(CONFIG_VMAP_STACK)) + stack = (u8 *)__get_free_pages(GFP_KERNEL, + THREAD_SIZE_ORDER); + else + stack = __vmalloc_node(THREAD_SIZE, THREAD_ALIGN, + THREADINFO_GFP, NUMA_NO_NODE, + __builtin_return_address(0)); + + if (WARN_ON(!stack)) + break; + per_cpu(irq_stack_ptr, cpu) = &stack[THREAD_SIZE]; + } +} + +static void ____do_softirq(void *arg) +{ + __do_softirq(); +} + +void do_softirq_own_stack(void) +{ + call_with_stack(____do_softirq, NULL, + __this_cpu_read(irq_stack_ptr)); +} + +#endif + int arch_show_interrupts(struct seq_file *p, int prec) { #ifdef CONFIG_FIQ @@ -80,27 +120,14 @@ void 
handle_IRQ(unsigned int irq, struct pt_regs *regs) ack_bad_irq(irq); } -/* - * asm_do_IRQ is the interface to be used from assembly code. - */ -asmlinkage void __exception_irq_entry -asm_do_IRQ(unsigned int irq, struct pt_regs *regs) -{ - struct pt_regs *old_regs; - - irq_enter(); - old_regs = set_irq_regs(regs); - - handle_IRQ(irq, regs); - - set_irq_regs(old_regs); - irq_exit(); -} - void __init init_IRQ(void) { int ret; +#ifdef CONFIG_IRQSTACKS + init_irq_stacks(); +#endif + if (IS_ENABLED(CONFIG_OF) && !machine_desc->init_irq) irqchip_init(); else diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c index beac45e89ba6..4d33a7acf617 100644 --- a/arch/arm/kernel/module.c +++ b/arch/arm/kernel/module.c @@ -68,6 +68,42 @@ bool module_exit_section(const char *name) strstarts(name, ".ARM.exidx.exit"); } +/* + * This implements the partitioning algorithm for group relocations as + * documented in the ARM AArch32 ELF psABI (IHI 0044). + * + * A single PC-relative symbol reference is divided in up to 3 add or subtract + * operations, where the final one could be incorporated into a load/store + * instruction with immediate offset. E.g., + * + * ADD Rd, PC, #... or ADD Rd, PC, #... + * ADD Rd, Rd, #... ADD Rd, Rd, #... + * LDR Rd, [Rd, #...] ADD Rd, Rd, #... + * + * The latter has a guaranteed range of only 16 MiB (3x8 == 24 bits), so it is + * of limited use in the kernel. However, the ADD/ADD/LDR combo has a range of + * -/+ 256 MiB, (2x8 + 12 == 28 bits), which means it has sufficient range for + * any in-kernel symbol reference (unless module PLTs are being used). + * + * The main advantage of this approach over the typical pattern using a literal + * load is that literal loads may miss in the D-cache, and generally lead to + * lower cache efficiency for variables that are referenced often from many + * different places in the code. + */ +static u32 get_group_rem(u32 group, u32 *offset) +{ + u32 val = *offset; + u32 shift; + do { + shift = val ? (31 - __fls(val)) & ~1 : 32; + *offset = val; + if (!val) + break; + val &= 0xffffff >> shift; + } while (group--); + return shift; +} + int apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, unsigned int relindex, struct module *module) @@ -82,6 +118,7 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, unsigned long loc; Elf32_Sym *sym; const char *symname; + u32 shift, group = 1; s32 offset; u32 tmp; #ifdef CONFIG_THUMB2_KERNEL @@ -212,6 +249,54 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, *(u32 *)loc = __opcode_to_mem_arm(tmp); break; + case R_ARM_ALU_PC_G0_NC: + group = 0; + fallthrough; + case R_ARM_ALU_PC_G1_NC: + tmp = __mem_to_opcode_arm(*(u32 *)loc); + offset = ror32(tmp & 0xff, (tmp & 0xf00) >> 7); + if (tmp & BIT(22)) + offset = -offset; + offset += sym->st_value - loc; + if (offset < 0) { + offset = -offset; + tmp = (tmp & ~BIT(23)) | BIT(22); // SUB opcode + } else { + tmp = (tmp & ~BIT(22)) | BIT(23); // ADD opcode + } + + shift = get_group_rem(group, &offset); + if (shift < 24) { + offset >>= 24 - shift; + offset |= (shift + 8) << 7; + } + *(u32 *)loc = __opcode_to_mem_arm((tmp & ~0xfff) | offset); + break; + + case R_ARM_LDR_PC_G2: + tmp = __mem_to_opcode_arm(*(u32 *)loc); + offset = tmp & 0xfff; + if (~tmp & BIT(23)) // U bit cleared? 
+ offset = -offset; + offset += sym->st_value - loc; + if (offset < 0) { + offset = -offset; + tmp &= ~BIT(23); // clear U bit + } else { + tmp |= BIT(23); // set U bit + } + get_group_rem(2, &offset); + + if (offset > 0xfff) { + pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n", + module->name, relindex, i, symname, + ELF32_R_TYPE(rel->r_info), loc, + sym->st_value); + return -ENOEXEC; + } + *(u32 *)loc = __opcode_to_mem_arm((tmp & ~0xfff) | offset); + break; + #ifdef CONFIG_THUMB2_KERNEL case R_ARM_THM_CALL: case R_ARM_THM_JUMP24: diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index d47159f3791c..0617af11377f 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -36,7 +36,7 @@ #include "signal.h" -#ifdef CONFIG_CURRENT_POINTER_IN_TPIDRURO +#if defined(CONFIG_CURRENT_POINTER_IN_TPIDRURO) || defined(CONFIG_SMP) DEFINE_PER_CPU(struct task_struct *, __entry_task); #endif @@ -46,6 +46,11 @@ unsigned long __stack_chk_guard __read_mostly; EXPORT_SYMBOL(__stack_chk_guard); #endif +#ifndef CONFIG_CURRENT_POINTER_IN_TPIDRURO +asmlinkage struct task_struct *__current; +EXPORT_SYMBOL(__current); +#endif + static const char *processor_modes[] __maybe_unused = { "USER_26", "FIQ_26" , "IRQ_26" , "SVC_26" , "UK4_26" , "UK5_26" , "UK6_26" , "UK7_26" , "UK8_26" , "UK9_26" , "UK10_26", "UK11_26", "UK12_26", "UK13_26", "UK14_26", "UK15_26", diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 284a80c0b6e1..039feb7cd590 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -141,10 +141,10 @@ EXPORT_SYMBOL(outer_cache); int __cpu_architecture __read_mostly = CPU_ARCH_UNKNOWN; struct stack { - u32 irq[3]; - u32 abt[3]; - u32 und[3]; - u32 fiq[3]; + u32 irq[4]; + u32 abt[4]; + u32 und[4]; + u32 fiq[4]; } ____cacheline_aligned; #ifndef CONFIG_CPU_V7M diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S index 43077e11dafd..f909baf17912 100644 --- a/arch/arm/kernel/sleep.S +++ b/arch/arm/kernel/sleep.S @@ -67,6 +67,12 @@ ENTRY(__cpu_suspend) ldr r4, =cpu_suspend_size #endif mov r5, sp @ current virtual SP +#ifdef CONFIG_VMAP_STACK + @ Run the suspend code from the overflow stack so we don't have to rely + @ on vmalloc-to-phys conversions anywhere in the arch suspend code. + @ The original SP value captured in R5 will be restored on the way out. + ldr_this_cpu sp, overflow_stack_ptr, r6, r7 +#endif add r4, r4, #12 @ Space for pgd, virt sp, phys resume fn sub sp, sp, r4 @ allocate CPU state on stack ldr r3, =sleep_save_sp diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index cde5b6d8bac5..951559e5bea3 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -403,6 +403,17 @@ static void smp_store_cpu_info(unsigned int cpuid) check_cpu_icache_size(cpuid); } +static void set_current(struct task_struct *cur) +{ + if (!IS_ENABLED(CONFIG_CURRENT_POINTER_IN_TPIDRURO) && !is_smp()) { + __current = cur; + return; + } + + /* Set TPIDRURO */ + asm("mcr p15, 0, %0, c13, c0, 3" :: "r"(cur) : "memory"); +} + /* * This is the secondary CPU boot entry. We're using this CPUs * idle thread stack, but a set of temporary page tables. 
@@ -631,11 +642,6 @@ static void ipi_complete(unsigned int cpu) /* * Main handler for inter-processor interrupts */ -asmlinkage void __exception_irq_entry do_IPI(int ipinr, struct pt_regs *regs) -{ - handle_IPI(ipinr, regs); -} - static void do_handle_IPI(int ipinr) { unsigned int cpu = smp_processor_id(); diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 195dff58bafc..3f38357efc46 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -35,6 +35,7 @@ #include <asm/ptrace.h> #include <asm/unwind.h> #include <asm/tls.h> +#include <asm/stacktrace.h> #include <asm/system_misc.h> #include <asm/opcodes.h> @@ -60,13 +61,24 @@ static int __init user_debug_setup(char *str) __setup("user_debug=", user_debug_setup); #endif -static void dump_mem(const char *, const char *, unsigned long, unsigned long); - void dump_backtrace_entry(unsigned long where, unsigned long from, unsigned long frame, const char *loglvl) { unsigned long end = frame + 4 + sizeof(struct pt_regs); + if (IS_ENABLED(CONFIG_UNWINDER_FRAME_POINTER) && + IS_ENABLED(CONFIG_CC_IS_GCC) && + end > ALIGN(frame, THREAD_SIZE)) { + /* + * If we are walking past the end of the stack, it may be due + * to the fact that we are on an IRQ or overflow stack. In this + * case, we can load the address of the other stack from the + * frame record. + */ + frame = ((unsigned long *)frame)[-2] - 4; + end = frame + 4 + sizeof(struct pt_regs); + } + #ifdef CONFIG_KALLSYMS printk("%s[<%08lx>] (%ps) from [<%08lx>] (%pS)\n", loglvl, where, (void *)where, from, (void *)from); @@ -75,7 +87,8 @@ void dump_backtrace_entry(unsigned long where, unsigned long from, loglvl, where, from); #endif - if (in_entry_text(from) && end <= ALIGN(frame, THREAD_SIZE)) + if (!IS_ENABLED(CONFIG_UNWINDER_ARM) && + in_entry_text(from) && end <= ALIGN(frame, THREAD_SIZE)) dump_mem(loglvl, "Exception stack", frame + 4, end); } @@ -108,7 +121,8 @@ void dump_backtrace_stm(u32 *stack, u32 instruction, const char *loglvl) static int verify_stack(unsigned long sp) { if (sp < PAGE_OFFSET || - (sp > (unsigned long)high_memory && high_memory != NULL)) + (!IS_ENABLED(CONFIG_VMAP_STACK) && + sp > (unsigned long)high_memory && high_memory != NULL)) return -EFAULT; return 0; @@ -118,8 +132,8 @@ static int verify_stack(unsigned long sp) /* * Dump out the contents of some memory nicely... 
*/ -static void dump_mem(const char *lvl, const char *str, unsigned long bottom, - unsigned long top) +void dump_mem(const char *lvl, const char *str, unsigned long bottom, + unsigned long top) { unsigned long first; int i; @@ -278,7 +292,8 @@ static int __die(const char *str, int err, struct pt_regs *regs) if (!user_mode(regs) || in_interrupt()) { dump_mem(KERN_EMERG, "Stack: ", regs->ARM_sp, - THREAD_SIZE + (unsigned long)task_stack_page(tsk)); + ALIGN(regs->ARM_sp - THREAD_SIZE, THREAD_ALIGN) + + THREAD_SIZE); dump_backtrace(regs, tsk, KERN_EMERG); dump_instr(KERN_EMERG, regs); } @@ -825,3 +840,81 @@ void __init early_trap_init(void *vectors_base) */ #endif } + +#ifdef CONFIG_VMAP_STACK + +DECLARE_PER_CPU(u8 *, irq_stack_ptr); + +asmlinkage DEFINE_PER_CPU(u8 *, overflow_stack_ptr); + +static int __init allocate_overflow_stacks(void) +{ + u8 *stack; + int cpu; + + for_each_possible_cpu(cpu) { + stack = (u8 *)__get_free_page(GFP_KERNEL); + if (WARN_ON(!stack)) + return -ENOMEM; + per_cpu(overflow_stack_ptr, cpu) = &stack[OVERFLOW_STACK_SIZE]; + } + return 0; +} +early_initcall(allocate_overflow_stacks); + +asmlinkage void handle_bad_stack(struct pt_regs *regs) +{ + unsigned long tsk_stk = (unsigned long)current->stack; +#ifdef CONFIG_IRQSTACKS + unsigned long irq_stk = (unsigned long)this_cpu_read(irq_stack_ptr); +#endif + unsigned long ovf_stk = (unsigned long)this_cpu_read(overflow_stack_ptr); + + console_verbose(); + pr_emerg("Insufficient stack space to handle exception!"); + + pr_emerg("Task stack: [0x%08lx..0x%08lx]\n", + tsk_stk, tsk_stk + THREAD_SIZE); +#ifdef CONFIG_IRQSTACKS + pr_emerg("IRQ stack: [0x%08lx..0x%08lx]\n", + irq_stk - THREAD_SIZE, irq_stk); +#endif + pr_emerg("Overflow stack: [0x%08lx..0x%08lx]\n", + ovf_stk - OVERFLOW_STACK_SIZE, ovf_stk); + + die("kernel stack overflow", regs, 0); +} + +/* + * Normally, we rely on the logic in do_translation_fault() to update stale PMD + * entries covering the vmalloc space in a task's page tables when it first + * accesses the region in question. Unfortunately, this is not sufficient when + * the task stack resides in the vmalloc region, as do_translation_fault() is a + * C function that needs a stack to run. + * + * So we need to ensure that these PMD entries are up to date *before* the MM + * switch. As we already have some logic in the MM switch path that takes care + * of this, let's trigger it by bumping the counter every time the core vmalloc + * code modifies a PMD entry in the vmalloc region. + */ +void arch_sync_kernel_mappings(unsigned long start, unsigned long end) +{ + if (start > VMALLOC_END || end < VMALLOC_START) + return; + + /* + * This hooks into the core vmalloc code to receive notifications of + * any PMD level changes that have been made to the kernel page tables. + * This means it should only be triggered once for every MiB worth of + * vmalloc space, given that we don't support huge vmalloc/vmap on ARM, + * and that kernel PMD level table entries are rarely (if ever) + * updated. + * + * This means that the counter is going to max out at ~250 for the + * typical case. If it overflows, something entirely unexpected has + * occurred so let's throw a warning if that happens. 
+ */ + WARN_ON(++init_mm.context.vmalloc_seq == UINT_MAX); +} + +#endif diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c index 59fdf257bf8b..c5ea328c428d 100644 --- a/arch/arm/kernel/unwind.c +++ b/arch/arm/kernel/unwind.c @@ -29,6 +29,7 @@ #include <linux/spinlock.h> #include <linux/list.h> +#include <asm/sections.h> #include <asm/stacktrace.h> #include <asm/traps.h> #include <asm/unwind.h> @@ -52,6 +53,7 @@ EXPORT_SYMBOL(__aeabi_unwind_cpp_pr2); struct unwind_ctrl_block { unsigned long vrs[16]; /* virtual register set */ const unsigned long *insn; /* pointer to the current instructions word */ + unsigned long sp_low; /* lowest value of sp allowed */ unsigned long sp_high; /* highest value of sp allowed */ /* * 1 : check for stack overflow for each register pop. @@ -256,8 +258,12 @@ static int unwind_exec_pop_subset_r4_to_r13(struct unwind_ctrl_block *ctrl, mask >>= 1; reg++; } - if (!load_sp) + if (!load_sp) { ctrl->vrs[SP] = (unsigned long)vsp; + } else { + ctrl->sp_low = ctrl->vrs[SP]; + ctrl->sp_high = ALIGN(ctrl->sp_low, THREAD_SIZE); + } return URC_OK; } @@ -313,9 +319,10 @@ static int unwind_exec_insn(struct unwind_ctrl_block *ctrl) if ((insn & 0xc0) == 0x00) ctrl->vrs[SP] += ((insn & 0x3f) << 2) + 4; - else if ((insn & 0xc0) == 0x40) + else if ((insn & 0xc0) == 0x40) { ctrl->vrs[SP] -= ((insn & 0x3f) << 2) + 4; - else if ((insn & 0xf0) == 0x80) { + ctrl->sp_low = ctrl->vrs[SP]; + } else if ((insn & 0xf0) == 0x80) { unsigned long mask; insn = (insn << 8) | unwind_get_byte(ctrl); @@ -330,9 +337,11 @@ static int unwind_exec_insn(struct unwind_ctrl_block *ctrl) if (ret) goto error; } else if ((insn & 0xf0) == 0x90 && - (insn & 0x0d) != 0x0d) + (insn & 0x0d) != 0x0d) { ctrl->vrs[SP] = ctrl->vrs[insn & 0x0f]; - else if ((insn & 0xf0) == 0xa0) { + ctrl->sp_low = ctrl->vrs[SP]; + ctrl->sp_high = ALIGN(ctrl->sp_low, THREAD_SIZE); + } else if ((insn & 0xf0) == 0xa0) { ret = unwind_exec_pop_r4_to_rN(ctrl, insn); if (ret) goto error; @@ -375,13 +384,13 @@ error: */ int unwind_frame(struct stackframe *frame) { - unsigned long low; const struct unwind_idx *idx; struct unwind_ctrl_block ctrl; /* store the highest address on the stack to avoid crossing it*/ - low = frame->sp; - ctrl.sp_high = ALIGN(low, THREAD_SIZE); + ctrl.sp_low = frame->sp; + ctrl.sp_high = ALIGN(ctrl.sp_low - THREAD_SIZE, THREAD_ALIGN) + + THREAD_SIZE; pr_debug("%s(pc = %08lx lr = %08lx sp = %08lx)\n", __func__, frame->pc, frame->lr, frame->sp); @@ -403,7 +412,21 @@ int unwind_frame(struct stackframe *frame) if (idx->insn == 1) /* can't unwind */ return -URC_FAILURE; - else if ((idx->insn & 0x80000000) == 0) + else if (frame->pc == prel31_to_addr(&idx->addr_offset)) { + /* + * Unwinding is tricky when we're halfway through the prologue, + * since the stack frame that the unwinder expects may not be + * fully set up yet. However, one thing we do know for sure is + * that if we are unwinding from the very first instruction of + * a function, we are still effectively in the stack frame of + * the caller, and the unwind info has no relevance yet. 
+ */ + if (frame->pc == frame->lr) + return -URC_FAILURE; + frame->sp_low = frame->sp; + frame->pc = frame->lr; + return URC_OK; + } else if ((idx->insn & 0x80000000) == 0) /* prel31 to the unwind table */ ctrl.insn = (unsigned long *)prel31_to_addr(&idx->insn); else if ((idx->insn & 0xff000000) == 0x80000000) @@ -437,7 +460,7 @@ int unwind_frame(struct stackframe *frame) urc = unwind_exec_insn(&ctrl); if (urc < 0) return urc; - if (ctrl.vrs[SP] < low || ctrl.vrs[SP] >= ctrl.sp_high) + if (ctrl.vrs[SP] < ctrl.sp_low || ctrl.vrs[SP] > ctrl.sp_high) return -URC_FAILURE; } @@ -452,6 +475,7 @@ int unwind_frame(struct stackframe *frame) frame->sp = ctrl.vrs[SP]; frame->lr = ctrl.vrs[LR]; frame->pc = ctrl.vrs[PC]; + frame->sp_low = ctrl.sp_low; return URC_OK; } @@ -495,7 +519,11 @@ void unwind_backtrace(struct pt_regs *regs, struct task_struct *tsk, urc = unwind_frame(&frame); if (urc < 0) break; - dump_backtrace_entry(where, frame.pc, frame.sp - 4, loglvl); + if (in_entry_text(where)) + dump_mem(loglvl, "Exception stack", frame.sp_low, + frame.sp_low + sizeof(struct pt_regs)); + + dump_backtrace_entry(where, frame.pc, 0, loglvl); } } diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index f02d617e3359..aa12b65a7fd6 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -138,12 +138,12 @@ SECTIONS #ifdef CONFIG_STRICT_KERNEL_RWX . = ALIGN(1<<SECTION_SHIFT); #else - . = ALIGN(THREAD_SIZE); + . = ALIGN(THREAD_ALIGN); #endif __init_end = .; _sdata = .; - RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE) + RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_ALIGN) _edata = .; BSS_SECTION(0, 0, 0) |
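
Finally, a note on the arch/arm/kernel/module.c changes above: the new R_ARM_ALU_PC_G0_NC/G1_NC and R_ARM_LDR_PC_G2 cases implement the ADD/ADD/LDR group relocation scheme described in the comment accompanying get_group_rem(). The stand-alone program below illustrates the partitioning for a positive displacement; it mirrors the logic but is a simplified demonstration rather than the kernel code (the example offset is arbitrary, and negative displacements, which the kernel handles by flipping ADD to SUB, are ignored here):

#include <stdint.h>
#include <stdio.h>

/*
 * Peel off the most significant (at most) 8 bits of val, starting at an
 * even bit position, since an ARM data-processing immediate is an 8-bit
 * value rotated right by an even amount.
 */
static uint32_t top_chunk(uint32_t val)
{
	uint32_t shift;

	if (!val)
		return 0;
	shift = __builtin_clz(val) & ~1u;
	return val & ~(0xffffffu >> shift);
}

int main(void)
{
	uint32_t offset = 0x01234567;			/* example displacement */
	uint32_t g0 = top_chunk(offset);		/* first ADD immediate  */
	uint32_t g1 = top_chunk(offset - g0);		/* second ADD immediate */
	uint32_t g2 = offset - g0 - g1;			/* LDR 12-bit offset    */

	printf("0x%08x = 0x%08x + 0x%08x + 0x%03x (%s)\n", offset, g0, g1, g2,
	       g2 <= 0xfff ? "LDR offset fits in 12 bits" : "out of range");
	return 0;
}

For 0x01234567 this prints 0x01200000 + 0x00034400 + 0x167, i.e. two rotate-encodable ADD immediates plus a 12-bit load offset, which is where the 2x8 + 12 == 28 bit (-/+ 256 MiB) range quoted in the module.c comment comes from.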