diff options
Diffstat (limited to 'arch/arm/kernel')
36 files changed, 1225 insertions, 674 deletions
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index ae295a3bcfef..48737ec800eb 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -10,6 +10,7 @@ ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_ftrace.o = -pg CFLAGS_REMOVE_insn.o = -pg CFLAGS_REMOVE_patch.o = -pg +CFLAGS_REMOVE_unwind.o = -pg endif CFLAGS_REMOVE_return_address.o = -pg @@ -44,7 +45,6 @@ obj-$(CONFIG_ISA_DMA_API) += dma.o obj-$(CONFIG_FIQ) += fiq.o fiqasm.o obj-$(CONFIG_MODULES) += armksyms.o module.o obj-$(CONFIG_ARM_MODULE_PLTS) += module-plts.o -obj-$(CONFIG_ISA_DMA) += dma-isa.o obj-$(CONFIG_PCI) += bios32.o isa.o obj-$(CONFIG_ARM_CPU_SUSPEND) += sleep.o suspend.o obj-$(CONFIG_HIBERNATION) += hibernate.o @@ -88,7 +88,7 @@ obj-$(CONFIG_VDSO) += vdso.o obj-$(CONFIG_EFI) += efi.o obj-$(CONFIG_PARAVIRT) += paravirt.o -head-y := head$(MMUEXT).o +obj-y += head$(MMUEXT).o obj-$(CONFIG_DEBUG_LL) += debug.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_ARM_PATCH_PHYS_VIRT) += phys2virt.o @@ -106,4 +106,6 @@ endif obj-$(CONFIG_HAVE_ARM_SMCCC) += smccc-call.o -extra-y := $(head-y) vmlinux.lds +obj-$(CONFIG_GENERIC_CPU_VULNERABILITIES) += spectre.o + +extra-y := vmlinux.lds diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index 645845e4982a..2c8d76fd7c66 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -43,9 +43,6 @@ int main(void) BLANK(); DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); -#ifndef CONFIG_THREAD_INFO_IN_TASK - DEFINE(TI_TASK, offsetof(struct thread_info, task)); -#endif DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); DEFINE(TI_CPU_DOMAIN, offsetof(struct thread_info, cpu_domain)); DEFINE(TI_CPU_SAVE, offsetof(struct thread_info, cpu_context)); diff --git a/arch/arm/kernel/atags_proc.c b/arch/arm/kernel/atags_proc.c index 3c2faf2bd124..3ec2afe78423 100644 --- a/arch/arm/kernel/atags_proc.c +++ 
b/arch/arm/kernel/atags_proc.c @@ -13,7 +13,7 @@ struct buffer { static ssize_t atags_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - struct buffer *b = PDE_DATA(file_inode(file)); + struct buffer *b = pde_data(file_inode(file)); return simple_read_from_buffer(buf, count, ppos, b->data, b->size); } diff --git a/arch/arm/kernel/crash_dump.c b/arch/arm/kernel/crash_dump.c index 53cb92435392..938bd932df9a 100644 --- a/arch/arm/kernel/crash_dump.c +++ b/arch/arm/kernel/crash_dump.c @@ -14,22 +14,10 @@ #include <linux/crash_dump.h> #include <linux/uaccess.h> #include <linux/io.h> +#include <linux/uio.h> -/** - * copy_oldmem_page() - copy one page from old kernel memory - * @pfn: page frame number to be copied - * @buf: buffer where the copied page is placed - * @csize: number of bytes to copy - * @offset: offset in bytes into the page - * @userbuf: if set, @buf is int he user address space - * - * This function copies one page from old kernel memory into buffer pointed by - * @buf. If @buf is in userspace, set @userbuf to %1. Returns number of bytes - * copied or negative error in case of failure. 
- */ -ssize_t copy_oldmem_page(unsigned long pfn, char *buf, - size_t csize, unsigned long offset, - int userbuf) +ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn, + size_t csize, unsigned long offset) { void *vaddr; @@ -40,14 +28,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, if (!vaddr) return -ENOMEM; - if (userbuf) { - if (copy_to_user(buf, vaddr + offset, csize)) { - iounmap(vaddr); - return -EFAULT; - } - } else { - memcpy(buf, vaddr + offset, csize); - } + csize = copy_to_iter(vaddr + offset, csize, iter); iounmap(vaddr); return csize; diff --git a/arch/arm/kernel/devtree.c b/arch/arm/kernel/devtree.c index 02839d8b6202..264827281113 100644 --- a/arch/arm/kernel/devtree.c +++ b/arch/arm/kernel/devtree.c @@ -194,14 +194,12 @@ const struct machine_desc * __init setup_machine_fdt(void *dt_virt) { const struct machine_desc *mdesc, *mdesc_best = NULL; -#if defined(CONFIG_ARCH_MULTIPLATFORM) || defined(CONFIG_ARM_SINGLE_ARMV7M) DT_MACHINE_START(GENERIC_DT, "Generic DT based system") .l2c_aux_val = 0x0, .l2c_aux_mask = ~0x0, MACHINE_END mdesc_best = &__mach_desc_GENERIC_DT; -#endif if (!dt_virt || !early_init_dt_verify(dt_virt)) return NULL; diff --git a/arch/arm/kernel/dma-isa.c b/arch/arm/kernel/dma-isa.c deleted file mode 100644 index 2d90ecce5a11..000000000000 --- a/arch/arm/kernel/dma-isa.c +++ /dev/null @@ -1,225 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * linux/arch/arm/kernel/dma-isa.c - * - * Copyright (C) 1999-2000 Russell King - * - * ISA DMA primitives - * Taken from various sources, including: - * linux/include/asm/dma.h: Defines for using and allocating dma channels. - * Written by Hennus Bergman, 1992. - * High DMA channel support & info by Hannu Savolainen and John Boyd, - * Nov. 1992. 
- * arch/arm/kernel/dma-ebsa285.c - * Copyright (C) 1998 Phil Blundell - */ -#include <linux/ioport.h> -#include <linux/init.h> -#include <linux/dma-mapping.h> -#include <linux/io.h> - -#include <asm/dma.h> -#include <asm/mach/dma.h> - -#define ISA_DMA_MASK 0 -#define ISA_DMA_MODE 1 -#define ISA_DMA_CLRFF 2 -#define ISA_DMA_PGHI 3 -#define ISA_DMA_PGLO 4 -#define ISA_DMA_ADDR 5 -#define ISA_DMA_COUNT 6 - -static unsigned int isa_dma_port[8][7] = { - /* MASK MODE CLRFF PAGE_HI PAGE_LO ADDR COUNT */ - { 0x0a, 0x0b, 0x0c, 0x487, 0x087, 0x00, 0x01 }, - { 0x0a, 0x0b, 0x0c, 0x483, 0x083, 0x02, 0x03 }, - { 0x0a, 0x0b, 0x0c, 0x481, 0x081, 0x04, 0x05 }, - { 0x0a, 0x0b, 0x0c, 0x482, 0x082, 0x06, 0x07 }, - { 0xd4, 0xd6, 0xd8, 0x000, 0x000, 0xc0, 0xc2 }, - { 0xd4, 0xd6, 0xd8, 0x48b, 0x08b, 0xc4, 0xc6 }, - { 0xd4, 0xd6, 0xd8, 0x489, 0x089, 0xc8, 0xca }, - { 0xd4, 0xd6, 0xd8, 0x48a, 0x08a, 0xcc, 0xce } -}; - -static int isa_get_dma_residue(unsigned int chan, dma_t *dma) -{ - unsigned int io_port = isa_dma_port[chan][ISA_DMA_COUNT]; - int count; - - count = 1 + inb(io_port); - count |= inb(io_port) << 8; - - return chan < 4 ? count : (count << 1); -} - -static struct device isa_dma_dev = { - .init_name = "fallback device", - .coherent_dma_mask = ~(dma_addr_t)0, - .dma_mask = &isa_dma_dev.coherent_dma_mask, -}; - -static void isa_enable_dma(unsigned int chan, dma_t *dma) -{ - if (dma->invalid) { - unsigned long address, length; - unsigned int mode; - enum dma_data_direction direction; - - mode = (chan & 3) | dma->dma_mode; - switch (dma->dma_mode & DMA_MODE_MASK) { - case DMA_MODE_READ: - direction = DMA_FROM_DEVICE; - break; - - case DMA_MODE_WRITE: - direction = DMA_TO_DEVICE; - break; - - case DMA_MODE_CASCADE: - direction = DMA_BIDIRECTIONAL; - break; - - default: - direction = DMA_NONE; - break; - } - - if (!dma->sg) { - /* - * Cope with ISA-style drivers which expect cache - * coherence. 
- */ - dma->sg = &dma->buf; - dma->sgcount = 1; - dma->buf.length = dma->count; - dma->buf.dma_address = dma_map_single(&isa_dma_dev, - dma->addr, dma->count, - direction); - } - - address = dma->buf.dma_address; - length = dma->buf.length - 1; - - outb(address >> 16, isa_dma_port[chan][ISA_DMA_PGLO]); - outb(address >> 24, isa_dma_port[chan][ISA_DMA_PGHI]); - - if (chan >= 4) { - address >>= 1; - length >>= 1; - } - - outb(0, isa_dma_port[chan][ISA_DMA_CLRFF]); - - outb(address, isa_dma_port[chan][ISA_DMA_ADDR]); - outb(address >> 8, isa_dma_port[chan][ISA_DMA_ADDR]); - - outb(length, isa_dma_port[chan][ISA_DMA_COUNT]); - outb(length >> 8, isa_dma_port[chan][ISA_DMA_COUNT]); - - outb(mode, isa_dma_port[chan][ISA_DMA_MODE]); - dma->invalid = 0; - } - outb(chan & 3, isa_dma_port[chan][ISA_DMA_MASK]); -} - -static void isa_disable_dma(unsigned int chan, dma_t *dma) -{ - outb(chan | 4, isa_dma_port[chan][ISA_DMA_MASK]); -} - -static struct dma_ops isa_dma_ops = { - .type = "ISA", - .enable = isa_enable_dma, - .disable = isa_disable_dma, - .residue = isa_get_dma_residue, -}; - -static struct resource dma_resources[] = { { - .name = "dma1", - .start = 0x0000, - .end = 0x000f -}, { - .name = "dma low page", - .start = 0x0080, - .end = 0x008f -}, { - .name = "dma2", - .start = 0x00c0, - .end = 0x00df -}, { - .name = "dma high page", - .start = 0x0480, - .end = 0x048f -} }; - -static dma_t isa_dma[8]; - -/* - * ISA DMA always starts at channel 0 - */ -void __init isa_init_dma(void) -{ - /* - * Try to autodetect presence of an ISA DMA controller. - * We do some minimal initialisation, and check that - * channel 0's DMA address registers are writeable. - */ - outb(0xff, 0x0d); - outb(0xff, 0xda); - - /* - * Write high and low address, and then read them back - * in the same order. 
- */ - outb(0x55, 0x00); - outb(0xaa, 0x00); - - if (inb(0) == 0x55 && inb(0) == 0xaa) { - unsigned int chan, i; - - for (chan = 0; chan < 8; chan++) { - isa_dma[chan].d_ops = &isa_dma_ops; - isa_disable_dma(chan, NULL); - } - - outb(0x40, 0x0b); - outb(0x41, 0x0b); - outb(0x42, 0x0b); - outb(0x43, 0x0b); - - outb(0xc0, 0xd6); - outb(0x41, 0xd6); - outb(0x42, 0xd6); - outb(0x43, 0xd6); - - outb(0, 0xd4); - - outb(0x10, 0x08); - outb(0x10, 0xd0); - - /* - * Is this correct? According to my documentation, it - * doesn't appear to be. It should be: - * outb(0x3f, 0x40b); outb(0x3f, 0x4d6); - */ - outb(0x30, 0x40b); - outb(0x31, 0x40b); - outb(0x32, 0x40b); - outb(0x33, 0x40b); - outb(0x31, 0x4d6); - outb(0x32, 0x4d6); - outb(0x33, 0x4d6); - - for (i = 0; i < ARRAY_SIZE(dma_resources); i++) - request_resource(&ioport_resource, dma_resources + i); - - for (chan = 0; chan < 8; chan++) { - int ret = isa_dma_add(chan, &isa_dma[chan]); - if (ret) - pr_err("ISADMA%u: unable to register: %d\n", - chan, ret); - } - - request_dma(DMA_ISA_CASCADE, "cascade"); - } -} diff --git a/arch/arm/kernel/efi.c b/arch/arm/kernel/efi.c index e57dbcc89123..e50ad7eefc02 100644 --- a/arch/arm/kernel/efi.c +++ b/arch/arm/kernel/efi.c @@ -4,6 +4,7 @@ */ #include <linux/efi.h> +#include <linux/memblock.h> #include <asm/efi.h> #include <asm/mach/map.h> #include <asm/mmu_context.h> @@ -73,3 +74,81 @@ int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md) return efi_set_mapping_permissions(mm, md); return 0; } + +static unsigned long __initdata screen_info_table = EFI_INVALID_TABLE_ADDR; +static unsigned long __initdata cpu_state_table = EFI_INVALID_TABLE_ADDR; + +const efi_config_table_type_t efi_arch_tables[] __initconst = { + {LINUX_EFI_ARM_SCREEN_INFO_TABLE_GUID, &screen_info_table}, + {LINUX_EFI_ARM_CPU_STATE_TABLE_GUID, &cpu_state_table}, + {} +}; + +static void __init load_screen_info_table(void) +{ + struct screen_info *si; + + if (screen_info_table != 
EFI_INVALID_TABLE_ADDR) { + si = early_memremap_ro(screen_info_table, sizeof(*si)); + if (!si) { + pr_err("Could not map screen_info config table\n"); + return; + } + screen_info = *si; + early_memunmap(si, sizeof(*si)); + + /* dummycon on ARM needs non-zero values for columns/lines */ + screen_info.orig_video_cols = 80; + screen_info.orig_video_lines = 25; + + if (memblock_is_map_memory(screen_info.lfb_base)) + memblock_mark_nomap(screen_info.lfb_base, + screen_info.lfb_size); + } +} + +static void __init load_cpu_state_table(void) +{ + if (cpu_state_table != EFI_INVALID_TABLE_ADDR) { + struct efi_arm_entry_state *state; + bool dump_state = true; + + state = early_memremap_ro(cpu_state_table, + sizeof(struct efi_arm_entry_state)); + if (state == NULL) { + pr_warn("Unable to map CPU entry state table.\n"); + return; + } + + if ((state->sctlr_before_ebs & 1) == 0) + pr_warn(FW_BUG "EFI stub was entered with MMU and Dcache disabled, please fix your firmware!\n"); + else if ((state->sctlr_after_ebs & 1) == 0) + pr_warn(FW_BUG "ExitBootServices() returned with MMU and Dcache disabled, please fix your firmware!\n"); + else + dump_state = false; + + if (dump_state || efi_enabled(EFI_DBG)) { + pr_info("CPSR at EFI stub entry : 0x%08x\n", + state->cpsr_before_ebs); + pr_info("SCTLR at EFI stub entry : 0x%08x\n", + state->sctlr_before_ebs); + pr_info("CPSR after ExitBootServices() : 0x%08x\n", + state->cpsr_after_ebs); + pr_info("SCTLR after ExitBootServices(): 0x%08x\n", + state->sctlr_after_ebs); + } + early_memunmap(state, sizeof(struct efi_arm_entry_state)); + } +} + +void __init arm_efi_init(void) +{ + efi_init(); + + load_screen_info_table(); + + /* ARM does not permit early mappings to persist across paging_init() */ + efi_memmap_unmap(); + + load_cpu_state_table(); +} diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index deff286eb5ea..c39303e5c234 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -19,9 +19,6 @@ 
#include <asm/glue-df.h> #include <asm/glue-pf.h> #include <asm/vfpmacros.h> -#ifndef CONFIG_GENERIC_IRQ_MULTI_HANDLER -#include <mach/entry-macro.S> -#endif #include <asm/thread_notify.h> #include <asm/unwind.h> #include <asm/unistd.h> @@ -30,27 +27,42 @@ #include <asm/uaccess-asm.h> #include "entry-header.S" -#include <asm/entry-macro-multi.S> #include <asm/probes.h> /* * Interrupt handling. */ - .macro irq_handler -#ifdef CONFIG_GENERIC_IRQ_MULTI_HANDLER - mov r0, sp - bl generic_handle_arch_irq -#else - arch_irq_handler_default + .macro irq_handler, from_user:req + mov r1, sp + ldr_this_cpu r2, irq_stack_ptr, r2, r3 + .if \from_user == 0 + @ + @ If we took the interrupt while running in the kernel, we may already + @ be using the IRQ stack, so revert to the original value in that case. + @ + subs r3, r2, r1 @ SP above bottom of IRQ stack? + rsbscs r3, r3, #THREAD_SIZE @ ... and below the top? +#ifdef CONFIG_VMAP_STACK + ldr_va r3, high_memory, cc @ End of the linear region + cmpcc r3, r1 @ Stack pointer was below it? #endif + bcc 0f @ If not, switch to the IRQ stack + mov r0, r1 + bl generic_handle_arch_irq + b 1f +0: + .endif + + mov_l r0, generic_handle_arch_irq + bl call_with_stack +1: .endm .macro pabt_helper @ PABORT handler takes pt_regs in r2, fault address in r4 and psr in r5 #ifdef MULTI_PABORT - ldr ip, .LCprocfns - mov lr, pc - ldr pc, [ip, #PROCESSOR_PABT_FUNC] + ldr_va ip, processor, offset=PROCESSOR_PABT_FUNC + bl_r ip #else bl CPU_PABORT_HANDLER #endif @@ -69,9 +81,8 @@ @ the fault status register in r1. r9 must be preserved. @ #ifdef MULTI_DABORT - ldr ip, .LCprocfns - mov lr, pc - ldr pc, [ip, #PROCESSOR_DABT_FUNC] + ldr_va ip, processor, offset=PROCESSOR_DABT_FUNC + bl_r ip #else bl CPU_DABORT_HANDLER #endif @@ -140,27 +151,35 @@ ENDPROC(__und_invalid) #define SPFIX(code...) 
#endif - .macro svc_entry, stack_hole=0, trace=1, uaccess=1 + .macro svc_entry, stack_hole=0, trace=1, uaccess=1, overflow_check=1 UNWIND(.fnstart ) - UNWIND(.save {r0 - pc} ) - sub sp, sp, #(SVC_REGS_SIZE + \stack_hole - 4) + sub sp, sp, #(SVC_REGS_SIZE + \stack_hole) + THUMB( add sp, r1 ) @ get SP in a GPR without + THUMB( sub r1, sp, r1 ) @ using a temp register + + .if \overflow_check + UNWIND(.save {r0 - pc} ) + do_overflow_check (SVC_REGS_SIZE + \stack_hole) + .endif + #ifdef CONFIG_THUMB2_KERNEL - SPFIX( str r0, [sp] ) @ temporarily saved - SPFIX( mov r0, sp ) - SPFIX( tst r0, #4 ) @ test original stack alignment - SPFIX( ldr r0, [sp] ) @ restored + tst r1, #4 @ test stack pointer alignment + sub r1, sp, r1 @ restore original R1 + sub sp, r1 @ restore original SP #else SPFIX( tst sp, #4 ) #endif - SPFIX( subeq sp, sp, #4 ) - stmia sp, {r1 - r12} + SPFIX( subne sp, sp, #4 ) + + ARM( stmib sp, {r1 - r12} ) + THUMB( stmia sp, {r0 - r12} ) @ No STMIB in Thumb-2 ldmia r0, {r3 - r5} - add r7, sp, #S_SP - 4 @ here for interlock avoidance + add r7, sp, #S_SP @ here for interlock avoidance mov r6, #-1 @ "" "" "" "" - add r2, sp, #(SVC_REGS_SIZE + \stack_hole - 4) - SPFIX( addeq r2, r2, #4 ) - str r3, [sp, #-4]! 
@ save the "real" r0 copied + add r2, sp, #(SVC_REGS_SIZE + \stack_hole) + SPFIX( addne r2, r2, #4 ) + str r3, [sp] @ save the "real" r0 copied @ from the exception stack mov r3, lr @@ -199,7 +218,7 @@ ENDPROC(__dabt_svc) .align 5 __irq_svc: svc_entry - irq_handler + irq_handler from_user=0 #ifdef CONFIG_PREEMPTION ldr r8, [tsk, #TI_PREEMPT] @ get preempt count @@ -281,16 +300,6 @@ __fiq_svc: UNWIND(.fnend ) ENDPROC(__fiq_svc) - .align 5 -.LCcralign: - .word cr_alignment -#ifdef MULTI_DABORT -.LCprocfns: - .word processor -#endif -.LCfp: - .word fp_enter - /* * Abort mode handlers */ @@ -349,7 +358,7 @@ ENDPROC(__fiq_abt) THUMB( stmia sp, {r0 - r12} ) ATRAP( mrc p15, 0, r7, c1, c0, 0) - ATRAP( ldr r8, .LCcralign) + ATRAP( ldr_va r8, cr_alignment) ldmia r0, {r3 - r5} add r0, sp, #S_PC @ here for interlock avoidance @@ -358,8 +367,6 @@ ENDPROC(__fiq_abt) str r3, [sp] @ save the "real" r0 copied @ from the exception stack - ATRAP( ldr r8, [r8, #0]) - @ @ We are now ready to fill in the remaining blanks on the stack: @ @@ -426,7 +433,7 @@ ENDPROC(__dabt_usr) __irq_usr: usr_entry kuser_cmpxchg_check - irq_handler + irq_handler from_user=1 get_thread_info tsk mov why, #0 b ret_to_user_from_irq @@ -484,9 +491,7 @@ __und_usr_thumb: */ #if __LINUX_ARM_ARCH__ < 7 /* If the target CPU may not be Thumb-2-capable, a run-time check is needed: */ -#define NEED_CPU_ARCHITECTURE - ldr r5, .LCcpu_architecture - ldr r5, [r5] + ldr_va r5, cpu_architecture cmp r5, #CPU_ARCH_ARMv7 blo __und_usr_fault_16 @ 16bit undefined instruction /* @@ -596,11 +601,9 @@ call_fpe: tstne r0, #0x04000000 @ bit 26 set on both ARM and Thumb-2 reteq lr and r8, r0, #0x00000f00 @ mask out CP number - THUMB( lsr r8, r8, #8 ) mov r7, #1 - add r6, r10, #TI_USED_CP - ARM( strb r7, [r6, r8, lsr #8] ) @ set appropriate used_cp[] - THUMB( strb r7, [r6, r8] ) @ set appropriate used_cp[] + add r6, r10, r8, lsr #8 @ add used_cp[] array offset first + strb r7, [r6, #TI_USED_CP] @ set appropriate used_cp[] #ifdef 
CONFIG_IWMMXT @ Test if we need to give access to iWMMXt coprocessors ldr r5, [r10, #TI_FLAGS] @@ -609,7 +612,7 @@ call_fpe: bcs iwmmxt_task_enable #endif ARM( add pc, pc, r8, lsr #6 ) - THUMB( lsl r8, r8, #2 ) + THUMB( lsr r8, r8, #6 ) THUMB( add pc, r8 ) nop @@ -635,12 +638,6 @@ call_fpe: ret.w lr @ CP#14 (Debug) ret.w lr @ CP#15 (Control) -#ifdef NEED_CPU_ARCHITECTURE - .align 2 -.LCcpu_architecture: - .word __cpu_architecture -#endif - #ifdef CONFIG_NEON .align 6 @@ -666,9 +663,8 @@ call_fpe: #endif do_fpe: - ldr r4, .LCfp add r10, r10, #TI_FPSTATE @ r10 = workspace - ldr pc, [r4] @ Call FP module USR entry point + ldr_va pc, fp_enter, tmp=r4 @ Call FP module USR entry point /* * The FP module is called with these registers set: @@ -754,16 +750,17 @@ ENTRY(__switch_to) ldr r6, [r2, #TI_CPU_DOMAIN] #endif switch_tls r1, r4, r5, r3, r7 -#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_SMP) - ldr r7, [r2, #TI_TASK] +#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_SMP) && \ + !defined(CONFIG_STACKPROTECTOR_PER_TASK) ldr r8, =__stack_chk_guard .if (TSK_STACK_CANARY > IMM12_MASK) - add r7, r7, #TSK_STACK_CANARY & ~IMM12_MASK + add r9, r2, #TSK_STACK_CANARY & ~IMM12_MASK + ldr r9, [r9, #TSK_STACK_CANARY & IMM12_MASK] + .else + ldr r9, [r2, #TSK_STACK_CANARY & IMM12_MASK] .endif - ldr r7, [r7, #TSK_STACK_CANARY & IMM12_MASK] -#elif defined(CONFIG_CURRENT_POINTER_IN_TPIDRURO) - mov r7, r2 @ Preserve 'next' #endif + mov r7, r2 @ Preserve 'next' #ifdef CONFIG_CPU_USE_DOMAINS mcr p15, 0, r6, c3, c0, 0 @ Set domain register #endif @@ -772,19 +769,102 @@ ENTRY(__switch_to) ldr r0, =thread_notify_head mov r1, #THREAD_NOTIFY_SWITCH bl atomic_notifier_call_chain -#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_SMP) - str r7, [r8] +#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_SMP) && \ + !defined(CONFIG_STACKPROTECTOR_PER_TASK) + str r9, [r8] #endif - THUMB( mov ip, r4 ) mov r0, r5 - set_current r7 - ARM( ldmia r4, {r4 - sl, fp, sp, pc} ) @ Load all 
regs saved previously - THUMB( ldmia ip!, {r4 - sl, fp} ) @ Load all regs saved previously - THUMB( ldr sp, [ip], #4 ) - THUMB( ldr pc, [ip] ) +#if !defined(CONFIG_THUMB2_KERNEL) && !defined(CONFIG_VMAP_STACK) + set_current r7, r8 + ldmia r4, {r4 - sl, fp, sp, pc} @ Load all regs saved previously +#else + mov r1, r7 + ldmia r4, {r4 - sl, fp, ip, lr} @ Load all regs saved previously +#ifdef CONFIG_VMAP_STACK + @ + @ Do a dummy read from the new stack while running from the old one so + @ that we can rely on do_translation_fault() to fix up any stale PMD + @ entries covering the vmalloc region. + @ + ldr r2, [ip] +#endif + + @ When CONFIG_THREAD_INFO_IN_TASK=n, the update of SP itself is what + @ effectuates the task switch, as that is what causes the observable + @ values of current and current_thread_info to change. When + @ CONFIG_THREAD_INFO_IN_TASK=y, setting current (and therefore + @ current_thread_info) is done explicitly, and the update of SP just + @ switches us to another stack, with few other side effects. In order + @ to prevent this distinction from causing any inconsistencies, let's + @ keep the 'set_current' call as close as we can to the update of SP. + set_current r1, r2 + mov sp, ip + ret lr +#endif UNWIND(.fnend ) ENDPROC(__switch_to) +#ifdef CONFIG_VMAP_STACK + .text + .align 2 +__bad_stack: + @ + @ We've just detected an overflow. We need to load the address of this + @ CPU's overflow stack into the stack pointer register. We have only one + @ scratch register so let's use a sequence of ADDs including one + @ involving the PC, and decorate them with PC-relative group + @ relocations. As these are ARM only, switch to ARM mode first. + @ + @ We enter here with IP clobbered and its value stashed on the mode + @ stack. + @ +THUMB( bx pc ) +THUMB( nop ) +THUMB( .arm ) + ldr_this_cpu_armv6 ip, overflow_stack_ptr + + str sp, [ip, #-4]! 
@ Preserve original SP value + mov sp, ip @ Switch to overflow stack + pop {ip} @ Original SP in IP + +#if defined(CONFIG_UNWINDER_FRAME_POINTER) && defined(CONFIG_CC_IS_GCC) + mov ip, ip @ mov expected by unwinder + push {fp, ip, lr, pc} @ GCC flavor frame record +#else + str ip, [sp, #-8]! @ store original SP + push {fpreg, lr} @ Clang flavor frame record +#endif +UNWIND( ldr ip, [r0, #4] ) @ load exception LR +UNWIND( str ip, [sp, #12] ) @ store in the frame record + ldr ip, [r0, #12] @ reload IP + + @ Store the original GPRs to the new stack. + svc_entry uaccess=0, overflow_check=0 + +UNWIND( .save {sp, pc} ) +UNWIND( .save {fpreg, lr} ) +UNWIND( .setfp fpreg, sp ) + + ldr fpreg, [sp, #S_SP] @ Add our frame record + @ to the linked list +#if defined(CONFIG_UNWINDER_FRAME_POINTER) && defined(CONFIG_CC_IS_GCC) + ldr r1, [fp, #4] @ reload SP at entry + add fp, fp, #12 +#else + ldr r1, [fpreg, #8] +#endif + str r1, [sp, #S_SP] @ store in pt_regs + + @ Stash the regs for handle_bad_stack + mov r0, sp + + @ Time to die + bl handle_bad_stack + nop +UNWIND( .fnend ) +ENDPROC(__bad_stack) +#endif + __INIT /* @@ -998,17 +1078,23 @@ __kuser_helper_end: */ .macro vector_stub, name, mode, correction=0 .align 5 +#ifdef CONFIG_HARDEN_BRANCH_HISTORY +vector_bhb_bpiall_\name: + mcr p15, 0, r0, c7, c5, 6 @ BPIALL + @ isb not needed due to "movs pc, lr" in the vector stub + @ which gives a "context synchronisation". 
+#endif vector_\name: .if \correction sub lr, lr, #\correction .endif - @ - @ Save r0, lr_<exception> (parent PC) and spsr_<exception> - @ (parent CPSR) - @ + @ Save r0, lr_<exception> (parent PC) stmia sp, {r0, lr} @ save r0, lr + + @ Save spsr_<exception> (parent CPSR) +.Lvec_\name: mrs lr, spsr str lr, [sp, #8] @ save spsr @@ -1030,14 +1116,47 @@ vector_\name: movs pc, lr @ branch to handler in SVC mode ENDPROC(vector_\name) +#ifdef CONFIG_HARDEN_BRANCH_HISTORY + .subsection 1 + .align 5 +vector_bhb_loop8_\name: + .if \correction + sub lr, lr, #\correction + .endif + + @ Save r0, lr_<exception> (parent PC) + stmia sp, {r0, lr} + + @ bhb workaround + mov r0, #8 +3: W(b) . + 4 + subs r0, r0, #1 + bne 3b + dsb nsh + @ isb not needed due to "movs pc, lr" in the vector stub + @ which gives a "context synchronisation". + b .Lvec_\name +ENDPROC(vector_bhb_loop8_\name) + .previous +#endif + .align 2 @ handler addresses follow this label 1: .endm .section .stubs, "ax", %progbits - @ This must be the first word + @ These need to remain at the start of the section so that + @ they are in range of the 'SWI' entries in the vector tables + @ located 4k down. +.L__vector_swi: .word vector_swi +#ifdef CONFIG_HARDEN_BRANCH_HISTORY +.L__vector_bhb_loop8_swi: + .word vector_bhb_loop8_swi +.L__vector_bhb_bpiall_swi: + .word vector_bhb_bpiall_swi +#endif vector_rst: ARM( swi SYS_ERROR0 ) @@ -1152,8 +1271,10 @@ vector_addrexcptn: * FIQ "NMI" handler *----------------------------------------------------------------------------- * Handle a FIQ using the SVC stack allowing FIQ act like NMI on x86 - * systems. + * systems. This must be the last vector stub, so lets place it in its own + * subsection. 
*/ + .subsection 2 vector_stub fiq, FIQ_MODE, 4 .long __fiq_usr @ 0 (USR_26 / USR_32) @@ -1176,16 +1297,43 @@ vector_addrexcptn: .globl vector_fiq .section .vectors, "ax", %progbits -.L__vectors_start: W(b) vector_rst W(b) vector_und - W(ldr) pc, .L__vectors_start + 0x1000 +ARM( .reloc ., R_ARM_LDR_PC_G0, .L__vector_swi ) +THUMB( .reloc ., R_ARM_THM_PC12, .L__vector_swi ) + W(ldr) pc, . W(b) vector_pabt W(b) vector_dabt W(b) vector_addrexcptn W(b) vector_irq W(b) vector_fiq +#ifdef CONFIG_HARDEN_BRANCH_HISTORY + .section .vectors.bhb.loop8, "ax", %progbits + W(b) vector_rst + W(b) vector_bhb_loop8_und +ARM( .reloc ., R_ARM_LDR_PC_G0, .L__vector_bhb_loop8_swi ) +THUMB( .reloc ., R_ARM_THM_PC12, .L__vector_bhb_loop8_swi ) + W(ldr) pc, . + W(b) vector_bhb_loop8_pabt + W(b) vector_bhb_loop8_dabt + W(b) vector_addrexcptn + W(b) vector_bhb_loop8_irq + W(b) vector_bhb_loop8_fiq + + .section .vectors.bhb.bpiall, "ax", %progbits + W(b) vector_rst + W(b) vector_bhb_bpiall_und +ARM( .reloc ., R_ARM_LDR_PC_G0, .L__vector_bhb_bpiall_swi ) +THUMB( .reloc ., R_ARM_THM_PC12, .L__vector_bhb_bpiall_swi ) + W(ldr) pc, . 
+ W(b) vector_bhb_bpiall_pabt + W(b) vector_bhb_bpiall_dabt + W(b) vector_addrexcptn + W(b) vector_bhb_bpiall_irq + W(b) vector_bhb_bpiall_fiq +#endif + .data .align 2 diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index ac86c34682bb..405a607b754f 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -16,17 +16,19 @@ .equ NR_syscalls, __NR_syscalls -#ifdef CONFIG_NEED_RET_TO_USER -#include <mach/entry-macro.S> -#else - .macro arch_ret_to_user, tmp1, tmp2 - .endm + .macro arch_ret_to_user, tmp +#ifdef CONFIG_ARCH_IOP32X + mrc p15, 0, \tmp, c15, c1, 0 + tst \tmp, #(1 << 6) + bicne \tmp, \tmp, #(1 << 6) + mcrne p15, 0, \tmp, c15, c1, 0 @ Disable cp6 access #endif + .endm #include "entry-header.S" saved_psr .req r8 -#if defined(CONFIG_TRACE_IRQFLAGS) || defined(CONFIG_CONTEXT_TRACKING) +#if defined(CONFIG_TRACE_IRQFLAGS) || defined(CONFIG_CONTEXT_TRACKING_USER) saved_pc .req r9 #define TRACE(x...) x #else @@ -36,7 +38,7 @@ saved_pc .req lr .section .entry.text,"ax",%progbits .align 5 -#if !(IS_ENABLED(CONFIG_TRACE_IRQFLAGS) || IS_ENABLED(CONFIG_CONTEXT_TRACKING) || \ +#if !(IS_ENABLED(CONFIG_TRACE_IRQFLAGS) || IS_ENABLED(CONFIG_CONTEXT_TRACKING_USER) || \ IS_ENABLED(CONFIG_DEBUG_RSEQ)) /* * This is the fast syscall return path. 
We do as little as possible here, @@ -55,7 +57,7 @@ __ret_fast_syscall: /* perform architecture specific actions before user return */ - arch_ret_to_user r1, lr + arch_ret_to_user r1 restore_user_regs fast = 1, offset = S_OFF UNWIND(.fnend ) @@ -128,7 +130,7 @@ no_work_pending: asm_trace_hardirqs_on save = 0 /* perform architecture specific actions before user return */ - arch_ret_to_user r1, lr + arch_ret_to_user r1 ct_user_enter save = 0 restore_user_regs fast = 0, offset = 0 @@ -154,12 +156,36 @@ ENDPROC(ret_from_fork) */ .align 5 +#ifdef CONFIG_HARDEN_BRANCH_HISTORY +ENTRY(vector_bhb_loop8_swi) + sub sp, sp, #PT_REGS_SIZE + stmia sp, {r0 - r12} + mov r8, #8 +1: b 2f +2: subs r8, r8, #1 + bne 1b + dsb nsh + isb + b 3f +ENDPROC(vector_bhb_loop8_swi) + + .align 5 +ENTRY(vector_bhb_bpiall_swi) + sub sp, sp, #PT_REGS_SIZE + stmia sp, {r0 - r12} + mcr p15, 0, r8, c7, c5, 6 @ BPIALL + isb + b 3f +ENDPROC(vector_bhb_bpiall_swi) +#endif + .align 5 ENTRY(vector_swi) #ifdef CONFIG_CPU_V7M v7m_exception_entry #else sub sp, sp, #PT_REGS_SIZE stmia sp, {r0 - r12} @ Calling r0 - r12 +3: ARM( add r8, sp, #S_PC ) ARM( stmdb r8, {sp, lr}^ ) @ Calling sp, lr THUMB( mov r8, sp ) @@ -172,7 +198,7 @@ ENTRY(vector_swi) #endif reload_current r10, ip zero_fp - alignment_trap r10, ip, __cr_alignment + alignment_trap r10, ip, cr_alignment asm_trace_hardirqs_on save=0 enable_irq_notrace ct_user_exit save=0 @@ -276,6 +302,7 @@ local_restart: b ret_fast_syscall #endif ENDPROC(vector_swi) + .ltorg /* * This is the really slow path. 
We're going to be doing @@ -302,14 +329,6 @@ __sys_trace_return: bl syscall_trace_exit b ret_slow_syscall - .align 5 -#ifdef CONFIG_ALIGNMENT_TRAP - .type __cr_alignment, #object -__cr_alignment: - .word cr_alignment -#endif - .ltorg - .macro syscall_table_start, sym .equ __sys_nr, 0 .type \sym, #object diff --git a/arch/arm/kernel/entry-ftrace.S b/arch/arm/kernel/entry-ftrace.S index a74289ebc803..3e7bcaca5e07 100644 --- a/arch/arm/kernel/entry-ftrace.S +++ b/arch/arm/kernel/entry-ftrace.S @@ -22,12 +22,9 @@ * mcount can be thought of as a function called in the middle of a subroutine * call. As such, it needs to be transparent for both the caller and the * callee: the original lr needs to be restored when leaving mcount, and no - * registers should be clobbered. (In the __gnu_mcount_nc implementation, we - * clobber the ip register. This is OK because the ARM calling convention - * allows it to be clobbered in subroutines and doesn't use it to hold - * parameters.) + * registers should be clobbered. * - * When using dynamic ftrace, we patch out the mcount call by a "pop {lr}" + * When using dynamic ftrace, we patch out the mcount call by a "add sp, #4" * instead of the __gnu_mcount_nc call (see arch/arm/kernel/ftrace.c). 
*/ @@ -38,23 +35,20 @@ .macro __mcount suffix mcount_enter - ldr r0, =ftrace_trace_function - ldr r2, [r0] - adr r0, .Lftrace_stub + ldr_va r2, ftrace_trace_function + badr r0, .Lftrace_stub cmp r0, r2 bne 1f #ifdef CONFIG_FUNCTION_GRAPH_TRACER - ldr r1, =ftrace_graph_return - ldr r2, [r1] - cmp r0, r2 - bne ftrace_graph_caller\suffix - - ldr r1, =ftrace_graph_entry - ldr r2, [r1] - ldr r0, =ftrace_graph_entry_stub - cmp r0, r2 - bne ftrace_graph_caller\suffix + ldr_va r2, ftrace_graph_return + cmp r0, r2 + bne ftrace_graph_caller\suffix + + ldr_va r2, ftrace_graph_entry + mov_l r0, ftrace_graph_entry_stub + cmp r0, r2 + bne ftrace_graph_caller\suffix #endif mcount_exit @@ -70,29 +64,27 @@ .macro __ftrace_regs_caller - sub sp, sp, #8 @ space for PC and CPSR OLD_R0, + str lr, [sp, #-8]! @ store LR as PC and make space for CPSR/OLD_R0, @ OLD_R0 will overwrite previous LR - add ip, sp, #12 @ move in IP the value of SP as it was - @ before the push {lr} of the mcount mechanism + ldr lr, [sp, #8] @ get previous LR - str lr, [sp, #0] @ store LR instead of PC + str r0, [sp, #8] @ write r0 as OLD_R0 over previous LR - ldr lr, [sp, #8] @ get previous LR + str lr, [sp, #-4]! @ store previous LR as LR - str r0, [sp, #8] @ write r0 as OLD_R0 over previous LR + add lr, sp, #16 @ move in LR the value of SP as it was + @ before the push {lr} of the mcount mechanism - stmdb sp!, {ip, lr} - stmdb sp!, {r0-r11, lr} + push {r0-r11, ip, lr} @ stack content at this point: @ 0 4 48 52 56 60 64 68 72 - @ R0 | R1 | ... | LR | SP + 4 | previous LR | LR | PSR | OLD_R0 | + @ R0 | R1 | ... 
| IP | SP + 4 | previous LR | LR | PSR | OLD_R0 | - mov r3, sp @ struct pt_regs* + mov r3, sp @ struct pt_regs* - ldr r2, =function_trace_op - ldr r2, [r2] @ pointer to the current + ldr_va r2, function_trace_op @ pointer to the current @ function tracing op ldr r1, [sp, #S_LR] @ lr of instrumented func @@ -108,35 +100,37 @@ ftrace_regs_call: #ifdef CONFIG_FUNCTION_GRAPH_TRACER .globl ftrace_graph_regs_call ftrace_graph_regs_call: - mov r0, r0 +ARM( mov r0, r0 ) +THUMB( nop.w ) #endif @ pop saved regs - ldmia sp!, {r0-r12} @ restore r0 through r12 - ldr ip, [sp, #8] @ restore PC - ldr lr, [sp, #4] @ restore LR - ldr sp, [sp, #0] @ restore SP - mov pc, ip @ return + pop {r0-r11, ip, lr} @ restore r0 through r12 + ldr lr, [sp], #4 @ restore LR + ldr pc, [sp], #12 .endm #ifdef CONFIG_FUNCTION_GRAPH_TRACER .macro __ftrace_graph_regs_caller - sub r0, fp, #4 @ lr of instrumented routine (parent) +#ifdef CONFIG_UNWINDER_FRAME_POINTER + sub r0, fp, #4 @ lr of instrumented routine (parent) +#else + add r0, sp, #S_LR +#endif @ called from __ftrace_regs_caller - ldr r1, [sp, #S_PC] @ instrumented routine (func) + ldr r1, [sp, #S_PC] @ instrumented routine (func) mcount_adjust_addr r1, r1 - mov r2, fp @ frame pointer + mov r2, fpreg @ frame pointer + add r3, sp, #PT_REGS_SIZE bl prepare_ftrace_return @ pop registers saved in ftrace_regs_caller - ldmia sp!, {r0-r12} @ restore r0 through r12 - ldr ip, [sp, #8] @ restore PC - ldr lr, [sp, #4] @ restore LR - ldr sp, [sp, #0] @ restore SP - mov pc, ip @ return + pop {r0-r11, ip, lr} @ restore r0 through r12 + ldr lr, [sp], #4 @ restore LR + ldr pc, [sp], #12 .endm #endif @@ -149,8 +143,7 @@ ftrace_graph_regs_call: mcount_adjust_addr r0, lr @ instrumented function #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS - ldr r2, =function_trace_op - ldr r2, [r2] @ pointer to the current + ldr_va r2, function_trace_op @ pointer to the current @ function tracing op mov r3, #0 @ regs is NULL #endif @@ -162,14 +155,19 @@ ftrace_call\suffix: #ifdef 
CONFIG_FUNCTION_GRAPH_TRACER .globl ftrace_graph_call\suffix ftrace_graph_call\suffix: - mov r0, r0 +ARM( mov r0, r0 ) +THUMB( nop.w ) #endif mcount_exit .endm .macro __ftrace_graph_caller +#ifdef CONFIG_UNWINDER_FRAME_POINTER sub r0, fp, #4 @ &lr of instrumented routine (&parent) +#else + add r0, sp, #20 +#endif #ifdef CONFIG_DYNAMIC_FTRACE @ called from __ftrace_caller, saved in mcount_enter ldr r1, [sp, #16] @ instrumented routine (func) @@ -178,7 +176,8 @@ ftrace_graph_call\suffix: @ called from __mcount, untouched in lr mcount_adjust_addr r1, lr @ instrumented routine (func) #endif - mov r2, fp @ frame pointer + mov r2, fpreg @ frame pointer + add r3, sp, #24 bl prepare_ftrace_return mcount_exit .endm @@ -202,16 +201,17 @@ ftrace_graph_call\suffix: .endm .macro mcount_exit - ldmia sp!, {r0-r3, ip, lr} - ret ip + ldmia sp!, {r0-r3} + ldr lr, [sp, #4] + ldr pc, [sp], #8 .endm ENTRY(__gnu_mcount_nc) UNWIND(.fnstart) #ifdef CONFIG_DYNAMIC_FTRACE - mov ip, lr - ldmia sp!, {lr} - ret ip + push {lr} + ldr lr, [sp, #4] + ldr pc, [sp], #8 #else __mcount #endif @@ -256,17 +256,33 @@ ENDPROC(ftrace_graph_regs_caller) .purgem mcount_exit #ifdef CONFIG_FUNCTION_GRAPH_TRACER - .globl return_to_handler -return_to_handler: +ENTRY(return_to_handler) stmdb sp!, {r0-r3} - mov r0, fp @ frame pointer + add r0, sp, #16 @ sp at exit of instrumented routine bl ftrace_return_to_handler mov lr, r0 @ r0 has real ret addr ldmia sp!, {r0-r3} ret lr +ENDPROC(return_to_handler) #endif ENTRY(ftrace_stub) .Lftrace_stub: ret lr ENDPROC(ftrace_stub) + +#ifdef CONFIG_DYNAMIC_FTRACE + + __INIT + + .macro init_tramp, dst:req +ENTRY(\dst\()_from_init) + ldr pc, =\dst +ENDPROC(\dst\()_from_init) + .endm + + init_tramp ftrace_caller +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS + init_tramp ftrace_regs_caller +#endif +#endif diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S index ae24dd54e9ef..99411fa91350 100644 --- a/arch/arm/kernel/entry-header.S +++ 
b/arch/arm/kernel/entry-header.S @@ -48,8 +48,7 @@ .macro alignment_trap, rtmp1, rtmp2, label #ifdef CONFIG_ALIGNMENT_TRAP mrc p15, 0, \rtmp2, c1, c0, 0 - ldr \rtmp1, \label - ldr \rtmp1, [\rtmp1] + ldr_va \rtmp1, \label teq \rtmp1, \rtmp2 mcrne p15, 0, \rtmp1, c1, c0, 0 #endif @@ -292,12 +291,18 @@ .macro restore_user_regs, fast = 0, offset = 0 -#if defined(CONFIG_CPU_32v6K) && !defined(CONFIG_CPU_V6) +#if defined(CONFIG_CPU_32v6K) && \ + (!defined(CONFIG_CPU_V6) || defined(CONFIG_SMP)) +#ifdef CONFIG_CPU_V6 +ALT_SMP(nop) +ALT_UP_B(.L1_\@) +#endif @ The TLS register update is deferred until return to user space so we @ can use it for other things while running in the kernel - get_thread_info r1 + mrc p15, 0, r1, c13, c0, 3 @ get current_thread_info pointer ldr r1, [r1, #TI_TP_VALUE] mcr p15, 0, r1, c13, c0, 3 @ set TLS register +.L1_\@: #endif uaccess_enable r1, isb=0 @@ -361,25 +366,25 @@ * between user and kernel mode. */ .macro ct_user_exit, save = 1 -#ifdef CONFIG_CONTEXT_TRACKING +#ifdef CONFIG_CONTEXT_TRACKING_USER .if \save stmdb sp!, {r0-r3, ip, lr} - bl context_tracking_user_exit + bl user_exit_callable ldmia sp!, {r0-r3, ip, lr} .else - bl context_tracking_user_exit + bl user_exit_callable .endif #endif .endm .macro ct_user_enter, save = 1 -#ifdef CONFIG_CONTEXT_TRACKING +#ifdef CONFIG_CONTEXT_TRACKING_USER .if \save stmdb sp!, {r0-r3, ip, lr} - bl context_tracking_user_enter + bl user_enter_callable ldmia sp!, {r0-r3, ip, lr} .else - bl context_tracking_user_enter + bl user_enter_callable .endif #endif .endm @@ -423,3 +428,40 @@ scno .req r7 @ syscall number tbl .req r8 @ syscall table pointer why .req r8 @ Linux syscall (!= 0) tsk .req r9 @ current thread_info + + .macro do_overflow_check, frame_size:req +#ifdef CONFIG_VMAP_STACK + @ + @ Test whether the SP has overflowed. Task and IRQ stacks are aligned + @ so that SP & BIT(THREAD_SIZE_ORDER + PAGE_SHIFT) should always be + @ zero. 
+ @ +ARM( tst sp, #1 << (THREAD_SIZE_ORDER + PAGE_SHIFT) ) +THUMB( tst r1, #1 << (THREAD_SIZE_ORDER + PAGE_SHIFT) ) +THUMB( it ne ) + bne .Lstack_overflow_check\@ + + .pushsection .text +.Lstack_overflow_check\@: + @ + @ The stack pointer is not pointing to a valid vmap'ed stack, but it + @ may be pointing into the linear map instead, which may happen if we + @ are already running from the overflow stack. We cannot detect overflow + @ in such cases so just carry on. + @ + str ip, [r0, #12] @ Stash IP on the mode stack + ldr_va ip, high_memory @ Start of VMALLOC space +ARM( cmp sp, ip ) @ SP in vmalloc space? +THUMB( cmp r1, ip ) +THUMB( itt lo ) + ldrlo ip, [r0, #12] @ Restore IP + blo .Lout\@ @ Carry on + +THUMB( sub r1, sp, r1 ) @ Restore original R1 +THUMB( sub sp, r1 ) @ Restore original SP + add sp, sp, #\frame_size @ Undo svc_entry's SP change + b __bad_stack @ Handle VMAP stack overflow + .popsection +.Lout\@: +#endif + .endm diff --git a/arch/arm/kernel/entry-v7m.S b/arch/arm/kernel/entry-v7m.S index 7bde93c10962..de8a60363c85 100644 --- a/arch/arm/kernel/entry-v7m.S +++ b/arch/arm/kernel/entry-v7m.S @@ -39,16 +39,25 @@ __irq_entry: @ @ Invoke the IRQ handler @ - mrs r0, ipsr - ldr r1, =V7M_xPSR_EXCEPTIONNO - and r0, r1 - sub r0, #16 - mov r1, sp - stmdb sp!, {lr} - @ routine called with r0 = irq number, r1 = struct pt_regs * - bl nvic_handle_irq - - pop {lr} + mov r0, sp + ldr_this_cpu sp, irq_stack_ptr, r1, r2 + + @ + @ If we took the interrupt while running in the kernel, we may already + @ be using the IRQ stack, so revert to the original value in that case. + @ + subs r2, sp, r0 @ SP above bottom of IRQ stack? + rsbscs r2, r2, #THREAD_SIZE @ ... and below the top? 
+ movcs sp, r0 + + push {r0, lr} @ preserve LR and original SP + + @ routine called with r0 = struct pt_regs * + bl generic_handle_arch_irq + + pop {r0, lr} + mov sp, r0 + @ @ Check for any pending work if returning to user @ @@ -101,15 +110,17 @@ ENTRY(__switch_to) str sp, [ip], #4 str lr, [ip], #4 mov r5, r0 + mov r6, r2 @ Preserve 'next' add r4, r2, #TI_CPU_SAVE ldr r0, =thread_notify_head mov r1, #THREAD_NOTIFY_SWITCH bl atomic_notifier_call_chain - mov ip, r4 mov r0, r5 - ldmia ip!, {r4 - r11} @ Load all regs saved previously - ldr sp, [ip] - ldr pc, [ip, #4]! + mov r1, r6 + ldmia r4, {r4 - r12, lr} @ Load all regs saved previously + set_current r1, r2 + mov sp, ip + bx lr .fnend ENDPROC(__switch_to) diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c index a006585e1c09..a0b6d1e3812f 100644 --- a/arch/arm/kernel/ftrace.c +++ b/arch/arm/kernel/ftrace.c @@ -22,12 +22,24 @@ #include <asm/ftrace.h> #include <asm/insn.h> #include <asm/set_memory.h> +#include <asm/stacktrace.h> #include <asm/patch.h> +/* + * The compiler emitted profiling hook consists of + * + * PUSH {LR} + * BL __gnu_mcount_nc + * + * To turn this combined sequence into a NOP, we need to restore the value of + * SP before the PUSH. Let's use an ADD rather than a POP into LR, as LR is not + * modified anyway, and reloading LR from memory is highly likely to be less + * efficient. 
+ */ #ifdef CONFIG_THUMB2_KERNEL -#define NOP 0xf85deb04 /* pop.w {lr} */ +#define NOP 0xf10d0d04 /* add.w sp, sp, #4 */ #else -#define NOP 0xe8bd4000 /* pop {lr} */ +#define NOP 0xe28dd004 /* add sp, sp, #4 */ #endif #ifdef CONFIG_DYNAMIC_FTRACE @@ -51,21 +63,30 @@ static unsigned long ftrace_nop_replace(struct dyn_ftrace *rec) return NOP; } -static unsigned long adjust_address(struct dyn_ftrace *rec, unsigned long addr) +void ftrace_caller_from_init(void); +void ftrace_regs_caller_from_init(void); + +static unsigned long __ref adjust_address(struct dyn_ftrace *rec, + unsigned long addr) { - return addr; + if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE) || + system_state >= SYSTEM_FREEING_INITMEM || + likely(!is_kernel_inittext(rec->ip))) + return addr; + if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) || + addr == (unsigned long)&ftrace_caller) + return (unsigned long)&ftrace_caller_from_init; + return (unsigned long)&ftrace_regs_caller_from_init; } -int ftrace_arch_code_modify_prepare(void) +void ftrace_arch_code_modify_prepare(void) { - return 0; } -int ftrace_arch_code_modify_post_process(void) +void ftrace_arch_code_modify_post_process(void) { /* Make sure any TLB misses during machine stop are cleared. */ flush_tlb_all(); - return 0; } static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr, @@ -189,15 +210,23 @@ int ftrace_make_nop(struct module *mod, #endif new = ftrace_nop_replace(rec); - ret = ftrace_modify_code(ip, old, new, true); + /* + * Locations in .init.text may call __gnu_mcount_nc via a linker + * emitted veneer if they are too far away from its implementation, and + * so validation may fail spuriously in such cases. Let's work around + * this by omitting those from validation. 
+ */ + ret = ftrace_modify_code(ip, old, new, !is_kernel_inittext(ip)); return ret; } #endif /* CONFIG_DYNAMIC_FTRACE */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER +asmlinkage void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, - unsigned long frame_pointer) + unsigned long frame_pointer, + unsigned long stack_pointer) { unsigned long return_hooker = (unsigned long) &return_to_handler; unsigned long old; @@ -205,6 +234,23 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, if (unlikely(atomic_read(&current->tracing_graph_pause))) return; + if (IS_ENABLED(CONFIG_UNWINDER_FRAME_POINTER)) { + /* FP points one word below parent's top of stack */ + frame_pointer += 4; + } else { + struct stackframe frame = { + .fp = frame_pointer, + .sp = stack_pointer, + .lr = self_addr, + .pc = self_addr, + }; + if (unwind_frame(&frame) < 0) + return; + if (frame.lr != self_addr) + parent = frame.lr_addr; + frame_pointer = frame.sp; + } + old = *parent; *parent = return_hooker; @@ -225,7 +271,7 @@ static int __ftrace_modify_caller(unsigned long *callsite, unsigned long caller_fn = (unsigned long) func; unsigned long pc = (unsigned long) callsite; unsigned long branch = arm_gen_branch(pc, caller_fn); - unsigned long nop = 0xe1a00000; /* mov r0, r0 */ + unsigned long nop = arm_gen_nop(); unsigned long old = enable ? nop : branch; unsigned long new = enable ? 
branch : nop; diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S index da18e0a17dc2..42cae73fcc19 100644 --- a/arch/arm/kernel/head-common.S +++ b/arch/arm/kernel/head-common.S @@ -105,10 +105,8 @@ __mmap_switched: mov r1, #0 bl __memset @ clear .bss -#ifdef CONFIG_CURRENT_POINTER_IN_TPIDRURO adr_l r0, init_task @ get swapper task_struct - set_current r0 -#endif + set_current r0, r1 ldmia r4, {r0, r1, r2, r3} str r9, [r0] @ Save processor ID diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S index fadfee9e2b45..950bef83339f 100644 --- a/arch/arm/kernel/head-nommu.S +++ b/arch/arm/kernel/head-nommu.S @@ -114,6 +114,7 @@ ENTRY(secondary_startup) add r12, r12, r10 ret r12 1: bl __after_proc_init + ldr r7, __secondary_data @ reload r7 ldr sp, [r7, #12] @ set up the stack pointer ldr r0, [r7, #16] @ set up task pointer mov fp, #0 diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index c04dd94630c7..29e2900178a1 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -38,10 +38,10 @@ #ifdef CONFIG_ARM_LPAE /* LPAE requires an additional page for the PGD */ #define PG_DIR_SIZE 0x5000 -#define PMD_ORDER 3 +#define PMD_ENTRY_ORDER 3 /* PMD entry size is 2^PMD_ENTRY_ORDER */ #else #define PG_DIR_SIZE 0x4000 -#define PMD_ORDER 2 +#define PMD_ENTRY_ORDER 2 #endif .globl swapper_pg_dir @@ -240,7 +240,7 @@ __create_page_tables: mov r6, r6, lsr #SECTION_SHIFT 1: orr r3, r7, r5, lsl #SECTION_SHIFT @ flags + kernel base - str r3, [r4, r5, lsl #PMD_ORDER] @ identity mapping + str r3, [r4, r5, lsl #PMD_ENTRY_ORDER] @ identity mapping cmp r5, r6 addlo r5, r5, #1 @ next section blo 1b @@ -250,7 +250,7 @@ __create_page_tables: * set two variables to indicate the physical start and end of the * kernel. 
*/ - add r0, r4, #KERNEL_OFFSET >> (SECTION_SHIFT - PMD_ORDER) + add r0, r4, #KERNEL_OFFSET >> (SECTION_SHIFT - PMD_ENTRY_ORDER) ldr r6, =(_end - 1) adr_l r5, kernel_sec_start @ _pa(kernel_sec_start) #if defined CONFIG_CPU_ENDIAN_BE8 || defined CONFIG_CPU_ENDIAN_BE32 @@ -259,8 +259,8 @@ __create_page_tables: str r8, [r5] @ Save physical start of kernel (LE) #endif orr r3, r8, r7 @ Add the MMU flags - add r6, r4, r6, lsr #(SECTION_SHIFT - PMD_ORDER) -1: str r3, [r0], #1 << PMD_ORDER + add r6, r4, r6, lsr #(SECTION_SHIFT - PMD_ENTRY_ORDER) +1: str r3, [r0], #1 << PMD_ENTRY_ORDER add r3, r3, #1 << SECTION_SHIFT cmp r0, r6 bls 1b @@ -280,14 +280,14 @@ __create_page_tables: mov r3, pc mov r3, r3, lsr #SECTION_SHIFT orr r3, r7, r3, lsl #SECTION_SHIFT - add r0, r4, #(XIP_START & 0xff000000) >> (SECTION_SHIFT - PMD_ORDER) - str r3, [r0, #((XIP_START & 0x00f00000) >> SECTION_SHIFT) << PMD_ORDER]! + add r0, r4, #(XIP_START & 0xff000000) >> (SECTION_SHIFT - PMD_ENTRY_ORDER) + str r3, [r0, #((XIP_START & 0x00f00000) >> SECTION_SHIFT) << PMD_ENTRY_ORDER]! 
ldr r6, =(_edata_loc - 1) - add r0, r0, #1 << PMD_ORDER - add r6, r4, r6, lsr #(SECTION_SHIFT - PMD_ORDER) + add r0, r0, #1 << PMD_ENTRY_ORDER + add r6, r4, r6, lsr #(SECTION_SHIFT - PMD_ENTRY_ORDER) 1: cmp r0, r6 add r3, r3, #1 << SECTION_SHIFT - strls r3, [r0], #1 << PMD_ORDER + strls r3, [r0], #1 << PMD_ENTRY_ORDER bls 1b #endif @@ -297,10 +297,10 @@ __create_page_tables: */ mov r0, r2, lsr #SECTION_SHIFT cmp r2, #0 - ldrne r3, =FDT_FIXED_BASE >> (SECTION_SHIFT - PMD_ORDER) + ldrne r3, =FDT_FIXED_BASE >> (SECTION_SHIFT - PMD_ENTRY_ORDER) addne r3, r3, r4 orrne r6, r7, r0, lsl #SECTION_SHIFT - strne r6, [r3], #1 << PMD_ORDER + strne r6, [r3], #1 << PMD_ENTRY_ORDER addne r6, r6, #1 << SECTION_SHIFT strne r6, [r3] @@ -319,7 +319,7 @@ __create_page_tables: addruart r7, r3, r0 mov r3, r3, lsr #SECTION_SHIFT - mov r3, r3, lsl #PMD_ORDER + mov r3, r3, lsl #PMD_ENTRY_ORDER add r0, r4, r3 mov r3, r7, lsr #SECTION_SHIFT @@ -349,7 +349,7 @@ __create_page_tables: * If we're using the NetWinder or CATS, we also need to map * in the 16550-type serial port for the debug messages */ - add r0, r4, #0xff000000 >> (SECTION_SHIFT - PMD_ORDER) + add r0, r4, #0xff000000 >> (SECTION_SHIFT - PMD_ENTRY_ORDER) orr r3, r7, #0x7c000000 str r3, [r0] #endif @@ -359,10 +359,10 @@ __create_page_tables: * Similar reasons here - for debug. This is * only for Acorn RiscPC architectures. */ - add r0, r4, #0x02000000 >> (SECTION_SHIFT - PMD_ORDER) + add r0, r4, #0x02000000 >> (SECTION_SHIFT - PMD_ENTRY_ORDER) orr r3, r7, #0x02000000 str r3, [r0] - add r0, r4, #0xd8000000 >> (SECTION_SHIFT - PMD_ORDER) + add r0, r4, #0xd8000000 >> (SECTION_SHIFT - PMD_ENTRY_ORDER) str r3, [r0] #endif #endif @@ -424,6 +424,13 @@ ENDPROC(secondary_startup) ENDPROC(secondary_startup_arm) ENTRY(__secondary_switched) +#if defined(CONFIG_VMAP_STACK) && !defined(CONFIG_ARM_LPAE) + @ Before using the vmap'ed stack, we have to switch to swapper_pg_dir + @ as the ID map does not cover the vmalloc region. 
+ mrc p15, 0, ip, c2, c0, 1 @ read TTBR1 + mcr p15, 0, ip, c2, c0, 0 @ set TTBR0 + instr_sync +#endif adr_l r7, secondary_data + 12 @ get secondary_data.stack ldr sp, [r7] ldr r0, [r7, #4] @ get secondary_data.task diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index b1423fb130ea..054e9199f30d 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -941,6 +941,23 @@ static int hw_breakpoint_pending(unsigned long addr, unsigned int fsr, return ret; } +#ifdef CONFIG_ARM_ERRATA_764319 +static int oslsr_fault; + +static int debug_oslsr_trap(struct pt_regs *regs, unsigned int instr) +{ + oslsr_fault = 1; + instruction_pointer(regs) += 4; + return 0; +} + +static struct undef_hook debug_oslsr_hook = { + .instr_mask = 0xffffffff, + .instr_val = 0xee115e91, + .fn = debug_oslsr_trap, +}; +#endif + /* * One-time initialisation. */ @@ -974,7 +991,16 @@ static bool core_has_os_save_restore(void) case ARM_DEBUG_ARCH_V7_1: return true; case ARM_DEBUG_ARCH_V7_ECP14: +#ifdef CONFIG_ARM_ERRATA_764319 + oslsr_fault = 0; + register_undef_hook(&debug_oslsr_hook); ARM_DBG_READ(c1, c1, 4, oslsr); + unregister_undef_hook(&debug_oslsr_hook); + if (oslsr_fault) + return false; +#else + ARM_DBG_READ(c1, c1, 4, oslsr); +#endif if (oslsr & ARM_OSLSR_OSLM0) return true; fallthrough; diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c index b79975bd988c..fe28fc1f759d 100644 --- a/arch/arm/kernel/irq.c +++ b/arch/arm/kernel/irq.c @@ -36,13 +36,54 @@ #include <asm/hardware/cache-l2x0.h> #include <asm/hardware/cache-uniphier.h> #include <asm/outercache.h> +#include <asm/softirq_stack.h> #include <asm/exception.h> #include <asm/mach/arch.h> #include <asm/mach/irq.h> #include <asm/mach/time.h> +#include "reboot.h" + unsigned long irq_err_count; +#ifdef CONFIG_IRQSTACKS + +asmlinkage DEFINE_PER_CPU_READ_MOSTLY(u8 *, irq_stack_ptr); + +static void __init init_irq_stacks(void) +{ + u8 *stack; + int cpu; + + for_each_possible_cpu(cpu) 
{ + if (!IS_ENABLED(CONFIG_VMAP_STACK)) + stack = (u8 *)__get_free_pages(GFP_KERNEL, + THREAD_SIZE_ORDER); + else + stack = __vmalloc_node(THREAD_SIZE, THREAD_ALIGN, + THREADINFO_GFP, NUMA_NO_NODE, + __builtin_return_address(0)); + + if (WARN_ON(!stack)) + break; + per_cpu(irq_stack_ptr, cpu) = &stack[THREAD_SIZE]; + } +} + +#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK +static void ____do_softirq(void *arg) +{ + __do_softirq(); +} + +void do_softirq_own_stack(void) +{ + call_with_stack(____do_softirq, NULL, + __this_cpu_read(irq_stack_ptr)); +} +#endif +#endif + int arch_show_interrupts(struct seq_file *p, int prec) { #ifdef CONFIG_FIQ @@ -80,27 +121,14 @@ void handle_IRQ(unsigned int irq, struct pt_regs *regs) ack_bad_irq(irq); } -/* - * asm_do_IRQ is the interface to be used from assembly code. - */ -asmlinkage void __exception_irq_entry -asm_do_IRQ(unsigned int irq, struct pt_regs *regs) -{ - struct pt_regs *old_regs; - - irq_enter(); - old_regs = set_irq_regs(regs); - - handle_IRQ(irq, regs); - - set_irq_regs(old_regs); - irq_exit(); -} - void __init init_IRQ(void) { int ret; +#ifdef CONFIG_IRQSTACKS + init_irq_stacks(); +#endif + if (IS_ENABLED(CONFIG_OF) && !machine_desc->init_irq) irqchip_init(); else diff --git a/arch/arm/kernel/jump_label.c b/arch/arm/kernel/jump_label.c index 303b3ab87f7e..eb9c24b6e8e2 100644 --- a/arch/arm/kernel/jump_label.c +++ b/arch/arm/kernel/jump_label.c @@ -27,9 +27,3 @@ void arch_jump_label_transform(struct jump_entry *entry, { __arch_jump_label_transform(entry, type, false); } - -void arch_jump_label_transform_static(struct jump_entry *entry, - enum jump_label_type type) -{ - __arch_jump_label_transform(entry, type, true); -} diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c index 7bd30c0a4280..22f937e6f3ff 100644 --- a/arch/arm/kernel/kgdb.c +++ b/arch/arm/kernel/kgdb.c @@ -154,22 +154,38 @@ static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int instr) return 0; } -static struct undef_hook kgdb_brkpt_hook = { 
+static struct undef_hook kgdb_brkpt_arm_hook = { .instr_mask = 0xffffffff, .instr_val = KGDB_BREAKINST, - .cpsr_mask = MODE_MASK, + .cpsr_mask = PSR_T_BIT | MODE_MASK, .cpsr_val = SVC_MODE, .fn = kgdb_brk_fn }; -static struct undef_hook kgdb_compiled_brkpt_hook = { +static struct undef_hook kgdb_brkpt_thumb_hook = { + .instr_mask = 0xffff, + .instr_val = KGDB_BREAKINST & 0xffff, + .cpsr_mask = PSR_T_BIT | MODE_MASK, + .cpsr_val = PSR_T_BIT | SVC_MODE, + .fn = kgdb_brk_fn +}; + +static struct undef_hook kgdb_compiled_brkpt_arm_hook = { .instr_mask = 0xffffffff, .instr_val = KGDB_COMPILED_BREAK, - .cpsr_mask = MODE_MASK, + .cpsr_mask = PSR_T_BIT | MODE_MASK, .cpsr_val = SVC_MODE, .fn = kgdb_compiled_brk_fn }; +static struct undef_hook kgdb_compiled_brkpt_thumb_hook = { + .instr_mask = 0xffff, + .instr_val = KGDB_COMPILED_BREAK & 0xffff, + .cpsr_mask = PSR_T_BIT | MODE_MASK, + .cpsr_val = PSR_T_BIT | SVC_MODE, + .fn = kgdb_compiled_brk_fn +}; + static int __kgdb_notify(struct die_args *args, unsigned long cmd) { struct pt_regs *regs = args->regs; @@ -210,8 +226,10 @@ int kgdb_arch_init(void) if (ret != 0) return ret; - register_undef_hook(&kgdb_brkpt_hook); - register_undef_hook(&kgdb_compiled_brkpt_hook); + register_undef_hook(&kgdb_brkpt_arm_hook); + register_undef_hook(&kgdb_brkpt_thumb_hook); + register_undef_hook(&kgdb_compiled_brkpt_arm_hook); + register_undef_hook(&kgdb_compiled_brkpt_thumb_hook); return 0; } @@ -224,8 +242,10 @@ int kgdb_arch_init(void) */ void kgdb_arch_exit(void) { - unregister_undef_hook(&kgdb_brkpt_hook); - unregister_undef_hook(&kgdb_compiled_brkpt_hook); + unregister_undef_hook(&kgdb_brkpt_arm_hook); + unregister_undef_hook(&kgdb_brkpt_thumb_hook); + unregister_undef_hook(&kgdb_compiled_brkpt_arm_hook); + unregister_undef_hook(&kgdb_compiled_brkpt_thumb_hook); unregister_die_notifier(&kgdb_notifier); } diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c index beac45e89ba6..d59c36dc0494 100644 --- a/arch/arm/kernel/module.c 
+++ b/arch/arm/kernel/module.c @@ -68,6 +68,44 @@ bool module_exit_section(const char *name) strstarts(name, ".ARM.exidx.exit"); } +#ifdef CONFIG_ARM_HAS_GROUP_RELOCS +/* + * This implements the partitioning algorithm for group relocations as + * documented in the ARM AArch32 ELF psABI (IHI 0044). + * + * A single PC-relative symbol reference is divided in up to 3 add or subtract + * operations, where the final one could be incorporated into a load/store + * instruction with immediate offset. E.g., + * + * ADD Rd, PC, #... or ADD Rd, PC, #... + * ADD Rd, Rd, #... ADD Rd, Rd, #... + * LDR Rd, [Rd, #...] ADD Rd, Rd, #... + * + * The latter has a guaranteed range of only 16 MiB (3x8 == 24 bits), so it is + * of limited use in the kernel. However, the ADD/ADD/LDR combo has a range of + * -/+ 256 MiB, (2x8 + 12 == 28 bits), which means it has sufficient range for + * any in-kernel symbol reference (unless module PLTs are being used). + * + * The main advantage of this approach over the typical pattern using a literal + * load is that literal loads may miss in the D-cache, and generally lead to + * lower cache efficiency for variables that are referenced often from many + * different places in the code. + */ +static u32 get_group_rem(u32 group, u32 *offset) +{ + u32 val = *offset; + u32 shift; + do { + shift = val ? 
(31 - __fls(val)) & ~1 : 32; + *offset = val; + if (!val) + break; + val &= 0xffffff >> shift; + } while (group--); + return shift; +} +#endif + int apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, unsigned int relindex, struct module *module) @@ -82,6 +120,9 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, unsigned long loc; Elf32_Sym *sym; const char *symname; +#ifdef CONFIG_ARM_HAS_GROUP_RELOCS + u32 shift, group = 1; +#endif s32 offset; u32 tmp; #ifdef CONFIG_THUMB2_KERNEL @@ -212,6 +253,55 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, *(u32 *)loc = __opcode_to_mem_arm(tmp); break; +#ifdef CONFIG_ARM_HAS_GROUP_RELOCS + case R_ARM_ALU_PC_G0_NC: + group = 0; + fallthrough; + case R_ARM_ALU_PC_G1_NC: + tmp = __mem_to_opcode_arm(*(u32 *)loc); + offset = ror32(tmp & 0xff, (tmp & 0xf00) >> 7); + if (tmp & BIT(22)) + offset = -offset; + offset += sym->st_value - loc; + if (offset < 0) { + offset = -offset; + tmp = (tmp & ~BIT(23)) | BIT(22); // SUB opcode + } else { + tmp = (tmp & ~BIT(22)) | BIT(23); // ADD opcode + } + + shift = get_group_rem(group, &offset); + if (shift < 24) { + offset >>= 24 - shift; + offset |= (shift + 8) << 7; + } + *(u32 *)loc = __opcode_to_mem_arm((tmp & ~0xfff) | offset); + break; + + case R_ARM_LDR_PC_G2: + tmp = __mem_to_opcode_arm(*(u32 *)loc); + offset = tmp & 0xfff; + if (~tmp & BIT(23)) // U bit cleared? 
+ offset = -offset; + offset += sym->st_value - loc; + if (offset < 0) { + offset = -offset; + tmp &= ~BIT(23); // clear U bit + } else { + tmp |= BIT(23); // set U bit + } + get_group_rem(2, &offset); + + if (offset > 0xfff) { + pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n", + module->name, relindex, i, symname, + ELF32_R_TYPE(rel->r_info), loc, + sym->st_value); + return -ENOEXEC; + } + *(u32 *)loc = __opcode_to_mem_arm((tmp & ~0xfff) | offset); + break; +#endif #ifdef CONFIG_THUMB2_KERNEL case R_ARM_THM_CALL: case R_ARM_THM_JUMP24: @@ -369,46 +459,40 @@ int module_finalize(const Elf32_Ehdr *hdr, const Elf_Shdr *sechdrs, #ifdef CONFIG_ARM_UNWIND const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; const Elf_Shdr *sechdrs_end = sechdrs + hdr->e_shnum; - struct mod_unwind_map maps[ARM_SEC_MAX]; - int i; + struct list_head *unwind_list = &mod->arch.unwind_list; - memset(maps, 0, sizeof(maps)); + INIT_LIST_HEAD(unwind_list); + mod->arch.init_table = NULL; for (s = sechdrs; s < sechdrs_end; s++) { const char *secname = secstrs + s->sh_name; + const char *txtname; + const Elf_Shdr *txt_sec; - if (!(s->sh_flags & SHF_ALLOC)) + if (!(s->sh_flags & SHF_ALLOC) || + s->sh_type != ELF_SECTION_UNWIND) continue; - if (strcmp(".ARM.exidx.init.text", secname) == 0) - maps[ARM_SEC_INIT].unw_sec = s; - else if (strcmp(".ARM.exidx", secname) == 0) - maps[ARM_SEC_CORE].unw_sec = s; - else if (strcmp(".ARM.exidx.exit.text", secname) == 0) - maps[ARM_SEC_EXIT].unw_sec = s; - else if (strcmp(".ARM.exidx.text.unlikely", secname) == 0) - maps[ARM_SEC_UNLIKELY].unw_sec = s; - else if (strcmp(".ARM.exidx.text.hot", secname) == 0) - maps[ARM_SEC_HOT].unw_sec = s; - else if (strcmp(".init.text", secname) == 0) - maps[ARM_SEC_INIT].txt_sec = s; - else if (strcmp(".text", secname) == 0) - maps[ARM_SEC_CORE].txt_sec = s; - else if (strcmp(".exit.text", secname) == 0) - maps[ARM_SEC_EXIT].txt_sec = s; - else if (strcmp(".text.unlikely", 
secname) == 0) - maps[ARM_SEC_UNLIKELY].txt_sec = s; - else if (strcmp(".text.hot", secname) == 0) - maps[ARM_SEC_HOT].txt_sec = s; - } + if (!strcmp(".ARM.exidx", secname)) + txtname = ".text"; + else + txtname = secname + strlen(".ARM.exidx"); + txt_sec = find_mod_section(hdr, sechdrs, txtname); + + if (txt_sec) { + struct unwind_table *table = + unwind_table_add(s->sh_addr, + s->sh_size, + txt_sec->sh_addr, + txt_sec->sh_size); + + list_add(&table->mod_list, unwind_list); - for (i = 0; i < ARM_SEC_MAX; i++) - if (maps[i].unw_sec && maps[i].txt_sec) - mod->arch.unwind[i] = - unwind_table_add(maps[i].unw_sec->sh_addr, - maps[i].unw_sec->sh_size, - maps[i].txt_sec->sh_addr, - maps[i].txt_sec->sh_size); + /* save init table for module_arch_freeing_init */ + if (strcmp(".ARM.exidx.init.text", secname) == 0) + mod->arch.init_table = table; + } + } #endif #ifdef CONFIG_ARM_PATCH_PHYS_VIRT s = find_mod_section(hdr, sechdrs, ".pv_table"); @@ -429,19 +513,27 @@ void module_arch_cleanup(struct module *mod) { #ifdef CONFIG_ARM_UNWIND - int i; + struct unwind_table *tmp; + struct unwind_table *n; - for (i = 0; i < ARM_SEC_MAX; i++) { - unwind_table_del(mod->arch.unwind[i]); - mod->arch.unwind[i] = NULL; + list_for_each_entry_safe(tmp, n, + &mod->arch.unwind_list, mod_list) { + list_del(&tmp->mod_list); + unwind_table_del(tmp); } + mod->arch.init_table = NULL; #endif } void __weak module_arch_freeing_init(struct module *mod) { #ifdef CONFIG_ARM_UNWIND - unwind_table_del(mod->arch.unwind[ARM_SEC_INIT]); - mod->arch.unwind[ARM_SEC_INIT] = NULL; + struct unwind_table *init = mod->arch.init_table; + + if (init) { + mod->arch.init_table = NULL; + list_del(&init->mod_list); + unwind_table_del(init); + } #endif } diff --git a/arch/arm/kernel/perf_callchain.c b/arch/arm/kernel/perf_callchain.c index 3b69a76d341e..bc6b246ab55e 100644 --- a/arch/arm/kernel/perf_callchain.c +++ b/arch/arm/kernel/perf_callchain.c @@ -64,11 +64,6 @@ perf_callchain_user(struct perf_callchain_entry_ctx 
*entry, struct pt_regs *regs { struct frame_tail __user *tail; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { - /* We don't support guest os callchain now */ - return; - } - perf_callchain_store(entry, regs->ARM_pc); if (!current->mm) @@ -100,20 +95,12 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re { struct stackframe fr; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { - /* We don't support guest os callchain now */ - return; - } - arm_get_current_stackframe(regs, &fr); walk_stackframe(&fr, callchain_trace, entry); } unsigned long perf_instruction_pointer(struct pt_regs *regs) { - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) - return perf_guest_cbs->get_guest_ip(); - return instruction_pointer(regs); } @@ -121,17 +108,10 @@ unsigned long perf_misc_flags(struct pt_regs *regs) { int misc = 0; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { - if (perf_guest_cbs->is_user_mode()) - misc |= PERF_RECORD_MISC_GUEST_USER; - else - misc |= PERF_RECORD_MISC_GUEST_KERNEL; - } else { - if (user_mode(regs)) - misc |= PERF_RECORD_MISC_USER; - else - misc |= PERF_RECORD_MISC_KERNEL; - } + if (user_mode(regs)) + misc |= PERF_RECORD_MISC_USER; + else + misc |= PERF_RECORD_MISC_KERNEL; return misc; } diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index d47159f3791c..a2b31d91a1b6 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -36,7 +36,7 @@ #include "signal.h" -#ifdef CONFIG_CURRENT_POINTER_IN_TPIDRURO +#if defined(CONFIG_CURRENT_POINTER_IN_TPIDRURO) || defined(CONFIG_SMP) DEFINE_PER_CPU(struct task_struct *, __entry_task); #endif @@ -46,6 +46,11 @@ unsigned long __stack_chk_guard __read_mostly; EXPORT_SYMBOL(__stack_chk_guard); #endif +#ifndef CONFIG_CURRENT_POINTER_IN_TPIDRURO +asmlinkage struct task_struct *__current; +EXPORT_SYMBOL(__current); +#endif + static const char *processor_modes[] __maybe_unused = { "USER_26", "FIQ_26" , "IRQ_26" , "SVC_26" , "UK4_26" , 
"UK5_26" , "UK6_26" , "UK7_26" , "UK8_26" , "UK9_26" , "UK10_26", "UK11_26", "UK12_26", "UK13_26", "UK14_26", "UK15_26", @@ -196,7 +201,7 @@ void __show_regs(struct pt_regs *regs) void show_regs(struct pt_regs * regs) { __show_regs(regs); - dump_stack(); + dump_backtrace(regs, NULL, KERN_DEFAULT); } ATOMIC_NOTIFIER_HEAD(thread_notify_head); @@ -227,15 +232,13 @@ void flush_thread(void) thread_notify(THREAD_NOTIFY_FLUSH, thread); } -void release_thread(struct task_struct *dead_task) -{ -} - asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); -int copy_thread(unsigned long clone_flags, unsigned long stack_start, - unsigned long stk_sz, struct task_struct *p, unsigned long tls) +int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { + unsigned long clone_flags = args->flags; + unsigned long stack_start = args->stack; + unsigned long tls = args->tls; struct thread_info *thread = task_thread_info(p); struct pt_regs *childregs = task_pt_regs(p); @@ -251,15 +254,15 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, thread->cpu_domain = get_domain(); #endif - if (likely(!(p->flags & (PF_KTHREAD | PF_IO_WORKER)))) { + if (likely(!args->fn)) { *childregs = *current_pt_regs(); childregs->ARM_r0 = 0; if (stack_start) childregs->ARM_sp = stack_start; } else { memset(childregs, 0, sizeof(struct pt_regs)); - thread->cpu_context.r4 = stk_sz; - thread->cpu_context.r5 = stack_start; + thread->cpu_context.r4 = (unsigned long)args->fn_arg; + thread->cpu_context.r5 = (unsigned long)args->fn; childregs->ARM_cpsr = SVC_MODE; } thread->cpu_context.pc = (unsigned long)ret_from_fork; @@ -368,7 +371,7 @@ static unsigned long sigpage_addr(const struct mm_struct *mm, slots = ((last - first) >> PAGE_SHIFT) + 1; - offset = get_random_int() % slots; + offset = prandom_u32_max(slots); addr = first + (offset << PAGE_SHIFT); diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index 43b963ea4a0e..bfe88c6e60d5 100644 --- 
a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -22,7 +22,6 @@ #include <linux/hw_breakpoint.h> #include <linux/regset.h> #include <linux/audit.h> -#include <linux/tracehook.h> #include <linux/unistd.h> #include <asm/syscall.h> @@ -831,8 +830,7 @@ enum ptrace_syscall_dir { PTRACE_SYSCALL_EXIT, }; -static void tracehook_report_syscall(struct pt_regs *regs, - enum ptrace_syscall_dir dir) +static void report_syscall(struct pt_regs *regs, enum ptrace_syscall_dir dir) { unsigned long ip; @@ -844,8 +842,8 @@ static void tracehook_report_syscall(struct pt_regs *regs, regs->ARM_ip = dir; if (dir == PTRACE_SYSCALL_EXIT) - tracehook_report_syscall_exit(regs, 0); - else if (tracehook_report_syscall_entry(regs)) + ptrace_report_syscall_exit(regs, 0); + else if (ptrace_report_syscall_entry(regs)) current_thread_info()->abi_syscall = -1; regs->ARM_ip = ip; @@ -856,7 +854,7 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs) int scno; if (test_thread_flag(TIF_SYSCALL_TRACE)) - tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER); + report_syscall(regs, PTRACE_SYSCALL_ENTER); /* Do seccomp after ptrace; syscall may have changed. 
*/ #ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER @@ -897,5 +895,5 @@ asmlinkage void syscall_trace_exit(struct pt_regs *regs) trace_sys_exit(regs, regs_return_value(regs)); if (test_thread_flag(TIF_SYSCALL_TRACE)) - tracehook_report_syscall(regs, PTRACE_SYSCALL_EXIT); + report_syscall(regs, PTRACE_SYSCALL_EXIT); } diff --git a/arch/arm/kernel/reboot.c b/arch/arm/kernel/reboot.c index 3044fcb8d073..3f0d5c3dae11 100644 --- a/arch/arm/kernel/reboot.c +++ b/arch/arm/kernel/reboot.c @@ -10,6 +10,7 @@ #include <asm/cacheflush.h> #include <asm/idmap.h> #include <asm/virt.h> +#include <asm/system_misc.h> #include "reboot.h" @@ -116,9 +117,7 @@ void machine_power_off(void) { local_irq_disable(); smp_send_stop(); - - if (pm_power_off) - pm_power_off(); + do_kernel_power_off(); } /* diff --git a/arch/arm/kernel/return_address.c b/arch/arm/kernel/return_address.c index 00c11579406c..38f1ea9c724d 100644 --- a/arch/arm/kernel/return_address.c +++ b/arch/arm/kernel/return_address.c @@ -41,11 +41,13 @@ void *return_address(unsigned int level) frame.fp = (unsigned long)__builtin_frame_address(0); frame.sp = current_stack_pointer; frame.lr = (unsigned long)__builtin_return_address(0); - frame.pc = (unsigned long)return_address; +here: + frame.pc = (unsigned long)&&here; #ifdef CONFIG_KRETPROBES frame.kr_cur = NULL; frame.tsk = current; #endif + frame.ex_frame = false; walk_stackframe(&frame, save_return_addr, &data); diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 284a80c0b6e1..cb88c6e69377 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -141,10 +141,10 @@ EXPORT_SYMBOL(outer_cache); int __cpu_architecture __read_mostly = CPU_ARCH_UNKNOWN; struct stack { - u32 irq[3]; - u32 abt[3]; - u32 und[3]; - u32 fiq[3]; + u32 irq[4]; + u32 abt[4]; + u32 und[4]; + u32 fiq[4]; } ____cacheline_aligned; #ifndef CONFIG_CPU_V7M @@ -1004,7 +1004,8 @@ static void __init reserve_crashkernel(void) total_mem = get_total_mem(); ret = parse_crashkernel(boot_command_line, 
total_mem, &crash_size, &crash_base); - if (ret) + /* invalid value specified or crashkernel=0 */ + if (ret || !crash_size) return; if (crash_base <= 0) { @@ -1140,7 +1141,7 @@ void __init setup_arch(char **cmdline_p) #endif setup_dma_zone(mdesc); xen_early_init(); - efi_init(); + arm_efi_init(); /* * Make sure the calculation for lowmem/highmem is set appropriately * before reserving/allocating any memory diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index a41e27ace391..e07f359254c3 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -9,7 +9,7 @@ #include <linux/signal.h> #include <linux/personality.h> #include <linux/uaccess.h> -#include <linux/tracehook.h> +#include <linux/resume_user_mode.h> #include <linux/uprobes.h> #include <linux/syscalls.h> @@ -627,11 +627,11 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall) } else if (thread_flags & _TIF_UPROBE) { uprobe_notify_resume(regs); } else { - tracehook_notify_resume(regs); + resume_user_mode_work(regs); } } local_irq_disable(); - thread_flags = current_thread_info()->flags; + thread_flags = read_thread_flags(); } while (thread_flags & _TIF_WORK_MASK); return 0; } @@ -655,7 +655,7 @@ struct page *get_signal_page(void) PAGE_SIZE / sizeof(u32)); /* Give the signal return code some randomness */ - offset = 0x200 + (get_random_int() & 0x7fc); + offset = 0x200 + (get_random_u16() & 0x7fc); signal_return_offset = offset; /* Copy signal return handlers into the page */ @@ -708,6 +708,7 @@ static_assert(offsetof(siginfo_t, si_upper) == 0x18); static_assert(offsetof(siginfo_t, si_pkey) == 0x14); static_assert(offsetof(siginfo_t, si_perf_data) == 0x10); static_assert(offsetof(siginfo_t, si_perf_type) == 0x14); +static_assert(offsetof(siginfo_t, si_perf_flags) == 0x18); static_assert(offsetof(siginfo_t, si_band) == 0x0c); static_assert(offsetof(siginfo_t, si_fd) == 0x10); static_assert(offsetof(siginfo_t, si_call_addr) == 0x0c); diff --git 
a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S index 43077e11dafd..a86a1d4f3461 100644 --- a/arch/arm/kernel/sleep.S +++ b/arch/arm/kernel/sleep.S @@ -67,6 +67,12 @@ ENTRY(__cpu_suspend) ldr r4, =cpu_suspend_size #endif mov r5, sp @ current virtual SP +#ifdef CONFIG_VMAP_STACK + @ Run the suspend code from the overflow stack so we don't have to rely + @ on vmalloc-to-phys conversions anywhere in the arch suspend code. + @ The original SP value captured in R5 will be restored on the way out. + ldr_this_cpu sp, overflow_stack_ptr, r6, r7 +#endif add r4, r4, #12 @ Space for pgd, virt sp, phys resume fn sub sp, sp, r4 @ allocate CPU state on stack ldr r3, =sleep_save_sp @@ -113,6 +119,13 @@ ENTRY(cpu_resume_mmu) ENDPROC(cpu_resume_mmu) .popsection cpu_resume_after_mmu: +#if defined(CONFIG_VMAP_STACK) && !defined(CONFIG_ARM_LPAE) + @ Before using the vmap'ed stack, we have to switch to swapper_pg_dir + @ as the ID map does not cover the vmalloc region. + mrc p15, 0, ip, c2, c0, 1 @ read TTBR1 + mcr p15, 0, ip, c2, c0, 0 @ set TTBR0 + instr_sync +#endif bl cpu_init @ restore the und/abt/irq banked regs mov r0, #0 @ return zero on success ldmfd sp!, {r4 - r11, pc} diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index cde5b6d8bac5..978db2d96b44 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -154,9 +154,6 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) secondary_data.swapper_pg_dir = get_arch_pgd(swapper_pg_dir); #endif secondary_data.task = idle; - if (IS_ENABLED(CONFIG_THREAD_INFO_IN_TASK)) - task_thread_info(idle)->cpu = cpu; - sync_cache_w(&secondary_data); /* @@ -403,6 +400,12 @@ static void smp_store_cpu_info(unsigned int cpuid) check_cpu_icache_size(cpuid); } +static void set_current(struct task_struct *cur) +{ + /* Set TPIDRURO */ + asm("mcr p15, 0, %0, c13, c0, 3" :: "r"(cur) : "memory"); +} + /* * This is the secondary CPU boot entry. We're using this CPUs * idle thread stack, but a set of temporary page tables. 
@@ -631,11 +634,6 @@ static void ipi_complete(unsigned int cpu) /* * Main handler for inter-processor interrupts */ -asmlinkage void __exception_irq_entry do_IPI(int ipinr, struct pt_regs *regs) -{ - handle_IPI(ipinr, regs); -} - static void do_handle_IPI(int ipinr) { unsigned int cpu = smp_processor_id(); @@ -789,14 +787,6 @@ void panic_smp_self_stop(void) cpu_relax(); } -/* - * not supported here - */ -int setup_profiling_timer(unsigned int multiplier) -{ - return -EINVAL; -} - #ifdef CONFIG_CPU_FREQ static DEFINE_PER_CPU(unsigned long, l_p_j_ref); diff --git a/arch/arm/kernel/spectre.c b/arch/arm/kernel/spectre.c new file mode 100644 index 000000000000..0dcefc36fb7a --- /dev/null +++ b/arch/arm/kernel/spectre.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <linux/bpf.h> +#include <linux/cpu.h> +#include <linux/device.h> + +#include <asm/spectre.h> + +static bool _unprivileged_ebpf_enabled(void) +{ +#ifdef CONFIG_BPF_SYSCALL + return !sysctl_unprivileged_bpf_disabled; +#else + return false; +#endif +} + +ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "Mitigation: __user pointer sanitization\n"); +} + +static unsigned int spectre_v2_state; +static unsigned int spectre_v2_methods; + +void spectre_v2_update_state(unsigned int state, unsigned int method) +{ + if (state > spectre_v2_state) + spectre_v2_state = state; + spectre_v2_methods |= method; +} + +ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, + char *buf) +{ + const char *method; + + if (spectre_v2_state == SPECTRE_UNAFFECTED) + return sprintf(buf, "%s\n", "Not affected"); + + if (spectre_v2_state != SPECTRE_MITIGATED) + return sprintf(buf, "%s\n", "Vulnerable"); + + if (_unprivileged_ebpf_enabled()) + return sprintf(buf, "Vulnerable: Unprivileged eBPF enabled\n"); + + switch (spectre_v2_methods) { + case SPECTRE_V2_METHOD_BPIALL: + method = "Branch predictor hardening"; + break; + + 
case SPECTRE_V2_METHOD_ICIALLU: + method = "I-cache invalidation"; + break; + + case SPECTRE_V2_METHOD_SMC: + case SPECTRE_V2_METHOD_HVC: + method = "Firmware call"; + break; + + case SPECTRE_V2_METHOD_LOOP8: + method = "History overwrite"; + break; + + default: + method = "Multiple mitigations"; + break; + } + + return sprintf(buf, "Mitigation: %s\n", method); +} diff --git a/arch/arm/kernel/stacktrace.c b/arch/arm/kernel/stacktrace.c index 75e905508f27..85443b5d1922 100644 --- a/arch/arm/kernel/stacktrace.c +++ b/arch/arm/kernel/stacktrace.c @@ -9,6 +9,8 @@ #include <asm/stacktrace.h> #include <asm/traps.h> +#include "reboot.h" + #if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND) /* * Unwind the current stack frame and store the new register values in the @@ -39,32 +41,77 @@ * Note that with framepointer enabled, even the leaf functions have the same * prologue and epilogue, therefore we can ignore the LR value in this case. */ -int notrace unwind_frame(struct stackframe *frame) + +extern unsigned long call_with_stack_end; + +static int frame_pointer_check(struct stackframe *frame) { unsigned long high, low; unsigned long fp = frame->fp; + unsigned long pc = frame->pc; + + /* + * call_with_stack() is the only place we allow SP to jump from one + * stack to another, with FP and SP pointing to different stacks, + * skipping the FP boundary check at this point. 
+ */ + if (pc >= (unsigned long)&call_with_stack && + pc < (unsigned long)&call_with_stack_end) + return 0; /* only go to a higher address on the stack */ low = frame->sp; high = ALIGN(low, THREAD_SIZE); -#ifdef CONFIG_CC_IS_CLANG /* check current frame pointer is within bounds */ +#ifdef CONFIG_CC_IS_CLANG if (fp < low + 4 || fp > high - 4) return -EINVAL; - - frame->sp = frame->fp; - frame->fp = *(unsigned long *)(fp); - frame->pc = *(unsigned long *)(fp + 4); #else - /* check current frame pointer is within bounds */ if (fp < low + 12 || fp > high - 4) return -EINVAL; +#endif + + return 0; +} + +int notrace unwind_frame(struct stackframe *frame) +{ + unsigned long fp = frame->fp; + + if (frame_pointer_check(frame)) + return -EINVAL; + + /* + * When we unwind through an exception stack, include the saved PC + * value into the stack trace. + */ + if (frame->ex_frame) { + struct pt_regs *regs = (struct pt_regs *)frame->sp; + + /* + * We check that 'regs + sizeof(struct pt_regs)' (that is, + * &regs[1]) does not exceed the bottom of the stack to avoid + * accessing data outside the task's stack. This may happen + * when frame->ex_frame is a false positive. 
+ */ + if ((unsigned long)&regs[1] > ALIGN(frame->sp, THREAD_SIZE)) + return -EINVAL; + + frame->pc = regs->ARM_pc; + frame->ex_frame = false; + return 0; + } /* restore the registers from the stack frame */ - frame->fp = *(unsigned long *)(fp - 12); - frame->sp = *(unsigned long *)(fp - 8); - frame->pc = *(unsigned long *)(fp - 4); +#ifdef CONFIG_CC_IS_CLANG + frame->sp = frame->fp; + frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp)); + frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 4)); +#else + frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp - 12)); + frame->sp = READ_ONCE_NOCHECK(*(unsigned long *)(fp - 8)); + frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp - 4)); #endif #ifdef CONFIG_KRETPROBES if (is_kretprobe_trampoline(frame->pc)) @@ -72,6 +119,9 @@ int notrace unwind_frame(struct stackframe *frame) (void *)frame->fp, &frame->kr_cur); #endif + if (in_entry_text(frame->pc)) + frame->ex_frame = true; + return 0; } #endif @@ -102,7 +152,6 @@ static int save_trace(struct stackframe *frame, void *d) { struct stack_trace_data *data = d; struct stack_trace *trace = data->trace; - struct pt_regs *regs; unsigned long addr = frame->pc; if (data->no_sched_functions && in_sched_functions(addr)) @@ -113,19 +162,6 @@ static int save_trace(struct stackframe *frame, void *d) } trace->entries[trace->nr_entries++] = addr; - - if (trace->nr_entries >= trace->max_entries) - return 1; - - if (!in_entry_text(frame->pc)) - return 0; - - regs = (struct pt_regs *)frame->sp; - if ((unsigned long)&regs[1] > ALIGN(frame->sp, THREAD_SIZE)) - return 0; - - trace->entries[trace->nr_entries++] = regs->ARM_pc; - return trace->nr_entries >= trace->max_entries; } @@ -160,12 +196,16 @@ static noinline void __save_stack_trace(struct task_struct *tsk, frame.fp = (unsigned long)__builtin_frame_address(0); frame.sp = current_stack_pointer; frame.lr = (unsigned long)__builtin_return_address(0); - frame.pc = (unsigned long)__save_stack_trace; +here: + frame.pc = (unsigned long)&&here; 
} #ifdef CONFIG_KRETPROBES frame.kr_cur = NULL; frame.tsk = tsk; #endif +#ifdef CONFIG_UNWINDER_FRAME_POINTER + frame.ex_frame = false; +#endif walk_stackframe(&frame, save_trace, &data); } @@ -187,6 +227,9 @@ void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) frame.kr_cur = NULL; frame.tsk = current; #endif +#ifdef CONFIG_UNWINDER_FRAME_POINTER + frame.ex_frame = in_entry_text(frame.pc); +#endif walk_stackframe(&frame, save_trace, &data); } diff --git a/arch/arm/kernel/swp_emulate.c b/arch/arm/kernel/swp_emulate.c index 6166ba38bf99..b74bfcf94fb1 100644 --- a/arch/arm/kernel/swp_emulate.c +++ b/arch/arm/kernel/swp_emulate.c @@ -195,7 +195,7 @@ static int swp_handler(struct pt_regs *regs, unsigned int instr) destreg, EXTRACT_REG_NUM(instr, RT2_OFFSET), data); /* Check access in reasonable access range for both SWP and SWPB */ - if (!access_ok((address & ~3), 4)) { + if (!access_ok((void __user *)(address & ~3), 4)) { pr_debug("SWP{B} emulation: access to %p not allowed!\n", (void *)address); res = -EFAULT; diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 195dff58bafc..20b2db6dcd1c 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -30,11 +30,13 @@ #include <linux/atomic.h> #include <asm/cacheflush.h> #include <asm/exception.h> +#include <asm/spectre.h> #include <asm/unistd.h> #include <asm/traps.h> #include <asm/ptrace.h> #include <asm/unwind.h> #include <asm/tls.h> +#include <asm/stacktrace.h> #include <asm/system_misc.h> #include <asm/opcodes.h> @@ -60,19 +62,32 @@ static int __init user_debug_setup(char *str) __setup("user_debug=", user_debug_setup); #endif -static void dump_mem(const char *, const char *, unsigned long, unsigned long); - void dump_backtrace_entry(unsigned long where, unsigned long from, unsigned long frame, const char *loglvl) { unsigned long end = frame + 4 + sizeof(struct pt_regs); -#ifdef CONFIG_KALLSYMS + if (IS_ENABLED(CONFIG_UNWINDER_FRAME_POINTER) && + 
IS_ENABLED(CONFIG_CC_IS_GCC) && + end > ALIGN(frame, THREAD_SIZE)) { + /* + * If we are walking past the end of the stack, it may be due + * to the fact that we are on an IRQ or overflow stack. In this + * case, we can load the address of the other stack from the + * frame record. + */ + frame = ((unsigned long *)frame)[-2] - 4; + end = frame + 4 + sizeof(struct pt_regs); + } + +#ifndef CONFIG_KALLSYMS + printk("%sFunction entered at [<%08lx>] from [<%08lx>]\n", + loglvl, where, from); +#elif defined CONFIG_BACKTRACE_VERBOSE printk("%s[<%08lx>] (%ps) from [<%08lx>] (%pS)\n", loglvl, where, (void *)where, from, (void *)from); #else - printk("%sFunction entered at [<%08lx>] from [<%08lx>]\n", - loglvl, where, from); + printk("%s %ps from %pS\n", loglvl, (void *)where, (void *)from); #endif if (in_entry_text(from) && end <= ALIGN(frame, THREAD_SIZE)) @@ -108,7 +123,8 @@ void dump_backtrace_stm(u32 *stack, u32 instruction, const char *loglvl) static int verify_stack(unsigned long sp) { if (sp < PAGE_OFFSET || - (sp > (unsigned long)high_memory && high_memory != NULL)) + (!IS_ENABLED(CONFIG_VMAP_STACK) && + sp > (unsigned long)high_memory && high_memory != NULL)) return -EFAULT; return 0; @@ -118,8 +134,8 @@ static int verify_stack(unsigned long sp) /* * Dump out the contents of some memory nicely... 
*/ -static void dump_mem(const char *lvl, const char *str, unsigned long bottom, - unsigned long top) +void dump_mem(const char *lvl, const char *str, unsigned long bottom, + unsigned long top) { unsigned long first; int i; @@ -189,14 +205,14 @@ static void dump_instr(const char *lvl, struct pt_regs *regs) } #ifdef CONFIG_ARM_UNWIND -static inline void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk, - const char *loglvl) +void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk, + const char *loglvl) { unwind_backtrace(regs, tsk, loglvl); } #else -static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk, - const char *loglvl) +void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk, + const char *loglvl) { unsigned int fp, mode; int ok = 1; @@ -278,7 +294,8 @@ static int __die(const char *str, int err, struct pt_regs *regs) if (!user_mode(regs) || in_interrupt()) { dump_mem(KERN_EMERG, "Stack: ", regs->ARM_sp, - THREAD_SIZE + (unsigned long)task_stack_page(tsk)); + ALIGN(regs->ARM_sp - THREAD_SIZE, THREAD_ALIGN) + + THREAD_SIZE); dump_backtrace(regs, tsk, KERN_EMERG); dump_instr(KERN_EMERG, regs); } @@ -333,7 +350,7 @@ static void oops_end(unsigned long flags, struct pt_regs *regs, int signr) if (panic_on_oops) panic("Fatal exception"); if (signr) - do_exit(signr); + make_task_dead(signr); } /* @@ -470,7 +487,7 @@ asmlinkage void do_undefinstr(struct pt_regs *regs) die_sig: #ifdef CONFIG_DEBUG_USER if (user_debug & UDBG_UNDEFINED) { - pr_info("%s (%d): undefined instruction: pc=%p\n", + pr_info("%s (%d): undefined instruction: pc=%px\n", current->comm, task_pid_nr(current), pc); __show_regs(regs); dump_instr(KERN_INFO, regs); @@ -574,7 +591,7 @@ do_cache_op(unsigned long start, unsigned long end, int flags) if (end < start || flags) return -EINVAL; - if (!access_ok(start, end - start)) + if (!access_ok((void __user *)start, end - start)) return -EFAULT; return __do_cache_op(start, end); @@ -787,10 +804,59 @@ static 
inline void __init kuser_init(void *vectors) } #endif +#ifndef CONFIG_CPU_V7M +static void copy_from_lma(void *vma, void *lma_start, void *lma_end) +{ + memcpy(vma, lma_start, lma_end - lma_start); +} + +static void flush_vectors(void *vma, size_t offset, size_t size) +{ + unsigned long start = (unsigned long)vma + offset; + unsigned long end = start + size; + + flush_icache_range(start, end); +} + +#ifdef CONFIG_HARDEN_BRANCH_HISTORY +int spectre_bhb_update_vectors(unsigned int method) +{ + extern char __vectors_bhb_bpiall_start[], __vectors_bhb_bpiall_end[]; + extern char __vectors_bhb_loop8_start[], __vectors_bhb_loop8_end[]; + void *vec_start, *vec_end; + + if (system_state >= SYSTEM_FREEING_INITMEM) { + pr_err("CPU%u: Spectre BHB workaround too late - system vulnerable\n", + smp_processor_id()); + return SPECTRE_VULNERABLE; + } + + switch (method) { + case SPECTRE_V2_METHOD_LOOP8: + vec_start = __vectors_bhb_loop8_start; + vec_end = __vectors_bhb_loop8_end; + break; + + case SPECTRE_V2_METHOD_BPIALL: + vec_start = __vectors_bhb_bpiall_start; + vec_end = __vectors_bhb_bpiall_end; + break; + + default: + pr_err("CPU%u: unknown Spectre BHB state %d\n", + smp_processor_id(), method); + return SPECTRE_VULNERABLE; + } + + copy_from_lma(vectors_page, vec_start, vec_end); + flush_vectors(vectors_page, 0, vec_end - vec_start); + + return SPECTRE_MITIGATED; +} +#endif + void __init early_trap_init(void *vectors_base) { -#ifndef CONFIG_CPU_V7M - unsigned long vectors = (unsigned long)vectors_base; extern char __stubs_start[], __stubs_end[]; extern char __vectors_start[], __vectors_end[]; unsigned i; @@ -811,17 +877,87 @@ void __init early_trap_init(void *vectors_base) * into the vector page, mapped at 0xffff0000, and ensure these * are visible to the instruction stream. 
*/ - memcpy((void *)vectors, __vectors_start, __vectors_end - __vectors_start); - memcpy((void *)vectors + 0x1000, __stubs_start, __stubs_end - __stubs_start); + copy_from_lma(vectors_base, __vectors_start, __vectors_end); + copy_from_lma(vectors_base + 0x1000, __stubs_start, __stubs_end); kuser_init(vectors_base); - flush_icache_range(vectors, vectors + PAGE_SIZE * 2); + flush_vectors(vectors_base, 0, PAGE_SIZE * 2); +} #else /* ifndef CONFIG_CPU_V7M */ +void __init early_trap_init(void *vectors_base) +{ /* * on V7-M there is no need to copy the vector table to a dedicated * memory area. The address is configurable and so a table in the kernel * image can be used. */ +} +#endif + +#ifdef CONFIG_VMAP_STACK + +DECLARE_PER_CPU(u8 *, irq_stack_ptr); + +asmlinkage DEFINE_PER_CPU(u8 *, overflow_stack_ptr); + +static int __init allocate_overflow_stacks(void) +{ + u8 *stack; + int cpu; + + for_each_possible_cpu(cpu) { + stack = (u8 *)__get_free_page(GFP_KERNEL); + if (WARN_ON(!stack)) + return -ENOMEM; + per_cpu(overflow_stack_ptr, cpu) = &stack[OVERFLOW_STACK_SIZE]; + } + return 0; +} +early_initcall(allocate_overflow_stacks); + +asmlinkage void handle_bad_stack(struct pt_regs *regs) +{ + unsigned long tsk_stk = (unsigned long)current->stack; +#ifdef CONFIG_IRQSTACKS + unsigned long irq_stk = (unsigned long)raw_cpu_read(irq_stack_ptr); +#endif + unsigned long ovf_stk = (unsigned long)raw_cpu_read(overflow_stack_ptr); + + console_verbose(); + pr_emerg("Insufficient stack space to handle exception!"); + + pr_emerg("Task stack: [0x%08lx..0x%08lx]\n", + tsk_stk, tsk_stk + THREAD_SIZE); +#ifdef CONFIG_IRQSTACKS + pr_emerg("IRQ stack: [0x%08lx..0x%08lx]\n", + irq_stk - THREAD_SIZE, irq_stk); #endif + pr_emerg("Overflow stack: [0x%08lx..0x%08lx]\n", + ovf_stk - OVERFLOW_STACK_SIZE, ovf_stk); + + die("kernel stack overflow", regs, 0); } + +#ifndef CONFIG_ARM_LPAE +/* + * Normally, we rely on the logic in do_translation_fault() to update stale PMD + * entries covering the vmalloc 
space in a task's page tables when it first + * accesses the region in question. Unfortunately, this is not sufficient when + * the task stack resides in the vmalloc region, as do_translation_fault() is a + * C function that needs a stack to run. + * + * So we need to ensure that these PMD entries are up to date *before* the MM + * switch. As we already have some logic in the MM switch path that takes care + * of this, let's trigger it by bumping the counter every time the core vmalloc + * code modifies a PMD entry in the vmalloc region. Use release semantics on + * the store so that other CPUs observing the counter's new value are + * guaranteed to see the updated page table entries as well. + */ +void arch_sync_kernel_mappings(unsigned long start, unsigned long end) +{ + if (start < VMALLOC_END && end > VMALLOC_START) + atomic_inc_return_release(&init_mm.context.vmalloc_seq); +} +#endif +#endif diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c index 59fdf257bf8b..a37ea6c772cd 100644 --- a/arch/arm/kernel/unwind.c +++ b/arch/arm/kernel/unwind.c @@ -33,6 +33,8 @@ #include <asm/traps.h> #include <asm/unwind.h> +#include "reboot.h" + /* Dummy functions to avoid linker complaints */ void __aeabi_unwind_cpp_pr0(void) { @@ -53,6 +55,7 @@ struct unwind_ctrl_block { unsigned long vrs[16]; /* virtual register set */ const unsigned long *insn; /* pointer to the current instructions word */ unsigned long sp_high; /* highest value of sp allowed */ + unsigned long *lr_addr; /* address of LR value on the stack */ /* * 1 : check for stack overflow for each register pop. * 0 : save overhead if there is plenty of stack remaining. @@ -237,6 +240,8 @@ static int unwind_pop_register(struct unwind_ctrl_block *ctrl, * from being tracked by KASAN. 
*/ ctrl->vrs[reg] = READ_ONCE_NOCHECK(*(*vsp)); + if (reg == 14) + ctrl->lr_addr = *vsp; (*vsp)++; return URC_OK; } @@ -256,8 +261,9 @@ static int unwind_exec_pop_subset_r4_to_r13(struct unwind_ctrl_block *ctrl, mask >>= 1; reg++; } - if (!load_sp) + if (!load_sp) { ctrl->vrs[SP] = (unsigned long)vsp; + } return URC_OK; } @@ -313,9 +319,9 @@ static int unwind_exec_insn(struct unwind_ctrl_block *ctrl) if ((insn & 0xc0) == 0x00) ctrl->vrs[SP] += ((insn & 0x3f) << 2) + 4; - else if ((insn & 0xc0) == 0x40) + else if ((insn & 0xc0) == 0x40) { ctrl->vrs[SP] -= ((insn & 0x3f) << 2) + 4; - else if ((insn & 0xf0) == 0x80) { + } else if ((insn & 0xf0) == 0x80) { unsigned long mask; insn = (insn << 8) | unwind_get_byte(ctrl); @@ -330,9 +336,9 @@ static int unwind_exec_insn(struct unwind_ctrl_block *ctrl) if (ret) goto error; } else if ((insn & 0xf0) == 0x90 && - (insn & 0x0d) != 0x0d) + (insn & 0x0d) != 0x0d) { ctrl->vrs[SP] = ctrl->vrs[insn & 0x0f]; - else if ((insn & 0xf0) == 0xa0) { + } else if ((insn & 0xf0) == 0xa0) { ret = unwind_exec_pop_r4_to_rN(ctrl, insn); if (ret) goto error; @@ -375,23 +381,22 @@ error: */ int unwind_frame(struct stackframe *frame) { - unsigned long low; const struct unwind_idx *idx; struct unwind_ctrl_block ctrl; + unsigned long sp_low; /* store the highest address on the stack to avoid crossing it*/ - low = frame->sp; - ctrl.sp_high = ALIGN(low, THREAD_SIZE); + sp_low = frame->sp; + ctrl.sp_high = ALIGN(sp_low - THREAD_SIZE, THREAD_ALIGN) + + THREAD_SIZE; pr_debug("%s(pc = %08lx lr = %08lx sp = %08lx)\n", __func__, frame->pc, frame->lr, frame->sp); - if (!kernel_text_address(frame->pc)) - return -URC_FAILURE; - idx = unwind_find_idx(frame->pc); if (!idx) { - pr_warn("unwind: Index not found %08lx\n", frame->pc); + if (frame->pc && kernel_text_address(frame->pc)) + pr_warn("unwind: Index not found %08lx\n", frame->pc); return -URC_FAILURE; } @@ -403,7 +408,20 @@ int unwind_frame(struct stackframe *frame) if (idx->insn == 1) /* can't unwind */ 
return -URC_FAILURE; - else if ((idx->insn & 0x80000000) == 0) + else if (frame->pc == prel31_to_addr(&idx->addr_offset)) { + /* + * Unwinding is tricky when we're halfway through the prologue, + * since the stack frame that the unwinder expects may not be + * fully set up yet. However, one thing we do know for sure is + * that if we are unwinding from the very first instruction of + * a function, we are still effectively in the stack frame of + * the caller, and the unwind info has no relevance yet. + */ + if (frame->pc == frame->lr) + return -URC_FAILURE; + frame->pc = frame->lr; + return URC_OK; + } else if ((idx->insn & 0x80000000) == 0) /* prel31 to the unwind table */ ctrl.insn = (unsigned long *)prel31_to_addr(&idx->insn); else if ((idx->insn & 0xff000000) == 0x80000000) @@ -430,6 +448,16 @@ int unwind_frame(struct stackframe *frame) ctrl.check_each_pop = 0; + if (prel31_to_addr(&idx->addr_offset) == (u32)&call_with_stack) { + /* + * call_with_stack() is the only place where we permit SP to + * jump from one stack to another, and since we know it is + * guaranteed to happen, set up the SP bounds accordingly. 
+ */ + sp_low = frame->fp; + ctrl.sp_high = ALIGN(frame->fp, THREAD_SIZE); + } + while (ctrl.entries > 0) { int urc; if ((ctrl.sp_high - ctrl.vrs[SP]) < sizeof(ctrl.vrs)) @@ -437,7 +465,7 @@ int unwind_frame(struct stackframe *frame) urc = unwind_exec_insn(&ctrl); if (urc < 0) return urc; - if (ctrl.vrs[SP] < low || ctrl.vrs[SP] >= ctrl.sp_high) + if (ctrl.vrs[SP] < sp_low || ctrl.vrs[SP] > ctrl.sp_high) return -URC_FAILURE; } @@ -452,6 +480,7 @@ int unwind_frame(struct stackframe *frame) frame->sp = ctrl.vrs[SP]; frame->lr = ctrl.vrs[LR]; frame->pc = ctrl.vrs[PC]; + frame->lr_addr = ctrl.lr_addr; return URC_OK; } @@ -475,7 +504,12 @@ void unwind_backtrace(struct pt_regs *regs, struct task_struct *tsk, frame.fp = (unsigned long)__builtin_frame_address(0); frame.sp = current_stack_pointer; frame.lr = (unsigned long)__builtin_return_address(0); - frame.pc = (unsigned long)unwind_backtrace; + /* We are saving the stack and execution state at this + * point, so we should ensure that frame.pc is within + * this block of code. + */ +here: + frame.pc = (unsigned long)&&here; } else { /* task blocked in __switch_to */ frame.fp = thread_saved_fp(tsk); diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index f02d617e3359..aa12b65a7fd6 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -138,12 +138,12 @@ SECTIONS #ifdef CONFIG_STRICT_KERNEL_RWX . = ALIGN(1<<SECTION_SHIFT); #else - . = ALIGN(THREAD_SIZE); + . = ALIGN(THREAD_ALIGN); #endif __init_end = .; _sdata = .; - RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE) + RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_ALIGN) _edata = .; BSS_SECTION(0, 0, 0) |