From 532826f3712b607256eb30f92f23d1c604d3fa34 Mon Sep 17 00:00:00 2001 From: Michael Weiser Date: Thu, 1 Feb 2018 23:13:37 +0100 Subject: arm64: Mirror arm for unimplemented compat syscalls Mirror arm behaviour for unimplemented syscalls: Below 2048 return -ENOSYS, above 2048 raise SIGILL. Signed-off-by: Michael Weiser [will: Tweak die string to identify as compat syscall] Signed-off-by: Will Deacon --- arch/arm64/kernel/sys_compat.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c index a382b2a1b84e..9897f416b29e 100644 --- a/arch/arm64/kernel/sys_compat.c +++ b/arch/arm64/kernel/sys_compat.c @@ -27,6 +27,7 @@ #include #include +#include #include static long @@ -67,6 +68,7 @@ do_compat_cache_op(unsigned long start, unsigned long end, int flags) */ long compat_arm_syscall(struct pt_regs *regs) { + siginfo_t info; unsigned int no = regs->regs[7]; switch (no) { @@ -99,6 +101,23 @@ long compat_arm_syscall(struct pt_regs *regs) return 0; default: - return -ENOSYS; + /* + * Calls 9f00xx..9f07ff are defined to return -ENOSYS + * if not implemented, rather than raising SIGILL. This + * way the calling program can gracefully determine whether + * a feature is supported. + */ + if ((no & 0xffff) <= 0x7ff) + return -ENOSYS; + break; } + + info.si_signo = SIGILL; + info.si_errno = 0; + info.si_code = ILL_ILLTRP; + info.si_addr = (void __user *)instruction_pointer(regs) - + (compat_thumb_mode(regs) ? 2 : 4); + + arm64_notify_die("Oops - bad compat syscall(2)", regs, &info, no); + return 0; } -- cgit v1.2.3-59-g8ed1b From 6141ac1c274741ea8a47dfda759071f1c2eb5573 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 17 Jan 2018 16:11:27 +0000 Subject: arm64/kernel: kaslr: drop special Image placement logic Now that the early kernel mapping logic can tolerate placements of Image that cross swapper table boundaries, we can remove the logic that adjusts the offset if the dice roll produced an offset that puts the kernel right on top of one. Reviewed-by: Steve Capper Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/kernel/kaslr.c | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index 47080c49cc7e..e3d5cbe2167b 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -128,21 +128,6 @@ u64 __init kaslr_early_init(u64 dt_phys) /* use the top 16 bits to randomize the linear region */ memstart_offset_seed = seed >> 48; - /* - * The kernel Image should not extend across a 1GB/32MB/512MB alignment - * boundary (for 4KB/16KB/64KB granule kernels, respectively). If this - * happens, round down the KASLR offset by (1 << SWAPPER_TABLE_SHIFT). - * - * NOTE: The references to _text and _end below will already take the - * modulo offset (the physical displacement modulo 2 MB) into - * account, given that the physical placement is controlled by - * the loader, and will not change as a result of the virtual - * mapping we choose. 
- */ - if ((((u64)_text + offset) >> SWAPPER_TABLE_SHIFT) != - (((u64)_end + offset) >> SWAPPER_TABLE_SHIFT)) - offset = round_down(offset, 1 << SWAPPER_TABLE_SHIFT); - if (IS_ENABLED(CONFIG_KASAN)) /* * KASAN does not expect the module region to intersect the -- cgit v1.2.3-59-g8ed1b From 2e6f549fe91344999ef3e479ec9fbd039614f2e5 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 21 Feb 2018 10:18:21 -0800 Subject: arm64: cpufeature: Relocate PAN emulation report The PAN emulation notification was only happening for non-boot CPUs if CPU capabilities had already been configured. This seems to be the wrong place, as it's system-wide and isn't attached to capabilities, so its reporting didn't normally happen. Instead, report it once from the boot CPU. Before (missing PAN emulation report): SMP: Total of 4 processors activated. CPU features: detected feature: 32-bit EL0 Support CPU features: detected feature: Kernel page table isolation (KPTI) CPU: All CPU(s) started at EL2 After: SMP: Total of 4 processors activated. CPU features: detected feature: 32-bit EL0 Support CPU features: detected feature: Kernel page table isolation (KPTI) CPU features: emulated: Privileged Access Never (PAN) using TTBR0_EL1 switching CPU: All CPU(s) started at EL2 Signed-off-by: Kees Cook Acked-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 2985a067fc13..64a711c493e0 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1335,9 +1335,6 @@ static void verify_local_cpu_capabilities(void) if (system_supports_sve()) verify_sve_features(); - - if (system_uses_ttbr0_pan()) - pr_info("Emulating Privileged Access Never (PAN) using TTBR0_EL1 switching\n"); } void check_local_cpu_capabilities(void) @@ -1396,6 +1393,9 @@ void __init setup_cpu_features(void) if (system_supports_32bit_el0()) setup_elf_hwcaps(compat_elf_hwcaps); + if (system_uses_ttbr0_pan()) + pr_info("emulated: Privileged Access Never (PAN) using TTBR0_EL1 switching\n"); + sve_setup(); /* Advertise that we have computed the system capabilities */ -- cgit v1.2.3-59-g8ed1b From e0f6429dc1c0aeac8439e16a0c8e2539f401190f Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 21 Feb 2018 10:18:22 -0800 Subject: arm64: cpufeature: Remove redundant "feature" in reports The word "feature" is repeated in the CPU features reporting. This drops it for improved readability. Before (redundant "feature" word): SMP: Total of 4 processors activated. CPU features: detected feature: 32-bit EL0 Support CPU features: detected feature: Kernel page table isolation (KPTI) CPU features: emulated: Privileged Access Never (PAN) using TTBR0_EL1 switching CPU: All CPU(s) started at EL2 After: SMP: Total of 4 processors activated. 
CPU features: detected: 32-bit EL0 Support CPU features: detected: Kernel page table isolation (KPTI) CPU features: emulated: Privileged Access Never (PAN) using TTBR0_EL1 switching CPU: All CPU(s) started at EL2 Signed-off-by: Kees Cook Acked-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 64a711c493e0..3c7dfaf24d6c 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1359,7 +1359,7 @@ void check_local_cpu_capabilities(void) static void __init setup_feature_capabilities(void) { - update_cpu_capabilities(arm64_features, "detected feature:"); + update_cpu_capabilities(arm64_features, "detected:"); enable_cpu_capabilities(arm64_features); } -- cgit v1.2.3-59-g8ed1b From 2c9120f3a86a809518ece1787d76ae07dd01e01b Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 20 Feb 2018 14:16:29 +0000 Subject: arm64: signal: Make force_signal_inject more robust force_signal_inject is a little flakey: * It only knows about SIGILL and SIGSEGV, so can potentially deliver other signals based on a partially initialised siginfo_t * It sets si_addr to point at the PC for SIGSEGV * It always operates on current, so doesn't need the regs argument This patch fixes these issues by always assigning the si_addr field to the address parameter of the function and updates the callers (including those that indirectly call via arm64_notify_segfault) accordingly. Signed-off-by: Will Deacon --- arch/arm64/include/asm/traps.h | 6 ++---- arch/arm64/kernel/armv8_deprecated.c | 2 +- arch/arm64/kernel/fpsimd.c | 5 ++--- arch/arm64/kernel/traps.c | 23 ++++++++++++----------- 4 files changed, 17 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h index 178e338d2889..1ee63dc38579 100644 --- a/arch/arm64/include/asm/traps.h +++ b/arch/arm64/include/asm/traps.h @@ -35,10 +35,8 @@ struct undef_hook { void register_undef_hook(struct undef_hook *hook); void unregister_undef_hook(struct undef_hook *hook); -void force_signal_inject(int signal, int code, struct pt_regs *regs, - unsigned long address); - -void arm64_notify_segfault(struct pt_regs *regs, unsigned long addr); +void force_signal_inject(int signal, int code, unsigned long address); +void arm64_notify_segfault(unsigned long addr); /* * Move regs->pc to next instruction and do necessary setup before it diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index 68450e954d47..6e47fc3ab549 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -429,7 +429,7 @@ ret: fault: pr_debug("SWP{B} emulation: access caused memory abort!\n"); - arm64_notify_segfault(regs, address); + arm64_notify_segfault(address); return 0; } diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index e7226c4c7493..6964ff867d4a 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -285,8 +285,7 @@ static void task_fpsimd_save(void) * re-enter user with corrupt state. 
* There's no way to recover, so kill it: */ - force_signal_inject( - SIGKILL, 0, current_pt_regs(), 0); + force_signal_inject(SIGKILL, 0, 0); return; } @@ -831,7 +830,7 @@ asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs) { /* Even if we chose not to use SVE, the hardware could still trap: */ if (unlikely(!system_supports_sve()) || WARN_ON(is_compat_task())) { - force_signal_inject(SIGILL, ILL_ILLOPC, regs, 0); + force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc); return; } diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index eb2d15147e8d..c478d8e27649 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -311,12 +311,13 @@ exit: return fn ? fn(regs, instr) : 1; } -void force_signal_inject(int signal, int code, struct pt_regs *regs, - unsigned long address) +void force_signal_inject(int signal, int code, unsigned long address) { siginfo_t info; - void __user *pc = (void __user *)instruction_pointer(regs); const char *desc; + struct pt_regs *regs = current_pt_regs(); + + clear_siginfo(&info); switch (signal) { case SIGILL: @@ -332,15 +333,15 @@ void force_signal_inject(int signal, int code, struct pt_regs *regs, if (unhandled_signal(current, signal) && show_unhandled_signals_ratelimited()) { - pr_info("%s[%d]: %s: pc=%p\n", - current->comm, task_pid_nr(current), desc, pc); + pr_info("%s[%d]: %s: pc=%08llx\n", + current->comm, task_pid_nr(current), desc, regs->pc); dump_instr(KERN_INFO, regs); } info.si_signo = signal; info.si_errno = 0; info.si_code = code; - info.si_addr = pc; + info.si_addr = (void __user *)address; arm64_notify_die(desc, regs, &info, 0); } @@ -348,7 +349,7 @@ void force_signal_inject(int signal, int code, struct pt_regs *regs, /* * Set up process info to signal segmentation fault - called on access error. */ -void arm64_notify_segfault(struct pt_regs *regs, unsigned long addr) +void arm64_notify_segfault(unsigned long addr) { int code; @@ -359,7 +360,7 @@ void arm64_notify_segfault(struct pt_regs *regs, unsigned long addr) code = SEGV_ACCERR; up_read(¤t->mm->mmap_sem); - force_signal_inject(SIGSEGV, code, regs, addr); + force_signal_inject(SIGSEGV, code, addr); } asmlinkage void __exception do_undefinstr(struct pt_regs *regs) @@ -371,7 +372,7 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs) if (call_undef_hook(regs) == 0) return; - force_signal_inject(SIGILL, ILL_ILLOPC, regs, 0); + force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc); } int cpu_enable_cache_maint_trap(void *__unused) @@ -426,12 +427,12 @@ static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs) __user_cache_maint("ic ivau", address, ret); break; default: - force_signal_inject(SIGILL, ILL_ILLOPC, regs, 0); + force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc); return; } if (ret) - arm64_notify_segfault(regs, address); + arm64_notify_segfault(address); else arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); } -- cgit v1.2.3-59-g8ed1b From a7e6f1ca90354a31946873d102cfa999ddf6ecb4 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 20 Feb 2018 18:08:40 +0000 Subject: arm64: signal: Force SIGKILL for unknown signals in force_signal_inject For signals other than SIGKILL or those with siginfo_layout(signal, code) == SIL_FAULT then force_signal_inject does not initialise the siginfo_t properly. Since the signal number is determined solely by the caller, simply WARN on unknown signals and force to SIGKILL. 
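[ed: illustrative aside, not part of the patch. siginfo_layout() (generic signal code) classifies which siginfo union members get copied out to userspace, and only the SIL_FAULT layout is fully described by the fields this function initialises. A hedged sketch of the hazard being closed, reusing the assignments already visible in force_signal_inject:

	siginfo_t info;			/* stack garbage unless cleared */

	info.si_signo = signal;
	info.si_errno = 0;
	info.si_code  = code;
	info.si_addr  = (void __user *)address;
	/* complete for SIL_FAULT; for any other layout (e.g. SIL_CHLD,
	 * which also copies si_pid/si_status) the unset members would
	 * leak uninitialised kernel stack to userspace */
]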
Reported-by: Dave Martin Signed-off-by: Will Deacon --- arch/arm64/kernel/traps.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index c478d8e27649..3f52c07b4bf4 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -331,6 +331,12 @@ void force_signal_inject(int signal, int code, unsigned long address) break; } + /* Force signals we don't understand to SIGKILL */ + if (WARN_ON(signal != SIGKILL && + siginfo_layout(signal, code) != SIL_FAULT)) { + signal = SIGKILL; + } + if (unhandled_signal(current, signal) && show_unhandled_signals_ratelimited()) { pr_info("%s[%d]: %s: pc=%08llx\n", -- cgit v1.2.3-59-g8ed1b From a1ece8216c41c9dbb4040f7b8b3fbcd17662c665 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 20 Feb 2018 13:46:05 +0000 Subject: arm64: Introduce arm64_force_sig_info and hook up in arm64_notify_die In preparation for consolidating our handling of printing unhandled signals, introduce a wrapper around force_sig_info which can act as the canonical place for dealing with show_unhandled_signals. Initially, we just hook this up to arm64_notify_die. Signed-off-by: Will Deacon --- arch/arm64/include/asm/traps.h | 2 ++ arch/arm64/kernel/traps.c | 28 +++++++++++++++++++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h index 1ee63dc38579..c320f3bf6c57 100644 --- a/arch/arm64/include/asm/traps.h +++ b/arch/arm64/include/asm/traps.h @@ -37,6 +37,8 @@ void register_undef_hook(struct undef_hook *hook); void unregister_undef_hook(struct undef_hook *hook); void force_signal_inject(int signal, int code, unsigned long address); void arm64_notify_segfault(unsigned long addr); +void arm64_force_sig_info(struct siginfo *info, const char *str, + struct task_struct *tsk); /* * Move regs->pc to next instruction and do necessary setup before it diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 3f52c07b4bf4..00516f3956e4 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -223,13 +223,39 @@ void die(const char *str, struct pt_regs *regs, int err) do_exit(SIGSEGV); } +void arm64_force_sig_info(struct siginfo *info, const char *str, + struct task_struct *tsk) +{ + unsigned int esr = tsk->thread.fault_code; + struct pt_regs *regs = task_pt_regs(tsk); + + if (!unhandled_signal(tsk, info->si_signo)) + goto send_sig; + + if (!show_unhandled_signals_ratelimited()) + goto send_sig; + + pr_info("%s[%d]: unhandled exception: ", tsk->comm, task_pid_nr(tsk)); + if (esr) + pr_cont("%s, ESR 0x%08x, ", esr_get_class_string(esr), esr); + + pr_cont("%s", str); + print_vma_addr(KERN_CONT " in ", regs->pc); + pr_cont("\n"); + __show_regs(regs); + +send_sig: + force_sig_info(info->si_signo, info, tsk); +} + void arm64_notify_die(const char *str, struct pt_regs *regs, struct siginfo *info, int err) { if (user_mode(regs)) { + WARN_ON(regs != current_pt_regs()); current->thread.fault_address = 0; current->thread.fault_code = err; - force_sig_info(info->si_signo, info, current); + arm64_force_sig_info(info, str, current); } else { die(str, regs, err); } } -- cgit v1.2.3-59-g8ed1b From 15b67321e7e9671881c7174a651a1c7d74c59f72 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 20 Feb 2018 15:25:19 +0000 Subject: arm64: signal: Don't print anything directly in force_signal_inject arm64_notify_die deals with printing out information regarding unhandled signals, so there's no need
to roll our own code here. Signed-off-by: Will Deacon --- arch/arm64/kernel/traps.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 00516f3956e4..835411cab38c 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -363,13 +363,6 @@ void force_signal_inject(int signal, int code, unsigned long address) signal = SIGKILL; } - if (unhandled_signal(current, signal) && - show_unhandled_signals_ratelimited()) { - pr_info("%s[%d]: %s: pc=%08llx\n", - current->comm, task_pid_nr(current), desc, regs->pc); - dump_instr(KERN_INFO, regs); - } - info.si_signo = signal; info.si_errno = 0; info.si_code = code; -- cgit v1.2.3-59-g8ed1b From 1049c30871701a6533dd41e555612b31a29acb33 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 20 Feb 2018 14:41:02 +0000 Subject: arm64: Pass user fault info to arm64_notify_die instead of printing it There's no need for callers of arm64_notify_die to print information about user faults. Instead, they can pass a string to arm64_notify_die which will be printed subject to show_unhandled_signals. Signed-off-by: Will Deacon --- arch/arm64/mm/fault.c | 30 ++++++++---------------------- 1 file changed, 8 insertions(+), 22 deletions(-) (limited to 'arch') diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index bff11553eb05..fd5928afd9cd 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -582,8 +582,6 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs) const struct fault_info *inf; inf = esr_to_fault_info(esr); - pr_err("Synchronous External Abort: %s (0x%08x) at 0x%016lx\n", - inf->name, esr, addr); /* * Synchronous aborts may interrupt code which had interrupts masked. @@ -607,7 +605,7 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs) info.si_addr = NULL; else info.si_addr = (void __user *)addr; - arm64_notify_die("", regs, &info, esr); + arm64_notify_die(inf->name, regs, &info, esr); return 0; } @@ -698,19 +696,17 @@ asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr, if (!inf->fn(addr, esr, regs)) return; - pr_alert("Unhandled fault: %s at 0x%016lx\n", - inf->name, addr); - - mem_abort_decode(esr); - - if (!user_mode(regs)) + if (!user_mode(regs)) { + pr_alert("Unhandled fault at 0x%016lx\n", addr); + mem_abort_decode(esr); show_pte(addr); + } info.si_signo = inf->sig; info.si_errno = 0; info.si_code = inf->code; info.si_addr = (void __user *)addr; - arm64_notify_die("", regs, &info, esr); + arm64_notify_die(inf->name, regs, &info, esr); } asmlinkage void __exception do_el0_irq_bp_hardening(void) @@ -741,7 +737,6 @@ asmlinkage void __exception do_sp_pc_abort(unsigned long addr, struct pt_regs *regs) { struct siginfo info; - struct task_struct *tsk = current; if (user_mode(regs)) { if (instruction_pointer(regs) > TASK_SIZE) @@ -749,17 +744,11 @@ asmlinkage void __exception do_sp_pc_abort(unsigned long addr, local_irq_enable(); } - if (show_unhandled_signals && unhandled_signal(tsk, SIGBUS)) - pr_info_ratelimited("%s[%d]: %s exception: pc=%p sp=%p\n", - tsk->comm, task_pid_nr(tsk), - esr_get_class_string(esr), (void *)regs->pc, - (void *)regs->sp); - info.si_signo = SIGBUS; info.si_errno = 0; info.si_code = BUS_ADRALN; info.si_addr = (void __user *)addr; - arm64_notify_die("Oops - SP/PC alignment exception", regs, &info, esr); + arm64_notify_die("SP/PC alignment exception", regs, &info, esr); } int __init early_brk64(unsigned long addr, unsigned int esr, @@ 
-814,14 +803,11 @@ asmlinkage int __exception do_debug_exception(unsigned long addr, if (!inf->fn(addr, esr, regs)) { rv = 1; } else { - pr_alert("Unhandled debug exception: %s (0x%08x) at 0x%016lx\n", - inf->name, esr, addr); - info.si_signo = inf->sig; info.si_errno = 0; info.si_code = inf->code; info.si_addr = (void __user *)addr; - arm64_notify_die("", regs, &info, 0); + arm64_notify_die(inf->name, regs, &info, esr); rv = 0; } -- cgit v1.2.3-59-g8ed1b From 92ff0674f5d8013704cbaeaceb8e3576b36754ee Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 20 Feb 2018 14:53:22 +0000 Subject: arm64: mm: Rework unhandled user pagefaults to call arm64_force_sig_info Reporting unhandled user pagefaults via arm64_force_sig_info means that __do_user_fault can be drastically simplified, since it no longer has to worry about printing the fault information and can consequently just take the siginfo as a parameter. Signed-off-by: Will Deacon --- arch/arm64/mm/fault.c | 87 +++++++++++++++++++++------------------------------ 1 file changed, 36 insertions(+), 51 deletions(-) (limited to 'arch') diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index fd5928afd9cd..49dfb08a6c4d 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -43,6 +43,7 @@ #include #include #include +#include #include @@ -289,58 +290,31 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr, do_exit(SIGKILL); } -static void __do_user_fault(struct task_struct *tsk, unsigned long addr, - unsigned int esr, unsigned int sig, int code, - struct pt_regs *regs, int fault) +static void __do_user_fault(struct siginfo *info, unsigned int esr) { - struct siginfo si; - const struct fault_info *inf; - unsigned int lsb = 0; - - if (unhandled_signal(tsk, sig) && show_unhandled_signals_ratelimited()) { - inf = esr_to_fault_info(esr); - pr_info("%s[%d]: unhandled %s (%d) at 0x%08lx, esr 0x%03x", - tsk->comm, task_pid_nr(tsk), inf->name, sig, - addr, esr); - print_vma_addr(KERN_CONT ", in ", regs->pc); - pr_cont("\n"); - __show_regs(regs); - } - - tsk->thread.fault_address = addr; - tsk->thread.fault_code = esr; - si.si_signo = sig; - si.si_errno = 0; - si.si_code = code; - si.si_addr = (void __user *)addr; - /* - * Either small page or large page may be poisoned. - * In other words, VM_FAULT_HWPOISON_LARGE and - * VM_FAULT_HWPOISON are mutually exclusive. - */ - if (fault & VM_FAULT_HWPOISON_LARGE) - lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault)); - else if (fault & VM_FAULT_HWPOISON) - lsb = PAGE_SHIFT; - si.si_addr_lsb = lsb; - - force_sig_info(sig, &si, tsk); + current->thread.fault_address = (unsigned long)info->si_addr; + current->thread.fault_code = esr; + arm64_force_sig_info(info, esr_to_fault_info(esr)->name, current); } static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs) { - struct task_struct *tsk = current; - const struct fault_info *inf; - /* * If we are in kernel mode at this point, we have no context to * handle this fault with. 
*/ if (user_mode(regs)) { - inf = esr_to_fault_info(esr); - __do_user_fault(tsk, addr, esr, inf->sig, inf->code, regs, 0); - } else + const struct fault_info *inf = esr_to_fault_info(esr); + struct siginfo si = { + .si_signo = inf->sig, + .si_code = inf->code, + .si_addr = (void __user *)addr, + }; + + __do_user_fault(&si, esr); + } else { __do_kernel_fault(addr, esr, regs); + } } #define VM_FAULT_BADMAP 0x010000 @@ -393,7 +367,8 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, { struct task_struct *tsk; struct mm_struct *mm; - int fault, sig, code, major = 0; + struct siginfo si; + int fault, major = 0; unsigned long vm_flags = VM_READ | VM_WRITE; unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; @@ -525,27 +500,37 @@ retry: return 0; } + clear_siginfo(&si); + si.si_addr = (void __user *)addr; + if (fault & VM_FAULT_SIGBUS) { /* * We had some memory, but were unable to successfully fix up * this page fault. */ - sig = SIGBUS; - code = BUS_ADRERR; - } else if (fault & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE)) { - sig = SIGBUS; - code = BUS_MCEERR_AR; + si.si_signo = SIGBUS; + si.si_code = BUS_ADRERR; + } else if (fault & VM_FAULT_HWPOISON_LARGE) { + unsigned int hindex = VM_FAULT_GET_HINDEX(fault); + + si.si_signo = SIGBUS; + si.si_code = BUS_MCEERR_AR; + si.si_addr_lsb = hstate_index_to_shift(hindex); + } else if (fault & VM_FAULT_HWPOISON) { + si.si_signo = SIGBUS; + si.si_code = BUS_MCEERR_AR; + si.si_addr_lsb = PAGE_SHIFT; } else { /* * Something tried to access memory that isn't in our memory * map. */ - sig = SIGSEGV; - code = fault == VM_FAULT_BADACCESS ? - SEGV_ACCERR : SEGV_MAPERR; + si.si_signo = SIGSEGV; + si.si_code = fault == VM_FAULT_BADACCESS ? + SEGV_ACCERR : SEGV_MAPERR; } - __do_user_fault(tsk, addr, esr, sig, code, regs, fault); + __do_user_fault(&si, esr); return 0; no_context: -- cgit v1.2.3-59-g8ed1b From f71016a8a8c5696530ec7173ee969c68e91d3719 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 20 Feb 2018 15:05:17 +0000 Subject: arm64: signal: Call arm64_notify_segfault when failing to deliver signal If we fail to deliver a signal due to taking an unhandled fault on the stackframe, we can call arm64_notify_segfault to deliver a SEGV which can deal with printing any unhandled signal messages for us, rather than roll our own printing code. A side-effect of this change is that we now deliver the frame address in si_addr along with an si_code of SEGV_{ACC,MAP}ERR, rather than an si_addr of 0 and an si_code of SI_KERNEL as before.
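[ed: for illustration only, a hypothetical userspace observer of that side-effect; sigaction()/SA_SIGINFO are standard POSIX and nothing below is introduced by this patch:

	static void segv_handler(int sig, siginfo_t *si, void *ctx)
	{
		/* before: si->si_code == SI_KERNEL, si->si_addr == NULL
		 * after:  si->si_code == SEGV_MAPERR or SEGV_ACCERR,
		 *         si->si_addr == the inaccessible frame address */
	}

	struct sigaction sa = {
		.sa_sigaction = segv_handler,
		.sa_flags     = SA_SIGINFO,
	};
	sigaction(SIGSEGV, &sa, NULL);
]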
Signed-off-by: Will Deacon --- arch/arm64/kernel/signal.c | 7 ++----- arch/arm64/kernel/signal32.c | 13 +++---------- 2 files changed, 5 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index f60c052e8d1c..e5c656d0e316 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -40,6 +40,7 @@ #include #include #include +#include #include /* @@ -565,11 +566,7 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) return regs->regs[0]; badframe: - if (show_unhandled_signals) - pr_info_ratelimited("%s[%d]: bad frame in %s: pc=%08llx sp=%08llx\n", - current->comm, task_pid_nr(current), __func__, - regs->pc, regs->sp); - force_sig(SIGSEGV, current); + arm64_notify_segfault(regs->sp); return 0; } diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index 79feb861929b..68f5e07b592b 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -307,11 +308,7 @@ asmlinkage int compat_sys_sigreturn(struct pt_regs *regs) return regs->regs[0]; badframe: - if (show_unhandled_signals) - pr_info_ratelimited("%s[%d]: bad frame in %s: pc=%08llx sp=%08llx\n", - current->comm, task_pid_nr(current), __func__, - regs->pc, regs->compat_sp); - force_sig(SIGSEGV, current); + arm64_notify_segfault(regs->compat_sp); return 0; } @@ -344,11 +341,7 @@ asmlinkage int compat_sys_rt_sigreturn(struct pt_regs *regs) return regs->regs[0]; badframe: - if (show_unhandled_signals) - pr_info_ratelimited("%s[%d]: bad frame in %s: pc=%08llx sp=%08llx\n", - current->comm, task_pid_nr(current), __func__, - regs->pc, regs->compat_sp); - force_sig(SIGSEGV, current); + arm64_notify_segfault(regs->compat_sp); return 0; } -- cgit v1.2.3-59-g8ed1b From a26731d9d1e3fa93db0b5781d7e8dd9dbff1313e Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 20 Feb 2018 15:08:51 +0000 Subject: arm64: Move show_unhandled_signals_ratelimited into traps.c show_unhandled_signals_ratelimited is only called in traps.c, so move it out of its macro in the dreaded system_misc.h and into a static function in traps.c Signed-off-by: Will Deacon --- arch/arm64/include/asm/system_misc.h | 11 ----------- arch/arm64/kernel/traps.c | 7 +++++++ 2 files changed, 7 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/system_misc.h b/arch/arm64/include/asm/system_misc.h index 07aa8e3c5630..28893a0b141d 100644 --- a/arch/arm64/include/asm/system_misc.h +++ b/arch/arm64/include/asm/system_misc.h @@ -45,17 +45,6 @@ extern void __show_regs(struct pt_regs *); extern void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd); -#define show_unhandled_signals_ratelimited() \ -({ \ - static DEFINE_RATELIMIT_STATE(_rs, \ - DEFAULT_RATELIMIT_INTERVAL, \ - DEFAULT_RATELIMIT_BURST); \ - bool __show_ratelimited = false; \ - if (show_unhandled_signals && __ratelimit(&_rs)) \ - __show_ratelimited = true; \ - __show_ratelimited; \ -}) - int handle_guest_sea(phys_addr_t addr, unsigned int esr); #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 835411cab38c..b139fe2d2126 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -223,6 +223,13 @@ void die(const char *str, struct pt_regs *regs, int err) do_exit(SIGSEGV); } +static bool show_unhandled_signals_ratelimited(void) +{ + static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); + return 
show_unhandled_signals && __ratelimit(&rs); +} + void arm64_force_sig_info(struct siginfo *info, const char *str, struct task_struct *tsk) { -- cgit v1.2.3-59-g8ed1b From 4e829b6735475313016787ec3d256e102167b94d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 20 Feb 2018 15:18:13 +0000 Subject: arm64: Use arm64_force_sig_info instead of force_sig_info Using arm64_force_sig_info means that printing messages about unhandled signals is dealt with for us, so use that in preference to force_sig_info and remove any homebrew printing code. Signed-off-by: Will Deacon --- arch/arm64/kernel/debug-monitors.c | 3 ++- arch/arm64/kernel/ptrace.c | 2 +- arch/arm64/kernel/traps.c | 9 ++------- 3 files changed, 5 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 53781f5687c5..06ca574495af 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -33,6 +33,7 @@ #include #include #include +#include /* Determine debug architecture. */ u8 debug_monitors_arch(void) @@ -223,7 +224,7 @@ static void send_user_sigtrap(int si_code) if (interrupts_enabled(regs)) local_irq_enable(); - force_sig_info(SIGTRAP, &info, current); + arm64_force_sig_info(&info, "User debug trap", current); } static int single_step_handler(unsigned long addr, unsigned int esr, diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 9ae31f7e2243..6228476e74ba 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -209,7 +209,7 @@ static void ptrace_hbptriggered(struct perf_event *bp, force_sig_ptrace_errno_trap(si_errno, (void __user *)bkpt->trigger); } #endif - force_sig_info(SIGTRAP, &info, current); + arm64_force_sig_info(&info, "Hardware breakpoint trap (ptrace)", current); } /* diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index b139fe2d2126..2b478565d774 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -633,11 +633,6 @@ asmlinkage void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr) { siginfo_t info; void __user *pc = (void __user *)instruction_pointer(regs); - console_verbose(); - - pr_crit("Bad EL0 synchronous exception detected on CPU%d, code 0x%08x -- %s\n", - smp_processor_id(), esr, esr_get_class_string(esr)); - __show_regs(regs); info.si_signo = SIGILL; info.si_errno = 0; @@ -645,9 +640,9 @@ asmlinkage void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr) info.si_addr = pc; current->thread.fault_address = 0; - current->thread.fault_code = 0; + current->thread.fault_code = esr; - force_sig_info(info.si_signo, &info, current); + arm64_force_sig_info(&info, "Bad EL0 synchronous exception", current); } #ifdef CONFIG_VMAP_STACK -- cgit v1.2.3-59-g8ed1b From 6b24442d68e78c57c8837920ea5dfb252571847a Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 9 Feb 2018 13:19:47 +0000 Subject: arm64: lse: Pass -fomit-frame-pointer to out-of-line ll/sc atomics In cases where x30 is used as a temporary in the out-of-line ll/sc atomics (e.g. atomic_fetch_add), the compiler tends to put out a full stackframe, which included pointing the x29 at the new frame. Since these things aren't traceable anyway, we can pass -fomit-frame-pointer to reduce the work when spilling. Since this is incompatible with -pg, we also remove that from the CFLAGS for this file. 
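[ed: background sketch of the kbuild mechanism the diff below relies on; CFLAGS_<object>.o and CFLAGS_REMOVE_<object>.o are standard per-object kbuild hooks, and "foo.o" here is a placeholder:

	# append extra flags when compiling only this object
	CFLAGS_foo.o        := -fomit-frame-pointer
	# filter flags back out of KBUILD_CFLAGS for this object; -pg has
	# to go because GCC rejects it in combination with
	# -fomit-frame-pointer on targets where mcount profiling needs
	# the frame pointer
	CFLAGS_REMOVE_foo.o := -pg
]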
Signed-off-by: Will Deacon --- arch/arm64/lib/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile index 4e696f96451f..0ead8a1d1679 100644 --- a/arch/arm64/lib/Makefile +++ b/arch/arm64/lib/Makefile @@ -17,6 +17,7 @@ CFLAGS_atomic_ll_sc.o := -fcall-used-x0 -ffixed-x1 -ffixed-x2 \ -ffixed-x7 -fcall-saved-x8 -fcall-saved-x9 \ -fcall-saved-x10 -fcall-saved-x11 -fcall-saved-x12 \ -fcall-saved-x13 -fcall-saved-x14 -fcall-saved-x15 \ - -fcall-saved-x18 + -fcall-saved-x18 -fomit-frame-pointer +CFLAGS_REMOVE_atomic_ll_sc.o := -pg lib-$(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) += uaccess_flushcache.o -- cgit v1.2.3-59-g8ed1b From 1f85b42a691cd8329ba82dbcaeec80ac1231b32a Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 28 Feb 2018 18:47:20 +0000 Subject: arm64: Revert L1_CACHE_SHIFT back to 6 (64-byte cache line size) Commit 97303480753e ("arm64: Increase the max granular size") increased the cache line size to 128 to match Cavium ThunderX, apparently for some performance benefit which could not be confirmed. This change, however, has an impact on the network packets allocation in certain circumstances, requiring slightly over a 4K page with a significant performance degradation. This patch reverts L1_CACHE_SHIFT back to 6 (64-byte cache line) while keeping ARCH_DMA_MINALIGN at 128. The cache_line_size() function was changed to default to ARCH_DMA_MINALIGN in the absence of a meaningful CTR_EL0.CWG bit field. In addition, if a system with ARCH_DMA_MINALIGN < CTR_EL0.CWG is detected, the kernel will force swiotlb bounce buffering for all non-coherent devices since DMA cache maintenance on sub-CWG ranges is not safe, leading to data corruption. Cc: Tirumalesh Chalamarla Cc: Timur Tabi Cc: Florian Fainelli Acked-by: Robin Murphy Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/cache.h | 6 +++--- arch/arm64/include/asm/dma-direct.h | 43 +++++++++++++++++++++++++++++++++++++ arch/arm64/kernel/cpufeature.c | 9 ++------ arch/arm64/mm/dma-mapping.c | 17 +++++++++++++++ arch/arm64/mm/init.c | 3 ++- 6 files changed, 68 insertions(+), 11 deletions(-) create mode 100644 arch/arm64/include/asm/dma-direct.h (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 7381eeb7ef8e..655c0e99d9fa 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -17,6 +17,7 @@ config ARM64 select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA select ARCH_HAS_KCOV select ARCH_HAS_MEMBARRIER_SYNC_CORE + select ARCH_HAS_PHYS_TO_DMA select ARCH_HAS_SET_MEMORY select ARCH_HAS_SG_CHAIN select ARCH_HAS_STRICT_KERNEL_RWX diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index ea9bb4e0e9bb..b2e6ece23713 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -29,7 +29,7 @@ #define ICACHE_POLICY_VIPT 2 #define ICACHE_POLICY_PIPT 3 -#define L1_CACHE_SHIFT 7 +#define L1_CACHE_SHIFT (6) #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) /* @@ -39,7 +39,7 @@ * cache before the transfer is done, causing old data to be seen by * the CPU. */ -#define ARCH_DMA_MINALIGN L1_CACHE_BYTES +#define ARCH_DMA_MINALIGN (128) #ifndef __ASSEMBLY__ @@ -73,7 +73,7 @@ static inline u32 cache_type_cwg(void) static inline int cache_line_size(void) { u32 cwg = cache_type_cwg(); - return cwg ? 4 << cwg : L1_CACHE_BYTES; + return cwg ? 
4 << cwg : ARCH_DMA_MINALIGN; } #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/dma-direct.h b/arch/arm64/include/asm/dma-direct.h new file mode 100644 index 000000000000..abb1b40ec751 --- /dev/null +++ b/arch/arm64/include/asm/dma-direct.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_DMA_DIRECT_H +#define __ASM_DMA_DIRECT_H + +#include +#include + +#include + +DECLARE_STATIC_KEY_FALSE(swiotlb_noncoherent_bounce); + +static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) +{ + dma_addr_t dev_addr = (dma_addr_t)paddr; + + return dev_addr - ((dma_addr_t)dev->dma_pfn_offset << PAGE_SHIFT); +} + +static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dev_addr) +{ + phys_addr_t paddr = (phys_addr_t)dev_addr; + + return paddr + ((phys_addr_t)dev->dma_pfn_offset << PAGE_SHIFT); +} + +static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) +{ + if (!dev->dma_mask) + return false; + + /* + * Force swiotlb buffer bouncing when ARCH_DMA_MINALIGN < CWG. The + * swiotlb bounce buffers are aligned to (1 << IO_TLB_SHIFT). + */ + if (static_branch_unlikely(&swiotlb_noncoherent_bounce) && + !is_device_dma_coherent(dev) && + !is_swiotlb_buffer(dma_to_phys(dev, addr))) + return false; + + return addr + size - 1 <= *dev->dma_mask; +} + +#endif /* __ASM_DMA_DIRECT_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 3c7dfaf24d6c..f96b3449034b 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1382,7 +1382,6 @@ bool this_cpu_has_cap(unsigned int cap) void __init setup_cpu_features(void) { u32 cwg; - int cls; /* Set the CPU feature capabilies */ setup_feature_capabilities(); @@ -1405,13 +1404,9 @@ void __init setup_cpu_features(void) * Check for sane CTR_EL0.CWG value. */ cwg = cache_type_cwg(); - cls = cache_line_size(); if (!cwg) - pr_warn("No Cache Writeback Granule information, assuming cache line size %d\n", - cls); - if (L1_CACHE_BYTES < cls) - pr_warn("L1_CACHE_BYTES smaller than the Cache Writeback Granule (%d < %d)\n", - L1_CACHE_BYTES, cls); + pr_warn("No Cache Writeback Granule information, assuming %d\n", + ARCH_DMA_MINALIGN); } static bool __maybe_unused diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index a96ec0181818..1e9dac8684ca 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -33,6 +33,7 @@ #include static int swiotlb __ro_after_init; +DEFINE_STATIC_KEY_FALSE(swiotlb_noncoherent_bounce); static pgprot_t __get_dma_pgprot(unsigned long attrs, pgprot_t prot, bool coherent) @@ -504,6 +505,14 @@ static int __init arm64_dma_init(void) max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT)) swiotlb = 1; + if (WARN_TAINT(ARCH_DMA_MINALIGN < cache_line_size(), + TAINT_CPU_OUT_OF_SPEC, + "ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)", + ARCH_DMA_MINALIGN, cache_line_size())) { + swiotlb = 1; + static_branch_enable(&swiotlb_noncoherent_bounce); + } + return atomic_pool_init(); } arch_initcall(arm64_dma_init); @@ -882,6 +891,14 @@ static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, const struct iommu_ops *iommu, bool coherent) { + /* + * Enable swiotlb for buffer bouncing if ARCH_DMA_MINALIGN < CWG. + * dma_capable() forces the actual bounce if the device is + * non-coherent. 
+ */ + if (static_branch_unlikely(&swiotlb_noncoherent_bounce) && !coherent) + iommu = NULL; + if (!dev->dma_ops) dev->dma_ops = &arm64_swiotlb_dma_ops; diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 9f3c47acf8ff..664acf177799 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -586,7 +586,8 @@ static void __init free_unused_memmap(void) void __init mem_init(void) { if (swiotlb_force == SWIOTLB_FORCE || - max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT)) + max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT) || + ARCH_DMA_MINALIGN < cache_line_size()) swiotlb_init(1); else swiotlb_force = SWIOTLB_NO_FORCE; -- cgit v1.2.3-59-g8ed1b From 9597e74396c02c8189215dcdfc08088a9b8d8087 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 1 Mar 2018 18:07:12 +0100 Subject: kasan, arm64: clean up KASAN_SHADOW_SCALE_SHIFT usage This is a follow up patch to the series I sent recently that cleans up KASAN_SHADOW_SCALE_SHIFT usage (which value was hardcoded and scattered all over the code). This fixes the one place that I forgot to fix. The change is purely aesthetic: instead of hardcoding the value for KASAN_SHADOW_SCALE_SHIFT in arch/arm64/Makefile, an appropriate variable is declared and used. Signed-off-by: Andrey Konovalov Signed-off-by: Will Deacon --- arch/arm64/Makefile | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index b481b4a7c011..4bb18aee4846 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -97,12 +97,14 @@ else TEXT_OFFSET := 0x00080000 endif -# KASAN_SHADOW_OFFSET = VA_START + (1 << (VA_BITS - 3)) - (1 << 61) +# KASAN_SHADOW_OFFSET = VA_START + (1 << (VA_BITS - KASAN_SHADOW_SCALE_SHIFT)) +# - (1 << (64 - KASAN_SHADOW_SCALE_SHIFT)) # in 32-bit arithmetic +KASAN_SHADOW_SCALE_SHIFT := 3 KASAN_SHADOW_OFFSET := $(shell printf "0x%08x00000000\n" $$(( \ - (0xffffffff & (-1 << ($(CONFIG_ARM64_VA_BITS) - 32))) \ - + (1 << ($(CONFIG_ARM64_VA_BITS) - 32 - 3)) \ - - (1 << (64 - 32 - 3)) )) ) + (0xffffffff & (-1 << ($(CONFIG_ARM64_VA_BITS) - 32))) \ + + (1 << ($(CONFIG_ARM64_VA_BITS) - 32 - $(KASAN_SHADOW_SCALE_SHIFT))) \ + - (1 << (64 - 32 - $(KASAN_SHADOW_SCALE_SHIFT))) )) ) export TEXT_OFFSET GZFLAGS -- cgit v1.2.3-59-g8ed1b From 24153c03d4f5dc4ac0ccd446a9636c269c1d4a02 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Mon, 5 Mar 2018 15:43:09 -0800 Subject: arm64/debug: Fix registers on sleeping tasks This is the equivalent of commit 001bf455d206 ("ARM: 8428/1: kgdb: Fix registers on sleeping tasks") but for arm64. Nuff said. ...well, perhaps I could also add that task_pt_regs are userspace registers and that's not what kgdb is supposed to be reporting. We're supposed to be reporting kernel registers.
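[ed: illustrative background, not part of the patch: thread.cpu_context holds only the callee-saved state that cpu_switch_to() spills at a context switch, which is why the hunk below can populate just x19-x28, fp, sp and pc while the caller-saved slots stay zeroed. A sketch of that structure, condensed from asm/processor.h:

	struct cpu_context {
		unsigned long x19, x20, x21, x22, x23,
			      x24, x25, x26, x27, x28;
		unsigned long fp;
		unsigned long sp;
		unsigned long pc;
	};
]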
Signed-off-by: Douglas Anderson Signed-off-by: Will Deacon --- arch/arm64/kernel/kgdb.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c index 2122cd187f19..a20de58061a8 100644 --- a/arch/arm64/kernel/kgdb.c +++ b/arch/arm64/kernel/kgdb.c @@ -138,14 +138,25 @@ int dbg_set_reg(int regno, void *mem, struct pt_regs *regs) void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *task) { - struct pt_regs *thread_regs; + struct cpu_context *cpu_context = &task->thread.cpu_context; /* Initialize to zero */ memset((char *)gdb_regs, 0, NUMREGBYTES); - thread_regs = task_pt_regs(task); - memcpy((void *)gdb_regs, (void *)thread_regs->regs, GP_REG_BYTES); - /* Special case for PSTATE (check comments in asm/kgdb.h for details) */ - dbg_get_reg(33, gdb_regs + GP_REG_BYTES, thread_regs); + + gdb_regs[19] = cpu_context->x19; + gdb_regs[20] = cpu_context->x20; + gdb_regs[21] = cpu_context->x21; + gdb_regs[22] = cpu_context->x22; + gdb_regs[23] = cpu_context->x23; + gdb_regs[24] = cpu_context->x24; + gdb_regs[25] = cpu_context->x25; + gdb_regs[26] = cpu_context->x26; + gdb_regs[27] = cpu_context->x27; + gdb_regs[28] = cpu_context->x28; + gdb_regs[29] = cpu_context->fp; + + gdb_regs[31] = cpu_context->sp; + gdb_regs[32] = cpu_context->pc; } void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc) -- cgit v1.2.3-59-g8ed1b From e03e61c3173c1079058920210ab40c458a0e0899 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 27 Feb 2018 14:15:49 +0000 Subject: arm64: kaslr: Set TCR_EL1.NFD1 when CONFIG_RANDOMIZE_BASE=y TCR_EL1.NFD1 was allocated by SVE and ensures that fault-surpressing SVE memory accesses (e.g. speculative accesses from a first-fault gather load) which translate via TTBR1_EL1 result in a translation fault if they miss in the TLB when executed from EL0. This mitigates some timing attacks against KASLR, where the kernel address space could otherwise be probed efficiently using the FFR in conjunction with suppressed faults on SVE loads. Cc: Dave Martin Acked-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable-hwdef.h | 1 + arch/arm64/mm/proc.S | 9 ++++++++- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index cdfe3e657a9e..fd208eac9f2a 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -291,6 +291,7 @@ #define TCR_TBI0 (UL(1) << 37) #define TCR_HA (UL(1) << 39) #define TCR_HD (UL(1) << 40) +#define TCR_NFD1 (UL(1) << 54) /* * TTBR. diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index c0af47617299..8f074d64b760 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -36,6 +36,12 @@ #define TCR_TG_FLAGS TCR_TG0_4K | TCR_TG1_4K #endif +#ifdef CONFIG_RANDOMIZE_BASE +#define TCR_KASLR_FLAGS TCR_NFD1 +#else +#define TCR_KASLR_FLAGS 0 +#endif + #define TCR_SMP_FLAGS TCR_SHARED /* PTWs cacheable, inner/outer WBWA */ @@ -432,7 +438,8 @@ ENTRY(__cpu_setup) * both user and kernel. 
*/ ldr x10, =TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \ - TCR_TG_FLAGS | TCR_ASID16 | TCR_TBI0 | TCR_A1 + TCR_TG_FLAGS | TCR_KASLR_FLAGS | TCR_ASID16 | \ + TCR_TBI0 | TCR_A1 tcr_set_idmap_t0sz x10, x9 /* -- cgit v1.2.3-59-g8ed1b From 5e8307b9c6f40526f290663e5a4de0f78bb0446a Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 6 Mar 2018 17:15:31 +0000 Subject: arm64: module: don't BUG when exceeding preallocated PLT count When PLTs are emitted at relocation time, we really should not exceed the number that we counted when parsing the relocation tables, and so currently, we BUG() on this condition. However, even though this is a clear bug in this particular piece of code, we can easily recover by failing to load the module. So instead, return 0 from module_emit_plt_entry() if this condition occurs, which is not a valid kernel address, and can hence serve as a flag value that makes the relocation routine bail out. Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/kernel/module-plts.c | 3 ++- arch/arm64/kernel/module.c | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c index ea640f92fe5a..6bf07c602bd4 100644 --- a/arch/arm64/kernel/module-plts.c +++ b/arch/arm64/kernel/module-plts.c @@ -36,7 +36,8 @@ u64 module_emit_plt_entry(struct module *mod, void *loc, const Elf64_Rela *rela, return (u64)&plt[i - 1]; pltsec->plt_num_entries++; - BUG_ON(pltsec->plt_num_entries > pltsec->plt_max_entries); + if (WARN_ON(pltsec->plt_num_entries > pltsec->plt_max_entries)) + return 0; return (u64)&plt[i]; } diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index f469e0435903..c8c6c2828b79 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -386,6 +386,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && ovf == -ERANGE) { val = module_emit_plt_entry(me, loc, &rel[i], sym); + if (!val) + return -ENOEXEC; ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 26, AARCH64_INSN_IMM_26); } -- cgit v1.2.3-59-g8ed1b From f2b9ba871beb92fd6884b957acb14621b15fbe2b Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 6 Mar 2018 17:15:32 +0000 Subject: arm64/kernel: kaslr: reduce module randomization range to 4 GB We currently have to rely on the GCC large code model for KASLR for two distinct but related reasons: - if we enable full randomization, modules will be loaded very far away from the core kernel, where they are out of range for ADRP instructions, - even without full randomization, the fact that the 128 MB module region is now no longer fully reserved for kernel modules means that there is a very low likelihood that the normal bottom-up allocation of other vmalloc regions may collide, and use up the range for other things. Large model code is suboptimal, given that each symbol reference involves a literal load that goes through the D-cache, reducing cache utilization. But more importantly, literals are not instructions but part of .text nonetheless, and hence mapped with executable permissions. 
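[ed: a quick range check behind the 4 GB figure chosen below, derived purely from the architectural encoding: ADRP takes a signed 21-bit page-granular immediate, so

	reach = +/- (1 << 20) pages * 4 KB/page = +/- 4 GB of the PC

which is why a 4 GB module window around the kernel keeps every module-to-kernel ADRP reference resolvable with the small code model.]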
So let's get rid of our dependency on the large model for KASLR, by: - reducing the full randomization range to 4 GB, thereby ensuring that ADRP references between modules and the kernel are always in range, - reduce the spillover range to 4 GB as well, so that we fallback to a region that is still guaranteed to be in range - move the randomization window of the core kernel to the middle of the VMALLOC space Note that KASAN always uses the module region outside of the vmalloc space, so keep the kernel close to that if KASAN is enabled. Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 7 +++---- arch/arm64/kernel/kaslr.c | 20 ++++++++++++-------- arch/arm64/kernel/module.c | 7 ++++--- include/linux/sizes.h | 4 ++++ 4 files changed, 23 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 655c0e99d9fa..b4234ddf6570 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1110,7 +1110,6 @@ config ARM64_MODULE_CMODEL_LARGE config ARM64_MODULE_PLTS bool - select ARM64_MODULE_CMODEL_LARGE select HAVE_MOD_ARCH_SPECIFIC config RELOCATABLE @@ -1144,12 +1143,12 @@ config RANDOMIZE_BASE If unsure, say N. config RANDOMIZE_MODULE_REGION_FULL - bool "Randomize the module region independently from the core kernel" + bool "Randomize the module region over a 4 GB range" depends on RANDOMIZE_BASE default y help - Randomizes the location of the module region without considering the - location of the core kernel. This way, it is impossible for modules + Randomizes the location of the module region inside a 4 GB window + covering the core kernel. This way, it is less likely for modules to leak information about the location of core kernel data structures but it does imply that function calls between modules and the core kernel will need to be resolved via veneers in the module PLT. diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index e3d5cbe2167b..f0e6ab8abe9c 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -117,13 +117,15 @@ u64 __init kaslr_early_init(u64 dt_phys) /* * OK, so we are proceeding with KASLR enabled. Calculate a suitable * kernel image offset from the seed. Let's place the kernel in the - * lower half of the VMALLOC area (VA_BITS - 2). + * middle half of the VMALLOC area (VA_BITS - 2), and stay clear of + * the lower and upper quarters to avoid colliding with other + * allocations. * Even if we could randomize at page granularity for 16k and 64k pages, * let's always round to 2 MB so we don't interfere with the ability to * map using contiguous PTEs */ mask = ((1UL << (VA_BITS - 2)) - 1) & ~(SZ_2M - 1); - offset = seed & mask; + offset = BIT(VA_BITS - 3) + (seed & mask); /* use the top 16 bits to randomize the linear region */ memstart_offset_seed = seed >> 48; @@ -134,21 +136,23 @@ u64 __init kaslr_early_init(u64 dt_phys) * vmalloc region, since shadow memory is allocated for each * module at load time, whereas the vmalloc region is shadowed * by KASAN zero pages. So keep modules out of the vmalloc - * region if KASAN is enabled. + * region if KASAN is enabled, and put the kernel well within + * 4 GB of the module region. */ - return offset; + return offset % SZ_2G; if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) { /* - * Randomize the module region independently from the core - * kernel. This prevents modules from leaking any information + * Randomize the module region over a 4 GB window covering the + * kernel. 
This reduces the risk of modules leaking information * about the address of the kernel itself, but results in * branches between modules and the core kernel that are * resolved via PLTs. (Branches between modules will be * resolved normally.) */ - module_range = VMALLOC_END - VMALLOC_START - MODULES_VSIZE; - module_alloc_base = VMALLOC_START; + module_range = SZ_4G - (u64)(_end - _stext); + module_alloc_base = max((u64)_end + offset - SZ_4G, + (u64)MODULES_VADDR); } else { /* * Randomize the module region by setting module_alloc_base to diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index c8c6c2828b79..70c3e5518e95 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -55,9 +55,10 @@ void *module_alloc(unsigned long size) * less likely that the module region gets exhausted, so we * can simply omit this fallback in that case. */ - p = __vmalloc_node_range(size, MODULE_ALIGN, VMALLOC_START, - VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_EXEC, 0, - NUMA_NO_NODE, __builtin_return_address(0)); + p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base, + module_alloc_base + SZ_4G, GFP_KERNEL, + PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, + __builtin_return_address(0)); if (p && (kasan_module_alloc(p, size) < 0)) { vfree(p); diff --git a/include/linux/sizes.h b/include/linux/sizes.h index ce3e8150c174..fbde0bc7e882 100644 --- a/include/linux/sizes.h +++ b/include/linux/sizes.h @@ -8,6 +8,8 @@ #ifndef __LINUX_SIZES_H__ #define __LINUX_SIZES_H__ +#include + #define SZ_1 0x00000001 #define SZ_2 0x00000002 #define SZ_4 0x00000004 @@ -44,4 +46,6 @@ #define SZ_1G 0x40000000 #define SZ_2G 0x80000000 +#define SZ_4G _AC(0x100000000, ULL) + #endif /* __LINUX_SIZES_H__ */ -- cgit v1.2.3-59-g8ed1b From a257e02579e42703de1b7825cbd56cd7191f97b0 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 6 Mar 2018 17:15:33 +0000 Subject: arm64/kernel: don't ban ADRP to work around Cortex-A53 erratum #843419 Working around Cortex-A53 erratum #843419 involves special handling of ADRP instructions that end up in the last two instruction slots of a 4k page, or whose output register gets overwritten without having been read. (Note that the latter instruction sequence is never emitted by a properly functioning compiler, which is why it is disregarded by the handling of the same erratum in the bfd.ld linker which we rely on for the core kernel) Normally, this gets taken care of by the linker, which can spot such sequences at final link time, and insert a veneer if the ADRP ends up at a vulnerable offset. However, linux kernel modules are partially linked ELF objects, and so there is no 'final link time' other than the runtime loading of the module, at which time all the static relocations are resolved. For this reason, we have implemented the #843419 workaround for modules by avoiding ADRP instructions altogether, by using the large C model, and by passing -mpc-relative-literal-loads to recent versions of GCC that may emit adrp/ldr pairs to perform literal loads. However, this workaround forces us to keep literal data mixed with the instructions in the executable .text segment, and literal data may inadvertently turn into an exploitable speculative gadget depending on the relative offsets of arbitrary symbols. 
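[ed: for orientation, a sketch of the two checks the rework below is built on; both constants reappear in the hunks that follow:

	/* the erratum can only bite an ADRP occupying one of the last
	 * two instruction slots of a 4 KB page, i.e. page offset
	 * 0xff8 or 0xffc */
	bool vulnerable = ((u64)place & 0xfff) >= 0xff8;

	/* ADR takes a signed 21-bit byte offset, so a target within
	 * +/- 1 MB of the instruction can be reached by rewriting the
	 * ADRP as an ADR; anything further away is redirected through
	 * a movn/movk veneer instead */
]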
So let's reimplement this workaround in a way that allows us to switch back to the small C model, and to drop the -mpc-relative-literal-loads GCC switch, by patching affected ADRP instructions at runtime: - ADRP instructions that do not appear at 4k relative offset 0xff8 or 0xffc are ignored - ADRP instructions that are within 1 MB of their target symbol are converted into ADR instructions - remaining ADRP instructions are redirected via a veneer that performs the load using an unaffected movn/movk sequence. Signed-off-by: Ard Biesheuvel [will: tidied up ADRP -> ADR instruction patching.] [will: use ULL suffix for 64-bit immediate] Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 11 ++--- arch/arm64/Makefile | 5 --- arch/arm64/include/asm/module.h | 2 + arch/arm64/kernel/module-plts.c | 86 +++++++++++++++++++++++++++++++++++-- arch/arm64/kernel/module.c | 34 +++++++++++++-- arch/arm64/kernel/reloc_test_core.c | 4 +- arch/arm64/kernel/reloc_test_syms.S | 12 ++++-- 7 files changed, 130 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index b4234ddf6570..0f350b80aa84 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -456,12 +456,12 @@ config ARM64_ERRATUM_845719 config ARM64_ERRATUM_843419 bool "Cortex-A53: 843419: A load or store might access an incorrect address" default y - select ARM64_MODULE_CMODEL_LARGE if MODULES + select ARM64_MODULE_PLTS if MODULES help This option links the kernel with '--fix-cortex-a53-843419' and - builds modules using the large memory model in order to avoid the use - of the ADRP instruction, which can cause a subsequent memory access - to use an incorrect address on Cortex-A53 parts up to r0p4. + enables PLT support to replace certain ADRP instructions, which can + cause subsequent memory accesses to use an incorrect address on + Cortex-A53 parts up to r0p4. If unsure, say Y. @@ -1105,9 +1105,6 @@ config ARM64_SVE To enable use of this extension on CPUs that implement it, say Y. 
-config ARM64_MODULE_CMODEL_LARGE - bool - config ARM64_MODULE_PLTS bool select HAVE_MOD_ARCH_SPECIFIC diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 4bb18aee4846..15402861bb59 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -51,7 +51,6 @@ endif KBUILD_CFLAGS += -mgeneral-regs-only $(lseinstr) $(brokengasinst) KBUILD_CFLAGS += -fno-asynchronous-unwind-tables -KBUILD_CFLAGS += $(call cc-option, -mpc-relative-literal-loads) KBUILD_AFLAGS += $(lseinstr) $(brokengasinst) KBUILD_CFLAGS += $(call cc-option,-mabi=lp64) @@ -77,10 +76,6 @@ endif CHECKFLAGS += -D__aarch64__ -m64 -ifeq ($(CONFIG_ARM64_MODULE_CMODEL_LARGE), y) -KBUILD_CFLAGS_MODULE += -mcmodel=large -endif - ifeq ($(CONFIG_ARM64_MODULE_PLTS),y) KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/arm64/kernel/module.lds endif diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h index 4f766178fa6f..b6dbbe3123a9 100644 --- a/arch/arm64/include/asm/module.h +++ b/arch/arm64/include/asm/module.h @@ -39,6 +39,8 @@ struct mod_arch_specific { u64 module_emit_plt_entry(struct module *mod, void *loc, const Elf64_Rela *rela, Elf64_Sym *sym); +u64 module_emit_adrp_veneer(struct module *mod, void *loc, u64 val); + #ifdef CONFIG_RANDOMIZE_BASE extern u64 module_alloc_base; #else diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c index 6bf07c602bd4..271b77390de0 100644 --- a/arch/arm64/kernel/module-plts.c +++ b/arch/arm64/kernel/module-plts.c @@ -42,6 +42,47 @@ u64 module_emit_plt_entry(struct module *mod, void *loc, const Elf64_Rela *rela, return (u64)&plt[i]; } +#ifdef CONFIG_ARM64_ERRATUM_843419 +u64 module_emit_adrp_veneer(struct module *mod, void *loc, u64 val) +{ + struct mod_plt_sec *pltsec = !in_init(mod, loc) ? &mod->arch.core : + &mod->arch.init; + struct plt_entry *plt = (struct plt_entry *)pltsec->plt->sh_addr; + int i = pltsec->plt_num_entries++; + u32 mov0, mov1, mov2, br; + int rd; + + if (WARN_ON(pltsec->plt_num_entries > pltsec->plt_max_entries)) + return 0; + + /* get the destination register of the ADRP instruction */ + rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD, + le32_to_cpup((__le32 *)loc)); + + /* generate the veneer instructions */ + mov0 = aarch64_insn_gen_movewide(rd, (u16)~val, 0, + AARCH64_INSN_VARIANT_64BIT, + AARCH64_INSN_MOVEWIDE_INVERSE); + mov1 = aarch64_insn_gen_movewide(rd, (u16)(val >> 16), 16, + AARCH64_INSN_VARIANT_64BIT, + AARCH64_INSN_MOVEWIDE_KEEP); + mov2 = aarch64_insn_gen_movewide(rd, (u16)(val >> 32), 32, + AARCH64_INSN_VARIANT_64BIT, + AARCH64_INSN_MOVEWIDE_KEEP); + br = aarch64_insn_gen_branch_imm((u64)&plt[i].br, (u64)loc + 4, + AARCH64_INSN_BRANCH_NOLINK); + + plt[i] = (struct plt_entry){ + cpu_to_le32(mov0), + cpu_to_le32(mov1), + cpu_to_le32(mov2), + cpu_to_le32(br) + }; + + return (u64)&plt[i]; +} +#endif + #define cmp_3way(a,b) ((a) < (b) ? -1 : (a) > (b)) static int cmp_rela(const void *a, const void *b) @@ -69,16 +110,21 @@ static bool duplicate_rel(const Elf64_Rela *rela, int num) } static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num, - Elf64_Word dstidx) + Elf64_Word dstidx, Elf_Shdr *dstsec) { unsigned int ret = 0; Elf64_Sym *s; int i; for (i = 0; i < num; i++) { + u64 min_align; + switch (ELF64_R_TYPE(rela[i].r_info)) { case R_AARCH64_JUMP26: case R_AARCH64_CALL26: + if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE)) + break; + /* * We only have to consider branch targets that resolve * to symbols that are defined in a different section. 
@@ -110,6 +156,40 @@ static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num, if (rela[i].r_addend != 0 || !duplicate_rel(rela, i)) ret++; break; + case R_AARCH64_ADR_PREL_PG_HI21_NC: + case R_AARCH64_ADR_PREL_PG_HI21: + if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_843419)) + break; + + /* + * Determine the minimal safe alignment for this ADRP + * instruction: the section alignment at which it is + * guaranteed not to appear at a vulnerable offset. + * + * This comes down to finding the least significant zero + * bit in bits [11:3] of the section offset, and + * increasing the section's alignment so that the + * resulting address of this instruction is guaranteed + * to equal the offset in that particular bit (as well + * as all less significant bits). This ensures that the + * address modulo 4 KB != 0xff8 or 0xffc (which would + * have all ones in bits [11:3]) + */ + min_align = 2ULL << ffz(rela[i].r_offset | 0x7); + + /* + * Allocate veneer space for each ADRP that may appear + * at a vulnerable offset nonetheless. At relocation + * time, some of these will remain unused since some + * ADRP instructions can be patched to ADR instructions + * instead. + */ + if (min_align > SZ_4K) + ret++; + else + dstsec->sh_addralign = max(dstsec->sh_addralign, + min_align); + break; } } return ret; @@ -167,10 +247,10 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, if (strncmp(secstrings + dstsec->sh_name, ".init", 5) != 0) core_plts += count_plts(syms, rels, numrels, - sechdrs[i].sh_info); + sechdrs[i].sh_info, dstsec); else init_plts += count_plts(syms, rels, numrels, - sechdrs[i].sh_info); + sechdrs[i].sh_info, dstsec); } mod->arch.core.plt->sh_type = SHT_NOBITS; diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 70c3e5518e95..f9d824947c34 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -198,6 +198,33 @@ static int reloc_insn_imm(enum aarch64_reloc_op op, __le32 *place, u64 val, return 0; } +static int reloc_insn_adrp(struct module *mod, __le32 *place, u64 val) +{ + u32 insn; + + if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_843419) || + ((u64)place & 0xfff) < 0xff8) + return reloc_insn_imm(RELOC_OP_PAGE, place, val, 12, 21, + AARCH64_INSN_IMM_ADR); + + /* patch ADRP to ADR if it is in range */ + if (!reloc_insn_imm(RELOC_OP_PREL, place, val & ~0xfff, 0, 21, + AARCH64_INSN_IMM_ADR)) { + insn = le32_to_cpu(*place); + insn &= ~BIT(31); + } else { + /* out of range for ADR -> emit a veneer */ + val = module_emit_adrp_veneer(mod, place, val & ~0xfff); + if (!val) + return -ENOEXEC; + insn = aarch64_insn_gen_branch_imm((u64)place, val, + AARCH64_INSN_BRANCH_NOLINK); + } + + *place = cpu_to_le32(insn); + return 0; +} + int apply_relocate_add(Elf64_Shdr *sechdrs, const char *strtab, unsigned int symindex, @@ -337,14 +364,13 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 0, 21, AARCH64_INSN_IMM_ADR); break; -#ifndef CONFIG_ARM64_ERRATUM_843419 case R_AARCH64_ADR_PREL_PG_HI21_NC: overflow_check = false; case R_AARCH64_ADR_PREL_PG_HI21: - ovf = reloc_insn_imm(RELOC_OP_PAGE, loc, val, 12, 21, - AARCH64_INSN_IMM_ADR); + ovf = reloc_insn_adrp(me, loc, val); + if (ovf && ovf != -ERANGE) + return ovf; break; -#endif case R_AARCH64_ADD_ABS_LO12_NC: case R_AARCH64_LDST8_ABS_LO12_NC: overflow_check = false; diff --git a/arch/arm64/kernel/reloc_test_core.c b/arch/arm64/kernel/reloc_test_core.c index c124752a8bd3..a70489c584c7 100644 --- a/arch/arm64/kernel/reloc_test_core.c +++
b/arch/arm64/kernel/reloc_test_core.c @@ -28,6 +28,7 @@ asmlinkage u64 absolute_data16(void); asmlinkage u64 signed_movw(void); asmlinkage u64 unsigned_movw(void); asmlinkage u64 relative_adrp(void); +asmlinkage u64 relative_adrp_far(void); asmlinkage u64 relative_adr(void); asmlinkage u64 relative_data64(void); asmlinkage u64 relative_data32(void); @@ -43,9 +44,8 @@ static struct { { "R_AARCH64_ABS16", absolute_data16, UL(SYM16_ABS_VAL) }, { "R_AARCH64_MOVW_SABS_Gn", signed_movw, UL(SYM64_ABS_VAL) }, { "R_AARCH64_MOVW_UABS_Gn", unsigned_movw, UL(SYM64_ABS_VAL) }, -#ifndef CONFIG_ARM64_ERRATUM_843419 { "R_AARCH64_ADR_PREL_PG_HI21", relative_adrp, (u64)&sym64_rel }, -#endif + { "R_AARCH64_ADR_PREL_PG_HI21", relative_adrp_far, (u64)&printk }, { "R_AARCH64_ADR_PREL_LO21", relative_adr, (u64)&sym64_rel }, { "R_AARCH64_PREL64", relative_data64, (u64)&sym64_rel }, { "R_AARCH64_PREL32", relative_data32, (u64)&sym64_rel }, diff --git a/arch/arm64/kernel/reloc_test_syms.S b/arch/arm64/kernel/reloc_test_syms.S index e1edcefeb02d..f333b4b7880d 100644 --- a/arch/arm64/kernel/reloc_test_syms.S +++ b/arch/arm64/kernel/reloc_test_syms.S @@ -43,15 +43,21 @@ ENTRY(unsigned_movw) ret ENDPROC(unsigned_movw) -#ifndef CONFIG_ARM64_ERRATUM_843419 - + .align 12 + .space 0xff8 ENTRY(relative_adrp) adrp x0, sym64_rel add x0, x0, #:lo12:sym64_rel ret ENDPROC(relative_adrp) -#endif + .align 12 + .space 0xffc +ENTRY(relative_adrp_far) + adrp x0, printk + add x0, x0, #:lo12:printk + ret +ENDPROC(relative_adrp_far) ENTRY(relative_adr) adr x0, sym64_rel -- cgit v1.2.3-59-g8ed1b From e8002e02abf052c07bb87b867789034bc79aac10 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 6 Mar 2018 17:15:34 +0000 Subject: arm64/errata: add REVIDR handling to framework In some cases, core variants that are affected by a certain erratum also exist in versions that have the erratum fixed, and this fact is recorded in a dedicated bit in system register REVIDR_EL1. Since the architecture does not require that a certain bit retains its meaning across different variants of the same model, each such REVIDR bit is tightly coupled to a certain revision/variant value, and so we need a list of revidr_mask/midr pairs to carry this information. So add the struct member and the associated macros and handling to allow REVIDR fixes to be taken into account. 
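To illustrate the matching rule in isolation, here is a self-contained sketch; the struct and parameter names are simplified stand-ins for the arm64_midr_revidr handling added in the hunks below:

    #include <stdbool.h>
    #include <stdint.h>

    struct midr_revidr { uint32_t midr_rv, revidr_mask; };

    /* A CPU inside the affected MIDR range is treated as fixed (and thus
     * unaffected) only if its revision/variant matches an entry exactly
     * and REVIDR_EL1 has the corresponding fix bit set. */
    static bool cpu_is_affected(uint32_t midr_rev_var, uint32_t revidr,
                                const struct midr_revidr *fixed, int n)
    {
            for (int i = 0; i < n; i++)
                    if (midr_rev_var == fixed[i].midr_rv &&
                        (revidr & fixed[i].revidr_mask))
                            return false;
            return true;
    }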
Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 4 ++++ arch/arm64/kernel/cpu_errata.c | 21 ++++++++++++++++++--- 2 files changed, 22 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 060e3a4008ab..fbf0aab94d67 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -105,6 +105,10 @@ struct arm64_cpu_capabilities { struct { /* To be used for erratum handling only */ u32 midr_model; u32 midr_range_min, midr_range_max; + const struct arm64_midr_revidr { + u32 midr_rv; /* revision/variant */ + u32 revidr_mask; + } * const fixed_revs; }; struct { /* Feature register checking */ diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 52f15cd896e1..b161abdd6e27 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -24,10 +24,22 @@ static bool __maybe_unused is_affected_midr_range(const struct arm64_cpu_capabilities *entry, int scope) { + const struct arm64_midr_revidr *fix; + u32 midr = read_cpuid_id(), revidr; + WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); - return MIDR_IS_CPU_MODEL_RANGE(read_cpuid_id(), entry->midr_model, - entry->midr_range_min, - entry->midr_range_max); + if (!MIDR_IS_CPU_MODEL_RANGE(midr, entry->midr_model, + entry->midr_range_min, + entry->midr_range_max)) + return false; + + midr &= MIDR_REVISION_MASK | MIDR_VARIANT_MASK; + revidr = read_cpuid(REVIDR_EL1); + for (fix = entry->fixed_revs; fix && fix->revidr_mask; fix++) + if (midr == fix->midr_rv && (revidr & fix->revidr_mask)) + return false; + + return true; } static bool __maybe_unused @@ -242,6 +254,9 @@ static int qcom_enable_link_stack_sanitization(void *data) .midr_range_min = 0, \ .midr_range_max = (MIDR_VARIANT_MASK | MIDR_REVISION_MASK) +#define MIDR_FIXED(rev, revidr_mask) \ + .fixed_revs = (struct arm64_midr_revidr[]){{ (rev), (revidr_mask) }, {}} + const struct arm64_cpu_capabilities arm64_errata[] = { #if defined(CONFIG_ARM64_ERRATUM_826319) || \ defined(CONFIG_ARM64_ERRATUM_827319) || \ -- cgit v1.2.3-59-g8ed1b From ca79acca273630935f2cfdfdf3fc7425ff51ce1c Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 6 Mar 2018 17:15:35 +0000 Subject: arm64/kernel: enable A53 erratum #843419 handling at runtime Omit patching of ADRP instructions at module load time if the current CPUs are not susceptible to the erratum.
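Condensed into a stand-alone sketch, the patching decision now has three inputs; cfg_843419 and cpu_needs_843419 are stand-ins for IS_ENABLED(CONFIG_ARM64_ERRATUM_843419) and cpus_have_const_cap(ARM64_WORKAROUND_843419) in the hunks below:

    #include <stdbool.h>
    #include <stdint.h>

    /* ADRP patching is attempted only when the workaround is compiled in,
     * at least one CPU in the system needs it, and the instruction sits
     * at a vulnerable page offset. */
    static bool must_patch_adrp(bool cfg_843419, bool cpu_needs_843419,
                                uint64_t place)
    {
            return cfg_843419 && cpu_needs_843419 &&
                   (place & 0xfff) >= 0xff8;
    }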
Signed-off-by: Ard Biesheuvel [will: Drop duplicate initialisation of .def_scope field] Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpucaps.h | 3 ++- arch/arm64/kernel/cpu_errata.c | 9 +++++++++ arch/arm64/kernel/module-plts.c | 3 ++- arch/arm64/kernel/module.c | 1 + 4 files changed, 14 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index bb263820de13..39134c46bb13 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -45,7 +45,8 @@ #define ARM64_HARDEN_BRANCH_PREDICTOR 24 #define ARM64_HARDEN_BP_POST_GUEST_EXIT 25 #define ARM64_HAS_RAS_EXTN 26 +#define ARM64_WORKAROUND_843419 27 -#define ARM64_NCAPS 27 +#define ARM64_NCAPS 28 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index b161abdd6e27..186c0fc61dcd 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -298,6 +298,15 @@ const struct arm64_cpu_capabilities arm64_errata[] = { MIDR_CPU_VAR_REV(1, 2)), }, #endif +#ifdef CONFIG_ARM64_ERRATUM_843419 + { + /* Cortex-A53 r0p[01234] */ + .desc = "ARM erratum 843419", + .capability = ARM64_WORKAROUND_843419, + MIDR_RANGE(MIDR_CORTEX_A53, 0x00, 0x04), + MIDR_FIXED(0x4, BIT(8)), + }, +#endif #ifdef CONFIG_ARM64_ERRATUM_845719 { /* Cortex-A53 r0p[01234] */ diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c index 271b77390de0..fa3637284a3d 100644 --- a/arch/arm64/kernel/module-plts.c +++ b/arch/arm64/kernel/module-plts.c @@ -158,7 +158,8 @@ static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num, break; case R_AARCH64_ADR_PREL_PG_HI21_NC: case R_AARCH64_ADR_PREL_PG_HI21: - if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_843419)) + if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_843419) || + !cpus_have_const_cap(ARM64_WORKAROUND_843419)) break; /* diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index f9d824947c34..719fde8dcc19 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -203,6 +203,7 @@ static int reloc_insn_adrp(struct module *mod, __le32 *place, u64 val) u32 insn; if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_843419) || + !cpus_have_const_cap(ARM64_WORKAROUND_843419) || ((u64)place & 0xfff) < 0xff8) return reloc_insn_imm(RELOC_OP_PAGE, place, val, 12, 21, AARCH64_INSN_IMM_ADR); -- cgit v1.2.3-59-g8ed1b From 6ae4b6e0578886eb36cedbf99f04031d93f9e315 Mon Sep 17 00:00:00 2001 From: Shanker Donthineni Date: Wed, 7 Mar 2018 09:00:08 -0600 Subject: arm64: Add support for new control bits CTR_EL0.DIC and CTR_EL0.IDC The DCache clean & ICache invalidation requirements for instructions to be data coherent are discoverable through new fields in CTR_EL0. The following two control bits DIC and IDC were defined for this purpose. There is no need to perform point of unification cache maintenance operations from software on systems where CPU caches are transparent. This patch optimizes the three functions __flush_cache_user_range(), clean_dcache_area_pou() and invalidate_icache_range() if the hardware reports CTR_EL0.IDC and/or CTR_EL0.DIC. Basically it skips the two instructions 'DC CVAU' and 'IC IVAU', and the associated loop logic in order to avoid the unnecessary overhead. CTR_EL0.DIC: Instruction cache invalidation requirements for instruction to data coherence. The meaning of this bit [29]: 0: Instruction cache invalidation to the point of unification is required for instruction to data coherence.
1: Instruction cache invalidation to the point of unification is not required for instruction to data coherence. CTR_EL0.IDC: Data cache clean requirements for instruction to data coherence. The meaning of this bit [28]: 0: Data cache clean to the point of unification is required for instruction to data coherence, unless CLIDR_EL1.LoC == 0b000 or (CLIDR_EL1.LoUIS == 0b000 && CLIDR_EL1.LoUU == 0b000). 1: Data cache clean to the point of unification is not required for instruction to data coherence. Co-authored-by: Philip Elcan Reviewed-by: Mark Rutland Signed-off-by: Shanker Donthineni Signed-off-by: Will Deacon --- arch/arm64/include/asm/cache.h | 4 ++++ arch/arm64/include/asm/cacheflush.h | 3 +++ arch/arm64/include/asm/cpucaps.h | 4 +++- arch/arm64/kernel/cpufeature.c | 36 ++++++++++++++++++++++++++++++------ arch/arm64/mm/cache.S | 21 ++++++++++++++++++++- 5 files changed, 60 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index b2e6ece23713..5df5cfe1c143 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -20,8 +20,12 @@ #define CTR_L1IP_SHIFT 14 #define CTR_L1IP_MASK 3 +#define CTR_DMINLINE_SHIFT 16 +#define CTR_ERG_SHIFT 20 #define CTR_CWG_SHIFT 24 #define CTR_CWG_MASK 15 +#define CTR_IDC_SHIFT 28 +#define CTR_DIC_SHIFT 29 #define CTR_L1IP(ctr) (((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK) diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index bef9f418f089..7dfcec4700fe 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -133,6 +133,9 @@ extern void flush_dcache_page(struct page *); static inline void __flush_icache_all(void) { + if (cpus_have_const_cap(ARM64_HAS_CACHE_DIC)) + return; + asm("ic ialluis"); dsb(ish); } diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 39134c46bb13..ff9fb3aba17b 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -46,7 +46,9 @@ #define ARM64_HARDEN_BP_POST_GUEST_EXIT 25 #define ARM64_HAS_RAS_EXTN 26 #define ARM64_WORKAROUND_843419 27 +#define ARM64_HAS_CACHE_IDC 28 +#define ARM64_HAS_CACHE_DIC 29 -#define ARM64_NCAPS 28 +#define ARM64_NCAPS 30 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index f96b3449034b..bdab55223866 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -199,12 +199,12 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = { }; static const struct arm64_ftr_bits ftr_ctr[] = { - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RES1 */ - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 29, 1, 1), /* DIC */ - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 28, 1, 1), /* IDC */ - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0), /* CWG */ - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, 20, 4, 0), /* ERG */ - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1), /* DminLine */ + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RES1 */ + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_DIC_SHIFT, 1, 1), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_IDC_SHIFT, 1, 1), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, CTR_CWG_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, CTR_ERG_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE,
CTR_DMINLINE_SHIFT, 4, 1), /* * Linux can handle differing I-cache policies. Userspace JITs will * make use of *minLine. @@ -852,6 +852,18 @@ static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unus ID_AA64PFR0_FP_SHIFT) < 0; } +static bool has_cache_idc(const struct arm64_cpu_capabilities *entry, + int __unused) +{ + return read_sanitised_ftr_reg(SYS_CTR_EL0) & BIT(CTR_IDC_SHIFT); +} + +static bool has_cache_dic(const struct arm64_cpu_capabilities *entry, + int __unused) +{ + return read_sanitised_ftr_reg(SYS_CTR_EL0) & BIT(CTR_DIC_SHIFT); +} + #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 static int __kpti_forced; /* 0: not forced, >0: forced on, <0: forced off */ @@ -1088,6 +1100,18 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .enable = cpu_clear_disr, }, #endif /* CONFIG_ARM64_RAS_EXTN */ + { + .desc = "Data cache clean to the PoU not required for I/D coherence", + .capability = ARM64_HAS_CACHE_IDC, + .def_scope = SCOPE_SYSTEM, + .matches = has_cache_idc, + }, + { + .desc = "Instruction cache invalidation not required for I/D coherence", + .capability = ARM64_HAS_CACHE_DIC, + .def_scope = SCOPE_SYSTEM, + .matches = has_cache_dic, + }, {}, }; diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 758bde7e2fa6..30334d81b021 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -50,6 +50,10 @@ ENTRY(flush_icache_range) */ ENTRY(__flush_cache_user_range) uaccess_ttbr0_enable x2, x3, x4 +alternative_if ARM64_HAS_CACHE_IDC + dsb ishst + b 7f +alternative_else_nop_endif dcache_line_size x2, x3 sub x3, x2, #1 bic x4, x0, x3 @@ -60,8 +64,13 @@ user_alt 9f, "dc cvau, x4", "dc civac, x4", ARM64_WORKAROUND_CLEAN_CACHE b.lo 1b dsb ish +7: +alternative_if ARM64_HAS_CACHE_DIC + isb + b 8f +alternative_else_nop_endif invalidate_icache_by_line x0, x1, x2, x3, 9f - mov x0, #0 +8: mov x0, #0 1: uaccess_ttbr0_disable x1, x2 ret @@ -80,6 +89,12 @@ ENDPROC(__flush_cache_user_range) * - end - virtual end address of region */ ENTRY(invalidate_icache_range) +alternative_if ARM64_HAS_CACHE_DIC + mov x0, xzr + isb + ret +alternative_else_nop_endif + uaccess_ttbr0_enable x2, x3, x4 invalidate_icache_by_line x0, x1, x2, x3, 2f @@ -116,6 +131,10 @@ ENDPIPROC(__flush_dcache_area) * - size - size in question */ ENTRY(__clean_dcache_area_pou) +alternative_if ARM64_HAS_CACHE_IDC + dsb ishst + ret +alternative_else_nop_endif dcache_by_line_op cvau, ish, x0, x1, x2, x3 ret ENDPROC(__clean_dcache_area_pou) -- cgit v1.2.3-59-g8ed1b From af40ff687bc9d351030685fde2f57ba45ab4fc14 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Thu, 8 Mar 2018 17:41:05 +0000 Subject: arm64: signal: Ensure si_code is valid for all fault signals Currently, as reported by Eric, an invalid si_code value 0 is passed in many signals delivered to userspace in response to faults and other kernel errors. Typically 0 is passed when the fault is insufficiently diagnosable or when there does not appear to be any sensible alternative value to choose. This appears to violate POSIX, and is intuitively wrong for at least two reasons arising from the fact that 0 == SI_USER: 1) si_code is a union selector, and SI_USER (and si_code <= 0 in general) implies the existence of a different set of fields (siginfo._kill) from that which exists for a fault signal (siginfo._sigfault). However, the code raising the signal typically writes only the _sigfault fields, and the _kill fields make no sense in this case. 
Thus when userspace sees si_code == 0 (SI_USER) it may legitimately inspect fields in the inactive union member _kill and obtain garbage as a result. There appears to be software in the wild relying on this, albeit generally only for printing diagnostic messages. 2) Software that wants to be robust against spurious signals may discard signals where si_code == SI_USER (or <= 0), or may filter such signals based on the si_uid and si_pid fields of siginfo._sigkill. In the case of fault signals, this means that important (and usually fatal) error conditions may be silently ignored. In practice, many of the faults for which arm64 passes si_code == 0 are undiagnosable conditions such as exceptions with syndrome values in ESR_ELx to which the architecture does not yet assign any meaning, or conditions indicative of a bug or error in the kernel or system and thus that are unrecoverable and should never occur in normal operation. The approach taken in this patch is to translate all such undiagnosable or "impossible" synchronous fault conditions to SIGKILL, since these are at least probably localisable to a single process. Some of these conditions should really result in a kernel panic, but due to the lack of diagnostic information it is difficult to be certain: this patch does not add any calls to panic(), but this could change later if justified. Although si_code will not reach userspace in the case of SIGKILL, it is still desirable to pass a nonzero value so that the common siginfo handling code can detect incorrect use of si_code == 0 without false positives. In this case the si_code dependent siginfo fields will not be correctly initialised, but since they are not passed to userspace I deem this not to matter. A few faults can reasonably occur in realistic userspace scenarios, and _should_ raise a regular, handleable (but perhaps not ignorable/blockable) signal: for these, this patch attempts to choose a suitable standard si_code value for the raised signal in each case instead of 0. arm64 was the only arch to define a BUS_FIXME code, so after this patch nobody defines it. This patch therefore also removes the relevant code from siginfo_layout(). Cc: James Morse Reported-by: Eric W. Biederman Signed-off-by: Dave Martin Signed-off-by: Will Deacon --- arch/arm64/include/uapi/asm/siginfo.h | 14 ---- arch/arm64/kernel/fpsimd.c | 2 +- arch/arm64/mm/fault.c | 116 +++++++++++++++++----------------- kernel/signal.c | 4 -- 4 files changed, 59 insertions(+), 77 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/uapi/asm/siginfo.h b/arch/arm64/include/uapi/asm/siginfo.h index 9b4d91277742..8d7dbbcce780 100644 --- a/arch/arm64/include/uapi/asm/siginfo.h +++ b/arch/arm64/include/uapi/asm/siginfo.h @@ -28,18 +28,4 @@ #define FPE_FIXME 0 /* Broken dup of SI_USER */ #endif /* __KERNEL__ */ -/* - * SIGBUS si_codes - */ -#ifdef __KERNEL__ -#define BUS_FIXME 0 /* Broken dup of SI_USER */ -#endif /* __KERNEL__ */ - -/* - * SIGTRAP si_codes - */ -#ifdef __KERNEL__ -#define TRAP_FIXME 0 /* Broken dup of SI_USER */ -#endif /* __KERNEL__ */ - #endif diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 6964ff867d4a..65fc87645ec6 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -285,7 +285,7 @@ static void task_fpsimd_save(void) * re-enter user with corrupt state. 
* There's no way to recover, so kill it: */ - force_signal_inject(SIGKILL, 0, 0); + force_signal_inject(SIGKILL, SI_KERNEL, 0); return; } diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 49dfb08a6c4d..551d044fb31f 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -583,9 +583,9 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs) nmi_exit(); } - info.si_signo = SIGBUS; + info.si_signo = inf->sig; info.si_errno = 0; - info.si_code = BUS_FIXME; + info.si_code = inf->code; if (esr & ESR_ELx_FnV) info.si_addr = NULL; else @@ -596,70 +596,70 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs) } static const struct fault_info fault_info[] = { - { do_bad, SIGBUS, BUS_FIXME, "ttbr address size fault" }, - { do_bad, SIGBUS, BUS_FIXME, "level 1 address size fault" }, - { do_bad, SIGBUS, BUS_FIXME, "level 2 address size fault" }, - { do_bad, SIGBUS, BUS_FIXME, "level 3 address size fault" }, + { do_bad, SIGKILL, SI_KERNEL, "ttbr address size fault" }, + { do_bad, SIGKILL, SI_KERNEL, "level 1 address size fault" }, + { do_bad, SIGKILL, SI_KERNEL, "level 2 address size fault" }, + { do_bad, SIGKILL, SI_KERNEL, "level 3 address size fault" }, { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 0 translation fault" }, { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 1 translation fault" }, { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 2 translation fault" }, { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 3 translation fault" }, - { do_bad, SIGBUS, BUS_FIXME, "unknown 8" }, + { do_bad, SIGKILL, SI_KERNEL, "unknown 8" }, { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 access flag fault" }, { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" }, { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 access flag fault" }, - { do_bad, SIGBUS, BUS_FIXME, "unknown 12" }, + { do_bad, SIGKILL, SI_KERNEL, "unknown 12" }, { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 permission fault" }, { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" }, { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 permission fault" }, - { do_sea, SIGBUS, BUS_FIXME, "synchronous external abort" }, - { do_bad, SIGBUS, BUS_FIXME, "unknown 17" }, - { do_bad, SIGBUS, BUS_FIXME, "unknown 18" }, - { do_bad, SIGBUS, BUS_FIXME, "unknown 19" }, - { do_sea, SIGBUS, BUS_FIXME, "level 0 (translation table walk)" }, - { do_sea, SIGBUS, BUS_FIXME, "level 1 (translation table walk)" }, - { do_sea, SIGBUS, BUS_FIXME, "level 2 (translation table walk)" }, - { do_sea, SIGBUS, BUS_FIXME, "level 3 (translation table walk)" }, - { do_sea, SIGBUS, BUS_FIXME, "synchronous parity or ECC error" }, // Reserved when RAS is implemented - { do_bad, SIGBUS, BUS_FIXME, "unknown 25" }, - { do_bad, SIGBUS, BUS_FIXME, "unknown 26" }, - { do_bad, SIGBUS, BUS_FIXME, "unknown 27" }, - { do_sea, SIGBUS, BUS_FIXME, "level 0 synchronous parity error (translation table walk)" }, // Reserved when RAS is implemented - { do_sea, SIGBUS, BUS_FIXME, "level 1 synchronous parity error (translation table walk)" }, // Reserved when RAS is implemented - { do_sea, SIGBUS, BUS_FIXME, "level 2 synchronous parity error (translation table walk)" }, // Reserved when RAS is implemented - { do_sea, SIGBUS, BUS_FIXME, "level 3 synchronous parity error (translation table walk)" }, // Reserved when RAS is implemented - { do_bad, SIGBUS, BUS_FIXME, "unknown 32" }, + { do_sea, SIGBUS, BUS_OBJERR, "synchronous external abort" }, + { do_bad, SIGKILL, SI_KERNEL, "unknown 17" }, + 
{ do_bad, SIGKILL, SI_KERNEL, "unknown 18" }, + { do_bad, SIGKILL, SI_KERNEL, "unknown 19" }, + { do_sea, SIGKILL, SI_KERNEL, "level 0 (translation table walk)" }, + { do_sea, SIGKILL, SI_KERNEL, "level 1 (translation table walk)" }, + { do_sea, SIGKILL, SI_KERNEL, "level 2 (translation table walk)" }, + { do_sea, SIGKILL, SI_KERNEL, "level 3 (translation table walk)" }, + { do_sea, SIGBUS, BUS_OBJERR, "synchronous parity or ECC error" }, // Reserved when RAS is implemented + { do_bad, SIGKILL, SI_KERNEL, "unknown 25" }, + { do_bad, SIGKILL, SI_KERNEL, "unknown 26" }, + { do_bad, SIGKILL, SI_KERNEL, "unknown 27" }, + { do_sea, SIGKILL, SI_KERNEL, "level 0 synchronous parity error (translation table walk)" }, // Reserved when RAS is implemented + { do_sea, SIGKILL, SI_KERNEL, "level 1 synchronous parity error (translation table walk)" }, // Reserved when RAS is implemented + { do_sea, SIGKILL, SI_KERNEL, "level 2 synchronous parity error (translation table walk)" }, // Reserved when RAS is implemented + { do_sea, SIGKILL, SI_KERNEL, "level 3 synchronous parity error (translation table walk)" }, // Reserved when RAS is implemented + { do_bad, SIGKILL, SI_KERNEL, "unknown 32" }, { do_alignment_fault, SIGBUS, BUS_ADRALN, "alignment fault" }, - { do_bad, SIGBUS, BUS_FIXME, "unknown 34" }, - { do_bad, SIGBUS, BUS_FIXME, "unknown 35" }, - { do_bad, SIGBUS, BUS_FIXME, "unknown 36" }, - { do_bad, SIGBUS, BUS_FIXME, "unknown 37" }, - { do_bad, SIGBUS, BUS_FIXME, "unknown 38" }, - { do_bad, SIGBUS, BUS_FIXME, "unknown 39" }, - { do_bad, SIGBUS, BUS_FIXME, "unknown 40" }, - { do_bad, SIGBUS, BUS_FIXME, "unknown 41" }, - { do_bad, SIGBUS, BUS_FIXME, "unknown 42" }, - { do_bad, SIGBUS, BUS_FIXME, "unknown 43" }, - { do_bad, SIGBUS, BUS_FIXME, "unknown 44" }, - { do_bad, SIGBUS, BUS_FIXME, "unknown 45" }, - { do_bad, SIGBUS, BUS_FIXME, "unknown 46" }, - { do_bad, SIGBUS, BUS_FIXME, "unknown 47" }, - { do_bad, SIGBUS, BUS_FIXME, "TLB conflict abort" }, - { do_bad, SIGBUS, BUS_FIXME, "Unsupported atomic hardware update fault" }, - { do_bad, SIGBUS, BUS_FIXME, "unknown 50" }, - { do_bad, SIGBUS, BUS_FIXME, "unknown 51" }, - { do_bad, SIGBUS, BUS_FIXME, "implementation fault (lockdown abort)" }, - { do_bad, SIGBUS, BUS_FIXME, "implementation fault (unsupported exclusive)" }, - { do_bad, SIGBUS, BUS_FIXME, "unknown 54" }, - { do_bad, SIGBUS, BUS_FIXME, "unknown 55" }, - { do_bad, SIGBUS, BUS_FIXME, "unknown 56" }, - { do_bad, SIGBUS, BUS_FIXME, "unknown 57" }, - { do_bad, SIGBUS, BUS_FIXME, "unknown 58" }, - { do_bad, SIGBUS, BUS_FIXME, "unknown 59" }, - { do_bad, SIGBUS, BUS_FIXME, "unknown 60" }, - { do_bad, SIGBUS, BUS_FIXME, "section domain fault" }, - { do_bad, SIGBUS, BUS_FIXME, "page domain fault" }, - { do_bad, SIGBUS, BUS_FIXME, "unknown 63" }, + { do_bad, SIGKILL, SI_KERNEL, "unknown 34" }, + { do_bad, SIGKILL, SI_KERNEL, "unknown 35" }, + { do_bad, SIGKILL, SI_KERNEL, "unknown 36" }, + { do_bad, SIGKILL, SI_KERNEL, "unknown 37" }, + { do_bad, SIGKILL, SI_KERNEL, "unknown 38" }, + { do_bad, SIGKILL, SI_KERNEL, "unknown 39" }, + { do_bad, SIGKILL, SI_KERNEL, "unknown 40" }, + { do_bad, SIGKILL, SI_KERNEL, "unknown 41" }, + { do_bad, SIGKILL, SI_KERNEL, "unknown 42" }, + { do_bad, SIGKILL, SI_KERNEL, "unknown 43" }, + { do_bad, SIGKILL, SI_KERNEL, "unknown 44" }, + { do_bad, SIGKILL, SI_KERNEL, "unknown 45" }, + { do_bad, SIGKILL, SI_KERNEL, "unknown 46" }, + { do_bad, SIGKILL, SI_KERNEL, "unknown 47" }, + { do_bad, SIGKILL, SI_KERNEL, "TLB conflict abort" }, + { do_bad, SIGKILL, SI_KERNEL, 
"Unsupported atomic hardware update fault" }, + { do_bad, SIGKILL, SI_KERNEL, "unknown 50" }, + { do_bad, SIGKILL, SI_KERNEL, "unknown 51" }, + { do_bad, SIGKILL, SI_KERNEL, "implementation fault (lockdown abort)" }, + { do_bad, SIGBUS, BUS_OBJERR, "implementation fault (unsupported exclusive)" }, + { do_bad, SIGKILL, SI_KERNEL, "unknown 54" }, + { do_bad, SIGKILL, SI_KERNEL, "unknown 55" }, + { do_bad, SIGKILL, SI_KERNEL, "unknown 56" }, + { do_bad, SIGKILL, SI_KERNEL, "unknown 57" }, + { do_bad, SIGKILL, SI_KERNEL, "unknown 58" }, + { do_bad, SIGKILL, SI_KERNEL, "unknown 59" }, + { do_bad, SIGKILL, SI_KERNEL, "unknown 60" }, + { do_bad, SIGKILL, SI_KERNEL, "section domain fault" }, + { do_bad, SIGKILL, SI_KERNEL, "page domain fault" }, + { do_bad, SIGKILL, SI_KERNEL, "unknown 63" }, }; int handle_guest_sea(phys_addr_t addr, unsigned int esr) @@ -748,11 +748,11 @@ static struct fault_info __refdata debug_fault_info[] = { { do_bad, SIGTRAP, TRAP_HWBKPT, "hardware breakpoint" }, { do_bad, SIGTRAP, TRAP_HWBKPT, "hardware single-step" }, { do_bad, SIGTRAP, TRAP_HWBKPT, "hardware watchpoint" }, - { do_bad, SIGBUS, BUS_FIXME, "unknown 3" }, + { do_bad, SIGKILL, SI_KERNEL, "unknown 3" }, { do_bad, SIGTRAP, TRAP_BRKPT, "aarch32 BKPT" }, - { do_bad, SIGTRAP, TRAP_FIXME, "aarch32 vector catch" }, + { do_bad, SIGKILL, SI_KERNEL, "aarch32 vector catch" }, { early_brk64, SIGTRAP, TRAP_BRKPT, "aarch64 BRK" }, - { do_bad, SIGBUS, BUS_FIXME, "unknown 7" }, + { do_bad, SIGKILL, SI_KERNEL, "unknown 7" }, }; void __init hook_debug_fault_code(int nr, diff --git a/kernel/signal.c b/kernel/signal.c index c6e4c83dc090..049a482e705c 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2843,10 +2843,6 @@ enum siginfo_layout siginfo_layout(int sig, int si_code) #ifdef FPE_FIXME if ((sig == SIGFPE) && (si_code == FPE_FIXME)) layout = SIL_FAULT; -#endif -#ifdef BUS_FIXME - if ((sig == SIGBUS) && (si_code == BUS_FIXME)) - layout = SIL_FAULT; #endif } return layout; -- cgit v1.2.3-59-g8ed1b From 266da65e9156d93e1126e185259a4aae68188d0e Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Thu, 1 Mar 2018 17:44:06 +0000 Subject: signal: Add FPE_FLTUNK si_code for undiagnosable fp exceptions Some architectures cannot always report accurately what kind of floating-point exception triggered a floating-point exception trap. This can occur with fp exceptions occurring on lanes in a vector instruction on arm64 for example. Rather than have every architecture come up with its own way of describing such a condition, this patch adds a common FPE_FLTUNK si_code value to report that an fp exception caused a trap but we cannot be certain which kind of fp exception it was. Signed-off-by: Dave Martin Signed-off-by: Eric W. Biederman --- arch/x86/kernel/signal_compat.c | 2 +- include/uapi/asm-generic/siginfo.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c index 0d930d8987cc..d2884e951bb5 100644 --- a/arch/x86/kernel/signal_compat.c +++ b/arch/x86/kernel/signal_compat.c @@ -26,7 +26,7 @@ static inline void signal_compat_build_tests(void) * new fields are handled in copy_siginfo_to_user32()! 
*/ BUILD_BUG_ON(NSIGILL != 11); - BUILD_BUG_ON(NSIGFPE != 13); + BUILD_BUG_ON(NSIGFPE != 14); BUILD_BUG_ON(NSIGSEGV != 4); BUILD_BUG_ON(NSIGBUS != 5); BUILD_BUG_ON(NSIGTRAP != 4); diff --git a/include/uapi/asm-generic/siginfo.h b/include/uapi/asm-generic/siginfo.h index 99c902e460c2..4b3520bf67ba 100644 --- a/include/uapi/asm-generic/siginfo.h +++ b/include/uapi/asm-generic/siginfo.h @@ -229,7 +229,8 @@ typedef struct siginfo { # define __FPE_INVASC 12 /* invalid ASCII digit */ # define __FPE_INVDEC 13 /* invalid decimal digit */ #endif -#define NSIGFPE 13 +#define FPE_FLTUNK 14 /* undiagnosed floating-point exception */ +#define NSIGFPE 14 /* * SIGSEGV si_codes -- cgit v1.2.3-59-g8ed1b From bd99f9a159b072be743c6681f81e06b9ebd370a4 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 13 Mar 2018 12:41:41 +0100 Subject: arm64: fix undefined reference to 'printk' The printk symbol was intended as a generic address that is always exported, however that turned out to be false with CONFIG_PRINTK=n: ERROR: "printk" [arch/arm64/kernel/arm64-reloc-test.ko] undefined! This changes the references to memstart_addr, which should be there regardless of configuration. Fixes: a257e02579e4 ("arm64/kernel: don't ban ADRP to work around Cortex-A53 erratum #843419") Acked-by: Ard Biesheuvel Signed-off-by: Arnd Bergmann Signed-off-by: Will Deacon --- arch/arm64/kernel/reloc_test_core.c | 2 +- arch/arm64/kernel/reloc_test_syms.S | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/reloc_test_core.c b/arch/arm64/kernel/reloc_test_core.c index a70489c584c7..5915ce5759cc 100644 --- a/arch/arm64/kernel/reloc_test_core.c +++ b/arch/arm64/kernel/reloc_test_core.c @@ -45,7 +45,7 @@ static struct { { "R_AARCH64_MOVW_SABS_Gn", signed_movw, UL(SYM64_ABS_VAL) }, { "R_AARCH64_MOVW_UABS_Gn", unsigned_movw, UL(SYM64_ABS_VAL) }, { "R_AARCH64_ADR_PREL_PG_HI21", relative_adrp, (u64)&sym64_rel }, - { "R_AARCH64_ADR_PREL_PG_HI21", relative_adrp_far, (u64)&printk }, + { "R_AARCH64_ADR_PREL_PG_HI21", relative_adrp_far, (u64)&memstart_addr }, { "R_AARCH64_ADR_PREL_LO21", relative_adr, (u64)&sym64_rel }, { "R_AARCH64_PREL64", relative_data64, (u64)&sym64_rel }, { "R_AARCH64_PREL32", relative_data32, (u64)&sym64_rel }, diff --git a/arch/arm64/kernel/reloc_test_syms.S b/arch/arm64/kernel/reloc_test_syms.S index f333b4b7880d..2b8d9cb8b078 100644 --- a/arch/arm64/kernel/reloc_test_syms.S +++ b/arch/arm64/kernel/reloc_test_syms.S @@ -54,8 +54,8 @@ ENDPROC(relative_adrp) .align 12 .space 0xffc ENTRY(relative_adrp_far) - adrp x0, printk - add x0, x0, #:lo12:printk + adrp x0, memstart_addr + add x0, x0, #:lo12:memstart_addr ret ENDPROC(relative_adrp_far) -- cgit v1.2.3-59-g8ed1b From 350e1dad0dd8c55750f9d4fa6b19cea1a0037ace Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 10 Mar 2018 14:59:29 +0000 Subject: arm64: asm: drop special versions of adr_l/ldr_l/str_l for modules Now that we started keeping modules within 4 GB of the core kernel in all cases, we no longer need to special case the adr_l/ldr_l/str_l macros for modules to deal with them being loaded farther away. 
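For intuition, a small self-contained sketch (an editorial illustration, not kernel code) of why a single adrp/add pair now always suffices: ADRP encodes a signed 21-bit page offset, i.e. +/- 4 GB around the PC, and the module allocator guarantees modules land inside that window:

    #include <stdbool.h>
    #include <stdint.h>

    static bool adrp_can_reach(uint64_t pc, uint64_t sym)
    {
            /* distance in 4 KB pages; ADRP holds a signed 21-bit count */
            int64_t pages = ((int64_t)(sym & ~0xfffULL) -
                             (int64_t)(pc & ~0xfffULL)) >> 12;

            return pages >= -(1 << 20) && pages < (1 << 20);
    }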
Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/include/asm/assembler.h | 34 ++-------------------------------- 1 file changed, 2 insertions(+), 32 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 3c78835bba94..053d83e8db6f 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -202,25 +202,15 @@ lr .req x30 // link register /* * Pseudo-ops for PC-relative adr/ldr/str <reg>, <sym> where - * <sym> is within the range +/- 4 GB of the PC when running - * in core kernel context. In module context, a movz/movk sequence - * is used, since modules may be loaded far away from the kernel - * when KASLR is in effect. + * <sym> is within the range +/- 4 GB of the PC. */ /* * @dst: destination register (64 bit wide) * @sym: name of the symbol */ .macro adr_l, dst, sym -#ifndef MODULE adrp \dst, \sym add \dst, \dst, :lo12:\sym -#else - movz \dst, #:abs_g3:\sym - movk \dst, #:abs_g2_nc:\sym - movk \dst, #:abs_g1_nc:\sym - movk \dst, #:abs_g0_nc:\sym -#endif .endm /* @@ -231,7 +221,6 @@ lr .req x30 // link register * the address */ .macro ldr_l, dst, sym, tmp= -#ifndef MODULE .ifb \tmp adrp \dst, \sym ldr \dst, [\dst, :lo12:\sym] @@ -239,15 +228,6 @@ lr .req x30 // link register adrp \tmp, \sym ldr \dst, [\tmp, :lo12:\sym] .endif -#else - .ifb \tmp - adr_l \dst, \sym - ldr \dst, [\dst] - .else - adr_l \tmp, \sym - ldr \dst, [\tmp] - .endif -#endif .endm /* @@ -257,28 +237,18 @@ lr .req x30 // link register * while <src> needs to be preserved. */ .macro str_l, src, sym, tmp -#ifndef MODULE adrp \tmp, \sym str \src, [\tmp, :lo12:\sym] -#else - adr_l \tmp, \sym - str \src, [\tmp] -#endif .endm /* - * @dst: Result of per_cpu(sym, smp_processor_id()), can be SP for - * non-module code + * @dst: Result of per_cpu(sym, smp_processor_id()) (can be SP) * @sym: The name of the per-cpu variable * @tmp: scratch register */ .macro adr_this_cpu, dst, sym, tmp -#ifndef MODULE adrp \tmp, \sym add \dst, \tmp, #:lo12:\sym -#else - adr_l \dst, \sym -#endif alternative_if_not ARM64_HAS_VIRT_HOST_EXTN mrs \tmp, tpidr_el1 alternative_else -- cgit v1.2.3-59-g8ed1b From 7206dc93a58fb76421c4411eefa3c003337bcb2d Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Mon, 12 Mar 2018 10:04:14 +0000 Subject: arm64: Expose Arm v8.4 features Expose the new features introduced by Arm v8.4 extensions to Arm v8-A profile. These include: 1) Data independent timing of instructions. (DIT, exposed as HWCAP_DIT) 2) Unaligned atomic instructions and Single-copy atomicity of loads and stores. (AT, exposed as HWCAP_USCAT) 3) LDAPR and STLR instructions with immediate offsets (extension to LRCPC, exposed as HWCAP_ILRCPC) 4) Flag manipulation instructions (TS, exposed as HWCAP_FLAGM).
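Userspace can probe the new bits with getauxval(3); the following sketch assumes a Linux/glibc environment, with fallback defines (matching the uapi hunk below) only for builds against older headers:

    #include <stdio.h>
    #include <sys/auxv.h>

    #ifndef HWCAP_DIT
    #define HWCAP_DIT    (1 << 24)  /* values match the uapi diff below */
    #define HWCAP_USCAT  (1 << 25)
    #define HWCAP_ILRCPC (1 << 26)
    #define HWCAP_FLAGM  (1 << 27)
    #endif

    int main(void)
    {
            unsigned long caps = getauxval(AT_HWCAP);

            printf("dit:%d uscat:%d ilrcpc:%d flagm:%d\n",
                   !!(caps & HWCAP_DIT), !!(caps & HWCAP_USCAT),
                   !!(caps & HWCAP_ILRCPC), !!(caps & HWCAP_FLAGM));
            return 0;
    }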
Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Rutland Reviewed-by: Dave Martin Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- Documentation/arm64/cpu-feature-registers.txt | 10 ++++++++++ Documentation/arm64/elf_hwcaps.txt | 16 ++++++++++++++++ arch/arm64/include/asm/sysreg.h | 3 +++ arch/arm64/include/uapi/asm/hwcap.h | 4 ++++ arch/arm64/kernel/cpufeature.c | 7 +++++++ arch/arm64/kernel/cpuinfo.c | 4 ++++ 6 files changed, 44 insertions(+) (limited to 'arch') diff --git a/Documentation/arm64/cpu-feature-registers.txt b/Documentation/arm64/cpu-feature-registers.txt index 22cfb86143ee..7964f03846b1 100644 --- a/Documentation/arm64/cpu-feature-registers.txt +++ b/Documentation/arm64/cpu-feature-registers.txt @@ -110,6 +110,7 @@ infrastructure: x--------------------------------------------------x | Name | bits | visible | |--------------------------------------------------| + | TS | [55-52] | y | |--------------------------------------------------| | FHM | [51-48] | y | |--------------------------------------------------| @@ -139,6 +140,7 @@ infrastructure: x--------------------------------------------------x | Name | bits | visible | |--------------------------------------------------| + | DIT | [51-48] | y | |--------------------------------------------------| | SVE | [35-32] | y | |--------------------------------------------------| @@ -191,6 +193,14 @@ infrastructure: | DPB | [3-0] | y | x--------------------------------------------------x + 5) ID_AA64MMFR2_EL1 - Memory model feature register 2 + + x--------------------------------------------------x + | Name | bits | visible | + |--------------------------------------------------| + | AT | [35-32] | y | + x--------------------------------------------------x + Appendix I: Example --------------------------- diff --git a/Documentation/arm64/elf_hwcaps.txt b/Documentation/arm64/elf_hwcaps.txt index 57324ee55ecc..d6aff2c5e9e2 100644 --- a/Documentation/arm64/elf_hwcaps.txt +++ b/Documentation/arm64/elf_hwcaps.txt @@ -162,3 +162,19 @@ HWCAP_SVE HWCAP_ASIMDFHM Functionality implied by ID_AA64ISAR0_EL1.FHM == 0b0001. + +HWCAP_DIT + + Functionality implied by ID_AA64PFR0_EL1.DIT == 0b0001. + +HWCAP_USCAT + + Functionality implied by ID_AA64MMFR2_EL1.AT == 0b0001. + +HWCAP_ILRCPC + + Functionality implied by ID_AA64ISAR1_EL1.LRCPC == 0b0010. + +HWCAP_FLAGM + + Functionality implied by ID_AA64ISAR0_EL1.TS == 0b0001.
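The visible ID-register fields documented above are also readable directly from userspace, since the kernel traps and emulates EL0 reads of the ID registers. A sketch, assuming an arm64 Linux system with MRS emulation and the TS field at bits [55:52] per the sysreg.h hunk that follows:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint64_t isar0;

            /* trapped by the CPU, emulated by the kernel for EL0 */
            asm volatile("mrs %0, ID_AA64ISAR0_EL1" : "=r" (isar0));
            printf("TS = %u\n", (unsigned)((isar0 >> 52) & 0xf));
            return 0;
    }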
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 0e1960c59197..e7b9f154e476 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -490,6 +490,7 @@ #define SCTLR_EL1_BUILD_BUG_ON_MISSING_BITS BUILD_BUG_ON((SCTLR_EL1_SET ^ SCTLR_EL1_CLEAR) != ~0) /* id_aa64isar0 */ +#define ID_AA64ISAR0_TS_SHIFT 52 #define ID_AA64ISAR0_FHM_SHIFT 48 #define ID_AA64ISAR0_DP_SHIFT 44 #define ID_AA64ISAR0_SM4_SHIFT 40 @@ -511,6 +512,7 @@ /* id_aa64pfr0 */ #define ID_AA64PFR0_CSV3_SHIFT 60 #define ID_AA64PFR0_CSV2_SHIFT 56 +#define ID_AA64PFR0_DIT_SHIFT 48 #define ID_AA64PFR0_SVE_SHIFT 32 #define ID_AA64PFR0_RAS_SHIFT 28 #define ID_AA64PFR0_GIC_SHIFT 24 @@ -568,6 +570,7 @@ #define ID_AA64MMFR1_VMIDBITS_16 2 /* id_aa64mmfr2 */ +#define ID_AA64MMFR2_AT_SHIFT 32 #define ID_AA64MMFR2_LVA_SHIFT 16 #define ID_AA64MMFR2_IESB_SHIFT 12 #define ID_AA64MMFR2_LSM_SHIFT 8 diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index f018c3deea3b..17c65c8f33cb 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -44,5 +44,9 @@ #define HWCAP_SHA512 (1 << 21) #define HWCAP_SVE (1 << 22) #define HWCAP_ASIMDFHM (1 << 23) +#define HWCAP_DIT (1 << 24) +#define HWCAP_USCAT (1 << 25) +#define HWCAP_ILRCPC (1 << 26) +#define HWCAP_FLAGM (1 << 27) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index bdab55223866..de4ea6a0208e 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -123,6 +123,7 @@ cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused) * sync with the documentation of the CPU feature register ABI. */ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = { + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_TS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_FHM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_DP_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SM4_SHIFT, 4, 0), @@ -148,6 +149,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV3_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV2_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_DIT_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SVE_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_RAS_SHIFT, 4, 0), @@ -190,6 +192,7 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = { }; static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = { + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_AT_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LVA_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_IESB_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LSM_SHIFT, 4, 0), @@ -1142,14 +1145,18 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SM4), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDDP), 
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDFHM), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_FLAGM), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_FP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_ASIMDHP), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_DIT_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_DIT), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_DCPOP), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_JSCVT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_JSCVT), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_FCMA), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_LRCPC), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_ILRCPC), + HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_USCAT), #ifdef CONFIG_ARM64_SVE HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, HWCAP_SVE), #endif diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 7f94623df8a5..e9ab7b3ed317 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -77,6 +77,10 @@ static const char *const hwcap_str[] = { "sha512", "sve", "asimdfhm", + "dit", + "uscat", + "ilrcpc", + "flagm", NULL }; -- cgit v1.2.3-59-g8ed1b From af4a81b9cd847441e047f99c2a2bc113ba96b0cd Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Thu, 1 Mar 2018 17:44:07 +0000 Subject: arm64: fpsimd: Fix bad si_code for undiagnosed SIGFPE Currently a SIGFPE delivered in response to a floating-point exception trap may have si_code set to 0 on arm64. As reported by Eric, this is a bad idea since this is the value of SI_USER -- yet this signal is definitely not the result of kill(2), tgkill(2) etc. and si_uid and si_pid make limited sense whereas we do want to yield a value for si_addr (which doesn't exist for SI_USER). It's not entirely clear whether the architecture permits a "spurious" fp exception trap where none of the exception flag bits in ESR_ELx is set. (IMHO the architectural intent is to forbid this.) However, it does permit those bits to contain garbage if the TFV bit in ESR_ELx is 0. That case isn't currently handled at all and may result in si_code == 0 or si_code containing a FPE_FLT* constant corresponding to an exception that did not in fact happen. There is nothing sensible we can return for si_code in such cases, but SI_USER is certainly not appropriate and will lead to violation of legitimate userspace assumptions. This patch allocates a new si_code value FPE_FLTUNK that at least does not conflict with any existing SI_* or FPE_* code, and yields this in si_code for undiagnosable cases. This is probably the best simplicity/incorrectness tradeoff achievable without relying on implementation-dependent features or adding a lot of code. In any case, there appears to be no perfect solution possible that would justify a lot of effort here. Yielding FPE_FLTUNK when some well-defined fp exception caused the trap is a violation of POSIX, but this is forced by the architecture.
We have no realistic prospect of yielding the correct code in such cases. At present I am not aware of any ARMv8 implementation that supports trapped floating-point exceptions in any case. The new code may be applicable to other architectures for similar reasons. No attempt is made to provide ESR_ELx to userspace in the signal frame, since architectural limitations mean that it is unlikely to provide much diagnostic value, doesn't benefit existing software and would create ABI with no proven purpose. The existing mechanism for passing it also has problems of its own which may result in the wrong value being passed to userspace due to interaction with mm faults. The implied rework does not appear justified. Acked-by: "Eric W. Biederman" Reported-by: "Eric W. Biederman" Signed-off-by: Dave Martin Signed-off-by: Will Deacon --- arch/arm64/include/asm/esr.h | 9 +++++++++ arch/arm64/include/uapi/asm/siginfo.h | 7 ------- arch/arm64/kernel/fpsimd.c | 27 +++++++++++++++------------ 3 files changed, 24 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 803443d74926..ce70c3ffb993 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -240,6 +240,15 @@ (((e) & ESR_ELx_SYS64_ISS_OP2_MASK) >> \ ESR_ELx_SYS64_ISS_OP2_SHIFT)) +/* + * ISS field definitions for floating-point exception traps + * (FP_EXC_32/FP_EXC_64). + * + * (The FPEXC_* constants are used instead for common bits.) + */ + +#define ESR_ELx_FP_EXC_TFV (UL(1) << 23) + #ifndef __ASSEMBLY__ #include diff --git a/arch/arm64/include/uapi/asm/siginfo.h b/arch/arm64/include/uapi/asm/siginfo.h index 8d7dbbcce780..574d12f86039 100644 --- a/arch/arm64/include/uapi/asm/siginfo.h +++ b/arch/arm64/include/uapi/asm/siginfo.h @@ -21,11 +21,4 @@ #include -/* - * SIGFPE si_codes - */ -#ifdef __KERNEL__ -#define FPE_FIXME 0 /* Broken dup of SI_USER */ -#endif /* __KERNEL__ */ - #endif diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 65fc87645ec6..20a5b30de877 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -39,6 +39,7 @@ #include #include +#include #include #include #include @@ -866,18 +867,20 @@ asmlinkage void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs) asmlinkage void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs) { siginfo_t info; - unsigned int si_code = FPE_FIXME; - - if (esr & FPEXC_IOF) - si_code = FPE_FLTINV; - else if (esr & FPEXC_DZF) - si_code = FPE_FLTDIV; - else if (esr & FPEXC_OFF) - si_code = FPE_FLTOVF; - else if (esr & FPEXC_UFF) - si_code = FPE_FLTUND; - else if (esr & FPEXC_IXF) - si_code = FPE_FLTRES; + unsigned int si_code = FPE_FLTUNK; + + if (esr & ESR_ELx_FP_EXC_TFV) { + if (esr & FPEXC_IOF) + si_code = FPE_FLTINV; + else if (esr & FPEXC_DZF) + si_code = FPE_FLTDIV; + else if (esr & FPEXC_OFF) + si_code = FPE_FLTOVF; + else if (esr & FPEXC_UFF) + si_code = FPE_FLTUND; + else if (esr & FPEXC_IXF) + si_code = FPE_FLTRES; + } memset(&info, 0, sizeof(info)); info.si_signo = SIGFPE; -- cgit v1.2.3-59-g8ed1b From 5043694eb8270a19040fb798f6d6f1dbd86c7a3c Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Fri, 23 Mar 2018 18:08:31 +0000 Subject: arm64/sve: Document firmware support requirements in Kconfig Use of SVE by EL2 and below requires explicit support in the firmware. 
There is no means to hide the presence of SVE from EL2, so a kernel configured with CONFIG_ARM64_SVE=y will typically not work correctly on SVE capable hardware unless the firmware does include the appropriate support. This is not expected to pose a problem in the wild, since platform integrators are responsible for ensuring that they ship up-to-date firmware to support their hardware. However, developers may hit the issue when using mismatched components. In order to draw attention to the issue and how to solve it, this patch adds some Kconfig text giving a brief explanation and details of compatible firmware versions. Signed-off-by: Dave Martin Acked-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 0f350b80aa84..a6688fcf3dc6 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1105,6 +1105,23 @@ config ARM64_SVE To enable use of this extension on CPUs that implement it, say Y. + Note that for architectural reasons, firmware _must_ implement SVE + support when running on SVE capable hardware. The required support + is present in: + + * version 1.5 and later of the ARM Trusted Firmware + * the AArch64 boot wrapper since commit 5e1261e08abf + ("bootwrapper: SVE: Enable SVE for EL2 and below"). + + For other firmware implementations, consult the firmware documentation + or vendor. + + If you need the kernel to boot on SVE-capable hardware with broken + firmware, you may need to say N here until you get your firmware + fixed. Otherwise, you may experience firmware panics or lockups when + booting the kernel. If unsure and you are not observing these + symptoms, you should assume that it is safe to say Y. + config ARM64_MODULE_PLTS bool select HAVE_MOD_ARCH_SPECIFIC -- cgit v1.2.3-59-g8ed1b From c0cda3b8ee6b4b6851b2fd8b6db91fd7b0e2524a Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Mon, 26 Mar 2018 15:12:28 +0100 Subject: arm64: capabilities: Update prototype for enable callback We issue the enable() callback for all CPU hwcaps capabilities available on the system, on all the CPUs. So far we have ignored the argument passed to the callback, which had a prototype to accept a "void *" for use with on_each_cpu() and later with stop_machine(). However, with commit 0a0d111d40fd1 ("arm64: cpufeature: Pass capability structure to ->enable callback"), there are some users of the argument who want the matching capability struct pointer where there are multiple matching criteria for a single capability. Clean up the declaration of the callback to make it clear. 1) Renamed to cpu_enable(), to imply taking necessary actions on the called CPU for the entry. 2) Pass const pointer to the capability, to allow the callback to check the entry. (e.g. to check if any action is needed on the CPU) 3) We don't care about the result of the callback, so turn the return type into void.
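The shape change described in points 1) to 3) can be summarised as a before/after sketch (the typedef names here are illustrative only; the real hunks follow):

    struct arm64_cpu_capabilities;

    /* before: opaque argument, and an int result every caller ignored */
    typedef int (*enable_fn)(void *unused);

    /* after: the matching capability entry is passed in; nothing returned */
    typedef void (*cpu_enable_fn)(const struct arm64_cpu_capabilities *cap);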
Cc: Will Deacon Cc: Catalin Marinas Cc: Mark Rutland Cc: Andre Przywara Cc: James Morse Acked-by: Robin Murphy Reviewed-by: Julien Thierry Signed-off-by: Dave Martin [suzuki: convert more users, rename call back and drop results] Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 7 ++++- arch/arm64/include/asm/fpsimd.h | 4 ++- arch/arm64/include/asm/processor.h | 7 ++--- arch/arm64/kernel/cpu_errata.c | 53 +++++++++++++++++-------------------- arch/arm64/kernel/cpufeature.c | 42 ++++++++++++++++------------- arch/arm64/kernel/fpsimd.c | 5 ++-- arch/arm64/kernel/traps.c | 4 +-- arch/arm64/mm/fault.c | 3 +-- 8 files changed, 67 insertions(+), 58 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index fbf0aab94d67..cba67cb24b22 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -100,7 +100,12 @@ struct arm64_cpu_capabilities { u16 capability; int def_scope; /* default scope */ bool (*matches)(const struct arm64_cpu_capabilities *caps, int scope); - int (*enable)(void *); /* Called on all active CPUs */ + /* + * Take the appropriate actions to enable this capability for this CPU. + * For each successfully booted CPU, this method is called for each + * globally detected capability. + */ + void (*cpu_enable)(const struct arm64_cpu_capabilities *cap); union { struct { /* To be used for erratum handling only */ u32 midr_model; diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 8857a0f0d0f7..7623762f7fa6 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -83,7 +83,9 @@ extern void sve_save_state(void *state, u32 *pfpsr); extern void sve_load_state(void const *state, u32 const *pfpsr, unsigned long vq_minus_1); extern unsigned int sve_get_vl(void); -extern int sve_kernel_enable(void *); + +struct arm64_cpu_capabilities; +extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused); extern int __ro_after_init sve_max_vl; diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index fce604e3e599..4fc8867fde4d 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -37,6 +37,7 @@ #include #include +#include #include #include #include @@ -227,9 +228,9 @@ static inline void spin_lock_prefetch(const void *ptr) #endif -int cpu_enable_pan(void *__unused); -int cpu_enable_cache_maint_trap(void *__unused); -int cpu_clear_disr(void *__unused); +void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused); +void cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused); +void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused); /* Userspace interface for PR_SVE_{SET,GET}_VL prctl()s: */ #define SVE_SET_VL(arg) sve_set_current_vl(arg) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 186c0fc61dcd..74f058421dd8 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -65,11 +65,11 @@ has_mismatched_cache_line_size(const struct arm64_cpu_capabilities *entry, (arm64_ftr_reg_ctrel0.sys_val & arm64_ftr_reg_ctrel0.strict_mask); } -static int cpu_enable_trap_ctr_access(void *__unused) +static void +cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *__unused) { /* Clear SCTLR_EL1.UCT */ config_sctlr_el1(SCTLR_EL1_UCT, 0); - return 0; } #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR @@ -173,25 +173,25 @@ static void 
call_hvc_arch_workaround_1(void) arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL); } -static int enable_smccc_arch_workaround_1(void *data) +static void +enable_smccc_arch_workaround_1(const struct arm64_cpu_capabilities *entry) { - const struct arm64_cpu_capabilities *entry = data; bp_hardening_cb_t cb; void *smccc_start, *smccc_end; struct arm_smccc_res res; if (!entry->matches(entry, SCOPE_LOCAL_CPU)) - return 0; + return; if (psci_ops.smccc_version == SMCCC_VERSION_1_0) - return 0; + return; switch (psci_ops.conduit) { case PSCI_CONDUIT_HVC: arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_ARCH_WORKAROUND_1, &res); if (res.a0) - return 0; + return; cb = call_hvc_arch_workaround_1; smccc_start = __smccc_workaround_1_hvc_start; smccc_end = __smccc_workaround_1_hvc_end; @@ -201,19 +201,19 @@ static int enable_smccc_arch_workaround_1(void *data) arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_ARCH_WORKAROUND_1, &res); if (res.a0) - return 0; + return; cb = call_smc_arch_workaround_1; smccc_start = __smccc_workaround_1_smc_start; smccc_end = __smccc_workaround_1_smc_end; break; default: - return 0; + return; } install_bp_hardening_cb(entry, cb, smccc_start, smccc_end); - return 0; + return; } static void qcom_link_stack_sanitization(void) @@ -228,15 +228,12 @@ static void qcom_link_stack_sanitization(void) : "=&r" (tmp)); } -static int qcom_enable_link_stack_sanitization(void *data) +static void +qcom_enable_link_stack_sanitization(const struct arm64_cpu_capabilities *entry) { - const struct arm64_cpu_capabilities *entry = data; - install_bp_hardening_cb(entry, qcom_link_stack_sanitization, __qcom_hyp_sanitize_link_stack_start, __qcom_hyp_sanitize_link_stack_end); - - return 0; } #endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */ @@ -266,7 +263,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .desc = "ARM errata 826319, 827319, 824069", .capability = ARM64_WORKAROUND_CLEAN_CACHE, MIDR_RANGE(MIDR_CORTEX_A53, 0x00, 0x02), - .enable = cpu_enable_cache_maint_trap, + .cpu_enable = cpu_enable_cache_maint_trap, }, #endif #ifdef CONFIG_ARM64_ERRATUM_819472 @@ -275,7 +272,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .desc = "ARM errata 819472", .capability = ARM64_WORKAROUND_CLEAN_CACHE, MIDR_RANGE(MIDR_CORTEX_A53, 0x00, 0x01), - .enable = cpu_enable_cache_maint_trap, + .cpu_enable = cpu_enable_cache_maint_trap, }, #endif #ifdef CONFIG_ARM64_ERRATUM_832075 @@ -365,7 +362,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .capability = ARM64_MISMATCHED_CACHE_LINE_SIZE, .matches = has_mismatched_cache_line_size, .def_scope = SCOPE_LOCAL_CPU, - .enable = cpu_enable_trap_ctr_access, + .cpu_enable = cpu_enable_trap_ctr_access, }, #ifdef CONFIG_QCOM_FALKOR_ERRATUM_1003 { @@ -404,27 +401,27 @@ const struct arm64_cpu_capabilities arm64_errata[] = { { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), - .enable = enable_smccc_arch_workaround_1, + .cpu_enable = enable_smccc_arch_workaround_1, }, { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), - .enable = enable_smccc_arch_workaround_1, + .cpu_enable = enable_smccc_arch_workaround_1, }, { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), - .enable = enable_smccc_arch_workaround_1, + .cpu_enable = enable_smccc_arch_workaround_1, }, { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, MIDR_ALL_VERSIONS(MIDR_CORTEX_A75), - .enable = enable_smccc_arch_workaround_1, + .cpu_enable = enable_smccc_arch_workaround_1, }, { 
.capability = ARM64_HARDEN_BRANCH_PREDICTOR, MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1), - .enable = qcom_enable_link_stack_sanitization, + .cpu_enable = qcom_enable_link_stack_sanitization, }, { .capability = ARM64_HARDEN_BP_POST_GUEST_EXIT, @@ -433,7 +430,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR), - .enable = qcom_enable_link_stack_sanitization, + .cpu_enable = qcom_enable_link_stack_sanitization, }, { .capability = ARM64_HARDEN_BP_POST_GUEST_EXIT, @@ -442,12 +439,12 @@ const struct arm64_cpu_capabilities arm64_errata[] = { { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN), - .enable = enable_smccc_arch_workaround_1, + .cpu_enable = enable_smccc_arch_workaround_1, }, { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2), - .enable = enable_smccc_arch_workaround_1, + .cpu_enable = enable_smccc_arch_workaround_1, }, #endif { @@ -465,8 +462,8 @@ void verify_local_cpu_errata_workarounds(void) for (; caps->matches; caps++) { if (cpus_have_cap(caps->capability)) { - if (caps->enable) - caps->enable((void *)caps); + if (caps->cpu_enable) + caps->cpu_enable(caps); } else if (caps->matches(caps, SCOPE_LOCAL_CPU)) { pr_crit("CPU%d: Requires work around for %s, not detected" " at boot time\n", diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index de4ea6a0208e..d66f64b81557 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -909,7 +909,8 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, ID_AA64PFR0_CSV3_SHIFT); } -static int kpti_install_ng_mappings(void *__unused) +static void +kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused) { typedef void (kpti_remap_fn)(int, int, phys_addr_t); extern kpti_remap_fn idmap_kpti_install_ng_mappings; @@ -919,7 +920,7 @@ static int kpti_install_ng_mappings(void *__unused) int cpu = smp_processor_id(); if (kpti_applied) - return 0; + return; remap_fn = (void *)__pa_symbol(idmap_kpti_install_ng_mappings); @@ -930,7 +931,7 @@ static int kpti_install_ng_mappings(void *__unused) if (!cpu) kpti_applied = true; - return 0; + return; } static int __init parse_kpti(char *str) @@ -947,7 +948,7 @@ static int __init parse_kpti(char *str) __setup("kpti=", parse_kpti); #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ -static int cpu_copy_el2regs(void *__unused) +static void cpu_copy_el2regs(const struct arm64_cpu_capabilities *__unused) { /* * Copy register values that aren't redirected by hardware. 
@@ -959,8 +960,6 @@ static int cpu_copy_el2regs(void *__unused) */ if (!alternatives_applied) write_sysreg(read_sysreg(tpidr_el1), tpidr_el2); - - return 0; } static const struct arm64_cpu_capabilities arm64_features[] = { @@ -984,7 +983,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .field_pos = ID_AA64MMFR1_PAN_SHIFT, .sign = FTR_UNSIGNED, .min_field_value = 1, - .enable = cpu_enable_pan, + .cpu_enable = cpu_enable_pan, }, #endif /* CONFIG_ARM64_PAN */ #if defined(CONFIG_AS_LSE) && defined(CONFIG_ARM64_LSE_ATOMICS) @@ -1032,7 +1031,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .capability = ARM64_HAS_VIRT_HOST_EXTN, .def_scope = SCOPE_SYSTEM, .matches = runs_at_el2, - .enable = cpu_copy_el2regs, + .cpu_enable = cpu_copy_el2regs, }, { .desc = "32-bit EL0 Support", @@ -1056,7 +1055,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .capability = ARM64_UNMAP_KERNEL_AT_EL0, .def_scope = SCOPE_SYSTEM, .matches = unmap_kernel_at_el0, - .enable = kpti_install_ng_mappings, + .cpu_enable = kpti_install_ng_mappings, }, #endif { @@ -1087,7 +1086,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .field_pos = ID_AA64PFR0_SVE_SHIFT, .min_field_value = ID_AA64PFR0_SVE, .matches = has_cpuid_feature, - .enable = sve_kernel_enable, + .cpu_enable = sve_kernel_enable, }, #endif /* CONFIG_ARM64_SVE */ #ifdef CONFIG_ARM64_RAS_EXTN @@ -1100,7 +1099,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sign = FTR_UNSIGNED, .field_pos = ID_AA64PFR0_RAS_SHIFT, .min_field_value = ID_AA64PFR0_RAS_V1, - .enable = cpu_clear_disr, + .cpu_enable = cpu_clear_disr, }, #endif /* CONFIG_ARM64_RAS_EXTN */ { @@ -1260,6 +1259,14 @@ void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, } } +static int __enable_cpu_capability(void *arg) +{ + const struct arm64_cpu_capabilities *cap = arg; + + cap->cpu_enable(cap); + return 0; +} + /* * Run through the enabled capabilities and enable() it on all active * CPUs @@ -1275,14 +1282,15 @@ void __init enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) /* Ensure cpus_have_const_cap(num) works */ static_branch_enable(&cpu_hwcap_keys[num]); - if (caps->enable) { + if (caps->cpu_enable) { /* * Use stop_machine() as it schedules the work allowing * us to modify PSTATE, instead of on_each_cpu() which * uses an IPI, giving us a PSTATE that disappears when * we return. */ - stop_machine(caps->enable, (void *)caps, cpu_online_mask); + stop_machine(__enable_cpu_capability, (void *)caps, + cpu_online_mask); } } } @@ -1325,8 +1333,8 @@ verify_local_cpu_features(const struct arm64_cpu_capabilities *caps_list) smp_processor_id(), caps->desc); cpu_die_early(); } - if (caps->enable) - caps->enable((void *)caps); + if (caps->cpu_enable) + caps->cpu_enable(caps); } } @@ -1544,10 +1552,8 @@ static int __init enable_mrs_emulation(void) core_initcall(enable_mrs_emulation); -int cpu_clear_disr(void *__unused) +void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused) { /* Firmware may have left a deferred SError in this register. */ write_sysreg_s(0, SYS_DISR_EL1); - - return 0; } diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 20a5b30de877..91f9f4f4aebe 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -41,6 +41,7 @@ #include #include +#include #include #include #include @@ -757,12 +758,10 @@ fail: * Enable SVE for EL1. * Intended for use by the cpufeatures code during CPU boot. 
*/ -int sve_kernel_enable(void *__always_unused p) +void sve_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p) { write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_ZEN_EL1EN, CPACR_EL1); isb(); - - return 0; } void __init sve_setup(void) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 2b478565d774..ba964da31a25 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -38,6 +38,7 @@ #include #include +#include #include #include #include @@ -407,10 +408,9 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs) force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc); } -int cpu_enable_cache_maint_trap(void *__unused) +void cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused) { config_sctlr_el1(SCTLR_EL1_UCI, 0); - return 0; } #define __user_cache_maint(insn, address, res) \ diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 551d044fb31f..4165485e8b6e 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -804,7 +804,7 @@ asmlinkage int __exception do_debug_exception(unsigned long addr, NOKPROBE_SYMBOL(do_debug_exception); #ifdef CONFIG_ARM64_PAN -int cpu_enable_pan(void *__unused) +void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused) { /* * We modify PSTATE. This won't work from irq context as the PSTATE @@ -814,6 +814,5 @@ asmlinkage int __exception do_debug_exception(unsigned long addr, config_sctlr_el1(SCTLR_EL1_SPAN, 0); asm(SET_PSTATE_PAN(1)); - return 0; } #endif /* CONFIG_ARM64_PAN */ -- cgit v1.2.3-59-g8ed1b From 5e91107b06811f0ca147cebbedce53626c9c4443 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Mon, 26 Mar 2018 15:12:29 +0100 Subject: arm64: capabilities: Move errata work around check on boot CPU We trigger the CPU errata work around check on the boot CPU from smp_prepare_boot_cpu() to make sure that we run the checks only after the CPU feature infrastructure is initialised. While this is correct, we can also do this from init_cpu_features() which initialises the infrastructure, and is called only on the Boot CPU. This helps to consolidate the CPU capability handling to cpufeature.c. No functional changes. Cc: Will Deacon Cc: Catalin Marinas Cc: Mark Rutland Reviewed-by: Dave Martin Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 6 ++++++ arch/arm64/kernel/smp.c | 6 ------ 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index d66f64b81557..b4cbcfe6e00c 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -551,6 +551,12 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) init_cpu_ftr_reg(SYS_ZCR_EL1, info->reg_zcr); sve_init_vq_map(); } + + /* + * Run the errata work around checks on the boot CPU, once we have + * initialised the cpu feature infrastructure. + */ + update_cpu_errata_workarounds(); } static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 3b8ad7be9c33..5cef11450183 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -448,12 +448,6 @@ void __init smp_prepare_boot_cpu(void) jump_label_init(); cpuinfo_store_boot_cpu(); save_boot_cpu_run_el(); - /* - * Run the errata work around checks on the boot CPU, once we have - * initialised the cpu feature infrastructure from - * cpuinfo_store_boot_cpu() above.
- */ - update_cpu_errata_workarounds(); } static u64 __init of_get_cpu_mpidr(struct device_node *dn) -- cgit v1.2.3-59-g8ed1b From 1e89baed5d50d2b8d9fd420830902570270703f1 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Mon, 26 Mar 2018 15:12:30 +0100 Subject: arm64: capabilities: Move errata processing code We have errata work around processing code in cpu_errata.c, which calls back into helpers defined in cpufeature.c. Now that we are going to make the handling of capabilities generic, by adding the information to each capability, move the errata work around specific processing code. No functional changes. Cc: Will Deacon Cc: Marc Zyngier Cc: Mark Rutland Cc: Andre Przywara Reviewed-by: Dave Martin Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 7 ------ arch/arm64/kernel/cpu_errata.c | 33 ---------------------------- arch/arm64/kernel/cpufeature.c | 43 ++++++++++++++++++++++++++++++++++--- 3 files changed, 40 insertions(+), 43 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index cba67cb24b22..8efbda2858a8 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -245,15 +245,8 @@ static inline bool id_aa64pfr0_sve(u64 pfr0) } void __init setup_cpu_features(void); - -void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, - const char *info); -void enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps); void check_local_cpu_capabilities(void); -void update_cpu_errata_workarounds(void); -void __init enable_errata_workarounds(void); -void verify_local_cpu_errata_workarounds(void); u64 read_sanitised_ftr_reg(u32 id); diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 74f058421dd8..0de0ab6a874c 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -450,36 +450,3 @@ const struct arm64_cpu_capabilities arm64_errata[] = { { } }; - -/* - * The CPU Errata work arounds are detected and applied at boot time - * and the related information is freed soon after. If the new CPU requires - * an errata not detected at boot, fail this CPU. - */ -void verify_local_cpu_errata_workarounds(void) -{ - const struct arm64_cpu_capabilities *caps = arm64_errata; - - for (; caps->matches; caps++) { - if (cpus_have_cap(caps->capability)) { - if (caps->cpu_enable) - caps->cpu_enable(caps); - } else if (caps->matches(caps, SCOPE_LOCAL_CPU)) { - pr_crit("CPU%d: Requires work around for %s, not detected" - " at boot time\n", - smp_processor_id(), - caps->desc ? 
: "an erratum"); - cpu_die_early(); - } - } -} - -void update_cpu_errata_workarounds(void) -{ - update_cpu_capabilities(arm64_errata, "enabling workaround for"); -} - -void __init enable_errata_workarounds(void) -{ - enable_cpu_capabilities(arm64_errata); -} diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index b4cbcfe6e00c..205c18c25fb2 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -509,6 +509,9 @@ static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new) reg->user_mask = user_mask; } +extern const struct arm64_cpu_capabilities arm64_errata[]; +static void update_cpu_errata_workarounds(void); + void __init init_cpu_features(struct cpuinfo_arm64 *info) { /* Before we start using the tables, make sure it is sorted */ @@ -1252,8 +1255,8 @@ static bool __this_cpu_has_cap(const struct arm64_cpu_capabilities *cap_array, return false; } -void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, - const char *info) +static void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, + const char *info) { for (; caps->matches; caps++) { if (!caps->matches(caps, caps->def_scope)) @@ -1277,7 +1280,8 @@ static int __enable_cpu_capability(void *arg) * Run through the enabled capabilities and enable() it on all active * CPUs */ -void __init enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) +static void __init +enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) { for (; caps->matches; caps++) { unsigned int num = caps->capability; @@ -1361,6 +1365,39 @@ static void verify_sve_features(void) /* Add checks on other ZCR bits here if necessary */ } +/* + * The CPU Errata work arounds are detected and applied at boot time + * and the related information is freed soon after. If the new CPU requires + * an errata not detected at boot, fail this CPU. + */ +static void verify_local_cpu_errata_workarounds(void) +{ + const struct arm64_cpu_capabilities *caps = arm64_errata; + + for (; caps->matches; caps++) { + if (cpus_have_cap(caps->capability)) { + if (caps->cpu_enable) + caps->cpu_enable(caps); + } else if (caps->matches(caps, SCOPE_LOCAL_CPU)) { + pr_crit("CPU%d: Requires work around for %s, not detected" + " at boot time\n", + smp_processor_id(), + caps->desc ? : "an erratum"); + cpu_die_early(); + } + } +} + +static void update_cpu_errata_workarounds(void) +{ + update_cpu_capabilities(arm64_errata, "enabling workaround for"); +} + +static void __init enable_errata_workarounds(void) +{ + enable_cpu_capabilities(arm64_errata); +} + /* * Run through the enabled system capabilities and enable() it on this CPU. * The capabilities were decided based on the available CPUs at the boot time. -- cgit v1.2.3-59-g8ed1b From 143ba05d867af34827faf99e0eed4de27106c7cb Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Mon, 26 Mar 2018 15:12:31 +0100 Subject: arm64: capabilities: Prepare for fine grained capabilities We use arm64_cpu_capabilities to represent CPU ELF HWCAPs exposed to the userspace and the CPU hwcaps used by the kernel, which include cpu features and CPU errata work arounds. Capabilities have some properties that decide how they should be treated : 1) Detection, i.e scope : A cap could be "detected" either : - if it is present on at least one CPU (SCOPE_LOCAL_CPU) Or - if it is present on all the CPUs (SCOPE_SYSTEM) 2) When is it enabled ? - A cap is treated as "enabled" when the system takes some action based on whether the capability is detected or not. 
e.g., setting some control register, patching the kernel code. Right now, we treat all caps as enabled at boot-time, after all the CPUs are brought up by the kernel. But there are certain caps, which are enabled early during the boot (e.g., VHE, GIC_CPUIF for NMI) and the kernel starts using them, even before the secondary CPUs are brought up. We would need a way to describe this for each capability. 3) Conflict on a late CPU - When a CPU is brought up, it is checked against the caps that are known to be enabled on the system (via verify_local_cpu_capabilities()). Based on the state of the capability on the CPU vs. that of the system, we could have the following combinations of conflict.

 x-----------------------------x
 | Type | System | Late CPU |
 |-----------------------------|
 |  a   |   y    |    n     |
 |-----------------------------|
 |  b   |   n    |    y     |
 x-----------------------------x

Case (a) is not permitted for caps which are system features, which the system expects all the CPUs to have (e.g. VHE). While (a) is ignored for all errata work arounds. However, there could be exceptions to the plain filtering approach. e.g., KPTI is an optional feature for a late CPU as long as the system already enables it. Case (b) is not permitted for errata work arounds which require some action that cannot be delayed. And we ignore (b) for features. Here, yet again, KPTI is an exception, where if a late CPU needs KPTI we are too late to enable it (because we change the allocation of ASIDs etc). So this calls for a lot more fine grained behavior for each capability. And if we define all the attributes to control their behavior properly, we may be able to use a single table for the CPU hwcaps (which cover errata and features, not the ELF HWCAPs). This is a preparatory step to get there. More bits would be added for the properties listed above. We are going to use a bit-mask to encode all the properties of a capability. This patch encodes the "SCOPE" of the capability. As such there is no change in how the capabilities are treated. Cc: Mark Rutland Reviewed-by: Dave Martin Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 105 +++++++++++++++++++++++++++++++++--- arch/arm64/kernel/cpu_errata.c | 8 +-- arch/arm64/kernel/cpufeature.c | 42 +++++++-------- 3 files changed, 124 insertions(+), 31 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 8efbda2858a8..13fde0952c31 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -89,16 +89,104 @@ struct arm64_ftr_reg { extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; -/* scope of capability check */ -enum { - SCOPE_SYSTEM, - SCOPE_LOCAL_CPU, -}; +/* + * CPU capabilities: + * + * We use arm64_cpu_capabilities to represent system features, errata work + * arounds (both used internally by kernel and tracked in cpu_hwcaps) and + * ELF HWCAPs (which are exposed to user). + * + * To support systems with heterogeneous CPUs, we need to make sure that we + * detect the capabilities correctly on the system and take appropriate + * measures to ensure there are no incompatibilities. + * + * This comment tries to explain how we treat the capabilities. + * Each capability has the following list of attributes : + * + * 1) Scope of Detection : The system detects a given capability by + * performing some checks at runtime. This could be, e.g, checking the + * value of a field in CPU ID feature register or checking the cpu + * model.
The capability provides a call back ( @matches() ) to + * perform the check. Scope defines how the checks should be performed. + * There are two cases: + * + * a) SCOPE_LOCAL_CPU: check all the CPUs and "detect" if at least one + * matches. This implies, we have to run the check on all the + * booting CPUs, until the system decides that the state of the + * capability is finalised. (See section 2 below) + * Or + * b) SCOPE_SYSTEM: check all the CPUs and "detect" if all the CPUs + * match. This implies, we run the check only once, when the + * system decides to finalise the state of the capability. If the + * capability relies on a field in one of the CPU ID feature + * registers, we use the sanitised value of the register from the + * CPU feature infrastructure to make the decision. + * + * The process of detection is usually denoted by "update" capability + * state in the code. + * + * 2) Finalise the state : The kernel should finalise the state of a + * capability at some point during its execution and take necessary + * actions if any. Usually, this is done, after all the boot-time + * enabled CPUs are brought up by the kernel, so that it can make + * a better decision based on the available set of CPUs. However, there + * are some special cases, where the action is taken during the early + * boot by the primary boot CPU. (e.g, running the kernel at EL2 with + * Virtualisation Host Extensions). The kernel usually disallows any + * changes to the state of a capability once it finalises the capability + * and takes any action, as it may be impossible to execute the actions + * safely. A CPU brought up after a capability is "finalised" is + * referred to as "Late CPU" w.r.t the capability. e.g, all secondary + * CPUs are treated as "late CPUs" for capabilities determined by the boot + * CPU. + * + * 3) Verification: When a CPU is brought online (e.g, by user or by the + * kernel), the kernel should make sure that it is safe to use the CPU, + * by verifying that the CPU is compliant with the state of the + * capabilities finalised already. This happens via : + * + * secondary_start_kernel()-> check_local_cpu_capabilities() + * + * As explained in (2) above, capabilities could be finalised at + * different points in the execution. Each CPU is verified against the + * "finalised" capabilities and if there is a conflict, the kernel takes + * an action, based on the severity (e.g, a CPU could be prevented from + * booting or cause a kernel panic). The CPU is allowed to "affect" the + * state of the capability, if it has not been finalised already. + * + * 4) Action: As mentioned in (2), the kernel can take an action for each + * detected capability, on all CPUs on the system. Appropriate actions + * include, turning on an architectural feature, modifying the control + * registers (e.g, SCTLR, TCR etc.) or patching the kernel via + * alternatives. The kernel patching is batched and performed at a later + * point. The actions are always initiated only after the capability + * is finalised. This is usually denoted by "enabling" the capability. + * The actions are initiated as follows : + * a) Action is triggered on all online CPUs, after the capability is + * finalised, invoked within the stop_machine() context from + * enable_cpu_capabilities(). + * + * b) Any late CPU, brought up after (1), the action is triggered via: + * + * check_local_cpu_capabilities() -> verify_local_cpu_capabilities() + * + */ + + +/* Decide how the capability is detected.
On a local CPU vs System wide */ +#define ARM64_CPUCAP_SCOPE_LOCAL_CPU ((u16)BIT(0)) +#define ARM64_CPUCAP_SCOPE_SYSTEM ((u16)BIT(1)) +#define ARM64_CPUCAP_SCOPE_MASK \ + (ARM64_CPUCAP_SCOPE_SYSTEM | \ + ARM64_CPUCAP_SCOPE_LOCAL_CPU) + +#define SCOPE_SYSTEM ARM64_CPUCAP_SCOPE_SYSTEM +#define SCOPE_LOCAL_CPU ARM64_CPUCAP_SCOPE_LOCAL_CPU struct arm64_cpu_capabilities { const char *desc; u16 capability; - int def_scope; /* default scope */ + u16 type; bool (*matches)(const struct arm64_cpu_capabilities *caps, int scope); /* * Take the appropriate actions to enable this capability for this CPU. @@ -127,6 +215,11 @@ struct arm64_cpu_capabilities { }; }; +static inline int cpucap_default_scope(const struct arm64_cpu_capabilities *cap) +{ + return cap->type & ARM64_CPUCAP_SCOPE_MASK; +} + extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS]; extern struct static_key_false arm64_const_caps_ready; diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 0de0ab6a874c..2b5233157af1 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -238,14 +238,14 @@ qcom_enable_link_stack_sanitization(const struct arm64_cpu_capabilities *entry) #endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */ #define MIDR_RANGE(model, min, max) \ - .def_scope = SCOPE_LOCAL_CPU, \ + .type = ARM64_CPUCAP_SCOPE_LOCAL_CPU, \ .matches = is_affected_midr_range, \ .midr_model = model, \ .midr_range_min = min, \ .midr_range_max = max #define MIDR_ALL_VERSIONS(model) \ - .def_scope = SCOPE_LOCAL_CPU, \ + .type = ARM64_CPUCAP_SCOPE_LOCAL_CPU, \ .matches = is_affected_midr_range, \ .midr_model = model, \ .midr_range_min = 0, \ @@ -361,7 +361,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .desc = "Mismatched cache line size", .capability = ARM64_MISMATCHED_CACHE_LINE_SIZE, .matches = has_mismatched_cache_line_size, - .def_scope = SCOPE_LOCAL_CPU, + .type = ARM64_CPUCAP_SCOPE_LOCAL_CPU, .cpu_enable = cpu_enable_trap_ctr_access, }, #ifdef CONFIG_QCOM_FALKOR_ERRATUM_1003 @@ -375,7 +375,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { { .desc = "Qualcomm Technologies Kryo erratum 1003", .capability = ARM64_WORKAROUND_QCOM_FALKOR_E1003, - .def_scope = SCOPE_LOCAL_CPU, + .type = ARM64_CPUCAP_SCOPE_LOCAL_CPU, .midr_model = MIDR_QCOM_KRYO, .matches = is_kryo_midr, }, diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 205c18c25fb2..19955fd5eb02 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -975,7 +975,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "GIC system register CPU interface", .capability = ARM64_HAS_SYSREG_GIC_CPUIF, - .def_scope = SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SCOPE_SYSTEM, .matches = has_useable_gicv3_cpuif, .sys_reg = SYS_ID_AA64PFR0_EL1, .field_pos = ID_AA64PFR0_GIC_SHIFT, @@ -986,7 +986,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "Privileged Access Never", .capability = ARM64_HAS_PAN, - .def_scope = SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SCOPE_SYSTEM, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64MMFR1_EL1, .field_pos = ID_AA64MMFR1_PAN_SHIFT, @@ -999,7 +999,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "LSE atomic instructions", .capability = ARM64_HAS_LSE_ATOMICS, - .def_scope = SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SCOPE_SYSTEM, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR0_EL1, .field_pos = ID_AA64ISAR0_ATOMICS_SHIFT, 
@@ -1010,14 +1010,14 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "Software prefetching using PRFM", .capability = ARM64_HAS_NO_HW_PREFETCH, - .def_scope = SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SCOPE_SYSTEM, .matches = has_no_hw_prefetch, }, #ifdef CONFIG_ARM64_UAO { .desc = "User Access Override", .capability = ARM64_HAS_UAO, - .def_scope = SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SCOPE_SYSTEM, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64MMFR2_EL1, .field_pos = ID_AA64MMFR2_UAO_SHIFT, @@ -1031,21 +1031,21 @@ static const struct arm64_cpu_capabilities arm64_features[] = { #ifdef CONFIG_ARM64_PAN { .capability = ARM64_ALT_PAN_NOT_UAO, - .def_scope = SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SCOPE_SYSTEM, .matches = cpufeature_pan_not_uao, }, #endif /* CONFIG_ARM64_PAN */ { .desc = "Virtualization Host Extensions", .capability = ARM64_HAS_VIRT_HOST_EXTN, - .def_scope = SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SCOPE_SYSTEM, .matches = runs_at_el2, .cpu_enable = cpu_copy_el2regs, }, { .desc = "32-bit EL0 Support", .capability = ARM64_HAS_32BIT_EL0, - .def_scope = SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SCOPE_SYSTEM, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64PFR0_EL1, .sign = FTR_UNSIGNED, @@ -1055,14 +1055,14 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "Reduced HYP mapping offset", .capability = ARM64_HYP_OFFSET_LOW, - .def_scope = SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SCOPE_SYSTEM, .matches = hyp_offset_low, }, #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 { .desc = "Kernel page table isolation (KPTI)", .capability = ARM64_UNMAP_KERNEL_AT_EL0, - .def_scope = SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SCOPE_SYSTEM, .matches = unmap_kernel_at_el0, .cpu_enable = kpti_install_ng_mappings, }, @@ -1070,7 +1070,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { /* FP/SIMD is not implemented */ .capability = ARM64_HAS_NO_FPSIMD, - .def_scope = SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SCOPE_SYSTEM, .min_field_value = 0, .matches = has_no_fpsimd, }, @@ -1078,7 +1078,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "Data cache clean to Point of Persistence", .capability = ARM64_HAS_DCPOP, - .def_scope = SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SCOPE_SYSTEM, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR1_EL1, .field_pos = ID_AA64ISAR1_DPB_SHIFT, @@ -1088,8 +1088,8 @@ static const struct arm64_cpu_capabilities arm64_features[] = { #ifdef CONFIG_ARM64_SVE { .desc = "Scalable Vector Extension", + .type = ARM64_CPUCAP_SCOPE_SYSTEM, .capability = ARM64_SVE, - .def_scope = SCOPE_SYSTEM, .sys_reg = SYS_ID_AA64PFR0_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64PFR0_SVE_SHIFT, @@ -1102,7 +1102,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "RAS Extension Support", .capability = ARM64_HAS_RAS_EXTN, - .def_scope = SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SCOPE_SYSTEM, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64PFR0_EL1, .sign = FTR_UNSIGNED, @@ -1114,28 +1114,28 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "Data cache clean to the PoU not required for I/D coherence", .capability = ARM64_HAS_CACHE_IDC, - .def_scope = SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SCOPE_SYSTEM, .matches = has_cache_idc, }, { .desc = "Instruction cache invalidation not required for I/D coherence", .capability = ARM64_HAS_CACHE_DIC, - .def_scope = SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SCOPE_SYSTEM, .matches = has_cache_dic, }, {}, }; -#define HWCAP_CAP(reg, field, s, min_value, 
type, cap) \ +#define HWCAP_CAP(reg, field, s, min_value, cap_type, cap) \ { \ .desc = #cap, \ - .def_scope = SCOPE_SYSTEM, \ + .type = ARM64_CPUCAP_SCOPE_SYSTEM, \ .matches = has_cpuid_feature, \ .sys_reg = reg, \ .field_pos = field, \ .sign = s, \ .min_field_value = min_value, \ - .hwcap_type = type, \ + .hwcap_type = cap_type, \ .hwcap = cap, \ } @@ -1232,7 +1232,7 @@ static void __init setup_elf_hwcaps(const struct arm64_cpu_capabilities *hwcaps) /* We support emulation of accesses to CPU ID feature registers */ elf_hwcap |= HWCAP_CPUID; for (; hwcaps->matches; hwcaps++) - if (hwcaps->matches(hwcaps, hwcaps->def_scope)) + if (hwcaps->matches(hwcaps, cpucap_default_scope(hwcaps))) cap_set_elf_hwcap(hwcaps); } @@ -1259,7 +1259,7 @@ static void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, const char *info) { for (; caps->matches; caps++) { - if (!caps->matches(caps, caps->def_scope)) + if (!caps->matches(caps, cpucap_default_scope(caps))) continue; if (!cpus_have_cap(caps->capability) && caps->desc) -- cgit v1.2.3-59-g8ed1b From 5b4747c5dce7a873e1e7fe1608835825f714267a Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Mon, 26 Mar 2018 15:12:32 +0100 Subject: arm64: capabilities: Add flags to handle the conflicts on late CPU When a CPU is brought up, it is checked against the caps that are known to be enabled on the system (via verify_local_cpu_capabilities()). Based on the state of the capability on the CPU vs. that of the system, we could have the following combinations of conflict.

 x-----------------------------x
 | Type | System | Late CPU |
 |-----------------------------|
 |  a   |   y    |    n     |
 |-----------------------------|
 |  b   |   n    |    y     |
 x-----------------------------x

Case (a) is not permitted for caps which are system features, which the system expects all the CPUs to have (e.g. VHE). While (a) is ignored for all errata work arounds. However, there could be exceptions to the plain filtering approach. e.g., KPTI is an optional feature for a late CPU as long as the system already enables it. Case (b) is not permitted for errata work arounds that cannot be activated after the kernel has finished booting. And we ignore (b) for features. Here, yet again, KPTI is an exception, where if a late CPU needs KPTI we are too late to enable it (because we change the allocation of ASIDs etc). Add two different flags to indicate how the conflict should be handled. ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU - CPUs may have the capability ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU - CPUs may not have the capability. Now that we have the flags to describe the behavior of the errata and the features, as we treat them, define types for ERRATUM and FEATURE. Cc: Will Deacon Cc: Mark Rutland Reviewed-by: Dave Martin Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 68 +++++++++++++++++++++++++++++++++++++ arch/arm64/kernel/cpu_errata.c | 8 ++--- arch/arm64/kernel/cpufeature.c | 34 +++++++++---------- 3 files changed, 89 insertions(+), 21 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 13fde0952c31..b8d8e6012385 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -153,6 +153,7 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; * an action, based on the severity (e.g, a CPU could be prevented from * booting or cause a kernel panic). The CPU is allowed to "affect" the * state of the capability, if it has not been finalised already.
+ * See section 5 for more details on conflicts. * * 4) Action: As mentioned in (2), the kernel can take an action for each @@ -170,6 +171,34 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; * * check_local_cpu_capabilities() -> verify_local_cpu_capabilities() * + * 5) Conflicts: Based on the state of the capability on a late CPU vs. + * the system state, we could have the following combinations :
 + *
 + * x-----------------------------x
 + * | Type | System | Late CPU |
 + * |-----------------------------|
 + * |  a   |   y    |    n     |
 + * |-----------------------------|
 + * |  b   |   n    |    y     |
 + * x-----------------------------x
 + *
+ * Two separate flag bits are defined to indicate whether each kind of + * conflict can be allowed: + * ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU - Case(a) is allowed + * ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU - Case(b) is allowed + * + * Case (a) is not permitted for a capability that the system requires + * all CPUs to have in order for the capability to be enabled. This is + * typical for capabilities that represent enhanced functionality. + * + * Case (b) is not permitted for a capability that must be enabled + * during boot if any CPU in the system requires it in order to run + * safely. This is typical for erratum work arounds that cannot be + * enabled after the corresponding capability is finalised. + * + * In some non-typical cases either both (a) and (b), or neither, + * should be permitted. This can be described by including neither + * or both flags in the capability's type field. */ @@ -183,6 +212,33 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; #define SCOPE_SYSTEM ARM64_CPUCAP_SCOPE_SYSTEM #define SCOPE_LOCAL_CPU ARM64_CPUCAP_SCOPE_LOCAL_CPU +/* + * Is it permitted for a late CPU to have this capability when system + * hasn't already enabled it ? + */ +#define ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU ((u16)BIT(4)) +/* Is it safe for a late CPU to miss this capability when system has it */ +#define ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU ((u16)BIT(5)) + +/* + * CPU errata workarounds that need to be enabled at boot time if one or + * more CPUs in the system require it. When one of these capabilities + * has been enabled, it is safe to allow any CPU to boot that doesn't + * require the workaround. However, it is not safe if a "late" CPU + * requires a workaround and the system hasn't enabled it already. + */ +#define ARM64_CPUCAP_LOCAL_CPU_ERRATUM \ + (ARM64_CPUCAP_SCOPE_LOCAL_CPU | ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU) +/* + * CPU feature detected at boot time based on system-wide value of a + * feature. It is safe for a late CPU to have this feature even though + * the system hasn't enabled it, although the feature will not be used + * by Linux in this case. If the system has enabled this feature already, + * then every late CPU must have it.
+ */ +#define ARM64_CPUCAP_SYSTEM_FEATURE \ + (ARM64_CPUCAP_SCOPE_SYSTEM | ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU) + struct arm64_cpu_capabilities { const char *desc; u16 capability; @@ -220,6 +276,18 @@ static inline int cpucap_default_scope(const struct arm64_cpu_capabilities *cap) return cap->type & ARM64_CPUCAP_SCOPE_MASK; } +static inline bool +cpucap_late_cpu_optional(const struct arm64_cpu_capabilities *cap) +{ + return !!(cap->type & ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU); +} + +static inline bool +cpucap_late_cpu_permitted(const struct arm64_cpu_capabilities *cap) +{ + return !!(cap->type & ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU); +} + extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS]; extern struct static_key_false arm64_const_caps_ready; diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 2b5233157af1..f5ab9545c5ea 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -238,14 +238,14 @@ qcom_enable_link_stack_sanitization(const struct arm64_cpu_capabilities *entry) #endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */ #define MIDR_RANGE(model, min, max) \ - .type = ARM64_CPUCAP_SCOPE_LOCAL_CPU, \ + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, \ .matches = is_affected_midr_range, \ .midr_model = model, \ .midr_range_min = min, \ .midr_range_max = max #define MIDR_ALL_VERSIONS(model) \ - .type = ARM64_CPUCAP_SCOPE_LOCAL_CPU, \ + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, \ .matches = is_affected_midr_range, \ .midr_model = model, \ .midr_range_min = 0, \ @@ -361,7 +361,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .desc = "Mismatched cache line size", .capability = ARM64_MISMATCHED_CACHE_LINE_SIZE, .matches = has_mismatched_cache_line_size, - .type = ARM64_CPUCAP_SCOPE_LOCAL_CPU, + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, .cpu_enable = cpu_enable_trap_ctr_access, }, #ifdef CONFIG_QCOM_FALKOR_ERRATUM_1003 @@ -375,7 +375,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { { .desc = "Qualcomm Technologies Kryo erratum 1003", .capability = ARM64_WORKAROUND_QCOM_FALKOR_E1003, - .type = ARM64_CPUCAP_SCOPE_LOCAL_CPU, + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, .midr_model = MIDR_QCOM_KRYO, .matches = is_kryo_midr, }, diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 19955fd5eb02..0b8e6b9e24e1 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -975,7 +975,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "GIC system register CPU interface", .capability = ARM64_HAS_SYSREG_GIC_CPUIF, - .type = ARM64_CPUCAP_SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_useable_gicv3_cpuif, .sys_reg = SYS_ID_AA64PFR0_EL1, .field_pos = ID_AA64PFR0_GIC_SHIFT, @@ -986,7 +986,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "Privileged Access Never", .capability = ARM64_HAS_PAN, - .type = ARM64_CPUCAP_SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64MMFR1_EL1, .field_pos = ID_AA64MMFR1_PAN_SHIFT, @@ -999,7 +999,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "LSE atomic instructions", .capability = ARM64_HAS_LSE_ATOMICS, - .type = ARM64_CPUCAP_SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR0_EL1, .field_pos = ID_AA64ISAR0_ATOMICS_SHIFT, @@ -1010,14 +1010,14 @@ static const struct arm64_cpu_capabilities 
arm64_features[] = { { .desc = "Software prefetching using PRFM", .capability = ARM64_HAS_NO_HW_PREFETCH, - .type = ARM64_CPUCAP_SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_no_hw_prefetch, }, #ifdef CONFIG_ARM64_UAO { .desc = "User Access Override", .capability = ARM64_HAS_UAO, - .type = ARM64_CPUCAP_SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64MMFR2_EL1, .field_pos = ID_AA64MMFR2_UAO_SHIFT, @@ -1031,21 +1031,21 @@ static const struct arm64_cpu_capabilities arm64_features[] = { #ifdef CONFIG_ARM64_PAN { .capability = ARM64_ALT_PAN_NOT_UAO, - .type = ARM64_CPUCAP_SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = cpufeature_pan_not_uao, }, #endif /* CONFIG_ARM64_PAN */ { .desc = "Virtualization Host Extensions", .capability = ARM64_HAS_VIRT_HOST_EXTN, - .type = ARM64_CPUCAP_SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = runs_at_el2, .cpu_enable = cpu_copy_el2regs, }, { .desc = "32-bit EL0 Support", .capability = ARM64_HAS_32BIT_EL0, - .type = ARM64_CPUCAP_SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64PFR0_EL1, .sign = FTR_UNSIGNED, @@ -1055,14 +1055,14 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "Reduced HYP mapping offset", .capability = ARM64_HYP_OFFSET_LOW, - .type = ARM64_CPUCAP_SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = hyp_offset_low, }, #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 { .desc = "Kernel page table isolation (KPTI)", .capability = ARM64_UNMAP_KERNEL_AT_EL0, - .type = ARM64_CPUCAP_SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = unmap_kernel_at_el0, .cpu_enable = kpti_install_ng_mappings, }, @@ -1070,7 +1070,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { /* FP/SIMD is not implemented */ .capability = ARM64_HAS_NO_FPSIMD, - .type = ARM64_CPUCAP_SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, .min_field_value = 0, .matches = has_no_fpsimd, }, @@ -1078,7 +1078,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "Data cache clean to Point of Persistence", .capability = ARM64_HAS_DCPOP, - .type = ARM64_CPUCAP_SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR1_EL1, .field_pos = ID_AA64ISAR1_DPB_SHIFT, @@ -1088,7 +1088,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { #ifdef CONFIG_ARM64_SVE { .desc = "Scalable Vector Extension", - .type = ARM64_CPUCAP_SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, .capability = ARM64_SVE, .sys_reg = SYS_ID_AA64PFR0_EL1, .sign = FTR_UNSIGNED, @@ -1102,7 +1102,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "RAS Extension Support", .capability = ARM64_HAS_RAS_EXTN, - .type = ARM64_CPUCAP_SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64PFR0_EL1, .sign = FTR_UNSIGNED, @@ -1114,13 +1114,13 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "Data cache clean to the PoU not required for I/D coherence", .capability = ARM64_HAS_CACHE_IDC, - .type = ARM64_CPUCAP_SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cache_idc, }, { .desc = "Instruction cache invalidation not required for I/D coherence", .capability = ARM64_HAS_CACHE_DIC, - .type = ARM64_CPUCAP_SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cache_dic, }, {}, @@ -1129,7 +1129,7 @@ 
static const struct arm64_cpu_capabilities arm64_features[] = { #define HWCAP_CAP(reg, field, s, min_value, cap_type, cap) \ { \ .desc = #cap, \ - .type = ARM64_CPUCAP_SCOPE_SYSTEM, \ + .type = ARM64_CPUCAP_SYSTEM_FEATURE, \ .matches = has_cpuid_feature, \ .sys_reg = reg, \ .field_pos = field, \ -- cgit v1.2.3-59-g8ed1b From eaac4d83daa50fc1b9b7850346e9a62adfd4647e Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Mon, 26 Mar 2018 15:12:33 +0100 Subject: arm64: capabilities: Unify the verification Now that each capability describes how to treat the conflicts of CPU cap state vs System wide cap state, we can unify the verification logic to a single place. Reviewed-by: Dave Martin Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 91 +++++++++++++++++++++++++++--------------- 1 file changed, 58 insertions(+), 33 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 0b8e6b9e24e1..1fda957279e9 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1305,6 +1305,58 @@ enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) } } +/* + * Run through the list of capabilities to check for conflicts. + * If the system has already detected a capability, take necessary + * action on this CPU. + * + * Returns "false" on conflicts. + */ +static bool +__verify_local_cpu_caps(const struct arm64_cpu_capabilities *caps_list) +{ + bool cpu_has_cap, system_has_cap; + const struct arm64_cpu_capabilities *caps; + + for (caps = caps_list; caps->matches; caps++) { + cpu_has_cap = __this_cpu_has_cap(caps_list, caps->capability); + system_has_cap = cpus_have_cap(caps->capability); + + if (system_has_cap) { + /* + * Check if the new CPU misses an advertised feature, + * which is not safe to miss. + */ + if (!cpu_has_cap && !cpucap_late_cpu_optional(caps)) + break; + /* + * We have to issue cpu_enable() irrespective of + * whether the CPU has it or not, as it is enabled + * system wide. It is up to the call back to take + * appropriate action on this CPU. + */ + if (caps->cpu_enable) + caps->cpu_enable(caps); + } else { + /* + * Check if the CPU has this capability if it isn't + * safe to have when the system doesn't. + */ + if (cpu_has_cap && !cpucap_late_cpu_permitted(caps)) + break; + } + } + + if (caps->matches) { + pr_crit("CPU%d: Detected conflict for capability %d (%s), System: %d, CPU: %d\n", + smp_processor_id(), caps->capability, + caps->desc, system_has_cap, cpu_has_cap); + return false; + } + + return true; +} + /* * Check for CPU features that are used in early boot * based on the Boot CPU value. @@ -1327,25 +1379,10 @@ verify_local_elf_hwcaps(const struct arm64_cpu_capabilities *caps) } } -static void -verify_local_cpu_features(const struct arm64_cpu_capabilities *caps_list) +static void verify_local_cpu_features(void) { - const struct arm64_cpu_capabilities *caps = caps_list; - for (; caps->matches; caps++) { - if (!cpus_have_cap(caps->capability)) - continue; - /* - * If the new CPU misses an advertised feature, we cannot proceed - * further, park the cpu.
- */ - if (!__this_cpu_has_cap(caps_list, caps->capability)) { - pr_crit("CPU%d: missing feature: %s\n", - smp_processor_id(), caps->desc); - cpu_die_early(); - } - if (caps->cpu_enable) - caps->cpu_enable(caps); - } + if (!__verify_local_cpu_caps(arm64_features)) + cpu_die_early(); } static void verify_sve_features(void) @@ -1372,20 +1409,8 @@ static void verify_sve_features(void) */ static void verify_local_cpu_errata_workarounds(void) { - const struct arm64_cpu_capabilities *caps = arm64_errata; - - for (; caps->matches; caps++) { - if (cpus_have_cap(caps->capability)) { - if (caps->cpu_enable) - caps->cpu_enable(caps); - } else if (caps->matches(caps, SCOPE_LOCAL_CPU)) { - pr_crit("CPU%d: Requires work around for %s, not detected" - " at boot time\n", - smp_processor_id(), - caps->desc ? : "an erratum"); - cpu_die_early(); - } - } + if (!__verify_local_cpu_caps(arm64_errata)) + cpu_die_early(); } static void update_cpu_errata_workarounds(void) @@ -1409,7 +1434,7 @@ static void __init enable_errata_workarounds(void) static void verify_local_cpu_capabilities(void) { verify_local_cpu_errata_workarounds(); - verify_local_cpu_features(arm64_features); + verify_local_cpu_features(); verify_local_elf_hwcaps(arm64_elf_hwcaps); if (system_supports_32bit_el0()) -- cgit v1.2.3-59-g8ed1b From cce360b54ce6ca1bcf4b0a870ec076d83606775e Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Mon, 26 Mar 2018 15:12:34 +0100 Subject: arm64: capabilities: Filter the entries based on a given mask While processing the list of capabilities, it is useful to filter out some of the entries based on the given mask for the scope of the capabilities to allow better control. This can be used later for handling LOCAL vs SYSTEM wide capabilities and more. All capabilities should have their scope set to either LOCAL_CPU or SYSTEM. No functional/flow change. 
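For illustration, a sketch of how callers can use the new mask. The SCOPE_ALL calls mirror this patch's diff; the split calls are a hypothetical preview of the usage that later patches in this series enable:

	/* Process every entry regardless of scope, as before: */
	update_cpu_capabilities(arm64_errata, SCOPE_ALL,
				"enabling workaround for");

	/* With the mask, a pass can be restricted to a single scope: */
	update_cpu_capabilities(arm64_errata, SCOPE_LOCAL_CPU,
				"enabling workaround for");
	update_cpu_capabilities(arm64_errata, SCOPE_SYSTEM,
				"enabling workaround for");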
Cc: Will Deacon Cc: Mark Rutland Reviewed-by: Dave Martin Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 1 + arch/arm64/kernel/cpufeature.c | 33 ++++++++++++++++++++++----------- 2 files changed, 23 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index b8d8e6012385..c187b926daf9 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -211,6 +211,7 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; #define SCOPE_SYSTEM ARM64_CPUCAP_SCOPE_SYSTEM #define SCOPE_LOCAL_CPU ARM64_CPUCAP_SCOPE_LOCAL_CPU +#define SCOPE_ALL ARM64_CPUCAP_SCOPE_MASK /* * Is it permitted for a late CPU to have this capability when system diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 1fda957279e9..c49535de2fdf 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1256,10 +1256,12 @@ static bool __this_cpu_has_cap(const struct arm64_cpu_capabilities *cap_array, } static void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, - const char *info) + u16 scope_mask, const char *info) { + scope_mask &= ARM64_CPUCAP_SCOPE_MASK; for (; caps->matches; caps++) { - if (!caps->matches(caps, cpucap_default_scope(caps))) + if (!(caps->type & scope_mask) || + !caps->matches(caps, cpucap_default_scope(caps))) continue; if (!cpus_have_cap(caps->capability) && caps->desc) @@ -1281,12 +1283,14 @@ static int __enable_cpu_capability(void *arg) * CPUs */ static void __init -enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) +enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps, + u16 scope_mask) { + scope_mask &= ARM64_CPUCAP_SCOPE_MASK; for (; caps->matches; caps++) { unsigned int num = caps->capability; - if (!cpus_have_cap(num)) + if (!(caps->type & scope_mask) || !cpus_have_cap(num)) continue; /* Ensure cpus_have_const_cap(num) works */ @@ -1313,12 +1317,18 @@ enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) * Returns "false" on conflicts. 
*/ static bool -__verify_local_cpu_caps(const struct arm64_cpu_capabilities *caps_list) +__verify_local_cpu_caps(const struct arm64_cpu_capabilities *caps_list, + u16 scope_mask) { bool cpu_has_cap, system_has_cap; const struct arm64_cpu_capabilities *caps; + scope_mask &= ARM64_CPUCAP_SCOPE_MASK; + for (caps = caps_list; caps->matches; caps++) { + if (!(caps->type & scope_mask)) + continue; + cpu_has_cap = __this_cpu_has_cap(caps_list, caps->capability); system_has_cap = cpus_have_cap(caps->capability); @@ -1381,7 +1391,7 @@ verify_local_elf_hwcaps(const struct arm64_cpu_capabilities *caps) static void verify_local_cpu_features(void) { - if (!__verify_local_cpu_caps(arm64_features)) + if (!__verify_local_cpu_caps(arm64_features, SCOPE_ALL)) cpu_die_early(); } @@ -1409,18 +1419,19 @@ static void verify_sve_features(void) */ static void verify_local_cpu_errata_workarounds(void) { - if (!__verify_local_cpu_caps(arm64_errata)) + if (!__verify_local_cpu_caps(arm64_errata, SCOPE_ALL)) cpu_die_early(); } static void update_cpu_errata_workarounds(void) { - update_cpu_capabilities(arm64_errata, "enabling workaround for"); + update_cpu_capabilities(arm64_errata, SCOPE_ALL, + "enabling workaround for"); } static void __init enable_errata_workarounds(void) { - enable_cpu_capabilities(arm64_errata); + enable_cpu_capabilities(arm64_errata, SCOPE_ALL); } /* @@ -1466,8 +1477,8 @@ void check_local_cpu_capabilities(void) static void __init setup_feature_capabilities(void) { - update_cpu_capabilities(arm64_features, "detected:"); - enable_cpu_capabilities(arm64_features); + update_cpu_capabilities(arm64_features, SCOPE_ALL, "detected:"); + enable_cpu_capabilities(arm64_features, SCOPE_ALL); } DEFINE_STATIC_KEY_FALSE(arm64_const_caps_ready); -- cgit v1.2.3-59-g8ed1b From 600b9c919c2f4d07a7bf67864086aa3432224674 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Mon, 26 Mar 2018 15:12:35 +0100 Subject: arm64: capabilities: Prepare for grouping features and errata work arounds We are about to group the handling of all capabilities (features and errata workarounds). This patch open codes the wrapper routines to make it easier to merge the handling. Reviewed-by: Dave Martin Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 58 +++++++++++++----------------------------- 1 file changed, 18 insertions(+), 40 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index c49535de2fdf..703215f32a91 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -510,7 +510,8 @@ static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new) } extern const struct arm64_cpu_capabilities arm64_errata[]; -static void update_cpu_errata_workarounds(void); +static void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, + u16 scope_mask, const char *info); void __init init_cpu_features(struct cpuinfo_arm64 *info) { @@ -559,7 +560,8 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) * Run the errata work around checks on the boot CPU, once we have * initialised the cpu feature infrastructure. 
*/ - update_cpu_errata_workarounds(); + update_cpu_capabilities(arm64_errata, SCOPE_ALL, + "enabling workaround for"); } static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new) @@ -1389,12 +1391,6 @@ verify_local_elf_hwcaps(const struct arm64_cpu_capabilities *caps) } } -static void verify_local_cpu_features(void) -{ - if (!__verify_local_cpu_caps(arm64_features, SCOPE_ALL)) - cpu_die_early(); -} - static void verify_sve_features(void) { u64 safe_zcr = read_sanitised_ftr_reg(SYS_ZCR_EL1); @@ -1412,27 +1408,6 @@ static void verify_sve_features(void) /* Add checks on other ZCR bits here if necessary */ } -/* - * The CPU Errata work arounds are detected and applied at boot time - * and the related information is freed soon after. If the new CPU requires - * an errata not detected at boot, fail this CPU. - */ -static void verify_local_cpu_errata_workarounds(void) -{ - if (!__verify_local_cpu_caps(arm64_errata, SCOPE_ALL)) - cpu_die_early(); -} - -static void update_cpu_errata_workarounds(void) -{ - update_cpu_capabilities(arm64_errata, SCOPE_ALL, - "enabling workaround for"); -} - -static void __init enable_errata_workarounds(void) -{ - enable_cpu_capabilities(arm64_errata, SCOPE_ALL); -} /* * Run through the enabled system capabilities and enable() it on this CPU. @@ -1444,8 +1419,15 @@ static void __init enable_errata_workarounds(void) */ static void verify_local_cpu_capabilities(void) { - verify_local_cpu_errata_workarounds(); - verify_local_cpu_features(); + /* + * The CPU Errata work arounds are detected and applied at boot time + * and the related information is freed soon after. If the new CPU + * requires an errata not detected at boot, fail this CPU. + */ + if (!__verify_local_cpu_caps(arm64_errata, SCOPE_ALL)) + cpu_die_early(); + if (!__verify_local_cpu_caps(arm64_features, SCOPE_ALL)) + cpu_die_early(); verify_local_elf_hwcaps(arm64_elf_hwcaps); if (system_supports_32bit_el0()) @@ -1470,17 +1452,12 @@ void check_local_cpu_capabilities(void) * advertised capabilities. */ if (!sys_caps_initialised) - update_cpu_errata_workarounds(); + update_cpu_capabilities(arm64_errata, SCOPE_ALL, + "enabling workaround for"); else verify_local_cpu_capabilities(); } -static void __init setup_feature_capabilities(void) -{ - update_cpu_capabilities(arm64_features, SCOPE_ALL, "detected:"); - enable_cpu_capabilities(arm64_features, SCOPE_ALL); -} - DEFINE_STATIC_KEY_FALSE(arm64_const_caps_ready); EXPORT_SYMBOL(arm64_const_caps_ready); @@ -1502,8 +1479,9 @@ void __init setup_cpu_features(void) u32 cwg; /* Set the CPU feature capabilies */ - setup_feature_capabilities(); - enable_errata_workarounds(); + update_cpu_capabilities(arm64_features, SCOPE_ALL, "detected:"); + enable_cpu_capabilities(arm64_features, SCOPE_ALL); + enable_cpu_capabilities(arm64_errata, SCOPE_ALL); mark_const_caps_ready(); setup_elf_hwcaps(arm64_elf_hwcaps); -- cgit v1.2.3-59-g8ed1b From d69fe9a7e7214d49fe157ec20889892388d0fe23 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Mon, 26 Mar 2018 15:12:36 +0100 Subject: arm64: capabilities: Split the processing of errata work arounds Right now we run the errata workaround checks on all boot-time active CPUs, with SCOPE_ALL. This wouldn't help with detecting erratum workarounds that have SCOPE_SYSTEM. There are none yet, but we plan to introduce some: let us clean this up so that such workarounds can be detected and enabled correctly. So, we now run the checks with SCOPE_LOCAL_CPU on all CPUs, and the SCOPE_SYSTEM checks are run only once, after all the boot-time CPUs are active.
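Sketched with the calls used in this patch, the resulting flow is:

	/* On each boot-time active CPU: */
	update_cpu_capabilities(arm64_errata, SCOPE_LOCAL_CPU,
				"enabling workaround for");

	/* Once, from setup_cpu_features(), after all boot-time CPUs are active: */
	update_cpu_capabilities(arm64_errata, SCOPE_SYSTEM,
				"enabling workaround for");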
Reviewed-by: Dave Martin Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 703215f32a91..b8d23f4b259e 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -560,7 +560,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) * Run the errata work around checks on the boot CPU, once we have * initialised the cpu feature infrastructure. */ - update_cpu_capabilities(arm64_errata, SCOPE_ALL, + update_cpu_capabilities(arm64_errata, SCOPE_LOCAL_CPU, "enabling workaround for"); } @@ -1452,7 +1452,7 @@ void check_local_cpu_capabilities(void) * advertised capabilities. */ if (!sys_caps_initialised) - update_cpu_capabilities(arm64_errata, SCOPE_ALL, + update_cpu_capabilities(arm64_errata, SCOPE_LOCAL_CPU, "enabling workaround for"); else verify_local_cpu_capabilities(); @@ -1480,6 +1480,8 @@ void __init setup_cpu_features(void) /* Set the CPU feature capabilies */ update_cpu_capabilities(arm64_features, SCOPE_ALL, "detected:"); + update_cpu_capabilities(arm64_errata, SCOPE_SYSTEM, + "enabling workaround for"); enable_cpu_capabilities(arm64_features, SCOPE_ALL); enable_cpu_capabilities(arm64_errata, SCOPE_ALL); mark_const_caps_ready(); -- cgit v1.2.3-59-g8ed1b From fbd890b9b8497bab04c1d338bd97579a7bc53fab Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Mon, 26 Mar 2018 15:12:37 +0100 Subject: arm64: capabilities: Allow features based on local CPU scope So far we have treated the feature capabilities as system wide, which doesn't help with features that could be detected locally on one or more CPUs (e.g., KPTI, software prefetch). This patch splits the feature detection into two phases: 1) Local CPU features are checked on all boot time active CPUs. 2) System wide features are checked only once after all CPUs are active. Reviewed-by: Dave Martin Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index b8d23f4b259e..7c5f32960f43 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -510,6 +510,7 @@ static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new) } extern const struct arm64_cpu_capabilities arm64_errata[]; +static const struct arm64_cpu_capabilities arm64_features[]; static void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, u16 scope_mask, const char *info); @@ -557,11 +558,12 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) } /* - * Run the errata work around checks on the boot CPU, once we have - * initialised the cpu feature infrastructure. + * Run the errata work around and local feature checks on the + * boot CPU, once we have initialised the cpu feature infrastructure. */ update_cpu_capabilities(arm64_errata, SCOPE_LOCAL_CPU, "enabling workaround for"); + update_cpu_capabilities(arm64_features, SCOPE_LOCAL_CPU, "detected:"); } static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new) @@ -1447,15 +1449,18 @@ void check_local_cpu_capabilities(void) /* * If we haven't finalised the system capabilities, this CPU gets - * a chance to update the errata work arounds. + * a chance to update the errata work arounds and local features.
* Otherwise, this CPU should verify that it has all the system * advertised capabilities. */ - if (!sys_caps_initialised) + if (!sys_caps_initialised) { update_cpu_capabilities(arm64_errata, SCOPE_LOCAL_CPU, "enabling workaround for"); - else + update_cpu_capabilities(arm64_features, SCOPE_LOCAL_CPU, + "detected:"); + } else { verify_local_cpu_capabilities(); + } } DEFINE_STATIC_KEY_FALSE(arm64_const_caps_ready); @@ -1479,7 +1484,7 @@ void __init setup_cpu_features(void) u32 cwg; /* Set the CPU feature capabilies */ - update_cpu_capabilities(arm64_features, SCOPE_ALL, "detected:"); + update_cpu_capabilities(arm64_features, SCOPE_SYSTEM, "detected:"); update_cpu_capabilities(arm64_errata, SCOPE_SYSTEM, "enabling workaround for"); enable_cpu_capabilities(arm64_features, SCOPE_ALL); -- cgit v1.2.3-59-g8ed1b From ed478b3f9e4ac97fdbe07007fb2662415de8fe25 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Mon, 26 Mar 2018 15:12:38 +0100 Subject: arm64: capabilities: Group handling of features and errata workarounds Now that the features and errata workarounds have the same rules and flow, group the handling of the tables. Reviewed-by: Dave Martin Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 73 ++++++++++++++++++++++++------------------ 1 file changed, 42 insertions(+), 31 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 7c5f32960f43..d3301359e981 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -510,9 +510,7 @@ static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new) } extern const struct arm64_cpu_capabilities arm64_errata[]; -static const struct arm64_cpu_capabilities arm64_features[]; -static void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, - u16 scope_mask, const char *info); +static void update_cpu_capabilities(u16 scope_mask); void __init init_cpu_features(struct cpuinfo_arm64 *info) { @@ -561,9 +559,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) * Run the errata work around and local feature checks on the * boot CPU, once we have initialised the cpu feature infrastructure. 
*/ - update_cpu_capabilities(arm64_errata, SCOPE_LOCAL_CPU, - "enabling workaround for"); - update_cpu_capabilities(arm64_features, SCOPE_LOCAL_CPU, "detected:"); + update_cpu_capabilities(SCOPE_LOCAL_CPU); } static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new) @@ -1259,8 +1255,8 @@ static bool __this_cpu_has_cap(const struct arm64_cpu_capabilities *cap_array, return false; } -static void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, - u16 scope_mask, const char *info) +static void __update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, + u16 scope_mask, const char *info) { scope_mask &= ARM64_CPUCAP_SCOPE_MASK; for (; caps->matches; caps++) { @@ -1274,6 +1270,13 @@ static void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, } } +static void update_cpu_capabilities(u16 scope_mask) +{ + __update_cpu_capabilities(arm64_features, scope_mask, "detected:"); + __update_cpu_capabilities(arm64_errata, scope_mask, + "enabling workaround for"); +} + static int __enable_cpu_capability(void *arg) { const struct arm64_cpu_capabilities *cap = arg; @@ -1287,8 +1290,8 @@ static int __enable_cpu_capability(void *arg) * CPUs */ static void __init -enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps, - u16 scope_mask) +__enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps, + u16 scope_mask) { scope_mask &= ARM64_CPUCAP_SCOPE_MASK; for (; caps->matches; caps++) { @@ -1313,6 +1316,12 @@ enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps, } } +static void __init enable_cpu_capabilities(u16 scope_mask) +{ + __enable_cpu_capabilities(arm64_features, scope_mask); + __enable_cpu_capabilities(arm64_errata, scope_mask); +} + /* * Run through the list of capabilities to check for conflicts. * If the system has already detected a capability, take necessary @@ -1371,6 +1380,12 @@ __verify_local_cpu_caps(const struct arm64_cpu_capabilities *caps_list, return true; } +static bool verify_local_cpu_caps(u16 scope_mask) +{ + return __verify_local_cpu_caps(arm64_errata, scope_mask) && + __verify_local_cpu_caps(arm64_features, scope_mask); +} + /* * Check for CPU features that are used in early boot * based on the Boot CPU value. @@ -1421,15 +1436,9 @@ static void verify_sve_features(void) */ static void verify_local_cpu_capabilities(void) { - /* - * The CPU Errata work arounds are detected and applied at boot time - * and the related information is freed soon after. If the new CPU - * requires an errata not detected at boot, fail this CPU. - */ - if (!__verify_local_cpu_caps(arm64_errata, SCOPE_ALL)) - cpu_die_early(); - if (!__verify_local_cpu_caps(arm64_features, SCOPE_ALL)) + if (!verify_local_cpu_caps(SCOPE_ALL)) cpu_die_early(); + verify_local_elf_hwcaps(arm64_elf_hwcaps); if (system_supports_32bit_el0()) @@ -1453,14 +1462,10 @@ void check_local_cpu_capabilities(void) * Otherwise, this CPU should verify that it has all the system * advertised capabilities. 
*/ - if (!sys_caps_initialised) { - update_cpu_capabilities(arm64_errata, SCOPE_LOCAL_CPU, - "enabling workaround for"); - update_cpu_capabilities(arm64_features, SCOPE_LOCAL_CPU, - "detected:"); - } else { + if (!sys_caps_initialised) + update_cpu_capabilities(SCOPE_LOCAL_CPU); + else verify_local_cpu_capabilities(); - } } DEFINE_STATIC_KEY_FALSE(arm64_const_caps_ready); @@ -1479,16 +1484,22 @@ bool this_cpu_has_cap(unsigned int cap) __this_cpu_has_cap(arm64_errata, cap)); } +static void __init setup_system_capabilities(void) +{ + /* + * We have finalised the system-wide safe feature + * registers, finalise the capabilities that depend + * on it. Also enable all the available capabilities. + */ + update_cpu_capabilities(SCOPE_SYSTEM); + enable_cpu_capabilities(SCOPE_ALL); +} + void __init setup_cpu_features(void) { u32 cwg; - /* Set the CPU feature capabilies */ - update_cpu_capabilities(arm64_features, SCOPE_SYSTEM, "detected:"); - update_cpu_capabilities(arm64_errata, SCOPE_SYSTEM, - "enabling workaround for"); - enable_cpu_capabilities(arm64_features, SCOPE_ALL); - enable_cpu_capabilities(arm64_errata, SCOPE_ALL); + setup_system_capabilities(); mark_const_caps_ready(); setup_elf_hwcaps(arm64_elf_hwcaps); -- cgit v1.2.3-59-g8ed1b From 5c137714dd8cae464dbd5f028c07af149e6d09fc Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Mon, 26 Mar 2018 15:12:39 +0100 Subject: arm64: capabilities: Introduce weak features based on local CPU Now that we have the flexibility of defining system features based on individual CPUs, introduce CPU feature type that can be detected on a local SCOPE and ignores the conflict on late CPUs. This is applicable for ARM64_HAS_NO_HW_PREFETCH, where it is fine for the system to have CPUs without hardware prefetch turning up later. We only suffer a performance penalty, nothing fatal. Cc: Will Deacon Reviewed-by: Dave Martin Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 8 ++++++++ arch/arm64/kernel/cpufeature.c | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index c187b926daf9..effb1c038221 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -239,6 +239,14 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; */ #define ARM64_CPUCAP_SYSTEM_FEATURE \ (ARM64_CPUCAP_SCOPE_SYSTEM | ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU) +/* + * CPU feature detected at boot time based on feature of one or more CPUs. + * All possible conflicts for a late CPU are ignored. 
+ */ +#define ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE \ + (ARM64_CPUCAP_SCOPE_LOCAL_CPU | \ + ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU | \ + ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU) struct arm64_cpu_capabilities { const char *desc; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index d3301359e981..a80ca94080b7 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1010,7 +1010,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "Software prefetching using PRFM", .capability = ARM64_HAS_NO_HW_PREFETCH, - .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, .matches = has_no_hw_prefetch, }, #ifdef CONFIG_ARM64_UAO -- cgit v1.2.3-59-g8ed1b From d3aec8a28be3b88bf75442e7c24fd9da8d69a6df Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Mon, 26 Mar 2018 15:12:40 +0100 Subject: arm64: capabilities: Restrict KPTI detection to boot-time CPUs KPTI is treated as a system wide feature and is only detected if all the CPUs in the system need the defense, unless it is forced via the kernel command line. This leaves a system with a mix of CPUs with and without the defense vulnerable. Also, if a late CPU needs KPTI but KPTI was not activated at boot time, the CPU is currently allowed to boot, which is a potential security vulnerability. This patch ensures that KPTI is turned on if at least one CPU detects the capability (i.e., it changes the scope to SCOPE_LOCAL_CPU). It also rejects a late CPU that requires the defense when the system hasn't enabled it. Cc: Will Deacon Reviewed-by: Dave Martin Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 9 +++++++ arch/arm64/kernel/cpufeature.c | 16 +++++++++++----- 2 files changed, 20 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index effb1c038221..4f050259dff3 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -248,6 +248,15 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU | \ ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU) +/* + * CPU feature detected at boot time, on one or more CPUs. A late CPU + * is not allowed to have the capability when the system doesn't have it. + * It is OK for a late CPU to miss the feature.
+ */ +#define ARM64_CPUCAP_BOOT_RESTRICTED_CPU_LOCAL_FEATURE \ + (ARM64_CPUCAP_SCOPE_LOCAL_CPU | \ + ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU) + struct arm64_cpu_capabilities { const char *desc; u16 capability; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index a80ca94080b7..cae3dcef0efa 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -880,10 +880,9 @@ static bool has_cache_dic(const struct arm64_cpu_capabilities *entry, static int __kpti_forced; /* 0: not forced, >0: forced on, <0: forced off */ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, - int __unused) + int scope) { char const *str = "command line option"; - u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); /* * For reasons that aren't entirely clear, enabling KPTI on Cavium @@ -914,8 +913,7 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, } /* Defer to CPU feature registers */ - return !cpuid_feature_extract_unsigned_field(pfr0, - ID_AA64PFR0_CSV3_SHIFT); + return !has_cpuid_feature(entry, scope); } static void @@ -1062,7 +1060,15 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "Kernel page table isolation (KPTI)", .capability = ARM64_UNMAP_KERNEL_AT_EL0, - .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .type = ARM64_CPUCAP_BOOT_RESTRICTED_CPU_LOCAL_FEATURE, + /* + * The ID feature fields below are used to indicate that + * the CPU doesn't need KPTI. See unmap_kernel_at_el0 for + * more details. + */ + .sys_reg = SYS_ID_AA64PFR0_EL1, + .field_pos = ID_AA64PFR0_CSV3_SHIFT, + .min_field_value = 1, .matches = unmap_kernel_at_el0, .cpu_enable = kpti_install_ng_mappings, }, -- cgit v1.2.3-59-g8ed1b From fd9d63da17daf09c0099e3d5e3f0c0f03d9b251b Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Mon, 26 Mar 2018 15:12:41 +0100 Subject: arm64: capabilities: Add support for features enabled early The kernel detects and uses some of the features based on the boot CPU and expects that all the following CPUs conform to it. E.g., with VHE and the boot CPU running at EL2, the kernel decides to keep the kernel running at EL2. If another CPU is brought up without this capability, we use custom hooks (via check_early_cpu_features()) to handle it. To handle such capabilities, add support for detecting and enabling capabilities based on the boot CPU. A bit is added to indicate if the capability should be detected early on the boot CPU. The infrastructure then ensures that such capabilities are probed and "enabled" early on the boot CPU, and then enabled on the subsequent CPUs. Cc: Julien Thierry Cc: Will Deacon Cc: Mark Rutland Cc: Marc Zyngier Reviewed-by: Dave Martin Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 48 +++++++++++++++++++++++++------ arch/arm64/kernel/cpufeature.c | 57 ++++++++++++++++++++++++++++--------- 2 files changed, 83 insertions(+), 22 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 4f050259dff3..2f5edefdff99 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -108,7 +108,7 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; * value of a field in CPU ID feature register or checking the cpu * model. The capability provides a call back ( @matches() ) to * perform the check. Scope defines how the checks should be performed.
- * There are three cases: + * There are three cases: * * a) SCOPE_LOCAL_CPU: check all the CPUs and "detect" if at least one * matches. This implies, we have to run the check on all the @@ -121,6 +121,11 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; * capability relies on a field in one of the CPU ID feature * registers, we use the sanitised value of the register from the * CPU feature infrastructure to make the decision. + * Or + * c) SCOPE_BOOT_CPU: Check only on the primary boot CPU to detect the + * feature. This category is for features that are "finalised" + * (or used) by the kernel very early even before the SMP cpus + * are brought up. * * The process of detection is usually denoted by "update" capability * state in the code. @@ -140,6 +145,11 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; * CPUs are treated "late CPUs" for capabilities determined by the boot * CPU. * + * At the moment there are two passes of finalising the capabilities. + * a) Boot CPU scope capabilities - Finalised by primary boot CPU via + * setup_boot_cpu_capabilities(). + * b) Everything except (a) - Run via setup_system_capabilities(). + * * 3) Verification: When a CPU is brought online (e.g, by user or by the * kernel), the kernel should make sure that it is safe to use the CPU, * by verifying that the CPU is compliant with the state of the @@ -148,12 +158,21 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; * secondary_start_kernel()-> check_local_cpu_capabilities() * * As explained in (2) above, capabilities could be finalised at - * different points in the execution. Each CPU is verified against the - * "finalised" capabilities and if there is a conflict, the kernel takes - * an action, based on the severity (e.g, a CPU could be prevented from - * booting or cause a kernel panic). The CPU is allowed to "affect" the - * state of the capability, if it has not been finalised already. - * See section 5 for more details on conflicts. + * different points in the execution. Each newly booted CPU is verified + * against the capabilities that have been finalised by the time it + * boots. + * + * a) SCOPE_BOOT_CPU : All CPUs are verified against the capability + * except for the primary boot CPU. + * + * b) SCOPE_LOCAL_CPU, SCOPE_SYSTEM: All CPUs hotplugged on by the + * user after the kernel boot are verified against the capability. + * + * If there is a conflict, the kernel takes an action, based on the + * severity (e.g, a CPU could be prevented from booting or cause a + * kernel panic). The CPU is allowed to "affect" the state of the + * capability, if it has not been finalised already. See section 5 + * for more details on conflicts. * * 4) Action: As mentioned in (2), the kernel can take an action for each * detected capability, on all CPUs on the system. Appropriate actions @@ -202,15 +221,26 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; */ -/* Decide how the capability is detected. On a local CPU vs System wide */ +/* + * Decide how the capability is detected. + * On any local CPU vs System wide vs the primary boot CPU + */ #define ARM64_CPUCAP_SCOPE_LOCAL_CPU ((u16)BIT(0)) #define ARM64_CPUCAP_SCOPE_SYSTEM ((u16)BIT(1)) +/* + * The capability is detected on the Boot CPU and is used by the kernel + * during early boot, i.e., the capability should be "detected" and + * "enabled" as early as possible on all booting CPUs.
+ */ +#define ARM64_CPUCAP_SCOPE_BOOT_CPU ((u16)BIT(2)) #define ARM64_CPUCAP_SCOPE_MASK \ (ARM64_CPUCAP_SCOPE_SYSTEM | \ - ARM64_CPUCAP_SCOPE_LOCAL_CPU) + ARM64_CPUCAP_SCOPE_LOCAL_CPU | \ + ARM64_CPUCAP_SCOPE_BOOT_CPU) #define SCOPE_SYSTEM ARM64_CPUCAP_SCOPE_SYSTEM #define SCOPE_LOCAL_CPU ARM64_CPUCAP_SCOPE_LOCAL_CPU +#define SCOPE_BOOT_CPU ARM64_CPUCAP_SCOPE_BOOT_CPU #define SCOPE_ALL ARM64_CPUCAP_SCOPE_MASK /* diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index cae3dcef0efa..b2903d5d3e29 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -510,7 +510,7 @@ static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new) } extern const struct arm64_cpu_capabilities arm64_errata[]; -static void update_cpu_capabilities(u16 scope_mask); +static void __init setup_boot_cpu_capabilities(void); void __init init_cpu_features(struct cpuinfo_arm64 *info) { @@ -556,10 +556,10 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) } /* - * Run the errata work around and local feature checks on the - * boot CPU, once we have initialised the cpu feature infrastructure. + * Detect and enable early CPU capabilities based on the boot CPU, + * after we have initialised the CPU feature infrastructure. */ - update_cpu_capabilities(SCOPE_LOCAL_CPU); + setup_boot_cpu_capabilities(); } static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new) @@ -1311,13 +1311,24 @@ __enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps, if (caps->cpu_enable) { /* - * Use stop_machine() as it schedules the work allowing - * us to modify PSTATE, instead of on_each_cpu() which - * uses an IPI, giving us a PSTATE that disappears when - * we return. + * Capabilities with SCOPE_BOOT_CPU scope are finalised + * before any secondary CPU boots. Thus, each secondary + * will enable the capability as appropriate via + * check_local_cpu_capabilities(). The only exception is + * the boot CPU, for which the capability must be + * enabled here. This approach avoids costly + * stop_machine() calls for this case. + * + * Otherwise, use stop_machine() as it schedules the + * work allowing us to modify PSTATE, instead of + * on_each_cpu() which uses an IPI, giving us a PSTATE + * that disappears when we return. */ - stop_machine(__enable_cpu_capability, (void *)caps, - cpu_online_mask); + if (scope_mask & SCOPE_BOOT_CPU) + caps->cpu_enable(caps); + else + stop_machine(__enable_cpu_capability, + (void *)caps, cpu_online_mask); } } } @@ -1400,6 +1411,12 @@ static void check_early_cpu_features(void) { verify_cpu_run_el(); verify_cpu_asid_bits(); + /* + * Early features are used by the kernel already. If there + * is a conflict, we cannot proceed further. + */ + if (!verify_local_cpu_caps(SCOPE_BOOT_CPU)) + cpu_panic_kernel(); } static void @@ -1442,7 +1459,12 @@ static void verify_sve_features(void) */ static void verify_local_cpu_capabilities(void) { - if (!verify_local_cpu_caps(SCOPE_ALL)) + /* + * The capabilities with SCOPE_BOOT_CPU are checked from + * check_early_cpu_features(), as they need to be verified + * on all secondary CPUs. 
+ */ + if (!verify_local_cpu_caps(SCOPE_ALL & ~SCOPE_BOOT_CPU)) cpu_die_early(); verify_local_elf_hwcaps(arm64_elf_hwcaps); @@ -1474,6 +1496,14 @@ void check_local_cpu_capabilities(void) verify_local_cpu_capabilities(); } +static void __init setup_boot_cpu_capabilities(void) +{ + /* Detect capabilities with either SCOPE_BOOT_CPU or SCOPE_LOCAL_CPU */ + update_cpu_capabilities(SCOPE_BOOT_CPU | SCOPE_LOCAL_CPU); + /* Enable the SCOPE_BOOT_CPU capabilities alone right away */ + enable_cpu_capabilities(SCOPE_BOOT_CPU); +} + DEFINE_STATIC_KEY_FALSE(arm64_const_caps_ready); EXPORT_SYMBOL(arm64_const_caps_ready); @@ -1495,10 +1525,11 @@ static void __init setup_system_capabilities(void) /* * We have finalised the system-wide safe feature * registers, finalise the capabilities that depend - * on it. Also enable all the available capabilities. + * on it. Also enable all the available capabilities + * that are not enabled already. */ update_cpu_capabilities(SCOPE_SYSTEM); - enable_cpu_capabilities(SCOPE_ALL); + enable_cpu_capabilities(SCOPE_ALL & ~SCOPE_BOOT_CPU); } void __init setup_cpu_features(void) -- cgit v1.2.3-59-g8ed1b From 830dcc9f9a7cd26a812522a26efaacf7df6fc365 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Mon, 26 Mar 2018 15:12:42 +0100 Subject: arm64: capabilities: Change scope of VHE to Boot CPU feature We expect all CPUs to be running at the same EL inside the kernel with or without VHE enabled, and we have strict checks to ensure that any mismatch triggers a kernel panic. If VHE is enabled, we use the feature based on the boot CPU and all other CPUs should follow. This makes it a perfect candidate for a capability based on the boot CPU, which should be matched by all the CPUs (both when it is ON and OFF). This saves us some not-so-pretty hooks and special code, just for verifying the conflict. The patch also makes the VHE capability entry depend on CONFIG_ARM64_VHE. Cc: Marc Zyngier Cc: Will Deacon Reviewed-by: Dave Martin Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 6 ++++++ arch/arm64/include/asm/virt.h | 6 ------ arch/arm64/kernel/cpufeature.c | 5 +++-- arch/arm64/kernel/smp.c | 38 ------------------------------------- 4 files changed, 9 insertions(+), 46 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 2f5edefdff99..6a1280493f57 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -287,6 +287,12 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; (ARM64_CPUCAP_SCOPE_LOCAL_CPU | \ ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU) +/* + * CPU feature used early in the boot based on the boot CPU. All secondary + * CPUs must match the state of the capability as detected by the boot CPU. + */ +#define ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE ARM64_CPUCAP_SCOPE_BOOT_CPU + struct arm64_cpu_capabilities { const char *desc; u16 capability; diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index c5f89442785c..9d1e24e030b3 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -102,12 +102,6 @@ static inline bool has_vhe(void) return false; } -#ifdef CONFIG_ARM64_VHE -extern void verify_cpu_run_el(void); -#else -static inline void verify_cpu_run_el(void) {} -#endif - #endif /* __ASSEMBLY__ */ #endif /* !
__ASM__VIRT_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index b2903d5d3e29..17132a10a3db 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1033,13 +1033,15 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = cpufeature_pan_not_uao, }, #endif /* CONFIG_ARM64_PAN */ +#ifdef CONFIG_ARM64_VHE { .desc = "Virtualization Host Extensions", .capability = ARM64_HAS_VIRT_HOST_EXTN, - .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE, .matches = runs_at_el2, .cpu_enable = cpu_copy_el2regs, }, +#endif /* CONFIG_ARM64_VHE */ { .desc = "32-bit EL0 Support", .capability = ARM64_HAS_32BIT_EL0, @@ -1409,7 +1411,6 @@ static bool verify_local_cpu_caps(u16 scope_mask) */ static void check_early_cpu_features(void) { - verify_cpu_run_el(); verify_cpu_asid_bits(); /* * Early features are used by the kernel already. If there diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 5cef11450183..f3e2e3aec0b0 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -85,43 +85,6 @@ enum ipi_msg_type { IPI_WAKEUP }; -#ifdef CONFIG_ARM64_VHE - -/* Whether the boot CPU is running in HYP mode or not*/ -static bool boot_cpu_hyp_mode; - -static inline void save_boot_cpu_run_el(void) -{ - boot_cpu_hyp_mode = is_kernel_in_hyp_mode(); -} - -static inline bool is_boot_cpu_in_hyp_mode(void) -{ - return boot_cpu_hyp_mode; -} - -/* - * Verify that a secondary CPU is running the kernel at the same - * EL as that of the boot CPU. - */ -void verify_cpu_run_el(void) -{ - bool in_el2 = is_kernel_in_hyp_mode(); - bool boot_cpu_el2 = is_boot_cpu_in_hyp_mode(); - - if (in_el2 ^ boot_cpu_el2) { - pr_crit("CPU%d: mismatched Exception Level(EL%d) with boot CPU(EL%d)\n", - smp_processor_id(), - in_el2 ? 2 : 1, - boot_cpu_el2 ? 2 : 1); - cpu_panic_kernel(); - } -} - -#else -static inline void save_boot_cpu_run_el(void) {} -#endif - #ifdef CONFIG_HOTPLUG_CPU static int op_cpu_kill(unsigned int cpu); #else @@ -447,7 +410,6 @@ void __init smp_prepare_boot_cpu(void) */ jump_label_init(); cpuinfo_store_boot_cpu(); - save_boot_cpu_run_el(); } static u64 __init of_get_cpu_mpidr(struct device_node *dn) -- cgit v1.2.3-59-g8ed1b From 5e7951ce19abf4113645ae789c033917356ee96f Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Mon, 26 Mar 2018 15:12:43 +0100 Subject: arm64: capabilities: Clean up midr range helpers We are about to introduce generic MIDR range helpers. Clean up the existing helpers in erratum handling, preparing them to use generic version. 
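For example, with the helpers introduced below, an erratum affecting Cortex-A53 r0p0 - r0p2 is written as (taken from the conversion in this patch):

	{
		/* Cortex-A53 r0p[012] */
		.desc = "ARM errata 826319, 827319, 824069",
		.capability = ARM64_WORKAROUND_CLEAN_CACHE,
		ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 2),
		.cpu_enable = cpu_enable_cache_maint_trap,
	},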
Cc: Will Deacon Cc: Mark Rutland Cc: Ard Biesheuvel Reviewed-by: Dave Martin Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/kernel/cpu_errata.c | 108 +++++++++++++++++++++++------------------ 1 file changed, 60 insertions(+), 48 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index f5ab9545c5ea..9ea14954972c 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -237,23 +237,38 @@ qcom_enable_link_stack_sanitization(const struct arm64_cpu_capabilities *entry) } #endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */ -#define MIDR_RANGE(model, min, max) \ - .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, \ - .matches = is_affected_midr_range, \ - .midr_model = model, \ - .midr_range_min = min, \ - .midr_range_max = max -#define MIDR_ALL_VERSIONS(model) \ - .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, \ - .matches = is_affected_midr_range, \ - .midr_model = model, \ - .midr_range_min = 0, \ +#define CAP_MIDR_RANGE(model, v_min, r_min, v_max, r_max) \ + .matches = is_affected_midr_range, \ + .midr_model = model, \ + .midr_range_min = MIDR_CPU_VAR_REV(v_min, r_min), \ + .midr_range_max = MIDR_CPU_VAR_REV(v_max, r_max) + +#define CAP_MIDR_ALL_VERSIONS(model) \ + .matches = is_affected_midr_range, \ + .midr_model = model, \ + .midr_range_min = MIDR_CPU_VAR_REV(0, 0), \ .midr_range_max = (MIDR_VARIANT_MASK | MIDR_REVISION_MASK) #define MIDR_FIXED(rev, revidr_mask) \ .fixed_revs = (struct arm64_midr_revidr[]){{ (rev), (revidr_mask) }, {}} +#define ERRATA_MIDR_RANGE(model, v_min, r_min, v_max, r_max) \ + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, \ + CAP_MIDR_RANGE(model, v_min, r_min, v_max, r_max) + +/* Errata affecting a range of revisions of given model variant */ +#define ERRATA_MIDR_REV_RANGE(m, var, r_min, r_max) \ + ERRATA_MIDR_RANGE(m, var, r_min, var, r_max) + +/* Errata affecting a single variant/revision of a model */ +#define ERRATA_MIDR_REV(model, var, rev) \ + ERRATA_MIDR_RANGE(model, var, rev, var, rev) + +/* Errata affecting all variants/revisions of a given model */ +#define ERRATA_MIDR_ALL_VERSIONS(model) \ + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, \ + CAP_MIDR_ALL_VERSIONS(model) + const struct arm64_cpu_capabilities arm64_errata[] = { #if defined(CONFIG_ARM64_ERRATUM_826319) || \ defined(CONFIG_ARM64_ERRATUM_827319) || \ @@ -262,7 +277,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { /* Cortex-A53 r0p[012] */ .desc = "ARM errata 826319, 827319, 824069", .capability = ARM64_WORKAROUND_CLEAN_CACHE, - MIDR_RANGE(MIDR_CORTEX_A53, 0x00, 0x02), + ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 2), .cpu_enable = cpu_enable_cache_maint_trap, }, #endif @@ -271,7 +286,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { /* Cortex-A53 r0p[01] */ .desc = "ARM errata 819472", .capability = ARM64_WORKAROUND_CLEAN_CACHE, - MIDR_RANGE(MIDR_CORTEX_A53, 0x00, 0x01), + ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 1), .cpu_enable = cpu_enable_cache_maint_trap, }, #endif @@ -280,9 +295,9 @@ const struct arm64_cpu_capabilities arm64_errata[] = { /* Cortex-A57 r0p0 - r1p2 */ .desc = "ARM erratum 832075", .capability = ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE, - MIDR_RANGE(MIDR_CORTEX_A57, - MIDR_CPU_VAR_REV(0, 0), - MIDR_CPU_VAR_REV(1, 2)), + ERRATA_MIDR_RANGE(MIDR_CORTEX_A57, + 0, 0, + 1, 2), }, #endif #ifdef CONFIG_ARM64_ERRATUM_834220 { /* Cortex-A57 r0p0 - r1p2 */ .desc = "ARM erratum 834220", .capability =
ARM64_WORKAROUND_834220, - MIDR_RANGE(MIDR_CORTEX_A57, - MIDR_CPU_VAR_REV(0, 0), - MIDR_CPU_VAR_REV(1, 2)), + ERRATA_MIDR_RANGE(MIDR_CORTEX_A57, + 0, 0, + 1, 2), }, #endif #ifdef CONFIG_ARM64_ERRATUM_843419 @@ -300,7 +315,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { /* Cortex-A53 r0p[01234] */ .desc = "ARM erratum 843419", .capability = ARM64_WORKAROUND_843419, - MIDR_RANGE(MIDR_CORTEX_A53, 0x00, 0x04), + ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 4), MIDR_FIXED(0x4, BIT(8)), }, #endif @@ -309,7 +324,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { /* Cortex-A53 r0p[01234] */ .desc = "ARM erratum 845719", .capability = ARM64_WORKAROUND_845719, - MIDR_RANGE(MIDR_CORTEX_A53, 0x00, 0x04), + ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 4), }, #endif #ifdef CONFIG_CAVIUM_ERRATUM_23154 @@ -317,7 +332,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { /* Cavium ThunderX, pass 1.x */ .desc = "Cavium erratum 23154", .capability = ARM64_WORKAROUND_CAVIUM_23154, - MIDR_RANGE(MIDR_THUNDERX, 0x00, 0x01), + ERRATA_MIDR_REV_RANGE(MIDR_THUNDERX, 0, 0, 1), }, #endif #ifdef CONFIG_CAVIUM_ERRATUM_27456 @@ -325,15 +340,15 @@ const struct arm64_cpu_capabilities arm64_errata[] = { /* Cavium ThunderX, T88 pass 1.x - 2.1 */ .desc = "Cavium erratum 27456", .capability = ARM64_WORKAROUND_CAVIUM_27456, - MIDR_RANGE(MIDR_THUNDERX, - MIDR_CPU_VAR_REV(0, 0), - MIDR_CPU_VAR_REV(1, 1)), + ERRATA_MIDR_RANGE(MIDR_THUNDERX, + 0, 0, + 1, 1), }, { /* Cavium ThunderX, T81 pass 1.0 */ .desc = "Cavium erratum 27456", .capability = ARM64_WORKAROUND_CAVIUM_27456, - MIDR_RANGE(MIDR_THUNDERX_81XX, 0x00, 0x00), + ERRATA_MIDR_REV(MIDR_THUNDERX_81XX, 0, 0), }, #endif #ifdef CONFIG_CAVIUM_ERRATUM_30115 @@ -341,20 +356,21 @@ const struct arm64_cpu_capabilities arm64_errata[] = { /* Cavium ThunderX, T88 pass 1.x - 2.2 */ .desc = "Cavium erratum 30115", .capability = ARM64_WORKAROUND_CAVIUM_30115, - MIDR_RANGE(MIDR_THUNDERX, 0x00, - (1 << MIDR_VARIANT_SHIFT) | 2), + ERRATA_MIDR_RANGE(MIDR_THUNDERX, + 0, 0, + 1, 2), }, { /* Cavium ThunderX, T81 pass 1.0 - 1.2 */ .desc = "Cavium erratum 30115", .capability = ARM64_WORKAROUND_CAVIUM_30115, - MIDR_RANGE(MIDR_THUNDERX_81XX, 0x00, 0x02), + ERRATA_MIDR_REV_RANGE(MIDR_THUNDERX_81XX, 0, 0, 2), }, { /* Cavium ThunderX, T83 pass 1.0 */ .desc = "Cavium erratum 30115", .capability = ARM64_WORKAROUND_CAVIUM_30115, - MIDR_RANGE(MIDR_THUNDERX_83XX, 0x00, 0x00), + ERRATA_MIDR_REV(MIDR_THUNDERX_83XX, 0, 0), }, #endif { @@ -368,9 +384,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { { .desc = "Qualcomm Technologies Falkor erratum 1003", .capability = ARM64_WORKAROUND_QCOM_FALKOR_E1003, - MIDR_RANGE(MIDR_QCOM_FALKOR_V1, - MIDR_CPU_VAR_REV(0, 0), - MIDR_CPU_VAR_REV(0, 0)), + ERRATA_MIDR_REV(MIDR_QCOM_FALKOR_V1, 0, 0), }, { .desc = "Qualcomm Technologies Kryo erratum 1003", @@ -384,9 +398,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { { .desc = "Qualcomm Technologies Falkor erratum 1009", .capability = ARM64_WORKAROUND_REPEAT_TLBI, - MIDR_RANGE(MIDR_QCOM_FALKOR_V1, - MIDR_CPU_VAR_REV(0, 0), - MIDR_CPU_VAR_REV(0, 0)), + ERRATA_MIDR_REV(MIDR_QCOM_FALKOR_V1, 0, 0), }, #endif #ifdef CONFIG_ARM64_ERRATUM_858921 @@ -394,56 +406,56 @@ const struct arm64_cpu_capabilities arm64_errata[] = { /* Cortex-A73 all versions */ .desc = "ARM erratum 858921", .capability = ARM64_WORKAROUND_858921, - MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), + ERRATA_MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), }, #endif #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR { .capability = 
ARM64_HARDEN_BRANCH_PREDICTOR, - MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), + ERRATA_MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), .cpu_enable = enable_smccc_arch_workaround_1, }, { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), + ERRATA_MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), .cpu_enable = enable_smccc_arch_workaround_1, }, { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), + ERRATA_MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), .cpu_enable = enable_smccc_arch_workaround_1, }, { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - MIDR_ALL_VERSIONS(MIDR_CORTEX_A75), + ERRATA_MIDR_ALL_VERSIONS(MIDR_CORTEX_A75), .cpu_enable = enable_smccc_arch_workaround_1, }, { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1), + ERRATA_MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1), .cpu_enable = qcom_enable_link_stack_sanitization, }, { .capability = ARM64_HARDEN_BP_POST_GUEST_EXIT, - MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1), + ERRATA_MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1), }, { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR), + ERRATA_MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR), .cpu_enable = qcom_enable_link_stack_sanitization, }, { .capability = ARM64_HARDEN_BP_POST_GUEST_EXIT, - MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR), + ERRATA_MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR), }, { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN), + ERRATA_MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN), .cpu_enable = enable_smccc_arch_workaround_1, }, { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2), + ERRATA_MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2), .cpu_enable = enable_smccc_arch_workaround_1, }, #endif -- cgit v1.2.3-59-g8ed1b From 1df310505d6d544802016f6bae49aab836ae8510 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Mon, 26 Mar 2018 15:12:44 +0100 Subject: arm64: Add helpers for checking CPU MIDR against a range Add helpers for checking if the given CPU midr falls in a range of variants/revisions for a given model. Cc: Will Deacon Cc: Mark Rutland Cc: Ard Biesheuvel Reviewed-by: Dave Martin Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 4 ++-- arch/arm64/include/asm/cputype.h | 30 ++++++++++++++++++++++++++++++ arch/arm64/kernel/cpu_errata.c | 16 +++++----------- 3 files changed, 37 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 6a1280493f57..cd245871b578 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -10,6 +10,7 @@ #define __ASM_CPUFEATURE_H #include +#include #include #include #include @@ -306,8 +307,7 @@ struct arm64_cpu_capabilities { void (*cpu_enable)(const struct arm64_cpu_capabilities *cap); union { struct { /* To be used for erratum handling only */ - u32 midr_model; - u32 midr_range_min, midr_range_max; + struct midr_range midr_range; const struct arm64_midr_revidr { u32 midr_rv; /* revision/variant */ u32 revidr_mask; diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 350c76a1d15b..bf6cfdab743f 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -117,6 +117,36 @@ #define read_cpuid(reg) read_sysreg_s(SYS_ ## reg) +/* + * Represent a range of MIDR values for a given CPU model and a + * range of variant/revision values. 
+ * + * @model - CPU model as defined by MIDR_CPU_MODEL + * @rv_min - Minimum value for the revision/variant as defined by + * MIDR_CPU_VAR_REV + * @rv_max - Maximum value for the variant/revision for the range. + */ +struct midr_range { + u32 model; + u32 rv_min; + u32 rv_max; +}; + +#define MIDR_RANGE(m, v_min, r_min, v_max, r_max) \ + { \ + .model = m, \ + .rv_min = MIDR_CPU_VAR_REV(v_min, r_min), \ + .rv_max = MIDR_CPU_VAR_REV(v_max, r_max), \ + } + +#define MIDR_ALL_VERSIONS(m) MIDR_RANGE(m, 0, 0, 0xf, 0xf) + +static inline bool is_midr_in_range(u32 midr, struct midr_range const *range) +{ + return MIDR_IS_CPU_MODEL_RANGE(midr, range->model, + range->rv_min, range->rv_max); +} + /* * The CPU ID never changes at run time, so we might as well tell the * compiler that it's constant. Use this function to read the CPU ID diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 9ea14954972c..3c0bb6c4ed02 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -28,9 +28,7 @@ is_affected_midr_range(const struct arm64_cpu_capabilities *entry, int scope) u32 midr = read_cpuid_id(), revidr; WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); - if (!MIDR_IS_CPU_MODEL_RANGE(midr, entry->midr_model, - entry->midr_range_min, - entry->midr_range_max)) + if (!is_midr_in_range(midr, &entry->midr_range)) return false; midr &= MIDR_REVISION_MASK | MIDR_VARIANT_MASK; @@ -53,7 +51,7 @@ is_kryo_midr(const struct arm64_cpu_capabilities *entry, int scope) model &= MIDR_IMPLEMENTOR_MASK | (0xf00 << MIDR_PARTNUM_SHIFT) | MIDR_ARCHITECTURE_MASK; - return model == entry->midr_model; + return model == entry->midr_range.model; } static bool @@ -239,15 +237,11 @@ qcom_enable_link_stack_sanitization(const struct arm64_cpu_capabilities *entry) #define CAP_MIDR_RANGE(model, v_min, r_min, v_max, r_max) \ .matches = is_affected_midr_range, \ - .midr_model = model, \ - .midr_range_min = MIDR_CPU_VAR_REV(v_min, r_min), \ - .midr_range_max = MIDR_CPU_VAR_REV(v_max, r_max) + .midr_range = MIDR_RANGE(model, v_min, r_min, v_max, r_max) #define CAP_MIDR_ALL_VERSIONS(model) \ .matches = is_affected_midr_range, \ - .midr_model = model, \ - .midr_range_min = MIDR_CPU_VAR_REV(0, 0), \ - .midr_range_max = (MIDR_VARIANT_MASK | MIDR_REVISION_MASK) + .midr_range = MIDR_ALL_VERSIONS(model) #define MIDR_FIXED(rev, revidr_mask) \ .fixed_revs = (struct arm64_midr_revidr[]){{ (rev), (revidr_mask) }, {}} @@ -390,7 +384,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .desc = "Qualcomm Technologies Kryo erratum 1003", .capability = ARM64_WORKAROUND_QCOM_FALKOR_E1003, .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, - .midr_model = MIDR_QCOM_KRYO, + .midr_range.model = MIDR_QCOM_KRYO, .matches = is_kryo_midr, }, #endif -- cgit v1.2.3-59-g8ed1b From be5b299830c63ed76e0357473c4218c85fb388b3 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Mon, 26 Mar 2018 15:12:45 +0100 Subject: arm64: capabilities: Add support for checks based on a list of MIDRs Add helpers for detecting an erratum on a list of MIDR ranges of affected CPUs, all sharing the same workaround.
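For instance, several branch predictor hardening entries sharing enable_smccc_arch_workaround_1 can collapse into one table plus a single capability entry, roughly as follows (affected_midr_list is an illustrative name; the real lists appear in the diff below):

	static const struct midr_range affected_midr_list[] = {
		MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),
		MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
		{},	/* a zeroed entry (.model == 0) terminates the list */
	};

	{
		.capability = ARM64_HARDEN_BRANCH_PREDICTOR,
		ERRATA_MIDR_RANGE_LIST(affected_midr_list),
		.cpu_enable = enable_smccc_arch_workaround_1,
	},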
Cc: Will Deacon Cc: Mark Rutland Cc: Ard Biesheuvel Reviewed-by: Dave Martin Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 1 + arch/arm64/include/asm/cputype.h | 9 +++++ arch/arm64/kernel/cpu_errata.c | 81 ++++++++++++++++++++----------------- arch/arm64/kernel/cpufeature.c | 10 +++-- 4 files changed, 60 insertions(+), 41 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index cd245871b578..a16eb0731290 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -314,6 +314,7 @@ struct arm64_cpu_capabilities { } * const fixed_revs; }; + const struct midr_range *midr_range_list; struct { /* Feature register checking */ u32 sys_reg; u8 field_pos; diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index bf6cfdab743f..e86e65c187f8 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -147,6 +147,15 @@ static inline bool is_midr_in_range(u32 midr, struct midr_range const *range) range->rv_min, range->rv_max); } +static inline bool +is_midr_in_range_list(u32 midr, struct midr_range const *ranges) +{ + while (ranges->model) + if (is_midr_in_range(midr, ranges++)) + return true; + return false; +} + /* * The CPU ID never changes at run time, so we might as well tell the * compiler that it's constant. Use this function to read the CPU ID diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 3c0bb6c4ed02..9490b560d3fe 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -40,6 +40,14 @@ is_affected_midr_range(const struct arm64_cpu_capabilities *entry, int scope) return true; } +static bool __maybe_unused +is_affected_midr_range_list(const struct arm64_cpu_capabilities *entry, + int scope) +{ + WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); + return is_midr_in_range_list(read_cpuid_id(), entry->midr_range_list); +} + static bool __maybe_unused is_kryo_midr(const struct arm64_cpu_capabilities *entry, int scope) { @@ -250,6 +258,10 @@ qcom_enable_link_stack_sanitization(const struct arm64_cpu_capabilities *entry) .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, \ CAP_MIDR_RANGE(model, v_min, r_min, v_max, r_max) +#define CAP_MIDR_RANGE_LIST(list) \ + .matches = is_affected_midr_range_list, \ + .midr_range_list = list + /* Errata affecting a range of revisions of given model variant */ #define ERRATA_MIDR_REV_RANGE(m, var, r_min, r_max) \ ERRATA_MIDR_RANGE(m, var, r_min, var, r_max) @@ -263,6 +275,35 @@ qcom_enable_link_stack_sanitization(const struct arm64_cpu_capabilities *entry) .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, \ CAP_MIDR_ALL_VERSIONS(model) +/* Errata affecting a list of midr ranges, with same work around */ +#define ERRATA_MIDR_RANGE_LIST(midr_list) \ + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, \ + CAP_MIDR_RANGE_LIST(midr_list) + +#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR + +/* + * List of CPUs where we need to issue a psci call to + * harden the branch predictor. 
+ */ +static const struct midr_range arm64_bp_harden_smccc_cpus[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A75), + MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN), + MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2), + {}, +}; + +static const struct midr_range qcom_bp_harden_cpus[] = { + MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1), + MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR), + {}, +}; + +#endif + const struct arm64_cpu_capabilities arm64_errata[] = { #if defined(CONFIG_ARM64_ERRATUM_826319) || \ defined(CONFIG_ARM64_ERRATUM_827319) || \ @@ -406,51 +447,17 @@ const struct arm64_cpu_capabilities arm64_errata[] = { #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - ERRATA_MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), - .cpu_enable = enable_smccc_arch_workaround_1, - }, - { - .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - ERRATA_MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), - .cpu_enable = enable_smccc_arch_workaround_1, - }, - { - .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - ERRATA_MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), - .cpu_enable = enable_smccc_arch_workaround_1, - }, - { - .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - ERRATA_MIDR_ALL_VERSIONS(MIDR_CORTEX_A75), + ERRATA_MIDR_RANGE_LIST(arm64_bp_harden_smccc_cpus), .cpu_enable = enable_smccc_arch_workaround_1, }, { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - ERRATA_MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1), + ERRATA_MIDR_RANGE_LIST(qcom_bp_harden_cpus), .cpu_enable = qcom_enable_link_stack_sanitization, }, { .capability = ARM64_HARDEN_BP_POST_GUEST_EXIT, - ERRATA_MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1), - }, - { - .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - ERRATA_MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR), - .cpu_enable = qcom_enable_link_stack_sanitization, - }, - { - .capability = ARM64_HARDEN_BP_POST_GUEST_EXIT, - ERRATA_MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR), - }, - { - .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - ERRATA_MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN), - .cpu_enable = enable_smccc_arch_workaround_1, - }, - { - .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - ERRATA_MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2), - .cpu_enable = enable_smccc_arch_workaround_1, + ERRATA_MIDR_RANGE_LIST(qcom_bp_harden_cpus), }, #endif { diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 17132a10a3db..86e2b987a08e 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -882,6 +882,11 @@ static int __kpti_forced; /* 0: not forced, >0: forced on, <0: forced off */ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, int scope) { + /* List of CPUs that are not vulnerable and don't need KPTI */ + static const struct midr_range kpti_safe_list[] = { + MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2), + MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN), + }; char const *str = "command line option"; /* @@ -906,11 +911,8 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, return true; /* Don't force KPTI for CPUs that are not vulnerable */ - switch (read_cpuid_id() & MIDR_CPU_MODEL_MASK) { - case MIDR_CAVIUM_THUNDERX2: - case MIDR_BRCM_VULCAN: + if (is_midr_in_range_list(read_cpuid_id(), kpti_safe_list)) return false; - } /* Defer to CPU feature registers */ return !has_cpuid_feature(entry, scope); -- cgit v1.2.3-59-g8ed1b From ba7d9233c21997eb7eb8514cfb21ff46247dc162 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Mon, 26 Mar 2018 15:12:46 +0100 Subject: arm64: capabilities: Handle 
shared entries Some capabilities have different criteria for detection and associated actions, based on which criterion matched, even though they all share the same capability bit. So far we have used multiple entries with the same capability bit to handle this. This is prone to errors, as cpu_enable() is invoked for each entry, irrespective of whether the detection rule applies to the CPU or not. It also complicates other helpers, e.g., __this_cpu_has_cap. This patch adds a wrapper entry to cover all the possible variations of a capability by maintaining a list of matches + cpu_enable callbacks. To avoid complicating the prototypes for "matches()", we use arm64_cpu_capabilities itself to maintain the list, and we ignore all the other fields except matches & cpu_enable. This ensures: 1) The capability is set when at least one of the entries matches 2) Action is only taken for the entries that "match". This avoids explicit checks in cpu_enable() before taking any action. The only constraint here is that all the entries should have the same "type" (i.e., scope and conflict rules). If a cpu_enable() method is associated with multiple matches for a single capability, care should be taken that either the match criteria are mutually exclusive, or that the method is robust against being called multiple times. This also reverts the changes introduced by commit 67948af41f2e6818ed ("arm64: capabilities: Handle duplicate entries for a capability"). Cc: Robin Murphy Reviewed-by: Dave Martin Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 12 ++++++++ arch/arm64/kernel/cpu_errata.c | 55 ++++++++++++++++++++++++++++++++----- arch/arm64/kernel/cpufeature.c | 13 ++++----- 3 files changed, 66 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index a16eb0731290..09b0f2a80c8f 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -323,6 +323,18 @@ struct arm64_cpu_capabilities { bool sign; unsigned long hwcap; }; + /* + * A list of "matches/cpu_enable" pairs for the same + * "capability" of the same "type" as described by the parent. + * Only matches(), cpu_enable() and fields relevant to these + * methods are significant in the list. The cpu_enable() is + * invoked only if the corresponding entry "matches()". + * However, if a cpu_enable() method is associated + * with multiple matches(), care should be taken that either + * the match criteria are mutually exclusive, or that the + * method is robust against being called multiple times. + */ + const struct arm64_cpu_capabilities *match_list; }; }; diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 9490b560d3fe..6de823a1be10 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -280,6 +280,38 @@ qcom_enable_link_stack_sanitization(const struct arm64_cpu_capabilities *entry) .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, \ CAP_MIDR_RANGE_LIST(midr_list) +/* + * Generic helper for handling capabilities with multiple (match,enable) pairs + * of callbacks, sharing the same capability bit. + * Iterate over each entry to see if at least one matches.
+ */ +static bool multi_entry_cap_matches(const struct arm64_cpu_capabilities *entry, + int scope) +{ + const struct arm64_cpu_capabilities *caps; + + for (caps = entry->match_list; caps->matches; caps++) + if (caps->matches(caps, scope)) + return true; + + return false; +} + +/* + * Take appropriate action for all matching entries in the shared capability + * entry. + */ +static void +multi_entry_cap_cpu_enable(const struct arm64_cpu_capabilities *entry) +{ + const struct arm64_cpu_capabilities *caps; + + for (caps = entry->match_list; caps->matches; caps++) + if (caps->matches(caps, SCOPE_LOCAL_CPU) && + caps->cpu_enable) + caps->cpu_enable(caps); +} + #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR /* @@ -302,6 +334,18 @@ static const struct midr_range qcom_bp_harden_cpus[] = { {}, }; +static const struct arm64_cpu_capabilities arm64_bp_harden_list[] = { + { + CAP_MIDR_RANGE_LIST(arm64_bp_harden_smccc_cpus), + .cpu_enable = enable_smccc_arch_workaround_1, + }, + { + CAP_MIDR_RANGE_LIST(qcom_bp_harden_cpus), + .cpu_enable = qcom_enable_link_stack_sanitization, + }, + {}, +}; + #endif const struct arm64_cpu_capabilities arm64_errata[] = { @@ -447,13 +491,10 @@ const struct arm64_cpu_capabilities arm64_errata[] = { #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - ERRATA_MIDR_RANGE_LIST(arm64_bp_harden_smccc_cpus), - .cpu_enable = enable_smccc_arch_workaround_1, - }, - { - .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - ERRATA_MIDR_RANGE_LIST(qcom_bp_harden_cpus), - .cpu_enable = qcom_enable_link_stack_sanitization, + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + .matches = multi_entry_cap_matches, + .cpu_enable = multi_entry_cap_cpu_enable, + .match_list = arm64_bp_harden_list, }, { .capability = ARM64_HARDEN_BP_POST_GUEST_EXIT, diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 86e2b987a08e..00ed75398f60 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1259,9 +1259,9 @@ static bool __this_cpu_has_cap(const struct arm64_cpu_capabilities *cap_array, return false; for (caps = cap_array; caps->matches; caps++) - if (caps->capability == cap && - caps->matches(caps, SCOPE_LOCAL_CPU)) - return true; + if (caps->capability == cap) + return caps->matches(caps, SCOPE_LOCAL_CPU); + return false; } @@ -1351,19 +1351,18 @@ static void __init enable_cpu_capabilities(u16 scope_mask) * Returns "false" on conflicts. 
*/
 static bool
-__verify_local_cpu_caps(const struct arm64_cpu_capabilities *caps_list,
+__verify_local_cpu_caps(const struct arm64_cpu_capabilities *caps,
 			u16 scope_mask)
 {
 	bool cpu_has_cap, system_has_cap;
-	const struct arm64_cpu_capabilities *caps;

 	scope_mask &= ARM64_CPUCAP_SCOPE_MASK;

-	for (caps = caps_list; caps->matches; caps++) {
+	for (; caps->matches; caps++) {
 		if (!(caps->type & scope_mask))
 			continue;

-		cpu_has_cap = __this_cpu_has_cap(caps_list, caps->capability);
+		cpu_has_cap = caps->matches(caps, SCOPE_LOCAL_CPU);
 		system_has_cap = cpus_have_cap(caps->capability);

 		if (system_has_cap) {
--
cgit v1.2.3-59-g8ed1b

From 6e616864f21160d8d503523b60a53a29cecc6f24 Mon Sep 17 00:00:00 2001
From: Suzuki K Poulose
Date: Mon, 26 Mar 2018 15:12:47 +0100
Subject: arm64: Add MIDR encoding for Arm Cortex-A55 and Cortex-A35

Add the MIDR encodings for the Cortex-A55 and Cortex-A35.

Cc: Mark Rutland
Reviewed-by: Dave Martin
Signed-off-by: Suzuki K Poulose
Signed-off-by: Will Deacon
---
 arch/arm64/include/asm/cputype.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index e86e65c187f8..30014a9f8f2b 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -83,6 +83,8 @@
 #define ARM_CPU_PART_CORTEX_A53	0xD03
 #define ARM_CPU_PART_CORTEX_A73	0xD09
 #define ARM_CPU_PART_CORTEX_A75	0xD0A
+#define ARM_CPU_PART_CORTEX_A35	0xD04
+#define ARM_CPU_PART_CORTEX_A55	0xD05

 #define APM_CPU_PART_POTENZA	0x000

@@ -102,6 +104,8 @@
 #define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72)
 #define MIDR_CORTEX_A73 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A73)
 #define MIDR_CORTEX_A75 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A75)
+#define MIDR_CORTEX_A35 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A35)
+#define MIDR_CORTEX_A55 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A55)
 #define MIDR_THUNDERX	MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
 #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
 #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX)
--
cgit v1.2.3-59-g8ed1b

From 05abb595bbaccc9c4290bee62086d0eeea9f0f32 Mon Sep 17 00:00:00 2001
From: Suzuki K Poulose
Date: Mon, 26 Mar 2018 15:12:48 +0100
Subject: arm64: Delay enabling hardware DBM feature

We enable the hardware DBM bit on capable CPUs very early in boot, via
__cpu_setup. This doesn't give us the flexibility to optionally disable
the feature, as clearing the bit later is costly: the TLB can cache the
settings. Instead, we delay enabling the feature until the CPU is
brought up into the kernel. We use the feature capability mechanism to
handle it.

Hardware DBM is a non-conflicting feature, i.e., the kernel can safely
run with a mix of CPUs, some using the feature and others not. So, it
is safe for a late CPU to have this capability and enable it, even if
the active CPUs don't. To get this handled properly by the
infrastructure, we unconditionally set the capability and only enable
it on CPUs which really have the feature. Also, we print the feature
detection from the "matches" callback to make sure we don't mislead the
user when none of the CPUs could use the feature.
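The shape of the resulting capability is easiest to see in isolation. A
minimal sketch of the pattern, assuming the arm64_cpu_capabilities hooks
described above; has_feature_x(), cpu_enable_feature_x() and
cpu_local_has_feature() are placeholder names, while the concrete
instances, has_hw_dbm() and cpu_enable_hw_dbm(), appear in the diff
below:

static bool has_feature_x(const struct arm64_cpu_capabilities *cap,
			  int scope)
{
	/*
	 * Report the capability as present unconditionally, so that a
	 * late-onlined CPU is always accepted, whether or not it has
	 * the feature.
	 */
	return true;
}

static void cpu_enable_feature_x(const struct arm64_cpu_capabilities *cap)
{
	/* Called on every CPU; only touch the control bits where safe. */
	if (cpu_local_has_feature(cap)) {	/* hypothetical per-CPU check */
		write_sysreg(read_sysreg(tcr_el1) | TCR_HD, tcr_el1);
		isb();
	}
}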
Cc: Catalin Marinas Reviewed-by: Dave Martin Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpucaps.h | 3 +- arch/arm64/kernel/cpufeature.c | 71 ++++++++++++++++++++++++++++++++++++++++ arch/arm64/mm/proc.S | 13 ++++---- 3 files changed, 79 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index ff9fb3aba17b..21bb624e0a7a 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -48,7 +48,8 @@ #define ARM64_WORKAROUND_843419 27 #define ARM64_HAS_CACHE_IDC 28 #define ARM64_HAS_CACHE_DIC 29 +#define ARM64_HW_DBM 30 -#define ARM64_NCAPS 30 +#define ARM64_NCAPS 31 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 00ed75398f60..35e7ae8967f3 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -957,6 +957,57 @@ static int __init parse_kpti(char *str) __setup("kpti=", parse_kpti); #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ +#ifdef CONFIG_ARM64_HW_AFDBM +static inline void __cpu_enable_hw_dbm(void) +{ + u64 tcr = read_sysreg(tcr_el1) | TCR_HD; + + write_sysreg(tcr, tcr_el1); + isb(); +} + +static bool cpu_can_use_dbm(const struct arm64_cpu_capabilities *cap) +{ + return has_cpuid_feature(cap, SCOPE_LOCAL_CPU); +} + +static void cpu_enable_hw_dbm(struct arm64_cpu_capabilities const *cap) +{ + if (cpu_can_use_dbm(cap)) + __cpu_enable_hw_dbm(); +} + +static bool has_hw_dbm(const struct arm64_cpu_capabilities *cap, + int __unused) +{ + static bool detected = false; + /* + * DBM is a non-conflicting feature. i.e, the kernel can safely + * run a mix of CPUs with and without the feature. So, we + * unconditionally enable the capability to allow any late CPU + * to use the feature. We only enable the control bits on the + * CPU, if it actually supports. + * + * We have to make sure we print the "feature" detection only + * when at least one CPU actually uses it. So check if this CPU + * can actually use it and print the message exactly once. + * + * This is safe as all CPUs (including secondary CPUs - due to the + * LOCAL_CPU scope - and the hotplugged CPUs - via verification) + * goes through the "matches" check exactly once. Also if a CPU + * matches the criteria, it is guaranteed that the CPU will turn + * the DBM on, as the capability is unconditionally enabled. + */ + if (!detected && cpu_can_use_dbm(cap)) { + detected = true; + pr_info("detected: Hardware dirty bit management\n"); + } + + return true; +} + +#endif + static void cpu_copy_el2regs(const struct arm64_cpu_capabilities *__unused) { /* @@ -1133,6 +1184,26 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cache_dic, }, +#ifdef CONFIG_ARM64_HW_AFDBM + { + /* + * Since we turn this on always, we don't want the user to + * think that the feature is available when it may not be. + * So hide the description. 
+		 *
+		 * .desc = "Hardware pagetable Dirty Bit Management",
+		 *
+		 */
+		.type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE,
+		.capability = ARM64_HW_DBM,
+		.sys_reg = SYS_ID_AA64MMFR1_EL1,
+		.sign = FTR_UNSIGNED,
+		.field_pos = ID_AA64MMFR1_HADBS_SHIFT,
+		.min_field_value = 2,
+		.matches = has_hw_dbm,
+		.cpu_enable = cpu_enable_hw_dbm,
+	},
+#endif
 	{},
 };

diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 8f074d64b760..5f9a73a4452c 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -448,16 +448,15 @@ ENTRY(__cpu_setup)
 	tcr_compute_pa_size x10, #TCR_IPS_SHIFT, x5, x6
 #ifdef CONFIG_ARM64_HW_AFDBM
 	/*
-	 * Hardware update of the Access and Dirty bits.
+	 * Enable hardware update of the Access Flags bit.
+	 * Hardware dirty bit management is enabled later,
+	 * via capabilities.
 	 */
 	mrs	x9, ID_AA64MMFR1_EL1
 	and	x9, x9, #0xf
-	cbz	x9, 2f
-	cmp	x9, #2
-	b.lt	1f
-	orr	x10, x10, #TCR_HD		// hardware Dirty flag update
-1:	orr	x10, x10, #TCR_HA		// hardware Access flag update
-2:
+	cbz	x9, 1f
+	orr	x10, x10, #TCR_HA		// hardware Access flag update
+1:
 #endif	/* CONFIG_ARM64_HW_AFDBM */
 	msr	tcr_el1, x10
 	ret					// return to head.S
--
cgit v1.2.3-59-g8ed1b

From ece1397cbc89c51914fae1aec729539cfd8bd62b Mon Sep 17 00:00:00 2001
From: Suzuki K Poulose
Date: Mon, 26 Mar 2018 15:12:49 +0100
Subject: arm64: Add work around for Arm Cortex-A55 Erratum 1024718

Some variants of the Arm Cortex-A55 cores (r0p0, r0p1, r1p0) suffer
from erratum 1024718, which causes incorrect updates when the DBM/AP
bits in a page table entry are modified without a break-before-make
sequence. The work around is to skip enabling the hardware DBM feature
on the affected cores. The hardware Access Flag management feature is
not affected. There are some other cores suffering from this erratum,
which could be added to the midr_list to trigger the work around.

Cc: Catalin Marinas
Cc: ckadabi@codeaurora.org
Reviewed-by: Dave Martin
Signed-off-by: Suzuki K Poulose
Signed-off-by: Will Deacon
---
 Documentation/arm64/silicon-errata.txt |  1 +
 arch/arm64/Kconfig                     | 14 ++++++++++++++
 arch/arm64/kernel/cpufeature.c         | 16 +++++++++++++++-
 3 files changed, 30 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt
index c1d520de6dfe..3b2f2dd82225 100644
--- a/Documentation/arm64/silicon-errata.txt
+++ b/Documentation/arm64/silicon-errata.txt
@@ -55,6 +55,7 @@ stable kernels.
 | ARM            | Cortex-A57      | #834220         | ARM64_ERRATUM_834220        |
 | ARM            | Cortex-A72      | #853709         | N/A                         |
 | ARM            | Cortex-A73      | #858921         | ARM64_ERRATUM_858921        |
+| ARM            | Cortex-A55      | #1024718        | ARM64_ERRATUM_1024718       |
 | ARM            | MMU-500         | #841119,#826419 | N/A                         |
 |                |                 |                 |                             |
 | Cavium         | ThunderX ITS    | #22375, #24313  | CAVIUM_ERRATUM_22375        |
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index a6688fcf3dc6..fd74c5830232 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -465,6 +465,20 @@ config ARM64_ERRATUM_843419

 	  If unsure, say Y.

+config ARM64_ERRATUM_1024718
+	bool "Cortex-A55: 1024718: Update of DBM/AP bits without break before make might result in incorrect update"
+	default y
+	help
+	  This option adds work around for Arm Cortex-A55 Erratum 1024718.
+
+	  Affected Cortex-A55 cores (r0p0, r0p1, r1p0) could cause incorrect
+	  update of the hardware dirty bit when the DBM/AP bits are updated
+	  without a break-before-make. The work around is to disable the usage
+	  of hardware DBM locally on the affected cores. CPUs not affected by
+	  erratum will continue to use the feature.
+
+	  If unsure, say Y.
+ config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 35e7ae8967f3..381bb4077563 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -966,9 +966,23 @@ static inline void __cpu_enable_hw_dbm(void) isb(); } +static bool cpu_has_broken_dbm(void) +{ + /* List of CPUs which have broken DBM support. */ + static const struct midr_range cpus[] = { +#ifdef CONFIG_ARM64_ERRATUM_1024718 + MIDR_RANGE(MIDR_CORTEX_A55, 0, 0, 1, 0), // A55 r0p0 -r1p0 +#endif + {}, + }; + + return is_midr_in_range_list(read_cpuid_id(), cpus); +} + static bool cpu_can_use_dbm(const struct arm64_cpu_capabilities *cap) { - return has_cpuid_feature(cap, SCOPE_LOCAL_CPU); + return has_cpuid_feature(cap, SCOPE_LOCAL_CPU) && + !cpu_has_broken_dbm(); } static void cpu_enable_hw_dbm(struct arm64_cpu_capabilities const *cap) -- cgit v1.2.3-59-g8ed1b From 12eb369125abe92bfc55e9ce198200f5807b63ff Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 27 Mar 2018 11:51:12 +0100 Subject: arm64: cpufeature: Avoid warnings due to unused symbols An allnoconfig build complains about unused symbols due to functions that are called via conditional cpufeature and cpu_errata table entries. Annotate these as __maybe_unused if they are likely to be generic, or predicate their compilation on the same option as the table entry if they are specific to a given alternative. Signed-off-by: Will Deacon --- arch/arm64/kernel/cpu_errata.c | 6 +++--- arch/arm64/kernel/cpufeature.c | 12 +++++++----- 2 files changed, 10 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 6de823a1be10..4613e4d75c73 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -285,8 +285,8 @@ qcom_enable_link_stack_sanitization(const struct arm64_cpu_capabilities *entry) * of call backs, sharing the same capability bit. * Iterate over each entry to see if at least one matches. */ -static bool multi_entry_cap_matches(const struct arm64_cpu_capabilities *entry, - int scope) +static bool __maybe_unused +multi_entry_cap_matches(const struct arm64_cpu_capabilities *entry, int scope) { const struct arm64_cpu_capabilities *caps; @@ -301,7 +301,7 @@ static bool multi_entry_cap_matches(const struct arm64_cpu_capabilities *entry, * Take appropriate action for all matching entries in the shared capability * entry. 
*/ -static void +static void __maybe_unused multi_entry_cap_cpu_enable(const struct arm64_cpu_capabilities *entry) { const struct arm64_cpu_capabilities *caps; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 381bb4077563..071a4548a231 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -838,11 +838,6 @@ static bool has_no_hw_prefetch(const struct arm64_cpu_capabilities *entry, int _ MIDR_CPU_VAR_REV(1, MIDR_REVISION_MASK)); } -static bool runs_at_el2(const struct arm64_cpu_capabilities *entry, int __unused) -{ - return is_kernel_in_hyp_mode(); -} - static bool hyp_offset_low(const struct arm64_cpu_capabilities *entry, int __unused) { @@ -1022,6 +1017,12 @@ static bool has_hw_dbm(const struct arm64_cpu_capabilities *cap, #endif +#ifdef CONFIG_ARM64_VHE +static bool runs_at_el2(const struct arm64_cpu_capabilities *entry, int __unused) +{ + return is_kernel_in_hyp_mode(); +} + static void cpu_copy_el2regs(const struct arm64_cpu_capabilities *__unused) { /* @@ -1035,6 +1036,7 @@ static void cpu_copy_el2regs(const struct arm64_cpu_capabilities *__unused) if (!alternatives_applied) write_sysreg(read_sysreg(tpidr_el1), tpidr_el2); } +#endif static const struct arm64_cpu_capabilities arm64_features[] = { { -- cgit v1.2.3-59-g8ed1b From 3f251cf0abec2afb6eca67f71380670dd55bdebe Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 27 Mar 2018 12:04:51 +0100 Subject: Revert "arm64: Revert L1_CACHE_SHIFT back to 6 (64-byte cache line size)" This reverts commit 1f85b42a691cd8329ba82dbcaeec80ac1231b32a. The internal dma-direct.h API has changed in -next, which collides with us trying to use it to manage non-coherent DMA devices on systems with unreasonably large cache writeback granules. This isn't at all trivial to resolve, so revert our changes for now and we can revisit this after the merge window. Effectively, this just restores our behaviour back to that of 4.16. Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 1 - arch/arm64/include/asm/cache.h | 6 +++--- arch/arm64/include/asm/dma-direct.h | 43 ------------------------------------- arch/arm64/kernel/cpufeature.c | 9 ++++++-- arch/arm64/mm/dma-mapping.c | 17 --------------- arch/arm64/mm/init.c | 3 +-- 6 files changed, 11 insertions(+), 68 deletions(-) delete mode 100644 arch/arm64/include/asm/dma-direct.h (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index fd74c5830232..d4d53c87267e 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -17,7 +17,6 @@ config ARM64 select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA select ARCH_HAS_KCOV select ARCH_HAS_MEMBARRIER_SYNC_CORE - select ARCH_HAS_PHYS_TO_DMA select ARCH_HAS_SET_MEMORY select ARCH_HAS_SG_CHAIN select ARCH_HAS_STRICT_KERNEL_RWX diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index 5df5cfe1c143..9bbffc7a301f 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -33,7 +33,7 @@ #define ICACHE_POLICY_VIPT 2 #define ICACHE_POLICY_PIPT 3 -#define L1_CACHE_SHIFT (6) +#define L1_CACHE_SHIFT 7 #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) /* @@ -43,7 +43,7 @@ * cache before the transfer is done, causing old data to be seen by * the CPU. */ -#define ARCH_DMA_MINALIGN (128) +#define ARCH_DMA_MINALIGN L1_CACHE_BYTES #ifndef __ASSEMBLY__ @@ -77,7 +77,7 @@ static inline u32 cache_type_cwg(void) static inline int cache_line_size(void) { u32 cwg = cache_type_cwg(); - return cwg ? 4 << cwg : ARCH_DMA_MINALIGN; + return cwg ? 
4 << cwg : L1_CACHE_BYTES; } #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/dma-direct.h b/arch/arm64/include/asm/dma-direct.h deleted file mode 100644 index abb1b40ec751..000000000000 --- a/arch/arm64/include/asm/dma-direct.h +++ /dev/null @@ -1,43 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_DMA_DIRECT_H -#define __ASM_DMA_DIRECT_H - -#include -#include - -#include - -DECLARE_STATIC_KEY_FALSE(swiotlb_noncoherent_bounce); - -static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) -{ - dma_addr_t dev_addr = (dma_addr_t)paddr; - - return dev_addr - ((dma_addr_t)dev->dma_pfn_offset << PAGE_SHIFT); -} - -static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dev_addr) -{ - phys_addr_t paddr = (phys_addr_t)dev_addr; - - return paddr + ((phys_addr_t)dev->dma_pfn_offset << PAGE_SHIFT); -} - -static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) -{ - if (!dev->dma_mask) - return false; - - /* - * Force swiotlb buffer bouncing when ARCH_DMA_MINALIGN < CWG. The - * swiotlb bounce buffers are aligned to (1 << IO_TLB_SHIFT). - */ - if (static_branch_unlikely(&swiotlb_noncoherent_bounce) && - !is_device_dma_coherent(dev) && - !is_swiotlb_buffer(dma_to_phys(dev, addr))) - return false; - - return addr + size - 1 <= *dev->dma_mask; -} - -#endif /* __ASM_DMA_DIRECT_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 071a4548a231..96b15d7b10a8 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1624,6 +1624,7 @@ static void __init setup_system_capabilities(void) void __init setup_cpu_features(void) { u32 cwg; + int cls; setup_system_capabilities(); mark_const_caps_ready(); @@ -1644,9 +1645,13 @@ void __init setup_cpu_features(void) * Check for sane CTR_EL0.CWG value. */ cwg = cache_type_cwg(); + cls = cache_line_size(); if (!cwg) - pr_warn("No Cache Writeback Granule information, assuming %d\n", - ARCH_DMA_MINALIGN); + pr_warn("No Cache Writeback Granule information, assuming cache line size %d\n", + cls); + if (L1_CACHE_BYTES < cls) + pr_warn("L1_CACHE_BYTES smaller than the Cache Writeback Granule (%d < %d)\n", + L1_CACHE_BYTES, cls); } static bool __maybe_unused diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 1e9dac8684ca..a96ec0181818 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -33,7 +33,6 @@ #include static int swiotlb __ro_after_init; -DEFINE_STATIC_KEY_FALSE(swiotlb_noncoherent_bounce); static pgprot_t __get_dma_pgprot(unsigned long attrs, pgprot_t prot, bool coherent) @@ -505,14 +504,6 @@ static int __init arm64_dma_init(void) max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT)) swiotlb = 1; - if (WARN_TAINT(ARCH_DMA_MINALIGN < cache_line_size(), - TAINT_CPU_OUT_OF_SPEC, - "ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)", - ARCH_DMA_MINALIGN, cache_line_size())) { - swiotlb = 1; - static_branch_enable(&swiotlb_noncoherent_bounce); - } - return atomic_pool_init(); } arch_initcall(arm64_dma_init); @@ -891,14 +882,6 @@ static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, const struct iommu_ops *iommu, bool coherent) { - /* - * Enable swiotlb for buffer bouncing if ARCH_DMA_MINALIGN < CWG. - * dma_capable() forces the actual bounce if the device is - * non-coherent. 
- */ - if (static_branch_unlikely(&swiotlb_noncoherent_bounce) && !coherent) - iommu = NULL; - if (!dev->dma_ops) dev->dma_ops = &arm64_swiotlb_dma_ops; diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 664acf177799..9f3c47acf8ff 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -586,8 +586,7 @@ static void __init free_unused_memmap(void) void __init mem_init(void) { if (swiotlb_force == SWIOTLB_FORCE || - max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT) || - ARCH_DMA_MINALIGN < cache_line_size()) + max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT)) swiotlb_init(1); else swiotlb_force = SWIOTLB_NO_FORCE; -- cgit v1.2.3-59-g8ed1b From b4f9b39074878ede60d3e7bdc64a2d89ed5e9297 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 13 Feb 2018 17:43:23 +0000 Subject: arm64: fpsimd: include in fpsimd.h fpsimd.h uses the __init annotation, so pull in linux/init.h Acked-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/include/asm/fpsimd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 7623762f7fa6..e3454087275f 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -22,6 +22,7 @@ #ifndef __ASSEMBLY__ #include +#include #include /* -- cgit v1.2.3-59-g8ed1b From 8a624f145c0d40903cb73090f51797e480bd1295 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 19 Feb 2018 13:08:56 +0000 Subject: arm64: lse: Include compiler_types.h and export.h for out-of-line LL/SC When the LL/SC atomics are moved out-of-line, they are annotated as notrace and exported to modules. Ensure we pull in the relevant include files so that these macros are defined when we need them. Acked-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/include/asm/lse.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h index eec95768eaad..8262325e2fc6 100644 --- a/arch/arm64/include/asm/lse.h +++ b/arch/arm64/include/asm/lse.h @@ -4,8 +4,11 @@ #if defined(CONFIG_AS_LSE) && defined(CONFIG_ARM64_LSE_ATOMICS) +#include +#include #include #include +#include #ifdef __ASSEMBLER__ -- cgit v1.2.3-59-g8ed1b From e8a2d040fee54606ff62cc1f22e14ad9b2677f15 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 19 Feb 2018 11:39:23 +0000 Subject: arm64: cmpxchg: Include build_bug.h instead of bug.h for BUILD_BUG Having asm/cmpxchg.h pull in linux/bug.h is problematic because this ends up pulling in the atomic bitops which themselves may be built on top of atomic.h and cmpxchg.h. Instead, just include build_bug.h for the definition of BUILD_BUG. Signed-off-by: Will Deacon --- arch/arm64/include/asm/cmpxchg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index ae852add053d..bc9e07bc6428 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -18,7 +18,7 @@ #ifndef __ASM_CMPXCHG_H #define __ASM_CMPXCHG_H -#include +#include #include #include -- cgit v1.2.3-59-g8ed1b From c9406e514b95e825af20967430786a234d2dcabd Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 27 Feb 2018 10:48:31 +0000 Subject: arm64: move percpu cmpxchg implementation from cmpxchg.h to percpu.h We want to avoid pulling linux/preempt.h into cmpxchg.h, since that can introduce a circular dependency on linux/bitops.h. 
linux/preempt.h is only needed by the per-cpu cmpxchg implementation, which is better off alongside the per-cpu xchg implementation in percpu.h, so move it there and add the missing #include. Reported-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/include/asm/cmpxchg.h | 26 -------------------------- arch/arm64/include/asm/percpu.h | 29 +++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 26 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index bc9e07bc6428..19d4a18c2ac8 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -196,32 +196,6 @@ __CMPXCHG_GEN(_mb) __ret; \ }) -/* this_cpu_cmpxchg */ -#define _protect_cmpxchg_local(pcp, o, n) \ -({ \ - typeof(*raw_cpu_ptr(&(pcp))) __ret; \ - preempt_disable(); \ - __ret = cmpxchg_local(raw_cpu_ptr(&(pcp)), o, n); \ - preempt_enable(); \ - __ret; \ -}) - -#define this_cpu_cmpxchg_1(ptr, o, n) _protect_cmpxchg_local(ptr, o, n) -#define this_cpu_cmpxchg_2(ptr, o, n) _protect_cmpxchg_local(ptr, o, n) -#define this_cpu_cmpxchg_4(ptr, o, n) _protect_cmpxchg_local(ptr, o, n) -#define this_cpu_cmpxchg_8(ptr, o, n) _protect_cmpxchg_local(ptr, o, n) - -#define this_cpu_cmpxchg_double_8(ptr1, ptr2, o1, o2, n1, n2) \ -({ \ - int __ret; \ - preempt_disable(); \ - __ret = cmpxchg_double_local( raw_cpu_ptr(&(ptr1)), \ - raw_cpu_ptr(&(ptr2)), \ - o1, o2, n1, n2); \ - preempt_enable(); \ - __ret; \ -}) - #define __CMPWAIT_CASE(w, sz, name) \ static inline void __cmpwait_case_##name(volatile void *ptr, \ unsigned long val) \ diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h index 43393208229e..9234013e759e 100644 --- a/arch/arm64/include/asm/percpu.h +++ b/arch/arm64/include/asm/percpu.h @@ -16,7 +16,10 @@ #ifndef __ASM_PERCPU_H #define __ASM_PERCPU_H +#include + #include +#include #include static inline void set_my_cpu_offset(unsigned long off) @@ -197,6 +200,32 @@ static inline unsigned long __percpu_xchg(void *ptr, unsigned long val, return ret; } +/* this_cpu_cmpxchg */ +#define _protect_cmpxchg_local(pcp, o, n) \ +({ \ + typeof(*raw_cpu_ptr(&(pcp))) __ret; \ + preempt_disable(); \ + __ret = cmpxchg_local(raw_cpu_ptr(&(pcp)), o, n); \ + preempt_enable(); \ + __ret; \ +}) + +#define this_cpu_cmpxchg_1(ptr, o, n) _protect_cmpxchg_local(ptr, o, n) +#define this_cpu_cmpxchg_2(ptr, o, n) _protect_cmpxchg_local(ptr, o, n) +#define this_cpu_cmpxchg_4(ptr, o, n) _protect_cmpxchg_local(ptr, o, n) +#define this_cpu_cmpxchg_8(ptr, o, n) _protect_cmpxchg_local(ptr, o, n) + +#define this_cpu_cmpxchg_double_8(ptr1, ptr2, o1, o2, n1, n2) \ +({ \ + int __ret; \ + preempt_disable(); \ + __ret = cmpxchg_double_local( raw_cpu_ptr(&(ptr1)), \ + raw_cpu_ptr(&(ptr2)), \ + o1, o2, n1, n2); \ + preempt_enable(); \ + __ret; \ +}) + #define _percpu_read(pcp) \ ({ \ typeof(pcp) __retval; \ -- cgit v1.2.3-59-g8ed1b From 2a58fca9a7b4a3953c3e983ef80e36df85293a7c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 27 Feb 2018 10:50:20 +0000 Subject: arm64: cmpxchg: Include linux/compiler.h in asm/cmpxchg.h We need linux/compiler.h for unreachable(), so #include it here. 
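For context, unreachable() is what lets the size-dispatching cmpxchg
wrappers end without a return statement once every valid case has
returned. A simplified sketch of that shape (illustrative only; the
real helpers are generated by the __CMPXCHG_GEN/__CMPXCHG_CASE macros,
so __cmpxchg_sketch and the spelled-out cases here are stand-ins):

static inline unsigned long __cmpxchg_sketch(volatile void *ptr,
					     unsigned long old,
					     unsigned long new, int size)
{
	switch (size) {
	case 1:
		return __cmpxchg_case_1(ptr, old, new);
	case 2:
		return __cmpxchg_case_2(ptr, old, new);
	case 4:
		return __cmpxchg_case_4(ptr, old, new);
	case 8:
		return __cmpxchg_case_8(ptr, old, new);
	default:
		BUILD_BUG();	/* needs linux/build_bug.h */
	}

	unreachable();		/* needs linux/compiler.h */
}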
Reported-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/include/asm/cmpxchg.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index 19d4a18c2ac8..4f5fd2a36e6e 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -19,6 +19,7 @@ #define __ASM_CMPXCHG_H #include +#include #include #include -- cgit v1.2.3-59-g8ed1b From 7f170499f734c417290518aa50cac11953bf8161 Mon Sep 17 00:00:00 2001 From: Philip Elcan Date: Tue, 27 Mar 2018 21:55:32 -0400 Subject: arm64: tlbflush: avoid writing RES0 bits Several of the bits of the TLBI register operand are RES0 per the ARM ARM, so TLBI operations should avoid writing non-zero values to these bits. This patch adds a macro __TLBI_VADDR(addr, asid) that creates the operand register in the correct format and honors the RES0 bits. Acked-by: Mark Rutland Signed-off-by: Philip Elcan Signed-off-by: Will Deacon --- arch/arm64/include/asm/tlbflush.h | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index 9e82dd79c7db..dfc61d73f740 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -60,6 +60,15 @@ __tlbi(op, (arg) | USER_ASID_FLAG); \ } while (0) +/* This macro creates a properly formatted VA operand for the TLBI */ +#define __TLBI_VADDR(addr, asid) \ + ({ \ + unsigned long __ta = (addr) >> 12; \ + __ta &= GENMASK_ULL(43, 0); \ + __ta |= (unsigned long)(asid) << 48; \ + __ta; \ + }) + /* * TLB Management * ============== @@ -117,7 +126,7 @@ static inline void flush_tlb_all(void) static inline void flush_tlb_mm(struct mm_struct *mm) { - unsigned long asid = ASID(mm) << 48; + unsigned long asid = __TLBI_VADDR(0, ASID(mm)); dsb(ishst); __tlbi(aside1is, asid); @@ -128,7 +137,7 @@ static inline void flush_tlb_mm(struct mm_struct *mm) static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) { - unsigned long addr = uaddr >> 12 | (ASID(vma->vm_mm) << 48); + unsigned long addr = __TLBI_VADDR(uaddr, ASID(vma->vm_mm)); dsb(ishst); __tlbi(vale1is, addr); @@ -146,7 +155,7 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end, bool last_level) { - unsigned long asid = ASID(vma->vm_mm) << 48; + unsigned long asid = ASID(vma->vm_mm); unsigned long addr; if ((end - start) > MAX_TLB_RANGE) { @@ -154,8 +163,8 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma, return; } - start = asid | (start >> 12); - end = asid | (end >> 12); + start = __TLBI_VADDR(start, asid); + end = __TLBI_VADDR(end, asid); dsb(ishst); for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) { @@ -185,8 +194,8 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end return; } - start >>= 12; - end >>= 12; + start = __TLBI_VADDR(start, 0); + end = __TLBI_VADDR(end, 0); dsb(ishst); for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) @@ -202,7 +211,7 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end static inline void __flush_tlb_pgtable(struct mm_struct *mm, unsigned long uaddr) { - unsigned long addr = uaddr >> 12 | (ASID(mm) << 48); + unsigned long addr = __TLBI_VADDR(uaddr, ASID(mm)); __tlbi(vae1is, addr); __tlbi_user(vae1is, addr); -- cgit v1.2.3-59-g8ed1b From 20b8547277a6e8ee1d928792c1b2782c9a2a6cf5 Mon Sep 17 00:00:00 2001 From: Dave 
Martin Date: Wed, 28 Mar 2018 10:50:48 +0100 Subject: arm64: fpsimd: Split cpu field out from struct fpsimd_state In preparation for using a common representation of the FPSIMD state for tasks and KVM vcpus, this patch separates out the "cpu" field that is used to track the cpu on which the state was most recently loaded. This will allow common code to operate on task and vcpu contexts without requiring the cpu field to be stored at the same offset from the FPSIMD register data in both cases. This should avoid the need for messing with the definition of those parts of struct vcpu_arch that are exposed in the KVM user ABI. The resulting change is also convenient for grouping and defining the set of thread_struct fields that are supposed to be accessible to copy_{to,from}_user(), which includes user_fpsimd_state but should exclude the cpu field. This patch does not amend the usercopy whitelist to match: that will be addressed in a subsequent patch. Signed-off-by: Dave Martin [will: inline fpsimd_flush_state for now] Signed-off-by: Will Deacon --- arch/arm64/include/asm/fpsimd.h | 29 ++--------------------------- arch/arm64/include/asm/processor.h | 4 ++-- arch/arm64/kernel/fpsimd.c | 37 ++++++++++++++++++------------------- arch/arm64/kernel/ptrace.c | 10 +++++----- arch/arm64/kernel/signal.c | 3 +-- arch/arm64/kernel/signal32.c | 3 +-- 6 files changed, 29 insertions(+), 57 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index e3454087275f..aa7162ae93e3 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -25,31 +25,6 @@ #include #include -/* - * FP/SIMD storage area has: - * - FPSR and FPCR - * - 32 128-bit data registers - * - * Note that user_fpsimd forms a prefix of this structure, which is - * relied upon in the ptrace FP/SIMD accessors. - */ -struct fpsimd_state { - union { - struct user_fpsimd_state user_fpsimd; - struct { - __uint128_t vregs[32]; - u32 fpsr; - u32 fpcr; - /* - * For ptrace compatibility, pad to next 128-bit - * boundary here if extending this struct. 
- */ - }; - }; - /* the id of the last cpu to have restored this state */ - unsigned int cpu; -}; - #if defined(__KERNEL__) && defined(CONFIG_COMPAT) /* Masks for extracting the FPSR and FPCR from the FPSCR */ #define VFP_FPSCR_STAT_MASK 0xf800009f @@ -63,8 +38,8 @@ struct fpsimd_state { struct task_struct; -extern void fpsimd_save_state(struct fpsimd_state *state); -extern void fpsimd_load_state(struct fpsimd_state *state); +extern void fpsimd_save_state(struct user_fpsimd_state *state); +extern void fpsimd_load_state(struct user_fpsimd_state *state); extern void fpsimd_thread_switch(struct task_struct *next); extern void fpsimd_flush_thread(void); diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 4fc8867fde4d..63d3850db224 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -38,7 +38,6 @@ #include #include -#include #include #include #include @@ -108,7 +107,8 @@ struct thread_struct { #ifdef CONFIG_COMPAT unsigned long tp2_value; #endif - struct fpsimd_state fpsimd_state; + struct user_fpsimd_state fpsimd_state; + unsigned int fpsimd_cpu; void *sve_state; /* SVE registers, if any */ unsigned int sve_vl; /* SVE vector length */ unsigned int sve_vl_onexec; /* SVE vl after next exec */ diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 91f9f4f4aebe..b13a9b4dd13b 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -66,7 +66,7 @@ * been loaded into its FPSIMD registers most recently, or whether it has * been used to perform kernel mode NEON in the meantime. * - * For (a), we add a 'cpu' field to struct fpsimd_state, which gets updated to + * For (a), we add a fpsimd_cpu field to thread_struct, which gets updated to * the id of the current CPU every time the state is loaded onto a CPU. For (b), * we add the per-cpu variable 'fpsimd_last_state' (below), which contains the * address of the userland FPSIMD state of the task that was loaded onto the CPU @@ -75,7 +75,7 @@ * With this in place, we no longer have to restore the next FPSIMD state right * when switching between tasks. Instead, we can defer this check to userland * resume, at which time we verify whether the CPU's fpsimd_last_state and the - * task's fpsimd_state.cpu are still mutually in sync. If this is the case, we + * task's fpsimd_cpu are still mutually in sync. If this is the case, we * can omit the FPSIMD restore. * * As an optimization, we use the thread_info flag TIF_FOREIGN_FPSTATE to @@ -92,14 +92,14 @@ * flag with local_bh_disable() unless softirqs are already masked. * * For a certain task, the sequence may look something like this: - * - the task gets scheduled in; if both the task's fpsimd_state.cpu field + * - the task gets scheduled in; if both the task's fpsimd_cpu field * contains the id of the current CPU, and the CPU's fpsimd_last_state per-cpu * variable points to the task's fpsimd_state, the TIF_FOREIGN_FPSTATE flag is * cleared, otherwise it is set; * * - the task returns to userland; if TIF_FOREIGN_FPSTATE is set, the task's * userland FPSIMD state is copied from memory to the registers, the task's - * fpsimd_state.cpu field is set to the id of the current CPU, the current + * fpsimd_cpu field is set to the id of the current CPU, the current * CPU's fpsimd_last_state pointer is set to this task's fpsimd_state and the * TIF_FOREIGN_FPSTATE flag is cleared; * @@ -117,7 +117,7 @@ * whatever is in the FPSIMD registers is not saved to memory, but discarded. 
*/
 struct fpsimd_last_state_struct {
-	struct fpsimd_state *st;
+	struct user_fpsimd_state *st;
 	bool sve_in_use;
 };

@@ -418,7 +418,7 @@ static void fpsimd_to_sve(struct task_struct *task)
 {
 	unsigned int vq;
 	void *sst = task->thread.sve_state;
-	struct fpsimd_state const *fst = &task->thread.fpsimd_state;
+	struct user_fpsimd_state const *fst = &task->thread.fpsimd_state;
 	unsigned int i;

 	if (!system_supports_sve())
@@ -444,7 +444,7 @@ static void sve_to_fpsimd(struct task_struct *task)
 {
 	unsigned int vq;
 	void const *sst = task->thread.sve_state;
-	struct fpsimd_state *fst = &task->thread.fpsimd_state;
+	struct user_fpsimd_state *fst = &task->thread.fpsimd_state;
 	unsigned int i;

 	if (!system_supports_sve())
@@ -540,7 +540,7 @@ void sve_sync_from_fpsimd_zeropad(struct task_struct *task)
 {
 	unsigned int vq;
 	void *sst = task->thread.sve_state;
-	struct fpsimd_state const *fst = &task->thread.fpsimd_state;
+	struct user_fpsimd_state const *fst = &task->thread.fpsimd_state;
 	unsigned int i;

 	if (!test_tsk_thread_flag(task, TIF_SVE))
@@ -909,10 +909,9 @@ void fpsimd_thread_switch(struct task_struct *next)
 		 * the TIF_FOREIGN_FPSTATE flag so the state will be loaded
 		 * upon the next return to userland.
 		 */
-		struct fpsimd_state *st = &next->thread.fpsimd_state;
-
-		if (__this_cpu_read(fpsimd_last_state.st) == st
-		    && st->cpu == smp_processor_id())
+		if (__this_cpu_read(fpsimd_last_state.st) ==
+			&next->thread.fpsimd_state
+		    && next->thread.fpsimd_cpu == smp_processor_id())
 			clear_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE);
 		else
 			set_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE);
@@ -928,7 +927,8 @@ void fpsimd_flush_thread(void)

 	local_bh_disable();

-	memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state));
+	memset(&current->thread.fpsimd_state, 0,
+	       sizeof(current->thread.fpsimd_state));
 	fpsimd_flush_task_state(current);

 	if (system_supports_sve()) {
@@ -1005,11 +1005,10 @@ static void fpsimd_bind_to_cpu(void)
 {
 	struct fpsimd_last_state_struct *last =
 		this_cpu_ptr(&fpsimd_last_state);
-	struct fpsimd_state *st = &current->thread.fpsimd_state;

-	last->st = st;
+	last->st = &current->thread.fpsimd_state;
 	last->sve_in_use = test_thread_flag(TIF_SVE);
-	st->cpu = smp_processor_id();
+	current->thread.fpsimd_cpu = smp_processor_id();
 }

 /*
@@ -1044,7 +1043,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)

 	local_bh_disable();

-	current->thread.fpsimd_state.user_fpsimd = *state;
+	current->thread.fpsimd_state = *state;
 	if (system_supports_sve() && test_thread_flag(TIF_SVE))
 		fpsimd_to_sve(current);
@@ -1061,7 +1060,7 @@
  */
 void fpsimd_flush_task_state(struct task_struct *t)
 {
-	t->thread.fpsimd_state.cpu = NR_CPUS;
+	t->thread.fpsimd_cpu = NR_CPUS;
 }

 static inline void fpsimd_flush_cpu_state(void)
@@ -1160,7 +1159,7 @@ EXPORT_SYMBOL(kernel_neon_end);

 #ifdef CONFIG_EFI

-static DEFINE_PER_CPU(struct fpsimd_state, efi_fpsimd_state);
+static DEFINE_PER_CPU(struct user_fpsimd_state, efi_fpsimd_state);
 static DEFINE_PER_CPU(bool, efi_fpsimd_state_used);
 static DEFINE_PER_CPU(bool, efi_sve_state_used);

diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 6228476e74ba..fd9e8ed22b70 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -629,7 +629,7 @@ static int __fpr_get(struct task_struct *target,

 	sve_sync_to_fpsimd(target);

-	uregs = &target->thread.fpsimd_state.user_fpsimd;
+	uregs = &target->thread.fpsimd_state;

 	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs,
 				   start_pos, start_pos + sizeof(*uregs));
@@ -660,14 +660,14 @@ static int __fpr_set(struct task_struct *target,
 	 */
 	sve_sync_to_fpsimd(target);

-	newstate = target->thread.fpsimd_state.user_fpsimd;
+	newstate = target->thread.fpsimd_state;

 	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newstate,
 				 start_pos, start_pos + sizeof(newstate));
 	if (ret)
 		return ret;

-	target->thread.fpsimd_state.user_fpsimd = newstate;
+	target->thread.fpsimd_state = newstate;

 	return ret;
 }
@@ -1169,7 +1169,7 @@ static int compat_vfp_get(struct task_struct *target,
 	compat_ulong_t fpscr;
 	int ret, vregs_end_pos;

-	uregs = &target->thread.fpsimd_state.user_fpsimd;
+	uregs = &target->thread.fpsimd_state;

 	if (target == current)
 		fpsimd_preserve_current_state();
@@ -1202,7 +1202,7 @@ static int compat_vfp_set(struct task_struct *target,
 	compat_ulong_t fpscr;
 	int ret, vregs_end_pos;

-	uregs = &target->thread.fpsimd_state.user_fpsimd;
+	uregs = &target->thread.fpsimd_state;

 	vregs_end_pos = VFP_STATE_SIZE - sizeof(compat_ulong_t);
 	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, uregs, 0,
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index e5c656d0e316..bfeee9a51cee 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -179,8 +179,7 @@ static void __user *apply_user_offset(

 static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)
 {
-	struct user_fpsimd_state const *fpsimd =
-		&current->thread.fpsimd_state.user_fpsimd;
+	struct user_fpsimd_state const *fpsimd = &current->thread.fpsimd_state;
 	int err;

 	/* copy the FP and status/control registers */
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index 68f5e07b592b..374333703cbd 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -149,8 +149,7 @@ union __fpsimd_vreg {

 static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame)
 {
-	struct user_fpsimd_state const *fpsimd =
-		&current->thread.fpsimd_state.user_fpsimd;
+	struct user_fpsimd_state const *fpsimd = &current->thread.fpsimd_state;
 	compat_ulong_t magic = VFP_MAGIC;
 	compat_ulong_t size = VFP_STORAGE_SIZE;
 	compat_ulong_t fpscr, fpexc;
--
cgit v1.2.3-59-g8ed1b

From 65896545b69ffaac947c12e11d3dcc57fd1fb772 Mon Sep 17 00:00:00 2001
From: Dave Martin
Date: Wed, 28 Mar 2018 10:50:49 +0100
Subject: arm64: uaccess: Fix omissions from usercopy whitelist

When the hardened usercopy support was added for arm64, it was
concluded that all cases of usercopy into and out of thread_struct
were statically sized and so didn't require explicit whitelisting of
the appropriate fields in thread_struct.

Testing with usercopy hardening enabled has revealed that this is not
the case for certain ptrace regset manipulation calls on arm64. This
occurs because the sizes of usercopies associated with the regset API
are dynamic by construction, and because arm64 does not always stage
such copies via the stack: indeed the regset API is designed to avoid
the need for that by adding some bounds checking.

This is currently believed to affect only the fpsimd and TLS
registers.

Because the whitelisted fields in thread_struct must be contiguous,
this patch groups them together in a nested struct.
It is also necessary to be able to determine the location and size of that struct, so rather than making the struct anonymous (which would save on edits elsewhere) or adding an anonymous union containing named and unnamed instances of the same struct (gross), this patch gives the struct a name and makes the necessary edits to code that references it (noisy but simple). Care is needed to ensure that the new struct does not contain padding (which the usercopy hardening would fail to protect). For this reason, the presence of tp2_value is made unconditional, since a padding field would be needed there in any case. This pads up to the 16-byte alignment required by struct user_fpsimd_state. Acked-by: Kees Cook Reported-by: Mark Rutland Fixes: 9e8084d3f761 ("arm64: Implement thread_struct whitelist for hardened usercopy") Signed-off-by: Dave Martin Signed-off-by: Will Deacon --- arch/arm64/include/asm/processor.h | 38 +++++++++++++++++++----------- arch/arm64/kernel/fpsimd.c | 47 +++++++++++++++++++------------------- arch/arm64/kernel/process.c | 6 ++--- arch/arm64/kernel/ptrace.c | 30 ++++++++++++------------ arch/arm64/kernel/signal.c | 3 ++- arch/arm64/kernel/signal32.c | 3 ++- arch/arm64/kernel/sys_compat.c | 2 +- 7 files changed, 72 insertions(+), 57 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 63d3850db224..767598932549 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -34,6 +34,8 @@ #ifdef __KERNEL__ +#include +#include #include #include @@ -103,11 +105,18 @@ struct cpu_context { struct thread_struct { struct cpu_context cpu_context; /* cpu context */ - unsigned long tp_value; /* TLS register */ -#ifdef CONFIG_COMPAT - unsigned long tp2_value; -#endif - struct user_fpsimd_state fpsimd_state; + + /* + * Whitelisted fields for hardened usercopy: + * Maintainers must ensure manually that this contains no + * implicit padding. + */ + struct { + unsigned long tp_value; /* TLS register */ + unsigned long tp2_value; + struct user_fpsimd_state fpsimd_state; + } uw; + unsigned int fpsimd_cpu; void *sve_state; /* SVE registers, if any */ unsigned int sve_vl; /* SVE vector length */ @@ -117,14 +126,17 @@ struct thread_struct { struct debug_info debug; /* debugging */ }; -/* - * Everything usercopied to/from thread_struct is statically-sized, so - * no hardened usercopy whitelist is needed. 
- */
 static inline void arch_thread_struct_whitelist(unsigned long *offset,
 						unsigned long *size)
 {
-	*offset = *size = 0;
+	/* Verify that there is no padding among the whitelisted fields: */
+	BUILD_BUG_ON(sizeof_field(struct thread_struct, uw) !=
+		     sizeof_field(struct thread_struct, uw.tp_value) +
+		     sizeof_field(struct thread_struct, uw.tp2_value) +
+		     sizeof_field(struct thread_struct, uw.fpsimd_state));
+
+	*offset = offsetof(struct thread_struct, uw);
+	*size = sizeof_field(struct thread_struct, uw);
 }

 #ifdef CONFIG_COMPAT
@@ -132,13 +144,13 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset,
 ({									\
 	unsigned long *__tls;						\
 	if (is_compat_thread(task_thread_info(t)))			\
-		__tls = &(t)->thread.tp2_value;				\
+		__tls = &(t)->thread.uw.tp2_value;			\
 	else								\
-		__tls = &(t)->thread.tp_value;				\
+		__tls = &(t)->thread.uw.tp_value;			\
 	__tls;								\
 })
 #else
-#define task_user_tls(t)	(&(t)->thread.tp_value)
+#define task_user_tls(t)	(&(t)->thread.uw.tp_value)
 #endif

 /* Sync TPIDR_EL0 back to thread_struct for current */
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index b13a9b4dd13b..87a35364e750 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -224,7 +224,7 @@ static void sve_user_enable(void)
  *    sets TIF_SVE.
  *
  *    When stored, FPSIMD registers V0-V31 are encoded in
- *    task->fpsimd_state; bits [max : 128] for each of Z0-Z31 are
+ *    task->thread.uw.fpsimd_state; bits [max : 128] for each of Z0-Z31 are
  *    logically zero but not stored anywhere; P0-P15 and FFR are not
  *    stored and have unspecified values from userspace's point of
  *    view. For hygiene purposes, the kernel zeroes them on next use,
@@ -233,9 +233,9 @@ static void sve_user_enable(void)
  *    task->thread.sve_state does not need to be non-NULL, valid or any
  *    particular size: it must not be dereferenced.
  *
- *  * FPSR and FPCR are always stored in task->fpsimd_state irrespctive of
- *    whether TIF_SVE is clear or set, since these are not vector length
- *    dependent.
+ *  * FPSR and FPCR are always stored in task->thread.uw.fpsimd_state
+ *    irrespective of whether TIF_SVE is clear or set, since these are
+ *    not vector length dependent.
  */

 /*
@@ -253,10 +253,10 @@ static void task_fpsimd_load(void)

 	if (system_supports_sve() && test_thread_flag(TIF_SVE))
 		sve_load_state(sve_pffr(current),
-			       &current->thread.fpsimd_state.fpsr,
+			       &current->thread.uw.fpsimd_state.fpsr,
 			       sve_vq_from_vl(current->thread.sve_vl) - 1);
 	else
-		fpsimd_load_state(&current->thread.fpsimd_state);
+		fpsimd_load_state(&current->thread.uw.fpsimd_state);

 	if (system_supports_sve()) {
 		/* Toggle SVE trapping for userspace if needed */
@@ -292,9 +292,9 @@ static void task_fpsimd_save(void)
 		}

 		sve_save_state(sve_pffr(current),
-			       &current->thread.fpsimd_state.fpsr);
+			       &current->thread.uw.fpsimd_state.fpsr);
 	} else
-		fpsimd_save_state(&current->thread.fpsimd_state);
+		fpsimd_save_state(&current->thread.uw.fpsimd_state);
 	}
 }

@@ -405,20 +405,21 @@ static int __init sve_sysctl_init(void) { return 0; }
 	(SVE_SIG_ZREG_OFFSET(vq, n) - SVE_SIG_REGS_OFFSET))

 /*
- * Transfer the FPSIMD state in task->thread.fpsimd_state to
+ * Transfer the FPSIMD state in task->thread.uw.fpsimd_state to
  * task->thread.sve_state.
  *
  * Task can be a non-runnable task, or current. In the latter case,
  * softirqs (and preemption) must be disabled.
  * task->thread.sve_state must point to at least sve_state_size(task)
  * bytes of allocated kernel memory.
- * task->thread.fpsimd_state must be up to date before calling this function.
+ * task->thread.uw.fpsimd_state must be up to date before calling this
+ * function.
*/
 static void fpsimd_to_sve(struct task_struct *task)
 {
 	unsigned int vq;
 	void *sst = task->thread.sve_state;
-	struct user_fpsimd_state const *fst = &task->thread.fpsimd_state;
+	struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;
 	unsigned int i;

 	if (!system_supports_sve())
@@ -432,7 +433,7 @@ static void fpsimd_to_sve(struct task_struct *task)

 /*
  * Transfer the SVE state in task->thread.sve_state to
- * task->thread.fpsimd_state.
+ * task->thread.uw.fpsimd_state.
  *
  * Task can be a non-runnable task, or current. In the latter case,
  * softirqs (and preemption) must be disabled.
@@ -444,7 +445,7 @@ static void sve_to_fpsimd(struct task_struct *task)
 {
 	unsigned int vq;
 	void const *sst = task->thread.sve_state;
-	struct user_fpsimd_state *fst = &task->thread.fpsimd_state;
+	struct user_fpsimd_state *fst = &task->thread.uw.fpsimd_state;
 	unsigned int i;

 	if (!system_supports_sve())
@@ -511,7 +512,7 @@ void fpsimd_sync_to_sve(struct task_struct *task)
 }

 /*
- * Ensure that task->thread.fpsimd_state is up to date with respect to
+ * Ensure that task->thread.uw.fpsimd_state is up to date with respect to
  * the user task, irrespective of whether SVE is in use or not.
  *
  * This should only be called by ptrace. task must be non-runnable.
@@ -526,21 +527,21 @@ void sve_sync_to_fpsimd(struct task_struct *task)

 /*
  * Ensure that task->thread.sve_state is up to date with respect to
- * the task->thread.fpsimd_state.
+ * the task->thread.uw.fpsimd_state.
  *
  * This should only be called by ptrace to merge new FPSIMD register
  * values into a task for which SVE is currently active.
  * task must be non-runnable.
  * task->thread.sve_state must point to at least sve_state_size(task)
  * bytes of allocated kernel memory.
- * task->thread.fpsimd_state must already have been initialised with
+ * task->thread.uw.fpsimd_state must already have been initialised with
  * the new FPSIMD register values to be merged in.
  */
 void sve_sync_from_fpsimd_zeropad(struct task_struct *task)
 {
 	unsigned int vq;
 	void *sst = task->thread.sve_state;
-	struct user_fpsimd_state const *fst = &task->thread.fpsimd_state;
+	struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;
 	unsigned int i;

 	if (!test_tsk_thread_flag(task, TIF_SVE))
@@ -910,7 +911,7 @@ void fpsimd_thread_switch(struct task_struct *next)
 		 * upon the next return to userland.
 		 */
 		if (__this_cpu_read(fpsimd_last_state.st) ==
-			&next->thread.fpsimd_state
+			&next->thread.uw.fpsimd_state
 		    && next->thread.fpsimd_cpu == smp_processor_id())
 			clear_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE);
 		else
@@ -927,8 +928,8 @@ void fpsimd_flush_thread(void)

 	local_bh_disable();

-	memset(&current->thread.fpsimd_state, 0,
-	       sizeof(current->thread.fpsimd_state));
+	memset(&current->thread.uw.fpsimd_state, 0,
+	       sizeof(current->thread.uw.fpsimd_state));
 	fpsimd_flush_task_state(current);

 	if (system_supports_sve()) {
@@ -987,7 +988,7 @@ void fpsimd_preserve_current_state(void)

 /*
  * Like fpsimd_preserve_current_state(), but ensure that
- * current->thread.fpsimd_state is updated so that it can be copied to
+ * current->thread.uw.fpsimd_state is updated so that it can be copied to
  * the signal frame.
  */
 void fpsimd_signal_preserve_current_state(void)
@@ -1006,7 +1007,7 @@ static void fpsimd_bind_to_cpu(void)
 	struct fpsimd_last_state_struct *last =
 		this_cpu_ptr(&fpsimd_last_state);

-	last->st = &current->thread.fpsimd_state;
+	last->st = &current->thread.uw.fpsimd_state;
 	last->sve_in_use = test_thread_flag(TIF_SVE);
 	current->thread.fpsimd_cpu = smp_processor_id();
 }
@@ -1043,7 +1044,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)

 	local_bh_disable();

-	current->thread.fpsimd_state = *state;
+	current->thread.uw.fpsimd_state = *state;
 	if (system_supports_sve() && test_thread_flag(TIF_SVE))
 		fpsimd_to_sve(current);

diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index c0da6efe5465..f08a2ed9db0d 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -257,7 +257,7 @@ static void tls_thread_flush(void)
 	write_sysreg(0, tpidr_el0);

 	if (is_compat_task()) {
-		current->thread.tp_value = 0;
+		current->thread.uw.tp_value = 0;

 		/*
 		 * We need to ensure ordering between the shadow state and the
@@ -351,7 +351,7 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
 		 * for the new thread.
 		 */
 		if (clone_flags & CLONE_SETTLS)
-			p->thread.tp_value = childregs->regs[3];
+			p->thread.uw.tp_value = childregs->regs[3];
 	} else {
 		memset(childregs, 0, sizeof(struct pt_regs));
 		childregs->pstate = PSR_MODE_EL1h;
@@ -379,7 +379,7 @@ static void tls_thread_switch(struct task_struct *next)
 	tls_preserve_current_state();

 	if (is_compat_thread(task_thread_info(next)))
-		write_sysreg(next->thread.tp_value, tpidrro_el0);
+		write_sysreg(next->thread.uw.tp_value, tpidrro_el0);
 	else if (!arm64_kernel_unmapped_at_el0())
 		write_sysreg(0, tpidrro_el0);

diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index fd9e8ed22b70..71d99af24ef2 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -629,7 +629,7 @@ static int __fpr_get(struct task_struct *target,

 	sve_sync_to_fpsimd(target);

-	uregs = &target->thread.fpsimd_state;
+	uregs = &target->thread.uw.fpsimd_state;

 	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs,
 				   start_pos, start_pos + sizeof(*uregs));
@@ -655,19 +655,19 @@ static int __fpr_set(struct task_struct *target,
 	struct user_fpsimd_state newstate;

 	/*
-	 * Ensure target->thread.fpsimd_state is up to date, so that a
+	 * Ensure target->thread.uw.fpsimd_state is up to date, so that a
 	 * short copyin can't resurrect stale data.
*/ sve_sync_to_fpsimd(target); - newstate = target->thread.fpsimd_state; + newstate = target->thread.uw.fpsimd_state; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newstate, start_pos, start_pos + sizeof(newstate)); if (ret) return ret; - target->thread.fpsimd_state = newstate; + target->thread.uw.fpsimd_state = newstate; return ret; } @@ -692,7 +692,7 @@ static int tls_get(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) { - unsigned long *tls = &target->thread.tp_value; + unsigned long *tls = &target->thread.uw.tp_value; if (target == current) tls_preserve_current_state(); @@ -705,13 +705,13 @@ static int tls_set(struct task_struct *target, const struct user_regset *regset, const void *kbuf, const void __user *ubuf) { int ret; - unsigned long tls = target->thread.tp_value; + unsigned long tls = target->thread.uw.tp_value; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &tls, 0, -1); if (ret) return ret; - target->thread.tp_value = tls; + target->thread.uw.tp_value = tls; return ret; } @@ -842,7 +842,7 @@ static int sve_get(struct task_struct *target, start = end; end = SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE; ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.fpsimd_state.fpsr, + &target->thread.uw.fpsimd_state.fpsr, start, end); if (ret) return ret; @@ -941,7 +941,7 @@ static int sve_set(struct task_struct *target, start = end; end = SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.fpsimd_state.fpsr, + &target->thread.uw.fpsimd_state.fpsr, start, end); out: @@ -1169,7 +1169,7 @@ static int compat_vfp_get(struct task_struct *target, compat_ulong_t fpscr; int ret, vregs_end_pos; - uregs = &target->thread.fpsimd_state; + uregs = &target->thread.uw.fpsimd_state; if (target == current) fpsimd_preserve_current_state(); @@ -1202,7 +1202,7 @@ static int compat_vfp_set(struct task_struct *target, compat_ulong_t fpscr; int ret, vregs_end_pos; - uregs = &target->thread.fpsimd_state; + uregs = &target->thread.uw.fpsimd_state; vregs_end_pos = VFP_STATE_SIZE - sizeof(compat_ulong_t); ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, uregs, 0, @@ -1225,7 +1225,7 @@ static int compat_tls_get(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) { - compat_ulong_t tls = (compat_ulong_t)target->thread.tp_value; + compat_ulong_t tls = (compat_ulong_t)target->thread.uw.tp_value; return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &tls, 0, -1); } @@ -1235,13 +1235,13 @@ static int compat_tls_set(struct task_struct *target, const void __user *ubuf) { int ret; - compat_ulong_t tls = target->thread.tp_value; + compat_ulong_t tls = target->thread.uw.tp_value; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &tls, 0, -1); if (ret) return ret; - target->thread.tp_value = tls; + target->thread.uw.tp_value = tls; return ret; } @@ -1538,7 +1538,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, break; case COMPAT_PTRACE_GET_THREAD_AREA: - ret = put_user((compat_ulong_t)child->thread.tp_value, + ret = put_user((compat_ulong_t)child->thread.uw.tp_value, (compat_ulong_t __user *)datap); break; diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index bfeee9a51cee..154b7d30145d 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -179,7 +179,8 @@ static void __user 
*apply_user_offset(

 static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)
 {
-	struct user_fpsimd_state const *fpsimd = &current->thread.fpsimd_state;
+	struct user_fpsimd_state const *fpsimd =
+		&current->thread.uw.fpsimd_state;
 	int err;

 	/* copy the FP and status/control registers */
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index 374333703cbd..77b91f478995 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -149,7 +149,8 @@ union __fpsimd_vreg {

 static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame)
 {
-	struct user_fpsimd_state const *fpsimd = &current->thread.fpsimd_state;
+	struct user_fpsimd_state const *fpsimd =
+		&current->thread.uw.fpsimd_state;
 	compat_ulong_t magic = VFP_MAGIC;
 	compat_ulong_t size = VFP_STORAGE_SIZE;
 	compat_ulong_t fpscr, fpexc;
diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c
index 9897f416b29e..93ab57dcfc14 100644
--- a/arch/arm64/kernel/sys_compat.c
+++ b/arch/arm64/kernel/sys_compat.c
@@ -90,7 +90,7 @@ long compat_arm_syscall(struct pt_regs *regs)
 		return do_compat_cache_op(regs->regs[0], regs->regs[1], regs->regs[2]);

 	case __ARM_NR_compat_set_tls:
-		current->thread.tp_value = regs->regs[0];
+		current->thread.uw.tp_value = regs->regs[0];

 		/*
 		 * Protect against register corruption from context switch.
--
cgit v1.2.3-59-g8ed1b
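The BUILD_BUG_ON() added to arch_thread_struct_whitelist() above is
what enforces the "no implicit padding" requirement from the commit
message. The idiom can be exercised stand-alone; a self-contained
sketch with a hypothetical struct (userspace-compilable, with
_Static_assert standing in for BUILD_BUG_ON and sizeof_field spelled
out):

#include <stddef.h>

/* Same definition the kernel uses for sizeof_field(). */
#define sizeof_field(t, f)	(sizeof(((t *)0)->f))

/* Hypothetical stand-in for the whitelisted 'uw' region. */
struct uw_sketch {
	unsigned long tp_value;
	unsigned long tp2_value;	/* unconditional, so no hole precedes the next member */
	struct {
		unsigned long long vregs[64];	/* 512 bytes of register data */
		unsigned int fpsr, fpcr;
		unsigned int reserved[2];	/* pads the member itself to a 16-byte multiple */
	} __attribute__((aligned(16))) fpsimd_state;
};

/*
 * If the member sizes don't sum to the struct size, the compiler
 * inserted padding between the members, which the usercopy whitelist
 * would silently expose; fail the build in that case.
 */
_Static_assert(sizeof(struct uw_sketch) ==
	       sizeof_field(struct uw_sketch, tp_value) +
	       sizeof_field(struct uw_sketch, tp2_value) +
	       sizeof_field(struct uw_sketch, fpsimd_state),
	       "implicit padding in whitelisted region");

Note that this only catches padding between the top-level members;
padding inside a member would be whitelisted along with it, which is
why the commit takes care that the region's members are themselves
padding-free.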