From 0e455d8e80aa22d7ed42c9f5e4e3a75c558aa543 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 31 May 2016 18:04:40 +0100 Subject: arm64: Implement optimised IP checksum helpers AArch64 is capable of 128-bit memory accesses without alignment restrictions, which makes it both possible and highly practical to slurp up a typical 20-byte IP header in just 2 loads. Implement our own version of ip_fast_checksum() to take advantage of that, resulting in considerably fewer instructions and memory accesses than the generic version. We can also get more optimal code generation for csum_fold() by defining it a slightly different way round from the generic version, so throw that into the mix too. Suggested-by: Luke Starrett Acked-by: Luke Starrett Signed-off-by: Robin Murphy Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/Kbuild | 1 - arch/arm64/include/asm/checksum.h | 51 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 1 deletion(-) create mode 100644 arch/arm64/include/asm/checksum.h (limited to 'arch') diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index cff532a6744e..f43d2c44c765 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -1,6 +1,5 @@ generic-y += bug.h generic-y += bugs.h -generic-y += checksum.h generic-y += clkdev.h generic-y += cputime.h generic-y += current.h diff --git a/arch/arm64/include/asm/checksum.h b/arch/arm64/include/asm/checksum.h new file mode 100644 index 000000000000..09f65339d66d --- /dev/null +++ b/arch/arm64/include/asm/checksum.h @@ -0,0 +1,51 @@ +/* + * Copyright (C) 2016 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#ifndef __ASM_CHECKSUM_H +#define __ASM_CHECKSUM_H + +#include + +static inline __sum16 csum_fold(__wsum csum) +{ + u32 sum = (__force u32)csum; + sum += (sum >> 16) | (sum << 16); + return ~(__force __sum16)(sum >> 16); +} +#define csum_fold csum_fold + +static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) +{ + __uint128_t tmp; + u64 sum; + + tmp = *(const __uint128_t *)iph; + iph += 16; + ihl -= 4; + tmp += ((tmp >> 64) | (tmp << 64)); + sum = tmp >> 64; + do { + sum += *(const u32 *)iph; + iph += 4; + } while (--ihl); + + sum += ((sum >> 32) | (sum << 32)); + return csum_fold(sum >> 32); +} +#define ip_fast_csum ip_fast_csum + +#include + +#endif /* __ASM_CHECKSUM_H */ -- cgit v1.2.3-59-g8ed1b From 4674fdb9f1493f12a15c5f2158f260060b58953e Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 31 May 2016 14:49:01 +0100 Subject: arm64: mm: dump: make page table dumping reusable For debugging purposes, it would be nice if we could export page tables other than the swapper_pg_dir to userspace. To enable this, this patch refactors the arm64 page table dumping code such that multiple tables may be registered with the framework, and exported under debugfs. Signed-off-by: Mark Rutland Signed-off-by: Ard Biesheuvel Cc: Laura Abbott Cc: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/ptdump.h | 44 +++++++++++++++++++++++++++++++++++++++++ arch/arm64/mm/dump.c | 32 +++++++++++++++++++----------- 2 files changed, 64 insertions(+), 12 deletions(-) create mode 100644 arch/arm64/include/asm/ptdump.h (limited to 'arch') diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h new file mode 100644 index 000000000000..07b8ed037dee --- /dev/null +++ b/arch/arm64/include/asm/ptdump.h @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2014 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#ifndef __ASM_PTDUMP_H +#define __ASM_PTDUMP_H + +#ifdef CONFIG_ARM64_PTDUMP + +#include + +struct addr_marker { + unsigned long start_address; + char *name; +}; + +struct ptdump_info { + struct mm_struct *mm; + const struct addr_marker *markers; + unsigned long base_addr; + unsigned long max_addr; +}; + +int ptdump_register(struct ptdump_info *info, const char *name); + +#else +static inline int ptdump_register(struct ptdump_info *info, const char *name) +{ + return 0; +} +#endif /* CONFIG_ARM64_PTDUMP */ + +#endif /* __ASM_PTDUMP_H */ diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c index ccfde237d6e6..f94b80eb295d 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/dump.c @@ -27,11 +27,7 @@ #include #include #include - -struct addr_marker { - unsigned long start_address; - const char *name; -}; +#include static const struct addr_marker address_markers[] = { #ifdef CONFIG_KASAN @@ -290,7 +286,8 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) } } -static void walk_pgd(struct pg_state *st, struct mm_struct *mm, unsigned long start) +static void walk_pgd(struct pg_state *st, struct mm_struct *mm, + unsigned long start) { pgd_t *pgd = pgd_offset(mm, 0UL); unsigned i; @@ -309,12 +306,13 @@ static void walk_pgd(struct pg_state *st, struct mm_struct *mm, unsigned long st static int ptdump_show(struct seq_file *m, void *v) { + struct ptdump_info *info = m->private; struct pg_state st = { .seq = m, - .marker = address_markers, + .marker = info->markers, }; - walk_pgd(&st, &init_mm, VA_START); + walk_pgd(&st, info->mm, info->base_addr); note_page(&st, 0, 0, 0); return 0; @@ -322,7 +320,7 @@ static int ptdump_show(struct seq_file *m, void *v) static int ptdump_open(struct inode *inode, struct file *file) { - return single_open(file, ptdump_show, NULL); + return single_open(file, ptdump_show, inode->i_private); } static const struct file_operations ptdump_fops = { @@ -332,7 +330,7 @@ static const struct file_operations ptdump_fops = { .release = single_release, }; -static int ptdump_init(void) +int ptdump_register(struct ptdump_info *info, const char *name) { struct dentry *pe; unsigned i, j; @@ -342,8 +340,18 @@ static int ptdump_init(void) for (j = 0; j < pg_level[i].num; j++) pg_level[i].mask |= pg_level[i].bits[j].mask; - pe = debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, - &ptdump_fops); + pe = debugfs_create_file(name, 0400, NULL, info, &ptdump_fops); return pe ? 0 : -ENOMEM; } + +static struct ptdump_info kernel_ptdump_info = { + .mm = &init_mm, + .markers = address_markers, + .base_addr = VA_START, +}; + +static int ptdump_init(void) +{ + return ptdump_register(&kernel_ptdump_info, "kernel_page_tables"); +} device_initcall(ptdump_init); -- cgit v1.2.3-59-g8ed1b From bffe1baff5d57521b0c41b6997c41ff1993e9818 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Wed, 8 Jun 2016 14:40:56 -0700 Subject: arm64: kasan: instrument user memory access API The upstream commit 1771c6e1a567ea0ba2cccc0a4ffe68a1419fd8ef ("x86/kasan: instrument user memory access API") added KASAN instrument to x86 user memory access API, so added such instrument to ARM64 too. Define __copy_to/from_user in C in order to add kasan_check_read/write call, rename assembly implementation to __arch_copy_to/from_user. Tested by test_kasan module. Acked-by: Andrey Ryabinin Reviewed-by: Mark Rutland Tested-by: Mark Rutland Signed-off-by: Yang Shi Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/uaccess.h | 25 +++++++++++++++++++++---- arch/arm64/kernel/arm64ksyms.c | 4 ++-- arch/arm64/lib/copy_from_user.S | 4 ++-- arch/arm64/lib/copy_to_user.S | 4 ++-- 4 files changed, 27 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 9e397a542756..5e834d10b291 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -21,6 +21,7 @@ /* * User space memory access functions */ +#include #include #include @@ -256,15 +257,29 @@ do { \ -EFAULT; \ }) -extern unsigned long __must_check __copy_from_user(void *to, const void __user *from, unsigned long n); -extern unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n); +extern unsigned long __must_check __arch_copy_from_user(void *to, const void __user *from, unsigned long n); +extern unsigned long __must_check __arch_copy_to_user(void __user *to, const void *from, unsigned long n); extern unsigned long __must_check __copy_in_user(void __user *to, const void __user *from, unsigned long n); extern unsigned long __must_check __clear_user(void __user *addr, unsigned long n); +static inline unsigned long __must_check __copy_from_user(void *to, const void __user *from, unsigned long n) +{ + kasan_check_write(to, n); + return __arch_copy_from_user(to, from, n); +} + +static inline unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n) +{ + kasan_check_read(from, n); + return __arch_copy_to_user(to, from, n); +} + static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) { + kasan_check_write(to, n); + if (access_ok(VERIFY_READ, from, n)) - n = __copy_from_user(to, from, n); + n = __arch_copy_from_user(to, from, n); else /* security hole - plug it */ memset(to, 0, n); return n; @@ -272,8 +287,10 @@ static inline unsigned long __must_check copy_from_user(void *to, const void __u static inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n) { + kasan_check_read(from, n); + if (access_ok(VERIFY_WRITE, to, n)) - n = __copy_to_user(to, from, n); + n = __arch_copy_to_user(to, from, n); return n; } diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c index 678f30b05a45..2dc44406a7ad 100644 --- a/arch/arm64/kernel/arm64ksyms.c +++ b/arch/arm64/kernel/arm64ksyms.c @@ -34,8 +34,8 @@ EXPORT_SYMBOL(copy_page); EXPORT_SYMBOL(clear_page); /* user mem (segment) */ -EXPORT_SYMBOL(__copy_from_user); -EXPORT_SYMBOL(__copy_to_user); +EXPORT_SYMBOL(__arch_copy_from_user); +EXPORT_SYMBOL(__arch_copy_to_user); EXPORT_SYMBOL(__clear_user); EXPORT_SYMBOL(__copy_in_user); diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index 17e8306dca29..0b90497d4424 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ -66,7 +66,7 @@ .endm end .req x5 -ENTRY(__copy_from_user) +ENTRY(__arch_copy_from_user) ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \ CONFIG_ARM64_PAN) add end, x0, x2 @@ -75,7 +75,7 @@ ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \ CONFIG_ARM64_PAN) mov x0, #0 // Nothing to copy ret -ENDPROC(__copy_from_user) +ENDPROC(__arch_copy_from_user) .section .fixup,"ax" .align 2 diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index 21faae60f988..7a7efe255034 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -65,7 +65,7 @@ .endm end .req x5 -ENTRY(__copy_to_user) +ENTRY(__arch_copy_to_user) ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \ CONFIG_ARM64_PAN) add end, x0, x2 @@ -74,7 +74,7 @@ ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \ CONFIG_ARM64_PAN) mov x0, #0 ret -ENDPROC(__copy_to_user) +ENDPROC(__arch_copy_to_user) .section .fixup,"ax" .align 2 -- cgit v1.2.3-59-g8ed1b From 7ceb3a1040524c32aa71df8807427e348fac49da Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 13 Jun 2016 11:15:15 +0100 Subject: arm64: simplify dump_mem Currently dump_mem attempts to dump memory in 64-bit chunks when reporting a failure in 64-bit code, or 32-bit chunks when reporting a failure in 32-bit code. We added code to handle these two cases separately in commit e147ae6d7f908412 ("arm64: modify the dump mem for 64 bit addresses"). However, in all cases dump_mem is called, the failing context is a kernel rather than user context. Additionally dump_mem is assumed to only be used for kernel contexts, as internally it switches to KERNEL_DS, and its callers pass kernel stack bounds. This patch removes the redundant 32-bit chunk logic and associated compat parameter, largely reverting the aforementioned commit. For the call in __die(), the check of in_interrupt() is removed also, as __die() is only called in response to faults from the kernel's exception level, and thus the !user_mode(regs) check is sufficient. Were this not the case, the used of task_stack_page(tsk) to generate the stack bounds would be erroneous. Signed-off-by: Mark Rutland Cc: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/traps.c | 31 +++++++++++-------------------- 1 file changed, 11 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 2a43012616b7..d9da2c56e4f8 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -52,15 +52,14 @@ static const char *handler[]= { int show_unhandled_signals = 1; /* - * Dump out the contents of some memory nicely... + * Dump out the contents of some kernel memory nicely... */ static void dump_mem(const char *lvl, const char *str, unsigned long bottom, - unsigned long top, bool compat) + unsigned long top) { unsigned long first; mm_segment_t fs; int i; - unsigned int width = compat ? 4 : 8; /* * We need to switch to kernel mode so that we can use __get_user @@ -78,22 +77,15 @@ static void dump_mem(const char *lvl, const char *str, unsigned long bottom, memset(str, ' ', sizeof(str)); str[sizeof(str) - 1] = '\0'; - for (p = first, i = 0; i < (32 / width) - && p < top; i++, p += width) { + for (p = first, i = 0; i < (32 / 8) + && p < top; i++, p += 8) { if (p >= bottom && p < top) { unsigned long val; - if (width == 8) { - if (__get_user(val, (unsigned long *)p) == 0) - sprintf(str + i * 17, " %016lx", val); - else - sprintf(str + i * 17, " ????????????????"); - } else { - if (__get_user(val, (unsigned int *)p) == 0) - sprintf(str + i * 9, " %08lx", val); - else - sprintf(str + i * 9, " ????????"); - } + if (__get_user(val, (unsigned long *)p) == 0) + sprintf(str + i * 17, " %016lx", val); + else + sprintf(str + i * 17, " ????????????????"); } } printk("%s%04lx:%s\n", lvl, first & 0xffff, str); @@ -216,7 +208,7 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) stack = IRQ_STACK_TO_TASK_STACK(irq_stack_ptr); dump_mem("", "Exception stack", stack, - stack + sizeof(struct pt_regs), false); + stack + sizeof(struct pt_regs)); } } } @@ -254,10 +246,9 @@ static int __die(const char *str, int err, struct thread_info *thread, pr_emerg("Process %.*s (pid: %d, stack limit = 0x%p)\n", TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), thread + 1); - if (!user_mode(regs) || in_interrupt()) { + if (!user_mode(regs)) { dump_mem(KERN_EMERG, "Stack: ", regs->sp, - THREAD_SIZE + (unsigned long)task_stack_page(tsk), - compat_user_mode(regs)); + THREAD_SIZE + (unsigned long)task_stack_page(tsk)); dump_backtrace(regs, tsk); dump_instr(KERN_EMERG, regs); } -- cgit v1.2.3-59-g8ed1b From 6b14c517a2a866c8407e9864c1cd3fcc6fed55ab Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Tue, 21 Jun 2016 14:55:23 +0800 Subject: arm64: Add support ARCH_SUPPORTS_INT128 The gcc support __SIZEOF_INT128__ and __int128 in arm64, thus, enable ARCH_SUPPORTS_INT128 to make mul_u64_u32_shr() a bit more efficient in scheduler. Signed-off-by: Kefeng Wang Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 5a0a691d4220..ab319eb750d3 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -11,6 +11,7 @@ config ARM64 select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_USE_CMPXCHG_LOCKREF select ARCH_SUPPORTS_ATOMIC_RMW + select ARCH_SUPPORTS_INT128 select ARCH_SUPPORTS_NUMA_BALANCING select ARCH_WANT_OPTIONAL_GPIOLIB select ARCH_WANT_COMPAT_IPC_PARSE_VERSION -- cgit v1.2.3-59-g8ed1b From 5e4c7549f7082b06bbba566c68696dbb8d2e5b6b Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Thu, 16 Jun 2016 18:39:52 +0200 Subject: arm64: allow building with kcov coverage on ARM64 Add ARCH_HAS_KCOV to ARM64 config. To avoid potential crashes, disable instrumentation of the files in arch/arm64/kvm/hyp/*. Signed-off-by: Alexander Potapenko Acked-by: Mark Rutland Acked-by: Marc Zyngier Tested-by: James Morse Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 1 + arch/arm64/kvm/hyp/Makefile | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index ab319eb750d3..73362f52d186 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -7,6 +7,7 @@ config ARM64 select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_GCOV_PROFILE_ALL + select ARCH_HAS_KCOV select ARCH_HAS_SG_CHAIN select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_USE_CMPXCHG_LOCKREF diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index 778d0effa2af..0c85febcc1eb 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -17,6 +17,10 @@ obj-$(CONFIG_KVM_ARM_HOST) += tlb.o obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o obj-$(CONFIG_KVM_ARM_HOST) += s2-setup.o +# KVM code is run at a different exception code with a different map, so +# compiler instrumentation that inserts callbacks or checks into the code may +# cause crashes. Just disable it. GCOV_PROFILE := n KASAN_SANITIZE := n UBSAN_SANITIZE := n +KCOV_INSTRUMENT := n -- cgit v1.2.3-59-g8ed1b From b67a8b29df7e6410c605c2759707c96512b15578 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Wed, 8 Jun 2016 15:53:46 +0800 Subject: arm64: mm: only initialize swiotlb when necessary we only initialize swiotlb when swiotlb_force is true or not all system memory is DMA-able, this trivial optimization saves us 64MB when swiotlb is not necessary. Signed-off-by: Jisheng Zhang Signed-off-by: Catalin Marinas --- arch/arm64/mm/dma-mapping.c | 15 ++++++++++++++- arch/arm64/mm/init.c | 3 ++- 2 files changed, 16 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index c566ec83719f..46a4157adc17 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -29,6 +30,8 @@ #include +static int swiotlb __read_mostly; + static pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot, bool coherent) { @@ -341,6 +344,13 @@ static int __swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt, return ret; } +static int __swiotlb_dma_supported(struct device *hwdev, u64 mask) +{ + if (swiotlb) + return swiotlb_dma_supported(hwdev, mask); + return 1; +} + static struct dma_map_ops swiotlb_dma_ops = { .alloc = __dma_alloc, .free = __dma_free, @@ -354,7 +364,7 @@ static struct dma_map_ops swiotlb_dma_ops = { .sync_single_for_device = __swiotlb_sync_single_for_device, .sync_sg_for_cpu = __swiotlb_sync_sg_for_cpu, .sync_sg_for_device = __swiotlb_sync_sg_for_device, - .dma_supported = swiotlb_dma_supported, + .dma_supported = __swiotlb_dma_supported, .mapping_error = swiotlb_dma_mapping_error, }; @@ -513,6 +523,9 @@ EXPORT_SYMBOL(dummy_dma_ops); static int __init arm64_dma_init(void) { + if (swiotlb_force || max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT)) + swiotlb = 1; + return atomic_pool_init(); } arch_initcall(arm64_dma_init); diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index d45f8627012c..7d25b4d00677 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -403,7 +403,8 @@ static void __init free_unused_memmap(void) */ void __init mem_init(void) { - swiotlb_init(1); + if (swiotlb_force || max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT)) + swiotlb_init(1); set_max_mapnr(pfn_to_page(max_pfn) - mem_map); -- cgit v1.2.3-59-g8ed1b From 275f344bec51e9100bae81f3cc8c6940bbfb24c0 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 31 May 2016 12:33:01 +0100 Subject: arm64: add macro to extract ESR_ELx.EC Several places open-code extraction of the EC field from an ESR_ELx value, in subtly different ways. This is unfortunate duplication and variation, and the precise logic used to extract the field is a distraction. This patch adds a new macro, ESR_ELx_EC(), to extract the EC field from an ESR_ELx value in a consistent fashion. Existing open-coded extractions in core arm64 code are moved over to the new helper. KVM code is left as-is for the moment. Signed-off-by: Mark Rutland Tested-by: Huang Shijie Cc: Dave P Martin Cc: James Morse Cc: Marc Zyngier Cc: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/esr.h | 1 + arch/arm64/kernel/traps.c | 2 +- arch/arm64/mm/fault.c | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 77eeb2cc648f..f772e15c4766 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -74,6 +74,7 @@ #define ESR_ELx_EC_SHIFT (26) #define ESR_ELx_EC_MASK (UL(0x3F) << ESR_ELx_EC_SHIFT) +#define ESR_ELx_EC(esr) (((esr) & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT) #define ESR_ELx_IL (UL(1) << 25) #define ESR_ELx_ISS_MASK (ESR_ELx_IL - 1) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index d9da2c56e4f8..a4250a59f2b9 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -456,7 +456,7 @@ static const char *esr_class_str[] = { const char *esr_get_class_string(u32 esr) { - return esr_class_str[esr >> ESR_ELx_EC_SHIFT]; + return esr_class_str[ESR_ELx_EC(esr)]; } /* diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 013e2cbe7924..d2c124cbd18c 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -244,7 +244,7 @@ out: static inline int permission_fault(unsigned int esr) { - unsigned int ec = (esr & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT; + unsigned int ec = ESR_ELx_EC(esr); unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE; return (ec == ESR_ELx_EC_DABT_CUR && fsc_type == ESR_ELx_FSC_PERM); -- cgit v1.2.3-59-g8ed1b From 561454e25dfa27aeac9e9d05f88ce7cb43d02d71 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 31 May 2016 12:33:02 +0100 Subject: arm64/kvm: use ESR_ELx_EC to extract EC Now that we have a helper to extract the EC from an ESR_ELx value, make use of this in the arm64 KVM code for simplicity and consistency. There should be no functional changes as a result of this patch. Signed-off-by: Mark Rutland Cc: Dave P Martin Cc: Huang Shijie Cc: James Morse Cc: Marc Zyngier Cc: Will Deacon Cc: kvmarm@lists.cs.columbia.edu Acked-by: Christoffer Dall Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/kvm_emulate.h | 2 +- arch/arm64/kvm/handle_exit.c | 4 ++-- arch/arm64/kvm/hyp/switch.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 40bc1681b6d5..4cdeae3b17c6 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -210,7 +210,7 @@ static inline bool kvm_vcpu_trap_il_is32bit(const struct kvm_vcpu *vcpu) static inline u8 kvm_vcpu_trap_get_class(const struct kvm_vcpu *vcpu) { - return kvm_vcpu_get_hsr(vcpu) >> ESR_ELx_EC_SHIFT; + return ESR_ELx_EC(kvm_vcpu_get_hsr(vcpu)); } static inline bool kvm_vcpu_trap_is_iabt(const struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 3246c4aba5b1..fa96fe2bd469 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -106,7 +106,7 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu, struct kvm_run *run) run->exit_reason = KVM_EXIT_DEBUG; run->debug.arch.hsr = hsr; - switch (hsr >> ESR_ELx_EC_SHIFT) { + switch (ESR_ELx_EC(hsr)) { case ESR_ELx_EC_WATCHPT_LOW: run->debug.arch.far = vcpu->arch.fault.far_el2; /* fall through */ @@ -149,7 +149,7 @@ static exit_handle_fn arm_exit_handlers[] = { static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu) { u32 hsr = kvm_vcpu_get_hsr(vcpu); - u8 hsr_ec = hsr >> ESR_ELx_EC_SHIFT; + u8 hsr_ec = ESR_ELx_EC(hsr); if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) || !arm_exit_handlers[hsr_ec]) { diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 437cfad5e3d8..4373997d1a70 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -198,7 +198,7 @@ static bool __hyp_text __translate_far_to_hpfar(u64 far, u64 *hpfar) static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu) { u64 esr = read_sysreg_el2(esr); - u8 ec = esr >> ESR_ELx_EC_SHIFT; + u8 ec = ESR_ELx_EC(esr); u64 hpfar, far; vcpu->arch.fault.esr_el2 = esr; -- cgit v1.2.3-59-g8ed1b From 541ec870ef31433018d245614254bd9d810a9ac3 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 31 May 2016 12:33:03 +0100 Subject: arm64: kill ESR_LNX_EXEC Currently we treat ESR_EL1 bit 24 as software-defined for distinguishing instruction aborts from data aborts, but this bit is architecturally RES0 for instruction aborts, and could be allocated for an arbitrary purpose in future. Additionally, we hard-code the value in entry.S without the mnemonic, making the code difficult to understand. Instead, remove ESR_LNX_EXEC, and distinguish aborts based on the esr, which we already pass to the sole use of ESR_LNX_EXEC. A new helper, is_el0_instruction_abort() is added to make the logic clear. Any instruction aborts taken from EL1 will already have been handled by bad_mode, so we need not handle that case in the helper. For consistency, the existing permission_fault helper is renamed to is_permission_fault, and the return type is changed to bool. There should be no functional changes as the return value was a boolean expression, and the result is only used in another boolean expression. Signed-off-by: Mark Rutland Cc: Dave P Martin Cc: Huang Shijie Cc: James Morse Cc: Marc Zyngier Cc: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/entry.S | 2 +- arch/arm64/mm/fault.c | 13 ++++++++----- 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 12e8d2bcb3f9..eefffa81c6df 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -532,7 +532,7 @@ el0_ia: enable_dbg_and_irq ct_user_exit mov x0, x26 - orr x1, x25, #1 << 24 // use reserved ISS bit for instruction aborts + mov x1, x25 mov x2, sp bl do_mem_abort b ret_to_user diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index d2c124cbd18c..fc5a34a72c6d 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -202,8 +202,6 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re #define VM_FAULT_BADMAP 0x010000 #define VM_FAULT_BADACCESS 0x020000 -#define ESR_LNX_EXEC (1 << 24) - static int __do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int mm_flags, unsigned long vm_flags, struct task_struct *tsk) @@ -242,7 +240,7 @@ out: return fault; } -static inline int permission_fault(unsigned int esr) +static inline bool is_permission_fault(unsigned int esr) { unsigned int ec = ESR_ELx_EC(esr); unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE; @@ -250,6 +248,11 @@ static inline int permission_fault(unsigned int esr) return (ec == ESR_ELx_EC_DABT_CUR && fsc_type == ESR_ELx_FSC_PERM); } +static bool is_el0_instruction_abort(unsigned int esr) +{ + return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW; +} + static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, struct pt_regs *regs) { @@ -272,14 +275,14 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, if (user_mode(regs)) mm_flags |= FAULT_FLAG_USER; - if (esr & ESR_LNX_EXEC) { + if (is_el0_instruction_abort(esr)) { vm_flags = VM_EXEC; } else if ((esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM)) { vm_flags = VM_WRITE; mm_flags |= FAULT_FLAG_WRITE; } - if (permission_fault(esr) && (addr < USER_DS)) { + if (is_permission_fault(esr) && (addr < USER_DS)) { if (get_fs() == KERNEL_DS) die("Accessing user space memory with fs=KERNEL_DS", regs, esr); -- cgit v1.2.3-59-g8ed1b From d770b5a04790a4ec1c0864621eef95f5afe773cf Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 22 Jun 2016 12:00:46 +0100 Subject: Revert "arm64: Add support ARCH_SUPPORTS_INT128" This reverts commit 6b14c517a2a866c8407e9864c1cd3fcc6fed55ab. The original patch and UBSAN+KASAN enabled causes Linux to fail to link with: lib/built-in.o: In function `get_signed_val': lib/ubsan.c:93: undefined reference to `__ashlti3' lib/ubsan.c:93: undefined reference to `__ashrti3' Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 73362f52d186..eb0b0a05751e 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -12,7 +12,6 @@ config ARM64 select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_USE_CMPXCHG_LOCKREF select ARCH_SUPPORTS_ATOMIC_RMW - select ARCH_SUPPORTS_INT128 select ARCH_SUPPORTS_NUMA_BALANCING select ARCH_WANT_OPTIONAL_GPIOLIB select ARCH_WANT_COMPAT_IPC_PARSE_VERSION -- cgit v1.2.3-59-g8ed1b From b69e0dc14ce3c4abbd11725ff98a885d4616f9fe Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 22 Jun 2016 10:06:12 +0100 Subject: arm64: smp: Add function to determine if cpus are stuck in the kernel kernel/smp.c has a fancy counter that keeps track of the number of CPUs it marked as not-present and left in cpu_park_loop(). If there are any CPUs spinning in here, features like kexec or hibernate may release them by overwriting this memory. This problem also occurs on machines using spin-tables to release secondary cores. After commit 44dbcc93ab67 ("arm64: Fix behavior of maxcpus=N") we bring all known cpus into the secondary holding pen, meaning this memory can't be re-used by kexec or hibernate. Add a function cpus_are_stuck_in_kernel() to determine if either of these cases have occurred. Signed-off-by: James Morse Acked-by: Mark Rutland Reviewed-by: Suzuki K Poulose Signed-off-by: Will Deacon [catalin.marinas@arm.com: cherry-picked from mainline for kexec dependency] Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/smp.h | 12 ++++++++++++ arch/arm64/kernel/smp.c | 18 ++++++++++++++++++ 2 files changed, 30 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index 433e50405274..022644704a93 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -124,6 +124,18 @@ static inline void cpu_panic_kernel(void) cpu_park_loop(); } +/* + * If a secondary CPU enters the kernel but fails to come online, + * (e.g. due to mismatched features), and cannot exit the kernel, + * we increment cpus_stuck_in_kernel and leave the CPU in a + * quiesecent loop within the kernel text. The memory containing + * this loop must not be re-used for anything else as the 'stuck' + * core is executing it. + * + * This function is used to inhibit features like kexec and hibernate. + */ +bool cpus_are_stuck_in_kernel(void); + #endif /* ifndef __ASSEMBLY__ */ #endif /* ifndef __ASM_SMP_H */ diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 678e0842cb3b..62ff3c0622e2 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -909,3 +909,21 @@ int setup_profiling_timer(unsigned int multiplier) { return -EINVAL; } + +static bool have_cpu_die(void) +{ +#ifdef CONFIG_HOTPLUG_CPU + int any_cpu = raw_smp_processor_id(); + + if (cpu_ops[any_cpu]->cpu_die) + return true; +#endif + return false; +} + +bool cpus_are_stuck_in_kernel(void) +{ + bool smp_spin_tables = (num_possible_cpus() > 1 && !have_cpu_die()); + + return !!cpus_stuck_in_kernel || smp_spin_tables; +} -- cgit v1.2.3-59-g8ed1b From f9076ecfb1216a478312b1c078d04792df6d4477 Mon Sep 17 00:00:00 2001 From: Geoff Levand Date: Thu, 23 Jun 2016 17:54:48 +0000 Subject: arm64: Add back cpu reset routines Commit 68234df4ea79 ("arm64: kill flush_cache_all()") removed the global arm64 routines cpu_reset() and cpu_soft_restart() needed by the arm64 kexec and kdump support. Add back a simplified version of cpu_soft_restart() with some changes needed for kexec in the new files cpu_reset.S, and cpu_reset.h. When a CPU is reset it needs to be put into the exception level it had when it entered the kernel. Update cpu_soft_restart() to accept an argument which signals if the reset address should be entered at EL1 or EL2, and add a new hypercall HVC_SOFT_RESTART which is used for the EL2 switch. Signed-off-by: Geoff Levand Reviewed-by: James Morse Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/virt.h | 5 ++++ arch/arm64/kernel/cpu-reset.S | 54 +++++++++++++++++++++++++++++++++++++++++++ arch/arm64/kernel/cpu-reset.h | 34 +++++++++++++++++++++++++++ arch/arm64/kernel/hyp-stub.S | 10 +++++++- 4 files changed, 102 insertions(+), 1 deletion(-) create mode 100644 arch/arm64/kernel/cpu-reset.S create mode 100644 arch/arm64/kernel/cpu-reset.h (limited to 'arch') diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index dcbcf8dcbefb..bbc6a8cf83f1 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -34,6 +34,11 @@ */ #define HVC_SET_VECTORS 1 +/* + * HVC_SOFT_RESTART - CPU soft reset, used by the cpu_soft_restart routine. + */ +#define HVC_SOFT_RESTART 2 + #define BOOT_CPU_MODE_EL1 (0xe11) #define BOOT_CPU_MODE_EL2 (0xe12) diff --git a/arch/arm64/kernel/cpu-reset.S b/arch/arm64/kernel/cpu-reset.S new file mode 100644 index 000000000000..65f42d257414 --- /dev/null +++ b/arch/arm64/kernel/cpu-reset.S @@ -0,0 +1,54 @@ +/* + * CPU reset routines + * + * Copyright (C) 2001 Deep Blue Solutions Ltd. + * Copyright (C) 2012 ARM Ltd. + * Copyright (C) 2015 Huawei Futurewei Technologies. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include + +.text +.pushsection .idmap.text, "ax" + +/* + * __cpu_soft_restart(el2_switch, entry, arg0, arg1, arg2) - Helper for + * cpu_soft_restart. + * + * @el2_switch: Flag to indicate a swich to EL2 is needed. + * @entry: Location to jump to for soft reset. + * arg0: First argument passed to @entry. + * arg1: Second argument passed to @entry. + * arg2: Third argument passed to @entry. + * + * Put the CPU into the same state as it would be if it had been reset, and + * branch to what would be the reset vector. It must be executed with the + * flat identity mapping. + */ +ENTRY(__cpu_soft_restart) + /* Clear sctlr_el1 flags. */ + mrs x12, sctlr_el1 + ldr x13, =SCTLR_ELx_FLAGS + bic x12, x12, x13 + msr sctlr_el1, x12 + isb + + cbz x0, 1f // el2_switch? + mov x0, #HVC_SOFT_RESTART + hvc #0 // no return + +1: mov x18, x1 // entry + mov x0, x2 // arg0 + mov x1, x3 // arg1 + mov x2, x4 // arg2 + br x18 +ENDPROC(__cpu_soft_restart) + +.popsection diff --git a/arch/arm64/kernel/cpu-reset.h b/arch/arm64/kernel/cpu-reset.h new file mode 100644 index 000000000000..d4e9ecb264f0 --- /dev/null +++ b/arch/arm64/kernel/cpu-reset.h @@ -0,0 +1,34 @@ +/* + * CPU reset routines + * + * Copyright (C) 2015 Huawei Futurewei Technologies. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _ARM64_CPU_RESET_H +#define _ARM64_CPU_RESET_H + +#include + +void __cpu_soft_restart(unsigned long el2_switch, unsigned long entry, + unsigned long arg0, unsigned long arg1, unsigned long arg2); + +static inline void __noreturn cpu_soft_restart(unsigned long el2_switch, + unsigned long entry, unsigned long arg0, unsigned long arg1, + unsigned long arg2) +{ + typeof(__cpu_soft_restart) *restart; + + el2_switch = el2_switch && !is_kernel_in_hyp_mode() && + is_hyp_mode_available(); + restart = (void *)virt_to_phys(__cpu_soft_restart); + + cpu_install_idmap(); + restart(el2_switch, entry, arg0, arg1, arg2); + unreachable(); +} + +#endif diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index 8727f4490772..d3b5f75e652e 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -71,8 +71,16 @@ el1_sync: msr vbar_el2, x1 b 9f +2: cmp x0, #HVC_SOFT_RESTART + b.ne 3f + mov x0, x2 + mov x2, x4 + mov x4, x1 + mov x1, x3 + br x4 // no return + /* Someone called kvm_call_hyp() against the hyp-stub... */ -2: mov x0, #ARM_EXCEPTION_HYP_GONE +3: mov x0, #ARM_EXCEPTION_HYP_GONE 9: eret ENDPROC(el1_sync) -- cgit v1.2.3-59-g8ed1b From d28f6df1305a86715e4e7ea0f043ba01c0a0e8d9 Mon Sep 17 00:00:00 2001 From: Geoff Levand Date: Thu, 23 Jun 2016 17:54:48 +0000 Subject: arm64/kexec: Add core kexec support Add three new files, kexec.h, machine_kexec.c and relocate_kernel.S to the arm64 architecture that add support for the kexec re-boot mechanism (CONFIG_KEXEC) on arm64 platforms. Signed-off-by: Geoff Levand Reviewed-by: James Morse [catalin.marinas@arm.com: removed dead code following James Morse's comments] Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 10 +++ arch/arm64/include/asm/kexec.h | 48 ++++++++++ arch/arm64/kernel/Makefile | 2 + arch/arm64/kernel/machine_kexec.c | 170 ++++++++++++++++++++++++++++++++++++ arch/arm64/kernel/relocate_kernel.S | 130 +++++++++++++++++++++++++++ include/uapi/linux/kexec.h | 1 + 6 files changed, 361 insertions(+) create mode 100644 arch/arm64/include/asm/kexec.h create mode 100644 arch/arm64/kernel/machine_kexec.c create mode 100644 arch/arm64/kernel/relocate_kernel.S (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index eb0b0a05751e..1b196bf99320 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -665,6 +665,16 @@ config PARAVIRT_TIME_ACCOUNTING If in doubt, say N here. +config KEXEC + depends on PM_SLEEP_SMP + select KEXEC_CORE + bool "kexec system call" + ---help--- + kexec is a system call that implements the ability to shutdown your + current kernel, and to start another kernel. It is like a reboot + but it is independent of the system firmware. And like a reboot + you can start any kernel with it, not just Linux. + config XEN_DOM0 def_bool y depends on XEN diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h new file mode 100644 index 000000000000..04744dc5fb61 --- /dev/null +++ b/arch/arm64/include/asm/kexec.h @@ -0,0 +1,48 @@ +/* + * kexec for arm64 + * + * Copyright (C) Linaro. + * Copyright (C) Huawei Futurewei Technologies. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _ARM64_KEXEC_H +#define _ARM64_KEXEC_H + +/* Maximum physical address we can use pages from */ + +#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL) + +/* Maximum address we can reach in physical address mode */ + +#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL) + +/* Maximum address we can use for the control code buffer */ + +#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL) + +#define KEXEC_CONTROL_PAGE_SIZE 4096 + +#define KEXEC_ARCH KEXEC_ARCH_AARCH64 + +#ifndef __ASSEMBLY__ + +/** + * crash_setup_regs() - save registers for the panic kernel + * + * @newregs: registers are saved here + * @oldregs: registers to be saved (may be %NULL) + */ + +static inline void crash_setup_regs(struct pt_regs *newregs, + struct pt_regs *oldregs) +{ + /* Empty routine needed to avoid build errors. */ +} + +#endif /* __ASSEMBLY__ */ + +#endif diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 2173149d8954..7700c0c23962 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -46,6 +46,8 @@ arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o arm64-obj-$(CONFIG_PARAVIRT) += paravirt.o arm64-obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o arm64-obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o +arm64-obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o \ + cpu-reset.o obj-y += $(arm64-obj-y) vdso/ obj-m += $(arm64-obj-m) diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c new file mode 100644 index 000000000000..c40e64607545 --- /dev/null +++ b/arch/arm64/kernel/machine_kexec.c @@ -0,0 +1,170 @@ +/* + * kexec for arm64 + * + * Copyright (C) Linaro. + * Copyright (C) Huawei Futurewei Technologies. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include + +#include +#include +#include + +#include "cpu-reset.h" + +/* Global variables for the arm64_relocate_new_kernel routine. */ +extern const unsigned char arm64_relocate_new_kernel[]; +extern const unsigned long arm64_relocate_new_kernel_size; + +static unsigned long kimage_start; + +void machine_kexec_cleanup(struct kimage *kimage) +{ + /* Empty routine needed to avoid build errors. */ +} + +/** + * machine_kexec_prepare - Prepare for a kexec reboot. + * + * Called from the core kexec code when a kernel image is loaded. + * Forbid loading a kexec kernel if we have no way of hotplugging cpus or cpus + * are stuck in the kernel. This avoids a panic once we hit machine_kexec(). + */ +int machine_kexec_prepare(struct kimage *kimage) +{ + kimage_start = kimage->start; + + if (kimage->type != KEXEC_TYPE_CRASH && cpus_are_stuck_in_kernel()) { + pr_err("Can't kexec: CPUs are stuck in the kernel.\n"); + return -EBUSY; + } + + return 0; +} + +/** + * kexec_list_flush - Helper to flush the kimage list and source pages to PoC. + */ +static void kexec_list_flush(struct kimage *kimage) +{ + kimage_entry_t *entry; + + for (entry = &kimage->head; ; entry++) { + unsigned int flag; + void *addr; + + /* flush the list entries. */ + __flush_dcache_area(entry, sizeof(kimage_entry_t)); + + flag = *entry & IND_FLAGS; + if (flag == IND_DONE) + break; + + addr = phys_to_virt(*entry & PAGE_MASK); + + switch (flag) { + case IND_INDIRECTION: + /* Set entry point just before the new list page. */ + entry = (kimage_entry_t *)addr - 1; + break; + case IND_SOURCE: + /* flush the source pages. */ + __flush_dcache_area(addr, PAGE_SIZE); + break; + case IND_DESTINATION: + break; + default: + BUG(); + } + } +} + +/** + * kexec_segment_flush - Helper to flush the kimage segments to PoC. + */ +static void kexec_segment_flush(const struct kimage *kimage) +{ + unsigned long i; + + pr_debug("%s:\n", __func__); + + for (i = 0; i < kimage->nr_segments; i++) { + pr_debug(" segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n", + i, + kimage->segment[i].mem, + kimage->segment[i].mem + kimage->segment[i].memsz, + kimage->segment[i].memsz, + kimage->segment[i].memsz / PAGE_SIZE); + + __flush_dcache_area(phys_to_virt(kimage->segment[i].mem), + kimage->segment[i].memsz); + } +} + +/** + * machine_kexec - Do the kexec reboot. + * + * Called from the core kexec code for a sys_reboot with LINUX_REBOOT_CMD_KEXEC. + */ +void machine_kexec(struct kimage *kimage) +{ + phys_addr_t reboot_code_buffer_phys; + void *reboot_code_buffer; + + /* + * New cpus may have become stuck_in_kernel after we loaded the image. + */ + BUG_ON(cpus_are_stuck_in_kernel() || (num_online_cpus() > 1)); + + reboot_code_buffer_phys = page_to_phys(kimage->control_code_page); + reboot_code_buffer = phys_to_virt(reboot_code_buffer_phys); + + /* + * Copy arm64_relocate_new_kernel to the reboot_code_buffer for use + * after the kernel is shut down. + */ + memcpy(reboot_code_buffer, arm64_relocate_new_kernel, + arm64_relocate_new_kernel_size); + + /* Flush the reboot_code_buffer in preparation for its execution. */ + __flush_dcache_area(reboot_code_buffer, arm64_relocate_new_kernel_size); + flush_icache_range((uintptr_t)reboot_code_buffer, + arm64_relocate_new_kernel_size); + + /* Flush the kimage list and its buffers. */ + kexec_list_flush(kimage); + + /* Flush the new image if already in place. */ + if (kimage->head & IND_DONE) + kexec_segment_flush(kimage); + + pr_info("Bye!\n"); + + /* Disable all DAIF exceptions. */ + asm volatile ("msr daifset, #0xf" : : : "memory"); + + /* + * cpu_soft_restart will shutdown the MMU, disable data caches, then + * transfer control to the reboot_code_buffer which contains a copy of + * the arm64_relocate_new_kernel routine. arm64_relocate_new_kernel + * uses physical addressing to relocate the new image to its final + * position and transfers control to the image entry point when the + * relocation is complete. + */ + + cpu_soft_restart(1, reboot_code_buffer_phys, kimage->head, + kimage_start, 0); + + BUG(); /* Should never get here. */ +} + +void machine_crash_shutdown(struct pt_regs *regs) +{ + /* Empty routine needed to avoid build errors. */ +} diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S new file mode 100644 index 000000000000..51b73cdde287 --- /dev/null +++ b/arch/arm64/kernel/relocate_kernel.S @@ -0,0 +1,130 @@ +/* + * kexec for arm64 + * + * Copyright (C) Linaro. + * Copyright (C) Huawei Futurewei Technologies. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include + +#include +#include +#include +#include + +/* + * arm64_relocate_new_kernel - Put a 2nd stage image in place and boot it. + * + * The memory that the old kernel occupies may be overwritten when coping the + * new image to its final location. To assure that the + * arm64_relocate_new_kernel routine which does that copy is not overwritten, + * all code and data needed by arm64_relocate_new_kernel must be between the + * symbols arm64_relocate_new_kernel and arm64_relocate_new_kernel_end. The + * machine_kexec() routine will copy arm64_relocate_new_kernel to the kexec + * control_code_page, a special page which has been set up to be preserved + * during the copy operation. + */ +ENTRY(arm64_relocate_new_kernel) + + /* Setup the list loop variables. */ + mov x17, x1 /* x17 = kimage_start */ + mov x16, x0 /* x16 = kimage_head */ + dcache_line_size x15, x0 /* x15 = dcache line size */ + mov x14, xzr /* x14 = entry ptr */ + mov x13, xzr /* x13 = copy dest */ + + /* Clear the sctlr_el2 flags. */ + mrs x0, CurrentEL + cmp x0, #CurrentEL_EL2 + b.ne 1f + mrs x0, sctlr_el2 + ldr x1, =SCTLR_ELx_FLAGS + bic x0, x0, x1 + msr sctlr_el2, x0 + isb +1: + + /* Check if the new image needs relocation. */ + tbnz x16, IND_DONE_BIT, .Ldone + +.Lloop: + and x12, x16, PAGE_MASK /* x12 = addr */ + + /* Test the entry flags. */ +.Ltest_source: + tbz x16, IND_SOURCE_BIT, .Ltest_indirection + + /* Invalidate dest page to PoC. */ + mov x0, x13 + add x20, x0, #PAGE_SIZE + sub x1, x15, #1 + bic x0, x0, x1 +2: dc ivac, x0 + add x0, x0, x15 + cmp x0, x20 + b.lo 2b + dsb sy + + mov x20, x13 + mov x21, x12 + copy_page x20, x21, x0, x1, x2, x3, x4, x5, x6, x7 + + /* dest += PAGE_SIZE */ + add x13, x13, PAGE_SIZE + b .Lnext + +.Ltest_indirection: + tbz x16, IND_INDIRECTION_BIT, .Ltest_destination + + /* ptr = addr */ + mov x14, x12 + b .Lnext + +.Ltest_destination: + tbz x16, IND_DESTINATION_BIT, .Lnext + + /* dest = addr */ + mov x13, x12 + +.Lnext: + /* entry = *ptr++ */ + ldr x16, [x14], #8 + + /* while (!(entry & DONE)) */ + tbz x16, IND_DONE_BIT, .Lloop + +.Ldone: + /* wait for writes from copy_page to finish */ + dsb nsh + ic iallu + dsb nsh + isb + + /* Start new image. */ + mov x0, xzr + mov x1, xzr + mov x2, xzr + mov x3, xzr + br x17 + +ENDPROC(arm64_relocate_new_kernel) + +.ltorg + +.align 3 /* To keep the 64-bit values below naturally aligned. */ + +.Lcopy_end: +.org KEXEC_CONTROL_PAGE_SIZE + +/* + * arm64_relocate_new_kernel_size - Number of bytes to copy to the + * control_code_page. + */ +.globl arm64_relocate_new_kernel_size +arm64_relocate_new_kernel_size: + .quad .Lcopy_end - arm64_relocate_new_kernel diff --git a/include/uapi/linux/kexec.h b/include/uapi/linux/kexec.h index 99048e501b88..aae5ebf2022b 100644 --- a/include/uapi/linux/kexec.h +++ b/include/uapi/linux/kexec.h @@ -39,6 +39,7 @@ #define KEXEC_ARCH_SH (42 << 16) #define KEXEC_ARCH_MIPS_LE (10 << 16) #define KEXEC_ARCH_MIPS ( 8 << 16) +#define KEXEC_ARCH_AARCH64 (183 << 16) /* The artificial cap on the number of segments passed to kexec_load. */ #define KEXEC_SEGMENT_MAX 16 -- cgit v1.2.3-59-g8ed1b From b26a4ae39963faf6f9014f6ff72e3661c83753ce Mon Sep 17 00:00:00 2001 From: Geoff Levand Date: Thu, 23 Jun 2016 17:54:48 +0000 Subject: arm64/kexec: Enable kexec in the arm64 defconfig Signed-off-by: Geoff Levand Signed-off-by: Catalin Marinas --- arch/arm64/configs/defconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index fd2d74d0491e..4ed4756dfa97 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -70,6 +70,7 @@ CONFIG_KSM=y CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_CMA=y CONFIG_XEN=y +CONFIG_KEXEC=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_COMPAT=y CONFIG_CPU_IDLE=y -- cgit v1.2.3-59-g8ed1b From 221f2c770e10d3f8dca71e1e14e24e61dc8988dd Mon Sep 17 00:00:00 2001 From: Geoff Levand Date: Thu, 23 Jun 2016 17:54:48 +0000 Subject: arm64/kexec: Add pr_debug output To aid in debugging kexec problems or when adding new functionality to kexec add a new routine kexec_image_info() and several inline pr_debug statements. Signed-off-by: Geoff Levand Reviewed-by: James Morse Signed-off-by: Catalin Marinas --- arch/arm64/kernel/machine_kexec.c | 42 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index c40e64607545..bc96c8a7fc79 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -24,6 +24,32 @@ extern const unsigned long arm64_relocate_new_kernel_size; static unsigned long kimage_start; +/** + * kexec_image_info - For debugging output. + */ +#define kexec_image_info(_i) _kexec_image_info(__func__, __LINE__, _i) +static void _kexec_image_info(const char *func, int line, + const struct kimage *kimage) +{ + unsigned long i; + + pr_debug("%s:%d:\n", func, line); + pr_debug(" kexec kimage info:\n"); + pr_debug(" type: %d\n", kimage->type); + pr_debug(" start: %lx\n", kimage->start); + pr_debug(" head: %lx\n", kimage->head); + pr_debug(" nr_segments: %lu\n", kimage->nr_segments); + + for (i = 0; i < kimage->nr_segments; i++) { + pr_debug(" segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n", + i, + kimage->segment[i].mem, + kimage->segment[i].mem + kimage->segment[i].memsz, + kimage->segment[i].memsz, + kimage->segment[i].memsz / PAGE_SIZE); + } +} + void machine_kexec_cleanup(struct kimage *kimage) { /* Empty routine needed to avoid build errors. */ @@ -40,6 +66,8 @@ int machine_kexec_prepare(struct kimage *kimage) { kimage_start = kimage->start; + kexec_image_info(kimage); + if (kimage->type != KEXEC_TYPE_CRASH && cpus_are_stuck_in_kernel()) { pr_err("Can't kexec: CPUs are stuck in the kernel.\n"); return -EBUSY; @@ -125,6 +153,20 @@ void machine_kexec(struct kimage *kimage) reboot_code_buffer_phys = page_to_phys(kimage->control_code_page); reboot_code_buffer = phys_to_virt(reboot_code_buffer_phys); + kexec_image_info(kimage); + + pr_debug("%s:%d: control_code_page: %p\n", __func__, __LINE__, + kimage->control_code_page); + pr_debug("%s:%d: reboot_code_buffer_phys: %pa\n", __func__, __LINE__, + &reboot_code_buffer_phys); + pr_debug("%s:%d: reboot_code_buffer: %p\n", __func__, __LINE__, + reboot_code_buffer); + pr_debug("%s:%d: relocate_new_kernel: %p\n", __func__, __LINE__, + arm64_relocate_new_kernel); + pr_debug("%s:%d: relocate_new_kernel_size: 0x%lx(%lu) bytes\n", + __func__, __LINE__, arm64_relocate_new_kernel_size, + arm64_relocate_new_kernel_size); + /* * Copy arm64_relocate_new_kernel to the reboot_code_buffer for use * after the kernel is shut down. -- cgit v1.2.3-59-g8ed1b From ea2cbee3bc671390139802dd0d50b08db024b03c Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 22 Jun 2016 12:13:45 +0100 Subject: arm64: mm: simplify memblock numa node extraction We currently open-code extracting the NUMA node of a memblock region, which requires an ifdef to cater for !CONFIG_NUMA builds where the memblock_region::nid field does not exist. The generic memblock_get_region_node helper is intended to cater for this. For CONFIG_HAVE_MEMBLOCK_NODE_MAP, builds this returns reg->nid, and for for !CONFIG_HAVE_MEMBLOCK_NODE_MAP builds this is a static inline that returns 0. Note that for arm64, CONFIG_HAVE_MEMBLOCK_NODE_MAP is selected iff CONFIG_NUMA is. This patch makes use of memblock_get_region_node to simplify the arm64 code. At the same time, we can move the nid variable definition into the loop, as this is the only place it is used. Signed-off-by: Mark Rutland Reviewed-by: Steve Capper Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/mm/init.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 7d25b4d00677..64ea28306661 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -160,12 +160,10 @@ static void __init arm64_memory_present(void) static void __init arm64_memory_present(void) { struct memblock_region *reg; - int nid = 0; for_each_memblock(memory, reg) { -#ifdef CONFIG_NUMA - nid = reg->nid; -#endif + int nid = memblock_get_region_node(reg); + memory_present(nid, memblock_region_memory_base_pfn(reg), memblock_region_memory_end_pfn(reg)); } -- cgit v1.2.3-59-g8ed1b From 9fdc14c55cd6579d619ccd9d40982e0805e62b6d Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 23 Jun 2016 15:53:17 +0200 Subject: arm64: mm: fix location of _etext As Kees Cook notes in the ARM counterpart of this patch [0]: The _etext position is defined to be the end of the kernel text code, and should not include any part of the data segments. This interferes with things that might check memory ranges and expect executable code up to _etext. In particular, Kees is referring to the HARDENED_USERCOPY patch set [1], which rejects attempts to call copy_to_user() on kernel ranges containing executable code, but does allow access to the .rodata segment. Regardless of whether one may or may not agree with the distinction, it makes sense for _etext to have the same meaning across architectures. So let's put _etext where it belongs, between .text and .rodata, and fix up existing references to use __init_begin instead, which unlike _end_rodata includes the exception and notes sections as well. The _etext references in kaslr.c are left untouched, since its references to [_stext, _etext) are meant to capture potential jump instruction targets, and so disregarding .rodata is actually an improvement here. [0] http://article.gmane.org/gmane.linux.kernel/2245084 [1] http://thread.gmane.org/gmane.linux.kernel.hardened.devel/2502 Reported-by: Kees Cook Reviewed-by: Mark Rutland Signed-off-by: Ard Biesheuvel Reviewed-by: Kees Cook Signed-off-by: Catalin Marinas --- arch/arm64/kernel/setup.c | 2 +- arch/arm64/kernel/vmlinux.lds.S | 7 ++++--- arch/arm64/mm/init.c | 4 ++-- arch/arm64/mm/mmu.c | 20 ++++++++++---------- 4 files changed, 17 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 3279defabaa2..c1509e654190 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -202,7 +202,7 @@ static void __init request_standard_resources(void) struct resource *res; kernel_code.start = virt_to_phys(_text); - kernel_code.end = virt_to_phys(_etext - 1); + kernel_code.end = virt_to_phys(__init_begin - 1); kernel_data.start = virt_to_phys(_sdata); kernel_data.end = virt_to_phys(_end - 1); diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 435e820e898d..0de7be4f1a9d 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -131,12 +131,13 @@ SECTIONS } . = ALIGN(SEGMENT_ALIGN); - RO_DATA(PAGE_SIZE) /* everything from this point to */ - EXCEPTION_TABLE(8) /* _etext will be marked RO NX */ + _etext = .; /* End of text section */ + + RO_DATA(PAGE_SIZE) /* everything from this point to */ + EXCEPTION_TABLE(8) /* __init_begin will be marked RO NX */ NOTES . = ALIGN(SEGMENT_ALIGN); - _etext = .; /* End of text and rodata section */ __init_begin = .; INIT_TEXT_SECTION(8) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 64ea28306661..2ade7a6a10a7 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -429,9 +429,9 @@ void __init mem_init(void) pr_cont(" vmalloc : 0x%16lx - 0x%16lx (%6ld GB)\n", MLG(VMALLOC_START, VMALLOC_END)); pr_cont(" .text : 0x%p" " - 0x%p" " (%6ld KB)\n", - MLK_ROUNDUP(_text, __start_rodata)); + MLK_ROUNDUP(_text, _etext)); pr_cont(" .rodata : 0x%p" " - 0x%p" " (%6ld KB)\n", - MLK_ROUNDUP(__start_rodata, _etext)); + MLK_ROUNDUP(__start_rodata, __init_begin)); pr_cont(" .init : 0x%p" " - 0x%p" " (%6ld KB)\n", MLK_ROUNDUP(__init_begin, __init_end)); pr_cont(" .data : 0x%p" " - 0x%p" " (%6ld KB)\n", diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 0f85a46c3e18..c356f7b84d4d 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -386,14 +386,14 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt, static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end) { unsigned long kernel_start = __pa(_text); - unsigned long kernel_end = __pa(_etext); + unsigned long kernel_end = __pa(__init_begin); /* * Take care not to create a writable alias for the * read-only text and rodata sections of the kernel image. */ - /* No overlap with the kernel text */ + /* No overlap with the kernel text/rodata */ if (end < kernel_start || start >= kernel_end) { __create_pgd_mapping(pgd, start, __phys_to_virt(start), end - start, PAGE_KERNEL, @@ -402,7 +402,7 @@ static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end } /* - * This block overlaps the kernel text mapping. + * This block overlaps the kernel text/rodata mappings. * Map the portion(s) which don't overlap. */ if (start < kernel_start) @@ -417,7 +417,7 @@ static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end early_pgtable_alloc); /* - * Map the linear alias of the [_text, _etext) interval as + * Map the linear alias of the [_text, __init_begin) interval as * read-only/non-executable. This makes the contents of the * region accessible to subsystems such as hibernate, but * protects it from inadvertent modification or execution. @@ -449,14 +449,14 @@ void mark_rodata_ro(void) { unsigned long section_size; - section_size = (unsigned long)__start_rodata - (unsigned long)_text; + section_size = (unsigned long)_etext - (unsigned long)_text; create_mapping_late(__pa(_text), (unsigned long)_text, section_size, PAGE_KERNEL_ROX); /* - * mark .rodata as read only. Use _etext rather than __end_rodata to - * cover NOTES and EXCEPTION_TABLE. + * mark .rodata as read only. Use __init_begin rather than __end_rodata + * to cover NOTES and EXCEPTION_TABLE. */ - section_size = (unsigned long)_etext - (unsigned long)__start_rodata; + section_size = (unsigned long)__init_begin - (unsigned long)__start_rodata; create_mapping_late(__pa(__start_rodata), (unsigned long)__start_rodata, section_size, PAGE_KERNEL_RO); } @@ -499,8 +499,8 @@ static void __init map_kernel(pgd_t *pgd) { static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_init, vmlinux_data; - map_kernel_segment(pgd, _text, __start_rodata, PAGE_KERNEL_EXEC, &vmlinux_text); - map_kernel_segment(pgd, __start_rodata, _etext, PAGE_KERNEL, &vmlinux_rodata); + map_kernel_segment(pgd, _text, _etext, PAGE_KERNEL_EXEC, &vmlinux_text); + map_kernel_segment(pgd, __start_rodata, __init_begin, PAGE_KERNEL, &vmlinux_rodata); map_kernel_segment(pgd, __init_begin, __init_end, PAGE_KERNEL_EXEC, &vmlinux_init); map_kernel_segment(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data); -- cgit v1.2.3-59-g8ed1b From 6c5269f33e96722f3e20dc694fba636cc1d4b3eb Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 30 Jun 2016 10:02:04 +0800 Subject: arm64: mm: remove unnecessary BUG_ON The memblock_alloc() and memblock_alloc_base() will panic on their own if no free memory, remove pointless BUG_ON. Signed-off-by: Kefeng Wang Acked-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/mm/mmu.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index c356f7b84d4d..0c95d6ec873d 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -77,7 +77,6 @@ static phys_addr_t __init early_pgtable_alloc(void) void *ptr; phys = memblock_alloc(PAGE_SIZE, PAGE_SIZE); - BUG_ON(!phys); /* * The FIX_{PGD,PUD,PMD} slots may be in active use, but the FIX_PTE -- cgit v1.2.3-59-g8ed1b From b82bfa4793cd0f8fde49b85e0ad66906682e7447 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Tue, 28 Jun 2016 18:07:27 +0100 Subject: Revert "arm64: alternatives: add enable parameter to conditional asm macros" Commit 77ee306c0aea9 ("arm64: alternatives: add enable parameter to conditional asm macros") extended the alternative assembly macros. Unfortunately this does not really work as one would expect, as the enable parameter in fact correctly protects the alternative section magic, but not the actual code sequences. This results in having both the original instruction(s) _and_ the alternative ones, if enable if false. Since there is no user of this macros anyway, just revert it. This reverts commit 77ee306c0aea9a219daec256ad25982944affef8. Signed-off-by: Andre Przywara Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/alternative.h | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index beccbdefa106..7288071195e1 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -95,13 +95,11 @@ void apply_alternatives(void *start, size_t length); * The code that follows this macro will be assembled and linked as * normal. There are no restrictions on this code. */ -.macro alternative_if_not cap, enable = 1 - .if \enable +.macro alternative_if_not cap .pushsection .altinstructions, "a" altinstruction_entry 661f, 663f, \cap, 662f-661f, 664f-663f .popsection 661: - .endif .endm /* @@ -118,22 +116,18 @@ void apply_alternatives(void *start, size_t length); * alternative sequence it is defined in (branches into an * alternative sequence are not fixed up). */ -.macro alternative_else, enable = 1 - .if \enable +.macro alternative_else 662: .pushsection .altinstr_replacement, "ax" 663: - .endif .endm /* * Complete an alternative code sequence. */ -.macro alternative_endif, enable = 1 - .if \enable +.macro alternative_endif 664: .popsection .org . - (664b-663b) + (662b-661b) .org . - (662b-661b) + (664b-663b) - .endif .endm #define _ALTERNATIVE_CFG(insn1, insn2, cap, cfg, ...) \ -- cgit v1.2.3-59-g8ed1b From 290622efc76ece22ef76a30bf117755891ab27f6 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Tue, 28 Jun 2016 18:07:28 +0100 Subject: arm64: fix "dc cvau" cache operation on errata-affected core The ARM errata 819472, 826319, 827319 and 824069 for affected Cortex-A53 cores demand to promote "dc cvau" instructions to "dc civac" as well. Attribute the usage of the instruction in __flush_cache_user_range to also be covered by our alternative patching efforts. For that we introduce an assembly macro which both deals with alternatives while still tagging the instructions as USER. Signed-off-by: Andre Przywara Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/alternative.h | 4 ++++ arch/arm64/mm/cache.S | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index 7288071195e1..8746ff6abd77 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -133,6 +133,10 @@ void apply_alternatives(void *start, size_t length); #define _ALTERNATIVE_CFG(insn1, insn2, cap, cfg, ...) \ alternative_insn insn1, insn2, cap, IS_ENABLED(cfg) +.macro user_alt, label, oldinstr, newinstr, cond +9999: alternative_insn "\oldinstr", "\newinstr", \cond + _ASM_EXTABLE 9999b, \label +.endm /* * Generate the assembly for UAO alternatives with exception table entries. diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 50ff9ba3a236..07d7352d7c38 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -52,7 +52,7 @@ ENTRY(__flush_cache_user_range) sub x3, x2, #1 bic x4, x0, x3 1: -USER(9f, dc cvau, x4 ) // clean D line to PoU +user_alt 9f, "dc cvau, x4", "dc civac, x4", ARM64_WORKAROUND_CLEAN_CACHE add x4, x4, x2 cmp x4, x1 b.lo 1b -- cgit v1.2.3-59-g8ed1b From 823066d9edcdfe4cedb06216c2b1f91efaf68a87 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Tue, 28 Jun 2016 18:07:29 +0100 Subject: arm64: include alternative handling in dcache_by_line_op The newly introduced dcache_by_line_op macro is used at least in one occassion at the moment to issue a "dc cvau" instruction, which is affected by ARM errata 819472, 826319, 827319 and 824069. Change the macro to allow for alternative patching in there to protect affected Cortex-A53 cores. Signed-off-by: Andre Przywara [catalin.marinas@arm.com: indentation fixups] Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/assembler.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 10b017c4bdd8..d5025c69ca81 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -24,6 +24,7 @@ #define __ASM_ASSEMBLER_H #include +#include #include #include #include @@ -261,7 +262,16 @@ lr .req x30 // link register add \size, \kaddr, \size sub \tmp2, \tmp1, #1 bic \kaddr, \kaddr, \tmp2 -9998: dc \op, \kaddr +9998: + .if (\op == cvau || \op == cvac) +alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE + dc \op, \kaddr +alternative_else + dc civac, \kaddr +alternative_endif + .else + dc \op, \kaddr + .endif add \kaddr, \kaddr, \tmp1 cmp \kaddr, \size b.lo 9998b -- cgit v1.2.3-59-g8ed1b From 8e2318521bf5837dae093413f81292b59d49d030 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Tue, 28 Jun 2016 18:07:30 +0100 Subject: arm64: errata: Calling enable functions for CPU errata too Currently we call the (optional) enable function for CPU _features_ only. As CPU _errata_ descriptions share the same data structure and having an enable function is useful for errata as well (for instance to set bits in SCTLR), lets call it when enumerating erratas too. Signed-off-by: Andre Przywara Reviewed-by: Suzuki K Poulose Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cpufeature.h | 2 ++ arch/arm64/kernel/cpu_errata.c | 5 +++++ arch/arm64/kernel/cpufeature.c | 4 ++-- 3 files changed, 9 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 224efe730e46..49dd1bd3ea50 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -191,7 +191,9 @@ void __init setup_cpu_features(void); void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, const char *info); +void enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps); void check_local_cpu_errata(void); +void __init enable_errata_workarounds(void); void verify_local_cpu_errata(void); void verify_local_cpu_capabilities(void); diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index d42789499f17..c2261a7a316e 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -127,3 +127,8 @@ void check_local_cpu_errata(void) { update_cpu_capabilities(arm64_errata, "enabling workaround for"); } + +void __init enable_errata_workarounds(void) +{ + enable_cpu_capabilities(arm64_errata); +} diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 811773d1c1d0..916d27ad79c1 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -913,8 +913,7 @@ void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, * Run through the enabled capabilities and enable() it on all active * CPUs */ -static void __init -enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) +void __init enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) { for (; caps->matches; caps++) if (caps->enable && cpus_have_cap(caps->capability)) @@ -1036,6 +1035,7 @@ void __init setup_cpu_features(void) /* Set the CPU feature capabilies */ setup_feature_capabilities(); + enable_errata_workarounds(); setup_elf_hwcaps(arm64_elf_hwcaps); if (system_supports_32bit_el0()) -- cgit v1.2.3-59-g8ed1b From 390bf1773c7eba3b45df62ae82b3d2be911185b7 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Tue, 28 Jun 2016 18:07:31 +0100 Subject: arm64: consolidate signal injection on emulation errors The code for injecting a signal into userland if a trapped instruction fails emulation due to a _userland_ error (like an illegal address) will be used more often with the next patch. Factor out the core functionality into a separate function and use that both for the existing trap handler and for the deprecated instructions emulation. Signed-off-by: Andre Przywara [catalin.marinas@arm.com: s/set_segfault/arm64_notify_segfault/] Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/traps.h | 2 ++ arch/arm64/kernel/armv8_deprecated.c | 25 ++------------ arch/arm64/kernel/traps.c | 63 ++++++++++++++++++++++++++++-------- 3 files changed, 54 insertions(+), 36 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h index 0cc2f29bf9da..9cd03f3e812f 100644 --- a/arch/arm64/include/asm/traps.h +++ b/arch/arm64/include/asm/traps.h @@ -34,6 +34,8 @@ struct undef_hook { void register_undef_hook(struct undef_hook *hook); void unregister_undef_hook(struct undef_hook *hook); +void arm64_notify_segfault(struct pt_regs *regs, unsigned long addr); + #ifdef CONFIG_FUNCTION_GRAPH_TRACER static inline int __in_irqentry_text(unsigned long ptr) { diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index c37202c0c838..fab5603f57ea 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -316,28 +316,6 @@ static void __init register_insn_emulation_sysctl(struct ctl_table *table) */ #define TYPE_SWPB (1 << 22) -/* - * Set up process info to signal segmentation fault - called on access error. - */ -static void set_segfault(struct pt_regs *regs, unsigned long addr) -{ - siginfo_t info; - - down_read(¤t->mm->mmap_sem); - if (find_vma(current->mm, addr) == NULL) - info.si_code = SEGV_MAPERR; - else - info.si_code = SEGV_ACCERR; - up_read(¤t->mm->mmap_sem); - - info.si_signo = SIGSEGV; - info.si_errno = 0; - info.si_addr = (void *) instruction_pointer(regs); - - pr_debug("SWP{B} emulation: access caused memory abort!\n"); - arm64_notify_die("Illegal memory access", regs, &info, 0); -} - static int emulate_swpX(unsigned int address, unsigned int *data, unsigned int type) { @@ -430,7 +408,8 @@ ret: return 0; fault: - set_segfault(regs, address); + pr_debug("SWP{B} emulation: access caused memory abort!\n"); + arm64_notify_segfault(regs, address); return 0; } diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index a4250a59f2b9..d8a5366dcc24 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -364,30 +364,67 @@ exit: return fn ? fn(regs, instr) : 1; } -asmlinkage void __exception do_undefinstr(struct pt_regs *regs) +static void force_signal_inject(int signal, int code, struct pt_regs *regs, + unsigned long address) { siginfo_t info; void __user *pc = (void __user *)instruction_pointer(regs); + const char *desc; - /* check for AArch32 breakpoint instructions */ - if (!aarch32_break_handler(regs)) - return; - - if (call_undef_hook(regs) == 0) - return; + switch (signal) { + case SIGILL: + desc = "undefined instruction"; + break; + case SIGSEGV: + desc = "illegal memory access"; + break; + default: + desc = "bad mode"; + break; + } - if (unhandled_signal(current, SIGILL) && show_unhandled_signals_ratelimited()) { - pr_info("%s[%d]: undefined instruction: pc=%p\n", - current->comm, task_pid_nr(current), pc); + if (unhandled_signal(current, signal) && + show_unhandled_signals_ratelimited()) { + pr_info("%s[%d]: %s: pc=%p\n", + current->comm, task_pid_nr(current), desc, pc); dump_instr(KERN_INFO, regs); } - info.si_signo = SIGILL; + info.si_signo = signal; info.si_errno = 0; - info.si_code = ILL_ILLOPC; + info.si_code = code; info.si_addr = pc; - arm64_notify_die("Oops - undefined instruction", regs, &info, 0); + arm64_notify_die(desc, regs, &info, 0); +} + +/* + * Set up process info to signal segmentation fault - called on access error. + */ +void arm64_notify_segfault(struct pt_regs *regs, unsigned long addr) +{ + int code; + + down_read(¤t->mm->mmap_sem); + if (find_vma(current->mm, addr) == NULL) + code = SEGV_MAPERR; + else + code = SEGV_ACCERR; + up_read(¤t->mm->mmap_sem); + + force_signal_inject(SIGSEGV, code, regs, addr); +} + +asmlinkage void __exception do_undefinstr(struct pt_regs *regs) +{ + /* check for AArch32 breakpoint instructions */ + if (!aarch32_break_handler(regs)) + return; + + if (call_undef_hook(regs) == 0) + return; + + force_signal_inject(SIGILL, ILL_ILLOPC, regs, 0); } long compat_arm_syscall(struct pt_regs *regs); -- cgit v1.2.3-59-g8ed1b From 7dd01aef055792260287c6708daf75aac3918f66 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Tue, 28 Jun 2016 18:07:32 +0100 Subject: arm64: trap userspace "dc cvau" cache operation on errata-affected core The ARM errata 819472, 826319, 827319 and 824069 for affected Cortex-A53 cores demand to promote "dc cvau" instructions to "dc civac". Since we allow userspace to also emit those instructions, we should make sure that "dc cvau" gets promoted there too. So lets grasp the nettle here and actually trap every userland cache maintenance instruction once we detect at least one affected core in the system. We then emulate the instruction by executing it on behalf of userland, promoting "dc cvau" to "dc civac" on the way and injecting access fault back into userspace. Signed-off-by: Andre Przywara [catalin.marinas@arm.com: s/set_segfault/arm64_notify_segfault/] Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/processor.h | 1 + arch/arm64/include/asm/sysreg.h | 2 +- arch/arm64/kernel/cpu_errata.c | 2 ++ arch/arm64/kernel/entry.S | 12 +++++++- arch/arm64/kernel/traps.c | 60 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 75 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index cef1cf398356..ace0a96e7d6e 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -192,5 +192,6 @@ static inline void spin_lock_prefetch(const void *ptr) void cpu_enable_pan(void *__unused); void cpu_enable_uao(void *__unused); +void cpu_enable_cache_maint_trap(void *__unused); #endif /* __ASM_PROCESSOR_H */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 751e901c8d37..cc06794b7346 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -98,11 +98,11 @@ SCTLR_ELx_SA | SCTLR_ELx_I) /* SCTLR_EL1 specific flags. */ +#define SCTLR_EL1_UCI (1 << 26) #define SCTLR_EL1_SPAN (1 << 23) #define SCTLR_EL1_SED (1 << 8) #define SCTLR_EL1_CP15BEN (1 << 5) - /* id_aa64isar0 */ #define ID_AA64ISAR0_RDM_SHIFT 28 #define ID_AA64ISAR0_ATOMICS_SHIFT 20 diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index c2261a7a316e..af647d2cee22 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -46,6 +46,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .desc = "ARM errata 826319, 827319, 824069", .capability = ARM64_WORKAROUND_CLEAN_CACHE, MIDR_RANGE(MIDR_CORTEX_A53, 0x00, 0x02), + .enable = cpu_enable_cache_maint_trap, }, #endif #ifdef CONFIG_ARM64_ERRATUM_819472 @@ -54,6 +55,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .desc = "ARM errata 819472", .capability = ARM64_WORKAROUND_CLEAN_CACHE, MIDR_RANGE(MIDR_CORTEX_A53, 0x00, 0x01), + .enable = cpu_enable_cache_maint_trap, }, #endif #ifdef CONFIG_ARM64_ERRATUM_832075 diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index eefffa81c6df..3eca5d34f7a6 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -451,7 +451,7 @@ el0_sync: cmp x24, #ESR_ELx_EC_FP_EXC64 // FP/ASIMD exception b.eq el0_fpsimd_exc cmp x24, #ESR_ELx_EC_SYS64 // configurable trap - b.eq el0_undef + b.eq el0_sys cmp x24, #ESR_ELx_EC_SP_ALIGN // stack alignment exception b.eq el0_sp_pc cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception @@ -579,6 +579,16 @@ el0_undef: mov x0, sp bl do_undefinstr b ret_to_user +el0_sys: + /* + * System instructions, for trapped cache maintenance instructions + */ + enable_dbg_and_irq + ct_user_exit + mov x0, x25 + mov x1, sp + bl do_sysinstr + b ret_to_user el0_dbg: /* * Debug exception handling diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index d8a5366dcc24..e04f83873af7 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -41,6 +41,7 @@ #include #include #include +#include static const char *handler[]= { "Synchronous Abort", @@ -427,6 +428,65 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs) force_signal_inject(SIGILL, ILL_ILLOPC, regs, 0); } +void cpu_enable_cache_maint_trap(void *__unused) +{ + config_sctlr_el1(SCTLR_EL1_UCI, 0); +} + +#define __user_cache_maint(insn, address, res) \ + asm volatile ( \ + "1: " insn ", %1\n" \ + " mov %w0, #0\n" \ + "2:\n" \ + " .pushsection .fixup,\"ax\"\n" \ + " .align 2\n" \ + "3: mov %w0, %w2\n" \ + " b 2b\n" \ + " .popsection\n" \ + _ASM_EXTABLE(1b, 3b) \ + : "=r" (res) \ + : "r" (address), "i" (-EFAULT) ) + +asmlinkage void __exception do_sysinstr(unsigned int esr, struct pt_regs *regs) +{ + unsigned long address; + int ret; + + /* if this is a write with: Op0=1, Op2=1, Op1=3, CRn=7 */ + if ((esr & 0x01fffc01) == 0x0012dc00) { + int rt = (esr >> 5) & 0x1f; + int crm = (esr >> 1) & 0x0f; + + address = (rt == 31) ? 0 : regs->regs[rt]; + + switch (crm) { + case 11: /* DC CVAU, gets promoted */ + __user_cache_maint("dc civac", address, ret); + break; + case 10: /* DC CVAC, gets promoted */ + __user_cache_maint("dc civac", address, ret); + break; + case 14: /* DC CIVAC */ + __user_cache_maint("dc civac", address, ret); + break; + case 5: /* IC IVAU */ + __user_cache_maint("ic ivau", address, ret); + break; + default: + force_signal_inject(SIGILL, ILL_ILLOPC, regs, 0); + return; + } + } else { + force_signal_inject(SIGILL, ILL_ILLOPC, regs, 0); + return; + } + + if (ret) + arm64_notify_segfault(regs, address); + else + regs->pc += 4; +} + long compat_arm_syscall(struct pt_regs *regs); asmlinkage long do_ni_syscall(struct pt_regs *regs) -- cgit v1.2.3-59-g8ed1b From 53e1b32910a3bc94d9f122321442b79b314219f8 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 29 Jun 2016 14:51:26 +0200 Subject: arm64: mm: add param to force create_pgd_mapping() to use page mappings Add a bool parameter 'allow_block_mappings' to create_pgd_mapping() and the various helper functions that it descends into, to give the caller control over whether block entries may be used to create the mapping. The UEFI runtime mapping routines will use this to avoid creating block entries that would need to split up into page entries when applying the permissions listed in the Memory Attributes firmware table. This also replaces the block_mappings_allowed() helper function that was added for DEBUG_PAGEALLOC functionality, but the resulting code is functionally equivalent (given that debug_page_alloc does not operate on EFI page table entries anyway) Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/mmu.h | 2 +- arch/arm64/kernel/efi.c | 2 +- arch/arm64/mm/mmu.c | 67 ++++++++++++++++++-------------------------- 3 files changed, 29 insertions(+), 42 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 97b1d8f26b9c..8d9fce037b2f 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -34,7 +34,7 @@ extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt); extern void init_mem_pgprot(void); extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, unsigned long virt, phys_addr_t size, - pgprot_t prot); + pgprot_t prot, bool allow_block_mappings); extern void *fixmap_remap_fdt(phys_addr_t dt_phys); #endif diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 78f52488f9ff..981604948521 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -65,7 +65,7 @@ int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md) create_pgd_mapping(mm, md->phys_addr, md->virt_addr, md->num_pages << EFI_PAGE_SHIFT, - __pgprot(prot_val | PTE_NG)); + __pgprot(prot_val | PTE_NG), true); return 0; } diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 0c95d6ec873d..a289d66121b6 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -155,29 +155,10 @@ static void split_pud(pud_t *old_pud, pmd_t *pmd) } while (pmd++, i++, i < PTRS_PER_PMD); } -#ifdef CONFIG_DEBUG_PAGEALLOC -static bool block_mappings_allowed(phys_addr_t (*pgtable_alloc)(void)) -{ - - /* - * If debug_page_alloc is enabled we must map the linear map - * using pages. However, other mappings created by - * create_mapping_noalloc must use sections in some cases. Allow - * sections to be used in those cases, where no pgtable_alloc - * function is provided. - */ - return !pgtable_alloc || !debug_pagealloc_enabled(); -} -#else -static bool block_mappings_allowed(phys_addr_t (*pgtable_alloc)(void)) -{ - return true; -} -#endif - static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(void)) + phys_addr_t (*pgtable_alloc)(void), + bool allow_block_mappings) { pmd_t *pmd; unsigned long next; @@ -208,7 +189,7 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end, next = pmd_addr_end(addr, end); /* try section mapping first */ if (((addr | next | phys) & ~SECTION_MASK) == 0 && - block_mappings_allowed(pgtable_alloc)) { + allow_block_mappings) { pmd_t old_pmd =*pmd; pmd_set_huge(pmd, phys, prot); /* @@ -247,7 +228,8 @@ static inline bool use_1G_block(unsigned long addr, unsigned long next, static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(void)) + phys_addr_t (*pgtable_alloc)(void), + bool allow_block_mappings) { pud_t *pud; unsigned long next; @@ -267,8 +249,7 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end, /* * For 4K granule only, attempt to put down a 1GB block */ - if (use_1G_block(addr, next, phys) && - block_mappings_allowed(pgtable_alloc)) { + if (use_1G_block(addr, next, phys) && allow_block_mappings) { pud_t old_pud = *pud; pud_set_huge(pud, phys, prot); @@ -289,7 +270,7 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end, } } else { alloc_init_pmd(pud, addr, next, phys, prot, - pgtable_alloc); + pgtable_alloc, allow_block_mappings); } phys += next - addr; } while (pud++, addr = next, addr != end); @@ -303,7 +284,8 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end, */ static void init_pgd(pgd_t *pgd, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(void)) + phys_addr_t (*pgtable_alloc)(void), + bool allow_block_mappings) { unsigned long addr, length, end, next; @@ -321,7 +303,8 @@ static void init_pgd(pgd_t *pgd, phys_addr_t phys, unsigned long virt, end = addr + length; do { next = pgd_addr_end(addr, end); - alloc_init_pud(pgd, addr, next, phys, prot, pgtable_alloc); + alloc_init_pud(pgd, addr, next, phys, prot, pgtable_alloc, + allow_block_mappings); phys += next - addr; } while (pgd++, addr = next, addr != end); } @@ -339,9 +322,11 @@ static phys_addr_t late_pgtable_alloc(void) static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot, - phys_addr_t (*alloc)(void)) + phys_addr_t (*alloc)(void), + bool allow_block_mappings) { - init_pgd(pgd_offset_raw(pgdir, virt), phys, virt, size, prot, alloc); + init_pgd(pgd_offset_raw(pgdir, virt), phys, virt, size, prot, alloc, + allow_block_mappings); } /* @@ -357,16 +342,15 @@ static void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt, &phys, virt); return; } - __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, - NULL); + __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, true); } void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, unsigned long virt, phys_addr_t size, - pgprot_t prot) + pgprot_t prot, bool allow_block_mappings) { __create_pgd_mapping(mm->pgd, phys, virt, size, prot, - late_pgtable_alloc); + late_pgtable_alloc, allow_block_mappings); } static void create_mapping_late(phys_addr_t phys, unsigned long virt, @@ -379,7 +363,7 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt, } __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, - late_pgtable_alloc); + late_pgtable_alloc, !debug_pagealloc_enabled()); } static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end) @@ -396,7 +380,8 @@ static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end if (end < kernel_start || start >= kernel_end) { __create_pgd_mapping(pgd, start, __phys_to_virt(start), end - start, PAGE_KERNEL, - early_pgtable_alloc); + early_pgtable_alloc, + !debug_pagealloc_enabled()); return; } @@ -408,12 +393,14 @@ static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end __create_pgd_mapping(pgd, start, __phys_to_virt(start), kernel_start - start, PAGE_KERNEL, - early_pgtable_alloc); + early_pgtable_alloc, + !debug_pagealloc_enabled()); if (kernel_end < end) __create_pgd_mapping(pgd, kernel_end, __phys_to_virt(kernel_end), end - kernel_end, PAGE_KERNEL, - early_pgtable_alloc); + early_pgtable_alloc, + !debug_pagealloc_enabled()); /* * Map the linear alias of the [_text, __init_begin) interval as @@ -423,7 +410,7 @@ static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end */ __create_pgd_mapping(pgd, kernel_start, __phys_to_virt(kernel_start), kernel_end - kernel_start, PAGE_KERNEL_RO, - early_pgtable_alloc); + early_pgtable_alloc, !debug_pagealloc_enabled()); } static void __init map_mem(pgd_t *pgd) @@ -480,7 +467,7 @@ static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end, BUG_ON(!PAGE_ALIGNED(size)); __create_pgd_mapping(pgd, pa_start, (unsigned long)va_start, size, prot, - early_pgtable_alloc); + early_pgtable_alloc, !debug_pagealloc_enabled()); vma->addr = va_start; vma->phys_addr = pa_start; -- cgit v1.2.3-59-g8ed1b From bd264d046aad25e9922a142a7831e6841a2f0474 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 29 Jun 2016 14:51:27 +0200 Subject: arm64: efi: always map runtime services code and data regions down to pages To avoid triggering diagnostics in the MMU code that are finicky about splitting block mappings into more granular mappings, ensure that regions that are likely to appear in the Memory Attributes table as well as the UEFI memory map are always mapped down to pages. This way, we can use apply_to_page_range() instead of create_pgd_mapping() for the second pass, which cannot split or merge block entries, and operates strictly on PTEs. Note that this aligns the arm64 Memory Attributes table handling code with the ARM code, which already uses apply_to_page_range() to set the strict permissions. Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/efi.h | 3 +-- arch/arm64/kernel/efi.c | 36 +++++++++++++++++++++++++++++++++++- 2 files changed, 36 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 622db3c6474e..8b13476cdf96 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -14,8 +14,7 @@ extern void efi_init(void); #endif int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md); - -#define efi_set_mapping_permissions efi_create_mapping +int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md); #define arch_efi_call_virt_setup() \ ({ \ diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 981604948521..4aef89f37049 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -62,13 +62,47 @@ struct screen_info screen_info __section(.data); int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md) { pteval_t prot_val = create_mapping_protection(md); + bool allow_block_mappings = (md->type != EFI_RUNTIME_SERVICES_CODE && + md->type != EFI_RUNTIME_SERVICES_DATA); create_pgd_mapping(mm, md->phys_addr, md->virt_addr, md->num_pages << EFI_PAGE_SHIFT, - __pgprot(prot_val | PTE_NG), true); + __pgprot(prot_val | PTE_NG), allow_block_mappings); return 0; } +static int __init set_permissions(pte_t *ptep, pgtable_t token, + unsigned long addr, void *data) +{ + efi_memory_desc_t *md = data; + pte_t pte = *ptep; + + if (md->attribute & EFI_MEMORY_RO) + pte = set_pte_bit(pte, __pgprot(PTE_RDONLY)); + if (md->attribute & EFI_MEMORY_XP) + pte = set_pte_bit(pte, __pgprot(PTE_PXN)); + set_pte(ptep, pte); + return 0; +} + +int __init efi_set_mapping_permissions(struct mm_struct *mm, + efi_memory_desc_t *md) +{ + BUG_ON(md->type != EFI_RUNTIME_SERVICES_CODE && + md->type != EFI_RUNTIME_SERVICES_DATA); + + /* + * Calling apply_to_page_range() is only safe on regions that are + * guaranteed to be mapped down to pages. Since we are only called + * for regions that have been mapped using efi_create_mapping() above + * (and this is checked by the generic Memory Attributes table parsing + * routines), there is no need to check that again here. + */ + return apply_to_page_range(mm, md->virt_addr, + md->num_pages << EFI_PAGE_SHIFT, + set_permissions, md); +} + static int __init arm64_dmi_init(void) { /* -- cgit v1.2.3-59-g8ed1b From 74c102c988cd48fff8055a0bfb84234fd3509419 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 29 Jun 2016 14:51:28 +0200 Subject: arm64: efi: avoid block mappings for unaligned UEFI memory regions When running the OS with a page size > 4 KB, we need to round up mappings for regions that are not aligned to the OS's page size. We already avoid block mappings for EfiRuntimeServicesCode/Data regions for other reasons, but in the unlikely event that other unaliged regions exists that have the EFI_MEMORY_RUNTIME attribute set, ensure that unaligned regions are always mapped down to pages. This way, the overlapping page is guaranteed not to be covered by a block mapping that needs to be split. Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- arch/arm64/kernel/efi.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 4aef89f37049..ba9bee389fd5 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -65,6 +65,20 @@ int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md) bool allow_block_mappings = (md->type != EFI_RUNTIME_SERVICES_CODE && md->type != EFI_RUNTIME_SERVICES_DATA); + if (!PAGE_ALIGNED(md->phys_addr) || + !PAGE_ALIGNED(md->num_pages << EFI_PAGE_SHIFT)) { + /* + * If the end address of this region is not aligned to page + * size, the mapping is rounded up, and may end up sharing a + * page frame with the next UEFI memory region. If we create + * a block entry now, we may need to split it again when mapping + * the next region, and support for that is going to be removed + * from the MMU routines. So avoid block mappings altogether in + * that case. + */ + allow_block_mappings = false; + } + create_pgd_mapping(mm, md->phys_addr, md->virt_addr, md->num_pages << EFI_PAGE_SHIFT, __pgprot(prot_val | PTE_NG), allow_block_mappings); -- cgit v1.2.3-59-g8ed1b From 4133af6c0419b5a2a4da245ff7af7ceca7fd740d Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 29 Jun 2016 14:51:29 +0200 Subject: arm64: mm: Remove split_p*d() functions Since the efi_create_mapping() no longer generates block mappings and being the last user of the split_p*d code, remove these functions and the corresponding TLBI. Signed-off-by: Catalin Marinas [ardb: replace 'overlapping regions' with 'block mappings' in commit log] Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- arch/arm64/mm/mmu.c | 47 ++++------------------------------------------- 1 file changed, 4 insertions(+), 43 deletions(-) (limited to 'arch') diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index a289d66121b6..9d2d7cfb95b4 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -96,24 +96,6 @@ static phys_addr_t __init early_pgtable_alloc(void) return phys; } -/* - * remap a PMD into pages - */ -static void split_pmd(pmd_t *pmd, pte_t *pte) -{ - unsigned long pfn = pmd_pfn(*pmd); - int i = 0; - - do { - /* - * Need to have the least restrictive permissions available - * permissions will be fixed up later - */ - set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC)); - pfn++; - } while (pte++, i++, i < PTRS_PER_PTE); -} - static void alloc_init_pte(pmd_t *pmd, unsigned long addr, unsigned long end, unsigned long pfn, pgprot_t prot, @@ -121,15 +103,13 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr, { pte_t *pte; - if (pmd_none(*pmd) || pmd_sect(*pmd)) { + BUG_ON(pmd_sect(*pmd)); + if (pmd_none(*pmd)) { phys_addr_t pte_phys; BUG_ON(!pgtable_alloc); pte_phys = pgtable_alloc(); pte = pte_set_fixmap(pte_phys); - if (pmd_sect(*pmd)) - split_pmd(pmd, pte); __pmd_populate(pmd, pte_phys, PMD_TYPE_TABLE); - flush_tlb_all(); pte_clear_fixmap(); } BUG_ON(pmd_bad(*pmd)); @@ -143,18 +123,6 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr, pte_clear_fixmap(); } -static void split_pud(pud_t *old_pud, pmd_t *pmd) -{ - unsigned long addr = pud_pfn(*old_pud) << PAGE_SHIFT; - pgprot_t prot = __pgprot(pud_val(*old_pud) ^ addr); - int i = 0; - - do { - set_pmd(pmd, __pmd(addr | pgprot_val(prot))); - addr += PMD_SIZE; - } while (pmd++, i++, i < PTRS_PER_PMD); -} - static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, phys_addr_t (*pgtable_alloc)(void), @@ -166,20 +134,13 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end, /* * Check for initial section mappings in the pgd/pud and remove them. */ - if (pud_none(*pud) || pud_sect(*pud)) { + BUG_ON(pud_sect(*pud)); + if (pud_none(*pud)) { phys_addr_t pmd_phys; BUG_ON(!pgtable_alloc); pmd_phys = pgtable_alloc(); pmd = pmd_set_fixmap(pmd_phys); - if (pud_sect(*pud)) { - /* - * need to have the 1G of mappings continue to be - * present - */ - split_pud(pud, pmd); - } __pud_populate(pud, pmd_phys, PUD_TYPE_TABLE); - flush_tlb_all(); pmd_clear_fixmap(); } BUG_ON(pud_bad(*pud)); -- cgit v1.2.3-59-g8ed1b From 40f87d3114b8a1e730ac8f3dc3cf1efe33124776 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 29 Jun 2016 14:51:30 +0200 Subject: arm64: mm: fold init_pgd() into __create_pgd_mapping() The routine __create_pgd_mapping() does nothing except calling init_pgd(), which has no other callers. So fold the latter into the former. Also, drop a comment that has gone stale. Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- arch/arm64/mm/mmu.c | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 9d2d7cfb95b4..a96a2413fa18 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -239,16 +239,14 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end, pud_clear_fixmap(); } -/* - * Create the page directory entries and any necessary page tables for the - * mapping specified by 'md'. - */ -static void init_pgd(pgd_t *pgd, phys_addr_t phys, unsigned long virt, - phys_addr_t size, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(void), - bool allow_block_mappings) +static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, + unsigned long virt, phys_addr_t size, + pgprot_t prot, + phys_addr_t (*pgtable_alloc)(void), + bool allow_block_mappings) { unsigned long addr, length, end, next; + pgd_t *pgd = pgd_offset_raw(pgdir, virt); /* * If the virtual and physical address don't have the same offset @@ -280,16 +278,6 @@ static phys_addr_t late_pgtable_alloc(void) return __pa(ptr); } -static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, - unsigned long virt, phys_addr_t size, - pgprot_t prot, - phys_addr_t (*alloc)(void), - bool allow_block_mappings) -{ - init_pgd(pgd_offset_raw(pgdir, virt), phys, virt, size, prot, alloc, - allow_block_mappings); -} - /* * This function can only be used to modify existing table entries, * without allocating new levels of table. Note that this permits the -- cgit v1.2.3-59-g8ed1b From 90f777beb788d08300f4a1482cb4fd37a401b472 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 8 Jul 2016 12:13:20 +0100 Subject: arm64: Fix vdso-offsets.h dependency arch/arm64/kernel/{vdso,signal}.c include generated/vdso-offsets.h, and therefore the symbol offsets must be generated before these files are compiled. The current rules in arm64/kernel/Makefile do not actually enforce this, because even though $(obj)/vdso is listed as a prerequisite for vdso-offsets.h, this does not result in the intended effect of building the vdso subdirectory (before all the other objects). As a consequence, depending on the order in which the rules are followed, vdso-offsets.h is updated or not before arm64/kernel/{vdso,signal}.o are built. The current rules also impose an unnecessary dependency on vdso-offsets.h for all arm64/kernel/*.o, resulting in unnecessary rebuilds. This patch removes the arch/arm64/kernel/vdso/vdso-offsets.h file generation, leaving only the include/generated/vdso-offsets.h one. It adds a forced dependency check of the vdso-offsets.h file in arch/arm64/kernel/Makefile which, if not up to date according to the arch/arm64/kernel/vdso/Makefile rules (depending on vdso.so.dbg), will trigger the vdso/ subdirectory build and vdso-offsets.h re-generation. Automatic kbuild dependency rules between kernel/{vdso,signal}.c rules and vdso-offsets.h will guarantee that the vDSO object is built first, followed by the generated symbol offsets header file. Reported-by: Kevin Brodsky Signed-off-by: Catalin Marinas --- arch/arm64/kernel/Makefile | 7 ++++--- arch/arm64/kernel/vdso/Makefile | 7 +++---- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 7700c0c23962..efffc231a244 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -54,6 +54,7 @@ obj-m += $(arm64-obj-m) head-y := head.o extra-y += $(head-y) vmlinux.lds -# vDSO - this must be built first to generate the symbol offsets -$(call objectify,$(arm64-obj-y)): $(obj)/vdso/vdso-offsets.h -$(obj)/vdso/vdso-offsets.h: $(obj)/vdso +# Check that the vDSO symbol offsets header file is up to date and re-generate +# it if necessary. +$(objtree)/include/generated/vdso-offsets.h: FORCE + $(Q)$(MAKE) $(build)=$(obj)/vdso $@ diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index b467fd0a384b..285433720202 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -23,7 +23,7 @@ GCOV_PROFILE := n ccflags-y += -Wl,-shared obj-y += vdso.o -extra-y += vdso.lds vdso-offsets.h +extra-y += vdso.lds CPPFLAGS_vdso.lds += -P -C -U$(ARCH) # Force dependency (incbin is bad) @@ -42,11 +42,10 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh quiet_cmd_vdsosym = VDSOSYM $@ define cmd_vdsosym - $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ && \ - cp $@ include/generated/ + $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ endef -$(obj)/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE +$(objtree)/include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE $(call if_changed,vdsosym) # Assembly rules for the .S files -- cgit v1.2.3-59-g8ed1b From 16c11325cc44f0614a45e584d439e195059c3f5a Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Fri, 1 Jul 2016 17:50:10 +0100 Subject: arm64: mm: change IOMMU notifier action to attach DMA ops Current bus notifier in ARM64 (__iommu_attach_notifier) attempts to attach dma_ops to a device on BUS_NOTIFY_ADD_DEVICE action notification. This will cause issues on ACPI based systems, where PCI devices can be added before the IOMMUs the devices are attached to had a chance to be probed, causing failures on attempts to attach dma_ops in that the domain for the respective IOMMU may not be set-up yet by the time the bus notifier is run. Devices dma_ops do not require to be set-up till the matching device drivers are probed. This means that instead of running the notifier attaching dma_ops to devices (__iommu_attach_notifier) on BUS_NOTIFY_ADD_DEVICE action, it can be run just before the device driver is bound to the device in question (on action BUS_NOTIFY_BIND_DRIVER) so that it is certain that its IOMMU group and domain are set-up accordingly at the time the notifier is triggered. This patch changes the notifier action upon which dma_ops are attached to devices and defer it to driver binding time, so that IOMMU devices have a chance to be probed and to register their bus notifiers before the dma_ops attach sequence for a device is actually carried out. As a result we also no longer need worry about racing with iommu_bus_notifier(), or about retrying the queue in case devices were added too early on DT-based systems, so clean up the notifier itself plus the additional workaround from 722ec35f7fae ("arm64: dma-mapping: fix handling of devices registered before arch_initcall") Acked-by: Will Deacon Cc: Marek Szyprowski Signed-off-by: Lorenzo Pieralisi [rm: get rid of other now-redundant bits] Signed-off-by: Robin Murphy Signed-off-by: Catalin Marinas --- arch/arm64/mm/dma-mapping.c | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 46a4157adc17..f6c55afab3e2 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -861,15 +861,16 @@ static int __iommu_attach_notifier(struct notifier_block *nb, { struct iommu_dma_notifier_data *master, *tmp; - if (action != BUS_NOTIFY_ADD_DEVICE) + if (action != BUS_NOTIFY_BIND_DRIVER) return 0; mutex_lock(&iommu_dma_notifier_lock); list_for_each_entry_safe(master, tmp, &iommu_dma_masters, list) { - if (do_iommu_attach(master->dev, master->ops, - master->dma_base, master->size)) { + if (data == master->dev && do_iommu_attach(master->dev, + master->ops, master->dma_base, master->size)) { list_del(&master->list); kfree(master); + break; } } mutex_unlock(&iommu_dma_notifier_lock); @@ -883,17 +884,8 @@ static int __init register_iommu_dma_ops_notifier(struct bus_type *bus) if (!nb) return -ENOMEM; - /* - * The device must be attached to a domain before the driver probe - * routine gets a chance to start allocating DMA buffers. However, - * the IOMMU driver also needs a chance to configure the iommu_group - * via its add_device callback first, so we need to make the attach - * happen between those two points. Since the IOMMU core uses a bus - * notifier with default priority for add_device, do the same but - * with a lower priority to ensure the appropriate ordering. - */ + nb->notifier_call = __iommu_attach_notifier; - nb->priority = -100; ret = bus_register_notifier(bus, nb); if (ret) { @@ -917,10 +909,6 @@ static int __init __iommu_dma_init(void) if (!ret) ret = register_iommu_dma_ops_notifier(&pci_bus_type); #endif - - /* handle devices queued before this arch_initcall */ - if (!ret) - __iommu_attach_notifier(NULL, BUS_NOTIFY_ADD_DEVICE, NULL); return ret; } arch_initcall(__iommu_dma_init); -- cgit v1.2.3-59-g8ed1b From 7d9a7086319daf826ebf06c1abe31728823e1240 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 11 Jul 2016 17:04:13 +0100 Subject: Revert "arm64: Fix vdso-offsets.h dependency" This reverts commit 90f777beb788d08300f4a1482cb4fd37a401b472. While this commit was aimed at fixing the dependencies, with a large make -j the vdso-offsets.h file is not generated, leading to build failures. Signed-off-by: Catalin Marinas --- arch/arm64/kernel/Makefile | 7 +++---- arch/arm64/kernel/vdso/Makefile | 7 ++++--- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index efffc231a244..7700c0c23962 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -54,7 +54,6 @@ obj-m += $(arm64-obj-m) head-y := head.o extra-y += $(head-y) vmlinux.lds -# Check that the vDSO symbol offsets header file is up to date and re-generate -# it if necessary. -$(objtree)/include/generated/vdso-offsets.h: FORCE - $(Q)$(MAKE) $(build)=$(obj)/vdso $@ +# vDSO - this must be built first to generate the symbol offsets +$(call objectify,$(arm64-obj-y)): $(obj)/vdso/vdso-offsets.h +$(obj)/vdso/vdso-offsets.h: $(obj)/vdso diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index 285433720202..b467fd0a384b 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -23,7 +23,7 @@ GCOV_PROFILE := n ccflags-y += -Wl,-shared obj-y += vdso.o -extra-y += vdso.lds +extra-y += vdso.lds vdso-offsets.h CPPFLAGS_vdso.lds += -P -C -U$(ARCH) # Force dependency (incbin is bad) @@ -42,10 +42,11 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh quiet_cmd_vdsosym = VDSOSYM $@ define cmd_vdsosym - $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ + $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ && \ + cp $@ include/generated/ endef -$(objtree)/include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE +$(obj)/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE $(call if_changed,vdsosym) # Assembly rules for the .S files -- cgit v1.2.3-59-g8ed1b From a66649dab35033bd67988670fa60c268b0444cda Mon Sep 17 00:00:00 2001 From: Kevin Brodsky Date: Thu, 12 May 2016 17:39:15 +0100 Subject: arm64: fix vdso-offsets.h dependency arm64/kernel/{vdso,signal}.c include vdso-offsets.h, as well as any file that includes asm/vdso.h. Therefore, vdso-offsets.h must be generated before these files are compiled. The current rules in arm64/kernel/Makefile do not actually enforce this, because even though $(obj)/vdso is listed as a prerequisite for vdso-offsets.h, this does not result in the intended effect of building the vdso subdirectory (before all the other objects). As a consequence, depending on the order in which the rules are followed, vdso-offsets.h is updated or not before arm64/kernel/{vdso,signal}.o are built. The current rules also impose an unnecessary dependency on vdso-offsets.h for all arm64/kernel/*.o, resulting in unnecessary rebuilds. This is made obvious when using make -j: touch arch/arm64/kernel/vdso/gettimeofday.S && make -j$NCPUS arch/arm64/kernel will sometimes result in none of arm64/kernel/*.o being rebuilt, sometimes all of them, or even just some of them. It is quite difficult to ensure that a header is generated before it is used with recursive Makefiles by using normal rules. Instead, arch-specific generated headers are normally built in the archprepare recipe in the arch Makefile (see for instance arch/ia64/Makefile). Unfortunately, asm-offsets.h is included in gettimeofday.S, and must therefore be generated before vdso-offsets.h, which is not the case if archprepare is used. For this reason, a rule run after archprepare has to be used. This commit adds rules in arm64/Makefile to build vdso-offsets.h during the prepare step, ensuring that vdso-offsets.h is generated before building anything. It also removes the now-unnecessary dependencies on vdso-offsets.h in arm64/kernel/Makefile. Finally, it removes the duplication of asm-offsets.h between arm64/kernel/vdso/ and include/generated/ and makes include/generated/vdso-offsets.h a target in arm64/kernel/vdso/Makefile. Cc: Will Deacon Cc: Michal Marek Signed-off-by: Kevin Brodsky Signed-off-by: Catalin Marinas --- arch/arm64/Makefile | 10 ++++++++++ arch/arm64/kernel/Makefile | 4 ---- arch/arm64/kernel/vdso/Makefile | 7 +++---- 3 files changed, 13 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 7085e322dc42..4b6f84bf2ab9 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -121,6 +121,16 @@ archclean: $(Q)$(MAKE) $(clean)=$(boot) $(Q)$(MAKE) $(clean)=$(boot)/dts +# We need to generate vdso-offsets.h before compiling certain files in kernel/. +# In order to do that, we should use the archprepare target, but we can't since +# asm-offsets.h is included in some files used to generate vdso-offsets.h, and +# asm-offsets.h is built in prepare0, for which archprepare is a dependency. +# Therefore we need to generate the header after prepare0 has been made, hence +# this hack. +prepare: vdso_prepare +vdso_prepare: prepare0 + $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso include/generated/vdso-offsets.h + define archhelp echo '* Image.gz - Compressed kernel image (arch/$(ARCH)/boot/Image.gz)' echo ' Image - Uncompressed kernel image (arch/$(ARCH)/boot/Image)' diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 7700c0c23962..9ef8ec681e30 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -53,7 +53,3 @@ obj-y += $(arm64-obj-y) vdso/ obj-m += $(arm64-obj-m) head-y := head.o extra-y += $(head-y) vmlinux.lds - -# vDSO - this must be built first to generate the symbol offsets -$(call objectify,$(arm64-obj-y)): $(obj)/vdso/vdso-offsets.h -$(obj)/vdso/vdso-offsets.h: $(obj)/vdso diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index b467fd0a384b..62c84f7cb01b 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -23,7 +23,7 @@ GCOV_PROFILE := n ccflags-y += -Wl,-shared obj-y += vdso.o -extra-y += vdso.lds vdso-offsets.h +extra-y += vdso.lds CPPFLAGS_vdso.lds += -P -C -U$(ARCH) # Force dependency (incbin is bad) @@ -42,11 +42,10 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh quiet_cmd_vdsosym = VDSOSYM $@ define cmd_vdsosym - $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ && \ - cp $@ include/generated/ + $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ endef -$(obj)/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE +include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE $(call if_changed,vdsosym) # Assembly rules for the .S files -- cgit v1.2.3-59-g8ed1b From b33f491f5a9aaf171b7de0f905362eb0314af478 Mon Sep 17 00:00:00 2001 From: Kevin Brodsky Date: Tue, 12 Jul 2016 11:23:59 +0100 Subject: arm64: Refactor vDSO time functions Time functions are directly implemented in assembly in arm64, and it is desirable to keep it this way for performance reasons (everything fits in registers, so that the stack is not used at all). However, the current implementation is quite difficult to read and understand (even considering it's assembly). Additionally, due to the structure of __kernel_clock_gettime, which heavily uses conditional branches to share code between the different clocks, it is difficult to support a new clock without making the branches even harder to follow. This commit completely refactors the structure of clock_gettime (and gettimeofday along the way) while keeping exactly the same algorithms. We no longer try to share code; instead, macros provide common operations. This new approach comes with a number of advantages: - In clock_gettime, clock implementations are no longer interspersed, making them much more readable. Additionally, macros only use registers passed as arguments or reserved with .req, this way it is easy to make sure that registers are properly allocated. To avoid a large number of branches in a given execution path, a jump table is used; a normal execution uses 3 unconditional branches. - __do_get_tspec has been replaced with 2 macros (get_ts_clock_mono, get_clock_shifted_nsec) and explicit loading of data from the vDSO page. Consequently, clock_gettime and gettimeofday are now leaf functions, and saving x30 (lr) is no longer necessary. - Variables protected by tb_seq_count are now loaded all at once, allowing to merge the seqcnt_read macro into seqcnt_check. - For CLOCK_REALTIME_COARSE, removed an unused load of the wall to monotonic timespec. - For CLOCK_MONOTONIC_COARSE, removed a few shift instructions. Obviously, the downside of sharing less code is an increase in code size. However since the vDSO has its own code page, this does not really matter, as long as the size of the DSO remains below 4 kB. For now this should be all right: Before After vdso.so size (B) 2776 3000 Signed-off-by: Kevin Brodsky Reviewed-by: Dave Martin Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/vdso/gettimeofday.S | 298 ++++++++++++++++++++-------------- 1 file changed, 172 insertions(+), 126 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S index efa79e8d4196..c06919ee29e1 100644 --- a/arch/arm64/kernel/vdso/gettimeofday.S +++ b/arch/arm64/kernel/vdso/gettimeofday.S @@ -26,24 +26,100 @@ #define NSEC_PER_SEC_HI16 0x3b9a vdso_data .req x6 -use_syscall .req w7 -seqcnt .req w8 +seqcnt .req w7 +w_tmp .req w8 +x_tmp .req x8 + +/* + * Conventions for macro arguments: + * - An argument is write-only if its name starts with "res". + * - All other arguments are read-only, unless otherwise specified. + */ .macro seqcnt_acquire 9999: ldr seqcnt, [vdso_data, #VDSO_TB_SEQ_COUNT] tbnz seqcnt, #0, 9999b dmb ishld - ldr use_syscall, [vdso_data, #VDSO_USE_SYSCALL] .endm - .macro seqcnt_read, cnt + .macro seqcnt_check fail dmb ishld - ldr \cnt, [vdso_data, #VDSO_TB_SEQ_COUNT] + ldr w_tmp, [vdso_data, #VDSO_TB_SEQ_COUNT] + cmp w_tmp, seqcnt + b.ne \fail .endm - .macro seqcnt_check, cnt, fail - cmp \cnt, seqcnt - b.ne \fail + .macro syscall_check fail + ldr w_tmp, [vdso_data, #VDSO_USE_SYSCALL] + cbnz w_tmp, \fail + .endm + + .macro get_nsec_per_sec res + mov \res, #NSEC_PER_SEC_LO16 + movk \res, #NSEC_PER_SEC_HI16, lsl #16 + .endm + + /* + * Returns the clock delta, in nanoseconds left-shifted by the clock + * shift. + */ + .macro get_clock_shifted_nsec res, cycle_last, mult + /* Read the virtual counter. */ + isb + mrs x_tmp, cntvct_el0 + /* Calculate cycle delta and convert to ns. */ + sub \res, x_tmp, \cycle_last + /* We can only guarantee 56 bits of precision. */ + movn x_tmp, #0xff00, lsl #48 + and \res, x_tmp, \res + mul \res, \res, \mult + .endm + + /* + * Returns in res_{sec,nsec} the REALTIME timespec, based on the + * "wall time" (xtime) and the clock_mono delta. + */ + .macro get_ts_realtime res_sec, res_nsec, \ + clock_nsec, xtime_sec, xtime_nsec, nsec_to_sec + add \res_nsec, \clock_nsec, \xtime_nsec + udiv x_tmp, \res_nsec, \nsec_to_sec + add \res_sec, \xtime_sec, x_tmp + msub \res_nsec, x_tmp, \nsec_to_sec, \res_nsec + .endm + + /* sec and nsec are modified in place. */ + .macro add_ts sec, nsec, ts_sec, ts_nsec, nsec_to_sec + /* Add timespec. */ + add \sec, \sec, \ts_sec + add \nsec, \nsec, \ts_nsec + + /* Normalise the new timespec. */ + cmp \nsec, \nsec_to_sec + b.lt 9999f + sub \nsec, \nsec, \nsec_to_sec + add \sec, \sec, #1 +9999: + cmp \nsec, #0 + b.ge 9998f + add \nsec, \nsec, \nsec_to_sec + sub \sec, \sec, #1 +9998: + .endm + + .macro clock_gettime_return, shift=0 + .if \shift == 1 + lsr x11, x11, x12 + .endif + stp x10, x11, [x1, #TSPEC_TV_SEC] + mov x0, xzr + ret + .endm + + .macro jump_slot jumptable, index, label + .if (. - \jumptable) != 4 * (\index) + .error "Jump slot index mismatch" + .endif + b \label .endm .text @@ -51,18 +127,24 @@ seqcnt .req w8 /* int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz); */ ENTRY(__kernel_gettimeofday) .cfi_startproc - mov x2, x30 - .cfi_register x30, x2 - - /* Acquire the sequence counter and get the timespec. */ adr vdso_data, _vdso_data -1: seqcnt_acquire - cbnz use_syscall, 4f - /* If tv is NULL, skip to the timezone code. */ cbz x0, 2f - bl __do_get_tspec - seqcnt_check w9, 1b + + /* Compute the time of day. */ +1: seqcnt_acquire + syscall_check fail=4f + ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] + ldp w11, w12, [vdso_data, #VDSO_CS_MULT] + ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] + seqcnt_check fail=1b + + get_nsec_per_sec res=x9 + lsl x9, x9, x12 + + get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 + get_ts_realtime res_sec=x10, res_nsec=x11, \ + clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9 /* Convert ns to us. */ mov x13, #1000 @@ -76,95 +158,102 @@ ENTRY(__kernel_gettimeofday) stp w4, w5, [x1, #TZ_MINWEST] 3: mov x0, xzr - ret x2 + ret 4: /* Syscall fallback. */ mov x8, #__NR_gettimeofday svc #0 - ret x2 + ret .cfi_endproc ENDPROC(__kernel_gettimeofday) +#define JUMPSLOT_MAX CLOCK_MONOTONIC_COARSE + /* int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp); */ ENTRY(__kernel_clock_gettime) .cfi_startproc - cmp w0, #CLOCK_REALTIME - ccmp w0, #CLOCK_MONOTONIC, #0x4, ne - b.ne 2f - - mov x2, x30 - .cfi_register x30, x2 - - /* Get kernel timespec. */ + cmp w0, #JUMPSLOT_MAX + b.hi syscall adr vdso_data, _vdso_data -1: seqcnt_acquire - cbnz use_syscall, 7f + adr x_tmp, jumptable + add x_tmp, x_tmp, w0, uxtw #2 + br x_tmp + + ALIGN +jumptable: + jump_slot jumptable, CLOCK_REALTIME, realtime + jump_slot jumptable, CLOCK_MONOTONIC, monotonic + b syscall + b syscall + b syscall + jump_slot jumptable, CLOCK_REALTIME_COARSE, realtime_coarse + jump_slot jumptable, CLOCK_MONOTONIC_COARSE, monotonic_coarse + + .if (. - jumptable) != 4 * (JUMPSLOT_MAX + 1) + .error "Wrong jumptable size" + .endif + + ALIGN +realtime: + seqcnt_acquire + syscall_check fail=syscall + ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] + ldp w11, w12, [vdso_data, #VDSO_CS_MULT] + ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] + seqcnt_check fail=realtime - bl __do_get_tspec - seqcnt_check w9, 1b + /* All computations are done with left-shifted nsecs. */ + get_nsec_per_sec res=x9 + lsl x9, x9, x12 - mov x30, x2 + get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 + get_ts_realtime res_sec=x10, res_nsec=x11, \ + clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9 + clock_gettime_return, shift=1 - cmp w0, #CLOCK_MONOTONIC - b.ne 6f + ALIGN +monotonic: + seqcnt_acquire + syscall_check fail=syscall + ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] + ldp w11, w12, [vdso_data, #VDSO_CS_MULT] + ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] + ldp x3, x4, [vdso_data, #VDSO_WTM_CLK_SEC] + seqcnt_check fail=monotonic - /* Get wtm timespec. */ - ldp x13, x14, [vdso_data, #VDSO_WTM_CLK_SEC] + /* All computations are done with left-shifted nsecs. */ + lsl x4, x4, x12 + get_nsec_per_sec res=x9 + lsl x9, x9, x12 - /* Check the sequence counter. */ - seqcnt_read w9 - seqcnt_check w9, 1b - b 4f -2: - cmp w0, #CLOCK_REALTIME_COARSE - ccmp w0, #CLOCK_MONOTONIC_COARSE, #0x4, ne - b.ne 8f + get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 + get_ts_realtime res_sec=x10, res_nsec=x11, \ + clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9 - /* xtime_coarse_nsec is already right-shifted */ - mov x12, #0 + add_ts sec=x10, nsec=x11, ts_sec=x3, ts_nsec=x4, nsec_to_sec=x9 + clock_gettime_return, shift=1 - /* Get coarse timespec. */ - adr vdso_data, _vdso_data -3: seqcnt_acquire + ALIGN +realtime_coarse: + seqcnt_acquire ldp x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC] + seqcnt_check fail=realtime_coarse + clock_gettime_return - /* Get wtm timespec. */ + ALIGN +monotonic_coarse: + seqcnt_acquire + ldp x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC] ldp x13, x14, [vdso_data, #VDSO_WTM_CLK_SEC] + seqcnt_check fail=monotonic_coarse - /* Check the sequence counter. */ - seqcnt_read w9 - seqcnt_check w9, 3b - - cmp w0, #CLOCK_MONOTONIC_COARSE - b.ne 6f -4: - /* Add on wtm timespec. */ - add x10, x10, x13 - lsl x14, x14, x12 - add x11, x11, x14 + /* Computations are done in (non-shifted) nsecs. */ + get_nsec_per_sec res=x9 + add_ts sec=x10, nsec=x11, ts_sec=x13, ts_nsec=x14, nsec_to_sec=x9 + clock_gettime_return - /* Normalise the new timespec. */ - mov x15, #NSEC_PER_SEC_LO16 - movk x15, #NSEC_PER_SEC_HI16, lsl #16 - lsl x15, x15, x12 - cmp x11, x15 - b.lt 5f - sub x11, x11, x15 - add x10, x10, #1 -5: - cmp x11, #0 - b.ge 6f - add x11, x11, x15 - sub x10, x10, #1 - -6: /* Store to the user timespec. */ - lsr x11, x11, x12 - stp x10, x11, [x1, #TSPEC_TV_SEC] - mov x0, xzr - ret -7: - mov x30, x2 -8: /* Syscall fallback. */ + ALIGN +syscall: /* Syscall fallback. */ mov x8, #__NR_clock_gettime svc #0 ret @@ -203,46 +292,3 @@ ENTRY(__kernel_clock_getres) .quad CLOCK_COARSE_RES .cfi_endproc ENDPROC(__kernel_clock_getres) - -/* - * Read the current time from the architected counter. - * Expects vdso_data to be initialised. - * Clobbers the temporary registers (x9 - x15). - * Returns: - * - w9 = vDSO sequence counter - * - (x10, x11) = (ts->tv_sec, shifted ts->tv_nsec) - * - w12 = cs_shift - */ -ENTRY(__do_get_tspec) - .cfi_startproc - - /* Read from the vDSO data page. */ - ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] - ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] - ldp w11, w12, [vdso_data, #VDSO_CS_MULT] - seqcnt_read w9 - - /* Read the virtual counter. */ - isb - mrs x15, cntvct_el0 - - /* Calculate cycle delta and convert to ns. */ - sub x10, x15, x10 - /* We can only guarantee 56 bits of precision. */ - movn x15, #0xff00, lsl #48 - and x10, x15, x10 - mul x10, x10, x11 - - /* Use the kernel time to calculate the new timespec. */ - mov x11, #NSEC_PER_SEC_LO16 - movk x11, #NSEC_PER_SEC_HI16, lsl #16 - lsl x11, x11, x12 - add x15, x10, x14 - udiv x14, x15, x11 - add x10, x13, x14 - mul x13, x14, x11 - sub x11, x15, x13 - - ret - .cfi_endproc -ENDPROC(__do_get_tspec) -- cgit v1.2.3-59-g8ed1b From 49eea433b326a0ac5c7c941a011b2c65990bd19b Mon Sep 17 00:00:00 2001 From: Kevin Brodsky Date: Tue, 12 Jul 2016 11:24:00 +0100 Subject: arm64: Add support for CLOCK_MONOTONIC_RAW in clock_gettime() vDSO So far the arm64 clock_gettime() vDSO implementation only supported the following clocks, falling back to the syscall for the others: - CLOCK_REALTIME{,_COARSE} - CLOCK_MONOTONIC{,_COARSE} This patch adds support for the CLOCK_MONOTONIC_RAW clock, taking advantage of the recent refactoring of the vDSO time functions. Like the non-_COARSE clocks, this only works when the "arch_sys_counter" clocksource is in use (allowing us to read the current time from the virtual counter register), otherwise we also have to fall back to the syscall. Most of the data is shared with CLOCK_MONOTONIC, and the algorithm is similar. The reference implementation in kernel/time/timekeeping.c shows that: - CLOCK_MONOTONIC = tk->wall_to_monotonic + tk->xtime_sec + timekeeping_get_ns(&tk->tkr_mono) - CLOCK_MONOTONIC_RAW = tk->raw_time + timekeeping_get_ns(&tk->tkr_raw) - tkr_mono and tkr_raw are identical (in particular, same clocksource), except these members: * mult (only mono's multiplier is NTP-adjusted) * xtime_nsec (always 0 for raw) Therefore, tk->raw_time and tkr_raw->mult are now also stored in the vDSO data page. Cc: Ali Saidi Signed-off-by: Kevin Brodsky Reviewed-by: Dave Martin Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/vdso_datapage.h | 8 +++-- arch/arm64/kernel/asm-offsets.c | 6 +++- arch/arm64/kernel/vdso.c | 8 ++++- arch/arm64/kernel/vdso/gettimeofday.S | 57 +++++++++++++++++++++++++++------- 4 files changed, 64 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/vdso_datapage.h b/arch/arm64/include/asm/vdso_datapage.h index de66199673d7..2b9a63771eda 100644 --- a/arch/arm64/include/asm/vdso_datapage.h +++ b/arch/arm64/include/asm/vdso_datapage.h @@ -22,6 +22,8 @@ struct vdso_data { __u64 cs_cycle_last; /* Timebase at clocksource init */ + __u64 raw_time_sec; /* Raw time */ + __u64 raw_time_nsec; __u64 xtime_clock_sec; /* Kernel time */ __u64 xtime_clock_nsec; __u64 xtime_coarse_sec; /* Coarse time */ @@ -29,8 +31,10 @@ struct vdso_data { __u64 wtm_clock_sec; /* Wall to monotonic time */ __u64 wtm_clock_nsec; __u32 tb_seq_count; /* Timebase sequence counter */ - __u32 cs_mult; /* Clocksource multiplier */ - __u32 cs_shift; /* Clocksource shift */ + /* cs_* members must be adjacent and in this order (ldp accesses) */ + __u32 cs_mono_mult; /* NTP-adjusted clocksource multiplier */ + __u32 cs_shift; /* Clocksource shift (mono = raw) */ + __u32 cs_raw_mult; /* Raw clocksource multiplier */ __u32 tz_minuteswest; /* Whacky timezone stuff */ __u32 tz_dsttime; __u32 use_syscall; diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index f8e5d47f0880..334a1cd7a942 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -77,6 +77,7 @@ int main(void) BLANK(); DEFINE(CLOCK_REALTIME, CLOCK_REALTIME); DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC); + DEFINE(CLOCK_MONOTONIC_RAW, CLOCK_MONOTONIC_RAW); DEFINE(CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC); DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE); DEFINE(CLOCK_MONOTONIC_COARSE,CLOCK_MONOTONIC_COARSE); @@ -84,6 +85,8 @@ int main(void) DEFINE(NSEC_PER_SEC, NSEC_PER_SEC); BLANK(); DEFINE(VDSO_CS_CYCLE_LAST, offsetof(struct vdso_data, cs_cycle_last)); + DEFINE(VDSO_RAW_TIME_SEC, offsetof(struct vdso_data, raw_time_sec)); + DEFINE(VDSO_RAW_TIME_NSEC, offsetof(struct vdso_data, raw_time_nsec)); DEFINE(VDSO_XTIME_CLK_SEC, offsetof(struct vdso_data, xtime_clock_sec)); DEFINE(VDSO_XTIME_CLK_NSEC, offsetof(struct vdso_data, xtime_clock_nsec)); DEFINE(VDSO_XTIME_CRS_SEC, offsetof(struct vdso_data, xtime_coarse_sec)); @@ -91,7 +94,8 @@ int main(void) DEFINE(VDSO_WTM_CLK_SEC, offsetof(struct vdso_data, wtm_clock_sec)); DEFINE(VDSO_WTM_CLK_NSEC, offsetof(struct vdso_data, wtm_clock_nsec)); DEFINE(VDSO_TB_SEQ_COUNT, offsetof(struct vdso_data, tb_seq_count)); - DEFINE(VDSO_CS_MULT, offsetof(struct vdso_data, cs_mult)); + DEFINE(VDSO_CS_MONO_MULT, offsetof(struct vdso_data, cs_mono_mult)); + DEFINE(VDSO_CS_RAW_MULT, offsetof(struct vdso_data, cs_raw_mult)); DEFINE(VDSO_CS_SHIFT, offsetof(struct vdso_data, cs_shift)); DEFINE(VDSO_TZ_MINWEST, offsetof(struct vdso_data, tz_minuteswest)); DEFINE(VDSO_TZ_DSTTIME, offsetof(struct vdso_data, tz_dsttime)); diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 9fefb005812a..076312b17d4f 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -214,10 +214,16 @@ void update_vsyscall(struct timekeeper *tk) vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec; if (!use_syscall) { + /* tkr_mono.cycle_last == tkr_raw.cycle_last */ vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last; + vdso_data->raw_time_sec = tk->raw_time.tv_sec; + vdso_data->raw_time_nsec = tk->raw_time.tv_nsec; vdso_data->xtime_clock_sec = tk->xtime_sec; vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec; - vdso_data->cs_mult = tk->tkr_mono.mult; + /* tkr_raw.xtime_nsec == 0 */ + vdso_data->cs_mono_mult = tk->tkr_mono.mult; + vdso_data->cs_raw_mult = tk->tkr_raw.mult; + /* tkr_mono.shift == tkr_raw.shift */ vdso_data->cs_shift = tk->tkr_mono.shift; } diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S index c06919ee29e1..e00b4671bd7c 100644 --- a/arch/arm64/kernel/vdso/gettimeofday.S +++ b/arch/arm64/kernel/vdso/gettimeofday.S @@ -87,6 +87,15 @@ x_tmp .req x8 msub \res_nsec, x_tmp, \nsec_to_sec, \res_nsec .endm + /* + * Returns in res_{sec,nsec} the timespec based on the clock_raw delta, + * used for CLOCK_MONOTONIC_RAW. + */ + .macro get_ts_clock_raw res_sec, res_nsec, clock_nsec, nsec_to_sec + udiv \res_sec, \clock_nsec, \nsec_to_sec + msub \res_nsec, \res_sec, \nsec_to_sec, \clock_nsec + .endm + /* sec and nsec are modified in place. */ .macro add_ts sec, nsec, ts_sec, ts_nsec, nsec_to_sec /* Add timespec. */ @@ -135,7 +144,8 @@ ENTRY(__kernel_gettimeofday) 1: seqcnt_acquire syscall_check fail=4f ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] - ldp w11, w12, [vdso_data, #VDSO_CS_MULT] + /* w11 = cs_mono_mult, w12 = cs_shift */ + ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT] ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] seqcnt_check fail=1b @@ -172,20 +182,20 @@ ENDPROC(__kernel_gettimeofday) /* int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp); */ ENTRY(__kernel_clock_gettime) .cfi_startproc - cmp w0, #JUMPSLOT_MAX - b.hi syscall + cmp w0, #JUMPSLOT_MAX + b.hi syscall adr vdso_data, _vdso_data - adr x_tmp, jumptable - add x_tmp, x_tmp, w0, uxtw #2 - br x_tmp + adr x_tmp, jumptable + add x_tmp, x_tmp, w0, uxtw #2 + br x_tmp ALIGN jumptable: jump_slot jumptable, CLOCK_REALTIME, realtime jump_slot jumptable, CLOCK_MONOTONIC, monotonic - b syscall - b syscall - b syscall + b syscall + b syscall + jump_slot jumptable, CLOCK_MONOTONIC_RAW, monotonic_raw jump_slot jumptable, CLOCK_REALTIME_COARSE, realtime_coarse jump_slot jumptable, CLOCK_MONOTONIC_COARSE, monotonic_coarse @@ -198,7 +208,8 @@ realtime: seqcnt_acquire syscall_check fail=syscall ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] - ldp w11, w12, [vdso_data, #VDSO_CS_MULT] + /* w11 = cs_mono_mult, w12 = cs_shift */ + ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT] ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] seqcnt_check fail=realtime @@ -216,7 +227,8 @@ monotonic: seqcnt_acquire syscall_check fail=syscall ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] - ldp w11, w12, [vdso_data, #VDSO_CS_MULT] + /* w11 = cs_mono_mult, w12 = cs_shift */ + ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT] ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] ldp x3, x4, [vdso_data, #VDSO_WTM_CLK_SEC] seqcnt_check fail=monotonic @@ -233,6 +245,28 @@ monotonic: add_ts sec=x10, nsec=x11, ts_sec=x3, ts_nsec=x4, nsec_to_sec=x9 clock_gettime_return, shift=1 + ALIGN +monotonic_raw: + seqcnt_acquire + syscall_check fail=syscall + ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] + /* w11 = cs_raw_mult, w12 = cs_shift */ + ldp w12, w11, [vdso_data, #VDSO_CS_SHIFT] + ldp x13, x14, [vdso_data, #VDSO_RAW_TIME_SEC] + seqcnt_check fail=monotonic_raw + + /* All computations are done with left-shifted nsecs. */ + lsl x14, x14, x12 + get_nsec_per_sec res=x9 + lsl x9, x9, x12 + + get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 + get_ts_clock_raw res_sec=x10, res_nsec=x11, \ + clock_nsec=x15, nsec_to_sec=x9 + + add_ts sec=x10, nsec=x11, ts_sec=x13, ts_nsec=x14, nsec_to_sec=x9 + clock_gettime_return, shift=1 + ALIGN realtime_coarse: seqcnt_acquire @@ -265,6 +299,7 @@ ENTRY(__kernel_clock_getres) .cfi_startproc cmp w0, #CLOCK_REALTIME ccmp w0, #CLOCK_MONOTONIC, #0x4, ne + ccmp w0, #CLOCK_MONOTONIC_RAW, #0x4, ne b.ne 1f ldr x2, 5f -- cgit v1.2.3-59-g8ed1b From f8d9f924526ad3c983569437f60e3833d6c55578 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Fri, 8 Jul 2016 16:01:13 +0100 Subject: arm64: cpuinfo: Expose MIDR_EL1 and REVIDR_EL1 to sysfs It can be useful for JIT software to be aware of MIDR_EL1 and REVIDR_EL1 to ascertain the presence of any core errata that could affect code generation. This patch exposes these registers through sysfs: /sys/devices/system/cpu/cpu$ID/regs/identification/midr_el1 /sys/devices/system/cpu/cpu$ID/regs/identification/revidr_el1 where $ID is the cpu number. For big.LITTLE systems, one can have a mixture of cores (e.g. Cortex A53 and Cortex A57), thus all CPUs need to be enumerated. If the kernel does not have valid information to populate these entries with, an empty string is returned to userspace. Cc: Mark Rutland Reviewed-by: Will Deacon Signed-off-by: Steve Capper [suzuki.poulose@arm.com: ABI documentation updates, hotplug notifiers, kobject changes] Signed-off-by: Suzuki K Poulose Signed-off-by: Catalin Marinas --- Documentation/ABI/testing/sysfs-devices-system-cpu | 10 ++ arch/arm64/include/asm/cpu.h | 2 + arch/arm64/kernel/cpuinfo.c | 120 +++++++++++++++++++++ 3 files changed, 132 insertions(+) (limited to 'arch') diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 16501334b99f..498741737055 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -340,3 +340,13 @@ Description: POWERNV CPUFreq driver's frequency throttle stats directory and 'policyX/throttle_stats' directory and all the attributes are same as the /sys/devices/system/cpu/cpuX/cpufreq/throttle_stats directory and attributes which give the frequency throttle information of the chip. + +What: /sys/devices/system/cpu/cpuX/regs/ + /sys/devices/system/cpu/cpuX/regs/identification/ + /sys/devices/system/cpu/cpuX/regs/identification/midr_el1 + /sys/devices/system/cpu/cpuX/regs/identification/revidr_el1 +Date: June 2016 +Contact: Linux ARM Kernel Mailing list +Description: AArch64 CPU registers + 'identification' directory exposes the CPU ID registers for + identifying model and revision of the CPU. diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index 13a6103130cd..889226b4c6e1 100644 --- a/arch/arm64/include/asm/cpu.h +++ b/arch/arm64/include/asm/cpu.h @@ -25,10 +25,12 @@ */ struct cpuinfo_arm64 { struct cpu cpu; + struct kobject kobj; u32 reg_ctr; u32 reg_cntfrq; u32 reg_dczid; u32 reg_midr; + u32 reg_revidr; u64 reg_id_aa64dfr0; u64 reg_id_aa64dfr1; diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index c173d329397f..ed1b84fe6925 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -183,6 +183,123 @@ const struct seq_operations cpuinfo_op = { .show = c_show }; + +static struct kobj_type cpuregs_kobj_type = { + .sysfs_ops = &kobj_sysfs_ops, +}; + +/* + * The ARM ARM uses the phrase "32-bit register" to describe a register + * whose upper 32 bits are RES0 (per C5.1.1, ARM DDI 0487A.i), however + * no statement is made as to whether the upper 32 bits will or will not + * be made use of in future, and between ARM DDI 0487A.c and ARM DDI + * 0487A.d CLIDR_EL1 was expanded from 32-bit to 64-bit. + * + * Thus, while both MIDR_EL1 and REVIDR_EL1 are described as 32-bit + * registers, we expose them both as 64 bit values to cater for possible + * future expansion without an ABI break. + */ +#define kobj_to_cpuinfo(kobj) container_of(kobj, struct cpuinfo_arm64, kobj) +#define CPUREGS_ATTR_RO(_name, _field) \ + static ssize_t _name##_show(struct kobject *kobj, \ + struct kobj_attribute *attr, char *buf) \ + { \ + struct cpuinfo_arm64 *info = kobj_to_cpuinfo(kobj); \ + \ + if (info->reg_midr) \ + return sprintf(buf, "0x%016x\n", info->reg_##_field); \ + else \ + return 0; \ + } \ + static struct kobj_attribute cpuregs_attr_##_name = __ATTR_RO(_name) + +CPUREGS_ATTR_RO(midr_el1, midr); +CPUREGS_ATTR_RO(revidr_el1, revidr); + +static struct attribute *cpuregs_id_attrs[] = { + &cpuregs_attr_midr_el1.attr, + &cpuregs_attr_revidr_el1.attr, + NULL +}; + +static struct attribute_group cpuregs_attr_group = { + .attrs = cpuregs_id_attrs, + .name = "identification" +}; + +static int cpuid_add_regs(int cpu) +{ + int rc; + struct device *dev; + struct cpuinfo_arm64 *info = &per_cpu(cpu_data, cpu); + + dev = get_cpu_device(cpu); + if (!dev) { + rc = -ENODEV; + goto out; + } + rc = kobject_add(&info->kobj, &dev->kobj, "regs"); + if (rc) + goto out; + rc = sysfs_create_group(&info->kobj, &cpuregs_attr_group); + if (rc) + kobject_del(&info->kobj); +out: + return rc; +} + +static int cpuid_remove_regs(int cpu) +{ + struct device *dev; + struct cpuinfo_arm64 *info = &per_cpu(cpu_data, cpu); + + dev = get_cpu_device(cpu); + if (!dev) + return -ENODEV; + if (info->kobj.parent) { + sysfs_remove_group(&info->kobj, &cpuregs_attr_group); + kobject_del(&info->kobj); + } + + return 0; +} + +static int cpuid_callback(struct notifier_block *nb, + unsigned long action, void *hcpu) +{ + int rc = 0; + unsigned long cpu = (unsigned long)hcpu; + + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_ONLINE: + rc = cpuid_add_regs(cpu); + break; + case CPU_DEAD: + rc = cpuid_remove_regs(cpu); + break; + } + + return notifier_from_errno(rc); +} + +static int __init cpuinfo_regs_init(void) +{ + int cpu; + + cpu_notifier_register_begin(); + + for_each_possible_cpu(cpu) { + struct cpuinfo_arm64 *info = &per_cpu(cpu_data, cpu); + + kobject_init(&info->kobj, &cpuregs_kobj_type); + if (cpu_online(cpu)) + cpuid_add_regs(cpu); + } + __hotcpu_notifier(cpuid_callback, 0); + + cpu_notifier_register_done(); + return 0; +} static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info) { unsigned int cpu = smp_processor_id(); @@ -212,6 +329,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) info->reg_ctr = read_cpuid_cachetype(); info->reg_dczid = read_cpuid(DCZID_EL0); info->reg_midr = read_cpuid_id(); + info->reg_revidr = read_cpuid(REVIDR_EL1); info->reg_id_aa64dfr0 = read_cpuid(ID_AA64DFR0_EL1); info->reg_id_aa64dfr1 = read_cpuid(ID_AA64DFR1_EL1); @@ -264,3 +382,5 @@ void __init cpuinfo_store_boot_cpu(void) boot_cpu_data = *info; init_cpu_features(&boot_cpu_data); } + +device_initcall(cpuinfo_regs_init); -- cgit v1.2.3-59-g8ed1b From 0a8ea52c3eb157dd65e224fc95b7c9c99fcba9f7 Mon Sep 17 00:00:00 2001 From: "David A. Long" Date: Fri, 8 Jul 2016 12:35:45 -0400 Subject: arm64: Add HAVE_REGS_AND_STACK_ACCESS_API feature Add HAVE_REGS_AND_STACK_ACCESS_API feature for arm64, including supporting functions and defines. Signed-off-by: David A. Long Acked-by: Masami Hiramatsu [catalin.marinas@arm.com: Remove unused functions] Signed-off-by: Catalin Marinas --- arch/arm/include/asm/ptrace.h | 1 - arch/arm64/Kconfig | 1 + arch/arm64/include/asm/ptrace.h | 50 ++++++++++++++++++++ arch/arm64/kernel/ptrace.c | 101 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 152 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h index 51622ba7c4a6..d3c0c23703b6 100644 --- a/arch/arm/include/asm/ptrace.h +++ b/arch/arm/include/asm/ptrace.h @@ -121,7 +121,6 @@ extern unsigned long profile_pc(struct pt_regs *regs); #define MAX_REG_OFFSET (offsetof(struct pt_regs, ARM_ORIG_r0)) extern int regs_query_register_offset(const char *name); -extern const char *regs_query_register_name(unsigned int offset); extern bool regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr); extern unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n); diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 5a0a691d4220..fab133c18a09 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -85,6 +85,7 @@ config ARM64 select HAVE_PERF_EVENTS select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP + select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RCU_TABLE_FREE select HAVE_SYSCALL_TRACEPOINTS select IOMMU_DMA if IOMMU_SUPPORT diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index a307eb6e7fa8..941e12d235be 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -74,6 +74,7 @@ #define COMPAT_PT_DATA_ADDR 0x10004 #define COMPAT_PT_TEXT_END_ADDR 0x10008 #ifndef __ASSEMBLY__ +#include /* sizeof(struct user) for AArch32 */ #define COMPAT_USER_SZ 296 @@ -119,6 +120,8 @@ struct pt_regs { u64 syscallno; }; +#define MAX_REG_OFFSET offsetof(struct pt_regs, pstate) + #define arch_has_single_step() (1) #ifdef CONFIG_COMPAT @@ -147,6 +150,53 @@ struct pt_regs { #define user_stack_pointer(regs) \ (!compat_user_mode(regs) ? (regs)->sp : (regs)->compat_sp) +extern int regs_query_register_offset(const char *name); +extern const char *regs_query_register_name(unsigned int offset); +extern unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, + unsigned int n); + +/** + * regs_get_register() - get register value from its offset + * @regs: pt_regs from which register value is gotten + * @offset: offset of the register. + * + * regs_get_register returns the value of a register whose offset from @regs. + * The @offset is the offset of the register in struct pt_regs. + * If @offset is bigger than MAX_REG_OFFSET, this returns 0. + */ +static inline u64 regs_get_register(struct pt_regs *regs, unsigned int offset) +{ + u64 val = 0; + + WARN_ON(offset & 7); + + offset >>= 3; + switch (offset) { + case 0 ... 30: + val = regs->regs[offset]; + break; + case offsetof(struct pt_regs, sp) >> 3: + val = regs->sp; + break; + case offsetof(struct pt_regs, pc) >> 3: + val = regs->pc; + break; + case offsetof(struct pt_regs, pstate) >> 3: + val = regs->pstate; + break; + default: + val = 0; + } + + return val; +} + +/* Valid only for Kernel mode traps. */ +static inline unsigned long kernel_stack_pointer(struct pt_regs *regs) +{ + return regs->sp; +} + static inline unsigned long regs_return_value(struct pt_regs *regs) { return regs->regs[0]; diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 3f6cd5c5234f..030c1d5aa46d 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -48,6 +48,107 @@ #define CREATE_TRACE_POINTS #include +struct pt_regs_offset { + const char *name; + int offset; +}; + +#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)} +#define REG_OFFSET_END {.name = NULL, .offset = 0} +#define GPR_OFFSET_NAME(r) \ + {.name = "x" #r, .offset = offsetof(struct pt_regs, regs[r])} + +static const struct pt_regs_offset regoffset_table[] = { + GPR_OFFSET_NAME(0), + GPR_OFFSET_NAME(1), + GPR_OFFSET_NAME(2), + GPR_OFFSET_NAME(3), + GPR_OFFSET_NAME(4), + GPR_OFFSET_NAME(5), + GPR_OFFSET_NAME(6), + GPR_OFFSET_NAME(7), + GPR_OFFSET_NAME(8), + GPR_OFFSET_NAME(9), + GPR_OFFSET_NAME(10), + GPR_OFFSET_NAME(11), + GPR_OFFSET_NAME(12), + GPR_OFFSET_NAME(13), + GPR_OFFSET_NAME(14), + GPR_OFFSET_NAME(15), + GPR_OFFSET_NAME(16), + GPR_OFFSET_NAME(17), + GPR_OFFSET_NAME(18), + GPR_OFFSET_NAME(19), + GPR_OFFSET_NAME(20), + GPR_OFFSET_NAME(21), + GPR_OFFSET_NAME(22), + GPR_OFFSET_NAME(23), + GPR_OFFSET_NAME(24), + GPR_OFFSET_NAME(25), + GPR_OFFSET_NAME(26), + GPR_OFFSET_NAME(27), + GPR_OFFSET_NAME(28), + GPR_OFFSET_NAME(29), + GPR_OFFSET_NAME(30), + {.name = "lr", .offset = offsetof(struct pt_regs, regs[30])}, + REG_OFFSET_NAME(sp), + REG_OFFSET_NAME(pc), + REG_OFFSET_NAME(pstate), + REG_OFFSET_END, +}; + +/** + * regs_query_register_offset() - query register offset from its name + * @name: the name of a register + * + * regs_query_register_offset() returns the offset of a register in struct + * pt_regs from its name. If the name is invalid, this returns -EINVAL; + */ +int regs_query_register_offset(const char *name) +{ + const struct pt_regs_offset *roff; + + for (roff = regoffset_table; roff->name != NULL; roff++) + if (!strcmp(roff->name, name)) + return roff->offset; + return -EINVAL; +} + +/** + * regs_within_kernel_stack() - check the address in the stack + * @regs: pt_regs which contains kernel stack pointer. + * @addr: address which is checked. + * + * regs_within_kernel_stack() checks @addr is within the kernel stack page(s). + * If @addr is within the kernel stack, it returns true. If not, returns false. + */ +static bool regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr) +{ + return ((addr & ~(THREAD_SIZE - 1)) == + (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1))) || + on_irq_stack(addr, raw_smp_processor_id()); +} + +/** + * regs_get_kernel_stack_nth() - get Nth entry of the stack + * @regs: pt_regs which contains kernel stack pointer. + * @n: stack entry number. + * + * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which + * is specified by @regs. If the @n th entry is NOT in the kernel stack, + * this returns 0. + */ +unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n) +{ + unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs); + + addr += n; + if (regs_within_kernel_stack(regs, (unsigned long)addr)) + return *addr; + else + return 0; +} + /* * TODO: does not yet catch signals sent when the child dies. * in exit.c or in signal.c. -- cgit v1.2.3-59-g8ed1b From d59bee887231191c80f2ee674d7ec19179eb40ec Mon Sep 17 00:00:00 2001 From: "David A. Long" Date: Fri, 8 Jul 2016 12:35:46 -0400 Subject: arm64: Add more test functions to insn.c Certain instructions are hard to execute correctly out-of-line (as in kprobes). Test functions are added to insn.[hc] to identify these. The instructions include any that use PC-relative addressing, change the PC, or change interrupt masking. For efficiency and simplicity test functions are also added for small collections of related instructions. Signed-off-by: David A. Long Acked-by: Masami Hiramatsu Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/insn.h | 36 ++++++++++++++++++++++++++++++++++++ arch/arm64/kernel/insn.c | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index 30e50eb54a67..497f7a2c5f68 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -120,6 +120,29 @@ enum aarch64_insn_register { AARCH64_INSN_REG_SP = 31 /* Stack pointer: as load/store base reg */ }; +enum aarch64_insn_special_register { + AARCH64_INSN_SPCLREG_SPSR_EL1 = 0xC200, + AARCH64_INSN_SPCLREG_ELR_EL1 = 0xC201, + AARCH64_INSN_SPCLREG_SP_EL0 = 0xC208, + AARCH64_INSN_SPCLREG_SPSEL = 0xC210, + AARCH64_INSN_SPCLREG_CURRENTEL = 0xC212, + AARCH64_INSN_SPCLREG_DAIF = 0xDA11, + AARCH64_INSN_SPCLREG_NZCV = 0xDA10, + AARCH64_INSN_SPCLREG_FPCR = 0xDA20, + AARCH64_INSN_SPCLREG_DSPSR_EL0 = 0xDA28, + AARCH64_INSN_SPCLREG_DLR_EL0 = 0xDA29, + AARCH64_INSN_SPCLREG_SPSR_EL2 = 0xE200, + AARCH64_INSN_SPCLREG_ELR_EL2 = 0xE201, + AARCH64_INSN_SPCLREG_SP_EL1 = 0xE208, + AARCH64_INSN_SPCLREG_SPSR_INQ = 0xE218, + AARCH64_INSN_SPCLREG_SPSR_ABT = 0xE219, + AARCH64_INSN_SPCLREG_SPSR_UND = 0xE21A, + AARCH64_INSN_SPCLREG_SPSR_FIQ = 0xE21B, + AARCH64_INSN_SPCLREG_SPSR_EL3 = 0xF200, + AARCH64_INSN_SPCLREG_ELR_EL3 = 0xF201, + AARCH64_INSN_SPCLREG_SP_EL2 = 0xF210 +}; + enum aarch64_insn_variant { AARCH64_INSN_VARIANT_32BIT, AARCH64_INSN_VARIANT_64BIT @@ -223,8 +246,13 @@ static __always_inline bool aarch64_insn_is_##abbr(u32 code) \ static __always_inline u32 aarch64_insn_get_##abbr##_value(void) \ { return (val); } +__AARCH64_INSN_FUNCS(adr_adrp, 0x1F000000, 0x10000000) +__AARCH64_INSN_FUNCS(prfm_lit, 0xFF000000, 0xD8000000) __AARCH64_INSN_FUNCS(str_reg, 0x3FE0EC00, 0x38206800) __AARCH64_INSN_FUNCS(ldr_reg, 0x3FE0EC00, 0x38606800) +__AARCH64_INSN_FUNCS(ldr_lit, 0xBF000000, 0x18000000) +__AARCH64_INSN_FUNCS(ldrsw_lit, 0xFF000000, 0x98000000) +__AARCH64_INSN_FUNCS(exclusive, 0x3F800000, 0x08000000) __AARCH64_INSN_FUNCS(stp_post, 0x7FC00000, 0x28800000) __AARCH64_INSN_FUNCS(ldp_post, 0x7FC00000, 0x28C00000) __AARCH64_INSN_FUNCS(stp_pre, 0x7FC00000, 0x29800000) @@ -273,10 +301,15 @@ __AARCH64_INSN_FUNCS(svc, 0xFFE0001F, 0xD4000001) __AARCH64_INSN_FUNCS(hvc, 0xFFE0001F, 0xD4000002) __AARCH64_INSN_FUNCS(smc, 0xFFE0001F, 0xD4000003) __AARCH64_INSN_FUNCS(brk, 0xFFE0001F, 0xD4200000) +__AARCH64_INSN_FUNCS(exception, 0xFF000000, 0xD4000000) __AARCH64_INSN_FUNCS(hint, 0xFFFFF01F, 0xD503201F) __AARCH64_INSN_FUNCS(br, 0xFFFFFC1F, 0xD61F0000) __AARCH64_INSN_FUNCS(blr, 0xFFFFFC1F, 0xD63F0000) __AARCH64_INSN_FUNCS(ret, 0xFFFFFC1F, 0xD65F0000) +__AARCH64_INSN_FUNCS(eret, 0xFFFFFFFF, 0xD69F03E0) +__AARCH64_INSN_FUNCS(mrs, 0xFFF00000, 0xD5300000) +__AARCH64_INSN_FUNCS(msr_imm, 0xFFF8F01F, 0xD500401F) +__AARCH64_INSN_FUNCS(msr_reg, 0xFFF00000, 0xD5100000) #undef __AARCH64_INSN_FUNCS @@ -286,6 +319,8 @@ bool aarch64_insn_is_branch_imm(u32 insn); int aarch64_insn_read(void *addr, u32 *insnp); int aarch64_insn_write(void *addr, u32 insn); enum aarch64_insn_encoding_class aarch64_get_insn_class(u32 insn); +bool aarch64_insn_uses_literal(u32 insn); +bool aarch64_insn_is_branch(u32 insn); u64 aarch64_insn_decode_immediate(enum aarch64_insn_imm_type type, u32 insn); u32 aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type, u32 insn, u64 imm); @@ -367,6 +402,7 @@ bool aarch32_insn_is_wide(u32 insn); #define A32_RT_OFFSET 12 #define A32_RT2_OFFSET 0 +u32 aarch64_insn_extract_system_reg(u32 insn); u32 aarch32_insn_extract_reg_num(u32 insn, int offset); u32 aarch32_insn_mcr_extract_opc2(u32 insn); u32 aarch32_insn_mcr_extract_crm(u32 insn); diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 368c08290dd8..28c6110ffcbb 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -162,6 +162,32 @@ static bool __kprobes __aarch64_insn_hotpatch_safe(u32 insn) aarch64_insn_is_nop(insn); } +bool __kprobes aarch64_insn_uses_literal(u32 insn) +{ + /* ldr/ldrsw (literal), prfm */ + + return aarch64_insn_is_ldr_lit(insn) || + aarch64_insn_is_ldrsw_lit(insn) || + aarch64_insn_is_adr_adrp(insn) || + aarch64_insn_is_prfm_lit(insn); +} + +bool __kprobes aarch64_insn_is_branch(u32 insn) +{ + /* b, bl, cb*, tb*, b.cond, br, blr */ + + return aarch64_insn_is_b(insn) || + aarch64_insn_is_bl(insn) || + aarch64_insn_is_cbz(insn) || + aarch64_insn_is_cbnz(insn) || + aarch64_insn_is_tbz(insn) || + aarch64_insn_is_tbnz(insn) || + aarch64_insn_is_ret(insn) || + aarch64_insn_is_br(insn) || + aarch64_insn_is_blr(insn) || + aarch64_insn_is_bcond(insn); +} + /* * ARM Architecture Reference Manual for ARMv8 Profile-A, Issue A.a * Section B2.6.5 "Concurrent modification and execution of instructions": @@ -1175,6 +1201,14 @@ u32 aarch64_set_branch_offset(u32 insn, s32 offset) BUG(); } +/* + * Extract the Op/CR data from a msr/mrs instruction. + */ +u32 aarch64_insn_extract_system_reg(u32 insn) +{ + return (insn & 0x1FFFE0) >> 5; +} + bool aarch32_insn_is_wide(u32 insn) { return insn >= 0xe800; -- cgit v1.2.3-59-g8ed1b From 2af3ec08b414ceb9c32fad2bb0f87252f3f18de8 Mon Sep 17 00:00:00 2001 From: "David A. Long" Date: Fri, 8 Jul 2016 12:35:47 -0400 Subject: arm64: add conditional instruction simulation support Cease using the arm32 arm_check_condition() function and replace it with a local version for use in deprecated instruction support on arm64. Also make the function table used by this available for future use by kprobes and/or uprobes. This function is derived from code written by Sandeepa Prabhu. Signed-off-by: Sandeepa Prabhu Signed-off-by: David A. Long Acked-by: Masami Hiramatsu Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/insn.h | 3 ++ arch/arm64/kernel/Makefile | 3 +- arch/arm64/kernel/armv8_deprecated.c | 19 ++++++- arch/arm64/kernel/insn.c | 98 ++++++++++++++++++++++++++++++++++++ 4 files changed, 119 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index 497f7a2c5f68..a44abbd61382 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -406,6 +406,9 @@ u32 aarch64_insn_extract_system_reg(u32 insn); u32 aarch32_insn_extract_reg_num(u32 insn, int offset); u32 aarch32_insn_mcr_extract_opc2(u32 insn); u32 aarch32_insn_mcr_extract_crm(u32 insn); + +typedef bool (pstate_check_t)(unsigned long); +extern pstate_check_t * const aarch32_opcode_cond_checks[16]; #endif /* __ASSEMBLY__ */ #endif /* __ASM_INSN_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 2173149d8954..4653aca9bb61 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -26,8 +26,7 @@ $(obj)/%.stub.o: $(obj)/%.o FORCE $(call if_changed,objcopy) arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ - sys_compat.o entry32.o \ - ../../arm/kernel/opcodes.o + sys_compat.o entry32.o arm64-obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o arm64-obj-$(CONFIG_MODULES) += arm64ksyms.o module.o arm64-obj-$(CONFIG_ARM64_MODULE_PLTS) += module-plts.o diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index c37202c0c838..293489476529 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -366,6 +366,21 @@ static int emulate_swpX(unsigned int address, unsigned int *data, return res; } +#define ARM_OPCODE_CONDITION_UNCOND 0xf + +static unsigned int __kprobes aarch32_check_condition(u32 opcode, u32 psr) +{ + u32 cc_bits = opcode >> 28; + + if (cc_bits != ARM_OPCODE_CONDITION_UNCOND) { + if ((*aarch32_opcode_cond_checks[cc_bits])(psr)) + return ARM_OPCODE_CONDTEST_PASS; + else + return ARM_OPCODE_CONDTEST_FAIL; + } + return ARM_OPCODE_CONDTEST_UNCOND; +} + /* * swp_handler logs the id of calling process, dissects the instruction, sanity * checks the memory location, calls emulate_swpX for the actual operation and @@ -380,7 +395,7 @@ static int swp_handler(struct pt_regs *regs, u32 instr) type = instr & TYPE_SWPB; - switch (arm_check_condition(instr, regs->pstate)) { + switch (aarch32_check_condition(instr, regs->pstate)) { case ARM_OPCODE_CONDTEST_PASS: break; case ARM_OPCODE_CONDTEST_FAIL: @@ -461,7 +476,7 @@ static int cp15barrier_handler(struct pt_regs *regs, u32 instr) { perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->pc); - switch (arm_check_condition(instr, regs->pstate)) { + switch (aarch32_check_condition(instr, regs->pstate)) { case ARM_OPCODE_CONDTEST_PASS: break; case ARM_OPCODE_CONDTEST_FAIL: diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 28c6110ffcbb..5cb2f3db3da5 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -1234,3 +1234,101 @@ u32 aarch32_insn_mcr_extract_crm(u32 insn) { return insn & CRM_MASK; } + +static bool __kprobes __check_eq(unsigned long pstate) +{ + return (pstate & PSR_Z_BIT) != 0; +} + +static bool __kprobes __check_ne(unsigned long pstate) +{ + return (pstate & PSR_Z_BIT) == 0; +} + +static bool __kprobes __check_cs(unsigned long pstate) +{ + return (pstate & PSR_C_BIT) != 0; +} + +static bool __kprobes __check_cc(unsigned long pstate) +{ + return (pstate & PSR_C_BIT) == 0; +} + +static bool __kprobes __check_mi(unsigned long pstate) +{ + return (pstate & PSR_N_BIT) != 0; +} + +static bool __kprobes __check_pl(unsigned long pstate) +{ + return (pstate & PSR_N_BIT) == 0; +} + +static bool __kprobes __check_vs(unsigned long pstate) +{ + return (pstate & PSR_V_BIT) != 0; +} + +static bool __kprobes __check_vc(unsigned long pstate) +{ + return (pstate & PSR_V_BIT) == 0; +} + +static bool __kprobes __check_hi(unsigned long pstate) +{ + pstate &= ~(pstate >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */ + return (pstate & PSR_C_BIT) != 0; +} + +static bool __kprobes __check_ls(unsigned long pstate) +{ + pstate &= ~(pstate >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */ + return (pstate & PSR_C_BIT) == 0; +} + +static bool __kprobes __check_ge(unsigned long pstate) +{ + pstate ^= (pstate << 3); /* PSR_N_BIT ^= PSR_V_BIT */ + return (pstate & PSR_N_BIT) == 0; +} + +static bool __kprobes __check_lt(unsigned long pstate) +{ + pstate ^= (pstate << 3); /* PSR_N_BIT ^= PSR_V_BIT */ + return (pstate & PSR_N_BIT) != 0; +} + +static bool __kprobes __check_gt(unsigned long pstate) +{ + /*PSR_N_BIT ^= PSR_V_BIT */ + unsigned long temp = pstate ^ (pstate << 3); + + temp |= (pstate << 1); /*PSR_N_BIT |= PSR_Z_BIT */ + return (temp & PSR_N_BIT) == 0; +} + +static bool __kprobes __check_le(unsigned long pstate) +{ + /*PSR_N_BIT ^= PSR_V_BIT */ + unsigned long temp = pstate ^ (pstate << 3); + + temp |= (pstate << 1); /*PSR_N_BIT |= PSR_Z_BIT */ + return (temp & PSR_N_BIT) != 0; +} + +static bool __kprobes __check_al(unsigned long pstate) +{ + return true; +} + +/* + * Note that the ARMv8 ARM calls condition code 0b1111 "nv", but states that + * it behaves identically to 0b1110 ("al"). + */ +pstate_check_t * const aarch32_opcode_cond_checks[16] = { + __check_eq, __check_ne, __check_cs, __check_cc, + __check_mi, __check_pl, __check_vs, __check_vc, + __check_hi, __check_ls, __check_ge, __check_lt, + __check_gt, __check_le, __check_al, __check_al +}; -- cgit v1.2.3-59-g8ed1b From 2dd0e8d2d2a157dbc83295a78336c2217110f2f8 Mon Sep 17 00:00:00 2001 From: Sandeepa Prabhu Date: Fri, 8 Jul 2016 12:35:48 -0400 Subject: arm64: Kprobes with single stepping support Add support for basic kernel probes(kprobes) and jump probes (jprobes) for ARM64. Kprobes utilizes software breakpoint and single step debug exceptions supported on ARM v8. A software breakpoint is placed at the probe address to trap the kernel execution into the kprobe handler. ARM v8 supports enabling single stepping before the break exception return (ERET), with next PC in exception return address (ELR_EL1). The kprobe handler prepares an executable memory slot for out-of-line execution with a copy of the original instruction being probed, and enables single stepping. The PC is set to the out-of-line slot address before the ERET. With this scheme, the instruction is executed with the exact same register context except for the PC (and DAIF) registers. Debug mask (PSTATE.D) is enabled only when single stepping a recursive kprobe, e.g.: during kprobes reenter so that probed instruction can be single stepped within the kprobe handler -exception- context. The recursion depth of kprobe is always 2, i.e. upon probe re-entry, any further re-entry is prevented by not calling handlers and the case counted as a missed kprobe). Single stepping from the x-o-l slot has a drawback for PC-relative accesses like branching and symbolic literals access as the offset from the new PC (slot address) may not be ensured to fit in the immediate value of the opcode. Such instructions need simulation, so reject probing them. Instructions generating exceptions or cpu mode change are rejected for probing. Exclusive load/store instructions are rejected too. Additionally, the code is checked to see if it is inside an exclusive load/store sequence (code from Pratyush). System instructions are mostly enabled for stepping, except MSR/MRS accesses to "DAIF" flags in PSTATE, which are not safe for probing. This also changes arch/arm64/include/asm/ptrace.h to use include/asm-generic/ptrace.h. Thanks to Steve Capper and Pratyush Anand for several suggested Changes. Signed-off-by: Sandeepa Prabhu Signed-off-by: David A. Long Signed-off-by: Pratyush Anand Acked-by: Masami Hiramatsu Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/debug-monitors.h | 5 + arch/arm64/include/asm/insn.h | 2 + arch/arm64/include/asm/kprobes.h | 60 ++++ arch/arm64/include/asm/probes.h | 34 +++ arch/arm64/include/asm/ptrace.h | 14 +- arch/arm64/kernel/Makefile | 2 +- arch/arm64/kernel/debug-monitors.c | 16 +- arch/arm64/kernel/probes/Makefile | 1 + arch/arm64/kernel/probes/decode-insn.c | 143 +++++++++ arch/arm64/kernel/probes/decode-insn.h | 34 +++ arch/arm64/kernel/probes/kprobes.c | 525 ++++++++++++++++++++++++++++++++ arch/arm64/kernel/vmlinux.lds.S | 1 + arch/arm64/mm/fault.c | 26 ++ 14 files changed, 859 insertions(+), 5 deletions(-) create mode 100644 arch/arm64/include/asm/kprobes.h create mode 100644 arch/arm64/include/asm/probes.h create mode 100644 arch/arm64/kernel/probes/Makefile create mode 100644 arch/arm64/kernel/probes/decode-insn.c create mode 100644 arch/arm64/kernel/probes/decode-insn.h create mode 100644 arch/arm64/kernel/probes/kprobes.c (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index fab133c18a09..1f7d644f7f36 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -88,6 +88,7 @@ config ARM64 select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RCU_TABLE_FREE select HAVE_SYSCALL_TRACEPOINTS + select HAVE_KPROBES select IOMMU_DMA if IOMMU_SUPPORT select IRQ_DOMAIN select IRQ_FORCED_THREADING diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index 2fcb9b7c876c..4b6b3f72a215 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -66,6 +66,11 @@ #define CACHE_FLUSH_IS_SAFE 1 +/* kprobes BRK opcodes with ESR encoding */ +#define BRK64_ESR_MASK 0xFFFF +#define BRK64_ESR_KPROBES 0x0004 +#define BRK64_OPCODE_KPROBES (AARCH64_BREAK_MON | (BRK64_ESR_KPROBES << 5)) + /* AArch32 */ #define DBG_ESR_EVT_BKPT 0x4 #define DBG_ESR_EVT_VECC 0x5 diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index a44abbd61382..1dbaa901d7e5 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -253,6 +253,8 @@ __AARCH64_INSN_FUNCS(ldr_reg, 0x3FE0EC00, 0x38606800) __AARCH64_INSN_FUNCS(ldr_lit, 0xBF000000, 0x18000000) __AARCH64_INSN_FUNCS(ldrsw_lit, 0xFF000000, 0x98000000) __AARCH64_INSN_FUNCS(exclusive, 0x3F800000, 0x08000000) +__AARCH64_INSN_FUNCS(load_ex, 0x3F400000, 0x08400000) +__AARCH64_INSN_FUNCS(store_ex, 0x3F400000, 0x08000000) __AARCH64_INSN_FUNCS(stp_post, 0x7FC00000, 0x28800000) __AARCH64_INSN_FUNCS(ldp_post, 0x7FC00000, 0x28C00000) __AARCH64_INSN_FUNCS(stp_pre, 0x7FC00000, 0x29800000) diff --git a/arch/arm64/include/asm/kprobes.h b/arch/arm64/include/asm/kprobes.h new file mode 100644 index 000000000000..79c9511612b5 --- /dev/null +++ b/arch/arm64/include/asm/kprobes.h @@ -0,0 +1,60 @@ +/* + * arch/arm64/include/asm/kprobes.h + * + * Copyright (C) 2013 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#ifndef _ARM_KPROBES_H +#define _ARM_KPROBES_H + +#include +#include +#include + +#define __ARCH_WANT_KPROBES_INSN_SLOT +#define MAX_INSN_SIZE 1 +#define MAX_STACK_SIZE 128 + +#define flush_insn_slot(p) do { } while (0) +#define kretprobe_blacklist_size 0 + +#include + +struct prev_kprobe { + struct kprobe *kp; + unsigned int status; +}; + +/* Single step context for kprobe */ +struct kprobe_step_ctx { + unsigned long ss_pending; + unsigned long match_addr; +}; + +/* per-cpu kprobe control block */ +struct kprobe_ctlblk { + unsigned int kprobe_status; + unsigned long saved_irqflag; + struct prev_kprobe prev_kprobe; + struct kprobe_step_ctx ss_ctx; + struct pt_regs jprobe_saved_regs; + char jprobes_stack[MAX_STACK_SIZE]; +}; + +void arch_remove_kprobe(struct kprobe *); +int kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr); +int kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data); +int kprobe_breakpoint_handler(struct pt_regs *regs, unsigned int esr); +int kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr); + +#endif /* _ARM_KPROBES_H */ diff --git a/arch/arm64/include/asm/probes.h b/arch/arm64/include/asm/probes.h new file mode 100644 index 000000000000..1e8a21a96002 --- /dev/null +++ b/arch/arm64/include/asm/probes.h @@ -0,0 +1,34 @@ +/* + * arch/arm64/include/asm/probes.h + * + * Copyright (C) 2013 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#ifndef _ARM_PROBES_H +#define _ARM_PROBES_H + +struct kprobe; +struct arch_specific_insn; + +typedef u32 kprobe_opcode_t; +typedef unsigned long (kprobes_pstate_check_t)(unsigned long); +typedef void (kprobes_handler_t) (u32 opcode, long addr, struct pt_regs *); + +/* architecture specific copy of original instruction */ +struct arch_specific_insn { + kprobe_opcode_t *insn; + kprobes_pstate_check_t *pstate_cc; + kprobes_handler_t *handler; + /* restore address after step xol */ + unsigned long restore; +}; + +#endif diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 941e12d235be..baf938135986 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -147,9 +147,12 @@ struct pt_regs { #define fast_interrupts_enabled(regs) \ (!((regs)->pstate & PSR_F_BIT)) -#define user_stack_pointer(regs) \ +#define GET_USP(regs) \ (!compat_user_mode(regs) ? (regs)->sp : (regs)->compat_sp) +#define SET_USP(ptregs, value) \ + (!compat_user_mode(regs) ? ((regs)->sp = value) : ((regs)->compat_sp = value)) + extern int regs_query_register_offset(const char *name); extern const char *regs_query_register_name(unsigned int offset); extern unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, @@ -206,8 +209,15 @@ static inline unsigned long regs_return_value(struct pt_regs *regs) struct task_struct; int valid_user_regs(struct user_pt_regs *regs, struct task_struct *task); -#define instruction_pointer(regs) ((unsigned long)(regs)->pc) +#define GET_IP(regs) ((unsigned long)(regs)->pc) +#define SET_IP(regs, value) ((regs)->pc = ((u64) (value))) + +#define GET_FP(ptregs) ((unsigned long)(ptregs)->regs[29]) +#define SET_FP(ptregs, value) ((ptregs)->regs[29] = ((u64) (value))) + +#include +#undef profile_pc extern unsigned long profile_pc(struct pt_regs *regs); #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 4653aca9bb61..367673e38700 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -46,7 +46,7 @@ arm64-obj-$(CONFIG_PARAVIRT) += paravirt.o arm64-obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o arm64-obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o -obj-y += $(arm64-obj-y) vdso/ +obj-y += $(arm64-obj-y) vdso/ probes/ obj-m += $(arm64-obj-m) head-y := head.o extra-y += $(head-y) vmlinux.lds diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 4fbf3c54275c..395de61108d8 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -266,6 +267,10 @@ static int single_step_handler(unsigned long addr, unsigned int esr, */ user_rewind_single_step(current); } else { +#ifdef CONFIG_KPROBES + if (kprobe_single_step_handler(regs, esr) == DBG_HOOK_HANDLED) + return 0; +#endif if (call_step_hook(regs, esr) == DBG_HOOK_HANDLED) return 0; @@ -322,8 +327,15 @@ static int brk_handler(unsigned long addr, unsigned int esr, { if (user_mode(regs)) { send_user_sigtrap(TRAP_BRKPT); - } else if (call_break_hook(regs, esr) != DBG_HOOK_HANDLED) { - pr_warning("Unexpected kernel BRK exception at EL1\n"); + } +#ifdef CONFIG_KPROBES + else if ((esr & BRK64_ESR_MASK) == BRK64_ESR_KPROBES) { + if (kprobe_breakpoint_handler(regs, esr) != DBG_HOOK_HANDLED) + return -EFAULT; + } +#endif + else if (call_break_hook(regs, esr) != DBG_HOOK_HANDLED) { + pr_warn("Unexpected kernel BRK exception at EL1\n"); return -EFAULT; } diff --git a/arch/arm64/kernel/probes/Makefile b/arch/arm64/kernel/probes/Makefile new file mode 100644 index 000000000000..bc159bf56992 --- /dev/null +++ b/arch/arm64/kernel/probes/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_KPROBES) += kprobes.o decode-insn.o diff --git a/arch/arm64/kernel/probes/decode-insn.c b/arch/arm64/kernel/probes/decode-insn.c new file mode 100644 index 000000000000..95c0c5281e7b --- /dev/null +++ b/arch/arm64/kernel/probes/decode-insn.c @@ -0,0 +1,143 @@ +/* + * arch/arm64/kernel/probes/decode-insn.c + * + * Copyright (C) 2013 Linaro Limited. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include + +#include "decode-insn.h" + +static bool __kprobes aarch64_insn_is_steppable(u32 insn) +{ + /* + * Branch instructions will write a new value into the PC which is + * likely to be relative to the XOL address and therefore invalid. + * Deliberate generation of an exception during stepping is also not + * currently safe. Lastly, MSR instructions can do any number of nasty + * things we can't handle during single-stepping. + */ + if (aarch64_get_insn_class(insn) == AARCH64_INSN_CLS_BR_SYS) { + if (aarch64_insn_is_branch(insn) || + aarch64_insn_is_msr_imm(insn) || + aarch64_insn_is_msr_reg(insn) || + aarch64_insn_is_exception(insn) || + aarch64_insn_is_eret(insn)) + return false; + + /* + * The MRS instruction may not return a correct value when + * executing in the single-stepping environment. We do make one + * exception, for reading the DAIF bits. + */ + if (aarch64_insn_is_mrs(insn)) + return aarch64_insn_extract_system_reg(insn) + != AARCH64_INSN_SPCLREG_DAIF; + + /* + * The HINT instruction is is problematic when single-stepping, + * except for the NOP case. + */ + if (aarch64_insn_is_hint(insn)) + return aarch64_insn_is_nop(insn); + + return true; + } + + /* + * Instructions which load PC relative literals are not going to work + * when executed from an XOL slot. Instructions doing an exclusive + * load/store are not going to complete successfully when single-step + * exception handling happens in the middle of the sequence. + */ + if (aarch64_insn_uses_literal(insn) || + aarch64_insn_is_exclusive(insn)) + return false; + + return true; +} + +/* Return: + * INSN_REJECTED If instruction is one not allowed to kprobe, + * INSN_GOOD If instruction is supported and uses instruction slot, + */ +static enum kprobe_insn __kprobes +arm_probe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi) +{ + /* + * Instructions reading or modifying the PC won't work from the XOL + * slot. + */ + if (aarch64_insn_is_steppable(insn)) + return INSN_GOOD; + else + return INSN_REJECTED; +} + +static bool __kprobes +is_probed_address_atomic(kprobe_opcode_t *scan_start, kprobe_opcode_t *scan_end) +{ + while (scan_start > scan_end) { + /* + * atomic region starts from exclusive load and ends with + * exclusive store. + */ + if (aarch64_insn_is_store_ex(le32_to_cpu(*scan_start))) + return false; + else if (aarch64_insn_is_load_ex(le32_to_cpu(*scan_start))) + return true; + scan_start--; + } + + return false; +} + +enum kprobe_insn __kprobes +arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi) +{ + enum kprobe_insn decoded; + kprobe_opcode_t insn = le32_to_cpu(*addr); + kprobe_opcode_t *scan_start = addr - 1; + kprobe_opcode_t *scan_end = addr - MAX_ATOMIC_CONTEXT_SIZE; +#if defined(CONFIG_MODULES) && defined(MODULES_VADDR) + struct module *mod; +#endif + + if (addr >= (kprobe_opcode_t *)_text && + scan_end < (kprobe_opcode_t *)_text) + scan_end = (kprobe_opcode_t *)_text; +#if defined(CONFIG_MODULES) && defined(MODULES_VADDR) + else { + preempt_disable(); + mod = __module_address((unsigned long)addr); + if (mod && within_module_init((unsigned long)addr, mod) && + !within_module_init((unsigned long)scan_end, mod)) + scan_end = (kprobe_opcode_t *)mod->init_layout.base; + else if (mod && within_module_core((unsigned long)addr, mod) && + !within_module_core((unsigned long)scan_end, mod)) + scan_end = (kprobe_opcode_t *)mod->core_layout.base; + preempt_enable(); + } +#endif + decoded = arm_probe_decode_insn(insn, asi); + + if (decoded == INSN_REJECTED || + is_probed_address_atomic(scan_start, scan_end)) + return INSN_REJECTED; + + return decoded; +} diff --git a/arch/arm64/kernel/probes/decode-insn.h b/arch/arm64/kernel/probes/decode-insn.h new file mode 100644 index 000000000000..ad5ba9cc3d45 --- /dev/null +++ b/arch/arm64/kernel/probes/decode-insn.h @@ -0,0 +1,34 @@ +/* + * arch/arm64/kernel/probes/decode-insn.h + * + * Copyright (C) 2013 Linaro Limited. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#ifndef _ARM_KERNEL_KPROBES_ARM64_H +#define _ARM_KERNEL_KPROBES_ARM64_H + +/* + * ARM strongly recommends a limit of 128 bytes between LoadExcl and + * StoreExcl instructions in a single thread of execution. So keep the + * max atomic context size as 32. + */ +#define MAX_ATOMIC_CONTEXT_SIZE (128 / sizeof(kprobe_opcode_t)) + +enum kprobe_insn { + INSN_REJECTED, + INSN_GOOD, +}; + +enum kprobe_insn __kprobes +arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi); + +#endif /* _ARM_KERNEL_KPROBES_ARM64_H */ diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c new file mode 100644 index 000000000000..44968019ce0d --- /dev/null +++ b/arch/arm64/kernel/probes/kprobes.c @@ -0,0 +1,525 @@ +/* + * arch/arm64/kernel/probes/kprobes.c + * + * Kprobes support for ARM64 + * + * Copyright (C) 2013 Linaro Limited. + * Author: Sandeepa Prabhu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "decode-insn.h" + +#define MIN_STACK_SIZE(addr) (on_irq_stack(addr, raw_smp_processor_id()) ? \ + min((unsigned long)IRQ_STACK_SIZE, \ + IRQ_STACK_PTR(raw_smp_processor_id()) - (addr)) : \ + min((unsigned long)MAX_STACK_SIZE, \ + (unsigned long)current_thread_info() + THREAD_START_SP - (addr))) + +void jprobe_return_break(void); + +DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; +DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); + +static void __kprobes arch_prepare_ss_slot(struct kprobe *p) +{ + /* prepare insn slot */ + p->ainsn.insn[0] = cpu_to_le32(p->opcode); + + flush_icache_range((uintptr_t) (p->ainsn.insn), + (uintptr_t) (p->ainsn.insn) + + MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); + + /* + * Needs restoring of return address after stepping xol. + */ + p->ainsn.restore = (unsigned long) p->addr + + sizeof(kprobe_opcode_t); +} + +int __kprobes arch_prepare_kprobe(struct kprobe *p) +{ + unsigned long probe_addr = (unsigned long)p->addr; + extern char __start_rodata[]; + extern char __end_rodata[]; + + if (probe_addr & 0x3) + return -EINVAL; + + /* copy instruction */ + p->opcode = le32_to_cpu(*p->addr); + + if (in_exception_text(probe_addr)) + return -EINVAL; + if (probe_addr >= (unsigned long) __start_rodata && + probe_addr <= (unsigned long) __end_rodata) + return -EINVAL; + + /* decode instruction */ + switch (arm_kprobe_decode_insn(p->addr, &p->ainsn)) { + case INSN_REJECTED: /* insn not supported */ + return -EINVAL; + + case INSN_GOOD: /* instruction uses slot */ + p->ainsn.insn = get_insn_slot(); + if (!p->ainsn.insn) + return -ENOMEM; + break; + }; + + /* prepare the instruction */ + arch_prepare_ss_slot(p); + + return 0; +} + +static int __kprobes patch_text(kprobe_opcode_t *addr, u32 opcode) +{ + void *addrs[1]; + u32 insns[1]; + + addrs[0] = (void *)addr; + insns[0] = (u32)opcode; + + return aarch64_insn_patch_text(addrs, insns, 1); +} + +/* arm kprobe: install breakpoint in text */ +void __kprobes arch_arm_kprobe(struct kprobe *p) +{ + patch_text(p->addr, BRK64_OPCODE_KPROBES); +} + +/* disarm kprobe: remove breakpoint from text */ +void __kprobes arch_disarm_kprobe(struct kprobe *p) +{ + patch_text(p->addr, p->opcode); +} + +void __kprobes arch_remove_kprobe(struct kprobe *p) +{ + if (p->ainsn.insn) { + free_insn_slot(p->ainsn.insn, 0); + p->ainsn.insn = NULL; + } +} + +static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) +{ + kcb->prev_kprobe.kp = kprobe_running(); + kcb->prev_kprobe.status = kcb->kprobe_status; +} + +static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) +{ + __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp); + kcb->kprobe_status = kcb->prev_kprobe.status; +} + +static void __kprobes set_current_kprobe(struct kprobe *p) +{ + __this_cpu_write(current_kprobe, p); +} + +/* + * The D-flag (Debug mask) is set (masked) upon debug exception entry. + * Kprobes needs to clear (unmask) D-flag -ONLY- in case of recursive + * probe i.e. when probe hit from kprobe handler context upon + * executing the pre/post handlers. In this case we return with + * D-flag clear so that single-stepping can be carried-out. + * + * Leave D-flag set in all other cases. + */ +static void __kprobes +spsr_set_debug_flag(struct pt_regs *regs, int mask) +{ + unsigned long spsr = regs->pstate; + + if (mask) + spsr |= PSR_D_BIT; + else + spsr &= ~PSR_D_BIT; + + regs->pstate = spsr; +} + +/* + * Interrupts need to be disabled before single-step mode is set, and not + * reenabled until after single-step mode ends. + * Without disabling interrupt on local CPU, there is a chance of + * interrupt occurrence in the period of exception return and start of + * out-of-line single-step, that result in wrongly single stepping + * into the interrupt handler. + */ +static void __kprobes kprobes_save_local_irqflag(struct kprobe_ctlblk *kcb, + struct pt_regs *regs) +{ + kcb->saved_irqflag = regs->pstate; + regs->pstate |= PSR_I_BIT; +} + +static void __kprobes kprobes_restore_local_irqflag(struct kprobe_ctlblk *kcb, + struct pt_regs *regs) +{ + if (kcb->saved_irqflag & PSR_I_BIT) + regs->pstate |= PSR_I_BIT; + else + regs->pstate &= ~PSR_I_BIT; +} + +static void __kprobes +set_ss_context(struct kprobe_ctlblk *kcb, unsigned long addr) +{ + kcb->ss_ctx.ss_pending = true; + kcb->ss_ctx.match_addr = addr + sizeof(kprobe_opcode_t); +} + +static void __kprobes clear_ss_context(struct kprobe_ctlblk *kcb) +{ + kcb->ss_ctx.ss_pending = false; + kcb->ss_ctx.match_addr = 0; +} + +static void __kprobes setup_singlestep(struct kprobe *p, + struct pt_regs *regs, + struct kprobe_ctlblk *kcb, int reenter) +{ + unsigned long slot; + + if (reenter) { + save_previous_kprobe(kcb); + set_current_kprobe(p); + kcb->kprobe_status = KPROBE_REENTER; + } else { + kcb->kprobe_status = KPROBE_HIT_SS; + } + + BUG_ON(!p->ainsn.insn); + + /* prepare for single stepping */ + slot = (unsigned long)p->ainsn.insn; + + set_ss_context(kcb, slot); /* mark pending ss */ + + if (kcb->kprobe_status == KPROBE_REENTER) + spsr_set_debug_flag(regs, 0); + + /* IRQs and single stepping do not mix well. */ + kprobes_save_local_irqflag(kcb, regs); + kernel_enable_single_step(regs); + instruction_pointer_set(regs, slot); +} + +static int __kprobes reenter_kprobe(struct kprobe *p, + struct pt_regs *regs, + struct kprobe_ctlblk *kcb) +{ + switch (kcb->kprobe_status) { + case KPROBE_HIT_SSDONE: + case KPROBE_HIT_ACTIVE: + kprobes_inc_nmissed_count(p); + setup_singlestep(p, regs, kcb, 1); + break; + case KPROBE_HIT_SS: + case KPROBE_REENTER: + pr_warn("Unrecoverable kprobe detected at %p.\n", p->addr); + dump_kprobe(p); + BUG(); + break; + default: + WARN_ON(1); + return 0; + } + + return 1; +} + +static void __kprobes +post_kprobe_handler(struct kprobe_ctlblk *kcb, struct pt_regs *regs) +{ + struct kprobe *cur = kprobe_running(); + + if (!cur) + return; + + /* return addr restore if non-branching insn */ + if (cur->ainsn.restore != 0) + instruction_pointer_set(regs, cur->ainsn.restore); + + /* restore back original saved kprobe variables and continue */ + if (kcb->kprobe_status == KPROBE_REENTER) { + restore_previous_kprobe(kcb); + return; + } + /* call post handler */ + kcb->kprobe_status = KPROBE_HIT_SSDONE; + if (cur->post_handler) { + /* post_handler can hit breakpoint and single step + * again, so we enable D-flag for recursive exception. + */ + cur->post_handler(cur, regs, 0); + } + + reset_current_kprobe(); +} + +int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr) +{ + struct kprobe *cur = kprobe_running(); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + switch (kcb->kprobe_status) { + case KPROBE_HIT_SS: + case KPROBE_REENTER: + /* + * We are here because the instruction being single + * stepped caused a page fault. We reset the current + * kprobe and the ip points back to the probe address + * and allow the page fault handler to continue as a + * normal page fault. + */ + instruction_pointer_set(regs, (unsigned long) cur->addr); + if (!instruction_pointer(regs)) + BUG(); + + kernel_disable_single_step(); + if (kcb->kprobe_status == KPROBE_REENTER) + spsr_set_debug_flag(regs, 1); + + if (kcb->kprobe_status == KPROBE_REENTER) + restore_previous_kprobe(kcb); + else + reset_current_kprobe(); + + break; + case KPROBE_HIT_ACTIVE: + case KPROBE_HIT_SSDONE: + /* + * We increment the nmissed count for accounting, + * we can also use npre/npostfault count for accounting + * these specific fault cases. + */ + kprobes_inc_nmissed_count(cur); + + /* + * We come here because instructions in the pre/post + * handler caused the page_fault, this could happen + * if handler tries to access user space by + * copy_from_user(), get_user() etc. Let the + * user-specified handler try to fix it first. + */ + if (cur->fault_handler && cur->fault_handler(cur, regs, fsr)) + return 1; + + /* + * In case the user-specified fault handler returned + * zero, try to fix up. + */ + if (fixup_exception(regs)) + return 1; + } + return 0; +} + +int __kprobes kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + return NOTIFY_DONE; +} + +static void __kprobes kprobe_handler(struct pt_regs *regs) +{ + struct kprobe *p, *cur_kprobe; + struct kprobe_ctlblk *kcb; + unsigned long addr = instruction_pointer(regs); + + kcb = get_kprobe_ctlblk(); + cur_kprobe = kprobe_running(); + + p = get_kprobe((kprobe_opcode_t *) addr); + + if (p) { + if (cur_kprobe) { + if (reenter_kprobe(p, regs, kcb)) + return; + } else { + /* Probe hit */ + set_current_kprobe(p); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + + /* + * If we have no pre-handler or it returned 0, we + * continue with normal processing. If we have a + * pre-handler and it returned non-zero, it prepped + * for calling the break_handler below on re-entry, + * so get out doing nothing more here. + * + * pre_handler can hit a breakpoint and can step thru + * before return, keep PSTATE D-flag enabled until + * pre_handler return back. + */ + if (!p->pre_handler || !p->pre_handler(p, regs)) { + setup_singlestep(p, regs, kcb, 0); + return; + } + } + } else if ((le32_to_cpu(*(kprobe_opcode_t *) addr) == + BRK64_OPCODE_KPROBES) && cur_kprobe) { + /* We probably hit a jprobe. Call its break handler. */ + if (cur_kprobe->break_handler && + cur_kprobe->break_handler(cur_kprobe, regs)) { + setup_singlestep(cur_kprobe, regs, kcb, 0); + return; + } + } + /* + * The breakpoint instruction was removed right + * after we hit it. Another cpu has removed + * either a probepoint or a debugger breakpoint + * at this address. In either case, no further + * handling of this interrupt is appropriate. + * Return back to original instruction, and continue. + */ +} + +static int __kprobes +kprobe_ss_hit(struct kprobe_ctlblk *kcb, unsigned long addr) +{ + if ((kcb->ss_ctx.ss_pending) + && (kcb->ss_ctx.match_addr == addr)) { + clear_ss_context(kcb); /* clear pending ss */ + return DBG_HOOK_HANDLED; + } + /* not ours, kprobes should ignore it */ + return DBG_HOOK_ERROR; +} + +int __kprobes +kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + int retval; + + /* return error if this is not our step */ + retval = kprobe_ss_hit(kcb, instruction_pointer(regs)); + + if (retval == DBG_HOOK_HANDLED) { + kprobes_restore_local_irqflag(kcb, regs); + kernel_disable_single_step(); + + if (kcb->kprobe_status == KPROBE_REENTER) + spsr_set_debug_flag(regs, 1); + + post_kprobe_handler(kcb, regs); + } + + return retval; +} + +int __kprobes +kprobe_breakpoint_handler(struct pt_regs *regs, unsigned int esr) +{ + kprobe_handler(regs); + return DBG_HOOK_HANDLED; +} + +int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct jprobe *jp = container_of(p, struct jprobe, kp); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + long stack_ptr = kernel_stack_pointer(regs); + + kcb->jprobe_saved_regs = *regs; + /* + * As Linus pointed out, gcc assumes that the callee + * owns the argument space and could overwrite it, e.g. + * tailcall optimization. So, to be absolutely safe + * we also save and restore enough stack bytes to cover + * the argument area. + */ + memcpy(kcb->jprobes_stack, (void *)stack_ptr, + MIN_STACK_SIZE(stack_ptr)); + + instruction_pointer_set(regs, (unsigned long) jp->entry); + preempt_disable(); + pause_graph_tracing(); + return 1; +} + +void __kprobes jprobe_return(void) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + /* + * Jprobe handler return by entering break exception, + * encoded same as kprobe, but with following conditions + * -a magic number in x0 to identify from rest of other kprobes. + * -restore stack addr to original saved pt_regs + */ + asm volatile ("ldr x0, [%0]\n\t" + "mov sp, x0\n\t" + ".globl jprobe_return_break\n\t" + "jprobe_return_break:\n\t" + "brk %1\n\t" + : + : "r"(&kcb->jprobe_saved_regs.sp), + "I"(BRK64_ESR_KPROBES) + : "memory"); +} + +int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + long stack_addr = kcb->jprobe_saved_regs.sp; + long orig_sp = kernel_stack_pointer(regs); + struct jprobe *jp = container_of(p, struct jprobe, kp); + + if (instruction_pointer(regs) != (u64) jprobe_return_break) + return 0; + + if (orig_sp != stack_addr) { + struct pt_regs *saved_regs = + (struct pt_regs *)kcb->jprobe_saved_regs.sp; + pr_err("current sp %lx does not match saved sp %lx\n", + orig_sp, stack_addr); + pr_err("Saved registers for jprobe %p\n", jp); + show_regs(saved_regs); + pr_err("Current registers\n"); + show_regs(regs); + BUG(); + } + unpause_graph_tracing(); + *regs = kcb->jprobe_saved_regs; + memcpy((void *)stack_addr, kcb->jprobes_stack, + MIN_STACK_SIZE(stack_addr)); + preempt_enable_no_resched(); + return 1; +} + +int __init arch_init_kprobes(void) +{ + return 0; +} diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 435e820e898d..075ce322e82b 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -121,6 +121,7 @@ SECTIONS TEXT_TEXT SCHED_TEXT LOCK_TEXT + KPROBES_TEXT HYPERVISOR_TEXT IDMAP_TEXT HIBERNATE_TEXT diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 013e2cbe7924..2408e51d781f 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -41,6 +41,28 @@ static const char *fault_name(unsigned int esr); +#ifdef CONFIG_KPROBES +static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr) +{ + int ret = 0; + + /* kprobe_running() needs smp_processor_id() */ + if (!user_mode(regs)) { + preempt_disable(); + if (kprobe_running() && kprobe_fault_handler(regs, esr)) + ret = 1; + preempt_enable(); + } + + return ret; +} +#else +static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr) +{ + return 0; +} +#endif + /* * Dump out the page tables associated with 'addr' in mm 'mm'. */ @@ -259,6 +281,9 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC; unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + if (notify_page_fault(regs, esr)) + return 0; + tsk = current; mm = tsk->mm; @@ -629,6 +654,7 @@ asmlinkage int __exception do_debug_exception(unsigned long addr, return rv; } +NOKPROBE_SYMBOL(do_debug_exception); #ifdef CONFIG_ARM64_PAN void cpu_enable_pan(void *__unused) -- cgit v1.2.3-59-g8ed1b From 44b53f67c99d0fc53af3066a05d9e7ca5080a850 Mon Sep 17 00:00:00 2001 From: Pratyush Anand Date: Fri, 8 Jul 2016 12:35:49 -0400 Subject: arm64: Blacklist non-kprobe-able symbol Add all function symbols which are called from do_debug_exception under NOKPROBE_SYMBOL, as they can not kprobed. Signed-off-by: Pratyush Anand Acked-by: Masami Hiramatsu Signed-off-by: Catalin Marinas --- arch/arm64/kernel/arm64ksyms.c | 2 ++ arch/arm64/kernel/debug-monitors.c | 17 +++++++++++++++++ arch/arm64/kernel/hw_breakpoint.c | 8 ++++++++ arch/arm64/kernel/kgdb.c | 4 ++++ 4 files changed, 31 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c index 678f30b05a45..b96ff1ae2786 100644 --- a/arch/arm64/kernel/arm64ksyms.c +++ b/arch/arm64/kernel/arm64ksyms.c @@ -27,6 +27,7 @@ #include #include #include +#include #include @@ -68,6 +69,7 @@ EXPORT_SYMBOL(test_and_change_bit); #ifdef CONFIG_FUNCTION_TRACER EXPORT_SYMBOL(_mcount); +NOKPROBE_SYMBOL(_mcount); #endif /* arm-smccc */ diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 395de61108d8..2fbc1b99e8fb 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -49,6 +49,7 @@ static void mdscr_write(u32 mdscr) asm volatile("msr mdscr_el1, %0" :: "r" (mdscr)); local_dbg_restore(flags); } +NOKPROBE_SYMBOL(mdscr_write); static u32 mdscr_read(void) { @@ -56,6 +57,7 @@ static u32 mdscr_read(void) asm volatile("mrs %0, mdscr_el1" : "=r" (mdscr)); return mdscr; } +NOKPROBE_SYMBOL(mdscr_read); /* * Allow root to disable self-hosted debug from userspace. @@ -104,6 +106,7 @@ void enable_debug_monitors(enum dbg_active_el el) mdscr_write(mdscr); } } +NOKPROBE_SYMBOL(enable_debug_monitors); void disable_debug_monitors(enum dbg_active_el el) { @@ -124,6 +127,7 @@ void disable_debug_monitors(enum dbg_active_el el) mdscr_write(mdscr); } } +NOKPROBE_SYMBOL(disable_debug_monitors); /* * OS lock clearing. @@ -174,6 +178,7 @@ static void set_regs_spsr_ss(struct pt_regs *regs) spsr |= DBG_SPSR_SS; regs->pstate = spsr; } +NOKPROBE_SYMBOL(set_regs_spsr_ss); static void clear_regs_spsr_ss(struct pt_regs *regs) { @@ -183,6 +188,7 @@ static void clear_regs_spsr_ss(struct pt_regs *regs) spsr &= ~DBG_SPSR_SS; regs->pstate = spsr; } +NOKPROBE_SYMBOL(clear_regs_spsr_ss); /* EL1 Single Step Handler hooks */ static LIST_HEAD(step_hook); @@ -226,6 +232,7 @@ static int call_step_hook(struct pt_regs *regs, unsigned int esr) return retval; } +NOKPROBE_SYMBOL(call_step_hook); static void send_user_sigtrap(int si_code) { @@ -284,6 +291,7 @@ static int single_step_handler(unsigned long addr, unsigned int esr, return 0; } +NOKPROBE_SYMBOL(single_step_handler); /* * Breakpoint handler is re-entrant as another breakpoint can @@ -321,6 +329,7 @@ static int call_break_hook(struct pt_regs *regs, unsigned int esr) return fn ? fn(regs, esr) : DBG_HOOK_ERROR; } +NOKPROBE_SYMBOL(call_break_hook); static int brk_handler(unsigned long addr, unsigned int esr, struct pt_regs *regs) @@ -341,6 +350,7 @@ static int brk_handler(unsigned long addr, unsigned int esr, return 0; } +NOKPROBE_SYMBOL(brk_handler); int aarch32_break_handler(struct pt_regs *regs) { @@ -377,6 +387,7 @@ int aarch32_break_handler(struct pt_regs *regs) send_user_sigtrap(TRAP_BRKPT); return 0; } +NOKPROBE_SYMBOL(aarch32_break_handler); static int __init debug_traps_init(void) { @@ -398,6 +409,7 @@ void user_rewind_single_step(struct task_struct *task) if (test_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP)) set_regs_spsr_ss(task_pt_regs(task)); } +NOKPROBE_SYMBOL(user_rewind_single_step); void user_fastforward_single_step(struct task_struct *task) { @@ -413,6 +425,7 @@ void kernel_enable_single_step(struct pt_regs *regs) mdscr_write(mdscr_read() | DBG_MDSCR_SS); enable_debug_monitors(DBG_ACTIVE_EL1); } +NOKPROBE_SYMBOL(kernel_enable_single_step); void kernel_disable_single_step(void) { @@ -420,12 +433,14 @@ void kernel_disable_single_step(void) mdscr_write(mdscr_read() & ~DBG_MDSCR_SS); disable_debug_monitors(DBG_ACTIVE_EL1); } +NOKPROBE_SYMBOL(kernel_disable_single_step); int kernel_active_single_step(void) { WARN_ON(!irqs_disabled()); return mdscr_read() & DBG_MDSCR_SS; } +NOKPROBE_SYMBOL(kernel_active_single_step); /* ptrace API */ void user_enable_single_step(struct task_struct *task) @@ -433,8 +448,10 @@ void user_enable_single_step(struct task_struct *task) set_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP); set_regs_spsr_ss(task_pt_regs(task)); } +NOKPROBE_SYMBOL(user_enable_single_step); void user_disable_single_step(struct task_struct *task) { clear_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP); } +NOKPROBE_SYMBOL(user_disable_single_step); diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index ce21aa88263f..26a6bf77d272 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -127,6 +128,7 @@ static u64 read_wb_reg(int reg, int n) return val; } +NOKPROBE_SYMBOL(read_wb_reg); static void write_wb_reg(int reg, int n, u64 val) { @@ -140,6 +142,7 @@ static void write_wb_reg(int reg, int n, u64 val) } isb(); } +NOKPROBE_SYMBOL(write_wb_reg); /* * Convert a breakpoint privilege level to the corresponding exception @@ -157,6 +160,7 @@ static enum dbg_active_el debug_exception_level(int privilege) return -EINVAL; } } +NOKPROBE_SYMBOL(debug_exception_level); enum hw_breakpoint_ops { HW_BREAKPOINT_INSTALL, @@ -575,6 +579,7 @@ static void toggle_bp_registers(int reg, enum dbg_active_el el, int enable) write_wb_reg(reg, i, ctrl); } } +NOKPROBE_SYMBOL(toggle_bp_registers); /* * Debug exception handlers. @@ -654,6 +659,7 @@ unlock: return 0; } +NOKPROBE_SYMBOL(breakpoint_handler); static int watchpoint_handler(unsigned long addr, unsigned int esr, struct pt_regs *regs) @@ -756,6 +762,7 @@ unlock: return 0; } +NOKPROBE_SYMBOL(watchpoint_handler); /* * Handle single-step exception. @@ -813,6 +820,7 @@ int reinstall_suspended_bps(struct pt_regs *regs) return !handled_exception; } +NOKPROBE_SYMBOL(reinstall_suspended_bps); /* * Context-switcher for restoring suspended breakpoints. diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c index b5f063e5eff7..8c57f6496e56 100644 --- a/arch/arm64/kernel/kgdb.c +++ b/arch/arm64/kernel/kgdb.c @@ -22,6 +22,7 @@ #include #include #include +#include #include struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { @@ -230,6 +231,7 @@ static int kgdb_brk_fn(struct pt_regs *regs, unsigned int esr) kgdb_handle_exception(1, SIGTRAP, 0, regs); return 0; } +NOKPROBE_SYMBOL(kgdb_brk_fn) static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int esr) { @@ -238,12 +240,14 @@ static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int esr) return 0; } +NOKPROBE_SYMBOL(kgdb_compiled_brk_fn); static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned int esr) { kgdb_handle_exception(1, SIGTRAP, 0, regs); return 0; } +NOKPROBE_SYMBOL(kgdb_step_brk_fn); static struct break_hook kgdb_brkpt_hook = { .esr_mask = 0xffffffff, -- cgit v1.2.3-59-g8ed1b From 888b3c8720e0a4033db09ba2364afde6a4763638 Mon Sep 17 00:00:00 2001 From: Pratyush Anand Date: Fri, 8 Jul 2016 12:35:50 -0400 Subject: arm64: Treat all entry code as non-kprobe-able Entry symbols are not kprobe safe. So blacklist them for kprobing. Signed-off-by: Pratyush Anand Signed-off-by: David A. Long Acked-by: Masami Hiramatsu [catalin.marinas@arm.com: Do not include syscall wrappers in .entry.text] Signed-off-by: Catalin Marinas --- arch/arm64/kernel/entry.S | 3 +++ arch/arm64/kernel/probes/kprobes.c | 26 ++++++++++++++++++++++++++ arch/arm64/kernel/vmlinux.lds.S | 1 + 3 files changed, 30 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 12e8d2bcb3f9..492a2655fea4 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -242,6 +242,7 @@ tsk .req x28 // current thread_info /* * Exception vectors. */ + .pushsection ".entry.text", "ax" .align 11 ENTRY(vectors) @@ -774,6 +775,8 @@ __ni_sys_trace: bl do_ni_syscall b __sys_trace_return + .popsection // .entry.text + /* * Special system call wrappers. */ diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index 44968019ce0d..0fe2b6578163 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "decode-insn.h" @@ -519,6 +520,31 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) return 1; } +bool arch_within_kprobe_blacklist(unsigned long addr) +{ + extern char __idmap_text_start[], __idmap_text_end[]; + extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[]; + + if ((addr >= (unsigned long)__kprobes_text_start && + addr < (unsigned long)__kprobes_text_end) || + (addr >= (unsigned long)__entry_text_start && + addr < (unsigned long)__entry_text_end) || + (addr >= (unsigned long)__idmap_text_start && + addr < (unsigned long)__idmap_text_end) || + !!search_exception_tables(addr)) + return true; + + if (!is_kernel_in_hyp_mode()) { + if ((addr >= (unsigned long)__hyp_text_start && + addr < (unsigned long)__hyp_text_end) || + (addr >= (unsigned long)__hyp_idmap_text_start && + addr < (unsigned long)__hyp_idmap_text_end)) + return true; + } + + return false; +} + int __init arch_init_kprobes(void) { return 0; diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 075ce322e82b..9f5939469ef6 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -118,6 +118,7 @@ SECTIONS __exception_text_end = .; IRQENTRY_TEXT SOFTIRQENTRY_TEXT + ENTRY_TEXT TEXT_TEXT SCHED_TEXT LOCK_TEXT -- cgit v1.2.3-59-g8ed1b From 39a67d49ba353630d144a8eb775500c041c89e7a Mon Sep 17 00:00:00 2001 From: Sandeepa Prabhu Date: Fri, 8 Jul 2016 12:35:51 -0400 Subject: arm64: kprobes instruction simulation support Kprobes needs simulation of instructions that cannot be stepped from a different memory location, e.g.: those instructions that uses PC-relative addressing. In simulation, the behaviour of the instruction is implemented using a copy of pt_regs. The following instruction categories are simulated: - All branching instructions(conditional, register, and immediate) - Literal access instructions(load-literal, adr/adrp) Conditional execution is limited to branching instructions in ARM v8. If conditions at PSTATE do not match the condition fields of opcode, the instruction is effectively NOP. Thanks to Will Cohen for assorted suggested changes. Signed-off-by: Sandeepa Prabhu Signed-off-by: William Cohen Signed-off-by: David A. Long Acked-by: Masami Hiramatsu [catalin.marinas@arm.com: removed linux/module.h include] Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/probes.h | 5 +- arch/arm64/kernel/insn.c | 1 + arch/arm64/kernel/probes/Makefile | 3 +- arch/arm64/kernel/probes/decode-insn.c | 33 ++++- arch/arm64/kernel/probes/decode-insn.h | 1 + arch/arm64/kernel/probes/kprobes.c | 53 ++++++-- arch/arm64/kernel/probes/simulate-insn.c | 217 +++++++++++++++++++++++++++++++ arch/arm64/kernel/probes/simulate-insn.h | 28 ++++ 8 files changed, 326 insertions(+), 15 deletions(-) create mode 100644 arch/arm64/kernel/probes/simulate-insn.c create mode 100644 arch/arm64/kernel/probes/simulate-insn.h (limited to 'arch') diff --git a/arch/arm64/include/asm/probes.h b/arch/arm64/include/asm/probes.h index 1e8a21a96002..5af574d632fa 100644 --- a/arch/arm64/include/asm/probes.h +++ b/arch/arm64/include/asm/probes.h @@ -15,17 +15,18 @@ #ifndef _ARM_PROBES_H #define _ARM_PROBES_H +#include + struct kprobe; struct arch_specific_insn; typedef u32 kprobe_opcode_t; -typedef unsigned long (kprobes_pstate_check_t)(unsigned long); typedef void (kprobes_handler_t) (u32 opcode, long addr, struct pt_regs *); /* architecture specific copy of original instruction */ struct arch_specific_insn { kprobe_opcode_t *insn; - kprobes_pstate_check_t *pstate_cc; + pstate_check_t *pstate_cc; kprobes_handler_t *handler; /* restore address after step xol */ unsigned long restore; diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 5cb2f3db3da5..63f9432d05e8 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #define AARCH64_INSN_SF_BIT BIT(31) diff --git a/arch/arm64/kernel/probes/Makefile b/arch/arm64/kernel/probes/Makefile index bc159bf56992..e184d00ebf01 100644 --- a/arch/arm64/kernel/probes/Makefile +++ b/arch/arm64/kernel/probes/Makefile @@ -1 +1,2 @@ -obj-$(CONFIG_KPROBES) += kprobes.o decode-insn.o +obj-$(CONFIG_KPROBES) += kprobes.o decode-insn.o \ + simulate-insn.o diff --git a/arch/arm64/kernel/probes/decode-insn.c b/arch/arm64/kernel/probes/decode-insn.c index 95c0c5281e7b..37e47a9d617e 100644 --- a/arch/arm64/kernel/probes/decode-insn.c +++ b/arch/arm64/kernel/probes/decode-insn.c @@ -21,6 +21,7 @@ #include #include "decode-insn.h" +#include "simulate-insn.h" static bool __kprobes aarch64_insn_is_steppable(u32 insn) { @@ -74,6 +75,7 @@ static bool __kprobes aarch64_insn_is_steppable(u32 insn) /* Return: * INSN_REJECTED If instruction is one not allowed to kprobe, * INSN_GOOD If instruction is supported and uses instruction slot, + * INSN_GOOD_NO_SLOT If instruction is supported but doesn't use its slot. */ static enum kprobe_insn __kprobes arm_probe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi) @@ -84,8 +86,37 @@ arm_probe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi) */ if (aarch64_insn_is_steppable(insn)) return INSN_GOOD; - else + + if (aarch64_insn_is_bcond(insn)) { + asi->handler = simulate_b_cond; + } else if (aarch64_insn_is_cbz(insn) || + aarch64_insn_is_cbnz(insn)) { + asi->handler = simulate_cbz_cbnz; + } else if (aarch64_insn_is_tbz(insn) || + aarch64_insn_is_tbnz(insn)) { + asi->handler = simulate_tbz_tbnz; + } else if (aarch64_insn_is_adr_adrp(insn)) { + asi->handler = simulate_adr_adrp; + } else if (aarch64_insn_is_b(insn) || + aarch64_insn_is_bl(insn)) { + asi->handler = simulate_b_bl; + } else if (aarch64_insn_is_br(insn) || + aarch64_insn_is_blr(insn) || + aarch64_insn_is_ret(insn)) { + asi->handler = simulate_br_blr_ret; + } else if (aarch64_insn_is_ldr_lit(insn)) { + asi->handler = simulate_ldr_literal; + } else if (aarch64_insn_is_ldrsw_lit(insn)) { + asi->handler = simulate_ldrsw_literal; + } else { + /* + * Instruction cannot be stepped out-of-line and we don't + * (yet) simulate it. + */ return INSN_REJECTED; + } + + return INSN_GOOD_NO_SLOT; } static bool __kprobes diff --git a/arch/arm64/kernel/probes/decode-insn.h b/arch/arm64/kernel/probes/decode-insn.h index ad5ba9cc3d45..d438289646a6 100644 --- a/arch/arm64/kernel/probes/decode-insn.h +++ b/arch/arm64/kernel/probes/decode-insn.h @@ -25,6 +25,7 @@ enum kprobe_insn { INSN_REJECTED, + INSN_GOOD_NO_SLOT, INSN_GOOD, }; diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index 0fe2b6578163..63eb0a14d8e9 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -45,6 +45,9 @@ void jprobe_return_break(void); DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); +static void __kprobes +post_kprobe_handler(struct kprobe_ctlblk *, struct pt_regs *); + static void __kprobes arch_prepare_ss_slot(struct kprobe *p) { /* prepare insn slot */ @@ -61,6 +64,23 @@ static void __kprobes arch_prepare_ss_slot(struct kprobe *p) sizeof(kprobe_opcode_t); } +static void __kprobes arch_prepare_simulate(struct kprobe *p) +{ + /* This instructions is not executed xol. No need to adjust the PC */ + p->ainsn.restore = 0; +} + +static void __kprobes arch_simulate_insn(struct kprobe *p, struct pt_regs *regs) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + if (p->ainsn.handler) + p->ainsn.handler((u32)p->opcode, (long)p->addr, regs); + + /* single step simulated, now go for post processing */ + post_kprobe_handler(kcb, regs); +} + int __kprobes arch_prepare_kprobe(struct kprobe *p) { unsigned long probe_addr = (unsigned long)p->addr; @@ -84,6 +104,10 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) case INSN_REJECTED: /* insn not supported */ return -EINVAL; + case INSN_GOOD_NO_SLOT: /* insn need simulation */ + p->ainsn.insn = NULL; + break; + case INSN_GOOD: /* instruction uses slot */ p->ainsn.insn = get_insn_slot(); if (!p->ainsn.insn) @@ -92,7 +116,10 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) }; /* prepare the instruction */ - arch_prepare_ss_slot(p); + if (p->ainsn.insn) + arch_prepare_ss_slot(p); + else + arch_prepare_simulate(p); return 0; } @@ -218,20 +245,24 @@ static void __kprobes setup_singlestep(struct kprobe *p, kcb->kprobe_status = KPROBE_HIT_SS; } - BUG_ON(!p->ainsn.insn); - /* prepare for single stepping */ - slot = (unsigned long)p->ainsn.insn; + if (p->ainsn.insn) { + /* prepare for single stepping */ + slot = (unsigned long)p->ainsn.insn; - set_ss_context(kcb, slot); /* mark pending ss */ + set_ss_context(kcb, slot); /* mark pending ss */ - if (kcb->kprobe_status == KPROBE_REENTER) - spsr_set_debug_flag(regs, 0); + if (kcb->kprobe_status == KPROBE_REENTER) + spsr_set_debug_flag(regs, 0); - /* IRQs and single stepping do not mix well. */ - kprobes_save_local_irqflag(kcb, regs); - kernel_enable_single_step(regs); - instruction_pointer_set(regs, slot); + /* IRQs and single stepping do not mix well. */ + kprobes_save_local_irqflag(kcb, regs); + kernel_enable_single_step(regs); + instruction_pointer_set(regs, slot); + } else { + /* insn simulation */ + arch_simulate_insn(p, regs); + } } static int __kprobes reenter_kprobe(struct kprobe *p, diff --git a/arch/arm64/kernel/probes/simulate-insn.c b/arch/arm64/kernel/probes/simulate-insn.c new file mode 100644 index 000000000000..8977ce9d009d --- /dev/null +++ b/arch/arm64/kernel/probes/simulate-insn.c @@ -0,0 +1,217 @@ +/* + * arch/arm64/kernel/probes/simulate-insn.c + * + * Copyright (C) 2013 Linaro Limited. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#include +#include + +#include "simulate-insn.h" + +#define sign_extend(x, signbit) \ + ((x) | (0 - ((x) & (1 << (signbit))))) + +#define bbl_displacement(insn) \ + sign_extend(((insn) & 0x3ffffff) << 2, 27) + +#define bcond_displacement(insn) \ + sign_extend(((insn >> 5) & 0x7ffff) << 2, 20) + +#define cbz_displacement(insn) \ + sign_extend(((insn >> 5) & 0x7ffff) << 2, 20) + +#define tbz_displacement(insn) \ + sign_extend(((insn >> 5) & 0x3fff) << 2, 15) + +#define ldr_displacement(insn) \ + sign_extend(((insn >> 5) & 0x7ffff) << 2, 20) + +static inline void set_x_reg(struct pt_regs *regs, int reg, u64 val) +{ + if (reg < 31) + regs->regs[reg] = val; +} + +static inline void set_w_reg(struct pt_regs *regs, int reg, u64 val) +{ + if (reg < 31) + regs->regs[reg] = lower_32_bits(val); +} + +static inline u64 get_x_reg(struct pt_regs *regs, int reg) +{ + if (reg < 31) + return regs->regs[reg]; + else + return 0; +} + +static inline u32 get_w_reg(struct pt_regs *regs, int reg) +{ + if (reg < 31) + return lower_32_bits(regs->regs[reg]); + else + return 0; +} + +static bool __kprobes check_cbz(u32 opcode, struct pt_regs *regs) +{ + int xn = opcode & 0x1f; + + return (opcode & (1 << 31)) ? + (get_x_reg(regs, xn) == 0) : (get_w_reg(regs, xn) == 0); +} + +static bool __kprobes check_cbnz(u32 opcode, struct pt_regs *regs) +{ + int xn = opcode & 0x1f; + + return (opcode & (1 << 31)) ? + (get_x_reg(regs, xn) != 0) : (get_w_reg(regs, xn) != 0); +} + +static bool __kprobes check_tbz(u32 opcode, struct pt_regs *regs) +{ + int xn = opcode & 0x1f; + int bit_pos = ((opcode & (1 << 31)) >> 26) | ((opcode >> 19) & 0x1f); + + return ((get_x_reg(regs, xn) >> bit_pos) & 0x1) == 0; +} + +static bool __kprobes check_tbnz(u32 opcode, struct pt_regs *regs) +{ + int xn = opcode & 0x1f; + int bit_pos = ((opcode & (1 << 31)) >> 26) | ((opcode >> 19) & 0x1f); + + return ((get_x_reg(regs, xn) >> bit_pos) & 0x1) != 0; +} + +/* + * instruction simulation functions + */ +void __kprobes +simulate_adr_adrp(u32 opcode, long addr, struct pt_regs *regs) +{ + long imm, xn, val; + + xn = opcode & 0x1f; + imm = ((opcode >> 3) & 0x1ffffc) | ((opcode >> 29) & 0x3); + imm = sign_extend(imm, 20); + if (opcode & 0x80000000) + val = (imm<<12) + (addr & 0xfffffffffffff000); + else + val = imm + addr; + + set_x_reg(regs, xn, val); + + instruction_pointer_set(regs, instruction_pointer(regs) + 4); +} + +void __kprobes +simulate_b_bl(u32 opcode, long addr, struct pt_regs *regs) +{ + int disp = bbl_displacement(opcode); + + /* Link register is x30 */ + if (opcode & (1 << 31)) + set_x_reg(regs, 30, addr + 4); + + instruction_pointer_set(regs, addr + disp); +} + +void __kprobes +simulate_b_cond(u32 opcode, long addr, struct pt_regs *regs) +{ + int disp = 4; + + if (aarch32_opcode_cond_checks[opcode & 0xf](regs->pstate & 0xffffffff)) + disp = bcond_displacement(opcode); + + instruction_pointer_set(regs, addr + disp); +} + +void __kprobes +simulate_br_blr_ret(u32 opcode, long addr, struct pt_regs *regs) +{ + int xn = (opcode >> 5) & 0x1f; + + /* update pc first in case we're doing a "blr lr" */ + instruction_pointer_set(regs, get_x_reg(regs, xn)); + + /* Link register is x30 */ + if (((opcode >> 21) & 0x3) == 1) + set_x_reg(regs, 30, addr + 4); +} + +void __kprobes +simulate_cbz_cbnz(u32 opcode, long addr, struct pt_regs *regs) +{ + int disp = 4; + + if (opcode & (1 << 24)) { + if (check_cbnz(opcode, regs)) + disp = cbz_displacement(opcode); + } else { + if (check_cbz(opcode, regs)) + disp = cbz_displacement(opcode); + } + instruction_pointer_set(regs, addr + disp); +} + +void __kprobes +simulate_tbz_tbnz(u32 opcode, long addr, struct pt_regs *regs) +{ + int disp = 4; + + if (opcode & (1 << 24)) { + if (check_tbnz(opcode, regs)) + disp = tbz_displacement(opcode); + } else { + if (check_tbz(opcode, regs)) + disp = tbz_displacement(opcode); + } + instruction_pointer_set(regs, addr + disp); +} + +void __kprobes +simulate_ldr_literal(u32 opcode, long addr, struct pt_regs *regs) +{ + u64 *load_addr; + int xn = opcode & 0x1f; + int disp; + + disp = ldr_displacement(opcode); + load_addr = (u64 *) (addr + disp); + + if (opcode & (1 << 30)) /* x0-x30 */ + set_x_reg(regs, xn, *load_addr); + else /* w0-w30 */ + set_w_reg(regs, xn, *load_addr); + + instruction_pointer_set(regs, instruction_pointer(regs) + 4); +} + +void __kprobes +simulate_ldrsw_literal(u32 opcode, long addr, struct pt_regs *regs) +{ + s32 *load_addr; + int xn = opcode & 0x1f; + int disp; + + disp = ldr_displacement(opcode); + load_addr = (s32 *) (addr + disp); + + set_x_reg(regs, xn, *load_addr); + + instruction_pointer_set(regs, instruction_pointer(regs) + 4); +} diff --git a/arch/arm64/kernel/probes/simulate-insn.h b/arch/arm64/kernel/probes/simulate-insn.h new file mode 100644 index 000000000000..050bde683c2d --- /dev/null +++ b/arch/arm64/kernel/probes/simulate-insn.h @@ -0,0 +1,28 @@ +/* + * arch/arm64/kernel/probes/simulate-insn.h + * + * Copyright (C) 2013 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#ifndef _ARM_KERNEL_KPROBES_SIMULATE_INSN_H +#define _ARM_KERNEL_KPROBES_SIMULATE_INSN_H + +void simulate_adr_adrp(u32 opcode, long addr, struct pt_regs *regs); +void simulate_b_bl(u32 opcode, long addr, struct pt_regs *regs); +void simulate_b_cond(u32 opcode, long addr, struct pt_regs *regs); +void simulate_br_blr_ret(u32 opcode, long addr, struct pt_regs *regs); +void simulate_cbz_cbnz(u32 opcode, long addr, struct pt_regs *regs); +void simulate_tbz_tbnz(u32 opcode, long addr, struct pt_regs *regs); +void simulate_ldr_literal(u32 opcode, long addr, struct pt_regs *regs); +void simulate_ldrsw_literal(u32 opcode, long addr, struct pt_regs *regs); + +#endif /* _ARM_KERNEL_KPROBES_SIMULATE_INSN_H */ -- cgit v1.2.3-59-g8ed1b From da6a91252ad98d49b49e83b76c1f032cdf6e5258 Mon Sep 17 00:00:00 2001 From: William Cohen Date: Fri, 8 Jul 2016 12:35:52 -0400 Subject: arm64: Add trampoline code for kretprobes The trampoline code is used by kretprobes to capture a return from a probed function. This is done by saving the registers, calling the handler, and restoring the registers. The code then returns to the original saved caller return address. It is necessary to do this directly instead of using a software breakpoint because the code used in processing that breakpoint could itself be kprobe'd and cause a problematic reentry into the debug exception handler. Signed-off-by: William Cohen Signed-off-by: David A. Long Acked-by: Masami Hiramatsu [catalin.marinas@arm.com: removed unnecessary masking of the PSTATE bits] Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/kprobes.h | 2 + arch/arm64/kernel/asm-offsets.c | 11 ++++ arch/arm64/kernel/probes/Makefile | 1 + arch/arm64/kernel/probes/kprobes.c | 5 ++ arch/arm64/kernel/probes/kprobes_trampoline.S | 81 +++++++++++++++++++++++++++ 5 files changed, 100 insertions(+) create mode 100644 arch/arm64/kernel/probes/kprobes_trampoline.S (limited to 'arch') diff --git a/arch/arm64/include/asm/kprobes.h b/arch/arm64/include/asm/kprobes.h index 79c9511612b5..61b49150dfa3 100644 --- a/arch/arm64/include/asm/kprobes.h +++ b/arch/arm64/include/asm/kprobes.h @@ -56,5 +56,7 @@ int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data); int kprobe_breakpoint_handler(struct pt_regs *regs, unsigned int esr); int kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr); +void kretprobe_trampoline(void); +void __kprobes *trampoline_probe_handler(struct pt_regs *regs); #endif /* _ARM_KPROBES_H */ diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index f8e5d47f0880..03dfa27ccf0f 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -51,6 +51,17 @@ int main(void) DEFINE(S_X5, offsetof(struct pt_regs, regs[5])); DEFINE(S_X6, offsetof(struct pt_regs, regs[6])); DEFINE(S_X7, offsetof(struct pt_regs, regs[7])); + DEFINE(S_X8, offsetof(struct pt_regs, regs[8])); + DEFINE(S_X10, offsetof(struct pt_regs, regs[10])); + DEFINE(S_X12, offsetof(struct pt_regs, regs[12])); + DEFINE(S_X14, offsetof(struct pt_regs, regs[14])); + DEFINE(S_X16, offsetof(struct pt_regs, regs[16])); + DEFINE(S_X18, offsetof(struct pt_regs, regs[18])); + DEFINE(S_X20, offsetof(struct pt_regs, regs[20])); + DEFINE(S_X22, offsetof(struct pt_regs, regs[22])); + DEFINE(S_X24, offsetof(struct pt_regs, regs[24])); + DEFINE(S_X26, offsetof(struct pt_regs, regs[26])); + DEFINE(S_X28, offsetof(struct pt_regs, regs[28])); DEFINE(S_LR, offsetof(struct pt_regs, regs[30])); DEFINE(S_SP, offsetof(struct pt_regs, sp)); #ifdef CONFIG_COMPAT diff --git a/arch/arm64/kernel/probes/Makefile b/arch/arm64/kernel/probes/Makefile index e184d00ebf01..ce06312e3d34 100644 --- a/arch/arm64/kernel/probes/Makefile +++ b/arch/arm64/kernel/probes/Makefile @@ -1,2 +1,3 @@ obj-$(CONFIG_KPROBES) += kprobes.o decode-insn.o \ + kprobes_trampoline.o \ simulate-insn.o diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index 63eb0a14d8e9..be1f074b5736 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -576,6 +576,11 @@ bool arch_within_kprobe_blacklist(unsigned long addr) return false; } +void __kprobes __used *trampoline_probe_handler(struct pt_regs *regs) +{ + return NULL; +} + int __init arch_init_kprobes(void) { return 0; diff --git a/arch/arm64/kernel/probes/kprobes_trampoline.S b/arch/arm64/kernel/probes/kprobes_trampoline.S new file mode 100644 index 000000000000..5d6e7f14638c --- /dev/null +++ b/arch/arm64/kernel/probes/kprobes_trampoline.S @@ -0,0 +1,81 @@ +/* + * trampoline entry and return code for kretprobes. + */ + +#include +#include +#include + + .text + + .macro save_all_base_regs + stp x0, x1, [sp, #S_X0] + stp x2, x3, [sp, #S_X2] + stp x4, x5, [sp, #S_X4] + stp x6, x7, [sp, #S_X6] + stp x8, x9, [sp, #S_X8] + stp x10, x11, [sp, #S_X10] + stp x12, x13, [sp, #S_X12] + stp x14, x15, [sp, #S_X14] + stp x16, x17, [sp, #S_X16] + stp x18, x19, [sp, #S_X18] + stp x20, x21, [sp, #S_X20] + stp x22, x23, [sp, #S_X22] + stp x24, x25, [sp, #S_X24] + stp x26, x27, [sp, #S_X26] + stp x28, x29, [sp, #S_X28] + add x0, sp, #S_FRAME_SIZE + stp lr, x0, [sp, #S_LR] + /* + * Construct a useful saved PSTATE + */ + mrs x0, nzcv + mrs x1, daif + orr x0, x0, x1 + mrs x1, CurrentEL + orr x0, x0, x1 + mrs x1, SPSel + orr x0, x0, x1 + stp xzr, x0, [sp, #S_PC] + .endm + + .macro restore_all_base_regs + ldr x0, [sp, #S_PSTATE] + and x0, x0, #(PSR_N_BIT | PSR_Z_BIT | PSR_C_BIT | PSR_V_BIT) + msr nzcv, x0 + ldp x0, x1, [sp, #S_X0] + ldp x2, x3, [sp, #S_X2] + ldp x4, x5, [sp, #S_X4] + ldp x6, x7, [sp, #S_X6] + ldp x8, x9, [sp, #S_X8] + ldp x10, x11, [sp, #S_X10] + ldp x12, x13, [sp, #S_X12] + ldp x14, x15, [sp, #S_X14] + ldp x16, x17, [sp, #S_X16] + ldp x18, x19, [sp, #S_X18] + ldp x20, x21, [sp, #S_X20] + ldp x22, x23, [sp, #S_X22] + ldp x24, x25, [sp, #S_X24] + ldp x26, x27, [sp, #S_X26] + ldp x28, x29, [sp, #S_X28] + .endm + +ENTRY(kretprobe_trampoline) + sub sp, sp, #S_FRAME_SIZE + + save_all_base_regs + + mov x0, sp + bl trampoline_probe_handler + /* + * Replace trampoline address in lr with actual orig_ret_addr return + * address. + */ + mov lr, x0 + + restore_all_base_regs + + add sp, sp, #S_FRAME_SIZE + ret + +ENDPROC(kretprobe_trampoline) -- cgit v1.2.3-59-g8ed1b From fcfd708b8cf86b8c1ca6ce014d50287f61c0eb88 Mon Sep 17 00:00:00 2001 From: Sandeepa Prabhu Date: Fri, 8 Jul 2016 12:35:53 -0400 Subject: arm64: Add kernel return probes support (kretprobes) The pre-handler of this special 'trampoline' kprobe executes the return probe handler functions and restores original return address in ELR_EL1. This way the saved pt_regs still hold the original register context to be carried back to the probed kernel function. Signed-off-by: Sandeepa Prabhu Signed-off-by: David A. Long Acked-by: Masami Hiramatsu Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 1 + arch/arm64/kernel/probes/kprobes.c | 90 +++++++++++++++++++++++++++++++++++++- 2 files changed, 90 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 1f7d644f7f36..6af0e2e5c710 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -89,6 +89,7 @@ config ARM64 select HAVE_RCU_TABLE_FREE select HAVE_SYSCALL_TRACEPOINTS select HAVE_KPROBES + select HAVE_KRETPROBES if HAVE_KPROBES select IOMMU_DMA if IOMMU_SUPPORT select IRQ_DOMAIN select IRQ_FORCED_THREADING diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index be1f074b5736..9c70e8812ea9 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -578,7 +578,95 @@ bool arch_within_kprobe_blacklist(unsigned long addr) void __kprobes __used *trampoline_probe_handler(struct pt_regs *regs) { - return NULL; + struct kretprobe_instance *ri = NULL; + struct hlist_head *head, empty_rp; + struct hlist_node *tmp; + unsigned long flags, orig_ret_address = 0; + unsigned long trampoline_address = + (unsigned long)&kretprobe_trampoline; + kprobe_opcode_t *correct_ret_addr = NULL; + + INIT_HLIST_HEAD(&empty_rp); + kretprobe_hash_lock(current, &head, &flags); + + /* + * It is possible to have multiple instances associated with a given + * task either because multiple functions in the call path have + * return probes installed on them, and/or more than one + * return probe was registered for a target function. + * + * We can handle this because: + * - instances are always pushed into the head of the list + * - when multiple return probes are registered for the same + * function, the (chronologically) first instance's ret_addr + * will be the real return address, and all the rest will + * point to kretprobe_trampoline. + */ + hlist_for_each_entry_safe(ri, tmp, head, hlist) { + if (ri->task != current) + /* another task is sharing our hash bucket */ + continue; + + orig_ret_address = (unsigned long)ri->ret_addr; + + if (orig_ret_address != trampoline_address) + /* + * This is the real return address. Any other + * instances associated with this task are for + * other calls deeper on the call stack + */ + break; + } + + kretprobe_assert(ri, orig_ret_address, trampoline_address); + + correct_ret_addr = ri->ret_addr; + hlist_for_each_entry_safe(ri, tmp, head, hlist) { + if (ri->task != current) + /* another task is sharing our hash bucket */ + continue; + + orig_ret_address = (unsigned long)ri->ret_addr; + if (ri->rp && ri->rp->handler) { + __this_cpu_write(current_kprobe, &ri->rp->kp); + get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE; + ri->ret_addr = correct_ret_addr; + ri->rp->handler(ri, regs); + __this_cpu_write(current_kprobe, NULL); + } + + recycle_rp_inst(ri, &empty_rp); + + if (orig_ret_address != trampoline_address) + /* + * This is the real return address. Any other + * instances associated with this task are for + * other calls deeper on the call stack + */ + break; + } + + kretprobe_hash_unlock(current, &flags); + + hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { + hlist_del(&ri->hlist); + kfree(ri); + } + return (void *)orig_ret_address; +} + +void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, + struct pt_regs *regs) +{ + ri->ret_addr = (kprobe_opcode_t *)regs->regs[30]; + + /* replace return addr (x30) with trampoline */ + regs->regs[30] = (long)&kretprobe_trampoline; +} + +int __kprobes arch_trampoline_kprobe(struct kprobe *p) +{ + return 0; } int __init arch_init_kprobes(void) -- cgit v1.2.3-59-g8ed1b From 9df53ff2bb89bd9c60e655baaf78251c49c72578 Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Mon, 18 Jul 2016 11:20:54 +0100 Subject: arm64: ptrace: remove extra define for CPSR's E bit ...and do not confuse source navigation tools ;) Signed-off-by: Vladimir Murzin Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/ptrace.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index a307eb6e7fa8..10e6f1d7269c 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -46,7 +46,6 @@ #define COMPAT_PSR_MODE_UND 0x0000001b #define COMPAT_PSR_MODE_SYS 0x0000001f #define COMPAT_PSR_T_BIT 0x00000020 -#define COMPAT_PSR_E_BIT 0x00000200 #define COMPAT_PSR_F_BIT 0x00000040 #define COMPAT_PSR_I_BIT 0x00000080 #define COMPAT_PSR_A_BIT 0x00000100 -- cgit v1.2.3-59-g8ed1b From f8fa70f392fab8697b6358bb7e4f8c3e8429ec7b Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 12 Jul 2016 16:28:01 +0100 Subject: arm64: localise Image objcopy flags We currently define OBJCOPYFLAGS in the top-level arm64 Makefile, and thus these flags will be passed to all uses of objcopy, kernel-wide, for which they are not explicitly overridden. The flags we set are intended for converting vmlinux (and ELF) into Image (a raw binary), and thus the flags chosen are problematic for some other uses which do not expect a raw binary result, e.g. the upcoming lkdtm rodata test: http://www.openwall.com/lists/kernel-hardening/2016/06/08/2 This patch localises the objcopy flags such that they are only used for the vmlinux -> Image conversion. Signed-off-by: Mark Rutland Cc: Will Deacon Acked-by: Kees Cook Tested-by: Laura Abbott Signed-off-by: Catalin Marinas --- arch/arm64/Makefile | 1 - arch/arm64/boot/Makefile | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 4b6f84bf2ab9..393103b8abe4 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -12,7 +12,6 @@ LDFLAGS_vmlinux :=-p --no-undefined -X CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET) -OBJCOPYFLAGS :=-O binary -R .note -R .note.gnu.build-id -R .comment -S GZFLAGS :=-9 ifneq ($(CONFIG_RELOCATABLE),) diff --git a/arch/arm64/boot/Makefile b/arch/arm64/boot/Makefile index 305c552b5ec1..1f012c506434 100644 --- a/arch/arm64/boot/Makefile +++ b/arch/arm64/boot/Makefile @@ -14,6 +14,8 @@ # Based on the ia64 boot/Makefile. # +OBJCOPYFLAGS_Image :=-O binary -R .note -R .note.gnu.build-id -R .comment -S + targets := Image Image.gz $(obj)/Image: vmlinux FORCE -- cgit v1.2.3-59-g8ed1b From 2ce39ad15182604beb6c8fa8bed5e46b59fd1082 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 19 Jul 2016 15:07:37 +0100 Subject: arm64: debug: unmask PSTATE.D earlier Clearing PSTATE.D is one of the requirements for generating a debug exception. The arm64 booting protocol requires that PSTATE.D is set, since many of the debug registers (for example, the hw_breakpoint registers) are UNKNOWN out of reset and could potentially generate spurious, fatal debug exceptions in early boot code if PSTATE.D was clear. Once the debug registers have been safely initialised, PSTATE.D is cleared, however this is currently broken for two reasons: (1) The boot CPU clears PSTATE.D in a postcore_initcall and secondary CPUs clear PSTATE.D in secondary_start_kernel. Since the initcall runs after SMP (and the scheduler) have been initialised, there is no guarantee that it is actually running on the boot CPU. In this case, the boot CPU is left with PSTATE.D set and is not capable of generating debug exceptions. (2) In a preemptible kernel, we may explicitly schedule on the IRQ return path to EL1. If an IRQ occurs with PSTATE.D set in the idle thread, then we may schedule the kthread_init thread, run the postcore_initcall to clear PSTATE.D and then context switch back to the idle thread before returning from the IRQ. The exception return path will then restore PSTATE.D from the stack, and set it again. This patch fixes the problem by moving the clearing of PSTATE.D earlier to proc.S. This has the desirable effect of clearing it in one place for all CPUs, long before we have to worry about the scheduler or any exception handling. We ensure that the previous reset of MDSCR_EL1 has completed before unmasking the exception, so that any spurious exceptions resulting from UNKNOWN debug registers are not generated. Without this patch applied, the kprobes selftests have been seen to fail under KVM, where we end up attempting to step the OOL instruction buffer with PSTATE.D set and therefore fail to complete the step. Cc: Acked-by: Mark Rutland Reported-by: Catalin Marinas Tested-by: Marc Zyngier Signed-off-by: Will Deacon Reviewed-by: Catalin Marinas Tested-by: Catalin Marinas Signed-off-by: Catalin Marinas --- arch/arm64/kernel/debug-monitors.c | 1 - arch/arm64/kernel/smp.c | 1 - arch/arm64/mm/proc.S | 2 ++ 3 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 4fbf3c54275c..0800d23e2fdd 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -151,7 +151,6 @@ static int debug_monitors_init(void) /* Clear the OS lock. */ on_each_cpu(clear_os_lock, NULL, 1); isb(); - local_dbg_enable(); /* Register hotplug handler. */ __register_cpu_notifier(&os_lock_nb); diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 62ff3c0622e2..c0a772560ab7 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -267,7 +267,6 @@ asmlinkage void secondary_start_kernel(void) set_cpu_online(cpu, true); complete(&cpu_running); - local_dbg_enable(); local_irq_enable(); local_async_enable(); diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index c4317879b938..5bb61de23201 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -180,6 +180,8 @@ ENTRY(__cpu_setup) msr cpacr_el1, x0 // Enable FP/ASIMD mov x0, #1 << 12 // Reset mdscr_el1 and disable msr mdscr_el1, x0 // access to the DCC from EL0 + isb // Unmask debug exceptions now, + enable_dbg // since this is per-cpu reset_pmuserenr_el0 x0 // Disable PMU access from EL0 /* * Memory region attributes for LPAE: -- cgit v1.2.3-59-g8ed1b From 6b68e14e71b0a6811ed776c6261d4e6c0389e2c0 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 19 Jul 2016 15:07:38 +0100 Subject: arm64: debug: remove redundant spsr manipulation There is no need to explicitly clear the SS bit immediately before setting it unconditionally. Reported-by: Catalin Marinas Signed-off-by: Will Deacon Acked-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/kernel/debug-monitors.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 0800d23e2fdd..f17134d39e6b 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -165,21 +165,12 @@ postcore_initcall(debug_monitors_init); */ static void set_regs_spsr_ss(struct pt_regs *regs) { - unsigned long spsr; - - spsr = regs->pstate; - spsr &= ~DBG_SPSR_SS; - spsr |= DBG_SPSR_SS; - regs->pstate = spsr; + regs->pstate |= DBG_SPSR_SS; } static void clear_regs_spsr_ss(struct pt_regs *regs) { - unsigned long spsr; - - spsr = regs->pstate; - spsr &= ~DBG_SPSR_SS; - regs->pstate = spsr; + regs->pstate &= ~DBG_SPSR_SS; } /* EL1 Single Step Handler hooks */ -- cgit v1.2.3-59-g8ed1b From 2572214170fb95370be21915c0397f4b6a27e7e3 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 19 Jul 2016 15:07:40 +0100 Subject: arm64: debug: remove unused local_dbg_{enable, disable} macros The debug enable/disable macros are not used anywhere in the kernel, so remove them from irqflags.h Reported-by: Robin Murphy Signed-off-by: Will Deacon Acked-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/irqflags.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h index 11cc941bd107..8c581281fa12 100644 --- a/arch/arm64/include/asm/irqflags.h +++ b/arch/arm64/include/asm/irqflags.h @@ -110,8 +110,5 @@ static inline int arch_irqs_disabled_flags(unsigned long flags) : : "r" (flags) : "memory"); \ } while (0) -#define local_dbg_enable() asm("msr daifclr, #8" : : : "memory") -#define local_dbg_disable() asm("msr daifset, #8" : : : "memory") - #endif #endif -- cgit v1.2.3-59-g8ed1b From 44bd887ce10eb8061f6a137f8a73f823957edd82 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 19 Jul 2016 15:07:39 +0100 Subject: arm64: kprobes: WARN if attempting to step with PSTATE.D=1 Stepping with PSTATE.D=1 is bad news. The step won't generate a debug exception and we'll likely walk off into random data structures. This should never happen, but when it does, it's a PITA to debug. Add a WARN_ON to shout if we realise this is about to take place. Signed-off-by: Will Deacon Acked-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/kernel/probes/kprobes.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index 9c70e8812ea9..c89811d1e294 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -254,6 +254,8 @@ static void __kprobes setup_singlestep(struct kprobe *p, if (kcb->kprobe_status == KPROBE_REENTER) spsr_set_debug_flag(regs, 0); + else + WARN_ON(regs->pstate & PSR_D_BIT); /* IRQs and single stepping do not mix well. */ kprobes_save_local_irqflag(kcb, regs); -- cgit v1.2.3-59-g8ed1b From ab4c1325d4bf111a590a1f773e3d93bde7f40201 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 20 Jul 2016 17:46:58 +0100 Subject: arm64: kprobes: Fix overflow when saving stack The MIN_STACK_SIZE macro tries evaluate how much stack space needs to be saved in the jprobes_stack array, sized at 128 bytes. When using the IRQ stack, said macro can happily return up to IRQ_STACK_SIZE, which is 16kB. Mayhem follows. This patch fixes things by getting rid of the crazy macro and limiting the copy to be at most the size of the jprobes_stack array, no matter which stack we're on. Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas --- arch/arm64/kernel/probes/kprobes.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index c89811d1e294..09f8ae98da5a 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -34,12 +34,6 @@ #include "decode-insn.h" -#define MIN_STACK_SIZE(addr) (on_irq_stack(addr, raw_smp_processor_id()) ? \ - min((unsigned long)IRQ_STACK_SIZE, \ - IRQ_STACK_PTR(raw_smp_processor_id()) - (addr)) : \ - min((unsigned long)MAX_STACK_SIZE, \ - (unsigned long)current_thread_info() + THREAD_START_SP - (addr))) - void jprobe_return_break(void); DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; @@ -48,6 +42,18 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); static void __kprobes post_kprobe_handler(struct kprobe_ctlblk *, struct pt_regs *); +static inline unsigned long min_stack_size(unsigned long addr) +{ + unsigned long size; + + if (on_irq_stack(addr, raw_smp_processor_id())) + size = IRQ_STACK_PTR(raw_smp_processor_id()) - addr; + else + size = (unsigned long)current_thread_info() + THREAD_START_SP - addr; + + return min(size, FIELD_SIZEOF(struct kprobe_ctlblk, jprobes_stack)); +} + static void __kprobes arch_prepare_ss_slot(struct kprobe *p) { /* prepare insn slot */ @@ -495,7 +501,7 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) * the argument area. */ memcpy(kcb->jprobes_stack, (void *)stack_ptr, - MIN_STACK_SIZE(stack_ptr)); + min_stack_size(stack_ptr)); instruction_pointer_set(regs, (unsigned long) jp->entry); preempt_disable(); @@ -548,7 +554,7 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) unpause_graph_tracing(); *regs = kcb->jprobe_saved_regs; memcpy((void *)stack_addr, kcb->jprobes_stack, - MIN_STACK_SIZE(stack_addr)); + min_stack_size(stack_addr)); preempt_enable_no_resched(); return 1; } -- cgit v1.2.3-59-g8ed1b From 3b7d14e9f3f1efd4c4348800e977fd1ce4ca660e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 21 Jul 2016 09:44:17 +0100 Subject: arm64: kprobes: Cleanup jprobe_return jprobe_return seems to have aged badly. Comments referring to non-existent behaviours, and a dangerous habit of messing with registers without telling the compiler. This patches applies the following remedies: - Fix the comments to describe the actual behaviour - Tidy up the asm sequence to directly assign the stack pointer without clobbering extra registers - Mark the rest of the function as unreachable() so that the compiler knows that there is no need for an epilogue - Stop making jprobe_return_break a global function (you really don't want to call that guy, and it isn't even a function). Tested with tcp_probe. Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas --- arch/arm64/kernel/probes/kprobes.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index 09f8ae98da5a..973c15df5211 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -34,8 +34,6 @@ #include "decode-insn.h" -void jprobe_return_break(void); - DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); @@ -516,18 +514,17 @@ void __kprobes jprobe_return(void) /* * Jprobe handler return by entering break exception, * encoded same as kprobe, but with following conditions - * -a magic number in x0 to identify from rest of other kprobes. + * -a special PC to identify it from the other kprobes. * -restore stack addr to original saved pt_regs */ - asm volatile ("ldr x0, [%0]\n\t" - "mov sp, x0\n\t" - ".globl jprobe_return_break\n\t" - "jprobe_return_break:\n\t" - "brk %1\n\t" - : - : "r"(&kcb->jprobe_saved_regs.sp), - "I"(BRK64_ESR_KPROBES) - : "memory"); + asm volatile(" mov sp, %0 \n" + "jprobe_return_break: brk %1 \n" + : + : "r" (kcb->jprobe_saved_regs.sp), + "I" (BRK64_ESR_KPROBES) + : "memory"); + + unreachable(); } int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) @@ -536,6 +533,7 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) long stack_addr = kcb->jprobe_saved_regs.sp; long orig_sp = kernel_stack_pointer(regs); struct jprobe *jp = container_of(p, struct jprobe, kp); + extern const char jprobe_return_break[]; if (instruction_pointer(regs) != (u64) jprobe_return_break) return 0; -- cgit v1.2.3-59-g8ed1b From f7e35c5ba4322838ce84b23a2f1a6d6b7f0b57ec Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 21 Jul 2016 10:54:54 +0100 Subject: arm64: kprobes: Add KASAN instrumentation around stack accesses This patch disables KASAN around the memcpy from/to the kernel or IRQ stacks to avoid warnings like below: BUG: KASAN: stack-out-of-bounds in setjmp_pre_handler+0xe4/0x170 at addr ffff800935cbbbc0 Read of size 128 by task swapper/0/1 page:ffff7e0024d72ec0 count:0 mapcount:0 mapping: (null) index:0x0 flags: 0x1000000000000000() page dumped because: kasan: bad access detected CPU: 4 PID: 1 Comm: swapper/0 Not tainted 4.7.0-rc4+ #1 Hardware name: ARM Juno development board (r0) (DT) Call trace: [] dump_backtrace+0x0/0x280 [] show_stack+0x14/0x20 [] dump_stack+0xa4/0xc8 [] kasan_report_error+0x4fc/0x528 [] kasan_report+0x40/0x48 [] check_memory_region+0x144/0x1a0 [] memcpy+0x34/0x68 [] setjmp_pre_handler+0xe4/0x170 [] kprobe_breakpoint_handler+0xec/0x1d8 [] brk_handler+0x5c/0xa0 [] do_debug_exception+0xa0/0x138 Signed-off-by: Catalin Marinas --- arch/arm64/kernel/probes/kprobes.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index 973c15df5211..bf9768588288 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -16,6 +16,7 @@ * General Public License for more details. * */ +#include #include #include #include @@ -498,8 +499,10 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) * we also save and restore enough stack bytes to cover * the argument area. */ + kasan_disable_current(); memcpy(kcb->jprobes_stack, (void *)stack_ptr, min_stack_size(stack_ptr)); + kasan_enable_current(); instruction_pointer_set(regs, (unsigned long) jp->entry); preempt_disable(); @@ -551,8 +554,10 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) } unpause_graph_tracing(); *regs = kcb->jprobe_saved_regs; + kasan_disable_current(); memcpy((void *)stack_addr, kcb->jprobes_stack, min_stack_size(stack_addr)); + kasan_enable_current(); preempt_enable_no_resched(); return 1; } -- cgit v1.2.3-59-g8ed1b From 9113c2aa05e9848cd4f1154abee17d4f265f012d Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 21 Jul 2016 11:12:55 +0100 Subject: arm64: Fix incorrect per-cpu usage for boot CPU In smp_prepare_boot_cpu(), we invoke cpuinfo_store_boot_cpu to store the cpuinfo in a per-cpu ptr, before initialising the per-cpu offset for the boot CPU. This patch reorders the sequence to make sure we initialise the per-cpu offset before accessing the per-cpu area. Commit 4b998ff1885eec ("arm64: Delay cpuinfo_store_boot_cpu") fixed the issue where we modified the per-cpu area even before the kernel initialises the per-cpu areas, but failed to wait until the boot cpu updated it's offset. Fixes: 4b998ff1885e ("arm64: Delay cpuinfo_store_boot_cpu") Cc: # 4.4+ Cc: Will Deacon Signed-off-by: Suzuki K Poulose Acked-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index c0a772560ab7..298dd745651e 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -436,9 +436,9 @@ void __init smp_cpus_done(unsigned int max_cpus) void __init smp_prepare_boot_cpu(void) { + set_my_cpu_offset(per_cpu_offset(smp_processor_id())); cpuinfo_store_boot_cpu(); save_boot_cpu_run_el(); - set_my_cpu_offset(per_cpu_offset(smp_processor_id())); } static u64 __init of_get_cpu_mpidr(struct device_node *dn) -- cgit v1.2.3-59-g8ed1b From e75118a7b581b19b08282c7819c1ec6f68b91b79 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 21 Jul 2016 11:15:27 +0100 Subject: arm64: Honor nosmp kernel command line option Passing "nosmp" should boot the kernel with a single processor, without provision to enable secondary CPUs even if they are present. "nosmp" is implemented by setting maxcpus=0. At the moment we still mark the secondary CPUs present even with nosmp, which allows the userspace to bring them up. This patch corrects the smp_prepare_cpus() to honor the maxcpus == 0. Commit 44dbcc93ab67145 ("arm64: Fix behavior of maxcpus=N") fixed the behavior for maxcpus >= 1, but broke maxcpus = 0. Fixes: 44dbcc93ab67 ("arm64: Fix behavior of maxcpus=N") Cc: # 4.7+ Cc: Will Deacon Cc: James Morse Signed-off-by: Suzuki K Poulose Acked-by: Mark Rutland [catalin.marinas@arm.com: updated code comment] Signed-off-by: Catalin Marinas --- arch/arm64/kernel/smp.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 298dd745651e..490db85dec23 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -692,6 +692,13 @@ void __init smp_prepare_cpus(unsigned int max_cpus) smp_store_cpu_info(smp_processor_id()); + /* + * If UP is mandated by "nosmp" (which implies "maxcpus=0"), don't set + * secondary CPUs present. + */ + if (max_cpus == 0) + return; + /* * Initialise the present map (which describes the set of CPUs * actually populated at the present time) and release the -- cgit v1.2.3-59-g8ed1b From 258a1605c78878ada00417de4840c4c427673ffa Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 22 Jul 2016 19:32:24 +0200 Subject: arm64: mm: make create_mapping_late() non-allocating The only purpose served by create_mapping_late() is to remap the already mapped .text and .rodata kernel segments with read-only permissions. Since we no longer allow block mappings to be split or merged, create_mapping_late() should not pass an allocation function pointer into __create_pgd_mapping(). So pass NULL instead. Signed-off-by: Ard Biesheuvel Reviewed-by: Laura Abbott Tested-by: Sudeep Holla Acked-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/mm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index a96a2413fa18..33f36cede02d 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -312,7 +312,7 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt, } __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, - late_pgtable_alloc, !debug_pagealloc_enabled()); + NULL, !debug_pagealloc_enabled()); } static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end) -- cgit v1.2.3-59-g8ed1b From 1378dc3d4ba07ccd295b04ef59e943162531ad25 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 22 Jul 2016 19:32:25 +0200 Subject: arm64: mm: run pgtable_page_ctor() on non-swapper translation table pages The kernel page table creation routines are accessible to other subsystems (e.g., EFI) via the create_pgd_mapping() entry point, which allows mappings to be created that are not covered by init_mm. Since generic code such as apply_to_page_range() may expect translation table pages that are not associated with init_mm to be covered by fully constructed struct pages, add a call to pgtable_page_ctor() in the alloc function used by create_pgd_mapping. Since it is no longer used by create_mapping_late(), also update the name of this function to better reflect its purpose. Signed-off-by: Ard Biesheuvel Reviewed-by: Laura Abbott Tested-by: Sudeep Holla Signed-off-by: Catalin Marinas --- arch/arm64/mm/mmu.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 33f36cede02d..51a558195bb9 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -268,10 +268,11 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, } while (pgd++, addr = next, addr != end); } -static phys_addr_t late_pgtable_alloc(void) +static phys_addr_t pgd_pgtable_alloc(void) { void *ptr = (void *)__get_free_page(PGALLOC_GFP); - BUG_ON(!ptr); + if (!ptr || !pgtable_page_ctor(virt_to_page(ptr))) + BUG(); /* Ensure the zeroed page is visible to the page table walker */ dsb(ishst); @@ -298,8 +299,10 @@ void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot, bool allow_block_mappings) { + BUG_ON(mm == &init_mm); + __create_pgd_mapping(mm->pgd, phys, virt, size, prot, - late_pgtable_alloc, allow_block_mappings); + pgd_pgtable_alloc, allow_block_mappings); } static void create_mapping_late(phys_addr_t phys, unsigned long virt, -- cgit v1.2.3-59-g8ed1b From b9c220b589daaf140f5b8ebe502c98745b94e65c Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 26 Jul 2016 10:16:55 -0700 Subject: arm64: Only select ARM64_MODULE_PLTS if MODULES=y MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Selecting CONFIG_RANDOMIZE_BASE=y and CONFIG_MODULES=n fails to build the module PLTs support: CC arch/arm64/kernel/module-plts.o /work/Linux/linux-2.6-aarch64/arch/arm64/kernel/module-plts.c: In function ‘module_emit_plt_entry’: /work/Linux/linux-2.6-aarch64/arch/arm64/kernel/module-plts.c:32:49: error: dereferencing pointer to incomplete type ‘struct module’ This patch selects ARM64_MODULE_PLTS conditionally only if MODULES is enabled. Fixes: f80fb3a3d508 ("arm64: add support for kernel ASLR") Cc: # 4.6+ Reported-by: Jeff Vander Stoep Acked-by: Ard Biesheuvel Acked-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index ac4746f454ed..effc9f33ade0 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -886,7 +886,7 @@ config RELOCATABLE config RANDOMIZE_BASE bool "Randomize the address of the kernel image" - select ARM64_MODULE_PLTS + select ARM64_MODULE_PLTS if MODULES select RELOCATABLE help Randomizes the virtual address at which the kernel image is -- cgit v1.2.3-59-g8ed1b From fd6380b75065fd2ff51b5f7cbbe6be77d71ea9c7 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 27 Jul 2016 08:11:10 +0100 Subject: arm64: arm: Fix-up the removal of the arm64 regs_query_register_name() prototype Commit 0a8ea52c3eb1 ("arm64: Add HAVE_REGS_AND_STACK_ACCESS_API feature") inadvertently removed the arch/arm prototype instead of the arm64 one introduced by the original patch. There should not be any bisection issues since this function is not called from anywhere else (it could as well be removed from arch/arm at some point). Fixes: 0a8ea52c3eb1 ("arm64: Add HAVE_REGS_AND_STACK_ACCESS_API feature") Signed-off-by: Catalin Marinas --- arch/arm/include/asm/ptrace.h | 1 + arch/arm64/include/asm/ptrace.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h index d3c0c23703b6..51622ba7c4a6 100644 --- a/arch/arm/include/asm/ptrace.h +++ b/arch/arm/include/asm/ptrace.h @@ -121,6 +121,7 @@ extern unsigned long profile_pc(struct pt_regs *regs); #define MAX_REG_OFFSET (offsetof(struct pt_regs, ARM_ORIG_r0)) extern int regs_query_register_offset(const char *name); +extern const char *regs_query_register_name(unsigned int offset); extern bool regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr); extern unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n); diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 3fd15fdf4181..8fbac706b906 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -153,7 +153,6 @@ struct pt_regs { (!compat_user_mode(regs) ? ((regs)->sp = value) : ((regs)->compat_sp = value)) extern int regs_query_register_offset(const char *name); -extern const char *regs_query_register_name(unsigned int offset); extern unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n); -- cgit v1.2.3-59-g8ed1b