From 86d0dd34eafffbc76a81aba6ae2d71927d3835a8 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 27 Aug 2018 13:02:43 +0200 Subject: arm64: cpufeature: add feature for CRC32 instructions Add a CRC32 feature bit and wire it up to the CPU id register so we will be able to use alternatives patching for CRC32 operations. Acked-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cpucaps.h | 3 ++- arch/arm64/kernel/cpufeature.c | 9 +++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index ae1f70450fb2..9932aca9704b 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -51,7 +51,8 @@ #define ARM64_SSBD 30 #define ARM64_MISMATCHED_CACHE_TYPE 31 #define ARM64_HAS_STAGE2_FWB 32 +#define ARM64_HAS_CRC32 33 -#define ARM64_NCAPS 33 +#define ARM64_NCAPS 34 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index e238b7932096..7626b80128f5 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1222,6 +1222,15 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .cpu_enable = cpu_enable_hw_dbm, }, #endif + { + .desc = "CRC32 instructions", + .capability = ARM64_HAS_CRC32, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64ISAR0_EL1, + .field_pos = ID_AA64ISAR0_CRC32_SHIFT, + .min_field_value = 1, + }, {}, }; -- cgit v1.2.3-59-g8ed1b From 7481cddf29ede204b475facc40e6f65459939881 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 27 Aug 2018 13:02:44 +0200 Subject: arm64/lib: add accelerated crc32 routines Unlike crc32c(), which is wired up to the crypto API internally so the optimal driver is selected based on the platform's capabilities, crc32_le() is implemented as a library function using a slice-by-8 table based C implementation. Even though few of the call sites may be bottlenecks, calling a time variant implementation with a non-negligible D-cache footprint is a bit of a waste, given that ARMv8.1 and up mandates support for the CRC32 instructions that were optional in ARMv8.0, but are already widely available, even on the Cortex-A53 based Raspberry Pi. So implement routines that use these instructions if available, and fall back to the existing generic routines otherwise. The selection is based on alternatives patching. Note that this unconditionally selects CONFIG_CRC32 as a builtin. Since CRC32 is relied upon by core functionality such as CONFIG_OF_FLATTREE, this just codifies the status quo. 
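As a rough illustration of what the __crc32 assembly macro added below computes, here is a hedged user-space C sketch using the ACLE CRC32 intrinsics from <arm_acle.h>: sixteen bytes per loop iteration, then 8/4/2/1-byte tails, exactly mirroring the assembly. This is not the kernel code itself; the function name crc32_le_hw is invented for the sketch, it assumes a little-endian host, and it needs to be built with -march=armv8-a+crc.

/* Hedged user-space model of the __crc32 macro; not kernel code. */
#include <arm_acle.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

static uint32_t crc32_le_hw(uint32_t crc, const unsigned char *p, size_t len)
{
	uint64_t d;
	uint32_t w;
	uint16_t h;

	while (len >= 16) {		/* main loop: two 64-bit chunks */
		memcpy(&d, p, 8);
		crc = __crc32d(crc, d);
		memcpy(&d, p + 8, 8);
		crc = __crc32d(crc, d);
		p += 16;
		len -= 16;
	}
	if (len & 8) {			/* 8-byte tail */
		memcpy(&d, p, 8);
		crc = __crc32d(crc, d);
		p += 8;
	}
	if (len & 4) {			/* 4-byte tail */
		memcpy(&w, p, 4);
		crc = __crc32w(crc, w);
		p += 4;
	}
	if (len & 2) {			/* 2-byte tail */
		memcpy(&h, p, 2);
		crc = __crc32h(crc, h);
		p += 2;
	}
	if (len & 1)			/* final byte */
		crc = __crc32b(crc, *p);
	return crc;
}

Swapping the __crc32d/w/h/b intrinsics for __crc32cd/cw/ch/cb gives the Castagnoli variant, which is what the "__crc32 c" expansion in the assembly below produces for __crc32c_le().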
Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 1 + arch/arm64/lib/Makefile | 2 ++ arch/arm64/lib/crc32.S | 60 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 63 insertions(+) create mode 100644 arch/arm64/lib/crc32.S (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 1b1a0e95c751..b4c1f1f55aec 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -75,6 +75,7 @@ config ARM64 select CLONE_BACKWARDS select COMMON_CLK select CPU_PM if (SUSPEND || CPU_IDLE) + select CRC32 select DCACHE_WORD_ACCESS select DMA_DIRECT_OPS select EDAC_SUPPORT diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile index 68755fd70dcf..f28f91fd96a2 100644 --- a/arch/arm64/lib/Makefile +++ b/arch/arm64/lib/Makefile @@ -25,3 +25,5 @@ KCOV_INSTRUMENT_atomic_ll_sc.o := n UBSAN_SANITIZE_atomic_ll_sc.o := n lib-$(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) += uaccess_flushcache.o + +obj-$(CONFIG_CRC32) += crc32.o diff --git a/arch/arm64/lib/crc32.S b/arch/arm64/lib/crc32.S new file mode 100644 index 000000000000..5bc1e85b4e1c --- /dev/null +++ b/arch/arm64/lib/crc32.S @@ -0,0 +1,60 @@ +/* + * Accelerated CRC32(C) using AArch64 CRC instructions + * + * Copyright (C) 2016 - 2018 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include + + .cpu generic+crc + + .macro __crc32, c +0: subs x2, x2, #16 + b.mi 8f + ldp x3, x4, [x1], #16 +CPU_BE( rev x3, x3 ) +CPU_BE( rev x4, x4 ) + crc32\c\()x w0, w0, x3 + crc32\c\()x w0, w0, x4 + b.ne 0b + ret + +8: tbz x2, #3, 4f + ldr x3, [x1], #8 +CPU_BE( rev x3, x3 ) + crc32\c\()x w0, w0, x3 +4: tbz x2, #2, 2f + ldr w3, [x1], #4 +CPU_BE( rev w3, w3 ) + crc32\c\()w w0, w0, w3 +2: tbz x2, #1, 1f + ldrh w3, [x1], #2 +CPU_BE( rev16 w3, w3 ) + crc32\c\()h w0, w0, w3 +1: tbz x2, #0, 0f + ldrb w3, [x1] + crc32\c\()b w0, w0, w3 +0: ret + .endm + + .align 5 +ENTRY(crc32_le) +alternative_if_not ARM64_HAS_CRC32 + b crc32_le_base +alternative_else_nop_endif + __crc32 +ENDPROC(crc32_le) + + .align 5 +ENTRY(__crc32c_le) +alternative_if_not ARM64_HAS_CRC32 + b __crc32c_le_base +alternative_else_nop_endif + __crc32 c +ENDPROC(__crc32c_le) -- cgit v1.2.3-59-g8ed1b From 4733c7c79e8c466fedb4fd4eb29bfc5b1bdb336f Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 5 Sep 2018 15:12:27 +0100 Subject: arm64: dump: Use consistent capitalisation for page-table dumps Being consistent in our capitalisation for page-table dumps helps when grepping for things like "end". 
Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/mm/dump.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c index 65dfc8571bf8..fcb1f2a6d7c6 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/dump.c @@ -36,8 +36,8 @@ static const struct addr_marker address_markers[] = { #endif { MODULES_VADDR, "Modules start" }, { MODULES_END, "Modules end" }, - { VMALLOC_START, "vmalloc() Area" }, - { VMALLOC_END, "vmalloc() End" }, + { VMALLOC_START, "vmalloc() area" }, + { VMALLOC_END, "vmalloc() end" }, { FIXADDR_START, "Fixmap start" }, { FIXADDR_TOP, "Fixmap end" }, { PCI_IO_START, "PCI I/O start" }, @@ -46,7 +46,7 @@ static const struct addr_marker address_markers[] = { { VMEMMAP_START, "vmemmap start" }, { VMEMMAP_START + VMEMMAP_SIZE, "vmemmap end" }, #endif - { PAGE_OFFSET, "Linear Mapping" }, + { PAGE_OFFSET, "Linear mapping" }, { -1, NULL }, }; -- cgit v1.2.3-59-g8ed1b From a1f33941f7e103bcf471eaf8461b212223c642d6 Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Thu, 6 Sep 2018 12:09:56 +0100 Subject: arm64: uaccess: implement unsafe accessors Current implementation of get/put_user_unsafe default to get/put_user which toggle PAN before each access, despite having been told by the caller that multiple accesses to user memory were about to happen. Provide implementations for user_access_begin/end to turn PAN off/on and implement unsafe accessors that assume PAN was already turned off. Tested-by: Will Deacon Signed-off-by: Julien Thierry Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/uaccess.h | 61 ++++++++++++++++++++++++++++++---------- 1 file changed, 46 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index e66b0fca99c2..c4528dd90875 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -277,11 +277,9 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr) : "+r" (err), "=&r" (x) \ : "r" (addr), "i" (-EFAULT)) -#define __get_user_err(x, ptr, err) \ +#define __get_user_err_unsafe(x, ptr, err) \ do { \ unsigned long __gu_val; \ - __chk_user_ptr(ptr); \ - uaccess_enable_not_uao(); \ switch (sizeof(*(ptr))) { \ case 1: \ __get_user_asm("ldrb", "ldtrb", "%w", __gu_val, (ptr), \ @@ -302,17 +300,24 @@ do { \ default: \ BUILD_BUG(); \ } \ - uaccess_disable_not_uao(); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ } while (0) -#define __get_user_check(x, ptr, err) \ +#define __get_user_err_check(x, ptr, err) \ +do { \ + __chk_user_ptr(ptr); \ + uaccess_enable_not_uao(); \ + __get_user_err_unsafe((x), (ptr), (err)); \ + uaccess_disable_not_uao(); \ +} while (0) + +#define __get_user_err(x, ptr, err, accessor) \ ({ \ __typeof__(*(ptr)) __user *__p = (ptr); \ might_fault(); \ if (access_ok(VERIFY_READ, __p, sizeof(*__p))) { \ __p = uaccess_mask_ptr(__p); \ - __get_user_err((x), __p, (err)); \ + accessor((x), __p, (err)); \ } else { \ (x) = 0; (err) = -EFAULT; \ } \ @@ -320,14 +325,14 @@ do { \ #define __get_user_error(x, ptr, err) \ ({ \ - __get_user_check((x), (ptr), (err)); \ + __get_user_err((x), (ptr), (err), __get_user_err_check); \ (void)0; \ }) #define __get_user(x, ptr) \ ({ \ int __gu_err = 0; \ - __get_user_check((x), (ptr), __gu_err); \ + __get_user_err((x), (ptr), __gu_err, __get_user_err_check); \ __gu_err; \ }) @@ -347,11 +352,9 @@ do { \ : "+r" (err) \ : "r" (x), "r" (addr), "i" (-EFAULT)) -#define __put_user_err(x, ptr, err) \ 
+#define __put_user_err_unsafe(x, ptr, err) \ do { \ __typeof__(*(ptr)) __pu_val = (x); \ - __chk_user_ptr(ptr); \ - uaccess_enable_not_uao(); \ switch (sizeof(*(ptr))) { \ case 1: \ __put_user_asm("strb", "sttrb", "%w", __pu_val, (ptr), \ @@ -372,16 +375,24 @@ do { \ default: \ BUILD_BUG(); \ } \ +} while (0) + + +#define __put_user_err_check(x, ptr, err) \ +do { \ + __chk_user_ptr(ptr); \ + uaccess_enable_not_uao(); \ + __put_user_err_unsafe((x), (ptr), (err)); \ uaccess_disable_not_uao(); \ } while (0) -#define __put_user_check(x, ptr, err) \ +#define __put_user_err(x, ptr, err, accessor) \ ({ \ __typeof__(*(ptr)) __user *__p = (ptr); \ might_fault(); \ if (access_ok(VERIFY_WRITE, __p, sizeof(*__p))) { \ __p = uaccess_mask_ptr(__p); \ - __put_user_err((x), __p, (err)); \ + accessor((x), __p, (err)); \ } else { \ (err) = -EFAULT; \ } \ @@ -389,19 +400,39 @@ do { \ #define __put_user_error(x, ptr, err) \ ({ \ - __put_user_check((x), (ptr), (err)); \ + __put_user_err((x), (ptr), (err), __put_user_err_check); \ (void)0; \ }) #define __put_user(x, ptr) \ ({ \ int __pu_err = 0; \ - __put_user_check((x), (ptr), __pu_err); \ + __put_user_err((x), (ptr), __pu_err, __put_user_err_check); \ __pu_err; \ }) #define put_user __put_user + +#define user_access_begin() uaccess_enable_not_uao() +#define user_access_end() uaccess_disable_not_uao() + +#define unsafe_get_user(x, ptr, err) \ +do { \ + int __gu_err = 0; \ + __get_user_err((x), (ptr), __gu_err, __get_user_err_unsafe); \ + if (__gu_err != 0) \ + goto err; \ +} while (0) + +#define unsafe_put_user(x, ptr, err) \ +do { \ + int __pu_err = 0; \ + __put_user_err((x), (ptr), __pu_err, __put_user_err_unsafe); \ + if (__pu_err != 0) \ + goto err; \ +} while (0) + extern unsigned long __must_check __arch_copy_from_user(void *to, const void __user *from, unsigned long n); #define raw_copy_from_user(to, from, n) \ ({ \ -- cgit v1.2.3-59-g8ed1b From 6899a4c82faf9b41bbddf330651a4d1155f8b64e Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 22 Aug 2018 21:23:05 +0100 Subject: arm64: tlb: Use last-level invalidation in flush_tlb_kernel_range() flush_tlb_kernel_range() is only ever used to invalidate last-level entries, so we can restrict the scope of the TLB invalidation instruction. Acked-by: Peter Zijlstra (Intel) Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/tlbflush.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index a4a1901140ee..7e2a35424ca4 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -199,7 +199,7 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end dsb(ishst); for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) - __tlbi(vaae1is, addr); + __tlbi(vaale1is, addr); dsb(ish); isb(); } -- cgit v1.2.3-59-g8ed1b From 45a284bc5ee3d629b6da1498c2273cb22361416e Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 22 Aug 2018 21:40:30 +0100 Subject: arm64: tlb: Add DSB ISHST prior to TLBI in __flush_tlb_[kernel_]pgtable() __flush_tlb_[kernel_]pgtable() rely on set_pXd() having a DSB after writing the new table entry and therefore avoid the barrier prior to the TLBI instruction. In preparation for delaying our walk-cache invalidation on the unmap() path, move the DSB into the TLB invalidation routines. 
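To make the required ordering concrete, here is a hedged sketch (an invented helper name, not the kernel function, and with the __TLBI_VADDR masking simplified away) of the sequence the hunks that follow end up emitting for a kernel page-table entry: DSB ISHST so the new table entry is visible to the walker before the TLBI, then DSB ISH to wait for the invalidation to complete on all CPUs.

/* Illustrative only: barrier/TLBI ordering for one kernel pgtable entry. */
static inline void flush_one_kernel_pgtable_sketch(unsigned long kaddr)
{
	unsigned long arg = kaddr >> 12;	/* VA[55:12]; ASID 0 for kernel */

	asm volatile(
	"	dsb	ishst\n"		/* order prior pXd write vs. TLBI */
	"	tlbi	vaae1is, %0\n"		/* invalidate by VA, all ASIDs, inner shareable */
	"	dsb	ish\n"			/* wait for completion everywhere */
	: : "r" (arg) : "memory");
}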
Acked-by: Peter Zijlstra (Intel) Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/tlbflush.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index 7e2a35424ca4..e257f8655b84 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -213,6 +213,7 @@ static inline void __flush_tlb_pgtable(struct mm_struct *mm, { unsigned long addr = __TLBI_VADDR(uaddr, ASID(mm)); + dsb(ishst); __tlbi(vae1is, addr); __tlbi_user(vae1is, addr); dsb(ish); @@ -222,6 +223,7 @@ static inline void __flush_tlb_kernel_pgtable(unsigned long kaddr) { unsigned long addr = __TLBI_VADDR(kaddr, 0); + dsb(ishst); __tlbi(vaae1is, addr); dsb(ish); } -- cgit v1.2.3-59-g8ed1b From 0795edaf3f1ff1ea58048515211280db004bbd68 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 22 Aug 2018 21:36:31 +0100 Subject: arm64: pgtable: Implement p[mu]d_valid() and check in set_p[mu]d() Now that our walk-cache invalidation routines imply a DSB before the invalidation, we no longer need one when we are clearing an entry during unmap. Acked-by: Peter Zijlstra (Intel) Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/pgtable.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 1bdeca8918a6..2ab2031b778c 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -360,6 +360,7 @@ static inline int pmd_protnone(pmd_t pmd) #define pmd_present(pmd) pte_present(pmd_pte(pmd)) #define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd)) #define pmd_young(pmd) pte_young(pmd_pte(pmd)) +#define pmd_valid(pmd) pte_valid(pmd_pte(pmd)) #define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd))) #define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd))) #define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd))) @@ -431,7 +432,9 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) { WRITE_ONCE(*pmdp, pmd); - dsb(ishst); + + if (pmd_valid(pmd)) + dsb(ishst); } static inline void pmd_clear(pmd_t *pmdp) @@ -477,11 +480,14 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd) #define pud_none(pud) (!pud_val(pud)) #define pud_bad(pud) (!(pud_val(pud) & PUD_TABLE_BIT)) #define pud_present(pud) pte_present(pud_pte(pud)) +#define pud_valid(pud) pte_valid(pud_pte(pud)) static inline void set_pud(pud_t *pudp, pud_t pud) { WRITE_ONCE(*pudp, pud); - dsb(ishst); + + if (pud_valid(pud)) + dsb(ishst); } static inline void pud_clear(pud_t *pudp) -- cgit v1.2.3-59-g8ed1b From d8289d3a5854a2a0ae144bff106a78738fe63050 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 23 Aug 2018 19:08:15 +0100 Subject: arm64: tlb: Justify non-leaf invalidation in flush_tlb_range() Add a comment to explain why we can't get away with last-level invalidation in flush_tlb_range() Acked-by: Peter Zijlstra (Intel) Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/tlbflush.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index e257f8655b84..ddbf1718669d 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -182,6 +182,10 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma, static inline void flush_tlb_range(struct 
vm_area_struct *vma, unsigned long start, unsigned long end) { + /* + * We cannot use leaf-only invalidation here, since we may be invalidating + * table entries as part of collapsing hugepages or moving page tables. + */ __flush_tlb_range(vma, start, end, false); } -- cgit v1.2.3-59-g8ed1b From 67a902ac598dca056366a7342f401aa6f605072f Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 23 Aug 2018 19:26:21 +0100 Subject: arm64: tlbflush: Allow stride to be specified for __flush_tlb_range() When we are unmapping intermediate page-table entries or huge pages, we don't need to issue a TLBI instruction for every PAGE_SIZE chunk in the VA range being unmapped. Allow the invalidation stride to be passed to __flush_tlb_range(), and adjust our "just nuke the ASID" heuristic to take this into account. Acked-by: Peter Zijlstra (Intel) Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/tlb.h | 2 +- arch/arm64/include/asm/tlbflush.h | 15 +++++++++------ 2 files changed, 10 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index a3233167be60..1e1f68ce28f4 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -53,7 +53,7 @@ static inline void tlb_flush(struct mmu_gather *tlb) * the __(pte|pmd|pud)_free_tlb() functions, so last level * TLBI is sufficient here. */ - __flush_tlb_range(&vma, tlb->start, tlb->end, true); + __flush_tlb_range(&vma, tlb->start, tlb->end, PAGE_SIZE, true); } static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index ddbf1718669d..37ccdb246b20 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -149,25 +149,28 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, * This is meant to avoid soft lock-ups on large TLB flushing ranges and not * necessarily a performance improvement. */ -#define MAX_TLB_RANGE (1024UL << PAGE_SHIFT) +#define MAX_TLBI_OPS 1024UL static inline void __flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end, - bool last_level) + unsigned long stride, bool last_level) { unsigned long asid = ASID(vma->vm_mm); unsigned long addr; - if ((end - start) > MAX_TLB_RANGE) { + if ((end - start) > (MAX_TLBI_OPS * stride)) { flush_tlb_mm(vma->vm_mm); return; } + /* Convert the stride into units of 4k */ + stride >>= 12; + start = __TLBI_VADDR(start, asid); end = __TLBI_VADDR(end, asid); dsb(ishst); - for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) { + for (addr = start; addr < end; addr += stride) { if (last_level) { __tlbi(vale1is, addr); __tlbi_user(vale1is, addr); @@ -186,14 +189,14 @@ static inline void flush_tlb_range(struct vm_area_struct *vma, * We cannot use leaf-only invalidation here, since we may be invalidating * table entries as part of collapsing hugepages or moving page tables. 
*/ - __flush_tlb_range(vma, start, end, false); + __flush_tlb_range(vma, start, end, PAGE_SIZE, false); } static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) { unsigned long addr; - if ((end - start) > MAX_TLB_RANGE) { + if ((end - start) > (MAX_TLBI_OPS * PAGE_SIZE)) { flush_tlb_all(); return; } -- cgit v1.2.3-59-g8ed1b From 07212cd47efef1df971e2289a89c087a5f6f6a2b Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 23 Aug 2018 19:48:44 +0100 Subject: arm64: tlb: Remove redundant !CONFIG_HAVE_RCU_TABLE_FREE code If there's one thing the RCU-based table freeing doesn't need, it's more ifdeffery. Remove the redundant !CONFIG_HAVE_RCU_TABLE_FREE code, since this option is unconditionally selected in our Kconfig. Acked-by: Peter Zijlstra (Intel) Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/tlb.h | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index 1e1f68ce28f4..bd00017d529a 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -22,16 +22,10 @@ #include #include -#ifdef CONFIG_HAVE_RCU_TABLE_FREE - -#define tlb_remove_entry(tlb, entry) tlb_remove_table(tlb, entry) static inline void __tlb_remove_table(void *_table) { free_page_and_swap_cache((struct page *)_table); } -#else -#define tlb_remove_entry(tlb, entry) tlb_remove_page(tlb, entry) -#endif /* CONFIG_HAVE_RCU_TABLE_FREE */ static void tlb_flush(struct mmu_gather *tlb); @@ -61,7 +55,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, { __flush_tlb_pgtable(tlb->mm, addr); pgtable_page_dtor(pte); - tlb_remove_entry(tlb, pte); + tlb_remove_table(tlb, pte); } #if CONFIG_PGTABLE_LEVELS > 2 @@ -69,7 +63,7 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr) { __flush_tlb_pgtable(tlb->mm, addr); - tlb_remove_entry(tlb, virt_to_page(pmdp)); + tlb_remove_table(tlb, virt_to_page(pmdp)); } #endif @@ -78,7 +72,7 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp, unsigned long addr) { __flush_tlb_pgtable(tlb->mm, addr); - tlb_remove_entry(tlb, virt_to_page(pudp)); + tlb_remove_table(tlb, virt_to_page(pudp)); } #endif -- cgit v1.2.3-59-g8ed1b From f270ab88fdf205be1a7a46ccb61f4a343be543a2 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 23 Aug 2018 21:08:31 +0100 Subject: arm64: tlb: Adjust stride and type of TLBI according to mmu_gather Now that the core mmu_gather code keeps track of both the levels of page table cleared and also whether or not these entries correspond to intermediate entries, we can use this in our tlb_flush() callback to reduce the number of invalidations we issue as well as their scope. Acked-by: Peter Zijlstra (Intel) Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/tlb.h | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index bd00017d529a..b078fdec10d5 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -34,20 +34,21 @@ static void tlb_flush(struct mmu_gather *tlb); static inline void tlb_flush(struct mmu_gather *tlb) { struct vm_area_struct vma = TLB_FLUSH_VMA(tlb->mm, 0); + bool last_level = !tlb->freed_tables; + unsigned long stride = tlb_get_unmap_size(tlb); /* - * The ASID allocator will either invalidate the ASID or mark - * it as used. 
+ * If we're tearing down the address space then we only care about + * invalidating the walk-cache, since the ASID allocator won't + * reallocate our ASID without invalidating the entire TLB. */ - if (tlb->fullmm) + if (tlb->fullmm) { + if (!last_level) + flush_tlb_mm(tlb->mm); return; + } - /* - * The intermediate page table levels are already handled by - * the __(pte|pmd|pud)_free_tlb() functions, so last level - * TLBI is sufficient here. - */ - __flush_tlb_range(&vma, tlb->start, tlb->end, PAGE_SIZE, true); + __flush_tlb_range(&vma, tlb->start, tlb->end, stride, last_level); } static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, -- cgit v1.2.3-59-g8ed1b From ace8cb754539077ed75f3f15b77b2b51b5b7a431 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 23 Aug 2018 21:16:50 +0100 Subject: arm64: tlb: Avoid synchronous TLBIs when freeing page tables By selecting HAVE_RCU_TABLE_INVALIDATE, we can rely on tlb_flush() being called if we fail to batch table pages for freeing. This in turn allows us to postpone walk-cache invalidation until tlb_finish_mmu(), which avoids lots of unnecessary DSBs and means we can shoot down the ASID if the range is large enough. Acked-by: Peter Zijlstra (Intel) Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/tlb.h | 3 --- arch/arm64/include/asm/tlbflush.h | 11 ----------- 3 files changed, 1 insertion(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index b4c1f1f55aec..58eb02796b16 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -143,6 +143,7 @@ config ARM64 select HAVE_PERF_USER_STACK_DUMP select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RCU_TABLE_FREE + select HAVE_RCU_TABLE_INVALIDATE select HAVE_RSEQ select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index b078fdec10d5..106fdc951b6e 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -54,7 +54,6 @@ static inline void tlb_flush(struct mmu_gather *tlb) static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, unsigned long addr) { - __flush_tlb_pgtable(tlb->mm, addr); pgtable_page_dtor(pte); tlb_remove_table(tlb, pte); } @@ -63,7 +62,6 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr) { - __flush_tlb_pgtable(tlb->mm, addr); tlb_remove_table(tlb, virt_to_page(pmdp)); } #endif @@ -72,7 +70,6 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp, unsigned long addr) { - __flush_tlb_pgtable(tlb->mm, addr); tlb_remove_table(tlb, virt_to_page(pudp)); } #endif diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index 37ccdb246b20..c98ed8871030 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -215,17 +215,6 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end * Used to invalidate the TLB (walk caches) corresponding to intermediate page * table levels (pgd/pud/pmd). 
*/ -static inline void __flush_tlb_pgtable(struct mm_struct *mm, - unsigned long uaddr) -{ - unsigned long addr = __TLBI_VADDR(uaddr, ASID(mm)); - - dsb(ishst); - __tlbi(vae1is, addr); - __tlbi_user(vae1is, addr); - dsb(ish); -} - static inline void __flush_tlb_kernel_pgtable(unsigned long kaddr) { unsigned long addr = __TLBI_VADDR(kaddr, 0); -- cgit v1.2.3-59-g8ed1b From 7f08872774eb971693ba79eeb2d4db364c9f5bfb Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 28 Aug 2018 14:52:17 +0100 Subject: arm64: tlb: Rewrite stale comment in asm/tlbflush.h Peter Z asked me to justify the barrier usage in asm/tlbflush.h, but actually that whole block comment needs to be rewritten. Reported-by: Peter Zijlstra Acked-by: Peter Zijlstra (Intel) Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/tlbflush.h | 80 +++++++++++++++++++++++++++------------ 1 file changed, 55 insertions(+), 25 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index c98ed8871030..c3c0387aee18 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -70,43 +70,73 @@ }) /* - * TLB Management - * ============== + * TLB Invalidation + * ================ * - * The TLB specific code is expected to perform whatever tests it needs - * to determine if it should invalidate the TLB for each call. Start - * addresses are inclusive and end addresses are exclusive; it is safe to - * round these addresses down. + * This header file implements the low-level TLB invalidation routines + * (sometimes referred to as "flushing" in the kernel) for arm64. * - * flush_tlb_all() + * Every invalidation operation uses the following template: + * + * DSB ISHST // Ensure prior page-table updates have completed + * TLBI ... // Invalidate the TLB + * DSB ISH // Ensure the TLB invalidation has completed + * if (invalidated kernel mappings) + * ISB // Discard any instructions fetched from the old mapping + * + * + * The following functions form part of the "core" TLB invalidation API, + * as documented in Documentation/core-api/cachetlb.rst: * - * Invalidate the entire TLB. + * flush_tlb_all() + * Invalidate the entire TLB (kernel + user) on all CPUs * * flush_tlb_mm(mm) + * Invalidate an entire user address space on all CPUs. + * The 'mm' argument identifies the ASID to invalidate. + * + * flush_tlb_range(vma, start, end) + * Invalidate the virtual-address range '[start, end)' on all + * CPUs for the user address space corresponding to 'vma->mm'. + * Note that this operation also invalidates any walk-cache + * entries associated with translations for the specified address + * range. + * + * flush_tlb_kernel_range(start, end) + * Same as flush_tlb_range(..., start, end), but applies to + * kernel mappings rather than a particular user address space. + * Whilst not explicitly documented, this function is used when + * unmapping pages from vmalloc/io space. + * + * flush_tlb_page(vma, addr) + * Invalidate a single user mapping for address 'addr' in the + * address space corresponding to 'vma->mm'. Note that this + * operation only invalidates a single, last-level page-table + * entry and therefore does not affect any walk-caches. * - * Invalidate all TLB entries in a particular address space. 
- * - mm - mm_struct describing address space * - * flush_tlb_range(mm,start,end) + * Next, we have some undocumented invalidation routines that you probably + * don't want to call unless you know what you're doing: * - * Invalidate a range of TLB entries in the specified address - * space. - * - mm - mm_struct describing address space - * - start - start address (may not be aligned) - * - end - end address (exclusive, may not be aligned) + * local_flush_tlb_all() + * Same as flush_tlb_all(), but only applies to the calling CPU. * - * flush_tlb_page(vaddr,vma) + * __flush_tlb_kernel_pgtable(addr) + * Invalidate a single kernel mapping for address 'addr' on all + * CPUs, ensuring that any walk-cache entries associated with the + * translation are also invalidated. * - * Invalidate the specified page in the specified address range. - * - vaddr - virtual address (may not be aligned) - * - vma - vma_struct describing address range + * __flush_tlb_range(vma, start, end, stride, last_level) + * Invalidate the virtual-address range '[start, end)' on all + * CPUs for the user address space corresponding to 'vma->mm'. + * The invalidation operations are issued at a granularity + * determined by 'stride' and only affect any walk-cache entries + * if 'last_level' is equal to false. * - * flush_kern_tlb_page(kaddr) * - * Invalidate the TLB entry for the specified page. The address - * will be in the kernels virtual memory space. Current uses - * only require the D-TLB to be invalidated. - * - kaddr - Kernel virtual memory address + * Finally, take a look at asm/tlb.h to see how tlb_flush() is implemented + * on top of these routines, since that is our interface to the mmu_gather + * API as used by munmap() and friends. */ static inline void local_flush_tlb_all(void) { -- cgit v1.2.3-59-g8ed1b From ca7f686ac9fe87a9175696a8744e095ab9749c49 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 15 Jun 2018 11:36:43 +0100 Subject: arm64: Fix silly typo in comment I was passing through and figuered I'd fix this up: featuer -> feature Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cpufeature.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 1717ba1db35d..9079715794af 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -262,7 +262,7 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; /* * CPU feature detected at boot time based on system-wide value of a * feature. It is safe for a late CPU to have this feature even though - * the system hasn't enabled it, although the featuer will not be used + * the system hasn't enabled it, although the feature will not be used * by Linux in this case. If the system has enabled this feature already, * then every late CPU must have it. */ -- cgit v1.2.3-59-g8ed1b From d71be2b6c0e19180b5f80a6d42039cc074a693a2 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 15 Jun 2018 11:37:34 +0100 Subject: arm64: cpufeature: Detect SSBS and advertise to userspace Armv8.5 introduces a new PSTATE bit known as Speculative Store Bypass Safe (SSBS) which can be used as a mitigation against Spectre variant 4. Additionally, a CPU may provide instructions to manipulate PSTATE.SSBS directly, so that userspace can toggle the SSBS control without trapping to the kernel. This patch probes for the existence of SSBS and advertise the new instructions to userspace if they exist. 
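For completeness, a hedged user-space sketch of how the new hwcap would typically be consumed. HWCAP_SSBS is the bit defined in the uapi hunk below; the guarded fallback #define here is only so the sketch builds against older headers.

#include <stdio.h>
#include <sys/auxv.h>

#ifndef HWCAP_SSBS
#define HWCAP_SSBS	(1 << 28)	/* value from the uapi hunk below */
#endif

int main(void)
{
	unsigned long hwcaps = getauxval(AT_HWCAP);

	if (hwcaps & HWCAP_SSBS)
		printf("SSBS PSTATE-manipulation instructions advertised\n");
	else
		printf("no SSBS instructions; mitigation stays prctl/firmware based\n");
	return 0;
}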
Reviewed-by: Suzuki K Poulose Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cpucaps.h | 3 ++- arch/arm64/include/asm/sysreg.h | 16 ++++++++++++---- arch/arm64/include/uapi/asm/hwcap.h | 1 + arch/arm64/kernel/cpufeature.c | 19 +++++++++++++++++-- arch/arm64/kernel/cpuinfo.c | 1 + 5 files changed, 33 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 9932aca9704b..38eec9cf30f2 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -52,7 +52,8 @@ #define ARM64_MISMATCHED_CACHE_TYPE 31 #define ARM64_HAS_STAGE2_FWB 32 #define ARM64_HAS_CRC32 33 +#define ARM64_SSBS 34 -#define ARM64_NCAPS 34 +#define ARM64_NCAPS 35 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index c1470931b897..2fc6242baf11 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -419,6 +419,7 @@ #define SYS_ICH_LR15_EL2 __SYS__LR8_EL2(7) /* Common SCTLR_ELx flags. */ +#define SCTLR_ELx_DSSBS (1UL << 44) #define SCTLR_ELx_EE (1 << 25) #define SCTLR_ELx_IESB (1 << 21) #define SCTLR_ELx_WXN (1 << 19) @@ -439,7 +440,7 @@ (1 << 10) | (1 << 13) | (1 << 14) | (1 << 15) | \ (1 << 17) | (1 << 20) | (1 << 24) | (1 << 26) | \ (1 << 27) | (1 << 30) | (1 << 31) | \ - (0xffffffffUL << 32)) + (0xffffefffUL << 32)) #ifdef CONFIG_CPU_BIG_ENDIAN #define ENDIAN_SET_EL2 SCTLR_ELx_EE @@ -453,7 +454,7 @@ #define SCTLR_EL2_SET (SCTLR_ELx_IESB | ENDIAN_SET_EL2 | SCTLR_EL2_RES1) #define SCTLR_EL2_CLEAR (SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \ SCTLR_ELx_SA | SCTLR_ELx_I | SCTLR_ELx_WXN | \ - ENDIAN_CLEAR_EL2 | SCTLR_EL2_RES0) + SCTLR_ELx_DSSBS | ENDIAN_CLEAR_EL2 | SCTLR_EL2_RES0) #if (SCTLR_EL2_SET ^ SCTLR_EL2_CLEAR) != 0xffffffffffffffff #error "Inconsistent SCTLR_EL2 set/clear bits" @@ -477,7 +478,7 @@ (1 << 29)) #define SCTLR_EL1_RES0 ((1 << 6) | (1 << 10) | (1 << 13) | (1 << 17) | \ (1 << 27) | (1 << 30) | (1 << 31) | \ - (0xffffffffUL << 32)) + (0xffffefffUL << 32)) #ifdef CONFIG_CPU_BIG_ENDIAN #define ENDIAN_SET_EL1 (SCTLR_EL1_E0E | SCTLR_ELx_EE) @@ -494,7 +495,7 @@ ENDIAN_SET_EL1 | SCTLR_EL1_UCI | SCTLR_EL1_RES1) #define SCTLR_EL1_CLEAR (SCTLR_ELx_A | SCTLR_EL1_CP15BEN | SCTLR_EL1_ITD |\ SCTLR_EL1_UMA | SCTLR_ELx_WXN | ENDIAN_CLEAR_EL1 |\ - SCTLR_EL1_RES0) + SCTLR_ELx_DSSBS | SCTLR_EL1_RES0) #if (SCTLR_EL1_SET ^ SCTLR_EL1_CLEAR) != 0xffffffffffffffff #error "Inconsistent SCTLR_EL1 set/clear bits" @@ -544,6 +545,13 @@ #define ID_AA64PFR0_EL0_64BIT_ONLY 0x1 #define ID_AA64PFR0_EL0_32BIT_64BIT 0x2 +/* id_aa64pfr1 */ +#define ID_AA64PFR1_SSBS_SHIFT 4 + +#define ID_AA64PFR1_SSBS_PSTATE_NI 0 +#define ID_AA64PFR1_SSBS_PSTATE_ONLY 1 +#define ID_AA64PFR1_SSBS_PSTATE_INSNS 2 + /* id_aa64mmfr0 */ #define ID_AA64MMFR0_TGRAN4_SHIFT 28 #define ID_AA64MMFR0_TGRAN64_SHIFT 24 diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index 17c65c8f33cb..2bcd6e4f3474 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -48,5 +48,6 @@ #define HWCAP_USCAT (1 << 25) #define HWCAP_ILRCPC (1 << 26) #define HWCAP_FLAGM (1 << 27) +#define HWCAP_SSBS (1 << 28) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 7626b80128f5..5794959d8beb 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -164,6 +164,11 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = 
{ ARM64_FTR_END, }; +static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = { + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_SSBS_SHIFT, 4, ID_AA64PFR1_SSBS_PSTATE_NI), + ARM64_FTR_END, +}; + static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI), S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI), @@ -371,7 +376,7 @@ static const struct __ftr_reg_entry { /* Op1 = 0, CRn = 0, CRm = 4 */ ARM64_FTR_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0), - ARM64_FTR_REG(SYS_ID_AA64PFR1_EL1, ftr_raz), + ARM64_FTR_REG(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1), ARM64_FTR_REG(SYS_ID_AA64ZFR0_EL1, ftr_raz), /* Op1 = 0, CRn = 0, CRm = 5 */ @@ -657,7 +662,6 @@ void update_cpu_features(int cpu, /* * EL3 is not our concern. - * ID_AA64PFR1 is currently RES0. */ taint |= check_update_ftr_reg(SYS_ID_AA64PFR0_EL1, cpu, info->reg_id_aa64pfr0, boot->reg_id_aa64pfr0); @@ -1231,6 +1235,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .field_pos = ID_AA64ISAR0_CRC32_SHIFT, .min_field_value = 1, }, + { + .desc = "Speculative Store Bypassing Safe (SSBS)", + .capability = ARM64_SSBS, + .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64PFR1_EL1, + .field_pos = ID_AA64PFR1_SSBS_SHIFT, + .sign = FTR_UNSIGNED, + .min_field_value = ID_AA64PFR1_SSBS_PSTATE_ONLY, + }, {}, }; @@ -1276,6 +1290,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { #ifdef CONFIG_ARM64_SVE HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, HWCAP_SVE), #endif + HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, HWCAP_SSBS), {}, }; diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index e9ab7b3ed317..dce971f2c167 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -81,6 +81,7 @@ static const char *const hwcap_str[] = { "uscat", "ilrcpc", "flagm", + "ssbs", NULL }; -- cgit v1.2.3-59-g8ed1b From 2d1b2a91d56b19636b740ea70c8399d1df249f20 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 15 Jun 2018 11:50:42 +0100 Subject: arm64: ssbd: Drop #ifdefs for PR_SPEC_STORE_BYPASS Now that we're all merged nicely into mainline, there's no need to check to see if PR_SPEC_STORE_BYPASS is defined. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/ssbd.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/ssbd.c b/arch/arm64/kernel/ssbd.c index 3432e5ef9f41..07b12c034ec2 100644 --- a/arch/arm64/kernel/ssbd.c +++ b/arch/arm64/kernel/ssbd.c @@ -11,9 +11,7 @@ /* * prctl interface for SSBD - * FIXME: Drop the below ifdefery once merged in 4.18. 
*/ -#ifdef PR_SPEC_STORE_BYPASS static int ssbd_prctl_set(struct task_struct *task, unsigned long ctrl) { int state = arm64_get_ssbd_state(); @@ -107,4 +105,3 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) return -ENODEV; } } -#endif /* PR_SPEC_STORE_BYPASS */ -- cgit v1.2.3-59-g8ed1b From 0bf0f444b2c49241b2b39aa3cf210d7c95ef6c34 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 7 Aug 2018 13:43:06 +0100 Subject: arm64: entry: Allow handling of undefined instructions from EL1 Rather than panic() when taking an undefined instruction exception from EL1, allow a hook to be registered in case we want to emulate the instruction, like we will for the SSBS PSTATE manipulation instructions. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/entry.S | 2 +- arch/arm64/kernel/traps.c | 11 +++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 09dbea221a27..8556876c9109 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -589,7 +589,7 @@ el1_undef: inherit_daif pstate=x23, tmp=x2 mov x0, sp bl do_undefinstr - ASM_BUG() + kernel_exit 1 el1_dbg: /* * Debug exception handling diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 039e9ff379cc..b9da093e0341 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -310,10 +310,12 @@ static int call_undef_hook(struct pt_regs *regs) int (*fn)(struct pt_regs *regs, u32 instr) = NULL; void __user *pc = (void __user *)instruction_pointer(regs); - if (!user_mode(regs)) - return 1; - - if (compat_thumb_mode(regs)) { + if (!user_mode(regs)) { + __le32 instr_le; + if (probe_kernel_address((__force __le32 *)pc, instr_le)) + goto exit; + instr = le32_to_cpu(instr_le); + } else if (compat_thumb_mode(regs)) { /* 16-bit Thumb instruction */ __le16 instr_le; if (get_user(instr_le, (__le16 __user *)pc)) @@ -407,6 +409,7 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs) return; force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc); + BUG_ON(!user_mode(regs)); } void cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused) -- cgit v1.2.3-59-g8ed1b From 8f04e8e6e29c93421a95b61cad62e3918425eac7 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 7 Aug 2018 13:47:06 +0100 Subject: arm64: ssbd: Add support for PSTATE.SSBS rather than trapping to EL3 On CPUs with support for PSTATE.SSBS, the kernel can toggle the SSBD state without needing to call into firmware. This patch hooks into the existing SSBD infrastructure so that SSBS is used on CPUs that support it, but it's all made horribly complicated by the very real possibility of big/little systems that don't uniformly provide the new capability. 
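A task still drives all of this through the existing speculation-control prctl() interface, which the ssbd.c hunks further down now hook up to PSTATE.SSBS on capable CPUs. A hedged user-space sketch follows; the constants come from <linux/prctl.h> and error handling is kept minimal.

#include <stdio.h>
#include <sys/prctl.h>
#include <linux/prctl.h>

int main(void)
{
	int ret;

	/* Request the store-bypass mitigation for this task only. */
	if (prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS,
		  PR_SPEC_DISABLE, 0, 0))
		perror("PR_SET_SPECULATION_CTRL");

	/* Read back the current state (a bitmask, or negative on error). */
	ret = prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, 0, 0, 0);
	printf("speculation ctrl state: %d\n", ret);
	return 0;
}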
Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/processor.h | 7 ++++++ arch/arm64/include/asm/ptrace.h | 1 + arch/arm64/include/asm/sysreg.h | 3 +++ arch/arm64/include/uapi/asm/ptrace.h | 1 + arch/arm64/kernel/cpu_errata.c | 26 +++++++++++++++++++-- arch/arm64/kernel/cpufeature.c | 45 ++++++++++++++++++++++++++++++++++++ arch/arm64/kernel/process.c | 4 ++++ arch/arm64/kernel/ssbd.c | 21 +++++++++++++++++ 8 files changed, 106 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 79657ad91397..f6835374ed9f 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -174,6 +174,10 @@ static inline void start_thread(struct pt_regs *regs, unsigned long pc, { start_thread_common(regs, pc); regs->pstate = PSR_MODE_EL0t; + + if (arm64_get_ssbd_state() != ARM64_SSBD_FORCE_ENABLE) + regs->pstate |= PSR_SSBS_BIT; + regs->sp = sp; } @@ -190,6 +194,9 @@ static inline void compat_start_thread(struct pt_regs *regs, unsigned long pc, regs->pstate |= PSR_AA32_E_BIT; #endif + if (arm64_get_ssbd_state() != ARM64_SSBD_FORCE_ENABLE) + regs->pstate |= PSR_AA32_SSBS_BIT; + regs->compat_sp = sp; } #endif diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 177b851ca6d9..6bc43889d11e 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -50,6 +50,7 @@ #define PSR_AA32_I_BIT 0x00000080 #define PSR_AA32_A_BIT 0x00000100 #define PSR_AA32_E_BIT 0x00000200 +#define PSR_AA32_SSBS_BIT 0x00800000 #define PSR_AA32_DIT_BIT 0x01000000 #define PSR_AA32_Q_BIT 0x08000000 #define PSR_AA32_V_BIT 0x10000000 diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 2fc6242baf11..3091ae5975a3 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -86,11 +86,14 @@ #define REG_PSTATE_PAN_IMM sys_reg(0, 0, 4, 0, 4) #define REG_PSTATE_UAO_IMM sys_reg(0, 0, 4, 0, 3) +#define REG_PSTATE_SSBS_IMM sys_reg(0, 3, 4, 0, 1) #define SET_PSTATE_PAN(x) __emit_inst(0xd5000000 | REG_PSTATE_PAN_IMM | \ (!!x)<<8 | 0x1f) #define SET_PSTATE_UAO(x) __emit_inst(0xd5000000 | REG_PSTATE_UAO_IMM | \ (!!x)<<8 | 0x1f) +#define SET_PSTATE_SSBS(x) __emit_inst(0xd5000000 | REG_PSTATE_SSBS_IMM | \ + (!!x)<<8 | 0x1f) #define SYS_DC_ISW sys_insn(1, 0, 7, 6, 2) #define SYS_DC_CSW sys_insn(1, 0, 7, 10, 2) diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index 98c4ce55d9c3..a36227fdb084 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h @@ -46,6 +46,7 @@ #define PSR_I_BIT 0x00000080 #define PSR_A_BIT 0x00000100 #define PSR_D_BIT 0x00000200 +#define PSR_SSBS_BIT 0x00001000 #define PSR_PAN_BIT 0x00400000 #define PSR_UAO_BIT 0x00800000 #define PSR_V_BIT 0x10000000 diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index dec10898d688..c063490d7b51 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -312,6 +312,14 @@ void __init arm64_enable_wa2_handling(struct alt_instr *alt, void arm64_set_ssbd_mitigation(bool state) { + if (this_cpu_has_cap(ARM64_SSBS)) { + if (state) + asm volatile(SET_PSTATE_SSBS(0)); + else + asm volatile(SET_PSTATE_SSBS(1)); + return; + } + switch (psci_ops.conduit) { case PSCI_CONDUIT_HVC: arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL); @@ -336,6 +344,11 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities 
*entry, WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); + if (this_cpu_has_cap(ARM64_SSBS)) { + required = false; + goto out_printmsg; + } + if (psci_ops.smccc_version == SMCCC_VERSION_1_0) { ssbd_state = ARM64_SSBD_UNKNOWN; return false; @@ -384,7 +397,6 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, switch (ssbd_state) { case ARM64_SSBD_FORCE_DISABLE: - pr_info_once("%s disabled from command-line\n", entry->desc); arm64_set_ssbd_mitigation(false); required = false; break; @@ -397,7 +409,6 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, break; case ARM64_SSBD_FORCE_ENABLE: - pr_info_once("%s forced from command-line\n", entry->desc); arm64_set_ssbd_mitigation(true); required = true; break; @@ -407,6 +418,17 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, break; } +out_printmsg: + switch (ssbd_state) { + case ARM64_SSBD_FORCE_DISABLE: + pr_info_once("%s disabled from command-line\n", entry->desc); + break; + + case ARM64_SSBD_FORCE_ENABLE: + pr_info_once("%s forced from command-line\n", entry->desc); + break; + } + return required; } #endif /* CONFIG_ARM64_SSBD */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 5794959d8beb..9aa18a0df0d7 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1039,6 +1039,48 @@ static void cpu_has_fwb(const struct arm64_cpu_capabilities *__unused) WARN_ON(val & (7 << 27 | 7 << 21)); } +#ifdef CONFIG_ARM64_SSBD +static int ssbs_emulation_handler(struct pt_regs *regs, u32 instr) +{ + if (user_mode(regs)) + return 1; + + if (instr & BIT(CRm_shift)) + regs->pstate |= PSR_SSBS_BIT; + else + regs->pstate &= ~PSR_SSBS_BIT; + + arm64_skip_faulting_instruction(regs, 4); + return 0; +} + +static struct undef_hook ssbs_emulation_hook = { + .instr_mask = ~(1U << CRm_shift), + .instr_val = 0xd500001f | REG_PSTATE_SSBS_IMM, + .fn = ssbs_emulation_handler, +}; + +static void cpu_enable_ssbs(const struct arm64_cpu_capabilities *__unused) +{ + static bool undef_hook_registered = false; + static DEFINE_SPINLOCK(hook_lock); + + spin_lock(&hook_lock); + if (!undef_hook_registered) { + register_undef_hook(&ssbs_emulation_hook); + undef_hook_registered = true; + } + spin_unlock(&hook_lock); + + if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) { + sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_DSSBS); + arm64_set_ssbd_mitigation(false); + } else { + arm64_set_ssbd_mitigation(true); + } +} +#endif /* CONFIG_ARM64_SSBD */ + static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "GIC system register CPU interface", @@ -1226,6 +1268,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .cpu_enable = cpu_enable_hw_dbm, }, #endif +#ifdef CONFIG_ARM64_SSBD { .desc = "CRC32 instructions", .capability = ARM64_HAS_CRC32, @@ -1244,7 +1287,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .field_pos = ID_AA64PFR1_SSBS_SHIFT, .sign = FTR_UNSIGNED, .min_field_value = ID_AA64PFR1_SSBS_PSTATE_ONLY, + .cpu_enable = cpu_enable_ssbs, }, +#endif {}, }; diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 7f1628effe6d..ce99c58cd1f1 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -358,6 +358,10 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, if (IS_ENABLED(CONFIG_ARM64_UAO) && cpus_have_const_cap(ARM64_HAS_UAO)) childregs->pstate |= PSR_UAO_BIT; + + if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) + 
childregs->pstate |= PSR_SSBS_BIT; + p->thread.cpu_context.x19 = stack_start; p->thread.cpu_context.x20 = stk_sz; } diff --git a/arch/arm64/kernel/ssbd.c b/arch/arm64/kernel/ssbd.c index 07b12c034ec2..885f13e58708 100644 --- a/arch/arm64/kernel/ssbd.c +++ b/arch/arm64/kernel/ssbd.c @@ -3,12 +3,30 @@ * Copyright (C) 2018 ARM Ltd, All Rights Reserved. */ +#include #include #include +#include #include #include +static void ssbd_ssbs_enable(struct task_struct *task) +{ + u64 val = is_compat_thread(task_thread_info(task)) ? + PSR_AA32_SSBS_BIT : PSR_SSBS_BIT; + + task_pt_regs(task)->pstate |= val; +} + +static void ssbd_ssbs_disable(struct task_struct *task) +{ + u64 val = is_compat_thread(task_thread_info(task)) ? + PSR_AA32_SSBS_BIT : PSR_SSBS_BIT; + + task_pt_regs(task)->pstate &= ~val; +} + /* * prctl interface for SSBD */ @@ -44,12 +62,14 @@ static int ssbd_prctl_set(struct task_struct *task, unsigned long ctrl) return -EPERM; task_clear_spec_ssb_disable(task); clear_tsk_thread_flag(task, TIF_SSBD); + ssbd_ssbs_enable(task); break; case PR_SPEC_DISABLE: if (state == ARM64_SSBD_FORCE_DISABLE) return -EPERM; task_set_spec_ssb_disable(task); set_tsk_thread_flag(task, TIF_SSBD); + ssbd_ssbs_disable(task); break; case PR_SPEC_FORCE_DISABLE: if (state == ARM64_SSBD_FORCE_DISABLE) @@ -57,6 +77,7 @@ static int ssbd_prctl_set(struct task_struct *task, unsigned long ctrl) task_set_spec_ssb_disable(task); task_set_spec_ssb_force_disable(task); set_tsk_thread_flag(task, TIF_SSBD); + ssbd_ssbs_disable(task); break; default: return -ERANGE; -- cgit v1.2.3-59-g8ed1b From 7c36447ae5a090729e7b129f24705bb231a07e0b Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 8 Aug 2018 16:10:54 +0100 Subject: KVM: arm64: Set SCTLR_EL2.DSSBS if SSBD is forcefully disabled and !vhe When running without VHE, it is necessary to set SCTLR_EL2.DSSBS if SSBD has been forcefully disabled on the kernel command-line. Acked-by: Christoffer Dall Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/kvm_host.h | 11 +++++++++++ arch/arm64/kvm/hyp/sysreg-sr.c | 11 +++++++++++ 2 files changed, 22 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index f26055f2306e..15501921fc75 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -389,6 +389,8 @@ struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); DECLARE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state); +void __kvm_enable_ssbs(void); + static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, unsigned long hyp_stack_ptr, unsigned long vector_ptr) @@ -409,6 +411,15 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, */ BUG_ON(!static_branch_likely(&arm64_const_caps_ready)); __kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr, tpidr_el2); + + /* + * Disabling SSBD on a non-VHE system requires us to enable SSBS + * at EL2. 
+ */ + if (!has_vhe() && this_cpu_has_cap(ARM64_SSBS) && + arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) { + kvm_call_hyp(__kvm_enable_ssbs); + } } static inline bool kvm_arch_check_sve_has_vhe(void) diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c index 9ce223944983..76d016b446b2 100644 --- a/arch/arm64/kvm/hyp/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/sysreg-sr.c @@ -288,3 +288,14 @@ void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu) vcpu->arch.sysregs_loaded_on_cpu = false; } + +void __hyp_text __kvm_enable_ssbs(void) +{ + u64 tmp; + + asm volatile( + "mrs %0, sctlr_el2\n" + "orr %0, %0, %1\n" + "msr sctlr_el2, %0" + : "=&r" (tmp) : "L" (SCTLR_ELx_DSSBS)); +} -- cgit v1.2.3-59-g8ed1b From b8925ee2e12d1cb9a11d6f28b5814f2bfa59dce1 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 7 Aug 2018 13:53:41 +0100 Subject: arm64: cpu: Move errata and feature enable callbacks closer to callers The cpu errata and feature enable callbacks are only called via their respective arm64_cpu_capabilities structure and therefore shouldn't exist in the global namespace. Move the PAN, RAS and cache maintenance emulation enable callbacks into the same files as their corresponding arm64_cpu_capabilities structures, making them static in the process. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/processor.h | 4 ---- arch/arm64/kernel/cpu_errata.c | 6 ++++++ arch/arm64/kernel/cpufeature.c | 28 ++++++++++++++++++++++------ arch/arm64/kernel/traps.c | 5 ----- arch/arm64/mm/fault.c | 14 -------------- 5 files changed, 28 insertions(+), 29 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index f6835374ed9f..2bf6691371c2 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -251,10 +251,6 @@ static inline void spin_lock_prefetch(const void *ptr) #endif -void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused); -void cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused); -void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused); - extern unsigned long __ro_after_init signal_minsigstksz; /* sigframe size */ extern void __init minsigstksz_setup(void); diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index c063490d7b51..8900cb0615f8 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -433,6 +433,12 @@ out_printmsg: } #endif /* CONFIG_ARM64_SSBD */ +static void __maybe_unused +cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused) +{ + sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCI, 0); +} + #define CAP_MIDR_RANGE(model, v_min, r_min, v_max, r_max) \ .matches = is_affected_midr_range, \ .midr_range = MIDR_RANGE(model, v_min, r_min, v_max, r_max) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 9aa18a0df0d7..35796ca1db50 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1081,6 +1081,28 @@ static void cpu_enable_ssbs(const struct arm64_cpu_capabilities *__unused) } #endif /* CONFIG_ARM64_SSBD */ +#ifdef CONFIG_ARM64_PAN +static void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused) +{ + /* + * We modify PSTATE. This won't work from irq context as the PSTATE + * is discarded once we return from the exception. 
+ */ + WARN_ON_ONCE(in_interrupt()); + + sysreg_clear_set(sctlr_el1, SCTLR_EL1_SPAN, 0); + asm(SET_PSTATE_PAN(1)); +} +#endif /* CONFIG_ARM64_PAN */ + +#ifdef CONFIG_ARM64_RAS_EXTN +static void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused) +{ + /* Firmware may have left a deferred SError in this register. */ + write_sysreg_s(0, SYS_DISR_EL1); +} +#endif /* CONFIG_ARM64_RAS_EXTN */ + static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "GIC system register CPU interface", @@ -1824,9 +1846,3 @@ static int __init enable_mrs_emulation(void) } core_initcall(enable_mrs_emulation); - -void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused) -{ - /* Firmware may have left a deferred SError in this register. */ - write_sysreg_s(0, SYS_DISR_EL1); -} diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index b9da093e0341..148de417ed3e 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -412,11 +412,6 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs) BUG_ON(!user_mode(regs)); } -void cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused) -{ - sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCI, 0); -} - #define __user_cache_maint(insn, address, res) \ if (address >= user_addr_max()) { \ res = -EFAULT; \ diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 50b30ff30de4..6342f1793c70 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -864,17 +864,3 @@ asmlinkage int __exception do_debug_exception(unsigned long addr, return rv; } NOKPROBE_SYMBOL(do_debug_exception); - -#ifdef CONFIG_ARM64_PAN -void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused) -{ - /* - * We modify PSTATE. This won't work from irq context as the PSTATE - * is discarded once we return from the exception. - */ - WARN_ON_ONCE(in_interrupt()); - - sysreg_clear_set(sctlr_el1, SCTLR_EL1_SPAN, 0); - asm(SET_PSTATE_PAN(1)); -} -#endif /* CONFIG_ARM64_PAN */ -- cgit v1.2.3-59-g8ed1b From 8a60419d36762a1131c2b29f7bd14371db4df1b5 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 14 Aug 2018 16:24:54 +0100 Subject: arm64: force_signal_inject: WARN if called from kernel context force_signal_inject() is designed to send a fatal signal to userspace, so WARN if the current pt_regs indicates a kernel context. This can currently happen for the undefined instruction trap, so patch that up so we always BUG() if we didn't have a handler. 
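Both the EL1-undef rework and this change lean on the undef_hook mechanism shown earlier for ssbs_emulation_hook: a hook's ->fn() returns 0 if it emulated the instruction and non-zero to fall through to the signal/BUG path. A hedged in-kernel sketch of registering such a hook is below; the handler name and the matched encoding (the NOP encoding, used purely as a placeholder) are illustrative, and it assumes the usual declarations from asm/traps.h.

/* Illustrative only; not an instruction the kernel actually emulates. */
static int demo_emulation_handler(struct pt_regs *regs, u32 instr)
{
	/* "Emulate" the instruction, then step over its 4 bytes. */
	arm64_skip_faulting_instruction(regs, 4);
	return 0;			/* 0 == handled: no signal, no BUG */
}

static struct undef_hook demo_hook = {
	.instr_mask	= 0xffffffff,	/* match one exact encoding ... */
	.instr_val	= 0xd503201f,	/* ... here: NOP, as a placeholder */
	.fn		= demo_emulation_handler,
};

static int __init demo_hook_init(void)
{
	register_undef_hook(&demo_hook);
	return 0;
}
core_initcall(demo_hook_init);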
Signed-off-by: Will Deacon
Signed-off-by: Catalin Marinas
---
 arch/arm64/kernel/traps.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 148de417ed3e..539b470f9526 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -354,6 +354,9 @@ void force_signal_inject(int signal, int code, unsigned long address)
 	const char *desc;
 	struct pt_regs *regs = current_pt_regs();
 
+	if (WARN_ON(!user_mode(regs)))
+		return;
+
 	clear_siginfo(&info);
 
 	switch (signal) {
@@ -408,8 +411,8 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs)
 	if (call_undef_hook(regs) == 0)
 		return;
 
-	force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc);
 	BUG_ON(!user_mode(regs));
+	force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc);
 }
 
 #define __user_cache_maint(insn, address, res)			\
-- cgit v1.2.3-59-g8ed1b


From e4ba15debcfd27f60d43da940a58108783bff2a6 Mon Sep 17 00:00:00 2001
From: Hari Vyas
Date: Tue, 7 Aug 2018 16:33:48 +0530
Subject: arm64: fix for bad_mode() handler to always result in panic

The bad_mode() handler is called if we encounter an unknown exception,
with the expectation that the subsequent call to panic() will halt the
system. Unfortunately, if the exception calling bad_mode() is taken from
EL0, then the call to die() can end up killing the current user task and
calling schedule() instead of falling through to panic().

Remove the die() call altogether, since we really want to bring down the
machine in this "impossible" case.

Signed-off-by: Hari Vyas
Signed-off-by: Will Deacon
Signed-off-by: Catalin Marinas
---
 arch/arm64/kernel/traps.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 539b470f9526..ce29973b9bfe 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -606,7 +606,6 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr)
 		handler[reason], smp_processor_id(), esr,
 		esr_get_class_string(esr));
 
-	die("Oops - bad mode", regs, 0);
 	local_daif_mask();
 	panic("bad mode");
 }
-- cgit v1.2.3-59-g8ed1b


From 74e248286e1d04b0d9bfdd002450ef0211f6f29f Mon Sep 17 00:00:00 2001
From: Suzuki K Poulose
Date: Sun, 16 Sep 2018 23:17:23 +0100
Subject: arm64: sysreg: Clean up instructions for modifying PSTATE fields

Instructions for modifying the PSTATE fields which were not supported
in the older toolchains (e.g., PAN, UAO) are generated using macros. We
have so far used the normal sys_reg() helper for defining the PSTATE
fields. While this works fine, it is really difficult to correlate the
code with the Arm ARM definition.

As per Arm ARM, the PSTATE fields are defined only using Op1, Op2
fields, with fixed values for Op0, CRn. Also the CRm field has been
reserved for the Immediate value for the instruction. So using the
sys_reg() helper looks quite confusing.

This patch cleans up the instruction helpers by bringing them in line
with the Arm ARM definitions to make it easier to correlate code with
the document. No functional changes.
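
For illustration only (not part of the patch): a minimal user-space
sketch of how the new helpers compose an MSR (immediate) word. The
field shifts are assumed to match the kernel's sysreg.h layout, and the
standalone harness is hypothetical.

	#include <stdint.h>
	#include <stdio.h>

	/* Field shifts assumed from the kernel's sysreg.h */
	#define Op1_shift	16
	#define Op2_shift	5
	#define CRm_shift	8

	#define pstate_field(op1, op2)	(((op1) << Op1_shift) | ((op2) << Op2_shift))
	#define PSTATE_Imm_shift	CRm_shift
	#define PSTATE_PAN		pstate_field(0, 4)

	/* Mirrors SET_PSTATE_PAN(x): MSR-immediate base word | field | Imm4 */
	static uint32_t set_pstate_pan(int x)
	{
		return 0xd500401f | PSTATE_PAN | ((!!x) << PSTATE_Imm_shift);
	}

	int main(void)
	{
		/* Expected output: 0xd500419f, i.e. "msr pan, #1" */
		printf("SET_PSTATE_PAN(1) = 0x%08x\n", set_pstate_pan(1));
		return 0;
	}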
Cc: Will Deacon Cc: Mark Rutland Signed-off-by: Suzuki K Poulose Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/sysreg.h | 30 ++++++++++++++++++++---------- arch/arm64/kernel/cpufeature.c | 6 +++--- 2 files changed, 23 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 3091ae5975a3..7e9ab1fa090c 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -84,16 +84,26 @@ #endif /* CONFIG_BROKEN_GAS_INST */ -#define REG_PSTATE_PAN_IMM sys_reg(0, 0, 4, 0, 4) -#define REG_PSTATE_UAO_IMM sys_reg(0, 0, 4, 0, 3) -#define REG_PSTATE_SSBS_IMM sys_reg(0, 3, 4, 0, 1) - -#define SET_PSTATE_PAN(x) __emit_inst(0xd5000000 | REG_PSTATE_PAN_IMM | \ - (!!x)<<8 | 0x1f) -#define SET_PSTATE_UAO(x) __emit_inst(0xd5000000 | REG_PSTATE_UAO_IMM | \ - (!!x)<<8 | 0x1f) -#define SET_PSTATE_SSBS(x) __emit_inst(0xd5000000 | REG_PSTATE_SSBS_IMM | \ - (!!x)<<8 | 0x1f) +/* + * Instructions for modifying PSTATE fields. + * As per Arm ARM for v8-A, Section "C.5.1.3 op0 == 0b00, architectural hints, + * barriers and CLREX, and PSTATE access", ARM DDI 0487 C.a, system instructions + * for accessing PSTATE fields have the following encoding: + * Op0 = 0, CRn = 4 + * Op1, Op2 encodes the PSTATE field modified and defines the constraints. + * CRm = Imm4 for the instruction. + * Rt = 0x1f + */ +#define pstate_field(op1, op2) ((op1) << Op1_shift | (op2) << Op2_shift) +#define PSTATE_Imm_shift CRm_shift + +#define PSTATE_PAN pstate_field(0, 4) +#define PSTATE_UAO pstate_field(0, 3) +#define PSTATE_SSBS pstate_field(3, 1) + +#define SET_PSTATE_PAN(x) __emit_inst(0xd500401f | PSTATE_PAN | ((!!x) << PSTATE_Imm_shift)) +#define SET_PSTATE_UAO(x) __emit_inst(0xd500401f | PSTATE_UAO | ((!!x) << PSTATE_Imm_shift)) +#define SET_PSTATE_SSBS(x) __emit_inst(0xd500401f | PSTATE_SSBS | ((!!x) << PSTATE_Imm_shift)) #define SYS_DC_ISW sys_insn(1, 0, 7, 6, 2) #define SYS_DC_CSW sys_insn(1, 0, 7, 10, 2) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 35796ca1db50..f15e2fb97011 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1045,7 +1045,7 @@ static int ssbs_emulation_handler(struct pt_regs *regs, u32 instr) if (user_mode(regs)) return 1; - if (instr & BIT(CRm_shift)) + if (instr & BIT(PSTATE_Imm_shift)) regs->pstate |= PSR_SSBS_BIT; else regs->pstate &= ~PSR_SSBS_BIT; @@ -1055,8 +1055,8 @@ static int ssbs_emulation_handler(struct pt_regs *regs, u32 instr) } static struct undef_hook ssbs_emulation_hook = { - .instr_mask = ~(1U << CRm_shift), - .instr_val = 0xd500001f | REG_PSTATE_SSBS_IMM, + .instr_mask = ~(1U << PSTATE_Imm_shift), + .instr_val = 0xd500401f | PSTATE_SSBS, .fn = ssbs_emulation_handler, }; -- cgit v1.2.3-59-g8ed1b From 5ffdfaedfa0aba3f5db0fbb8ed4f3192be2b39b8 Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Tue, 31 Jul 2018 14:08:56 +0100 Subject: arm64: mm: Support Common Not Private translations Common Not Private (CNP) is a feature of ARMv8.2 extension which allows translation table entries to be shared between different PEs in the same inner shareable domain, so the hardware can use this fact to optimise the caching of such entries in the TLB. CNP occupies one bit in TTBRx_ELy and VTTBR_EL2, which advertises to the hardware that the translation table entries pointed to by this TTBR are the same as every PE in the same inner shareable domain for which the equivalent TTBR also has CNP bit set. 
In case CNP bit is set but TTBR does not point at the same translation table entries for a given ASID and VMID, then the system is mis-configured, so the results of translations are UNPREDICTABLE. For kernel we postpone setting CNP till all cpus are up and rely on cpufeature framework to 1) patch the code which is sensitive to CNP and 2) update TTBR1_EL1 with CNP bit set. TTBR1_EL1 can be reprogrammed as result of hibernation or cpuidle (via __enable_mmu). For these two cases we restore CnP bit via __cpu_suspend_exit(). There are a few cases we need to care of changes in TTBR0_EL1: - a switch to idmap - software emulated PAN we rule out latter via Kconfig options and for the former we make sure that CNP is set for non-zero ASIDs only. Reviewed-by: James Morse Reviewed-by: Suzuki K Poulose Reviewed-by: Catalin Marinas Signed-off-by: Vladimir Murzin [catalin.marinas@arm.com: default y for CONFIG_ARM64_CNP] Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 14 ++++++++++++++ arch/arm64/include/asm/cpucaps.h | 3 ++- arch/arm64/include/asm/cpufeature.h | 6 ++++++ arch/arm64/include/asm/mmu_context.h | 17 +++++++++++++++-- arch/arm64/include/asm/pgtable-hwdef.h | 2 ++ arch/arm64/kernel/cpufeature.c | 34 ++++++++++++++++++++++++++++++++++ arch/arm64/kernel/suspend.c | 4 ++++ arch/arm64/mm/context.c | 3 +++ arch/arm64/mm/proc.S | 11 ++++++++--- 9 files changed, 88 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 58eb02796b16..fabac617d605 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1134,6 +1134,20 @@ config ARM64_RAS_EXTN and access the new registers if the system supports the extension. Platform RAS features may additionally depend on firmware support. +config ARM64_CNP + bool "Enable support for Common Not Private (CNP) translations" + default y + depends on ARM64_PAN || !ARM64_SW_TTBR0_PAN + help + Common Not Private (CNP) allows translation table entries to + be shared between different PEs in the same inner shareable + domain, so the hardware can use this fact to optimise the + caching of such entries in the TLB. + + Selecting this option allows the CNP feature to be detected + at runtime, and does not affect PEs that do not implement + this feature. 
+ endmenu config ARM64_SVE diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 38eec9cf30f2..c51d7e868f3f 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -53,7 +53,8 @@ #define ARM64_HAS_STAGE2_FWB 32 #define ARM64_HAS_CRC32 33 #define ARM64_SSBS 34 +#define ARM64_HAS_CNP 35 -#define ARM64_NCAPS 35 +#define ARM64_NCAPS 36 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 9079715794af..bb9fbf6f910a 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -508,6 +508,12 @@ static inline bool system_supports_sve(void) cpus_have_const_cap(ARM64_SVE); } +static inline bool system_supports_cnp(void) +{ + return IS_ENABLED(CONFIG_ARM64_CNP) && + cpus_have_const_cap(ARM64_HAS_CNP); +} + #define ARM64_SSBD_UNKNOWN -1 #define ARM64_SSBD_FORCE_DISABLE 0 #define ARM64_SSBD_KERNEL 1 diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 39ec0b8a689e..1e58bf58c22b 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -147,12 +147,25 @@ static inline void cpu_replace_ttbr1(pgd_t *pgdp) extern ttbr_replace_func idmap_cpu_replace_ttbr1; ttbr_replace_func *replace_phys; - phys_addr_t pgd_phys = virt_to_phys(pgdp); + /* phys_to_ttbr() zeros lower 2 bits of ttbr with 52-bit PA */ + phys_addr_t ttbr1 = phys_to_ttbr(virt_to_phys(pgdp)); + + if (system_supports_cnp() && !WARN_ON(pgdp != lm_alias(swapper_pg_dir))) { + /* + * cpu_replace_ttbr1() is used when there's a boot CPU + * up (i.e. cpufeature framework is not up yet) and + * latter only when we enable CNP via cpufeature's + * enable() callback. + * Also we rely on the cpu_hwcap bit being set before + * calling the enable() function. + */ + ttbr1 |= TTBR_CNP_BIT; + } replace_phys = (void *)__pa_symbol(idmap_cpu_replace_ttbr1); cpu_install_idmap(); - replace_phys(pgd_phys); + replace_phys(ttbr1); cpu_uninstall_idmap(); } diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index fd208eac9f2a..1d7d8da2ef9b 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -211,6 +211,8 @@ #define PHYS_MASK_SHIFT (CONFIG_ARM64_PA_BITS) #define PHYS_MASK ((UL(1) << PHYS_MASK_SHIFT) - 1) +#define TTBR_CNP_BIT (UL(1) << 0) + /* * TCR flags. */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index f15e2fb97011..237f8822a391 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -20,6 +20,7 @@ #include #include +#include #include #include #include @@ -117,6 +118,7 @@ EXPORT_SYMBOL(cpu_hwcap_keys); static bool __maybe_unused cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused); +static void cpu_enable_cnp(struct arm64_cpu_capabilities const *cap); /* * NOTE: Any changes to the visibility of features should be kept in @@ -863,6 +865,20 @@ static bool has_cache_dic(const struct arm64_cpu_capabilities *entry, return read_sanitised_ftr_reg(SYS_CTR_EL0) & BIT(CTR_DIC_SHIFT); } +static bool __maybe_unused +has_useable_cnp(const struct arm64_cpu_capabilities *entry, int scope) +{ + /* + * Kdump isn't guaranteed to power-off all secondary CPUs, CNP + * may share TLB entries with a CPU stuck in the crashed + * kernel. 
+ */ + if (is_kdump_kernel()) + return false; + + return has_cpuid_feature(entry, scope); +} + #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 static int __kpti_forced; /* 0: not forced, >0: forced on, <0: forced off */ @@ -1311,6 +1327,19 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .min_field_value = ID_AA64PFR1_SSBS_PSTATE_ONLY, .cpu_enable = cpu_enable_ssbs, }, +#endif +#ifdef CONFIG_ARM64_CNP + { + .desc = "Common not Private translations", + .capability = ARM64_HAS_CNP, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_useable_cnp, + .sys_reg = SYS_ID_AA64MMFR2_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64MMFR2_CNP_SHIFT, + .min_field_value = 1, + .cpu_enable = cpu_enable_cnp, + }, #endif {}, }; @@ -1749,6 +1778,11 @@ cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused) return (cpus_have_const_cap(ARM64_HAS_PAN) && !cpus_have_const_cap(ARM64_HAS_UAO)); } +static void __maybe_unused cpu_enable_cnp(struct arm64_cpu_capabilities const *cap) +{ + cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); +} + /* * We emulate only the following system register space. * Op0 = 0x3, CRn = 0x0, Op1 = 0x0, CRm = [0, 4 - 7] diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index 70c283368b64..9405d1b7f4b0 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -48,6 +48,10 @@ void notrace __cpu_suspend_exit(void) */ cpu_uninstall_idmap(); + /* Restore CnP bit in TTBR1_EL1 */ + if (system_supports_cnp()) + cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); + /* * PSTATE was not saved over suspend/resume, re-enable any detected * features that might not have been set correctly. diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index c127f94da8e2..a65af49e12e7 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -196,6 +196,9 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu) unsigned long flags; u64 asid, old_active_asid; + if (system_supports_cnp()) + cpu_set_reserved_ttbr0(); + asid = atomic64_read(&mm->context.id); /* diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 03646e6a2ef4..2c75b0b903ae 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -160,6 +160,12 @@ ENTRY(cpu_do_switch_mm) mrs x2, ttbr1_el1 mmid x1, x1 // get mm->context.id phys_to_ttbr x3, x0 + +alternative_if ARM64_HAS_CNP + cbz x1, 1f // skip CNP for reserved ASID + orr x3, x3, #TTBR_CNP_BIT +1: +alternative_else_nop_endif #ifdef CONFIG_ARM64_SW_TTBR0_PAN bfi x3, x1, #48, #16 // set the ASID field in TTBR0 #endif @@ -184,7 +190,7 @@ ENDPROC(cpu_do_switch_mm) .endm /* - * void idmap_cpu_replace_ttbr1(phys_addr_t new_pgd) + * void idmap_cpu_replace_ttbr1(phys_addr_t ttbr1) * * This is the low-level counterpart to cpu_replace_ttbr1, and should not be * called by anything else. It can only be executed from a TTBR0 mapping. @@ -194,8 +200,7 @@ ENTRY(idmap_cpu_replace_ttbr1) __idmap_cpu_set_reserved_ttbr1 x1, x3 - phys_to_ttbr x3, x0 - msr ttbr1_el1, x3 + msr ttbr1_el1, x0 isb restore_daif x2 -- cgit v1.2.3-59-g8ed1b From ab510027dc4dbd1eeb611a34b0cda8b21fcde492 Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Tue, 31 Jul 2018 14:08:57 +0100 Subject: arm64: KVM: Enable Common Not Private translations We rely on cpufeature framework to detect and enable CNP so for KVM we need to patch hyp to set CNP bit just before TTBR0_EL2 gets written. For the guest we encode CNP bit while building vttbr, so we don't need to bother with that in a world switch. 
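
A minimal sketch of the VTTBR composition described above (simplified:
VMID masking is dropped and the helper name is invented here; the
constants are those used in the hunks below):

	#include <stdint.h>

	#define VTTBR_CNP_BIT		(1ULL << 0)
	#define VTTBR_VMID_SHIFT	48

	/* baddr | VMID | CNP, cf. the update_vttbr() change below */
	static uint64_t build_vttbr(uint64_t pgd_phys, uint64_t vmid, int has_cnp)
	{
		uint64_t cnp = has_cnp ? VTTBR_CNP_BIT : 0;

		return pgd_phys | (vmid << VTTBR_VMID_SHIFT) | cnp;
	}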
Reviewed-by: James Morse Acked-by: Catalin Marinas Acked-by: Marc Zyngier Signed-off-by: Vladimir Murzin Signed-off-by: Catalin Marinas --- arch/arm/include/asm/kvm_arm.h | 1 + arch/arm/include/asm/kvm_mmu.h | 5 +++++ arch/arm64/include/asm/kvm_arm.h | 1 + arch/arm64/include/asm/kvm_mmu.h | 5 +++++ arch/arm64/kvm/hyp-init.S | 3 +++ virt/kvm/arm/arm.c | 4 ++-- 6 files changed, 17 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index 3ab8b3781bfe..2d43dca29c72 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -161,6 +161,7 @@ #else #define VTTBR_X (5 - KVM_T0SZ) #endif +#define VTTBR_CNP_BIT _AC(1, UL) #define VTTBR_BADDR_MASK (((_AC(1, ULL) << (40 - VTTBR_X)) - 1) << VTTBR_X) #define VTTBR_VMID_SHIFT _AC(48, ULL) #define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT) diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 265ea9cf7df7..847f01fa429d 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -355,6 +355,11 @@ static inline int hyp_map_aux_data(void) #define kvm_phys_to_vttbr(addr) (addr) +static inline bool kvm_cpu_has_cnp(void) +{ + return false; +} + #endif /* !__ASSEMBLY__ */ #endif /* __ARM_KVM_MMU_H__ */ diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index aa45df752a16..b476bc46f0ab 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -175,6 +175,7 @@ #define VTCR_EL2_FLAGS (VTCR_EL2_COMMON_BITS | VTCR_EL2_TGRAN_FLAGS) #define VTTBR_X (VTTBR_X_TGRAN_MAGIC - VTCR_EL2_T0SZ_IPA) +#define VTTBR_CNP_BIT (UL(1)) #define VTTBR_BADDR_MASK (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_X) #define VTTBR_VMID_SHIFT (UL(48)) #define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT) diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index d6fff7de5539..64337afbf124 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -517,5 +517,10 @@ static inline int hyp_map_aux_data(void) #define kvm_phys_to_vttbr(addr) phys_to_ttbr(addr) +static inline bool kvm_cpu_has_cnp(void) +{ + return system_supports_cnp(); +} + #endif /* __ASSEMBLY__ */ #endif /* __ARM64_KVM_MMU_H__ */ diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S index ea9225160786..4576b86a5579 100644 --- a/arch/arm64/kvm/hyp-init.S +++ b/arch/arm64/kvm/hyp-init.S @@ -65,6 +65,9 @@ __do_hyp_init: b.lo __kvm_handle_stub_hvc phys_to_ttbr x4, x0 +alternative_if ARM64_HAS_CNP + orr x4, x4, #TTBR_CNP_BIT +alternative_else_nop_endif msr ttbr0_el2, x4 mrs x4, tcr_el1 diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index c92053bc3f96..150c8a69cdaf 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -496,7 +496,7 @@ static bool need_new_vmid_gen(struct kvm *kvm) static void update_vttbr(struct kvm *kvm) { phys_addr_t pgd_phys; - u64 vmid; + u64 vmid, cnp = kvm_cpu_has_cnp() ? 
VTTBR_CNP_BIT : 0; bool new_gen; read_lock(&kvm_vmid_lock); @@ -546,7 +546,7 @@ static void update_vttbr(struct kvm *kvm) pgd_phys = virt_to_phys(kvm->arch.pgd); BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK); vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits); - kvm->arch.vttbr = kvm_phys_to_vttbr(pgd_phys) | vmid; + kvm->arch.vttbr = kvm_phys_to_vttbr(pgd_phys) | vmid | cnp; write_unlock(&kvm_vmid_lock); } -- cgit v1.2.3-59-g8ed1b From 880f7cc47265e7b195781dfa9a0cd62ef78304e3 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 19 Sep 2018 11:41:21 +0100 Subject: arm64: cpu_errata: Remove ARM64_MISMATCHED_CACHE_LINE_SIZE There's no need to treat mismatched cache-line sizes reported by CTR_EL0 differently to any other mismatched fields that we treat as "STRICT" in the cpufeature code. In both cases we need to trap and emulate EL0 accesses to the register, so drop ARM64_MISMATCHED_CACHE_LINE_SIZE and rely on ARM64_MISMATCHED_CACHE_TYPE instead. Reviewed-by: Suzuki K Poulose Signed-off-by: Will Deacon [catalin.marinas@arm.com: move ARM64_HAS_CNP in the empty cpucaps.h slot] Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/assembler.h | 7 +++---- arch/arm64/include/asm/cpucaps.h | 5 ++--- arch/arm64/kernel/cpu_errata.c | 15 ++------------- 3 files changed, 7 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 0bcc98dbba56..6142402c2eb4 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -286,12 +286,11 @@ alternative_endif ldr \rd, [\rn, #MM_CONTEXT_ID] .endm /* - * read_ctr - read CTR_EL0. If the system has mismatched - * cache line sizes, provide the system wide safe value - * from arm64_ftr_reg_ctrel0.sys_val + * read_ctr - read CTR_EL0. 
If the system has mismatched register fields, + * provide the system wide safe value from arm64_ftr_reg_ctrel0.sys_val */ .macro read_ctr, reg -alternative_if_not ARM64_MISMATCHED_CACHE_LINE_SIZE +alternative_if_not ARM64_MISMATCHED_CACHE_TYPE mrs \reg, ctr_el0 // read CTR nop alternative_else diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index c51d7e868f3f..6eb1b3fd0493 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -33,7 +33,7 @@ #define ARM64_WORKAROUND_CAVIUM_27456 12 #define ARM64_HAS_32BIT_EL0 13 #define ARM64_HARDEN_EL2_VECTORS 14 -#define ARM64_MISMATCHED_CACHE_LINE_SIZE 15 +#define ARM64_HAS_CNP 15 #define ARM64_HAS_NO_FPSIMD 16 #define ARM64_WORKAROUND_REPEAT_TLBI 17 #define ARM64_WORKAROUND_QCOM_FALKOR_E1003 18 @@ -53,8 +53,7 @@ #define ARM64_HAS_STAGE2_FWB 32 #define ARM64_HAS_CRC32 33 #define ARM64_SSBS 34 -#define ARM64_HAS_CNP 35 -#define ARM64_NCAPS 36 +#define ARM64_NCAPS 35 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 8900cb0615f8..20be4c578e0a 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -68,11 +68,7 @@ static bool has_mismatched_cache_type(const struct arm64_cpu_capabilities *entry, int scope) { - u64 mask = CTR_CACHE_MINLINE_MASK; - - /* Skip matching the min line sizes for cache type check */ - if (entry->capability == ARM64_MISMATCHED_CACHE_TYPE) - mask ^= arm64_ftr_reg_ctrel0.strict_mask; + u64 mask = arm64_ftr_reg_ctrel0.strict_mask;; WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); return (read_cpuid_cachetype() & mask) != @@ -644,14 +640,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { }, #endif { - .desc = "Mismatched cache line size", - .capability = ARM64_MISMATCHED_CACHE_LINE_SIZE, - .matches = has_mismatched_cache_type, - .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, - .cpu_enable = cpu_enable_trap_ctr_access, - }, - { - .desc = "Mismatched cache type", + .desc = "Mismatched cache type (CTR_EL0)", .capability = ARM64_MISMATCHED_CACHE_TYPE, .matches = has_mismatched_cache_type, .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, -- cgit v1.2.3-59-g8ed1b From 1c8391412d7794e0b38393ed98fef9a974401f05 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 20 Sep 2018 09:36:19 +0530 Subject: arm64/cpufeatures: Introduce ESR_ELx_SYS64_ISS_RT() Extracting target register from ESR.ISS encoding has already been required at multiple instances. Just make it a macro definition and replace all the existing use cases. Reviewed-by: Suzuki K Poulose Acked-by: Mark Rutland Signed-off-by: Anshuman Khandual Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/esr.h | 2 ++ arch/arm64/include/asm/kvm_emulate.h | 2 +- arch/arm64/kernel/traps.c | 8 ++++---- 3 files changed, 7 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index ce70c3ffb993..cc2d9e7bafe6 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -187,6 +187,8 @@ #define ESR_ELx_SYS64_ISS_SYS_OP_MASK (ESR_ELx_SYS64_ISS_SYS_MASK | \ ESR_ELx_SYS64_ISS_DIR_MASK) +#define ESR_ELx_SYS64_ISS_RT(esr) \ + (((esr) & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT) /* * User space cache operations have the following sysreg encoding * in System instructions. 
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 6106a85ae0be..21247870def7 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -335,7 +335,7 @@ static inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu)
 static inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu)
 {
 	u32 esr = kvm_vcpu_get_hsr(vcpu);
-	return (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT;
+	return ESR_ELx_SYS64_ISS_RT(esr);
 }
 
 static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index ce29973b9bfe..abfb304e1025 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -438,7 +438,7 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs)
 static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs)
 {
 	unsigned long address;
-	int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT;
+	int rt = ESR_ELx_SYS64_ISS_RT(esr);
 	int crm = (esr & ESR_ELx_SYS64_ISS_CRM_MASK) >> ESR_ELx_SYS64_ISS_CRM_SHIFT;
 	int ret = 0;
 
@@ -473,7 +473,7 @@ static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs)
 
 static void ctr_read_handler(unsigned int esr, struct pt_regs *regs)
 {
-	int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT;
+	int rt = ESR_ELx_SYS64_ISS_RT(esr);
 	unsigned long val = arm64_ftr_reg_user_value(&arm64_ftr_reg_ctrel0);
 
 	pt_regs_write_reg(regs, rt, val);
@@ -483,7 +483,7 @@ static void ctr_read_handler(unsigned int esr, struct pt_regs *regs)
 
 static void cntvct_read_handler(unsigned int esr, struct pt_regs *regs)
 {
-	int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT;
+	int rt = ESR_ELx_SYS64_ISS_RT(esr);
 
 	pt_regs_write_reg(regs, rt, arch_counter_get_cntvct());
 	arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
@@ -491,7 +491,7 @@ static void cntvct_read_handler(unsigned int esr, struct pt_regs *regs)
 
 static void cntfrq_read_handler(unsigned int esr, struct pt_regs *regs)
 {
-	int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT;
+	int rt = ESR_ELx_SYS64_ISS_RT(esr);
 
 	pt_regs_write_reg(regs, rt, arch_timer_get_rate());
 	arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
-- cgit v1.2.3-59-g8ed1b


From 520ad98871a072471d2583a9386b9d7243fa584d Mon Sep 17 00:00:00 2001
From: Anshuman Khandual
Date: Thu, 20 Sep 2018 09:36:20 +0530
Subject: arm64/cpufeatures: Factorize emulate_mrs()

MRS emulation gets triggered with exception class (0x00 or 0x18),
eventually calling the function emulate_mrs(), which fetches the user
space instruction and analyses its encodings (OP0, OP1, OP2, CRN, CRM,
RT). The kernel tries to emulate the given instruction looking into the
encoding details. Going forward these encodings can also be parsed from
the ESR_ELx.ISS fields without needing to fetch/decode the faulting
userspace instruction, which can improve performance. This factorizes
the emulate_mrs() function in a way that it can be called directly with
MRS encodings (OP0, OP1, OP2, CRN, CRM) for any given target register,
which can then be used directly from the 0x18 exception class.
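
For reference, an illustrative sketch (not part of the patch) of how an
(op0, op1, crn, crm, op2) tuple packs into the sysreg encoding that
emulate_sys_reg()/do_emulate_mrs() consume; the shift values are
assumed to match the kernel's sys_reg() helper:

	#define Op0_shift	19
	#define Op1_shift	16
	#define CRn_shift	12
	#define CRm_shift	8
	#define Op2_shift	5

	#define sys_reg(op0, op1, crn, crm, op2)		\
		(((op0) << Op0_shift) | ((op1) << Op1_shift) |	\
		 ((crn) << CRn_shift) | ((crm) << CRm_shift) |	\
		 ((op2) << Op2_shift))

	/* e.g. MIDR_EL1: op0 = 3, op1 = 0, CRn = 0, CRm = 0, op2 = 0 */
	#define SYS_MIDR_EL1	sys_reg(3, 0, 0, 0, 0)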
Reviewed-by: Suzuki K Poulose Acked-by: Mark Rutland Signed-off-by: Anshuman Khandual Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cpufeature.h | 1 + arch/arm64/kernel/cpufeature.c | 25 +++++++++++++++---------- 2 files changed, 16 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index bb9fbf6f910a..6db48d90ad63 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -536,6 +536,7 @@ void arm64_set_ssbd_mitigation(bool state); static inline void arm64_set_ssbd_mitigation(bool state) {} #endif +extern int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt); #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 237f8822a391..00e7c313f088 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1844,27 +1844,32 @@ static int emulate_sys_reg(u32 id, u64 *valp) return 0; } -static int emulate_mrs(struct pt_regs *regs, u32 insn) +int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt) { int rc; - u32 sys_reg, dst; u64 val; - /* - * sys_reg values are defined as used in mrs/msr instruction. - * shift the imm value to get the encoding. - */ - sys_reg = (u32)aarch64_insn_decode_immediate(AARCH64_INSN_IMM_16, insn) << 5; rc = emulate_sys_reg(sys_reg, &val); if (!rc) { - dst = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RT, insn); - pt_regs_write_reg(regs, dst, val); + pt_regs_write_reg(regs, rt, val); arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); } - return rc; } +static int emulate_mrs(struct pt_regs *regs, u32 insn) +{ + u32 sys_reg, rt; + + /* + * sys_reg values are defined as used in mrs/msr instruction. + * shift the imm value to get the encoding. + */ + sys_reg = (u32)aarch64_insn_decode_immediate(AARCH64_INSN_IMM_16, insn) << 5; + rt = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RT, insn); + return do_emulate_mrs(regs, sys_reg, rt); +} + static struct undef_hook mrs_hook = { .instr_mask = 0xfff00000, .instr_val = 0xd5300000, -- cgit v1.2.3-59-g8ed1b From 21f84796177443695680180a8493a9e20d254d5e Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 20 Sep 2018 09:36:21 +0530 Subject: arm64/cpufeatures: Emulate MRS instructions by parsing ESR_ELx.ISS Armv8.4-A extension enables MRS instruction encodings inside ESR_ELx.ISS during exception class ESR_ELx_EC_SYS64 (0x18). This encoding can be used to emulate MRS instructions which can avoid fetch/decode from user space thus improving performance. This adds a new sys64_hook structure element with applicable ESR mask/value pair for MRS instructions on various system registers but constrained by sysreg encodings which is currently allowed to be emulated. Reviewed-by: Suzuki K Poulose Acked-by: Mark Rutland Signed-off-by: Anshuman Khandual Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/esr.h | 12 ++++++++++++ arch/arm64/kernel/traps.c | 17 +++++++++++++++++ 2 files changed, 29 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index cc2d9e7bafe6..37e84f277ee2 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -208,6 +208,18 @@ #define ESR_ELx_SYS64_ISS_EL0_CACHE_OP_VAL \ (ESR_ELx_SYS64_ISS_SYS_VAL(1, 3, 1, 7, 0) | \ ESR_ELx_SYS64_ISS_DIR_WRITE) +/* + * User space MRS operations which are supported for emulation + * have the following sysreg encoding in System instructions. 
+ * op0 = 3, op1= 0, crn = 0, {crm = 0, 4-7}, READ (L = 1) + */ +#define ESR_ELx_SYS64_ISS_SYS_MRS_OP_MASK (ESR_ELx_SYS64_ISS_OP0_MASK | \ + ESR_ELx_SYS64_ISS_OP1_MASK | \ + ESR_ELx_SYS64_ISS_CRN_MASK | \ + ESR_ELx_SYS64_ISS_DIR_MASK) +#define ESR_ELx_SYS64_ISS_SYS_MRS_OP_VAL \ + (ESR_ELx_SYS64_ISS_SYS_VAL(3, 0, 0, 0, 0) | \ + ESR_ELx_SYS64_ISS_DIR_READ) #define ESR_ELx_SYS64_ISS_SYS_CTR ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 1, 0, 0) #define ESR_ELx_SYS64_ISS_SYS_CTR_READ (ESR_ELx_SYS64_ISS_SYS_CTR | \ diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index abfb304e1025..21689c6a985f 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -497,6 +497,17 @@ static void cntfrq_read_handler(unsigned int esr, struct pt_regs *regs) arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); } +static void mrs_handler(unsigned int esr, struct pt_regs *regs) +{ + u32 sysreg, rt; + + rt = ESR_ELx_SYS64_ISS_RT(esr); + sysreg = esr_sys64_to_sysreg(esr); + + if (do_emulate_mrs(regs, sysreg, rt) != 0) + force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc); +} + struct sys64_hook { unsigned int esr_mask; unsigned int esr_val; @@ -527,6 +538,12 @@ static struct sys64_hook sys64_hooks[] = { .esr_val = ESR_ELx_SYS64_ISS_SYS_CNTFRQ, .handler = cntfrq_read_handler, }, + { + /* Trap read access to CPUID registers */ + .esr_mask = ESR_ELx_SYS64_ISS_SYS_MRS_OP_MASK, + .esr_val = ESR_ELx_SYS64_ISS_SYS_MRS_OP_VAL, + .handler = mrs_handler, + }, {}, }; -- cgit v1.2.3-59-g8ed1b From 8a695a5873339c2e7c746ee51e3774fedd07d0a9 Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 31 Aug 2018 16:19:43 +0100 Subject: arm64: Kconfig: Remove ARCH_HAS_HOLES_MEMORYMODEL include/linux/mmzone.h describes ARCH_HAS_HOLES_MEMORYMODEL as relevant when parts the memmap have been free()d. This would happen on systems where memory is smaller than a sparsemem-section, and the extra struct pages are expensive. pfn_valid() on these systems returns true for the whole sparsemem-section, so an extra memmap_valid_within() check is needed. On arm64 we have nomap memory, so always provide pfn_valid() to test for nomap pages. This means ARCH_HAS_HOLES_MEMORYMODEL's extra checks are already rolled up into pfn_valid(). Remove it. 
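
As a hypothetical illustration (the caller below is invented for this
example and assumes kernel context), this is the only guard needed once
pfn_valid() itself covers nomap holes:

	/* Reject nomap holes before touching the memmap */
	static struct page *example_pfn_to_page_checked(unsigned long pfn)
	{
		if (!pfn_valid(pfn))
			return NULL;

		return pfn_to_page(pfn);
	}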
Acked-by: Will Deacon Signed-off-by: James Morse Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 5 +---- arch/arm64/include/asm/page.h | 2 -- arch/arm64/mm/init.c | 2 -- 3 files changed, 1 insertion(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index fabac617d605..da5e6f085561 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -771,9 +771,6 @@ source kernel/Kconfig.hz config ARCH_SUPPORTS_DEBUG_PAGEALLOC def_bool y -config ARCH_HAS_HOLES_MEMORYMODEL - def_bool y if SPARSEMEM - config ARCH_SPARSEMEM_ENABLE def_bool y select SPARSEMEM_VMEMMAP_ENABLE @@ -788,7 +785,7 @@ config ARCH_FLATMEM_ENABLE def_bool !NUMA config HAVE_ARCH_PFN_VALID - def_bool ARCH_HAS_HOLES_MEMORYMODEL || !SPARSEMEM + def_bool y config HW_PERF_EVENTS def_bool y diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index 60d02c81a3a2..c88a3cb117a1 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -37,9 +37,7 @@ extern void clear_page(void *to); typedef struct page *pgtable_t; -#ifdef CONFIG_HAVE_ARCH_PFN_VALID extern int pfn_valid(unsigned long); -#endif #include diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 787e27964ab9..3cf87341859f 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -284,7 +284,6 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) #endif /* CONFIG_NUMA */ -#ifdef CONFIG_HAVE_ARCH_PFN_VALID int pfn_valid(unsigned long pfn) { phys_addr_t addr = pfn << PAGE_SHIFT; @@ -294,7 +293,6 @@ int pfn_valid(unsigned long pfn) return memblock_is_map_memory(addr); } EXPORT_SYMBOL(pfn_valid); -#endif #ifndef CONFIG_SPARSEMEM static void __init arm64_memory_present(void) -- cgit v1.2.3-59-g8ed1b From 0b8af74549c2f985bfc3d2a983961b2a66882890 Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Thu, 13 Sep 2018 12:56:40 +0100 Subject: arm64: Remove unused VGA console support Support for VGA_CONSOLE is not allowable due to commit ee23794b8668 ("video: vgacon: Don't build on arm64"), thus remove the associated unused code. Whilst PCI on arm64 would support VGA a valid screen_info structure is missing. Acked-by: Will Deacon Signed-off-by: Andrew Murray Signed-off-by: Catalin Marinas --- arch/arm64/kernel/setup.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 5b4fac434c84..95670a1eafb0 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -351,11 +351,7 @@ void __init setup_arch(char **cmdline_p) #endif #ifdef CONFIG_VT -#if defined(CONFIG_VGA_CONSOLE) - conswitchp = &vga_con; -#elif defined(CONFIG_DUMMY_CONSOLE) conswitchp = &dummy_con; -#endif #endif if (boot_args[1] || boot_args[2] || boot_args[3]) { pr_err("WARNING: x1-x3 nonzero in violation of boot protocol:\n" -- cgit v1.2.3-59-g8ed1b From 2a6c7c367de82951c98a290a21156770f6f82c84 Mon Sep 17 00:00:00 2001 From: Tri Vo Date: Wed, 19 Sep 2018 12:27:50 -0700 Subject: arm64: lse: remove -fcall-used-x0 flag x0 is not callee-saved in the PCS. So there is no need to specify -fcall-used-x0. Clang doesn't currently support -fcall-used flags. This patch will help building the kernel with clang. 
Tested-by: Nick Desaulniers
Acked-by: Will Deacon
Signed-off-by: Tri Vo
Signed-off-by: Catalin Marinas
---
 arch/arm64/lib/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile
index f28f91fd96a2..69ff9887f724 100644
--- a/arch/arm64/lib/Makefile
+++ b/arch/arm64/lib/Makefile
@@ -12,7 +12,7 @@ lib-y := clear_user.o delay.o copy_from_user.o \
 # when supported by the CPU. Result and argument registers are handled
 # correctly, based on the function prototype.
 lib-$(CONFIG_ARM64_LSE_ATOMICS) += atomic_ll_sc.o
-CFLAGS_atomic_ll_sc.o	:= -fcall-used-x0 -ffixed-x1 -ffixed-x2		\
+CFLAGS_atomic_ll_sc.o	:= -ffixed-x1 -ffixed-x2			\
 		   -ffixed-x3 -ffixed-x4 -ffixed-x5 -ffixed-x6		\
 		   -ffixed-x7 -fcall-saved-x8 -fcall-saved-x9		\
 		   -fcall-saved-x10 -fcall-saved-x11 -fcall-saved-x12	\
-- cgit v1.2.3-59-g8ed1b


From 693d5639b44a8f3787444902d3600edc7e0105a2 Mon Sep 17 00:00:00 2001
From: Jun Yao
Date: Mon, 24 Sep 2018 14:51:13 +0100
Subject: arm64/mm: Pass ttbr1 as a parameter to __enable_mmu()

In subsequent patches we'll use a transient pgd during the primary cpu's
boot process. To make this work while allowing secondary cpus to use the
swapper_pg_dir, we need to pass the relevant TTBR1 pgd as a parameter
to __enable_mmu().

This patch updates __enable_mmu() to take this as a parameter, updating
callsites to pass swapper_pg_dir for now.

There should be no functional change as a result of this patch.

Signed-off-by: Jun Yao
Reviewed-by: James Morse
[Mark: simplify assembly, clarify commit message]
Signed-off-by: Mark Rutland
Signed-off-by: Catalin Marinas
---
 arch/arm64/kernel/head.S  | 20 +++++++++++---------
 arch/arm64/kernel/sleep.S |  1 +
 2 files changed, 12 insertions(+), 9 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index b0853069702f..7983ddf0c0e9 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -706,6 +706,7 @@ secondary_startup:
 	 * Common entry point for secondary CPUs.
 	 */
 	bl	__cpu_setup			// initialise processor
+	adrp	x1, swapper_pg_dir
 	bl	__enable_mmu
 	ldr	x8, =__secondary_switched
 	br	x8
@@ -748,6 +749,7 @@ ENDPROC(__secondary_switched)
  * Enable the MMU.
  *
  * x0 = SCTLR_EL1 value for turning on the MMU.
+ * x1 = TTBR1_EL1 value
  *
  * Returns to the caller via x30/lr. This requires the caller to be covered
  * by the .idmap.text section.
@@ -756,17 +758,16 @@ ENDPROC(__secondary_switched) * If it isn't, park the CPU */ ENTRY(__enable_mmu) - mrs x1, ID_AA64MMFR0_EL1 - ubfx x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4 + mrs x2, ID_AA64MMFR0_EL1 + ubfx x2, x2, #ID_AA64MMFR0_TGRAN_SHIFT, 4 cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED b.ne __no_granule_support - update_early_cpu_boot_status 0, x1, x2 - adrp x1, idmap_pg_dir - adrp x2, swapper_pg_dir - phys_to_ttbr x3, x1 - phys_to_ttbr x4, x2 - msr ttbr0_el1, x3 // load TTBR0 - msr ttbr1_el1, x4 // load TTBR1 + update_early_cpu_boot_status 0, x2, x3 + adrp x2, idmap_pg_dir + phys_to_ttbr x1, x1 + phys_to_ttbr x2, x2 + msr ttbr0_el1, x2 // load TTBR0 + msr ttbr1_el1, x1 // load TTBR1 isb msr sctlr_el1, x0 isb @@ -823,6 +824,7 @@ __primary_switch: mrs x20, sctlr_el1 // preserve old SCTLR_EL1 value #endif + adrp x1, swapper_pg_dir bl __enable_mmu #ifdef CONFIG_RELOCATABLE bl __relocate_kernel diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index bebec8ef9372..3e53ffa07994 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -101,6 +101,7 @@ ENTRY(cpu_resume) bl el2_setup // if in EL2 drop to EL1 cleanly bl __cpu_setup /* enable the MMU early - so we can access sleep_save_stash by va */ + adrp x1, swapper_pg_dir bl __enable_mmu ldr x8, =_cpu_resume br x8 -- cgit v1.2.3-59-g8ed1b From 2b5548b68199c17c1466d5798cf2c9cd806bdaa9 Mon Sep 17 00:00:00 2001 From: Jun Yao Date: Mon, 24 Sep 2018 15:47:49 +0100 Subject: arm64/mm: Separate boot-time page tables from swapper_pg_dir Since the address of swapper_pg_dir is fixed for a given kernel image, it is an attractive target for manipulation via an arbitrary write. To mitigate this we'd like to make it read-only by moving it into the rodata section. We require that swapper_pg_dir is at a fixed offset from tramp_pg_dir and reserved_ttbr0, so these will also need to move into rodata. However, swapper_pg_dir is allocated along with some transient page tables used for boot which we do not want to move into rodata. As a step towards this, this patch separates the boot-time page tables into a new init_pg_dir, and reduces swapper_pg_dir to the single page it needs to be. This allows us to retain the relationship between swapper_pg_dir, tramp_pg_dir, and swapper_pg_dir, while cleanly separating these from the boot-time page tables. The init_pg_dir holds all of the pgd/pud/pmd/pte levels needed during boot, and all of these levels will be freed when we switch to the swapper_pg_dir, which is initialized by the existing code in paging_init(). Since we start off on the init_pg_dir, we no longer need to allocate a transient page table in paging_init() in order to ensure that swapper_pg_dir isn't live while we initialize it. There should be no functional change as a result of this patch. 
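
Condensed for clarity, the resulting paging_init() (assembled from the
mmu.c hunks below, comments trimmed) boots on init_pg_dir, switches
TTBR1 to swapper_pg_dir, and then frees the transient tables:

	void __init paging_init(void)
	{
		map_kernel(swapper_pg_dir);
		map_mem(swapper_pg_dir);

		cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
		init_mm.pgd = swapper_pg_dir;

		/* The boot-time init_pg_dir tables are no longer needed */
		memblock_free(__pa_symbol(init_pg_dir),
			      __pa_symbol(init_pg_end) - __pa_symbol(init_pg_dir));
	}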
Signed-off-by: Jun Yao Reviewed-by: James Morse [Mark: place init_pg_dir after BSS, fold mm changes, commit message] Signed-off-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/kernel-pgtable.h | 2 +- arch/arm64/include/asm/mmu.h | 3 +++ arch/arm64/include/asm/pgtable.h | 3 ++- arch/arm64/kernel/head.S | 10 +++++----- arch/arm64/kernel/vmlinux.lds.S | 6 +++++- arch/arm64/mm/mmu.c | 30 +++++------------------------- 6 files changed, 21 insertions(+), 33 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index a780f6714b44..850e2122d53f 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -97,7 +97,7 @@ + EARLY_PGDS((vstart), (vend)) /* each PGDIR needs a next level page table */ \ + EARLY_PUDS((vstart), (vend)) /* each PUD needs a next level page table */ \ + EARLY_PMDS((vstart), (vend))) /* each PMD needs a next level page table */ -#define SWAPPER_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR + TEXT_OFFSET, _end)) +#define INIT_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR + TEXT_OFFSET, _end)) #define IDMAP_DIR_SIZE (IDMAP_PGTABLE_LEVELS * PAGE_SIZE) #ifdef CONFIG_ARM64_SW_TTBR0_PAN diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index dd320df0d026..7689c7aa1d77 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -95,5 +95,8 @@ extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, extern void *fixmap_remap_fdt(phys_addr_t dt_phys); extern void mark_linear_text_alias_ro(void); +#define INIT_MM_CONTEXT(name) \ + .pgd = init_pg_dir, + #endif /* !__ASSEMBLY__ */ #endif diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 2ab2031b778c..92bcc50b0cc2 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -718,8 +718,9 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, } #endif +extern pgd_t init_pg_dir[PTRS_PER_PGD]; +extern pgd_t init_pg_end[]; extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; -extern pgd_t swapper_pg_end[]; extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; extern pgd_t tramp_pg_dir[PTRS_PER_PGD]; diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 7983ddf0c0e9..7bf52ee0a805 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -291,7 +291,7 @@ __create_page_tables: * dirty cache lines being evicted. */ adrp x0, idmap_pg_dir - adrp x1, swapper_pg_end + adrp x1, init_pg_end sub x1, x1, x0 bl __inval_dcache_area @@ -299,7 +299,7 @@ __create_page_tables: * Clear the idmap and swapper page tables. */ adrp x0, idmap_pg_dir - adrp x1, swapper_pg_end + adrp x1, init_pg_end sub x1, x1, x0 1: stp xzr, xzr, [x0], #16 stp xzr, xzr, [x0], #16 @@ -373,7 +373,7 @@ __create_page_tables: /* * Map the kernel image (starting with PHYS_OFFSET). */ - adrp x0, swapper_pg_dir + adrp x0, init_pg_dir mov_q x5, KIMAGE_VADDR + TEXT_OFFSET // compile time __va(_text) add x5, x5, x23 // add KASLR displacement mov x4, PTRS_PER_PGD @@ -390,7 +390,7 @@ __create_page_tables: * tables again to remove any speculatively loaded cache lines. 
*/ adrp x0, idmap_pg_dir - adrp x1, swapper_pg_end + adrp x1, init_pg_end sub x1, x1, x0 dmb sy bl __inval_dcache_area @@ -824,7 +824,7 @@ __primary_switch: mrs x20, sctlr_el1 // preserve old SCTLR_EL1 value #endif - adrp x1, swapper_pg_dir + adrp x1, init_pg_dir bl __enable_mmu #ifdef CONFIG_RELOCATABLE bl __relocate_kernel diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 605d1b60469c..ac4c10351b17 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -229,9 +229,13 @@ SECTIONS . += RESERVED_TTBR0_SIZE; #endif swapper_pg_dir = .; - . += SWAPPER_DIR_SIZE; + . += PAGE_SIZE; swapper_pg_end = .; + init_pg_dir = .; + . += INIT_DIR_SIZE; + init_pg_end = .; + __pecoff_data_size = ABSOLUTE(. - __initdata_begin); _end = .; diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 65f86271f02b..8a5491053717 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -629,34 +629,14 @@ static void __init map_kernel(pgd_t *pgdp) */ void __init paging_init(void) { - phys_addr_t pgd_phys = early_pgtable_alloc(); - pgd_t *pgdp = pgd_set_fixmap(pgd_phys); + map_kernel(swapper_pg_dir); + map_mem(swapper_pg_dir); - map_kernel(pgdp); - map_mem(pgdp); - - /* - * We want to reuse the original swapper_pg_dir so we don't have to - * communicate the new address to non-coherent secondaries in - * secondary_entry, and so cpu_switch_mm can generate the address with - * adrp+add rather than a load from some global variable. - * - * To do this we need to go via a temporary pgd. - */ - cpu_replace_ttbr1(__va(pgd_phys)); - memcpy(swapper_pg_dir, pgdp, PGD_SIZE); cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); + init_mm.pgd = swapper_pg_dir; - pgd_clear_fixmap(); - memblock_free(pgd_phys, PAGE_SIZE); - - /* - * We only reuse the PGD from the swapper_pg_dir, not the pud + pmd - * allocated with it. - */ - memblock_free(__pa_symbol(swapper_pg_dir) + PAGE_SIZE, - __pa_symbol(swapper_pg_end) - __pa_symbol(swapper_pg_dir) - - PAGE_SIZE); + memblock_free(__pa_symbol(init_pg_dir), + __pa_symbol(init_pg_end) - __pa_symbol(init_pg_dir)); } /* -- cgit v1.2.3-59-g8ed1b From 2330b7ca78350efcb1a3b919ea4b3e0e4c57d99f Mon Sep 17 00:00:00 2001 From: Jun Yao Date: Mon, 24 Sep 2018 17:15:02 +0100 Subject: arm64/mm: use fixmap to modify swapper_pg_dir Once swapper_pg_dir is in the rodata section, it will not be possible to modify it directly, but we will need to modify it in some cases. To enable this, we can use the fixmap when deliberately modifying swapper_pg_dir. As the pgd is only transiently mapped, this provides some resilience against illicit modification of the pgd, e.g. for Kernel Space Mirror Attack (KSMA). 
Signed-off-by: Jun Yao Reviewed-by: James Morse [Mark: simplify ifdeffery, commit message] Signed-off-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/pgtable.h | 35 +++++++++++++++++++++++++++++------ arch/arm64/mm/mmu.c | 26 ++++++++++++++++++++++++-- 2 files changed, 53 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 92bcc50b0cc2..b58f764babf8 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -429,8 +429,27 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, PUD_TYPE_TABLE) #endif +extern pgd_t init_pg_dir[PTRS_PER_PGD]; +extern pgd_t init_pg_end[]; +extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; +extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; +extern pgd_t tramp_pg_dir[PTRS_PER_PGD]; + +extern void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd); + +static inline bool in_swapper_pgdir(void *addr) +{ + return ((unsigned long)addr & PAGE_MASK) == + ((unsigned long)swapper_pg_dir & PAGE_MASK); +} + static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) { + if (__is_defined(__PAGETABLE_PMD_FOLDED) && in_swapper_pgdir(pmdp)) { + set_swapper_pgd((pgd_t *)pmdp, __pgd(pmd_val(pmd))); + return; + } + WRITE_ONCE(*pmdp, pmd); if (pmd_valid(pmd)) @@ -484,6 +503,11 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd) static inline void set_pud(pud_t *pudp, pud_t pud) { + if (__is_defined(__PAGETABLE_PUD_FOLDED) && in_swapper_pgdir(pudp)) { + set_swapper_pgd((pgd_t *)pudp, __pgd(pud_val(pud))); + return; + } + WRITE_ONCE(*pudp, pud); if (pud_valid(pud)) @@ -538,6 +562,11 @@ static inline phys_addr_t pud_page_paddr(pud_t pud) static inline void set_pgd(pgd_t *pgdp, pgd_t pgd) { + if (in_swapper_pgdir(pgdp)) { + set_swapper_pgd(pgdp, pgd); + return; + } + WRITE_ONCE(*pgdp, pgd); dsb(ishst); } @@ -718,12 +747,6 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, } #endif -extern pgd_t init_pg_dir[PTRS_PER_PGD]; -extern pgd_t init_pg_end[]; -extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; -extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; -extern pgd_t tramp_pg_dir[PTRS_PER_PGD]; - /* * Encode and decode a swap entry: * bits 0-1: present (must be zero) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 8a5491053717..6f0e2edcc114 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -67,6 +67,24 @@ static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss; static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused; static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused; +static DEFINE_SPINLOCK(swapper_pgdir_lock); + +void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd) +{ + pgd_t *fixmap_pgdp; + + spin_lock(&swapper_pgdir_lock); + fixmap_pgdp = pgd_set_fixmap(__pa(pgdp)); + WRITE_ONCE(*fixmap_pgdp, pgd); + /* + * We need dsb(ishst) here to ensure the page-table-walker sees + * our new entry before set_p?d() returns. The fixmap's + * flush_tlb_kernel_range() via clear_fixmap() does this for us. 
+ */ + pgd_clear_fixmap(); + spin_unlock(&swapper_pgdir_lock); +} + pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot) { @@ -629,8 +647,12 @@ static void __init map_kernel(pgd_t *pgdp) */ void __init paging_init(void) { - map_kernel(swapper_pg_dir); - map_mem(swapper_pg_dir); + pgd_t *pgdp = pgd_set_fixmap(__pa_symbol(swapper_pg_dir)); + + map_kernel(pgdp); + map_mem(pgdp); + + pgd_clear_fixmap(); cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); init_mm.pgd = swapper_pg_dir; -- cgit v1.2.3-59-g8ed1b From 8eb7e28d4c642c310f25c18f80a44dd4b01c694e Mon Sep 17 00:00:00 2001 From: Jun Yao Date: Mon, 24 Sep 2018 17:56:18 +0100 Subject: arm64/mm: move runtime pgds to rodata Now that deliberate writes to swapper_pg_dir are made via the fixmap, we can defend against errant writes by moving it into the rodata section. Since tramp_pg_dir and reserved_ttbr0 must be at a fixed offset from swapper_pg_dir, and are not modified at runtime, these are also moved into the rodata section. Likewise, idmap_pg_dir is not modified at runtime, and is moved into rodata. Signed-off-by: Jun Yao Reviewed-by: James Morse [Mark: simplify linker script, commit message] Signed-off-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/kernel/head.S | 12 +++++++----- arch/arm64/kernel/vmlinux.lds.S | 33 +++++++++++++++++---------------- 2 files changed, 24 insertions(+), 21 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 7bf52ee0a805..4471f570a295 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -287,18 +287,20 @@ __create_page_tables: mov x28, lr /* - * Invalidate the idmap and swapper page tables to avoid potential - * dirty cache lines being evicted. + * Invalidate the init page tables to avoid potential dirty cache lines + * being evicted. Other page tables are allocated in rodata as part of + * the kernel image, and thus are clean to the PoC per the boot + * protocol. */ - adrp x0, idmap_pg_dir + adrp x0, init_pg_dir adrp x1, init_pg_end sub x1, x1, x0 bl __inval_dcache_area /* - * Clear the idmap and swapper page tables. + * Clear the init page tables. */ - adrp x0, idmap_pg_dir + adrp x0, init_pg_dir adrp x1, init_pg_end sub x1, x1, x0 1: stp xzr, xzr, [x0], #16 diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index ac4c10351b17..ab29c06a7d4b 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -138,6 +138,23 @@ SECTIONS EXCEPTION_TABLE(8) /* __init_begin will be marked RO NX */ NOTES + . = ALIGN(PAGE_SIZE); + idmap_pg_dir = .; + . += IDMAP_DIR_SIZE; + +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 + tramp_pg_dir = .; + . += PAGE_SIZE; +#endif + +#ifdef CONFIG_ARM64_SW_TTBR0_PAN + reserved_ttbr0 = .; + . += RESERVED_TTBR0_SIZE; +#endif + swapper_pg_dir = .; + . += PAGE_SIZE; + swapper_pg_end = .; + . = ALIGN(SEGMENT_ALIGN); __init_begin = .; __inittext_begin = .; @@ -216,22 +233,6 @@ SECTIONS BSS_SECTION(0, 0, 0) . = ALIGN(PAGE_SIZE); - idmap_pg_dir = .; - . += IDMAP_DIR_SIZE; - -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 - tramp_pg_dir = .; - . += PAGE_SIZE; -#endif - -#ifdef CONFIG_ARM64_SW_TTBR0_PAN - reserved_ttbr0 = .; - . += RESERVED_TTBR0_SIZE; -#endif - swapper_pg_dir = .; - . += PAGE_SIZE; - swapper_pg_end = .; - init_pg_dir = .; . 
+= INIT_DIR_SIZE; init_pg_end = .; -- cgit v1.2.3-59-g8ed1b From 24951465cbd279f60b1fdc2421b3694405bcff42 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 5 Sep 2018 15:34:43 +0100 Subject: arm64: compat: Provide definition for COMPAT_SIGMINSTKSZ arch/arm/ defines a SIGMINSTKSZ of 2k, so we should use the same value for compat tasks. Cc: Arnd Bergmann Cc: Dominik Brodowski Cc: "Eric W. Biederman" Cc: Andrew Morton Cc: Al Viro Cc: Oleg Nesterov Reviewed-by: Dave Martin Reported-by: Steve McIntyre Tested-by: Steve McIntyre <93sam@debian.org> Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/compat.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h index 1a037b94eba1..cee28a05ee98 100644 --- a/arch/arm64/include/asm/compat.h +++ b/arch/arm64/include/asm/compat.h @@ -159,6 +159,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr) } #define compat_user_stack_pointer() (user_stack_pointer(task_pt_regs(current))) +#define COMPAT_MINSIGSTKSZ 2048 static inline void __user *arch_compat_alloc_user_space(long len) { -- cgit v1.2.3-59-g8ed1b From 9376b1e7b62523c84fde34908af01a85b8d0468a Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 27 Sep 2018 15:07:37 +0200 Subject: arm64: remove unused asm/compiler.h header file arm64 does not define CONFIG_HAVE_ARCH_COMPILER_H, nor does it keep anything useful in its copy of asm/compiler.h, so let's remove it before anybody starts using it. Acked-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/compiler.h | 30 ------------------------------ arch/arm64/include/asm/sysreg.h | 1 - arch/arm64/include/asm/uaccess.h | 1 - arch/arm64/kernel/psci.c | 1 - 4 files changed, 33 deletions(-) delete mode 100644 arch/arm64/include/asm/compiler.h (limited to 'arch') diff --git a/arch/arm64/include/asm/compiler.h b/arch/arm64/include/asm/compiler.h deleted file mode 100644 index ee35fd0f2236..000000000000 --- a/arch/arm64/include/asm/compiler.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Based on arch/arm/include/asm/compiler.h - * - * Copyright (C) 2012 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -#ifndef __ASM_COMPILER_H -#define __ASM_COMPILER_H - -/* - * This is used to ensure the compiler did actually allocate the register we - * asked it for some inline assembly sequences. Apparently we can't trust the - * compiler from one version to another so a bit of paranoia won't hurt. This - * string is meant to be concatenated with the inline asm string and will - * cause compilation to stop on mismatch. 
(for details, see gcc PR 15089) - */ -#define __asmeq(x, y) ".ifnc " x "," y " ; .err ; .endif\n\t" - -#endif /* __ASM_COMPILER_H */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 7e9ab1fa090c..98e6f44cb021 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -20,7 +20,6 @@ #ifndef __ASM_SYSREG_H #define __ASM_SYSREG_H -#include #include /* diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index c4528dd90875..8ac6e34922e7 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -32,7 +32,6 @@ #include #include #include -#include #include #define get_ds() (KERNEL_DS) diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index e8edbf13302a..8cdaf25e99cd 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -24,7 +24,6 @@ #include -#include #include #include #include -- cgit v1.2.3-59-g8ed1b From bd7ac140b82f2bedfc792d8ccf9b2a108c1324f3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 27 Sep 2018 17:15:28 +0100 Subject: arm64: Add decoding macros for CP15_32 and CP15_64 traps So far, we don't have anything to help decoding ESR_ELx when dealing with ESR_ELx_EC_CP15_{32,64}. As we're about to handle some of those, let's add some useful macros. Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/esr.h | 52 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 37e84f277ee2..56d32e5557a5 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -263,6 +263,58 @@ #define ESR_ELx_FP_EXC_TFV (UL(1) << 23) +/* + * ISS field definitions for CP15 accesses + */ +#define ESR_ELx_CP15_32_ISS_DIR_MASK 0x1 +#define ESR_ELx_CP15_32_ISS_DIR_READ 0x1 +#define ESR_ELx_CP15_32_ISS_DIR_WRITE 0x0 + +#define ESR_ELx_CP15_32_ISS_RT_SHIFT 5 +#define ESR_ELx_CP15_32_ISS_RT_MASK (UL(0x1f) << ESR_ELx_CP15_32_ISS_RT_SHIFT) +#define ESR_ELx_CP15_32_ISS_CRM_SHIFT 1 +#define ESR_ELx_CP15_32_ISS_CRM_MASK (UL(0xf) << ESR_ELx_CP15_32_ISS_CRM_SHIFT) +#define ESR_ELx_CP15_32_ISS_CRN_SHIFT 10 +#define ESR_ELx_CP15_32_ISS_CRN_MASK (UL(0xf) << ESR_ELx_CP15_32_ISS_CRN_SHIFT) +#define ESR_ELx_CP15_32_ISS_OP1_SHIFT 14 +#define ESR_ELx_CP15_32_ISS_OP1_MASK (UL(0x7) << ESR_ELx_CP15_32_ISS_OP1_SHIFT) +#define ESR_ELx_CP15_32_ISS_OP2_SHIFT 17 +#define ESR_ELx_CP15_32_ISS_OP2_MASK (UL(0x7) << ESR_ELx_CP15_32_ISS_OP2_SHIFT) + +#define ESR_ELx_CP15_32_ISS_SYS_MASK (ESR_ELx_CP15_32_ISS_OP1_MASK | \ + ESR_ELx_CP15_32_ISS_OP2_MASK | \ + ESR_ELx_CP15_32_ISS_CRN_MASK | \ + ESR_ELx_CP15_32_ISS_CRM_MASK | \ + ESR_ELx_CP15_32_ISS_DIR_MASK) +#define ESR_ELx_CP15_32_ISS_SYS_VAL(op1, op2, crn, crm) \ + (((op1) << ESR_ELx_CP15_32_ISS_OP1_SHIFT) | \ + ((op2) << ESR_ELx_CP15_32_ISS_OP2_SHIFT) | \ + ((crn) << ESR_ELx_CP15_32_ISS_CRN_SHIFT) | \ + ((crm) << ESR_ELx_CP15_32_ISS_CRM_SHIFT)) + +#define ESR_ELx_CP15_64_ISS_DIR_MASK 0x1 +#define ESR_ELx_CP15_64_ISS_DIR_READ 0x1 +#define ESR_ELx_CP15_64_ISS_DIR_WRITE 0x0 + +#define ESR_ELx_CP15_64_ISS_RT_SHIFT 5 +#define ESR_ELx_CP15_64_ISS_RT_MASK (UL(0x1f) << ESR_ELx_CP15_64_ISS_RT_SHIFT) + +#define ESR_ELx_CP15_64_ISS_RT2_SHIFT 10 +#define ESR_ELx_CP15_64_ISS_RT2_MASK (UL(0x1f) << ESR_ELx_CP15_64_ISS_RT2_SHIFT) + +#define ESR_ELx_CP15_64_ISS_OP1_SHIFT 16 +#define ESR_ELx_CP15_64_ISS_OP1_MASK (UL(0xf) << ESR_ELx_CP15_64_ISS_OP1_SHIFT) +#define ESR_ELx_CP15_64_ISS_CRM_SHIFT 
1 +#define ESR_ELx_CP15_64_ISS_CRM_MASK (UL(0xf) << ESR_ELx_CP15_64_ISS_CRM_SHIFT) + +#define ESR_ELx_CP15_64_ISS_SYS_VAL(op1, crm) \ + (((op1) << ESR_ELx_CP15_64_ISS_OP1_SHIFT) | \ + ((crm) << ESR_ELx_CP15_64_ISS_CRM_SHIFT)) + +#define ESR_ELx_CP15_64_ISS_SYS_MASK (ESR_ELx_CP15_64_ISS_OP1_MASK | \ + ESR_ELx_CP15_64_ISS_CRM_MASK | \ + ESR_ELx_CP15_64_ISS_DIR_MASK) + #ifndef __ASSEMBLY__ #include -- cgit v1.2.3-59-g8ed1b From 70c63cdfd6ee615714c5453cff370032587723c2 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 27 Sep 2018 17:15:29 +0100 Subject: arm64: compat: Add separate CP15 trapping hook Instead of directly generating an UNDEF when trapping a CP15 access, let's add a new entry point to that effect (which only generates an UNDEF for now). Acked-by: Mark Rutland Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas --- arch/arm64/kernel/entry.S | 15 +++++++++++++-- arch/arm64/kernel/traps.c | 13 +++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 8556876c9109..f0a0464d4809 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -697,9 +697,9 @@ el0_sync_compat: cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL0 b.eq el0_undef cmp x24, #ESR_ELx_EC_CP15_32 // CP15 MRC/MCR trap - b.eq el0_undef + b.eq el0_cp15 cmp x24, #ESR_ELx_EC_CP15_64 // CP15 MRRC/MCRR trap - b.eq el0_undef + b.eq el0_cp15 cmp x24, #ESR_ELx_EC_CP14_MR // CP14 MRC/MCR trap b.eq el0_undef cmp x24, #ESR_ELx_EC_CP14_LS // CP14 LDC/STC trap @@ -722,6 +722,17 @@ el0_irq_compat: el0_error_compat: kernel_entry 0, 32 b el0_error_naked + +el0_cp15: + /* + * Trapped CP15 (MRC, MCR, MRRC, MCRR) instructions + */ + enable_daif + ct_user_exit + mov x0, x25 + mov x1, sp + bl do_cp15instr + b ret_to_user #endif el0_da: diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 21689c6a985f..0e2665936493 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -547,6 +547,19 @@ static struct sys64_hook sys64_hooks[] = { {}, }; + +#ifdef CONFIG_COMPAT +asmlinkage void __exception do_cp15instr(unsigned int esr, struct pt_regs *regs) +{ + /* + * New cp15 instructions may previously have been undefined at + * EL0. Fall back to our usual undefined instruction handler + * so that we handle these consistently. + */ + do_undefinstr(regs); +} +#endif + asmlinkage void __exception do_sysinstr(unsigned int esr, struct pt_regs *regs) { struct sys64_hook *hook; -- cgit v1.2.3-59-g8ed1b From 1f1c014035a8084a768e7e902c6f5857995b1220 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 27 Sep 2018 17:15:30 +0100 Subject: arm64: compat: Add condition code checks and IT advance Here's a /really nice/ part of the architecture: a CP15 access is allowed to trap even if it fails its condition check, and SW must handle it. This includes decoding the IT state if this happens in am IT block. As a consequence, SW must also deal with advancing the IT state machine. 
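As a rough illustration of the rule described above (a sketch, not part of the patch; the real helpers are compat_get_it_state()/advance_itstate() in the hunk below): ITSTATE keeps the base condition in bits [7:5] and the block mask in bits [4:0], and every retired instruction, including one whose condition check failed, shifts the mask left by one until the block is exhausted.

	/*
	 * Illustrative model only. A trapped CP15 access inside an IT
	 * block must perform this advance even when its condition fails,
	 * otherwise the remaining instructions of the block would run
	 * under a stale condition.
	 */
	static unsigned int it_advance_model(unsigned int it)
	{
		if (!(it & 7))		/* last instruction of the block */
			return 0;
		return (it & 0xe0) | ((it << 1) & 0x1f);
	}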
Reviewed-by: Mark Rutland Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas --- arch/arm64/kernel/traps.c | 85 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 0e2665936493..95a646c154fe 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -549,8 +549,93 @@ static struct sys64_hook sys64_hooks[] = { #ifdef CONFIG_COMPAT +#define PSTATE_IT_1_0_SHIFT 25 +#define PSTATE_IT_1_0_MASK (0x3 << PSTATE_IT_1_0_SHIFT) +#define PSTATE_IT_7_2_SHIFT 10 +#define PSTATE_IT_7_2_MASK (0x3f << PSTATE_IT_7_2_SHIFT) + +static u32 compat_get_it_state(struct pt_regs *regs) +{ + u32 it, pstate = regs->pstate; + + it = (pstate & PSTATE_IT_1_0_MASK) >> PSTATE_IT_1_0_SHIFT; + it |= ((pstate & PSTATE_IT_7_2_MASK) >> PSTATE_IT_7_2_SHIFT) << 2; + + return it; +} + +static void compat_set_it_state(struct pt_regs *regs, u32 it) +{ + u32 pstate_it; + + pstate_it = (it << PSTATE_IT_1_0_SHIFT) & PSTATE_IT_1_0_MASK; + pstate_it |= ((it >> 2) << PSTATE_IT_7_2_SHIFT) & PSTATE_IT_7_2_MASK; + + regs->pstate &= ~PSR_AA32_IT_MASK; + regs->pstate |= pstate_it; +} + +static bool cp15_cond_valid(unsigned int esr, struct pt_regs *regs) +{ + int cond; + + /* Only a T32 instruction can trap without CV being set */ + if (!(esr & ESR_ELx_CV)) { + u32 it; + + it = compat_get_it_state(regs); + if (!it) + return true; + + cond = it >> 4; + } else { + cond = (esr & ESR_ELx_COND_MASK) >> ESR_ELx_COND_SHIFT; + } + + return aarch32_opcode_cond_checks[cond](regs->pstate); +} + +static void advance_itstate(struct pt_regs *regs) +{ + u32 it; + + /* ARM mode */ + if (!(regs->pstate & PSR_AA32_T_BIT) || + !(regs->pstate & PSR_AA32_IT_MASK)) + return; + + it = compat_get_it_state(regs); + + /* + * If this is the last instruction of the block, wipe the IT + * state. Otherwise advance it. + */ + if (!(it & 7)) + it = 0; + else + it = (it & 0xe0) | ((it << 1) & 0x1f); + + compat_set_it_state(regs, it); +} + +static void arm64_compat_skip_faulting_instruction(struct pt_regs *regs, + unsigned int sz) +{ + advance_itstate(regs); + arm64_skip_faulting_instruction(regs, sz); +} + asmlinkage void __exception do_cp15instr(unsigned int esr, struct pt_regs *regs) { + if (!cp15_cond_valid(esr, regs)) { + /* + * There is no T16 variant of a CP access, so we + * always advance PC by 4 bytes. + */ + arm64_compat_skip_faulting_instruction(regs, 4); + return; + } + /* * New cp15 instructions may previously have been undefined at * EL0. Fall back to our usual undefined instruction handler -- cgit v1.2.3-59-g8ed1b From 2a8905e18c55d5576d7a53da495b4de0cfcbc459 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 27 Sep 2018 17:15:31 +0100 Subject: arm64: compat: Add cp15_32 and cp15_64 handler arrays We're now ready to start handling CP15 access. Let's add (empty) arrays for both 32 and 64bit accessors, and the code that deals with them. 
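For context, each entry in these arrays pairs an ESR ISS mask/value with a handler, and do_cp15instr() walks the array until a match is found. A hypothetical registration (the names below are made up; the real CNTVCT/CNTFRQ entries arrive in the following patches) would look like:

	/* Hypothetical example only: the esr_val and handler are placeholders. */
	static void example_cp15_64_handler(unsigned int esr, struct pt_regs *regs);

	static struct sys64_hook example_cp15_64_hooks[] = {
		{
			.esr_mask = ESR_ELx_CP15_64_ISS_SYS_MASK,
			.esr_val  = EXAMPLE_CP15_64_ISS_SYS_VAL,	/* hypothetical encoding */
			.handler  = example_cp15_64_handler,
		},
		{},	/* sentinel: the walk stops at .handler == NULL */
	};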
Reviewed-by: Mark Rutland Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas --- arch/arm64/kernel/traps.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 95a646c154fe..76ffb9f42aa4 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -625,8 +625,18 @@ static void arm64_compat_skip_faulting_instruction(struct pt_regs *regs, arm64_skip_faulting_instruction(regs, sz); } +static struct sys64_hook cp15_32_hooks[] = { + {}, +}; + +static struct sys64_hook cp15_64_hooks[] = { + {}, +}; + asmlinkage void __exception do_cp15instr(unsigned int esr, struct pt_regs *regs) { + struct sys64_hook *hook, *hook_base; + if (!cp15_cond_valid(esr, regs)) { /* * There is no T16 variant of a CP access, so we @@ -636,6 +646,24 @@ asmlinkage void __exception do_cp15instr(unsigned int esr, struct pt_regs *regs) return; } + switch (ESR_ELx_EC(esr)) { + case ESR_ELx_EC_CP15_32: + hook_base = cp15_32_hooks; + break; + case ESR_ELx_EC_CP15_64: + hook_base = cp15_64_hooks; + break; + default: + do_undefinstr(regs); + return; + } + + for (hook = hook_base; hook->handler; hook++) + if ((hook->esr_mask & esr) == hook->esr_val) { + hook->handler(esr, regs); + return; + } + /* * New cp15 instructions may previously have been undefined at * EL0. Fall back to our usual undefined instruction handler -- cgit v1.2.3-59-g8ed1b From 50de013d22e4e112d7b0778a0e7d032f16c46778 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 27 Sep 2018 17:15:32 +0100 Subject: arm64: compat: Add CNTVCT trap handler Since people seem to make a point in breaking the userspace visible counter, we have no choice but to trap the access. We already do this for 64bit userspace, but this is lacking for compat. Let's provide the required handler. 
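For reference, a sketch of the userspace access that now traps (illustrative only, assuming an AArch32 compilation target): the 64-bit virtual counter is read with MRRC p15 using op1 == 1 and CRm == c14, which is the pair matched by the ESR_ELx_CP15_64_ISS_SYS_CNTVCT encoding added below.

	/* Illustration only: AArch32 userspace read of CNTVCT. */
	static inline unsigned long long aarch32_read_cntvct(void)
	{
		unsigned int lo, hi;

		asm volatile("mrrc p15, 1, %0, %1, c14" : "=r" (lo), "=r" (hi));
		return ((unsigned long long)hi << 32) | lo;
	}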
Reviewed-by: Mark Rutland Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/esr.h | 3 +++ arch/arm64/kernel/traps.c | 16 ++++++++++++++++ 2 files changed, 19 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 56d32e5557a5..5548712ce6e5 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -315,6 +315,9 @@ ESR_ELx_CP15_64_ISS_CRM_MASK | \ ESR_ELx_CP15_64_ISS_DIR_MASK) +#define ESR_ELx_CP15_64_ISS_SYS_CNTVCT (ESR_ELx_CP15_64_ISS_SYS_VAL(1, 14) | \ + ESR_ELx_CP15_64_ISS_DIR_READ) + #ifndef __ASSEMBLY__ #include diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 76ffb9f42aa4..3602b900ff1c 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -629,7 +629,23 @@ static struct sys64_hook cp15_32_hooks[] = { {}, }; +static void compat_cntvct_read_handler(unsigned int esr, struct pt_regs *regs) +{ + int rt = (esr & ESR_ELx_CP15_64_ISS_RT_MASK) >> ESR_ELx_CP15_64_ISS_RT_SHIFT; + int rt2 = (esr & ESR_ELx_CP15_64_ISS_RT2_MASK) >> ESR_ELx_CP15_64_ISS_RT2_SHIFT; + u64 val = arch_counter_get_cntvct(); + + pt_regs_write_reg(regs, rt, lower_32_bits(val)); + pt_regs_write_reg(regs, rt2, upper_32_bits(val)); + arm64_compat_skip_faulting_instruction(regs, 4); +} + static struct sys64_hook cp15_64_hooks[] = { + { + .esr_mask = ESR_ELx_CP15_64_ISS_SYS_MASK, + .esr_val = ESR_ELx_CP15_64_ISS_SYS_CNTVCT, + .handler = compat_cntvct_read_handler, + }, {}, }; -- cgit v1.2.3-59-g8ed1b From 32a3e635fb0ecc1b197d54f710e76c6481cf19f0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 27 Sep 2018 17:15:33 +0100 Subject: arm64: compat: Add CNTFRQ trap handler Just like CNTVCT, we need to handle userspace trapping into the kernel if we're decided that the timer wasn't fit for purpose... 64bit userspace is already dealt with, but we're missing the equivalent compat handling. 
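Again as a reference sketch (illustrative only, assuming an AArch32 compilation target): the counter frequency is read with MRC p15, 0, Rt, c14, c0, 0, i.e. op1 == 0, op2 == 0, CRn == 14, CRm == 0, matching the SYS_VAL(0, 0, 14, 0) encoding used by the handler below.

	/* Illustration only: AArch32 userspace read of CNTFRQ. */
	static inline unsigned int aarch32_read_cntfrq(void)
	{
		unsigned int freq;

		asm volatile("mrc p15, 0, %0, c14, c0, 0" : "=r" (freq));
		return freq;
	}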
Reviewed-by: Mark Rutland Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/esr.h | 3 +++ arch/arm64/kernel/traps.c | 13 +++++++++++++ 2 files changed, 16 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 5548712ce6e5..fb7dfe1b51bb 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -318,6 +318,9 @@ #define ESR_ELx_CP15_64_ISS_SYS_CNTVCT (ESR_ELx_CP15_64_ISS_SYS_VAL(1, 14) | \ ESR_ELx_CP15_64_ISS_DIR_READ) +#define ESR_ELx_CP15_32_ISS_SYS_CNTFRQ (ESR_ELx_CP15_32_ISS_SYS_VAL(0, 0, 14, 0) |\ + ESR_ELx_CP15_32_ISS_DIR_READ) + #ifndef __ASSEMBLY__ #include diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 3602b900ff1c..58134a97928f 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -625,7 +625,20 @@ static void arm64_compat_skip_faulting_instruction(struct pt_regs *regs, arm64_skip_faulting_instruction(regs, sz); } +static void compat_cntfrq_read_handler(unsigned int esr, struct pt_regs *regs) +{ + int reg = (esr & ESR_ELx_CP15_32_ISS_RT_MASK) >> ESR_ELx_CP15_32_ISS_RT_SHIFT; + + pt_regs_write_reg(regs, reg, arch_timer_get_rate()); + arm64_compat_skip_faulting_instruction(regs, 4); +} + static struct sys64_hook cp15_32_hooks[] = { + { + .esr_mask = ESR_ELx_CP15_32_ISS_SYS_MASK, + .esr_val = ESR_ELx_CP15_32_ISS_SYS_CNTFRQ, + .handler = compat_cntfrq_read_handler, + }, {}, }; -- cgit v1.2.3-59-g8ed1b From 95b861a4a6d94f64d5242605569218160ebacdbe Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 27 Sep 2018 17:15:34 +0100 Subject: arm64: arch_timer: Add workaround for ARM erratum 1188873 When running on Cortex-A76, a timer access from an AArch32 EL0 task may end up with a corrupted value or register. The workaround for this is to trap these accesses at EL1/EL2 and execute them there. This only affects versions r0p0, r1p0 and r2p0 of the CPU. Acked-by: Mark Rutland Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 12 ++++++++++++ arch/arm64/include/asm/cpucaps.h | 3 ++- arch/arm64/include/asm/cputype.h | 2 ++ arch/arm64/kernel/cpu_errata.c | 8 ++++++++ drivers/clocksource/arm_arch_timer.c | 15 +++++++++++++++ 5 files changed, 39 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index da5e6f085561..52985d175e5a 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -481,6 +481,18 @@ config ARM64_ERRATUM_1024718 If unsure, say Y. +config ARM64_ERRATUM_1188873 + bool "Cortex-A76: MRC read following MRRC read of specific Generic Timer in AArch32 might give incorrect result" + default y + help + This option adds work arounds for ARM Cortex-A76 erratum 1188873 + + Affected Cortex-A76 cores (r0p0, r1p0, r2p0) could cause + register corruption when accessing the timer registers from + AArch32 userspace. + + If unsure, say Y. 
+ config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 6eb1b3fd0493..6e2d254c09eb 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -53,7 +53,8 @@ #define ARM64_HAS_STAGE2_FWB 32 #define ARM64_HAS_CRC32 33 #define ARM64_SSBS 34 +#define ARM64_WORKAROUND_1188873 35 -#define ARM64_NCAPS 35 +#define ARM64_NCAPS 36 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index ea690b3562af..12f93e4d2452 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -86,6 +86,7 @@ #define ARM_CPU_PART_CORTEX_A75 0xD0A #define ARM_CPU_PART_CORTEX_A35 0xD04 #define ARM_CPU_PART_CORTEX_A55 0xD05 +#define ARM_CPU_PART_CORTEX_A76 0xD0B #define APM_CPU_PART_POTENZA 0x000 @@ -110,6 +111,7 @@ #define MIDR_CORTEX_A75 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A75) #define MIDR_CORTEX_A35 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A35) #define MIDR_CORTEX_A55 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A55) +#define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 20be4c578e0a..cde948991d68 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -696,6 +696,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, .matches = has_ssbd_mitigation, }, +#endif +#ifdef CONFIG_ARM64_ERRATUM_1188873 + { + /* Cortex-A76 r0p0 to r2p0 */ + .desc = "ARM erratum 1188873", + .capability = ARM64_WORKAROUND_1188873, + ERRATA_MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 2, 0), + }, #endif { } diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index d8c7f5750cdb..9a7d4dc00b6e 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -319,6 +319,13 @@ static u64 notrace arm64_858921_read_cntvct_el0(void) } #endif +#ifdef CONFIG_ARM64_ERRATUM_1188873 +static u64 notrace arm64_1188873_read_cntvct_el0(void) +{ + return read_sysreg(cntvct_el0); +} +#endif + #ifdef CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND DEFINE_PER_CPU(const struct arch_timer_erratum_workaround *, timer_unstable_counter_workaround); EXPORT_SYMBOL_GPL(timer_unstable_counter_workaround); @@ -408,6 +415,14 @@ static const struct arch_timer_erratum_workaround ool_workarounds[] = { .read_cntvct_el0 = arm64_858921_read_cntvct_el0, }, #endif +#ifdef CONFIG_ARM64_ERRATUM_1188873 + { + .match_type = ate_match_local_cap_id, + .id = (void *)ARM64_WORKAROUND_1188873, + .desc = "ARM erratum 1188873", + .read_cntvct_el0 = arm64_1188873_read_cntvct_el0, + }, +#endif }; typedef bool (*ate_match_fn_t)(const struct arch_timer_erratum_workaround *, -- cgit v1.2.3-59-g8ed1b From 00bbd5d9016d49fa8dddef020a06b94fedca9148 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Sat, 22 Sep 2018 21:09:52 +0530 Subject: arm64/mm: Use ESR_ELx_FSC macro while decoding fault exception Just replace hard code value of 63 (0x111111) with an existing macro ESR_ELx_FSC when parsing for the status code during fault exception. 
Acked-by: Mark Rutland Signed-off-by: Anshuman Khandual Signed-off-by: Catalin Marinas --- arch/arm64/mm/fault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 6342f1793c70..6d9acd910104 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -59,7 +59,7 @@ static const struct fault_info fault_info[]; static inline const struct fault_info *esr_to_fault_info(unsigned int esr) { - return fault_info + (esr & 63); + return fault_info + (esr & ESR_ELx_FSC); } #ifdef CONFIG_KPROBES -- cgit v1.2.3-59-g8ed1b From dbfe3828a6f36be904427ed7f196744cb02d3072 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Sat, 22 Sep 2018 21:09:53 +0530 Subject: arm64/mm: Reorganize arguments for is_el1_permission_fault() Most memory abort exception handling related functions have the arguments in the order (addr, esr, regs) except is_el1_permission_fault(). This changes the argument order in this function as (addr, esr, regs) like others. Acked-by: Mark Rutland Signed-off-by: Anshuman Khandual Signed-off-by: Catalin Marinas --- arch/arm64/mm/fault.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 6d9acd910104..0e0ea5fe6ab3 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -235,9 +235,8 @@ static bool is_el1_instruction_abort(unsigned int esr) return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_CUR; } -static inline bool is_el1_permission_fault(unsigned int esr, - struct pt_regs *regs, - unsigned long addr) +static inline bool is_el1_permission_fault(unsigned long addr, unsigned int esr, + struct pt_regs *regs) { unsigned int ec = ESR_ELx_EC(esr); unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE; @@ -283,7 +282,7 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr, if (!is_el1_instruction_abort(esr) && fixup_exception(regs)) return; - if (is_el1_permission_fault(esr, regs, addr)) { + if (is_el1_permission_fault(addr, esr, regs)) { if (esr & ESR_ELx_WNR) msg = "write to read-only memory"; else @@ -454,7 +453,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, mm_flags |= FAULT_FLAG_WRITE; } - if (addr < TASK_SIZE && is_el1_permission_fault(esr, regs, addr)) { + if (addr < TASK_SIZE && is_el1_permission_fault(addr, esr, regs)) { /* regs->orig_addr_limit may be 0 if we entered from EL0 */ if (regs->orig_addr_limit == KERNEL_DS) die_kernel_fault("access to user memory with fs=KERNEL_DS", -- cgit v1.2.3-59-g8ed1b From 359048f91db4be5e8be9c2b95c788c79a6756797 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Sat, 22 Sep 2018 21:09:54 +0530 Subject: arm64/mm: Define esr_to_debug_fault_info() fault_info[] and debug_fault_info[] are static arrays defining memory abort exception handling functions looking into ESR fault status code encodings. As esr_to_fault_info() is already available providing fault_info[] array lookup, it really makes sense to have a corresponding debug_fault_info[] array lookup function as well. This just adds an equivalent helper function esr_to_debug_fault_info(). 
Acked-by: Mark Rutland Signed-off-by: Anshuman Khandual Signed-off-by: Catalin Marinas --- arch/arm64/mm/fault.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 0e0ea5fe6ab3..1aa487a37a0a 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -56,12 +56,18 @@ struct fault_info { }; static const struct fault_info fault_info[]; +static struct fault_info debug_fault_info[]; static inline const struct fault_info *esr_to_fault_info(unsigned int esr) { return fault_info + (esr & ESR_ELx_FSC); } +static inline const struct fault_info *esr_to_debug_fault_info(unsigned int esr) +{ + return debug_fault_info + DBG_ESR_EVT(esr); +} + #ifdef CONFIG_KPROBES static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr) { @@ -830,7 +836,7 @@ asmlinkage int __exception do_debug_exception(unsigned long addr, unsigned int esr, struct pt_regs *regs) { - const struct fault_info *inf = debug_fault_info + DBG_ESR_EVT(esr); + const struct fault_info *inf = esr_to_debug_fault_info(esr); int rv; /* -- cgit v1.2.3-59-g8ed1b From 77cfe950901e5c13aca2df6437a05f39dd9a929b Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Sat, 22 Sep 2018 21:09:55 +0530 Subject: arm64/numa: Report correct memblock range for the dummy node The dummy node ID is marked into all memory ranges on the system. So the dummy node really extends the entire memblock.memory. Hence report correct extent information for the dummy node using memblock range helper functions instead of the range [0LLU, PFN_PHYS(max_pfn) - 1)]. Fixes: 1a2db30034 ("arm64, numa: Add NUMA support for arm64 platforms") Acked-by: Punit Agrawal Signed-off-by: Anshuman Khandual Signed-off-by: Catalin Marinas --- arch/arm64/mm/numa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index 146c04ceaa51..54529b4ed513 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -432,7 +432,7 @@ static int __init dummy_numa_init(void) if (numa_off) pr_info("NUMA disabled\n"); /* Forced off on command line. */ pr_info("Faking a node at [mem %#018Lx-%#018Lx]\n", - 0LLU, PFN_PHYS(max_pfn) - 1); + memblock_start_of_DRAM(), memblock_end_of_DRAM() - 1); for_each_memblock(memory, mblk) { ret = numa_add_memblk(0, mblk->base, mblk->base + mblk->size); -- cgit v1.2.3-59-g8ed1b From 52338088ef0569290b7eae0759c58a3de494e6c0 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Sat, 22 Sep 2018 21:09:56 +0530 Subject: arm64/numa: Unify common error path in numa_init() At present numa_free_distance() is being called before numa_distance is even initialized with numa_alloc_distance() which is really pointless. Instead lets call numa_free_distance() on the common error path inside numa_init() after numa_alloc_distance() has been successful. 
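The resulting control flow can be summarised with a sketch (a simplified restatement of the change below, omitting the nodes_empty() check for brevity): allocate the distance table first, then unwind through a single exit label if any later step fails.

	/* Simplified sketch of the error-path shape adopted below. */
	static int numa_init_shape(int (*init_func)(void))
	{
		int ret = numa_alloc_distance();

		if (ret < 0)
			return ret;

		ret = init_func();
		if (ret < 0)
			goto out_free_distance;

		ret = numa_register_nodes();
		if (ret < 0)
			goto out_free_distance;

		setup_node_to_cpumask_map();
		return 0;

	out_free_distance:
		numa_free_distance();
		return ret;
	}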
Fixes: 1a2db30034 ("arm64, numa: Add NUMA support for arm64 platforms") Acked-by: Punit Agrawal Signed-off-by: Anshuman Khandual Signed-off-by: Catalin Marinas --- arch/arm64/mm/numa.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index 54529b4ed513..d7b66fc5e1c5 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -391,7 +391,6 @@ static int __init numa_init(int (*init_func)(void)) nodes_clear(numa_nodes_parsed); nodes_clear(node_possible_map); nodes_clear(node_online_map); - numa_free_distance(); ret = numa_alloc_distance(); if (ret < 0) @@ -399,20 +398,24 @@ static int __init numa_init(int (*init_func)(void)) ret = init_func(); if (ret < 0) - return ret; + goto out_free_distance; if (nodes_empty(numa_nodes_parsed)) { pr_info("No NUMA configuration found\n"); - return -EINVAL; + ret = -EINVAL; + goto out_free_distance; } ret = numa_register_nodes(); if (ret < 0) - return ret; + goto out_free_distance; setup_node_to_cpumask_map(); return 0; +out_free_distance: + numa_free_distance(); + return ret; } /** -- cgit v1.2.3-59-g8ed1b From 2ba0dacbae94482e80b00dbd368f0a4aeaabbdec Mon Sep 17 00:00:00 2001 From: zhong jiang Date: Thu, 9 Aug 2018 22:20:40 +0800 Subject: arm64/kprobes: remove an extra semicolon in arch_prepare_kprobe There is an extra semicolon in arch_prepare_kprobe, remove it. Signed-off-by: zhong jiang Signed-off-by: Catalin Marinas --- arch/arm64/kernel/probes/kprobes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index e78c3ef04d95..9b65132e789a 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -107,7 +107,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) if (!p->ainsn.api.insn) return -ENOMEM; break; - }; + } /* prepare the instruction */ if (p->ainsn.api.insn) -- cgit v1.2.3-59-g8ed1b From c219bc4e920518feb025749bdf9623aa57e94b64 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 1 Oct 2018 12:19:43 +0100 Subject: arm64: Trap WFI executed in userspace It recently came to light that userspace can execute WFI, and that the arm64 kernel doesn't trap this event. This sounds rather benign, but the kernel should decide when it wants to wait for an interrupt, and not userspace. Let's trap WFI and immediately return after having skipped the instruction. This effectively makes WFI a rather expensive NOP. 
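To illustrate what is being trapped (a sketch, not part of the patch, assuming an AArch64 userspace build): any EL0 task can issue WFI directly, and with SCTLR_EL1.nTWI now clear each such instruction takes an exception, gets skipped, and execution continues with the next instruction.

	/* Illustration only: EL0 code issuing WFI. Previously this could
	 * park the CPU until the next interrupt; it now traps to EL1 and
	 * behaves like an expensive NOP. */
	static inline void user_wfi(void)
	{
		asm volatile("wfi" ::: "memory");
	}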
Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/esr.h | 5 +++++ arch/arm64/include/asm/sysreg.h | 4 ++-- arch/arm64/kernel/entry.S | 1 + arch/arm64/kernel/traps.c | 11 +++++++++++ 4 files changed, 19 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index fb7dfe1b51bb..676de2ec1762 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -137,6 +137,8 @@ #define ESR_ELx_CV (UL(1) << 24) #define ESR_ELx_COND_SHIFT (20) #define ESR_ELx_COND_MASK (UL(0xF) << ESR_ELx_COND_SHIFT) +#define ESR_ELx_WFx_ISS_TI (UL(1) << 0) +#define ESR_ELx_WFx_ISS_WFI (UL(0) << 0) #define ESR_ELx_WFx_ISS_WFE (UL(1) << 0) #define ESR_ELx_xVC_IMM_MASK ((1UL << 16) - 1) @@ -148,6 +150,9 @@ #define DISR_EL1_ESR_MASK (ESR_ELx_AET | ESR_ELx_EA | ESR_ELx_FSC) /* ESR value templates for specific events */ +#define ESR_ELx_WFx_MASK (ESR_ELx_EC_MASK | ESR_ELx_WFx_ISS_TI) +#define ESR_ELx_WFx_WFI_VAL ((ESR_ELx_EC_WFx << ESR_ELx_EC_SHIFT) | \ + ESR_ELx_WFx_ISS_WFI) /* BRK instruction trap from AArch64 state */ #define ESR_ELx_VAL_BRK64(imm) \ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 98e6f44cb021..0c909c4a932f 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -502,12 +502,12 @@ #define SCTLR_EL1_SET (SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_SA |\ SCTLR_EL1_SA0 | SCTLR_EL1_SED | SCTLR_ELx_I |\ - SCTLR_EL1_DZE | SCTLR_EL1_UCT | SCTLR_EL1_NTWI |\ + SCTLR_EL1_DZE | SCTLR_EL1_UCT |\ SCTLR_EL1_NTWE | SCTLR_ELx_IESB | SCTLR_EL1_SPAN |\ ENDIAN_SET_EL1 | SCTLR_EL1_UCI | SCTLR_EL1_RES1) #define SCTLR_EL1_CLEAR (SCTLR_ELx_A | SCTLR_EL1_CP15BEN | SCTLR_EL1_ITD |\ SCTLR_EL1_UMA | SCTLR_ELx_WXN | ENDIAN_CLEAR_EL1 |\ - SCTLR_ELx_DSSBS | SCTLR_EL1_RES0) + SCTLR_ELx_DSSBS | SCTLR_EL1_NTWI | SCTLR_EL1_RES0) #if (SCTLR_EL1_SET ^ SCTLR_EL1_CLEAR) != 0xffffffffffffffff #error "Inconsistent SCTLR_EL1 set/clear bits" diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index f0a0464d4809..039144ecbcb2 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -665,6 +665,7 @@ el0_sync: cmp x24, #ESR_ELx_EC_FP_EXC64 // FP/ASIMD exception b.eq el0_fpsimd_exc cmp x24, #ESR_ELx_EC_SYS64 // configurable trap + ccmp x24, #ESR_ELx_EC_WFx, #4, ne b.eq el0_sys cmp x24, #ESR_ELx_EC_SP_ALIGN // stack alignment exception b.eq el0_sp_pc diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 58134a97928f..4066da7f1e5e 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -508,6 +508,11 @@ static void mrs_handler(unsigned int esr, struct pt_regs *regs) force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc); } +static void wfi_handler(unsigned int esr, struct pt_regs *regs) +{ + arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); +} + struct sys64_hook { unsigned int esr_mask; unsigned int esr_val; @@ -544,6 +549,12 @@ static struct sys64_hook sys64_hooks[] = { .esr_val = ESR_ELx_SYS64_ISS_SYS_MRS_OP_VAL, .handler = mrs_handler, }, + { + /* Trap WFI instructions executed in userspace */ + .esr_mask = ESR_ELx_WFx_MASK, + .esr_val = ESR_ELx_WFx_WFI_VAL, + .handler = wfi_handler, + }, {}, }; -- cgit v1.2.3-59-g8ed1b From 040f340134751d73bd03ee92fabb992946c55b3d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 2 Oct 2018 23:11:44 +0200 Subject: arm64: arch_timer: avoid unused function warning arm64_1188873_read_cntvct_el0() is protected by the correct CONFIG_ARM64_ERRATUM_1188873 #ifdef, 
but the only reference to it is also inside of an CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND section, and causes a warning if that is disabled: drivers/clocksource/arm_arch_timer.c:323:20: error: 'arm64_1188873_read_cntvct_el0' defined but not used [-Werror=unused-function] Since the erratum requires that we always apply the workaround in the timer driver, select that symbol as we do for SoC specific errata. Fixes: 95b861a4a6d9 ("arm64: arch_timer: Add workaround for ARM erratum 1188873") Acked-by: Marc Zyngier Signed-off-by: Arnd Bergmann Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 52985d175e5a..a8ae30fab508 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -484,6 +484,7 @@ config ARM64_ERRATUM_1024718 config ARM64_ERRATUM_1188873 bool "Cortex-A76: MRC read following MRRC read of specific Generic Timer in AArch32 might give incorrect result" default y + select ARM_ARCH_TIMER_OOL_WORKAROUND help This option adds work arounds for ARM Cortex-A76 erratum 1188873 -- cgit v1.2.3-59-g8ed1b From f05692919bdd44ad2a1a1fe85222dec16fec7e80 Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Tue, 28 Aug 2018 16:51:14 +0100 Subject: arm64: daifflags: Use irqflags functions for daifflags Some of the work done in daifflags save/restore is already provided by irqflags functions. Daifflags should always be a superset of irqflags (it handles irq status + status of other flags). Modifying behaviour of irqflags should alter the behaviour of daifflags. Use irqflags_save/restore functions for the corresponding daifflags operation. Reviewed-by: James Morse Signed-off-by: Julien Thierry Cc: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/daifflags.h | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h index 22e4c83de5a5..8d91f2233135 100644 --- a/arch/arm64/include/asm/daifflags.h +++ b/arch/arm64/include/asm/daifflags.h @@ -36,11 +36,8 @@ static inline unsigned long local_daif_save(void) { unsigned long flags; - asm volatile( - "mrs %0, daif // local_daif_save\n" - : "=r" (flags) - : - : "memory"); + flags = arch_local_save_flags(); + local_daif_mask(); return flags; @@ -60,11 +57,9 @@ static inline void local_daif_restore(unsigned long flags) { if (!arch_irqs_disabled_flags(flags)) trace_hardirqs_on(); - asm volatile( - "msr daif, %0 // local_daif_restore" - : - : "r" (flags) - : "memory"); + + arch_local_irq_restore(flags); + if (arch_irqs_disabled_flags(flags)) trace_hardirqs_off(); } -- cgit v1.2.3-59-g8ed1b From 9a0c032825e073e002ee80cf2577431d7296a7f7 Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Tue, 28 Aug 2018 16:51:15 +0100 Subject: arm64: Use daifflag_restore after bp_hardening For EL0 entries requiring bp_hardening, daif status is kept at DAIF_PROCCTX_NOIRQ until after hardening has been done. Then interrupts are enabled through local_irq_enable(). Before using local_irq_* functions, daifflags should be properly restored to a state where IRQs are enabled. Enable IRQs by restoring DAIF_PROCCTX state after bp hardening. 
Acked-by: James Morse Signed-off-by: Julien Thierry Cc: Will Deacon Cc: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/mm/fault.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 1aa487a37a0a..d0e638ef3af6 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -776,7 +777,7 @@ asmlinkage void __exception do_el0_ia_bp_hardening(unsigned long addr, if (addr > TASK_SIZE) arm64_apply_bp_hardening(); - local_irq_enable(); + local_daif_restore(DAIF_PROCCTX); do_mem_abort(addr, esr, regs); } @@ -790,7 +791,7 @@ asmlinkage void __exception do_sp_pc_abort(unsigned long addr, if (user_mode(regs)) { if (instruction_pointer(regs) > TASK_SIZE) arm64_apply_bp_hardening(); - local_irq_enable(); + local_daif_restore(DAIF_PROCCTX); } clear_siginfo(&info); -- cgit v1.2.3-59-g8ed1b From b0506a8bbb42a859f6d25b3ecc4b6da93bae8d5a Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Tue, 28 Aug 2018 16:51:17 +0100 Subject: arm64: xen: Use existing helper to check interrupt status The status of interrupts might depend on more than just pstate. Use interrupts_disabled() instead of raw_irqs_disabled_flags() to take the full context into account. Acked-by: Stefano Stabellini Signed-off-by: Julien Thierry Cc: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/xen/events.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/xen/events.h b/arch/arm64/include/asm/xen/events.h index 4e22b7a8c038..2788e95d0ff0 100644 --- a/arch/arm64/include/asm/xen/events.h +++ b/arch/arm64/include/asm/xen/events.h @@ -14,7 +14,7 @@ enum ipi_vector { static inline int xen_irqs_disabled(struct pt_regs *regs) { - return raw_irqs_disabled_flags((unsigned long) regs->pstate); + return !interrupts_enabled(regs); } #define xchg_xen_ulong(ptr, val) xchg((ptr), (val)) -- cgit v1.2.3-59-g8ed1b From 0293c8ba807c8611ea5503f9511029dd9082025a Mon Sep 17 00:00:00 2001 From: Kyrylo Tkachov Date: Thu, 4 Oct 2018 17:06:46 +0100 Subject: arm64: Fix typo in a comment in arch/arm64/mm/kasan_init.c "bellow" -> "below" The recommendation from kegel.com/kerspell is to only fix the howlers. "Bellow" is a synonym of "howl" so this should be appropriate. Signed-off-by: Kyrylo Tkachov Signed-off-by: Catalin Marinas --- arch/arm64/mm/kasan_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index 12145874c02b..fccb1a6f8c6f 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -192,7 +192,7 @@ void __init kasan_init(void) /* * We are going to perform proper setup of shadow memory. - * At first we should unmap early shadow (clear_pgds() call bellow). + * At first we should unmap early shadow (clear_pgds() call below). * However, instrumented code couldn't execute without shadow memory. * tmp_pg_dir used to keep early shadow mapped until full shadow * setup will be finished. -- cgit v1.2.3-59-g8ed1b From e9ed821be48600ea3ec7f7b76e478c769729f83c Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 5 Oct 2018 14:49:16 +0100 Subject: arm64: mm: Use #ifdef for the __PAGETABLE_P?D_FOLDED defines __is_defined(__PAGETABLE_P?D_FOLDED) doesn't quite work as intended as these symbols are internal to asm-generic and aren't defined in the way kconfig expects. 
This makes them always evaluate to false. Switch to #ifdef. Acked-by: Mark Rutland Signed-off-by: James Morse Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/pgtable.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index b58f764babf8..50b1ef8584c0 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -445,10 +445,12 @@ static inline bool in_swapper_pgdir(void *addr) static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) { - if (__is_defined(__PAGETABLE_PMD_FOLDED) && in_swapper_pgdir(pmdp)) { +#ifdef __PAGETABLE_PMD_FOLDED + if (in_swapper_pgdir(pmdp)) { set_swapper_pgd((pgd_t *)pmdp, __pgd(pmd_val(pmd))); return; } +#endif /* __PAGETABLE_PMD_FOLDED */ WRITE_ONCE(*pmdp, pmd); @@ -503,10 +505,12 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd) static inline void set_pud(pud_t *pudp, pud_t pud) { - if (__is_defined(__PAGETABLE_PUD_FOLDED) && in_swapper_pgdir(pudp)) { +#ifdef __PAGETABLE_PUD_FOLDED + if (in_swapper_pgdir(pudp)) { set_swapper_pgd((pgd_t *)pudp, __pgd(pud_val(pud))); return; } +#endif /* __PAGETABLE_PUD_FOLDED */ WRITE_ONCE(*pudp, pud); -- cgit v1.2.3-59-g8ed1b From 742fafa50b62cb9f379ba14a13443f52afdc0c5d Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Sat, 6 Oct 2018 16:49:04 +0800 Subject: arm64: mm: Drop the unused cpu parameter Cpu parameter is never used in flush_context, remove it. Acked-by: Will Deacon Signed-off-by: Shaokun Zhang Signed-off-by: Catalin Marinas --- arch/arm64/mm/context.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index a65af49e12e7..1f0ea2facf24 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -88,7 +88,7 @@ void verify_cpu_asid_bits(void) } } -static void flush_context(unsigned int cpu) +static void flush_context(void) { int i; u64 asid; @@ -142,7 +142,7 @@ static bool check_update_reserved_asid(u64 asid, u64 newasid) return hit; } -static u64 new_context(struct mm_struct *mm, unsigned int cpu) +static u64 new_context(struct mm_struct *mm) { static u32 cur_idx = 1; u64 asid = atomic64_read(&mm->context.id); @@ -180,7 +180,7 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu) /* We're out of ASIDs, so increment the global generation count */ generation = atomic64_add_return_relaxed(ASID_FIRST_VERSION, &asid_generation); - flush_context(cpu); + flush_context(); /* We have more ASIDs than CPUs, so this will always succeed */ asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1); @@ -226,7 +226,7 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu) /* Check that our ASID belongs to the current generation. */ asid = atomic64_read(&mm->context.id); if ((asid ^ atomic64_read(&asid_generation)) >> asid_bits) { - asid = new_context(mm, cpu); + asid = new_context(mm); atomic64_set(&mm->context.id, asid); } -- cgit v1.2.3-59-g8ed1b From 3b82a6ea23277f56c4f005872bb73c8ce29779d7 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 10 Oct 2018 16:55:44 +0100 Subject: Revert "arm64: uaccess: implement unsafe accessors" This reverts commit a1f33941f7e103bcf471eaf8461b212223c642d6. The unsafe accessors allow the PAN enable/disable calls to be made once for a group of accesses. 
Adding these means we can now have sequences that look like this: | user_access_begin(); | unsafe_put_user(static-value, x, err); | unsafe_put_user(helper-that-sleeps(), x, err); | user_access_end(); Calling schedule() without taking an exception doesn't switch the PSTATE or TTBRs. We can switch out of a uaccess-enabled region, and run other code with uaccess enabled for a different thread. We can also switch from uaccess-disabled code back into this region, meaning the unsafe_put_user()s will fault. For software-PAN, threads that do this will get stuck as handle_mm_fault() will determine the page has already been mapped in, but we fault again as the page tables aren't loaded. To solve this we need code in __switch_to() that save/restores the PAN state. Acked-by: Julien Thierry Acked-by: Mark Rutland Signed-off-by: James Morse Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/uaccess.h | 61 ++++++++++------------------------------ 1 file changed, 15 insertions(+), 46 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 8ac6e34922e7..07c34087bd5e 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -276,9 +276,11 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr) : "+r" (err), "=&r" (x) \ : "r" (addr), "i" (-EFAULT)) -#define __get_user_err_unsafe(x, ptr, err) \ +#define __get_user_err(x, ptr, err) \ do { \ unsigned long __gu_val; \ + __chk_user_ptr(ptr); \ + uaccess_enable_not_uao(); \ switch (sizeof(*(ptr))) { \ case 1: \ __get_user_asm("ldrb", "ldtrb", "%w", __gu_val, (ptr), \ @@ -299,24 +301,17 @@ do { \ default: \ BUILD_BUG(); \ } \ - (x) = (__force __typeof__(*(ptr)))__gu_val; \ -} while (0) - -#define __get_user_err_check(x, ptr, err) \ -do { \ - __chk_user_ptr(ptr); \ - uaccess_enable_not_uao(); \ - __get_user_err_unsafe((x), (ptr), (err)); \ uaccess_disable_not_uao(); \ + (x) = (__force __typeof__(*(ptr)))__gu_val; \ } while (0) -#define __get_user_err(x, ptr, err, accessor) \ +#define __get_user_check(x, ptr, err) \ ({ \ __typeof__(*(ptr)) __user *__p = (ptr); \ might_fault(); \ if (access_ok(VERIFY_READ, __p, sizeof(*__p))) { \ __p = uaccess_mask_ptr(__p); \ - accessor((x), __p, (err)); \ + __get_user_err((x), __p, (err)); \ } else { \ (x) = 0; (err) = -EFAULT; \ } \ @@ -324,14 +319,14 @@ do { \ #define __get_user_error(x, ptr, err) \ ({ \ - __get_user_err((x), (ptr), (err), __get_user_err_check); \ + __get_user_check((x), (ptr), (err)); \ (void)0; \ }) #define __get_user(x, ptr) \ ({ \ int __gu_err = 0; \ - __get_user_err((x), (ptr), __gu_err, __get_user_err_check); \ + __get_user_check((x), (ptr), __gu_err); \ __gu_err; \ }) @@ -351,9 +346,11 @@ do { \ : "+r" (err) \ : "r" (x), "r" (addr), "i" (-EFAULT)) -#define __put_user_err_unsafe(x, ptr, err) \ +#define __put_user_err(x, ptr, err) \ do { \ __typeof__(*(ptr)) __pu_val = (x); \ + __chk_user_ptr(ptr); \ + uaccess_enable_not_uao(); \ switch (sizeof(*(ptr))) { \ case 1: \ __put_user_asm("strb", "sttrb", "%w", __pu_val, (ptr), \ @@ -374,24 +371,16 @@ do { \ default: \ BUILD_BUG(); \ } \ -} while (0) - - -#define __put_user_err_check(x, ptr, err) \ -do { \ - __chk_user_ptr(ptr); \ - uaccess_enable_not_uao(); \ - __put_user_err_unsafe((x), (ptr), (err)); \ uaccess_disable_not_uao(); \ } while (0) -#define __put_user_err(x, ptr, err, accessor) \ +#define __put_user_check(x, ptr, err) \ ({ \ __typeof__(*(ptr)) __user *__p = (ptr); \ might_fault(); \ if (access_ok(VERIFY_WRITE, __p, sizeof(*__p))) { \ 
__p = uaccess_mask_ptr(__p); \ - accessor((x), __p, (err)); \ + __put_user_err((x), __p, (err)); \ } else { \ (err) = -EFAULT; \ } \ @@ -399,39 +388,19 @@ do { \ #define __put_user_error(x, ptr, err) \ ({ \ - __put_user_err((x), (ptr), (err), __put_user_err_check); \ + __put_user_check((x), (ptr), (err)); \ (void)0; \ }) #define __put_user(x, ptr) \ ({ \ int __pu_err = 0; \ - __put_user_err((x), (ptr), __pu_err, __put_user_err_check); \ + __put_user_check((x), (ptr), __pu_err); \ __pu_err; \ }) #define put_user __put_user - -#define user_access_begin() uaccess_enable_not_uao() -#define user_access_end() uaccess_disable_not_uao() - -#define unsafe_get_user(x, ptr, err) \ -do { \ - int __gu_err = 0; \ - __get_user_err((x), (ptr), __gu_err, __get_user_err_unsafe); \ - if (__gu_err != 0) \ - goto err; \ -} while (0) - -#define unsafe_put_user(x, ptr, err) \ -do { \ - int __pu_err = 0; \ - __put_user_err((x), (ptr), __pu_err, __put_user_err_unsafe); \ - if (__pu_err != 0) \ - goto err; \ -} while (0) - extern unsigned long __must_check __arch_copy_from_user(void *to, const void __user *from, unsigned long n); #define raw_copy_from_user(to, from, n) \ ({ \ -- cgit v1.2.3-59-g8ed1b From 26a6f87ef596e612ab79e456155e195f2fa9b891 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 10 Oct 2018 15:43:22 +0100 Subject: arm64: mm: Use __pa_symbol() for set_swapper_pgd() commit 2330b7ca78350efcb ("arm64/mm: use fixmap to modify swapper_pg_dir") modifies the swapper_pg_dir via the fixmap as the kernel page tables have been moved to a read-only part of the kernel mapping. Using __pa() to setup the fixmap causes CONFIG_DEBUG_VIRTUAL to fire, as this function is used on the kernel-image swapper address. The in_swapper_pgdir() test before each call of this function means set_swapper_pgd() will only ever be called when pgdp points somewhere in the kernel-image mapping of swapper_pd_dir. Use __pa_symbol(). Reported-by: Geert Uytterhoeven Acked-by: Will Deacon Cc: Jun Yao Tested-by: Geert Uytterhoeven Signed-off-by: James Morse Signed-off-by: Catalin Marinas --- arch/arm64/mm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 6f0e2edcc114..6deb836a102a 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -74,7 +74,7 @@ void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd) pgd_t *fixmap_pgdp; spin_lock(&swapper_pgdir_lock); - fixmap_pgdp = pgd_set_fixmap(__pa(pgdp)); + fixmap_pgdp = pgd_set_fixmap(__pa_symbol(pgdp)); WRITE_ONCE(*fixmap_pgdp, pgd); /* * We need dsb(ishst) here to ensure the page-table-walker sees -- cgit v1.2.3-59-g8ed1b From 8ab66cbe63aeaf9e5970fb4aaef1c660fca59321 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 9 Oct 2018 14:47:05 +0100 Subject: arm64: cpufeature: ctr: Fix cpu capability check for late CPUs The matches() routine for a capability must honor the "scope" passed to it and return the proper results. i.e, when passed with SCOPE_LOCAL_CPU, it should check the status of the capability on the current CPU. This is used by verify_local_cpu_capabilities() on a late secondary CPU to make sure that it's compliant with the established system features. However, ARM64_HAS_CACHE_{IDC/DIC} always checks the system wide registers and this could mean that a late secondary CPU could return "true" (since the CPU hasn't updated the system wide registers yet) and thus lead the system in an inconsistent state, where the system assumes it has IDC/DIC feature, while the new CPU doesn't. 
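The shape of the fix is easiest to see as a small sketch (an illustrative restatement of the change below, not additional code): matches() must choose its source according to the scope it is called with, so a late secondary CPU is judged on its own CTR_EL0 rather than the already-sanitised system-wide value.

	/* Sketch of the scope-aware matches() pattern adopted below. */
	static bool example_has_ctr_bit(int scope, u64 bit)
	{
		u64 ctr;

		if (scope == SCOPE_SYSTEM)
			ctr = arm64_ftr_reg_ctrel0.sys_val;	/* sanitised system value */
		else
			ctr = read_cpuid_cachetype();		/* this CPU's CTR_EL0 */

		return ctr & bit;
	}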
Fixes: commit 6ae4b6e0578886eb36 ("arm64: Add support for new control bits CTR_EL0.DIC and CTR_EL0.IDC") Cc: Philip Elcan Cc: Shanker Donthineni Cc: Mark Rutland Cc: Will Deacon Signed-off-by: Suzuki K Poulose Signed-off-by: Catalin Marinas --- arch/arm64/kernel/cpufeature.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 00e7c313f088..ba16bb7762ca 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -854,15 +854,29 @@ static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unus } static bool has_cache_idc(const struct arm64_cpu_capabilities *entry, - int __unused) + int scope) { - return read_sanitised_ftr_reg(SYS_CTR_EL0) & BIT(CTR_IDC_SHIFT); + u64 ctr; + + if (scope == SCOPE_SYSTEM) + ctr = arm64_ftr_reg_ctrel0.sys_val; + else + ctr = read_cpuid_cachetype(); + + return ctr & BIT(CTR_IDC_SHIFT); } static bool has_cache_dic(const struct arm64_cpu_capabilities *entry, - int __unused) + int scope) { - return read_sanitised_ftr_reg(SYS_CTR_EL0) & BIT(CTR_DIC_SHIFT); + u64 ctr; + + if (scope == SCOPE_SYSTEM) + ctr = arm64_ftr_reg_ctrel0.sys_val; + else + ctr = read_cpuid_cachetype(); + + return ctr & BIT(CTR_DIC_SHIFT); } static bool __maybe_unused -- cgit v1.2.3-59-g8ed1b From 1602df02f33f61fe0de1bbfeba0d1c97c14bff19 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 9 Oct 2018 14:47:06 +0100 Subject: arm64: cpufeature: Fix handling of CTR_EL0.IDC field CTR_EL0.IDC reports the data cache clean requirements for instruction to data coherence. However, if the field is 0, we need to check the CLIDR_EL1 fields to detect the status of the feature. Currently we don't do this and generate a warning with tainting the kernel, when there is a mismatch in the field among the CPUs. Also the userspace doesn't have a reliable way to check the CLIDR_EL1 register to check the status. This patch fixes the problem by checking the CLIDR_EL1 fields, when (CTR_EL0.IDC == 0) and updates the kernel's copy of the CTR_EL0 for the CPU with the actual status of the feature. This would allow the sanity check infrastructure to do the proper checking of the fields and also allow the CTR_EL0 emulation code to supply the real status of the feature. Now, if a CPU has raw CTR_EL0.IDC == 0 and effective IDC == 1 (with overall system wide IDC == 1), we need to expose the real value to the user. So, we trap CTR_EL0 access on the CPU which reports incorrect CTR_EL0.IDC. 
Fixes: commit 6ae4b6e057888 ("arm64: Add support for new control bits CTR_EL0.DIC and CTR_EL0.IDC") Cc: Shanker Donthineni Cc: Philip Elcan Cc: Will Deacon Cc: Mark Rutland Signed-off-by: Suzuki K Poulose Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cache.h | 40 ++++++++++++++++++++++++++++++++++++++++ arch/arm64/kernel/cpu_errata.c | 27 ++++++++++++++++++++++++--- arch/arm64/kernel/cpufeature.c | 15 ++++++++++++++- arch/arm64/kernel/cpuinfo.c | 10 +++++++++- 4 files changed, 87 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index 5ee5bca8c24b..13dd42c3ad4e 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -40,6 +40,15 @@ #define L1_CACHE_SHIFT (6) #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) + +#define CLIDR_LOUU_SHIFT 27 +#define CLIDR_LOC_SHIFT 24 +#define CLIDR_LOUIS_SHIFT 21 + +#define CLIDR_LOUU(clidr) (((clidr) >> CLIDR_LOUU_SHIFT) & 0x7) +#define CLIDR_LOC(clidr) (((clidr) >> CLIDR_LOC_SHIFT) & 0x7) +#define CLIDR_LOUIS(clidr) (((clidr) >> CLIDR_LOUIS_SHIFT) & 0x7) + /* * Memory returned by kmalloc() may be used for DMA, so we must make * sure that all such allocations are cache aligned. Otherwise, @@ -84,6 +93,37 @@ static inline int cache_line_size(void) return cwg ? 4 << cwg : ARCH_DMA_MINALIGN; } +/* + * Read the effective value of CTR_EL0. + * + * According to ARM ARM for ARMv8-A (ARM DDI 0487C.a), + * section D10.2.33 "CTR_EL0, Cache Type Register" : + * + * CTR_EL0.IDC reports the data cache clean requirements for + * instruction to data coherence. + * + * 0 - dcache clean to PoU is required unless : + * (CLIDR_EL1.LoC == 0) || (CLIDR_EL1.LoUIS == 0 && CLIDR_EL1.LoUU == 0) + * 1 - dcache clean to PoU is not required for i-to-d coherence. + * + * This routine provides the CTR_EL0 with the IDC field updated to the + * effective state. + */ +static inline u32 __attribute_const__ read_cpuid_effective_cachetype(void) +{ + u32 ctr = read_cpuid_cachetype(); + + if (!(ctr & BIT(CTR_IDC_SHIFT))) { + u64 clidr = read_sysreg(clidr_el1); + + if (CLIDR_LOC(clidr) == 0 || + (CLIDR_LOUIS(clidr) == 0 && CLIDR_LOUU(clidr) == 0)) + ctr |= BIT(CTR_IDC_SHIFT); + } + + return ctr; +} + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index cde948991d68..3deb01c6ed49 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -68,11 +68,32 @@ static bool has_mismatched_cache_type(const struct arm64_cpu_capabilities *entry, int scope) { - u64 mask = arm64_ftr_reg_ctrel0.strict_mask;; + u64 mask = arm64_ftr_reg_ctrel0.strict_mask; + u64 sys = arm64_ftr_reg_ctrel0.sys_val & mask; + u64 ctr_raw, ctr_real; WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); - return (read_cpuid_cachetype() & mask) != - (arm64_ftr_reg_ctrel0.sys_val & mask); + + /* + * We want to make sure that all the CPUs in the system expose + * a consistent CTR_EL0 to make sure that applications behaves + * correctly with migration. + * + * If a CPU has CTR_EL0.IDC but does not advertise it via CTR_EL0 : + * + * 1) It is safe if the system doesn't support IDC, as CPU anyway + * reports IDC = 0, consistent with the rest. + * + * 2) If the system has IDC, it is still safe as we trap CTR_EL0 + * access on this CPU via the ARM64_HAS_CACHE_IDC capability. + * + * So, we need to make sure either the raw CTR_EL0 or the effective + * CTR_EL0 matches the system's copy to allow a secondary CPU to boot. 
+ */ + ctr_raw = read_cpuid_cachetype() & mask; + ctr_real = read_cpuid_effective_cachetype() & mask; + + return (ctr_real != sys) && (ctr_raw != sys); } static void diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index ba16bb7762ca..af50064dea51 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -861,11 +861,23 @@ static bool has_cache_idc(const struct arm64_cpu_capabilities *entry, if (scope == SCOPE_SYSTEM) ctr = arm64_ftr_reg_ctrel0.sys_val; else - ctr = read_cpuid_cachetype(); + ctr = read_cpuid_effective_cachetype(); return ctr & BIT(CTR_IDC_SHIFT); } +static void cpu_emulate_effective_ctr(const struct arm64_cpu_capabilities *__unused) +{ + /* + * If the CPU exposes raw CTR_EL0.IDC = 0, while effectively + * CTR_EL0.IDC = 1 (from CLIDR values), we need to trap accesses + * to the CTR_EL0 on this CPU and emulate it with the real/safe + * value. + */ + if (!(read_cpuid_cachetype() & BIT(CTR_IDC_SHIFT))) + sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0); +} + static bool has_cache_dic(const struct arm64_cpu_capabilities *entry, int scope) { @@ -1282,6 +1294,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .capability = ARM64_HAS_CACHE_IDC, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cache_idc, + .cpu_enable = cpu_emulate_effective_ctr, }, { .desc = "Instruction cache invalidation not required for I/D coherence", diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index dce971f2c167..bcc2831399cb 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -325,7 +325,15 @@ static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info) static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) { info->reg_cntfrq = arch_timer_get_cntfrq(); - info->reg_ctr = read_cpuid_cachetype(); + /* + * Use the effective value of the CTR_EL0 than the raw value + * exposed by the CPU. CTR_E0.IDC field value must be interpreted + * with the CLIDR_EL1 fields to avoid triggering false warnings + * when there is a mismatch across the CPUs. Keep track of the + * effective value of the CTR_EL0 in our internal records for + * acurate sanity check and feature enablement. + */ + info->reg_ctr = read_cpuid_effective_cachetype(); info->reg_dczid = read_cpuid(DCZID_EL0); info->reg_midr = read_cpuid_id(); info->reg_revidr = read_cpuid(REVIDR_EL1); -- cgit v1.2.3-59-g8ed1b From 4afe8e79da920bdf6698b01bc668fffc6758f37b Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 9 Oct 2018 14:47:07 +0100 Subject: arm64: cpufeature: Trap CTR_EL0 access only where it is necessary When there is a mismatch in the CTR_EL0 field, we trap access to CTR from EL0 on all CPUs to expose the safe value. However, we could skip trapping on a CPU which matches the safe value. 
Cc: Mark Rutland Cc: Will Deacon Signed-off-by: Suzuki K Poulose Signed-off-by: Catalin Marinas --- arch/arm64/kernel/cpu_errata.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 3deb01c6ed49..9af8df96ec49 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -99,7 +99,12 @@ has_mismatched_cache_type(const struct arm64_cpu_capabilities *entry, static void cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *__unused) { - sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0); + u64 mask = arm64_ftr_reg_ctrel0.strict_mask; + + /* Trap CTR_EL0 access on this CPU, only if it has a mismatch */ + if ((read_cpuid_cachetype() & mask) != + (arm64_ftr_reg_ctrel0.sys_val & mask)) + sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0); } atomic_t arm64_el2_vector_last_slot = ATOMIC_INIT(-1); -- cgit v1.2.3-59-g8ed1b From 4debef5510071032c6d5dace31ca1cc42a120073 Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 21 Sep 2018 21:49:19 +0100 Subject: arm64: KVM: Guests can skip __install_bp_hardening_cb()s HYP work enable_smccc_arch_workaround_1() passes NULL as the hyp_vecs start and end if the HVC conduit is in use, and ARM_SMCCC_ARCH_WORKAROUND_1 is detected. If the guest kernel happened to be built with KVM_INDIRECT_VECTORS, we go on to allocate a slot, memcpy() the empty workaround in and do the appropriate cache maintenance. This works as we always tell memcpy() the range is 0, so it never accesses the NULL src pointer, but we still do the cache maintenance. If hyp_vecs_start is NULL we know we're a guest, just update the fn like the !KVM_INDIRECT_VECTORS version. Reviewed-by: Julien Thierry Acked-by: Marc Zyngier Signed-off-by: James Morse Signed-off-by: Catalin Marinas --- arch/arm64/kernel/cpu_errata.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 9af8df96ec49..a509e35132d2 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -138,6 +138,15 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn, static DEFINE_SPINLOCK(bp_lock); int cpu, slot = -1; + /* + * enable_smccc_arch_workaround_1() passes NULL for the hyp_vecs + * start/end if we're a guest. Skip the hyp-vectors work. + */ + if (!hyp_vecs_start) { + __this_cpu_write(bp_hardening_data.fn, fn); + return; + } + spin_lock(&bp_lock); for_each_possible_cpu(cpu) { if (per_cpu(bp_hardening_data.fn, cpu) == fn) { -- cgit v1.2.3-59-g8ed1b