From fa63da2ab046b885a7f70291aafc4e8ce015429b Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 11 Jun 2019 10:19:32 -0700 Subject: arm64: Don't unconditionally add -Wno-psabi to KBUILD_CFLAGS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a GCC only option, which warns about ABI changes within GCC, so unconditionally adding it breaks Clang with tons of: warning: unknown warning option '-Wno-psabi' [-Wunknown-warning-option] and link time failures: ld.lld: error: undefined symbol: __efistub___stack_chk_guard >>> referenced by arm-stub.c:73 (/home/nathan/cbl/linux/drivers/firmware/efi/libstub/arm-stub.c:73) >>> arm-stub.stub.o:(__efistub_install_memreserve_table) in archive ./drivers/firmware/efi/libstub/lib.a These failures come from the lack of -fno-stack-protector, which is added via cc-option in drivers/firmware/efi/libstub/Makefile. When an unknown flag is added to KBUILD_CFLAGS, clang will noisily warn that it is ignoring the option like above, unlike gcc, who will just error. $ echo "int main() { return 0; }" > tmp.c $ clang -Wno-psabi tmp.c; echo $? warning: unknown warning option '-Wno-psabi' [-Wunknown-warning-option] 1 warning generated. 0 $ gcc -Wsometimes-uninitialized tmp.c; echo $? gcc: error: unrecognized command line option ‘-Wsometimes-uninitialized’; did you mean ‘-Wmaybe-uninitialized’? 1 For cc-option to work properly with clang and behave like gcc, -Werror is needed, which was done in commit c3f0d0bc5b01 ("kbuild, LLVMLinux: Add -Werror to cc-option to support clang"). $ clang -Werror -Wno-psabi tmp.c; echo $? error: unknown warning option '-Wno-psabi' [-Werror,-Wunknown-warning-option] 1 As a consequence of this, when an unknown flag is unconditionally added to KBUILD_CFLAGS, it will cause cc-option to always fail and those flags will never get added: $ clang -Werror -Wno-psabi -fno-stack-protector tmp.c; echo $? error: unknown warning option '-Wno-psabi' [-Werror,-Wunknown-warning-option] 1 This can be seen when compiling the whole kernel as some warnings that are normally disabled (see below) show up. The full list of flags missing from drivers/firmware/efi/libstub are the following (gathered from diffing .arm64-stub.o.cmd): -fno-delete-null-pointer-checks -Wno-address-of-packed-member -Wframe-larger-than=2048 -Wno-unused-const-variable -fno-strict-overflow -fno-merge-all-constants -fno-stack-check -Werror=date-time -Werror=incompatible-pointer-types -ffreestanding -fno-stack-protector Use cc-disable-warning so that it gets disabled for GCC and does nothing for Clang. Fixes: ebcc5928c5d9 ("arm64: Silence gcc warnings about arch ABI drift") Link: https://github.com/ClangBuiltLinux/linux/issues/511 Reported-by: Qian Cai Acked-by: Dave Martin Reviewed-by: Nick Desaulniers Signed-off-by: Nathan Chancellor Signed-off-by: Will Deacon --- arch/arm64/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 8fbd583b18e1..e9d2e578cbe6 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -51,7 +51,7 @@ endif KBUILD_CFLAGS += -mgeneral-regs-only $(lseinstr) $(brokengasinst) KBUILD_CFLAGS += -fno-asynchronous-unwind-tables -KBUILD_CFLAGS += -Wno-psabi +KBUILD_CFLAGS += $(call cc-disable-warning, psabi) KBUILD_AFLAGS += $(lseinstr) $(brokengasinst) KBUILD_CFLAGS += $(call cc-option,-mabi=lp64) -- cgit v1.2.3-59-g8ed1b From 01d57485fcdb9f9101a10a18e32d5f8b023cab86 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 11 Jun 2019 12:47:34 +0100 Subject: arm64: tlbflush: Ensure start/end of address range are aligned to stride Since commit 3d65b6bbc01e ("arm64: tlbi: Set MAX_TLBI_OPS to PTRS_PER_PTE"), we resort to per-ASID invalidation when attempting to perform more than PTRS_PER_PTE invalidation instructions in a single call to __flush_tlb_range(). Whilst this is beneficial, the mmu_gather code does not ensure that the end address of the range is rounded-up to the stride when freeing intermediate page tables in pXX_free_tlb(), which defeats our range checking. Align the bounds passed into __flush_tlb_range(). Cc: Catalin Marinas Cc: Peter Zijlstra Reported-by: Hanjun Guo Tested-by: Hanjun Guo Reviewed-by: Hanjun Guo Signed-off-by: Will Deacon --- arch/arm64/include/asm/tlbflush.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index 3a1870228946..dff8f9ea5754 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -195,6 +195,9 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma, unsigned long asid = ASID(vma->vm_mm); unsigned long addr; + start = round_down(start, stride); + end = round_up(end, stride); + if ((end - start) >= (MAX_TLBI_OPS * stride)) { flush_tlb_mm(vma->vm_mm); return; -- cgit v1.2.3-59-g8ed1b From 41040cf7c5f0f26c368bc5d3016fed3a9ca6dba4 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Wed, 12 Jun 2019 17:00:32 +0100 Subject: arm64/sve: Fix missing SVE/FPSIMD endianness conversions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The in-memory representation of SVE and FPSIMD registers is different: the FPSIMD V-registers are stored as single 128-bit host-endian values, whereas SVE registers are stored in an endianness-invariant byte order. This means that the two representations differ when running on a big-endian host. But we blindly copy data from one representation to another when converting between the two, resulting in the register contents being unintentionally byteswapped in certain situations. Currently this can be triggered by the first SVE instruction after a syscall, for example (though the potential trigger points may vary in future). So, fix the conversion functions fpsimd_to_sve(), sve_to_fpsimd() and sve_sync_from_fpsimd_zeropad() to swab where appropriate. There is no common swahl128() or swab128() that we could use here. Maybe it would be worth making this generic, but for now add a simple local hack. Since the byte order differences are exposed in ABI, also clarify the documentation. Cc: Alex Bennée Cc: Peter Maydell Cc: Alan Hayward Cc: Julien Grall Fixes: bc0ee4760364 ("arm64/sve: Core task context handling") Fixes: 8cd969d28fd2 ("arm64/sve: Signal handling support") Fixes: 43d4da2c45b2 ("arm64/sve: ptrace and ELF coredump support") Signed-off-by: Dave Martin [will: Fix typos in comments and docs spotted by Julien] Signed-off-by: Will Deacon --- Documentation/arm64/sve.txt | 16 ++++++++++++ arch/arm64/include/uapi/asm/kvm.h | 7 ++++++ arch/arm64/include/uapi/asm/ptrace.h | 4 +++ arch/arm64/include/uapi/asm/sigcontext.h | 14 +++++++++++ arch/arm64/kernel/fpsimd.c | 42 +++++++++++++++++++++++++------- 5 files changed, 74 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/Documentation/arm64/sve.txt b/Documentation/arm64/sve.txt index 9940e924a47e..5689fc9a976a 100644 --- a/Documentation/arm64/sve.txt +++ b/Documentation/arm64/sve.txt @@ -56,6 +56,18 @@ model features for SVE is included in Appendix A. is to connect to a target process first and then attempt a ptrace(PTRACE_GETREGSET, pid, NT_ARM_SVE, &iov). +* Whenever SVE scalable register values (Zn, Pn, FFR) are exchanged in memory + between userspace and the kernel, the register value is encoded in memory in + an endianness-invariant layout, with bits [(8 * i + 7) : (8 * i)] encoded at + byte offset i from the start of the memory representation. This affects for + example the signal frame (struct sve_context) and ptrace interface + (struct user_sve_header) and associated data. + + Beware that on big-endian systems this results in a different byte order than + for the FPSIMD V-registers, which are stored as single host-endian 128-bit + values, with bits [(127 - 8 * i) : (120 - 8 * i)] of the register encoded at + byte offset i. (struct fpsimd_context, struct user_fpsimd_state). + 2. Vector length terminology ----------------------------- @@ -124,6 +136,10 @@ the SVE instruction set architecture. size and layout. Macros SVE_SIG_* are defined [1] to facilitate access to the members. +* Each scalable register (Zn, Pn, FFR) is stored in an endianness-invariant + layout, with bits [(8 * i + 7) : (8 * i)] stored at byte offset i from the + start of the register's representation in memory. + * If the SVE context is too big to fit in sigcontext.__reserved[], then extra space is allocated on the stack, an extra_context record is written in __reserved[] referencing this space. sve_context is then written in the diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 7b7ac0f6cec9..d819a3e8b552 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -260,6 +260,13 @@ struct kvm_vcpu_events { KVM_REG_SIZE_U256 | \ ((i) & (KVM_ARM64_SVE_MAX_SLICES - 1))) +/* + * Register values for KVM_REG_ARM64_SVE_ZREG(), KVM_REG_ARM64_SVE_PREG() and + * KVM_REG_ARM64_SVE_FFR() are represented in memory in an endianness- + * invariant layout which differs from the layout used for the FPSIMD + * V-registers on big-endian systems: see sigcontext.h for more explanation. + */ + #define KVM_ARM64_SVE_VQ_MIN __SVE_VQ_MIN #define KVM_ARM64_SVE_VQ_MAX __SVE_VQ_MAX diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index d78623acb649..97c53203150b 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h @@ -176,6 +176,10 @@ struct user_sve_header { * FPCR uint32_t FPCR * * Additional data might be appended in the future. + * + * The Z-, P- and FFR registers are represented in memory in an endianness- + * invariant layout which differs from the layout used for the FPSIMD + * V-registers on big-endian systems: see sigcontext.h for more explanation. */ #define SVE_PT_SVE_ZREG_SIZE(vq) __SVE_ZREG_SIZE(vq) diff --git a/arch/arm64/include/uapi/asm/sigcontext.h b/arch/arm64/include/uapi/asm/sigcontext.h index 5f3c0cec5af9..3d448a0bb225 100644 --- a/arch/arm64/include/uapi/asm/sigcontext.h +++ b/arch/arm64/include/uapi/asm/sigcontext.h @@ -77,6 +77,15 @@ struct fpsimd_context { __uint128_t vregs[32]; }; +/* + * Note: similarly to all other integer fields, each V-register is stored in an + * endianness-dependent format, with the byte at offset i from the start of the + * in-memory representation of the register value containing + * + * bits [(7 + 8 * i) : (8 * i)] of the register on little-endian hosts; or + * bits [(127 - 8 * i) : (120 - 8 * i)] on big-endian hosts. + */ + /* ESR_EL1 context */ #define ESR_MAGIC 0x45535201 @@ -204,6 +213,11 @@ struct sve_context { * FFR uint16_t[vq] first-fault status register * * Additional data might be appended in the future. + * + * Unlike vregs[] in fpsimd_context, each SVE scalable register (Z-, P- or FFR) + * is encoded in memory in an endianness-invariant format, with the byte at + * offset i from the start of the in-memory representation containing bits + * [(7 + 8 * i) : (8 * i)] of the register value. */ #define SVE_SIG_ZREG_SIZE(vq) __SVE_ZREG_SIZE(vq) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index a38bf74bcca8..bb42cd04baec 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -352,6 +353,23 @@ static int __init sve_sysctl_init(void) { return 0; } #define ZREG(sve_state, vq, n) ((char *)(sve_state) + \ (SVE_SIG_ZREG_OFFSET(vq, n) - SVE_SIG_REGS_OFFSET)) +#ifdef CONFIG_CPU_BIG_ENDIAN +static __uint128_t arm64_cpu_to_le128(__uint128_t x) +{ + u64 a = swab64(x); + u64 b = swab64(x >> 64); + + return ((__uint128_t)a << 64) | b; +} +#else +static __uint128_t arm64_cpu_to_le128(__uint128_t x) +{ + return x; +} +#endif + +#define arm64_le128_to_cpu(x) arm64_cpu_to_le128(x) + /* * Transfer the FPSIMD state in task->thread.uw.fpsimd_state to * task->thread.sve_state. @@ -369,14 +387,16 @@ static void fpsimd_to_sve(struct task_struct *task) void *sst = task->thread.sve_state; struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state; unsigned int i; + __uint128_t *p; if (!system_supports_sve()) return; vq = sve_vq_from_vl(task->thread.sve_vl); - for (i = 0; i < 32; ++i) - memcpy(ZREG(sst, vq, i), &fst->vregs[i], - sizeof(fst->vregs[i])); + for (i = 0; i < 32; ++i) { + p = (__uint128_t *)ZREG(sst, vq, i); + *p = arm64_cpu_to_le128(fst->vregs[i]); + } } /* @@ -395,14 +415,16 @@ static void sve_to_fpsimd(struct task_struct *task) void const *sst = task->thread.sve_state; struct user_fpsimd_state *fst = &task->thread.uw.fpsimd_state; unsigned int i; + __uint128_t const *p; if (!system_supports_sve()) return; vq = sve_vq_from_vl(task->thread.sve_vl); - for (i = 0; i < 32; ++i) - memcpy(&fst->vregs[i], ZREG(sst, vq, i), - sizeof(fst->vregs[i])); + for (i = 0; i < 32; ++i) { + p = (__uint128_t const *)ZREG(sst, vq, i); + fst->vregs[i] = arm64_le128_to_cpu(*p); + } } #ifdef CONFIG_ARM64_SVE @@ -491,6 +513,7 @@ void sve_sync_from_fpsimd_zeropad(struct task_struct *task) void *sst = task->thread.sve_state; struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state; unsigned int i; + __uint128_t *p; if (!test_tsk_thread_flag(task, TIF_SVE)) return; @@ -499,9 +522,10 @@ void sve_sync_from_fpsimd_zeropad(struct task_struct *task) memset(sst, 0, SVE_SIG_REGS_SIZE(vq)); - for (i = 0; i < 32; ++i) - memcpy(ZREG(sst, vq, i), &fst->vregs[i], - sizeof(fst->vregs[i])); + for (i = 0; i < 32; ++i) { + p = (__uint128_t *)ZREG(sst, vq, i); + *p = arm64_cpu_to_le128(fst->vregs[i]); + } } int sve_set_vector_length(struct task_struct *task, -- cgit v1.2.3-59-g8ed1b