Diffstat (limited to 'arch/arm64/include')
162 files changed, 10649 insertions, 4886 deletions
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index 5c8ee5a541d2..d2ff8f6c3231 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -1,10 +1,20 @@ # SPDX-License-Identifier: GPL-2.0 +syscall-y += syscall_table_32.h +syscall-y += syscall_table_64.h + +# arm32 syscall table used by lib/compat_audit.c: +syscall-y += unistd_32.h +# same constants with prefixes, used by vdso, seccomp and sigreturn: +syscall-y += unistd_compat_32.h + generic-y += early_ioremap.h +generic-y += fprobe.h generic-y += mcs_spinlock.h +generic-y += mmzone.h generic-y += qrwlock.h generic-y += qspinlock.h generic-y += parport.h generic-y += user.h -generated-y += cpucaps.h +generated-y += cpucap-defs.h generated-y += sysreg-defs.h diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index bd68e1b7f29f..c07a58b96329 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -9,6 +9,7 @@ #ifndef _ASM_ACPI_H #define _ASM_ACPI_H +#include <linux/cpuidle.h> #include <linux/efi.h> #include <linux/memblock.h> #include <linux/psci.h> @@ -42,6 +43,27 @@ #define ACPI_MADT_GICC_SPE (offsetof(struct acpi_madt_generic_interrupt, \ spe_interrupt) + sizeof(u16)) +#define ACPI_MADT_GICC_TRBE (offsetof(struct acpi_madt_generic_interrupt, \ + trbe_interrupt) + sizeof(u16)) +/* + * Arm® Functional Fixed Hardware Specification Version 1.2. + * Table 2: Arm Architecture context loss flags + */ +#define CPUIDLE_CORE_CTXT BIT(0) /* Core context Lost */ + +static inline unsigned int arch_get_idle_state_flags(u32 arch_flags) +{ + if (arch_flags & CPUIDLE_CORE_CTXT) + return CPUIDLE_FLAG_TIMER_STOP; + + return 0; +} +#define arch_get_idle_state_flags arch_get_idle_state_flags + +#define CPUIDLE_TRACE_CTXT BIT(1) /* Trace context loss */ +#define CPUIDLE_GICR_CTXT BIT(2) /* GICR */ +#define CPUIDLE_GICD_CTXT BIT(3) /* GICD */ + /* Basic configuration for ACPI */ #ifdef CONFIG_ACPI pgprot_t __acpi_get_mem_attribute(phys_addr_t addr); @@ -97,6 +119,18 @@ static inline u32 get_acpi_id_for_cpu(unsigned int cpu) return acpi_cpu_get_madt_gicc(cpu)->uid; } +static inline int get_cpu_for_acpi_id(u32 uid) +{ + int cpu; + + for (cpu = 0; cpu < nr_cpu_ids; cpu++) + if (acpi_cpu_get_madt_gicc(cpu) && + uid == get_acpi_id_for_cpu(cpu)) + return cpu; + + return -EINVAL; +} + static inline void arch_fix_phys_package_id(int num, u32 slot) { } void __init acpi_init_cpus(void); int apei_claim_sea(struct pt_regs *regs); @@ -116,7 +150,7 @@ acpi_set_mailbox_entry(int cpu, struct acpi_madt_generic_interrupt *processor) {} #endif -static inline const char *acpi_get_enable_method(int cpu) +static __always_inline const char *acpi_get_enable_method(int cpu) { if (acpi_psci_present()) return "psci"; diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h index 7e157ab6cd50..c8c77f9e36d6 100644 --- a/arch/arm64/include/asm/alternative-macros.h +++ b/arch/arm64/include/asm/alternative-macros.h @@ -2,26 +2,38 @@ #ifndef __ASM_ALTERNATIVE_MACROS_H #define __ASM_ALTERNATIVE_MACROS_H +#include <linux/const.h> +#include <vdso/bits.h> + #include <asm/cpucaps.h> #include <asm/insn-def.h> -#define ARM64_CB_PATCH ARM64_NCAPS +/* + * Binutils 2.27.0 can't handle a 'UL' suffix on constants, so for the assembly + * macros below we must use we must use `(1 << ARM64_CB_SHIFT)`. 
+ */ +#define ARM64_CB_SHIFT 15 +#define ARM64_CB_BIT BIT(ARM64_CB_SHIFT) + +#if ARM64_NCAPS >= ARM64_CB_BIT +#error "cpucaps have overflown ARM64_CB_BIT" +#endif #ifndef __ASSEMBLY__ #include <linux/stringify.h> -#define ALTINSTR_ENTRY(feature) \ +#define ALTINSTR_ENTRY(cpucap) \ " .word 661b - .\n" /* label */ \ " .word 663f - .\n" /* new instruction */ \ - " .hword " __stringify(feature) "\n" /* feature bit */ \ + " .hword " __stringify(cpucap) "\n" /* cpucap */ \ " .byte 662b-661b\n" /* source len */ \ " .byte 664f-663f\n" /* replacement len */ -#define ALTINSTR_ENTRY_CB(feature, cb) \ +#define ALTINSTR_ENTRY_CB(cpucap, cb) \ " .word 661b - .\n" /* label */ \ - " .word " __stringify(cb) "- .\n" /* callback */ \ - " .hword " __stringify(feature) "\n" /* feature bit */ \ + " .word " __stringify(cb) "- .\n" /* callback */ \ + " .hword " __stringify(cpucap) "\n" /* cpucap */ \ " .byte 662b-661b\n" /* source len */ \ " .byte 664f-663f\n" /* replacement len */ @@ -41,13 +53,13 @@ * * Alternatives with callbacks do not generate replacement instructions. */ -#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled) \ +#define __ALTERNATIVE_CFG(oldinstr, newinstr, cpucap, cfg_enabled) \ ".if "__stringify(cfg_enabled)" == 1\n" \ "661:\n\t" \ oldinstr "\n" \ "662:\n" \ ".pushsection .altinstructions,\"a\"\n" \ - ALTINSTR_ENTRY(feature) \ + ALTINSTR_ENTRY(cpucap) \ ".popsection\n" \ ".subsection 1\n" \ "663:\n\t" \ @@ -58,31 +70,31 @@ ".previous\n" \ ".endif\n" -#define __ALTERNATIVE_CFG_CB(oldinstr, feature, cfg_enabled, cb) \ +#define __ALTERNATIVE_CFG_CB(oldinstr, cpucap, cfg_enabled, cb) \ ".if "__stringify(cfg_enabled)" == 1\n" \ "661:\n\t" \ oldinstr "\n" \ "662:\n" \ ".pushsection .altinstructions,\"a\"\n" \ - ALTINSTR_ENTRY_CB(feature, cb) \ + ALTINSTR_ENTRY_CB(cpucap, cb) \ ".popsection\n" \ "663:\n\t" \ "664:\n\t" \ ".endif\n" -#define _ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg, ...) \ - __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg)) +#define _ALTERNATIVE_CFG(oldinstr, newinstr, cpucap, cfg, ...) \ + __ALTERNATIVE_CFG(oldinstr, newinstr, cpucap, IS_ENABLED(cfg)) -#define ALTERNATIVE_CB(oldinstr, cb) \ - __ALTERNATIVE_CFG_CB(oldinstr, ARM64_CB_PATCH, 1, cb) +#define ALTERNATIVE_CB(oldinstr, cpucap, cb) \ + __ALTERNATIVE_CFG_CB(oldinstr, (1 << ARM64_CB_SHIFT) | (cpucap), 1, cb) #else #include <asm/assembler.h> -.macro altinstruction_entry orig_offset alt_offset feature orig_len alt_len +.macro altinstruction_entry orig_offset alt_offset cpucap orig_len alt_len .word \orig_offset - . .word \alt_offset - . - .hword \feature + .hword (\cpucap) .byte \orig_len .byte \alt_len .endm @@ -141,10 +153,10 @@ 661: .endm -.macro alternative_cb cb +.macro alternative_cb cap, cb .set .Lasm_alt_mode, 0 .pushsection .altinstructions, "a" - altinstruction_entry 661f, \cb, ARM64_CB_PATCH, 662f-661f, 0 + altinstruction_entry 661f, \cb, (1 << ARM64_CB_SHIFT) | \cap, 662f-661f, 0 .popsection 661: .endm @@ -198,13 +210,59 @@ alternative_endif #endif /* __ASSEMBLY__ */ /* - * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature)); + * Usage: asm(ALTERNATIVE(oldinstr, newinstr, cpucap)); * - * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature, CONFIG_FOO)); + * Usage: asm(ALTERNATIVE(oldinstr, newinstr, cpucap, CONFIG_FOO)); * N.B. If CONFIG_FOO is specified, but not selected, the whole block * will be omitted, including oldinstr. */ #define ALTERNATIVE(oldinstr, newinstr, ...) 
\ _ALTERNATIVE_CFG(oldinstr, newinstr, __VA_ARGS__, 1) +#ifndef __ASSEMBLY__ + +#include <linux/types.h> + +static __always_inline bool +alternative_has_cap_likely(const unsigned long cpucap) +{ + if (!cpucap_is_possible(cpucap)) + return false; + + asm goto( +#ifdef BUILD_VDSO + ALTERNATIVE("b %l[l_no]", "nop", %[cpucap]) +#else + ALTERNATIVE_CB("b %l[l_no]", %[cpucap], alt_cb_patch_nops) +#endif + : + : [cpucap] "i" (cpucap) + : + : l_no); + + return true; +l_no: + return false; +} + +static __always_inline bool +alternative_has_cap_unlikely(const unsigned long cpucap) +{ + if (!cpucap_is_possible(cpucap)) + return false; + + asm goto( + ALTERNATIVE("nop", "b %l[l_yes]", %[cpucap]) + : + : [cpucap] "i" (cpucap) + : + : l_yes); + + return false; +l_yes: + return true; +} + +#endif /* __ASSEMBLY__ */ + #endif /* __ASM_ALTERNATIVE_MACROS_H */ diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index a38b92e11811..00d97b8a757f 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -13,7 +13,7 @@ struct alt_instr { s32 orig_offset; /* offset to original instruction */ s32 alt_offset; /* offset to replacement instruction */ - u16 cpufeature; /* cpufeature bit set for replacement */ + u16 cpucap; /* cpucap bit set for replacement */ u8 orig_len; /* size of original instruction(s) */ u8 alt_len; /* size of new instruction(s), <= orig_len */ }; @@ -23,7 +23,7 @@ typedef void (*alternative_cb_t)(struct alt_instr *alt, void __init apply_boot_alternatives(void); void __init apply_alternatives_all(void); -bool alternative_is_applied(u16 cpufeature); +bool alternative_is_applied(u16 cpucap); #ifdef CONFIG_MODULES void apply_alternatives_module(void *start, size_t length); @@ -31,5 +31,8 @@ void apply_alternatives_module(void *start, size_t length); static inline void apply_alternatives_module(void *start, size_t length) { } #endif +void alt_cb_patch_nops(struct alt_instr *alt, __le32 *origptr, + __le32 *updptr, int nr_inst); + #endif /* __ASSEMBLY__ */ #endif /* __ASM_ALTERNATIVE_H */ diff --git a/arch/arm64/include/asm/apple_m1_pmu.h b/arch/arm64/include/asm/apple_m1_pmu.h index 99483b19b99f..02e05d05851f 100644 --- a/arch/arm64/include/asm/apple_m1_pmu.h +++ b/arch/arm64/include/asm/apple_m1_pmu.h @@ -37,6 +37,7 @@ #define PMCR0_PMI_ENABLE_8_9 GENMASK(45, 44) #define SYS_IMP_APL_PMCR1_EL1 sys_reg(3, 1, 15, 1, 0) +#define SYS_IMP_APL_PMCR1_EL12 sys_reg(3, 1, 15, 7, 2) #define PMCR1_COUNT_A64_EL0_0_7 GENMASK(15, 8) #define PMCR1_COUNT_A64_EL1_0_7 GENMASK(23, 16) #define PMCR1_COUNT_A64_EL0_8_9 GENMASK(41, 40) diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index 48d4473e8eee..9e96f024b2f1 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -79,6 +79,14 @@ static inline u64 gic_read_iar_cavium_thunderx(void) return 0x3ff; } +static u64 __maybe_unused gic_read_iar(void) +{ + if (alternative_has_cap_unlikely(ARM64_WORKAROUND_CAVIUM_23154)) + return gic_read_iar_cavium_thunderx(); + else + return gic_read_iar_common(); +} + static inline void gic_write_ctlr(u32 val) { write_sysreg_s(val, SYS_ICC_CTLR_EL1); @@ -167,21 +175,6 @@ static inline bool gic_prio_masking_enabled(void) static inline void gic_pmr_mask_irqs(void) { - BUILD_BUG_ON(GICD_INT_DEF_PRI < (__GIC_PRIO_IRQOFF | - GIC_PRIO_PSR_I_SET)); - BUILD_BUG_ON(GICD_INT_DEF_PRI >= GIC_PRIO_IRQON); - /* - * Need to make sure IRQON allows IRQs when SCR_EL3.FIQ is cleared - * and non-secure PMR 
accesses are not subject to the shifts that - * are applied to IRQ priorities - */ - BUILD_BUG_ON((0x80 | (GICD_INT_DEF_PRI >> 1)) >= GIC_PRIO_IRQON); - /* - * Same situation as above, but now we make sure that we can mask - * regular interrupts. - */ - BUILD_BUG_ON((0x80 | (GICD_INT_DEF_PRI >> 1)) < (__GIC_PRIO_IRQOFF_NS | - GIC_PRIO_PSR_I_SET)); gic_write_pmr(GIC_PRIO_IRQOFF); } @@ -190,5 +183,10 @@ static inline void gic_arch_enable_irqs(void) asm volatile ("msr daifclr, #3" : : : "memory"); } +static inline bool gic_has_relaxed_pmr_sync(void) +{ + return cpus_have_cap(ARM64_HAS_GIC_PRIO_RELAXED_SYNC); +} + #endif /* __ASSEMBLY__ */ #endif /* __ASM_ARCH_GICV3_H */ diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index af1fafbe7e1d..f5794d50f51d 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -15,7 +15,7 @@ #include <linux/bug.h> #include <linux/init.h> #include <linux/jump_label.h> -#include <linux/smp.h> +#include <linux/percpu.h> #include <linux/types.h> #include <clocksource/arm_arch_timer.h> @@ -88,13 +88,7 @@ static inline notrace u64 arch_timer_read_cntvct_el0(void) #define arch_timer_reg_read_stable(reg) \ ({ \ - u64 _val; \ - \ - preempt_disable_notrace(); \ - _val = erratum_handler(read_ ## reg)(); \ - preempt_enable_notrace(); \ - \ - _val; \ + erratum_handler(read_ ## reg)(); \ }) /* diff --git a/arch/arm64/include/asm/archrandom.h b/arch/arm64/include/asm/archrandom.h index 109e2a4454be..8babfbe31f95 100644 --- a/arch/arm64/include/asm/archrandom.h +++ b/arch/arm64/include/asm/archrandom.h @@ -5,6 +5,7 @@ #include <linux/arm-smccc.h> #include <linux/bug.h> #include <linux/kernel.h> +#include <linux/irqflags.h> #include <asm/cpufeature.h> #define ARM_SMCCC_TRNG_MIN_VERSION 0x10000UL @@ -58,6 +59,13 @@ static inline bool __arm64_rndrrs(unsigned long *v) return ok; } +static __always_inline bool __cpu_has_rng(void) +{ + if (unlikely(!system_capabilities_finalized() && !preemptible())) + return this_cpu_has_cap(ARM64_HAS_RNG); + return alternative_has_cap_unlikely(ARM64_HAS_RNG); +} + static inline size_t __must_check arch_get_random_longs(unsigned long *v, size_t max_longs) { /* @@ -66,7 +74,7 @@ static inline size_t __must_check arch_get_random_longs(unsigned long *v, size_t * cpufeature code and with potential scheduling between CPUs * with and without the feature. */ - if (max_longs && cpus_have_const_cap(ARM64_HAS_RNG) && __arm64_rndr(v)) + if (max_longs && __cpu_has_rng() && __arm64_rndr(v)) return 1; return 0; } @@ -108,7 +116,7 @@ static inline size_t __must_check arch_get_random_seed_longs(unsigned long *v, s * reseeded after each invocation. This is not a 100% fit but good * enough to implement this API if no other entropy source exists. 
*/ - if (cpus_have_const_cap(ARM64_HAS_RNG) && __arm64_rndrrs(v)) + if (__cpu_has_rng() && __arm64_rndrrs(v)) return 1; return 0; @@ -121,40 +129,4 @@ static inline bool __init __early_cpu_has_rndr(void) return (ftr >> ID_AA64ISAR0_EL1_RNDR_SHIFT) & 0xf; } -static inline size_t __init __must_check -arch_get_random_seed_longs_early(unsigned long *v, size_t max_longs) -{ - WARN_ON(system_state != SYSTEM_BOOTING); - - if (!max_longs) - return 0; - - if (smccc_trng_available) { - struct arm_smccc_res res; - - max_longs = min_t(size_t, 3, max_longs); - arm_smccc_1_1_invoke(ARM_SMCCC_TRNG_RND64, max_longs * 64, &res); - if ((int)res.a0 >= 0) { - switch (max_longs) { - case 3: - *v++ = res.a1; - fallthrough; - case 2: - *v++ = res.a2; - fallthrough; - case 1: - *v++ = res.a3; - break; - } - return max_longs; - } - } - - if (__early_cpu_has_rndr() && __arm64_rndr(v)) - return 1; - - return 0; -} -#define arch_get_random_seed_longs_early arch_get_random_seed_longs_early - #endif /* _ASM_ARCHRANDOM_H */ diff --git a/arch/arm64/include/asm/arm_pmuv3.h b/arch/arm64/include/asm/arm_pmuv3.h new file mode 100644 index 000000000000..8a777dec8d88 --- /dev/null +++ b/arch/arm64/include/asm/arm_pmuv3.h @@ -0,0 +1,191 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2012 ARM Ltd. + */ + +#ifndef __ASM_PMUV3_H +#define __ASM_PMUV3_H + +#include <asm/kvm_host.h> + +#include <asm/cpufeature.h> +#include <asm/sysreg.h> + +#define RETURN_READ_PMEVCNTRN(n) \ + return read_sysreg(pmevcntr##n##_el0) +static inline unsigned long read_pmevcntrn(int n) +{ + PMEVN_SWITCH(n, RETURN_READ_PMEVCNTRN); + return 0; +} + +#define WRITE_PMEVCNTRN(n) \ + write_sysreg(val, pmevcntr##n##_el0) +static inline void write_pmevcntrn(int n, unsigned long val) +{ + PMEVN_SWITCH(n, WRITE_PMEVCNTRN); +} + +#define WRITE_PMEVTYPERN(n) \ + write_sysreg(val, pmevtyper##n##_el0) +static inline void write_pmevtypern(int n, unsigned long val) +{ + PMEVN_SWITCH(n, WRITE_PMEVTYPERN); +} + +#define RETURN_READ_PMEVTYPERN(n) \ + return read_sysreg(pmevtyper##n##_el0) +static inline unsigned long read_pmevtypern(int n) +{ + PMEVN_SWITCH(n, RETURN_READ_PMEVTYPERN); + return 0; +} + +static inline unsigned long read_pmmir(void) +{ + return read_cpuid(PMMIR_EL1); +} + +static inline u32 read_pmuver(void) +{ + u64 dfr0 = read_sysreg(id_aa64dfr0_el1); + + return cpuid_feature_extract_unsigned_field(dfr0, + ID_AA64DFR0_EL1_PMUVer_SHIFT); +} + +static inline bool pmuv3_has_icntr(void) +{ + u64 dfr1 = read_sysreg(id_aa64dfr1_el1); + + return !!cpuid_feature_extract_unsigned_field(dfr1, + ID_AA64DFR1_EL1_PMICNTR_SHIFT); +} + +static inline void write_pmcr(u64 val) +{ + write_sysreg(val, pmcr_el0); +} + +static inline u64 read_pmcr(void) +{ + return read_sysreg(pmcr_el0); +} + +static inline void write_pmselr(u32 val) +{ + write_sysreg(val, pmselr_el0); +} + +static inline void write_pmccntr(u64 val) +{ + write_sysreg(val, pmccntr_el0); +} + +static inline u64 read_pmccntr(void) +{ + return read_sysreg(pmccntr_el0); +} + +static inline void write_pmicntr(u64 val) +{ + write_sysreg_s(val, SYS_PMICNTR_EL0); +} + +static inline u64 read_pmicntr(void) +{ + return read_sysreg_s(SYS_PMICNTR_EL0); +} + +static inline void write_pmcntenset(u64 val) +{ + write_sysreg(val, pmcntenset_el0); +} + +static inline void write_pmcntenclr(u64 val) +{ + write_sysreg(val, pmcntenclr_el0); +} + +static inline void write_pmintenset(u64 val) +{ + write_sysreg(val, pmintenset_el1); +} + +static inline void write_pmintenclr(u64 val) +{ + write_sysreg(val, 
pmintenclr_el1); +} + +static inline void write_pmccfiltr(u64 val) +{ + write_sysreg(val, pmccfiltr_el0); +} + +static inline u64 read_pmccfiltr(void) +{ + return read_sysreg(pmccfiltr_el0); +} + +static inline void write_pmicfiltr(u64 val) +{ + write_sysreg_s(val, SYS_PMICFILTR_EL0); +} + +static inline u64 read_pmicfiltr(void) +{ + return read_sysreg_s(SYS_PMICFILTR_EL0); +} + +static inline void write_pmovsclr(u64 val) +{ + write_sysreg(val, pmovsclr_el0); +} + +static inline u64 read_pmovsclr(void) +{ + return read_sysreg(pmovsclr_el0); +} + +static inline void write_pmuserenr(u32 val) +{ + write_sysreg(val, pmuserenr_el0); +} + +static inline void write_pmuacr(u64 val) +{ + write_sysreg_s(val, SYS_PMUACR_EL1); +} + +static inline u64 read_pmceid0(void) +{ + return read_sysreg(pmceid0_el0); +} + +static inline u64 read_pmceid1(void) +{ + return read_sysreg(pmceid1_el0); +} + +static inline bool pmuv3_implemented(int pmuver) +{ + return !(pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF || + pmuver == ID_AA64DFR0_EL1_PMUVer_NI); +} + +static inline bool is_pmuv3p4(int pmuver) +{ + return pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P4; +} + +static inline bool is_pmuv3p5(int pmuver) +{ + return pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P5; +} + +static inline bool is_pmuv3p9(int pmuver) +{ + return pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P9; +} + +#endif diff --git a/arch/arm64/include/asm/asm-bug.h b/arch/arm64/include/asm/asm-bug.h index c762038ba400..a5f13801b784 100644 --- a/arch/arm64/include/asm/asm-bug.h +++ b/arch/arm64/include/asm/asm-bug.h @@ -21,15 +21,21 @@ #endif #ifdef CONFIG_GENERIC_BUG - -#define __BUG_ENTRY(flags) \ +#define __BUG_ENTRY_START \ .pushsection __bug_table,"aw"; \ .align 2; \ 14470: .long 14471f - .; \ -_BUGVERBOSE_LOCATION(__FILE__, __LINE__) \ - .short flags; \ + +#define __BUG_ENTRY_END \ + .align 2; \ .popsection; \ 14471: + +#define __BUG_ENTRY(flags) \ + __BUG_ENTRY_START \ +_BUGVERBOSE_LOCATION(__FILE__, __LINE__) \ + .short flags; \ + __BUG_ENTRY_END #else #define __BUG_ENTRY(flags) #endif @@ -40,4 +46,24 @@ _BUGVERBOSE_LOCATION(__FILE__, __LINE__) \ #define ASM_BUG() ASM_BUG_FLAGS(0) +#ifdef CONFIG_DEBUG_BUGVERBOSE +#define __BUG_LOCATION_STRING(file, line) \ + ".long " file "- .;" \ + ".short " line ";" +#else +#define __BUG_LOCATION_STRING(file, line) +#endif + +#define __BUG_ENTRY_STRING(file, line, flags) \ + __stringify(__BUG_ENTRY_START) \ + __BUG_LOCATION_STRING(file, line) \ + ".short " flags ";" \ + __stringify(__BUG_ENTRY_END) + +#define ARCH_WARN_ASM(file, line, flags, size) \ + __BUG_ENTRY_STRING(file, line, flags) \ + __stringify(brk BUG_BRK_IMM) + +#define ARCH_WARN_REACHABLE + #endif /* __ASM_ASM_BUG_H */ diff --git a/arch/arm64/include/asm/asm-extable.h b/arch/arm64/include/asm/asm-extable.h index 980d1dd8e1a3..292f2687a12e 100644 --- a/arch/arm64/include/asm/asm-extable.h +++ b/arch/arm64/include/asm/asm-extable.h @@ -9,7 +9,8 @@ #define EX_TYPE_BPF 1 #define EX_TYPE_UACCESS_ERR_ZERO 2 #define EX_TYPE_KACCESS_ERR_ZERO 3 -#define EX_TYPE_LOAD_UNALIGNED_ZEROPAD 4 +#define EX_TYPE_UACCESS_CPY 4 +#define EX_TYPE_LOAD_UNALIGNED_ZEROPAD 5 /* Data fields for EX_TYPE_UACCESS_ERR_ZERO */ #define EX_DATA_REG_ERR_SHIFT 0 @@ -23,6 +24,9 @@ #define EX_DATA_REG_ADDR_SHIFT 5 #define EX_DATA_REG_ADDR GENMASK(9, 5) +/* Data fields for EX_TYPE_UACCESS_CPY */ +#define EX_DATA_UACCESS_WRITE BIT(0) + #ifdef __ASSEMBLY__ #define __ASM_EXTABLE_RAW(insn, fixup, type, data) \ @@ -69,6 +73,10 @@ .endif .endm + .macro _asm_extable_uaccess_cpy, insn, fixup, uaccess_is_write + 
__ASM_EXTABLE_RAW(\insn, \fixup, EX_TYPE_UACCESS_CPY, \uaccess_is_write) + .endm + #else /* __ASSEMBLY__ */ #include <linux/stringify.h> @@ -112,6 +120,9 @@ #define _ASM_EXTABLE_KACCESS_ERR(insn, fixup, err) \ _ASM_EXTABLE_KACCESS_ERR_ZERO(insn, fixup, err, wzr) +#define _ASM_EXTABLE_KACCESS(insn, fixup) \ + _ASM_EXTABLE_KACCESS_ERR_ZERO(insn, fixup, wzr, wzr) + #define _ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD(insn, fixup, data, addr) \ __DEFINE_ASM_GPR_NUMS \ __ASM_EXTABLE_RAW(#insn, #fixup, \ diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h index 75b211c98dea..9148f5a31968 100644 --- a/arch/arm64/include/asm/asm-uaccess.h +++ b/arch/arm64/include/asm/asm-uaccess.h @@ -18,7 +18,6 @@ bic \tmp1, \tmp1, #TTBR_ASID_MASK sub \tmp1, \tmp1, #RESERVED_SWAPPER_OFFSET // reserved_pg_dir msr ttbr0_el1, \tmp1 // set reserved TTBR0_EL1 - isb add \tmp1, \tmp1, #RESERVED_SWAPPER_OFFSET msr ttbr1_el1, \tmp1 // set reserved ASID isb @@ -31,7 +30,6 @@ extr \tmp2, \tmp2, \tmp1, #48 ror \tmp2, \tmp2, #16 msr ttbr1_el1, \tmp2 // set the active ASID - isb msr ttbr0_el1, \tmp1 // set the non-PAN TTBR0_EL1 isb .endm @@ -63,6 +61,10 @@ alternative_else_nop_endif 9999: x; \ _asm_extable_uaccess 9999b, l +#define USER_CPY(l, uaccess_is_write, x...) \ +9999: x; \ + _asm_extable_uaccess_cpy 9999b, l, uaccess_is_write + /* * Generate the assembly for LDTR/STTR with exception table entries. * This is complicated as there is no post-increment or pair versions of the diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 5846145be523..23be85d93348 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -12,7 +12,7 @@ #ifndef __ASM_ASSEMBLER_H #define __ASM_ASSEMBLER_H -#include <asm-generic/export.h> +#include <linux/export.h> #include <asm/alternative.h> #include <asm/asm-bug.h> @@ -34,31 +34,18 @@ wx\n .req w\n .endr - .macro save_and_disable_daif, flags - mrs \flags, daif - msr daifset, #0xf - .endm - .macro disable_daif msr daifset, #0xf .endm - .macro enable_daif - msr daifclr, #0xf - .endm - - .macro restore_daif, flags:req - msr daif, \flags - .endm - - /* IRQ/FIQ are the lowest priority flags, unconditionally unmask the rest. */ - .macro enable_da - msr daifclr, #(8 | 4) - .endm - /* * Save/restore interrupts. */ + .macro save_and_disable_daif, flags + mrs \flags, daif + msr daifset, #0xf + .endm + .macro save_and_disable_irq, flags mrs \flags, daif msr daifset, #3 @@ -68,16 +55,12 @@ msr daif, \flags .endm - .macro enable_dbg - msr daifclr, #8 - .endm - .macro disable_step_tsk, flgs, tmp tbz \flgs, #TIF_SINGLESTEP, 9990f mrs \tmp, mdscr_el1 - bic \tmp, \tmp, #DBG_MDSCR_SS + bic \tmp, \tmp, #MDSCR_EL1_SS msr mdscr_el1, \tmp - isb // Synchronise with enable_dbg + isb // Take effect before a subsequent clear of DAIF.D 9990: .endm @@ -85,7 +68,7 @@ .macro enable_step_tsk, flgs, tmp tbz \flgs, #TIF_SINGLESTEP, 9990f mrs \tmp, mdscr_el1 - orr \tmp, \tmp, #DBG_MDSCR_SS + orr \tmp, \tmp, #MDSCR_EL1_SS msr mdscr_el1, \tmp 9990: .endm @@ -271,13 +254,6 @@ alternative_endif .endm /* - * vma_vm_mm - get mm pointer from vma pointer (vma->vm_mm) - */ - .macro vma_vm_mm, rd, rn - ldr \rd, [\rn, #VMA_VM_MM] - .endm - -/* * read_ctr - read CTR_EL0. 
If the system has mismatched register fields, * provide the system wide safe value from arm64_ftr_reg_ctrel0.sys_val */ @@ -293,7 +269,7 @@ alternative_endif alternative_if_not ARM64_KVM_PROTECTED_MODE ASM_BUG() alternative_else_nop_endif -alternative_cb kvm_compute_final_ctr_el0 +alternative_cb ARM64_ALWAYS_SYSTEM, kvm_compute_final_ctr_el0 movz \reg, #0 movk \reg, #0, lsl #16 movk \reg, #0, lsl #32 @@ -360,20 +336,6 @@ alternative_cb_end .endm /* - * idmap_get_t0sz - get the T0SZ value needed to cover the ID map - * - * Calculate the maximum allowed value for TCR_EL1.T0SZ so that the - * entire ID map region can be mapped. As T0SZ == (64 - #bits used), - * this number conveniently equals the number of leading zeroes in - * the physical address of _end. - */ - .macro idmap_get_t0sz, reg - adrp \reg, _end - orr \reg, \reg, #(1 << VA_BITS_MIN) - 1 - clz \reg, \reg - .endm - -/* * tcr_compute_pa_size - set TCR.(I)PS to the highest supported * ID_AA64MMFR0_EL1.PARange value * @@ -384,8 +346,13 @@ alternative_cb_end .macro tcr_compute_pa_size, tcr, pos, tmp0, tmp1 mrs \tmp0, ID_AA64MMFR0_EL1 // Narrow PARange to fit the PS field in TCR_ELx - ubfx \tmp0, \tmp0, #ID_AA64MMFR0_PARANGE_SHIFT, #3 - mov \tmp1, #ID_AA64MMFR0_PARANGE_MAX + ubfx \tmp0, \tmp0, #ID_AA64MMFR0_EL1_PARANGE_SHIFT, #3 + mov \tmp1, #ID_AA64MMFR0_EL1_PARANGE_MAX +#ifdef CONFIG_ARM64_LPA2 +alternative_if_not ARM64_HAS_VA52 + mov \tmp1, #ID_AA64MMFR0_EL1_PARANGE_48 +alternative_else_nop_endif +#endif cmp \tmp0, \tmp1 csel \tmp0, \tmp1, \tmp0, hi bfi \tcr, \tmp0, \pos, #3 @@ -512,9 +479,10 @@ alternative_endif */ .macro reset_pmuserenr_el0, tmpreg mrs \tmpreg, id_aa64dfr0_el1 - sbfx \tmpreg, \tmpreg, #ID_AA64DFR0_PMUVER_SHIFT, #4 - cmp \tmpreg, #1 // Skip if no PMU present - b.lt 9000f + ubfx \tmpreg, \tmpreg, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4 + cmp \tmpreg, #ID_AA64DFR0_EL1_PMUVer_NI + ccmp \tmpreg, #ID_AA64DFR0_EL1_PMUVer_IMP_DEF, #4, ne + b.eq 9000f // Skip if no PMU present or IMP_DEF msr pmuserenr_el0, xzr // Disable PMU access from EL0 9000: .endm @@ -524,7 +492,7 @@ alternative_endif */ .macro reset_amuserenr_el0, tmpreg mrs \tmpreg, id_aa64pfr0_el1 // Check ID_AA64PFR0_EL1 - ubfx \tmpreg, \tmpreg, #ID_AA64PFR0_AMU_SHIFT, #4 + ubfx \tmpreg, \tmpreg, #ID_AA64PFR0_EL1_AMU_SHIFT, #4 cbz \tmpreg, .Lskip_\@ // Skip if no AMU present msr_s SYS_AMUSERENR_EL0, xzr // Disable AMU access from EL0 .Lskip_\@: @@ -604,29 +572,27 @@ alternative_endif .endm /* - * Offset ttbr1 to allow for 48-bit kernel VAs set with 52-bit PTRS_PER_PGD. + * If the kernel is built for 52-bit virtual addressing but the hardware only + * supports 48 bits, we cannot program the pgdir address into TTBR1 directly, + * but we have to add an offset so that the TTBR1 address corresponds with the + * pgdir entry that covers the lowest 48-bit addressable VA. + * + * Note that this trick is only used for LVA/64k pages - LPA2/4k pages uses an + * additional paging level, and on LPA2/16k pages, we would end up with a root + * level table with only 2 entries, which is suboptimal in terms of TLB + * utilization, so there we fall back to 47 bits of translation if LPA2 is not + * supported. + * * orr is used as it can cover the immediate value (and is idempotent). - * In future this may be nop'ed out when dealing with 52-bit kernel VAs. * ttbr: Value of ttbr to set, modified. 
*/ .macro offset_ttbr1, ttbr, tmp -#ifdef CONFIG_ARM64_VA_BITS_52 - mrs_s \tmp, SYS_ID_AA64MMFR2_EL1 - and \tmp, \tmp, #(0xf << ID_AA64MMFR2_LVA_SHIFT) - cbnz \tmp, .Lskipoffs_\@ - orr \ttbr, \ttbr, #TTBR1_BADDR_4852_OFFSET -.Lskipoffs_\@ : -#endif - .endm - -/* - * Perform the reverse of offset_ttbr1. - * bic is used as it can cover the immediate value and, in future, won't need - * to be nop'ed out when dealing with 52-bit kernel VAs. - */ - .macro restore_ttbr1, ttbr -#ifdef CONFIG_ARM64_VA_BITS_52 - bic \ttbr, \ttbr, #TTBR1_BADDR_4852_OFFSET +#if defined(CONFIG_ARM64_VA_BITS_52) && !defined(CONFIG_ARM64_LPA2) + mrs \tmp, tcr_el1 + and \tmp, \tmp, #TCR_T1SZ_MASK + cmp \tmp, #TCR_T1SZ(VA_BITS_MIN) + orr \tmp, \ttbr, #TTBR1_BADDR_4852_OFFSET + csel \ttbr, \tmp, \ttbr, eq #endif .endm @@ -648,27 +614,13 @@ alternative_endif .macro phys_to_pte, pte, phys #ifdef CONFIG_ARM64_PA_BITS_52 - /* - * We assume \phys is 64K aligned and this is guaranteed by only - * supporting this configuration with 64K pages. - */ - orr \pte, \phys, \phys, lsr #36 - and \pte, \pte, #PTE_ADDR_MASK + orr \pte, \phys, \phys, lsr #PTE_ADDR_HIGH_SHIFT + and \pte, \pte, #PHYS_TO_PTE_ADDR_MASK #else mov \pte, \phys #endif .endm - .macro pte_to_phys, phys, pte -#ifdef CONFIG_ARM64_PA_BITS_52 - ubfiz \phys, \pte, #(48 - 16 - 12), #16 - bfxil \phys, \pte, #16, #32 - lsl \phys, \phys, #16 -#else - and \phys, \pte, #PTE_ADDR_MASK -#endif - .endm - /* * tcr_clear_errata_bits - Clear TCR bits that trigger an errata on this CPU. */ @@ -787,32 +739,25 @@ alternative_endif .endm /* - * Check whether preempt/bh-disabled asm code should yield as soon as - * it is able. This is the case if we are currently running in task - * context, and either a softirq is pending, or the TIF_NEED_RESCHED - * flag is set and re-enabling preemption a single time would result in - * a preempt count of zero. (Note that the TIF_NEED_RESCHED flag is - * stored negated in the top word of the thread_info::preempt_count + * Check whether asm code should yield as soon as it is able. This is + * the case if we are currently running in task context, and the + * TIF_NEED_RESCHED flag is set. (Note that the TIF_NEED_RESCHED flag + * is stored negated in the top word of the thread_info::preempt_count * field) */ - .macro cond_yield, lbl:req, tmp:req, tmp2:req + .macro cond_yield, lbl:req, tmp:req, tmp2 +#ifdef CONFIG_PREEMPT_VOLUNTARY get_current_task \tmp ldr \tmp, [\tmp, #TSK_TI_PREEMPT] /* * If we are serving a softirq, there is no point in yielding: the * softirq will not be preempted no matter what we do, so we should - * run to completion as quickly as we can. + * run to completion as quickly as we can. The preempt_count field will + * have BIT(SOFTIRQ_SHIFT) set in this case, so the zero check will + * catch this case too. 
*/ - tbnz \tmp, #SOFTIRQ_SHIFT, .Lnoyield_\@ -#ifdef CONFIG_PREEMPTION - sub \tmp, \tmp, #PREEMPT_DISABLE_OFFSET cbz \tmp, \lbl #endif - adr_l \tmp, irq_stat + IRQ_CPUSTAT_SOFTIRQ_PENDING - get_this_cpu_offset \tmp2 - ldr w\tmp, [\tmp, \tmp2] - cbnz w\tmp, \lbl // yield on pending softirq in task context -.Lnoyield_\@: .endm /* @@ -877,7 +822,7 @@ alternative_endif .macro __mitigate_spectre_bhb_loop tmp #ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY -alternative_cb spectre_bhb_patch_loop_iter +alternative_cb ARM64_ALWAYS_SYSTEM, spectre_bhb_patch_loop_iter mov \tmp, #32 // Patched to correct the immediate alternative_cb_end .Lspectre_bhb_loop\@: @@ -890,7 +835,7 @@ alternative_cb_end .macro mitigate_spectre_bhb_loop tmp #ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY -alternative_cb spectre_bhb_patch_loop_mitigation_enable +alternative_cb ARM64_ALWAYS_SYSTEM, spectre_bhb_patch_loop_mitigation_enable b .L_spectre_bhb_loop_done\@ // Patched to NOP alternative_cb_end __mitigate_spectre_bhb_loop \tmp @@ -904,7 +849,7 @@ alternative_cb_end stp x0, x1, [sp, #-16]! stp x2, x3, [sp, #-16]! mov w0, #ARM_SMCCC_ARCH_WORKAROUND_3 -alternative_cb smccc_patch_fw_mitigation_conduit +alternative_cb ARM64_ALWAYS_SYSTEM, smccc_patch_fw_mitigation_conduit nop // Patched to SMC/HVC #0 alternative_cb_end ldp x2, x3, [sp], #16 @@ -914,7 +859,7 @@ alternative_cb_end .macro mitigate_spectre_bhb_clear_insn #ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY -alternative_cb spectre_bhb_patch_clearbhb +alternative_cb ARM64_ALWAYS_SYSTEM, spectre_bhb_patch_clearbhb /* Patched to NOP when not supported */ clearbhb isb diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h index c9979273d389..400d279e0f8d 100644 --- a/arch/arm64/include/asm/atomic.h +++ b/arch/arm64/include/asm/atomic.h @@ -142,24 +142,6 @@ static __always_inline long arch_atomic64_dec_if_positive(atomic64_t *v) #define arch_atomic_fetch_xor_release arch_atomic_fetch_xor_release #define arch_atomic_fetch_xor arch_atomic_fetch_xor -#define arch_atomic_xchg_relaxed(v, new) \ - arch_xchg_relaxed(&((v)->counter), (new)) -#define arch_atomic_xchg_acquire(v, new) \ - arch_xchg_acquire(&((v)->counter), (new)) -#define arch_atomic_xchg_release(v, new) \ - arch_xchg_release(&((v)->counter), (new)) -#define arch_atomic_xchg(v, new) \ - arch_xchg(&((v)->counter), (new)) - -#define arch_atomic_cmpxchg_relaxed(v, old, new) \ - arch_cmpxchg_relaxed(&((v)->counter), (old), (new)) -#define arch_atomic_cmpxchg_acquire(v, old, new) \ - arch_cmpxchg_acquire(&((v)->counter), (old), (new)) -#define arch_atomic_cmpxchg_release(v, old, new) \ - arch_cmpxchg_release(&((v)->counter), (old), (new)) -#define arch_atomic_cmpxchg(v, old, new) \ - arch_cmpxchg(&((v)->counter), (old), (new)) - #define arch_atomic_andnot arch_atomic_andnot /* @@ -209,16 +191,6 @@ static __always_inline long arch_atomic64_dec_if_positive(atomic64_t *v) #define arch_atomic64_fetch_xor_release arch_atomic64_fetch_xor_release #define arch_atomic64_fetch_xor arch_atomic64_fetch_xor -#define arch_atomic64_xchg_relaxed arch_atomic_xchg_relaxed -#define arch_atomic64_xchg_acquire arch_atomic_xchg_acquire -#define arch_atomic64_xchg_release arch_atomic_xchg_release -#define arch_atomic64_xchg arch_atomic_xchg - -#define arch_atomic64_cmpxchg_relaxed arch_atomic_cmpxchg_relaxed -#define arch_atomic64_cmpxchg_acquire arch_atomic_cmpxchg_acquire -#define arch_atomic64_cmpxchg_release arch_atomic_cmpxchg_release -#define arch_atomic64_cmpxchg arch_atomic_cmpxchg - #define arch_atomic64_andnot 
arch_atomic64_andnot #define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h index fe0db8d416fb..89d2ba272359 100644 --- a/arch/arm64/include/asm/atomic_ll_sc.h +++ b/arch/arm64/include/asm/atomic_ll_sc.h @@ -12,19 +12,6 @@ #include <linux/stringify.h> -#ifdef CONFIG_ARM64_LSE_ATOMICS -#define __LL_SC_FALLBACK(asm_ops) \ -" b 3f\n" \ -" .subsection 1\n" \ -"3:\n" \ -asm_ops "\n" \ -" b 4f\n" \ -" .previous\n" \ -"4:\n" -#else -#define __LL_SC_FALLBACK(asm_ops) asm_ops -#endif - #ifndef CONFIG_CC_HAS_K_CONSTRAINT #define K #endif @@ -36,38 +23,36 @@ asm_ops "\n" \ */ #define ATOMIC_OP(op, asm_op, constraint) \ -static inline void \ +static __always_inline void \ __ll_sc_atomic_##op(int i, atomic_t *v) \ { \ unsigned long tmp; \ int result; \ \ asm volatile("// atomic_" #op "\n" \ - __LL_SC_FALLBACK( \ " prfm pstl1strm, %2\n" \ "1: ldxr %w0, %2\n" \ " " #asm_op " %w0, %w0, %w3\n" \ " stxr %w1, %w0, %2\n" \ - " cbnz %w1, 1b\n") \ + " cbnz %w1, 1b\n" \ : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \ : __stringify(constraint) "r" (i)); \ } #define ATOMIC_OP_RETURN(name, mb, acq, rel, cl, op, asm_op, constraint)\ -static inline int \ +static __always_inline int \ __ll_sc_atomic_##op##_return##name(int i, atomic_t *v) \ { \ unsigned long tmp; \ int result; \ \ asm volatile("// atomic_" #op "_return" #name "\n" \ - __LL_SC_FALLBACK( \ " prfm pstl1strm, %2\n" \ "1: ld" #acq "xr %w0, %2\n" \ " " #asm_op " %w0, %w0, %w3\n" \ " st" #rel "xr %w1, %w0, %2\n" \ " cbnz %w1, 1b\n" \ - " " #mb ) \ + " " #mb \ : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \ : __stringify(constraint) "r" (i) \ : cl); \ @@ -76,20 +61,19 @@ __ll_sc_atomic_##op##_return##name(int i, atomic_t *v) \ } #define ATOMIC_FETCH_OP(name, mb, acq, rel, cl, op, asm_op, constraint) \ -static inline int \ +static __always_inline int \ __ll_sc_atomic_fetch_##op##name(int i, atomic_t *v) \ { \ unsigned long tmp; \ int val, result; \ \ asm volatile("// atomic_fetch_" #op #name "\n" \ - __LL_SC_FALLBACK( \ " prfm pstl1strm, %3\n" \ "1: ld" #acq "xr %w0, %3\n" \ " " #asm_op " %w1, %w0, %w4\n" \ " st" #rel "xr %w2, %w1, %3\n" \ " cbnz %w2, 1b\n" \ - " " #mb ) \ + " " #mb \ : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter) \ : __stringify(constraint) "r" (i) \ : cl); \ @@ -135,38 +119,36 @@ ATOMIC_OPS(andnot, bic, ) #undef ATOMIC_OP #define ATOMIC64_OP(op, asm_op, constraint) \ -static inline void \ +static __always_inline void \ __ll_sc_atomic64_##op(s64 i, atomic64_t *v) \ { \ s64 result; \ unsigned long tmp; \ \ asm volatile("// atomic64_" #op "\n" \ - __LL_SC_FALLBACK( \ " prfm pstl1strm, %2\n" \ "1: ldxr %0, %2\n" \ " " #asm_op " %0, %0, %3\n" \ " stxr %w1, %0, %2\n" \ - " cbnz %w1, 1b") \ + " cbnz %w1, 1b" \ : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \ : __stringify(constraint) "r" (i)); \ } #define ATOMIC64_OP_RETURN(name, mb, acq, rel, cl, op, asm_op, constraint)\ -static inline long \ +static __always_inline long \ __ll_sc_atomic64_##op##_return##name(s64 i, atomic64_t *v) \ { \ s64 result; \ unsigned long tmp; \ \ asm volatile("// atomic64_" #op "_return" #name "\n" \ - __LL_SC_FALLBACK( \ " prfm pstl1strm, %2\n" \ "1: ld" #acq "xr %0, %2\n" \ " " #asm_op " %0, %0, %3\n" \ " st" #rel "xr %w1, %0, %2\n" \ " cbnz %w1, 1b\n" \ - " " #mb ) \ + " " #mb \ : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \ : __stringify(constraint) "r" (i) \ : cl); \ @@ -175,20 +157,19 @@ __ll_sc_atomic64_##op##_return##name(s64 i, 
atomic64_t *v) \ } #define ATOMIC64_FETCH_OP(name, mb, acq, rel, cl, op, asm_op, constraint)\ -static inline long \ +static __always_inline long \ __ll_sc_atomic64_fetch_##op##name(s64 i, atomic64_t *v) \ { \ s64 result, val; \ unsigned long tmp; \ \ asm volatile("// atomic64_fetch_" #op #name "\n" \ - __LL_SC_FALLBACK( \ " prfm pstl1strm, %3\n" \ "1: ld" #acq "xr %0, %3\n" \ " " #asm_op " %1, %0, %4\n" \ " st" #rel "xr %w2, %1, %3\n" \ " cbnz %w2, 1b\n" \ - " " #mb ) \ + " " #mb \ : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter) \ : __stringify(constraint) "r" (i) \ : cl); \ @@ -233,14 +214,13 @@ ATOMIC64_OPS(andnot, bic, ) #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP -static inline s64 +static __always_inline s64 __ll_sc_atomic64_dec_if_positive(atomic64_t *v) { s64 result; unsigned long tmp; asm volatile("// atomic64_dec_if_positive\n" - __LL_SC_FALLBACK( " prfm pstl1strm, %2\n" "1: ldxr %0, %2\n" " subs %0, %0, #1\n" @@ -248,7 +228,7 @@ __ll_sc_atomic64_dec_if_positive(atomic64_t *v) " stlxr %w1, %0, %2\n" " cbnz %w1, 1b\n" " dmb ish\n" - "2:") + "2:" : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) : : "cc", "memory"); @@ -257,7 +237,7 @@ __ll_sc_atomic64_dec_if_positive(atomic64_t *v) } #define __CMPXCHG_CASE(w, sfx, name, sz, mb, acq, rel, cl, constraint) \ -static inline u##sz \ +static __always_inline u##sz \ __ll_sc__cmpxchg_case_##name##sz(volatile void *ptr, \ unsigned long old, \ u##sz new) \ @@ -274,7 +254,6 @@ __ll_sc__cmpxchg_case_##name##sz(volatile void *ptr, \ old = (u##sz)old; \ \ asm volatile( \ - __LL_SC_FALLBACK( \ " prfm pstl1strm, %[v]\n" \ "1: ld" #acq "xr" #sfx "\t%" #w "[oldval], %[v]\n" \ " eor %" #w "[tmp], %" #w "[oldval], %" #w "[old]\n" \ @@ -282,7 +261,7 @@ __ll_sc__cmpxchg_case_##name##sz(volatile void *ptr, \ " st" #rel "xr" #sfx "\t%w[tmp], %" #w "[new], %[v]\n" \ " cbnz %w[tmp], 1b\n" \ " " #mb "\n" \ - "2:") \ + "2:" \ : [tmp] "=&r" (tmp), [oldval] "=&r" (oldval), \ [v] "+Q" (*(u##sz *)ptr) \ : [old] __stringify(constraint) "r" (old), [new] "r" (new) \ @@ -315,39 +294,46 @@ __CMPXCHG_CASE( , , mb_, 64, dmb ish, , l, "memory", L) #undef __CMPXCHG_CASE -#define __CMPXCHG_DBL(name, mb, rel, cl) \ -static inline long \ -__ll_sc__cmpxchg_double##name(unsigned long old1, \ - unsigned long old2, \ - unsigned long new1, \ - unsigned long new2, \ - volatile void *ptr) \ +union __u128_halves { + u128 full; + struct { + u64 low, high; + }; +}; + +#define __CMPXCHG128(name, mb, rel, cl...) 
\ +static __always_inline u128 \ +__ll_sc__cmpxchg128##name(volatile u128 *ptr, u128 old, u128 new) \ { \ - unsigned long tmp, ret; \ + union __u128_halves r, o = { .full = (old) }, \ + n = { .full = (new) }; \ + unsigned int tmp; \ \ - asm volatile("// __cmpxchg_double" #name "\n" \ - __LL_SC_FALLBACK( \ - " prfm pstl1strm, %2\n" \ - "1: ldxp %0, %1, %2\n" \ - " eor %0, %0, %3\n" \ - " eor %1, %1, %4\n" \ - " orr %1, %0, %1\n" \ - " cbnz %1, 2f\n" \ - " st" #rel "xp %w0, %5, %6, %2\n" \ - " cbnz %w0, 1b\n" \ + asm volatile("// __cmpxchg128" #name "\n" \ + " prfm pstl1strm, %[v]\n" \ + "1: ldxp %[rl], %[rh], %[v]\n" \ + " cmp %[rl], %[ol]\n" \ + " ccmp %[rh], %[oh], 0, eq\n" \ + " b.ne 2f\n" \ + " st" #rel "xp %w[tmp], %[nl], %[nh], %[v]\n" \ + " cbnz %w[tmp], 1b\n" \ " " #mb "\n" \ - "2:") \ - : "=&r" (tmp), "=&r" (ret), "+Q" (*(unsigned long *)ptr) \ - : "r" (old1), "r" (old2), "r" (new1), "r" (new2) \ - : cl); \ + "2:" \ + : [v] "+Q" (*(u128 *)ptr), \ + [rl] "=&r" (r.low), [rh] "=&r" (r.high), \ + [tmp] "=&r" (tmp) \ + : [ol] "r" (o.low), [oh] "r" (o.high), \ + [nl] "r" (n.low), [nh] "r" (n.high) \ + : "cc", ##cl); \ \ - return ret; \ + return r.full; \ } -__CMPXCHG_DBL( , , , ) -__CMPXCHG_DBL(_mb, dmb ish, l, "memory") +__CMPXCHG128( , , ) +__CMPXCHG128(_mb, dmb ish, l, "memory") + +#undef __CMPXCHG128 -#undef __CMPXCHG_DBL #undef K #endif /* __ASM_ATOMIC_LL_SC_H */ diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index 5d460f6b7675..87f568a94e55 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -11,7 +11,8 @@ #define __ASM_ATOMIC_LSE_H #define ATOMIC_OP(op, asm_op) \ -static inline void __lse_atomic_##op(int i, atomic_t *v) \ +static __always_inline void \ +__lse_atomic_##op(int i, atomic_t *v) \ { \ asm volatile( \ __LSE_PREAMBLE \ @@ -25,7 +26,7 @@ ATOMIC_OP(or, stset) ATOMIC_OP(xor, steor) ATOMIC_OP(add, stadd) -static inline void __lse_atomic_sub(int i, atomic_t *v) +static __always_inline void __lse_atomic_sub(int i, atomic_t *v) { __lse_atomic_add(-i, v); } @@ -33,7 +34,8 @@ static inline void __lse_atomic_sub(int i, atomic_t *v) #undef ATOMIC_OP #define ATOMIC_FETCH_OP(name, mb, op, asm_op, cl...) \ -static inline int __lse_atomic_fetch_##op##name(int i, atomic_t *v) \ +static __always_inline int \ +__lse_atomic_fetch_##op##name(int i, atomic_t *v) \ { \ int old; \ \ @@ -63,7 +65,8 @@ ATOMIC_FETCH_OPS(add, ldadd) #undef ATOMIC_FETCH_OPS #define ATOMIC_FETCH_OP_SUB(name) \ -static inline int __lse_atomic_fetch_sub##name(int i, atomic_t *v) \ +static __always_inline int \ +__lse_atomic_fetch_sub##name(int i, atomic_t *v) \ { \ return __lse_atomic_fetch_add##name(-i, v); \ } @@ -76,12 +79,14 @@ ATOMIC_FETCH_OP_SUB( ) #undef ATOMIC_FETCH_OP_SUB #define ATOMIC_OP_ADD_SUB_RETURN(name) \ -static inline int __lse_atomic_add_return##name(int i, atomic_t *v) \ +static __always_inline int \ +__lse_atomic_add_return##name(int i, atomic_t *v) \ { \ return __lse_atomic_fetch_add##name(i, v) + i; \ } \ \ -static inline int __lse_atomic_sub_return##name(int i, atomic_t *v) \ +static __always_inline int \ +__lse_atomic_sub_return##name(int i, atomic_t *v) \ { \ return __lse_atomic_fetch_sub(i, v) - i; \ } @@ -93,13 +98,14 @@ ATOMIC_OP_ADD_SUB_RETURN( ) #undef ATOMIC_OP_ADD_SUB_RETURN -static inline void __lse_atomic_and(int i, atomic_t *v) +static __always_inline void __lse_atomic_and(int i, atomic_t *v) { return __lse_atomic_andnot(~i, v); } #define ATOMIC_FETCH_OP_AND(name, mb, cl...) 
\ -static inline int __lse_atomic_fetch_and##name(int i, atomic_t *v) \ +static __always_inline int \ +__lse_atomic_fetch_and##name(int i, atomic_t *v) \ { \ return __lse_atomic_fetch_andnot##name(~i, v); \ } @@ -112,7 +118,8 @@ ATOMIC_FETCH_OP_AND( , al, "memory") #undef ATOMIC_FETCH_OP_AND #define ATOMIC64_OP(op, asm_op) \ -static inline void __lse_atomic64_##op(s64 i, atomic64_t *v) \ +static __always_inline void \ +__lse_atomic64_##op(s64 i, atomic64_t *v) \ { \ asm volatile( \ __LSE_PREAMBLE \ @@ -126,7 +133,7 @@ ATOMIC64_OP(or, stset) ATOMIC64_OP(xor, steor) ATOMIC64_OP(add, stadd) -static inline void __lse_atomic64_sub(s64 i, atomic64_t *v) +static __always_inline void __lse_atomic64_sub(s64 i, atomic64_t *v) { __lse_atomic64_add(-i, v); } @@ -134,7 +141,8 @@ static inline void __lse_atomic64_sub(s64 i, atomic64_t *v) #undef ATOMIC64_OP #define ATOMIC64_FETCH_OP(name, mb, op, asm_op, cl...) \ -static inline long __lse_atomic64_fetch_##op##name(s64 i, atomic64_t *v)\ +static __always_inline long \ +__lse_atomic64_fetch_##op##name(s64 i, atomic64_t *v) \ { \ s64 old; \ \ @@ -164,7 +172,8 @@ ATOMIC64_FETCH_OPS(add, ldadd) #undef ATOMIC64_FETCH_OPS #define ATOMIC64_FETCH_OP_SUB(name) \ -static inline long __lse_atomic64_fetch_sub##name(s64 i, atomic64_t *v) \ +static __always_inline long \ +__lse_atomic64_fetch_sub##name(s64 i, atomic64_t *v) \ { \ return __lse_atomic64_fetch_add##name(-i, v); \ } @@ -177,12 +186,14 @@ ATOMIC64_FETCH_OP_SUB( ) #undef ATOMIC64_FETCH_OP_SUB #define ATOMIC64_OP_ADD_SUB_RETURN(name) \ -static inline long __lse_atomic64_add_return##name(s64 i, atomic64_t *v)\ +static __always_inline long \ +__lse_atomic64_add_return##name(s64 i, atomic64_t *v) \ { \ return __lse_atomic64_fetch_add##name(i, v) + i; \ } \ \ -static inline long __lse_atomic64_sub_return##name(s64 i, atomic64_t *v)\ +static __always_inline long \ +__lse_atomic64_sub_return##name(s64 i, atomic64_t *v) \ { \ return __lse_atomic64_fetch_sub##name(i, v) - i; \ } @@ -194,13 +205,14 @@ ATOMIC64_OP_ADD_SUB_RETURN( ) #undef ATOMIC64_OP_ADD_SUB_RETURN -static inline void __lse_atomic64_and(s64 i, atomic64_t *v) +static __always_inline void __lse_atomic64_and(s64 i, atomic64_t *v) { return __lse_atomic64_andnot(~i, v); } #define ATOMIC64_FETCH_OP_AND(name, mb, cl...) 
\ -static inline long __lse_atomic64_fetch_and##name(s64 i, atomic64_t *v) \ +static __always_inline long \ +__lse_atomic64_fetch_and##name(s64 i, atomic64_t *v) \ { \ return __lse_atomic64_fetch_andnot##name(~i, v); \ } @@ -212,7 +224,7 @@ ATOMIC64_FETCH_OP_AND( , al, "memory") #undef ATOMIC64_FETCH_OP_AND -static inline s64 __lse_atomic64_dec_if_positive(atomic64_t *v) +static __always_inline s64 __lse_atomic64_dec_if_positive(atomic64_t *v) { unsigned long tmp; @@ -239,22 +251,15 @@ __lse__cmpxchg_case_##name##sz(volatile void *ptr, \ u##sz old, \ u##sz new) \ { \ - register unsigned long x0 asm ("x0") = (unsigned long)ptr; \ - register u##sz x1 asm ("x1") = old; \ - register u##sz x2 asm ("x2") = new; \ - unsigned long tmp; \ - \ asm volatile( \ __LSE_PREAMBLE \ - " mov %" #w "[tmp], %" #w "[old]\n" \ - " cas" #mb #sfx "\t%" #w "[tmp], %" #w "[new], %[v]\n" \ - " mov %" #w "[ret], %" #w "[tmp]" \ - : [ret] "+r" (x0), [v] "+Q" (*(u##sz *)ptr), \ - [tmp] "=&r" (tmp) \ - : [old] "r" (x1), [new] "r" (x2) \ + " cas" #mb #sfx " %" #w "[old], %" #w "[new], %[v]\n" \ + : [v] "+Q" (*(u##sz *)ptr), \ + [old] "+r" (old) \ + : [new] "rZ" (new) \ : cl); \ \ - return x0; \ + return old; \ } __CMPXCHG_CASE(w, b, , 8, ) @@ -276,40 +281,35 @@ __CMPXCHG_CASE(x, , mb_, 64, al, "memory") #undef __CMPXCHG_CASE -#define __CMPXCHG_DBL(name, mb, cl...) \ -static __always_inline long \ -__lse__cmpxchg_double##name(unsigned long old1, \ - unsigned long old2, \ - unsigned long new1, \ - unsigned long new2, \ - volatile void *ptr) \ +#define __CMPXCHG128(name, mb, cl...) \ +static __always_inline u128 \ +__lse__cmpxchg128##name(volatile u128 *ptr, u128 old, u128 new) \ { \ - unsigned long oldval1 = old1; \ - unsigned long oldval2 = old2; \ - register unsigned long x0 asm ("x0") = old1; \ - register unsigned long x1 asm ("x1") = old2; \ - register unsigned long x2 asm ("x2") = new1; \ - register unsigned long x3 asm ("x3") = new2; \ + union __u128_halves r, o = { .full = (old) }, \ + n = { .full = (new) }; \ + register unsigned long x0 asm ("x0") = o.low; \ + register unsigned long x1 asm ("x1") = o.high; \ + register unsigned long x2 asm ("x2") = n.low; \ + register unsigned long x3 asm ("x3") = n.high; \ register unsigned long x4 asm ("x4") = (unsigned long)ptr; \ \ asm volatile( \ __LSE_PREAMBLE \ " casp" #mb "\t%[old1], %[old2], %[new1], %[new2], %[v]\n"\ - " eor %[old1], %[old1], %[oldval1]\n" \ - " eor %[old2], %[old2], %[oldval2]\n" \ - " orr %[old1], %[old1], %[old2]" \ : [old1] "+&r" (x0), [old2] "+&r" (x1), \ - [v] "+Q" (*(unsigned long *)ptr) \ + [v] "+Q" (*(u128 *)ptr) \ : [new1] "r" (x2), [new2] "r" (x3), [ptr] "r" (x4), \ - [oldval1] "r" (oldval1), [oldval2] "r" (oldval2) \ + [oldval1] "r" (o.low), [oldval2] "r" (o.high) \ : cl); \ \ - return x0; \ + r.low = x0; r.high = x1; \ + \ + return r.full; \ } -__CMPXCHG_DBL( , ) -__CMPXCHG_DBL(_mb, al, "memory") +__CMPXCHG128( , ) +__CMPXCHG128(_mb, al, "memory") -#undef __CMPXCHG_DBL +#undef __CMPXCHG128 #endif /* __ASM_ATOMIC_LSE_H */ diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index 2cfc4245d2e2..f5801b0ba9e9 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -11,6 +11,8 @@ #include <linux/kasan-checks.h> +#include <asm/alternative-macros.h> + #define __nops(n) ".rept " #n "\nnop\n.endr\n" #define nops(n) asm volatile(__nops(n)) @@ -38,13 +40,21 @@ */ #define dgh() asm volatile("hint #6" : : : "memory") +#define spec_bar() asm volatile(ALTERNATIVE("dsb nsh\nisb\n", \ + 
SB_BARRIER_INSN"nop\n", \ + ARM64_HAS_SB)) + +#define gsb_ack() asm volatile(GSB_ACK_BARRIER_INSN : : : "memory") +#define gsb_sys() asm volatile(GSB_SYS_BARRIER_INSN : : : "memory") + #ifdef CONFIG_ARM64_PSEUDO_NMI #define pmr_sync() \ do { \ - extern struct static_key_false gic_pmr_sync; \ - \ - if (static_branch_unlikely(&gic_pmr_sync)) \ - dsb(sy); \ + asm volatile( \ + ALTERNATIVE_CB("dsb sy", \ + ARM64_HAS_GIC_PRIO_RELAXED_SYNC, \ + alt_cb_patch_nops) \ + ); \ } while(0) #else #define pmr_sync() do {} while (0) @@ -128,25 +138,25 @@ do { \ case 1: \ asm volatile ("stlrb %w1, %0" \ : "=Q" (*__p) \ - : "r" (*(__u8 *)__u.__c) \ + : "rZ" (*(__u8 *)__u.__c) \ : "memory"); \ break; \ case 2: \ asm volatile ("stlrh %w1, %0" \ : "=Q" (*__p) \ - : "r" (*(__u16 *)__u.__c) \ + : "rZ" (*(__u16 *)__u.__c) \ : "memory"); \ break; \ case 4: \ asm volatile ("stlr %w1, %0" \ : "=Q" (*__p) \ - : "r" (*(__u32 *)__u.__c) \ + : "rZ" (*(__u32 *)__u.__c) \ : "memory"); \ break; \ case 8: \ - asm volatile ("stlr %1, %0" \ + asm volatile ("stlr %x1, %0" \ : "=Q" (*__p) \ - : "r" (*(__u64 *)__u.__c) \ + : "rZ" (*(__u64 *)__u.__c) \ : "memory"); \ break; \ } \ diff --git a/arch/arm64/include/asm/brk-imm.h b/arch/arm64/include/asm/brk-imm.h index ec7720dbe2c8..beb42c62b6ac 100644 --- a/arch/arm64/include/asm/brk-imm.h +++ b/arch/arm64/include/asm/brk-imm.h @@ -11,21 +11,32 @@ * 0x004: for installing kprobes * 0x005: for installing uprobes * 0x006: for kprobe software single-step + * 0x007: for kretprobe return * Allowed values for kgdb are 0x400 - 0x7ff * 0x100: for triggering a fault on purpose (reserved) * 0x400: for dynamic BRK instruction * 0x401: for compile time BRK instruction * 0x800: kernel-mode BUG() and WARN() traps * 0x9xx: tag-based KASAN trap (allowed values 0x900 - 0x9ff) + * 0x55xx: Undefined Behavior Sanitizer traps ('U' << 8) + * 0x8xxx: Control-Flow Integrity traps */ #define KPROBES_BRK_IMM 0x004 #define UPROBES_BRK_IMM 0x005 #define KPROBES_BRK_SS_IMM 0x006 +#define KRETPROBES_BRK_IMM 0x007 #define FAULT_BRK_IMM 0x100 #define KGDB_DYN_DBG_BRK_IMM 0x400 #define KGDB_COMPILED_DBG_BRK_IMM 0x401 #define BUG_BRK_IMM 0x800 #define KASAN_BRK_IMM 0x900 #define KASAN_BRK_MASK 0x0ff +#define UBSAN_BRK_IMM 0x5500 +#define UBSAN_BRK_MASK 0x00ff + +#define CFI_BRK_IMM_TARGET GENMASK(4, 0) +#define CFI_BRK_IMM_TYPE GENMASK(9, 5) +#define CFI_BRK_IMM_BASE 0x8000 +#define CFI_BRK_IMM_MASK (CFI_BRK_IMM_TARGET | CFI_BRK_IMM_TYPE) #endif diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index ca9b487112cc..09963004ceea 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -16,6 +16,15 @@ #define CLIDR_LOC(clidr) (((clidr) >> CLIDR_LOC_SHIFT) & 0x7) #define CLIDR_LOUIS(clidr) (((clidr) >> CLIDR_LOUIS_SHIFT) & 0x7) +/* Ctypen, bits[3(n - 1) + 2 : 3(n - 1)], for n = 1 to 7 */ +#define CLIDR_CTYPE_SHIFT(level) (3 * (level - 1)) +#define CLIDR_CTYPE_MASK(level) (7 << CLIDR_CTYPE_SHIFT(level)) +#define CLIDR_CTYPE(clidr, level) \ + (((clidr) & CLIDR_CTYPE_MASK(level)) >> CLIDR_CTYPE_SHIFT(level)) + +/* Ttypen, bits [2(n - 1) + 34 : 2(n - 1) + 33], for n = 1 to 7 */ +#define CLIDR_TTYPE_SHIFT(level) (2 * ((level) - 1) + CLIDR_EL1_Ttypen_SHIFT) + /* * Memory returned by kmalloc() may be used for DMA, so we must make * sure that all such allocations are cache aligned. Otherwise, @@ -24,8 +33,9 @@ * the CPU. 
*/ #define ARCH_DMA_MINALIGN (128) +#define ARCH_KMALLOC_MINALIGN (8) -#ifndef __ASSEMBLY__ +#if !defined(__ASSEMBLY__) && !defined(BUILD_VDSO) #include <linux/bitops.h> #include <linux/kasan-enabled.h> @@ -45,14 +55,9 @@ static inline unsigned int arch_slab_minalign(void) #define arch_slab_minalign() arch_slab_minalign() #endif -#define CTR_CACHE_MINLINE_MASK \ - (0xf << CTR_EL0_DMINLINE_SHIFT | \ - CTR_EL0_IMINLINE_MASK << CTR_EL0_IMINLINE_SHIFT) - #define CTR_L1IP(ctr) SYS_FIELD_GET(CTR_EL0, L1Ip, ctr) #define ICACHEF_ALIASING 0 -#define ICACHEF_VPIPT 1 extern unsigned long __icache_flags; /* @@ -64,14 +69,9 @@ static inline int icache_is_aliasing(void) return test_bit(ICACHEF_ALIASING, &__icache_flags); } -static __always_inline int icache_is_vpipt(void) -{ - return test_bit(ICACHEF_VPIPT, &__icache_flags); -} - static inline u32 cache_type_cwg(void) { - return (read_cpuid_cachetype() >> CTR_EL0_CWG_SHIFT) & CTR_EL0_CWG_MASK; + return SYS_FIELD_GET(CTR_EL0, CWG, read_cpuid_cachetype()); } #define __read_mostly __section(".data..read_mostly") @@ -85,6 +85,25 @@ static inline int cache_line_size_of_cpu(void) int cache_line_size(void); +#define dma_get_cache_alignment cache_line_size + +/* Compress a u64 MPIDR value into 32 bits. */ +static inline u64 arch_compact_of_hwid(u64 id) +{ + u64 aff3 = MPIDR_AFFINITY_LEVEL(id, 3); + + /* + * These bits are expected to be RES0. If not, return a value with + * the upper 32 bits set to force the caller to give up on 32 bit + * cache ids. + */ + if (FIELD_GET(GENMASK_ULL(63, 40), id)) + return id; + + return (aff3 << 24) | FIELD_GET(GENMASK_ULL(23, 0), id); +} +#define arch_compact_of_hwid arch_compact_of_hwid + /* * Read the effective value of CTR_EL0. * @@ -116,6 +135,6 @@ static inline u32 __attribute_const__ read_cpuid_effective_cachetype(void) return ctr; } -#endif /* __ASSEMBLY__ */ +#endif /* !defined(__ASSEMBLY__) && !defined(BUILD_VDSO) */ #endif diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 37185e978aeb..28ab96e808ef 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -114,10 +114,10 @@ extern void copy_to_user_page(struct vm_area_struct *, struct page *, #define copy_to_user_page copy_to_user_page /* - * flush_dcache_page is used when the kernel has written to the page + * flush_dcache_folio is used when the kernel has written to the page * cache page at virtual address page->virtual. * - * If this page isn't mapped (ie, page_mapping == NULL), or it might + * If this page isn't mapped (ie, folio_mapping == NULL), or it might * have userspace mappings, then we _must_ always clean + invalidate * the dcache entries associated with the kernel mapping. 
* @@ -127,10 +127,12 @@ extern void copy_to_user_page(struct vm_area_struct *, struct page *, */ #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 extern void flush_dcache_page(struct page *); +void flush_dcache_folio(struct folio *); +#define flush_dcache_folio flush_dcache_folio static __always_inline void icache_inval_all_pou(void) { - if (cpus_have_const_cap(ARM64_HAS_CACHE_DIC)) + if (alternative_has_cap_unlikely(ARM64_HAS_CACHE_DIC)) return; asm("ic ialluis"); diff --git a/arch/arm64/include/asm/cfi.h b/arch/arm64/include/asm/cfi.h new file mode 100644 index 000000000000..ab90f0351b7a --- /dev/null +++ b/arch/arm64/include/asm/cfi.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_ARM64_CFI_H +#define _ASM_ARM64_CFI_H + +#define __bpfcall + +#endif /* _ASM_ARM64_CFI_H */ diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index 497acf134d99..d7a540736741 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -62,9 +62,8 @@ __XCHG_CASE( , , mb_, 64, dmb ish, nop, , a, l, "memory") #undef __XCHG_CASE #define __XCHG_GEN(sfx) \ -static __always_inline unsigned long __xchg##sfx(unsigned long x, \ - volatile void *ptr, \ - int size) \ +static __always_inline unsigned long \ +__arch_xchg##sfx(unsigned long x, volatile void *ptr, int size) \ { \ switch (size) { \ case 1: \ @@ -93,7 +92,7 @@ __XCHG_GEN(_mb) ({ \ __typeof__(*(ptr)) __ret; \ __ret = (__typeof__(*(ptr))) \ - __xchg##sfx((unsigned long)(x), (ptr), sizeof(*(ptr))); \ + __arch_xchg##sfx((unsigned long)(x), (ptr), sizeof(*(ptr))); \ __ret; \ }) @@ -131,21 +130,18 @@ __CMPXCHG_CASE(mb_, 64) #undef __CMPXCHG_CASE -#define __CMPXCHG_DBL(name) \ -static inline long __cmpxchg_double##name(unsigned long old1, \ - unsigned long old2, \ - unsigned long new1, \ - unsigned long new2, \ - volatile void *ptr) \ +#define __CMPXCHG128(name) \ +static inline u128 __cmpxchg128##name(volatile u128 *ptr, \ + u128 old, u128 new) \ { \ - return __lse_ll_sc_body(_cmpxchg_double##name, \ - old1, old2, new1, new2, ptr); \ + return __lse_ll_sc_body(_cmpxchg128##name, \ + ptr, old, new); \ } -__CMPXCHG_DBL( ) -__CMPXCHG_DBL(_mb) +__CMPXCHG128( ) +__CMPXCHG128(_mb) -#undef __CMPXCHG_DBL +#undef __CMPXCHG128 #define __CMPXCHG_GEN(sfx) \ static __always_inline unsigned long __cmpxchg##sfx(volatile void *ptr, \ @@ -199,34 +195,17 @@ __CMPXCHG_GEN(_mb) #define arch_cmpxchg64 arch_cmpxchg #define arch_cmpxchg64_local arch_cmpxchg_local -/* cmpxchg_double */ -#define system_has_cmpxchg_double() 1 - -#define __cmpxchg_double_check(ptr1, ptr2) \ -({ \ - if (sizeof(*(ptr1)) != 8) \ - BUILD_BUG(); \ - VM_BUG_ON((unsigned long *)(ptr2) - (unsigned long *)(ptr1) != 1); \ -}) +/* cmpxchg128 */ +#define system_has_cmpxchg128() 1 -#define arch_cmpxchg_double(ptr1, ptr2, o1, o2, n1, n2) \ +#define arch_cmpxchg128(ptr, o, n) \ ({ \ - int __ret; \ - __cmpxchg_double_check(ptr1, ptr2); \ - __ret = !__cmpxchg_double_mb((unsigned long)(o1), (unsigned long)(o2), \ - (unsigned long)(n1), (unsigned long)(n2), \ - ptr1); \ - __ret; \ + __cmpxchg128_mb((ptr), (o), (n)); \ }) -#define arch_cmpxchg_double_local(ptr1, ptr2, o1, o2, n1, n2) \ +#define arch_cmpxchg128_local(ptr, o, n) \ ({ \ - int __ret; \ - __cmpxchg_double_check(ptr1, ptr2); \ - __ret = !__cmpxchg_double((unsigned long)(o1), (unsigned long)(o2), \ - (unsigned long)(n1), (unsigned long)(n2), \ - ptr1); \ - __ret; \ + __cmpxchg128((ptr), (o), (n)); \ }) #define __CMPWAIT_CASE(w, sfx, sz) \ diff --git a/arch/arm64/include/asm/compat.h 
b/arch/arm64/include/asm/compat.h index 9f362274a4f7..ae904a1ad529 100644 --- a/arch/arm64/include/asm/compat.h +++ b/arch/arm64/include/asm/compat.h @@ -83,10 +83,6 @@ struct compat_statfs { int f_spare[4]; }; -#define COMPAT_RLIM_INFINITY 0xffffffff - -#define COMPAT_OFF_T_MAX 0x7fffffff - #define compat_user_stack_pointer() (user_stack_pointer(task_pt_regs(current))) #define COMPAT_MINSIGSTKSZ 2048 @@ -100,6 +96,8 @@ static inline int is_compat_thread(struct thread_info *thread) return test_ti_thread_flag(thread, TIF_32BIT); } +long compat_arm_syscall(struct pt_regs *regs, int scno); + #else /* !CONFIG_COMPAT */ static inline int is_compat_thread(struct thread_info *thread) diff --git a/arch/arm64/include/asm/compiler.h b/arch/arm64/include/asm/compiler.h index 6fb2e6bcc392..9bbd7b7097ff 100644 --- a/arch/arm64/include/asm/compiler.h +++ b/arch/arm64/include/asm/compiler.h @@ -8,19 +8,33 @@ #define ARM64_ASM_PREAMBLE #endif -/* - * The EL0/EL1 pointer bits used by a pointer authentication code. - * This is dependent on TBI0/TBI1 being enabled, or bits 63:56 would also apply. - */ -#define ptrauth_user_pac_mask() GENMASK_ULL(54, vabits_actual) -#define ptrauth_kernel_pac_mask() GENMASK_ULL(63, vabits_actual) +#define xpaclri(ptr) \ +({ \ + register unsigned long __xpaclri_ptr asm("x30") = (ptr); \ + \ + asm( \ + ARM64_ASM_PREAMBLE \ + " hint #7\n" \ + : "+r" (__xpaclri_ptr)); \ + \ + __xpaclri_ptr; \ +}) -/* Valid for EL0 TTBR0 and EL1 TTBR1 instruction pointers */ -#define ptrauth_clear_pac(ptr) \ - ((ptr & BIT_ULL(55)) ? (ptr | ptrauth_kernel_pac_mask()) : \ - (ptr & ~ptrauth_user_pac_mask())) +#ifdef CONFIG_ARM64_PTR_AUTH_KERNEL +#define ptrauth_strip_kernel_insn_pac(ptr) xpaclri(ptr) +#else +#define ptrauth_strip_kernel_insn_pac(ptr) (ptr) +#endif + +#ifdef CONFIG_ARM64_PTR_AUTH +#define ptrauth_strip_user_insn_pac(ptr) xpaclri(ptr) +#else +#define ptrauth_strip_user_insn_pac(ptr) (ptr) +#endif +#if !defined(CONFIG_BUILTIN_RETURN_ADDRESS_STRIPS_PAC) #define __builtin_return_address(val) \ - (void *)(ptrauth_clear_pac((unsigned long)__builtin_return_address(val))) + (void *)(ptrauth_strip_kernel_insn_pac((unsigned long)__builtin_return_address(val))) +#endif #endif /* __ASM_COMPILER_H */ diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index fd7a92219eea..71493b760b83 100644 --- a/arch/arm64/include/asm/cpu.h +++ b/arch/arm64/include/asm/cpu.h @@ -38,36 +38,36 @@ struct cpuinfo_32bit { }; struct cpuinfo_arm64 { - struct cpu cpu; struct kobject kobj; u64 reg_ctr; u64 reg_cntfrq; u64 reg_dczid; u64 reg_midr; u64 reg_revidr; + u64 reg_aidr; u64 reg_gmid; u64 reg_smidr; + u64 reg_mpamidr; u64 reg_id_aa64dfr0; u64 reg_id_aa64dfr1; u64 reg_id_aa64isar0; u64 reg_id_aa64isar1; u64 reg_id_aa64isar2; + u64 reg_id_aa64isar3; u64 reg_id_aa64mmfr0; u64 reg_id_aa64mmfr1; u64 reg_id_aa64mmfr2; + u64 reg_id_aa64mmfr3; + u64 reg_id_aa64mmfr4; u64 reg_id_aa64pfr0; u64 reg_id_aa64pfr1; + u64 reg_id_aa64pfr2; u64 reg_id_aa64zfr0; u64 reg_id_aa64smfr0; + u64 reg_id_aa64fpfr0; struct cpuinfo_32bit aarch32; - - /* pseudo-ZCR for recording maximum ZCR_EL1 LEN value: */ - u64 reg_zcr; - - /* pseudo-SMCR for recording maximum SMCR_EL1 LEN value: */ - u64 reg_smcr; }; DECLARE_PER_CPU(struct cpuinfo_arm64, cpu_data); diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h new file mode 100644 index 000000000000..9d769291a306 --- /dev/null +++ b/arch/arm64/include/asm/cpucaps.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __ASM_CPUCAPS_H 
+#define __ASM_CPUCAPS_H + +#include <asm/cpucap-defs.h> + +#ifndef __ASSEMBLY__ +#include <linux/types.h> +/* + * Check whether a cpucap is possible at compiletime. + */ +static __always_inline bool +cpucap_is_possible(const unsigned int cap) +{ + compiletime_assert(__builtin_constant_p(cap), + "cap must be a constant"); + compiletime_assert(cap < ARM64_NCAPS, + "cap must be < ARM64_NCAPS"); + + switch (cap) { + case ARM64_HAS_PAN: + return IS_ENABLED(CONFIG_ARM64_PAN); + case ARM64_HAS_EPAN: + return IS_ENABLED(CONFIG_ARM64_EPAN); + case ARM64_SVE: + return IS_ENABLED(CONFIG_ARM64_SVE); + case ARM64_SME: + case ARM64_SME2: + case ARM64_SME_FA64: + return IS_ENABLED(CONFIG_ARM64_SME); + case ARM64_HAS_CNP: + return IS_ENABLED(CONFIG_ARM64_CNP); + case ARM64_HAS_ADDRESS_AUTH: + case ARM64_HAS_GENERIC_AUTH: + return IS_ENABLED(CONFIG_ARM64_PTR_AUTH); + case ARM64_HAS_GIC_PRIO_MASKING: + return IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI); + case ARM64_MTE: + return IS_ENABLED(CONFIG_ARM64_MTE); + case ARM64_BTI: + return IS_ENABLED(CONFIG_ARM64_BTI); + case ARM64_HAS_TLB_RANGE: + return IS_ENABLED(CONFIG_ARM64_TLB_RANGE); + case ARM64_HAS_S1POE: + return IS_ENABLED(CONFIG_ARM64_POE); + case ARM64_HAS_GCS: + return IS_ENABLED(CONFIG_ARM64_GCS); + case ARM64_HAFT: + return IS_ENABLED(CONFIG_ARM64_HAFT); + case ARM64_UNMAP_KERNEL_AT_EL0: + return IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0); + case ARM64_WORKAROUND_843419: + return IS_ENABLED(CONFIG_ARM64_ERRATUM_843419); + case ARM64_WORKAROUND_1742098: + return IS_ENABLED(CONFIG_ARM64_ERRATUM_1742098); + case ARM64_WORKAROUND_2645198: + return IS_ENABLED(CONFIG_ARM64_ERRATUM_2645198); + case ARM64_WORKAROUND_2658417: + return IS_ENABLED(CONFIG_ARM64_ERRATUM_2658417); + case ARM64_WORKAROUND_CAVIUM_23154: + return IS_ENABLED(CONFIG_CAVIUM_ERRATUM_23154); + case ARM64_WORKAROUND_NVIDIA_CARMEL_CNP: + return IS_ENABLED(CONFIG_NVIDIA_CARMEL_CNP_ERRATUM); + case ARM64_WORKAROUND_REPEAT_TLBI: + return IS_ENABLED(CONFIG_ARM64_WORKAROUND_REPEAT_TLBI); + case ARM64_WORKAROUND_SPECULATIVE_SSBS: + return IS_ENABLED(CONFIG_ARM64_ERRATUM_3194386); + case ARM64_MPAM: + /* + * KVM MPAM support doesn't rely on the host kernel supporting MPAM. + */ + return true; + case ARM64_HAS_PMUV3: + return IS_ENABLED(CONFIG_HW_PERF_EVENTS); + } + + return true; +} +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index fd7d75a275f6..bf13d676aae2 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -6,19 +6,25 @@ #ifndef __ASM_CPUFEATURE_H #define __ASM_CPUFEATURE_H +#include <asm/alternative-macros.h> #include <asm/cpucaps.h> #include <asm/cputype.h> #include <asm/hwcap.h> #include <asm/sysreg.h> -#define MAX_CPU_FEATURES 128 +#define MAX_CPU_FEATURES 192 #define cpu_feature(x) KERNEL_HWCAP_ ## x +#define ARM64_SW_FEATURE_OVERRIDE_NOKASLR 0 +#define ARM64_SW_FEATURE_OVERRIDE_HVHE 4 +#define ARM64_SW_FEATURE_OVERRIDE_RODATA_OFF 8 + #ifndef __ASSEMBLY__ #include <linux/bug.h> #include <linux/jump_label.h> #include <linux/kernel.h> +#include <linux/cpumask.h> /* * CPU feature register tracking @@ -78,7 +84,7 @@ struct arm64_ftr_bits { * to full-0 denotes that this field has no override * * A @mask field set to full-0 with the corresponding @val field set - * to full-1 denotes thath this field has an invalid override. + * to full-1 denotes that this field has an invalid override. 
*/ struct arm64_ftr_override { u64 val; @@ -106,7 +112,7 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; * CPU capabilities: * * We use arm64_cpu_capabilities to represent system features, errata work - * arounds (both used internally by kernel and tracked in cpu_hwcaps) and + * arounds (both used internally by kernel and tracked in system_cpucaps) and * ELF HWCAPs (which are exposed to user). * * To support systems with heterogeneous CPUs, we need to make sure that we @@ -269,6 +275,14 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; #define ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU ((u16)BIT(5)) /* Panic when a conflict is detected */ #define ARM64_CPUCAP_PANIC_ON_CONFLICT ((u16)BIT(6)) +/* + * When paired with SCOPE_LOCAL_CPU, all early CPUs must satisfy the + * condition. This is different from SCOPE_SYSTEM where the check is performed + * only once at the end of the SMP boot on the sanitised ID registers. + * SCOPE_SYSTEM is not suitable for cases where the capability depends on + * properties local to a CPU like MIDR_EL1. + */ +#define ARM64_CPUCAP_MATCH_ALL_EARLY_CPUS ((u16)BIT(7)) /* * CPU errata workarounds that need to be enabled at boot time if one or @@ -298,6 +312,16 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; (ARM64_CPUCAP_SCOPE_LOCAL_CPU | \ ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU | \ ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU) +/* + * CPU feature detected at boot time and present on all early CPUs. Late CPUs + * are permitted to have the feature even if it hasn't been enabled, although + * the feature will not be used by Linux in this case. If all early CPUs have + * the feature, then every late CPU must have it. + */ +#define ARM64_CPUCAP_EARLY_LOCAL_CPU_FEATURE \ + (ARM64_CPUCAP_SCOPE_LOCAL_CPU | \ + ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU | \ + ARM64_CPUCAP_MATCH_ALL_EARLY_CPUS) /* * CPU feature detected at boot time, on one or more CPUs. A late CPU @@ -358,6 +382,7 @@ struct arm64_cpu_capabilities { u8 field_pos; u8 field_width; u8 min_field_value; + u8 max_field_value; u8 hwcap_type; bool sign; unsigned long hwcap; @@ -376,6 +401,7 @@ struct arm64_cpu_capabilities { * method is robust against being called multiple times. */ const struct arm64_cpu_capabilities *match_list; + const struct cpumask *cpus; }; static inline int cpucap_default_scope(const struct arm64_cpu_capabilities *cap) @@ -383,6 +409,11 @@ static inline int cpucap_default_scope(const struct arm64_cpu_capabilities *cap) return cap->type & ARM64_CPUCAP_SCOPE_MASK; } +static inline bool cpucap_match_all_early_cpus(const struct arm64_cpu_capabilities *cap) +{ + return cap->type & ARM64_CPUCAP_MATCH_ALL_EARLY_CPUS; +} + /* * Generic helper for handling capabilities with multiple (match,enable) pairs * of call backs, sharing the same capability bit. 
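/*
 * Editor's note: illustrative sketch only, not part of the patch above.
 *
 * cpucap_is_possible() exists so that callers passing a compile-time
 * constant cap can have the whole test constant-folded away when the
 * backing Kconfig option is disabled. A minimal (hypothetical) user of
 * cpus_have_cap() might look like this; both example_* names below are
 * made up for illustration and do not exist in the tree.
 */
static inline void example_sve_cleanup(void)
{
	/*
	 * With CONFIG_ARM64_SVE=n, cpucap_is_possible(ARM64_SVE) is
	 * constant false, so cpus_have_cap() evaluates to false at
	 * compile time and the branch body is discarded entirely.
	 */
	if (cpus_have_cap(ARM64_SVE))
		example_flush_sve_state();	/* hypothetical helper */
}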
@@ -418,29 +449,31 @@ static __always_inline bool is_hyp_code(void) return is_vhe_hyp_code() || is_nvhe_hyp_code(); } -extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); -extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS]; -extern struct static_key_false arm64_const_caps_ready; +extern DECLARE_BITMAP(system_cpucaps, ARM64_NCAPS); -/* ARM64 CAPS + alternative_cb */ -#define ARM64_NPATCHABLE (ARM64_NCAPS + 1) -extern DECLARE_BITMAP(boot_capabilities, ARM64_NPATCHABLE); +extern DECLARE_BITMAP(boot_cpucaps, ARM64_NCAPS); #define for_each_available_cap(cap) \ - for_each_set_bit(cap, cpu_hwcaps, ARM64_NCAPS) + for_each_set_bit(cap, system_cpucaps, ARM64_NCAPS) bool this_cpu_has_cap(unsigned int cap); void cpu_set_feature(unsigned int num); bool cpu_have_feature(unsigned int num); unsigned long cpu_get_elf_hwcap(void); unsigned long cpu_get_elf_hwcap2(void); +unsigned long cpu_get_elf_hwcap3(void); #define cpu_set_named_feature(name) cpu_set_feature(cpu_feature(name)) #define cpu_have_named_feature(name) cpu_have_feature(cpu_feature(name)) +static __always_inline bool boot_capabilities_finalized(void) +{ + return alternative_has_cap_likely(ARM64_ALWAYS_BOOT); +} + static __always_inline bool system_capabilities_finalized(void) { - return static_branch_likely(&arm64_const_caps_ready); + return alternative_has_cap_likely(ARM64_ALWAYS_SYSTEM); } /* @@ -448,75 +481,49 @@ static __always_inline bool system_capabilities_finalized(void) * * Before the capability is detected, this returns false. */ -static inline bool cpus_have_cap(unsigned int num) +static __always_inline bool cpus_have_cap(unsigned int num) { + if (__builtin_constant_p(num) && !cpucap_is_possible(num)) + return false; if (num >= ARM64_NCAPS) return false; - return test_bit(num, cpu_hwcaps); + return arch_test_bit(num, system_cpucaps); } /* * Test for a capability without a runtime check. * - * Before capabilities are finalized, this returns false. - * After capabilities are finalized, this is patched to avoid a runtime check. + * Before boot capabilities are finalized, this will BUG(). + * After boot capabilities are finalized, this is patched to avoid a runtime + * check. * * @num must be a compile-time constant. */ -static __always_inline bool __cpus_have_const_cap(int num) +static __always_inline bool cpus_have_final_boot_cap(int num) { - if (num >= ARM64_NCAPS) - return false; - return static_branch_unlikely(&cpu_hwcap_keys[num]); + if (boot_capabilities_finalized()) + return alternative_has_cap_unlikely(num); + else + BUG(); } /* * Test for a capability without a runtime check. * - * Before capabilities are finalized, this will BUG(). - * After capabilities are finalized, this is patched to avoid a runtime check. + * Before system capabilities are finalized, this will BUG(). + * After system capabilities are finalized, this is patched to avoid a runtime + * check. * * @num must be a compile-time constant. */ static __always_inline bool cpus_have_final_cap(int num) { if (system_capabilities_finalized()) - return __cpus_have_const_cap(num); + return alternative_has_cap_unlikely(num); else BUG(); } -/* - * Test for a capability, possibly with a runtime check for non-hyp code. - * - * For hyp code, this behaves the same as cpus_have_final_cap(). - * - * For non-hyp code: - * Before capabilities are finalized, this behaves as cpus_have_cap(). - * After capabilities are finalized, this is patched to avoid a runtime check. - * - * @num must be a compile-time constant. 
- */ -static __always_inline bool cpus_have_const_cap(int num) -{ - if (is_hyp_code()) - return cpus_have_final_cap(num); - else if (system_capabilities_finalized()) - return __cpus_have_const_cap(num); - else - return cpus_have_cap(num); -} - -static inline void cpus_set_cap(unsigned int num) -{ - if (num >= ARM64_NCAPS) { - pr_warn("Attempt to set an illegal CPU capability (%d >= %d)\n", - num, ARM64_NCAPS); - } else { - __set_bit(num, cpu_hwcaps); - } -} - static inline int __attribute_const__ cpuid_feature_extract_signed_field_width(u64 features, int field, int width) { @@ -541,29 +548,6 @@ cpuid_feature_extract_unsigned_field(u64 features, int field) return cpuid_feature_extract_unsigned_field_width(features, field, 4); } -/* - * Fields that identify the version of the Performance Monitors Extension do - * not follow the standard ID scheme. See ARM DDI 0487E.a page D13-2825, - * "Alternative ID scheme used for the Performance Monitors Extension version". - */ -static inline u64 __attribute_const__ -cpuid_feature_cap_perfmon_field(u64 features, int field, u64 cap) -{ - u64 val = cpuid_feature_extract_unsigned_field(features, field); - u64 mask = GENMASK_ULL(field + 3, field); - - /* Treat IMPLEMENTATION DEFINED functionality as unimplemented */ - if (val == ID_AA64DFR0_PMUVER_IMP_DEF) - val = 0; - - if (val > cap) { - features &= ~mask; - features |= (cap << field) & mask; - } - - return features; -} - static inline u64 arm64_ftr_mask(const struct arm64_ftr_bits *ftrp) { return (u64)GENMASK(ftrp->shift + ftrp->width - 1, ftrp->shift); @@ -597,46 +581,56 @@ static inline s64 arm64_ftr_value(const struct arm64_ftr_bits *ftrp, u64 val) static inline bool id_aa64mmfr0_mixed_endian_el0(u64 mmfr0) { - return cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_BIGENDEL_SHIFT) == 0x1 || - cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_BIGENDEL0_SHIFT) == 0x1; + return cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_EL1_BIGEND_SHIFT) == 0x1 || + cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_EL1_BIGENDEL0_SHIFT) == 0x1; } static inline bool id_aa64pfr0_32bit_el1(u64 pfr0) { - u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_SHIFT); + u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_EL1_SHIFT); - return val == ID_AA64PFR0_ELx_32BIT_64BIT; + return val == ID_AA64PFR0_EL1_EL1_AARCH32; } static inline bool id_aa64pfr0_32bit_el0(u64 pfr0) { - u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL0_SHIFT); + u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_EL0_SHIFT); - return val == ID_AA64PFR0_ELx_32BIT_64BIT; + return val == ID_AA64PFR0_EL1_EL0_AARCH32; } static inline bool id_aa64pfr0_sve(u64 pfr0) { - u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_SVE_SHIFT); + u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_SVE_SHIFT); return val > 0; } static inline bool id_aa64pfr1_sme(u64 pfr1) { - u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_SME_SHIFT); + u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_EL1_SME_SHIFT); + + return val > 0; +} + +static inline bool id_aa64pfr0_mpam(u64 pfr0) +{ + u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_MPAM_SHIFT); return val > 0; } static inline bool id_aa64pfr1_mte(u64 pfr1) { - u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_MTE_SHIFT); + u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_EL1_MTE_SHIFT); - return val >= 
ID_AA64PFR1_MTE; + return val >= ID_AA64PFR1_EL1_MTE_MTE2; } -void __init setup_cpu_features(void); +void __init setup_boot_cpu_features(void); +void __init setup_system_features(void); +void __init setup_user_features(void); + void check_local_cpu_capabilities(void); u64 read_sanitised_ftr_reg(u32 id); @@ -659,7 +653,7 @@ static inline bool supports_csv2p3(int scope) pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); csv2_val = cpuid_feature_extract_unsigned_field(pfr0, - ID_AA64PFR0_CSV2_SHIFT); + ID_AA64PFR0_EL1_CSV2_SHIFT); return csv2_val == 3; } @@ -673,10 +667,11 @@ static inline bool supports_clearbhb(int scope) isar2 = read_sanitised_ftr_reg(SYS_ID_AA64ISAR2_EL1); return cpuid_feature_extract_unsigned_field(isar2, - ID_AA64ISAR2_EL1_BC_SHIFT); + ID_AA64ISAR2_EL1_CLRBHB_SHIFT); } const struct cpumask *system_32bit_el0_cpumask(void); +const struct cpumask *fallback_32bit_el0_cpumask(void); DECLARE_STATIC_KEY_FALSE(arm64_mismatched_32bit_el0); static inline bool system_supports_32bit_el0(void) @@ -694,10 +689,10 @@ static inline bool system_supports_4kb_granule(void) mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); val = cpuid_feature_extract_unsigned_field(mmfr0, - ID_AA64MMFR0_TGRAN4_SHIFT); + ID_AA64MMFR0_EL1_TGRAN4_SHIFT); - return (val >= ID_AA64MMFR0_TGRAN4_SUPPORTED_MIN) && - (val <= ID_AA64MMFR0_TGRAN4_SUPPORTED_MAX); + return (val >= ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN) && + (val <= ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MAX); } static inline bool system_supports_64kb_granule(void) @@ -707,10 +702,10 @@ static inline bool system_supports_64kb_granule(void) mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); val = cpuid_feature_extract_unsigned_field(mmfr0, - ID_AA64MMFR0_TGRAN64_SHIFT); + ID_AA64MMFR0_EL1_TGRAN64_SHIFT); - return (val >= ID_AA64MMFR0_TGRAN64_SUPPORTED_MIN) && - (val <= ID_AA64MMFR0_TGRAN64_SUPPORTED_MAX); + return (val >= ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MIN) && + (val <= ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MAX); } static inline bool system_supports_16kb_granule(void) @@ -720,10 +715,10 @@ static inline bool system_supports_16kb_granule(void) mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); val = cpuid_feature_extract_unsigned_field(mmfr0, - ID_AA64MMFR0_TGRAN16_SHIFT); + ID_AA64MMFR0_EL1_TGRAN16_SHIFT); - return (val >= ID_AA64MMFR0_TGRAN16_SUPPORTED_MIN) && - (val <= ID_AA64MMFR0_TGRAN16_SUPPORTED_MAX); + return (val >= ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MIN) && + (val <= ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MAX); } static inline bool system_supports_mixed_endian_el0(void) @@ -738,20 +733,19 @@ static inline bool system_supports_mixed_endian(void) mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); val = cpuid_feature_extract_unsigned_field(mmfr0, - ID_AA64MMFR0_BIGENDEL_SHIFT); + ID_AA64MMFR0_EL1_BIGEND_SHIFT); return val == 0x1; } static __always_inline bool system_supports_fpsimd(void) { - return !cpus_have_const_cap(ARM64_HAS_NO_FPSIMD); + return alternative_has_cap_likely(ARM64_HAS_FPSIMD); } static inline bool system_uses_hw_pan(void) { - return IS_ENABLED(CONFIG_ARM64_PAN) && - cpus_have_const_cap(ARM64_HAS_PAN); + return alternative_has_cap_unlikely(ARM64_HAS_PAN); } static inline bool system_uses_ttbr0_pan(void) @@ -762,20 +756,22 @@ static inline bool system_uses_ttbr0_pan(void) static __always_inline bool system_supports_sve(void) { - return IS_ENABLED(CONFIG_ARM64_SVE) && - cpus_have_const_cap(ARM64_SVE); + return alternative_has_cap_unlikely(ARM64_SVE); } static __always_inline bool system_supports_sme(void) { - return 
IS_ENABLED(CONFIG_ARM64_SME) && - cpus_have_const_cap(ARM64_SME); + return alternative_has_cap_unlikely(ARM64_SME); +} + +static __always_inline bool system_supports_sme2(void) +{ + return alternative_has_cap_unlikely(ARM64_SME2); } static __always_inline bool system_supports_fa64(void) { - return IS_ENABLED(CONFIG_ARM64_SME) && - cpus_have_const_cap(ARM64_SME_FA64); + return alternative_has_cap_unlikely(ARM64_SME_FA64); } static __always_inline bool system_supports_tpidr2(void) @@ -783,22 +779,24 @@ static __always_inline bool system_supports_tpidr2(void) return system_supports_sme(); } +static __always_inline bool system_supports_fpmr(void) +{ + return alternative_has_cap_unlikely(ARM64_HAS_FPMR); +} + static __always_inline bool system_supports_cnp(void) { - return IS_ENABLED(CONFIG_ARM64_CNP) && - cpus_have_const_cap(ARM64_HAS_CNP); + return alternative_has_cap_unlikely(ARM64_HAS_CNP); } static inline bool system_supports_address_auth(void) { - return IS_ENABLED(CONFIG_ARM64_PTR_AUTH) && - cpus_have_const_cap(ARM64_HAS_ADDRESS_AUTH); + return cpus_have_final_boot_cap(ARM64_HAS_ADDRESS_AUTH); } static inline bool system_supports_generic_auth(void) { - return IS_ENABLED(CONFIG_ARM64_PTR_AUTH) && - cpus_have_const_cap(ARM64_HAS_GENERIC_AUTH); + return alternative_has_cap_unlikely(ARM64_HAS_GENERIC_AUTH); } static inline bool system_has_full_ptr_auth(void) @@ -808,14 +806,12 @@ static inline bool system_has_full_ptr_auth(void) static __always_inline bool system_uses_irq_prio_masking(void) { - return IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && - cpus_have_const_cap(ARM64_HAS_IRQ_PRIO_MASKING); + return alternative_has_cap_unlikely(ARM64_HAS_GIC_PRIO_MASKING); } static inline bool system_supports_mte(void) { - return IS_ENABLED(CONFIG_ARM64_MTE) && - cpus_have_const_cap(ARM64_MTE); + return alternative_has_cap_unlikely(ARM64_MTE); } static inline bool system_has_prio_mask_debugging(void) @@ -826,27 +822,73 @@ static inline bool system_has_prio_mask_debugging(void) static inline bool system_supports_bti(void) { - return IS_ENABLED(CONFIG_ARM64_BTI) && cpus_have_const_cap(ARM64_BTI); + return cpus_have_final_cap(ARM64_BTI); +} + +static inline bool system_supports_bti_kernel(void) +{ + return IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) && + cpus_have_final_boot_cap(ARM64_BTI); } static inline bool system_supports_tlb_range(void) { - return IS_ENABLED(CONFIG_ARM64_TLB_RANGE) && - cpus_have_const_cap(ARM64_HAS_TLB_RANGE); + return alternative_has_cap_unlikely(ARM64_HAS_TLB_RANGE); +} + +static inline bool system_supports_lpa2(void) +{ + return cpus_have_final_cap(ARM64_HAS_LPA2); +} + +static inline bool system_supports_poe(void) +{ + return alternative_has_cap_unlikely(ARM64_HAS_S1POE); +} + +static inline bool system_supports_gcs(void) +{ + return alternative_has_cap_unlikely(ARM64_HAS_GCS); +} + +static inline bool system_supports_haft(void) +{ + return cpus_have_final_cap(ARM64_HAFT); +} + +static __always_inline bool system_supports_mpam(void) +{ + return alternative_has_cap_unlikely(ARM64_MPAM); +} + +static __always_inline bool system_supports_mpam_hcr(void) +{ + return alternative_has_cap_unlikely(ARM64_MPAM_HCR); } -extern int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt); +static inline bool system_supports_pmuv3(void) +{ + return cpus_have_final_cap(ARM64_HAS_PMUV3); +} + +static inline bool system_supports_bbml2_noabort(void) +{ + return alternative_has_cap_unlikely(ARM64_HAS_BBML2_NOABORT); +} + +int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt); +bool 
try_emulate_mrs(struct pt_regs *regs, u32 isn); static inline u32 id_aa64mmfr0_parange_to_phys_shift(int parange) { switch (parange) { - case ID_AA64MMFR0_PARANGE_32: return 32; - case ID_AA64MMFR0_PARANGE_36: return 36; - case ID_AA64MMFR0_PARANGE_40: return 40; - case ID_AA64MMFR0_PARANGE_42: return 42; - case ID_AA64MMFR0_PARANGE_44: return 44; - case ID_AA64MMFR0_PARANGE_48: return 48; - case ID_AA64MMFR0_PARANGE_52: return 52; + case ID_AA64MMFR0_EL1_PARANGE_32: return 32; + case ID_AA64MMFR0_EL1_PARANGE_36: return 36; + case ID_AA64MMFR0_EL1_PARANGE_40: return 40; + case ID_AA64MMFR0_EL1_PARANGE_42: return 42; + case ID_AA64MMFR0_EL1_PARANGE_44: return 44; + case ID_AA64MMFR0_EL1_PARANGE_48: return 48; + case ID_AA64MMFR0_EL1_PARANGE_52: return 52; /* * A future PE could use a value unknown to the kernel. * However, by the "D10.1.4 Principles of the ID scheme @@ -866,16 +908,20 @@ static inline bool cpu_has_hw_af(void) if (!IS_ENABLED(CONFIG_ARM64_HW_AFDBM)) return false; - mmfr1 = read_cpuid(ID_AA64MMFR1_EL1); + /* + * Use cached version to avoid emulated msr operation on KVM + * guests. + */ + mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); return cpuid_feature_extract_unsigned_field(mmfr1, - ID_AA64MMFR1_HADBS_SHIFT); + ID_AA64MMFR1_EL1_HAFDBS_SHIFT); } static inline bool cpu_has_pan(void) { u64 mmfr1 = read_cpuid(ID_AA64MMFR1_EL1); return cpuid_feature_extract_unsigned_field(mmfr1, - ID_AA64MMFR1_PAN_SHIFT); + ID_AA64MMFR1_EL1_PAN_SHIFT); } #ifdef CONFIG_ARM64_AMU_EXTN @@ -896,8 +942,8 @@ static inline unsigned int get_vmid_bits(u64 mmfr1) int vmid_bits; vmid_bits = cpuid_feature_extract_unsigned_field(mmfr1, - ID_AA64MMFR1_VMIDBITS_SHIFT); - if (vmid_bits == ID_AA64MMFR1_VMIDBITS_16) + ID_AA64MMFR1_EL1_VMIDBits_SHIFT); + if (vmid_bits == ID_AA64MMFR1_EL1_VMIDBits_16) return 16; /* @@ -907,7 +953,12 @@ static inline unsigned int get_vmid_bits(u64 mmfr1) return 8; } +s64 arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, s64 new, s64 cur); +struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id); + +extern struct arm64_ftr_override id_aa64mmfr0_override; extern struct arm64_ftr_override id_aa64mmfr1_override; +extern struct arm64_ftr_override id_aa64mmfr2_override; extern struct arm64_ftr_override id_aa64pfr0_override; extern struct arm64_ftr_override id_aa64pfr1_override; extern struct arm64_ftr_override id_aa64zfr0_override; @@ -915,9 +966,116 @@ extern struct arm64_ftr_override id_aa64smfr0_override; extern struct arm64_ftr_override id_aa64isar1_override; extern struct arm64_ftr_override id_aa64isar2_override; +extern struct arm64_ftr_override arm64_sw_feature_override; + +static inline +u64 arm64_apply_feature_override(u64 val, int feat, int width, + const struct arm64_ftr_override *override) +{ + u64 oval = override->val; + + /* + * When it encounters an invalid override (e.g., an override that + * cannot be honoured due to a missing CPU feature), the early idreg + * override code will set the mask to 0x0 and the value to non-zero for + * the field in question. In order to determine whether the override is + * valid or not for the field we are interested in, we first need to + * disregard bits belonging to other fields. + */ + oval &= GENMASK_ULL(feat + width - 1, feat); + + /* + * The override is valid if all value bits are accounted for in the + * mask. If so, replace the masked bits with the override value. 
+ */ + if (oval == (oval & override->mask)) { + val &= ~override->mask; + val |= oval; + } + + /* Extract the field from the updated value */ + return cpuid_feature_extract_unsigned_field(val, feat); +} + +static inline bool arm64_test_sw_feature_override(int feat) +{ + /* + * Software features are pseudo CPU features that have no underlying + * CPUID system register value to apply the override to. + */ + return arm64_apply_feature_override(0, feat, 4, + &arm64_sw_feature_override); +} + +static inline bool kaslr_disabled_cmdline(void) +{ + return arm64_test_sw_feature_override(ARM64_SW_FEATURE_OVERRIDE_NOKASLR); +} + u32 get_kvm_ipa_limit(void); void dump_cpu_features(void); +static inline bool cpu_has_bti(void) +{ + if (!IS_ENABLED(CONFIG_ARM64_BTI)) + return false; + + return arm64_apply_feature_override(read_cpuid(ID_AA64PFR1_EL1), + ID_AA64PFR1_EL1_BT_SHIFT, 4, + &id_aa64pfr1_override); +} + +static inline bool cpu_has_pac(void) +{ + u64 isar1, isar2; + + if (!IS_ENABLED(CONFIG_ARM64_PTR_AUTH)) + return false; + + isar1 = read_cpuid(ID_AA64ISAR1_EL1); + isar2 = read_cpuid(ID_AA64ISAR2_EL1); + + if (arm64_apply_feature_override(isar1, ID_AA64ISAR1_EL1_APA_SHIFT, 4, + &id_aa64isar1_override)) + return true; + + if (arm64_apply_feature_override(isar1, ID_AA64ISAR1_EL1_API_SHIFT, 4, + &id_aa64isar1_override)) + return true; + + return arm64_apply_feature_override(isar2, ID_AA64ISAR2_EL1_APA3_SHIFT, 4, + &id_aa64isar2_override); +} + +static inline bool cpu_has_lva(void) +{ + u64 mmfr2; + + mmfr2 = read_sysreg_s(SYS_ID_AA64MMFR2_EL1); + mmfr2 &= ~id_aa64mmfr2_override.mask; + mmfr2 |= id_aa64mmfr2_override.val; + return cpuid_feature_extract_unsigned_field(mmfr2, + ID_AA64MMFR2_EL1_VARange_SHIFT); +} + +static inline bool cpu_has_lpa2(void) +{ +#ifdef CONFIG_ARM64_LPA2 + u64 mmfr0; + int feat; + + mmfr0 = read_sysreg(id_aa64mmfr0_el1); + mmfr0 &= ~id_aa64mmfr0_override.mask; + mmfr0 |= id_aa64mmfr0_override.val; + feat = cpuid_feature_extract_signed_field(mmfr0, + ID_AA64MMFR0_EL1_TGRAN_SHIFT); + + return feat >= ID_AA64MMFR0_EL1_TGRAN_LPA2; +#else + return false; +#endif +} + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 8aa0d276a636..661735616787 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -41,7 +41,7 @@ (((midr) & MIDR_IMPLEMENTOR_MASK) >> MIDR_IMPLEMENTOR_SHIFT) #define MIDR_CPU_MODEL(imp, partnum) \ - (((imp) << MIDR_IMPLEMENTOR_SHIFT) | \ + ((_AT(u32, imp) << MIDR_IMPLEMENTOR_SHIFT) | \ (0xf << MIDR_ARCHITECTURE_SHIFT) | \ ((partnum) << MIDR_PARTNUM_SHIFT)) @@ -60,6 +60,8 @@ #define ARM_CPU_IMP_FUJITSU 0x46 #define ARM_CPU_IMP_HISI 0x48 #define ARM_CPU_IMP_APPLE 0x61 +#define ARM_CPU_IMP_AMPERE 0xC0 +#define ARM_CPU_IMP_MICROSOFT 0x6D #define ARM_CPU_PART_AEM_V8 0xD0F #define ARM_CPU_PART_FOUNDATION 0xD00 @@ -73,17 +75,31 @@ #define ARM_CPU_PART_CORTEX_A76 0xD0B #define ARM_CPU_PART_NEOVERSE_N1 0xD0C #define ARM_CPU_PART_CORTEX_A77 0xD0D +#define ARM_CPU_PART_CORTEX_A76AE 0xD0E #define ARM_CPU_PART_NEOVERSE_V1 0xD40 #define ARM_CPU_PART_CORTEX_A78 0xD41 #define ARM_CPU_PART_CORTEX_A78AE 0xD42 #define ARM_CPU_PART_CORTEX_X1 0xD44 #define ARM_CPU_PART_CORTEX_A510 0xD46 +#define ARM_CPU_PART_CORTEX_X1C 0xD4C +#define ARM_CPU_PART_CORTEX_A520 0xD80 #define ARM_CPU_PART_CORTEX_A710 0xD47 +#define ARM_CPU_PART_CORTEX_A715 0xD4D #define ARM_CPU_PART_CORTEX_X2 0xD48 #define ARM_CPU_PART_NEOVERSE_N2 0xD49 #define ARM_CPU_PART_CORTEX_A78C 0xD4B - -#define APM_CPU_PART_POTENZA 
0x000 +#define ARM_CPU_PART_CORTEX_X1C 0xD4C +#define ARM_CPU_PART_CORTEX_X3 0xD4E +#define ARM_CPU_PART_NEOVERSE_V2 0xD4F +#define ARM_CPU_PART_CORTEX_A720 0xD81 +#define ARM_CPU_PART_CORTEX_X4 0xD82 +#define ARM_CPU_PART_NEOVERSE_V3 0xD84 +#define ARM_CPU_PART_CORTEX_X925 0xD85 +#define ARM_CPU_PART_CORTEX_A725 0xD87 +#define ARM_CPU_PART_NEOVERSE_N3 0xD8E + +#define APM_CPU_PART_XGENE 0x000 +#define APM_CPU_VAR_POTENZA 0x00 #define CAVIUM_CPU_PART_THUNDERX 0x0A1 #define CAVIUM_CPU_PART_THUNDERX_81XX 0x0A2 @@ -105,9 +121,11 @@ #define QCOM_CPU_PART_KRYO 0x200 #define QCOM_CPU_PART_KRYO_2XX_GOLD 0x800 #define QCOM_CPU_PART_KRYO_2XX_SILVER 0x801 +#define QCOM_CPU_PART_KRYO_3XX_GOLD 0x802 #define QCOM_CPU_PART_KRYO_3XX_SILVER 0x803 #define QCOM_CPU_PART_KRYO_4XX_GOLD 0x804 #define QCOM_CPU_PART_KRYO_4XX_SILVER 0x805 +#define QCOM_CPU_PART_ORYON_X1 0x001 #define NVIDIA_CPU_PART_DENVER 0x003 #define NVIDIA_CPU_PART_CARMEL 0x004 @@ -115,6 +133,8 @@ #define FUJITSU_CPU_PART_A64FX 0x001 #define HISI_CPU_PART_TSV110 0xD01 +#define HISI_CPU_PART_HIP09 0xD02 +#define HISI_CPU_PART_HIP12 0xD06 #define APPLE_CPU_PART_M1_ICESTORM 0x022 #define APPLE_CPU_PART_M1_FIRESTORM 0x023 @@ -122,6 +142,17 @@ #define APPLE_CPU_PART_M1_FIRESTORM_PRO 0x025 #define APPLE_CPU_PART_M1_ICESTORM_MAX 0x028 #define APPLE_CPU_PART_M1_FIRESTORM_MAX 0x029 +#define APPLE_CPU_PART_M2_BLIZZARD 0x032 +#define APPLE_CPU_PART_M2_AVALANCHE 0x033 +#define APPLE_CPU_PART_M2_BLIZZARD_PRO 0x034 +#define APPLE_CPU_PART_M2_AVALANCHE_PRO 0x035 +#define APPLE_CPU_PART_M2_BLIZZARD_MAX 0x038 +#define APPLE_CPU_PART_M2_AVALANCHE_MAX 0x039 + +#define AMPERE_CPU_PART_AMPERE1 0xAC3 +#define AMPERE_CPU_PART_AMPERE1A 0xAC4 + +#define MICROSOFT_CPU_PART_AZURE_COBALT_100 0xD49 /* Based on r0p0 of ARM Neoverse N2 */ #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) @@ -133,15 +164,28 @@ #define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76) #define MIDR_NEOVERSE_N1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N1) #define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77) +#define MIDR_CORTEX_A76AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76AE) #define MIDR_NEOVERSE_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V1) #define MIDR_CORTEX_A78 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78) #define MIDR_CORTEX_A78AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78AE) #define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1) #define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510) +#define MIDR_CORTEX_X1C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1C) +#define MIDR_CORTEX_A520 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A520) #define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710) +#define MIDR_CORTEX_A715 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A715) #define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2) #define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2) #define MIDR_CORTEX_A78C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78C) +#define MIDR_CORTEX_X1C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1C) +#define MIDR_CORTEX_X3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X3) +#define MIDR_NEOVERSE_V2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V2) +#define 
MIDR_CORTEX_A720 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A720) +#define MIDR_CORTEX_X4 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X4) +#define MIDR_NEOVERSE_V3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V3) +#define MIDR_CORTEX_X925 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X925) +#define MIDR_CORTEX_A725 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A725) +#define MIDR_NEOVERSE_N3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N3) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) @@ -159,19 +203,42 @@ #define MIDR_QCOM_KRYO MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO) #define MIDR_QCOM_KRYO_2XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_2XX_GOLD) #define MIDR_QCOM_KRYO_2XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_2XX_SILVER) +#define MIDR_QCOM_KRYO_3XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_3XX_GOLD) #define MIDR_QCOM_KRYO_3XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_3XX_SILVER) #define MIDR_QCOM_KRYO_4XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_GOLD) #define MIDR_QCOM_KRYO_4XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_SILVER) +#define MIDR_QCOM_ORYON_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_ORYON_X1) + +/* + * NOTES: + * - Qualcomm Kryo 5XX Prime / Gold ID themselves as MIDR_CORTEX_A77 + * - Qualcomm Kryo 5XX Silver IDs itself as MIDR_QCOM_KRYO_4XX_SILVER + * - Qualcomm Kryo 6XX Prime IDs itself as MIDR_CORTEX_X1 + * - Qualcomm Kryo 6XX Gold IDs itself as ARM_CPU_PART_CORTEX_A78 + * - Qualcomm Kryo 6XX Silver IDs itself as MIDR_CORTEX_A55 + */ + #define MIDR_NVIDIA_DENVER MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_DENVER) #define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL) #define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX) #define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110) +#define MIDR_HISI_HIP09 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP09) +#define MIDR_HISI_HIP12 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP12) #define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM) #define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM) #define MIDR_APPLE_M1_ICESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_PRO) #define MIDR_APPLE_M1_FIRESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_PRO) #define MIDR_APPLE_M1_ICESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_MAX) #define MIDR_APPLE_M1_FIRESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_MAX) +#define MIDR_APPLE_M2_BLIZZARD MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD) +#define MIDR_APPLE_M2_AVALANCHE MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE) +#define MIDR_APPLE_M2_BLIZZARD_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD_PRO) +#define MIDR_APPLE_M2_AVALANCHE_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE_PRO) +#define MIDR_APPLE_M2_BLIZZARD_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD_MAX) +#define MIDR_APPLE_M2_AVALANCHE_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, 
APPLE_CPU_PART_M2_AVALANCHE_MAX) +#define MIDR_AMPERE1 MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1) +#define MIDR_AMPERE1A MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1A) +#define MIDR_MICROSOFT_AZURE_COBALT_100 MIDR_CPU_MODEL(ARM_CPU_IMP_MICROSOFT, MICROSOFT_CPU_PART_AZURE_COBALT_100) /* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */ #define MIDR_FUJITSU_ERRATUM_010001 MIDR_FUJITSU_A64FX @@ -185,6 +252,16 @@ #define read_cpuid(reg) read_sysreg_s(SYS_ ## reg) /* + * The CPU ID never changes at run time, so we might as well tell the + * compiler that it's constant. Use this function to read the CPU ID + * rather than directly reading processor_id or read_cpuid() directly. + */ +static inline u32 __attribute_const__ read_cpuid_id(void) +{ + return read_cpuid(MIDR_EL1); +} + +/* * Represent a range of MIDR values for a given CPU model and a * range of variant/revision values. * @@ -219,30 +296,14 @@ static inline bool midr_is_cpu_model_range(u32 midr, u32 model, u32 rv_min, return _model == model && rv >= rv_min && rv <= rv_max; } -static inline bool is_midr_in_range(u32 midr, struct midr_range const *range) -{ - return midr_is_cpu_model_range(midr, range->model, - range->rv_min, range->rv_max); -} - -static inline bool -is_midr_in_range_list(u32 midr, struct midr_range const *ranges) -{ - while (ranges->model) - if (is_midr_in_range(midr, ranges++)) - return true; - return false; -} +struct target_impl_cpu { + u64 midr; + u64 revidr; + u64 aidr; +}; -/* - * The CPU ID never changes at run time, so we might as well tell the - * compiler that it's constant. Use this function to read the CPU ID - * rather than directly reading processor_id or read_cpuid() directly. - */ -static inline u32 __attribute_const__ read_cpuid_id(void) -{ - return read_cpuid(MIDR_EL1); -} +bool cpu_errata_set_target_impl(u64 num, void *impl_cpus); +bool is_midr_in_range_list(struct midr_range const *ranges); static inline u64 __attribute_const__ read_cpuid_mpidr(void) { diff --git a/arch/arm64/include/asm/crash_reserve.h b/arch/arm64/include/asm/crash_reserve.h new file mode 100644 index 000000000000..4afe027a4e7b --- /dev/null +++ b/arch/arm64/include/asm/crash_reserve.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _ARM64_CRASH_RESERVE_H +#define _ARM64_CRASH_RESERVE_H + +/* Current arm64 boot protocol requires 2MB alignment */ +#define CRASH_ALIGN SZ_2M + +#define CRASH_ADDR_LOW_MAX arm64_dma_phys_limit +#define CRASH_ADDR_HIGH_MAX (PHYS_MASK + 1) +#endif diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h index 55f57dfa8e2f..fbb5c99eb2f9 100644 --- a/arch/arm64/include/asm/daifflags.h +++ b/arch/arm64/include/asm/daifflags.h @@ -132,7 +132,7 @@ static inline void local_daif_inherit(struct pt_regs *regs) trace_hardirqs_on(); if (system_uses_irq_prio_masking()) - gic_write_pmr(regs->pmr_save); + gic_write_pmr(regs->pmr); /* * We can't use local_daif_restore(regs->pstate) here as diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index 7b7e05c02691..f5e3ed2420ce 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -13,14 +13,8 @@ #include <asm/ptrace.h> /* Low-level stepping controls. 
*/ -#define DBG_MDSCR_SS (1 << 0) #define DBG_SPSR_SS (1 << 21) -/* MDSCR_EL1 enabling bits */ -#define DBG_MDSCR_KDE (1 << 13) -#define DBG_MDSCR_MDE (1 << 15) -#define DBG_MDSCR_MASK ~(DBG_MDSCR_KDE | DBG_MDSCR_MDE) - #define DBG_ESR_EVT(x) (((x) >> 27) & 0x7) /* AArch64 */ @@ -62,30 +56,6 @@ struct task_struct; #define DBG_HOOK_HANDLED 0 #define DBG_HOOK_ERROR 1 -struct step_hook { - struct list_head node; - int (*fn)(struct pt_regs *regs, unsigned long esr); -}; - -void register_user_step_hook(struct step_hook *hook); -void unregister_user_step_hook(struct step_hook *hook); - -void register_kernel_step_hook(struct step_hook *hook); -void unregister_kernel_step_hook(struct step_hook *hook); - -struct break_hook { - struct list_head node; - int (*fn)(struct pt_regs *regs, unsigned long esr); - u16 imm; - u16 mask; /* These bits are ignored when comparing with imm */ -}; - -void register_user_break_hook(struct break_hook *hook); -void unregister_user_break_hook(struct break_hook *hook); - -void register_kernel_break_hook(struct break_hook *hook); -void unregister_kernel_break_hook(struct break_hook *hook); - u8 debug_monitors_arch(void); enum dbg_active_el { @@ -104,19 +74,19 @@ void user_regs_reset_single_step(struct user_pt_regs *regs, void kernel_enable_single_step(struct pt_regs *regs); void kernel_disable_single_step(void); int kernel_active_single_step(void); +void kernel_rewind_single_step(struct pt_regs *regs); +void kernel_fastforward_single_step(struct pt_regs *regs); #ifdef CONFIG_HAVE_HW_BREAKPOINT -int reinstall_suspended_bps(struct pt_regs *regs); +bool try_step_suspended_breakpoints(struct pt_regs *regs); #else -static inline int reinstall_suspended_bps(struct pt_regs *regs) +static inline bool try_step_suspended_breakpoints(struct pt_regs *regs) { - return -ENODEV; + return false; } #endif -int aarch32_break_handler(struct pt_regs *regs); - -void debug_traps_init(void); +bool try_handle_aarch32_break(struct pt_regs *regs); #endif /* __ASSEMBLY */ #endif /* __ASM_DEBUG_MONITORS_H */ diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 439e2bc5d5d8..bcd5622aa096 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -14,31 +14,40 @@ #ifdef CONFIG_EFI extern void efi_init(void); + +bool efi_runtime_fixup_exception(struct pt_regs *regs, const char *msg); #else #define efi_init() + +static inline +bool efi_runtime_fixup_exception(struct pt_regs *regs, const char *msg) +{ + return false; +} #endif int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md); -int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md); - -#define arch_efi_call_virt_setup() \ -({ \ - efi_virtmap_load(); \ - __efi_fpsimd_begin(); \ -}) +int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md, + bool has_bti); #undef arch_efi_call_virt #define arch_efi_call_virt(p, f, args...) \ __efi_rt_asm_wrapper((p)->f, #f, args) -#define arch_efi_call_virt_teardown() \ -({ \ - __efi_fpsimd_end(); \ - efi_virtmap_unload(); \ -}) - +extern u64 *efi_rt_stack_top; efi_status_t __efi_rt_asm_wrapper(void *, const char *, ...); +void arch_efi_call_virt_setup(void); +void arch_efi_call_virt_teardown(void); + +/* + * efi_rt_stack_top[-1] contains the value the stack pointer had before + * switching to the EFI runtime stack. 
+ */ +#define current_in_efi() \ + (!preemptible() && efi_rt_stack_top != NULL && \ + on_task_stack(current, READ_ONCE(efi_rt_stack_top[-1]), 1)) + #define ARCH_EFI_IRQ_FLAGS_MASK (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT) /* @@ -67,7 +76,7 @@ efi_status_t __efi_rt_asm_wrapper(void *, const char *, ...); * guaranteed to cover the kernel Image. * * Since the EFI stub is part of the kernel Image, we can relax the - * usual requirements in Documentation/arm64/booting.rst, which still + * usual requirements in Documentation/arch/arm64/booting.rst, which still * apply to other bootloaders, and are required for some kernel * configurations. */ @@ -76,13 +85,25 @@ static inline unsigned long efi_get_max_initrd_addr(unsigned long image_addr) return (image_addr & ~(SZ_1G - 1UL)) + (1UL << (VA_BITS_MIN - 1)); } -#define alloc_screen_info(x...) &screen_info - -static inline void free_screen_info(struct screen_info *si) +static inline unsigned long efi_get_kimg_min_align(void) { + extern bool efi_nokaslr; + + /* + * Although relocatable kernels can fix up the misalignment with + * respect to MIN_KIMG_ALIGN, the resulting virtual text addresses are + * subtly out of sync with those recorded in the vmlinux when kaslr is + * disabled but the image required relocation anyway. Therefore retain + * 2M alignment if KASLR was explicitly disabled, even if it was not + * going to be activated to begin with. + */ + return efi_nokaslr ? MIN_KIMG_ALIGN : EFI_KIMG_ALIGN; } #define EFI_ALLOC_ALIGN SZ_64K +#define EFI_ALLOC_LIMIT ((1UL << 48) - 1) + +extern unsigned long primary_entry_offset(void); /* * On ARM systems, virtually remapped UEFI runtime services are set up in two @@ -133,4 +154,8 @@ static inline void efi_capsule_flush_cache_range(void *addr, int size) dcache_clean_inval_poc((unsigned long)addr, (unsigned long)addr + size); } +efi_status_t efi_handle_corrupted_x18(efi_status_t s, const char *f); + +void efi_icache_sync(unsigned long start, unsigned long end); + #endif /* _ASM_EFI_H */ diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index 2630faa5bc08..46033027510c 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -16,12 +16,62 @@ #include <asm/sysreg.h> #include <linux/irqchip/arm-gic-v3.h> +.macro init_el2_hcr val + mov_q x0, \val + + /* + * Compliant CPUs advertise their VHE-onlyness with + * ID_AA64MMFR4_EL1.E2H0 < 0. On such CPUs HCR_EL2.E2H is RES1, but it + * can reset into an UNKNOWN state and might not read as 1 until it has + * been initialized explicitly. + * + * Fruity CPUs seem to have HCR_EL2.E2H set to RAO/WI, but + * don't advertise it (they predate this relaxation). + * + * Initalize HCR_EL2.E2H so that later code can rely upon HCR_EL2.E2H + * indicating whether the CPU is running in E2H mode. 
+ */ + mrs_s x1, SYS_ID_AA64MMFR4_EL1 + sbfx x1, x1, #ID_AA64MMFR4_EL1_E2H0_SHIFT, #ID_AA64MMFR4_EL1_E2H0_WIDTH + cmp x1, #0 + b.ge .LnVHE_\@ + + orr x0, x0, #HCR_E2H +.LnVHE_\@: + msr_hcr_el2 x0 + isb +.endm + .macro __init_el2_sctlr mov_q x0, INIT_SCTLR_EL2_MMU_OFF msr sctlr_el2, x0 isb .endm +.macro __init_el2_hcrx + mrs x0, id_aa64mmfr1_el1 + ubfx x0, x0, #ID_AA64MMFR1_EL1_HCX_SHIFT, #4 + cbz x0, .Lskip_hcrx_\@ + mov_q x0, (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En | HCRX_EL2_EnFPM) + + /* Enable GCS if supported */ + mrs_s x1, SYS_ID_AA64PFR1_EL1 + ubfx x1, x1, #ID_AA64PFR1_EL1_GCS_SHIFT, #4 + cbz x1, .Lset_hcrx_\@ + orr x0, x0, #HCRX_EL2_GCSEn + +.Lset_hcrx_\@: + msr_s SYS_HCRX_EL2, x0 +.Lskip_hcrx_\@: +.endm + +/* Check if running in host at EL2 mode, i.e., (h)VHE. Jump to fail if not. */ +.macro __check_hvhe fail, tmp + mrs \tmp, hcr_el2 + and \tmp, \tmp, #HCR_E2H + cbz \tmp, \fail +.endm + /* * Allow Non-secure EL1 and EL0 to access physical timer and counter. * This is not necessary for VHE, since the host kernel runs in EL2, @@ -34,45 +84,49 @@ */ .macro __init_el2_timers mov x0, #3 // Enable EL1 physical timers + __check_hvhe .LnVHE_\@, x1 + lsl x0, x0, #10 +.LnVHE_\@: msr cnthctl_el2, x0 msr cntvoff_el2, xzr // Clear virtual offset .endm .macro __init_el2_debug mrs x1, id_aa64dfr0_el1 - sbfx x0, x1, #ID_AA64DFR0_PMUVER_SHIFT, #4 - cmp x0, #1 - b.lt .Lskip_pmu_\@ // Skip if no PMU present + ubfx x0, x1, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4 + cmp x0, #ID_AA64DFR0_EL1_PMUVer_NI + ccmp x0, #ID_AA64DFR0_EL1_PMUVer_IMP_DEF, #4, ne + b.eq .Lskip_pmu_\@ // Skip if no PMU present or IMP_DEF mrs x0, pmcr_el0 // Disable debug access traps ubfx x0, x0, #11, #5 // to EL2 and allow access to .Lskip_pmu_\@: - csel x2, xzr, x0, lt // all PMU counters from EL1 + csel x2, xzr, x0, eq // all PMU counters from EL1 /* Statistical profiling */ - ubfx x0, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4 + ubfx x0, x1, #ID_AA64DFR0_EL1_PMSVer_SHIFT, #4 cbz x0, .Lskip_spe_\@ // Skip if SPE not present mrs_s x0, SYS_PMBIDR_EL1 // If SPE available at EL2, - and x0, x0, #(1 << SYS_PMBIDR_EL1_P_SHIFT) + and x0, x0, #(1 << PMBIDR_EL1_P_SHIFT) cbnz x0, .Lskip_spe_el2_\@ // then permit sampling of physical - mov x0, #(1 << SYS_PMSCR_EL2_PCT_SHIFT | \ - 1 << SYS_PMSCR_EL2_PA_SHIFT) + mov x0, #(1 << PMSCR_EL2_PCT_SHIFT | \ + 1 << PMSCR_EL2_PA_SHIFT) msr_s SYS_PMSCR_EL2, x0 // addresses and physical counter .Lskip_spe_el2_\@: - mov x0, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT) + mov x0, #MDCR_EL2_E2PB_MASK orr x2, x2, x0 // If we don't have VHE, then // use EL1&0 translation. .Lskip_spe_\@: /* Trace buffer */ - ubfx x0, x1, #ID_AA64DFR0_TRBE_SHIFT, #4 + ubfx x0, x1, #ID_AA64DFR0_EL1_TraceBuffer_SHIFT, #4 cbz x0, .Lskip_trace_\@ // Skip if TraceBuffer is not present mrs_s x0, SYS_TRBIDR_EL1 - and x0, x0, TRBIDR_PROG + and x0, x0, TRBIDR_EL1_P cbnz x0, .Lskip_trace_\@ // If TRBE is available at EL2 - mov x0, #(MDCR_EL2_E2TB_MASK << MDCR_EL2_E2TB_SHIFT) + mov x0, #MDCR_EL2_E2TB_MASK orr x2, x2, x0 // allow the EL1&0 translation // to own it. 
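/*
 * Editor's note: illustrative sketch only, not part of the patch above.
 *
 * The ubfx/cmp/ccmp/b.eq sequence added to __init_el2_debug is the
 * assembly form of roughly the following C-level check: PMU trap setup
 * is skipped when ID_AA64DFR0_EL1.PMUVer is either not implemented (NI)
 * or IMPLEMENTATION DEFINED (IMP_DEF). example_pmu_usable() is a made-up
 * name; the field helper and constants are the ones used elsewhere in
 * this diff.
 */
static inline bool example_pmu_usable(u64 dfr0)
{
	unsigned int pmuver;

	pmuver = cpuid_feature_extract_unsigned_field(dfr0,
						      ID_AA64DFR0_EL1_PMUVer_SHIFT);

	return pmuver != ID_AA64DFR0_EL1_PMUVer_NI &&
	       pmuver != ID_AA64DFR0_EL1_PMUVer_IMP_DEF;
}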
@@ -83,7 +137,7 @@ /* LORegions */ .macro __init_el2_lor mrs x1, id_aa64mmfr1_el1 - ubfx x0, x1, #ID_AA64MMFR1_LOR_SHIFT, 4 + ubfx x0, x1, #ID_AA64MMFR1_EL1_LO_SHIFT, 4 cbz x0, .Lskip_lor_\@ msr_s SYS_LORC_EL1, xzr .Lskip_lor_\@: @@ -97,7 +151,7 @@ /* GICv3 system register access */ .macro __init_el2_gicv3 mrs x0, id_aa64pfr0_el1 - ubfx x0, x0, #ID_AA64PFR0_GIC_SHIFT, #4 + ubfx x0, x0, #ID_AA64PFR0_EL1_GIC_SHIFT, #4 cbz x0, .Lskip_gicv3_\@ mrs_s x0, SYS_ICC_SRE_EL2 @@ -111,6 +165,50 @@ .Lskip_gicv3_\@: .endm +/* GICv5 system register access */ +.macro __init_el2_gicv5 + mrs_s x0, SYS_ID_AA64PFR2_EL1 + ubfx x0, x0, #ID_AA64PFR2_EL1_GCIE_SHIFT, #4 + cbz x0, .Lskip_gicv5_\@ + + mov x0, #(ICH_HFGITR_EL2_GICRCDNMIA | \ + ICH_HFGITR_EL2_GICRCDIA | \ + ICH_HFGITR_EL2_GICCDDI | \ + ICH_HFGITR_EL2_GICCDEOI | \ + ICH_HFGITR_EL2_GICCDHM | \ + ICH_HFGITR_EL2_GICCDRCFG | \ + ICH_HFGITR_EL2_GICCDPEND | \ + ICH_HFGITR_EL2_GICCDAFF | \ + ICH_HFGITR_EL2_GICCDPRI | \ + ICH_HFGITR_EL2_GICCDDIS | \ + ICH_HFGITR_EL2_GICCDEN) + msr_s SYS_ICH_HFGITR_EL2, x0 // Disable instruction traps + mov_q x0, (ICH_HFGRTR_EL2_ICC_PPI_ACTIVERn_EL1 | \ + ICH_HFGRTR_EL2_ICC_PPI_PRIORITYRn_EL1 | \ + ICH_HFGRTR_EL2_ICC_PPI_PENDRn_EL1 | \ + ICH_HFGRTR_EL2_ICC_PPI_ENABLERn_EL1 | \ + ICH_HFGRTR_EL2_ICC_PPI_HMRn_EL1 | \ + ICH_HFGRTR_EL2_ICC_IAFFIDR_EL1 | \ + ICH_HFGRTR_EL2_ICC_ICSR_EL1 | \ + ICH_HFGRTR_EL2_ICC_PCR_EL1 | \ + ICH_HFGRTR_EL2_ICC_HPPIR_EL1 | \ + ICH_HFGRTR_EL2_ICC_HAPR_EL1 | \ + ICH_HFGRTR_EL2_ICC_CR0_EL1 | \ + ICH_HFGRTR_EL2_ICC_IDRn_EL1 | \ + ICH_HFGRTR_EL2_ICC_APR_EL1) + msr_s SYS_ICH_HFGRTR_EL2, x0 // Disable reg read traps + mov_q x0, (ICH_HFGWTR_EL2_ICC_PPI_ACTIVERn_EL1 | \ + ICH_HFGWTR_EL2_ICC_PPI_PRIORITYRn_EL1 | \ + ICH_HFGWTR_EL2_ICC_PPI_PENDRn_EL1 | \ + ICH_HFGWTR_EL2_ICC_PPI_ENABLERn_EL1 | \ + ICH_HFGWTR_EL2_ICC_ICSR_EL1 | \ + ICH_HFGWTR_EL2_ICC_PCR_EL1 | \ + ICH_HFGWTR_EL2_ICC_CR0_EL1 | \ + ICH_HFGWTR_EL2_ICC_APR_EL1) + msr_s SYS_ICH_HFGWTR_EL2, x0 // Disable reg write traps +.Lskip_gicv5_\@: +.endm + .macro __init_el2_hstr msr hstr_el2, xzr // Disable CP15 traps to EL2 .endm @@ -124,76 +222,330 @@ .endm /* Coprocessor traps */ -.macro __init_el2_nvhe_cptr +.macro __init_el2_cptr + __check_hvhe .LnVHE_\@, x1 + mov x0, #CPACR_EL1_FPEN + msr cpacr_el1, x0 + b .Lskip_set_cptr_\@ +.LnVHE_\@: mov x0, #0x33ff msr cptr_el2, x0 // Disable copro. traps to EL2 +.Lskip_set_cptr_\@: +.endm + +/* + * Configure BRBE to permit recording cycle counts and branch mispredicts. + * + * At any EL, to record cycle counts BRBE requires that both BRBCR_EL2.CC=1 and + * BRBCR_EL1.CC=1. + * + * At any EL, to record branch mispredicts BRBE requires that both + * BRBCR_EL2.MPRED=1 and BRBCR_EL1.MPRED=1. + * + * Set {CC,MPRED} in BRBCR_EL2 in case nVHE mode is used and we are + * executing in EL1. 
+ */ +.macro __init_el2_brbe + mrs x1, id_aa64dfr0_el1 + ubfx x1, x1, #ID_AA64DFR0_EL1_BRBE_SHIFT, #4 + cbz x1, .Lskip_brbe_\@ + + mov_q x0, BRBCR_ELx_CC | BRBCR_ELx_MPRED + msr_s SYS_BRBCR_EL2, x0 +.Lskip_brbe_\@: .endm /* Disable any fine grained traps */ .macro __init_el2_fgt mrs x1, id_aa64mmfr0_el1 - ubfx x1, x1, #ID_AA64MMFR0_FGT_SHIFT, #4 + ubfx x1, x1, #ID_AA64MMFR0_EL1_FGT_SHIFT, #4 cbz x1, .Lskip_fgt_\@ mov x0, xzr + mov x2, xzr mrs x1, id_aa64dfr0_el1 - ubfx x1, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4 + ubfx x1, x1, #ID_AA64DFR0_EL1_PMSVer_SHIFT, #4 cmp x1, #3 - b.lt .Lset_debug_fgt_\@ + b.lt .Lskip_spe_fgt_\@ /* Disable PMSNEVFR_EL1 read and write traps */ - orr x0, x0, #(1 << 62) + orr x0, x0, #HDFGRTR_EL2_nPMSNEVFR_EL1_MASK + orr x2, x2, #HDFGWTR_EL2_nPMSNEVFR_EL1_MASK + +.Lskip_spe_fgt_\@: + mrs x1, id_aa64dfr0_el1 + ubfx x1, x1, #ID_AA64DFR0_EL1_BRBE_SHIFT, #4 + cbz x1, .Lskip_brbe_fgt_\@ + + /* + * Disable read traps for the following registers + * + * [BRBSRC|BRBTGT|RBINF]_EL1 + * [BRBSRCINJ|BRBTGTINJ|BRBINFINJ|BRBTS]_EL1 + */ + orr x0, x0, #HDFGRTR_EL2_nBRBDATA_MASK + + /* + * Disable write traps for the following registers + * + * [BRBSRCINJ|BRBTGTINJ|BRBINFINJ|BRBTS]_EL1 + */ + orr x2, x2, #HDFGWTR_EL2_nBRBDATA_MASK + + /* Disable read and write traps for [BRBCR|BRBFCR]_EL1 */ + orr x0, x0, #HDFGRTR_EL2_nBRBCTL_MASK + orr x2, x2, #HDFGWTR_EL2_nBRBCTL_MASK + + /* Disable read traps for BRBIDR_EL1 */ + orr x0, x0, #HDFGRTR_EL2_nBRBIDR_MASK + +.Lskip_brbe_fgt_\@: .Lset_debug_fgt_\@: msr_s SYS_HDFGRTR_EL2, x0 - msr_s SYS_HDFGWTR_EL2, x0 + msr_s SYS_HDFGWTR_EL2, x2 mov x0, xzr + mov x2, xzr + + mrs x1, id_aa64dfr0_el1 + ubfx x1, x1, #ID_AA64DFR0_EL1_BRBE_SHIFT, #4 + cbz x1, .Lskip_brbe_insn_fgt_\@ + + /* Disable traps for BRBIALL instruction */ + orr x2, x2, #HFGITR_EL2_nBRBIALL_MASK + + /* Disable traps for BRBINJ instruction */ + orr x2, x2, #HFGITR_EL2_nBRBINJ_MASK + +.Lskip_brbe_insn_fgt_\@: mrs x1, id_aa64pfr1_el1 - ubfx x1, x1, #ID_AA64PFR1_SME_SHIFT, #4 - cbz x1, .Lset_fgt_\@ + ubfx x1, x1, #ID_AA64PFR1_EL1_SME_SHIFT, #4 + cbz x1, .Lskip_sme_fgt_\@ /* Disable nVHE traps of TPIDR2 and SMPRI */ - orr x0, x0, #HFGxTR_EL2_nSMPRI_EL1_MASK - orr x0, x0, #HFGxTR_EL2_nTPIDR2_EL0_MASK + orr x0, x0, #HFGRTR_EL2_nSMPRI_EL1_MASK + orr x0, x0, #HFGRTR_EL2_nTPIDR2_EL0_MASK + +.Lskip_sme_fgt_\@: + mrs_s x1, SYS_ID_AA64MMFR3_EL1 + ubfx x1, x1, #ID_AA64MMFR3_EL1_S1PIE_SHIFT, #4 + cbz x1, .Lskip_pie_fgt_\@ + + /* Disable trapping of PIR_EL1 / PIRE0_EL1 */ + orr x0, x0, #HFGRTR_EL2_nPIR_EL1 + orr x0, x0, #HFGRTR_EL2_nPIRE0_EL1 + +.Lskip_pie_fgt_\@: + mrs_s x1, SYS_ID_AA64MMFR3_EL1 + ubfx x1, x1, #ID_AA64MMFR3_EL1_S1POE_SHIFT, #4 + cbz x1, .Lskip_poe_fgt_\@ + + /* Disable trapping of POR_EL0 */ + orr x0, x0, #HFGRTR_EL2_nPOR_EL0 + +.Lskip_poe_fgt_\@: + /* GCS depends on PIE so we don't check it if PIE is absent */ + mrs_s x1, SYS_ID_AA64PFR1_EL1 + ubfx x1, x1, #ID_AA64PFR1_EL1_GCS_SHIFT, #4 + cbz x1, .Lskip_gce_fgt_\@ + + /* Disable traps of access to GCS registers at EL0 and EL1 */ + orr x0, x0, #HFGRTR_EL2_nGCS_EL1_MASK + orr x0, x0, #HFGRTR_EL2_nGCS_EL0_MASK + +.Lskip_gce_fgt_\@: .Lset_fgt_\@: msr_s SYS_HFGRTR_EL2, x0 msr_s SYS_HFGWTR_EL2, x0 - msr_s SYS_HFGITR_EL2, xzr + msr_s SYS_HFGITR_EL2, x2 mrs x1, id_aa64pfr0_el1 // AMU traps UNDEF without AMU - ubfx x1, x1, #ID_AA64PFR0_AMU_SHIFT, #4 - cbz x1, .Lskip_fgt_\@ + ubfx x1, x1, #ID_AA64PFR0_EL1_AMU_SHIFT, #4 + cbz x1, .Lskip_amu_fgt_\@ msr_s SYS_HAFGRTR_EL2, xzr + +.Lskip_amu_fgt_\@: + .Lskip_fgt_\@: .endm -.macro 
__init_el2_nvhe_prepare_eret - mov x0, #INIT_PSTATE_EL1 - msr spsr_el2, x0 +.macro __init_el2_fgt2 + mrs x1, id_aa64mmfr0_el1 + ubfx x1, x1, #ID_AA64MMFR0_EL1_FGT_SHIFT, #4 + cmp x1, #ID_AA64MMFR0_EL1_FGT_FGT2 + b.lt .Lskip_fgt2_\@ + + mov x0, xzr + mrs x1, id_aa64dfr0_el1 + ubfx x1, x1, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4 + cmp x1, #ID_AA64DFR0_EL1_PMUVer_V3P9 + b.lt .Lskip_pmuv3p9_\@ + + orr x0, x0, #HDFGRTR2_EL2_nPMICNTR_EL0 + orr x0, x0, #HDFGRTR2_EL2_nPMICFILTR_EL0 + orr x0, x0, #HDFGRTR2_EL2_nPMUACR_EL1 +.Lskip_pmuv3p9_\@: + msr_s SYS_HDFGRTR2_EL2, x0 + msr_s SYS_HDFGWTR2_EL2, x0 + msr_s SYS_HFGRTR2_EL2, xzr + msr_s SYS_HFGWTR2_EL2, xzr + msr_s SYS_HFGITR2_EL2, xzr +.Lskip_fgt2_\@: .endm /** * Initialize EL2 registers to sane values. This should be called early on all * cores that were booted in EL2. Note that everything gets initialised as - * if VHE was not evailable. The kernel context will be upgraded to VHE + * if VHE was not available. The kernel context will be upgraded to VHE * if possible later on in the boot process * * Regs: x0, x1 and x2 are clobbered. */ .macro init_el2_state __init_el2_sctlr + __init_el2_hcrx __init_el2_timers __init_el2_debug + __init_el2_brbe __init_el2_lor __init_el2_stage2 __init_el2_gicv3 + __init_el2_gicv5 __init_el2_hstr __init_el2_nvhe_idregs - __init_el2_nvhe_cptr + __init_el2_cptr __init_el2_fgt - __init_el2_nvhe_prepare_eret + __init_el2_fgt2 +.endm + +#ifndef __KVM_NVHE_HYPERVISOR__ +// This will clobber tmp1 and tmp2, and expect tmp1 to contain +// the id register value as read from the HW +.macro __check_override idreg, fld, width, pass, fail, tmp1, tmp2 + ubfx \tmp1, \tmp1, #\fld, #\width + cbz \tmp1, \fail + + adr_l \tmp1, \idreg\()_override + ldr \tmp2, [\tmp1, FTR_OVR_VAL_OFFSET] + ldr \tmp1, [\tmp1, FTR_OVR_MASK_OFFSET] + ubfx \tmp2, \tmp2, #\fld, #\width + ubfx \tmp1, \tmp1, #\fld, #\width + cmp \tmp1, xzr + and \tmp2, \tmp2, \tmp1 + csinv \tmp2, \tmp2, xzr, ne + cbnz \tmp2, \pass + b \fail +.endm + +// This will clobber tmp1 and tmp2 +.macro check_override idreg, fld, pass, fail, tmp1, tmp2 + mrs \tmp1, \idreg\()_el1 + __check_override \idreg \fld 4 \pass \fail \tmp1 \tmp2 +.endm +#else +// This will clobber tmp +.macro __check_override idreg, fld, width, pass, fail, tmp, ignore + ldr_l \tmp, \idreg\()_el1_sys_val + ubfx \tmp, \tmp, #\fld, #\width + cbnz \tmp, \pass + b \fail +.endm + +.macro check_override idreg, fld, pass, fail, tmp, ignore + __check_override \idreg \fld 4 \pass \fail \tmp \ignore +.endm +#endif + +.macro finalise_el2_state + check_override id_aa64pfr0, ID_AA64PFR0_EL1_MPAM_SHIFT, .Linit_mpam_\@, .Lskip_mpam_\@, x1, x2 + +.Linit_mpam_\@: + msr_s SYS_MPAM2_EL2, xzr // use the default partition + // and disable lower traps + mrs_s x0, SYS_MPAMIDR_EL1 + tbz x0, #MPAMIDR_EL1_HAS_HCR_SHIFT, .Lskip_mpam_\@ // skip if no MPAMHCR reg + msr_s SYS_MPAMHCR_EL2, xzr // clear TRAP_MPAMIDR_EL1 -> EL2 + +.Lskip_mpam_\@: + check_override id_aa64pfr1, ID_AA64PFR1_EL1_GCS_SHIFT, .Linit_gcs_\@, .Lskip_gcs_\@, x1, x2 + +.Linit_gcs_\@: + msr_s SYS_GCSCR_EL1, xzr + msr_s SYS_GCSCRE0_EL1, xzr + +.Lskip_gcs_\@: + check_override id_aa64pfr0, ID_AA64PFR0_EL1_SVE_SHIFT, .Linit_sve_\@, .Lskip_sve_\@, x1, x2 + +.Linit_sve_\@: /* SVE register access */ + __check_hvhe .Lcptr_nvhe_\@, x1 + + // (h)VHE case + mrs x0, cpacr_el1 // Disable SVE traps + orr x0, x0, #CPACR_EL1_ZEN + msr cpacr_el1, x0 + b .Lskip_set_cptr_\@ + +.Lcptr_nvhe_\@: // nVHE case + mrs x0, cptr_el2 // Disable SVE traps + bic x0, x0, #CPTR_EL2_TZ + msr cptr_el2, x0 
+.Lskip_set_cptr_\@: + isb + mov x1, #ZCR_ELx_LEN_MASK // SVE: Enable full vector + msr_s SYS_ZCR_EL2, x1 // length for EL1. + +.Lskip_sve_\@: + check_override id_aa64pfr1, ID_AA64PFR1_EL1_SME_SHIFT, .Linit_sme_\@, .Lskip_sme_\@, x1, x2 + +.Linit_sme_\@: /* SME register access and priority mapping */ + __check_hvhe .Lcptr_nvhe_sme_\@, x1 + + // (h)VHE case + mrs x0, cpacr_el1 // Disable SME traps + orr x0, x0, #CPACR_EL1_SMEN + msr cpacr_el1, x0 + b .Lskip_set_cptr_sme_\@ + +.Lcptr_nvhe_sme_\@: // nVHE case + mrs x0, cptr_el2 // Disable SME traps + bic x0, x0, #CPTR_EL2_TSM + msr cptr_el2, x0 +.Lskip_set_cptr_sme_\@: + isb + + mrs x1, sctlr_el2 + orr x1, x1, #SCTLR_ELx_ENTP2 // Disable TPIDR2 traps + msr sctlr_el2, x1 + isb + + mov x0, #0 // SMCR controls + + // Full FP in SM? + mrs_s x1, SYS_ID_AA64SMFR0_EL1 + __check_override id_aa64smfr0, ID_AA64SMFR0_EL1_FA64_SHIFT, 1, .Linit_sme_fa64_\@, .Lskip_sme_fa64_\@, x1, x2 + +.Linit_sme_fa64_\@: + orr x0, x0, SMCR_ELx_FA64_MASK +.Lskip_sme_fa64_\@: + + // ZT0 available? + mrs_s x1, SYS_ID_AA64SMFR0_EL1 + __check_override id_aa64smfr0, ID_AA64SMFR0_EL1_SMEver_SHIFT, 4, .Linit_sme_zt0_\@, .Lskip_sme_zt0_\@, x1, x2 +.Linit_sme_zt0_\@: + orr x0, x0, SMCR_ELx_EZT0_MASK +.Lskip_sme_zt0_\@: + + orr x0, x0, #SMCR_ELx_LEN_MASK // Enable full SME vector + msr_s SYS_SMCR_EL2, x0 // length for EL1. + + mrs_s x1, SYS_SMIDR_EL1 // Priority mapping supported? + ubfx x1, x1, #SMIDR_EL1_SMPS_SHIFT, #1 + cbz x1, .Lskip_sme_\@ + + msr_s SYS_SMPRIMAP_EL2, xzr // Make all priorities equal +.Lskip_sme_\@: .endm #endif /* __ARM_KVM_INIT_H__ */ diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index 97932fbf973d..3f93f4eef953 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -201,16 +201,16 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm, #define COMPAT_ELF_PLATFORM ("v8l") #endif -#ifdef CONFIG_COMPAT - -/* PIE load location for compat arm. Must match ARM ELF_ET_DYN_BASE. */ -#define COMPAT_ELF_ET_DYN_BASE 0x000400000UL - /* AArch32 registers. */ #define COMPAT_ELF_NGREG 18 typedef unsigned int compat_elf_greg_t; typedef compat_elf_greg_t compat_elf_gregset_t[COMPAT_ELF_NGREG]; +#ifdef CONFIG_COMPAT + +/* PIE load location for compat arm. Must match ARM ELF_ET_DYN_BASE. */ +#define COMPAT_ELF_ET_DYN_BASE 0x000400000UL + /* AArch32 EABI. 
*/ #define EF_ARM_EABI_MASK 0xff000000 int compat_elf_check_arch(const struct elf32_hdr *); diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 15b34fbfca66..e1deed824464 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -10,63 +10,65 @@ #include <asm/memory.h> #include <asm/sysreg.h> -#define ESR_ELx_EC_UNKNOWN (0x00) -#define ESR_ELx_EC_WFx (0x01) +#define ESR_ELx_EC_UNKNOWN UL(0x00) +#define ESR_ELx_EC_WFx UL(0x01) /* Unallocated EC: 0x02 */ -#define ESR_ELx_EC_CP15_32 (0x03) -#define ESR_ELx_EC_CP15_64 (0x04) -#define ESR_ELx_EC_CP14_MR (0x05) -#define ESR_ELx_EC_CP14_LS (0x06) -#define ESR_ELx_EC_FP_ASIMD (0x07) -#define ESR_ELx_EC_CP10_ID (0x08) /* EL2 only */ -#define ESR_ELx_EC_PAC (0x09) /* EL2 and above */ -/* Unallocated EC: 0x0A - 0x0B */ -#define ESR_ELx_EC_CP14_64 (0x0C) -#define ESR_ELx_EC_BTI (0x0D) -#define ESR_ELx_EC_ILL (0x0E) +#define ESR_ELx_EC_CP15_32 UL(0x03) +#define ESR_ELx_EC_CP15_64 UL(0x04) +#define ESR_ELx_EC_CP14_MR UL(0x05) +#define ESR_ELx_EC_CP14_LS UL(0x06) +#define ESR_ELx_EC_FP_ASIMD UL(0x07) +#define ESR_ELx_EC_CP10_ID UL(0x08) /* EL2 only */ +#define ESR_ELx_EC_PAC UL(0x09) /* EL2 and above */ +#define ESR_ELx_EC_OTHER UL(0x0A) +/* Unallocated EC: 0x0B */ +#define ESR_ELx_EC_CP14_64 UL(0x0C) +#define ESR_ELx_EC_BTI UL(0x0D) +#define ESR_ELx_EC_ILL UL(0x0E) /* Unallocated EC: 0x0F - 0x10 */ -#define ESR_ELx_EC_SVC32 (0x11) -#define ESR_ELx_EC_HVC32 (0x12) /* EL2 only */ -#define ESR_ELx_EC_SMC32 (0x13) /* EL2 and above */ +#define ESR_ELx_EC_SVC32 UL(0x11) +#define ESR_ELx_EC_HVC32 UL(0x12) /* EL2 only */ +#define ESR_ELx_EC_SMC32 UL(0x13) /* EL2 and above */ /* Unallocated EC: 0x14 */ -#define ESR_ELx_EC_SVC64 (0x15) -#define ESR_ELx_EC_HVC64 (0x16) /* EL2 and above */ -#define ESR_ELx_EC_SMC64 (0x17) /* EL2 and above */ -#define ESR_ELx_EC_SYS64 (0x18) -#define ESR_ELx_EC_SVE (0x19) -#define ESR_ELx_EC_ERET (0x1a) /* EL2 only */ +#define ESR_ELx_EC_SVC64 UL(0x15) +#define ESR_ELx_EC_HVC64 UL(0x16) /* EL2 and above */ +#define ESR_ELx_EC_SMC64 UL(0x17) /* EL2 and above */ +#define ESR_ELx_EC_SYS64 UL(0x18) +#define ESR_ELx_EC_SVE UL(0x19) +#define ESR_ELx_EC_ERET UL(0x1a) /* EL2 only */ /* Unallocated EC: 0x1B */ -#define ESR_ELx_EC_FPAC (0x1C) /* EL1 and above */ -#define ESR_ELx_EC_SME (0x1D) +#define ESR_ELx_EC_FPAC UL(0x1C) /* EL1 and above */ +#define ESR_ELx_EC_SME UL(0x1D) /* Unallocated EC: 0x1E */ -#define ESR_ELx_EC_IMP_DEF (0x1f) /* EL3 only */ -#define ESR_ELx_EC_IABT_LOW (0x20) -#define ESR_ELx_EC_IABT_CUR (0x21) -#define ESR_ELx_EC_PC_ALIGN (0x22) +#define ESR_ELx_EC_IMP_DEF UL(0x1f) /* EL3 only */ +#define ESR_ELx_EC_IABT_LOW UL(0x20) +#define ESR_ELx_EC_IABT_CUR UL(0x21) +#define ESR_ELx_EC_PC_ALIGN UL(0x22) /* Unallocated EC: 0x23 */ -#define ESR_ELx_EC_DABT_LOW (0x24) -#define ESR_ELx_EC_DABT_CUR (0x25) -#define ESR_ELx_EC_SP_ALIGN (0x26) -/* Unallocated EC: 0x27 */ -#define ESR_ELx_EC_FP_EXC32 (0x28) +#define ESR_ELx_EC_DABT_LOW UL(0x24) +#define ESR_ELx_EC_DABT_CUR UL(0x25) +#define ESR_ELx_EC_SP_ALIGN UL(0x26) +#define ESR_ELx_EC_MOPS UL(0x27) +#define ESR_ELx_EC_FP_EXC32 UL(0x28) /* Unallocated EC: 0x29 - 0x2B */ -#define ESR_ELx_EC_FP_EXC64 (0x2C) -/* Unallocated EC: 0x2D - 0x2E */ -#define ESR_ELx_EC_SERROR (0x2F) -#define ESR_ELx_EC_BREAKPT_LOW (0x30) -#define ESR_ELx_EC_BREAKPT_CUR (0x31) -#define ESR_ELx_EC_SOFTSTP_LOW (0x32) -#define ESR_ELx_EC_SOFTSTP_CUR (0x33) -#define ESR_ELx_EC_WATCHPT_LOW (0x34) -#define ESR_ELx_EC_WATCHPT_CUR (0x35) +#define ESR_ELx_EC_FP_EXC64 
UL(0x2C) +#define ESR_ELx_EC_GCS UL(0x2D) +/* Unallocated EC: 0x2E */ +#define ESR_ELx_EC_SERROR UL(0x2F) +#define ESR_ELx_EC_BREAKPT_LOW UL(0x30) +#define ESR_ELx_EC_BREAKPT_CUR UL(0x31) +#define ESR_ELx_EC_SOFTSTP_LOW UL(0x32) +#define ESR_ELx_EC_SOFTSTP_CUR UL(0x33) +#define ESR_ELx_EC_WATCHPT_LOW UL(0x34) +#define ESR_ELx_EC_WATCHPT_CUR UL(0x35) /* Unallocated EC: 0x36 - 0x37 */ -#define ESR_ELx_EC_BKPT32 (0x38) +#define ESR_ELx_EC_BKPT32 UL(0x38) /* Unallocated EC: 0x39 */ -#define ESR_ELx_EC_VECTOR32 (0x3A) /* EL2 only */ +#define ESR_ELx_EC_VECTOR32 UL(0x3A) /* EL2 only */ /* Unallocated EC: 0x3B */ -#define ESR_ELx_EC_BRK64 (0x3C) +#define ESR_ELx_EC_BRK64 UL(0x3C) /* Unallocated EC: 0x3D - 0x3F */ -#define ESR_ELx_EC_MAX (0x3F) +#define ESR_ELx_EC_MAX UL(0x3F) #define ESR_ELx_EC_SHIFT (26) #define ESR_ELx_EC_WIDTH (6) @@ -75,8 +77,11 @@ #define ESR_ELx_IL_SHIFT (25) #define ESR_ELx_IL (UL(1) << ESR_ELx_IL_SHIFT) -#define ESR_ELx_ISS_MASK (ESR_ELx_IL - 1) +#define ESR_ELx_ISS_MASK (GENMASK(24, 0)) #define ESR_ELx_ISS(esr) ((esr) & ESR_ELx_ISS_MASK) +#define ESR_ELx_ISS2_SHIFT (32) +#define ESR_ELx_ISS2_MASK (GENMASK_ULL(55, 32)) +#define ESR_ELx_ISS2(esr) (((esr) & ESR_ELx_ISS2_MASK) >> ESR_ELx_ISS2_SHIFT) /* ISS field definitions shared by different classes */ #define ESR_ELx_WNR_SHIFT (6) @@ -95,6 +100,8 @@ #define ESR_ELx_AET_CE (UL(6) << ESR_ELx_AET_SHIFT) /* Shared ISS field definitions for Data/Instruction aborts */ +#define ESR_ELx_VNCR_SHIFT (13) +#define ESR_ELx_VNCR (UL(1) << ESR_ELx_VNCR_SHIFT) #define ESR_ELx_SET_SHIFT (11) #define ESR_ELx_SET_MASK (UL(3) << ESR_ELx_SET_SHIFT) #define ESR_ELx_FnV_SHIFT (10) @@ -114,6 +121,26 @@ #define ESR_ELx_FSC_ACCESS (0x08) #define ESR_ELx_FSC_FAULT (0x04) #define ESR_ELx_FSC_PERM (0x0C) +#define ESR_ELx_FSC_SEA_TTW(n) (0x14 + (n)) +#define ESR_ELx_FSC_SECC (0x18) +#define ESR_ELx_FSC_SECC_TTW(n) (0x1c + (n)) +#define ESR_ELx_FSC_ADDRSZ (0x00) + +/* + * Annoyingly, the negative levels for Address size faults aren't laid out + * contiguously (or in the desired order) + */ +#define ESR_ELx_FSC_ADDRSZ_nL(n) ((n) == -1 ? 0x25 : 0x2C) +#define ESR_ELx_FSC_ADDRSZ_L(n) ((n) < 0 ? ESR_ELx_FSC_ADDRSZ_nL(n) : \ + (ESR_ELx_FSC_ADDRSZ + (n))) + +/* Status codes for individual page table levels */ +#define ESR_ELx_FSC_ACCESS_L(n) (ESR_ELx_FSC_ACCESS + (n)) +#define ESR_ELx_FSC_PERM_L(n) (ESR_ELx_FSC_PERM + (n)) + +#define ESR_ELx_FSC_FAULT_nL (0x2C) +#define ESR_ELx_FSC_FAULT_L(n) (((n) < 0 ? 
ESR_ELx_FSC_FAULT_nL : \ + ESR_ELx_FSC_FAULT) + (n)) /* ISS field definitions for Data Aborts */ #define ESR_ELx_ISV_SHIFT (24) @@ -131,6 +158,20 @@ #define ESR_ELx_CM_SHIFT (8) #define ESR_ELx_CM (UL(1) << ESR_ELx_CM_SHIFT) +/* ISS2 field definitions for Data Aborts */ +#define ESR_ELx_TnD_SHIFT (10) +#define ESR_ELx_TnD (UL(1) << ESR_ELx_TnD_SHIFT) +#define ESR_ELx_TagAccess_SHIFT (9) +#define ESR_ELx_TagAccess (UL(1) << ESR_ELx_TagAccess_SHIFT) +#define ESR_ELx_GCS_SHIFT (8) +#define ESR_ELx_GCS (UL(1) << ESR_ELx_GCS_SHIFT) +#define ESR_ELx_Overlay_SHIFT (6) +#define ESR_ELx_Overlay (UL(1) << ESR_ELx_Overlay_SHIFT) +#define ESR_ELx_DirtyBit_SHIFT (5) +#define ESR_ELx_DirtyBit (UL(1) << ESR_ELx_DirtyBit_SHIFT) +#define ESR_ELx_Xs_SHIFT (0) +#define ESR_ELx_Xs_MASK (GENMASK_ULL(4, 0)) + /* ISS field definitions for exceptions taken in to Hyp */ #define ESR_ELx_CV (UL(1) << 24) #define ESR_ELx_COND_SHIFT (20) @@ -143,6 +184,13 @@ #define ESR_ELx_WFx_ISS_WFE (UL(1) << 0) #define ESR_ELx_xVC_IMM_MASK ((UL(1) << 16) - 1) +/* ISS definitions for LD64B/ST64B/{T,P}SBCSYNC instructions */ +#define ESR_ELx_ISS_OTHER_ST64BV (0) +#define ESR_ELx_ISS_OTHER_ST64BV0 (1) +#define ESR_ELx_ISS_OTHER_LDST64B (2) +#define ESR_ELx_ISS_OTHER_TSBCSYNC (3) +#define ESR_ELx_ISS_OTHER_PSBCSYNC (4) + #define DISR_EL1_IDS (UL(1) << 24) /* * DISR_EL1 and ESR_ELx share the bottom 13 bits, but the RES0 bits may mean @@ -263,6 +311,10 @@ (((e) & ESR_ELx_SYS64_ISS_OP2_MASK) >> \ ESR_ELx_SYS64_ISS_OP2_SHIFT)) +/* ISS field definitions for ERET/ERETAA/ERETAB trapping */ +#define ESR_ELx_ERET_ISS_ERET 0x2 +#define ESR_ELx_ERET_ISS_ERETA 0x1 + /* * ISS field definitions for floating-point exception traps * (FP_EXC_32/FP_EXC_64). @@ -336,15 +388,57 @@ /* * ISS values for SME traps */ - -#define ESR_ELx_SME_ISS_SME_DISABLED 0 -#define ESR_ELx_SME_ISS_ILL 1 -#define ESR_ELx_SME_ISS_SM_DISABLED 2 -#define ESR_ELx_SME_ISS_ZA_DISABLED 3 +#define ESR_ELx_SME_ISS_SMTC_MASK GENMASK(2, 0) +#define ESR_ELx_SME_ISS_SMTC(esr) ((esr) & ESR_ELx_SME_ISS_SMTC_MASK) + +#define ESR_ELx_SME_ISS_SMTC_SME_DISABLED 0 +#define ESR_ELx_SME_ISS_SMTC_ILL 1 +#define ESR_ELx_SME_ISS_SMTC_SM_DISABLED 2 +#define ESR_ELx_SME_ISS_SMTC_ZA_DISABLED 3 +#define ESR_ELx_SME_ISS_SMTC_ZT_DISABLED 4 + +/* ISS field definitions for MOPS exceptions */ +#define ESR_ELx_MOPS_ISS_MEM_INST (UL(1) << 24) +#define ESR_ELx_MOPS_ISS_FROM_EPILOGUE (UL(1) << 18) +#define ESR_ELx_MOPS_ISS_WRONG_OPTION (UL(1) << 17) +#define ESR_ELx_MOPS_ISS_OPTION_A (UL(1) << 16) +#define ESR_ELx_MOPS_ISS_DESTREG(esr) (((esr) & (UL(0x1f) << 10)) >> 10) +#define ESR_ELx_MOPS_ISS_SRCREG(esr) (((esr) & (UL(0x1f) << 5)) >> 5) +#define ESR_ELx_MOPS_ISS_SIZEREG(esr) (((esr) & (UL(0x1f) << 0)) >> 0) + +/* ISS field definitions for GCS */ +#define ESR_ELx_ExType_SHIFT (20) +#define ESR_ELx_ExType_MASK GENMASK(23, 20) +#define ESR_ELx_Raddr_SHIFT (10) +#define ESR_ELx_Raddr_MASK GENMASK(14, 10) +#define ESR_ELx_Rn_SHIFT (5) +#define ESR_ELx_Rn_MASK GENMASK(9, 5) +#define ESR_ELx_Rvalue_SHIFT 5 +#define ESR_ELx_Rvalue_MASK GENMASK(9, 5) +#define ESR_ELx_IT_SHIFT (0) +#define ESR_ELx_IT_MASK GENMASK(4, 0) + +#define ESR_ELx_ExType_DATA_CHECK 0 +#define ESR_ELx_ExType_EXLOCK 1 +#define ESR_ELx_ExType_STR 2 + +#define ESR_ELx_IT_RET 0 +#define ESR_ELx_IT_GCSPOPM 1 +#define ESR_ELx_IT_RET_KEYA 2 +#define ESR_ELx_IT_RET_KEYB 3 +#define ESR_ELx_IT_GCSSS1 4 +#define ESR_ELx_IT_GCSSS2 5 +#define ESR_ELx_IT_GCSPOPCX 6 +#define ESR_ELx_IT_GCSPOPX 7 #ifndef __ASSEMBLY__ #include <asm/types.h> +static inline 
unsigned long esr_brk_comment(unsigned long esr) +{ + return esr & ESR_ELx_BRK64_ISS_COMMENT_MASK; +} + static inline bool esr_is_data_abort(unsigned long esr) { const unsigned long ec = ESR_ELx_EC(esr); @@ -352,6 +446,93 @@ static inline bool esr_is_data_abort(unsigned long esr) return ec == ESR_ELx_EC_DABT_LOW || ec == ESR_ELx_EC_DABT_CUR; } +static inline bool esr_is_cfi_brk(unsigned long esr) +{ + return ESR_ELx_EC(esr) == ESR_ELx_EC_BRK64 && + (esr_brk_comment(esr) & ~CFI_BRK_IMM_MASK) == CFI_BRK_IMM_BASE; +} + +static inline bool esr_is_ubsan_brk(unsigned long esr) +{ + return (esr_brk_comment(esr) & ~UBSAN_BRK_MASK) == UBSAN_BRK_IMM; +} + +static inline bool esr_fsc_is_translation_fault(unsigned long esr) +{ + esr = esr & ESR_ELx_FSC; + + return (esr == ESR_ELx_FSC_FAULT_L(3)) || + (esr == ESR_ELx_FSC_FAULT_L(2)) || + (esr == ESR_ELx_FSC_FAULT_L(1)) || + (esr == ESR_ELx_FSC_FAULT_L(0)) || + (esr == ESR_ELx_FSC_FAULT_L(-1)); +} + +static inline bool esr_fsc_is_permission_fault(unsigned long esr) +{ + esr = esr & ESR_ELx_FSC; + + return (esr == ESR_ELx_FSC_PERM_L(3)) || + (esr == ESR_ELx_FSC_PERM_L(2)) || + (esr == ESR_ELx_FSC_PERM_L(1)) || + (esr == ESR_ELx_FSC_PERM_L(0)); +} + +static inline bool esr_fsc_is_access_flag_fault(unsigned long esr) +{ + esr = esr & ESR_ELx_FSC; + + return (esr == ESR_ELx_FSC_ACCESS_L(3)) || + (esr == ESR_ELx_FSC_ACCESS_L(2)) || + (esr == ESR_ELx_FSC_ACCESS_L(1)) || + (esr == ESR_ELx_FSC_ACCESS_L(0)); +} + +static inline bool esr_fsc_is_addr_sz_fault(unsigned long esr) +{ + esr &= ESR_ELx_FSC; + + return (esr == ESR_ELx_FSC_ADDRSZ_L(3)) || + (esr == ESR_ELx_FSC_ADDRSZ_L(2)) || + (esr == ESR_ELx_FSC_ADDRSZ_L(1)) || + (esr == ESR_ELx_FSC_ADDRSZ_L(0)) || + (esr == ESR_ELx_FSC_ADDRSZ_L(-1)); +} + +static inline bool esr_fsc_is_sea_ttw(unsigned long esr) +{ + esr = esr & ESR_ELx_FSC; + + return (esr == ESR_ELx_FSC_SEA_TTW(3)) || + (esr == ESR_ELx_FSC_SEA_TTW(2)) || + (esr == ESR_ELx_FSC_SEA_TTW(1)) || + (esr == ESR_ELx_FSC_SEA_TTW(0)) || + (esr == ESR_ELx_FSC_SEA_TTW(-1)); +} + +static inline bool esr_fsc_is_secc_ttw(unsigned long esr) +{ + esr = esr & ESR_ELx_FSC; + + return (esr == ESR_ELx_FSC_SECC_TTW(3)) || + (esr == ESR_ELx_FSC_SECC_TTW(2)) || + (esr == ESR_ELx_FSC_SECC_TTW(1)) || + (esr == ESR_ELx_FSC_SECC_TTW(0)) || + (esr == ESR_ELx_FSC_SECC_TTW(-1)); +} + +/* Indicate whether ESR.EC==0x1A is for an ERETAx instruction */ +static inline bool esr_iss_is_eretax(unsigned long esr) +{ + return esr & ESR_ELx_ERET_ISS_ERET; +} + +/* Indicate which key is used for ERETAx (false: A-Key, true: B-Key) */ +static inline bool esr_iss_is_eretab(unsigned long esr) +{ + return esr & ESR_ELx_ERET_ISS_ERETA; +} + const char *esr_get_class_string(unsigned long esr); #endif /* __ASSEMBLY */ diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index d94aecff9690..e3874c4fc399 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -8,16 +8,11 @@ #define __ASM_EXCEPTION_H #include <asm/esr.h> -#include <asm/kprobes.h> #include <asm/ptrace.h> #include <linux/interrupt.h> -#ifdef CONFIG_FUNCTION_GRAPH_TRACER #define __exception_irq_entry __irq_entry -#else -#define __exception_irq_entry __kprobes -#endif static inline unsigned long disr_to_esr(u64 disr) { @@ -31,7 +26,7 @@ static inline unsigned long disr_to_esr(u64 disr) return esr; } -asmlinkage void handle_bad_stack(struct pt_regs *regs); +asmlinkage void __noreturn handle_bad_stack(struct pt_regs *regs); asmlinkage void el1t_64_sync_handler(struct pt_regs 
*regs); asmlinkage void el1t_64_irq_handler(struct pt_regs *regs); @@ -58,23 +53,43 @@ asmlinkage void call_on_irq_stack(struct pt_regs *regs, asmlinkage void asm_exit_to_user_mode(struct pt_regs *regs); void do_mem_abort(unsigned long far, unsigned long esr, struct pt_regs *regs); -void do_undefinstr(struct pt_regs *regs); -void do_bti(struct pt_regs *regs); -void do_debug_exception(unsigned long addr_if_watchpoint, unsigned long esr, +void do_el0_undef(struct pt_regs *regs, unsigned long esr); +void do_el1_undef(struct pt_regs *regs, unsigned long esr); +void do_el0_bti(struct pt_regs *regs); +void do_el1_bti(struct pt_regs *regs, unsigned long esr); +void do_el0_gcs(struct pt_regs *regs, unsigned long esr); +void do_el1_gcs(struct pt_regs *regs, unsigned long esr); +#ifdef CONFIG_HAVE_HW_BREAKPOINT +void do_breakpoint(unsigned long esr, struct pt_regs *regs); +void do_watchpoint(unsigned long addr, unsigned long esr, struct pt_regs *regs); +#else +static inline void do_breakpoint(unsigned long esr, struct pt_regs *regs) {} +static inline void do_watchpoint(unsigned long addr, unsigned long esr, + struct pt_regs *regs) {} +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ +void do_el0_softstep(unsigned long esr, struct pt_regs *regs); +void do_el1_softstep(unsigned long esr, struct pt_regs *regs); +void do_el0_brk64(unsigned long esr, struct pt_regs *regs); +void do_el1_brk64(unsigned long esr, struct pt_regs *regs); +void do_bkpt32(unsigned long esr, struct pt_regs *regs); void do_fpsimd_acc(unsigned long esr, struct pt_regs *regs); void do_sve_acc(unsigned long esr, struct pt_regs *regs); void do_sme_acc(unsigned long esr, struct pt_regs *regs); void do_fpsimd_exc(unsigned long esr, struct pt_regs *regs); -void do_sysinstr(unsigned long esr, struct pt_regs *regs); +void do_el0_sys(unsigned long esr, struct pt_regs *regs); void do_sp_pc_abort(unsigned long addr, unsigned long esr, struct pt_regs *regs); void bad_el0_sync(struct pt_regs *regs, int reason, unsigned long esr); -void do_cp15instr(unsigned long esr, struct pt_regs *regs); +void do_el0_cp15(unsigned long esr, struct pt_regs *regs); +int do_compat_alignment_fixup(unsigned long addr, struct pt_regs *regs); void do_el0_svc(struct pt_regs *regs); void do_el0_svc_compat(struct pt_regs *regs); -void do_ptrauth_fault(struct pt_regs *regs, unsigned long esr); +void do_el0_fpac(struct pt_regs *regs, unsigned long esr); +void do_el1_fpac(struct pt_regs *regs, unsigned long esr); +void do_el0_mops(struct pt_regs *regs, unsigned long esr); +void do_el1_mops(struct pt_regs *regs, unsigned long esr); void do_serror(struct pt_regs *regs, unsigned long esr); -void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags); +void do_signal(struct pt_regs *regs); -void panic_bad_stack(struct pt_regs *regs, unsigned long esr, unsigned long far); +void __noreturn panic_bad_stack(struct pt_regs *regs, unsigned long esr, unsigned long far); #endif /* __ASM_EXCEPTION_H */ diff --git a/arch/arm64/include/asm/extable.h b/arch/arm64/include/asm/extable.h index 72b0e71cc3de..9dc39612bdf5 100644 --- a/arch/arm64/include/asm/extable.h +++ b/arch/arm64/include/asm/extable.h @@ -33,6 +33,8 @@ do { \ (b)->data = (tmp).data; \ } while (0) +bool insn_may_access_user(unsigned long addr, unsigned long esr); + #ifdef CONFIG_BPF_JIT bool ex_handler_bpf(const struct exception_table_entry *ex, struct pt_regs *regs); @@ -45,5 +47,5 @@ bool ex_handler_bpf(const struct exception_table_entry *ex, } #endif /* !CONFIG_BPF_JIT */ -bool fixup_exception(struct pt_regs *regs); 
+bool fixup_exception(struct pt_regs *regs, unsigned long esr); #endif diff --git a/arch/arm64/include/asm/fb.h b/arch/arm64/include/asm/fb.h deleted file mode 100644 index bdc735ee1f67..000000000000 --- a/arch/arm64/include/asm/fb.h +++ /dev/null @@ -1,23 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2012 ARM Ltd. - */ -#ifndef __ASM_FB_H_ -#define __ASM_FB_H_ - -#include <linux/fb.h> -#include <linux/fs.h> -#include <asm/page.h> - -static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma, - unsigned long off) -{ - vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); -} - -static inline int fb_is_primary_device(struct fb_info *info) -{ - return 0; -} - -#endif /* __ASM_FB_H_ */ diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h index 71ed5fdf718b..635a43c4ec85 100644 --- a/arch/arm64/include/asm/fixmap.h +++ b/arch/arm64/include/asm/fixmap.h @@ -17,6 +17,7 @@ #ifndef __ASSEMBLY__ #include <linux/kernel.h> +#include <linux/math.h> #include <linux/sizes.h> #include <asm/boot.h> #include <asm/page.h> @@ -36,21 +37,23 @@ enum fixed_addresses { FIX_HOLE, /* - * Reserve a virtual window for the FDT that is 2 MB larger than the - * maximum supported size, and put it at the top of the fixmap region. - * The additional space ensures that any FDT that does not exceed - * MAX_FDT_SIZE can be mapped regardless of whether it crosses any - * 2 MB alignment boundaries. - * - * Keep this at the top so it remains 2 MB aligned. + * Reserve a virtual window for the FDT that is a page bigger than the + * maximum supported size. The additional space ensures that any FDT + * that does not exceed MAX_FDT_SIZE can be mapped regardless of + * whether it crosses any page boundary. */ -#define FIX_FDT_SIZE (MAX_FDT_SIZE + SZ_2M) FIX_FDT_END, - FIX_FDT = FIX_FDT_END + FIX_FDT_SIZE / PAGE_SIZE - 1, + FIX_FDT = FIX_FDT_END + DIV_ROUND_UP(MAX_FDT_SIZE, PAGE_SIZE) + 1, FIX_EARLYCON_MEM_BASE, FIX_TEXT_POKE0, +#ifdef CONFIG_KVM + /* One slot per CPU, mapping the guest's VNCR page at EL2. */ + FIX_VNCR_END, + FIX_VNCR = FIX_VNCR_END + NR_CPUS, +#endif + #ifdef CONFIG_ACPI_APEI_GHES /* Used for GHES mapping from assorted contexts */ FIX_APEI_GHES_IRQ, @@ -90,13 +93,16 @@ enum fixed_addresses { FIX_PTE, FIX_PMD, FIX_PUD, + FIX_P4D, FIX_PGD, __end_of_fixed_addresses }; -#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) -#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) +#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) +#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) +#define FIXADDR_TOT_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) +#define FIXADDR_TOT_START (FIXADDR_TOP - FIXADDR_TOT_SIZE) #define FIXMAP_PAGE_IO __pgprot(PROT_DEVICE_nGnRE) diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 9bb1873f5295..b8cf0ea43cc0 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -6,6 +6,7 @@ #define __ASM_FP_H #include <asm/errno.h> +#include <asm/percpu.h> #include <asm/ptrace.h> #include <asm/processor.h> #include <asm/sigcontext.h> @@ -21,7 +22,6 @@ #include <linux/stddef.h> #include <linux/types.h> -#ifdef CONFIG_COMPAT /* Masks for extracting the FPSR and FPCR from the FPSCR */ #define VFP_FPSCR_STAT_MASK 0xf800009f #define VFP_FPSCR_CTRL_MASK 0x07f79f00 @@ -30,19 +30,44 @@ * control/status register. 
*/ #define VFP_STATE_SIZE ((32 * 8) + 4) -#endif + +static inline unsigned long cpacr_save_enable_kernel_sve(void) +{ + unsigned long old = read_sysreg(cpacr_el1); + unsigned long set = CPACR_EL1_FPEN_EL1EN | CPACR_EL1_ZEN_EL1EN; + + write_sysreg(old | set, cpacr_el1); + isb(); + return old; +} + +static inline unsigned long cpacr_save_enable_kernel_sme(void) +{ + unsigned long old = read_sysreg(cpacr_el1); + unsigned long set = CPACR_EL1_FPEN_EL1EN | CPACR_EL1_SMEN_EL1EN; + + write_sysreg(old | set, cpacr_el1); + isb(); + return old; +} + +static inline void cpacr_restore(unsigned long cpacr) +{ + write_sysreg(cpacr, cpacr_el1); + isb(); +} /* * When we defined the maximum SVE vector length we defined the ABI so * that the maximum vector length included all the reserved for future * expansion bits in ZCR rather than those just currently defined by - * the architecture. While SME follows a similar pattern the fact that - * it includes a square matrix means that any allocations that attempt - * to cover the maximum potential vector length (such as happen with - * the regset used for ptrace) end up being extremely large. Define - * the much lower actual limit for use in such situations. + * the architecture. Using this length to allocate worst size buffers + * results in excessively large allocations, and this effect is even + * more pronounced for SME due to ZA. Define more suitable VLs for + * these situations. */ -#define SME_VQ_MAX 16 +#define ARCH_SVE_VQ_MAX ((ZCR_ELx_LEN_MASK >> ZCR_ELx_LEN_SHIFT) + 1) +#define SME_VQ_MAX ((SMCR_ELx_LEN_MASK >> SMCR_ELx_LEN_SHIFT) + 1) struct task_struct; @@ -52,17 +77,28 @@ extern void fpsimd_load_state(struct user_fpsimd_state *state); extern void fpsimd_thread_switch(struct task_struct *next); extern void fpsimd_flush_thread(void); -extern void fpsimd_signal_preserve_current_state(void); extern void fpsimd_preserve_current_state(void); extern void fpsimd_restore_current_state(void); extern void fpsimd_update_current_state(struct user_fpsimd_state const *state); -extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state, - void *sve_state, unsigned int sve_vl, - void *za_state, unsigned int sme_vl, - u64 *svcr); +struct cpu_fp_state { + struct user_fpsimd_state *st; + void *sve_state; + void *sme_state; + u64 *svcr; + u64 *fpmr; + unsigned int sve_vl; + unsigned int sme_vl; + enum fp_type *fp_type; + enum fp_type to_save; +}; + +DECLARE_PER_CPU(struct cpu_fp_state, fpsimd_last_state); + +extern void fpsimd_bind_state_to_cpu(struct cpu_fp_state *fp_state); extern void fpsimd_flush_task_state(struct task_struct *target); +extern void fpsimd_save_and_flush_current_state(void); extern void fpsimd_save_and_flush_cpu_state(void); static inline bool thread_sm_enabled(struct thread_struct *thread) @@ -75,6 +111,8 @@ static inline bool thread_za_enabled(struct thread_struct *thread) return system_supports_sme() && (thread->svcr & SVCR_ZA_MASK); } +extern void task_smstop_sm(struct task_struct *task); + /* Maximum VL that SVE/SME VL-agnostic software can transparently support */ #define VL_ARCH_MAX 0x100 @@ -96,6 +134,13 @@ static inline void *sve_pffr(struct thread_struct *thread) return (char *)thread->sve_state + sve_ffr_offset(vl); } +static inline void *thread_zt_state(struct thread_struct *thread) +{ + /* The ZT register state is stored immediately after the ZA state */ + unsigned int sme_vq = sve_vq_from_vl(thread_get_sme_vl(thread)); + return thread->sme_state + ZA_SIG_REGS_SIZE(sme_vq); +} + extern void sve_save_state(void *state, u32 
*pfpsr, int save_ffr); extern void sve_load_state(void const *state, u32 const *pfpsr, int restore_ffr); @@ -103,16 +148,16 @@ extern void sve_flush_live(bool flush_ffr, unsigned long vq_minus_1); extern unsigned int sve_get_vl(void); extern void sve_set_vq(unsigned long vq_minus_1); extern void sme_set_vq(unsigned long vq_minus_1); -extern void za_save_state(void *state); -extern void za_load_state(void const *state); +extern void sme_save_state(void *state, int zt); +extern void sme_load_state(void const *state, int zt); struct arm64_cpu_capabilities; -extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused); -extern void sme_kernel_enable(const struct arm64_cpu_capabilities *__unused); -extern void fa64_kernel_enable(const struct arm64_cpu_capabilities *__unused); - -extern u64 read_zcr_features(void); -extern u64 read_smcr_features(void); +extern void cpu_enable_fpsimd(const struct arm64_cpu_capabilities *__unused); +extern void cpu_enable_sve(const struct arm64_cpu_capabilities *__unused); +extern void cpu_enable_sme(const struct arm64_cpu_capabilities *__unused); +extern void cpu_enable_sme2(const struct arm64_cpu_capabilities *__unused); +extern void cpu_enable_fa64(const struct arm64_cpu_capabilities *__unused); +extern void cpu_enable_fpmr(const struct arm64_cpu_capabilities *__unused); /* * Helpers to translate bit indices in sve_vq_map to VQ values (and @@ -153,12 +198,10 @@ struct vl_info { #ifdef CONFIG_ARM64_SVE -extern void sve_alloc(struct task_struct *task); +extern void sve_alloc(struct task_struct *task, bool flush); extern void fpsimd_release_task(struct task_struct *task); -extern void fpsimd_sync_to_sve(struct task_struct *task); -extern void fpsimd_force_sync_to_sve(struct task_struct *task); -extern void sve_sync_to_fpsimd(struct task_struct *task); -extern void sve_sync_from_fpsimd_zeropad(struct task_struct *task); +extern void fpsimd_sync_from_effective_state(struct task_struct *task); +extern void fpsimd_sync_to_effective_state_zeropad(struct task_struct *task); extern int vec_set_vector_length(struct task_struct *task, enum vec_type type, unsigned long vl, unsigned long flags); @@ -252,14 +295,29 @@ static inline bool sve_vq_available(unsigned int vq) return vq_available(ARM64_VEC_SVE, vq); } -size_t sve_state_size(struct task_struct const *task); +static inline size_t __sve_state_size(unsigned int sve_vl, unsigned int sme_vl) +{ + unsigned int vl = max(sve_vl, sme_vl); + return SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl)); +} + +/* + * Return how many bytes of memory are required to store the full SVE + * state for task, given task's currently configured vector length. + */ +static inline size_t sve_state_size(struct task_struct const *task) +{ + unsigned int sve_vl = task_get_sve_vl(task); + unsigned int sme_vl = task_get_sme_vl(task); + return __sve_state_size(sve_vl, sme_vl); +} #else /* ! 
CONFIG_ARM64_SVE */ -static inline void sve_alloc(struct task_struct *task) { } +static inline void sve_alloc(struct task_struct *task, bool flush) { } static inline void fpsimd_release_task(struct task_struct *task) { } -static inline void sve_sync_to_fpsimd(struct task_struct *task) { } -static inline void sve_sync_from_fpsimd_zeropad(struct task_struct *task) { } +static inline void fpsimd_sync_from_effective_state(struct task_struct *task) { } +static inline void fpsimd_sync_to_effective_state_zeropad(struct task_struct *task) { } static inline int sve_max_virtualisable_vl(void) { @@ -293,6 +351,11 @@ static inline void vec_update_vq_map(enum vec_type t) { } static inline int vec_verify_vq_map(enum vec_type t) { return 0; } static inline void sve_setup(void) { } +static inline size_t __sve_state_size(unsigned int sve_vl, unsigned int sme_vl) +{ + return 0; +} + static inline size_t sve_state_size(struct task_struct const *task) { return 0; @@ -339,21 +402,30 @@ static inline int sme_max_virtualisable_vl(void) return vec_max_virtualisable_vl(ARM64_VEC_SME); } -extern void sme_alloc(struct task_struct *task); +extern void sme_alloc(struct task_struct *task, bool flush); extern unsigned int sme_get_vl(void); extern int sme_set_current_vl(unsigned long arg); extern int sme_get_current_vl(void); +extern void sme_suspend_exit(void); + +static inline size_t __sme_state_size(unsigned int sme_vl) +{ + size_t size = ZA_SIG_REGS_SIZE(sve_vq_from_vl(sme_vl)); + + if (system_supports_sme2()) + size += ZT_SIG_REG_SIZE; + + return size; +} /* * Return how many bytes of memory are required to store the full SME - * specific state (currently just ZA) for task, given task's currently - * configured vector length. + * specific state for task, given task's currently configured vector + * length. 
*/ -static inline size_t za_state_size(struct task_struct const *task) +static inline size_t sme_state_size(struct task_struct const *task) { - unsigned int vl = task_get_sme_vl(task); - - return ZA_SIG_REGS_SIZE(sve_vq_from_vl(vl)); + return __sme_state_size(task_get_sme_vl(task)); } #else @@ -365,15 +437,21 @@ static inline void sme_smstart_sm(void) { } static inline void sme_smstop_sm(void) { } static inline void sme_smstop(void) { } -static inline void sme_alloc(struct task_struct *task) { } +static inline void sme_alloc(struct task_struct *task, bool flush) { } static inline void sme_setup(void) { } static inline unsigned int sme_get_vl(void) { return 0; } static inline int sme_max_vl(void) { return 0; } static inline int sme_max_virtualisable_vl(void) { return 0; } static inline int sme_set_current_vl(unsigned long arg) { return -EINVAL; } static inline int sme_get_current_vl(void) { return -EINVAL; } +static inline void sme_suspend_exit(void) { } + +static inline size_t __sme_state_size(unsigned int sme_vl) +{ + return 0; +} -static inline size_t za_state_size(struct task_struct const *task) +static inline size_t sme_state_size(struct task_struct const *task) { return 0; } diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h index 5e0910cf4832..cda81d009c9b 100644 --- a/arch/arm64/include/asm/fpsimdmacros.h +++ b/arch/arm64/include/asm/fpsimdmacros.h @@ -221,11 +221,25 @@ .endm /* - * Zero the entire ZA array - * ZERO ZA + * LDR (ZT0) + * + * LDR ZT0, nx + */ +.macro _ldr_zt nx + _check_general_reg \nx + .inst 0xe11f8000 \ + | (\nx << 5) +.endm + +/* + * STR (ZT0) + * + * STR ZT0, nx */ -.macro zero_za - .inst 0xc00800ff +.macro _str_zt nx + _check_general_reg \nx + .inst 0xe13f8000 \ + | (\nx << 5) .endm .macro __for from:req, to:req @@ -294,12 +308,12 @@ _for n, 0, 15, _sve_str_p \n, \nxbase, \n - 16 cbz \save_ffr, 921f _sve_rdffr 0 - _sve_str_p 0, \nxbase - _sve_ldr_p 0, \nxbase, -16 b 922f 921: - str xzr, [x\nxbase] // Zero out FFR + _sve_pfalse 0 // Zero out FFR 922: + _sve_str_p 0, \nxbase + _sve_ldr_p 0, \nxbase, -16 mrs x\nxtmp, fpsr str w\nxtmp, [\xpfpsr] mrs x\nxtmp, fpcr diff --git a/arch/arm64/include/asm/fpu.h b/arch/arm64/include/asm/fpu.h new file mode 100644 index 000000000000..2ae50bdce59b --- /dev/null +++ b/arch/arm64/include/asm/fpu.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2023 SiFive + */ + +#ifndef __ASM_FPU_H +#define __ASM_FPU_H + +#include <asm/neon.h> + +#define kernel_fpu_available() cpu_has_neon() +#define kernel_fpu_begin() kernel_neon_begin() +#define kernel_fpu_end() kernel_neon_end() + +#endif /* ! __ASM_FPU_H */ diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h index dbc45a4157fa..bfe3ce9df197 100644 --- a/arch/arm64/include/asm/ftrace.h +++ b/arch/arm64/include/asm/ftrace.h @@ -12,29 +12,17 @@ #define HAVE_FUNCTION_GRAPH_FP_TEST -/* - * HAVE_FUNCTION_GRAPH_RET_ADDR_PTR means that the architecture can provide a - * "return address pointer" which can be used to uniquely identify a return - * address which has been overwritten. - * - * On arm64 we use the address of the caller's frame record, which remains the - * same for the lifetime of the instrumented function, unlike the return - * address in the LR. 
- */ -#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR - -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS #define ARCH_SUPPORTS_FTRACE_OPS 1 #else -#define MCOUNT_ADDR ((unsigned long)function_nocfi(_mcount)) +#define MCOUNT_ADDR ((unsigned long)_mcount) #endif /* The BL at the callsite's adjusted rec->ip */ #define MCOUNT_INSN_SIZE AARCH64_INSN_SIZE #define FTRACE_PLT_IDX 0 -#define FTRACE_REGS_PLT_IDX 1 -#define NR_FTRACE_PLTS 2 +#define NR_FTRACE_PLTS 1 /* * Currently, gcc tends to save the link register after the local variables @@ -63,25 +51,119 @@ extern unsigned long ftrace_graph_call; extern void return_to_handler(void); -static inline unsigned long ftrace_call_adjust(unsigned long addr) -{ - /* - * Adjust addr to point at the BL in the callsite. - * See ftrace_init_nop() for the callsite sequence. - */ - if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS)) - return addr + AARCH64_INSN_SIZE; - /* - * addr is the address of the mcount call instruction. - * recordmcount does the necessary offset calculation. - */ - return addr; -} +unsigned long ftrace_call_adjust(unsigned long addr); +unsigned long arch_ftrace_get_symaddr(unsigned long fentry_ip); +#define ftrace_get_symaddr(fentry_ip) arch_ftrace_get_symaddr(fentry_ip) -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS +#define HAVE_ARCH_FTRACE_REGS struct dyn_ftrace; struct ftrace_ops; struct ftrace_regs; +#define arch_ftrace_regs(fregs) ((struct __arch_ftrace_regs *)(fregs)) + +#define arch_ftrace_get_regs(regs) NULL + +/* + * Note: sizeof(struct ftrace_regs) must be a multiple of 16 to ensure correct + * stack alignment + */ +struct __arch_ftrace_regs { + /* x0 - x8 */ + unsigned long regs[9]; + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + unsigned long direct_tramp; +#else + unsigned long __unused; +#endif + + unsigned long fp; + unsigned long lr; + + unsigned long sp; + unsigned long pc; +}; + +static __always_inline unsigned long +ftrace_regs_get_instruction_pointer(const struct ftrace_regs *fregs) +{ + return arch_ftrace_regs(fregs)->pc; +} + +static __always_inline void +ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs, + unsigned long pc) +{ + arch_ftrace_regs(fregs)->pc = pc; +} + +static __always_inline unsigned long +ftrace_regs_get_stack_pointer(const struct ftrace_regs *fregs) +{ + return arch_ftrace_regs(fregs)->sp; +} + +static __always_inline unsigned long +ftrace_regs_get_argument(struct ftrace_regs *fregs, unsigned int n) +{ + if (n < 8) + return arch_ftrace_regs(fregs)->regs[n]; + return 0; +} + +static __always_inline unsigned long +ftrace_regs_get_return_value(const struct ftrace_regs *fregs) +{ + return arch_ftrace_regs(fregs)->regs[0]; +} + +static __always_inline void +ftrace_regs_set_return_value(struct ftrace_regs *fregs, + unsigned long ret) +{ + arch_ftrace_regs(fregs)->regs[0] = ret; +} + +static __always_inline void +ftrace_override_function_with_return(struct ftrace_regs *fregs) +{ + arch_ftrace_regs(fregs)->pc = arch_ftrace_regs(fregs)->lr; +} + +static __always_inline unsigned long +ftrace_regs_get_frame_pointer(const struct ftrace_regs *fregs) +{ + return arch_ftrace_regs(fregs)->fp; +} + +static __always_inline unsigned long +ftrace_regs_get_return_address(const struct ftrace_regs *fregs) +{ + return arch_ftrace_regs(fregs)->lr; +} + +static __always_inline struct pt_regs * +ftrace_partial_regs(const struct ftrace_regs *fregs, struct pt_regs *regs) +{ + struct __arch_ftrace_regs *afregs = arch_ftrace_regs(fregs); + + 
memcpy(regs->regs, afregs->regs, sizeof(afregs->regs)); + regs->sp = afregs->sp; + regs->pc = afregs->pc; + regs->regs[29] = afregs->fp; + regs->regs[30] = afregs->lr; + return regs; +} + +#define arch_ftrace_fill_perf_regs(fregs, _regs) do { \ + (_regs)->pc = arch_ftrace_regs(fregs)->pc; \ + (_regs)->regs[29] = arch_ftrace_regs(fregs)->fp; \ + (_regs)->sp = arch_ftrace_regs(fregs)->sp; \ + (_regs)->pstate = PSR_MODE_EL1h; \ + } while (0) + +int ftrace_regs_query_register_offset(const char *name); int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec); #define ftrace_init_nop ftrace_init_nop @@ -89,6 +171,19 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec); void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs); #define ftrace_graph_func ftrace_graph_func + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS +static inline void arch_ftrace_set_direct_caller(struct ftrace_regs *fregs, + unsigned long addr) +{ + /* + * The ftrace trampoline will return to this address instead of the + * instrumented function. + */ + arch_ftrace_regs(fregs)->direct_tramp = addr; +} +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ + #endif #define ftrace_return_address(n) return_address(n) @@ -123,4 +218,13 @@ static inline bool arch_syscall_match_sym_name(const char *sym, } #endif /* ifndef __ASSEMBLY__ */ +#ifndef __ASSEMBLY__ +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + +void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent, + unsigned long frame_pointer); + +#endif /* ifdef CONFIG_FUNCTION_GRAPH_TRACER */ +#endif + #endif /* __ASM_FTRACE_H */ diff --git a/arch/arm64/include/asm/gcs.h b/arch/arm64/include/asm/gcs.h new file mode 100644 index 000000000000..5bc432234d3a --- /dev/null +++ b/arch/arm64/include/asm/gcs.h @@ -0,0 +1,107 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2023 ARM Ltd. 
+ */ +#ifndef __ASM_GCS_H +#define __ASM_GCS_H + +#include <asm/types.h> +#include <asm/uaccess.h> + +struct kernel_clone_args; +struct ksignal; + +static inline void gcsb_dsync(void) +{ + asm volatile(".inst 0xd503227f" : : : "memory"); +} + +static inline void gcsstr(u64 *addr, u64 val) +{ + register u64 *_addr __asm__ ("x0") = addr; + register long _val __asm__ ("x1") = val; + + /* GCSSTTR x1, x0 */ + asm volatile( + ".inst 0xd91f1c01\n" + : + : "rZ" (_val), "r" (_addr) + : "memory"); +} + +static inline void gcsss1(u64 Xt) +{ + asm volatile ( + "sys #3, C7, C7, #2, %0\n" + : + : "rZ" (Xt) + : "memory"); +} + +static inline u64 gcsss2(void) +{ + u64 Xt; + + asm volatile( + "SYSL %0, #3, C7, C7, #3\n" + : "=r" (Xt) + : + : "memory"); + + return Xt; +} + +#define PR_SHADOW_STACK_SUPPORTED_STATUS_MASK \ + (PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_WRITE | PR_SHADOW_STACK_PUSH) + +#ifdef CONFIG_ARM64_GCS + +static inline bool task_gcs_el0_enabled(struct task_struct *task) +{ + return task->thread.gcs_el0_mode & PR_SHADOW_STACK_ENABLE; +} + +void gcs_set_el0_mode(struct task_struct *task); +void gcs_free(struct task_struct *task); +void gcs_preserve_current_state(void); +unsigned long gcs_alloc_thread_stack(struct task_struct *tsk, + const struct kernel_clone_args *args); + +static inline int gcs_check_locked(struct task_struct *task, + unsigned long new_val) +{ + unsigned long cur_val = task->thread.gcs_el0_mode; + + cur_val &= task->thread.gcs_el0_locked; + new_val &= task->thread.gcs_el0_locked; + + if (cur_val != new_val) + return -EBUSY; + + return 0; +} + +#else + +static inline bool task_gcs_el0_enabled(struct task_struct *task) +{ + return false; +} + +static inline void gcs_set_el0_mode(struct task_struct *task) { } +static inline void gcs_free(struct task_struct *task) { } +static inline void gcs_preserve_current_state(void) { } +static inline unsigned long gcs_alloc_thread_stack(struct task_struct *tsk, + const struct kernel_clone_args *args) +{ + return -ENOTSUPP; +} +static inline int gcs_check_locked(struct task_struct *task, + unsigned long new_val) +{ + return 0; +} + +#endif + +#endif diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h index cbfa7b6f2e09..77d6b8c63d4e 100644 --- a/arch/arm64/include/asm/hardirq.h +++ b/arch/arm64/include/asm/hardirq.h @@ -41,7 +41,7 @@ do { \ \ ___hcr = read_sysreg(hcr_el2); \ if (!(___hcr & HCR_TGE)) { \ - write_sysreg(___hcr | HCR_TGE, hcr_el2); \ + write_sysreg_hcr(___hcr | HCR_TGE); \ isb(); \ } \ /* \ @@ -82,7 +82,7 @@ do { \ */ \ barrier(); \ if (!___ctx->cnt && !(___hcr & HCR_TGE)) \ - write_sysreg(___hcr, hcr_el2); \ + write_sysreg_hcr(___hcr); \ } while (0) static inline void ack_bad_irq(unsigned int irq) diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h index 1fd2846dbefe..2a8155c4a882 100644 --- a/arch/arm64/include/asm/hugetlb.h +++ b/arch/arm64/include/asm/hugetlb.h @@ -10,6 +10,8 @@ #ifndef __ASM_HUGETLB_H #define __ASM_HUGETLB_H +#include <asm/cacheflush.h> +#include <asm/mte.h> #include <asm/page.h> #ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION @@ -17,24 +19,31 @@ extern bool arch_hugetlb_migration_supported(struct hstate *h); #endif -static inline void arch_clear_hugepage_flags(struct page *page) +static inline void arch_clear_hugetlb_flags(struct folio *folio) { - clear_bit(PG_dcache_clean, &page->flags); + clear_bit(PG_dcache_clean, &folio->flags); + +#ifdef CONFIG_ARM64_MTE + if (system_supports_mte()) { + clear_bit(PG_mte_tagged, &folio->flags); + 
clear_bit(PG_mte_lock, &folio->flags); + } +#endif } -#define arch_clear_hugepage_flags arch_clear_hugepage_flags +#define arch_clear_hugetlb_flags arch_clear_hugetlb_flags pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags); #define arch_make_huge_pte arch_make_huge_pte #define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT extern void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte); + pte_t *ptep, pte_t pte, unsigned long sz); #define __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS extern int huge_ptep_set_access_flags(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t pte, int dirty); #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR -extern pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep); +extern pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned long sz); #define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT extern void huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep); @@ -45,13 +54,53 @@ extern pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, extern void huge_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep, unsigned long sz); #define __HAVE_ARCH_HUGE_PTEP_GET -extern pte_t huge_ptep_get(pte_t *ptep); -extern void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte, unsigned long sz); -#define set_huge_swap_pte_at set_huge_swap_pte_at +extern pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep); void __init arm64_hugetlb_cma_reserve(void); +#define huge_ptep_modify_prot_start huge_ptep_modify_prot_start +extern pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep); + +#define huge_ptep_modify_prot_commit huge_ptep_modify_prot_commit +extern void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t old_pte, pte_t new_pte); + #include <asm-generic/hugetlb.h> +static inline void __flush_hugetlb_tlb_range(struct vm_area_struct *vma, + unsigned long start, + unsigned long end, + unsigned long stride, + bool last_level) +{ + switch (stride) { +#ifndef __PAGETABLE_PMD_FOLDED + case PUD_SIZE: + __flush_tlb_range(vma, start, end, PUD_SIZE, last_level, 1); + break; +#endif + case CONT_PMD_SIZE: + case PMD_SIZE: + __flush_tlb_range(vma, start, end, PMD_SIZE, last_level, 2); + break; + case CONT_PTE_SIZE: + __flush_tlb_range(vma, start, end, PAGE_SIZE, last_level, 3); + break; + default: + __flush_tlb_range(vma, start, end, PAGE_SIZE, last_level, TLBI_TTL_UNKNOWN); + } +} + +#define __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE +static inline void flush_hugetlb_tlb_range(struct vm_area_struct *vma, + unsigned long start, + unsigned long end) +{ + unsigned long stride = huge_page_size(hstate_vma(vma)); + + __flush_hugetlb_tlb_range(vma, start, end, stride, false); +} + #endif /* __ASM_HUGETLB_H */ diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h index bc7aaed4b34e..bd81cf17744a 100644 --- a/arch/arm64/include/asm/hw_breakpoint.h +++ b/arch/arm64/include/asm/hw_breakpoint.h @@ -59,7 +59,6 @@ static inline void decode_ctrl_reg(u32 reg, /* Watchpoints */ #define ARM_BREAKPOINT_LOAD 1 #define ARM_BREAKPOINT_STORE 2 -#define AARCH64_ESR_ACCESS_MASK (1 << 6) /* Lengths */ #define ARM_BREAKPOINT_LEN_1 0x1 @@ -142,7 +141,7 @@ static inline int get_num_brps(void) u64 dfr0 = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1); return 1 + 
cpuid_feature_extract_unsigned_field(dfr0, - ID_AA64DFR0_BRPS_SHIFT); + ID_AA64DFR0_EL1_BRPs_SHIFT); } /* Determine number of WRP registers available. */ @@ -151,7 +150,15 @@ static inline int get_num_wrps(void) u64 dfr0 = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1); return 1 + cpuid_feature_extract_unsigned_field(dfr0, - ID_AA64DFR0_WRPS_SHIFT); + ID_AA64DFR0_EL1_WRPs_SHIFT); } +#ifdef CONFIG_CPU_PM +extern void cpu_suspend_set_dbg_restorer(int (*hw_bp_restore)(unsigned int)); +#else +static inline void cpu_suspend_set_dbg_restorer(int (*hw_bp_restore)(unsigned int)) +{ +} +#endif + #endif /* __ASM_BREAKPOINT_H */ diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index cef4ae7a3d8b..13f94c8ddfc0 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -31,12 +31,20 @@ #define COMPAT_HWCAP_VFPD32 (1 << 19) #define COMPAT_HWCAP_LPAE (1 << 20) #define COMPAT_HWCAP_EVTSTRM (1 << 21) +#define COMPAT_HWCAP_FPHP (1 << 22) +#define COMPAT_HWCAP_ASIMDHP (1 << 23) +#define COMPAT_HWCAP_ASIMDDP (1 << 24) +#define COMPAT_HWCAP_ASIMDFHM (1 << 25) +#define COMPAT_HWCAP_ASIMDBF16 (1 << 26) +#define COMPAT_HWCAP_I8MM (1 << 27) #define COMPAT_HWCAP2_AES (1 << 0) #define COMPAT_HWCAP2_PMULL (1 << 1) #define COMPAT_HWCAP2_SHA1 (1 << 2) #define COMPAT_HWCAP2_SHA2 (1 << 3) #define COMPAT_HWCAP2_CRC32 (1 << 4) +#define COMPAT_HWCAP2_SB (1 << 5) +#define COMPAT_HWCAP2_SSBS (1 << 6) #ifndef __ASSEMBLY__ #include <linux/log2.h> @@ -84,6 +92,22 @@ #define KERNEL_HWCAP_SB __khwcap_feature(SB) #define KERNEL_HWCAP_PACA __khwcap_feature(PACA) #define KERNEL_HWCAP_PACG __khwcap_feature(PACG) +#define KERNEL_HWCAP_GCS __khwcap_feature(GCS) +#define KERNEL_HWCAP_CMPBR __khwcap_feature(CMPBR) +#define KERNEL_HWCAP_FPRCVT __khwcap_feature(FPRCVT) +#define KERNEL_HWCAP_F8MM8 __khwcap_feature(F8MM8) +#define KERNEL_HWCAP_F8MM4 __khwcap_feature(F8MM4) +#define KERNEL_HWCAP_SVE_F16MM __khwcap_feature(SVE_F16MM) +#define KERNEL_HWCAP_SVE_ELTPERM __khwcap_feature(SVE_ELTPERM) +#define KERNEL_HWCAP_SVE_AES2 __khwcap_feature(SVE_AES2) +#define KERNEL_HWCAP_SVE_BFSCALE __khwcap_feature(SVE_BFSCALE) +#define KERNEL_HWCAP_SVE2P2 __khwcap_feature(SVE2P2) +#define KERNEL_HWCAP_SME2P2 __khwcap_feature(SME2P2) +#define KERNEL_HWCAP_SME_SBITPERM __khwcap_feature(SME_SBITPERM) +#define KERNEL_HWCAP_SME_AES __khwcap_feature(SME_AES) +#define KERNEL_HWCAP_SME_SFEXPA __khwcap_feature(SME_SFEXPA) +#define KERNEL_HWCAP_SME_STMOP __khwcap_feature(SME_STMOP) +#define KERNEL_HWCAP_SME_SMOP4 __khwcap_feature(SME_SMOP4) #define __khwcap2_feature(x) (const_ilog2(HWCAP2_ ## x) + 64) #define KERNEL_HWCAP_DCPODP __khwcap2_feature(DCPODP) @@ -119,6 +143,41 @@ #define KERNEL_HWCAP_SME_FA64 __khwcap2_feature(SME_FA64) #define KERNEL_HWCAP_WFXT __khwcap2_feature(WFXT) #define KERNEL_HWCAP_EBF16 __khwcap2_feature(EBF16) +#define KERNEL_HWCAP_SVE_EBF16 __khwcap2_feature(SVE_EBF16) +#define KERNEL_HWCAP_CSSC __khwcap2_feature(CSSC) +#define KERNEL_HWCAP_RPRFM __khwcap2_feature(RPRFM) +#define KERNEL_HWCAP_SVE2P1 __khwcap2_feature(SVE2P1) +#define KERNEL_HWCAP_SME2 __khwcap2_feature(SME2) +#define KERNEL_HWCAP_SME2P1 __khwcap2_feature(SME2P1) +#define KERNEL_HWCAP_SME_I16I32 __khwcap2_feature(SME_I16I32) +#define KERNEL_HWCAP_SME_BI32I32 __khwcap2_feature(SME_BI32I32) +#define KERNEL_HWCAP_SME_B16B16 __khwcap2_feature(SME_B16B16) +#define KERNEL_HWCAP_SME_F16F16 __khwcap2_feature(SME_F16F16) +#define KERNEL_HWCAP_MOPS __khwcap2_feature(MOPS) +#define KERNEL_HWCAP_HBC __khwcap2_feature(HBC) +#define 
KERNEL_HWCAP_SVE_B16B16 __khwcap2_feature(SVE_B16B16) +#define KERNEL_HWCAP_LRCPC3 __khwcap2_feature(LRCPC3) +#define KERNEL_HWCAP_LSE128 __khwcap2_feature(LSE128) +#define KERNEL_HWCAP_FPMR __khwcap2_feature(FPMR) +#define KERNEL_HWCAP_LUT __khwcap2_feature(LUT) +#define KERNEL_HWCAP_FAMINMAX __khwcap2_feature(FAMINMAX) +#define KERNEL_HWCAP_F8CVT __khwcap2_feature(F8CVT) +#define KERNEL_HWCAP_F8FMA __khwcap2_feature(F8FMA) +#define KERNEL_HWCAP_F8DP4 __khwcap2_feature(F8DP4) +#define KERNEL_HWCAP_F8DP2 __khwcap2_feature(F8DP2) +#define KERNEL_HWCAP_F8E4M3 __khwcap2_feature(F8E4M3) +#define KERNEL_HWCAP_F8E5M2 __khwcap2_feature(F8E5M2) +#define KERNEL_HWCAP_SME_LUTV2 __khwcap2_feature(SME_LUTV2) +#define KERNEL_HWCAP_SME_F8F16 __khwcap2_feature(SME_F8F16) +#define KERNEL_HWCAP_SME_F8F32 __khwcap2_feature(SME_F8F32) +#define KERNEL_HWCAP_SME_SF8FMA __khwcap2_feature(SME_SF8FMA) +#define KERNEL_HWCAP_SME_SF8DP4 __khwcap2_feature(SME_SF8DP4) +#define KERNEL_HWCAP_SME_SF8DP2 __khwcap2_feature(SME_SF8DP2) +#define KERNEL_HWCAP_POE __khwcap2_feature(POE) + +#define __khwcap3_feature(x) (const_ilog2(HWCAP3_ ## x) + 128) +#define KERNEL_HWCAP_MTE_FAR __khwcap3_feature(MTE_FAR) +#define KERNEL_HWCAP_MTE_STORE_ONLY __khwcap3_feature(MTE_STORE_ONLY) /* * This yields a mask that user programs can use to figure out what @@ -126,11 +185,13 @@ */ #define ELF_HWCAP cpu_get_elf_hwcap() #define ELF_HWCAP2 cpu_get_elf_hwcap2() +#define ELF_HWCAP3 cpu_get_elf_hwcap3() #ifdef CONFIG_COMPAT #define COMPAT_ELF_HWCAP (compat_elf_hwcap) #define COMPAT_ELF_HWCAP2 (compat_elf_hwcap2) -extern unsigned int compat_elf_hwcap, compat_elf_hwcap2; +#define COMPAT_ELF_HWCAP3 (compat_elf_hwcap3) +extern unsigned int compat_elf_hwcap, compat_elf_hwcap2, compat_elf_hwcap3; #endif enum { diff --git a/arch/arm64/include/asm/hyperv-tlfs.h b/arch/arm64/include/asm/hyperv-tlfs.h deleted file mode 100644 index bc6c7ac934a1..000000000000 --- a/arch/arm64/include/asm/hyperv-tlfs.h +++ /dev/null @@ -1,78 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -/* - * This file contains definitions from the Hyper-V Hypervisor Top-Level - * Functional Specification (TLFS): - * https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs - * - * Copyright (C) 2021, Microsoft, Inc. - * - * Author : Michael Kelley <mikelley@microsoft.com> - */ - -#ifndef _ASM_HYPERV_TLFS_H -#define _ASM_HYPERV_TLFS_H - -#include <linux/types.h> - -/* - * All data structures defined in the TLFS that are shared between Hyper-V - * and a guest VM use Little Endian byte ordering. This matches the default - * byte ordering of Linux running on ARM64, so no special handling is required. - */ - -/* - * These Hyper-V registers provide information equivalent to the CPUID - * instruction on x86/x64. - */ -#define HV_REGISTER_HYPERVISOR_VERSION 0x00000100 /*CPUID 0x40000002 */ -#define HV_REGISTER_FEATURES 0x00000200 /*CPUID 0x40000003 */ -#define HV_REGISTER_ENLIGHTENMENTS 0x00000201 /*CPUID 0x40000004 */ - -/* - * Group C Features. See the asm-generic version of hyperv-tlfs.h - * for a description of Feature Groups. 
- */ - -/* Crash MSRs available */ -#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE BIT(8) - -/* STIMER direct mode is available */ -#define HV_STIMER_DIRECT_MODE_AVAILABLE BIT(13) - -/* - * Synthetic register definitions equivalent to MSRs on x86/x64 - */ -#define HV_REGISTER_CRASH_P0 0x00000210 -#define HV_REGISTER_CRASH_P1 0x00000211 -#define HV_REGISTER_CRASH_P2 0x00000212 -#define HV_REGISTER_CRASH_P3 0x00000213 -#define HV_REGISTER_CRASH_P4 0x00000214 -#define HV_REGISTER_CRASH_CTL 0x00000215 - -#define HV_REGISTER_GUEST_OSID 0x00090002 -#define HV_REGISTER_VP_INDEX 0x00090003 -#define HV_REGISTER_TIME_REF_COUNT 0x00090004 -#define HV_REGISTER_REFERENCE_TSC 0x00090017 - -#define HV_REGISTER_SINT0 0x000A0000 -#define HV_REGISTER_SCONTROL 0x000A0010 -#define HV_REGISTER_SIEFP 0x000A0012 -#define HV_REGISTER_SIMP 0x000A0013 -#define HV_REGISTER_EOM 0x000A0014 - -#define HV_REGISTER_STIMER0_CONFIG 0x000B0000 -#define HV_REGISTER_STIMER0_COUNT 0x000B0001 - -union hv_msi_entry { - u64 as_uint64[2]; - struct { - u64 address; - u32 data; - u32 reserved; - } __packed; -}; - -#include <asm-generic/hyperv-tlfs.h> - -#endif diff --git a/arch/arm64/include/asm/hypervisor.h b/arch/arm64/include/asm/hypervisor.h index 0ae427f352c8..a12fd897c877 100644 --- a/arch/arm64/include/asm/hypervisor.h +++ b/arch/arm64/include/asm/hypervisor.h @@ -6,5 +6,17 @@ void kvm_init_hyp_services(void); bool kvm_arm_hyp_service_available(u32 func_id); +void kvm_arm_target_impl_cpu_init(void); + +#ifdef CONFIG_ARM_PKVM_GUEST +void pkvm_init_hyp_services(void); +#else +static inline void pkvm_init_hyp_services(void) { }; +#endif + +static inline void kvm_arch_init_hyp_services(void) +{ + pkvm_init_hyp_services(); +}; #endif diff --git a/arch/arm64/include/asm/image.h b/arch/arm64/include/asm/image.h index c2b13213c720..c09cf942dc92 100644 --- a/arch/arm64/include/asm/image.h +++ b/arch/arm64/include/asm/image.h @@ -27,7 +27,7 @@ /* * struct arm64_image_header - arm64 kernel image header - * See Documentation/arm64/booting.rst for details + * See Documentation/arch/arm64/booting.rst for details * * @code0: Executable code, or * @mz_header alternatively used for part of MZ header diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index 834bff720582..18c7811774d3 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -13,31 +13,6 @@ #include <asm/insn-def.h> #ifndef __ASSEMBLY__ -/* - * ARM Architecture Reference Manual for ARMv8 Profile-A, Issue A.a - * Section C3.1 "A64 instruction index by encoding": - * AArch64 main encoding table - * Bit position - * 28 27 26 25 Encoding Group - * 0 0 - - Unallocated - * 1 0 0 - Data processing, immediate - * 1 0 1 - Branch, exception generation and system instructions - * - 1 - 0 Loads and stores - * - 1 0 1 Data processing - register - * 0 1 1 1 Data processing - SIMD and floating point - * 1 1 1 1 Data processing - SIMD and floating point - * "-" means "don't care" - */ -enum aarch64_insn_encoding_class { - AARCH64_INSN_CLS_UNKNOWN, /* UNALLOCATED */ - AARCH64_INSN_CLS_SVE, /* SVE instructions */ - AARCH64_INSN_CLS_DP_IMM, /* Data processing - immediate */ - AARCH64_INSN_CLS_DP_REG, /* Data processing - register */ - AARCH64_INSN_CLS_DP_FPSIMD, /* Data processing - SIMD and FP */ - AARCH64_INSN_CLS_LDST, /* Loads and stores */ - AARCH64_INSN_CLS_BR_SYS, /* Branch, exception generation and - * system instructions */ -}; enum aarch64_insn_hint_cr_op { AARCH64_INSN_HINT_NOP = 0x0 << 5, @@ -160,6 +135,12 @@ enum 
aarch64_insn_special_register { AARCH64_INSN_SPCLREG_SP_EL2 = 0xF210 }; +enum aarch64_insn_system_register { + AARCH64_INSN_SYSREG_TPIDR_EL1 = 0x4684, + AARCH64_INSN_SYSREG_TPIDR_EL2 = 0x6682, + AARCH64_INSN_SYSREG_SP_EL0 = 0x4208, +}; + enum aarch64_insn_variant { AARCH64_INSN_VARIANT_32BIT, AARCH64_INSN_VARIANT_64BIT @@ -207,10 +188,14 @@ enum aarch64_insn_ldst_type { AARCH64_INSN_LDST_STORE_PAIR_PRE_INDEX, AARCH64_INSN_LDST_LOAD_PAIR_POST_INDEX, AARCH64_INSN_LDST_STORE_PAIR_POST_INDEX, + AARCH64_INSN_LDST_LOAD_ACQ, AARCH64_INSN_LDST_LOAD_EX, AARCH64_INSN_LDST_LOAD_ACQ_EX, + AARCH64_INSN_LDST_STORE_REL, AARCH64_INSN_LDST_STORE_EX, AARCH64_INSN_LDST_STORE_REL_EX, + AARCH64_INSN_LDST_SIGNED_LOAD_IMM_OFFSET, + AARCH64_INSN_LDST_SIGNED_LOAD_REG_OFFSET, }; enum aarch64_insn_adsb_type { @@ -326,12 +311,30 @@ static __always_inline u32 aarch64_insn_get_##abbr##_value(void) \ return (val); \ } +/* + * ARM Architecture Reference Manual for ARMv8 Profile-A, Issue A.a + * Section C3.1 "A64 instruction index by encoding": + * AArch64 main encoding table + * Bit position + * 28 27 26 25 Encoding Group + * 0 0 - - Unallocated + * 1 0 0 - Data processing, immediate + * 1 0 1 - Branch, exception generation and system instructions + * - 1 - 0 Loads and stores + * - 1 0 1 Data processing - register + * 0 1 1 1 Data processing - SIMD and floating point + * 1 1 1 1 Data processing - SIMD and floating point + * "-" means "don't care" + */ +__AARCH64_INSN_FUNCS(class_branch_sys, 0x1c000000, 0x14000000) + __AARCH64_INSN_FUNCS(adr, 0x9F000000, 0x10000000) __AARCH64_INSN_FUNCS(adrp, 0x9F000000, 0x90000000) __AARCH64_INSN_FUNCS(prfm, 0x3FC00000, 0x39800000) __AARCH64_INSN_FUNCS(prfm_lit, 0xFF000000, 0xD8000000) __AARCH64_INSN_FUNCS(store_imm, 0x3FC00000, 0x39000000) __AARCH64_INSN_FUNCS(load_imm, 0x3FC00000, 0x39400000) +__AARCH64_INSN_FUNCS(signed_load_imm, 0X3FC00000, 0x39800000) __AARCH64_INSN_FUNCS(store_pre, 0x3FE00C00, 0x38000C00) __AARCH64_INSN_FUNCS(load_pre, 0x3FE00C00, 0x38400C00) __AARCH64_INSN_FUNCS(store_post, 0x3FE00C00, 0x38000400) @@ -345,12 +348,16 @@ __AARCH64_INSN_FUNCS(ldset, 0x3F20FC00, 0x38203000) __AARCH64_INSN_FUNCS(swp, 0x3F20FC00, 0x38208000) __AARCH64_INSN_FUNCS(cas, 0x3FA07C00, 0x08A07C00) __AARCH64_INSN_FUNCS(ldr_reg, 0x3FE0EC00, 0x38606800) +__AARCH64_INSN_FUNCS(signed_ldr_reg, 0X3FE0FC00, 0x38A0E800) __AARCH64_INSN_FUNCS(ldr_imm, 0x3FC00000, 0x39400000) __AARCH64_INSN_FUNCS(ldr_lit, 0xBF000000, 0x18000000) __AARCH64_INSN_FUNCS(ldrsw_lit, 0xFF000000, 0x98000000) __AARCH64_INSN_FUNCS(exclusive, 0x3F800000, 0x08000000) -__AARCH64_INSN_FUNCS(load_ex, 0x3F400000, 0x08400000) -__AARCH64_INSN_FUNCS(store_ex, 0x3F400000, 0x08000000) +__AARCH64_INSN_FUNCS(load_acq, 0x3FDFFC00, 0x08DFFC00) +__AARCH64_INSN_FUNCS(store_rel, 0x3FDFFC00, 0x089FFC00) +__AARCH64_INSN_FUNCS(load_ex, 0x3FC00000, 0x08400000) +__AARCH64_INSN_FUNCS(store_ex, 0x3FC00000, 0x08000000) +__AARCH64_INSN_FUNCS(mops, 0x3B200C00, 0x19000400) __AARCH64_INSN_FUNCS(stp, 0x7FC00000, 0x29000000) __AARCH64_INSN_FUNCS(ldp, 0x7FC00000, 0x29400000) __AARCH64_INSN_FUNCS(stp_post, 0x7FC00000, 0x28800000) @@ -428,61 +435,126 @@ __AARCH64_INSN_FUNCS(sb, 0xFFFFFFFF, 0xD50330FF) __AARCH64_INSN_FUNCS(clrex, 0xFFFFF0FF, 0xD503305F) __AARCH64_INSN_FUNCS(ssbb, 0xFFFFFFFF, 0xD503309F) __AARCH64_INSN_FUNCS(pssbb, 0xFFFFFFFF, 0xD503349F) +__AARCH64_INSN_FUNCS(bti, 0xFFFFFF3F, 0xD503241f) #undef __AARCH64_INSN_FUNCS -bool aarch64_insn_is_steppable_hint(u32 insn); -bool aarch64_insn_is_branch_imm(u32 insn); +static __always_inline bool 
aarch64_insn_is_steppable_hint(u32 insn) +{ + if (!aarch64_insn_is_hint(insn)) + return false; + + switch (insn & 0xFE0) { + case AARCH64_INSN_HINT_XPACLRI: + case AARCH64_INSN_HINT_PACIA_1716: + case AARCH64_INSN_HINT_PACIB_1716: + case AARCH64_INSN_HINT_PACIAZ: + case AARCH64_INSN_HINT_PACIASP: + case AARCH64_INSN_HINT_PACIBZ: + case AARCH64_INSN_HINT_PACIBSP: + case AARCH64_INSN_HINT_BTI: + case AARCH64_INSN_HINT_BTIC: + case AARCH64_INSN_HINT_BTIJ: + case AARCH64_INSN_HINT_BTIJC: + case AARCH64_INSN_HINT_NOP: + return true; + default: + return false; + } +} + +static __always_inline bool aarch64_insn_is_branch(u32 insn) +{ + /* b, bl, cb*, tb*, ret*, b.cond, br*, blr* */ + + return aarch64_insn_is_b(insn) || + aarch64_insn_is_bl(insn) || + aarch64_insn_is_cbz(insn) || + aarch64_insn_is_cbnz(insn) || + aarch64_insn_is_tbz(insn) || + aarch64_insn_is_tbnz(insn) || + aarch64_insn_is_ret(insn) || + aarch64_insn_is_ret_auth(insn) || + aarch64_insn_is_br(insn) || + aarch64_insn_is_br_auth(insn) || + aarch64_insn_is_blr(insn) || + aarch64_insn_is_blr_auth(insn) || + aarch64_insn_is_bcond(insn); +} + +static __always_inline bool aarch64_insn_is_branch_imm(u32 insn) +{ + return aarch64_insn_is_b(insn) || + aarch64_insn_is_bl(insn) || + aarch64_insn_is_tbz(insn) || + aarch64_insn_is_tbnz(insn) || + aarch64_insn_is_cbz(insn) || + aarch64_insn_is_cbnz(insn) || + aarch64_insn_is_bcond(insn); +} -static inline bool aarch64_insn_is_adr_adrp(u32 insn) +static __always_inline bool aarch64_insn_is_adr_adrp(u32 insn) { - return aarch64_insn_is_adr(insn) || aarch64_insn_is_adrp(insn); + return aarch64_insn_is_adr(insn) || + aarch64_insn_is_adrp(insn); } -static inline bool aarch64_insn_is_dsb(u32 insn) +static __always_inline bool aarch64_insn_is_dsb(u32 insn) { - return aarch64_insn_is_dsb_base(insn) || aarch64_insn_is_dsb_nxs(insn); + return aarch64_insn_is_dsb_base(insn) || + aarch64_insn_is_dsb_nxs(insn); } -static inline bool aarch64_insn_is_barrier(u32 insn) +static __always_inline bool aarch64_insn_is_barrier(u32 insn) { - return aarch64_insn_is_dmb(insn) || aarch64_insn_is_dsb(insn) || - aarch64_insn_is_isb(insn) || aarch64_insn_is_sb(insn) || - aarch64_insn_is_clrex(insn) || aarch64_insn_is_ssbb(insn) || + return aarch64_insn_is_dmb(insn) || + aarch64_insn_is_dsb(insn) || + aarch64_insn_is_isb(insn) || + aarch64_insn_is_sb(insn) || + aarch64_insn_is_clrex(insn) || + aarch64_insn_is_ssbb(insn) || aarch64_insn_is_pssbb(insn); } -static inline bool aarch64_insn_is_store_single(u32 insn) +static __always_inline bool aarch64_insn_is_store_single(u32 insn) { return aarch64_insn_is_store_imm(insn) || aarch64_insn_is_store_pre(insn) || aarch64_insn_is_store_post(insn); } -static inline bool aarch64_insn_is_store_pair(u32 insn) +static __always_inline bool aarch64_insn_is_store_pair(u32 insn) { return aarch64_insn_is_stp(insn) || aarch64_insn_is_stp_pre(insn) || aarch64_insn_is_stp_post(insn); } -static inline bool aarch64_insn_is_load_single(u32 insn) +static __always_inline bool aarch64_insn_is_load_single(u32 insn) { return aarch64_insn_is_load_imm(insn) || aarch64_insn_is_load_pre(insn) || aarch64_insn_is_load_post(insn); } -static inline bool aarch64_insn_is_load_pair(u32 insn) +static __always_inline bool aarch64_insn_is_load_pair(u32 insn) { return aarch64_insn_is_ldp(insn) || aarch64_insn_is_ldp_pre(insn) || aarch64_insn_is_ldp_post(insn); } +static __always_inline bool aarch64_insn_uses_literal(u32 insn) +{ + /* ldr/ldrsw (literal), prfm */ + + return aarch64_insn_is_ldr_lit(insn) || + 
aarch64_insn_is_ldrsw_lit(insn) || + aarch64_insn_is_adr_adrp(insn) || + aarch64_insn_is_prfm_lit(insn); +} + enum aarch64_insn_encoding_class aarch64_get_insn_class(u32 insn); -bool aarch64_insn_uses_literal(u32 insn); -bool aarch64_insn_is_branch(u32 insn); u64 aarch64_insn_decode_immediate(enum aarch64_insn_imm_type type, u32 insn); u32 aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type, u32 insn, u64 imm); @@ -496,8 +568,23 @@ u32 aarch64_insn_gen_comp_branch_imm(unsigned long pc, unsigned long addr, enum aarch64_insn_branch_type type); u32 aarch64_insn_gen_cond_branch_imm(unsigned long pc, unsigned long addr, enum aarch64_insn_condition cond); -u32 aarch64_insn_gen_hint(enum aarch64_insn_hint_cr_op op); -u32 aarch64_insn_gen_nop(void); + +static __always_inline u32 +aarch64_insn_gen_hint(enum aarch64_insn_hint_cr_op op) +{ + return aarch64_insn_get_hint_value() | op; +} + +static __always_inline u32 aarch64_insn_gen_nop(void) +{ + return aarch64_insn_gen_hint(AARCH64_INSN_HINT_NOP); +} + +static __always_inline bool aarch64_insn_is_nop(u32 insn) +{ + return insn == aarch64_insn_gen_nop(); +} + u32 aarch64_insn_gen_branch_reg(enum aarch64_insn_register reg, enum aarch64_insn_branch_type type); u32 aarch64_insn_gen_load_store_reg(enum aarch64_insn_register reg, @@ -519,6 +606,10 @@ u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1, int offset, enum aarch64_insn_variant variant, enum aarch64_insn_ldst_type type); +u32 aarch64_insn_gen_load_acq_store_rel(enum aarch64_insn_register reg, + enum aarch64_insn_register base, + enum aarch64_insn_size_type size, + enum aarch64_insn_ldst_type type); u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg, enum aarch64_insn_register base, enum aarch64_insn_register state, @@ -580,10 +671,6 @@ u32 aarch64_insn_gen_extr(enum aarch64_insn_variant variant, enum aarch64_insn_register Rn, enum aarch64_insn_register Rd, u8 lsb); -u32 aarch64_insn_gen_prefetch(enum aarch64_insn_register base, - enum aarch64_insn_prfm_type type, - enum aarch64_insn_prfm_target target, - enum aarch64_insn_prfm_policy policy); #ifdef CONFIG_ARM64_LSE_ATOMICS u32 aarch64_insn_gen_atomic_ld_op(enum aarch64_insn_register result, enum aarch64_insn_register address, @@ -619,6 +706,9 @@ u32 aarch64_insn_gen_cas(enum aarch64_insn_register result, } #endif u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type); +u32 aarch64_insn_gen_dsb(enum aarch64_insn_mb_type type); +u32 aarch64_insn_gen_mrs(enum aarch64_insn_register result, + enum aarch64_insn_system_register sysreg); s32 aarch64_get_branch_offset(u32 insn); u32 aarch64_set_branch_offset(u32 insn, s32 offset); diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 87dd42d74afe..9b96840fb979 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -17,36 +17,41 @@ #include <asm/early_ioremap.h> #include <asm/alternative.h> #include <asm/cpufeature.h> +#include <asm/rsi.h> /* * Generic IO read/write. These perform native-endian accesses. 
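 *
+ * The write accessors below take the location as a "Qo" memory operand rather
+ * than a bare base register, which leaves the compiler free to use an
+ * immediate-offset addressing mode for the store.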
*/ #define __raw_writeb __raw_writeb -static inline void __raw_writeb(u8 val, volatile void __iomem *addr) +static __always_inline void __raw_writeb(u8 val, volatile void __iomem *addr) { - asm volatile("strb %w0, [%1]" : : "rZ" (val), "r" (addr)); + volatile u8 __iomem *ptr = addr; + asm volatile("strb %w0, %1" : : "rZ" (val), "Qo" (*ptr)); } #define __raw_writew __raw_writew -static inline void __raw_writew(u16 val, volatile void __iomem *addr) +static __always_inline void __raw_writew(u16 val, volatile void __iomem *addr) { - asm volatile("strh %w0, [%1]" : : "rZ" (val), "r" (addr)); + volatile u16 __iomem *ptr = addr; + asm volatile("strh %w0, %1" : : "rZ" (val), "Qo" (*ptr)); } #define __raw_writel __raw_writel static __always_inline void __raw_writel(u32 val, volatile void __iomem *addr) { - asm volatile("str %w0, [%1]" : : "rZ" (val), "r" (addr)); + volatile u32 __iomem *ptr = addr; + asm volatile("str %w0, %1" : : "rZ" (val), "Qo" (*ptr)); } #define __raw_writeq __raw_writeq -static inline void __raw_writeq(u64 val, volatile void __iomem *addr) +static __always_inline void __raw_writeq(u64 val, volatile void __iomem *addr) { - asm volatile("str %x0, [%1]" : : "rZ" (val), "r" (addr)); + volatile u64 __iomem *ptr = addr; + asm volatile("str %x0, %1" : : "rZ" (val), "Qo" (*ptr)); } #define __raw_readb __raw_readb -static inline u8 __raw_readb(const volatile void __iomem *addr) +static __always_inline u8 __raw_readb(const volatile void __iomem *addr) { u8 val; asm volatile(ALTERNATIVE("ldrb %w0, [%1]", @@ -57,7 +62,7 @@ static inline u8 __raw_readb(const volatile void __iomem *addr) } #define __raw_readw __raw_readw -static inline u16 __raw_readw(const volatile void __iomem *addr) +static __always_inline u16 __raw_readw(const volatile void __iomem *addr) { u16 val; @@ -80,7 +85,7 @@ static __always_inline u32 __raw_readl(const volatile void __iomem *addr) } #define __raw_readq __raw_readq -static inline u64 __raw_readq(const volatile void __iomem *addr) +static __always_inline u64 __raw_readq(const volatile void __iomem *addr) { u64 val; asm volatile(ALTERNATIVE("ldr %0, [%1]", @@ -91,7 +96,7 @@ static inline u64 __raw_readq(const volatile void __iomem *addr) } /* IO barriers */ -#define __iormb(v) \ +#define __io_ar(v) \ ({ \ unsigned long tmp; \ \ @@ -108,39 +113,14 @@ static inline u64 __raw_readq(const volatile void __iomem *addr) : "memory"); \ }) -#define __io_par(v) __iormb(v) -#define __iowmb() dma_wmb() -#define __iomb() dma_mb() - -/* - * Relaxed I/O memory access primitives. These follow the Device memory - * ordering rules but do not guarantee any ordering relative to Normal memory - * accesses. - */ -#define readb_relaxed(c) ({ u8 __r = __raw_readb(c); __r; }) -#define readw_relaxed(c) ({ u16 __r = le16_to_cpu((__force __le16)__raw_readw(c)); __r; }) -#define readl_relaxed(c) ({ u32 __r = le32_to_cpu((__force __le32)__raw_readl(c)); __r; }) -#define readq_relaxed(c) ({ u64 __r = le64_to_cpu((__force __le64)__raw_readq(c)); __r; }) - -#define writeb_relaxed(v,c) ((void)__raw_writeb((v),(c))) -#define writew_relaxed(v,c) ((void)__raw_writew((__force u16)cpu_to_le16(v),(c))) -#define writel_relaxed(v,c) ((void)__raw_writel((__force u32)cpu_to_le32(v),(c))) -#define writeq_relaxed(v,c) ((void)__raw_writeq((__force u64)cpu_to_le64(v),(c))) - -/* - * I/O memory access primitives. Reads are ordered relative to any - * following Normal memory access. Writes are ordered relative to any prior - * Normal memory access. 
- */ -#define readb(c) ({ u8 __v = readb_relaxed(c); __iormb(__v); __v; }) -#define readw(c) ({ u16 __v = readw_relaxed(c); __iormb(__v); __v; }) -#define readl(c) ({ u32 __v = readl_relaxed(c); __iormb(__v); __v; }) -#define readq(c) ({ u64 __v = readq_relaxed(c); __iormb(__v); __v; }) +#define __io_bw() dma_wmb() +#define __io_br(v) +#define __io_aw(v) -#define writeb(v,c) ({ __iowmb(); writeb_relaxed((v),(c)); }) -#define writew(v,c) ({ __iowmb(); writew_relaxed((v),(c)); }) -#define writel(v,c) ({ __iowmb(); writel_relaxed((v),(c)); }) -#define writeq(v,c) ({ __iowmb(); writeq_relaxed((v),(c)); }) +/* arm64-specific, don't use in portable drivers */ +#define __iormb(v) __io_ar(v) +#define __iowmb() __io_bw() +#define __iomb() dma_mb() /* * I/O port access primitives. @@ -150,29 +130,149 @@ static inline u64 __raw_readq(const volatile void __iomem *addr) #define PCI_IOBASE ((void __iomem *)PCI_IO_START) /* - * String version of I/O memory access operations. + * The ARM64 iowrite implementation is intended to support drivers that want to + * use write combining. For instance PCI drivers using write combining with a 64 + * byte __iowrite64_copy() expect to get a 64 byte MemWr TLP on the PCIe bus. + * + * Newer ARM core have sensitive write combining buffers, it is important that + * the stores be contiguous blocks of store instructions. Normal memcpy + * approaches have a very low chance to generate write combining. + * + * Since this is the only API on ARM64 that should be used with write combining + * it also integrates the DGH hint which is supposed to lower the latency to + * emit the large TLP from the CPU. */ -extern void __memcpy_fromio(void *, const volatile void __iomem *, size_t); -extern void __memcpy_toio(volatile void __iomem *, const void *, size_t); -extern void __memset_io(volatile void __iomem *, int, size_t); -#define memset_io(c,v,l) __memset_io((c),(v),(l)) -#define memcpy_fromio(a,c,l) __memcpy_fromio((a),(c),(l)) -#define memcpy_toio(c,a,l) __memcpy_toio((c),(a),(l)) +static __always_inline void +__const_memcpy_toio_aligned32(volatile u32 __iomem *to, const u32 *from, + size_t count) +{ + switch (count) { + case 8: + asm volatile("str %w0, [%8, #4 * 0]\n" + "str %w1, [%8, #4 * 1]\n" + "str %w2, [%8, #4 * 2]\n" + "str %w3, [%8, #4 * 3]\n" + "str %w4, [%8, #4 * 4]\n" + "str %w5, [%8, #4 * 5]\n" + "str %w6, [%8, #4 * 6]\n" + "str %w7, [%8, #4 * 7]\n" + : + : "rZ"(from[0]), "rZ"(from[1]), "rZ"(from[2]), + "rZ"(from[3]), "rZ"(from[4]), "rZ"(from[5]), + "rZ"(from[6]), "rZ"(from[7]), "r"(to)); + break; + case 4: + asm volatile("str %w0, [%4, #4 * 0]\n" + "str %w1, [%4, #4 * 1]\n" + "str %w2, [%4, #4 * 2]\n" + "str %w3, [%4, #4 * 3]\n" + : + : "rZ"(from[0]), "rZ"(from[1]), "rZ"(from[2]), + "rZ"(from[3]), "r"(to)); + break; + case 2: + asm volatile("str %w0, [%2, #4 * 0]\n" + "str %w1, [%2, #4 * 1]\n" + : + : "rZ"(from[0]), "rZ"(from[1]), "r"(to)); + break; + case 1: + __raw_writel(*from, to); + break; + default: + BUILD_BUG(); + } +} + +void __iowrite32_copy_full(void __iomem *to, const void *from, size_t count); + +static __always_inline void +__iowrite32_copy(void __iomem *to, const void *from, size_t count) +{ + if (__builtin_constant_p(count) && + (count == 8 || count == 4 || count == 2 || count == 1)) { + __const_memcpy_toio_aligned32(to, from, count); + dgh(); + } else { + __iowrite32_copy_full(to, from, count); + } +} +#define __iowrite32_copy __iowrite32_copy + +static __always_inline void +__const_memcpy_toio_aligned64(volatile u64 __iomem *to, const u64 *from, + 
size_t count) +{ + switch (count) { + case 8: + asm volatile("str %x0, [%8, #8 * 0]\n" + "str %x1, [%8, #8 * 1]\n" + "str %x2, [%8, #8 * 2]\n" + "str %x3, [%8, #8 * 3]\n" + "str %x4, [%8, #8 * 4]\n" + "str %x5, [%8, #8 * 5]\n" + "str %x6, [%8, #8 * 6]\n" + "str %x7, [%8, #8 * 7]\n" + : + : "rZ"(from[0]), "rZ"(from[1]), "rZ"(from[2]), + "rZ"(from[3]), "rZ"(from[4]), "rZ"(from[5]), + "rZ"(from[6]), "rZ"(from[7]), "r"(to)); + break; + case 4: + asm volatile("str %x0, [%4, #8 * 0]\n" + "str %x1, [%4, #8 * 1]\n" + "str %x2, [%4, #8 * 2]\n" + "str %x3, [%4, #8 * 3]\n" + : + : "rZ"(from[0]), "rZ"(from[1]), "rZ"(from[2]), + "rZ"(from[3]), "r"(to)); + break; + case 2: + asm volatile("str %x0, [%2, #8 * 0]\n" + "str %x1, [%2, #8 * 1]\n" + : + : "rZ"(from[0]), "rZ"(from[1]), "r"(to)); + break; + case 1: + __raw_writeq(*from, to); + break; + default: + BUILD_BUG(); + } +} + +void __iowrite64_copy_full(void __iomem *to, const void *from, size_t count); + +static __always_inline void +__iowrite64_copy(void __iomem *to, const void *from, size_t count) +{ + if (__builtin_constant_p(count) && + (count == 8 || count == 4 || count == 2 || count == 1)) { + __const_memcpy_toio_aligned64(to, from, count); + dgh(); + } else { + __iowrite64_copy_full(to, from, count); + } +} +#define __iowrite64_copy __iowrite64_copy /* * I/O memory mapping functions. */ -bool ioremap_allowed(phys_addr_t phys_addr, size_t size, unsigned long prot); -#define ioremap_allowed ioremap_allowed +typedef int (*ioremap_prot_hook_t)(phys_addr_t phys_addr, size_t size, + pgprot_t *prot); +int arm64_ioremap_prot_hook_register(const ioremap_prot_hook_t hook); + +#define ioremap_prot ioremap_prot #define _PAGE_IOREMAP PROT_DEVICE_nGnRE #define ioremap_wc(addr, size) \ - ioremap_prot((addr), (size), PROT_NORMAL_NC) + ioremap_prot((addr), (size), __pgprot(PROT_NORMAL_NC)) #define ioremap_np(addr, size) \ - ioremap_prot((addr), (size), PROT_DEVICE_nGnRnE) + ioremap_prot((addr), (size), __pgprot(PROT_DEVICE_nGnRnE)) /* * io{read,write}{16,32,64}be() macros @@ -193,7 +293,7 @@ static inline void __iomem *ioremap_cache(phys_addr_t addr, size_t size) if (pfn_is_map_memory(__phys_to_pfn(addr))) return (void __iomem *)__phys_to_virt(addr); - return ioremap_prot(addr, size, PROT_NORMAL); + return ioremap_prot(addr, size, __pgprot(PROT_NORMAL)); } /* @@ -208,4 +308,11 @@ extern bool arch_memremap_can_ram_remap(resource_size_t offset, size_t size, unsigned long flags); #define arch_memremap_can_ram_remap arch_memremap_can_ram_remap +static inline bool arm64_is_protected_mmio(phys_addr_t phys_addr, size_t size) +{ + if (unlikely(is_realm_world())) + return __arm64_is_protected_mmio(phys_addr, size); + return false; +} + #endif /* __ASM_IO_H */ diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h index fac08e18bcd5..e93548914c36 100644 --- a/arch/arm64/include/asm/irq.h +++ b/arch/arm64/include/asm/irq.h @@ -4,8 +4,13 @@ #ifndef __ASSEMBLER__ +#include <linux/cpumask.h> + #include <asm-generic/irq.h> +void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu); +#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace + struct pt_regs; int set_handle_irq(void (*handle_irq)(struct pt_regs *)); diff --git a/arch/arm64/include/asm/irq_work.h b/arch/arm64/include/asm/irq_work.h index 81bbfa3a035b..a1020285ea75 100644 --- a/arch/arm64/include/asm/irq_work.h +++ b/arch/arm64/include/asm/irq_work.h @@ -2,8 +2,6 @@ #ifndef __ASM_IRQ_WORK_H #define __ASM_IRQ_WORK_H -extern void arch_irq_work_raise(void); - static 
inline bool arch_irq_work_has_interrupt(void) { return true; diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h index b57b9b1e4344..d4d7451c2c12 100644 --- a/arch/arm64/include/asm/irqflags.h +++ b/arch/arm64/include/asm/irqflags.h @@ -5,7 +5,6 @@ #ifndef __ASM_IRQFLAGS_H #define __ASM_IRQFLAGS_H -#include <asm/alternative.h> #include <asm/barrier.h> #include <asm/ptrace.h> #include <asm/sysreg.h> @@ -21,43 +20,71 @@ * exceptions should be unmasked. */ -/* - * CPU interrupt mask handling. - */ -static inline void arch_local_irq_enable(void) +static __always_inline void __daif_local_irq_enable(void) { - if (system_has_prio_mask_debugging()) { - u32 pmr = read_sysreg_s(SYS_ICC_PMR_EL1); + barrier(); + asm volatile("msr daifclr, #3"); + barrier(); +} +static __always_inline void __pmr_local_irq_enable(void) +{ + if (IS_ENABLED(CONFIG_ARM64_DEBUG_PRIORITY_MASKING)) { + u32 pmr = read_sysreg_s(SYS_ICC_PMR_EL1); WARN_ON_ONCE(pmr != GIC_PRIO_IRQON && pmr != GIC_PRIO_IRQOFF); } - asm volatile(ALTERNATIVE( - "msr daifclr, #3 // arch_local_irq_enable", - __msr_s(SYS_ICC_PMR_EL1, "%0"), - ARM64_HAS_IRQ_PRIO_MASKING) - : - : "r" ((unsigned long) GIC_PRIO_IRQON) - : "memory"); - + barrier(); + write_sysreg_s(GIC_PRIO_IRQON, SYS_ICC_PMR_EL1); pmr_sync(); + barrier(); } -static inline void arch_local_irq_disable(void) +static inline void arch_local_irq_enable(void) { - if (system_has_prio_mask_debugging()) { - u32 pmr = read_sysreg_s(SYS_ICC_PMR_EL1); + if (system_uses_irq_prio_masking()) { + __pmr_local_irq_enable(); + } else { + __daif_local_irq_enable(); + } +} + +static __always_inline void __daif_local_irq_disable(void) +{ + barrier(); + asm volatile("msr daifset, #3"); + barrier(); +} +static __always_inline void __pmr_local_irq_disable(void) +{ + if (IS_ENABLED(CONFIG_ARM64_DEBUG_PRIORITY_MASKING)) { + u32 pmr = read_sysreg_s(SYS_ICC_PMR_EL1); WARN_ON_ONCE(pmr != GIC_PRIO_IRQON && pmr != GIC_PRIO_IRQOFF); } - asm volatile(ALTERNATIVE( - "msr daifset, #3 // arch_local_irq_disable", - __msr_s(SYS_ICC_PMR_EL1, "%0"), - ARM64_HAS_IRQ_PRIO_MASKING) - : - : "r" ((unsigned long) GIC_PRIO_IRQOFF) - : "memory"); + barrier(); + write_sysreg_s(GIC_PRIO_IRQOFF, SYS_ICC_PMR_EL1); + barrier(); +} + +static inline void arch_local_irq_disable(void) +{ + if (system_uses_irq_prio_masking()) { + __pmr_local_irq_disable(); + } else { + __daif_local_irq_disable(); + } +} + +static __always_inline unsigned long __daif_local_save_flags(void) +{ + return read_sysreg(daif); +} + +static __always_inline unsigned long __pmr_local_save_flags(void) +{ + return read_sysreg_s(SYS_ICC_PMR_EL1); } /* @@ -65,69 +92,108 @@ static inline void arch_local_irq_disable(void) */ static inline unsigned long arch_local_save_flags(void) { - unsigned long flags; + if (system_uses_irq_prio_masking()) { + return __pmr_local_save_flags(); + } else { + return __daif_local_save_flags(); + } +} - asm volatile(ALTERNATIVE( - "mrs %0, daif", - __mrs_s("%0", SYS_ICC_PMR_EL1), - ARM64_HAS_IRQ_PRIO_MASKING) - : "=&r" (flags) - : - : "memory"); +static __always_inline bool __daif_irqs_disabled_flags(unsigned long flags) +{ + return flags & PSR_I_BIT; +} - return flags; +static __always_inline bool __pmr_irqs_disabled_flags(unsigned long flags) +{ + return flags != GIC_PRIO_IRQON; } -static inline int arch_irqs_disabled_flags(unsigned long flags) +static inline bool arch_irqs_disabled_flags(unsigned long flags) { - int res; + if (system_uses_irq_prio_masking()) { + return __pmr_irqs_disabled_flags(flags); + } else { + 
return __daif_irqs_disabled_flags(flags); + } +} - asm volatile(ALTERNATIVE( - "and %w0, %w1, #" __stringify(PSR_I_BIT), - "eor %w0, %w1, #" __stringify(GIC_PRIO_IRQON), - ARM64_HAS_IRQ_PRIO_MASKING) - : "=&r" (res) - : "r" ((int) flags) - : "memory"); +static __always_inline bool __daif_irqs_disabled(void) +{ + return __daif_irqs_disabled_flags(__daif_local_save_flags()); +} - return res; +static __always_inline bool __pmr_irqs_disabled(void) +{ + return __pmr_irqs_disabled_flags(__pmr_local_save_flags()); } -static inline int arch_irqs_disabled(void) +static inline bool arch_irqs_disabled(void) { - return arch_irqs_disabled_flags(arch_local_save_flags()); + if (system_uses_irq_prio_masking()) { + return __pmr_irqs_disabled(); + } else { + return __daif_irqs_disabled(); + } } -static inline unsigned long arch_local_irq_save(void) +static __always_inline unsigned long __daif_local_irq_save(void) { - unsigned long flags; + unsigned long flags = __daif_local_save_flags(); + + __daif_local_irq_disable(); - flags = arch_local_save_flags(); + return flags; +} + +static __always_inline unsigned long __pmr_local_irq_save(void) +{ + unsigned long flags = __pmr_local_save_flags(); /* * There are too many states with IRQs disabled, just keep the current * state if interrupts are already disabled/masked. */ - if (!arch_irqs_disabled_flags(flags)) - arch_local_irq_disable(); + if (!__pmr_irqs_disabled_flags(flags)) + __pmr_local_irq_disable(); return flags; } +static inline unsigned long arch_local_irq_save(void) +{ + if (system_uses_irq_prio_masking()) { + return __pmr_local_irq_save(); + } else { + return __daif_local_irq_save(); + } +} + +static __always_inline void __daif_local_irq_restore(unsigned long flags) +{ + barrier(); + write_sysreg(flags, daif); + barrier(); +} + +static __always_inline void __pmr_local_irq_restore(unsigned long flags) +{ + barrier(); + write_sysreg_s(flags, SYS_ICC_PMR_EL1); + pmr_sync(); + barrier(); +} + /* * restore saved IRQ state */ static inline void arch_local_irq_restore(unsigned long flags) { - asm volatile(ALTERNATIVE( - "msr daif, %0", - __msr_s(SYS_ICC_PMR_EL1, "%0"), - ARM64_HAS_IRQ_PRIO_MASKING) - : - : "r" (flags) - : "memory"); - - pmr_sync(); + if (system_uses_irq_prio_masking()) { + __pmr_local_irq_restore(flags); + } else { + __daif_local_irq_restore(flags); + } } #endif /* __ASM_IRQFLAGS_H */ diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h index cea441b6aa5d..424ed421cd97 100644 --- a/arch/arm64/include/asm/jump_label.h +++ b/arch/arm64/include/asm/jump_label.h @@ -13,37 +13,46 @@ #include <linux/types.h> #include <asm/insn.h> +#define HAVE_JUMP_LABEL_BATCH #define JUMP_LABEL_NOP_SIZE AARCH64_INSN_SIZE -static __always_inline bool arch_static_branch(struct static_key *key, - bool branch) +#define JUMP_TABLE_ENTRY(key, label) \ + ".pushsection __jump_table, \"aw\"\n\t" \ + ".align 3\n\t" \ + ".long 1b - ., " label " - .\n\t" \ + ".quad " key " - .\n\t" \ + ".popsection\n\t" + +/* This macro is also expanded on the Rust side. */ +#define ARCH_STATIC_BRANCH_ASM(key, label) \ + "1: nop\n\t" \ + JUMP_TABLE_ENTRY(key, label) + +static __always_inline bool arch_static_branch(struct static_key * const key, + const bool branch) { - asm_volatile_goto( - "1: nop \n\t" - " .pushsection __jump_table, \"aw\" \n\t" - " .align 3 \n\t" - " .long 1b - ., %l[l_yes] - . \n\t" - " .quad %c0 - . 
\n\t" - " .popsection \n\t" - : : "i"(&((char *)key)[branch]) : : l_yes); + char *k = &((char *)key)[branch]; + + asm goto( + ARCH_STATIC_BRANCH_ASM("%c0", "%l[l_yes]") + : : "i"(k) : : l_yes + ); return false; l_yes: return true; } -static __always_inline bool arch_static_branch_jump(struct static_key *key, - bool branch) +static __always_inline bool arch_static_branch_jump(struct static_key * const key, + const bool branch) { - asm_volatile_goto( - "1: b %l[l_yes] \n\t" - " .pushsection __jump_table, \"aw\" \n\t" - " .align 3 \n\t" - " .long 1b - ., %l[l_yes] - . \n\t" - " .quad %c0 - . \n\t" - " .popsection \n\t" - : : "i"(&((char *)key)[branch]) : : l_yes); + char *k = &((char *)key)[branch]; + asm goto( + "1: b %l[l_yes] \n\t" + JUMP_TABLE_ENTRY("%c0", "%l[l_yes]") + : : "i"(k) : : l_yes + ); return false; l_yes: return true; diff --git a/arch/arm64/include/asm/kasan.h b/arch/arm64/include/asm/kasan.h index 12d5f47f7dbe..e1b57c13f8a4 100644 --- a/arch/arm64/include/asm/kasan.h +++ b/arch/arm64/include/asm/kasan.h @@ -15,33 +15,11 @@ #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) -void kasan_init(void); - -/* - * KASAN_SHADOW_START: beginning of the kernel virtual addresses. - * KASAN_SHADOW_END: KASAN_SHADOW_START + 1/N of kernel virtual addresses, - * where N = (1 << KASAN_SHADOW_SCALE_SHIFT). - * - * KASAN_SHADOW_OFFSET: - * This value is used to map an address to the corresponding shadow - * address by the following formula: - * shadow_addr = (address >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET - * - * (1 << (64 - KASAN_SHADOW_SCALE_SHIFT)) shadow addresses that lie in range - * [KASAN_SHADOW_OFFSET, KASAN_SHADOW_END) cover all 64-bits of virtual - * addresses. So KASAN_SHADOW_OFFSET should satisfy the following equation: - * KASAN_SHADOW_OFFSET = KASAN_SHADOW_END - - * (1ULL << (64 - KASAN_SHADOW_SCALE_SHIFT)) - */ -#define _KASAN_SHADOW_START(va) (KASAN_SHADOW_END - (1UL << ((va) - KASAN_SHADOW_SCALE_SHIFT))) -#define KASAN_SHADOW_START _KASAN_SHADOW_START(vabits_actual) - -void kasan_copy_shadow(pgd_t *pgdir); asmlinkage void kasan_early_init(void); +void kasan_init(void); #else static inline void kasan_init(void) { } -static inline void kasan_copy_shadow(pgd_t *pgdir) { } #endif #endif diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index 02e59fa8f293..74a4f738c5f5 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -13,138 +13,74 @@ #include <asm/sparsemem.h> /* - * The linear mapping and the start of memory are both 2M aligned (per - * the arm64 booting.txt requirements). Hence we can use section mapping - * with 4K (section size = 2M) but not with 16K (section size = 32M) or - * 64K (section size = 512M). + * The physical and virtual addresses of the start of the kernel image are + * equal modulo 2 MiB (per the arm64 booting.txt requirements). Hence we can + * use section mapping with 4K (section size = 2M) but not with 16K (section + * size = 32M) or 64K (section size = 512M). */ -#ifdef CONFIG_ARM64_4K_PAGES -#define ARM64_KERNEL_USES_PMD_MAPS 1 +#if defined(PMD_SIZE) && PMD_SIZE <= MIN_KIMG_ALIGN +#define SWAPPER_BLOCK_SHIFT PMD_SHIFT +#define SWAPPER_SKIP_LEVEL 1 #else -#define ARM64_KERNEL_USES_PMD_MAPS 0 +#define SWAPPER_BLOCK_SHIFT PAGE_SHIFT +#define SWAPPER_SKIP_LEVEL 0 #endif +#define SWAPPER_BLOCK_SIZE (UL(1) << SWAPPER_BLOCK_SHIFT) -/* - * The idmap and swapper page tables need some space reserved in the kernel - * image. 
Both require pgd, pud (4 levels only) and pmd tables to (section) - * map the kernel. With the 64K page configuration, swapper and idmap need to - * map to pte level. The swapper also maps the FDT (see __create_page_tables - * for more information). Note that the number of ID map translation levels - * could be increased on the fly if system RAM is out of reach for the default - * VA range, so pages required to map highest possible PA are reserved in all - * cases. - */ -#if ARM64_KERNEL_USES_PMD_MAPS -#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS - 1) -#else -#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS) -#endif +#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS - SWAPPER_SKIP_LEVEL) +#define INIT_IDMAP_PGTABLE_LEVELS (IDMAP_LEVELS - SWAPPER_SKIP_LEVEL) +#define IDMAP_VA_BITS 48 +#define IDMAP_LEVELS ARM64_HW_PGTABLE_LEVELS(IDMAP_VA_BITS) +#define IDMAP_ROOT_LEVEL (4 - IDMAP_LEVELS) /* - * If KASLR is enabled, then an offset K is added to the kernel address - * space. The bottom 21 bits of this offset are zero to guarantee 2MB - * alignment for PA and VA. - * - * For each pagetable level of the swapper, we know that the shift will - * be larger than 21 (for the 4KB granule case we use section maps thus - * the smallest shift is actually 30) thus there is the possibility that - * KASLR can increase the number of pagetable entries by 1, so we make - * room for this extra entry. - * - * Note KASLR cannot increase the number of required entries for a level - * by more than one because it increments both the virtual start and end - * addresses equally (the extra entry comes from the case where the end - * address is just pushed over a boundary and the start address isn't). + * A relocatable kernel may execute from an address that differs from the one at + * which it was linked. In the worst case, its runtime placement may intersect + * with two adjacent PGDIR entries, which means that an additional page table + * may be needed at each subordinate level. 
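+ * (The image size is fixed and its start and end are displaced by the same
+ * offset, so at most one extra entry, and hence at most one extra table, is
+ * needed at any given level.)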
*/ +#define EXTRA_PAGE __is_defined(CONFIG_RELOCATABLE) -#ifdef CONFIG_RANDOMIZE_BASE -#define EARLY_KASLR (1) -#else -#define EARLY_KASLR (0) -#endif - -#define EARLY_ENTRIES(vstart, vend, shift) \ - ((((vend) - 1) >> (shift)) - ((vstart) >> (shift)) + 1 + EARLY_KASLR) - -#define EARLY_PGDS(vstart, vend) (EARLY_ENTRIES(vstart, vend, PGDIR_SHIFT)) - -#if SWAPPER_PGTABLE_LEVELS > 3 -#define EARLY_PUDS(vstart, vend) (EARLY_ENTRIES(vstart, vend, PUD_SHIFT)) -#else -#define EARLY_PUDS(vstart, vend) (0) -#endif - -#if SWAPPER_PGTABLE_LEVELS > 2 -#define EARLY_PMDS(vstart, vend) (EARLY_ENTRIES(vstart, vend, SWAPPER_TABLE_SHIFT)) -#else -#define EARLY_PMDS(vstart, vend) (0) -#endif +#define SPAN_NR_ENTRIES(vstart, vend, shift) \ + ((((vend) - 1) >> (shift)) - ((vstart) >> (shift)) + 1) -#define EARLY_PAGES(vstart, vend) ( 1 /* PGDIR page */ \ - + EARLY_PGDS((vstart), (vend)) /* each PGDIR needs a next level page table */ \ - + EARLY_PUDS((vstart), (vend)) /* each PUD needs a next level page table */ \ - + EARLY_PMDS((vstart), (vend))) /* each PMD needs a next level page table */ -#define INIT_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR, _end)) +#define EARLY_ENTRIES(lvl, vstart, vend) \ + SPAN_NR_ENTRIES(vstart, vend, SWAPPER_BLOCK_SHIFT + lvl * PTDESC_TABLE_SHIFT) -/* the initial ID map may need two extra pages if it needs to be extended */ -#if VA_BITS < 48 -#define INIT_IDMAP_DIR_SIZE ((INIT_IDMAP_DIR_PAGES + 2) * PAGE_SIZE) -#else -#define INIT_IDMAP_DIR_SIZE (INIT_IDMAP_DIR_PAGES * PAGE_SIZE) -#endif -#define INIT_IDMAP_DIR_PAGES EARLY_PAGES(KIMAGE_VADDR, _end + MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE) +#define EARLY_LEVEL(lvl, lvls, vstart, vend, add) \ + ((lvls) > (lvl) ? EARLY_ENTRIES(lvl, vstart, vend) + (add) : 0) -/* Initial memory map size */ -#if ARM64_KERNEL_USES_PMD_MAPS -#define SWAPPER_BLOCK_SHIFT PMD_SHIFT -#define SWAPPER_BLOCK_SIZE PMD_SIZE -#define SWAPPER_TABLE_SHIFT PUD_SHIFT -#else -#define SWAPPER_BLOCK_SHIFT PAGE_SHIFT -#define SWAPPER_BLOCK_SIZE PAGE_SIZE -#define SWAPPER_TABLE_SHIFT PMD_SHIFT -#endif +#define EARLY_PAGES(lvls, vstart, vend, add) (1 /* PGDIR page */ \ + + EARLY_LEVEL(3, (lvls), (vstart), (vend), add) /* each entry needs a next level page table */ \ + + EARLY_LEVEL(2, (lvls), (vstart), (vend), add) /* each entry needs a next level page table */ \ + + EARLY_LEVEL(1, (lvls), (vstart), (vend), add))/* each entry needs a next level page table */ +#define INIT_DIR_SIZE (PAGE_SIZE * (EARLY_PAGES(SWAPPER_PGTABLE_LEVELS, KIMAGE_VADDR, _end, EXTRA_PAGE) \ + + EARLY_SEGMENT_EXTRA_PAGES)) -/* - * Initial memory map attributes. 
- */ -#define SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) -#define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) +#define INIT_IDMAP_DIR_PAGES (EARLY_PAGES(INIT_IDMAP_PGTABLE_LEVELS, KIMAGE_VADDR, kimage_limit, 1)) +#define INIT_IDMAP_DIR_SIZE ((INIT_IDMAP_DIR_PAGES + EARLY_IDMAP_EXTRA_PAGES) * PAGE_SIZE) -#if ARM64_KERNEL_USES_PMD_MAPS -#define SWAPPER_RW_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS) -#define SWAPPER_RX_MMUFLAGS (SWAPPER_RW_MMUFLAGS | PMD_SECT_RDONLY) -#else -#define SWAPPER_RW_MMUFLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS) -#define SWAPPER_RX_MMUFLAGS (SWAPPER_RW_MMUFLAGS | PTE_RDONLY) -#endif +#define INIT_IDMAP_FDT_PAGES (EARLY_PAGES(INIT_IDMAP_PGTABLE_LEVELS, 0UL, UL(MAX_FDT_SIZE), 1) - 1) +#define INIT_IDMAP_FDT_SIZE ((INIT_IDMAP_FDT_PAGES + EARLY_IDMAP_EXTRA_FDT_PAGES) * PAGE_SIZE) -/* - * To make optimal use of block mappings when laying out the linear - * mapping, round down the base of physical memory to a size that can - * be mapped efficiently, i.e., either PUD_SIZE (4k granule) or PMD_SIZE - * (64k granule), or a multiple that can be mapped using contiguous bits - * in the page tables: 32 * PMD_SIZE (16k granule) - */ -#if defined(CONFIG_ARM64_4K_PAGES) -#define ARM64_MEMSTART_SHIFT PUD_SHIFT -#elif defined(CONFIG_ARM64_16K_PAGES) -#define ARM64_MEMSTART_SHIFT CONT_PMD_SHIFT -#else -#define ARM64_MEMSTART_SHIFT PMD_SHIFT -#endif +/* The number of segments in the kernel image (text, rodata, inittext, initdata, data+bss) */ +#define KERNEL_SEGMENT_COUNT 5 +#if SWAPPER_BLOCK_SIZE > SEGMENT_ALIGN +#define EARLY_SEGMENT_EXTRA_PAGES (KERNEL_SEGMENT_COUNT + 1) /* - * sparsemem vmemmap imposes an additional requirement on the alignment of - * memstart_addr, due to the fact that the base of the vmemmap region - * has a direct correspondence, and needs to appear sufficiently aligned - * in the virtual address space. + * The initial ID map consists of the kernel image, mapped as two separate + * segments, and may appear misaligned wrt the swapper block size. This means + * we need 3 additional pages. The DT could straddle a swapper block boundary, + * so it may need 2. 
*/ -#if ARM64_MEMSTART_SHIFT < SECTION_SIZE_BITS -#define ARM64_MEMSTART_ALIGN (1UL << SECTION_SIZE_BITS) +#define EARLY_IDMAP_EXTRA_PAGES 3 +#define EARLY_IDMAP_EXTRA_FDT_PAGES 2 #else -#define ARM64_MEMSTART_ALIGN (1UL << ARM64_MEMSTART_SHIFT) +#define EARLY_SEGMENT_EXTRA_PAGES 0 +#define EARLY_IDMAP_EXTRA_PAGES 0 +#define EARLY_IDMAP_EXTRA_FDT_PAGES 0 #endif #endif /* __ASM_KERNEL_PGTABLE_H */ diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h index 559bfae26715..4d9cc7a76d9c 100644 --- a/arch/arm64/include/asm/kexec.h +++ b/arch/arm64/include/asm/kexec.h @@ -80,7 +80,7 @@ static inline void crash_setup_regs(struct pt_regs *newregs, } } -#if defined(CONFIG_KEXEC_CORE) && defined(CONFIG_HIBERNATION) +#if defined(CONFIG_CRASH_DUMP) && defined(CONFIG_HIBERNATION) extern bool crash_is_nosave(unsigned long pfn); extern void crash_prepare_suspend(void); extern void crash_post_resume(void); @@ -102,12 +102,6 @@ void cpu_soft_restart(unsigned long el2_switch, unsigned long entry, int machine_kexec_post_load(struct kimage *image); #define machine_kexec_post_load machine_kexec_post_load - -void arch_kexec_protect_crashkres(void); -#define arch_kexec_protect_crashkres arch_kexec_protect_crashkres - -void arch_kexec_unprotect_crashkres(void); -#define arch_kexec_unprotect_crashkres arch_kexec_unprotect_crashkres #endif #define ARCH_HAS_KIMAGE_ARCH diff --git a/arch/arm64/include/asm/kfence.h b/arch/arm64/include/asm/kfence.h index aa855c6a0ae6..a81937fae9f6 100644 --- a/arch/arm64/include/asm/kfence.h +++ b/arch/arm64/include/asm/kfence.h @@ -19,4 +19,14 @@ static inline bool kfence_protect_page(unsigned long addr, bool protect) return true; } +#ifdef CONFIG_KFENCE +extern bool kfence_early_init; +static inline bool arm64_kfence_can_set_direct_map(void) +{ + return !kfence_early_init; +} +#else /* CONFIG_KFENCE */ +static inline bool arm64_kfence_can_set_direct_map(void) { return false; } +#endif /* CONFIG_KFENCE */ + #endif /* __ASM_KFENCE_H */ diff --git a/arch/arm64/include/asm/kgdb.h b/arch/arm64/include/asm/kgdb.h index 21fc85e9d2be..3184f5d1e3ae 100644 --- a/arch/arm64/include/asm/kgdb.h +++ b/arch/arm64/include/asm/kgdb.h @@ -24,6 +24,18 @@ static inline void arch_kgdb_breakpoint(void) extern void kgdb_handle_bus_error(void); extern int kgdb_fault_expected; +int kgdb_brk_handler(struct pt_regs *regs, unsigned long esr); +int kgdb_compiled_brk_handler(struct pt_regs *regs, unsigned long esr); +#ifdef CONFIG_KGDB +int kgdb_single_step_handler(struct pt_regs *regs, unsigned long esr); +#else +static inline int kgdb_single_step_handler(struct pt_regs *regs, + unsigned long esr) +{ + return DBG_HOOK_ERROR; +} +#endif + #endif /* !__ASSEMBLY__ */ /* diff --git a/arch/arm64/include/asm/kprobes.h b/arch/arm64/include/asm/kprobes.h index 05cd82eeca13..f2782560647b 100644 --- a/arch/arm64/include/asm/kprobes.h +++ b/arch/arm64/include/asm/kprobes.h @@ -37,10 +37,16 @@ struct kprobe_ctlblk { void arch_remove_kprobe(struct kprobe *); int kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr); -int kprobe_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data); void __kretprobe_trampoline(void); void __kprobes *trampoline_probe_handler(struct pt_regs *regs); #endif /* CONFIG_KPROBES */ + +int __kprobes kprobe_brk_handler(struct pt_regs *regs, + unsigned long esr); +int __kprobes kprobe_ss_brk_handler(struct pt_regs *regs, + unsigned long esr); +int __kprobes kretprobe_brk_handler(struct pt_regs *regs, + unsigned long esr); + #endif /* _ARM_KPROBES_H 
*/ diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 8aa8492dafc0..1da290aeedce 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -9,59 +9,73 @@ #include <asm/esr.h> #include <asm/memory.h> +#include <asm/sysreg.h> #include <asm/types.h> -/* Hyp Configuration Register (HCR) bits */ - -#define HCR_TID5 (UL(1) << 58) -#define HCR_DCT (UL(1) << 57) -#define HCR_ATA_SHIFT 56 -#define HCR_ATA (UL(1) << HCR_ATA_SHIFT) -#define HCR_AMVOFFEN (UL(1) << 51) -#define HCR_FIEN (UL(1) << 47) -#define HCR_FWB (UL(1) << 46) -#define HCR_API (UL(1) << 41) -#define HCR_APK (UL(1) << 40) -#define HCR_TEA (UL(1) << 37) -#define HCR_TERR (UL(1) << 36) -#define HCR_TLOR (UL(1) << 35) -#define HCR_E2H (UL(1) << 34) -#define HCR_ID (UL(1) << 33) -#define HCR_CD (UL(1) << 32) -#define HCR_RW_SHIFT 31 -#define HCR_RW (UL(1) << HCR_RW_SHIFT) -#define HCR_TRVM (UL(1) << 30) -#define HCR_HCD (UL(1) << 29) -#define HCR_TDZ (UL(1) << 28) -#define HCR_TGE (UL(1) << 27) -#define HCR_TVM (UL(1) << 26) -#define HCR_TTLB (UL(1) << 25) -#define HCR_TPU (UL(1) << 24) -#define HCR_TPC (UL(1) << 23) /* HCR_TPCP if FEAT_DPB */ -#define HCR_TSW (UL(1) << 22) -#define HCR_TACR (UL(1) << 21) -#define HCR_TIDCP (UL(1) << 20) -#define HCR_TSC (UL(1) << 19) -#define HCR_TID3 (UL(1) << 18) -#define HCR_TID2 (UL(1) << 17) -#define HCR_TID1 (UL(1) << 16) -#define HCR_TID0 (UL(1) << 15) -#define HCR_TWE (UL(1) << 14) -#define HCR_TWI (UL(1) << 13) -#define HCR_DC (UL(1) << 12) -#define HCR_BSU (3 << 10) -#define HCR_BSU_IS (UL(1) << 10) -#define HCR_FB (UL(1) << 9) -#define HCR_VSE (UL(1) << 8) -#define HCR_VI (UL(1) << 7) -#define HCR_VF (UL(1) << 6) -#define HCR_AMO (UL(1) << 5) -#define HCR_IMO (UL(1) << 4) -#define HCR_FMO (UL(1) << 3) -#define HCR_PTW (UL(1) << 2) -#define HCR_SWIO (UL(1) << 1) -#define HCR_VM (UL(1) << 0) -#define HCR_RES0 ((UL(1) << 48) | (UL(1) << 39)) +/* + * Because I'm terribly lazy and that repainting the whole of the KVM + * code with the proper names is a pain, use a helper to map the names + * inherited from AArch32 with the new fancy nomenclature. One day... 
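+ *
+ * For instance, __HCR(TWI) pastes into HCR_EL2_TWI, so the legacy HCR_TWI
+ * name below resolves to the HCR_EL2_TWI definition pulled in via
+ * <asm/sysreg.h>.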
+ */ +#define __HCR(x) HCR_EL2_##x + +#define HCR_TID5 __HCR(TID5) +#define HCR_DCT __HCR(DCT) +#define HCR_ATA_SHIFT __HCR(ATA_SHIFT) +#define HCR_ATA __HCR(ATA) +#define HCR_TTLBOS __HCR(TTLBOS) +#define HCR_TTLBIS __HCR(TTLBIS) +#define HCR_ENSCXT __HCR(EnSCXT) +#define HCR_TOCU __HCR(TOCU) +#define HCR_AMVOFFEN __HCR(AMVOFFEN) +#define HCR_TICAB __HCR(TICAB) +#define HCR_TID4 __HCR(TID4) +#define HCR_FIEN __HCR(FIEN) +#define HCR_FWB __HCR(FWB) +#define HCR_NV2 __HCR(NV2) +#define HCR_AT __HCR(AT) +#define HCR_NV1 __HCR(NV1) +#define HCR_NV __HCR(NV) +#define HCR_API __HCR(API) +#define HCR_APK __HCR(APK) +#define HCR_TEA __HCR(TEA) +#define HCR_TERR __HCR(TERR) +#define HCR_TLOR __HCR(TLOR) +#define HCR_E2H __HCR(E2H) +#define HCR_ID __HCR(ID) +#define HCR_CD __HCR(CD) +#define HCR_RW __HCR(RW) +#define HCR_TRVM __HCR(TRVM) +#define HCR_HCD __HCR(HCD) +#define HCR_TDZ __HCR(TDZ) +#define HCR_TGE __HCR(TGE) +#define HCR_TVM __HCR(TVM) +#define HCR_TTLB __HCR(TTLB) +#define HCR_TPU __HCR(TPU) +#define HCR_TPC __HCR(TPCP) +#define HCR_TSW __HCR(TSW) +#define HCR_TACR __HCR(TACR) +#define HCR_TIDCP __HCR(TIDCP) +#define HCR_TSC __HCR(TSC) +#define HCR_TID3 __HCR(TID3) +#define HCR_TID2 __HCR(TID2) +#define HCR_TID1 __HCR(TID1) +#define HCR_TID0 __HCR(TID0) +#define HCR_TWE __HCR(TWE) +#define HCR_TWI __HCR(TWI) +#define HCR_DC __HCR(DC) +#define HCR_BSU __HCR(BSU) +#define HCR_BSU_IS __HCR(BSU_IS) +#define HCR_FB __HCR(FB) +#define HCR_VSE __HCR(VSE) +#define HCR_VI __HCR(VI) +#define HCR_VF __HCR(VF) +#define HCR_AMO __HCR(AMO) +#define HCR_IMO __HCR(IMO) +#define HCR_FMO __HCR(FMO) +#define HCR_PTW __HCR(PTW) +#define HCR_SWIO __HCR(SWIO) +#define HCR_VM __HCR(VM) /* * The bits we set in HCR: @@ -81,18 +95,22 @@ * SWIO: Turn set/way invalidates into set/way clean+invalidate * PTW: Take a stage2 fault if a stage1 walk steps in device memory * TID3: Trap EL1 reads of group 3 ID registers + * TID1: Trap REVIDR_EL1, AIDR_EL1, and SMIDR_EL1 */ #define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \ HCR_BSU_IS | HCR_FB | HCR_TACR | \ HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \ - HCR_FMO | HCR_IMO | HCR_PTW | HCR_TID3 ) -#define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF) + HCR_FMO | HCR_IMO | HCR_PTW | HCR_TID3 | HCR_TID1) #define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA) #define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC) -#define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H) +#define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H | HCR_AMO | HCR_IMO | HCR_FMO) + +#define MPAMHCR_HOST_FLAGS 0 /* TCR_EL2 Registers bits */ +#define TCR_EL2_DS (1UL << 32) #define TCR_EL2_RES1 ((1U << 31) | (1 << 23)) +#define TCR_EL2_HPD (1 << 24) #define TCR_EL2_TBI (1 << 20) #define TCR_EL2_PS_SHIFT 16 #define TCR_EL2_PS_MASK (7 << TCR_EL2_PS_SHIFT) @@ -103,9 +121,10 @@ #define TCR_EL2_IRGN0_MASK TCR_IRGN0_MASK #define TCR_EL2_T0SZ_MASK 0x3f #define TCR_EL2_MASK (TCR_EL2_TG0_MASK | TCR_EL2_SH0_MASK | \ - TCR_EL2_ORGN0_MASK | TCR_EL2_IRGN0_MASK | TCR_EL2_T0SZ_MASK) + TCR_EL2_ORGN0_MASK | TCR_EL2_IRGN0_MASK) /* VTCR_EL2 Registers bits */ +#define VTCR_EL2_DS TCR_EL2_DS #define VTCR_EL2_RES1 (1U << 31) #define VTCR_EL2_HD (1 << 22) #define VTCR_EL2_HA (1 << 21) @@ -135,7 +154,7 @@ * 40 bits wide (T0SZ = 24). Systems with a PARange smaller than 40 bits are * not known to exist and will break with this configuration. * - * The VTCR_EL2 is configured per VM and is initialised in kvm_arm_setup_stage2(). 
+ * The VTCR_EL2 is configured per VM and is initialised in kvm_init_stage2_mmu. * * Note that when using 4K pages, we concatenate two first level page tables * together. With 16K pages, we concatenate 16 first level page tables. @@ -283,56 +302,32 @@ #define CPTR_EL2_TSM (1 << 12) #define CPTR_EL2_TFP (1 << CPTR_EL2_TFP_SHIFT) #define CPTR_EL2_TZ (1 << 8) -#define CPTR_NVHE_EL2_RES1 0x000032ff /* known RES1 bits in CPTR_EL2 (nVHE) */ -#define CPTR_EL2_DEFAULT CPTR_NVHE_EL2_RES1 +#define CPTR_NVHE_EL2_RES1 (BIT(13) | BIT(9) | GENMASK(7, 0)) #define CPTR_NVHE_EL2_RES0 (GENMASK(63, 32) | \ GENMASK(29, 21) | \ GENMASK(19, 14) | \ BIT(11)) -/* Hyp Debug Configuration Register bits */ -#define MDCR_EL2_E2TB_MASK (UL(0x3)) -#define MDCR_EL2_E2TB_SHIFT (UL(24)) -#define MDCR_EL2_HPMFZS (UL(1) << 36) -#define MDCR_EL2_HPMFZO (UL(1) << 29) -#define MDCR_EL2_MTPME (UL(1) << 28) -#define MDCR_EL2_TDCC (UL(1) << 27) -#define MDCR_EL2_HLP (UL(1) << 26) -#define MDCR_EL2_HCCD (UL(1) << 23) -#define MDCR_EL2_TTRF (UL(1) << 19) -#define MDCR_EL2_HPMD (UL(1) << 17) -#define MDCR_EL2_TPMS (UL(1) << 14) -#define MDCR_EL2_E2PB_MASK (UL(0x3)) -#define MDCR_EL2_E2PB_SHIFT (UL(12)) -#define MDCR_EL2_TDRA (UL(1) << 11) -#define MDCR_EL2_TDOSA (UL(1) << 10) -#define MDCR_EL2_TDA (UL(1) << 9) -#define MDCR_EL2_TDE (UL(1) << 8) -#define MDCR_EL2_HPME (UL(1) << 7) -#define MDCR_EL2_TPM (UL(1) << 6) -#define MDCR_EL2_TPMCR (UL(1) << 5) -#define MDCR_EL2_HPMN_MASK (UL(0x1F)) -#define MDCR_EL2_RES0 (GENMASK(63, 37) | \ - GENMASK(35, 30) | \ - GENMASK(25, 24) | \ - GENMASK(22, 20) | \ - BIT(18) | \ - GENMASK(16, 15)) - -/* For compatibility with fault code shared with 32-bit */ -#define FSC_FAULT ESR_ELx_FSC_FAULT -#define FSC_ACCESS ESR_ELx_FSC_ACCESS -#define FSC_PERM ESR_ELx_FSC_PERM -#define FSC_SEA ESR_ELx_FSC_EXTABT -#define FSC_SEA_TTW0 (0x14) -#define FSC_SEA_TTW1 (0x15) -#define FSC_SEA_TTW2 (0x16) -#define FSC_SEA_TTW3 (0x17) -#define FSC_SECC (0x18) -#define FSC_SECC_TTW0 (0x1c) -#define FSC_SECC_TTW1 (0x1d) -#define FSC_SECC_TTW2 (0x1e) -#define FSC_SECC_TTW3 (0x1f) +#define CPTR_VHE_EL2_RES0 (GENMASK(63, 32) | \ + GENMASK(27, 26) | \ + GENMASK(23, 22) | \ + GENMASK(19, 18) | \ + GENMASK(15, 0)) + +/* + * Polarity masks for HCRX_EL2, limited to the bits that we know about + * at this point in time. It doesn't mean that we actually *handle* + * them, but that at least those that are not advertised to a guest + * will be RES0 for that guest. + */ +#define __HCRX_EL2_MASK (BIT_ULL(6)) +#define __HCRX_EL2_nMASK (GENMASK_ULL(24, 14) | \ + GENMASK_ULL(11, 7) | \ + GENMASK_ULL(5, 0)) +#define __HCRX_EL2_RES0 ~(__HCRX_EL2_nMASK | __HCRX_EL2_MASK) +#define __HCRX_EL2_RES1 ~(__HCRX_EL2_nMASK | \ + __HCRX_EL2_MASK | \ + __HCRX_EL2_RES0) /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */ #define HPFAR_MASK (~UL(0xf)) @@ -340,9 +335,13 @@ * We have * PAR [PA_Shift - 1 : 12] = PA [PA_Shift - 1 : 12] * HPFAR [PA_Shift - 9 : 4] = FIPA [PA_Shift - 1 : 12] + * + * Always assume 52 bit PA since at this point, we don't know how many PA bits + * the page table has been set up for. This should be safe since unused address + * bits in PAR are res0. 
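+ *
+ * Concretely, PAR[51:12] holds the PA, and shifting it right by 8 places it
+ * at bits [43:4] of the result, which is where HPFAR_EL2 expects FIPA.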
*/ #define PAR_TO_HPFAR(par) \ - (((par) & GENMASK_ULL(PHYS_MASK_SHIFT - 1, 12)) >> 8) + (((par) & GENMASK_ULL(52 - 1, 12)) >> 8) #define ECN(x) { ESR_ELx_EC_##x, #x } @@ -355,10 +354,23 @@ ECN(SP_ALIGN), ECN(FP_EXC32), ECN(FP_EXC64), ECN(SERROR), \ ECN(BREAKPT_LOW), ECN(BREAKPT_CUR), ECN(SOFTSTP_LOW), \ ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \ - ECN(BKPT32), ECN(VECTOR32), ECN(BRK64) + ECN(BKPT32), ECN(VECTOR32), ECN(BRK64), ECN(ERET) -#define CPACR_EL1_TTA (1 << 28) -#define CPACR_EL1_DEFAULT (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN |\ - CPACR_EL1_ZEN_EL1EN) +#define kvm_mode_names \ + { PSR_MODE_EL0t, "EL0t" }, \ + { PSR_MODE_EL1t, "EL1t" }, \ + { PSR_MODE_EL1h, "EL1h" }, \ + { PSR_MODE_EL2t, "EL2t" }, \ + { PSR_MODE_EL2h, "EL2h" }, \ + { PSR_MODE_EL3t, "EL3t" }, \ + { PSR_MODE_EL3h, "EL3h" }, \ + { PSR_AA32_MODE_USR, "32-bit USR" }, \ + { PSR_AA32_MODE_FIQ, "32-bit FIQ" }, \ + { PSR_AA32_MODE_IRQ, "32-bit IRQ" }, \ + { PSR_AA32_MODE_SVC, "32-bit SVC" }, \ + { PSR_AA32_MODE_ABT, "32-bit ABT" }, \ + { PSR_AA32_MODE_HYP, "32-bit HYP" }, \ + { PSR_AA32_MODE_UND, "32-bit UND" }, \ + { PSR_AA32_MODE_SYS, "32-bit SYS" } #endif /* __ARM64_KVM_ARM_H__ */ diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 2e277f2ed671..bec227f9500a 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -10,6 +10,7 @@ #include <asm/hyp_image.h> #include <asm/insn.h> #include <asm/virt.h> +#include <asm/sysreg.h> #define ARM_EXIT_WITH_SERROR_BIT 31 #define ARM_EXCEPTION_CODE(x) ((x) & ~(1U << ARM_EXIT_WITH_SERROR_BIT)) @@ -52,8 +53,7 @@ enum __kvm_host_smccc_func { /* Hypercalls available only prior to pKVM finalisation */ /* __KVM_HOST_SMCCC_FUNC___kvm_hyp_init */ - __KVM_HOST_SMCCC_FUNC___kvm_get_mdcr_el2 = __KVM_HOST_SMCCC_FUNC___kvm_hyp_init + 1, - __KVM_HOST_SMCCC_FUNC___pkvm_init, + __KVM_HOST_SMCCC_FUNC___pkvm_init = __KVM_HOST_SMCCC_FUNC___kvm_hyp_init + 1, __KVM_HOST_SMCCC_FUNC___pkvm_create_private_mapping, __KVM_HOST_SMCCC_FUNC___pkvm_cpu_set_vector, __KVM_HOST_SMCCC_FUNC___kvm_enable_ssbs, @@ -64,18 +64,29 @@ enum __kvm_host_smccc_func { /* Hypercalls available after pKVM finalisation */ __KVM_HOST_SMCCC_FUNC___pkvm_host_share_hyp, __KVM_HOST_SMCCC_FUNC___pkvm_host_unshare_hyp, + __KVM_HOST_SMCCC_FUNC___pkvm_host_share_guest, + __KVM_HOST_SMCCC_FUNC___pkvm_host_unshare_guest, + __KVM_HOST_SMCCC_FUNC___pkvm_host_relax_perms_guest, + __KVM_HOST_SMCCC_FUNC___pkvm_host_wrprotect_guest, + __KVM_HOST_SMCCC_FUNC___pkvm_host_test_clear_young_guest, + __KVM_HOST_SMCCC_FUNC___pkvm_host_mkyoung_guest, __KVM_HOST_SMCCC_FUNC___kvm_adjust_pc, __KVM_HOST_SMCCC_FUNC___kvm_vcpu_run, __KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context, __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa, + __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa_nsh, __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid, + __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_range, __KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context, __KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff, - __KVM_HOST_SMCCC_FUNC___vgic_v3_read_vmcr, - __KVM_HOST_SMCCC_FUNC___vgic_v3_write_vmcr, - __KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs, - __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_aprs, - __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_init_traps, + __KVM_HOST_SMCCC_FUNC___vgic_v3_save_vmcr_aprs, + __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_vmcr_aprs, + __KVM_HOST_SMCCC_FUNC___pkvm_init_vm, + __KVM_HOST_SMCCC_FUNC___pkvm_init_vcpu, + __KVM_HOST_SMCCC_FUNC___pkvm_teardown_vm, + __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_load, + 
__KVM_HOST_SMCCC_FUNC___pkvm_vcpu_put, + __KVM_HOST_SMCCC_FUNC___pkvm_tlb_flush_vmid, }; #define DECLARE_KVM_VHE_SYM(sym) extern char sym[] @@ -106,7 +117,7 @@ enum __kvm_host_smccc_func { #define per_cpu_ptr_nvhe_sym(sym, cpu) \ ({ \ unsigned long base, off; \ - base = kvm_arm_hyp_percpu_base[cpu]; \ + base = kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu]; \ off = (unsigned long)&CHOOSE_NVHE_SYM(sym) - \ (unsigned long)&CHOOSE_NVHE_SYM(__per_cpu_start); \ base ? (typeof(CHOOSE_NVHE_SYM(sym))*)(base + off) : NULL; \ @@ -174,6 +185,23 @@ struct kvm_nvhe_init_params { unsigned long hcr_el2; unsigned long vttbr; unsigned long vtcr; + unsigned long tmp; +}; + +/* + * Used by the host in EL1 to dump the nVHE hypervisor backtrace on + * hyp_panic() in non-protected mode. + * + * @stack_base: hyp VA of the hyp_stack base. + * @overflow_stack_base: hyp VA of the hyp_overflow_stack base. + * @fp: hyp FP where the backtrace begins. + * @pc: hyp PC where the backtrace begins. + */ +struct kvm_nvhe_stacktrace_info { + unsigned long stack_base; + unsigned long overflow_stack_base; + unsigned long fp; + unsigned long pc; }; /* Translate a kernel address @ptr into its equivalent linear mapping */ @@ -195,7 +223,7 @@ DECLARE_KVM_HYP_SYM(__kvm_hyp_vector); #define __kvm_hyp_init CHOOSE_NVHE_SYM(__kvm_hyp_init) #define __kvm_hyp_vector CHOOSE_HYP_SYM(__kvm_hyp_vector) -extern unsigned long kvm_arm_hyp_percpu_base[NR_CPUS]; +extern unsigned long kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[]; DECLARE_KVM_NVHE_SYM(__per_cpu_start); DECLARE_KVM_NVHE_SYM(__per_cpu_end); @@ -206,21 +234,27 @@ extern void __kvm_flush_vm_context(void); extern void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu); extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa, int level); +extern void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu, + phys_addr_t ipa, + int level); +extern void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, + phys_addr_t start, unsigned long pages); extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu); +extern int __kvm_tlbi_s1e2(struct kvm_s2_mmu *mmu, u64 va, u64 sys_encoding); + extern void __kvm_timer_set_cntvoff(u64 cntvoff); +extern void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr); +extern void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr); +extern void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr); extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); extern void __kvm_adjust_pc(struct kvm_vcpu *vcpu); extern u64 __vgic_v3_get_gic_config(void); -extern u64 __vgic_v3_read_vmcr(void); -extern void __vgic_v3_write_vmcr(u32 vmcr); extern void __vgic_v3_init_lrs(void); -extern u64 __kvm_get_mdcr_el2(void); - #define __KVM_EXTABLE(from, to) \ " .pushsection __kvm_ex_table, \"a\"\n" \ " .align 3\n" \ @@ -235,7 +269,7 @@ extern u64 __kvm_get_mdcr_el2(void); asm volatile( \ " mrs %1, spsr_el2\n" \ " mrs %2, elr_el2\n" \ - "1: at "at_op", %3\n" \ + "1: " __msr_s(at_op, "%3") "\n" \ " isb\n" \ " b 9f\n" \ "2: msr spsr_el2, %1\n" \ @@ -248,6 +282,24 @@ extern u64 __kvm_get_mdcr_el2(void); __kvm_at_err; \ } ) +void __noreturn hyp_panic(void); +asmlinkage void kvm_unexpected_el2_exception(void); +asmlinkage void __noreturn hyp_panic(void); +asmlinkage void __noreturn hyp_panic_bad_stack(void); +asmlinkage void kvm_unexpected_el2_exception(void); +struct kvm_cpu_context; +void handle_trap(struct kvm_cpu_context *host_ctxt); +asmlinkage void __noreturn __kvm_host_psci_cpu_entry(bool is_cpu_on); +void __noreturn __pkvm_init_finalise(void); +void 
kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc); +void kvm_patch_vector_branch(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst); +void kvm_get_kimage_voffset(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst); +void kvm_compute_final_ctr_el0(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst); +void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, u64 elr_virt, + u64 elr_phys, u64 par, uintptr_t vcpu, u64 far, u64 hpfar); #else /* __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 0e66edd3aff2..fa8a08a1ccd5 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -11,12 +11,14 @@ #ifndef __ARM64_KVM_EMULATE_H__ #define __ARM64_KVM_EMULATE_H__ +#include <linux/bitfield.h> #include <linux/kvm_host.h> #include <asm/debug-monitors.h> #include <asm/esr.h> #include <asm/kvm_arm.h> #include <asm/kvm_hyp.h> +#include <asm/kvm_nested.h> #include <asm/ptrace.h> #include <asm/cputype.h> #include <asm/virt.h> @@ -33,17 +35,58 @@ enum exception_type { except_type_serror = 0x180, }; +#define kvm_exception_type_names \ + { except_type_sync, "SYNC" }, \ + { except_type_irq, "IRQ" }, \ + { except_type_fiq, "FIQ" }, \ + { except_type_serror, "SERROR" } + bool kvm_condition_valid32(const struct kvm_vcpu *vcpu); void kvm_skip_instr32(struct kvm_vcpu *vcpu); void kvm_inject_undefined(struct kvm_vcpu *vcpu); -void kvm_inject_vabt(struct kvm_vcpu *vcpu); -void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr); -void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr); +int kvm_inject_serror_esr(struct kvm_vcpu *vcpu, u64 esr); +int kvm_inject_sea(struct kvm_vcpu *vcpu, bool iabt, u64 addr); void kvm_inject_size_fault(struct kvm_vcpu *vcpu); +static inline int kvm_inject_sea_dabt(struct kvm_vcpu *vcpu, u64 addr) +{ + return kvm_inject_sea(vcpu, false, addr); +} + +static inline int kvm_inject_sea_iabt(struct kvm_vcpu *vcpu, u64 addr) +{ + return kvm_inject_sea(vcpu, true, addr); +} + +static inline int kvm_inject_serror(struct kvm_vcpu *vcpu) +{ + /* + * ESR_ELx.ISV (later renamed to IDS) indicates whether or not + * ESR_ELx.ISS contains IMPLEMENTATION DEFINED syndrome information. + * + * Set the bit when injecting an SError w/o an ESR to indicate ISS + * does not follow the architected format. 
+ */ + return kvm_inject_serror_esr(vcpu, ESR_ELx_ISV); +} + void kvm_vcpu_wfi(struct kvm_vcpu *vcpu); +void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu); +int kvm_inject_nested_sync(struct kvm_vcpu *vcpu, u64 esr_el2); +int kvm_inject_nested_irq(struct kvm_vcpu *vcpu); +int kvm_inject_nested_sea(struct kvm_vcpu *vcpu, bool iabt, u64 addr); +int kvm_inject_nested_serror(struct kvm_vcpu *vcpu, u64 esr); + +static inline void kvm_inject_nested_sve_trap(struct kvm_vcpu *vcpu) +{ + u64 esr = FIELD_PREP(ESR_ELx_EC_MASK, ESR_ELx_EC_SVE) | + ESR_ELx_IL; + + kvm_inject_nested_sync(vcpu, esr); +} + #if defined(__KVM_VHE_HYPERVISOR__) || defined(__KVM_NVHE_HYPERVISOR__) static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu) { @@ -52,48 +95,23 @@ static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu) #else static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu) { - struct kvm *kvm = vcpu->kvm; - - WARN_ON_ONCE(!test_bit(KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED, - &kvm->arch.flags)); - - return test_bit(KVM_ARCH_FLAG_EL1_32BIT, &kvm->arch.flags); + return vcpu_has_feature(vcpu, KVM_ARM_VCPU_EL1_32BIT); } #endif static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) { - vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS; - if (is_kernel_in_hyp_mode()) - vcpu->arch.hcr_el2 |= HCR_E2H; - if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN)) { - /* route synchronous external abort exceptions to EL2 */ - vcpu->arch.hcr_el2 |= HCR_TEA; - /* trap error record accesses */ - vcpu->arch.hcr_el2 |= HCR_TERR; - } + if (!vcpu_has_run_once(vcpu)) + vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS; - if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) { - vcpu->arch.hcr_el2 |= HCR_FWB; - } else { - /* - * For non-FWB CPUs, we trap VM ops (HCR_EL2.TVM) until M+C - * get set in SCTLR_EL1 such that we can detect when the guest - * MMU gets turned on and do the necessary cache maintenance - * then. - */ + /* + * For non-FWB CPUs, we trap VM ops (HCR_EL2.TVM) until M+C + * get set in SCTLR_EL1 such that we can detect when the guest + * MMU gets turned on and do the necessary cache maintenance + * then. 
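+	 * (CPUs with FEAT_S2FWB don't need this: stage-2 forces the final
+	 * memory attributes to cacheable, so the maintenance is unnecessary
+	 * and the trap is skipped, as checked below.)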
+ */ + if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB)) vcpu->arch.hcr_el2 |= HCR_TVM; - } - - if (vcpu_el1_is_32bit(vcpu)) - vcpu->arch.hcr_el2 &= ~HCR_RW; - - if (cpus_have_const_cap(ARM64_MISMATCHED_CACHE_TYPE) || - vcpu_el1_is_32bit(vcpu)) - vcpu->arch.hcr_el2 |= HCR_TID2; - - if (kvm_has_mte(vcpu->kvm)) - vcpu->arch.hcr_el2 |= HCR_ATA; } static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu) @@ -117,16 +135,6 @@ static inline void vcpu_set_wfx_traps(struct kvm_vcpu *vcpu) vcpu->arch.hcr_el2 |= HCR_TWI; } -static inline void vcpu_ptrauth_enable(struct kvm_vcpu *vcpu) -{ - vcpu->arch.hcr_el2 |= (HCR_API | HCR_APK); -} - -static inline void vcpu_ptrauth_disable(struct kvm_vcpu *vcpu) -{ - vcpu->arch.hcr_el2 &= ~(HCR_API | HCR_APK); -} - static inline unsigned long vcpu_get_vsesr(struct kvm_vcpu *vcpu) { return vcpu->arch.vsesr_el2; @@ -183,6 +191,81 @@ static __always_inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u8 reg_num, vcpu_gp_regs(vcpu)->regs[reg_num] = val; } +static inline bool vcpu_is_el2_ctxt(const struct kvm_cpu_context *ctxt) +{ + switch (ctxt->regs.pstate & (PSR_MODE32_BIT | PSR_MODE_MASK)) { + case PSR_MODE_EL2h: + case PSR_MODE_EL2t: + return true; + default: + return false; + } +} + +static inline bool vcpu_is_el2(const struct kvm_vcpu *vcpu) +{ + return vcpu_is_el2_ctxt(&vcpu->arch.ctxt); +} + +static inline bool vcpu_el2_e2h_is_set(const struct kvm_vcpu *vcpu) +{ + return (!cpus_have_final_cap(ARM64_HAS_HCR_NV1) || + (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_E2H)); +} + +static inline bool vcpu_el2_tge_is_set(const struct kvm_vcpu *vcpu) +{ + return ctxt_sys_reg(&vcpu->arch.ctxt, HCR_EL2) & HCR_TGE; +} + +static inline bool vcpu_el2_amo_is_set(const struct kvm_vcpu *vcpu) +{ + return ctxt_sys_reg(&vcpu->arch.ctxt, HCR_EL2) & HCR_AMO; +} + +static inline bool is_hyp_ctxt(const struct kvm_vcpu *vcpu) +{ + bool e2h, tge; + u64 hcr; + + if (!vcpu_has_nv(vcpu)) + return false; + + hcr = __vcpu_sys_reg(vcpu, HCR_EL2); + + e2h = (hcr & HCR_E2H); + tge = (hcr & HCR_TGE); + + /* + * We are in a hypervisor context if the vcpu mode is EL2 or + * E2H and TGE bits are set. The latter means we are in the user space + * of the VHE kernel. ARMv8.1 ARM describes this as 'InHost' + * + * Note that the HCR_EL2.{E2H,TGE}={0,1} isn't really handled in the + * rest of the KVM code, and will result in a misbehaving guest. + */ + return vcpu_is_el2(vcpu) || (e2h && tge) || tge; +} + +static inline bool vcpu_is_host_el0(const struct kvm_vcpu *vcpu) +{ + return is_hyp_ctxt(vcpu) && !vcpu_is_el2(vcpu); +} + +static inline bool is_nested_ctxt(struct kvm_vcpu *vcpu) +{ + return vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu); +} + +static inline bool vserror_state_is_nested(struct kvm_vcpu *vcpu) +{ + if (!is_nested_ctxt(vcpu)) + return false; + + return vcpu_el2_amo_is_set(vcpu) || + (__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TMEA); +} + /* * The layout of SPSR for an AArch32 state is different when observed from an * AArch64 SPSR_ELx or an AArch32 SPSR_*. 
This function generates the AArch32 @@ -234,6 +317,19 @@ static __always_inline u64 kvm_vcpu_get_esr(const struct kvm_vcpu *vcpu) return vcpu->arch.fault.esr_el2; } +static inline bool guest_hyp_wfx_traps_enabled(const struct kvm_vcpu *vcpu) +{ + u64 esr = kvm_vcpu_get_esr(vcpu); + bool is_wfe = !!(esr & ESR_ELx_WFx_ISS_WFE); + u64 hcr_el2 = __vcpu_sys_reg(vcpu, HCR_EL2); + + if (!vcpu_has_nv(vcpu) || vcpu_is_el2(vcpu)) + return false; + + return ((is_wfe && (hcr_el2 & HCR_TWE)) || + (!is_wfe && (hcr_el2 & HCR_TWI))); +} + static __always_inline int kvm_vcpu_get_condition(const struct kvm_vcpu *vcpu) { u64 esr = kvm_vcpu_get_esr(vcpu); @@ -251,7 +347,12 @@ static __always_inline unsigned long kvm_vcpu_get_hfar(const struct kvm_vcpu *vc static __always_inline phys_addr_t kvm_vcpu_get_fault_ipa(const struct kvm_vcpu *vcpu) { - return ((phys_addr_t)vcpu->arch.fault.hpfar_el2 & HPFAR_MASK) << 8; + u64 hpfar = vcpu->arch.fault.hpfar_el2; + + if (unlikely(!(hpfar & HPFAR_EL2_NS))) + return INVALID_GPA; + + return FIELD_GET(HPFAR_EL2_FIPA, hpfar) << 12; } static inline u64 kvm_vcpu_get_disr(const struct kvm_vcpu *vcpu) @@ -336,29 +437,34 @@ static __always_inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu) return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC; } -static __always_inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu) +static inline +bool kvm_vcpu_trap_is_permission_fault(const struct kvm_vcpu *vcpu) +{ + return esr_fsc_is_permission_fault(kvm_vcpu_get_esr(vcpu)); +} + +static inline +bool kvm_vcpu_trap_is_translation_fault(const struct kvm_vcpu *vcpu) { - return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC_TYPE; + return esr_fsc_is_translation_fault(kvm_vcpu_get_esr(vcpu)); } -static __always_inline u8 kvm_vcpu_trap_get_fault_level(const struct kvm_vcpu *vcpu) +static inline +u64 kvm_vcpu_trap_get_perm_fault_granule(const struct kvm_vcpu *vcpu) { - return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC_LEVEL; + unsigned long esr = kvm_vcpu_get_esr(vcpu); + + BUG_ON(!esr_fsc_is_permission_fault(esr)); + return BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(esr & ESR_ELx_FSC_LEVEL)); } static __always_inline bool kvm_vcpu_abt_issea(const struct kvm_vcpu *vcpu) { switch (kvm_vcpu_trap_get_fault(vcpu)) { - case FSC_SEA: - case FSC_SEA_TTW0: - case FSC_SEA_TTW1: - case FSC_SEA_TTW2: - case FSC_SEA_TTW3: - case FSC_SECC: - case FSC_SECC_TTW0: - case FSC_SECC_TTW1: - case FSC_SECC_TTW2: - case FSC_SECC_TTW3: + case ESR_ELx_FSC_EXTABT: + case ESR_ELx_FSC_SEA_TTW(-1) ... ESR_ELx_FSC_SEA_TTW(3): + case ESR_ELx_FSC_SECC: + case ESR_ELx_FSC_SECC_TTW(-1) ... ESR_ELx_FSC_SECC_TTW(3): return true; default: return false; @@ -373,8 +479,21 @@ static __always_inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu) static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu) { - if (kvm_vcpu_abt_iss1tw(vcpu)) - return true; + if (kvm_vcpu_abt_iss1tw(vcpu)) { + /* + * Only a permission fault on a S1PTW should be + * considered as a write. Otherwise, page tables baked + * in a read-only memslot will result in an exception + * being delivered in the guest. + * + * The drawback is that we end-up faulting twice if the + * guest is using any of HW AF/DB: a translation fault + * to map the page containing the PT (read only at + * first), then a permission fault to allow the flags + * to be set. 
+ */ + return kvm_vcpu_trap_is_permission_fault(vcpu); + } if (kvm_vcpu_trap_is_iabt(vcpu)) return false; @@ -384,7 +503,7 @@ static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu) static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu) { - return vcpu_read_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK; + return __vcpu_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK; } static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu) @@ -473,12 +592,86 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu, static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu) { - vcpu->arch.flags |= KVM_ARM64_INCREMENT_PC; + WARN_ON(vcpu_get_flag(vcpu, PENDING_EXCEPTION)); + vcpu_set_flag(vcpu, INCREMENT_PC); +} + +#define kvm_pend_exception(v, e) \ + do { \ + WARN_ON(vcpu_get_flag((v), INCREMENT_PC)); \ + vcpu_set_flag((v), PENDING_EXCEPTION); \ + vcpu_set_flag((v), e); \ + } while (0) + +/* + * Returns a 'sanitised' view of CPTR_EL2, translating from nVHE to the VHE + * format if E2H isn't set. + */ +static inline u64 vcpu_sanitised_cptr_el2(const struct kvm_vcpu *vcpu) +{ + u64 cptr = __vcpu_sys_reg(vcpu, CPTR_EL2); + + if (!vcpu_el2_e2h_is_set(vcpu)) + cptr = translate_cptr_el2_to_cpacr_el1(cptr); + + return cptr; } -static inline bool vcpu_has_feature(struct kvm_vcpu *vcpu, int feature) +static inline bool ____cptr_xen_trap_enabled(const struct kvm_vcpu *vcpu, + unsigned int xen) +{ + switch (xen) { + case 0b00: + case 0b10: + return true; + case 0b01: + return vcpu_el2_tge_is_set(vcpu) && !vcpu_is_el2(vcpu); + case 0b11: + default: + return false; + } +} + +#define __guest_hyp_cptr_xen_trap_enabled(vcpu, xen) \ + (!vcpu_has_nv(vcpu) ? false : \ + ____cptr_xen_trap_enabled(vcpu, \ + SYS_FIELD_GET(CPACR_EL1, xen, \ + vcpu_sanitised_cptr_el2(vcpu)))) + +static inline bool guest_hyp_fpsimd_traps_enabled(const struct kvm_vcpu *vcpu) { - return test_bit(feature, vcpu->arch.features); + return __guest_hyp_cptr_xen_trap_enabled(vcpu, FPEN); } +static inline bool guest_hyp_sve_traps_enabled(const struct kvm_vcpu *vcpu) +{ + return __guest_hyp_cptr_xen_trap_enabled(vcpu, ZEN); +} + +static inline void vcpu_set_hcrx(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + + if (cpus_have_final_cap(ARM64_HAS_HCX)) { + /* + * In general, all HCRX_EL2 bits are gated by a feature. + * The only reason we can set SMPME without checking any + * feature is that its effects are not directly observable + * from the guest. 
+ */ + vcpu->arch.hcrx_el2 = HCRX_EL2_SMPME; + + if (kvm_has_feat(kvm, ID_AA64ISAR2_EL1, MOPS, IMP)) + vcpu->arch.hcrx_el2 |= (HCRX_EL2_MSCEn | HCRX_EL2_MCE2); + + if (kvm_has_tcr2(kvm)) + vcpu->arch.hcrx_el2 |= HCRX_EL2_TCR2En; + + if (kvm_has_fpmr(kvm)) + vcpu->arch.hcrx_el2 |= HCRX_EL2_EnFPM; + + if (kvm_has_sctlr2(kvm)) + vcpu->arch.hcrx_el2 |= HCRX_EL2_SCTLR2En; + } +} #endif /* __ARM64_KVM_EMULATE_H__ */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index de32152cea04..2b07f0a27a7d 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -16,6 +16,7 @@ #include <linux/types.h> #include <linux/jump_label.h> #include <linux/kvm_types.h> +#include <linux/maple_tree.h> #include <linux/percpu.h> #include <linux/psci.h> #include <asm/arch_gicv3.h> @@ -26,6 +27,7 @@ #include <asm/fpsimd.h> #include <asm/kvm.h> #include <asm/kvm_asm.h> +#include <asm/vncr_mapping.h> #define __KVM_HAVE_ARCH_INTC_INITIALIZED @@ -37,16 +39,21 @@ #define KVM_MAX_VCPUS VGIC_V3_MAX_CPUS -#define KVM_VCPU_MAX_FEATURES 7 +#define KVM_VCPU_MAX_FEATURES 9 +#define KVM_VCPU_VALID_FEATURES (BIT(KVM_VCPU_MAX_FEATURES) - 1) #define KVM_REQ_SLEEP \ KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) -#define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1) -#define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(2) -#define KVM_REQ_RECORD_STEAL KVM_ARCH_REQ(3) -#define KVM_REQ_RELOAD_GICv4 KVM_ARCH_REQ(4) -#define KVM_REQ_RELOAD_PMU KVM_ARCH_REQ(5) -#define KVM_REQ_SUSPEND KVM_ARCH_REQ(6) +#define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1) +#define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(2) +#define KVM_REQ_RECORD_STEAL KVM_ARCH_REQ(3) +#define KVM_REQ_RELOAD_GICv4 KVM_ARCH_REQ(4) +#define KVM_REQ_RELOAD_PMU KVM_ARCH_REQ(5) +#define KVM_REQ_SUSPEND KVM_ARCH_REQ(6) +#define KVM_REQ_RESYNC_PMU_EL0 KVM_ARCH_REQ(7) +#define KVM_REQ_NESTED_S2_UNMAP KVM_ARCH_REQ(8) +#define KVM_REQ_GUEST_HYP_IRQ_PENDING KVM_ARCH_REQ(9) +#define KVM_REQ_MAP_L1_VNCR_EL2 KVM_ARCH_REQ(10) #define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \ KVM_DIRTY_LOG_INITIALLY_SET) @@ -60,19 +67,84 @@ enum kvm_mode { KVM_MODE_DEFAULT, KVM_MODE_PROTECTED, + KVM_MODE_NV, KVM_MODE_NONE, }; +#ifdef CONFIG_KVM enum kvm_mode kvm_get_mode(void); +#else +static inline enum kvm_mode kvm_get_mode(void) { return KVM_MODE_NONE; }; +#endif -DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use); - -extern unsigned int kvm_sve_max_vl; -int kvm_arm_init_sve(void); +extern unsigned int __ro_after_init kvm_sve_max_vl; +extern unsigned int __ro_after_init kvm_host_sve_max_vl; +int __init kvm_arm_init_sve(void); u32 __attribute_const__ kvm_target_cpu(void); -int kvm_reset_vcpu(struct kvm_vcpu *vcpu); +void kvm_reset_vcpu(struct kvm_vcpu *vcpu); void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu); +struct kvm_hyp_memcache { + phys_addr_t head; + unsigned long nr_pages; + struct pkvm_mapping *mapping; /* only used from EL1 */ + +#define HYP_MEMCACHE_ACCOUNT_STAGE2 BIT(1) + unsigned long flags; +}; + +static inline void push_hyp_memcache(struct kvm_hyp_memcache *mc, + phys_addr_t *p, + phys_addr_t (*to_pa)(void *virt)) +{ + *p = mc->head; + mc->head = to_pa(p); + mc->nr_pages++; +} + +static inline void *pop_hyp_memcache(struct kvm_hyp_memcache *mc, + void *(*to_va)(phys_addr_t phys)) +{ + phys_addr_t *p = to_va(mc->head & PAGE_MASK); + + if (!mc->nr_pages) + return NULL; + + mc->head = *p; + mc->nr_pages--; + + return p; +} + +static inline int __topup_hyp_memcache(struct kvm_hyp_memcache *mc, + unsigned long min_pages, + void 
*(*alloc_fn)(void *arg), + phys_addr_t (*to_pa)(void *virt), + void *arg) +{ + while (mc->nr_pages < min_pages) { + phys_addr_t *p = alloc_fn(arg); + + if (!p) + return -ENOMEM; + push_hyp_memcache(mc, p, to_pa); + } + + return 0; +} + +static inline void __free_hyp_memcache(struct kvm_hyp_memcache *mc, + void (*free_fn)(void *virt, void *arg), + void *(*to_va)(phys_addr_t phys), + void *arg) +{ + while (mc->nr_pages) + free_fn(pop_hyp_memcache(mc, to_va), arg); +} + +void free_hyp_memcache(struct kvm_hyp_memcache *mc); +int topup_hyp_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages); + struct kvm_vmid { atomic64_t id; }; @@ -93,10 +165,68 @@ struct kvm_s2_mmu { phys_addr_t pgd_phys; struct kvm_pgtable *pgt; + /* + * VTCR value used on the host. For a non-NV guest (or a NV + * guest that runs in a context where its own S2 doesn't + * apply), its T0SZ value reflects that of the IPA size. + * + * For a shadow S2 MMU, T0SZ reflects the PARange exposed to + * the guest. + */ + u64 vtcr; + /* The last vcpu id that ran on each physical CPU */ int __percpu *last_vcpu_ran; +#define KVM_ARM_EAGER_SPLIT_CHUNK_SIZE_DEFAULT 0 + /* + * Memory cache used to split + * KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE worth of huge pages. It + * is used to allocate stage2 page tables while splitting huge + * pages. The choice of KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE + * influences both the capacity of the split page cache, and + * how often KVM reschedules. Be wary of raising CHUNK_SIZE + * too high. + * + * Protected by kvm->slots_lock. + */ + struct kvm_mmu_memory_cache split_page_cache; + uint64_t split_page_chunk_size; + struct kvm_arch *arch; + + /* + * For a shadow stage-2 MMU, the virtual vttbr used by the + * host to parse the guest S2. + * This either contains: + * - the virtual VTTBR programmed by the guest hypervisor with + * CnP cleared + * - The value 1 (VMID=0, BADDR=0, CnP=1) if invalid + * + * We also cache the full VTCR which gets used for TLB invalidation, + * taking the ARM ARM's "Any of the bits in VTCR_EL2 are permitted + * to be cached in a TLB" to the letter. + */ + u64 tlb_vttbr; + u64 tlb_vtcr; + + /* + * true when this represents a nested context where virtual + * HCR_EL2.VM == 1 + */ + bool nested_stage2_enabled; + + /* + * true when this MMU needs to be unmapped before being used for a new + * purpose. + */ + bool pending_unmap; + + /* + * 0: Nobody is currently using this, check vttbr for validity + * >0: Somebody is actively using this. 
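+ *
+ * (Taken together with tlb_vttbr above: a refcnt of 0 plus the invalid
+ * vttbr value 1 is what marks the entry as free for reuse.)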
+ */ + atomic_t refcnt; }; struct kvm_arch_memory_slot { @@ -112,21 +242,86 @@ struct kvm_arch_memory_slot { struct kvm_smccc_features { unsigned long std_bmap; unsigned long std_hyp_bmap; - unsigned long vendor_hyp_bmap; + unsigned long vendor_hyp_bmap; /* Function numbers 0-63 */ + unsigned long vendor_hyp_bmap_2; /* Function numbers 64-127 */ +}; + +typedef unsigned int pkvm_handle_t; + +struct kvm_protected_vm { + pkvm_handle_t handle; + struct kvm_hyp_memcache teardown_mc; + struct kvm_hyp_memcache stage2_teardown_mc; + bool enabled; +}; + +struct kvm_mpidr_data { + u64 mpidr_mask; + DECLARE_FLEX_ARRAY(u16, cmpidr_to_idx); +}; + +static inline u16 kvm_mpidr_index(struct kvm_mpidr_data *data, u64 mpidr) +{ + unsigned long index = 0, mask = data->mpidr_mask; + unsigned long aff = mpidr & MPIDR_HWID_BITMASK; + + bitmap_gather(&index, &aff, &mask, fls(mask)); + + return index; +} + +struct kvm_sysreg_masks; + +enum fgt_group_id { + __NO_FGT_GROUP__, + HFGRTR_GROUP, + HFGWTR_GROUP = HFGRTR_GROUP, + HDFGRTR_GROUP, + HDFGWTR_GROUP = HDFGRTR_GROUP, + HFGITR_GROUP, + HAFGRTR_GROUP, + HFGRTR2_GROUP, + HFGWTR2_GROUP = HFGRTR2_GROUP, + HDFGRTR2_GROUP, + HDFGWTR2_GROUP = HDFGRTR2_GROUP, + HFGITR2_GROUP, + + /* Must be last */ + __NR_FGT_GROUP_IDS__ }; struct kvm_arch { struct kvm_s2_mmu mmu; - /* VTCR_EL2 value for this VM */ - u64 vtcr; + /* + * Fine-Grained UNDEF, mimicking the FGT layout defined by the + * architecture. We track them globally, as we present the + * same feature-set to all vcpus. + * + * Index 0 is currently spare. + */ + u64 fgu[__NR_FGT_GROUP_IDS__]; + + /* + * Stage 2 paging state for VMs with nested S2 using a virtual + * VMID. + */ + struct kvm_s2_mmu *nested_mmus; + size_t nested_mmus_size; + int nested_mmus_next; /* Interrupt controller */ struct vgic_dist vgic; + /* Timers */ + struct arch_timer_vm_data timer_data; + /* Mandated version of PSCI */ u32 psci_version; + /* Protects VM-scoped configuration data */ + struct mutex config_lock; + /* * If we encounter a data abort without valid instruction syndrome * information, report this to user space. User space can (and @@ -138,20 +333,30 @@ struct kvm_arch { #define KVM_ARCH_FLAG_MTE_ENABLED 1 /* At least one vCPU has ran in the VM */ #define KVM_ARCH_FLAG_HAS_RAN_ONCE 2 - /* - * The following two bits are used to indicate the guest's EL1 - * register width configuration. A value of KVM_ARCH_FLAG_EL1_32BIT - * bit is valid only when KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED is set. - * Otherwise, the guest's EL1 register width has not yet been - * determined yet. 
- */ -#define KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED 3 -#define KVM_ARCH_FLAG_EL1_32BIT 4 + /* The vCPU feature set for the VM is configured */ +#define KVM_ARCH_FLAG_VCPU_FEATURES_CONFIGURED 3 /* PSCI SYSTEM_SUSPEND enabled for the guest */ -#define KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED 5 - +#define KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED 4 + /* VM counter offset */ +#define KVM_ARCH_FLAG_VM_COUNTER_OFFSET 5 + /* Timer PPIs made immutable */ +#define KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE 6 + /* Initial ID reg values loaded */ +#define KVM_ARCH_FLAG_ID_REGS_INITIALIZED 7 + /* Fine-Grained UNDEF initialised */ +#define KVM_ARCH_FLAG_FGU_INITIALIZED 8 + /* SVE exposed to guest */ +#define KVM_ARCH_FLAG_GUEST_HAS_SVE 9 + /* MIDR_EL1, REVIDR_EL1, and AIDR_EL1 are writable from userspace */ +#define KVM_ARCH_FLAG_WRITABLE_IMP_ID_REGS 10 unsigned long flags; + /* VM-wide vCPU feature set */ + DECLARE_BITMAP(vcpu_features, KVM_VCPU_MAX_FEATURES); + + /* MPIDR to vcpu index mapping, optional */ + struct kvm_mpidr_data *mpidr_data; + /* * VM-wide PMU filter, implemented as a bitmap and big enough for * up to 2^10 events (ARMv8.0) or 2^16 events (ARMv8.1+). @@ -161,11 +366,44 @@ struct kvm_arch { cpumask_var_t supported_cpus; - u8 pfr0_csv2; - u8 pfr0_csv3; + /* Maximum number of counters for the guest */ + u8 nr_pmu_counters; + + /* Iterator for idreg debugfs */ + u8 idreg_debugfs_iter; /* Hypercall features firmware registers' descriptor */ struct kvm_smccc_features smccc_feat; + struct maple_tree smccc_filter; + + /* + * Emulated CPU ID registers per VM + * (Op0, Op1, CRn, CRm, Op2) of the ID registers to be saved in it + * is (3, 0, 0, crm, op2), where 1<=crm<8, 0<=op2<8. + * + * These emulated idregs are VM-wide, but accessed from the context of a vCPU. + * Atomic access to multiple idregs are guarded by kvm_arch.config_lock. + */ +#define IDREG_IDX(id) (((sys_reg_CRm(id) - 1) << 3) | sys_reg_Op2(id)) +#define KVM_ARM_ID_REG_NUM (IDREG_IDX(sys_reg(3, 0, 0, 7, 7)) + 1) + u64 id_regs[KVM_ARM_ID_REG_NUM]; + + u64 midr_el1; + u64 revidr_el1; + u64 aidr_el1; + u64 ctr_el0; + + /* Masks for VNCR-backed and general EL2 sysregs */ + struct kvm_sysreg_masks *sysreg_masks; + + /* Count the number of VNCR_EL2 currently mapped */ + atomic_t vncr_map_count; + + /* + * For an untrusted host VM, 'pkvm.handle' is used to lookup + * the associated pKVM instance in the hypervisor. + */ + struct kvm_protected_vm pkvm; }; struct kvm_vcpu_fault_info { @@ -175,31 +413,36 @@ struct kvm_vcpu_fault_info { u64 disr_el1; /* Deferred [SError] Status Register */ }; +/* + * VNCR() just places the VNCR_capable registers in the enum after + * __VNCR_START__, and the value (after correction) to be an 8-byte offset + * from the VNCR base. As we don't require the enum to be otherwise ordered, + * we need the terrible hack below to ensure that we correctly size the + * sys_regs array, no matter what. 
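+ *
+ * For example, VNCR(TFSR_EL1) expands to
+ *
+ *	__before_TFSR_EL1,
+ *	TFSR_EL1 = __VNCR_START__ + (VNCR_TFSR_EL1 / 8),
+ *	__after_TFSR_EL1 = __MAX__(__before_TFSR_EL1 - 1, TFSR_EL1)
+ *
+ * so the implicit enum counter resumes from whichever is larger, its
+ * previous position or TFSR_EL1 + 1, and NR_SYS_REGS always ends up
+ * greater than every assigned value.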
+ * + * The __MAX__ macro has been lifted from Sean Eron Anderson's wonderful + * treasure trove of bit hacks: + * https://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax + */ +#define __MAX__(x,y) ((x) ^ (((x) ^ (y)) & -((x) < (y)))) +#define VNCR(r) \ + __before_##r, \ + r = __VNCR_START__ + ((VNCR_ ## r) / 8), \ + __after_##r = __MAX__(__before_##r - 1, r) + +#define MARKER(m) \ + m, __after_##m = m - 1 + enum vcpu_sysreg { __INVALID_SYSREG__, /* 0 is reserved as an invalid value */ MPIDR_EL1, /* MultiProcessor Affinity Register */ + CLIDR_EL1, /* Cache Level ID Register */ CSSELR_EL1, /* Cache Size Selection Register */ - SCTLR_EL1, /* System Control Register */ - ACTLR_EL1, /* Auxiliary Control Register */ - CPACR_EL1, /* Coprocessor Access Control */ - ZCR_EL1, /* SVE Control */ - TTBR0_EL1, /* Translation Table Base Register 0 */ - TTBR1_EL1, /* Translation Table Base Register 1 */ - TCR_EL1, /* Translation Control Register */ - ESR_EL1, /* Exception Syndrome Register */ - AFSR0_EL1, /* Auxiliary Fault Status Register 0 */ - AFSR1_EL1, /* Auxiliary Fault Status Register 1 */ - FAR_EL1, /* Fault Address Register */ - MAIR_EL1, /* Memory Attribute Indirection Register */ - VBAR_EL1, /* Vector Base Address Register */ - CONTEXTIDR_EL1, /* Context ID Register */ TPIDR_EL0, /* Thread ID, User R/W */ TPIDRRO_EL0, /* Thread ID, User R/O */ TPIDR_EL1, /* Thread ID, Privileged */ - AMAIR_EL1, /* Aux Memory Attribute Indirection Register */ CNTKCTL_EL1, /* Timer Control Register (EL1) */ PAR_EL1, /* Physical Address Register */ - MDSCR_EL1, /* Monitor Debug System Control Register */ MDCCINT_EL1, /* Monitor Debug Comms Channel Interrupt Enable Reg */ OSLSR_EL1, /* OS Lock Status Register */ DISR_EL1, /* Deferred Interrupt Status Register */ @@ -230,31 +473,193 @@ enum vcpu_sysreg { APGAKEYLO_EL1, APGAKEYHI_EL1, - ELR_EL1, - SP_EL1, - SPSR_EL1, - - CNTVOFF_EL2, - CNTV_CVAL_EL0, - CNTV_CTL_EL0, - CNTP_CVAL_EL0, - CNTP_CTL_EL0, - /* Memory Tagging Extension registers */ RGSR_EL1, /* Random Allocation Tag Seed Register */ GCR_EL1, /* Tag Control Register */ - TFSR_EL1, /* Tag Fault Status Register (EL1) */ TFSRE0_EL1, /* Tag Fault Status Register (EL0) */ - /* 32bit specific registers. Keep them at the end of the range */ + POR_EL0, /* Permission Overlay Register 0 (EL0) */ + + /* FP/SIMD/SVE */ + SVCR, + FPMR, + + /* 32bit specific registers. 
*/ DACR32_EL2, /* Domain Access Control Register */ IFSR32_EL2, /* Instruction Fault Status Register */ FPEXC32_EL2, /* Floating-Point Exception Control Register */ DBGVCR32_EL2, /* Debug Vector Catch Register */ + /* EL2 registers */ + SCTLR_EL2, /* System Control Register (EL2) */ + ACTLR_EL2, /* Auxiliary Control Register (EL2) */ + CPTR_EL2, /* Architectural Feature Trap Register (EL2) */ + HACR_EL2, /* Hypervisor Auxiliary Control Register */ + ZCR_EL2, /* SVE Control Register (EL2) */ + TTBR0_EL2, /* Translation Table Base Register 0 (EL2) */ + TTBR1_EL2, /* Translation Table Base Register 1 (EL2) */ + TCR_EL2, /* Translation Control Register (EL2) */ + PIRE0_EL2, /* Permission Indirection Register 0 (EL2) */ + PIR_EL2, /* Permission Indirection Register 1 (EL2) */ + POR_EL2, /* Permission Overlay Register 2 (EL2) */ + SPSR_EL2, /* EL2 saved program status register */ + ELR_EL2, /* EL2 exception link register */ + AFSR0_EL2, /* Auxiliary Fault Status Register 0 (EL2) */ + AFSR1_EL2, /* Auxiliary Fault Status Register 1 (EL2) */ + ESR_EL2, /* Exception Syndrome Register (EL2) */ + FAR_EL2, /* Fault Address Register (EL2) */ + HPFAR_EL2, /* Hypervisor IPA Fault Address Register */ + MAIR_EL2, /* Memory Attribute Indirection Register (EL2) */ + AMAIR_EL2, /* Auxiliary Memory Attribute Indirection Register (EL2) */ + VBAR_EL2, /* Vector Base Address Register (EL2) */ + RVBAR_EL2, /* Reset Vector Base Address Register */ + CONTEXTIDR_EL2, /* Context ID Register (EL2) */ + SP_EL2, /* EL2 Stack Pointer */ + CNTHP_CTL_EL2, + CNTHP_CVAL_EL2, + CNTHV_CTL_EL2, + CNTHV_CVAL_EL2, + + /* Anything from this can be RES0/RES1 sanitised */ + MARKER(__SANITISED_REG_START__), + TCR2_EL2, /* Extended Translation Control Register (EL2) */ + SCTLR2_EL2, /* System Control Register 2 (EL2) */ + MDCR_EL2, /* Monitor Debug Configuration Register (EL2) */ + CNTHCTL_EL2, /* Counter-timer Hypervisor Control register */ + + /* Any VNCR-capable reg goes after this point */ + MARKER(__VNCR_START__), + + VNCR(SCTLR_EL1),/* System Control Register */ + VNCR(ACTLR_EL1),/* Auxiliary Control Register */ + VNCR(CPACR_EL1),/* Coprocessor Access Control */ + VNCR(ZCR_EL1), /* SVE Control */ + VNCR(TTBR0_EL1),/* Translation Table Base Register 0 */ + VNCR(TTBR1_EL1),/* Translation Table Base Register 1 */ + VNCR(TCR_EL1), /* Translation Control Register */ + VNCR(TCR2_EL1), /* Extended Translation Control Register */ + VNCR(SCTLR2_EL1), /* System Control Register 2 */ + VNCR(ESR_EL1), /* Exception Syndrome Register */ + VNCR(AFSR0_EL1),/* Auxiliary Fault Status Register 0 */ + VNCR(AFSR1_EL1),/* Auxiliary Fault Status Register 1 */ + VNCR(FAR_EL1), /* Fault Address Register */ + VNCR(MAIR_EL1), /* Memory Attribute Indirection Register */ + VNCR(VBAR_EL1), /* Vector Base Address Register */ + VNCR(CONTEXTIDR_EL1), /* Context ID Register */ + VNCR(AMAIR_EL1),/* Aux Memory Attribute Indirection Register */ + VNCR(MDSCR_EL1),/* Monitor Debug System Control Register */ + VNCR(ELR_EL1), + VNCR(SP_EL1), + VNCR(SPSR_EL1), + VNCR(TFSR_EL1), /* Tag Fault Status Register (EL1) */ + VNCR(VPIDR_EL2),/* Virtualization Processor ID Register */ + VNCR(VMPIDR_EL2),/* Virtualization Multiprocessor ID Register */ + VNCR(HCR_EL2), /* Hypervisor Configuration Register */ + VNCR(HSTR_EL2), /* Hypervisor System Trap Register */ + VNCR(VTTBR_EL2),/* Virtualization Translation Table Base Register */ + VNCR(VTCR_EL2), /* Virtualization Translation Control Register */ + VNCR(TPIDR_EL2),/* EL2 Software Thread ID Register */ + VNCR(HCRX_EL2), /* 
Extended Hypervisor Configuration Register */ + + /* Permission Indirection Extension registers */ + VNCR(PIR_EL1), /* Permission Indirection Register 1 (EL1) */ + VNCR(PIRE0_EL1), /* Permission Indirection Register 0 (EL1) */ + + VNCR(POR_EL1), /* Permission Overlay Register 1 (EL1) */ + + /* FEAT_RAS registers */ + VNCR(VDISR_EL2), + VNCR(VSESR_EL2), + + VNCR(HFGRTR_EL2), + VNCR(HFGWTR_EL2), + VNCR(HFGITR_EL2), + VNCR(HDFGRTR_EL2), + VNCR(HDFGWTR_EL2), + VNCR(HAFGRTR_EL2), + VNCR(HFGRTR2_EL2), + VNCR(HFGWTR2_EL2), + VNCR(HFGITR2_EL2), + VNCR(HDFGRTR2_EL2), + VNCR(HDFGWTR2_EL2), + + VNCR(VNCR_EL2), + + VNCR(CNTVOFF_EL2), + VNCR(CNTV_CVAL_EL0), + VNCR(CNTV_CTL_EL0), + VNCR(CNTP_CVAL_EL0), + VNCR(CNTP_CTL_EL0), + + VNCR(ICH_LR0_EL2), + VNCR(ICH_LR1_EL2), + VNCR(ICH_LR2_EL2), + VNCR(ICH_LR3_EL2), + VNCR(ICH_LR4_EL2), + VNCR(ICH_LR5_EL2), + VNCR(ICH_LR6_EL2), + VNCR(ICH_LR7_EL2), + VNCR(ICH_LR8_EL2), + VNCR(ICH_LR9_EL2), + VNCR(ICH_LR10_EL2), + VNCR(ICH_LR11_EL2), + VNCR(ICH_LR12_EL2), + VNCR(ICH_LR13_EL2), + VNCR(ICH_LR14_EL2), + VNCR(ICH_LR15_EL2), + + VNCR(ICH_AP0R0_EL2), + VNCR(ICH_AP0R1_EL2), + VNCR(ICH_AP0R2_EL2), + VNCR(ICH_AP0R3_EL2), + VNCR(ICH_AP1R0_EL2), + VNCR(ICH_AP1R1_EL2), + VNCR(ICH_AP1R2_EL2), + VNCR(ICH_AP1R3_EL2), + VNCR(ICH_HCR_EL2), + VNCR(ICH_VMCR_EL2), + NR_SYS_REGS /* Nothing after this line! */ }; +struct kvm_sysreg_masks { + struct { + u64 res0; + u64 res1; + } mask[NR_SYS_REGS - __SANITISED_REG_START__]; +}; + +struct fgt_masks { + const char *str; + u64 mask; + u64 nmask; + u64 res0; +}; + +extern struct fgt_masks hfgrtr_masks; +extern struct fgt_masks hfgwtr_masks; +extern struct fgt_masks hfgitr_masks; +extern struct fgt_masks hdfgrtr_masks; +extern struct fgt_masks hdfgwtr_masks; +extern struct fgt_masks hafgrtr_masks; +extern struct fgt_masks hfgrtr2_masks; +extern struct fgt_masks hfgwtr2_masks; +extern struct fgt_masks hfgitr2_masks; +extern struct fgt_masks hdfgrtr2_masks; +extern struct fgt_masks hdfgwtr2_masks; + +extern struct fgt_masks kvm_nvhe_sym(hfgrtr_masks); +extern struct fgt_masks kvm_nvhe_sym(hfgwtr_masks); +extern struct fgt_masks kvm_nvhe_sym(hfgitr_masks); +extern struct fgt_masks kvm_nvhe_sym(hdfgrtr_masks); +extern struct fgt_masks kvm_nvhe_sym(hdfgwtr_masks); +extern struct fgt_masks kvm_nvhe_sym(hafgrtr_masks); +extern struct fgt_masks kvm_nvhe_sym(hfgrtr2_masks); +extern struct fgt_masks kvm_nvhe_sym(hfgwtr2_masks); +extern struct fgt_masks kvm_nvhe_sym(hfgitr2_masks); +extern struct fgt_masks kvm_nvhe_sym(hdfgrtr2_masks); +extern struct fgt_masks kvm_nvhe_sym(hdfgwtr2_masks); + struct kvm_cpu_context { struct user_pt_regs regs; /* sp = sp_el0 */ @@ -268,15 +673,95 @@ struct kvm_cpu_context { u64 sys_regs[NR_SYS_REGS]; struct kvm_vcpu *__hyp_running_vcpu; + + /* This pointer has to be 4kB aligned. */ + u64 *vncr_array; +}; + +struct cpu_sve_state { + __u64 zcr_el1; + + /* + * Ordering is important since __sve_save_state/__sve_restore_state + * relies on it. + */ + __u32 fpsr; + __u32 fpcr; + + /* Must be SVE_VQ_BYTES (128 bit) aligned. */ + __u8 sve_regs[]; }; +/* + * This structure is instantiated on a per-CPU basis, and contains + * data that is: + * + * - tied to a single physical CPU, and + * - either have a lifetime that does not extend past vcpu_put() + * - or is an invariant for the lifetime of the system + * + * Use host_data_ptr(field) as a way to access a pointer to such a + * field. 
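+ *
+ * For example, guest_owns_fp_regs() further down is simply:
+ *
+ *	return *host_data_ptr(fp_owner) == FP_STATE_GUEST_OWNED;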
+ */ struct kvm_host_data { +#define KVM_HOST_DATA_FLAG_HAS_SPE 0 +#define KVM_HOST_DATA_FLAG_HAS_TRBE 1 +#define KVM_HOST_DATA_FLAG_TRBE_ENABLED 4 +#define KVM_HOST_DATA_FLAG_EL1_TRACING_CONFIGURED 5 +#define KVM_HOST_DATA_FLAG_VCPU_IN_HYP_CONTEXT 6 +#define KVM_HOST_DATA_FLAG_L1_VNCR_MAPPED 7 +#define KVM_HOST_DATA_FLAG_HAS_BRBE 8 + unsigned long flags; + struct kvm_cpu_context host_ctxt; + + /* + * Hyp VA. + * sve_state is only used in pKVM and if system_supports_sve(). + */ + struct cpu_sve_state *sve_state; + + /* Used by pKVM only. */ + u64 fpmr; + + /* Ownership of the FP regs */ + enum { + FP_STATE_FREE, + FP_STATE_HOST_OWNED, + FP_STATE_GUEST_OWNED, + } fp_owner; + + /* + * host_debug_state contains the host registers which are + * saved and restored during world switches. + */ + struct { + /* {Break,watch}point registers */ + struct kvm_guest_debug_arch regs; + /* Statistical profiling extension */ + u64 pmscr_el1; + /* Self-hosted trace */ + u64 trfcr_el1; + /* Values of trap registers for the host before guest entry. */ + u64 mdcr_el2; + u64 brbcr_el1; + } host_debug_state; + + /* Guest trace filter value */ + u64 trfcr_while_in_guest; + + /* Number of programmable event counters (PMCR_EL0.N) for this CPU */ + unsigned int nr_event_counters; + + /* Number of debug breakpoints/watchpoints for this CPU (minus 1) */ + unsigned int debug_brps; + unsigned int debug_wrps; }; struct kvm_host_psci_config { /* PSCI version used by host. */ u32 version; + u32 smccc_version; /* Function IDs used by host if version is v0.1. */ struct psci_0_1_function_ids function_ids_0_1; @@ -303,88 +788,87 @@ struct vcpu_reset_state { bool reset; }; +struct vncr_tlb; + struct kvm_vcpu_arch { struct kvm_cpu_context ctxt; - /* Guest floating point state */ + /* + * Guest floating point state + * + * The architecture has two main floating point extensions, + * the original FPSIMD and SVE. These have overlapping + * register views, with the FPSIMD V registers occupying the + * low 128 bits of the SVE Z registers. When the core + * floating point code saves the register state of a task it + * records which view it saved in fp_type. + */ void *sve_state; + enum fp_type fp_type; unsigned int sve_max_vl; - u64 svcr; /* Stage 2 paging state used by the hardware on next switch */ struct kvm_s2_mmu *hw_mmu; /* Values of trap registers for the guest. */ u64 hcr_el2; + u64 hcrx_el2; u64 mdcr_el2; - u64 cptr_el2; - - /* Values of trap registers for the host before guest entry. */ - u64 mdcr_el2_host; /* Exception Information */ struct kvm_vcpu_fault_info fault; - /* Miscellaneous vcpu state flags */ - u64 flags; + /* Configuration flags, set once and for all before the vcpu can run */ + u8 cflags; + + /* Input flags to the hypervisor code, potentially cleared after use */ + u8 iflags; + + /* State flags for kernel bookkeeping, unused by the hypervisor code */ + u16 sflags; + + /* + * Don't run the guest (internal implementation need). + * + * Contrary to the flags above, this is set/cleared outside of + * a vcpu context, and thus cannot be mixed with the flags + * themselves (or the flag accesses need to be made atomic). + */ + bool pause; /* * We maintain more than a single set of debug registers to support * debugging the guest from the host and to maintain separate host and * guest state during world switches. vcpu_debug_state are the debug - * registers of the vcpu as the guest sees them. host_debug_state are - * the host registers which are saved and restored during - * world switches. 
external_debug_state contains the debug - * values we want to debug the guest. This is set via the - * KVM_SET_GUEST_DEBUG ioctl. + * registers of the vcpu as the guest sees them. * - * debug_ptr points to the set of debug registers that should be loaded - * onto the hardware when running the guest. + * external_debug_state contains the debug values we want to debug the + * guest. This is set via the KVM_SET_GUEST_DEBUG ioctl. */ - struct kvm_guest_debug_arch *debug_ptr; struct kvm_guest_debug_arch vcpu_debug_state; struct kvm_guest_debug_arch external_debug_state; + u64 external_mdscr_el1; - struct user_fpsimd_state *host_fpsimd_state; /* hyp VA */ - struct task_struct *parent_task; - - struct { - /* {Break,watch}point registers */ - struct kvm_guest_debug_arch regs; - /* Statistical profiling extension */ - u64 pmscr_el1; - /* Self-hosted trace */ - u64 trfcr_el1; - } host_debug_state; + enum { + VCPU_DEBUG_FREE, + VCPU_DEBUG_HOST_OWNED, + VCPU_DEBUG_GUEST_OWNED, + } debug_owner; /* VGIC state */ struct vgic_cpu vgic_cpu; struct arch_timer_cpu timer_cpu; struct kvm_pmu pmu; - /* - * Guest registers we preserve during guest debugging. - * - * These shadow registers are updated by the kvm_handle_sys_reg - * trap handler if the guest accesses or updates them while we - * are using guest debug. - */ - struct { - u32 mdscr_el1; - } guest_debug_preserved; - /* vcpu power state */ struct kvm_mp_state mp_state; - - /* Don't run the guest (internal implementation need) */ - bool pause; + spinlock_t mp_state_lock; /* Cache some mmu pages needed inside spinlock regions */ struct kvm_mmu_memory_cache mmu_page_cache; - /* Target CPU and feature flags */ - int target; - DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES); + /* Pages to top-up the pKVM/EL2 guest pool */ + struct kvm_hyp_memcache pkvm_memcache; /* Virtual SError ESR to restore when HCR_EL2.VSE is set */ u64 vsesr_el2; @@ -392,101 +876,226 @@ struct kvm_vcpu_arch { /* Additional reset state */ struct vcpu_reset_state reset_state; - /* True when deferrable sysregs are loaded on the physical CPU, - * see kvm_vcpu_load_sysregs_vhe and kvm_vcpu_put_sysregs_vhe. */ - bool sysregs_loaded_on_cpu; - /* Guest PV state */ struct { u64 last_steal; gpa_t base; } steal; + + /* Per-vcpu CCSIDR override or NULL */ + u32 *ccsidr; + + /* Per-vcpu TLB for VNCR_EL2 -- NULL when !NV */ + struct vncr_tlb *vncr_tlb; }; +/* + * Each 'flag' is composed of a comma-separated triplet: + * + * - the flag-set it belongs to in the vcpu->arch structure + * - the value for that flag + * - the mask for that flag + * + * __vcpu_single_flag() builds such a triplet for a single-bit flag. + * unpack_vcpu_flag() extract the flag value from the triplet for + * direct use outside of the flag accessors. + */ +#define __vcpu_single_flag(_set, _f) _set, (_f), (_f) + +#define __unpack_flag(_set, _f, _m) _f +#define unpack_vcpu_flag(...) __unpack_flag(__VA_ARGS__) + +#define __build_check_flag(v, flagset, f, m) \ + do { \ + typeof(v->arch.flagset) *_fset; \ + \ + /* Check that the flags fit in the mask */ \ + BUILD_BUG_ON(HWEIGHT(m) != HWEIGHT((f) | (m))); \ + /* Check that the flags fit in the type */ \ + BUILD_BUG_ON((sizeof(*_fset) * 8) <= __fls(m)); \ + } while (0) + +#define __vcpu_get_flag(v, flagset, f, m) \ + ({ \ + __build_check_flag(v, flagset, f, m); \ + \ + READ_ONCE(v->arch.flagset) & (m); \ + }) + +/* + * Note that the set/clear accessors must be preempt-safe in order to + * avoid nesting them with load/put which also manipulate flags... 
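+ *
+ * As an illustration of the triplet encoding, vcpu_set_flag(vcpu, IN_WFIT)
+ * (with IN_WFIT defined further down) expands to
+ * __vcpu_set_flag(vcpu, sflags, BIT(1), BIT(1)): flag-set, value and
+ * mask travel together through __VA_ARGS__.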
+ */ +#ifdef __KVM_NVHE_HYPERVISOR__ +/* the nVHE hypervisor is always non-preemptible */ +#define __vcpu_flags_preempt_disable() +#define __vcpu_flags_preempt_enable() +#else +#define __vcpu_flags_preempt_disable() preempt_disable() +#define __vcpu_flags_preempt_enable() preempt_enable() +#endif + +#define __vcpu_set_flag(v, flagset, f, m) \ + do { \ + typeof(v->arch.flagset) *fset; \ + \ + __build_check_flag(v, flagset, f, m); \ + \ + fset = &v->arch.flagset; \ + __vcpu_flags_preempt_disable(); \ + if (HWEIGHT(m) > 1) \ + *fset &= ~(m); \ + *fset |= (f); \ + __vcpu_flags_preempt_enable(); \ + } while (0) + +#define __vcpu_clear_flag(v, flagset, f, m) \ + do { \ + typeof(v->arch.flagset) *fset; \ + \ + __build_check_flag(v, flagset, f, m); \ + \ + fset = &v->arch.flagset; \ + __vcpu_flags_preempt_disable(); \ + *fset &= ~(m); \ + __vcpu_flags_preempt_enable(); \ + } while (0) + +#define __vcpu_test_and_clear_flag(v, flagset, f, m) \ + ({ \ + typeof(v->arch.flagset) set; \ + \ + set = __vcpu_get_flag(v, flagset, f, m); \ + __vcpu_clear_flag(v, flagset, f, m); \ + \ + set; \ + }) + +#define vcpu_get_flag(v, ...) __vcpu_get_flag((v), __VA_ARGS__) +#define vcpu_set_flag(v, ...) __vcpu_set_flag((v), __VA_ARGS__) +#define vcpu_clear_flag(v, ...) __vcpu_clear_flag((v), __VA_ARGS__) +#define vcpu_test_and_clear_flag(v, ...) \ + __vcpu_test_and_clear_flag((v), __VA_ARGS__) + +/* KVM_ARM_VCPU_INIT completed */ +#define VCPU_INITIALIZED __vcpu_single_flag(cflags, BIT(0)) +/* SVE config completed */ +#define VCPU_SVE_FINALIZED __vcpu_single_flag(cflags, BIT(1)) +/* pKVM VCPU setup completed */ +#define VCPU_PKVM_FINALIZED __vcpu_single_flag(cflags, BIT(2)) + +/* Exception pending */ +#define PENDING_EXCEPTION __vcpu_single_flag(iflags, BIT(0)) +/* + * PC increment. Overlaps with EXCEPT_MASK on purpose so that it can't + * be set together with an exception... 
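+ *
+ * (INCREMENT_PC is iflags bit 1, which sits inside EXCEPT_MASK's
+ * GENMASK(3, 1), so setting an exception target through
+ * __vcpu_except_flags() clobbers it; kvm_incr_pc() and
+ * kvm_pend_exception() in kvm_emulate.h additionally WARN when a PC
+ * increment and a pending exception are mixed.)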
+ */ +#define INCREMENT_PC __vcpu_single_flag(iflags, BIT(1)) +/* Target EL/MODE (not a single flag, but let's abuse the macro) */ +#define EXCEPT_MASK __vcpu_single_flag(iflags, GENMASK(3, 1)) + +/* Helpers to encode exceptions with minimum fuss */ +#define __EXCEPT_MASK_VAL unpack_vcpu_flag(EXCEPT_MASK) +#define __EXCEPT_SHIFT __builtin_ctzl(__EXCEPT_MASK_VAL) +#define __vcpu_except_flags(_f) iflags, (_f << __EXCEPT_SHIFT), __EXCEPT_MASK_VAL + +/* + * When PENDING_EXCEPTION is set, EXCEPT_MASK can take the following + * values: + * + * For AArch32 EL1: + */ +#define EXCEPT_AA32_UND __vcpu_except_flags(0) +#define EXCEPT_AA32_IABT __vcpu_except_flags(1) +#define EXCEPT_AA32_DABT __vcpu_except_flags(2) +/* For AArch64: */ +#define EXCEPT_AA64_EL1_SYNC __vcpu_except_flags(0) +#define EXCEPT_AA64_EL1_IRQ __vcpu_except_flags(1) +#define EXCEPT_AA64_EL1_FIQ __vcpu_except_flags(2) +#define EXCEPT_AA64_EL1_SERR __vcpu_except_flags(3) +/* For AArch64 with NV: */ +#define EXCEPT_AA64_EL2_SYNC __vcpu_except_flags(4) +#define EXCEPT_AA64_EL2_IRQ __vcpu_except_flags(5) +#define EXCEPT_AA64_EL2_FIQ __vcpu_except_flags(6) +#define EXCEPT_AA64_EL2_SERR __vcpu_except_flags(7) + +/* Physical CPU not in supported_cpus */ +#define ON_UNSUPPORTED_CPU __vcpu_single_flag(sflags, BIT(0)) +/* WFIT instruction trapped */ +#define IN_WFIT __vcpu_single_flag(sflags, BIT(1)) +/* vcpu system registers loaded on physical CPU */ +#define SYSREGS_ON_CPU __vcpu_single_flag(sflags, BIT(2)) +/* Software step state is Active-pending for external debug */ +#define HOST_SS_ACTIVE_PENDING __vcpu_single_flag(sflags, BIT(3)) +/* Software step state is Active pending for guest debug */ +#define GUEST_SS_ACTIVE_PENDING __vcpu_single_flag(sflags, BIT(4)) +/* PMUSERENR for the guest EL0 is on physical CPU */ +#define PMUSERENR_ON_CPU __vcpu_single_flag(sflags, BIT(5)) +/* WFI instruction trapped */ +#define IN_WFI __vcpu_single_flag(sflags, BIT(6)) +/* KVM is currently emulating a nested ERET */ +#define IN_NESTED_ERET __vcpu_single_flag(sflags, BIT(7)) +/* SError pending for nested guest */ +#define NESTED_SERROR_PENDING __vcpu_single_flag(sflags, BIT(8)) + + /* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */ #define vcpu_sve_pffr(vcpu) (kern_hyp_va((vcpu)->arch.sve_state) + \ sve_ffr_offset((vcpu)->arch.sve_max_vl)) #define vcpu_sve_max_vq(vcpu) sve_vq_from_vl((vcpu)->arch.sve_max_vl) -#define vcpu_sve_state_size(vcpu) ({ \ +#define vcpu_sve_zcr_elx(vcpu) \ + (unlikely(is_hyp_ctxt(vcpu)) ? 
ZCR_EL2 : ZCR_EL1) + +#define sve_state_size_from_vl(sve_max_vl) ({ \ size_t __size_ret; \ - unsigned int __vcpu_vq; \ + unsigned int __vq; \ \ - if (WARN_ON(!sve_vl_valid((vcpu)->arch.sve_max_vl))) { \ + if (WARN_ON(!sve_vl_valid(sve_max_vl))) { \ __size_ret = 0; \ } else { \ - __vcpu_vq = vcpu_sve_max_vq(vcpu); \ - __size_ret = SVE_SIG_REGS_SIZE(__vcpu_vq); \ + __vq = sve_vq_from_vl(sve_max_vl); \ + __size_ret = SVE_SIG_REGS_SIZE(__vq); \ } \ \ __size_ret; \ }) -/* vcpu_arch flags field values: */ -#define KVM_ARM64_DEBUG_DIRTY (1 << 0) -#define KVM_ARM64_FP_ENABLED (1 << 1) /* guest FP regs loaded */ -#define KVM_ARM64_FP_HOST (1 << 2) /* host FP regs loaded */ -#define KVM_ARM64_HOST_SVE_ENABLED (1 << 4) /* SVE enabled for EL0 */ -#define KVM_ARM64_GUEST_HAS_SVE (1 << 5) /* SVE exposed to guest */ -#define KVM_ARM64_VCPU_SVE_FINALIZED (1 << 6) /* SVE config completed */ -#define KVM_ARM64_GUEST_HAS_PTRAUTH (1 << 7) /* PTRAUTH exposed to guest */ -#define KVM_ARM64_PENDING_EXCEPTION (1 << 8) /* Exception pending */ -/* - * Overlaps with KVM_ARM64_EXCEPT_MASK on purpose so that it can't be - * set together with an exception... - */ -#define KVM_ARM64_INCREMENT_PC (1 << 9) /* Increment PC */ -#define KVM_ARM64_EXCEPT_MASK (7 << 9) /* Target EL/MODE */ -/* - * When KVM_ARM64_PENDING_EXCEPTION is set, KVM_ARM64_EXCEPT_MASK can - * take the following values: - * - * For AArch32 EL1: - */ -#define KVM_ARM64_EXCEPT_AA32_UND (0 << 9) -#define KVM_ARM64_EXCEPT_AA32_IABT (1 << 9) -#define KVM_ARM64_EXCEPT_AA32_DABT (2 << 9) -/* For AArch64: */ -#define KVM_ARM64_EXCEPT_AA64_ELx_SYNC (0 << 9) -#define KVM_ARM64_EXCEPT_AA64_ELx_IRQ (1 << 9) -#define KVM_ARM64_EXCEPT_AA64_ELx_FIQ (2 << 9) -#define KVM_ARM64_EXCEPT_AA64_ELx_SERR (3 << 9) -#define KVM_ARM64_EXCEPT_AA64_EL1 (0 << 11) -#define KVM_ARM64_EXCEPT_AA64_EL2 (1 << 11) - -#define KVM_ARM64_DEBUG_STATE_SAVE_SPE (1 << 12) /* Save SPE context if active */ -#define KVM_ARM64_DEBUG_STATE_SAVE_TRBE (1 << 13) /* Save TRBE context if active */ -#define KVM_ARM64_FP_FOREIGN_FPSTATE (1 << 14) -#define KVM_ARM64_ON_UNSUPPORTED_CPU (1 << 15) /* Physical CPU not in supported_cpus */ -#define KVM_ARM64_HOST_SME_ENABLED (1 << 16) /* SME enabled for EL0 */ -#define KVM_ARM64_WFIT (1 << 17) /* WFIT instruction trapped */ +#define vcpu_sve_state_size(vcpu) sve_state_size_from_vl((vcpu)->arch.sve_max_vl) #define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \ KVM_GUESTDBG_USE_SW_BP | \ KVM_GUESTDBG_USE_HW | \ KVM_GUESTDBG_SINGLESTEP) -#define vcpu_has_sve(vcpu) (system_supports_sve() && \ - ((vcpu)->arch.flags & KVM_ARM64_GUEST_HAS_SVE)) +#define kvm_has_sve(kvm) (system_supports_sve() && \ + test_bit(KVM_ARCH_FLAG_GUEST_HAS_SVE, &(kvm)->arch.flags)) + +#ifdef __KVM_NVHE_HYPERVISOR__ +#define vcpu_has_sve(vcpu) kvm_has_sve(kern_hyp_va((vcpu)->kvm)) +#else +#define vcpu_has_sve(vcpu) kvm_has_sve((vcpu)->kvm) +#endif #ifdef CONFIG_ARM64_PTR_AUTH #define vcpu_has_ptrauth(vcpu) \ ((cpus_have_final_cap(ARM64_HAS_ADDRESS_AUTH) || \ cpus_have_final_cap(ARM64_HAS_GENERIC_AUTH)) && \ - (vcpu)->arch.flags & KVM_ARM64_GUEST_HAS_PTRAUTH) + (vcpu_has_feature(vcpu, KVM_ARM_VCPU_PTRAUTH_ADDRESS) || \ + vcpu_has_feature(vcpu, KVM_ARM_VCPU_PTRAUTH_GENERIC))) #else #define vcpu_has_ptrauth(vcpu) false #endif #define vcpu_on_unsupported_cpu(vcpu) \ - ((vcpu)->arch.flags & KVM_ARM64_ON_UNSUPPORTED_CPU) + vcpu_get_flag(vcpu, ON_UNSUPPORTED_CPU) #define vcpu_set_on_unsupported_cpu(vcpu) \ - ((vcpu)->arch.flags |= KVM_ARM64_ON_UNSUPPORTED_CPU) + vcpu_set_flag(vcpu, 
ON_UNSUPPORTED_CPU) #define vcpu_clear_on_unsupported_cpu(vcpu) \ - ((vcpu)->arch.flags &= ~KVM_ARM64_ON_UNSUPPORTED_CPU) + vcpu_clear_flag(vcpu, ON_UNSUPPORTED_CPU) #define vcpu_gp_regs(v) (&(v)->arch.ctxt.regs) @@ -496,106 +1105,63 @@ struct kvm_vcpu_arch { * accessed by a running VCPU. For example, for userspace access or * for system registers that are never context switched, but only * emulated. + * + * Don't bother with VNCR-based accesses in the nVHE code, it has no + * business dealing with NV. */ -#define __ctxt_sys_reg(c,r) (&(c)->sys_regs[(r)]) - -#define ctxt_sys_reg(c,r) (*__ctxt_sys_reg(c,r)) - -#define __vcpu_sys_reg(v,r) (ctxt_sys_reg(&(v)->arch.ctxt, (r))) +static inline u64 *___ctxt_sys_reg(const struct kvm_cpu_context *ctxt, int r) +{ +#if !defined (__KVM_NVHE_HYPERVISOR__) + if (unlikely(cpus_have_final_cap(ARM64_HAS_NESTED_VIRT) && + r >= __VNCR_START__ && ctxt->vncr_array)) + return &ctxt->vncr_array[r - __VNCR_START__]; +#endif + return (u64 *)&ctxt->sys_regs[r]; +} -u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg); -void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg); +#define __ctxt_sys_reg(c,r) \ + ({ \ + BUILD_BUG_ON(__builtin_constant_p(r) && \ + (r) >= NR_SYS_REGS); \ + ___ctxt_sys_reg(c, r); \ + }) -static inline bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val) -{ - /* - * *** VHE ONLY *** - * - * System registers listed in the switch are not saved on every - * exit from the guest but are only saved on vcpu_put. - * - * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but - * should never be listed below, because the guest cannot modify its - * own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's - * thread when emulating cross-VCPU communication. - */ - if (!has_vhe()) - return false; +#define ctxt_sys_reg(c,r) (*__ctxt_sys_reg(c,r)) - switch (reg) { - case CSSELR_EL1: *val = read_sysreg_s(SYS_CSSELR_EL1); break; - case SCTLR_EL1: *val = read_sysreg_s(SYS_SCTLR_EL12); break; - case CPACR_EL1: *val = read_sysreg_s(SYS_CPACR_EL12); break; - case TTBR0_EL1: *val = read_sysreg_s(SYS_TTBR0_EL12); break; - case TTBR1_EL1: *val = read_sysreg_s(SYS_TTBR1_EL12); break; - case TCR_EL1: *val = read_sysreg_s(SYS_TCR_EL12); break; - case ESR_EL1: *val = read_sysreg_s(SYS_ESR_EL12); break; - case AFSR0_EL1: *val = read_sysreg_s(SYS_AFSR0_EL12); break; - case AFSR1_EL1: *val = read_sysreg_s(SYS_AFSR1_EL12); break; - case FAR_EL1: *val = read_sysreg_s(SYS_FAR_EL12); break; - case MAIR_EL1: *val = read_sysreg_s(SYS_MAIR_EL12); break; - case VBAR_EL1: *val = read_sysreg_s(SYS_VBAR_EL12); break; - case CONTEXTIDR_EL1: *val = read_sysreg_s(SYS_CONTEXTIDR_EL12);break; - case TPIDR_EL0: *val = read_sysreg_s(SYS_TPIDR_EL0); break; - case TPIDRRO_EL0: *val = read_sysreg_s(SYS_TPIDRRO_EL0); break; - case TPIDR_EL1: *val = read_sysreg_s(SYS_TPIDR_EL1); break; - case AMAIR_EL1: *val = read_sysreg_s(SYS_AMAIR_EL12); break; - case CNTKCTL_EL1: *val = read_sysreg_s(SYS_CNTKCTL_EL12); break; - case ELR_EL1: *val = read_sysreg_s(SYS_ELR_EL12); break; - case PAR_EL1: *val = read_sysreg_par(); break; - case DACR32_EL2: *val = read_sysreg_s(SYS_DACR32_EL2); break; - case IFSR32_EL2: *val = read_sysreg_s(SYS_IFSR32_EL2); break; - case DBGVCR32_EL2: *val = read_sysreg_s(SYS_DBGVCR32_EL2); break; - default: return false; - } +u64 kvm_vcpu_apply_reg_masks(const struct kvm_vcpu *, enum vcpu_sysreg, u64); - return true; -} +#define __vcpu_assign_sys_reg(v, r, val) \ + do { \ + const struct kvm_cpu_context *ctxt = &(v)->arch.ctxt; \ + u64 __v = 
(val); \ + if (vcpu_has_nv((v)) && (r) >= __SANITISED_REG_START__) \ + __v = kvm_vcpu_apply_reg_masks((v), (r), __v); \ + \ + ctxt_sys_reg(ctxt, (r)) = __v; \ + } while (0) -static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg) -{ - /* - * *** VHE ONLY *** - * - * System registers listed in the switch are not restored on every - * entry to the guest but are only restored on vcpu_load. - * - * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but - * should never be listed below, because the MPIDR should only be set - * once, before running the VCPU, and never changed later. - */ - if (!has_vhe()) - return false; +#define __vcpu_rmw_sys_reg(v, r, op, val) \ + do { \ + const struct kvm_cpu_context *ctxt = &(v)->arch.ctxt; \ + u64 __v = ctxt_sys_reg(ctxt, (r)); \ + __v op (val); \ + if (vcpu_has_nv((v)) && (r) >= __SANITISED_REG_START__) \ + __v = kvm_vcpu_apply_reg_masks((v), (r), __v); \ + \ + ctxt_sys_reg(ctxt, (r)) = __v; \ + } while (0) - switch (reg) { - case CSSELR_EL1: write_sysreg_s(val, SYS_CSSELR_EL1); break; - case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12); break; - case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12); break; - case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); break; - case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12); break; - case TCR_EL1: write_sysreg_s(val, SYS_TCR_EL12); break; - case ESR_EL1: write_sysreg_s(val, SYS_ESR_EL12); break; - case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12); break; - case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12); break; - case FAR_EL1: write_sysreg_s(val, SYS_FAR_EL12); break; - case MAIR_EL1: write_sysreg_s(val, SYS_MAIR_EL12); break; - case VBAR_EL1: write_sysreg_s(val, SYS_VBAR_EL12); break; - case CONTEXTIDR_EL1: write_sysreg_s(val, SYS_CONTEXTIDR_EL12);break; - case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); break; - case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); break; - case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); break; - case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); break; - case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); break; - case ELR_EL1: write_sysreg_s(val, SYS_ELR_EL12); break; - case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); break; - case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); break; - case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break; - case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); break; - default: return false; - } +#define __vcpu_sys_reg(v,r) \ + ({ \ + const struct kvm_cpu_context *ctxt = &(v)->arch.ctxt; \ + u64 __v = ctxt_sys_reg(ctxt, (r)); \ + if (vcpu_has_nv((v)) && (r) >= __SANITISED_REG_START__) \ + __v = kvm_vcpu_apply_reg_masks((v), (r), __v); \ + __v; \ + }) - return true; -} +u64 vcpu_read_sys_reg(const struct kvm_vcpu *, enum vcpu_sysreg); +void vcpu_write_sys_reg(struct kvm_vcpu *, u64, enum vcpu_sysreg); struct kvm_vm_stat { struct kvm_vm_stat_generic generic; @@ -612,7 +1178,6 @@ struct kvm_vcpu_stat { u64 exits; }; -void kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); @@ -620,8 +1185,6 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu); int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices); -int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg 
*); -int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu, struct kvm_vcpu_events *events); @@ -629,12 +1192,10 @@ int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu, int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, struct kvm_vcpu_events *events); -#define KVM_ARCH_WANT_MMU_NOTIFIER - void kvm_arm_halt_guest(struct kvm *kvm); void kvm_arm_resume_guest(struct kvm *kvm); -#define vcpu_has_run_once(vcpu) !!rcu_access_pointer((vcpu)->pid) +#define vcpu_has_run_once(vcpu) (!!READ_ONCE((vcpu)->pid)) #ifndef __KVM_NVHE_HYPERVISOR__ #define kvm_call_hyp_nvhe(f, ...) \ @@ -649,9 +1210,8 @@ void kvm_arm_resume_guest(struct kvm *kvm); }) /* - * The couple of isb() below are there to guarantee the same behaviour - * on VHE as on !VHE, where the eret to EL1 acts as a context - * synchronization event. + * The isb() below is there to guarantee the same behaviour on VHE as on !VHE, + * where the eret to EL1 acts as a context synchronization event. */ #define kvm_call_hyp(f, ...) \ do { \ @@ -669,7 +1229,6 @@ void kvm_arm_resume_guest(struct kvm *kvm); \ if (has_vhe()) { \ ret = f(__VA_ARGS__); \ - isb(); \ } else { \ ret = kvm_call_hyp_nvhe(f, ##__VA_ARGS__); \ } \ @@ -682,8 +1241,6 @@ void kvm_arm_resume_guest(struct kvm *kvm); #define kvm_call_hyp_nvhe(f, ...) f(__VA_ARGS__) #endif /* __KVM_NVHE_HYPERVISOR__ */ -void force_vm_exit(const cpumask_t *mask); - int handle_exit(struct kvm_vcpu *vcpu, int exception_index); void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index); @@ -695,9 +1252,16 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu); int kvm_handle_sys_reg(struct kvm_vcpu *vcpu); int kvm_handle_cp10_id(struct kvm_vcpu *vcpu); +void kvm_sys_regs_create_debugfs(struct kvm *kvm); void kvm_reset_sys_regs(struct kvm_vcpu *vcpu); -int kvm_sys_reg_table_init(void); +int __init kvm_sys_reg_table_init(void); +struct sys_reg_desc; +int __init populate_sysreg_config(const struct sys_reg_desc *sr, + unsigned int idx); +int __init populate_nv_trap_config(void); + +void kvm_calculate_traps(struct kvm_vcpu *vcpu); /* MMIO helpers */ void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data); @@ -728,28 +1292,71 @@ int kvm_arm_pvtime_get_attr(struct kvm_vcpu *vcpu, int kvm_arm_pvtime_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); -extern unsigned int kvm_arm_vmid_bits; -int kvm_arm_vmid_alloc_init(void); -void kvm_arm_vmid_alloc_free(void); +extern unsigned int __ro_after_init kvm_arm_vmid_bits; +int __init kvm_arm_vmid_alloc_init(void); +void __init kvm_arm_vmid_alloc_free(void); void kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid); void kvm_arm_vmid_clear_active(void); static inline void kvm_arm_pvtime_vcpu_init(struct kvm_vcpu_arch *vcpu_arch) { - vcpu_arch->steal.base = GPA_INVALID; + vcpu_arch->steal.base = INVALID_GPA; } static inline bool kvm_arm_is_pvtime_enabled(struct kvm_vcpu_arch *vcpu_arch) { - return (vcpu_arch->steal.base != GPA_INVALID); + return (vcpu_arch->steal.base != INVALID_GPA); } -void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 syndrome); - struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); DECLARE_KVM_HYP_PER_CPU(struct kvm_host_data, kvm_host_data); +/* + * How we access per-CPU host data depends on the where we access it from, + * and the mode we're in: + * + * - VHE and nVHE hypervisor bits use their locally defined instance + * + * - the rest of the kernel use either the VHE or nVHE one, depending on + * the mode we're running in. 
+ * + * Unless we're in protected mode, fully deprivileged, and the nVHE + * per-CPU stuff is exclusively accessible to the protected EL2 code. + * In this case, the EL1 code uses the *VHE* data as its private state + * (which makes sense in a way as there shouldn't be any shared state + * between the host and the hypervisor). + * + * Yes, this is all totally trivial. Shoot me now. + */ +#if defined(__KVM_NVHE_HYPERVISOR__) || defined(__KVM_VHE_HYPERVISOR__) +#define host_data_ptr(f) (&this_cpu_ptr(&kvm_host_data)->f) +#else +#define host_data_ptr(f) \ + (static_branch_unlikely(&kvm_protected_mode_initialized) ? \ + &this_cpu_ptr(&kvm_host_data)->f : \ + &this_cpu_ptr_hyp_sym(kvm_host_data)->f) +#endif + +#define host_data_test_flag(flag) \ + (test_bit(KVM_HOST_DATA_FLAG_##flag, host_data_ptr(flags))) +#define host_data_set_flag(flag) \ + set_bit(KVM_HOST_DATA_FLAG_##flag, host_data_ptr(flags)) +#define host_data_clear_flag(flag) \ + clear_bit(KVM_HOST_DATA_FLAG_##flag, host_data_ptr(flags)) + +/* Check whether the FP regs are owned by the guest */ +static inline bool guest_owns_fp_regs(void) +{ + return *host_data_ptr(fp_owner) == FP_STATE_GUEST_OWNED; +} + +/* Check whether the FP regs are owned by the host */ +static inline bool host_owns_fp_regs(void) +{ + return *host_data_ptr(fp_owner) == FP_STATE_HOST_OWNED; +} + static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt) { /* The host's MPIDR is immutable, so let's set it up at boot time */ @@ -758,23 +1365,24 @@ static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt) static inline bool kvm_system_needs_idmapped_vectors(void) { - return cpus_have_const_cap(ARM64_SPECTRE_V3A); + return cpus_have_final_cap(ARM64_SPECTRE_V3A); } -void kvm_arm_vcpu_ptrauth_trap(struct kvm_vcpu *vcpu); - -static inline void kvm_arch_hardware_unsetup(void) {} -static inline void kvm_arch_sync_events(struct kvm *kvm) {} -static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} - -void kvm_arm_init_debug(void); -void kvm_arm_vcpu_init_debug(struct kvm_vcpu *vcpu); -void kvm_arm_setup_debug(struct kvm_vcpu *vcpu); -void kvm_arm_clear_debug(struct kvm_vcpu *vcpu); -void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu); +void kvm_init_host_debug_data(void); +void kvm_vcpu_load_debug(struct kvm_vcpu *vcpu); +void kvm_vcpu_put_debug(struct kvm_vcpu *vcpu); +void kvm_debug_set_guest_ownership(struct kvm_vcpu *vcpu); +void kvm_debug_handle_oslar(struct kvm_vcpu *vcpu, u64 val); #define kvm_vcpu_os_lock_enabled(vcpu) \ - (!!(__vcpu_sys_reg(vcpu, OSLSR_EL1) & SYS_OSLSR_OSLK)) + (!!(__vcpu_sys_reg(vcpu, OSLSR_EL1) & OSLSR_EL1_OSLK)) + +#define kvm_debug_regs_in_use(vcpu) \ + ((vcpu)->arch.debug_owner != VCPU_DEBUG_FREE) +#define kvm_host_owns_debug_regs(vcpu) \ + ((vcpu)->arch.debug_owner == VCPU_DEBUG_HOST_OWNED) +#define kvm_guest_owns_debug_regs(vcpu) \ + ((vcpu)->arch.debug_owner == VCPU_DEBUG_GUEST_OWNED) int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); @@ -783,61 +1391,86 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu, int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); -long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm, - struct kvm_arm_copy_mte_tags *copy_tags); +int kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm, + struct kvm_arm_copy_mte_tags *copy_tags); +int kvm_vm_ioctl_set_counter_offset(struct kvm *kvm, + struct kvm_arm_counter_offset *offset); +int kvm_vm_ioctl_get_reg_writable_masks(struct kvm *kvm, + struct reg_mask_range 
*range); /* Guest/host FPSIMD coordination helpers */ -int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu); -void kvm_vcpu_unshare_task_fp(struct kvm_vcpu *vcpu); static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr) { return (!has_vhe() && attr->exclude_host); } -/* Flags for host debug state */ -void kvm_arch_vcpu_load_debug_state_flags(struct kvm_vcpu *vcpu); -void kvm_arch_vcpu_put_debug_state_flags(struct kvm_vcpu *vcpu); - #ifdef CONFIG_KVM -void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr); -void kvm_clr_pmu_events(u32 clr); +void kvm_set_pmu_events(u64 set, struct perf_event_attr *attr); +void kvm_clr_pmu_events(u64 clr); +bool kvm_set_pmuserenr(u64 val); +void kvm_enable_trbe(void); +void kvm_disable_trbe(void); +void kvm_tracing_set_el1_configuration(u64 trfcr_while_in_guest); #else -static inline void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) {} -static inline void kvm_clr_pmu_events(u32 clr) {} +static inline void kvm_set_pmu_events(u64 set, struct perf_event_attr *attr) {} +static inline void kvm_clr_pmu_events(u64 clr) {} +static inline bool kvm_set_pmuserenr(u64 val) +{ + return false; +} +static inline void kvm_enable_trbe(void) {} +static inline void kvm_disable_trbe(void) {} +static inline void kvm_tracing_set_el1_configuration(u64 trfcr_while_in_guest) {} #endif -void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu); -void kvm_vcpu_put_sysregs_vhe(struct kvm_vcpu *vcpu); +void kvm_vcpu_load_vhe(struct kvm_vcpu *vcpu); +void kvm_vcpu_put_vhe(struct kvm_vcpu *vcpu); -int kvm_set_ipa_limit(void); +int __init kvm_set_ipa_limit(void); +u32 kvm_get_pa_bits(struct kvm *kvm); #define __KVM_HAVE_ARCH_VM_ALLOC struct kvm *kvm_arch_alloc_vm(void); -int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type); +#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS -static inline bool kvm_vm_is_protected(struct kvm *kvm) -{ - return false; -} +#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE + +#define kvm_vm_is_protected(kvm) (is_protected_kvm_enabled() && (kvm)->arch.pkvm.enabled) -void kvm_init_protected_traps(struct kvm_vcpu *vcpu); +#define vcpu_is_protected(vcpu) kvm_vm_is_protected((vcpu)->kvm) int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature); bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu); -#define kvm_arm_vcpu_sve_finalized(vcpu) \ - ((vcpu)->arch.flags & KVM_ARM64_VCPU_SVE_FINALIZED) +#define kvm_arm_vcpu_sve_finalized(vcpu) vcpu_get_flag(vcpu, VCPU_SVE_FINALIZED) #define kvm_has_mte(kvm) \ (system_supports_mte() && \ test_bit(KVM_ARCH_FLAG_MTE_ENABLED, &(kvm)->arch.flags)) +#define kvm_supports_32bit_el0() \ + (system_supports_32bit_el0() && \ + !static_branch_unlikely(&arm64_mismatched_32bit_el0)) + +#define kvm_vm_has_ran_once(kvm) \ + (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &(kvm)->arch.flags)) + +static inline bool __vcpu_has_feature(const struct kvm_arch *ka, int feature) +{ + return test_bit(feature, ka->vcpu_features); +} + +#define kvm_vcpu_has_feature(k, f) __vcpu_has_feature(&(k)->arch, (f)) +#define vcpu_has_feature(v, f) __vcpu_has_feature(&(v)->kvm->arch, (f)) + +#define kvm_vcpu_initialized(v) vcpu_get_flag(vcpu, VCPU_INITIALIZED) + int kvm_trng_call(struct kvm_vcpu *vcpu); #ifdef CONFIG_KVM extern phys_addr_t hyp_mem_base; @@ -850,4 +1483,121 @@ static inline void kvm_hyp_reserve(void) { } void 
kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu); bool kvm_arm_vcpu_stopped(struct kvm_vcpu *vcpu); +static inline u64 *__vm_id_reg(struct kvm_arch *ka, u32 reg) +{ + switch (reg) { + case sys_reg(3, 0, 0, 1, 0) ... sys_reg(3, 0, 0, 7, 7): + return &ka->id_regs[IDREG_IDX(reg)]; + case SYS_CTR_EL0: + return &ka->ctr_el0; + case SYS_MIDR_EL1: + return &ka->midr_el1; + case SYS_REVIDR_EL1: + return &ka->revidr_el1; + case SYS_AIDR_EL1: + return &ka->aidr_el1; + default: + WARN_ON_ONCE(1); + return NULL; + } +} + +#define kvm_read_vm_id_reg(kvm, reg) \ + ({ u64 __val = *__vm_id_reg(&(kvm)->arch, reg); __val; }) + +void kvm_set_vm_id_reg(struct kvm *kvm, u32 reg, u64 val); + +#define __expand_field_sign_unsigned(id, fld, val) \ + ((u64)SYS_FIELD_VALUE(id, fld, val)) + +#define __expand_field_sign_signed(id, fld, val) \ + ({ \ + u64 __val = SYS_FIELD_VALUE(id, fld, val); \ + sign_extend64(__val, id##_##fld##_WIDTH - 1); \ + }) + +#define get_idreg_field_unsigned(kvm, id, fld) \ + ({ \ + u64 __val = kvm_read_vm_id_reg((kvm), SYS_##id); \ + FIELD_GET(id##_##fld##_MASK, __val); \ + }) + +#define get_idreg_field_signed(kvm, id, fld) \ + ({ \ + u64 __val = get_idreg_field_unsigned(kvm, id, fld); \ + sign_extend64(__val, id##_##fld##_WIDTH - 1); \ + }) + +#define get_idreg_field_enum(kvm, id, fld) \ + get_idreg_field_unsigned(kvm, id, fld) + +#define kvm_cmp_feat_signed(kvm, id, fld, op, limit) \ + (get_idreg_field_signed((kvm), id, fld) op __expand_field_sign_signed(id, fld, limit)) + +#define kvm_cmp_feat_unsigned(kvm, id, fld, op, limit) \ + (get_idreg_field_unsigned((kvm), id, fld) op __expand_field_sign_unsigned(id, fld, limit)) + +#define kvm_cmp_feat(kvm, id, fld, op, limit) \ + (id##_##fld##_SIGNED ? \ + kvm_cmp_feat_signed(kvm, id, fld, op, limit) : \ + kvm_cmp_feat_unsigned(kvm, id, fld, op, limit)) + +#define __kvm_has_feat(kvm, id, fld, limit) \ + kvm_cmp_feat(kvm, id, fld, >=, limit) + +#define kvm_has_feat(kvm, ...) __kvm_has_feat(kvm, __VA_ARGS__) + +#define __kvm_has_feat_enum(kvm, id, fld, val) \ + kvm_cmp_feat_unsigned(kvm, id, fld, ==, val) + +#define kvm_has_feat_enum(kvm, ...) 
__kvm_has_feat_enum(kvm, __VA_ARGS__) + +#define kvm_has_feat_range(kvm, id, fld, min, max) \ + (kvm_cmp_feat(kvm, id, fld, >=, min) && \ + kvm_cmp_feat(kvm, id, fld, <=, max)) + +/* Check for a given level of PAuth support */ +#define kvm_has_pauth(k, l) \ + ({ \ + bool pa, pi, pa3; \ + \ + pa = kvm_has_feat((k), ID_AA64ISAR1_EL1, APA, l); \ + pa &= kvm_has_feat((k), ID_AA64ISAR1_EL1, GPA, IMP); \ + pi = kvm_has_feat((k), ID_AA64ISAR1_EL1, API, l); \ + pi &= kvm_has_feat((k), ID_AA64ISAR1_EL1, GPI, IMP); \ + pa3 = kvm_has_feat((k), ID_AA64ISAR2_EL1, APA3, l); \ + pa3 &= kvm_has_feat((k), ID_AA64ISAR2_EL1, GPA3, IMP); \ + \ + (pa + pi + pa3) == 1; \ + }) + +#define kvm_has_fpmr(k) \ + (system_supports_fpmr() && \ + kvm_has_feat((k), ID_AA64PFR2_EL1, FPMR, IMP)) + +#define kvm_has_tcr2(k) \ + (kvm_has_feat((k), ID_AA64MMFR3_EL1, TCRX, IMP)) + +#define kvm_has_s1pie(k) \ + (kvm_has_feat((k), ID_AA64MMFR3_EL1, S1PIE, IMP)) + +#define kvm_has_s1poe(k) \ + (kvm_has_feat((k), ID_AA64MMFR3_EL1, S1POE, IMP)) + +#define kvm_has_ras(k) \ + (kvm_has_feat((k), ID_AA64PFR0_EL1, RAS, IMP)) + +#define kvm_has_sctlr2(k) \ + (kvm_has_feat((k), ID_AA64MMFR3_EL1, SCTLRX, IMP)) + +static inline bool kvm_arch_has_irq_bypass(void) +{ + return true; +} + +void compute_fgu(struct kvm *kvm, enum fgt_group_id fgt); +void get_reg_fixed_bits(struct kvm *kvm, enum vcpu_sysreg reg, u64 *res0, u64 *res1); +void check_feature_map(void); + + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index aa7fa2a08f06..e6be1f5d0967 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -16,12 +16,35 @@ DECLARE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt); DECLARE_PER_CPU(unsigned long, kvm_hyp_vector); DECLARE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params); +/* + * Unified accessors for registers that have a different encoding + * between VHE and non-VHE. They must be specified without their "ELx" + * encoding, but with the SYS_ prefix, as defined in asm/sysreg.h. + */ + +#if defined(__KVM_VHE_HYPERVISOR__) + +#define read_sysreg_el0(r) read_sysreg_s(r##_EL02) +#define write_sysreg_el0(v,r) write_sysreg_s(v, r##_EL02) +#define read_sysreg_el1(r) read_sysreg_s(r##_EL12) +#define write_sysreg_el1(v,r) write_sysreg_s(v, r##_EL12) +#define read_sysreg_el2(r) read_sysreg_s(r##_EL1) +#define write_sysreg_el2(v,r) write_sysreg_s(v, r##_EL1) + +#else // !__KVM_VHE_HYPERVISOR__ + +#if defined(__KVM_NVHE_HYPERVISOR__) +#define VHE_ALT_KEY ARM64_KVM_HVHE +#else +#define VHE_ALT_KEY ARM64_HAS_VIRT_HOST_EXTN +#endif + #define read_sysreg_elx(r,nvh,vh) \ ({ \ u64 reg; \ - asm volatile(ALTERNATIVE(__mrs_s("%0", r##nvh), \ + asm volatile(ALTERNATIVE(__mrs_s("%0", r##nvh), \ __mrs_s("%0", r##vh), \ - ARM64_HAS_VIRT_HOST_EXTN) \ + VHE_ALT_KEY) \ : "=r" (reg)); \ reg; \ }) @@ -31,16 +54,10 @@ DECLARE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params); u64 __val = (u64)(v); \ asm volatile(ALTERNATIVE(__msr_s(r##nvh, "%x0"), \ __msr_s(r##vh, "%x0"), \ - ARM64_HAS_VIRT_HOST_EXTN) \ + VHE_ALT_KEY) \ : : "rZ" (__val)); \ } while (0) -/* - * Unified accessors for registers that have a different encoding - * between VHE and non-VHE. They must be specified without their "ELx" - * encoding, but with the SYS_ prefix, as defined in asm/sysreg.h. 
- */ - #define read_sysreg_el0(r) read_sysreg_elx(r, _EL0, _EL02) #define write_sysreg_el0(v,r) write_sysreg_elx(v, r, _EL0, _EL02) #define read_sysreg_el1(r) read_sysreg_elx(r, _EL1, _EL12) @@ -48,21 +65,25 @@ DECLARE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params); #define read_sysreg_el2(r) read_sysreg_elx(r, _EL2, _EL1) #define write_sysreg_el2(v,r) write_sysreg_elx(v, r, _EL2, _EL1) +#endif // __KVM_VHE_HYPERVISOR__ + /* * Without an __arch_swab32(), we fall back to ___constant_swab32(), but the * static inline can allow the compiler to out-of-line this. KVM always wants - * the macro version as its always inlined. + * the macro version as it's always inlined. */ #define __kvm_swab32(x) ___constant_swab32(x) int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu); +u64 __gic_v3_get_lr(unsigned int lr); + void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if); void __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if); void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if); void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if); -void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if); -void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if); +void __vgic_v3_save_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if); +void __vgic_v3_restore_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if); int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu); #ifdef __KVM_NVHE_HYPERVISOR__ @@ -74,6 +95,8 @@ void __timer_disable_traps(struct kvm_vcpu *vcpu); void __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt); void __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt); #else +void __vcpu_load_switch_sysregs(struct kvm_vcpu *vcpu); +void __vcpu_put_switch_sysregs(struct kvm_vcpu *vcpu); void sysreg_save_host_state_vhe(struct kvm_cpu_context *ctxt); void sysreg_restore_host_state_vhe(struct kvm_cpu_context *ctxt); void sysreg_save_guest_state_vhe(struct kvm_cpu_context *ctxt); @@ -90,16 +113,12 @@ void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu); void __fpsimd_save_state(struct user_fpsimd_state *fp_regs); void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs); -void __sve_restore_state(void *sve_pffr, u32 *fpsr); - -#ifndef __KVM_NVHE_HYPERVISOR__ -void activate_traps_vhe_load(struct kvm_vcpu *vcpu); -void deactivate_traps_vhe_put(struct kvm_vcpu *vcpu); -#endif +void __sve_save_state(void *sve_pffr, u32 *fpsr, int save_ffr); +void __sve_restore_state(void *sve_pffr, u32 *fpsr, int restore_ffr); u64 __guest_enter(struct kvm_vcpu *vcpu); -bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt); +bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt, u32 func_id); #ifdef __KVM_NVHE_HYPERVISOR__ void __noreturn __hyp_do_panic(struct kvm_cpu_context *host_ctxt, u64 spsr, @@ -107,8 +126,8 @@ void __noreturn __hyp_do_panic(struct kvm_cpu_context *host_ctxt, u64 spsr, #endif #ifdef __KVM_NVHE_HYPERVISOR__ -void __pkvm_init_switch_pgd(phys_addr_t phys, unsigned long size, - phys_addr_t pgd, void *sp, void *cont_fn); +void __pkvm_init_switch_pgd(phys_addr_t pgd, unsigned long sp, + void (*fn)(void)); int __pkvm_init(phys_addr_t phys, unsigned long size, unsigned long nr_cpus, unsigned long *per_cpu_base, u32 hyp_va_bits); void __noreturn __host_enter(struct kvm_cpu_context *host_ctxt); @@ -122,5 +141,10 @@ extern u64 kvm_nvhe_sym(id_aa64isar2_el1_sys_val); extern u64 kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val); extern u64 kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val); extern u64 kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val); +extern u64 
kvm_nvhe_sym(id_aa64smfr0_el1_sys_val); + +extern unsigned long kvm_nvhe_sym(__icache_flags); +extern unsigned int kvm_nvhe_sym(kvm_arm_vmid_bits); +extern unsigned int kvm_nvhe_sym(kvm_host_sve_max_vl); #endif /* __ARM64_KVM_HYP_H__ */ diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index b208da3bebec..e4069f2ce642 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -54,25 +54,6 @@ #include <asm/alternative.h> /* - * Convert a kernel VA into a HYP VA. - * reg: VA to be converted. - * - * The actual code generation takes place in kvm_update_va_mask, and - * the instructions below are only there to reserve the space and - * perform the register allocation (kvm_update_va_mask uses the - * specific registers encoded in the instructions). - */ -.macro kern_hyp_va reg -alternative_cb kvm_update_va_mask - and \reg, \reg, #1 /* mask with va_mask */ - ror \reg, \reg, #1 /* rotate to the first tag bit */ - add \reg, \reg, #0 /* insert the low 12 bits of the tag */ - add \reg, \reg, #0, lsl 12 /* insert the top 12 bits of the tag */ - ror \reg, \reg, #63 /* rotate back */ -alternative_cb_end -.endm - -/* * Convert a hypervisor VA to a PA * reg: hypervisor address to be converted in place * tmp: temporary register @@ -97,7 +78,7 @@ alternative_cb_end hyp_pa \reg, \tmp /* Load kimage_voffset. */ -alternative_cb kvm_get_kimage_voffset +alternative_cb ARM64_ALWAYS_SYSTEM, kvm_get_kimage_voffset movz \tmp, #0 movk \tmp, #0, lsl #16 movk \tmp, #0, lsl #32 @@ -115,7 +96,9 @@ alternative_cb_end #include <asm/cache.h> #include <asm/cacheflush.h> #include <asm/mmu_context.h> +#include <asm/kvm_emulate.h> #include <asm/kvm_host.h> +#include <asm/kvm_nested.h> void kvm_update_va_mask(struct alt_instr *alt, __le32 *origptr, __le32 *updptr, int nr_inst); @@ -124,29 +107,49 @@ void kvm_apply_hyp_relocations(void); #define __hyp_pa(x) (((phys_addr_t)(x)) + hyp_physvirt_offset) +/* + * Convert a kernel VA into a HYP VA. + * + * Can be called from hyp or non-hyp context. + * + * The actual code generation takes place in kvm_update_va_mask(), and + * the instructions below are only there to reserve the space and + * perform the register allocation (kvm_update_va_mask() uses the + * specific registers encoded in the instructions). + */ static __always_inline unsigned long __kern_hyp_va(unsigned long v) { - asm volatile(ALTERNATIVE_CB("and %0, %0, #1\n" - "ror %0, %0, #1\n" - "add %0, %0, #0\n" - "add %0, %0, #0, lsl 12\n" - "ror %0, %0, #63\n", +/* + * This #ifndef is an optimisation for when this is called from VHE hyp + * context. When called from a VHE non-hyp context, kvm_update_va_mask() will + * replace the instructions with `nop`s. + */ +#ifndef __KVM_VHE_HYPERVISOR__ + asm volatile(ALTERNATIVE_CB("and %0, %0, #1\n" /* mask with va_mask */ + "ror %0, %0, #1\n" /* rotate to the first tag bit */ + "add %0, %0, #0\n" /* insert the low 12 bits of the tag */ + "add %0, %0, #0, lsl 12\n" /* insert the top 12 bits of the tag */ + "ror %0, %0, #63\n", /* rotate back */ + ARM64_ALWAYS_SYSTEM, kvm_update_va_mask) : "+r" (v)); +#endif return v; } #define kern_hyp_va(v) ((typeof(v))(__kern_hyp_va((unsigned long)(v)))) +extern u32 __hyp_va_bits; + /* * We currently support using a VM-specified IPA size. For backward * compatibility, the default IPA size is fixed to 40bits. 
*/ #define KVM_PHYS_SHIFT (40) -#define kvm_phys_shift(kvm) VTCR_EL2_IPA(kvm->arch.vtcr) -#define kvm_phys_size(kvm) (_AC(1, ULL) << kvm_phys_shift(kvm)) -#define kvm_phys_mask(kvm) (kvm_phys_size(kvm) - _AC(1, ULL)) +#define kvm_phys_shift(mmu) VTCR_EL2_IPA((mmu)->vtcr) +#define kvm_phys_size(mmu) (_AC(1, ULL) << kvm_phys_shift(mmu)) +#define kvm_phys_mask(mmu) (kvm_phys_size(mmu) - _AC(1, ULL)) #include <asm/kvm_pgtable.h> #include <asm/stage2_pgtable.h> @@ -162,19 +165,27 @@ int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size, void __iomem **haddr); int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size, void **haddr); -void free_hyp_pgds(void); +int create_hyp_stack(phys_addr_t phys_addr, unsigned long *haddr); +void __init free_hyp_pgds(void); + +void kvm_stage2_unmap_range(struct kvm_s2_mmu *mmu, phys_addr_t start, + u64 size, bool may_block); +void kvm_stage2_flush_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end); +void kvm_stage2_wp_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end); void stage2_unmap_vm(struct kvm *kvm); -int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu); +int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long type); +void kvm_uninit_stage2_mmu(struct kvm *kvm); void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu); int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, phys_addr_t pa, unsigned long size, bool writable); +int kvm_handle_guest_sea(struct kvm_vcpu *vcpu); int kvm_handle_guest_abort(struct kvm_vcpu *vcpu); phys_addr_t kvm_mmu_get_httbr(void); phys_addr_t kvm_get_idmap_vector(void); -int kvm_mmu_init(u32 *hyp_va_bits); +int __init kvm_mmu_init(u32 *hyp_va_bits); static inline void *__kvm_vector_slot2addr(void *base, enum arm64_hyp_spectre_vector slot) @@ -191,7 +202,15 @@ struct kvm; static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu) { - return (vcpu_read_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101; + u64 cache_bits = SCTLR_ELx_M | SCTLR_ELx_C; + int reg; + + if (vcpu_is_el2(vcpu)) + reg = SCTLR_EL2; + else + reg = SCTLR_EL1; + + return (vcpu_read_sys_reg(vcpu, reg) & cache_bits) == cache_bits; } static inline void __clean_dcache_guest_page(void *va, size_t size) @@ -202,21 +221,40 @@ static inline void __clean_dcache_guest_page(void *va, size_t size) * faulting in pages. Furthermore, FWB implies IDC, so cleaning to * PoU is not required either in this case. */ - if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) + if (cpus_have_final_cap(ARM64_HAS_STAGE2_FWB)) return; kvm_flush_dcache_to_poc(va, size); } +static inline size_t __invalidate_icache_max_range(void) +{ + u8 iminline; + u64 ctr; + + asm volatile(ALTERNATIVE_CB("movz %0, #0\n" + "movk %0, #0, lsl #16\n" + "movk %0, #0, lsl #32\n" + "movk %0, #0, lsl #48\n", + ARM64_ALWAYS_SYSTEM, + kvm_compute_final_ctr_el0) + : "=r" (ctr)); + + iminline = SYS_FIELD_GET(CTR_EL0, IminLine, ctr) + 2; + return MAX_DVM_OPS << iminline; +} + static inline void __invalidate_icache_guest_page(void *va, size_t size) { - if (icache_is_aliasing()) { - /* any kind of VIPT cache */ + /* + * Blow the whole I-cache if it is aliasing (i.e. VIPT) or the + * invalidation range exceeds our arbitrary limit on invadations by + * cache line. 
+ */ + if (icache_is_aliasing() || size > __invalidate_icache_max_range()) icache_inval_all_pou(); - } else if (is_kernel_in_hyp_mode() || !icache_is_vpipt()) { - /* PIPT or VPIPT at EL2 (see comment in __kvm_tlb_flush_vmid_ipa) */ + else icache_inval_pou((unsigned long)va, (unsigned long)va + size); - } } void kvm_set_way_flush(struct kvm_vcpu *vcpu); @@ -282,7 +320,7 @@ static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu) static __always_inline void __load_stage2(struct kvm_s2_mmu *mmu, struct kvm_arch *arch) { - write_sysreg(arch->vtcr, vtcr_el2); + write_sysreg(mmu->vtcr, vtcr_el2); write_sysreg(kvm_get_vttbr(mmu), vttbr_el2); /* @@ -297,5 +335,66 @@ static inline struct kvm *kvm_s2_mmu_to_kvm(struct kvm_s2_mmu *mmu) { return container_of(mmu->arch, struct kvm, arch); } + +static inline u64 get_vmid(u64 vttbr) +{ + return (vttbr & VTTBR_VMID_MASK(kvm_get_vmid_bits())) >> + VTTBR_VMID_SHIFT; +} + +static inline bool kvm_s2_mmu_valid(struct kvm_s2_mmu *mmu) +{ + return !(mmu->tlb_vttbr & VTTBR_CNP_BIT); +} + +static inline bool kvm_is_nested_s2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu) +{ + /* + * Be careful, mmu may not be fully initialised so don't look at + * *any* of its fields. + */ + return &kvm->arch.mmu != mmu; +} + +static inline void kvm_fault_lock(struct kvm *kvm) +{ + if (is_protected_kvm_enabled()) + write_lock(&kvm->mmu_lock); + else + read_lock(&kvm->mmu_lock); +} + +static inline void kvm_fault_unlock(struct kvm *kvm) +{ + if (is_protected_kvm_enabled()) + write_unlock(&kvm->mmu_lock); + else + read_unlock(&kvm->mmu_lock); +} + +/* + * ARM64 KVM relies on a simple conversion from physaddr to a kernel + * virtual address (KVA) when it does cache maintenance as the CMO + * instructions work on virtual addresses. This is incompatible with + * VM_PFNMAP VMAs which may not have a kernel direct mapping to a + * virtual address. + * + * With S2FWB and CACHE DIC features, KVM need not do cache flushing + * and CMOs are NOP'd. This has the effect of no longer requiring a + * KVA for addresses mapped into the S2. The presence of these features + * is thus necessary to support cacheable S2 mapping of VM_PFNMAP.
+ */ +static inline bool kvm_supports_cacheable_pfnmap(void) +{ + return cpus_have_final_cap(ARM64_HAS_STAGE2_FWB) && + cpus_have_final_cap(ARM64_HAS_CACHE_DIC); +} + +#ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS +void kvm_s2_ptdump_create_debugfs(struct kvm *kvm); +#else +static inline void kvm_s2_ptdump_create_debugfs(struct kvm *kvm) {} +#endif /* CONFIG_PTDUMP_STAGE2_DEBUGFS */ + #endif /* __ASSEMBLY__ */ #endif /* __ARM64_KVM_MMU_H__ */ diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h new file mode 100644 index 000000000000..7fd76f41c296 --- /dev/null +++ b/arch/arm64/include/asm/kvm_nested.h @@ -0,0 +1,350 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ARM64_KVM_NESTED_H +#define __ARM64_KVM_NESTED_H + +#include <linux/bitfield.h> +#include <linux/kvm_host.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_pgtable.h> + +static inline bool vcpu_has_nv(const struct kvm_vcpu *vcpu) +{ + return (!__is_defined(__KVM_NVHE_HYPERVISOR__) && + cpus_have_final_cap(ARM64_HAS_NESTED_VIRT) && + vcpu_has_feature(vcpu, KVM_ARM_VCPU_HAS_EL2)); +} + +/* Translation helpers from non-VHE EL2 to EL1 */ +static inline u64 tcr_el2_ps_to_tcr_el1_ips(u64 tcr_el2) +{ + return (u64)FIELD_GET(TCR_EL2_PS_MASK, tcr_el2) << TCR_IPS_SHIFT; +} + +static inline u64 translate_tcr_el2_to_tcr_el1(u64 tcr) +{ + return TCR_EPD1_MASK | /* disable TTBR1_EL1 */ + ((tcr & TCR_EL2_TBI) ? TCR_TBI0 : 0) | + tcr_el2_ps_to_tcr_el1_ips(tcr) | + (tcr & TCR_EL2_TG0_MASK) | + (tcr & TCR_EL2_ORGN0_MASK) | + (tcr & TCR_EL2_IRGN0_MASK) | + (tcr & TCR_EL2_T0SZ_MASK); +} + +static inline u64 translate_cptr_el2_to_cpacr_el1(u64 cptr_el2) +{ + u64 cpacr_el1 = CPACR_EL1_RES1; + + if (cptr_el2 & CPTR_EL2_TTA) + cpacr_el1 |= CPACR_EL1_TTA; + if (!(cptr_el2 & CPTR_EL2_TFP)) + cpacr_el1 |= CPACR_EL1_FPEN; + if (!(cptr_el2 & CPTR_EL2_TZ)) + cpacr_el1 |= CPACR_EL1_ZEN; + + cpacr_el1 |= cptr_el2 & (CPTR_EL2_TCPAC | CPTR_EL2_TAM); + + return cpacr_el1; +} + +static inline u64 translate_sctlr_el2_to_sctlr_el1(u64 val) +{ + /* Only preserve the minimal set of bits we support */ + val &= (SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | SCTLR_ELx_SA | + SCTLR_ELx_I | SCTLR_ELx_IESB | SCTLR_ELx_WXN | SCTLR_ELx_EE); + val |= SCTLR_EL1_RES1; + + return val; +} + +static inline u64 translate_ttbr0_el2_to_ttbr0_el1(u64 ttbr0) +{ + /* Clear the ASID field */ + return ttbr0 & ~GENMASK_ULL(63, 48); +} + +extern bool forward_smc_trap(struct kvm_vcpu *vcpu); +extern bool forward_debug_exception(struct kvm_vcpu *vcpu); +extern void kvm_init_nested(struct kvm *kvm); +extern int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu); +extern void kvm_init_nested_s2_mmu(struct kvm_s2_mmu *mmu); +extern struct kvm_s2_mmu *lookup_s2_mmu(struct kvm_vcpu *vcpu); + +union tlbi_info; + +extern void kvm_s2_mmu_iterate_by_vmid(struct kvm *kvm, u16 vmid, + const union tlbi_info *info, + void (*)(struct kvm_s2_mmu *, + const union tlbi_info *)); +extern void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu); +extern void kvm_vcpu_put_hw_mmu(struct kvm_vcpu *vcpu); + +extern void check_nested_vcpu_requests(struct kvm_vcpu *vcpu); +extern void kvm_nested_flush_hwstate(struct kvm_vcpu *vcpu); +extern void kvm_nested_sync_hwstate(struct kvm_vcpu *vcpu); + +struct kvm_s2_trans { + phys_addr_t output; + unsigned long block_size; + bool writable; + bool readable; + int level; + u32 esr; + u64 desc; +}; + +static inline phys_addr_t kvm_s2_trans_output(struct kvm_s2_trans *trans) +{ + return trans->output; +} + +static inline unsigned long kvm_s2_trans_size(struct 
kvm_s2_trans *trans) +{ + return trans->block_size; +} + +static inline u32 kvm_s2_trans_esr(struct kvm_s2_trans *trans) +{ + return trans->esr; +} + +static inline bool kvm_s2_trans_readable(struct kvm_s2_trans *trans) +{ + return trans->readable; +} + +static inline bool kvm_s2_trans_writable(struct kvm_s2_trans *trans) +{ + return trans->writable; +} + +static inline bool kvm_s2_trans_executable(struct kvm_s2_trans *trans) +{ + return !(trans->desc & BIT(54)); +} + +extern int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa, + struct kvm_s2_trans *result); +extern int kvm_s2_handle_perm_fault(struct kvm_vcpu *vcpu, + struct kvm_s2_trans *trans); +extern int kvm_inject_s2_fault(struct kvm_vcpu *vcpu, u64 esr_el2); +extern void kvm_nested_s2_wp(struct kvm *kvm); +extern void kvm_nested_s2_unmap(struct kvm *kvm, bool may_block); +extern void kvm_nested_s2_flush(struct kvm *kvm); + +unsigned long compute_tlb_inval_range(struct kvm_s2_mmu *mmu, u64 val); + +static inline bool kvm_supported_tlbi_s1e1_op(struct kvm_vcpu *vpcu, u32 instr) +{ + struct kvm *kvm = vpcu->kvm; + u8 CRm = sys_reg_CRm(instr); + + if (!(sys_reg_Op0(instr) == TLBI_Op0 && + sys_reg_Op1(instr) == TLBI_Op1_EL1)) + return false; + + if (!(sys_reg_CRn(instr) == TLBI_CRn_XS || + (sys_reg_CRn(instr) == TLBI_CRn_nXS && + kvm_has_feat(kvm, ID_AA64ISAR1_EL1, XS, IMP)))) + return false; + + if (CRm == TLBI_CRm_nROS && + !kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, OS)) + return false; + + if ((CRm == TLBI_CRm_RIS || CRm == TLBI_CRm_ROS || + CRm == TLBI_CRm_RNS) && + !kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, RANGE)) + return false; + + return true; +} + +static inline bool kvm_supported_tlbi_s1e2_op(struct kvm_vcpu *vpcu, u32 instr) +{ + struct kvm *kvm = vpcu->kvm; + u8 CRm = sys_reg_CRm(instr); + + if (!(sys_reg_Op0(instr) == TLBI_Op0 && + sys_reg_Op1(instr) == TLBI_Op1_EL2)) + return false; + + if (!(sys_reg_CRn(instr) == TLBI_CRn_XS || + (sys_reg_CRn(instr) == TLBI_CRn_nXS && + kvm_has_feat(kvm, ID_AA64ISAR1_EL1, XS, IMP)))) + return false; + + if (CRm == TLBI_CRm_IPAIS || CRm == TLBI_CRm_IPAONS) + return false; + + if (CRm == TLBI_CRm_nROS && + !kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, OS)) + return false; + + if ((CRm == TLBI_CRm_RIS || CRm == TLBI_CRm_ROS || + CRm == TLBI_CRm_RNS) && + !kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, RANGE)) + return false; + + return true; +} + +int kvm_init_nv_sysregs(struct kvm_vcpu *vcpu); +u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val); + +#ifdef CONFIG_ARM64_PTR_AUTH +bool kvm_auth_eretax(struct kvm_vcpu *vcpu, u64 *elr); +#else +static inline bool kvm_auth_eretax(struct kvm_vcpu *vcpu, u64 *elr) +{ + /* We really should never execute this... */ + WARN_ON_ONCE(1); + *elr = 0xbad9acc0debadbad; + return false; +} +#endif + +#define KVM_NV_GUEST_MAP_SZ (KVM_PGTABLE_PROT_SW1 | KVM_PGTABLE_PROT_SW0) + +static inline u64 kvm_encode_nested_level(struct kvm_s2_trans *trans) +{ + return FIELD_PREP(KVM_NV_GUEST_MAP_SZ, trans->level); +} + +/* Adjust alignment for the contiguous bit as per StageOA() */ +#define contiguous_bit_shift(d, wi, l) \ + ({ \ + u8 shift = 0; \ + \ + if ((d) & PTE_CONT) { \ + switch (BIT((wi)->pgshift)) { \ + case SZ_4K: \ + shift = 4; \ + break; \ + case SZ_16K: \ + shift = (l) == 2 ? 
5 : 7; \ + break; \ + case SZ_64K: \ + shift = 5; \ + break; \ + } \ + } \ + \ + shift; \ + }) + +static inline u64 decode_range_tlbi(u64 val, u64 *range, u16 *asid) +{ + u64 base, tg, num, scale; + int shift; + + tg = FIELD_GET(GENMASK(47, 46), val); + + switch(tg) { + case 1: + shift = 12; + break; + case 2: + shift = 14; + break; + case 3: + default: /* IMPDEF: handle tg==0 as 64k */ + shift = 16; + break; + } + + base = (val & GENMASK(36, 0)) << shift; + + if (asid) + *asid = FIELD_GET(TLBIR_ASID_MASK, val); + + scale = FIELD_GET(GENMASK(45, 44), val); + num = FIELD_GET(GENMASK(43, 39), val); + *range = __TLBI_RANGE_PAGES(num, scale) << shift; + + return base; +} + +static inline unsigned int ps_to_output_size(unsigned int ps) +{ + switch (ps) { + case 0: return 32; + case 1: return 36; + case 2: return 40; + case 3: return 42; + case 4: return 44; + case 5: + default: + return 48; + } +} + +enum trans_regime { + TR_EL10, + TR_EL20, + TR_EL2, +}; + +struct s1_walk_info { + u64 baddr; + enum trans_regime regime; + unsigned int max_oa_bits; + unsigned int pgshift; + unsigned int txsz; + int sl; + bool as_el0; + bool hpd; + bool e0poe; + bool poe; + bool pan; + bool be; + bool s2; +}; + +struct s1_walk_result { + union { + struct { + u64 desc; + u64 pa; + s8 level; + u8 APTable; + bool nG; + u16 asid; + bool UXNTable; + bool PXNTable; + bool uwxn; + bool uov; + bool ur; + bool uw; + bool ux; + bool pwxn; + bool pov; + bool pr; + bool pw; + bool px; + }; + struct { + u8 fst; + bool ptw; + bool s2; + }; + }; + bool failed; +}; + +int __kvm_translate_va(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, + struct s1_walk_result *wr, u64 va); + +/* VNCR management */ +int kvm_vcpu_allocate_vncr_tlb(struct kvm_vcpu *vcpu); +int kvm_handle_vncr_abort(struct kvm_vcpu *vcpu); +void kvm_handle_s1e2_tlbi(struct kvm_vcpu *vcpu, u32 inst, u64 val); + +#define vncr_fixmap(c) \ + ({ \ + u32 __c = (c); \ + BUG_ON(__c >= NR_CPUS); \ + (FIX_VNCR - __c); \ + }) + +#endif /* __ARM64_KVM_NESTED_H */ diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index 9f339dffbc1a..1246216616b5 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -11,14 +11,39 @@ #include <linux/kvm_host.h> #include <linux/types.h> -#define KVM_PGTABLE_MAX_LEVELS 4U +#define KVM_PGTABLE_FIRST_LEVEL -1 +#define KVM_PGTABLE_LAST_LEVEL 3 + +/* + * The largest supported block sizes for KVM (no 52-bit PA support): + * - 4K (level 1): 1GB + * - 16K (level 2): 32MB + * - 64K (level 2): 512MB + */ +#ifdef CONFIG_ARM64_4K_PAGES +#define KVM_PGTABLE_MIN_BLOCK_LEVEL 1 +#else +#define KVM_PGTABLE_MIN_BLOCK_LEVEL 2 +#endif + +#define kvm_lpa2_is_enabled() system_supports_lpa2() + +static inline u64 kvm_get_parange_max(void) +{ + if (kvm_lpa2_is_enabled() || + (IS_ENABLED(CONFIG_ARM64_PA_BITS_52) && PAGE_SHIFT == 16)) + return ID_AA64MMFR0_EL1_PARANGE_52; + else + return ID_AA64MMFR0_EL1_PARANGE_48; +} static inline u64 kvm_get_parange(u64 mmfr0) { + u64 parange_max = kvm_get_parange_max(); u64 parange = cpuid_feature_extract_unsigned_field(mmfr0, - ID_AA64MMFR0_PARANGE_SHIFT); - if (parange > ID_AA64MMFR0_PARANGE_MAX) - parange = ID_AA64MMFR0_PARANGE_MAX; + ID_AA64MMFR0_EL1_PARANGE_SHIFT); + if (parange > parange_max) + parange = parange_max; return parange; } @@ -29,6 +54,57 @@ typedef u64 kvm_pte_t; #define KVM_PTE_ADDR_MASK GENMASK(47, PAGE_SHIFT) #define KVM_PTE_ADDR_51_48 GENMASK(15, 12) +#define KVM_PTE_ADDR_MASK_LPA2 GENMASK(49, PAGE_SHIFT) +#define 
KVM_PTE_ADDR_51_50_LPA2 GENMASK(9, 8) + +#define KVM_PHYS_INVALID (-1ULL) + +#define KVM_PTE_TYPE BIT(1) +#define KVM_PTE_TYPE_BLOCK 0 +#define KVM_PTE_TYPE_PAGE 1 +#define KVM_PTE_TYPE_TABLE 1 + +#define KVM_PTE_LEAF_ATTR_LO GENMASK(11, 2) + +#define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX GENMASK(4, 2) +#define KVM_PTE_LEAF_ATTR_LO_S1_AP GENMASK(7, 6) +#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RO \ + ({ cpus_have_final_cap(ARM64_KVM_HVHE) ? 2 : 3; }) +#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RW \ + ({ cpus_have_final_cap(ARM64_KVM_HVHE) ? 0 : 1; }) +#define KVM_PTE_LEAF_ATTR_LO_S1_SH GENMASK(9, 8) +#define KVM_PTE_LEAF_ATTR_LO_S1_SH_IS 3 +#define KVM_PTE_LEAF_ATTR_LO_S1_AF BIT(10) + +#define KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR GENMASK(5, 2) +#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R BIT(6) +#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W BIT(7) +#define KVM_PTE_LEAF_ATTR_LO_S2_SH GENMASK(9, 8) +#define KVM_PTE_LEAF_ATTR_LO_S2_SH_IS 3 +#define KVM_PTE_LEAF_ATTR_LO_S2_AF BIT(10) + +#define KVM_PTE_LEAF_ATTR_HI GENMASK(63, 50) + +#define KVM_PTE_LEAF_ATTR_HI_SW GENMASK(58, 55) + +#define KVM_PTE_LEAF_ATTR_HI_S1_XN BIT(54) + +#define KVM_PTE_LEAF_ATTR_HI_S2_XN BIT(54) + +#define KVM_PTE_LEAF_ATTR_HI_S1_GP BIT(50) + +#define KVM_PTE_LEAF_ATTR_S2_PERMS (KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | \ + KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \ + KVM_PTE_LEAF_ATTR_HI_S2_XN) + +#define KVM_INVALID_PTE_OWNER_MASK GENMASK(9, 2) +#define KVM_MAX_OWNER_ID 1 + +/* + * Used to indicate a pte for which a 'break-before-make' sequence is in + * progress. + */ +#define KVM_INVALID_PTE_LOCKED BIT(10) static inline bool kvm_pte_valid(kvm_pte_t pte) { @@ -37,32 +113,76 @@ static inline bool kvm_pte_valid(kvm_pte_t pte) static inline u64 kvm_pte_to_phys(kvm_pte_t pte) { - u64 pa = pte & KVM_PTE_ADDR_MASK; + u64 pa; - if (PAGE_SHIFT == 16) - pa |= FIELD_GET(KVM_PTE_ADDR_51_48, pte) << 48; + if (kvm_lpa2_is_enabled()) { + pa = pte & KVM_PTE_ADDR_MASK_LPA2; + pa |= FIELD_GET(KVM_PTE_ADDR_51_50_LPA2, pte) << 50; + } else { + pa = pte & KVM_PTE_ADDR_MASK; + if (PAGE_SHIFT == 16) + pa |= FIELD_GET(KVM_PTE_ADDR_51_48, pte) << 48; + } return pa; } -static inline u64 kvm_granule_shift(u32 level) +static inline kvm_pte_t kvm_phys_to_pte(u64 pa) +{ + kvm_pte_t pte; + + if (kvm_lpa2_is_enabled()) { + pte = pa & KVM_PTE_ADDR_MASK_LPA2; + pa &= GENMASK(51, 50); + pte |= FIELD_PREP(KVM_PTE_ADDR_51_50_LPA2, pa >> 50); + } else { + pte = pa & KVM_PTE_ADDR_MASK; + if (PAGE_SHIFT == 16) { + pa &= GENMASK(51, 48); + pte |= FIELD_PREP(KVM_PTE_ADDR_51_48, pa >> 48); + } + } + + return pte; +} + +static inline kvm_pfn_t kvm_pte_to_pfn(kvm_pte_t pte) { - /* Assumes KVM_PGTABLE_MAX_LEVELS is 4 */ + return __phys_to_pfn(kvm_pte_to_phys(pte)); +} + +static inline u64 kvm_granule_shift(s8 level) +{ + /* Assumes KVM_PGTABLE_LAST_LEVEL is 3 */ return ARM64_HW_PGTABLE_LEVEL_SHIFT(level); } -static inline u64 kvm_granule_size(u32 level) +static inline u64 kvm_granule_size(s8 level) { return BIT(kvm_granule_shift(level)); } -static inline bool kvm_level_supports_block_mapping(u32 level) +static inline bool kvm_level_supports_block_mapping(s8 level) { - /* - * Reject invalid block mappings and don't bother with 4TB mappings for - * 52-bit PAs. 
- */ - return !(level == 0 || (PAGE_SIZE != SZ_4K && level == 1)); + return level >= KVM_PGTABLE_MIN_BLOCK_LEVEL; +} + +static inline u32 kvm_supported_block_sizes(void) +{ + s8 level = KVM_PGTABLE_MIN_BLOCK_LEVEL; + u32 r = 0; + + for (; level <= KVM_PGTABLE_LAST_LEVEL; level++) + r |= BIT(kvm_granule_shift(level)); + + return r; +} + +static inline bool kvm_is_block_size_supported(u64 size) +{ + bool is_power_of_two = IS_ALIGNED(size, size); + + return is_power_of_two && (size & kvm_supported_block_sizes()); } /** @@ -77,6 +197,8 @@ static inline bool kvm_level_supports_block_mapping(u32 level) * allocation is physically contiguous. * @free_pages_exact: Free an exact number of memory pages previously * allocated by zalloc_pages_exact. + * @free_unlinked_table: Free an unlinked paging structure by unlinking and + * dropping references. * @get_page: Increment the refcount on a page. * @put_page: Decrement the refcount on a page. When the * refcount reaches 0 the page is automatically @@ -95,6 +217,7 @@ struct kvm_pgtable_mm_ops { void* (*zalloc_page)(void *arg); void* (*zalloc_pages_exact)(size_t size); void (*free_pages_exact)(void *addr, size_t size); + void (*free_unlinked_table)(void *addr, s8 level); void (*get_page)(void *addr); void (*put_page)(void *addr); int (*page_count)(void *addr); @@ -121,6 +244,7 @@ enum kvm_pgtable_stage2_flags { * @KVM_PGTABLE_PROT_W: Write permission. * @KVM_PGTABLE_PROT_R: Read permission. * @KVM_PGTABLE_PROT_DEVICE: Device attributes. + * @KVM_PGTABLE_PROT_NORMAL_NC: Normal noncacheable attributes. * @KVM_PGTABLE_PROT_SW0: Software bit 0. * @KVM_PGTABLE_PROT_SW1: Software bit 1. * @KVM_PGTABLE_PROT_SW2: Software bit 2. @@ -132,6 +256,7 @@ enum kvm_pgtable_prot { KVM_PGTABLE_PROT_R = BIT(2), KVM_PGTABLE_PROT_DEVICE = BIT(3), + KVM_PGTABLE_PROT_NORMAL_NC = BIT(4), KVM_PGTABLE_PROT_SW0 = BIT(55), KVM_PGTABLE_PROT_SW1 = BIT(56), @@ -154,29 +279,6 @@ typedef bool (*kvm_pgtable_force_pte_cb_t)(u64 addr, u64 end, enum kvm_pgtable_prot prot); /** - * struct kvm_pgtable - KVM page-table. - * @ia_bits: Maximum input address size, in bits. - * @start_level: Level at which the page-table walk starts. - * @pgd: Pointer to the first top-level entry of the page-table. - * @mm_ops: Memory management callbacks. - * @mmu: Stage-2 KVM MMU struct. Unused for stage-1 page-tables. - * @flags: Stage-2 page-table flags. - * @force_pte_cb: Function that returns true if page level mappings must - * be used instead of block mappings. - */ -struct kvm_pgtable { - u32 ia_bits; - u32 start_level; - kvm_pte_t *pgd; - struct kvm_pgtable_mm_ops *mm_ops; - - /* Stage-2 only */ - struct kvm_s2_mmu *mmu; - enum kvm_pgtable_stage2_flags flags; - kvm_pgtable_force_pte_cb_t force_pte_cb; -}; - -/** * enum kvm_pgtable_walk_flags - Flags to control a depth-first page-table walk. * @KVM_PGTABLE_WALK_LEAF: Visit leaf entries, including invalid * entries. @@ -184,17 +286,46 @@ struct kvm_pgtable { * children. * @KVM_PGTABLE_WALK_TABLE_POST: Visit table entries after their * children. + * @KVM_PGTABLE_WALK_SHARED: Indicates the page-tables may be shared + * with other software walkers. + * @KVM_PGTABLE_WALK_HANDLE_FAULT: Indicates the page-table walk was + * invoked from a fault handler. + * @KVM_PGTABLE_WALK_SKIP_BBM_TLBI: Visit and update table entries + * without Break-before-make's + * TLB invalidation. + * @KVM_PGTABLE_WALK_SKIP_CMO: Visit and update table entries + * without Cache maintenance + * operations required. 
*/ enum kvm_pgtable_walk_flags { KVM_PGTABLE_WALK_LEAF = BIT(0), KVM_PGTABLE_WALK_TABLE_PRE = BIT(1), KVM_PGTABLE_WALK_TABLE_POST = BIT(2), + KVM_PGTABLE_WALK_SHARED = BIT(3), + KVM_PGTABLE_WALK_HANDLE_FAULT = BIT(4), + KVM_PGTABLE_WALK_SKIP_BBM_TLBI = BIT(5), + KVM_PGTABLE_WALK_SKIP_CMO = BIT(6), +}; + +struct kvm_pgtable_visit_ctx { + kvm_pte_t *ptep; + kvm_pte_t old; + void *arg; + struct kvm_pgtable_mm_ops *mm_ops; + u64 start; + u64 addr; + u64 end; + s8 level; + enum kvm_pgtable_walk_flags flags; }; -typedef int (*kvm_pgtable_visitor_fn_t)(u64 addr, u64 end, u32 level, - kvm_pte_t *ptep, - enum kvm_pgtable_walk_flags flag, - void * const arg); +typedef int (*kvm_pgtable_visitor_fn_t)(const struct kvm_pgtable_visit_ctx *ctx, + enum kvm_pgtable_walk_flags visit); + +static inline bool kvm_pgtable_walk_shared(const struct kvm_pgtable_visit_ctx *ctx) +{ + return ctx->flags & KVM_PGTABLE_WALK_SHARED; +} /** * struct kvm_pgtable_walker - Hook into a page-table walk. @@ -209,6 +340,109 @@ struct kvm_pgtable_walker { const enum kvm_pgtable_walk_flags flags; }; +/* + * RCU cannot be used in a non-kernel context such as the hyp. As such, page + * table walkers used in hyp do not call into RCU and instead use other + * synchronization mechanisms (such as a spinlock). + */ +#if defined(__KVM_NVHE_HYPERVISOR__) || defined(__KVM_VHE_HYPERVISOR__) + +typedef kvm_pte_t *kvm_pteref_t; + +static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walker, + kvm_pteref_t pteref) +{ + return pteref; +} + +static inline kvm_pte_t *kvm_dereference_pteref_raw(kvm_pteref_t pteref) +{ + return pteref; +} + +static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker) +{ + /* + * Due to the lack of RCU (or a similar protection scheme), only + * non-shared table walkers are allowed in the hypervisor. + */ + if (walker->flags & KVM_PGTABLE_WALK_SHARED) + return -EPERM; + + return 0; +} + +static inline void kvm_pgtable_walk_end(struct kvm_pgtable_walker *walker) {} + +static inline bool kvm_pgtable_walk_lock_held(void) +{ + return true; +} + +#else + +typedef kvm_pte_t __rcu *kvm_pteref_t; + +static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walker, + kvm_pteref_t pteref) +{ + return rcu_dereference_check(pteref, !(walker->flags & KVM_PGTABLE_WALK_SHARED)); +} + +static inline kvm_pte_t *kvm_dereference_pteref_raw(kvm_pteref_t pteref) +{ + return rcu_dereference_raw(pteref); +} + +static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker) +{ + if (walker->flags & KVM_PGTABLE_WALK_SHARED) + rcu_read_lock(); + + return 0; +} + +static inline void kvm_pgtable_walk_end(struct kvm_pgtable_walker *walker) +{ + if (walker->flags & KVM_PGTABLE_WALK_SHARED) + rcu_read_unlock(); +} + +static inline bool kvm_pgtable_walk_lock_held(void) +{ + return rcu_read_lock_held(); +} + +#endif + +/** + * struct kvm_pgtable - KVM page-table. + * @ia_bits: Maximum input address size, in bits. + * @start_level: Level at which the page-table walk starts. + * @pgd: Pointer to the first top-level entry of the page-table. + * @mm_ops: Memory management callbacks. + * @mmu: Stage-2 KVM MMU struct. Unused for stage-1 page-tables. + * @flags: Stage-2 page-table flags. + * @force_pte_cb: Function that returns true if page level mappings must + * be used instead of block mappings. 
+ */ +struct kvm_pgtable { + union { + struct rb_root_cached pkvm_mappings; + struct { + u32 ia_bits; + s8 start_level; + kvm_pteref_t pgd; + struct kvm_pgtable_mm_ops *mm_ops; + + /* Stage-2 only */ + enum kvm_pgtable_stage2_flags flags; + kvm_pgtable_force_pte_cb_t force_pte_cb; + }; + }; + struct kvm_s2_mmu *mmu; +}; + /** * kvm_pgtable_hyp_init() - Initialise a hypervisor stage-1 page-table. * @pgt: Uninitialised page-table structure to initialise. @@ -289,6 +523,14 @@ u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size); u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift); /** + * kvm_pgtable_stage2_pgd_size() - Helper to compute size of a stage-2 PGD + * @vtcr: Content of the VTCR register. + * + * Return: the size (in bytes) of the stage-2 PGD + */ +size_t kvm_pgtable_stage2_pgd_size(u64 vtcr); + +/** * __kvm_pgtable_stage2_init() - Initialise a guest stage-2 page-table. * @pgt: Uninitialised page-table structure to initialise. * @mmu: S2 MMU context for this S2 translation @@ -304,8 +546,11 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu, enum kvm_pgtable_stage2_flags flags, kvm_pgtable_force_pte_cb_t force_pte_cb); -#define kvm_pgtable_stage2_init(pgt, mmu, mm_ops) \ - __kvm_pgtable_stage2_init(pgt, mmu, mm_ops, 0, NULL) +static inline int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu, + struct kvm_pgtable_mm_ops *mm_ops) +{ + return __kvm_pgtable_stage2_init(pgt, mmu, mm_ops, 0, NULL); +} /** * kvm_pgtable_stage2_destroy() - Destroy an unused guest stage-2 page-table. @@ -317,6 +562,63 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu, void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt); /** + * kvm_pgtable_stage2_destroy_range() - Destroy the unlinked range of addresses. + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). + * @addr: Intermediate physical address at which to place the mapping. + * @size: Size of the mapping. + * + * The page-table is assumed to be unreachable by any hardware walkers prior + * to freeing and therefore no TLB invalidation is performed. + */ +void kvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt, + u64 addr, u64 size); + +/** + * kvm_pgtable_stage2_destroy_pgd() - Destroy the PGD of guest stage-2 page-table. + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). + * + * It is assumed that the rest of the page-table is freed before this operation. + */ +void kvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt); + +/** + * kvm_pgtable_stage2_free_unlinked() - Free an unlinked stage-2 paging structure. + * @mm_ops: Memory management callbacks. + * @pgtable: Unlinked stage-2 paging structure to be freed. + * @level: Level of the stage-2 paging structure to be freed. + * + * The page-table is assumed to be unreachable by any hardware walkers prior to + * freeing and therefore no TLB invalidation is performed. + */ +void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level); + +/** + * kvm_pgtable_stage2_create_unlinked() - Create an unlinked stage-2 paging structure. + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). + * @phys: Physical address of the memory to map. + * @level: Starting level of the stage-2 paging structure to be created. + * @prot: Permissions and attributes for the mapping. + * @mc: Cache of pre-allocated and zeroed memory from which to allocate + * page-table pages. 
+ * @force_pte: Force mappings to PAGE_SIZE granularity. + * + * Returns an unlinked page-table tree. This new page-table tree is + * not reachable (i.e., it is unlinked) from the root pgd and it's + * therefore unreachable by the hardware page-table walker. No TLB + * invalidation or CMOs are performed. + * + * If device attributes are not explicitly requested in @prot, then the + * mapping will be normal, cacheable. + * + * Return: The fully populated (unlinked) stage-2 paging structure, or + * an ERR_PTR(error) on failure. + */ +kvm_pte_t *kvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt, + u64 phys, s8 level, + enum kvm_pgtable_prot prot, + void *mc, bool force_pte); + +/** * kvm_pgtable_stage2_map() - Install a mapping in a guest stage-2 page-table. * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). * @addr: Intermediate physical address at which to place the mapping. @@ -325,6 +627,7 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt); * @prot: Permissions and attributes for the mapping. * @mc: Cache of pre-allocated and zeroed memory from which to allocate * page-table pages. + * @flags: Flags to control the page-table walk (ex. a shared walk) * * The offset of @addr within a page is ignored, @size is rounded-up to * the next page boundary and @phys is rounded-down to the previous page @@ -346,7 +649,7 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt); */ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, enum kvm_pgtable_prot prot, - void *mc); + void *mc, enum kvm_pgtable_walk_flags flags); /** * kvm_pgtable_stage2_set_owner() - Unmap and annotate pages in the IPA space to @@ -409,33 +712,37 @@ int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size); * kvm_pgtable_stage2_mkyoung() - Set the access flag in a page-table entry. * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). * @addr: Intermediate physical address to identify the page-table entry. + * @flags: Flags to control the page-table walk (ex. a shared walk) * * The offset of @addr within a page is ignored. * * If there is a valid, leaf page-table entry used to translate @addr, then * set the access flag in that entry. - * - * Return: The old page-table entry prior to setting the flag, 0 on failure. */ -kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr); +void kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr, + enum kvm_pgtable_walk_flags flags); /** - * kvm_pgtable_stage2_mkold() - Clear the access flag in a page-table entry. + * kvm_pgtable_stage2_test_clear_young() - Test and optionally clear the access + * flag in a page-table entry. * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). * @addr: Intermediate physical address to identify the page-table entry. + * @size: Size of the address range to visit. + * @mkold: True if the access flag should be cleared. * * The offset of @addr within a page is ignored. * - * If there is a valid, leaf page-table entry used to translate @addr, then - * clear the access flag in that entry. + * Tests and conditionally clears the access flag for every valid, leaf + * page-table entry used to translate the range [@addr, @addr + @size). * * Note that it is the caller's responsibility to invalidate the TLB after * calling this function to ensure that the updated permissions are visible * to the CPUs. * - * Return: The old page-table entry prior to clearing the flag, 0 on failure.
+ * Return: True if any of the visited PTEs had the access flag set. */ -kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr); +bool kvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr, + u64 size, bool mkold); /** * kvm_pgtable_stage2_relax_perms() - Relax the permissions enforced by a @@ -443,6 +750,7 @@ kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr); * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). * @addr: Intermediate physical address to identify the page-table entry. * @prot: Additional permissions to grant for the mapping. + * @flags: Flags to control the page-table walk (ex. a shared walk) * * The offset of @addr within a page is ignored. * @@ -455,19 +763,8 @@ kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr); * Return: 0 on success, negative error code on failure. */ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, - enum kvm_pgtable_prot prot); - -/** - * kvm_pgtable_stage2_is_young() - Test whether a page-table entry has the - * access flag set. - * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). - * @addr: Intermediate physical address to identify the page-table entry. - * - * The offset of @addr within a page is ignored. - * - * Return: True if the page-table entry has the access flag set, false otherwise. - */ -bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr); + enum kvm_pgtable_prot prot, + enum kvm_pgtable_walk_flags flags); /** * kvm_pgtable_stage2_flush_range() - Clean and invalidate data cache to Point @@ -485,6 +782,25 @@ bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr); int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size); /** + * kvm_pgtable_stage2_split() - Split a range of huge pages into leaf PTEs pointing + * to PAGE_SIZE guest pages. + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init(). + * @addr: Intermediate physical address from which to split. + * @size: Size of the range. + * @mc: Cache of pre-allocated and zeroed memory from which to allocate + * page-table pages. + * + * The function tries to split any level 1 or 2 entry that overlaps + * with the input range (given by @addr and @size). + * + * Return: 0 on success, negative error code on failure. Note that + * kvm_pgtable_stage2_split() is best effort: it tries to break as many + * blocks in the input range as allowed by @mc_capacity. + */ +int kvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size, + struct kvm_mmu_memory_cache *mc); + +/** * kvm_pgtable_walk() - Walk a page-table. * @pgt: Page-table structure initialised by kvm_pgtable_*_init(). * @addr: Input address for the start of the walk. @@ -496,9 +812,9 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size); * * The walker will walk the page-table entries corresponding to the input * address range specified, visiting entries according to the walker flags. - * Invalid entries are treated as leaf entries. Leaf entries are reloaded - * after invoking the walker callback, allowing the walker to descend into - * a newly installed table. + * Invalid entries are treated as leaf entries. The visited page table entry is + * reloaded after invoking the walker callback, allowing the walker to descend + * into a newly installed table. * * Returning a negative error code from the walker callback function will * terminate the walk immediately with the same error code. 
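Illustrative aside, not part of the patch: the walk documented above is callback-driven, with kvm_pgtable_walk() invoking a kvm_pgtable_visitor_fn_t for each visited entry and passing it the kvm_pgtable_visit_ctx introduced in this file. A minimal leaf-only visitor built on those fields might look like the sketch below; the kvm_pgtable_walker members .cb and .arg are assumed from context, since only its .flags member appears in this hunk.

/* Sketch only: count how many pages are mapped in an IPA range. */
static int count_mapped_cb(const struct kvm_pgtable_visit_ctx *ctx,
			   enum kvm_pgtable_walk_flags visit)
{
	u64 *pages = ctx->arg;

	/* ctx->old is the PTE value seen by the walker; skip invalid entries */
	if (kvm_pte_valid(ctx->old))
		*pages += kvm_granule_size(ctx->level) >> PAGE_SHIFT;

	return 0;
}

static int count_mapped_pages(struct kvm_pgtable *pgt, u64 addr, u64 size,
			      u64 *pages)
{
	struct kvm_pgtable_walker walker = {
		.cb	= count_mapped_cb,
		.arg	= pages,
		.flags	= KVM_PGTABLE_WALK_LEAF,
	};

	*pages = 0;
	return kvm_pgtable_walk(pgt, addr, size, &walker);
}

Returning a non-zero value from the callback would terminate the walk with that error code, per the behaviour described above.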
@@ -526,7 +842,7 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size, * Return: 0 on success, negative error code on failure. */ int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr, - kvm_pte_t *ptep, u32 *level); + kvm_pte_t *ptep, s8 *level); /** * kvm_pgtable_stage2_pte_prot() - Retrieve the protection attributes of a @@ -547,4 +863,14 @@ enum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte); * kvm_pgtable_prot format. */ enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte); + +/** + * kvm_tlb_flush_vmid_range() - Invalidate/flush a range of TLB entries + * + * @mmu: Stage-2 KVM MMU struct + * @addr: The base Intermediate physical address from which to invalidate + * @size: Size of the range from the base to invalidate + */ +void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, + phys_addr_t addr, size_t size); #endif /* __ARM64_KVM_PGTABLE_H__ */ diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h index 9f4ad2a8df59..35f9d9478004 100644 --- a/arch/arm64/include/asm/kvm_pkvm.h +++ b/arch/arm64/include/asm/kvm_pkvm.h @@ -6,20 +6,87 @@ #ifndef __ARM64_KVM_PKVM_H__ #define __ARM64_KVM_PKVM_H__ +#include <linux/arm_ffa.h> #include <linux/memblock.h> +#include <linux/scatterlist.h> #include <asm/kvm_pgtable.h> +/* Maximum number of VMs that can co-exist under pKVM. */ +#define KVM_MAX_PVMS 255 + #define HYP_MEMBLOCK_REGIONS 128 +int pkvm_init_host_vm(struct kvm *kvm); +int pkvm_create_hyp_vm(struct kvm *kvm); +void pkvm_destroy_hyp_vm(struct kvm *kvm); +int pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu); + +/* + * This functions as an allow-list of protected VM capabilities. + * Features not explicitly allowed by this function are denied. + */ +static inline bool kvm_pvm_ext_allowed(long ext) +{ + switch (ext) { + case KVM_CAP_IRQCHIP: + case KVM_CAP_ARM_PSCI: + case KVM_CAP_ARM_PSCI_0_2: + case KVM_CAP_NR_VCPUS: + case KVM_CAP_MAX_VCPUS: + case KVM_CAP_MAX_VCPU_ID: + case KVM_CAP_MSI_DEVID: + case KVM_CAP_ARM_VM_IPA_SIZE: + case KVM_CAP_ARM_PMU_V3: + case KVM_CAP_ARM_SVE: + case KVM_CAP_ARM_PTRAUTH_ADDRESS: + case KVM_CAP_ARM_PTRAUTH_GENERIC: + return true; + default: + return false; + } +} + extern struct memblock_region kvm_nvhe_sym(hyp_memory)[]; extern unsigned int kvm_nvhe_sym(hyp_memblock_nr); +static inline unsigned long +hyp_vmemmap_memblock_size(struct memblock_region *reg, size_t vmemmap_entry_size) +{ + unsigned long nr_pages = reg->size >> PAGE_SHIFT; + unsigned long start, end; + + start = (reg->base >> PAGE_SHIFT) * vmemmap_entry_size; + end = start + nr_pages * vmemmap_entry_size; + start = ALIGN_DOWN(start, PAGE_SIZE); + end = ALIGN(end, PAGE_SIZE); + + return end - start; +} + +static inline unsigned long hyp_vmemmap_pages(size_t vmemmap_entry_size) +{ + unsigned long res = 0, i; + + for (i = 0; i < kvm_nvhe_sym(hyp_memblock_nr); i++) { + res += hyp_vmemmap_memblock_size(&kvm_nvhe_sym(hyp_memory)[i], + vmemmap_entry_size); + } + + return res >> PAGE_SHIFT; +} + +static inline unsigned long hyp_vm_table_pages(void) +{ + return PAGE_ALIGN(KVM_MAX_PVMS * sizeof(void *)) >> PAGE_SHIFT; +} + static inline unsigned long __hyp_pgtable_max_pages(unsigned long nr_pages) { - unsigned long total = 0, i; + unsigned long total = 0; + int i; /* Provision the worst case scenario */ - for (i = 0; i < KVM_PGTABLE_MAX_LEVELS; i++) { + for (i = KVM_PGTABLE_FIRST_LEVEL; i <= KVM_PGTABLE_LAST_LEVEL; i++) { nr_pages = DIV_ROUND_UP(nr_pages, PTRS_PER_PTE); total += nr_pages; } @@ -68,4 +135,68 @@ static inline unsigned long 
host_s2_pgtable_pages(void) return res; } +#ifdef CONFIG_NVHE_EL2_DEBUG +static inline unsigned long pkvm_selftest_pages(void) { return 32; } +#else +static inline unsigned long pkvm_selftest_pages(void) { return 0; } +#endif + +#define KVM_FFA_MBOX_NR_PAGES 1 + +static inline unsigned long hyp_ffa_proxy_pages(void) +{ + size_t desc_max; + + /* + * The hypervisor FFA proxy needs enough memory to buffer a fragmented + * descriptor returned from EL3 in response to a RETRIEVE_REQ call. + */ + desc_max = sizeof(struct ffa_mem_region) + + sizeof(struct ffa_mem_region_attributes) + + sizeof(struct ffa_composite_mem_region) + + SG_MAX_SEGMENTS * sizeof(struct ffa_mem_region_addr_range); + + /* Plus a page each for the hypervisor's RX and TX mailboxes. */ + return (2 * KVM_FFA_MBOX_NR_PAGES) + DIV_ROUND_UP(desc_max, PAGE_SIZE); +} + +static inline size_t pkvm_host_sve_state_size(void) +{ + if (!system_supports_sve()) + return 0; + + return size_add(sizeof(struct cpu_sve_state), + SVE_SIG_REGS_SIZE(sve_vq_from_vl(kvm_host_sve_max_vl))); +} + +struct pkvm_mapping { + struct rb_node node; + u64 gfn; + u64 pfn; + u64 nr_pages; + u64 __subtree_last; /* Internal member for interval tree */ +}; + +int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu, + struct kvm_pgtable_mm_ops *mm_ops); +void pkvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt, + u64 addr, u64 size); +void pkvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt); +int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, + enum kvm_pgtable_prot prot, void *mc, + enum kvm_pgtable_walk_flags flags); +int pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size); +int pkvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size); +int pkvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size); +bool pkvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr, u64 size, bool mkold); +int pkvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, enum kvm_pgtable_prot prot, + enum kvm_pgtable_walk_flags flags); +void pkvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr, + enum kvm_pgtable_walk_flags flags); +int pkvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size, + struct kvm_mmu_memory_cache *mc); +void pkvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level); +kvm_pte_t *pkvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt, u64 phys, s8 level, + enum kvm_pgtable_prot prot, void *mc, + bool force_pte); #endif /* __ARM64_KVM_PKVM_H__ */ diff --git a/arch/arm64/include/asm/kvm_ptrauth.h b/arch/arm64/include/asm/kvm_ptrauth.h index 0cd0965255d2..6199c9f7ec6e 100644 --- a/arch/arm64/include/asm/kvm_ptrauth.h +++ b/arch/arm64/include/asm/kvm_ptrauth.h @@ -99,5 +99,26 @@ alternative_else_nop_endif .macro ptrauth_switch_to_hyp g_ctxt, h_ctxt, reg1, reg2, reg3 .endm #endif /* CONFIG_ARM64_PTR_AUTH */ + +#else /* !__ASSEMBLY */ + +#define __ptrauth_save_key(ctxt, key) \ + do { \ + u64 __val; \ + __val = read_sysreg_s(SYS_ ## key ## KEYLO_EL1); \ + ctxt_sys_reg(ctxt, key ## KEYLO_EL1) = __val; \ + __val = read_sysreg_s(SYS_ ## key ## KEYHI_EL1); \ + ctxt_sys_reg(ctxt, key ## KEYHI_EL1) = __val; \ + } while(0) + +#define ptrauth_save_keys(ctxt) \ + do { \ + __ptrauth_save_key(ctxt, APIA); \ + __ptrauth_save_key(ctxt, APIB); \ + __ptrauth_save_key(ctxt, APDA); \ + __ptrauth_save_key(ctxt, APDB); \ + __ptrauth_save_key(ctxt, APGA); \ + } while(0) + #endif /* __ASSEMBLY__ 
*/ #endif /* __ASM_KVM_PTRAUTH_H */ diff --git a/arch/arm64/include/asm/kvm_ras.h b/arch/arm64/include/asm/kvm_ras.h deleted file mode 100644 index 87e10d9a635b..000000000000 --- a/arch/arm64/include/asm/kvm_ras.h +++ /dev/null @@ -1,25 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2018 - Arm Ltd */ - -#ifndef __ARM64_KVM_RAS_H__ -#define __ARM64_KVM_RAS_H__ - -#include <linux/acpi.h> -#include <linux/errno.h> -#include <linux/types.h> - -#include <asm/acpi.h> - -/* - * Was this synchronous external abort a RAS notification? - * Returns '0' for errors handled by some RAS subsystem, or -ENOENT. - */ -static inline int kvm_handle_guest_sea(phys_addr_t addr, u64 esr) -{ - /* apei_claim_sea(NULL) expects to mask interrupts itself */ - lockdep_assert_irqs_enabled(); - - return apei_claim_sea(NULL); -} - -#endif /* __ARM64_KVM_RAS_H__ */ diff --git a/arch/arm64/include/asm/linkage.h b/arch/arm64/include/asm/linkage.h index 43f8c25b3fda..d3acd9c87509 100644 --- a/arch/arm64/include/asm/linkage.h +++ b/arch/arm64/include/asm/linkage.h @@ -5,8 +5,8 @@ #include <asm/assembler.h> #endif -#define __ALIGN .align 2 -#define __ALIGN_STR ".align 2" +#define __ALIGN .balign CONFIG_FUNCTION_ALIGNMENT +#define __ALIGN_STR ".balign " #CONFIG_FUNCTION_ALIGNMENT /* * When using in-kernel BTI we need to ensure that PCS-conformant @@ -39,4 +39,8 @@ SYM_START(name, SYM_L_WEAK, SYM_A_NONE) \ bti c ; +#define SYM_TYPED_FUNC_START(name) \ + SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) \ + bti c ; + #endif diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h index 29c85810ae69..3129a5819d0e 100644 --- a/arch/arm64/include/asm/lse.h +++ b/arch/arm64/include/asm/lse.h @@ -10,22 +10,15 @@ #include <linux/compiler_types.h> #include <linux/export.h> -#include <linux/jump_label.h> #include <linux/stringify.h> #include <asm/alternative.h> +#include <asm/alternative-macros.h> #include <asm/atomic_lse.h> #include <asm/cpucaps.h> -extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS]; - -static __always_inline bool system_uses_lse_atomics(void) -{ - return static_branch_likely(&cpu_hwcap_keys[ARM64_HAS_LSE_ATOMICS]); -} - #define __lse_ll_sc_body(op, ...) \ ({ \ - system_uses_lse_atomics() ? \ + alternative_has_cap_likely(ARM64_HAS_LSE_ATOMICS) ? \ __lse_##op(__VA_ARGS__) : \ __ll_sc_##op(__VA_ARGS__); \ }) @@ -36,8 +29,6 @@ static __always_inline bool system_uses_lse_atomics(void) #else /* CONFIG_ARM64_LSE_ATOMICS */ -static inline bool system_uses_lse_atomics(void) { return false; } - #define __lse_ll_sc_body(op, ...) 
__ll_sc_##op(__VA_ARGS__) #define ARM64_LSE_ATOMIC_INSN(llsc, lse) llsc diff --git a/arch/arm64/include/asm/mem_encrypt.h b/arch/arm64/include/asm/mem_encrypt.h new file mode 100644 index 000000000000..314b2b52025f --- /dev/null +++ b/arch/arm64/include/asm/mem_encrypt.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __ASM_MEM_ENCRYPT_H +#define __ASM_MEM_ENCRYPT_H + +#include <asm/rsi.h> + +struct device; + +struct arm64_mem_crypt_ops { + int (*encrypt)(unsigned long addr, int numpages); + int (*decrypt)(unsigned long addr, int numpages); +}; + +int arm64_mem_crypt_ops_register(const struct arm64_mem_crypt_ops *ops); + +int set_memory_encrypted(unsigned long addr, int numpages); +int set_memory_decrypted(unsigned long addr, int numpages); + +int realm_register_memory_enc_ops(void); + +static inline bool force_dma_unencrypted(struct device *dev) +{ + return is_realm_world(); +} + +/* + * For Arm CCA guests, canonical addresses are "encrypted", so no changes + * required for dma_addr_encrypted(). + * The unencrypted DMA buffers must be accessed via the unprotected IPA, + * "top IPA bit" set. + */ +#define dma_addr_unencrypted(x) ((x) | PROT_NS_SHARED) + +/* Clear the "top" IPA bit while converting back */ +#define dma_addr_canonical(x) ((x) & ~PROT_NS_SHARED) + +#endif /* __ASM_MEM_ENCRYPT_H */ diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 227d256cd4b9..5213248e081b 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -30,8 +30,8 @@ * keep a constant PAGE_OFFSET and "fallback" to using the higher end * of the VMEMMAP where 52-bit support is not available in hardware. */ -#define VMEMMAP_SHIFT (PAGE_SHIFT - STRUCT_PAGE_MAX_SHIFT) -#define VMEMMAP_SIZE ((_PAGE_END(VA_BITS_MIN) - PAGE_OFFSET) >> VMEMMAP_SHIFT) +#define VMEMMAP_RANGE (_PAGE_END(VA_BITS_MIN) - PAGE_OFFSET) +#define VMEMMAP_SIZE ((VMEMMAP_RANGE >> PAGE_SHIFT) * sizeof(struct page)) /* * PAGE_OFFSET - the virtual address of the start of the linear map, at the @@ -46,15 +46,19 @@ #define KIMAGE_VADDR (MODULES_END) #define MODULES_END (MODULES_VADDR + MODULES_VSIZE) #define MODULES_VADDR (_PAGE_END(VA_BITS_MIN)) -#define MODULES_VSIZE (SZ_128M) -#define VMEMMAP_START (-(UL(1) << (VA_BITS - VMEMMAP_SHIFT))) -#define VMEMMAP_END (VMEMMAP_START + VMEMMAP_SIZE) -#define PCI_IO_END (VMEMMAP_START - SZ_8M) -#define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE) -#define FIXADDR_TOP (VMEMMAP_START - SZ_32M) +#define MODULES_VSIZE (SZ_2G) +#define VMEMMAP_START (VMEMMAP_END - VMEMMAP_SIZE) +#define VMEMMAP_END (-UL(SZ_1G)) +#define PCI_IO_START (VMEMMAP_END + SZ_8M) +#define PCI_IO_END (PCI_IO_START + PCI_IO_SIZE) +#define FIXADDR_TOP (-UL(SZ_8M)) #if VA_BITS > 48 +#ifdef CONFIG_ARM64_16K_PAGES +#define VA_BITS_MIN (47) +#else #define VA_BITS_MIN (48) +#endif #else #define VA_BITS_MIN (VA_BITS) #endif @@ -65,28 +69,56 @@ #define KERNEL_END _end /* - * Generic and tag-based KASAN require 1/8th and 1/16th of the kernel virtual - * address space for the shadow region respectively. They can bloat the stack - * significantly, so double the (minimum) stack size when they are in use. + * Generic and Software Tag-Based KASAN modes require 1/8th and 1/16th of the + * kernel virtual address space for storing the shadow memory respectively. 
+ * + * The mapping between a virtual memory address and its corresponding shadow + * memory address is defined based on the formula: + * + * shadow_addr = (addr >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET + * + * where KASAN_SHADOW_SCALE_SHIFT is the order of the number of bits that map + * to a single shadow byte and KASAN_SHADOW_OFFSET is a constant that offsets + * the mapping. Note that KASAN_SHADOW_OFFSET does not point to the start of + * the shadow memory region. + * + * Based on this mapping, we define two constants: + * + * KASAN_SHADOW_START: the start of the shadow memory region; + * KASAN_SHADOW_END: the end of the shadow memory region. + * + * KASAN_SHADOW_END is defined first as the shadow address that corresponds to + * the upper bound of possible virtual kernel memory addresses UL(1) << 64 + * according to the mapping formula. + * + * KASAN_SHADOW_START is defined second based on KASAN_SHADOW_END. The shadow + * memory start must map to the lowest possible kernel virtual memory address + * and thus it depends on the actual bitness of the address space. + * + * As KASAN inserts redzones between stack variables, this increases the stack + * memory usage significantly. Thus, we double the (minimum) stack size. */ #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) #define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL) -#define KASAN_SHADOW_END ((UL(1) << (64 - KASAN_SHADOW_SCALE_SHIFT)) \ - + KASAN_SHADOW_OFFSET) -#define PAGE_END (KASAN_SHADOW_END - (1UL << (vabits_actual - KASAN_SHADOW_SCALE_SHIFT))) +#define KASAN_SHADOW_END ((UL(1) << (64 - KASAN_SHADOW_SCALE_SHIFT)) + KASAN_SHADOW_OFFSET) +#define _KASAN_SHADOW_START(va) (KASAN_SHADOW_END - (UL(1) << ((va) - KASAN_SHADOW_SCALE_SHIFT))) +#define KASAN_SHADOW_START _KASAN_SHADOW_START(vabits_actual) +#define PAGE_END KASAN_SHADOW_START #define KASAN_THREAD_SHIFT 1 #else #define KASAN_THREAD_SHIFT 0 #define PAGE_END (_PAGE_END(VA_BITS_MIN)) #endif /* CONFIG_KASAN */ +#define DIRECT_MAP_PHYSMEM_END __pa(PAGE_END - 1) + #define MIN_THREAD_SHIFT (14 + KASAN_THREAD_SHIFT) /* * VMAP'd stacks are allocated at page granularity, so we must ensure that such * stacks are a multiple of page size. */ -#if defined(CONFIG_VMAP_STACK) && (MIN_THREAD_SHIFT < PAGE_SHIFT) +#if (MIN_THREAD_SHIFT < PAGE_SHIFT) #define THREAD_SHIFT PAGE_SHIFT #else #define THREAD_SHIFT MIN_THREAD_SHIFT @@ -103,16 +135,23 @@ * checking sp & (1 << THREAD_SHIFT), which we can do cheaply in the entry * assembly. */ -#ifdef CONFIG_VMAP_STACK #define THREAD_ALIGN (2 * THREAD_SIZE) -#else -#define THREAD_ALIGN THREAD_SIZE -#endif #define IRQ_STACK_SIZE THREAD_SIZE #define OVERFLOW_STACK_SIZE SZ_4K +#define NVHE_STACK_SHIFT PAGE_SHIFT +#define NVHE_STACK_SIZE (UL(1) << NVHE_STACK_SHIFT) + +/* + * With the minimum frame size of [x29, x30], exactly half the combined + * sizes of the hyp and overflow stacks is the maximum size needed to + * save the unwinded stacktrace; plus an additional entry to delimit the + * end. + */ +#define NVHE_STACKTRACE_SIZE ((OVERFLOW_STACK_SIZE + NVHE_STACK_SIZE) / 2 + sizeof(long)) + /* * Alignment of kernel segments (e.g. .text, .data). 
* @@ -139,6 +178,7 @@ * Memory types for Stage-2 translation */ #define MT_S2_NORMAL 0xf +#define MT_S2_NORMAL_NC 0x5 #define MT_S2_DEVICE_nGnRE 0x1 /* @@ -146,6 +186,7 @@ * Stage-2 enforces Normal-WB and Device-nGnRE */ #define MT_S2_FWB_NORMAL 6 +#define MT_S2_FWB_NORMAL_NC 5 #define MT_S2_FWB_DEVICE_nGnRE 1 #ifdef CONFIG_ARM64_4K_PAGES @@ -172,10 +213,23 @@ #include <linux/compiler.h> #include <linux/mmdebug.h> #include <linux/types.h> +#include <asm/boot.h> #include <asm/bug.h> +#include <asm/sections.h> +#include <asm/sysreg.h> + +static inline u64 __pure read_tcr(void) +{ + u64 tcr; + + // read_sysreg() uses asm volatile, so avoid it here + asm("mrs %0, tcr_el1" : "=r"(tcr)); + return tcr; +} #if VA_BITS > 48 -extern u64 vabits_actual; +// For reasons of #include hell, we can't use TCR_T1SZ_OFFSET/TCR_T1SZ_MASK here +#define vabits_actual (64 - ((read_tcr() >> 16) & 63)) #else #define vabits_actual ((u64)VA_BITS) #endif @@ -184,17 +238,26 @@ extern s64 memstart_addr; /* PHYS_OFFSET - the physical address of the start of memory. */ #define PHYS_OFFSET ({ VM_BUG_ON(memstart_addr & 1); memstart_addr; }) -/* the virtual base of the kernel image */ -extern u64 kimage_vaddr; - /* the offset between the kernel virtual and physical mappings */ extern u64 kimage_voffset; static inline unsigned long kaslr_offset(void) { - return kimage_vaddr - KIMAGE_VADDR; + return (u64)&_text - KIMAGE_VADDR; } +#ifdef CONFIG_RANDOMIZE_BASE +void kaslr_init(void); +static inline bool kaslr_enabled(void) +{ + extern bool __kaslr_is_enabled; + return __kaslr_is_enabled; +} +#else +static inline void kaslr_init(void) { } +static inline bool kaslr_enabled(void) { return false; } +#endif + /* * Allow all memory at the discovery stage. We will clip it later. */ @@ -242,9 +305,11 @@ static inline const void *__tag_set(const void *addr, u8 tag) } #ifdef CONFIG_KASAN_HW_TAGS -#define arch_enable_tagging_sync() mte_enable_kernel_sync() -#define arch_enable_tagging_async() mte_enable_kernel_async() -#define arch_enable_tagging_asymm() mte_enable_kernel_asymm() +#define arch_enable_tag_checks_sync() mte_enable_kernel_sync() +#define arch_enable_tag_checks_async() mte_enable_kernel_async() +#define arch_enable_tag_checks_asymm() mte_enable_kernel_asymm() +#define arch_suppress_tag_checks_start() mte_enable_tco() +#define arch_suppress_tag_checks_stop() mte_disable_tco() #define arch_force_async_tag_fault() mte_check_tfsr_exit() #define arch_get_random_tag() mte_get_random_tag() #define arch_get_mem_tag(addr) mte_get_mem_tag(addr) @@ -288,12 +353,6 @@ extern phys_addr_t __phys_addr_symbol(unsigned long x); #define __phys_to_kimg(x) ((unsigned long)((x) + kimage_voffset)) /* - * Convert a page to/from a physical address - */ -#define page_to_phys(page) (__pfn_to_phys(page_to_pfn(page))) -#define phys_to_page(phys) (pfn_to_page(__phys_to_pfn(phys))) - -/* * Note: Drivers should NOT use these. They are the wrong * translation for translating DMA addresses. Use the driver * DMA support - see dma-mapping.h. @@ -310,6 +369,14 @@ static inline void *phys_to_virt(phys_addr_t x) return (void *)(__phys_to_virt(x)); } +/* Needed already here for resolving __phys_to_pfn() in virt_to_pfn() */ +#include <asm-generic/memory_model.h> + +static inline unsigned long virt_to_pfn(const void *kaddr) +{ + return __phys_to_pfn(virt_to_phys(kaddr)); +} + /* * Drivers should NOT use these either. 
*/ @@ -318,7 +385,6 @@ static inline void *phys_to_virt(phys_addr_t x) #define __pa_nodebug(x) __virt_to_phys_nodebug((unsigned long)(x)) #define __va(x) ((void *)__phys_to_virt((phys_addr_t)(x))) #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) -#define virt_to_pfn(x) __phys_to_pfn(__virt_to_phys((unsigned long)(x))) #define sym_to_pfn(x) __phys_to_pfn(__pa_symbol(x)) /* @@ -355,11 +421,6 @@ static inline void *phys_to_virt(phys_addr_t x) }) void dump_mem_limit(void); - -static inline bool defer_reserve_crashkernel(void) -{ - return IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32); -} #endif /* !ASSEMBLY */ /* @@ -373,6 +434,14 @@ static inline bool defer_reserve_crashkernel(void) # define INIT_MEMBLOCK_RESERVED_REGIONS (INIT_MEMBLOCK_REGIONS + NR_CPUS + 1) #endif -#include <asm-generic/memory_model.h> +/* + * memory regions which marked with flag MEMBLOCK_NOMAP(for example, the memory + * of the EFI_UNUSABLE_MEMORY type) may divide a continuous memory block into + * multiple parts. As a result, the number of memory regions is large. + */ +#ifdef CONFIG_EFI +#define INIT_MEMBLOCK_MEMORY_REGIONS (INIT_MEMBLOCK_REGIONS * 8) +#endif + #endif /* __ASM_MEMORY_H */ diff --git a/arch/arm64/include/asm/mman.h b/arch/arm64/include/asm/mman.h index 5966ee4a6154..8770c7ee759f 100644 --- a/arch/arm64/include/asm/mman.h +++ b/arch/arm64/include/asm/mman.h @@ -2,14 +2,19 @@ #ifndef __ASM_MMAN_H__ #define __ASM_MMAN_H__ +#include <uapi/asm/mman.h> + +#ifndef BUILD_VDSO #include <linux/compiler.h> +#include <linux/fs.h> +#include <linux/hugetlb.h> +#include <linux/shmem_fs.h> #include <linux/types.h> -#include <uapi/asm/mman.h> -static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot, - unsigned long pkey __always_unused) +static inline vm_flags_t arch_calc_vm_prot_bits(unsigned long prot, + unsigned long pkey) { - unsigned long ret = 0; + vm_flags_t ret = 0; if (system_supports_bti() && (prot & PROT_BTI)) ret |= VM_ARM64_BTI; @@ -17,23 +22,36 @@ static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot, if (system_supports_mte() && (prot & PROT_MTE)) ret |= VM_MTE; +#ifdef CONFIG_ARCH_HAS_PKEYS + if (system_supports_poe()) { + ret |= pkey & BIT(0) ? VM_PKEY_BIT0 : 0; + ret |= pkey & BIT(1) ? VM_PKEY_BIT1 : 0; + ret |= pkey & BIT(2) ? VM_PKEY_BIT2 : 0; + } +#endif + return ret; } #define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey) -static inline unsigned long arch_calc_vm_flag_bits(unsigned long flags) +static inline vm_flags_t arch_calc_vm_flag_bits(struct file *file, + unsigned long flags) { /* * Only allow MTE on anonymous mappings as these are guaranteed to be * backed by tags-capable memory. The vm_flags may be overridden by a * filesystem supporting MTE (RAM-based). 
*/ - if (system_supports_mte() && (flags & MAP_ANONYMOUS)) - return VM_MTE_ALLOWED; + if (system_supports_mte()) { + if (flags & (MAP_ANONYMOUS | MAP_HUGETLB)) + return VM_MTE_ALLOWED; + if (shmem_file(file) || is_file_hugepages(file)) + return VM_MTE_ALLOWED; + } return 0; } -#define arch_calc_vm_flag_bits(flags) arch_calc_vm_flag_bits(flags) +#define arch_calc_vm_flag_bits(file, flags) arch_calc_vm_flag_bits(file, flags) static inline bool arch_validate_prot(unsigned long prot, unsigned long addr __always_unused) @@ -50,14 +68,31 @@ static inline bool arch_validate_prot(unsigned long prot, } #define arch_validate_prot(prot, addr) arch_validate_prot(prot, addr) -static inline bool arch_validate_flags(unsigned long vm_flags) +static inline bool arch_validate_flags(vm_flags_t vm_flags) { - if (!system_supports_mte()) - return true; + if (system_supports_mte()) { + /* + * only allow VM_MTE if VM_MTE_ALLOWED has been set + * previously + */ + if ((vm_flags & VM_MTE) && !(vm_flags & VM_MTE_ALLOWED)) + return false; + } + + if (system_supports_gcs() && (vm_flags & VM_SHADOW_STACK)) { + /* An executable GCS isn't a good idea. */ + if (vm_flags & VM_EXEC) + return false; + + /* The memory management core should prevent this */ + VM_WARN_ON(vm_flags & VM_SHARED); + } + + return true; - /* only allow VM_MTE if VM_MTE_ALLOWED has been set previously */ - return !(vm_flags & VM_MTE) || (vm_flags & VM_MTE_ALLOWED); } #define arch_validate_flags(vm_flags) arch_validate_flags(vm_flags) +#endif /* !BUILD_VDSO */ + #endif /* ! __ASM_MMAN_H__ */ diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 48f8466a4be9..49f1a810df16 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -17,6 +17,13 @@ #include <linux/refcount.h> #include <asm/cpufeature.h> +enum pgtable_type { + TABLE_PTE, + TABLE_PMD, + TABLE_PUD, + TABLE_P4D, +}; + typedef struct { atomic64_t id; #ifdef CONFIG_COMPAT @@ -25,6 +32,7 @@ typedef struct { refcount_t pinned; void *vdso; unsigned long flags; + u8 pkey_allocation_map; } mm_context_t; /* @@ -57,23 +65,44 @@ typedef struct { static inline bool arm64_kernel_unmapped_at_el0(void) { - return cpus_have_const_cap(ARM64_UNMAP_KERNEL_AT_EL0); + return alternative_has_cap_unlikely(ARM64_UNMAP_KERNEL_AT_EL0); } extern void arm64_memblock_init(void); extern void paging_init(void); extern void bootmem_init(void); -extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt); -extern void init_mem_pgprot(void); +extern void create_mapping_noalloc(phys_addr_t phys, unsigned long virt, + phys_addr_t size, pgprot_t prot); extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot, bool page_mappings_only); extern void *fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot); extern void mark_linear_text_alias_ro(void); -extern bool kaslr_requires_kpti(void); -#define INIT_MM_CONTEXT(name) \ - .pgd = init_pg_dir, +/* + * This check is triggered during the early boot before the cpufeature + * is initialised. Checking the status on the local CPU allows the boot + * CPU to detect the need for non-global mappings and thus avoiding a + * pagetable re-write after all the CPUs are booted. This check will be + * anyway run on individual CPUs, allowing us to get the consistent + * state once the SMP CPUs are up and thus make the switch to non-global + * mappings if required. 
+ */ +static inline bool kaslr_requires_kpti(void) +{ + /* + * E0PD does a similar job to KPTI so can be used instead + * where available. + */ + if (IS_ENABLED(CONFIG_ARM64_E0PD)) { + u64 mmfr2 = read_sysreg_s(SYS_ID_AA64MMFR2_EL1); + if (cpuid_feature_extract_unsigned_field(mmfr2, + ID_AA64MMFR2_EL1_E0PD_SHIFT)) + return false; + } + + return true; +} #endif /* !__ASSEMBLY__ */ #endif diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index c7ccd82db1d2..0dbe3b29049b 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -15,11 +15,13 @@ #include <linux/sched/hotplug.h> #include <linux/mm_types.h> #include <linux/pgtable.h> +#include <linux/pkeys.h> #include <asm/cacheflush.h> #include <asm/cpufeature.h> +#include <asm/daifflags.h> +#include <asm/gcs.h> #include <asm/proc-fns.h> -#include <asm-generic/mm_hooks.h> #include <asm/cputype.h> #include <asm/sysreg.h> #include <asm/tlbflush.h> @@ -38,11 +40,16 @@ static inline void contextidr_thread_switch(struct task_struct *next) /* * Set TTBR0 to reserved_pg_dir. No translations will be possible via TTBR0. */ -static inline void cpu_set_reserved_ttbr0(void) +static inline void cpu_set_reserved_ttbr0_nosync(void) { unsigned long ttbr = phys_to_ttbr(__pa_symbol(reserved_pg_dir)); write_sysreg(ttbr, ttbr0_el1); +} + +static inline void cpu_set_reserved_ttbr0(void) +{ + cpu_set_reserved_ttbr0_nosync(); isb(); } @@ -51,16 +58,13 @@ void cpu_do_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm); static inline void cpu_switch_mm(pgd_t *pgd, struct mm_struct *mm) { BUG_ON(pgd == swapper_pg_dir); - cpu_set_reserved_ttbr0(); cpu_do_switch_mm(virt_to_phys(pgd),mm); } /* - * TCR.T0SZ value to use when the ID map is active. Usually equals - * TCR_T0SZ(VA_BITS), unless system RAM is positioned very high in - * physical memory, in which case it will be smaller. + * TCR.T0SZ value to use when the ID map is active. */ -extern int idmap_t0sz; +#define idmap_t0sz TCR_T0SZ(IDMAP_VA_BITS) /* * Ensure TCR.T0SZ is set to the provided value. @@ -69,11 +73,11 @@ static inline void __cpu_set_tcr_t0sz(unsigned long t0sz) { unsigned long tcr = read_sysreg(tcr_el1); - if ((tcr & TCR_T0SZ_MASK) >> TCR_T0SZ_OFFSET == t0sz) + if ((tcr & TCR_T0SZ_MASK) == t0sz) return; tcr &= ~TCR_T0SZ_MASK; - tcr |= t0sz << TCR_T0SZ_OFFSET; + tcr |= t0sz; write_sysreg(tcr, tcr_el1); isb(); } @@ -105,18 +109,13 @@ static inline void cpu_uninstall_idmap(void) cpu_switch_mm(mm->pgd, mm); } -static inline void __cpu_install_idmap(pgd_t *idmap) +static inline void cpu_install_idmap(void) { cpu_set_reserved_ttbr0(); local_flush_tlb_all(); cpu_set_idmap_tcr_t0sz(); - cpu_switch_mm(lm_alias(idmap), &init_mm); -} - -static inline void cpu_install_idmap(void) -{ - __cpu_install_idmap(idmap_pg_dir); + cpu_switch_mm(lm_alias(idmap_pg_dir), &init_mm); } /* @@ -143,36 +142,21 @@ static inline void cpu_install_ttbr0(phys_addr_t ttbr0, unsigned long t0sz) isb(); } -/* - * Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD, - * avoiding the possibility of conflicting TLB entries being allocated. 
- */ -static inline void __nocfi cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap) -{ - typedef void (ttbr_replace_func)(phys_addr_t); - extern ttbr_replace_func idmap_cpu_replace_ttbr1; - ttbr_replace_func *replace_phys; - - /* phys_to_ttbr() zeros lower 2 bits of ttbr with 52-bit PA */ - phys_addr_t ttbr1 = phys_to_ttbr(virt_to_phys(pgdp)); - - if (system_supports_cnp() && !WARN_ON(pgdp != lm_alias(swapper_pg_dir))) { - /* - * cpu_replace_ttbr1() is used when there's a boot CPU - * up (i.e. cpufeature framework is not up yet) and - * latter only when we enable CNP via cpufeature's - * enable() callback. - * Also we rely on the cpu_hwcap bit being set before - * calling the enable() function. - */ - ttbr1 |= TTBR_CNP_BIT; - } +void __cpu_replace_ttbr1(pgd_t *pgdp, bool cnp); - replace_phys = (void *)__pa_symbol(function_nocfi(idmap_cpu_replace_ttbr1)); +static inline void cpu_enable_swapper_cnp(void) +{ + __cpu_replace_ttbr1(lm_alias(swapper_pg_dir), true); +} - __cpu_install_idmap(idmap); - replace_phys(ttbr1); - cpu_uninstall_idmap(); +static inline void cpu_replace_ttbr1(pgd_t *pgdp) +{ + /* + * Only for early TTBR1 replacement before cpucaps are finalized and + * before we've decided whether to use CNP. + */ + WARN_ON(system_capabilities_finalized()); + __cpu_replace_ttbr1(pgdp, false); } /* @@ -192,9 +176,36 @@ init_new_context(struct task_struct *tsk, struct mm_struct *mm) { atomic64_set(&mm->context.id, 0); refcount_set(&mm->context.pinned, 0); + + /* pkey 0 is the default, so always reserve it. */ + mm->context.pkey_allocation_map = BIT(0); + return 0; } +static inline void arch_dup_pkeys(struct mm_struct *oldmm, + struct mm_struct *mm) +{ + /* Duplicate the oldmm pkey state in mm: */ + mm->context.pkey_allocation_map = oldmm->context.pkey_allocation_map; +} + +static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) +{ + arch_dup_pkeys(oldmm, mm); + + return 0; +} + +static inline void arch_exit_mmap(struct mm_struct *mm) +{ +} + +static inline void arch_unmap(struct mm_struct *mm, + unsigned long start, unsigned long end) +{ +} + #ifdef CONFIG_ARM64_SW_TTBR0_PAN static inline void update_saved_ttbr0(struct task_struct *tsk, struct mm_struct *mm) @@ -260,24 +271,63 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, } static inline const struct cpumask * -task_cpu_possible_mask(struct task_struct *p) +__task_cpu_possible_mask(struct task_struct *p, const struct cpumask *mask) { if (!static_branch_unlikely(&arm64_mismatched_32bit_el0)) - return cpu_possible_mask; + return mask; if (!is_compat_thread(task_thread_info(p))) - return cpu_possible_mask; + return mask; return system_32bit_el0_cpumask(); } + +static inline const struct cpumask * +task_cpu_possible_mask(struct task_struct *p) +{ + return __task_cpu_possible_mask(p, cpu_possible_mask); +} #define task_cpu_possible_mask task_cpu_possible_mask +const struct cpumask *task_cpu_fallback_mask(struct task_struct *p); + void verify_cpu_asid_bits(void); void post_ttbr_update_workaround(void); unsigned long arm64_mm_context_get(struct mm_struct *mm); void arm64_mm_context_put(struct mm_struct *mm); +#define mm_untag_mask mm_untag_mask +static inline unsigned long mm_untag_mask(struct mm_struct *mm) +{ + return -1UL >> 8; +} + +/* + * Only enforce protection keys on the current process, because there is no + * user context to access POR_EL0 for another address space. 
+ */ +static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, + bool write, bool execute, bool foreign) +{ + if (!system_supports_poe()) + return true; + + /* allow access if the VMA is not one from this process */ + if (foreign || vma_is_foreign(vma)) + return true; + + return por_el0_allows_pkey(vma_pkey(vma), write, execute); +} + +#define deactivate_mm deactivate_mm +static inline void deactivate_mm(struct task_struct *tsk, + struct mm_struct *mm) +{ + gcs_free(tsk); +} + + #include <asm-generic/mmu_context.h> #endif /* !__ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/mmzone.h b/arch/arm64/include/asm/mmzone.h deleted file mode 100644 index fa17e01d9ab2..000000000000 --- a/arch/arm64/include/asm/mmzone.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_MMZONE_H -#define __ASM_MMZONE_H - -#ifdef CONFIG_NUMA - -#include <asm/numa.h> - -extern struct pglist_data *node_data[]; -#define NODE_DATA(nid) (node_data[(nid)]) - -#endif /* CONFIG_NUMA */ -#endif /* __ASM_MMZONE_H */ diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h index 4e7fa2623896..79550b22ba19 100644 --- a/arch/arm64/include/asm/module.h +++ b/arch/arm64/include/asm/module.h @@ -7,7 +7,6 @@ #include <asm-generic/module.h> -#ifdef CONFIG_ARM64_MODULE_PLTS struct mod_plt_sec { int plt_shndx; int plt_num_entries; @@ -21,7 +20,6 @@ struct mod_arch_specific { /* for CONFIG_DYNAMIC_FTRACE */ struct plt_entry *ftrace_trampolines; }; -#endif u64 module_emit_plt_entry(struct module *mod, Elf64_Shdr *sechdrs, void *loc, const Elf64_Rela *rela, @@ -30,12 +28,6 @@ u64 module_emit_plt_entry(struct module *mod, Elf64_Shdr *sechdrs, u64 module_emit_veneer_for_adrp(struct module *mod, Elf64_Shdr *sechdrs, void *loc, u64 val); -#ifdef CONFIG_RANDOMIZE_BASE -extern u64 module_alloc_base; -#else -#define module_alloc_base ((u64)_etext - MODULES_VSIZE) -#endif - struct plt_entry { /* * A program that conforms to the AArch64 Procedure Call Standard @@ -52,17 +44,25 @@ struct plt_entry { static inline bool is_forbidden_offset_for_adrp(void *place) { - return IS_ENABLED(CONFIG_ARM64_ERRATUM_843419) && - cpus_have_const_cap(ARM64_WORKAROUND_843419) && + return cpus_have_final_cap(ARM64_WORKAROUND_843419) && ((u64)place & 0xfff) >= 0xff8; } struct plt_entry get_plt_entry(u64 dst, void *pc); -bool plt_entries_equal(const struct plt_entry *a, const struct plt_entry *b); -static inline bool plt_entry_is_initialized(const struct plt_entry *e) +static inline const Elf_Shdr *find_section(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, + const char *name) { - return e->adrp || e->add || e->br; + const Elf_Shdr *s, *se; + const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; + + for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) { + if (strcmp(name, secstrs + s->sh_name) == 0) + return s; + } + + return NULL; } #endif /* __ASM_MODULE_H */ diff --git a/arch/arm64/include/asm/module.lds.h b/arch/arm64/include/asm/module.lds.h index 094701ec5500..b9ae8349e35d 100644 --- a/arch/arm64/include/asm/module.lds.h +++ b/arch/arm64/include/asm/module.lds.h @@ -1,9 +1,7 @@ SECTIONS { -#ifdef CONFIG_ARM64_MODULE_PLTS .plt 0 : { BYTE(0) } .init.plt 0 : { BYTE(0) } .text.ftrace_trampoline 0 : { BYTE(0) } -#endif #ifdef CONFIG_KASAN_SW_TAGS /* @@ -17,4 +15,12 @@ SECTIONS { */ .text.hot : { *(.text.hot) } #endif + +#ifdef CONFIG_UNWIND_TABLES + /* + * Currently, we only use unwind info at module load time, so we can + * put it into the .init allocation. 
+ */ + .init.eh_frame : { *(.eh_frame) } +#endif } diff --git a/arch/arm64/include/asm/mshyperv.h b/arch/arm64/include/asm/mshyperv.h index 20070a847304..b721d3134ab6 100644 --- a/arch/arm64/include/asm/mshyperv.h +++ b/arch/arm64/include/asm/mshyperv.h @@ -6,9 +6,8 @@ * the ARM64 architecture. See include/asm-generic/mshyperv.h for * definitions are that architecture independent. * - * Definitions that are specified in the Hyper-V Top Level Functional - * Spec (TLFS) should not go in this file, but should instead go in - * hyperv-tlfs.h. + * Definitions that are derived from Hyper-V code or headers should not go in + * this file, but should instead go in the relevant files in include/hyperv. * * Copyright (C) 2021, Microsoft, Inc. * @@ -20,7 +19,7 @@ #include <linux/types.h> #include <linux/arm-smccc.h> -#include <asm/hyperv-tlfs.h> +#include <hyperv/hvhdk.h> /* * Declare calls to get and set Hyper-V VP register values on ARM64, which @@ -31,16 +30,29 @@ void hv_set_vpreg(u32 reg, u64 value); u64 hv_get_vpreg(u32 reg); void hv_get_vpreg_128(u32 reg, struct hv_get_vp_registers_output *result); -static inline void hv_set_register(unsigned int reg, u64 value) +static inline void hv_set_msr(unsigned int reg, u64 value) { hv_set_vpreg(reg, value); } -static inline u64 hv_get_register(unsigned int reg) +static inline u64 hv_get_msr(unsigned int reg) { return hv_get_vpreg(reg); } +/* + * Nested is not supported on arm64 + */ +static inline void hv_set_non_nested_msr(unsigned int reg, u64 value) +{ + hv_set_msr(reg, value); +} + +static inline u64 hv_get_non_nested_msr(unsigned int reg) +{ + return hv_get_msr(reg); +} + /* SMCCC hypercall parameters */ #define HV_SMCCC_FUNC_NUMBER 1 #define HV_FUNC_ID ARM_SMCCC_CALL_VAL( \ diff --git a/arch/arm64/include/asm/mte-kasan.h b/arch/arm64/include/asm/mte-kasan.h index 9f79425fc65a..2e98028c1965 100644 --- a/arch/arm64/include/asm/mte-kasan.h +++ b/arch/arm64/include/asm/mte-kasan.h @@ -13,9 +13,74 @@ #include <linux/types.h> +#ifdef CONFIG_KASAN_HW_TAGS + +/* Whether the MTE asynchronous mode is enabled. */ +DECLARE_STATIC_KEY_FALSE(mte_async_or_asymm_mode); + +static inline bool system_uses_mte_async_or_asymm_mode(void) +{ + return static_branch_unlikely(&mte_async_or_asymm_mode); +} + +#else /* CONFIG_KASAN_HW_TAGS */ + +static inline bool system_uses_mte_async_or_asymm_mode(void) +{ + return false; +} + +#endif /* CONFIG_KASAN_HW_TAGS */ + #ifdef CONFIG_ARM64_MTE /* + * The Tag Check Flag (TCF) mode for MTE is per EL, hence TCF0 + * affects EL0 and TCF affects EL1 irrespective of which TTBR is + * used. + * The kernel accesses TTBR0 usually with LDTR/STTR instructions + * when UAO is available, so these would act as EL0 accesses using + * TCF0. + * However futex.h code uses exclusives which would be executed as + * EL1, this can potentially cause a tag check fault even if the + * user disables TCF0. + * + * To address the problem we set the PSTATE.TCO bit in uaccess_enable() + * and reset it in uaccess_disable(). + * + * The Tag check override (TCO) bit disables temporarily the tag checking + * preventing the issue. 
+ */ +static inline void mte_disable_tco(void) +{ + asm volatile(ALTERNATIVE("nop", SET_PSTATE_TCO(0), + ARM64_MTE, CONFIG_KASAN_HW_TAGS)); +} + +static inline void mte_enable_tco(void) +{ + asm volatile(ALTERNATIVE("nop", SET_PSTATE_TCO(1), + ARM64_MTE, CONFIG_KASAN_HW_TAGS)); +} + +/* + * These functions disable tag checking only if in MTE async mode + * since the sync mode generates exceptions synchronously and the + * nofault or load_unaligned_zeropad can handle them. + */ +static inline void __mte_disable_tco_async(void) +{ + if (system_uses_mte_async_or_asymm_mode()) + mte_disable_tco(); +} + +static inline void __mte_enable_tco_async(void) +{ + if (system_uses_mte_async_or_asymm_mode()) + mte_enable_tco(); +} + +/* * These functions are meant to be only used from KASAN runtime through * the arch_*() interface defined in asm/memory.h. * These functions don't include system_supports_mte() checks, @@ -138,6 +203,22 @@ void mte_enable_kernel_asymm(void); #else /* CONFIG_ARM64_MTE */ +static inline void mte_disable_tco(void) +{ +} + +static inline void mte_enable_tco(void) +{ +} + +static inline void __mte_disable_tco_async(void) +{ +} + +static inline void __mte_enable_tco_async(void) +{ +} + static inline u8 mte_get_ptr_tag(void *ptr) { return 0xFF; diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h index aa523591a44e..6567df8ec8ca 100644 --- a/arch/arm64/include/asm/mte.h +++ b/arch/arm64/include/asm/mte.h @@ -25,7 +25,7 @@ unsigned long mte_copy_tags_to_user(void __user *to, void *from, unsigned long n); int mte_save_tags(struct page *page); void mte_save_page_tags(const void *page_addr, void *tag_storage); -bool mte_restore_tags(swp_entry_t entry, struct page *page); +void mte_restore_tags(swp_entry_t entry, struct page *page); void mte_restore_page_tags(void *page_addr, const void *tag_storage); void mte_invalidate_tags(int type, pgoff_t offset); void mte_invalidate_tags_area(int type); @@ -36,13 +36,73 @@ void mte_free_tag_storage(char *storage); /* track which pages have valid allocation tags */ #define PG_mte_tagged PG_arch_2 +/* simple lock to avoid multiple threads tagging the same page */ +#define PG_mte_lock PG_arch_3 + +static inline void set_page_mte_tagged(struct page *page) +{ + VM_WARN_ON_ONCE(folio_test_hugetlb(page_folio(page))); + + /* + * Ensure that the tags written prior to this function are visible + * before the page flags update. + */ + smp_wmb(); + set_bit(PG_mte_tagged, &page->flags); +} + +static inline bool page_mte_tagged(struct page *page) +{ + bool ret = test_bit(PG_mte_tagged, &page->flags); + + VM_WARN_ON_ONCE(folio_test_hugetlb(page_folio(page))); + + /* + * If the page is tagged, ensure ordering with a likely subsequent + * read of the tags. + */ + if (ret) + smp_rmb(); + return ret; +} + +/* + * Lock the page for tagging and return 'true' if the page can be tagged, + * 'false' if already tagged. PG_mte_tagged is never cleared and therefore the + * locking only happens once for page initialisation. + * + * The page MTE lock state: + * + * Locked: PG_mte_lock && !PG_mte_tagged + * Unlocked: !PG_mte_lock || PG_mte_tagged + * + * Acquire semantics only if the page is tagged (returning 'false'). + */ +static inline bool try_page_mte_tagging(struct page *page) +{ + VM_WARN_ON_ONCE(folio_test_hugetlb(page_folio(page))); + + if (!test_and_set_bit(PG_mte_lock, &page->flags)) + return true; + + /* + * The tags are either being initialised or may have been initialised + * already. 
Check if the PG_mte_tagged flag has been set or wait + * otherwise. + */ + smp_cond_load_acquire(&page->flags, VAL & (1UL << PG_mte_tagged)); + + return false; +} void mte_zero_clear_page_tags(void *addr); -void mte_sync_tags(pte_t old_pte, pte_t pte); +void mte_sync_tags(pte_t pte, unsigned int nr_pages); void mte_copy_page_tags(void *kto, const void *kfrom); void mte_thread_init_user(void); void mte_thread_switch(struct task_struct *next); +void mte_cpu_setup(void); void mte_suspend_enter(void); +void mte_suspend_exit(void); long set_mte_ctrl(struct task_struct *task, unsigned long arg); long get_mte_ctrl(struct task_struct *task); int mte_ptrace_copy_tags(struct task_struct *child, long request, @@ -54,10 +114,21 @@ size_t mte_probe_user_range(const char __user *uaddr, size_t size); /* unused if !CONFIG_ARM64_MTE, silence the compiler */ #define PG_mte_tagged 0 +static inline void set_page_mte_tagged(struct page *page) +{ +} +static inline bool page_mte_tagged(struct page *page) +{ + return false; +} +static inline bool try_page_mte_tagging(struct page *page) +{ + return false; +} static inline void mte_zero_clear_page_tags(void *addr) { } -static inline void mte_sync_tags(pte_t old_pte, pte_t pte) +static inline void mte_sync_tags(pte_t pte, unsigned int nr_pages) { } static inline void mte_copy_page_tags(void *kto, const void *kfrom) @@ -72,6 +143,9 @@ static inline void mte_thread_switch(struct task_struct *next) static inline void mte_suspend_enter(void) { } +static inline void mte_suspend_exit(void) +{ +} static inline long set_mte_ctrl(struct task_struct *task, unsigned long arg) { return 0; @@ -89,6 +163,67 @@ static inline int mte_ptrace_copy_tags(struct task_struct *child, #endif /* CONFIG_ARM64_MTE */ +#if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_ARM64_MTE) +static inline void folio_set_hugetlb_mte_tagged(struct folio *folio) +{ + VM_WARN_ON_ONCE(!folio_test_hugetlb(folio)); + + /* + * Ensure that the tags written prior to this function are visible + * before the folio flags update. + */ + smp_wmb(); + set_bit(PG_mte_tagged, &folio->flags); + +} + +static inline bool folio_test_hugetlb_mte_tagged(struct folio *folio) +{ + bool ret = test_bit(PG_mte_tagged, &folio->flags); + + VM_WARN_ON_ONCE(!folio_test_hugetlb(folio)); + + /* + * If the folio is tagged, ensure ordering with a likely subsequent + * read of the tags. + */ + if (ret) + smp_rmb(); + return ret; +} + +static inline bool folio_try_hugetlb_mte_tagging(struct folio *folio) +{ + VM_WARN_ON_ONCE(!folio_test_hugetlb(folio)); + + if (!test_and_set_bit(PG_mte_lock, &folio->flags)) + return true; + + /* + * The tags are either being initialised or may have been initialised + * already. Check if the PG_mte_tagged flag has been set or wait + * otherwise. + */ + smp_cond_load_acquire(&folio->flags, VAL & (1UL << PG_mte_tagged)); + + return false; +} +#else +static inline void folio_set_hugetlb_mte_tagged(struct folio *folio) +{ +} + +static inline bool folio_test_hugetlb_mte_tagged(struct folio *folio) +{ + return false; +} + +static inline bool folio_try_hugetlb_mte_tagging(struct folio *folio) +{ + return false; +} +#endif + static inline void mte_disable_tco_entry(struct task_struct *task) { if (!system_supports_mte()) @@ -110,19 +245,11 @@ static inline void mte_disable_tco_entry(struct task_struct *task) } #ifdef CONFIG_KASAN_HW_TAGS -/* Whether the MTE asynchronous mode is enabled. 
*/ -DECLARE_STATIC_KEY_FALSE(mte_async_or_asymm_mode); - -static inline bool system_uses_mte_async_or_asymm_mode(void) -{ - return static_branch_unlikely(&mte_async_or_asymm_mode); -} - void mte_check_tfsr_el1(void); static inline void mte_check_tfsr_entry(void) { - if (!system_supports_mte()) + if (!kasan_hw_tags_enabled()) return; mte_check_tfsr_el1(); @@ -130,7 +257,7 @@ static inline void mte_check_tfsr_entry(void) static inline void mte_check_tfsr_exit(void) { - if (!system_supports_mte()) + if (!kasan_hw_tags_enabled()) return; /* @@ -144,10 +271,6 @@ static inline void mte_check_tfsr_exit(void) mte_check_tfsr_el1(); } #else -static inline bool system_uses_mte_async_or_asymm_mode(void) -{ - return false; -} static inline void mte_check_tfsr_el1(void) { } diff --git a/arch/arm64/include/asm/page-def.h b/arch/arm64/include/asm/page-def.h index 2403f7b4cdbf..d402e08442ee 100644 --- a/arch/arm64/include/asm/page-def.h +++ b/arch/arm64/include/asm/page-def.h @@ -10,9 +10,6 @@ #include <linux/const.h> -/* PAGE_SHIFT determines the page size */ -#define PAGE_SHIFT CONFIG_ARM64_PAGE_SHIFT -#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE-1)) +#include <vdso/page.h> #endif /* __ASM_PAGE_DEF_H */ diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index 993a27ea6f54..2312e6ee595f 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -29,9 +29,9 @@ void copy_user_highpage(struct page *to, struct page *from, void copy_highpage(struct page *to, struct page *from); #define __HAVE_ARCH_COPY_HIGHPAGE -struct page *alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma, +struct folio *vma_alloc_zeroed_movable_folio(struct vm_area_struct *vma, unsigned long vaddr); -#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE_MOVABLE +#define vma_alloc_zeroed_movable_folio vma_alloc_zeroed_movable_folio void tag_clear_highpage(struct page *to); #define __HAVE_ARCH_TAG_CLEAR_HIGHPAGE diff --git a/arch/arm64/include/asm/pci.h b/arch/arm64/include/asm/pci.h index b33ca260e3c9..016eb6b46dc0 100644 --- a/arch/arm64/include/asm/pci.h +++ b/arch/arm64/include/asm/pci.h @@ -9,7 +9,6 @@ #include <asm/io.h> #define PCIBIOS_MIN_IO 0x1000 -#define PCIBIOS_MIN_MEM 0 /* * Set to 1 if the kernel should re-assign all PCI bus numbers @@ -18,21 +17,8 @@ (pci_has_flag(PCI_REASSIGN_ALL_BUS)) #define arch_can_pci_mmap_wc() 1 -#define ARCH_GENERIC_PCI_MMAP_RESOURCE 1 -extern int isa_dma_bridge_buggy; - -#ifdef CONFIG_PCI -static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel) -{ - /* no legacy IRQ on arm64 */ - return -ENODEV; -} - -static inline int pci_proc_domain(struct pci_bus *bus) -{ - return 1; -} -#endif /* CONFIG_PCI */ +/* Generic PCI */ +#include <asm-generic/pci.h> #endif /* __ASM_PCI_H */ diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h index b9ba19dbdb69..9abcc8ef3087 100644 --- a/arch/arm64/include/asm/percpu.h +++ b/arch/arm64/include/asm/percpu.h @@ -140,17 +140,11 @@ PERCPU_RET_OP(add, add, ldadd) * re-enabling preemption for preemptible kernels, but doing that in a way * which builds inside a module would mean messing directly with the preempt * count. If you do this, peterz and tglx will hunt you down. + * + * Not to mention it'll break the actual preemption model for missing a + * preemption point when TIF_NEED_RESCHED gets set while preemption is + * disabled. 
*/ -#define this_cpu_cmpxchg_double_8(ptr1, ptr2, o1, o2, n1, n2) \ -({ \ - int __ret; \ - preempt_disable_notrace(); \ - __ret = cmpxchg_double_local( raw_cpu_ptr(&(ptr1)), \ - raw_cpu_ptr(&(ptr2)), \ - o1, o2, n1, n2); \ - preempt_enable_notrace(); \ - __ret; \ -}) #define _pcp_protect(op, pcp, ...) \ ({ \ @@ -240,6 +234,22 @@ PERCPU_RET_OP(add, add, ldadd) #define this_cpu_cmpxchg_8(pcp, o, n) \ _pcp_protect_return(cmpxchg_relaxed, pcp, o, n) +#define this_cpu_cmpxchg64(pcp, o, n) this_cpu_cmpxchg_8(pcp, o, n) + +#define this_cpu_cmpxchg128(pcp, o, n) \ +({ \ + typedef typeof(pcp) pcp_op_T__; \ + u128 old__, new__, ret__; \ + pcp_op_T__ *ptr__; \ + old__ = o; \ + new__ = n; \ + preempt_disable_notrace(); \ + ptr__ = raw_cpu_ptr(&(pcp)); \ + ret__ = cmpxchg128_local((void *)ptr__, old__, new__); \ + preempt_enable_notrace(); \ + ret__; \ +}) + #ifdef __KVM_NVHE_HYPERVISOR__ extern unsigned long __hyp_per_cpu_offset(unsigned int cpu); #define __per_cpu_offset diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h index 3eaf462f5752..ee45b4e77347 100644 --- a/arch/arm64/include/asm/perf_event.h +++ b/arch/arm64/include/asm/perf_event.h @@ -9,260 +9,7 @@ #include <asm/stack_pointer.h> #include <asm/ptrace.h> -#define ARMV8_PMU_MAX_COUNTERS 32 -#define ARMV8_PMU_COUNTER_MASK (ARMV8_PMU_MAX_COUNTERS - 1) - -/* - * Common architectural and microarchitectural event numbers. - */ -#define ARMV8_PMUV3_PERFCTR_SW_INCR 0x0000 -#define ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL 0x0001 -#define ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL 0x0002 -#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL 0x0003 -#define ARMV8_PMUV3_PERFCTR_L1D_CACHE 0x0004 -#define ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL 0x0005 -#define ARMV8_PMUV3_PERFCTR_LD_RETIRED 0x0006 -#define ARMV8_PMUV3_PERFCTR_ST_RETIRED 0x0007 -#define ARMV8_PMUV3_PERFCTR_INST_RETIRED 0x0008 -#define ARMV8_PMUV3_PERFCTR_EXC_TAKEN 0x0009 -#define ARMV8_PMUV3_PERFCTR_EXC_RETURN 0x000A -#define ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED 0x000B -#define ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED 0x000C -#define ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED 0x000D -#define ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED 0x000E -#define ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED 0x000F -#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED 0x0010 -#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES 0x0011 -#define ARMV8_PMUV3_PERFCTR_BR_PRED 0x0012 -#define ARMV8_PMUV3_PERFCTR_MEM_ACCESS 0x0013 -#define ARMV8_PMUV3_PERFCTR_L1I_CACHE 0x0014 -#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB 0x0015 -#define ARMV8_PMUV3_PERFCTR_L2D_CACHE 0x0016 -#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL 0x0017 -#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB 0x0018 -#define ARMV8_PMUV3_PERFCTR_BUS_ACCESS 0x0019 -#define ARMV8_PMUV3_PERFCTR_MEMORY_ERROR 0x001A -#define ARMV8_PMUV3_PERFCTR_INST_SPEC 0x001B -#define ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED 0x001C -#define ARMV8_PMUV3_PERFCTR_BUS_CYCLES 0x001D -#define ARMV8_PMUV3_PERFCTR_CHAIN 0x001E -#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE 0x001F -#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE 0x0020 -#define ARMV8_PMUV3_PERFCTR_BR_RETIRED 0x0021 -#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED 0x0022 -#define ARMV8_PMUV3_PERFCTR_STALL_FRONTEND 0x0023 -#define ARMV8_PMUV3_PERFCTR_STALL_BACKEND 0x0024 -#define ARMV8_PMUV3_PERFCTR_L1D_TLB 0x0025 -#define ARMV8_PMUV3_PERFCTR_L1I_TLB 0x0026 -#define ARMV8_PMUV3_PERFCTR_L2I_CACHE 0x0027 -#define ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL 0x0028 -#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE 0x0029 -#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL 
0x002A -#define ARMV8_PMUV3_PERFCTR_L3D_CACHE 0x002B -#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB 0x002C -#define ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL 0x002D -#define ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL 0x002E -#define ARMV8_PMUV3_PERFCTR_L2D_TLB 0x002F -#define ARMV8_PMUV3_PERFCTR_L2I_TLB 0x0030 -#define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS 0x0031 -#define ARMV8_PMUV3_PERFCTR_LL_CACHE 0x0032 -#define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS 0x0033 -#define ARMV8_PMUV3_PERFCTR_DTLB_WALK 0x0034 -#define ARMV8_PMUV3_PERFCTR_ITLB_WALK 0x0035 -#define ARMV8_PMUV3_PERFCTR_LL_CACHE_RD 0x0036 -#define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD 0x0037 -#define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD 0x0038 -#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_LMISS_RD 0x0039 -#define ARMV8_PMUV3_PERFCTR_OP_RETIRED 0x003A -#define ARMV8_PMUV3_PERFCTR_OP_SPEC 0x003B -#define ARMV8_PMUV3_PERFCTR_STALL 0x003C -#define ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND 0x003D -#define ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND 0x003E -#define ARMV8_PMUV3_PERFCTR_STALL_SLOT 0x003F - -/* Statistical profiling extension microarchitectural events */ -#define ARMV8_SPE_PERFCTR_SAMPLE_POP 0x4000 -#define ARMV8_SPE_PERFCTR_SAMPLE_FEED 0x4001 -#define ARMV8_SPE_PERFCTR_SAMPLE_FILTRATE 0x4002 -#define ARMV8_SPE_PERFCTR_SAMPLE_COLLISION 0x4003 - -/* AMUv1 architecture events */ -#define ARMV8_AMU_PERFCTR_CNT_CYCLES 0x4004 -#define ARMV8_AMU_PERFCTR_STALL_BACKEND_MEM 0x4005 - -/* long-latency read miss events */ -#define ARMV8_PMUV3_PERFCTR_L1I_CACHE_LMISS 0x4006 -#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_LMISS_RD 0x4009 -#define ARMV8_PMUV3_PERFCTR_L2I_CACHE_LMISS 0x400A -#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_LMISS_RD 0x400B - -/* Trace buffer events */ -#define ARMV8_PMUV3_PERFCTR_TRB_WRAP 0x400C -#define ARMV8_PMUV3_PERFCTR_TRB_TRIG 0x400E - -/* Trace unit events */ -#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT0 0x4010 -#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT1 0x4011 -#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT2 0x4012 -#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT3 0x4013 -#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT4 0x4018 -#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT5 0x4019 -#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT6 0x401A -#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT7 0x401B - -/* additional latency from alignment events */ -#define ARMV8_PMUV3_PERFCTR_LDST_ALIGN_LAT 0x4020 -#define ARMV8_PMUV3_PERFCTR_LD_ALIGN_LAT 0x4021 -#define ARMV8_PMUV3_PERFCTR_ST_ALIGN_LAT 0x4022 - -/* Armv8.5 Memory Tagging Extension events */ -#define ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED 0x4024 -#define ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_RD 0x4025 -#define ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_WR 0x4026 - -/* ARMv8 recommended implementation defined event types */ -#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD 0x0040 -#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR 0x0041 -#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD 0x0042 -#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR 0x0043 -#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_INNER 0x0044 -#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_OUTER 0x0045 -#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_VICTIM 0x0046 -#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_CLEAN 0x0047 -#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_INVAL 0x0048 - -#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD 0x004C -#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR 0x004D -#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD 0x004E -#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR 0x004F -#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_RD 0x0050 -#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WR 0x0051 -#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_RD 0x0052 -#define 
ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_WR 0x0053 - -#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_VICTIM 0x0056 -#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_CLEAN 0x0057 -#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_INVAL 0x0058 - -#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_RD 0x005C -#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_WR 0x005D -#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_RD 0x005E -#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_WR 0x005F -#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD 0x0060 -#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR 0x0061 -#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_SHARED 0x0062 -#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NOT_SHARED 0x0063 -#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NORMAL 0x0064 -#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_PERIPH 0x0065 -#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_RD 0x0066 -#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_WR 0x0067 -#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LD_SPEC 0x0068 -#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_ST_SPEC 0x0069 -#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LDST_SPEC 0x006A - -#define ARMV8_IMPDEF_PERFCTR_LDREX_SPEC 0x006C -#define ARMV8_IMPDEF_PERFCTR_STREX_PASS_SPEC 0x006D -#define ARMV8_IMPDEF_PERFCTR_STREX_FAIL_SPEC 0x006E -#define ARMV8_IMPDEF_PERFCTR_STREX_SPEC 0x006F -#define ARMV8_IMPDEF_PERFCTR_LD_SPEC 0x0070 -#define ARMV8_IMPDEF_PERFCTR_ST_SPEC 0x0071 -#define ARMV8_IMPDEF_PERFCTR_LDST_SPEC 0x0072 -#define ARMV8_IMPDEF_PERFCTR_DP_SPEC 0x0073 -#define ARMV8_IMPDEF_PERFCTR_ASE_SPEC 0x0074 -#define ARMV8_IMPDEF_PERFCTR_VFP_SPEC 0x0075 -#define ARMV8_IMPDEF_PERFCTR_PC_WRITE_SPEC 0x0076 -#define ARMV8_IMPDEF_PERFCTR_CRYPTO_SPEC 0x0077 -#define ARMV8_IMPDEF_PERFCTR_BR_IMMED_SPEC 0x0078 -#define ARMV8_IMPDEF_PERFCTR_BR_RETURN_SPEC 0x0079 -#define ARMV8_IMPDEF_PERFCTR_BR_INDIRECT_SPEC 0x007A - -#define ARMV8_IMPDEF_PERFCTR_ISB_SPEC 0x007C -#define ARMV8_IMPDEF_PERFCTR_DSB_SPEC 0x007D -#define ARMV8_IMPDEF_PERFCTR_DMB_SPEC 0x007E - -#define ARMV8_IMPDEF_PERFCTR_EXC_UNDEF 0x0081 -#define ARMV8_IMPDEF_PERFCTR_EXC_SVC 0x0082 -#define ARMV8_IMPDEF_PERFCTR_EXC_PABORT 0x0083 -#define ARMV8_IMPDEF_PERFCTR_EXC_DABORT 0x0084 - -#define ARMV8_IMPDEF_PERFCTR_EXC_IRQ 0x0086 -#define ARMV8_IMPDEF_PERFCTR_EXC_FIQ 0x0087 -#define ARMV8_IMPDEF_PERFCTR_EXC_SMC 0x0088 - -#define ARMV8_IMPDEF_PERFCTR_EXC_HVC 0x008A -#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_PABORT 0x008B -#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_DABORT 0x008C -#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_OTHER 0x008D -#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_IRQ 0x008E -#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_FIQ 0x008F -#define ARMV8_IMPDEF_PERFCTR_RC_LD_SPEC 0x0090 -#define ARMV8_IMPDEF_PERFCTR_RC_ST_SPEC 0x0091 - -#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_RD 0x00A0 -#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WR 0x00A1 -#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_RD 0x00A2 -#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_WR 0x00A3 - -#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_VICTIM 0x00A6 -#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_CLEAN 0x00A7 -#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_INVAL 0x00A8 - -/* - * Per-CPU PMCR: config reg - */ -#define ARMV8_PMU_PMCR_E (1 << 0) /* Enable all counters */ -#define ARMV8_PMU_PMCR_P (1 << 1) /* Reset all counters */ -#define ARMV8_PMU_PMCR_C (1 << 2) /* Cycle counter reset */ -#define ARMV8_PMU_PMCR_D (1 << 3) /* CCNT counts every 64th cpu cycle */ -#define ARMV8_PMU_PMCR_X (1 << 4) /* Export to ETM */ -#define ARMV8_PMU_PMCR_DP (1 << 5) /* Disable CCNT if non-invasive debug*/ -#define ARMV8_PMU_PMCR_LC (1 << 6) /* Overflow on 64 bit cycle counter */ -#define ARMV8_PMU_PMCR_LP (1 << 7) /* Long 
event counter enable */ -#define ARMV8_PMU_PMCR_N_SHIFT 11 /* Number of counters supported */ -#define ARMV8_PMU_PMCR_N_MASK 0x1f -#define ARMV8_PMU_PMCR_MASK 0xff /* Mask for writable bits */ - -/* - * PMOVSR: counters overflow flag status reg - */ -#define ARMV8_PMU_OVSR_MASK 0xffffffff /* Mask for writable bits */ -#define ARMV8_PMU_OVERFLOWED_MASK ARMV8_PMU_OVSR_MASK - -/* - * PMXEVTYPER: Event selection reg - */ -#define ARMV8_PMU_EVTYPE_MASK 0xc800ffff /* Mask for writable bits */ -#define ARMV8_PMU_EVTYPE_EVENT 0xffff /* Mask for EVENT bits */ - -/* - * Event filters for PMUv3 - */ -#define ARMV8_PMU_EXCLUDE_EL1 (1U << 31) -#define ARMV8_PMU_EXCLUDE_EL0 (1U << 30) -#define ARMV8_PMU_INCLUDE_EL2 (1U << 27) - -/* - * PMUSERENR: user enable reg - */ -#define ARMV8_PMU_USERENR_MASK 0xf /* Mask for writable bits */ -#define ARMV8_PMU_USERENR_EN (1 << 0) /* PMU regs can be accessed at EL0 */ -#define ARMV8_PMU_USERENR_SW (1 << 1) /* PMSWINC can be written at EL0 */ -#define ARMV8_PMU_USERENR_CR (1 << 2) /* Cycle counter can be read at EL0 */ -#define ARMV8_PMU_USERENR_ER (1 << 3) /* Event counter can be read at EL0 */ - -/* PMMIR_EL1.SLOTS mask */ -#define ARMV8_PMU_SLOTS_MASK 0xff - -#define ARMV8_PMU_BUS_SLOTS_SHIFT 8 -#define ARMV8_PMU_BUS_SLOTS_MASK 0xff -#define ARMV8_PMU_BUS_WIDTH_SHIFT 16 -#define ARMV8_PMU_BUS_WIDTH_MASK 0xf - #ifdef CONFIG_PERF_EVENTS -struct pt_regs; -extern unsigned long perf_instruction_pointer(struct pt_regs *regs); -extern unsigned long perf_misc_flags(struct pt_regs *regs); -#define perf_misc_flags(regs) perf_misc_flags(regs) #define perf_arch_bpf_user_pt_regs(regs) &regs->user_regs #endif diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index 237224484d0f..1b4509d3382c 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -14,6 +14,7 @@ #include <asm/tlbflush.h> #define __HAVE_ARCH_PGD_FREE +#define __HAVE_ARCH_PUD_FREE #include <asm-generic/pgalloc.h> #define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t)) @@ -27,7 +28,7 @@ static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot) static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp) { - pudval_t pudval = PUD_TYPE_TABLE; + pudval_t pudval = PUD_TYPE_TABLE | PUD_TABLE_AF; pudval |= (mm == &init_mm) ? PUD_TABLE_UXN : PUD_TABLE_PXN; __pud_populate(pudp, __pa(pmdp), pudval); @@ -43,16 +44,24 @@ static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot) static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot) { - set_p4d(p4dp, __p4d(__phys_to_p4d_val(pudp) | prot)); + if (pgtable_l4_enabled()) + set_p4d(p4dp, __p4d(__phys_to_p4d_val(pudp) | prot)); } static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4dp, pud_t *pudp) { - p4dval_t p4dval = P4D_TYPE_TABLE; + p4dval_t p4dval = P4D_TYPE_TABLE | P4D_TABLE_AF; p4dval |= (mm == &init_mm) ?
P4D_TABLE_UXN : P4D_TABLE_PXN; __p4d_populate(p4dp, __pa(pudp), p4dval); } + +static inline void pud_free(struct mm_struct *mm, pud_t *pud) +{ + if (!pgtable_l4_enabled()) + return; + __pud_free(mm, pud); +} #else static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot) { @@ -60,6 +69,29 @@ static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot) } #endif /* CONFIG_PGTABLE_LEVELS > 3 */ +#if CONFIG_PGTABLE_LEVELS > 4 + +static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t p4dp, pgdval_t prot) +{ + if (pgtable_l5_enabled()) + set_pgd(pgdp, __pgd(__phys_to_pgd_val(p4dp) | prot)); +} + +static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgdp, p4d_t *p4dp) +{ + pgdval_t pgdval = PGD_TYPE_TABLE | PGD_TABLE_AF; + + pgdval |= (mm == &init_mm) ? PGD_TABLE_UXN : PGD_TABLE_PXN; + __pgd_populate(pgdp, __pa(p4dp), pgdval); +} + +#else +static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t p4dp, pgdval_t prot) +{ + BUILD_BUG(); +} +#endif /* CONFIG_PGTABLE_LEVELS > 4 */ + extern pgd_t *pgd_alloc(struct mm_struct *mm); extern void pgd_free(struct mm_struct *mm, pgd_t *pgdp); @@ -77,14 +109,16 @@ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep) { VM_BUG_ON(mm && mm != &init_mm); - __pmd_populate(pmdp, __pa(ptep), PMD_TYPE_TABLE | PMD_TABLE_UXN); + __pmd_populate(pmdp, __pa(ptep), + PMD_TYPE_TABLE | PMD_TABLE_AF | PMD_TABLE_UXN); } static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, pgtable_t ptep) { VM_BUG_ON(mm == &init_mm); - __pmd_populate(pmdp, page_to_phys(ptep), PMD_TYPE_TABLE | PMD_TABLE_PXN); + __pmd_populate(pmdp, page_to_phys(ptep), + PMD_TYPE_TABLE | PMD_TABLE_AF | PMD_TABLE_PXN); } #endif diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 5ab8d163198f..f3b77deedfa2 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -7,40 +7,46 @@ #include <asm/memory.h> +#define PTDESC_ORDER 3 + +/* Number of VA bits resolved by a single translation table level */ +#define PTDESC_TABLE_SHIFT (PAGE_SHIFT - PTDESC_ORDER) + /* * Number of page-table levels required to address 'va_bits' wide * address, without section mapping. We resolve the top (va_bits - PAGE_SHIFT) - * bits with (PAGE_SHIFT - 3) bits at each page table level. Hence: + * bits with PTDESC_TABLE_SHIFT bits at each page table level. Hence: * - * levels = DIV_ROUND_UP((va_bits - PAGE_SHIFT), (PAGE_SHIFT - 3)) + * levels = DIV_ROUND_UP((va_bits - PAGE_SHIFT), PTDESC_TABLE_SHIFT) * * where DIV_ROUND_UP(n, d) => (((n) + (d) - 1) / (d)) * * We cannot include linux/kernel.h which defines DIV_ROUND_UP here * due to build issues. So we open code DIV_ROUND_UP here: * - * ((((va_bits) - PAGE_SHIFT) + (PAGE_SHIFT - 3) - 1) / (PAGE_SHIFT - 3)) + * ((((va_bits) - PAGE_SHIFT) + PTDESC_TABLE_SHIFT - 1) / PTDESC_TABLE_SHIFT) * * which gets simplified as : */ -#define ARM64_HW_PGTABLE_LEVELS(va_bits) (((va_bits) - 4) / (PAGE_SHIFT - 3)) +#define ARM64_HW_PGTABLE_LEVELS(va_bits) \ + (((va_bits) - PTDESC_ORDER - 1) / PTDESC_TABLE_SHIFT) /* - * Size mapped by an entry at level n ( 0 <= n <= 3) - * We map (PAGE_SHIFT - 3) at all translation levels and PAGE_SHIFT bits + * Size mapped by an entry at level n ( -1 <= n <= 3) + * We map PTDESC_TABLE_SHIFT at all translation levels and PAGE_SHIFT bits * in the final page. The maximum number of translation levels supported by - * the architecture is 4. 
Hence, starting at level n, we have further + * the architecture is 5. Hence, starting at level n, we have further * ((4 - n) - 1) levels of translation excluding the offset within the page. * So, the total number of bits mapped by an entry at level n is : * - * ((4 - n) - 1) * (PAGE_SHIFT - 3) + PAGE_SHIFT + * ((4 - n) - 1) * PTDESC_TABLE_SHIFT + PAGE_SHIFT * * Rearranging it a bit we get : - * (4 - n) * (PAGE_SHIFT - 3) + 3 + * (4 - n) * PTDESC_TABLE_SHIFT + PTDESC_ORDER */ -#define ARM64_HW_PGTABLE_LEVEL_SHIFT(n) ((PAGE_SHIFT - 3) * (4 - (n)) + 3) +#define ARM64_HW_PGTABLE_LEVEL_SHIFT(n) (PTDESC_TABLE_SHIFT * (4 - (n)) + PTDESC_ORDER) -#define PTRS_PER_PTE (1 << (PAGE_SHIFT - 3)) +#define PTRS_PER_PTE (1 << PTDESC_TABLE_SHIFT) /* * PMD_SHIFT determines the size a level 2 page table entry can map. @@ -49,7 +55,7 @@ #define PMD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(2) #define PMD_SIZE (_AC(1, UL) << PMD_SHIFT) #define PMD_MASK (~(PMD_SIZE-1)) -#define PTRS_PER_PMD (1 << (PAGE_SHIFT - 3)) +#define PTRS_PER_PMD (1 << PTDESC_TABLE_SHIFT) #endif /* @@ -59,12 +65,19 @@ #define PUD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(1) #define PUD_SIZE (_AC(1, UL) << PUD_SHIFT) #define PUD_MASK (~(PUD_SIZE-1)) -#define PTRS_PER_PUD (1 << (PAGE_SHIFT - 3)) +#define PTRS_PER_PUD (1 << PTDESC_TABLE_SHIFT) +#endif + +#if CONFIG_PGTABLE_LEVELS > 4 +#define P4D_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(0) +#define P4D_SIZE (_AC(1, UL) << P4D_SHIFT) +#define P4D_MASK (~(P4D_SIZE-1)) +#define PTRS_PER_P4D (1 << PTDESC_TABLE_SHIFT) #endif /* * PGDIR_SHIFT determines the size a top-level page table entry can map - * (depending on the configuration, this level can be 0, 1 or 2). + * (depending on the configuration, this level can be -1, 0, 1 or 2). */ #define PGDIR_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - CONFIG_PGTABLE_LEVELS) #define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) @@ -87,13 +100,22 @@ /* * Hardware page table definitions. * + * Level -1 descriptor (PGD). + */ +#define PGD_TYPE_TABLE (_AT(pgdval_t, 3) << 0) +#define PGD_TYPE_MASK (_AT(pgdval_t, 3) << 0) +#define PGD_TABLE_AF (_AT(pgdval_t, 1) << 10) /* Ignored if no FEAT_HAFT */ +#define PGD_TABLE_PXN (_AT(pgdval_t, 1) << 59) +#define PGD_TABLE_UXN (_AT(pgdval_t, 1) << 60) + +/* * Level 0 descriptor (P4D). */ #define P4D_TYPE_TABLE (_AT(p4dval_t, 3) << 0) -#define P4D_TABLE_BIT (_AT(p4dval_t, 1) << 1) #define P4D_TYPE_MASK (_AT(p4dval_t, 3) << 0) #define P4D_TYPE_SECT (_AT(p4dval_t, 1) << 0) #define P4D_SECT_RDONLY (_AT(p4dval_t, 1) << 7) /* AP[2] */ +#define P4D_TABLE_AF (_AT(p4dval_t, 1) << 10) /* Ignored if no FEAT_HAFT */ #define P4D_TABLE_PXN (_AT(p4dval_t, 1) << 59) #define P4D_TABLE_UXN (_AT(p4dval_t, 1) << 60) @@ -101,10 +123,10 @@ * Level 1 descriptor (PUD). 
*/ #define PUD_TYPE_TABLE (_AT(pudval_t, 3) << 0) -#define PUD_TABLE_BIT (_AT(pudval_t, 1) << 1) #define PUD_TYPE_MASK (_AT(pudval_t, 3) << 0) #define PUD_TYPE_SECT (_AT(pudval_t, 1) << 0) #define PUD_SECT_RDONLY (_AT(pudval_t, 1) << 7) /* AP[2] */ +#define PUD_TABLE_AF (_AT(pudval_t, 1) << 10) /* Ignored if no FEAT_HAFT */ #define PUD_TABLE_PXN (_AT(pudval_t, 1) << 59) #define PUD_TABLE_UXN (_AT(pudval_t, 1) << 60) @@ -114,12 +136,11 @@ #define PMD_TYPE_MASK (_AT(pmdval_t, 3) << 0) #define PMD_TYPE_TABLE (_AT(pmdval_t, 3) << 0) #define PMD_TYPE_SECT (_AT(pmdval_t, 1) << 0) -#define PMD_TABLE_BIT (_AT(pmdval_t, 1) << 1) +#define PMD_TABLE_AF (_AT(pmdval_t, 1) << 10) /* Ignored if no FEAT_HAFT */ /* * Section */ -#define PMD_SECT_VALID (_AT(pmdval_t, 1) << 0) #define PMD_SECT_USER (_AT(pmdval_t, 1) << 6) /* AP[1] */ #define PMD_SECT_RDONLY (_AT(pmdval_t, 1) << 7) /* AP[2] */ #define PMD_SECT_S (_AT(pmdval_t, 3) << 8) @@ -143,7 +164,6 @@ #define PTE_VALID (_AT(pteval_t, 1) << 0) #define PTE_TYPE_MASK (_AT(pteval_t, 3) << 0) #define PTE_TYPE_PAGE (_AT(pteval_t, 3) << 0) -#define PTE_TABLE_BIT (_AT(pteval_t, 1) << 1) #define PTE_USER (_AT(pteval_t, 1) << 6) /* AP[1] */ #define PTE_RDONLY (_AT(pteval_t, 1) << 7) /* AP[2] */ #define PTE_SHARED (_AT(pteval_t, 3) << 8) /* SH[1:0], inner shareable */ @@ -154,13 +174,19 @@ #define PTE_CONT (_AT(pteval_t, 1) << 52) /* Contiguous range */ #define PTE_PXN (_AT(pteval_t, 1) << 53) /* Privileged XN */ #define PTE_UXN (_AT(pteval_t, 1) << 54) /* User XN */ +#define PTE_SWBITS_MASK _AT(pteval_t, (BIT(63) | GENMASK(58, 55))) -#define PTE_ADDR_LOW (((_AT(pteval_t, 1) << (48 - PAGE_SHIFT)) - 1) << PAGE_SHIFT) +#define PTE_ADDR_LOW (((_AT(pteval_t, 1) << (50 - PAGE_SHIFT)) - 1) << PAGE_SHIFT) #ifdef CONFIG_ARM64_PA_BITS_52 +#ifdef CONFIG_ARM64_64K_PAGES #define PTE_ADDR_HIGH (_AT(pteval_t, 0xf) << 12) -#define PTE_ADDR_MASK (PTE_ADDR_LOW | PTE_ADDR_HIGH) +#define PTE_ADDR_HIGH_SHIFT 36 +#define PHYS_TO_PTE_ADDR_MASK (PTE_ADDR_LOW | PTE_ADDR_HIGH) #else -#define PTE_ADDR_MASK PTE_ADDR_LOW +#define PTE_ADDR_HIGH (_AT(pteval_t, 0x3) << 8) +#define PTE_ADDR_HIGH_SHIFT 42 +#define PHYS_TO_PTE_ADDR_MASK GENMASK_ULL(49, 8) +#endif #endif /* @@ -170,15 +196,32 @@ #define PTE_ATTRINDX_MASK (_AT(pteval_t, 7) << 2) /* + * PIIndex[3:0] encoding (Permission Indirection Extension) + */ +#define PTE_PI_IDX_0 6 /* AP[1], USER */ +#define PTE_PI_IDX_1 51 /* DBM */ +#define PTE_PI_IDX_2 53 /* PXN */ +#define PTE_PI_IDX_3 54 /* UXN */ + +/* + * POIndex[2:0] encoding (Permission Overlay Extension) + */ +#define PTE_PO_IDX_0 (_AT(pteval_t, 1) << 60) +#define PTE_PO_IDX_1 (_AT(pteval_t, 1) << 61) +#define PTE_PO_IDX_2 (_AT(pteval_t, 1) << 62) + +#define PTE_PO_IDX_MASK GENMASK_ULL(62, 60) + + +/* * Memory Attribute override for Stage-2 (MemAttr[3:0]) */ #define PTE_S2_MEMATTR(t) (_AT(pteval_t, (t)) << 2) /* - * Highest possible physical address supported. 
+ * Hierarchical permission for Stage-1 tables */ -#define PHYS_MASK_SHIFT (CONFIG_ARM64_PA_BITS) -#define PHYS_MASK ((UL(1) << PHYS_MASK_SHIFT) - 1) +#define S1_TABLE_AP (_AT(pmdval_t, 3) << 61) #define TTBR_CNP_BIT (UL(1) << 0) @@ -268,6 +311,11 @@ #define TCR_TBI1 (UL(1) << 38) #define TCR_HA (UL(1) << 39) #define TCR_HD (UL(1) << 40) +#define TCR_HPD0_SHIFT 41 +#define TCR_HPD0 (UL(1) << TCR_HPD0_SHIFT) +#define TCR_HPD1_SHIFT 42 +#define TCR_HPD1 (UL(1) << TCR_HPD1_SHIFT) +#define TCR_TBID0 (UL(1) << 51) #define TCR_TBID1 (UL(1) << 52) #define TCR_NFD0 (UL(1) << 53) #define TCR_NFD1 (UL(1) << 54) @@ -275,6 +323,7 @@ #define TCR_E0PD1 (UL(1) << 56) #define TCR_TCMA0 (UL(1) << 57) #define TCR_TCMA1 (UL(1) << 58) +#define TCR_DS (UL(1) << 59) /* * TTBR. diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 62e0ebeed720..85dceb1c66f4 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -17,59 +17,97 @@ #define PTE_SWP_EXCLUSIVE (_AT(pteval_t, 1) << 2) /* only for swp ptes */ #define PTE_DIRTY (_AT(pteval_t, 1) << 55) #define PTE_SPECIAL (_AT(pteval_t, 1) << 56) -#define PTE_DEVMAP (_AT(pteval_t, 1) << 57) -#define PTE_PROT_NONE (_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */ /* - * This bit indicates that the entry is present i.e. pmd_page() - * still points to a valid huge page in memory even if the pmd - * has been invalidated. + * PTE_PRESENT_INVALID=1 & PTE_VALID=0 indicates that the pte's fields should be + * interpreted according to the HW layout by SW but any attempted HW access to + * the address will result in a fault. pte_present() returns true. */ -#define PMD_PRESENT_INVALID (_AT(pteval_t, 1) << 59) /* only when !PMD_SECT_VALID */ +#define PTE_PRESENT_INVALID (PTE_NG) /* only when !PTE_VALID */ + +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP +#define PTE_UFFD_WP (_AT(pteval_t, 1) << 58) /* uffd-wp tracking */ +#define PTE_SWP_UFFD_WP (_AT(pteval_t, 1) << 3) /* only for swp ptes */ +#else +#define PTE_UFFD_WP (_AT(pteval_t, 0)) +#define PTE_SWP_UFFD_WP (_AT(pteval_t, 0)) +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ + +#define _PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) + +#define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_MAYBE_NG | PTE_MAYBE_SHARED | PTE_AF) +#define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_MAYBE_NG | PMD_MAYBE_SHARED | PMD_SECT_AF) + +#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE)) +#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE)) +#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC)) +#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL)) +#define PROT_NORMAL_TAGGED (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_TAGGED)) + +#define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE)) +#define PROT_SECT_NORMAL (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PTE_WRITE | PMD_ATTRINDX(MT_NORMAL)) +#define PROT_SECT_NORMAL_EXEC (PROT_SECT_DEFAULT | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL)) + +#define _PAGE_DEFAULT (_PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL)) + +#define _PAGE_KERNEL (PROT_NORMAL) +#define _PAGE_KERNEL_RO ((PROT_NORMAL & ~PTE_WRITE) | PTE_RDONLY) +#define _PAGE_KERNEL_ROX ((PROT_NORMAL & ~(PTE_WRITE | PTE_PXN)) | PTE_RDONLY) +#define _PAGE_KERNEL_EXEC (PROT_NORMAL & ~PTE_PXN) +#define 
_PAGE_KERNEL_EXEC_CONT ((PROT_NORMAL & ~PTE_PXN) | PTE_CONT) + +#define _PAGE_SHARED (_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) +#define _PAGE_SHARED_EXEC (_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_WRITE) +#define _PAGE_READONLY (_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) +#define _PAGE_READONLY_EXEC (_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN) +#define _PAGE_EXECONLY (_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN) #ifndef __ASSEMBLY__ #include <asm/cpufeature.h> #include <asm/pgtable-types.h> +#include <asm/rsi.h> extern bool arm64_use_ng_mappings; +extern unsigned long prot_ns_shared; -#define _PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) -#define _PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) +#define PROT_NS_SHARED (is_realm_world() ? prot_ns_shared : 0) #define PTE_MAYBE_NG (arm64_use_ng_mappings ? PTE_NG : 0) #define PMD_MAYBE_NG (arm64_use_ng_mappings ? PMD_SECT_NG : 0) -/* - * If we have userspace only BTI we don't want to mark kernel pages - * guarded even if the system does support BTI. - */ -#ifdef CONFIG_ARM64_BTI_KERNEL -#define PTE_MAYBE_GP (system_supports_bti() ? PTE_GP : 0) +#ifndef CONFIG_ARM64_LPA2 +#define lpa2_is_enabled() false +#define PTE_MAYBE_SHARED PTE_SHARED +#define PMD_MAYBE_SHARED PMD_SECT_S +#define PHYS_MASK_SHIFT (CONFIG_ARM64_PA_BITS) #else -#define PTE_MAYBE_GP 0 +static inline bool __pure lpa2_is_enabled(void) +{ + return read_tcr() & TCR_DS; +} + +#define PTE_MAYBE_SHARED (lpa2_is_enabled() ? 0 : PTE_SHARED) +#define PMD_MAYBE_SHARED (lpa2_is_enabled() ? 0 : PMD_SECT_S) +#define PHYS_MASK_SHIFT (lpa2_is_enabled() ? CONFIG_ARM64_PA_BITS : 48) #endif -#define PROT_DEFAULT (_PROT_DEFAULT | PTE_MAYBE_NG) -#define PROT_SECT_DEFAULT (_PROT_SECT_DEFAULT | PMD_MAYBE_NG) - -#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE)) -#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE)) -#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC)) -#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL)) -#define PROT_NORMAL_TAGGED (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_TAGGED)) - -#define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE)) -#define PROT_SECT_NORMAL (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL)) -#define PROT_SECT_NORMAL_EXEC (PROT_SECT_DEFAULT | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL)) +/* + * Highest possible physical address supported. + */ +#define PHYS_MASK ((UL(1) << PHYS_MASK_SHIFT) - 1) -#define _PAGE_DEFAULT (_PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL)) +/* + * If we have userspace only BTI we don't want to mark kernel pages + * guarded even if the system does support BTI. + */ +#define PTE_MAYBE_GP (system_supports_bti_kernel() ? 
PTE_GP : 0) -#define PAGE_KERNEL __pgprot(PROT_NORMAL) -#define PAGE_KERNEL_RO __pgprot((PROT_NORMAL & ~PTE_WRITE) | PTE_RDONLY) -#define PAGE_KERNEL_ROX __pgprot((PROT_NORMAL & ~(PTE_WRITE | PTE_PXN)) | PTE_RDONLY) -#define PAGE_KERNEL_EXEC __pgprot(PROT_NORMAL & ~PTE_PXN) -#define PAGE_KERNEL_EXEC_CONT __pgprot((PROT_NORMAL & ~PTE_PXN) | PTE_CONT) +#define PAGE_KERNEL __pgprot(_PAGE_KERNEL) +#define PAGE_KERNEL_RO __pgprot(_PAGE_KERNEL_RO) +#define PAGE_KERNEL_ROX __pgprot(_PAGE_KERNEL_ROX) +#define PAGE_KERNEL_EXEC __pgprot(_PAGE_KERNEL_EXEC) +#define PAGE_KERNEL_EXEC_CONT __pgprot(_PAGE_KERNEL_EXEC_CONT) #define PAGE_S2_MEMATTR(attr, has_fwb) \ ({ \ @@ -81,32 +119,74 @@ extern bool arm64_use_ng_mappings; __val; \ }) -#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) +#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PRESENT_INVALID | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) /* shared+writable pages are clean by default, hence PTE_RDONLY|PTE_WRITE */ -#define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) -#define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_WRITE) -#define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) -#define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN) -#define PAGE_EXECONLY __pgprot(_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN) - -#define __P000 PAGE_NONE -#define __P001 PAGE_READONLY -#define __P010 PAGE_READONLY -#define __P011 PAGE_READONLY -#define __P100 PAGE_READONLY_EXEC /* PAGE_EXECONLY if Enhanced PAN */ -#define __P101 PAGE_READONLY_EXEC -#define __P110 PAGE_READONLY_EXEC -#define __P111 PAGE_READONLY_EXEC - -#define __S000 PAGE_NONE -#define __S001 PAGE_READONLY -#define __S010 PAGE_SHARED -#define __S011 PAGE_SHARED -#define __S100 PAGE_READONLY_EXEC /* PAGE_EXECONLY if Enhanced PAN */ -#define __S101 PAGE_READONLY_EXEC -#define __S110 PAGE_SHARED_EXEC -#define __S111 PAGE_SHARED_EXEC +#define PAGE_SHARED __pgprot(_PAGE_SHARED) +#define PAGE_SHARED_EXEC __pgprot(_PAGE_SHARED_EXEC) +#define PAGE_READONLY __pgprot(_PAGE_READONLY) +#define PAGE_READONLY_EXEC __pgprot(_PAGE_READONLY_EXEC) +#define PAGE_EXECONLY __pgprot(_PAGE_EXECONLY) #endif /* __ASSEMBLY__ */ +#define pte_pi_index(pte) ( \ + ((pte & BIT(PTE_PI_IDX_3)) >> (PTE_PI_IDX_3 - 3)) | \ + ((pte & BIT(PTE_PI_IDX_2)) >> (PTE_PI_IDX_2 - 2)) | \ + ((pte & BIT(PTE_PI_IDX_1)) >> (PTE_PI_IDX_1 - 1)) | \ + ((pte & BIT(PTE_PI_IDX_0)) >> (PTE_PI_IDX_0 - 0))) + +/* + * Page types used via Permission Indirection Extension (PIE). PIE uses + * the USER, DBM, PXN and UXN bits to to generate an index which is used + * to look up the actual permission in PIR_ELx and PIRE0_EL1. We define + * combinations we use on non-PIE systems with the same encoding, for + * convenience these are listed here as comments as are the unallocated + * encodings. 
+ */ + +/* 0: PAGE_DEFAULT */ +/* 1: PTE_USER */ +/* 2: PTE_WRITE */ +/* 3: PTE_WRITE | PTE_USER */ +/* 4: PAGE_EXECONLY PTE_PXN */ +/* 5: PAGE_READONLY_EXEC PTE_PXN | PTE_USER */ +/* 6: PTE_PXN | PTE_WRITE */ +/* 7: PAGE_SHARED_EXEC PTE_PXN | PTE_WRITE | PTE_USER */ +/* 8: PAGE_KERNEL_ROX PTE_UXN */ +/* 9: PAGE_GCS_RO PTE_UXN | PTE_USER */ +/* a: PAGE_KERNEL_EXEC PTE_UXN | PTE_WRITE */ +/* b: PAGE_GCS PTE_UXN | PTE_WRITE | PTE_USER */ +/* c: PAGE_KERNEL_RO PTE_UXN | PTE_PXN */ +/* d: PAGE_READONLY PTE_UXN | PTE_PXN | PTE_USER */ +/* e: PAGE_KERNEL PTE_UXN | PTE_PXN | PTE_WRITE */ +/* f: PAGE_SHARED PTE_UXN | PTE_PXN | PTE_WRITE | PTE_USER */ + +#define _PAGE_GCS (_PAGE_DEFAULT | PTE_NG | PTE_UXN | PTE_WRITE | PTE_USER) +#define _PAGE_GCS_RO (_PAGE_DEFAULT | PTE_NG | PTE_UXN | PTE_USER) + +#define PAGE_GCS __pgprot(_PAGE_GCS) +#define PAGE_GCS_RO __pgprot(_PAGE_GCS_RO) + +#define PIE_E0 ( \ + PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_GCS), PIE_GCS) | \ + PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_GCS_RO), PIE_R) | \ + PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_EXECONLY), PIE_X_O) | \ + PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_READONLY_EXEC), PIE_RX_O) | \ + PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_SHARED_EXEC), PIE_RWX_O) | \ + PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_READONLY), PIE_R_O) | \ + PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_SHARED), PIE_RW_O)) + +#define PIE_E1 ( \ + PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_GCS), PIE_NONE_O) | \ + PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_GCS_RO), PIE_NONE_O) | \ + PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_EXECONLY), PIE_NONE_O) | \ + PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_READONLY_EXEC), PIE_R) | \ + PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_SHARED_EXEC), PIE_RW) | \ + PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_READONLY), PIE_R) | \ + PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_SHARED), PIE_RW) | \ + PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_KERNEL_ROX), PIE_RX) | \ + PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_KERNEL_EXEC), PIE_RWX) | \ + PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_KERNEL_RO), PIE_R) | \ + PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_KERNEL), PIE_RW)) + #endif /* __ASM_PGTABLE_PROT_H */ diff --git a/arch/arm64/include/asm/pgtable-types.h b/arch/arm64/include/asm/pgtable-types.h index b8f158ae2527..265e8301d7ba 100644 --- a/arch/arm64/include/asm/pgtable-types.h +++ b/arch/arm64/include/asm/pgtable-types.h @@ -11,11 +11,19 @@ #include <asm/types.h> -typedef u64 pteval_t; -typedef u64 pmdval_t; -typedef u64 pudval_t; -typedef u64 p4dval_t; -typedef u64 pgdval_t; +/* + * Page Table Descriptor + * + * Generic page table descriptor format from which + * all level specific descriptors can be derived. + */ +typedef u64 ptdesc_t; + +typedef ptdesc_t pteval_t; +typedef ptdesc_t pmdval_t; +typedef ptdesc_t pudval_t; +typedef ptdesc_t p4dval_t; +typedef ptdesc_t pgdval_t; /* * These are used to make use of C type-checking.. 
@@ -36,11 +44,17 @@ typedef struct { pudval_t pud; } pud_t; #define __pud(x) ((pud_t) { (x) } ) #endif +#if CONFIG_PGTABLE_LEVELS > 4 +typedef struct { p4dval_t p4d; } p4d_t; +#define p4d_val(x) ((x).p4d) +#define __p4d(x) ((p4d_t) { (x) } ) +#endif + typedef struct { pgdval_t pgd; } pgd_t; #define pgd_val(x) ((x).pgd) #define __pgd(x) ((pgd_t) { (x) } ) -typedef struct { pteval_t pgprot; } pgprot_t; +typedef struct { ptdesc_t pgprot; } pgprot_t; #define pgprot_val(x) ((x).pgprot) #define __pgprot(x) ((pgprot_t) { (x) } ) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index b5df82aa99e6..abd2dee416b3 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -18,11 +18,15 @@ * VMALLOC range. * * VMALLOC_START: beginning of the kernel vmalloc space - * VMALLOC_END: extends to the available space below vmemmap, PCI I/O space - * and fixed mappings + * VMALLOC_END: extends to the available space below vmemmap */ #define VMALLOC_START (MODULES_END) -#define VMALLOC_END (VMEMMAP_START - SZ_256M) +#if VA_BITS == VA_BITS_MIN +#define VMALLOC_END (VMEMMAP_START - SZ_8M) +#else +#define VMEMMAP_UNUSED_NPAGES ((_PAGE_OFFSET(vabits_actual) - PAGE_OFFSET) >> PAGE_SHIFT) +#define VMALLOC_END (VMEMMAP_START + VMEMMAP_UNUSED_NPAGES * sizeof(struct page) - SZ_8M) +#endif #define vmemmap ((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT)) @@ -30,11 +34,91 @@ #include <asm/cmpxchg.h> #include <asm/fixmap.h> +#include <asm/por.h> #include <linux/mmdebug.h> #include <linux/mm_types.h> #include <linux/sched.h> #include <linux/page_table_check.h> +static inline void emit_pte_barriers(void) +{ + /* + * These barriers are emitted under certain conditions after a pte entry + * was modified (see e.g. __set_pte_complete()). The dsb makes the store + * visible to the table walker. The isb ensures that any previous + * speculative "invalid translation" marker that is in the CPU's + * pipeline gets cleared, so that any access to that address after + * setting the pte to valid won't cause a spurious fault. If the thread + * gets preempted after storing to the pgtable but before emitting these + * barriers, __switch_to() emits a dsb which ensure the walker gets to + * see the store. There is no guarantee of an isb being issued though. + * This is safe because it will still get issued (albeit on a + * potentially different CPU) when the thread starts running again, + * before any access to the address. + */ + dsb(ishst); + isb(); +} + +static inline void queue_pte_barriers(void) +{ + unsigned long flags; + + if (in_interrupt()) { + emit_pte_barriers(); + return; + } + + flags = read_thread_flags(); + + if (flags & BIT(TIF_LAZY_MMU)) { + /* Avoid the atomic op if already set. */ + if (!(flags & BIT(TIF_LAZY_MMU_PENDING))) + set_thread_flag(TIF_LAZY_MMU_PENDING); + } else { + emit_pte_barriers(); + } +} + +#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE +static inline void arch_enter_lazy_mmu_mode(void) +{ + /* + * lazy_mmu_mode is not supposed to permit nesting. But in practice this + * does happen with CONFIG_DEBUG_PAGEALLOC, where a page allocation + * inside a lazy_mmu_mode section (such as zap_pte_range()) will change + * permissions on the linear map with apply_to_page_range(), which + * re-enters lazy_mmu_mode. So we tolerate nesting in our + * implementation. The first call to arch_leave_lazy_mmu_mode() will + * flush and clear the flag such that the remainder of the work in the + * outer nest behaves as if outside of lazy mmu mode. 
This is safe and + * keeps tracking simple. + */ + + if (in_interrupt()) + return; + + set_thread_flag(TIF_LAZY_MMU); +} + +static inline void arch_flush_lazy_mmu_mode(void) +{ + if (in_interrupt()) + return; + + if (test_and_clear_thread_flag(TIF_LAZY_MMU_PENDING)) + emit_pte_barriers(); +} + +static inline void arch_leave_lazy_mmu_mode(void) +{ + if (in_interrupt()) + return; + + arch_flush_lazy_mmu_mode(); + clear_thread_flag(TIF_LAZY_MMU); +} + #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE @@ -45,19 +129,13 @@ __flush_tlb_range(vma, addr, end, PUD_SIZE, false, 1) #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ -static inline bool arch_thp_swp_supported(void) -{ - return !system_supports_mte(); -} -#define arch_thp_swp_supported arch_thp_swp_supported - /* * Outside of a few very special situations (e.g. hibernation), we always * use broadcast TLB invalidation instructions, therefore a spurious page * fault on one CPU which has been handled concurrently by another CPU * does not need to perform additional invalidation. */ -#define flush_tlb_fix_spurious_fault(vma, address) do { } while (0) +#define flush_tlb_fix_spurious_fault(vma, address, ptep) do { } while (0) /* * ZERO_PAGE is a global shared page that is always zero: used @@ -69,23 +147,27 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; #define pte_ERROR(e) \ pr_err("%s:%d: bad pte %016llx.\n", __FILE__, __LINE__, pte_val(e)) -/* - * Macros to convert between a physical address and its placement in a - * page table entry, taking care of 52-bit addresses. - */ #ifdef CONFIG_ARM64_PA_BITS_52 static inline phys_addr_t __pte_to_phys(pte_t pte) { + pte_val(pte) &= ~PTE_MAYBE_SHARED; return (pte_val(pte) & PTE_ADDR_LOW) | - ((pte_val(pte) & PTE_ADDR_HIGH) << 36); + ((pte_val(pte) & PTE_ADDR_HIGH) << PTE_ADDR_HIGH_SHIFT); } static inline pteval_t __phys_to_pte_val(phys_addr_t phys) { - return (phys | (phys >> 36)) & PTE_ADDR_MASK; + return (phys | (phys >> PTE_ADDR_HIGH_SHIFT)) & PHYS_TO_PTE_ADDR_MASK; } #else -#define __pte_to_phys(pte) (pte_val(pte) & PTE_ADDR_MASK) -#define __phys_to_pte_val(phys) (phys) +static inline phys_addr_t __pte_to_phys(pte_t pte) +{ + return pte_val(pte) & PTE_ADDR_LOW; +} + +static inline pteval_t __phys_to_pte_val(phys_addr_t phys) +{ + return phys; +} #endif #define pte_pfn(pte) (__pte_to_phys(pte) >> PAGE_SHIFT) @@ -93,20 +175,21 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys) __pte(__phys_to_pte_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)) #define pte_none(pte) (!pte_val(pte)) -#define pte_clear(mm,addr,ptep) set_pte(ptep, __pte(0)) +#define __pte_clear(mm, addr, ptep) \ + __set_pte(ptep, __pte(0)) #define pte_page(pte) (pfn_to_page(pte_pfn(pte))) /* * The following only work if pte_present(). Undefined behaviour otherwise. 
*/ -#define pte_present(pte) (!!(pte_val(pte) & (PTE_VALID | PTE_PROT_NONE))) +#define pte_present(pte) (pte_valid(pte) || pte_present_invalid(pte)) #define pte_young(pte) (!!(pte_val(pte) & PTE_AF)) #define pte_special(pte) (!!(pte_val(pte) & PTE_SPECIAL)) #define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE)) +#define pte_rdonly(pte) (!!(pte_val(pte) & PTE_RDONLY)) #define pte_user(pte) (!!(pte_val(pte) & PTE_USER)) #define pte_user_exec(pte) (!(pte_val(pte) & PTE_UXN)) #define pte_cont(pte) (!!(pte_val(pte) & PTE_CONT)) -#define pte_devmap(pte) (!!(pte_val(pte) & PTE_DEVMAP)) #define pte_tagged(pte) ((pte_val(pte) & PTE_ATTRINDX_MASK) == \ PTE_ATTRINDX(MT_NORMAL_TAGGED)) @@ -120,11 +203,13 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys) (__boundary - 1 < (end) - 1) ? __boundary : (end); \ }) -#define pte_hw_dirty(pte) (pte_write(pte) && !(pte_val(pte) & PTE_RDONLY)) +#define pte_hw_dirty(pte) (pte_write(pte) && !pte_rdonly(pte)) #define pte_sw_dirty(pte) (!!(pte_val(pte) & PTE_DIRTY)) #define pte_dirty(pte) (pte_sw_dirty(pte) || pte_hw_dirty(pte)) #define pte_valid(pte) (!!(pte_val(pte) & PTE_VALID)) +#define pte_present_invalid(pte) \ + ((pte_val(pte) & (PTE_VALID | PTE_PRESENT_INVALID)) == PTE_PRESENT_INVALID) /* * Execute-only user mappings do not have the PTE_USER bit set. All valid * kernel mappings have the PTE_UXN bit set. @@ -132,16 +217,38 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys) #define pte_valid_not_user(pte) \ ((pte_val(pte) & (PTE_VALID | PTE_USER | PTE_UXN)) == (PTE_VALID | PTE_UXN)) /* + * Returns true if the pte is valid and has the contiguous bit set. + */ +#define pte_valid_cont(pte) (pte_valid(pte) && pte_cont(pte)) +/* * Could the pte be present in the TLB? We must check mm_tlb_flush_pending * so that we don't erroneously return false for pages that have been * remapped as PROT_NONE but are yet to be flushed from the TLB. * Note that we can't make any assumptions based on the state of the access - * flag, since ptep_clear_flush_young() elides a DSB when invalidating the + * flag, since __ptep_clear_flush_young() elides a DSB when invalidating the * TLB. */ #define pte_accessible(mm, pte) \ (mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid(pte)) +static inline bool por_el0_allows_pkey(u8 pkey, bool write, bool execute) +{ + u64 por; + + if (!system_supports_poe()) + return true; + + por = read_sysreg_s(SYS_POR_EL0); + + if (write) + return por_elx_allows_write(por, pkey); + + if (execute) + return por_elx_allows_exec(por, pkey); + + return por_elx_allows_read(por, pkey); +} + /* * p??_access_permitted() is true for valid user mappings (PTE_USER * bit set, subject to the write permission check). For execute-only @@ -149,8 +256,11 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys) * not set) must return false. PROT_NONE mappings do not have the * PTE_VALID bit set. 
*/ -#define pte_access_permitted(pte, write) \ +#define pte_access_permitted_no_overlay(pte, write) \ (((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) && (!(write) || pte_write(pte))) +#define pte_access_permitted(pte, write) \ + (pte_access_permitted_no_overlay(pte, write) && \ + por_el0_allows_pkey(FIELD_GET(PTE_PO_IDX_MASK, pte_val(pte)), write, false)) #define pmd_access_permitted(pmd, write) \ (pte_access_permitted(pmd_pte(pmd), (write))) #define pud_access_permitted(pud, write) \ @@ -180,7 +290,7 @@ static inline pmd_t set_pmd_bit(pmd_t pmd, pgprot_t prot) return pmd; } -static inline pte_t pte_mkwrite(pte_t pte) +static inline pte_t pte_mkwrite_novma(pte_t pte) { pte = set_pte_bit(pte, __pgprot(PTE_WRITE)); pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY)); @@ -212,7 +322,7 @@ static inline pte_t pte_wrprotect(pte_t pte) * clear), set the PTE_DIRTY bit. */ if (pte_hw_dirty(pte)) - pte = pte_mkdirty(pte); + pte = set_pte_bit(pte, __pgprot(PTE_DIRTY)); pte = clear_pte_bit(pte, __pgprot(PTE_WRITE)); pte = set_pte_bit(pte, __pgprot(PTE_RDONLY)); @@ -236,8 +346,7 @@ static inline pte_t pte_mkspecial(pte_t pte) static inline pte_t pte_mkcont(pte_t pte) { - pte = set_pte_bit(pte, __pgprot(PTE_CONT)); - return set_pte_bit(pte, __pgprot(PTE_TYPE_PAGE)); + return set_pte_bit(pte, __pgprot(PTE_CONT)); } static inline pte_t pte_mknoncont(pte_t pte) @@ -245,36 +354,68 @@ static inline pte_t pte_mknoncont(pte_t pte) return clear_pte_bit(pte, __pgprot(PTE_CONT)); } -static inline pte_t pte_mkpresent(pte_t pte) +static inline pte_t pte_mkvalid(pte_t pte) { return set_pte_bit(pte, __pgprot(PTE_VALID)); } +static inline pte_t pte_mkinvalid(pte_t pte) +{ + pte = set_pte_bit(pte, __pgprot(PTE_PRESENT_INVALID)); + pte = clear_pte_bit(pte, __pgprot(PTE_VALID)); + return pte; +} + static inline pmd_t pmd_mkcont(pmd_t pmd) { return __pmd(pmd_val(pmd) | PMD_SECT_CONT); } -static inline pte_t pte_mkdevmap(pte_t pte) +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP +static inline int pte_uffd_wp(pte_t pte) { - return set_pte_bit(pte, __pgprot(PTE_DEVMAP | PTE_SPECIAL)); + return !!(pte_val(pte) & PTE_UFFD_WP); } -static inline void set_pte(pte_t *ptep, pte_t pte) +static inline pte_t pte_mkuffd_wp(pte_t pte) +{ + return pte_wrprotect(set_pte_bit(pte, __pgprot(PTE_UFFD_WP))); +} + +static inline pte_t pte_clear_uffd_wp(pte_t pte) +{ + return clear_pte_bit(pte, __pgprot(PTE_UFFD_WP)); +} +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ + +static inline void __set_pte_nosync(pte_t *ptep, pte_t pte) { WRITE_ONCE(*ptep, pte); +} +static inline void __set_pte_complete(pte_t pte) +{ /* * Only if the new pte is valid and kernel, otherwise TLB maintenance - * or update_mmu_cache() have the necessary barriers. + * has the necessary barriers. 
*/ - if (pte_valid_not_user(pte)) { - dsb(ishst); - isb(); - } + if (pte_valid_not_user(pte)) + queue_pte_barriers(); +} + +static inline void __set_pte(pte_t *ptep, pte_t pte) +{ + __set_pte_nosync(ptep, pte); + __set_pte_complete(pte); +} + +static inline pte_t __ptep_get(pte_t *ptep) +{ + return READ_ONCE(*ptep); } extern void __sync_icache_dcache(pte_t pteval); +bool pgattr_change_is_safe(pteval_t old, pteval_t new); /* * PTE bits configuration in the presence of hardware Dirty Bit Management @@ -292,7 +433,7 @@ extern void __sync_icache_dcache(pte_t pteval); * PTE_DIRTY || (PTE_WRITE && !PTE_RDONLY) */ -static inline void __check_racy_pte_update(struct mm_struct *mm, pte_t *ptep, +static inline void __check_safe_pte_update(struct mm_struct *mm, pte_t *ptep, pte_t pte) { pte_t old_pte; @@ -300,7 +441,7 @@ static inline void __check_racy_pte_update(struct mm_struct *mm, pte_t *ptep, if (!IS_ENABLED(CONFIG_DEBUG_VM)) return; - old_pte = READ_ONCE(*ptep); + old_pte = __ptep_get(ptep); if (!pte_valid(old_pte) || !pte_valid(pte)) return; @@ -309,7 +450,7 @@ static inline void __check_racy_pte_update(struct mm_struct *mm, pte_t *ptep, /* * Check for potential race with hardware updates of the pte - * (ptep_set_access_flags safely changes valid ptes without going + * (__ptep_set_access_flags safely changes valid ptes without going * through an invalid entry). */ VM_WARN_ONCE(!pte_young(pte), @@ -318,10 +459,12 @@ static inline void __check_racy_pte_update(struct mm_struct *mm, pte_t *ptep, VM_WARN_ONCE(pte_write(old_pte) && !pte_dirty(pte), "%s: racy dirty state clearing: 0x%016llx -> 0x%016llx", __func__, pte_val(old_pte), pte_val(pte)); + VM_WARN_ONCE(!pgattr_change_is_safe(pte_val(old_pte), pte_val(pte)), + "%s: unsafe attribute change: 0x%016llx -> 0x%016llx", + __func__, pte_val(old_pte), pte_val(pte)); } -static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte) +static inline void __sync_cache_and_tags(pte_t pte, unsigned int nr_pages) { if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte)) __sync_icache_dcache(pte); @@ -329,41 +472,33 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, /* * If the PTE would provide user space access to the tags associated * with it then ensure that the MTE tags are synchronised. Although - * pte_access_permitted() returns false for exec only mappings, they - * don't expose tags (instruction fetches don't check tags). + * pte_access_permitted_no_overlay() returns false for exec only + * mappings, they don't expose tags (instruction fetches don't check + * tags). */ - if (system_supports_mte() && pte_access_permitted(pte, false) && - !pte_special(pte)) { - pte_t old_pte = READ_ONCE(*ptep); - /* - * We only need to synchronise if the new PTE has tags enabled - * or if swapping in (in which case another mapping may have - * set tags in the past even if this PTE isn't tagged). 
- * (!pte_none() && !pte_present()) is an open coded version of - * is_swap_pte() - */ - if (pte_tagged(pte) || (!pte_none(old_pte) && !pte_present(old_pte))) - mte_sync_tags(old_pte, pte); - } + if (system_supports_mte() && pte_access_permitted_no_overlay(pte, false) && + !pte_special(pte) && pte_tagged(pte)) + mte_sync_tags(pte, nr_pages); +} - __check_racy_pte_update(mm, ptep, pte); +/* + * Select all bits except the pfn + */ +#define pte_pgprot pte_pgprot +static inline pgprot_t pte_pgprot(pte_t pte) +{ + unsigned long pfn = pte_pfn(pte); - set_pte(ptep, pte); + return __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte)); } -static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte) +#define pte_advance_pfn pte_advance_pfn +static inline pte_t pte_advance_pfn(pte_t pte, unsigned long nr) { - page_table_check_pte_set(mm, addr, ptep, pte); - return __set_pte_at(mm, addr, ptep, pte); + return pfn_pte(pte_pfn(pte) + nr, pte_pgprot(pte)); } /* - * Huge pte definitions. - */ -#define pte_mkhuge(pte) (__pte(pte_val(pte) & ~PTE_TABLE_BIT)) - -/* * Hugetlb definitions. */ #define HUGE_MAX_HSTATE 4 @@ -409,21 +544,20 @@ static inline pmd_t pte_pmd(pte_t pte) static inline pgprot_t mk_pud_sect_prot(pgprot_t prot) { - return __pgprot((pgprot_val(prot) & ~PUD_TABLE_BIT) | PUD_TYPE_SECT); + return __pgprot((pgprot_val(prot) & ~PUD_TYPE_MASK) | PUD_TYPE_SECT); } static inline pgprot_t mk_pmd_sect_prot(pgprot_t prot) { - return __pgprot((pgprot_val(prot) & ~PMD_TABLE_BIT) | PMD_TYPE_SECT); + return __pgprot((pgprot_val(prot) & ~PMD_TYPE_MASK) | PMD_TYPE_SECT); } -#define __HAVE_ARCH_PTE_SWP_EXCLUSIVE static inline pte_t pte_swp_mkexclusive(pte_t pte) { return set_pte_bit(pte, __pgprot(PTE_SWP_EXCLUSIVE)); } -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & PTE_SWP_EXCLUSIVE; } @@ -433,23 +567,39 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) return clear_pte_bit(pte, __pgprot(PTE_SWP_EXCLUSIVE)); } -/* - * Select all bits except the pfn - */ -static inline pgprot_t pte_pgprot(pte_t pte) +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP +static inline pte_t pte_swp_mkuffd_wp(pte_t pte) { - unsigned long pfn = pte_pfn(pte); + return set_pte_bit(pte, __pgprot(PTE_SWP_UFFD_WP)); +} - return __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte)); +static inline int pte_swp_uffd_wp(pte_t pte) +{ + return !!(pte_val(pte) & PTE_SWP_UFFD_WP); } +static inline pte_t pte_swp_clear_uffd_wp(pte_t pte) +{ + return clear_pte_bit(pte, __pgprot(PTE_SWP_UFFD_WP)); +} +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ + #ifdef CONFIG_NUMA_BALANCING /* * See the comment in include/linux/pgtable.h */ static inline int pte_protnone(pte_t pte) { - return (pte_val(pte) & (PTE_VALID | PTE_PROT_NONE)) == PTE_PROT_NONE; + /* + * pte_present_invalid() tells us that the pte is invalid from HW + * perspective but present from SW perspective, so the fields are to be + * interpretted as per the HW layout. The second 2 checks are the unique + * encoding that we use for PROT_NONE. It is insufficient to only use + * the first check because we share the same encoding scheme with pmds + * which support pmd_mkinvalid(), so can be present-invalid without + * being PROT_NONE. 
+ */ + return pte_present_invalid(pte) && !pte_user(pte) && !pte_user_exec(pte); } static inline int pmd_protnone(pmd_t pmd) @@ -458,24 +608,7 @@ static inline int pmd_protnone(pmd_t pmd) } #endif -#define pmd_present_invalid(pmd) (!!(pmd_val(pmd) & PMD_PRESENT_INVALID)) - -static inline int pmd_present(pmd_t pmd) -{ - return pte_present(pmd_pte(pmd)) || pmd_present_invalid(pmd); -} - -/* - * THP definitions. - */ - -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -static inline int pmd_trans_huge(pmd_t pmd) -{ - return pmd_val(pmd) && pmd_present(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT); -} -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ - +#define pmd_present(pmd) pte_present(pmd_pte(pmd)) #define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd)) #define pmd_young(pmd) pte_young(pmd_pte(pmd)) #define pmd_valid(pmd) pte_valid(pmd_pte(pmd)) @@ -484,64 +617,146 @@ static inline int pmd_trans_huge(pmd_t pmd) #define pmd_cont(pmd) pte_cont(pmd_pte(pmd)) #define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd))) #define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd))) -#define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd))) +#define pmd_mkwrite_novma(pmd) pte_pmd(pte_mkwrite_novma(pmd_pte(pmd))) #define pmd_mkclean(pmd) pte_pmd(pte_mkclean(pmd_pte(pmd))) #define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd))) #define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd))) +#define pmd_mkinvalid(pmd) pte_pmd(pte_mkinvalid(pmd_pte(pmd))) +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP +#define pmd_uffd_wp(pmd) pte_uffd_wp(pmd_pte(pmd)) +#define pmd_mkuffd_wp(pmd) pte_pmd(pte_mkuffd_wp(pmd_pte(pmd))) +#define pmd_clear_uffd_wp(pmd) pte_pmd(pte_clear_uffd_wp(pmd_pte(pmd))) +#define pmd_swp_uffd_wp(pmd) pte_swp_uffd_wp(pmd_pte(pmd)) +#define pmd_swp_mkuffd_wp(pmd) pte_pmd(pte_swp_mkuffd_wp(pmd_pte(pmd))) +#define pmd_swp_clear_uffd_wp(pmd) \ + pte_pmd(pte_swp_clear_uffd_wp(pmd_pte(pmd))) +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ + +#define pmd_write(pmd) pte_write(pmd_pte(pmd)) -static inline pmd_t pmd_mkinvalid(pmd_t pmd) +static inline pmd_t pmd_mkhuge(pmd_t pmd) { - pmd = set_pmd_bit(pmd, __pgprot(PMD_PRESENT_INVALID)); - pmd = clear_pmd_bit(pmd, __pgprot(PMD_SECT_VALID)); + /* + * It's possible that the pmd is present-invalid on entry + * and in that case it needs to remain present-invalid on + * exit. So ensure the VALID bit does not get modified. 
+ */ + pmdval_t mask = PMD_TYPE_MASK & ~PTE_VALID; + pmdval_t val = PMD_TYPE_SECT & ~PTE_VALID; - return pmd; + return __pmd((pmd_val(pmd) & ~mask) | val); } -#define pmd_thp_or_huge(pmd) (pmd_huge(pmd) || pmd_trans_huge(pmd)) - -#define pmd_write(pmd) pte_write(pmd_pte(pmd)) - -#define pmd_mkhuge(pmd) (__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT)) - -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -#define pmd_devmap(pmd) pte_devmap(pmd_pte(pmd)) -#endif -static inline pmd_t pmd_mkdevmap(pmd_t pmd) +#ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP +#define pmd_special(pte) (!!((pmd_val(pte) & PTE_SPECIAL))) +static inline pmd_t pmd_mkspecial(pmd_t pmd) { - return pte_pmd(set_pte_bit(pmd_pte(pmd), __pgprot(PTE_DEVMAP))); + return set_pmd_bit(pmd, __pgprot(PTE_SPECIAL)); } +#endif #define __pmd_to_phys(pmd) __pte_to_phys(pmd_pte(pmd)) #define __phys_to_pmd_val(phys) __phys_to_pte_val(phys) #define pmd_pfn(pmd) ((__pmd_to_phys(pmd) & PMD_MASK) >> PAGE_SHIFT) #define pfn_pmd(pfn,prot) __pmd(__phys_to_pmd_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)) -#define mk_pmd(page,prot) pfn_pmd(page_to_pfn(page),prot) #define pud_young(pud) pte_young(pud_pte(pud)) #define pud_mkyoung(pud) pte_pud(pte_mkyoung(pud_pte(pud))) #define pud_write(pud) pte_write(pud_pte(pud)) -#define pud_mkhuge(pud) (__pud(pud_val(pud) & ~PUD_TABLE_BIT)) +static inline pud_t pud_mkhuge(pud_t pud) +{ + /* + * It's possible that the pud is present-invalid on entry + * and in that case it needs to remain present-invalid on + * exit. So ensure the VALID bit does not get modified. + */ + pudval_t mask = PUD_TYPE_MASK & ~PTE_VALID; + pudval_t val = PUD_TYPE_SECT & ~PTE_VALID; + + return __pud((pud_val(pud) & ~mask) | val); +} #define __pud_to_phys(pud) __pte_to_phys(pud_pte(pud)) #define __phys_to_pud_val(phys) __phys_to_pte_val(phys) #define pud_pfn(pud) ((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT) #define pfn_pud(pfn,prot) __pud(__phys_to_pud_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)) -static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, - pmd_t *pmdp, pmd_t pmd) +#define pmd_pgprot pmd_pgprot +static inline pgprot_t pmd_pgprot(pmd_t pmd) { - page_table_check_pmd_set(mm, addr, pmdp, pmd); - return __set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd)); + unsigned long pfn = pmd_pfn(pmd); + + return __pgprot(pmd_val(pfn_pmd(pfn, __pgprot(0))) ^ pmd_val(pmd)); } -static inline void set_pud_at(struct mm_struct *mm, unsigned long addr, - pud_t *pudp, pud_t pud) +#define pud_pgprot pud_pgprot +static inline pgprot_t pud_pgprot(pud_t pud) { - page_table_check_pud_set(mm, addr, pudp, pud); - return __set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud)); + unsigned long pfn = pud_pfn(pud); + + return __pgprot(pud_val(pfn_pud(pfn, __pgprot(0))) ^ pud_val(pud)); } +static inline void __set_ptes_anysz(struct mm_struct *mm, pte_t *ptep, + pte_t pte, unsigned int nr, + unsigned long pgsize) +{ + unsigned long stride = pgsize >> PAGE_SHIFT; + + switch (pgsize) { + case PAGE_SIZE: + page_table_check_ptes_set(mm, ptep, pte, nr); + break; + case PMD_SIZE: + page_table_check_pmds_set(mm, (pmd_t *)ptep, pte_pmd(pte), nr); + break; +#ifndef __PAGETABLE_PMD_FOLDED + case PUD_SIZE: + page_table_check_puds_set(mm, (pud_t *)ptep, pte_pud(pte), nr); + break; +#endif + default: + VM_WARN_ON(1); + } + + __sync_cache_and_tags(pte, nr * stride); + + for (;;) { + __check_safe_pte_update(mm, ptep, pte); + __set_pte_nosync(ptep, pte); + if (--nr == 0) + break; + ptep++; + pte = pte_advance_pfn(pte, stride); + } + + __set_pte_complete(pte); +} + 
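/*
 * A minimal illustrative sketch, assuming kernel context: __set_ptes_anysz()
 * above writes each descriptor with __set_pte_nosync(), advances the pfn by
 * one stride per entry via pte_advance_pfn(), and defers the dsb(ishst)/isb
 * to a single __set_pte_complete() call after the last store, so a caller
 * can install 'nr' contiguous page-size entries with at most one set of
 * barriers. The map_contig_pages() name below is hypothetical and not part
 * of this patch; only the helpers it calls are introduced in this hunk.
 */
static inline void map_contig_pages(struct mm_struct *mm, pte_t *ptep,
				    unsigned long pfn, unsigned int nr,
				    pgprot_t prot)
{
	/* One batched call: per-entry WRITE_ONCE()s, barriers emitted once. */
	__set_ptes_anysz(mm, ptep, pfn_pte(pfn, prot), nr, PAGE_SIZE);
}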
+static inline void __set_ptes(struct mm_struct *mm, + unsigned long __always_unused addr, + pte_t *ptep, pte_t pte, unsigned int nr) +{ + __set_ptes_anysz(mm, ptep, pte, nr, PAGE_SIZE); +} + +static inline void __set_pmds(struct mm_struct *mm, + unsigned long __always_unused addr, + pmd_t *pmdp, pmd_t pmd, unsigned int nr) +{ + __set_ptes_anysz(mm, (pte_t *)pmdp, pmd_pte(pmd), nr, PMD_SIZE); +} +#define set_pmd_at(mm, addr, pmdp, pmd) __set_pmds(mm, addr, pmdp, pmd, 1) + +static inline void __set_puds(struct mm_struct *mm, + unsigned long __always_unused addr, + pud_t *pudp, pud_t pud, unsigned int nr) +{ + __set_ptes_anysz(mm, (pte_t *)pudp, pud_pte(pud), nr, PUD_SIZE); +} +#define set_pud_at(mm, addr, pudp, pud) __set_puds(mm, addr, pudp, pud, 1) + #define __p4d_to_phys(p4d) __pte_to_phys(p4d_pte(p4d)) #define __phys_to_p4d_val(phys) __phys_to_pte_val(phys) @@ -554,6 +769,11 @@ static inline void set_pud_at(struct mm_struct *mm, unsigned long addr, #define pgprot_nx(prot) \ __pgprot_modify(prot, PTE_MAYBE_GP, PTE_PXN) +#define pgprot_decrypted(prot) \ + __pgprot_modify(prot, PROT_NS_SHARED, PROT_NS_SHARED) +#define pgprot_encrypted(prot) \ + __pgprot_modify(prot, PROT_NS_SHARED, 0) + /* * Mark the prot value as uncacheable and unbufferable. */ @@ -595,6 +815,17 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, #define pmd_leaf_size(pmd) (pmd_cont(pmd) ? CONT_PMD_SIZE : PMD_SIZE) #define pte_leaf_size(pte) (pte_cont(pte) ? CONT_PTE_SIZE : PAGE_SIZE) +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static inline int pmd_trans_huge(pmd_t pmd) +{ + /* + * If pmd is present-invalid, pmd_table() won't detect it + * as a table, so force the valid bit for the comparison. + */ + return pmd_present(pmd) && !pmd_table(__pmd(pmd_val(pmd) | PTE_VALID)); +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + #if defined(CONFIG_ARM64_64K_PAGES) || CONFIG_PGTABLE_LEVELS < 3 static inline bool pud_sect(pud_t pud) { return false; } static inline bool pud_table(pud_t pud) { return true; } @@ -605,13 +836,10 @@ static inline bool pud_table(pud_t pud) { return true; } PUD_TYPE_TABLE) #endif -extern pgd_t init_pg_dir[PTRS_PER_PGD]; -extern pgd_t init_pg_end[]; -extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; -extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; -extern pgd_t idmap_pg_end[]; -extern pgd_t tramp_pg_dir[PTRS_PER_PGD]; -extern pgd_t reserved_pg_dir[PTRS_PER_PGD]; +extern pgd_t swapper_pg_dir[]; +extern pgd_t idmap_pg_dir[]; +extern pgd_t tramp_pg_dir[]; +extern pgd_t reserved_pg_dir[]; extern void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd); @@ -632,10 +860,8 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) WRITE_ONCE(*pmdp, pmd); - if (pmd_valid(pmd)) { - dsb(ishst); - isb(); - } + if (pmd_valid(pmd)) + queue_pte_barriers(); } static inline void pmd_clear(pmd_t *pmdp) @@ -665,40 +891,37 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) /* use ONLY for statically allocated translation tables */ #define pte_offset_kimg(dir,addr) ((pte_t *)__phys_to_kimg(pte_offset_phys((dir), (addr)))) -/* - * Conversion functions: convert a page and protection to a page entry, - * and a page entry and page directory to the page they refer to. 
- */ -#define mk_pte(page,prot) pfn_pte(page_to_pfn(page),prot) - #if CONFIG_PGTABLE_LEVELS > 2 #define pmd_ERROR(e) \ pr_err("%s:%d: bad pmd %016llx.\n", __FILE__, __LINE__, pmd_val(e)) #define pud_none(pud) (!pud_val(pud)) -#define pud_bad(pud) (!pud_table(pud)) +#define pud_bad(pud) ((pud_val(pud) & PUD_TYPE_MASK) != \ + PUD_TYPE_TABLE) #define pud_present(pud) pte_present(pud_pte(pud)) +#ifndef __PAGETABLE_PMD_FOLDED #define pud_leaf(pud) (pud_present(pud) && !pud_table(pud)) +#else +#define pud_leaf(pud) false +#endif #define pud_valid(pud) pte_valid(pud_pte(pud)) #define pud_user(pud) pte_user(pud_pte(pud)) +#define pud_user_exec(pud) pte_user_exec(pud_pte(pud)) +static inline bool pgtable_l4_enabled(void); static inline void set_pud(pud_t *pudp, pud_t pud) { -#ifdef __PAGETABLE_PUD_FOLDED - if (in_swapper_pgdir(pudp)) { + if (!pgtable_l4_enabled() && in_swapper_pgdir(pudp)) { set_swapper_pgd((pgd_t *)pudp, __pgd(pud_val(pud))); return; } -#endif /* __PAGETABLE_PUD_FOLDED */ WRITE_ONCE(*pudp, pud); - if (pud_valid(pud)) { - dsb(ishst); - isb(); - } + if (pud_valid(pud)) + queue_pte_barriers(); } static inline void pud_clear(pud_t *pudp) @@ -730,7 +953,9 @@ static inline pmd_t *pud_pgtable(pud_t pud) #else +#define pud_valid(pud) false #define pud_page_paddr(pud) ({ BUILD_BUG(); 0; }) +#define pud_user_exec(pud) pud_user(pud) /* Always 0 with folding */ /* Match pmd_offset folding in <asm/generic/pgtable-nopmd.h> */ #define pmd_set_fixmap(addr) NULL @@ -743,12 +968,29 @@ static inline pmd_t *pud_pgtable(pud_t pud) #if CONFIG_PGTABLE_LEVELS > 3 +static __always_inline bool pgtable_l4_enabled(void) +{ + if (CONFIG_PGTABLE_LEVELS > 4 || !IS_ENABLED(CONFIG_ARM64_LPA2)) + return true; + if (!alternative_has_cap_likely(ARM64_ALWAYS_BOOT)) + return vabits_actual == VA_BITS; + return alternative_has_cap_unlikely(ARM64_HAS_VA52); +} + +static inline bool mm_pud_folded(const struct mm_struct *mm) +{ + return !pgtable_l4_enabled(); +} +#define mm_pud_folded mm_pud_folded + #define pud_ERROR(e) \ pr_err("%s:%d: bad pud %016llx.\n", __FILE__, __LINE__, pud_val(e)) -#define p4d_none(p4d) (!p4d_val(p4d)) -#define p4d_bad(p4d) (!(p4d_val(p4d) & 2)) -#define p4d_present(p4d) (p4d_val(p4d)) +#define p4d_none(p4d) (pgtable_l4_enabled() && !p4d_val(p4d)) +#define p4d_bad(p4d) (pgtable_l4_enabled() && \ + ((p4d_val(p4d) & P4D_TYPE_MASK) != \ + P4D_TYPE_TABLE)) +#define p4d_present(p4d) (!p4d_none(p4d)) static inline void set_p4d(p4d_t *p4dp, p4d_t p4d) { @@ -758,13 +1000,13 @@ static inline void set_p4d(p4d_t *p4dp, p4d_t p4d) } WRITE_ONCE(*p4dp, p4d); - dsb(ishst); - isb(); + queue_pte_barriers(); } static inline void p4d_clear(p4d_t *p4dp) { - set_p4d(p4dp, __p4d(0)); + if (pgtable_l4_enabled()) + set_p4d(p4dp, __p4d(0)); } static inline phys_addr_t p4d_page_paddr(p4d_t p4d) @@ -772,27 +1014,78 @@ static inline phys_addr_t p4d_page_paddr(p4d_t p4d) return __p4d_to_phys(p4d); } +#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) + +static inline pud_t *p4d_to_folded_pud(p4d_t *p4dp, unsigned long addr) +{ + /* Ensure that 'p4dp' indexes a page table according to 'addr' */ + VM_BUG_ON(((addr >> P4D_SHIFT) ^ ((u64)p4dp >> 3)) % PTRS_PER_P4D); + + return (pud_t *)PTR_ALIGN_DOWN(p4dp, PAGE_SIZE) + pud_index(addr); +} + static inline pud_t *p4d_pgtable(p4d_t p4d) { return (pud_t *)__va(p4d_page_paddr(p4d)); } -/* Find an entry in the first-level page table. 
*/ -#define pud_offset_phys(dir, addr) (p4d_page_paddr(READ_ONCE(*(dir))) + pud_index(addr) * sizeof(pud_t)) +static inline phys_addr_t pud_offset_phys(p4d_t *p4dp, unsigned long addr) +{ + BUG_ON(!pgtable_l4_enabled()); -#define pud_set_fixmap(addr) ((pud_t *)set_fixmap_offset(FIX_PUD, addr)) -#define pud_set_fixmap_offset(p4d, addr) pud_set_fixmap(pud_offset_phys(p4d, addr)) -#define pud_clear_fixmap() clear_fixmap(FIX_PUD) + return p4d_page_paddr(READ_ONCE(*p4dp)) + pud_index(addr) * sizeof(pud_t); +} -#define p4d_page(p4d) pfn_to_page(__phys_to_pfn(__p4d_to_phys(p4d))) +static inline +pud_t *pud_offset_lockless(p4d_t *p4dp, p4d_t p4d, unsigned long addr) +{ + if (!pgtable_l4_enabled()) + return p4d_to_folded_pud(p4dp, addr); + return (pud_t *)__va(p4d_page_paddr(p4d)) + pud_index(addr); +} +#define pud_offset_lockless pud_offset_lockless + +static inline pud_t *pud_offset(p4d_t *p4dp, unsigned long addr) +{ + return pud_offset_lockless(p4dp, READ_ONCE(*p4dp), addr); +} +#define pud_offset pud_offset + +static inline pud_t *pud_set_fixmap(unsigned long addr) +{ + if (!pgtable_l4_enabled()) + return NULL; + return (pud_t *)set_fixmap_offset(FIX_PUD, addr); +} + +static inline pud_t *pud_set_fixmap_offset(p4d_t *p4dp, unsigned long addr) +{ + if (!pgtable_l4_enabled()) + return p4d_to_folded_pud(p4dp, addr); + return pud_set_fixmap(pud_offset_phys(p4dp, addr)); +} + +static inline void pud_clear_fixmap(void) +{ + if (pgtable_l4_enabled()) + clear_fixmap(FIX_PUD); +} /* use ONLY for statically allocated translation tables */ -#define pud_offset_kimg(dir,addr) ((pud_t *)__phys_to_kimg(pud_offset_phys((dir), (addr)))) +static inline pud_t *pud_offset_kimg(p4d_t *p4dp, u64 addr) +{ + if (!pgtable_l4_enabled()) + return p4d_to_folded_pud(p4dp, addr); + return (pud_t *)__phys_to_kimg(pud_offset_phys(p4dp, addr)); +} + +#define p4d_page(p4d) pfn_to_page(__phys_to_pfn(__p4d_to_phys(p4d))) #else +static inline bool pgtable_l4_enabled(void) { return false; } + #define p4d_page_paddr(p4d) ({ BUILD_BUG(); 0;}) -#define pgd_page_paddr(pgd) ({ BUILD_BUG(); 0;}) /* Match pud_offset folding in <asm/generic/pgtable-nopud.h> */ #define pud_set_fixmap(addr) NULL @@ -803,6 +1096,150 @@ static inline pud_t *p4d_pgtable(p4d_t p4d) #endif /* CONFIG_PGTABLE_LEVELS > 3 */ +#if CONFIG_PGTABLE_LEVELS > 4 + +static __always_inline bool pgtable_l5_enabled(void) +{ + if (!alternative_has_cap_likely(ARM64_ALWAYS_BOOT)) + return vabits_actual == VA_BITS; + return alternative_has_cap_unlikely(ARM64_HAS_VA52); +} + +static inline bool mm_p4d_folded(const struct mm_struct *mm) +{ + return !pgtable_l5_enabled(); +} +#define mm_p4d_folded mm_p4d_folded + +#define p4d_ERROR(e) \ + pr_err("%s:%d: bad p4d %016llx.\n", __FILE__, __LINE__, p4d_val(e)) + +#define pgd_none(pgd) (pgtable_l5_enabled() && !pgd_val(pgd)) +#define pgd_bad(pgd) (pgtable_l5_enabled() && \ + ((pgd_val(pgd) & PGD_TYPE_MASK) != \ + PGD_TYPE_TABLE)) +#define pgd_present(pgd) (!pgd_none(pgd)) + +static inline void set_pgd(pgd_t *pgdp, pgd_t pgd) +{ + if (in_swapper_pgdir(pgdp)) { + set_swapper_pgd(pgdp, __pgd(pgd_val(pgd))); + return; + } + + WRITE_ONCE(*pgdp, pgd); + queue_pte_barriers(); +} + +static inline void pgd_clear(pgd_t *pgdp) +{ + if (pgtable_l5_enabled()) + set_pgd(pgdp, __pgd(0)); +} + +static inline phys_addr_t pgd_page_paddr(pgd_t pgd) +{ + return __pgd_to_phys(pgd); +} + +#define p4d_index(addr) (((addr) >> P4D_SHIFT) & (PTRS_PER_P4D - 1)) + +static inline p4d_t *pgd_to_folded_p4d(pgd_t *pgdp, unsigned long addr) +{ + /* Ensure that 'pgdp' 
indexes a page table according to 'addr' */ + VM_BUG_ON(((addr >> PGDIR_SHIFT) ^ ((u64)pgdp >> 3)) % PTRS_PER_PGD); + + return (p4d_t *)PTR_ALIGN_DOWN(pgdp, PAGE_SIZE) + p4d_index(addr); +} + +static inline phys_addr_t p4d_offset_phys(pgd_t *pgdp, unsigned long addr) +{ + BUG_ON(!pgtable_l5_enabled()); + + return pgd_page_paddr(READ_ONCE(*pgdp)) + p4d_index(addr) * sizeof(p4d_t); +} + +static inline +p4d_t *p4d_offset_lockless(pgd_t *pgdp, pgd_t pgd, unsigned long addr) +{ + if (!pgtable_l5_enabled()) + return pgd_to_folded_p4d(pgdp, addr); + return (p4d_t *)__va(pgd_page_paddr(pgd)) + p4d_index(addr); +} +#define p4d_offset_lockless p4d_offset_lockless + +static inline p4d_t *p4d_offset(pgd_t *pgdp, unsigned long addr) +{ + return p4d_offset_lockless(pgdp, READ_ONCE(*pgdp), addr); +} + +static inline p4d_t *p4d_set_fixmap(unsigned long addr) +{ + if (!pgtable_l5_enabled()) + return NULL; + return (p4d_t *)set_fixmap_offset(FIX_P4D, addr); +} + +static inline p4d_t *p4d_set_fixmap_offset(pgd_t *pgdp, unsigned long addr) +{ + if (!pgtable_l5_enabled()) + return pgd_to_folded_p4d(pgdp, addr); + return p4d_set_fixmap(p4d_offset_phys(pgdp, addr)); +} + +static inline void p4d_clear_fixmap(void) +{ + if (pgtable_l5_enabled()) + clear_fixmap(FIX_P4D); +} + +/* use ONLY for statically allocated translation tables */ +static inline p4d_t *p4d_offset_kimg(pgd_t *pgdp, u64 addr) +{ + if (!pgtable_l5_enabled()) + return pgd_to_folded_p4d(pgdp, addr); + return (p4d_t *)__phys_to_kimg(p4d_offset_phys(pgdp, addr)); +} + +#define pgd_page(pgd) pfn_to_page(__phys_to_pfn(__pgd_to_phys(pgd))) + +#else + +static inline bool pgtable_l5_enabled(void) { return false; } + +#define p4d_index(addr) (((addr) >> P4D_SHIFT) & (PTRS_PER_P4D - 1)) + +/* Match p4d_offset folding in <asm/generic/pgtable-nop4d.h> */ +#define p4d_set_fixmap(addr) NULL +#define p4d_set_fixmap_offset(p4dp, addr) ((p4d_t *)p4dp) +#define p4d_clear_fixmap() + +#define p4d_offset_kimg(dir,addr) ((p4d_t *)dir) + +static inline +p4d_t *p4d_offset_lockless_folded(pgd_t *pgdp, pgd_t pgd, unsigned long addr) +{ + /* + * With runtime folding of the pud, pud_offset_lockless() passes + * the 'pgd_t *' we return here to p4d_to_folded_pud(), which + * will offset the pointer assuming that it points into + * a page-table page. However, the fast GUP path passes us a + * pgd_t allocated on the stack and so we must use the original + * pointer in 'pgdp' to construct the p4d pointer instead of + * using the generic p4d_offset_lockless() implementation. + * + * Note: reusing the original pointer means that we may + * dereference the same (live) page-table entry multiple times. + * This is safe because it is still only loaded once in the + * context of each level and the CPU guarantees same-address + * read-after-read ordering. + */ + return p4d_offset(pgdp, addr); +} +#define p4d_offset_lockless p4d_offset_lockless_folded + +#endif /* CONFIG_PGTABLE_LEVELS > 4 */ + #define pgd_ERROR(e) \ pr_err("%s:%d: bad pgd %016llx.\n", __FILE__, __LINE__, pgd_val(e)) @@ -816,12 +1253,20 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) * in MAIR_EL1. The mask below has to include PTE_ATTRINDX_MASK. 
*/ const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY | - PTE_PROT_NONE | PTE_VALID | PTE_WRITE | PTE_GP | - PTE_ATTRINDX_MASK; + PTE_PRESENT_INVALID | PTE_VALID | PTE_WRITE | + PTE_GP | PTE_ATTRINDX_MASK | PTE_PO_IDX_MASK; + /* preserve the hardware dirty information */ if (pte_hw_dirty(pte)) - pte = pte_mkdirty(pte); + pte = set_pte_bit(pte, __pgprot(PTE_DIRTY)); + pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask); + /* + * If we end up clearing hw dirtiness for a sw-dirty PTE, set hardware + * dirtiness again. + */ + if (pte_sw_dirty(pte)) + pte = pte_mkdirty(pte); return pte; } @@ -830,8 +1275,7 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) return pte_pmd(pte_modify(pmd_pte(pmd), newprot)); } -#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS -extern int ptep_set_access_flags(struct vm_area_struct *vma, +extern int __ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, pte_t *ptep, pte_t entry, int dirty); @@ -841,46 +1285,38 @@ static inline int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, pmd_t entry, int dirty) { - return ptep_set_access_flags(vma, address, (pte_t *)pmdp, pmd_pte(entry), dirty); -} - -static inline int pud_devmap(pud_t pud) -{ - return 0; -} - -static inline int pgd_devmap(pgd_t pgd) -{ - return 0; + return __ptep_set_access_flags(vma, address, (pte_t *)pmdp, + pmd_pte(entry), dirty); } #endif #ifdef CONFIG_PAGE_TABLE_CHECK static inline bool pte_user_accessible_page(pte_t pte) { - return pte_present(pte) && (pte_user(pte) || pte_user_exec(pte)); + return pte_valid(pte) && (pte_user(pte) || pte_user_exec(pte)); } static inline bool pmd_user_accessible_page(pmd_t pmd) { - return pmd_present(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd)); + return pmd_valid(pmd) && !pmd_table(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd)); } static inline bool pud_user_accessible_page(pud_t pud) { - return pud_present(pud) && pud_user(pud); + return pud_valid(pud) && !pud_table(pud) && (pud_user(pud) || pud_user_exec(pud)); } #endif /* * Atomic pte/pmd modifications. 
*/ -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG -static inline int __ptep_test_and_clear_young(pte_t *ptep) +static inline int __ptep_test_and_clear_young(struct vm_area_struct *vma, + unsigned long address, + pte_t *ptep) { pte_t old_pte, pte; - pte = READ_ONCE(*ptep); + pte = __ptep_get(ptep); do { old_pte = pte; pte = pte_mkold(pte); @@ -891,18 +1327,10 @@ static inline int __ptep_test_and_clear_young(pte_t *ptep) return pte_young(pte); } -static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, - unsigned long address, - pte_t *ptep) -{ - return __ptep_test_and_clear_young(ptep); -} - -#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH -static inline int ptep_clear_flush_young(struct vm_area_struct *vma, +static inline int __ptep_clear_flush_young(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { - int young = ptep_test_and_clear_young(vma, address, ptep); + int young = __ptep_test_and_clear_young(vma, address, ptep); if (young) { /* @@ -919,24 +1347,77 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma, return young; } -#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) #define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { - return ptep_test_and_clear_young(vma, address, (pte_t *)pmdp); + /* Operation applies to PMD table entry only if FEAT_HAFT is enabled */ + VM_WARN_ON(pmd_table(READ_ONCE(*pmdp)) && !system_supports_haft()); + return __ptep_test_and_clear_young(vma, address, (pte_t *)pmdp); } -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG */ -#define __HAVE_ARCH_PTEP_GET_AND_CLEAR -static inline pte_t ptep_get_and_clear(struct mm_struct *mm, - unsigned long address, pte_t *ptep) +static inline pte_t __ptep_get_and_clear_anysz(struct mm_struct *mm, + pte_t *ptep, + unsigned long pgsize) { pte_t pte = __pte(xchg_relaxed(&pte_val(*ptep), 0)); - page_table_check_pte_clear(mm, address, pte); + switch (pgsize) { + case PAGE_SIZE: + page_table_check_pte_clear(mm, pte); + break; + case PMD_SIZE: + page_table_check_pmd_clear(mm, pte_pmd(pte)); + break; +#ifndef __PAGETABLE_PMD_FOLDED + case PUD_SIZE: + page_table_check_pud_clear(mm, pte_pud(pte)); + break; +#endif + default: + VM_WARN_ON(1); + } + + return pte; +} + +static inline pte_t __ptep_get_and_clear(struct mm_struct *mm, + unsigned long address, pte_t *ptep) +{ + return __ptep_get_and_clear_anysz(mm, ptep, PAGE_SIZE); +} + +static inline void __clear_full_ptes(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned int nr, int full) +{ + for (;;) { + __ptep_get_and_clear(mm, addr, ptep); + if (--nr == 0) + break; + ptep++; + addr += PAGE_SIZE; + } +} + +static inline pte_t __get_and_clear_full_ptes(struct mm_struct *mm, + unsigned long addr, pte_t *ptep, + unsigned int nr, int full) +{ + pte_t pte, tmp_pte; + pte = __ptep_get_and_clear(mm, addr, ptep); + while (--nr) { + ptep++; + addr += PAGE_SIZE; + tmp_pte = __ptep_get_and_clear(mm, addr, ptep); + if (pte_dirty(tmp_pte)) + pte = pte_mkdirty(pte); + if (pte_young(tmp_pte)) + pte = pte_mkyoung(pte); + } return pte; } @@ -945,45 +1426,96 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long address, pmd_t *pmdp) { - pmd_t pmd = __pmd(xchg_relaxed(&pmd_val(*pmdp), 0)); + return 
pte_pmd(__ptep_get_and_clear_anysz(mm, (pte_t *)pmdp, PMD_SIZE)); +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ - page_table_check_pmd_clear(mm, address, pmd); +static inline void ___ptep_set_wrprotect(struct mm_struct *mm, + unsigned long address, pte_t *ptep, + pte_t pte) +{ + pte_t old_pte; - return pmd; + do { + old_pte = pte; + pte = pte_wrprotect(pte); + pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep), + pte_val(old_pte), pte_val(pte)); + } while (pte_val(pte) != pte_val(old_pte)); } -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ /* - * ptep_set_wrprotect - mark read-only while trasferring potential hardware + * __ptep_set_wrprotect - mark read-only while transferring potential hardware * dirty status (PTE_DBM && !PTE_RDONLY) to the software PTE_DIRTY bit. */ -#define __HAVE_ARCH_PTEP_SET_WRPROTECT -static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep) +static inline void __ptep_set_wrprotect(struct mm_struct *mm, + unsigned long address, pte_t *ptep) { - pte_t old_pte, pte; + ___ptep_set_wrprotect(mm, address, ptep, __ptep_get(ptep)); +} + +static inline void __wrprotect_ptes(struct mm_struct *mm, unsigned long address, + pte_t *ptep, unsigned int nr) +{ + unsigned int i; + + for (i = 0; i < nr; i++, address += PAGE_SIZE, ptep++) + __ptep_set_wrprotect(mm, address, ptep); +} + +static inline void __clear_young_dirty_pte(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t pte, cydp_t flags) +{ + pte_t old_pte; - pte = READ_ONCE(*ptep); do { old_pte = pte; - pte = pte_wrprotect(pte); + + if (flags & CYDP_CLEAR_YOUNG) + pte = pte_mkold(pte); + if (flags & CYDP_CLEAR_DIRTY) + pte = pte_mkclean(pte); + pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep), pte_val(old_pte), pte_val(pte)); } while (pte_val(pte) != pte_val(old_pte)); } +static inline void __clear_young_dirty_ptes(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + unsigned int nr, cydp_t flags) +{ + pte_t pte; + + for (;;) { + pte = __ptep_get(ptep); + + if (flags == (CYDP_CLEAR_YOUNG | CYDP_CLEAR_DIRTY)) + __set_pte(ptep, pte_mkclean(pte_mkold(pte))); + else + __clear_young_dirty_pte(vma, addr, ptep, pte, flags); + + if (--nr == 0) + break; + ptep++; + addr += PAGE_SIZE; + } +} + #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define __HAVE_ARCH_PMDP_SET_WRPROTECT static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long address, pmd_t *pmdp) { - ptep_set_wrprotect(mm, address, (pte_t *)pmdp); + __ptep_set_wrprotect(mm, address, (pte_t *)pmdp); } #define pmdp_establish pmdp_establish static inline pmd_t pmdp_establish(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, pmd_t pmd) { - page_table_check_pmd_set(vma->vm_mm, address, pmdp, pmd); + page_table_check_pmd_set(vma->vm_mm, pmdp, pmd); return __pmd(xchg_relaxed(&pmd_val(*pmdp), pmd_val(pmd))); } #endif @@ -992,15 +1524,16 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, * Encode and decode a swap entry: * bits 0-1: present (must be zero) * bits 2: remember PG_anon_exclusive - * bits 3-7: swap type - * bits 8-57: swap offset - * bit 58: PTE_PROT_NONE (must be zero) + * bit 3: remember uffd-wp state + * bits 6-10: swap type + * bit 11: PTE_PRESENT_INVALID (must be zero) + * bits 12-61: swap offset */ -#define __SWP_TYPE_SHIFT 3 +#define __SWP_TYPE_SHIFT 6 #define __SWP_TYPE_BITS 5 -#define __SWP_OFFSET_BITS 50 #define __SWP_TYPE_MASK ((1 << __SWP_TYPE_BITS) - 1) -#define __SWP_OFFSET_SHIFT (__SWP_TYPE_BITS + __SWP_TYPE_SHIFT) +#define __SWP_OFFSET_SHIFT 12 +#define 
__SWP_OFFSET_BITS 50 #define __SWP_OFFSET_MASK ((1UL << __SWP_OFFSET_BITS) - 1) #define __swp_type(x) (((x).val >> __SWP_TYPE_SHIFT) & __SWP_TYPE_MASK) @@ -1021,17 +1554,10 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, */ #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > __SWP_TYPE_BITS) -extern int kern_addr_valid(unsigned long addr); - #ifdef CONFIG_ARM64_MTE #define __HAVE_ARCH_PREPARE_TO_SWAP -static inline int arch_prepare_to_swap(struct page *page) -{ - if (system_supports_mte()) - return mte_save_tags(page); - return 0; -} +extern int arch_prepare_to_swap(struct folio *folio); #define __HAVE_ARCH_SWAP_INVALIDATE static inline void arch_swap_invalidate_page(int type, pgoff_t offset) @@ -1047,19 +1573,16 @@ static inline void arch_swap_invalidate_area(int type) } #define __HAVE_ARCH_SWAP_RESTORE -static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio) -{ - if (system_supports_mte() && mte_restore_tags(entry, &folio->page)) - set_bit(PG_mte_tagged, &folio->flags); -} +extern void arch_swap_restore(swp_entry_t entry, struct folio *folio); #endif /* CONFIG_ARM64_MTE */ /* - * On AArch64, the cache coherency is handled via the set_pte_at() function. + * On AArch64, the cache coherency is handled via the __set_ptes() function. */ -static inline void update_mmu_cache(struct vm_area_struct *vma, - unsigned long addr, pte_t *ptep) +static inline void update_mmu_cache_range(struct vm_fault *vmf, + struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, + unsigned int nr) { /* * We don't do anything here, so there's a very small chance of @@ -1068,6 +1591,8 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, */ } +#define update_mmu_cache(vma, addr, ptep) \ + update_mmu_cache_range(NULL, vma, addr, ptep, 1) #define update_mmu_cache_pmd(vma, address, pmd) do { } while (0) #ifdef CONFIG_ARM64_PA_BITS_52 @@ -1082,24 +1607,25 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, * page after fork() + CoW for pfn mappings. We don't always have a * hardware-managed access flag on arm64. */ -static inline bool arch_faults_on_old_pte(void) -{ - /* The register read below requires a stable CPU to make any sense */ - cant_migrate(); +#define arch_has_hw_pte_young cpu_has_hw_af - return !cpu_has_hw_af(); -} -#define arch_faults_on_old_pte arch_faults_on_old_pte +#ifdef CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG +#define arch_has_hw_nonleaf_pmd_young system_supports_haft +#endif /* * Experimentally, it's cheap to set the access flag in hardware and we * benefit from prefaulting mappings as 'old' to start with. */ -static inline bool arch_wants_old_prefaulted_pte(void) -{ - return !arch_faults_on_old_pte(); -} -#define arch_wants_old_prefaulted_pte arch_wants_old_prefaulted_pte +#define arch_wants_old_prefaulted_pte cpu_has_hw_af + +/* + * Request exec memory is read into pagecache in at least 64K folios. This size + * can be contpte-mapped when 4K base pages are in use (16 pages into 1 iTLB + * entry), and HPA can coalesce it (4 pages into 1 TLB entry) when 16K base + * pages are in use. 
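An illustrative sketch (not from the patch itself) of the re-packed swap-entry layout above; the values are made up, __swp_type() is shown in the hunk, and the offset is unpacked by hand with the masks defined alongside it:

static inline void example_swp_layout(void)
{
	/* Hypothetical swap entry: type 3, offset 0x1234. */
	swp_entry_t e = {
		.val = (3UL << __SWP_TYPE_SHIFT) |	  /* bits 6-10  */
		       (0x1234UL << __SWP_OFFSET_SHIFT),  /* bits 12-61 */
	};

	/* Bits 0-1 and bit 11 stay zero, so the entry can never look valid. */
	WARN_ON(__swp_type(e) != 3);
	WARN_ON(((e.val >> __SWP_OFFSET_SHIFT) & __SWP_OFFSET_MASK) != 0x1234);
}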
+ */ +#define exec_folio_order() ilog2(SZ_64K >> PAGE_SHIFT) static inline bool pud_sect_supported(void) { @@ -1107,6 +1633,316 @@ static inline bool pud_sect_supported(void) } +#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION +#define ptep_modify_prot_start ptep_modify_prot_start +extern pte_t ptep_modify_prot_start(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep); + +#define ptep_modify_prot_commit ptep_modify_prot_commit +extern void ptep_modify_prot_commit(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t old_pte, pte_t new_pte); + +#define modify_prot_start_ptes modify_prot_start_ptes +extern pte_t modify_prot_start_ptes(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + unsigned int nr); + +#define modify_prot_commit_ptes modify_prot_commit_ptes +extern void modify_prot_commit_ptes(struct vm_area_struct *vma, unsigned long addr, + pte_t *ptep, pte_t old_pte, pte_t pte, + unsigned int nr); + +#ifdef CONFIG_ARM64_CONTPTE + +/* + * The contpte APIs are used to transparently manage the contiguous bit in ptes + * where it is possible and makes sense to do so. The PTE_CONT bit is considered + * a private implementation detail of the public ptep API (see below). + */ +extern void __contpte_try_fold(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte); +extern void __contpte_try_unfold(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte); +extern pte_t contpte_ptep_get(pte_t *ptep, pte_t orig_pte); +extern pte_t contpte_ptep_get_lockless(pte_t *orig_ptep); +extern void contpte_set_ptes(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte, unsigned int nr); +extern void contpte_clear_full_ptes(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned int nr, int full); +extern pte_t contpte_get_and_clear_full_ptes(struct mm_struct *mm, + unsigned long addr, pte_t *ptep, + unsigned int nr, int full); +extern int contpte_ptep_test_and_clear_young(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep); +extern int contpte_ptep_clear_flush_young(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep); +extern void contpte_wrprotect_ptes(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned int nr); +extern int contpte_ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t entry, int dirty); +extern void contpte_clear_young_dirty_ptes(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + unsigned int nr, cydp_t flags); + +static __always_inline void contpte_try_fold(struct mm_struct *mm, + unsigned long addr, pte_t *ptep, pte_t pte) +{ + /* + * Only bother trying if both the virtual and physical addresses are + * aligned and correspond to the last entry in a contig range. The core + * code mostly modifies ranges from low to high, so this is the likely + * the last modification in the contig range, so a good time to fold. + * We can't fold special mappings, because there is no associated folio. 
+ */ + + const unsigned long contmask = CONT_PTES - 1; + bool valign = ((addr >> PAGE_SHIFT) & contmask) == contmask; + + if (unlikely(valign)) { + bool palign = (pte_pfn(pte) & contmask) == contmask; + + if (unlikely(palign && + pte_valid(pte) && !pte_cont(pte) && !pte_special(pte))) + __contpte_try_fold(mm, addr, ptep, pte); + } +} + +static __always_inline void contpte_try_unfold(struct mm_struct *mm, + unsigned long addr, pte_t *ptep, pte_t pte) +{ + if (unlikely(pte_valid_cont(pte))) + __contpte_try_unfold(mm, addr, ptep, pte); +} + +#define pte_batch_hint pte_batch_hint +static inline unsigned int pte_batch_hint(pte_t *ptep, pte_t pte) +{ + if (!pte_valid_cont(pte)) + return 1; + + return CONT_PTES - (((unsigned long)ptep >> 3) & (CONT_PTES - 1)); +} + +/* + * The below functions constitute the public API that arm64 presents to the + * core-mm to manipulate PTE entries within their page tables (or at least this + * is the subset of the API that arm64 needs to implement). These public + * versions will automatically and transparently apply the contiguous bit where + * it makes sense to do so. Therefore any users that are contig-aware (e.g. + * hugetlb, kernel mapper) should NOT use these APIs, but instead use the + * private versions, which are prefixed with double underscore. All of these + * APIs except for ptep_get_lockless() are expected to be called with the PTL + * held. Although the contiguous bit is considered private to the + * implementation, it is deliberately allowed to leak through the getters (e.g. + * ptep_get()), back to core code. This is required so that pte_leaf_size() can + * provide an accurate size for perf_get_pgtable_size(). But this leakage means + * its possible a pte will be passed to a setter with the contiguous bit set, so + * we explicitly clear the contiguous bit in those cases to prevent accidentally + * setting it in the pgtable. + */ + +#define ptep_get ptep_get +static inline pte_t ptep_get(pte_t *ptep) +{ + pte_t pte = __ptep_get(ptep); + + if (likely(!pte_valid_cont(pte))) + return pte; + + return contpte_ptep_get(ptep, pte); +} + +#define ptep_get_lockless ptep_get_lockless +static inline pte_t ptep_get_lockless(pte_t *ptep) +{ + pte_t pte = __ptep_get(ptep); + + if (likely(!pte_valid_cont(pte))) + return pte; + + return contpte_ptep_get_lockless(ptep); +} + +static inline void set_pte(pte_t *ptep, pte_t pte) +{ + /* + * We don't have the mm or vaddr so cannot unfold contig entries (since + * it requires tlb maintenance). set_pte() is not used in core code, so + * this should never even be called. Regardless do our best to service + * any call and emit a warning if there is any attempt to set a pte on + * top of an existing contig range. 
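A hedged sketch of the public/private split the comment above describes; the caller below is hypothetical and only exists to show which side of the API each kind of user is expected to touch:

static pte_t example_map_range(struct mm_struct *mm, unsigned long addr,
			       pte_t *ptep, pte_t pte, unsigned int nr)
{
	/*
	 * Core-mm style: the public helper may fold the range into a PTE_CONT
	 * block when addr/pfn are suitably aligned; the caller never sees it.
	 */
	set_ptes(mm, addr, ptep, pte, nr);

	/*
	 * The getter may deliberately leak PTE_CONT back to core code (see
	 * above); the public setters strip it again via pte_mknoncont().
	 * Contig-aware users (hugetlb, kernel mapper) call __set_ptes() and
	 * friends directly instead.
	 */
	return ptep_get(ptep);
}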
+ */ + pte_t orig_pte = __ptep_get(ptep); + + WARN_ON_ONCE(pte_valid_cont(orig_pte)); + __set_pte(ptep, pte_mknoncont(pte)); +} + +#define set_ptes set_ptes +static __always_inline void set_ptes(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte, unsigned int nr) +{ + pte = pte_mknoncont(pte); + + if (likely(nr == 1)) { + contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep)); + __set_ptes(mm, addr, ptep, pte, 1); + contpte_try_fold(mm, addr, ptep, pte); + } else { + contpte_set_ptes(mm, addr, ptep, pte, nr); + } +} + +static inline void pte_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep)); + __pte_clear(mm, addr, ptep); +} + +#define clear_full_ptes clear_full_ptes +static inline void clear_full_ptes(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned int nr, int full) +{ + if (likely(nr == 1)) { + contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep)); + __clear_full_ptes(mm, addr, ptep, nr, full); + } else { + contpte_clear_full_ptes(mm, addr, ptep, nr, full); + } +} + +#define get_and_clear_full_ptes get_and_clear_full_ptes +static inline pte_t get_and_clear_full_ptes(struct mm_struct *mm, + unsigned long addr, pte_t *ptep, + unsigned int nr, int full) +{ + pte_t pte; + + if (likely(nr == 1)) { + contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep)); + pte = __get_and_clear_full_ptes(mm, addr, ptep, nr, full); + } else { + pte = contpte_get_and_clear_full_ptes(mm, addr, ptep, nr, full); + } + + return pte; +} + +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR +static inline pte_t ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep)); + return __ptep_get_and_clear(mm, addr, ptep); +} + +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG +static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ + pte_t orig_pte = __ptep_get(ptep); + + if (likely(!pte_valid_cont(orig_pte))) + return __ptep_test_and_clear_young(vma, addr, ptep); + + return contpte_ptep_test_and_clear_young(vma, addr, ptep); +} + +#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH +static inline int ptep_clear_flush_young(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ + pte_t orig_pte = __ptep_get(ptep); + + if (likely(!pte_valid_cont(orig_pte))) + return __ptep_clear_flush_young(vma, addr, ptep); + + return contpte_ptep_clear_flush_young(vma, addr, ptep); +} + +#define wrprotect_ptes wrprotect_ptes +static __always_inline void wrprotect_ptes(struct mm_struct *mm, + unsigned long addr, pte_t *ptep, unsigned int nr) +{ + if (likely(nr == 1)) { + /* + * Optimization: wrprotect_ptes() can only be called for present + * ptes so we only need to check contig bit as condition for + * unfold, and we can remove the contig bit from the pte we read + * to avoid re-reading. This speeds up fork() which is sensitive + * for order-0 folios. Equivalent to contpte_try_unfold(). 
+ */ + pte_t orig_pte = __ptep_get(ptep); + + if (unlikely(pte_cont(orig_pte))) { + __contpte_try_unfold(mm, addr, ptep, orig_pte); + orig_pte = pte_mknoncont(orig_pte); + } + ___ptep_set_wrprotect(mm, addr, ptep, orig_pte); + } else { + contpte_wrprotect_ptes(mm, addr, ptep, nr); + } +} + +#define __HAVE_ARCH_PTEP_SET_WRPROTECT +static inline void ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + wrprotect_ptes(mm, addr, ptep, 1); +} + +#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS +static inline int ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t entry, int dirty) +{ + pte_t orig_pte = __ptep_get(ptep); + + entry = pte_mknoncont(entry); + + if (likely(!pte_valid_cont(orig_pte))) + return __ptep_set_access_flags(vma, addr, ptep, entry, dirty); + + return contpte_ptep_set_access_flags(vma, addr, ptep, entry, dirty); +} + +#define clear_young_dirty_ptes clear_young_dirty_ptes +static inline void clear_young_dirty_ptes(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + unsigned int nr, cydp_t flags) +{ + if (likely(nr == 1 && !pte_cont(__ptep_get(ptep)))) + __clear_young_dirty_ptes(vma, addr, ptep, nr, flags); + else + contpte_clear_young_dirty_ptes(vma, addr, ptep, nr, flags); +} + +#else /* CONFIG_ARM64_CONTPTE */ + +#define ptep_get __ptep_get +#define set_pte __set_pte +#define set_ptes __set_ptes +#define pte_clear __pte_clear +#define clear_full_ptes __clear_full_ptes +#define get_and_clear_full_ptes __get_and_clear_full_ptes +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR +#define ptep_get_and_clear __ptep_get_and_clear +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG +#define ptep_test_and_clear_young __ptep_test_and_clear_young +#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH +#define ptep_clear_flush_young __ptep_clear_flush_young +#define __HAVE_ARCH_PTEP_SET_WRPROTECT +#define ptep_set_wrprotect __ptep_set_wrprotect +#define wrprotect_ptes __wrprotect_ptes +#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS +#define ptep_set_access_flags __ptep_set_access_flags +#define clear_young_dirty_ptes __clear_young_dirty_ptes + +#endif /* CONFIG_ARM64_CONTPTE */ + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_PGTABLE_H */ diff --git a/arch/arm64/include/asm/pkeys.h b/arch/arm64/include/asm/pkeys.h new file mode 100644 index 000000000000..0ca5f83ce148 --- /dev/null +++ b/arch/arm64/include/asm/pkeys.h @@ -0,0 +1,106 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 Arm Ltd. + * + * Based on arch/x86/include/asm/pkeys.h + */ + +#ifndef _ASM_ARM64_PKEYS_H +#define _ASM_ARM64_PKEYS_H + +#define ARCH_VM_PKEY_FLAGS (VM_PKEY_BIT0 | VM_PKEY_BIT1 | VM_PKEY_BIT2) + +#define arch_max_pkey() 8 + +int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, + unsigned long init_val); + +static inline bool arch_pkeys_enabled(void) +{ + return system_supports_poe(); +} + +static inline int vma_pkey(struct vm_area_struct *vma) +{ + return (vma->vm_flags & ARCH_VM_PKEY_FLAGS) >> VM_PKEY_SHIFT; +} + +static inline int arch_override_mprotect_pkey(struct vm_area_struct *vma, + int prot, int pkey) +{ + if (pkey != -1) + return pkey; + + return vma_pkey(vma); +} + +static inline int execute_only_pkey(struct mm_struct *mm) +{ + // Execute-only mappings are handled by EPAN/FEAT_PAN3. 
+ return -1; +} + +#define mm_pkey_allocation_map(mm) (mm)->context.pkey_allocation_map +#define mm_set_pkey_allocated(mm, pkey) do { \ + mm_pkey_allocation_map(mm) |= (1U << pkey); \ +} while (0) +#define mm_set_pkey_free(mm, pkey) do { \ + mm_pkey_allocation_map(mm) &= ~(1U << pkey); \ +} while (0) + +static inline bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey) +{ + /* + * "Allocated" pkeys are those that have been returned + * from pkey_alloc() or pkey 0 which is allocated + * implicitly when the mm is created. + */ + if (pkey < 0 || pkey >= arch_max_pkey()) + return false; + + return mm_pkey_allocation_map(mm) & (1U << pkey); +} + +/* + * Returns a positive, 3-bit key on success, or -1 on failure. + */ +static inline int mm_pkey_alloc(struct mm_struct *mm) +{ + /* + * Note: this is the one and only place we make sure + * that the pkey is valid as far as the hardware is + * concerned. The rest of the kernel trusts that + * only good, valid pkeys come out of here. + */ + u8 all_pkeys_mask = GENMASK(arch_max_pkey() - 1, 0); + int ret; + + if (!arch_pkeys_enabled()) + return -1; + + /* + * Are we out of pkeys? We must handle this specially + * because ffz() behavior is undefined if there are no + * zeros. + */ + if (mm_pkey_allocation_map(mm) == all_pkeys_mask) + return -1; + + ret = ffz(mm_pkey_allocation_map(mm)); + + mm_set_pkey_allocated(mm, ret); + + return ret; +} + +static inline int mm_pkey_free(struct mm_struct *mm, int pkey) +{ + if (!mm_pkey_is_allocated(mm, pkey)) + return -EINVAL; + + mm_set_pkey_free(mm, pkey); + + return 0; +} + +#endif /* _ASM_ARM64_PKEYS_H */ diff --git a/arch/arm64/include/asm/pointer_auth.h b/arch/arm64/include/asm/pointer_auth.h index efb098de3a84..d2e0306e65d3 100644 --- a/arch/arm64/include/asm/pointer_auth.h +++ b/arch/arm64/include/asm/pointer_auth.h @@ -10,6 +10,13 @@ #include <asm/memory.h> #include <asm/sysreg.h> +/* + * The EL0/EL1 pointer bits used by a pointer authentication code. + * This is dependent on TBI0/TBI1 being enabled, or bits 63:56 would also apply. + */ +#define ptrauth_user_pac_mask() GENMASK_ULL(54, vabits_actual) +#define ptrauth_kernel_pac_mask() GENMASK_ULL(63, vabits_actual) + #define PR_PAC_ENABLED_KEYS_MASK \ (PR_PAC_APIAKEY | PR_PAC_APIBKEY | PR_PAC_APDAKEY | PR_PAC_APDBKEY) @@ -97,11 +104,6 @@ extern int ptrauth_set_enabled_keys(struct task_struct *tsk, unsigned long keys, unsigned long enabled); extern int ptrauth_get_enabled_keys(struct task_struct *tsk); -static inline unsigned long ptrauth_strip_insn_pac(unsigned long ptr) -{ - return ptrauth_clear_pac(ptr); -} - static __always_inline void ptrauth_enable(void) { if (!system_supports_address_auth()) @@ -133,7 +135,6 @@ static __always_inline void ptrauth_enable(void) #define ptrauth_prctl_reset_keys(tsk, arg) (-EINVAL) #define ptrauth_set_enabled_keys(tsk, keys, enabled) (-EINVAL) #define ptrauth_get_enabled_keys(tsk) (-EINVAL) -#define ptrauth_strip_insn_pac(lr) (lr) #define ptrauth_suspend_exit() #define ptrauth_thread_init_user() #define ptrauth_thread_switch_user(tsk) diff --git a/arch/arm64/include/asm/por.h b/arch/arm64/include/asm/por.h new file mode 100644 index 000000000000..d913d5b529e4 --- /dev/null +++ b/arch/arm64/include/asm/por.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 Arm Ltd. 
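A small sketch of how the pkey_allocation_map bookkeeping above behaves, assuming POE is supported and pkey 0 was implicitly allocated when the mm was created; the caller and its error handling are hypothetical:

static int example_pkey_cycle(struct mm_struct *mm)
{
	/* Fresh mm: map is 0b00000001, so ffz() hands out pkey 1. */
	int pkey = mm_pkey_alloc(mm);

	if (pkey < 0)
		return pkey;

	WARN_ON(!mm_pkey_is_allocated(mm, pkey));
	/* ... expose it to userspace (pkey_mprotect(), POR_EL0) ... */
	return mm_pkey_free(mm, pkey);	/* clears bit 1 again */
}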
+ */ + +#ifndef _ASM_ARM64_POR_H +#define _ASM_ARM64_POR_H + +#include <asm/sysreg.h> + +#define POR_EL0_INIT POR_ELx_PERM_PREP(0, POE_RWX) + +static inline bool por_elx_allows_read(u64 por, u8 pkey) +{ + u8 perm = POR_ELx_PERM_GET(pkey, por); + + return perm & POE_R; +} + +static inline bool por_elx_allows_write(u64 por, u8 pkey) +{ + u8 perm = POR_ELx_PERM_GET(pkey, por); + + return perm & POE_W; +} + +static inline bool por_elx_allows_exec(u64 por, u8 pkey) +{ + u8 perm = POR_ELx_PERM_GET(pkey, por); + + return perm & POE_X; +} + +#endif /* _ASM_ARM64_POR_H */ diff --git a/arch/arm64/include/asm/probes.h b/arch/arm64/include/asm/probes.h index 006946745352..d49368886309 100644 --- a/arch/arm64/include/asm/probes.h +++ b/arch/arm64/include/asm/probes.h @@ -9,21 +9,18 @@ #include <asm/insn.h> -typedef u32 probe_opcode_t; typedef void (probes_handler_t) (u32 opcode, long addr, struct pt_regs *); -/* architecture specific copy of original instruction */ struct arch_probe_insn { - probe_opcode_t *insn; - pstate_check_t *pstate_cc; probes_handler_t *handler; - /* restore address after step xol */ - unsigned long restore; }; #ifdef CONFIG_KPROBES -typedef u32 kprobe_opcode_t; +typedef __le32 kprobe_opcode_t; struct arch_specific_insn { struct arch_probe_insn api; + kprobe_opcode_t *xol_insn; + /* restore address after step xol */ + unsigned long xol_restore; }; #endif diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 86eb0bfe3b38..61d62bfd5a7b 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -23,6 +23,8 @@ #define MTE_CTRL_TCF_ASYNC (1UL << 17) #define MTE_CTRL_TCF_ASYMM (1UL << 18) +#define MTE_CTRL_STORE_ONLY (1UL << 19) + #ifndef __ASSEMBLY__ #include <linux/build_bug.h> @@ -122,6 +124,12 @@ enum vec_type { ARM64_VEC_MAX, }; +enum fp_type { + FP_STATE_CURRENT, /* Save based on current task state. */ + FP_STATE_FPSIMD, + FP_STATE_SVE, +}; + struct cpu_context { unsigned long x19; unsigned long x20; @@ -149,17 +157,23 @@ struct thread_struct { struct { unsigned long tp_value; /* TLS register */ unsigned long tp2_value; + u64 fpmr; + unsigned long pad; struct user_fpsimd_state fpsimd_state; } uw; + enum fp_type fp_type; /* registers FPSIMD or SVE? 
*/ unsigned int fpsimd_cpu; void *sve_state; /* SVE registers, if any */ - void *za_state; /* ZA register, if any */ + void *sme_state; /* ZA and ZT state, if any */ unsigned int vl[ARM64_VEC_MAX]; /* vector length */ unsigned int vl_onexec[ARM64_VEC_MAX]; /* vl after next exec */ unsigned long fault_address; /* fault info */ unsigned long fault_code; /* ESR_EL1 value */ struct debug_info debug; /* debugging */ + + struct user_fpsimd_state kernel_fpsimd_state; + unsigned int kernel_fpsimd_cpu; #ifdef CONFIG_ARM64_PTR_AUTH struct ptrauth_keys_user keys_user; #ifdef CONFIG_ARM64_PTR_AUTH_KERNEL @@ -172,6 +186,14 @@ struct thread_struct { u64 sctlr_user; u64 svcr; u64 tpidr2_el0; + u64 por_el0; +#ifdef CONFIG_ARM64_GCS + unsigned int gcs_el0_mode; + unsigned int gcs_el0_locked; + u64 gcspr_el0; + u64 gcs_base; + u64 gcs_size; +#endif }; static inline unsigned int thread_get_vl(struct thread_struct *thread, @@ -243,6 +265,8 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset, BUILD_BUG_ON(sizeof_field(struct thread_struct, uw) != sizeof_field(struct thread_struct, uw.tp_value) + sizeof_field(struct thread_struct, uw.tp2_value) + + sizeof_field(struct thread_struct, uw.fpmr) + + sizeof_field(struct thread_struct, uw.pad) + sizeof_field(struct thread_struct, uw.fpsimd_state)); *offset = offsetof(struct thread_struct, uw); @@ -270,22 +294,44 @@ void tls_preserve_current_state(void); .fpsimd_cpu = NR_CPUS, \ } -static inline void start_thread_common(struct pt_regs *regs, unsigned long pc) +static inline void start_thread_common(struct pt_regs *regs, unsigned long pc, + unsigned long pstate) { - s32 previous_syscall = regs->syscallno; - memset(regs, 0, sizeof(*regs)); - regs->syscallno = previous_syscall; - regs->pc = pc; + /* + * Ensure all GPRs are zeroed, and initialize PC + PSTATE. + * The SP (or compat SP) will be initialized later. + */ + regs->user_regs = (struct user_pt_regs) { + .pc = pc, + .pstate = pstate, + }; + /* + * To allow the syscalls:sys_exit_execve tracepoint we need to preserve + * syscallno, but do not need orig_x0 or the original GPRs. + */ + regs->orig_x0 = 0; + + /* + * An exec from a kernel thread won't have an existing PMR value. + */ if (system_uses_irq_prio_masking()) - regs->pmr_save = GIC_PRIO_IRQON; + regs->pmr = GIC_PRIO_IRQON; + + /* + * The pt_regs::stackframe field must remain valid throughout this + * function as a stacktrace can be taken at any time. Any user or + * kernel task should have a valid final frame. 
+ */ + WARN_ON_ONCE(regs->stackframe.record.fp != 0); + WARN_ON_ONCE(regs->stackframe.record.lr != 0); + WARN_ON_ONCE(regs->stackframe.type != FRAME_META_TYPE_FINAL); } static inline void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp) { - start_thread_common(regs, pc); - regs->pstate = PSR_MODE_EL0t; + start_thread_common(regs, pc, PSR_MODE_EL0t); spectre_v4_enable_task_mitigation(current); regs->sp = sp; } @@ -294,27 +340,25 @@ static inline void start_thread(struct pt_regs *regs, unsigned long pc, static inline void compat_start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp) { - start_thread_common(regs, pc); - regs->pstate = PSR_AA32_MODE_USR; + unsigned long pstate = PSR_AA32_MODE_USR; if (pc & 1) - regs->pstate |= PSR_AA32_T_BIT; - -#ifdef __AARCH64EB__ - regs->pstate |= PSR_AA32_E_BIT; -#endif + pstate |= PSR_AA32_T_BIT; + if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) + pstate |= PSR_AA32_E_BIT; + start_thread_common(regs, pc, pstate); spectre_v4_enable_task_mitigation(current); regs->compat_sp = sp; } #endif -static inline bool is_ttbr0_addr(unsigned long addr) +static __always_inline bool is_ttbr0_addr(unsigned long addr) { /* entry assembly clears tags for TTBR0 addrs */ return addr < TASK_SIZE; } -static inline bool is_ttbr1_addr(unsigned long addr) +static __always_inline bool is_ttbr1_addr(unsigned long addr) { /* TTBR1 addresses may have a tag if KASAN_SW_TAGS is in use */ return arch_kasan_reset_tag(addr) >= PAGE_OFFSET; @@ -323,9 +367,6 @@ static inline bool is_ttbr1_addr(unsigned long addr) /* Forward declaration, a strange C thing */ struct task_struct; -/* Free all resources held by a thread. */ -extern void release_thread(struct task_struct *); - unsigned long __get_wchan(struct task_struct *p); void update_sctlr_el1(u64 sctlr); @@ -355,14 +396,6 @@ static inline void prefetchw(const void *ptr) asm volatile("prfm pstl1keep, %a0\n" : : "p" (ptr)); } -#define ARCH_HAS_SPINLOCK_PREFETCH -static inline void spin_lock_prefetch(const void *ptr) -{ - asm volatile(ARM64_LSE_ATOMIC_INSN( - "prfm pstl1strm, %a0", - "nop") : : "p" (ptr)); -} - extern unsigned long __ro_after_init signal_minsigstksz; /* sigframe size */ extern void __init minsigstksz_setup(void); @@ -399,18 +432,10 @@ long get_tagged_addr_ctrl(struct task_struct *task); #define GET_TAGGED_ADDR_CTRL() get_tagged_addr_ctrl(current) #endif -/* - * For CONFIG_GCC_PLUGIN_STACKLEAK - * - * These need to be macros because otherwise we get stuck in a nightmare - * of header definitions for the use of task_stack_page. 
- */ - -/* - * The top of the current task's task stack - */ -#define current_top_of_stack() ((unsigned long)current->stack + THREAD_SIZE) -#define on_thread_stack() (on_task_stack(current, current_stack_pointer, 1, NULL)) +int get_tsc_mode(unsigned long adr); +int set_tsc_mode(unsigned int val); +#define GET_TSC_CTL(adr) get_tsc_mode((adr)) +#define SET_TSC_CTL(val) set_tsc_mode((val)) #endif /* __ASSEMBLY__ */ #endif /* __ASM_PROCESSOR_H */ diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h index b1dd7ecff7ef..fded5358641f 100644 --- a/arch/arm64/include/asm/ptdump.h +++ b/arch/arm64/include/asm/ptdump.h @@ -5,7 +5,9 @@ #ifndef __ASM_PTDUMP_H #define __ASM_PTDUMP_H -#ifdef CONFIG_PTDUMP_CORE +#include <linux/ptdump.h> + +#ifdef CONFIG_PTDUMP #include <linux/mm_types.h> #include <linux/seq_file.h> @@ -21,20 +23,65 @@ struct ptdump_info { unsigned long base_addr; }; +struct ptdump_prot_bits { + ptdesc_t mask; + ptdesc_t val; + const char *set; + const char *clear; +}; + +struct ptdump_pg_level { + const struct ptdump_prot_bits *bits; + char name[4]; + int num; + ptdesc_t mask; +}; + +/* + * The page dumper groups page table entries of the same type into a single + * description. It uses pg_state to track the range information while + * iterating over the pte entries. When the continuity is broken it then + * dumps out a description of the range. + */ +struct ptdump_pg_state { + struct ptdump_state ptdump; + struct ptdump_pg_level *pg_level; + struct seq_file *seq; + const struct addr_marker *marker; + const struct mm_struct *mm; + unsigned long start_address; + int level; + ptdesc_t current_prot; + bool check_wx; + unsigned long wx_pages; + unsigned long uxn_pages; +}; + void ptdump_walk(struct seq_file *s, struct ptdump_info *info); +void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, + pteval_t val); +void note_page_pte(struct ptdump_state *st, unsigned long addr, pte_t pte); +void note_page_pmd(struct ptdump_state *st, unsigned long addr, pmd_t pmd); +void note_page_pud(struct ptdump_state *st, unsigned long addr, pud_t pud); +void note_page_p4d(struct ptdump_state *st, unsigned long addr, p4d_t p4d); +void note_page_pgd(struct ptdump_state *st, unsigned long addr, pgd_t pgd); +void note_page_flush(struct ptdump_state *st); #ifdef CONFIG_PTDUMP_DEBUGFS +#define EFI_RUNTIME_MAP_END DEFAULT_MAP_WINDOW_64 void __init ptdump_debugfs_register(struct ptdump_info *info, const char *name); #else static inline void ptdump_debugfs_register(struct ptdump_info *info, const char *name) { } -#endif -void ptdump_check_wx(void); -#endif /* CONFIG_PTDUMP_CORE */ - -#ifdef CONFIG_DEBUG_WX -#define debug_checkwx() ptdump_check_wx() +#endif /* CONFIG_PTDUMP_DEBUGFS */ #else -#define debug_checkwx() do { } while (0) -#endif +static inline void note_page(struct ptdump_state *pt_st, unsigned long addr, + int level, pteval_t val) { } +static inline void note_page_pte(struct ptdump_state *st, unsigned long addr, pte_t pte) { } +static inline void note_page_pmd(struct ptdump_state *st, unsigned long addr, pmd_t pmd) { } +static inline void note_page_pud(struct ptdump_state *st, unsigned long addr, pud_t pud) { } +static inline void note_page_p4d(struct ptdump_state *st, unsigned long addr, p4d_t p4d) { } +static inline void note_page_pgd(struct ptdump_state *st, unsigned long addr, pgd_t pgd) { } +static inline void note_page_flush(struct ptdump_state *st) { } +#endif /* CONFIG_PTDUMP */ #endif /* __ASM_PTDUMP_H */ diff --git a/arch/arm64/include/asm/ptrace.h 
b/arch/arm64/include/asm/ptrace.h index 41b332c054ab..47ff8654c5ec 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -21,35 +21,12 @@ #define INIT_PSTATE_EL2 \ (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT | PSR_MODE_EL2h) -/* - * PMR values used to mask/unmask interrupts. - * - * GIC priority masking works as follows: if an IRQ's priority is a higher value - * than the value held in PMR, that IRQ is masked. Lowering the value of PMR - * means masking more IRQs (or at least that the same IRQs remain masked). - * - * To mask interrupts, we clear the most significant bit of PMR. - * - * Some code sections either automatically switch back to PSR.I or explicitly - * require to not use priority masking. If bit GIC_PRIO_PSR_I_SET is included - * in the priority mask, it indicates that PSR.I should be set and - * interrupt disabling temporarily does not rely on IRQ priorities. - */ -#define GIC_PRIO_IRQON 0xe0 -#define __GIC_PRIO_IRQOFF (GIC_PRIO_IRQON & ~0x80) -#define __GIC_PRIO_IRQOFF_NS 0xa0 -#define GIC_PRIO_PSR_I_SET (1 << 4) - -#define GIC_PRIO_IRQOFF \ - ({ \ - extern struct static_key_false gic_nonsecure_priorities;\ - u8 __prio = __GIC_PRIO_IRQOFF; \ - \ - if (static_branch_unlikely(&gic_nonsecure_priorities)) \ - __prio = __GIC_PRIO_IRQOFF_NS; \ - \ - __prio; \ - }) +#include <linux/irqchip/arm-gic-v3-prio.h> + +#define GIC_PRIO_IRQON GICV3_PRIO_UNMASKED +#define GIC_PRIO_IRQOFF GICV3_PRIO_IRQ + +#define GIC_PRIO_PSR_I_SET GICV3_PRIO_PSR_I_SET /* Additional SPSR bits not exposed in the UABI */ #define PSR_MODE_THREAD_BIT (1 << 0) @@ -121,6 +98,8 @@ #include <linux/bug.h> #include <linux/types.h> +#include <asm/stacktrace/frame.h> + /* sizeof(struct user) for AArch32 */ #define COMPAT_USER_SZ 296 @@ -172,8 +151,7 @@ static inline unsigned long pstate_to_compat_psr(const unsigned long pstate) /* * This struct defines the way the registers are stored on the stack during an - * exception. Note that sizeof(struct pt_regs) has to be a multiple of 16 (for - * stack alignment). struct user_pt_regs must form a prefix of struct pt_regs. + * exception. struct user_pt_regs must form a prefix of struct pt_regs. */ struct pt_regs { union { @@ -186,23 +164,20 @@ struct pt_regs { }; }; u64 orig_x0; -#ifdef __AARCH64EB__ - u32 unused2; - s32 syscallno; -#else s32 syscallno; - u32 unused2; -#endif + u32 pmr; + u64 sdei_ttbr1; - /* Only valid when ARM64_HAS_IRQ_PRIO_MASKING is enabled. */ - u64 pmr_save; - u64 stackframe[2]; + struct frame_record_meta stackframe; /* Only valid for some EL1 exceptions. */ u64 lockdep_hardirqs; u64 exit_rcu; }; +/* For correct stack alignment, pt_regs has to be a multiple of 16 bytes. */ +static_assert(IS_ALIGNED(sizeof(struct pt_regs), 16)); + static inline bool in_syscall(struct pt_regs const *regs) { return regs->syscallno != NO_SYSCALL; @@ -236,7 +211,7 @@ static inline void forget_syscall(struct pt_regs *regs) #define irqs_priority_unmasked(regs) \ (system_uses_irq_prio_masking() ? 
\ - (regs)->pmr_save == GIC_PRIO_IRQON : \ + (regs)->pmr == GIC_PRIO_IRQON : \ true) #define interrupts_enabled(regs) \ diff --git a/arch/arm64/include/asm/rqspinlock.h b/arch/arm64/include/asm/rqspinlock.h new file mode 100644 index 000000000000..9ea0a74e5892 --- /dev/null +++ b/arch/arm64/include/asm/rqspinlock.h @@ -0,0 +1,93 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_RQSPINLOCK_H +#define _ASM_RQSPINLOCK_H + +#include <asm/barrier.h> + +/* + * Hardcode res_smp_cond_load_acquire implementations for arm64 to a custom + * version based on [0]. In rqspinlock code, our conditional expression involves + * checking the value _and_ additionally a timeout. However, on arm64, the + * WFE-based implementation may never spin again if no stores occur to the + * locked byte in the lock word. As such, we may be stuck forever if + * event-stream based unblocking is not available on the platform for WFE spin + * loops (arch_timer_evtstrm_available). + * + * Once support for smp_cond_load_acquire_timewait [0] lands, we can drop this + * copy-paste. + * + * While we rely on the implementation to amortize the cost of sampling + * cond_expr for us, it will not happen when event stream support is + * unavailable, time_expr check is amortized. This is not the common case, and + * it would be difficult to fit our logic in the time_expr_ns >= time_limit_ns + * comparison, hence just let it be. In case of event-stream, the loop is woken + * up at microsecond granularity. + * + * [0]: https://lore.kernel.org/lkml/20250203214911.898276-1-ankur.a.arora@oracle.com + */ + +#ifndef smp_cond_load_acquire_timewait + +#define smp_cond_time_check_count 200 + +#define __smp_cond_load_relaxed_spinwait(ptr, cond_expr, time_expr_ns, \ + time_limit_ns) ({ \ + typeof(ptr) __PTR = (ptr); \ + __unqual_scalar_typeof(*ptr) VAL; \ + unsigned int __count = 0; \ + for (;;) { \ + VAL = READ_ONCE(*__PTR); \ + if (cond_expr) \ + break; \ + cpu_relax(); \ + if (__count++ < smp_cond_time_check_count) \ + continue; \ + if ((time_expr_ns) >= (time_limit_ns)) \ + break; \ + __count = 0; \ + } \ + (typeof(*ptr))VAL; \ +}) + +#define __smp_cond_load_acquire_timewait(ptr, cond_expr, \ + time_expr_ns, time_limit_ns) \ +({ \ + typeof(ptr) __PTR = (ptr); \ + __unqual_scalar_typeof(*ptr) VAL; \ + for (;;) { \ + VAL = smp_load_acquire(__PTR); \ + if (cond_expr) \ + break; \ + __cmpwait_relaxed(__PTR, VAL); \ + if ((time_expr_ns) >= (time_limit_ns)) \ + break; \ + } \ + (typeof(*ptr))VAL; \ +}) + +#define smp_cond_load_acquire_timewait(ptr, cond_expr, \ + time_expr_ns, time_limit_ns) \ +({ \ + __unqual_scalar_typeof(*ptr) _val; \ + int __wfe = arch_timer_evtstrm_available(); \ + \ + if (likely(__wfe)) { \ + _val = __smp_cond_load_acquire_timewait(ptr, cond_expr, \ + time_expr_ns, \ + time_limit_ns); \ + } else { \ + _val = __smp_cond_load_relaxed_spinwait(ptr, cond_expr, \ + time_expr_ns, \ + time_limit_ns); \ + smp_acquire__after_ctrl_dep(); \ + } \ + (typeof(*ptr))_val; \ +}) + +#endif + +#define res_smp_cond_load_acquire(v, c) smp_cond_load_acquire_timewait(v, c, 0, 1) + +#include <asm-generic/rqspinlock.h> + +#endif /* _ASM_RQSPINLOCK_H */ diff --git a/arch/arm64/include/asm/rsi.h b/arch/arm64/include/asm/rsi.h new file mode 100644 index 000000000000..b42aeac05340 --- /dev/null +++ b/arch/arm64/include/asm/rsi.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2024 ARM Ltd. 
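A minimal sketch of driving the timewait fallback above; the ktime-based deadline expression is an assumption for illustration only (rqspinlock itself goes through res_smp_cond_load_acquire() with its own timeout plumbing):

static inline u32 example_wait_for_unlock(u32 *lock_word, u64 deadline_ns)
{
	/*
	 * cond_expr sees the freshly loaded value as VAL; the time expression
	 * is re-sampled each round - WFE when the event stream is available,
	 * bounded busy-waiting otherwise.
	 */
	return smp_cond_load_acquire_timewait(lock_word, !VAL,
					      ktime_get_mono_fast_ns(),
					      deadline_ns);
}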
+ */ + +#ifndef __ASM_RSI_H_ +#define __ASM_RSI_H_ + +#include <linux/errno.h> +#include <linux/jump_label.h> +#include <asm/rsi_cmds.h> + +#define RSI_PDEV_NAME "arm-cca-dev" + +DECLARE_STATIC_KEY_FALSE(rsi_present); + +void __init arm64_rsi_init(void); + +bool __arm64_is_protected_mmio(phys_addr_t base, size_t size); + +static inline bool is_realm_world(void) +{ + return static_branch_unlikely(&rsi_present); +} + +static inline int rsi_set_memory_range(phys_addr_t start, phys_addr_t end, + enum ripas state, unsigned long flags) +{ + unsigned long ret; + phys_addr_t top; + + while (start != end) { + ret = rsi_set_addr_range_state(start, end, state, flags, &top); + if (ret || top < start || top > end) + return -EINVAL; + start = top; + } + + return 0; +} + +/* + * Convert the specified range to RAM. Do not use this if you rely on the + * contents of a page that may already be in RAM state. + */ +static inline int rsi_set_memory_range_protected(phys_addr_t start, + phys_addr_t end) +{ + return rsi_set_memory_range(start, end, RSI_RIPAS_RAM, + RSI_CHANGE_DESTROYED); +} + +/* + * Convert the specified range to RAM. Do not convert any pages that may have + * been DESTROYED, without our permission. + */ +static inline int rsi_set_memory_range_protected_safe(phys_addr_t start, + phys_addr_t end) +{ + return rsi_set_memory_range(start, end, RSI_RIPAS_RAM, + RSI_NO_CHANGE_DESTROYED); +} + +static inline int rsi_set_memory_range_shared(phys_addr_t start, + phys_addr_t end) +{ + return rsi_set_memory_range(start, end, RSI_RIPAS_EMPTY, + RSI_CHANGE_DESTROYED); +} +#endif /* __ASM_RSI_H_ */ diff --git a/arch/arm64/include/asm/rsi_cmds.h b/arch/arm64/include/asm/rsi_cmds.h new file mode 100644 index 000000000000..2c8763876dfb --- /dev/null +++ b/arch/arm64/include/asm/rsi_cmds.h @@ -0,0 +1,162 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2023 ARM Ltd. 
+ */ + +#ifndef __ASM_RSI_CMDS_H +#define __ASM_RSI_CMDS_H + +#include <linux/arm-smccc.h> +#include <linux/string.h> +#include <asm/memory.h> + +#include <asm/rsi_smc.h> + +#define RSI_GRANULE_SHIFT 12 +#define RSI_GRANULE_SIZE (_AC(1, UL) << RSI_GRANULE_SHIFT) + +enum ripas { + RSI_RIPAS_EMPTY = 0, + RSI_RIPAS_RAM = 1, + RSI_RIPAS_DESTROYED = 2, + RSI_RIPAS_DEV = 3, +}; + +static inline unsigned long rsi_request_version(unsigned long req, + unsigned long *out_lower, + unsigned long *out_higher) +{ + struct arm_smccc_res res; + + arm_smccc_smc(SMC_RSI_ABI_VERSION, req, 0, 0, 0, 0, 0, 0, &res); + + if (out_lower) + *out_lower = res.a1; + if (out_higher) + *out_higher = res.a2; + + return res.a0; +} + +static inline unsigned long rsi_get_realm_config(struct realm_config *cfg) +{ + struct arm_smccc_res res; + + arm_smccc_smc(SMC_RSI_REALM_CONFIG, virt_to_phys(cfg), + 0, 0, 0, 0, 0, 0, &res); + return res.a0; +} + +static inline unsigned long rsi_ipa_state_get(phys_addr_t start, + phys_addr_t end, + enum ripas *state, + phys_addr_t *top) +{ + struct arm_smccc_res res; + + arm_smccc_smc(SMC_RSI_IPA_STATE_GET, + start, end, 0, 0, 0, 0, 0, + &res); + + if (res.a0 == RSI_SUCCESS) { + if (top) + *top = res.a1; + if (state) + *state = res.a2; + } + + return res.a0; +} + +static inline long rsi_set_addr_range_state(phys_addr_t start, + phys_addr_t end, + enum ripas state, + unsigned long flags, + phys_addr_t *top) +{ + struct arm_smccc_res res; + + arm_smccc_smc(SMC_RSI_IPA_STATE_SET, start, end, state, + flags, 0, 0, 0, &res); + + if (top) + *top = res.a1; + + if (res.a2 != RSI_ACCEPT) + return -EPERM; + + return res.a0; +} + +/** + * rsi_attestation_token_init - Initialise the operation to retrieve an + * attestation token. + * + * @challenge: The challenge data to be used in the attestation token + * generation. + * @size: Size of the challenge data in bytes. + * + * Initialises the attestation token generation and returns an upper bound + * on the attestation token size that can be used to allocate an adequate + * buffer. The caller is expected to subsequently call + * rsi_attestation_token_continue() to retrieve the attestation token data on + * the same CPU. + * + * Returns: + * On success, returns the upper limit of the attestation report size. + * Otherwise, -EINVAL + */ +static inline long +rsi_attestation_token_init(const u8 *challenge, unsigned long size) +{ + struct arm_smccc_1_2_regs regs = { 0 }; + + /* The challenge must be at least 32bytes and at most 64bytes */ + if (!challenge || size < 32 || size > 64) + return -EINVAL; + + regs.a0 = SMC_RSI_ATTESTATION_TOKEN_INIT; + memcpy(®s.a1, challenge, size); + arm_smccc_1_2_smc(®s, ®s); + + if (regs.a0 == RSI_SUCCESS) + return regs.a1; + + return -EINVAL; +} + +/** + * rsi_attestation_token_continue - Continue the operation to retrieve an + * attestation token. + * + * @granule: {I}PA of the Granule to which the token will be written. + * @offset: Offset within Granule to start of buffer in bytes. + * @size: The size of the buffer. + * @len: The number of bytes written to the buffer. + * + * Retrieves up to a RSI_GRANULE_SIZE worth of token data per call. The caller + * is expected to call rsi_attestation_token_init() before calling this + * function to retrieve the attestation token. + * + * Return: + * * %RSI_SUCCESS - Attestation token retrieved successfully. + * * %RSI_INCOMPLETE - Token generation is not complete. + * * %RSI_ERROR_INPUT - A parameter was not valid. + * * %RSI_ERROR_STATE - Attestation not in progress. 
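A hedged sketch of the retrieval loop implied by the two kernel-doc blocks above (rsi_attestation_token_continue() is the helper defined just below): 'buf' is assumed to be RSI_GRANULE_SIZE-aligned and at least 'max' bytes, error handling is simplified, and keeping the whole sequence on one CPU is left to the caller:

static long example_get_token(const u8 challenge[64], u8 *buf, size_t buf_size)
{
	unsigned long ret, len, off = 0;
	long max = rsi_attestation_token_init(challenge, 64);

	if (max < 0 || (unsigned long)max > buf_size)
		return -EINVAL;

	do {
		unsigned long gran = ALIGN_DOWN(off, RSI_GRANULE_SIZE);

		/* Each call fills at most the rest of the current granule. */
		ret = rsi_attestation_token_continue(virt_to_phys(buf + gran),
						     off - gran,
						     RSI_GRANULE_SIZE - (off - gran),
						     &len);
		off += len;
	} while (ret == RSI_INCOMPLETE);

	return ret == RSI_SUCCESS ? off : -EIO;
}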
+ */ +static inline unsigned long rsi_attestation_token_continue(phys_addr_t granule, + unsigned long offset, + unsigned long size, + unsigned long *len) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_invoke(SMC_RSI_ATTESTATION_TOKEN_CONTINUE, + granule, offset, size, 0, &res); + + if (len) + *len = res.a1; + return res.a0; +} + +#endif /* __ASM_RSI_CMDS_H */ diff --git a/arch/arm64/include/asm/rsi_smc.h b/arch/arm64/include/asm/rsi_smc.h new file mode 100644 index 000000000000..6cb070eca9e9 --- /dev/null +++ b/arch/arm64/include/asm/rsi_smc.h @@ -0,0 +1,193 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2023 ARM Ltd. + */ + +#ifndef __ASM_RSI_SMC_H_ +#define __ASM_RSI_SMC_H_ + +#include <linux/arm-smccc.h> + +/* + * This file describes the Realm Services Interface (RSI) Application Binary + * Interface (ABI) for SMC calls made from within the Realm to the RMM and + * serviced by the RMM. + */ + +/* + * The major version number of the RSI implementation. This is increased when + * the binary format or semantics of the SMC calls change. + */ +#define RSI_ABI_VERSION_MAJOR UL(1) + +/* + * The minor version number of the RSI implementation. This is increased when + * a bug is fixed, or a feature is added without breaking binary compatibility. + */ +#define RSI_ABI_VERSION_MINOR UL(0) + +#define RSI_ABI_VERSION ((RSI_ABI_VERSION_MAJOR << 16) | \ + RSI_ABI_VERSION_MINOR) + +#define RSI_ABI_VERSION_GET_MAJOR(_version) ((_version) >> 16) +#define RSI_ABI_VERSION_GET_MINOR(_version) ((_version) & 0xFFFF) + +#define RSI_SUCCESS UL(0) +#define RSI_ERROR_INPUT UL(1) +#define RSI_ERROR_STATE UL(2) +#define RSI_INCOMPLETE UL(3) +#define RSI_ERROR_UNKNOWN UL(4) + +#define SMC_RSI_FID(n) ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_STANDARD, \ + n) + +/* + * Returns RSI version. + * + * arg1 == Requested interface revision + * ret0 == Status / error + * ret1 == Lower implemented interface revision + * ret2 == Higher implemented interface revision + */ +#define SMC_RSI_ABI_VERSION SMC_RSI_FID(0x190) + +/* + * Read feature register. + * + * arg1 == Feature register index + * ret0 == Status / error + * ret1 == Feature register value + */ +#define SMC_RSI_FEATURES SMC_RSI_FID(0x191) + +/* + * Read measurement for the current Realm. + * + * arg1 == Index, which measurements slot to read + * ret0 == Status / error + * ret1 == Measurement value, bytes: 0 - 7 + * ret2 == Measurement value, bytes: 8 - 15 + * ret3 == Measurement value, bytes: 16 - 23 + * ret4 == Measurement value, bytes: 24 - 31 + * ret5 == Measurement value, bytes: 32 - 39 + * ret6 == Measurement value, bytes: 40 - 47 + * ret7 == Measurement value, bytes: 48 - 55 + * ret8 == Measurement value, bytes: 56 - 63 + */ +#define SMC_RSI_MEASUREMENT_READ SMC_RSI_FID(0x192) + +/* + * Extend Realm Extensible Measurement (REM) value. + * + * arg1 == Index, which measurements slot to extend + * arg2 == Size of realm measurement in bytes, max 64 bytes + * arg3 == Measurement value, bytes: 0 - 7 + * arg4 == Measurement value, bytes: 8 - 15 + * arg5 == Measurement value, bytes: 16 - 23 + * arg6 == Measurement value, bytes: 24 - 31 + * arg7 == Measurement value, bytes: 32 - 39 + * arg8 == Measurement value, bytes: 40 - 47 + * arg9 == Measurement value, bytes: 48 - 55 + * arg10 == Measurement value, bytes: 56 - 63 + * ret0 == Status / error + */ +#define SMC_RSI_MEASUREMENT_EXTEND SMC_RSI_FID(0x193) + +/* + * Initialize the operation to retrieve an attestation token. 
+ * + * arg1 == Challenge value, bytes: 0 - 7 + * arg2 == Challenge value, bytes: 8 - 15 + * arg3 == Challenge value, bytes: 16 - 23 + * arg4 == Challenge value, bytes: 24 - 31 + * arg5 == Challenge value, bytes: 32 - 39 + * arg6 == Challenge value, bytes: 40 - 47 + * arg7 == Challenge value, bytes: 48 - 55 + * arg8 == Challenge value, bytes: 56 - 63 + * ret0 == Status / error + * ret1 == Upper bound of token size in bytes + */ +#define SMC_RSI_ATTESTATION_TOKEN_INIT SMC_RSI_FID(0x194) + +/* + * Continue the operation to retrieve an attestation token. + * + * arg1 == The IPA of token buffer + * arg2 == Offset within the granule of the token buffer + * arg3 == Size of the granule buffer + * ret0 == Status / error + * ret1 == Length of token bytes copied to the granule buffer + */ +#define SMC_RSI_ATTESTATION_TOKEN_CONTINUE SMC_RSI_FID(0x195) + +#ifndef __ASSEMBLY__ + +struct realm_config { + union { + struct { + unsigned long ipa_bits; /* Width of IPA in bits */ + unsigned long hash_algo; /* Hash algorithm */ + }; + u8 pad[0x200]; + }; + union { + u8 rpv[64]; /* Realm Personalization Value */ + u8 pad2[0xe00]; + }; + /* + * The RMM requires the configuration structure to be aligned to a 4k + * boundary, ensure this happens by aligning this structure. + */ +} __aligned(0x1000); + +#endif /* __ASSEMBLY__ */ + +/* + * Read configuration for the current Realm. + * + * arg1 == struct realm_config addr + * ret0 == Status / error + */ +#define SMC_RSI_REALM_CONFIG SMC_RSI_FID(0x196) + +/* + * Request RIPAS of a target IPA range to be changed to a specified value. + * + * arg1 == Base IPA address of target region + * arg2 == Top of the region + * arg3 == RIPAS value + * arg4 == flags + * ret0 == Status / error + * ret1 == Top of modified IPA range + * ret2 == Whether the Host accepted or rejected the request + */ +#define SMC_RSI_IPA_STATE_SET SMC_RSI_FID(0x197) + +#define RSI_NO_CHANGE_DESTROYED UL(0) +#define RSI_CHANGE_DESTROYED UL(1) + +#define RSI_ACCEPT UL(0) +#define RSI_REJECT UL(1) + +/* + * Get RIPAS of a target IPA range. + * + * arg1 == Base IPA of target region + * arg2 == End of target IPA region + * ret0 == Status / error + * ret1 == Top of IPA region which has the reported RIPAS value + * ret2 == RIPAS value + */ +#define SMC_RSI_IPA_STATE_GET SMC_RSI_FID(0x198) + +/* + * Make a Host call. + * + * arg1 == IPA of host call structure + * ret0 == Status / error + */ +#define SMC_RSI_HOST_CALL SMC_RSI_FID(0x199) + +#endif /* __ASM_RSI_SMC_H_ */ diff --git a/arch/arm64/include/asm/runtime-const.h b/arch/arm64/include/asm/runtime-const.h new file mode 100644 index 000000000000..be5915669d23 --- /dev/null +++ b/arch/arm64/include/asm/runtime-const.h @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_RUNTIME_CONST_H +#define _ASM_RUNTIME_CONST_H + +#include <asm/cacheflush.h> + +/* Sigh. 
You can still run arm64 in BE mode */ +#include <asm/byteorder.h> + +#define runtime_const_ptr(sym) ({ \ + typeof(sym) __ret; \ + asm_inline("1:\t" \ + "movz %0, #0xcdef\n\t" \ + "movk %0, #0x89ab, lsl #16\n\t" \ + "movk %0, #0x4567, lsl #32\n\t" \ + "movk %0, #0x0123, lsl #48\n\t" \ + ".pushsection runtime_ptr_" #sym ",\"a\"\n\t" \ + ".long 1b - .\n\t" \ + ".popsection" \ + :"=r" (__ret)); \ + __ret; }) + +#define runtime_const_shift_right_32(val, sym) ({ \ + unsigned long __ret; \ + asm_inline("1:\t" \ + "lsr %w0,%w1,#12\n\t" \ + ".pushsection runtime_shift_" #sym ",\"a\"\n\t" \ + ".long 1b - .\n\t" \ + ".popsection" \ + :"=r" (__ret) \ + :"r" (0u+(val))); \ + __ret; }) + +#define runtime_const_init(type, sym) do { \ + extern s32 __start_runtime_##type##_##sym[]; \ + extern s32 __stop_runtime_##type##_##sym[]; \ + runtime_const_fixup(__runtime_fixup_##type, \ + (unsigned long)(sym), \ + __start_runtime_##type##_##sym, \ + __stop_runtime_##type##_##sym); \ +} while (0) + +/* 16-bit immediate for wide move (movz and movk) in bits 5..20 */ +static inline void __runtime_fixup_16(__le32 *p, unsigned int val) +{ + u32 insn = le32_to_cpu(*p); + insn &= 0xffe0001f; + insn |= (val & 0xffff) << 5; + *p = cpu_to_le32(insn); +} + +static inline void __runtime_fixup_caches(void *where, unsigned int insns) +{ + unsigned long va = (unsigned long)where; + caches_clean_inval_pou(va, va + 4*insns); +} + +static inline void __runtime_fixup_ptr(void *where, unsigned long val) +{ + __le32 *p = lm_alias(where); + __runtime_fixup_16(p, val); + __runtime_fixup_16(p+1, val >> 16); + __runtime_fixup_16(p+2, val >> 32); + __runtime_fixup_16(p+3, val >> 48); + __runtime_fixup_caches(where, 4); +} + +/* Immediate value is 6 bits starting at bit #16 */ +static inline void __runtime_fixup_shift(void *where, unsigned long val) +{ + __le32 *p = lm_alias(where); + u32 insn = le32_to_cpu(*p); + insn &= 0xffc0ffff; + insn |= (val & 63) << 16; + *p = cpu_to_le32(insn); + __runtime_fixup_caches(where, 1); +} + +static inline void runtime_const_fixup(void (*fn)(void *, unsigned long), + unsigned long val, s32 *start, s32 *end) +{ + while (start < end) { + fn(*start + (void *)start, val); + start++; + } +} + +#endif diff --git a/arch/arm64/include/asm/rwonce.h b/arch/arm64/include/asm/rwonce.h index 56f7b1d4d54b..97d9256d33c9 100644 --- a/arch/arm64/include/asm/rwonce.h +++ b/arch/arm64/include/asm/rwonce.h @@ -12,16 +12,12 @@ #ifndef BUILD_VDSO -#ifdef CONFIG_AS_HAS_LDAPR #define __LOAD_RCPC(sfx, regs...) \ ALTERNATIVE( \ "ldar" #sfx "\t" #regs, \ ".arch_extension rcpc\n" \ "ldapr" #sfx "\t" #regs, \ ARM64_HAS_LDAPR) -#else -#define __LOAD_RCPC(sfx, regs...) 
"ldar" #sfx "\t" #regs -#endif /* CONFIG_AS_HAS_LDAPR */ /* * When building with LTO, there is an increased risk of the compiler diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h index 8297bccf0784..a76f9b387a26 100644 --- a/arch/arm64/include/asm/scs.h +++ b/arch/arm64/include/asm/scs.h @@ -5,25 +5,56 @@ #ifdef __ASSEMBLY__ #include <asm/asm-offsets.h> +#include <asm/sysreg.h> #ifdef CONFIG_SHADOW_CALL_STACK scs_sp .req x18 - .macro scs_load tsk - ldr scs_sp, [\tsk, #TSK_TI_SCS_SP] + .macro scs_load_current + get_current_task scs_sp + ldr scs_sp, [scs_sp, #TSK_TI_SCS_SP] .endm .macro scs_save tsk str scs_sp, [\tsk, #TSK_TI_SCS_SP] .endm #else - .macro scs_load tsk + .macro scs_load_current .endm .macro scs_save tsk .endm #endif /* CONFIG_SHADOW_CALL_STACK */ + +#else + +#include <linux/scs.h> +#include <asm/cpufeature.h> + +#ifdef CONFIG_UNWIND_PATCH_PAC_INTO_SCS +static inline void dynamic_scs_init(void) +{ + extern bool __pi_dynamic_scs_is_enabled; + + if (__pi_dynamic_scs_is_enabled) { + pr_info("Enabling dynamic shadow call stack\n"); + static_branch_enable(&dynamic_scs_enabled); + } +} +#else +static inline void dynamic_scs_init(void) {} +#endif + +enum { + EDYNSCS_INVALID_CIE_HEADER = 1, + EDYNSCS_INVALID_CIE_SDATA_SIZE = 2, + EDYNSCS_INVALID_FDE_AUGM_DATA_SIZE = 3, + EDYNSCS_INVALID_CFA_OPCODE = 4, +}; + +int __pi_scs_patch(const u8 eh_frame[], int size); + #endif /* __ASSEMBLY __ */ #endif /* _ASM_SCS_H */ diff --git a/arch/arm64/include/asm/sdei.h b/arch/arm64/include/asm/sdei.h index 7bea1d705dd6..484cb6972e99 100644 --- a/arch/arm64/include/asm/sdei.h +++ b/arch/arm64/include/asm/sdei.h @@ -17,6 +17,9 @@ #include <asm/virt.h> +DECLARE_PER_CPU(struct sdei_registered_event *, sdei_active_normal_event); +DECLARE_PER_CPU(struct sdei_registered_event *, sdei_active_critical_event); + extern unsigned long sdei_exit_mode; /* Software Delegated Exception entry point from firmware*/ @@ -29,6 +32,9 @@ asmlinkage void __sdei_asm_entry_trampoline(unsigned long event_num, unsigned long pc, unsigned long pstate); +/* Abort a running handler. Context is discarded. */ +void __sdei_handler_abort(void); + /* * The above entry point does the minimum to call C code. This function does * anything else, before calling the driver. 
@@ -43,22 +49,5 @@ unsigned long do_sdei_event(struct pt_regs *regs, unsigned long sdei_arch_get_entry_point(int conduit); #define sdei_arch_get_entry_point(x) sdei_arch_get_entry_point(x) -struct stack_info; - -bool _on_sdei_stack(unsigned long sp, unsigned long size, - struct stack_info *info); -static inline bool on_sdei_stack(unsigned long sp, unsigned long size, - struct stack_info *info) -{ - if (!IS_ENABLED(CONFIG_VMAP_STACK)) - return false; - if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE)) - return false; - if (in_nmi()) - return _on_sdei_stack(sp, size, info); - - return false; -} - #endif /* __ASSEMBLY__ */ #endif /* __ASM_SDEI_H */ diff --git a/arch/arm64/include/asm/seccomp.h b/arch/arm64/include/asm/seccomp.h index 30256233788b..bf6bf40bc5ab 100644 --- a/arch/arm64/include/asm/seccomp.h +++ b/arch/arm64/include/asm/seccomp.h @@ -8,13 +8,13 @@ #ifndef _ASM_SECCOMP_H #define _ASM_SECCOMP_H -#include <asm/unistd.h> +#include <asm/unistd_compat_32.h> #ifdef CONFIG_COMPAT -#define __NR_seccomp_read_32 __NR_compat_read -#define __NR_seccomp_write_32 __NR_compat_write -#define __NR_seccomp_exit_32 __NR_compat_exit -#define __NR_seccomp_sigreturn_32 __NR_compat_rt_sigreturn +#define __NR_seccomp_read_32 __NR_compat32_read +#define __NR_seccomp_write_32 __NR_compat32_write +#define __NR_seccomp_exit_32 __NR_compat32_exit +#define __NR_seccomp_sigreturn_32 __NR_compat32_rt_sigreturn #endif /* CONFIG_COMPAT */ #include <asm-generic/seccomp.h> @@ -24,7 +24,7 @@ #define SECCOMP_ARCH_NATIVE_NAME "aarch64" #ifdef CONFIG_COMPAT # define SECCOMP_ARCH_COMPAT AUDIT_ARCH_ARM -# define SECCOMP_ARCH_COMPAT_NR __NR_compat_syscalls +# define SECCOMP_ARCH_COMPAT_NR __NR_compat32_syscalls # define SECCOMP_ARCH_COMPAT_NAME "arm" #endif diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h index 40971ac1303f..51b0d594239e 100644 --- a/arch/arm64/include/asm/sections.h +++ b/arch/arm64/include/asm/sections.h @@ -11,6 +11,7 @@ extern char __alt_instructions[], __alt_instructions_end[]; extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[]; extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[]; extern char __hyp_text_start[], __hyp_text_end[]; +extern char __hyp_data_start[], __hyp_data_end[]; extern char __hyp_rodata_start[], __hyp_rodata_end[]; extern char __hyp_reloc_begin[], __hyp_reloc_end[]; extern char __hyp_bss_start[], __hyp_bss_end[]; diff --git a/arch/arm64/include/asm/semihost.h b/arch/arm64/include/asm/semihost.h new file mode 100644 index 000000000000..87e353dab868 --- /dev/null +++ b/arch/arm64/include/asm/semihost.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2012 ARM Ltd. + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * Adapted for ARM and earlycon: + * Copyright (C) 2014 Linaro Ltd. 
+ * Author: Rob Herring <robh@kernel.org> + */ + +#ifndef _ARM64_SEMIHOST_H_ +#define _ARM64_SEMIHOST_H_ + +struct uart_port; + +static inline void smh_putc(struct uart_port *port, unsigned char c) +{ + asm volatile("mov x1, %0\n" + "mov x0, #3\n" + "hlt 0xf000\n" + : : "r" (&c) : "x0", "x1", "memory"); +} + +#endif /* _ARM64_SEMIHOST_H_ */ diff --git a/arch/arm64/include/asm/set_memory.h b/arch/arm64/include/asm/set_memory.h index 0f740b781187..90f61b17275e 100644 --- a/arch/arm64/include/asm/set_memory.h +++ b/arch/arm64/include/asm/set_memory.h @@ -3,6 +3,7 @@ #ifndef _ASM_ARM64_SET_MEMORY_H #define _ASM_ARM64_SET_MEMORY_H +#include <asm/mem_encrypt.h> #include <asm-generic/set_memory.h> bool can_set_direct_map(void); @@ -12,6 +13,10 @@ int set_memory_valid(unsigned long addr, int numpages, int enable); int set_direct_map_invalid_noflush(struct page *page); int set_direct_map_default_noflush(struct page *page); +int set_direct_map_valid_noflush(struct page *page, unsigned nr, bool valid); bool kernel_page_present(struct page *page); +int set_memory_encrypted(unsigned long addr, int numpages); +int set_memory_decrypted(unsigned long addr, int numpages); + #endif /* _ASM_ARM64_SET_MEMORY_H */ diff --git a/arch/arm64/include/asm/setup.h b/arch/arm64/include/asm/setup.h index 6437df661700..ba269a7a3201 100644 --- a/arch/arm64/include/asm/setup.h +++ b/arch/arm64/include/asm/setup.h @@ -3,10 +3,9 @@ #ifndef __ARM64_ASM_SETUP_H #define __ARM64_ASM_SETUP_H -#include <uapi/asm/setup.h> +#include <linux/string.h> -void *get_early_fdt_ptr(void); -void early_fdt_map(u64 dt_phys); +#include <uapi/asm/setup.h> /* * These two variables are used in the head.S file. @@ -14,4 +13,32 @@ void early_fdt_map(u64 dt_phys); extern phys_addr_t __fdt_pointer __initdata; extern u64 __cacheline_aligned boot_args[4]; +static inline bool arch_parse_debug_rodata(char *arg) +{ + extern bool rodata_enabled; + extern bool rodata_full; + + if (!arg) + return false; + + if (!strcmp(arg, "full")) { + rodata_enabled = rodata_full = true; + return true; + } + + if (!strcmp(arg, "off")) { + rodata_enabled = rodata_full = false; + return true; + } + + if (!strcmp(arg, "on")) { + rodata_enabled = true; + rodata_full = false; + return true; + } + + return false; +} +#define arch_parse_debug_rodata arch_parse_debug_rodata + #endif diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h index 6a75d7ecdcaa..8e86c9e70e48 100644 --- a/arch/arm64/include/asm/simd.h +++ b/arch/arm64/include/asm/simd.h @@ -12,8 +12,6 @@ #include <linux/preempt.h> #include <linux/types.h> -DECLARE_PER_CPU(bool, fpsimd_context_busy); - #ifdef CONFIG_KERNEL_MODE_NEON /* @@ -28,17 +26,10 @@ static __must_check inline bool may_use_simd(void) /* * We must make sure that the SVE has been initialized properly * before using the SIMD in kernel. - * fpsimd_context_busy is only set while preemption is disabled, - * and is clear whenever preemption is enabled. Since - * this_cpu_read() is atomic w.r.t. preemption, fpsimd_context_busy - * cannot change under our feet -- if it's set we cannot be - * migrated, and if it's clear we cannot be migrated to a CPU - * where it is set. */ return !WARN_ON(!system_capabilities_finalized()) && system_supports_fpsimd() && - !in_hardirq() && !irqs_disabled() && !in_nmi() && - !this_cpu_read(fpsimd_context_busy); + !in_hardirq() && !irqs_disabled() && !in_nmi(); } #else /* ! 
CONFIG_KERNEL_MODE_NEON */ diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index fc55f5a57a06..d48ef6d5abcc 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -25,22 +25,11 @@ #ifndef __ASSEMBLY__ -#include <asm/percpu.h> - #include <linux/threads.h> #include <linux/cpumask.h> #include <linux/thread_info.h> -DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number); - -/* - * We don't use this_cpu_read(cpu_number) as that has implicit writes to - * preempt_count, and associated (compiler) barriers, that we'd like to avoid - * the expense of. If we're preemptible, the value can be stale at use anyway. - * And we can't use this_cpu_ptr() either, as that winds up recursing back - * here under CONFIG_DEBUG_PREEMPT=y. - */ -#define raw_smp_processor_id() (*raw_cpu_ptr(&cpu_number)) +#define raw_smp_processor_id() (current_thread_info()->cpu) /* * Logical CPU mapping. @@ -61,10 +50,32 @@ struct seq_file; */ extern void smp_init_cpus(void); +enum ipi_msg_type { + IPI_RESCHEDULE, + IPI_CALL_FUNC, + IPI_CPU_STOP, + IPI_CPU_STOP_NMI, + IPI_TIMER, + IPI_IRQ_WORK, + NR_IPI, + /* + * Any enum >= NR_IPI and < MAX_IPI is special and not tracable + * with trace_ipi_* + */ + IPI_CPU_BACKTRACE = NR_IPI, + IPI_KGDB_ROUNDUP, + MAX_IPI +}; + /* * Register IPI interrupts with the arch SMP code */ -extern void set_smp_ipi_range(int ipi_base, int nr_ipi); +extern void set_smp_ipi_range_percpu(int ipi_base, int nr_ipi, int ncpus); + +static inline void set_smp_ipi_range(int ipi_base, int n) +{ + set_smp_ipi_range_percpu(ipi_base, n, 0); +} /* * Called from the secondary holding pen, this is the secondary CPU entry point. @@ -89,9 +100,9 @@ extern void arch_send_call_function_single_ipi(int cpu); extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); #ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL -extern void arch_send_wakeup_ipi_mask(const struct cpumask *mask); +extern void arch_send_wakeup_ipi(unsigned int cpu); #else -static inline void arch_send_wakeup_ipi_mask(const struct cpumask *mask) +static inline void arch_send_wakeup_ipi(unsigned int cpu) { BUILD_BUG(); } @@ -99,11 +110,11 @@ static inline void arch_send_wakeup_ipi_mask(const struct cpumask *mask) extern int __cpu_disable(void); -extern void __cpu_die(unsigned int cpu); -extern void cpu_die(void); -extern void cpu_die_early(void); +static inline void __cpu_die(unsigned int cpu) { } +extern void __noreturn cpu_die(void); +extern void __noreturn cpu_die_early(void); -static inline void cpu_park_loop(void) +static inline void __noreturn cpu_park_loop(void) { for (;;) { wfe(); @@ -123,7 +134,7 @@ static inline void update_cpu_boot_status(int val) * which calls for a kernel panic. Update the boot status and park the calling * CPU. 
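 * (The failure is recorded via update_cpu_boot_status(CPU_PANIC_KERNEL)
 * below, after which the caller spins forever in cpu_park_loop(), i.e.
 * wfe()/wfi().)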
*/ -static inline void cpu_panic_kernel(void) +static inline void __noreturn cpu_panic_kernel(void) { update_cpu_boot_status(CPU_PANIC_KERNEL); cpu_park_loop(); @@ -143,7 +154,6 @@ bool cpus_are_stuck_in_kernel(void); extern void crash_smp_send_stop(void); extern bool smp_crash_stop_failed(void); -extern void panic_smp_self_stop(void); #endif /* ifndef __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/sparsemem.h b/arch/arm64/include/asm/sparsemem.h index 4b73463423c3..84783efdc9d1 100644 --- a/arch/arm64/include/asm/sparsemem.h +++ b/arch/arm64/include/asm/sparsemem.h @@ -5,12 +5,15 @@ #ifndef __ASM_SPARSEMEM_H #define __ASM_SPARSEMEM_H -#define MAX_PHYSMEM_BITS CONFIG_ARM64_PA_BITS +#include <asm/pgtable-prot.h> + +#define MAX_PHYSMEM_BITS PHYS_MASK_SHIFT +#define MAX_POSSIBLE_PHYSMEM_BITS (52) /* * Section size must be at least 512MB for 64K base * page size config. Otherwise it will be less than - * (MAX_ORDER - 1) and the build process will fail. + * MAX_PAGE_ORDER and the build process will fail. */ #ifdef CONFIG_ARM64_64K_PAGES #define SECTION_SIZE_BITS 29 diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h index aa3d3607d5c8..8fef12626090 100644 --- a/arch/arm64/include/asm/spectre.h +++ b/arch/arm64/include/asm/spectre.h @@ -13,8 +13,8 @@ #define __BP_HARDEN_HYP_VECS_SZ ((BP_HARDEN_EL2_SLOTS - 1) * SZ_2K) #ifndef __ASSEMBLY__ - -#include <linux/percpu.h> +#include <linux/smp.h> +#include <asm/percpu.h> #include <asm/cpufeature.h> #include <asm/virt.h> @@ -26,6 +26,7 @@ enum mitigation_state { SPECTRE_VULNERABLE, }; +struct pt_regs; struct task_struct; /* @@ -72,7 +73,7 @@ static __always_inline void arm64_apply_bp_hardening(void) { struct bp_hardening_data *d; - if (!cpus_have_const_cap(ARM64_SPECTRE_V2)) + if (!alternative_has_cap_unlikely(ARM64_SPECTRE_V2)) return; d = this_cpu_ptr(&bp_hardening_data); @@ -96,7 +97,26 @@ enum mitigation_state arm64_get_meltdown_state(void); enum mitigation_state arm64_get_spectre_bhb_state(void); bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry, int scope); -u8 spectre_bhb_loop_affected(int scope); +extern bool __nospectre_bhb; +u8 get_spectre_bhb_loop_value(void); +bool is_spectre_bhb_fw_mitigated(void); void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *__unused); +bool try_emulate_el1_ssbs(struct pt_regs *regs, u32 instr); + +void spectre_v4_patch_fw_mitigation_enable(struct alt_instr *alt, __le32 *origptr, + __le32 *updptr, int nr_inst); +void smccc_patch_fw_mitigation_conduit(struct alt_instr *alt, __le32 *origptr, + __le32 *updptr, int nr_inst); +void spectre_bhb_patch_loop_mitigation_enable(struct alt_instr *alt, __le32 *origptr, + __le32 *updptr, int nr_inst); +void spectre_bhb_patch_fw_mitigation_enabled(struct alt_instr *alt, __le32 *origptr, + __le32 *updptr, int nr_inst); +void spectre_bhb_patch_loop_iter(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst); +void spectre_bhb_patch_wa3(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst); +void spectre_bhb_patch_clearbhb(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst); + #endif /* __ASSEMBLY__ */ #endif /* __ASM_SPECTRE_H */ diff --git a/arch/arm64/include/asm/spinlock_types.h b/arch/arm64/include/asm/spinlock_types.h index 11ab1c077697..7cde3d8bd0ad 100644 --- a/arch/arm64/include/asm/spinlock_types.h +++ b/arch/arm64/include/asm/spinlock_types.h @@ -6,7 +6,7 @@ #define __ASM_SPINLOCK_TYPES_H #if !defined(__LINUX_SPINLOCK_TYPES_RAW_H) && 
!defined(__ASM_SPINLOCK_H) -# error "please don't include this file directly" +# error "Please do not include this file directly." #endif #include <asm-generic/qspinlock_types.h> diff --git a/arch/arm64/include/asm/stackprotector.h b/arch/arm64/include/asm/stackprotector.h index 33f1bb453150..ae3ad80f51fe 100644 --- a/arch/arm64/include/asm/stackprotector.h +++ b/arch/arm64/include/asm/stackprotector.h @@ -13,8 +13,6 @@ #ifndef __ASM_STACKPROTECTOR_H #define __ASM_STACKPROTECTOR_H -#include <linux/random.h> -#include <linux/version.h> #include <asm/pointer_auth.h> extern unsigned long __stack_chk_guard; @@ -28,12 +26,7 @@ extern unsigned long __stack_chk_guard; static __always_inline void boot_init_stack_canary(void) { #if defined(CONFIG_STACKPROTECTOR) - unsigned long canary; - - /* Try to get a semi random initial value. */ - get_random_bytes(&canary, sizeof(canary)); - canary ^= LINUX_VERSION_CODE; - canary &= CANARY_MASK; + unsigned long canary = get_random_canary(); current->stack_canary = canary; if (!IS_ENABLED(CONFIG_STACKPROTECTOR_PER_TASK)) diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index aec9315bf156..6d3280932bf5 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -8,111 +8,113 @@ #include <linux/percpu.h> #include <linux/sched.h> #include <linux/sched/task_stack.h> -#include <linux/types.h> #include <linux/llist.h> #include <asm/memory.h> +#include <asm/pointer_auth.h> #include <asm/ptrace.h> #include <asm/sdei.h> -enum stack_type { - STACK_TYPE_UNKNOWN, - STACK_TYPE_TASK, - STACK_TYPE_IRQ, - STACK_TYPE_OVERFLOW, - STACK_TYPE_SDEI_NORMAL, - STACK_TYPE_SDEI_CRITICAL, - __NR_STACK_TYPES -}; - -struct stack_info { - unsigned long low; - unsigned long high; - enum stack_type type; -}; +#include <asm/stacktrace/common.h> extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk, const char *loglvl); DECLARE_PER_CPU(unsigned long *, irq_stack_ptr); -static inline bool on_stack(unsigned long sp, unsigned long size, - unsigned long low, unsigned long high, - enum stack_type type, struct stack_info *info) -{ - if (!low) - return false; - - if (sp < low || sp + size < sp || sp + size > high) - return false; - - if (info) { - info->low = low; - info->high = high; - info->type = type; - } - return true; -} - -static inline bool on_irq_stack(unsigned long sp, unsigned long size, - struct stack_info *info) +static inline struct stack_info stackinfo_get_irq(void) { unsigned long low = (unsigned long)raw_cpu_read(irq_stack_ptr); unsigned long high = low + IRQ_STACK_SIZE; - return on_stack(sp, size, low, high, STACK_TYPE_IRQ, info); + return (struct stack_info) { + .low = low, + .high = high, + }; } -static inline bool on_task_stack(const struct task_struct *tsk, - unsigned long sp, unsigned long size, - struct stack_info *info) +static inline bool on_irq_stack(unsigned long sp, unsigned long size) +{ + struct stack_info info = stackinfo_get_irq(); + return stackinfo_on_stack(&info, sp, size); +} + +static inline struct stack_info stackinfo_get_task(const struct task_struct *tsk) { unsigned long low = (unsigned long)task_stack_page(tsk); unsigned long high = low + THREAD_SIZE; - return on_stack(sp, size, low, high, STACK_TYPE_TASK, info); + return (struct stack_info) { + .low = low, + .high = high, + }; } -#ifdef CONFIG_VMAP_STACK +static inline bool on_task_stack(const struct task_struct *tsk, + unsigned long sp, unsigned long size) +{ + struct stack_info info = stackinfo_get_task(tsk); + 
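	/* True only if [sp, sp + size) lies entirely within the task's stack. */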
return stackinfo_on_stack(&info, sp, size); +} + +#define on_thread_stack() (on_task_stack(current, current_stack_pointer, 1)) + DECLARE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack); -static inline bool on_overflow_stack(unsigned long sp, unsigned long size, - struct stack_info *info) +static inline struct stack_info stackinfo_get_overflow(void) { unsigned long low = (unsigned long)raw_cpu_ptr(overflow_stack); unsigned long high = low + OVERFLOW_STACK_SIZE; - return on_stack(sp, size, low, high, STACK_TYPE_OVERFLOW, info); + return (struct stack_info) { + .low = low, + .high = high, + }; +} + +#if defined(CONFIG_ARM_SDE_INTERFACE) +DECLARE_PER_CPU(unsigned long *, sdei_stack_normal_ptr); +DECLARE_PER_CPU(unsigned long *, sdei_stack_critical_ptr); + +static inline struct stack_info stackinfo_get_sdei_normal(void) +{ + unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_normal_ptr); + unsigned long high = low + SDEI_STACK_SIZE; + + return (struct stack_info) { + .low = low, + .high = high, + }; +} + +static inline struct stack_info stackinfo_get_sdei_critical(void) +{ + unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_critical_ptr); + unsigned long high = low + SDEI_STACK_SIZE; + + return (struct stack_info) { + .low = low, + .high = high, + }; } #else -static inline bool on_overflow_stack(unsigned long sp, unsigned long size, - struct stack_info *info) { return false; } +#define stackinfo_get_sdei_normal() stackinfo_get_unknown() +#define stackinfo_get_sdei_critical() stackinfo_get_unknown() #endif +#ifdef CONFIG_EFI +extern u64 *efi_rt_stack_top; -/* - * We can only safely access per-cpu stacks from current in a non-preemptible - * context. - */ -static inline bool on_accessible_stack(const struct task_struct *tsk, - unsigned long sp, unsigned long size, - struct stack_info *info) +static inline struct stack_info stackinfo_get_efi(void) { - if (info) - info->type = STACK_TYPE_UNKNOWN; - - if (on_task_stack(tsk, sp, size, info)) - return true; - if (tsk != current || preemptible()) - return false; - if (on_irq_stack(sp, size, info)) - return true; - if (on_overflow_stack(sp, size, info)) - return true; - if (on_sdei_stack(sp, size, info)) - return true; - - return false; + unsigned long high = (u64)efi_rt_stack_top; + unsigned long low = high - THREAD_SIZE; + + return (struct stack_info) { + .low = low, + .high = high, + }; } +#endif #endif /* __ASM_STACKTRACE_H */ diff --git a/arch/arm64/include/asm/stacktrace/common.h b/arch/arm64/include/asm/stacktrace/common.h new file mode 100644 index 000000000000..821a8fdd31af --- /dev/null +++ b/arch/arm64/include/asm/stacktrace/common.h @@ -0,0 +1,171 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Common arm64 stack unwinder code. + * + * See: arch/arm64/kernel/stacktrace.c for the reference implementation. + * + * Copyright (C) 2012 ARM Ltd. + */ +#ifndef __ASM_STACKTRACE_COMMON_H +#define __ASM_STACKTRACE_COMMON_H + +#include <linux/types.h> + +struct stack_info { + unsigned long low; + unsigned long high; +}; + +/** + * struct unwind_state - state used for robust unwinding. + * + * @fp: The fp value in the frame record (or the real fp) + * @pc: The lr value in the frame record (or the real lr) + * + * @stack: The stack currently being unwound. + * @stacks: An array of stacks which can be unwound. + * @nr_stacks: The number of stacks in @stacks. 
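 *
 * A caller might set this up roughly as follows (a sketch; tsk and the
 * starting fp/pc are hypothetical, not defined here):
 *
 *	struct stack_info stacks[] = {
 *		stackinfo_get_task(tsk),
 *		stackinfo_get_irq(),
 *	};
 *	struct unwind_state state = {
 *		.stacks = stacks,
 *		.nr_stacks = ARRAY_SIZE(stacks),
 *	};
 *
 *	unwind_init_common(&state);
 *	state.fp = start_fp;
 *	state.pc = start_pc;
 *
 * after which frames are consumed with unwind_next_frame_record().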
+ */ +struct unwind_state { + unsigned long fp; + unsigned long pc; + + struct stack_info stack; + struct stack_info *stacks; + int nr_stacks; +}; + +static inline struct stack_info stackinfo_get_unknown(void) +{ + return (struct stack_info) { + .low = 0, + .high = 0, + }; +} + +static inline bool stackinfo_on_stack(const struct stack_info *info, + unsigned long sp, unsigned long size) +{ + if (!info->low) + return false; + + if (sp < info->low || sp + size < sp || sp + size > info->high) + return false; + + return true; +} + +static inline void unwind_init_common(struct unwind_state *state) +{ + state->stack = stackinfo_get_unknown(); +} + +/** + * unwind_find_stack() - Find the accessible stack which entirely contains an + * object. + * + * @state: the current unwind state. + * @sp: the base address of the object. + * @size: the size of the object. + * + * Return: a pointer to the relevant stack_info if found; NULL otherwise. + */ +static struct stack_info *unwind_find_stack(struct unwind_state *state, + unsigned long sp, + unsigned long size) +{ + struct stack_info *info = &state->stack; + + if (stackinfo_on_stack(info, sp, size)) + return info; + + for (int i = 0; i < state->nr_stacks; i++) { + info = &state->stacks[i]; + if (stackinfo_on_stack(info, sp, size)) + return info; + } + + return NULL; +} + +/** + * unwind_consume_stack() - Update stack boundaries so that future unwind steps + * cannot consume this object again. + * + * @state: the current unwind state. + * @info: the stack_info of the stack containing the object. + * @sp: the base address of the object. + * @size: the size of the object. + * + * Return: 0 upon success, an error code otherwise. + */ +static inline void unwind_consume_stack(struct unwind_state *state, + struct stack_info *info, + unsigned long sp, + unsigned long size) +{ + struct stack_info tmp; + + /* + * Stack transitions are strictly one-way, and once we've + * transitioned from one stack to another, it's never valid to + * unwind back to the old stack. + * + * Destroy the old stack info so that it cannot be found upon a + * subsequent transition. If the stack has not changed, we'll + * immediately restore the current stack info. + * + * Note that stacks can nest in several valid orders, e.g. + * + * TASK -> IRQ -> OVERFLOW -> SDEI_NORMAL + * TASK -> SDEI_NORMAL -> SDEI_CRITICAL -> OVERFLOW + * HYP -> OVERFLOW + * + * ... so we do not check the specific order of stack + * transitions. + */ + tmp = *info; + *info = stackinfo_get_unknown(); + state->stack = tmp; + + /* + * Future unwind steps can only consume stack above this frame record. + * Update the current stack to start immediately above it. + */ + state->stack.low = sp + size; +} + +/** + * unwind_next_frame_record() - Unwind to the next frame record. + * + * @state: the current unwind state. + * + * Return: 0 upon success, an error code otherwise. + */ +static inline int +unwind_next_frame_record(struct unwind_state *state) +{ + struct stack_info *info; + struct frame_record *record; + unsigned long fp = state->fp; + + if (fp & 0x7) + return -EINVAL; + + info = unwind_find_stack(state, fp, sizeof(*record)); + if (!info) + return -EINVAL; + + unwind_consume_stack(state, info, fp, sizeof(*record)); + + /* + * Record this frame record's values. 
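 * (A frame record is the AAPCS64 {fp, lr} pair stored at the frame
 * pointer; see struct frame_record in <asm/stacktrace/frame.h>.)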
+ */ + record = (struct frame_record *)fp; + state->fp = READ_ONCE(record->fp); + state->pc = READ_ONCE(record->lr); + + return 0; +} + +#endif /* __ASM_STACKTRACE_COMMON_H */ diff --git a/arch/arm64/include/asm/stacktrace/frame.h b/arch/arm64/include/asm/stacktrace/frame.h new file mode 100644 index 000000000000..0ee0f6ba0fd8 --- /dev/null +++ b/arch/arm64/include/asm/stacktrace/frame.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __ASM_STACKTRACE_FRAME_H +#define __ASM_STACKTRACE_FRAME_H + +/* + * - FRAME_META_TYPE_NONE + * + * This value is reserved. + * + * - FRAME_META_TYPE_FINAL + * + * The record is the last entry on the stack. + * Unwinding should terminate successfully. + * + * - FRAME_META_TYPE_PT_REGS + * + * The record is embedded within a struct pt_regs, recording the registers at + * an arbitrary point in time. + * Unwinding should consume pt_regs::pc, followed by pt_regs::lr. + * + * Note: all other values are reserved and should result in unwinding + * terminating with an error. + */ +#define FRAME_META_TYPE_NONE 0 +#define FRAME_META_TYPE_FINAL 1 +#define FRAME_META_TYPE_PT_REGS 2 + +#ifndef __ASSEMBLY__ +/* + * A standard AAPCS64 frame record. + */ +struct frame_record { + u64 fp; + u64 lr; +}; + +/* + * A metadata frame record indicating a special unwind. + * The record::{fp,lr} fields must be zero to indicate the presence of + * metadata. + */ +struct frame_record_meta { + struct frame_record record; + u64 type; +}; +#endif /* __ASSEMBLY */ + +#endif /* __ASM_STACKTRACE_FRAME_H */ diff --git a/arch/arm64/include/asm/stacktrace/nvhe.h b/arch/arm64/include/asm/stacktrace/nvhe.h new file mode 100644 index 000000000000..171f9edef49f --- /dev/null +++ b/arch/arm64/include/asm/stacktrace/nvhe.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * KVM nVHE hypervisor stack tracing support. + * + * The unwinder implementation depends on the nVHE mode: + * + * 1) Non-protected nVHE mode - the host can directly access the + * HYP stack pages and unwind the HYP stack in EL1. This saves having + * to allocate shared buffers for the host to read the unwinded + * stacktrace. + * + * 2) pKVM (protected nVHE) mode - the host cannot directly access + * the HYP memory. The stack is unwinded in EL2 and dumped to a shared + * buffer where the host can read and print the stacktrace. + * + * Copyright (C) 2022 Google LLC + */ +#ifndef __ASM_STACKTRACE_NVHE_H +#define __ASM_STACKTRACE_NVHE_H + +#include <asm/stacktrace/common.h> + +/** + * kvm_nvhe_unwind_init() - Start an unwind from the given nVHE HYP fp and pc + * + * @state : unwind_state to initialize + * @fp : frame pointer at which to start the unwinding. + * @pc : program counter at which to start the unwinding. + */ +static inline void kvm_nvhe_unwind_init(struct unwind_state *state, + unsigned long fp, + unsigned long pc) +{ + unwind_init_common(state); + + state->fp = fp; + state->pc = pc; +} + +#ifndef __KVM_NVHE_HYPERVISOR__ +/* + * Conventional (non-protected) nVHE HYP stack unwinder + * + * In non-protected mode, the unwinding is done from kernel proper context + * (by the host in EL1). 
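 *
 * The host seeds the unwind from the hypervisor's saved state, roughly
 * (a sketch; stacktrace_info stands for this CPU's copy of the per-CPU
 * kvm_stacktrace_info declared below):
 *
 *	struct unwind_state state;
 *
 *	kvm_nvhe_unwind_init(&state, stacktrace_info->fp, stacktrace_info->pc);
 *
 * and then walks the frame records with the common unwinder.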
+ */ + +DECLARE_KVM_NVHE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack); +DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_stacktrace_info, kvm_stacktrace_info); +DECLARE_PER_CPU(unsigned long, kvm_arm_hyp_stack_base); + +void kvm_nvhe_dump_backtrace(unsigned long hyp_offset); + +#endif /* __KVM_NVHE_HYPERVISOR__ */ +#endif /* __ASM_STACKTRACE_NVHE_H */ diff --git a/arch/arm64/include/asm/stage2_pgtable.h b/arch/arm64/include/asm/stage2_pgtable.h index fe341a6578c3..23d27623e478 100644 --- a/arch/arm64/include/asm/stage2_pgtable.h +++ b/arch/arm64/include/asm/stage2_pgtable.h @@ -11,13 +11,6 @@ #include <linux/pgtable.h> /* - * PGDIR_SHIFT determines the size a top-level page table entry can map - * and depends on the number of levels in the page table. Compute the - * PGDIR_SHIFT for a given number of levels. - */ -#define pt_levels_pgdir_shift(lvls) ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - (lvls)) - -/* * The hardware supports concatenation of up to 16 tables at stage2 entry * level and we use the feature whenever possible, which means we resolve 4 * additional bits of address at the entry level. @@ -28,26 +21,13 @@ * (IPA_SHIFT - 4). */ #define stage2_pgtable_levels(ipa) ARM64_HW_PGTABLE_LEVELS((ipa) - 4) -#define kvm_stage2_levels(kvm) VTCR_EL2_LVLS(kvm->arch.vtcr) - -/* stage2_pgdir_shift() is the size mapped by top-level stage2 entry for the VM */ -#define stage2_pgdir_shift(kvm) pt_levels_pgdir_shift(kvm_stage2_levels(kvm)) -#define stage2_pgdir_size(kvm) (1ULL << stage2_pgdir_shift(kvm)) -#define stage2_pgdir_mask(kvm) ~(stage2_pgdir_size(kvm) - 1) +#define kvm_stage2_levels(mmu) VTCR_EL2_LVLS((mmu)->vtcr) /* * kvm_mmmu_cache_min_pages() is the number of pages required to install * a stage-2 translation. We pre-allocate the entry level page table at * the VM creation. */ -#define kvm_mmu_cache_min_pages(kvm) (kvm_stage2_levels(kvm) - 1) - -static inline phys_addr_t -stage2_pgd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) -{ - phys_addr_t boundary = (addr + stage2_pgdir_size(kvm)) & stage2_pgdir_mask(kvm); - - return (boundary - 1 < end - 1) ? boundary : end; -} +#define kvm_mmu_cache_min_pages(mmu) (kvm_stage2_levels(mmu) - 1) #endif /* __ARM64_S2_PGTABLE_H_ */ diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h index 4cfe9b49709b..712daa90e643 100644 --- a/arch/arm64/include/asm/syscall.h +++ b/arch/arm64/include/asm/syscall.h @@ -61,6 +61,22 @@ static inline void syscall_set_return_value(struct task_struct *task, regs->regs[0] = val; } +static inline void syscall_set_nr(struct task_struct *task, + struct pt_regs *regs, + int nr) +{ + regs->syscallno = nr; + if (nr == -1) { + /* + * When the syscall number is set to -1, the syscall will be + * skipped. In this case the syscall return value has to be + * set explicitly, otherwise the first syscall argument is + * returned as the syscall return value. + */ + syscall_set_return_value(task, regs, -ENOSYS, 0); + } +} + #define SYSCALL_MAX_ARGS 6 static inline void syscall_get_arguments(struct task_struct *task, @@ -73,6 +89,19 @@ static inline void syscall_get_arguments(struct task_struct *task, memcpy(args, ®s->regs[1], 5 * sizeof(args[0])); } +static inline void syscall_set_arguments(struct task_struct *task, + struct pt_regs *regs, + const unsigned long *args) +{ + memcpy(®s->regs[0], args, 6 * sizeof(args[0])); + /* + * Also copy the first argument into orig_x0 + * so that syscall_get_arguments() would return it + * instead of the previous value. 
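 * (orig_x0 exists because regs[0] is overwritten with the syscall return
 * value; syscall_get_arguments() reports argument 0 from this saved copy.)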
+ */ + regs->orig_x0 = regs->regs[0]; +} + /* * We don't care about endianness (__AUDIT_ARCH_LE bit) here because * AArch64 has the same system calls both on little- and big- endian. @@ -85,4 +114,7 @@ static inline int syscall_get_arch(struct task_struct *task) return AUDIT_ARCH_AARCH64; } +int syscall_trace_enter(struct pt_regs *regs); +void syscall_trace_exit(struct pt_regs *regs); + #endif /* __ASM_SYSCALL_H */ diff --git a/arch/arm64/include/asm/syscall_wrapper.h b/arch/arm64/include/asm/syscall_wrapper.h index b383b4802a7b..abb57bc54305 100644 --- a/arch/arm64/include/asm/syscall_wrapper.h +++ b/arch/arm64/include/asm/syscall_wrapper.h @@ -8,7 +8,7 @@ #ifndef __ASM_SYSCALL_WRAPPER_H #define __ASM_SYSCALL_WRAPPER_H -struct pt_regs; +#include <asm/ptrace.h> #define SC_ARM64_REGS_TO_ARGS(x, ...) \ __MAP(x,__SC_ARGS \ @@ -38,14 +38,12 @@ struct pt_regs; asmlinkage long __arm64_compat_sys_##sname(const struct pt_regs *__unused) #define COND_SYSCALL_COMPAT(name) \ + asmlinkage long __arm64_compat_sys_##name(const struct pt_regs *regs); \ asmlinkage long __weak __arm64_compat_sys_##name(const struct pt_regs *regs) \ { \ return sys_ni_syscall(); \ } -#define COMPAT_SYS_NI(name) \ - SYSCALL_ALIAS(__arm64_compat_sys_##name, sys_ni_posix_timers); - #endif /* CONFIG_COMPAT */ #define __SYSCALL_DEFINEx(x, name, ...) \ @@ -73,11 +71,12 @@ struct pt_regs; asmlinkage long __arm64_sys_##sname(const struct pt_regs *__unused) #define COND_SYSCALL(name) \ + asmlinkage long __arm64_sys_##name(const struct pt_regs *regs); \ asmlinkage long __weak __arm64_sys_##name(const struct pt_regs *regs) \ { \ return sys_ni_syscall(); \ } -#define SYS_NI(name) SYSCALL_ALIAS(__arm64_sys_##name, sys_ni_posix_timers); +asmlinkage long __arm64_sys_ni_syscall(const struct pt_regs *__unused); #endif /* __ASM_SYSCALL_WRAPPER_H */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 7c71358d44c4..6604fd6f33f4 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -12,6 +12,7 @@ #include <linux/bits.h> #include <linux/stringify.h> #include <linux/kasan-tags.h> +#include <linux/kconfig.h> #include <asm/gpr-num.h> @@ -90,29 +91,80 @@ */ #define pstate_field(op1, op2) ((op1) << Op1_shift | (op2) << Op2_shift) #define PSTATE_Imm_shift CRm_shift +#define SET_PSTATE(x, r) __emit_inst(0xd500401f | PSTATE_ ## r | ((!!x) << PSTATE_Imm_shift)) #define PSTATE_PAN pstate_field(0, 4) #define PSTATE_UAO pstate_field(0, 3) #define PSTATE_SSBS pstate_field(3, 1) +#define PSTATE_DIT pstate_field(3, 2) #define PSTATE_TCO pstate_field(3, 4) -#define SET_PSTATE_PAN(x) __emit_inst(0xd500401f | PSTATE_PAN | ((!!x) << PSTATE_Imm_shift)) -#define SET_PSTATE_UAO(x) __emit_inst(0xd500401f | PSTATE_UAO | ((!!x) << PSTATE_Imm_shift)) -#define SET_PSTATE_SSBS(x) __emit_inst(0xd500401f | PSTATE_SSBS | ((!!x) << PSTATE_Imm_shift)) -#define SET_PSTATE_TCO(x) __emit_inst(0xd500401f | PSTATE_TCO | ((!!x) << PSTATE_Imm_shift)) +#define SET_PSTATE_PAN(x) SET_PSTATE((x), PAN) +#define SET_PSTATE_UAO(x) SET_PSTATE((x), UAO) +#define SET_PSTATE_SSBS(x) SET_PSTATE((x), SSBS) +#define SET_PSTATE_DIT(x) SET_PSTATE((x), DIT) +#define SET_PSTATE_TCO(x) SET_PSTATE((x), TCO) #define set_pstate_pan(x) asm volatile(SET_PSTATE_PAN(x)) #define set_pstate_uao(x) asm volatile(SET_PSTATE_UAO(x)) #define set_pstate_ssbs(x) asm volatile(SET_PSTATE_SSBS(x)) +#define set_pstate_dit(x) asm volatile(SET_PSTATE_DIT(x)) -#define __SYS_BARRIER_INSN(CRm, op2, Rt) \ - __emit_inst(0xd5000000 | sys_insn(0, 3, 3, (CRm), (op2)) | 
((Rt) & 0x1f)) +/* Register-based PAN access, for save/restore purposes */ +#define SYS_PSTATE_PAN sys_reg(3, 0, 4, 2, 3) -#define SB_BARRIER_INSN __SYS_BARRIER_INSN(0, 7, 31) +#define __SYS_BARRIER_INSN(op0, op1, CRn, CRm, op2, Rt) \ + __emit_inst(0xd5000000 | \ + sys_insn((op0), (op1), (CRn), (CRm), (op2)) | \ + ((Rt) & 0x1f)) +#define SB_BARRIER_INSN __SYS_BARRIER_INSN(0, 3, 3, 0, 7, 31) +#define GSB_SYS_BARRIER_INSN __SYS_BARRIER_INSN(1, 0, 12, 0, 0, 31) +#define GSB_ACK_BARRIER_INSN __SYS_BARRIER_INSN(1, 0, 12, 0, 1, 31) + +/* Data cache zero operations */ #define SYS_DC_ISW sys_insn(1, 0, 7, 6, 2) +#define SYS_DC_IGSW sys_insn(1, 0, 7, 6, 4) +#define SYS_DC_IGDSW sys_insn(1, 0, 7, 6, 6) #define SYS_DC_CSW sys_insn(1, 0, 7, 10, 2) +#define SYS_DC_CGSW sys_insn(1, 0, 7, 10, 4) +#define SYS_DC_CGDSW sys_insn(1, 0, 7, 10, 6) #define SYS_DC_CISW sys_insn(1, 0, 7, 14, 2) +#define SYS_DC_CIGSW sys_insn(1, 0, 7, 14, 4) +#define SYS_DC_CIGDSW sys_insn(1, 0, 7, 14, 6) + +#define SYS_IC_IALLUIS sys_insn(1, 0, 7, 1, 0) +#define SYS_IC_IALLU sys_insn(1, 0, 7, 5, 0) +#define SYS_IC_IVAU sys_insn(1, 3, 7, 5, 1) + +#define SYS_DC_IVAC sys_insn(1, 0, 7, 6, 1) +#define SYS_DC_IGVAC sys_insn(1, 0, 7, 6, 3) +#define SYS_DC_IGDVAC sys_insn(1, 0, 7, 6, 5) + +#define SYS_DC_CVAC sys_insn(1, 3, 7, 10, 1) +#define SYS_DC_CGVAC sys_insn(1, 3, 7, 10, 3) +#define SYS_DC_CGDVAC sys_insn(1, 3, 7, 10, 5) + +#define SYS_DC_CVAU sys_insn(1, 3, 7, 11, 1) + +#define SYS_DC_CVAP sys_insn(1, 3, 7, 12, 1) +#define SYS_DC_CGVAP sys_insn(1, 3, 7, 12, 3) +#define SYS_DC_CGDVAP sys_insn(1, 3, 7, 12, 5) + +#define SYS_DC_CVADP sys_insn(1, 3, 7, 13, 1) +#define SYS_DC_CGVADP sys_insn(1, 3, 7, 13, 3) +#define SYS_DC_CGDVADP sys_insn(1, 3, 7, 13, 5) + +#define SYS_DC_CIVAC sys_insn(1, 3, 7, 14, 1) +#define SYS_DC_CIGVAC sys_insn(1, 3, 7, 14, 3) +#define SYS_DC_CIGDVAC sys_insn(1, 3, 7, 14, 5) + +#define SYS_DC_ZVA sys_insn(1, 3, 7, 4, 1) +#define SYS_DC_GVA sys_insn(1, 3, 7, 4, 3) +#define SYS_DC_GZVA sys_insn(1, 3, 7, 4, 4) + +#define SYS_DC_CIVAPS sys_insn(1, 0, 7, 15, 1) +#define SYS_DC_CIGDVAPS sys_insn(1, 0, 7, 15, 5) /* * Automatically generated definitions for system registers, the @@ -130,25 +182,17 @@ #define SYS_SVCR_SMSTART_SM_EL0 sys_reg(0, 3, 4, 3, 3) #define SYS_SVCR_SMSTOP_SMZA_EL0 sys_reg(0, 3, 4, 6, 3) -#define SYS_OSDTRRX_EL1 sys_reg(2, 0, 0, 0, 2) -#define SYS_MDCCINT_EL1 sys_reg(2, 0, 0, 2, 0) -#define SYS_MDSCR_EL1 sys_reg(2, 0, 0, 2, 2) -#define SYS_OSDTRTX_EL1 sys_reg(2, 0, 0, 3, 2) -#define SYS_OSECCR_EL1 sys_reg(2, 0, 0, 6, 2) #define SYS_DBGBVRn_EL1(n) sys_reg(2, 0, 0, n, 4) #define SYS_DBGBCRn_EL1(n) sys_reg(2, 0, 0, n, 5) #define SYS_DBGWVRn_EL1(n) sys_reg(2, 0, 0, n, 6) #define SYS_DBGWCRn_EL1(n) sys_reg(2, 0, 0, n, 7) #define SYS_MDRAR_EL1 sys_reg(2, 0, 1, 0, 0) -#define SYS_OSLAR_EL1 sys_reg(2, 0, 1, 0, 4) -#define SYS_OSLAR_OSLK BIT(0) - #define SYS_OSLSR_EL1 sys_reg(2, 0, 1, 1, 4) -#define SYS_OSLSR_OSLM_MASK (BIT(3) | BIT(0)) -#define SYS_OSLSR_OSLM_NI 0 -#define SYS_OSLSR_OSLM_IMPLEMENTED BIT(3) -#define SYS_OSLSR_OSLK BIT(1) +#define OSLSR_EL1_OSLM_MASK (BIT(3) | BIT(0)) +#define OSLSR_EL1_OSLM_NI 0 +#define OSLSR_EL1_OSLM_IMPLEMENTED BIT(3) +#define OSLSR_EL1_OSLK BIT(1) #define SYS_OSDLR_EL1 sys_reg(2, 0, 1, 3, 4) #define SYS_DBGPRCR_EL1 sys_reg(2, 0, 1, 4, 4) @@ -161,54 +205,82 @@ #define SYS_DBGDTRTX_EL0 sys_reg(2, 3, 0, 5, 0) #define SYS_DBGVCR32_EL2 sys_reg(2, 4, 0, 7, 0) +#define SYS_BRBINF_EL1(n) sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 0)) +#define SYS_BRBSRC_EL1(n) sys_reg(2, 1, 
8, (n & 15), (((n & 16) >> 2) | 1)) +#define SYS_BRBTGT_EL1(n) sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 2)) + +#define SYS_TRCITECR_EL1 sys_reg(3, 0, 1, 2, 3) +#define SYS_TRCACATR(m) sys_reg(2, 1, 2, ((m & 7) << 1), (2 | (m >> 3))) +#define SYS_TRCACVR(m) sys_reg(2, 1, 2, ((m & 7) << 1), (0 | (m >> 3))) +#define SYS_TRCAUTHSTATUS sys_reg(2, 1, 7, 14, 6) +#define SYS_TRCAUXCTLR sys_reg(2, 1, 0, 6, 0) +#define SYS_TRCBBCTLR sys_reg(2, 1, 0, 15, 0) +#define SYS_TRCCCCTLR sys_reg(2, 1, 0, 14, 0) +#define SYS_TRCCIDCCTLR0 sys_reg(2, 1, 3, 0, 2) +#define SYS_TRCCIDCCTLR1 sys_reg(2, 1, 3, 1, 2) +#define SYS_TRCCIDCVR(m) sys_reg(2, 1, 3, ((m & 7) << 1), 0) +#define SYS_TRCCLAIMCLR sys_reg(2, 1, 7, 9, 6) +#define SYS_TRCCLAIMSET sys_reg(2, 1, 7, 8, 6) +#define SYS_TRCCNTCTLR(m) sys_reg(2, 1, 0, (4 | (m & 3)), 5) +#define SYS_TRCCNTRLDVR(m) sys_reg(2, 1, 0, (0 | (m & 3)), 5) +#define SYS_TRCCNTVR(m) sys_reg(2, 1, 0, (8 | (m & 3)), 5) +#define SYS_TRCCONFIGR sys_reg(2, 1, 0, 4, 0) +#define SYS_TRCDEVARCH sys_reg(2, 1, 7, 15, 6) +#define SYS_TRCDEVID sys_reg(2, 1, 7, 2, 7) +#define SYS_TRCEVENTCTL0R sys_reg(2, 1, 0, 8, 0) +#define SYS_TRCEVENTCTL1R sys_reg(2, 1, 0, 9, 0) +#define SYS_TRCEXTINSELR(m) sys_reg(2, 1, 0, (8 | (m & 3)), 4) +#define SYS_TRCIDR0 sys_reg(2, 1, 0, 8, 7) +#define SYS_TRCIDR10 sys_reg(2, 1, 0, 2, 6) +#define SYS_TRCIDR11 sys_reg(2, 1, 0, 3, 6) +#define SYS_TRCIDR12 sys_reg(2, 1, 0, 4, 6) +#define SYS_TRCIDR13 sys_reg(2, 1, 0, 5, 6) +#define SYS_TRCIDR1 sys_reg(2, 1, 0, 9, 7) +#define SYS_TRCIDR2 sys_reg(2, 1, 0, 10, 7) +#define SYS_TRCIDR3 sys_reg(2, 1, 0, 11, 7) +#define SYS_TRCIDR4 sys_reg(2, 1, 0, 12, 7) +#define SYS_TRCIDR5 sys_reg(2, 1, 0, 13, 7) +#define SYS_TRCIDR6 sys_reg(2, 1, 0, 14, 7) +#define SYS_TRCIDR7 sys_reg(2, 1, 0, 15, 7) +#define SYS_TRCIDR8 sys_reg(2, 1, 0, 0, 6) +#define SYS_TRCIDR9 sys_reg(2, 1, 0, 1, 6) +#define SYS_TRCIMSPEC(m) sys_reg(2, 1, 0, (m & 7), 7) +#define SYS_TRCITEEDCR sys_reg(2, 1, 0, 2, 1) +#define SYS_TRCOSLSR sys_reg(2, 1, 1, 1, 4) +#define SYS_TRCPRGCTLR sys_reg(2, 1, 0, 1, 0) +#define SYS_TRCQCTLR sys_reg(2, 1, 0, 1, 1) +#define SYS_TRCRSCTLR(m) sys_reg(2, 1, 1, (m & 15), (0 | (m >> 4))) +#define SYS_TRCRSR sys_reg(2, 1, 0, 10, 0) +#define SYS_TRCSEQEVR(m) sys_reg(2, 1, 0, (m & 3), 4) +#define SYS_TRCSEQRSTEVR sys_reg(2, 1, 0, 6, 4) +#define SYS_TRCSEQSTR sys_reg(2, 1, 0, 7, 4) +#define SYS_TRCSSCCR(m) sys_reg(2, 1, 1, (m & 7), 2) +#define SYS_TRCSSCSR(m) sys_reg(2, 1, 1, (8 | (m & 7)), 2) +#define SYS_TRCSSPCICR(m) sys_reg(2, 1, 1, (m & 7), 3) +#define SYS_TRCSTALLCTLR sys_reg(2, 1, 0, 11, 0) +#define SYS_TRCSTATR sys_reg(2, 1, 0, 3, 0) +#define SYS_TRCSYNCPR sys_reg(2, 1, 0, 13, 0) +#define SYS_TRCTRACEIDR sys_reg(2, 1, 0, 0, 1) +#define SYS_TRCTSCTLR sys_reg(2, 1, 0, 12, 0) +#define SYS_TRCVICTLR sys_reg(2, 1, 0, 0, 2) +#define SYS_TRCVIIECTLR sys_reg(2, 1, 0, 1, 2) +#define SYS_TRCVIPCSSCTLR sys_reg(2, 1, 0, 3, 2) +#define SYS_TRCVISSCTLR sys_reg(2, 1, 0, 2, 2) +#define SYS_TRCVMIDCCTLR0 sys_reg(2, 1, 3, 2, 2) +#define SYS_TRCVMIDCCTLR1 sys_reg(2, 1, 3, 3, 2) +#define SYS_TRCVMIDCVR(m) sys_reg(2, 1, 3, ((m & 7) << 1), 1) + +/* ETM */ +#define SYS_TRCOSLAR sys_reg(2, 1, 1, 0, 4) + #define SYS_MIDR_EL1 sys_reg(3, 0, 0, 0, 0) #define SYS_MPIDR_EL1 sys_reg(3, 0, 0, 0, 5) #define SYS_REVIDR_EL1 sys_reg(3, 0, 0, 0, 6) -#define SYS_ID_PFR0_EL1 sys_reg(3, 0, 0, 1, 0) -#define SYS_ID_PFR1_EL1 sys_reg(3, 0, 0, 1, 1) -#define SYS_ID_PFR2_EL1 sys_reg(3, 0, 0, 3, 4) -#define SYS_ID_DFR0_EL1 sys_reg(3, 0, 0, 1, 2) -#define SYS_ID_DFR1_EL1 
sys_reg(3, 0, 0, 3, 5) -#define SYS_ID_AFR0_EL1 sys_reg(3, 0, 0, 1, 3) -#define SYS_ID_MMFR0_EL1 sys_reg(3, 0, 0, 1, 4) -#define SYS_ID_MMFR1_EL1 sys_reg(3, 0, 0, 1, 5) -#define SYS_ID_MMFR2_EL1 sys_reg(3, 0, 0, 1, 6) -#define SYS_ID_MMFR3_EL1 sys_reg(3, 0, 0, 1, 7) -#define SYS_ID_MMFR4_EL1 sys_reg(3, 0, 0, 2, 6) -#define SYS_ID_MMFR5_EL1 sys_reg(3, 0, 0, 3, 6) - -#define SYS_ID_ISAR0_EL1 sys_reg(3, 0, 0, 2, 0) -#define SYS_ID_ISAR1_EL1 sys_reg(3, 0, 0, 2, 1) -#define SYS_ID_ISAR2_EL1 sys_reg(3, 0, 0, 2, 2) -#define SYS_ID_ISAR3_EL1 sys_reg(3, 0, 0, 2, 3) -#define SYS_ID_ISAR4_EL1 sys_reg(3, 0, 0, 2, 4) -#define SYS_ID_ISAR5_EL1 sys_reg(3, 0, 0, 2, 5) -#define SYS_ID_ISAR6_EL1 sys_reg(3, 0, 0, 2, 7) - -#define SYS_MVFR0_EL1 sys_reg(3, 0, 0, 3, 0) -#define SYS_MVFR1_EL1 sys_reg(3, 0, 0, 3, 1) -#define SYS_MVFR2_EL1 sys_reg(3, 0, 0, 3, 2) - -#define SYS_ID_AA64PFR0_EL1 sys_reg(3, 0, 0, 4, 0) -#define SYS_ID_AA64PFR1_EL1 sys_reg(3, 0, 0, 4, 1) - -#define SYS_ID_AA64DFR0_EL1 sys_reg(3, 0, 0, 5, 0) -#define SYS_ID_AA64DFR1_EL1 sys_reg(3, 0, 0, 5, 1) - -#define SYS_ID_AA64AFR0_EL1 sys_reg(3, 0, 0, 5, 4) -#define SYS_ID_AA64AFR1_EL1 sys_reg(3, 0, 0, 5, 5) - -#define SYS_ID_AA64MMFR0_EL1 sys_reg(3, 0, 0, 7, 0) -#define SYS_ID_AA64MMFR1_EL1 sys_reg(3, 0, 0, 7, 1) -#define SYS_ID_AA64MMFR2_EL1 sys_reg(3, 0, 0, 7, 2) - #define SYS_ACTLR_EL1 sys_reg(3, 0, 1, 0, 1) #define SYS_RGSR_EL1 sys_reg(3, 0, 1, 0, 5) #define SYS_GCR_EL1 sys_reg(3, 0, 1, 0, 6) -#define SYS_TRFCR_EL1 sys_reg(3, 0, 1, 2, 1) - #define SYS_TCR_EL1 sys_reg(3, 0, 2, 0, 2) #define SYS_APIAKEYLO_EL1 sys_reg(3, 0, 2, 1, 0) @@ -239,163 +311,61 @@ #define SYS_ERXCTLR_EL1 sys_reg(3, 0, 5, 4, 1) #define SYS_ERXSTATUS_EL1 sys_reg(3, 0, 5, 4, 2) #define SYS_ERXADDR_EL1 sys_reg(3, 0, 5, 4, 3) +#define SYS_ERXPFGF_EL1 sys_reg(3, 0, 5, 4, 4) +#define SYS_ERXPFGCTL_EL1 sys_reg(3, 0, 5, 4, 5) +#define SYS_ERXPFGCDN_EL1 sys_reg(3, 0, 5, 4, 6) #define SYS_ERXMISC0_EL1 sys_reg(3, 0, 5, 5, 0) #define SYS_ERXMISC1_EL1 sys_reg(3, 0, 5, 5, 1) +#define SYS_ERXMISC2_EL1 sys_reg(3, 0, 5, 5, 2) +#define SYS_ERXMISC3_EL1 sys_reg(3, 0, 5, 5, 3) #define SYS_TFSR_EL1 sys_reg(3, 0, 5, 6, 0) #define SYS_TFSRE0_EL1 sys_reg(3, 0, 5, 6, 1) #define SYS_PAR_EL1 sys_reg(3, 0, 7, 4, 0) #define SYS_PAR_EL1_F BIT(0) +/* When PAR_EL1.F == 1 */ #define SYS_PAR_EL1_FST GENMASK(6, 1) +#define SYS_PAR_EL1_PTW BIT(8) +#define SYS_PAR_EL1_S BIT(9) +#define SYS_PAR_EL1_AssuredOnly BIT(12) +#define SYS_PAR_EL1_TopLevel BIT(13) +#define SYS_PAR_EL1_Overlay BIT(14) +#define SYS_PAR_EL1_DirtyBit BIT(15) +#define SYS_PAR_EL1_F1_IMPDEF GENMASK_ULL(63, 48) +#define SYS_PAR_EL1_F1_RES0 (BIT(7) | BIT(10) | GENMASK_ULL(47, 16)) +#define SYS_PAR_EL1_RES1 BIT(11) +/* When PAR_EL1.F == 0 */ +#define SYS_PAR_EL1_SH GENMASK_ULL(8, 7) +#define SYS_PAR_EL1_NS BIT(9) +#define SYS_PAR_EL1_F0_IMPDEF BIT(10) +#define SYS_PAR_EL1_NSE BIT(11) +#define SYS_PAR_EL1_PA GENMASK_ULL(51, 12) +#define SYS_PAR_EL1_ATTR GENMASK_ULL(63, 56) +#define SYS_PAR_EL1_F0_RES0 (GENMASK_ULL(6, 1) | GENMASK_ULL(55, 52)) /*** Statistical Profiling Extension ***/ -/* ID registers */ -#define SYS_PMSIDR_EL1 sys_reg(3, 0, 9, 9, 7) -#define SYS_PMSIDR_EL1_FE_SHIFT 0 -#define SYS_PMSIDR_EL1_FT_SHIFT 1 -#define SYS_PMSIDR_EL1_FL_SHIFT 2 -#define SYS_PMSIDR_EL1_ARCHINST_SHIFT 3 -#define SYS_PMSIDR_EL1_LDS_SHIFT 4 -#define SYS_PMSIDR_EL1_ERND_SHIFT 5 -#define SYS_PMSIDR_EL1_INTERVAL_SHIFT 8 -#define SYS_PMSIDR_EL1_INTERVAL_MASK 0xfUL -#define SYS_PMSIDR_EL1_MAXSIZE_SHIFT 12 -#define SYS_PMSIDR_EL1_MAXSIZE_MASK 0xfUL -#define 
SYS_PMSIDR_EL1_COUNTSIZE_SHIFT 16 -#define SYS_PMSIDR_EL1_COUNTSIZE_MASK 0xfUL - -#define SYS_PMBIDR_EL1 sys_reg(3, 0, 9, 10, 7) -#define SYS_PMBIDR_EL1_ALIGN_SHIFT 0 -#define SYS_PMBIDR_EL1_ALIGN_MASK 0xfU -#define SYS_PMBIDR_EL1_P_SHIFT 4 -#define SYS_PMBIDR_EL1_F_SHIFT 5 - -/* Sampling controls */ -#define SYS_PMSCR_EL1 sys_reg(3, 0, 9, 9, 0) -#define SYS_PMSCR_EL1_E0SPE_SHIFT 0 -#define SYS_PMSCR_EL1_E1SPE_SHIFT 1 -#define SYS_PMSCR_EL1_CX_SHIFT 3 -#define SYS_PMSCR_EL1_PA_SHIFT 4 -#define SYS_PMSCR_EL1_TS_SHIFT 5 -#define SYS_PMSCR_EL1_PCT_SHIFT 6 - -#define SYS_PMSCR_EL2 sys_reg(3, 4, 9, 9, 0) -#define SYS_PMSCR_EL2_E0HSPE_SHIFT 0 -#define SYS_PMSCR_EL2_E2SPE_SHIFT 1 -#define SYS_PMSCR_EL2_CX_SHIFT 3 -#define SYS_PMSCR_EL2_PA_SHIFT 4 -#define SYS_PMSCR_EL2_TS_SHIFT 5 -#define SYS_PMSCR_EL2_PCT_SHIFT 6 - -#define SYS_PMSICR_EL1 sys_reg(3, 0, 9, 9, 2) - -#define SYS_PMSIRR_EL1 sys_reg(3, 0, 9, 9, 3) -#define SYS_PMSIRR_EL1_RND_SHIFT 0 -#define SYS_PMSIRR_EL1_INTERVAL_SHIFT 8 -#define SYS_PMSIRR_EL1_INTERVAL_MASK 0xffffffUL - -/* Filtering controls */ -#define SYS_PMSNEVFR_EL1 sys_reg(3, 0, 9, 9, 1) - -#define SYS_PMSFCR_EL1 sys_reg(3, 0, 9, 9, 4) -#define SYS_PMSFCR_EL1_FE_SHIFT 0 -#define SYS_PMSFCR_EL1_FT_SHIFT 1 -#define SYS_PMSFCR_EL1_FL_SHIFT 2 -#define SYS_PMSFCR_EL1_B_SHIFT 16 -#define SYS_PMSFCR_EL1_LD_SHIFT 17 -#define SYS_PMSFCR_EL1_ST_SHIFT 18 - -#define SYS_PMSEVFR_EL1 sys_reg(3, 0, 9, 9, 5) -#define SYS_PMSEVFR_EL1_RES0_8_2 \ +#define PMSEVFR_EL1_RES0_IMP \ (GENMASK_ULL(47, 32) | GENMASK_ULL(23, 16) | GENMASK_ULL(11, 8) |\ BIT_ULL(6) | BIT_ULL(4) | BIT_ULL(2) | BIT_ULL(0)) -#define SYS_PMSEVFR_EL1_RES0_8_3 \ - (SYS_PMSEVFR_EL1_RES0_8_2 & ~(BIT_ULL(18) | BIT_ULL(17) | BIT_ULL(11))) - -#define SYS_PMSLATFR_EL1 sys_reg(3, 0, 9, 9, 6) -#define SYS_PMSLATFR_EL1_MINLAT_SHIFT 0 - -/* Buffer controls */ -#define SYS_PMBLIMITR_EL1 sys_reg(3, 0, 9, 10, 0) -#define SYS_PMBLIMITR_EL1_E_SHIFT 0 -#define SYS_PMBLIMITR_EL1_FM_SHIFT 1 -#define SYS_PMBLIMITR_EL1_FM_MASK 0x3UL -#define SYS_PMBLIMITR_EL1_FM_STOP_IRQ (0 << SYS_PMBLIMITR_EL1_FM_SHIFT) - -#define SYS_PMBPTR_EL1 sys_reg(3, 0, 9, 10, 1) +#define PMSEVFR_EL1_RES0_V1P1 \ + (PMSEVFR_EL1_RES0_IMP & ~(BIT_ULL(18) | BIT_ULL(17) | BIT_ULL(11))) +#define PMSEVFR_EL1_RES0_V1P2 \ + (PMSEVFR_EL1_RES0_V1P1 & ~BIT_ULL(6)) /* Buffer error reporting */ -#define SYS_PMBSR_EL1 sys_reg(3, 0, 9, 10, 3) -#define SYS_PMBSR_EL1_COLL_SHIFT 16 -#define SYS_PMBSR_EL1_S_SHIFT 17 -#define SYS_PMBSR_EL1_EA_SHIFT 18 -#define SYS_PMBSR_EL1_DL_SHIFT 19 -#define SYS_PMBSR_EL1_EC_SHIFT 26 -#define SYS_PMBSR_EL1_EC_MASK 0x3fUL - -#define SYS_PMBSR_EL1_EC_BUF (0x0UL << SYS_PMBSR_EL1_EC_SHIFT) -#define SYS_PMBSR_EL1_EC_FAULT_S1 (0x24UL << SYS_PMBSR_EL1_EC_SHIFT) -#define SYS_PMBSR_EL1_EC_FAULT_S2 (0x25UL << SYS_PMBSR_EL1_EC_SHIFT) +#define PMBSR_EL1_FAULT_FSC_SHIFT PMBSR_EL1_MSS_SHIFT +#define PMBSR_EL1_FAULT_FSC_MASK PMBSR_EL1_MSS_MASK -#define SYS_PMBSR_EL1_FAULT_FSC_SHIFT 0 -#define SYS_PMBSR_EL1_FAULT_FSC_MASK 0x3fUL +#define PMBSR_EL1_BUF_BSC_SHIFT PMBSR_EL1_MSS_SHIFT +#define PMBSR_EL1_BUF_BSC_MASK PMBSR_EL1_MSS_MASK -#define SYS_PMBSR_EL1_BUF_BSC_SHIFT 0 -#define SYS_PMBSR_EL1_BUF_BSC_MASK 0x3fUL - -#define SYS_PMBSR_EL1_BUF_BSC_FULL (0x1UL << SYS_PMBSR_EL1_BUF_BSC_SHIFT) +#define PMBSR_EL1_BUF_BSC_FULL 0x1UL /*** End of Statistical Profiling Extension ***/ -/* - * TRBE Registers - */ -#define SYS_TRBLIMITR_EL1 sys_reg(3, 0, 9, 11, 0) -#define SYS_TRBPTR_EL1 sys_reg(3, 0, 9, 11, 1) -#define SYS_TRBBASER_EL1 sys_reg(3, 0, 9, 11, 2) -#define SYS_TRBSR_EL1 
sys_reg(3, 0, 9, 11, 3) -#define SYS_TRBMAR_EL1 sys_reg(3, 0, 9, 11, 4) -#define SYS_TRBTRG_EL1 sys_reg(3, 0, 9, 11, 6) -#define SYS_TRBIDR_EL1 sys_reg(3, 0, 9, 11, 7) - -#define TRBLIMITR_LIMIT_MASK GENMASK_ULL(51, 0) -#define TRBLIMITR_LIMIT_SHIFT 12 -#define TRBLIMITR_NVM BIT(5) -#define TRBLIMITR_TRIG_MODE_MASK GENMASK(1, 0) -#define TRBLIMITR_TRIG_MODE_SHIFT 3 -#define TRBLIMITR_FILL_MODE_MASK GENMASK(1, 0) -#define TRBLIMITR_FILL_MODE_SHIFT 1 -#define TRBLIMITR_ENABLE BIT(0) -#define TRBPTR_PTR_MASK GENMASK_ULL(63, 0) -#define TRBPTR_PTR_SHIFT 0 -#define TRBBASER_BASE_MASK GENMASK_ULL(51, 0) -#define TRBBASER_BASE_SHIFT 12 -#define TRBSR_EC_MASK GENMASK(5, 0) -#define TRBSR_EC_SHIFT 26 -#define TRBSR_IRQ BIT(22) -#define TRBSR_TRG BIT(21) -#define TRBSR_WRAP BIT(20) -#define TRBSR_ABORT BIT(18) -#define TRBSR_STOP BIT(17) -#define TRBSR_MSS_MASK GENMASK(15, 0) -#define TRBSR_MSS_SHIFT 0 -#define TRBSR_BSC_MASK GENMASK(5, 0) -#define TRBSR_BSC_SHIFT 0 -#define TRBSR_FSC_MASK GENMASK(5, 0) -#define TRBSR_FSC_SHIFT 0 -#define TRBMAR_SHARE_MASK GENMASK(1, 0) -#define TRBMAR_SHARE_SHIFT 8 -#define TRBMAR_OUTER_MASK GENMASK(3, 0) -#define TRBMAR_OUTER_SHIFT 4 -#define TRBMAR_INNER_MASK GENMASK(3, 0) -#define TRBMAR_INNER_SHIFT 0 -#define TRBTRG_TRG_MASK GENMASK(31, 0) -#define TRBTRG_TRG_SHIFT 0 -#define TRBIDR_FLAG BIT(5) -#define TRBIDR_PROG BIT(4) -#define TRBIDR_ALIGN_MASK GENMASK(3, 0) -#define TRBIDR_ALIGN_SHIFT 0 +#define TRBSR_EL1_BSC_MASK GENMASK(5, 0) +#define TRBSR_EL1_BSC_SHIFT 0 #define SYS_PMINTENSET_EL1 sys_reg(3, 0, 9, 14, 1) #define SYS_PMINTENCLR_EL1 sys_reg(3, 0, 9, 14, 2) @@ -436,19 +406,12 @@ #define SYS_ICC_IGRPEN0_EL1 sys_reg(3, 0, 12, 12, 6) #define SYS_ICC_IGRPEN1_EL1 sys_reg(3, 0, 12, 12, 7) -#define SYS_TPIDR_EL1 sys_reg(3, 0, 13, 0, 4) - -#define SYS_SCXTNUM_EL1 sys_reg(3, 0, 13, 0, 7) +#define SYS_ACCDATA_EL1 sys_reg(3, 0, 13, 0, 5) #define SYS_CNTKCTL_EL1 sys_reg(3, 0, 14, 1, 0) -#define SYS_CCSIDR_EL1 sys_reg(3, 1, 0, 0, 0) #define SYS_AIDR_EL1 sys_reg(3, 1, 0, 0, 7) -#define SMIDR_EL1_IMPLEMENTER_SHIFT 24 -#define SMIDR_EL1_SMPS_SHIFT 15 -#define SMIDR_EL1_AFFINITY_SHIFT 0 - #define SYS_RNDR_EL0 sys_reg(3, 3, 2, 4, 0) #define SYS_RNDRRS_EL0 sys_reg(3, 3, 2, 4, 1) @@ -457,7 +420,6 @@ #define SYS_PMCNTENCLR_EL0 sys_reg(3, 3, 9, 12, 2) #define SYS_PMOVSCLR_EL0 sys_reg(3, 3, 9, 12, 3) #define SYS_PMSWINC_EL0 sys_reg(3, 3, 9, 12, 4) -#define SYS_PMSELR_EL0 sys_reg(3, 3, 9, 12, 5) #define SYS_PMCEID0_EL0 sys_reg(3, 3, 9, 12, 6) #define SYS_PMCEID1_EL0 sys_reg(3, 3, 9, 12, 7) #define SYS_PMCCNTR_EL0 sys_reg(3, 3, 9, 13, 0) @@ -510,6 +472,8 @@ #define SYS_CNTFRQ_EL0 sys_reg(3, 3, 14, 0, 0) +#define SYS_CNTPCT_EL0 sys_reg(3, 3, 14, 0, 1) +#define SYS_CNTVCT_EL0 sys_reg(3, 3, 14, 0, 2) #define SYS_CNTPCTSS_EL0 sys_reg(3, 3, 14, 0, 5) #define SYS_CNTVCTSS_EL0 sys_reg(3, 3, 14, 0, 6) @@ -517,39 +481,80 @@ #define SYS_CNTP_CTL_EL0 sys_reg(3, 3, 14, 2, 1) #define SYS_CNTP_CVAL_EL0 sys_reg(3, 3, 14, 2, 2) +#define SYS_CNTV_TVAL_EL0 sys_reg(3, 3, 14, 3, 0) #define SYS_CNTV_CTL_EL0 sys_reg(3, 3, 14, 3, 1) #define SYS_CNTV_CVAL_EL0 sys_reg(3, 3, 14, 3, 2) #define SYS_AARCH32_CNTP_TVAL sys_reg(0, 0, 14, 2, 0) #define SYS_AARCH32_CNTP_CTL sys_reg(0, 0, 14, 2, 1) +#define SYS_AARCH32_CNTPCT sys_reg(0, 0, 0, 14, 0) +#define SYS_AARCH32_CNTVCT sys_reg(0, 1, 0, 14, 0) #define SYS_AARCH32_CNTP_CVAL sys_reg(0, 2, 0, 14, 0) +#define SYS_AARCH32_CNTPCTSS sys_reg(0, 8, 0, 14, 0) +#define SYS_AARCH32_CNTVCTSS sys_reg(0, 9, 0, 14, 0) #define __PMEV_op2(n) ((n) & 0x7) #define __CNTR_CRm(n) 
(0x8 | (((n) >> 3) & 0x3)) +#define SYS_PMEVCNTSVRn_EL1(n) sys_reg(2, 0, 14, __CNTR_CRm(n), __PMEV_op2(n)) #define SYS_PMEVCNTRn_EL0(n) sys_reg(3, 3, 14, __CNTR_CRm(n), __PMEV_op2(n)) #define __TYPER_CRm(n) (0xc | (((n) >> 3) & 0x3)) #define SYS_PMEVTYPERn_EL0(n) sys_reg(3, 3, 14, __TYPER_CRm(n), __PMEV_op2(n)) #define SYS_PMCCFILTR_EL0 sys_reg(3, 3, 14, 15, 7) +#define SYS_SPMCGCRn_EL1(n) sys_reg(2, 0, 9, 13, ((n) & 1)) + +#define __SPMEV_op2(n) ((n) & 0x7) +#define __SPMEV_crm(p, n) ((((p) & 7) << 1) | (((n) >> 3) & 1)) +#define SYS_SPMEVCNTRn_EL0(n) sys_reg(2, 3, 14, __SPMEV_crm(0b000, n), __SPMEV_op2(n)) +#define SYS_SPMEVFILT2Rn_EL0(n) sys_reg(2, 3, 14, __SPMEV_crm(0b011, n), __SPMEV_op2(n)) +#define SYS_SPMEVFILTRn_EL0(n) sys_reg(2, 3, 14, __SPMEV_crm(0b010, n), __SPMEV_op2(n)) +#define SYS_SPMEVTYPERn_EL0(n) sys_reg(2, 3, 14, __SPMEV_crm(0b001, n), __SPMEV_op2(n)) + +#define SYS_VPIDR_EL2 sys_reg(3, 4, 0, 0, 0) +#define SYS_VMPIDR_EL2 sys_reg(3, 4, 0, 0, 5) + #define SYS_SCTLR_EL2 sys_reg(3, 4, 1, 0, 0) -#define SYS_HFGRTR_EL2 sys_reg(3, 4, 1, 1, 4) -#define SYS_HFGWTR_EL2 sys_reg(3, 4, 1, 1, 5) -#define SYS_HFGITR_EL2 sys_reg(3, 4, 1, 1, 6) -#define SYS_TRFCR_EL2 sys_reg(3, 4, 1, 2, 1) -#define SYS_HCRX_EL2 sys_reg(3, 4, 1, 2, 2) -#define SYS_HDFGRTR_EL2 sys_reg(3, 4, 3, 1, 4) -#define SYS_HDFGWTR_EL2 sys_reg(3, 4, 3, 1, 5) +#define SYS_ACTLR_EL2 sys_reg(3, 4, 1, 0, 1) +#define SYS_SCTLR2_EL2 sys_reg(3, 4, 1, 0, 3) +#define SYS_HCR_EL2 sys_reg(3, 4, 1, 1, 0) +#define SYS_MDCR_EL2 sys_reg(3, 4, 1, 1, 1) +#define SYS_CPTR_EL2 sys_reg(3, 4, 1, 1, 2) +#define SYS_HSTR_EL2 sys_reg(3, 4, 1, 1, 3) +#define SYS_HACR_EL2 sys_reg(3, 4, 1, 1, 7) + +#define SYS_TTBR0_EL2 sys_reg(3, 4, 2, 0, 0) +#define SYS_TTBR1_EL2 sys_reg(3, 4, 2, 0, 1) +#define SYS_TCR_EL2 sys_reg(3, 4, 2, 0, 2) +#define SYS_VTTBR_EL2 sys_reg(3, 4, 2, 1, 0) +#define SYS_VTCR_EL2 sys_reg(3, 4, 2, 1, 2) + #define SYS_HAFGRTR_EL2 sys_reg(3, 4, 3, 1, 6) #define SYS_SPSR_EL2 sys_reg(3, 4, 4, 0, 0) #define SYS_ELR_EL2 sys_reg(3, 4, 4, 0, 1) +#define SYS_SP_EL1 sys_reg(3, 4, 4, 1, 0) +#define SYS_SPSR_irq sys_reg(3, 4, 4, 3, 0) +#define SYS_SPSR_abt sys_reg(3, 4, 4, 3, 1) +#define SYS_SPSR_und sys_reg(3, 4, 4, 3, 2) +#define SYS_SPSR_fiq sys_reg(3, 4, 4, 3, 3) #define SYS_IFSR32_EL2 sys_reg(3, 4, 5, 0, 1) +#define SYS_AFSR0_EL2 sys_reg(3, 4, 5, 1, 0) +#define SYS_AFSR1_EL2 sys_reg(3, 4, 5, 1, 1) #define SYS_ESR_EL2 sys_reg(3, 4, 5, 2, 0) #define SYS_VSESR_EL2 sys_reg(3, 4, 5, 2, 3) #define SYS_FPEXC32_EL2 sys_reg(3, 4, 5, 3, 0) #define SYS_TFSR_EL2 sys_reg(3, 4, 5, 6, 0) -#define SYS_VDISR_EL2 sys_reg(3, 4, 12, 1, 1) +#define SYS_FAR_EL2 sys_reg(3, 4, 6, 0, 0) +#define SYS_HPFAR_EL2 sys_reg(3, 4, 6, 0, 4) + +#define SYS_MAIR_EL2 sys_reg(3, 4, 10, 2, 0) +#define SYS_AMAIR_EL2 sys_reg(3, 4, 10, 3, 0) + +#define SYS_VBAR_EL2 sys_reg(3, 4, 12, 0, 0) +#define SYS_RVBAR_EL2 sys_reg(3, 4, 12, 0, 1) +#define SYS_RMR_EL2 sys_reg(3, 4, 12, 0, 2) +#define SYS_VDISR_EL2 sys_reg(3, 4, 12, 1, 1) #define __SYS__AP0Rx_EL2(x) sys_reg(3, 4, 12, 8, x) #define SYS_ICH_AP0R0_EL2 __SYS__AP0Rx_EL2(0) #define SYS_ICH_AP0R1_EL2 __SYS__AP0Rx_EL2(1) @@ -564,9 +569,6 @@ #define SYS_ICH_VSEIR_EL2 sys_reg(3, 4, 12, 9, 4) #define SYS_ICC_SRE_EL2 sys_reg(3, 4, 12, 9, 5) -#define SYS_ICH_HCR_EL2 sys_reg(3, 4, 12, 11, 0) -#define SYS_ICH_VTR_EL2 sys_reg(3, 4, 12, 11, 1) -#define SYS_ICH_MISR_EL2 sys_reg(3, 4, 12, 11, 2) #define SYS_ICH_EISR_EL2 sys_reg(3, 4, 12, 11, 3) #define SYS_ICH_ELRSR_EL2 sys_reg(3, 4, 12, 11, 5) #define SYS_ICH_VMCR_EL2 sys_reg(3, 4, 12, 
11, 7) @@ -591,20 +593,41 @@ #define SYS_ICH_LR14_EL2 __SYS__LR8_EL2(6) #define SYS_ICH_LR15_EL2 __SYS__LR8_EL2(7) +#define SYS_CONTEXTIDR_EL2 sys_reg(3, 4, 13, 0, 1) +#define SYS_TPIDR_EL2 sys_reg(3, 4, 13, 0, 2) +#define SYS_SCXTNUM_EL2 sys_reg(3, 4, 13, 0, 7) + +#define __AMEV_op2(m) (m & 0x7) +#define __AMEV_CRm(n, m) (n | ((m & 0x8) >> 3)) +#define __SYS__AMEVCNTVOFF0n_EL2(m) sys_reg(3, 4, 13, __AMEV_CRm(0x8, m), __AMEV_op2(m)) +#define SYS_AMEVCNTVOFF0n_EL2(m) __SYS__AMEVCNTVOFF0n_EL2(m) +#define __SYS__AMEVCNTVOFF1n_EL2(m) sys_reg(3, 4, 13, __AMEV_CRm(0xA, m), __AMEV_op2(m)) +#define SYS_AMEVCNTVOFF1n_EL2(m) __SYS__AMEVCNTVOFF1n_EL2(m) + +#define SYS_CNTVOFF_EL2 sys_reg(3, 4, 14, 0, 3) +#define SYS_CNTHCTL_EL2 sys_reg(3, 4, 14, 1, 0) +#define SYS_CNTHP_TVAL_EL2 sys_reg(3, 4, 14, 2, 0) +#define SYS_CNTHP_CTL_EL2 sys_reg(3, 4, 14, 2, 1) +#define SYS_CNTHP_CVAL_EL2 sys_reg(3, 4, 14, 2, 2) +#define SYS_CNTHV_TVAL_EL2 sys_reg(3, 4, 14, 3, 0) +#define SYS_CNTHV_CTL_EL2 sys_reg(3, 4, 14, 3, 1) +#define SYS_CNTHV_CVAL_EL2 sys_reg(3, 4, 14, 3, 2) + /* VHE encodings for architectural EL0/1 system registers */ -#define SYS_SCTLR_EL12 sys_reg(3, 5, 1, 0, 0) +#define SYS_BRBCR_EL12 sys_reg(2, 5, 9, 0, 0) #define SYS_TTBR0_EL12 sys_reg(3, 5, 2, 0, 0) #define SYS_TTBR1_EL12 sys_reg(3, 5, 2, 0, 1) -#define SYS_TCR_EL12 sys_reg(3, 5, 2, 0, 2) #define SYS_SPSR_EL12 sys_reg(3, 5, 4, 0, 0) #define SYS_ELR_EL12 sys_reg(3, 5, 4, 0, 1) #define SYS_AFSR0_EL12 sys_reg(3, 5, 5, 1, 0) #define SYS_AFSR1_EL12 sys_reg(3, 5, 5, 1, 1) #define SYS_ESR_EL12 sys_reg(3, 5, 5, 2, 0) #define SYS_TFSR_EL12 sys_reg(3, 5, 5, 6, 0) +#define SYS_PMSCR_EL12 sys_reg(3, 5, 9, 9, 0) #define SYS_MAIR_EL12 sys_reg(3, 5, 10, 2, 0) #define SYS_AMAIR_EL12 sys_reg(3, 5, 10, 3, 0) #define SYS_VBAR_EL12 sys_reg(3, 5, 12, 0, 0) +#define SYS_SCXTNUM_EL12 sys_reg(3, 5, 13, 0, 7) #define SYS_CNTKCTL_EL12 sys_reg(3, 5, 14, 1, 0) #define SYS_CNTP_TVAL_EL02 sys_reg(3, 5, 14, 2, 0) #define SYS_CNTP_CTL_EL02 sys_reg(3, 5, 14, 2, 1) @@ -613,11 +636,197 @@ #define SYS_CNTV_CTL_EL02 sys_reg(3, 5, 14, 3, 1) #define SYS_CNTV_CVAL_EL02 sys_reg(3, 5, 14, 3, 2) +#define SYS_SP_EL2 sys_reg(3, 6, 4, 1, 0) + +/* AT instructions */ +#define AT_Op0 1 +#define AT_CRn 7 + +#define OP_AT_S1E1R sys_insn(AT_Op0, 0, AT_CRn, 8, 0) +#define OP_AT_S1E1W sys_insn(AT_Op0, 0, AT_CRn, 8, 1) +#define OP_AT_S1E0R sys_insn(AT_Op0, 0, AT_CRn, 8, 2) +#define OP_AT_S1E0W sys_insn(AT_Op0, 0, AT_CRn, 8, 3) +#define OP_AT_S1E1RP sys_insn(AT_Op0, 0, AT_CRn, 9, 0) +#define OP_AT_S1E1WP sys_insn(AT_Op0, 0, AT_CRn, 9, 1) +#define OP_AT_S1E1A sys_insn(AT_Op0, 0, AT_CRn, 9, 2) +#define OP_AT_S1E2R sys_insn(AT_Op0, 4, AT_CRn, 8, 0) +#define OP_AT_S1E2W sys_insn(AT_Op0, 4, AT_CRn, 8, 1) +#define OP_AT_S12E1R sys_insn(AT_Op0, 4, AT_CRn, 8, 4) +#define OP_AT_S12E1W sys_insn(AT_Op0, 4, AT_CRn, 8, 5) +#define OP_AT_S12E0R sys_insn(AT_Op0, 4, AT_CRn, 8, 6) +#define OP_AT_S12E0W sys_insn(AT_Op0, 4, AT_CRn, 8, 7) +#define OP_AT_S1E2A sys_insn(AT_Op0, 4, AT_CRn, 9, 2) + +/* TLBI instructions */ +#define TLBI_Op0 1 + +#define TLBI_Op1_EL1 0 /* Accessible from EL1 or higher */ +#define TLBI_Op1_EL2 4 /* Accessible from EL2 or higher */ + +#define TLBI_CRn_XS 8 /* Extra Slow (the common one) */ +#define TLBI_CRn_nXS 9 /* not Extra Slow (which nobody uses)*/ + +#define TLBI_CRm_IPAIS 0 /* S2 Inner-Shareable */ +#define TLBI_CRm_nROS 1 /* non-Range, Outer-Sharable */ +#define TLBI_CRm_RIS 2 /* Range, Inner-Sharable */ +#define TLBI_CRm_nRIS 3 /* non-Range, Inner-Sharable */ +#define TLBI_CRm_IPAONS 4 
/* S2 Outer and Non-Shareable */ +#define TLBI_CRm_ROS 5 /* Range, Outer-Sharable */ +#define TLBI_CRm_RNS 6 /* Range, Non-Sharable */ +#define TLBI_CRm_nRNS 7 /* non-Range, Non-Sharable */ + +#define OP_TLBI_VMALLE1OS sys_insn(1, 0, 8, 1, 0) +#define OP_TLBI_VAE1OS sys_insn(1, 0, 8, 1, 1) +#define OP_TLBI_ASIDE1OS sys_insn(1, 0, 8, 1, 2) +#define OP_TLBI_VAAE1OS sys_insn(1, 0, 8, 1, 3) +#define OP_TLBI_VALE1OS sys_insn(1, 0, 8, 1, 5) +#define OP_TLBI_VAALE1OS sys_insn(1, 0, 8, 1, 7) +#define OP_TLBI_RVAE1IS sys_insn(1, 0, 8, 2, 1) +#define OP_TLBI_RVAAE1IS sys_insn(1, 0, 8, 2, 3) +#define OP_TLBI_RVALE1IS sys_insn(1, 0, 8, 2, 5) +#define OP_TLBI_RVAALE1IS sys_insn(1, 0, 8, 2, 7) +#define OP_TLBI_VMALLE1IS sys_insn(1, 0, 8, 3, 0) +#define OP_TLBI_VAE1IS sys_insn(1, 0, 8, 3, 1) +#define OP_TLBI_ASIDE1IS sys_insn(1, 0, 8, 3, 2) +#define OP_TLBI_VAAE1IS sys_insn(1, 0, 8, 3, 3) +#define OP_TLBI_VALE1IS sys_insn(1, 0, 8, 3, 5) +#define OP_TLBI_VAALE1IS sys_insn(1, 0, 8, 3, 7) +#define OP_TLBI_RVAE1OS sys_insn(1, 0, 8, 5, 1) +#define OP_TLBI_RVAAE1OS sys_insn(1, 0, 8, 5, 3) +#define OP_TLBI_RVALE1OS sys_insn(1, 0, 8, 5, 5) +#define OP_TLBI_RVAALE1OS sys_insn(1, 0, 8, 5, 7) +#define OP_TLBI_RVAE1 sys_insn(1, 0, 8, 6, 1) +#define OP_TLBI_RVAAE1 sys_insn(1, 0, 8, 6, 3) +#define OP_TLBI_RVALE1 sys_insn(1, 0, 8, 6, 5) +#define OP_TLBI_RVAALE1 sys_insn(1, 0, 8, 6, 7) +#define OP_TLBI_VMALLE1 sys_insn(1, 0, 8, 7, 0) +#define OP_TLBI_VAE1 sys_insn(1, 0, 8, 7, 1) +#define OP_TLBI_ASIDE1 sys_insn(1, 0, 8, 7, 2) +#define OP_TLBI_VAAE1 sys_insn(1, 0, 8, 7, 3) +#define OP_TLBI_VALE1 sys_insn(1, 0, 8, 7, 5) +#define OP_TLBI_VAALE1 sys_insn(1, 0, 8, 7, 7) +#define OP_TLBI_VMALLE1OSNXS sys_insn(1, 0, 9, 1, 0) +#define OP_TLBI_VAE1OSNXS sys_insn(1, 0, 9, 1, 1) +#define OP_TLBI_ASIDE1OSNXS sys_insn(1, 0, 9, 1, 2) +#define OP_TLBI_VAAE1OSNXS sys_insn(1, 0, 9, 1, 3) +#define OP_TLBI_VALE1OSNXS sys_insn(1, 0, 9, 1, 5) +#define OP_TLBI_VAALE1OSNXS sys_insn(1, 0, 9, 1, 7) +#define OP_TLBI_RVAE1ISNXS sys_insn(1, 0, 9, 2, 1) +#define OP_TLBI_RVAAE1ISNXS sys_insn(1, 0, 9, 2, 3) +#define OP_TLBI_RVALE1ISNXS sys_insn(1, 0, 9, 2, 5) +#define OP_TLBI_RVAALE1ISNXS sys_insn(1, 0, 9, 2, 7) +#define OP_TLBI_VMALLE1ISNXS sys_insn(1, 0, 9, 3, 0) +#define OP_TLBI_VAE1ISNXS sys_insn(1, 0, 9, 3, 1) +#define OP_TLBI_ASIDE1ISNXS sys_insn(1, 0, 9, 3, 2) +#define OP_TLBI_VAAE1ISNXS sys_insn(1, 0, 9, 3, 3) +#define OP_TLBI_VALE1ISNXS sys_insn(1, 0, 9, 3, 5) +#define OP_TLBI_VAALE1ISNXS sys_insn(1, 0, 9, 3, 7) +#define OP_TLBI_RVAE1OSNXS sys_insn(1, 0, 9, 5, 1) +#define OP_TLBI_RVAAE1OSNXS sys_insn(1, 0, 9, 5, 3) +#define OP_TLBI_RVALE1OSNXS sys_insn(1, 0, 9, 5, 5) +#define OP_TLBI_RVAALE1OSNXS sys_insn(1, 0, 9, 5, 7) +#define OP_TLBI_RVAE1NXS sys_insn(1, 0, 9, 6, 1) +#define OP_TLBI_RVAAE1NXS sys_insn(1, 0, 9, 6, 3) +#define OP_TLBI_RVALE1NXS sys_insn(1, 0, 9, 6, 5) +#define OP_TLBI_RVAALE1NXS sys_insn(1, 0, 9, 6, 7) +#define OP_TLBI_VMALLE1NXS sys_insn(1, 0, 9, 7, 0) +#define OP_TLBI_VAE1NXS sys_insn(1, 0, 9, 7, 1) +#define OP_TLBI_ASIDE1NXS sys_insn(1, 0, 9, 7, 2) +#define OP_TLBI_VAAE1NXS sys_insn(1, 0, 9, 7, 3) +#define OP_TLBI_VALE1NXS sys_insn(1, 0, 9, 7, 5) +#define OP_TLBI_VAALE1NXS sys_insn(1, 0, 9, 7, 7) +#define OP_TLBI_IPAS2E1IS sys_insn(1, 4, 8, 0, 1) +#define OP_TLBI_RIPAS2E1IS sys_insn(1, 4, 8, 0, 2) +#define OP_TLBI_IPAS2LE1IS sys_insn(1, 4, 8, 0, 5) +#define OP_TLBI_RIPAS2LE1IS sys_insn(1, 4, 8, 0, 6) +#define OP_TLBI_ALLE2OS sys_insn(1, 4, 8, 1, 0) +#define OP_TLBI_VAE2OS sys_insn(1, 4, 8, 1, 1) +#define OP_TLBI_ALLE1OS 
sys_insn(1, 4, 8, 1, 4) +#define OP_TLBI_VALE2OS sys_insn(1, 4, 8, 1, 5) +#define OP_TLBI_VMALLS12E1OS sys_insn(1, 4, 8, 1, 6) +#define OP_TLBI_RVAE2IS sys_insn(1, 4, 8, 2, 1) +#define OP_TLBI_RVALE2IS sys_insn(1, 4, 8, 2, 5) +#define OP_TLBI_ALLE2IS sys_insn(1, 4, 8, 3, 0) +#define OP_TLBI_VAE2IS sys_insn(1, 4, 8, 3, 1) +#define OP_TLBI_ALLE1IS sys_insn(1, 4, 8, 3, 4) +#define OP_TLBI_VALE2IS sys_insn(1, 4, 8, 3, 5) +#define OP_TLBI_VMALLS12E1IS sys_insn(1, 4, 8, 3, 6) +#define OP_TLBI_IPAS2E1OS sys_insn(1, 4, 8, 4, 0) +#define OP_TLBI_IPAS2E1 sys_insn(1, 4, 8, 4, 1) +#define OP_TLBI_RIPAS2E1 sys_insn(1, 4, 8, 4, 2) +#define OP_TLBI_RIPAS2E1OS sys_insn(1, 4, 8, 4, 3) +#define OP_TLBI_IPAS2LE1OS sys_insn(1, 4, 8, 4, 4) +#define OP_TLBI_IPAS2LE1 sys_insn(1, 4, 8, 4, 5) +#define OP_TLBI_RIPAS2LE1 sys_insn(1, 4, 8, 4, 6) +#define OP_TLBI_RIPAS2LE1OS sys_insn(1, 4, 8, 4, 7) +#define OP_TLBI_RVAE2OS sys_insn(1, 4, 8, 5, 1) +#define OP_TLBI_RVALE2OS sys_insn(1, 4, 8, 5, 5) +#define OP_TLBI_RVAE2 sys_insn(1, 4, 8, 6, 1) +#define OP_TLBI_RVALE2 sys_insn(1, 4, 8, 6, 5) +#define OP_TLBI_ALLE2 sys_insn(1, 4, 8, 7, 0) +#define OP_TLBI_VAE2 sys_insn(1, 4, 8, 7, 1) +#define OP_TLBI_ALLE1 sys_insn(1, 4, 8, 7, 4) +#define OP_TLBI_VALE2 sys_insn(1, 4, 8, 7, 5) +#define OP_TLBI_VMALLS12E1 sys_insn(1, 4, 8, 7, 6) +#define OP_TLBI_IPAS2E1ISNXS sys_insn(1, 4, 9, 0, 1) +#define OP_TLBI_RIPAS2E1ISNXS sys_insn(1, 4, 9, 0, 2) +#define OP_TLBI_IPAS2LE1ISNXS sys_insn(1, 4, 9, 0, 5) +#define OP_TLBI_RIPAS2LE1ISNXS sys_insn(1, 4, 9, 0, 6) +#define OP_TLBI_ALLE2OSNXS sys_insn(1, 4, 9, 1, 0) +#define OP_TLBI_VAE2OSNXS sys_insn(1, 4, 9, 1, 1) +#define OP_TLBI_ALLE1OSNXS sys_insn(1, 4, 9, 1, 4) +#define OP_TLBI_VALE2OSNXS sys_insn(1, 4, 9, 1, 5) +#define OP_TLBI_VMALLS12E1OSNXS sys_insn(1, 4, 9, 1, 6) +#define OP_TLBI_RVAE2ISNXS sys_insn(1, 4, 9, 2, 1) +#define OP_TLBI_RVALE2ISNXS sys_insn(1, 4, 9, 2, 5) +#define OP_TLBI_ALLE2ISNXS sys_insn(1, 4, 9, 3, 0) +#define OP_TLBI_VAE2ISNXS sys_insn(1, 4, 9, 3, 1) +#define OP_TLBI_ALLE1ISNXS sys_insn(1, 4, 9, 3, 4) +#define OP_TLBI_VALE2ISNXS sys_insn(1, 4, 9, 3, 5) +#define OP_TLBI_VMALLS12E1ISNXS sys_insn(1, 4, 9, 3, 6) +#define OP_TLBI_IPAS2E1OSNXS sys_insn(1, 4, 9, 4, 0) +#define OP_TLBI_IPAS2E1NXS sys_insn(1, 4, 9, 4, 1) +#define OP_TLBI_RIPAS2E1NXS sys_insn(1, 4, 9, 4, 2) +#define OP_TLBI_RIPAS2E1OSNXS sys_insn(1, 4, 9, 4, 3) +#define OP_TLBI_IPAS2LE1OSNXS sys_insn(1, 4, 9, 4, 4) +#define OP_TLBI_IPAS2LE1NXS sys_insn(1, 4, 9, 4, 5) +#define OP_TLBI_RIPAS2LE1NXS sys_insn(1, 4, 9, 4, 6) +#define OP_TLBI_RIPAS2LE1OSNXS sys_insn(1, 4, 9, 4, 7) +#define OP_TLBI_RVAE2OSNXS sys_insn(1, 4, 9, 5, 1) +#define OP_TLBI_RVALE2OSNXS sys_insn(1, 4, 9, 5, 5) +#define OP_TLBI_RVAE2NXS sys_insn(1, 4, 9, 6, 1) +#define OP_TLBI_RVALE2NXS sys_insn(1, 4, 9, 6, 5) +#define OP_TLBI_ALLE2NXS sys_insn(1, 4, 9, 7, 0) +#define OP_TLBI_VAE2NXS sys_insn(1, 4, 9, 7, 1) +#define OP_TLBI_ALLE1NXS sys_insn(1, 4, 9, 7, 4) +#define OP_TLBI_VALE2NXS sys_insn(1, 4, 9, 7, 5) +#define OP_TLBI_VMALLS12E1NXS sys_insn(1, 4, 9, 7, 6) + +/* Misc instructions */ +#define OP_GCSPUSHX sys_insn(1, 0, 7, 7, 4) +#define OP_GCSPOPCX sys_insn(1, 0, 7, 7, 5) +#define OP_GCSPOPX sys_insn(1, 0, 7, 7, 6) +#define OP_GCSPUSHM sys_insn(1, 3, 7, 7, 0) + +#define OP_BRB_IALL sys_insn(1, 1, 7, 2, 4) +#define OP_BRB_INJ sys_insn(1, 1, 7, 2, 5) +#define OP_CFP_RCTX sys_insn(1, 3, 7, 3, 4) +#define OP_DVP_RCTX sys_insn(1, 3, 7, 3, 5) +#define OP_COSP_RCTX sys_insn(1, 3, 7, 3, 6) +#define OP_CPP_RCTX sys_insn(1, 3, 7, 3, 7) + +/* + * BRBE 
Instructions + */ +#define BRB_IALL_INSN __emit_inst(0xd5000000 | OP_BRB_IALL | (0x1f)) +#define BRB_INJ_INSN __emit_inst(0xd5000000 | OP_BRB_INJ | (0x1f)) + /* Common SCTLR_ELx flags. */ #define SCTLR_ELx_ENTP2 (BIT(60)) #define SCTLR_ELx_DSSBS (BIT(44)) #define SCTLR_ELx_ATA (BIT(43)) +#define SCTLR_ELx_EE_SHIFT 25 #define SCTLR_ELx_ENIA_SHIFT 31 #define SCTLR_ELx_ITFSB (BIT(37)) @@ -626,7 +835,7 @@ #define SCTLR_ELx_LSMAOE (BIT(29)) #define SCTLR_ELx_nTLSMD (BIT(28)) #define SCTLR_ELx_ENDA (BIT(27)) -#define SCTLR_ELx_EE (BIT(25)) +#define SCTLR_ELx_EE (BIT(SCTLR_ELx_EE_SHIFT)) #define SCTLR_ELx_EIS (BIT(22)) #define SCTLR_ELx_IESB (BIT(21)) #define SCTLR_ELx_TSCXT (BIT(20)) @@ -644,6 +853,7 @@ (BIT(18)) | (BIT(22)) | (BIT(23)) | (BIT(28)) | \ (BIT(29))) +#define SCTLR_EL2_BT (BIT(36)) #ifdef CONFIG_CPU_BIG_ENDIAN #define ENDIAN_SET_EL2 SCTLR_ELx_EE #else @@ -689,291 +899,49 @@ /* Position the attr at the correct index */ #define MAIR_ATTRIDX(attr, idx) ((attr) << ((idx) * 8)) -/* id_aa64pfr0 */ -#define ID_AA64PFR0_CSV3_SHIFT 60 -#define ID_AA64PFR0_CSV2_SHIFT 56 -#define ID_AA64PFR0_DIT_SHIFT 48 -#define ID_AA64PFR0_AMU_SHIFT 44 -#define ID_AA64PFR0_MPAM_SHIFT 40 -#define ID_AA64PFR0_SEL2_SHIFT 36 -#define ID_AA64PFR0_SVE_SHIFT 32 -#define ID_AA64PFR0_RAS_SHIFT 28 -#define ID_AA64PFR0_GIC_SHIFT 24 -#define ID_AA64PFR0_ASIMD_SHIFT 20 -#define ID_AA64PFR0_FP_SHIFT 16 -#define ID_AA64PFR0_EL3_SHIFT 12 -#define ID_AA64PFR0_EL2_SHIFT 8 -#define ID_AA64PFR0_EL1_SHIFT 4 -#define ID_AA64PFR0_EL0_SHIFT 0 - -#define ID_AA64PFR0_AMU 0x1 -#define ID_AA64PFR0_SVE 0x1 -#define ID_AA64PFR0_RAS_V1 0x1 -#define ID_AA64PFR0_RAS_V1P1 0x2 -#define ID_AA64PFR0_FP_NI 0xf -#define ID_AA64PFR0_FP_SUPPORTED 0x0 -#define ID_AA64PFR0_ASIMD_NI 0xf -#define ID_AA64PFR0_ASIMD_SUPPORTED 0x0 -#define ID_AA64PFR0_ELx_64BIT_ONLY 0x1 -#define ID_AA64PFR0_ELx_32BIT_64BIT 0x2 - -/* id_aa64pfr1 */ -#define ID_AA64PFR1_SME_SHIFT 24 -#define ID_AA64PFR1_MPAMFRAC_SHIFT 16 -#define ID_AA64PFR1_RASFRAC_SHIFT 12 -#define ID_AA64PFR1_MTE_SHIFT 8 -#define ID_AA64PFR1_SSBS_SHIFT 4 -#define ID_AA64PFR1_BT_SHIFT 0 - -#define ID_AA64PFR1_SSBS_PSTATE_NI 0 -#define ID_AA64PFR1_SSBS_PSTATE_ONLY 1 -#define ID_AA64PFR1_SSBS_PSTATE_INSNS 2 -#define ID_AA64PFR1_BT_BTI 0x1 -#define ID_AA64PFR1_SME 1 - -#define ID_AA64PFR1_MTE_NI 0x0 -#define ID_AA64PFR1_MTE_EL0 0x1 -#define ID_AA64PFR1_MTE 0x2 -#define ID_AA64PFR1_MTE_ASYMM 0x3 - /* id_aa64mmfr0 */ -#define ID_AA64MMFR0_ECV_SHIFT 60 -#define ID_AA64MMFR0_FGT_SHIFT 56 -#define ID_AA64MMFR0_EXS_SHIFT 44 -#define ID_AA64MMFR0_TGRAN4_2_SHIFT 40 -#define ID_AA64MMFR0_TGRAN64_2_SHIFT 36 -#define ID_AA64MMFR0_TGRAN16_2_SHIFT 32 -#define ID_AA64MMFR0_TGRAN4_SHIFT 28 -#define ID_AA64MMFR0_TGRAN64_SHIFT 24 -#define ID_AA64MMFR0_TGRAN16_SHIFT 20 -#define ID_AA64MMFR0_BIGENDEL0_SHIFT 16 -#define ID_AA64MMFR0_SNSMEM_SHIFT 12 -#define ID_AA64MMFR0_BIGENDEL_SHIFT 8 -#define ID_AA64MMFR0_ASID_SHIFT 4 -#define ID_AA64MMFR0_PARANGE_SHIFT 0 - -#define ID_AA64MMFR0_ASID_8 0x0 -#define ID_AA64MMFR0_ASID_16 0x2 - -#define ID_AA64MMFR0_TGRAN4_NI 0xf -#define ID_AA64MMFR0_TGRAN4_SUPPORTED_MIN 0x0 -#define ID_AA64MMFR0_TGRAN4_SUPPORTED_MAX 0x7 -#define ID_AA64MMFR0_TGRAN64_NI 0xf -#define ID_AA64MMFR0_TGRAN64_SUPPORTED_MIN 0x0 -#define ID_AA64MMFR0_TGRAN64_SUPPORTED_MAX 0x7 -#define ID_AA64MMFR0_TGRAN16_NI 0x0 -#define ID_AA64MMFR0_TGRAN16_SUPPORTED_MIN 0x1 -#define ID_AA64MMFR0_TGRAN16_SUPPORTED_MAX 0xf - -#define ID_AA64MMFR0_PARANGE_32 0x0 -#define ID_AA64MMFR0_PARANGE_36 0x1 -#define ID_AA64MMFR0_PARANGE_40 0x2 
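
The PARANGE encodings being renamed in this hunk (values 0x0..0x6 in the removed ID_AA64MMFR0_PARANGE_* lines) each select a physical address width, which is also why ID_AA64MMFR0_EL1_PARANGE_MAX resolves to the 52- or 48-bit encoding depending on CONFIG_ARM64_PA_BITS_52. A minimal standalone sketch of that mapping, separate from the patch itself; the helper name is invented for illustration:

#include <stdio.h>

/* Maps the ID_AA64MMFR0 PARANGE encodings listed in this hunk to PA bits. */
static unsigned int parange_to_pa_bits(unsigned int parange)
{
        switch (parange) {
        case 0x0: return 32;
        case 0x1: return 36;
        case 0x2: return 40;
        case 0x3: return 42;
        case 0x4: return 44;
        case 0x5: return 48;
        case 0x6: return 52;
        default:  return 0;     /* reserved/unknown encoding */
        }
}

int main(void)
{
        unsigned int enc;

        for (enc = 0x0; enc <= 0x6; enc++)
                printf("PARANGE %#x -> %u-bit PA\n", enc, parange_to_pa_bits(enc));
        return 0;
}
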
-#define ID_AA64MMFR0_PARANGE_42 0x3 -#define ID_AA64MMFR0_PARANGE_44 0x4 -#define ID_AA64MMFR0_PARANGE_48 0x5 -#define ID_AA64MMFR0_PARANGE_52 0x6 +#define ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN 0x0 +#define ID_AA64MMFR0_EL1_TGRAN4_LPA2 ID_AA64MMFR0_EL1_TGRAN4_52_BIT +#define ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MAX 0x7 +#define ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MIN 0x0 +#define ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MAX 0x7 +#define ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MIN 0x1 +#define ID_AA64MMFR0_EL1_TGRAN16_LPA2 ID_AA64MMFR0_EL1_TGRAN16_52_BIT +#define ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MAX 0xf #define ARM64_MIN_PARANGE_BITS 32 -#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_DEFAULT 0x0 -#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_NONE 0x1 -#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_MIN 0x2 -#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_MAX 0x7 +#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_DEFAULT 0x0 +#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_NONE 0x1 +#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_MIN 0x2 +#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_LPA2 0x3 +#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_MAX 0x7 #ifdef CONFIG_ARM64_PA_BITS_52 -#define ID_AA64MMFR0_PARANGE_MAX ID_AA64MMFR0_PARANGE_52 +#define ID_AA64MMFR0_EL1_PARANGE_MAX ID_AA64MMFR0_EL1_PARANGE_52 #else -#define ID_AA64MMFR0_PARANGE_MAX ID_AA64MMFR0_PARANGE_48 +#define ID_AA64MMFR0_EL1_PARANGE_MAX ID_AA64MMFR0_EL1_PARANGE_48 #endif -/* id_aa64mmfr1 */ -#define ID_AA64MMFR1_ECBHB_SHIFT 60 -#define ID_AA64MMFR1_TIDCP1_SHIFT 52 -#define ID_AA64MMFR1_HCX_SHIFT 40 -#define ID_AA64MMFR1_AFP_SHIFT 44 -#define ID_AA64MMFR1_ETS_SHIFT 36 -#define ID_AA64MMFR1_TWED_SHIFT 32 -#define ID_AA64MMFR1_XNX_SHIFT 28 -#define ID_AA64MMFR1_SPECSEI_SHIFT 24 -#define ID_AA64MMFR1_PAN_SHIFT 20 -#define ID_AA64MMFR1_LOR_SHIFT 16 -#define ID_AA64MMFR1_HPD_SHIFT 12 -#define ID_AA64MMFR1_VHE_SHIFT 8 -#define ID_AA64MMFR1_VMIDBITS_SHIFT 4 -#define ID_AA64MMFR1_HADBS_SHIFT 0 - -#define ID_AA64MMFR1_VMIDBITS_8 0 -#define ID_AA64MMFR1_VMIDBITS_16 2 - -#define ID_AA64MMFR1_TIDCP1_NI 0 -#define ID_AA64MMFR1_TIDCP1_IMP 1 - -/* id_aa64mmfr2 */ -#define ID_AA64MMFR2_E0PD_SHIFT 60 -#define ID_AA64MMFR2_EVT_SHIFT 56 -#define ID_AA64MMFR2_BBM_SHIFT 52 -#define ID_AA64MMFR2_TTL_SHIFT 48 -#define ID_AA64MMFR2_FWB_SHIFT 40 -#define ID_AA64MMFR2_IDS_SHIFT 36 -#define ID_AA64MMFR2_AT_SHIFT 32 -#define ID_AA64MMFR2_ST_SHIFT 28 -#define ID_AA64MMFR2_NV_SHIFT 24 -#define ID_AA64MMFR2_CCIDX_SHIFT 20 -#define ID_AA64MMFR2_LVA_SHIFT 16 -#define ID_AA64MMFR2_IESB_SHIFT 12 -#define ID_AA64MMFR2_LSM_SHIFT 8 -#define ID_AA64MMFR2_UAO_SHIFT 4 -#define ID_AA64MMFR2_CNP_SHIFT 0 - -/* id_aa64dfr0 */ -#define ID_AA64DFR0_MTPMU_SHIFT 48 -#define ID_AA64DFR0_TRBE_SHIFT 44 -#define ID_AA64DFR0_TRACE_FILT_SHIFT 40 -#define ID_AA64DFR0_DOUBLELOCK_SHIFT 36 -#define ID_AA64DFR0_PMSVER_SHIFT 32 -#define ID_AA64DFR0_CTX_CMPS_SHIFT 28 -#define ID_AA64DFR0_WRPS_SHIFT 20 -#define ID_AA64DFR0_BRPS_SHIFT 12 -#define ID_AA64DFR0_PMUVER_SHIFT 8 -#define ID_AA64DFR0_TRACEVER_SHIFT 4 -#define ID_AA64DFR0_DEBUGVER_SHIFT 0 - -#define ID_AA64DFR0_PMUVER_8_0 0x1 -#define ID_AA64DFR0_PMUVER_8_1 0x4 -#define ID_AA64DFR0_PMUVER_8_4 0x5 -#define ID_AA64DFR0_PMUVER_8_5 0x6 -#define ID_AA64DFR0_PMUVER_8_7 0x7 -#define ID_AA64DFR0_PMUVER_IMP_DEF 0xf - -#define ID_AA64DFR0_PMSVER_8_2 0x1 -#define ID_AA64DFR0_PMSVER_8_3 0x2 - -#define ID_DFR0_PERFMON_SHIFT 24 - -#define ID_DFR0_PERFMON_8_0 0x3 -#define ID_DFR0_PERFMON_8_1 0x4 -#define ID_DFR0_PERFMON_8_4 0x5 -#define ID_DFR0_PERFMON_8_5 0x6 - -#define ID_ISAR4_SWP_FRAC_SHIFT 28 -#define 
ID_ISAR4_PSR_M_SHIFT 24 -#define ID_ISAR4_SYNCH_PRIM_FRAC_SHIFT 20 -#define ID_ISAR4_BARRIER_SHIFT 16 -#define ID_ISAR4_SMC_SHIFT 12 -#define ID_ISAR4_WRITEBACK_SHIFT 8 -#define ID_ISAR4_WITHSHIFTS_SHIFT 4 -#define ID_ISAR4_UNPRIV_SHIFT 0 - -#define ID_DFR1_MTPMU_SHIFT 0 - -#define ID_ISAR0_DIVIDE_SHIFT 24 -#define ID_ISAR0_DEBUG_SHIFT 20 -#define ID_ISAR0_COPROC_SHIFT 16 -#define ID_ISAR0_CMPBRANCH_SHIFT 12 -#define ID_ISAR0_BITFIELD_SHIFT 8 -#define ID_ISAR0_BITCOUNT_SHIFT 4 -#define ID_ISAR0_SWAP_SHIFT 0 - -#define ID_ISAR5_RDM_SHIFT 24 -#define ID_ISAR5_CRC32_SHIFT 16 -#define ID_ISAR5_SHA2_SHIFT 12 -#define ID_ISAR5_SHA1_SHIFT 8 -#define ID_ISAR5_AES_SHIFT 4 -#define ID_ISAR5_SEVL_SHIFT 0 - -#define ID_ISAR6_I8MM_SHIFT 24 -#define ID_ISAR6_BF16_SHIFT 20 -#define ID_ISAR6_SPECRES_SHIFT 16 -#define ID_ISAR6_SB_SHIFT 12 -#define ID_ISAR6_FHM_SHIFT 8 -#define ID_ISAR6_DP_SHIFT 4 -#define ID_ISAR6_JSCVT_SHIFT 0 - -#define ID_MMFR0_INNERSHR_SHIFT 28 -#define ID_MMFR0_FCSE_SHIFT 24 -#define ID_MMFR0_AUXREG_SHIFT 20 -#define ID_MMFR0_TCM_SHIFT 16 -#define ID_MMFR0_SHARELVL_SHIFT 12 -#define ID_MMFR0_OUTERSHR_SHIFT 8 -#define ID_MMFR0_PMSA_SHIFT 4 -#define ID_MMFR0_VMSA_SHIFT 0 - -#define ID_MMFR4_EVT_SHIFT 28 -#define ID_MMFR4_CCIDX_SHIFT 24 -#define ID_MMFR4_LSM_SHIFT 20 -#define ID_MMFR4_HPDS_SHIFT 16 -#define ID_MMFR4_CNP_SHIFT 12 -#define ID_MMFR4_XNX_SHIFT 8 -#define ID_MMFR4_AC2_SHIFT 4 -#define ID_MMFR4_SPECSEI_SHIFT 0 - -#define ID_MMFR5_ETS_SHIFT 0 - -#define ID_PFR0_DIT_SHIFT 24 -#define ID_PFR0_CSV2_SHIFT 16 -#define ID_PFR0_STATE3_SHIFT 12 -#define ID_PFR0_STATE2_SHIFT 8 -#define ID_PFR0_STATE1_SHIFT 4 -#define ID_PFR0_STATE0_SHIFT 0 - -#define ID_DFR0_PERFMON_SHIFT 24 -#define ID_DFR0_MPROFDBG_SHIFT 20 -#define ID_DFR0_MMAPTRC_SHIFT 16 -#define ID_DFR0_COPTRC_SHIFT 12 -#define ID_DFR0_MMAPDBG_SHIFT 8 -#define ID_DFR0_COPSDBG_SHIFT 4 -#define ID_DFR0_COPDBG_SHIFT 0 - -#define ID_PFR2_SSBS_SHIFT 4 -#define ID_PFR2_CSV3_SHIFT 0 - -#define MVFR0_FPROUND_SHIFT 28 -#define MVFR0_FPSHVEC_SHIFT 24 -#define MVFR0_FPSQRT_SHIFT 20 -#define MVFR0_FPDIVIDE_SHIFT 16 -#define MVFR0_FPTRAP_SHIFT 12 -#define MVFR0_FPDP_SHIFT 8 -#define MVFR0_FPSP_SHIFT 4 -#define MVFR0_SIMD_SHIFT 0 - -#define MVFR1_SIMDFMAC_SHIFT 28 -#define MVFR1_FPHP_SHIFT 24 -#define MVFR1_SIMDHP_SHIFT 20 -#define MVFR1_SIMDSP_SHIFT 16 -#define MVFR1_SIMDINT_SHIFT 12 -#define MVFR1_SIMDLS_SHIFT 8 -#define MVFR1_FPDNAN_SHIFT 4 -#define MVFR1_FPFTZ_SHIFT 0 - -#define ID_PFR1_GIC_SHIFT 28 -#define ID_PFR1_VIRT_FRAC_SHIFT 24 -#define ID_PFR1_SEC_FRAC_SHIFT 20 -#define ID_PFR1_GENTIMER_SHIFT 16 -#define ID_PFR1_VIRTUALIZATION_SHIFT 12 -#define ID_PFR1_MPROGMOD_SHIFT 8 -#define ID_PFR1_SECURITY_SHIFT 4 -#define ID_PFR1_PROGMOD_SHIFT 0 - #if defined(CONFIG_ARM64_4K_PAGES) -#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN4_SHIFT -#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN4_SUPPORTED_MIN -#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_TGRAN4_SUPPORTED_MAX -#define ID_AA64MMFR0_TGRAN_2_SHIFT ID_AA64MMFR0_TGRAN4_2_SHIFT +#define ID_AA64MMFR0_EL1_TGRAN_SHIFT ID_AA64MMFR0_EL1_TGRAN4_SHIFT +#define ID_AA64MMFR0_EL1_TGRAN_LPA2 ID_AA64MMFR0_EL1_TGRAN4_52_BIT +#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN +#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MAX +#define ID_AA64MMFR0_EL1_TGRAN_2_SHIFT ID_AA64MMFR0_EL1_TGRAN4_2_SHIFT #elif defined(CONFIG_ARM64_16K_PAGES) -#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN16_SHIFT -#define 
ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN16_SUPPORTED_MIN -#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_TGRAN16_SUPPORTED_MAX -#define ID_AA64MMFR0_TGRAN_2_SHIFT ID_AA64MMFR0_TGRAN16_2_SHIFT +#define ID_AA64MMFR0_EL1_TGRAN_SHIFT ID_AA64MMFR0_EL1_TGRAN16_SHIFT +#define ID_AA64MMFR0_EL1_TGRAN_LPA2 ID_AA64MMFR0_EL1_TGRAN16_52_BIT +#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MIN +#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MAX +#define ID_AA64MMFR0_EL1_TGRAN_2_SHIFT ID_AA64MMFR0_EL1_TGRAN16_2_SHIFT #elif defined(CONFIG_ARM64_64K_PAGES) -#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN64_SHIFT -#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN64_SUPPORTED_MIN -#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_TGRAN64_SUPPORTED_MAX -#define ID_AA64MMFR0_TGRAN_2_SHIFT ID_AA64MMFR0_TGRAN64_2_SHIFT +#define ID_AA64MMFR0_EL1_TGRAN_SHIFT ID_AA64MMFR0_EL1_TGRAN64_SHIFT +#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MIN +#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MAX +#define ID_AA64MMFR0_EL1_TGRAN_2_SHIFT ID_AA64MMFR0_EL1_TGRAN64_2_SHIFT #endif -#define MVFR2_FPMISC_SHIFT 4 -#define MVFR2_SIMDMISC_SHIFT 0 - #define CPACR_EL1_FPEN_EL1EN (BIT(20)) /* enable EL1 access */ #define CPACR_EL1_FPEN_EL0EN (BIT(21)) /* enable EL0 access, if EL1EN set */ @@ -1007,10 +975,6 @@ #define SYS_RGSR_EL1_SEED_SHIFT 8 #define SYS_RGSR_EL1_SEED_MASK 0xffffUL -/* GMID_EL1 field definitions */ -#define GMID_EL1_BS_SHIFT 0 -#define GMID_EL1_BS_SIZE 4 - /* TFSR{,E0}_EL1 bit definitions */ #define SYS_TFSR_EL1_TF0_SHIFT 0 #define SYS_TFSR_EL1_TF1_SHIFT 1 @@ -1020,22 +984,7 @@ /* Safe value for MPIDR_EL1: Bit31:RES1, Bit30:U:0, Bit24:MT:0 */ #define SYS_MPIDR_SAFE_VAL (BIT(31)) -#define TRFCR_ELx_TS_SHIFT 5 -#define TRFCR_ELx_TS_VIRTUAL ((0x1UL) << TRFCR_ELx_TS_SHIFT) -#define TRFCR_ELx_TS_GUEST_PHYSICAL ((0x2UL) << TRFCR_ELx_TS_SHIFT) -#define TRFCR_ELx_TS_PHYSICAL ((0x3UL) << TRFCR_ELx_TS_SHIFT) -#define TRFCR_EL2_CX BIT(3) -#define TRFCR_ELx_ExTRE BIT(1) -#define TRFCR_ELx_E0TRE BIT(0) - -/* HCRX_EL2 definitions */ -#define HCRX_EL2_SMPME_MASK (1 << 5) - /* GIC Hypervisor interface registers */ -/* ICH_MISR_EL2 bit definitions */ -#define ICH_MISR_EOI (1 << 0) -#define ICH_MISR_U (1 << 1) - /* ICH_LR*_EL2 bit definitions */ #define ICH_LR_VIRTUAL_ID_MASK ((1ULL << 32) - 1) @@ -1050,17 +999,6 @@ #define ICH_LR_PRIORITY_SHIFT 48 #define ICH_LR_PRIORITY_MASK (0xffULL << ICH_LR_PRIORITY_SHIFT) -/* ICH_HCR_EL2 bit definitions */ -#define ICH_HCR_EN (1 << 0) -#define ICH_HCR_UIE (1 << 1) -#define ICH_HCR_NPIE (1 << 3) -#define ICH_HCR_TC (1 << 10) -#define ICH_HCR_TALL0 (1 << 11) -#define ICH_HCR_TALL1 (1 << 12) -#define ICH_HCR_TDIR (1 << 14) -#define ICH_HCR_EOIcount_SHIFT 27 -#define ICH_HCR_EOIcount_MASK (0x1f << ICH_HCR_EOIcount_SHIFT) - /* ICH_VMCR_EL2 bit definitions */ #define ICH_VMCR_ACK_CTL_SHIFT 2 #define ICH_VMCR_ACK_CTL_MASK (1 << ICH_VMCR_ACK_CTL_SHIFT) @@ -1081,28 +1019,128 @@ #define ICH_VMCR_ENG1_SHIFT 1 #define ICH_VMCR_ENG1_MASK (1 << ICH_VMCR_ENG1_SHIFT) -/* ICH_VTR_EL2 bit definitions */ -#define ICH_VTR_PRI_BITS_SHIFT 29 -#define ICH_VTR_PRI_BITS_MASK (7 << ICH_VTR_PRI_BITS_SHIFT) -#define ICH_VTR_ID_BITS_SHIFT 23 -#define ICH_VTR_ID_BITS_MASK (7 << ICH_VTR_ID_BITS_SHIFT) -#define ICH_VTR_SEIS_SHIFT 22 -#define ICH_VTR_SEIS_MASK (1 << ICH_VTR_SEIS_SHIFT) -#define ICH_VTR_A3V_SHIFT 21 -#define ICH_VTR_A3V_MASK (1 << 
ICH_VTR_A3V_SHIFT) -#define ICH_VTR_TDS_SHIFT 19 -#define ICH_VTR_TDS_MASK (1 << ICH_VTR_TDS_SHIFT) - -/* HFG[WR]TR_EL2 bit definitions */ -#define HFGxTR_EL2_nTPIDR2_EL0_SHIFT 55 -#define HFGxTR_EL2_nTPIDR2_EL0_MASK BIT_MASK(HFGxTR_EL2_nTPIDR2_EL0_SHIFT) -#define HFGxTR_EL2_nSMPRI_EL1_SHIFT 54 -#define HFGxTR_EL2_nSMPRI_EL1_MASK BIT_MASK(HFGxTR_EL2_nSMPRI_EL1_SHIFT) +/* + * Permission Indirection Extension (PIE) permission encodings. + * Encodings with the _O suffix, have overlays applied (Permission Overlay Extension). + */ +#define PIE_NONE_O UL(0x0) +#define PIE_R_O UL(0x1) +#define PIE_X_O UL(0x2) +#define PIE_RX_O UL(0x3) +#define PIE_RW_O UL(0x5) +#define PIE_RWnX_O UL(0x6) +#define PIE_RWX_O UL(0x7) +#define PIE_R UL(0x8) +#define PIE_GCS UL(0x9) +#define PIE_RX UL(0xa) +#define PIE_RW UL(0xc) +#define PIE_RWX UL(0xe) +#define PIE_MASK UL(0xf) + +#define PIRx_ELx_BITS_PER_IDX 4 +#define PIRx_ELx_PERM_SHIFT(idx) ((idx) * PIRx_ELx_BITS_PER_IDX) +#define PIRx_ELx_PERM_PREP(idx, perm) (((perm) & PIE_MASK) << PIRx_ELx_PERM_SHIFT(idx)) + +/* + * Permission Overlay Extension (POE) permission encodings. + */ +#define POE_NONE UL(0x0) +#define POE_R UL(0x1) +#define POE_X UL(0x2) +#define POE_RX UL(0x3) +#define POE_W UL(0x4) +#define POE_RW UL(0x5) +#define POE_WX UL(0x6) +#define POE_RWX UL(0x7) +#define POE_MASK UL(0xf) + +#define POR_ELx_BITS_PER_IDX 4 +#define POR_ELx_PERM_SHIFT(idx) ((idx) * POR_ELx_BITS_PER_IDX) +#define POR_ELx_PERM_GET(idx, reg) (((reg) >> POR_ELx_PERM_SHIFT(idx)) & POE_MASK) +#define POR_ELx_PERM_PREP(idx, perm) (((perm) & POE_MASK) << POR_ELx_PERM_SHIFT(idx)) -#define ARM64_FEATURE_FIELD_BITS 4 +/* + * Definitions for Guarded Control Stack + */ + +#define GCS_CAP_ADDR_MASK GENMASK(63, 12) +#define GCS_CAP_ADDR_SHIFT 12 +#define GCS_CAP_ADDR_WIDTH 52 +#define GCS_CAP_ADDR(x) FIELD_GET(GCS_CAP_ADDR_MASK, x) -/* Create a mask for the feature bits of the specified feature. 
*/ -#define ARM64_FEATURE_MASK(x) (GENMASK_ULL(x##_SHIFT + ARM64_FEATURE_FIELD_BITS - 1, x##_SHIFT)) +#define GCS_CAP_TOKEN_MASK GENMASK(11, 0) +#define GCS_CAP_TOKEN_SHIFT 0 +#define GCS_CAP_TOKEN_WIDTH 12 +#define GCS_CAP_TOKEN(x) FIELD_GET(GCS_CAP_TOKEN_MASK, x) + +#define GCS_CAP_VALID_TOKEN 0x1 +#define GCS_CAP_IN_PROGRESS_TOKEN 0x5 + +#define GCS_CAP(x) ((((unsigned long)x) & GCS_CAP_ADDR_MASK) | \ + GCS_CAP_VALID_TOKEN) +/* + * Definitions for GICv5 instructions + */ +#define GICV5_OP_GIC_CDAFF sys_insn(1, 0, 12, 1, 3) +#define GICV5_OP_GIC_CDDI sys_insn(1, 0, 12, 2, 0) +#define GICV5_OP_GIC_CDDIS sys_insn(1, 0, 12, 1, 0) +#define GICV5_OP_GIC_CDHM sys_insn(1, 0, 12, 2, 1) +#define GICV5_OP_GIC_CDEN sys_insn(1, 0, 12, 1, 1) +#define GICV5_OP_GIC_CDEOI sys_insn(1, 0, 12, 1, 7) +#define GICV5_OP_GIC_CDPEND sys_insn(1, 0, 12, 1, 4) +#define GICV5_OP_GIC_CDPRI sys_insn(1, 0, 12, 1, 2) +#define GICV5_OP_GIC_CDRCFG sys_insn(1, 0, 12, 1, 5) +#define GICV5_OP_GICR_CDIA sys_insn(1, 0, 12, 3, 0) + +/* Definitions for GIC CDAFF */ +#define GICV5_GIC_CDAFF_IAFFID_MASK GENMASK_ULL(47, 32) +#define GICV5_GIC_CDAFF_TYPE_MASK GENMASK_ULL(31, 29) +#define GICV5_GIC_CDAFF_IRM_MASK BIT_ULL(28) +#define GICV5_GIC_CDAFF_ID_MASK GENMASK_ULL(23, 0) + +/* Definitions for GIC CDDI */ +#define GICV5_GIC_CDDI_TYPE_MASK GENMASK_ULL(31, 29) +#define GICV5_GIC_CDDI_ID_MASK GENMASK_ULL(23, 0) + +/* Definitions for GIC CDDIS */ +#define GICV5_GIC_CDDIS_TYPE_MASK GENMASK_ULL(31, 29) +#define GICV5_GIC_CDDIS_TYPE(r) FIELD_GET(GICV5_GIC_CDDIS_TYPE_MASK, r) +#define GICV5_GIC_CDDIS_ID_MASK GENMASK_ULL(23, 0) +#define GICV5_GIC_CDDIS_ID(r) FIELD_GET(GICV5_GIC_CDDIS_ID_MASK, r) + +/* Definitions for GIC CDEN */ +#define GICV5_GIC_CDEN_TYPE_MASK GENMASK_ULL(31, 29) +#define GICV5_GIC_CDEN_ID_MASK GENMASK_ULL(23, 0) + +/* Definitions for GIC CDHM */ +#define GICV5_GIC_CDHM_HM_MASK BIT_ULL(32) +#define GICV5_GIC_CDHM_TYPE_MASK GENMASK_ULL(31, 29) +#define GICV5_GIC_CDHM_ID_MASK GENMASK_ULL(23, 0) + +/* Definitions for GIC CDPEND */ +#define GICV5_GIC_CDPEND_PENDING_MASK BIT_ULL(32) +#define GICV5_GIC_CDPEND_TYPE_MASK GENMASK_ULL(31, 29) +#define GICV5_GIC_CDPEND_ID_MASK GENMASK_ULL(23, 0) + +/* Definitions for GIC CDPRI */ +#define GICV5_GIC_CDPRI_PRIORITY_MASK GENMASK_ULL(39, 35) +#define GICV5_GIC_CDPRI_TYPE_MASK GENMASK_ULL(31, 29) +#define GICV5_GIC_CDPRI_ID_MASK GENMASK_ULL(23, 0) + +/* Definitions for GIC CDRCFG */ +#define GICV5_GIC_CDRCFG_TYPE_MASK GENMASK_ULL(31, 29) +#define GICV5_GIC_CDRCFG_ID_MASK GENMASK_ULL(23, 0) + +/* Definitions for GICR CDIA */ +#define GICV5_GIC_CDIA_VALID_MASK BIT_ULL(32) +#define GICV5_GICR_CDIA_VALID(r) FIELD_GET(GICV5_GIC_CDIA_VALID_MASK, r) +#define GICV5_GIC_CDIA_TYPE_MASK GENMASK_ULL(31, 29) +#define GICV5_GIC_CDIA_ID_MASK GENMASK_ULL(23, 0) + +#define gicr_insn(insn) read_sysreg_s(GICV5_OP_GICR_##insn) +#define gic_insn(v, insn) write_sysreg_s(v, GICV5_OP_GIC_##insn) + +#define ARM64_FEATURE_FIELD_BITS 4 #ifdef __ASSEMBLY__ @@ -1114,8 +1152,18 @@ __emit_inst(0xd5000000|(\sreg)|(.L__gpr_num_\rt)) .endm + .macro msr_hcr_el2, reg +#if IS_ENABLED(CONFIG_AMPERE_ERRATUM_AC04_CPU_23) + dsb nsh + msr hcr_el2, \reg + isb +#else + msr hcr_el2, \reg +#endif + .endm #else +#include <linux/bitfield.h> #include <linux/build_bug.h> #include <linux/types.h> #include <asm/alternative.h> @@ -1171,15 +1219,21 @@ /* * For registers without architectural names, or simply unsupported by * GAS. 
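
The PIE/POE tables above hold one 4-bit permission per index, packed into a 64-bit register image by PIRx_ELx_PERM_PREP() and read back with the POR_ELx_PERM_GET() pattern. A standalone sketch of that packing, not part of the patch; the relevant encodings and macros are re-declared locally so the example builds on its own, and the three-entry layout is invented for illustration:

#include <stdint.h>
#include <stdio.h>

/* Encodings copied from the hunk above. */
#define PIE_NONE_O      0x0ULL
#define PIE_RX          0xaULL
#define PIE_RW          0xcULL
#define PIE_MASK        0xfULL

#define PIRx_ELx_BITS_PER_IDX           4
#define PIRx_ELx_PERM_SHIFT(idx)        ((idx) * PIRx_ELx_BITS_PER_IDX)
#define PIRx_ELx_PERM_PREP(idx, perm)   (((perm) & PIE_MASK) << PIRx_ELx_PERM_SHIFT(idx))

int main(void)
{
        /*
         * Hypothetical indirection table: index 0 -> no access,
         * index 1 -> read/write, index 2 -> read/execute.
         */
        uint64_t pir = PIRx_ELx_PERM_PREP(0, PIE_NONE_O) |
                       PIRx_ELx_PERM_PREP(1, PIE_RW) |
                       PIRx_ELx_PERM_PREP(2, PIE_RX);

        printf("PIR image: %#018llx\n", (unsigned long long)pir);      /* ...0ac0 */

        /* Read slot 2 back out, mirroring the POR_ELx_PERM_GET() pattern. */
        printf("index 2 perm: %#llx\n",
               (unsigned long long)((pir >> PIRx_ELx_PERM_SHIFT(2)) & PIE_MASK));
        return 0;
}
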
+ * + * __check_r forces warnings to be generated by the compiler when + * evaluating r which wouldn't normally happen due to being passed to + * the assembler via __stringify(r). */ #define read_sysreg_s(r) ({ \ u64 __val; \ + u32 __maybe_unused __check_r = (u32)(r); \ asm volatile(__mrs_s("%0", r) : "=r" (__val)); \ __val; \ }) #define write_sysreg_s(v, r) do { \ u64 __val = (u64)(v); \ + u32 __maybe_unused __check_r = (u32)(r); \ asm volatile(__msr_s(r, "%x0") : : "rZ" (__val)); \ } while (0) @@ -1194,6 +1248,13 @@ write_sysreg(__scs_new, sysreg); \ } while (0) +#define sysreg_clear_set_hcr(clear, set) do { \ + u64 __scs_val = read_sysreg(hcr_el2); \ + u64 __scs_new = (__scs_val & ~(u64)(clear)) | (set); \ + if (__scs_new != __scs_val) \ + write_sysreg_hcr(__scs_new); \ +} while (0) + #define sysreg_clear_set_s(sysreg, clear, set) do { \ u64 __scs_val = read_sysreg_s(sysreg); \ u64 __scs_new = (__scs_val & ~(u64)(clear)) | (set); \ @@ -1201,6 +1262,17 @@ write_sysreg_s(__scs_new, sysreg); \ } while (0) +#define write_sysreg_hcr(__val) do { \ + if (IS_ENABLED(CONFIG_AMPERE_ERRATUM_AC04_CPU_23) && \ + (!system_capabilities_finalized() || \ + alternative_has_cap_unlikely(ARM64_WORKAROUND_AMPERE_AC04_CPU_23))) \ + asm volatile("dsb nsh; msr hcr_el2, %x0; isb" \ + : : "rZ" (__val)); \ + else \ + asm volatile("msr hcr_el2, %x0" \ + : : "rZ" (__val)); \ +} while (0) + #define read_sysreg_par() ({ \ u64 par; \ asm(ALTERNATIVE("nop", "dmb sy", ARM64_WORKAROUND_1508412)); \ @@ -1209,7 +1281,7 @@ par; \ }) -#endif +#define SYS_FIELD_VALUE(reg, field, val) reg##_##field##_##val #define SYS_FIELD_GET(reg, field, val) \ FIELD_GET(reg##_##field##_MASK, val) @@ -1218,6 +1290,9 @@ FIELD_PREP(reg##_##field##_MASK, val) #define SYS_FIELD_PREP_ENUM(reg, field, val) \ - FIELD_PREP(reg##_##field##_MASK, reg##_##field##_##val) + FIELD_PREP(reg##_##field##_MASK, \ + SYS_FIELD_VALUE(reg, field, val)) + +#endif #endif /* __ASM_SYSREG_H */ diff --git a/arch/arm64/include/asm/system_misc.h b/arch/arm64/include/asm/system_misc.h index 0eb7709422e2..344b1c1a4bbb 100644 --- a/arch/arm64/include/asm/system_misc.h +++ b/arch/arm64/include/asm/system_misc.h @@ -18,17 +18,13 @@ struct pt_regs; -void die(const char *msg, struct pt_regs *regs, int err); +void die(const char *msg, struct pt_regs *regs, long err); struct siginfo; void arm64_notify_die(const char *str, struct pt_regs *regs, int signo, int sicode, unsigned long far, unsigned long err); -void hook_debug_fault_code(int nr, int (*fn)(unsigned long, unsigned long, - struct pt_regs *), - int sig, int code, const char *name); - struct mm_struct; extern void __show_regs(struct pt_regs *); diff --git a/arch/arm64/include/asm/patching.h b/arch/arm64/include/asm/text-patching.h index 6bf5adc56295..587bdb91ab7a 100644 --- a/arch/arm64/include/asm/patching.h +++ b/arch/arm64/include/asm/text-patching.h @@ -7,6 +7,10 @@ int aarch64_insn_read(void *addr, u32 *insnp); int aarch64_insn_write(void *addr, u32 insn); +int aarch64_insn_write_literal_u64(void *addr, u64 val); +void *aarch64_insn_set(void *dst, u32 insn, size_t len); +void *aarch64_insn_copy(void *dst, void *src, size_t len); + int aarch64_insn_patch_text_nosync(void *addr, u32 insn); int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt); diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 848739c15de8..f241b8601ebd 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -55,24 +55,22 @@ struct thread_info { void 
arch_setup_new_exec(void); #define arch_setup_new_exec arch_setup_new_exec -void arch_release_task_struct(struct task_struct *tsk); -int arch_dup_task_struct(struct task_struct *dst, - struct task_struct *src); - #endif #define TIF_SIGPENDING 0 /* signal pending */ #define TIF_NEED_RESCHED 1 /* rescheduling necessary */ -#define TIF_NOTIFY_RESUME 2 /* callback before returning to user */ -#define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */ -#define TIF_UPROBE 4 /* uprobe breakpoint or singlestep */ -#define TIF_MTE_ASYNC_FAULT 5 /* MTE Asynchronous Tag Check Fault */ -#define TIF_NOTIFY_SIGNAL 6 /* signal notifications exist */ +#define TIF_NEED_RESCHED_LAZY 2 /* Lazy rescheduling needed */ +#define TIF_NOTIFY_RESUME 3 /* callback before returning to user */ +#define TIF_FOREIGN_FPSTATE 4 /* CPU's FP state is not current's */ +#define TIF_UPROBE 5 /* uprobe breakpoint or singlestep */ +#define TIF_MTE_ASYNC_FAULT 6 /* MTE Asynchronous Tag Check Fault */ +#define TIF_NOTIFY_SIGNAL 7 /* signal notifications exist */ #define TIF_SYSCALL_TRACE 8 /* syscall trace active */ #define TIF_SYSCALL_AUDIT 9 /* syscall auditing */ #define TIF_SYSCALL_TRACEPOINT 10 /* syscall tracepoint for ftrace */ #define TIF_SECCOMP 11 /* syscall secure computing */ #define TIF_SYSCALL_EMU 12 /* syscall emulation active */ +#define TIF_PATCH_PENDING 13 /* pending live patching update */ #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ #define TIF_FREEZE 19 #define TIF_RESTORE_SIGMASK 20 @@ -84,9 +82,14 @@ int arch_dup_task_struct(struct task_struct *dst, #define TIF_TAGGED_ADDR 26 /* Allow tagged user addresses */ #define TIF_SME 27 /* SME in use */ #define TIF_SME_VL_INHERIT 28 /* Inherit SME vl_onexec across exec */ +#define TIF_KERNEL_FPSTATE 29 /* Task is in a kernel mode FPSIMD section */ +#define TIF_TSC_SIGSEGV 30 /* SIGSEGV on counter-timer access */ +#define TIF_LAZY_MMU 31 /* Task in lazy mmu mode */ +#define TIF_LAZY_MMU_PENDING 32 /* Ops pending for lazy mmu mode exit */ #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) #define _TIF_FOREIGN_FPSTATE (1 << TIF_FOREIGN_FPSTATE) #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) @@ -94,17 +97,20 @@ int arch_dup_task_struct(struct task_struct *dst, #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) +#define _TIF_PATCH_PENDING (1 << TIF_PATCH_PENDING) #define _TIF_UPROBE (1 << TIF_UPROBE) #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) #define _TIF_32BIT (1 << TIF_32BIT) #define _TIF_SVE (1 << TIF_SVE) #define _TIF_MTE_ASYNC_FAULT (1 << TIF_MTE_ASYNC_FAULT) #define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) +#define _TIF_TSC_SIGSEGV (1 << TIF_TSC_SIGSEGV) -#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ +#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \ _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \ _TIF_UPROBE | _TIF_MTE_ASYNC_FAULT | \ - _TIF_NOTIFY_SIGNAL) + _TIF_NOTIFY_SIGNAL | _TIF_SIGPENDING | \ + _TIF_PATCH_PENDING) #define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \ diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index c995d1f4594f..8d762607285c 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -9,12 +9,7 @@ #define 
__ASM_TLB_H #include <linux/pagemap.h> -#include <linux/swap.h> -static inline void __tlb_remove_table(void *_table) -{ - free_page_and_swap_cache((struct page *)_table); -} #define tlb_flush tlb_flush static void tlb_flush(struct mmu_gather *tlb); @@ -22,15 +17,15 @@ static void tlb_flush(struct mmu_gather *tlb); #include <asm-generic/tlb.h> /* - * get the tlbi levels in arm64. Default value is 0 if more than one - * of cleared_* is set or neither is set. - * Arm64 doesn't support p4ds now. + * get the tlbi levels in arm64. Default value is TLBI_TTL_UNKNOWN if more than + * one of cleared_* is set or neither is set - this elides the level hinting to + * the hardware. */ static inline int tlb_get_level(struct mmu_gather *tlb) { /* The TTL field is only valid for the leaf entry. */ if (tlb->freed_tables) - return 0; + return TLBI_TTL_UNKNOWN; if (tlb->cleared_ptes && !(tlb->cleared_pmds || tlb->cleared_puds || @@ -47,7 +42,12 @@ static inline int tlb_get_level(struct mmu_gather *tlb) tlb->cleared_p4ds)) return 1; - return 0; + if (tlb->cleared_p4ds && !(tlb->cleared_ptes || + tlb->cleared_pmds || + tlb->cleared_puds)) + return 0; + + return TLBI_TTL_UNKNOWN; } static inline void tlb_flush(struct mmu_gather *tlb) @@ -75,18 +75,18 @@ static inline void tlb_flush(struct mmu_gather *tlb) static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, unsigned long addr) { - pgtable_pte_page_dtor(pte); - tlb_remove_table(tlb, pte); + struct ptdesc *ptdesc = page_ptdesc(pte); + + tlb_remove_ptdesc(tlb, ptdesc); } #if CONFIG_PGTABLE_LEVELS > 2 static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr) { - struct page *page = virt_to_page(pmdp); + struct ptdesc *ptdesc = virt_to_ptdesc(pmdp); - pgtable_pmd_page_dtor(page); - tlb_remove_table(tlb, page); + tlb_remove_ptdesc(tlb, ptdesc); } #endif @@ -94,7 +94,25 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp, unsigned long addr) { - tlb_remove_table(tlb, virt_to_page(pudp)); + struct ptdesc *ptdesc = virt_to_ptdesc(pudp); + + if (!pgtable_l4_enabled()) + return; + + tlb_remove_ptdesc(tlb, ptdesc); +} +#endif + +#if CONFIG_PGTABLE_LEVELS > 4 +static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4dp, + unsigned long addr) +{ + struct ptdesc *ptdesc = virt_to_ptdesc(p4dp); + + if (!pgtable_l5_enabled()) + return; + + tlb_remove_ptdesc(tlb, ptdesc); } #endif diff --git a/arch/arm64/include/asm/tlbbatch.h b/arch/arm64/include/asm/tlbbatch.h new file mode 100644 index 000000000000..fedb0b87b8db --- /dev/null +++ b/arch/arm64/include/asm/tlbbatch.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ARCH_ARM64_TLBBATCH_H +#define _ARCH_ARM64_TLBBATCH_H + +struct arch_tlbflush_unmap_batch { + /* + * For arm64, HW can do tlb shootdown, so we don't + * need to record cpumask for sending IPI + */ +}; + +#endif /* _ARCH_ARM64_TLBBATCH_H */ diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index 412a3b9a3c25..18a5dc0c9a54 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -13,6 +13,7 @@ #include <linux/bitfield.h> #include <linux/mm_types.h> #include <linux/sched.h> +#include <linux/mmu_notifier.h> #include <asm/cputype.h> #include <asm/mmu.h> @@ -93,19 +94,22 @@ static inline unsigned long get_trans_granule(void) * When ARMv8.4-TTL exists, TLBI operations take an additional hint for * the level at which the invalidation must 
take place. If the level is * wrong, no invalidation may take place. In the case where the level - * cannot be easily determined, a 0 value for the level parameter will - * perform a non-hinted invalidation. + * cannot be easily determined, the value TLBI_TTL_UNKNOWN will perform + * a non-hinted invalidation. Any provided level outside the hint range + * will also cause fall-back to non-hinted invalidation. * * For Stage-2 invalidation, use the level values provided to that effect * in asm/stage2_pgtable.h. */ #define TLBI_TTL_MASK GENMASK_ULL(47, 44) +#define TLBI_TTL_UNKNOWN INT_MAX + #define __tlbi_level(op, addr, level) do { \ u64 arg = addr; \ \ - if (cpus_have_const_cap(ARM64_HAS_ARMv8_4_TTL) && \ - level) { \ + if (alternative_has_cap_unlikely(ARM64_HAS_ARMv8_4_TTL) && \ + level >= 0 && level <= 3) { \ u64 ttl = level & 3; \ ttl |= get_trans_granule() << 2; \ arg &= ~TLBI_TTL_MASK; \ @@ -121,28 +125,41 @@ static inline unsigned long get_trans_granule(void) } while (0) /* - * This macro creates a properly formatted VA operand for the TLB RANGE. - * The value bit assignments are: + * This macro creates a properly formatted VA operand for the TLB RANGE. The + * value bit assignments are: * * +----------+------+-------+-------+-------+----------------------+ * | ASID | TG | SCALE | NUM | TTL | BADDR | * +-----------------+-------+-------+-------+----------------------+ * |63 48|47 46|45 44|43 39|38 37|36 0| * - * The address range is determined by below formula: - * [BADDR, BADDR + (NUM + 1) * 2^(5*SCALE + 1) * PAGESIZE) + * The address range is determined by below formula: [BADDR, BADDR + (NUM + 1) * + * 2^(5*SCALE + 1) * PAGESIZE) + * + * Note that the first argument, baddr, is pre-shifted; If LPA2 is in use, BADDR + * holds addr[52:16]. Else BADDR holds page number. See for example ARM DDI + * 0487J.a section C5.5.60 "TLBI VAE1IS, TLBI VAE1ISNXS, TLB Invalidate by VA, + * EL1, Inner Shareable". * */ -#define __TLBI_VADDR_RANGE(addr, asid, scale, num, ttl) \ - ({ \ - unsigned long __ta = (addr) >> PAGE_SHIFT; \ - __ta &= GENMASK_ULL(36, 0); \ - __ta |= (unsigned long)(ttl) << 37; \ - __ta |= (unsigned long)(num) << 39; \ - __ta |= (unsigned long)(scale) << 44; \ - __ta |= get_trans_granule() << 46; \ - __ta |= (unsigned long)(asid) << 48; \ - __ta; \ +#define TLBIR_ASID_MASK GENMASK_ULL(63, 48) +#define TLBIR_TG_MASK GENMASK_ULL(47, 46) +#define TLBIR_SCALE_MASK GENMASK_ULL(45, 44) +#define TLBIR_NUM_MASK GENMASK_ULL(43, 39) +#define TLBIR_TTL_MASK GENMASK_ULL(38, 37) +#define TLBIR_BADDR_MASK GENMASK_ULL(36, 0) + +#define __TLBI_VADDR_RANGE(baddr, asid, scale, num, ttl) \ + ({ \ + unsigned long __ta = 0; \ + unsigned long __ttl = (ttl >= 1 && ttl <= 3) ? ttl : 0; \ + __ta |= FIELD_PREP(TLBIR_BADDR_MASK, baddr); \ + __ta |= FIELD_PREP(TLBIR_TTL_MASK, __ttl); \ + __ta |= FIELD_PREP(TLBIR_NUM_MASK, num); \ + __ta |= FIELD_PREP(TLBIR_SCALE_MASK, scale); \ + __ta |= FIELD_PREP(TLBIR_TG_MASK, get_trans_granule()); \ + __ta |= FIELD_PREP(TLBIR_ASID_MASK, asid); \ + __ta; \ }) /* These macros are used by the TLBI RANGE feature. */ @@ -151,12 +168,18 @@ static inline unsigned long get_trans_granule(void) #define MAX_TLBI_RANGE_PAGES __TLBI_RANGE_PAGES(31, 3) /* - * Generate 'num' values from -1 to 30 with -1 rejected by the - * __flush_tlb_range() loop below. + * Generate 'num' values from -1 to 31 with -1 rejected by the + * __flush_tlb_range() loop below. Its return value is only + * significant for a maximum of MAX_TLBI_RANGE_PAGES pages. 
If + * 'pages' is more than that, you must iterate over the overall + * range. */ -#define TLBI_RANGE_MASK GENMASK_ULL(4, 0) -#define __TLBI_RANGE_NUM(pages, scale) \ - ((((pages) >> (5 * (scale) + 1)) & TLBI_RANGE_MASK) - 1) +#define __TLBI_RANGE_NUM(pages, scale) \ + ({ \ + int __pages = min((pages), \ + __TLBI_RANGE_PAGES(31, (scale))); \ + (__pages >> (5 * (scale) + 1)) - 1; \ + }) /* * TLB Invalidation @@ -215,12 +238,16 @@ static inline unsigned long get_trans_granule(void) * CPUs, ensuring that any walk-cache entries associated with the * translation are also invalidated. * - * __flush_tlb_range(vma, start, end, stride, last_level) + * __flush_tlb_range(vma, start, end, stride, last_level, tlb_level) * Invalidate the virtual-address range '[start, end)' on all * CPUs for the user address space corresponding to 'vma->mm'. * The invalidation operations are issued at a granularity * determined by 'stride' and only affect any walk-cache entries - * if 'last_level' is equal to false. + * if 'last_level' is equal to false. tlb_level is the level at + * which the invalidation must take place. If the level is wrong, + * no invalidation may take place. In the case where the level + * cannot be easily determined, the value TLBI_TTL_UNKNOWN will + * perform a non-hinted invalidation. * * * Finally, take a look at asm/tlb.h to see how tlb_flush() is implemented @@ -252,17 +279,26 @@ static inline void flush_tlb_mm(struct mm_struct *mm) __tlbi(aside1is, asid); __tlbi_user(aside1is, asid); dsb(ish); + mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL); } -static inline void flush_tlb_page_nosync(struct vm_area_struct *vma, - unsigned long uaddr) +static inline void __flush_tlb_page_nosync(struct mm_struct *mm, + unsigned long uaddr) { unsigned long addr; dsb(ishst); - addr = __TLBI_VADDR(uaddr, ASID(vma->vm_mm)); + addr = __TLBI_VADDR(uaddr, ASID(mm)); __tlbi(vale1is, addr); __tlbi_user(vale1is, addr); + mmu_notifier_arch_invalidate_secondary_tlbs(mm, uaddr & PAGE_MASK, + (uaddr & PAGE_MASK) + PAGE_SIZE); +} + +static inline void flush_tlb_page_nosync(struct vm_area_struct *vma, + unsigned long uaddr) +{ + return __flush_tlb_page_nosync(vma->vm_mm, uaddr); } static inline void flush_tlb_page(struct vm_area_struct *vma, @@ -272,91 +308,167 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, dsb(ish); } +static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm) +{ + /* + * TLB flush deferral is not required on systems which are affected by + * ARM64_WORKAROUND_REPEAT_TLBI, as __tlbi()/__tlbi_user() implementation + * will have two consecutive TLBI instructions with a dsb(ish) in between + * defeating the purpose (i.e save overall 'dsb ish' cost). + */ + if (alternative_has_cap_unlikely(ARM64_WORKAROUND_REPEAT_TLBI)) + return false; + + return true; +} + +/* + * To support TLB batched flush for multiple pages unmapping, we only send + * the TLBI for each page in arch_tlbbatch_add_pending() and wait for the + * completion at the end in arch_tlbbatch_flush(). Since we've already issued + * TLBI for each page so only a DSB is needed to synchronise its effect on the + * other CPUs. + * + * This will save the time waiting on DSB comparing issuing a TLBI;DSB sequence + * for each page. + */ +static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) +{ + dsb(ish); +} + /* * This is meant to avoid soft lock-ups on large TLB flushing ranges and not * necessarily a performance improvement. 
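
To make the scale/num arithmetic above concrete, here is a standalone sketch, separate from the patch, of how a page count is carved into range operations when walking scale from 3 down to 0. The two range macros are re-declared locally and the page count is an arbitrary example:

#include <stdio.h>

/* Local copy of __TLBI_RANGE_PAGES() from this hunk. */
#define RANGE_PAGES(num, scale) ((unsigned long)((num) + 1) << (5 * (scale) + 1))

/* Local copy of the new __TLBI_RANGE_NUM() logic from this hunk. */
static int range_num(unsigned long pages, int scale)
{
        unsigned long capped = pages < RANGE_PAGES(31, scale) ?
                               pages : RANGE_PAGES(31, scale);

        return (int)(capped >> (5 * scale + 1)) - 1;
}

int main(void)
{
        unsigned long pages = 4106;     /* arbitrary: 4096 + 8 + 2 pages */
        int scale;

        for (scale = 3; scale >= 0 && pages > 0; scale--) {
                int num = range_num(pages, scale);

                if (num < 0)    /* fewer than 2^(5*scale+1) pages remain */
                        continue;

                printf("scale=%d num=%d -> %lu pages in one range TLBI\n",
                       scale, num, RANGE_PAGES(num, scale));
                pages -= RANGE_PAGES(num, scale);
        }
        printf("left over for single-page TLBIs: %lu\n", pages);
        return 0;
}

For 4106 pages this prints one operation at scale 2 (4096 pages) and one at scale 0 (10 pages), matching the highest-scale-first walk that __flush_tlb_range_op(), further down in this hunk, documents.
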
*/ -#define MAX_TLBI_OPS PTRS_PER_PTE +#define MAX_DVM_OPS PTRS_PER_PTE -static inline void __flush_tlb_range(struct vm_area_struct *vma, +/* + * __flush_tlb_range_op - Perform TLBI operation upon a range + * + * @op: TLBI instruction that operates on a range (has 'r' prefix) + * @start: The start address of the range + * @pages: Range as the number of pages from 'start' + * @stride: Flush granularity + * @asid: The ASID of the task (0 for IPA instructions) + * @tlb_level: Translation Table level hint, if known + * @tlbi_user: If 'true', call an additional __tlbi_user() + * (typically for user ASIDs). 'flase' for IPA instructions + * @lpa2: If 'true', the lpa2 scheme is used as set out below + * + * When the CPU does not support TLB range operations, flush the TLB + * entries one by one at the granularity of 'stride'. If the TLB + * range ops are supported, then: + * + * 1. If FEAT_LPA2 is in use, the start address of a range operation must be + * 64KB aligned, so flush pages one by one until the alignment is reached + * using the non-range operations. This step is skipped if LPA2 is not in + * use. + * + * 2. The minimum range granularity is decided by 'scale', so multiple range + * TLBI operations may be required. Start from scale = 3, flush the largest + * possible number of pages ((num+1)*2^(5*scale+1)) that fit into the + * requested range, then decrement scale and continue until one or zero pages + * are left. We must start from highest scale to ensure 64KB start alignment + * is maintained in the LPA2 case. + * + * 3. If there is 1 page remaining, flush it through non-range operations. Range + * operations can only span an even number of pages. We save this for last to + * ensure 64KB start alignment is maintained for the LPA2 case. + */ +#define __flush_tlb_range_op(op, start, pages, stride, \ + asid, tlb_level, tlbi_user, lpa2) \ +do { \ + typeof(start) __flush_start = start; \ + typeof(pages) __flush_pages = pages; \ + int num = 0; \ + int scale = 3; \ + int shift = lpa2 ? 16 : PAGE_SHIFT; \ + unsigned long addr; \ + \ + while (__flush_pages > 0) { \ + if (!system_supports_tlb_range() || \ + __flush_pages == 1 || \ + (lpa2 && __flush_start != ALIGN(__flush_start, SZ_64K))) { \ + addr = __TLBI_VADDR(__flush_start, asid); \ + __tlbi_level(op, addr, tlb_level); \ + if (tlbi_user) \ + __tlbi_user_level(op, addr, tlb_level); \ + __flush_start += stride; \ + __flush_pages -= stride >> PAGE_SHIFT; \ + continue; \ + } \ + \ + num = __TLBI_RANGE_NUM(__flush_pages, scale); \ + if (num >= 0) { \ + addr = __TLBI_VADDR_RANGE(__flush_start >> shift, asid, \ + scale, num, tlb_level); \ + __tlbi(r##op, addr); \ + if (tlbi_user) \ + __tlbi_user(r##op, addr); \ + __flush_start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \ + __flush_pages -= __TLBI_RANGE_PAGES(num, scale);\ + } \ + scale--; \ + } \ +} while (0) + +#define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level) \ + __flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false, kvm_lpa2_is_enabled()); + +static inline bool __flush_tlb_range_limit_excess(unsigned long start, + unsigned long end, unsigned long pages, unsigned long stride) +{ + /* + * When the system does not support TLB range based flush + * operation, (MAX_DVM_OPS - 1) pages can be handled. But + * with TLB range based operation, MAX_TLBI_RANGE_PAGES + * pages can be handled. 
+ */ + if ((!system_supports_tlb_range() && + (end - start) >= (MAX_DVM_OPS * stride)) || + pages > MAX_TLBI_RANGE_PAGES) + return true; + + return false; +} + +static inline void __flush_tlb_range_nosync(struct mm_struct *mm, unsigned long start, unsigned long end, unsigned long stride, bool last_level, int tlb_level) { - int num = 0; - int scale = 0; - unsigned long asid, addr, pages; + unsigned long asid, pages; start = round_down(start, stride); end = round_up(end, stride); pages = (end - start) >> PAGE_SHIFT; - /* - * When not uses TLB range ops, we can handle up to - * (MAX_TLBI_OPS - 1) pages; - * When uses TLB range ops, we can handle up to - * (MAX_TLBI_RANGE_PAGES - 1) pages. - */ - if ((!system_supports_tlb_range() && - (end - start) >= (MAX_TLBI_OPS * stride)) || - pages >= MAX_TLBI_RANGE_PAGES) { - flush_tlb_mm(vma->vm_mm); + if (__flush_tlb_range_limit_excess(start, end, pages, stride)) { + flush_tlb_mm(mm); return; } dsb(ishst); - asid = ASID(vma->vm_mm); + asid = ASID(mm); - /* - * When the CPU does not support TLB range operations, flush the TLB - * entries one by one at the granularity of 'stride'. If the TLB - * range ops are supported, then: - * - * 1. If 'pages' is odd, flush the first page through non-range - * operations; - * - * 2. For remaining pages: the minimum range granularity is decided - * by 'scale', so multiple range TLBI operations may be required. - * Start from scale = 0, flush the corresponding number of pages - * ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it - * until no pages left. - * - * Note that certain ranges can be represented by either num = 31 and - * scale or num = 0 and scale + 1. The loop below favours the latter - * since num is limited to 30 by the __TLBI_RANGE_NUM() macro. - */ - while (pages > 0) { - if (!system_supports_tlb_range() || - pages % 2 == 1) { - addr = __TLBI_VADDR(start, asid); - if (last_level) { - __tlbi_level(vale1is, addr, tlb_level); - __tlbi_user_level(vale1is, addr, tlb_level); - } else { - __tlbi_level(vae1is, addr, tlb_level); - __tlbi_user_level(vae1is, addr, tlb_level); - } - start += stride; - pages -= stride >> PAGE_SHIFT; - continue; - } - - num = __TLBI_RANGE_NUM(pages, scale); - if (num >= 0) { - addr = __TLBI_VADDR_RANGE(start, asid, scale, - num, tlb_level); - if (last_level) { - __tlbi(rvale1is, addr); - __tlbi_user(rvale1is, addr); - } else { - __tlbi(rvae1is, addr); - __tlbi_user(rvae1is, addr); - } - start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; - pages -= __TLBI_RANGE_PAGES(num, scale); - } - scale++; - } + if (last_level) + __flush_tlb_range_op(vale1is, start, pages, stride, asid, + tlb_level, true, lpa2_is_enabled()); + else + __flush_tlb_range_op(vae1is, start, pages, stride, asid, + tlb_level, true, lpa2_is_enabled()); + + mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end); +} + +static inline void __flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end, + unsigned long stride, bool last_level, + int tlb_level) +{ + __flush_tlb_range_nosync(vma->vm_mm, start, end, stride, + last_level, tlb_level); dsb(ish); } @@ -366,26 +478,29 @@ static inline void flush_tlb_range(struct vm_area_struct *vma, /* * We cannot use leaf-only invalidation here, since we may be invalidating * table entries as part of collapsing hugepages or moving page tables. - * Set the tlb_level to 0 because we can not get enough information here. + * Set the tlb_level to TLBI_TTL_UNKNOWN because we can not get enough + * information here. 
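
As a quick illustration of the fallback rule that __flush_tlb_range_limit_excess() above encodes: without range TLBIs, a range spanning MAX_DVM_OPS or more stride-sized steps gives up and flushes the whole mm; with range TLBIs, only counts above MAX_TLBI_RANGE_PAGES do. The following standalone sketch is not part of the patch; PTRS_PER_PTE is assumed to be 512 (4K granule) and have_tlb_range stands in for system_supports_tlb_range():

#include <stdbool.h>
#include <stdio.h>

#define PAGE_SHIFT              12
#define PAGE_SIZE               (1UL << PAGE_SHIFT)
#define PTRS_PER_PTE            512                     /* assumes 4K pages */
#define MAX_DVM_OPS             PTRS_PER_PTE
#define MAX_TLBI_RANGE_PAGES    (32UL << 16)            /* __TLBI_RANGE_PAGES(31, 3) */

/* Same two conditions as __flush_tlb_range_limit_excess() in this hunk. */
static bool limit_excess(unsigned long start, unsigned long end,
                         unsigned long pages, unsigned long stride,
                         bool have_tlb_range)
{
        if ((!have_tlb_range && (end - start) >= (MAX_DVM_OPS * stride)) ||
            pages > MAX_TLBI_RANGE_PAGES)
                return true;

        return false;
}

int main(void)
{
        unsigned long start = 0x400000UL, end = start + 1024 * PAGE_SIZE;
        unsigned long pages = (end - start) >> PAGE_SHIFT;

        /* 1024 pages: fine for range TLBIs, too many for per-page TLBIs. */
        printf("with range TLBIs:    flush whole mm? %d\n",
               limit_excess(start, end, pages, PAGE_SIZE, true));
        printf("without range TLBIs: flush whole mm? %d\n",
               limit_excess(start, end, pages, PAGE_SIZE, false));
        return 0;
}
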
*/ - __flush_tlb_range(vma, start, end, PAGE_SIZE, false, 0); + __flush_tlb_range(vma, start, end, PAGE_SIZE, false, TLBI_TTL_UNKNOWN); } static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) { - unsigned long addr; + const unsigned long stride = PAGE_SIZE; + unsigned long pages; - if ((end - start) > (MAX_TLBI_OPS * PAGE_SIZE)) { + start = round_down(start, stride); + end = round_up(end, stride); + pages = (end - start) >> PAGE_SHIFT; + + if (__flush_tlb_range_limit_excess(start, end, pages, stride)) { flush_tlb_all(); return; } - start = __TLBI_VADDR(start, 0); - end = __TLBI_VADDR(end, 0); - dsb(ishst); - for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) - __tlbi(vaale1is, addr); + __flush_tlb_range_op(vaale1is, start, pages, stride, 0, + TLBI_TTL_UNKNOWN, false, lpa2_is_enabled()); dsb(ish); isb(); } @@ -403,6 +518,12 @@ static inline void __flush_tlb_kernel_pgtable(unsigned long kaddr) dsb(ish); isb(); } + +static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch, + struct mm_struct *mm, unsigned long start, unsigned long end) +{ + __flush_tlb_range_nosync(mm, start, end, PAGE_SIZE, true, 3); +} #endif #endif diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h index 9fab663dd2de..341174bf9106 100644 --- a/arch/arm64/include/asm/topology.h +++ b/arch/arm64/include/asm/topology.h @@ -5,6 +5,7 @@ #include <linux/cpumask.h> #ifdef CONFIG_NUMA +#include <asm/numa.h> struct pci_bus; int pcibus_to_node(struct pci_bus *bus); @@ -23,10 +24,7 @@ void update_freq_counters_refs(void); #define arch_set_freq_scale topology_set_freq_scale #define arch_scale_freq_capacity topology_get_freq_scale #define arch_scale_freq_invariant topology_scale_freq_invariant - -#ifdef CONFIG_ACPI_CPPC_LIB -#define arch_init_invariance_cppc topology_init_cpu_capacity_cppc -#endif +#define arch_scale_freq_ref topology_get_freq_ref /* Replace task scheduler's default cpu-invariant accounting */ #define arch_scale_cpu_capacity topology_get_cpu_scale @@ -34,9 +32,9 @@ void update_freq_counters_refs(void); /* Enable topology flag updates */ #define arch_update_cpu_topology topology_update_cpu_topology -/* Replace task scheduler's default thermal pressure API */ -#define arch_scale_thermal_pressure topology_get_thermal_pressure -#define arch_update_thermal_pressure topology_update_thermal_pressure +/* Replace task scheduler's default HW pressure API */ +#define arch_scale_hw_pressure topology_get_hw_pressure +#define arch_update_hw_pressure topology_update_hw_pressure #include <asm-generic/topology.h> diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h index 6e5826470bea..e3e8944a71c3 100644 --- a/arch/arm64/include/asm/traps.h +++ b/arch/arm64/include/asm/traps.h @@ -9,27 +9,34 @@ #include <linux/list.h> #include <asm/esr.h> +#include <asm/ptrace.h> #include <asm/sections.h> -struct pt_regs; - -struct undef_hook { - struct list_head node; - u32 instr_mask; - u32 instr_val; - u64 pstate_mask; - u64 pstate_val; - int (*fn)(struct pt_regs *regs, u32 instr); -}; +#ifdef CONFIG_ARMV8_DEPRECATED +bool try_emulate_armv8_deprecated(struct pt_regs *regs, u32 insn); +#else +static inline bool +try_emulate_armv8_deprecated(struct pt_regs *regs, u32 insn) +{ + return false; +} +#endif /* CONFIG_ARMV8_DEPRECATED */ -void register_undef_hook(struct undef_hook *hook); -void unregister_undef_hook(struct undef_hook *hook); void force_signal_inject(int signal, int code, unsigned long address, unsigned long err); void 
arm64_notify_segfault(unsigned long addr); void arm64_force_sig_fault(int signo, int code, unsigned long far, const char *str); +void arm64_force_sig_fault_pkey(unsigned long far, const char *str, int pkey); void arm64_force_sig_mceerr(int code, unsigned long far, short lsb, const char *str); void arm64_force_sig_ptrace_errno_trap(int errno, unsigned long far, const char *str); +int bug_brk_handler(struct pt_regs *regs, unsigned long esr); +int cfi_brk_handler(struct pt_regs *regs, unsigned long esr); +int reserved_fault_brk_handler(struct pt_regs *regs, unsigned long esr); +int kasan_brk_handler(struct pt_regs *regs, unsigned long esr); +int ubsan_brk_handler(struct pt_regs *regs, unsigned long esr); + +int early_brk64(unsigned long addr, unsigned long esr, struct pt_regs *regs); + /* * Move regs->pc to next instruction and do necessary setup before it * is executed. @@ -100,4 +107,55 @@ static inline unsigned long arm64_ras_serror_get_severity(unsigned long esr) bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned long esr); void __noreturn arm64_serror_panic(struct pt_regs *regs, unsigned long esr); + +static inline void arm64_mops_reset_regs(struct user_pt_regs *regs, unsigned long esr) +{ + bool wrong_option = esr & ESR_ELx_MOPS_ISS_WRONG_OPTION; + bool option_a = esr & ESR_ELx_MOPS_ISS_OPTION_A; + int dstreg = ESR_ELx_MOPS_ISS_DESTREG(esr); + int srcreg = ESR_ELx_MOPS_ISS_SRCREG(esr); + int sizereg = ESR_ELx_MOPS_ISS_SIZEREG(esr); + unsigned long dst, size; + + dst = regs->regs[dstreg]; + size = regs->regs[sizereg]; + + /* + * Put the registers back in the original format suitable for a + * prologue instruction, using the generic return routine from the + * Arm ARM (DDI 0487I.a) rules CNTMJ and MWFQH. + */ + if (esr & ESR_ELx_MOPS_ISS_MEM_INST) { + /* SET* instruction */ + if (option_a ^ wrong_option) { + /* Format is from Option A; forward set */ + regs->regs[dstreg] = dst + size; + regs->regs[sizereg] = -size; + } + } else { + /* CPY* instruction */ + unsigned long src = regs->regs[srcreg]; + if (!(option_a ^ wrong_option)) { + /* Format is from Option B */ + if (regs->pstate & PSR_N_BIT) { + /* Backward copy */ + regs->regs[dstreg] = dst - size; + regs->regs[srcreg] = src - size; + } + } else { + /* Format is from Option A */ + if (size & BIT(63)) { + /* Forward copy */ + regs->regs[dstreg] = dst + size; + regs->regs[srcreg] = src + size; + regs->regs[sizereg] = -size; + } + } + } + + if (esr & ESR_ELx_MOPS_ISS_FROM_EPILOGUE) + regs->pc -= 8; + else + regs->pc -= 4; +} #endif diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 2fc9f0861769..5b91803201ef 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -65,7 +65,6 @@ static inline void __uaccess_ttbr0_disable(void) ttbr &= ~TTBR_ASID_MASK; /* reserved_pg_dir placed before swapper_pg_dir */ write_sysreg(ttbr - RESERVED_SWAPPER_OFFSET, ttbr0_el1); - isb(); /* Set reserved ASID */ write_sysreg(ttbr, ttbr1_el1); isb(); @@ -89,7 +88,6 @@ static inline void __uaccess_ttbr0_enable(void) ttbr1 &= ~TTBR_ASID_MASK; /* safety measure */ ttbr1 |= ttbr0 & TTBR_ASID_MASK; write_sysreg(ttbr1, ttbr1_el1); - isb(); /* Restore user page table */ write_sysreg(ttbr0, ttbr0_el1); @@ -136,55 +134,9 @@ static inline void __uaccess_enable_hw_pan(void) CONFIG_ARM64_PAN)); } -/* - * The Tag Check Flag (TCF) mode for MTE is per EL, hence TCF0 - * affects EL0 and TCF affects EL1 irrespective of which TTBR is - * used. 
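
The arm64_mops_reset_regs() helper added to traps.h above puts the MOPS destination/source/size registers back into prologue form before the instruction sequence is restarted. Purely as an arithmetic illustration of the Option A forward-copy branch (the negative size is added to destination and source, then the size is negated), here is a standalone sketch, not part of the patch, with an invented helper name and arbitrary sample values:

#include <stdint.h>
#include <stdio.h>

/* Same three assignments as the "Forward copy" branch quoted above. */
static void mops_cpy_rewind(uint64_t *dst, uint64_t *src, uint64_t *size)
{
        *dst += *size;
        *src += *size;
        *size = -*size;
}

int main(void)
{
        /* Arbitrary sample; size has bit 63 set, as that branch requires. */
        uint64_t dst = 0x20000, src = 0x10000, size = (uint64_t)-0x180;

        mops_cpy_rewind(&dst, &src, &size);
        printf("dst=%#llx src=%#llx size=%#llx\n",
               (unsigned long long)dst, (unsigned long long)src,
               (unsigned long long)size);
        /* prints dst=0x1fe80 src=0xfe80 size=0x180 */
        return 0;
}
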
- * The kernel accesses TTBR0 usually with LDTR/STTR instructions - * when UAO is available, so these would act as EL0 accesses using - * TCF0. - * However futex.h code uses exclusives which would be executed as - * EL1, this can potentially cause a tag check fault even if the - * user disables TCF0. - * - * To address the problem we set the PSTATE.TCO bit in uaccess_enable() - * and reset it in uaccess_disable(). - * - * The Tag check override (TCO) bit disables temporarily the tag checking - * preventing the issue. - */ -static inline void __uaccess_disable_tco(void) -{ - asm volatile(ALTERNATIVE("nop", SET_PSTATE_TCO(0), - ARM64_MTE, CONFIG_KASAN_HW_TAGS)); -} - -static inline void __uaccess_enable_tco(void) -{ - asm volatile(ALTERNATIVE("nop", SET_PSTATE_TCO(1), - ARM64_MTE, CONFIG_KASAN_HW_TAGS)); -} - -/* - * These functions disable tag checking only if in MTE async mode - * since the sync mode generates exceptions synchronously and the - * nofault or load_unaligned_zeropad can handle them. - */ -static inline void __uaccess_disable_tco_async(void) -{ - if (system_uses_mte_async_or_asymm_mode()) - __uaccess_disable_tco(); -} - -static inline void __uaccess_enable_tco_async(void) -{ - if (system_uses_mte_async_or_asymm_mode()) - __uaccess_enable_tco(); -} - static inline void uaccess_disable_privileged(void) { - __uaccess_disable_tco(); + mte_disable_tco(); if (uaccess_ttbr0_disable()) return; @@ -194,7 +146,7 @@ static inline void uaccess_disable_privileged(void) static inline void uaccess_enable_privileged(void) { - __uaccess_enable_tco(); + mte_enable_tco(); if (uaccess_ttbr0_enable()) return; @@ -203,9 +155,11 @@ static inline void uaccess_enable_privileged(void) } /* - * Sanitise a uaccess pointer such that it becomes NULL if above the maximum - * user address. In case the pointer is tagged (has the top byte set), untag - * the pointer before checking. + * Sanitize a uaccess pointer such that it cannot reach any kernel address. + * + * Clearing bit 55 ensures the pointer cannot address any portion of the TTBR1 + * address range (i.e. any kernel address), and either the pointer falls within + * the TTBR0 address range or must cause a fault. */ #define uaccess_mask_ptr(ptr) (__typeof__(ptr))__uaccess_mask_ptr(ptr) static inline void __user *__uaccess_mask_ptr(const void __user *ptr) @@ -213,14 +167,12 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr) void __user *safe_ptr; asm volatile( - " bics xzr, %3, %2\n" - " csel %0, %1, xzr, eq\n" - : "=&r" (safe_ptr) - : "r" (ptr), "r" (TASK_SIZE_MAX - 1), - "r" (untagged_addr(ptr)) - : "cc"); - - csdb(); + " bic %0, %1, %2\n" + : "=r" (safe_ptr) + : "r" (ptr), + "i" (BIT(55)) + ); + return safe_ptr; } @@ -232,29 +184,40 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr) * The "__xxx_error" versions set the third argument to -EFAULT if an error * occurs, and leave it unchanged on success. 
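
As a hedged illustration of what the rewritten __uaccess_mask_ptr() above guarantees (the helper below is hypothetical; only BIT_ULL() and the architectural use of VA bit 55 to select between TTBR0 and TTBR1 are assumed):

	#include <linux/bits.h>

	/* Plain-C equivalent of the single "bic" in __uaccess_mask_ptr(). */
	static inline unsigned long example_mask_ptr_bits(unsigned long addr)
	{
		return addr & ~BIT_ULL(55);
	}

	/*
	 * example_mask_ptr_bits(0xffff000012345678UL) == 0xff7f000012345678UL,
	 * which is neither a TTBR0 nor a TTBR1 address and therefore faults,
	 * while a genuine user pointer (bit 55 already clear) passes through
	 * unchanged.
	 */

Because the mask is unconditional there is no value-dependent select left to mis-speculate, which is presumably why the old bics/csel sequence and its csdb() barrier could be dropped.
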
*/ -#define __get_mem_asm(load, reg, x, addr, err, type) \ +#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT +#define __get_mem_asm(load, reg, x, addr, label, type) \ + asm_goto_output( \ + "1: " load " " reg "0, [%1]\n" \ + _ASM_EXTABLE_##type##ACCESS(1b, %l2) \ + : "=r" (x) \ + : "r" (addr) : : label) +#else +#define __get_mem_asm(load, reg, x, addr, label, type) do { \ + int __gma_err = 0; \ asm volatile( \ "1: " load " " reg "1, [%2]\n" \ "2:\n" \ _ASM_EXTABLE_##type##ACCESS_ERR_ZERO(1b, 2b, %w0, %w1) \ - : "+r" (err), "=&r" (x) \ - : "r" (addr)) + : "+r" (__gma_err), "=r" (x) \ + : "r" (addr)); \ + if (__gma_err) goto label; } while (0) +#endif -#define __raw_get_mem(ldr, x, ptr, err, type) \ +#define __raw_get_mem(ldr, x, ptr, label, type) \ do { \ unsigned long __gu_val; \ switch (sizeof(*(ptr))) { \ case 1: \ - __get_mem_asm(ldr "b", "%w", __gu_val, (ptr), (err), type); \ + __get_mem_asm(ldr "b", "%w", __gu_val, (ptr), label, type); \ break; \ case 2: \ - __get_mem_asm(ldr "h", "%w", __gu_val, (ptr), (err), type); \ + __get_mem_asm(ldr "h", "%w", __gu_val, (ptr), label, type); \ break; \ case 4: \ - __get_mem_asm(ldr, "%w", __gu_val, (ptr), (err), type); \ + __get_mem_asm(ldr, "%w", __gu_val, (ptr), label, type); \ break; \ case 8: \ - __get_mem_asm(ldr, "%x", __gu_val, (ptr), (err), type); \ + __get_mem_asm(ldr, "%x", __gu_val, (ptr), label, type); \ break; \ default: \ BUILD_BUG(); \ @@ -267,27 +230,34 @@ do { \ * uaccess_ttbr0_disable(). As `x` and `ptr` could contain blocking functions, * we must evaluate these outside of the critical section. */ -#define __raw_get_user(x, ptr, err) \ +#define __raw_get_user(x, ptr, label) \ do { \ __typeof__(*(ptr)) __user *__rgu_ptr = (ptr); \ __typeof__(x) __rgu_val; \ __chk_user_ptr(ptr); \ - \ - uaccess_ttbr0_enable(); \ - __raw_get_mem("ldtr", __rgu_val, __rgu_ptr, err, U); \ - uaccess_ttbr0_disable(); \ - \ - (x) = __rgu_val; \ + do { \ + __label__ __rgu_failed; \ + uaccess_ttbr0_enable(); \ + __raw_get_mem("ldtr", __rgu_val, __rgu_ptr, __rgu_failed, U); \ + uaccess_ttbr0_disable(); \ + (x) = __rgu_val; \ + break; \ + __rgu_failed: \ + uaccess_ttbr0_disable(); \ + goto label; \ + } while (0); \ } while (0) #define __get_user_error(x, ptr, err) \ do { \ + __label__ __gu_failed; \ __typeof__(*(ptr)) __user *__p = (ptr); \ might_fault(); \ if (access_ok(__p, sizeof(*__p))) { \ __p = uaccess_mask_ptr(__p); \ - __raw_get_user((x), __p, (err)); \ + __raw_get_user((x), __p, __gu_failed); \ } else { \ + __gu_failed: \ (x) = (__force __typeof__(x))0; (err) = -EFAULT; \ } \ } while (0) @@ -302,48 +272,50 @@ do { \ #define get_user __get_user /* - * We must not call into the scheduler between __uaccess_enable_tco_async() and - * __uaccess_disable_tco_async(). As `dst` and `src` may contain blocking + * We must not call into the scheduler between __mte_enable_tco_async() and + * __mte_disable_tco_async(). As `dst` and `src` may contain blocking * functions, we must evaluate these outside of the critical section. 
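
The asm_goto_output() form of __get_mem_asm() above lets the exception-table fixup branch straight to a C label, so the fast path never allocates or re-tests an error register. A minimal sketch of the same pattern outside the macros (the function name is made up; "ldtr" and _ASM_EXTABLE_UACCESS() are taken from the definitions above, and real callers would bracket this with uaccess_ttbr0_enable()/_disable()):

	static inline int example_read_user_u64(const u64 __user *src, u64 *dst)
	{
		u64 val;

		asm_goto_output(
		"1:	ldtr	%x0, [%1]\n"
		_ASM_EXTABLE_UACCESS(1b, %l2)
		: "=r" (val)
		: "r" (src)
		: : fault);

		*dst = val;
		return 0;

	fault:
		return -EFAULT;
	}

Without CONFIG_CC_HAS_ASM_GOTO_OUTPUT, the #else branch above emulates the same control flow by testing a local error word and jumping to the label by hand.
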
*/ #define __get_kernel_nofault(dst, src, type, err_label) \ do { \ __typeof__(dst) __gkn_dst = (dst); \ __typeof__(src) __gkn_src = (src); \ - int __gkn_err = 0; \ + do { \ + __label__ __gkn_label; \ \ - __uaccess_enable_tco_async(); \ - __raw_get_mem("ldr", *((type *)(__gkn_dst)), \ - (__force type *)(__gkn_src), __gkn_err, K); \ - __uaccess_disable_tco_async(); \ - \ - if (unlikely(__gkn_err)) \ + __mte_enable_tco_async(); \ + __raw_get_mem("ldr", *((type *)(__gkn_dst)), \ + (__force type *)(__gkn_src), __gkn_label, K); \ + __mte_disable_tco_async(); \ + break; \ + __gkn_label: \ + __mte_disable_tco_async(); \ goto err_label; \ + } while (0); \ } while (0) -#define __put_mem_asm(store, reg, x, addr, err, type) \ - asm volatile( \ - "1: " store " " reg "1, [%2]\n" \ +#define __put_mem_asm(store, reg, x, addr, label, type) \ + asm goto( \ + "1: " store " " reg "0, [%1]\n" \ "2:\n" \ - _ASM_EXTABLE_##type##ACCESS_ERR(1b, 2b, %w0) \ - : "+r" (err) \ - : "r" (x), "r" (addr)) + _ASM_EXTABLE_##type##ACCESS(1b, %l2) \ + : : "rZ" (x), "r" (addr) : : label) -#define __raw_put_mem(str, x, ptr, err, type) \ +#define __raw_put_mem(str, x, ptr, label, type) \ do { \ __typeof__(*(ptr)) __pu_val = (x); \ switch (sizeof(*(ptr))) { \ case 1: \ - __put_mem_asm(str "b", "%w", __pu_val, (ptr), (err), type); \ + __put_mem_asm(str "b", "%w", __pu_val, (ptr), label, type); \ break; \ case 2: \ - __put_mem_asm(str "h", "%w", __pu_val, (ptr), (err), type); \ + __put_mem_asm(str "h", "%w", __pu_val, (ptr), label, type); \ break; \ case 4: \ - __put_mem_asm(str, "%w", __pu_val, (ptr), (err), type); \ + __put_mem_asm(str, "%w", __pu_val, (ptr), label, type); \ break; \ case 8: \ - __put_mem_asm(str, "%x", __pu_val, (ptr), (err), type); \ + __put_mem_asm(str, "%x", __pu_val, (ptr), label, type); \ break; \ default: \ BUILD_BUG(); \ @@ -355,25 +327,34 @@ do { \ * uaccess_ttbr0_disable(). As `x` and `ptr` could contain blocking functions, * we must evaluate these outside of the critical section. */ -#define __raw_put_user(x, ptr, err) \ +#define __raw_put_user(x, ptr, label) \ do { \ + __label__ __rpu_failed; \ __typeof__(*(ptr)) __user *__rpu_ptr = (ptr); \ __typeof__(*(ptr)) __rpu_val = (x); \ __chk_user_ptr(__rpu_ptr); \ \ - uaccess_ttbr0_enable(); \ - __raw_put_mem("sttr", __rpu_val, __rpu_ptr, err, U); \ - uaccess_ttbr0_disable(); \ + do { \ + uaccess_ttbr0_enable(); \ + __raw_put_mem("sttr", __rpu_val, __rpu_ptr, __rpu_failed, U); \ + uaccess_ttbr0_disable(); \ + break; \ + __rpu_failed: \ + uaccess_ttbr0_disable(); \ + goto label; \ + } while (0); \ } while (0) #define __put_user_error(x, ptr, err) \ do { \ + __label__ __pu_failed; \ __typeof__(*(ptr)) __user *__p = (ptr); \ might_fault(); \ if (access_ok(__p, sizeof(*__p))) { \ __p = uaccess_mask_ptr(__p); \ - __raw_put_user((x), __p, (err)); \ + __raw_put_user((x), __p, __pu_failed); \ } else { \ + __pu_failed: \ (err) = -EFAULT; \ } \ } while (0) @@ -388,23 +369,26 @@ do { \ #define put_user __put_user /* - * We must not call into the scheduler between __uaccess_enable_tco_async() and - * __uaccess_disable_tco_async(). As `dst` and `src` may contain blocking + * We must not call into the scheduler between __mte_enable_tco_async() and + * __mte_disable_tco_async(). As `dst` and `src` may contain blocking * functions, we must evaluate these outside of the critical section. 
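
For illustration, the label-based __get_kernel_nofault() above is consumed much as the generic copy_from_kernel_nofault() consumes it: the caller supplies the error label, and the TCO-async window opened by __mte_enable_tco_async() never spans anything that might sleep. The helper below is hypothetical:

	static unsigned long example_peek_kernel_word(const unsigned long *addr)
	{
		unsigned long val;

		__get_kernel_nofault(&val, addr, unsigned long, bad);
		return val;
	bad:
		return 0;
	}
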
*/ #define __put_kernel_nofault(dst, src, type, err_label) \ do { \ __typeof__(dst) __pkn_dst = (dst); \ __typeof__(src) __pkn_src = (src); \ - int __pkn_err = 0; \ - \ - __uaccess_enable_tco_async(); \ - __raw_put_mem("str", *((type *)(__pkn_src)), \ - (__force type *)(__pkn_dst), __pkn_err, K); \ - __uaccess_disable_tco_async(); \ \ - if (unlikely(__pkn_err)) \ + do { \ + __label__ __pkn_err; \ + __mte_enable_tco_async(); \ + __raw_put_mem("str", *((type *)(__pkn_src)), \ + (__force type *)(__pkn_dst), __pkn_err, K); \ + __mte_disable_tco_async(); \ + break; \ + __pkn_err: \ + __mte_disable_tco_async(); \ goto err_label; \ + } while (0); \ } while(0) extern unsigned long __must_check __arch_copy_from_user(void *to, const void __user *from, unsigned long n); @@ -429,6 +413,51 @@ extern unsigned long __must_check __arch_copy_to_user(void __user *to, const voi __actu_ret; \ }) +static __must_check __always_inline bool user_access_begin(const void __user *ptr, size_t len) +{ + if (unlikely(!access_ok(ptr,len))) + return 0; + uaccess_ttbr0_enable(); + return 1; +} +#define user_access_begin(a,b) user_access_begin(a,b) +#define user_access_end() uaccess_ttbr0_disable() +#define unsafe_put_user(x, ptr, label) \ + __raw_put_mem("sttr", x, uaccess_mask_ptr(ptr), label, U) +#define unsafe_get_user(x, ptr, label) \ + __raw_get_mem("ldtr", x, uaccess_mask_ptr(ptr), label, U) + +/* + * KCSAN uses these to save and restore ttbr state. + * We do not support KCSAN with ARM64_SW_TTBR0_PAN, so + * they are no-ops. + */ +static inline unsigned long user_access_save(void) { return 0; } +static inline void user_access_restore(unsigned long enabled) { } + +/* + * We want the unsafe accessors to always be inlined and use + * the error labels - thus the macro games. + */ +#define unsafe_copy_loop(dst, src, len, type, label) \ + while (len >= sizeof(type)) { \ + unsafe_put_user(*(type *)(src),(type __user *)(dst),label); \ + dst += sizeof(type); \ + src += sizeof(type); \ + len -= sizeof(type); \ + } + +#define unsafe_copy_to_user(_dst,_src,_len,label) \ +do { \ + char __user *__ucu_dst = (_dst); \ + const char *__ucu_src = (_src); \ + size_t __ucu_len = (_len); \ + unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u64, label); \ + unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u32, label); \ + unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u16, label); \ + unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u8, label); \ +} while (0) + #define INLINE_COPY_TO_USER #define INLINE_COPY_FROM_USER @@ -449,8 +478,6 @@ extern long strncpy_from_user(char *dest, const char __user *src, long count); extern __must_check long strnlen_user(const char __user *str, long n); #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE -struct page; -void memcpy_page_flushcache(char *to, struct page *page, size_t offset, size_t len); extern unsigned long __must_check __copy_user_flushcache(void *to, const void __user *from, unsigned long n); static inline int __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size) @@ -475,4 +502,44 @@ static inline size_t probe_subpage_writeable(const char __user *uaddr, #endif /* CONFIG_ARCH_HAS_SUBPAGE_FAULTS */ +#ifdef CONFIG_ARM64_GCS + +static inline int gcssttr(unsigned long __user *addr, unsigned long val) +{ + register unsigned long __user *_addr __asm__ ("x0") = addr; + register unsigned long _val __asm__ ("x1") = val; + int err = 0; + + /* GCSSTTR x1, x0 */ + asm volatile( + "1: .inst 0xd91f1c01\n" + "2: \n" + _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %w0) + : "+r" (err) + : "rZ" 
(_val), "r" (_addr) + : "memory"); + + return err; +} + +static inline void put_user_gcs(unsigned long val, unsigned long __user *addr, + int *err) +{ + int ret; + + if (!access_ok((char __user *)addr, sizeof(u64))) { + *err = -EFAULT; + return; + } + + uaccess_ttbr0_enable(); + ret = gcssttr(addr, val); + if (ret != 0) + *err = ret; + uaccess_ttbr0_disable(); +} + + +#endif /* CONFIG_ARM64_GCS */ + #endif /* __ASM_UACCESS_H */ diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 037feba03a51..80618c9bbcd8 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -17,35 +17,17 @@ #define __ARCH_WANT_SYS_VFORK /* - * Compat syscall numbers used by the AArch64 kernel. - */ -#define __NR_compat_restart_syscall 0 -#define __NR_compat_exit 1 -#define __NR_compat_read 3 -#define __NR_compat_write 4 -#define __NR_compat_gettimeofday 78 -#define __NR_compat_sigreturn 119 -#define __NR_compat_rt_sigreturn 173 -#define __NR_compat_clock_gettime 263 -#define __NR_compat_clock_getres 264 -#define __NR_compat_clock_gettime64 403 -#define __NR_compat_clock_getres_time64 406 - -/* * The following SVCs are ARM private. */ #define __ARM_NR_COMPAT_BASE 0x0f0000 #define __ARM_NR_compat_cacheflush (__ARM_NR_COMPAT_BASE + 2) #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5) #define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800) - -#define __NR_compat_syscalls 451 #endif #define __ARCH_WANT_SYS_CLONE +#define __ARCH_WANT_NEW_STAT -#ifndef __COMPAT_SYSCALL_NR -#include <uapi/asm/unistd.h> -#endif +#include <asm/unistd_64.h> #define NR_syscalls (__NR_syscalls) diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 604a2053d006..e0b1a0b57f75 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -1,914 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0-only */ -/* - * AArch32 (compat) system call definitions. - * - * Copyright (C) 2001-2005 Russell King - * Copyright (C) 2012 ARM Ltd. 
- */ +#ifndef _UAPI__ASM_ARM_UNISTD_H +#define _UAPI__ASM_ARM_UNISTD_H -#ifndef __SYSCALL -#define __SYSCALL(x, y) -#endif +#include <asm/unistd_32.h> -#define __NR_restart_syscall 0 -__SYSCALL(__NR_restart_syscall, sys_restart_syscall) -#define __NR_exit 1 -__SYSCALL(__NR_exit, sys_exit) -#define __NR_fork 2 -__SYSCALL(__NR_fork, sys_fork) -#define __NR_read 3 -__SYSCALL(__NR_read, sys_read) -#define __NR_write 4 -__SYSCALL(__NR_write, sys_write) -#define __NR_open 5 -__SYSCALL(__NR_open, compat_sys_open) -#define __NR_close 6 -__SYSCALL(__NR_close, sys_close) - /* 7 was sys_waitpid */ -__SYSCALL(7, sys_ni_syscall) -#define __NR_creat 8 -__SYSCALL(__NR_creat, sys_creat) -#define __NR_link 9 -__SYSCALL(__NR_link, sys_link) -#define __NR_unlink 10 -__SYSCALL(__NR_unlink, sys_unlink) -#define __NR_execve 11 -__SYSCALL(__NR_execve, compat_sys_execve) -#define __NR_chdir 12 -__SYSCALL(__NR_chdir, sys_chdir) - /* 13 was sys_time */ -__SYSCALL(13, sys_ni_syscall) -#define __NR_mknod 14 -__SYSCALL(__NR_mknod, sys_mknod) -#define __NR_chmod 15 -__SYSCALL(__NR_chmod, sys_chmod) -#define __NR_lchown 16 -__SYSCALL(__NR_lchown, sys_lchown16) - /* 17 was sys_break */ -__SYSCALL(17, sys_ni_syscall) - /* 18 was sys_stat */ -__SYSCALL(18, sys_ni_syscall) -#define __NR_lseek 19 -__SYSCALL(__NR_lseek, compat_sys_lseek) -#define __NR_getpid 20 -__SYSCALL(__NR_getpid, sys_getpid) -#define __NR_mount 21 -__SYSCALL(__NR_mount, sys_mount) - /* 22 was sys_umount */ -__SYSCALL(22, sys_ni_syscall) -#define __NR_setuid 23 -__SYSCALL(__NR_setuid, sys_setuid16) -#define __NR_getuid 24 -__SYSCALL(__NR_getuid, sys_getuid16) - /* 25 was sys_stime */ -__SYSCALL(25, sys_ni_syscall) -#define __NR_ptrace 26 -__SYSCALL(__NR_ptrace, compat_sys_ptrace) - /* 27 was sys_alarm */ -__SYSCALL(27, sys_ni_syscall) - /* 28 was sys_fstat */ -__SYSCALL(28, sys_ni_syscall) -#define __NR_pause 29 -__SYSCALL(__NR_pause, sys_pause) - /* 30 was sys_utime */ -__SYSCALL(30, sys_ni_syscall) - /* 31 was sys_stty */ -__SYSCALL(31, sys_ni_syscall) - /* 32 was sys_gtty */ -__SYSCALL(32, sys_ni_syscall) -#define __NR_access 33 -__SYSCALL(__NR_access, sys_access) -#define __NR_nice 34 -__SYSCALL(__NR_nice, sys_nice) - /* 35 was sys_ftime */ -__SYSCALL(35, sys_ni_syscall) -#define __NR_sync 36 -__SYSCALL(__NR_sync, sys_sync) -#define __NR_kill 37 -__SYSCALL(__NR_kill, sys_kill) -#define __NR_rename 38 -__SYSCALL(__NR_rename, sys_rename) -#define __NR_mkdir 39 -__SYSCALL(__NR_mkdir, sys_mkdir) -#define __NR_rmdir 40 -__SYSCALL(__NR_rmdir, sys_rmdir) -#define __NR_dup 41 -__SYSCALL(__NR_dup, sys_dup) -#define __NR_pipe 42 -__SYSCALL(__NR_pipe, sys_pipe) -#define __NR_times 43 -__SYSCALL(__NR_times, compat_sys_times) - /* 44 was sys_prof */ -__SYSCALL(44, sys_ni_syscall) -#define __NR_brk 45 -__SYSCALL(__NR_brk, sys_brk) -#define __NR_setgid 46 -__SYSCALL(__NR_setgid, sys_setgid16) -#define __NR_getgid 47 -__SYSCALL(__NR_getgid, sys_getgid16) - /* 48 was sys_signal */ -__SYSCALL(48, sys_ni_syscall) -#define __NR_geteuid 49 -__SYSCALL(__NR_geteuid, sys_geteuid16) -#define __NR_getegid 50 -__SYSCALL(__NR_getegid, sys_getegid16) -#define __NR_acct 51 -__SYSCALL(__NR_acct, sys_acct) -#define __NR_umount2 52 -__SYSCALL(__NR_umount2, sys_umount) - /* 53 was sys_lock */ -__SYSCALL(53, sys_ni_syscall) -#define __NR_ioctl 54 -__SYSCALL(__NR_ioctl, compat_sys_ioctl) -#define __NR_fcntl 55 -__SYSCALL(__NR_fcntl, compat_sys_fcntl) - /* 56 was sys_mpx */ -__SYSCALL(56, sys_ni_syscall) -#define __NR_setpgid 57 -__SYSCALL(__NR_setpgid, sys_setpgid) - /* 58 was 
sys_ulimit */ -__SYSCALL(58, sys_ni_syscall) - /* 59 was sys_olduname */ -__SYSCALL(59, sys_ni_syscall) -#define __NR_umask 60 -__SYSCALL(__NR_umask, sys_umask) -#define __NR_chroot 61 -__SYSCALL(__NR_chroot, sys_chroot) -#define __NR_ustat 62 -__SYSCALL(__NR_ustat, compat_sys_ustat) -#define __NR_dup2 63 -__SYSCALL(__NR_dup2, sys_dup2) -#define __NR_getppid 64 -__SYSCALL(__NR_getppid, sys_getppid) -#define __NR_getpgrp 65 -__SYSCALL(__NR_getpgrp, sys_getpgrp) -#define __NR_setsid 66 -__SYSCALL(__NR_setsid, sys_setsid) -#define __NR_sigaction 67 -__SYSCALL(__NR_sigaction, compat_sys_sigaction) - /* 68 was sys_sgetmask */ -__SYSCALL(68, sys_ni_syscall) - /* 69 was sys_ssetmask */ -__SYSCALL(69, sys_ni_syscall) -#define __NR_setreuid 70 -__SYSCALL(__NR_setreuid, sys_setreuid16) -#define __NR_setregid 71 -__SYSCALL(__NR_setregid, sys_setregid16) -#define __NR_sigsuspend 72 -__SYSCALL(__NR_sigsuspend, sys_sigsuspend) -#define __NR_sigpending 73 -__SYSCALL(__NR_sigpending, compat_sys_sigpending) -#define __NR_sethostname 74 -__SYSCALL(__NR_sethostname, sys_sethostname) -#define __NR_setrlimit 75 -__SYSCALL(__NR_setrlimit, compat_sys_setrlimit) - /* 76 was compat_sys_getrlimit */ -__SYSCALL(76, sys_ni_syscall) -#define __NR_getrusage 77 -__SYSCALL(__NR_getrusage, compat_sys_getrusage) -#define __NR_gettimeofday 78 -__SYSCALL(__NR_gettimeofday, compat_sys_gettimeofday) -#define __NR_settimeofday 79 -__SYSCALL(__NR_settimeofday, compat_sys_settimeofday) -#define __NR_getgroups 80 -__SYSCALL(__NR_getgroups, sys_getgroups16) -#define __NR_setgroups 81 -__SYSCALL(__NR_setgroups, sys_setgroups16) - /* 82 was compat_sys_select */ -__SYSCALL(82, sys_ni_syscall) -#define __NR_symlink 83 -__SYSCALL(__NR_symlink, sys_symlink) - /* 84 was sys_lstat */ -__SYSCALL(84, sys_ni_syscall) -#define __NR_readlink 85 -__SYSCALL(__NR_readlink, sys_readlink) -#define __NR_uselib 86 -__SYSCALL(__NR_uselib, sys_uselib) -#define __NR_swapon 87 -__SYSCALL(__NR_swapon, sys_swapon) -#define __NR_reboot 88 -__SYSCALL(__NR_reboot, sys_reboot) - /* 89 was sys_readdir */ -__SYSCALL(89, sys_ni_syscall) - /* 90 was sys_mmap */ -__SYSCALL(90, sys_ni_syscall) -#define __NR_munmap 91 -__SYSCALL(__NR_munmap, sys_munmap) -#define __NR_truncate 92 -__SYSCALL(__NR_truncate, compat_sys_truncate) -#define __NR_ftruncate 93 -__SYSCALL(__NR_ftruncate, compat_sys_ftruncate) -#define __NR_fchmod 94 -__SYSCALL(__NR_fchmod, sys_fchmod) -#define __NR_fchown 95 -__SYSCALL(__NR_fchown, sys_fchown16) -#define __NR_getpriority 96 -__SYSCALL(__NR_getpriority, sys_getpriority) -#define __NR_setpriority 97 -__SYSCALL(__NR_setpriority, sys_setpriority) - /* 98 was sys_profil */ -__SYSCALL(98, sys_ni_syscall) -#define __NR_statfs 99 -__SYSCALL(__NR_statfs, compat_sys_statfs) -#define __NR_fstatfs 100 -__SYSCALL(__NR_fstatfs, compat_sys_fstatfs) - /* 101 was sys_ioperm */ -__SYSCALL(101, sys_ni_syscall) - /* 102 was sys_socketcall */ -__SYSCALL(102, sys_ni_syscall) -#define __NR_syslog 103 -__SYSCALL(__NR_syslog, sys_syslog) -#define __NR_setitimer 104 -__SYSCALL(__NR_setitimer, compat_sys_setitimer) -#define __NR_getitimer 105 -__SYSCALL(__NR_getitimer, compat_sys_getitimer) -#define __NR_stat 106 -__SYSCALL(__NR_stat, compat_sys_newstat) -#define __NR_lstat 107 -__SYSCALL(__NR_lstat, compat_sys_newlstat) -#define __NR_fstat 108 -__SYSCALL(__NR_fstat, compat_sys_newfstat) - /* 109 was sys_uname */ -__SYSCALL(109, sys_ni_syscall) - /* 110 was sys_iopl */ -__SYSCALL(110, sys_ni_syscall) -#define __NR_vhangup 111 -__SYSCALL(__NR_vhangup, sys_vhangup) - /* 
112 was sys_idle */ -__SYSCALL(112, sys_ni_syscall) - /* 113 was sys_syscall */ -__SYSCALL(113, sys_ni_syscall) -#define __NR_wait4 114 -__SYSCALL(__NR_wait4, compat_sys_wait4) -#define __NR_swapoff 115 -__SYSCALL(__NR_swapoff, sys_swapoff) -#define __NR_sysinfo 116 -__SYSCALL(__NR_sysinfo, compat_sys_sysinfo) - /* 117 was sys_ipc */ -__SYSCALL(117, sys_ni_syscall) -#define __NR_fsync 118 -__SYSCALL(__NR_fsync, sys_fsync) -#define __NR_sigreturn 119 -__SYSCALL(__NR_sigreturn, compat_sys_sigreturn) -#define __NR_clone 120 -__SYSCALL(__NR_clone, sys_clone) -#define __NR_setdomainname 121 -__SYSCALL(__NR_setdomainname, sys_setdomainname) -#define __NR_uname 122 -__SYSCALL(__NR_uname, sys_newuname) - /* 123 was sys_modify_ldt */ -__SYSCALL(123, sys_ni_syscall) -#define __NR_adjtimex 124 -__SYSCALL(__NR_adjtimex, sys_adjtimex_time32) -#define __NR_mprotect 125 -__SYSCALL(__NR_mprotect, sys_mprotect) -#define __NR_sigprocmask 126 -__SYSCALL(__NR_sigprocmask, compat_sys_sigprocmask) - /* 127 was sys_create_module */ -__SYSCALL(127, sys_ni_syscall) -#define __NR_init_module 128 -__SYSCALL(__NR_init_module, sys_init_module) -#define __NR_delete_module 129 -__SYSCALL(__NR_delete_module, sys_delete_module) - /* 130 was sys_get_kernel_syms */ -__SYSCALL(130, sys_ni_syscall) -#define __NR_quotactl 131 -__SYSCALL(__NR_quotactl, sys_quotactl) -#define __NR_getpgid 132 -__SYSCALL(__NR_getpgid, sys_getpgid) -#define __NR_fchdir 133 -__SYSCALL(__NR_fchdir, sys_fchdir) -#define __NR_bdflush 134 -__SYSCALL(__NR_bdflush, sys_ni_syscall) -#define __NR_sysfs 135 -__SYSCALL(__NR_sysfs, sys_sysfs) -#define __NR_personality 136 -__SYSCALL(__NR_personality, sys_personality) - /* 137 was sys_afs_syscall */ -__SYSCALL(137, sys_ni_syscall) -#define __NR_setfsuid 138 -__SYSCALL(__NR_setfsuid, sys_setfsuid16) -#define __NR_setfsgid 139 -__SYSCALL(__NR_setfsgid, sys_setfsgid16) -#define __NR__llseek 140 -__SYSCALL(__NR__llseek, sys_llseek) -#define __NR_getdents 141 -__SYSCALL(__NR_getdents, compat_sys_getdents) -#define __NR__newselect 142 -__SYSCALL(__NR__newselect, compat_sys_select) -#define __NR_flock 143 -__SYSCALL(__NR_flock, sys_flock) -#define __NR_msync 144 -__SYSCALL(__NR_msync, sys_msync) -#define __NR_readv 145 -__SYSCALL(__NR_readv, sys_readv) -#define __NR_writev 146 -__SYSCALL(__NR_writev, sys_writev) -#define __NR_getsid 147 -__SYSCALL(__NR_getsid, sys_getsid) -#define __NR_fdatasync 148 -__SYSCALL(__NR_fdatasync, sys_fdatasync) - /* 149 was sys_sysctl */ -__SYSCALL(149, sys_ni_syscall) -#define __NR_mlock 150 -__SYSCALL(__NR_mlock, sys_mlock) -#define __NR_munlock 151 -__SYSCALL(__NR_munlock, sys_munlock) -#define __NR_mlockall 152 -__SYSCALL(__NR_mlockall, sys_mlockall) -#define __NR_munlockall 153 -__SYSCALL(__NR_munlockall, sys_munlockall) -#define __NR_sched_setparam 154 -__SYSCALL(__NR_sched_setparam, sys_sched_setparam) -#define __NR_sched_getparam 155 -__SYSCALL(__NR_sched_getparam, sys_sched_getparam) -#define __NR_sched_setscheduler 156 -__SYSCALL(__NR_sched_setscheduler, sys_sched_setscheduler) -#define __NR_sched_getscheduler 157 -__SYSCALL(__NR_sched_getscheduler, sys_sched_getscheduler) -#define __NR_sched_yield 158 -__SYSCALL(__NR_sched_yield, sys_sched_yield) -#define __NR_sched_get_priority_max 159 -__SYSCALL(__NR_sched_get_priority_max, sys_sched_get_priority_max) -#define __NR_sched_get_priority_min 160 -__SYSCALL(__NR_sched_get_priority_min, sys_sched_get_priority_min) -#define __NR_sched_rr_get_interval 161 -__SYSCALL(__NR_sched_rr_get_interval, sys_sched_rr_get_interval_time32) 
-#define __NR_nanosleep 162 -__SYSCALL(__NR_nanosleep, sys_nanosleep_time32) -#define __NR_mremap 163 -__SYSCALL(__NR_mremap, sys_mremap) -#define __NR_setresuid 164 -__SYSCALL(__NR_setresuid, sys_setresuid16) -#define __NR_getresuid 165 -__SYSCALL(__NR_getresuid, sys_getresuid16) - /* 166 was sys_vm86 */ -__SYSCALL(166, sys_ni_syscall) - /* 167 was sys_query_module */ -__SYSCALL(167, sys_ni_syscall) -#define __NR_poll 168 -__SYSCALL(__NR_poll, sys_poll) -#define __NR_nfsservctl 169 -__SYSCALL(__NR_nfsservctl, sys_ni_syscall) -#define __NR_setresgid 170 -__SYSCALL(__NR_setresgid, sys_setresgid16) -#define __NR_getresgid 171 -__SYSCALL(__NR_getresgid, sys_getresgid16) -#define __NR_prctl 172 -__SYSCALL(__NR_prctl, sys_prctl) -#define __NR_rt_sigreturn 173 -__SYSCALL(__NR_rt_sigreturn, compat_sys_rt_sigreturn) -#define __NR_rt_sigaction 174 -__SYSCALL(__NR_rt_sigaction, compat_sys_rt_sigaction) -#define __NR_rt_sigprocmask 175 -__SYSCALL(__NR_rt_sigprocmask, compat_sys_rt_sigprocmask) -#define __NR_rt_sigpending 176 -__SYSCALL(__NR_rt_sigpending, compat_sys_rt_sigpending) -#define __NR_rt_sigtimedwait 177 -__SYSCALL(__NR_rt_sigtimedwait, compat_sys_rt_sigtimedwait_time32) -#define __NR_rt_sigqueueinfo 178 -__SYSCALL(__NR_rt_sigqueueinfo, compat_sys_rt_sigqueueinfo) -#define __NR_rt_sigsuspend 179 -__SYSCALL(__NR_rt_sigsuspend, compat_sys_rt_sigsuspend) -#define __NR_pread64 180 -__SYSCALL(__NR_pread64, compat_sys_aarch32_pread64) -#define __NR_pwrite64 181 -__SYSCALL(__NR_pwrite64, compat_sys_aarch32_pwrite64) -#define __NR_chown 182 -__SYSCALL(__NR_chown, sys_chown16) -#define __NR_getcwd 183 -__SYSCALL(__NR_getcwd, sys_getcwd) -#define __NR_capget 184 -__SYSCALL(__NR_capget, sys_capget) -#define __NR_capset 185 -__SYSCALL(__NR_capset, sys_capset) -#define __NR_sigaltstack 186 -__SYSCALL(__NR_sigaltstack, compat_sys_sigaltstack) -#define __NR_sendfile 187 -__SYSCALL(__NR_sendfile, compat_sys_sendfile) - /* 188 reserved */ -__SYSCALL(188, sys_ni_syscall) - /* 189 reserved */ -__SYSCALL(189, sys_ni_syscall) -#define __NR_vfork 190 -__SYSCALL(__NR_vfork, sys_vfork) -#define __NR_ugetrlimit 191 /* SuS compliant getrlimit */ -__SYSCALL(__NR_ugetrlimit, compat_sys_getrlimit) /* SuS compliant getrlimit */ -#define __NR_mmap2 192 -__SYSCALL(__NR_mmap2, compat_sys_aarch32_mmap2) -#define __NR_truncate64 193 -__SYSCALL(__NR_truncate64, compat_sys_aarch32_truncate64) -#define __NR_ftruncate64 194 -__SYSCALL(__NR_ftruncate64, compat_sys_aarch32_ftruncate64) -#define __NR_stat64 195 -__SYSCALL(__NR_stat64, sys_stat64) -#define __NR_lstat64 196 -__SYSCALL(__NR_lstat64, sys_lstat64) -#define __NR_fstat64 197 -__SYSCALL(__NR_fstat64, sys_fstat64) -#define __NR_lchown32 198 -__SYSCALL(__NR_lchown32, sys_lchown) -#define __NR_getuid32 199 -__SYSCALL(__NR_getuid32, sys_getuid) -#define __NR_getgid32 200 -__SYSCALL(__NR_getgid32, sys_getgid) -#define __NR_geteuid32 201 -__SYSCALL(__NR_geteuid32, sys_geteuid) -#define __NR_getegid32 202 -__SYSCALL(__NR_getegid32, sys_getegid) -#define __NR_setreuid32 203 -__SYSCALL(__NR_setreuid32, sys_setreuid) -#define __NR_setregid32 204 -__SYSCALL(__NR_setregid32, sys_setregid) -#define __NR_getgroups32 205 -__SYSCALL(__NR_getgroups32, sys_getgroups) -#define __NR_setgroups32 206 -__SYSCALL(__NR_setgroups32, sys_setgroups) -#define __NR_fchown32 207 -__SYSCALL(__NR_fchown32, sys_fchown) -#define __NR_setresuid32 208 -__SYSCALL(__NR_setresuid32, sys_setresuid) -#define __NR_getresuid32 209 -__SYSCALL(__NR_getresuid32, sys_getresuid) -#define __NR_setresgid32 210 
-__SYSCALL(__NR_setresgid32, sys_setresgid) -#define __NR_getresgid32 211 -__SYSCALL(__NR_getresgid32, sys_getresgid) -#define __NR_chown32 212 -__SYSCALL(__NR_chown32, sys_chown) -#define __NR_setuid32 213 -__SYSCALL(__NR_setuid32, sys_setuid) -#define __NR_setgid32 214 -__SYSCALL(__NR_setgid32, sys_setgid) -#define __NR_setfsuid32 215 -__SYSCALL(__NR_setfsuid32, sys_setfsuid) -#define __NR_setfsgid32 216 -__SYSCALL(__NR_setfsgid32, sys_setfsgid) -#define __NR_getdents64 217 -__SYSCALL(__NR_getdents64, sys_getdents64) -#define __NR_pivot_root 218 -__SYSCALL(__NR_pivot_root, sys_pivot_root) -#define __NR_mincore 219 -__SYSCALL(__NR_mincore, sys_mincore) -#define __NR_madvise 220 -__SYSCALL(__NR_madvise, sys_madvise) -#define __NR_fcntl64 221 -__SYSCALL(__NR_fcntl64, compat_sys_fcntl64) - /* 222 for tux */ -__SYSCALL(222, sys_ni_syscall) - /* 223 is unused */ -__SYSCALL(223, sys_ni_syscall) -#define __NR_gettid 224 -__SYSCALL(__NR_gettid, sys_gettid) -#define __NR_readahead 225 -__SYSCALL(__NR_readahead, compat_sys_aarch32_readahead) -#define __NR_setxattr 226 -__SYSCALL(__NR_setxattr, sys_setxattr) -#define __NR_lsetxattr 227 -__SYSCALL(__NR_lsetxattr, sys_lsetxattr) -#define __NR_fsetxattr 228 -__SYSCALL(__NR_fsetxattr, sys_fsetxattr) -#define __NR_getxattr 229 -__SYSCALL(__NR_getxattr, sys_getxattr) -#define __NR_lgetxattr 230 -__SYSCALL(__NR_lgetxattr, sys_lgetxattr) -#define __NR_fgetxattr 231 -__SYSCALL(__NR_fgetxattr, sys_fgetxattr) -#define __NR_listxattr 232 -__SYSCALL(__NR_listxattr, sys_listxattr) -#define __NR_llistxattr 233 -__SYSCALL(__NR_llistxattr, sys_llistxattr) -#define __NR_flistxattr 234 -__SYSCALL(__NR_flistxattr, sys_flistxattr) -#define __NR_removexattr 235 -__SYSCALL(__NR_removexattr, sys_removexattr) -#define __NR_lremovexattr 236 -__SYSCALL(__NR_lremovexattr, sys_lremovexattr) -#define __NR_fremovexattr 237 -__SYSCALL(__NR_fremovexattr, sys_fremovexattr) -#define __NR_tkill 238 -__SYSCALL(__NR_tkill, sys_tkill) -#define __NR_sendfile64 239 -__SYSCALL(__NR_sendfile64, sys_sendfile64) -#define __NR_futex 240 -__SYSCALL(__NR_futex, sys_futex_time32) -#define __NR_sched_setaffinity 241 -__SYSCALL(__NR_sched_setaffinity, compat_sys_sched_setaffinity) -#define __NR_sched_getaffinity 242 -__SYSCALL(__NR_sched_getaffinity, compat_sys_sched_getaffinity) -#define __NR_io_setup 243 -__SYSCALL(__NR_io_setup, compat_sys_io_setup) -#define __NR_io_destroy 244 -__SYSCALL(__NR_io_destroy, sys_io_destroy) -#define __NR_io_getevents 245 -__SYSCALL(__NR_io_getevents, sys_io_getevents_time32) -#define __NR_io_submit 246 -__SYSCALL(__NR_io_submit, compat_sys_io_submit) -#define __NR_io_cancel 247 -__SYSCALL(__NR_io_cancel, sys_io_cancel) -#define __NR_exit_group 248 -__SYSCALL(__NR_exit_group, sys_exit_group) -#define __NR_lookup_dcookie 249 -__SYSCALL(__NR_lookup_dcookie, compat_sys_lookup_dcookie) -#define __NR_epoll_create 250 -__SYSCALL(__NR_epoll_create, sys_epoll_create) -#define __NR_epoll_ctl 251 -__SYSCALL(__NR_epoll_ctl, sys_epoll_ctl) -#define __NR_epoll_wait 252 -__SYSCALL(__NR_epoll_wait, sys_epoll_wait) -#define __NR_remap_file_pages 253 -__SYSCALL(__NR_remap_file_pages, sys_remap_file_pages) - /* 254 for set_thread_area */ -__SYSCALL(254, sys_ni_syscall) - /* 255 for get_thread_area */ -__SYSCALL(255, sys_ni_syscall) -#define __NR_set_tid_address 256 -__SYSCALL(__NR_set_tid_address, sys_set_tid_address) -#define __NR_timer_create 257 -__SYSCALL(__NR_timer_create, compat_sys_timer_create) -#define __NR_timer_settime 258 -__SYSCALL(__NR_timer_settime, sys_timer_settime32) 
-#define __NR_timer_gettime 259 -__SYSCALL(__NR_timer_gettime, sys_timer_gettime32) -#define __NR_timer_getoverrun 260 -__SYSCALL(__NR_timer_getoverrun, sys_timer_getoverrun) -#define __NR_timer_delete 261 -__SYSCALL(__NR_timer_delete, sys_timer_delete) -#define __NR_clock_settime 262 -__SYSCALL(__NR_clock_settime, sys_clock_settime32) -#define __NR_clock_gettime 263 -__SYSCALL(__NR_clock_gettime, sys_clock_gettime32) -#define __NR_clock_getres 264 -__SYSCALL(__NR_clock_getres, sys_clock_getres_time32) -#define __NR_clock_nanosleep 265 -__SYSCALL(__NR_clock_nanosleep, sys_clock_nanosleep_time32) -#define __NR_statfs64 266 -__SYSCALL(__NR_statfs64, compat_sys_aarch32_statfs64) -#define __NR_fstatfs64 267 -__SYSCALL(__NR_fstatfs64, compat_sys_aarch32_fstatfs64) -#define __NR_tgkill 268 -__SYSCALL(__NR_tgkill, sys_tgkill) -#define __NR_utimes 269 -__SYSCALL(__NR_utimes, sys_utimes_time32) -#define __NR_arm_fadvise64_64 270 -__SYSCALL(__NR_arm_fadvise64_64, compat_sys_aarch32_fadvise64_64) -#define __NR_pciconfig_iobase 271 -__SYSCALL(__NR_pciconfig_iobase, sys_pciconfig_iobase) -#define __NR_pciconfig_read 272 -__SYSCALL(__NR_pciconfig_read, sys_pciconfig_read) -#define __NR_pciconfig_write 273 -__SYSCALL(__NR_pciconfig_write, sys_pciconfig_write) -#define __NR_mq_open 274 -__SYSCALL(__NR_mq_open, compat_sys_mq_open) -#define __NR_mq_unlink 275 -__SYSCALL(__NR_mq_unlink, sys_mq_unlink) -#define __NR_mq_timedsend 276 -__SYSCALL(__NR_mq_timedsend, sys_mq_timedsend_time32) -#define __NR_mq_timedreceive 277 -__SYSCALL(__NR_mq_timedreceive, sys_mq_timedreceive_time32) -#define __NR_mq_notify 278 -__SYSCALL(__NR_mq_notify, compat_sys_mq_notify) -#define __NR_mq_getsetattr 279 -__SYSCALL(__NR_mq_getsetattr, compat_sys_mq_getsetattr) -#define __NR_waitid 280 -__SYSCALL(__NR_waitid, compat_sys_waitid) -#define __NR_socket 281 -__SYSCALL(__NR_socket, sys_socket) -#define __NR_bind 282 -__SYSCALL(__NR_bind, sys_bind) -#define __NR_connect 283 -__SYSCALL(__NR_connect, sys_connect) -#define __NR_listen 284 -__SYSCALL(__NR_listen, sys_listen) -#define __NR_accept 285 -__SYSCALL(__NR_accept, sys_accept) -#define __NR_getsockname 286 -__SYSCALL(__NR_getsockname, sys_getsockname) -#define __NR_getpeername 287 -__SYSCALL(__NR_getpeername, sys_getpeername) -#define __NR_socketpair 288 -__SYSCALL(__NR_socketpair, sys_socketpair) -#define __NR_send 289 -__SYSCALL(__NR_send, sys_send) -#define __NR_sendto 290 -__SYSCALL(__NR_sendto, sys_sendto) -#define __NR_recv 291 -__SYSCALL(__NR_recv, compat_sys_recv) -#define __NR_recvfrom 292 -__SYSCALL(__NR_recvfrom, compat_sys_recvfrom) -#define __NR_shutdown 293 -__SYSCALL(__NR_shutdown, sys_shutdown) -#define __NR_setsockopt 294 -__SYSCALL(__NR_setsockopt, sys_setsockopt) -#define __NR_getsockopt 295 -__SYSCALL(__NR_getsockopt, sys_getsockopt) -#define __NR_sendmsg 296 -__SYSCALL(__NR_sendmsg, compat_sys_sendmsg) -#define __NR_recvmsg 297 -__SYSCALL(__NR_recvmsg, compat_sys_recvmsg) -#define __NR_semop 298 -__SYSCALL(__NR_semop, sys_semop) -#define __NR_semget 299 -__SYSCALL(__NR_semget, sys_semget) -#define __NR_semctl 300 -__SYSCALL(__NR_semctl, compat_sys_old_semctl) -#define __NR_msgsnd 301 -__SYSCALL(__NR_msgsnd, compat_sys_msgsnd) -#define __NR_msgrcv 302 -__SYSCALL(__NR_msgrcv, compat_sys_msgrcv) -#define __NR_msgget 303 -__SYSCALL(__NR_msgget, sys_msgget) -#define __NR_msgctl 304 -__SYSCALL(__NR_msgctl, compat_sys_old_msgctl) -#define __NR_shmat 305 -__SYSCALL(__NR_shmat, compat_sys_shmat) -#define __NR_shmdt 306 -__SYSCALL(__NR_shmdt, sys_shmdt) -#define 
__NR_shmget 307 -__SYSCALL(__NR_shmget, sys_shmget) -#define __NR_shmctl 308 -__SYSCALL(__NR_shmctl, compat_sys_old_shmctl) -#define __NR_add_key 309 -__SYSCALL(__NR_add_key, sys_add_key) -#define __NR_request_key 310 -__SYSCALL(__NR_request_key, sys_request_key) -#define __NR_keyctl 311 -__SYSCALL(__NR_keyctl, compat_sys_keyctl) -#define __NR_semtimedop 312 -__SYSCALL(__NR_semtimedop, sys_semtimedop_time32) -#define __NR_vserver 313 -__SYSCALL(__NR_vserver, sys_ni_syscall) -#define __NR_ioprio_set 314 -__SYSCALL(__NR_ioprio_set, sys_ioprio_set) -#define __NR_ioprio_get 315 -__SYSCALL(__NR_ioprio_get, sys_ioprio_get) -#define __NR_inotify_init 316 -__SYSCALL(__NR_inotify_init, sys_inotify_init) -#define __NR_inotify_add_watch 317 -__SYSCALL(__NR_inotify_add_watch, sys_inotify_add_watch) -#define __NR_inotify_rm_watch 318 -__SYSCALL(__NR_inotify_rm_watch, sys_inotify_rm_watch) -#define __NR_mbind 319 -__SYSCALL(__NR_mbind, sys_mbind) -#define __NR_get_mempolicy 320 -__SYSCALL(__NR_get_mempolicy, sys_get_mempolicy) -#define __NR_set_mempolicy 321 -__SYSCALL(__NR_set_mempolicy, sys_set_mempolicy) -#define __NR_openat 322 -__SYSCALL(__NR_openat, compat_sys_openat) -#define __NR_mkdirat 323 -__SYSCALL(__NR_mkdirat, sys_mkdirat) -#define __NR_mknodat 324 -__SYSCALL(__NR_mknodat, sys_mknodat) -#define __NR_fchownat 325 -__SYSCALL(__NR_fchownat, sys_fchownat) -#define __NR_futimesat 326 -__SYSCALL(__NR_futimesat, sys_futimesat_time32) -#define __NR_fstatat64 327 -__SYSCALL(__NR_fstatat64, sys_fstatat64) -#define __NR_unlinkat 328 -__SYSCALL(__NR_unlinkat, sys_unlinkat) -#define __NR_renameat 329 -__SYSCALL(__NR_renameat, sys_renameat) -#define __NR_linkat 330 -__SYSCALL(__NR_linkat, sys_linkat) -#define __NR_symlinkat 331 -__SYSCALL(__NR_symlinkat, sys_symlinkat) -#define __NR_readlinkat 332 -__SYSCALL(__NR_readlinkat, sys_readlinkat) -#define __NR_fchmodat 333 -__SYSCALL(__NR_fchmodat, sys_fchmodat) -#define __NR_faccessat 334 -__SYSCALL(__NR_faccessat, sys_faccessat) -#define __NR_pselect6 335 -__SYSCALL(__NR_pselect6, compat_sys_pselect6_time32) -#define __NR_ppoll 336 -__SYSCALL(__NR_ppoll, compat_sys_ppoll_time32) -#define __NR_unshare 337 -__SYSCALL(__NR_unshare, sys_unshare) -#define __NR_set_robust_list 338 -__SYSCALL(__NR_set_robust_list, compat_sys_set_robust_list) -#define __NR_get_robust_list 339 -__SYSCALL(__NR_get_robust_list, compat_sys_get_robust_list) -#define __NR_splice 340 -__SYSCALL(__NR_splice, sys_splice) -#define __NR_sync_file_range2 341 -__SYSCALL(__NR_sync_file_range2, compat_sys_aarch32_sync_file_range2) -#define __NR_tee 342 -__SYSCALL(__NR_tee, sys_tee) -#define __NR_vmsplice 343 -__SYSCALL(__NR_vmsplice, sys_vmsplice) -#define __NR_move_pages 344 -__SYSCALL(__NR_move_pages, sys_move_pages) -#define __NR_getcpu 345 -__SYSCALL(__NR_getcpu, sys_getcpu) -#define __NR_epoll_pwait 346 -__SYSCALL(__NR_epoll_pwait, compat_sys_epoll_pwait) -#define __NR_kexec_load 347 -__SYSCALL(__NR_kexec_load, compat_sys_kexec_load) -#define __NR_utimensat 348 -__SYSCALL(__NR_utimensat, sys_utimensat_time32) -#define __NR_signalfd 349 -__SYSCALL(__NR_signalfd, compat_sys_signalfd) -#define __NR_timerfd_create 350 -__SYSCALL(__NR_timerfd_create, sys_timerfd_create) -#define __NR_eventfd 351 -__SYSCALL(__NR_eventfd, sys_eventfd) -#define __NR_fallocate 352 -__SYSCALL(__NR_fallocate, compat_sys_aarch32_fallocate) -#define __NR_timerfd_settime 353 -__SYSCALL(__NR_timerfd_settime, sys_timerfd_settime32) -#define __NR_timerfd_gettime 354 -__SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime32) 
-#define __NR_signalfd4 355 -__SYSCALL(__NR_signalfd4, compat_sys_signalfd4) -#define __NR_eventfd2 356 -__SYSCALL(__NR_eventfd2, sys_eventfd2) -#define __NR_epoll_create1 357 -__SYSCALL(__NR_epoll_create1, sys_epoll_create1) -#define __NR_dup3 358 -__SYSCALL(__NR_dup3, sys_dup3) -#define __NR_pipe2 359 -__SYSCALL(__NR_pipe2, sys_pipe2) -#define __NR_inotify_init1 360 -__SYSCALL(__NR_inotify_init1, sys_inotify_init1) -#define __NR_preadv 361 -__SYSCALL(__NR_preadv, compat_sys_preadv) -#define __NR_pwritev 362 -__SYSCALL(__NR_pwritev, compat_sys_pwritev) -#define __NR_rt_tgsigqueueinfo 363 -__SYSCALL(__NR_rt_tgsigqueueinfo, compat_sys_rt_tgsigqueueinfo) -#define __NR_perf_event_open 364 -__SYSCALL(__NR_perf_event_open, sys_perf_event_open) -#define __NR_recvmmsg 365 -__SYSCALL(__NR_recvmmsg, compat_sys_recvmmsg_time32) -#define __NR_accept4 366 -__SYSCALL(__NR_accept4, sys_accept4) -#define __NR_fanotify_init 367 -__SYSCALL(__NR_fanotify_init, sys_fanotify_init) -#define __NR_fanotify_mark 368 -__SYSCALL(__NR_fanotify_mark, compat_sys_fanotify_mark) -#define __NR_prlimit64 369 -__SYSCALL(__NR_prlimit64, sys_prlimit64) -#define __NR_name_to_handle_at 370 -__SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at) -#define __NR_open_by_handle_at 371 -__SYSCALL(__NR_open_by_handle_at, compat_sys_open_by_handle_at) -#define __NR_clock_adjtime 372 -__SYSCALL(__NR_clock_adjtime, sys_clock_adjtime32) -#define __NR_syncfs 373 -__SYSCALL(__NR_syncfs, sys_syncfs) -#define __NR_sendmmsg 374 -__SYSCALL(__NR_sendmmsg, compat_sys_sendmmsg) -#define __NR_setns 375 -__SYSCALL(__NR_setns, sys_setns) -#define __NR_process_vm_readv 376 -__SYSCALL(__NR_process_vm_readv, sys_process_vm_readv) -#define __NR_process_vm_writev 377 -__SYSCALL(__NR_process_vm_writev, sys_process_vm_writev) -#define __NR_kcmp 378 -__SYSCALL(__NR_kcmp, sys_kcmp) -#define __NR_finit_module 379 -__SYSCALL(__NR_finit_module, sys_finit_module) -#define __NR_sched_setattr 380 -__SYSCALL(__NR_sched_setattr, sys_sched_setattr) -#define __NR_sched_getattr 381 -__SYSCALL(__NR_sched_getattr, sys_sched_getattr) -#define __NR_renameat2 382 -__SYSCALL(__NR_renameat2, sys_renameat2) -#define __NR_seccomp 383 -__SYSCALL(__NR_seccomp, sys_seccomp) -#define __NR_getrandom 384 -__SYSCALL(__NR_getrandom, sys_getrandom) -#define __NR_memfd_create 385 -__SYSCALL(__NR_memfd_create, sys_memfd_create) -#define __NR_bpf 386 -__SYSCALL(__NR_bpf, sys_bpf) -#define __NR_execveat 387 -__SYSCALL(__NR_execveat, compat_sys_execveat) -#define __NR_userfaultfd 388 -__SYSCALL(__NR_userfaultfd, sys_userfaultfd) -#define __NR_membarrier 389 -__SYSCALL(__NR_membarrier, sys_membarrier) -#define __NR_mlock2 390 -__SYSCALL(__NR_mlock2, sys_mlock2) -#define __NR_copy_file_range 391 -__SYSCALL(__NR_copy_file_range, sys_copy_file_range) -#define __NR_preadv2 392 -__SYSCALL(__NR_preadv2, compat_sys_preadv2) -#define __NR_pwritev2 393 -__SYSCALL(__NR_pwritev2, compat_sys_pwritev2) -#define __NR_pkey_mprotect 394 -__SYSCALL(__NR_pkey_mprotect, sys_pkey_mprotect) -#define __NR_pkey_alloc 395 -__SYSCALL(__NR_pkey_alloc, sys_pkey_alloc) -#define __NR_pkey_free 396 -__SYSCALL(__NR_pkey_free, sys_pkey_free) -#define __NR_statx 397 -__SYSCALL(__NR_statx, sys_statx) -#define __NR_rseq 398 -__SYSCALL(__NR_rseq, sys_rseq) -#define __NR_io_pgetevents 399 -__SYSCALL(__NR_io_pgetevents, compat_sys_io_pgetevents) -#define __NR_migrate_pages 400 -__SYSCALL(__NR_migrate_pages, sys_migrate_pages) -#define __NR_kexec_file_load 401 -__SYSCALL(__NR_kexec_file_load, sys_kexec_file_load) -/* 402 is 
unused */ -#define __NR_clock_gettime64 403 -__SYSCALL(__NR_clock_gettime64, sys_clock_gettime) -#define __NR_clock_settime64 404 -__SYSCALL(__NR_clock_settime64, sys_clock_settime) -#define __NR_clock_adjtime64 405 -__SYSCALL(__NR_clock_adjtime64, sys_clock_adjtime) -#define __NR_clock_getres_time64 406 -__SYSCALL(__NR_clock_getres_time64, sys_clock_getres) -#define __NR_clock_nanosleep_time64 407 -__SYSCALL(__NR_clock_nanosleep_time64, sys_clock_nanosleep) -#define __NR_timer_gettime64 408 -__SYSCALL(__NR_timer_gettime64, sys_timer_gettime) -#define __NR_timer_settime64 409 -__SYSCALL(__NR_timer_settime64, sys_timer_settime) -#define __NR_timerfd_gettime64 410 -__SYSCALL(__NR_timerfd_gettime64, sys_timerfd_gettime) -#define __NR_timerfd_settime64 411 -__SYSCALL(__NR_timerfd_settime64, sys_timerfd_settime) -#define __NR_utimensat_time64 412 -__SYSCALL(__NR_utimensat_time64, sys_utimensat) -#define __NR_pselect6_time64 413 -__SYSCALL(__NR_pselect6_time64, compat_sys_pselect6_time64) -#define __NR_ppoll_time64 414 -__SYSCALL(__NR_ppoll_time64, compat_sys_ppoll_time64) -#define __NR_io_pgetevents_time64 416 -__SYSCALL(__NR_io_pgetevents_time64, sys_io_pgetevents) -#define __NR_recvmmsg_time64 417 -__SYSCALL(__NR_recvmmsg_time64, compat_sys_recvmmsg_time64) -#define __NR_mq_timedsend_time64 418 -__SYSCALL(__NR_mq_timedsend_time64, sys_mq_timedsend) -#define __NR_mq_timedreceive_time64 419 -__SYSCALL(__NR_mq_timedreceive_time64, sys_mq_timedreceive) -#define __NR_semtimedop_time64 420 -__SYSCALL(__NR_semtimedop_time64, sys_semtimedop) -#define __NR_rt_sigtimedwait_time64 421 -__SYSCALL(__NR_rt_sigtimedwait_time64, compat_sys_rt_sigtimedwait_time64) -#define __NR_futex_time64 422 -__SYSCALL(__NR_futex_time64, sys_futex) -#define __NR_sched_rr_get_interval_time64 423 -__SYSCALL(__NR_sched_rr_get_interval_time64, sys_sched_rr_get_interval) -#define __NR_pidfd_send_signal 424 -__SYSCALL(__NR_pidfd_send_signal, sys_pidfd_send_signal) -#define __NR_io_uring_setup 425 -__SYSCALL(__NR_io_uring_setup, sys_io_uring_setup) -#define __NR_io_uring_enter 426 -__SYSCALL(__NR_io_uring_enter, sys_io_uring_enter) -#define __NR_io_uring_register 427 -__SYSCALL(__NR_io_uring_register, sys_io_uring_register) -#define __NR_open_tree 428 -__SYSCALL(__NR_open_tree, sys_open_tree) -#define __NR_move_mount 429 -__SYSCALL(__NR_move_mount, sys_move_mount) -#define __NR_fsopen 430 -__SYSCALL(__NR_fsopen, sys_fsopen) -#define __NR_fsconfig 431 -__SYSCALL(__NR_fsconfig, sys_fsconfig) -#define __NR_fsmount 432 -__SYSCALL(__NR_fsmount, sys_fsmount) -#define __NR_fspick 433 -__SYSCALL(__NR_fspick, sys_fspick) -#define __NR_pidfd_open 434 -__SYSCALL(__NR_pidfd_open, sys_pidfd_open) -#define __NR_clone3 435 -__SYSCALL(__NR_clone3, sys_clone3) -#define __NR_close_range 436 -__SYSCALL(__NR_close_range, sys_close_range) -#define __NR_openat2 437 -__SYSCALL(__NR_openat2, sys_openat2) -#define __NR_pidfd_getfd 438 -__SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd) -#define __NR_faccessat2 439 -__SYSCALL(__NR_faccessat2, sys_faccessat2) -#define __NR_process_madvise 440 -__SYSCALL(__NR_process_madvise, sys_process_madvise) -#define __NR_epoll_pwait2 441 -__SYSCALL(__NR_epoll_pwait2, compat_sys_epoll_pwait2) -#define __NR_mount_setattr 442 -__SYSCALL(__NR_mount_setattr, sys_mount_setattr) -#define __NR_quotactl_fd 443 -__SYSCALL(__NR_quotactl_fd, sys_quotactl_fd) -#define __NR_landlock_create_ruleset 444 -__SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset) -#define __NR_landlock_add_rule 445 
-__SYSCALL(__NR_landlock_add_rule, sys_landlock_add_rule) -#define __NR_landlock_restrict_self 446 -__SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self) -#define __NR_process_mrelease 448 -__SYSCALL(__NR_process_mrelease, sys_process_mrelease) -#define __NR_futex_waitv 449 -__SYSCALL(__NR_futex_waitv, sys_futex_waitv) -#define __NR_set_mempolicy_home_node 450 -__SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node) +#define __NR_sync_file_range2 __NR_arm_sync_file_range -/* - * Please add new compat syscalls above this comment and update - * __NR_compat_syscalls in asm/unistd.h. - */ +#endif /* _UAPI__ASM_ARM_UNISTD_H */ diff --git a/arch/arm64/include/asm/uprobes.h b/arch/arm64/include/asm/uprobes.h index 315eef654e39..89bfb0213a50 100644 --- a/arch/arm64/include/asm/uprobes.h +++ b/arch/arm64/include/asm/uprobes.h @@ -10,24 +10,33 @@ #include <asm/insn.h> #include <asm/probes.h> -#define MAX_UINSN_BYTES AARCH64_INSN_SIZE - -#define UPROBE_SWBP_INSN BRK64_OPCODE_UPROBES +#define UPROBE_SWBP_INSN cpu_to_le32(BRK64_OPCODE_UPROBES) #define UPROBE_SWBP_INSN_SIZE AARCH64_INSN_SIZE -#define UPROBE_XOL_SLOT_BYTES MAX_UINSN_BYTES +#define UPROBE_XOL_SLOT_BYTES AARCH64_INSN_SIZE -typedef u32 uprobe_opcode_t; +typedef __le32 uprobe_opcode_t; struct arch_uprobe_task { }; struct arch_uprobe { union { - u8 insn[MAX_UINSN_BYTES]; - u8 ixol[MAX_UINSN_BYTES]; + __le32 insn; + __le32 ixol; }; struct arch_probe_insn api; bool simulate; }; +int uprobe_brk_handler(struct pt_regs *regs, unsigned long esr); +#ifdef CONFIG_UPROBES +int uprobe_single_step_handler(struct pt_regs *regs, unsigned long esr); +#else +static inline int uprobe_single_step_handler(struct pt_regs *regs, + unsigned long esr) +{ + return DBG_HOOK_ERROR; +} +#endif + #endif diff --git a/arch/arm64/include/asm/vdso.h b/arch/arm64/include/asm/vdso.h index f99dcb94b438..61679070f595 100644 --- a/arch/arm64/include/asm/vdso.h +++ b/arch/arm64/include/asm/vdso.h @@ -5,27 +5,20 @@ #ifndef __ASM_VDSO_H #define __ASM_VDSO_H -/* - * Default link address for the vDSO. - * Since we randomise the VDSO mapping, there's little point in trying - * to prelink this. 
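
One point worth spelling out from the uprobes.h hunk above: A64 instructions are always stored little-endian in memory, even under a big-endian kernel, and switching uprobe_opcode_t to __le32 makes that explicit in the type system. A small hypothetical helper shows the discipline it enforces:

	static inline bool example_is_uprobe_brk(uprobe_opcode_t insn)
	{
		/* Both sides are __le32; no byte swap is needed or allowed. */
		return insn == UPROBE_SWBP_INSN;
	}
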
- */ -#define VDSO_LBASE 0x0 - -#define __VVAR_PAGES 2 +#define __VDSO_PAGES 4 #ifndef __ASSEMBLY__ #include <generated/vdso-offsets.h> -#ifdef CONFIG_COMPAT_VDSO -#include <generated/vdso32-offsets.h> -#endif #define VDSO_SYMBOL(base, name) \ ({ \ - (void *)(vdso_offset_##name - VDSO_LBASE + (unsigned long)(base)); \ + (void *)(vdso_offset_##name + (unsigned long)(base)); \ }) +extern char vdso_start[], vdso_end[]; +extern char vdso32_start[], vdso32_end[]; + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_VDSO_H */ diff --git a/arch/arm64/include/asm/vdso/compat_gettimeofday.h b/arch/arm64/include/asm/vdso/compat_gettimeofday.h index ecb6fd4c3c64..d60ea7a72a9c 100644 --- a/arch/arm64/include/asm/vdso/compat_gettimeofday.h +++ b/arch/arm64/include/asm/vdso/compat_gettimeofday.h @@ -8,7 +8,7 @@ #ifndef __ASSEMBLY__ #include <asm/barrier.h> -#include <asm/unistd.h> +#include <asm/unistd_compat_32.h> #include <asm/errno.h> #include <asm/vdso/compat_barrier.h> @@ -24,7 +24,7 @@ int gettimeofday_fallback(struct __kernel_old_timeval *_tv, register struct timezone *tz asm("r1") = _tz; register struct __kernel_old_timeval *tv asm("r0") = _tv; register long ret asm ("r0"); - register long nr asm("r7") = __NR_compat_gettimeofday; + register long nr asm("r7") = __NR_compat32_gettimeofday; asm volatile( " swi #0\n" @@ -41,7 +41,7 @@ long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) register struct __kernel_timespec *ts asm("r1") = _ts; register clockid_t clkid asm("r0") = _clkid; register long ret asm ("r0"); - register long nr asm("r7") = __NR_compat_clock_gettime64; + register long nr asm("r7") = __NR_compat32_clock_gettime64; asm volatile( " swi #0\n" @@ -58,7 +58,7 @@ long clock_gettime32_fallback(clockid_t _clkid, struct old_timespec32 *_ts) register struct old_timespec32 *ts asm("r1") = _ts; register clockid_t clkid asm("r0") = _clkid; register long ret asm ("r0"); - register long nr asm("r7") = __NR_compat_clock_gettime; + register long nr asm("r7") = __NR_compat32_clock_gettime; asm volatile( " swi #0\n" @@ -75,7 +75,7 @@ int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) register struct __kernel_timespec *ts asm("r1") = _ts; register clockid_t clkid asm("r0") = _clkid; register long ret asm ("r0"); - register long nr asm("r7") = __NR_compat_clock_getres_time64; + register long nr asm("r7") = __NR_compat32_clock_getres_time64; asm volatile( " swi #0\n" @@ -92,7 +92,7 @@ int clock_getres32_fallback(clockid_t _clkid, struct old_timespec32 *_ts) register struct old_timespec32 *ts asm("r1") = _ts; register clockid_t clkid asm("r0") = _clkid; register long ret asm ("r0"); - register long nr asm("r7") = __NR_compat_clock_getres; + register long nr asm("r7") = __NR_compat32_clock_getres; asm volatile( " swi #0\n" @@ -104,7 +104,7 @@ int clock_getres32_fallback(clockid_t _clkid, struct old_timespec32 *_ts) } static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, - const struct vdso_data *vd) + const struct vdso_time_data *vd) { u64 res; @@ -131,45 +131,33 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, return res; } -static __always_inline const struct vdso_data *__arch_get_vdso_data(void) +static __always_inline const struct vdso_time_data *__arch_get_vdso_u_time_data(void) { - const struct vdso_data *ret; + const struct vdso_time_data *ret; /* - * This simply puts &_vdso_data into ret. 
The reason why we don't use - * `ret = _vdso_data` is that the compiler tends to optimise this in a - * very suboptimal way: instead of keeping &_vdso_data in a register, - * it goes through a relocation almost every time _vdso_data must be + * This simply puts &_vdso_time_data into ret. The reason why we don't use + * `ret = _vdso_time_data` is that the compiler tends to optimise this in a + * very suboptimal way: instead of keeping &_vdso_time_data in a register, + * it goes through a relocation almost every time _vdso_time_data must be * accessed (even in subfunctions). This is both time and space * consuming: each relocation uses a word in the code section, and it * has to be loaded at runtime. * * This trick hides the assignment from the compiler. Since it cannot * track where the pointer comes from, it will only use one relocation - * where __arch_get_vdso_data() is called, and then keep the result in - * a register. + * where __aarch64_get_vdso_u_time_data() is called, and then keep the + * result in a register. */ - asm volatile("mov %0, %1" : "=r"(ret) : "r"(_vdso_data)); + asm volatile("mov %0, %1" : "=r"(ret) : "r"(&vdso_u_time_data)); return ret; } +#define __arch_get_vdso_u_time_data __arch_get_vdso_u_time_data -#ifdef CONFIG_TIME_NS -static __always_inline -const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) -{ - const struct vdso_data *ret; - - /* See __arch_get_vdso_data(). */ - asm volatile("mov %0, %1" : "=r"(ret) : "r"(_timens_data)); - - return ret; -} -#endif - -static inline bool vdso_clocksource_ok(const struct vdso_data *vd) +static inline bool vdso_clocksource_ok(const struct vdso_clock *vc) { - return vd->clock_mode == VDSO_CLOCKMODE_ARCHTIMER; + return vc->clock_mode == VDSO_CLOCKMODE_ARCHTIMER; } #define vdso_clocksource_ok vdso_clocksource_ok diff --git a/arch/arm64/include/asm/vdso/getrandom.h b/arch/arm64/include/asm/vdso/getrandom.h new file mode 100644 index 000000000000..a2197da1951b --- /dev/null +++ b/arch/arm64/include/asm/vdso/getrandom.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __ASM_VDSO_GETRANDOM_H +#define __ASM_VDSO_GETRANDOM_H + +#ifndef __ASSEMBLY__ + +#include <asm/unistd.h> +#include <asm/vdso/vsyscall.h> +#include <vdso/datapage.h> + +/** + * getrandom_syscall - Invoke the getrandom() syscall. + * @buffer: Destination buffer to fill with random bytes. + * @len: Size of @buffer in bytes. + * @flags: Zero or more GRND_* flags. + * Returns: The number of random bytes written to @buffer, or a negative value indicating an error. 
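
For context, the fallback documented above (and defined just below) is what the generic vDSO getrandom code is expected to call when its opportunistic fast path cannot satisfy a request; the wrapper here is purely illustrative:

	static ssize_t example_getrandom_slowpath(void *buf, size_t len,
						  unsigned int flags)
	{
		/* Fast path exhausted or unavailable: issue the real syscall. */
		return getrandom_syscall(buf, len, flags);
	}
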
+ */ +static __always_inline ssize_t getrandom_syscall(void *_buffer, size_t _len, unsigned int _flags) +{ + register void *buffer asm ("x0") = _buffer; + register size_t len asm ("x1") = _len; + register unsigned int flags asm ("x2") = _flags; + register long ret asm ("x0"); + register long nr asm ("x8") = __NR_getrandom; + + asm volatile( + " svc #0\n" + : "=r" (ret) + : "r" (buffer), "r" (len), "r" (flags), "r" (nr) + : "memory"); + + return ret; +} + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_VDSO_GETRANDOM_H */ diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h index 4f7a629df81f..da1ab8759592 100644 --- a/arch/arm64/include/asm/vdso/gettimeofday.h +++ b/arch/arm64/include/asm/vdso/gettimeofday.h @@ -7,8 +7,11 @@ #ifndef __ASSEMBLY__ +#include <asm/alternative.h> +#include <asm/arch_timer.h> #include <asm/barrier.h> #include <asm/unistd.h> +#include <asm/sysreg.h> #define VDSO_HAS_CLOCK_GETRES 1 @@ -65,10 +68,8 @@ int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) } static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, - const struct vdso_data *vd) + const struct vdso_time_data *vd) { - u64 res; - /* * Core checks for mode already, so this raced against a concurrent * update. Return something. Core will do another round and then @@ -77,30 +78,21 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, if (clock_mode == VDSO_CLOCKMODE_NONE) return 0; - /* - * This isb() is required to prevent that the counter value - * is speculated. - */ - isb(); - asm volatile("mrs %0, cntvct_el0" : "=r" (res) :: "memory"); - arch_counter_enforce_ordering(res); - - return res; + return __arch_counter_get_cntvct(); } -static __always_inline -const struct vdso_data *__arch_get_vdso_data(void) +#if IS_ENABLED(CONFIG_CC_IS_GCC) && IS_ENABLED(CONFIG_PAGE_SIZE_64KB) +static __always_inline const struct vdso_time_data *__arch_get_vdso_u_time_data(void) { - return _vdso_data; -} + const struct vdso_time_data *ret = &vdso_u_time_data; -#ifdef CONFIG_TIME_NS -static __always_inline -const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) -{ - return _timens_data; + /* Work around invalid absolute relocations */ + OPTIMIZER_HIDE_VAR(ret); + + return ret; } -#endif +#define __arch_get_vdso_u_time_data __arch_get_vdso_u_time_data +#endif /* IS_ENABLED(CONFIG_CC_IS_GCC) && IS_ENABLED(CONFIG_PAGE_SIZE_64KB) */ #endif /* !__ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/vdso/vsyscall.h b/arch/arm64/include/asm/vdso/vsyscall.h index f94b1457c117..417aae5763a8 100644 --- a/arch/arm64/include/asm/vdso/vsyscall.h +++ b/arch/arm64/include/asm/vdso/vsyscall.h @@ -4,30 +4,20 @@ #ifndef __ASSEMBLY__ -#include <linux/timekeeper_internal.h> #include <vdso/datapage.h> #define VDSO_PRECISION_MASK ~(0xFF00ULL<<48) -extern struct vdso_data *vdso_data; /* * Update the vDSO data page to keep in sync with kernel timekeeping. 
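
Both the compat asm-mov trick earlier and the GCC/64K-pages workaround above rely on the same idea: launder a pointer through an empty asm statement so the compiler emits a single relocation and then keeps the address in a register. In generic form (the symbol name is invented):

	extern const int example_table[];

	static inline const int *example_hidden_ref(void)
	{
		const int *p = example_table;

		/* After this, the compiler cannot prove where p came from. */
		OPTIMIZER_HIDE_VAR(p);
		return p;
	}
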
*/ static __always_inline -struct vdso_data *__arm64_get_k_vdso_data(void) +void __arch_update_vdso_clock(struct vdso_clock *vc) { - return vdso_data; + vc->mask = VDSO_PRECISION_MASK; } -#define __arch_get_k_vdso_data __arm64_get_k_vdso_data - -static __always_inline -void __arm64_update_vsyscall(struct vdso_data *vdata, struct timekeeper *tk) -{ - vdata[CS_HRES_COARSE].mask = VDSO_PRECISION_MASK; - vdata[CS_RAW].mask = VDSO_PRECISION_MASK; -} -#define __arch_update_vsyscall __arm64_update_vsyscall +#define __arch_update_vdso_clock __arch_update_vdso_clock /* The asm-generic header needs to be included after the definitions above */ #include <asm-generic/vdso/vsyscall.h> diff --git a/arch/arm64/include/asm/vectors.h b/arch/arm64/include/asm/vectors.h index bc9a2145f419..b815d8f2c0dc 100644 --- a/arch/arm64/include/asm/vectors.h +++ b/arch/arm64/include/asm/vectors.h @@ -62,7 +62,7 @@ DECLARE_PER_CPU_READ_MOSTLY(const char *, this_cpu_vector); static inline const char * arm64_get_bp_hardening_vector(enum arm64_bp_harden_el1_vectors slot) { - if (arm64_kernel_unmapped_at_el0()) + if (cpus_have_cap(ARM64_UNMAP_KERNEL_AT_EL0)) return (char *)(TRAMP_VALIAS + SZ_2K * slot); WARN_ON_ONCE(slot == EL1_VECTOR_KPTI); diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 4eb601e7de50..aa280f356b96 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -67,7 +67,8 @@ * __boot_cpu_mode records what mode CPUs were booted in. * A correctly-implemented bootloader must start all CPUs in the same mode: * In this case, both 32bit halves of __boot_cpu_mode will contain the - * same value (either 0 if booted in EL1, BOOT_CPU_MODE_EL2 if booted in EL2). + * same value (either BOOT_CPU_MODE_EL1 if booted in EL1, BOOT_CPU_MODE_EL2 if + * booted in EL2). * * Should the bootloader fail to do this, the two values will be different. * This allows the kernel to flag an error when the secondaries have come up. @@ -78,9 +79,16 @@ extern u32 __boot_cpu_mode[2]; void __hyp_set_vectors(phys_addr_t phys_vector_base); void __hyp_reset_vectors(void); +bool is_kvm_arm_initialised(void); DECLARE_STATIC_KEY_FALSE(kvm_protected_mode_initialized); +static inline bool is_pkvm_initialized(void) +{ + return IS_ENABLED(CONFIG_KVM) && + static_branch_likely(&kvm_protected_mode_initialized); +} + /* Reports the availability of HYP mode */ static inline bool is_hyp_mode_available(void) { @@ -88,8 +96,7 @@ static inline bool is_hyp_mode_available(void) * If KVM protected mode is initialized, all CPUs must have been booted * in EL2. Avoid checking __boot_cpu_mode as CPUs now come up in EL1. */ - if (IS_ENABLED(CONFIG_KVM) && - static_branch_likely(&kvm_protected_mode_initialized)) + if (is_pkvm_initialized()) return true; return (__boot_cpu_mode[0] == BOOT_CPU_MODE_EL2 && @@ -103,15 +110,16 @@ static inline bool is_hyp_mode_mismatched(void) * If KVM protected mode is initialized, all CPUs must have been booted * in EL2. Avoid checking __boot_cpu_mode as CPUs now come up in EL1. 
*/ - if (IS_ENABLED(CONFIG_KVM) && - static_branch_likely(&kvm_protected_mode_initialized)) + if (is_pkvm_initialized()) return false; return __boot_cpu_mode[0] != __boot_cpu_mode[1]; } -static inline bool is_kernel_in_hyp_mode(void) +static __always_inline bool is_kernel_in_hyp_mode(void) { + BUILD_BUG_ON(__is_defined(__KVM_NVHE_HYPERVISOR__) || + __is_defined(__KVM_VHE_HYPERVISOR__)); return read_sysreg(CurrentEL) == CurrentEL_EL2; } @@ -140,6 +148,14 @@ static __always_inline bool is_protected_kvm_enabled(void) return cpus_have_final_cap(ARM64_KVM_PROTECTED_MODE); } +static __always_inline bool has_hvhe(void) +{ + if (is_vhe_hyp_code()) + return false; + + return cpus_have_final_cap(ARM64_KVM_HVHE); +} + static inline bool is_hyp_nvhe(void) { return is_hyp_mode_available() && !is_kernel_in_hyp_mode(); diff --git a/arch/arm64/include/asm/vmalloc.h b/arch/arm64/include/asm/vmalloc.h index 38fafffe699f..12f534e8f3ed 100644 --- a/arch/arm64/include/asm/vmalloc.h +++ b/arch/arm64/include/asm/vmalloc.h @@ -23,6 +23,51 @@ static inline bool arch_vmap_pmd_supported(pgprot_t prot) return !IS_ENABLED(CONFIG_PTDUMP_DEBUGFS); } +#define arch_vmap_pte_range_map_size arch_vmap_pte_range_map_size +static inline unsigned long arch_vmap_pte_range_map_size(unsigned long addr, + unsigned long end, u64 pfn, + unsigned int max_page_shift) +{ + /* + * If the block is at least CONT_PTE_SIZE in size, and is naturally + * aligned in both virtual and physical space, then we can pte-map the + * block using the PTE_CONT bit for more efficient use of the TLB. + */ + if (max_page_shift < CONT_PTE_SHIFT) + return PAGE_SIZE; + + if (end - addr < CONT_PTE_SIZE) + return PAGE_SIZE; + + if (!IS_ALIGNED(addr, CONT_PTE_SIZE)) + return PAGE_SIZE; + + if (!IS_ALIGNED(PFN_PHYS(pfn), CONT_PTE_SIZE)) + return PAGE_SIZE; + + return CONT_PTE_SIZE; +} + +#define arch_vmap_pte_range_unmap_size arch_vmap_pte_range_unmap_size +static inline unsigned long arch_vmap_pte_range_unmap_size(unsigned long addr, + pte_t *ptep) +{ + /* + * The caller handles alignment so it's sufficient just to check + * PTE_CONT. + */ + return pte_valid_cont(__ptep_get(ptep)) ? CONT_PTE_SIZE : PAGE_SIZE; +} + +#define arch_vmap_pte_supported_shift arch_vmap_pte_supported_shift +static inline int arch_vmap_pte_supported_shift(unsigned long size) +{ + if (size >= CONT_PTE_SIZE) + return CONT_PTE_SHIFT; + + return PAGE_SHIFT; +} + #endif #define arch_vmap_pgprot_tagged arch_vmap_pgprot_tagged diff --git a/arch/arm64/include/asm/vncr_mapping.h b/arch/arm64/include/asm/vncr_mapping.h new file mode 100644 index 000000000000..f6ec500ad3fa --- /dev/null +++ b/arch/arm64/include/asm/vncr_mapping.h @@ -0,0 +1,110 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * System register offsets in the VNCR page + * All offsets are *byte* displacements! 
+ */ + +#ifndef __ARM64_VNCR_MAPPING_H__ +#define __ARM64_VNCR_MAPPING_H__ + +#define VNCR_VTTBR_EL2 0x020 +#define VNCR_VTCR_EL2 0x040 +#define VNCR_VMPIDR_EL2 0x050 +#define VNCR_CNTVOFF_EL2 0x060 +#define VNCR_HCR_EL2 0x078 +#define VNCR_HSTR_EL2 0x080 +#define VNCR_VPIDR_EL2 0x088 +#define VNCR_TPIDR_EL2 0x090 +#define VNCR_HCRX_EL2 0x0A0 +#define VNCR_VNCR_EL2 0x0B0 +#define VNCR_CPACR_EL1 0x100 +#define VNCR_CONTEXTIDR_EL1 0x108 +#define VNCR_SCTLR_EL1 0x110 +#define VNCR_ACTLR_EL1 0x118 +#define VNCR_TCR_EL1 0x120 +#define VNCR_AFSR0_EL1 0x128 +#define VNCR_AFSR1_EL1 0x130 +#define VNCR_ESR_EL1 0x138 +#define VNCR_MAIR_EL1 0x140 +#define VNCR_AMAIR_EL1 0x148 +#define VNCR_MDSCR_EL1 0x158 +#define VNCR_SPSR_EL1 0x160 +#define VNCR_CNTV_CVAL_EL0 0x168 +#define VNCR_CNTV_CTL_EL0 0x170 +#define VNCR_CNTP_CVAL_EL0 0x178 +#define VNCR_CNTP_CTL_EL0 0x180 +#define VNCR_SCXTNUM_EL1 0x188 +#define VNCR_TFSR_EL1 0x190 +#define VNCR_HDFGRTR2_EL2 0x1A0 +#define VNCR_HDFGWTR2_EL2 0x1B0 +#define VNCR_HFGRTR_EL2 0x1B8 +#define VNCR_HFGWTR_EL2 0x1C0 +#define VNCR_HFGITR_EL2 0x1C8 +#define VNCR_HDFGRTR_EL2 0x1D0 +#define VNCR_HDFGWTR_EL2 0x1D8 +#define VNCR_ZCR_EL1 0x1E0 +#define VNCR_HAFGRTR_EL2 0x1E8 +#define VNCR_TTBR0_EL1 0x200 +#define VNCR_TTBR1_EL1 0x210 +#define VNCR_FAR_EL1 0x220 +#define VNCR_ELR_EL1 0x230 +#define VNCR_SP_EL1 0x240 +#define VNCR_VBAR_EL1 0x250 +#define VNCR_TCR2_EL1 0x270 +#define VNCR_SCTLR2_EL1 0x278 +#define VNCR_PIRE0_EL1 0x290 +#define VNCR_PIR_EL1 0x2A0 +#define VNCR_POR_EL1 0x2A8 +#define VNCR_HFGRTR2_EL2 0x2C0 +#define VNCR_HFGWTR2_EL2 0x2C8 +#define VNCR_HFGITR2_EL2 0x310 +#define VNCR_ICH_LR0_EL2 0x400 +#define VNCR_ICH_LR1_EL2 0x408 +#define VNCR_ICH_LR2_EL2 0x410 +#define VNCR_ICH_LR3_EL2 0x418 +#define VNCR_ICH_LR4_EL2 0x420 +#define VNCR_ICH_LR5_EL2 0x428 +#define VNCR_ICH_LR6_EL2 0x430 +#define VNCR_ICH_LR7_EL2 0x438 +#define VNCR_ICH_LR8_EL2 0x440 +#define VNCR_ICH_LR9_EL2 0x448 +#define VNCR_ICH_LR10_EL2 0x450 +#define VNCR_ICH_LR11_EL2 0x458 +#define VNCR_ICH_LR12_EL2 0x460 +#define VNCR_ICH_LR13_EL2 0x468 +#define VNCR_ICH_LR14_EL2 0x470 +#define VNCR_ICH_LR15_EL2 0x478 +#define VNCR_ICH_AP0R0_EL2 0x480 +#define VNCR_ICH_AP0R1_EL2 0x488 +#define VNCR_ICH_AP0R2_EL2 0x490 +#define VNCR_ICH_AP0R3_EL2 0x498 +#define VNCR_ICH_AP1R0_EL2 0x4A0 +#define VNCR_ICH_AP1R1_EL2 0x4A8 +#define VNCR_ICH_AP1R2_EL2 0x4B0 +#define VNCR_ICH_AP1R3_EL2 0x4B8 +#define VNCR_ICH_HCR_EL2 0x4C0 +#define VNCR_ICH_VMCR_EL2 0x4C8 +#define VNCR_VDISR_EL2 0x500 +#define VNCR_VSESR_EL2 0x508 +#define VNCR_PMBLIMITR_EL1 0x800 +#define VNCR_PMBPTR_EL1 0x810 +#define VNCR_PMBSR_EL1 0x820 +#define VNCR_PMSCR_EL1 0x828 +#define VNCR_PMSEVFR_EL1 0x830 +#define VNCR_PMSICR_EL1 0x838 +#define VNCR_PMSIRR_EL1 0x840 +#define VNCR_PMSLATFR_EL1 0x848 +#define VNCR_TRFCR_EL1 0x880 +#define VNCR_MPAM1_EL1 0x900 +#define VNCR_MPAMHCR_EL2 0x930 +#define VNCR_MPAMVPMV_EL2 0x938 +#define VNCR_MPAMVPM0_EL2 0x940 +#define VNCR_MPAMVPM1_EL2 0x948 +#define VNCR_MPAMVPM2_EL2 0x950 +#define VNCR_MPAMVPM3_EL2 0x958 +#define VNCR_MPAMVPM4_EL2 0x960 +#define VNCR_MPAMVPM5_EL2 0x968 +#define VNCR_MPAMVPM6_EL2 0x970 +#define VNCR_MPAMVPM7_EL2 0x978 + +#endif /* __ARM64_VNCR_MAPPING_H__ */ diff --git a/arch/arm64/include/asm/word-at-a-time.h b/arch/arm64/include/asm/word-at-a-time.h index 1c8e4f2490bf..824ca6987a51 100644 --- a/arch/arm64/include/asm/word-at-a-time.h +++ b/arch/arm64/include/asm/word-at-a-time.h @@ -9,7 +9,8 @@ #ifndef __AARCH64EB__ -#include <linux/kernel.h> +#include <linux/bitops.h> +#include 
<linux/wordpart.h> struct word_at_a_time { const unsigned long one_bits, high_bits; @@ -26,20 +27,15 @@ static inline unsigned long has_zero(unsigned long a, unsigned long *bits, } #define prep_zero_mask(a, bits, c) (bits) +#define create_zero_mask(bits) (bits) +#define find_zero(bits) (__ffs(bits) >> 3) -static inline unsigned long create_zero_mask(unsigned long bits) +static inline unsigned long zero_bytemask(unsigned long bits) { bits = (bits - 1) & ~bits; return bits >> 7; } -static inline unsigned long find_zero(unsigned long mask) -{ - return fls64(mask) >> 3; -} - -#define zero_bytemask(mask) (mask) - #else /* __AARCH64EB__ */ #include <asm-generic/word-at-a-time.h> #endif @@ -55,7 +51,7 @@ static inline unsigned long load_unaligned_zeropad(const void *addr) { unsigned long ret; - __uaccess_enable_tco_async(); + __mte_enable_tco_async(); /* Load word from unaligned pointer addr */ asm( @@ -65,7 +61,7 @@ static inline unsigned long load_unaligned_zeropad(const void *addr) : "=&r" (ret) : "r" (addr), "Q" (*(unsigned long *)addr)); - __uaccess_disable_tco_async(); + __mte_disable_tco_async(); return ret; } diff --git a/arch/arm64/include/uapi/asm/Kbuild b/arch/arm64/include/uapi/asm/Kbuild index 602d137932dc..c6d141d7b7d7 100644 --- a/arch/arm64/include/uapi/asm/Kbuild +++ b/arch/arm64/include/uapi/asm/Kbuild @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 +syscall-y += unistd_64.h generic-y += kvm_para.h diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index 1ad2568a2569..72c78468b806 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -21,7 +21,7 @@ * HWCAP flags - for AT_HWCAP * * Bits 62 and 63 are reserved for use by libc. - * Bits 32-61 are unallocated for potential use by libc. + * Bits 33-61 are unallocated for potential use by libc. 
*/ #define HWCAP_FP (1 << 0) #define HWCAP_ASIMD (1 << 1) @@ -55,6 +55,22 @@ #define HWCAP_SB (1 << 29) #define HWCAP_PACA (1 << 30) #define HWCAP_PACG (1UL << 31) +#define HWCAP_GCS (1UL << 32) +#define HWCAP_CMPBR (1UL << 33) +#define HWCAP_FPRCVT (1UL << 34) +#define HWCAP_F8MM8 (1UL << 35) +#define HWCAP_F8MM4 (1UL << 36) +#define HWCAP_SVE_F16MM (1UL << 37) +#define HWCAP_SVE_ELTPERM (1UL << 38) +#define HWCAP_SVE_AES2 (1UL << 39) +#define HWCAP_SVE_BFSCALE (1UL << 40) +#define HWCAP_SVE2P2 (1UL << 41) +#define HWCAP_SME2P2 (1UL << 42) +#define HWCAP_SME_SBITPERM (1UL << 43) +#define HWCAP_SME_AES (1UL << 44) +#define HWCAP_SME_SFEXPA (1UL << 45) +#define HWCAP_SME_STMOP (1UL << 46) +#define HWCAP_SME_SMOP4 (1UL << 47) /* * HWCAP2 flags - for AT_HWCAP2 @@ -92,5 +108,42 @@ #define HWCAP2_SME_FA64 (1 << 30) #define HWCAP2_WFXT (1UL << 31) #define HWCAP2_EBF16 (1UL << 32) +#define HWCAP2_SVE_EBF16 (1UL << 33) +#define HWCAP2_CSSC (1UL << 34) +#define HWCAP2_RPRFM (1UL << 35) +#define HWCAP2_SVE2P1 (1UL << 36) +#define HWCAP2_SME2 (1UL << 37) +#define HWCAP2_SME2P1 (1UL << 38) +#define HWCAP2_SME_I16I32 (1UL << 39) +#define HWCAP2_SME_BI32I32 (1UL << 40) +#define HWCAP2_SME_B16B16 (1UL << 41) +#define HWCAP2_SME_F16F16 (1UL << 42) +#define HWCAP2_MOPS (1UL << 43) +#define HWCAP2_HBC (1UL << 44) +#define HWCAP2_SVE_B16B16 (1UL << 45) +#define HWCAP2_LRCPC3 (1UL << 46) +#define HWCAP2_LSE128 (1UL << 47) +#define HWCAP2_FPMR (1UL << 48) +#define HWCAP2_LUT (1UL << 49) +#define HWCAP2_FAMINMAX (1UL << 50) +#define HWCAP2_F8CVT (1UL << 51) +#define HWCAP2_F8FMA (1UL << 52) +#define HWCAP2_F8DP4 (1UL << 53) +#define HWCAP2_F8DP2 (1UL << 54) +#define HWCAP2_F8E4M3 (1UL << 55) +#define HWCAP2_F8E5M2 (1UL << 56) +#define HWCAP2_SME_LUTV2 (1UL << 57) +#define HWCAP2_SME_F8F16 (1UL << 58) +#define HWCAP2_SME_F8F32 (1UL << 59) +#define HWCAP2_SME_SF8FMA (1UL << 60) +#define HWCAP2_SME_SF8DP4 (1UL << 61) +#define HWCAP2_SME_SF8DP2 (1UL << 62) +#define HWCAP2_POE (1UL << 63) + +/* + * HWCAP3 flags - for AT_HWCAP3 + */ +#define HWCAP3_MTE_FAR (1UL << 0) +#define HWCAP3_MTE_STORE_ONLY (1UL << 1) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 3bb134355874..ed5f3892674c 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -37,15 +37,11 @@ #include <asm/ptrace.h> #include <asm/sve_context.h> -#define __KVM_HAVE_GUEST_DEBUG #define __KVM_HAVE_IRQ_LINE -#define __KVM_HAVE_READONLY_MEM #define __KVM_HAVE_VCPU_EVENTS #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 - -#define KVM_REG_SIZE(id) \ - (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) +#define KVM_DIRTY_LOG_PAGE_OFFSET 64 struct kvm_regs { struct user_pt_regs regs; /* sp = sp_el0 */ @@ -75,9 +71,11 @@ struct kvm_regs { /* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */ #define KVM_ARM_DEVICE_TYPE_SHIFT 0 -#define KVM_ARM_DEVICE_TYPE_MASK (0xffff << KVM_ARM_DEVICE_TYPE_SHIFT) +#define KVM_ARM_DEVICE_TYPE_MASK __GENMASK(KVM_ARM_DEVICE_TYPE_SHIFT + 15, \ + KVM_ARM_DEVICE_TYPE_SHIFT) #define KVM_ARM_DEVICE_ID_SHIFT 16 -#define KVM_ARM_DEVICE_ID_MASK (0xffff << KVM_ARM_DEVICE_ID_SHIFT) +#define KVM_ARM_DEVICE_ID_MASK __GENMASK(KVM_ARM_DEVICE_ID_SHIFT + 15, \ + KVM_ARM_DEVICE_ID_SHIFT) /* Supported device IDs */ #define KVM_ARM_DEVICE_VGIC_V2 0 @@ -106,6 +104,8 @@ struct kvm_regs { #define KVM_ARM_VCPU_SVE 4 /* enable SVE for this CPU */ #define KVM_ARM_VCPU_PTRAUTH_ADDRESS 5 /* VCPU uses address authentication */ #define 
KVM_ARM_VCPU_PTRAUTH_GENERIC 6 /* VCPU uses generic authentication */ +#define KVM_ARM_VCPU_HAS_EL2 7 /* Support nested virtualization */ +#define KVM_ARM_VCPU_HAS_EL2_E2H0 8 /* Limit NV support to E2H RES0 */ struct kvm_vcpu_init { __u32 target; @@ -158,6 +158,11 @@ struct kvm_sync_regs { __u64 device_irq_level; }; +/* Bits for run->s.regs.device_irq_level */ +#define KVM_ARM_DEV_EL1_VTIMER (1 << 0) +#define KVM_ARM_DEV_EL1_PTIMER (1 << 1) +#define KVM_ARM_DEV_PMU (1 << 2) + /* * PMU filter structure. Describe a range of events with a particular * action. To be used with KVM_ARM_VCPU_PMU_V3_FILTER. @@ -194,6 +199,15 @@ struct kvm_arm_copy_mte_tags { __u64 reserved[2]; }; +/* + * Counter/Timer offset structure. Describe the virtual/physical offset. + * To be used with KVM_ARM_SET_COUNTER_OFFSET. + */ +struct kvm_arm_counter_offset { + __u64 counter_offset; + __u64 reserved; +}; + #define KVM_ARM_TAGS_TO_GUEST 0 #define KVM_ARM_TAGS_FROM_GUEST 1 @@ -358,6 +372,7 @@ enum { #endif }; +/* Vendor hyper call function numbers 0-63 */ #define KVM_REG_ARM_VENDOR_HYP_BMAP KVM_REG_ARM_FW_FEAT_BMAP_REG(2) enum { @@ -368,6 +383,21 @@ enum { #endif }; +/* Vendor hyper call function numbers 64-127 */ +#define KVM_REG_ARM_VENDOR_HYP_BMAP_2 KVM_REG_ARM_FW_FEAT_BMAP_REG(3) + +enum { + KVM_REG_ARM_VENDOR_HYP_BIT_DISCOVER_IMPL_VER = 0, + KVM_REG_ARM_VENDOR_HYP_BIT_DISCOVER_IMPL_CPUS = 1, +#ifdef __KERNEL__ + KVM_REG_ARM_VENDOR_HYP_BMAP_2_BIT_COUNT, +#endif +}; + +/* Device Control API on vm fd */ +#define KVM_ARM_VM_SMCCC_CTRL 0 +#define KVM_ARM_VM_SMCCC_FILTER 0 + /* Device Control API: ARM VGIC */ #define KVM_DEV_ARM_VGIC_GRP_ADDR 0 #define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1 @@ -386,6 +416,7 @@ enum { #define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6 #define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7 #define KVM_DEV_ARM_VGIC_GRP_ITS_REGS 8 +#define KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ 9 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \ (0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT) @@ -400,13 +431,16 @@ enum { /* Device Control API on vcpu fd */ #define KVM_ARM_VCPU_PMU_V3_CTRL 0 -#define KVM_ARM_VCPU_PMU_V3_IRQ 0 -#define KVM_ARM_VCPU_PMU_V3_INIT 1 -#define KVM_ARM_VCPU_PMU_V3_FILTER 2 -#define KVM_ARM_VCPU_PMU_V3_SET_PMU 3 +#define KVM_ARM_VCPU_PMU_V3_IRQ 0 +#define KVM_ARM_VCPU_PMU_V3_INIT 1 +#define KVM_ARM_VCPU_PMU_V3_FILTER 2 +#define KVM_ARM_VCPU_PMU_V3_SET_PMU 3 +#define KVM_ARM_VCPU_PMU_V3_SET_NR_COUNTERS 4 #define KVM_ARM_VCPU_TIMER_CTRL 1 #define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0 #define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1 +#define KVM_ARM_VCPU_TIMER_IRQ_HVTIMER 2 +#define KVM_ARM_VCPU_TIMER_IRQ_HPTIMER 3 #define KVM_ARM_VCPU_PVTIME_CTRL 2 #define KVM_ARM_VCPU_PVTIME_IPA 0 @@ -462,9 +496,68 @@ enum { */ #define KVM_SYSTEM_EVENT_RESET_FLAG_PSCI_RESET2 (1ULL << 0) +/* + * Shutdown caused by a PSCI v1.3 SYSTEM_OFF2 call. + * Valid only when the system event has a type of KVM_SYSTEM_EVENT_SHUTDOWN. + */ +#define KVM_SYSTEM_EVENT_SHUTDOWN_FLAG_PSCI_OFF2 (1ULL << 0) + /* run->fail_entry.hardware_entry_failure_reason codes. 
*/ #define KVM_EXIT_FAIL_ENTRY_CPU_UNSUPPORTED (1ULL << 0) +enum kvm_smccc_filter_action { + KVM_SMCCC_FILTER_HANDLE = 0, + KVM_SMCCC_FILTER_DENY, + KVM_SMCCC_FILTER_FWD_TO_USER, + +#ifdef __KERNEL__ + NR_SMCCC_FILTER_ACTIONS +#endif +}; + +struct kvm_smccc_filter { + __u32 base; + __u32 nr_functions; + __u8 action; + __u8 pad[15]; +}; + +/* arm64-specific KVM_EXIT_HYPERCALL flags */ +#define KVM_HYPERCALL_EXIT_SMC (1U << 0) +#define KVM_HYPERCALL_EXIT_16BIT (1U << 1) + +/* + * Get feature ID registers userspace writable mask. + * + * From DDI0487J.a, D19.2.66 ("ID_AA64MMFR2_EL1, AArch64 Memory Model + * Feature Register 2"): + * + * "The Feature ID space is defined as the System register space in + * AArch64 with op0==3, op1=={0, 1, 3}, CRn==0, CRm=={0-7}, + * op2=={0-7}." + * + * This covers all currently known R/O registers that indicate + * anything useful feature wise, including the ID registers. + * + * If we ever need to introduce a new range, it will be described as + * such in the range field. + */ +#define KVM_ARM_FEATURE_ID_RANGE_IDX(op0, op1, crn, crm, op2) \ + ({ \ + __u64 __op1 = (op1) & 3; \ + __op1 -= (__op1 == 3); \ + (__op1 << 6 | ((crm) & 7) << 3 | (op2)); \ + }) + +#define KVM_ARM_FEATURE_ID_RANGE 0 +#define KVM_ARM_FEATURE_ID_RANGE_SIZE (3 * 8 * 8) + +struct reg_mask_range { + __u64 addr; /* Pointer to mask array */ + __u32 range; /* Requested range */ + __u32 reserved[13]; +}; + #endif #endif /* __ARM_KVM_H__ */ diff --git a/arch/arm64/include/uapi/asm/mman.h b/arch/arm64/include/uapi/asm/mman.h index 1e6482a838e1..e7e0c8216243 100644 --- a/arch/arm64/include/uapi/asm/mman.h +++ b/arch/arm64/include/uapi/asm/mman.h @@ -7,4 +7,13 @@ #define PROT_BTI 0x10 /* BTI guarded page */ #define PROT_MTE 0x20 /* Normal Tagged mapping */ +/* Override any generic PKEY permission defines */ +#define PKEY_DISABLE_EXECUTE 0x4 +#define PKEY_DISABLE_READ 0x8 +#undef PKEY_ACCESS_MASK +#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\ + PKEY_DISABLE_WRITE |\ + PKEY_DISABLE_READ |\ + PKEY_DISABLE_EXECUTE) + #endif /* ! _UAPI__ASM_MMAN_H */ diff --git a/arch/arm64/include/uapi/asm/perf_regs.h b/arch/arm64/include/uapi/asm/perf_regs.h index d54daafa89e3..86e556429e0e 100644 --- a/arch/arm64/include/uapi/asm/perf_regs.h +++ b/arch/arm64/include/uapi/asm/perf_regs.h @@ -37,5 +37,12 @@ enum perf_event_arm_regs { PERF_REG_ARM64_SP, PERF_REG_ARM64_PC, PERF_REG_ARM64_MAX, + + /* Extended/pseudo registers */ + PERF_REG_ARM64_VG = 46, /* SVE Vector Granule */ + PERF_REG_ARM64_EXTENDED_MAX }; + +#define PERF_REG_EXTENDED_MASK (1ULL << PERF_REG_ARM64_VG) + #endif /* _ASM_ARM64_PERF_REGS_H */ diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index 7fa2f7036aa7..0f39ba4f3efd 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h @@ -324,6 +324,14 @@ struct user_za_header { #define ZA_PT_SIZE(vq) \ (ZA_PT_ZA_OFFSET + ZA_PT_ZA_SIZE(vq)) +/* GCS state (NT_ARM_GCS) */ + +struct user_gcs { + __u64 features_enabled; + __u64 features_locked; + __u64 gcspr_el0; +}; + #endif /* __ASSEMBLY__ */ #endif /* _UAPI__ASM_PTRACE_H */ diff --git a/arch/arm64/include/uapi/asm/sigcontext.h b/arch/arm64/include/uapi/asm/sigcontext.h index 4aaf31e3bf16..d42f7a92238b 100644 --- a/arch/arm64/include/uapi/asm/sigcontext.h +++ b/arch/arm64/include/uapi/asm/sigcontext.h @@ -62,6 +62,10 @@ struct sigcontext { * context. Such structures must be placed after the rt_sigframe on the stack * and be 16-byte aligned. 
The last structure must be a dummy one with the * magic and size set to 0. + * + * Note that the values allocated for use as magic should be chosen to + * be meaningful in ASCII to aid manual parsing, ZA doesn't follow this + * convention due to oversight but it should be observed for future additions. */ struct _aarch64_ctx { __u32 magic; @@ -94,6 +98,13 @@ struct esr_context { __u64 esr; }; +#define POE_MAGIC 0x504f4530 + +struct poe_context { + struct _aarch64_ctx head; + __u64 por_el0; +}; + /* * extra_context: describes extra space in the signal frame for * additional structures that don't fit in sigcontext.__reserved[]. @@ -140,6 +151,22 @@ struct sve_context { #define SVE_SIG_FLAG_SM 0x1 /* Context describes streaming mode */ +/* TPIDR2_EL0 context */ +#define TPIDR2_MAGIC 0x54504902 + +struct tpidr2_context { + struct _aarch64_ctx head; + __u64 tpidr2; +}; + +/* FPMR context */ +#define FPMR_MAGIC 0x46504d52 + +struct fpmr_context { + struct _aarch64_ctx head; + __u64 fpmr; +}; + #define ZA_MAGIC 0x54366345 struct za_context { @@ -148,6 +175,23 @@ struct za_context { __u16 __reserved[3]; }; +#define ZT_MAGIC 0x5a544e01 + +struct zt_context { + struct _aarch64_ctx head; + __u16 nregs; + __u16 __reserved[3]; +}; + +#define GCS_MAGIC 0x47435300 + +struct gcs_context { + struct _aarch64_ctx head; + __u64 gcspr; + __u64 features_enabled; + __u64 reserved; +}; + #endif /* !__ASSEMBLY__ */ #include <asm/sve_context.h> @@ -157,7 +201,7 @@ struct za_context { * vector length beyond its initial architectural limit of 2048 bits * (16 quadwords). * - * See linux/Documentation/arm64/sve.rst for a description of the VL/VQ + * See linux/Documentation/arch/arm64/sve.rst for a description of the VL/VQ * terminology. */ #define SVE_VQ_BYTES __SVE_VQ_BYTES /* bytes per quadword */ @@ -292,12 +336,23 @@ struct za_context { ((sizeof(struct za_context) + (__SVE_VQ_BYTES - 1)) \ / __SVE_VQ_BYTES * __SVE_VQ_BYTES) -#define ZA_SIG_REGS_SIZE(vq) ((vq * __SVE_VQ_BYTES) * (vq * __SVE_VQ_BYTES)) +#define ZA_SIG_REGS_SIZE(vq) (((vq) * __SVE_VQ_BYTES) * ((vq) * __SVE_VQ_BYTES)) #define ZA_SIG_ZAV_OFFSET(vq, n) (ZA_SIG_REGS_OFFSET + \ - (SVE_SIG_ZREG_SIZE(vq) * n)) + (SVE_SIG_ZREG_SIZE(vq) * (n))) #define ZA_SIG_CONTEXT_SIZE(vq) \ (ZA_SIG_REGS_OFFSET + ZA_SIG_REGS_SIZE(vq)) +#define ZT_SIG_REG_SIZE 512 + +#define ZT_SIG_REG_BYTES (ZT_SIG_REG_SIZE / 8) + +#define ZT_SIG_REGS_OFFSET sizeof(struct zt_context) + +#define ZT_SIG_REGS_SIZE(n) (ZT_SIG_REG_BYTES * (n)) + +#define ZT_SIG_CONTEXT_SIZE(n) \ + (sizeof(struct zt_context) + ZT_SIG_REGS_SIZE(n)) + #endif /* _UAPI__ASM_SIGCONTEXT_H */ diff --git a/arch/arm64/include/uapi/asm/sve_context.h b/arch/arm64/include/uapi/asm/sve_context.h index 754ab751b523..72aefc081061 100644 --- a/arch/arm64/include/uapi/asm/sve_context.h +++ b/arch/arm64/include/uapi/asm/sve_context.h @@ -13,6 +13,17 @@ #define __SVE_VQ_BYTES 16 /* number of bytes per quadword */ +/* + * Yes, __SVE_VQ_MAX is 512 QUADWORDS. + * + * To help ensure forward portability, this is much larger than the + * current maximum value defined by the SVE architecture. While arrays + * or static allocations can be sized based on this value, watch out! + * It will waste a surprisingly large amount of memory. + * + * Dynamic sizing based on the actual runtime vector length is likely to + * be preferable for most purposes. 
+ */
 #define __SVE_VQ_MIN 1
 #define __SVE_VQ_MAX 512
diff --git a/arch/arm64/include/uapi/asm/unistd.h b/arch/arm64/include/uapi/asm/unistd.h
index ce2ee8f1e361..df36f23876e8 100644
--- a/arch/arm64/include/uapi/asm/unistd.h
+++ b/arch/arm64/include/uapi/asm/unistd.h
@@ -1,25 +1,2 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#define __ARCH_WANT_RENAMEAT
-#define __ARCH_WANT_NEW_STAT
-#define __ARCH_WANT_SET_GET_RLIMIT
-#define __ARCH_WANT_TIME32_SYSCALLS
-#define __ARCH_WANT_SYS_CLONE3
-#define __ARCH_WANT_MEMFD_SECRET
-
-#include <asm-generic/unistd.h>
+#include <asm/unistd_64.h>
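
The hunks above touch several userspace-visible interfaces and a few well-known kernel tricks; the short, self-contained C sketches below illustrate them. They are illustrations under stated assumptions, not part of the patch.

The vdso/gettimeofday.h hunk keeps the comment about hiding &vdso_u_time_data from the compiler (and adds an OPTIMIZER_HIDE_VAR() workaround for GCC with 64K pages). This is a minimal, portable sketch of the same idea; table and lookup() are hypothetical stand-ins, and the empty asm plays the role the explicit "mov %0, %1" plays in the arm64 vDSO.

#include <stdio.h>

static int table[4] = { 10, 20, 30, 40 };

static int lookup(int idx)
{
	int *p = table;

	/*
	 * The empty asm with a "+r" operand makes the pointer's origin
	 * opaque, so the compiler keeps it in a register instead of
	 * re-resolving a relocation at every access.
	 */
	asm volatile("" : "+r"(p));
	return p[idx];
}

int main(void)
{
	printf("%d %d\n", lookup(0), lookup(3));
	return 0;
}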
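
The new asm/vdso/getrandom.h fallback uses the arm64 syscall convention directly: syscall number in x8, arguments in x0-x2, "svc #0", result back in x0. The userspace sketch below mirrors that convention (arm64 only); raw_getrandom() is a hypothetical name.

#include <stddef.h>
#include <stdio.h>
#include <sys/syscall.h>	/* __NR_getrandom */

static long raw_getrandom(void *buf, size_t len, unsigned int flags)
{
	register void *x0 asm("x0") = buf;
	register size_t x1 asm("x1") = len;
	register unsigned int x2 asm("x2") = flags;
	register long nr asm("x8") = __NR_getrandom;
	register long ret asm("x0");

	/* Same register binding as the vDSO fallback above. */
	asm volatile("svc #0"
		     : "=r"(ret)
		     : "r"(x0), "r"(x1), "r"(x2), "r"(nr)
		     : "memory");
	return ret;
}

int main(void)
{
	unsigned char buf[16];
	long n = raw_getrandom(buf, sizeof(buf), 0);

	printf("getrandom returned %ld bytes\n", n);
	return n < 0;
}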
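
The arch_vmap_pte_range_map_size() addition in asm/vmalloc.h only uses PTE_CONT when the range is large enough and both the virtual and physical addresses are CONT_PTE_SIZE aligned. A condensed sketch of that decision, assuming a 4KiB base page and 16 contiguous PTEs (CONT_PTE_SIZE = 64KiB); the real function also honours max_page_shift and works in PFNs rather than raw physical addresses.

#include <stdio.h>

#define PAGE_SIZE	4096UL
#define CONT_PTE_SIZE	(16 * PAGE_SIZE)

static unsigned long map_size(unsigned long addr, unsigned long end,
			      unsigned long phys)
{
	if (end - addr < CONT_PTE_SIZE)
		return PAGE_SIZE;		/* range too small */
	if (addr % CONT_PTE_SIZE || phys % CONT_PTE_SIZE)
		return PAGE_SIZE;		/* not naturally aligned */
	return CONT_PTE_SIZE;			/* eligible for PTE_CONT */
}

int main(void)
{
	/* aligned VA and PA, large enough range: cont-mappable */
	printf("%lu\n", map_size(0xffff800010000000UL,
				 0xffff800010100000UL, 0x80000000UL));
	/* misaligned PA: falls back to single pages */
	printf("%lu\n", map_size(0xffff800010000000UL,
				 0xffff800010100000UL, 0x80001000UL));
	return 0;
}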
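
The asm/word-at-a-time.h change turns find_zero() into __ffs(bits) >> 3 on the has_zero() mask. A condensed little-endian userspace sketch of the same zero-byte search (it folds prep_zero_mask/create_zero_mask away, as the patched header effectively does, and assumes a 64-bit long).

#include <stdio.h>
#include <string.h>

#define ONE_BITS	0x0101010101010101UL
#define HIGH_BITS	0x8080808080808080UL

static unsigned long has_zero(unsigned long a, unsigned long *bits)
{
	/* sets the high bit of every byte of 'a' that is zero */
	unsigned long mask = ((a - ONE_BITS) & ~a) & HIGH_BITS;

	*bits = mask;
	return mask;
}

static unsigned long find_zero(unsigned long bits)
{
	/* same as the header's __ffs(bits) >> 3 */
	return (unsigned long)__builtin_ctzl(bits) >> 3;
}

int main(void)
{
	const char buf[8] = { 'a', 'b', 'c', 0, 'x', 'y', 'z', 'w' };
	unsigned long w, bits;

	memcpy(&w, buf, sizeof(w));		/* little-endian load */
	if (has_zero(w, &bits))
		printf("first zero byte at index %lu\n", find_zero(bits));
	return 0;
}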
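
The uapi/asm/hwcap.h hunk extends AT_HWCAP into bit 32 and beyond and grows AT_HWCAP2 up to bit 63. Userspace probes these through getauxval(); a minimal sketch, with the bit values copied from the header above as fallbacks in case the installed libc headers predate them.

#include <stdio.h>
#include <sys/auxv.h>

#ifndef HWCAP2_MOPS
#define HWCAP2_MOPS	(1UL << 43)
#endif
#ifndef HWCAP2_LSE128
#define HWCAP2_LSE128	(1UL << 47)
#endif

int main(void)
{
	unsigned long hwcap2 = getauxval(AT_HWCAP2);

	printf("FEAT_MOPS:   %s\n", (hwcap2 & HWCAP2_MOPS) ? "yes" : "no");
	printf("FEAT_LSE128: %s\n", (hwcap2 & HWCAP2_LSE128) ? "yes" : "no");
	return 0;
}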
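
struct kvm_smccc_filter and enum kvm_smccc_filter_action describe one SMCCC function-ID range and what KVM should do with it. A sketch of installing a filter, assuming the device-attribute interface documented for this feature (KVM_SET_DEVICE_ATTR on the VM fd with group KVM_ARM_VM_SMCCC_CTRL and attribute KVM_ARM_VM_SMCCC_FILTER); the function-ID range is hypothetical and recent arm64 uapi headers are required.

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR);
	int vm = kvm >= 0 ? ioctl(kvm, KVM_CREATE_VM, 0) : -1;
	struct kvm_smccc_filter filter = {
		.base = 0x86000000,	/* hypothetical function-ID range */
		.nr_functions = 1,
		.action = KVM_SMCCC_FILTER_FWD_TO_USER,
	};
	struct kvm_device_attr attr = {
		.group = KVM_ARM_VM_SMCCC_CTRL,
		.attr = KVM_ARM_VM_SMCCC_FILTER,
		.addr = (unsigned long)&filter,
	};

	if (vm < 0 || ioctl(vm, KVM_SET_DEVICE_ATTR, &attr))
		perror("smccc filter");
	return 0;
}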
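
KVM_ARM_FEATURE_ID_RANGE_IDX() linearises the (op1, CRm, op2) coordinates of a feature ID register into an index in the writable-mask array, folding op1 value 3 into slot 2 so the three op1 values {0, 1, 3} fit in 3 * 8 * 8 entries. A worked example using ID_AA64MMFR2_EL1 (op0=3, op1=0, CRn=0, CRm=7, op2=2), which the macro maps to index 58.

#include <stdio.h>

static unsigned int feature_id_range_idx(unsigned int op1, unsigned int crm,
					 unsigned int op2)
{
	op1 &= 3;
	op1 -= (op1 == 3);	/* op1 values {0, 1, 3} -> slots {0, 1, 2} */
	return (op1 << 6) | ((crm & 7) << 3) | (op2 & 7);
}

int main(void)
{
	printf("ID_AA64MMFR2_EL1 maps to index %u of %d\n",
	       feature_id_range_idx(0, 7, 2), 3 * 8 * 8);
	return 0;
}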
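
The sigcontext.h comment notes that new _aarch64_ctx magic values should be meaningful in ASCII, with ZA_MAGIC as the historical exception. Decoding a few of the values defined above makes that concrete (non-printable bytes are shown as '.').

#include <stdio.h>

static void print_magic(const char *name, unsigned int magic)
{
	char s[5];
	int i;

	for (i = 0; i < 4; i++) {
		unsigned char c = magic >> (24 - 8 * i);

		s[i] = (c >= 0x20 && c < 0x7f) ? c : '.';
	}
	s[4] = '\0';
	printf("%-12s 0x%08x  \"%s\"\n", name, magic, s);
}

int main(void)
{
	print_magic("FPMR_MAGIC", 0x46504d52);	/* "FPMR" */
	print_magic("POE_MAGIC",  0x504f4530);	/* "POE0" */
	print_magic("GCS_MAGIC",  0x47435300);	/* "GCS." - low byte is 0 */
	print_magic("ZT_MAGIC",   0x5a544e01);	/* "ZTN." */
	print_magic("ZA_MAGIC",   0x54366345);	/* "T6cE" - the noted oversight */
	return 0;
}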
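
ZA_SIG_REGS_SIZE(vq) is quadratic in the vector length because ZA is a square byte array of (vq * 16) rows of (vq * 16) bytes, which is also why the __SVE_VQ_MAX comment warns against sizing static buffers for 512 quadwords. A small arithmetic sketch of how the payload grows (VL in bits is vq * 128).

#include <stdio.h>

#define SVE_VQ_BYTES	16UL

static unsigned long za_sig_regs_size(unsigned long vq)
{
	/* matches ZA_SIG_REGS_SIZE(vq) above */
	return (vq * SVE_VQ_BYTES) * (vq * SVE_VQ_BYTES);
}

int main(void)
{
	for (unsigned long vq = 1; vq <= 16; vq *= 2)
		printf("vq=%2lu (VL=%4lu bits): ZA payload %6lu bytes\n",
		       vq, vq * 128, za_sig_regs_size(vq));

	/* sizing for __SVE_VQ_MAX instead of the runtime VL is very costly */
	printf("at __SVE_VQ_MAX=512 the ZA payload alone would be %lu MiB\n",
	       za_sig_regs_size(512) >> 20);
	return 0;
}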
