Diffstat (limited to 'arch/arm64/include')
-rw-r--r--  arch/arm64/include/asm/Kbuild | 12
-rw-r--r--  arch/arm64/include/asm/acpi.h | 36
-rw-r--r--  arch/arm64/include/asm/alternative-macros.h | 98
-rw-r--r--  arch/arm64/include/asm/alternative.h | 7
-rw-r--r--  arch/arm64/include/asm/apple_m1_pmu.h | 1
-rw-r--r--  arch/arm64/include/asm/arch_gicv3.h | 28
-rw-r--r--  arch/arm64/include/asm/arch_timer.h | 10
-rw-r--r--  arch/arm64/include/asm/archrandom.h | 48
-rw-r--r--  arch/arm64/include/asm/arm_pmuv3.h | 191
-rw-r--r--  arch/arm64/include/asm/asm-bug.h | 34
-rw-r--r--  arch/arm64/include/asm/asm-extable.h | 13
-rw-r--r--  arch/arm64/include/asm/asm-uaccess.h | 6
-rw-r--r--  arch/arm64/include/asm/assembler.h | 163
-rw-r--r--  arch/arm64/include/asm/atomic.h | 28
-rw-r--r--  arch/arm64/include/asm/atomic_ll_sc.h | 112
-rw-r--r--  arch/arm64/include/asm/atomic_lse.h | 102
-rw-r--r--  arch/arm64/include/asm/barrier.h | 28
-rw-r--r--  arch/arm64/include/asm/brk-imm.h | 11
-rw-r--r--  arch/arm64/include/asm/cache.h | 45
-rw-r--r--  arch/arm64/include/asm/cacheflush.h | 8
-rw-r--r--  arch/arm64/include/asm/cfi.h | 7
-rw-r--r--  arch/arm64/include/asm/cmpxchg.h | 55
-rw-r--r--  arch/arm64/include/asm/compat.h | 6
-rw-r--r--  arch/arm64/include/asm/compiler.h | 36
-rw-r--r--  arch/arm64/include/asm/cpu.h | 14
-rw-r--r--  arch/arm64/include/asm/cpucaps.h | 82
-rw-r--r--  arch/arm64/include/asm/cpufeature.h | 426
-rw-r--r--  arch/arm64/include/asm/cputype.h | 113
-rw-r--r--  arch/arm64/include/asm/crash_reserve.h | 10
-rw-r--r--  arch/arm64/include/asm/daifflags.h | 2
-rw-r--r--  arch/arm64/include/asm/debug-monitors.h | 42
-rw-r--r--  arch/arm64/include/asm/efi.h | 59
-rw-r--r--  arch/arm64/include/asm/el2_setup.h | 418
-rw-r--r--  arch/arm64/include/asm/elf.h | 10
-rw-r--r--  arch/arm64/include/asm/esr.h | 285
-rw-r--r--  arch/arm64/include/asm/exception.h | 43
-rw-r--r--  arch/arm64/include/asm/extable.h | 4
-rw-r--r--  arch/arm64/include/asm/fb.h | 23
-rw-r--r--  arch/arm64/include/asm/fixmap.h | 28
-rw-r--r--  arch/arm64/include/asm/fpsimd.h | 156
-rw-r--r--  arch/arm64/include/asm/fpsimdmacros.h | 28
-rw-r--r--  arch/arm64/include/asm/fpu.h | 15
-rw-r--r--  arch/arm64/include/asm/ftrace.h | 164
-rw-r--r--  arch/arm64/include/asm/gcs.h | 107
-rw-r--r--  arch/arm64/include/asm/hardirq.h | 4
-rw-r--r--  arch/arm64/include/asm/hugetlb.h | 69
-rw-r--r--  arch/arm64/include/asm/hw_breakpoint.h | 13
-rw-r--r--  arch/arm64/include/asm/hwcap.h | 63
-rw-r--r--  arch/arm64/include/asm/hyperv-tlfs.h | 78
-rw-r--r--  arch/arm64/include/asm/hypervisor.h | 12
-rw-r--r--  arch/arm64/include/asm/image.h | 2
-rw-r--r--  arch/arm64/include/asm/insn.h | 188
-rw-r--r--  arch/arm64/include/asm/io.h | 217
-rw-r--r--  arch/arm64/include/asm/irq.h | 5
-rw-r--r--  arch/arm64/include/asm/irq_work.h | 2
-rw-r--r--  arch/arm64/include/asm/irqflags.h | 186
-rw-r--r--  arch/arm64/include/asm/jump_label.h | 49
-rw-r--r--  arch/arm64/include/asm/kasan.h | 24
-rw-r--r--  arch/arm64/include/asm/kernel-pgtable.h | 162
-rw-r--r--  arch/arm64/include/asm/kexec.h | 8
-rw-r--r--  arch/arm64/include/asm/kfence.h | 10
-rw-r--r--  arch/arm64/include/asm/kgdb.h | 12
-rw-r--r--  arch/arm64/include/asm/kprobes.h | 10
-rw-r--r--  arch/arm64/include/asm/kvm_arm.h | 224
-rw-r--r--  arch/arm64/include/asm/kvm_asm.h | 80
-rw-r--r--  arch/arm64/include/asm/kvm_emulate.h | 331
-rw-r--r--  arch/arm64/include/asm/kvm_host.h | 1352
-rw-r--r--  arch/arm64/include/asm/kvm_hyp.h | 66
-rw-r--r--  arch/arm64/include/asm/kvm_mmu.h | 177
-rw-r--r--  arch/arm64/include/asm/kvm_nested.h | 350
-rw-r--r--  arch/arm64/include/asm/kvm_pgtable.h | 468
-rw-r--r--  arch/arm64/include/asm/kvm_pkvm.h | 135
-rw-r--r--  arch/arm64/include/asm/kvm_ptrauth.h | 21
-rw-r--r--  arch/arm64/include/asm/kvm_ras.h | 25
-rw-r--r--  arch/arm64/include/asm/linkage.h | 8
-rw-r--r--  arch/arm64/include/asm/lse.h | 13
-rw-r--r--  arch/arm64/include/asm/mem_encrypt.h | 37
-rw-r--r--  arch/arm64/include/asm/memory.h | 149
-rw-r--r--  arch/arm64/include/asm/mman.h | 61
-rw-r--r--  arch/arm64/include/asm/mmu.h | 41
-rw-r--r--  arch/arm64/include/asm/mmu_context.h | 144
-rw-r--r--  arch/arm64/include/asm/mmzone.h | 13
-rw-r--r--  arch/arm64/include/asm/module.h | 26
-rw-r--r--  arch/arm64/include/asm/module.lds.h | 10
-rw-r--r--  arch/arm64/include/asm/mshyperv.h | 24
-rw-r--r--  arch/arm64/include/asm/mte-kasan.h | 81
-rw-r--r--  arch/arm64/include/asm/mte.h | 157
-rw-r--r--  arch/arm64/include/asm/page-def.h | 5
-rw-r--r--  arch/arm64/include/asm/page.h | 4
-rw-r--r--  arch/arm64/include/asm/pci.h | 18
-rw-r--r--  arch/arm64/include/asm/percpu.h | 30
-rw-r--r--  arch/arm64/include/asm/perf_event.h | 253
-rw-r--r--  arch/arm64/include/asm/pgalloc.h | 44
-rw-r--r--  arch/arm64/include/asm/pgtable-hwdef.h | 99
-rw-r--r--  arch/arm64/include/asm/pgtable-prot.h | 194
-rw-r--r--  arch/arm64/include/asm/pgtable-types.h | 26
-rw-r--r--  arch/arm64/include/asm/pgtable.h | 1330
-rw-r--r--  arch/arm64/include/asm/pkeys.h | 106
-rw-r--r--  arch/arm64/include/asm/pointer_auth.h | 13
-rw-r--r--  arch/arm64/include/asm/por.h | 34
-rw-r--r--  arch/arm64/include/asm/probes.h | 11
-rw-r--r--  arch/arm64/include/asm/processor.h | 107
-rw-r--r--  arch/arm64/include/asm/ptdump.h | 65
-rw-r--r--  arch/arm64/include/asm/ptrace.h | 57
-rw-r--r--  arch/arm64/include/asm/rqspinlock.h | 93
-rw-r--r--  arch/arm64/include/asm/rsi.h | 70
-rw-r--r--  arch/arm64/include/asm/rsi_cmds.h | 162
-rw-r--r--  arch/arm64/include/asm/rsi_smc.h | 193
-rw-r--r--  arch/arm64/include/asm/runtime-const.h | 88
-rw-r--r--  arch/arm64/include/asm/rwonce.h | 4
-rw-r--r--  arch/arm64/include/asm/scs.h | 37
-rw-r--r--  arch/arm64/include/asm/sdei.h | 23
-rw-r--r--  arch/arm64/include/asm/seccomp.h | 12
-rw-r--r--  arch/arm64/include/asm/sections.h | 1
-rw-r--r--  arch/arm64/include/asm/semihost.h | 24
-rw-r--r--  arch/arm64/include/asm/set_memory.h | 5
-rw-r--r--  arch/arm64/include/asm/setup.h | 33
-rw-r--r--  arch/arm64/include/asm/simd.h | 11
-rw-r--r--  arch/arm64/include/asm/smp.h | 52
-rw-r--r--  arch/arm64/include/asm/sparsemem.h | 7
-rw-r--r--  arch/arm64/include/asm/spectre.h | 28
-rw-r--r--  arch/arm64/include/asm/spinlock_types.h | 2
-rw-r--r--  arch/arm64/include/asm/stackprotector.h | 9
-rw-r--r--  arch/arm64/include/asm/stacktrace.h | 140
-rw-r--r--  arch/arm64/include/asm/stacktrace/common.h | 171
-rw-r--r--  arch/arm64/include/asm/stacktrace/frame.h | 48
-rw-r--r--  arch/arm64/include/asm/stacktrace/nvhe.h | 55
-rw-r--r--  arch/arm64/include/asm/stage2_pgtable.h | 24
-rw-r--r--  arch/arm64/include/asm/syscall.h | 32
-rw-r--r--  arch/arm64/include/asm/syscall_wrapper.h | 9
-rw-r--r--  arch/arm64/include/asm/sysreg.h | 1159
-rw-r--r--  arch/arm64/include/asm/system_misc.h | 6
-rw-r--r--  arch/arm64/include/asm/text-patching.h (renamed from arch/arm64/include/asm/patching.h) | 4
-rw-r--r--  arch/arm64/include/asm/thread_info.h | 28
-rw-r--r--  arch/arm64/include/asm/tlb.h | 50
-rw-r--r--  arch/arm64/include/asm/tlbbatch.h | 12
-rw-r--r--  arch/arm64/include/asm/tlbflush.h | 327
-rw-r--r--  arch/arm64/include/asm/topology.h | 12
-rw-r--r--  arch/arm64/include/asm/traps.h | 82
-rw-r--r--  arch/arm64/include/asm/uaccess.h | 293
-rw-r--r--  arch/arm64/include/asm/unistd.h | 22
-rw-r--r--  arch/arm64/include/asm/unistd32.h | 915
-rw-r--r--  arch/arm64/include/asm/uprobes.h | 23
-rw-r--r--  arch/arm64/include/asm/vdso.h | 17
-rw-r--r--  arch/arm64/include/asm/vdso/compat_gettimeofday.h | 50
-rw-r--r--  arch/arm64/include/asm/vdso/getrandom.h | 38
-rw-r--r--  arch/arm64/include/asm/vdso/gettimeofday.h | 36
-rw-r--r--  arch/arm64/include/asm/vdso/vsyscall.h | 16
-rw-r--r--  arch/arm64/include/asm/vectors.h | 2
-rw-r--r--  arch/arm64/include/asm/virt.h | 28
-rw-r--r--  arch/arm64/include/asm/vmalloc.h | 45
-rw-r--r--  arch/arm64/include/asm/vncr_mapping.h | 110
-rw-r--r--  arch/arm64/include/asm/word-at-a-time.h | 18
-rw-r--r--  arch/arm64/include/uapi/asm/Kbuild | 1
-rw-r--r--  arch/arm64/include/uapi/asm/hwcap.h | 55
-rw-r--r--  arch/arm64/include/uapi/asm/kvm.h | 115
-rw-r--r--  arch/arm64/include/uapi/asm/mman.h | 9
-rw-r--r--  arch/arm64/include/uapi/asm/perf_regs.h | 7
-rw-r--r--  arch/arm64/include/uapi/asm/ptrace.h | 8
-rw-r--r--  arch/arm64/include/uapi/asm/sigcontext.h | 61
-rw-r--r--  arch/arm64/include/uapi/asm/sve_context.h | 11
-rw-r--r--  arch/arm64/include/uapi/asm/unistd.h | 25
162 files changed, 10649 insertions, 4886 deletions
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index 5c8ee5a541d2..d2ff8f6c3231 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -1,10 +1,20 @@
# SPDX-License-Identifier: GPL-2.0
+syscall-y += syscall_table_32.h
+syscall-y += syscall_table_64.h
+
+# arm32 syscall table used by lib/compat_audit.c:
+syscall-y += unistd_32.h
+# same constants with prefixes, used by vdso, seccomp and sigreturn:
+syscall-y += unistd_compat_32.h
+
generic-y += early_ioremap.h
+generic-y += fprobe.h
generic-y += mcs_spinlock.h
+generic-y += mmzone.h
generic-y += qrwlock.h
generic-y += qspinlock.h
generic-y += parport.h
generic-y += user.h
-generated-y += cpucaps.h
+generated-y += cpucap-defs.h
generated-y += sysreg-defs.h
diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h
index bd68e1b7f29f..c07a58b96329 100644
--- a/arch/arm64/include/asm/acpi.h
+++ b/arch/arm64/include/asm/acpi.h
@@ -9,6 +9,7 @@
#ifndef _ASM_ACPI_H
#define _ASM_ACPI_H
+#include <linux/cpuidle.h>
#include <linux/efi.h>
#include <linux/memblock.h>
#include <linux/psci.h>
@@ -42,6 +43,27 @@
#define ACPI_MADT_GICC_SPE (offsetof(struct acpi_madt_generic_interrupt, \
spe_interrupt) + sizeof(u16))
+#define ACPI_MADT_GICC_TRBE (offsetof(struct acpi_madt_generic_interrupt, \
+ trbe_interrupt) + sizeof(u16))
+/*
+ * Arm® Functional Fixed Hardware Specification Version 1.2.
+ * Table 2: Arm Architecture context loss flags
+ */
+#define CPUIDLE_CORE_CTXT BIT(0) /* Core context Lost */
+
+static inline unsigned int arch_get_idle_state_flags(u32 arch_flags)
+{
+ if (arch_flags & CPUIDLE_CORE_CTXT)
+ return CPUIDLE_FLAG_TIMER_STOP;
+
+ return 0;
+}
+#define arch_get_idle_state_flags arch_get_idle_state_flags
+
+#define CPUIDLE_TRACE_CTXT BIT(1) /* Trace context loss */
+#define CPUIDLE_GICR_CTXT BIT(2) /* GICR */
+#define CPUIDLE_GICD_CTXT BIT(3) /* GICD */
+
/* Basic configuration for ACPI */
#ifdef CONFIG_ACPI
pgprot_t __acpi_get_mem_attribute(phys_addr_t addr);
@@ -97,6 +119,18 @@ static inline u32 get_acpi_id_for_cpu(unsigned int cpu)
return acpi_cpu_get_madt_gicc(cpu)->uid;
}
+static inline int get_cpu_for_acpi_id(u32 uid)
+{
+ int cpu;
+
+ for (cpu = 0; cpu < nr_cpu_ids; cpu++)
+ if (acpi_cpu_get_madt_gicc(cpu) &&
+ uid == get_acpi_id_for_cpu(cpu))
+ return cpu;
+
+ return -EINVAL;
+}
+
static inline void arch_fix_phys_package_id(int num, u32 slot) { }
void __init acpi_init_cpus(void);
int apei_claim_sea(struct pt_regs *regs);
@@ -116,7 +150,7 @@ acpi_set_mailbox_entry(int cpu, struct acpi_madt_generic_interrupt *processor)
{}
#endif
-static inline const char *acpi_get_enable_method(int cpu)
+static __always_inline const char *acpi_get_enable_method(int cpu)
{
if (acpi_psci_present())
return "psci";
diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h
index 7e157ab6cd50..c8c77f9e36d6 100644
--- a/arch/arm64/include/asm/alternative-macros.h
+++ b/arch/arm64/include/asm/alternative-macros.h
@@ -2,26 +2,38 @@
#ifndef __ASM_ALTERNATIVE_MACROS_H
#define __ASM_ALTERNATIVE_MACROS_H
+#include <linux/const.h>
+#include <vdso/bits.h>
+
#include <asm/cpucaps.h>
#include <asm/insn-def.h>
-#define ARM64_CB_PATCH ARM64_NCAPS
+/*
+ * Binutils 2.27.0 can't handle a 'UL' suffix on constants, so for the assembly
+ * macros below we must use `(1 << ARM64_CB_SHIFT)`.
+ */
+#define ARM64_CB_SHIFT 15
+#define ARM64_CB_BIT BIT(ARM64_CB_SHIFT)
+
+#if ARM64_NCAPS >= ARM64_CB_BIT
+#error "cpucaps have overflown ARM64_CB_BIT"
+#endif
#ifndef __ASSEMBLY__
#include <linux/stringify.h>
-#define ALTINSTR_ENTRY(feature) \
+#define ALTINSTR_ENTRY(cpucap) \
" .word 661b - .\n" /* label */ \
" .word 663f - .\n" /* new instruction */ \
- " .hword " __stringify(feature) "\n" /* feature bit */ \
+ " .hword " __stringify(cpucap) "\n" /* cpucap */ \
" .byte 662b-661b\n" /* source len */ \
" .byte 664f-663f\n" /* replacement len */
-#define ALTINSTR_ENTRY_CB(feature, cb) \
+#define ALTINSTR_ENTRY_CB(cpucap, cb) \
" .word 661b - .\n" /* label */ \
- " .word " __stringify(cb) "- .\n" /* callback */ \
- " .hword " __stringify(feature) "\n" /* feature bit */ \
+ " .word " __stringify(cb) "- .\n" /* callback */ \
+ " .hword " __stringify(cpucap) "\n" /* cpucap */ \
" .byte 662b-661b\n" /* source len */ \
" .byte 664f-663f\n" /* replacement len */
@@ -41,13 +53,13 @@
*
* Alternatives with callbacks do not generate replacement instructions.
*/
-#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled) \
+#define __ALTERNATIVE_CFG(oldinstr, newinstr, cpucap, cfg_enabled) \
".if "__stringify(cfg_enabled)" == 1\n" \
"661:\n\t" \
oldinstr "\n" \
"662:\n" \
".pushsection .altinstructions,\"a\"\n" \
- ALTINSTR_ENTRY(feature) \
+ ALTINSTR_ENTRY(cpucap) \
".popsection\n" \
".subsection 1\n" \
"663:\n\t" \
@@ -58,31 +70,31 @@
".previous\n" \
".endif\n"
-#define __ALTERNATIVE_CFG_CB(oldinstr, feature, cfg_enabled, cb) \
+#define __ALTERNATIVE_CFG_CB(oldinstr, cpucap, cfg_enabled, cb) \
".if "__stringify(cfg_enabled)" == 1\n" \
"661:\n\t" \
oldinstr "\n" \
"662:\n" \
".pushsection .altinstructions,\"a\"\n" \
- ALTINSTR_ENTRY_CB(feature, cb) \
+ ALTINSTR_ENTRY_CB(cpucap, cb) \
".popsection\n" \
"663:\n\t" \
"664:\n\t" \
".endif\n"
-#define _ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg, ...) \
- __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg))
+#define _ALTERNATIVE_CFG(oldinstr, newinstr, cpucap, cfg, ...) \
+ __ALTERNATIVE_CFG(oldinstr, newinstr, cpucap, IS_ENABLED(cfg))
-#define ALTERNATIVE_CB(oldinstr, cb) \
- __ALTERNATIVE_CFG_CB(oldinstr, ARM64_CB_PATCH, 1, cb)
+#define ALTERNATIVE_CB(oldinstr, cpucap, cb) \
+ __ALTERNATIVE_CFG_CB(oldinstr, (1 << ARM64_CB_SHIFT) | (cpucap), 1, cb)
#else
#include <asm/assembler.h>
-.macro altinstruction_entry orig_offset alt_offset feature orig_len alt_len
+.macro altinstruction_entry orig_offset alt_offset cpucap orig_len alt_len
.word \orig_offset - .
.word \alt_offset - .
- .hword \feature
+ .hword (\cpucap)
.byte \orig_len
.byte \alt_len
.endm
@@ -141,10 +153,10 @@
661:
.endm
-.macro alternative_cb cb
+.macro alternative_cb cap, cb
.set .Lasm_alt_mode, 0
.pushsection .altinstructions, "a"
- altinstruction_entry 661f, \cb, ARM64_CB_PATCH, 662f-661f, 0
+ altinstruction_entry 661f, \cb, (1 << ARM64_CB_SHIFT) | \cap, 662f-661f, 0
.popsection
661:
.endm
@@ -198,13 +210,59 @@ alternative_endif
#endif /* __ASSEMBLY__ */
/*
- * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature));
+ * Usage: asm(ALTERNATIVE(oldinstr, newinstr, cpucap));
*
- * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature, CONFIG_FOO));
+ * Usage: asm(ALTERNATIVE(oldinstr, newinstr, cpucap, CONFIG_FOO));
* N.B. If CONFIG_FOO is specified, but not selected, the whole block
* will be omitted, including oldinstr.
*/
#define ALTERNATIVE(oldinstr, newinstr, ...) \
_ALTERNATIVE_CFG(oldinstr, newinstr, __VA_ARGS__, 1)
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+
+static __always_inline bool
+alternative_has_cap_likely(const unsigned long cpucap)
+{
+ if (!cpucap_is_possible(cpucap))
+ return false;
+
+ asm goto(
+#ifdef BUILD_VDSO
+ ALTERNATIVE("b %l[l_no]", "nop", %[cpucap])
+#else
+ ALTERNATIVE_CB("b %l[l_no]", %[cpucap], alt_cb_patch_nops)
+#endif
+ :
+ : [cpucap] "i" (cpucap)
+ :
+ : l_no);
+
+ return true;
+l_no:
+ return false;
+}
+
+static __always_inline bool
+alternative_has_cap_unlikely(const unsigned long cpucap)
+{
+ if (!cpucap_is_possible(cpucap))
+ return false;
+
+ asm goto(
+ ALTERNATIVE("nop", "b %l[l_yes]", %[cpucap])
+ :
+ : [cpucap] "i" (cpucap)
+ :
+ : l_yes);
+
+ return false;
+l_yes:
+ return true;
+}
+
+#endif /* __ASSEMBLY__ */
+
#endif /* __ASM_ALTERNATIVE_MACROS_H */
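
A minimal usage sketch (not from the patch) for the new C helpers above: gate a fast path on a cpucap so that, once alternatives are applied, the check compiles down to a patched branch/NOP rather than a runtime load. ARM64_HAS_PAN is just an example cap.

	static inline bool example_have_pan(void)
	{
		/* Becomes a constant branch after boot-time alternative patching. */
		return alternative_has_cap_likely(ARM64_HAS_PAN);
	}
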
diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h
index a38b92e11811..00d97b8a757f 100644
--- a/arch/arm64/include/asm/alternative.h
+++ b/arch/arm64/include/asm/alternative.h
@@ -13,7 +13,7 @@
struct alt_instr {
s32 orig_offset; /* offset to original instruction */
s32 alt_offset; /* offset to replacement instruction */
- u16 cpufeature; /* cpufeature bit set for replacement */
+ u16 cpucap; /* cpucap bit set for replacement */
u8 orig_len; /* size of original instruction(s) */
u8 alt_len; /* size of new instruction(s), <= orig_len */
};
@@ -23,7 +23,7 @@ typedef void (*alternative_cb_t)(struct alt_instr *alt,
void __init apply_boot_alternatives(void);
void __init apply_alternatives_all(void);
-bool alternative_is_applied(u16 cpufeature);
+bool alternative_is_applied(u16 cpucap);
#ifdef CONFIG_MODULES
void apply_alternatives_module(void *start, size_t length);
@@ -31,5 +31,8 @@ void apply_alternatives_module(void *start, size_t length);
static inline void apply_alternatives_module(void *start, size_t length) { }
#endif
+void alt_cb_patch_nops(struct alt_instr *alt, __le32 *origptr,
+ __le32 *updptr, int nr_inst);
+
#endif /* __ASSEMBLY__ */
#endif /* __ASM_ALTERNATIVE_H */
diff --git a/arch/arm64/include/asm/apple_m1_pmu.h b/arch/arm64/include/asm/apple_m1_pmu.h
index 99483b19b99f..02e05d05851f 100644
--- a/arch/arm64/include/asm/apple_m1_pmu.h
+++ b/arch/arm64/include/asm/apple_m1_pmu.h
@@ -37,6 +37,7 @@
#define PMCR0_PMI_ENABLE_8_9 GENMASK(45, 44)
#define SYS_IMP_APL_PMCR1_EL1 sys_reg(3, 1, 15, 1, 0)
+#define SYS_IMP_APL_PMCR1_EL12 sys_reg(3, 1, 15, 7, 2)
#define PMCR1_COUNT_A64_EL0_0_7 GENMASK(15, 8)
#define PMCR1_COUNT_A64_EL1_0_7 GENMASK(23, 16)
#define PMCR1_COUNT_A64_EL0_8_9 GENMASK(41, 40)
diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h
index 48d4473e8eee..9e96f024b2f1 100644
--- a/arch/arm64/include/asm/arch_gicv3.h
+++ b/arch/arm64/include/asm/arch_gicv3.h
@@ -79,6 +79,14 @@ static inline u64 gic_read_iar_cavium_thunderx(void)
return 0x3ff;
}
+static u64 __maybe_unused gic_read_iar(void)
+{
+ if (alternative_has_cap_unlikely(ARM64_WORKAROUND_CAVIUM_23154))
+ return gic_read_iar_cavium_thunderx();
+ else
+ return gic_read_iar_common();
+}
+
static inline void gic_write_ctlr(u32 val)
{
write_sysreg_s(val, SYS_ICC_CTLR_EL1);
@@ -167,21 +175,6 @@ static inline bool gic_prio_masking_enabled(void)
static inline void gic_pmr_mask_irqs(void)
{
- BUILD_BUG_ON(GICD_INT_DEF_PRI < (__GIC_PRIO_IRQOFF |
- GIC_PRIO_PSR_I_SET));
- BUILD_BUG_ON(GICD_INT_DEF_PRI >= GIC_PRIO_IRQON);
- /*
- * Need to make sure IRQON allows IRQs when SCR_EL3.FIQ is cleared
- * and non-secure PMR accesses are not subject to the shifts that
- * are applied to IRQ priorities
- */
- BUILD_BUG_ON((0x80 | (GICD_INT_DEF_PRI >> 1)) >= GIC_PRIO_IRQON);
- /*
- * Same situation as above, but now we make sure that we can mask
- * regular interrupts.
- */
- BUILD_BUG_ON((0x80 | (GICD_INT_DEF_PRI >> 1)) < (__GIC_PRIO_IRQOFF_NS |
- GIC_PRIO_PSR_I_SET));
gic_write_pmr(GIC_PRIO_IRQOFF);
}
@@ -190,5 +183,10 @@ static inline void gic_arch_enable_irqs(void)
asm volatile ("msr daifclr, #3" : : : "memory");
}
+static inline bool gic_has_relaxed_pmr_sync(void)
+{
+ return cpus_have_cap(ARM64_HAS_GIC_PRIO_RELAXED_SYNC);
+}
+
#endif /* __ASSEMBLY__ */
#endif /* __ASM_ARCH_GICV3_H */
diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h
index af1fafbe7e1d..f5794d50f51d 100644
--- a/arch/arm64/include/asm/arch_timer.h
+++ b/arch/arm64/include/asm/arch_timer.h
@@ -15,7 +15,7 @@
#include <linux/bug.h>
#include <linux/init.h>
#include <linux/jump_label.h>
-#include <linux/smp.h>
+#include <linux/percpu.h>
#include <linux/types.h>
#include <clocksource/arm_arch_timer.h>
@@ -88,13 +88,7 @@ static inline notrace u64 arch_timer_read_cntvct_el0(void)
#define arch_timer_reg_read_stable(reg) \
({ \
- u64 _val; \
- \
- preempt_disable_notrace(); \
- _val = erratum_handler(read_ ## reg)(); \
- preempt_enable_notrace(); \
- \
- _val; \
+ erratum_handler(read_ ## reg)(); \
})
/*
diff --git a/arch/arm64/include/asm/archrandom.h b/arch/arm64/include/asm/archrandom.h
index 109e2a4454be..8babfbe31f95 100644
--- a/arch/arm64/include/asm/archrandom.h
+++ b/arch/arm64/include/asm/archrandom.h
@@ -5,6 +5,7 @@
#include <linux/arm-smccc.h>
#include <linux/bug.h>
#include <linux/kernel.h>
+#include <linux/irqflags.h>
#include <asm/cpufeature.h>
#define ARM_SMCCC_TRNG_MIN_VERSION 0x10000UL
@@ -58,6 +59,13 @@ static inline bool __arm64_rndrrs(unsigned long *v)
return ok;
}
+static __always_inline bool __cpu_has_rng(void)
+{
+ if (unlikely(!system_capabilities_finalized() && !preemptible()))
+ return this_cpu_has_cap(ARM64_HAS_RNG);
+ return alternative_has_cap_unlikely(ARM64_HAS_RNG);
+}
+
static inline size_t __must_check arch_get_random_longs(unsigned long *v, size_t max_longs)
{
/*
@@ -66,7 +74,7 @@ static inline size_t __must_check arch_get_random_longs(unsigned long *v, size_t
* cpufeature code and with potential scheduling between CPUs
* with and without the feature.
*/
- if (max_longs && cpus_have_const_cap(ARM64_HAS_RNG) && __arm64_rndr(v))
+ if (max_longs && __cpu_has_rng() && __arm64_rndr(v))
return 1;
return 0;
}
@@ -108,7 +116,7 @@ static inline size_t __must_check arch_get_random_seed_longs(unsigned long *v, s
* reseeded after each invocation. This is not a 100% fit but good
* enough to implement this API if no other entropy source exists.
*/
- if (cpus_have_const_cap(ARM64_HAS_RNG) && __arm64_rndrrs(v))
+ if (__cpu_has_rng() && __arm64_rndrrs(v))
return 1;
return 0;
@@ -121,40 +129,4 @@ static inline bool __init __early_cpu_has_rndr(void)
return (ftr >> ID_AA64ISAR0_EL1_RNDR_SHIFT) & 0xf;
}
-static inline size_t __init __must_check
-arch_get_random_seed_longs_early(unsigned long *v, size_t max_longs)
-{
- WARN_ON(system_state != SYSTEM_BOOTING);
-
- if (!max_longs)
- return 0;
-
- if (smccc_trng_available) {
- struct arm_smccc_res res;
-
- max_longs = min_t(size_t, 3, max_longs);
- arm_smccc_1_1_invoke(ARM_SMCCC_TRNG_RND64, max_longs * 64, &res);
- if ((int)res.a0 >= 0) {
- switch (max_longs) {
- case 3:
- *v++ = res.a1;
- fallthrough;
- case 2:
- *v++ = res.a2;
- fallthrough;
- case 1:
- *v++ = res.a3;
- break;
- }
- return max_longs;
- }
- }
-
- if (__early_cpu_has_rndr() && __arm64_rndr(v))
- return 1;
-
- return 0;
-}
-#define arch_get_random_seed_longs_early arch_get_random_seed_longs_early
-
#endif /* _ASM_ARCHRANDOM_H */
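
Illustrative caller (not part of the patch), relying only on the arch_get_random_longs() contract visible above: it may fill fewer longs than requested, so the return value must be checked.

	static bool example_get_one_random_long(unsigned long *out)
	{
		/* Returns the number of longs filled: 0 or 1 here. */
		return arch_get_random_longs(out, 1) == 1;
	}
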
diff --git a/arch/arm64/include/asm/arm_pmuv3.h b/arch/arm64/include/asm/arm_pmuv3.h
new file mode 100644
index 000000000000..8a777dec8d88
--- /dev/null
+++ b/arch/arm64/include/asm/arm_pmuv3.h
@@ -0,0 +1,191 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ */
+
+#ifndef __ASM_PMUV3_H
+#define __ASM_PMUV3_H
+
+#include <asm/kvm_host.h>
+
+#include <asm/cpufeature.h>
+#include <asm/sysreg.h>
+
+#define RETURN_READ_PMEVCNTRN(n) \
+ return read_sysreg(pmevcntr##n##_el0)
+static inline unsigned long read_pmevcntrn(int n)
+{
+ PMEVN_SWITCH(n, RETURN_READ_PMEVCNTRN);
+ return 0;
+}
+
+#define WRITE_PMEVCNTRN(n) \
+ write_sysreg(val, pmevcntr##n##_el0)
+static inline void write_pmevcntrn(int n, unsigned long val)
+{
+ PMEVN_SWITCH(n, WRITE_PMEVCNTRN);
+}
+
+#define WRITE_PMEVTYPERN(n) \
+ write_sysreg(val, pmevtyper##n##_el0)
+static inline void write_pmevtypern(int n, unsigned long val)
+{
+ PMEVN_SWITCH(n, WRITE_PMEVTYPERN);
+}
+
+#define RETURN_READ_PMEVTYPERN(n) \
+ return read_sysreg(pmevtyper##n##_el0)
+static inline unsigned long read_pmevtypern(int n)
+{
+ PMEVN_SWITCH(n, RETURN_READ_PMEVTYPERN);
+ return 0;
+}
+
+static inline unsigned long read_pmmir(void)
+{
+ return read_cpuid(PMMIR_EL1);
+}
+
+static inline u32 read_pmuver(void)
+{
+ u64 dfr0 = read_sysreg(id_aa64dfr0_el1);
+
+ return cpuid_feature_extract_unsigned_field(dfr0,
+ ID_AA64DFR0_EL1_PMUVer_SHIFT);
+}
+
+static inline bool pmuv3_has_icntr(void)
+{
+ u64 dfr1 = read_sysreg(id_aa64dfr1_el1);
+
+ return !!cpuid_feature_extract_unsigned_field(dfr1,
+ ID_AA64DFR1_EL1_PMICNTR_SHIFT);
+}
+
+static inline void write_pmcr(u64 val)
+{
+ write_sysreg(val, pmcr_el0);
+}
+
+static inline u64 read_pmcr(void)
+{
+ return read_sysreg(pmcr_el0);
+}
+
+static inline void write_pmselr(u32 val)
+{
+ write_sysreg(val, pmselr_el0);
+}
+
+static inline void write_pmccntr(u64 val)
+{
+ write_sysreg(val, pmccntr_el0);
+}
+
+static inline u64 read_pmccntr(void)
+{
+ return read_sysreg(pmccntr_el0);
+}
+
+static inline void write_pmicntr(u64 val)
+{
+ write_sysreg_s(val, SYS_PMICNTR_EL0);
+}
+
+static inline u64 read_pmicntr(void)
+{
+ return read_sysreg_s(SYS_PMICNTR_EL0);
+}
+
+static inline void write_pmcntenset(u64 val)
+{
+ write_sysreg(val, pmcntenset_el0);
+}
+
+static inline void write_pmcntenclr(u64 val)
+{
+ write_sysreg(val, pmcntenclr_el0);
+}
+
+static inline void write_pmintenset(u64 val)
+{
+ write_sysreg(val, pmintenset_el1);
+}
+
+static inline void write_pmintenclr(u64 val)
+{
+ write_sysreg(val, pmintenclr_el1);
+}
+
+static inline void write_pmccfiltr(u64 val)
+{
+ write_sysreg(val, pmccfiltr_el0);
+}
+
+static inline u64 read_pmccfiltr(void)
+{
+ return read_sysreg(pmccfiltr_el0);
+}
+
+static inline void write_pmicfiltr(u64 val)
+{
+ write_sysreg_s(val, SYS_PMICFILTR_EL0);
+}
+
+static inline u64 read_pmicfiltr(void)
+{
+ return read_sysreg_s(SYS_PMICFILTR_EL0);
+}
+
+static inline void write_pmovsclr(u64 val)
+{
+ write_sysreg(val, pmovsclr_el0);
+}
+
+static inline u64 read_pmovsclr(void)
+{
+ return read_sysreg(pmovsclr_el0);
+}
+
+static inline void write_pmuserenr(u32 val)
+{
+ write_sysreg(val, pmuserenr_el0);
+}
+
+static inline void write_pmuacr(u64 val)
+{
+ write_sysreg_s(val, SYS_PMUACR_EL1);
+}
+
+static inline u64 read_pmceid0(void)
+{
+ return read_sysreg(pmceid0_el0);
+}
+
+static inline u64 read_pmceid1(void)
+{
+ return read_sysreg(pmceid1_el0);
+}
+
+static inline bool pmuv3_implemented(int pmuver)
+{
+ return !(pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF ||
+ pmuver == ID_AA64DFR0_EL1_PMUVer_NI);
+}
+
+static inline bool is_pmuv3p4(int pmuver)
+{
+ return pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P4;
+}
+
+static inline bool is_pmuv3p5(int pmuver)
+{
+ return pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P5;
+}
+
+static inline bool is_pmuv3p9(int pmuver)
+{
+ return pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P9;
+}
+
+#endif
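
A hedged sketch of how a PMUv3 driver might combine the accessors in this new header: verify that a PMU is implemented before touching PMCR_EL0. The bit positions (cycle counter enable in PMCNTENSET_EL0 bit 31, PMCR_EL0.E bit 0) come from the PMUv3 architecture and are open-coded here purely for illustration.

	static void example_enable_cycle_counter(void)
	{
		if (!pmuv3_implemented(read_pmuver()))
			return;				/* no PMU, or IMP DEF only */

		write_pmcntenset(BIT(31));		/* enable the cycle counter */
		write_pmcr(read_pmcr() | 0x1);		/* PMCR_EL0.E: start counting */
	}
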
diff --git a/arch/arm64/include/asm/asm-bug.h b/arch/arm64/include/asm/asm-bug.h
index c762038ba400..a5f13801b784 100644
--- a/arch/arm64/include/asm/asm-bug.h
+++ b/arch/arm64/include/asm/asm-bug.h
@@ -21,15 +21,21 @@
#endif
#ifdef CONFIG_GENERIC_BUG
-
-#define __BUG_ENTRY(flags) \
+#define __BUG_ENTRY_START \
.pushsection __bug_table,"aw"; \
.align 2; \
14470: .long 14471f - .; \
-_BUGVERBOSE_LOCATION(__FILE__, __LINE__) \
- .short flags; \
+
+#define __BUG_ENTRY_END \
+ .align 2; \
.popsection; \
14471:
+
+#define __BUG_ENTRY(flags) \
+ __BUG_ENTRY_START \
+_BUGVERBOSE_LOCATION(__FILE__, __LINE__) \
+ .short flags; \
+ __BUG_ENTRY_END
#else
#define __BUG_ENTRY(flags)
#endif
@@ -40,4 +46,24 @@ _BUGVERBOSE_LOCATION(__FILE__, __LINE__) \
#define ASM_BUG() ASM_BUG_FLAGS(0)
+#ifdef CONFIG_DEBUG_BUGVERBOSE
+#define __BUG_LOCATION_STRING(file, line) \
+ ".long " file "- .;" \
+ ".short " line ";"
+#else
+#define __BUG_LOCATION_STRING(file, line)
+#endif
+
+#define __BUG_ENTRY_STRING(file, line, flags) \
+ __stringify(__BUG_ENTRY_START) \
+ __BUG_LOCATION_STRING(file, line) \
+ ".short " flags ";" \
+ __stringify(__BUG_ENTRY_END)
+
+#define ARCH_WARN_ASM(file, line, flags, size) \
+ __BUG_ENTRY_STRING(file, line, flags) \
+ __stringify(brk BUG_BRK_IMM)
+
+#define ARCH_WARN_REACHABLE
+
#endif /* __ASM_ASM_BUG_H */
diff --git a/arch/arm64/include/asm/asm-extable.h b/arch/arm64/include/asm/asm-extable.h
index 980d1dd8e1a3..292f2687a12e 100644
--- a/arch/arm64/include/asm/asm-extable.h
+++ b/arch/arm64/include/asm/asm-extable.h
@@ -9,7 +9,8 @@
#define EX_TYPE_BPF 1
#define EX_TYPE_UACCESS_ERR_ZERO 2
#define EX_TYPE_KACCESS_ERR_ZERO 3
-#define EX_TYPE_LOAD_UNALIGNED_ZEROPAD 4
+#define EX_TYPE_UACCESS_CPY 4
+#define EX_TYPE_LOAD_UNALIGNED_ZEROPAD 5
/* Data fields for EX_TYPE_UACCESS_ERR_ZERO */
#define EX_DATA_REG_ERR_SHIFT 0
@@ -23,6 +24,9 @@
#define EX_DATA_REG_ADDR_SHIFT 5
#define EX_DATA_REG_ADDR GENMASK(9, 5)
+/* Data fields for EX_TYPE_UACCESS_CPY */
+#define EX_DATA_UACCESS_WRITE BIT(0)
+
#ifdef __ASSEMBLY__
#define __ASM_EXTABLE_RAW(insn, fixup, type, data) \
@@ -69,6 +73,10 @@
.endif
.endm
+ .macro _asm_extable_uaccess_cpy, insn, fixup, uaccess_is_write
+ __ASM_EXTABLE_RAW(\insn, \fixup, EX_TYPE_UACCESS_CPY, \uaccess_is_write)
+ .endm
+
#else /* __ASSEMBLY__ */
#include <linux/stringify.h>
@@ -112,6 +120,9 @@
#define _ASM_EXTABLE_KACCESS_ERR(insn, fixup, err) \
_ASM_EXTABLE_KACCESS_ERR_ZERO(insn, fixup, err, wzr)
+#define _ASM_EXTABLE_KACCESS(insn, fixup) \
+ _ASM_EXTABLE_KACCESS_ERR_ZERO(insn, fixup, wzr, wzr)
+
#define _ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD(insn, fixup, data, addr) \
__DEFINE_ASM_GPR_NUMS \
__ASM_EXTABLE_RAW(#insn, #fixup, \
diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h
index 75b211c98dea..9148f5a31968 100644
--- a/arch/arm64/include/asm/asm-uaccess.h
+++ b/arch/arm64/include/asm/asm-uaccess.h
@@ -18,7 +18,6 @@
bic \tmp1, \tmp1, #TTBR_ASID_MASK
sub \tmp1, \tmp1, #RESERVED_SWAPPER_OFFSET // reserved_pg_dir
msr ttbr0_el1, \tmp1 // set reserved TTBR0_EL1
- isb
add \tmp1, \tmp1, #RESERVED_SWAPPER_OFFSET
msr ttbr1_el1, \tmp1 // set reserved ASID
isb
@@ -31,7 +30,6 @@
extr \tmp2, \tmp2, \tmp1, #48
ror \tmp2, \tmp2, #16
msr ttbr1_el1, \tmp2 // set the active ASID
- isb
msr ttbr0_el1, \tmp1 // set the non-PAN TTBR0_EL1
isb
.endm
@@ -63,6 +61,10 @@ alternative_else_nop_endif
9999: x; \
_asm_extable_uaccess 9999b, l
+#define USER_CPY(l, uaccess_is_write, x...) \
+9999: x; \
+ _asm_extable_uaccess_cpy 9999b, l, uaccess_is_write
+
/*
* Generate the assembly for LDTR/STTR with exception table entries.
* This is complicated as there is no post-increment or pair versions of the
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 5846145be523..23be85d93348 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -12,7 +12,7 @@
#ifndef __ASM_ASSEMBLER_H
#define __ASM_ASSEMBLER_H
-#include <asm-generic/export.h>
+#include <linux/export.h>
#include <asm/alternative.h>
#include <asm/asm-bug.h>
@@ -34,31 +34,18 @@
wx\n .req w\n
.endr
- .macro save_and_disable_daif, flags
- mrs \flags, daif
- msr daifset, #0xf
- .endm
-
.macro disable_daif
msr daifset, #0xf
.endm
- .macro enable_daif
- msr daifclr, #0xf
- .endm
-
- .macro restore_daif, flags:req
- msr daif, \flags
- .endm
-
- /* IRQ/FIQ are the lowest priority flags, unconditionally unmask the rest. */
- .macro enable_da
- msr daifclr, #(8 | 4)
- .endm
-
/*
* Save/restore interrupts.
*/
+ .macro save_and_disable_daif, flags
+ mrs \flags, daif
+ msr daifset, #0xf
+ .endm
+
.macro save_and_disable_irq, flags
mrs \flags, daif
msr daifset, #3
@@ -68,16 +55,12 @@
msr daif, \flags
.endm
- .macro enable_dbg
- msr daifclr, #8
- .endm
-
.macro disable_step_tsk, flgs, tmp
tbz \flgs, #TIF_SINGLESTEP, 9990f
mrs \tmp, mdscr_el1
- bic \tmp, \tmp, #DBG_MDSCR_SS
+ bic \tmp, \tmp, #MDSCR_EL1_SS
msr mdscr_el1, \tmp
- isb // Synchronise with enable_dbg
+ isb // Take effect before a subsequent clear of DAIF.D
9990:
.endm
@@ -85,7 +68,7 @@
.macro enable_step_tsk, flgs, tmp
tbz \flgs, #TIF_SINGLESTEP, 9990f
mrs \tmp, mdscr_el1
- orr \tmp, \tmp, #DBG_MDSCR_SS
+ orr \tmp, \tmp, #MDSCR_EL1_SS
msr mdscr_el1, \tmp
9990:
.endm
@@ -271,13 +254,6 @@ alternative_endif
.endm
/*
- * vma_vm_mm - get mm pointer from vma pointer (vma->vm_mm)
- */
- .macro vma_vm_mm, rd, rn
- ldr \rd, [\rn, #VMA_VM_MM]
- .endm
-
-/*
* read_ctr - read CTR_EL0. If the system has mismatched register fields,
* provide the system wide safe value from arm64_ftr_reg_ctrel0.sys_val
*/
@@ -293,7 +269,7 @@ alternative_endif
alternative_if_not ARM64_KVM_PROTECTED_MODE
ASM_BUG()
alternative_else_nop_endif
-alternative_cb kvm_compute_final_ctr_el0
+alternative_cb ARM64_ALWAYS_SYSTEM, kvm_compute_final_ctr_el0
movz \reg, #0
movk \reg, #0, lsl #16
movk \reg, #0, lsl #32
@@ -360,20 +336,6 @@ alternative_cb_end
.endm
/*
- * idmap_get_t0sz - get the T0SZ value needed to cover the ID map
- *
- * Calculate the maximum allowed value for TCR_EL1.T0SZ so that the
- * entire ID map region can be mapped. As T0SZ == (64 - #bits used),
- * this number conveniently equals the number of leading zeroes in
- * the physical address of _end.
- */
- .macro idmap_get_t0sz, reg
- adrp \reg, _end
- orr \reg, \reg, #(1 << VA_BITS_MIN) - 1
- clz \reg, \reg
- .endm
-
-/*
* tcr_compute_pa_size - set TCR.(I)PS to the highest supported
* ID_AA64MMFR0_EL1.PARange value
*
@@ -384,8 +346,13 @@ alternative_cb_end
.macro tcr_compute_pa_size, tcr, pos, tmp0, tmp1
mrs \tmp0, ID_AA64MMFR0_EL1
// Narrow PARange to fit the PS field in TCR_ELx
- ubfx \tmp0, \tmp0, #ID_AA64MMFR0_PARANGE_SHIFT, #3
- mov \tmp1, #ID_AA64MMFR0_PARANGE_MAX
+ ubfx \tmp0, \tmp0, #ID_AA64MMFR0_EL1_PARANGE_SHIFT, #3
+ mov \tmp1, #ID_AA64MMFR0_EL1_PARANGE_MAX
+#ifdef CONFIG_ARM64_LPA2
+alternative_if_not ARM64_HAS_VA52
+ mov \tmp1, #ID_AA64MMFR0_EL1_PARANGE_48
+alternative_else_nop_endif
+#endif
cmp \tmp0, \tmp1
csel \tmp0, \tmp1, \tmp0, hi
bfi \tcr, \tmp0, \pos, #3
@@ -512,9 +479,10 @@ alternative_endif
*/
.macro reset_pmuserenr_el0, tmpreg
mrs \tmpreg, id_aa64dfr0_el1
- sbfx \tmpreg, \tmpreg, #ID_AA64DFR0_PMUVER_SHIFT, #4
- cmp \tmpreg, #1 // Skip if no PMU present
- b.lt 9000f
+ ubfx \tmpreg, \tmpreg, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4
+ cmp \tmpreg, #ID_AA64DFR0_EL1_PMUVer_NI
+ ccmp \tmpreg, #ID_AA64DFR0_EL1_PMUVer_IMP_DEF, #4, ne
+ b.eq 9000f // Skip if no PMU present or IMP_DEF
msr pmuserenr_el0, xzr // Disable PMU access from EL0
9000:
.endm
@@ -524,7 +492,7 @@ alternative_endif
*/
.macro reset_amuserenr_el0, tmpreg
mrs \tmpreg, id_aa64pfr0_el1 // Check ID_AA64PFR0_EL1
- ubfx \tmpreg, \tmpreg, #ID_AA64PFR0_AMU_SHIFT, #4
+ ubfx \tmpreg, \tmpreg, #ID_AA64PFR0_EL1_AMU_SHIFT, #4
cbz \tmpreg, .Lskip_\@ // Skip if no AMU present
msr_s SYS_AMUSERENR_EL0, xzr // Disable AMU access from EL0
.Lskip_\@:
@@ -604,29 +572,27 @@ alternative_endif
.endm
/*
- * Offset ttbr1 to allow for 48-bit kernel VAs set with 52-bit PTRS_PER_PGD.
+ * If the kernel is built for 52-bit virtual addressing but the hardware only
+ * supports 48 bits, we cannot program the pgdir address into TTBR1 directly,
+ * but we have to add an offset so that the TTBR1 address corresponds with the
+ * pgdir entry that covers the lowest 48-bit addressable VA.
+ *
+ * Note that this trick is only used for LVA/64k pages - LPA2/4k pages uses an
+ * additional paging level, and on LPA2/16k pages, we would end up with a root
+ * level table with only 2 entries, which is suboptimal in terms of TLB
+ * utilization, so there we fall back to 47 bits of translation if LPA2 is not
+ * supported.
+ *
* orr is used as it can cover the immediate value (and is idempotent).
- * In future this may be nop'ed out when dealing with 52-bit kernel VAs.
* ttbr: Value of ttbr to set, modified.
*/
.macro offset_ttbr1, ttbr, tmp
-#ifdef CONFIG_ARM64_VA_BITS_52
- mrs_s \tmp, SYS_ID_AA64MMFR2_EL1
- and \tmp, \tmp, #(0xf << ID_AA64MMFR2_LVA_SHIFT)
- cbnz \tmp, .Lskipoffs_\@
- orr \ttbr, \ttbr, #TTBR1_BADDR_4852_OFFSET
-.Lskipoffs_\@ :
-#endif
- .endm
-
-/*
- * Perform the reverse of offset_ttbr1.
- * bic is used as it can cover the immediate value and, in future, won't need
- * to be nop'ed out when dealing with 52-bit kernel VAs.
- */
- .macro restore_ttbr1, ttbr
-#ifdef CONFIG_ARM64_VA_BITS_52
- bic \ttbr, \ttbr, #TTBR1_BADDR_4852_OFFSET
+#if defined(CONFIG_ARM64_VA_BITS_52) && !defined(CONFIG_ARM64_LPA2)
+ mrs \tmp, tcr_el1
+ and \tmp, \tmp, #TCR_T1SZ_MASK
+ cmp \tmp, #TCR_T1SZ(VA_BITS_MIN)
+ orr \tmp, \ttbr, #TTBR1_BADDR_4852_OFFSET
+ csel \ttbr, \tmp, \ttbr, eq
#endif
.endm
@@ -648,27 +614,13 @@ alternative_endif
.macro phys_to_pte, pte, phys
#ifdef CONFIG_ARM64_PA_BITS_52
- /*
- * We assume \phys is 64K aligned and this is guaranteed by only
- * supporting this configuration with 64K pages.
- */
- orr \pte, \phys, \phys, lsr #36
- and \pte, \pte, #PTE_ADDR_MASK
+ orr \pte, \phys, \phys, lsr #PTE_ADDR_HIGH_SHIFT
+ and \pte, \pte, #PHYS_TO_PTE_ADDR_MASK
#else
mov \pte, \phys
#endif
.endm
- .macro pte_to_phys, phys, pte
-#ifdef CONFIG_ARM64_PA_BITS_52
- ubfiz \phys, \pte, #(48 - 16 - 12), #16
- bfxil \phys, \pte, #16, #32
- lsl \phys, \phys, #16
-#else
- and \phys, \pte, #PTE_ADDR_MASK
-#endif
- .endm
-
/*
* tcr_clear_errata_bits - Clear TCR bits that trigger an errata on this CPU.
*/
@@ -787,32 +739,25 @@ alternative_endif
.endm
/*
- * Check whether preempt/bh-disabled asm code should yield as soon as
- * it is able. This is the case if we are currently running in task
- * context, and either a softirq is pending, or the TIF_NEED_RESCHED
- * flag is set and re-enabling preemption a single time would result in
- * a preempt count of zero. (Note that the TIF_NEED_RESCHED flag is
- * stored negated in the top word of the thread_info::preempt_count
+ * Check whether asm code should yield as soon as it is able. This is
+ * the case if we are currently running in task context, and the
+ * TIF_NEED_RESCHED flag is set. (Note that the TIF_NEED_RESCHED flag
+ * is stored negated in the top word of the thread_info::preempt_count
* field)
*/
- .macro cond_yield, lbl:req, tmp:req, tmp2:req
+ .macro cond_yield, lbl:req, tmp:req, tmp2
+#ifdef CONFIG_PREEMPT_VOLUNTARY
get_current_task \tmp
ldr \tmp, [\tmp, #TSK_TI_PREEMPT]
/*
* If we are serving a softirq, there is no point in yielding: the
* softirq will not be preempted no matter what we do, so we should
- * run to completion as quickly as we can.
+ * run to completion as quickly as we can. The preempt_count field will
+ * have BIT(SOFTIRQ_SHIFT) set in this case, so the zero check will
+ * catch this case too.
*/
- tbnz \tmp, #SOFTIRQ_SHIFT, .Lnoyield_\@
-#ifdef CONFIG_PREEMPTION
- sub \tmp, \tmp, #PREEMPT_DISABLE_OFFSET
cbz \tmp, \lbl
#endif
- adr_l \tmp, irq_stat + IRQ_CPUSTAT_SOFTIRQ_PENDING
- get_this_cpu_offset \tmp2
- ldr w\tmp, [\tmp, \tmp2]
- cbnz w\tmp, \lbl // yield on pending softirq in task context
-.Lnoyield_\@:
.endm
/*
@@ -877,7 +822,7 @@ alternative_endif
.macro __mitigate_spectre_bhb_loop tmp
#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
-alternative_cb spectre_bhb_patch_loop_iter
+alternative_cb ARM64_ALWAYS_SYSTEM, spectre_bhb_patch_loop_iter
mov \tmp, #32 // Patched to correct the immediate
alternative_cb_end
.Lspectre_bhb_loop\@:
@@ -890,7 +835,7 @@ alternative_cb_end
.macro mitigate_spectre_bhb_loop tmp
#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
-alternative_cb spectre_bhb_patch_loop_mitigation_enable
+alternative_cb ARM64_ALWAYS_SYSTEM, spectre_bhb_patch_loop_mitigation_enable
b .L_spectre_bhb_loop_done\@ // Patched to NOP
alternative_cb_end
__mitigate_spectre_bhb_loop \tmp
@@ -904,7 +849,7 @@ alternative_cb_end
stp x0, x1, [sp, #-16]!
stp x2, x3, [sp, #-16]!
mov w0, #ARM_SMCCC_ARCH_WORKAROUND_3
-alternative_cb smccc_patch_fw_mitigation_conduit
+alternative_cb ARM64_ALWAYS_SYSTEM, smccc_patch_fw_mitigation_conduit
nop // Patched to SMC/HVC #0
alternative_cb_end
ldp x2, x3, [sp], #16
@@ -914,7 +859,7 @@ alternative_cb_end
.macro mitigate_spectre_bhb_clear_insn
#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
-alternative_cb spectre_bhb_patch_clearbhb
+alternative_cb ARM64_ALWAYS_SYSTEM, spectre_bhb_patch_clearbhb
/* Patched to NOP when not supported */
clearbhb
isb
diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h
index c9979273d389..400d279e0f8d 100644
--- a/arch/arm64/include/asm/atomic.h
+++ b/arch/arm64/include/asm/atomic.h
@@ -142,24 +142,6 @@ static __always_inline long arch_atomic64_dec_if_positive(atomic64_t *v)
#define arch_atomic_fetch_xor_release arch_atomic_fetch_xor_release
#define arch_atomic_fetch_xor arch_atomic_fetch_xor
-#define arch_atomic_xchg_relaxed(v, new) \
- arch_xchg_relaxed(&((v)->counter), (new))
-#define arch_atomic_xchg_acquire(v, new) \
- arch_xchg_acquire(&((v)->counter), (new))
-#define arch_atomic_xchg_release(v, new) \
- arch_xchg_release(&((v)->counter), (new))
-#define arch_atomic_xchg(v, new) \
- arch_xchg(&((v)->counter), (new))
-
-#define arch_atomic_cmpxchg_relaxed(v, old, new) \
- arch_cmpxchg_relaxed(&((v)->counter), (old), (new))
-#define arch_atomic_cmpxchg_acquire(v, old, new) \
- arch_cmpxchg_acquire(&((v)->counter), (old), (new))
-#define arch_atomic_cmpxchg_release(v, old, new) \
- arch_cmpxchg_release(&((v)->counter), (old), (new))
-#define arch_atomic_cmpxchg(v, old, new) \
- arch_cmpxchg(&((v)->counter), (old), (new))
-
#define arch_atomic_andnot arch_atomic_andnot
/*
@@ -209,16 +191,6 @@ static __always_inline long arch_atomic64_dec_if_positive(atomic64_t *v)
#define arch_atomic64_fetch_xor_release arch_atomic64_fetch_xor_release
#define arch_atomic64_fetch_xor arch_atomic64_fetch_xor
-#define arch_atomic64_xchg_relaxed arch_atomic_xchg_relaxed
-#define arch_atomic64_xchg_acquire arch_atomic_xchg_acquire
-#define arch_atomic64_xchg_release arch_atomic_xchg_release
-#define arch_atomic64_xchg arch_atomic_xchg
-
-#define arch_atomic64_cmpxchg_relaxed arch_atomic_cmpxchg_relaxed
-#define arch_atomic64_cmpxchg_acquire arch_atomic_cmpxchg_acquire
-#define arch_atomic64_cmpxchg_release arch_atomic_cmpxchg_release
-#define arch_atomic64_cmpxchg arch_atomic_cmpxchg
-
#define arch_atomic64_andnot arch_atomic64_andnot
#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h
index fe0db8d416fb..89d2ba272359 100644
--- a/arch/arm64/include/asm/atomic_ll_sc.h
+++ b/arch/arm64/include/asm/atomic_ll_sc.h
@@ -12,19 +12,6 @@
#include <linux/stringify.h>
-#ifdef CONFIG_ARM64_LSE_ATOMICS
-#define __LL_SC_FALLBACK(asm_ops) \
-" b 3f\n" \
-" .subsection 1\n" \
-"3:\n" \
-asm_ops "\n" \
-" b 4f\n" \
-" .previous\n" \
-"4:\n"
-#else
-#define __LL_SC_FALLBACK(asm_ops) asm_ops
-#endif
-
#ifndef CONFIG_CC_HAS_K_CONSTRAINT
#define K
#endif
@@ -36,38 +23,36 @@ asm_ops "\n" \
*/
#define ATOMIC_OP(op, asm_op, constraint) \
-static inline void \
+static __always_inline void \
__ll_sc_atomic_##op(int i, atomic_t *v) \
{ \
unsigned long tmp; \
int result; \
\
asm volatile("// atomic_" #op "\n" \
- __LL_SC_FALLBACK( \
" prfm pstl1strm, %2\n" \
"1: ldxr %w0, %2\n" \
" " #asm_op " %w0, %w0, %w3\n" \
" stxr %w1, %w0, %2\n" \
- " cbnz %w1, 1b\n") \
+ " cbnz %w1, 1b\n" \
: "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \
: __stringify(constraint) "r" (i)); \
}
#define ATOMIC_OP_RETURN(name, mb, acq, rel, cl, op, asm_op, constraint)\
-static inline int \
+static __always_inline int \
__ll_sc_atomic_##op##_return##name(int i, atomic_t *v) \
{ \
unsigned long tmp; \
int result; \
\
asm volatile("// atomic_" #op "_return" #name "\n" \
- __LL_SC_FALLBACK( \
" prfm pstl1strm, %2\n" \
"1: ld" #acq "xr %w0, %2\n" \
" " #asm_op " %w0, %w0, %w3\n" \
" st" #rel "xr %w1, %w0, %2\n" \
" cbnz %w1, 1b\n" \
- " " #mb ) \
+ " " #mb \
: "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \
: __stringify(constraint) "r" (i) \
: cl); \
@@ -76,20 +61,19 @@ __ll_sc_atomic_##op##_return##name(int i, atomic_t *v) \
}
#define ATOMIC_FETCH_OP(name, mb, acq, rel, cl, op, asm_op, constraint) \
-static inline int \
+static __always_inline int \
__ll_sc_atomic_fetch_##op##name(int i, atomic_t *v) \
{ \
unsigned long tmp; \
int val, result; \
\
asm volatile("// atomic_fetch_" #op #name "\n" \
- __LL_SC_FALLBACK( \
" prfm pstl1strm, %3\n" \
"1: ld" #acq "xr %w0, %3\n" \
" " #asm_op " %w1, %w0, %w4\n" \
" st" #rel "xr %w2, %w1, %3\n" \
" cbnz %w2, 1b\n" \
- " " #mb ) \
+ " " #mb \
: "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter) \
: __stringify(constraint) "r" (i) \
: cl); \
@@ -135,38 +119,36 @@ ATOMIC_OPS(andnot, bic, )
#undef ATOMIC_OP
#define ATOMIC64_OP(op, asm_op, constraint) \
-static inline void \
+static __always_inline void \
__ll_sc_atomic64_##op(s64 i, atomic64_t *v) \
{ \
s64 result; \
unsigned long tmp; \
\
asm volatile("// atomic64_" #op "\n" \
- __LL_SC_FALLBACK( \
" prfm pstl1strm, %2\n" \
"1: ldxr %0, %2\n" \
" " #asm_op " %0, %0, %3\n" \
" stxr %w1, %0, %2\n" \
- " cbnz %w1, 1b") \
+ " cbnz %w1, 1b" \
: "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \
: __stringify(constraint) "r" (i)); \
}
#define ATOMIC64_OP_RETURN(name, mb, acq, rel, cl, op, asm_op, constraint)\
-static inline long \
+static __always_inline long \
__ll_sc_atomic64_##op##_return##name(s64 i, atomic64_t *v) \
{ \
s64 result; \
unsigned long tmp; \
\
asm volatile("// atomic64_" #op "_return" #name "\n" \
- __LL_SC_FALLBACK( \
" prfm pstl1strm, %2\n" \
"1: ld" #acq "xr %0, %2\n" \
" " #asm_op " %0, %0, %3\n" \
" st" #rel "xr %w1, %0, %2\n" \
" cbnz %w1, 1b\n" \
- " " #mb ) \
+ " " #mb \
: "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \
: __stringify(constraint) "r" (i) \
: cl); \
@@ -175,20 +157,19 @@ __ll_sc_atomic64_##op##_return##name(s64 i, atomic64_t *v) \
}
#define ATOMIC64_FETCH_OP(name, mb, acq, rel, cl, op, asm_op, constraint)\
-static inline long \
+static __always_inline long \
__ll_sc_atomic64_fetch_##op##name(s64 i, atomic64_t *v) \
{ \
s64 result, val; \
unsigned long tmp; \
\
asm volatile("// atomic64_fetch_" #op #name "\n" \
- __LL_SC_FALLBACK( \
" prfm pstl1strm, %3\n" \
"1: ld" #acq "xr %0, %3\n" \
" " #asm_op " %1, %0, %4\n" \
" st" #rel "xr %w2, %1, %3\n" \
" cbnz %w2, 1b\n" \
- " " #mb ) \
+ " " #mb \
: "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter) \
: __stringify(constraint) "r" (i) \
: cl); \
@@ -233,14 +214,13 @@ ATOMIC64_OPS(andnot, bic, )
#undef ATOMIC64_OP_RETURN
#undef ATOMIC64_OP
-static inline s64
+static __always_inline s64
__ll_sc_atomic64_dec_if_positive(atomic64_t *v)
{
s64 result;
unsigned long tmp;
asm volatile("// atomic64_dec_if_positive\n"
- __LL_SC_FALLBACK(
" prfm pstl1strm, %2\n"
"1: ldxr %0, %2\n"
" subs %0, %0, #1\n"
@@ -248,7 +228,7 @@ __ll_sc_atomic64_dec_if_positive(atomic64_t *v)
" stlxr %w1, %0, %2\n"
" cbnz %w1, 1b\n"
" dmb ish\n"
- "2:")
+ "2:"
: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
:
: "cc", "memory");
@@ -257,7 +237,7 @@ __ll_sc_atomic64_dec_if_positive(atomic64_t *v)
}
#define __CMPXCHG_CASE(w, sfx, name, sz, mb, acq, rel, cl, constraint) \
-static inline u##sz \
+static __always_inline u##sz \
__ll_sc__cmpxchg_case_##name##sz(volatile void *ptr, \
unsigned long old, \
u##sz new) \
@@ -274,7 +254,6 @@ __ll_sc__cmpxchg_case_##name##sz(volatile void *ptr, \
old = (u##sz)old; \
\
asm volatile( \
- __LL_SC_FALLBACK( \
" prfm pstl1strm, %[v]\n" \
"1: ld" #acq "xr" #sfx "\t%" #w "[oldval], %[v]\n" \
" eor %" #w "[tmp], %" #w "[oldval], %" #w "[old]\n" \
@@ -282,7 +261,7 @@ __ll_sc__cmpxchg_case_##name##sz(volatile void *ptr, \
" st" #rel "xr" #sfx "\t%w[tmp], %" #w "[new], %[v]\n" \
" cbnz %w[tmp], 1b\n" \
" " #mb "\n" \
- "2:") \
+ "2:" \
: [tmp] "=&r" (tmp), [oldval] "=&r" (oldval), \
[v] "+Q" (*(u##sz *)ptr) \
: [old] __stringify(constraint) "r" (old), [new] "r" (new) \
@@ -315,39 +294,46 @@ __CMPXCHG_CASE( , , mb_, 64, dmb ish, , l, "memory", L)
#undef __CMPXCHG_CASE
-#define __CMPXCHG_DBL(name, mb, rel, cl) \
-static inline long \
-__ll_sc__cmpxchg_double##name(unsigned long old1, \
- unsigned long old2, \
- unsigned long new1, \
- unsigned long new2, \
- volatile void *ptr) \
+union __u128_halves {
+ u128 full;
+ struct {
+ u64 low, high;
+ };
+};
+
+#define __CMPXCHG128(name, mb, rel, cl...) \
+static __always_inline u128 \
+__ll_sc__cmpxchg128##name(volatile u128 *ptr, u128 old, u128 new) \
{ \
- unsigned long tmp, ret; \
+ union __u128_halves r, o = { .full = (old) }, \
+ n = { .full = (new) }; \
+ unsigned int tmp; \
\
- asm volatile("// __cmpxchg_double" #name "\n" \
- __LL_SC_FALLBACK( \
- " prfm pstl1strm, %2\n" \
- "1: ldxp %0, %1, %2\n" \
- " eor %0, %0, %3\n" \
- " eor %1, %1, %4\n" \
- " orr %1, %0, %1\n" \
- " cbnz %1, 2f\n" \
- " st" #rel "xp %w0, %5, %6, %2\n" \
- " cbnz %w0, 1b\n" \
+ asm volatile("// __cmpxchg128" #name "\n" \
+ " prfm pstl1strm, %[v]\n" \
+ "1: ldxp %[rl], %[rh], %[v]\n" \
+ " cmp %[rl], %[ol]\n" \
+ " ccmp %[rh], %[oh], 0, eq\n" \
+ " b.ne 2f\n" \
+ " st" #rel "xp %w[tmp], %[nl], %[nh], %[v]\n" \
+ " cbnz %w[tmp], 1b\n" \
" " #mb "\n" \
- "2:") \
- : "=&r" (tmp), "=&r" (ret), "+Q" (*(unsigned long *)ptr) \
- : "r" (old1), "r" (old2), "r" (new1), "r" (new2) \
- : cl); \
+ "2:" \
+ : [v] "+Q" (*(u128 *)ptr), \
+ [rl] "=&r" (r.low), [rh] "=&r" (r.high), \
+ [tmp] "=&r" (tmp) \
+ : [ol] "r" (o.low), [oh] "r" (o.high), \
+ [nl] "r" (n.low), [nh] "r" (n.high) \
+ : "cc", ##cl); \
\
- return ret; \
+ return r.full; \
}
-__CMPXCHG_DBL( , , , )
-__CMPXCHG_DBL(_mb, dmb ish, l, "memory")
+__CMPXCHG128( , , )
+__CMPXCHG128(_mb, dmb ish, l, "memory")
+
+#undef __CMPXCHG128
-#undef __CMPXCHG_DBL
#undef K
#endif /* __ASM_ATOMIC_LL_SC_H */
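
For reference, a minimal sketch (not in the patch) of the contract the new 128-bit primitive implements: the routine returns the value observed in memory, and the exchange succeeded only if that equals the expected old value. Callers normally go through the higher-level cmpxchg128 wrappers rather than the __ll_sc_ function directly.

	static inline bool example_try_update_u128(volatile u128 *p, u128 old, u128 new)
	{
		/* Full-barrier variant; compare against 'old' to detect success. */
		return __ll_sc__cmpxchg128_mb(p, old, new) == old;
	}
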
diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h
index 5d460f6b7675..87f568a94e55 100644
--- a/arch/arm64/include/asm/atomic_lse.h
+++ b/arch/arm64/include/asm/atomic_lse.h
@@ -11,7 +11,8 @@
#define __ASM_ATOMIC_LSE_H
#define ATOMIC_OP(op, asm_op) \
-static inline void __lse_atomic_##op(int i, atomic_t *v) \
+static __always_inline void \
+__lse_atomic_##op(int i, atomic_t *v) \
{ \
asm volatile( \
__LSE_PREAMBLE \
@@ -25,7 +26,7 @@ ATOMIC_OP(or, stset)
ATOMIC_OP(xor, steor)
ATOMIC_OP(add, stadd)
-static inline void __lse_atomic_sub(int i, atomic_t *v)
+static __always_inline void __lse_atomic_sub(int i, atomic_t *v)
{
__lse_atomic_add(-i, v);
}
@@ -33,7 +34,8 @@ static inline void __lse_atomic_sub(int i, atomic_t *v)
#undef ATOMIC_OP
#define ATOMIC_FETCH_OP(name, mb, op, asm_op, cl...) \
-static inline int __lse_atomic_fetch_##op##name(int i, atomic_t *v) \
+static __always_inline int \
+__lse_atomic_fetch_##op##name(int i, atomic_t *v) \
{ \
int old; \
\
@@ -63,7 +65,8 @@ ATOMIC_FETCH_OPS(add, ldadd)
#undef ATOMIC_FETCH_OPS
#define ATOMIC_FETCH_OP_SUB(name) \
-static inline int __lse_atomic_fetch_sub##name(int i, atomic_t *v) \
+static __always_inline int \
+__lse_atomic_fetch_sub##name(int i, atomic_t *v) \
{ \
return __lse_atomic_fetch_add##name(-i, v); \
}
@@ -76,12 +79,14 @@ ATOMIC_FETCH_OP_SUB( )
#undef ATOMIC_FETCH_OP_SUB
#define ATOMIC_OP_ADD_SUB_RETURN(name) \
-static inline int __lse_atomic_add_return##name(int i, atomic_t *v) \
+static __always_inline int \
+__lse_atomic_add_return##name(int i, atomic_t *v) \
{ \
return __lse_atomic_fetch_add##name(i, v) + i; \
} \
\
-static inline int __lse_atomic_sub_return##name(int i, atomic_t *v) \
+static __always_inline int \
+__lse_atomic_sub_return##name(int i, atomic_t *v) \
{ \
return __lse_atomic_fetch_sub(i, v) - i; \
}
@@ -93,13 +98,14 @@ ATOMIC_OP_ADD_SUB_RETURN( )
#undef ATOMIC_OP_ADD_SUB_RETURN
-static inline void __lse_atomic_and(int i, atomic_t *v)
+static __always_inline void __lse_atomic_and(int i, atomic_t *v)
{
return __lse_atomic_andnot(~i, v);
}
#define ATOMIC_FETCH_OP_AND(name, mb, cl...) \
-static inline int __lse_atomic_fetch_and##name(int i, atomic_t *v) \
+static __always_inline int \
+__lse_atomic_fetch_and##name(int i, atomic_t *v) \
{ \
return __lse_atomic_fetch_andnot##name(~i, v); \
}
@@ -112,7 +118,8 @@ ATOMIC_FETCH_OP_AND( , al, "memory")
#undef ATOMIC_FETCH_OP_AND
#define ATOMIC64_OP(op, asm_op) \
-static inline void __lse_atomic64_##op(s64 i, atomic64_t *v) \
+static __always_inline void \
+__lse_atomic64_##op(s64 i, atomic64_t *v) \
{ \
asm volatile( \
__LSE_PREAMBLE \
@@ -126,7 +133,7 @@ ATOMIC64_OP(or, stset)
ATOMIC64_OP(xor, steor)
ATOMIC64_OP(add, stadd)
-static inline void __lse_atomic64_sub(s64 i, atomic64_t *v)
+static __always_inline void __lse_atomic64_sub(s64 i, atomic64_t *v)
{
__lse_atomic64_add(-i, v);
}
@@ -134,7 +141,8 @@ static inline void __lse_atomic64_sub(s64 i, atomic64_t *v)
#undef ATOMIC64_OP
#define ATOMIC64_FETCH_OP(name, mb, op, asm_op, cl...) \
-static inline long __lse_atomic64_fetch_##op##name(s64 i, atomic64_t *v)\
+static __always_inline long \
+__lse_atomic64_fetch_##op##name(s64 i, atomic64_t *v) \
{ \
s64 old; \
\
@@ -164,7 +172,8 @@ ATOMIC64_FETCH_OPS(add, ldadd)
#undef ATOMIC64_FETCH_OPS
#define ATOMIC64_FETCH_OP_SUB(name) \
-static inline long __lse_atomic64_fetch_sub##name(s64 i, atomic64_t *v) \
+static __always_inline long \
+__lse_atomic64_fetch_sub##name(s64 i, atomic64_t *v) \
{ \
return __lse_atomic64_fetch_add##name(-i, v); \
}
@@ -177,12 +186,14 @@ ATOMIC64_FETCH_OP_SUB( )
#undef ATOMIC64_FETCH_OP_SUB
#define ATOMIC64_OP_ADD_SUB_RETURN(name) \
-static inline long __lse_atomic64_add_return##name(s64 i, atomic64_t *v)\
+static __always_inline long \
+__lse_atomic64_add_return##name(s64 i, atomic64_t *v) \
{ \
return __lse_atomic64_fetch_add##name(i, v) + i; \
} \
\
-static inline long __lse_atomic64_sub_return##name(s64 i, atomic64_t *v)\
+static __always_inline long \
+__lse_atomic64_sub_return##name(s64 i, atomic64_t *v) \
{ \
return __lse_atomic64_fetch_sub##name(i, v) - i; \
}
@@ -194,13 +205,14 @@ ATOMIC64_OP_ADD_SUB_RETURN( )
#undef ATOMIC64_OP_ADD_SUB_RETURN
-static inline void __lse_atomic64_and(s64 i, atomic64_t *v)
+static __always_inline void __lse_atomic64_and(s64 i, atomic64_t *v)
{
return __lse_atomic64_andnot(~i, v);
}
#define ATOMIC64_FETCH_OP_AND(name, mb, cl...) \
-static inline long __lse_atomic64_fetch_and##name(s64 i, atomic64_t *v) \
+static __always_inline long \
+__lse_atomic64_fetch_and##name(s64 i, atomic64_t *v) \
{ \
return __lse_atomic64_fetch_andnot##name(~i, v); \
}
@@ -212,7 +224,7 @@ ATOMIC64_FETCH_OP_AND( , al, "memory")
#undef ATOMIC64_FETCH_OP_AND
-static inline s64 __lse_atomic64_dec_if_positive(atomic64_t *v)
+static __always_inline s64 __lse_atomic64_dec_if_positive(atomic64_t *v)
{
unsigned long tmp;
@@ -239,22 +251,15 @@ __lse__cmpxchg_case_##name##sz(volatile void *ptr, \
u##sz old, \
u##sz new) \
{ \
- register unsigned long x0 asm ("x0") = (unsigned long)ptr; \
- register u##sz x1 asm ("x1") = old; \
- register u##sz x2 asm ("x2") = new; \
- unsigned long tmp; \
- \
asm volatile( \
__LSE_PREAMBLE \
- " mov %" #w "[tmp], %" #w "[old]\n" \
- " cas" #mb #sfx "\t%" #w "[tmp], %" #w "[new], %[v]\n" \
- " mov %" #w "[ret], %" #w "[tmp]" \
- : [ret] "+r" (x0), [v] "+Q" (*(u##sz *)ptr), \
- [tmp] "=&r" (tmp) \
- : [old] "r" (x1), [new] "r" (x2) \
+ " cas" #mb #sfx " %" #w "[old], %" #w "[new], %[v]\n" \
+ : [v] "+Q" (*(u##sz *)ptr), \
+ [old] "+r" (old) \
+ : [new] "rZ" (new) \
: cl); \
\
- return x0; \
+ return old; \
}
__CMPXCHG_CASE(w, b, , 8, )
@@ -276,40 +281,35 @@ __CMPXCHG_CASE(x, , mb_, 64, al, "memory")
#undef __CMPXCHG_CASE
-#define __CMPXCHG_DBL(name, mb, cl...) \
-static __always_inline long \
-__lse__cmpxchg_double##name(unsigned long old1, \
- unsigned long old2, \
- unsigned long new1, \
- unsigned long new2, \
- volatile void *ptr) \
+#define __CMPXCHG128(name, mb, cl...) \
+static __always_inline u128 \
+__lse__cmpxchg128##name(volatile u128 *ptr, u128 old, u128 new) \
{ \
- unsigned long oldval1 = old1; \
- unsigned long oldval2 = old2; \
- register unsigned long x0 asm ("x0") = old1; \
- register unsigned long x1 asm ("x1") = old2; \
- register unsigned long x2 asm ("x2") = new1; \
- register unsigned long x3 asm ("x3") = new2; \
+ union __u128_halves r, o = { .full = (old) }, \
+ n = { .full = (new) }; \
+ register unsigned long x0 asm ("x0") = o.low; \
+ register unsigned long x1 asm ("x1") = o.high; \
+ register unsigned long x2 asm ("x2") = n.low; \
+ register unsigned long x3 asm ("x3") = n.high; \
register unsigned long x4 asm ("x4") = (unsigned long)ptr; \
\
asm volatile( \
__LSE_PREAMBLE \
" casp" #mb "\t%[old1], %[old2], %[new1], %[new2], %[v]\n"\
- " eor %[old1], %[old1], %[oldval1]\n" \
- " eor %[old2], %[old2], %[oldval2]\n" \
- " orr %[old1], %[old1], %[old2]" \
: [old1] "+&r" (x0), [old2] "+&r" (x1), \
- [v] "+Q" (*(unsigned long *)ptr) \
+ [v] "+Q" (*(u128 *)ptr) \
: [new1] "r" (x2), [new2] "r" (x3), [ptr] "r" (x4), \
- [oldval1] "r" (oldval1), [oldval2] "r" (oldval2) \
+ [oldval1] "r" (o.low), [oldval2] "r" (o.high) \
: cl); \
\
- return x0; \
+ r.low = x0; r.high = x1; \
+ \
+ return r.full; \
}
-__CMPXCHG_DBL( , )
-__CMPXCHG_DBL(_mb, al, "memory")
+__CMPXCHG128( , )
+__CMPXCHG128(_mb, al, "memory")
-#undef __CMPXCHG_DBL
+#undef __CMPXCHG128
#endif /* __ASM_ATOMIC_LSE_H */
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 2cfc4245d2e2..f5801b0ba9e9 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -11,6 +11,8 @@
#include <linux/kasan-checks.h>
+#include <asm/alternative-macros.h>
+
#define __nops(n) ".rept " #n "\nnop\n.endr\n"
#define nops(n) asm volatile(__nops(n))
@@ -38,13 +40,21 @@
*/
#define dgh() asm volatile("hint #6" : : : "memory")
+#define spec_bar() asm volatile(ALTERNATIVE("dsb nsh\nisb\n", \
+ SB_BARRIER_INSN"nop\n", \
+ ARM64_HAS_SB))
+
+#define gsb_ack() asm volatile(GSB_ACK_BARRIER_INSN : : : "memory")
+#define gsb_sys() asm volatile(GSB_SYS_BARRIER_INSN : : : "memory")
+
#ifdef CONFIG_ARM64_PSEUDO_NMI
#define pmr_sync() \
do { \
- extern struct static_key_false gic_pmr_sync; \
- \
- if (static_branch_unlikely(&gic_pmr_sync)) \
- dsb(sy); \
+ asm volatile( \
+ ALTERNATIVE_CB("dsb sy", \
+ ARM64_HAS_GIC_PRIO_RELAXED_SYNC, \
+ alt_cb_patch_nops) \
+ ); \
} while(0)
#else
#define pmr_sync() do {} while (0)
@@ -128,25 +138,25 @@ do { \
case 1: \
asm volatile ("stlrb %w1, %0" \
: "=Q" (*__p) \
- : "r" (*(__u8 *)__u.__c) \
+ : "rZ" (*(__u8 *)__u.__c) \
: "memory"); \
break; \
case 2: \
asm volatile ("stlrh %w1, %0" \
: "=Q" (*__p) \
- : "r" (*(__u16 *)__u.__c) \
+ : "rZ" (*(__u16 *)__u.__c) \
: "memory"); \
break; \
case 4: \
asm volatile ("stlr %w1, %0" \
: "=Q" (*__p) \
- : "r" (*(__u32 *)__u.__c) \
+ : "rZ" (*(__u32 *)__u.__c) \
: "memory"); \
break; \
case 8: \
- asm volatile ("stlr %1, %0" \
+ asm volatile ("stlr %x1, %0" \
: "=Q" (*__p) \
- : "r" (*(__u64 *)__u.__c) \
+ : "rZ" (*(__u64 *)__u.__c) \
: "memory"); \
break; \
} \
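
The move from "r" to "rZ" above lets the compiler satisfy a constant-zero store operand with the zero register, and the 64-bit template now spells the operand as %x1 so that a matched literal 0 is printed as xzr. A stand-alone sketch of the same idea, outside the kernel macro (illustrative only):

#include <stdint.h>

/*
 * Store-release of a 64-bit value. With "rZ", writing a compile-time zero
 * can be emitted as "stlr xzr, [ptr]" instead of materialising 0 in a
 * scratch register first; %x1 prints xzr for a matched constant zero.
 */
static inline void store_release_u64(uint64_t *p, uint64_t v)
{
        asm volatile("stlr %x1, %0" : "=Q" (*p) : "rZ" (v) : "memory");
}

/* store_release_u64(&flag, 0) can now compile to a single stlr xzr, [xN]. */
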
diff --git a/arch/arm64/include/asm/brk-imm.h b/arch/arm64/include/asm/brk-imm.h
index ec7720dbe2c8..beb42c62b6ac 100644
--- a/arch/arm64/include/asm/brk-imm.h
+++ b/arch/arm64/include/asm/brk-imm.h
@@ -11,21 +11,32 @@
* 0x004: for installing kprobes
* 0x005: for installing uprobes
* 0x006: for kprobe software single-step
+ * 0x007: for kretprobe return
* Allowed values for kgdb are 0x400 - 0x7ff
* 0x100: for triggering a fault on purpose (reserved)
* 0x400: for dynamic BRK instruction
* 0x401: for compile time BRK instruction
* 0x800: kernel-mode BUG() and WARN() traps
* 0x9xx: tag-based KASAN trap (allowed values 0x900 - 0x9ff)
+ * 0x55xx: Undefined Behavior Sanitizer traps ('U' << 8)
+ * 0x8xxx: Control-Flow Integrity traps
*/
#define KPROBES_BRK_IMM 0x004
#define UPROBES_BRK_IMM 0x005
#define KPROBES_BRK_SS_IMM 0x006
+#define KRETPROBES_BRK_IMM 0x007
#define FAULT_BRK_IMM 0x100
#define KGDB_DYN_DBG_BRK_IMM 0x400
#define KGDB_COMPILED_DBG_BRK_IMM 0x401
#define BUG_BRK_IMM 0x800
#define KASAN_BRK_IMM 0x900
#define KASAN_BRK_MASK 0x0ff
+#define UBSAN_BRK_IMM 0x5500
+#define UBSAN_BRK_MASK 0x00ff
+
+#define CFI_BRK_IMM_TARGET GENMASK(4, 0)
+#define CFI_BRK_IMM_TYPE GENMASK(9, 5)
+#define CFI_BRK_IMM_BASE 0x8000
+#define CFI_BRK_IMM_MASK (CFI_BRK_IMM_TARGET | CFI_BRK_IMM_TYPE)
#endif
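
The CFI immediates pack a 5-bit target-register index and a 5-bit type-register index into the low bits above CFI_BRK_IMM_BASE. A hedged sketch of decoding such an immediate with the usual bitfield helpers (the function is made up for illustration; a real handler would take the immediate from the trap's ESR):

#include <linux/bitfield.h>
#include <linux/bits.h>
#include <linux/types.h>
#include <asm/brk-imm.h>

/* Illustrative only: split a CFI BRK immediate into its register fields. */
static bool example_decode_cfi_brk(unsigned long imm,
                                   unsigned int *target, unsigned int *type)
{
        if ((imm & ~CFI_BRK_IMM_MASK) != CFI_BRK_IMM_BASE)
                return false;                           /* not a CFI trap */

        *target = FIELD_GET(CFI_BRK_IMM_TARGET, imm);   /* bits [4:0] */
        *type   = FIELD_GET(CFI_BRK_IMM_TYPE, imm);     /* bits [9:5] */
        return true;
}
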
diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h
index ca9b487112cc..09963004ceea 100644
--- a/arch/arm64/include/asm/cache.h
+++ b/arch/arm64/include/asm/cache.h
@@ -16,6 +16,15 @@
#define CLIDR_LOC(clidr) (((clidr) >> CLIDR_LOC_SHIFT) & 0x7)
#define CLIDR_LOUIS(clidr) (((clidr) >> CLIDR_LOUIS_SHIFT) & 0x7)
+/* Ctypen, bits[3(n - 1) + 2 : 3(n - 1)], for n = 1 to 7 */
+#define CLIDR_CTYPE_SHIFT(level) (3 * (level - 1))
+#define CLIDR_CTYPE_MASK(level) (7 << CLIDR_CTYPE_SHIFT(level))
+#define CLIDR_CTYPE(clidr, level) \
+ (((clidr) & CLIDR_CTYPE_MASK(level)) >> CLIDR_CTYPE_SHIFT(level))
+
+/* Ttypen, bits [2(n - 1) + 34 : 2(n - 1) + 33], for n = 1 to 7 */
+#define CLIDR_TTYPE_SHIFT(level) (2 * ((level) - 1) + CLIDR_EL1_Ttypen_SHIFT)
+
/*
* Memory returned by kmalloc() may be used for DMA, so we must make
* sure that all such allocations are cache aligned. Otherwise,
@@ -24,8 +33,9 @@
* the CPU.
*/
#define ARCH_DMA_MINALIGN (128)
+#define ARCH_KMALLOC_MINALIGN (8)
-#ifndef __ASSEMBLY__
+#if !defined(__ASSEMBLY__) && !defined(BUILD_VDSO)
#include <linux/bitops.h>
#include <linux/kasan-enabled.h>
@@ -45,14 +55,9 @@ static inline unsigned int arch_slab_minalign(void)
#define arch_slab_minalign() arch_slab_minalign()
#endif
-#define CTR_CACHE_MINLINE_MASK \
- (0xf << CTR_EL0_DMINLINE_SHIFT | \
- CTR_EL0_IMINLINE_MASK << CTR_EL0_IMINLINE_SHIFT)
-
#define CTR_L1IP(ctr) SYS_FIELD_GET(CTR_EL0, L1Ip, ctr)
#define ICACHEF_ALIASING 0
-#define ICACHEF_VPIPT 1
extern unsigned long __icache_flags;
/*
@@ -64,14 +69,9 @@ static inline int icache_is_aliasing(void)
return test_bit(ICACHEF_ALIASING, &__icache_flags);
}
-static __always_inline int icache_is_vpipt(void)
-{
- return test_bit(ICACHEF_VPIPT, &__icache_flags);
-}
-
static inline u32 cache_type_cwg(void)
{
- return (read_cpuid_cachetype() >> CTR_EL0_CWG_SHIFT) & CTR_EL0_CWG_MASK;
+ return SYS_FIELD_GET(CTR_EL0, CWG, read_cpuid_cachetype());
}
#define __read_mostly __section(".data..read_mostly")
@@ -85,6 +85,25 @@ static inline int cache_line_size_of_cpu(void)
int cache_line_size(void);
+#define dma_get_cache_alignment cache_line_size
+
+/* Compress a u64 MPIDR value into 32 bits. */
+static inline u64 arch_compact_of_hwid(u64 id)
+{
+ u64 aff3 = MPIDR_AFFINITY_LEVEL(id, 3);
+
+ /*
+ * These bits are expected to be RES0. If not, return a value with
+ * the upper 32 bits set to force the caller to give up on 32 bit
+ * cache ids.
+ */
+ if (FIELD_GET(GENMASK_ULL(63, 40), id))
+ return id;
+
+ return (aff3 << 24) | FIELD_GET(GENMASK_ULL(23, 0), id);
+}
+#define arch_compact_of_hwid arch_compact_of_hwid
+
/*
* Read the effective value of CTR_EL0.
*
@@ -116,6 +135,6 @@ static inline u32 __attribute_const__ read_cpuid_effective_cachetype(void)
return ctr;
}
-#endif /* __ASSEMBLY__ */
+#endif /* !defined(__ASSEMBLY__) && !defined(BUILD_VDSO) */
#endif
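
CLIDR_CTYPE() extracts the 3-bit cache-type field for a given level from CLIDR_EL1, where a value of 0 means no cache is implemented at that level. A small sketch of walking the levels the CPU reports (a made-up helper, not kernel code):

#include <linux/types.h>
#include <asm/cache.h>
#include <asm/sysreg.h>

/* Illustrative: count the cache levels described in CLIDR_EL1. */
static unsigned int example_num_cache_levels(void)
{
        u64 clidr = read_sysreg(clidr_el1);
        unsigned int level;

        for (level = 1; level <= 7; level++) {
                /* Ctype == 0: no cache at this level (or beyond). */
                if (CLIDR_CTYPE(clidr, level) == 0)
                        break;
        }

        return level - 1;
}
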
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 37185e978aeb..28ab96e808ef 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -114,10 +114,10 @@ extern void copy_to_user_page(struct vm_area_struct *, struct page *,
#define copy_to_user_page copy_to_user_page
/*
- * flush_dcache_page is used when the kernel has written to the page
+ * flush_dcache_folio is used when the kernel has written to the page
* cache page at virtual address page->virtual.
*
- * If this page isn't mapped (ie, page_mapping == NULL), or it might
+ * If this page isn't mapped (ie, folio_mapping == NULL), or it might
* have userspace mappings, then we _must_ always clean + invalidate
* the dcache entries associated with the kernel mapping.
*
@@ -127,10 +127,12 @@ extern void copy_to_user_page(struct vm_area_struct *, struct page *,
*/
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
extern void flush_dcache_page(struct page *);
+void flush_dcache_folio(struct folio *);
+#define flush_dcache_folio flush_dcache_folio
static __always_inline void icache_inval_all_pou(void)
{
- if (cpus_have_const_cap(ARM64_HAS_CACHE_DIC))
+ if (alternative_has_cap_unlikely(ARM64_HAS_CACHE_DIC))
return;
asm("ic ialluis");
diff --git a/arch/arm64/include/asm/cfi.h b/arch/arm64/include/asm/cfi.h
new file mode 100644
index 000000000000..ab90f0351b7a
--- /dev/null
+++ b/arch/arm64/include/asm/cfi.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_ARM64_CFI_H
+#define _ASM_ARM64_CFI_H
+
+#define __bpfcall
+
+#endif /* _ASM_ARM64_CFI_H */
diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index 497acf134d99..d7a540736741 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -62,9 +62,8 @@ __XCHG_CASE( , , mb_, 64, dmb ish, nop, , a, l, "memory")
#undef __XCHG_CASE
#define __XCHG_GEN(sfx) \
-static __always_inline unsigned long __xchg##sfx(unsigned long x, \
- volatile void *ptr, \
- int size) \
+static __always_inline unsigned long \
+__arch_xchg##sfx(unsigned long x, volatile void *ptr, int size) \
{ \
switch (size) { \
case 1: \
@@ -93,7 +92,7 @@ __XCHG_GEN(_mb)
({ \
__typeof__(*(ptr)) __ret; \
__ret = (__typeof__(*(ptr))) \
- __xchg##sfx((unsigned long)(x), (ptr), sizeof(*(ptr))); \
+ __arch_xchg##sfx((unsigned long)(x), (ptr), sizeof(*(ptr))); \
__ret; \
})
@@ -131,21 +130,18 @@ __CMPXCHG_CASE(mb_, 64)
#undef __CMPXCHG_CASE
-#define __CMPXCHG_DBL(name) \
-static inline long __cmpxchg_double##name(unsigned long old1, \
- unsigned long old2, \
- unsigned long new1, \
- unsigned long new2, \
- volatile void *ptr) \
+#define __CMPXCHG128(name) \
+static inline u128 __cmpxchg128##name(volatile u128 *ptr, \
+ u128 old, u128 new) \
{ \
- return __lse_ll_sc_body(_cmpxchg_double##name, \
- old1, old2, new1, new2, ptr); \
+ return __lse_ll_sc_body(_cmpxchg128##name, \
+ ptr, old, new); \
}
-__CMPXCHG_DBL( )
-__CMPXCHG_DBL(_mb)
+__CMPXCHG128( )
+__CMPXCHG128(_mb)
-#undef __CMPXCHG_DBL
+#undef __CMPXCHG128
#define __CMPXCHG_GEN(sfx) \
static __always_inline unsigned long __cmpxchg##sfx(volatile void *ptr, \
@@ -199,34 +195,17 @@ __CMPXCHG_GEN(_mb)
#define arch_cmpxchg64 arch_cmpxchg
#define arch_cmpxchg64_local arch_cmpxchg_local
-/* cmpxchg_double */
-#define system_has_cmpxchg_double() 1
-
-#define __cmpxchg_double_check(ptr1, ptr2) \
-({ \
- if (sizeof(*(ptr1)) != 8) \
- BUILD_BUG(); \
- VM_BUG_ON((unsigned long *)(ptr2) - (unsigned long *)(ptr1) != 1); \
-})
+/* cmpxchg128 */
+#define system_has_cmpxchg128() 1
-#define arch_cmpxchg_double(ptr1, ptr2, o1, o2, n1, n2) \
+#define arch_cmpxchg128(ptr, o, n) \
({ \
- int __ret; \
- __cmpxchg_double_check(ptr1, ptr2); \
- __ret = !__cmpxchg_double_mb((unsigned long)(o1), (unsigned long)(o2), \
- (unsigned long)(n1), (unsigned long)(n2), \
- ptr1); \
- __ret; \
+ __cmpxchg128_mb((ptr), (o), (n)); \
})
-#define arch_cmpxchg_double_local(ptr1, ptr2, o1, o2, n1, n2) \
+#define arch_cmpxchg128_local(ptr, o, n) \
({ \
- int __ret; \
- __cmpxchg_double_check(ptr1, ptr2); \
- __ret = !__cmpxchg_double((unsigned long)(o1), (unsigned long)(o2), \
- (unsigned long)(n1), (unsigned long)(n2), \
- ptr1); \
- __ret; \
+ __cmpxchg128((ptr), (o), (n)); \
})
#define __CMPWAIT_CASE(w, sfx, sz) \
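
Like the narrower cmpxchg variants, the new 128-bit helpers return the previous memory contents, so a caller detects success by comparing the return value with the expected one (most code would go through the generic cmpxchg128()/cmpxchg128_local() wrappers rather than the arch_ forms shown here). A minimal caller-side sketch with made-up names:

#include <linux/types.h>
#include <asm/cmpxchg.h>

/* Illustrative: replace *slot only if it still holds @expected. */
static bool example_try_swap128(u128 *slot, u128 expected, u128 desired)
{
        return arch_cmpxchg128(slot, expected, desired) == expected;
}
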
diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h
index 9f362274a4f7..ae904a1ad529 100644
--- a/arch/arm64/include/asm/compat.h
+++ b/arch/arm64/include/asm/compat.h
@@ -83,10 +83,6 @@ struct compat_statfs {
int f_spare[4];
};
-#define COMPAT_RLIM_INFINITY 0xffffffff
-
-#define COMPAT_OFF_T_MAX 0x7fffffff
-
#define compat_user_stack_pointer() (user_stack_pointer(task_pt_regs(current)))
#define COMPAT_MINSIGSTKSZ 2048
@@ -100,6 +96,8 @@ static inline int is_compat_thread(struct thread_info *thread)
return test_ti_thread_flag(thread, TIF_32BIT);
}
+long compat_arm_syscall(struct pt_regs *regs, int scno);
+
#else /* !CONFIG_COMPAT */
static inline int is_compat_thread(struct thread_info *thread)
diff --git a/arch/arm64/include/asm/compiler.h b/arch/arm64/include/asm/compiler.h
index 6fb2e6bcc392..9bbd7b7097ff 100644
--- a/arch/arm64/include/asm/compiler.h
+++ b/arch/arm64/include/asm/compiler.h
@@ -8,19 +8,33 @@
#define ARM64_ASM_PREAMBLE
#endif
-/*
- * The EL0/EL1 pointer bits used by a pointer authentication code.
- * This is dependent on TBI0/TBI1 being enabled, or bits 63:56 would also apply.
- */
-#define ptrauth_user_pac_mask() GENMASK_ULL(54, vabits_actual)
-#define ptrauth_kernel_pac_mask() GENMASK_ULL(63, vabits_actual)
+#define xpaclri(ptr) \
+({ \
+ register unsigned long __xpaclri_ptr asm("x30") = (ptr); \
+ \
+ asm( \
+ ARM64_ASM_PREAMBLE \
+ " hint #7\n" \
+ : "+r" (__xpaclri_ptr)); \
+ \
+ __xpaclri_ptr; \
+})
-/* Valid for EL0 TTBR0 and EL1 TTBR1 instruction pointers */
-#define ptrauth_clear_pac(ptr) \
- ((ptr & BIT_ULL(55)) ? (ptr | ptrauth_kernel_pac_mask()) : \
- (ptr & ~ptrauth_user_pac_mask()))
+#ifdef CONFIG_ARM64_PTR_AUTH_KERNEL
+#define ptrauth_strip_kernel_insn_pac(ptr) xpaclri(ptr)
+#else
+#define ptrauth_strip_kernel_insn_pac(ptr) (ptr)
+#endif
+
+#ifdef CONFIG_ARM64_PTR_AUTH
+#define ptrauth_strip_user_insn_pac(ptr) xpaclri(ptr)
+#else
+#define ptrauth_strip_user_insn_pac(ptr) (ptr)
+#endif
+#if !defined(CONFIG_BUILTIN_RETURN_ADDRESS_STRIPS_PAC)
#define __builtin_return_address(val) \
- (void *)(ptrauth_clear_pac((unsigned long)__builtin_return_address(val)))
+ (void *)(ptrauth_strip_kernel_insn_pac((unsigned long)__builtin_return_address(val)))
+#endif
#endif /* __ASM_COMPILER_H */
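
With the split into kernel and user variants, stripping a pointer authentication code from an instruction address is now an explicit, config-gated operation. A short hedged sketch (the helper name is made up):

#include <asm/compiler.h>

/*
 * Illustrative: turn a possibly PAC-signed kernel return address back into
 * a plain pointer, e.g. before symbolising it. With pointer auth disabled
 * in the kernel configuration this degrades to a no-op.
 */
static inline unsigned long example_plain_return_address(unsigned long lr)
{
        return ptrauth_strip_kernel_insn_pac(lr);
}
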
diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h
index fd7a92219eea..71493b760b83 100644
--- a/arch/arm64/include/asm/cpu.h
+++ b/arch/arm64/include/asm/cpu.h
@@ -38,36 +38,36 @@ struct cpuinfo_32bit {
};
struct cpuinfo_arm64 {
- struct cpu cpu;
struct kobject kobj;
u64 reg_ctr;
u64 reg_cntfrq;
u64 reg_dczid;
u64 reg_midr;
u64 reg_revidr;
+ u64 reg_aidr;
u64 reg_gmid;
u64 reg_smidr;
+ u64 reg_mpamidr;
u64 reg_id_aa64dfr0;
u64 reg_id_aa64dfr1;
u64 reg_id_aa64isar0;
u64 reg_id_aa64isar1;
u64 reg_id_aa64isar2;
+ u64 reg_id_aa64isar3;
u64 reg_id_aa64mmfr0;
u64 reg_id_aa64mmfr1;
u64 reg_id_aa64mmfr2;
+ u64 reg_id_aa64mmfr3;
+ u64 reg_id_aa64mmfr4;
u64 reg_id_aa64pfr0;
u64 reg_id_aa64pfr1;
+ u64 reg_id_aa64pfr2;
u64 reg_id_aa64zfr0;
u64 reg_id_aa64smfr0;
+ u64 reg_id_aa64fpfr0;
struct cpuinfo_32bit aarch32;
-
- /* pseudo-ZCR for recording maximum ZCR_EL1 LEN value: */
- u64 reg_zcr;
-
- /* pseudo-SMCR for recording maximum SMCR_EL1 LEN value: */
- u64 reg_smcr;
};
DECLARE_PER_CPU(struct cpuinfo_arm64, cpu_data);
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
new file mode 100644
index 000000000000..9d769291a306
--- /dev/null
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __ASM_CPUCAPS_H
+#define __ASM_CPUCAPS_H
+
+#include <asm/cpucap-defs.h>
+
+#ifndef __ASSEMBLY__
+#include <linux/types.h>
+/*
+ * Check whether a cpucap is possible at compiletime.
+ */
+static __always_inline bool
+cpucap_is_possible(const unsigned int cap)
+{
+ compiletime_assert(__builtin_constant_p(cap),
+ "cap must be a constant");
+ compiletime_assert(cap < ARM64_NCAPS,
+ "cap must be < ARM64_NCAPS");
+
+ switch (cap) {
+ case ARM64_HAS_PAN:
+ return IS_ENABLED(CONFIG_ARM64_PAN);
+ case ARM64_HAS_EPAN:
+ return IS_ENABLED(CONFIG_ARM64_EPAN);
+ case ARM64_SVE:
+ return IS_ENABLED(CONFIG_ARM64_SVE);
+ case ARM64_SME:
+ case ARM64_SME2:
+ case ARM64_SME_FA64:
+ return IS_ENABLED(CONFIG_ARM64_SME);
+ case ARM64_HAS_CNP:
+ return IS_ENABLED(CONFIG_ARM64_CNP);
+ case ARM64_HAS_ADDRESS_AUTH:
+ case ARM64_HAS_GENERIC_AUTH:
+ return IS_ENABLED(CONFIG_ARM64_PTR_AUTH);
+ case ARM64_HAS_GIC_PRIO_MASKING:
+ return IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI);
+ case ARM64_MTE:
+ return IS_ENABLED(CONFIG_ARM64_MTE);
+ case ARM64_BTI:
+ return IS_ENABLED(CONFIG_ARM64_BTI);
+ case ARM64_HAS_TLB_RANGE:
+ return IS_ENABLED(CONFIG_ARM64_TLB_RANGE);
+ case ARM64_HAS_S1POE:
+ return IS_ENABLED(CONFIG_ARM64_POE);
+ case ARM64_HAS_GCS:
+ return IS_ENABLED(CONFIG_ARM64_GCS);
+ case ARM64_HAFT:
+ return IS_ENABLED(CONFIG_ARM64_HAFT);
+ case ARM64_UNMAP_KERNEL_AT_EL0:
+ return IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0);
+ case ARM64_WORKAROUND_843419:
+ return IS_ENABLED(CONFIG_ARM64_ERRATUM_843419);
+ case ARM64_WORKAROUND_1742098:
+ return IS_ENABLED(CONFIG_ARM64_ERRATUM_1742098);
+ case ARM64_WORKAROUND_2645198:
+ return IS_ENABLED(CONFIG_ARM64_ERRATUM_2645198);
+ case ARM64_WORKAROUND_2658417:
+ return IS_ENABLED(CONFIG_ARM64_ERRATUM_2658417);
+ case ARM64_WORKAROUND_CAVIUM_23154:
+ return IS_ENABLED(CONFIG_CAVIUM_ERRATUM_23154);
+ case ARM64_WORKAROUND_NVIDIA_CARMEL_CNP:
+ return IS_ENABLED(CONFIG_NVIDIA_CARMEL_CNP_ERRATUM);
+ case ARM64_WORKAROUND_REPEAT_TLBI:
+ return IS_ENABLED(CONFIG_ARM64_WORKAROUND_REPEAT_TLBI);
+ case ARM64_WORKAROUND_SPECULATIVE_SSBS:
+ return IS_ENABLED(CONFIG_ARM64_ERRATUM_3194386);
+ case ARM64_MPAM:
+ /*
+ * KVM MPAM support doesn't rely on the host kernel supporting MPAM.
+ */
+ return true;
+ case ARM64_HAS_PMUV3:
+ return IS_ENABLED(CONFIG_HW_PERF_EVENTS);
+ }
+
+ return true;
+}
+#endif /* __ASSEMBLY__ */
+
+#endif /* __ASM_CPUCAPS_H */
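
cpucap_is_possible() exists so that capability checks on compile-time-constant cap numbers can be folded away when the corresponding Kconfig option is off; the cpus_have_cap() change further down (in cpufeature.h) is what wires it in. A sketch of the resulting pattern:

#include <asm/cpucaps.h>
#include <asm/cpufeature.h>

/*
 * Illustrative: with CONFIG_ARM64_SVE=n, cpucap_is_possible(ARM64_SVE) is
 * compile-time false, so this whole branch is eliminated by the compiler.
 */
static void example_maybe_use_sve(void)
{
        if (cpus_have_cap(ARM64_SVE)) {
                /* SVE-specific work would go here. */
        }
}
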
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index fd7d75a275f6..bf13d676aae2 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -6,19 +6,25 @@
#ifndef __ASM_CPUFEATURE_H
#define __ASM_CPUFEATURE_H
+#include <asm/alternative-macros.h>
#include <asm/cpucaps.h>
#include <asm/cputype.h>
#include <asm/hwcap.h>
#include <asm/sysreg.h>
-#define MAX_CPU_FEATURES 128
+#define MAX_CPU_FEATURES 192
#define cpu_feature(x) KERNEL_HWCAP_ ## x
+#define ARM64_SW_FEATURE_OVERRIDE_NOKASLR 0
+#define ARM64_SW_FEATURE_OVERRIDE_HVHE 4
+#define ARM64_SW_FEATURE_OVERRIDE_RODATA_OFF 8
+
#ifndef __ASSEMBLY__
#include <linux/bug.h>
#include <linux/jump_label.h>
#include <linux/kernel.h>
+#include <linux/cpumask.h>
/*
* CPU feature register tracking
@@ -78,7 +84,7 @@ struct arm64_ftr_bits {
* to full-0 denotes that this field has no override
*
* A @mask field set to full-0 with the corresponding @val field set
- * to full-1 denotes thath this field has an invalid override.
+ * to full-1 denotes that this field has an invalid override.
*/
struct arm64_ftr_override {
u64 val;
@@ -106,7 +112,7 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0;
* CPU capabilities:
*
* We use arm64_cpu_capabilities to represent system features, errata work
- * arounds (both used internally by kernel and tracked in cpu_hwcaps) and
+ * arounds (both used internally by kernel and tracked in system_cpucaps) and
* ELF HWCAPs (which are exposed to user).
*
* To support systems with heterogeneous CPUs, we need to make sure that we
@@ -269,6 +275,14 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0;
#define ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU ((u16)BIT(5))
/* Panic when a conflict is detected */
#define ARM64_CPUCAP_PANIC_ON_CONFLICT ((u16)BIT(6))
+/*
+ * When paired with SCOPE_LOCAL_CPU, all early CPUs must satisfy the
+ * condition. This is different from SCOPE_SYSTEM where the check is performed
+ * only once at the end of the SMP boot on the sanitised ID registers.
+ * SCOPE_SYSTEM is not suitable for cases where the capability depends on
+ * properties local to a CPU like MIDR_EL1.
+ */
+#define ARM64_CPUCAP_MATCH_ALL_EARLY_CPUS ((u16)BIT(7))
/*
* CPU errata workarounds that need to be enabled at boot time if one or
@@ -298,6 +312,16 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0;
(ARM64_CPUCAP_SCOPE_LOCAL_CPU | \
ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU | \
ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU)
+/*
+ * CPU feature detected at boot time and present on all early CPUs. Late CPUs
+ * are permitted to have the feature even if it hasn't been enabled, although
+ * the feature will not be used by Linux in this case. If all early CPUs have
+ * the feature, then every late CPU must have it.
+ */
+#define ARM64_CPUCAP_EARLY_LOCAL_CPU_FEATURE \
+ (ARM64_CPUCAP_SCOPE_LOCAL_CPU | \
+ ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU | \
+ ARM64_CPUCAP_MATCH_ALL_EARLY_CPUS)
/*
* CPU feature detected at boot time, on one or more CPUs. A late CPU
@@ -358,6 +382,7 @@ struct arm64_cpu_capabilities {
u8 field_pos;
u8 field_width;
u8 min_field_value;
+ u8 max_field_value;
u8 hwcap_type;
bool sign;
unsigned long hwcap;
@@ -376,6 +401,7 @@ struct arm64_cpu_capabilities {
* method is robust against being called multiple times.
*/
const struct arm64_cpu_capabilities *match_list;
+ const struct cpumask *cpus;
};
static inline int cpucap_default_scope(const struct arm64_cpu_capabilities *cap)
@@ -383,6 +409,11 @@ static inline int cpucap_default_scope(const struct arm64_cpu_capabilities *cap)
return cap->type & ARM64_CPUCAP_SCOPE_MASK;
}
+static inline bool cpucap_match_all_early_cpus(const struct arm64_cpu_capabilities *cap)
+{
+ return cap->type & ARM64_CPUCAP_MATCH_ALL_EARLY_CPUS;
+}
+
/*
* Generic helper for handling capabilities with multiple (match,enable) pairs
* of call backs, sharing the same capability bit.
@@ -418,29 +449,31 @@ static __always_inline bool is_hyp_code(void)
return is_vhe_hyp_code() || is_nvhe_hyp_code();
}
-extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
-extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS];
-extern struct static_key_false arm64_const_caps_ready;
+extern DECLARE_BITMAP(system_cpucaps, ARM64_NCAPS);
-/* ARM64 CAPS + alternative_cb */
-#define ARM64_NPATCHABLE (ARM64_NCAPS + 1)
-extern DECLARE_BITMAP(boot_capabilities, ARM64_NPATCHABLE);
+extern DECLARE_BITMAP(boot_cpucaps, ARM64_NCAPS);
#define for_each_available_cap(cap) \
- for_each_set_bit(cap, cpu_hwcaps, ARM64_NCAPS)
+ for_each_set_bit(cap, system_cpucaps, ARM64_NCAPS)
bool this_cpu_has_cap(unsigned int cap);
void cpu_set_feature(unsigned int num);
bool cpu_have_feature(unsigned int num);
unsigned long cpu_get_elf_hwcap(void);
unsigned long cpu_get_elf_hwcap2(void);
+unsigned long cpu_get_elf_hwcap3(void);
#define cpu_set_named_feature(name) cpu_set_feature(cpu_feature(name))
#define cpu_have_named_feature(name) cpu_have_feature(cpu_feature(name))
+static __always_inline bool boot_capabilities_finalized(void)
+{
+ return alternative_has_cap_likely(ARM64_ALWAYS_BOOT);
+}
+
static __always_inline bool system_capabilities_finalized(void)
{
- return static_branch_likely(&arm64_const_caps_ready);
+ return alternative_has_cap_likely(ARM64_ALWAYS_SYSTEM);
}
/*
@@ -448,75 +481,49 @@ static __always_inline bool system_capabilities_finalized(void)
*
* Before the capability is detected, this returns false.
*/
-static inline bool cpus_have_cap(unsigned int num)
+static __always_inline bool cpus_have_cap(unsigned int num)
{
+ if (__builtin_constant_p(num) && !cpucap_is_possible(num))
+ return false;
if (num >= ARM64_NCAPS)
return false;
- return test_bit(num, cpu_hwcaps);
+ return arch_test_bit(num, system_cpucaps);
}
/*
* Test for a capability without a runtime check.
*
- * Before capabilities are finalized, this returns false.
- * After capabilities are finalized, this is patched to avoid a runtime check.
+ * Before boot capabilities are finalized, this will BUG().
+ * After boot capabilities are finalized, this is patched to avoid a runtime
+ * check.
*
* @num must be a compile-time constant.
*/
-static __always_inline bool __cpus_have_const_cap(int num)
+static __always_inline bool cpus_have_final_boot_cap(int num)
{
- if (num >= ARM64_NCAPS)
- return false;
- return static_branch_unlikely(&cpu_hwcap_keys[num]);
+ if (boot_capabilities_finalized())
+ return alternative_has_cap_unlikely(num);
+ else
+ BUG();
}
/*
* Test for a capability without a runtime check.
*
- * Before capabilities are finalized, this will BUG().
- * After capabilities are finalized, this is patched to avoid a runtime check.
+ * Before system capabilities are finalized, this will BUG().
+ * After system capabilities are finalized, this is patched to avoid a runtime
+ * check.
*
* @num must be a compile-time constant.
*/
static __always_inline bool cpus_have_final_cap(int num)
{
if (system_capabilities_finalized())
- return __cpus_have_const_cap(num);
+ return alternative_has_cap_unlikely(num);
else
BUG();
}
-/*
- * Test for a capability, possibly with a runtime check for non-hyp code.
- *
- * For hyp code, this behaves the same as cpus_have_final_cap().
- *
- * For non-hyp code:
- * Before capabilities are finalized, this behaves as cpus_have_cap().
- * After capabilities are finalized, this is patched to avoid a runtime check.
- *
- * @num must be a compile-time constant.
- */
-static __always_inline bool cpus_have_const_cap(int num)
-{
- if (is_hyp_code())
- return cpus_have_final_cap(num);
- else if (system_capabilities_finalized())
- return __cpus_have_const_cap(num);
- else
- return cpus_have_cap(num);
-}
-
-static inline void cpus_set_cap(unsigned int num)
-{
- if (num >= ARM64_NCAPS) {
- pr_warn("Attempt to set an illegal CPU capability (%d >= %d)\n",
- num, ARM64_NCAPS);
- } else {
- __set_bit(num, cpu_hwcaps);
- }
-}
-
static inline int __attribute_const__
cpuid_feature_extract_signed_field_width(u64 features, int field, int width)
{
@@ -541,29 +548,6 @@ cpuid_feature_extract_unsigned_field(u64 features, int field)
return cpuid_feature_extract_unsigned_field_width(features, field, 4);
}
-/*
- * Fields that identify the version of the Performance Monitors Extension do
- * not follow the standard ID scheme. See ARM DDI 0487E.a page D13-2825,
- * "Alternative ID scheme used for the Performance Monitors Extension version".
- */
-static inline u64 __attribute_const__
-cpuid_feature_cap_perfmon_field(u64 features, int field, u64 cap)
-{
- u64 val = cpuid_feature_extract_unsigned_field(features, field);
- u64 mask = GENMASK_ULL(field + 3, field);
-
- /* Treat IMPLEMENTATION DEFINED functionality as unimplemented */
- if (val == ID_AA64DFR0_PMUVER_IMP_DEF)
- val = 0;
-
- if (val > cap) {
- features &= ~mask;
- features |= (cap << field) & mask;
- }
-
- return features;
-}
-
static inline u64 arm64_ftr_mask(const struct arm64_ftr_bits *ftrp)
{
return (u64)GENMASK(ftrp->shift + ftrp->width - 1, ftrp->shift);
@@ -597,46 +581,56 @@ static inline s64 arm64_ftr_value(const struct arm64_ftr_bits *ftrp, u64 val)
static inline bool id_aa64mmfr0_mixed_endian_el0(u64 mmfr0)
{
- return cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_BIGENDEL_SHIFT) == 0x1 ||
- cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_BIGENDEL0_SHIFT) == 0x1;
+ return cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_EL1_BIGEND_SHIFT) == 0x1 ||
+ cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_EL1_BIGENDEL0_SHIFT) == 0x1;
}
static inline bool id_aa64pfr0_32bit_el1(u64 pfr0)
{
- u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_SHIFT);
+ u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_EL1_SHIFT);
- return val == ID_AA64PFR0_ELx_32BIT_64BIT;
+ return val == ID_AA64PFR0_EL1_EL1_AARCH32;
}
static inline bool id_aa64pfr0_32bit_el0(u64 pfr0)
{
- u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL0_SHIFT);
+ u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_EL0_SHIFT);
- return val == ID_AA64PFR0_ELx_32BIT_64BIT;
+ return val == ID_AA64PFR0_EL1_EL0_AARCH32;
}
static inline bool id_aa64pfr0_sve(u64 pfr0)
{
- u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_SVE_SHIFT);
+ u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_SVE_SHIFT);
return val > 0;
}
static inline bool id_aa64pfr1_sme(u64 pfr1)
{
- u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_SME_SHIFT);
+ u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_EL1_SME_SHIFT);
+
+ return val > 0;
+}
+
+static inline bool id_aa64pfr0_mpam(u64 pfr0)
+{
+ u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_MPAM_SHIFT);
return val > 0;
}
static inline bool id_aa64pfr1_mte(u64 pfr1)
{
- u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_MTE_SHIFT);
+ u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_EL1_MTE_SHIFT);
- return val >= ID_AA64PFR1_MTE;
+ return val >= ID_AA64PFR1_EL1_MTE_MTE2;
}
-void __init setup_cpu_features(void);
+void __init setup_boot_cpu_features(void);
+void __init setup_system_features(void);
+void __init setup_user_features(void);
+
void check_local_cpu_capabilities(void);
u64 read_sanitised_ftr_reg(u32 id);
@@ -659,7 +653,7 @@ static inline bool supports_csv2p3(int scope)
pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
csv2_val = cpuid_feature_extract_unsigned_field(pfr0,
- ID_AA64PFR0_CSV2_SHIFT);
+ ID_AA64PFR0_EL1_CSV2_SHIFT);
return csv2_val == 3;
}
@@ -673,10 +667,11 @@ static inline bool supports_clearbhb(int scope)
isar2 = read_sanitised_ftr_reg(SYS_ID_AA64ISAR2_EL1);
return cpuid_feature_extract_unsigned_field(isar2,
- ID_AA64ISAR2_EL1_BC_SHIFT);
+ ID_AA64ISAR2_EL1_CLRBHB_SHIFT);
}
const struct cpumask *system_32bit_el0_cpumask(void);
+const struct cpumask *fallback_32bit_el0_cpumask(void);
DECLARE_STATIC_KEY_FALSE(arm64_mismatched_32bit_el0);
static inline bool system_supports_32bit_el0(void)
@@ -694,10 +689,10 @@ static inline bool system_supports_4kb_granule(void)
mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
val = cpuid_feature_extract_unsigned_field(mmfr0,
- ID_AA64MMFR0_TGRAN4_SHIFT);
+ ID_AA64MMFR0_EL1_TGRAN4_SHIFT);
- return (val >= ID_AA64MMFR0_TGRAN4_SUPPORTED_MIN) &&
- (val <= ID_AA64MMFR0_TGRAN4_SUPPORTED_MAX);
+ return (val >= ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN) &&
+ (val <= ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MAX);
}
static inline bool system_supports_64kb_granule(void)
@@ -707,10 +702,10 @@ static inline bool system_supports_64kb_granule(void)
mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
val = cpuid_feature_extract_unsigned_field(mmfr0,
- ID_AA64MMFR0_TGRAN64_SHIFT);
+ ID_AA64MMFR0_EL1_TGRAN64_SHIFT);
- return (val >= ID_AA64MMFR0_TGRAN64_SUPPORTED_MIN) &&
- (val <= ID_AA64MMFR0_TGRAN64_SUPPORTED_MAX);
+ return (val >= ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MIN) &&
+ (val <= ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MAX);
}
static inline bool system_supports_16kb_granule(void)
@@ -720,10 +715,10 @@ static inline bool system_supports_16kb_granule(void)
mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
val = cpuid_feature_extract_unsigned_field(mmfr0,
- ID_AA64MMFR0_TGRAN16_SHIFT);
+ ID_AA64MMFR0_EL1_TGRAN16_SHIFT);
- return (val >= ID_AA64MMFR0_TGRAN16_SUPPORTED_MIN) &&
- (val <= ID_AA64MMFR0_TGRAN16_SUPPORTED_MAX);
+ return (val >= ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MIN) &&
+ (val <= ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MAX);
}
static inline bool system_supports_mixed_endian_el0(void)
@@ -738,20 +733,19 @@ static inline bool system_supports_mixed_endian(void)
mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
val = cpuid_feature_extract_unsigned_field(mmfr0,
- ID_AA64MMFR0_BIGENDEL_SHIFT);
+ ID_AA64MMFR0_EL1_BIGEND_SHIFT);
return val == 0x1;
}
static __always_inline bool system_supports_fpsimd(void)
{
- return !cpus_have_const_cap(ARM64_HAS_NO_FPSIMD);
+ return alternative_has_cap_likely(ARM64_HAS_FPSIMD);
}
static inline bool system_uses_hw_pan(void)
{
- return IS_ENABLED(CONFIG_ARM64_PAN) &&
- cpus_have_const_cap(ARM64_HAS_PAN);
+ return alternative_has_cap_unlikely(ARM64_HAS_PAN);
}
static inline bool system_uses_ttbr0_pan(void)
@@ -762,20 +756,22 @@ static inline bool system_uses_ttbr0_pan(void)
static __always_inline bool system_supports_sve(void)
{
- return IS_ENABLED(CONFIG_ARM64_SVE) &&
- cpus_have_const_cap(ARM64_SVE);
+ return alternative_has_cap_unlikely(ARM64_SVE);
}
static __always_inline bool system_supports_sme(void)
{
- return IS_ENABLED(CONFIG_ARM64_SME) &&
- cpus_have_const_cap(ARM64_SME);
+ return alternative_has_cap_unlikely(ARM64_SME);
+}
+
+static __always_inline bool system_supports_sme2(void)
+{
+ return alternative_has_cap_unlikely(ARM64_SME2);
}
static __always_inline bool system_supports_fa64(void)
{
- return IS_ENABLED(CONFIG_ARM64_SME) &&
- cpus_have_const_cap(ARM64_SME_FA64);
+ return alternative_has_cap_unlikely(ARM64_SME_FA64);
}
static __always_inline bool system_supports_tpidr2(void)
@@ -783,22 +779,24 @@ static __always_inline bool system_supports_tpidr2(void)
return system_supports_sme();
}
+static __always_inline bool system_supports_fpmr(void)
+{
+ return alternative_has_cap_unlikely(ARM64_HAS_FPMR);
+}
+
static __always_inline bool system_supports_cnp(void)
{
- return IS_ENABLED(CONFIG_ARM64_CNP) &&
- cpus_have_const_cap(ARM64_HAS_CNP);
+ return alternative_has_cap_unlikely(ARM64_HAS_CNP);
}
static inline bool system_supports_address_auth(void)
{
- return IS_ENABLED(CONFIG_ARM64_PTR_AUTH) &&
- cpus_have_const_cap(ARM64_HAS_ADDRESS_AUTH);
+ return cpus_have_final_boot_cap(ARM64_HAS_ADDRESS_AUTH);
}
static inline bool system_supports_generic_auth(void)
{
- return IS_ENABLED(CONFIG_ARM64_PTR_AUTH) &&
- cpus_have_const_cap(ARM64_HAS_GENERIC_AUTH);
+ return alternative_has_cap_unlikely(ARM64_HAS_GENERIC_AUTH);
}
static inline bool system_has_full_ptr_auth(void)
@@ -808,14 +806,12 @@ static inline bool system_has_full_ptr_auth(void)
static __always_inline bool system_uses_irq_prio_masking(void)
{
- return IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) &&
- cpus_have_const_cap(ARM64_HAS_IRQ_PRIO_MASKING);
+ return alternative_has_cap_unlikely(ARM64_HAS_GIC_PRIO_MASKING);
}
static inline bool system_supports_mte(void)
{
- return IS_ENABLED(CONFIG_ARM64_MTE) &&
- cpus_have_const_cap(ARM64_MTE);
+ return alternative_has_cap_unlikely(ARM64_MTE);
}
static inline bool system_has_prio_mask_debugging(void)
@@ -826,27 +822,73 @@ static inline bool system_has_prio_mask_debugging(void)
static inline bool system_supports_bti(void)
{
- return IS_ENABLED(CONFIG_ARM64_BTI) && cpus_have_const_cap(ARM64_BTI);
+ return cpus_have_final_cap(ARM64_BTI);
+}
+
+static inline bool system_supports_bti_kernel(void)
+{
+ return IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) &&
+ cpus_have_final_boot_cap(ARM64_BTI);
}
static inline bool system_supports_tlb_range(void)
{
- return IS_ENABLED(CONFIG_ARM64_TLB_RANGE) &&
- cpus_have_const_cap(ARM64_HAS_TLB_RANGE);
+ return alternative_has_cap_unlikely(ARM64_HAS_TLB_RANGE);
+}
+
+static inline bool system_supports_lpa2(void)
+{
+ return cpus_have_final_cap(ARM64_HAS_LPA2);
+}
+
+static inline bool system_supports_poe(void)
+{
+ return alternative_has_cap_unlikely(ARM64_HAS_S1POE);
+}
+
+static inline bool system_supports_gcs(void)
+{
+ return alternative_has_cap_unlikely(ARM64_HAS_GCS);
+}
+
+static inline bool system_supports_haft(void)
+{
+ return cpus_have_final_cap(ARM64_HAFT);
+}
+
+static __always_inline bool system_supports_mpam(void)
+{
+ return alternative_has_cap_unlikely(ARM64_MPAM);
+}
+
+static __always_inline bool system_supports_mpam_hcr(void)
+{
+ return alternative_has_cap_unlikely(ARM64_MPAM_HCR);
}
-extern int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt);
+static inline bool system_supports_pmuv3(void)
+{
+ return cpus_have_final_cap(ARM64_HAS_PMUV3);
+}
+
+static inline bool system_supports_bbml2_noabort(void)
+{
+ return alternative_has_cap_unlikely(ARM64_HAS_BBML2_NOABORT);
+}
+
+int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt);
+bool try_emulate_mrs(struct pt_regs *regs, u32 isn);
static inline u32 id_aa64mmfr0_parange_to_phys_shift(int parange)
{
switch (parange) {
- case ID_AA64MMFR0_PARANGE_32: return 32;
- case ID_AA64MMFR0_PARANGE_36: return 36;
- case ID_AA64MMFR0_PARANGE_40: return 40;
- case ID_AA64MMFR0_PARANGE_42: return 42;
- case ID_AA64MMFR0_PARANGE_44: return 44;
- case ID_AA64MMFR0_PARANGE_48: return 48;
- case ID_AA64MMFR0_PARANGE_52: return 52;
+ case ID_AA64MMFR0_EL1_PARANGE_32: return 32;
+ case ID_AA64MMFR0_EL1_PARANGE_36: return 36;
+ case ID_AA64MMFR0_EL1_PARANGE_40: return 40;
+ case ID_AA64MMFR0_EL1_PARANGE_42: return 42;
+ case ID_AA64MMFR0_EL1_PARANGE_44: return 44;
+ case ID_AA64MMFR0_EL1_PARANGE_48: return 48;
+ case ID_AA64MMFR0_EL1_PARANGE_52: return 52;
/*
* A future PE could use a value unknown to the kernel.
* However, by the "D10.1.4 Principles of the ID scheme
@@ -866,16 +908,20 @@ static inline bool cpu_has_hw_af(void)
if (!IS_ENABLED(CONFIG_ARM64_HW_AFDBM))
return false;
- mmfr1 = read_cpuid(ID_AA64MMFR1_EL1);
+ /*
+ * Use cached version to avoid emulated msr operation on KVM
+ * guests.
+ */
+ mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
return cpuid_feature_extract_unsigned_field(mmfr1,
- ID_AA64MMFR1_HADBS_SHIFT);
+ ID_AA64MMFR1_EL1_HAFDBS_SHIFT);
}
static inline bool cpu_has_pan(void)
{
u64 mmfr1 = read_cpuid(ID_AA64MMFR1_EL1);
return cpuid_feature_extract_unsigned_field(mmfr1,
- ID_AA64MMFR1_PAN_SHIFT);
+ ID_AA64MMFR1_EL1_PAN_SHIFT);
}
#ifdef CONFIG_ARM64_AMU_EXTN
@@ -896,8 +942,8 @@ static inline unsigned int get_vmid_bits(u64 mmfr1)
int vmid_bits;
vmid_bits = cpuid_feature_extract_unsigned_field(mmfr1,
- ID_AA64MMFR1_VMIDBITS_SHIFT);
- if (vmid_bits == ID_AA64MMFR1_VMIDBITS_16)
+ ID_AA64MMFR1_EL1_VMIDBits_SHIFT);
+ if (vmid_bits == ID_AA64MMFR1_EL1_VMIDBits_16)
return 16;
/*
@@ -907,7 +953,12 @@ static inline unsigned int get_vmid_bits(u64 mmfr1)
return 8;
}
+s64 arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, s64 new, s64 cur);
+struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id);
+
+extern struct arm64_ftr_override id_aa64mmfr0_override;
extern struct arm64_ftr_override id_aa64mmfr1_override;
+extern struct arm64_ftr_override id_aa64mmfr2_override;
extern struct arm64_ftr_override id_aa64pfr0_override;
extern struct arm64_ftr_override id_aa64pfr1_override;
extern struct arm64_ftr_override id_aa64zfr0_override;
@@ -915,9 +966,116 @@ extern struct arm64_ftr_override id_aa64smfr0_override;
extern struct arm64_ftr_override id_aa64isar1_override;
extern struct arm64_ftr_override id_aa64isar2_override;
+extern struct arm64_ftr_override arm64_sw_feature_override;
+
+static inline
+u64 arm64_apply_feature_override(u64 val, int feat, int width,
+ const struct arm64_ftr_override *override)
+{
+ u64 oval = override->val;
+
+ /*
+ * When it encounters an invalid override (e.g., an override that
+ * cannot be honoured due to a missing CPU feature), the early idreg
+ * override code will set the mask to 0x0 and the value to non-zero for
+ * the field in question. In order to determine whether the override is
+ * valid or not for the field we are interested in, we first need to
+ * disregard bits belonging to other fields.
+ */
+ oval &= GENMASK_ULL(feat + width - 1, feat);
+
+ /*
+ * The override is valid if all value bits are accounted for in the
+ * mask. If so, replace the masked bits with the override value.
+ */
+ if (oval == (oval & override->mask)) {
+ val &= ~override->mask;
+ val |= oval;
+ }
+
+ /* Extract the field from the updated value */
+ return cpuid_feature_extract_unsigned_field(val, feat);
+}
+
+static inline bool arm64_test_sw_feature_override(int feat)
+{
+ /*
+ * Software features are pseudo CPU features that have no underlying
+ * CPUID system register value to apply the override to.
+ */
+ return arm64_apply_feature_override(0, feat, 4,
+ &arm64_sw_feature_override);
+}
+
+static inline bool kaslr_disabled_cmdline(void)
+{
+ return arm64_test_sw_feature_override(ARM64_SW_FEATURE_OVERRIDE_NOKASLR);
+}
+
u32 get_kvm_ipa_limit(void);
void dump_cpu_features(void);
+static inline bool cpu_has_bti(void)
+{
+ if (!IS_ENABLED(CONFIG_ARM64_BTI))
+ return false;
+
+ return arm64_apply_feature_override(read_cpuid(ID_AA64PFR1_EL1),
+ ID_AA64PFR1_EL1_BT_SHIFT, 4,
+ &id_aa64pfr1_override);
+}
+
+static inline bool cpu_has_pac(void)
+{
+ u64 isar1, isar2;
+
+ if (!IS_ENABLED(CONFIG_ARM64_PTR_AUTH))
+ return false;
+
+ isar1 = read_cpuid(ID_AA64ISAR1_EL1);
+ isar2 = read_cpuid(ID_AA64ISAR2_EL1);
+
+ if (arm64_apply_feature_override(isar1, ID_AA64ISAR1_EL1_APA_SHIFT, 4,
+ &id_aa64isar1_override))
+ return true;
+
+ if (arm64_apply_feature_override(isar1, ID_AA64ISAR1_EL1_API_SHIFT, 4,
+ &id_aa64isar1_override))
+ return true;
+
+ return arm64_apply_feature_override(isar2, ID_AA64ISAR2_EL1_APA3_SHIFT, 4,
+ &id_aa64isar2_override);
+}
+
+static inline bool cpu_has_lva(void)
+{
+ u64 mmfr2;
+
+ mmfr2 = read_sysreg_s(SYS_ID_AA64MMFR2_EL1);
+ mmfr2 &= ~id_aa64mmfr2_override.mask;
+ mmfr2 |= id_aa64mmfr2_override.val;
+ return cpuid_feature_extract_unsigned_field(mmfr2,
+ ID_AA64MMFR2_EL1_VARange_SHIFT);
+}
+
+static inline bool cpu_has_lpa2(void)
+{
+#ifdef CONFIG_ARM64_LPA2
+ u64 mmfr0;
+ int feat;
+
+ mmfr0 = read_sysreg(id_aa64mmfr0_el1);
+ mmfr0 &= ~id_aa64mmfr0_override.mask;
+ mmfr0 |= id_aa64mmfr0_override.val;
+ feat = cpuid_feature_extract_signed_field(mmfr0,
+ ID_AA64MMFR0_EL1_TGRAN_SHIFT);
+
+ return feat >= ID_AA64MMFR0_EL1_TGRAN_LPA2;
+#else
+ return false;
+#endif
+}
+
#endif /* __ASSEMBLY__ */
#endif
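
arm64_apply_feature_override() first narrows the override value to the field of interest, accepts it only if every value bit is covered by the mask, and then extracts the (possibly substituted) field. A worked sketch with made-up numbers, forcing a 4-bit field at bit 8 to the value 2:

#include <linux/types.h>
#include <asm/cpufeature.h>

/* Illustrative only: apply a hypothetical override to a sample register. */
static unsigned int example_overridden_field(u64 reg)
{
        struct arm64_ftr_override ovr = {
                .val  = 0x2ULL << 8,    /* desired field value */
                .mask = 0xfULL << 8,    /* the field is being overridden */
        };

        /* Returns 2 regardless of what bits [11:8] of @reg contained. */
        return arm64_apply_feature_override(reg, 8, 4, &ovr);
}
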
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 8aa0d276a636..661735616787 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -41,7 +41,7 @@
(((midr) & MIDR_IMPLEMENTOR_MASK) >> MIDR_IMPLEMENTOR_SHIFT)
#define MIDR_CPU_MODEL(imp, partnum) \
- (((imp) << MIDR_IMPLEMENTOR_SHIFT) | \
+ ((_AT(u32, imp) << MIDR_IMPLEMENTOR_SHIFT) | \
(0xf << MIDR_ARCHITECTURE_SHIFT) | \
((partnum) << MIDR_PARTNUM_SHIFT))
@@ -60,6 +60,8 @@
#define ARM_CPU_IMP_FUJITSU 0x46
#define ARM_CPU_IMP_HISI 0x48
#define ARM_CPU_IMP_APPLE 0x61
+#define ARM_CPU_IMP_AMPERE 0xC0
+#define ARM_CPU_IMP_MICROSOFT 0x6D
#define ARM_CPU_PART_AEM_V8 0xD0F
#define ARM_CPU_PART_FOUNDATION 0xD00
@@ -73,17 +75,31 @@
#define ARM_CPU_PART_CORTEX_A76 0xD0B
#define ARM_CPU_PART_NEOVERSE_N1 0xD0C
#define ARM_CPU_PART_CORTEX_A77 0xD0D
+#define ARM_CPU_PART_CORTEX_A76AE 0xD0E
#define ARM_CPU_PART_NEOVERSE_V1 0xD40
#define ARM_CPU_PART_CORTEX_A78 0xD41
#define ARM_CPU_PART_CORTEX_A78AE 0xD42
#define ARM_CPU_PART_CORTEX_X1 0xD44
#define ARM_CPU_PART_CORTEX_A510 0xD46
+#define ARM_CPU_PART_CORTEX_X1C 0xD4C
+#define ARM_CPU_PART_CORTEX_A520 0xD80
#define ARM_CPU_PART_CORTEX_A710 0xD47
+#define ARM_CPU_PART_CORTEX_A715 0xD4D
#define ARM_CPU_PART_CORTEX_X2 0xD48
#define ARM_CPU_PART_NEOVERSE_N2 0xD49
#define ARM_CPU_PART_CORTEX_A78C 0xD4B
-
-#define APM_CPU_PART_POTENZA 0x000
+#define ARM_CPU_PART_CORTEX_X3 0xD4E
+#define ARM_CPU_PART_NEOVERSE_V2 0xD4F
+#define ARM_CPU_PART_CORTEX_A720 0xD81
+#define ARM_CPU_PART_CORTEX_X4 0xD82
+#define ARM_CPU_PART_NEOVERSE_V3 0xD84
+#define ARM_CPU_PART_CORTEX_X925 0xD85
+#define ARM_CPU_PART_CORTEX_A725 0xD87
+#define ARM_CPU_PART_NEOVERSE_N3 0xD8E
+
+#define APM_CPU_PART_XGENE 0x000
+#define APM_CPU_VAR_POTENZA 0x00
#define CAVIUM_CPU_PART_THUNDERX 0x0A1
#define CAVIUM_CPU_PART_THUNDERX_81XX 0x0A2
@@ -105,9 +121,11 @@
#define QCOM_CPU_PART_KRYO 0x200
#define QCOM_CPU_PART_KRYO_2XX_GOLD 0x800
#define QCOM_CPU_PART_KRYO_2XX_SILVER 0x801
+#define QCOM_CPU_PART_KRYO_3XX_GOLD 0x802
#define QCOM_CPU_PART_KRYO_3XX_SILVER 0x803
#define QCOM_CPU_PART_KRYO_4XX_GOLD 0x804
#define QCOM_CPU_PART_KRYO_4XX_SILVER 0x805
+#define QCOM_CPU_PART_ORYON_X1 0x001
#define NVIDIA_CPU_PART_DENVER 0x003
#define NVIDIA_CPU_PART_CARMEL 0x004
@@ -115,6 +133,8 @@
#define FUJITSU_CPU_PART_A64FX 0x001
#define HISI_CPU_PART_TSV110 0xD01
+#define HISI_CPU_PART_HIP09 0xD02
+#define HISI_CPU_PART_HIP12 0xD06
#define APPLE_CPU_PART_M1_ICESTORM 0x022
#define APPLE_CPU_PART_M1_FIRESTORM 0x023
@@ -122,6 +142,17 @@
#define APPLE_CPU_PART_M1_FIRESTORM_PRO 0x025
#define APPLE_CPU_PART_M1_ICESTORM_MAX 0x028
#define APPLE_CPU_PART_M1_FIRESTORM_MAX 0x029
+#define APPLE_CPU_PART_M2_BLIZZARD 0x032
+#define APPLE_CPU_PART_M2_AVALANCHE 0x033
+#define APPLE_CPU_PART_M2_BLIZZARD_PRO 0x034
+#define APPLE_CPU_PART_M2_AVALANCHE_PRO 0x035
+#define APPLE_CPU_PART_M2_BLIZZARD_MAX 0x038
+#define APPLE_CPU_PART_M2_AVALANCHE_MAX 0x039
+
+#define AMPERE_CPU_PART_AMPERE1 0xAC3
+#define AMPERE_CPU_PART_AMPERE1A 0xAC4
+
+#define MICROSOFT_CPU_PART_AZURE_COBALT_100 0xD49 /* Based on r0p0 of ARM Neoverse N2 */
#define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
#define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
@@ -133,15 +164,28 @@
#define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76)
#define MIDR_NEOVERSE_N1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N1)
#define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77)
+#define MIDR_CORTEX_A76AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76AE)
#define MIDR_NEOVERSE_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V1)
#define MIDR_CORTEX_A78 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78)
#define MIDR_CORTEX_A78AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78AE)
#define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1)
#define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510)
+#define MIDR_CORTEX_X1C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1C)
+#define MIDR_CORTEX_A520 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A520)
#define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710)
+#define MIDR_CORTEX_A715 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A715)
#define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2)
#define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2)
#define MIDR_CORTEX_A78C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78C)
+#define MIDR_CORTEX_X3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X3)
+#define MIDR_NEOVERSE_V2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V2)
+#define MIDR_CORTEX_A720 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A720)
+#define MIDR_CORTEX_X4 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X4)
+#define MIDR_NEOVERSE_V3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V3)
+#define MIDR_CORTEX_X925 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X925)
+#define MIDR_CORTEX_A725 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A725)
+#define MIDR_NEOVERSE_N3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N3)
#define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
#define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
#define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX)
@@ -159,19 +203,42 @@
#define MIDR_QCOM_KRYO MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO)
#define MIDR_QCOM_KRYO_2XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_2XX_GOLD)
#define MIDR_QCOM_KRYO_2XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_2XX_SILVER)
+#define MIDR_QCOM_KRYO_3XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_3XX_GOLD)
#define MIDR_QCOM_KRYO_3XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_3XX_SILVER)
#define MIDR_QCOM_KRYO_4XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_GOLD)
#define MIDR_QCOM_KRYO_4XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_SILVER)
+#define MIDR_QCOM_ORYON_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_ORYON_X1)
+
+/*
+ * NOTES:
+ * - Qualcomm Kryo 5XX Prime / Gold ID themselves as MIDR_CORTEX_A77
+ * - Qualcomm Kryo 5XX Silver IDs itself as MIDR_QCOM_KRYO_4XX_SILVER
+ * - Qualcomm Kryo 6XX Prime IDs itself as MIDR_CORTEX_X1
+ * - Qualcomm Kryo 6XX Gold IDs itself as ARM_CPU_PART_CORTEX_A78
+ * - Qualcomm Kryo 6XX Silver IDs itself as MIDR_CORTEX_A55
+ */
+
#define MIDR_NVIDIA_DENVER MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_DENVER)
#define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL)
#define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX)
#define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110)
+#define MIDR_HISI_HIP09 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP09)
+#define MIDR_HISI_HIP12 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP12)
#define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM)
#define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM)
#define MIDR_APPLE_M1_ICESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_PRO)
#define MIDR_APPLE_M1_FIRESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_PRO)
#define MIDR_APPLE_M1_ICESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_MAX)
#define MIDR_APPLE_M1_FIRESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_MAX)
+#define MIDR_APPLE_M2_BLIZZARD MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD)
+#define MIDR_APPLE_M2_AVALANCHE MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE)
+#define MIDR_APPLE_M2_BLIZZARD_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD_PRO)
+#define MIDR_APPLE_M2_AVALANCHE_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE_PRO)
+#define MIDR_APPLE_M2_BLIZZARD_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD_MAX)
+#define MIDR_APPLE_M2_AVALANCHE_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE_MAX)
+#define MIDR_AMPERE1 MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1)
+#define MIDR_AMPERE1A MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1A)
+#define MIDR_MICROSOFT_AZURE_COBALT_100 MIDR_CPU_MODEL(ARM_CPU_IMP_MICROSOFT, MICROSOFT_CPU_PART_AZURE_COBALT_100)
/* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */
#define MIDR_FUJITSU_ERRATUM_010001 MIDR_FUJITSU_A64FX
@@ -185,6 +252,16 @@
#define read_cpuid(reg) read_sysreg_s(SYS_ ## reg)
/*
+ * The CPU ID never changes at run time, so we might as well tell the
+ * compiler that it's constant. Use this function to read the CPU ID
+ * rather than reading processor_id or calling read_cpuid() directly.
+ */
+static inline u32 __attribute_const__ read_cpuid_id(void)
+{
+ return read_cpuid(MIDR_EL1);
+}
+
+/*
* Represent a range of MIDR values for a given CPU model and a
* range of variant/revision values.
*
@@ -219,30 +296,14 @@ static inline bool midr_is_cpu_model_range(u32 midr, u32 model, u32 rv_min,
return _model == model && rv >= rv_min && rv <= rv_max;
}
-static inline bool is_midr_in_range(u32 midr, struct midr_range const *range)
-{
- return midr_is_cpu_model_range(midr, range->model,
- range->rv_min, range->rv_max);
-}
-
-static inline bool
-is_midr_in_range_list(u32 midr, struct midr_range const *ranges)
-{
- while (ranges->model)
- if (is_midr_in_range(midr, ranges++))
- return true;
- return false;
-}
+struct target_impl_cpu {
+ u64 midr;
+ u64 revidr;
+ u64 aidr;
+};
-/*
- * The CPU ID never changes at run time, so we might as well tell the
- * compiler that it's constant. Use this function to read the CPU ID
- * rather than directly reading processor_id or read_cpuid() directly.
- */
-static inline u32 __attribute_const__ read_cpuid_id(void)
-{
- return read_cpuid(MIDR_EL1);
-}
+bool cpu_errata_set_target_impl(u64 num, void *impl_cpus);
+bool is_midr_in_range_list(struct midr_range const *ranges);
static inline u64 __attribute_const__ read_cpuid_mpidr(void)
{
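
is_midr_in_range_list() now takes only the range list; the MIDR (and REVIDR/AIDR) it checks against comes from the target-implementation state installed via cpu_errata_set_target_impl(). Checks against the local CPU's own MIDR instead go through read_cpuid_id() and midr_is_cpu_model_range(), as in this sketch (the revision bounds are made up; MIDR_CPU_VAR_REV() is the existing variant/revision helper):

#include <linux/types.h>
#include <asm/cputype.h>

/* Illustrative: does this CPU identify as Neoverse N1, r0p0..r3p1? */
static bool example_is_early_neoverse_n1(void)
{
        return midr_is_cpu_model_range(read_cpuid_id(), MIDR_NEOVERSE_N1,
                                       MIDR_CPU_VAR_REV(0, 0),
                                       MIDR_CPU_VAR_REV(3, 1));
}
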
diff --git a/arch/arm64/include/asm/crash_reserve.h b/arch/arm64/include/asm/crash_reserve.h
new file mode 100644
index 000000000000..4afe027a4e7b
--- /dev/null
+++ b/arch/arm64/include/asm/crash_reserve.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _ARM64_CRASH_RESERVE_H
+#define _ARM64_CRASH_RESERVE_H
+
+/* Current arm64 boot protocol requires 2MB alignment */
+#define CRASH_ALIGN SZ_2M
+
+#define CRASH_ADDR_LOW_MAX arm64_dma_phys_limit
+#define CRASH_ADDR_HIGH_MAX (PHYS_MASK + 1)
+#endif
diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h
index 55f57dfa8e2f..fbb5c99eb2f9 100644
--- a/arch/arm64/include/asm/daifflags.h
+++ b/arch/arm64/include/asm/daifflags.h
@@ -132,7 +132,7 @@ static inline void local_daif_inherit(struct pt_regs *regs)
trace_hardirqs_on();
if (system_uses_irq_prio_masking())
- gic_write_pmr(regs->pmr_save);
+ gic_write_pmr(regs->pmr);
/*
* We can't use local_daif_restore(regs->pstate) here as
diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h
index 7b7e05c02691..f5e3ed2420ce 100644
--- a/arch/arm64/include/asm/debug-monitors.h
+++ b/arch/arm64/include/asm/debug-monitors.h
@@ -13,14 +13,8 @@
#include <asm/ptrace.h>
/* Low-level stepping controls. */
-#define DBG_MDSCR_SS (1 << 0)
#define DBG_SPSR_SS (1 << 21)
-/* MDSCR_EL1 enabling bits */
-#define DBG_MDSCR_KDE (1 << 13)
-#define DBG_MDSCR_MDE (1 << 15)
-#define DBG_MDSCR_MASK ~(DBG_MDSCR_KDE | DBG_MDSCR_MDE)
-
#define DBG_ESR_EVT(x) (((x) >> 27) & 0x7)
/* AArch64 */
@@ -62,30 +56,6 @@ struct task_struct;
#define DBG_HOOK_HANDLED 0
#define DBG_HOOK_ERROR 1
-struct step_hook {
- struct list_head node;
- int (*fn)(struct pt_regs *regs, unsigned long esr);
-};
-
-void register_user_step_hook(struct step_hook *hook);
-void unregister_user_step_hook(struct step_hook *hook);
-
-void register_kernel_step_hook(struct step_hook *hook);
-void unregister_kernel_step_hook(struct step_hook *hook);
-
-struct break_hook {
- struct list_head node;
- int (*fn)(struct pt_regs *regs, unsigned long esr);
- u16 imm;
- u16 mask; /* These bits are ignored when comparing with imm */
-};
-
-void register_user_break_hook(struct break_hook *hook);
-void unregister_user_break_hook(struct break_hook *hook);
-
-void register_kernel_break_hook(struct break_hook *hook);
-void unregister_kernel_break_hook(struct break_hook *hook);
-
u8 debug_monitors_arch(void);
enum dbg_active_el {
@@ -104,19 +74,19 @@ void user_regs_reset_single_step(struct user_pt_regs *regs,
void kernel_enable_single_step(struct pt_regs *regs);
void kernel_disable_single_step(void);
int kernel_active_single_step(void);
+void kernel_rewind_single_step(struct pt_regs *regs);
+void kernel_fastforward_single_step(struct pt_regs *regs);
#ifdef CONFIG_HAVE_HW_BREAKPOINT
-int reinstall_suspended_bps(struct pt_regs *regs);
+bool try_step_suspended_breakpoints(struct pt_regs *regs);
#else
-static inline int reinstall_suspended_bps(struct pt_regs *regs)
+static inline bool try_step_suspended_breakpoints(struct pt_regs *regs)
{
- return -ENODEV;
+ return false;
}
#endif
-int aarch32_break_handler(struct pt_regs *regs);
-
-void debug_traps_init(void);
+bool try_handle_aarch32_break(struct pt_regs *regs);
#endif /* __ASSEMBLY */
#endif /* __ASM_DEBUG_MONITORS_H */
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
index 439e2bc5d5d8..bcd5622aa096 100644
--- a/arch/arm64/include/asm/efi.h
+++ b/arch/arm64/include/asm/efi.h
@@ -14,31 +14,40 @@
#ifdef CONFIG_EFI
extern void efi_init(void);
+
+bool efi_runtime_fixup_exception(struct pt_regs *regs, const char *msg);
#else
#define efi_init()
+
+static inline
+bool efi_runtime_fixup_exception(struct pt_regs *regs, const char *msg)
+{
+ return false;
+}
#endif
int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md);
-int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md);
-
-#define arch_efi_call_virt_setup() \
-({ \
- efi_virtmap_load(); \
- __efi_fpsimd_begin(); \
-})
+int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md,
+ bool has_bti);
#undef arch_efi_call_virt
#define arch_efi_call_virt(p, f, args...) \
__efi_rt_asm_wrapper((p)->f, #f, args)
-#define arch_efi_call_virt_teardown() \
-({ \
- __efi_fpsimd_end(); \
- efi_virtmap_unload(); \
-})
-
+extern u64 *efi_rt_stack_top;
efi_status_t __efi_rt_asm_wrapper(void *, const char *, ...);
+void arch_efi_call_virt_setup(void);
+void arch_efi_call_virt_teardown(void);
+
+/*
+ * efi_rt_stack_top[-1] contains the value the stack pointer had before
+ * switching to the EFI runtime stack.
+ */
+#define current_in_efi() \
+ (!preemptible() && efi_rt_stack_top != NULL && \
+ on_task_stack(current, READ_ONCE(efi_rt_stack_top[-1]), 1))
+
#define ARCH_EFI_IRQ_FLAGS_MASK (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT)
/*
@@ -67,7 +76,7 @@ efi_status_t __efi_rt_asm_wrapper(void *, const char *, ...);
* guaranteed to cover the kernel Image.
*
* Since the EFI stub is part of the kernel Image, we can relax the
- * usual requirements in Documentation/arm64/booting.rst, which still
+ * usual requirements in Documentation/arch/arm64/booting.rst, which still
* apply to other bootloaders, and are required for some kernel
* configurations.
*/
@@ -76,13 +85,25 @@ static inline unsigned long efi_get_max_initrd_addr(unsigned long image_addr)
return (image_addr & ~(SZ_1G - 1UL)) + (1UL << (VA_BITS_MIN - 1));
}
-#define alloc_screen_info(x...) &screen_info
-
-static inline void free_screen_info(struct screen_info *si)
+static inline unsigned long efi_get_kimg_min_align(void)
{
+ extern bool efi_nokaslr;
+
+ /*
+ * Although relocatable kernels can fix up the misalignment with
+ * respect to MIN_KIMG_ALIGN, the resulting virtual text addresses are
+ * subtly out of sync with those recorded in the vmlinux when kaslr is
+ * disabled but the image required relocation anyway. Therefore retain
+ * 2M alignment if KASLR was explicitly disabled, even if it was not
+ * going to be activated to begin with.
+ */
+ return efi_nokaslr ? MIN_KIMG_ALIGN : EFI_KIMG_ALIGN;
}
#define EFI_ALLOC_ALIGN SZ_64K
+#define EFI_ALLOC_LIMIT ((1UL << 48) - 1)
+
+extern unsigned long primary_entry_offset(void);
/*
* On ARM systems, virtually remapped UEFI runtime services are set up in two
@@ -133,4 +154,8 @@ static inline void efi_capsule_flush_cache_range(void *addr, int size)
dcache_clean_inval_poc((unsigned long)addr, (unsigned long)addr + size);
}
+efi_status_t efi_handle_corrupted_x18(efi_status_t s, const char *f);
+
+void efi_icache_sync(unsigned long start, unsigned long end);
+
#endif /* _ASM_EFI_H */
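
Per the comment above, efi_rt_stack_top[-1] holds the stack pointer the task had before switching onto the EFI runtime stack, and current_in_efi() is only true when that saved value belongs to the current task. A small hedged sketch of a diagnostic helper built on that (a made-up function, not kernel code):

#include <linux/types.h>
#include <asm/efi.h>

/* Illustrative: report where the task's stack was before the EFI call. */
static u64 example_saved_task_sp(void)
{
        if (!current_in_efi())
                return 0;

        return READ_ONCE(efi_rt_stack_top[-1]);
}
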
diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
index 2630faa5bc08..46033027510c 100644
--- a/arch/arm64/include/asm/el2_setup.h
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -16,12 +16,62 @@
#include <asm/sysreg.h>
#include <linux/irqchip/arm-gic-v3.h>
+.macro init_el2_hcr val
+ mov_q x0, \val
+
+ /*
+ * Compliant CPUs advertise their VHE-onlyness with
+ * ID_AA64MMFR4_EL1.E2H0 < 0. On such CPUs HCR_EL2.E2H is RES1, but it
+ * can reset into an UNKNOWN state and might not read as 1 until it has
+ * been initialized explicitly.
+ *
+ * Fruity CPUs seem to have HCR_EL2.E2H set to RAO/WI, but
+ * don't advertise it (they predate this relaxation).
+ *
+ * Initialize HCR_EL2.E2H so that later code can rely upon HCR_EL2.E2H
+ * indicating whether the CPU is running in E2H mode.
+ */
+ mrs_s x1, SYS_ID_AA64MMFR4_EL1
+ sbfx x1, x1, #ID_AA64MMFR4_EL1_E2H0_SHIFT, #ID_AA64MMFR4_EL1_E2H0_WIDTH
+ cmp x1, #0
+ b.ge .LnVHE_\@
+
+ orr x0, x0, #HCR_E2H
+.LnVHE_\@:
+ msr_hcr_el2 x0
+ isb
+.endm
+
.macro __init_el2_sctlr
mov_q x0, INIT_SCTLR_EL2_MMU_OFF
msr sctlr_el2, x0
isb
.endm
+.macro __init_el2_hcrx
+ mrs x0, id_aa64mmfr1_el1
+ ubfx x0, x0, #ID_AA64MMFR1_EL1_HCX_SHIFT, #4
+ cbz x0, .Lskip_hcrx_\@
+ mov_q x0, (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En | HCRX_EL2_EnFPM)
+
+ /* Enable GCS if supported */
+ mrs_s x1, SYS_ID_AA64PFR1_EL1
+ ubfx x1, x1, #ID_AA64PFR1_EL1_GCS_SHIFT, #4
+ cbz x1, .Lset_hcrx_\@
+ orr x0, x0, #HCRX_EL2_GCSEn
+
+.Lset_hcrx_\@:
+ msr_s SYS_HCRX_EL2, x0
+.Lskip_hcrx_\@:
+.endm
+
+/* Check if running in host at EL2 mode, i.e., (h)VHE. Jump to fail if not. */
+.macro __check_hvhe fail, tmp
+ mrs \tmp, hcr_el2
+ and \tmp, \tmp, #HCR_E2H
+ cbz \tmp, \fail
+.endm
+
/*
* Allow Non-secure EL1 and EL0 to access physical timer and counter.
* This is not necessary for VHE, since the host kernel runs in EL2,
@@ -34,45 +84,49 @@
*/
.macro __init_el2_timers
mov x0, #3 // Enable EL1 physical timers
+ __check_hvhe .LnVHE_\@, x1
+ lsl x0, x0, #10
+.LnVHE_\@:
msr cnthctl_el2, x0
msr cntvoff_el2, xzr // Clear virtual offset
.endm
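The timer setup above writes the same two enable bits at different positions depending on the mode: without VHE the EL1 physical timer/counter enables sit in CNTHCTL_EL2[1:0], while with HCR_EL2.E2H set the equivalent controls sit at bits [11:10], hence the lsl #10. A one-line sketch of the resulting value, assuming nothing beyond the shift shown in the macro:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t enables = 0x3;	/* EL1 physical timer + counter enable */
	int e2h = 1;		/* pretend HCR_EL2.E2H reads as 1 (VHE) */

	/* With E2H set the same enables sit at CNTHCTL_EL2[11:10]. */
	uint64_t cnthctl = e2h ? enables << 10 : enables;

	printf("CNTHCTL_EL2 = 0x%llx\n", (unsigned long long)cnthctl);
	return 0;
}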
.macro __init_el2_debug
mrs x1, id_aa64dfr0_el1
- sbfx x0, x1, #ID_AA64DFR0_PMUVER_SHIFT, #4
- cmp x0, #1
- b.lt .Lskip_pmu_\@ // Skip if no PMU present
+ ubfx x0, x1, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4
+ cmp x0, #ID_AA64DFR0_EL1_PMUVer_NI
+ ccmp x0, #ID_AA64DFR0_EL1_PMUVer_IMP_DEF, #4, ne
+ b.eq .Lskip_pmu_\@ // Skip if no PMU present or IMP_DEF
mrs x0, pmcr_el0 // Disable debug access traps
ubfx x0, x0, #11, #5 // to EL2 and allow access to
.Lskip_pmu_\@:
- csel x2, xzr, x0, lt // all PMU counters from EL1
+ csel x2, xzr, x0, eq // all PMU counters from EL1
/* Statistical profiling */
- ubfx x0, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4
+ ubfx x0, x1, #ID_AA64DFR0_EL1_PMSVer_SHIFT, #4
cbz x0, .Lskip_spe_\@ // Skip if SPE not present
mrs_s x0, SYS_PMBIDR_EL1 // If SPE available at EL2,
- and x0, x0, #(1 << SYS_PMBIDR_EL1_P_SHIFT)
+ and x0, x0, #(1 << PMBIDR_EL1_P_SHIFT)
cbnz x0, .Lskip_spe_el2_\@ // then permit sampling of physical
- mov x0, #(1 << SYS_PMSCR_EL2_PCT_SHIFT | \
- 1 << SYS_PMSCR_EL2_PA_SHIFT)
+ mov x0, #(1 << PMSCR_EL2_PCT_SHIFT | \
+ 1 << PMSCR_EL2_PA_SHIFT)
msr_s SYS_PMSCR_EL2, x0 // addresses and physical counter
.Lskip_spe_el2_\@:
- mov x0, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT)
+ mov x0, #MDCR_EL2_E2PB_MASK
orr x2, x2, x0 // If we don't have VHE, then
// use EL1&0 translation.
.Lskip_spe_\@:
/* Trace buffer */
- ubfx x0, x1, #ID_AA64DFR0_TRBE_SHIFT, #4
+ ubfx x0, x1, #ID_AA64DFR0_EL1_TraceBuffer_SHIFT, #4
cbz x0, .Lskip_trace_\@ // Skip if TraceBuffer is not present
mrs_s x0, SYS_TRBIDR_EL1
- and x0, x0, TRBIDR_PROG
+ and x0, x0, TRBIDR_EL1_P
cbnz x0, .Lskip_trace_\@ // If TRBE is available at EL2
- mov x0, #(MDCR_EL2_E2TB_MASK << MDCR_EL2_E2TB_SHIFT)
+ mov x0, #MDCR_EL2_E2TB_MASK
orr x2, x2, x0 // allow the EL1&0 translation
// to own it.
@@ -83,7 +137,7 @@
/* LORegions */
.macro __init_el2_lor
mrs x1, id_aa64mmfr1_el1
- ubfx x0, x1, #ID_AA64MMFR1_LOR_SHIFT, 4
+ ubfx x0, x1, #ID_AA64MMFR1_EL1_LO_SHIFT, 4
cbz x0, .Lskip_lor_\@
msr_s SYS_LORC_EL1, xzr
.Lskip_lor_\@:
@@ -97,7 +151,7 @@
/* GICv3 system register access */
.macro __init_el2_gicv3
mrs x0, id_aa64pfr0_el1
- ubfx x0, x0, #ID_AA64PFR0_GIC_SHIFT, #4
+ ubfx x0, x0, #ID_AA64PFR0_EL1_GIC_SHIFT, #4
cbz x0, .Lskip_gicv3_\@
mrs_s x0, SYS_ICC_SRE_EL2
@@ -111,6 +165,50 @@
.Lskip_gicv3_\@:
.endm
+/* GICv5 system register access */
+.macro __init_el2_gicv5
+ mrs_s x0, SYS_ID_AA64PFR2_EL1
+ ubfx x0, x0, #ID_AA64PFR2_EL1_GCIE_SHIFT, #4
+ cbz x0, .Lskip_gicv5_\@
+
+ mov x0, #(ICH_HFGITR_EL2_GICRCDNMIA | \
+ ICH_HFGITR_EL2_GICRCDIA | \
+ ICH_HFGITR_EL2_GICCDDI | \
+ ICH_HFGITR_EL2_GICCDEOI | \
+ ICH_HFGITR_EL2_GICCDHM | \
+ ICH_HFGITR_EL2_GICCDRCFG | \
+ ICH_HFGITR_EL2_GICCDPEND | \
+ ICH_HFGITR_EL2_GICCDAFF | \
+ ICH_HFGITR_EL2_GICCDPRI | \
+ ICH_HFGITR_EL2_GICCDDIS | \
+ ICH_HFGITR_EL2_GICCDEN)
+ msr_s SYS_ICH_HFGITR_EL2, x0 // Disable instruction traps
+ mov_q x0, (ICH_HFGRTR_EL2_ICC_PPI_ACTIVERn_EL1 | \
+ ICH_HFGRTR_EL2_ICC_PPI_PRIORITYRn_EL1 | \
+ ICH_HFGRTR_EL2_ICC_PPI_PENDRn_EL1 | \
+ ICH_HFGRTR_EL2_ICC_PPI_ENABLERn_EL1 | \
+ ICH_HFGRTR_EL2_ICC_PPI_HMRn_EL1 | \
+ ICH_HFGRTR_EL2_ICC_IAFFIDR_EL1 | \
+ ICH_HFGRTR_EL2_ICC_ICSR_EL1 | \
+ ICH_HFGRTR_EL2_ICC_PCR_EL1 | \
+ ICH_HFGRTR_EL2_ICC_HPPIR_EL1 | \
+ ICH_HFGRTR_EL2_ICC_HAPR_EL1 | \
+ ICH_HFGRTR_EL2_ICC_CR0_EL1 | \
+ ICH_HFGRTR_EL2_ICC_IDRn_EL1 | \
+ ICH_HFGRTR_EL2_ICC_APR_EL1)
+ msr_s SYS_ICH_HFGRTR_EL2, x0 // Disable reg read traps
+ mov_q x0, (ICH_HFGWTR_EL2_ICC_PPI_ACTIVERn_EL1 | \
+ ICH_HFGWTR_EL2_ICC_PPI_PRIORITYRn_EL1 | \
+ ICH_HFGWTR_EL2_ICC_PPI_PENDRn_EL1 | \
+ ICH_HFGWTR_EL2_ICC_PPI_ENABLERn_EL1 | \
+ ICH_HFGWTR_EL2_ICC_ICSR_EL1 | \
+ ICH_HFGWTR_EL2_ICC_PCR_EL1 | \
+ ICH_HFGWTR_EL2_ICC_CR0_EL1 | \
+ ICH_HFGWTR_EL2_ICC_APR_EL1)
+ msr_s SYS_ICH_HFGWTR_EL2, x0 // Disable reg write traps
+.Lskip_gicv5_\@:
+.endm
+
.macro __init_el2_hstr
msr hstr_el2, xzr // Disable CP15 traps to EL2
.endm
@@ -124,76 +222,330 @@
.endm
/* Coprocessor traps */
-.macro __init_el2_nvhe_cptr
+.macro __init_el2_cptr
+ __check_hvhe .LnVHE_\@, x1
+ mov x0, #CPACR_EL1_FPEN
+ msr cpacr_el1, x0
+ b .Lskip_set_cptr_\@
+.LnVHE_\@:
mov x0, #0x33ff
msr cptr_el2, x0 // Disable copro. traps to EL2
+.Lskip_set_cptr_\@:
+.endm
+
+/*
+ * Configure BRBE to permit recording cycle counts and branch mispredicts.
+ *
+ * At any EL, to record cycle counts BRBE requires that both BRBCR_EL2.CC=1 and
+ * BRBCR_EL1.CC=1.
+ *
+ * At any EL, to record branch mispredicts BRBE requires that both
+ * BRBCR_EL2.MPRED=1 and BRBCR_EL1.MPRED=1.
+ *
+ * Set {CC,MPRED} in BRBCR_EL2 in case nVHE mode is used and we are
+ * executing in EL1.
+ */
+.macro __init_el2_brbe
+ mrs x1, id_aa64dfr0_el1
+ ubfx x1, x1, #ID_AA64DFR0_EL1_BRBE_SHIFT, #4
+ cbz x1, .Lskip_brbe_\@
+
+ mov_q x0, BRBCR_ELx_CC | BRBCR_ELx_MPRED
+ msr_s SYS_BRBCR_EL2, x0
+.Lskip_brbe_\@:
.endm
/* Disable any fine grained traps */
.macro __init_el2_fgt
mrs x1, id_aa64mmfr0_el1
- ubfx x1, x1, #ID_AA64MMFR0_FGT_SHIFT, #4
+ ubfx x1, x1, #ID_AA64MMFR0_EL1_FGT_SHIFT, #4
cbz x1, .Lskip_fgt_\@
mov x0, xzr
+ mov x2, xzr
mrs x1, id_aa64dfr0_el1
- ubfx x1, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4
+ ubfx x1, x1, #ID_AA64DFR0_EL1_PMSVer_SHIFT, #4
cmp x1, #3
- b.lt .Lset_debug_fgt_\@
+ b.lt .Lskip_spe_fgt_\@
/* Disable PMSNEVFR_EL1 read and write traps */
- orr x0, x0, #(1 << 62)
+ orr x0, x0, #HDFGRTR_EL2_nPMSNEVFR_EL1_MASK
+ orr x2, x2, #HDFGWTR_EL2_nPMSNEVFR_EL1_MASK
+
+.Lskip_spe_fgt_\@:
+ mrs x1, id_aa64dfr0_el1
+ ubfx x1, x1, #ID_AA64DFR0_EL1_BRBE_SHIFT, #4
+ cbz x1, .Lskip_brbe_fgt_\@
+
+ /*
+ * Disable read traps for the following registers
+ *
+ * [BRBSRC|BRBTGT|RBINF]_EL1
+ * [BRBSRCINJ|BRBTGTINJ|BRBINFINJ|BRBTS]_EL1
+ */
+ orr x0, x0, #HDFGRTR_EL2_nBRBDATA_MASK
+
+ /*
+ * Disable write traps for the following registers
+ *
+ * [BRBSRCINJ|BRBTGTINJ|BRBINFINJ|BRBTS]_EL1
+ */
+ orr x2, x2, #HDFGWTR_EL2_nBRBDATA_MASK
+
+ /* Disable read and write traps for [BRBCR|BRBFCR]_EL1 */
+ orr x0, x0, #HDFGRTR_EL2_nBRBCTL_MASK
+ orr x2, x2, #HDFGWTR_EL2_nBRBCTL_MASK
+
+ /* Disable read traps for BRBIDR_EL1 */
+ orr x0, x0, #HDFGRTR_EL2_nBRBIDR_MASK
+
+.Lskip_brbe_fgt_\@:
.Lset_debug_fgt_\@:
msr_s SYS_HDFGRTR_EL2, x0
- msr_s SYS_HDFGWTR_EL2, x0
+ msr_s SYS_HDFGWTR_EL2, x2
mov x0, xzr
+ mov x2, xzr
+
+ mrs x1, id_aa64dfr0_el1
+ ubfx x1, x1, #ID_AA64DFR0_EL1_BRBE_SHIFT, #4
+ cbz x1, .Lskip_brbe_insn_fgt_\@
+
+ /* Disable traps for BRBIALL instruction */
+ orr x2, x2, #HFGITR_EL2_nBRBIALL_MASK
+
+ /* Disable traps for BRBINJ instruction */
+ orr x2, x2, #HFGITR_EL2_nBRBINJ_MASK
+
+.Lskip_brbe_insn_fgt_\@:
mrs x1, id_aa64pfr1_el1
- ubfx x1, x1, #ID_AA64PFR1_SME_SHIFT, #4
- cbz x1, .Lset_fgt_\@
+ ubfx x1, x1, #ID_AA64PFR1_EL1_SME_SHIFT, #4
+ cbz x1, .Lskip_sme_fgt_\@
/* Disable nVHE traps of TPIDR2 and SMPRI */
- orr x0, x0, #HFGxTR_EL2_nSMPRI_EL1_MASK
- orr x0, x0, #HFGxTR_EL2_nTPIDR2_EL0_MASK
+ orr x0, x0, #HFGRTR_EL2_nSMPRI_EL1_MASK
+ orr x0, x0, #HFGRTR_EL2_nTPIDR2_EL0_MASK
+
+.Lskip_sme_fgt_\@:
+ mrs_s x1, SYS_ID_AA64MMFR3_EL1
+ ubfx x1, x1, #ID_AA64MMFR3_EL1_S1PIE_SHIFT, #4
+ cbz x1, .Lskip_pie_fgt_\@
+
+ /* Disable trapping of PIR_EL1 / PIRE0_EL1 */
+ orr x0, x0, #HFGRTR_EL2_nPIR_EL1
+ orr x0, x0, #HFGRTR_EL2_nPIRE0_EL1
+
+.Lskip_pie_fgt_\@:
+ mrs_s x1, SYS_ID_AA64MMFR3_EL1
+ ubfx x1, x1, #ID_AA64MMFR3_EL1_S1POE_SHIFT, #4
+ cbz x1, .Lskip_poe_fgt_\@
+
+ /* Disable trapping of POR_EL0 */
+ orr x0, x0, #HFGRTR_EL2_nPOR_EL0
+
+.Lskip_poe_fgt_\@:
+ /* GCS depends on PIE so we don't check it if PIE is absent */
+ mrs_s x1, SYS_ID_AA64PFR1_EL1
+ ubfx x1, x1, #ID_AA64PFR1_EL1_GCS_SHIFT, #4
+ cbz x1, .Lskip_gce_fgt_\@
+
+ /* Disable traps of access to GCS registers at EL0 and EL1 */
+ orr x0, x0, #HFGRTR_EL2_nGCS_EL1_MASK
+ orr x0, x0, #HFGRTR_EL2_nGCS_EL0_MASK
+
+.Lskip_gce_fgt_\@:
.Lset_fgt_\@:
msr_s SYS_HFGRTR_EL2, x0
msr_s SYS_HFGWTR_EL2, x0
- msr_s SYS_HFGITR_EL2, xzr
+ msr_s SYS_HFGITR_EL2, x2
mrs x1, id_aa64pfr0_el1 // AMU traps UNDEF without AMU
- ubfx x1, x1, #ID_AA64PFR0_AMU_SHIFT, #4
- cbz x1, .Lskip_fgt_\@
+ ubfx x1, x1, #ID_AA64PFR0_EL1_AMU_SHIFT, #4
+ cbz x1, .Lskip_amu_fgt_\@
msr_s SYS_HAFGRTR_EL2, xzr
+
+.Lskip_amu_fgt_\@:
+
.Lskip_fgt_\@:
.endm
-.macro __init_el2_nvhe_prepare_eret
- mov x0, #INIT_PSTATE_EL1
- msr spsr_el2, x0
+.macro __init_el2_fgt2
+ mrs x1, id_aa64mmfr0_el1
+ ubfx x1, x1, #ID_AA64MMFR0_EL1_FGT_SHIFT, #4
+ cmp x1, #ID_AA64MMFR0_EL1_FGT_FGT2
+ b.lt .Lskip_fgt2_\@
+
+ mov x0, xzr
+ mrs x1, id_aa64dfr0_el1
+ ubfx x1, x1, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4
+ cmp x1, #ID_AA64DFR0_EL1_PMUVer_V3P9
+ b.lt .Lskip_pmuv3p9_\@
+
+ orr x0, x0, #HDFGRTR2_EL2_nPMICNTR_EL0
+ orr x0, x0, #HDFGRTR2_EL2_nPMICFILTR_EL0
+ orr x0, x0, #HDFGRTR2_EL2_nPMUACR_EL1
+.Lskip_pmuv3p9_\@:
+ msr_s SYS_HDFGRTR2_EL2, x0
+ msr_s SYS_HDFGWTR2_EL2, x0
+ msr_s SYS_HFGRTR2_EL2, xzr
+ msr_s SYS_HFGWTR2_EL2, xzr
+ msr_s SYS_HFGITR2_EL2, xzr
+.Lskip_fgt2_\@:
.endm
/**
* Initialize EL2 registers to sane values. This should be called early on all
* cores that were booted in EL2. Note that everything gets initialised as
- * if VHE was not evailable. The kernel context will be upgraded to VHE
+ * if VHE was not available. The kernel context will be upgraded to VHE
* if possible later on in the boot process
*
* Regs: x0, x1 and x2 are clobbered.
*/
.macro init_el2_state
__init_el2_sctlr
+ __init_el2_hcrx
__init_el2_timers
__init_el2_debug
+ __init_el2_brbe
__init_el2_lor
__init_el2_stage2
__init_el2_gicv3
+ __init_el2_gicv5
__init_el2_hstr
__init_el2_nvhe_idregs
- __init_el2_nvhe_cptr
+ __init_el2_cptr
__init_el2_fgt
- __init_el2_nvhe_prepare_eret
+ __init_el2_fgt2
+.endm
+
+#ifndef __KVM_NVHE_HYPERVISOR__
+// This will clobber tmp1 and tmp2, and expects tmp1 to contain
+// the id register value as read from the HW
+.macro __check_override idreg, fld, width, pass, fail, tmp1, tmp2
+ ubfx \tmp1, \tmp1, #\fld, #\width
+ cbz \tmp1, \fail
+
+ adr_l \tmp1, \idreg\()_override
+ ldr \tmp2, [\tmp1, FTR_OVR_VAL_OFFSET]
+ ldr \tmp1, [\tmp1, FTR_OVR_MASK_OFFSET]
+ ubfx \tmp2, \tmp2, #\fld, #\width
+ ubfx \tmp1, \tmp1, #\fld, #\width
+ cmp \tmp1, xzr
+ and \tmp2, \tmp2, \tmp1
+ csinv \tmp2, \tmp2, xzr, ne
+ cbnz \tmp2, \pass
+ b \fail
+.endm
+
+// This will clobber tmp1 and tmp2
+.macro check_override idreg, fld, pass, fail, tmp1, tmp2
+ mrs \tmp1, \idreg\()_el1
+ __check_override \idreg \fld 4 \pass \fail \tmp1 \tmp2
+.endm
+#else
+// This will clobber tmp
+.macro __check_override idreg, fld, width, pass, fail, tmp, ignore
+ ldr_l \tmp, \idreg\()_el1_sys_val
+ ubfx \tmp, \tmp, #\fld, #\width
+ cbnz \tmp, \pass
+ b \fail
+.endm
+
+.macro check_override idreg, fld, pass, fail, tmp, ignore
+ __check_override \idreg \fld 4 \pass \fail \tmp \ignore
+.endm
+#endif
+
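In plain C, the decision implemented by __check_override (outside the nVHE hypervisor) is: take the field from the ID register and fail if it is zero; otherwise consult the boot-time override, where a zero mask for the field means "no override, keep the feature" and a non-zero mask means the override value decides. A stand-alone sketch of that logic, with made-up register and override values used purely for illustration:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t field(uint64_t v, unsigned int shift, unsigned int width)
{
	return (v >> shift) & ((UINT64_C(1) << width) - 1);
}

/* Mirror of the pass/fail decision in __check_override. */
static bool feature_enabled(uint64_t idreg, uint64_t ovr_val, uint64_t ovr_mask,
			    unsigned int shift, unsigned int width)
{
	if (!field(idreg, shift, width))
		return false;		/* not implemented in hardware */

	if (!field(ovr_mask, shift, width))
		return true;		/* no override for this field */

	/* Override present: the masked override value decides. */
	return (field(ovr_val, shift, width) & field(ovr_mask, shift, width)) != 0;
}

int main(void)
{
	/* Illustrative values: field at [7:4] reads 1, override forces it off. */
	uint64_t idreg = 0x10, ovr_val = 0x00, ovr_mask = 0xf0;

	printf("enabled: %d\n", feature_enabled(idreg, ovr_val, ovr_mask, 4, 4));
	return 0;
}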
+.macro finalise_el2_state
+ check_override id_aa64pfr0, ID_AA64PFR0_EL1_MPAM_SHIFT, .Linit_mpam_\@, .Lskip_mpam_\@, x1, x2
+
+.Linit_mpam_\@:
+ msr_s SYS_MPAM2_EL2, xzr // use the default partition
+ // and disable lower traps
+ mrs_s x0, SYS_MPAMIDR_EL1
+ tbz x0, #MPAMIDR_EL1_HAS_HCR_SHIFT, .Lskip_mpam_\@ // skip if no MPAMHCR reg
+ msr_s SYS_MPAMHCR_EL2, xzr // clear TRAP_MPAMIDR_EL1 -> EL2
+
+.Lskip_mpam_\@:
+ check_override id_aa64pfr1, ID_AA64PFR1_EL1_GCS_SHIFT, .Linit_gcs_\@, .Lskip_gcs_\@, x1, x2
+
+.Linit_gcs_\@:
+ msr_s SYS_GCSCR_EL1, xzr
+ msr_s SYS_GCSCRE0_EL1, xzr
+
+.Lskip_gcs_\@:
+ check_override id_aa64pfr0, ID_AA64PFR0_EL1_SVE_SHIFT, .Linit_sve_\@, .Lskip_sve_\@, x1, x2
+
+.Linit_sve_\@: /* SVE register access */
+ __check_hvhe .Lcptr_nvhe_\@, x1
+
+ // (h)VHE case
+ mrs x0, cpacr_el1 // Disable SVE traps
+ orr x0, x0, #CPACR_EL1_ZEN
+ msr cpacr_el1, x0
+ b .Lskip_set_cptr_\@
+
+.Lcptr_nvhe_\@: // nVHE case
+ mrs x0, cptr_el2 // Disable SVE traps
+ bic x0, x0, #CPTR_EL2_TZ
+ msr cptr_el2, x0
+.Lskip_set_cptr_\@:
+ isb
+ mov x1, #ZCR_ELx_LEN_MASK // SVE: Enable full vector
+ msr_s SYS_ZCR_EL2, x1 // length for EL1.
+
+.Lskip_sve_\@:
+ check_override id_aa64pfr1, ID_AA64PFR1_EL1_SME_SHIFT, .Linit_sme_\@, .Lskip_sme_\@, x1, x2
+
+.Linit_sme_\@: /* SME register access and priority mapping */
+ __check_hvhe .Lcptr_nvhe_sme_\@, x1
+
+ // (h)VHE case
+ mrs x0, cpacr_el1 // Disable SME traps
+ orr x0, x0, #CPACR_EL1_SMEN
+ msr cpacr_el1, x0
+ b .Lskip_set_cptr_sme_\@
+
+.Lcptr_nvhe_sme_\@: // nVHE case
+ mrs x0, cptr_el2 // Disable SME traps
+ bic x0, x0, #CPTR_EL2_TSM
+ msr cptr_el2, x0
+.Lskip_set_cptr_sme_\@:
+ isb
+
+ mrs x1, sctlr_el2
+ orr x1, x1, #SCTLR_ELx_ENTP2 // Disable TPIDR2 traps
+ msr sctlr_el2, x1
+ isb
+
+ mov x0, #0 // SMCR controls
+
+ // Full FP in SM?
+ mrs_s x1, SYS_ID_AA64SMFR0_EL1
+ __check_override id_aa64smfr0, ID_AA64SMFR0_EL1_FA64_SHIFT, 1, .Linit_sme_fa64_\@, .Lskip_sme_fa64_\@, x1, x2
+
+.Linit_sme_fa64_\@:
+ orr x0, x0, SMCR_ELx_FA64_MASK
+.Lskip_sme_fa64_\@:
+
+ // ZT0 available?
+ mrs_s x1, SYS_ID_AA64SMFR0_EL1
+ __check_override id_aa64smfr0, ID_AA64SMFR0_EL1_SMEver_SHIFT, 4, .Linit_sme_zt0_\@, .Lskip_sme_zt0_\@, x1, x2
+.Linit_sme_zt0_\@:
+ orr x0, x0, SMCR_ELx_EZT0_MASK
+.Lskip_sme_zt0_\@:
+
+ orr x0, x0, #SMCR_ELx_LEN_MASK // Enable full SME vector
+ msr_s SYS_SMCR_EL2, x0 // length for EL1.
+
+ mrs_s x1, SYS_SMIDR_EL1 // Priority mapping supported?
+ ubfx x1, x1, #SMIDR_EL1_SMPS_SHIFT, #1
+ cbz x1, .Lskip_sme_\@
+
+ msr_s SYS_SMPRIMAP_EL2, xzr // Make all priorities equal
+.Lskip_sme_\@:
.endm
#endif /* __ARM_KVM_INIT_H__ */
diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
index 97932fbf973d..3f93f4eef953 100644
--- a/arch/arm64/include/asm/elf.h
+++ b/arch/arm64/include/asm/elf.h
@@ -201,16 +201,16 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm,
#define COMPAT_ELF_PLATFORM ("v8l")
#endif
-#ifdef CONFIG_COMPAT
-
-/* PIE load location for compat arm. Must match ARM ELF_ET_DYN_BASE. */
-#define COMPAT_ELF_ET_DYN_BASE 0x000400000UL
-
/* AArch32 registers. */
#define COMPAT_ELF_NGREG 18
typedef unsigned int compat_elf_greg_t;
typedef compat_elf_greg_t compat_elf_gregset_t[COMPAT_ELF_NGREG];
+#ifdef CONFIG_COMPAT
+
+/* PIE load location for compat arm. Must match ARM ELF_ET_DYN_BASE. */
+#define COMPAT_ELF_ET_DYN_BASE 0x000400000UL
+
/* AArch32 EABI. */
#define EF_ARM_EABI_MASK 0xff000000
int compat_elf_check_arch(const struct elf32_hdr *);
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index 15b34fbfca66..e1deed824464 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -10,63 +10,65 @@
#include <asm/memory.h>
#include <asm/sysreg.h>
-#define ESR_ELx_EC_UNKNOWN (0x00)
-#define ESR_ELx_EC_WFx (0x01)
+#define ESR_ELx_EC_UNKNOWN UL(0x00)
+#define ESR_ELx_EC_WFx UL(0x01)
/* Unallocated EC: 0x02 */
-#define ESR_ELx_EC_CP15_32 (0x03)
-#define ESR_ELx_EC_CP15_64 (0x04)
-#define ESR_ELx_EC_CP14_MR (0x05)
-#define ESR_ELx_EC_CP14_LS (0x06)
-#define ESR_ELx_EC_FP_ASIMD (0x07)
-#define ESR_ELx_EC_CP10_ID (0x08) /* EL2 only */
-#define ESR_ELx_EC_PAC (0x09) /* EL2 and above */
-/* Unallocated EC: 0x0A - 0x0B */
-#define ESR_ELx_EC_CP14_64 (0x0C)
-#define ESR_ELx_EC_BTI (0x0D)
-#define ESR_ELx_EC_ILL (0x0E)
+#define ESR_ELx_EC_CP15_32 UL(0x03)
+#define ESR_ELx_EC_CP15_64 UL(0x04)
+#define ESR_ELx_EC_CP14_MR UL(0x05)
+#define ESR_ELx_EC_CP14_LS UL(0x06)
+#define ESR_ELx_EC_FP_ASIMD UL(0x07)
+#define ESR_ELx_EC_CP10_ID UL(0x08) /* EL2 only */
+#define ESR_ELx_EC_PAC UL(0x09) /* EL2 and above */
+#define ESR_ELx_EC_OTHER UL(0x0A)
+/* Unallocated EC: 0x0B */
+#define ESR_ELx_EC_CP14_64 UL(0x0C)
+#define ESR_ELx_EC_BTI UL(0x0D)
+#define ESR_ELx_EC_ILL UL(0x0E)
/* Unallocated EC: 0x0F - 0x10 */
-#define ESR_ELx_EC_SVC32 (0x11)
-#define ESR_ELx_EC_HVC32 (0x12) /* EL2 only */
-#define ESR_ELx_EC_SMC32 (0x13) /* EL2 and above */
+#define ESR_ELx_EC_SVC32 UL(0x11)
+#define ESR_ELx_EC_HVC32 UL(0x12) /* EL2 only */
+#define ESR_ELx_EC_SMC32 UL(0x13) /* EL2 and above */
/* Unallocated EC: 0x14 */
-#define ESR_ELx_EC_SVC64 (0x15)
-#define ESR_ELx_EC_HVC64 (0x16) /* EL2 and above */
-#define ESR_ELx_EC_SMC64 (0x17) /* EL2 and above */
-#define ESR_ELx_EC_SYS64 (0x18)
-#define ESR_ELx_EC_SVE (0x19)
-#define ESR_ELx_EC_ERET (0x1a) /* EL2 only */
+#define ESR_ELx_EC_SVC64 UL(0x15)
+#define ESR_ELx_EC_HVC64 UL(0x16) /* EL2 and above */
+#define ESR_ELx_EC_SMC64 UL(0x17) /* EL2 and above */
+#define ESR_ELx_EC_SYS64 UL(0x18)
+#define ESR_ELx_EC_SVE UL(0x19)
+#define ESR_ELx_EC_ERET UL(0x1a) /* EL2 only */
/* Unallocated EC: 0x1B */
-#define ESR_ELx_EC_FPAC (0x1C) /* EL1 and above */
-#define ESR_ELx_EC_SME (0x1D)
+#define ESR_ELx_EC_FPAC UL(0x1C) /* EL1 and above */
+#define ESR_ELx_EC_SME UL(0x1D)
/* Unallocated EC: 0x1E */
-#define ESR_ELx_EC_IMP_DEF (0x1f) /* EL3 only */
-#define ESR_ELx_EC_IABT_LOW (0x20)
-#define ESR_ELx_EC_IABT_CUR (0x21)
-#define ESR_ELx_EC_PC_ALIGN (0x22)
+#define ESR_ELx_EC_IMP_DEF UL(0x1f) /* EL3 only */
+#define ESR_ELx_EC_IABT_LOW UL(0x20)
+#define ESR_ELx_EC_IABT_CUR UL(0x21)
+#define ESR_ELx_EC_PC_ALIGN UL(0x22)
/* Unallocated EC: 0x23 */
-#define ESR_ELx_EC_DABT_LOW (0x24)
-#define ESR_ELx_EC_DABT_CUR (0x25)
-#define ESR_ELx_EC_SP_ALIGN (0x26)
-/* Unallocated EC: 0x27 */
-#define ESR_ELx_EC_FP_EXC32 (0x28)
+#define ESR_ELx_EC_DABT_LOW UL(0x24)
+#define ESR_ELx_EC_DABT_CUR UL(0x25)
+#define ESR_ELx_EC_SP_ALIGN UL(0x26)
+#define ESR_ELx_EC_MOPS UL(0x27)
+#define ESR_ELx_EC_FP_EXC32 UL(0x28)
/* Unallocated EC: 0x29 - 0x2B */
-#define ESR_ELx_EC_FP_EXC64 (0x2C)
-/* Unallocated EC: 0x2D - 0x2E */
-#define ESR_ELx_EC_SERROR (0x2F)
-#define ESR_ELx_EC_BREAKPT_LOW (0x30)
-#define ESR_ELx_EC_BREAKPT_CUR (0x31)
-#define ESR_ELx_EC_SOFTSTP_LOW (0x32)
-#define ESR_ELx_EC_SOFTSTP_CUR (0x33)
-#define ESR_ELx_EC_WATCHPT_LOW (0x34)
-#define ESR_ELx_EC_WATCHPT_CUR (0x35)
+#define ESR_ELx_EC_FP_EXC64 UL(0x2C)
+#define ESR_ELx_EC_GCS UL(0x2D)
+/* Unallocated EC: 0x2E */
+#define ESR_ELx_EC_SERROR UL(0x2F)
+#define ESR_ELx_EC_BREAKPT_LOW UL(0x30)
+#define ESR_ELx_EC_BREAKPT_CUR UL(0x31)
+#define ESR_ELx_EC_SOFTSTP_LOW UL(0x32)
+#define ESR_ELx_EC_SOFTSTP_CUR UL(0x33)
+#define ESR_ELx_EC_WATCHPT_LOW UL(0x34)
+#define ESR_ELx_EC_WATCHPT_CUR UL(0x35)
/* Unallocated EC: 0x36 - 0x37 */
-#define ESR_ELx_EC_BKPT32 (0x38)
+#define ESR_ELx_EC_BKPT32 UL(0x38)
/* Unallocated EC: 0x39 */
-#define ESR_ELx_EC_VECTOR32 (0x3A) /* EL2 only */
+#define ESR_ELx_EC_VECTOR32 UL(0x3A) /* EL2 only */
/* Unallocated EC: 0x3B */
-#define ESR_ELx_EC_BRK64 (0x3C)
+#define ESR_ELx_EC_BRK64 UL(0x3C)
/* Unallocated EC: 0x3D - 0x3F */
-#define ESR_ELx_EC_MAX (0x3F)
+#define ESR_ELx_EC_MAX UL(0x3F)
#define ESR_ELx_EC_SHIFT (26)
#define ESR_ELx_EC_WIDTH (6)
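The EC lives in ESR_ELx[31:26], so classifying a syndrome is a shift and a mask. A small, self-contained example decoding a typical value (0x96000045, a data abort taken from the current EL with WnR set); the switch only covers a couple of the classes defined above:

#include <stdint.h>
#include <stdio.h>

#define EC_SHIFT	26
#define EC_MASK		0x3fUL

int main(void)
{
	uint64_t esr = 0x96000045;	/* example syndrome value */
	uint64_t ec = (esr >> EC_SHIFT) & EC_MASK;

	switch (ec) {
	case 0x24:
		printf("EC 0x%02llx: data abort from a lower EL\n",
		       (unsigned long long)ec);
		break;
	case 0x25:
		printf("EC 0x%02llx: data abort from the current EL\n",
		       (unsigned long long)ec);
		break;
	default:
		printf("EC 0x%02llx: something else\n", (unsigned long long)ec);
	}
	return 0;
}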
@@ -75,8 +77,11 @@
#define ESR_ELx_IL_SHIFT (25)
#define ESR_ELx_IL (UL(1) << ESR_ELx_IL_SHIFT)
-#define ESR_ELx_ISS_MASK (ESR_ELx_IL - 1)
+#define ESR_ELx_ISS_MASK (GENMASK(24, 0))
#define ESR_ELx_ISS(esr) ((esr) & ESR_ELx_ISS_MASK)
+#define ESR_ELx_ISS2_SHIFT (32)
+#define ESR_ELx_ISS2_MASK (GENMASK_ULL(55, 32))
+#define ESR_ELx_ISS2(esr) (((esr) & ESR_ELx_ISS2_MASK) >> ESR_ELx_ISS2_SHIFT)
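With ESR_ELx treated as a 64-bit register, ISS occupies bits [24:0] and ISS2 bits [55:32]. A quick stand-alone illustration of splitting a syndrome into the two fields, using an arbitrary 64-bit value:

#include <stdint.h>
#include <stdio.h>

#define ISS_MASK	((UINT64_C(1) << 25) - 1)		 /* bits [24:0]  */
#define ISS2_SHIFT	32
#define ISS2_MASK	(((UINT64_C(1) << 24) - 1) << ISS2_SHIFT) /* bits [55:32] */

int main(void)
{
	uint64_t esr = 0x0000012496000045;	/* arbitrary example value */

	printf("ISS  = 0x%07llx\n", (unsigned long long)(esr & ISS_MASK));
	printf("ISS2 = 0x%06llx\n",
	       (unsigned long long)((esr & ISS2_MASK) >> ISS2_SHIFT));
	return 0;
}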
/* ISS field definitions shared by different classes */
#define ESR_ELx_WNR_SHIFT (6)
@@ -95,6 +100,8 @@
#define ESR_ELx_AET_CE (UL(6) << ESR_ELx_AET_SHIFT)
/* Shared ISS field definitions for Data/Instruction aborts */
+#define ESR_ELx_VNCR_SHIFT (13)
+#define ESR_ELx_VNCR (UL(1) << ESR_ELx_VNCR_SHIFT)
#define ESR_ELx_SET_SHIFT (11)
#define ESR_ELx_SET_MASK (UL(3) << ESR_ELx_SET_SHIFT)
#define ESR_ELx_FnV_SHIFT (10)
@@ -114,6 +121,26 @@
#define ESR_ELx_FSC_ACCESS (0x08)
#define ESR_ELx_FSC_FAULT (0x04)
#define ESR_ELx_FSC_PERM (0x0C)
+#define ESR_ELx_FSC_SEA_TTW(n) (0x14 + (n))
+#define ESR_ELx_FSC_SECC (0x18)
+#define ESR_ELx_FSC_SECC_TTW(n) (0x1c + (n))
+#define ESR_ELx_FSC_ADDRSZ (0x00)
+
+/*
+ * Annoyingly, the negative levels for Address size faults aren't laid out
+ * contiguously (or in the desired order)
+ */
+#define ESR_ELx_FSC_ADDRSZ_nL(n) ((n) == -1 ? 0x25 : 0x2C)
+#define ESR_ELx_FSC_ADDRSZ_L(n) ((n) < 0 ? ESR_ELx_FSC_ADDRSZ_nL(n) : \
+ (ESR_ELx_FSC_ADDRSZ + (n)))
+
+/* Status codes for individual page table levels */
+#define ESR_ELx_FSC_ACCESS_L(n) (ESR_ELx_FSC_ACCESS + (n))
+#define ESR_ELx_FSC_PERM_L(n) (ESR_ELx_FSC_PERM + (n))
+
+#define ESR_ELx_FSC_FAULT_nL (0x2C)
+#define ESR_ELx_FSC_FAULT_L(n) (((n) < 0 ? ESR_ELx_FSC_FAULT_nL : \
+ ESR_ELx_FSC_FAULT) + (n))
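The level-aware helpers encode the fault level in the low bits of the FSC, with the negative (level -1) case living in a separate range. A quick check of the values the macros above expand to, reimplemented stand-alone with the constants treated as given:

#include <stdio.h>

#define FSC_FAULT	0x04
#define FSC_FAULT_nL	0x2C
#define FSC_FAULT_L(n)	(((n) < 0 ? FSC_FAULT_nL : FSC_FAULT) + (n))

int main(void)
{
	/* Translation faults: level -1 -> 0x2B, levels 0..3 -> 0x04..0x07 */
	for (int n = -1; n <= 3; n++)
		printf("translation fault, level %2d: FSC 0x%02x\n",
		       n, FSC_FAULT_L(n));
	return 0;
}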
/* ISS field definitions for Data Aborts */
#define ESR_ELx_ISV_SHIFT (24)
@@ -131,6 +158,20 @@
#define ESR_ELx_CM_SHIFT (8)
#define ESR_ELx_CM (UL(1) << ESR_ELx_CM_SHIFT)
+/* ISS2 field definitions for Data Aborts */
+#define ESR_ELx_TnD_SHIFT (10)
+#define ESR_ELx_TnD (UL(1) << ESR_ELx_TnD_SHIFT)
+#define ESR_ELx_TagAccess_SHIFT (9)
+#define ESR_ELx_TagAccess (UL(1) << ESR_ELx_TagAccess_SHIFT)
+#define ESR_ELx_GCS_SHIFT (8)
+#define ESR_ELx_GCS (UL(1) << ESR_ELx_GCS_SHIFT)
+#define ESR_ELx_Overlay_SHIFT (6)
+#define ESR_ELx_Overlay (UL(1) << ESR_ELx_Overlay_SHIFT)
+#define ESR_ELx_DirtyBit_SHIFT (5)
+#define ESR_ELx_DirtyBit (UL(1) << ESR_ELx_DirtyBit_SHIFT)
+#define ESR_ELx_Xs_SHIFT (0)
+#define ESR_ELx_Xs_MASK (GENMASK_ULL(4, 0))
+
/* ISS field definitions for exceptions taken in to Hyp */
#define ESR_ELx_CV (UL(1) << 24)
#define ESR_ELx_COND_SHIFT (20)
@@ -143,6 +184,13 @@
#define ESR_ELx_WFx_ISS_WFE (UL(1) << 0)
#define ESR_ELx_xVC_IMM_MASK ((UL(1) << 16) - 1)
+/* ISS definitions for LD64B/ST64B/{T,P}SBCSYNC instructions */
+#define ESR_ELx_ISS_OTHER_ST64BV (0)
+#define ESR_ELx_ISS_OTHER_ST64BV0 (1)
+#define ESR_ELx_ISS_OTHER_LDST64B (2)
+#define ESR_ELx_ISS_OTHER_TSBCSYNC (3)
+#define ESR_ELx_ISS_OTHER_PSBCSYNC (4)
+
#define DISR_EL1_IDS (UL(1) << 24)
/*
* DISR_EL1 and ESR_ELx share the bottom 13 bits, but the RES0 bits may mean
@@ -263,6 +311,10 @@
(((e) & ESR_ELx_SYS64_ISS_OP2_MASK) >> \
ESR_ELx_SYS64_ISS_OP2_SHIFT))
+/* ISS field definitions for ERET/ERETAA/ERETAB trapping */
+#define ESR_ELx_ERET_ISS_ERET 0x2
+#define ESR_ELx_ERET_ISS_ERETA 0x1
+
/*
* ISS field definitions for floating-point exception traps
* (FP_EXC_32/FP_EXC_64).
@@ -336,15 +388,57 @@
/*
* ISS values for SME traps
*/
-
-#define ESR_ELx_SME_ISS_SME_DISABLED 0
-#define ESR_ELx_SME_ISS_ILL 1
-#define ESR_ELx_SME_ISS_SM_DISABLED 2
-#define ESR_ELx_SME_ISS_ZA_DISABLED 3
+#define ESR_ELx_SME_ISS_SMTC_MASK GENMASK(2, 0)
+#define ESR_ELx_SME_ISS_SMTC(esr) ((esr) & ESR_ELx_SME_ISS_SMTC_MASK)
+
+#define ESR_ELx_SME_ISS_SMTC_SME_DISABLED 0
+#define ESR_ELx_SME_ISS_SMTC_ILL 1
+#define ESR_ELx_SME_ISS_SMTC_SM_DISABLED 2
+#define ESR_ELx_SME_ISS_SMTC_ZA_DISABLED 3
+#define ESR_ELx_SME_ISS_SMTC_ZT_DISABLED 4
+
+/* ISS field definitions for MOPS exceptions */
+#define ESR_ELx_MOPS_ISS_MEM_INST (UL(1) << 24)
+#define ESR_ELx_MOPS_ISS_FROM_EPILOGUE (UL(1) << 18)
+#define ESR_ELx_MOPS_ISS_WRONG_OPTION (UL(1) << 17)
+#define ESR_ELx_MOPS_ISS_OPTION_A (UL(1) << 16)
+#define ESR_ELx_MOPS_ISS_DESTREG(esr) (((esr) & (UL(0x1f) << 10)) >> 10)
+#define ESR_ELx_MOPS_ISS_SRCREG(esr) (((esr) & (UL(0x1f) << 5)) >> 5)
+#define ESR_ELx_MOPS_ISS_SIZEREG(esr) (((esr) & (UL(0x1f) << 0)) >> 0)
+
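Decoding a MOPS syndrome is again a matter of masking register numbers out of the ISS: the destination, source and size registers each occupy five bits, at the shifts shown above. A stand-alone sketch feeding in a made-up syndrome rather than anything captured from hardware:

#include <stdint.h>
#include <stdio.h>

#define MOPS_DESTREG(esr)	(((esr) >> 10) & 0x1f)
#define MOPS_SRCREG(esr)	(((esr) >> 5) & 0x1f)
#define MOPS_SIZEREG(esr)	((esr) & 0x1f)

int main(void)
{
	/* Made-up ISS: Xd = 2, Xs = 1, Xn = 0, option/epilogue bits clear. */
	uint64_t esr = (UINT64_C(2) << 10) | (UINT64_C(1) << 5) | 0;

	printf("dest X%llu, src X%llu, size X%llu\n",
	       (unsigned long long)MOPS_DESTREG(esr),
	       (unsigned long long)MOPS_SRCREG(esr),
	       (unsigned long long)MOPS_SIZEREG(esr));
	return 0;
}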
+/* ISS field definitions for GCS */
+#define ESR_ELx_ExType_SHIFT (20)
+#define ESR_ELx_ExType_MASK GENMASK(23, 20)
+#define ESR_ELx_Raddr_SHIFT (10)
+#define ESR_ELx_Raddr_MASK GENMASK(14, 10)
+#define ESR_ELx_Rn_SHIFT (5)
+#define ESR_ELx_Rn_MASK GENMASK(9, 5)
+#define ESR_ELx_Rvalue_SHIFT 5
+#define ESR_ELx_Rvalue_MASK GENMASK(9, 5)
+#define ESR_ELx_IT_SHIFT (0)
+#define ESR_ELx_IT_MASK GENMASK(4, 0)
+
+#define ESR_ELx_ExType_DATA_CHECK 0
+#define ESR_ELx_ExType_EXLOCK 1
+#define ESR_ELx_ExType_STR 2
+
+#define ESR_ELx_IT_RET 0
+#define ESR_ELx_IT_GCSPOPM 1
+#define ESR_ELx_IT_RET_KEYA 2
+#define ESR_ELx_IT_RET_KEYB 3
+#define ESR_ELx_IT_GCSSS1 4
+#define ESR_ELx_IT_GCSSS2 5
+#define ESR_ELx_IT_GCSPOPCX 6
+#define ESR_ELx_IT_GCSPOPX 7
#ifndef __ASSEMBLY__
#include <asm/types.h>
+static inline unsigned long esr_brk_comment(unsigned long esr)
+{
+ return esr & ESR_ELx_BRK64_ISS_COMMENT_MASK;
+}
+
static inline bool esr_is_data_abort(unsigned long esr)
{
const unsigned long ec = ESR_ELx_EC(esr);
@@ -352,6 +446,93 @@ static inline bool esr_is_data_abort(unsigned long esr)
return ec == ESR_ELx_EC_DABT_LOW || ec == ESR_ELx_EC_DABT_CUR;
}
+static inline bool esr_is_cfi_brk(unsigned long esr)
+{
+ return ESR_ELx_EC(esr) == ESR_ELx_EC_BRK64 &&
+ (esr_brk_comment(esr) & ~CFI_BRK_IMM_MASK) == CFI_BRK_IMM_BASE;
+}
+
+static inline bool esr_is_ubsan_brk(unsigned long esr)
+{
+ return (esr_brk_comment(esr) & ~UBSAN_BRK_MASK) == UBSAN_BRK_IMM;
+}
+
+static inline bool esr_fsc_is_translation_fault(unsigned long esr)
+{
+ esr = esr & ESR_ELx_FSC;
+
+ return (esr == ESR_ELx_FSC_FAULT_L(3)) ||
+ (esr == ESR_ELx_FSC_FAULT_L(2)) ||
+ (esr == ESR_ELx_FSC_FAULT_L(1)) ||
+ (esr == ESR_ELx_FSC_FAULT_L(0)) ||
+ (esr == ESR_ELx_FSC_FAULT_L(-1));
+}
+
+static inline bool esr_fsc_is_permission_fault(unsigned long esr)
+{
+ esr = esr & ESR_ELx_FSC;
+
+ return (esr == ESR_ELx_FSC_PERM_L(3)) ||
+ (esr == ESR_ELx_FSC_PERM_L(2)) ||
+ (esr == ESR_ELx_FSC_PERM_L(1)) ||
+ (esr == ESR_ELx_FSC_PERM_L(0));
+}
+
+static inline bool esr_fsc_is_access_flag_fault(unsigned long esr)
+{
+ esr = esr & ESR_ELx_FSC;
+
+ return (esr == ESR_ELx_FSC_ACCESS_L(3)) ||
+ (esr == ESR_ELx_FSC_ACCESS_L(2)) ||
+ (esr == ESR_ELx_FSC_ACCESS_L(1)) ||
+ (esr == ESR_ELx_FSC_ACCESS_L(0));
+}
+
+static inline bool esr_fsc_is_addr_sz_fault(unsigned long esr)
+{
+ esr &= ESR_ELx_FSC;
+
+ return (esr == ESR_ELx_FSC_ADDRSZ_L(3)) ||
+ (esr == ESR_ELx_FSC_ADDRSZ_L(2)) ||
+ (esr == ESR_ELx_FSC_ADDRSZ_L(1)) ||
+ (esr == ESR_ELx_FSC_ADDRSZ_L(0)) ||
+ (esr == ESR_ELx_FSC_ADDRSZ_L(-1));
+}
+
+static inline bool esr_fsc_is_sea_ttw(unsigned long esr)
+{
+ esr = esr & ESR_ELx_FSC;
+
+ return (esr == ESR_ELx_FSC_SEA_TTW(3)) ||
+ (esr == ESR_ELx_FSC_SEA_TTW(2)) ||
+ (esr == ESR_ELx_FSC_SEA_TTW(1)) ||
+ (esr == ESR_ELx_FSC_SEA_TTW(0)) ||
+ (esr == ESR_ELx_FSC_SEA_TTW(-1));
+}
+
+static inline bool esr_fsc_is_secc_ttw(unsigned long esr)
+{
+ esr = esr & ESR_ELx_FSC;
+
+ return (esr == ESR_ELx_FSC_SECC_TTW(3)) ||
+ (esr == ESR_ELx_FSC_SECC_TTW(2)) ||
+ (esr == ESR_ELx_FSC_SECC_TTW(1)) ||
+ (esr == ESR_ELx_FSC_SECC_TTW(0)) ||
+ (esr == ESR_ELx_FSC_SECC_TTW(-1));
+}
+
+/* Indicate whether ESR.EC==0x1A is for an ERETAx instruction */
+static inline bool esr_iss_is_eretax(unsigned long esr)
+{
+ return esr & ESR_ELx_ERET_ISS_ERET;
+}
+
+/* Indicate which key is used for ERETAx (false: A-Key, true: B-Key) */
+static inline bool esr_iss_is_eretab(unsigned long esr)
+{
+ return esr & ESR_ELx_ERET_ISS_ERETA;
+}
+
const char *esr_get_class_string(unsigned long esr);
#endif /* __ASSEMBLY */
diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h
index d94aecff9690..e3874c4fc399 100644
--- a/arch/arm64/include/asm/exception.h
+++ b/arch/arm64/include/asm/exception.h
@@ -8,16 +8,11 @@
#define __ASM_EXCEPTION_H
#include <asm/esr.h>
-#include <asm/kprobes.h>
#include <asm/ptrace.h>
#include <linux/interrupt.h>
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
#define __exception_irq_entry __irq_entry
-#else
-#define __exception_irq_entry __kprobes
-#endif
static inline unsigned long disr_to_esr(u64 disr)
{
@@ -31,7 +26,7 @@ static inline unsigned long disr_to_esr(u64 disr)
return esr;
}
-asmlinkage void handle_bad_stack(struct pt_regs *regs);
+asmlinkage void __noreturn handle_bad_stack(struct pt_regs *regs);
asmlinkage void el1t_64_sync_handler(struct pt_regs *regs);
asmlinkage void el1t_64_irq_handler(struct pt_regs *regs);
@@ -58,23 +53,43 @@ asmlinkage void call_on_irq_stack(struct pt_regs *regs,
asmlinkage void asm_exit_to_user_mode(struct pt_regs *regs);
void do_mem_abort(unsigned long far, unsigned long esr, struct pt_regs *regs);
-void do_undefinstr(struct pt_regs *regs);
-void do_bti(struct pt_regs *regs);
-void do_debug_exception(unsigned long addr_if_watchpoint, unsigned long esr,
+void do_el0_undef(struct pt_regs *regs, unsigned long esr);
+void do_el1_undef(struct pt_regs *regs, unsigned long esr);
+void do_el0_bti(struct pt_regs *regs);
+void do_el1_bti(struct pt_regs *regs, unsigned long esr);
+void do_el0_gcs(struct pt_regs *regs, unsigned long esr);
+void do_el1_gcs(struct pt_regs *regs, unsigned long esr);
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+void do_breakpoint(unsigned long esr, struct pt_regs *regs);
+void do_watchpoint(unsigned long addr, unsigned long esr,
struct pt_regs *regs);
+#else
+static inline void do_breakpoint(unsigned long esr, struct pt_regs *regs) {}
+static inline void do_watchpoint(unsigned long addr, unsigned long esr,
+ struct pt_regs *regs) {}
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+void do_el0_softstep(unsigned long esr, struct pt_regs *regs);
+void do_el1_softstep(unsigned long esr, struct pt_regs *regs);
+void do_el0_brk64(unsigned long esr, struct pt_regs *regs);
+void do_el1_brk64(unsigned long esr, struct pt_regs *regs);
+void do_bkpt32(unsigned long esr, struct pt_regs *regs);
void do_fpsimd_acc(unsigned long esr, struct pt_regs *regs);
void do_sve_acc(unsigned long esr, struct pt_regs *regs);
void do_sme_acc(unsigned long esr, struct pt_regs *regs);
void do_fpsimd_exc(unsigned long esr, struct pt_regs *regs);
-void do_sysinstr(unsigned long esr, struct pt_regs *regs);
+void do_el0_sys(unsigned long esr, struct pt_regs *regs);
void do_sp_pc_abort(unsigned long addr, unsigned long esr, struct pt_regs *regs);
void bad_el0_sync(struct pt_regs *regs, int reason, unsigned long esr);
-void do_cp15instr(unsigned long esr, struct pt_regs *regs);
+void do_el0_cp15(unsigned long esr, struct pt_regs *regs);
+int do_compat_alignment_fixup(unsigned long addr, struct pt_regs *regs);
void do_el0_svc(struct pt_regs *regs);
void do_el0_svc_compat(struct pt_regs *regs);
-void do_ptrauth_fault(struct pt_regs *regs, unsigned long esr);
+void do_el0_fpac(struct pt_regs *regs, unsigned long esr);
+void do_el1_fpac(struct pt_regs *regs, unsigned long esr);
+void do_el0_mops(struct pt_regs *regs, unsigned long esr);
+void do_el1_mops(struct pt_regs *regs, unsigned long esr);
void do_serror(struct pt_regs *regs, unsigned long esr);
-void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags);
+void do_signal(struct pt_regs *regs);
-void panic_bad_stack(struct pt_regs *regs, unsigned long esr, unsigned long far);
+void __noreturn panic_bad_stack(struct pt_regs *regs, unsigned long esr, unsigned long far);
#endif /* __ASM_EXCEPTION_H */
diff --git a/arch/arm64/include/asm/extable.h b/arch/arm64/include/asm/extable.h
index 72b0e71cc3de..9dc39612bdf5 100644
--- a/arch/arm64/include/asm/extable.h
+++ b/arch/arm64/include/asm/extable.h
@@ -33,6 +33,8 @@ do { \
(b)->data = (tmp).data; \
} while (0)
+bool insn_may_access_user(unsigned long addr, unsigned long esr);
+
#ifdef CONFIG_BPF_JIT
bool ex_handler_bpf(const struct exception_table_entry *ex,
struct pt_regs *regs);
@@ -45,5 +47,5 @@ bool ex_handler_bpf(const struct exception_table_entry *ex,
}
#endif /* !CONFIG_BPF_JIT */
-bool fixup_exception(struct pt_regs *regs);
+bool fixup_exception(struct pt_regs *regs, unsigned long esr);
#endif
diff --git a/arch/arm64/include/asm/fb.h b/arch/arm64/include/asm/fb.h
deleted file mode 100644
index bdc735ee1f67..000000000000
--- a/arch/arm64/include/asm/fb.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2012 ARM Ltd.
- */
-#ifndef __ASM_FB_H_
-#define __ASM_FB_H_
-
-#include <linux/fb.h>
-#include <linux/fs.h>
-#include <asm/page.h>
-
-static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
- unsigned long off)
-{
- vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
-}
-
-static inline int fb_is_primary_device(struct fb_info *info)
-{
- return 0;
-}
-
-#endif /* __ASM_FB_H_ */
diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h
index 71ed5fdf718b..635a43c4ec85 100644
--- a/arch/arm64/include/asm/fixmap.h
+++ b/arch/arm64/include/asm/fixmap.h
@@ -17,6 +17,7 @@
#ifndef __ASSEMBLY__
#include <linux/kernel.h>
+#include <linux/math.h>
#include <linux/sizes.h>
#include <asm/boot.h>
#include <asm/page.h>
@@ -36,21 +37,23 @@ enum fixed_addresses {
FIX_HOLE,
/*
- * Reserve a virtual window for the FDT that is 2 MB larger than the
- * maximum supported size, and put it at the top of the fixmap region.
- * The additional space ensures that any FDT that does not exceed
- * MAX_FDT_SIZE can be mapped regardless of whether it crosses any
- * 2 MB alignment boundaries.
- *
- * Keep this at the top so it remains 2 MB aligned.
+ * Reserve a virtual window for the FDT that is a page bigger than the
+ * maximum supported size. The additional space ensures that any FDT
+ * that does not exceed MAX_FDT_SIZE can be mapped regardless of
+ * whether it crosses any page boundary.
*/
-#define FIX_FDT_SIZE (MAX_FDT_SIZE + SZ_2M)
FIX_FDT_END,
- FIX_FDT = FIX_FDT_END + FIX_FDT_SIZE / PAGE_SIZE - 1,
+ FIX_FDT = FIX_FDT_END + DIV_ROUND_UP(MAX_FDT_SIZE, PAGE_SIZE) + 1,
FIX_EARLYCON_MEM_BASE,
FIX_TEXT_POKE0,
+#ifdef CONFIG_KVM
+ /* One slot per CPU, mapping the guest's VNCR page at EL2. */
+ FIX_VNCR_END,
+ FIX_VNCR = FIX_VNCR_END + NR_CPUS,
+#endif
+
#ifdef CONFIG_ACPI_APEI_GHES
/* Used for GHES mapping from assorted contexts */
FIX_APEI_GHES_IRQ,
@@ -90,13 +93,16 @@ enum fixed_addresses {
FIX_PTE,
FIX_PMD,
FIX_PUD,
+ FIX_P4D,
FIX_PGD,
__end_of_fixed_addresses
};
-#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT)
-#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
+#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT)
+#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
+#define FIXADDR_TOT_SIZE (__end_of_fixed_addresses << PAGE_SHIFT)
+#define FIXADDR_TOT_START (FIXADDR_TOP - FIXADDR_TOT_SIZE)
#define FIXMAP_PAGE_IO __pgprot(PROT_DEVICE_nGnRE)
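The reworked FIX_FDT slot count is plain integer arithmetic: one page of slack plus enough page-sized slots to cover MAX_FDT_SIZE. A stand-alone sketch of the computation, assuming a 2 MiB MAX_FDT_SIZE and a 4 KiB page size purely for illustration:

#include <stdio.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	unsigned long max_fdt_size = 2UL << 20;	/* assumed 2 MiB */
	unsigned long page_size = 4096UL;	/* illustrative 4 KiB pages */

	unsigned long slots = DIV_ROUND_UP(max_fdt_size, page_size) + 1;

	printf("FDT fixmap window: %lu slots, %lu KiB\n",
	       slots, slots * page_size / 1024);
	return 0;
}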
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index 9bb1873f5295..b8cf0ea43cc0 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -6,6 +6,7 @@
#define __ASM_FP_H
#include <asm/errno.h>
+#include <asm/percpu.h>
#include <asm/ptrace.h>
#include <asm/processor.h>
#include <asm/sigcontext.h>
@@ -21,7 +22,6 @@
#include <linux/stddef.h>
#include <linux/types.h>
-#ifdef CONFIG_COMPAT
/* Masks for extracting the FPSR and FPCR from the FPSCR */
#define VFP_FPSCR_STAT_MASK 0xf800009f
#define VFP_FPSCR_CTRL_MASK 0x07f79f00
@@ -30,19 +30,44 @@
* control/status register.
*/
#define VFP_STATE_SIZE ((32 * 8) + 4)
-#endif
+
+static inline unsigned long cpacr_save_enable_kernel_sve(void)
+{
+ unsigned long old = read_sysreg(cpacr_el1);
+ unsigned long set = CPACR_EL1_FPEN_EL1EN | CPACR_EL1_ZEN_EL1EN;
+
+ write_sysreg(old | set, cpacr_el1);
+ isb();
+ return old;
+}
+
+static inline unsigned long cpacr_save_enable_kernel_sme(void)
+{
+ unsigned long old = read_sysreg(cpacr_el1);
+ unsigned long set = CPACR_EL1_FPEN_EL1EN | CPACR_EL1_SMEN_EL1EN;
+
+ write_sysreg(old | set, cpacr_el1);
+ isb();
+ return old;
+}
+
+static inline void cpacr_restore(unsigned long cpacr)
+{
+ write_sysreg(cpacr, cpacr_el1);
+ isb();
+}
/*
* When we defined the maximum SVE vector length we defined the ABI so
* that the maximum vector length included all the reserved for future
* expansion bits in ZCR rather than those just currently defined by
- * the architecture. While SME follows a similar pattern the fact that
- * it includes a square matrix means that any allocations that attempt
- * to cover the maximum potential vector length (such as happen with
- * the regset used for ptrace) end up being extremely large. Define
- * the much lower actual limit for use in such situations.
+ * the architecture. Using this length to allocate worst-case sized buffers
+ * results in excessively large allocations, and this effect is even
+ * more pronounced for SME due to ZA. Define more suitable VLs for
+ * these situations.
*/
-#define SME_VQ_MAX 16
+#define ARCH_SVE_VQ_MAX ((ZCR_ELx_LEN_MASK >> ZCR_ELx_LEN_SHIFT) + 1)
+#define SME_VQ_MAX ((SMCR_ELx_LEN_MASK >> SMCR_ELx_LEN_SHIFT) + 1)
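Both new limits are just the architectural maximum of the LEN field plus one: ZCR_ELx.LEN and SMCR_ELx.LEN are 4-bit fields counting vector quadwords minus one, so the ceiling works out to 16 VQs, i.e. a 2048-bit vector. A quick stand-alone version of the arithmetic, with the 4-bit LEN mask written out explicitly:

#include <stdio.h>

#define LEN_MASK	0xf	/* ZCR_ELx.LEN / SMCR_ELx.LEN are 4-bit fields */

int main(void)
{
	unsigned int vq_max = LEN_MASK + 1;	/* quadwords (128-bit units) */

	printf("max VQ: %u -> max VL: %u bytes (%u bits)\n",
	       vq_max, vq_max * 16, vq_max * 128);
	return 0;
}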
struct task_struct;
@@ -52,17 +77,28 @@ extern void fpsimd_load_state(struct user_fpsimd_state *state);
extern void fpsimd_thread_switch(struct task_struct *next);
extern void fpsimd_flush_thread(void);
-extern void fpsimd_signal_preserve_current_state(void);
extern void fpsimd_preserve_current_state(void);
extern void fpsimd_restore_current_state(void);
extern void fpsimd_update_current_state(struct user_fpsimd_state const *state);
-extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state,
- void *sve_state, unsigned int sve_vl,
- void *za_state, unsigned int sme_vl,
- u64 *svcr);
+struct cpu_fp_state {
+ struct user_fpsimd_state *st;
+ void *sve_state;
+ void *sme_state;
+ u64 *svcr;
+ u64 *fpmr;
+ unsigned int sve_vl;
+ unsigned int sme_vl;
+ enum fp_type *fp_type;
+ enum fp_type to_save;
+};
+
+DECLARE_PER_CPU(struct cpu_fp_state, fpsimd_last_state);
+
+extern void fpsimd_bind_state_to_cpu(struct cpu_fp_state *fp_state);
extern void fpsimd_flush_task_state(struct task_struct *target);
+extern void fpsimd_save_and_flush_current_state(void);
extern void fpsimd_save_and_flush_cpu_state(void);
static inline bool thread_sm_enabled(struct thread_struct *thread)
@@ -75,6 +111,8 @@ static inline bool thread_za_enabled(struct thread_struct *thread)
return system_supports_sme() && (thread->svcr & SVCR_ZA_MASK);
}
+extern void task_smstop_sm(struct task_struct *task);
+
/* Maximum VL that SVE/SME VL-agnostic software can transparently support */
#define VL_ARCH_MAX 0x100
@@ -96,6 +134,13 @@ static inline void *sve_pffr(struct thread_struct *thread)
return (char *)thread->sve_state + sve_ffr_offset(vl);
}
+static inline void *thread_zt_state(struct thread_struct *thread)
+{
+ /* The ZT register state is stored immediately after the ZA state */
+ unsigned int sme_vq = sve_vq_from_vl(thread_get_sme_vl(thread));
+ return thread->sme_state + ZA_SIG_REGS_SIZE(sme_vq);
+}
+
extern void sve_save_state(void *state, u32 *pfpsr, int save_ffr);
extern void sve_load_state(void const *state, u32 const *pfpsr,
int restore_ffr);
@@ -103,16 +148,16 @@ extern void sve_flush_live(bool flush_ffr, unsigned long vq_minus_1);
extern unsigned int sve_get_vl(void);
extern void sve_set_vq(unsigned long vq_minus_1);
extern void sme_set_vq(unsigned long vq_minus_1);
-extern void za_save_state(void *state);
-extern void za_load_state(void const *state);
+extern void sme_save_state(void *state, int zt);
+extern void sme_load_state(void const *state, int zt);
struct arm64_cpu_capabilities;
-extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused);
-extern void sme_kernel_enable(const struct arm64_cpu_capabilities *__unused);
-extern void fa64_kernel_enable(const struct arm64_cpu_capabilities *__unused);
-
-extern u64 read_zcr_features(void);
-extern u64 read_smcr_features(void);
+extern void cpu_enable_fpsimd(const struct arm64_cpu_capabilities *__unused);
+extern void cpu_enable_sve(const struct arm64_cpu_capabilities *__unused);
+extern void cpu_enable_sme(const struct arm64_cpu_capabilities *__unused);
+extern void cpu_enable_sme2(const struct arm64_cpu_capabilities *__unused);
+extern void cpu_enable_fa64(const struct arm64_cpu_capabilities *__unused);
+extern void cpu_enable_fpmr(const struct arm64_cpu_capabilities *__unused);
/*
* Helpers to translate bit indices in sve_vq_map to VQ values (and
@@ -153,12 +198,10 @@ struct vl_info {
#ifdef CONFIG_ARM64_SVE
-extern void sve_alloc(struct task_struct *task);
+extern void sve_alloc(struct task_struct *task, bool flush);
extern void fpsimd_release_task(struct task_struct *task);
-extern void fpsimd_sync_to_sve(struct task_struct *task);
-extern void fpsimd_force_sync_to_sve(struct task_struct *task);
-extern void sve_sync_to_fpsimd(struct task_struct *task);
-extern void sve_sync_from_fpsimd_zeropad(struct task_struct *task);
+extern void fpsimd_sync_from_effective_state(struct task_struct *task);
+extern void fpsimd_sync_to_effective_state_zeropad(struct task_struct *task);
extern int vec_set_vector_length(struct task_struct *task, enum vec_type type,
unsigned long vl, unsigned long flags);
@@ -252,14 +295,29 @@ static inline bool sve_vq_available(unsigned int vq)
return vq_available(ARM64_VEC_SVE, vq);
}
-size_t sve_state_size(struct task_struct const *task);
+static inline size_t __sve_state_size(unsigned int sve_vl, unsigned int sme_vl)
+{
+ unsigned int vl = max(sve_vl, sme_vl);
+ return SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl));
+}
+
+/*
+ * Return how many bytes of memory are required to store the full SVE
+ * state for task, given task's currently configured vector length.
+ */
+static inline size_t sve_state_size(struct task_struct const *task)
+{
+ unsigned int sve_vl = task_get_sve_vl(task);
+ unsigned int sme_vl = task_get_sme_vl(task);
+ return __sve_state_size(sve_vl, sme_vl);
+}
#else /* ! CONFIG_ARM64_SVE */
-static inline void sve_alloc(struct task_struct *task) { }
+static inline void sve_alloc(struct task_struct *task, bool flush) { }
static inline void fpsimd_release_task(struct task_struct *task) { }
-static inline void sve_sync_to_fpsimd(struct task_struct *task) { }
-static inline void sve_sync_from_fpsimd_zeropad(struct task_struct *task) { }
+static inline void fpsimd_sync_from_effective_state(struct task_struct *task) { }
+static inline void fpsimd_sync_to_effective_state_zeropad(struct task_struct *task) { }
static inline int sve_max_virtualisable_vl(void)
{
@@ -293,6 +351,11 @@ static inline void vec_update_vq_map(enum vec_type t) { }
static inline int vec_verify_vq_map(enum vec_type t) { return 0; }
static inline void sve_setup(void) { }
+static inline size_t __sve_state_size(unsigned int sve_vl, unsigned int sme_vl)
+{
+ return 0;
+}
+
static inline size_t sve_state_size(struct task_struct const *task)
{
return 0;
@@ -339,21 +402,30 @@ static inline int sme_max_virtualisable_vl(void)
return vec_max_virtualisable_vl(ARM64_VEC_SME);
}
-extern void sme_alloc(struct task_struct *task);
+extern void sme_alloc(struct task_struct *task, bool flush);
extern unsigned int sme_get_vl(void);
extern int sme_set_current_vl(unsigned long arg);
extern int sme_get_current_vl(void);
+extern void sme_suspend_exit(void);
+
+static inline size_t __sme_state_size(unsigned int sme_vl)
+{
+ size_t size = ZA_SIG_REGS_SIZE(sve_vq_from_vl(sme_vl));
+
+ if (system_supports_sme2())
+ size += ZT_SIG_REG_SIZE;
+
+ return size;
+}
/*
* Return how many bytes of memory are required to store the full SME
- * specific state (currently just ZA) for task, given task's currently
- * configured vector length.
+ * specific state for task, given task's currently configured vector
+ * length.
*/
-static inline size_t za_state_size(struct task_struct const *task)
+static inline size_t sme_state_size(struct task_struct const *task)
{
- unsigned int vl = task_get_sme_vl(task);
-
- return ZA_SIG_REGS_SIZE(sve_vq_from_vl(vl));
+ return __sme_state_size(task_get_sme_vl(task));
}
#else
@@ -365,15 +437,21 @@ static inline void sme_smstart_sm(void) { }
static inline void sme_smstop_sm(void) { }
static inline void sme_smstop(void) { }
-static inline void sme_alloc(struct task_struct *task) { }
+static inline void sme_alloc(struct task_struct *task, bool flush) { }
static inline void sme_setup(void) { }
static inline unsigned int sme_get_vl(void) { return 0; }
static inline int sme_max_vl(void) { return 0; }
static inline int sme_max_virtualisable_vl(void) { return 0; }
static inline int sme_set_current_vl(unsigned long arg) { return -EINVAL; }
static inline int sme_get_current_vl(void) { return -EINVAL; }
+static inline void sme_suspend_exit(void) { }
+
+static inline size_t __sme_state_size(unsigned int sme_vl)
+{
+ return 0;
+}
-static inline size_t za_state_size(struct task_struct const *task)
+static inline size_t sme_state_size(struct task_struct const *task)
{
return 0;
}
diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h
index 5e0910cf4832..cda81d009c9b 100644
--- a/arch/arm64/include/asm/fpsimdmacros.h
+++ b/arch/arm64/include/asm/fpsimdmacros.h
@@ -221,11 +221,25 @@
.endm
/*
- * Zero the entire ZA array
- * ZERO ZA
+ * LDR (ZT0)
+ *
+ * LDR ZT0, nx
+ */
+.macro _ldr_zt nx
+ _check_general_reg \nx
+ .inst 0xe11f8000 \
+ | (\nx << 5)
+.endm
+
+/*
+ * STR (ZT0)
+ *
+ * STR ZT0, nx
*/
-.macro zero_za
- .inst 0xc00800ff
+.macro _str_zt nx
+ _check_general_reg \nx
+ .inst 0xe13f8000 \
+ | (\nx << 5)
.endm
.macro __for from:req, to:req
@@ -294,12 +308,12 @@
_for n, 0, 15, _sve_str_p \n, \nxbase, \n - 16
cbz \save_ffr, 921f
_sve_rdffr 0
- _sve_str_p 0, \nxbase
- _sve_ldr_p 0, \nxbase, -16
b 922f
921:
- str xzr, [x\nxbase] // Zero out FFR
+ _sve_pfalse 0 // Zero out FFR
922:
+ _sve_str_p 0, \nxbase
+ _sve_ldr_p 0, \nxbase, -16
mrs x\nxtmp, fpsr
str w\nxtmp, [\xpfpsr]
mrs x\nxtmp, fpcr
diff --git a/arch/arm64/include/asm/fpu.h b/arch/arm64/include/asm/fpu.h
new file mode 100644
index 000000000000..2ae50bdce59b
--- /dev/null
+++ b/arch/arm64/include/asm/fpu.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2023 SiFive
+ */
+
+#ifndef __ASM_FPU_H
+#define __ASM_FPU_H
+
+#include <asm/neon.h>
+
+#define kernel_fpu_available() cpu_has_neon()
+#define kernel_fpu_begin() kernel_neon_begin()
+#define kernel_fpu_end() kernel_neon_end()
+
+#endif /* ! __ASM_FPU_H */
diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
index dbc45a4157fa..bfe3ce9df197 100644
--- a/arch/arm64/include/asm/ftrace.h
+++ b/arch/arm64/include/asm/ftrace.h
@@ -12,29 +12,17 @@
#define HAVE_FUNCTION_GRAPH_FP_TEST
-/*
- * HAVE_FUNCTION_GRAPH_RET_ADDR_PTR means that the architecture can provide a
- * "return address pointer" which can be used to uniquely identify a return
- * address which has been overwritten.
- *
- * On arm64 we use the address of the caller's frame record, which remains the
- * same for the lifetime of the instrumented function, unlike the return
- * address in the LR.
- */
-#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
-
-#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
#define ARCH_SUPPORTS_FTRACE_OPS 1
#else
-#define MCOUNT_ADDR ((unsigned long)function_nocfi(_mcount))
+#define MCOUNT_ADDR ((unsigned long)_mcount)
#endif
/* The BL at the callsite's adjusted rec->ip */
#define MCOUNT_INSN_SIZE AARCH64_INSN_SIZE
#define FTRACE_PLT_IDX 0
-#define FTRACE_REGS_PLT_IDX 1
-#define NR_FTRACE_PLTS 2
+#define NR_FTRACE_PLTS 1
/*
* Currently, gcc tends to save the link register after the local variables
@@ -63,25 +51,119 @@ extern unsigned long ftrace_graph_call;
extern void return_to_handler(void);
-static inline unsigned long ftrace_call_adjust(unsigned long addr)
-{
- /*
- * Adjust addr to point at the BL in the callsite.
- * See ftrace_init_nop() for the callsite sequence.
- */
- if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
- return addr + AARCH64_INSN_SIZE;
- /*
- * addr is the address of the mcount call instruction.
- * recordmcount does the necessary offset calculation.
- */
- return addr;
-}
+unsigned long ftrace_call_adjust(unsigned long addr);
+unsigned long arch_ftrace_get_symaddr(unsigned long fentry_ip);
+#define ftrace_get_symaddr(fentry_ip) arch_ftrace_get_symaddr(fentry_ip)
-#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
+#define HAVE_ARCH_FTRACE_REGS
struct dyn_ftrace;
struct ftrace_ops;
struct ftrace_regs;
+#define arch_ftrace_regs(fregs) ((struct __arch_ftrace_regs *)(fregs))
+
+#define arch_ftrace_get_regs(regs) NULL
+
+/*
+ * Note: sizeof(struct ftrace_regs) must be a multiple of 16 to ensure correct
+ * stack alignment
+ */
+struct __arch_ftrace_regs {
+ /* x0 - x8 */
+ unsigned long regs[9];
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+ unsigned long direct_tramp;
+#else
+ unsigned long __unused;
+#endif
+
+ unsigned long fp;
+ unsigned long lr;
+
+ unsigned long sp;
+ unsigned long pc;
+};
+
+static __always_inline unsigned long
+ftrace_regs_get_instruction_pointer(const struct ftrace_regs *fregs)
+{
+ return arch_ftrace_regs(fregs)->pc;
+}
+
+static __always_inline void
+ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs,
+ unsigned long pc)
+{
+ arch_ftrace_regs(fregs)->pc = pc;
+}
+
+static __always_inline unsigned long
+ftrace_regs_get_stack_pointer(const struct ftrace_regs *fregs)
+{
+ return arch_ftrace_regs(fregs)->sp;
+}
+
+static __always_inline unsigned long
+ftrace_regs_get_argument(struct ftrace_regs *fregs, unsigned int n)
+{
+ if (n < 8)
+ return arch_ftrace_regs(fregs)->regs[n];
+ return 0;
+}
+
+static __always_inline unsigned long
+ftrace_regs_get_return_value(const struct ftrace_regs *fregs)
+{
+ return arch_ftrace_regs(fregs)->regs[0];
+}
+
+static __always_inline void
+ftrace_regs_set_return_value(struct ftrace_regs *fregs,
+ unsigned long ret)
+{
+ arch_ftrace_regs(fregs)->regs[0] = ret;
+}
+
+static __always_inline void
+ftrace_override_function_with_return(struct ftrace_regs *fregs)
+{
+ arch_ftrace_regs(fregs)->pc = arch_ftrace_regs(fregs)->lr;
+}
+
+static __always_inline unsigned long
+ftrace_regs_get_frame_pointer(const struct ftrace_regs *fregs)
+{
+ return arch_ftrace_regs(fregs)->fp;
+}
+
+static __always_inline unsigned long
+ftrace_regs_get_return_address(const struct ftrace_regs *fregs)
+{
+ return arch_ftrace_regs(fregs)->lr;
+}
+
+static __always_inline struct pt_regs *
+ftrace_partial_regs(const struct ftrace_regs *fregs, struct pt_regs *regs)
+{
+ struct __arch_ftrace_regs *afregs = arch_ftrace_regs(fregs);
+
+ memcpy(regs->regs, afregs->regs, sizeof(afregs->regs));
+ regs->sp = afregs->sp;
+ regs->pc = afregs->pc;
+ regs->regs[29] = afregs->fp;
+ regs->regs[30] = afregs->lr;
+ return regs;
+}
+
+#define arch_ftrace_fill_perf_regs(fregs, _regs) do { \
+ (_regs)->pc = arch_ftrace_regs(fregs)->pc; \
+ (_regs)->regs[29] = arch_ftrace_regs(fregs)->fp; \
+ (_regs)->sp = arch_ftrace_regs(fregs)->sp; \
+ (_regs)->pstate = PSR_MODE_EL1h; \
+ } while (0)
+
+int ftrace_regs_query_register_offset(const char *name);
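The accessors above only expose what the entry trampoline actually saves: x0-x8, fp, lr, sp and pc. ftrace_partial_regs() then scatters those into a pt_regs-shaped layout, with fp and lr landing in regs[29] and regs[30]. A stand-alone mock of that mapping, using simplified local struct definitions rather than the real kernel types:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct mock_ftrace_regs {
	uint64_t regs[9];	/* x0 - x8 */
	uint64_t fp, lr, sp, pc;
};

struct mock_pt_regs {
	uint64_t regs[31];	/* x0 - x30 */
	uint64_t sp, pc;
};

static void partial_regs(const struct mock_ftrace_regs *src,
			 struct mock_pt_regs *dst)
{
	memcpy(dst->regs, src->regs, sizeof(src->regs));
	dst->regs[29] = src->fp;	/* frame pointer */
	dst->regs[30] = src->lr;	/* link register */
	dst->sp = src->sp;
	dst->pc = src->pc;
}

int main(void)
{
	struct mock_ftrace_regs f = { .regs = { 1, 2, 3 }, .fp = 0xf0, .lr = 0x10 };
	struct mock_pt_regs p = { 0 };

	partial_regs(&f, &p);
	printf("x0=%llu x29=0x%llx x30=0x%llx\n",
	       (unsigned long long)p.regs[0],
	       (unsigned long long)p.regs[29],
	       (unsigned long long)p.regs[30]);
	return 0;
}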
int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
#define ftrace_init_nop ftrace_init_nop
@@ -89,6 +171,19 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct ftrace_regs *fregs);
#define ftrace_graph_func ftrace_graph_func
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+static inline void arch_ftrace_set_direct_caller(struct ftrace_regs *fregs,
+ unsigned long addr)
+{
+ /*
+ * The ftrace trampoline will return to this address instead of the
+ * instrumented function.
+ */
+ arch_ftrace_regs(fregs)->direct_tramp = addr;
+}
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
+
#endif
#define ftrace_return_address(n) return_address(n)
@@ -123,4 +218,13 @@ static inline bool arch_syscall_match_sym_name(const char *sym,
}
#endif /* ifndef __ASSEMBLY__ */
+#ifndef __ASSEMBLY__
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+
+void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
+ unsigned long frame_pointer);
+
+#endif /* ifdef CONFIG_FUNCTION_GRAPH_TRACER */
+#endif
+
#endif /* __ASM_FTRACE_H */
diff --git a/arch/arm64/include/asm/gcs.h b/arch/arm64/include/asm/gcs.h
new file mode 100644
index 000000000000..5bc432234d3a
--- /dev/null
+++ b/arch/arm64/include/asm/gcs.h
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2023 ARM Ltd.
+ */
+#ifndef __ASM_GCS_H
+#define __ASM_GCS_H
+
+#include <asm/types.h>
+#include <asm/uaccess.h>
+
+struct kernel_clone_args;
+struct ksignal;
+
+static inline void gcsb_dsync(void)
+{
+ asm volatile(".inst 0xd503227f" : : : "memory");
+}
+
+static inline void gcsstr(u64 *addr, u64 val)
+{
+ register u64 *_addr __asm__ ("x0") = addr;
+ register long _val __asm__ ("x1") = val;
+
+ /* GCSSTTR x1, x0 */
+ asm volatile(
+ ".inst 0xd91f1c01\n"
+ :
+ : "rZ" (_val), "r" (_addr)
+ : "memory");
+}
+
+static inline void gcsss1(u64 Xt)
+{
+ asm volatile (
+ "sys #3, C7, C7, #2, %0\n"
+ :
+ : "rZ" (Xt)
+ : "memory");
+}
+
+static inline u64 gcsss2(void)
+{
+ u64 Xt;
+
+ asm volatile(
+ "SYSL %0, #3, C7, C7, #3\n"
+ : "=r" (Xt)
+ :
+ : "memory");
+
+ return Xt;
+}
+
+#define PR_SHADOW_STACK_SUPPORTED_STATUS_MASK \
+ (PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_WRITE | PR_SHADOW_STACK_PUSH)
+
+#ifdef CONFIG_ARM64_GCS
+
+static inline bool task_gcs_el0_enabled(struct task_struct *task)
+{
+ return task->thread.gcs_el0_mode & PR_SHADOW_STACK_ENABLE;
+}
+
+void gcs_set_el0_mode(struct task_struct *task);
+void gcs_free(struct task_struct *task);
+void gcs_preserve_current_state(void);
+unsigned long gcs_alloc_thread_stack(struct task_struct *tsk,
+ const struct kernel_clone_args *args);
+
+static inline int gcs_check_locked(struct task_struct *task,
+ unsigned long new_val)
+{
+ unsigned long cur_val = task->thread.gcs_el0_mode;
+
+ cur_val &= task->thread.gcs_el0_locked;
+ new_val &= task->thread.gcs_el0_locked;
+
+ if (cur_val != new_val)
+ return -EBUSY;
+
+ return 0;
+}
+
+#else
+
+static inline bool task_gcs_el0_enabled(struct task_struct *task)
+{
+ return false;
+}
+
+static inline void gcs_set_el0_mode(struct task_struct *task) { }
+static inline void gcs_free(struct task_struct *task) { }
+static inline void gcs_preserve_current_state(void) { }
+static inline unsigned long gcs_alloc_thread_stack(struct task_struct *tsk,
+ const struct kernel_clone_args *args)
+{
+ return -ENOTSUPP;
+}
+static inline int gcs_check_locked(struct task_struct *task,
+ unsigned long new_val)
+{
+ return 0;
+}
+
+#endif
+
+#endif
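gcs_check_locked() implements a simple policy: once a bit of the GCS mode has been locked, later requests may not change it, so the comparison only considers the locked bits of the current and requested values. A stand-alone sketch of that check with mock mode/lock words; the flag values here are illustrative, not the real prctl() constants:

#include <stdio.h>

#define GCS_ENABLE	(1u << 0)
#define GCS_WRITE	(1u << 1)
#define GCS_PUSH	(1u << 2)

/* Reject changes to any bit covered by the lock mask (cf. gcs_check_locked). */
static int check_locked(unsigned int cur, unsigned int locked, unsigned int new)
{
	return (cur & locked) == (new & locked) ? 0 : -1 /* -EBUSY */;
}

int main(void)
{
	unsigned int mode = GCS_ENABLE, locked = GCS_ENABLE;

	/* Toggling a locked bit is refused; touching unlocked bits is fine. */
	printf("disable enable: %d\n", check_locked(mode, locked, 0));
	printf("add write:      %d\n", check_locked(mode, locked, mode | GCS_WRITE));
	return 0;
}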
diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h
index cbfa7b6f2e09..77d6b8c63d4e 100644
--- a/arch/arm64/include/asm/hardirq.h
+++ b/arch/arm64/include/asm/hardirq.h
@@ -41,7 +41,7 @@ do { \
\
___hcr = read_sysreg(hcr_el2); \
if (!(___hcr & HCR_TGE)) { \
- write_sysreg(___hcr | HCR_TGE, hcr_el2); \
+ write_sysreg_hcr(___hcr | HCR_TGE); \
isb(); \
} \
/* \
@@ -82,7 +82,7 @@ do { \
*/ \
barrier(); \
if (!___ctx->cnt && !(___hcr & HCR_TGE)) \
- write_sysreg(___hcr, hcr_el2); \
+ write_sysreg_hcr(___hcr); \
} while (0)
static inline void ack_bad_irq(unsigned int irq)
diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h
index 1fd2846dbefe..2a8155c4a882 100644
--- a/arch/arm64/include/asm/hugetlb.h
+++ b/arch/arm64/include/asm/hugetlb.h
@@ -10,6 +10,8 @@
#ifndef __ASM_HUGETLB_H
#define __ASM_HUGETLB_H
+#include <asm/cacheflush.h>
+#include <asm/mte.h>
#include <asm/page.h>
#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
@@ -17,24 +19,31 @@
extern bool arch_hugetlb_migration_supported(struct hstate *h);
#endif
-static inline void arch_clear_hugepage_flags(struct page *page)
+static inline void arch_clear_hugetlb_flags(struct folio *folio)
{
- clear_bit(PG_dcache_clean, &page->flags);
+ clear_bit(PG_dcache_clean, &folio->flags);
+
+#ifdef CONFIG_ARM64_MTE
+ if (system_supports_mte()) {
+ clear_bit(PG_mte_tagged, &folio->flags);
+ clear_bit(PG_mte_lock, &folio->flags);
+ }
+#endif
}
-#define arch_clear_hugepage_flags arch_clear_hugepage_flags
+#define arch_clear_hugetlb_flags arch_clear_hugetlb_flags
pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags);
#define arch_make_huge_pte arch_make_huge_pte
#define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT
extern void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pte);
+ pte_t *ptep, pte_t pte, unsigned long sz);
#define __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS
extern int huge_ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep,
pte_t pte, int dirty);
#define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR
-extern pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
- unsigned long addr, pte_t *ptep);
+extern pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, unsigned long sz);
#define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT
extern void huge_ptep_set_wrprotect(struct mm_struct *mm,
unsigned long addr, pte_t *ptep);
@@ -45,13 +54,53 @@ extern pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
extern void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, unsigned long sz);
#define __HAVE_ARCH_HUGE_PTEP_GET
-extern pte_t huge_ptep_get(pte_t *ptep);
-extern void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pte, unsigned long sz);
-#define set_huge_swap_pte_at set_huge_swap_pte_at
+extern pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
void __init arm64_hugetlb_cma_reserve(void);
+#define huge_ptep_modify_prot_start huge_ptep_modify_prot_start
+extern pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep);
+
+#define huge_ptep_modify_prot_commit huge_ptep_modify_prot_commit
+extern void huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t old_pte, pte_t new_pte);
+
#include <asm-generic/hugetlb.h>
+static inline void __flush_hugetlb_tlb_range(struct vm_area_struct *vma,
+ unsigned long start,
+ unsigned long end,
+ unsigned long stride,
+ bool last_level)
+{
+ switch (stride) {
+#ifndef __PAGETABLE_PMD_FOLDED
+ case PUD_SIZE:
+ __flush_tlb_range(vma, start, end, PUD_SIZE, last_level, 1);
+ break;
+#endif
+ case CONT_PMD_SIZE:
+ case PMD_SIZE:
+ __flush_tlb_range(vma, start, end, PMD_SIZE, last_level, 2);
+ break;
+ case CONT_PTE_SIZE:
+ __flush_tlb_range(vma, start, end, PAGE_SIZE, last_level, 3);
+ break;
+ default:
+ __flush_tlb_range(vma, start, end, PAGE_SIZE, last_level, TLBI_TTL_UNKNOWN);
+ }
+}
+
+#define __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE
+static inline void flush_hugetlb_tlb_range(struct vm_area_struct *vma,
+ unsigned long start,
+ unsigned long end)
+{
+ unsigned long stride = huge_page_size(hstate_vma(vma));
+
+ __flush_hugetlb_tlb_range(vma, start, end, stride, false);
+}
+
#endif /* __ASM_HUGETLB_H */
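
__flush_hugetlb_tlb_range() above chooses the TLBI level hint from the mapping stride: PUD-sized blocks invalidate at level 1, PMD and contiguous-PMD at level 2, contiguous-PTE at level 3, and anything else falls back to TLBI_TTL_UNKNOWN. A small illustration of the wrapper in use for a 2 MiB huge page (the calling function and address are made up for the example):

/* Illustration: flush the mapping of a single 2 MiB huge page. */
static void flush_one_pmd_hugepage_sketch(struct vm_area_struct *vma,
                                          unsigned long addr)
{
        /*
         * For a 2 MiB hstate, huge_page_size(hstate_vma(vma)) == PMD_SIZE,
         * so flush_hugetlb_tlb_range() issues level-2 TLBIs with a
         * PMD_SIZE stride via __flush_tlb_range().
         */
        flush_hugetlb_tlb_range(vma, addr, addr + PMD_SIZE);
}
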
diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h
index bc7aaed4b34e..bd81cf17744a 100644
--- a/arch/arm64/include/asm/hw_breakpoint.h
+++ b/arch/arm64/include/asm/hw_breakpoint.h
@@ -59,7 +59,6 @@ static inline void decode_ctrl_reg(u32 reg,
/* Watchpoints */
#define ARM_BREAKPOINT_LOAD 1
#define ARM_BREAKPOINT_STORE 2
-#define AARCH64_ESR_ACCESS_MASK (1 << 6)
/* Lengths */
#define ARM_BREAKPOINT_LEN_1 0x1
@@ -142,7 +141,7 @@ static inline int get_num_brps(void)
u64 dfr0 = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
return 1 +
cpuid_feature_extract_unsigned_field(dfr0,
- ID_AA64DFR0_BRPS_SHIFT);
+ ID_AA64DFR0_EL1_BRPs_SHIFT);
}
/* Determine number of WRP registers available. */
@@ -151,7 +150,15 @@ static inline int get_num_wrps(void)
u64 dfr0 = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
return 1 +
cpuid_feature_extract_unsigned_field(dfr0,
- ID_AA64DFR0_WRPS_SHIFT);
+ ID_AA64DFR0_EL1_WRPs_SHIFT);
}
+#ifdef CONFIG_CPU_PM
+extern void cpu_suspend_set_dbg_restorer(int (*hw_bp_restore)(unsigned int));
+#else
+static inline void cpu_suspend_set_dbg_restorer(int (*hw_bp_restore)(unsigned int))
+{
+}
+#endif
+
#endif /* __ASM_BREAKPOINT_H */
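
get_num_brps() and get_num_wrps() read the system-wide sanitised copy of ID_AA64DFR0_EL1 and add one, because the architecture encodes the field as "number of breakpoint/watchpoint register pairs minus 1". A worked instance (the field value 5 is only an example):

/* Example: a BRPs field value of 5 means six hardware breakpoints. */
static int example_num_brps(u64 dfr0)
{
        unsigned int brps = cpuid_feature_extract_unsigned_field(dfr0,
                                        ID_AA64DFR0_EL1_BRPs_SHIFT);

        return 1 + brps;        /* brps == 5  ->  6 breakpoint registers */
}
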
diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
index cef4ae7a3d8b..13f94c8ddfc0 100644
--- a/arch/arm64/include/asm/hwcap.h
+++ b/arch/arm64/include/asm/hwcap.h
@@ -31,12 +31,20 @@
#define COMPAT_HWCAP_VFPD32 (1 << 19)
#define COMPAT_HWCAP_LPAE (1 << 20)
#define COMPAT_HWCAP_EVTSTRM (1 << 21)
+#define COMPAT_HWCAP_FPHP (1 << 22)
+#define COMPAT_HWCAP_ASIMDHP (1 << 23)
+#define COMPAT_HWCAP_ASIMDDP (1 << 24)
+#define COMPAT_HWCAP_ASIMDFHM (1 << 25)
+#define COMPAT_HWCAP_ASIMDBF16 (1 << 26)
+#define COMPAT_HWCAP_I8MM (1 << 27)
#define COMPAT_HWCAP2_AES (1 << 0)
#define COMPAT_HWCAP2_PMULL (1 << 1)
#define COMPAT_HWCAP2_SHA1 (1 << 2)
#define COMPAT_HWCAP2_SHA2 (1 << 3)
#define COMPAT_HWCAP2_CRC32 (1 << 4)
+#define COMPAT_HWCAP2_SB (1 << 5)
+#define COMPAT_HWCAP2_SSBS (1 << 6)
#ifndef __ASSEMBLY__
#include <linux/log2.h>
@@ -84,6 +92,22 @@
#define KERNEL_HWCAP_SB __khwcap_feature(SB)
#define KERNEL_HWCAP_PACA __khwcap_feature(PACA)
#define KERNEL_HWCAP_PACG __khwcap_feature(PACG)
+#define KERNEL_HWCAP_GCS __khwcap_feature(GCS)
+#define KERNEL_HWCAP_CMPBR __khwcap_feature(CMPBR)
+#define KERNEL_HWCAP_FPRCVT __khwcap_feature(FPRCVT)
+#define KERNEL_HWCAP_F8MM8 __khwcap_feature(F8MM8)
+#define KERNEL_HWCAP_F8MM4 __khwcap_feature(F8MM4)
+#define KERNEL_HWCAP_SVE_F16MM __khwcap_feature(SVE_F16MM)
+#define KERNEL_HWCAP_SVE_ELTPERM __khwcap_feature(SVE_ELTPERM)
+#define KERNEL_HWCAP_SVE_AES2 __khwcap_feature(SVE_AES2)
+#define KERNEL_HWCAP_SVE_BFSCALE __khwcap_feature(SVE_BFSCALE)
+#define KERNEL_HWCAP_SVE2P2 __khwcap_feature(SVE2P2)
+#define KERNEL_HWCAP_SME2P2 __khwcap_feature(SME2P2)
+#define KERNEL_HWCAP_SME_SBITPERM __khwcap_feature(SME_SBITPERM)
+#define KERNEL_HWCAP_SME_AES __khwcap_feature(SME_AES)
+#define KERNEL_HWCAP_SME_SFEXPA __khwcap_feature(SME_SFEXPA)
+#define KERNEL_HWCAP_SME_STMOP __khwcap_feature(SME_STMOP)
+#define KERNEL_HWCAP_SME_SMOP4 __khwcap_feature(SME_SMOP4)
#define __khwcap2_feature(x) (const_ilog2(HWCAP2_ ## x) + 64)
#define KERNEL_HWCAP_DCPODP __khwcap2_feature(DCPODP)
@@ -119,6 +143,41 @@
#define KERNEL_HWCAP_SME_FA64 __khwcap2_feature(SME_FA64)
#define KERNEL_HWCAP_WFXT __khwcap2_feature(WFXT)
#define KERNEL_HWCAP_EBF16 __khwcap2_feature(EBF16)
+#define KERNEL_HWCAP_SVE_EBF16 __khwcap2_feature(SVE_EBF16)
+#define KERNEL_HWCAP_CSSC __khwcap2_feature(CSSC)
+#define KERNEL_HWCAP_RPRFM __khwcap2_feature(RPRFM)
+#define KERNEL_HWCAP_SVE2P1 __khwcap2_feature(SVE2P1)
+#define KERNEL_HWCAP_SME2 __khwcap2_feature(SME2)
+#define KERNEL_HWCAP_SME2P1 __khwcap2_feature(SME2P1)
+#define KERNEL_HWCAP_SME_I16I32 __khwcap2_feature(SME_I16I32)
+#define KERNEL_HWCAP_SME_BI32I32 __khwcap2_feature(SME_BI32I32)
+#define KERNEL_HWCAP_SME_B16B16 __khwcap2_feature(SME_B16B16)
+#define KERNEL_HWCAP_SME_F16F16 __khwcap2_feature(SME_F16F16)
+#define KERNEL_HWCAP_MOPS __khwcap2_feature(MOPS)
+#define KERNEL_HWCAP_HBC __khwcap2_feature(HBC)
+#define KERNEL_HWCAP_SVE_B16B16 __khwcap2_feature(SVE_B16B16)
+#define KERNEL_HWCAP_LRCPC3 __khwcap2_feature(LRCPC3)
+#define KERNEL_HWCAP_LSE128 __khwcap2_feature(LSE128)
+#define KERNEL_HWCAP_FPMR __khwcap2_feature(FPMR)
+#define KERNEL_HWCAP_LUT __khwcap2_feature(LUT)
+#define KERNEL_HWCAP_FAMINMAX __khwcap2_feature(FAMINMAX)
+#define KERNEL_HWCAP_F8CVT __khwcap2_feature(F8CVT)
+#define KERNEL_HWCAP_F8FMA __khwcap2_feature(F8FMA)
+#define KERNEL_HWCAP_F8DP4 __khwcap2_feature(F8DP4)
+#define KERNEL_HWCAP_F8DP2 __khwcap2_feature(F8DP2)
+#define KERNEL_HWCAP_F8E4M3 __khwcap2_feature(F8E4M3)
+#define KERNEL_HWCAP_F8E5M2 __khwcap2_feature(F8E5M2)
+#define KERNEL_HWCAP_SME_LUTV2 __khwcap2_feature(SME_LUTV2)
+#define KERNEL_HWCAP_SME_F8F16 __khwcap2_feature(SME_F8F16)
+#define KERNEL_HWCAP_SME_F8F32 __khwcap2_feature(SME_F8F32)
+#define KERNEL_HWCAP_SME_SF8FMA __khwcap2_feature(SME_SF8FMA)
+#define KERNEL_HWCAP_SME_SF8DP4 __khwcap2_feature(SME_SF8DP4)
+#define KERNEL_HWCAP_SME_SF8DP2 __khwcap2_feature(SME_SF8DP2)
+#define KERNEL_HWCAP_POE __khwcap2_feature(POE)
+
+#define __khwcap3_feature(x) (const_ilog2(HWCAP3_ ## x) + 128)
+#define KERNEL_HWCAP_MTE_FAR __khwcap3_feature(MTE_FAR)
+#define KERNEL_HWCAP_MTE_STORE_ONLY __khwcap3_feature(MTE_STORE_ONLY)
/*
* This yields a mask that user programs can use to figure out what
@@ -126,11 +185,13 @@
*/
#define ELF_HWCAP cpu_get_elf_hwcap()
#define ELF_HWCAP2 cpu_get_elf_hwcap2()
+#define ELF_HWCAP3 cpu_get_elf_hwcap3()
#ifdef CONFIG_COMPAT
#define COMPAT_ELF_HWCAP (compat_elf_hwcap)
#define COMPAT_ELF_HWCAP2 (compat_elf_hwcap2)
-extern unsigned int compat_elf_hwcap, compat_elf_hwcap2;
+#define COMPAT_ELF_HWCAP3 (compat_elf_hwcap3)
+extern unsigned int compat_elf_hwcap, compat_elf_hwcap2, compat_elf_hwcap3;
#endif
enum {
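
The __khwcap_feature()/__khwcap2_feature()/__khwcap3_feature() helpers give each capability a kernel-internal index: HWCAP bits occupy 0-63, HWCAP2 bits 64-127 and HWCAP3 bits start at 128. Userspace is unaffected by this numbering and keeps testing the separate AT_HWCAP/AT_HWCAP2/AT_HWCAP3 auxv words. A runnable userspace example, assuming the usual uapi spelling HWCAP2_MOPS for the bit behind KERNEL_HWCAP_MOPS:

#include <stdio.h>
#include <sys/auxv.h>
#include <asm/hwcap.h>          /* HWCAP2_* bit definitions */

int main(void)
{
        unsigned long hwcap2 = getauxval(AT_HWCAP2);

        /*
         * Inside the kernel this is bit 64 + const_ilog2(HWCAP2_MOPS);
         * userspace simply tests the bit within the AT_HWCAP2 word.
         */
        if (hwcap2 & HWCAP2_MOPS)
                puts("FEAT_MOPS memory copy/set instructions available");

        return 0;
}
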
diff --git a/arch/arm64/include/asm/hyperv-tlfs.h b/arch/arm64/include/asm/hyperv-tlfs.h
deleted file mode 100644
index bc6c7ac934a1..000000000000
--- a/arch/arm64/include/asm/hyperv-tlfs.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-/*
- * This file contains definitions from the Hyper-V Hypervisor Top-Level
- * Functional Specification (TLFS):
- * https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs
- *
- * Copyright (C) 2021, Microsoft, Inc.
- *
- * Author : Michael Kelley <mikelley@microsoft.com>
- */
-
-#ifndef _ASM_HYPERV_TLFS_H
-#define _ASM_HYPERV_TLFS_H
-
-#include <linux/types.h>
-
-/*
- * All data structures defined in the TLFS that are shared between Hyper-V
- * and a guest VM use Little Endian byte ordering. This matches the default
- * byte ordering of Linux running on ARM64, so no special handling is required.
- */
-
-/*
- * These Hyper-V registers provide information equivalent to the CPUID
- * instruction on x86/x64.
- */
-#define HV_REGISTER_HYPERVISOR_VERSION 0x00000100 /*CPUID 0x40000002 */
-#define HV_REGISTER_FEATURES 0x00000200 /*CPUID 0x40000003 */
-#define HV_REGISTER_ENLIGHTENMENTS 0x00000201 /*CPUID 0x40000004 */
-
-/*
- * Group C Features. See the asm-generic version of hyperv-tlfs.h
- * for a description of Feature Groups.
- */
-
-/* Crash MSRs available */
-#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE BIT(8)
-
-/* STIMER direct mode is available */
-#define HV_STIMER_DIRECT_MODE_AVAILABLE BIT(13)
-
-/*
- * Synthetic register definitions equivalent to MSRs on x86/x64
- */
-#define HV_REGISTER_CRASH_P0 0x00000210
-#define HV_REGISTER_CRASH_P1 0x00000211
-#define HV_REGISTER_CRASH_P2 0x00000212
-#define HV_REGISTER_CRASH_P3 0x00000213
-#define HV_REGISTER_CRASH_P4 0x00000214
-#define HV_REGISTER_CRASH_CTL 0x00000215
-
-#define HV_REGISTER_GUEST_OSID 0x00090002
-#define HV_REGISTER_VP_INDEX 0x00090003
-#define HV_REGISTER_TIME_REF_COUNT 0x00090004
-#define HV_REGISTER_REFERENCE_TSC 0x00090017
-
-#define HV_REGISTER_SINT0 0x000A0000
-#define HV_REGISTER_SCONTROL 0x000A0010
-#define HV_REGISTER_SIEFP 0x000A0012
-#define HV_REGISTER_SIMP 0x000A0013
-#define HV_REGISTER_EOM 0x000A0014
-
-#define HV_REGISTER_STIMER0_CONFIG 0x000B0000
-#define HV_REGISTER_STIMER0_COUNT 0x000B0001
-
-union hv_msi_entry {
- u64 as_uint64[2];
- struct {
- u64 address;
- u32 data;
- u32 reserved;
- } __packed;
-};
-
-#include <asm-generic/hyperv-tlfs.h>
-
-#endif
diff --git a/arch/arm64/include/asm/hypervisor.h b/arch/arm64/include/asm/hypervisor.h
index 0ae427f352c8..a12fd897c877 100644
--- a/arch/arm64/include/asm/hypervisor.h
+++ b/arch/arm64/include/asm/hypervisor.h
@@ -6,5 +6,17 @@
void kvm_init_hyp_services(void);
bool kvm_arm_hyp_service_available(u32 func_id);
+void kvm_arm_target_impl_cpu_init(void);
+
+#ifdef CONFIG_ARM_PKVM_GUEST
+void pkvm_init_hyp_services(void);
+#else
+static inline void pkvm_init_hyp_services(void) { };
+#endif
+
+static inline void kvm_arch_init_hyp_services(void)
+{
+ pkvm_init_hyp_services();
+};
#endif
diff --git a/arch/arm64/include/asm/image.h b/arch/arm64/include/asm/image.h
index c2b13213c720..c09cf942dc92 100644
--- a/arch/arm64/include/asm/image.h
+++ b/arch/arm64/include/asm/image.h
@@ -27,7 +27,7 @@
/*
* struct arm64_image_header - arm64 kernel image header
- * See Documentation/arm64/booting.rst for details
+ * See Documentation/arch/arm64/booting.rst for details
*
* @code0: Executable code, or
* @mz_header alternatively used for part of MZ header
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index 834bff720582..18c7811774d3 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -13,31 +13,6 @@
#include <asm/insn-def.h>
#ifndef __ASSEMBLY__
-/*
- * ARM Architecture Reference Manual for ARMv8 Profile-A, Issue A.a
- * Section C3.1 "A64 instruction index by encoding":
- * AArch64 main encoding table
- * Bit position
- * 28 27 26 25 Encoding Group
- * 0 0 - - Unallocated
- * 1 0 0 - Data processing, immediate
- * 1 0 1 - Branch, exception generation and system instructions
- * - 1 - 0 Loads and stores
- * - 1 0 1 Data processing - register
- * 0 1 1 1 Data processing - SIMD and floating point
- * 1 1 1 1 Data processing - SIMD and floating point
- * "-" means "don't care"
- */
-enum aarch64_insn_encoding_class {
- AARCH64_INSN_CLS_UNKNOWN, /* UNALLOCATED */
- AARCH64_INSN_CLS_SVE, /* SVE instructions */
- AARCH64_INSN_CLS_DP_IMM, /* Data processing - immediate */
- AARCH64_INSN_CLS_DP_REG, /* Data processing - register */
- AARCH64_INSN_CLS_DP_FPSIMD, /* Data processing - SIMD and FP */
- AARCH64_INSN_CLS_LDST, /* Loads and stores */
- AARCH64_INSN_CLS_BR_SYS, /* Branch, exception generation and
- * system instructions */
-};
enum aarch64_insn_hint_cr_op {
AARCH64_INSN_HINT_NOP = 0x0 << 5,
@@ -160,6 +135,12 @@ enum aarch64_insn_special_register {
AARCH64_INSN_SPCLREG_SP_EL2 = 0xF210
};
+enum aarch64_insn_system_register {
+ AARCH64_INSN_SYSREG_TPIDR_EL1 = 0x4684,
+ AARCH64_INSN_SYSREG_TPIDR_EL2 = 0x6682,
+ AARCH64_INSN_SYSREG_SP_EL0 = 0x4208,
+};
+
enum aarch64_insn_variant {
AARCH64_INSN_VARIANT_32BIT,
AARCH64_INSN_VARIANT_64BIT
@@ -207,10 +188,14 @@ enum aarch64_insn_ldst_type {
AARCH64_INSN_LDST_STORE_PAIR_PRE_INDEX,
AARCH64_INSN_LDST_LOAD_PAIR_POST_INDEX,
AARCH64_INSN_LDST_STORE_PAIR_POST_INDEX,
+ AARCH64_INSN_LDST_LOAD_ACQ,
AARCH64_INSN_LDST_LOAD_EX,
AARCH64_INSN_LDST_LOAD_ACQ_EX,
+ AARCH64_INSN_LDST_STORE_REL,
AARCH64_INSN_LDST_STORE_EX,
AARCH64_INSN_LDST_STORE_REL_EX,
+ AARCH64_INSN_LDST_SIGNED_LOAD_IMM_OFFSET,
+ AARCH64_INSN_LDST_SIGNED_LOAD_REG_OFFSET,
};
enum aarch64_insn_adsb_type {
@@ -326,12 +311,30 @@ static __always_inline u32 aarch64_insn_get_##abbr##_value(void) \
return (val); \
}
+/*
+ * ARM Architecture Reference Manual for ARMv8 Profile-A, Issue A.a
+ * Section C3.1 "A64 instruction index by encoding":
+ * AArch64 main encoding table
+ * Bit position
+ * 28 27 26 25 Encoding Group
+ * 0 0 - - Unallocated
+ * 1 0 0 - Data processing, immediate
+ * 1 0 1 - Branch, exception generation and system instructions
+ * - 1 - 0 Loads and stores
+ * - 1 0 1 Data processing - register
+ * 0 1 1 1 Data processing - SIMD and floating point
+ * 1 1 1 1 Data processing - SIMD and floating point
+ * "-" means "don't care"
+ */
+__AARCH64_INSN_FUNCS(class_branch_sys, 0x1c000000, 0x14000000)
+
__AARCH64_INSN_FUNCS(adr, 0x9F000000, 0x10000000)
__AARCH64_INSN_FUNCS(adrp, 0x9F000000, 0x90000000)
__AARCH64_INSN_FUNCS(prfm, 0x3FC00000, 0x39800000)
__AARCH64_INSN_FUNCS(prfm_lit, 0xFF000000, 0xD8000000)
__AARCH64_INSN_FUNCS(store_imm, 0x3FC00000, 0x39000000)
__AARCH64_INSN_FUNCS(load_imm, 0x3FC00000, 0x39400000)
+__AARCH64_INSN_FUNCS(signed_load_imm, 0X3FC00000, 0x39800000)
__AARCH64_INSN_FUNCS(store_pre, 0x3FE00C00, 0x38000C00)
__AARCH64_INSN_FUNCS(load_pre, 0x3FE00C00, 0x38400C00)
__AARCH64_INSN_FUNCS(store_post, 0x3FE00C00, 0x38000400)
@@ -345,12 +348,16 @@ __AARCH64_INSN_FUNCS(ldset, 0x3F20FC00, 0x38203000)
__AARCH64_INSN_FUNCS(swp, 0x3F20FC00, 0x38208000)
__AARCH64_INSN_FUNCS(cas, 0x3FA07C00, 0x08A07C00)
__AARCH64_INSN_FUNCS(ldr_reg, 0x3FE0EC00, 0x38606800)
+__AARCH64_INSN_FUNCS(signed_ldr_reg, 0X3FE0FC00, 0x38A0E800)
__AARCH64_INSN_FUNCS(ldr_imm, 0x3FC00000, 0x39400000)
__AARCH64_INSN_FUNCS(ldr_lit, 0xBF000000, 0x18000000)
__AARCH64_INSN_FUNCS(ldrsw_lit, 0xFF000000, 0x98000000)
__AARCH64_INSN_FUNCS(exclusive, 0x3F800000, 0x08000000)
-__AARCH64_INSN_FUNCS(load_ex, 0x3F400000, 0x08400000)
-__AARCH64_INSN_FUNCS(store_ex, 0x3F400000, 0x08000000)
+__AARCH64_INSN_FUNCS(load_acq, 0x3FDFFC00, 0x08DFFC00)
+__AARCH64_INSN_FUNCS(store_rel, 0x3FDFFC00, 0x089FFC00)
+__AARCH64_INSN_FUNCS(load_ex, 0x3FC00000, 0x08400000)
+__AARCH64_INSN_FUNCS(store_ex, 0x3FC00000, 0x08000000)
+__AARCH64_INSN_FUNCS(mops, 0x3B200C00, 0x19000400)
__AARCH64_INSN_FUNCS(stp, 0x7FC00000, 0x29000000)
__AARCH64_INSN_FUNCS(ldp, 0x7FC00000, 0x29400000)
__AARCH64_INSN_FUNCS(stp_post, 0x7FC00000, 0x28800000)
@@ -428,61 +435,126 @@ __AARCH64_INSN_FUNCS(sb, 0xFFFFFFFF, 0xD50330FF)
__AARCH64_INSN_FUNCS(clrex, 0xFFFFF0FF, 0xD503305F)
__AARCH64_INSN_FUNCS(ssbb, 0xFFFFFFFF, 0xD503309F)
__AARCH64_INSN_FUNCS(pssbb, 0xFFFFFFFF, 0xD503349F)
+__AARCH64_INSN_FUNCS(bti, 0xFFFFFF3F, 0xD503241f)
#undef __AARCH64_INSN_FUNCS
-bool aarch64_insn_is_steppable_hint(u32 insn);
-bool aarch64_insn_is_branch_imm(u32 insn);
+static __always_inline bool aarch64_insn_is_steppable_hint(u32 insn)
+{
+ if (!aarch64_insn_is_hint(insn))
+ return false;
+
+ switch (insn & 0xFE0) {
+ case AARCH64_INSN_HINT_XPACLRI:
+ case AARCH64_INSN_HINT_PACIA_1716:
+ case AARCH64_INSN_HINT_PACIB_1716:
+ case AARCH64_INSN_HINT_PACIAZ:
+ case AARCH64_INSN_HINT_PACIASP:
+ case AARCH64_INSN_HINT_PACIBZ:
+ case AARCH64_INSN_HINT_PACIBSP:
+ case AARCH64_INSN_HINT_BTI:
+ case AARCH64_INSN_HINT_BTIC:
+ case AARCH64_INSN_HINT_BTIJ:
+ case AARCH64_INSN_HINT_BTIJC:
+ case AARCH64_INSN_HINT_NOP:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static __always_inline bool aarch64_insn_is_branch(u32 insn)
+{
+ /* b, bl, cb*, tb*, ret*, b.cond, br*, blr* */
+
+ return aarch64_insn_is_b(insn) ||
+ aarch64_insn_is_bl(insn) ||
+ aarch64_insn_is_cbz(insn) ||
+ aarch64_insn_is_cbnz(insn) ||
+ aarch64_insn_is_tbz(insn) ||
+ aarch64_insn_is_tbnz(insn) ||
+ aarch64_insn_is_ret(insn) ||
+ aarch64_insn_is_ret_auth(insn) ||
+ aarch64_insn_is_br(insn) ||
+ aarch64_insn_is_br_auth(insn) ||
+ aarch64_insn_is_blr(insn) ||
+ aarch64_insn_is_blr_auth(insn) ||
+ aarch64_insn_is_bcond(insn);
+}
+
+static __always_inline bool aarch64_insn_is_branch_imm(u32 insn)
+{
+ return aarch64_insn_is_b(insn) ||
+ aarch64_insn_is_bl(insn) ||
+ aarch64_insn_is_tbz(insn) ||
+ aarch64_insn_is_tbnz(insn) ||
+ aarch64_insn_is_cbz(insn) ||
+ aarch64_insn_is_cbnz(insn) ||
+ aarch64_insn_is_bcond(insn);
+}
-static inline bool aarch64_insn_is_adr_adrp(u32 insn)
+static __always_inline bool aarch64_insn_is_adr_adrp(u32 insn)
{
- return aarch64_insn_is_adr(insn) || aarch64_insn_is_adrp(insn);
+ return aarch64_insn_is_adr(insn) ||
+ aarch64_insn_is_adrp(insn);
}
-static inline bool aarch64_insn_is_dsb(u32 insn)
+static __always_inline bool aarch64_insn_is_dsb(u32 insn)
{
- return aarch64_insn_is_dsb_base(insn) || aarch64_insn_is_dsb_nxs(insn);
+ return aarch64_insn_is_dsb_base(insn) ||
+ aarch64_insn_is_dsb_nxs(insn);
}
-static inline bool aarch64_insn_is_barrier(u32 insn)
+static __always_inline bool aarch64_insn_is_barrier(u32 insn)
{
- return aarch64_insn_is_dmb(insn) || aarch64_insn_is_dsb(insn) ||
- aarch64_insn_is_isb(insn) || aarch64_insn_is_sb(insn) ||
- aarch64_insn_is_clrex(insn) || aarch64_insn_is_ssbb(insn) ||
+ return aarch64_insn_is_dmb(insn) ||
+ aarch64_insn_is_dsb(insn) ||
+ aarch64_insn_is_isb(insn) ||
+ aarch64_insn_is_sb(insn) ||
+ aarch64_insn_is_clrex(insn) ||
+ aarch64_insn_is_ssbb(insn) ||
aarch64_insn_is_pssbb(insn);
}
-static inline bool aarch64_insn_is_store_single(u32 insn)
+static __always_inline bool aarch64_insn_is_store_single(u32 insn)
{
return aarch64_insn_is_store_imm(insn) ||
aarch64_insn_is_store_pre(insn) ||
aarch64_insn_is_store_post(insn);
}
-static inline bool aarch64_insn_is_store_pair(u32 insn)
+static __always_inline bool aarch64_insn_is_store_pair(u32 insn)
{
return aarch64_insn_is_stp(insn) ||
aarch64_insn_is_stp_pre(insn) ||
aarch64_insn_is_stp_post(insn);
}
-static inline bool aarch64_insn_is_load_single(u32 insn)
+static __always_inline bool aarch64_insn_is_load_single(u32 insn)
{
return aarch64_insn_is_load_imm(insn) ||
aarch64_insn_is_load_pre(insn) ||
aarch64_insn_is_load_post(insn);
}
-static inline bool aarch64_insn_is_load_pair(u32 insn)
+static __always_inline bool aarch64_insn_is_load_pair(u32 insn)
{
return aarch64_insn_is_ldp(insn) ||
aarch64_insn_is_ldp_pre(insn) ||
aarch64_insn_is_ldp_post(insn);
}
+static __always_inline bool aarch64_insn_uses_literal(u32 insn)
+{
+ /* ldr/ldrsw (literal), prfm */
+
+ return aarch64_insn_is_ldr_lit(insn) ||
+ aarch64_insn_is_ldrsw_lit(insn) ||
+ aarch64_insn_is_adr_adrp(insn) ||
+ aarch64_insn_is_prfm_lit(insn);
+}
+
enum aarch64_insn_encoding_class aarch64_get_insn_class(u32 insn);
-bool aarch64_insn_uses_literal(u32 insn);
-bool aarch64_insn_is_branch(u32 insn);
u64 aarch64_insn_decode_immediate(enum aarch64_insn_imm_type type, u32 insn);
u32 aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type,
u32 insn, u64 imm);
@@ -496,8 +568,23 @@ u32 aarch64_insn_gen_comp_branch_imm(unsigned long pc, unsigned long addr,
enum aarch64_insn_branch_type type);
u32 aarch64_insn_gen_cond_branch_imm(unsigned long pc, unsigned long addr,
enum aarch64_insn_condition cond);
-u32 aarch64_insn_gen_hint(enum aarch64_insn_hint_cr_op op);
-u32 aarch64_insn_gen_nop(void);
+
+static __always_inline u32
+aarch64_insn_gen_hint(enum aarch64_insn_hint_cr_op op)
+{
+ return aarch64_insn_get_hint_value() | op;
+}
+
+static __always_inline u32 aarch64_insn_gen_nop(void)
+{
+ return aarch64_insn_gen_hint(AARCH64_INSN_HINT_NOP);
+}
+
+static __always_inline bool aarch64_insn_is_nop(u32 insn)
+{
+ return insn == aarch64_insn_gen_nop();
+}
+
u32 aarch64_insn_gen_branch_reg(enum aarch64_insn_register reg,
enum aarch64_insn_branch_type type);
u32 aarch64_insn_gen_load_store_reg(enum aarch64_insn_register reg,
@@ -519,6 +606,10 @@ u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1,
int offset,
enum aarch64_insn_variant variant,
enum aarch64_insn_ldst_type type);
+u32 aarch64_insn_gen_load_acq_store_rel(enum aarch64_insn_register reg,
+ enum aarch64_insn_register base,
+ enum aarch64_insn_size_type size,
+ enum aarch64_insn_ldst_type type);
u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg,
enum aarch64_insn_register base,
enum aarch64_insn_register state,
@@ -580,10 +671,6 @@ u32 aarch64_insn_gen_extr(enum aarch64_insn_variant variant,
enum aarch64_insn_register Rn,
enum aarch64_insn_register Rd,
u8 lsb);
-u32 aarch64_insn_gen_prefetch(enum aarch64_insn_register base,
- enum aarch64_insn_prfm_type type,
- enum aarch64_insn_prfm_target target,
- enum aarch64_insn_prfm_policy policy);
#ifdef CONFIG_ARM64_LSE_ATOMICS
u32 aarch64_insn_gen_atomic_ld_op(enum aarch64_insn_register result,
enum aarch64_insn_register address,
@@ -619,6 +706,9 @@ u32 aarch64_insn_gen_cas(enum aarch64_insn_register result,
}
#endif
u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type);
+u32 aarch64_insn_gen_dsb(enum aarch64_insn_mb_type type);
+u32 aarch64_insn_gen_mrs(enum aarch64_insn_register result,
+ enum aarch64_insn_system_register sysreg);
s32 aarch64_get_branch_offset(u32 insn);
u32 aarch64_set_branch_offset(u32 insn, s32 offset);
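
The encoding-table comment that now sits next to __AARCH64_INSN_FUNCS(class_branch_sys, 0x1c000000, 0x14000000) documents why that mask/value pair works: instructions with bits [28:26] equal to 0b101 belong to the "branch, exception generation and system" group, and bit 25 is a don't-care. A tiny sketch using the helper that the macro generates (the wrapper function below is illustrative):

/* Illustration: 0x94000001 encodes "bl #4", which is in the branch/system class. */
static bool branch_class_example(void)
{
        u32 insn = 0x94000001;

        /* (insn & 0x1c000000) == 0x14000000, i.e. bits [28:26] == 0b101 */
        return aarch64_insn_is_class_branch_sys(insn);  /* true */
}
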
diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
index 87dd42d74afe..9b96840fb979 100644
--- a/arch/arm64/include/asm/io.h
+++ b/arch/arm64/include/asm/io.h
@@ -17,36 +17,41 @@
#include <asm/early_ioremap.h>
#include <asm/alternative.h>
#include <asm/cpufeature.h>
+#include <asm/rsi.h>
/*
* Generic IO read/write. These perform native-endian accesses.
*/
#define __raw_writeb __raw_writeb
-static inline void __raw_writeb(u8 val, volatile void __iomem *addr)
+static __always_inline void __raw_writeb(u8 val, volatile void __iomem *addr)
{
- asm volatile("strb %w0, [%1]" : : "rZ" (val), "r" (addr));
+ volatile u8 __iomem *ptr = addr;
+ asm volatile("strb %w0, %1" : : "rZ" (val), "Qo" (*ptr));
}
#define __raw_writew __raw_writew
-static inline void __raw_writew(u16 val, volatile void __iomem *addr)
+static __always_inline void __raw_writew(u16 val, volatile void __iomem *addr)
{
- asm volatile("strh %w0, [%1]" : : "rZ" (val), "r" (addr));
+ volatile u16 __iomem *ptr = addr;
+ asm volatile("strh %w0, %1" : : "rZ" (val), "Qo" (*ptr));
}
#define __raw_writel __raw_writel
static __always_inline void __raw_writel(u32 val, volatile void __iomem *addr)
{
- asm volatile("str %w0, [%1]" : : "rZ" (val), "r" (addr));
+ volatile u32 __iomem *ptr = addr;
+ asm volatile("str %w0, %1" : : "rZ" (val), "Qo" (*ptr));
}
#define __raw_writeq __raw_writeq
-static inline void __raw_writeq(u64 val, volatile void __iomem *addr)
+static __always_inline void __raw_writeq(u64 val, volatile void __iomem *addr)
{
- asm volatile("str %x0, [%1]" : : "rZ" (val), "r" (addr));
+ volatile u64 __iomem *ptr = addr;
+ asm volatile("str %x0, %1" : : "rZ" (val), "Qo" (*ptr));
}
#define __raw_readb __raw_readb
-static inline u8 __raw_readb(const volatile void __iomem *addr)
+static __always_inline u8 __raw_readb(const volatile void __iomem *addr)
{
u8 val;
asm volatile(ALTERNATIVE("ldrb %w0, [%1]",
@@ -57,7 +62,7 @@ static inline u8 __raw_readb(const volatile void __iomem *addr)
}
#define __raw_readw __raw_readw
-static inline u16 __raw_readw(const volatile void __iomem *addr)
+static __always_inline u16 __raw_readw(const volatile void __iomem *addr)
{
u16 val;
@@ -80,7 +85,7 @@ static __always_inline u32 __raw_readl(const volatile void __iomem *addr)
}
#define __raw_readq __raw_readq
-static inline u64 __raw_readq(const volatile void __iomem *addr)
+static __always_inline u64 __raw_readq(const volatile void __iomem *addr)
{
u64 val;
asm volatile(ALTERNATIVE("ldr %0, [%1]",
@@ -91,7 +96,7 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
}
/* IO barriers */
-#define __iormb(v) \
+#define __io_ar(v) \
({ \
unsigned long tmp; \
\
@@ -108,39 +113,14 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
: "memory"); \
})
-#define __io_par(v) __iormb(v)
-#define __iowmb() dma_wmb()
-#define __iomb() dma_mb()
-
-/*
- * Relaxed I/O memory access primitives. These follow the Device memory
- * ordering rules but do not guarantee any ordering relative to Normal memory
- * accesses.
- */
-#define readb_relaxed(c) ({ u8 __r = __raw_readb(c); __r; })
-#define readw_relaxed(c) ({ u16 __r = le16_to_cpu((__force __le16)__raw_readw(c)); __r; })
-#define readl_relaxed(c) ({ u32 __r = le32_to_cpu((__force __le32)__raw_readl(c)); __r; })
-#define readq_relaxed(c) ({ u64 __r = le64_to_cpu((__force __le64)__raw_readq(c)); __r; })
-
-#define writeb_relaxed(v,c) ((void)__raw_writeb((v),(c)))
-#define writew_relaxed(v,c) ((void)__raw_writew((__force u16)cpu_to_le16(v),(c)))
-#define writel_relaxed(v,c) ((void)__raw_writel((__force u32)cpu_to_le32(v),(c)))
-#define writeq_relaxed(v,c) ((void)__raw_writeq((__force u64)cpu_to_le64(v),(c)))
-
-/*
- * I/O memory access primitives. Reads are ordered relative to any
- * following Normal memory access. Writes are ordered relative to any prior
- * Normal memory access.
- */
-#define readb(c) ({ u8 __v = readb_relaxed(c); __iormb(__v); __v; })
-#define readw(c) ({ u16 __v = readw_relaxed(c); __iormb(__v); __v; })
-#define readl(c) ({ u32 __v = readl_relaxed(c); __iormb(__v); __v; })
-#define readq(c) ({ u64 __v = readq_relaxed(c); __iormb(__v); __v; })
+#define __io_bw() dma_wmb()
+#define __io_br(v)
+#define __io_aw(v)
-#define writeb(v,c) ({ __iowmb(); writeb_relaxed((v),(c)); })
-#define writew(v,c) ({ __iowmb(); writew_relaxed((v),(c)); })
-#define writel(v,c) ({ __iowmb(); writel_relaxed((v),(c)); })
-#define writeq(v,c) ({ __iowmb(); writeq_relaxed((v),(c)); })
+/* arm64-specific, don't use in portable drivers */
+#define __iormb(v) __io_ar(v)
+#define __iowmb() __io_bw()
+#define __iomb() dma_mb()
/*
* I/O port access primitives.
@@ -150,29 +130,149 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
#define PCI_IOBASE ((void __iomem *)PCI_IO_START)
/*
- * String version of I/O memory access operations.
+ * The ARM64 iowrite implementation is intended to support drivers that want to
+ * use write combining. For instance PCI drivers using write combining with a 64
+ * byte __iowrite64_copy() expect to get a 64 byte MemWr TLP on the PCIe bus.
+ *
+ * Newer ARM cores have sensitive write combining buffers; it is important that
+ * the stores be contiguous blocks of store instructions. Normal memcpy
+ * approaches have a very low chance of generating write combining.
+ *
+ * Since this is the only API on ARM64 that should be used with write combining
+ * it also integrates the DGH hint which is supposed to lower the latency to
+ * emit the large TLP from the CPU.
*/
-extern void __memcpy_fromio(void *, const volatile void __iomem *, size_t);
-extern void __memcpy_toio(volatile void __iomem *, const void *, size_t);
-extern void __memset_io(volatile void __iomem *, int, size_t);
-#define memset_io(c,v,l) __memset_io((c),(v),(l))
-#define memcpy_fromio(a,c,l) __memcpy_fromio((a),(c),(l))
-#define memcpy_toio(c,a,l) __memcpy_toio((c),(a),(l))
+static __always_inline void
+__const_memcpy_toio_aligned32(volatile u32 __iomem *to, const u32 *from,
+ size_t count)
+{
+ switch (count) {
+ case 8:
+ asm volatile("str %w0, [%8, #4 * 0]\n"
+ "str %w1, [%8, #4 * 1]\n"
+ "str %w2, [%8, #4 * 2]\n"
+ "str %w3, [%8, #4 * 3]\n"
+ "str %w4, [%8, #4 * 4]\n"
+ "str %w5, [%8, #4 * 5]\n"
+ "str %w6, [%8, #4 * 6]\n"
+ "str %w7, [%8, #4 * 7]\n"
+ :
+ : "rZ"(from[0]), "rZ"(from[1]), "rZ"(from[2]),
+ "rZ"(from[3]), "rZ"(from[4]), "rZ"(from[5]),
+ "rZ"(from[6]), "rZ"(from[7]), "r"(to));
+ break;
+ case 4:
+ asm volatile("str %w0, [%4, #4 * 0]\n"
+ "str %w1, [%4, #4 * 1]\n"
+ "str %w2, [%4, #4 * 2]\n"
+ "str %w3, [%4, #4 * 3]\n"
+ :
+ : "rZ"(from[0]), "rZ"(from[1]), "rZ"(from[2]),
+ "rZ"(from[3]), "r"(to));
+ break;
+ case 2:
+ asm volatile("str %w0, [%2, #4 * 0]\n"
+ "str %w1, [%2, #4 * 1]\n"
+ :
+ : "rZ"(from[0]), "rZ"(from[1]), "r"(to));
+ break;
+ case 1:
+ __raw_writel(*from, to);
+ break;
+ default:
+ BUILD_BUG();
+ }
+}
+
+void __iowrite32_copy_full(void __iomem *to, const void *from, size_t count);
+
+static __always_inline void
+__iowrite32_copy(void __iomem *to, const void *from, size_t count)
+{
+ if (__builtin_constant_p(count) &&
+ (count == 8 || count == 4 || count == 2 || count == 1)) {
+ __const_memcpy_toio_aligned32(to, from, count);
+ dgh();
+ } else {
+ __iowrite32_copy_full(to, from, count);
+ }
+}
+#define __iowrite32_copy __iowrite32_copy
+
+static __always_inline void
+__const_memcpy_toio_aligned64(volatile u64 __iomem *to, const u64 *from,
+ size_t count)
+{
+ switch (count) {
+ case 8:
+ asm volatile("str %x0, [%8, #8 * 0]\n"
+ "str %x1, [%8, #8 * 1]\n"
+ "str %x2, [%8, #8 * 2]\n"
+ "str %x3, [%8, #8 * 3]\n"
+ "str %x4, [%8, #8 * 4]\n"
+ "str %x5, [%8, #8 * 5]\n"
+ "str %x6, [%8, #8 * 6]\n"
+ "str %x7, [%8, #8 * 7]\n"
+ :
+ : "rZ"(from[0]), "rZ"(from[1]), "rZ"(from[2]),
+ "rZ"(from[3]), "rZ"(from[4]), "rZ"(from[5]),
+ "rZ"(from[6]), "rZ"(from[7]), "r"(to));
+ break;
+ case 4:
+ asm volatile("str %x0, [%4, #8 * 0]\n"
+ "str %x1, [%4, #8 * 1]\n"
+ "str %x2, [%4, #8 * 2]\n"
+ "str %x3, [%4, #8 * 3]\n"
+ :
+ : "rZ"(from[0]), "rZ"(from[1]), "rZ"(from[2]),
+ "rZ"(from[3]), "r"(to));
+ break;
+ case 2:
+ asm volatile("str %x0, [%2, #8 * 0]\n"
+ "str %x1, [%2, #8 * 1]\n"
+ :
+ : "rZ"(from[0]), "rZ"(from[1]), "r"(to));
+ break;
+ case 1:
+ __raw_writeq(*from, to);
+ break;
+ default:
+ BUILD_BUG();
+ }
+}
+
+void __iowrite64_copy_full(void __iomem *to, const void *from, size_t count);
+
+static __always_inline void
+__iowrite64_copy(void __iomem *to, const void *from, size_t count)
+{
+ if (__builtin_constant_p(count) &&
+ (count == 8 || count == 4 || count == 2 || count == 1)) {
+ __const_memcpy_toio_aligned64(to, from, count);
+ dgh();
+ } else {
+ __iowrite64_copy_full(to, from, count);
+ }
+}
+#define __iowrite64_copy __iowrite64_copy
/*
* I/O memory mapping functions.
*/
-bool ioremap_allowed(phys_addr_t phys_addr, size_t size, unsigned long prot);
-#define ioremap_allowed ioremap_allowed
+typedef int (*ioremap_prot_hook_t)(phys_addr_t phys_addr, size_t size,
+ pgprot_t *prot);
+int arm64_ioremap_prot_hook_register(const ioremap_prot_hook_t hook);
+
+#define ioremap_prot ioremap_prot
#define _PAGE_IOREMAP PROT_DEVICE_nGnRE
#define ioremap_wc(addr, size) \
- ioremap_prot((addr), (size), PROT_NORMAL_NC)
+ ioremap_prot((addr), (size), __pgprot(PROT_NORMAL_NC))
#define ioremap_np(addr, size) \
- ioremap_prot((addr), (size), PROT_DEVICE_nGnRnE)
+ ioremap_prot((addr), (size), __pgprot(PROT_DEVICE_nGnRnE))
/*
* io{read,write}{16,32,64}be() macros
@@ -193,7 +293,7 @@ static inline void __iomem *ioremap_cache(phys_addr_t addr, size_t size)
if (pfn_is_map_memory(__phys_to_pfn(addr)))
return (void __iomem *)__phys_to_virt(addr);
- return ioremap_prot(addr, size, PROT_NORMAL);
+ return ioremap_prot(addr, size, __pgprot(PROT_NORMAL));
}
/*
@@ -208,4 +308,11 @@ extern bool arch_memremap_can_ram_remap(resource_size_t offset, size_t size,
unsigned long flags);
#define arch_memremap_can_ram_remap arch_memremap_can_ram_remap
+static inline bool arm64_is_protected_mmio(phys_addr_t phys_addr, size_t size)
+{
+ if (unlikely(is_realm_world()))
+ return __arm64_is_protected_mmio(phys_addr, size);
+ return false;
+}
+
#endif /* __ASM_IO_H */
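
The comment block added above makes __iowrite32_copy()/__iowrite64_copy() the interface of choice for write-combined MMIO: for small constant counts they expand into a contiguous run of str instructions followed by a dgh hint, which is what gives the device a single large write. A hedged sketch of a driver-side caller pushing a 64-byte descriptor (the wc_base pointer and struct layout are invented for the example):

/* Illustrative driver fragment: post one 64-byte descriptor to a WC-mapped BAR. */
struct example_desc {
        u64 qw[8];                      /* 8 x 8 bytes == 64 bytes */
};

static void post_descriptor_sketch(void __iomem *wc_base,
                                   const struct example_desc *desc)
{
        /*
         * count == 8 is a compile-time constant, so this compiles to the
         * eight back-to-back str instructions plus dgh from
         * __const_memcpy_toio_aligned64(), maximising the chance of a
         * single 64-byte write reaching the device.
         */
        __iowrite64_copy(wc_base, desc->qw, 8);
}
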
diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h
index fac08e18bcd5..e93548914c36 100644
--- a/arch/arm64/include/asm/irq.h
+++ b/arch/arm64/include/asm/irq.h
@@ -4,8 +4,13 @@
#ifndef __ASSEMBLER__
+#include <linux/cpumask.h>
+
#include <asm-generic/irq.h>
+void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu);
+#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
+
struct pt_regs;
int set_handle_irq(void (*handle_irq)(struct pt_regs *));
diff --git a/arch/arm64/include/asm/irq_work.h b/arch/arm64/include/asm/irq_work.h
index 81bbfa3a035b..a1020285ea75 100644
--- a/arch/arm64/include/asm/irq_work.h
+++ b/arch/arm64/include/asm/irq_work.h
@@ -2,8 +2,6 @@
#ifndef __ASM_IRQ_WORK_H
#define __ASM_IRQ_WORK_H
-extern void arch_irq_work_raise(void);
-
static inline bool arch_irq_work_has_interrupt(void)
{
return true;
diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h
index b57b9b1e4344..d4d7451c2c12 100644
--- a/arch/arm64/include/asm/irqflags.h
+++ b/arch/arm64/include/asm/irqflags.h
@@ -5,7 +5,6 @@
#ifndef __ASM_IRQFLAGS_H
#define __ASM_IRQFLAGS_H
-#include <asm/alternative.h>
#include <asm/barrier.h>
#include <asm/ptrace.h>
#include <asm/sysreg.h>
@@ -21,43 +20,71 @@
* exceptions should be unmasked.
*/
-/*
- * CPU interrupt mask handling.
- */
-static inline void arch_local_irq_enable(void)
+static __always_inline void __daif_local_irq_enable(void)
{
- if (system_has_prio_mask_debugging()) {
- u32 pmr = read_sysreg_s(SYS_ICC_PMR_EL1);
+ barrier();
+ asm volatile("msr daifclr, #3");
+ barrier();
+}
+static __always_inline void __pmr_local_irq_enable(void)
+{
+ if (IS_ENABLED(CONFIG_ARM64_DEBUG_PRIORITY_MASKING)) {
+ u32 pmr = read_sysreg_s(SYS_ICC_PMR_EL1);
WARN_ON_ONCE(pmr != GIC_PRIO_IRQON && pmr != GIC_PRIO_IRQOFF);
}
- asm volatile(ALTERNATIVE(
- "msr daifclr, #3 // arch_local_irq_enable",
- __msr_s(SYS_ICC_PMR_EL1, "%0"),
- ARM64_HAS_IRQ_PRIO_MASKING)
- :
- : "r" ((unsigned long) GIC_PRIO_IRQON)
- : "memory");
-
+ barrier();
+ write_sysreg_s(GIC_PRIO_IRQON, SYS_ICC_PMR_EL1);
pmr_sync();
+ barrier();
}
-static inline void arch_local_irq_disable(void)
+static inline void arch_local_irq_enable(void)
{
- if (system_has_prio_mask_debugging()) {
- u32 pmr = read_sysreg_s(SYS_ICC_PMR_EL1);
+ if (system_uses_irq_prio_masking()) {
+ __pmr_local_irq_enable();
+ } else {
+ __daif_local_irq_enable();
+ }
+}
+
+static __always_inline void __daif_local_irq_disable(void)
+{
+ barrier();
+ asm volatile("msr daifset, #3");
+ barrier();
+}
+static __always_inline void __pmr_local_irq_disable(void)
+{
+ if (IS_ENABLED(CONFIG_ARM64_DEBUG_PRIORITY_MASKING)) {
+ u32 pmr = read_sysreg_s(SYS_ICC_PMR_EL1);
WARN_ON_ONCE(pmr != GIC_PRIO_IRQON && pmr != GIC_PRIO_IRQOFF);
}
- asm volatile(ALTERNATIVE(
- "msr daifset, #3 // arch_local_irq_disable",
- __msr_s(SYS_ICC_PMR_EL1, "%0"),
- ARM64_HAS_IRQ_PRIO_MASKING)
- :
- : "r" ((unsigned long) GIC_PRIO_IRQOFF)
- : "memory");
+ barrier();
+ write_sysreg_s(GIC_PRIO_IRQOFF, SYS_ICC_PMR_EL1);
+ barrier();
+}
+
+static inline void arch_local_irq_disable(void)
+{
+ if (system_uses_irq_prio_masking()) {
+ __pmr_local_irq_disable();
+ } else {
+ __daif_local_irq_disable();
+ }
+}
+
+static __always_inline unsigned long __daif_local_save_flags(void)
+{
+ return read_sysreg(daif);
+}
+
+static __always_inline unsigned long __pmr_local_save_flags(void)
+{
+ return read_sysreg_s(SYS_ICC_PMR_EL1);
}
/*
@@ -65,69 +92,108 @@ static inline void arch_local_irq_disable(void)
*/
static inline unsigned long arch_local_save_flags(void)
{
- unsigned long flags;
+ if (system_uses_irq_prio_masking()) {
+ return __pmr_local_save_flags();
+ } else {
+ return __daif_local_save_flags();
+ }
+}
- asm volatile(ALTERNATIVE(
- "mrs %0, daif",
- __mrs_s("%0", SYS_ICC_PMR_EL1),
- ARM64_HAS_IRQ_PRIO_MASKING)
- : "=&r" (flags)
- :
- : "memory");
+static __always_inline bool __daif_irqs_disabled_flags(unsigned long flags)
+{
+ return flags & PSR_I_BIT;
+}
- return flags;
+static __always_inline bool __pmr_irqs_disabled_flags(unsigned long flags)
+{
+ return flags != GIC_PRIO_IRQON;
}
-static inline int arch_irqs_disabled_flags(unsigned long flags)
+static inline bool arch_irqs_disabled_flags(unsigned long flags)
{
- int res;
+ if (system_uses_irq_prio_masking()) {
+ return __pmr_irqs_disabled_flags(flags);
+ } else {
+ return __daif_irqs_disabled_flags(flags);
+ }
+}
- asm volatile(ALTERNATIVE(
- "and %w0, %w1, #" __stringify(PSR_I_BIT),
- "eor %w0, %w1, #" __stringify(GIC_PRIO_IRQON),
- ARM64_HAS_IRQ_PRIO_MASKING)
- : "=&r" (res)
- : "r" ((int) flags)
- : "memory");
+static __always_inline bool __daif_irqs_disabled(void)
+{
+ return __daif_irqs_disabled_flags(__daif_local_save_flags());
+}
- return res;
+static __always_inline bool __pmr_irqs_disabled(void)
+{
+ return __pmr_irqs_disabled_flags(__pmr_local_save_flags());
}
-static inline int arch_irqs_disabled(void)
+static inline bool arch_irqs_disabled(void)
{
- return arch_irqs_disabled_flags(arch_local_save_flags());
+ if (system_uses_irq_prio_masking()) {
+ return __pmr_irqs_disabled();
+ } else {
+ return __daif_irqs_disabled();
+ }
}
-static inline unsigned long arch_local_irq_save(void)
+static __always_inline unsigned long __daif_local_irq_save(void)
{
- unsigned long flags;
+ unsigned long flags = __daif_local_save_flags();
+
+ __daif_local_irq_disable();
- flags = arch_local_save_flags();
+ return flags;
+}
+
+static __always_inline unsigned long __pmr_local_irq_save(void)
+{
+ unsigned long flags = __pmr_local_save_flags();
/*
* There are too many states with IRQs disabled, just keep the current
* state if interrupts are already disabled/masked.
*/
- if (!arch_irqs_disabled_flags(flags))
- arch_local_irq_disable();
+ if (!__pmr_irqs_disabled_flags(flags))
+ __pmr_local_irq_disable();
return flags;
}
+static inline unsigned long arch_local_irq_save(void)
+{
+ if (system_uses_irq_prio_masking()) {
+ return __pmr_local_irq_save();
+ } else {
+ return __daif_local_irq_save();
+ }
+}
+
+static __always_inline void __daif_local_irq_restore(unsigned long flags)
+{
+ barrier();
+ write_sysreg(flags, daif);
+ barrier();
+}
+
+static __always_inline void __pmr_local_irq_restore(unsigned long flags)
+{
+ barrier();
+ write_sysreg_s(flags, SYS_ICC_PMR_EL1);
+ pmr_sync();
+ barrier();
+}
+
/*
* restore saved IRQ state
*/
static inline void arch_local_irq_restore(unsigned long flags)
{
- asm volatile(ALTERNATIVE(
- "msr daif, %0",
- __msr_s(SYS_ICC_PMR_EL1, "%0"),
- ARM64_HAS_IRQ_PRIO_MASKING)
- :
- : "r" (flags)
- : "memory");
-
- pmr_sync();
+ if (system_uses_irq_prio_masking()) {
+ __pmr_local_irq_restore(flags);
+ } else {
+ __daif_local_irq_restore(flags);
+ }
}
#endif /* __ASM_IRQFLAGS_H */
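
The rewritten irqflags.h keeps the external interface intact and only moves the DAIF-vs-PMR choice from instruction patching via ALTERNATIVE() to an ordinary branch on system_uses_irq_prio_masking(), with a dedicated __daif_*() or __pmr_*() helper for each flavour. Callers are untouched; a typical critical section still follows the standard idiom (generic kernel usage, not part of this patch):

/* Standard save/restore idiom; works for both the DAIF and PMR flavours. */
static void critical_section_sketch(void)
{
        unsigned long flags;

        flags = arch_local_irq_save();  /* mask IRQs, remember previous state */
        /* ... work that must not be interrupted ... */
        arch_local_irq_restore(flags);  /* restore the previous mask */
}
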
diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h
index cea441b6aa5d..424ed421cd97 100644
--- a/arch/arm64/include/asm/jump_label.h
+++ b/arch/arm64/include/asm/jump_label.h
@@ -13,37 +13,46 @@
#include <linux/types.h>
#include <asm/insn.h>
+#define HAVE_JUMP_LABEL_BATCH
#define JUMP_LABEL_NOP_SIZE AARCH64_INSN_SIZE
-static __always_inline bool arch_static_branch(struct static_key *key,
- bool branch)
+#define JUMP_TABLE_ENTRY(key, label) \
+ ".pushsection __jump_table, \"aw\"\n\t" \
+ ".align 3\n\t" \
+ ".long 1b - ., " label " - .\n\t" \
+ ".quad " key " - .\n\t" \
+ ".popsection\n\t"
+
+/* This macro is also expanded on the Rust side. */
+#define ARCH_STATIC_BRANCH_ASM(key, label) \
+ "1: nop\n\t" \
+ JUMP_TABLE_ENTRY(key, label)
+
+static __always_inline bool arch_static_branch(struct static_key * const key,
+ const bool branch)
{
- asm_volatile_goto(
- "1: nop \n\t"
- " .pushsection __jump_table, \"aw\" \n\t"
- " .align 3 \n\t"
- " .long 1b - ., %l[l_yes] - . \n\t"
- " .quad %c0 - . \n\t"
- " .popsection \n\t"
- : : "i"(&((char *)key)[branch]) : : l_yes);
+ char *k = &((char *)key)[branch];
+
+ asm goto(
+ ARCH_STATIC_BRANCH_ASM("%c0", "%l[l_yes]")
+ : : "i"(k) : : l_yes
+ );
return false;
l_yes:
return true;
}
-static __always_inline bool arch_static_branch_jump(struct static_key *key,
- bool branch)
+static __always_inline bool arch_static_branch_jump(struct static_key * const key,
+ const bool branch)
{
- asm_volatile_goto(
- "1: b %l[l_yes] \n\t"
- " .pushsection __jump_table, \"aw\" \n\t"
- " .align 3 \n\t"
- " .long 1b - ., %l[l_yes] - . \n\t"
- " .quad %c0 - . \n\t"
- " .popsection \n\t"
- : : "i"(&((char *)key)[branch]) : : l_yes);
+ char *k = &((char *)key)[branch];
+ asm goto(
+ "1: b %l[l_yes] \n\t"
+ JUMP_TABLE_ENTRY("%c0", "%l[l_yes]")
+ : : "i"(k) : : l_yes
+ );
return false;
l_yes:
return true;
diff --git a/arch/arm64/include/asm/kasan.h b/arch/arm64/include/asm/kasan.h
index 12d5f47f7dbe..e1b57c13f8a4 100644
--- a/arch/arm64/include/asm/kasan.h
+++ b/arch/arm64/include/asm/kasan.h
@@ -15,33 +15,11 @@
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
-void kasan_init(void);
-
-/*
- * KASAN_SHADOW_START: beginning of the kernel virtual addresses.
- * KASAN_SHADOW_END: KASAN_SHADOW_START + 1/N of kernel virtual addresses,
- * where N = (1 << KASAN_SHADOW_SCALE_SHIFT).
- *
- * KASAN_SHADOW_OFFSET:
- * This value is used to map an address to the corresponding shadow
- * address by the following formula:
- * shadow_addr = (address >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET
- *
- * (1 << (64 - KASAN_SHADOW_SCALE_SHIFT)) shadow addresses that lie in range
- * [KASAN_SHADOW_OFFSET, KASAN_SHADOW_END) cover all 64-bits of virtual
- * addresses. So KASAN_SHADOW_OFFSET should satisfy the following equation:
- * KASAN_SHADOW_OFFSET = KASAN_SHADOW_END -
- * (1ULL << (64 - KASAN_SHADOW_SCALE_SHIFT))
- */
-#define _KASAN_SHADOW_START(va) (KASAN_SHADOW_END - (1UL << ((va) - KASAN_SHADOW_SCALE_SHIFT)))
-#define KASAN_SHADOW_START _KASAN_SHADOW_START(vabits_actual)
-
-void kasan_copy_shadow(pgd_t *pgdir);
asmlinkage void kasan_early_init(void);
+void kasan_init(void);
#else
static inline void kasan_init(void) { }
-static inline void kasan_copy_shadow(pgd_t *pgdir) { }
#endif
#endif
diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
index 02e59fa8f293..74a4f738c5f5 100644
--- a/arch/arm64/include/asm/kernel-pgtable.h
+++ b/arch/arm64/include/asm/kernel-pgtable.h
@@ -13,138 +13,74 @@
#include <asm/sparsemem.h>
/*
- * The linear mapping and the start of memory are both 2M aligned (per
- * the arm64 booting.txt requirements). Hence we can use section mapping
- * with 4K (section size = 2M) but not with 16K (section size = 32M) or
- * 64K (section size = 512M).
+ * The physical and virtual addresses of the start of the kernel image are
+ * equal modulo 2 MiB (per the arm64 booting.txt requirements). Hence we can
+ * use section mapping with 4K (section size = 2M) but not with 16K (section
+ * size = 32M) or 64K (section size = 512M).
*/
-#ifdef CONFIG_ARM64_4K_PAGES
-#define ARM64_KERNEL_USES_PMD_MAPS 1
+#if defined(PMD_SIZE) && PMD_SIZE <= MIN_KIMG_ALIGN
+#define SWAPPER_BLOCK_SHIFT PMD_SHIFT
+#define SWAPPER_SKIP_LEVEL 1
#else
-#define ARM64_KERNEL_USES_PMD_MAPS 0
+#define SWAPPER_BLOCK_SHIFT PAGE_SHIFT
+#define SWAPPER_SKIP_LEVEL 0
#endif
+#define SWAPPER_BLOCK_SIZE (UL(1) << SWAPPER_BLOCK_SHIFT)
-/*
- * The idmap and swapper page tables need some space reserved in the kernel
- * image. Both require pgd, pud (4 levels only) and pmd tables to (section)
- * map the kernel. With the 64K page configuration, swapper and idmap need to
- * map to pte level. The swapper also maps the FDT (see __create_page_tables
- * for more information). Note that the number of ID map translation levels
- * could be increased on the fly if system RAM is out of reach for the default
- * VA range, so pages required to map highest possible PA are reserved in all
- * cases.
- */
-#if ARM64_KERNEL_USES_PMD_MAPS
-#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS - 1)
-#else
-#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS)
-#endif
+#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS - SWAPPER_SKIP_LEVEL)
+#define INIT_IDMAP_PGTABLE_LEVELS (IDMAP_LEVELS - SWAPPER_SKIP_LEVEL)
+#define IDMAP_VA_BITS 48
+#define IDMAP_LEVELS ARM64_HW_PGTABLE_LEVELS(IDMAP_VA_BITS)
+#define IDMAP_ROOT_LEVEL (4 - IDMAP_LEVELS)
/*
- * If KASLR is enabled, then an offset K is added to the kernel address
- * space. The bottom 21 bits of this offset are zero to guarantee 2MB
- * alignment for PA and VA.
- *
- * For each pagetable level of the swapper, we know that the shift will
- * be larger than 21 (for the 4KB granule case we use section maps thus
- * the smallest shift is actually 30) thus there is the possibility that
- * KASLR can increase the number of pagetable entries by 1, so we make
- * room for this extra entry.
- *
- * Note KASLR cannot increase the number of required entries for a level
- * by more than one because it increments both the virtual start and end
- * addresses equally (the extra entry comes from the case where the end
- * address is just pushed over a boundary and the start address isn't).
+ * A relocatable kernel may execute from an address that differs from the one at
+ * which it was linked. In the worst case, its runtime placement may intersect
+ * with two adjacent PGDIR entries, which means that an additional page table
+ * may be needed at each subordinate level.
*/
+#define EXTRA_PAGE __is_defined(CONFIG_RELOCATABLE)
-#ifdef CONFIG_RANDOMIZE_BASE
-#define EARLY_KASLR (1)
-#else
-#define EARLY_KASLR (0)
-#endif
-
-#define EARLY_ENTRIES(vstart, vend, shift) \
- ((((vend) - 1) >> (shift)) - ((vstart) >> (shift)) + 1 + EARLY_KASLR)
-
-#define EARLY_PGDS(vstart, vend) (EARLY_ENTRIES(vstart, vend, PGDIR_SHIFT))
-
-#if SWAPPER_PGTABLE_LEVELS > 3
-#define EARLY_PUDS(vstart, vend) (EARLY_ENTRIES(vstart, vend, PUD_SHIFT))
-#else
-#define EARLY_PUDS(vstart, vend) (0)
-#endif
-
-#if SWAPPER_PGTABLE_LEVELS > 2
-#define EARLY_PMDS(vstart, vend) (EARLY_ENTRIES(vstart, vend, SWAPPER_TABLE_SHIFT))
-#else
-#define EARLY_PMDS(vstart, vend) (0)
-#endif
+#define SPAN_NR_ENTRIES(vstart, vend, shift) \
+ ((((vend) - 1) >> (shift)) - ((vstart) >> (shift)) + 1)
-#define EARLY_PAGES(vstart, vend) ( 1 /* PGDIR page */ \
- + EARLY_PGDS((vstart), (vend)) /* each PGDIR needs a next level page table */ \
- + EARLY_PUDS((vstart), (vend)) /* each PUD needs a next level page table */ \
- + EARLY_PMDS((vstart), (vend))) /* each PMD needs a next level page table */
-#define INIT_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR, _end))
+#define EARLY_ENTRIES(lvl, vstart, vend) \
+ SPAN_NR_ENTRIES(vstart, vend, SWAPPER_BLOCK_SHIFT + lvl * PTDESC_TABLE_SHIFT)
-/* the initial ID map may need two extra pages if it needs to be extended */
-#if VA_BITS < 48
-#define INIT_IDMAP_DIR_SIZE ((INIT_IDMAP_DIR_PAGES + 2) * PAGE_SIZE)
-#else
-#define INIT_IDMAP_DIR_SIZE (INIT_IDMAP_DIR_PAGES * PAGE_SIZE)
-#endif
-#define INIT_IDMAP_DIR_PAGES EARLY_PAGES(KIMAGE_VADDR, _end + MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE)
+#define EARLY_LEVEL(lvl, lvls, vstart, vend, add) \
+ ((lvls) > (lvl) ? EARLY_ENTRIES(lvl, vstart, vend) + (add) : 0)
-/* Initial memory map size */
-#if ARM64_KERNEL_USES_PMD_MAPS
-#define SWAPPER_BLOCK_SHIFT PMD_SHIFT
-#define SWAPPER_BLOCK_SIZE PMD_SIZE
-#define SWAPPER_TABLE_SHIFT PUD_SHIFT
-#else
-#define SWAPPER_BLOCK_SHIFT PAGE_SHIFT
-#define SWAPPER_BLOCK_SIZE PAGE_SIZE
-#define SWAPPER_TABLE_SHIFT PMD_SHIFT
-#endif
+#define EARLY_PAGES(lvls, vstart, vend, add) (1 /* PGDIR page */ \
+ + EARLY_LEVEL(3, (lvls), (vstart), (vend), add) /* each entry needs a next level page table */ \
+ + EARLY_LEVEL(2, (lvls), (vstart), (vend), add) /* each entry needs a next level page table */ \
+ + EARLY_LEVEL(1, (lvls), (vstart), (vend), add))/* each entry needs a next level page table */
+#define INIT_DIR_SIZE (PAGE_SIZE * (EARLY_PAGES(SWAPPER_PGTABLE_LEVELS, KIMAGE_VADDR, _end, EXTRA_PAGE) \
+ + EARLY_SEGMENT_EXTRA_PAGES))
-/*
- * Initial memory map attributes.
- */
-#define SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
-#define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
+#define INIT_IDMAP_DIR_PAGES (EARLY_PAGES(INIT_IDMAP_PGTABLE_LEVELS, KIMAGE_VADDR, kimage_limit, 1))
+#define INIT_IDMAP_DIR_SIZE ((INIT_IDMAP_DIR_PAGES + EARLY_IDMAP_EXTRA_PAGES) * PAGE_SIZE)
-#if ARM64_KERNEL_USES_PMD_MAPS
-#define SWAPPER_RW_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS)
-#define SWAPPER_RX_MMUFLAGS (SWAPPER_RW_MMUFLAGS | PMD_SECT_RDONLY)
-#else
-#define SWAPPER_RW_MMUFLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS)
-#define SWAPPER_RX_MMUFLAGS (SWAPPER_RW_MMUFLAGS | PTE_RDONLY)
-#endif
+#define INIT_IDMAP_FDT_PAGES (EARLY_PAGES(INIT_IDMAP_PGTABLE_LEVELS, 0UL, UL(MAX_FDT_SIZE), 1) - 1)
+#define INIT_IDMAP_FDT_SIZE ((INIT_IDMAP_FDT_PAGES + EARLY_IDMAP_EXTRA_FDT_PAGES) * PAGE_SIZE)
-/*
- * To make optimal use of block mappings when laying out the linear
- * mapping, round down the base of physical memory to a size that can
- * be mapped efficiently, i.e., either PUD_SIZE (4k granule) or PMD_SIZE
- * (64k granule), or a multiple that can be mapped using contiguous bits
- * in the page tables: 32 * PMD_SIZE (16k granule)
- */
-#if defined(CONFIG_ARM64_4K_PAGES)
-#define ARM64_MEMSTART_SHIFT PUD_SHIFT
-#elif defined(CONFIG_ARM64_16K_PAGES)
-#define ARM64_MEMSTART_SHIFT CONT_PMD_SHIFT
-#else
-#define ARM64_MEMSTART_SHIFT PMD_SHIFT
-#endif
+/* The number of segments in the kernel image (text, rodata, inittext, initdata, data+bss) */
+#define KERNEL_SEGMENT_COUNT 5
+#if SWAPPER_BLOCK_SIZE > SEGMENT_ALIGN
+#define EARLY_SEGMENT_EXTRA_PAGES (KERNEL_SEGMENT_COUNT + 1)
/*
- * sparsemem vmemmap imposes an additional requirement on the alignment of
- * memstart_addr, due to the fact that the base of the vmemmap region
- * has a direct correspondence, and needs to appear sufficiently aligned
- * in the virtual address space.
+ * The initial ID map consists of the kernel image, mapped as two separate
+ * segments, and may appear misaligned wrt the swapper block size. This means
+ * we need 3 additional pages. The DT could straddle a swapper block boundary,
+ * so it may need 2.
*/
-#if ARM64_MEMSTART_SHIFT < SECTION_SIZE_BITS
-#define ARM64_MEMSTART_ALIGN (1UL << SECTION_SIZE_BITS)
+#define EARLY_IDMAP_EXTRA_PAGES 3
+#define EARLY_IDMAP_EXTRA_FDT_PAGES 2
#else
-#define ARM64_MEMSTART_ALIGN (1UL << ARM64_MEMSTART_SHIFT)
+#define EARLY_SEGMENT_EXTRA_PAGES 0
+#define EARLY_IDMAP_EXTRA_PAGES 0
+#define EARLY_IDMAP_EXTRA_FDT_PAGES 0
#endif
#endif /* __ASM_KERNEL_PGTABLE_H */
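
As a worked instance of the sizing macros above, assume 4K pages, 48-bit VAs (four translation levels), CONFIG_RELOCATABLE=y, and a kernel image smaller than 1 GiB that does not straddle a 1 GiB boundary; PTDESC_TABLE_SHIFT is taken as 9 for 4K pages. All of these values are assumptions chosen for the example:

/*
 * Worked example (hypothetical configuration, see the note above):
 *
 *   SWAPPER_BLOCK_SHIFT    = PMD_SHIFT = 21        (2 MiB block mappings)
 *   SWAPPER_SKIP_LEVEL     = 1
 *   SWAPPER_PGTABLE_LEVELS = 4 - 1 = 3
 *   EXTRA_PAGE             = 1                     (relocatable kernel)
 *
 *   EARLY_PAGES(3, KIMAGE_VADDR, _end, 1)
 *     = 1            PGDIR page
 *     + 0            level 3 skipped: (3 > 3) is false
 *     + (1 + 1)      level 2: one 512 GiB span (shift 21 + 2*9 = 39), plus EXTRA_PAGE
 *     + (1 + 1)      level 1: one   1 GiB span (shift 21 + 9 = 30),   plus EXTRA_PAGE
 *     = 5 pages, to which INIT_DIR_SIZE then adds EARLY_SEGMENT_EXTRA_PAGES.
 */
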
diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
index 559bfae26715..4d9cc7a76d9c 100644
--- a/arch/arm64/include/asm/kexec.h
+++ b/arch/arm64/include/asm/kexec.h
@@ -80,7 +80,7 @@ static inline void crash_setup_regs(struct pt_regs *newregs,
}
}
-#if defined(CONFIG_KEXEC_CORE) && defined(CONFIG_HIBERNATION)
+#if defined(CONFIG_CRASH_DUMP) && defined(CONFIG_HIBERNATION)
extern bool crash_is_nosave(unsigned long pfn);
extern void crash_prepare_suspend(void);
extern void crash_post_resume(void);
@@ -102,12 +102,6 @@ void cpu_soft_restart(unsigned long el2_switch, unsigned long entry,
int machine_kexec_post_load(struct kimage *image);
#define machine_kexec_post_load machine_kexec_post_load
-
-void arch_kexec_protect_crashkres(void);
-#define arch_kexec_protect_crashkres arch_kexec_protect_crashkres
-
-void arch_kexec_unprotect_crashkres(void);
-#define arch_kexec_unprotect_crashkres arch_kexec_unprotect_crashkres
#endif
#define ARCH_HAS_KIMAGE_ARCH
diff --git a/arch/arm64/include/asm/kfence.h b/arch/arm64/include/asm/kfence.h
index aa855c6a0ae6..a81937fae9f6 100644
--- a/arch/arm64/include/asm/kfence.h
+++ b/arch/arm64/include/asm/kfence.h
@@ -19,4 +19,14 @@ static inline bool kfence_protect_page(unsigned long addr, bool protect)
return true;
}
+#ifdef CONFIG_KFENCE
+extern bool kfence_early_init;
+static inline bool arm64_kfence_can_set_direct_map(void)
+{
+ return !kfence_early_init;
+}
+#else /* CONFIG_KFENCE */
+static inline bool arm64_kfence_can_set_direct_map(void) { return false; }
+#endif /* CONFIG_KFENCE */
+
#endif /* __ASM_KFENCE_H */
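
arm64_kfence_can_set_direct_map() reports that the KFENCE pool was not allocated during early init, in which case the linear map has to stay modifiable at page granularity so the pool can be protected later. A rough sketch of the kind of check the page-attribute code combines this with (the helper name and the other conditions are assumptions about the surrounding code, not part of this header):

/* Illustrative only: whether linear-map permissions may be changed per page. */
static bool can_set_direct_map_sketch(void)
{
        return rodata_full || debug_pagealloc_enabled() ||
               arm64_kfence_can_set_direct_map();
}
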
diff --git a/arch/arm64/include/asm/kgdb.h b/arch/arm64/include/asm/kgdb.h
index 21fc85e9d2be..3184f5d1e3ae 100644
--- a/arch/arm64/include/asm/kgdb.h
+++ b/arch/arm64/include/asm/kgdb.h
@@ -24,6 +24,18 @@ static inline void arch_kgdb_breakpoint(void)
extern void kgdb_handle_bus_error(void);
extern int kgdb_fault_expected;
+int kgdb_brk_handler(struct pt_regs *regs, unsigned long esr);
+int kgdb_compiled_brk_handler(struct pt_regs *regs, unsigned long esr);
+#ifdef CONFIG_KGDB
+int kgdb_single_step_handler(struct pt_regs *regs, unsigned long esr);
+#else
+static inline int kgdb_single_step_handler(struct pt_regs *regs,
+ unsigned long esr)
+{
+ return DBG_HOOK_ERROR;
+}
+#endif
+
#endif /* !__ASSEMBLY__ */
/*
diff --git a/arch/arm64/include/asm/kprobes.h b/arch/arm64/include/asm/kprobes.h
index 05cd82eeca13..f2782560647b 100644
--- a/arch/arm64/include/asm/kprobes.h
+++ b/arch/arm64/include/asm/kprobes.h
@@ -37,10 +37,16 @@ struct kprobe_ctlblk {
void arch_remove_kprobe(struct kprobe *);
int kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr);
-int kprobe_exceptions_notify(struct notifier_block *self,
- unsigned long val, void *data);
void __kretprobe_trampoline(void);
void __kprobes *trampoline_probe_handler(struct pt_regs *regs);
#endif /* CONFIG_KPROBES */
+
+int __kprobes kprobe_brk_handler(struct pt_regs *regs,
+ unsigned long esr);
+int __kprobes kprobe_ss_brk_handler(struct pt_regs *regs,
+ unsigned long esr);
+int __kprobes kretprobe_brk_handler(struct pt_regs *regs,
+ unsigned long esr);
+
#endif /* _ARM_KPROBES_H */
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 8aa8492dafc0..1da290aeedce 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -9,59 +9,73 @@
#include <asm/esr.h>
#include <asm/memory.h>
+#include <asm/sysreg.h>
#include <asm/types.h>
-/* Hyp Configuration Register (HCR) bits */
-
-#define HCR_TID5 (UL(1) << 58)
-#define HCR_DCT (UL(1) << 57)
-#define HCR_ATA_SHIFT 56
-#define HCR_ATA (UL(1) << HCR_ATA_SHIFT)
-#define HCR_AMVOFFEN (UL(1) << 51)
-#define HCR_FIEN (UL(1) << 47)
-#define HCR_FWB (UL(1) << 46)
-#define HCR_API (UL(1) << 41)
-#define HCR_APK (UL(1) << 40)
-#define HCR_TEA (UL(1) << 37)
-#define HCR_TERR (UL(1) << 36)
-#define HCR_TLOR (UL(1) << 35)
-#define HCR_E2H (UL(1) << 34)
-#define HCR_ID (UL(1) << 33)
-#define HCR_CD (UL(1) << 32)
-#define HCR_RW_SHIFT 31
-#define HCR_RW (UL(1) << HCR_RW_SHIFT)
-#define HCR_TRVM (UL(1) << 30)
-#define HCR_HCD (UL(1) << 29)
-#define HCR_TDZ (UL(1) << 28)
-#define HCR_TGE (UL(1) << 27)
-#define HCR_TVM (UL(1) << 26)
-#define HCR_TTLB (UL(1) << 25)
-#define HCR_TPU (UL(1) << 24)
-#define HCR_TPC (UL(1) << 23) /* HCR_TPCP if FEAT_DPB */
-#define HCR_TSW (UL(1) << 22)
-#define HCR_TACR (UL(1) << 21)
-#define HCR_TIDCP (UL(1) << 20)
-#define HCR_TSC (UL(1) << 19)
-#define HCR_TID3 (UL(1) << 18)
-#define HCR_TID2 (UL(1) << 17)
-#define HCR_TID1 (UL(1) << 16)
-#define HCR_TID0 (UL(1) << 15)
-#define HCR_TWE (UL(1) << 14)
-#define HCR_TWI (UL(1) << 13)
-#define HCR_DC (UL(1) << 12)
-#define HCR_BSU (3 << 10)
-#define HCR_BSU_IS (UL(1) << 10)
-#define HCR_FB (UL(1) << 9)
-#define HCR_VSE (UL(1) << 8)
-#define HCR_VI (UL(1) << 7)
-#define HCR_VF (UL(1) << 6)
-#define HCR_AMO (UL(1) << 5)
-#define HCR_IMO (UL(1) << 4)
-#define HCR_FMO (UL(1) << 3)
-#define HCR_PTW (UL(1) << 2)
-#define HCR_SWIO (UL(1) << 1)
-#define HCR_VM (UL(1) << 0)
-#define HCR_RES0 ((UL(1) << 48) | (UL(1) << 39))
+/*
+ * Because I'm terribly lazy and that repainting the whole of the KVM
+ * code with the proper names is a pain, use a helper to map the names
+ * inherited from AArch32 with the new fancy nomenclature. One day...
+ */
+#define __HCR(x) HCR_EL2_##x
+
+#define HCR_TID5 __HCR(TID5)
+#define HCR_DCT __HCR(DCT)
+#define HCR_ATA_SHIFT __HCR(ATA_SHIFT)
+#define HCR_ATA __HCR(ATA)
+#define HCR_TTLBOS __HCR(TTLBOS)
+#define HCR_TTLBIS __HCR(TTLBIS)
+#define HCR_ENSCXT __HCR(EnSCXT)
+#define HCR_TOCU __HCR(TOCU)
+#define HCR_AMVOFFEN __HCR(AMVOFFEN)
+#define HCR_TICAB __HCR(TICAB)
+#define HCR_TID4 __HCR(TID4)
+#define HCR_FIEN __HCR(FIEN)
+#define HCR_FWB __HCR(FWB)
+#define HCR_NV2 __HCR(NV2)
+#define HCR_AT __HCR(AT)
+#define HCR_NV1 __HCR(NV1)
+#define HCR_NV __HCR(NV)
+#define HCR_API __HCR(API)
+#define HCR_APK __HCR(APK)
+#define HCR_TEA __HCR(TEA)
+#define HCR_TERR __HCR(TERR)
+#define HCR_TLOR __HCR(TLOR)
+#define HCR_E2H __HCR(E2H)
+#define HCR_ID __HCR(ID)
+#define HCR_CD __HCR(CD)
+#define HCR_RW __HCR(RW)
+#define HCR_TRVM __HCR(TRVM)
+#define HCR_HCD __HCR(HCD)
+#define HCR_TDZ __HCR(TDZ)
+#define HCR_TGE __HCR(TGE)
+#define HCR_TVM __HCR(TVM)
+#define HCR_TTLB __HCR(TTLB)
+#define HCR_TPU __HCR(TPU)
+#define HCR_TPC __HCR(TPCP)
+#define HCR_TSW __HCR(TSW)
+#define HCR_TACR __HCR(TACR)
+#define HCR_TIDCP __HCR(TIDCP)
+#define HCR_TSC __HCR(TSC)
+#define HCR_TID3 __HCR(TID3)
+#define HCR_TID2 __HCR(TID2)
+#define HCR_TID1 __HCR(TID1)
+#define HCR_TID0 __HCR(TID0)
+#define HCR_TWE __HCR(TWE)
+#define HCR_TWI __HCR(TWI)
+#define HCR_DC __HCR(DC)
+#define HCR_BSU __HCR(BSU)
+#define HCR_BSU_IS __HCR(BSU_IS)
+#define HCR_FB __HCR(FB)
+#define HCR_VSE __HCR(VSE)
+#define HCR_VI __HCR(VI)
+#define HCR_VF __HCR(VF)
+#define HCR_AMO __HCR(AMO)
+#define HCR_IMO __HCR(IMO)
+#define HCR_FMO __HCR(FMO)
+#define HCR_PTW __HCR(PTW)
+#define HCR_SWIO __HCR(SWIO)
+#define HCR_VM __HCR(VM)
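The __HCR() helper above is plain token pasting: each legacy HCR_* name becomes an alias for the sysreg-generated HCR_EL2_* definition of the same bit. A minimal standalone sketch of the same pattern, using made-up FOO_EL2_* constants rather than the real generated ones:

#include <stdio.h>

/* Stand-ins for generated per-bit definitions (hypothetical values). */
#define FOO_EL2_VM	(1UL << 0)
#define FOO_EL2_TWI	(1UL << 13)

/* Same aliasing trick as __HCR(): paste the short name onto the new prefix. */
#define __FOO(x)	FOO_EL2_##x

#define FOO_VM		__FOO(VM)
#define FOO_TWI		__FOO(TWI)

int main(void)
{
	/* FOO_TWI expands to FOO_EL2_TWI, i.e. bit 13. */
	printf("FOO_TWI = 0x%lx\n", FOO_TWI);
	return 0;
}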
/*
* The bits we set in HCR:
@@ -81,18 +95,22 @@
* SWIO: Turn set/way invalidates into set/way clean+invalidate
* PTW: Take a stage2 fault if a stage1 walk steps in device memory
* TID3: Trap EL1 reads of group 3 ID registers
+ * TID1: Trap REVIDR_EL1, AIDR_EL1, and SMIDR_EL1
*/
#define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \
HCR_BSU_IS | HCR_FB | HCR_TACR | \
HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \
- HCR_FMO | HCR_IMO | HCR_PTW | HCR_TID3 )
-#define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF)
+ HCR_FMO | HCR_IMO | HCR_PTW | HCR_TID3 | HCR_TID1)
#define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA)
#define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC)
-#define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
+#define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H | HCR_AMO | HCR_IMO | HCR_FMO)
+
+#define MPAMHCR_HOST_FLAGS 0
/* TCR_EL2 Registers bits */
+#define TCR_EL2_DS (1UL << 32)
#define TCR_EL2_RES1 ((1U << 31) | (1 << 23))
+#define TCR_EL2_HPD (1 << 24)
#define TCR_EL2_TBI (1 << 20)
#define TCR_EL2_PS_SHIFT 16
#define TCR_EL2_PS_MASK (7 << TCR_EL2_PS_SHIFT)
@@ -103,9 +121,10 @@
#define TCR_EL2_IRGN0_MASK TCR_IRGN0_MASK
#define TCR_EL2_T0SZ_MASK 0x3f
#define TCR_EL2_MASK (TCR_EL2_TG0_MASK | TCR_EL2_SH0_MASK | \
- TCR_EL2_ORGN0_MASK | TCR_EL2_IRGN0_MASK | TCR_EL2_T0SZ_MASK)
+ TCR_EL2_ORGN0_MASK | TCR_EL2_IRGN0_MASK)
/* VTCR_EL2 Registers bits */
+#define VTCR_EL2_DS TCR_EL2_DS
#define VTCR_EL2_RES1 (1U << 31)
#define VTCR_EL2_HD (1 << 22)
#define VTCR_EL2_HA (1 << 21)
@@ -135,7 +154,7 @@
* 40 bits wide (T0SZ = 24). Systems with a PARange smaller than 40 bits are
* not known to exist and will break with this configuration.
*
- * The VTCR_EL2 is configured per VM and is initialised in kvm_arm_setup_stage2().
+ * The VTCR_EL2 is configured per VM and is initialised in kvm_init_stage2_mmu().
*
* Note that when using 4K pages, we concatenate two first level page tables
* together. With 16K pages, we concatenate 16 first level page tables.
@@ -283,56 +302,32 @@
#define CPTR_EL2_TSM (1 << 12)
#define CPTR_EL2_TFP (1 << CPTR_EL2_TFP_SHIFT)
#define CPTR_EL2_TZ (1 << 8)
-#define CPTR_NVHE_EL2_RES1 0x000032ff /* known RES1 bits in CPTR_EL2 (nVHE) */
-#define CPTR_EL2_DEFAULT CPTR_NVHE_EL2_RES1
+#define CPTR_NVHE_EL2_RES1 (BIT(13) | BIT(9) | GENMASK(7, 0))
#define CPTR_NVHE_EL2_RES0 (GENMASK(63, 32) | \
GENMASK(29, 21) | \
GENMASK(19, 14) | \
BIT(11))
-/* Hyp Debug Configuration Register bits */
-#define MDCR_EL2_E2TB_MASK (UL(0x3))
-#define MDCR_EL2_E2TB_SHIFT (UL(24))
-#define MDCR_EL2_HPMFZS (UL(1) << 36)
-#define MDCR_EL2_HPMFZO (UL(1) << 29)
-#define MDCR_EL2_MTPME (UL(1) << 28)
-#define MDCR_EL2_TDCC (UL(1) << 27)
-#define MDCR_EL2_HLP (UL(1) << 26)
-#define MDCR_EL2_HCCD (UL(1) << 23)
-#define MDCR_EL2_TTRF (UL(1) << 19)
-#define MDCR_EL2_HPMD (UL(1) << 17)
-#define MDCR_EL2_TPMS (UL(1) << 14)
-#define MDCR_EL2_E2PB_MASK (UL(0x3))
-#define MDCR_EL2_E2PB_SHIFT (UL(12))
-#define MDCR_EL2_TDRA (UL(1) << 11)
-#define MDCR_EL2_TDOSA (UL(1) << 10)
-#define MDCR_EL2_TDA (UL(1) << 9)
-#define MDCR_EL2_TDE (UL(1) << 8)
-#define MDCR_EL2_HPME (UL(1) << 7)
-#define MDCR_EL2_TPM (UL(1) << 6)
-#define MDCR_EL2_TPMCR (UL(1) << 5)
-#define MDCR_EL2_HPMN_MASK (UL(0x1F))
-#define MDCR_EL2_RES0 (GENMASK(63, 37) | \
- GENMASK(35, 30) | \
- GENMASK(25, 24) | \
- GENMASK(22, 20) | \
- BIT(18) | \
- GENMASK(16, 15))
-
-/* For compatibility with fault code shared with 32-bit */
-#define FSC_FAULT ESR_ELx_FSC_FAULT
-#define FSC_ACCESS ESR_ELx_FSC_ACCESS
-#define FSC_PERM ESR_ELx_FSC_PERM
-#define FSC_SEA ESR_ELx_FSC_EXTABT
-#define FSC_SEA_TTW0 (0x14)
-#define FSC_SEA_TTW1 (0x15)
-#define FSC_SEA_TTW2 (0x16)
-#define FSC_SEA_TTW3 (0x17)
-#define FSC_SECC (0x18)
-#define FSC_SECC_TTW0 (0x1c)
-#define FSC_SECC_TTW1 (0x1d)
-#define FSC_SECC_TTW2 (0x1e)
-#define FSC_SECC_TTW3 (0x1f)
+#define CPTR_VHE_EL2_RES0 (GENMASK(63, 32) | \
+ GENMASK(27, 26) | \
+ GENMASK(23, 22) | \
+ GENMASK(19, 18) | \
+ GENMASK(15, 0))
+
+/*
+ * Polarity masks for HCRX_EL2, limited to the bits that we know about
+ * at this point in time. It doesn't mean that we actually *handle*
+ * them, but that at least those that are not advertised to a guest
+ * will be RES0 for that guest.
+ */
+#define __HCRX_EL2_MASK (BIT_ULL(6))
+#define __HCRX_EL2_nMASK (GENMASK_ULL(24, 14) | \
+ GENMASK_ULL(11, 7) | \
+ GENMASK_ULL(5, 0))
+#define __HCRX_EL2_RES0 ~(__HCRX_EL2_nMASK | __HCRX_EL2_MASK)
+#define __HCRX_EL2_RES1 ~(__HCRX_EL2_nMASK | \
+ __HCRX_EL2_MASK | \
+ __HCRX_EL2_RES0)
/* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
#define HPFAR_MASK (~UL(0xf))
@@ -340,9 +335,13 @@
* We have
* PAR [PA_Shift - 1 : 12] = PA [PA_Shift - 1 : 12]
* HPFAR [PA_Shift - 9 : 4] = FIPA [PA_Shift - 1 : 12]
+ *
+ * Always assume 52 bit PA since at this point, we don't know how many PA bits
+ * the page table has been set up for. This should be safe since unused address
+ * bits in PAR are res0.
*/
#define PAR_TO_HPFAR(par) \
- (((par) & GENMASK_ULL(PHYS_MASK_SHIFT - 1, 12)) >> 8)
+ (((par) & GENMASK_ULL(52 - 1, 12)) >> 8)
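As a quick sanity check of the arithmetic (outside of KVM, with a local copy of GENMASK_ULL()): shifting PAR[51:12] right by 8 lands the PA in the HPFAR.FIPA field at bits [43:4], and shifting back by 8 recovers the page-aligned IPA.

#include <stdio.h>
#include <stdint.h>

/* Local copies of the helpers, so this compiles as a standalone check. */
#define GENMASK_ULL(h, l) \
	(((~0ULL) << (l)) & (~0ULL >> (63 - (h))))
#define PAR_TO_HPFAR(par) \
	(((par) & GENMASK_ULL(52 - 1, 12)) >> 8)

int main(void)
{
	uint64_t pa    = 0x0000123456789000ULL;	/* page-aligned PA */
	uint64_t par   = pa;			/* PAR[51:12] = PA[51:12] */
	uint64_t hpfar = PAR_TO_HPFAR(par);

	/* FIPA is recovered by shifting HPFAR[43:4] left by 8 again. */
	printf("hpfar=0x%llx, fipa=0x%llx\n",
	       (unsigned long long)hpfar,
	       (unsigned long long)(hpfar << 8));
	return 0;
}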
#define ECN(x) { ESR_ELx_EC_##x, #x }
@@ -355,10 +354,23 @@
ECN(SP_ALIGN), ECN(FP_EXC32), ECN(FP_EXC64), ECN(SERROR), \
ECN(BREAKPT_LOW), ECN(BREAKPT_CUR), ECN(SOFTSTP_LOW), \
ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \
- ECN(BKPT32), ECN(VECTOR32), ECN(BRK64)
+ ECN(BKPT32), ECN(VECTOR32), ECN(BRK64), ECN(ERET)
-#define CPACR_EL1_TTA (1 << 28)
-#define CPACR_EL1_DEFAULT (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN |\
- CPACR_EL1_ZEN_EL1EN)
+#define kvm_mode_names \
+ { PSR_MODE_EL0t, "EL0t" }, \
+ { PSR_MODE_EL1t, "EL1t" }, \
+ { PSR_MODE_EL1h, "EL1h" }, \
+ { PSR_MODE_EL2t, "EL2t" }, \
+ { PSR_MODE_EL2h, "EL2h" }, \
+ { PSR_MODE_EL3t, "EL3t" }, \
+ { PSR_MODE_EL3h, "EL3h" }, \
+ { PSR_AA32_MODE_USR, "32-bit USR" }, \
+ { PSR_AA32_MODE_FIQ, "32-bit FIQ" }, \
+ { PSR_AA32_MODE_IRQ, "32-bit IRQ" }, \
+ { PSR_AA32_MODE_SVC, "32-bit SVC" }, \
+ { PSR_AA32_MODE_ABT, "32-bit ABT" }, \
+ { PSR_AA32_MODE_HYP, "32-bit HYP" }, \
+ { PSR_AA32_MODE_UND, "32-bit UND" }, \
+ { PSR_AA32_MODE_SYS, "32-bit SYS" }
#endif /* __ARM64_KVM_ARM_H__ */
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 2e277f2ed671..bec227f9500a 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -10,6 +10,7 @@
#include <asm/hyp_image.h>
#include <asm/insn.h>
#include <asm/virt.h>
+#include <asm/sysreg.h>
#define ARM_EXIT_WITH_SERROR_BIT 31
#define ARM_EXCEPTION_CODE(x) ((x) & ~(1U << ARM_EXIT_WITH_SERROR_BIT))
@@ -52,8 +53,7 @@
enum __kvm_host_smccc_func {
/* Hypercalls available only prior to pKVM finalisation */
/* __KVM_HOST_SMCCC_FUNC___kvm_hyp_init */
- __KVM_HOST_SMCCC_FUNC___kvm_get_mdcr_el2 = __KVM_HOST_SMCCC_FUNC___kvm_hyp_init + 1,
- __KVM_HOST_SMCCC_FUNC___pkvm_init,
+ __KVM_HOST_SMCCC_FUNC___pkvm_init = __KVM_HOST_SMCCC_FUNC___kvm_hyp_init + 1,
__KVM_HOST_SMCCC_FUNC___pkvm_create_private_mapping,
__KVM_HOST_SMCCC_FUNC___pkvm_cpu_set_vector,
__KVM_HOST_SMCCC_FUNC___kvm_enable_ssbs,
@@ -64,18 +64,29 @@ enum __kvm_host_smccc_func {
/* Hypercalls available after pKVM finalisation */
__KVM_HOST_SMCCC_FUNC___pkvm_host_share_hyp,
__KVM_HOST_SMCCC_FUNC___pkvm_host_unshare_hyp,
+ __KVM_HOST_SMCCC_FUNC___pkvm_host_share_guest,
+ __KVM_HOST_SMCCC_FUNC___pkvm_host_unshare_guest,
+ __KVM_HOST_SMCCC_FUNC___pkvm_host_relax_perms_guest,
+ __KVM_HOST_SMCCC_FUNC___pkvm_host_wrprotect_guest,
+ __KVM_HOST_SMCCC_FUNC___pkvm_host_test_clear_young_guest,
+ __KVM_HOST_SMCCC_FUNC___pkvm_host_mkyoung_guest,
__KVM_HOST_SMCCC_FUNC___kvm_adjust_pc,
__KVM_HOST_SMCCC_FUNC___kvm_vcpu_run,
__KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context,
__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa,
+ __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa_nsh,
__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid,
+ __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_range,
__KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context,
__KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff,
- __KVM_HOST_SMCCC_FUNC___vgic_v3_read_vmcr,
- __KVM_HOST_SMCCC_FUNC___vgic_v3_write_vmcr,
- __KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs,
- __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_aprs,
- __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_init_traps,
+ __KVM_HOST_SMCCC_FUNC___vgic_v3_save_vmcr_aprs,
+ __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_vmcr_aprs,
+ __KVM_HOST_SMCCC_FUNC___pkvm_init_vm,
+ __KVM_HOST_SMCCC_FUNC___pkvm_init_vcpu,
+ __KVM_HOST_SMCCC_FUNC___pkvm_teardown_vm,
+ __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_load,
+ __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_put,
+ __KVM_HOST_SMCCC_FUNC___pkvm_tlb_flush_vmid,
};
#define DECLARE_KVM_VHE_SYM(sym) extern char sym[]
@@ -106,7 +117,7 @@ enum __kvm_host_smccc_func {
#define per_cpu_ptr_nvhe_sym(sym, cpu) \
({ \
unsigned long base, off; \
- base = kvm_arm_hyp_percpu_base[cpu]; \
+ base = kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu]; \
off = (unsigned long)&CHOOSE_NVHE_SYM(sym) - \
(unsigned long)&CHOOSE_NVHE_SYM(__per_cpu_start); \
base ? (typeof(CHOOSE_NVHE_SYM(sym))*)(base + off) : NULL; \
@@ -174,6 +185,23 @@ struct kvm_nvhe_init_params {
unsigned long hcr_el2;
unsigned long vttbr;
unsigned long vtcr;
+ unsigned long tmp;
+};
+
+/*
+ * Used by the host in EL1 to dump the nVHE hypervisor backtrace on
+ * hyp_panic() in non-protected mode.
+ *
+ * @stack_base: hyp VA of the hyp_stack base.
+ * @overflow_stack_base: hyp VA of the hyp_overflow_stack base.
+ * @fp: hyp FP where the backtrace begins.
+ * @pc: hyp PC where the backtrace begins.
+ */
+struct kvm_nvhe_stacktrace_info {
+ unsigned long stack_base;
+ unsigned long overflow_stack_base;
+ unsigned long fp;
+ unsigned long pc;
};
/* Translate a kernel address @ptr into its equivalent linear mapping */
@@ -195,7 +223,7 @@ DECLARE_KVM_HYP_SYM(__kvm_hyp_vector);
#define __kvm_hyp_init CHOOSE_NVHE_SYM(__kvm_hyp_init)
#define __kvm_hyp_vector CHOOSE_HYP_SYM(__kvm_hyp_vector)
-extern unsigned long kvm_arm_hyp_percpu_base[NR_CPUS];
+extern unsigned long kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[];
DECLARE_KVM_NVHE_SYM(__per_cpu_start);
DECLARE_KVM_NVHE_SYM(__per_cpu_end);
@@ -206,21 +234,27 @@ extern void __kvm_flush_vm_context(void);
extern void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu);
extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa,
int level);
+extern void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
+ phys_addr_t ipa,
+ int level);
+extern void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
+ phys_addr_t start, unsigned long pages);
extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu);
+extern int __kvm_tlbi_s1e2(struct kvm_s2_mmu *mmu, u64 va, u64 sys_encoding);
+
extern void __kvm_timer_set_cntvoff(u64 cntvoff);
+extern void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
+extern void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
+extern void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
extern void __kvm_adjust_pc(struct kvm_vcpu *vcpu);
extern u64 __vgic_v3_get_gic_config(void);
-extern u64 __vgic_v3_read_vmcr(void);
-extern void __vgic_v3_write_vmcr(u32 vmcr);
extern void __vgic_v3_init_lrs(void);
-extern u64 __kvm_get_mdcr_el2(void);
-
#define __KVM_EXTABLE(from, to) \
" .pushsection __kvm_ex_table, \"a\"\n" \
" .align 3\n" \
@@ -235,7 +269,7 @@ extern u64 __kvm_get_mdcr_el2(void);
asm volatile( \
" mrs %1, spsr_el2\n" \
" mrs %2, elr_el2\n" \
- "1: at "at_op", %3\n" \
+ "1: " __msr_s(at_op, "%3") "\n" \
" isb\n" \
" b 9f\n" \
"2: msr spsr_el2, %1\n" \
@@ -248,6 +282,24 @@ extern u64 __kvm_get_mdcr_el2(void);
__kvm_at_err; \
} )
+asmlinkage void __noreturn hyp_panic(void);
+asmlinkage void __noreturn hyp_panic_bad_stack(void);
+asmlinkage void kvm_unexpected_el2_exception(void);
+struct kvm_cpu_context;
+void handle_trap(struct kvm_cpu_context *host_ctxt);
+asmlinkage void __noreturn __kvm_host_psci_cpu_entry(bool is_cpu_on);
+void __noreturn __pkvm_init_finalise(void);
+void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc);
+void kvm_patch_vector_branch(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst);
+void kvm_get_kimage_voffset(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst);
+void kvm_compute_final_ctr_el0(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst);
+void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, u64 elr_virt,
+ u64 elr_phys, u64 par, uintptr_t vcpu, u64 far, u64 hpfar);
#else /* __ASSEMBLY__ */
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 0e66edd3aff2..fa8a08a1ccd5 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -11,12 +11,14 @@
#ifndef __ARM64_KVM_EMULATE_H__
#define __ARM64_KVM_EMULATE_H__
+#include <linux/bitfield.h>
#include <linux/kvm_host.h>
#include <asm/debug-monitors.h>
#include <asm/esr.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_hyp.h>
+#include <asm/kvm_nested.h>
#include <asm/ptrace.h>
#include <asm/cputype.h>
#include <asm/virt.h>
@@ -33,17 +35,58 @@ enum exception_type {
except_type_serror = 0x180,
};
+#define kvm_exception_type_names \
+ { except_type_sync, "SYNC" }, \
+ { except_type_irq, "IRQ" }, \
+ { except_type_fiq, "FIQ" }, \
+ { except_type_serror, "SERROR" }
+
bool kvm_condition_valid32(const struct kvm_vcpu *vcpu);
void kvm_skip_instr32(struct kvm_vcpu *vcpu);
void kvm_inject_undefined(struct kvm_vcpu *vcpu);
-void kvm_inject_vabt(struct kvm_vcpu *vcpu);
-void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
-void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
+int kvm_inject_serror_esr(struct kvm_vcpu *vcpu, u64 esr);
+int kvm_inject_sea(struct kvm_vcpu *vcpu, bool iabt, u64 addr);
void kvm_inject_size_fault(struct kvm_vcpu *vcpu);
+static inline int kvm_inject_sea_dabt(struct kvm_vcpu *vcpu, u64 addr)
+{
+ return kvm_inject_sea(vcpu, false, addr);
+}
+
+static inline int kvm_inject_sea_iabt(struct kvm_vcpu *vcpu, u64 addr)
+{
+ return kvm_inject_sea(vcpu, true, addr);
+}
+
+static inline int kvm_inject_serror(struct kvm_vcpu *vcpu)
+{
+ /*
+ * ESR_ELx.ISV (later renamed to IDS) indicates whether or not
+ * ESR_ELx.ISS contains IMPLEMENTATION DEFINED syndrome information.
+ *
+ * Set the bit when injecting an SError w/o an ESR to indicate ISS
+ * does not follow the architected format.
+ */
+ return kvm_inject_serror_esr(vcpu, ESR_ELx_ISV);
+}
+
void kvm_vcpu_wfi(struct kvm_vcpu *vcpu);
+void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu);
+int kvm_inject_nested_sync(struct kvm_vcpu *vcpu, u64 esr_el2);
+int kvm_inject_nested_irq(struct kvm_vcpu *vcpu);
+int kvm_inject_nested_sea(struct kvm_vcpu *vcpu, bool iabt, u64 addr);
+int kvm_inject_nested_serror(struct kvm_vcpu *vcpu, u64 esr);
+
+static inline void kvm_inject_nested_sve_trap(struct kvm_vcpu *vcpu)
+{
+ u64 esr = FIELD_PREP(ESR_ELx_EC_MASK, ESR_ELx_EC_SVE) |
+ ESR_ELx_IL;
+
+ kvm_inject_nested_sync(vcpu, esr);
+}
+
#if defined(__KVM_VHE_HYPERVISOR__) || defined(__KVM_NVHE_HYPERVISOR__)
static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
{
@@ -52,48 +95,23 @@ static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
#else
static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
{
- struct kvm *kvm = vcpu->kvm;
-
- WARN_ON_ONCE(!test_bit(KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED,
- &kvm->arch.flags));
-
- return test_bit(KVM_ARCH_FLAG_EL1_32BIT, &kvm->arch.flags);
+ return vcpu_has_feature(vcpu, KVM_ARM_VCPU_EL1_32BIT);
}
#endif
static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
{
- vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
- if (is_kernel_in_hyp_mode())
- vcpu->arch.hcr_el2 |= HCR_E2H;
- if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN)) {
- /* route synchronous external abort exceptions to EL2 */
- vcpu->arch.hcr_el2 |= HCR_TEA;
- /* trap error record accesses */
- vcpu->arch.hcr_el2 |= HCR_TERR;
- }
+ if (!vcpu_has_run_once(vcpu))
+ vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
- if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) {
- vcpu->arch.hcr_el2 |= HCR_FWB;
- } else {
- /*
- * For non-FWB CPUs, we trap VM ops (HCR_EL2.TVM) until M+C
- * get set in SCTLR_EL1 such that we can detect when the guest
- * MMU gets turned on and do the necessary cache maintenance
- * then.
- */
+ /*
+ * For non-FWB CPUs, we trap VM ops (HCR_EL2.TVM) until M+C
+ * get set in SCTLR_EL1 such that we can detect when the guest
+ * MMU gets turned on and do the necessary cache maintenance
+ * then.
+ */
+ if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
vcpu->arch.hcr_el2 |= HCR_TVM;
- }
-
- if (vcpu_el1_is_32bit(vcpu))
- vcpu->arch.hcr_el2 &= ~HCR_RW;
-
- if (cpus_have_const_cap(ARM64_MISMATCHED_CACHE_TYPE) ||
- vcpu_el1_is_32bit(vcpu))
- vcpu->arch.hcr_el2 |= HCR_TID2;
-
- if (kvm_has_mte(vcpu->kvm))
- vcpu->arch.hcr_el2 |= HCR_ATA;
}
static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu)
@@ -117,16 +135,6 @@ static inline void vcpu_set_wfx_traps(struct kvm_vcpu *vcpu)
vcpu->arch.hcr_el2 |= HCR_TWI;
}
-static inline void vcpu_ptrauth_enable(struct kvm_vcpu *vcpu)
-{
- vcpu->arch.hcr_el2 |= (HCR_API | HCR_APK);
-}
-
-static inline void vcpu_ptrauth_disable(struct kvm_vcpu *vcpu)
-{
- vcpu->arch.hcr_el2 &= ~(HCR_API | HCR_APK);
-}
-
static inline unsigned long vcpu_get_vsesr(struct kvm_vcpu *vcpu)
{
return vcpu->arch.vsesr_el2;
@@ -183,6 +191,81 @@ static __always_inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u8 reg_num,
vcpu_gp_regs(vcpu)->regs[reg_num] = val;
}
+static inline bool vcpu_is_el2_ctxt(const struct kvm_cpu_context *ctxt)
+{
+ switch (ctxt->regs.pstate & (PSR_MODE32_BIT | PSR_MODE_MASK)) {
+ case PSR_MODE_EL2h:
+ case PSR_MODE_EL2t:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static inline bool vcpu_is_el2(const struct kvm_vcpu *vcpu)
+{
+ return vcpu_is_el2_ctxt(&vcpu->arch.ctxt);
+}
+
+static inline bool vcpu_el2_e2h_is_set(const struct kvm_vcpu *vcpu)
+{
+ return (!cpus_have_final_cap(ARM64_HAS_HCR_NV1) ||
+ (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_E2H));
+}
+
+static inline bool vcpu_el2_tge_is_set(const struct kvm_vcpu *vcpu)
+{
+ return ctxt_sys_reg(&vcpu->arch.ctxt, HCR_EL2) & HCR_TGE;
+}
+
+static inline bool vcpu_el2_amo_is_set(const struct kvm_vcpu *vcpu)
+{
+ return ctxt_sys_reg(&vcpu->arch.ctxt, HCR_EL2) & HCR_AMO;
+}
+
+static inline bool is_hyp_ctxt(const struct kvm_vcpu *vcpu)
+{
+ bool e2h, tge;
+ u64 hcr;
+
+ if (!vcpu_has_nv(vcpu))
+ return false;
+
+ hcr = __vcpu_sys_reg(vcpu, HCR_EL2);
+
+ e2h = (hcr & HCR_E2H);
+ tge = (hcr & HCR_TGE);
+
+ /*
+ * We are in a hypervisor context if the vcpu mode is EL2 or
+ * E2H and TGE bits are set. The latter means we are in the user space
+ * of the VHE kernel. ARMv8.1 ARM describes this as 'InHost'.
+ *
+ * Note that the HCR_EL2.{E2H,TGE}={0,1} isn't really handled in the
+ * rest of the KVM code, and will result in a misbehaving guest.
+ */
+ return vcpu_is_el2(vcpu) || (e2h && tge) || tge;
+}
+
+static inline bool vcpu_is_host_el0(const struct kvm_vcpu *vcpu)
+{
+ return is_hyp_ctxt(vcpu) && !vcpu_is_el2(vcpu);
+}
+
+static inline bool is_nested_ctxt(struct kvm_vcpu *vcpu)
+{
+ return vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu);
+}
+
+static inline bool vserror_state_is_nested(struct kvm_vcpu *vcpu)
+{
+ if (!is_nested_ctxt(vcpu))
+ return false;
+
+ return vcpu_el2_amo_is_set(vcpu) ||
+ (__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TMEA);
+}
+
/*
* The layout of SPSR for an AArch32 state is different when observed from an
* AArch64 SPSR_ELx or an AArch32 SPSR_*. This function generates the AArch32
@@ -234,6 +317,19 @@ static __always_inline u64 kvm_vcpu_get_esr(const struct kvm_vcpu *vcpu)
return vcpu->arch.fault.esr_el2;
}
+static inline bool guest_hyp_wfx_traps_enabled(const struct kvm_vcpu *vcpu)
+{
+ u64 esr = kvm_vcpu_get_esr(vcpu);
+ bool is_wfe = !!(esr & ESR_ELx_WFx_ISS_WFE);
+ u64 hcr_el2 = __vcpu_sys_reg(vcpu, HCR_EL2);
+
+ if (!vcpu_has_nv(vcpu) || vcpu_is_el2(vcpu))
+ return false;
+
+ return ((is_wfe && (hcr_el2 & HCR_TWE)) ||
+ (!is_wfe && (hcr_el2 & HCR_TWI)));
+}
+
static __always_inline int kvm_vcpu_get_condition(const struct kvm_vcpu *vcpu)
{
u64 esr = kvm_vcpu_get_esr(vcpu);
@@ -251,7 +347,12 @@ static __always_inline unsigned long kvm_vcpu_get_hfar(const struct kvm_vcpu *vc
static __always_inline phys_addr_t kvm_vcpu_get_fault_ipa(const struct kvm_vcpu *vcpu)
{
- return ((phys_addr_t)vcpu->arch.fault.hpfar_el2 & HPFAR_MASK) << 8;
+ u64 hpfar = vcpu->arch.fault.hpfar_el2;
+
+ if (unlikely(!(hpfar & HPFAR_EL2_NS)))
+ return INVALID_GPA;
+
+ return FIELD_GET(HPFAR_EL2_FIPA, hpfar) << 12;
}
static inline u64 kvm_vcpu_get_disr(const struct kvm_vcpu *vcpu)
@@ -336,29 +437,34 @@ static __always_inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu)
return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC;
}
-static __always_inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu)
+static inline
+bool kvm_vcpu_trap_is_permission_fault(const struct kvm_vcpu *vcpu)
+{
+ return esr_fsc_is_permission_fault(kvm_vcpu_get_esr(vcpu));
+}
+
+static inline
+bool kvm_vcpu_trap_is_translation_fault(const struct kvm_vcpu *vcpu)
{
- return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC_TYPE;
+ return esr_fsc_is_translation_fault(kvm_vcpu_get_esr(vcpu));
}
-static __always_inline u8 kvm_vcpu_trap_get_fault_level(const struct kvm_vcpu *vcpu)
+static inline
+u64 kvm_vcpu_trap_get_perm_fault_granule(const struct kvm_vcpu *vcpu)
{
- return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC_LEVEL;
+ unsigned long esr = kvm_vcpu_get_esr(vcpu);
+
+ BUG_ON(!esr_fsc_is_permission_fault(esr));
+ return BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(esr & ESR_ELx_FSC_LEVEL));
}
static __always_inline bool kvm_vcpu_abt_issea(const struct kvm_vcpu *vcpu)
{
switch (kvm_vcpu_trap_get_fault(vcpu)) {
- case FSC_SEA:
- case FSC_SEA_TTW0:
- case FSC_SEA_TTW1:
- case FSC_SEA_TTW2:
- case FSC_SEA_TTW3:
- case FSC_SECC:
- case FSC_SECC_TTW0:
- case FSC_SECC_TTW1:
- case FSC_SECC_TTW2:
- case FSC_SECC_TTW3:
+ case ESR_ELx_FSC_EXTABT:
+ case ESR_ELx_FSC_SEA_TTW(-1) ... ESR_ELx_FSC_SEA_TTW(3):
+ case ESR_ELx_FSC_SECC:
+ case ESR_ELx_FSC_SECC_TTW(-1) ... ESR_ELx_FSC_SECC_TTW(3):
return true;
default:
return false;
@@ -373,8 +479,21 @@ static __always_inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu)
static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
{
- if (kvm_vcpu_abt_iss1tw(vcpu))
- return true;
+ if (kvm_vcpu_abt_iss1tw(vcpu)) {
+ /*
+ * Only a permission fault on a S1PTW should be
+ * considered as a write. Otherwise, page tables baked
+ * in a read-only memslot will result in an exception
+ * being delivered in the guest.
+ *
+ * The drawback is that we end-up faulting twice if the
+ * guest is using any of HW AF/DB: a translation fault
+ * to map the page containing the PT (read only at
+ * first), then a permission fault to allow the flags
+ * to be set.
+ */
+ return kvm_vcpu_trap_is_permission_fault(vcpu);
+ }
if (kvm_vcpu_trap_is_iabt(vcpu))
return false;
@@ -384,7 +503,7 @@ static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
{
- return vcpu_read_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
+ return __vcpu_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
}
static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
@@ -473,12 +592,86 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu)
{
- vcpu->arch.flags |= KVM_ARM64_INCREMENT_PC;
+ WARN_ON(vcpu_get_flag(vcpu, PENDING_EXCEPTION));
+ vcpu_set_flag(vcpu, INCREMENT_PC);
+}
+
+#define kvm_pend_exception(v, e) \
+ do { \
+ WARN_ON(vcpu_get_flag((v), INCREMENT_PC)); \
+ vcpu_set_flag((v), PENDING_EXCEPTION); \
+ vcpu_set_flag((v), e); \
+ } while (0)
+
+/*
+ * Returns a 'sanitised' view of CPTR_EL2, translating from nVHE to the VHE
+ * format if E2H isn't set.
+ */
+static inline u64 vcpu_sanitised_cptr_el2(const struct kvm_vcpu *vcpu)
+{
+ u64 cptr = __vcpu_sys_reg(vcpu, CPTR_EL2);
+
+ if (!vcpu_el2_e2h_is_set(vcpu))
+ cptr = translate_cptr_el2_to_cpacr_el1(cptr);
+
+ return cptr;
}
-static inline bool vcpu_has_feature(struct kvm_vcpu *vcpu, int feature)
+static inline bool ____cptr_xen_trap_enabled(const struct kvm_vcpu *vcpu,
+ unsigned int xen)
+{
+ switch (xen) {
+ case 0b00:
+ case 0b10:
+ return true;
+ case 0b01:
+ return vcpu_el2_tge_is_set(vcpu) && !vcpu_is_el2(vcpu);
+ case 0b11:
+ default:
+ return false;
+ }
+}
+
+#define __guest_hyp_cptr_xen_trap_enabled(vcpu, xen) \
+ (!vcpu_has_nv(vcpu) ? false : \
+ ____cptr_xen_trap_enabled(vcpu, \
+ SYS_FIELD_GET(CPACR_EL1, xen, \
+ vcpu_sanitised_cptr_el2(vcpu))))
+
+static inline bool guest_hyp_fpsimd_traps_enabled(const struct kvm_vcpu *vcpu)
{
- return test_bit(feature, vcpu->arch.features);
+ return __guest_hyp_cptr_xen_trap_enabled(vcpu, FPEN);
}
+static inline bool guest_hyp_sve_traps_enabled(const struct kvm_vcpu *vcpu)
+{
+ return __guest_hyp_cptr_xen_trap_enabled(vcpu, ZEN);
+}
+
+static inline void vcpu_set_hcrx(struct kvm_vcpu *vcpu)
+{
+ struct kvm *kvm = vcpu->kvm;
+
+ if (cpus_have_final_cap(ARM64_HAS_HCX)) {
+ /*
+ * In general, all HCRX_EL2 bits are gated by a feature.
+ * The only reason we can set SMPME without checking any
+ * feature is that its effects are not directly observable
+ * from the guest.
+ */
+ vcpu->arch.hcrx_el2 = HCRX_EL2_SMPME;
+
+ if (kvm_has_feat(kvm, ID_AA64ISAR2_EL1, MOPS, IMP))
+ vcpu->arch.hcrx_el2 |= (HCRX_EL2_MSCEn | HCRX_EL2_MCE2);
+
+ if (kvm_has_tcr2(kvm))
+ vcpu->arch.hcrx_el2 |= HCRX_EL2_TCR2En;
+
+ if (kvm_has_fpmr(kvm))
+ vcpu->arch.hcrx_el2 |= HCRX_EL2_EnFPM;
+
+ if (kvm_has_sctlr2(kvm))
+ vcpu->arch.hcrx_el2 |= HCRX_EL2_SCTLR2En;
+ }
+}
#endif /* __ARM64_KVM_EMULATE_H__ */
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index de32152cea04..2b07f0a27a7d 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -16,6 +16,7 @@
#include <linux/types.h>
#include <linux/jump_label.h>
#include <linux/kvm_types.h>
+#include <linux/maple_tree.h>
#include <linux/percpu.h>
#include <linux/psci.h>
#include <asm/arch_gicv3.h>
@@ -26,6 +27,7 @@
#include <asm/fpsimd.h>
#include <asm/kvm.h>
#include <asm/kvm_asm.h>
+#include <asm/vncr_mapping.h>
#define __KVM_HAVE_ARCH_INTC_INITIALIZED
@@ -37,16 +39,21 @@
#define KVM_MAX_VCPUS VGIC_V3_MAX_CPUS
-#define KVM_VCPU_MAX_FEATURES 7
+#define KVM_VCPU_MAX_FEATURES 9
+#define KVM_VCPU_VALID_FEATURES (BIT(KVM_VCPU_MAX_FEATURES) - 1)
#define KVM_REQ_SLEEP \
KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
-#define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1)
-#define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(2)
-#define KVM_REQ_RECORD_STEAL KVM_ARCH_REQ(3)
-#define KVM_REQ_RELOAD_GICv4 KVM_ARCH_REQ(4)
-#define KVM_REQ_RELOAD_PMU KVM_ARCH_REQ(5)
-#define KVM_REQ_SUSPEND KVM_ARCH_REQ(6)
+#define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1)
+#define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(2)
+#define KVM_REQ_RECORD_STEAL KVM_ARCH_REQ(3)
+#define KVM_REQ_RELOAD_GICv4 KVM_ARCH_REQ(4)
+#define KVM_REQ_RELOAD_PMU KVM_ARCH_REQ(5)
+#define KVM_REQ_SUSPEND KVM_ARCH_REQ(6)
+#define KVM_REQ_RESYNC_PMU_EL0 KVM_ARCH_REQ(7)
+#define KVM_REQ_NESTED_S2_UNMAP KVM_ARCH_REQ(8)
+#define KVM_REQ_GUEST_HYP_IRQ_PENDING KVM_ARCH_REQ(9)
+#define KVM_REQ_MAP_L1_VNCR_EL2 KVM_ARCH_REQ(10)
#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
KVM_DIRTY_LOG_INITIALLY_SET)
@@ -60,19 +67,84 @@
enum kvm_mode {
KVM_MODE_DEFAULT,
KVM_MODE_PROTECTED,
+ KVM_MODE_NV,
KVM_MODE_NONE,
};
+#ifdef CONFIG_KVM
enum kvm_mode kvm_get_mode(void);
+#else
+static inline enum kvm_mode kvm_get_mode(void) { return KVM_MODE_NONE; }
+#endif
-DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
-
-extern unsigned int kvm_sve_max_vl;
-int kvm_arm_init_sve(void);
+extern unsigned int __ro_after_init kvm_sve_max_vl;
+extern unsigned int __ro_after_init kvm_host_sve_max_vl;
+int __init kvm_arm_init_sve(void);
u32 __attribute_const__ kvm_target_cpu(void);
-int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
+void kvm_reset_vcpu(struct kvm_vcpu *vcpu);
void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu);
+struct kvm_hyp_memcache {
+ phys_addr_t head;
+ unsigned long nr_pages;
+ struct pkvm_mapping *mapping; /* only used from EL1 */
+
+#define HYP_MEMCACHE_ACCOUNT_STAGE2 BIT(1)
+ unsigned long flags;
+};
+
+static inline void push_hyp_memcache(struct kvm_hyp_memcache *mc,
+ phys_addr_t *p,
+ phys_addr_t (*to_pa)(void *virt))
+{
+ *p = mc->head;
+ mc->head = to_pa(p);
+ mc->nr_pages++;
+}
+
+static inline void *pop_hyp_memcache(struct kvm_hyp_memcache *mc,
+ void *(*to_va)(phys_addr_t phys))
+{
+ phys_addr_t *p = to_va(mc->head & PAGE_MASK);
+
+ if (!mc->nr_pages)
+ return NULL;
+
+ mc->head = *p;
+ mc->nr_pages--;
+
+ return p;
+}
+
+static inline int __topup_hyp_memcache(struct kvm_hyp_memcache *mc,
+ unsigned long min_pages,
+ void *(*alloc_fn)(void *arg),
+ phys_addr_t (*to_pa)(void *virt),
+ void *arg)
+{
+ while (mc->nr_pages < min_pages) {
+ phys_addr_t *p = alloc_fn(arg);
+
+ if (!p)
+ return -ENOMEM;
+ push_hyp_memcache(mc, p, to_pa);
+ }
+
+ return 0;
+}
+
+static inline void __free_hyp_memcache(struct kvm_hyp_memcache *mc,
+ void (*free_fn)(void *virt, void *arg),
+ void *(*to_va)(phys_addr_t phys),
+ void *arg)
+{
+ while (mc->nr_pages)
+ free_fn(pop_hyp_memcache(mc, to_va), arg);
+}
+
+void free_hyp_memcache(struct kvm_hyp_memcache *mc);
+int topup_hyp_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages);
+
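For readers new to this structure: the memcache is a stack of free pages chained through their own first word, with the head kept as a physical address so both EL1 and the hypervisor can walk it after converting to their own mapping. A standalone toy model of the same idea, with identity phys/virt conversion and heap-allocated "pages" (illustrative only, not how the hypervisor side actually works):

#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>

struct demo_memcache {
	uintptr_t head;		/* "physical" address of the top page */
	unsigned long nr_pages;
};

/* Identity phys<->virt conversion, good enough for a userspace demo. */
static uintptr_t demo_to_pa(void *va) { return (uintptr_t)va; }
static void *demo_to_va(uintptr_t pa) { return (void *)pa; }

static void demo_push(struct demo_memcache *mc, uintptr_t *page)
{
	*page = mc->head;	/* the previous head lives inside the page */
	mc->head = demo_to_pa(page);
	mc->nr_pages++;
}

static void *demo_pop(struct demo_memcache *mc)
{
	uintptr_t *page;

	if (!mc->nr_pages)
		return NULL;

	page = demo_to_va(mc->head);
	mc->head = *page;
	mc->nr_pages--;
	return page;
}

int main(void)
{
	struct demo_memcache mc = { 0, 0 };

	for (int i = 0; i < 4; i++) {
		uintptr_t *page = aligned_alloc(4096, 4096);

		if (page)
			demo_push(&mc, page);
	}

	for (void *p; (p = demo_pop(&mc)) != NULL; )
		free(p);

	printf("pages left: %lu\n", mc.nr_pages);
	return 0;
}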
struct kvm_vmid {
atomic64_t id;
};
@@ -93,10 +165,68 @@ struct kvm_s2_mmu {
phys_addr_t pgd_phys;
struct kvm_pgtable *pgt;
+ /*
+ * VTCR value used on the host. For a non-NV guest (or a NV
+ * guest that runs in a context where its own S2 doesn't
+ * apply), its T0SZ value reflects that of the IPA size.
+ *
+ * For a shadow S2 MMU, T0SZ reflects the PARange exposed to
+ * the guest.
+ */
+ u64 vtcr;
+
/* The last vcpu id that ran on each physical CPU */
int __percpu *last_vcpu_ran;
+#define KVM_ARM_EAGER_SPLIT_CHUNK_SIZE_DEFAULT 0
+ /*
+ * Memory cache used to split
+ * KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE worth of huge pages. It
+ * is used to allocate stage2 page tables while splitting huge
+ * pages. The choice of KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE
+ * influences both the capacity of the split page cache, and
+ * how often KVM reschedules. Be wary of raising CHUNK_SIZE
+ * too high.
+ *
+ * Protected by kvm->slots_lock.
+ */
+ struct kvm_mmu_memory_cache split_page_cache;
+ uint64_t split_page_chunk_size;
+
struct kvm_arch *arch;
+
+ /*
+ * For a shadow stage-2 MMU, the virtual vttbr used by the
+ * host to parse the guest S2.
+ * This either contains:
+ * - the virtual VTTBR programmed by the guest hypervisor with
+ * CnP cleared
+ * - The value 1 (VMID=0, BADDR=0, CnP=1) if invalid
+ *
+ * We also cache the full VTCR which gets used for TLB invalidation,
+ * taking the ARM ARM's "Any of the bits in VTCR_EL2 are permitted
+ * to be cached in a TLB" to the letter.
+ */
+ u64 tlb_vttbr;
+ u64 tlb_vtcr;
+
+ /*
+ * true when this represents a nested context where virtual
+ * HCR_EL2.VM == 1
+ */
+ bool nested_stage2_enabled;
+
+ /*
+ * true when this MMU needs to be unmapped before being used for a new
+ * purpose.
+ */
+ bool pending_unmap;
+
+ /*
+ * 0: Nobody is currently using this, check vttbr for validity
+ * >0: Somebody is actively using this.
+ */
+ atomic_t refcnt;
};
struct kvm_arch_memory_slot {
@@ -112,21 +242,86 @@ struct kvm_arch_memory_slot {
struct kvm_smccc_features {
unsigned long std_bmap;
unsigned long std_hyp_bmap;
- unsigned long vendor_hyp_bmap;
+ unsigned long vendor_hyp_bmap; /* Function numbers 0-63 */
+ unsigned long vendor_hyp_bmap_2; /* Function numbers 64-127 */
+};
+
+typedef unsigned int pkvm_handle_t;
+
+struct kvm_protected_vm {
+ pkvm_handle_t handle;
+ struct kvm_hyp_memcache teardown_mc;
+ struct kvm_hyp_memcache stage2_teardown_mc;
+ bool enabled;
+};
+
+struct kvm_mpidr_data {
+ u64 mpidr_mask;
+ DECLARE_FLEX_ARRAY(u16, cmpidr_to_idx);
+};
+
+static inline u16 kvm_mpidr_index(struct kvm_mpidr_data *data, u64 mpidr)
+{
+ unsigned long index = 0, mask = data->mpidr_mask;
+ unsigned long aff = mpidr & MPIDR_HWID_BITMASK;
+
+ bitmap_gather(&index, &aff, &mask, fls(mask));
+
+ return index;
+}
+
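bitmap_gather() packs the MPIDR affinity bits selected by mpidr_mask into consecutive low-order bits, which is what turns a sparse MPIDR into a dense table index. An open-coded, single-word equivalent with an invented mask layout:

#include <stdio.h>
#include <stdint.h>

/*
 * Open-coded equivalent of what bitmap_gather() does for one 64-bit
 * word: pick the bits of @val selected by @mask and pack them into the
 * low-order bits of the result.
 */
static uint64_t gather_bits(uint64_t val, uint64_t mask)
{
	uint64_t out = 0;
	unsigned int pos = 0;

	for (unsigned int bit = 0; bit < 64; bit++) {
		if (mask & (1ULL << bit)) {
			if (val & (1ULL << bit))
				out |= 1ULL << pos;
			pos++;
		}
	}
	return out;
}

int main(void)
{
	/*
	 * Hypothetical VM where only Aff0[1:0] and Aff1[0] vary between
	 * vcpus: mpidr_mask would then be 0x103, and a vcpu with
	 * MPIDR.Aff1=1, Aff0=2 compresses to index 0b110 = 6.
	 */
	uint64_t mpidr_mask = 0x103;
	uint64_t mpidr = (1ULL << 8) | 2;	/* Aff1=1, Aff0=2 */

	printf("index = %llu\n",
	       (unsigned long long)gather_bits(mpidr, mpidr_mask));
	return 0;
}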
+struct kvm_sysreg_masks;
+
+enum fgt_group_id {
+ __NO_FGT_GROUP__,
+ HFGRTR_GROUP,
+ HFGWTR_GROUP = HFGRTR_GROUP,
+ HDFGRTR_GROUP,
+ HDFGWTR_GROUP = HDFGRTR_GROUP,
+ HFGITR_GROUP,
+ HAFGRTR_GROUP,
+ HFGRTR2_GROUP,
+ HFGWTR2_GROUP = HFGRTR2_GROUP,
+ HDFGRTR2_GROUP,
+ HDFGWTR2_GROUP = HDFGRTR2_GROUP,
+ HFGITR2_GROUP,
+
+ /* Must be last */
+ __NR_FGT_GROUP_IDS__
};
struct kvm_arch {
struct kvm_s2_mmu mmu;
- /* VTCR_EL2 value for this VM */
- u64 vtcr;
+ /*
+ * Fine-Grained UNDEF, mimicking the FGT layout defined by the
+ * architecture. We track them globally, as we present the
+ * same feature-set to all vcpus.
+ *
+ * Index 0 is currently spare.
+ */
+ u64 fgu[__NR_FGT_GROUP_IDS__];
+
+ /*
+ * Stage 2 paging state for VMs with nested S2 using a virtual
+ * VMID.
+ */
+ struct kvm_s2_mmu *nested_mmus;
+ size_t nested_mmus_size;
+ int nested_mmus_next;
/* Interrupt controller */
struct vgic_dist vgic;
+ /* Timers */
+ struct arch_timer_vm_data timer_data;
+
/* Mandated version of PSCI */
u32 psci_version;
+ /* Protects VM-scoped configuration data */
+ struct mutex config_lock;
+
/*
* If we encounter a data abort without valid instruction syndrome
* information, report this to user space. User space can (and
@@ -138,20 +333,30 @@ struct kvm_arch {
#define KVM_ARCH_FLAG_MTE_ENABLED 1
 /* At least one vCPU has run in the VM */
#define KVM_ARCH_FLAG_HAS_RAN_ONCE 2
- /*
- * The following two bits are used to indicate the guest's EL1
- * register width configuration. A value of KVM_ARCH_FLAG_EL1_32BIT
- * bit is valid only when KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED is set.
- * Otherwise, the guest's EL1 register width has not yet been
- * determined yet.
- */
-#define KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED 3
-#define KVM_ARCH_FLAG_EL1_32BIT 4
+ /* The vCPU feature set for the VM is configured */
+#define KVM_ARCH_FLAG_VCPU_FEATURES_CONFIGURED 3
/* PSCI SYSTEM_SUSPEND enabled for the guest */
-#define KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED 5
-
+#define KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED 4
+ /* VM counter offset */
+#define KVM_ARCH_FLAG_VM_COUNTER_OFFSET 5
+ /* Timer PPIs made immutable */
+#define KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE 6
+ /* Initial ID reg values loaded */
+#define KVM_ARCH_FLAG_ID_REGS_INITIALIZED 7
+ /* Fine-Grained UNDEF initialised */
+#define KVM_ARCH_FLAG_FGU_INITIALIZED 8
+ /* SVE exposed to guest */
+#define KVM_ARCH_FLAG_GUEST_HAS_SVE 9
+ /* MIDR_EL1, REVIDR_EL1, and AIDR_EL1 are writable from userspace */
+#define KVM_ARCH_FLAG_WRITABLE_IMP_ID_REGS 10
unsigned long flags;
+ /* VM-wide vCPU feature set */
+ DECLARE_BITMAP(vcpu_features, KVM_VCPU_MAX_FEATURES);
+
+ /* MPIDR to vcpu index mapping, optional */
+ struct kvm_mpidr_data *mpidr_data;
+
/*
* VM-wide PMU filter, implemented as a bitmap and big enough for
* up to 2^10 events (ARMv8.0) or 2^16 events (ARMv8.1+).
@@ -161,11 +366,44 @@ struct kvm_arch {
cpumask_var_t supported_cpus;
- u8 pfr0_csv2;
- u8 pfr0_csv3;
+ /* Maximum number of counters for the guest */
+ u8 nr_pmu_counters;
+
+ /* Iterator for idreg debugfs */
+ u8 idreg_debugfs_iter;
/* Hypercall features firmware registers' descriptor */
struct kvm_smccc_features smccc_feat;
+ struct maple_tree smccc_filter;
+
+ /*
+ * Emulated CPU ID registers per VM
+ * (Op0, Op1, CRn, CRm, Op2) of the ID registers to be saved in it
+ * is (3, 0, 0, crm, op2), where 1<=crm<8, 0<=op2<8.
+ *
+ * These emulated idregs are VM-wide, but accessed from the context of a vCPU.
+ * Atomic access to multiple idregs is guarded by kvm_arch.config_lock.
+ */
+#define IDREG_IDX(id) (((sys_reg_CRm(id) - 1) << 3) | sys_reg_Op2(id))
+#define KVM_ARM_ID_REG_NUM (IDREG_IDX(sys_reg(3, 0, 0, 7, 7)) + 1)
+ u64 id_regs[KVM_ARM_ID_REG_NUM];
+
+ u64 midr_el1;
+ u64 revidr_el1;
+ u64 aidr_el1;
+ u64 ctr_el0;
+
+ /* Masks for VNCR-backed and general EL2 sysregs */
+ struct kvm_sysreg_masks *sysreg_masks;
+
+ /* Count the number of VNCR_EL2 currently mapped */
+ atomic_t vncr_map_count;
+
+ /*
+ * For an untrusted host VM, 'pkvm.handle' is used to lookup
+ * the associated pKVM instance in the hypervisor.
+ */
+ struct kvm_protected_vm pkvm;
};
struct kvm_vcpu_fault_info {
@@ -175,31 +413,36 @@ struct kvm_vcpu_fault_info {
u64 disr_el1; /* Deferred [SError] Status Register */
};
+/*
+ * VNCR() just places the VNCR_capable registers in the enum after
+ * __VNCR_START__, forcing their value (after correction) to be an 8-byte offset
+ * from the VNCR base. As we don't require the enum to be otherwise ordered,
+ * we need the terrible hack below to ensure that we correctly size the
+ * sys_regs array, no matter what.
+ *
+ * The __MAX__ macro has been lifted from Sean Eron Anderson's wonderful
+ * treasure trove of bit hacks:
+ * https://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
+ */
+#define __MAX__(x,y) ((x) ^ (((x) ^ (y)) & -((x) < (y))))
+#define VNCR(r) \
+ __before_##r, \
+ r = __VNCR_START__ + ((VNCR_ ## r) / 8), \
+ __after_##r = __MAX__(__before_##r - 1, r)
+
+#define MARKER(m) \
+ m, __after_##m = m - 1
+
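To see why the __MAX__() dance is needed, here is a tiny self-contained model with invented VNCR_REG_A/VNCR_REG_B offsets (the real ones come from vncr_mapping.h). Even though REG_B is declared after REG_A but has a smaller offset, the trailing NR_* enumerator still ends up strictly above every forced value, so the backing array is sized correctly:

#include <stdio.h>

/* Hypothetical VNCR byte offsets, normally generated elsewhere. */
#define VNCR_REG_A	0x10	/* index 2 from the VNCR base */
#define VNCR_REG_B	0x08	/* index 1, declared *after* REG_A on purpose */

#define __MAX__(x, y)	((x) ^ (((x) ^ (y)) & -((x) < (y))))
#define VNCR(r) \
	__before_##r, \
	r = __VNCR_START__ + ((VNCR_##r) / 8), \
	__after_##r = __MAX__(__before_##r - 1, r)

enum demo_sysreg {
	SOME_REG,		/* 0 */
	__VNCR_START__,		/* 1 */
	VNCR(REG_A),		/* REG_A = 1 + 2 = 3 */
	VNCR(REG_B),		/* REG_B = 1 + 1 = 2, __after_REG_B stays at 3 */
	NR_DEMO_REGS		/* 4, still past the highest forced index */
};

int main(void)
{
	printf("REG_A=%d REG_B=%d NR=%d\n", REG_A, REG_B, NR_DEMO_REGS);
	return 0;
}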
enum vcpu_sysreg {
__INVALID_SYSREG__, /* 0 is reserved as an invalid value */
MPIDR_EL1, /* MultiProcessor Affinity Register */
+ CLIDR_EL1, /* Cache Level ID Register */
CSSELR_EL1, /* Cache Size Selection Register */
- SCTLR_EL1, /* System Control Register */
- ACTLR_EL1, /* Auxiliary Control Register */
- CPACR_EL1, /* Coprocessor Access Control */
- ZCR_EL1, /* SVE Control */
- TTBR0_EL1, /* Translation Table Base Register 0 */
- TTBR1_EL1, /* Translation Table Base Register 1 */
- TCR_EL1, /* Translation Control Register */
- ESR_EL1, /* Exception Syndrome Register */
- AFSR0_EL1, /* Auxiliary Fault Status Register 0 */
- AFSR1_EL1, /* Auxiliary Fault Status Register 1 */
- FAR_EL1, /* Fault Address Register */
- MAIR_EL1, /* Memory Attribute Indirection Register */
- VBAR_EL1, /* Vector Base Address Register */
- CONTEXTIDR_EL1, /* Context ID Register */
TPIDR_EL0, /* Thread ID, User R/W */
TPIDRRO_EL0, /* Thread ID, User R/O */
TPIDR_EL1, /* Thread ID, Privileged */
- AMAIR_EL1, /* Aux Memory Attribute Indirection Register */
CNTKCTL_EL1, /* Timer Control Register (EL1) */
PAR_EL1, /* Physical Address Register */
- MDSCR_EL1, /* Monitor Debug System Control Register */
MDCCINT_EL1, /* Monitor Debug Comms Channel Interrupt Enable Reg */
OSLSR_EL1, /* OS Lock Status Register */
DISR_EL1, /* Deferred Interrupt Status Register */
@@ -230,31 +473,193 @@ enum vcpu_sysreg {
APGAKEYLO_EL1,
APGAKEYHI_EL1,
- ELR_EL1,
- SP_EL1,
- SPSR_EL1,
-
- CNTVOFF_EL2,
- CNTV_CVAL_EL0,
- CNTV_CTL_EL0,
- CNTP_CVAL_EL0,
- CNTP_CTL_EL0,
-
/* Memory Tagging Extension registers */
RGSR_EL1, /* Random Allocation Tag Seed Register */
GCR_EL1, /* Tag Control Register */
- TFSR_EL1, /* Tag Fault Status Register (EL1) */
TFSRE0_EL1, /* Tag Fault Status Register (EL0) */
- /* 32bit specific registers. Keep them at the end of the range */
+ POR_EL0, /* Permission Overlay Register 0 (EL0) */
+
+ /* FP/SIMD/SVE */
+ SVCR,
+ FPMR,
+
+ /* 32bit specific registers. */
DACR32_EL2, /* Domain Access Control Register */
IFSR32_EL2, /* Instruction Fault Status Register */
FPEXC32_EL2, /* Floating-Point Exception Control Register */
DBGVCR32_EL2, /* Debug Vector Catch Register */
+ /* EL2 registers */
+ SCTLR_EL2, /* System Control Register (EL2) */
+ ACTLR_EL2, /* Auxiliary Control Register (EL2) */
+ CPTR_EL2, /* Architectural Feature Trap Register (EL2) */
+ HACR_EL2, /* Hypervisor Auxiliary Control Register */
+ ZCR_EL2, /* SVE Control Register (EL2) */
+ TTBR0_EL2, /* Translation Table Base Register 0 (EL2) */
+ TTBR1_EL2, /* Translation Table Base Register 1 (EL2) */
+ TCR_EL2, /* Translation Control Register (EL2) */
+ PIRE0_EL2, /* Permission Indirection Register 0 (EL2) */
+ PIR_EL2, /* Permission Indirection Register 1 (EL2) */
+ POR_EL2, /* Permission Overlay Register 2 (EL2) */
+ SPSR_EL2, /* EL2 saved program status register */
+ ELR_EL2, /* EL2 exception link register */
+ AFSR0_EL2, /* Auxiliary Fault Status Register 0 (EL2) */
+ AFSR1_EL2, /* Auxiliary Fault Status Register 1 (EL2) */
+ ESR_EL2, /* Exception Syndrome Register (EL2) */
+ FAR_EL2, /* Fault Address Register (EL2) */
+ HPFAR_EL2, /* Hypervisor IPA Fault Address Register */
+ MAIR_EL2, /* Memory Attribute Indirection Register (EL2) */
+ AMAIR_EL2, /* Auxiliary Memory Attribute Indirection Register (EL2) */
+ VBAR_EL2, /* Vector Base Address Register (EL2) */
+ RVBAR_EL2, /* Reset Vector Base Address Register */
+ CONTEXTIDR_EL2, /* Context ID Register (EL2) */
+ SP_EL2, /* EL2 Stack Pointer */
+ CNTHP_CTL_EL2,
+ CNTHP_CVAL_EL2,
+ CNTHV_CTL_EL2,
+ CNTHV_CVAL_EL2,
+
+ /* Anything from this can be RES0/RES1 sanitised */
+ MARKER(__SANITISED_REG_START__),
+ TCR2_EL2, /* Extended Translation Control Register (EL2) */
+ SCTLR2_EL2, /* System Control Register 2 (EL2) */
+ MDCR_EL2, /* Monitor Debug Configuration Register (EL2) */
+ CNTHCTL_EL2, /* Counter-timer Hypervisor Control register */
+
+ /* Any VNCR-capable reg goes after this point */
+ MARKER(__VNCR_START__),
+
+ VNCR(SCTLR_EL1),/* System Control Register */
+ VNCR(ACTLR_EL1),/* Auxiliary Control Register */
+ VNCR(CPACR_EL1),/* Coprocessor Access Control */
+ VNCR(ZCR_EL1), /* SVE Control */
+ VNCR(TTBR0_EL1),/* Translation Table Base Register 0 */
+ VNCR(TTBR1_EL1),/* Translation Table Base Register 1 */
+ VNCR(TCR_EL1), /* Translation Control Register */
+ VNCR(TCR2_EL1), /* Extended Translation Control Register */
+ VNCR(SCTLR2_EL1), /* System Control Register 2 */
+ VNCR(ESR_EL1), /* Exception Syndrome Register */
+ VNCR(AFSR0_EL1),/* Auxiliary Fault Status Register 0 */
+ VNCR(AFSR1_EL1),/* Auxiliary Fault Status Register 1 */
+ VNCR(FAR_EL1), /* Fault Address Register */
+ VNCR(MAIR_EL1), /* Memory Attribute Indirection Register */
+ VNCR(VBAR_EL1), /* Vector Base Address Register */
+ VNCR(CONTEXTIDR_EL1), /* Context ID Register */
+ VNCR(AMAIR_EL1),/* Aux Memory Attribute Indirection Register */
+ VNCR(MDSCR_EL1),/* Monitor Debug System Control Register */
+ VNCR(ELR_EL1),
+ VNCR(SP_EL1),
+ VNCR(SPSR_EL1),
+ VNCR(TFSR_EL1), /* Tag Fault Status Register (EL1) */
+ VNCR(VPIDR_EL2),/* Virtualization Processor ID Register */
+ VNCR(VMPIDR_EL2),/* Virtualization Multiprocessor ID Register */
+ VNCR(HCR_EL2), /* Hypervisor Configuration Register */
+ VNCR(HSTR_EL2), /* Hypervisor System Trap Register */
+ VNCR(VTTBR_EL2),/* Virtualization Translation Table Base Register */
+ VNCR(VTCR_EL2), /* Virtualization Translation Control Register */
+ VNCR(TPIDR_EL2),/* EL2 Software Thread ID Register */
+ VNCR(HCRX_EL2), /* Extended Hypervisor Configuration Register */
+
+ /* Permission Indirection Extension registers */
+ VNCR(PIR_EL1), /* Permission Indirection Register 1 (EL1) */
+ VNCR(PIRE0_EL1), /* Permission Indirection Register 0 (EL1) */
+
+ VNCR(POR_EL1), /* Permission Overlay Register 1 (EL1) */
+
+ /* FEAT_RAS registers */
+ VNCR(VDISR_EL2),
+ VNCR(VSESR_EL2),
+
+ VNCR(HFGRTR_EL2),
+ VNCR(HFGWTR_EL2),
+ VNCR(HFGITR_EL2),
+ VNCR(HDFGRTR_EL2),
+ VNCR(HDFGWTR_EL2),
+ VNCR(HAFGRTR_EL2),
+ VNCR(HFGRTR2_EL2),
+ VNCR(HFGWTR2_EL2),
+ VNCR(HFGITR2_EL2),
+ VNCR(HDFGRTR2_EL2),
+ VNCR(HDFGWTR2_EL2),
+
+ VNCR(VNCR_EL2),
+
+ VNCR(CNTVOFF_EL2),
+ VNCR(CNTV_CVAL_EL0),
+ VNCR(CNTV_CTL_EL0),
+ VNCR(CNTP_CVAL_EL0),
+ VNCR(CNTP_CTL_EL0),
+
+ VNCR(ICH_LR0_EL2),
+ VNCR(ICH_LR1_EL2),
+ VNCR(ICH_LR2_EL2),
+ VNCR(ICH_LR3_EL2),
+ VNCR(ICH_LR4_EL2),
+ VNCR(ICH_LR5_EL2),
+ VNCR(ICH_LR6_EL2),
+ VNCR(ICH_LR7_EL2),
+ VNCR(ICH_LR8_EL2),
+ VNCR(ICH_LR9_EL2),
+ VNCR(ICH_LR10_EL2),
+ VNCR(ICH_LR11_EL2),
+ VNCR(ICH_LR12_EL2),
+ VNCR(ICH_LR13_EL2),
+ VNCR(ICH_LR14_EL2),
+ VNCR(ICH_LR15_EL2),
+
+ VNCR(ICH_AP0R0_EL2),
+ VNCR(ICH_AP0R1_EL2),
+ VNCR(ICH_AP0R2_EL2),
+ VNCR(ICH_AP0R3_EL2),
+ VNCR(ICH_AP1R0_EL2),
+ VNCR(ICH_AP1R1_EL2),
+ VNCR(ICH_AP1R2_EL2),
+ VNCR(ICH_AP1R3_EL2),
+ VNCR(ICH_HCR_EL2),
+ VNCR(ICH_VMCR_EL2),
+
NR_SYS_REGS /* Nothing after this line! */
};
+struct kvm_sysreg_masks {
+ struct {
+ u64 res0;
+ u64 res1;
+ } mask[NR_SYS_REGS - __SANITISED_REG_START__];
+};
+
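The res0/res1 pair is meant to be folded into a register value in the usual way: clear what must read as zero, then set what must read as one. A one-line sketch of that folding (the helper name is illustrative, not the in-tree accessor):

#include <stdint.h>

/* Clear the must-be-zero bits, then force the must-be-one bits. */
static inline uint64_t demo_sanitise_reg(uint64_t val, uint64_t res0,
					 uint64_t res1)
{
	return (val & ~res0) | res1;
}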
+struct fgt_masks {
+ const char *str;
+ u64 mask;
+ u64 nmask;
+ u64 res0;
+};
+
+extern struct fgt_masks hfgrtr_masks;
+extern struct fgt_masks hfgwtr_masks;
+extern struct fgt_masks hfgitr_masks;
+extern struct fgt_masks hdfgrtr_masks;
+extern struct fgt_masks hdfgwtr_masks;
+extern struct fgt_masks hafgrtr_masks;
+extern struct fgt_masks hfgrtr2_masks;
+extern struct fgt_masks hfgwtr2_masks;
+extern struct fgt_masks hfgitr2_masks;
+extern struct fgt_masks hdfgrtr2_masks;
+extern struct fgt_masks hdfgwtr2_masks;
+
+extern struct fgt_masks kvm_nvhe_sym(hfgrtr_masks);
+extern struct fgt_masks kvm_nvhe_sym(hfgwtr_masks);
+extern struct fgt_masks kvm_nvhe_sym(hfgitr_masks);
+extern struct fgt_masks kvm_nvhe_sym(hdfgrtr_masks);
+extern struct fgt_masks kvm_nvhe_sym(hdfgwtr_masks);
+extern struct fgt_masks kvm_nvhe_sym(hafgrtr_masks);
+extern struct fgt_masks kvm_nvhe_sym(hfgrtr2_masks);
+extern struct fgt_masks kvm_nvhe_sym(hfgwtr2_masks);
+extern struct fgt_masks kvm_nvhe_sym(hfgitr2_masks);
+extern struct fgt_masks kvm_nvhe_sym(hdfgrtr2_masks);
+extern struct fgt_masks kvm_nvhe_sym(hdfgwtr2_masks);
+
struct kvm_cpu_context {
struct user_pt_regs regs; /* sp = sp_el0 */
@@ -268,15 +673,95 @@ struct kvm_cpu_context {
u64 sys_regs[NR_SYS_REGS];
struct kvm_vcpu *__hyp_running_vcpu;
+
+ /* This pointer has to be 4kB aligned. */
+ u64 *vncr_array;
+};
+
+struct cpu_sve_state {
+ __u64 zcr_el1;
+
+ /*
+ * Ordering is important since __sve_save_state/__sve_restore_state
+ * relies on it.
+ */
+ __u32 fpsr;
+ __u32 fpcr;
+
+ /* Must be SVE_VQ_BYTES (128 bit) aligned. */
+ __u8 sve_regs[];
};
+/*
+ * This structure is instantiated on a per-CPU basis, and contains
+ * data that is:
+ *
+ * - tied to a single physical CPU, and
+ * - either has a lifetime that does not extend past vcpu_put()
+ * - or is an invariant for the lifetime of the system
+ *
+ * Use host_data_ptr(field) as a way to access a pointer to such a
+ * field.
+ */
struct kvm_host_data {
+#define KVM_HOST_DATA_FLAG_HAS_SPE 0
+#define KVM_HOST_DATA_FLAG_HAS_TRBE 1
+#define KVM_HOST_DATA_FLAG_TRBE_ENABLED 4
+#define KVM_HOST_DATA_FLAG_EL1_TRACING_CONFIGURED 5
+#define KVM_HOST_DATA_FLAG_VCPU_IN_HYP_CONTEXT 6
+#define KVM_HOST_DATA_FLAG_L1_VNCR_MAPPED 7
+#define KVM_HOST_DATA_FLAG_HAS_BRBE 8
+ unsigned long flags;
+
struct kvm_cpu_context host_ctxt;
+
+ /*
+ * Hyp VA.
+ * sve_state is only used in pKVM and if system_supports_sve().
+ */
+ struct cpu_sve_state *sve_state;
+
+ /* Used by pKVM only. */
+ u64 fpmr;
+
+ /* Ownership of the FP regs */
+ enum {
+ FP_STATE_FREE,
+ FP_STATE_HOST_OWNED,
+ FP_STATE_GUEST_OWNED,
+ } fp_owner;
+
+ /*
+ * host_debug_state contains the host registers which are
+ * saved and restored during world switches.
+ */
+ struct {
+ /* {Break,watch}point registers */
+ struct kvm_guest_debug_arch regs;
+ /* Statistical profiling extension */
+ u64 pmscr_el1;
+ /* Self-hosted trace */
+ u64 trfcr_el1;
+ /* Values of trap registers for the host before guest entry. */
+ u64 mdcr_el2;
+ u64 brbcr_el1;
+ } host_debug_state;
+
+ /* Guest trace filter value */
+ u64 trfcr_while_in_guest;
+
+ /* Number of programmable event counters (PMCR_EL0.N) for this CPU */
+ unsigned int nr_event_counters;
+
+ /* Number of debug breakpoints/watchpoints for this CPU (minus 1) */
+ unsigned int debug_brps;
+ unsigned int debug_wrps;
};
struct kvm_host_psci_config {
/* PSCI version used by host. */
u32 version;
+ u32 smccc_version;
/* Function IDs used by host if version is v0.1. */
struct psci_0_1_function_ids function_ids_0_1;
@@ -303,88 +788,87 @@ struct vcpu_reset_state {
bool reset;
};
+struct vncr_tlb;
+
struct kvm_vcpu_arch {
struct kvm_cpu_context ctxt;
- /* Guest floating point state */
+ /*
+ * Guest floating point state
+ *
+ * The architecture has two main floating point extensions,
+ * the original FPSIMD and SVE. These have overlapping
+ * register views, with the FPSIMD V registers occupying the
+ * low 128 bits of the SVE Z registers. When the core
+ * floating point code saves the register state of a task it
+ * records which view it saved in fp_type.
+ */
void *sve_state;
+ enum fp_type fp_type;
unsigned int sve_max_vl;
- u64 svcr;
/* Stage 2 paging state used by the hardware on next switch */
struct kvm_s2_mmu *hw_mmu;
/* Values of trap registers for the guest. */
u64 hcr_el2;
+ u64 hcrx_el2;
u64 mdcr_el2;
- u64 cptr_el2;
-
- /* Values of trap registers for the host before guest entry. */
- u64 mdcr_el2_host;
/* Exception Information */
struct kvm_vcpu_fault_info fault;
- /* Miscellaneous vcpu state flags */
- u64 flags;
+ /* Configuration flags, set once and for all before the vcpu can run */
+ u8 cflags;
+
+ /* Input flags to the hypervisor code, potentially cleared after use */
+ u8 iflags;
+
+ /* State flags for kernel bookkeeping, unused by the hypervisor code */
+ u16 sflags;
+
+ /*
+ * Don't run the guest (internal implementation need).
+ *
+ * Contrary to the flags above, this is set/cleared outside of
+ * a vcpu context, and thus cannot be mixed with the flags
+ * themselves (or the flag accesses need to be made atomic).
+ */
+ bool pause;
/*
* We maintain more than a single set of debug registers to support
* debugging the guest from the host and to maintain separate host and
* guest state during world switches. vcpu_debug_state are the debug
- * registers of the vcpu as the guest sees them. host_debug_state are
- * the host registers which are saved and restored during
- * world switches. external_debug_state contains the debug
- * values we want to debug the guest. This is set via the
- * KVM_SET_GUEST_DEBUG ioctl.
+ * registers of the vcpu as the guest sees them.
*
- * debug_ptr points to the set of debug registers that should be loaded
- * onto the hardware when running the guest.
+ * external_debug_state contains the debug values we want to debug the
+ * guest. This is set via the KVM_SET_GUEST_DEBUG ioctl.
*/
- struct kvm_guest_debug_arch *debug_ptr;
struct kvm_guest_debug_arch vcpu_debug_state;
struct kvm_guest_debug_arch external_debug_state;
+ u64 external_mdscr_el1;
- struct user_fpsimd_state *host_fpsimd_state; /* hyp VA */
- struct task_struct *parent_task;
-
- struct {
- /* {Break,watch}point registers */
- struct kvm_guest_debug_arch regs;
- /* Statistical profiling extension */
- u64 pmscr_el1;
- /* Self-hosted trace */
- u64 trfcr_el1;
- } host_debug_state;
+ enum {
+ VCPU_DEBUG_FREE,
+ VCPU_DEBUG_HOST_OWNED,
+ VCPU_DEBUG_GUEST_OWNED,
+ } debug_owner;
/* VGIC state */
struct vgic_cpu vgic_cpu;
struct arch_timer_cpu timer_cpu;
struct kvm_pmu pmu;
- /*
- * Guest registers we preserve during guest debugging.
- *
- * These shadow registers are updated by the kvm_handle_sys_reg
- * trap handler if the guest accesses or updates them while we
- * are using guest debug.
- */
- struct {
- u32 mdscr_el1;
- } guest_debug_preserved;
-
/* vcpu power state */
struct kvm_mp_state mp_state;
-
- /* Don't run the guest (internal implementation need) */
- bool pause;
+ spinlock_t mp_state_lock;
/* Cache some mmu pages needed inside spinlock regions */
struct kvm_mmu_memory_cache mmu_page_cache;
- /* Target CPU and feature flags */
- int target;
- DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES);
+ /* Pages to top-up the pKVM/EL2 guest pool */
+ struct kvm_hyp_memcache pkvm_memcache;
/* Virtual SError ESR to restore when HCR_EL2.VSE is set */
u64 vsesr_el2;
@@ -392,101 +876,226 @@ struct kvm_vcpu_arch {
/* Additional reset state */
struct vcpu_reset_state reset_state;
- /* True when deferrable sysregs are loaded on the physical CPU,
- * see kvm_vcpu_load_sysregs_vhe and kvm_vcpu_put_sysregs_vhe. */
- bool sysregs_loaded_on_cpu;
-
/* Guest PV state */
struct {
u64 last_steal;
gpa_t base;
} steal;
+
+ /* Per-vcpu CCSIDR override or NULL */
+ u32 *ccsidr;
+
+ /* Per-vcpu TLB for VNCR_EL2 -- NULL when !NV */
+ struct vncr_tlb *vncr_tlb;
};
+/*
+ * Each 'flag' is composed of a comma-separated triplet:
+ *
+ * - the flag-set it belongs to in the vcpu->arch structure
+ * - the value for that flag
+ * - the mask for that flag
+ *
+ * __vcpu_single_flag() builds such a triplet for a single-bit flag.
+ * unpack_vcpu_flag() extracts the flag value from the triplet for
+ * direct use outside of the flag accessors.
+ */
+#define __vcpu_single_flag(_set, _f) _set, (_f), (_f)
+
+#define __unpack_flag(_set, _f, _m) _f
+#define unpack_vcpu_flag(...) __unpack_flag(__VA_ARGS__)
+
+#define __build_check_flag(v, flagset, f, m) \
+ do { \
+ typeof(v->arch.flagset) *_fset; \
+ \
+ /* Check that the flags fit in the mask */ \
+ BUILD_BUG_ON(HWEIGHT(m) != HWEIGHT((f) | (m))); \
+ /* Check that the flags fit in the type */ \
+ BUILD_BUG_ON((sizeof(*_fset) * 8) <= __fls(m)); \
+ } while (0)
+
+#define __vcpu_get_flag(v, flagset, f, m) \
+ ({ \
+ __build_check_flag(v, flagset, f, m); \
+ \
+ READ_ONCE(v->arch.flagset) & (m); \
+ })
+
+/*
+ * Note that the set/clear accessors must be preempt-safe in order to
+ * avoid nesting them with load/put which also manipulate flags...
+ */
+#ifdef __KVM_NVHE_HYPERVISOR__
+/* the nVHE hypervisor is always non-preemptible */
+#define __vcpu_flags_preempt_disable()
+#define __vcpu_flags_preempt_enable()
+#else
+#define __vcpu_flags_preempt_disable() preempt_disable()
+#define __vcpu_flags_preempt_enable() preempt_enable()
+#endif
+
+#define __vcpu_set_flag(v, flagset, f, m) \
+ do { \
+ typeof(v->arch.flagset) *fset; \
+ \
+ __build_check_flag(v, flagset, f, m); \
+ \
+ fset = &v->arch.flagset; \
+ __vcpu_flags_preempt_disable(); \
+ if (HWEIGHT(m) > 1) \
+ *fset &= ~(m); \
+ *fset |= (f); \
+ __vcpu_flags_preempt_enable(); \
+ } while (0)
+
+#define __vcpu_clear_flag(v, flagset, f, m) \
+ do { \
+ typeof(v->arch.flagset) *fset; \
+ \
+ __build_check_flag(v, flagset, f, m); \
+ \
+ fset = &v->arch.flagset; \
+ __vcpu_flags_preempt_disable(); \
+ *fset &= ~(m); \
+ __vcpu_flags_preempt_enable(); \
+ } while (0)
+
+#define __vcpu_test_and_clear_flag(v, flagset, f, m) \
+ ({ \
+ typeof(v->arch.flagset) set; \
+ \
+ set = __vcpu_get_flag(v, flagset, f, m); \
+ __vcpu_clear_flag(v, flagset, f, m); \
+ \
+ set; \
+ })
+
+#define vcpu_get_flag(v, ...) __vcpu_get_flag((v), __VA_ARGS__)
+#define vcpu_set_flag(v, ...) __vcpu_set_flag((v), __VA_ARGS__)
+#define vcpu_clear_flag(v, ...) __vcpu_clear_flag((v), __VA_ARGS__)
+#define vcpu_test_and_clear_flag(v, ...) \
+ __vcpu_test_and_clear_flag((v), __VA_ARGS__)
+
+/* KVM_ARM_VCPU_INIT completed */
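To make the triplet mechanics concrete, here is a stripped-down, userspace-buildable model of the accessors. The BUILD_BUG_ON() checks and preemption guards are dropped, and DEMO_IN_WFIT is an invented flag standing in for something like IN_WFIT:

#include <stdio.h>

struct demo_arch { unsigned char sflags; };
struct demo_vcpu { struct demo_arch arch; };

/* A flag is a (flag-set name, value, mask) triplet. */
#define __vcpu_single_flag(_set, _f)	_set, (_f), (_f)

#define __vcpu_set_flag(v, flagset, f, m)	\
	do {					\
		(v)->arch.flagset &= ~(m);	\
		(v)->arch.flagset |= (f);	\
	} while (0)

#define __vcpu_get_flag(v, flagset, f, m)	((v)->arch.flagset & (m))

#define vcpu_set_flag(v, ...)	__vcpu_set_flag((v), __VA_ARGS__)
#define vcpu_get_flag(v, ...)	__vcpu_get_flag((v), __VA_ARGS__)

/* Invented single-bit flag living in the sflags set. */
#define DEMO_IN_WFIT	__vcpu_single_flag(sflags, (1U << 1))

int main(void)
{
	struct demo_vcpu v = { { 0 } };

	/* Expands to __vcpu_set_flag(&v, sflags, BIT(1), BIT(1)). */
	vcpu_set_flag(&v, DEMO_IN_WFIT);
	printf("flag value: %u\n", (unsigned)vcpu_get_flag(&v, DEMO_IN_WFIT));
	return 0;
}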
+#define VCPU_INITIALIZED __vcpu_single_flag(cflags, BIT(0))
+/* SVE config completed */
+#define VCPU_SVE_FINALIZED __vcpu_single_flag(cflags, BIT(1))
+/* pKVM VCPU setup completed */
+#define VCPU_PKVM_FINALIZED __vcpu_single_flag(cflags, BIT(2))
+
+/* Exception pending */
+#define PENDING_EXCEPTION __vcpu_single_flag(iflags, BIT(0))
+/*
+ * PC increment. Overlaps with EXCEPT_MASK on purpose so that it can't
+ * be set together with an exception...
+ */
+#define INCREMENT_PC __vcpu_single_flag(iflags, BIT(1))
+/* Target EL/MODE (not a single flag, but let's abuse the macro) */
+#define EXCEPT_MASK __vcpu_single_flag(iflags, GENMASK(3, 1))
+
+/* Helpers to encode exceptions with minimum fuss */
+#define __EXCEPT_MASK_VAL unpack_vcpu_flag(EXCEPT_MASK)
+#define __EXCEPT_SHIFT __builtin_ctzl(__EXCEPT_MASK_VAL)
+#define __vcpu_except_flags(_f) iflags, (_f << __EXCEPT_SHIFT), __EXCEPT_MASK_VAL
+
+/*
+ * When PENDING_EXCEPTION is set, EXCEPT_MASK can take the following
+ * values:
+ *
+ * For AArch32 EL1:
+ */
+#define EXCEPT_AA32_UND __vcpu_except_flags(0)
+#define EXCEPT_AA32_IABT __vcpu_except_flags(1)
+#define EXCEPT_AA32_DABT __vcpu_except_flags(2)
+/* For AArch64: */
+#define EXCEPT_AA64_EL1_SYNC __vcpu_except_flags(0)
+#define EXCEPT_AA64_EL1_IRQ __vcpu_except_flags(1)
+#define EXCEPT_AA64_EL1_FIQ __vcpu_except_flags(2)
+#define EXCEPT_AA64_EL1_SERR __vcpu_except_flags(3)
+/* For AArch64 with NV: */
+#define EXCEPT_AA64_EL2_SYNC __vcpu_except_flags(4)
+#define EXCEPT_AA64_EL2_IRQ __vcpu_except_flags(5)
+#define EXCEPT_AA64_EL2_FIQ __vcpu_except_flags(6)
+#define EXCEPT_AA64_EL2_SERR __vcpu_except_flags(7)
+
+/* Physical CPU not in supported_cpus */
+#define ON_UNSUPPORTED_CPU __vcpu_single_flag(sflags, BIT(0))
+/* WFIT instruction trapped */
+#define IN_WFIT __vcpu_single_flag(sflags, BIT(1))
+/* vcpu system registers loaded on physical CPU */
+#define SYSREGS_ON_CPU __vcpu_single_flag(sflags, BIT(2))
+/* Software step state is Active-pending for external debug */
+#define HOST_SS_ACTIVE_PENDING __vcpu_single_flag(sflags, BIT(3))
+/* Software step state is Active-pending for guest debug */
+#define GUEST_SS_ACTIVE_PENDING __vcpu_single_flag(sflags, BIT(4))
+/* PMUSERENR for the guest EL0 is on physical CPU */
+#define PMUSERENR_ON_CPU __vcpu_single_flag(sflags, BIT(5))
+/* WFI instruction trapped */
+#define IN_WFI __vcpu_single_flag(sflags, BIT(6))
+/* KVM is currently emulating a nested ERET */
+#define IN_NESTED_ERET __vcpu_single_flag(sflags, BIT(7))
+/* SError pending for nested guest */
+#define NESTED_SERROR_PENDING __vcpu_single_flag(sflags, BIT(8))
+
+
/* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
#define vcpu_sve_pffr(vcpu) (kern_hyp_va((vcpu)->arch.sve_state) + \
sve_ffr_offset((vcpu)->arch.sve_max_vl))
#define vcpu_sve_max_vq(vcpu) sve_vq_from_vl((vcpu)->arch.sve_max_vl)
-#define vcpu_sve_state_size(vcpu) ({ \
+#define vcpu_sve_zcr_elx(vcpu) \
+ (unlikely(is_hyp_ctxt(vcpu)) ? ZCR_EL2 : ZCR_EL1)
+
+#define sve_state_size_from_vl(sve_max_vl) ({ \
size_t __size_ret; \
- unsigned int __vcpu_vq; \
+ unsigned int __vq; \
\
- if (WARN_ON(!sve_vl_valid((vcpu)->arch.sve_max_vl))) { \
+ if (WARN_ON(!sve_vl_valid(sve_max_vl))) { \
__size_ret = 0; \
} else { \
- __vcpu_vq = vcpu_sve_max_vq(vcpu); \
- __size_ret = SVE_SIG_REGS_SIZE(__vcpu_vq); \
+ __vq = sve_vq_from_vl(sve_max_vl); \
+ __size_ret = SVE_SIG_REGS_SIZE(__vq); \
} \
\
__size_ret; \
})
-/* vcpu_arch flags field values: */
-#define KVM_ARM64_DEBUG_DIRTY (1 << 0)
-#define KVM_ARM64_FP_ENABLED (1 << 1) /* guest FP regs loaded */
-#define KVM_ARM64_FP_HOST (1 << 2) /* host FP regs loaded */
-#define KVM_ARM64_HOST_SVE_ENABLED (1 << 4) /* SVE enabled for EL0 */
-#define KVM_ARM64_GUEST_HAS_SVE (1 << 5) /* SVE exposed to guest */
-#define KVM_ARM64_VCPU_SVE_FINALIZED (1 << 6) /* SVE config completed */
-#define KVM_ARM64_GUEST_HAS_PTRAUTH (1 << 7) /* PTRAUTH exposed to guest */
-#define KVM_ARM64_PENDING_EXCEPTION (1 << 8) /* Exception pending */
-/*
- * Overlaps with KVM_ARM64_EXCEPT_MASK on purpose so that it can't be
- * set together with an exception...
- */
-#define KVM_ARM64_INCREMENT_PC (1 << 9) /* Increment PC */
-#define KVM_ARM64_EXCEPT_MASK (7 << 9) /* Target EL/MODE */
-/*
- * When KVM_ARM64_PENDING_EXCEPTION is set, KVM_ARM64_EXCEPT_MASK can
- * take the following values:
- *
- * For AArch32 EL1:
- */
-#define KVM_ARM64_EXCEPT_AA32_UND (0 << 9)
-#define KVM_ARM64_EXCEPT_AA32_IABT (1 << 9)
-#define KVM_ARM64_EXCEPT_AA32_DABT (2 << 9)
-/* For AArch64: */
-#define KVM_ARM64_EXCEPT_AA64_ELx_SYNC (0 << 9)
-#define KVM_ARM64_EXCEPT_AA64_ELx_IRQ (1 << 9)
-#define KVM_ARM64_EXCEPT_AA64_ELx_FIQ (2 << 9)
-#define KVM_ARM64_EXCEPT_AA64_ELx_SERR (3 << 9)
-#define KVM_ARM64_EXCEPT_AA64_EL1 (0 << 11)
-#define KVM_ARM64_EXCEPT_AA64_EL2 (1 << 11)
-
-#define KVM_ARM64_DEBUG_STATE_SAVE_SPE (1 << 12) /* Save SPE context if active */
-#define KVM_ARM64_DEBUG_STATE_SAVE_TRBE (1 << 13) /* Save TRBE context if active */
-#define KVM_ARM64_FP_FOREIGN_FPSTATE (1 << 14)
-#define KVM_ARM64_ON_UNSUPPORTED_CPU (1 << 15) /* Physical CPU not in supported_cpus */
-#define KVM_ARM64_HOST_SME_ENABLED (1 << 16) /* SME enabled for EL0 */
-#define KVM_ARM64_WFIT (1 << 17) /* WFIT instruction trapped */
+#define vcpu_sve_state_size(vcpu) sve_state_size_from_vl((vcpu)->arch.sve_max_vl)
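
The state size now depends only on the maximum vector length. A rough user-space sketch of the arithmetic, assuming a tightly packed layout of 32 Z registers (16*vq bytes each) plus 16 predicate registers and FFR (2*vq bytes each); SVE_SIG_REGS_SIZE() remains the authoritative definition and may include padding.

#include <assert.h>
#include <stddef.h>

/* vl is the vector length in bytes; vq counts 128-bit quadwords */
static size_t vq_from_vl(size_t vl) { return vl / 16; }

/* 32 Z regs + 16 P regs + FFR, packed back to back */
static size_t sve_payload_bytes(size_t vl)
{
        size_t vq = vq_from_vl(vl);

        return 32 * 16 * vq + (16 + 1) * 2 * vq;
}

int main(void)
{
        assert(sve_payload_bytes(32)  == 1092);
        assert(sve_payload_bytes(256) == 8736);
        return 0;
}
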
#define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \
KVM_GUESTDBG_USE_SW_BP | \
KVM_GUESTDBG_USE_HW | \
KVM_GUESTDBG_SINGLESTEP)
-#define vcpu_has_sve(vcpu) (system_supports_sve() && \
- ((vcpu)->arch.flags & KVM_ARM64_GUEST_HAS_SVE))
+#define kvm_has_sve(kvm) (system_supports_sve() && \
+ test_bit(KVM_ARCH_FLAG_GUEST_HAS_SVE, &(kvm)->arch.flags))
+
+#ifdef __KVM_NVHE_HYPERVISOR__
+#define vcpu_has_sve(vcpu) kvm_has_sve(kern_hyp_va((vcpu)->kvm))
+#else
+#define vcpu_has_sve(vcpu) kvm_has_sve((vcpu)->kvm)
+#endif
#ifdef CONFIG_ARM64_PTR_AUTH
#define vcpu_has_ptrauth(vcpu) \
((cpus_have_final_cap(ARM64_HAS_ADDRESS_AUTH) || \
cpus_have_final_cap(ARM64_HAS_GENERIC_AUTH)) && \
- (vcpu)->arch.flags & KVM_ARM64_GUEST_HAS_PTRAUTH)
+ (vcpu_has_feature(vcpu, KVM_ARM_VCPU_PTRAUTH_ADDRESS) || \
+ vcpu_has_feature(vcpu, KVM_ARM_VCPU_PTRAUTH_GENERIC)))
#else
#define vcpu_has_ptrauth(vcpu) false
#endif
#define vcpu_on_unsupported_cpu(vcpu) \
- ((vcpu)->arch.flags & KVM_ARM64_ON_UNSUPPORTED_CPU)
+ vcpu_get_flag(vcpu, ON_UNSUPPORTED_CPU)
#define vcpu_set_on_unsupported_cpu(vcpu) \
- ((vcpu)->arch.flags |= KVM_ARM64_ON_UNSUPPORTED_CPU)
+ vcpu_set_flag(vcpu, ON_UNSUPPORTED_CPU)
#define vcpu_clear_on_unsupported_cpu(vcpu) \
- ((vcpu)->arch.flags &= ~KVM_ARM64_ON_UNSUPPORTED_CPU)
+ vcpu_clear_flag(vcpu, ON_UNSUPPORTED_CPU)
#define vcpu_gp_regs(v) (&(v)->arch.ctxt.regs)
@@ -496,106 +1105,63 @@ struct kvm_vcpu_arch {
* accessed by a running VCPU. For example, for userspace access or
* for system registers that are never context switched, but only
* emulated.
+ *
+ * Don't bother with VNCR-based accesses in the nVHE code, as it has no
+ * business dealing with NV.
*/
-#define __ctxt_sys_reg(c,r) (&(c)->sys_regs[(r)])
-
-#define ctxt_sys_reg(c,r) (*__ctxt_sys_reg(c,r))
-
-#define __vcpu_sys_reg(v,r) (ctxt_sys_reg(&(v)->arch.ctxt, (r)))
+static inline u64 *___ctxt_sys_reg(const struct kvm_cpu_context *ctxt, int r)
+{
+#if !defined (__KVM_NVHE_HYPERVISOR__)
+ if (unlikely(cpus_have_final_cap(ARM64_HAS_NESTED_VIRT) &&
+ r >= __VNCR_START__ && ctxt->vncr_array))
+ return &ctxt->vncr_array[r - __VNCR_START__];
+#endif
+ return (u64 *)&ctxt->sys_regs[r];
+}
-u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg);
-void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg);
+#define __ctxt_sys_reg(c,r) \
+ ({ \
+ BUILD_BUG_ON(__builtin_constant_p(r) && \
+ (r) >= NR_SYS_REGS); \
+ ___ctxt_sys_reg(c, r); \
+ })
-static inline bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val)
-{
- /*
- * *** VHE ONLY ***
- *
- * System registers listed in the switch are not saved on every
- * exit from the guest but are only saved on vcpu_put.
- *
- * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
- * should never be listed below, because the guest cannot modify its
- * own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's
- * thread when emulating cross-VCPU communication.
- */
- if (!has_vhe())
- return false;
+#define ctxt_sys_reg(c,r) (*__ctxt_sys_reg(c,r))
- switch (reg) {
- case CSSELR_EL1: *val = read_sysreg_s(SYS_CSSELR_EL1); break;
- case SCTLR_EL1: *val = read_sysreg_s(SYS_SCTLR_EL12); break;
- case CPACR_EL1: *val = read_sysreg_s(SYS_CPACR_EL12); break;
- case TTBR0_EL1: *val = read_sysreg_s(SYS_TTBR0_EL12); break;
- case TTBR1_EL1: *val = read_sysreg_s(SYS_TTBR1_EL12); break;
- case TCR_EL1: *val = read_sysreg_s(SYS_TCR_EL12); break;
- case ESR_EL1: *val = read_sysreg_s(SYS_ESR_EL12); break;
- case AFSR0_EL1: *val = read_sysreg_s(SYS_AFSR0_EL12); break;
- case AFSR1_EL1: *val = read_sysreg_s(SYS_AFSR1_EL12); break;
- case FAR_EL1: *val = read_sysreg_s(SYS_FAR_EL12); break;
- case MAIR_EL1: *val = read_sysreg_s(SYS_MAIR_EL12); break;
- case VBAR_EL1: *val = read_sysreg_s(SYS_VBAR_EL12); break;
- case CONTEXTIDR_EL1: *val = read_sysreg_s(SYS_CONTEXTIDR_EL12);break;
- case TPIDR_EL0: *val = read_sysreg_s(SYS_TPIDR_EL0); break;
- case TPIDRRO_EL0: *val = read_sysreg_s(SYS_TPIDRRO_EL0); break;
- case TPIDR_EL1: *val = read_sysreg_s(SYS_TPIDR_EL1); break;
- case AMAIR_EL1: *val = read_sysreg_s(SYS_AMAIR_EL12); break;
- case CNTKCTL_EL1: *val = read_sysreg_s(SYS_CNTKCTL_EL12); break;
- case ELR_EL1: *val = read_sysreg_s(SYS_ELR_EL12); break;
- case PAR_EL1: *val = read_sysreg_par(); break;
- case DACR32_EL2: *val = read_sysreg_s(SYS_DACR32_EL2); break;
- case IFSR32_EL2: *val = read_sysreg_s(SYS_IFSR32_EL2); break;
- case DBGVCR32_EL2: *val = read_sysreg_s(SYS_DBGVCR32_EL2); break;
- default: return false;
- }
+u64 kvm_vcpu_apply_reg_masks(const struct kvm_vcpu *, enum vcpu_sysreg, u64);
- return true;
-}
+#define __vcpu_assign_sys_reg(v, r, val) \
+ do { \
+ const struct kvm_cpu_context *ctxt = &(v)->arch.ctxt; \
+ u64 __v = (val); \
+ if (vcpu_has_nv((v)) && (r) >= __SANITISED_REG_START__) \
+ __v = kvm_vcpu_apply_reg_masks((v), (r), __v); \
+ \
+ ctxt_sys_reg(ctxt, (r)) = __v; \
+ } while (0)
-static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg)
-{
- /*
- * *** VHE ONLY ***
- *
- * System registers listed in the switch are not restored on every
- * entry to the guest but are only restored on vcpu_load.
- *
- * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
- * should never be listed below, because the MPIDR should only be set
- * once, before running the VCPU, and never changed later.
- */
- if (!has_vhe())
- return false;
+#define __vcpu_rmw_sys_reg(v, r, op, val) \
+ do { \
+ const struct kvm_cpu_context *ctxt = &(v)->arch.ctxt; \
+ u64 __v = ctxt_sys_reg(ctxt, (r)); \
+ __v op (val); \
+ if (vcpu_has_nv((v)) && (r) >= __SANITISED_REG_START__) \
+ __v = kvm_vcpu_apply_reg_masks((v), (r), __v); \
+ \
+ ctxt_sys_reg(ctxt, (r)) = __v; \
+ } while (0)
- switch (reg) {
- case CSSELR_EL1: write_sysreg_s(val, SYS_CSSELR_EL1); break;
- case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12); break;
- case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12); break;
- case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); break;
- case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12); break;
- case TCR_EL1: write_sysreg_s(val, SYS_TCR_EL12); break;
- case ESR_EL1: write_sysreg_s(val, SYS_ESR_EL12); break;
- case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12); break;
- case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12); break;
- case FAR_EL1: write_sysreg_s(val, SYS_FAR_EL12); break;
- case MAIR_EL1: write_sysreg_s(val, SYS_MAIR_EL12); break;
- case VBAR_EL1: write_sysreg_s(val, SYS_VBAR_EL12); break;
- case CONTEXTIDR_EL1: write_sysreg_s(val, SYS_CONTEXTIDR_EL12);break;
- case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); break;
- case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); break;
- case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); break;
- case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); break;
- case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); break;
- case ELR_EL1: write_sysreg_s(val, SYS_ELR_EL12); break;
- case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); break;
- case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); break;
- case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break;
- case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); break;
- default: return false;
- }
+#define __vcpu_sys_reg(v,r) \
+ ({ \
+ const struct kvm_cpu_context *ctxt = &(v)->arch.ctxt; \
+ u64 __v = ctxt_sys_reg(ctxt, (r)); \
+ if (vcpu_has_nv((v)) && (r) >= __SANITISED_REG_START__) \
+ __v = kvm_vcpu_apply_reg_masks((v), (r), __v); \
+ __v; \
+ })
- return true;
-}
+u64 vcpu_read_sys_reg(const struct kvm_vcpu *, enum vcpu_sysreg);
+void vcpu_write_sys_reg(struct kvm_vcpu *, u64, enum vcpu_sysreg);
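
For registers past __SANITISED_REG_START__, every access above is filtered through kvm_vcpu_apply_reg_masks(). A minimal sketch of that kind of RES0/RES1 sanitisation (structure and values invented for illustration):

#include <assert.h>
#include <stdint.h>

struct reg_fixed_bits { uint64_t res0, res1; };

/* force reserved-as-zero bits clear and reserved-as-one bits set */
static uint64_t apply_reg_masks(const struct reg_fixed_bits *f, uint64_t v)
{
        v &= ~f->res0;
        v |= f->res1;
        return v;
}

int main(void)
{
        struct reg_fixed_bits f = { .res0 = 0xff00, .res1 = 0x3 };

        assert(apply_reg_masks(&f, 0xffff) == 0x00ff);
        return 0;
}
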
struct kvm_vm_stat {
struct kvm_vm_stat_generic generic;
@@ -612,7 +1178,6 @@ struct kvm_vcpu_stat {
u64 exits;
};
-void kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
@@ -620,8 +1185,6 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu);
int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices);
-int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
-int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
struct kvm_vcpu_events *events);
@@ -629,12 +1192,10 @@ int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
struct kvm_vcpu_events *events);
-#define KVM_ARCH_WANT_MMU_NOTIFIER
-
void kvm_arm_halt_guest(struct kvm *kvm);
void kvm_arm_resume_guest(struct kvm *kvm);
-#define vcpu_has_run_once(vcpu) !!rcu_access_pointer((vcpu)->pid)
+#define vcpu_has_run_once(vcpu) (!!READ_ONCE((vcpu)->pid))
#ifndef __KVM_NVHE_HYPERVISOR__
#define kvm_call_hyp_nvhe(f, ...) \
@@ -649,9 +1210,8 @@ void kvm_arm_resume_guest(struct kvm *kvm);
})
/*
- * The couple of isb() below are there to guarantee the same behaviour
- * on VHE as on !VHE, where the eret to EL1 acts as a context
- * synchronization event.
+ * The isb() below is there to guarantee the same behaviour on VHE as on !VHE,
+ * where the eret to EL1 acts as a context synchronization event.
*/
#define kvm_call_hyp(f, ...) \
do { \
@@ -669,7 +1229,6 @@ void kvm_arm_resume_guest(struct kvm *kvm);
\
if (has_vhe()) { \
ret = f(__VA_ARGS__); \
- isb(); \
} else { \
ret = kvm_call_hyp_nvhe(f, ##__VA_ARGS__); \
} \
@@ -682,8 +1241,6 @@ void kvm_arm_resume_guest(struct kvm *kvm);
#define kvm_call_hyp_nvhe(f, ...) f(__VA_ARGS__)
#endif /* __KVM_NVHE_HYPERVISOR__ */
-void force_vm_exit(const cpumask_t *mask);
-
int handle_exit(struct kvm_vcpu *vcpu, int exception_index);
void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index);
@@ -695,9 +1252,16 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu);
int kvm_handle_sys_reg(struct kvm_vcpu *vcpu);
int kvm_handle_cp10_id(struct kvm_vcpu *vcpu);
+void kvm_sys_regs_create_debugfs(struct kvm *kvm);
void kvm_reset_sys_regs(struct kvm_vcpu *vcpu);
-int kvm_sys_reg_table_init(void);
+int __init kvm_sys_reg_table_init(void);
+struct sys_reg_desc;
+int __init populate_sysreg_config(const struct sys_reg_desc *sr,
+ unsigned int idx);
+int __init populate_nv_trap_config(void);
+
+void kvm_calculate_traps(struct kvm_vcpu *vcpu);
/* MMIO helpers */
void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data);
@@ -728,28 +1292,71 @@ int kvm_arm_pvtime_get_attr(struct kvm_vcpu *vcpu,
int kvm_arm_pvtime_has_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr);
-extern unsigned int kvm_arm_vmid_bits;
-int kvm_arm_vmid_alloc_init(void);
-void kvm_arm_vmid_alloc_free(void);
+extern unsigned int __ro_after_init kvm_arm_vmid_bits;
+int __init kvm_arm_vmid_alloc_init(void);
+void __init kvm_arm_vmid_alloc_free(void);
void kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid);
void kvm_arm_vmid_clear_active(void);
static inline void kvm_arm_pvtime_vcpu_init(struct kvm_vcpu_arch *vcpu_arch)
{
- vcpu_arch->steal.base = GPA_INVALID;
+ vcpu_arch->steal.base = INVALID_GPA;
}
static inline bool kvm_arm_is_pvtime_enabled(struct kvm_vcpu_arch *vcpu_arch)
{
- return (vcpu_arch->steal.base != GPA_INVALID);
+ return (vcpu_arch->steal.base != INVALID_GPA);
}
-void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 syndrome);
-
struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
DECLARE_KVM_HYP_PER_CPU(struct kvm_host_data, kvm_host_data);
+/*
+ * How we access per-CPU host data depends on where we access it from,
+ * and the mode we're in:
+ *
+ * - VHE and nVHE hypervisor bits use their locally defined instance
+ *
+ * - the rest of the kernel uses either the VHE or nVHE one, depending on
+ * the mode we're running in.
+ *
+ * Unless we're in protected mode, where the host is fully deprivileged
+ * and the nVHE per-CPU stuff is exclusively accessible to the protected
+ * EL2 code.
+ * In this case, the EL1 code uses the *VHE* data as its private state
+ * (which makes sense in a way as there shouldn't be any shared state
+ * between the host and the hypervisor).
+ *
+ * Yes, this is all totally trivial. Shoot me now.
+ */
+#if defined(__KVM_NVHE_HYPERVISOR__) || defined(__KVM_VHE_HYPERVISOR__)
+#define host_data_ptr(f) (&this_cpu_ptr(&kvm_host_data)->f)
+#else
+#define host_data_ptr(f) \
+ (static_branch_unlikely(&kvm_protected_mode_initialized) ? \
+ &this_cpu_ptr(&kvm_host_data)->f : \
+ &this_cpu_ptr_hyp_sym(kvm_host_data)->f)
+#endif
+
+#define host_data_test_flag(flag) \
+ (test_bit(KVM_HOST_DATA_FLAG_##flag, host_data_ptr(flags)))
+#define host_data_set_flag(flag) \
+ set_bit(KVM_HOST_DATA_FLAG_##flag, host_data_ptr(flags))
+#define host_data_clear_flag(flag) \
+ clear_bit(KVM_HOST_DATA_FLAG_##flag, host_data_ptr(flags))
+
+/* Check whether the FP regs are owned by the guest */
+static inline bool guest_owns_fp_regs(void)
+{
+ return *host_data_ptr(fp_owner) == FP_STATE_GUEST_OWNED;
+}
+
+/* Check whether the FP regs are owned by the host */
+static inline bool host_owns_fp_regs(void)
+{
+ return *host_data_ptr(fp_owner) == FP_STATE_HOST_OWNED;
+}
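
host_data_test/set/clear_flag() above paste the short flag name onto a KVM_HOST_DATA_FLAG_ prefix and operate on a per-CPU bitmap. The same token-pasting pattern in a standalone sketch (demo names, plain bit ops instead of the kernel's atomic bitops):

#include <assert.h>
#include <stdbool.h>

enum { DEMO_FLAG_HAS_SPE, DEMO_FLAG_HAS_TRBE, DEMO_NR_FLAGS };

static unsigned long demo_flags;

#define demo_set_flag(f)   (demo_flags |=  (1UL << DEMO_FLAG_##f))
#define demo_clear_flag(f) (demo_flags &= ~(1UL << DEMO_FLAG_##f))
#define demo_test_flag(f)  ((bool)(demo_flags & (1UL << DEMO_FLAG_##f)))

int main(void)
{
        demo_set_flag(HAS_SPE);
        assert(demo_test_flag(HAS_SPE) && !demo_test_flag(HAS_TRBE));
        demo_clear_flag(HAS_SPE);
        assert(!demo_test_flag(HAS_SPE));
        return 0;
}
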
+
static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt)
{
/* The host's MPIDR is immutable, so let's set it up at boot time */
@@ -758,23 +1365,24 @@ static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt)
static inline bool kvm_system_needs_idmapped_vectors(void)
{
- return cpus_have_const_cap(ARM64_SPECTRE_V3A);
+ return cpus_have_final_cap(ARM64_SPECTRE_V3A);
}
-void kvm_arm_vcpu_ptrauth_trap(struct kvm_vcpu *vcpu);
-
-static inline void kvm_arch_hardware_unsetup(void) {}
-static inline void kvm_arch_sync_events(struct kvm *kvm) {}
-static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
-
-void kvm_arm_init_debug(void);
-void kvm_arm_vcpu_init_debug(struct kvm_vcpu *vcpu);
-void kvm_arm_setup_debug(struct kvm_vcpu *vcpu);
-void kvm_arm_clear_debug(struct kvm_vcpu *vcpu);
-void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu);
+void kvm_init_host_debug_data(void);
+void kvm_vcpu_load_debug(struct kvm_vcpu *vcpu);
+void kvm_vcpu_put_debug(struct kvm_vcpu *vcpu);
+void kvm_debug_set_guest_ownership(struct kvm_vcpu *vcpu);
+void kvm_debug_handle_oslar(struct kvm_vcpu *vcpu, u64 val);
#define kvm_vcpu_os_lock_enabled(vcpu) \
- (!!(__vcpu_sys_reg(vcpu, OSLSR_EL1) & SYS_OSLSR_OSLK))
+ (!!(__vcpu_sys_reg(vcpu, OSLSR_EL1) & OSLSR_EL1_OSLK))
+
+#define kvm_debug_regs_in_use(vcpu) \
+ ((vcpu)->arch.debug_owner != VCPU_DEBUG_FREE)
+#define kvm_host_owns_debug_regs(vcpu) \
+ ((vcpu)->arch.debug_owner == VCPU_DEBUG_HOST_OWNED)
+#define kvm_guest_owns_debug_regs(vcpu) \
+ ((vcpu)->arch.debug_owner == VCPU_DEBUG_GUEST_OWNED)
int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr);
@@ -783,61 +1391,86 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr);
-long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
- struct kvm_arm_copy_mte_tags *copy_tags);
+int kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
+ struct kvm_arm_copy_mte_tags *copy_tags);
+int kvm_vm_ioctl_set_counter_offset(struct kvm *kvm,
+ struct kvm_arm_counter_offset *offset);
+int kvm_vm_ioctl_get_reg_writable_masks(struct kvm *kvm,
+ struct reg_mask_range *range);
/* Guest/host FPSIMD coordination helpers */
-int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu);
-void kvm_vcpu_unshare_task_fp(struct kvm_vcpu *vcpu);
static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr)
{
return (!has_vhe() && attr->exclude_host);
}
-/* Flags for host debug state */
-void kvm_arch_vcpu_load_debug_state_flags(struct kvm_vcpu *vcpu);
-void kvm_arch_vcpu_put_debug_state_flags(struct kvm_vcpu *vcpu);
-
#ifdef CONFIG_KVM
-void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr);
-void kvm_clr_pmu_events(u32 clr);
+void kvm_set_pmu_events(u64 set, struct perf_event_attr *attr);
+void kvm_clr_pmu_events(u64 clr);
+bool kvm_set_pmuserenr(u64 val);
+void kvm_enable_trbe(void);
+void kvm_disable_trbe(void);
+void kvm_tracing_set_el1_configuration(u64 trfcr_while_in_guest);
#else
-static inline void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) {}
-static inline void kvm_clr_pmu_events(u32 clr) {}
+static inline void kvm_set_pmu_events(u64 set, struct perf_event_attr *attr) {}
+static inline void kvm_clr_pmu_events(u64 clr) {}
+static inline bool kvm_set_pmuserenr(u64 val)
+{
+ return false;
+}
+static inline void kvm_enable_trbe(void) {}
+static inline void kvm_disable_trbe(void) {}
+static inline void kvm_tracing_set_el1_configuration(u64 trfcr_while_in_guest) {}
#endif
-void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu);
-void kvm_vcpu_put_sysregs_vhe(struct kvm_vcpu *vcpu);
+void kvm_vcpu_load_vhe(struct kvm_vcpu *vcpu);
+void kvm_vcpu_put_vhe(struct kvm_vcpu *vcpu);
-int kvm_set_ipa_limit(void);
+int __init kvm_set_ipa_limit(void);
+u32 kvm_get_pa_bits(struct kvm *kvm);
#define __KVM_HAVE_ARCH_VM_ALLOC
struct kvm *kvm_arch_alloc_vm(void);
-int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type);
+#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
-static inline bool kvm_vm_is_protected(struct kvm *kvm)
-{
- return false;
-}
+#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
+
+#define kvm_vm_is_protected(kvm) (is_protected_kvm_enabled() && (kvm)->arch.pkvm.enabled)
-void kvm_init_protected_traps(struct kvm_vcpu *vcpu);
+#define vcpu_is_protected(vcpu) kvm_vm_is_protected((vcpu)->kvm)
int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature);
bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);
-#define kvm_arm_vcpu_sve_finalized(vcpu) \
- ((vcpu)->arch.flags & KVM_ARM64_VCPU_SVE_FINALIZED)
+#define kvm_arm_vcpu_sve_finalized(vcpu) vcpu_get_flag(vcpu, VCPU_SVE_FINALIZED)
#define kvm_has_mte(kvm) \
(system_supports_mte() && \
test_bit(KVM_ARCH_FLAG_MTE_ENABLED, &(kvm)->arch.flags))
+#define kvm_supports_32bit_el0() \
+ (system_supports_32bit_el0() && \
+ !static_branch_unlikely(&arm64_mismatched_32bit_el0))
+
+#define kvm_vm_has_ran_once(kvm) \
+ (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &(kvm)->arch.flags))
+
+static inline bool __vcpu_has_feature(const struct kvm_arch *ka, int feature)
+{
+ return test_bit(feature, ka->vcpu_features);
+}
+
+#define kvm_vcpu_has_feature(k, f) __vcpu_has_feature(&(k)->arch, (f))
+#define vcpu_has_feature(v, f) __vcpu_has_feature(&(v)->kvm->arch, (f))
+
+#define kvm_vcpu_initialized(v) vcpu_get_flag(vcpu, VCPU_INITIALIZED)
+
int kvm_trng_call(struct kvm_vcpu *vcpu);
#ifdef CONFIG_KVM
extern phys_addr_t hyp_mem_base;
@@ -850,4 +1483,121 @@ static inline void kvm_hyp_reserve(void) { }
void kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu);
bool kvm_arm_vcpu_stopped(struct kvm_vcpu *vcpu);
+static inline u64 *__vm_id_reg(struct kvm_arch *ka, u32 reg)
+{
+ switch (reg) {
+ case sys_reg(3, 0, 0, 1, 0) ... sys_reg(3, 0, 0, 7, 7):
+ return &ka->id_regs[IDREG_IDX(reg)];
+ case SYS_CTR_EL0:
+ return &ka->ctr_el0;
+ case SYS_MIDR_EL1:
+ return &ka->midr_el1;
+ case SYS_REVIDR_EL1:
+ return &ka->revidr_el1;
+ case SYS_AIDR_EL1:
+ return &ka->aidr_el1;
+ default:
+ WARN_ON_ONCE(1);
+ return NULL;
+ }
+}
+
+#define kvm_read_vm_id_reg(kvm, reg) \
+ ({ u64 __val = *__vm_id_reg(&(kvm)->arch, reg); __val; })
+
+void kvm_set_vm_id_reg(struct kvm *kvm, u32 reg, u64 val);
+
+#define __expand_field_sign_unsigned(id, fld, val) \
+ ((u64)SYS_FIELD_VALUE(id, fld, val))
+
+#define __expand_field_sign_signed(id, fld, val) \
+ ({ \
+ u64 __val = SYS_FIELD_VALUE(id, fld, val); \
+ sign_extend64(__val, id##_##fld##_WIDTH - 1); \
+ })
+
+#define get_idreg_field_unsigned(kvm, id, fld) \
+ ({ \
+ u64 __val = kvm_read_vm_id_reg((kvm), SYS_##id); \
+ FIELD_GET(id##_##fld##_MASK, __val); \
+ })
+
+#define get_idreg_field_signed(kvm, id, fld) \
+ ({ \
+ u64 __val = get_idreg_field_unsigned(kvm, id, fld); \
+ sign_extend64(__val, id##_##fld##_WIDTH - 1); \
+ })
+
+#define get_idreg_field_enum(kvm, id, fld) \
+ get_idreg_field_unsigned(kvm, id, fld)
+
+#define kvm_cmp_feat_signed(kvm, id, fld, op, limit) \
+ (get_idreg_field_signed((kvm), id, fld) op __expand_field_sign_signed(id, fld, limit))
+
+#define kvm_cmp_feat_unsigned(kvm, id, fld, op, limit) \
+ (get_idreg_field_unsigned((kvm), id, fld) op __expand_field_sign_unsigned(id, fld, limit))
+
+#define kvm_cmp_feat(kvm, id, fld, op, limit) \
+ (id##_##fld##_SIGNED ? \
+ kvm_cmp_feat_signed(kvm, id, fld, op, limit) : \
+ kvm_cmp_feat_unsigned(kvm, id, fld, op, limit))
+
+#define __kvm_has_feat(kvm, id, fld, limit) \
+ kvm_cmp_feat(kvm, id, fld, >=, limit)
+
+#define kvm_has_feat(kvm, ...) __kvm_has_feat(kvm, __VA_ARGS__)
+
+#define __kvm_has_feat_enum(kvm, id, fld, val) \
+ kvm_cmp_feat_unsigned(kvm, id, fld, ==, val)
+
+#define kvm_has_feat_enum(kvm, ...) __kvm_has_feat_enum(kvm, __VA_ARGS__)
+
+#define kvm_has_feat_range(kvm, id, fld, min, max) \
+ (kvm_cmp_feat(kvm, id, fld, >=, min) && \
+ kvm_cmp_feat(kvm, id, fld, <=, max))
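
kvm_has_feat() ultimately extracts a 4-bit ID register field, sign-extends it when the field is signed (so 0xF reads as -1, i.e. "not implemented"), and compares it against the requested minimum. A compact sketch of that arithmetic (field positions invented; the kernel derives them from the generated *_MASK/*_WIDTH definitions):

#include <assert.h>
#include <stdint.h>

static uint64_t field_get(uint64_t reg, unsigned int shift)
{
        return (reg >> shift) & 0xf;
}

/* sign-extend a 4-bit value; relies on arithmetic right shift */
static int64_t sign_extend4(uint64_t v)
{
        return (int64_t)(v << 60) >> 60;
}

int main(void)
{
        uint64_t idreg = 0xf2;  /* field@0 = 0x2, field@4 = 0xf */

        assert(field_get(idreg, 0) >= 1);               /* unsigned: present */
        assert(sign_extend4(field_get(idreg, 4)) < 0);  /* signed: absent    */
        return 0;
}
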
+
+/* Check for a given level of PAuth support */
+#define kvm_has_pauth(k, l) \
+ ({ \
+ bool pa, pi, pa3; \
+ \
+ pa = kvm_has_feat((k), ID_AA64ISAR1_EL1, APA, l); \
+ pa &= kvm_has_feat((k), ID_AA64ISAR1_EL1, GPA, IMP); \
+ pi = kvm_has_feat((k), ID_AA64ISAR1_EL1, API, l); \
+ pi &= kvm_has_feat((k), ID_AA64ISAR1_EL1, GPI, IMP); \
+ pa3 = kvm_has_feat((k), ID_AA64ISAR2_EL1, APA3, l); \
+ pa3 &= kvm_has_feat((k), ID_AA64ISAR2_EL1, GPA3, IMP); \
+ \
+ (pa + pi + pa3) == 1; \
+ })
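
The (pa + pi + pa3) == 1 test relies on bool-to-int promotion to require that exactly one PAuth flavour is implemented. The same idiom in isolation:

#include <assert.h>
#include <stdbool.h>

static bool exactly_one(bool a, bool b, bool c)
{
        return (a + b + c) == 1;    /* bools promote to 0/1 */
}

int main(void)
{
        assert(exactly_one(true, false, false));
        assert(!exactly_one(true, true, false));
        assert(!exactly_one(false, false, false));
        return 0;
}
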
+
+#define kvm_has_fpmr(k) \
+ (system_supports_fpmr() && \
+ kvm_has_feat((k), ID_AA64PFR2_EL1, FPMR, IMP))
+
+#define kvm_has_tcr2(k) \
+ (kvm_has_feat((k), ID_AA64MMFR3_EL1, TCRX, IMP))
+
+#define kvm_has_s1pie(k) \
+ (kvm_has_feat((k), ID_AA64MMFR3_EL1, S1PIE, IMP))
+
+#define kvm_has_s1poe(k) \
+ (kvm_has_feat((k), ID_AA64MMFR3_EL1, S1POE, IMP))
+
+#define kvm_has_ras(k) \
+ (kvm_has_feat((k), ID_AA64PFR0_EL1, RAS, IMP))
+
+#define kvm_has_sctlr2(k) \
+ (kvm_has_feat((k), ID_AA64MMFR3_EL1, SCTLRX, IMP))
+
+static inline bool kvm_arch_has_irq_bypass(void)
+{
+ return true;
+}
+
+void compute_fgu(struct kvm *kvm, enum fgt_group_id fgt);
+void get_reg_fixed_bits(struct kvm *kvm, enum vcpu_sysreg reg, u64 *res0, u64 *res1);
+void check_feature_map(void);
+
+
#endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index aa7fa2a08f06..e6be1f5d0967 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -16,12 +16,35 @@ DECLARE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);
DECLARE_PER_CPU(unsigned long, kvm_hyp_vector);
DECLARE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
+/*
+ * Unified accessors for registers that have a different encoding
+ * between VHE and non-VHE. They must be specified without their "ELx"
+ * encoding, but with the SYS_ prefix, as defined in asm/sysreg.h.
+ */
+
+#if defined(__KVM_VHE_HYPERVISOR__)
+
+#define read_sysreg_el0(r) read_sysreg_s(r##_EL02)
+#define write_sysreg_el0(v,r) write_sysreg_s(v, r##_EL02)
+#define read_sysreg_el1(r) read_sysreg_s(r##_EL12)
+#define write_sysreg_el1(v,r) write_sysreg_s(v, r##_EL12)
+#define read_sysreg_el2(r) read_sysreg_s(r##_EL1)
+#define write_sysreg_el2(v,r) write_sysreg_s(v, r##_EL1)
+
+#else // !__KVM_VHE_HYPERVISOR__
+
+#if defined(__KVM_NVHE_HYPERVISOR__)
+#define VHE_ALT_KEY ARM64_KVM_HVHE
+#else
+#define VHE_ALT_KEY ARM64_HAS_VIRT_HOST_EXTN
+#endif
+
#define read_sysreg_elx(r,nvh,vh) \
({ \
u64 reg; \
- asm volatile(ALTERNATIVE(__mrs_s("%0", r##nvh), \
+ asm volatile(ALTERNATIVE(__mrs_s("%0", r##nvh), \
__mrs_s("%0", r##vh), \
- ARM64_HAS_VIRT_HOST_EXTN) \
+ VHE_ALT_KEY) \
: "=r" (reg)); \
reg; \
})
@@ -31,16 +54,10 @@ DECLARE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
u64 __val = (u64)(v); \
asm volatile(ALTERNATIVE(__msr_s(r##nvh, "%x0"), \
__msr_s(r##vh, "%x0"), \
- ARM64_HAS_VIRT_HOST_EXTN) \
+ VHE_ALT_KEY) \
: : "rZ" (__val)); \
} while (0)
-/*
- * Unified accessors for registers that have a different encoding
- * between VHE and non-VHE. They must be specified without their "ELx"
- * encoding, but with the SYS_ prefix, as defined in asm/sysreg.h.
- */
-
#define read_sysreg_el0(r) read_sysreg_elx(r, _EL0, _EL02)
#define write_sysreg_el0(v,r) write_sysreg_elx(v, r, _EL0, _EL02)
#define read_sysreg_el1(r) read_sysreg_elx(r, _EL1, _EL12)
@@ -48,21 +65,25 @@ DECLARE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
#define read_sysreg_el2(r) read_sysreg_elx(r, _EL2, _EL1)
#define write_sysreg_el2(v,r) write_sysreg_elx(v, r, _EL2, _EL1)
+#endif // __KVM_VHE_HYPERVISOR__
+
/*
* Without an __arch_swab32(), we fall back to ___constant_swab32(), but the
* static inline can allow the compiler to out-of-line this. KVM always wants
- * the macro version as its always inlined.
+ * the macro version as it's always inlined.
*/
#define __kvm_swab32(x) ___constant_swab32(x)
int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu);
+u64 __gic_v3_get_lr(unsigned int lr);
+
void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if);
void __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if);
void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if);
void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if);
-void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if);
-void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if);
+void __vgic_v3_save_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if);
+void __vgic_v3_restore_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if);
int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu);
#ifdef __KVM_NVHE_HYPERVISOR__
@@ -74,6 +95,8 @@ void __timer_disable_traps(struct kvm_vcpu *vcpu);
void __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt);
void __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt);
#else
+void __vcpu_load_switch_sysregs(struct kvm_vcpu *vcpu);
+void __vcpu_put_switch_sysregs(struct kvm_vcpu *vcpu);
void sysreg_save_host_state_vhe(struct kvm_cpu_context *ctxt);
void sysreg_restore_host_state_vhe(struct kvm_cpu_context *ctxt);
void sysreg_save_guest_state_vhe(struct kvm_cpu_context *ctxt);
@@ -90,16 +113,12 @@ void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu);
void __fpsimd_save_state(struct user_fpsimd_state *fp_regs);
void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs);
-void __sve_restore_state(void *sve_pffr, u32 *fpsr);
-
-#ifndef __KVM_NVHE_HYPERVISOR__
-void activate_traps_vhe_load(struct kvm_vcpu *vcpu);
-void deactivate_traps_vhe_put(struct kvm_vcpu *vcpu);
-#endif
+void __sve_save_state(void *sve_pffr, u32 *fpsr, int save_ffr);
+void __sve_restore_state(void *sve_pffr, u32 *fpsr, int restore_ffr);
u64 __guest_enter(struct kvm_vcpu *vcpu);
-bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt);
+bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt, u32 func_id);
#ifdef __KVM_NVHE_HYPERVISOR__
void __noreturn __hyp_do_panic(struct kvm_cpu_context *host_ctxt, u64 spsr,
@@ -107,8 +126,8 @@ void __noreturn __hyp_do_panic(struct kvm_cpu_context *host_ctxt, u64 spsr,
#endif
#ifdef __KVM_NVHE_HYPERVISOR__
-void __pkvm_init_switch_pgd(phys_addr_t phys, unsigned long size,
- phys_addr_t pgd, void *sp, void *cont_fn);
+void __pkvm_init_switch_pgd(phys_addr_t pgd, unsigned long sp,
+ void (*fn)(void));
int __pkvm_init(phys_addr_t phys, unsigned long size, unsigned long nr_cpus,
unsigned long *per_cpu_base, u32 hyp_va_bits);
void __noreturn __host_enter(struct kvm_cpu_context *host_ctxt);
@@ -122,5 +141,10 @@ extern u64 kvm_nvhe_sym(id_aa64isar2_el1_sys_val);
extern u64 kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val);
extern u64 kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val);
extern u64 kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val);
+extern u64 kvm_nvhe_sym(id_aa64smfr0_el1_sys_val);
+
+extern unsigned long kvm_nvhe_sym(__icache_flags);
+extern unsigned int kvm_nvhe_sym(kvm_arm_vmid_bits);
+extern unsigned int kvm_nvhe_sym(kvm_host_sve_max_vl);
#endif /* __ARM64_KVM_HYP_H__ */
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index b208da3bebec..e4069f2ce642 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -54,25 +54,6 @@
#include <asm/alternative.h>
/*
- * Convert a kernel VA into a HYP VA.
- * reg: VA to be converted.
- *
- * The actual code generation takes place in kvm_update_va_mask, and
- * the instructions below are only there to reserve the space and
- * perform the register allocation (kvm_update_va_mask uses the
- * specific registers encoded in the instructions).
- */
-.macro kern_hyp_va reg
-alternative_cb kvm_update_va_mask
- and \reg, \reg, #1 /* mask with va_mask */
- ror \reg, \reg, #1 /* rotate to the first tag bit */
- add \reg, \reg, #0 /* insert the low 12 bits of the tag */
- add \reg, \reg, #0, lsl 12 /* insert the top 12 bits of the tag */
- ror \reg, \reg, #63 /* rotate back */
-alternative_cb_end
-.endm
-
-/*
* Convert a hypervisor VA to a PA
* reg: hypervisor address to be converted in place
* tmp: temporary register
@@ -97,7 +78,7 @@ alternative_cb_end
hyp_pa \reg, \tmp
/* Load kimage_voffset. */
-alternative_cb kvm_get_kimage_voffset
+alternative_cb ARM64_ALWAYS_SYSTEM, kvm_get_kimage_voffset
movz \tmp, #0
movk \tmp, #0, lsl #16
movk \tmp, #0, lsl #32
@@ -115,7 +96,9 @@ alternative_cb_end
#include <asm/cache.h>
#include <asm/cacheflush.h>
#include <asm/mmu_context.h>
+#include <asm/kvm_emulate.h>
#include <asm/kvm_host.h>
+#include <asm/kvm_nested.h>
void kvm_update_va_mask(struct alt_instr *alt,
__le32 *origptr, __le32 *updptr, int nr_inst);
@@ -124,29 +107,49 @@ void kvm_apply_hyp_relocations(void);
#define __hyp_pa(x) (((phys_addr_t)(x)) + hyp_physvirt_offset)
+/*
+ * Convert a kernel VA into a HYP VA.
+ *
+ * Can be called from hyp or non-hyp context.
+ *
+ * The actual code generation takes place in kvm_update_va_mask(), and
+ * the instructions below are only there to reserve the space and
+ * perform the register allocation (kvm_update_va_mask() uses the
+ * specific registers encoded in the instructions).
+ */
static __always_inline unsigned long __kern_hyp_va(unsigned long v)
{
- asm volatile(ALTERNATIVE_CB("and %0, %0, #1\n"
- "ror %0, %0, #1\n"
- "add %0, %0, #0\n"
- "add %0, %0, #0, lsl 12\n"
- "ror %0, %0, #63\n",
+/*
+ * This #ifndef is an optimisation for when this is called from VHE hyp
+ * context. When called from a VHE non-hyp context, kvm_update_va_mask() will
+ * replace the instructions with `nop`s.
+ */
+#ifndef __KVM_VHE_HYPERVISOR__
+ asm volatile(ALTERNATIVE_CB("and %0, %0, #1\n" /* mask with va_mask */
+ "ror %0, %0, #1\n" /* rotate to the first tag bit */
+ "add %0, %0, #0\n" /* insert the low 12 bits of the tag */
+ "add %0, %0, #0, lsl 12\n" /* insert the top 12 bits of the tag */
+ "ror %0, %0, #63\n", /* rotate back */
+ ARM64_ALWAYS_SYSTEM,
kvm_update_va_mask)
: "+r" (v));
+#endif
return v;
}
#define kern_hyp_va(v) ((typeof(v))(__kern_hyp_va((unsigned long)(v))))
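
Functionally, the patched and/ror/add/add/ror sequence keeps the low bits of the kernel VA and substitutes a hypervisor tag in the top bits. A user-space model of that transform (mask and tag values invented; the real ones are computed at boot by kvm_update_va_mask()):

#include <assert.h>
#include <stdint.h>

static uint64_t kern_to_hyp_va(uint64_t va, uint64_t va_mask, uint64_t tag)
{
        return (va & va_mask) | tag;
}

int main(void)
{
        uint64_t va_mask = (1ULL << 47) - 1;     /* keep bits [46:0]  */
        uint64_t tag     = 0x5aULL << 47;        /* arbitrary demo tag */
        uint64_t kva     = 0xffff800012345678ULL;
        uint64_t hva     = kern_to_hyp_va(kva, va_mask, tag);

        assert((hva >> 47) == 0x5a);
        assert((hva & va_mask) == (kva & va_mask));
        return 0;
}
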
+extern u32 __hyp_va_bits;
+
/*
* We currently support using a VM-specified IPA size. For backward
* compatibility, the default IPA size is fixed to 40bits.
*/
#define KVM_PHYS_SHIFT (40)
-#define kvm_phys_shift(kvm) VTCR_EL2_IPA(kvm->arch.vtcr)
-#define kvm_phys_size(kvm) (_AC(1, ULL) << kvm_phys_shift(kvm))
-#define kvm_phys_mask(kvm) (kvm_phys_size(kvm) - _AC(1, ULL))
+#define kvm_phys_shift(mmu) VTCR_EL2_IPA((mmu)->vtcr)
+#define kvm_phys_size(mmu) (_AC(1, ULL) << kvm_phys_shift(mmu))
+#define kvm_phys_mask(mmu) (kvm_phys_size(mmu) - _AC(1, ULL))
#include <asm/kvm_pgtable.h>
#include <asm/stage2_pgtable.h>
@@ -162,19 +165,27 @@ int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
void __iomem **haddr);
int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
void **haddr);
-void free_hyp_pgds(void);
+int create_hyp_stack(phys_addr_t phys_addr, unsigned long *haddr);
+void __init free_hyp_pgds(void);
+
+void kvm_stage2_unmap_range(struct kvm_s2_mmu *mmu, phys_addr_t start,
+ u64 size, bool may_block);
+void kvm_stage2_flush_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end);
+void kvm_stage2_wp_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end);
void stage2_unmap_vm(struct kvm *kvm);
-int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu);
+int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long type);
+void kvm_uninit_stage2_mmu(struct kvm *kvm);
void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu);
int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
phys_addr_t pa, unsigned long size, bool writable);
+int kvm_handle_guest_sea(struct kvm_vcpu *vcpu);
int kvm_handle_guest_abort(struct kvm_vcpu *vcpu);
phys_addr_t kvm_mmu_get_httbr(void);
phys_addr_t kvm_get_idmap_vector(void);
-int kvm_mmu_init(u32 *hyp_va_bits);
+int __init kvm_mmu_init(u32 *hyp_va_bits);
static inline void *__kvm_vector_slot2addr(void *base,
enum arm64_hyp_spectre_vector slot)
@@ -191,7 +202,15 @@ struct kvm;
static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
{
- return (vcpu_read_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101;
+ u64 cache_bits = SCTLR_ELx_M | SCTLR_ELx_C;
+ int reg;
+
+ if (vcpu_is_el2(vcpu))
+ reg = SCTLR_EL2;
+ else
+ reg = SCTLR_EL1;
+
+ return (vcpu_read_sys_reg(vcpu, reg) & cache_bits) == cache_bits;
}
static inline void __clean_dcache_guest_page(void *va, size_t size)
@@ -202,21 +221,40 @@ static inline void __clean_dcache_guest_page(void *va, size_t size)
* faulting in pages. Furthermore, FWB implies IDC, so cleaning to
* PoU is not required either in this case.
*/
- if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
+ if (cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
return;
kvm_flush_dcache_to_poc(va, size);
}
+static inline size_t __invalidate_icache_max_range(void)
+{
+ u8 iminline;
+ u64 ctr;
+
+ asm volatile(ALTERNATIVE_CB("movz %0, #0\n"
+ "movk %0, #0, lsl #16\n"
+ "movk %0, #0, lsl #32\n"
+ "movk %0, #0, lsl #48\n",
+ ARM64_ALWAYS_SYSTEM,
+ kvm_compute_final_ctr_el0)
+ : "=r" (ctr));
+
+ iminline = SYS_FIELD_GET(CTR_EL0, IminLine, ctr) + 2;
+ return MAX_DVM_OPS << iminline;
+}
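
CTR_EL0.IminLine is log2 of the smallest I-cache line in 4-byte words, so the line size in bytes is 1 << (IminLine + 2); the helper multiplies that by MAX_DVM_OPS to bound by-line invalidation. The arithmetic in isolation (DEMO_MAX_DVM_OPS is an arbitrary stand-in):

#include <assert.h>
#include <stdint.h>

#define DEMO_MAX_DVM_OPS 16UL

static unsigned long icache_max_range(uint64_t ctr_el0)
{
        unsigned int iminline = (ctr_el0 & 0xf) + 2;    /* words -> bytes */

        return DEMO_MAX_DVM_OPS << iminline;
}

int main(void)
{
        /* IminLine = 4 -> 64-byte lines -> 16 ops * 64 bytes */
        assert(icache_max_range(0x4) == 1024);
        return 0;
}
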
+
static inline void __invalidate_icache_guest_page(void *va, size_t size)
{
- if (icache_is_aliasing()) {
- /* any kind of VIPT cache */
+ /*
+ * Blow the whole I-cache if it is aliasing (i.e. VIPT) or the
+	 * invalidation range exceeds our arbitrary limit on invalidations by
+ * cache line.
+ */
+ if (icache_is_aliasing() || size > __invalidate_icache_max_range())
icache_inval_all_pou();
- } else if (is_kernel_in_hyp_mode() || !icache_is_vpipt()) {
- /* PIPT or VPIPT at EL2 (see comment in __kvm_tlb_flush_vmid_ipa) */
+ else
icache_inval_pou((unsigned long)va, (unsigned long)va + size);
- }
}
void kvm_set_way_flush(struct kvm_vcpu *vcpu);
@@ -282,7 +320,7 @@ static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu)
static __always_inline void __load_stage2(struct kvm_s2_mmu *mmu,
struct kvm_arch *arch)
{
- write_sysreg(arch->vtcr, vtcr_el2);
+ write_sysreg(mmu->vtcr, vtcr_el2);
write_sysreg(kvm_get_vttbr(mmu), vttbr_el2);
/*
@@ -297,5 +335,66 @@ static inline struct kvm *kvm_s2_mmu_to_kvm(struct kvm_s2_mmu *mmu)
{
return container_of(mmu->arch, struct kvm, arch);
}
+
+static inline u64 get_vmid(u64 vttbr)
+{
+ return (vttbr & VTTBR_VMID_MASK(kvm_get_vmid_bits())) >>
+ VTTBR_VMID_SHIFT;
+}
+
+static inline bool kvm_s2_mmu_valid(struct kvm_s2_mmu *mmu)
+{
+ return !(mmu->tlb_vttbr & VTTBR_CNP_BIT);
+}
+
+static inline bool kvm_is_nested_s2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu)
+{
+ /*
+	 * Be careful, mmu may not be fully initialised so don't look at
+ * *any* of its fields.
+ */
+ return &kvm->arch.mmu != mmu;
+}
+
+static inline void kvm_fault_lock(struct kvm *kvm)
+{
+ if (is_protected_kvm_enabled())
+ write_lock(&kvm->mmu_lock);
+ else
+ read_lock(&kvm->mmu_lock);
+}
+
+static inline void kvm_fault_unlock(struct kvm *kvm)
+{
+ if (is_protected_kvm_enabled())
+ write_unlock(&kvm->mmu_lock);
+ else
+ read_unlock(&kvm->mmu_lock);
+}
+
+/*
+ * ARM64 KVM relies on a simple conversion from physaddr to a kernel
+ * virtual address (KVA) when it does cache maintenance as the CMO
+ * instructions work on virtual addresses. This is incompatible with
+ * VM_PFNMAP VMAs which may not have a kernel direct mapping to a
+ * virtual address.
+ *
+ * With S2FWB and CACHE DIC features, KVM need not do cache flushing
+ * and CMOs are NOP'd. This has the effect of no longer requiring a
+ * KVA for addresses mapped into the S2. The presence of these features
+ * is thus necessary to support cacheable S2 mapping of VM_PFNMAP.
+ */
+static inline bool kvm_supports_cacheable_pfnmap(void)
+{
+ return cpus_have_final_cap(ARM64_HAS_STAGE2_FWB) &&
+ cpus_have_final_cap(ARM64_HAS_CACHE_DIC);
+}
+
+#ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS
+void kvm_s2_ptdump_create_debugfs(struct kvm *kvm);
+#else
+static inline void kvm_s2_ptdump_create_debugfs(struct kvm *kvm) {}
+#endif /* CONFIG_PTDUMP_STAGE2_DEBUGFS */
+
#endif /* __ASSEMBLY__ */
#endif /* __ARM64_KVM_MMU_H__ */
diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
new file mode 100644
index 000000000000..7fd76f41c296
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_nested.h
@@ -0,0 +1,350 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ARM64_KVM_NESTED_H
+#define __ARM64_KVM_NESTED_H
+
+#include <linux/bitfield.h>
+#include <linux/kvm_host.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_pgtable.h>
+
+static inline bool vcpu_has_nv(const struct kvm_vcpu *vcpu)
+{
+ return (!__is_defined(__KVM_NVHE_HYPERVISOR__) &&
+ cpus_have_final_cap(ARM64_HAS_NESTED_VIRT) &&
+ vcpu_has_feature(vcpu, KVM_ARM_VCPU_HAS_EL2));
+}
+
+/* Translation helpers from non-VHE EL2 to EL1 */
+static inline u64 tcr_el2_ps_to_tcr_el1_ips(u64 tcr_el2)
+{
+ return (u64)FIELD_GET(TCR_EL2_PS_MASK, tcr_el2) << TCR_IPS_SHIFT;
+}
+
+static inline u64 translate_tcr_el2_to_tcr_el1(u64 tcr)
+{
+ return TCR_EPD1_MASK | /* disable TTBR1_EL1 */
+ ((tcr & TCR_EL2_TBI) ? TCR_TBI0 : 0) |
+ tcr_el2_ps_to_tcr_el1_ips(tcr) |
+ (tcr & TCR_EL2_TG0_MASK) |
+ (tcr & TCR_EL2_ORGN0_MASK) |
+ (tcr & TCR_EL2_IRGN0_MASK) |
+ (tcr & TCR_EL2_T0SZ_MASK);
+}
+
+static inline u64 translate_cptr_el2_to_cpacr_el1(u64 cptr_el2)
+{
+ u64 cpacr_el1 = CPACR_EL1_RES1;
+
+ if (cptr_el2 & CPTR_EL2_TTA)
+ cpacr_el1 |= CPACR_EL1_TTA;
+ if (!(cptr_el2 & CPTR_EL2_TFP))
+ cpacr_el1 |= CPACR_EL1_FPEN;
+ if (!(cptr_el2 & CPTR_EL2_TZ))
+ cpacr_el1 |= CPACR_EL1_ZEN;
+
+ cpacr_el1 |= cptr_el2 & (CPTR_EL2_TCPAC | CPTR_EL2_TAM);
+
+ return cpacr_el1;
+}
+
+static inline u64 translate_sctlr_el2_to_sctlr_el1(u64 val)
+{
+ /* Only preserve the minimal set of bits we support */
+ val &= (SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | SCTLR_ELx_SA |
+ SCTLR_ELx_I | SCTLR_ELx_IESB | SCTLR_ELx_WXN | SCTLR_ELx_EE);
+ val |= SCTLR_EL1_RES1;
+
+ return val;
+}
+
+static inline u64 translate_ttbr0_el2_to_ttbr0_el1(u64 ttbr0)
+{
+ /* Clear the ASID field */
+ return ttbr0 & ~GENMASK_ULL(63, 48);
+}
+
+extern bool forward_smc_trap(struct kvm_vcpu *vcpu);
+extern bool forward_debug_exception(struct kvm_vcpu *vcpu);
+extern void kvm_init_nested(struct kvm *kvm);
+extern int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu);
+extern void kvm_init_nested_s2_mmu(struct kvm_s2_mmu *mmu);
+extern struct kvm_s2_mmu *lookup_s2_mmu(struct kvm_vcpu *vcpu);
+
+union tlbi_info;
+
+extern void kvm_s2_mmu_iterate_by_vmid(struct kvm *kvm, u16 vmid,
+ const union tlbi_info *info,
+ void (*)(struct kvm_s2_mmu *,
+ const union tlbi_info *));
+extern void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu);
+extern void kvm_vcpu_put_hw_mmu(struct kvm_vcpu *vcpu);
+
+extern void check_nested_vcpu_requests(struct kvm_vcpu *vcpu);
+extern void kvm_nested_flush_hwstate(struct kvm_vcpu *vcpu);
+extern void kvm_nested_sync_hwstate(struct kvm_vcpu *vcpu);
+
+struct kvm_s2_trans {
+ phys_addr_t output;
+ unsigned long block_size;
+ bool writable;
+ bool readable;
+ int level;
+ u32 esr;
+ u64 desc;
+};
+
+static inline phys_addr_t kvm_s2_trans_output(struct kvm_s2_trans *trans)
+{
+ return trans->output;
+}
+
+static inline unsigned long kvm_s2_trans_size(struct kvm_s2_trans *trans)
+{
+ return trans->block_size;
+}
+
+static inline u32 kvm_s2_trans_esr(struct kvm_s2_trans *trans)
+{
+ return trans->esr;
+}
+
+static inline bool kvm_s2_trans_readable(struct kvm_s2_trans *trans)
+{
+ return trans->readable;
+}
+
+static inline bool kvm_s2_trans_writable(struct kvm_s2_trans *trans)
+{
+ return trans->writable;
+}
+
+static inline bool kvm_s2_trans_executable(struct kvm_s2_trans *trans)
+{
+ return !(trans->desc & BIT(54));
+}
+
+extern int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa,
+ struct kvm_s2_trans *result);
+extern int kvm_s2_handle_perm_fault(struct kvm_vcpu *vcpu,
+ struct kvm_s2_trans *trans);
+extern int kvm_inject_s2_fault(struct kvm_vcpu *vcpu, u64 esr_el2);
+extern void kvm_nested_s2_wp(struct kvm *kvm);
+extern void kvm_nested_s2_unmap(struct kvm *kvm, bool may_block);
+extern void kvm_nested_s2_flush(struct kvm *kvm);
+
+unsigned long compute_tlb_inval_range(struct kvm_s2_mmu *mmu, u64 val);
+
+static inline bool kvm_supported_tlbi_s1e1_op(struct kvm_vcpu *vcpu, u32 instr)
+{
+	struct kvm *kvm = vcpu->kvm;
+ u8 CRm = sys_reg_CRm(instr);
+
+ if (!(sys_reg_Op0(instr) == TLBI_Op0 &&
+ sys_reg_Op1(instr) == TLBI_Op1_EL1))
+ return false;
+
+ if (!(sys_reg_CRn(instr) == TLBI_CRn_XS ||
+ (sys_reg_CRn(instr) == TLBI_CRn_nXS &&
+ kvm_has_feat(kvm, ID_AA64ISAR1_EL1, XS, IMP))))
+ return false;
+
+ if (CRm == TLBI_CRm_nROS &&
+ !kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, OS))
+ return false;
+
+ if ((CRm == TLBI_CRm_RIS || CRm == TLBI_CRm_ROS ||
+ CRm == TLBI_CRm_RNS) &&
+ !kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, RANGE))
+ return false;
+
+ return true;
+}
+
+static inline bool kvm_supported_tlbi_s1e2_op(struct kvm_vcpu *vcpu, u32 instr)
+{
+	struct kvm *kvm = vcpu->kvm;
+ u8 CRm = sys_reg_CRm(instr);
+
+ if (!(sys_reg_Op0(instr) == TLBI_Op0 &&
+ sys_reg_Op1(instr) == TLBI_Op1_EL2))
+ return false;
+
+ if (!(sys_reg_CRn(instr) == TLBI_CRn_XS ||
+ (sys_reg_CRn(instr) == TLBI_CRn_nXS &&
+ kvm_has_feat(kvm, ID_AA64ISAR1_EL1, XS, IMP))))
+ return false;
+
+ if (CRm == TLBI_CRm_IPAIS || CRm == TLBI_CRm_IPAONS)
+ return false;
+
+ if (CRm == TLBI_CRm_nROS &&
+ !kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, OS))
+ return false;
+
+ if ((CRm == TLBI_CRm_RIS || CRm == TLBI_CRm_ROS ||
+ CRm == TLBI_CRm_RNS) &&
+ !kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, RANGE))
+ return false;
+
+ return true;
+}
+
+int kvm_init_nv_sysregs(struct kvm_vcpu *vcpu);
+u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val);
+
+#ifdef CONFIG_ARM64_PTR_AUTH
+bool kvm_auth_eretax(struct kvm_vcpu *vcpu, u64 *elr);
+#else
+static inline bool kvm_auth_eretax(struct kvm_vcpu *vcpu, u64 *elr)
+{
+ /* We really should never execute this... */
+ WARN_ON_ONCE(1);
+ *elr = 0xbad9acc0debadbad;
+ return false;
+}
+#endif
+
+#define KVM_NV_GUEST_MAP_SZ (KVM_PGTABLE_PROT_SW1 | KVM_PGTABLE_PROT_SW0)
+
+static inline u64 kvm_encode_nested_level(struct kvm_s2_trans *trans)
+{
+ return FIELD_PREP(KVM_NV_GUEST_MAP_SZ, trans->level);
+}
+
+/* Adjust alignment for the contiguous bit as per StageOA() */
+#define contiguous_bit_shift(d, wi, l) \
+ ({ \
+ u8 shift = 0; \
+ \
+ if ((d) & PTE_CONT) { \
+ switch (BIT((wi)->pgshift)) { \
+ case SZ_4K: \
+ shift = 4; \
+ break; \
+ case SZ_16K: \
+ shift = (l) == 2 ? 5 : 7; \
+ break; \
+ case SZ_64K: \
+ shift = 5; \
+ break; \
+ } \
+ } \
+ \
+ shift; \
+ })
+
+static inline u64 decode_range_tlbi(u64 val, u64 *range, u16 *asid)
+{
+ u64 base, tg, num, scale;
+ int shift;
+
+ tg = FIELD_GET(GENMASK(47, 46), val);
+
+ switch(tg) {
+ case 1:
+ shift = 12;
+ break;
+ case 2:
+ shift = 14;
+ break;
+ case 3:
+ default: /* IMPDEF: handle tg==0 as 64k */
+ shift = 16;
+ break;
+ }
+
+ base = (val & GENMASK(36, 0)) << shift;
+
+ if (asid)
+ *asid = FIELD_GET(TLBIR_ASID_MASK, val);
+
+ scale = FIELD_GET(GENMASK(45, 44), val);
+ num = FIELD_GET(GENMASK(43, 39), val);
+ *range = __TLBI_RANGE_PAGES(num, scale) << shift;
+
+ return base;
+}
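
A standalone check of the range-TLBI arithmetic above, assuming the usual (NUM + 1) << (5 * SCALE + 1) page count behind __TLBI_RANGE_PAGES():

#include <assert.h>
#include <stdint.h>

static uint64_t bits(uint64_t v, unsigned int hi, unsigned int lo)
{
        return (v >> lo) & ((1ULL << (hi - lo + 1)) - 1);
}

static uint64_t range_pages(uint64_t num, uint64_t scale)
{
        return (num + 1) << (5 * scale + 1);
}

int main(void)
{
        /* TG=1 (4K granule), SCALE=0, NUM=3, BaseADDR=0x1000 */
        uint64_t val = (1ULL << 46) | (3ULL << 39) | 0x1000;

        assert(bits(val, 47, 46) == 1);                                 /* TG   */
        assert(range_pages(bits(val, 43, 39), bits(val, 45, 44)) == 8); /* pages */
        assert((bits(val, 36, 0) << 12) == 0x1000000ULL);               /* base */
        return 0;
}
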
+
+static inline unsigned int ps_to_output_size(unsigned int ps)
+{
+ switch (ps) {
+ case 0: return 32;
+ case 1: return 36;
+ case 2: return 40;
+ case 3: return 42;
+ case 4: return 44;
+ case 5:
+ default:
+ return 48;
+ }
+}
+
+enum trans_regime {
+ TR_EL10,
+ TR_EL20,
+ TR_EL2,
+};
+
+struct s1_walk_info {
+ u64 baddr;
+ enum trans_regime regime;
+ unsigned int max_oa_bits;
+ unsigned int pgshift;
+ unsigned int txsz;
+ int sl;
+ bool as_el0;
+ bool hpd;
+ bool e0poe;
+ bool poe;
+ bool pan;
+ bool be;
+ bool s2;
+};
+
+struct s1_walk_result {
+ union {
+ struct {
+ u64 desc;
+ u64 pa;
+ s8 level;
+ u8 APTable;
+ bool nG;
+ u16 asid;
+ bool UXNTable;
+ bool PXNTable;
+ bool uwxn;
+ bool uov;
+ bool ur;
+ bool uw;
+ bool ux;
+ bool pwxn;
+ bool pov;
+ bool pr;
+ bool pw;
+ bool px;
+ };
+ struct {
+ u8 fst;
+ bool ptw;
+ bool s2;
+ };
+ };
+ bool failed;
+};
+
+int __kvm_translate_va(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
+ struct s1_walk_result *wr, u64 va);
+
+/* VNCR management */
+int kvm_vcpu_allocate_vncr_tlb(struct kvm_vcpu *vcpu);
+int kvm_handle_vncr_abort(struct kvm_vcpu *vcpu);
+void kvm_handle_s1e2_tlbi(struct kvm_vcpu *vcpu, u32 inst, u64 val);
+
+#define vncr_fixmap(c) \
+ ({ \
+ u32 __c = (c); \
+ BUG_ON(__c >= NR_CPUS); \
+ (FIX_VNCR - __c); \
+ })
+
+#endif /* __ARM64_KVM_NESTED_H */
diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index 9f339dffbc1a..1246216616b5 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -11,14 +11,39 @@
#include <linux/kvm_host.h>
#include <linux/types.h>
-#define KVM_PGTABLE_MAX_LEVELS 4U
+#define KVM_PGTABLE_FIRST_LEVEL -1
+#define KVM_PGTABLE_LAST_LEVEL 3
+
+/*
+ * The largest supported block sizes for KVM (no 52-bit PA support):
+ * - 4K (level 1): 1GB
+ * - 16K (level 2): 32MB
+ * - 64K (level 2): 512MB
+ */
+#ifdef CONFIG_ARM64_4K_PAGES
+#define KVM_PGTABLE_MIN_BLOCK_LEVEL 1
+#else
+#define KVM_PGTABLE_MIN_BLOCK_LEVEL 2
+#endif
+
+#define kvm_lpa2_is_enabled() system_supports_lpa2()
+
+static inline u64 kvm_get_parange_max(void)
+{
+ if (kvm_lpa2_is_enabled() ||
+ (IS_ENABLED(CONFIG_ARM64_PA_BITS_52) && PAGE_SHIFT == 16))
+ return ID_AA64MMFR0_EL1_PARANGE_52;
+ else
+ return ID_AA64MMFR0_EL1_PARANGE_48;
+}
static inline u64 kvm_get_parange(u64 mmfr0)
{
+ u64 parange_max = kvm_get_parange_max();
u64 parange = cpuid_feature_extract_unsigned_field(mmfr0,
- ID_AA64MMFR0_PARANGE_SHIFT);
- if (parange > ID_AA64MMFR0_PARANGE_MAX)
- parange = ID_AA64MMFR0_PARANGE_MAX;
+ ID_AA64MMFR0_EL1_PARANGE_SHIFT);
+ if (parange > parange_max)
+ parange = parange_max;
return parange;
}
@@ -29,6 +54,57 @@ typedef u64 kvm_pte_t;
#define KVM_PTE_ADDR_MASK GENMASK(47, PAGE_SHIFT)
#define KVM_PTE_ADDR_51_48 GENMASK(15, 12)
+#define KVM_PTE_ADDR_MASK_LPA2 GENMASK(49, PAGE_SHIFT)
+#define KVM_PTE_ADDR_51_50_LPA2 GENMASK(9, 8)
+
+#define KVM_PHYS_INVALID (-1ULL)
+
+#define KVM_PTE_TYPE BIT(1)
+#define KVM_PTE_TYPE_BLOCK 0
+#define KVM_PTE_TYPE_PAGE 1
+#define KVM_PTE_TYPE_TABLE 1
+
+#define KVM_PTE_LEAF_ATTR_LO GENMASK(11, 2)
+
+#define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX GENMASK(4, 2)
+#define KVM_PTE_LEAF_ATTR_LO_S1_AP GENMASK(7, 6)
+#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RO \
+ ({ cpus_have_final_cap(ARM64_KVM_HVHE) ? 2 : 3; })
+#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RW \
+ ({ cpus_have_final_cap(ARM64_KVM_HVHE) ? 0 : 1; })
+#define KVM_PTE_LEAF_ATTR_LO_S1_SH GENMASK(9, 8)
+#define KVM_PTE_LEAF_ATTR_LO_S1_SH_IS 3
+#define KVM_PTE_LEAF_ATTR_LO_S1_AF BIT(10)
+
+#define KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR GENMASK(5, 2)
+#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R BIT(6)
+#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W BIT(7)
+#define KVM_PTE_LEAF_ATTR_LO_S2_SH GENMASK(9, 8)
+#define KVM_PTE_LEAF_ATTR_LO_S2_SH_IS 3
+#define KVM_PTE_LEAF_ATTR_LO_S2_AF BIT(10)
+
+#define KVM_PTE_LEAF_ATTR_HI GENMASK(63, 50)
+
+#define KVM_PTE_LEAF_ATTR_HI_SW GENMASK(58, 55)
+
+#define KVM_PTE_LEAF_ATTR_HI_S1_XN BIT(54)
+
+#define KVM_PTE_LEAF_ATTR_HI_S2_XN BIT(54)
+
+#define KVM_PTE_LEAF_ATTR_HI_S1_GP BIT(50)
+
+#define KVM_PTE_LEAF_ATTR_S2_PERMS (KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | \
+ KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \
+ KVM_PTE_LEAF_ATTR_HI_S2_XN)
+
+#define KVM_INVALID_PTE_OWNER_MASK GENMASK(9, 2)
+#define KVM_MAX_OWNER_ID 1
+
+/*
+ * Used to indicate a pte for which a 'break-before-make' sequence is in
+ * progress.
+ */
+#define KVM_INVALID_PTE_LOCKED BIT(10)
static inline bool kvm_pte_valid(kvm_pte_t pte)
{
@@ -37,32 +113,76 @@ static inline bool kvm_pte_valid(kvm_pte_t pte)
static inline u64 kvm_pte_to_phys(kvm_pte_t pte)
{
- u64 pa = pte & KVM_PTE_ADDR_MASK;
+ u64 pa;
- if (PAGE_SHIFT == 16)
- pa |= FIELD_GET(KVM_PTE_ADDR_51_48, pte) << 48;
+ if (kvm_lpa2_is_enabled()) {
+ pa = pte & KVM_PTE_ADDR_MASK_LPA2;
+ pa |= FIELD_GET(KVM_PTE_ADDR_51_50_LPA2, pte) << 50;
+ } else {
+ pa = pte & KVM_PTE_ADDR_MASK;
+ if (PAGE_SHIFT == 16)
+ pa |= FIELD_GET(KVM_PTE_ADDR_51_48, pte) << 48;
+ }
return pa;
}
-static inline u64 kvm_granule_shift(u32 level)
+static inline kvm_pte_t kvm_phys_to_pte(u64 pa)
+{
+ kvm_pte_t pte;
+
+ if (kvm_lpa2_is_enabled()) {
+ pte = pa & KVM_PTE_ADDR_MASK_LPA2;
+ pa &= GENMASK(51, 50);
+ pte |= FIELD_PREP(KVM_PTE_ADDR_51_50_LPA2, pa >> 50);
+ } else {
+ pte = pa & KVM_PTE_ADDR_MASK;
+ if (PAGE_SHIFT == 16) {
+ pa &= GENMASK(51, 48);
+ pte |= FIELD_PREP(KVM_PTE_ADDR_51_48, pa >> 48);
+ }
+ }
+
+ return pte;
+}
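
For the non-LPA2, 64K-page case the high physical address bits [51:48] are folded into PTE bits [15:12]; a sketch of that round trip (the DEMO_ constants are illustrative, not the kernel's):

#include <assert.h>
#include <stdint.h>

#define DEMO_PAGE_SHIFT 16
#define DEMO_ADDR_MASK  (((1ULL << 48) - 1) & ~((1ULL << DEMO_PAGE_SHIFT) - 1))
#define DEMO_ADDR_51_48 (0xfULL << 12)

static uint64_t phys_to_pte(uint64_t pa)
{
        uint64_t pte = pa & DEMO_ADDR_MASK;

        pte |= ((pa >> 48) & 0xf) << 12;
        return pte;
}

static uint64_t pte_to_phys(uint64_t pte)
{
        uint64_t pa = pte & DEMO_ADDR_MASK;

        pa |= ((pte & DEMO_ADDR_51_48) >> 12) << 48;
        return pa;
}

int main(void)
{
        uint64_t pa = 0x000f123456780000ULL;    /* 52-bit, 64K-aligned */

        assert(pte_to_phys(phys_to_pte(pa)) == pa);
        return 0;
}
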
+
+static inline kvm_pfn_t kvm_pte_to_pfn(kvm_pte_t pte)
{
- /* Assumes KVM_PGTABLE_MAX_LEVELS is 4 */
+ return __phys_to_pfn(kvm_pte_to_phys(pte));
+}
+
+static inline u64 kvm_granule_shift(s8 level)
+{
+ /* Assumes KVM_PGTABLE_LAST_LEVEL is 3 */
return ARM64_HW_PGTABLE_LEVEL_SHIFT(level);
}
-static inline u64 kvm_granule_size(u32 level)
+static inline u64 kvm_granule_size(s8 level)
{
return BIT(kvm_granule_shift(level));
}
-static inline bool kvm_level_supports_block_mapping(u32 level)
+static inline bool kvm_level_supports_block_mapping(s8 level)
{
- /*
- * Reject invalid block mappings and don't bother with 4TB mappings for
- * 52-bit PAs.
- */
- return !(level == 0 || (PAGE_SIZE != SZ_4K && level == 1));
+ return level >= KVM_PGTABLE_MIN_BLOCK_LEVEL;
+}
+
+static inline u32 kvm_supported_block_sizes(void)
+{
+ s8 level = KVM_PGTABLE_MIN_BLOCK_LEVEL;
+ u32 r = 0;
+
+ for (; level <= KVM_PGTABLE_LAST_LEVEL; level++)
+ r |= BIT(kvm_granule_shift(level));
+
+ return r;
+}
+
+static inline bool kvm_is_block_size_supported(u64 size)
+{
+ bool is_power_of_two = IS_ALIGNED(size, size);
+
+ return is_power_of_two && (size & kvm_supported_block_sizes());
}
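
The level-to-size mapping comes from ARM64_HW_PGTABLE_LEVEL_SHIFT(), i.e. (PAGE_SHIFT - 3) * (4 - level) + 3, which reproduces the block sizes listed in the comment near the top of this header:

#include <assert.h>
#include <stdint.h>

static unsigned int granule_shift(unsigned int page_shift, int level)
{
        return (page_shift - 3) * (4 - level) + 3;
}

int main(void)
{
        assert((1ULL << granule_shift(12, 1)) == (1ULL << 30));   /* 4K,  L1: 1GB   */
        assert((1ULL << granule_shift(14, 2)) == (32ULL << 20));  /* 16K, L2: 32MB  */
        assert((1ULL << granule_shift(16, 2)) == (512ULL << 20)); /* 64K, L2: 512MB */
        return 0;
}
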
/**
@@ -77,6 +197,8 @@ static inline bool kvm_level_supports_block_mapping(u32 level)
* allocation is physically contiguous.
* @free_pages_exact: Free an exact number of memory pages previously
* allocated by zalloc_pages_exact.
+ * @free_unlinked_table: Free an unlinked paging structure by unlinking and
+ * dropping references.
* @get_page: Increment the refcount on a page.
* @put_page: Decrement the refcount on a page. When the
* refcount reaches 0 the page is automatically
@@ -95,6 +217,7 @@ struct kvm_pgtable_mm_ops {
void* (*zalloc_page)(void *arg);
void* (*zalloc_pages_exact)(size_t size);
void (*free_pages_exact)(void *addr, size_t size);
+ void (*free_unlinked_table)(void *addr, s8 level);
void (*get_page)(void *addr);
void (*put_page)(void *addr);
int (*page_count)(void *addr);
@@ -121,6 +244,7 @@ enum kvm_pgtable_stage2_flags {
* @KVM_PGTABLE_PROT_W: Write permission.
* @KVM_PGTABLE_PROT_R: Read permission.
* @KVM_PGTABLE_PROT_DEVICE: Device attributes.
+ * @KVM_PGTABLE_PROT_NORMAL_NC: Normal noncacheable attributes.
* @KVM_PGTABLE_PROT_SW0: Software bit 0.
* @KVM_PGTABLE_PROT_SW1: Software bit 1.
* @KVM_PGTABLE_PROT_SW2: Software bit 2.
@@ -132,6 +256,7 @@ enum kvm_pgtable_prot {
KVM_PGTABLE_PROT_R = BIT(2),
KVM_PGTABLE_PROT_DEVICE = BIT(3),
+ KVM_PGTABLE_PROT_NORMAL_NC = BIT(4),
KVM_PGTABLE_PROT_SW0 = BIT(55),
KVM_PGTABLE_PROT_SW1 = BIT(56),
@@ -154,29 +279,6 @@ typedef bool (*kvm_pgtable_force_pte_cb_t)(u64 addr, u64 end,
enum kvm_pgtable_prot prot);
/**
- * struct kvm_pgtable - KVM page-table.
- * @ia_bits: Maximum input address size, in bits.
- * @start_level: Level at which the page-table walk starts.
- * @pgd: Pointer to the first top-level entry of the page-table.
- * @mm_ops: Memory management callbacks.
- * @mmu: Stage-2 KVM MMU struct. Unused for stage-1 page-tables.
- * @flags: Stage-2 page-table flags.
- * @force_pte_cb: Function that returns true if page level mappings must
- * be used instead of block mappings.
- */
-struct kvm_pgtable {
- u32 ia_bits;
- u32 start_level;
- kvm_pte_t *pgd;
- struct kvm_pgtable_mm_ops *mm_ops;
-
- /* Stage-2 only */
- struct kvm_s2_mmu *mmu;
- enum kvm_pgtable_stage2_flags flags;
- kvm_pgtable_force_pte_cb_t force_pte_cb;
-};
-
-/**
* enum kvm_pgtable_walk_flags - Flags to control a depth-first page-table walk.
* @KVM_PGTABLE_WALK_LEAF: Visit leaf entries, including invalid
* entries.
@@ -184,17 +286,46 @@ struct kvm_pgtable {
* children.
* @KVM_PGTABLE_WALK_TABLE_POST: Visit table entries after their
* children.
+ * @KVM_PGTABLE_WALK_SHARED: Indicates the page-tables may be shared
+ * with other software walkers.
+ * @KVM_PGTABLE_WALK_HANDLE_FAULT: Indicates the page-table walk was
+ * invoked from a fault handler.
+ * @KVM_PGTABLE_WALK_SKIP_BBM_TLBI: Visit and update table entries
+ * without Break-before-make's
+ * TLB invalidation.
+ * @KVM_PGTABLE_WALK_SKIP_CMO: Visit and update table entries
+ * without the cache maintenance
+ * operations normally required.
*/
enum kvm_pgtable_walk_flags {
KVM_PGTABLE_WALK_LEAF = BIT(0),
KVM_PGTABLE_WALK_TABLE_PRE = BIT(1),
KVM_PGTABLE_WALK_TABLE_POST = BIT(2),
+ KVM_PGTABLE_WALK_SHARED = BIT(3),
+ KVM_PGTABLE_WALK_HANDLE_FAULT = BIT(4),
+ KVM_PGTABLE_WALK_SKIP_BBM_TLBI = BIT(5),
+ KVM_PGTABLE_WALK_SKIP_CMO = BIT(6),
+};
+
+struct kvm_pgtable_visit_ctx {
+ kvm_pte_t *ptep;
+ kvm_pte_t old;
+ void *arg;
+ struct kvm_pgtable_mm_ops *mm_ops;
+ u64 start;
+ u64 addr;
+ u64 end;
+ s8 level;
+ enum kvm_pgtable_walk_flags flags;
};
-typedef int (*kvm_pgtable_visitor_fn_t)(u64 addr, u64 end, u32 level,
- kvm_pte_t *ptep,
- enum kvm_pgtable_walk_flags flag,
- void * const arg);
+typedef int (*kvm_pgtable_visitor_fn_t)(const struct kvm_pgtable_visit_ctx *ctx,
+ enum kvm_pgtable_walk_flags visit);
+
+static inline bool kvm_pgtable_walk_shared(const struct kvm_pgtable_visit_ctx *ctx)
+{
+ return ctx->flags & KVM_PGTABLE_WALK_SHARED;
+}
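
With the visitor callback now taking a context structure rather than a long argument list, a walker reads everything it needs from ctx. The sketch below is a hypothetical leaf-counting walker (not part of the patch) written against the declarations in this header; requesting KVM_PGTABLE_WALK_SHARED is what makes the kvm_pgtable_walk_begin()/kvm_pgtable_walk_end() helpers further down take the RCU read lock on the kernel side, and reject the walk with -EPERM at hyp.

/* Hypothetical example: count valid leaf entries over an IPA range. */
static int count_leaf_visitor(const struct kvm_pgtable_visit_ctx *ctx,
                              enum kvm_pgtable_walk_flags visit)
{
        unsigned long *count = ctx->arg;

        if (kvm_pte_valid(ctx->old))
                (*count)++;

        return 0;
}

static unsigned long count_valid_leaves(struct kvm_pgtable *pgt,
                                        u64 addr, u64 size)
{
        unsigned long count = 0;
        struct kvm_pgtable_walker walker = {
                .cb     = count_leaf_visitor,
                .arg    = &count,
                .flags  = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_SHARED,
        };

        /* error handling elided for brevity */
        kvm_pgtable_walk(pgt, addr, size, &walker);

        return count;
}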
/**
* struct kvm_pgtable_walker - Hook into a page-table walk.
@@ -209,6 +340,109 @@ struct kvm_pgtable_walker {
const enum kvm_pgtable_walk_flags flags;
};
+/*
+ * RCU cannot be used in a non-kernel context such as the hyp. As such, page
+ * table walkers used in hyp do not call into RCU and instead use other
+ * synchronization mechanisms (such as a spinlock).
+ */
+#if defined(__KVM_NVHE_HYPERVISOR__) || defined(__KVM_VHE_HYPERVISOR__)
+
+typedef kvm_pte_t *kvm_pteref_t;
+
+static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walker,
+ kvm_pteref_t pteref)
+{
+ return pteref;
+}
+
+static inline kvm_pte_t *kvm_dereference_pteref_raw(kvm_pteref_t pteref)
+{
+ return pteref;
+}
+
+static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker)
+{
+ /*
+ * Due to the lack of RCU (or a similar protection scheme), only
+ * non-shared table walkers are allowed in the hypervisor.
+ */
+ if (walker->flags & KVM_PGTABLE_WALK_SHARED)
+ return -EPERM;
+
+ return 0;
+}
+
+static inline void kvm_pgtable_walk_end(struct kvm_pgtable_walker *walker) {}
+
+static inline bool kvm_pgtable_walk_lock_held(void)
+{
+ return true;
+}
+
+#else
+
+typedef kvm_pte_t __rcu *kvm_pteref_t;
+
+static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walker,
+ kvm_pteref_t pteref)
+{
+ return rcu_dereference_check(pteref, !(walker->flags & KVM_PGTABLE_WALK_SHARED));
+}
+
+static inline kvm_pte_t *kvm_dereference_pteref_raw(kvm_pteref_t pteref)
+{
+ return rcu_dereference_raw(pteref);
+}
+
+static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker)
+{
+ if (walker->flags & KVM_PGTABLE_WALK_SHARED)
+ rcu_read_lock();
+
+ return 0;
+}
+
+static inline void kvm_pgtable_walk_end(struct kvm_pgtable_walker *walker)
+{
+ if (walker->flags & KVM_PGTABLE_WALK_SHARED)
+ rcu_read_unlock();
+}
+
+static inline bool kvm_pgtable_walk_lock_held(void)
+{
+ return rcu_read_lock_held();
+}
+
+#endif
+
+/**
+ * struct kvm_pgtable - KVM page-table.
+ * @ia_bits: Maximum input address size, in bits.
+ * @start_level: Level at which the page-table walk starts.
+ * @pgd: Pointer to the first top-level entry of the page-table.
+ * @mm_ops: Memory management callbacks.
+ * @mmu: Stage-2 KVM MMU struct. Unused for stage-1 page-tables.
+ * @flags: Stage-2 page-table flags.
+ * @force_pte_cb: Function that returns true if page level mappings must
+ * be used instead of block mappings.
+ */
+struct kvm_pgtable {
+ union {
+ struct rb_root_cached pkvm_mappings;
+ struct {
+ u32 ia_bits;
+ s8 start_level;
+ kvm_pteref_t pgd;
+ struct kvm_pgtable_mm_ops *mm_ops;
+
+ /* Stage-2 only */
+ enum kvm_pgtable_stage2_flags flags;
+ kvm_pgtable_force_pte_cb_t force_pte_cb;
+ };
+ };
+ struct kvm_s2_mmu *mmu;
+};
+
/**
* kvm_pgtable_hyp_init() - Initialise a hypervisor stage-1 page-table.
* @pgt: Uninitialised page-table structure to initialise.
@@ -289,6 +523,14 @@ u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size);
u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift);
/**
+ * kvm_pgtable_stage2_pgd_size() - Helper to compute size of a stage-2 PGD
+ * @vtcr: Content of the VTCR register.
+ *
+ * Return: the size (in bytes) of the stage-2 PGD
+ */
+size_t kvm_pgtable_stage2_pgd_size(u64 vtcr);
+
+/**
* __kvm_pgtable_stage2_init() - Initialise a guest stage-2 page-table.
* @pgt: Uninitialised page-table structure to initialise.
* @mmu: S2 MMU context for this S2 translation
@@ -304,8 +546,11 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
enum kvm_pgtable_stage2_flags flags,
kvm_pgtable_force_pte_cb_t force_pte_cb);
-#define kvm_pgtable_stage2_init(pgt, mmu, mm_ops) \
- __kvm_pgtable_stage2_init(pgt, mmu, mm_ops, 0, NULL)
+static inline int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
+ struct kvm_pgtable_mm_ops *mm_ops)
+{
+ return __kvm_pgtable_stage2_init(pgt, mmu, mm_ops, 0, NULL);
+}
/**
* kvm_pgtable_stage2_destroy() - Destroy an unused guest stage-2 page-table.
@@ -317,6 +562,63 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
/**
+ * kvm_pgtable_stage2_destroy_range() - Destroy the unlinked range of addresses.
+ * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
+ * @addr: Base intermediate physical address of the range to destroy.
+ * @size: Size of the range to destroy.
+ *
+ * The page-table is assumed to be unreachable by any hardware walkers prior
+ * to freeing and therefore no TLB invalidation is performed.
+ */
+void kvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt,
+ u64 addr, u64 size);
+
+/**
+ * kvm_pgtable_stage2_destroy_pgd() - Destroy the PGD of guest stage-2 page-table.
+ * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
+ *
+ * It is assumed that the rest of the page-table is freed before this operation.
+ */
+void kvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt);
+
+/**
+ * kvm_pgtable_stage2_free_unlinked() - Free an unlinked stage-2 paging structure.
+ * @mm_ops: Memory management callbacks.
+ * @pgtable: Unlinked stage-2 paging structure to be freed.
+ * @level: Level of the stage-2 paging structure to be freed.
+ *
+ * The page-table is assumed to be unreachable by any hardware walkers prior to
+ * freeing and therefore no TLB invalidation is performed.
+ */
+void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level);
+
+/**
+ * kvm_pgtable_stage2_create_unlinked() - Create an unlinked stage-2 paging structure.
+ * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
+ * @phys: Physical address of the memory to map.
+ * @level: Starting level of the stage-2 paging structure to be created.
+ * @prot: Permissions and attributes for the mapping.
+ * @mc: Cache of pre-allocated and zeroed memory from which to allocate
+ * page-table pages.
+ * @force_pte: Force mappings to PAGE_SIZE granularity.
+ *
+ * Returns an unlinked page-table tree. This new page-table tree is
+ * not reachable (i.e., it is unlinked) from the root pgd and it's
+ * therefore unreachable by the hardware page-table walker. No TLB
+ * invalidation or CMOs are performed.
+ *
+ * If device attributes are not explicitly requested in @prot, then the
+ * mapping will be normal, cacheable.
+ *
+ * Return: The fully populated (unlinked) stage-2 paging structure, or
+ * an ERR_PTR(error) on failure.
+ */
+kvm_pte_t *kvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt,
+ u64 phys, s8 level,
+ enum kvm_pgtable_prot prot,
+ void *mc, bool force_pte);
+
+/**
* kvm_pgtable_stage2_map() - Install a mapping in a guest stage-2 page-table.
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
* @addr: Intermediate physical address at which to place the mapping.
@@ -325,6 +627,7 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
* @prot: Permissions and attributes for the mapping.
* @mc: Cache of pre-allocated and zeroed memory from which to allocate
* page-table pages.
+ * @flags: Flags to control the page-table walk (ex. a shared walk)
*
* The offset of @addr within a page is ignored, @size is rounded-up to
* the next page boundary and @phys is rounded-down to the previous page
@@ -346,7 +649,7 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
*/
int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
u64 phys, enum kvm_pgtable_prot prot,
- void *mc);
+ void *mc, enum kvm_pgtable_walk_flags flags);
/**
* kvm_pgtable_stage2_set_owner() - Unmap and annotate pages in the IPA space to
@@ -409,33 +712,37 @@ int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size);
* kvm_pgtable_stage2_mkyoung() - Set the access flag in a page-table entry.
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
* @addr: Intermediate physical address to identify the page-table entry.
+ * @flags: Flags to control the page-table walk (ex. a shared walk)
*
* The offset of @addr within a page is ignored.
*
* If there is a valid, leaf page-table entry used to translate @addr, then
* set the access flag in that entry.
- *
- * Return: The old page-table entry prior to setting the flag, 0 on failure.
*/
-kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr);
+void kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr,
+ enum kvm_pgtable_walk_flags flags);
/**
- * kvm_pgtable_stage2_mkold() - Clear the access flag in a page-table entry.
+ * kvm_pgtable_stage2_test_clear_young() - Test and optionally clear the access
+ * flag in a page-table entry.
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
* @addr: Intermediate physical address to identify the page-table entry.
+ * @size: Size of the address range to visit.
+ * @mkold: True if the access flag should be cleared.
*
* The offset of @addr within a page is ignored.
*
- * If there is a valid, leaf page-table entry used to translate @addr, then
- * clear the access flag in that entry.
+ * Tests and conditionally clears the access flag for every valid, leaf
+ * page-table entry used to translate the range [@addr, @addr + @size).
*
* Note that it is the caller's responsibility to invalidate the TLB after
* calling this function to ensure that the updated permissions are visible
* to the CPUs.
*
- * Return: The old page-table entry prior to clearing the flag, 0 on failure.
+ * Return: True if any of the visited PTEs had the access flag set.
*/
-kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr);
+bool kvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr,
+ u64 size, bool mkold);
/**
* kvm_pgtable_stage2_relax_perms() - Relax the permissions enforced by a
@@ -443,6 +750,7 @@ kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr);
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
* @addr: Intermediate physical address to identify the page-table entry.
* @prot: Additional permissions to grant for the mapping.
+ * @flags: Flags to control the page-table walk (ex. a shared walk)
*
* The offset of @addr within a page is ignored.
*
@@ -455,19 +763,8 @@ kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr);
* Return: 0 on success, negative error code on failure.
*/
int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
- enum kvm_pgtable_prot prot);
-
-/**
- * kvm_pgtable_stage2_is_young() - Test whether a page-table entry has the
- * access flag set.
- * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
- * @addr: Intermediate physical address to identify the page-table entry.
- *
- * The offset of @addr within a page is ignored.
- *
- * Return: True if the page-table entry has the access flag set, false otherwise.
- */
-bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr);
+ enum kvm_pgtable_prot prot,
+ enum kvm_pgtable_walk_flags flags);
/**
* kvm_pgtable_stage2_flush_range() - Clean and invalidate data cache to Point
@@ -485,6 +782,25 @@ bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr);
int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size);
/**
+ * kvm_pgtable_stage2_split() - Split a range of huge pages into leaf PTEs pointing
+ * to PAGE_SIZE guest pages.
+ * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init().
+ * @addr: Intermediate physical address from which to split.
+ * @size: Size of the range.
+ * @mc: Cache of pre-allocated and zeroed memory from which to allocate
+ * page-table pages.
+ *
+ * The function tries to split any level 1 or 2 entry that overlaps
+ * with the input range (given by @addr and @size).
+ *
+ * Return: 0 on success, negative error code on failure. Note that
+ * kvm_pgtable_stage2_split() is best effort: it tries to break as many
+ * blocks in the input range as allowed by the capacity of @mc.
+ */
+int kvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
+ struct kvm_mmu_memory_cache *mc);
+
+/**
* kvm_pgtable_walk() - Walk a page-table.
* @pgt: Page-table structure initialised by kvm_pgtable_*_init().
* @addr: Input address for the start of the walk.
@@ -496,9 +812,9 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size);
*
* The walker will walk the page-table entries corresponding to the input
* address range specified, visiting entries according to the walker flags.
- * Invalid entries are treated as leaf entries. Leaf entries are reloaded
- * after invoking the walker callback, allowing the walker to descend into
- * a newly installed table.
+ * Invalid entries are treated as leaf entries. The visited page table entry is
+ * reloaded after invoking the walker callback, allowing the walker to descend
+ * into a newly installed table.
*
* Returning a negative error code from the walker callback function will
* terminate the walk immediately with the same error code.
@@ -526,7 +842,7 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
* Return: 0 on success, negative error code on failure.
*/
int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr,
- kvm_pte_t *ptep, u32 *level);
+ kvm_pte_t *ptep, s8 *level);
/**
* kvm_pgtable_stage2_pte_prot() - Retrieve the protection attributes of a
@@ -547,4 +863,14 @@ enum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte);
* kvm_pgtable_prot format.
*/
enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte);
+
+/**
+ * kvm_tlb_flush_vmid_range() - Invalidate/flush a range of TLB entries
+ *
+ * @mmu: Stage-2 KVM MMU struct
+ * @addr: The base Intermediate physical address from which to invalidate
+ * @size: Size of the range from the base to invalidate
+ */
+void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
+ phys_addr_t addr, size_t size);
#endif /* __ARM64_KVM_PGTABLE_H__ */
diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h
index 9f4ad2a8df59..35f9d9478004 100644
--- a/arch/arm64/include/asm/kvm_pkvm.h
+++ b/arch/arm64/include/asm/kvm_pkvm.h
@@ -6,20 +6,87 @@
#ifndef __ARM64_KVM_PKVM_H__
#define __ARM64_KVM_PKVM_H__
+#include <linux/arm_ffa.h>
#include <linux/memblock.h>
+#include <linux/scatterlist.h>
#include <asm/kvm_pgtable.h>
+/* Maximum number of VMs that can co-exist under pKVM. */
+#define KVM_MAX_PVMS 255
+
#define HYP_MEMBLOCK_REGIONS 128
+int pkvm_init_host_vm(struct kvm *kvm);
+int pkvm_create_hyp_vm(struct kvm *kvm);
+void pkvm_destroy_hyp_vm(struct kvm *kvm);
+int pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu);
+
+/*
+ * This functions as an allow-list of protected VM capabilities.
+ * Features not explicitly allowed by this function are denied.
+ */
+static inline bool kvm_pvm_ext_allowed(long ext)
+{
+ switch (ext) {
+ case KVM_CAP_IRQCHIP:
+ case KVM_CAP_ARM_PSCI:
+ case KVM_CAP_ARM_PSCI_0_2:
+ case KVM_CAP_NR_VCPUS:
+ case KVM_CAP_MAX_VCPUS:
+ case KVM_CAP_MAX_VCPU_ID:
+ case KVM_CAP_MSI_DEVID:
+ case KVM_CAP_ARM_VM_IPA_SIZE:
+ case KVM_CAP_ARM_PMU_V3:
+ case KVM_CAP_ARM_SVE:
+ case KVM_CAP_ARM_PTRAUTH_ADDRESS:
+ case KVM_CAP_ARM_PTRAUTH_GENERIC:
+ return true;
+ default:
+ return false;
+ }
+}
+
extern struct memblock_region kvm_nvhe_sym(hyp_memory)[];
extern unsigned int kvm_nvhe_sym(hyp_memblock_nr);
+static inline unsigned long
+hyp_vmemmap_memblock_size(struct memblock_region *reg, size_t vmemmap_entry_size)
+{
+ unsigned long nr_pages = reg->size >> PAGE_SHIFT;
+ unsigned long start, end;
+
+ start = (reg->base >> PAGE_SHIFT) * vmemmap_entry_size;
+ end = start + nr_pages * vmemmap_entry_size;
+ start = ALIGN_DOWN(start, PAGE_SIZE);
+ end = ALIGN(end, PAGE_SIZE);
+
+ return end - start;
+}
+
+static inline unsigned long hyp_vmemmap_pages(size_t vmemmap_entry_size)
+{
+ unsigned long res = 0, i;
+
+ for (i = 0; i < kvm_nvhe_sym(hyp_memblock_nr); i++) {
+ res += hyp_vmemmap_memblock_size(&kvm_nvhe_sym(hyp_memory)[i],
+ vmemmap_entry_size);
+ }
+
+ return res >> PAGE_SHIFT;
+}
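
hyp_vmemmap_pages() sizes the hypervisor vmemmap by scaling each memblock's page count by the per-page metadata size and page-aligning the resulting byte range. A standalone sketch of that calculation for a single made-up memblock follows; the 4K page size and 64-byte vmemmap entry are assumptions for illustration only.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT      12
#define PAGE_SIZE       (1UL << PAGE_SHIFT)
#define ALIGN_DOWN(x, a)        ((x) & ~((a) - 1))
#define ALIGN_UP(x, a)          (((x) + (a) - 1) & ~((a) - 1))

static unsigned long vmemmap_bytes(uint64_t base, uint64_t size,
                                   unsigned long entry_size)
{
        unsigned long nr_pages = size >> PAGE_SHIFT;
        unsigned long start = (base >> PAGE_SHIFT) * entry_size;
        unsigned long end = start + nr_pages * entry_size;

        return ALIGN_UP(end, PAGE_SIZE) - ALIGN_DOWN(start, PAGE_SIZE);
}

int main(void)
{
        /* one 512M memblock at 1G, 64-byte entries */
        unsigned long bytes = vmemmap_bytes(1UL << 30, 512UL << 20, 64);

        printf("vmemmap: %lu bytes (%lu pages)\n", bytes, bytes >> PAGE_SHIFT);
        return 0;
}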
+
+static inline unsigned long hyp_vm_table_pages(void)
+{
+ return PAGE_ALIGN(KVM_MAX_PVMS * sizeof(void *)) >> PAGE_SHIFT;
+}
+
static inline unsigned long __hyp_pgtable_max_pages(unsigned long nr_pages)
{
- unsigned long total = 0, i;
+ unsigned long total = 0;
+ int i;
/* Provision the worst case scenario */
- for (i = 0; i < KVM_PGTABLE_MAX_LEVELS; i++) {
+ for (i = KVM_PGTABLE_FIRST_LEVEL; i <= KVM_PGTABLE_LAST_LEVEL; i++) {
nr_pages = DIV_ROUND_UP(nr_pages, PTRS_PER_PTE);
total += nr_pages;
}
@@ -68,4 +135,68 @@ static inline unsigned long host_s2_pgtable_pages(void)
return res;
}
+#ifdef CONFIG_NVHE_EL2_DEBUG
+static inline unsigned long pkvm_selftest_pages(void) { return 32; }
+#else
+static inline unsigned long pkvm_selftest_pages(void) { return 0; }
+#endif
+
+#define KVM_FFA_MBOX_NR_PAGES 1
+
+static inline unsigned long hyp_ffa_proxy_pages(void)
+{
+ size_t desc_max;
+
+ /*
+ * The hypervisor FFA proxy needs enough memory to buffer a fragmented
+ * descriptor returned from EL3 in response to a RETRIEVE_REQ call.
+ */
+ desc_max = sizeof(struct ffa_mem_region) +
+ sizeof(struct ffa_mem_region_attributes) +
+ sizeof(struct ffa_composite_mem_region) +
+ SG_MAX_SEGMENTS * sizeof(struct ffa_mem_region_addr_range);
+
+ /* Plus a page each for the hypervisor's RX and TX mailboxes. */
+ return (2 * KVM_FFA_MBOX_NR_PAGES) + DIV_ROUND_UP(desc_max, PAGE_SIZE);
+}
+
+static inline size_t pkvm_host_sve_state_size(void)
+{
+ if (!system_supports_sve())
+ return 0;
+
+ return size_add(sizeof(struct cpu_sve_state),
+ SVE_SIG_REGS_SIZE(sve_vq_from_vl(kvm_host_sve_max_vl)));
+}
+
+struct pkvm_mapping {
+ struct rb_node node;
+ u64 gfn;
+ u64 pfn;
+ u64 nr_pages;
+ u64 __subtree_last; /* Internal member for interval tree */
+};
+
+int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
+ struct kvm_pgtable_mm_ops *mm_ops);
+void pkvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt,
+ u64 addr, u64 size);
+void pkvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt);
+int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
+ enum kvm_pgtable_prot prot, void *mc,
+ enum kvm_pgtable_walk_flags flags);
+int pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size);
+int pkvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size);
+int pkvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size);
+bool pkvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr, u64 size, bool mkold);
+int pkvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, enum kvm_pgtable_prot prot,
+ enum kvm_pgtable_walk_flags flags);
+void pkvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr,
+ enum kvm_pgtable_walk_flags flags);
+int pkvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
+ struct kvm_mmu_memory_cache *mc);
+void pkvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level);
+kvm_pte_t *pkvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt, u64 phys, s8 level,
+ enum kvm_pgtable_prot prot, void *mc,
+ bool force_pte);
#endif /* __ARM64_KVM_PKVM_H__ */
diff --git a/arch/arm64/include/asm/kvm_ptrauth.h b/arch/arm64/include/asm/kvm_ptrauth.h
index 0cd0965255d2..6199c9f7ec6e 100644
--- a/arch/arm64/include/asm/kvm_ptrauth.h
+++ b/arch/arm64/include/asm/kvm_ptrauth.h
@@ -99,5 +99,26 @@ alternative_else_nop_endif
.macro ptrauth_switch_to_hyp g_ctxt, h_ctxt, reg1, reg2, reg3
.endm
#endif /* CONFIG_ARM64_PTR_AUTH */
+
+#else /* !__ASSEMBLY__ */
+
+#define __ptrauth_save_key(ctxt, key) \
+ do { \
+ u64 __val; \
+ __val = read_sysreg_s(SYS_ ## key ## KEYLO_EL1); \
+ ctxt_sys_reg(ctxt, key ## KEYLO_EL1) = __val; \
+ __val = read_sysreg_s(SYS_ ## key ## KEYHI_EL1); \
+ ctxt_sys_reg(ctxt, key ## KEYHI_EL1) = __val; \
+ } while(0)
+
+#define ptrauth_save_keys(ctxt) \
+ do { \
+ __ptrauth_save_key(ctxt, APIA); \
+ __ptrauth_save_key(ctxt, APIB); \
+ __ptrauth_save_key(ctxt, APDA); \
+ __ptrauth_save_key(ctxt, APDB); \
+ __ptrauth_save_key(ctxt, APGA); \
+ } while(0)
+
#endif /* __ASSEMBLY__ */
#endif /* __ASM_KVM_PTRAUTH_H */
diff --git a/arch/arm64/include/asm/kvm_ras.h b/arch/arm64/include/asm/kvm_ras.h
deleted file mode 100644
index 87e10d9a635b..000000000000
--- a/arch/arm64/include/asm/kvm_ras.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2018 - Arm Ltd */
-
-#ifndef __ARM64_KVM_RAS_H__
-#define __ARM64_KVM_RAS_H__
-
-#include <linux/acpi.h>
-#include <linux/errno.h>
-#include <linux/types.h>
-
-#include <asm/acpi.h>
-
-/*
- * Was this synchronous external abort a RAS notification?
- * Returns '0' for errors handled by some RAS subsystem, or -ENOENT.
- */
-static inline int kvm_handle_guest_sea(phys_addr_t addr, u64 esr)
-{
- /* apei_claim_sea(NULL) expects to mask interrupts itself */
- lockdep_assert_irqs_enabled();
-
- return apei_claim_sea(NULL);
-}
-
-#endif /* __ARM64_KVM_RAS_H__ */
diff --git a/arch/arm64/include/asm/linkage.h b/arch/arm64/include/asm/linkage.h
index 43f8c25b3fda..d3acd9c87509 100644
--- a/arch/arm64/include/asm/linkage.h
+++ b/arch/arm64/include/asm/linkage.h
@@ -5,8 +5,8 @@
#include <asm/assembler.h>
#endif
-#define __ALIGN .align 2
-#define __ALIGN_STR ".align 2"
+#define __ALIGN .balign CONFIG_FUNCTION_ALIGNMENT
+#define __ALIGN_STR ".balign " #CONFIG_FUNCTION_ALIGNMENT
/*
* When using in-kernel BTI we need to ensure that PCS-conformant
@@ -39,4 +39,8 @@
SYM_START(name, SYM_L_WEAK, SYM_A_NONE) \
bti c ;
+#define SYM_TYPED_FUNC_START(name) \
+ SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) \
+ bti c ;
+
#endif
diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h
index 29c85810ae69..3129a5819d0e 100644
--- a/arch/arm64/include/asm/lse.h
+++ b/arch/arm64/include/asm/lse.h
@@ -10,22 +10,15 @@
#include <linux/compiler_types.h>
#include <linux/export.h>
-#include <linux/jump_label.h>
#include <linux/stringify.h>
#include <asm/alternative.h>
+#include <asm/alternative-macros.h>
#include <asm/atomic_lse.h>
#include <asm/cpucaps.h>
-extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS];
-
-static __always_inline bool system_uses_lse_atomics(void)
-{
- return static_branch_likely(&cpu_hwcap_keys[ARM64_HAS_LSE_ATOMICS]);
-}
-
#define __lse_ll_sc_body(op, ...) \
({ \
- system_uses_lse_atomics() ? \
+ alternative_has_cap_likely(ARM64_HAS_LSE_ATOMICS) ? \
__lse_##op(__VA_ARGS__) : \
__ll_sc_##op(__VA_ARGS__); \
})
@@ -36,8 +29,6 @@ static __always_inline bool system_uses_lse_atomics(void)
#else /* CONFIG_ARM64_LSE_ATOMICS */
-static inline bool system_uses_lse_atomics(void) { return false; }
-
#define __lse_ll_sc_body(op, ...) __ll_sc_##op(__VA_ARGS__)
#define ARM64_LSE_ATOMIC_INSN(llsc, lse) llsc
diff --git a/arch/arm64/include/asm/mem_encrypt.h b/arch/arm64/include/asm/mem_encrypt.h
new file mode 100644
index 000000000000..314b2b52025f
--- /dev/null
+++ b/arch/arm64/include/asm/mem_encrypt.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __ASM_MEM_ENCRYPT_H
+#define __ASM_MEM_ENCRYPT_H
+
+#include <asm/rsi.h>
+
+struct device;
+
+struct arm64_mem_crypt_ops {
+ int (*encrypt)(unsigned long addr, int numpages);
+ int (*decrypt)(unsigned long addr, int numpages);
+};
+
+int arm64_mem_crypt_ops_register(const struct arm64_mem_crypt_ops *ops);
+
+int set_memory_encrypted(unsigned long addr, int numpages);
+int set_memory_decrypted(unsigned long addr, int numpages);
+
+int realm_register_memory_enc_ops(void);
+
+static inline bool force_dma_unencrypted(struct device *dev)
+{
+ return is_realm_world();
+}
+
+/*
+ * For Arm CCA guests, canonical addresses are "encrypted", so no changes
+ * required for dma_addr_encrypted().
+ * The unencrypted DMA buffers must be accessed via the unprotected IPA,
+ * "top IPA bit" set.
+ */
+#define dma_addr_unencrypted(x) ((x) | PROT_NS_SHARED)
+
+/* Clear the "top" IPA bit while converting back */
+#define dma_addr_canonical(x) ((x) & ~PROT_NS_SHARED)
+
+#endif /* __ASM_MEM_ENCRYPT_H */
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 227d256cd4b9..5213248e081b 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -30,8 +30,8 @@
* keep a constant PAGE_OFFSET and "fallback" to using the higher end
* of the VMEMMAP where 52-bit support is not available in hardware.
*/
-#define VMEMMAP_SHIFT (PAGE_SHIFT - STRUCT_PAGE_MAX_SHIFT)
-#define VMEMMAP_SIZE ((_PAGE_END(VA_BITS_MIN) - PAGE_OFFSET) >> VMEMMAP_SHIFT)
+#define VMEMMAP_RANGE (_PAGE_END(VA_BITS_MIN) - PAGE_OFFSET)
+#define VMEMMAP_SIZE ((VMEMMAP_RANGE >> PAGE_SHIFT) * sizeof(struct page))
/*
* PAGE_OFFSET - the virtual address of the start of the linear map, at the
@@ -46,15 +46,19 @@
#define KIMAGE_VADDR (MODULES_END)
#define MODULES_END (MODULES_VADDR + MODULES_VSIZE)
#define MODULES_VADDR (_PAGE_END(VA_BITS_MIN))
-#define MODULES_VSIZE (SZ_128M)
-#define VMEMMAP_START (-(UL(1) << (VA_BITS - VMEMMAP_SHIFT)))
-#define VMEMMAP_END (VMEMMAP_START + VMEMMAP_SIZE)
-#define PCI_IO_END (VMEMMAP_START - SZ_8M)
-#define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE)
-#define FIXADDR_TOP (VMEMMAP_START - SZ_32M)
+#define MODULES_VSIZE (SZ_2G)
+#define VMEMMAP_START (VMEMMAP_END - VMEMMAP_SIZE)
+#define VMEMMAP_END (-UL(SZ_1G))
+#define PCI_IO_START (VMEMMAP_END + SZ_8M)
+#define PCI_IO_END (PCI_IO_START + PCI_IO_SIZE)
+#define FIXADDR_TOP (-UL(SZ_8M))
#if VA_BITS > 48
+#ifdef CONFIG_ARM64_16K_PAGES
+#define VA_BITS_MIN (47)
+#else
#define VA_BITS_MIN (48)
+#endif
#else
#define VA_BITS_MIN (VA_BITS)
#endif
@@ -65,28 +69,56 @@
#define KERNEL_END _end
/*
- * Generic and tag-based KASAN require 1/8th and 1/16th of the kernel virtual
- * address space for the shadow region respectively. They can bloat the stack
- * significantly, so double the (minimum) stack size when they are in use.
+ * Generic and Software Tag-Based KASAN modes require 1/8th and 1/16th of the
+ * kernel virtual address space for storing the shadow memory respectively.
+ *
+ * The mapping between a virtual memory address and its corresponding shadow
+ * memory address is defined based on the formula:
+ *
+ * shadow_addr = (addr >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET
+ *
+ * where KASAN_SHADOW_SCALE_SHIFT is the order of the number of bits that map
+ * to a single shadow byte and KASAN_SHADOW_OFFSET is a constant that offsets
+ * the mapping. Note that KASAN_SHADOW_OFFSET does not point to the start of
+ * the shadow memory region.
+ *
+ * Based on this mapping, we define two constants:
+ *
+ * KASAN_SHADOW_START: the start of the shadow memory region;
+ * KASAN_SHADOW_END: the end of the shadow memory region.
+ *
+ * KASAN_SHADOW_END is defined first as the shadow address that corresponds to
+ * the upper bound of possible virtual kernel memory addresses UL(1) << 64
+ * according to the mapping formula.
+ *
+ * KASAN_SHADOW_START is defined second based on KASAN_SHADOW_END. The shadow
+ * memory start must map to the lowest possible kernel virtual memory address
+ * and thus it depends on the actual bitness of the address space.
+ *
+ * As KASAN inserts redzones between stack variables, this increases the stack
+ * memory usage significantly. Thus, we double the (minimum) stack size.
*/
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
#define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL)
-#define KASAN_SHADOW_END ((UL(1) << (64 - KASAN_SHADOW_SCALE_SHIFT)) \
- + KASAN_SHADOW_OFFSET)
-#define PAGE_END (KASAN_SHADOW_END - (1UL << (vabits_actual - KASAN_SHADOW_SCALE_SHIFT)))
+#define KASAN_SHADOW_END ((UL(1) << (64 - KASAN_SHADOW_SCALE_SHIFT)) + KASAN_SHADOW_OFFSET)
+#define _KASAN_SHADOW_START(va) (KASAN_SHADOW_END - (UL(1) << ((va) - KASAN_SHADOW_SCALE_SHIFT)))
+#define KASAN_SHADOW_START _KASAN_SHADOW_START(vabits_actual)
+#define PAGE_END KASAN_SHADOW_START
#define KASAN_THREAD_SHIFT 1
#else
#define KASAN_THREAD_SHIFT 0
#define PAGE_END (_PAGE_END(VA_BITS_MIN))
#endif /* CONFIG_KASAN */
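
To make the formula concrete, the standalone sketch below plugs in 48-bit VAs, generic KASAN (KASAN_SHADOW_SCALE_SHIFT == 3) and a shadow offset of 0xdfff800000000000, which is assumed here as a typical Kconfig value for that configuration. It also checks that the lowest kernel VA maps exactly to KASAN_SHADOW_START.

#include <stdint.h>
#include <stdio.h>

#define SCALE_SHIFT     3                       /* generic KASAN */
#define VA_BITS         48
#define SHADOW_OFFSET   0xdfff800000000000ULL   /* assumed Kconfig value */

static uint64_t shadow_addr(uint64_t addr)
{
        return (addr >> SCALE_SHIFT) + SHADOW_OFFSET;
}

int main(void)
{
        uint64_t end = (1ULL << (64 - SCALE_SHIFT)) + SHADOW_OFFSET;
        uint64_t start = end - (1ULL << (VA_BITS - SCALE_SHIFT));

        printf("KASAN_SHADOW_END   = 0x%016llx\n", (unsigned long long)end);
        printf("KASAN_SHADOW_START = 0x%016llx\n", (unsigned long long)start);
        /* the lowest 48-bit kernel VA must map to KASAN_SHADOW_START */
        printf("shadow(0xffff000000000000) = 0x%016llx\n",
               (unsigned long long)shadow_addr(0xffff000000000000ULL));
        return 0;
}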
+#define DIRECT_MAP_PHYSMEM_END __pa(PAGE_END - 1)
+
#define MIN_THREAD_SHIFT (14 + KASAN_THREAD_SHIFT)
/*
* VMAP'd stacks are allocated at page granularity, so we must ensure that such
* stacks are a multiple of page size.
*/
-#if defined(CONFIG_VMAP_STACK) && (MIN_THREAD_SHIFT < PAGE_SHIFT)
+#if (MIN_THREAD_SHIFT < PAGE_SHIFT)
#define THREAD_SHIFT PAGE_SHIFT
#else
#define THREAD_SHIFT MIN_THREAD_SHIFT
@@ -103,16 +135,23 @@
* checking sp & (1 << THREAD_SHIFT), which we can do cheaply in the entry
* assembly.
*/
-#ifdef CONFIG_VMAP_STACK
#define THREAD_ALIGN (2 * THREAD_SIZE)
-#else
-#define THREAD_ALIGN THREAD_SIZE
-#endif
#define IRQ_STACK_SIZE THREAD_SIZE
#define OVERFLOW_STACK_SIZE SZ_4K
+#define NVHE_STACK_SHIFT PAGE_SHIFT
+#define NVHE_STACK_SIZE (UL(1) << NVHE_STACK_SHIFT)
+
+/*
+ * With the minimum frame size of [x29, x30], exactly half the combined
+ * sizes of the hyp and overflow stacks is the maximum size needed to
+ * save the unwound stacktrace, plus an additional entry to delimit the
+ * end.
+ */
+#define NVHE_STACKTRACE_SIZE ((OVERFLOW_STACK_SIZE + NVHE_STACK_SIZE) / 2 + sizeof(long))
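
A quick sanity check of that bound, assuming 4K pages (so the hyp and overflow stacks are 4K each) and 8-byte stacktrace entries; both are assumptions for illustration.

#include <stdio.h>

int main(void)
{
        unsigned long overflow = 4096, stack = 4096, entry = 8;
        unsigned long max_frames = (overflow + stack) / 16;     /* [x29, x30] */
        unsigned long size = max_frames * entry + entry;        /* + end marker */

        printf("NVHE_STACKTRACE_SIZE = %lu\n", size);           /* 4104 */
        return 0;
}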
+
/*
* Alignment of kernel segments (e.g. .text, .data).
*
@@ -139,6 +178,7 @@
* Memory types for Stage-2 translation
*/
#define MT_S2_NORMAL 0xf
+#define MT_S2_NORMAL_NC 0x5
#define MT_S2_DEVICE_nGnRE 0x1
/*
@@ -146,6 +186,7 @@
* Stage-2 enforces Normal-WB and Device-nGnRE
*/
#define MT_S2_FWB_NORMAL 6
+#define MT_S2_FWB_NORMAL_NC 5
#define MT_S2_FWB_DEVICE_nGnRE 1
#ifdef CONFIG_ARM64_4K_PAGES
@@ -172,10 +213,23 @@
#include <linux/compiler.h>
#include <linux/mmdebug.h>
#include <linux/types.h>
+#include <asm/boot.h>
#include <asm/bug.h>
+#include <asm/sections.h>
+#include <asm/sysreg.h>
+
+static inline u64 __pure read_tcr(void)
+{
+ u64 tcr;
+
+ // read_sysreg() uses asm volatile, so avoid it here
+ asm("mrs %0, tcr_el1" : "=r"(tcr));
+ return tcr;
+}
#if VA_BITS > 48
-extern u64 vabits_actual;
+// For reasons of #include hell, we can't use TCR_T1SZ_OFFSET/TCR_T1SZ_MASK here
+#define vabits_actual (64 - ((read_tcr() >> 16) & 63))
#else
#define vabits_actual ((u64)VA_BITS)
#endif
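
vabits_actual is now derived on the fly from TCR_EL1.T1SZ (bits [21:16]) instead of being read from a variable. A standalone sketch of the extraction, using a made-up TCR value:

#include <stdint.h>
#include <stdio.h>

static unsigned int vabits_from_tcr(uint64_t tcr)
{
        return 64 - ((tcr >> 16) & 63); /* 64 - T1SZ */
}

int main(void)
{
        uint64_t tcr = 16ULL << 16;     /* T1SZ == 16, other fields zero */

        printf("vabits_actual = %u\n", vabits_from_tcr(tcr));   /* 48 */
        return 0;
}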
@@ -184,17 +238,26 @@ extern s64 memstart_addr;
/* PHYS_OFFSET - the physical address of the start of memory. */
#define PHYS_OFFSET ({ VM_BUG_ON(memstart_addr & 1); memstart_addr; })
-/* the virtual base of the kernel image */
-extern u64 kimage_vaddr;
-
/* the offset between the kernel virtual and physical mappings */
extern u64 kimage_voffset;
static inline unsigned long kaslr_offset(void)
{
- return kimage_vaddr - KIMAGE_VADDR;
+ return (u64)&_text - KIMAGE_VADDR;
}
+#ifdef CONFIG_RANDOMIZE_BASE
+void kaslr_init(void);
+static inline bool kaslr_enabled(void)
+{
+ extern bool __kaslr_is_enabled;
+ return __kaslr_is_enabled;
+}
+#else
+static inline void kaslr_init(void) { }
+static inline bool kaslr_enabled(void) { return false; }
+#endif
+
/*
* Allow all memory at the discovery stage. We will clip it later.
*/
@@ -242,9 +305,11 @@ static inline const void *__tag_set(const void *addr, u8 tag)
}
#ifdef CONFIG_KASAN_HW_TAGS
-#define arch_enable_tagging_sync() mte_enable_kernel_sync()
-#define arch_enable_tagging_async() mte_enable_kernel_async()
-#define arch_enable_tagging_asymm() mte_enable_kernel_asymm()
+#define arch_enable_tag_checks_sync() mte_enable_kernel_sync()
+#define arch_enable_tag_checks_async() mte_enable_kernel_async()
+#define arch_enable_tag_checks_asymm() mte_enable_kernel_asymm()
+#define arch_suppress_tag_checks_start() mte_enable_tco()
+#define arch_suppress_tag_checks_stop() mte_disable_tco()
#define arch_force_async_tag_fault() mte_check_tfsr_exit()
#define arch_get_random_tag() mte_get_random_tag()
#define arch_get_mem_tag(addr) mte_get_mem_tag(addr)
@@ -288,12 +353,6 @@ extern phys_addr_t __phys_addr_symbol(unsigned long x);
#define __phys_to_kimg(x) ((unsigned long)((x) + kimage_voffset))
/*
- * Convert a page to/from a physical address
- */
-#define page_to_phys(page) (__pfn_to_phys(page_to_pfn(page)))
-#define phys_to_page(phys) (pfn_to_page(__phys_to_pfn(phys)))
-
-/*
* Note: Drivers should NOT use these. They are the wrong
* translation for translating DMA addresses. Use the driver
* DMA support - see dma-mapping.h.
@@ -310,6 +369,14 @@ static inline void *phys_to_virt(phys_addr_t x)
return (void *)(__phys_to_virt(x));
}
+/* Needed already here for resolving __phys_to_pfn() in virt_to_pfn() */
+#include <asm-generic/memory_model.h>
+
+static inline unsigned long virt_to_pfn(const void *kaddr)
+{
+ return __phys_to_pfn(virt_to_phys(kaddr));
+}
+
/*
* Drivers should NOT use these either.
*/
@@ -318,7 +385,6 @@ static inline void *phys_to_virt(phys_addr_t x)
#define __pa_nodebug(x) __virt_to_phys_nodebug((unsigned long)(x))
#define __va(x) ((void *)__phys_to_virt((phys_addr_t)(x)))
#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT)
-#define virt_to_pfn(x) __phys_to_pfn(__virt_to_phys((unsigned long)(x)))
#define sym_to_pfn(x) __phys_to_pfn(__pa_symbol(x))
/*
@@ -355,11 +421,6 @@ static inline void *phys_to_virt(phys_addr_t x)
})
void dump_mem_limit(void);
-
-static inline bool defer_reserve_crashkernel(void)
-{
- return IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32);
-}
#endif /* !ASSEMBLY */
/*
@@ -373,6 +434,14 @@ static inline bool defer_reserve_crashkernel(void)
# define INIT_MEMBLOCK_RESERVED_REGIONS (INIT_MEMBLOCK_REGIONS + NR_CPUS + 1)
#endif
-#include <asm-generic/memory_model.h>
+/*
+ * memory regions which marked with flag MEMBLOCK_NOMAP(for example, the memory
+ * of the EFI_UNUSABLE_MEMORY type) may divide a continuous memory block into
+ * multiple parts. As a result, the number of memory regions is large.
+ */
+#ifdef CONFIG_EFI
+#define INIT_MEMBLOCK_MEMORY_REGIONS (INIT_MEMBLOCK_REGIONS * 8)
+#endif
+
#endif /* __ASM_MEMORY_H */
diff --git a/arch/arm64/include/asm/mman.h b/arch/arm64/include/asm/mman.h
index 5966ee4a6154..8770c7ee759f 100644
--- a/arch/arm64/include/asm/mman.h
+++ b/arch/arm64/include/asm/mman.h
@@ -2,14 +2,19 @@
#ifndef __ASM_MMAN_H__
#define __ASM_MMAN_H__
+#include <uapi/asm/mman.h>
+
+#ifndef BUILD_VDSO
#include <linux/compiler.h>
+#include <linux/fs.h>
+#include <linux/hugetlb.h>
+#include <linux/shmem_fs.h>
#include <linux/types.h>
-#include <uapi/asm/mman.h>
-static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
- unsigned long pkey __always_unused)
+static inline vm_flags_t arch_calc_vm_prot_bits(unsigned long prot,
+ unsigned long pkey)
{
- unsigned long ret = 0;
+ vm_flags_t ret = 0;
if (system_supports_bti() && (prot & PROT_BTI))
ret |= VM_ARM64_BTI;
@@ -17,23 +22,36 @@ static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
if (system_supports_mte() && (prot & PROT_MTE))
ret |= VM_MTE;
+#ifdef CONFIG_ARCH_HAS_PKEYS
+ if (system_supports_poe()) {
+ ret |= pkey & BIT(0) ? VM_PKEY_BIT0 : 0;
+ ret |= pkey & BIT(1) ? VM_PKEY_BIT1 : 0;
+ ret |= pkey & BIT(2) ? VM_PKEY_BIT2 : 0;
+ }
+#endif
+
return ret;
}
#define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey)
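
The three POE protection-key bits are scattered into separate vm_flags bits. The standalone sketch below shows the encode/decode round trip; the VM_PKEY_BIT* positions used here are made up for illustration, the real values live in the generic mm headers.

#include <stdio.h>

/* made-up flag positions, for illustration only */
#define VM_PKEY_BIT0    (1ULL << 32)
#define VM_PKEY_BIT1    (1ULL << 33)
#define VM_PKEY_BIT2    (1ULL << 34)

static unsigned long long pkey_to_vmflags(unsigned int pkey)
{
        unsigned long long ret = 0;

        ret |= pkey & (1u << 0) ? VM_PKEY_BIT0 : 0;
        ret |= pkey & (1u << 1) ? VM_PKEY_BIT1 : 0;
        ret |= pkey & (1u << 2) ? VM_PKEY_BIT2 : 0;
        return ret;
}

static unsigned int vmflags_to_pkey(unsigned long long flags)
{
        return (!!(flags & VM_PKEY_BIT0) << 0) |
               (!!(flags & VM_PKEY_BIT1) << 1) |
               (!!(flags & VM_PKEY_BIT2) << 2);
}

int main(void)
{
        for (unsigned int pkey = 0; pkey < 8; pkey++)
                printf("pkey %u -> flags 0x%llx -> pkey %u\n", pkey,
                       pkey_to_vmflags(pkey),
                       vmflags_to_pkey(pkey_to_vmflags(pkey)));
        return 0;
}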
-static inline unsigned long arch_calc_vm_flag_bits(unsigned long flags)
+static inline vm_flags_t arch_calc_vm_flag_bits(struct file *file,
+ unsigned long flags)
{
/*
* Only allow MTE on anonymous mappings as these are guaranteed to be
* backed by tags-capable memory. The vm_flags may be overridden by a
* filesystem supporting MTE (RAM-based).
*/
- if (system_supports_mte() && (flags & MAP_ANONYMOUS))
- return VM_MTE_ALLOWED;
+ if (system_supports_mte()) {
+ if (flags & (MAP_ANONYMOUS | MAP_HUGETLB))
+ return VM_MTE_ALLOWED;
+ if (shmem_file(file) || is_file_hugepages(file))
+ return VM_MTE_ALLOWED;
+ }
return 0;
}
-#define arch_calc_vm_flag_bits(flags) arch_calc_vm_flag_bits(flags)
+#define arch_calc_vm_flag_bits(file, flags) arch_calc_vm_flag_bits(file, flags)
static inline bool arch_validate_prot(unsigned long prot,
unsigned long addr __always_unused)
@@ -50,14 +68,31 @@ static inline bool arch_validate_prot(unsigned long prot,
}
#define arch_validate_prot(prot, addr) arch_validate_prot(prot, addr)
-static inline bool arch_validate_flags(unsigned long vm_flags)
+static inline bool arch_validate_flags(vm_flags_t vm_flags)
{
- if (!system_supports_mte())
- return true;
+ if (system_supports_mte()) {
+ /*
+ * only allow VM_MTE if VM_MTE_ALLOWED has been set
+ * previously
+ */
+ if ((vm_flags & VM_MTE) && !(vm_flags & VM_MTE_ALLOWED))
+ return false;
+ }
+
+ if (system_supports_gcs() && (vm_flags & VM_SHADOW_STACK)) {
+ /* An executable GCS isn't a good idea. */
+ if (vm_flags & VM_EXEC)
+ return false;
+
+ /* The memory management core should prevent this */
+ VM_WARN_ON(vm_flags & VM_SHARED);
+ }
+
+ return true;
- /* only allow VM_MTE if VM_MTE_ALLOWED has been set previously */
- return !(vm_flags & VM_MTE) || (vm_flags & VM_MTE_ALLOWED);
}
#define arch_validate_flags(vm_flags) arch_validate_flags(vm_flags)
+#endif /* !BUILD_VDSO */
+
#endif /* ! __ASM_MMAN_H__ */
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index 48f8466a4be9..49f1a810df16 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -17,6 +17,13 @@
#include <linux/refcount.h>
#include <asm/cpufeature.h>
+enum pgtable_type {
+ TABLE_PTE,
+ TABLE_PMD,
+ TABLE_PUD,
+ TABLE_P4D,
+};
+
typedef struct {
atomic64_t id;
#ifdef CONFIG_COMPAT
@@ -25,6 +32,7 @@ typedef struct {
refcount_t pinned;
void *vdso;
unsigned long flags;
+ u8 pkey_allocation_map;
} mm_context_t;
/*
@@ -57,23 +65,44 @@ typedef struct {
static inline bool arm64_kernel_unmapped_at_el0(void)
{
- return cpus_have_const_cap(ARM64_UNMAP_KERNEL_AT_EL0);
+ return alternative_has_cap_unlikely(ARM64_UNMAP_KERNEL_AT_EL0);
}
extern void arm64_memblock_init(void);
extern void paging_init(void);
extern void bootmem_init(void);
-extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt);
-extern void init_mem_pgprot(void);
+extern void create_mapping_noalloc(phys_addr_t phys, unsigned long virt,
+ phys_addr_t size, pgprot_t prot);
extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
unsigned long virt, phys_addr_t size,
pgprot_t prot, bool page_mappings_only);
extern void *fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot);
extern void mark_linear_text_alias_ro(void);
-extern bool kaslr_requires_kpti(void);
-#define INIT_MM_CONTEXT(name) \
- .pgd = init_pg_dir,
+/*
+ * This check is triggered during early boot, before the cpufeature
+ * framework is initialised. Checking the status on the local CPU allows
+ * the boot CPU to detect the need for non-global mappings and thus avoid
+ * a page-table rewrite after all the CPUs have booted. The check is run
+ * on each individual CPU anyway, so a consistent state is reached once
+ * the SMP CPUs are up, at which point the switch to non-global mappings
+ * is made if required.
+ */
+static inline bool kaslr_requires_kpti(void)
+{
+ /*
+ * E0PD does a similar job to KPTI so can be used instead
+ * where available.
+ */
+ if (IS_ENABLED(CONFIG_ARM64_E0PD)) {
+ u64 mmfr2 = read_sysreg_s(SYS_ID_AA64MMFR2_EL1);
+ if (cpuid_feature_extract_unsigned_field(mmfr2,
+ ID_AA64MMFR2_EL1_E0PD_SHIFT))
+ return false;
+ }
+
+ return true;
+}
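
kaslr_requires_kpti() now only needs to ask whether E0PD is implemented, by extracting the ID_AA64MMFR2_EL1.E0PD field. A standalone sketch of that field extraction, with a made-up register value and the field taken to be the 4-bit field at bit 60:

#include <stdint.h>
#include <stdio.h>

#define E0PD_SHIFT      60

static unsigned int extract_unsigned_field(uint64_t reg, unsigned int shift)
{
        return (reg >> shift) & 0xf;    /* ID register fields are 4 bits wide */
}

int main(void)
{
        uint64_t mmfr2 = 1ULL << E0PD_SHIFT;    /* sample: E0PD implemented */
        int kaslr_needs_kpti = !extract_unsigned_field(mmfr2, E0PD_SHIFT);

        printf("kaslr_requires_kpti: %d\n", kaslr_needs_kpti);  /* 0 */
        return 0;
}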
#endif /* !__ASSEMBLY__ */
#endif
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index c7ccd82db1d2..0dbe3b29049b 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -15,11 +15,13 @@
#include <linux/sched/hotplug.h>
#include <linux/mm_types.h>
#include <linux/pgtable.h>
+#include <linux/pkeys.h>
#include <asm/cacheflush.h>
#include <asm/cpufeature.h>
+#include <asm/daifflags.h>
+#include <asm/gcs.h>
#include <asm/proc-fns.h>
-#include <asm-generic/mm_hooks.h>
#include <asm/cputype.h>
#include <asm/sysreg.h>
#include <asm/tlbflush.h>
@@ -38,11 +40,16 @@ static inline void contextidr_thread_switch(struct task_struct *next)
/*
* Set TTBR0 to reserved_pg_dir. No translations will be possible via TTBR0.
*/
-static inline void cpu_set_reserved_ttbr0(void)
+static inline void cpu_set_reserved_ttbr0_nosync(void)
{
unsigned long ttbr = phys_to_ttbr(__pa_symbol(reserved_pg_dir));
write_sysreg(ttbr, ttbr0_el1);
+}
+
+static inline void cpu_set_reserved_ttbr0(void)
+{
+ cpu_set_reserved_ttbr0_nosync();
isb();
}
@@ -51,16 +58,13 @@ void cpu_do_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm);
static inline void cpu_switch_mm(pgd_t *pgd, struct mm_struct *mm)
{
BUG_ON(pgd == swapper_pg_dir);
- cpu_set_reserved_ttbr0();
cpu_do_switch_mm(virt_to_phys(pgd),mm);
}
/*
- * TCR.T0SZ value to use when the ID map is active. Usually equals
- * TCR_T0SZ(VA_BITS), unless system RAM is positioned very high in
- * physical memory, in which case it will be smaller.
+ * TCR.T0SZ value to use when the ID map is active.
*/
-extern int idmap_t0sz;
+#define idmap_t0sz TCR_T0SZ(IDMAP_VA_BITS)
/*
* Ensure TCR.T0SZ is set to the provided value.
@@ -69,11 +73,11 @@ static inline void __cpu_set_tcr_t0sz(unsigned long t0sz)
{
unsigned long tcr = read_sysreg(tcr_el1);
- if ((tcr & TCR_T0SZ_MASK) >> TCR_T0SZ_OFFSET == t0sz)
+ if ((tcr & TCR_T0SZ_MASK) == t0sz)
return;
tcr &= ~TCR_T0SZ_MASK;
- tcr |= t0sz << TCR_T0SZ_OFFSET;
+ tcr |= t0sz;
write_sysreg(tcr, tcr_el1);
isb();
}
@@ -105,18 +109,13 @@ static inline void cpu_uninstall_idmap(void)
cpu_switch_mm(mm->pgd, mm);
}
-static inline void __cpu_install_idmap(pgd_t *idmap)
+static inline void cpu_install_idmap(void)
{
cpu_set_reserved_ttbr0();
local_flush_tlb_all();
cpu_set_idmap_tcr_t0sz();
- cpu_switch_mm(lm_alias(idmap), &init_mm);
-}
-
-static inline void cpu_install_idmap(void)
-{
- __cpu_install_idmap(idmap_pg_dir);
+ cpu_switch_mm(lm_alias(idmap_pg_dir), &init_mm);
}
/*
@@ -143,36 +142,21 @@ static inline void cpu_install_ttbr0(phys_addr_t ttbr0, unsigned long t0sz)
isb();
}
-/*
- * Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD,
- * avoiding the possibility of conflicting TLB entries being allocated.
- */
-static inline void __nocfi cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap)
-{
- typedef void (ttbr_replace_func)(phys_addr_t);
- extern ttbr_replace_func idmap_cpu_replace_ttbr1;
- ttbr_replace_func *replace_phys;
-
- /* phys_to_ttbr() zeros lower 2 bits of ttbr with 52-bit PA */
- phys_addr_t ttbr1 = phys_to_ttbr(virt_to_phys(pgdp));
-
- if (system_supports_cnp() && !WARN_ON(pgdp != lm_alias(swapper_pg_dir))) {
- /*
- * cpu_replace_ttbr1() is used when there's a boot CPU
- * up (i.e. cpufeature framework is not up yet) and
- * latter only when we enable CNP via cpufeature's
- * enable() callback.
- * Also we rely on the cpu_hwcap bit being set before
- * calling the enable() function.
- */
- ttbr1 |= TTBR_CNP_BIT;
- }
+void __cpu_replace_ttbr1(pgd_t *pgdp, bool cnp);
- replace_phys = (void *)__pa_symbol(function_nocfi(idmap_cpu_replace_ttbr1));
+static inline void cpu_enable_swapper_cnp(void)
+{
+ __cpu_replace_ttbr1(lm_alias(swapper_pg_dir), true);
+}
- __cpu_install_idmap(idmap);
- replace_phys(ttbr1);
- cpu_uninstall_idmap();
+static inline void cpu_replace_ttbr1(pgd_t *pgdp)
+{
+ /*
+ * Only for early TTBR1 replacement before cpucaps are finalized and
+ * before we've decided whether to use CNP.
+ */
+ WARN_ON(system_capabilities_finalized());
+ __cpu_replace_ttbr1(pgdp, false);
}
/*
@@ -192,9 +176,36 @@ init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
atomic64_set(&mm->context.id, 0);
refcount_set(&mm->context.pinned, 0);
+
+ /* pkey 0 is the default, so always reserve it. */
+ mm->context.pkey_allocation_map = BIT(0);
+
return 0;
}
+static inline void arch_dup_pkeys(struct mm_struct *oldmm,
+ struct mm_struct *mm)
+{
+ /* Duplicate the oldmm pkey state in mm: */
+ mm->context.pkey_allocation_map = oldmm->context.pkey_allocation_map;
+}
+
+static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
+{
+ arch_dup_pkeys(oldmm, mm);
+
+ return 0;
+}
+
+static inline void arch_exit_mmap(struct mm_struct *mm)
+{
+}
+
+static inline void arch_unmap(struct mm_struct *mm,
+ unsigned long start, unsigned long end)
+{
+}
+
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
static inline void update_saved_ttbr0(struct task_struct *tsk,
struct mm_struct *mm)
@@ -260,24 +271,63 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
}
static inline const struct cpumask *
-task_cpu_possible_mask(struct task_struct *p)
+__task_cpu_possible_mask(struct task_struct *p, const struct cpumask *mask)
{
if (!static_branch_unlikely(&arm64_mismatched_32bit_el0))
- return cpu_possible_mask;
+ return mask;
if (!is_compat_thread(task_thread_info(p)))
- return cpu_possible_mask;
+ return mask;
return system_32bit_el0_cpumask();
}
+
+static inline const struct cpumask *
+task_cpu_possible_mask(struct task_struct *p)
+{
+ return __task_cpu_possible_mask(p, cpu_possible_mask);
+}
#define task_cpu_possible_mask task_cpu_possible_mask
+const struct cpumask *task_cpu_fallback_mask(struct task_struct *p);
+
void verify_cpu_asid_bits(void);
void post_ttbr_update_workaround(void);
unsigned long arm64_mm_context_get(struct mm_struct *mm);
void arm64_mm_context_put(struct mm_struct *mm);
+#define mm_untag_mask mm_untag_mask
+static inline unsigned long mm_untag_mask(struct mm_struct *mm)
+{
+ return -1UL >> 8;
+}
+
+/*
+ * Only enforce protection keys on the current process, because there is no
+ * user context to access POR_EL0 for another address space.
+ */
+static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
+ bool write, bool execute, bool foreign)
+{
+ if (!system_supports_poe())
+ return true;
+
+ /* allow access if the VMA is not one from this process */
+ if (foreign || vma_is_foreign(vma))
+ return true;
+
+ return por_el0_allows_pkey(vma_pkey(vma), write, execute);
+}
+
+#define deactivate_mm deactivate_mm
+static inline void deactivate_mm(struct task_struct *tsk,
+ struct mm_struct *mm)
+{
+ gcs_free(tsk);
+}
+
+
#include <asm-generic/mmu_context.h>
#endif /* !__ASSEMBLY__ */
diff --git a/arch/arm64/include/asm/mmzone.h b/arch/arm64/include/asm/mmzone.h
deleted file mode 100644
index fa17e01d9ab2..000000000000
--- a/arch/arm64/include/asm/mmzone.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __ASM_MMZONE_H
-#define __ASM_MMZONE_H
-
-#ifdef CONFIG_NUMA
-
-#include <asm/numa.h>
-
-extern struct pglist_data *node_data[];
-#define NODE_DATA(nid) (node_data[(nid)])
-
-#endif /* CONFIG_NUMA */
-#endif /* __ASM_MMZONE_H */
diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h
index 4e7fa2623896..79550b22ba19 100644
--- a/arch/arm64/include/asm/module.h
+++ b/arch/arm64/include/asm/module.h
@@ -7,7 +7,6 @@
#include <asm-generic/module.h>
-#ifdef CONFIG_ARM64_MODULE_PLTS
struct mod_plt_sec {
int plt_shndx;
int plt_num_entries;
@@ -21,7 +20,6 @@ struct mod_arch_specific {
/* for CONFIG_DYNAMIC_FTRACE */
struct plt_entry *ftrace_trampolines;
};
-#endif
u64 module_emit_plt_entry(struct module *mod, Elf64_Shdr *sechdrs,
void *loc, const Elf64_Rela *rela,
@@ -30,12 +28,6 @@ u64 module_emit_plt_entry(struct module *mod, Elf64_Shdr *sechdrs,
u64 module_emit_veneer_for_adrp(struct module *mod, Elf64_Shdr *sechdrs,
void *loc, u64 val);
-#ifdef CONFIG_RANDOMIZE_BASE
-extern u64 module_alloc_base;
-#else
-#define module_alloc_base ((u64)_etext - MODULES_VSIZE)
-#endif
-
struct plt_entry {
/*
* A program that conforms to the AArch64 Procedure Call Standard
@@ -52,17 +44,25 @@ struct plt_entry {
static inline bool is_forbidden_offset_for_adrp(void *place)
{
- return IS_ENABLED(CONFIG_ARM64_ERRATUM_843419) &&
- cpus_have_const_cap(ARM64_WORKAROUND_843419) &&
+ return cpus_have_final_cap(ARM64_WORKAROUND_843419) &&
((u64)place & 0xfff) >= 0xff8;
}
struct plt_entry get_plt_entry(u64 dst, void *pc);
-bool plt_entries_equal(const struct plt_entry *a, const struct plt_entry *b);
-static inline bool plt_entry_is_initialized(const struct plt_entry *e)
+static inline const Elf_Shdr *find_section(const Elf_Ehdr *hdr,
+ const Elf_Shdr *sechdrs,
+ const char *name)
{
- return e->adrp || e->add || e->br;
+ const Elf_Shdr *s, *se;
+ const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
+
+ for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) {
+ if (strcmp(name, secstrs + s->sh_name) == 0)
+ return s;
+ }
+
+ return NULL;
}
#endif /* __ASM_MODULE_H */
diff --git a/arch/arm64/include/asm/module.lds.h b/arch/arm64/include/asm/module.lds.h
index 094701ec5500..b9ae8349e35d 100644
--- a/arch/arm64/include/asm/module.lds.h
+++ b/arch/arm64/include/asm/module.lds.h
@@ -1,9 +1,7 @@
SECTIONS {
-#ifdef CONFIG_ARM64_MODULE_PLTS
.plt 0 : { BYTE(0) }
.init.plt 0 : { BYTE(0) }
.text.ftrace_trampoline 0 : { BYTE(0) }
-#endif
#ifdef CONFIG_KASAN_SW_TAGS
/*
@@ -17,4 +15,12 @@ SECTIONS {
*/
.text.hot : { *(.text.hot) }
#endif
+
+#ifdef CONFIG_UNWIND_TABLES
+ /*
+ * Currently, we only use unwind info at module load time, so we can
+ * put it into the .init allocation.
+ */
+ .init.eh_frame : { *(.eh_frame) }
+#endif
}
diff --git a/arch/arm64/include/asm/mshyperv.h b/arch/arm64/include/asm/mshyperv.h
index 20070a847304..b721d3134ab6 100644
--- a/arch/arm64/include/asm/mshyperv.h
+++ b/arch/arm64/include/asm/mshyperv.h
@@ -6,9 +6,8 @@
* the ARM64 architecture. See include/asm-generic/mshyperv.h for
* definitions are that architecture independent.
*
- * Definitions that are specified in the Hyper-V Top Level Functional
- * Spec (TLFS) should not go in this file, but should instead go in
- * hyperv-tlfs.h.
+ * Definitions that are derived from Hyper-V code or headers should not go in
+ * this file, but should instead go in the relevant files in include/hyperv.
*
* Copyright (C) 2021, Microsoft, Inc.
*
@@ -20,7 +19,7 @@
#include <linux/types.h>
#include <linux/arm-smccc.h>
-#include <asm/hyperv-tlfs.h>
+#include <hyperv/hvhdk.h>
/*
* Declare calls to get and set Hyper-V VP register values on ARM64, which
@@ -31,16 +30,29 @@ void hv_set_vpreg(u32 reg, u64 value);
u64 hv_get_vpreg(u32 reg);
void hv_get_vpreg_128(u32 reg, struct hv_get_vp_registers_output *result);
-static inline void hv_set_register(unsigned int reg, u64 value)
+static inline void hv_set_msr(unsigned int reg, u64 value)
{
hv_set_vpreg(reg, value);
}
-static inline u64 hv_get_register(unsigned int reg)
+static inline u64 hv_get_msr(unsigned int reg)
{
return hv_get_vpreg(reg);
}
+/*
+ * Nested is not supported on arm64
+ */
+static inline void hv_set_non_nested_msr(unsigned int reg, u64 value)
+{
+ hv_set_msr(reg, value);
+}
+
+static inline u64 hv_get_non_nested_msr(unsigned int reg)
+{
+ return hv_get_msr(reg);
+}
+
/* SMCCC hypercall parameters */
#define HV_SMCCC_FUNC_NUMBER 1
#define HV_FUNC_ID ARM_SMCCC_CALL_VAL( \
diff --git a/arch/arm64/include/asm/mte-kasan.h b/arch/arm64/include/asm/mte-kasan.h
index 9f79425fc65a..2e98028c1965 100644
--- a/arch/arm64/include/asm/mte-kasan.h
+++ b/arch/arm64/include/asm/mte-kasan.h
@@ -13,9 +13,74 @@
#include <linux/types.h>
+#ifdef CONFIG_KASAN_HW_TAGS
+
+/* Whether the MTE asynchronous mode is enabled. */
+DECLARE_STATIC_KEY_FALSE(mte_async_or_asymm_mode);
+
+static inline bool system_uses_mte_async_or_asymm_mode(void)
+{
+ return static_branch_unlikely(&mte_async_or_asymm_mode);
+}
+
+#else /* CONFIG_KASAN_HW_TAGS */
+
+static inline bool system_uses_mte_async_or_asymm_mode(void)
+{
+ return false;
+}
+
+#endif /* CONFIG_KASAN_HW_TAGS */
+
#ifdef CONFIG_ARM64_MTE
/*
+ * The Tag Check Flag (TCF) mode for MTE is per EL, hence TCF0
+ * affects EL0 and TCF affects EL1 irrespective of which TTBR is
+ * used.
+ * The kernel accesses TTBR0 usually with LDTR/STTR instructions
+ * when UAO is available, so these would act as EL0 accesses using
+ * TCF0.
+ * However, the futex.h code uses exclusives, which are executed at
+ * EL1; this can potentially cause a tag check fault even if the
+ * user disables TCF0.
+ *
+ * To address the problem we set the PSTATE.TCO bit in uaccess_enable()
+ * and reset it in uaccess_disable().
+ *
+ * The Tag Check Override (TCO) bit temporarily disables tag checking,
+ * preventing the issue.
+ */
+static inline void mte_disable_tco(void)
+{
+ asm volatile(ALTERNATIVE("nop", SET_PSTATE_TCO(0),
+ ARM64_MTE, CONFIG_KASAN_HW_TAGS));
+}
+
+static inline void mte_enable_tco(void)
+{
+ asm volatile(ALTERNATIVE("nop", SET_PSTATE_TCO(1),
+ ARM64_MTE, CONFIG_KASAN_HW_TAGS));
+}
+
+/*
+ * These functions disable tag checking only in MTE async mode, since
+ * the sync mode generates exceptions synchronously and the nofault
+ * accessors or load_unaligned_zeropad() can handle them.
+ */
+static inline void __mte_disable_tco_async(void)
+{
+ if (system_uses_mte_async_or_asymm_mode())
+ mte_disable_tco();
+}
+
+static inline void __mte_enable_tco_async(void)
+{
+ if (system_uses_mte_async_or_asymm_mode())
+ mte_enable_tco();
+}
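
A hypothetical caller, shown only to illustrate the intended pairing (the real users are load_unaligned_zeropad() and the *_nofault accessors): tag check faults are suppressed around an access that is allowed to fault, and only when the asynchronous or asymmetric mode is in use.

/* Hypothetical helper, not a real kernel function. */
static unsigned long read_word_nofault_style(const unsigned long *addr)
{
        unsigned long val;

        __mte_enable_tco_async();       /* suppress async tag check faults */
        val = *addr;
        __mte_disable_tco_async();      /* restore tag checking */

        return val;
}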
+
+/*
* These functions are meant to be only used from KASAN runtime through
* the arch_*() interface defined in asm/memory.h.
* These functions don't include system_supports_mte() checks,
@@ -138,6 +203,22 @@ void mte_enable_kernel_asymm(void);
#else /* CONFIG_ARM64_MTE */
+static inline void mte_disable_tco(void)
+{
+}
+
+static inline void mte_enable_tco(void)
+{
+}
+
+static inline void __mte_disable_tco_async(void)
+{
+}
+
+static inline void __mte_enable_tco_async(void)
+{
+}
+
static inline u8 mte_get_ptr_tag(void *ptr)
{
return 0xFF;
diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
index aa523591a44e..6567df8ec8ca 100644
--- a/arch/arm64/include/asm/mte.h
+++ b/arch/arm64/include/asm/mte.h
@@ -25,7 +25,7 @@ unsigned long mte_copy_tags_to_user(void __user *to, void *from,
unsigned long n);
int mte_save_tags(struct page *page);
void mte_save_page_tags(const void *page_addr, void *tag_storage);
-bool mte_restore_tags(swp_entry_t entry, struct page *page);
+void mte_restore_tags(swp_entry_t entry, struct page *page);
void mte_restore_page_tags(void *page_addr, const void *tag_storage);
void mte_invalidate_tags(int type, pgoff_t offset);
void mte_invalidate_tags_area(int type);
@@ -36,13 +36,73 @@ void mte_free_tag_storage(char *storage);
/* track which pages have valid allocation tags */
#define PG_mte_tagged PG_arch_2
+/* simple lock to avoid multiple threads tagging the same page */
+#define PG_mte_lock PG_arch_3
+
+static inline void set_page_mte_tagged(struct page *page)
+{
+ VM_WARN_ON_ONCE(folio_test_hugetlb(page_folio(page)));
+
+ /*
+ * Ensure that the tags written prior to this function are visible
+ * before the page flags update.
+ */
+ smp_wmb();
+ set_bit(PG_mte_tagged, &page->flags);
+}
+
+static inline bool page_mte_tagged(struct page *page)
+{
+ bool ret = test_bit(PG_mte_tagged, &page->flags);
+
+ VM_WARN_ON_ONCE(folio_test_hugetlb(page_folio(page)));
+
+ /*
+ * If the page is tagged, ensure ordering with a likely subsequent
+ * read of the tags.
+ */
+ if (ret)
+ smp_rmb();
+ return ret;
+}
+
+/*
+ * Lock the page for tagging and return 'true' if the page can be tagged,
+ * 'false' if already tagged. PG_mte_tagged is never cleared and therefore the
+ * locking only happens once for page initialisation.
+ *
+ * The page MTE lock state:
+ *
+ * Locked: PG_mte_lock && !PG_mte_tagged
+ * Unlocked: !PG_mte_lock || PG_mte_tagged
+ *
+ * Acquire semantics only if the page is tagged (returning 'false').
+ */
+static inline bool try_page_mte_tagging(struct page *page)
+{
+ VM_WARN_ON_ONCE(folio_test_hugetlb(page_folio(page)));
+
+ if (!test_and_set_bit(PG_mte_lock, &page->flags))
+ return true;
+
+ /*
+ * The tags are either being initialised or may have been initialised
+ * already. Check if the PG_mte_tagged flag has been set or wait
+ * otherwise.
+ */
+ smp_cond_load_acquire(&page->flags, VAL & (1UL << PG_mte_tagged));
+
+ return false;
+}
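A sketch of the initialise-then-publish pattern these three helpers are built for, loosely modelled on the page clearing/copy paths (the surrounding function is hypothetical):

	static void mte_init_page_tags(struct page *page)
	{
		/* Only the winner of PG_mte_lock initialises the tags. */
		if (try_page_mte_tagging(page)) {
			mte_clear_page_tags(page_address(page));
			set_page_mte_tagged(page);	/* smp_wmb(), then publish */
		}
		/*
		 * Losers of the race return from try_page_mte_tagging() with
		 * acquire semantics once PG_mte_tagged is observed set.
		 */
	}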
void mte_zero_clear_page_tags(void *addr);
-void mte_sync_tags(pte_t old_pte, pte_t pte);
+void mte_sync_tags(pte_t pte, unsigned int nr_pages);
void mte_copy_page_tags(void *kto, const void *kfrom);
void mte_thread_init_user(void);
void mte_thread_switch(struct task_struct *next);
+void mte_cpu_setup(void);
void mte_suspend_enter(void);
+void mte_suspend_exit(void);
long set_mte_ctrl(struct task_struct *task, unsigned long arg);
long get_mte_ctrl(struct task_struct *task);
int mte_ptrace_copy_tags(struct task_struct *child, long request,
@@ -54,10 +114,21 @@ size_t mte_probe_user_range(const char __user *uaddr, size_t size);
/* unused if !CONFIG_ARM64_MTE, silence the compiler */
#define PG_mte_tagged 0
+static inline void set_page_mte_tagged(struct page *page)
+{
+}
+static inline bool page_mte_tagged(struct page *page)
+{
+ return false;
+}
+static inline bool try_page_mte_tagging(struct page *page)
+{
+ return false;
+}
static inline void mte_zero_clear_page_tags(void *addr)
{
}
-static inline void mte_sync_tags(pte_t old_pte, pte_t pte)
+static inline void mte_sync_tags(pte_t pte, unsigned int nr_pages)
{
}
static inline void mte_copy_page_tags(void *kto, const void *kfrom)
@@ -72,6 +143,9 @@ static inline void mte_thread_switch(struct task_struct *next)
static inline void mte_suspend_enter(void)
{
}
+static inline void mte_suspend_exit(void)
+{
+}
static inline long set_mte_ctrl(struct task_struct *task, unsigned long arg)
{
return 0;
@@ -89,6 +163,67 @@ static inline int mte_ptrace_copy_tags(struct task_struct *child,
#endif /* CONFIG_ARM64_MTE */
+#if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_ARM64_MTE)
+static inline void folio_set_hugetlb_mte_tagged(struct folio *folio)
+{
+ VM_WARN_ON_ONCE(!folio_test_hugetlb(folio));
+
+ /*
+ * Ensure that the tags written prior to this function are visible
+ * before the folio flags update.
+ */
+ smp_wmb();
+ set_bit(PG_mte_tagged, &folio->flags);
+}
+
+static inline bool folio_test_hugetlb_mte_tagged(struct folio *folio)
+{
+ bool ret = test_bit(PG_mte_tagged, &folio->flags);
+
+ VM_WARN_ON_ONCE(!folio_test_hugetlb(folio));
+
+ /*
+ * If the folio is tagged, ensure ordering with a likely subsequent
+ * read of the tags.
+ */
+ if (ret)
+ smp_rmb();
+ return ret;
+}
+
+static inline bool folio_try_hugetlb_mte_tagging(struct folio *folio)
+{
+ VM_WARN_ON_ONCE(!folio_test_hugetlb(folio));
+
+ if (!test_and_set_bit(PG_mte_lock, &folio->flags))
+ return true;
+
+ /*
+ * The tags are either being initialised or may have been initialised
+ * already. Check if the PG_mte_tagged flag has been set or wait
+ * otherwise.
+ */
+ smp_cond_load_acquire(&folio->flags, VAL & (1UL << PG_mte_tagged));
+
+ return false;
+}
+#else
+static inline void folio_set_hugetlb_mte_tagged(struct folio *folio)
+{
+}
+
+static inline bool folio_test_hugetlb_mte_tagged(struct folio *folio)
+{
+ return false;
+}
+
+static inline bool folio_try_hugetlb_mte_tagging(struct folio *folio)
+{
+ return false;
+}
+#endif
+
static inline void mte_disable_tco_entry(struct task_struct *task)
{
if (!system_supports_mte())
@@ -110,19 +245,11 @@ static inline void mte_disable_tco_entry(struct task_struct *task)
}
#ifdef CONFIG_KASAN_HW_TAGS
-/* Whether the MTE asynchronous mode is enabled. */
-DECLARE_STATIC_KEY_FALSE(mte_async_or_asymm_mode);
-
-static inline bool system_uses_mte_async_or_asymm_mode(void)
-{
- return static_branch_unlikely(&mte_async_or_asymm_mode);
-}
-
void mte_check_tfsr_el1(void);
static inline void mte_check_tfsr_entry(void)
{
- if (!system_supports_mte())
+ if (!kasan_hw_tags_enabled())
return;
mte_check_tfsr_el1();
@@ -130,7 +257,7 @@ static inline void mte_check_tfsr_entry(void)
static inline void mte_check_tfsr_exit(void)
{
- if (!system_supports_mte())
+ if (!kasan_hw_tags_enabled())
return;
/*
@@ -144,10 +271,6 @@ static inline void mte_check_tfsr_exit(void)
mte_check_tfsr_el1();
}
#else
-static inline bool system_uses_mte_async_or_asymm_mode(void)
-{
- return false;
-}
static inline void mte_check_tfsr_el1(void)
{
}
diff --git a/arch/arm64/include/asm/page-def.h b/arch/arm64/include/asm/page-def.h
index 2403f7b4cdbf..d402e08442ee 100644
--- a/arch/arm64/include/asm/page-def.h
+++ b/arch/arm64/include/asm/page-def.h
@@ -10,9 +10,6 @@
#include <linux/const.h>
-/* PAGE_SHIFT determines the page size */
-#define PAGE_SHIFT CONFIG_ARM64_PAGE_SHIFT
-#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT)
-#define PAGE_MASK (~(PAGE_SIZE-1))
+#include <vdso/page.h>
#endif /* __ASM_PAGE_DEF_H */
diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index 993a27ea6f54..2312e6ee595f 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -29,9 +29,9 @@ void copy_user_highpage(struct page *to, struct page *from,
void copy_highpage(struct page *to, struct page *from);
#define __HAVE_ARCH_COPY_HIGHPAGE
-struct page *alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma,
+struct folio *vma_alloc_zeroed_movable_folio(struct vm_area_struct *vma,
unsigned long vaddr);
-#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE_MOVABLE
+#define vma_alloc_zeroed_movable_folio vma_alloc_zeroed_movable_folio
void tag_clear_highpage(struct page *to);
#define __HAVE_ARCH_TAG_CLEAR_HIGHPAGE
diff --git a/arch/arm64/include/asm/pci.h b/arch/arm64/include/asm/pci.h
index b33ca260e3c9..016eb6b46dc0 100644
--- a/arch/arm64/include/asm/pci.h
+++ b/arch/arm64/include/asm/pci.h
@@ -9,7 +9,6 @@
#include <asm/io.h>
#define PCIBIOS_MIN_IO 0x1000
-#define PCIBIOS_MIN_MEM 0
/*
* Set to 1 if the kernel should re-assign all PCI bus numbers
@@ -18,21 +17,8 @@
(pci_has_flag(PCI_REASSIGN_ALL_BUS))
#define arch_can_pci_mmap_wc() 1
-#define ARCH_GENERIC_PCI_MMAP_RESOURCE 1
-extern int isa_dma_bridge_buggy;
-
-#ifdef CONFIG_PCI
-static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
-{
- /* no legacy IRQ on arm64 */
- return -ENODEV;
-}
-
-static inline int pci_proc_domain(struct pci_bus *bus)
-{
- return 1;
-}
-#endif /* CONFIG_PCI */
+/* Generic PCI */
+#include <asm-generic/pci.h>
#endif /* __ASM_PCI_H */
diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h
index b9ba19dbdb69..9abcc8ef3087 100644
--- a/arch/arm64/include/asm/percpu.h
+++ b/arch/arm64/include/asm/percpu.h
@@ -140,17 +140,11 @@ PERCPU_RET_OP(add, add, ldadd)
* re-enabling preemption for preemptible kernels, but doing that in a way
* which builds inside a module would mean messing directly with the preempt
* count. If you do this, peterz and tglx will hunt you down.
+ *
+ * Not to mention it'll break the actual preemption model by missing a
+ * preemption point when TIF_NEED_RESCHED gets set while preemption is
+ * disabled.
*/
-#define this_cpu_cmpxchg_double_8(ptr1, ptr2, o1, o2, n1, n2) \
-({ \
- int __ret; \
- preempt_disable_notrace(); \
- __ret = cmpxchg_double_local( raw_cpu_ptr(&(ptr1)), \
- raw_cpu_ptr(&(ptr2)), \
- o1, o2, n1, n2); \
- preempt_enable_notrace(); \
- __ret; \
-})
#define _pcp_protect(op, pcp, ...) \
({ \
@@ -240,6 +234,22 @@ PERCPU_RET_OP(add, add, ldadd)
#define this_cpu_cmpxchg_8(pcp, o, n) \
_pcp_protect_return(cmpxchg_relaxed, pcp, o, n)
+#define this_cpu_cmpxchg64(pcp, o, n) this_cpu_cmpxchg_8(pcp, o, n)
+
+#define this_cpu_cmpxchg128(pcp, o, n) \
+({ \
+ typedef typeof(pcp) pcp_op_T__; \
+ u128 old__, new__, ret__; \
+ pcp_op_T__ *ptr__; \
+ old__ = o; \
+ new__ = n; \
+ preempt_disable_notrace(); \
+ ptr__ = raw_cpu_ptr(&(pcp)); \
+ ret__ = cmpxchg128_local((void *)ptr__, old__, new__); \
+ preempt_enable_notrace(); \
+ ret__; \
+})
+
#ifdef __KVM_NVHE_HYPERVISOR__
extern unsigned long __hyp_per_cpu_offset(unsigned int cpu);
#define __per_cpu_offset
diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h
index 3eaf462f5752..ee45b4e77347 100644
--- a/arch/arm64/include/asm/perf_event.h
+++ b/arch/arm64/include/asm/perf_event.h
@@ -9,260 +9,7 @@
#include <asm/stack_pointer.h>
#include <asm/ptrace.h>
-#define ARMV8_PMU_MAX_COUNTERS 32
-#define ARMV8_PMU_COUNTER_MASK (ARMV8_PMU_MAX_COUNTERS - 1)
-
-/*
- * Common architectural and microarchitectural event numbers.
- */
-#define ARMV8_PMUV3_PERFCTR_SW_INCR 0x0000
-#define ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL 0x0001
-#define ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL 0x0002
-#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL 0x0003
-#define ARMV8_PMUV3_PERFCTR_L1D_CACHE 0x0004
-#define ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL 0x0005
-#define ARMV8_PMUV3_PERFCTR_LD_RETIRED 0x0006
-#define ARMV8_PMUV3_PERFCTR_ST_RETIRED 0x0007
-#define ARMV8_PMUV3_PERFCTR_INST_RETIRED 0x0008
-#define ARMV8_PMUV3_PERFCTR_EXC_TAKEN 0x0009
-#define ARMV8_PMUV3_PERFCTR_EXC_RETURN 0x000A
-#define ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED 0x000B
-#define ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED 0x000C
-#define ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED 0x000D
-#define ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED 0x000E
-#define ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED 0x000F
-#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED 0x0010
-#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES 0x0011
-#define ARMV8_PMUV3_PERFCTR_BR_PRED 0x0012
-#define ARMV8_PMUV3_PERFCTR_MEM_ACCESS 0x0013
-#define ARMV8_PMUV3_PERFCTR_L1I_CACHE 0x0014
-#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB 0x0015
-#define ARMV8_PMUV3_PERFCTR_L2D_CACHE 0x0016
-#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL 0x0017
-#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB 0x0018
-#define ARMV8_PMUV3_PERFCTR_BUS_ACCESS 0x0019
-#define ARMV8_PMUV3_PERFCTR_MEMORY_ERROR 0x001A
-#define ARMV8_PMUV3_PERFCTR_INST_SPEC 0x001B
-#define ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED 0x001C
-#define ARMV8_PMUV3_PERFCTR_BUS_CYCLES 0x001D
-#define ARMV8_PMUV3_PERFCTR_CHAIN 0x001E
-#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE 0x001F
-#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE 0x0020
-#define ARMV8_PMUV3_PERFCTR_BR_RETIRED 0x0021
-#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED 0x0022
-#define ARMV8_PMUV3_PERFCTR_STALL_FRONTEND 0x0023
-#define ARMV8_PMUV3_PERFCTR_STALL_BACKEND 0x0024
-#define ARMV8_PMUV3_PERFCTR_L1D_TLB 0x0025
-#define ARMV8_PMUV3_PERFCTR_L1I_TLB 0x0026
-#define ARMV8_PMUV3_PERFCTR_L2I_CACHE 0x0027
-#define ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL 0x0028
-#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE 0x0029
-#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL 0x002A
-#define ARMV8_PMUV3_PERFCTR_L3D_CACHE 0x002B
-#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB 0x002C
-#define ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL 0x002D
-#define ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL 0x002E
-#define ARMV8_PMUV3_PERFCTR_L2D_TLB 0x002F
-#define ARMV8_PMUV3_PERFCTR_L2I_TLB 0x0030
-#define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS 0x0031
-#define ARMV8_PMUV3_PERFCTR_LL_CACHE 0x0032
-#define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS 0x0033
-#define ARMV8_PMUV3_PERFCTR_DTLB_WALK 0x0034
-#define ARMV8_PMUV3_PERFCTR_ITLB_WALK 0x0035
-#define ARMV8_PMUV3_PERFCTR_LL_CACHE_RD 0x0036
-#define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD 0x0037
-#define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD 0x0038
-#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_LMISS_RD 0x0039
-#define ARMV8_PMUV3_PERFCTR_OP_RETIRED 0x003A
-#define ARMV8_PMUV3_PERFCTR_OP_SPEC 0x003B
-#define ARMV8_PMUV3_PERFCTR_STALL 0x003C
-#define ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND 0x003D
-#define ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND 0x003E
-#define ARMV8_PMUV3_PERFCTR_STALL_SLOT 0x003F
-
-/* Statistical profiling extension microarchitectural events */
-#define ARMV8_SPE_PERFCTR_SAMPLE_POP 0x4000
-#define ARMV8_SPE_PERFCTR_SAMPLE_FEED 0x4001
-#define ARMV8_SPE_PERFCTR_SAMPLE_FILTRATE 0x4002
-#define ARMV8_SPE_PERFCTR_SAMPLE_COLLISION 0x4003
-
-/* AMUv1 architecture events */
-#define ARMV8_AMU_PERFCTR_CNT_CYCLES 0x4004
-#define ARMV8_AMU_PERFCTR_STALL_BACKEND_MEM 0x4005
-
-/* long-latency read miss events */
-#define ARMV8_PMUV3_PERFCTR_L1I_CACHE_LMISS 0x4006
-#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_LMISS_RD 0x4009
-#define ARMV8_PMUV3_PERFCTR_L2I_CACHE_LMISS 0x400A
-#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_LMISS_RD 0x400B
-
-/* Trace buffer events */
-#define ARMV8_PMUV3_PERFCTR_TRB_WRAP 0x400C
-#define ARMV8_PMUV3_PERFCTR_TRB_TRIG 0x400E
-
-/* Trace unit events */
-#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT0 0x4010
-#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT1 0x4011
-#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT2 0x4012
-#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT3 0x4013
-#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT4 0x4018
-#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT5 0x4019
-#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT6 0x401A
-#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT7 0x401B
-
-/* additional latency from alignment events */
-#define ARMV8_PMUV3_PERFCTR_LDST_ALIGN_LAT 0x4020
-#define ARMV8_PMUV3_PERFCTR_LD_ALIGN_LAT 0x4021
-#define ARMV8_PMUV3_PERFCTR_ST_ALIGN_LAT 0x4022
-
-/* Armv8.5 Memory Tagging Extension events */
-#define ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED 0x4024
-#define ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_RD 0x4025
-#define ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_WR 0x4026
-
-/* ARMv8 recommended implementation defined event types */
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD 0x0040
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR 0x0041
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD 0x0042
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR 0x0043
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_INNER 0x0044
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_OUTER 0x0045
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_VICTIM 0x0046
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_CLEAN 0x0047
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_INVAL 0x0048
-
-#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD 0x004C
-#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR 0x004D
-#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD 0x004E
-#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR 0x004F
-#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_RD 0x0050
-#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WR 0x0051
-#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_RD 0x0052
-#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_WR 0x0053
-
-#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_VICTIM 0x0056
-#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_CLEAN 0x0057
-#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_INVAL 0x0058
-
-#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_RD 0x005C
-#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_WR 0x005D
-#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_RD 0x005E
-#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_WR 0x005F
-#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD 0x0060
-#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR 0x0061
-#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_SHARED 0x0062
-#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NOT_SHARED 0x0063
-#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NORMAL 0x0064
-#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_PERIPH 0x0065
-#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_RD 0x0066
-#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_WR 0x0067
-#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LD_SPEC 0x0068
-#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_ST_SPEC 0x0069
-#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LDST_SPEC 0x006A
-
-#define ARMV8_IMPDEF_PERFCTR_LDREX_SPEC 0x006C
-#define ARMV8_IMPDEF_PERFCTR_STREX_PASS_SPEC 0x006D
-#define ARMV8_IMPDEF_PERFCTR_STREX_FAIL_SPEC 0x006E
-#define ARMV8_IMPDEF_PERFCTR_STREX_SPEC 0x006F
-#define ARMV8_IMPDEF_PERFCTR_LD_SPEC 0x0070
-#define ARMV8_IMPDEF_PERFCTR_ST_SPEC 0x0071
-#define ARMV8_IMPDEF_PERFCTR_LDST_SPEC 0x0072
-#define ARMV8_IMPDEF_PERFCTR_DP_SPEC 0x0073
-#define ARMV8_IMPDEF_PERFCTR_ASE_SPEC 0x0074
-#define ARMV8_IMPDEF_PERFCTR_VFP_SPEC 0x0075
-#define ARMV8_IMPDEF_PERFCTR_PC_WRITE_SPEC 0x0076
-#define ARMV8_IMPDEF_PERFCTR_CRYPTO_SPEC 0x0077
-#define ARMV8_IMPDEF_PERFCTR_BR_IMMED_SPEC 0x0078
-#define ARMV8_IMPDEF_PERFCTR_BR_RETURN_SPEC 0x0079
-#define ARMV8_IMPDEF_PERFCTR_BR_INDIRECT_SPEC 0x007A
-
-#define ARMV8_IMPDEF_PERFCTR_ISB_SPEC 0x007C
-#define ARMV8_IMPDEF_PERFCTR_DSB_SPEC 0x007D
-#define ARMV8_IMPDEF_PERFCTR_DMB_SPEC 0x007E
-
-#define ARMV8_IMPDEF_PERFCTR_EXC_UNDEF 0x0081
-#define ARMV8_IMPDEF_PERFCTR_EXC_SVC 0x0082
-#define ARMV8_IMPDEF_PERFCTR_EXC_PABORT 0x0083
-#define ARMV8_IMPDEF_PERFCTR_EXC_DABORT 0x0084
-
-#define ARMV8_IMPDEF_PERFCTR_EXC_IRQ 0x0086
-#define ARMV8_IMPDEF_PERFCTR_EXC_FIQ 0x0087
-#define ARMV8_IMPDEF_PERFCTR_EXC_SMC 0x0088
-
-#define ARMV8_IMPDEF_PERFCTR_EXC_HVC 0x008A
-#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_PABORT 0x008B
-#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_DABORT 0x008C
-#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_OTHER 0x008D
-#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_IRQ 0x008E
-#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_FIQ 0x008F
-#define ARMV8_IMPDEF_PERFCTR_RC_LD_SPEC 0x0090
-#define ARMV8_IMPDEF_PERFCTR_RC_ST_SPEC 0x0091
-
-#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_RD 0x00A0
-#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WR 0x00A1
-#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_RD 0x00A2
-#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_WR 0x00A3
-
-#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_VICTIM 0x00A6
-#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_CLEAN 0x00A7
-#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_INVAL 0x00A8
-
-/*
- * Per-CPU PMCR: config reg
- */
-#define ARMV8_PMU_PMCR_E (1 << 0) /* Enable all counters */
-#define ARMV8_PMU_PMCR_P (1 << 1) /* Reset all counters */
-#define ARMV8_PMU_PMCR_C (1 << 2) /* Cycle counter reset */
-#define ARMV8_PMU_PMCR_D (1 << 3) /* CCNT counts every 64th cpu cycle */
-#define ARMV8_PMU_PMCR_X (1 << 4) /* Export to ETM */
-#define ARMV8_PMU_PMCR_DP (1 << 5) /* Disable CCNT if non-invasive debug*/
-#define ARMV8_PMU_PMCR_LC (1 << 6) /* Overflow on 64 bit cycle counter */
-#define ARMV8_PMU_PMCR_LP (1 << 7) /* Long event counter enable */
-#define ARMV8_PMU_PMCR_N_SHIFT 11 /* Number of counters supported */
-#define ARMV8_PMU_PMCR_N_MASK 0x1f
-#define ARMV8_PMU_PMCR_MASK 0xff /* Mask for writable bits */
-
-/*
- * PMOVSR: counters overflow flag status reg
- */
-#define ARMV8_PMU_OVSR_MASK 0xffffffff /* Mask for writable bits */
-#define ARMV8_PMU_OVERFLOWED_MASK ARMV8_PMU_OVSR_MASK
-
-/*
- * PMXEVTYPER: Event selection reg
- */
-#define ARMV8_PMU_EVTYPE_MASK 0xc800ffff /* Mask for writable bits */
-#define ARMV8_PMU_EVTYPE_EVENT 0xffff /* Mask for EVENT bits */
-
-/*
- * Event filters for PMUv3
- */
-#define ARMV8_PMU_EXCLUDE_EL1 (1U << 31)
-#define ARMV8_PMU_EXCLUDE_EL0 (1U << 30)
-#define ARMV8_PMU_INCLUDE_EL2 (1U << 27)
-
-/*
- * PMUSERENR: user enable reg
- */
-#define ARMV8_PMU_USERENR_MASK 0xf /* Mask for writable bits */
-#define ARMV8_PMU_USERENR_EN (1 << 0) /* PMU regs can be accessed at EL0 */
-#define ARMV8_PMU_USERENR_SW (1 << 1) /* PMSWINC can be written at EL0 */
-#define ARMV8_PMU_USERENR_CR (1 << 2) /* Cycle counter can be read at EL0 */
-#define ARMV8_PMU_USERENR_ER (1 << 3) /* Event counter can be read at EL0 */
-
-/* PMMIR_EL1.SLOTS mask */
-#define ARMV8_PMU_SLOTS_MASK 0xff
-
-#define ARMV8_PMU_BUS_SLOTS_SHIFT 8
-#define ARMV8_PMU_BUS_SLOTS_MASK 0xff
-#define ARMV8_PMU_BUS_WIDTH_SHIFT 16
-#define ARMV8_PMU_BUS_WIDTH_MASK 0xf
-
#ifdef CONFIG_PERF_EVENTS
-struct pt_regs;
-extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
-extern unsigned long perf_misc_flags(struct pt_regs *regs);
-#define perf_misc_flags(regs) perf_misc_flags(regs)
#define perf_arch_bpf_user_pt_regs(regs) &regs->user_regs
#endif
diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index 237224484d0f..1b4509d3382c 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -14,6 +14,7 @@
#include <asm/tlbflush.h>
#define __HAVE_ARCH_PGD_FREE
+#define __HAVE_ARCH_PUD_FREE
#include <asm-generic/pgalloc.h>
#define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t))
@@ -27,7 +28,7 @@ static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot)
static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp)
{
- pudval_t pudval = PUD_TYPE_TABLE;
+ pudval_t pudval = PUD_TYPE_TABLE | PUD_TABLE_AF;
pudval |= (mm == &init_mm) ? PUD_TABLE_UXN : PUD_TABLE_PXN;
__pud_populate(pudp, __pa(pmdp), pudval);
@@ -43,16 +44,24 @@ static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot)
static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot)
{
- set_p4d(p4dp, __p4d(__phys_to_p4d_val(pudp) | prot));
+ if (pgtable_l4_enabled())
+ set_p4d(p4dp, __p4d(__phys_to_p4d_val(pudp) | prot));
}
static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4dp, pud_t *pudp)
{
- p4dval_t p4dval = P4D_TYPE_TABLE;
+ p4dval_t p4dval = P4D_TYPE_TABLE | P4D_TABLE_AF;
p4dval |= (mm == &init_mm) ? P4D_TABLE_UXN : P4D_TABLE_PXN;
__p4d_populate(p4dp, __pa(pudp), p4dval);
}
+
+static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+{
+ if (!pgtable_l4_enabled())
+ return;
+ __pud_free(mm, pud);
+}
#else
static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot)
{
@@ -60,6 +69,29 @@ static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot)
}
#endif /* CONFIG_PGTABLE_LEVELS > 3 */
+#if CONFIG_PGTABLE_LEVELS > 4
+
+static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t p4dp, pgdval_t prot)
+{
+ if (pgtable_l5_enabled())
+ set_pgd(pgdp, __pgd(__phys_to_pgd_val(p4dp) | prot));
+}
+
+static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgdp, p4d_t *p4dp)
+{
+ pgdval_t pgdval = PGD_TYPE_TABLE | PGD_TABLE_AF;
+
+ pgdval |= (mm == &init_mm) ? PGD_TABLE_UXN : PGD_TABLE_PXN;
+ __pgd_populate(pgdp, __pa(p4dp), pgdval);
+}
+
+#else
+static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t p4dp, pgdval_t prot)
+{
+ BUILD_BUG();
+}
+#endif /* CONFIG_PGTABLE_LEVELS > 4 */
+
extern pgd_t *pgd_alloc(struct mm_struct *mm);
extern void pgd_free(struct mm_struct *mm, pgd_t *pgdp);
@@ -77,14 +109,16 @@ static inline void
pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep)
{
VM_BUG_ON(mm && mm != &init_mm);
- __pmd_populate(pmdp, __pa(ptep), PMD_TYPE_TABLE | PMD_TABLE_UXN);
+ __pmd_populate(pmdp, __pa(ptep),
+ PMD_TYPE_TABLE | PMD_TABLE_AF | PMD_TABLE_UXN);
}
static inline void
pmd_populate(struct mm_struct *mm, pmd_t *pmdp, pgtable_t ptep)
{
VM_BUG_ON(mm == &init_mm);
- __pmd_populate(pmdp, page_to_phys(ptep), PMD_TYPE_TABLE | PMD_TABLE_PXN);
+ __pmd_populate(pmdp, page_to_phys(ptep),
+ PMD_TYPE_TABLE | PMD_TABLE_AF | PMD_TABLE_PXN);
}
#endif
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 5ab8d163198f..f3b77deedfa2 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -7,40 +7,46 @@
#include <asm/memory.h>
+#define PTDESC_ORDER 3
+
+/* Number of VA bits resolved by a single translation table level */
+#define PTDESC_TABLE_SHIFT (PAGE_SHIFT - PTDESC_ORDER)
+
/*
* Number of page-table levels required to address 'va_bits' wide
* address, without section mapping. We resolve the top (va_bits - PAGE_SHIFT)
- * bits with (PAGE_SHIFT - 3) bits at each page table level. Hence:
+ * bits with PTDESC_TABLE_SHIFT bits at each page table level. Hence:
*
- * levels = DIV_ROUND_UP((va_bits - PAGE_SHIFT), (PAGE_SHIFT - 3))
+ * levels = DIV_ROUND_UP((va_bits - PAGE_SHIFT), PTDESC_TABLE_SHIFT)
*
* where DIV_ROUND_UP(n, d) => (((n) + (d) - 1) / (d))
*
* We cannot include linux/kernel.h which defines DIV_ROUND_UP here
* due to build issues. So we open code DIV_ROUND_UP here:
*
- * ((((va_bits) - PAGE_SHIFT) + (PAGE_SHIFT - 3) - 1) / (PAGE_SHIFT - 3))
+ * ((((va_bits) - PAGE_SHIFT) + PTDESC_TABLE_SHIFT - 1) / PTDESC_TABLE_SHIFT)
*
* which gets simplified as :
*/
-#define ARM64_HW_PGTABLE_LEVELS(va_bits) (((va_bits) - 4) / (PAGE_SHIFT - 3))
+#define ARM64_HW_PGTABLE_LEVELS(va_bits) \
+ (((va_bits) - PTDESC_ORDER - 1) / PTDESC_TABLE_SHIFT)
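Plugging in a few configurations shows the simplified form matching the original DIV_ROUND_UP expression (worked arithmetic only, not part of the patch; PTDESC_ORDER = 3 throughout):

	4K pages,  va_bits = 48: (48 - 3 - 1) / 9  = 4 levels   (DIV_ROUND_UP(36, 9)  = 4)
	4K pages,  va_bits = 52: (52 - 3 - 1) / 9  = 5 levels   (DIV_ROUND_UP(40, 9)  = 5)
	64K pages, va_bits = 52: (52 - 3 - 1) / 13 = 3 levels   (DIV_ROUND_UP(36, 13) = 3)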
/*
- * Size mapped by an entry at level n ( 0 <= n <= 3)
- * We map (PAGE_SHIFT - 3) at all translation levels and PAGE_SHIFT bits
+ * Size mapped by an entry at level n ( -1 <= n <= 3)
+ * We map PTDESC_TABLE_SHIFT at all translation levels and PAGE_SHIFT bits
* in the final page. The maximum number of translation levels supported by
- * the architecture is 4. Hence, starting at level n, we have further
+ * the architecture is 5. Hence, starting at level n, we have further
* ((4 - n) - 1) levels of translation excluding the offset within the page.
* So, the total number of bits mapped by an entry at level n is :
*
- * ((4 - n) - 1) * (PAGE_SHIFT - 3) + PAGE_SHIFT
+ * ((4 - n) - 1) * PTDESC_TABLE_SHIFT + PAGE_SHIFT
*
* Rearranging it a bit we get :
- * (4 - n) * (PAGE_SHIFT - 3) + 3
+ * (4 - n) * PTDESC_TABLE_SHIFT + PTDESC_ORDER
*/
-#define ARM64_HW_PGTABLE_LEVEL_SHIFT(n) ((PAGE_SHIFT - 3) * (4 - (n)) + 3)
+#define ARM64_HW_PGTABLE_LEVEL_SHIFT(n) (PTDESC_TABLE_SHIFT * (4 - (n)) + PTDESC_ORDER)
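For example, with 4K pages (PAGE_SHIFT = 12, PTDESC_TABLE_SHIFT = 9) the shift works out as:

	level 3 (PTE): 9 * (4 - 3) + 3 = 12  ->  4KiB mapped per entry
	level 2 (PMD): 9 * (4 - 2) + 3 = 21  ->  2MiB mapped per entry
	level 1 (PUD): 9 * (4 - 1) + 3 = 30  ->  1GiB mapped per entry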
-#define PTRS_PER_PTE (1 << (PAGE_SHIFT - 3))
+#define PTRS_PER_PTE (1 << PTDESC_TABLE_SHIFT)
/*
* PMD_SHIFT determines the size a level 2 page table entry can map.
@@ -49,7 +55,7 @@
#define PMD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(2)
#define PMD_SIZE (_AC(1, UL) << PMD_SHIFT)
#define PMD_MASK (~(PMD_SIZE-1))
-#define PTRS_PER_PMD (1 << (PAGE_SHIFT - 3))
+#define PTRS_PER_PMD (1 << PTDESC_TABLE_SHIFT)
#endif
/*
@@ -59,12 +65,19 @@
#define PUD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(1)
#define PUD_SIZE (_AC(1, UL) << PUD_SHIFT)
#define PUD_MASK (~(PUD_SIZE-1))
-#define PTRS_PER_PUD (1 << (PAGE_SHIFT - 3))
+#define PTRS_PER_PUD (1 << PTDESC_TABLE_SHIFT)
+#endif
+
+#if CONFIG_PGTABLE_LEVELS > 4
+#define P4D_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(0)
+#define P4D_SIZE (_AC(1, UL) << P4D_SHIFT)
+#define P4D_MASK (~(P4D_SIZE-1))
+#define PTRS_PER_P4D (1 << PTDESC_TABLE_SHIFT)
#endif
/*
* PGDIR_SHIFT determines the size a top-level page table entry can map
- * (depending on the configuration, this level can be 0, 1 or 2).
+ * (depending on the configuration, this level can be -1, 0, 1 or 2).
*/
#define PGDIR_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - CONFIG_PGTABLE_LEVELS)
#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT)
@@ -87,13 +100,22 @@
/*
* Hardware page table definitions.
*
+ * Level -1 descriptor (PGD).
+ */
+#define PGD_TYPE_TABLE (_AT(pgdval_t, 3) << 0)
+#define PGD_TYPE_MASK (_AT(pgdval_t, 3) << 0)
+#define PGD_TABLE_AF (_AT(pgdval_t, 1) << 10) /* Ignored if no FEAT_HAFT */
+#define PGD_TABLE_PXN (_AT(pgdval_t, 1) << 59)
+#define PGD_TABLE_UXN (_AT(pgdval_t, 1) << 60)
+
+/*
* Level 0 descriptor (P4D).
*/
#define P4D_TYPE_TABLE (_AT(p4dval_t, 3) << 0)
-#define P4D_TABLE_BIT (_AT(p4dval_t, 1) << 1)
#define P4D_TYPE_MASK (_AT(p4dval_t, 3) << 0)
#define P4D_TYPE_SECT (_AT(p4dval_t, 1) << 0)
#define P4D_SECT_RDONLY (_AT(p4dval_t, 1) << 7) /* AP[2] */
+#define P4D_TABLE_AF (_AT(p4dval_t, 1) << 10) /* Ignored if no FEAT_HAFT */
#define P4D_TABLE_PXN (_AT(p4dval_t, 1) << 59)
#define P4D_TABLE_UXN (_AT(p4dval_t, 1) << 60)
@@ -101,10 +123,10 @@
* Level 1 descriptor (PUD).
*/
#define PUD_TYPE_TABLE (_AT(pudval_t, 3) << 0)
-#define PUD_TABLE_BIT (_AT(pudval_t, 1) << 1)
#define PUD_TYPE_MASK (_AT(pudval_t, 3) << 0)
#define PUD_TYPE_SECT (_AT(pudval_t, 1) << 0)
#define PUD_SECT_RDONLY (_AT(pudval_t, 1) << 7) /* AP[2] */
+#define PUD_TABLE_AF (_AT(pudval_t, 1) << 10) /* Ignored if no FEAT_HAFT */
#define PUD_TABLE_PXN (_AT(pudval_t, 1) << 59)
#define PUD_TABLE_UXN (_AT(pudval_t, 1) << 60)
@@ -114,12 +136,11 @@
#define PMD_TYPE_MASK (_AT(pmdval_t, 3) << 0)
#define PMD_TYPE_TABLE (_AT(pmdval_t, 3) << 0)
#define PMD_TYPE_SECT (_AT(pmdval_t, 1) << 0)
-#define PMD_TABLE_BIT (_AT(pmdval_t, 1) << 1)
+#define PMD_TABLE_AF (_AT(pmdval_t, 1) << 10) /* Ignored if no FEAT_HAFT */
/*
* Section
*/
-#define PMD_SECT_VALID (_AT(pmdval_t, 1) << 0)
#define PMD_SECT_USER (_AT(pmdval_t, 1) << 6) /* AP[1] */
#define PMD_SECT_RDONLY (_AT(pmdval_t, 1) << 7) /* AP[2] */
#define PMD_SECT_S (_AT(pmdval_t, 3) << 8)
@@ -143,7 +164,6 @@
#define PTE_VALID (_AT(pteval_t, 1) << 0)
#define PTE_TYPE_MASK (_AT(pteval_t, 3) << 0)
#define PTE_TYPE_PAGE (_AT(pteval_t, 3) << 0)
-#define PTE_TABLE_BIT (_AT(pteval_t, 1) << 1)
#define PTE_USER (_AT(pteval_t, 1) << 6) /* AP[1] */
#define PTE_RDONLY (_AT(pteval_t, 1) << 7) /* AP[2] */
#define PTE_SHARED (_AT(pteval_t, 3) << 8) /* SH[1:0], inner shareable */
@@ -154,13 +174,19 @@
#define PTE_CONT (_AT(pteval_t, 1) << 52) /* Contiguous range */
#define PTE_PXN (_AT(pteval_t, 1) << 53) /* Privileged XN */
#define PTE_UXN (_AT(pteval_t, 1) << 54) /* User XN */
+#define PTE_SWBITS_MASK _AT(pteval_t, (BIT(63) | GENMASK(58, 55)))
-#define PTE_ADDR_LOW (((_AT(pteval_t, 1) << (48 - PAGE_SHIFT)) - 1) << PAGE_SHIFT)
+#define PTE_ADDR_LOW (((_AT(pteval_t, 1) << (50 - PAGE_SHIFT)) - 1) << PAGE_SHIFT)
#ifdef CONFIG_ARM64_PA_BITS_52
+#ifdef CONFIG_ARM64_64K_PAGES
#define PTE_ADDR_HIGH (_AT(pteval_t, 0xf) << 12)
-#define PTE_ADDR_MASK (PTE_ADDR_LOW | PTE_ADDR_HIGH)
+#define PTE_ADDR_HIGH_SHIFT 36
+#define PHYS_TO_PTE_ADDR_MASK (PTE_ADDR_LOW | PTE_ADDR_HIGH)
#else
-#define PTE_ADDR_MASK PTE_ADDR_LOW
+#define PTE_ADDR_HIGH (_AT(pteval_t, 0x3) << 8)
+#define PTE_ADDR_HIGH_SHIFT 42
+#define PHYS_TO_PTE_ADDR_MASK GENMASK_ULL(49, 8)
+#endif
#endif
/*
@@ -170,15 +196,32 @@
#define PTE_ATTRINDX_MASK (_AT(pteval_t, 7) << 2)
/*
+ * PIIndex[3:0] encoding (Permission Indirection Extension)
+ */
+#define PTE_PI_IDX_0 6 /* AP[1], USER */
+#define PTE_PI_IDX_1 51 /* DBM */
+#define PTE_PI_IDX_2 53 /* PXN */
+#define PTE_PI_IDX_3 54 /* UXN */
+
+/*
+ * POIndex[2:0] encoding (Permission Overlay Extension)
+ */
+#define PTE_PO_IDX_0 (_AT(pteval_t, 1) << 60)
+#define PTE_PO_IDX_1 (_AT(pteval_t, 1) << 61)
+#define PTE_PO_IDX_2 (_AT(pteval_t, 1) << 62)
+
+#define PTE_PO_IDX_MASK GENMASK_ULL(62, 60)
+
+
+/*
* Memory Attribute override for Stage-2 (MemAttr[3:0])
*/
#define PTE_S2_MEMATTR(t) (_AT(pteval_t, (t)) << 2)
/*
- * Highest possible physical address supported.
+ * Hierarchical permission for Stage-1 tables
*/
-#define PHYS_MASK_SHIFT (CONFIG_ARM64_PA_BITS)
-#define PHYS_MASK ((UL(1) << PHYS_MASK_SHIFT) - 1)
+#define S1_TABLE_AP (_AT(pmdval_t, 3) << 61)
#define TTBR_CNP_BIT (UL(1) << 0)
@@ -268,6 +311,11 @@
#define TCR_TBI1 (UL(1) << 38)
#define TCR_HA (UL(1) << 39)
#define TCR_HD (UL(1) << 40)
+#define TCR_HPD0_SHIFT 41
+#define TCR_HPD0 (UL(1) << TCR_HPD0_SHIFT)
+#define TCR_HPD1_SHIFT 42
+#define TCR_HPD1 (UL(1) << TCR_HPD1_SHIFT)
+#define TCR_TBID0 (UL(1) << 51)
#define TCR_TBID1 (UL(1) << 52)
#define TCR_NFD0 (UL(1) << 53)
#define TCR_NFD1 (UL(1) << 54)
@@ -275,6 +323,7 @@
#define TCR_E0PD1 (UL(1) << 56)
#define TCR_TCMA0 (UL(1) << 57)
#define TCR_TCMA1 (UL(1) << 58)
+#define TCR_DS (UL(1) << 59)
/*
* TTBR.
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index 62e0ebeed720..85dceb1c66f4 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -17,59 +17,97 @@
#define PTE_SWP_EXCLUSIVE (_AT(pteval_t, 1) << 2) /* only for swp ptes */
#define PTE_DIRTY (_AT(pteval_t, 1) << 55)
#define PTE_SPECIAL (_AT(pteval_t, 1) << 56)
-#define PTE_DEVMAP (_AT(pteval_t, 1) << 57)
-#define PTE_PROT_NONE (_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */
/*
- * This bit indicates that the entry is present i.e. pmd_page()
- * still points to a valid huge page in memory even if the pmd
- * has been invalidated.
+ * PTE_PRESENT_INVALID=1 & PTE_VALID=0 indicates that the pte's fields should be
+ * interpreted by SW according to the HW layout, but any attempted HW access to
+ * the address will result in a fault. pte_present() returns true.
*/
-#define PMD_PRESENT_INVALID (_AT(pteval_t, 1) << 59) /* only when !PMD_SECT_VALID */
+#define PTE_PRESENT_INVALID (PTE_NG) /* only when !PTE_VALID */
+
+#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
+#define PTE_UFFD_WP (_AT(pteval_t, 1) << 58) /* uffd-wp tracking */
+#define PTE_SWP_UFFD_WP (_AT(pteval_t, 1) << 3) /* only for swp ptes */
+#else
+#define PTE_UFFD_WP (_AT(pteval_t, 0))
+#define PTE_SWP_UFFD_WP (_AT(pteval_t, 0))
+#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */
+
+#define _PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
+
+#define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_MAYBE_NG | PTE_MAYBE_SHARED | PTE_AF)
+#define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_MAYBE_NG | PMD_MAYBE_SHARED | PMD_SECT_AF)
+
+#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
+#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE))
+#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC))
+#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL))
+#define PROT_NORMAL_TAGGED (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_TAGGED))
+
+#define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE))
+#define PROT_SECT_NORMAL (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PTE_WRITE | PMD_ATTRINDX(MT_NORMAL))
+#define PROT_SECT_NORMAL_EXEC (PROT_SECT_DEFAULT | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
+
+#define _PAGE_DEFAULT (_PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL))
+
+#define _PAGE_KERNEL (PROT_NORMAL)
+#define _PAGE_KERNEL_RO ((PROT_NORMAL & ~PTE_WRITE) | PTE_RDONLY)
+#define _PAGE_KERNEL_ROX ((PROT_NORMAL & ~(PTE_WRITE | PTE_PXN)) | PTE_RDONLY)
+#define _PAGE_KERNEL_EXEC (PROT_NORMAL & ~PTE_PXN)
+#define _PAGE_KERNEL_EXEC_CONT ((PROT_NORMAL & ~PTE_PXN) | PTE_CONT)
+
+#define _PAGE_SHARED (_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
+#define _PAGE_SHARED_EXEC (_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_WRITE)
+#define _PAGE_READONLY (_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN)
+#define _PAGE_READONLY_EXEC (_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN)
+#define _PAGE_EXECONLY (_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN)
#ifndef __ASSEMBLY__
#include <asm/cpufeature.h>
#include <asm/pgtable-types.h>
+#include <asm/rsi.h>
extern bool arm64_use_ng_mappings;
+extern unsigned long prot_ns_shared;
-#define _PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
-#define _PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
+#define PROT_NS_SHARED (is_realm_world() ? prot_ns_shared : 0)
#define PTE_MAYBE_NG (arm64_use_ng_mappings ? PTE_NG : 0)
#define PMD_MAYBE_NG (arm64_use_ng_mappings ? PMD_SECT_NG : 0)
-/*
- * If we have userspace only BTI we don't want to mark kernel pages
- * guarded even if the system does support BTI.
- */
-#ifdef CONFIG_ARM64_BTI_KERNEL
-#define PTE_MAYBE_GP (system_supports_bti() ? PTE_GP : 0)
+#ifndef CONFIG_ARM64_LPA2
+#define lpa2_is_enabled() false
+#define PTE_MAYBE_SHARED PTE_SHARED
+#define PMD_MAYBE_SHARED PMD_SECT_S
+#define PHYS_MASK_SHIFT (CONFIG_ARM64_PA_BITS)
#else
-#define PTE_MAYBE_GP 0
+static inline bool __pure lpa2_is_enabled(void)
+{
+ return read_tcr() & TCR_DS;
+}
+
+#define PTE_MAYBE_SHARED (lpa2_is_enabled() ? 0 : PTE_SHARED)
+#define PMD_MAYBE_SHARED (lpa2_is_enabled() ? 0 : PMD_SECT_S)
+#define PHYS_MASK_SHIFT (lpa2_is_enabled() ? CONFIG_ARM64_PA_BITS : 48)
#endif
-#define PROT_DEFAULT (_PROT_DEFAULT | PTE_MAYBE_NG)
-#define PROT_SECT_DEFAULT (_PROT_SECT_DEFAULT | PMD_MAYBE_NG)
-
-#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
-#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE))
-#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC))
-#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL))
-#define PROT_NORMAL_TAGGED (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_TAGGED))
-
-#define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE))
-#define PROT_SECT_NORMAL (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
-#define PROT_SECT_NORMAL_EXEC (PROT_SECT_DEFAULT | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
+/*
+ * Highest possible physical address supported.
+ */
+#define PHYS_MASK ((UL(1) << PHYS_MASK_SHIFT) - 1)
-#define _PAGE_DEFAULT (_PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL))
+/*
+ * If we have userspace only BTI we don't want to mark kernel pages
+ * guarded even if the system does support BTI.
+ */
+#define PTE_MAYBE_GP (system_supports_bti_kernel() ? PTE_GP : 0)
-#define PAGE_KERNEL __pgprot(PROT_NORMAL)
-#define PAGE_KERNEL_RO __pgprot((PROT_NORMAL & ~PTE_WRITE) | PTE_RDONLY)
-#define PAGE_KERNEL_ROX __pgprot((PROT_NORMAL & ~(PTE_WRITE | PTE_PXN)) | PTE_RDONLY)
-#define PAGE_KERNEL_EXEC __pgprot(PROT_NORMAL & ~PTE_PXN)
-#define PAGE_KERNEL_EXEC_CONT __pgprot((PROT_NORMAL & ~PTE_PXN) | PTE_CONT)
+#define PAGE_KERNEL __pgprot(_PAGE_KERNEL)
+#define PAGE_KERNEL_RO __pgprot(_PAGE_KERNEL_RO)
+#define PAGE_KERNEL_ROX __pgprot(_PAGE_KERNEL_ROX)
+#define PAGE_KERNEL_EXEC __pgprot(_PAGE_KERNEL_EXEC)
+#define PAGE_KERNEL_EXEC_CONT __pgprot(_PAGE_KERNEL_EXEC_CONT)
#define PAGE_S2_MEMATTR(attr, has_fwb) \
({ \
@@ -81,32 +119,74 @@ extern bool arm64_use_ng_mappings;
__val; \
})
-#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN)
+#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PRESENT_INVALID | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN)
/* shared+writable pages are clean by default, hence PTE_RDONLY|PTE_WRITE */
-#define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
-#define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_WRITE)
-#define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN)
-#define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN)
-#define PAGE_EXECONLY __pgprot(_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN)
-
-#define __P000 PAGE_NONE
-#define __P001 PAGE_READONLY
-#define __P010 PAGE_READONLY
-#define __P011 PAGE_READONLY
-#define __P100 PAGE_READONLY_EXEC /* PAGE_EXECONLY if Enhanced PAN */
-#define __P101 PAGE_READONLY_EXEC
-#define __P110 PAGE_READONLY_EXEC
-#define __P111 PAGE_READONLY_EXEC
-
-#define __S000 PAGE_NONE
-#define __S001 PAGE_READONLY
-#define __S010 PAGE_SHARED
-#define __S011 PAGE_SHARED
-#define __S100 PAGE_READONLY_EXEC /* PAGE_EXECONLY if Enhanced PAN */
-#define __S101 PAGE_READONLY_EXEC
-#define __S110 PAGE_SHARED_EXEC
-#define __S111 PAGE_SHARED_EXEC
+#define PAGE_SHARED __pgprot(_PAGE_SHARED)
+#define PAGE_SHARED_EXEC __pgprot(_PAGE_SHARED_EXEC)
+#define PAGE_READONLY __pgprot(_PAGE_READONLY)
+#define PAGE_READONLY_EXEC __pgprot(_PAGE_READONLY_EXEC)
+#define PAGE_EXECONLY __pgprot(_PAGE_EXECONLY)
#endif /* __ASSEMBLY__ */
+#define pte_pi_index(pte) ( \
+ ((pte & BIT(PTE_PI_IDX_3)) >> (PTE_PI_IDX_3 - 3)) | \
+ ((pte & BIT(PTE_PI_IDX_2)) >> (PTE_PI_IDX_2 - 2)) | \
+ ((pte & BIT(PTE_PI_IDX_1)) >> (PTE_PI_IDX_1 - 1)) | \
+ ((pte & BIT(PTE_PI_IDX_0)) >> (PTE_PI_IDX_0 - 0)))
+
+/*
+ * Page types used via Permission Indirection Extension (PIE). PIE uses
+ * the USER, DBM, PXN and UXN bits to generate an index which is used
+ * to look up the actual permission in PIR_ELx and PIRE0_EL1. We define
+ * the combinations we use on non-PIE systems with the same encoding;
+ * for convenience these are listed here as comments, as are the
+ * unallocated encodings.
+ */
+
+/* 0: PAGE_DEFAULT */
+/* 1: PTE_USER */
+/* 2: PTE_WRITE */
+/* 3: PTE_WRITE | PTE_USER */
+/* 4: PAGE_EXECONLY PTE_PXN */
+/* 5: PAGE_READONLY_EXEC PTE_PXN | PTE_USER */
+/* 6: PTE_PXN | PTE_WRITE */
+/* 7: PAGE_SHARED_EXEC PTE_PXN | PTE_WRITE | PTE_USER */
+/* 8: PAGE_KERNEL_ROX PTE_UXN */
+/* 9: PAGE_GCS_RO PTE_UXN | PTE_USER */
+/* a: PAGE_KERNEL_EXEC PTE_UXN | PTE_WRITE */
+/* b: PAGE_GCS PTE_UXN | PTE_WRITE | PTE_USER */
+/* c: PAGE_KERNEL_RO PTE_UXN | PTE_PXN */
+/* d: PAGE_READONLY PTE_UXN | PTE_PXN | PTE_USER */
+/* e: PAGE_KERNEL PTE_UXN | PTE_PXN | PTE_WRITE */
+/* f: PAGE_SHARED PTE_UXN | PTE_PXN | PTE_WRITE | PTE_USER */
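As a worked example (not part of the patch), _PAGE_READONLY sets PTE_USER (bit 6), PTE_PXN (bit 53) and PTE_UXN (bit 54) but not PTE_WRITE/DBM (bit 51), so:

	pte_pi_index(_PAGE_READONLY) = (1 << 3) | (1 << 2) | (0 << 1) | (1 << 0) = 0xd

which matches the "d: PAGE_READONLY" line above; PIE_E0 below maps that index to PIE_R_O and PIE_E1 maps it to PIE_R.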
+
+#define _PAGE_GCS (_PAGE_DEFAULT | PTE_NG | PTE_UXN | PTE_WRITE | PTE_USER)
+#define _PAGE_GCS_RO (_PAGE_DEFAULT | PTE_NG | PTE_UXN | PTE_USER)
+
+#define PAGE_GCS __pgprot(_PAGE_GCS)
+#define PAGE_GCS_RO __pgprot(_PAGE_GCS_RO)
+
+#define PIE_E0 ( \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_GCS), PIE_GCS) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_GCS_RO), PIE_R) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_EXECONLY), PIE_X_O) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_READONLY_EXEC), PIE_RX_O) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_SHARED_EXEC), PIE_RWX_O) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_READONLY), PIE_R_O) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_SHARED), PIE_RW_O))
+
+#define PIE_E1 ( \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_GCS), PIE_NONE_O) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_GCS_RO), PIE_NONE_O) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_EXECONLY), PIE_NONE_O) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_READONLY_EXEC), PIE_R) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_SHARED_EXEC), PIE_RW) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_READONLY), PIE_R) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_SHARED), PIE_RW) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_KERNEL_ROX), PIE_RX) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_KERNEL_EXEC), PIE_RWX) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_KERNEL_RO), PIE_R) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_KERNEL), PIE_RW))
+
#endif /* __ASM_PGTABLE_PROT_H */
diff --git a/arch/arm64/include/asm/pgtable-types.h b/arch/arm64/include/asm/pgtable-types.h
index b8f158ae2527..265e8301d7ba 100644
--- a/arch/arm64/include/asm/pgtable-types.h
+++ b/arch/arm64/include/asm/pgtable-types.h
@@ -11,11 +11,19 @@
#include <asm/types.h>
-typedef u64 pteval_t;
-typedef u64 pmdval_t;
-typedef u64 pudval_t;
-typedef u64 p4dval_t;
-typedef u64 pgdval_t;
+/*
+ * Page Table Descriptor
+ *
+ * Generic page table descriptor format from which
+ * all level specific descriptors can be derived.
+ */
+typedef u64 ptdesc_t;
+
+typedef ptdesc_t pteval_t;
+typedef ptdesc_t pmdval_t;
+typedef ptdesc_t pudval_t;
+typedef ptdesc_t p4dval_t;
+typedef ptdesc_t pgdval_t;
/*
* These are used to make use of C type-checking..
@@ -36,11 +44,17 @@ typedef struct { pudval_t pud; } pud_t;
#define __pud(x) ((pud_t) { (x) } )
#endif
+#if CONFIG_PGTABLE_LEVELS > 4
+typedef struct { p4dval_t p4d; } p4d_t;
+#define p4d_val(x) ((x).p4d)
+#define __p4d(x) ((p4d_t) { (x) } )
+#endif
+
typedef struct { pgdval_t pgd; } pgd_t;
#define pgd_val(x) ((x).pgd)
#define __pgd(x) ((pgd_t) { (x) } )
-typedef struct { pteval_t pgprot; } pgprot_t;
+typedef struct { ptdesc_t pgprot; } pgprot_t;
#define pgprot_val(x) ((x).pgprot)
#define __pgprot(x) ((pgprot_t) { (x) } )
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index b5df82aa99e6..abd2dee416b3 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -18,11 +18,15 @@
* VMALLOC range.
*
* VMALLOC_START: beginning of the kernel vmalloc space
- * VMALLOC_END: extends to the available space below vmemmap, PCI I/O space
- * and fixed mappings
+ * VMALLOC_END: extends to the available space below vmemmap
*/
#define VMALLOC_START (MODULES_END)
-#define VMALLOC_END (VMEMMAP_START - SZ_256M)
+#if VA_BITS == VA_BITS_MIN
+#define VMALLOC_END (VMEMMAP_START - SZ_8M)
+#else
+#define VMEMMAP_UNUSED_NPAGES ((_PAGE_OFFSET(vabits_actual) - PAGE_OFFSET) >> PAGE_SHIFT)
+#define VMALLOC_END (VMEMMAP_START + VMEMMAP_UNUSED_NPAGES * sizeof(struct page) - SZ_8M)
+#endif
#define vmemmap ((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT))
@@ -30,11 +34,91 @@
#include <asm/cmpxchg.h>
#include <asm/fixmap.h>
+#include <asm/por.h>
#include <linux/mmdebug.h>
#include <linux/mm_types.h>
#include <linux/sched.h>
#include <linux/page_table_check.h>
+static inline void emit_pte_barriers(void)
+{
+ /*
+ * These barriers are emitted under certain conditions after a pte entry
+ * was modified (see e.g. __set_pte_complete()). The dsb makes the store
+ * visible to the table walker. The isb ensures that any previous
+ * speculative "invalid translation" marker that is in the CPU's
+ * pipeline gets cleared, so that any access to that address after
+ * setting the pte to valid won't cause a spurious fault. If the thread
+ * gets preempted after storing to the pgtable but before emitting these
+ * barriers, __switch_to() emits a dsb which ensures the walker gets to
+ * see the store. There is no guarantee of an isb being issued though.
+ * This is safe because it will still get issued (albeit on a
+ * potentially different CPU) when the thread starts running again,
+ * before any access to the address.
+ */
+ dsb(ishst);
+ isb();
+}
+
+static inline void queue_pte_barriers(void)
+{
+ unsigned long flags;
+
+ if (in_interrupt()) {
+ emit_pte_barriers();
+ return;
+ }
+
+ flags = read_thread_flags();
+
+ if (flags & BIT(TIF_LAZY_MMU)) {
+ /* Avoid the atomic op if already set. */
+ if (!(flags & BIT(TIF_LAZY_MMU_PENDING)))
+ set_thread_flag(TIF_LAZY_MMU_PENDING);
+ } else {
+ emit_pte_barriers();
+ }
+}
+
+#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE
+static inline void arch_enter_lazy_mmu_mode(void)
+{
+ /*
+ * lazy_mmu_mode is not supposed to permit nesting. But in practice this
+ * does happen with CONFIG_DEBUG_PAGEALLOC, where a page allocation
+ * inside a lazy_mmu_mode section (such as zap_pte_range()) will change
+ * permissions on the linear map with apply_to_page_range(), which
+ * re-enters lazy_mmu_mode. So we tolerate nesting in our
+ * implementation. The first call to arch_leave_lazy_mmu_mode() will
+ * flush and clear the flag such that the remainder of the work in the
+ * outer nest behaves as if outside of lazy mmu mode. This is safe and
+ * keeps tracking simple.
+ */
+
+ if (in_interrupt())
+ return;
+
+ set_thread_flag(TIF_LAZY_MMU);
+}
+
+static inline void arch_flush_lazy_mmu_mode(void)
+{
+ if (in_interrupt())
+ return;
+
+ if (test_and_clear_thread_flag(TIF_LAZY_MMU_PENDING))
+ emit_pte_barriers();
+}
+
+static inline void arch_leave_lazy_mmu_mode(void)
+{
+ if (in_interrupt())
+ return;
+
+ arch_flush_lazy_mmu_mode();
+ clear_thread_flag(TIF_LAZY_MMU);
+}
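A sketch of the batching these hooks enable, assuming a caller that updates a run of valid kernel ptes; the helper name is hypothetical and only illustrates the intended pattern:

	static void set_kernel_ptes(pte_t *ptep, pte_t pte, unsigned int nr)
	{
		unsigned int i;

		arch_enter_lazy_mmu_mode();		/* sets TIF_LAZY_MMU */
		for (i = 0; i < nr; i++, ptep++, pte = pte_advance_pfn(pte, 1))
			__set_pte(ptep, pte);		/* dsb/isb deferred via TIF_LAZY_MMU_PENDING */
		arch_leave_lazy_mmu_mode();		/* emits one dsb/isb if any were pending */
	}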
+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
@@ -45,19 +129,13 @@
__flush_tlb_range(vma, addr, end, PUD_SIZE, false, 1)
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-static inline bool arch_thp_swp_supported(void)
-{
- return !system_supports_mte();
-}
-#define arch_thp_swp_supported arch_thp_swp_supported
-
/*
* Outside of a few very special situations (e.g. hibernation), we always
* use broadcast TLB invalidation instructions, therefore a spurious page
* fault on one CPU which has been handled concurrently by another CPU
* does not need to perform additional invalidation.
*/
-#define flush_tlb_fix_spurious_fault(vma, address) do { } while (0)
+#define flush_tlb_fix_spurious_fault(vma, address, ptep) do { } while (0)
/*
* ZERO_PAGE is a global shared page that is always zero: used
@@ -69,23 +147,27 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
#define pte_ERROR(e) \
pr_err("%s:%d: bad pte %016llx.\n", __FILE__, __LINE__, pte_val(e))
-/*
- * Macros to convert between a physical address and its placement in a
- * page table entry, taking care of 52-bit addresses.
- */
#ifdef CONFIG_ARM64_PA_BITS_52
static inline phys_addr_t __pte_to_phys(pte_t pte)
{
+ pte_val(pte) &= ~PTE_MAYBE_SHARED;
return (pte_val(pte) & PTE_ADDR_LOW) |
- ((pte_val(pte) & PTE_ADDR_HIGH) << 36);
+ ((pte_val(pte) & PTE_ADDR_HIGH) << PTE_ADDR_HIGH_SHIFT);
}
static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
{
- return (phys | (phys >> 36)) & PTE_ADDR_MASK;
+ return (phys | (phys >> PTE_ADDR_HIGH_SHIFT)) & PHYS_TO_PTE_ADDR_MASK;
}
#else
-#define __pte_to_phys(pte) (pte_val(pte) & PTE_ADDR_MASK)
-#define __phys_to_pte_val(phys) (phys)
+static inline phys_addr_t __pte_to_phys(pte_t pte)
+{
+ return pte_val(pte) & PTE_ADDR_LOW;
+}
+
+static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
+{
+ return phys;
+}
#endif
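In the 52-bit PA case the extra physical address bits live in otherwise unused pte bits, so the two helpers above just shuttle them back and forth (worked from the PTE_ADDR_HIGH definitions in the pgtable-hwdef.h hunk earlier in this diff):

	4K/16K pages (LPA2): PA bits [51:50] <-> pte bits [9:8]    (PTE_ADDR_HIGH_SHIFT = 42)
	64K pages:           PA bits [51:48] <-> pte bits [15:12]  (PTE_ADDR_HIGH_SHIFT = 36)

__pte_to_phys() also clears PTE_MAYBE_SHARED first, since pte bits [9:8] encode the shareability field when LPA2 is not in use.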
#define pte_pfn(pte) (__pte_to_phys(pte) >> PAGE_SHIFT)
@@ -93,20 +175,21 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
__pte(__phys_to_pte_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))
#define pte_none(pte) (!pte_val(pte))
-#define pte_clear(mm,addr,ptep) set_pte(ptep, __pte(0))
+#define __pte_clear(mm, addr, ptep) \
+ __set_pte(ptep, __pte(0))
#define pte_page(pte) (pfn_to_page(pte_pfn(pte)))
/*
* The following only work if pte_present(). Undefined behaviour otherwise.
*/
-#define pte_present(pte) (!!(pte_val(pte) & (PTE_VALID | PTE_PROT_NONE)))
+#define pte_present(pte) (pte_valid(pte) || pte_present_invalid(pte))
#define pte_young(pte) (!!(pte_val(pte) & PTE_AF))
#define pte_special(pte) (!!(pte_val(pte) & PTE_SPECIAL))
#define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE))
+#define pte_rdonly(pte) (!!(pte_val(pte) & PTE_RDONLY))
#define pte_user(pte) (!!(pte_val(pte) & PTE_USER))
#define pte_user_exec(pte) (!(pte_val(pte) & PTE_UXN))
#define pte_cont(pte) (!!(pte_val(pte) & PTE_CONT))
-#define pte_devmap(pte) (!!(pte_val(pte) & PTE_DEVMAP))
#define pte_tagged(pte) ((pte_val(pte) & PTE_ATTRINDX_MASK) == \
PTE_ATTRINDX(MT_NORMAL_TAGGED))
@@ -120,11 +203,13 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
(__boundary - 1 < (end) - 1) ? __boundary : (end); \
})
-#define pte_hw_dirty(pte) (pte_write(pte) && !(pte_val(pte) & PTE_RDONLY))
+#define pte_hw_dirty(pte) (pte_write(pte) && !pte_rdonly(pte))
#define pte_sw_dirty(pte) (!!(pte_val(pte) & PTE_DIRTY))
#define pte_dirty(pte) (pte_sw_dirty(pte) || pte_hw_dirty(pte))
#define pte_valid(pte) (!!(pte_val(pte) & PTE_VALID))
+#define pte_present_invalid(pte) \
+ ((pte_val(pte) & (PTE_VALID | PTE_PRESENT_INVALID)) == PTE_PRESENT_INVALID)
/*
* Execute-only user mappings do not have the PTE_USER bit set. All valid
* kernel mappings have the PTE_UXN bit set.
@@ -132,16 +217,38 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
#define pte_valid_not_user(pte) \
((pte_val(pte) & (PTE_VALID | PTE_USER | PTE_UXN)) == (PTE_VALID | PTE_UXN))
/*
+ * Returns true if the pte is valid and has the contiguous bit set.
+ */
+#define pte_valid_cont(pte) (pte_valid(pte) && pte_cont(pte))
+/*
* Could the pte be present in the TLB? We must check mm_tlb_flush_pending
* so that we don't erroneously return false for pages that have been
* remapped as PROT_NONE but are yet to be flushed from the TLB.
* Note that we can't make any assumptions based on the state of the access
- * flag, since ptep_clear_flush_young() elides a DSB when invalidating the
+ * flag, since __ptep_clear_flush_young() elides a DSB when invalidating the
* TLB.
*/
#define pte_accessible(mm, pte) \
(mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid(pte))
+static inline bool por_el0_allows_pkey(u8 pkey, bool write, bool execute)
+{
+ u64 por;
+
+ if (!system_supports_poe())
+ return true;
+
+ por = read_sysreg_s(SYS_POR_EL0);
+
+ if (write)
+ return por_elx_allows_write(por, pkey);
+
+ if (execute)
+ return por_elx_allows_exec(por, pkey);
+
+ return por_elx_allows_read(por, pkey);
+}
+
/*
* p??_access_permitted() is true for valid user mappings (PTE_USER
* bit set, subject to the write permission check). For execute-only
@@ -149,8 +256,11 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
* not set) must return false. PROT_NONE mappings do not have the
* PTE_VALID bit set.
*/
-#define pte_access_permitted(pte, write) \
+#define pte_access_permitted_no_overlay(pte, write) \
(((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) && (!(write) || pte_write(pte)))
+#define pte_access_permitted(pte, write) \
+ (pte_access_permitted_no_overlay(pte, write) && \
+ por_el0_allows_pkey(FIELD_GET(PTE_PO_IDX_MASK, pte_val(pte)), write, false))
#define pmd_access_permitted(pmd, write) \
(pte_access_permitted(pmd_pte(pmd), (write)))
#define pud_access_permitted(pud, write) \
@@ -180,7 +290,7 @@ static inline pmd_t set_pmd_bit(pmd_t pmd, pgprot_t prot)
return pmd;
}
-static inline pte_t pte_mkwrite(pte_t pte)
+static inline pte_t pte_mkwrite_novma(pte_t pte)
{
pte = set_pte_bit(pte, __pgprot(PTE_WRITE));
pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY));
@@ -212,7 +322,7 @@ static inline pte_t pte_wrprotect(pte_t pte)
* clear), set the PTE_DIRTY bit.
*/
if (pte_hw_dirty(pte))
- pte = pte_mkdirty(pte);
+ pte = set_pte_bit(pte, __pgprot(PTE_DIRTY));
pte = clear_pte_bit(pte, __pgprot(PTE_WRITE));
pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
@@ -236,8 +346,7 @@ static inline pte_t pte_mkspecial(pte_t pte)
static inline pte_t pte_mkcont(pte_t pte)
{
- pte = set_pte_bit(pte, __pgprot(PTE_CONT));
- return set_pte_bit(pte, __pgprot(PTE_TYPE_PAGE));
+ return set_pte_bit(pte, __pgprot(PTE_CONT));
}
static inline pte_t pte_mknoncont(pte_t pte)
@@ -245,36 +354,68 @@ static inline pte_t pte_mknoncont(pte_t pte)
return clear_pte_bit(pte, __pgprot(PTE_CONT));
}
-static inline pte_t pte_mkpresent(pte_t pte)
+static inline pte_t pte_mkvalid(pte_t pte)
{
return set_pte_bit(pte, __pgprot(PTE_VALID));
}
+static inline pte_t pte_mkinvalid(pte_t pte)
+{
+ pte = set_pte_bit(pte, __pgprot(PTE_PRESENT_INVALID));
+ pte = clear_pte_bit(pte, __pgprot(PTE_VALID));
+ return pte;
+}
+
static inline pmd_t pmd_mkcont(pmd_t pmd)
{
return __pmd(pmd_val(pmd) | PMD_SECT_CONT);
}
-static inline pte_t pte_mkdevmap(pte_t pte)
+#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
+static inline int pte_uffd_wp(pte_t pte)
{
- return set_pte_bit(pte, __pgprot(PTE_DEVMAP | PTE_SPECIAL));
+ return !!(pte_val(pte) & PTE_UFFD_WP);
}
-static inline void set_pte(pte_t *ptep, pte_t pte)
+static inline pte_t pte_mkuffd_wp(pte_t pte)
+{
+ return pte_wrprotect(set_pte_bit(pte, __pgprot(PTE_UFFD_WP)));
+}
+
+static inline pte_t pte_clear_uffd_wp(pte_t pte)
+{
+ return clear_pte_bit(pte, __pgprot(PTE_UFFD_WP));
+}
+#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */
+
+static inline void __set_pte_nosync(pte_t *ptep, pte_t pte)
{
WRITE_ONCE(*ptep, pte);
+}
+static inline void __set_pte_complete(pte_t pte)
+{
/*
* Only if the new pte is valid and kernel, otherwise TLB maintenance
- * or update_mmu_cache() have the necessary barriers.
+ * has the necessary barriers.
*/
- if (pte_valid_not_user(pte)) {
- dsb(ishst);
- isb();
- }
+ if (pte_valid_not_user(pte))
+ queue_pte_barriers();
+}
+
+static inline void __set_pte(pte_t *ptep, pte_t pte)
+{
+ __set_pte_nosync(ptep, pte);
+ __set_pte_complete(pte);
+}
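The nosync/complete split exists so that a caller writing several entries with the same validity/user properties can publish them behind a single barrier sequence. A minimal sketch under that assumption (hypothetical helper, not the patch's own batching code):

	static void __set_ptes_sketch(pte_t *ptep, pte_t pte, unsigned int nr)
	{
		while (nr--) {
			__set_pte_nosync(ptep++, pte);
			pte = pte_advance_pfn(pte, 1);
		}
		/* One dsb/isb (or a single deferral) covers the whole batch. */
		__set_pte_complete(pte);
	}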
+
+static inline pte_t __ptep_get(pte_t *ptep)
+{
+ return READ_ONCE(*ptep);
}
extern void __sync_icache_dcache(pte_t pteval);
+bool pgattr_change_is_safe(pteval_t old, pteval_t new);
/*
* PTE bits configuration in the presence of hardware Dirty Bit Management
@@ -292,7 +433,7 @@ extern void __sync_icache_dcache(pte_t pteval);
* PTE_DIRTY || (PTE_WRITE && !PTE_RDONLY)
*/
-static inline void __check_racy_pte_update(struct mm_struct *mm, pte_t *ptep,
+static inline void __check_safe_pte_update(struct mm_struct *mm, pte_t *ptep,
pte_t pte)
{
pte_t old_pte;
@@ -300,7 +441,7 @@ static inline void __check_racy_pte_update(struct mm_struct *mm, pte_t *ptep,
if (!IS_ENABLED(CONFIG_DEBUG_VM))
return;
- old_pte = READ_ONCE(*ptep);
+ old_pte = __ptep_get(ptep);
if (!pte_valid(old_pte) || !pte_valid(pte))
return;
@@ -309,7 +450,7 @@ static inline void __check_racy_pte_update(struct mm_struct *mm, pte_t *ptep,
/*
* Check for potential race with hardware updates of the pte
- * (ptep_set_access_flags safely changes valid ptes without going
+ * (__ptep_set_access_flags safely changes valid ptes without going
* through an invalid entry).
*/
VM_WARN_ONCE(!pte_young(pte),
@@ -318,10 +459,12 @@ static inline void __check_racy_pte_update(struct mm_struct *mm, pte_t *ptep,
VM_WARN_ONCE(pte_write(old_pte) && !pte_dirty(pte),
"%s: racy dirty state clearing: 0x%016llx -> 0x%016llx",
__func__, pte_val(old_pte), pte_val(pte));
+ VM_WARN_ONCE(!pgattr_change_is_safe(pte_val(old_pte), pte_val(pte)),
+ "%s: unsafe attribute change: 0x%016llx -> 0x%016llx",
+ __func__, pte_val(old_pte), pte_val(pte));
}
-static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pte)
+static inline void __sync_cache_and_tags(pte_t pte, unsigned int nr_pages)
{
if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte))
__sync_icache_dcache(pte);
@@ -329,41 +472,33 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
/*
* If the PTE would provide user space access to the tags associated
* with it then ensure that the MTE tags are synchronised. Although
- * pte_access_permitted() returns false for exec only mappings, they
- * don't expose tags (instruction fetches don't check tags).
+ * pte_access_permitted_no_overlay() returns false for exec only
+ * mappings, they don't expose tags (instruction fetches don't check
+ * tags).
*/
- if (system_supports_mte() && pte_access_permitted(pte, false) &&
- !pte_special(pte)) {
- pte_t old_pte = READ_ONCE(*ptep);
- /*
- * We only need to synchronise if the new PTE has tags enabled
- * or if swapping in (in which case another mapping may have
- * set tags in the past even if this PTE isn't tagged).
- * (!pte_none() && !pte_present()) is an open coded version of
- * is_swap_pte()
- */
- if (pte_tagged(pte) || (!pte_none(old_pte) && !pte_present(old_pte)))
- mte_sync_tags(old_pte, pte);
- }
+ if (system_supports_mte() && pte_access_permitted_no_overlay(pte, false) &&
+ !pte_special(pte) && pte_tagged(pte))
+ mte_sync_tags(pte, nr_pages);
+}
- __check_racy_pte_update(mm, ptep, pte);
+/*
+ * Select all bits except the pfn
+ */
+#define pte_pgprot pte_pgprot
+static inline pgprot_t pte_pgprot(pte_t pte)
+{
+ unsigned long pfn = pte_pfn(pte);
- set_pte(ptep, pte);
+ return __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte));
}
-static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pte)
+#define pte_advance_pfn pte_advance_pfn
+static inline pte_t pte_advance_pfn(pte_t pte, unsigned long nr)
{
- page_table_check_pte_set(mm, addr, ptep, pte);
- return __set_pte_at(mm, addr, ptep, pte);
+ return pfn_pte(pte_pfn(pte) + nr, pte_pgprot(pte));
}
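
The pte_pgprot()/pte_advance_pfn() pair above leans on a simple bit trick: rebuild a pte from the same pfn with empty permissions and XOR it against the original, which leaves only the non-pfn attribute bits; advancing then recombines a bumped pfn with those attributes. A standalone userspace sketch of the idea, with made-up field positions (the FAKE_* names are illustrative, not the arm64 descriptor layout):

/* Standalone model of the pte_pgprot()/pte_advance_pfn() bit trick.
 * Field positions are illustrative, not the arm64 descriptor layout. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define FAKE_PAGE_SHIFT 12
#define FAKE_PFN_MASK   0x0000fffffffff000ull   /* bits [47:12] hold the pfn */

static uint64_t fake_pfn_pte(uint64_t pfn, uint64_t prot)
{
        return (pfn << FAKE_PAGE_SHIFT) | prot;
}

static uint64_t fake_pte_pfn(uint64_t pte)
{
        return (pte & FAKE_PFN_MASK) >> FAKE_PAGE_SHIFT;
}

/* "Select all bits except the pfn": XOR against a prot-less pte of the same pfn. */
static uint64_t fake_pte_pgprot(uint64_t pte)
{
        return fake_pfn_pte(fake_pte_pfn(pte), 0) ^ pte;
}

/* Advance the output address by 'nr' pages while keeping the attributes. */
static uint64_t fake_pte_advance_pfn(uint64_t pte, uint64_t nr)
{
        return fake_pfn_pte(fake_pte_pfn(pte) + nr, fake_pte_pgprot(pte));
}

int main(void)
{
        uint64_t pte = fake_pfn_pte(0x1234, 0x63);      /* pfn 0x1234, some attribute bits */
        uint64_t next = fake_pte_advance_pfn(pte, 3);

        assert(fake_pte_pgprot(pte) == 0x63);
        assert(fake_pte_pfn(next) == 0x1237);
        assert(fake_pte_pgprot(next) == 0x63);
        printf("pte=%#llx next=%#llx\n",
               (unsigned long long)pte, (unsigned long long)next);
        return 0;
}
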
/*
- * Huge pte definitions.
- */
-#define pte_mkhuge(pte) (__pte(pte_val(pte) & ~PTE_TABLE_BIT))
-
-/*
* Hugetlb definitions.
*/
#define HUGE_MAX_HSTATE 4
@@ -409,21 +544,20 @@ static inline pmd_t pte_pmd(pte_t pte)
static inline pgprot_t mk_pud_sect_prot(pgprot_t prot)
{
- return __pgprot((pgprot_val(prot) & ~PUD_TABLE_BIT) | PUD_TYPE_SECT);
+ return __pgprot((pgprot_val(prot) & ~PUD_TYPE_MASK) | PUD_TYPE_SECT);
}
static inline pgprot_t mk_pmd_sect_prot(pgprot_t prot)
{
- return __pgprot((pgprot_val(prot) & ~PMD_TABLE_BIT) | PMD_TYPE_SECT);
+ return __pgprot((pgprot_val(prot) & ~PMD_TYPE_MASK) | PMD_TYPE_SECT);
}
-#define __HAVE_ARCH_PTE_SWP_EXCLUSIVE
static inline pte_t pte_swp_mkexclusive(pte_t pte)
{
return set_pte_bit(pte, __pgprot(PTE_SWP_EXCLUSIVE));
}
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_val(pte) & PTE_SWP_EXCLUSIVE;
}
@@ -433,23 +567,39 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte)
return clear_pte_bit(pte, __pgprot(PTE_SWP_EXCLUSIVE));
}
-/*
- * Select all bits except the pfn
- */
-static inline pgprot_t pte_pgprot(pte_t pte)
+#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
+static inline pte_t pte_swp_mkuffd_wp(pte_t pte)
{
- unsigned long pfn = pte_pfn(pte);
+ return set_pte_bit(pte, __pgprot(PTE_SWP_UFFD_WP));
+}
- return __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte));
+static inline int pte_swp_uffd_wp(pte_t pte)
+{
+ return !!(pte_val(pte) & PTE_SWP_UFFD_WP);
}
+static inline pte_t pte_swp_clear_uffd_wp(pte_t pte)
+{
+ return clear_pte_bit(pte, __pgprot(PTE_SWP_UFFD_WP));
+}
+#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */
+
#ifdef CONFIG_NUMA_BALANCING
/*
* See the comment in include/linux/pgtable.h
*/
static inline int pte_protnone(pte_t pte)
{
- return (pte_val(pte) & (PTE_VALID | PTE_PROT_NONE)) == PTE_PROT_NONE;
+ /*
+ * pte_present_invalid() tells us that the pte is invalid from HW
+ * perspective but present from SW perspective, so the fields are to be
+	 * interpreted as per the HW layout. The second two checks are the unique
+ * encoding that we use for PROT_NONE. It is insufficient to only use
+ * the first check because we share the same encoding scheme with pmds
+ * which support pmd_mkinvalid(), so can be present-invalid without
+ * being PROT_NONE.
+ */
+ return pte_present_invalid(pte) && !pte_user(pte) && !pte_user_exec(pte);
}
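
The PROT_NONE test above relies on three properties together: the entry is present-invalid, and it is neither user-readable nor user-executable (pte_user_exec() is true when both PTE_USER and PTE_UXN are clear, the exec-only encoding). A toy model of that check, with illustrative T_* bit positions rather than the real descriptor bits:

/* Toy model of the PROT_NONE test above: present-invalid, and neither
 * user-readable nor user-executable. Bit positions are illustrative only. */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define T_VALID                 (1ull << 0)
#define T_USER                  (1ull << 6)     /* stand-in for PTE_USER */
#define T_UXN                   (1ull << 54)    /* stand-in for PTE_UXN */
#define T_PRESENT_INVALID       (1ull << 59)    /* stand-in for PTE_PRESENT_INVALID */

static bool t_present_invalid(uint64_t pte)
{
        return (pte & (T_PRESENT_INVALID | T_VALID)) == T_PRESENT_INVALID;
}

static bool t_user(uint64_t pte)        { return pte & T_USER; }
static bool t_user_exec(uint64_t pte)   { return !(pte & (T_USER | T_UXN)); }

static bool t_protnone(uint64_t pte)
{
        return t_present_invalid(pte) && !t_user(pte) && !t_user_exec(pte);
}

int main(void)
{
        /* PROT_NONE: invalid but present, kernel-only, execute-never. */
        assert(t_protnone(T_PRESENT_INVALID | T_UXN));
        /* A pmd made invalid by pmd_mkinvalid() keeps its user bits, so it
         * is present-invalid without being PROT_NONE. */
        assert(!t_protnone(T_PRESENT_INVALID | T_USER | T_UXN));
        /* A valid pte is never PROT_NONE. */
        assert(!t_protnone(T_VALID | T_UXN));
        return 0;
}
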
static inline int pmd_protnone(pmd_t pmd)
@@ -458,24 +608,7 @@ static inline int pmd_protnone(pmd_t pmd)
}
#endif
-#define pmd_present_invalid(pmd) (!!(pmd_val(pmd) & PMD_PRESENT_INVALID))
-
-static inline int pmd_present(pmd_t pmd)
-{
- return pte_present(pmd_pte(pmd)) || pmd_present_invalid(pmd);
-}
-
-/*
- * THP definitions.
- */
-
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-static inline int pmd_trans_huge(pmd_t pmd)
-{
- return pmd_val(pmd) && pmd_present(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT);
-}
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-
+#define pmd_present(pmd) pte_present(pmd_pte(pmd))
#define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd))
#define pmd_young(pmd) pte_young(pmd_pte(pmd))
#define pmd_valid(pmd) pte_valid(pmd_pte(pmd))
@@ -484,64 +617,146 @@ static inline int pmd_trans_huge(pmd_t pmd)
#define pmd_cont(pmd) pte_cont(pmd_pte(pmd))
#define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd)))
#define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd)))
-#define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd)))
+#define pmd_mkwrite_novma(pmd) pte_pmd(pte_mkwrite_novma(pmd_pte(pmd)))
#define pmd_mkclean(pmd) pte_pmd(pte_mkclean(pmd_pte(pmd)))
#define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd)))
#define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd)))
+#define pmd_mkinvalid(pmd) pte_pmd(pte_mkinvalid(pmd_pte(pmd)))
+#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
+#define pmd_uffd_wp(pmd) pte_uffd_wp(pmd_pte(pmd))
+#define pmd_mkuffd_wp(pmd) pte_pmd(pte_mkuffd_wp(pmd_pte(pmd)))
+#define pmd_clear_uffd_wp(pmd) pte_pmd(pte_clear_uffd_wp(pmd_pte(pmd)))
+#define pmd_swp_uffd_wp(pmd) pte_swp_uffd_wp(pmd_pte(pmd))
+#define pmd_swp_mkuffd_wp(pmd) pte_pmd(pte_swp_mkuffd_wp(pmd_pte(pmd)))
+#define pmd_swp_clear_uffd_wp(pmd) \
+ pte_pmd(pte_swp_clear_uffd_wp(pmd_pte(pmd)))
+#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */
+
+#define pmd_write(pmd) pte_write(pmd_pte(pmd))
-static inline pmd_t pmd_mkinvalid(pmd_t pmd)
+static inline pmd_t pmd_mkhuge(pmd_t pmd)
{
- pmd = set_pmd_bit(pmd, __pgprot(PMD_PRESENT_INVALID));
- pmd = clear_pmd_bit(pmd, __pgprot(PMD_SECT_VALID));
+ /*
+ * It's possible that the pmd is present-invalid on entry
+ * and in that case it needs to remain present-invalid on
+ * exit. So ensure the VALID bit does not get modified.
+ */
+ pmdval_t mask = PMD_TYPE_MASK & ~PTE_VALID;
+ pmdval_t val = PMD_TYPE_SECT & ~PTE_VALID;
- return pmd;
+ return __pmd((pmd_val(pmd) & ~mask) | val);
}
-#define pmd_thp_or_huge(pmd) (pmd_huge(pmd) || pmd_trans_huge(pmd))
-
-#define pmd_write(pmd) pte_write(pmd_pte(pmd))
-
-#define pmd_mkhuge(pmd) (__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT))
-
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define pmd_devmap(pmd) pte_devmap(pmd_pte(pmd))
-#endif
-static inline pmd_t pmd_mkdevmap(pmd_t pmd)
+#ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP
+#define pmd_special(pte) (!!((pmd_val(pte) & PTE_SPECIAL)))
+static inline pmd_t pmd_mkspecial(pmd_t pmd)
{
- return pte_pmd(set_pte_bit(pmd_pte(pmd), __pgprot(PTE_DEVMAP)));
+ return set_pmd_bit(pmd, __pgprot(PTE_SPECIAL));
}
+#endif
#define __pmd_to_phys(pmd) __pte_to_phys(pmd_pte(pmd))
#define __phys_to_pmd_val(phys) __phys_to_pte_val(phys)
#define pmd_pfn(pmd) ((__pmd_to_phys(pmd) & PMD_MASK) >> PAGE_SHIFT)
#define pfn_pmd(pfn,prot) __pmd(__phys_to_pmd_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))
-#define mk_pmd(page,prot) pfn_pmd(page_to_pfn(page),prot)
#define pud_young(pud) pte_young(pud_pte(pud))
#define pud_mkyoung(pud) pte_pud(pte_mkyoung(pud_pte(pud)))
#define pud_write(pud) pte_write(pud_pte(pud))
-#define pud_mkhuge(pud) (__pud(pud_val(pud) & ~PUD_TABLE_BIT))
+static inline pud_t pud_mkhuge(pud_t pud)
+{
+ /*
+ * It's possible that the pud is present-invalid on entry
+ * and in that case it needs to remain present-invalid on
+ * exit. So ensure the VALID bit does not get modified.
+ */
+ pudval_t mask = PUD_TYPE_MASK & ~PTE_VALID;
+ pudval_t val = PUD_TYPE_SECT & ~PTE_VALID;
+
+ return __pud((pud_val(pud) & ~mask) | val);
+}
#define __pud_to_phys(pud) __pte_to_phys(pud_pte(pud))
#define __phys_to_pud_val(phys) __phys_to_pte_val(phys)
#define pud_pfn(pud) ((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT)
#define pfn_pud(pfn,prot) __pud(__phys_to_pud_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))
-static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
- pmd_t *pmdp, pmd_t pmd)
+#define pmd_pgprot pmd_pgprot
+static inline pgprot_t pmd_pgprot(pmd_t pmd)
{
- page_table_check_pmd_set(mm, addr, pmdp, pmd);
- return __set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd));
+ unsigned long pfn = pmd_pfn(pmd);
+
+ return __pgprot(pmd_val(pfn_pmd(pfn, __pgprot(0))) ^ pmd_val(pmd));
}
-static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
- pud_t *pudp, pud_t pud)
+#define pud_pgprot pud_pgprot
+static inline pgprot_t pud_pgprot(pud_t pud)
{
- page_table_check_pud_set(mm, addr, pudp, pud);
- return __set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud));
+ unsigned long pfn = pud_pfn(pud);
+
+ return __pgprot(pud_val(pfn_pud(pfn, __pgprot(0))) ^ pud_val(pud));
}
+static inline void __set_ptes_anysz(struct mm_struct *mm, pte_t *ptep,
+ pte_t pte, unsigned int nr,
+ unsigned long pgsize)
+{
+ unsigned long stride = pgsize >> PAGE_SHIFT;
+
+ switch (pgsize) {
+ case PAGE_SIZE:
+ page_table_check_ptes_set(mm, ptep, pte, nr);
+ break;
+ case PMD_SIZE:
+ page_table_check_pmds_set(mm, (pmd_t *)ptep, pte_pmd(pte), nr);
+ break;
+#ifndef __PAGETABLE_PMD_FOLDED
+ case PUD_SIZE:
+ page_table_check_puds_set(mm, (pud_t *)ptep, pte_pud(pte), nr);
+ break;
+#endif
+ default:
+ VM_WARN_ON(1);
+ }
+
+ __sync_cache_and_tags(pte, nr * stride);
+
+ for (;;) {
+ __check_safe_pte_update(mm, ptep, pte);
+ __set_pte_nosync(ptep, pte);
+ if (--nr == 0)
+ break;
+ ptep++;
+ pte = pte_advance_pfn(pte, stride);
+ }
+
+ __set_pte_complete(pte);
+}
+
+static inline void __set_ptes(struct mm_struct *mm,
+ unsigned long __always_unused addr,
+ pte_t *ptep, pte_t pte, unsigned int nr)
+{
+ __set_ptes_anysz(mm, ptep, pte, nr, PAGE_SIZE);
+}
+
+static inline void __set_pmds(struct mm_struct *mm,
+ unsigned long __always_unused addr,
+ pmd_t *pmdp, pmd_t pmd, unsigned int nr)
+{
+ __set_ptes_anysz(mm, (pte_t *)pmdp, pmd_pte(pmd), nr, PMD_SIZE);
+}
+#define set_pmd_at(mm, addr, pmdp, pmd) __set_pmds(mm, addr, pmdp, pmd, 1)
+
+static inline void __set_puds(struct mm_struct *mm,
+ unsigned long __always_unused addr,
+ pud_t *pudp, pud_t pud, unsigned int nr)
+{
+ __set_ptes_anysz(mm, (pte_t *)pudp, pud_pte(pud), nr, PUD_SIZE);
+}
+#define set_pud_at(mm, addr, pudp, pud) __set_puds(mm, addr, pudp, pud, 1)
+
#define __p4d_to_phys(p4d) __pte_to_phys(p4d_pte(p4d))
#define __phys_to_p4d_val(phys) __phys_to_pte_val(phys)
@@ -554,6 +769,11 @@ static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
#define pgprot_nx(prot) \
__pgprot_modify(prot, PTE_MAYBE_GP, PTE_PXN)
+#define pgprot_decrypted(prot) \
+ __pgprot_modify(prot, PROT_NS_SHARED, PROT_NS_SHARED)
+#define pgprot_encrypted(prot) \
+ __pgprot_modify(prot, PROT_NS_SHARED, 0)
+
/*
* Mark the prot value as uncacheable and unbufferable.
*/
@@ -595,6 +815,17 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
#define pmd_leaf_size(pmd) (pmd_cont(pmd) ? CONT_PMD_SIZE : PMD_SIZE)
#define pte_leaf_size(pte) (pte_cont(pte) ? CONT_PTE_SIZE : PAGE_SIZE)
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline int pmd_trans_huge(pmd_t pmd)
+{
+ /*
+ * If pmd is present-invalid, pmd_table() won't detect it
+ * as a table, so force the valid bit for the comparison.
+ */
+ return pmd_present(pmd) && !pmd_table(__pmd(pmd_val(pmd) | PTE_VALID));
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
#if defined(CONFIG_ARM64_64K_PAGES) || CONFIG_PGTABLE_LEVELS < 3
static inline bool pud_sect(pud_t pud) { return false; }
static inline bool pud_table(pud_t pud) { return true; }
@@ -605,13 +836,10 @@ static inline bool pud_table(pud_t pud) { return true; }
PUD_TYPE_TABLE)
#endif
-extern pgd_t init_pg_dir[PTRS_PER_PGD];
-extern pgd_t init_pg_end[];
-extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
-extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
-extern pgd_t idmap_pg_end[];
-extern pgd_t tramp_pg_dir[PTRS_PER_PGD];
-extern pgd_t reserved_pg_dir[PTRS_PER_PGD];
+extern pgd_t swapper_pg_dir[];
+extern pgd_t idmap_pg_dir[];
+extern pgd_t tramp_pg_dir[];
+extern pgd_t reserved_pg_dir[];
extern void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd);
@@ -632,10 +860,8 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
WRITE_ONCE(*pmdp, pmd);
- if (pmd_valid(pmd)) {
- dsb(ishst);
- isb();
- }
+ if (pmd_valid(pmd))
+ queue_pte_barriers();
}
static inline void pmd_clear(pmd_t *pmdp)
@@ -665,40 +891,37 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
/* use ONLY for statically allocated translation tables */
#define pte_offset_kimg(dir,addr) ((pte_t *)__phys_to_kimg(pte_offset_phys((dir), (addr))))
-/*
- * Conversion functions: convert a page and protection to a page entry,
- * and a page entry and page directory to the page they refer to.
- */
-#define mk_pte(page,prot) pfn_pte(page_to_pfn(page),prot)
-
#if CONFIG_PGTABLE_LEVELS > 2
#define pmd_ERROR(e) \
pr_err("%s:%d: bad pmd %016llx.\n", __FILE__, __LINE__, pmd_val(e))
#define pud_none(pud) (!pud_val(pud))
-#define pud_bad(pud) (!pud_table(pud))
+#define pud_bad(pud) ((pud_val(pud) & PUD_TYPE_MASK) != \
+ PUD_TYPE_TABLE)
#define pud_present(pud) pte_present(pud_pte(pud))
+#ifndef __PAGETABLE_PMD_FOLDED
#define pud_leaf(pud) (pud_present(pud) && !pud_table(pud))
+#else
+#define pud_leaf(pud) false
+#endif
#define pud_valid(pud) pte_valid(pud_pte(pud))
#define pud_user(pud) pte_user(pud_pte(pud))
+#define pud_user_exec(pud) pte_user_exec(pud_pte(pud))
+static inline bool pgtable_l4_enabled(void);
static inline void set_pud(pud_t *pudp, pud_t pud)
{
-#ifdef __PAGETABLE_PUD_FOLDED
- if (in_swapper_pgdir(pudp)) {
+ if (!pgtable_l4_enabled() && in_swapper_pgdir(pudp)) {
set_swapper_pgd((pgd_t *)pudp, __pgd(pud_val(pud)));
return;
}
-#endif /* __PAGETABLE_PUD_FOLDED */
WRITE_ONCE(*pudp, pud);
- if (pud_valid(pud)) {
- dsb(ishst);
- isb();
- }
+ if (pud_valid(pud))
+ queue_pte_barriers();
}
static inline void pud_clear(pud_t *pudp)
@@ -730,7 +953,9 @@ static inline pmd_t *pud_pgtable(pud_t pud)
#else
+#define pud_valid(pud) false
#define pud_page_paddr(pud) ({ BUILD_BUG(); 0; })
+#define pud_user_exec(pud) pud_user(pud) /* Always 0 with folding */
/* Match pmd_offset folding in <asm/generic/pgtable-nopmd.h> */
#define pmd_set_fixmap(addr) NULL
@@ -743,12 +968,29 @@ static inline pmd_t *pud_pgtable(pud_t pud)
#if CONFIG_PGTABLE_LEVELS > 3
+static __always_inline bool pgtable_l4_enabled(void)
+{
+ if (CONFIG_PGTABLE_LEVELS > 4 || !IS_ENABLED(CONFIG_ARM64_LPA2))
+ return true;
+ if (!alternative_has_cap_likely(ARM64_ALWAYS_BOOT))
+ return vabits_actual == VA_BITS;
+ return alternative_has_cap_unlikely(ARM64_HAS_VA52);
+}
+
+static inline bool mm_pud_folded(const struct mm_struct *mm)
+{
+ return !pgtable_l4_enabled();
+}
+#define mm_pud_folded mm_pud_folded
+
#define pud_ERROR(e) \
pr_err("%s:%d: bad pud %016llx.\n", __FILE__, __LINE__, pud_val(e))
-#define p4d_none(p4d) (!p4d_val(p4d))
-#define p4d_bad(p4d) (!(p4d_val(p4d) & 2))
-#define p4d_present(p4d) (p4d_val(p4d))
+#define p4d_none(p4d) (pgtable_l4_enabled() && !p4d_val(p4d))
+#define p4d_bad(p4d) (pgtable_l4_enabled() && \
+ ((p4d_val(p4d) & P4D_TYPE_MASK) != \
+ P4D_TYPE_TABLE))
+#define p4d_present(p4d) (!p4d_none(p4d))
static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
{
@@ -758,13 +1000,13 @@ static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
}
WRITE_ONCE(*p4dp, p4d);
- dsb(ishst);
- isb();
+ queue_pte_barriers();
}
static inline void p4d_clear(p4d_t *p4dp)
{
- set_p4d(p4dp, __p4d(0));
+ if (pgtable_l4_enabled())
+ set_p4d(p4dp, __p4d(0));
}
static inline phys_addr_t p4d_page_paddr(p4d_t p4d)
@@ -772,27 +1014,78 @@ static inline phys_addr_t p4d_page_paddr(p4d_t p4d)
return __p4d_to_phys(p4d);
}
+#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
+
+static inline pud_t *p4d_to_folded_pud(p4d_t *p4dp, unsigned long addr)
+{
+ /* Ensure that 'p4dp' indexes a page table according to 'addr' */
+ VM_BUG_ON(((addr >> P4D_SHIFT) ^ ((u64)p4dp >> 3)) % PTRS_PER_P4D);
+
+ return (pud_t *)PTR_ALIGN_DOWN(p4dp, PAGE_SIZE) + pud_index(addr);
+}
+
static inline pud_t *p4d_pgtable(p4d_t p4d)
{
return (pud_t *)__va(p4d_page_paddr(p4d));
}
-/* Find an entry in the first-level page table. */
-#define pud_offset_phys(dir, addr) (p4d_page_paddr(READ_ONCE(*(dir))) + pud_index(addr) * sizeof(pud_t))
+static inline phys_addr_t pud_offset_phys(p4d_t *p4dp, unsigned long addr)
+{
+ BUG_ON(!pgtable_l4_enabled());
-#define pud_set_fixmap(addr) ((pud_t *)set_fixmap_offset(FIX_PUD, addr))
-#define pud_set_fixmap_offset(p4d, addr) pud_set_fixmap(pud_offset_phys(p4d, addr))
-#define pud_clear_fixmap() clear_fixmap(FIX_PUD)
+ return p4d_page_paddr(READ_ONCE(*p4dp)) + pud_index(addr) * sizeof(pud_t);
+}
-#define p4d_page(p4d) pfn_to_page(__phys_to_pfn(__p4d_to_phys(p4d)))
+static inline
+pud_t *pud_offset_lockless(p4d_t *p4dp, p4d_t p4d, unsigned long addr)
+{
+ if (!pgtable_l4_enabled())
+ return p4d_to_folded_pud(p4dp, addr);
+ return (pud_t *)__va(p4d_page_paddr(p4d)) + pud_index(addr);
+}
+#define pud_offset_lockless pud_offset_lockless
+
+static inline pud_t *pud_offset(p4d_t *p4dp, unsigned long addr)
+{
+ return pud_offset_lockless(p4dp, READ_ONCE(*p4dp), addr);
+}
+#define pud_offset pud_offset
+
+static inline pud_t *pud_set_fixmap(unsigned long addr)
+{
+ if (!pgtable_l4_enabled())
+ return NULL;
+ return (pud_t *)set_fixmap_offset(FIX_PUD, addr);
+}
+
+static inline pud_t *pud_set_fixmap_offset(p4d_t *p4dp, unsigned long addr)
+{
+ if (!pgtable_l4_enabled())
+ return p4d_to_folded_pud(p4dp, addr);
+ return pud_set_fixmap(pud_offset_phys(p4dp, addr));
+}
+
+static inline void pud_clear_fixmap(void)
+{
+ if (pgtable_l4_enabled())
+ clear_fixmap(FIX_PUD);
+}
/* use ONLY for statically allocated translation tables */
-#define pud_offset_kimg(dir,addr) ((pud_t *)__phys_to_kimg(pud_offset_phys((dir), (addr))))
+static inline pud_t *pud_offset_kimg(p4d_t *p4dp, u64 addr)
+{
+ if (!pgtable_l4_enabled())
+ return p4d_to_folded_pud(p4dp, addr);
+ return (pud_t *)__phys_to_kimg(pud_offset_phys(p4dp, addr));
+}
+
+#define p4d_page(p4d) pfn_to_page(__phys_to_pfn(__p4d_to_phys(p4d)))
#else
+static inline bool pgtable_l4_enabled(void) { return false; }
+
#define p4d_page_paddr(p4d) ({ BUILD_BUG(); 0;})
-#define pgd_page_paddr(pgd) ({ BUILD_BUG(); 0;})
/* Match pud_offset folding in <asm/generic/pgtable-nopud.h> */
#define pud_set_fixmap(addr) NULL
@@ -803,6 +1096,150 @@ static inline pud_t *p4d_pgtable(p4d_t p4d)
#endif /* CONFIG_PGTABLE_LEVELS > 3 */
+#if CONFIG_PGTABLE_LEVELS > 4
+
+static __always_inline bool pgtable_l5_enabled(void)
+{
+ if (!alternative_has_cap_likely(ARM64_ALWAYS_BOOT))
+ return vabits_actual == VA_BITS;
+ return alternative_has_cap_unlikely(ARM64_HAS_VA52);
+}
+
+static inline bool mm_p4d_folded(const struct mm_struct *mm)
+{
+ return !pgtable_l5_enabled();
+}
+#define mm_p4d_folded mm_p4d_folded
+
+#define p4d_ERROR(e) \
+ pr_err("%s:%d: bad p4d %016llx.\n", __FILE__, __LINE__, p4d_val(e))
+
+#define pgd_none(pgd) (pgtable_l5_enabled() && !pgd_val(pgd))
+#define pgd_bad(pgd) (pgtable_l5_enabled() && \
+ ((pgd_val(pgd) & PGD_TYPE_MASK) != \
+ PGD_TYPE_TABLE))
+#define pgd_present(pgd) (!pgd_none(pgd))
+
+static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+ if (in_swapper_pgdir(pgdp)) {
+ set_swapper_pgd(pgdp, __pgd(pgd_val(pgd)));
+ return;
+ }
+
+ WRITE_ONCE(*pgdp, pgd);
+ queue_pte_barriers();
+}
+
+static inline void pgd_clear(pgd_t *pgdp)
+{
+ if (pgtable_l5_enabled())
+ set_pgd(pgdp, __pgd(0));
+}
+
+static inline phys_addr_t pgd_page_paddr(pgd_t pgd)
+{
+ return __pgd_to_phys(pgd);
+}
+
+#define p4d_index(addr) (((addr) >> P4D_SHIFT) & (PTRS_PER_P4D - 1))
+
+static inline p4d_t *pgd_to_folded_p4d(pgd_t *pgdp, unsigned long addr)
+{
+ /* Ensure that 'pgdp' indexes a page table according to 'addr' */
+ VM_BUG_ON(((addr >> PGDIR_SHIFT) ^ ((u64)pgdp >> 3)) % PTRS_PER_PGD);
+
+ return (p4d_t *)PTR_ALIGN_DOWN(pgdp, PAGE_SIZE) + p4d_index(addr);
+}
+
+static inline phys_addr_t p4d_offset_phys(pgd_t *pgdp, unsigned long addr)
+{
+ BUG_ON(!pgtable_l5_enabled());
+
+ return pgd_page_paddr(READ_ONCE(*pgdp)) + p4d_index(addr) * sizeof(p4d_t);
+}
+
+static inline
+p4d_t *p4d_offset_lockless(pgd_t *pgdp, pgd_t pgd, unsigned long addr)
+{
+ if (!pgtable_l5_enabled())
+ return pgd_to_folded_p4d(pgdp, addr);
+ return (p4d_t *)__va(pgd_page_paddr(pgd)) + p4d_index(addr);
+}
+#define p4d_offset_lockless p4d_offset_lockless
+
+static inline p4d_t *p4d_offset(pgd_t *pgdp, unsigned long addr)
+{
+ return p4d_offset_lockless(pgdp, READ_ONCE(*pgdp), addr);
+}
+
+static inline p4d_t *p4d_set_fixmap(unsigned long addr)
+{
+ if (!pgtable_l5_enabled())
+ return NULL;
+ return (p4d_t *)set_fixmap_offset(FIX_P4D, addr);
+}
+
+static inline p4d_t *p4d_set_fixmap_offset(pgd_t *pgdp, unsigned long addr)
+{
+ if (!pgtable_l5_enabled())
+ return pgd_to_folded_p4d(pgdp, addr);
+ return p4d_set_fixmap(p4d_offset_phys(pgdp, addr));
+}
+
+static inline void p4d_clear_fixmap(void)
+{
+ if (pgtable_l5_enabled())
+ clear_fixmap(FIX_P4D);
+}
+
+/* use ONLY for statically allocated translation tables */
+static inline p4d_t *p4d_offset_kimg(pgd_t *pgdp, u64 addr)
+{
+ if (!pgtable_l5_enabled())
+ return pgd_to_folded_p4d(pgdp, addr);
+ return (p4d_t *)__phys_to_kimg(p4d_offset_phys(pgdp, addr));
+}
+
+#define pgd_page(pgd) pfn_to_page(__phys_to_pfn(__pgd_to_phys(pgd)))
+
+#else
+
+static inline bool pgtable_l5_enabled(void) { return false; }
+
+#define p4d_index(addr) (((addr) >> P4D_SHIFT) & (PTRS_PER_P4D - 1))
+
+/* Match p4d_offset folding in <asm/generic/pgtable-nop4d.h> */
+#define p4d_set_fixmap(addr) NULL
+#define p4d_set_fixmap_offset(p4dp, addr) ((p4d_t *)p4dp)
+#define p4d_clear_fixmap()
+
+#define p4d_offset_kimg(dir,addr) ((p4d_t *)dir)
+
+static inline
+p4d_t *p4d_offset_lockless_folded(pgd_t *pgdp, pgd_t pgd, unsigned long addr)
+{
+ /*
+ * With runtime folding of the pud, pud_offset_lockless() passes
+ * the 'pgd_t *' we return here to p4d_to_folded_pud(), which
+ * will offset the pointer assuming that it points into
+ * a page-table page. However, the fast GUP path passes us a
+ * pgd_t allocated on the stack and so we must use the original
+ * pointer in 'pgdp' to construct the p4d pointer instead of
+ * using the generic p4d_offset_lockless() implementation.
+ *
+ * Note: reusing the original pointer means that we may
+ * dereference the same (live) page-table entry multiple times.
+ * This is safe because it is still only loaded once in the
+ * context of each level and the CPU guarantees same-address
+ * read-after-read ordering.
+ */
+ return p4d_offset(pgdp, addr);
+}
+#define p4d_offset_lockless p4d_offset_lockless_folded
+
+#endif /* CONFIG_PGTABLE_LEVELS > 4 */
+
#define pgd_ERROR(e) \
pr_err("%s:%d: bad pgd %016llx.\n", __FILE__, __LINE__, pgd_val(e))
@@ -816,12 +1253,20 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
* in MAIR_EL1. The mask below has to include PTE_ATTRINDX_MASK.
*/
const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY |
- PTE_PROT_NONE | PTE_VALID | PTE_WRITE | PTE_GP |
- PTE_ATTRINDX_MASK;
+ PTE_PRESENT_INVALID | PTE_VALID | PTE_WRITE |
+ PTE_GP | PTE_ATTRINDX_MASK | PTE_PO_IDX_MASK;
+
/* preserve the hardware dirty information */
if (pte_hw_dirty(pte))
- pte = pte_mkdirty(pte);
+ pte = set_pte_bit(pte, __pgprot(PTE_DIRTY));
+
pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask);
+ /*
+ * If we end up clearing hw dirtiness for a sw-dirty PTE, set hardware
+ * dirtiness again.
+ */
+ if (pte_sw_dirty(pte))
+ pte = pte_mkdirty(pte);
return pte;
}
@@ -830,8 +1275,7 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
return pte_pmd(pte_modify(pmd_pte(pmd), newprot));
}
-#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
-extern int ptep_set_access_flags(struct vm_area_struct *vma,
+extern int __ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long address, pte_t *ptep,
pte_t entry, int dirty);
@@ -841,46 +1285,38 @@ static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp,
pmd_t entry, int dirty)
{
- return ptep_set_access_flags(vma, address, (pte_t *)pmdp, pmd_pte(entry), dirty);
-}
-
-static inline int pud_devmap(pud_t pud)
-{
- return 0;
-}
-
-static inline int pgd_devmap(pgd_t pgd)
-{
- return 0;
+ return __ptep_set_access_flags(vma, address, (pte_t *)pmdp,
+ pmd_pte(entry), dirty);
}
#endif
#ifdef CONFIG_PAGE_TABLE_CHECK
static inline bool pte_user_accessible_page(pte_t pte)
{
- return pte_present(pte) && (pte_user(pte) || pte_user_exec(pte));
+ return pte_valid(pte) && (pte_user(pte) || pte_user_exec(pte));
}
static inline bool pmd_user_accessible_page(pmd_t pmd)
{
- return pmd_present(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd));
+ return pmd_valid(pmd) && !pmd_table(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd));
}
static inline bool pud_user_accessible_page(pud_t pud)
{
- return pud_present(pud) && pud_user(pud);
+ return pud_valid(pud) && !pud_table(pud) && (pud_user(pud) || pud_user_exec(pud));
}
#endif
/*
* Atomic pte/pmd modifications.
*/
-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
-static inline int __ptep_test_and_clear_young(pte_t *ptep)
+static inline int __ptep_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long address,
+ pte_t *ptep)
{
pte_t old_pte, pte;
- pte = READ_ONCE(*ptep);
+ pte = __ptep_get(ptep);
do {
old_pte = pte;
pte = pte_mkold(pte);
@@ -891,18 +1327,10 @@ static inline int __ptep_test_and_clear_young(pte_t *ptep)
return pte_young(pte);
}
-static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
- unsigned long address,
- pte_t *ptep)
-{
- return __ptep_test_and_clear_young(ptep);
-}
-
-#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
-static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
+static inline int __ptep_clear_flush_young(struct vm_area_struct *vma,
unsigned long address, pte_t *ptep)
{
- int young = ptep_test_and_clear_young(vma, address, ptep);
+ int young = __ptep_test_and_clear_young(vma, address, ptep);
if (young) {
/*
@@ -919,24 +1347,77 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
return young;
}
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG)
#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
unsigned long address,
pmd_t *pmdp)
{
- return ptep_test_and_clear_young(vma, address, (pte_t *)pmdp);
+ /* Operation applies to PMD table entry only if FEAT_HAFT is enabled */
+ VM_WARN_ON(pmd_table(READ_ONCE(*pmdp)) && !system_supports_haft());
+ return __ptep_test_and_clear_young(vma, address, (pte_t *)pmdp);
}
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG */
-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
-static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
- unsigned long address, pte_t *ptep)
+static inline pte_t __ptep_get_and_clear_anysz(struct mm_struct *mm,
+ pte_t *ptep,
+ unsigned long pgsize)
{
pte_t pte = __pte(xchg_relaxed(&pte_val(*ptep), 0));
- page_table_check_pte_clear(mm, address, pte);
+ switch (pgsize) {
+ case PAGE_SIZE:
+ page_table_check_pte_clear(mm, pte);
+ break;
+ case PMD_SIZE:
+ page_table_check_pmd_clear(mm, pte_pmd(pte));
+ break;
+#ifndef __PAGETABLE_PMD_FOLDED
+ case PUD_SIZE:
+ page_table_check_pud_clear(mm, pte_pud(pte));
+ break;
+#endif
+ default:
+ VM_WARN_ON(1);
+ }
+
+ return pte;
+}
+
+static inline pte_t __ptep_get_and_clear(struct mm_struct *mm,
+ unsigned long address, pte_t *ptep)
+{
+ return __ptep_get_and_clear_anysz(mm, ptep, PAGE_SIZE);
+}
+
+static inline void __clear_full_ptes(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, unsigned int nr, int full)
+{
+ for (;;) {
+ __ptep_get_and_clear(mm, addr, ptep);
+ if (--nr == 0)
+ break;
+ ptep++;
+ addr += PAGE_SIZE;
+ }
+}
+
+static inline pte_t __get_and_clear_full_ptes(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep,
+ unsigned int nr, int full)
+{
+ pte_t pte, tmp_pte;
+ pte = __ptep_get_and_clear(mm, addr, ptep);
+ while (--nr) {
+ ptep++;
+ addr += PAGE_SIZE;
+ tmp_pte = __ptep_get_and_clear(mm, addr, ptep);
+ if (pte_dirty(tmp_pte))
+ pte = pte_mkdirty(pte);
+ if (pte_young(tmp_pte))
+ pte = pte_mkyoung(pte);
+ }
return pte;
}
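
__get_and_clear_full_ptes() above clears a whole batch but returns a single summary entry whose access and dirty bits are the OR of everything it cleared. A tiny standalone model of that folding step (F_* flags are illustrative stand-ins for the pte bits):

/* Model of the dirty/young folding done by __get_and_clear_full_ptes() above:
 * the returned entry accumulates the access and dirty bits of the whole range. */
#include <assert.h>

#define F_DIRTY 0x1u
#define F_YOUNG 0x2u

static unsigned int fold_range(const unsigned int *flags, unsigned int nr)
{
        unsigned int ret = flags[0];

        for (unsigned int i = 1; i < nr; i++)
                ret |= flags[i] & (F_DIRTY | F_YOUNG);
        return ret;
}

int main(void)
{
        unsigned int range[4] = { 0, F_YOUNG, 0, F_DIRTY };

        /* Any dirty or young pte in the batch marks the summary pte. */
        assert(fold_range(range, 4) == (F_DIRTY | F_YOUNG));
        return 0;
}
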
@@ -945,45 +1426,96 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
unsigned long address, pmd_t *pmdp)
{
- pmd_t pmd = __pmd(xchg_relaxed(&pmd_val(*pmdp), 0));
+ return pte_pmd(__ptep_get_and_clear_anysz(mm, (pte_t *)pmdp, PMD_SIZE));
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
- page_table_check_pmd_clear(mm, address, pmd);
+static inline void ___ptep_set_wrprotect(struct mm_struct *mm,
+ unsigned long address, pte_t *ptep,
+ pte_t pte)
+{
+ pte_t old_pte;
- return pmd;
+ do {
+ old_pte = pte;
+ pte = pte_wrprotect(pte);
+ pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep),
+ pte_val(old_pte), pte_val(pte));
+ } while (pte_val(pte) != pte_val(old_pte));
}
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
/*
- * ptep_set_wrprotect - mark read-only while trasferring potential hardware
+ * __ptep_set_wrprotect - mark read-only while transferring potential hardware
* dirty status (PTE_DBM && !PTE_RDONLY) to the software PTE_DIRTY bit.
*/
-#define __HAVE_ARCH_PTEP_SET_WRPROTECT
-static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
+static inline void __ptep_set_wrprotect(struct mm_struct *mm,
+ unsigned long address, pte_t *ptep)
{
- pte_t old_pte, pte;
+ ___ptep_set_wrprotect(mm, address, ptep, __ptep_get(ptep));
+}
+
+static inline void __wrprotect_ptes(struct mm_struct *mm, unsigned long address,
+ pte_t *ptep, unsigned int nr)
+{
+ unsigned int i;
+
+ for (i = 0; i < nr; i++, address += PAGE_SIZE, ptep++)
+ __ptep_set_wrprotect(mm, address, ptep);
+}
+
+static inline void __clear_young_dirty_pte(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t pte, cydp_t flags)
+{
+ pte_t old_pte;
- pte = READ_ONCE(*ptep);
do {
old_pte = pte;
- pte = pte_wrprotect(pte);
+
+ if (flags & CYDP_CLEAR_YOUNG)
+ pte = pte_mkold(pte);
+ if (flags & CYDP_CLEAR_DIRTY)
+ pte = pte_mkclean(pte);
+
pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep),
pte_val(old_pte), pte_val(pte));
} while (pte_val(pte) != pte_val(old_pte));
}
+static inline void __clear_young_dirty_ptes(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ unsigned int nr, cydp_t flags)
+{
+ pte_t pte;
+
+ for (;;) {
+ pte = __ptep_get(ptep);
+
+ if (flags == (CYDP_CLEAR_YOUNG | CYDP_CLEAR_DIRTY))
+ __set_pte(ptep, pte_mkclean(pte_mkold(pte)));
+ else
+ __clear_young_dirty_pte(vma, addr, ptep, pte, flags);
+
+ if (--nr == 0)
+ break;
+ ptep++;
+ addr += PAGE_SIZE;
+ }
+}
+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define __HAVE_ARCH_PMDP_SET_WRPROTECT
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
unsigned long address, pmd_t *pmdp)
{
- ptep_set_wrprotect(mm, address, (pte_t *)pmdp);
+ __ptep_set_wrprotect(mm, address, (pte_t *)pmdp);
}
#define pmdp_establish pmdp_establish
static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp, pmd_t pmd)
{
- page_table_check_pmd_set(vma->vm_mm, address, pmdp, pmd);
+ page_table_check_pmd_set(vma->vm_mm, pmdp, pmd);
return __pmd(xchg_relaxed(&pmd_val(*pmdp), pmd_val(pmd)));
}
#endif
@@ -992,15 +1524,16 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
* Encode and decode a swap entry:
* bits 0-1: present (must be zero)
* bits 2: remember PG_anon_exclusive
- * bits 3-7: swap type
- * bits 8-57: swap offset
- * bit 58: PTE_PROT_NONE (must be zero)
+ * bit 3: remember uffd-wp state
+ * bits 6-10: swap type
+ * bit 11: PTE_PRESENT_INVALID (must be zero)
+ * bits 12-61: swap offset
*/
-#define __SWP_TYPE_SHIFT 3
+#define __SWP_TYPE_SHIFT 6
#define __SWP_TYPE_BITS 5
-#define __SWP_OFFSET_BITS 50
#define __SWP_TYPE_MASK ((1 << __SWP_TYPE_BITS) - 1)
-#define __SWP_OFFSET_SHIFT (__SWP_TYPE_BITS + __SWP_TYPE_SHIFT)
+#define __SWP_OFFSET_SHIFT 12
+#define __SWP_OFFSET_BITS 50
#define __SWP_OFFSET_MASK ((1UL << __SWP_OFFSET_BITS) - 1)
#define __swp_type(x) (((x).val >> __SWP_TYPE_SHIFT) & __SWP_TYPE_MASK)
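
For reference, the new layout keeps bits 0-1, bit 3 and bit 11 free for pte flags by placing the swap type at bits 6-10 and the offset at bits 12-61. A standalone sketch of the packing arithmetic, using local SWP_* names that mirror (but are not) the kernel macros:

/* Standalone model of the swap-entry packing above: type in bits 6-10,
 * offset in bits 12-61; bits 0-1, 3 and 11 are left for the pte flags. */
#include <assert.h>
#include <stdint.h>

#define SWP_TYPE_SHIFT          6
#define SWP_TYPE_BITS           5
#define SWP_TYPE_MASK           ((1u << SWP_TYPE_BITS) - 1)
#define SWP_OFFSET_SHIFT        12
#define SWP_OFFSET_BITS         50
#define SWP_OFFSET_MASK         ((1ull << SWP_OFFSET_BITS) - 1)

static uint64_t swp_entry(unsigned int type, uint64_t offset)
{
        return ((uint64_t)(type & SWP_TYPE_MASK) << SWP_TYPE_SHIFT) |
               ((offset & SWP_OFFSET_MASK) << SWP_OFFSET_SHIFT);
}

static unsigned int swp_type(uint64_t entry)
{
        return (entry >> SWP_TYPE_SHIFT) & SWP_TYPE_MASK;
}

static uint64_t swp_offset(uint64_t entry)
{
        return (entry >> SWP_OFFSET_SHIFT) & SWP_OFFSET_MASK;
}

int main(void)
{
        uint64_t e = swp_entry(17, 0x123456789abull);

        assert(swp_type(e) == 17);
        assert(swp_offset(e) == 0x123456789abull);
        /* The must-be-zero pte bits (0-1, 11) and the uffd-wp bit (3) stay clear. */
        assert((e & ((1u << 0) | (1u << 1) | (1u << 3) | (1u << 11))) == 0);
        return 0;
}
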
@@ -1021,17 +1554,10 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
*/
#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > __SWP_TYPE_BITS)
-extern int kern_addr_valid(unsigned long addr);
-
#ifdef CONFIG_ARM64_MTE
#define __HAVE_ARCH_PREPARE_TO_SWAP
-static inline int arch_prepare_to_swap(struct page *page)
-{
- if (system_supports_mte())
- return mte_save_tags(page);
- return 0;
-}
+extern int arch_prepare_to_swap(struct folio *folio);
#define __HAVE_ARCH_SWAP_INVALIDATE
static inline void arch_swap_invalidate_page(int type, pgoff_t offset)
@@ -1047,19 +1573,16 @@ static inline void arch_swap_invalidate_area(int type)
}
#define __HAVE_ARCH_SWAP_RESTORE
-static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio)
-{
- if (system_supports_mte() && mte_restore_tags(entry, &folio->page))
- set_bit(PG_mte_tagged, &folio->flags);
-}
+extern void arch_swap_restore(swp_entry_t entry, struct folio *folio);
#endif /* CONFIG_ARM64_MTE */
/*
- * On AArch64, the cache coherency is handled via the set_pte_at() function.
+ * On AArch64, the cache coherency is handled via the __set_ptes() function.
*/
-static inline void update_mmu_cache(struct vm_area_struct *vma,
- unsigned long addr, pte_t *ptep)
+static inline void update_mmu_cache_range(struct vm_fault *vmf,
+ struct vm_area_struct *vma, unsigned long addr, pte_t *ptep,
+ unsigned int nr)
{
/*
* We don't do anything here, so there's a very small chance of
@@ -1068,6 +1591,8 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
*/
}
+#define update_mmu_cache(vma, addr, ptep) \
+ update_mmu_cache_range(NULL, vma, addr, ptep, 1)
#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
#ifdef CONFIG_ARM64_PA_BITS_52
@@ -1082,24 +1607,25 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
* page after fork() + CoW for pfn mappings. We don't always have a
* hardware-managed access flag on arm64.
*/
-static inline bool arch_faults_on_old_pte(void)
-{
- /* The register read below requires a stable CPU to make any sense */
- cant_migrate();
+#define arch_has_hw_pte_young cpu_has_hw_af
- return !cpu_has_hw_af();
-}
-#define arch_faults_on_old_pte arch_faults_on_old_pte
+#ifdef CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG
+#define arch_has_hw_nonleaf_pmd_young system_supports_haft
+#endif
/*
* Experimentally, it's cheap to set the access flag in hardware and we
* benefit from prefaulting mappings as 'old' to start with.
*/
-static inline bool arch_wants_old_prefaulted_pte(void)
-{
- return !arch_faults_on_old_pte();
-}
-#define arch_wants_old_prefaulted_pte arch_wants_old_prefaulted_pte
+#define arch_wants_old_prefaulted_pte cpu_has_hw_af
+
+/*
+ * Request that exec memory be read into pagecache in at least 64K folios. This size
+ * can be contpte-mapped when 4K base pages are in use (16 pages into 1 iTLB
+ * entry), and HPA can coalesce it (4 pages into 1 TLB entry) when 16K base
+ * pages are in use.
+ */
+#define exec_folio_order() ilog2(SZ_64K >> PAGE_SHIFT)
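
As a quick check of the exec_folio_order() arithmetic: with 4K base pages SZ_64K >> PAGE_SHIFT is 16 pages (order 4, one contpte block), and with 16K pages it is 4 pages (order 2, one HPA-coalescible group). A trivial standalone version with the page shifts hard-coded:

/* Quick check of the exec_folio_order() arithmetic for 4K and 16K base pages. */
#include <assert.h>

#define SZ_64K  0x10000u

static unsigned int ilog2_u(unsigned int v)
{
        unsigned int r = 0;

        while (v >>= 1)
                r++;
        return r;
}

int main(void)
{
        assert(ilog2_u(SZ_64K >> 12) == 4);     /* 4K pages: 16-page (contpte-sized) folios */
        assert(ilog2_u(SZ_64K >> 14) == 2);     /* 16K pages: 4-page (HPA-sized) folios */
        return 0;
}
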
static inline bool pud_sect_supported(void)
{
@@ -1107,6 +1633,316 @@ static inline bool pud_sect_supported(void)
}
+#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
+#define ptep_modify_prot_start ptep_modify_prot_start
+extern pte_t ptep_modify_prot_start(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep);
+
+#define ptep_modify_prot_commit ptep_modify_prot_commit
+extern void ptep_modify_prot_commit(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t old_pte, pte_t new_pte);
+
+#define modify_prot_start_ptes modify_prot_start_ptes
+extern pte_t modify_prot_start_ptes(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ unsigned int nr);
+
+#define modify_prot_commit_ptes modify_prot_commit_ptes
+extern void modify_prot_commit_ptes(struct vm_area_struct *vma, unsigned long addr,
+ pte_t *ptep, pte_t old_pte, pte_t pte,
+ unsigned int nr);
+
+#ifdef CONFIG_ARM64_CONTPTE
+
+/*
+ * The contpte APIs are used to transparently manage the contiguous bit in ptes
+ * where it is possible and makes sense to do so. The PTE_CONT bit is considered
+ * a private implementation detail of the public ptep API (see below).
+ */
+extern void __contpte_try_fold(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte);
+extern void __contpte_try_unfold(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte);
+extern pte_t contpte_ptep_get(pte_t *ptep, pte_t orig_pte);
+extern pte_t contpte_ptep_get_lockless(pte_t *orig_ptep);
+extern void contpte_set_ptes(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte, unsigned int nr);
+extern void contpte_clear_full_ptes(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, unsigned int nr, int full);
+extern pte_t contpte_get_and_clear_full_ptes(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep,
+ unsigned int nr, int full);
+extern int contpte_ptep_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep);
+extern int contpte_ptep_clear_flush_young(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep);
+extern void contpte_wrprotect_ptes(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, unsigned int nr);
+extern int contpte_ptep_set_access_flags(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t entry, int dirty);
+extern void contpte_clear_young_dirty_ptes(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ unsigned int nr, cydp_t flags);
+
+static __always_inline void contpte_try_fold(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep, pte_t pte)
+{
+ /*
+ * Only bother trying if both the virtual and physical addresses are
+ * aligned and correspond to the last entry in a contig range. The core
+	 * code mostly modifies ranges from low to high, so this is likely the
+	 * last modification in the contig range and a good time to fold.
+ * We can't fold special mappings, because there is no associated folio.
+ */
+
+ const unsigned long contmask = CONT_PTES - 1;
+ bool valign = ((addr >> PAGE_SHIFT) & contmask) == contmask;
+
+ if (unlikely(valign)) {
+ bool palign = (pte_pfn(pte) & contmask) == contmask;
+
+ if (unlikely(palign &&
+ pte_valid(pte) && !pte_cont(pte) && !pte_special(pte)))
+ __contpte_try_fold(mm, addr, ptep, pte);
+ }
+}
+
+static __always_inline void contpte_try_unfold(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep, pte_t pte)
+{
+ if (unlikely(pte_valid_cont(pte)))
+ __contpte_try_unfold(mm, addr, ptep, pte);
+}
+
+#define pte_batch_hint pte_batch_hint
+static inline unsigned int pte_batch_hint(pte_t *ptep, pte_t pte)
+{
+ if (!pte_valid_cont(pte))
+ return 1;
+
+ return CONT_PTES - (((unsigned long)ptep >> 3) & (CONT_PTES - 1));
+}
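
pte_batch_hint() above derives the number of entries left in the current contpte block purely from the alignment of the pte pointer: each pte_t is 8 bytes, hence the >> 3 before masking with CONT_PTES - 1. A small standalone model of that arithmetic, assuming CONT_PTES is 16 as in the 4K-page configuration:

/* Model of the pte_batch_hint() index arithmetic, assuming CONT_PTES == 16
 * (the 4K-page configuration) and 8-byte pte_t entries. */
#include <assert.h>
#include <stdint.h>

#define CONT_PTES       16u

static unsigned int batch_hint(uintptr_t ptep_addr, int pte_is_cont)
{
        if (!pte_is_cont)
                return 1;
        /* Entries left in this contpte block, including the current one. */
        return CONT_PTES - ((ptep_addr >> 3) & (CONT_PTES - 1));
}

int main(void)
{
        uintptr_t base = 0x10000;       /* hypothetical, CONT_PTES-aligned table address */

        assert(batch_hint(base, 1) == 16);              /* first entry of the block */
        assert(batch_hint(base + 5 * 8, 1) == 11);      /* entry 5: 11 entries remain */
        assert(batch_hint(base + 15 * 8, 1) == 1);      /* last entry of the block */
        assert(batch_hint(base + 5 * 8, 0) == 1);       /* non-contiguous ptes get no hint */
        return 0;
}
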
+
+/*
+ * The below functions constitute the public API that arm64 presents to the
+ * core-mm to manipulate PTE entries within their page tables (or at least this
+ * is the subset of the API that arm64 needs to implement). These public
+ * versions will automatically and transparently apply the contiguous bit where
+ * it makes sense to do so. Therefore any users that are contig-aware (e.g.
+ * hugetlb, kernel mapper) should NOT use these APIs, but instead use the
+ * private versions, which are prefixed with double underscore. All of these
+ * APIs except for ptep_get_lockless() are expected to be called with the PTL
+ * held. Although the contiguous bit is considered private to the
+ * implementation, it is deliberately allowed to leak through the getters (e.g.
+ * ptep_get()), back to core code. This is required so that pte_leaf_size() can
+ * provide an accurate size for perf_get_pgtable_size(). But this leakage means
+ * it's possible a pte will be passed to a setter with the contiguous bit set, so
+ * we explicitly clear the contiguous bit in those cases to prevent accidentally
+ * setting it in the pgtable.
+ */
+
+#define ptep_get ptep_get
+static inline pte_t ptep_get(pte_t *ptep)
+{
+ pte_t pte = __ptep_get(ptep);
+
+ if (likely(!pte_valid_cont(pte)))
+ return pte;
+
+ return contpte_ptep_get(ptep, pte);
+}
+
+#define ptep_get_lockless ptep_get_lockless
+static inline pte_t ptep_get_lockless(pte_t *ptep)
+{
+ pte_t pte = __ptep_get(ptep);
+
+ if (likely(!pte_valid_cont(pte)))
+ return pte;
+
+ return contpte_ptep_get_lockless(ptep);
+}
+
+static inline void set_pte(pte_t *ptep, pte_t pte)
+{
+ /*
+ * We don't have the mm or vaddr so cannot unfold contig entries (since
+ * it requires tlb maintenance). set_pte() is not used in core code, so
+ * this should never even be called. Regardless do our best to service
+ * any call and emit a warning if there is any attempt to set a pte on
+ * top of an existing contig range.
+ */
+ pte_t orig_pte = __ptep_get(ptep);
+
+ WARN_ON_ONCE(pte_valid_cont(orig_pte));
+ __set_pte(ptep, pte_mknoncont(pte));
+}
+
+#define set_ptes set_ptes
+static __always_inline void set_ptes(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte, unsigned int nr)
+{
+ pte = pte_mknoncont(pte);
+
+ if (likely(nr == 1)) {
+ contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep));
+ __set_ptes(mm, addr, ptep, pte, 1);
+ contpte_try_fold(mm, addr, ptep, pte);
+ } else {
+ contpte_set_ptes(mm, addr, ptep, pte, nr);
+ }
+}
+
+static inline void pte_clear(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep));
+ __pte_clear(mm, addr, ptep);
+}
+
+#define clear_full_ptes clear_full_ptes
+static inline void clear_full_ptes(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, unsigned int nr, int full)
+{
+ if (likely(nr == 1)) {
+ contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep));
+ __clear_full_ptes(mm, addr, ptep, nr, full);
+ } else {
+ contpte_clear_full_ptes(mm, addr, ptep, nr, full);
+ }
+}
+
+#define get_and_clear_full_ptes get_and_clear_full_ptes
+static inline pte_t get_and_clear_full_ptes(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep,
+ unsigned int nr, int full)
+{
+ pte_t pte;
+
+ if (likely(nr == 1)) {
+ contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep));
+ pte = __get_and_clear_full_ptes(mm, addr, ptep, nr, full);
+ } else {
+ pte = contpte_get_and_clear_full_ptes(mm, addr, ptep, nr, full);
+ }
+
+ return pte;
+}
+
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep));
+ return __ptep_get_and_clear(mm, addr, ptep);
+}
+
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep)
+{
+ pte_t orig_pte = __ptep_get(ptep);
+
+ if (likely(!pte_valid_cont(orig_pte)))
+ return __ptep_test_and_clear_young(vma, addr, ptep);
+
+ return contpte_ptep_test_and_clear_young(vma, addr, ptep);
+}
+
+#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
+static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep)
+{
+ pte_t orig_pte = __ptep_get(ptep);
+
+ if (likely(!pte_valid_cont(orig_pte)))
+ return __ptep_clear_flush_young(vma, addr, ptep);
+
+ return contpte_ptep_clear_flush_young(vma, addr, ptep);
+}
+
+#define wrprotect_ptes wrprotect_ptes
+static __always_inline void wrprotect_ptes(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep, unsigned int nr)
+{
+ if (likely(nr == 1)) {
+ /*
+ * Optimization: wrprotect_ptes() can only be called for present
+		 * ptes so we only need to check the contig bit as the condition for
+ * unfold, and we can remove the contig bit from the pte we read
+ * to avoid re-reading. This speeds up fork() which is sensitive
+ * for order-0 folios. Equivalent to contpte_try_unfold().
+ */
+ pte_t orig_pte = __ptep_get(ptep);
+
+ if (unlikely(pte_cont(orig_pte))) {
+ __contpte_try_unfold(mm, addr, ptep, orig_pte);
+ orig_pte = pte_mknoncont(orig_pte);
+ }
+ ___ptep_set_wrprotect(mm, addr, ptep, orig_pte);
+ } else {
+ contpte_wrprotect_ptes(mm, addr, ptep, nr);
+ }
+}
+
+#define __HAVE_ARCH_PTEP_SET_WRPROTECT
+static inline void ptep_set_wrprotect(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ wrprotect_ptes(mm, addr, ptep, 1);
+}
+
+#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+static inline int ptep_set_access_flags(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t entry, int dirty)
+{
+ pte_t orig_pte = __ptep_get(ptep);
+
+ entry = pte_mknoncont(entry);
+
+ if (likely(!pte_valid_cont(orig_pte)))
+ return __ptep_set_access_flags(vma, addr, ptep, entry, dirty);
+
+ return contpte_ptep_set_access_flags(vma, addr, ptep, entry, dirty);
+}
+
+#define clear_young_dirty_ptes clear_young_dirty_ptes
+static inline void clear_young_dirty_ptes(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ unsigned int nr, cydp_t flags)
+{
+ if (likely(nr == 1 && !pte_cont(__ptep_get(ptep))))
+ __clear_young_dirty_ptes(vma, addr, ptep, nr, flags);
+ else
+ contpte_clear_young_dirty_ptes(vma, addr, ptep, nr, flags);
+}
+
+#else /* CONFIG_ARM64_CONTPTE */
+
+#define ptep_get __ptep_get
+#define set_pte __set_pte
+#define set_ptes __set_ptes
+#define pte_clear __pte_clear
+#define clear_full_ptes __clear_full_ptes
+#define get_and_clear_full_ptes __get_and_clear_full_ptes
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+#define ptep_get_and_clear __ptep_get_and_clear
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+#define ptep_test_and_clear_young __ptep_test_and_clear_young
+#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
+#define ptep_clear_flush_young __ptep_clear_flush_young
+#define __HAVE_ARCH_PTEP_SET_WRPROTECT
+#define ptep_set_wrprotect __ptep_set_wrprotect
+#define wrprotect_ptes __wrprotect_ptes
+#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+#define ptep_set_access_flags __ptep_set_access_flags
+#define clear_young_dirty_ptes __clear_young_dirty_ptes
+
+#endif /* CONFIG_ARM64_CONTPTE */
+
#endif /* !__ASSEMBLY__ */
#endif /* __ASM_PGTABLE_H */
diff --git a/arch/arm64/include/asm/pkeys.h b/arch/arm64/include/asm/pkeys.h
new file mode 100644
index 000000000000..0ca5f83ce148
--- /dev/null
+++ b/arch/arm64/include/asm/pkeys.h
@@ -0,0 +1,106 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023 Arm Ltd.
+ *
+ * Based on arch/x86/include/asm/pkeys.h
+ */
+
+#ifndef _ASM_ARM64_PKEYS_H
+#define _ASM_ARM64_PKEYS_H
+
+#define ARCH_VM_PKEY_FLAGS (VM_PKEY_BIT0 | VM_PKEY_BIT1 | VM_PKEY_BIT2)
+
+#define arch_max_pkey() 8
+
+int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
+ unsigned long init_val);
+
+static inline bool arch_pkeys_enabled(void)
+{
+ return system_supports_poe();
+}
+
+static inline int vma_pkey(struct vm_area_struct *vma)
+{
+ return (vma->vm_flags & ARCH_VM_PKEY_FLAGS) >> VM_PKEY_SHIFT;
+}
+
+static inline int arch_override_mprotect_pkey(struct vm_area_struct *vma,
+ int prot, int pkey)
+{
+ if (pkey != -1)
+ return pkey;
+
+ return vma_pkey(vma);
+}
+
+static inline int execute_only_pkey(struct mm_struct *mm)
+{
+ // Execute-only mappings are handled by EPAN/FEAT_PAN3.
+ return -1;
+}
+
+#define mm_pkey_allocation_map(mm) (mm)->context.pkey_allocation_map
+#define mm_set_pkey_allocated(mm, pkey) do { \
+ mm_pkey_allocation_map(mm) |= (1U << pkey); \
+} while (0)
+#define mm_set_pkey_free(mm, pkey) do { \
+ mm_pkey_allocation_map(mm) &= ~(1U << pkey); \
+} while (0)
+
+static inline bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey)
+{
+ /*
+ * "Allocated" pkeys are those that have been returned
+ * from pkey_alloc() or pkey 0 which is allocated
+ * implicitly when the mm is created.
+ */
+ if (pkey < 0 || pkey >= arch_max_pkey())
+ return false;
+
+ return mm_pkey_allocation_map(mm) & (1U << pkey);
+}
+
+/*
+ * Returns a positive, 3-bit key on success, or -1 on failure.
+ */
+static inline int mm_pkey_alloc(struct mm_struct *mm)
+{
+ /*
+ * Note: this is the one and only place we make sure
+ * that the pkey is valid as far as the hardware is
+ * concerned. The rest of the kernel trusts that
+ * only good, valid pkeys come out of here.
+ */
+ u8 all_pkeys_mask = GENMASK(arch_max_pkey() - 1, 0);
+ int ret;
+
+ if (!arch_pkeys_enabled())
+ return -1;
+
+ /*
+ * Are we out of pkeys? We must handle this specially
+ * because ffz() behavior is undefined if there are no
+ * zeros.
+ */
+ if (mm_pkey_allocation_map(mm) == all_pkeys_mask)
+ return -1;
+
+ ret = ffz(mm_pkey_allocation_map(mm));
+
+ mm_set_pkey_allocated(mm, ret);
+
+ return ret;
+}
+
+static inline int mm_pkey_free(struct mm_struct *mm, int pkey)
+{
+ if (!mm_pkey_is_allocated(mm, pkey))
+ return -EINVAL;
+
+ mm_set_pkey_free(mm, pkey);
+
+ return 0;
+}
+
+#endif /* _ASM_ARM64_PKEYS_H */
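
The allocator above hands out the lowest free key from an 8-bit per-mm bitmap and reports exhaustion before calling ffz(), whose behaviour is undefined with no zero bits. A userspace model of the allocate/free cycle, with a plain uint8_t standing in for context.pkey_allocation_map:

/* Userspace model of the pkey bitmap allocator above: 8 keys, lowest free
 * key first, with a uint8_t standing in for context.pkey_allocation_map. */
#include <assert.h>
#include <stdint.h>

#define MAX_PKEYS       8

static int pkey_alloc_model(uint8_t *map)
{
        uint8_t all = (1u << MAX_PKEYS) - 1;
        int pkey;

        if (*map == all)        /* no zero bit left: out of keys */
                return -1;

        for (pkey = 0; pkey < MAX_PKEYS; pkey++)        /* ffz() equivalent */
                if (!(*map & (1u << pkey)))
                        break;

        *map |= 1u << pkey;
        return pkey;
}

static int pkey_free_model(uint8_t *map, int pkey)
{
        if (pkey < 0 || pkey >= MAX_PKEYS || !(*map & (1u << pkey)))
                return -1;      /* -EINVAL in the kernel version */
        *map &= ~(1u << pkey);
        return 0;
}

int main(void)
{
        uint8_t map = 1u << 0;  /* pkey 0 is allocated implicitly at mm creation */

        assert(pkey_alloc_model(&map) == 1);
        assert(pkey_alloc_model(&map) == 2);
        assert(pkey_free_model(&map, 1) == 0);
        assert(pkey_alloc_model(&map) == 1);    /* freed key is reused first */
        assert(pkey_free_model(&map, 5) == -1); /* never allocated */
        return 0;
}
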
diff --git a/arch/arm64/include/asm/pointer_auth.h b/arch/arm64/include/asm/pointer_auth.h
index efb098de3a84..d2e0306e65d3 100644
--- a/arch/arm64/include/asm/pointer_auth.h
+++ b/arch/arm64/include/asm/pointer_auth.h
@@ -10,6 +10,13 @@
#include <asm/memory.h>
#include <asm/sysreg.h>
+/*
+ * The EL0/EL1 pointer bits used by a pointer authentication code.
+ * This is dependent on TBI0/TBI1 being enabled, or bits 63:56 would also apply.
+ */
+#define ptrauth_user_pac_mask() GENMASK_ULL(54, vabits_actual)
+#define ptrauth_kernel_pac_mask() GENMASK_ULL(63, vabits_actual)
+
#define PR_PAC_ENABLED_KEYS_MASK \
(PR_PAC_APIAKEY | PR_PAC_APIBKEY | PR_PAC_APDAKEY | PR_PAC_APDBKEY)
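
The two masks above carve the pointer authentication code out of the untranslated pointer bits: with TBI enabled the tag byte (63:56) is excluded for user pointers, so the user PAC occupies bits 54 down to vabits_actual, while the kernel PAC runs all the way to bit 63. A standalone check of the mask arithmetic for a hypothetical 48-bit vabits_actual:

/* Check of the PAC mask layout above for a hypothetical vabits_actual of 48:
 * user PAC sits in bits 54:48 (tag byte excluded), kernel PAC in bits 63:48. */
#include <assert.h>
#include <stdint.h>

#define GENMASK_ULL(h, l) \
        (((~0ull) << (l)) & (~0ull >> (63 - (h))))

int main(void)
{
        unsigned int vabits_actual = 48;        /* illustrative VA size */
        uint64_t user_mask = GENMASK_ULL(54, vabits_actual);
        uint64_t kernel_mask = GENMASK_ULL(63, vabits_actual);
        uint64_t ptr = 0xf27f512345678000ull;   /* tag 0xf2, PAC bits 54:48 set */

        assert(user_mask == 0x007f000000000000ull);     /* bits 54:48 */
        assert(kernel_mask == 0xffff000000000000ull);   /* bits 63:48 */
        /* Stripping the user PAC keeps the tag byte and the 48-bit VA. */
        assert((ptr & ~user_mask) == 0xf200512345678000ull);
        return 0;
}
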
@@ -97,11 +104,6 @@ extern int ptrauth_set_enabled_keys(struct task_struct *tsk, unsigned long keys,
unsigned long enabled);
extern int ptrauth_get_enabled_keys(struct task_struct *tsk);
-static inline unsigned long ptrauth_strip_insn_pac(unsigned long ptr)
-{
- return ptrauth_clear_pac(ptr);
-}
-
static __always_inline void ptrauth_enable(void)
{
if (!system_supports_address_auth())
@@ -133,7 +135,6 @@ static __always_inline void ptrauth_enable(void)
#define ptrauth_prctl_reset_keys(tsk, arg) (-EINVAL)
#define ptrauth_set_enabled_keys(tsk, keys, enabled) (-EINVAL)
#define ptrauth_get_enabled_keys(tsk) (-EINVAL)
-#define ptrauth_strip_insn_pac(lr) (lr)
#define ptrauth_suspend_exit()
#define ptrauth_thread_init_user()
#define ptrauth_thread_switch_user(tsk)
diff --git a/arch/arm64/include/asm/por.h b/arch/arm64/include/asm/por.h
new file mode 100644
index 000000000000..d913d5b529e4
--- /dev/null
+++ b/arch/arm64/include/asm/por.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023 Arm Ltd.
+ */
+
+#ifndef _ASM_ARM64_POR_H
+#define _ASM_ARM64_POR_H
+
+#include <asm/sysreg.h>
+
+#define POR_EL0_INIT POR_ELx_PERM_PREP(0, POE_RWX)
+
+static inline bool por_elx_allows_read(u64 por, u8 pkey)
+{
+ u8 perm = POR_ELx_PERM_GET(pkey, por);
+
+ return perm & POE_R;
+}
+
+static inline bool por_elx_allows_write(u64 por, u8 pkey)
+{
+ u8 perm = POR_ELx_PERM_GET(pkey, por);
+
+ return perm & POE_W;
+}
+
+static inline bool por_elx_allows_exec(u64 por, u8 pkey)
+{
+ u8 perm = POR_ELx_PERM_GET(pkey, por);
+
+ return perm & POE_X;
+}
+
+#endif /* _ASM_ARM64_POR_H */
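
The helpers above each extract the 4-bit permission field for the given pkey from the POR register value and test one of the R/W/X bits. A standalone model of that indexing, with illustrative M_POE_* bit values (the real POE_* encodings are defined in asm/sysreg.h):

/* Model of the POR_ELx permission helpers above: one 4-bit field per pkey,
 * tested against R/W/X bits. Bit values here are illustrative only. */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define M_POE_R         0x1u
#define M_POE_X         0x2u
#define M_POE_W         0x4u
#define M_POE_BITS      4

static uint8_t por_perm_get(uint64_t por, uint8_t pkey)
{
        return (por >> (pkey * M_POE_BITS)) & 0xf;
}

static uint64_t por_perm_prep(uint8_t pkey, uint8_t perm)
{
        return (uint64_t)perm << (pkey * M_POE_BITS);
}

static bool allows_read(uint64_t por, uint8_t pkey)  { return por_perm_get(por, pkey) & M_POE_R; }
static bool allows_write(uint64_t por, uint8_t pkey) { return por_perm_get(por, pkey) & M_POE_W; }
static bool allows_exec(uint64_t por, uint8_t pkey)  { return por_perm_get(por, pkey) & M_POE_X; }

int main(void)
{
        /* pkey 0: RWX, pkey 3: read-only, everything else: no access. */
        uint64_t por = por_perm_prep(0, M_POE_R | M_POE_W | M_POE_X) |
                       por_perm_prep(3, M_POE_R);

        assert(allows_read(por, 0) && allows_write(por, 0) && allows_exec(por, 0));
        assert(allows_read(por, 3) && !allows_write(por, 3) && !allows_exec(por, 3));
        assert(!allows_read(por, 5));
        return 0;
}
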
diff --git a/arch/arm64/include/asm/probes.h b/arch/arm64/include/asm/probes.h
index 006946745352..d49368886309 100644
--- a/arch/arm64/include/asm/probes.h
+++ b/arch/arm64/include/asm/probes.h
@@ -9,21 +9,18 @@
#include <asm/insn.h>
-typedef u32 probe_opcode_t;
typedef void (probes_handler_t) (u32 opcode, long addr, struct pt_regs *);
-/* architecture specific copy of original instruction */
struct arch_probe_insn {
- probe_opcode_t *insn;
- pstate_check_t *pstate_cc;
probes_handler_t *handler;
- /* restore address after step xol */
- unsigned long restore;
};
#ifdef CONFIG_KPROBES
-typedef u32 kprobe_opcode_t;
+typedef __le32 kprobe_opcode_t;
struct arch_specific_insn {
struct arch_probe_insn api;
+ kprobe_opcode_t *xol_insn;
+ /* restore address after step xol */
+ unsigned long xol_restore;
};
#endif
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 86eb0bfe3b38..61d62bfd5a7b 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -23,6 +23,8 @@
#define MTE_CTRL_TCF_ASYNC (1UL << 17)
#define MTE_CTRL_TCF_ASYMM (1UL << 18)
+#define MTE_CTRL_STORE_ONLY (1UL << 19)
+
#ifndef __ASSEMBLY__
#include <linux/build_bug.h>
@@ -122,6 +124,12 @@ enum vec_type {
ARM64_VEC_MAX,
};
+enum fp_type {
+ FP_STATE_CURRENT, /* Save based on current task state. */
+ FP_STATE_FPSIMD,
+ FP_STATE_SVE,
+};
+
struct cpu_context {
unsigned long x19;
unsigned long x20;
@@ -149,17 +157,23 @@ struct thread_struct {
struct {
unsigned long tp_value; /* TLS register */
unsigned long tp2_value;
+ u64 fpmr;
+ unsigned long pad;
struct user_fpsimd_state fpsimd_state;
} uw;
+ enum fp_type fp_type; /* registers FPSIMD or SVE? */
unsigned int fpsimd_cpu;
void *sve_state; /* SVE registers, if any */
- void *za_state; /* ZA register, if any */
+ void *sme_state; /* ZA and ZT state, if any */
unsigned int vl[ARM64_VEC_MAX]; /* vector length */
unsigned int vl_onexec[ARM64_VEC_MAX]; /* vl after next exec */
unsigned long fault_address; /* fault info */
unsigned long fault_code; /* ESR_EL1 value */
struct debug_info debug; /* debugging */
+
+ struct user_fpsimd_state kernel_fpsimd_state;
+ unsigned int kernel_fpsimd_cpu;
#ifdef CONFIG_ARM64_PTR_AUTH
struct ptrauth_keys_user keys_user;
#ifdef CONFIG_ARM64_PTR_AUTH_KERNEL
@@ -172,6 +186,14 @@ struct thread_struct {
u64 sctlr_user;
u64 svcr;
u64 tpidr2_el0;
+ u64 por_el0;
+#ifdef CONFIG_ARM64_GCS
+ unsigned int gcs_el0_mode;
+ unsigned int gcs_el0_locked;
+ u64 gcspr_el0;
+ u64 gcs_base;
+ u64 gcs_size;
+#endif
};
static inline unsigned int thread_get_vl(struct thread_struct *thread,
@@ -243,6 +265,8 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset,
BUILD_BUG_ON(sizeof_field(struct thread_struct, uw) !=
sizeof_field(struct thread_struct, uw.tp_value) +
sizeof_field(struct thread_struct, uw.tp2_value) +
+ sizeof_field(struct thread_struct, uw.fpmr) +
+ sizeof_field(struct thread_struct, uw.pad) +
sizeof_field(struct thread_struct, uw.fpsimd_state));
*offset = offsetof(struct thread_struct, uw);
@@ -270,22 +294,44 @@ void tls_preserve_current_state(void);
.fpsimd_cpu = NR_CPUS, \
}
-static inline void start_thread_common(struct pt_regs *regs, unsigned long pc)
+static inline void start_thread_common(struct pt_regs *regs, unsigned long pc,
+ unsigned long pstate)
{
- s32 previous_syscall = regs->syscallno;
- memset(regs, 0, sizeof(*regs));
- regs->syscallno = previous_syscall;
- regs->pc = pc;
+ /*
+ * Ensure all GPRs are zeroed, and initialize PC + PSTATE.
+ * The SP (or compat SP) will be initialized later.
+ */
+ regs->user_regs = (struct user_pt_regs) {
+ .pc = pc,
+ .pstate = pstate,
+ };
+ /*
+ * To allow the syscalls:sys_exit_execve tracepoint we need to preserve
+ * syscallno, but do not need orig_x0 or the original GPRs.
+ */
+ regs->orig_x0 = 0;
+
+ /*
+ * An exec from a kernel thread won't have an existing PMR value.
+ */
if (system_uses_irq_prio_masking())
- regs->pmr_save = GIC_PRIO_IRQON;
+ regs->pmr = GIC_PRIO_IRQON;
+
+ /*
+ * The pt_regs::stackframe field must remain valid throughout this
+ * function as a stacktrace can be taken at any time. Any user or
+ * kernel task should have a valid final frame.
+ */
+ WARN_ON_ONCE(regs->stackframe.record.fp != 0);
+ WARN_ON_ONCE(regs->stackframe.record.lr != 0);
+ WARN_ON_ONCE(regs->stackframe.type != FRAME_META_TYPE_FINAL);
}
static inline void start_thread(struct pt_regs *regs, unsigned long pc,
unsigned long sp)
{
- start_thread_common(regs, pc);
- regs->pstate = PSR_MODE_EL0t;
+ start_thread_common(regs, pc, PSR_MODE_EL0t);
spectre_v4_enable_task_mitigation(current);
regs->sp = sp;
}
@@ -294,27 +340,25 @@ static inline void start_thread(struct pt_regs *regs, unsigned long pc,
static inline void compat_start_thread(struct pt_regs *regs, unsigned long pc,
unsigned long sp)
{
- start_thread_common(regs, pc);
- regs->pstate = PSR_AA32_MODE_USR;
+ unsigned long pstate = PSR_AA32_MODE_USR;
if (pc & 1)
- regs->pstate |= PSR_AA32_T_BIT;
-
-#ifdef __AARCH64EB__
- regs->pstate |= PSR_AA32_E_BIT;
-#endif
+ pstate |= PSR_AA32_T_BIT;
+ if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
+ pstate |= PSR_AA32_E_BIT;
+ start_thread_common(regs, pc, pstate);
spectre_v4_enable_task_mitigation(current);
regs->compat_sp = sp;
}
#endif
-static inline bool is_ttbr0_addr(unsigned long addr)
+static __always_inline bool is_ttbr0_addr(unsigned long addr)
{
/* entry assembly clears tags for TTBR0 addrs */
return addr < TASK_SIZE;
}
-static inline bool is_ttbr1_addr(unsigned long addr)
+static __always_inline bool is_ttbr1_addr(unsigned long addr)
{
/* TTBR1 addresses may have a tag if KASAN_SW_TAGS is in use */
return arch_kasan_reset_tag(addr) >= PAGE_OFFSET;
@@ -323,9 +367,6 @@ static inline bool is_ttbr1_addr(unsigned long addr)
/* Forward declaration, a strange C thing */
struct task_struct;
-/* Free all resources held by a thread. */
-extern void release_thread(struct task_struct *);
-
unsigned long __get_wchan(struct task_struct *p);
void update_sctlr_el1(u64 sctlr);
@@ -355,14 +396,6 @@ static inline void prefetchw(const void *ptr)
asm volatile("prfm pstl1keep, %a0\n" : : "p" (ptr));
}
-#define ARCH_HAS_SPINLOCK_PREFETCH
-static inline void spin_lock_prefetch(const void *ptr)
-{
- asm volatile(ARM64_LSE_ATOMIC_INSN(
- "prfm pstl1strm, %a0",
- "nop") : : "p" (ptr));
-}
-
extern unsigned long __ro_after_init signal_minsigstksz; /* sigframe size */
extern void __init minsigstksz_setup(void);
@@ -399,18 +432,10 @@ long get_tagged_addr_ctrl(struct task_struct *task);
#define GET_TAGGED_ADDR_CTRL() get_tagged_addr_ctrl(current)
#endif
-/*
- * For CONFIG_GCC_PLUGIN_STACKLEAK
- *
- * These need to be macros because otherwise we get stuck in a nightmare
- * of header definitions for the use of task_stack_page.
- */
-
-/*
- * The top of the current task's task stack
- */
-#define current_top_of_stack() ((unsigned long)current->stack + THREAD_SIZE)
-#define on_thread_stack() (on_task_stack(current, current_stack_pointer, 1, NULL))
+int get_tsc_mode(unsigned long adr);
+int set_tsc_mode(unsigned int val);
+#define GET_TSC_CTL(adr) get_tsc_mode((adr))
+#define SET_TSC_CTL(val) set_tsc_mode((val))
#endif /* __ASSEMBLY__ */
#endif /* __ASM_PROCESSOR_H */
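Illustrative sketch (not from the patch above): start_thread() is what an exec path calls once a new AArch64 image has been loaded, with the entry point and initial stack pointer supplied by the binfmt loader; the helper name below is made up.

#include <linux/ptrace.h>
#include <asm/processor.h>

/* Sketch only: hand control to a freshly loaded user image. */
static void example_finish_exec(unsigned long entry, unsigned long user_sp)
{
	struct pt_regs *regs = current_pt_regs();

	/* Zeroes the GPRs, sets PSTATE to EL0t and installs PC and SP. */
	start_thread(regs, entry, user_sp);
}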
diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h
index b1dd7ecff7ef..fded5358641f 100644
--- a/arch/arm64/include/asm/ptdump.h
+++ b/arch/arm64/include/asm/ptdump.h
@@ -5,7 +5,9 @@
#ifndef __ASM_PTDUMP_H
#define __ASM_PTDUMP_H
-#ifdef CONFIG_PTDUMP_CORE
+#include <linux/ptdump.h>
+
+#ifdef CONFIG_PTDUMP
#include <linux/mm_types.h>
#include <linux/seq_file.h>
@@ -21,20 +23,65 @@ struct ptdump_info {
unsigned long base_addr;
};
+struct ptdump_prot_bits {
+ ptdesc_t mask;
+ ptdesc_t val;
+ const char *set;
+ const char *clear;
+};
+
+struct ptdump_pg_level {
+ const struct ptdump_prot_bits *bits;
+ char name[4];
+ int num;
+ ptdesc_t mask;
+};
+
+/*
+ * The page dumper groups page table entries of the same type into a single
+ * description. It uses pg_state to track the range information while
+ * iterating over the pte entries. When the continuity is broken it then
+ * dumps out a description of the range.
+ */
+struct ptdump_pg_state {
+ struct ptdump_state ptdump;
+ struct ptdump_pg_level *pg_level;
+ struct seq_file *seq;
+ const struct addr_marker *marker;
+ const struct mm_struct *mm;
+ unsigned long start_address;
+ int level;
+ ptdesc_t current_prot;
+ bool check_wx;
+ unsigned long wx_pages;
+ unsigned long uxn_pages;
+};
+
void ptdump_walk(struct seq_file *s, struct ptdump_info *info);
+void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
+ pteval_t val);
+void note_page_pte(struct ptdump_state *st, unsigned long addr, pte_t pte);
+void note_page_pmd(struct ptdump_state *st, unsigned long addr, pmd_t pmd);
+void note_page_pud(struct ptdump_state *st, unsigned long addr, pud_t pud);
+void note_page_p4d(struct ptdump_state *st, unsigned long addr, p4d_t p4d);
+void note_page_pgd(struct ptdump_state *st, unsigned long addr, pgd_t pgd);
+void note_page_flush(struct ptdump_state *st);
#ifdef CONFIG_PTDUMP_DEBUGFS
+#define EFI_RUNTIME_MAP_END DEFAULT_MAP_WINDOW_64
void __init ptdump_debugfs_register(struct ptdump_info *info, const char *name);
#else
static inline void ptdump_debugfs_register(struct ptdump_info *info,
const char *name) { }
-#endif
-void ptdump_check_wx(void);
-#endif /* CONFIG_PTDUMP_CORE */
-
-#ifdef CONFIG_DEBUG_WX
-#define debug_checkwx() ptdump_check_wx()
+#endif /* CONFIG_PTDUMP_DEBUGFS */
#else
-#define debug_checkwx() do { } while (0)
-#endif
+static inline void note_page(struct ptdump_state *pt_st, unsigned long addr,
+ int level, pteval_t val) { }
+static inline void note_page_pte(struct ptdump_state *st, unsigned long addr, pte_t pte) { }
+static inline void note_page_pmd(struct ptdump_state *st, unsigned long addr, pmd_t pmd) { }
+static inline void note_page_pud(struct ptdump_state *st, unsigned long addr, pud_t pud) { }
+static inline void note_page_p4d(struct ptdump_state *st, unsigned long addr, p4d_t p4d) { }
+static inline void note_page_pgd(struct ptdump_state *st, unsigned long addr, pgd_t pgd) { }
+static inline void note_page_flush(struct ptdump_state *st) { }
+#endif /* CONFIG_PTDUMP */
#endif /* __ASM_PTDUMP_H */
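Illustrative sketch (not from the patch): how a user of this header typically registers a debugfs dump. The marker table, the mm/markers field names of struct ptdump_info and the file name are assumptions based on the usual in-tree pattern.

#include <linux/init.h>
#include <asm/memory.h>
#include <asm/ptdump.h>

/* Assumed marker table; real tables end with a -1 sentinel entry. */
static struct addr_marker example_markers[] = {
	{ PAGE_OFFSET,	"Linear Mapping start" },
	{ -1UL,		NULL },
};

static struct ptdump_info example_ptdump_info = {
	.mm		= &init_mm,
	.markers	= example_markers,
	.base_addr	= PAGE_OFFSET,
};

static int __init example_ptdump_init(void)
{
	ptdump_debugfs_register(&example_ptdump_info, "example_page_tables");
	return 0;
}
device_initcall(example_ptdump_init);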
diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index 41b332c054ab..47ff8654c5ec 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -21,35 +21,12 @@
#define INIT_PSTATE_EL2 \
(PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT | PSR_MODE_EL2h)
-/*
- * PMR values used to mask/unmask interrupts.
- *
- * GIC priority masking works as follows: if an IRQ's priority is a higher value
- * than the value held in PMR, that IRQ is masked. Lowering the value of PMR
- * means masking more IRQs (or at least that the same IRQs remain masked).
- *
- * To mask interrupts, we clear the most significant bit of PMR.
- *
- * Some code sections either automatically switch back to PSR.I or explicitly
- * require to not use priority masking. If bit GIC_PRIO_PSR_I_SET is included
- * in the priority mask, it indicates that PSR.I should be set and
- * interrupt disabling temporarily does not rely on IRQ priorities.
- */
-#define GIC_PRIO_IRQON 0xe0
-#define __GIC_PRIO_IRQOFF (GIC_PRIO_IRQON & ~0x80)
-#define __GIC_PRIO_IRQOFF_NS 0xa0
-#define GIC_PRIO_PSR_I_SET (1 << 4)
-
-#define GIC_PRIO_IRQOFF \
- ({ \
- extern struct static_key_false gic_nonsecure_priorities;\
- u8 __prio = __GIC_PRIO_IRQOFF; \
- \
- if (static_branch_unlikely(&gic_nonsecure_priorities)) \
- __prio = __GIC_PRIO_IRQOFF_NS; \
- \
- __prio; \
- })
+#include <linux/irqchip/arm-gic-v3-prio.h>
+
+#define GIC_PRIO_IRQON GICV3_PRIO_UNMASKED
+#define GIC_PRIO_IRQOFF GICV3_PRIO_IRQ
+
+#define GIC_PRIO_PSR_I_SET GICV3_PRIO_PSR_I_SET
/* Additional SPSR bits not exposed in the UABI */
#define PSR_MODE_THREAD_BIT (1 << 0)
@@ -121,6 +98,8 @@
#include <linux/bug.h>
#include <linux/types.h>
+#include <asm/stacktrace/frame.h>
+
/* sizeof(struct user) for AArch32 */
#define COMPAT_USER_SZ 296
@@ -172,8 +151,7 @@ static inline unsigned long pstate_to_compat_psr(const unsigned long pstate)
/*
* This struct defines the way the registers are stored on the stack during an
- * exception. Note that sizeof(struct pt_regs) has to be a multiple of 16 (for
- * stack alignment). struct user_pt_regs must form a prefix of struct pt_regs.
+ * exception. struct user_pt_regs must form a prefix of struct pt_regs.
*/
struct pt_regs {
union {
@@ -186,23 +164,20 @@ struct pt_regs {
};
};
u64 orig_x0;
-#ifdef __AARCH64EB__
- u32 unused2;
- s32 syscallno;
-#else
s32 syscallno;
- u32 unused2;
-#endif
+ u32 pmr;
+
u64 sdei_ttbr1;
- /* Only valid when ARM64_HAS_IRQ_PRIO_MASKING is enabled. */
- u64 pmr_save;
- u64 stackframe[2];
+ struct frame_record_meta stackframe;
/* Only valid for some EL1 exceptions. */
u64 lockdep_hardirqs;
u64 exit_rcu;
};
+/* For correct stack alignment, pt_regs has to be a multiple of 16 bytes. */
+static_assert(IS_ALIGNED(sizeof(struct pt_regs), 16));
+
static inline bool in_syscall(struct pt_regs const *regs)
{
return regs->syscallno != NO_SYSCALL;
@@ -236,7 +211,7 @@ static inline void forget_syscall(struct pt_regs *regs)
#define irqs_priority_unmasked(regs) \
(system_uses_irq_prio_masking() ? \
- (regs)->pmr_save == GIC_PRIO_IRQON : \
+ (regs)->pmr == GIC_PRIO_IRQON : \
true)
#define interrupts_enabled(regs) \
diff --git a/arch/arm64/include/asm/rqspinlock.h b/arch/arm64/include/asm/rqspinlock.h
new file mode 100644
index 000000000000..9ea0a74e5892
--- /dev/null
+++ b/arch/arm64/include/asm/rqspinlock.h
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_RQSPINLOCK_H
+#define _ASM_RQSPINLOCK_H
+
+#include <asm/barrier.h>
+
+/*
+ * Hardcode res_smp_cond_load_acquire implementations for arm64 to a custom
+ * version based on [0]. In rqspinlock code, our conditional expression involves
+ * checking the value _and_ additionally a timeout. However, on arm64, the
+ * WFE-based implementation may never spin again if no stores occur to the
+ * locked byte in the lock word. As such, we may be stuck forever if
+ * event-stream based unblocking is not available on the platform for WFE spin
+ * loops (arch_timer_evtstrm_available).
+ *
+ * Once support for smp_cond_load_acquire_timewait [0] lands, we can drop this
+ * copy-paste.
+ *
+ * While we rely on the implementation to amortize the cost of sampling
+ * cond_expr for us, this does not happen when event-stream support is
+ * unavailable; in that case the time_expr check itself is amortized, being
+ * evaluated only once every smp_cond_time_check_count iterations. This is not
+ * the common case, and it would be difficult to fold our logic into the
+ * time_expr_ns >= time_limit_ns comparison, so just let it be. With the event
+ * stream, the loop is woken up at microsecond granularity.
+ *
+ * [0]: https://lore.kernel.org/lkml/20250203214911.898276-1-ankur.a.arora@oracle.com
+ */
+
+#ifndef smp_cond_load_acquire_timewait
+
+#define smp_cond_time_check_count 200
+
+#define __smp_cond_load_relaxed_spinwait(ptr, cond_expr, time_expr_ns, \
+ time_limit_ns) ({ \
+ typeof(ptr) __PTR = (ptr); \
+ __unqual_scalar_typeof(*ptr) VAL; \
+ unsigned int __count = 0; \
+ for (;;) { \
+ VAL = READ_ONCE(*__PTR); \
+ if (cond_expr) \
+ break; \
+ cpu_relax(); \
+ if (__count++ < smp_cond_time_check_count) \
+ continue; \
+ if ((time_expr_ns) >= (time_limit_ns)) \
+ break; \
+ __count = 0; \
+ } \
+ (typeof(*ptr))VAL; \
+})
+
+#define __smp_cond_load_acquire_timewait(ptr, cond_expr, \
+ time_expr_ns, time_limit_ns) \
+({ \
+ typeof(ptr) __PTR = (ptr); \
+ __unqual_scalar_typeof(*ptr) VAL; \
+ for (;;) { \
+ VAL = smp_load_acquire(__PTR); \
+ if (cond_expr) \
+ break; \
+ __cmpwait_relaxed(__PTR, VAL); \
+ if ((time_expr_ns) >= (time_limit_ns)) \
+ break; \
+ } \
+ (typeof(*ptr))VAL; \
+})
+
+#define smp_cond_load_acquire_timewait(ptr, cond_expr, \
+ time_expr_ns, time_limit_ns) \
+({ \
+ __unqual_scalar_typeof(*ptr) _val; \
+ int __wfe = arch_timer_evtstrm_available(); \
+ \
+ if (likely(__wfe)) { \
+ _val = __smp_cond_load_acquire_timewait(ptr, cond_expr, \
+ time_expr_ns, \
+ time_limit_ns); \
+ } else { \
+ _val = __smp_cond_load_relaxed_spinwait(ptr, cond_expr, \
+ time_expr_ns, \
+ time_limit_ns); \
+ smp_acquire__after_ctrl_dep(); \
+ } \
+ (typeof(*ptr))_val; \
+})
+
+#endif
+
+#define res_smp_cond_load_acquire(v, c) smp_cond_load_acquire_timewait(v, c, 0, 1)
+
+#include <asm-generic/rqspinlock.h>
+
+#endif /* _ASM_RQSPINLOCK_H */
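Illustrative sketch (not from the patch): a minimal caller of smp_cond_load_acquire_timewait(), waiting for a word to become zero with a ktime-based deadline. The function and variable names are assumptions.

#include <linux/ktime.h>
#include <asm/rqspinlock.h>

/* Sketch only: wait for *lock_word to drop to zero, give up after ~1ms. */
static bool example_wait_for_unlock(u32 *lock_word)
{
	u64 deadline = ktime_get_mono_fast_ns() + NSEC_PER_MSEC;
	u32 val;

	val = smp_cond_load_acquire_timewait(lock_word, VAL == 0,
					     ktime_get_mono_fast_ns(),
					     deadline);
	return val == 0;
}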
diff --git a/arch/arm64/include/asm/rsi.h b/arch/arm64/include/asm/rsi.h
new file mode 100644
index 000000000000..b42aeac05340
--- /dev/null
+++ b/arch/arm64/include/asm/rsi.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2024 ARM Ltd.
+ */
+
+#ifndef __ASM_RSI_H_
+#define __ASM_RSI_H_
+
+#include <linux/errno.h>
+#include <linux/jump_label.h>
+#include <asm/rsi_cmds.h>
+
+#define RSI_PDEV_NAME "arm-cca-dev"
+
+DECLARE_STATIC_KEY_FALSE(rsi_present);
+
+void __init arm64_rsi_init(void);
+
+bool __arm64_is_protected_mmio(phys_addr_t base, size_t size);
+
+static inline bool is_realm_world(void)
+{
+ return static_branch_unlikely(&rsi_present);
+}
+
+static inline int rsi_set_memory_range(phys_addr_t start, phys_addr_t end,
+ enum ripas state, unsigned long flags)
+{
+ unsigned long ret;
+ phys_addr_t top;
+
+ while (start != end) {
+ ret = rsi_set_addr_range_state(start, end, state, flags, &top);
+ if (ret || top < start || top > end)
+ return -EINVAL;
+ start = top;
+ }
+
+ return 0;
+}
+
+/*
+ * Convert the specified range to RAM. Do not use this if you rely on the
+ * contents of a page that may already be in RAM state.
+ */
+static inline int rsi_set_memory_range_protected(phys_addr_t start,
+ phys_addr_t end)
+{
+ return rsi_set_memory_range(start, end, RSI_RIPAS_RAM,
+ RSI_CHANGE_DESTROYED);
+}
+
+/*
+ * Convert the specified range to RAM. Do not convert any pages that may have
+ * been DESTROYED without our permission.
+ */
+static inline int rsi_set_memory_range_protected_safe(phys_addr_t start,
+ phys_addr_t end)
+{
+ return rsi_set_memory_range(start, end, RSI_RIPAS_RAM,
+ RSI_NO_CHANGE_DESTROYED);
+}
+
+static inline int rsi_set_memory_range_shared(phys_addr_t start,
+ phys_addr_t end)
+{
+ return rsi_set_memory_range(start, end, RSI_RIPAS_EMPTY,
+ RSI_CHANGE_DESTROYED);
+}
+#endif /* __ASM_RSI_H_ */
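Illustrative sketch (not from the patch): flipping one page between protected RAM and the shared (EMPTY) state, assuming the page is in the linear map and comes from the page allocator.

#include <linux/mm.h>
#include <asm/rsi.h>

/* Sketch only: make one page shared with the host, then protected again. */
static int example_share_page(struct page *page)
{
	phys_addr_t start = page_to_phys(page);
	phys_addr_t end = start + PAGE_SIZE;
	int ret;

	ret = rsi_set_memory_range_shared(start, end);
	if (ret)
		return ret;

	/* ... hand the IPA to the host, do the I/O ... */

	return rsi_set_memory_range_protected(start, end);
}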
diff --git a/arch/arm64/include/asm/rsi_cmds.h b/arch/arm64/include/asm/rsi_cmds.h
new file mode 100644
index 000000000000..2c8763876dfb
--- /dev/null
+++ b/arch/arm64/include/asm/rsi_cmds.h
@@ -0,0 +1,162 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2023 ARM Ltd.
+ */
+
+#ifndef __ASM_RSI_CMDS_H
+#define __ASM_RSI_CMDS_H
+
+#include <linux/arm-smccc.h>
+#include <linux/string.h>
+#include <asm/memory.h>
+
+#include <asm/rsi_smc.h>
+
+#define RSI_GRANULE_SHIFT 12
+#define RSI_GRANULE_SIZE (_AC(1, UL) << RSI_GRANULE_SHIFT)
+
+enum ripas {
+ RSI_RIPAS_EMPTY = 0,
+ RSI_RIPAS_RAM = 1,
+ RSI_RIPAS_DESTROYED = 2,
+ RSI_RIPAS_DEV = 3,
+};
+
+static inline unsigned long rsi_request_version(unsigned long req,
+ unsigned long *out_lower,
+ unsigned long *out_higher)
+{
+ struct arm_smccc_res res;
+
+ arm_smccc_smc(SMC_RSI_ABI_VERSION, req, 0, 0, 0, 0, 0, 0, &res);
+
+ if (out_lower)
+ *out_lower = res.a1;
+ if (out_higher)
+ *out_higher = res.a2;
+
+ return res.a0;
+}
+
+static inline unsigned long rsi_get_realm_config(struct realm_config *cfg)
+{
+ struct arm_smccc_res res;
+
+ arm_smccc_smc(SMC_RSI_REALM_CONFIG, virt_to_phys(cfg),
+ 0, 0, 0, 0, 0, 0, &res);
+ return res.a0;
+}
+
+static inline unsigned long rsi_ipa_state_get(phys_addr_t start,
+ phys_addr_t end,
+ enum ripas *state,
+ phys_addr_t *top)
+{
+ struct arm_smccc_res res;
+
+ arm_smccc_smc(SMC_RSI_IPA_STATE_GET,
+ start, end, 0, 0, 0, 0, 0,
+ &res);
+
+ if (res.a0 == RSI_SUCCESS) {
+ if (top)
+ *top = res.a1;
+ if (state)
+ *state = res.a2;
+ }
+
+ return res.a0;
+}
+
+static inline long rsi_set_addr_range_state(phys_addr_t start,
+ phys_addr_t end,
+ enum ripas state,
+ unsigned long flags,
+ phys_addr_t *top)
+{
+ struct arm_smccc_res res;
+
+ arm_smccc_smc(SMC_RSI_IPA_STATE_SET, start, end, state,
+ flags, 0, 0, 0, &res);
+
+ if (top)
+ *top = res.a1;
+
+ if (res.a2 != RSI_ACCEPT)
+ return -EPERM;
+
+ return res.a0;
+}
+
+/**
+ * rsi_attestation_token_init - Initialise the operation to retrieve an
+ * attestation token.
+ *
+ * @challenge: The challenge data to be used in the attestation token
+ * generation.
+ * @size: Size of the challenge data in bytes.
+ *
+ * Initialises the attestation token generation and returns an upper bound
+ * on the attestation token size that can be used to allocate an adequate
+ * buffer. The caller is expected to subsequently call
+ * rsi_attestation_token_continue() to retrieve the attestation token data on
+ * the same CPU.
+ *
+ * Return:
+ * On success, returns the upper limit of the attestation token size.
+ * Otherwise, -EINVAL.
+ */
+static inline long
+rsi_attestation_token_init(const u8 *challenge, unsigned long size)
+{
+ struct arm_smccc_1_2_regs regs = { 0 };
+
+ /* The challenge must be at least 32 bytes and at most 64 bytes */
+ if (!challenge || size < 32 || size > 64)
+ return -EINVAL;
+
+ regs.a0 = SMC_RSI_ATTESTATION_TOKEN_INIT;
+ memcpy(&regs.a1, challenge, size);
+ arm_smccc_1_2_smc(&regs, &regs);
+
+ if (regs.a0 == RSI_SUCCESS)
+ return regs.a1;
+
+ return -EINVAL;
+}
+
+/**
+ * rsi_attestation_token_continue - Continue the operation to retrieve an
+ * attestation token.
+ *
+ * @granule: {I}PA of the Granule to which the token will be written.
+ * @offset: Offset within Granule to start of buffer in bytes.
+ * @size: The size of the buffer.
+ * @len: The number of bytes written to the buffer.
+ *
+ * Retrieves up to RSI_GRANULE_SIZE bytes of token data per call. The caller
+ * is expected to call rsi_attestation_token_init() before calling this
+ * function to retrieve the attestation token.
+ *
+ * Return:
+ * * %RSI_SUCCESS - Attestation token retrieved successfully.
+ * * %RSI_INCOMPLETE - Token generation is not complete.
+ * * %RSI_ERROR_INPUT - A parameter was not valid.
+ * * %RSI_ERROR_STATE - Attestation not in progress.
+ */
+static inline unsigned long rsi_attestation_token_continue(phys_addr_t granule,
+ unsigned long offset,
+ unsigned long size,
+ unsigned long *len)
+{
+ struct arm_smccc_res res;
+
+ arm_smccc_1_1_invoke(SMC_RSI_ATTESTATION_TOKEN_CONTINUE,
+ granule, offset, size, 0, &res);
+
+ if (len)
+ *len = res.a1;
+ return res.a0;
+}
+
+#endif /* __ASM_RSI_CMDS_H */
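Illustrative sketch (not from the patch) of the two-step attestation flow documented above, assuming a kmalloc()-ed buffer in the linear map, a 64-byte challenge, and that the caller stays on one CPU between the INIT and CONTINUE calls; error handling is simplified.

#include <linux/errno.h>
#include <linux/types.h>
#include <asm/memory.h>
#include <asm/rsi_cmds.h>

static long example_get_token(const u8 challenge[64], u8 *buf, size_t buf_size)
{
	unsigned long ret, copied = 0;
	long max_size;

	max_size = rsi_attestation_token_init(challenge, 64);
	if (max_size < 0)
		return max_size;
	if (buf_size < (size_t)max_size)
		return -ENOMEM;

	do {
		phys_addr_t pa = virt_to_phys(buf + copied);
		unsigned long gran_off = pa % RSI_GRANULE_SIZE;
		unsigned long len;

		/* At most one granule's worth is copied per CONTINUE call. */
		ret = rsi_attestation_token_continue(pa - gran_off, gran_off,
						     RSI_GRANULE_SIZE - gran_off,
						     &len);
		copied += len;
	} while (ret == RSI_INCOMPLETE);

	return ret == RSI_SUCCESS ? copied : -EIO;
}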
diff --git a/arch/arm64/include/asm/rsi_smc.h b/arch/arm64/include/asm/rsi_smc.h
new file mode 100644
index 000000000000..6cb070eca9e9
--- /dev/null
+++ b/arch/arm64/include/asm/rsi_smc.h
@@ -0,0 +1,193 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2023 ARM Ltd.
+ */
+
+#ifndef __ASM_RSI_SMC_H_
+#define __ASM_RSI_SMC_H_
+
+#include <linux/arm-smccc.h>
+
+/*
+ * This file describes the Realm Services Interface (RSI) Application Binary
+ * Interface (ABI) for SMC calls made from within the Realm to the RMM and
+ * serviced by the RMM.
+ */
+
+/*
+ * The major version number of the RSI implementation. This is increased when
+ * the binary format or semantics of the SMC calls change.
+ */
+#define RSI_ABI_VERSION_MAJOR UL(1)
+
+/*
+ * The minor version number of the RSI implementation. This is increased when
+ * a bug is fixed, or a feature is added without breaking binary compatibility.
+ */
+#define RSI_ABI_VERSION_MINOR UL(0)
+
+#define RSI_ABI_VERSION ((RSI_ABI_VERSION_MAJOR << 16) | \
+ RSI_ABI_VERSION_MINOR)
+
+#define RSI_ABI_VERSION_GET_MAJOR(_version) ((_version) >> 16)
+#define RSI_ABI_VERSION_GET_MINOR(_version) ((_version) & 0xFFFF)
+
+#define RSI_SUCCESS UL(0)
+#define RSI_ERROR_INPUT UL(1)
+#define RSI_ERROR_STATE UL(2)
+#define RSI_INCOMPLETE UL(3)
+#define RSI_ERROR_UNKNOWN UL(4)
+
+#define SMC_RSI_FID(n) ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
+ ARM_SMCCC_SMC_64, \
+ ARM_SMCCC_OWNER_STANDARD, \
+ n)
+
+/*
+ * Returns RSI version.
+ *
+ * arg1 == Requested interface revision
+ * ret0 == Status / error
+ * ret1 == Lower implemented interface revision
+ * ret2 == Higher implemented interface revision
+ */
+#define SMC_RSI_ABI_VERSION SMC_RSI_FID(0x190)
+
+/*
+ * Read feature register.
+ *
+ * arg1 == Feature register index
+ * ret0 == Status / error
+ * ret1 == Feature register value
+ */
+#define SMC_RSI_FEATURES SMC_RSI_FID(0x191)
+
+/*
+ * Read measurement for the current Realm.
+ *
+ * arg1 == Index, which measurements slot to read
+ * ret0 == Status / error
+ * ret1 == Measurement value, bytes: 0 - 7
+ * ret2 == Measurement value, bytes: 8 - 15
+ * ret3 == Measurement value, bytes: 16 - 23
+ * ret4 == Measurement value, bytes: 24 - 31
+ * ret5 == Measurement value, bytes: 32 - 39
+ * ret6 == Measurement value, bytes: 40 - 47
+ * ret7 == Measurement value, bytes: 48 - 55
+ * ret8 == Measurement value, bytes: 56 - 63
+ */
+#define SMC_RSI_MEASUREMENT_READ SMC_RSI_FID(0x192)
+
+/*
+ * Extend Realm Extensible Measurement (REM) value.
+ *
+ * arg1 == Index, which measurements slot to extend
+ * arg2 == Size of realm measurement in bytes, max 64 bytes
+ * arg3 == Measurement value, bytes: 0 - 7
+ * arg4 == Measurement value, bytes: 8 - 15
+ * arg5 == Measurement value, bytes: 16 - 23
+ * arg6 == Measurement value, bytes: 24 - 31
+ * arg7 == Measurement value, bytes: 32 - 39
+ * arg8 == Measurement value, bytes: 40 - 47
+ * arg9 == Measurement value, bytes: 48 - 55
+ * arg10 == Measurement value, bytes: 56 - 63
+ * ret0 == Status / error
+ */
+#define SMC_RSI_MEASUREMENT_EXTEND SMC_RSI_FID(0x193)
+
+/*
+ * Initialize the operation to retrieve an attestation token.
+ *
+ * arg1 == Challenge value, bytes: 0 - 7
+ * arg2 == Challenge value, bytes: 8 - 15
+ * arg3 == Challenge value, bytes: 16 - 23
+ * arg4 == Challenge value, bytes: 24 - 31
+ * arg5 == Challenge value, bytes: 32 - 39
+ * arg6 == Challenge value, bytes: 40 - 47
+ * arg7 == Challenge value, bytes: 48 - 55
+ * arg8 == Challenge value, bytes: 56 - 63
+ * ret0 == Status / error
+ * ret1 == Upper bound of token size in bytes
+ */
+#define SMC_RSI_ATTESTATION_TOKEN_INIT SMC_RSI_FID(0x194)
+
+/*
+ * Continue the operation to retrieve an attestation token.
+ *
+ * arg1 == The IPA of token buffer
+ * arg2 == Offset within the granule of the token buffer
+ * arg3 == Size of the granule buffer
+ * ret0 == Status / error
+ * ret1 == Length of token bytes copied to the granule buffer
+ */
+#define SMC_RSI_ATTESTATION_TOKEN_CONTINUE SMC_RSI_FID(0x195)
+
+#ifndef __ASSEMBLY__
+
+struct realm_config {
+ union {
+ struct {
+ unsigned long ipa_bits; /* Width of IPA in bits */
+ unsigned long hash_algo; /* Hash algorithm */
+ };
+ u8 pad[0x200];
+ };
+ union {
+ u8 rpv[64]; /* Realm Personalization Value */
+ u8 pad2[0xe00];
+ };
+ /*
+ * The RMM requires the configuration structure to be aligned to a 4k
+ * boundary; ensure this happens by aligning this structure.
+ */
+} __aligned(0x1000);
+
+#endif /* __ASSEMBLY__ */
+
+/*
+ * Read configuration for the current Realm.
+ *
+ * arg1 == struct realm_config addr
+ * ret0 == Status / error
+ */
+#define SMC_RSI_REALM_CONFIG SMC_RSI_FID(0x196)
+
+/*
+ * Request RIPAS of a target IPA range to be changed to a specified value.
+ *
+ * arg1 == Base IPA address of target region
+ * arg2 == Top of the region
+ * arg3 == RIPAS value
+ * arg4 == flags
+ * ret0 == Status / error
+ * ret1 == Top of modified IPA range
+ * ret2 == Whether the Host accepted or rejected the request
+ */
+#define SMC_RSI_IPA_STATE_SET SMC_RSI_FID(0x197)
+
+#define RSI_NO_CHANGE_DESTROYED UL(0)
+#define RSI_CHANGE_DESTROYED UL(1)
+
+#define RSI_ACCEPT UL(0)
+#define RSI_REJECT UL(1)
+
+/*
+ * Get RIPAS of a target IPA range.
+ *
+ * arg1 == Base IPA of target region
+ * arg2 == End of target IPA region
+ * ret0 == Status / error
+ * ret1 == Top of IPA region which has the reported RIPAS value
+ * ret2 == RIPAS value
+ */
+#define SMC_RSI_IPA_STATE_GET SMC_RSI_FID(0x198)
+
+/*
+ * Make a Host call.
+ *
+ * arg1 == IPA of host call structure
+ * ret0 == Status / error
+ */
+#define SMC_RSI_HOST_CALL SMC_RSI_FID(0x199)
+
+#endif /* __ASM_RSI_SMC_H_ */
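Illustrative sketch (not from the patch): the version negotiation implied by the RSI_ABI_VERSION macros, using rsi_request_version() from rsi_cmds.h. Accepting only a matching major version is an assumption about policy.

#include <linux/types.h>
#include <asm/rsi_cmds.h>

static bool example_rsi_version_ok(void)
{
	unsigned long lower, higher;

	if (rsi_request_version(RSI_ABI_VERSION, &lower, &higher))
		return false;	/* SMC failed or no compatible revision */

	return RSI_ABI_VERSION_GET_MAJOR(lower) == RSI_ABI_VERSION_MAJOR;
}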
diff --git a/arch/arm64/include/asm/runtime-const.h b/arch/arm64/include/asm/runtime-const.h
new file mode 100644
index 000000000000..be5915669d23
--- /dev/null
+++ b/arch/arm64/include/asm/runtime-const.h
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_RUNTIME_CONST_H
+#define _ASM_RUNTIME_CONST_H
+
+#include <asm/cacheflush.h>
+
+/* Sigh. You can still run arm64 in BE mode */
+#include <asm/byteorder.h>
+
+#define runtime_const_ptr(sym) ({ \
+ typeof(sym) __ret; \
+ asm_inline("1:\t" \
+ "movz %0, #0xcdef\n\t" \
+ "movk %0, #0x89ab, lsl #16\n\t" \
+ "movk %0, #0x4567, lsl #32\n\t" \
+ "movk %0, #0x0123, lsl #48\n\t" \
+ ".pushsection runtime_ptr_" #sym ",\"a\"\n\t" \
+ ".long 1b - .\n\t" \
+ ".popsection" \
+ :"=r" (__ret)); \
+ __ret; })
+
+#define runtime_const_shift_right_32(val, sym) ({ \
+ unsigned long __ret; \
+ asm_inline("1:\t" \
+ "lsr %w0,%w1,#12\n\t" \
+ ".pushsection runtime_shift_" #sym ",\"a\"\n\t" \
+ ".long 1b - .\n\t" \
+ ".popsection" \
+ :"=r" (__ret) \
+ :"r" (0u+(val))); \
+ __ret; })
+
+#define runtime_const_init(type, sym) do { \
+ extern s32 __start_runtime_##type##_##sym[]; \
+ extern s32 __stop_runtime_##type##_##sym[]; \
+ runtime_const_fixup(__runtime_fixup_##type, \
+ (unsigned long)(sym), \
+ __start_runtime_##type##_##sym, \
+ __stop_runtime_##type##_##sym); \
+} while (0)
+
+/* 16-bit immediate for wide move (movz and movk) in bits 5..20 */
+static inline void __runtime_fixup_16(__le32 *p, unsigned int val)
+{
+ u32 insn = le32_to_cpu(*p);
+ insn &= 0xffe0001f;
+ insn |= (val & 0xffff) << 5;
+ *p = cpu_to_le32(insn);
+}
+
+static inline void __runtime_fixup_caches(void *where, unsigned int insns)
+{
+ unsigned long va = (unsigned long)where;
+ caches_clean_inval_pou(va, va + 4*insns);
+}
+
+static inline void __runtime_fixup_ptr(void *where, unsigned long val)
+{
+ __le32 *p = lm_alias(where);
+ __runtime_fixup_16(p, val);
+ __runtime_fixup_16(p+1, val >> 16);
+ __runtime_fixup_16(p+2, val >> 32);
+ __runtime_fixup_16(p+3, val >> 48);
+ __runtime_fixup_caches(where, 4);
+}
+
+/* Immediate value is 6 bits starting at bit #16 */
+static inline void __runtime_fixup_shift(void *where, unsigned long val)
+{
+ __le32 *p = lm_alias(where);
+ u32 insn = le32_to_cpu(*p);
+ insn &= 0xffc0ffff;
+ insn |= (val & 63) << 16;
+ *p = cpu_to_le32(insn);
+ __runtime_fixup_caches(where, 1);
+}
+
+static inline void runtime_const_fixup(void (*fn)(void *, unsigned long),
+ unsigned long val, s32 *start, s32 *end)
+{
+ while (start < end) {
+ fn(*start + (void *)start, val);
+ start++;
+ }
+}
+
+#endif
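Illustrative sketch (not from the patch): a typical user of the runtime-const helpers. It assumes the linker script collects the runtime_ptr_*/runtime_shift_* sections (as vmlinux.lds.S does for the in-tree users); example_table and example_shift are made-up symbols.

#include <linux/cache.h>
#include <linux/init.h>
#include <linux/list.h>
#include <asm/runtime-const.h>

static struct hlist_head *example_table __ro_after_init;
static unsigned int example_shift __ro_after_init;

/* Each use emits a patchable movz/movk (or lsr) sequence. */
static inline struct hlist_head *example_hash_bucket(u32 hash)
{
	return runtime_const_ptr(example_table) +
	       runtime_const_shift_right_32(hash, example_shift);
}

/* Called once the final values are known, early during boot. */
static void __init example_table_init(struct hlist_head *table, unsigned int shift)
{
	example_table = table;
	example_shift = shift;
	runtime_const_init(ptr, example_table);
	runtime_const_init(shift, example_shift);
}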
diff --git a/arch/arm64/include/asm/rwonce.h b/arch/arm64/include/asm/rwonce.h
index 56f7b1d4d54b..97d9256d33c9 100644
--- a/arch/arm64/include/asm/rwonce.h
+++ b/arch/arm64/include/asm/rwonce.h
@@ -12,16 +12,12 @@
#ifndef BUILD_VDSO
-#ifdef CONFIG_AS_HAS_LDAPR
#define __LOAD_RCPC(sfx, regs...) \
ALTERNATIVE( \
"ldar" #sfx "\t" #regs, \
".arch_extension rcpc\n" \
"ldapr" #sfx "\t" #regs, \
ARM64_HAS_LDAPR)
-#else
-#define __LOAD_RCPC(sfx, regs...) "ldar" #sfx "\t" #regs
-#endif /* CONFIG_AS_HAS_LDAPR */
/*
* When building with LTO, there is an increased risk of the compiler
diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h
index 8297bccf0784..a76f9b387a26 100644
--- a/arch/arm64/include/asm/scs.h
+++ b/arch/arm64/include/asm/scs.h
@@ -5,25 +5,56 @@
#ifdef __ASSEMBLY__
#include <asm/asm-offsets.h>
+#include <asm/sysreg.h>
#ifdef CONFIG_SHADOW_CALL_STACK
scs_sp .req x18
- .macro scs_load tsk
- ldr scs_sp, [\tsk, #TSK_TI_SCS_SP]
+ .macro scs_load_current
+ get_current_task scs_sp
+ ldr scs_sp, [scs_sp, #TSK_TI_SCS_SP]
.endm
.macro scs_save tsk
str scs_sp, [\tsk, #TSK_TI_SCS_SP]
.endm
#else
- .macro scs_load tsk
+ .macro scs_load_current
.endm
.macro scs_save tsk
.endm
#endif /* CONFIG_SHADOW_CALL_STACK */
+
+#else
+
+#include <linux/scs.h>
+#include <asm/cpufeature.h>
+
+#ifdef CONFIG_UNWIND_PATCH_PAC_INTO_SCS
+static inline void dynamic_scs_init(void)
+{
+ extern bool __pi_dynamic_scs_is_enabled;
+
+ if (__pi_dynamic_scs_is_enabled) {
+ pr_info("Enabling dynamic shadow call stack\n");
+ static_branch_enable(&dynamic_scs_enabled);
+ }
+}
+#else
+static inline void dynamic_scs_init(void) {}
+#endif
+
+enum {
+ EDYNSCS_INVALID_CIE_HEADER = 1,
+ EDYNSCS_INVALID_CIE_SDATA_SIZE = 2,
+ EDYNSCS_INVALID_FDE_AUGM_DATA_SIZE = 3,
+ EDYNSCS_INVALID_CFA_OPCODE = 4,
+};
+
+int __pi_scs_patch(const u8 eh_frame[], int size);
+
#endif /* __ASSEMBLY__ */
#endif /* _ASM_SCS_H */
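Illustrative sketch (not from the patch): a caller of __pi_scs_patch() reporting the EDYNSCS_* error codes; locating the .eh_frame section is assumed to be done elsewhere (e.g. by the module loader).

#include <linux/printk.h>
#include <asm/scs.h>

/* Sketch only: patch one .eh_frame blob and report any failure. */
static int example_patch_eh_frame(const u8 *eh_frame, int size)
{
	int ret = __pi_scs_patch(eh_frame, size);

	if (ret)
		pr_err("dynamic SCS patching failed (%d)\n", ret);

	return ret;
}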
diff --git a/arch/arm64/include/asm/sdei.h b/arch/arm64/include/asm/sdei.h
index 7bea1d705dd6..484cb6972e99 100644
--- a/arch/arm64/include/asm/sdei.h
+++ b/arch/arm64/include/asm/sdei.h
@@ -17,6 +17,9 @@
#include <asm/virt.h>
+DECLARE_PER_CPU(struct sdei_registered_event *, sdei_active_normal_event);
+DECLARE_PER_CPU(struct sdei_registered_event *, sdei_active_critical_event);
+
extern unsigned long sdei_exit_mode;
/* Software Delegated Exception entry point from firmware */
@@ -29,6 +32,9 @@ asmlinkage void __sdei_asm_entry_trampoline(unsigned long event_num,
unsigned long pc,
unsigned long pstate);
+/* Abort a running handler. Context is discarded. */
+void __sdei_handler_abort(void);
+
/*
* The above entry point does the minimum to call C code. This function does
* anything else needed before calling the driver.
@@ -43,22 +49,5 @@ unsigned long do_sdei_event(struct pt_regs *regs,
unsigned long sdei_arch_get_entry_point(int conduit);
#define sdei_arch_get_entry_point(x) sdei_arch_get_entry_point(x)
-struct stack_info;
-
-bool _on_sdei_stack(unsigned long sp, unsigned long size,
- struct stack_info *info);
-static inline bool on_sdei_stack(unsigned long sp, unsigned long size,
- struct stack_info *info)
-{
- if (!IS_ENABLED(CONFIG_VMAP_STACK))
- return false;
- if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE))
- return false;
- if (in_nmi())
- return _on_sdei_stack(sp, size, info);
-
- return false;
-}
-
#endif /* __ASSEMBLY__ */
#endif /* __ASM_SDEI_H */
diff --git a/arch/arm64/include/asm/seccomp.h b/arch/arm64/include/asm/seccomp.h
index 30256233788b..bf6bf40bc5ab 100644
--- a/arch/arm64/include/asm/seccomp.h
+++ b/arch/arm64/include/asm/seccomp.h
@@ -8,13 +8,13 @@
#ifndef _ASM_SECCOMP_H
#define _ASM_SECCOMP_H
-#include <asm/unistd.h>
+#include <asm/unistd_compat_32.h>
#ifdef CONFIG_COMPAT
-#define __NR_seccomp_read_32 __NR_compat_read
-#define __NR_seccomp_write_32 __NR_compat_write
-#define __NR_seccomp_exit_32 __NR_compat_exit
-#define __NR_seccomp_sigreturn_32 __NR_compat_rt_sigreturn
+#define __NR_seccomp_read_32 __NR_compat32_read
+#define __NR_seccomp_write_32 __NR_compat32_write
+#define __NR_seccomp_exit_32 __NR_compat32_exit
+#define __NR_seccomp_sigreturn_32 __NR_compat32_rt_sigreturn
#endif /* CONFIG_COMPAT */
#include <asm-generic/seccomp.h>
@@ -24,7 +24,7 @@
#define SECCOMP_ARCH_NATIVE_NAME "aarch64"
#ifdef CONFIG_COMPAT
# define SECCOMP_ARCH_COMPAT AUDIT_ARCH_ARM
-# define SECCOMP_ARCH_COMPAT_NR __NR_compat_syscalls
+# define SECCOMP_ARCH_COMPAT_NR __NR_compat32_syscalls
# define SECCOMP_ARCH_COMPAT_NAME "arm"
#endif
diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h
index 40971ac1303f..51b0d594239e 100644
--- a/arch/arm64/include/asm/sections.h
+++ b/arch/arm64/include/asm/sections.h
@@ -11,6 +11,7 @@ extern char __alt_instructions[], __alt_instructions_end[];
extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[];
extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[];
extern char __hyp_text_start[], __hyp_text_end[];
+extern char __hyp_data_start[], __hyp_data_end[];
extern char __hyp_rodata_start[], __hyp_rodata_end[];
extern char __hyp_reloc_begin[], __hyp_reloc_end[];
extern char __hyp_bss_start[], __hyp_bss_end[];
diff --git a/arch/arm64/include/asm/semihost.h b/arch/arm64/include/asm/semihost.h
new file mode 100644
index 000000000000..87e353dab868
--- /dev/null
+++ b/arch/arm64/include/asm/semihost.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Adapted for ARM and earlycon:
+ * Copyright (C) 2014 Linaro Ltd.
+ * Author: Rob Herring <robh@kernel.org>
+ */
+
+#ifndef _ARM64_SEMIHOST_H_
+#define _ARM64_SEMIHOST_H_
+
+struct uart_port;
+
+static inline void smh_putc(struct uart_port *port, unsigned char c)
+{
+ asm volatile("mov x1, %0\n"
+ "mov x0, #3\n"
+ "hlt 0xf000\n"
+ : : "r" (&c) : "x0", "x1", "memory");
+}
+
+#endif /* _ARM64_SEMIHOST_H_ */
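smh_putc() issues the semihosting WRITEC operation (x0 = 3) via HLT #0xf000 for a single character. As a rough sketch (not from the patch), a string writer such as an earlycon hook is simply a loop around it:

#include <asm/semihost.h>

/* Sketch only: emit a NUL-terminated string one character at a time. */
static void example_semihost_puts(struct uart_port *port, const char *s)
{
	while (*s)
		smh_putc(port, *s++);
}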
diff --git a/arch/arm64/include/asm/set_memory.h b/arch/arm64/include/asm/set_memory.h
index 0f740b781187..90f61b17275e 100644
--- a/arch/arm64/include/asm/set_memory.h
+++ b/arch/arm64/include/asm/set_memory.h
@@ -3,6 +3,7 @@
#ifndef _ASM_ARM64_SET_MEMORY_H
#define _ASM_ARM64_SET_MEMORY_H
+#include <asm/mem_encrypt.h>
#include <asm-generic/set_memory.h>
bool can_set_direct_map(void);
@@ -12,6 +13,10 @@ int set_memory_valid(unsigned long addr, int numpages, int enable);
int set_direct_map_invalid_noflush(struct page *page);
int set_direct_map_default_noflush(struct page *page);
+int set_direct_map_valid_noflush(struct page *page, unsigned nr, bool valid);
bool kernel_page_present(struct page *page);
+int set_memory_encrypted(unsigned long addr, int numpages);
+int set_memory_decrypted(unsigned long addr, int numpages);
+
#endif /* _ASM_ARM64_SET_MEMORY_H */
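Illustrative sketch (not from the patch): how a realm (CCA) guest would use the encrypt/decrypt pair to share a page with the host; allocation and error handling are simplified assumptions.

#include <linux/errno.h>
#include <linux/gfp.h>
#include <asm/set_memory.h>

/* Sketch only: share one kernel page with the host, then unshare it. */
static int example_share_with_host(void)
{
	unsigned long addr = __get_free_page(GFP_KERNEL);
	int ret;

	if (!addr)
		return -ENOMEM;

	ret = set_memory_decrypted(addr, 1);
	if (!ret) {
		/* ... the host can now access this page ... */
		ret = set_memory_encrypted(addr, 1);
	}

	free_page(addr);
	return ret;
}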
diff --git a/arch/arm64/include/asm/setup.h b/arch/arm64/include/asm/setup.h
index 6437df661700..ba269a7a3201 100644
--- a/arch/arm64/include/asm/setup.h
+++ b/arch/arm64/include/asm/setup.h
@@ -3,10 +3,9 @@
#ifndef __ARM64_ASM_SETUP_H
#define __ARM64_ASM_SETUP_H
-#include <uapi/asm/setup.h>
+#include <linux/string.h>
-void *get_early_fdt_ptr(void);
-void early_fdt_map(u64 dt_phys);
+#include <uapi/asm/setup.h>
/*
* These two variables are used in the head.S file.
@@ -14,4 +13,32 @@ void early_fdt_map(u64 dt_phys);
extern phys_addr_t __fdt_pointer __initdata;
extern u64 __cacheline_aligned boot_args[4];
+static inline bool arch_parse_debug_rodata(char *arg)
+{
+ extern bool rodata_enabled;
+ extern bool rodata_full;
+
+ if (!arg)
+ return false;
+
+ if (!strcmp(arg, "full")) {
+ rodata_enabled = rodata_full = true;
+ return true;
+ }
+
+ if (!strcmp(arg, "off")) {
+ rodata_enabled = rodata_full = false;
+ return true;
+ }
+
+ if (!strcmp(arg, "on")) {
+ rodata_enabled = true;
+ rodata_full = false;
+ return true;
+ }
+
+ return false;
+}
+#define arch_parse_debug_rodata arch_parse_debug_rodata
+
#endif
diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h
index 6a75d7ecdcaa..8e86c9e70e48 100644
--- a/arch/arm64/include/asm/simd.h
+++ b/arch/arm64/include/asm/simd.h
@@ -12,8 +12,6 @@
#include <linux/preempt.h>
#include <linux/types.h>
-DECLARE_PER_CPU(bool, fpsimd_context_busy);
-
#ifdef CONFIG_KERNEL_MODE_NEON
/*
@@ -28,17 +26,10 @@ static __must_check inline bool may_use_simd(void)
/*
* We must make sure that SVE has been initialized properly
* before using SIMD in the kernel.
- * fpsimd_context_busy is only set while preemption is disabled,
- * and is clear whenever preemption is enabled. Since
- * this_cpu_read() is atomic w.r.t. preemption, fpsimd_context_busy
- * cannot change under our feet -- if it's set we cannot be
- * migrated, and if it's clear we cannot be migrated to a CPU
- * where it is set.
*/
return !WARN_ON(!system_capabilities_finalized()) &&
system_supports_fpsimd() &&
- !in_hardirq() && !irqs_disabled() && !in_nmi() &&
- !this_cpu_read(fpsimd_context_busy);
+ !in_hardirq() && !irqs_disabled() && !in_nmi();
}
#else /* ! CONFIG_KERNEL_MODE_NEON */
diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index fc55f5a57a06..d48ef6d5abcc 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -25,22 +25,11 @@
#ifndef __ASSEMBLY__
-#include <asm/percpu.h>
-
#include <linux/threads.h>
#include <linux/cpumask.h>
#include <linux/thread_info.h>
-DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number);
-
-/*
- * We don't use this_cpu_read(cpu_number) as that has implicit writes to
- * preempt_count, and associated (compiler) barriers, that we'd like to avoid
- * the expense of. If we're preemptible, the value can be stale at use anyway.
- * And we can't use this_cpu_ptr() either, as that winds up recursing back
- * here under CONFIG_DEBUG_PREEMPT=y.
- */
-#define raw_smp_processor_id() (*raw_cpu_ptr(&cpu_number))
+#define raw_smp_processor_id() (current_thread_info()->cpu)
/*
* Logical CPU mapping.
@@ -61,10 +50,32 @@ struct seq_file;
*/
extern void smp_init_cpus(void);
+enum ipi_msg_type {
+ IPI_RESCHEDULE,
+ IPI_CALL_FUNC,
+ IPI_CPU_STOP,
+ IPI_CPU_STOP_NMI,
+ IPI_TIMER,
+ IPI_IRQ_WORK,
+ NR_IPI,
+ /*
+ * Any enum >= NR_IPI and < MAX_IPI is special and not traceable
+ * with trace_ipi_*
+ */
+ IPI_CPU_BACKTRACE = NR_IPI,
+ IPI_KGDB_ROUNDUP,
+ MAX_IPI
+};
+
/*
* Register IPI interrupts with the arch SMP code
*/
-extern void set_smp_ipi_range(int ipi_base, int nr_ipi);
+extern void set_smp_ipi_range_percpu(int ipi_base, int nr_ipi, int ncpus);
+
+static inline void set_smp_ipi_range(int ipi_base, int n)
+{
+ set_smp_ipi_range_percpu(ipi_base, n, 0);
+}
/*
* Called from the secondary holding pen, this is the secondary CPU entry point.
@@ -89,9 +100,9 @@ extern void arch_send_call_function_single_ipi(int cpu);
extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
-extern void arch_send_wakeup_ipi_mask(const struct cpumask *mask);
+extern void arch_send_wakeup_ipi(unsigned int cpu);
#else
-static inline void arch_send_wakeup_ipi_mask(const struct cpumask *mask)
+static inline void arch_send_wakeup_ipi(unsigned int cpu)
{
BUILD_BUG();
}
@@ -99,11 +110,11 @@ static inline void arch_send_wakeup_ipi_mask(const struct cpumask *mask)
extern int __cpu_disable(void);
-extern void __cpu_die(unsigned int cpu);
-extern void cpu_die(void);
-extern void cpu_die_early(void);
+static inline void __cpu_die(unsigned int cpu) { }
+extern void __noreturn cpu_die(void);
+extern void __noreturn cpu_die_early(void);
-static inline void cpu_park_loop(void)
+static inline void __noreturn cpu_park_loop(void)
{
for (;;) {
wfe();
@@ -123,7 +134,7 @@ static inline void update_cpu_boot_status(int val)
* which calls for a kernel panic. Update the boot status and park the calling
* CPU.
*/
-static inline void cpu_panic_kernel(void)
+static inline void __noreturn cpu_panic_kernel(void)
{
update_cpu_boot_status(CPU_PANIC_KERNEL);
cpu_park_loop();
@@ -143,7 +154,6 @@ bool cpus_are_stuck_in_kernel(void);
extern void crash_smp_send_stop(void);
extern bool smp_crash_stop_failed(void);
-extern void panic_smp_self_stop(void);
#endif /* ifndef __ASSEMBLY__ */
diff --git a/arch/arm64/include/asm/sparsemem.h b/arch/arm64/include/asm/sparsemem.h
index 4b73463423c3..84783efdc9d1 100644
--- a/arch/arm64/include/asm/sparsemem.h
+++ b/arch/arm64/include/asm/sparsemem.h
@@ -5,12 +5,15 @@
#ifndef __ASM_SPARSEMEM_H
#define __ASM_SPARSEMEM_H
-#define MAX_PHYSMEM_BITS CONFIG_ARM64_PA_BITS
+#include <asm/pgtable-prot.h>
+
+#define MAX_PHYSMEM_BITS PHYS_MASK_SHIFT
+#define MAX_POSSIBLE_PHYSMEM_BITS (52)
/*
* Section size must be at least 512MB for 64K base
* page size config. Otherwise it will be less than
- * (MAX_ORDER - 1) and the build process will fail.
+ * MAX_PAGE_ORDER and the build process will fail.
*/
#ifdef CONFIG_ARM64_64K_PAGES
#define SECTION_SIZE_BITS 29
diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h
index aa3d3607d5c8..8fef12626090 100644
--- a/arch/arm64/include/asm/spectre.h
+++ b/arch/arm64/include/asm/spectre.h
@@ -13,8 +13,8 @@
#define __BP_HARDEN_HYP_VECS_SZ ((BP_HARDEN_EL2_SLOTS - 1) * SZ_2K)
#ifndef __ASSEMBLY__
-
-#include <linux/percpu.h>
+#include <linux/smp.h>
+#include <asm/percpu.h>
#include <asm/cpufeature.h>
#include <asm/virt.h>
@@ -26,6 +26,7 @@ enum mitigation_state {
SPECTRE_VULNERABLE,
};
+struct pt_regs;
struct task_struct;
/*
@@ -72,7 +73,7 @@ static __always_inline void arm64_apply_bp_hardening(void)
{
struct bp_hardening_data *d;
- if (!cpus_have_const_cap(ARM64_SPECTRE_V2))
+ if (!alternative_has_cap_unlikely(ARM64_SPECTRE_V2))
return;
d = this_cpu_ptr(&bp_hardening_data);
@@ -96,7 +97,26 @@ enum mitigation_state arm64_get_meltdown_state(void);
enum mitigation_state arm64_get_spectre_bhb_state(void);
bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry, int scope);
-u8 spectre_bhb_loop_affected(int scope);
+extern bool __nospectre_bhb;
+u8 get_spectre_bhb_loop_value(void);
+bool is_spectre_bhb_fw_mitigated(void);
void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *__unused);
+bool try_emulate_el1_ssbs(struct pt_regs *regs, u32 instr);
+
+void spectre_v4_patch_fw_mitigation_enable(struct alt_instr *alt, __le32 *origptr,
+ __le32 *updptr, int nr_inst);
+void smccc_patch_fw_mitigation_conduit(struct alt_instr *alt, __le32 *origptr,
+ __le32 *updptr, int nr_inst);
+void spectre_bhb_patch_loop_mitigation_enable(struct alt_instr *alt, __le32 *origptr,
+ __le32 *updptr, int nr_inst);
+void spectre_bhb_patch_fw_mitigation_enabled(struct alt_instr *alt, __le32 *origptr,
+ __le32 *updptr, int nr_inst);
+void spectre_bhb_patch_loop_iter(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst);
+void spectre_bhb_patch_wa3(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst);
+void spectre_bhb_patch_clearbhb(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst);
+
#endif /* __ASSEMBLY__ */
#endif /* __ASM_SPECTRE_H */
diff --git a/arch/arm64/include/asm/spinlock_types.h b/arch/arm64/include/asm/spinlock_types.h
index 11ab1c077697..7cde3d8bd0ad 100644
--- a/arch/arm64/include/asm/spinlock_types.h
+++ b/arch/arm64/include/asm/spinlock_types.h
@@ -6,7 +6,7 @@
#define __ASM_SPINLOCK_TYPES_H
#if !defined(__LINUX_SPINLOCK_TYPES_RAW_H) && !defined(__ASM_SPINLOCK_H)
-# error "please don't include this file directly"
+# error "Please do not include this file directly."
#endif
#include <asm-generic/qspinlock_types.h>
diff --git a/arch/arm64/include/asm/stackprotector.h b/arch/arm64/include/asm/stackprotector.h
index 33f1bb453150..ae3ad80f51fe 100644
--- a/arch/arm64/include/asm/stackprotector.h
+++ b/arch/arm64/include/asm/stackprotector.h
@@ -13,8 +13,6 @@
#ifndef __ASM_STACKPROTECTOR_H
#define __ASM_STACKPROTECTOR_H
-#include <linux/random.h>
-#include <linux/version.h>
#include <asm/pointer_auth.h>
extern unsigned long __stack_chk_guard;
@@ -28,12 +26,7 @@ extern unsigned long __stack_chk_guard;
static __always_inline void boot_init_stack_canary(void)
{
#if defined(CONFIG_STACKPROTECTOR)
- unsigned long canary;
-
- /* Try to get a semi random initial value. */
- get_random_bytes(&canary, sizeof(canary));
- canary ^= LINUX_VERSION_CODE;
- canary &= CANARY_MASK;
+ unsigned long canary = get_random_canary();
current->stack_canary = canary;
if (!IS_ENABLED(CONFIG_STACKPROTECTOR_PER_TASK))
diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
index aec9315bf156..6d3280932bf5 100644
--- a/arch/arm64/include/asm/stacktrace.h
+++ b/arch/arm64/include/asm/stacktrace.h
@@ -8,111 +8,113 @@
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
-#include <linux/types.h>
#include <linux/llist.h>
#include <asm/memory.h>
+#include <asm/pointer_auth.h>
#include <asm/ptrace.h>
#include <asm/sdei.h>
-enum stack_type {
- STACK_TYPE_UNKNOWN,
- STACK_TYPE_TASK,
- STACK_TYPE_IRQ,
- STACK_TYPE_OVERFLOW,
- STACK_TYPE_SDEI_NORMAL,
- STACK_TYPE_SDEI_CRITICAL,
- __NR_STACK_TYPES
-};
-
-struct stack_info {
- unsigned long low;
- unsigned long high;
- enum stack_type type;
-};
+#include <asm/stacktrace/common.h>
extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk,
const char *loglvl);
DECLARE_PER_CPU(unsigned long *, irq_stack_ptr);
-static inline bool on_stack(unsigned long sp, unsigned long size,
- unsigned long low, unsigned long high,
- enum stack_type type, struct stack_info *info)
-{
- if (!low)
- return false;
-
- if (sp < low || sp + size < sp || sp + size > high)
- return false;
-
- if (info) {
- info->low = low;
- info->high = high;
- info->type = type;
- }
- return true;
-}
-
-static inline bool on_irq_stack(unsigned long sp, unsigned long size,
- struct stack_info *info)
+static inline struct stack_info stackinfo_get_irq(void)
{
unsigned long low = (unsigned long)raw_cpu_read(irq_stack_ptr);
unsigned long high = low + IRQ_STACK_SIZE;
- return on_stack(sp, size, low, high, STACK_TYPE_IRQ, info);
+ return (struct stack_info) {
+ .low = low,
+ .high = high,
+ };
}
-static inline bool on_task_stack(const struct task_struct *tsk,
- unsigned long sp, unsigned long size,
- struct stack_info *info)
+static inline bool on_irq_stack(unsigned long sp, unsigned long size)
+{
+ struct stack_info info = stackinfo_get_irq();
+ return stackinfo_on_stack(&info, sp, size);
+}
+
+static inline struct stack_info stackinfo_get_task(const struct task_struct *tsk)
{
unsigned long low = (unsigned long)task_stack_page(tsk);
unsigned long high = low + THREAD_SIZE;
- return on_stack(sp, size, low, high, STACK_TYPE_TASK, info);
+ return (struct stack_info) {
+ .low = low,
+ .high = high,
+ };
}
-#ifdef CONFIG_VMAP_STACK
+static inline bool on_task_stack(const struct task_struct *tsk,
+ unsigned long sp, unsigned long size)
+{
+ struct stack_info info = stackinfo_get_task(tsk);
+ return stackinfo_on_stack(&info, sp, size);
+}
+
+#define on_thread_stack() (on_task_stack(current, current_stack_pointer, 1))
+
DECLARE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack);
-static inline bool on_overflow_stack(unsigned long sp, unsigned long size,
- struct stack_info *info)
+static inline struct stack_info stackinfo_get_overflow(void)
{
unsigned long low = (unsigned long)raw_cpu_ptr(overflow_stack);
unsigned long high = low + OVERFLOW_STACK_SIZE;
- return on_stack(sp, size, low, high, STACK_TYPE_OVERFLOW, info);
+ return (struct stack_info) {
+ .low = low,
+ .high = high,
+ };
+}
+
+#if defined(CONFIG_ARM_SDE_INTERFACE)
+DECLARE_PER_CPU(unsigned long *, sdei_stack_normal_ptr);
+DECLARE_PER_CPU(unsigned long *, sdei_stack_critical_ptr);
+
+static inline struct stack_info stackinfo_get_sdei_normal(void)
+{
+ unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_normal_ptr);
+ unsigned long high = low + SDEI_STACK_SIZE;
+
+ return (struct stack_info) {
+ .low = low,
+ .high = high,
+ };
+}
+
+static inline struct stack_info stackinfo_get_sdei_critical(void)
+{
+ unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_critical_ptr);
+ unsigned long high = low + SDEI_STACK_SIZE;
+
+ return (struct stack_info) {
+ .low = low,
+ .high = high,
+ };
}
#else
-static inline bool on_overflow_stack(unsigned long sp, unsigned long size,
- struct stack_info *info) { return false; }
+#define stackinfo_get_sdei_normal() stackinfo_get_unknown()
+#define stackinfo_get_sdei_critical() stackinfo_get_unknown()
#endif
+#ifdef CONFIG_EFI
+extern u64 *efi_rt_stack_top;
-/*
- * We can only safely access per-cpu stacks from current in a non-preemptible
- * context.
- */
-static inline bool on_accessible_stack(const struct task_struct *tsk,
- unsigned long sp, unsigned long size,
- struct stack_info *info)
+static inline struct stack_info stackinfo_get_efi(void)
{
- if (info)
- info->type = STACK_TYPE_UNKNOWN;
-
- if (on_task_stack(tsk, sp, size, info))
- return true;
- if (tsk != current || preemptible())
- return false;
- if (on_irq_stack(sp, size, info))
- return true;
- if (on_overflow_stack(sp, size, info))
- return true;
- if (on_sdei_stack(sp, size, info))
- return true;
-
- return false;
+ unsigned long high = (u64)efi_rt_stack_top;
+ unsigned long low = high - THREAD_SIZE;
+
+ return (struct stack_info) {
+ .low = low,
+ .high = high,
+ };
}
+#endif
#endif /* __ASM_STACKTRACE_H */
diff --git a/arch/arm64/include/asm/stacktrace/common.h b/arch/arm64/include/asm/stacktrace/common.h
new file mode 100644
index 000000000000..821a8fdd31af
--- /dev/null
+++ b/arch/arm64/include/asm/stacktrace/common.h
@@ -0,0 +1,171 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Common arm64 stack unwinder code.
+ *
+ * See: arch/arm64/kernel/stacktrace.c for the reference implementation.
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ */
+#ifndef __ASM_STACKTRACE_COMMON_H
+#define __ASM_STACKTRACE_COMMON_H
+
+#include <linux/types.h>
+
+struct stack_info {
+ unsigned long low;
+ unsigned long high;
+};
+
+/**
+ * struct unwind_state - state used for robust unwinding.
+ *
+ * @fp: The fp value in the frame record (or the real fp)
+ * @pc: The lr value in the frame record (or the real lr)
+ *
+ * @stack: The stack currently being unwound.
+ * @stacks: An array of stacks which can be unwound.
+ * @nr_stacks: The number of stacks in @stacks.
+ */
+struct unwind_state {
+ unsigned long fp;
+ unsigned long pc;
+
+ struct stack_info stack;
+ struct stack_info *stacks;
+ int nr_stacks;
+};
+
+static inline struct stack_info stackinfo_get_unknown(void)
+{
+ return (struct stack_info) {
+ .low = 0,
+ .high = 0,
+ };
+}
+
+static inline bool stackinfo_on_stack(const struct stack_info *info,
+ unsigned long sp, unsigned long size)
+{
+ if (!info->low)
+ return false;
+
+ if (sp < info->low || sp + size < sp || sp + size > info->high)
+ return false;
+
+ return true;
+}
+
+static inline void unwind_init_common(struct unwind_state *state)
+{
+ state->stack = stackinfo_get_unknown();
+}
+
+/**
+ * unwind_find_stack() - Find the accessible stack which entirely contains an
+ * object.
+ *
+ * @state: the current unwind state.
+ * @sp: the base address of the object.
+ * @size: the size of the object.
+ *
+ * Return: a pointer to the relevant stack_info if found; NULL otherwise.
+ */
+static struct stack_info *unwind_find_stack(struct unwind_state *state,
+ unsigned long sp,
+ unsigned long size)
+{
+ struct stack_info *info = &state->stack;
+
+ if (stackinfo_on_stack(info, sp, size))
+ return info;
+
+ for (int i = 0; i < state->nr_stacks; i++) {
+ info = &state->stacks[i];
+ if (stackinfo_on_stack(info, sp, size))
+ return info;
+ }
+
+ return NULL;
+}
+
+/**
+ * unwind_consume_stack() - Update stack boundaries so that future unwind steps
+ * cannot consume this object again.
+ *
+ * @state: the current unwind state.
+ * @info: the stack_info of the stack containing the object.
+ * @sp: the base address of the object.
+ * @size: the size of the object.
+ */
+static inline void unwind_consume_stack(struct unwind_state *state,
+ struct stack_info *info,
+ unsigned long sp,
+ unsigned long size)
+{
+ struct stack_info tmp;
+
+ /*
+ * Stack transitions are strictly one-way, and once we've
+ * transitioned from one stack to another, it's never valid to
+ * unwind back to the old stack.
+ *
+ * Destroy the old stack info so that it cannot be found upon a
+ * subsequent transition. If the stack has not changed, we'll
+ * immediately restore the current stack info.
+ *
+ * Note that stacks can nest in several valid orders, e.g.
+ *
+ * TASK -> IRQ -> OVERFLOW -> SDEI_NORMAL
+ * TASK -> SDEI_NORMAL -> SDEI_CRITICAL -> OVERFLOW
+ * HYP -> OVERFLOW
+ *
+ * ... so we do not check the specific order of stack
+ * transitions.
+ */
+ tmp = *info;
+ *info = stackinfo_get_unknown();
+ state->stack = tmp;
+
+ /*
+ * Future unwind steps can only consume stack above this frame record.
+ * Update the current stack to start immediately above it.
+ */
+ state->stack.low = sp + size;
+}
+
+/**
+ * unwind_next_frame_record() - Unwind to the next frame record.
+ *
+ * @state: the current unwind state.
+ *
+ * Return: 0 upon success, an error code otherwise.
+ */
+static inline int
+unwind_next_frame_record(struct unwind_state *state)
+{
+ struct stack_info *info;
+ struct frame_record *record;
+ unsigned long fp = state->fp;
+
+ if (fp & 0x7)
+ return -EINVAL;
+
+ info = unwind_find_stack(state, fp, sizeof(*record));
+ if (!info)
+ return -EINVAL;
+
+ unwind_consume_stack(state, info, fp, sizeof(*record));
+
+ /*
+ * Record this frame record's values.
+ */
+ record = (struct frame_record *)fp;
+ state->fp = READ_ONCE(record->fp);
+ state->pc = READ_ONCE(record->lr);
+
+ return 0;
+}
+
+#endif /* __ASM_STACKTRACE_COMMON_H */
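Illustrative sketch (not from the patch): a minimal unwinder built on the helpers above, restricted to the current task stack. Real unwinders also register the IRQ/overflow/SDEI stacks and sanitise the PC; the consume_entry callback shape is an assumption.

#include <linux/kernel.h>
#include <asm/stacktrace.h>

static void example_unwind_current(bool (*consume_entry)(void *cookie, unsigned long pc),
				   void *cookie)
{
	struct stack_info stacks[] = {
		stackinfo_get_task(current),
	};
	struct unwind_state state = {
		.stacks		= stacks,
		.nr_stacks	= ARRAY_SIZE(stacks),
	};

	unwind_init_common(&state);
	state.fp = (unsigned long)__builtin_frame_address(1);
	state.pc = (unsigned long)__builtin_return_address(0);

	while (consume_entry(cookie, state.pc)) {
		if (unwind_next_frame_record(&state))
			break;
	}
}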
diff --git a/arch/arm64/include/asm/stacktrace/frame.h b/arch/arm64/include/asm/stacktrace/frame.h
new file mode 100644
index 000000000000..0ee0f6ba0fd8
--- /dev/null
+++ b/arch/arm64/include/asm/stacktrace/frame.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __ASM_STACKTRACE_FRAME_H
+#define __ASM_STACKTRACE_FRAME_H
+
+/*
+ * - FRAME_META_TYPE_NONE
+ *
+ * This value is reserved.
+ *
+ * - FRAME_META_TYPE_FINAL
+ *
+ * The record is the last entry on the stack.
+ * Unwinding should terminate successfully.
+ *
+ * - FRAME_META_TYPE_PT_REGS
+ *
+ * The record is embedded within a struct pt_regs, recording the registers at
+ * an arbitrary point in time.
+ * Unwinding should consume pt_regs::pc, followed by pt_regs::lr.
+ *
+ * Note: all other values are reserved and should result in unwinding
+ * terminating with an error.
+ */
+#define FRAME_META_TYPE_NONE 0
+#define FRAME_META_TYPE_FINAL 1
+#define FRAME_META_TYPE_PT_REGS 2
+
+#ifndef __ASSEMBLY__
+/*
+ * A standard AAPCS64 frame record.
+ */
+struct frame_record {
+ u64 fp;
+ u64 lr;
+};
+
+/*
+ * A metadata frame record indicating a special unwind.
+ * The record::{fp,lr} fields must be zero to indicate the presence of
+ * metadata.
+ */
+struct frame_record_meta {
+ struct frame_record record;
+ u64 type;
+};
+#endif /* __ASSEMBLY__ */
+
+#endif /* __ASM_STACKTRACE_FRAME_H */
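Illustrative sketch (not from the patch): how an unwinder might act on the metadata types once it finds a record with fp == lr == 0; the return-code convention here is purely illustrative.

#include <linux/errno.h>
#include <linux/types.h>
#include <asm/stacktrace/frame.h>

static int example_handle_meta(const struct frame_record_meta *meta)
{
	switch (meta->type) {
	case FRAME_META_TYPE_FINAL:
		return 0;		/* clean end of the stack */
	case FRAME_META_TYPE_PT_REGS:
		return -EAGAIN;		/* continue from pt_regs::pc / ::lr */
	default:
		return -EINVAL;		/* corrupt or unexpected record */
	}
}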
diff --git a/arch/arm64/include/asm/stacktrace/nvhe.h b/arch/arm64/include/asm/stacktrace/nvhe.h
new file mode 100644
index 000000000000..171f9edef49f
--- /dev/null
+++ b/arch/arm64/include/asm/stacktrace/nvhe.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * KVM nVHE hypervisor stack tracing support.
+ *
+ * The unwinder implementation depends on the nVHE mode:
+ *
+ * 1) Non-protected nVHE mode - the host can directly access the
+ * HYP stack pages and unwind the HYP stack in EL1. This saves having
+ *    to allocate shared buffers for the host to read the unwound
+ * stacktrace.
+ *
+ * 2) pKVM (protected nVHE) mode - the host cannot directly access
+ *    the HYP memory. The stack is unwound in EL2 and dumped to a shared
+ * buffer where the host can read and print the stacktrace.
+ *
+ * Copyright (C) 2022 Google LLC
+ */
+#ifndef __ASM_STACKTRACE_NVHE_H
+#define __ASM_STACKTRACE_NVHE_H
+
+#include <asm/stacktrace/common.h>
+
+/**
+ * kvm_nvhe_unwind_init() - Start an unwind from the given nVHE HYP fp and pc
+ *
+ * @state : unwind_state to initialize
+ * @fp : frame pointer at which to start the unwinding.
+ * @pc : program counter at which to start the unwinding.
+ */
+static inline void kvm_nvhe_unwind_init(struct unwind_state *state,
+ unsigned long fp,
+ unsigned long pc)
+{
+ unwind_init_common(state);
+
+ state->fp = fp;
+ state->pc = pc;
+}
+
+#ifndef __KVM_NVHE_HYPERVISOR__
+/*
+ * Conventional (non-protected) nVHE HYP stack unwinder
+ *
+ * In non-protected mode, the unwinding is done from kernel proper context
+ * (by the host in EL1).
+ */
+
+DECLARE_KVM_NVHE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack);
+DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_stacktrace_info, kvm_stacktrace_info);
+DECLARE_PER_CPU(unsigned long, kvm_arm_hyp_stack_base);
+
+void kvm_nvhe_dump_backtrace(unsigned long hyp_offset);
+
+#endif /* __KVM_NVHE_HYPERVISOR__ */
+#endif /* __ASM_STACKTRACE_NVHE_H */
diff --git a/arch/arm64/include/asm/stage2_pgtable.h b/arch/arm64/include/asm/stage2_pgtable.h
index fe341a6578c3..23d27623e478 100644
--- a/arch/arm64/include/asm/stage2_pgtable.h
+++ b/arch/arm64/include/asm/stage2_pgtable.h
@@ -11,13 +11,6 @@
#include <linux/pgtable.h>
/*
- * PGDIR_SHIFT determines the size a top-level page table entry can map
- * and depends on the number of levels in the page table. Compute the
- * PGDIR_SHIFT for a given number of levels.
- */
-#define pt_levels_pgdir_shift(lvls) ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - (lvls))
-
-/*
* The hardware supports concatenation of up to 16 tables at stage2 entry
* level and we use the feature whenever possible, which means we resolve 4
* additional bits of address at the entry level.
@@ -28,26 +21,13 @@
* (IPA_SHIFT - 4).
*/
#define stage2_pgtable_levels(ipa) ARM64_HW_PGTABLE_LEVELS((ipa) - 4)
-#define kvm_stage2_levels(kvm) VTCR_EL2_LVLS(kvm->arch.vtcr)
-
-/* stage2_pgdir_shift() is the size mapped by top-level stage2 entry for the VM */
-#define stage2_pgdir_shift(kvm) pt_levels_pgdir_shift(kvm_stage2_levels(kvm))
-#define stage2_pgdir_size(kvm) (1ULL << stage2_pgdir_shift(kvm))
-#define stage2_pgdir_mask(kvm) ~(stage2_pgdir_size(kvm) - 1)
+#define kvm_stage2_levels(mmu) VTCR_EL2_LVLS((mmu)->vtcr)
/*
* kvm_mmu_cache_min_pages() is the number of pages required to install
* a stage-2 translation. We pre-allocate the entry level page table at
* the VM creation.
*/
-#define kvm_mmu_cache_min_pages(kvm) (kvm_stage2_levels(kvm) - 1)
-
-static inline phys_addr_t
-stage2_pgd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
-{
- phys_addr_t boundary = (addr + stage2_pgdir_size(kvm)) & stage2_pgdir_mask(kvm);
-
- return (boundary - 1 < end - 1) ? boundary : end;
-}
+#define kvm_mmu_cache_min_pages(mmu) (kvm_stage2_levels(mmu) - 1)
#endif /* __ARM64_S2_PGTABLE_H_ */
diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h
index 4cfe9b49709b..712daa90e643 100644
--- a/arch/arm64/include/asm/syscall.h
+++ b/arch/arm64/include/asm/syscall.h
@@ -61,6 +61,22 @@ static inline void syscall_set_return_value(struct task_struct *task,
regs->regs[0] = val;
}
+static inline void syscall_set_nr(struct task_struct *task,
+ struct pt_regs *regs,
+ int nr)
+{
+ regs->syscallno = nr;
+ if (nr == -1) {
+ /*
+ * When the syscall number is set to -1, the syscall will be
+ * skipped. In this case the syscall return value has to be
+ * set explicitly, otherwise the first syscall argument is
+ * returned as the syscall return value.
+ */
+ syscall_set_return_value(task, regs, -ENOSYS, 0);
+ }
+}
+
#define SYSCALL_MAX_ARGS 6
static inline void syscall_get_arguments(struct task_struct *task,
@@ -73,6 +89,19 @@ static inline void syscall_get_arguments(struct task_struct *task,
memcpy(args, &regs->regs[1], 5 * sizeof(args[0]));
}
+static inline void syscall_set_arguments(struct task_struct *task,
+ struct pt_regs *regs,
+ const unsigned long *args)
+{
+ memcpy(&regs->regs[0], args, 6 * sizeof(args[0]));
+ /*
+ * Also copy the first argument into orig_x0
+ * so that syscall_get_arguments() returns it
+ * instead of the previous value.
+ */
+ regs->orig_x0 = regs->regs[0];
+}
+
/*
* We don't care about endianness (__AUDIT_ARCH_LE bit) here because
* AArch64 has the same system calls both on little- and big- endian.
@@ -85,4 +114,7 @@ static inline int syscall_get_arch(struct task_struct *task)
return AUDIT_ARCH_AARCH64;
}
+int syscall_trace_enter(struct pt_regs *regs);
+void syscall_trace_exit(struct pt_regs *regs);
+
#endif /* __ASM_SYSCALL_H */
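
A minimal sketch of how the new setters pair with the existing getters when a tracer rewrites a traced task's arguments; the helper itself is hypothetical:

	static void rewrite_first_arg(struct task_struct *task,
				      struct pt_regs *regs,
				      unsigned long new_arg0)
	{
		unsigned long args[6];

		syscall_get_arguments(task, regs, args);	/* args[0] comes from orig_x0 */
		args[0] = new_arg0;
		syscall_set_arguments(task, regs, args);	/* writes regs[0..5] and orig_x0 */
	}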
diff --git a/arch/arm64/include/asm/syscall_wrapper.h b/arch/arm64/include/asm/syscall_wrapper.h
index b383b4802a7b..abb57bc54305 100644
--- a/arch/arm64/include/asm/syscall_wrapper.h
+++ b/arch/arm64/include/asm/syscall_wrapper.h
@@ -8,7 +8,7 @@
#ifndef __ASM_SYSCALL_WRAPPER_H
#define __ASM_SYSCALL_WRAPPER_H
-struct pt_regs;
+#include <asm/ptrace.h>
#define SC_ARM64_REGS_TO_ARGS(x, ...) \
__MAP(x,__SC_ARGS \
@@ -38,14 +38,12 @@ struct pt_regs;
asmlinkage long __arm64_compat_sys_##sname(const struct pt_regs *__unused)
#define COND_SYSCALL_COMPAT(name) \
+ asmlinkage long __arm64_compat_sys_##name(const struct pt_regs *regs); \
asmlinkage long __weak __arm64_compat_sys_##name(const struct pt_regs *regs) \
{ \
return sys_ni_syscall(); \
}
-#define COMPAT_SYS_NI(name) \
- SYSCALL_ALIAS(__arm64_compat_sys_##name, sys_ni_posix_timers);
-
#endif /* CONFIG_COMPAT */
#define __SYSCALL_DEFINEx(x, name, ...) \
@@ -73,11 +71,12 @@ struct pt_regs;
asmlinkage long __arm64_sys_##sname(const struct pt_regs *__unused)
#define COND_SYSCALL(name) \
+ asmlinkage long __arm64_sys_##name(const struct pt_regs *regs); \
asmlinkage long __weak __arm64_sys_##name(const struct pt_regs *regs) \
{ \
return sys_ni_syscall(); \
}
-#define SYS_NI(name) SYSCALL_ALIAS(__arm64_sys_##name, sys_ni_posix_timers);
+asmlinkage long __arm64_sys_ni_syscall(const struct pt_regs *__unused);
#endif /* __ASM_SYSCALL_WRAPPER_H */
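
With the added declarations, COND_SYSCALL() emits a prototype immediately before the weak stub, which is presumably what keeps -Wmissing-prototypes quiet; roughly (syscall name chosen only for illustration):

	asmlinkage long __arm64_sys_io_pgetevents(const struct pt_regs *regs);
	asmlinkage long __weak __arm64_sys_io_pgetevents(const struct pt_regs *regs)
	{
		return sys_ni_syscall();
	}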
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 7c71358d44c4..6604fd6f33f4 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -12,6 +12,7 @@
#include <linux/bits.h>
#include <linux/stringify.h>
#include <linux/kasan-tags.h>
+#include <linux/kconfig.h>
#include <asm/gpr-num.h>
@@ -90,29 +91,80 @@
*/
#define pstate_field(op1, op2) ((op1) << Op1_shift | (op2) << Op2_shift)
#define PSTATE_Imm_shift CRm_shift
+#define SET_PSTATE(x, r) __emit_inst(0xd500401f | PSTATE_ ## r | ((!!x) << PSTATE_Imm_shift))
#define PSTATE_PAN pstate_field(0, 4)
#define PSTATE_UAO pstate_field(0, 3)
#define PSTATE_SSBS pstate_field(3, 1)
+#define PSTATE_DIT pstate_field(3, 2)
#define PSTATE_TCO pstate_field(3, 4)
-#define SET_PSTATE_PAN(x) __emit_inst(0xd500401f | PSTATE_PAN | ((!!x) << PSTATE_Imm_shift))
-#define SET_PSTATE_UAO(x) __emit_inst(0xd500401f | PSTATE_UAO | ((!!x) << PSTATE_Imm_shift))
-#define SET_PSTATE_SSBS(x) __emit_inst(0xd500401f | PSTATE_SSBS | ((!!x) << PSTATE_Imm_shift))
-#define SET_PSTATE_TCO(x) __emit_inst(0xd500401f | PSTATE_TCO | ((!!x) << PSTATE_Imm_shift))
+#define SET_PSTATE_PAN(x) SET_PSTATE((x), PAN)
+#define SET_PSTATE_UAO(x) SET_PSTATE((x), UAO)
+#define SET_PSTATE_SSBS(x) SET_PSTATE((x), SSBS)
+#define SET_PSTATE_DIT(x) SET_PSTATE((x), DIT)
+#define SET_PSTATE_TCO(x) SET_PSTATE((x), TCO)
#define set_pstate_pan(x) asm volatile(SET_PSTATE_PAN(x))
#define set_pstate_uao(x) asm volatile(SET_PSTATE_UAO(x))
#define set_pstate_ssbs(x) asm volatile(SET_PSTATE_SSBS(x))
+#define set_pstate_dit(x) asm volatile(SET_PSTATE_DIT(x))
-#define __SYS_BARRIER_INSN(CRm, op2, Rt) \
- __emit_inst(0xd5000000 | sys_insn(0, 3, 3, (CRm), (op2)) | ((Rt) & 0x1f))
+/* Register-based PAN access, for save/restore purposes */
+#define SYS_PSTATE_PAN sys_reg(3, 0, 4, 2, 3)
-#define SB_BARRIER_INSN __SYS_BARRIER_INSN(0, 7, 31)
+#define __SYS_BARRIER_INSN(op0, op1, CRn, CRm, op2, Rt) \
+ __emit_inst(0xd5000000 | \
+ sys_insn((op0), (op1), (CRn), (CRm), (op2)) | \
+ ((Rt) & 0x1f))
+#define SB_BARRIER_INSN __SYS_BARRIER_INSN(0, 3, 3, 0, 7, 31)
+#define GSB_SYS_BARRIER_INSN __SYS_BARRIER_INSN(1, 0, 12, 0, 0, 31)
+#define GSB_ACK_BARRIER_INSN __SYS_BARRIER_INSN(1, 0, 12, 0, 1, 31)
+
+/* Cache maintenance instructions */
#define SYS_DC_ISW sys_insn(1, 0, 7, 6, 2)
+#define SYS_DC_IGSW sys_insn(1, 0, 7, 6, 4)
+#define SYS_DC_IGDSW sys_insn(1, 0, 7, 6, 6)
#define SYS_DC_CSW sys_insn(1, 0, 7, 10, 2)
+#define SYS_DC_CGSW sys_insn(1, 0, 7, 10, 4)
+#define SYS_DC_CGDSW sys_insn(1, 0, 7, 10, 6)
#define SYS_DC_CISW sys_insn(1, 0, 7, 14, 2)
+#define SYS_DC_CIGSW sys_insn(1, 0, 7, 14, 4)
+#define SYS_DC_CIGDSW sys_insn(1, 0, 7, 14, 6)
+
+#define SYS_IC_IALLUIS sys_insn(1, 0, 7, 1, 0)
+#define SYS_IC_IALLU sys_insn(1, 0, 7, 5, 0)
+#define SYS_IC_IVAU sys_insn(1, 3, 7, 5, 1)
+
+#define SYS_DC_IVAC sys_insn(1, 0, 7, 6, 1)
+#define SYS_DC_IGVAC sys_insn(1, 0, 7, 6, 3)
+#define SYS_DC_IGDVAC sys_insn(1, 0, 7, 6, 5)
+
+#define SYS_DC_CVAC sys_insn(1, 3, 7, 10, 1)
+#define SYS_DC_CGVAC sys_insn(1, 3, 7, 10, 3)
+#define SYS_DC_CGDVAC sys_insn(1, 3, 7, 10, 5)
+
+#define SYS_DC_CVAU sys_insn(1, 3, 7, 11, 1)
+
+#define SYS_DC_CVAP sys_insn(1, 3, 7, 12, 1)
+#define SYS_DC_CGVAP sys_insn(1, 3, 7, 12, 3)
+#define SYS_DC_CGDVAP sys_insn(1, 3, 7, 12, 5)
+
+#define SYS_DC_CVADP sys_insn(1, 3, 7, 13, 1)
+#define SYS_DC_CGVADP sys_insn(1, 3, 7, 13, 3)
+#define SYS_DC_CGDVADP sys_insn(1, 3, 7, 13, 5)
+
+#define SYS_DC_CIVAC sys_insn(1, 3, 7, 14, 1)
+#define SYS_DC_CIGVAC sys_insn(1, 3, 7, 14, 3)
+#define SYS_DC_CIGDVAC sys_insn(1, 3, 7, 14, 5)
+
+#define SYS_DC_ZVA sys_insn(1, 3, 7, 4, 1)
+#define SYS_DC_GVA sys_insn(1, 3, 7, 4, 3)
+#define SYS_DC_GZVA sys_insn(1, 3, 7, 4, 4)
+
+#define SYS_DC_CIVAPS sys_insn(1, 0, 7, 15, 1)
+#define SYS_DC_CIGDVAPS sys_insn(1, 0, 7, 15, 5)
/*
* Automatically generated definitions for system registers, the
@@ -130,25 +182,17 @@
#define SYS_SVCR_SMSTART_SM_EL0 sys_reg(0, 3, 4, 3, 3)
#define SYS_SVCR_SMSTOP_SMZA_EL0 sys_reg(0, 3, 4, 6, 3)
-#define SYS_OSDTRRX_EL1 sys_reg(2, 0, 0, 0, 2)
-#define SYS_MDCCINT_EL1 sys_reg(2, 0, 0, 2, 0)
-#define SYS_MDSCR_EL1 sys_reg(2, 0, 0, 2, 2)
-#define SYS_OSDTRTX_EL1 sys_reg(2, 0, 0, 3, 2)
-#define SYS_OSECCR_EL1 sys_reg(2, 0, 0, 6, 2)
#define SYS_DBGBVRn_EL1(n) sys_reg(2, 0, 0, n, 4)
#define SYS_DBGBCRn_EL1(n) sys_reg(2, 0, 0, n, 5)
#define SYS_DBGWVRn_EL1(n) sys_reg(2, 0, 0, n, 6)
#define SYS_DBGWCRn_EL1(n) sys_reg(2, 0, 0, n, 7)
#define SYS_MDRAR_EL1 sys_reg(2, 0, 1, 0, 0)
-#define SYS_OSLAR_EL1 sys_reg(2, 0, 1, 0, 4)
-#define SYS_OSLAR_OSLK BIT(0)
-
#define SYS_OSLSR_EL1 sys_reg(2, 0, 1, 1, 4)
-#define SYS_OSLSR_OSLM_MASK (BIT(3) | BIT(0))
-#define SYS_OSLSR_OSLM_NI 0
-#define SYS_OSLSR_OSLM_IMPLEMENTED BIT(3)
-#define SYS_OSLSR_OSLK BIT(1)
+#define OSLSR_EL1_OSLM_MASK (BIT(3) | BIT(0))
+#define OSLSR_EL1_OSLM_NI 0
+#define OSLSR_EL1_OSLM_IMPLEMENTED BIT(3)
+#define OSLSR_EL1_OSLK BIT(1)
#define SYS_OSDLR_EL1 sys_reg(2, 0, 1, 3, 4)
#define SYS_DBGPRCR_EL1 sys_reg(2, 0, 1, 4, 4)
@@ -161,54 +205,82 @@
#define SYS_DBGDTRTX_EL0 sys_reg(2, 3, 0, 5, 0)
#define SYS_DBGVCR32_EL2 sys_reg(2, 4, 0, 7, 0)
+#define SYS_BRBINF_EL1(n) sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 0))
+#define SYS_BRBSRC_EL1(n) sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 1))
+#define SYS_BRBTGT_EL1(n) sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 2))
+
+#define SYS_TRCITECR_EL1 sys_reg(3, 0, 1, 2, 3)
+#define SYS_TRCACATR(m) sys_reg(2, 1, 2, ((m & 7) << 1), (2 | (m >> 3)))
+#define SYS_TRCACVR(m) sys_reg(2, 1, 2, ((m & 7) << 1), (0 | (m >> 3)))
+#define SYS_TRCAUTHSTATUS sys_reg(2, 1, 7, 14, 6)
+#define SYS_TRCAUXCTLR sys_reg(2, 1, 0, 6, 0)
+#define SYS_TRCBBCTLR sys_reg(2, 1, 0, 15, 0)
+#define SYS_TRCCCCTLR sys_reg(2, 1, 0, 14, 0)
+#define SYS_TRCCIDCCTLR0 sys_reg(2, 1, 3, 0, 2)
+#define SYS_TRCCIDCCTLR1 sys_reg(2, 1, 3, 1, 2)
+#define SYS_TRCCIDCVR(m) sys_reg(2, 1, 3, ((m & 7) << 1), 0)
+#define SYS_TRCCLAIMCLR sys_reg(2, 1, 7, 9, 6)
+#define SYS_TRCCLAIMSET sys_reg(2, 1, 7, 8, 6)
+#define SYS_TRCCNTCTLR(m) sys_reg(2, 1, 0, (4 | (m & 3)), 5)
+#define SYS_TRCCNTRLDVR(m) sys_reg(2, 1, 0, (0 | (m & 3)), 5)
+#define SYS_TRCCNTVR(m) sys_reg(2, 1, 0, (8 | (m & 3)), 5)
+#define SYS_TRCCONFIGR sys_reg(2, 1, 0, 4, 0)
+#define SYS_TRCDEVARCH sys_reg(2, 1, 7, 15, 6)
+#define SYS_TRCDEVID sys_reg(2, 1, 7, 2, 7)
+#define SYS_TRCEVENTCTL0R sys_reg(2, 1, 0, 8, 0)
+#define SYS_TRCEVENTCTL1R sys_reg(2, 1, 0, 9, 0)
+#define SYS_TRCEXTINSELR(m) sys_reg(2, 1, 0, (8 | (m & 3)), 4)
+#define SYS_TRCIDR0 sys_reg(2, 1, 0, 8, 7)
+#define SYS_TRCIDR10 sys_reg(2, 1, 0, 2, 6)
+#define SYS_TRCIDR11 sys_reg(2, 1, 0, 3, 6)
+#define SYS_TRCIDR12 sys_reg(2, 1, 0, 4, 6)
+#define SYS_TRCIDR13 sys_reg(2, 1, 0, 5, 6)
+#define SYS_TRCIDR1 sys_reg(2, 1, 0, 9, 7)
+#define SYS_TRCIDR2 sys_reg(2, 1, 0, 10, 7)
+#define SYS_TRCIDR3 sys_reg(2, 1, 0, 11, 7)
+#define SYS_TRCIDR4 sys_reg(2, 1, 0, 12, 7)
+#define SYS_TRCIDR5 sys_reg(2, 1, 0, 13, 7)
+#define SYS_TRCIDR6 sys_reg(2, 1, 0, 14, 7)
+#define SYS_TRCIDR7 sys_reg(2, 1, 0, 15, 7)
+#define SYS_TRCIDR8 sys_reg(2, 1, 0, 0, 6)
+#define SYS_TRCIDR9 sys_reg(2, 1, 0, 1, 6)
+#define SYS_TRCIMSPEC(m) sys_reg(2, 1, 0, (m & 7), 7)
+#define SYS_TRCITEEDCR sys_reg(2, 1, 0, 2, 1)
+#define SYS_TRCOSLSR sys_reg(2, 1, 1, 1, 4)
+#define SYS_TRCPRGCTLR sys_reg(2, 1, 0, 1, 0)
+#define SYS_TRCQCTLR sys_reg(2, 1, 0, 1, 1)
+#define SYS_TRCRSCTLR(m) sys_reg(2, 1, 1, (m & 15), (0 | (m >> 4)))
+#define SYS_TRCRSR sys_reg(2, 1, 0, 10, 0)
+#define SYS_TRCSEQEVR(m) sys_reg(2, 1, 0, (m & 3), 4)
+#define SYS_TRCSEQRSTEVR sys_reg(2, 1, 0, 6, 4)
+#define SYS_TRCSEQSTR sys_reg(2, 1, 0, 7, 4)
+#define SYS_TRCSSCCR(m) sys_reg(2, 1, 1, (m & 7), 2)
+#define SYS_TRCSSCSR(m) sys_reg(2, 1, 1, (8 | (m & 7)), 2)
+#define SYS_TRCSSPCICR(m) sys_reg(2, 1, 1, (m & 7), 3)
+#define SYS_TRCSTALLCTLR sys_reg(2, 1, 0, 11, 0)
+#define SYS_TRCSTATR sys_reg(2, 1, 0, 3, 0)
+#define SYS_TRCSYNCPR sys_reg(2, 1, 0, 13, 0)
+#define SYS_TRCTRACEIDR sys_reg(2, 1, 0, 0, 1)
+#define SYS_TRCTSCTLR sys_reg(2, 1, 0, 12, 0)
+#define SYS_TRCVICTLR sys_reg(2, 1, 0, 0, 2)
+#define SYS_TRCVIIECTLR sys_reg(2, 1, 0, 1, 2)
+#define SYS_TRCVIPCSSCTLR sys_reg(2, 1, 0, 3, 2)
+#define SYS_TRCVISSCTLR sys_reg(2, 1, 0, 2, 2)
+#define SYS_TRCVMIDCCTLR0 sys_reg(2, 1, 3, 2, 2)
+#define SYS_TRCVMIDCCTLR1 sys_reg(2, 1, 3, 3, 2)
+#define SYS_TRCVMIDCVR(m) sys_reg(2, 1, 3, ((m & 7) << 1), 1)
+
+/* ETM */
+#define SYS_TRCOSLAR sys_reg(2, 1, 1, 0, 4)
+
#define SYS_MIDR_EL1 sys_reg(3, 0, 0, 0, 0)
#define SYS_MPIDR_EL1 sys_reg(3, 0, 0, 0, 5)
#define SYS_REVIDR_EL1 sys_reg(3, 0, 0, 0, 6)
-#define SYS_ID_PFR0_EL1 sys_reg(3, 0, 0, 1, 0)
-#define SYS_ID_PFR1_EL1 sys_reg(3, 0, 0, 1, 1)
-#define SYS_ID_PFR2_EL1 sys_reg(3, 0, 0, 3, 4)
-#define SYS_ID_DFR0_EL1 sys_reg(3, 0, 0, 1, 2)
-#define SYS_ID_DFR1_EL1 sys_reg(3, 0, 0, 3, 5)
-#define SYS_ID_AFR0_EL1 sys_reg(3, 0, 0, 1, 3)
-#define SYS_ID_MMFR0_EL1 sys_reg(3, 0, 0, 1, 4)
-#define SYS_ID_MMFR1_EL1 sys_reg(3, 0, 0, 1, 5)
-#define SYS_ID_MMFR2_EL1 sys_reg(3, 0, 0, 1, 6)
-#define SYS_ID_MMFR3_EL1 sys_reg(3, 0, 0, 1, 7)
-#define SYS_ID_MMFR4_EL1 sys_reg(3, 0, 0, 2, 6)
-#define SYS_ID_MMFR5_EL1 sys_reg(3, 0, 0, 3, 6)
-
-#define SYS_ID_ISAR0_EL1 sys_reg(3, 0, 0, 2, 0)
-#define SYS_ID_ISAR1_EL1 sys_reg(3, 0, 0, 2, 1)
-#define SYS_ID_ISAR2_EL1 sys_reg(3, 0, 0, 2, 2)
-#define SYS_ID_ISAR3_EL1 sys_reg(3, 0, 0, 2, 3)
-#define SYS_ID_ISAR4_EL1 sys_reg(3, 0, 0, 2, 4)
-#define SYS_ID_ISAR5_EL1 sys_reg(3, 0, 0, 2, 5)
-#define SYS_ID_ISAR6_EL1 sys_reg(3, 0, 0, 2, 7)
-
-#define SYS_MVFR0_EL1 sys_reg(3, 0, 0, 3, 0)
-#define SYS_MVFR1_EL1 sys_reg(3, 0, 0, 3, 1)
-#define SYS_MVFR2_EL1 sys_reg(3, 0, 0, 3, 2)
-
-#define SYS_ID_AA64PFR0_EL1 sys_reg(3, 0, 0, 4, 0)
-#define SYS_ID_AA64PFR1_EL1 sys_reg(3, 0, 0, 4, 1)
-
-#define SYS_ID_AA64DFR0_EL1 sys_reg(3, 0, 0, 5, 0)
-#define SYS_ID_AA64DFR1_EL1 sys_reg(3, 0, 0, 5, 1)
-
-#define SYS_ID_AA64AFR0_EL1 sys_reg(3, 0, 0, 5, 4)
-#define SYS_ID_AA64AFR1_EL1 sys_reg(3, 0, 0, 5, 5)
-
-#define SYS_ID_AA64MMFR0_EL1 sys_reg(3, 0, 0, 7, 0)
-#define SYS_ID_AA64MMFR1_EL1 sys_reg(3, 0, 0, 7, 1)
-#define SYS_ID_AA64MMFR2_EL1 sys_reg(3, 0, 0, 7, 2)
-
#define SYS_ACTLR_EL1 sys_reg(3, 0, 1, 0, 1)
#define SYS_RGSR_EL1 sys_reg(3, 0, 1, 0, 5)
#define SYS_GCR_EL1 sys_reg(3, 0, 1, 0, 6)
-#define SYS_TRFCR_EL1 sys_reg(3, 0, 1, 2, 1)
-
#define SYS_TCR_EL1 sys_reg(3, 0, 2, 0, 2)
#define SYS_APIAKEYLO_EL1 sys_reg(3, 0, 2, 1, 0)
@@ -239,163 +311,61 @@
#define SYS_ERXCTLR_EL1 sys_reg(3, 0, 5, 4, 1)
#define SYS_ERXSTATUS_EL1 sys_reg(3, 0, 5, 4, 2)
#define SYS_ERXADDR_EL1 sys_reg(3, 0, 5, 4, 3)
+#define SYS_ERXPFGF_EL1 sys_reg(3, 0, 5, 4, 4)
+#define SYS_ERXPFGCTL_EL1 sys_reg(3, 0, 5, 4, 5)
+#define SYS_ERXPFGCDN_EL1 sys_reg(3, 0, 5, 4, 6)
#define SYS_ERXMISC0_EL1 sys_reg(3, 0, 5, 5, 0)
#define SYS_ERXMISC1_EL1 sys_reg(3, 0, 5, 5, 1)
+#define SYS_ERXMISC2_EL1 sys_reg(3, 0, 5, 5, 2)
+#define SYS_ERXMISC3_EL1 sys_reg(3, 0, 5, 5, 3)
#define SYS_TFSR_EL1 sys_reg(3, 0, 5, 6, 0)
#define SYS_TFSRE0_EL1 sys_reg(3, 0, 5, 6, 1)
#define SYS_PAR_EL1 sys_reg(3, 0, 7, 4, 0)
#define SYS_PAR_EL1_F BIT(0)
+/* When PAR_EL1.F == 1 */
#define SYS_PAR_EL1_FST GENMASK(6, 1)
+#define SYS_PAR_EL1_PTW BIT(8)
+#define SYS_PAR_EL1_S BIT(9)
+#define SYS_PAR_EL1_AssuredOnly BIT(12)
+#define SYS_PAR_EL1_TopLevel BIT(13)
+#define SYS_PAR_EL1_Overlay BIT(14)
+#define SYS_PAR_EL1_DirtyBit BIT(15)
+#define SYS_PAR_EL1_F1_IMPDEF GENMASK_ULL(63, 48)
+#define SYS_PAR_EL1_F1_RES0 (BIT(7) | BIT(10) | GENMASK_ULL(47, 16))
+#define SYS_PAR_EL1_RES1 BIT(11)
+/* When PAR_EL1.F == 0 */
+#define SYS_PAR_EL1_SH GENMASK_ULL(8, 7)
+#define SYS_PAR_EL1_NS BIT(9)
+#define SYS_PAR_EL1_F0_IMPDEF BIT(10)
+#define SYS_PAR_EL1_NSE BIT(11)
+#define SYS_PAR_EL1_PA GENMASK_ULL(51, 12)
+#define SYS_PAR_EL1_ATTR GENMASK_ULL(63, 56)
+#define SYS_PAR_EL1_F0_RES0 (GENMASK_ULL(6, 1) | GENMASK_ULL(55, 52))
/*** Statistical Profiling Extension ***/
-/* ID registers */
-#define SYS_PMSIDR_EL1 sys_reg(3, 0, 9, 9, 7)
-#define SYS_PMSIDR_EL1_FE_SHIFT 0
-#define SYS_PMSIDR_EL1_FT_SHIFT 1
-#define SYS_PMSIDR_EL1_FL_SHIFT 2
-#define SYS_PMSIDR_EL1_ARCHINST_SHIFT 3
-#define SYS_PMSIDR_EL1_LDS_SHIFT 4
-#define SYS_PMSIDR_EL1_ERND_SHIFT 5
-#define SYS_PMSIDR_EL1_INTERVAL_SHIFT 8
-#define SYS_PMSIDR_EL1_INTERVAL_MASK 0xfUL
-#define SYS_PMSIDR_EL1_MAXSIZE_SHIFT 12
-#define SYS_PMSIDR_EL1_MAXSIZE_MASK 0xfUL
-#define SYS_PMSIDR_EL1_COUNTSIZE_SHIFT 16
-#define SYS_PMSIDR_EL1_COUNTSIZE_MASK 0xfUL
-
-#define SYS_PMBIDR_EL1 sys_reg(3, 0, 9, 10, 7)
-#define SYS_PMBIDR_EL1_ALIGN_SHIFT 0
-#define SYS_PMBIDR_EL1_ALIGN_MASK 0xfU
-#define SYS_PMBIDR_EL1_P_SHIFT 4
-#define SYS_PMBIDR_EL1_F_SHIFT 5
-
-/* Sampling controls */
-#define SYS_PMSCR_EL1 sys_reg(3, 0, 9, 9, 0)
-#define SYS_PMSCR_EL1_E0SPE_SHIFT 0
-#define SYS_PMSCR_EL1_E1SPE_SHIFT 1
-#define SYS_PMSCR_EL1_CX_SHIFT 3
-#define SYS_PMSCR_EL1_PA_SHIFT 4
-#define SYS_PMSCR_EL1_TS_SHIFT 5
-#define SYS_PMSCR_EL1_PCT_SHIFT 6
-
-#define SYS_PMSCR_EL2 sys_reg(3, 4, 9, 9, 0)
-#define SYS_PMSCR_EL2_E0HSPE_SHIFT 0
-#define SYS_PMSCR_EL2_E2SPE_SHIFT 1
-#define SYS_PMSCR_EL2_CX_SHIFT 3
-#define SYS_PMSCR_EL2_PA_SHIFT 4
-#define SYS_PMSCR_EL2_TS_SHIFT 5
-#define SYS_PMSCR_EL2_PCT_SHIFT 6
-
-#define SYS_PMSICR_EL1 sys_reg(3, 0, 9, 9, 2)
-
-#define SYS_PMSIRR_EL1 sys_reg(3, 0, 9, 9, 3)
-#define SYS_PMSIRR_EL1_RND_SHIFT 0
-#define SYS_PMSIRR_EL1_INTERVAL_SHIFT 8
-#define SYS_PMSIRR_EL1_INTERVAL_MASK 0xffffffUL
-
-/* Filtering controls */
-#define SYS_PMSNEVFR_EL1 sys_reg(3, 0, 9, 9, 1)
-
-#define SYS_PMSFCR_EL1 sys_reg(3, 0, 9, 9, 4)
-#define SYS_PMSFCR_EL1_FE_SHIFT 0
-#define SYS_PMSFCR_EL1_FT_SHIFT 1
-#define SYS_PMSFCR_EL1_FL_SHIFT 2
-#define SYS_PMSFCR_EL1_B_SHIFT 16
-#define SYS_PMSFCR_EL1_LD_SHIFT 17
-#define SYS_PMSFCR_EL1_ST_SHIFT 18
-
-#define SYS_PMSEVFR_EL1 sys_reg(3, 0, 9, 9, 5)
-#define SYS_PMSEVFR_EL1_RES0_8_2 \
+#define PMSEVFR_EL1_RES0_IMP \
(GENMASK_ULL(47, 32) | GENMASK_ULL(23, 16) | GENMASK_ULL(11, 8) |\
BIT_ULL(6) | BIT_ULL(4) | BIT_ULL(2) | BIT_ULL(0))
-#define SYS_PMSEVFR_EL1_RES0_8_3 \
- (SYS_PMSEVFR_EL1_RES0_8_2 & ~(BIT_ULL(18) | BIT_ULL(17) | BIT_ULL(11)))
-
-#define SYS_PMSLATFR_EL1 sys_reg(3, 0, 9, 9, 6)
-#define SYS_PMSLATFR_EL1_MINLAT_SHIFT 0
-
-/* Buffer controls */
-#define SYS_PMBLIMITR_EL1 sys_reg(3, 0, 9, 10, 0)
-#define SYS_PMBLIMITR_EL1_E_SHIFT 0
-#define SYS_PMBLIMITR_EL1_FM_SHIFT 1
-#define SYS_PMBLIMITR_EL1_FM_MASK 0x3UL
-#define SYS_PMBLIMITR_EL1_FM_STOP_IRQ (0 << SYS_PMBLIMITR_EL1_FM_SHIFT)
-
-#define SYS_PMBPTR_EL1 sys_reg(3, 0, 9, 10, 1)
+#define PMSEVFR_EL1_RES0_V1P1 \
+ (PMSEVFR_EL1_RES0_IMP & ~(BIT_ULL(18) | BIT_ULL(17) | BIT_ULL(11)))
+#define PMSEVFR_EL1_RES0_V1P2 \
+ (PMSEVFR_EL1_RES0_V1P1 & ~BIT_ULL(6))
/* Buffer error reporting */
-#define SYS_PMBSR_EL1 sys_reg(3, 0, 9, 10, 3)
-#define SYS_PMBSR_EL1_COLL_SHIFT 16
-#define SYS_PMBSR_EL1_S_SHIFT 17
-#define SYS_PMBSR_EL1_EA_SHIFT 18
-#define SYS_PMBSR_EL1_DL_SHIFT 19
-#define SYS_PMBSR_EL1_EC_SHIFT 26
-#define SYS_PMBSR_EL1_EC_MASK 0x3fUL
-
-#define SYS_PMBSR_EL1_EC_BUF (0x0UL << SYS_PMBSR_EL1_EC_SHIFT)
-#define SYS_PMBSR_EL1_EC_FAULT_S1 (0x24UL << SYS_PMBSR_EL1_EC_SHIFT)
-#define SYS_PMBSR_EL1_EC_FAULT_S2 (0x25UL << SYS_PMBSR_EL1_EC_SHIFT)
+#define PMBSR_EL1_FAULT_FSC_SHIFT PMBSR_EL1_MSS_SHIFT
+#define PMBSR_EL1_FAULT_FSC_MASK PMBSR_EL1_MSS_MASK
-#define SYS_PMBSR_EL1_FAULT_FSC_SHIFT 0
-#define SYS_PMBSR_EL1_FAULT_FSC_MASK 0x3fUL
+#define PMBSR_EL1_BUF_BSC_SHIFT PMBSR_EL1_MSS_SHIFT
+#define PMBSR_EL1_BUF_BSC_MASK PMBSR_EL1_MSS_MASK
-#define SYS_PMBSR_EL1_BUF_BSC_SHIFT 0
-#define SYS_PMBSR_EL1_BUF_BSC_MASK 0x3fUL
-
-#define SYS_PMBSR_EL1_BUF_BSC_FULL (0x1UL << SYS_PMBSR_EL1_BUF_BSC_SHIFT)
+#define PMBSR_EL1_BUF_BSC_FULL 0x1UL
/*** End of Statistical Profiling Extension ***/
-/*
- * TRBE Registers
- */
-#define SYS_TRBLIMITR_EL1 sys_reg(3, 0, 9, 11, 0)
-#define SYS_TRBPTR_EL1 sys_reg(3, 0, 9, 11, 1)
-#define SYS_TRBBASER_EL1 sys_reg(3, 0, 9, 11, 2)
-#define SYS_TRBSR_EL1 sys_reg(3, 0, 9, 11, 3)
-#define SYS_TRBMAR_EL1 sys_reg(3, 0, 9, 11, 4)
-#define SYS_TRBTRG_EL1 sys_reg(3, 0, 9, 11, 6)
-#define SYS_TRBIDR_EL1 sys_reg(3, 0, 9, 11, 7)
-
-#define TRBLIMITR_LIMIT_MASK GENMASK_ULL(51, 0)
-#define TRBLIMITR_LIMIT_SHIFT 12
-#define TRBLIMITR_NVM BIT(5)
-#define TRBLIMITR_TRIG_MODE_MASK GENMASK(1, 0)
-#define TRBLIMITR_TRIG_MODE_SHIFT 3
-#define TRBLIMITR_FILL_MODE_MASK GENMASK(1, 0)
-#define TRBLIMITR_FILL_MODE_SHIFT 1
-#define TRBLIMITR_ENABLE BIT(0)
-#define TRBPTR_PTR_MASK GENMASK_ULL(63, 0)
-#define TRBPTR_PTR_SHIFT 0
-#define TRBBASER_BASE_MASK GENMASK_ULL(51, 0)
-#define TRBBASER_BASE_SHIFT 12
-#define TRBSR_EC_MASK GENMASK(5, 0)
-#define TRBSR_EC_SHIFT 26
-#define TRBSR_IRQ BIT(22)
-#define TRBSR_TRG BIT(21)
-#define TRBSR_WRAP BIT(20)
-#define TRBSR_ABORT BIT(18)
-#define TRBSR_STOP BIT(17)
-#define TRBSR_MSS_MASK GENMASK(15, 0)
-#define TRBSR_MSS_SHIFT 0
-#define TRBSR_BSC_MASK GENMASK(5, 0)
-#define TRBSR_BSC_SHIFT 0
-#define TRBSR_FSC_MASK GENMASK(5, 0)
-#define TRBSR_FSC_SHIFT 0
-#define TRBMAR_SHARE_MASK GENMASK(1, 0)
-#define TRBMAR_SHARE_SHIFT 8
-#define TRBMAR_OUTER_MASK GENMASK(3, 0)
-#define TRBMAR_OUTER_SHIFT 4
-#define TRBMAR_INNER_MASK GENMASK(3, 0)
-#define TRBMAR_INNER_SHIFT 0
-#define TRBTRG_TRG_MASK GENMASK(31, 0)
-#define TRBTRG_TRG_SHIFT 0
-#define TRBIDR_FLAG BIT(5)
-#define TRBIDR_PROG BIT(4)
-#define TRBIDR_ALIGN_MASK GENMASK(3, 0)
-#define TRBIDR_ALIGN_SHIFT 0
+#define TRBSR_EL1_BSC_MASK GENMASK(5, 0)
+#define TRBSR_EL1_BSC_SHIFT 0
#define SYS_PMINTENSET_EL1 sys_reg(3, 0, 9, 14, 1)
#define SYS_PMINTENCLR_EL1 sys_reg(3, 0, 9, 14, 2)
@@ -436,19 +406,12 @@
#define SYS_ICC_IGRPEN0_EL1 sys_reg(3, 0, 12, 12, 6)
#define SYS_ICC_IGRPEN1_EL1 sys_reg(3, 0, 12, 12, 7)
-#define SYS_TPIDR_EL1 sys_reg(3, 0, 13, 0, 4)
-
-#define SYS_SCXTNUM_EL1 sys_reg(3, 0, 13, 0, 7)
+#define SYS_ACCDATA_EL1 sys_reg(3, 0, 13, 0, 5)
#define SYS_CNTKCTL_EL1 sys_reg(3, 0, 14, 1, 0)
-#define SYS_CCSIDR_EL1 sys_reg(3, 1, 0, 0, 0)
#define SYS_AIDR_EL1 sys_reg(3, 1, 0, 0, 7)
-#define SMIDR_EL1_IMPLEMENTER_SHIFT 24
-#define SMIDR_EL1_SMPS_SHIFT 15
-#define SMIDR_EL1_AFFINITY_SHIFT 0
-
#define SYS_RNDR_EL0 sys_reg(3, 3, 2, 4, 0)
#define SYS_RNDRRS_EL0 sys_reg(3, 3, 2, 4, 1)
@@ -457,7 +420,6 @@
#define SYS_PMCNTENCLR_EL0 sys_reg(3, 3, 9, 12, 2)
#define SYS_PMOVSCLR_EL0 sys_reg(3, 3, 9, 12, 3)
#define SYS_PMSWINC_EL0 sys_reg(3, 3, 9, 12, 4)
-#define SYS_PMSELR_EL0 sys_reg(3, 3, 9, 12, 5)
#define SYS_PMCEID0_EL0 sys_reg(3, 3, 9, 12, 6)
#define SYS_PMCEID1_EL0 sys_reg(3, 3, 9, 12, 7)
#define SYS_PMCCNTR_EL0 sys_reg(3, 3, 9, 13, 0)
@@ -510,6 +472,8 @@
#define SYS_CNTFRQ_EL0 sys_reg(3, 3, 14, 0, 0)
+#define SYS_CNTPCT_EL0 sys_reg(3, 3, 14, 0, 1)
+#define SYS_CNTVCT_EL0 sys_reg(3, 3, 14, 0, 2)
#define SYS_CNTPCTSS_EL0 sys_reg(3, 3, 14, 0, 5)
#define SYS_CNTVCTSS_EL0 sys_reg(3, 3, 14, 0, 6)
@@ -517,39 +481,80 @@
#define SYS_CNTP_CTL_EL0 sys_reg(3, 3, 14, 2, 1)
#define SYS_CNTP_CVAL_EL0 sys_reg(3, 3, 14, 2, 2)
+#define SYS_CNTV_TVAL_EL0 sys_reg(3, 3, 14, 3, 0)
#define SYS_CNTV_CTL_EL0 sys_reg(3, 3, 14, 3, 1)
#define SYS_CNTV_CVAL_EL0 sys_reg(3, 3, 14, 3, 2)
#define SYS_AARCH32_CNTP_TVAL sys_reg(0, 0, 14, 2, 0)
#define SYS_AARCH32_CNTP_CTL sys_reg(0, 0, 14, 2, 1)
+#define SYS_AARCH32_CNTPCT sys_reg(0, 0, 0, 14, 0)
+#define SYS_AARCH32_CNTVCT sys_reg(0, 1, 0, 14, 0)
#define SYS_AARCH32_CNTP_CVAL sys_reg(0, 2, 0, 14, 0)
+#define SYS_AARCH32_CNTPCTSS sys_reg(0, 8, 0, 14, 0)
+#define SYS_AARCH32_CNTVCTSS sys_reg(0, 9, 0, 14, 0)
#define __PMEV_op2(n) ((n) & 0x7)
#define __CNTR_CRm(n) (0x8 | (((n) >> 3) & 0x3))
+#define SYS_PMEVCNTSVRn_EL1(n) sys_reg(2, 0, 14, __CNTR_CRm(n), __PMEV_op2(n))
#define SYS_PMEVCNTRn_EL0(n) sys_reg(3, 3, 14, __CNTR_CRm(n), __PMEV_op2(n))
#define __TYPER_CRm(n) (0xc | (((n) >> 3) & 0x3))
#define SYS_PMEVTYPERn_EL0(n) sys_reg(3, 3, 14, __TYPER_CRm(n), __PMEV_op2(n))
#define SYS_PMCCFILTR_EL0 sys_reg(3, 3, 14, 15, 7)
+#define SYS_SPMCGCRn_EL1(n) sys_reg(2, 0, 9, 13, ((n) & 1))
+
+#define __SPMEV_op2(n) ((n) & 0x7)
+#define __SPMEV_crm(p, n) ((((p) & 7) << 1) | (((n) >> 3) & 1))
+#define SYS_SPMEVCNTRn_EL0(n) sys_reg(2, 3, 14, __SPMEV_crm(0b000, n), __SPMEV_op2(n))
+#define SYS_SPMEVFILT2Rn_EL0(n) sys_reg(2, 3, 14, __SPMEV_crm(0b011, n), __SPMEV_op2(n))
+#define SYS_SPMEVFILTRn_EL0(n) sys_reg(2, 3, 14, __SPMEV_crm(0b010, n), __SPMEV_op2(n))
+#define SYS_SPMEVTYPERn_EL0(n) sys_reg(2, 3, 14, __SPMEV_crm(0b001, n), __SPMEV_op2(n))
+
+#define SYS_VPIDR_EL2 sys_reg(3, 4, 0, 0, 0)
+#define SYS_VMPIDR_EL2 sys_reg(3, 4, 0, 0, 5)
+
#define SYS_SCTLR_EL2 sys_reg(3, 4, 1, 0, 0)
-#define SYS_HFGRTR_EL2 sys_reg(3, 4, 1, 1, 4)
-#define SYS_HFGWTR_EL2 sys_reg(3, 4, 1, 1, 5)
-#define SYS_HFGITR_EL2 sys_reg(3, 4, 1, 1, 6)
-#define SYS_TRFCR_EL2 sys_reg(3, 4, 1, 2, 1)
-#define SYS_HCRX_EL2 sys_reg(3, 4, 1, 2, 2)
-#define SYS_HDFGRTR_EL2 sys_reg(3, 4, 3, 1, 4)
-#define SYS_HDFGWTR_EL2 sys_reg(3, 4, 3, 1, 5)
+#define SYS_ACTLR_EL2 sys_reg(3, 4, 1, 0, 1)
+#define SYS_SCTLR2_EL2 sys_reg(3, 4, 1, 0, 3)
+#define SYS_HCR_EL2 sys_reg(3, 4, 1, 1, 0)
+#define SYS_MDCR_EL2 sys_reg(3, 4, 1, 1, 1)
+#define SYS_CPTR_EL2 sys_reg(3, 4, 1, 1, 2)
+#define SYS_HSTR_EL2 sys_reg(3, 4, 1, 1, 3)
+#define SYS_HACR_EL2 sys_reg(3, 4, 1, 1, 7)
+
+#define SYS_TTBR0_EL2 sys_reg(3, 4, 2, 0, 0)
+#define SYS_TTBR1_EL2 sys_reg(3, 4, 2, 0, 1)
+#define SYS_TCR_EL2 sys_reg(3, 4, 2, 0, 2)
+#define SYS_VTTBR_EL2 sys_reg(3, 4, 2, 1, 0)
+#define SYS_VTCR_EL2 sys_reg(3, 4, 2, 1, 2)
+
#define SYS_HAFGRTR_EL2 sys_reg(3, 4, 3, 1, 6)
#define SYS_SPSR_EL2 sys_reg(3, 4, 4, 0, 0)
#define SYS_ELR_EL2 sys_reg(3, 4, 4, 0, 1)
+#define SYS_SP_EL1 sys_reg(3, 4, 4, 1, 0)
+#define SYS_SPSR_irq sys_reg(3, 4, 4, 3, 0)
+#define SYS_SPSR_abt sys_reg(3, 4, 4, 3, 1)
+#define SYS_SPSR_und sys_reg(3, 4, 4, 3, 2)
+#define SYS_SPSR_fiq sys_reg(3, 4, 4, 3, 3)
#define SYS_IFSR32_EL2 sys_reg(3, 4, 5, 0, 1)
+#define SYS_AFSR0_EL2 sys_reg(3, 4, 5, 1, 0)
+#define SYS_AFSR1_EL2 sys_reg(3, 4, 5, 1, 1)
#define SYS_ESR_EL2 sys_reg(3, 4, 5, 2, 0)
#define SYS_VSESR_EL2 sys_reg(3, 4, 5, 2, 3)
#define SYS_FPEXC32_EL2 sys_reg(3, 4, 5, 3, 0)
#define SYS_TFSR_EL2 sys_reg(3, 4, 5, 6, 0)
-#define SYS_VDISR_EL2 sys_reg(3, 4, 12, 1, 1)
+#define SYS_FAR_EL2 sys_reg(3, 4, 6, 0, 0)
+#define SYS_HPFAR_EL2 sys_reg(3, 4, 6, 0, 4)
+
+#define SYS_MAIR_EL2 sys_reg(3, 4, 10, 2, 0)
+#define SYS_AMAIR_EL2 sys_reg(3, 4, 10, 3, 0)
+
+#define SYS_VBAR_EL2 sys_reg(3, 4, 12, 0, 0)
+#define SYS_RVBAR_EL2 sys_reg(3, 4, 12, 0, 1)
+#define SYS_RMR_EL2 sys_reg(3, 4, 12, 0, 2)
+#define SYS_VDISR_EL2 sys_reg(3, 4, 12, 1, 1)
#define __SYS__AP0Rx_EL2(x) sys_reg(3, 4, 12, 8, x)
#define SYS_ICH_AP0R0_EL2 __SYS__AP0Rx_EL2(0)
#define SYS_ICH_AP0R1_EL2 __SYS__AP0Rx_EL2(1)
@@ -564,9 +569,6 @@
#define SYS_ICH_VSEIR_EL2 sys_reg(3, 4, 12, 9, 4)
#define SYS_ICC_SRE_EL2 sys_reg(3, 4, 12, 9, 5)
-#define SYS_ICH_HCR_EL2 sys_reg(3, 4, 12, 11, 0)
-#define SYS_ICH_VTR_EL2 sys_reg(3, 4, 12, 11, 1)
-#define SYS_ICH_MISR_EL2 sys_reg(3, 4, 12, 11, 2)
#define SYS_ICH_EISR_EL2 sys_reg(3, 4, 12, 11, 3)
#define SYS_ICH_ELRSR_EL2 sys_reg(3, 4, 12, 11, 5)
#define SYS_ICH_VMCR_EL2 sys_reg(3, 4, 12, 11, 7)
@@ -591,20 +593,41 @@
#define SYS_ICH_LR14_EL2 __SYS__LR8_EL2(6)
#define SYS_ICH_LR15_EL2 __SYS__LR8_EL2(7)
+#define SYS_CONTEXTIDR_EL2 sys_reg(3, 4, 13, 0, 1)
+#define SYS_TPIDR_EL2 sys_reg(3, 4, 13, 0, 2)
+#define SYS_SCXTNUM_EL2 sys_reg(3, 4, 13, 0, 7)
+
+#define __AMEV_op2(m) (m & 0x7)
+#define __AMEV_CRm(n, m) (n | ((m & 0x8) >> 3))
+#define __SYS__AMEVCNTVOFF0n_EL2(m) sys_reg(3, 4, 13, __AMEV_CRm(0x8, m), __AMEV_op2(m))
+#define SYS_AMEVCNTVOFF0n_EL2(m) __SYS__AMEVCNTVOFF0n_EL2(m)
+#define __SYS__AMEVCNTVOFF1n_EL2(m) sys_reg(3, 4, 13, __AMEV_CRm(0xA, m), __AMEV_op2(m))
+#define SYS_AMEVCNTVOFF1n_EL2(m) __SYS__AMEVCNTVOFF1n_EL2(m)
+
+#define SYS_CNTVOFF_EL2 sys_reg(3, 4, 14, 0, 3)
+#define SYS_CNTHCTL_EL2 sys_reg(3, 4, 14, 1, 0)
+#define SYS_CNTHP_TVAL_EL2 sys_reg(3, 4, 14, 2, 0)
+#define SYS_CNTHP_CTL_EL2 sys_reg(3, 4, 14, 2, 1)
+#define SYS_CNTHP_CVAL_EL2 sys_reg(3, 4, 14, 2, 2)
+#define SYS_CNTHV_TVAL_EL2 sys_reg(3, 4, 14, 3, 0)
+#define SYS_CNTHV_CTL_EL2 sys_reg(3, 4, 14, 3, 1)
+#define SYS_CNTHV_CVAL_EL2 sys_reg(3, 4, 14, 3, 2)
+
/* VHE encodings for architectural EL0/1 system registers */
-#define SYS_SCTLR_EL12 sys_reg(3, 5, 1, 0, 0)
+#define SYS_BRBCR_EL12 sys_reg(2, 5, 9, 0, 0)
#define SYS_TTBR0_EL12 sys_reg(3, 5, 2, 0, 0)
#define SYS_TTBR1_EL12 sys_reg(3, 5, 2, 0, 1)
-#define SYS_TCR_EL12 sys_reg(3, 5, 2, 0, 2)
#define SYS_SPSR_EL12 sys_reg(3, 5, 4, 0, 0)
#define SYS_ELR_EL12 sys_reg(3, 5, 4, 0, 1)
#define SYS_AFSR0_EL12 sys_reg(3, 5, 5, 1, 0)
#define SYS_AFSR1_EL12 sys_reg(3, 5, 5, 1, 1)
#define SYS_ESR_EL12 sys_reg(3, 5, 5, 2, 0)
#define SYS_TFSR_EL12 sys_reg(3, 5, 5, 6, 0)
+#define SYS_PMSCR_EL12 sys_reg(3, 5, 9, 9, 0)
#define SYS_MAIR_EL12 sys_reg(3, 5, 10, 2, 0)
#define SYS_AMAIR_EL12 sys_reg(3, 5, 10, 3, 0)
#define SYS_VBAR_EL12 sys_reg(3, 5, 12, 0, 0)
+#define SYS_SCXTNUM_EL12 sys_reg(3, 5, 13, 0, 7)
#define SYS_CNTKCTL_EL12 sys_reg(3, 5, 14, 1, 0)
#define SYS_CNTP_TVAL_EL02 sys_reg(3, 5, 14, 2, 0)
#define SYS_CNTP_CTL_EL02 sys_reg(3, 5, 14, 2, 1)
@@ -613,11 +636,197 @@
#define SYS_CNTV_CTL_EL02 sys_reg(3, 5, 14, 3, 1)
#define SYS_CNTV_CVAL_EL02 sys_reg(3, 5, 14, 3, 2)
+#define SYS_SP_EL2 sys_reg(3, 6, 4, 1, 0)
+
+/* AT instructions */
+#define AT_Op0 1
+#define AT_CRn 7
+
+#define OP_AT_S1E1R sys_insn(AT_Op0, 0, AT_CRn, 8, 0)
+#define OP_AT_S1E1W sys_insn(AT_Op0, 0, AT_CRn, 8, 1)
+#define OP_AT_S1E0R sys_insn(AT_Op0, 0, AT_CRn, 8, 2)
+#define OP_AT_S1E0W sys_insn(AT_Op0, 0, AT_CRn, 8, 3)
+#define OP_AT_S1E1RP sys_insn(AT_Op0, 0, AT_CRn, 9, 0)
+#define OP_AT_S1E1WP sys_insn(AT_Op0, 0, AT_CRn, 9, 1)
+#define OP_AT_S1E1A sys_insn(AT_Op0, 0, AT_CRn, 9, 2)
+#define OP_AT_S1E2R sys_insn(AT_Op0, 4, AT_CRn, 8, 0)
+#define OP_AT_S1E2W sys_insn(AT_Op0, 4, AT_CRn, 8, 1)
+#define OP_AT_S12E1R sys_insn(AT_Op0, 4, AT_CRn, 8, 4)
+#define OP_AT_S12E1W sys_insn(AT_Op0, 4, AT_CRn, 8, 5)
+#define OP_AT_S12E0R sys_insn(AT_Op0, 4, AT_CRn, 8, 6)
+#define OP_AT_S12E0W sys_insn(AT_Op0, 4, AT_CRn, 8, 7)
+#define OP_AT_S1E2A sys_insn(AT_Op0, 4, AT_CRn, 9, 2)
+
+/* TLBI instructions */
+#define TLBI_Op0 1
+
+#define TLBI_Op1_EL1 0 /* Accessible from EL1 or higher */
+#define TLBI_Op1_EL2 4 /* Accessible from EL2 or higher */
+
+#define TLBI_CRn_XS 8 /* Extra Slow (the common one) */
+#define TLBI_CRn_nXS 9 /* not Extra Slow (which nobody uses) */
+
+#define TLBI_CRm_IPAIS 0 /* S2 Inner-Shareable */
+#define TLBI_CRm_nROS 1 /* non-Range, Outer-Shareable */
+#define TLBI_CRm_RIS 2 /* Range, Inner-Shareable */
+#define TLBI_CRm_nRIS 3 /* non-Range, Inner-Shareable */
+#define TLBI_CRm_IPAONS 4 /* S2 Outer and Non-Shareable */
+#define TLBI_CRm_ROS 5 /* Range, Outer-Shareable */
+#define TLBI_CRm_RNS 6 /* Range, Non-Shareable */
+#define TLBI_CRm_nRNS 7 /* non-Range, Non-Shareable */
+
+#define OP_TLBI_VMALLE1OS sys_insn(1, 0, 8, 1, 0)
+#define OP_TLBI_VAE1OS sys_insn(1, 0, 8, 1, 1)
+#define OP_TLBI_ASIDE1OS sys_insn(1, 0, 8, 1, 2)
+#define OP_TLBI_VAAE1OS sys_insn(1, 0, 8, 1, 3)
+#define OP_TLBI_VALE1OS sys_insn(1, 0, 8, 1, 5)
+#define OP_TLBI_VAALE1OS sys_insn(1, 0, 8, 1, 7)
+#define OP_TLBI_RVAE1IS sys_insn(1, 0, 8, 2, 1)
+#define OP_TLBI_RVAAE1IS sys_insn(1, 0, 8, 2, 3)
+#define OP_TLBI_RVALE1IS sys_insn(1, 0, 8, 2, 5)
+#define OP_TLBI_RVAALE1IS sys_insn(1, 0, 8, 2, 7)
+#define OP_TLBI_VMALLE1IS sys_insn(1, 0, 8, 3, 0)
+#define OP_TLBI_VAE1IS sys_insn(1, 0, 8, 3, 1)
+#define OP_TLBI_ASIDE1IS sys_insn(1, 0, 8, 3, 2)
+#define OP_TLBI_VAAE1IS sys_insn(1, 0, 8, 3, 3)
+#define OP_TLBI_VALE1IS sys_insn(1, 0, 8, 3, 5)
+#define OP_TLBI_VAALE1IS sys_insn(1, 0, 8, 3, 7)
+#define OP_TLBI_RVAE1OS sys_insn(1, 0, 8, 5, 1)
+#define OP_TLBI_RVAAE1OS sys_insn(1, 0, 8, 5, 3)
+#define OP_TLBI_RVALE1OS sys_insn(1, 0, 8, 5, 5)
+#define OP_TLBI_RVAALE1OS sys_insn(1, 0, 8, 5, 7)
+#define OP_TLBI_RVAE1 sys_insn(1, 0, 8, 6, 1)
+#define OP_TLBI_RVAAE1 sys_insn(1, 0, 8, 6, 3)
+#define OP_TLBI_RVALE1 sys_insn(1, 0, 8, 6, 5)
+#define OP_TLBI_RVAALE1 sys_insn(1, 0, 8, 6, 7)
+#define OP_TLBI_VMALLE1 sys_insn(1, 0, 8, 7, 0)
+#define OP_TLBI_VAE1 sys_insn(1, 0, 8, 7, 1)
+#define OP_TLBI_ASIDE1 sys_insn(1, 0, 8, 7, 2)
+#define OP_TLBI_VAAE1 sys_insn(1, 0, 8, 7, 3)
+#define OP_TLBI_VALE1 sys_insn(1, 0, 8, 7, 5)
+#define OP_TLBI_VAALE1 sys_insn(1, 0, 8, 7, 7)
+#define OP_TLBI_VMALLE1OSNXS sys_insn(1, 0, 9, 1, 0)
+#define OP_TLBI_VAE1OSNXS sys_insn(1, 0, 9, 1, 1)
+#define OP_TLBI_ASIDE1OSNXS sys_insn(1, 0, 9, 1, 2)
+#define OP_TLBI_VAAE1OSNXS sys_insn(1, 0, 9, 1, 3)
+#define OP_TLBI_VALE1OSNXS sys_insn(1, 0, 9, 1, 5)
+#define OP_TLBI_VAALE1OSNXS sys_insn(1, 0, 9, 1, 7)
+#define OP_TLBI_RVAE1ISNXS sys_insn(1, 0, 9, 2, 1)
+#define OP_TLBI_RVAAE1ISNXS sys_insn(1, 0, 9, 2, 3)
+#define OP_TLBI_RVALE1ISNXS sys_insn(1, 0, 9, 2, 5)
+#define OP_TLBI_RVAALE1ISNXS sys_insn(1, 0, 9, 2, 7)
+#define OP_TLBI_VMALLE1ISNXS sys_insn(1, 0, 9, 3, 0)
+#define OP_TLBI_VAE1ISNXS sys_insn(1, 0, 9, 3, 1)
+#define OP_TLBI_ASIDE1ISNXS sys_insn(1, 0, 9, 3, 2)
+#define OP_TLBI_VAAE1ISNXS sys_insn(1, 0, 9, 3, 3)
+#define OP_TLBI_VALE1ISNXS sys_insn(1, 0, 9, 3, 5)
+#define OP_TLBI_VAALE1ISNXS sys_insn(1, 0, 9, 3, 7)
+#define OP_TLBI_RVAE1OSNXS sys_insn(1, 0, 9, 5, 1)
+#define OP_TLBI_RVAAE1OSNXS sys_insn(1, 0, 9, 5, 3)
+#define OP_TLBI_RVALE1OSNXS sys_insn(1, 0, 9, 5, 5)
+#define OP_TLBI_RVAALE1OSNXS sys_insn(1, 0, 9, 5, 7)
+#define OP_TLBI_RVAE1NXS sys_insn(1, 0, 9, 6, 1)
+#define OP_TLBI_RVAAE1NXS sys_insn(1, 0, 9, 6, 3)
+#define OP_TLBI_RVALE1NXS sys_insn(1, 0, 9, 6, 5)
+#define OP_TLBI_RVAALE1NXS sys_insn(1, 0, 9, 6, 7)
+#define OP_TLBI_VMALLE1NXS sys_insn(1, 0, 9, 7, 0)
+#define OP_TLBI_VAE1NXS sys_insn(1, 0, 9, 7, 1)
+#define OP_TLBI_ASIDE1NXS sys_insn(1, 0, 9, 7, 2)
+#define OP_TLBI_VAAE1NXS sys_insn(1, 0, 9, 7, 3)
+#define OP_TLBI_VALE1NXS sys_insn(1, 0, 9, 7, 5)
+#define OP_TLBI_VAALE1NXS sys_insn(1, 0, 9, 7, 7)
+#define OP_TLBI_IPAS2E1IS sys_insn(1, 4, 8, 0, 1)
+#define OP_TLBI_RIPAS2E1IS sys_insn(1, 4, 8, 0, 2)
+#define OP_TLBI_IPAS2LE1IS sys_insn(1, 4, 8, 0, 5)
+#define OP_TLBI_RIPAS2LE1IS sys_insn(1, 4, 8, 0, 6)
+#define OP_TLBI_ALLE2OS sys_insn(1, 4, 8, 1, 0)
+#define OP_TLBI_VAE2OS sys_insn(1, 4, 8, 1, 1)
+#define OP_TLBI_ALLE1OS sys_insn(1, 4, 8, 1, 4)
+#define OP_TLBI_VALE2OS sys_insn(1, 4, 8, 1, 5)
+#define OP_TLBI_VMALLS12E1OS sys_insn(1, 4, 8, 1, 6)
+#define OP_TLBI_RVAE2IS sys_insn(1, 4, 8, 2, 1)
+#define OP_TLBI_RVALE2IS sys_insn(1, 4, 8, 2, 5)
+#define OP_TLBI_ALLE2IS sys_insn(1, 4, 8, 3, 0)
+#define OP_TLBI_VAE2IS sys_insn(1, 4, 8, 3, 1)
+#define OP_TLBI_ALLE1IS sys_insn(1, 4, 8, 3, 4)
+#define OP_TLBI_VALE2IS sys_insn(1, 4, 8, 3, 5)
+#define OP_TLBI_VMALLS12E1IS sys_insn(1, 4, 8, 3, 6)
+#define OP_TLBI_IPAS2E1OS sys_insn(1, 4, 8, 4, 0)
+#define OP_TLBI_IPAS2E1 sys_insn(1, 4, 8, 4, 1)
+#define OP_TLBI_RIPAS2E1 sys_insn(1, 4, 8, 4, 2)
+#define OP_TLBI_RIPAS2E1OS sys_insn(1, 4, 8, 4, 3)
+#define OP_TLBI_IPAS2LE1OS sys_insn(1, 4, 8, 4, 4)
+#define OP_TLBI_IPAS2LE1 sys_insn(1, 4, 8, 4, 5)
+#define OP_TLBI_RIPAS2LE1 sys_insn(1, 4, 8, 4, 6)
+#define OP_TLBI_RIPAS2LE1OS sys_insn(1, 4, 8, 4, 7)
+#define OP_TLBI_RVAE2OS sys_insn(1, 4, 8, 5, 1)
+#define OP_TLBI_RVALE2OS sys_insn(1, 4, 8, 5, 5)
+#define OP_TLBI_RVAE2 sys_insn(1, 4, 8, 6, 1)
+#define OP_TLBI_RVALE2 sys_insn(1, 4, 8, 6, 5)
+#define OP_TLBI_ALLE2 sys_insn(1, 4, 8, 7, 0)
+#define OP_TLBI_VAE2 sys_insn(1, 4, 8, 7, 1)
+#define OP_TLBI_ALLE1 sys_insn(1, 4, 8, 7, 4)
+#define OP_TLBI_VALE2 sys_insn(1, 4, 8, 7, 5)
+#define OP_TLBI_VMALLS12E1 sys_insn(1, 4, 8, 7, 6)
+#define OP_TLBI_IPAS2E1ISNXS sys_insn(1, 4, 9, 0, 1)
+#define OP_TLBI_RIPAS2E1ISNXS sys_insn(1, 4, 9, 0, 2)
+#define OP_TLBI_IPAS2LE1ISNXS sys_insn(1, 4, 9, 0, 5)
+#define OP_TLBI_RIPAS2LE1ISNXS sys_insn(1, 4, 9, 0, 6)
+#define OP_TLBI_ALLE2OSNXS sys_insn(1, 4, 9, 1, 0)
+#define OP_TLBI_VAE2OSNXS sys_insn(1, 4, 9, 1, 1)
+#define OP_TLBI_ALLE1OSNXS sys_insn(1, 4, 9, 1, 4)
+#define OP_TLBI_VALE2OSNXS sys_insn(1, 4, 9, 1, 5)
+#define OP_TLBI_VMALLS12E1OSNXS sys_insn(1, 4, 9, 1, 6)
+#define OP_TLBI_RVAE2ISNXS sys_insn(1, 4, 9, 2, 1)
+#define OP_TLBI_RVALE2ISNXS sys_insn(1, 4, 9, 2, 5)
+#define OP_TLBI_ALLE2ISNXS sys_insn(1, 4, 9, 3, 0)
+#define OP_TLBI_VAE2ISNXS sys_insn(1, 4, 9, 3, 1)
+#define OP_TLBI_ALLE1ISNXS sys_insn(1, 4, 9, 3, 4)
+#define OP_TLBI_VALE2ISNXS sys_insn(1, 4, 9, 3, 5)
+#define OP_TLBI_VMALLS12E1ISNXS sys_insn(1, 4, 9, 3, 6)
+#define OP_TLBI_IPAS2E1OSNXS sys_insn(1, 4, 9, 4, 0)
+#define OP_TLBI_IPAS2E1NXS sys_insn(1, 4, 9, 4, 1)
+#define OP_TLBI_RIPAS2E1NXS sys_insn(1, 4, 9, 4, 2)
+#define OP_TLBI_RIPAS2E1OSNXS sys_insn(1, 4, 9, 4, 3)
+#define OP_TLBI_IPAS2LE1OSNXS sys_insn(1, 4, 9, 4, 4)
+#define OP_TLBI_IPAS2LE1NXS sys_insn(1, 4, 9, 4, 5)
+#define OP_TLBI_RIPAS2LE1NXS sys_insn(1, 4, 9, 4, 6)
+#define OP_TLBI_RIPAS2LE1OSNXS sys_insn(1, 4, 9, 4, 7)
+#define OP_TLBI_RVAE2OSNXS sys_insn(1, 4, 9, 5, 1)
+#define OP_TLBI_RVALE2OSNXS sys_insn(1, 4, 9, 5, 5)
+#define OP_TLBI_RVAE2NXS sys_insn(1, 4, 9, 6, 1)
+#define OP_TLBI_RVALE2NXS sys_insn(1, 4, 9, 6, 5)
+#define OP_TLBI_ALLE2NXS sys_insn(1, 4, 9, 7, 0)
+#define OP_TLBI_VAE2NXS sys_insn(1, 4, 9, 7, 1)
+#define OP_TLBI_ALLE1NXS sys_insn(1, 4, 9, 7, 4)
+#define OP_TLBI_VALE2NXS sys_insn(1, 4, 9, 7, 5)
+#define OP_TLBI_VMALLS12E1NXS sys_insn(1, 4, 9, 7, 6)
+
+/* Misc instructions */
+#define OP_GCSPUSHX sys_insn(1, 0, 7, 7, 4)
+#define OP_GCSPOPCX sys_insn(1, 0, 7, 7, 5)
+#define OP_GCSPOPX sys_insn(1, 0, 7, 7, 6)
+#define OP_GCSPUSHM sys_insn(1, 3, 7, 7, 0)
+
+#define OP_BRB_IALL sys_insn(1, 1, 7, 2, 4)
+#define OP_BRB_INJ sys_insn(1, 1, 7, 2, 5)
+#define OP_CFP_RCTX sys_insn(1, 3, 7, 3, 4)
+#define OP_DVP_RCTX sys_insn(1, 3, 7, 3, 5)
+#define OP_COSP_RCTX sys_insn(1, 3, 7, 3, 6)
+#define OP_CPP_RCTX sys_insn(1, 3, 7, 3, 7)
+
+/*
+ * BRBE Instructions
+ */
+#define BRB_IALL_INSN __emit_inst(0xd5000000 | OP_BRB_IALL | (0x1f))
+#define BRB_INJ_INSN __emit_inst(0xd5000000 | OP_BRB_INJ | (0x1f))
+
/* Common SCTLR_ELx flags. */
#define SCTLR_ELx_ENTP2 (BIT(60))
#define SCTLR_ELx_DSSBS (BIT(44))
#define SCTLR_ELx_ATA (BIT(43))
+#define SCTLR_ELx_EE_SHIFT 25
#define SCTLR_ELx_ENIA_SHIFT 31
#define SCTLR_ELx_ITFSB (BIT(37))
@@ -626,7 +835,7 @@
#define SCTLR_ELx_LSMAOE (BIT(29))
#define SCTLR_ELx_nTLSMD (BIT(28))
#define SCTLR_ELx_ENDA (BIT(27))
-#define SCTLR_ELx_EE (BIT(25))
+#define SCTLR_ELx_EE (BIT(SCTLR_ELx_EE_SHIFT))
#define SCTLR_ELx_EIS (BIT(22))
#define SCTLR_ELx_IESB (BIT(21))
#define SCTLR_ELx_TSCXT (BIT(20))
@@ -644,6 +853,7 @@
(BIT(18)) | (BIT(22)) | (BIT(23)) | (BIT(28)) | \
(BIT(29)))
+#define SCTLR_EL2_BT (BIT(36))
#ifdef CONFIG_CPU_BIG_ENDIAN
#define ENDIAN_SET_EL2 SCTLR_ELx_EE
#else
@@ -689,291 +899,49 @@
/* Position the attr at the correct index */
#define MAIR_ATTRIDX(attr, idx) ((attr) << ((idx) * 8))
-/* id_aa64pfr0 */
-#define ID_AA64PFR0_CSV3_SHIFT 60
-#define ID_AA64PFR0_CSV2_SHIFT 56
-#define ID_AA64PFR0_DIT_SHIFT 48
-#define ID_AA64PFR0_AMU_SHIFT 44
-#define ID_AA64PFR0_MPAM_SHIFT 40
-#define ID_AA64PFR0_SEL2_SHIFT 36
-#define ID_AA64PFR0_SVE_SHIFT 32
-#define ID_AA64PFR0_RAS_SHIFT 28
-#define ID_AA64PFR0_GIC_SHIFT 24
-#define ID_AA64PFR0_ASIMD_SHIFT 20
-#define ID_AA64PFR0_FP_SHIFT 16
-#define ID_AA64PFR0_EL3_SHIFT 12
-#define ID_AA64PFR0_EL2_SHIFT 8
-#define ID_AA64PFR0_EL1_SHIFT 4
-#define ID_AA64PFR0_EL0_SHIFT 0
-
-#define ID_AA64PFR0_AMU 0x1
-#define ID_AA64PFR0_SVE 0x1
-#define ID_AA64PFR0_RAS_V1 0x1
-#define ID_AA64PFR0_RAS_V1P1 0x2
-#define ID_AA64PFR0_FP_NI 0xf
-#define ID_AA64PFR0_FP_SUPPORTED 0x0
-#define ID_AA64PFR0_ASIMD_NI 0xf
-#define ID_AA64PFR0_ASIMD_SUPPORTED 0x0
-#define ID_AA64PFR0_ELx_64BIT_ONLY 0x1
-#define ID_AA64PFR0_ELx_32BIT_64BIT 0x2
-
-/* id_aa64pfr1 */
-#define ID_AA64PFR1_SME_SHIFT 24
-#define ID_AA64PFR1_MPAMFRAC_SHIFT 16
-#define ID_AA64PFR1_RASFRAC_SHIFT 12
-#define ID_AA64PFR1_MTE_SHIFT 8
-#define ID_AA64PFR1_SSBS_SHIFT 4
-#define ID_AA64PFR1_BT_SHIFT 0
-
-#define ID_AA64PFR1_SSBS_PSTATE_NI 0
-#define ID_AA64PFR1_SSBS_PSTATE_ONLY 1
-#define ID_AA64PFR1_SSBS_PSTATE_INSNS 2
-#define ID_AA64PFR1_BT_BTI 0x1
-#define ID_AA64PFR1_SME 1
-
-#define ID_AA64PFR1_MTE_NI 0x0
-#define ID_AA64PFR1_MTE_EL0 0x1
-#define ID_AA64PFR1_MTE 0x2
-#define ID_AA64PFR1_MTE_ASYMM 0x3
-
/* id_aa64mmfr0 */
-#define ID_AA64MMFR0_ECV_SHIFT 60
-#define ID_AA64MMFR0_FGT_SHIFT 56
-#define ID_AA64MMFR0_EXS_SHIFT 44
-#define ID_AA64MMFR0_TGRAN4_2_SHIFT 40
-#define ID_AA64MMFR0_TGRAN64_2_SHIFT 36
-#define ID_AA64MMFR0_TGRAN16_2_SHIFT 32
-#define ID_AA64MMFR0_TGRAN4_SHIFT 28
-#define ID_AA64MMFR0_TGRAN64_SHIFT 24
-#define ID_AA64MMFR0_TGRAN16_SHIFT 20
-#define ID_AA64MMFR0_BIGENDEL0_SHIFT 16
-#define ID_AA64MMFR0_SNSMEM_SHIFT 12
-#define ID_AA64MMFR0_BIGENDEL_SHIFT 8
-#define ID_AA64MMFR0_ASID_SHIFT 4
-#define ID_AA64MMFR0_PARANGE_SHIFT 0
-
-#define ID_AA64MMFR0_ASID_8 0x0
-#define ID_AA64MMFR0_ASID_16 0x2
-
-#define ID_AA64MMFR0_TGRAN4_NI 0xf
-#define ID_AA64MMFR0_TGRAN4_SUPPORTED_MIN 0x0
-#define ID_AA64MMFR0_TGRAN4_SUPPORTED_MAX 0x7
-#define ID_AA64MMFR0_TGRAN64_NI 0xf
-#define ID_AA64MMFR0_TGRAN64_SUPPORTED_MIN 0x0
-#define ID_AA64MMFR0_TGRAN64_SUPPORTED_MAX 0x7
-#define ID_AA64MMFR0_TGRAN16_NI 0x0
-#define ID_AA64MMFR0_TGRAN16_SUPPORTED_MIN 0x1
-#define ID_AA64MMFR0_TGRAN16_SUPPORTED_MAX 0xf
-
-#define ID_AA64MMFR0_PARANGE_32 0x0
-#define ID_AA64MMFR0_PARANGE_36 0x1
-#define ID_AA64MMFR0_PARANGE_40 0x2
-#define ID_AA64MMFR0_PARANGE_42 0x3
-#define ID_AA64MMFR0_PARANGE_44 0x4
-#define ID_AA64MMFR0_PARANGE_48 0x5
-#define ID_AA64MMFR0_PARANGE_52 0x6
+#define ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN 0x0
+#define ID_AA64MMFR0_EL1_TGRAN4_LPA2 ID_AA64MMFR0_EL1_TGRAN4_52_BIT
+#define ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MAX 0x7
+#define ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MIN 0x0
+#define ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MAX 0x7
+#define ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MIN 0x1
+#define ID_AA64MMFR0_EL1_TGRAN16_LPA2 ID_AA64MMFR0_EL1_TGRAN16_52_BIT
+#define ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MAX 0xf
#define ARM64_MIN_PARANGE_BITS 32
-#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_DEFAULT 0x0
-#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_NONE 0x1
-#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_MIN 0x2
-#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_MAX 0x7
+#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_DEFAULT 0x0
+#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_NONE 0x1
+#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_MIN 0x2
+#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_LPA2 0x3
+#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_MAX 0x7
#ifdef CONFIG_ARM64_PA_BITS_52
-#define ID_AA64MMFR0_PARANGE_MAX ID_AA64MMFR0_PARANGE_52
+#define ID_AA64MMFR0_EL1_PARANGE_MAX ID_AA64MMFR0_EL1_PARANGE_52
#else
-#define ID_AA64MMFR0_PARANGE_MAX ID_AA64MMFR0_PARANGE_48
+#define ID_AA64MMFR0_EL1_PARANGE_MAX ID_AA64MMFR0_EL1_PARANGE_48
#endif
-/* id_aa64mmfr1 */
-#define ID_AA64MMFR1_ECBHB_SHIFT 60
-#define ID_AA64MMFR1_TIDCP1_SHIFT 52
-#define ID_AA64MMFR1_HCX_SHIFT 40
-#define ID_AA64MMFR1_AFP_SHIFT 44
-#define ID_AA64MMFR1_ETS_SHIFT 36
-#define ID_AA64MMFR1_TWED_SHIFT 32
-#define ID_AA64MMFR1_XNX_SHIFT 28
-#define ID_AA64MMFR1_SPECSEI_SHIFT 24
-#define ID_AA64MMFR1_PAN_SHIFT 20
-#define ID_AA64MMFR1_LOR_SHIFT 16
-#define ID_AA64MMFR1_HPD_SHIFT 12
-#define ID_AA64MMFR1_VHE_SHIFT 8
-#define ID_AA64MMFR1_VMIDBITS_SHIFT 4
-#define ID_AA64MMFR1_HADBS_SHIFT 0
-
-#define ID_AA64MMFR1_VMIDBITS_8 0
-#define ID_AA64MMFR1_VMIDBITS_16 2
-
-#define ID_AA64MMFR1_TIDCP1_NI 0
-#define ID_AA64MMFR1_TIDCP1_IMP 1
-
-/* id_aa64mmfr2 */
-#define ID_AA64MMFR2_E0PD_SHIFT 60
-#define ID_AA64MMFR2_EVT_SHIFT 56
-#define ID_AA64MMFR2_BBM_SHIFT 52
-#define ID_AA64MMFR2_TTL_SHIFT 48
-#define ID_AA64MMFR2_FWB_SHIFT 40
-#define ID_AA64MMFR2_IDS_SHIFT 36
-#define ID_AA64MMFR2_AT_SHIFT 32
-#define ID_AA64MMFR2_ST_SHIFT 28
-#define ID_AA64MMFR2_NV_SHIFT 24
-#define ID_AA64MMFR2_CCIDX_SHIFT 20
-#define ID_AA64MMFR2_LVA_SHIFT 16
-#define ID_AA64MMFR2_IESB_SHIFT 12
-#define ID_AA64MMFR2_LSM_SHIFT 8
-#define ID_AA64MMFR2_UAO_SHIFT 4
-#define ID_AA64MMFR2_CNP_SHIFT 0
-
-/* id_aa64dfr0 */
-#define ID_AA64DFR0_MTPMU_SHIFT 48
-#define ID_AA64DFR0_TRBE_SHIFT 44
-#define ID_AA64DFR0_TRACE_FILT_SHIFT 40
-#define ID_AA64DFR0_DOUBLELOCK_SHIFT 36
-#define ID_AA64DFR0_PMSVER_SHIFT 32
-#define ID_AA64DFR0_CTX_CMPS_SHIFT 28
-#define ID_AA64DFR0_WRPS_SHIFT 20
-#define ID_AA64DFR0_BRPS_SHIFT 12
-#define ID_AA64DFR0_PMUVER_SHIFT 8
-#define ID_AA64DFR0_TRACEVER_SHIFT 4
-#define ID_AA64DFR0_DEBUGVER_SHIFT 0
-
-#define ID_AA64DFR0_PMUVER_8_0 0x1
-#define ID_AA64DFR0_PMUVER_8_1 0x4
-#define ID_AA64DFR0_PMUVER_8_4 0x5
-#define ID_AA64DFR0_PMUVER_8_5 0x6
-#define ID_AA64DFR0_PMUVER_8_7 0x7
-#define ID_AA64DFR0_PMUVER_IMP_DEF 0xf
-
-#define ID_AA64DFR0_PMSVER_8_2 0x1
-#define ID_AA64DFR0_PMSVER_8_3 0x2
-
-#define ID_DFR0_PERFMON_SHIFT 24
-
-#define ID_DFR0_PERFMON_8_0 0x3
-#define ID_DFR0_PERFMON_8_1 0x4
-#define ID_DFR0_PERFMON_8_4 0x5
-#define ID_DFR0_PERFMON_8_5 0x6
-
-#define ID_ISAR4_SWP_FRAC_SHIFT 28
-#define ID_ISAR4_PSR_M_SHIFT 24
-#define ID_ISAR4_SYNCH_PRIM_FRAC_SHIFT 20
-#define ID_ISAR4_BARRIER_SHIFT 16
-#define ID_ISAR4_SMC_SHIFT 12
-#define ID_ISAR4_WRITEBACK_SHIFT 8
-#define ID_ISAR4_WITHSHIFTS_SHIFT 4
-#define ID_ISAR4_UNPRIV_SHIFT 0
-
-#define ID_DFR1_MTPMU_SHIFT 0
-
-#define ID_ISAR0_DIVIDE_SHIFT 24
-#define ID_ISAR0_DEBUG_SHIFT 20
-#define ID_ISAR0_COPROC_SHIFT 16
-#define ID_ISAR0_CMPBRANCH_SHIFT 12
-#define ID_ISAR0_BITFIELD_SHIFT 8
-#define ID_ISAR0_BITCOUNT_SHIFT 4
-#define ID_ISAR0_SWAP_SHIFT 0
-
-#define ID_ISAR5_RDM_SHIFT 24
-#define ID_ISAR5_CRC32_SHIFT 16
-#define ID_ISAR5_SHA2_SHIFT 12
-#define ID_ISAR5_SHA1_SHIFT 8
-#define ID_ISAR5_AES_SHIFT 4
-#define ID_ISAR5_SEVL_SHIFT 0
-
-#define ID_ISAR6_I8MM_SHIFT 24
-#define ID_ISAR6_BF16_SHIFT 20
-#define ID_ISAR6_SPECRES_SHIFT 16
-#define ID_ISAR6_SB_SHIFT 12
-#define ID_ISAR6_FHM_SHIFT 8
-#define ID_ISAR6_DP_SHIFT 4
-#define ID_ISAR6_JSCVT_SHIFT 0
-
-#define ID_MMFR0_INNERSHR_SHIFT 28
-#define ID_MMFR0_FCSE_SHIFT 24
-#define ID_MMFR0_AUXREG_SHIFT 20
-#define ID_MMFR0_TCM_SHIFT 16
-#define ID_MMFR0_SHARELVL_SHIFT 12
-#define ID_MMFR0_OUTERSHR_SHIFT 8
-#define ID_MMFR0_PMSA_SHIFT 4
-#define ID_MMFR0_VMSA_SHIFT 0
-
-#define ID_MMFR4_EVT_SHIFT 28
-#define ID_MMFR4_CCIDX_SHIFT 24
-#define ID_MMFR4_LSM_SHIFT 20
-#define ID_MMFR4_HPDS_SHIFT 16
-#define ID_MMFR4_CNP_SHIFT 12
-#define ID_MMFR4_XNX_SHIFT 8
-#define ID_MMFR4_AC2_SHIFT 4
-#define ID_MMFR4_SPECSEI_SHIFT 0
-
-#define ID_MMFR5_ETS_SHIFT 0
-
-#define ID_PFR0_DIT_SHIFT 24
-#define ID_PFR0_CSV2_SHIFT 16
-#define ID_PFR0_STATE3_SHIFT 12
-#define ID_PFR0_STATE2_SHIFT 8
-#define ID_PFR0_STATE1_SHIFT 4
-#define ID_PFR0_STATE0_SHIFT 0
-
-#define ID_DFR0_PERFMON_SHIFT 24
-#define ID_DFR0_MPROFDBG_SHIFT 20
-#define ID_DFR0_MMAPTRC_SHIFT 16
-#define ID_DFR0_COPTRC_SHIFT 12
-#define ID_DFR0_MMAPDBG_SHIFT 8
-#define ID_DFR0_COPSDBG_SHIFT 4
-#define ID_DFR0_COPDBG_SHIFT 0
-
-#define ID_PFR2_SSBS_SHIFT 4
-#define ID_PFR2_CSV3_SHIFT 0
-
-#define MVFR0_FPROUND_SHIFT 28
-#define MVFR0_FPSHVEC_SHIFT 24
-#define MVFR0_FPSQRT_SHIFT 20
-#define MVFR0_FPDIVIDE_SHIFT 16
-#define MVFR0_FPTRAP_SHIFT 12
-#define MVFR0_FPDP_SHIFT 8
-#define MVFR0_FPSP_SHIFT 4
-#define MVFR0_SIMD_SHIFT 0
-
-#define MVFR1_SIMDFMAC_SHIFT 28
-#define MVFR1_FPHP_SHIFT 24
-#define MVFR1_SIMDHP_SHIFT 20
-#define MVFR1_SIMDSP_SHIFT 16
-#define MVFR1_SIMDINT_SHIFT 12
-#define MVFR1_SIMDLS_SHIFT 8
-#define MVFR1_FPDNAN_SHIFT 4
-#define MVFR1_FPFTZ_SHIFT 0
-
-#define ID_PFR1_GIC_SHIFT 28
-#define ID_PFR1_VIRT_FRAC_SHIFT 24
-#define ID_PFR1_SEC_FRAC_SHIFT 20
-#define ID_PFR1_GENTIMER_SHIFT 16
-#define ID_PFR1_VIRTUALIZATION_SHIFT 12
-#define ID_PFR1_MPROGMOD_SHIFT 8
-#define ID_PFR1_SECURITY_SHIFT 4
-#define ID_PFR1_PROGMOD_SHIFT 0
-
#if defined(CONFIG_ARM64_4K_PAGES)
-#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN4_SHIFT
-#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN4_SUPPORTED_MIN
-#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_TGRAN4_SUPPORTED_MAX
-#define ID_AA64MMFR0_TGRAN_2_SHIFT ID_AA64MMFR0_TGRAN4_2_SHIFT
+#define ID_AA64MMFR0_EL1_TGRAN_SHIFT ID_AA64MMFR0_EL1_TGRAN4_SHIFT
+#define ID_AA64MMFR0_EL1_TGRAN_LPA2 ID_AA64MMFR0_EL1_TGRAN4_52_BIT
+#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN
+#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MAX
+#define ID_AA64MMFR0_EL1_TGRAN_2_SHIFT ID_AA64MMFR0_EL1_TGRAN4_2_SHIFT
#elif defined(CONFIG_ARM64_16K_PAGES)
-#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN16_SHIFT
-#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN16_SUPPORTED_MIN
-#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_TGRAN16_SUPPORTED_MAX
-#define ID_AA64MMFR0_TGRAN_2_SHIFT ID_AA64MMFR0_TGRAN16_2_SHIFT
+#define ID_AA64MMFR0_EL1_TGRAN_SHIFT ID_AA64MMFR0_EL1_TGRAN16_SHIFT
+#define ID_AA64MMFR0_EL1_TGRAN_LPA2 ID_AA64MMFR0_EL1_TGRAN16_52_BIT
+#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MIN
+#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MAX
+#define ID_AA64MMFR0_EL1_TGRAN_2_SHIFT ID_AA64MMFR0_EL1_TGRAN16_2_SHIFT
#elif defined(CONFIG_ARM64_64K_PAGES)
-#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN64_SHIFT
-#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN64_SUPPORTED_MIN
-#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_TGRAN64_SUPPORTED_MAX
-#define ID_AA64MMFR0_TGRAN_2_SHIFT ID_AA64MMFR0_TGRAN64_2_SHIFT
+#define ID_AA64MMFR0_EL1_TGRAN_SHIFT ID_AA64MMFR0_EL1_TGRAN64_SHIFT
+#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MIN
+#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MAX
+#define ID_AA64MMFR0_EL1_TGRAN_2_SHIFT ID_AA64MMFR0_EL1_TGRAN64_2_SHIFT
#endif
-#define MVFR2_FPMISC_SHIFT 4
-#define MVFR2_SIMDMISC_SHIFT 0
-
#define CPACR_EL1_FPEN_EL1EN (BIT(20)) /* enable EL1 access */
#define CPACR_EL1_FPEN_EL0EN (BIT(21)) /* enable EL0 access, if EL1EN set */
@@ -1007,10 +975,6 @@
#define SYS_RGSR_EL1_SEED_SHIFT 8
#define SYS_RGSR_EL1_SEED_MASK 0xffffUL
-/* GMID_EL1 field definitions */
-#define GMID_EL1_BS_SHIFT 0
-#define GMID_EL1_BS_SIZE 4
-
/* TFSR{,E0}_EL1 bit definitions */
#define SYS_TFSR_EL1_TF0_SHIFT 0
#define SYS_TFSR_EL1_TF1_SHIFT 1
@@ -1020,22 +984,7 @@
/* Safe value for MPIDR_EL1: Bit31:RES1, Bit30:U:0, Bit24:MT:0 */
#define SYS_MPIDR_SAFE_VAL (BIT(31))
-#define TRFCR_ELx_TS_SHIFT 5
-#define TRFCR_ELx_TS_VIRTUAL ((0x1UL) << TRFCR_ELx_TS_SHIFT)
-#define TRFCR_ELx_TS_GUEST_PHYSICAL ((0x2UL) << TRFCR_ELx_TS_SHIFT)
-#define TRFCR_ELx_TS_PHYSICAL ((0x3UL) << TRFCR_ELx_TS_SHIFT)
-#define TRFCR_EL2_CX BIT(3)
-#define TRFCR_ELx_ExTRE BIT(1)
-#define TRFCR_ELx_E0TRE BIT(0)
-
-/* HCRX_EL2 definitions */
-#define HCRX_EL2_SMPME_MASK (1 << 5)
-
/* GIC Hypervisor interface registers */
-/* ICH_MISR_EL2 bit definitions */
-#define ICH_MISR_EOI (1 << 0)
-#define ICH_MISR_U (1 << 1)
-
/* ICH_LR*_EL2 bit definitions */
#define ICH_LR_VIRTUAL_ID_MASK ((1ULL << 32) - 1)
@@ -1050,17 +999,6 @@
#define ICH_LR_PRIORITY_SHIFT 48
#define ICH_LR_PRIORITY_MASK (0xffULL << ICH_LR_PRIORITY_SHIFT)
-/* ICH_HCR_EL2 bit definitions */
-#define ICH_HCR_EN (1 << 0)
-#define ICH_HCR_UIE (1 << 1)
-#define ICH_HCR_NPIE (1 << 3)
-#define ICH_HCR_TC (1 << 10)
-#define ICH_HCR_TALL0 (1 << 11)
-#define ICH_HCR_TALL1 (1 << 12)
-#define ICH_HCR_TDIR (1 << 14)
-#define ICH_HCR_EOIcount_SHIFT 27
-#define ICH_HCR_EOIcount_MASK (0x1f << ICH_HCR_EOIcount_SHIFT)
-
/* ICH_VMCR_EL2 bit definitions */
#define ICH_VMCR_ACK_CTL_SHIFT 2
#define ICH_VMCR_ACK_CTL_MASK (1 << ICH_VMCR_ACK_CTL_SHIFT)
@@ -1081,28 +1019,128 @@
#define ICH_VMCR_ENG1_SHIFT 1
#define ICH_VMCR_ENG1_MASK (1 << ICH_VMCR_ENG1_SHIFT)
-/* ICH_VTR_EL2 bit definitions */
-#define ICH_VTR_PRI_BITS_SHIFT 29
-#define ICH_VTR_PRI_BITS_MASK (7 << ICH_VTR_PRI_BITS_SHIFT)
-#define ICH_VTR_ID_BITS_SHIFT 23
-#define ICH_VTR_ID_BITS_MASK (7 << ICH_VTR_ID_BITS_SHIFT)
-#define ICH_VTR_SEIS_SHIFT 22
-#define ICH_VTR_SEIS_MASK (1 << ICH_VTR_SEIS_SHIFT)
-#define ICH_VTR_A3V_SHIFT 21
-#define ICH_VTR_A3V_MASK (1 << ICH_VTR_A3V_SHIFT)
-#define ICH_VTR_TDS_SHIFT 19
-#define ICH_VTR_TDS_MASK (1 << ICH_VTR_TDS_SHIFT)
-
-/* HFG[WR]TR_EL2 bit definitions */
-#define HFGxTR_EL2_nTPIDR2_EL0_SHIFT 55
-#define HFGxTR_EL2_nTPIDR2_EL0_MASK BIT_MASK(HFGxTR_EL2_nTPIDR2_EL0_SHIFT)
-#define HFGxTR_EL2_nSMPRI_EL1_SHIFT 54
-#define HFGxTR_EL2_nSMPRI_EL1_MASK BIT_MASK(HFGxTR_EL2_nSMPRI_EL1_SHIFT)
+/*
+ * Permission Indirection Extension (PIE) permission encodings.
+ * Encodings with the _O suffix have overlays applied (Permission Overlay Extension).
+ */
+#define PIE_NONE_O UL(0x0)
+#define PIE_R_O UL(0x1)
+#define PIE_X_O UL(0x2)
+#define PIE_RX_O UL(0x3)
+#define PIE_RW_O UL(0x5)
+#define PIE_RWnX_O UL(0x6)
+#define PIE_RWX_O UL(0x7)
+#define PIE_R UL(0x8)
+#define PIE_GCS UL(0x9)
+#define PIE_RX UL(0xa)
+#define PIE_RW UL(0xc)
+#define PIE_RWX UL(0xe)
+#define PIE_MASK UL(0xf)
+
+#define PIRx_ELx_BITS_PER_IDX 4
+#define PIRx_ELx_PERM_SHIFT(idx) ((idx) * PIRx_ELx_BITS_PER_IDX)
+#define PIRx_ELx_PERM_PREP(idx, perm) (((perm) & PIE_MASK) << PIRx_ELx_PERM_SHIFT(idx))
+
+/*
+ * Permission Overlay Extension (POE) permission encodings.
+ */
+#define POE_NONE UL(0x0)
+#define POE_R UL(0x1)
+#define POE_X UL(0x2)
+#define POE_RX UL(0x3)
+#define POE_W UL(0x4)
+#define POE_RW UL(0x5)
+#define POE_WX UL(0x6)
+#define POE_RWX UL(0x7)
+#define POE_MASK UL(0xf)
+
+#define POR_ELx_BITS_PER_IDX 4
+#define POR_ELx_PERM_SHIFT(idx) ((idx) * POR_ELx_BITS_PER_IDX)
+#define POR_ELx_PERM_GET(idx, reg) (((reg) >> POR_ELx_PERM_SHIFT(idx)) & POE_MASK)
+#define POR_ELx_PERM_PREP(idx, perm) (((perm) & POE_MASK) << POR_ELx_PERM_SHIFT(idx))
-#define ARM64_FEATURE_FIELD_BITS 4
+/*
+ * Definitions for Guarded Control Stack
+ */
+
+#define GCS_CAP_ADDR_MASK GENMASK(63, 12)
+#define GCS_CAP_ADDR_SHIFT 12
+#define GCS_CAP_ADDR_WIDTH 52
+#define GCS_CAP_ADDR(x) FIELD_GET(GCS_CAP_ADDR_MASK, x)
-/* Create a mask for the feature bits of the specified feature. */
-#define ARM64_FEATURE_MASK(x) (GENMASK_ULL(x##_SHIFT + ARM64_FEATURE_FIELD_BITS - 1, x##_SHIFT))
+#define GCS_CAP_TOKEN_MASK GENMASK(11, 0)
+#define GCS_CAP_TOKEN_SHIFT 0
+#define GCS_CAP_TOKEN_WIDTH 12
+#define GCS_CAP_TOKEN(x) FIELD_GET(GCS_CAP_TOKEN_MASK, x)
+
+#define GCS_CAP_VALID_TOKEN 0x1
+#define GCS_CAP_IN_PROGRESS_TOKEN 0x5
+
+#define GCS_CAP(x) ((((unsigned long)x) & GCS_CAP_ADDR_MASK) | \
+ GCS_CAP_VALID_TOKEN)
+/*
+ * Definitions for GICv5 instructions
+ */
+#define GICV5_OP_GIC_CDAFF sys_insn(1, 0, 12, 1, 3)
+#define GICV5_OP_GIC_CDDI sys_insn(1, 0, 12, 2, 0)
+#define GICV5_OP_GIC_CDDIS sys_insn(1, 0, 12, 1, 0)
+#define GICV5_OP_GIC_CDHM sys_insn(1, 0, 12, 2, 1)
+#define GICV5_OP_GIC_CDEN sys_insn(1, 0, 12, 1, 1)
+#define GICV5_OP_GIC_CDEOI sys_insn(1, 0, 12, 1, 7)
+#define GICV5_OP_GIC_CDPEND sys_insn(1, 0, 12, 1, 4)
+#define GICV5_OP_GIC_CDPRI sys_insn(1, 0, 12, 1, 2)
+#define GICV5_OP_GIC_CDRCFG sys_insn(1, 0, 12, 1, 5)
+#define GICV5_OP_GICR_CDIA sys_insn(1, 0, 12, 3, 0)
+
+/* Definitions for GIC CDAFF */
+#define GICV5_GIC_CDAFF_IAFFID_MASK GENMASK_ULL(47, 32)
+#define GICV5_GIC_CDAFF_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDAFF_IRM_MASK BIT_ULL(28)
+#define GICV5_GIC_CDAFF_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GIC CDDI */
+#define GICV5_GIC_CDDI_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDDI_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GIC CDDIS */
+#define GICV5_GIC_CDDIS_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDDIS_TYPE(r) FIELD_GET(GICV5_GIC_CDDIS_TYPE_MASK, r)
+#define GICV5_GIC_CDDIS_ID_MASK GENMASK_ULL(23, 0)
+#define GICV5_GIC_CDDIS_ID(r) FIELD_GET(GICV5_GIC_CDDIS_ID_MASK, r)
+
+/* Definitions for GIC CDEN */
+#define GICV5_GIC_CDEN_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDEN_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GIC CDHM */
+#define GICV5_GIC_CDHM_HM_MASK BIT_ULL(32)
+#define GICV5_GIC_CDHM_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDHM_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GIC CDPEND */
+#define GICV5_GIC_CDPEND_PENDING_MASK BIT_ULL(32)
+#define GICV5_GIC_CDPEND_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDPEND_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GIC CDPRI */
+#define GICV5_GIC_CDPRI_PRIORITY_MASK GENMASK_ULL(39, 35)
+#define GICV5_GIC_CDPRI_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDPRI_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GIC CDRCFG */
+#define GICV5_GIC_CDRCFG_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDRCFG_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GICR CDIA */
+#define GICV5_GIC_CDIA_VALID_MASK BIT_ULL(32)
+#define GICV5_GICR_CDIA_VALID(r) FIELD_GET(GICV5_GIC_CDIA_VALID_MASK, r)
+#define GICV5_GIC_CDIA_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDIA_ID_MASK GENMASK_ULL(23, 0)
+
+#define gicr_insn(insn) read_sysreg_s(GICV5_OP_GICR_##insn)
+#define gic_insn(v, insn) write_sysreg_s(v, GICV5_OP_GIC_##insn)
+
+#define ARM64_FEATURE_FIELD_BITS 4
#ifdef __ASSEMBLY__
@@ -1114,8 +1152,18 @@
__emit_inst(0xd5000000|(\sreg)|(.L__gpr_num_\rt))
.endm
+ .macro msr_hcr_el2, reg
+#if IS_ENABLED(CONFIG_AMPERE_ERRATUM_AC04_CPU_23)
+ dsb nsh
+ msr hcr_el2, \reg
+ isb
+#else
+ msr hcr_el2, \reg
+#endif
+ .endm
#else
+#include <linux/bitfield.h>
#include <linux/build_bug.h>
#include <linux/types.h>
#include <asm/alternative.h>
@@ -1171,15 +1219,21 @@
/*
* For registers without architectural names, or simply unsupported by
* GAS.
+ *
+ * __check_r forces the compiler to evaluate r and emit any warnings,
+ * which would not otherwise happen because r is passed to the
+ * assembler via __stringify(r).
*/
#define read_sysreg_s(r) ({ \
u64 __val; \
+ u32 __maybe_unused __check_r = (u32)(r); \
asm volatile(__mrs_s("%0", r) : "=r" (__val)); \
__val; \
})
#define write_sysreg_s(v, r) do { \
u64 __val = (u64)(v); \
+ u32 __maybe_unused __check_r = (u32)(r); \
asm volatile(__msr_s(r, "%x0") : : "rZ" (__val)); \
} while (0)
@@ -1194,6 +1248,13 @@
write_sysreg(__scs_new, sysreg); \
} while (0)
+#define sysreg_clear_set_hcr(clear, set) do { \
+ u64 __scs_val = read_sysreg(hcr_el2); \
+ u64 __scs_new = (__scs_val & ~(u64)(clear)) | (set); \
+ if (__scs_new != __scs_val) \
+ write_sysreg_hcr(__scs_new); \
+} while (0)
+
#define sysreg_clear_set_s(sysreg, clear, set) do { \
u64 __scs_val = read_sysreg_s(sysreg); \
u64 __scs_new = (__scs_val & ~(u64)(clear)) | (set); \
@@ -1201,6 +1262,17 @@
write_sysreg_s(__scs_new, sysreg); \
} while (0)
+#define write_sysreg_hcr(__val) do { \
+ if (IS_ENABLED(CONFIG_AMPERE_ERRATUM_AC04_CPU_23) && \
+ (!system_capabilities_finalized() || \
+ alternative_has_cap_unlikely(ARM64_WORKAROUND_AMPERE_AC04_CPU_23))) \
+ asm volatile("dsb nsh; msr hcr_el2, %x0; isb" \
+ : : "rZ" (__val)); \
+ else \
+ asm volatile("msr hcr_el2, %x0" \
+ : : "rZ" (__val)); \
+} while (0)
+
#define read_sysreg_par() ({ \
u64 par; \
asm(ALTERNATIVE("nop", "dmb sy", ARM64_WORKAROUND_1508412)); \
@@ -1209,7 +1281,7 @@
par; \
})
-#endif
+#define SYS_FIELD_VALUE(reg, field, val) reg##_##field##_##val
#define SYS_FIELD_GET(reg, field, val) \
FIELD_GET(reg##_##field##_MASK, val)
@@ -1218,6 +1290,9 @@
FIELD_PREP(reg##_##field##_MASK, val)
#define SYS_FIELD_PREP_ENUM(reg, field, val) \
- FIELD_PREP(reg##_##field##_MASK, reg##_##field##_##val)
+ FIELD_PREP(reg##_##field##_MASK, \
+ SYS_FIELD_VALUE(reg, field, val))
+
+#endif
#endif /* __ASM_SYSREG_H */
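A minimal sketch of the SYS_FIELD_* helpers, assuming the generated ID_AA64MMFR1_EL1_VH_MASK and ID_AA64MMFR1_EL1_VH_IMP definitions are available as usual:

/* Illustration only: check whether VHE is implemented. */
static inline bool example_has_vhe(void)
{
	u64 mmfr1 = read_sysreg_s(SYS_ID_AA64MMFR1_EL1);

	return SYS_FIELD_GET(ID_AA64MMFR1_EL1, VH, mmfr1) >=
	       SYS_FIELD_VALUE(ID_AA64MMFR1_EL1, VH, IMP);
}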
diff --git a/arch/arm64/include/asm/system_misc.h b/arch/arm64/include/asm/system_misc.h
index 0eb7709422e2..344b1c1a4bbb 100644
--- a/arch/arm64/include/asm/system_misc.h
+++ b/arch/arm64/include/asm/system_misc.h
@@ -18,17 +18,13 @@
struct pt_regs;
-void die(const char *msg, struct pt_regs *regs, int err);
+void die(const char *msg, struct pt_regs *regs, long err);
struct siginfo;
void arm64_notify_die(const char *str, struct pt_regs *regs,
int signo, int sicode, unsigned long far,
unsigned long err);
-void hook_debug_fault_code(int nr, int (*fn)(unsigned long, unsigned long,
- struct pt_regs *),
- int sig, int code, const char *name);
-
struct mm_struct;
extern void __show_regs(struct pt_regs *);
diff --git a/arch/arm64/include/asm/patching.h b/arch/arm64/include/asm/text-patching.h
index 6bf5adc56295..587bdb91ab7a 100644
--- a/arch/arm64/include/asm/patching.h
+++ b/arch/arm64/include/asm/text-patching.h
@@ -7,6 +7,10 @@
int aarch64_insn_read(void *addr, u32 *insnp);
int aarch64_insn_write(void *addr, u32 insn);
+int aarch64_insn_write_literal_u64(void *addr, u64 val);
+void *aarch64_insn_set(void *dst, u32 insn, size_t len);
+void *aarch64_insn_copy(void *dst, void *src, size_t len);
+
int aarch64_insn_patch_text_nosync(void *addr, u32 insn);
int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt);
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 848739c15de8..f241b8601ebd 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -55,24 +55,22 @@ struct thread_info {
void arch_setup_new_exec(void);
#define arch_setup_new_exec arch_setup_new_exec
-void arch_release_task_struct(struct task_struct *tsk);
-int arch_dup_task_struct(struct task_struct *dst,
- struct task_struct *src);
-
#endif
#define TIF_SIGPENDING 0 /* signal pending */
#define TIF_NEED_RESCHED 1 /* rescheduling necessary */
-#define TIF_NOTIFY_RESUME 2 /* callback before returning to user */
-#define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */
-#define TIF_UPROBE 4 /* uprobe breakpoint or singlestep */
-#define TIF_MTE_ASYNC_FAULT 5 /* MTE Asynchronous Tag Check Fault */
-#define TIF_NOTIFY_SIGNAL 6 /* signal notifications exist */
+#define TIF_NEED_RESCHED_LAZY 2 /* Lazy rescheduling needed */
+#define TIF_NOTIFY_RESUME 3 /* callback before returning to user */
+#define TIF_FOREIGN_FPSTATE 4 /* CPU's FP state is not current's */
+#define TIF_UPROBE 5 /* uprobe breakpoint or singlestep */
+#define TIF_MTE_ASYNC_FAULT 6 /* MTE Asynchronous Tag Check Fault */
+#define TIF_NOTIFY_SIGNAL 7 /* signal notifications exist */
#define TIF_SYSCALL_TRACE 8 /* syscall trace active */
#define TIF_SYSCALL_AUDIT 9 /* syscall auditing */
#define TIF_SYSCALL_TRACEPOINT 10 /* syscall tracepoint for ftrace */
#define TIF_SECCOMP 11 /* syscall secure computing */
#define TIF_SYSCALL_EMU 12 /* syscall emulation active */
+#define TIF_PATCH_PENDING 13 /* pending live patching update */
#define TIF_MEMDIE 18 /* is terminating due to OOM killer */
#define TIF_FREEZE 19
#define TIF_RESTORE_SIGMASK 20
@@ -84,9 +82,14 @@ int arch_dup_task_struct(struct task_struct *dst,
#define TIF_TAGGED_ADDR 26 /* Allow tagged user addresses */
#define TIF_SME 27 /* SME in use */
#define TIF_SME_VL_INHERIT 28 /* Inherit SME vl_onexec across exec */
+#define TIF_KERNEL_FPSTATE 29 /* Task is in a kernel mode FPSIMD section */
+#define TIF_TSC_SIGSEGV 30 /* SIGSEGV on counter-timer access */
+#define TIF_LAZY_MMU 31 /* Task in lazy mmu mode */
+#define TIF_LAZY_MMU_PENDING 32 /* Ops pending for lazy mmu mode exit */
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
+#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
#define _TIF_FOREIGN_FPSTATE (1 << TIF_FOREIGN_FPSTATE)
#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
@@ -94,17 +97,20 @@ int arch_dup_task_struct(struct task_struct *dst,
#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
+#define _TIF_PATCH_PENDING (1 << TIF_PATCH_PENDING)
#define _TIF_UPROBE (1 << TIF_UPROBE)
#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
#define _TIF_32BIT (1 << TIF_32BIT)
#define _TIF_SVE (1 << TIF_SVE)
#define _TIF_MTE_ASYNC_FAULT (1 << TIF_MTE_ASYNC_FAULT)
#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL)
+#define _TIF_TSC_SIGSEGV (1 << TIF_TSC_SIGSEGV)
-#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
+#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \
_TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
_TIF_UPROBE | _TIF_MTE_ASYNC_FAULT | \
- _TIF_NOTIFY_SIGNAL)
+ _TIF_NOTIFY_SIGNAL | _TIF_SIGPENDING | \
+ _TIF_PATCH_PENDING)
#define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
_TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
index c995d1f4594f..8d762607285c 100644
--- a/arch/arm64/include/asm/tlb.h
+++ b/arch/arm64/include/asm/tlb.h
@@ -9,12 +9,7 @@
#define __ASM_TLB_H
#include <linux/pagemap.h>
-#include <linux/swap.h>
-static inline void __tlb_remove_table(void *_table)
-{
- free_page_and_swap_cache((struct page *)_table);
-}
#define tlb_flush tlb_flush
static void tlb_flush(struct mmu_gather *tlb);
@@ -22,15 +17,15 @@ static void tlb_flush(struct mmu_gather *tlb);
#include <asm-generic/tlb.h>
/*
- * get the tlbi levels in arm64. Default value is 0 if more than one
- * of cleared_* is set or neither is set.
- * Arm64 doesn't support p4ds now.
+ * Get the tlbi level for arm64. The default value is TLBI_TTL_UNKNOWN if more
+ * than one of cleared_* is set or neither is set - this elides the level hint
+ * to the hardware.
*/
static inline int tlb_get_level(struct mmu_gather *tlb)
{
/* The TTL field is only valid for the leaf entry. */
if (tlb->freed_tables)
- return 0;
+ return TLBI_TTL_UNKNOWN;
if (tlb->cleared_ptes && !(tlb->cleared_pmds ||
tlb->cleared_puds ||
@@ -47,7 +42,12 @@ static inline int tlb_get_level(struct mmu_gather *tlb)
tlb->cleared_p4ds))
return 1;
- return 0;
+ if (tlb->cleared_p4ds && !(tlb->cleared_ptes ||
+ tlb->cleared_pmds ||
+ tlb->cleared_puds))
+ return 0;
+
+ return TLBI_TTL_UNKNOWN;
}
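To illustrate the mapping: a flush that only cleared PTE entries (cleared_ptes set, nothing else) yields the leaf level 3, cleared_pmds alone yields 2, cleared_puds alone yields 1 and cleared_p4ds alone yields 0, while any flush that freed page tables (freed_tables set) or touched multiple levels reports TLBI_TTL_UNKNOWN so the subsequent TLBI carries no level hint.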
static inline void tlb_flush(struct mmu_gather *tlb)
@@ -75,18 +75,18 @@ static inline void tlb_flush(struct mmu_gather *tlb)
static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
unsigned long addr)
{
- pgtable_pte_page_dtor(pte);
- tlb_remove_table(tlb, pte);
+ struct ptdesc *ptdesc = page_ptdesc(pte);
+
+ tlb_remove_ptdesc(tlb, ptdesc);
}
#if CONFIG_PGTABLE_LEVELS > 2
static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
unsigned long addr)
{
- struct page *page = virt_to_page(pmdp);
+ struct ptdesc *ptdesc = virt_to_ptdesc(pmdp);
- pgtable_pmd_page_dtor(page);
- tlb_remove_table(tlb, page);
+ tlb_remove_ptdesc(tlb, ptdesc);
}
#endif
@@ -94,7 +94,25 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp,
unsigned long addr)
{
- tlb_remove_table(tlb, virt_to_page(pudp));
+ struct ptdesc *ptdesc = virt_to_ptdesc(pudp);
+
+ if (!pgtable_l4_enabled())
+ return;
+
+ tlb_remove_ptdesc(tlb, ptdesc);
+}
+#endif
+
+#if CONFIG_PGTABLE_LEVELS > 4
+static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4dp,
+ unsigned long addr)
+{
+ struct ptdesc *ptdesc = virt_to_ptdesc(p4dp);
+
+ if (!pgtable_l5_enabled())
+ return;
+
+ tlb_remove_ptdesc(tlb, ptdesc);
}
#endif
diff --git a/arch/arm64/include/asm/tlbbatch.h b/arch/arm64/include/asm/tlbbatch.h
new file mode 100644
index 000000000000..fedb0b87b8db
--- /dev/null
+++ b/arch/arm64/include/asm/tlbbatch.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ARCH_ARM64_TLBBATCH_H
+#define _ARCH_ARM64_TLBBATCH_H
+
+struct arch_tlbflush_unmap_batch {
+ /*
+ * For arm64, HW can do the TLB shootdown, so we don't
+ * need to record a cpumask for sending IPIs.
+ */
+};
+
+#endif /* _ARCH_ARM64_TLBBATCH_H */
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index 412a3b9a3c25..18a5dc0c9a54 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -13,6 +13,7 @@
#include <linux/bitfield.h>
#include <linux/mm_types.h>
#include <linux/sched.h>
+#include <linux/mmu_notifier.h>
#include <asm/cputype.h>
#include <asm/mmu.h>
@@ -93,19 +94,22 @@ static inline unsigned long get_trans_granule(void)
* When ARMv8.4-TTL exists, TLBI operations take an additional hint for
* the level at which the invalidation must take place. If the level is
* wrong, no invalidation may take place. In the case where the level
- * cannot be easily determined, a 0 value for the level parameter will
- * perform a non-hinted invalidation.
+ * cannot be easily determined, the value TLBI_TTL_UNKNOWN will perform
+ * a non-hinted invalidation. Any provided level outside the hint range
+ * will also cause fall-back to non-hinted invalidation.
*
* For Stage-2 invalidation, use the level values provided to that effect
* in asm/stage2_pgtable.h.
*/
#define TLBI_TTL_MASK GENMASK_ULL(47, 44)
+#define TLBI_TTL_UNKNOWN INT_MAX
+
#define __tlbi_level(op, addr, level) do { \
u64 arg = addr; \
\
- if (cpus_have_const_cap(ARM64_HAS_ARMv8_4_TTL) && \
- level) { \
+ if (alternative_has_cap_unlikely(ARM64_HAS_ARMv8_4_TTL) && \
+ level >= 0 && level <= 3) { \
u64 ttl = level & 3; \
ttl |= get_trans_granule() << 2; \
arg &= ~TLBI_TTL_MASK; \
@@ -121,28 +125,41 @@ static inline unsigned long get_trans_granule(void)
} while (0)
/*
- * This macro creates a properly formatted VA operand for the TLB RANGE.
- * The value bit assignments are:
+ * This macro creates a properly formatted VA operand for the TLB RANGE. The
+ * value bit assignments are:
*
* +----------+------+-------+-------+-------+----------------------+
* | ASID | TG | SCALE | NUM | TTL | BADDR |
* +-----------------+-------+-------+-------+----------------------+
* |63 48|47 46|45 44|43 39|38 37|36 0|
*
- * The address range is determined by below formula:
- * [BADDR, BADDR + (NUM + 1) * 2^(5*SCALE + 1) * PAGESIZE)
+ * The address range is determined by the formula below:
+ * [BADDR, BADDR + (NUM + 1) * 2^(5*SCALE + 1) * PAGESIZE)
+ *
+ * Note that the first argument, baddr, is pre-shifted; if LPA2 is in use, BADDR
+ * holds addr[52:16], otherwise BADDR holds the page number. See for example ARM DDI
+ * 0487J.a section C5.5.60 "TLBI VAE1IS, TLBI VAE1ISNXS, TLB Invalidate by VA,
+ * EL1, Inner Shareable".
*
*/
-#define __TLBI_VADDR_RANGE(addr, asid, scale, num, ttl) \
- ({ \
- unsigned long __ta = (addr) >> PAGE_SHIFT; \
- __ta &= GENMASK_ULL(36, 0); \
- __ta |= (unsigned long)(ttl) << 37; \
- __ta |= (unsigned long)(num) << 39; \
- __ta |= (unsigned long)(scale) << 44; \
- __ta |= get_trans_granule() << 46; \
- __ta |= (unsigned long)(asid) << 48; \
- __ta; \
+#define TLBIR_ASID_MASK GENMASK_ULL(63, 48)
+#define TLBIR_TG_MASK GENMASK_ULL(47, 46)
+#define TLBIR_SCALE_MASK GENMASK_ULL(45, 44)
+#define TLBIR_NUM_MASK GENMASK_ULL(43, 39)
+#define TLBIR_TTL_MASK GENMASK_ULL(38, 37)
+#define TLBIR_BADDR_MASK GENMASK_ULL(36, 0)
+
+#define __TLBI_VADDR_RANGE(baddr, asid, scale, num, ttl) \
+ ({ \
+ unsigned long __ta = 0; \
+ unsigned long __ttl = (ttl >= 1 && ttl <= 3) ? ttl : 0; \
+ __ta |= FIELD_PREP(TLBIR_BADDR_MASK, baddr); \
+ __ta |= FIELD_PREP(TLBIR_TTL_MASK, __ttl); \
+ __ta |= FIELD_PREP(TLBIR_NUM_MASK, num); \
+ __ta |= FIELD_PREP(TLBIR_SCALE_MASK, scale); \
+ __ta |= FIELD_PREP(TLBIR_TG_MASK, get_trans_granule()); \
+ __ta |= FIELD_PREP(TLBIR_ASID_MASK, asid); \
+ __ta; \
})
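A hedged example of building one operand with the macro above, assuming 4K pages and no LPA2 (so baddr is simply the page number):

/* Illustration only: one range operand covering 512 pages from 'addr'. */
static inline unsigned long example_range_arg(unsigned long addr,
					      unsigned long asid)
{
	/* scale = 1, num = 7: (7 + 1) * 2^(5*1 + 1) = 512 pages, ttl = 3 (PTE level) */
	return __TLBI_VADDR_RANGE(addr >> PAGE_SHIFT, asid, 1, 7, 3);
}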
/* These macros are used by the TLBI RANGE feature. */
@@ -151,12 +168,18 @@ static inline unsigned long get_trans_granule(void)
#define MAX_TLBI_RANGE_PAGES __TLBI_RANGE_PAGES(31, 3)
/*
- * Generate 'num' values from -1 to 30 with -1 rejected by the
- * __flush_tlb_range() loop below.
+ * Generate 'num' values from -1 to 31 with -1 rejected by the
+ * __flush_tlb_range() loop below. Its return value is only
+ * significant for a maximum of MAX_TLBI_RANGE_PAGES pages. If
+ * 'pages' is more than that, you must iterate over the overall
+ * range.
*/
-#define TLBI_RANGE_MASK GENMASK_ULL(4, 0)
-#define __TLBI_RANGE_NUM(pages, scale) \
- ((((pages) >> (5 * (scale) + 1)) & TLBI_RANGE_MASK) - 1)
+#define __TLBI_RANGE_NUM(pages, scale) \
+ ({ \
+ int __pages = min((pages), \
+ __TLBI_RANGE_PAGES(31, (scale))); \
+ (__pages >> (5 * (scale) + 1)) - 1; \
+ })
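Worked example: for pages = 512 and scale = 1, __TLBI_RANGE_PAGES(31, 1) is 32 << 6 = 2048, so __pages stays 512 and the macro returns (512 >> 6) - 1 = 7; __TLBI_RANGE_PAGES(7, 1) = 8 << 6 = 512, i.e. the whole request fits in a single range operation. For pages = 1 and scale = 0 the result is (1 >> 1) - 1 = -1, which the caller rejects and handles with a plain non-range TLBI.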
/*
* TLB Invalidation
@@ -215,12 +238,16 @@ static inline unsigned long get_trans_granule(void)
* CPUs, ensuring that any walk-cache entries associated with the
* translation are also invalidated.
*
- * __flush_tlb_range(vma, start, end, stride, last_level)
+ * __flush_tlb_range(vma, start, end, stride, last_level, tlb_level)
* Invalidate the virtual-address range '[start, end)' on all
* CPUs for the user address space corresponding to 'vma->mm'.
* The invalidation operations are issued at a granularity
* determined by 'stride' and only affect any walk-cache entries
- * if 'last_level' is equal to false.
+ * if 'last_level' is equal to false. tlb_level is the level at
+ * which the invalidation must take place. If the level is wrong,
+ * no invalidation may take place. In the case where the level
+ * cannot be easily determined, the value TLBI_TTL_UNKNOWN will
+ * perform a non-hinted invalidation.
*
*
* Finally, take a look at asm/tlb.h to see how tlb_flush() is implemented
@@ -252,17 +279,26 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
__tlbi(aside1is, asid);
__tlbi_user(aside1is, asid);
dsb(ish);
+ mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
}
-static inline void flush_tlb_page_nosync(struct vm_area_struct *vma,
- unsigned long uaddr)
+static inline void __flush_tlb_page_nosync(struct mm_struct *mm,
+ unsigned long uaddr)
{
unsigned long addr;
dsb(ishst);
- addr = __TLBI_VADDR(uaddr, ASID(vma->vm_mm));
+ addr = __TLBI_VADDR(uaddr, ASID(mm));
__tlbi(vale1is, addr);
__tlbi_user(vale1is, addr);
+ mmu_notifier_arch_invalidate_secondary_tlbs(mm, uaddr & PAGE_MASK,
+ (uaddr & PAGE_MASK) + PAGE_SIZE);
+}
+
+static inline void flush_tlb_page_nosync(struct vm_area_struct *vma,
+ unsigned long uaddr)
+{
+ return __flush_tlb_page_nosync(vma->vm_mm, uaddr);
}
static inline void flush_tlb_page(struct vm_area_struct *vma,
@@ -272,91 +308,167 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
dsb(ish);
}
+static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm)
+{
+ /*
+ * TLB flush deferral is not required on systems which are affected by
+	 * ARM64_WORKAROUND_REPEAT_TLBI, as the __tlbi()/__tlbi_user() implementation
+	 * will issue two consecutive TLBI instructions with a dsb(ish) in between,
+	 * defeating the purpose (i.e. saving the overall 'dsb ish' cost).
+ */
+ if (alternative_has_cap_unlikely(ARM64_WORKAROUND_REPEAT_TLBI))
+ return false;
+
+ return true;
+}
+
+/*
+ * To support TLB batched flush for multiple pages unmapping, we only send
+ * the TLBI for each page in arch_tlbbatch_add_pending() and wait for the
+ * completion at the end in arch_tlbbatch_flush(). Since we have already issued
+ * a TLBI for each page, only a DSB is needed to synchronise its effect on the
+ * other CPUs.
+ *
+ * This saves the time spent waiting on the DSB compared to issuing a TLBI;DSB
+ * sequence for each page.
+ */
+static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
+{
+ dsb(ish);
+}
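A hedged sketch of the calling pattern the comment above describes (the generic reclaim code is the real caller; the addresses here are made up, and arch_tlbbatch_add_pending() is the helper defined later in this header):

/* Illustration only: queue TLBIs for two pages, then synchronise once. */
static inline void example_batched_unmap(struct arch_tlbflush_unmap_batch *batch,
					 struct mm_struct *mm)
{
	arch_tlbbatch_add_pending(batch, mm, 0x400000, 0x401000);
	arch_tlbbatch_add_pending(batch, mm, 0x500000, 0x501000);
	arch_tlbbatch_flush(batch);	/* one dsb(ish) covers both TLBIs */
}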
+
/*
* This is meant to avoid soft lock-ups on large TLB flushing ranges and not
* necessarily a performance improvement.
*/
-#define MAX_TLBI_OPS PTRS_PER_PTE
+#define MAX_DVM_OPS PTRS_PER_PTE
-static inline void __flush_tlb_range(struct vm_area_struct *vma,
+/*
+ * __flush_tlb_range_op - Perform TLBI operation upon a range
+ *
+ * @op: TLBI instruction that operates on a range (has 'r' prefix)
+ * @start: The start address of the range
+ * @pages: Range as the number of pages from 'start'
+ * @stride: Flush granularity
+ * @asid: The ASID of the task (0 for IPA instructions)
+ * @tlb_level: Translation Table level hint, if known
+ * @tlbi_user: If 'true', call an additional __tlbi_user()
+ * (typically for user ASIDs). 'false' for IPA instructions
+ * @lpa2: If 'true', the lpa2 scheme is used as set out below
+ *
+ * When the CPU does not support TLB range operations, flush the TLB
+ * entries one by one at the granularity of 'stride'. If the TLB
+ * range ops are supported, then:
+ *
+ * 1. If FEAT_LPA2 is in use, the start address of a range operation must be
+ * 64KB aligned, so flush pages one by one using the non-range operations
+ * until the alignment is reached. This step is skipped if LPA2 is not in
+ * use.
+ *
+ * 2. The minimum range granularity is decided by 'scale', so multiple range
+ * TLBI operations may be required. Start from scale = 3, flush the largest
+ * possible number of pages ((num+1)*2^(5*scale+1)) that fit into the
+ * requested range, then decrement scale and continue until one or zero pages
+ * are left. We must start from highest scale to ensure 64KB start alignment
+ * is maintained in the LPA2 case.
+ *
+ * 3. If there is 1 page remaining, flush it through non-range operations. Range
+ * operations can only span an even number of pages. We save this for last to
+ * ensure 64KB start alignment is maintained for the LPA2 case.
+ */
+#define __flush_tlb_range_op(op, start, pages, stride, \
+ asid, tlb_level, tlbi_user, lpa2) \
+do { \
+ typeof(start) __flush_start = start; \
+ typeof(pages) __flush_pages = pages; \
+ int num = 0; \
+ int scale = 3; \
+ int shift = lpa2 ? 16 : PAGE_SHIFT; \
+ unsigned long addr; \
+ \
+ while (__flush_pages > 0) { \
+ if (!system_supports_tlb_range() || \
+ __flush_pages == 1 || \
+ (lpa2 && __flush_start != ALIGN(__flush_start, SZ_64K))) { \
+ addr = __TLBI_VADDR(__flush_start, asid); \
+ __tlbi_level(op, addr, tlb_level); \
+ if (tlbi_user) \
+ __tlbi_user_level(op, addr, tlb_level); \
+ __flush_start += stride; \
+ __flush_pages -= stride >> PAGE_SHIFT; \
+ continue; \
+ } \
+ \
+ num = __TLBI_RANGE_NUM(__flush_pages, scale); \
+ if (num >= 0) { \
+ addr = __TLBI_VADDR_RANGE(__flush_start >> shift, asid, \
+ scale, num, tlb_level); \
+ __tlbi(r##op, addr); \
+ if (tlbi_user) \
+ __tlbi_user(r##op, addr); \
+ __flush_start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \
+ __flush_pages -= __TLBI_RANGE_PAGES(num, scale);\
+ } \
+ scale--; \
+ } \
+} while (0)
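Worked example (range ops available, 4K stride, no LPA2): flushing 1027 pages starts at scale = 3 and 2, where __TLBI_RANGE_NUM() returns -1 and nothing is issued; scale = 1 gives num = 15, so one r-prefixed TLBI covers (15 + 1) * 2^6 = 1024 pages; scale = 0 gives num = 0 and covers 2 more; the final single page takes the non-range path. Three TLBIs in total instead of 1027.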
+
+#define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level) \
+ __flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false, kvm_lpa2_is_enabled());
+
+static inline bool __flush_tlb_range_limit_excess(unsigned long start,
+ unsigned long end, unsigned long pages, unsigned long stride)
+{
+ /*
+ * When the system does not support TLB range based flush
+ * operation, (MAX_DVM_OPS - 1) pages can be handled. But
+ * with TLB range based operation, MAX_TLBI_RANGE_PAGES
+ * pages can be handled.
+ */
+ if ((!system_supports_tlb_range() &&
+ (end - start) >= (MAX_DVM_OPS * stride)) ||
+ pages > MAX_TLBI_RANGE_PAGES)
+ return true;
+
+ return false;
+}
+
+static inline void __flush_tlb_range_nosync(struct mm_struct *mm,
unsigned long start, unsigned long end,
unsigned long stride, bool last_level,
int tlb_level)
{
- int num = 0;
- int scale = 0;
- unsigned long asid, addr, pages;
+ unsigned long asid, pages;
start = round_down(start, stride);
end = round_up(end, stride);
pages = (end - start) >> PAGE_SHIFT;
- /*
- * When not uses TLB range ops, we can handle up to
- * (MAX_TLBI_OPS - 1) pages;
- * When uses TLB range ops, we can handle up to
- * (MAX_TLBI_RANGE_PAGES - 1) pages.
- */
- if ((!system_supports_tlb_range() &&
- (end - start) >= (MAX_TLBI_OPS * stride)) ||
- pages >= MAX_TLBI_RANGE_PAGES) {
- flush_tlb_mm(vma->vm_mm);
+ if (__flush_tlb_range_limit_excess(start, end, pages, stride)) {
+ flush_tlb_mm(mm);
return;
}
dsb(ishst);
- asid = ASID(vma->vm_mm);
+ asid = ASID(mm);
- /*
- * When the CPU does not support TLB range operations, flush the TLB
- * entries one by one at the granularity of 'stride'. If the TLB
- * range ops are supported, then:
- *
- * 1. If 'pages' is odd, flush the first page through non-range
- * operations;
- *
- * 2. For remaining pages: the minimum range granularity is decided
- * by 'scale', so multiple range TLBI operations may be required.
- * Start from scale = 0, flush the corresponding number of pages
- * ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it
- * until no pages left.
- *
- * Note that certain ranges can be represented by either num = 31 and
- * scale or num = 0 and scale + 1. The loop below favours the latter
- * since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
- */
- while (pages > 0) {
- if (!system_supports_tlb_range() ||
- pages % 2 == 1) {
- addr = __TLBI_VADDR(start, asid);
- if (last_level) {
- __tlbi_level(vale1is, addr, tlb_level);
- __tlbi_user_level(vale1is, addr, tlb_level);
- } else {
- __tlbi_level(vae1is, addr, tlb_level);
- __tlbi_user_level(vae1is, addr, tlb_level);
- }
- start += stride;
- pages -= stride >> PAGE_SHIFT;
- continue;
- }
-
- num = __TLBI_RANGE_NUM(pages, scale);
- if (num >= 0) {
- addr = __TLBI_VADDR_RANGE(start, asid, scale,
- num, tlb_level);
- if (last_level) {
- __tlbi(rvale1is, addr);
- __tlbi_user(rvale1is, addr);
- } else {
- __tlbi(rvae1is, addr);
- __tlbi_user(rvae1is, addr);
- }
- start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT;
- pages -= __TLBI_RANGE_PAGES(num, scale);
- }
- scale++;
- }
+ if (last_level)
+ __flush_tlb_range_op(vale1is, start, pages, stride, asid,
+ tlb_level, true, lpa2_is_enabled());
+ else
+ __flush_tlb_range_op(vae1is, start, pages, stride, asid,
+ tlb_level, true, lpa2_is_enabled());
+
+ mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end);
+}
+
+static inline void __flush_tlb_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end,
+ unsigned long stride, bool last_level,
+ int tlb_level)
+{
+ __flush_tlb_range_nosync(vma->vm_mm, start, end, stride,
+ last_level, tlb_level);
dsb(ish);
}
@@ -366,26 +478,29 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
/*
* We cannot use leaf-only invalidation here, since we may be invalidating
* table entries as part of collapsing hugepages or moving page tables.
- * Set the tlb_level to 0 because we can not get enough information here.
+ * Set the tlb_level to TLBI_TTL_UNKNOWN because we cannot get enough
+ * information here.
*/
- __flush_tlb_range(vma, start, end, PAGE_SIZE, false, 0);
+ __flush_tlb_range(vma, start, end, PAGE_SIZE, false, TLBI_TTL_UNKNOWN);
}
static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
- unsigned long addr;
+ const unsigned long stride = PAGE_SIZE;
+ unsigned long pages;
- if ((end - start) > (MAX_TLBI_OPS * PAGE_SIZE)) {
+ start = round_down(start, stride);
+ end = round_up(end, stride);
+ pages = (end - start) >> PAGE_SHIFT;
+
+ if (__flush_tlb_range_limit_excess(start, end, pages, stride)) {
flush_tlb_all();
return;
}
- start = __TLBI_VADDR(start, 0);
- end = __TLBI_VADDR(end, 0);
-
dsb(ishst);
- for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12))
- __tlbi(vaale1is, addr);
+ __flush_tlb_range_op(vaale1is, start, pages, stride, 0,
+ TLBI_TTL_UNKNOWN, false, lpa2_is_enabled());
dsb(ish);
isb();
}
@@ -403,6 +518,12 @@ static inline void __flush_tlb_kernel_pgtable(unsigned long kaddr)
dsb(ish);
isb();
}
+
+static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
+ struct mm_struct *mm, unsigned long start, unsigned long end)
+{
+ __flush_tlb_range_nosync(mm, start, end, PAGE_SIZE, true, 3);
+}
#endif
#endif
diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
index 9fab663dd2de..341174bf9106 100644
--- a/arch/arm64/include/asm/topology.h
+++ b/arch/arm64/include/asm/topology.h
@@ -5,6 +5,7 @@
#include <linux/cpumask.h>
#ifdef CONFIG_NUMA
+#include <asm/numa.h>
struct pci_bus;
int pcibus_to_node(struct pci_bus *bus);
@@ -23,10 +24,7 @@ void update_freq_counters_refs(void);
#define arch_set_freq_scale topology_set_freq_scale
#define arch_scale_freq_capacity topology_get_freq_scale
#define arch_scale_freq_invariant topology_scale_freq_invariant
-
-#ifdef CONFIG_ACPI_CPPC_LIB
-#define arch_init_invariance_cppc topology_init_cpu_capacity_cppc
-#endif
+#define arch_scale_freq_ref topology_get_freq_ref
/* Replace task scheduler's default cpu-invariant accounting */
#define arch_scale_cpu_capacity topology_get_cpu_scale
@@ -34,9 +32,9 @@ void update_freq_counters_refs(void);
/* Enable topology flag updates */
#define arch_update_cpu_topology topology_update_cpu_topology
-/* Replace task scheduler's default thermal pressure API */
-#define arch_scale_thermal_pressure topology_get_thermal_pressure
-#define arch_update_thermal_pressure topology_update_thermal_pressure
+/* Replace task scheduler's default HW pressure API */
+#define arch_scale_hw_pressure topology_get_hw_pressure
+#define arch_update_hw_pressure topology_update_hw_pressure
#include <asm-generic/topology.h>
diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h
index 6e5826470bea..e3e8944a71c3 100644
--- a/arch/arm64/include/asm/traps.h
+++ b/arch/arm64/include/asm/traps.h
@@ -9,27 +9,34 @@
#include <linux/list.h>
#include <asm/esr.h>
+#include <asm/ptrace.h>
#include <asm/sections.h>
-struct pt_regs;
-
-struct undef_hook {
- struct list_head node;
- u32 instr_mask;
- u32 instr_val;
- u64 pstate_mask;
- u64 pstate_val;
- int (*fn)(struct pt_regs *regs, u32 instr);
-};
+#ifdef CONFIG_ARMV8_DEPRECATED
+bool try_emulate_armv8_deprecated(struct pt_regs *regs, u32 insn);
+#else
+static inline bool
+try_emulate_armv8_deprecated(struct pt_regs *regs, u32 insn)
+{
+ return false;
+}
+#endif /* CONFIG_ARMV8_DEPRECATED */
-void register_undef_hook(struct undef_hook *hook);
-void unregister_undef_hook(struct undef_hook *hook);
void force_signal_inject(int signal, int code, unsigned long address, unsigned long err);
void arm64_notify_segfault(unsigned long addr);
void arm64_force_sig_fault(int signo, int code, unsigned long far, const char *str);
+void arm64_force_sig_fault_pkey(unsigned long far, const char *str, int pkey);
void arm64_force_sig_mceerr(int code, unsigned long far, short lsb, const char *str);
void arm64_force_sig_ptrace_errno_trap(int errno, unsigned long far, const char *str);
+int bug_brk_handler(struct pt_regs *regs, unsigned long esr);
+int cfi_brk_handler(struct pt_regs *regs, unsigned long esr);
+int reserved_fault_brk_handler(struct pt_regs *regs, unsigned long esr);
+int kasan_brk_handler(struct pt_regs *regs, unsigned long esr);
+int ubsan_brk_handler(struct pt_regs *regs, unsigned long esr);
+
+int early_brk64(unsigned long addr, unsigned long esr, struct pt_regs *regs);
+
/*
* Move regs->pc to next instruction and do necessary setup before it
* is executed.
@@ -100,4 +107,55 @@ static inline unsigned long arm64_ras_serror_get_severity(unsigned long esr)
bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned long esr);
void __noreturn arm64_serror_panic(struct pt_regs *regs, unsigned long esr);
+
+static inline void arm64_mops_reset_regs(struct user_pt_regs *regs, unsigned long esr)
+{
+ bool wrong_option = esr & ESR_ELx_MOPS_ISS_WRONG_OPTION;
+ bool option_a = esr & ESR_ELx_MOPS_ISS_OPTION_A;
+ int dstreg = ESR_ELx_MOPS_ISS_DESTREG(esr);
+ int srcreg = ESR_ELx_MOPS_ISS_SRCREG(esr);
+ int sizereg = ESR_ELx_MOPS_ISS_SIZEREG(esr);
+ unsigned long dst, size;
+
+ dst = regs->regs[dstreg];
+ size = regs->regs[sizereg];
+
+ /*
+ * Put the registers back in the original format suitable for a
+ * prologue instruction, using the generic return routine from the
+ * Arm ARM (DDI 0487I.a) rules CNTMJ and MWFQH.
+ */
+ if (esr & ESR_ELx_MOPS_ISS_MEM_INST) {
+ /* SET* instruction */
+ if (option_a ^ wrong_option) {
+ /* Format is from Option A; forward set */
+ regs->regs[dstreg] = dst + size;
+ regs->regs[sizereg] = -size;
+ }
+ } else {
+ /* CPY* instruction */
+ unsigned long src = regs->regs[srcreg];
+ if (!(option_a ^ wrong_option)) {
+ /* Format is from Option B */
+ if (regs->pstate & PSR_N_BIT) {
+ /* Backward copy */
+ regs->regs[dstreg] = dst - size;
+ regs->regs[srcreg] = src - size;
+ }
+ } else {
+ /* Format is from Option A */
+ if (size & BIT(63)) {
+ /* Forward copy */
+ regs->regs[dstreg] = dst + size;
+ regs->regs[srcreg] = src + size;
+ regs->regs[sizereg] = -size;
+ }
+ }
+ }
+
+ if (esr & ESR_ELx_MOPS_ISS_FROM_EPILOGUE)
+ regs->pc -= 8;
+ else
+ regs->pc -= 4;
+}
#endif
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 2fc9f0861769..5b91803201ef 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -65,7 +65,6 @@ static inline void __uaccess_ttbr0_disable(void)
ttbr &= ~TTBR_ASID_MASK;
/* reserved_pg_dir placed before swapper_pg_dir */
write_sysreg(ttbr - RESERVED_SWAPPER_OFFSET, ttbr0_el1);
- isb();
/* Set reserved ASID */
write_sysreg(ttbr, ttbr1_el1);
isb();
@@ -89,7 +88,6 @@ static inline void __uaccess_ttbr0_enable(void)
ttbr1 &= ~TTBR_ASID_MASK; /* safety measure */
ttbr1 |= ttbr0 & TTBR_ASID_MASK;
write_sysreg(ttbr1, ttbr1_el1);
- isb();
/* Restore user page table */
write_sysreg(ttbr0, ttbr0_el1);
@@ -136,55 +134,9 @@ static inline void __uaccess_enable_hw_pan(void)
CONFIG_ARM64_PAN));
}
-/*
- * The Tag Check Flag (TCF) mode for MTE is per EL, hence TCF0
- * affects EL0 and TCF affects EL1 irrespective of which TTBR is
- * used.
- * The kernel accesses TTBR0 usually with LDTR/STTR instructions
- * when UAO is available, so these would act as EL0 accesses using
- * TCF0.
- * However futex.h code uses exclusives which would be executed as
- * EL1, this can potentially cause a tag check fault even if the
- * user disables TCF0.
- *
- * To address the problem we set the PSTATE.TCO bit in uaccess_enable()
- * and reset it in uaccess_disable().
- *
- * The Tag check override (TCO) bit disables temporarily the tag checking
- * preventing the issue.
- */
-static inline void __uaccess_disable_tco(void)
-{
- asm volatile(ALTERNATIVE("nop", SET_PSTATE_TCO(0),
- ARM64_MTE, CONFIG_KASAN_HW_TAGS));
-}
-
-static inline void __uaccess_enable_tco(void)
-{
- asm volatile(ALTERNATIVE("nop", SET_PSTATE_TCO(1),
- ARM64_MTE, CONFIG_KASAN_HW_TAGS));
-}
-
-/*
- * These functions disable tag checking only if in MTE async mode
- * since the sync mode generates exceptions synchronously and the
- * nofault or load_unaligned_zeropad can handle them.
- */
-static inline void __uaccess_disable_tco_async(void)
-{
- if (system_uses_mte_async_or_asymm_mode())
- __uaccess_disable_tco();
-}
-
-static inline void __uaccess_enable_tco_async(void)
-{
- if (system_uses_mte_async_or_asymm_mode())
- __uaccess_enable_tco();
-}
-
static inline void uaccess_disable_privileged(void)
{
- __uaccess_disable_tco();
+ mte_disable_tco();
if (uaccess_ttbr0_disable())
return;
@@ -194,7 +146,7 @@ static inline void uaccess_disable_privileged(void)
static inline void uaccess_enable_privileged(void)
{
- __uaccess_enable_tco();
+ mte_enable_tco();
if (uaccess_ttbr0_enable())
return;
@@ -203,9 +155,11 @@ static inline void uaccess_enable_privileged(void)
}
/*
- * Sanitise a uaccess pointer such that it becomes NULL if above the maximum
- * user address. In case the pointer is tagged (has the top byte set), untag
- * the pointer before checking.
+ * Sanitize a uaccess pointer such that it cannot reach any kernel address.
+ *
+ * Clearing bit 55 ensures the pointer cannot address any portion of the TTBR1
+ * address range (i.e. any kernel address), and either the pointer falls within
+ * the TTBR0 address range or must cause a fault.
*/
#define uaccess_mask_ptr(ptr) (__typeof__(ptr))__uaccess_mask_ptr(ptr)
static inline void __user *__uaccess_mask_ptr(const void __user *ptr)
@@ -213,14 +167,12 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr)
void __user *safe_ptr;
asm volatile(
- " bics xzr, %3, %2\n"
- " csel %0, %1, xzr, eq\n"
- : "=&r" (safe_ptr)
- : "r" (ptr), "r" (TASK_SIZE_MAX - 1),
- "r" (untagged_addr(ptr))
- : "cc");
-
- csdb();
+ " bic %0, %1, %2\n"
+ : "=r" (safe_ptr)
+ : "r" (ptr),
+ "i" (BIT(55))
+ );
+
return safe_ptr;
}
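For illustration: a forged pointer such as 0xffff000012345678 has bit 55 set and would otherwise select the TTBR1 (kernel) range; after the BIC it becomes 0xff7f000012345678, which can only be translated via TTBR0 and, per the comment above, either falls in the user range or faults. A genuine user pointer, with bit 55 already clear, is returned unchanged; since there is no longer a conditional select, the old csdb() is dropped.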
@@ -232,29 +184,40 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr)
* The "__xxx_error" versions set the third argument to -EFAULT if an error
* occurs, and leave it unchanged on success.
*/
-#define __get_mem_asm(load, reg, x, addr, err, type) \
+#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
+#define __get_mem_asm(load, reg, x, addr, label, type) \
+ asm_goto_output( \
+ "1: " load " " reg "0, [%1]\n" \
+ _ASM_EXTABLE_##type##ACCESS(1b, %l2) \
+ : "=r" (x) \
+ : "r" (addr) : : label)
+#else
+#define __get_mem_asm(load, reg, x, addr, label, type) do { \
+ int __gma_err = 0; \
asm volatile( \
"1: " load " " reg "1, [%2]\n" \
"2:\n" \
_ASM_EXTABLE_##type##ACCESS_ERR_ZERO(1b, 2b, %w0, %w1) \
- : "+r" (err), "=&r" (x) \
- : "r" (addr))
+ : "+r" (__gma_err), "=r" (x) \
+ : "r" (addr)); \
+ if (__gma_err) goto label; } while (0)
+#endif
-#define __raw_get_mem(ldr, x, ptr, err, type) \
+#define __raw_get_mem(ldr, x, ptr, label, type) \
do { \
unsigned long __gu_val; \
switch (sizeof(*(ptr))) { \
case 1: \
- __get_mem_asm(ldr "b", "%w", __gu_val, (ptr), (err), type); \
+ __get_mem_asm(ldr "b", "%w", __gu_val, (ptr), label, type); \
break; \
case 2: \
- __get_mem_asm(ldr "h", "%w", __gu_val, (ptr), (err), type); \
+ __get_mem_asm(ldr "h", "%w", __gu_val, (ptr), label, type); \
break; \
case 4: \
- __get_mem_asm(ldr, "%w", __gu_val, (ptr), (err), type); \
+ __get_mem_asm(ldr, "%w", __gu_val, (ptr), label, type); \
break; \
case 8: \
- __get_mem_asm(ldr, "%x", __gu_val, (ptr), (err), type); \
+ __get_mem_asm(ldr, "%x", __gu_val, (ptr), label, type); \
break; \
default: \
BUILD_BUG(); \
@@ -267,27 +230,34 @@ do { \
* uaccess_ttbr0_disable(). As `x` and `ptr` could contain blocking functions,
* we must evaluate these outside of the critical section.
*/
-#define __raw_get_user(x, ptr, err) \
+#define __raw_get_user(x, ptr, label) \
do { \
__typeof__(*(ptr)) __user *__rgu_ptr = (ptr); \
__typeof__(x) __rgu_val; \
__chk_user_ptr(ptr); \
- \
- uaccess_ttbr0_enable(); \
- __raw_get_mem("ldtr", __rgu_val, __rgu_ptr, err, U); \
- uaccess_ttbr0_disable(); \
- \
- (x) = __rgu_val; \
+ do { \
+ __label__ __rgu_failed; \
+ uaccess_ttbr0_enable(); \
+ __raw_get_mem("ldtr", __rgu_val, __rgu_ptr, __rgu_failed, U); \
+ uaccess_ttbr0_disable(); \
+ (x) = __rgu_val; \
+ break; \
+ __rgu_failed: \
+ uaccess_ttbr0_disable(); \
+ goto label; \
+ } while (0); \
} while (0)
#define __get_user_error(x, ptr, err) \
do { \
+ __label__ __gu_failed; \
__typeof__(*(ptr)) __user *__p = (ptr); \
might_fault(); \
if (access_ok(__p, sizeof(*__p))) { \
__p = uaccess_mask_ptr(__p); \
- __raw_get_user((x), __p, (err)); \
+ __raw_get_user((x), __p, __gu_failed); \
} else { \
+ __gu_failed: \
(x) = (__force __typeof__(x))0; (err) = -EFAULT; \
} \
} while (0)
@@ -302,48 +272,50 @@ do { \
#define get_user __get_user
/*
- * We must not call into the scheduler between __uaccess_enable_tco_async() and
- * __uaccess_disable_tco_async(). As `dst` and `src` may contain blocking
+ * We must not call into the scheduler between __mte_enable_tco_async() and
+ * __mte_disable_tco_async(). As `dst` and `src` may contain blocking
* functions, we must evaluate these outside of the critical section.
*/
#define __get_kernel_nofault(dst, src, type, err_label) \
do { \
__typeof__(dst) __gkn_dst = (dst); \
__typeof__(src) __gkn_src = (src); \
- int __gkn_err = 0; \
+ do { \
+ __label__ __gkn_label; \
\
- __uaccess_enable_tco_async(); \
- __raw_get_mem("ldr", *((type *)(__gkn_dst)), \
- (__force type *)(__gkn_src), __gkn_err, K); \
- __uaccess_disable_tco_async(); \
- \
- if (unlikely(__gkn_err)) \
+ __mte_enable_tco_async(); \
+ __raw_get_mem("ldr", *((type *)(__gkn_dst)), \
+ (__force type *)(__gkn_src), __gkn_label, K); \
+ __mte_disable_tco_async(); \
+ break; \
+ __gkn_label: \
+ __mte_disable_tco_async(); \
goto err_label; \
+ } while (0); \
} while (0)
-#define __put_mem_asm(store, reg, x, addr, err, type) \
- asm volatile( \
- "1: " store " " reg "1, [%2]\n" \
+#define __put_mem_asm(store, reg, x, addr, label, type) \
+ asm goto( \
+ "1: " store " " reg "0, [%1]\n" \
"2:\n" \
- _ASM_EXTABLE_##type##ACCESS_ERR(1b, 2b, %w0) \
- : "+r" (err) \
- : "r" (x), "r" (addr))
+ _ASM_EXTABLE_##type##ACCESS(1b, %l2) \
+ : : "rZ" (x), "r" (addr) : : label)
-#define __raw_put_mem(str, x, ptr, err, type) \
+#define __raw_put_mem(str, x, ptr, label, type) \
do { \
__typeof__(*(ptr)) __pu_val = (x); \
switch (sizeof(*(ptr))) { \
case 1: \
- __put_mem_asm(str "b", "%w", __pu_val, (ptr), (err), type); \
+ __put_mem_asm(str "b", "%w", __pu_val, (ptr), label, type); \
break; \
case 2: \
- __put_mem_asm(str "h", "%w", __pu_val, (ptr), (err), type); \
+ __put_mem_asm(str "h", "%w", __pu_val, (ptr), label, type); \
break; \
case 4: \
- __put_mem_asm(str, "%w", __pu_val, (ptr), (err), type); \
+ __put_mem_asm(str, "%w", __pu_val, (ptr), label, type); \
break; \
case 8: \
- __put_mem_asm(str, "%x", __pu_val, (ptr), (err), type); \
+ __put_mem_asm(str, "%x", __pu_val, (ptr), label, type); \
break; \
default: \
BUILD_BUG(); \
@@ -355,25 +327,34 @@ do { \
* uaccess_ttbr0_disable(). As `x` and `ptr` could contain blocking functions,
* we must evaluate these outside of the critical section.
*/
-#define __raw_put_user(x, ptr, err) \
+#define __raw_put_user(x, ptr, label) \
do { \
+ __label__ __rpu_failed; \
__typeof__(*(ptr)) __user *__rpu_ptr = (ptr); \
__typeof__(*(ptr)) __rpu_val = (x); \
__chk_user_ptr(__rpu_ptr); \
\
- uaccess_ttbr0_enable(); \
- __raw_put_mem("sttr", __rpu_val, __rpu_ptr, err, U); \
- uaccess_ttbr0_disable(); \
+ do { \
+ uaccess_ttbr0_enable(); \
+ __raw_put_mem("sttr", __rpu_val, __rpu_ptr, __rpu_failed, U); \
+ uaccess_ttbr0_disable(); \
+ break; \
+ __rpu_failed: \
+ uaccess_ttbr0_disable(); \
+ goto label; \
+ } while (0); \
} while (0)
#define __put_user_error(x, ptr, err) \
do { \
+ __label__ __pu_failed; \
__typeof__(*(ptr)) __user *__p = (ptr); \
might_fault(); \
if (access_ok(__p, sizeof(*__p))) { \
__p = uaccess_mask_ptr(__p); \
- __raw_put_user((x), __p, (err)); \
+ __raw_put_user((x), __p, __pu_failed); \
} else { \
+ __pu_failed: \
(err) = -EFAULT; \
} \
} while (0)
@@ -388,23 +369,26 @@ do { \
#define put_user __put_user
/*
- * We must not call into the scheduler between __uaccess_enable_tco_async() and
- * __uaccess_disable_tco_async(). As `dst` and `src` may contain blocking
+ * We must not call into the scheduler between __mte_enable_tco_async() and
+ * __mte_disable_tco_async(). As `dst` and `src` may contain blocking
* functions, we must evaluate these outside of the critical section.
*/
#define __put_kernel_nofault(dst, src, type, err_label) \
do { \
__typeof__(dst) __pkn_dst = (dst); \
__typeof__(src) __pkn_src = (src); \
- int __pkn_err = 0; \
- \
- __uaccess_enable_tco_async(); \
- __raw_put_mem("str", *((type *)(__pkn_src)), \
- (__force type *)(__pkn_dst), __pkn_err, K); \
- __uaccess_disable_tco_async(); \
\
- if (unlikely(__pkn_err)) \
+ do { \
+ __label__ __pkn_err; \
+ __mte_enable_tco_async(); \
+ __raw_put_mem("str", *((type *)(__pkn_src)), \
+ (__force type *)(__pkn_dst), __pkn_err, K); \
+ __mte_disable_tco_async(); \
+ break; \
+ __pkn_err: \
+ __mte_disable_tco_async(); \
goto err_label; \
+ } while (0); \
} while(0)
extern unsigned long __must_check __arch_copy_from_user(void *to, const void __user *from, unsigned long n);
@@ -429,6 +413,51 @@ extern unsigned long __must_check __arch_copy_to_user(void __user *to, const voi
__actu_ret; \
})
+static __must_check __always_inline bool user_access_begin(const void __user *ptr, size_t len)
+{
+ if (unlikely(!access_ok(ptr,len)))
+ return 0;
+ uaccess_ttbr0_enable();
+ return 1;
+}
+#define user_access_begin(a,b) user_access_begin(a,b)
+#define user_access_end() uaccess_ttbr0_disable()
+#define unsafe_put_user(x, ptr, label) \
+ __raw_put_mem("sttr", x, uaccess_mask_ptr(ptr), label, U)
+#define unsafe_get_user(x, ptr, label) \
+ __raw_get_mem("ldtr", x, uaccess_mask_ptr(ptr), label, U)
+
+/*
+ * KCSAN uses these to save and restore ttbr state.
+ * We do not support KCSAN with ARM64_SW_TTBR0_PAN, so
+ * they are no-ops.
+ */
+static inline unsigned long user_access_save(void) { return 0; }
+static inline void user_access_restore(unsigned long enabled) { }
+
+/*
+ * We want the unsafe accessors to always be inlined and use
+ * the error labels - thus the macro games.
+ */
+#define unsafe_copy_loop(dst, src, len, type, label) \
+ while (len >= sizeof(type)) { \
+ unsafe_put_user(*(type *)(src),(type __user *)(dst),label); \
+ dst += sizeof(type); \
+ src += sizeof(type); \
+ len -= sizeof(type); \
+ }
+
+#define unsafe_copy_to_user(_dst,_src,_len,label) \
+do { \
+ char __user *__ucu_dst = (_dst); \
+ const char *__ucu_src = (_src); \
+ size_t __ucu_len = (_len); \
+ unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u64, label); \
+ unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u32, label); \
+ unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u16, label); \
+ unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u8, label); \
+} while (0)
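A hedged sketch of the standard pattern these helpers enable; the function name and layout are illustrative:

/* Copy two values to userspace with a single uaccess enable/disable window. */
static inline int example_put_pair(u64 __user *uptr, u64 a, u64 b)
{
	if (!user_access_begin(uptr, 2 * sizeof(u64)))
		return -EFAULT;

	unsafe_put_user(a, &uptr[0], efault);
	unsafe_put_user(b, &uptr[1], efault);
	user_access_end();
	return 0;

efault:
	user_access_end();
	return -EFAULT;
}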
+
#define INLINE_COPY_TO_USER
#define INLINE_COPY_FROM_USER
@@ -449,8 +478,6 @@ extern long strncpy_from_user(char *dest, const char __user *src, long count);
extern __must_check long strnlen_user(const char __user *str, long n);
#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
-struct page;
-void memcpy_page_flushcache(char *to, struct page *page, size_t offset, size_t len);
extern unsigned long __must_check __copy_user_flushcache(void *to, const void __user *from, unsigned long n);
static inline int __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size)
@@ -475,4 +502,44 @@ static inline size_t probe_subpage_writeable(const char __user *uaddr,
#endif /* CONFIG_ARCH_HAS_SUBPAGE_FAULTS */
+#ifdef CONFIG_ARM64_GCS
+
+static inline int gcssttr(unsigned long __user *addr, unsigned long val)
+{
+ register unsigned long __user *_addr __asm__ ("x0") = addr;
+ register unsigned long _val __asm__ ("x1") = val;
+ int err = 0;
+
+ /* GCSSTTR x1, x0 */
+ asm volatile(
+ "1: .inst 0xd91f1c01\n"
+ "2: \n"
+ _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %w0)
+ : "+r" (err)
+ : "rZ" (_val), "r" (_addr)
+ : "memory");
+
+ return err;
+}
+
+static inline void put_user_gcs(unsigned long val, unsigned long __user *addr,
+ int *err)
+{
+ int ret;
+
+ if (!access_ok((char __user *)addr, sizeof(u64))) {
+ *err = -EFAULT;
+ return;
+ }
+
+ uaccess_ttbr0_enable();
+ ret = gcssttr(addr, val);
+ if (ret != 0)
+ *err = ret;
+ uaccess_ttbr0_disable();
+}
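For reference, a hedged usage sketch; the slot pointer and value are illustrative, and note that *err is only written on failure:

/* Illustration only: store one entry to a user shadow stack slot. */
static inline int example_push_gcs_entry(unsigned long __user *slot,
					 unsigned long val)
{
	int err = 0;

	put_user_gcs(val, slot, &err);	/* GCSSTTR under uaccess_ttbr0 */
	return err;
}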
+
+#endif /* CONFIG_ARM64_GCS */
+
#endif /* __ASM_UACCESS_H */
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index 037feba03a51..80618c9bbcd8 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -17,35 +17,17 @@
#define __ARCH_WANT_SYS_VFORK
/*
- * Compat syscall numbers used by the AArch64 kernel.
- */
-#define __NR_compat_restart_syscall 0
-#define __NR_compat_exit 1
-#define __NR_compat_read 3
-#define __NR_compat_write 4
-#define __NR_compat_gettimeofday 78
-#define __NR_compat_sigreturn 119
-#define __NR_compat_rt_sigreturn 173
-#define __NR_compat_clock_gettime 263
-#define __NR_compat_clock_getres 264
-#define __NR_compat_clock_gettime64 403
-#define __NR_compat_clock_getres_time64 406
-
-/*
* The following SVCs are ARM private.
*/
#define __ARM_NR_COMPAT_BASE 0x0f0000
#define __ARM_NR_compat_cacheflush (__ARM_NR_COMPAT_BASE + 2)
#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5)
#define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800)
-
-#define __NR_compat_syscalls 451
#endif
#define __ARCH_WANT_SYS_CLONE
+#define __ARCH_WANT_NEW_STAT
-#ifndef __COMPAT_SYSCALL_NR
-#include <uapi/asm/unistd.h>
-#endif
+#include <asm/unistd_64.h>
#define NR_syscalls (__NR_syscalls)
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 604a2053d006..e0b1a0b57f75 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -1,914 +1,9 @@
/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * AArch32 (compat) system call definitions.
- *
- * Copyright (C) 2001-2005 Russell King
- * Copyright (C) 2012 ARM Ltd.
- */
+#ifndef _UAPI__ASM_ARM_UNISTD_H
+#define _UAPI__ASM_ARM_UNISTD_H
-#ifndef __SYSCALL
-#define __SYSCALL(x, y)
-#endif
+#include <asm/unistd_32.h>
-#define __NR_restart_syscall 0
-__SYSCALL(__NR_restart_syscall, sys_restart_syscall)
-#define __NR_exit 1
-__SYSCALL(__NR_exit, sys_exit)
-#define __NR_fork 2
-__SYSCALL(__NR_fork, sys_fork)
-#define __NR_read 3
-__SYSCALL(__NR_read, sys_read)
-#define __NR_write 4
-__SYSCALL(__NR_write, sys_write)
-#define __NR_open 5
-__SYSCALL(__NR_open, compat_sys_open)
-#define __NR_close 6
-__SYSCALL(__NR_close, sys_close)
- /* 7 was sys_waitpid */
-__SYSCALL(7, sys_ni_syscall)
-#define __NR_creat 8
-__SYSCALL(__NR_creat, sys_creat)
-#define __NR_link 9
-__SYSCALL(__NR_link, sys_link)
-#define __NR_unlink 10
-__SYSCALL(__NR_unlink, sys_unlink)
-#define __NR_execve 11
-__SYSCALL(__NR_execve, compat_sys_execve)
-#define __NR_chdir 12
-__SYSCALL(__NR_chdir, sys_chdir)
- /* 13 was sys_time */
-__SYSCALL(13, sys_ni_syscall)
-#define __NR_mknod 14
-__SYSCALL(__NR_mknod, sys_mknod)
-#define __NR_chmod 15
-__SYSCALL(__NR_chmod, sys_chmod)
-#define __NR_lchown 16
-__SYSCALL(__NR_lchown, sys_lchown16)
- /* 17 was sys_break */
-__SYSCALL(17, sys_ni_syscall)
- /* 18 was sys_stat */
-__SYSCALL(18, sys_ni_syscall)
-#define __NR_lseek 19
-__SYSCALL(__NR_lseek, compat_sys_lseek)
-#define __NR_getpid 20
-__SYSCALL(__NR_getpid, sys_getpid)
-#define __NR_mount 21
-__SYSCALL(__NR_mount, sys_mount)
- /* 22 was sys_umount */
-__SYSCALL(22, sys_ni_syscall)
-#define __NR_setuid 23
-__SYSCALL(__NR_setuid, sys_setuid16)
-#define __NR_getuid 24
-__SYSCALL(__NR_getuid, sys_getuid16)
- /* 25 was sys_stime */
-__SYSCALL(25, sys_ni_syscall)
-#define __NR_ptrace 26
-__SYSCALL(__NR_ptrace, compat_sys_ptrace)
- /* 27 was sys_alarm */
-__SYSCALL(27, sys_ni_syscall)
- /* 28 was sys_fstat */
-__SYSCALL(28, sys_ni_syscall)
-#define __NR_pause 29
-__SYSCALL(__NR_pause, sys_pause)
- /* 30 was sys_utime */
-__SYSCALL(30, sys_ni_syscall)
- /* 31 was sys_stty */
-__SYSCALL(31, sys_ni_syscall)
- /* 32 was sys_gtty */
-__SYSCALL(32, sys_ni_syscall)
-#define __NR_access 33
-__SYSCALL(__NR_access, sys_access)
-#define __NR_nice 34
-__SYSCALL(__NR_nice, sys_nice)
- /* 35 was sys_ftime */
-__SYSCALL(35, sys_ni_syscall)
-#define __NR_sync 36
-__SYSCALL(__NR_sync, sys_sync)
-#define __NR_kill 37
-__SYSCALL(__NR_kill, sys_kill)
-#define __NR_rename 38
-__SYSCALL(__NR_rename, sys_rename)
-#define __NR_mkdir 39
-__SYSCALL(__NR_mkdir, sys_mkdir)
-#define __NR_rmdir 40
-__SYSCALL(__NR_rmdir, sys_rmdir)
-#define __NR_dup 41
-__SYSCALL(__NR_dup, sys_dup)
-#define __NR_pipe 42
-__SYSCALL(__NR_pipe, sys_pipe)
-#define __NR_times 43
-__SYSCALL(__NR_times, compat_sys_times)
- /* 44 was sys_prof */
-__SYSCALL(44, sys_ni_syscall)
-#define __NR_brk 45
-__SYSCALL(__NR_brk, sys_brk)
-#define __NR_setgid 46
-__SYSCALL(__NR_setgid, sys_setgid16)
-#define __NR_getgid 47
-__SYSCALL(__NR_getgid, sys_getgid16)
- /* 48 was sys_signal */
-__SYSCALL(48, sys_ni_syscall)
-#define __NR_geteuid 49
-__SYSCALL(__NR_geteuid, sys_geteuid16)
-#define __NR_getegid 50
-__SYSCALL(__NR_getegid, sys_getegid16)
-#define __NR_acct 51
-__SYSCALL(__NR_acct, sys_acct)
-#define __NR_umount2 52
-__SYSCALL(__NR_umount2, sys_umount)
- /* 53 was sys_lock */
-__SYSCALL(53, sys_ni_syscall)
-#define __NR_ioctl 54
-__SYSCALL(__NR_ioctl, compat_sys_ioctl)
-#define __NR_fcntl 55
-__SYSCALL(__NR_fcntl, compat_sys_fcntl)
- /* 56 was sys_mpx */
-__SYSCALL(56, sys_ni_syscall)
-#define __NR_setpgid 57
-__SYSCALL(__NR_setpgid, sys_setpgid)
- /* 58 was sys_ulimit */
-__SYSCALL(58, sys_ni_syscall)
- /* 59 was sys_olduname */
-__SYSCALL(59, sys_ni_syscall)
-#define __NR_umask 60
-__SYSCALL(__NR_umask, sys_umask)
-#define __NR_chroot 61
-__SYSCALL(__NR_chroot, sys_chroot)
-#define __NR_ustat 62
-__SYSCALL(__NR_ustat, compat_sys_ustat)
-#define __NR_dup2 63
-__SYSCALL(__NR_dup2, sys_dup2)
-#define __NR_getppid 64
-__SYSCALL(__NR_getppid, sys_getppid)
-#define __NR_getpgrp 65
-__SYSCALL(__NR_getpgrp, sys_getpgrp)
-#define __NR_setsid 66
-__SYSCALL(__NR_setsid, sys_setsid)
-#define __NR_sigaction 67
-__SYSCALL(__NR_sigaction, compat_sys_sigaction)
- /* 68 was sys_sgetmask */
-__SYSCALL(68, sys_ni_syscall)
- /* 69 was sys_ssetmask */
-__SYSCALL(69, sys_ni_syscall)
-#define __NR_setreuid 70
-__SYSCALL(__NR_setreuid, sys_setreuid16)
-#define __NR_setregid 71
-__SYSCALL(__NR_setregid, sys_setregid16)
-#define __NR_sigsuspend 72
-__SYSCALL(__NR_sigsuspend, sys_sigsuspend)
-#define __NR_sigpending 73
-__SYSCALL(__NR_sigpending, compat_sys_sigpending)
-#define __NR_sethostname 74
-__SYSCALL(__NR_sethostname, sys_sethostname)
-#define __NR_setrlimit 75
-__SYSCALL(__NR_setrlimit, compat_sys_setrlimit)
- /* 76 was compat_sys_getrlimit */
-__SYSCALL(76, sys_ni_syscall)
-#define __NR_getrusage 77
-__SYSCALL(__NR_getrusage, compat_sys_getrusage)
-#define __NR_gettimeofday 78
-__SYSCALL(__NR_gettimeofday, compat_sys_gettimeofday)
-#define __NR_settimeofday 79
-__SYSCALL(__NR_settimeofday, compat_sys_settimeofday)
-#define __NR_getgroups 80
-__SYSCALL(__NR_getgroups, sys_getgroups16)
-#define __NR_setgroups 81
-__SYSCALL(__NR_setgroups, sys_setgroups16)
- /* 82 was compat_sys_select */
-__SYSCALL(82, sys_ni_syscall)
-#define __NR_symlink 83
-__SYSCALL(__NR_symlink, sys_symlink)
- /* 84 was sys_lstat */
-__SYSCALL(84, sys_ni_syscall)
-#define __NR_readlink 85
-__SYSCALL(__NR_readlink, sys_readlink)
-#define __NR_uselib 86
-__SYSCALL(__NR_uselib, sys_uselib)
-#define __NR_swapon 87
-__SYSCALL(__NR_swapon, sys_swapon)
-#define __NR_reboot 88
-__SYSCALL(__NR_reboot, sys_reboot)
- /* 89 was sys_readdir */
-__SYSCALL(89, sys_ni_syscall)
- /* 90 was sys_mmap */
-__SYSCALL(90, sys_ni_syscall)
-#define __NR_munmap 91
-__SYSCALL(__NR_munmap, sys_munmap)
-#define __NR_truncate 92
-__SYSCALL(__NR_truncate, compat_sys_truncate)
-#define __NR_ftruncate 93
-__SYSCALL(__NR_ftruncate, compat_sys_ftruncate)
-#define __NR_fchmod 94
-__SYSCALL(__NR_fchmod, sys_fchmod)
-#define __NR_fchown 95
-__SYSCALL(__NR_fchown, sys_fchown16)
-#define __NR_getpriority 96
-__SYSCALL(__NR_getpriority, sys_getpriority)
-#define __NR_setpriority 97
-__SYSCALL(__NR_setpriority, sys_setpriority)
- /* 98 was sys_profil */
-__SYSCALL(98, sys_ni_syscall)
-#define __NR_statfs 99
-__SYSCALL(__NR_statfs, compat_sys_statfs)
-#define __NR_fstatfs 100
-__SYSCALL(__NR_fstatfs, compat_sys_fstatfs)
- /* 101 was sys_ioperm */
-__SYSCALL(101, sys_ni_syscall)
- /* 102 was sys_socketcall */
-__SYSCALL(102, sys_ni_syscall)
-#define __NR_syslog 103
-__SYSCALL(__NR_syslog, sys_syslog)
-#define __NR_setitimer 104
-__SYSCALL(__NR_setitimer, compat_sys_setitimer)
-#define __NR_getitimer 105
-__SYSCALL(__NR_getitimer, compat_sys_getitimer)
-#define __NR_stat 106
-__SYSCALL(__NR_stat, compat_sys_newstat)
-#define __NR_lstat 107
-__SYSCALL(__NR_lstat, compat_sys_newlstat)
-#define __NR_fstat 108
-__SYSCALL(__NR_fstat, compat_sys_newfstat)
- /* 109 was sys_uname */
-__SYSCALL(109, sys_ni_syscall)
- /* 110 was sys_iopl */
-__SYSCALL(110, sys_ni_syscall)
-#define __NR_vhangup 111
-__SYSCALL(__NR_vhangup, sys_vhangup)
- /* 112 was sys_idle */
-__SYSCALL(112, sys_ni_syscall)
- /* 113 was sys_syscall */
-__SYSCALL(113, sys_ni_syscall)
-#define __NR_wait4 114
-__SYSCALL(__NR_wait4, compat_sys_wait4)
-#define __NR_swapoff 115
-__SYSCALL(__NR_swapoff, sys_swapoff)
-#define __NR_sysinfo 116
-__SYSCALL(__NR_sysinfo, compat_sys_sysinfo)
- /* 117 was sys_ipc */
-__SYSCALL(117, sys_ni_syscall)
-#define __NR_fsync 118
-__SYSCALL(__NR_fsync, sys_fsync)
-#define __NR_sigreturn 119
-__SYSCALL(__NR_sigreturn, compat_sys_sigreturn)
-#define __NR_clone 120
-__SYSCALL(__NR_clone, sys_clone)
-#define __NR_setdomainname 121
-__SYSCALL(__NR_setdomainname, sys_setdomainname)
-#define __NR_uname 122
-__SYSCALL(__NR_uname, sys_newuname)
- /* 123 was sys_modify_ldt */
-__SYSCALL(123, sys_ni_syscall)
-#define __NR_adjtimex 124
-__SYSCALL(__NR_adjtimex, sys_adjtimex_time32)
-#define __NR_mprotect 125
-__SYSCALL(__NR_mprotect, sys_mprotect)
-#define __NR_sigprocmask 126
-__SYSCALL(__NR_sigprocmask, compat_sys_sigprocmask)
- /* 127 was sys_create_module */
-__SYSCALL(127, sys_ni_syscall)
-#define __NR_init_module 128
-__SYSCALL(__NR_init_module, sys_init_module)
-#define __NR_delete_module 129
-__SYSCALL(__NR_delete_module, sys_delete_module)
- /* 130 was sys_get_kernel_syms */
-__SYSCALL(130, sys_ni_syscall)
-#define __NR_quotactl 131
-__SYSCALL(__NR_quotactl, sys_quotactl)
-#define __NR_getpgid 132
-__SYSCALL(__NR_getpgid, sys_getpgid)
-#define __NR_fchdir 133
-__SYSCALL(__NR_fchdir, sys_fchdir)
-#define __NR_bdflush 134
-__SYSCALL(__NR_bdflush, sys_ni_syscall)
-#define __NR_sysfs 135
-__SYSCALL(__NR_sysfs, sys_sysfs)
-#define __NR_personality 136
-__SYSCALL(__NR_personality, sys_personality)
- /* 137 was sys_afs_syscall */
-__SYSCALL(137, sys_ni_syscall)
-#define __NR_setfsuid 138
-__SYSCALL(__NR_setfsuid, sys_setfsuid16)
-#define __NR_setfsgid 139
-__SYSCALL(__NR_setfsgid, sys_setfsgid16)
-#define __NR__llseek 140
-__SYSCALL(__NR__llseek, sys_llseek)
-#define __NR_getdents 141
-__SYSCALL(__NR_getdents, compat_sys_getdents)
-#define __NR__newselect 142
-__SYSCALL(__NR__newselect, compat_sys_select)
-#define __NR_flock 143
-__SYSCALL(__NR_flock, sys_flock)
-#define __NR_msync 144
-__SYSCALL(__NR_msync, sys_msync)
-#define __NR_readv 145
-__SYSCALL(__NR_readv, sys_readv)
-#define __NR_writev 146
-__SYSCALL(__NR_writev, sys_writev)
-#define __NR_getsid 147
-__SYSCALL(__NR_getsid, sys_getsid)
-#define __NR_fdatasync 148
-__SYSCALL(__NR_fdatasync, sys_fdatasync)
- /* 149 was sys_sysctl */
-__SYSCALL(149, sys_ni_syscall)
-#define __NR_mlock 150
-__SYSCALL(__NR_mlock, sys_mlock)
-#define __NR_munlock 151
-__SYSCALL(__NR_munlock, sys_munlock)
-#define __NR_mlockall 152
-__SYSCALL(__NR_mlockall, sys_mlockall)
-#define __NR_munlockall 153
-__SYSCALL(__NR_munlockall, sys_munlockall)
-#define __NR_sched_setparam 154
-__SYSCALL(__NR_sched_setparam, sys_sched_setparam)
-#define __NR_sched_getparam 155
-__SYSCALL(__NR_sched_getparam, sys_sched_getparam)
-#define __NR_sched_setscheduler 156
-__SYSCALL(__NR_sched_setscheduler, sys_sched_setscheduler)
-#define __NR_sched_getscheduler 157
-__SYSCALL(__NR_sched_getscheduler, sys_sched_getscheduler)
-#define __NR_sched_yield 158
-__SYSCALL(__NR_sched_yield, sys_sched_yield)
-#define __NR_sched_get_priority_max 159
-__SYSCALL(__NR_sched_get_priority_max, sys_sched_get_priority_max)
-#define __NR_sched_get_priority_min 160
-__SYSCALL(__NR_sched_get_priority_min, sys_sched_get_priority_min)
-#define __NR_sched_rr_get_interval 161
-__SYSCALL(__NR_sched_rr_get_interval, sys_sched_rr_get_interval_time32)
-#define __NR_nanosleep 162
-__SYSCALL(__NR_nanosleep, sys_nanosleep_time32)
-#define __NR_mremap 163
-__SYSCALL(__NR_mremap, sys_mremap)
-#define __NR_setresuid 164
-__SYSCALL(__NR_setresuid, sys_setresuid16)
-#define __NR_getresuid 165
-__SYSCALL(__NR_getresuid, sys_getresuid16)
- /* 166 was sys_vm86 */
-__SYSCALL(166, sys_ni_syscall)
- /* 167 was sys_query_module */
-__SYSCALL(167, sys_ni_syscall)
-#define __NR_poll 168
-__SYSCALL(__NR_poll, sys_poll)
-#define __NR_nfsservctl 169
-__SYSCALL(__NR_nfsservctl, sys_ni_syscall)
-#define __NR_setresgid 170
-__SYSCALL(__NR_setresgid, sys_setresgid16)
-#define __NR_getresgid 171
-__SYSCALL(__NR_getresgid, sys_getresgid16)
-#define __NR_prctl 172
-__SYSCALL(__NR_prctl, sys_prctl)
-#define __NR_rt_sigreturn 173
-__SYSCALL(__NR_rt_sigreturn, compat_sys_rt_sigreturn)
-#define __NR_rt_sigaction 174
-__SYSCALL(__NR_rt_sigaction, compat_sys_rt_sigaction)
-#define __NR_rt_sigprocmask 175
-__SYSCALL(__NR_rt_sigprocmask, compat_sys_rt_sigprocmask)
-#define __NR_rt_sigpending 176
-__SYSCALL(__NR_rt_sigpending, compat_sys_rt_sigpending)
-#define __NR_rt_sigtimedwait 177
-__SYSCALL(__NR_rt_sigtimedwait, compat_sys_rt_sigtimedwait_time32)
-#define __NR_rt_sigqueueinfo 178
-__SYSCALL(__NR_rt_sigqueueinfo, compat_sys_rt_sigqueueinfo)
-#define __NR_rt_sigsuspend 179
-__SYSCALL(__NR_rt_sigsuspend, compat_sys_rt_sigsuspend)
-#define __NR_pread64 180
-__SYSCALL(__NR_pread64, compat_sys_aarch32_pread64)
-#define __NR_pwrite64 181
-__SYSCALL(__NR_pwrite64, compat_sys_aarch32_pwrite64)
-#define __NR_chown 182
-__SYSCALL(__NR_chown, sys_chown16)
-#define __NR_getcwd 183
-__SYSCALL(__NR_getcwd, sys_getcwd)
-#define __NR_capget 184
-__SYSCALL(__NR_capget, sys_capget)
-#define __NR_capset 185
-__SYSCALL(__NR_capset, sys_capset)
-#define __NR_sigaltstack 186
-__SYSCALL(__NR_sigaltstack, compat_sys_sigaltstack)
-#define __NR_sendfile 187
-__SYSCALL(__NR_sendfile, compat_sys_sendfile)
- /* 188 reserved */
-__SYSCALL(188, sys_ni_syscall)
- /* 189 reserved */
-__SYSCALL(189, sys_ni_syscall)
-#define __NR_vfork 190
-__SYSCALL(__NR_vfork, sys_vfork)
-#define __NR_ugetrlimit 191 /* SuS compliant getrlimit */
-__SYSCALL(__NR_ugetrlimit, compat_sys_getrlimit) /* SuS compliant getrlimit */
-#define __NR_mmap2 192
-__SYSCALL(__NR_mmap2, compat_sys_aarch32_mmap2)
-#define __NR_truncate64 193
-__SYSCALL(__NR_truncate64, compat_sys_aarch32_truncate64)
-#define __NR_ftruncate64 194
-__SYSCALL(__NR_ftruncate64, compat_sys_aarch32_ftruncate64)
-#define __NR_stat64 195
-__SYSCALL(__NR_stat64, sys_stat64)
-#define __NR_lstat64 196
-__SYSCALL(__NR_lstat64, sys_lstat64)
-#define __NR_fstat64 197
-__SYSCALL(__NR_fstat64, sys_fstat64)
-#define __NR_lchown32 198
-__SYSCALL(__NR_lchown32, sys_lchown)
-#define __NR_getuid32 199
-__SYSCALL(__NR_getuid32, sys_getuid)
-#define __NR_getgid32 200
-__SYSCALL(__NR_getgid32, sys_getgid)
-#define __NR_geteuid32 201
-__SYSCALL(__NR_geteuid32, sys_geteuid)
-#define __NR_getegid32 202
-__SYSCALL(__NR_getegid32, sys_getegid)
-#define __NR_setreuid32 203
-__SYSCALL(__NR_setreuid32, sys_setreuid)
-#define __NR_setregid32 204
-__SYSCALL(__NR_setregid32, sys_setregid)
-#define __NR_getgroups32 205
-__SYSCALL(__NR_getgroups32, sys_getgroups)
-#define __NR_setgroups32 206
-__SYSCALL(__NR_setgroups32, sys_setgroups)
-#define __NR_fchown32 207
-__SYSCALL(__NR_fchown32, sys_fchown)
-#define __NR_setresuid32 208
-__SYSCALL(__NR_setresuid32, sys_setresuid)
-#define __NR_getresuid32 209
-__SYSCALL(__NR_getresuid32, sys_getresuid)
-#define __NR_setresgid32 210
-__SYSCALL(__NR_setresgid32, sys_setresgid)
-#define __NR_getresgid32 211
-__SYSCALL(__NR_getresgid32, sys_getresgid)
-#define __NR_chown32 212
-__SYSCALL(__NR_chown32, sys_chown)
-#define __NR_setuid32 213
-__SYSCALL(__NR_setuid32, sys_setuid)
-#define __NR_setgid32 214
-__SYSCALL(__NR_setgid32, sys_setgid)
-#define __NR_setfsuid32 215
-__SYSCALL(__NR_setfsuid32, sys_setfsuid)
-#define __NR_setfsgid32 216
-__SYSCALL(__NR_setfsgid32, sys_setfsgid)
-#define __NR_getdents64 217
-__SYSCALL(__NR_getdents64, sys_getdents64)
-#define __NR_pivot_root 218
-__SYSCALL(__NR_pivot_root, sys_pivot_root)
-#define __NR_mincore 219
-__SYSCALL(__NR_mincore, sys_mincore)
-#define __NR_madvise 220
-__SYSCALL(__NR_madvise, sys_madvise)
-#define __NR_fcntl64 221
-__SYSCALL(__NR_fcntl64, compat_sys_fcntl64)
- /* 222 for tux */
-__SYSCALL(222, sys_ni_syscall)
- /* 223 is unused */
-__SYSCALL(223, sys_ni_syscall)
-#define __NR_gettid 224
-__SYSCALL(__NR_gettid, sys_gettid)
-#define __NR_readahead 225
-__SYSCALL(__NR_readahead, compat_sys_aarch32_readahead)
-#define __NR_setxattr 226
-__SYSCALL(__NR_setxattr, sys_setxattr)
-#define __NR_lsetxattr 227
-__SYSCALL(__NR_lsetxattr, sys_lsetxattr)
-#define __NR_fsetxattr 228
-__SYSCALL(__NR_fsetxattr, sys_fsetxattr)
-#define __NR_getxattr 229
-__SYSCALL(__NR_getxattr, sys_getxattr)
-#define __NR_lgetxattr 230
-__SYSCALL(__NR_lgetxattr, sys_lgetxattr)
-#define __NR_fgetxattr 231
-__SYSCALL(__NR_fgetxattr, sys_fgetxattr)
-#define __NR_listxattr 232
-__SYSCALL(__NR_listxattr, sys_listxattr)
-#define __NR_llistxattr 233
-__SYSCALL(__NR_llistxattr, sys_llistxattr)
-#define __NR_flistxattr 234
-__SYSCALL(__NR_flistxattr, sys_flistxattr)
-#define __NR_removexattr 235
-__SYSCALL(__NR_removexattr, sys_removexattr)
-#define __NR_lremovexattr 236
-__SYSCALL(__NR_lremovexattr, sys_lremovexattr)
-#define __NR_fremovexattr 237
-__SYSCALL(__NR_fremovexattr, sys_fremovexattr)
-#define __NR_tkill 238
-__SYSCALL(__NR_tkill, sys_tkill)
-#define __NR_sendfile64 239
-__SYSCALL(__NR_sendfile64, sys_sendfile64)
-#define __NR_futex 240
-__SYSCALL(__NR_futex, sys_futex_time32)
-#define __NR_sched_setaffinity 241
-__SYSCALL(__NR_sched_setaffinity, compat_sys_sched_setaffinity)
-#define __NR_sched_getaffinity 242
-__SYSCALL(__NR_sched_getaffinity, compat_sys_sched_getaffinity)
-#define __NR_io_setup 243
-__SYSCALL(__NR_io_setup, compat_sys_io_setup)
-#define __NR_io_destroy 244
-__SYSCALL(__NR_io_destroy, sys_io_destroy)
-#define __NR_io_getevents 245
-__SYSCALL(__NR_io_getevents, sys_io_getevents_time32)
-#define __NR_io_submit 246
-__SYSCALL(__NR_io_submit, compat_sys_io_submit)
-#define __NR_io_cancel 247
-__SYSCALL(__NR_io_cancel, sys_io_cancel)
-#define __NR_exit_group 248
-__SYSCALL(__NR_exit_group, sys_exit_group)
-#define __NR_lookup_dcookie 249
-__SYSCALL(__NR_lookup_dcookie, compat_sys_lookup_dcookie)
-#define __NR_epoll_create 250
-__SYSCALL(__NR_epoll_create, sys_epoll_create)
-#define __NR_epoll_ctl 251
-__SYSCALL(__NR_epoll_ctl, sys_epoll_ctl)
-#define __NR_epoll_wait 252
-__SYSCALL(__NR_epoll_wait, sys_epoll_wait)
-#define __NR_remap_file_pages 253
-__SYSCALL(__NR_remap_file_pages, sys_remap_file_pages)
- /* 254 for set_thread_area */
-__SYSCALL(254, sys_ni_syscall)
- /* 255 for get_thread_area */
-__SYSCALL(255, sys_ni_syscall)
-#define __NR_set_tid_address 256
-__SYSCALL(__NR_set_tid_address, sys_set_tid_address)
-#define __NR_timer_create 257
-__SYSCALL(__NR_timer_create, compat_sys_timer_create)
-#define __NR_timer_settime 258
-__SYSCALL(__NR_timer_settime, sys_timer_settime32)
-#define __NR_timer_gettime 259
-__SYSCALL(__NR_timer_gettime, sys_timer_gettime32)
-#define __NR_timer_getoverrun 260
-__SYSCALL(__NR_timer_getoverrun, sys_timer_getoverrun)
-#define __NR_timer_delete 261
-__SYSCALL(__NR_timer_delete, sys_timer_delete)
-#define __NR_clock_settime 262
-__SYSCALL(__NR_clock_settime, sys_clock_settime32)
-#define __NR_clock_gettime 263
-__SYSCALL(__NR_clock_gettime, sys_clock_gettime32)
-#define __NR_clock_getres 264
-__SYSCALL(__NR_clock_getres, sys_clock_getres_time32)
-#define __NR_clock_nanosleep 265
-__SYSCALL(__NR_clock_nanosleep, sys_clock_nanosleep_time32)
-#define __NR_statfs64 266
-__SYSCALL(__NR_statfs64, compat_sys_aarch32_statfs64)
-#define __NR_fstatfs64 267
-__SYSCALL(__NR_fstatfs64, compat_sys_aarch32_fstatfs64)
-#define __NR_tgkill 268
-__SYSCALL(__NR_tgkill, sys_tgkill)
-#define __NR_utimes 269
-__SYSCALL(__NR_utimes, sys_utimes_time32)
-#define __NR_arm_fadvise64_64 270
-__SYSCALL(__NR_arm_fadvise64_64, compat_sys_aarch32_fadvise64_64)
-#define __NR_pciconfig_iobase 271
-__SYSCALL(__NR_pciconfig_iobase, sys_pciconfig_iobase)
-#define __NR_pciconfig_read 272
-__SYSCALL(__NR_pciconfig_read, sys_pciconfig_read)
-#define __NR_pciconfig_write 273
-__SYSCALL(__NR_pciconfig_write, sys_pciconfig_write)
-#define __NR_mq_open 274
-__SYSCALL(__NR_mq_open, compat_sys_mq_open)
-#define __NR_mq_unlink 275
-__SYSCALL(__NR_mq_unlink, sys_mq_unlink)
-#define __NR_mq_timedsend 276
-__SYSCALL(__NR_mq_timedsend, sys_mq_timedsend_time32)
-#define __NR_mq_timedreceive 277
-__SYSCALL(__NR_mq_timedreceive, sys_mq_timedreceive_time32)
-#define __NR_mq_notify 278
-__SYSCALL(__NR_mq_notify, compat_sys_mq_notify)
-#define __NR_mq_getsetattr 279
-__SYSCALL(__NR_mq_getsetattr, compat_sys_mq_getsetattr)
-#define __NR_waitid 280
-__SYSCALL(__NR_waitid, compat_sys_waitid)
-#define __NR_socket 281
-__SYSCALL(__NR_socket, sys_socket)
-#define __NR_bind 282
-__SYSCALL(__NR_bind, sys_bind)
-#define __NR_connect 283
-__SYSCALL(__NR_connect, sys_connect)
-#define __NR_listen 284
-__SYSCALL(__NR_listen, sys_listen)
-#define __NR_accept 285
-__SYSCALL(__NR_accept, sys_accept)
-#define __NR_getsockname 286
-__SYSCALL(__NR_getsockname, sys_getsockname)
-#define __NR_getpeername 287
-__SYSCALL(__NR_getpeername, sys_getpeername)
-#define __NR_socketpair 288
-__SYSCALL(__NR_socketpair, sys_socketpair)
-#define __NR_send 289
-__SYSCALL(__NR_send, sys_send)
-#define __NR_sendto 290
-__SYSCALL(__NR_sendto, sys_sendto)
-#define __NR_recv 291
-__SYSCALL(__NR_recv, compat_sys_recv)
-#define __NR_recvfrom 292
-__SYSCALL(__NR_recvfrom, compat_sys_recvfrom)
-#define __NR_shutdown 293
-__SYSCALL(__NR_shutdown, sys_shutdown)
-#define __NR_setsockopt 294
-__SYSCALL(__NR_setsockopt, sys_setsockopt)
-#define __NR_getsockopt 295
-__SYSCALL(__NR_getsockopt, sys_getsockopt)
-#define __NR_sendmsg 296
-__SYSCALL(__NR_sendmsg, compat_sys_sendmsg)
-#define __NR_recvmsg 297
-__SYSCALL(__NR_recvmsg, compat_sys_recvmsg)
-#define __NR_semop 298
-__SYSCALL(__NR_semop, sys_semop)
-#define __NR_semget 299
-__SYSCALL(__NR_semget, sys_semget)
-#define __NR_semctl 300
-__SYSCALL(__NR_semctl, compat_sys_old_semctl)
-#define __NR_msgsnd 301
-__SYSCALL(__NR_msgsnd, compat_sys_msgsnd)
-#define __NR_msgrcv 302
-__SYSCALL(__NR_msgrcv, compat_sys_msgrcv)
-#define __NR_msgget 303
-__SYSCALL(__NR_msgget, sys_msgget)
-#define __NR_msgctl 304
-__SYSCALL(__NR_msgctl, compat_sys_old_msgctl)
-#define __NR_shmat 305
-__SYSCALL(__NR_shmat, compat_sys_shmat)
-#define __NR_shmdt 306
-__SYSCALL(__NR_shmdt, sys_shmdt)
-#define __NR_shmget 307
-__SYSCALL(__NR_shmget, sys_shmget)
-#define __NR_shmctl 308
-__SYSCALL(__NR_shmctl, compat_sys_old_shmctl)
-#define __NR_add_key 309
-__SYSCALL(__NR_add_key, sys_add_key)
-#define __NR_request_key 310
-__SYSCALL(__NR_request_key, sys_request_key)
-#define __NR_keyctl 311
-__SYSCALL(__NR_keyctl, compat_sys_keyctl)
-#define __NR_semtimedop 312
-__SYSCALL(__NR_semtimedop, sys_semtimedop_time32)
-#define __NR_vserver 313
-__SYSCALL(__NR_vserver, sys_ni_syscall)
-#define __NR_ioprio_set 314
-__SYSCALL(__NR_ioprio_set, sys_ioprio_set)
-#define __NR_ioprio_get 315
-__SYSCALL(__NR_ioprio_get, sys_ioprio_get)
-#define __NR_inotify_init 316
-__SYSCALL(__NR_inotify_init, sys_inotify_init)
-#define __NR_inotify_add_watch 317
-__SYSCALL(__NR_inotify_add_watch, sys_inotify_add_watch)
-#define __NR_inotify_rm_watch 318
-__SYSCALL(__NR_inotify_rm_watch, sys_inotify_rm_watch)
-#define __NR_mbind 319
-__SYSCALL(__NR_mbind, sys_mbind)
-#define __NR_get_mempolicy 320
-__SYSCALL(__NR_get_mempolicy, sys_get_mempolicy)
-#define __NR_set_mempolicy 321
-__SYSCALL(__NR_set_mempolicy, sys_set_mempolicy)
-#define __NR_openat 322
-__SYSCALL(__NR_openat, compat_sys_openat)
-#define __NR_mkdirat 323
-__SYSCALL(__NR_mkdirat, sys_mkdirat)
-#define __NR_mknodat 324
-__SYSCALL(__NR_mknodat, sys_mknodat)
-#define __NR_fchownat 325
-__SYSCALL(__NR_fchownat, sys_fchownat)
-#define __NR_futimesat 326
-__SYSCALL(__NR_futimesat, sys_futimesat_time32)
-#define __NR_fstatat64 327
-__SYSCALL(__NR_fstatat64, sys_fstatat64)
-#define __NR_unlinkat 328
-__SYSCALL(__NR_unlinkat, sys_unlinkat)
-#define __NR_renameat 329
-__SYSCALL(__NR_renameat, sys_renameat)
-#define __NR_linkat 330
-__SYSCALL(__NR_linkat, sys_linkat)
-#define __NR_symlinkat 331
-__SYSCALL(__NR_symlinkat, sys_symlinkat)
-#define __NR_readlinkat 332
-__SYSCALL(__NR_readlinkat, sys_readlinkat)
-#define __NR_fchmodat 333
-__SYSCALL(__NR_fchmodat, sys_fchmodat)
-#define __NR_faccessat 334
-__SYSCALL(__NR_faccessat, sys_faccessat)
-#define __NR_pselect6 335
-__SYSCALL(__NR_pselect6, compat_sys_pselect6_time32)
-#define __NR_ppoll 336
-__SYSCALL(__NR_ppoll, compat_sys_ppoll_time32)
-#define __NR_unshare 337
-__SYSCALL(__NR_unshare, sys_unshare)
-#define __NR_set_robust_list 338
-__SYSCALL(__NR_set_robust_list, compat_sys_set_robust_list)
-#define __NR_get_robust_list 339
-__SYSCALL(__NR_get_robust_list, compat_sys_get_robust_list)
-#define __NR_splice 340
-__SYSCALL(__NR_splice, sys_splice)
-#define __NR_sync_file_range2 341
-__SYSCALL(__NR_sync_file_range2, compat_sys_aarch32_sync_file_range2)
-#define __NR_tee 342
-__SYSCALL(__NR_tee, sys_tee)
-#define __NR_vmsplice 343
-__SYSCALL(__NR_vmsplice, sys_vmsplice)
-#define __NR_move_pages 344
-__SYSCALL(__NR_move_pages, sys_move_pages)
-#define __NR_getcpu 345
-__SYSCALL(__NR_getcpu, sys_getcpu)
-#define __NR_epoll_pwait 346
-__SYSCALL(__NR_epoll_pwait, compat_sys_epoll_pwait)
-#define __NR_kexec_load 347
-__SYSCALL(__NR_kexec_load, compat_sys_kexec_load)
-#define __NR_utimensat 348
-__SYSCALL(__NR_utimensat, sys_utimensat_time32)
-#define __NR_signalfd 349
-__SYSCALL(__NR_signalfd, compat_sys_signalfd)
-#define __NR_timerfd_create 350
-__SYSCALL(__NR_timerfd_create, sys_timerfd_create)
-#define __NR_eventfd 351
-__SYSCALL(__NR_eventfd, sys_eventfd)
-#define __NR_fallocate 352
-__SYSCALL(__NR_fallocate, compat_sys_aarch32_fallocate)
-#define __NR_timerfd_settime 353
-__SYSCALL(__NR_timerfd_settime, sys_timerfd_settime32)
-#define __NR_timerfd_gettime 354
-__SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime32)
-#define __NR_signalfd4 355
-__SYSCALL(__NR_signalfd4, compat_sys_signalfd4)
-#define __NR_eventfd2 356
-__SYSCALL(__NR_eventfd2, sys_eventfd2)
-#define __NR_epoll_create1 357
-__SYSCALL(__NR_epoll_create1, sys_epoll_create1)
-#define __NR_dup3 358
-__SYSCALL(__NR_dup3, sys_dup3)
-#define __NR_pipe2 359
-__SYSCALL(__NR_pipe2, sys_pipe2)
-#define __NR_inotify_init1 360
-__SYSCALL(__NR_inotify_init1, sys_inotify_init1)
-#define __NR_preadv 361
-__SYSCALL(__NR_preadv, compat_sys_preadv)
-#define __NR_pwritev 362
-__SYSCALL(__NR_pwritev, compat_sys_pwritev)
-#define __NR_rt_tgsigqueueinfo 363
-__SYSCALL(__NR_rt_tgsigqueueinfo, compat_sys_rt_tgsigqueueinfo)
-#define __NR_perf_event_open 364
-__SYSCALL(__NR_perf_event_open, sys_perf_event_open)
-#define __NR_recvmmsg 365
-__SYSCALL(__NR_recvmmsg, compat_sys_recvmmsg_time32)
-#define __NR_accept4 366
-__SYSCALL(__NR_accept4, sys_accept4)
-#define __NR_fanotify_init 367
-__SYSCALL(__NR_fanotify_init, sys_fanotify_init)
-#define __NR_fanotify_mark 368
-__SYSCALL(__NR_fanotify_mark, compat_sys_fanotify_mark)
-#define __NR_prlimit64 369
-__SYSCALL(__NR_prlimit64, sys_prlimit64)
-#define __NR_name_to_handle_at 370
-__SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at)
-#define __NR_open_by_handle_at 371
-__SYSCALL(__NR_open_by_handle_at, compat_sys_open_by_handle_at)
-#define __NR_clock_adjtime 372
-__SYSCALL(__NR_clock_adjtime, sys_clock_adjtime32)
-#define __NR_syncfs 373
-__SYSCALL(__NR_syncfs, sys_syncfs)
-#define __NR_sendmmsg 374
-__SYSCALL(__NR_sendmmsg, compat_sys_sendmmsg)
-#define __NR_setns 375
-__SYSCALL(__NR_setns, sys_setns)
-#define __NR_process_vm_readv 376
-__SYSCALL(__NR_process_vm_readv, sys_process_vm_readv)
-#define __NR_process_vm_writev 377
-__SYSCALL(__NR_process_vm_writev, sys_process_vm_writev)
-#define __NR_kcmp 378
-__SYSCALL(__NR_kcmp, sys_kcmp)
-#define __NR_finit_module 379
-__SYSCALL(__NR_finit_module, sys_finit_module)
-#define __NR_sched_setattr 380
-__SYSCALL(__NR_sched_setattr, sys_sched_setattr)
-#define __NR_sched_getattr 381
-__SYSCALL(__NR_sched_getattr, sys_sched_getattr)
-#define __NR_renameat2 382
-__SYSCALL(__NR_renameat2, sys_renameat2)
-#define __NR_seccomp 383
-__SYSCALL(__NR_seccomp, sys_seccomp)
-#define __NR_getrandom 384
-__SYSCALL(__NR_getrandom, sys_getrandom)
-#define __NR_memfd_create 385
-__SYSCALL(__NR_memfd_create, sys_memfd_create)
-#define __NR_bpf 386
-__SYSCALL(__NR_bpf, sys_bpf)
-#define __NR_execveat 387
-__SYSCALL(__NR_execveat, compat_sys_execveat)
-#define __NR_userfaultfd 388
-__SYSCALL(__NR_userfaultfd, sys_userfaultfd)
-#define __NR_membarrier 389
-__SYSCALL(__NR_membarrier, sys_membarrier)
-#define __NR_mlock2 390
-__SYSCALL(__NR_mlock2, sys_mlock2)
-#define __NR_copy_file_range 391
-__SYSCALL(__NR_copy_file_range, sys_copy_file_range)
-#define __NR_preadv2 392
-__SYSCALL(__NR_preadv2, compat_sys_preadv2)
-#define __NR_pwritev2 393
-__SYSCALL(__NR_pwritev2, compat_sys_pwritev2)
-#define __NR_pkey_mprotect 394
-__SYSCALL(__NR_pkey_mprotect, sys_pkey_mprotect)
-#define __NR_pkey_alloc 395
-__SYSCALL(__NR_pkey_alloc, sys_pkey_alloc)
-#define __NR_pkey_free 396
-__SYSCALL(__NR_pkey_free, sys_pkey_free)
-#define __NR_statx 397
-__SYSCALL(__NR_statx, sys_statx)
-#define __NR_rseq 398
-__SYSCALL(__NR_rseq, sys_rseq)
-#define __NR_io_pgetevents 399
-__SYSCALL(__NR_io_pgetevents, compat_sys_io_pgetevents)
-#define __NR_migrate_pages 400
-__SYSCALL(__NR_migrate_pages, sys_migrate_pages)
-#define __NR_kexec_file_load 401
-__SYSCALL(__NR_kexec_file_load, sys_kexec_file_load)
-/* 402 is unused */
-#define __NR_clock_gettime64 403
-__SYSCALL(__NR_clock_gettime64, sys_clock_gettime)
-#define __NR_clock_settime64 404
-__SYSCALL(__NR_clock_settime64, sys_clock_settime)
-#define __NR_clock_adjtime64 405
-__SYSCALL(__NR_clock_adjtime64, sys_clock_adjtime)
-#define __NR_clock_getres_time64 406
-__SYSCALL(__NR_clock_getres_time64, sys_clock_getres)
-#define __NR_clock_nanosleep_time64 407
-__SYSCALL(__NR_clock_nanosleep_time64, sys_clock_nanosleep)
-#define __NR_timer_gettime64 408
-__SYSCALL(__NR_timer_gettime64, sys_timer_gettime)
-#define __NR_timer_settime64 409
-__SYSCALL(__NR_timer_settime64, sys_timer_settime)
-#define __NR_timerfd_gettime64 410
-__SYSCALL(__NR_timerfd_gettime64, sys_timerfd_gettime)
-#define __NR_timerfd_settime64 411
-__SYSCALL(__NR_timerfd_settime64, sys_timerfd_settime)
-#define __NR_utimensat_time64 412
-__SYSCALL(__NR_utimensat_time64, sys_utimensat)
-#define __NR_pselect6_time64 413
-__SYSCALL(__NR_pselect6_time64, compat_sys_pselect6_time64)
-#define __NR_ppoll_time64 414
-__SYSCALL(__NR_ppoll_time64, compat_sys_ppoll_time64)
-#define __NR_io_pgetevents_time64 416
-__SYSCALL(__NR_io_pgetevents_time64, sys_io_pgetevents)
-#define __NR_recvmmsg_time64 417
-__SYSCALL(__NR_recvmmsg_time64, compat_sys_recvmmsg_time64)
-#define __NR_mq_timedsend_time64 418
-__SYSCALL(__NR_mq_timedsend_time64, sys_mq_timedsend)
-#define __NR_mq_timedreceive_time64 419
-__SYSCALL(__NR_mq_timedreceive_time64, sys_mq_timedreceive)
-#define __NR_semtimedop_time64 420
-__SYSCALL(__NR_semtimedop_time64, sys_semtimedop)
-#define __NR_rt_sigtimedwait_time64 421
-__SYSCALL(__NR_rt_sigtimedwait_time64, compat_sys_rt_sigtimedwait_time64)
-#define __NR_futex_time64 422
-__SYSCALL(__NR_futex_time64, sys_futex)
-#define __NR_sched_rr_get_interval_time64 423
-__SYSCALL(__NR_sched_rr_get_interval_time64, sys_sched_rr_get_interval)
-#define __NR_pidfd_send_signal 424
-__SYSCALL(__NR_pidfd_send_signal, sys_pidfd_send_signal)
-#define __NR_io_uring_setup 425
-__SYSCALL(__NR_io_uring_setup, sys_io_uring_setup)
-#define __NR_io_uring_enter 426
-__SYSCALL(__NR_io_uring_enter, sys_io_uring_enter)
-#define __NR_io_uring_register 427
-__SYSCALL(__NR_io_uring_register, sys_io_uring_register)
-#define __NR_open_tree 428
-__SYSCALL(__NR_open_tree, sys_open_tree)
-#define __NR_move_mount 429
-__SYSCALL(__NR_move_mount, sys_move_mount)
-#define __NR_fsopen 430
-__SYSCALL(__NR_fsopen, sys_fsopen)
-#define __NR_fsconfig 431
-__SYSCALL(__NR_fsconfig, sys_fsconfig)
-#define __NR_fsmount 432
-__SYSCALL(__NR_fsmount, sys_fsmount)
-#define __NR_fspick 433
-__SYSCALL(__NR_fspick, sys_fspick)
-#define __NR_pidfd_open 434
-__SYSCALL(__NR_pidfd_open, sys_pidfd_open)
-#define __NR_clone3 435
-__SYSCALL(__NR_clone3, sys_clone3)
-#define __NR_close_range 436
-__SYSCALL(__NR_close_range, sys_close_range)
-#define __NR_openat2 437
-__SYSCALL(__NR_openat2, sys_openat2)
-#define __NR_pidfd_getfd 438
-__SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd)
-#define __NR_faccessat2 439
-__SYSCALL(__NR_faccessat2, sys_faccessat2)
-#define __NR_process_madvise 440
-__SYSCALL(__NR_process_madvise, sys_process_madvise)
-#define __NR_epoll_pwait2 441
-__SYSCALL(__NR_epoll_pwait2, compat_sys_epoll_pwait2)
-#define __NR_mount_setattr 442
-__SYSCALL(__NR_mount_setattr, sys_mount_setattr)
-#define __NR_quotactl_fd 443
-__SYSCALL(__NR_quotactl_fd, sys_quotactl_fd)
-#define __NR_landlock_create_ruleset 444
-__SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset)
-#define __NR_landlock_add_rule 445
-__SYSCALL(__NR_landlock_add_rule, sys_landlock_add_rule)
-#define __NR_landlock_restrict_self 446
-__SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self)
-#define __NR_process_mrelease 448
-__SYSCALL(__NR_process_mrelease, sys_process_mrelease)
-#define __NR_futex_waitv 449
-__SYSCALL(__NR_futex_waitv, sys_futex_waitv)
-#define __NR_set_mempolicy_home_node 450
-__SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node)
+#define __NR_sync_file_range2 __NR_arm_sync_file_range
-/*
- * Please add new compat syscalls above this comment and update
- * __NR_compat_syscalls in asm/unistd.h.
- */
+#endif /* _UAPI__ASM_ARM_UNISTD_H */
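The table removed above is built with the usual two-pass __SYSCALL() idiom: the __NR_* defines name the slots and each __SYSCALL() line places a handler in that slot, with sys_ni_syscall filling retired or reserved numbers, and the same list is re-expanded elsewhere with __SYSCALL() redefined to populate the compat call table. A standalone sketch of how such a list typically expands into a dispatch table, using made-up names rather than the kernel's actual table code:

#include <stdio.h>

typedef long (*syscall_fn_t)(long, long, long);

static long my_ni_syscall(long a, long b, long c) { (void)a; (void)b; (void)c; return -38; /* -ENOSYS */ }
static long my_getpid(long a, long b, long c)     { (void)a; (void)b; (void)c; return 1234; }

#define __MY_NR_getpid	20
#define MY_NR_SYSCALLS	32

/* each inclusion of the numbered list redefines __SYSCALL for its purpose;
 * here it fills slots of a dispatch table */
#define __SYSCALL(nr, entry)	[nr] = (syscall_fn_t)(entry),

static const syscall_fn_t my_call_table[MY_NR_SYSCALLS] = {
	[0 ... MY_NR_SYSCALLS - 1] = my_ni_syscall,	/* default every slot to "not implemented" */
	__SYSCALL(__MY_NR_getpid, my_getpid)		/* then override the real entries */
};

int main(void)
{
	printf("%ld %ld\n", my_call_table[__MY_NR_getpid](0, 0, 0),
	       my_call_table[5](0, 0, 0));		/* prints "1234 -38" */
	return 0;
}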
diff --git a/arch/arm64/include/asm/uprobes.h b/arch/arm64/include/asm/uprobes.h
index 315eef654e39..89bfb0213a50 100644
--- a/arch/arm64/include/asm/uprobes.h
+++ b/arch/arm64/include/asm/uprobes.h
@@ -10,24 +10,33 @@
#include <asm/insn.h>
#include <asm/probes.h>
-#define MAX_UINSN_BYTES AARCH64_INSN_SIZE
-
-#define UPROBE_SWBP_INSN BRK64_OPCODE_UPROBES
+#define UPROBE_SWBP_INSN cpu_to_le32(BRK64_OPCODE_UPROBES)
#define UPROBE_SWBP_INSN_SIZE AARCH64_INSN_SIZE
-#define UPROBE_XOL_SLOT_BYTES MAX_UINSN_BYTES
+#define UPROBE_XOL_SLOT_BYTES AARCH64_INSN_SIZE
-typedef u32 uprobe_opcode_t;
+typedef __le32 uprobe_opcode_t;
struct arch_uprobe_task {
};
struct arch_uprobe {
union {
- u8 insn[MAX_UINSN_BYTES];
- u8 ixol[MAX_UINSN_BYTES];
+ __le32 insn;
+ __le32 ixol;
};
struct arch_probe_insn api;
bool simulate;
};
+int uprobe_brk_handler(struct pt_regs *regs, unsigned long esr);
+#ifdef CONFIG_UPROBES
+int uprobe_single_step_handler(struct pt_regs *regs, unsigned long esr);
+#else
+static inline int uprobe_single_step_handler(struct pt_regs *regs,
+ unsigned long esr)
+{
+ return DBG_HOOK_ERROR;
+}
+#endif
+
#endif
diff --git a/arch/arm64/include/asm/vdso.h b/arch/arm64/include/asm/vdso.h
index f99dcb94b438..61679070f595 100644
--- a/arch/arm64/include/asm/vdso.h
+++ b/arch/arm64/include/asm/vdso.h
@@ -5,27 +5,20 @@
#ifndef __ASM_VDSO_H
#define __ASM_VDSO_H
-/*
- * Default link address for the vDSO.
- * Since we randomise the VDSO mapping, there's little point in trying
- * to prelink this.
- */
-#define VDSO_LBASE 0x0
-
-#define __VVAR_PAGES 2
+#define __VDSO_PAGES 4
#ifndef __ASSEMBLY__
#include <generated/vdso-offsets.h>
-#ifdef CONFIG_COMPAT_VDSO
-#include <generated/vdso32-offsets.h>
-#endif
#define VDSO_SYMBOL(base, name) \
({ \
- (void *)(vdso_offset_##name - VDSO_LBASE + (unsigned long)(base)); \
+ (void *)(vdso_offset_##name + (unsigned long)(base)); \
})
+extern char vdso_start[], vdso_end[];
+extern char vdso32_start[], vdso32_end[];
+
#endif /* !__ASSEMBLY__ */
#endif /* __ASM_VDSO_H */
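With VDSO_LBASE dropped, the generated vdso_offset_* values are plain byte offsets from the start of the mapping, so VDSO_SYMBOL() reduces to base-plus-offset. A minimal sketch of the macro pattern, with an invented offset name and value standing in for the generated header:

#include <stdio.h>

#define vdso_offset_example 0x800UL	/* stand-in for a generated vdso_offset_* value */

#define MY_VDSO_SYMBOL(base, name) \
	((void *)(vdso_offset_##name + (unsigned long)(base)))

int main(void)
{
	static char vdso_image[4096];	/* pretend this is the mapped vDSO */
	void *sym = MY_VDSO_SYMBOL(vdso_image, example);

	printf("offset into image: %ld\n", (long)((char *)sym - vdso_image));
	return 0;
}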
diff --git a/arch/arm64/include/asm/vdso/compat_gettimeofday.h b/arch/arm64/include/asm/vdso/compat_gettimeofday.h
index ecb6fd4c3c64..d60ea7a72a9c 100644
--- a/arch/arm64/include/asm/vdso/compat_gettimeofday.h
+++ b/arch/arm64/include/asm/vdso/compat_gettimeofday.h
@@ -8,7 +8,7 @@
#ifndef __ASSEMBLY__
#include <asm/barrier.h>
-#include <asm/unistd.h>
+#include <asm/unistd_compat_32.h>
#include <asm/errno.h>
#include <asm/vdso/compat_barrier.h>
@@ -24,7 +24,7 @@ int gettimeofday_fallback(struct __kernel_old_timeval *_tv,
register struct timezone *tz asm("r1") = _tz;
register struct __kernel_old_timeval *tv asm("r0") = _tv;
register long ret asm ("r0");
- register long nr asm("r7") = __NR_compat_gettimeofday;
+ register long nr asm("r7") = __NR_compat32_gettimeofday;
asm volatile(
" swi #0\n"
@@ -41,7 +41,7 @@ long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
register struct __kernel_timespec *ts asm("r1") = _ts;
register clockid_t clkid asm("r0") = _clkid;
register long ret asm ("r0");
- register long nr asm("r7") = __NR_compat_clock_gettime64;
+ register long nr asm("r7") = __NR_compat32_clock_gettime64;
asm volatile(
" swi #0\n"
@@ -58,7 +58,7 @@ long clock_gettime32_fallback(clockid_t _clkid, struct old_timespec32 *_ts)
register struct old_timespec32 *ts asm("r1") = _ts;
register clockid_t clkid asm("r0") = _clkid;
register long ret asm ("r0");
- register long nr asm("r7") = __NR_compat_clock_gettime;
+ register long nr asm("r7") = __NR_compat32_clock_gettime;
asm volatile(
" swi #0\n"
@@ -75,7 +75,7 @@ int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
register struct __kernel_timespec *ts asm("r1") = _ts;
register clockid_t clkid asm("r0") = _clkid;
register long ret asm ("r0");
- register long nr asm("r7") = __NR_compat_clock_getres_time64;
+ register long nr asm("r7") = __NR_compat32_clock_getres_time64;
asm volatile(
" swi #0\n"
@@ -92,7 +92,7 @@ int clock_getres32_fallback(clockid_t _clkid, struct old_timespec32 *_ts)
register struct old_timespec32 *ts asm("r1") = _ts;
register clockid_t clkid asm("r0") = _clkid;
register long ret asm ("r0");
- register long nr asm("r7") = __NR_compat_clock_getres;
+ register long nr asm("r7") = __NR_compat32_clock_getres;
asm volatile(
" swi #0\n"
@@ -104,7 +104,7 @@ int clock_getres32_fallback(clockid_t _clkid, struct old_timespec32 *_ts)
}
static __always_inline u64 __arch_get_hw_counter(s32 clock_mode,
- const struct vdso_data *vd)
+ const struct vdso_time_data *vd)
{
u64 res;
@@ -131,45 +131,33 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode,
return res;
}
-static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
+static __always_inline const struct vdso_time_data *__arch_get_vdso_u_time_data(void)
{
- const struct vdso_data *ret;
+ const struct vdso_time_data *ret;
/*
- * This simply puts &_vdso_data into ret. The reason why we don't use
- * `ret = _vdso_data` is that the compiler tends to optimise this in a
- * very suboptimal way: instead of keeping &_vdso_data in a register,
- * it goes through a relocation almost every time _vdso_data must be
+ * This simply puts &_vdso_time_data into ret. The reason why we don't use
+ * `ret = _vdso_time_data` is that the compiler tends to optimise this in a
+ * very suboptimal way: instead of keeping &_vdso_time_data in a register,
+ * it goes through a relocation almost every time _vdso_time_data must be
* accessed (even in subfunctions). This is both time and space
* consuming: each relocation uses a word in the code section, and it
* has to be loaded at runtime.
*
* This trick hides the assignment from the compiler. Since it cannot
* track where the pointer comes from, it will only use one relocation
- * where __arch_get_vdso_data() is called, and then keep the result in
- * a register.
+ * where __aarch64_get_vdso_u_time_data() is called, and then keep the
+ * result in a register.
*/
- asm volatile("mov %0, %1" : "=r"(ret) : "r"(_vdso_data));
+ asm volatile("mov %0, %1" : "=r"(ret) : "r"(&vdso_u_time_data));
return ret;
}
+#define __arch_get_vdso_u_time_data __arch_get_vdso_u_time_data
-#ifdef CONFIG_TIME_NS
-static __always_inline
-const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
-{
- const struct vdso_data *ret;
-
- /* See __arch_get_vdso_data(). */
- asm volatile("mov %0, %1" : "=r"(ret) : "r"(_timens_data));
-
- return ret;
-}
-#endif
-
-static inline bool vdso_clocksource_ok(const struct vdso_data *vd)
+static inline bool vdso_clocksource_ok(const struct vdso_clock *vc)
{
- return vd->clock_mode == VDSO_CLOCKMODE_ARCHTIMER;
+ return vc->clock_mode == VDSO_CLOCKMODE_ARCHTIMER;
}
#define vdso_clocksource_ok vdso_clocksource_ok
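The comment retained above explains why the address of vdso_u_time_data is passed through an asm statement: the optimiser can no longer prove where the pointer came from, so it materialises the relocation once and keeps the result in a register. The same effect in isolation, as a hedged sketch using arm64 inline asm (not the kernel's helper):

static int some_global;

static inline int *hidden_address_of_global(void)
{
	int *p;

	/* the compiler only knows "p came out of this asm", so it cannot
	 * re-derive &some_global (and its relocation) at every later use */
	asm volatile("mov %0, %1" : "=r"(p) : "r"(&some_global));
	return p;
}

int main(void)
{
	*hidden_address_of_global() = 42;
	return some_global == 42 ? 0 : 1;
}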
diff --git a/arch/arm64/include/asm/vdso/getrandom.h b/arch/arm64/include/asm/vdso/getrandom.h
new file mode 100644
index 000000000000..a2197da1951b
--- /dev/null
+++ b/arch/arm64/include/asm/vdso/getrandom.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASM_VDSO_GETRANDOM_H
+#define __ASM_VDSO_GETRANDOM_H
+
+#ifndef __ASSEMBLY__
+
+#include <asm/unistd.h>
+#include <asm/vdso/vsyscall.h>
+#include <vdso/datapage.h>
+
+/**
+ * getrandom_syscall - Invoke the getrandom() syscall.
+ * @buffer: Destination buffer to fill with random bytes.
+ * @len: Size of @buffer in bytes.
+ * @flags: Zero or more GRND_* flags.
+ * Returns: The number of random bytes written to @buffer, or a negative value indicating an error.
+ */
+static __always_inline ssize_t getrandom_syscall(void *_buffer, size_t _len, unsigned int _flags)
+{
+ register void *buffer asm ("x0") = _buffer;
+ register size_t len asm ("x1") = _len;
+ register unsigned int flags asm ("x2") = _flags;
+ register long ret asm ("x0");
+ register long nr asm ("x8") = __NR_getrandom;
+
+ asm volatile(
+ " svc #0\n"
+ : "=r" (ret)
+ : "r" (buffer), "r" (len), "r" (flags), "r" (nr)
+ : "memory");
+
+ return ret;
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_GETRANDOM_H */
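getrandom_syscall() above is the fallback path for the vDSO getrandom() implementation when a request cannot be satisfied from its opaque per-thread state; it is a bare svc wrapper, so it follows raw syscall conventions (a negative return is -errno). A short usage sketch under that assumption:

static ssize_t fill_with_entropy(void *buf, size_t len)
{
	ssize_t ret = getrandom_syscall(buf, len, 0);	/* no GRND_* flags */

	/* raw syscall convention: a negative value is -errno (e.g. -EINTR) */
	return ret;
}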
diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h
index 4f7a629df81f..da1ab8759592 100644
--- a/arch/arm64/include/asm/vdso/gettimeofday.h
+++ b/arch/arm64/include/asm/vdso/gettimeofday.h
@@ -7,8 +7,11 @@
#ifndef __ASSEMBLY__
+#include <asm/alternative.h>
+#include <asm/arch_timer.h>
#include <asm/barrier.h>
#include <asm/unistd.h>
+#include <asm/sysreg.h>
#define VDSO_HAS_CLOCK_GETRES 1
@@ -65,10 +68,8 @@ int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
}
static __always_inline u64 __arch_get_hw_counter(s32 clock_mode,
- const struct vdso_data *vd)
+ const struct vdso_time_data *vd)
{
- u64 res;
-
/*
* Core checks for mode already, so this raced against a concurrent
* update. Return something. Core will do another round and then
@@ -77,30 +78,21 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode,
if (clock_mode == VDSO_CLOCKMODE_NONE)
return 0;
- /*
- * This isb() is required to prevent that the counter value
- * is speculated.
- */
- isb();
- asm volatile("mrs %0, cntvct_el0" : "=r" (res) :: "memory");
- arch_counter_enforce_ordering(res);
-
- return res;
+ return __arch_counter_get_cntvct();
}
-static __always_inline
-const struct vdso_data *__arch_get_vdso_data(void)
+#if IS_ENABLED(CONFIG_CC_IS_GCC) && IS_ENABLED(CONFIG_PAGE_SIZE_64KB)
+static __always_inline const struct vdso_time_data *__arch_get_vdso_u_time_data(void)
{
- return _vdso_data;
-}
+ const struct vdso_time_data *ret = &vdso_u_time_data;
-#ifdef CONFIG_TIME_NS
-static __always_inline
-const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
-{
- return _timens_data;
+ /* Work around invalid absolute relocations */
+ OPTIMIZER_HIDE_VAR(ret);
+
+ return ret;
}
-#endif
+#define __arch_get_vdso_u_time_data __arch_get_vdso_u_time_data
+#endif /* IS_ENABLED(CONFIG_CC_IS_GCC) && IS_ENABLED(CONFIG_PAGE_SIZE_64KB) */
#endif /* !__ASSEMBLY__ */
diff --git a/arch/arm64/include/asm/vdso/vsyscall.h b/arch/arm64/include/asm/vdso/vsyscall.h
index f94b1457c117..417aae5763a8 100644
--- a/arch/arm64/include/asm/vdso/vsyscall.h
+++ b/arch/arm64/include/asm/vdso/vsyscall.h
@@ -4,30 +4,20 @@
#ifndef __ASSEMBLY__
-#include <linux/timekeeper_internal.h>
#include <vdso/datapage.h>
#define VDSO_PRECISION_MASK ~(0xFF00ULL<<48)
-extern struct vdso_data *vdso_data;
/*
* Update the vDSO data page to keep in sync with kernel timekeeping.
*/
static __always_inline
-struct vdso_data *__arm64_get_k_vdso_data(void)
+void __arch_update_vdso_clock(struct vdso_clock *vc)
{
- return vdso_data;
+ vc->mask = VDSO_PRECISION_MASK;
}
-#define __arch_get_k_vdso_data __arm64_get_k_vdso_data
-
-static __always_inline
-void __arm64_update_vsyscall(struct vdso_data *vdata, struct timekeeper *tk)
-{
- vdata[CS_HRES_COARSE].mask = VDSO_PRECISION_MASK;
- vdata[CS_RAW].mask = VDSO_PRECISION_MASK;
-}
-#define __arch_update_vsyscall __arm64_update_vsyscall
+#define __arch_update_vdso_clock __arch_update_vdso_clock
/* The asm-generic header needs to be included after the definitions above */
#include <asm-generic/vdso/vsyscall.h>
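__arch_update_vdso_clock() above stores VDSO_PRECISION_MASK as the clocksource mask, which knocks out the top eight bits so userspace timekeeping only consumes the low 56 bits of the counter. A quick check of the constant:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint64_t mask = ~(0xFF00ULL << 48);

	assert(mask == 0x00FFFFFFFFFFFFFFULL);	/* bits 63:56 cleared, 56 usable bits */
	return 0;
}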
diff --git a/arch/arm64/include/asm/vectors.h b/arch/arm64/include/asm/vectors.h
index bc9a2145f419..b815d8f2c0dc 100644
--- a/arch/arm64/include/asm/vectors.h
+++ b/arch/arm64/include/asm/vectors.h
@@ -62,7 +62,7 @@ DECLARE_PER_CPU_READ_MOSTLY(const char *, this_cpu_vector);
static inline const char *
arm64_get_bp_hardening_vector(enum arm64_bp_harden_el1_vectors slot)
{
- if (arm64_kernel_unmapped_at_el0())
+ if (cpus_have_cap(ARM64_UNMAP_KERNEL_AT_EL0))
return (char *)(TRAMP_VALIAS + SZ_2K * slot);
WARN_ON_ONCE(slot == EL1_VECTOR_KPTI);
diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index 4eb601e7de50..aa280f356b96 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -67,7 +67,8 @@
* __boot_cpu_mode records what mode CPUs were booted in.
* A correctly-implemented bootloader must start all CPUs in the same mode:
* In this case, both 32bit halves of __boot_cpu_mode will contain the
- * same value (either 0 if booted in EL1, BOOT_CPU_MODE_EL2 if booted in EL2).
+ * same value (either BOOT_CPU_MODE_EL1 if booted in EL1, BOOT_CPU_MODE_EL2 if
+ * booted in EL2).
*
* Should the bootloader fail to do this, the two values will be different.
* This allows the kernel to flag an error when the secondaries have come up.
@@ -78,9 +79,16 @@ extern u32 __boot_cpu_mode[2];
void __hyp_set_vectors(phys_addr_t phys_vector_base);
void __hyp_reset_vectors(void);
+bool is_kvm_arm_initialised(void);
DECLARE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);
+static inline bool is_pkvm_initialized(void)
+{
+ return IS_ENABLED(CONFIG_KVM) &&
+ static_branch_likely(&kvm_protected_mode_initialized);
+}
+
/* Reports the availability of HYP mode */
static inline bool is_hyp_mode_available(void)
{
@@ -88,8 +96,7 @@ static inline bool is_hyp_mode_available(void)
* If KVM protected mode is initialized, all CPUs must have been booted
* in EL2. Avoid checking __boot_cpu_mode as CPUs now come up in EL1.
*/
- if (IS_ENABLED(CONFIG_KVM) &&
- static_branch_likely(&kvm_protected_mode_initialized))
+ if (is_pkvm_initialized())
return true;
return (__boot_cpu_mode[0] == BOOT_CPU_MODE_EL2 &&
@@ -103,15 +110,16 @@ static inline bool is_hyp_mode_mismatched(void)
* If KVM protected mode is initialized, all CPUs must have been booted
* in EL2. Avoid checking __boot_cpu_mode as CPUs now come up in EL1.
*/
- if (IS_ENABLED(CONFIG_KVM) &&
- static_branch_likely(&kvm_protected_mode_initialized))
+ if (is_pkvm_initialized())
return false;
return __boot_cpu_mode[0] != __boot_cpu_mode[1];
}
-static inline bool is_kernel_in_hyp_mode(void)
+static __always_inline bool is_kernel_in_hyp_mode(void)
{
+ BUILD_BUG_ON(__is_defined(__KVM_NVHE_HYPERVISOR__) ||
+ __is_defined(__KVM_VHE_HYPERVISOR__));
return read_sysreg(CurrentEL) == CurrentEL_EL2;
}
@@ -140,6 +148,14 @@ static __always_inline bool is_protected_kvm_enabled(void)
return cpus_have_final_cap(ARM64_KVM_PROTECTED_MODE);
}
+static __always_inline bool has_hvhe(void)
+{
+ if (is_vhe_hyp_code())
+ return false;
+
+ return cpus_have_final_cap(ARM64_KVM_HVHE);
+}
+
static inline bool is_hyp_nvhe(void)
{
return is_hyp_mode_available() && !is_kernel_in_hyp_mode();
diff --git a/arch/arm64/include/asm/vmalloc.h b/arch/arm64/include/asm/vmalloc.h
index 38fafffe699f..12f534e8f3ed 100644
--- a/arch/arm64/include/asm/vmalloc.h
+++ b/arch/arm64/include/asm/vmalloc.h
@@ -23,6 +23,51 @@ static inline bool arch_vmap_pmd_supported(pgprot_t prot)
return !IS_ENABLED(CONFIG_PTDUMP_DEBUGFS);
}
+#define arch_vmap_pte_range_map_size arch_vmap_pte_range_map_size
+static inline unsigned long arch_vmap_pte_range_map_size(unsigned long addr,
+ unsigned long end, u64 pfn,
+ unsigned int max_page_shift)
+{
+ /*
+ * If the block is at least CONT_PTE_SIZE in size, and is naturally
+ * aligned in both virtual and physical space, then we can pte-map the
+ * block using the PTE_CONT bit for more efficient use of the TLB.
+ */
+ if (max_page_shift < CONT_PTE_SHIFT)
+ return PAGE_SIZE;
+
+ if (end - addr < CONT_PTE_SIZE)
+ return PAGE_SIZE;
+
+ if (!IS_ALIGNED(addr, CONT_PTE_SIZE))
+ return PAGE_SIZE;
+
+ if (!IS_ALIGNED(PFN_PHYS(pfn), CONT_PTE_SIZE))
+ return PAGE_SIZE;
+
+ return CONT_PTE_SIZE;
+}
+
+#define arch_vmap_pte_range_unmap_size arch_vmap_pte_range_unmap_size
+static inline unsigned long arch_vmap_pte_range_unmap_size(unsigned long addr,
+ pte_t *ptep)
+{
+ /*
+ * The caller handles alignment so it's sufficient just to check
+ * PTE_CONT.
+ */
+ return pte_valid_cont(__ptep_get(ptep)) ? CONT_PTE_SIZE : PAGE_SIZE;
+}
+
+#define arch_vmap_pte_supported_shift arch_vmap_pte_supported_shift
+static inline int arch_vmap_pte_supported_shift(unsigned long size)
+{
+ if (size >= CONT_PTE_SIZE)
+ return CONT_PTE_SHIFT;
+
+ return PAGE_SHIFT;
+}
+
#endif
#define arch_vmap_pgprot_tagged arch_vmap_pgprot_tagged
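arch_vmap_pte_range_map_size() above only opts into PTE_CONT mapping when the remaining range and both the virtual and physical addresses are CONT_PTE_SIZE-aligned; otherwise it steps a page at a time. A worked sketch of that decision using assumed 4K-page constants (CONT_PTE_SIZE = 64K) rather than the kernel's headers, and with the max_page_shift check omitted:

#include <stdio.h>
#include <stdint.h>

#define MY_PAGE_SIZE	0x1000UL
#define MY_CONT_SIZE	0x10000UL	/* 16 contiguous PTEs with 4K pages */

static unsigned long map_step(uint64_t addr, uint64_t end, uint64_t phys)
{
	if (end - addr < MY_CONT_SIZE ||
	    addr % MY_CONT_SIZE || phys % MY_CONT_SIZE)
		return MY_PAGE_SIZE;		/* fall back to single pages */
	return MY_CONT_SIZE;			/* eligible for a PTE_CONT block */
}

int main(void)
{
	/* aligned 128K region: mapped as two 64K contiguous blocks */
	printf("%lx\n", map_step(0xffff000000100000, 0xffff000000120000, 0x80000000));
	/* misaligned start: falls back to 4K steps */
	printf("%lx\n", map_step(0xffff000000101000, 0xffff000000120000, 0x80001000));
	return 0;
}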
diff --git a/arch/arm64/include/asm/vncr_mapping.h b/arch/arm64/include/asm/vncr_mapping.h
new file mode 100644
index 000000000000..f6ec500ad3fa
--- /dev/null
+++ b/arch/arm64/include/asm/vncr_mapping.h
@@ -0,0 +1,110 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * System register offsets in the VNCR page
+ * All offsets are *byte* displacements!
+ */
+
+#ifndef __ARM64_VNCR_MAPPING_H__
+#define __ARM64_VNCR_MAPPING_H__
+
+#define VNCR_VTTBR_EL2 0x020
+#define VNCR_VTCR_EL2 0x040
+#define VNCR_VMPIDR_EL2 0x050
+#define VNCR_CNTVOFF_EL2 0x060
+#define VNCR_HCR_EL2 0x078
+#define VNCR_HSTR_EL2 0x080
+#define VNCR_VPIDR_EL2 0x088
+#define VNCR_TPIDR_EL2 0x090
+#define VNCR_HCRX_EL2 0x0A0
+#define VNCR_VNCR_EL2 0x0B0
+#define VNCR_CPACR_EL1 0x100
+#define VNCR_CONTEXTIDR_EL1 0x108
+#define VNCR_SCTLR_EL1 0x110
+#define VNCR_ACTLR_EL1 0x118
+#define VNCR_TCR_EL1 0x120
+#define VNCR_AFSR0_EL1 0x128
+#define VNCR_AFSR1_EL1 0x130
+#define VNCR_ESR_EL1 0x138
+#define VNCR_MAIR_EL1 0x140
+#define VNCR_AMAIR_EL1 0x148
+#define VNCR_MDSCR_EL1 0x158
+#define VNCR_SPSR_EL1 0x160
+#define VNCR_CNTV_CVAL_EL0 0x168
+#define VNCR_CNTV_CTL_EL0 0x170
+#define VNCR_CNTP_CVAL_EL0 0x178
+#define VNCR_CNTP_CTL_EL0 0x180
+#define VNCR_SCXTNUM_EL1 0x188
+#define VNCR_TFSR_EL1 0x190
+#define VNCR_HDFGRTR2_EL2 0x1A0
+#define VNCR_HDFGWTR2_EL2 0x1B0
+#define VNCR_HFGRTR_EL2 0x1B8
+#define VNCR_HFGWTR_EL2 0x1C0
+#define VNCR_HFGITR_EL2 0x1C8
+#define VNCR_HDFGRTR_EL2 0x1D0
+#define VNCR_HDFGWTR_EL2 0x1D8
+#define VNCR_ZCR_EL1 0x1E0
+#define VNCR_HAFGRTR_EL2 0x1E8
+#define VNCR_TTBR0_EL1 0x200
+#define VNCR_TTBR1_EL1 0x210
+#define VNCR_FAR_EL1 0x220
+#define VNCR_ELR_EL1 0x230
+#define VNCR_SP_EL1 0x240
+#define VNCR_VBAR_EL1 0x250
+#define VNCR_TCR2_EL1 0x270
+#define VNCR_SCTLR2_EL1 0x278
+#define VNCR_PIRE0_EL1 0x290
+#define VNCR_PIR_EL1 0x2A0
+#define VNCR_POR_EL1 0x2A8
+#define VNCR_HFGRTR2_EL2 0x2C0
+#define VNCR_HFGWTR2_EL2 0x2C8
+#define VNCR_HFGITR2_EL2 0x310
+#define VNCR_ICH_LR0_EL2 0x400
+#define VNCR_ICH_LR1_EL2 0x408
+#define VNCR_ICH_LR2_EL2 0x410
+#define VNCR_ICH_LR3_EL2 0x418
+#define VNCR_ICH_LR4_EL2 0x420
+#define VNCR_ICH_LR5_EL2 0x428
+#define VNCR_ICH_LR6_EL2 0x430
+#define VNCR_ICH_LR7_EL2 0x438
+#define VNCR_ICH_LR8_EL2 0x440
+#define VNCR_ICH_LR9_EL2 0x448
+#define VNCR_ICH_LR10_EL2 0x450
+#define VNCR_ICH_LR11_EL2 0x458
+#define VNCR_ICH_LR12_EL2 0x460
+#define VNCR_ICH_LR13_EL2 0x468
+#define VNCR_ICH_LR14_EL2 0x470
+#define VNCR_ICH_LR15_EL2 0x478
+#define VNCR_ICH_AP0R0_EL2 0x480
+#define VNCR_ICH_AP0R1_EL2 0x488
+#define VNCR_ICH_AP0R2_EL2 0x490
+#define VNCR_ICH_AP0R3_EL2 0x498
+#define VNCR_ICH_AP1R0_EL2 0x4A0
+#define VNCR_ICH_AP1R1_EL2 0x4A8
+#define VNCR_ICH_AP1R2_EL2 0x4B0
+#define VNCR_ICH_AP1R3_EL2 0x4B8
+#define VNCR_ICH_HCR_EL2 0x4C0
+#define VNCR_ICH_VMCR_EL2 0x4C8
+#define VNCR_VDISR_EL2 0x500
+#define VNCR_VSESR_EL2 0x508
+#define VNCR_PMBLIMITR_EL1 0x800
+#define VNCR_PMBPTR_EL1 0x810
+#define VNCR_PMBSR_EL1 0x820
+#define VNCR_PMSCR_EL1 0x828
+#define VNCR_PMSEVFR_EL1 0x830
+#define VNCR_PMSICR_EL1 0x838
+#define VNCR_PMSIRR_EL1 0x840
+#define VNCR_PMSLATFR_EL1 0x848
+#define VNCR_TRFCR_EL1 0x880
+#define VNCR_MPAM1_EL1 0x900
+#define VNCR_MPAMHCR_EL2 0x930
+#define VNCR_MPAMVPMV_EL2 0x938
+#define VNCR_MPAMVPM0_EL2 0x940
+#define VNCR_MPAMVPM1_EL2 0x948
+#define VNCR_MPAMVPM2_EL2 0x950
+#define VNCR_MPAMVPM3_EL2 0x958
+#define VNCR_MPAMVPM4_EL2 0x960
+#define VNCR_MPAMVPM5_EL2 0x968
+#define VNCR_MPAMVPM6_EL2 0x970
+#define VNCR_MPAMVPM7_EL2 0x978
+
+#endif /* __ARM64_VNCR_MAPPING_H__ */
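The offsets above locate system register copies inside the single VNCR page used for FEAT_NV2 register redirection, so an access to a redirected register becomes an ordinary load or store at page base plus offset. A hedged sketch of consuming such an offset (the helper and page pointer are illustrative, not kernel API):

#include <stdint.h>

static inline uint64_t vncr_read(const void *vncr_page, unsigned int offset)
{
	/* all offsets in the header above are byte displacements */
	return *(const uint64_t *)((const char *)vncr_page + offset);
}

/* e.g. vncr_read(page, VNCR_HCR_EL2) would return the guest's HCR_EL2 copy */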
diff --git a/arch/arm64/include/asm/word-at-a-time.h b/arch/arm64/include/asm/word-at-a-time.h
index 1c8e4f2490bf..824ca6987a51 100644
--- a/arch/arm64/include/asm/word-at-a-time.h
+++ b/arch/arm64/include/asm/word-at-a-time.h
@@ -9,7 +9,8 @@
#ifndef __AARCH64EB__
-#include <linux/kernel.h>
+#include <linux/bitops.h>
+#include <linux/wordpart.h>
struct word_at_a_time {
const unsigned long one_bits, high_bits;
@@ -26,20 +27,15 @@ static inline unsigned long has_zero(unsigned long a, unsigned long *bits,
}
#define prep_zero_mask(a, bits, c) (bits)
+#define create_zero_mask(bits) (bits)
+#define find_zero(bits) (__ffs(bits) >> 3)
-static inline unsigned long create_zero_mask(unsigned long bits)
+static inline unsigned long zero_bytemask(unsigned long bits)
{
bits = (bits - 1) & ~bits;
return bits >> 7;
}
-static inline unsigned long find_zero(unsigned long mask)
-{
- return fls64(mask) >> 3;
-}
-
-#define zero_bytemask(mask) (mask)
-
#else /* __AARCH64EB__ */
#include <asm-generic/word-at-a-time.h>
#endif
@@ -55,7 +51,7 @@ static inline unsigned long load_unaligned_zeropad(const void *addr)
{
unsigned long ret;
- __uaccess_enable_tco_async();
+ __mte_enable_tco_async();
/* Load word from unaligned pointer addr */
asm(
@@ -65,7 +61,7 @@ static inline unsigned long load_unaligned_zeropad(const void *addr)
: "=&r" (ret)
: "r" (addr), "Q" (*(unsigned long *)addr));
- __uaccess_disable_tco_async();
+ __mte_disable_tco_async();
return ret;
}
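On little-endian, the has_zero()/find_zero() pair above uses the classic word-at-a-time trick: (x - 0x01…01) & ~x & 0x80…80 sets bit 7 of each zero byte, so the byte index of the first zero is the lowest set bit divided by eight, which is exactly __ffs(bits) >> 3. A standalone sketch of that arithmetic (not the kernel's helpers):

#include <stdio.h>
#include <stdint.h>
#include <string.h>

#define ONES	0x0101010101010101ULL
#define HIGHS	0x8080808080808080ULL

int main(void)
{
	uint64_t word;

	memcpy(&word, "abc\0defg", 8);			/* zero byte at index 3 (little-endian load) */

	uint64_t mask = (word - ONES) & ~word & HIGHS;	/* bit 7 of each zero byte */
	unsigned int first_zero = __builtin_ctzll(mask) >> 3;	/* the kernel's __ffs(bits) >> 3 */

	printf("first zero byte at index %u\n", first_zero);	/* prints 3 */
	return 0;
}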
diff --git a/arch/arm64/include/uapi/asm/Kbuild b/arch/arm64/include/uapi/asm/Kbuild
index 602d137932dc..c6d141d7b7d7 100644
--- a/arch/arm64/include/uapi/asm/Kbuild
+++ b/arch/arm64/include/uapi/asm/Kbuild
@@ -1,3 +1,4 @@
# SPDX-License-Identifier: GPL-2.0
+syscall-y += unistd_64.h
generic-y += kvm_para.h
diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
index 1ad2568a2569..72c78468b806 100644
--- a/arch/arm64/include/uapi/asm/hwcap.h
+++ b/arch/arm64/include/uapi/asm/hwcap.h
@@ -21,7 +21,7 @@
* HWCAP flags - for AT_HWCAP
*
* Bits 62 and 63 are reserved for use by libc.
- * Bits 32-61 are unallocated for potential use by libc.
+ * Bits 33-61 are unallocated for potential use by libc.
*/
#define HWCAP_FP (1 << 0)
#define HWCAP_ASIMD (1 << 1)
@@ -55,6 +55,22 @@
#define HWCAP_SB (1 << 29)
#define HWCAP_PACA (1 << 30)
#define HWCAP_PACG (1UL << 31)
+#define HWCAP_GCS (1UL << 32)
+#define HWCAP_CMPBR (1UL << 33)
+#define HWCAP_FPRCVT (1UL << 34)
+#define HWCAP_F8MM8 (1UL << 35)
+#define HWCAP_F8MM4 (1UL << 36)
+#define HWCAP_SVE_F16MM (1UL << 37)
+#define HWCAP_SVE_ELTPERM (1UL << 38)
+#define HWCAP_SVE_AES2 (1UL << 39)
+#define HWCAP_SVE_BFSCALE (1UL << 40)
+#define HWCAP_SVE2P2 (1UL << 41)
+#define HWCAP_SME2P2 (1UL << 42)
+#define HWCAP_SME_SBITPERM (1UL << 43)
+#define HWCAP_SME_AES (1UL << 44)
+#define HWCAP_SME_SFEXPA (1UL << 45)
+#define HWCAP_SME_STMOP (1UL << 46)
+#define HWCAP_SME_SMOP4 (1UL << 47)
/*
* HWCAP2 flags - for AT_HWCAP2
@@ -92,5 +108,42 @@
#define HWCAP2_SME_FA64 (1 << 30)
#define HWCAP2_WFXT (1UL << 31)
#define HWCAP2_EBF16 (1UL << 32)
+#define HWCAP2_SVE_EBF16 (1UL << 33)
+#define HWCAP2_CSSC (1UL << 34)
+#define HWCAP2_RPRFM (1UL << 35)
+#define HWCAP2_SVE2P1 (1UL << 36)
+#define HWCAP2_SME2 (1UL << 37)
+#define HWCAP2_SME2P1 (1UL << 38)
+#define HWCAP2_SME_I16I32 (1UL << 39)
+#define HWCAP2_SME_BI32I32 (1UL << 40)
+#define HWCAP2_SME_B16B16 (1UL << 41)
+#define HWCAP2_SME_F16F16 (1UL << 42)
+#define HWCAP2_MOPS (1UL << 43)
+#define HWCAP2_HBC (1UL << 44)
+#define HWCAP2_SVE_B16B16 (1UL << 45)
+#define HWCAP2_LRCPC3 (1UL << 46)
+#define HWCAP2_LSE128 (1UL << 47)
+#define HWCAP2_FPMR (1UL << 48)
+#define HWCAP2_LUT (1UL << 49)
+#define HWCAP2_FAMINMAX (1UL << 50)
+#define HWCAP2_F8CVT (1UL << 51)
+#define HWCAP2_F8FMA (1UL << 52)
+#define HWCAP2_F8DP4 (1UL << 53)
+#define HWCAP2_F8DP2 (1UL << 54)
+#define HWCAP2_F8E4M3 (1UL << 55)
+#define HWCAP2_F8E5M2 (1UL << 56)
+#define HWCAP2_SME_LUTV2 (1UL << 57)
+#define HWCAP2_SME_F8F16 (1UL << 58)
+#define HWCAP2_SME_F8F32 (1UL << 59)
+#define HWCAP2_SME_SF8FMA (1UL << 60)
+#define HWCAP2_SME_SF8DP4 (1UL << 61)
+#define HWCAP2_SME_SF8DP2 (1UL << 62)
+#define HWCAP2_POE (1UL << 63)
+
+/*
+ * HWCAP3 flags - for AT_HWCAP3
+ */
+#define HWCAP3_MTE_FAR (1UL << 0)
+#define HWCAP3_MTE_STORE_ONLY (1UL << 1)
#endif /* _UAPI__ASM_HWCAP_H */
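Userspace sees these bits through the auxiliary vector: HWCAP in AT_HWCAP, HWCAP2 in AT_HWCAP2, and the new HWCAP3 word through a third entry where the kernel and C library expose it. A short detection sketch using the long-established AT_HWCAP/AT_HWCAP2 words:

#include <stdio.h>
#include <sys/auxv.h>

#ifndef HWCAP2_MOPS
#define HWCAP2_MOPS	(1UL << 43)	/* value from the header above */
#endif

int main(void)
{
	unsigned long hwcap  = getauxval(AT_HWCAP);
	unsigned long hwcap2 = getauxval(AT_HWCAP2);

	if (hwcap & (1UL << 1))		/* HWCAP_ASIMD */
		puts("AdvSIMD supported");
	if (hwcap2 & HWCAP2_MOPS)
		puts("FEAT_MOPS memory-copy instructions supported");
	return 0;
}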
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 3bb134355874..ed5f3892674c 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -37,15 +37,11 @@
#include <asm/ptrace.h>
#include <asm/sve_context.h>
-#define __KVM_HAVE_GUEST_DEBUG
#define __KVM_HAVE_IRQ_LINE
-#define __KVM_HAVE_READONLY_MEM
#define __KVM_HAVE_VCPU_EVENTS
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
-
-#define KVM_REG_SIZE(id) \
- (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
+#define KVM_DIRTY_LOG_PAGE_OFFSET 64
struct kvm_regs {
struct user_pt_regs regs; /* sp = sp_el0 */
@@ -75,9 +71,11 @@ struct kvm_regs {
/* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */
#define KVM_ARM_DEVICE_TYPE_SHIFT 0
-#define KVM_ARM_DEVICE_TYPE_MASK (0xffff << KVM_ARM_DEVICE_TYPE_SHIFT)
+#define KVM_ARM_DEVICE_TYPE_MASK __GENMASK(KVM_ARM_DEVICE_TYPE_SHIFT + 15, \
+ KVM_ARM_DEVICE_TYPE_SHIFT)
#define KVM_ARM_DEVICE_ID_SHIFT 16
-#define KVM_ARM_DEVICE_ID_MASK (0xffff << KVM_ARM_DEVICE_ID_SHIFT)
+#define KVM_ARM_DEVICE_ID_MASK __GENMASK(KVM_ARM_DEVICE_ID_SHIFT + 15, \
+ KVM_ARM_DEVICE_ID_SHIFT)
/* Supported device IDs */
#define KVM_ARM_DEVICE_VGIC_V2 0
@@ -106,6 +104,8 @@ struct kvm_regs {
#define KVM_ARM_VCPU_SVE 4 /* enable SVE for this CPU */
#define KVM_ARM_VCPU_PTRAUTH_ADDRESS 5 /* VCPU uses address authentication */
#define KVM_ARM_VCPU_PTRAUTH_GENERIC 6 /* VCPU uses generic authentication */
+#define KVM_ARM_VCPU_HAS_EL2 7 /* Support nested virtualization */
+#define KVM_ARM_VCPU_HAS_EL2_E2H0 8 /* Limit NV support to E2H RES0 */
struct kvm_vcpu_init {
__u32 target;
@@ -158,6 +158,11 @@ struct kvm_sync_regs {
__u64 device_irq_level;
};
+/* Bits for run->s.regs.device_irq_level */
+#define KVM_ARM_DEV_EL1_VTIMER (1 << 0)
+#define KVM_ARM_DEV_EL1_PTIMER (1 << 1)
+#define KVM_ARM_DEV_PMU (1 << 2)
+
/*
* PMU filter structure. Describe a range of events with a particular
* action. To be used with KVM_ARM_VCPU_PMU_V3_FILTER.
@@ -194,6 +199,15 @@ struct kvm_arm_copy_mte_tags {
__u64 reserved[2];
};
+/*
+ * Counter/Timer offset structure. Describe the virtual/physical offset.
+ * To be used with KVM_ARM_SET_COUNTER_OFFSET.
+ */
+struct kvm_arm_counter_offset {
+ __u64 counter_offset;
+ __u64 reserved;
+};
+
#define KVM_ARM_TAGS_TO_GUEST 0
#define KVM_ARM_TAGS_FROM_GUEST 1
@@ -358,6 +372,7 @@ enum {
#endif
};
+/* Vendor hyper call function numbers 0-63 */
#define KVM_REG_ARM_VENDOR_HYP_BMAP KVM_REG_ARM_FW_FEAT_BMAP_REG(2)
enum {
@@ -368,6 +383,21 @@ enum {
#endif
};
+/* Vendor hyper call function numbers 64-127 */
+#define KVM_REG_ARM_VENDOR_HYP_BMAP_2 KVM_REG_ARM_FW_FEAT_BMAP_REG(3)
+
+enum {
+ KVM_REG_ARM_VENDOR_HYP_BIT_DISCOVER_IMPL_VER = 0,
+ KVM_REG_ARM_VENDOR_HYP_BIT_DISCOVER_IMPL_CPUS = 1,
+#ifdef __KERNEL__
+ KVM_REG_ARM_VENDOR_HYP_BMAP_2_BIT_COUNT,
+#endif
+};
+
+/* Device Control API on vm fd */
+#define KVM_ARM_VM_SMCCC_CTRL 0
+#define KVM_ARM_VM_SMCCC_FILTER 0
+
/* Device Control API: ARM VGIC */
#define KVM_DEV_ARM_VGIC_GRP_ADDR 0
#define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1
@@ -386,6 +416,7 @@ enum {
#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7
#define KVM_DEV_ARM_VGIC_GRP_ITS_REGS 8
+#define KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ 9
#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10
#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
(0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
@@ -400,13 +431,16 @@ enum {
/* Device Control API on vcpu fd */
#define KVM_ARM_VCPU_PMU_V3_CTRL 0
-#define KVM_ARM_VCPU_PMU_V3_IRQ 0
-#define KVM_ARM_VCPU_PMU_V3_INIT 1
-#define KVM_ARM_VCPU_PMU_V3_FILTER 2
-#define KVM_ARM_VCPU_PMU_V3_SET_PMU 3
+#define KVM_ARM_VCPU_PMU_V3_IRQ 0
+#define KVM_ARM_VCPU_PMU_V3_INIT 1
+#define KVM_ARM_VCPU_PMU_V3_FILTER 2
+#define KVM_ARM_VCPU_PMU_V3_SET_PMU 3
+#define KVM_ARM_VCPU_PMU_V3_SET_NR_COUNTERS 4
#define KVM_ARM_VCPU_TIMER_CTRL 1
#define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0
#define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1
+#define KVM_ARM_VCPU_TIMER_IRQ_HVTIMER 2
+#define KVM_ARM_VCPU_TIMER_IRQ_HPTIMER 3
#define KVM_ARM_VCPU_PVTIME_CTRL 2
#define KVM_ARM_VCPU_PVTIME_IPA 0
@@ -462,9 +496,68 @@ enum {
*/
#define KVM_SYSTEM_EVENT_RESET_FLAG_PSCI_RESET2 (1ULL << 0)
+/*
+ * Shutdown caused by a PSCI v1.3 SYSTEM_OFF2 call.
+ * Valid only when the system event has a type of KVM_SYSTEM_EVENT_SHUTDOWN.
+ */
+#define KVM_SYSTEM_EVENT_SHUTDOWN_FLAG_PSCI_OFF2 (1ULL << 0)
+
/* run->fail_entry.hardware_entry_failure_reason codes. */
#define KVM_EXIT_FAIL_ENTRY_CPU_UNSUPPORTED (1ULL << 0)
+enum kvm_smccc_filter_action {
+ KVM_SMCCC_FILTER_HANDLE = 0,
+ KVM_SMCCC_FILTER_DENY,
+ KVM_SMCCC_FILTER_FWD_TO_USER,
+
+#ifdef __KERNEL__
+ NR_SMCCC_FILTER_ACTIONS
+#endif
+};
+
+struct kvm_smccc_filter {
+ __u32 base;
+ __u32 nr_functions;
+ __u8 action;
+ __u8 pad[15];
+};
+
+/* arm64-specific KVM_EXIT_HYPERCALL flags */
+#define KVM_HYPERCALL_EXIT_SMC (1U << 0)
+#define KVM_HYPERCALL_EXIT_16BIT (1U << 1)
+
+/*
+ * Get feature ID registers userspace writable mask.
+ *
+ * From DDI0487J.a, D19.2.66 ("ID_AA64MMFR2_EL1, AArch64 Memory Model
+ * Feature Register 2"):
+ *
+ * "The Feature ID space is defined as the System register space in
+ * AArch64 with op0==3, op1=={0, 1, 3}, CRn==0, CRm=={0-7},
+ * op2=={0-7}."
+ *
+ * This covers all currently known R/O registers that indicate
+ * anything useful feature wise, including the ID registers.
+ *
+ * If we ever need to introduce a new range, it will be described as
+ * such in the range field.
+ */
+#define KVM_ARM_FEATURE_ID_RANGE_IDX(op0, op1, crn, crm, op2) \
+ ({ \
+ __u64 __op1 = (op1) & 3; \
+ __op1 -= (__op1 == 3); \
+ (__op1 << 6 | ((crm) & 7) << 3 | (op2)); \
+ })
+
+#define KVM_ARM_FEATURE_ID_RANGE 0
+#define KVM_ARM_FEATURE_ID_RANGE_SIZE (3 * 8 * 8)
+
+struct reg_mask_range {
+ __u64 addr; /* Pointer to mask array */
+ __u32 range; /* Requested range */
+ __u32 reserved[13];
+};
+
#endif
#endif /* __ARM_KVM_H__ */
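KVM_ARM_FEATURE_ID_RANGE_IDX() above flattens (op1, CRm, op2) into an index over the 3 x 8 x 8 registers of the feature-ID space, folding op1==3 down to 2 so the three valid op1 values pack contiguously into the KVM_ARM_FEATURE_ID_RANGE_SIZE mask array. A worked instance for ID_AA64PFR0_EL1 (op0=3, op1=0, CRn=0, CRm=4, op2=0):

#include <stdio.h>

int main(void)
{
	/* ID_AA64PFR0_EL1 is encoded as op0=3, op1=0, CRn=0, CRm=4, op2=0 */
	unsigned long op1 = 0 & 3;			/* 0 */
	op1 -= (op1 == 3);				/* only op1==3 gets remapped (to 2) */
	unsigned long idx = (op1 << 6) | ((4 & 7) << 3) | 0;

	printf("index %lu of the 192-entry writable-mask array\n", idx);	/* prints 32 */
	return 0;
}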
diff --git a/arch/arm64/include/uapi/asm/mman.h b/arch/arm64/include/uapi/asm/mman.h
index 1e6482a838e1..e7e0c8216243 100644
--- a/arch/arm64/include/uapi/asm/mman.h
+++ b/arch/arm64/include/uapi/asm/mman.h
@@ -7,4 +7,13 @@
#define PROT_BTI 0x10 /* BTI guarded page */
#define PROT_MTE 0x20 /* Normal Tagged mapping */
+/* Override any generic PKEY permission defines */
+#define PKEY_DISABLE_EXECUTE 0x4
+#define PKEY_DISABLE_READ 0x8
+#undef PKEY_ACCESS_MASK
+#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\
+ PKEY_DISABLE_WRITE |\
+ PKEY_DISABLE_READ |\
+ PKEY_DISABLE_EXECUTE)
+
#endif /* ! _UAPI__ASM_MMAN_H */
diff --git a/arch/arm64/include/uapi/asm/perf_regs.h b/arch/arm64/include/uapi/asm/perf_regs.h
index d54daafa89e3..86e556429e0e 100644
--- a/arch/arm64/include/uapi/asm/perf_regs.h
+++ b/arch/arm64/include/uapi/asm/perf_regs.h
@@ -37,5 +37,12 @@ enum perf_event_arm_regs {
PERF_REG_ARM64_SP,
PERF_REG_ARM64_PC,
PERF_REG_ARM64_MAX,
+
+ /* Extended/pseudo registers */
+ PERF_REG_ARM64_VG = 46, /* SVE Vector Granule */
+ PERF_REG_ARM64_EXTENDED_MAX
};
+
+#define PERF_REG_EXTENDED_MASK (1ULL << PERF_REG_ARM64_VG)
+
#endif /* _ASM_ARM64_PERF_REGS_H */
diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h
index 7fa2f7036aa7..0f39ba4f3efd 100644
--- a/arch/arm64/include/uapi/asm/ptrace.h
+++ b/arch/arm64/include/uapi/asm/ptrace.h
@@ -324,6 +324,14 @@ struct user_za_header {
#define ZA_PT_SIZE(vq) \
(ZA_PT_ZA_OFFSET + ZA_PT_ZA_SIZE(vq))
+/* GCS state (NT_ARM_GCS) */
+
+struct user_gcs {
+ __u64 features_enabled;
+ __u64 features_locked;
+ __u64 gcspr_el0;
+};
+
#endif /* __ASSEMBLY__ */
#endif /* _UAPI__ASM_PTRACE_H */
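The new user_gcs regset is exposed to debuggers through PTRACE_GETREGSET/PTRACE_SETREGSET with the NT_ARM_GCS note type. A hedged sketch of reading it, assuming installed uapi headers new enough to provide struct user_gcs, and with the NT_ARM_GCS fallback value below being an assumption rather than something taken from this patch:

#include <stdio.h>
#include <sys/types.h>
#include <sys/ptrace.h>
#include <sys/uio.h>
#include <elf.h>
#include <asm/ptrace.h>

#ifndef NT_ARM_GCS
#define NT_ARM_GCS 0x410	/* assumed note type if elf.h does not define it */
#endif

int dump_gcs(pid_t pid)
{
	struct user_gcs gcs;
	struct iovec iov = { .iov_base = &gcs, .iov_len = sizeof(gcs) };

	if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_GCS, &iov) != 0)
		return -1;

	printf("GCSPR_EL0=%#llx enabled=%#llx\n",
	       (unsigned long long)gcs.gcspr_el0,
	       (unsigned long long)gcs.features_enabled);
	return 0;
}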
diff --git a/arch/arm64/include/uapi/asm/sigcontext.h b/arch/arm64/include/uapi/asm/sigcontext.h
index 4aaf31e3bf16..d42f7a92238b 100644
--- a/arch/arm64/include/uapi/asm/sigcontext.h
+++ b/arch/arm64/include/uapi/asm/sigcontext.h
@@ -62,6 +62,10 @@ struct sigcontext {
* context. Such structures must be placed after the rt_sigframe on the stack
* and be 16-byte aligned. The last structure must be a dummy one with the
* magic and size set to 0.
+ *
+ * Note that the values allocated for use as magic should be chosen to
+ * be meaningful in ASCII to aid manual parsing, ZA doesn't follow this
+ * convention due to oversight but it should be observed for future additions.
*/
struct _aarch64_ctx {
__u32 magic;
@@ -94,6 +98,13 @@ struct esr_context {
__u64 esr;
};
+#define POE_MAGIC 0x504f4530
+
+struct poe_context {
+ struct _aarch64_ctx head;
+ __u64 por_el0;
+};
+
/*
* extra_context: describes extra space in the signal frame for
* additional structures that don't fit in sigcontext.__reserved[].
@@ -140,6 +151,22 @@ struct sve_context {
#define SVE_SIG_FLAG_SM 0x1 /* Context describes streaming mode */
+/* TPIDR2_EL0 context */
+#define TPIDR2_MAGIC 0x54504902
+
+struct tpidr2_context {
+ struct _aarch64_ctx head;
+ __u64 tpidr2;
+};
+
+/* FPMR context */
+#define FPMR_MAGIC 0x46504d52
+
+struct fpmr_context {
+ struct _aarch64_ctx head;
+ __u64 fpmr;
+};
+
#define ZA_MAGIC 0x54366345
struct za_context {
@@ -148,6 +175,23 @@ struct za_context {
__u16 __reserved[3];
};
+#define ZT_MAGIC 0x5a544e01
+
+struct zt_context {
+ struct _aarch64_ctx head;
+ __u16 nregs;
+ __u16 __reserved[3];
+};
+
+#define GCS_MAGIC 0x47435300
+
+struct gcs_context {
+ struct _aarch64_ctx head;
+ __u64 gcspr;
+ __u64 features_enabled;
+ __u64 reserved;
+};
+
#endif /* !__ASSEMBLY__ */
#include <asm/sve_context.h>
@@ -157,7 +201,7 @@ struct za_context {
* vector length beyond its initial architectural limit of 2048 bits
* (16 quadwords).
*
- * See linux/Documentation/arm64/sve.rst for a description of the VL/VQ
+ * See linux/Documentation/arch/arm64/sve.rst for a description of the VL/VQ
* terminology.
*/
#define SVE_VQ_BYTES __SVE_VQ_BYTES /* bytes per quadword */
@@ -292,12 +336,23 @@ struct za_context {
((sizeof(struct za_context) + (__SVE_VQ_BYTES - 1)) \
/ __SVE_VQ_BYTES * __SVE_VQ_BYTES)
-#define ZA_SIG_REGS_SIZE(vq) ((vq * __SVE_VQ_BYTES) * (vq * __SVE_VQ_BYTES))
+#define ZA_SIG_REGS_SIZE(vq) (((vq) * __SVE_VQ_BYTES) * ((vq) * __SVE_VQ_BYTES))
#define ZA_SIG_ZAV_OFFSET(vq, n) (ZA_SIG_REGS_OFFSET + \
- (SVE_SIG_ZREG_SIZE(vq) * n))
+ (SVE_SIG_ZREG_SIZE(vq) * (n)))
#define ZA_SIG_CONTEXT_SIZE(vq) \
(ZA_SIG_REGS_OFFSET + ZA_SIG_REGS_SIZE(vq))
+#define ZT_SIG_REG_SIZE 512
+
+#define ZT_SIG_REG_BYTES (ZT_SIG_REG_SIZE / 8)
+
+#define ZT_SIG_REGS_OFFSET sizeof(struct zt_context)
+
+#define ZT_SIG_REGS_SIZE(n) (ZT_SIG_REG_BYTES * (n))
+
+#define ZT_SIG_CONTEXT_SIZE(n) \
+ (sizeof(struct zt_context) + ZT_SIG_REGS_SIZE(n))
+
#endif /* _UAPI__ASM_SIGCONTEXT_H */
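All of the new records (poe_context, tpidr2_context, fpmr_context, zt_context, gcs_context) follow the _aarch64_ctx discipline described in the comment block above: 16-byte aligned, identified by magic, terminated by a record whose magic and size are both zero. A minimal sketch of walking such a list:

#include <stdint.h>
#include <stdio.h>

struct my_ctx {		/* mirrors struct _aarch64_ctx: magic + size header */
	uint32_t magic;
	uint32_t size;
};

static void walk_records(const void *reserved, size_t limit)
{
	const char *p = reserved;
	const char *end = p + limit;

	while (p + sizeof(struct my_ctx) <= end) {
		const struct my_ctx *h = (const struct my_ctx *)p;

		if (h->magic == 0 && h->size == 0)
			break;			/* terminator record */
		if (h->size < sizeof(*h) || h->size > (size_t)(end - p))
			break;			/* malformed frame */

		printf("record magic %#x, %u bytes\n", h->magic, h->size);
		p += h->size;			/* records are contiguous and 16-byte aligned */
	}
}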
diff --git a/arch/arm64/include/uapi/asm/sve_context.h b/arch/arm64/include/uapi/asm/sve_context.h
index 754ab751b523..72aefc081061 100644
--- a/arch/arm64/include/uapi/asm/sve_context.h
+++ b/arch/arm64/include/uapi/asm/sve_context.h
@@ -13,6 +13,17 @@
#define __SVE_VQ_BYTES 16 /* number of bytes per quadword */
+/*
+ * Yes, __SVE_VQ_MAX is 512 QUADWORDS.
+ *
+ * To help ensure forward portability, this is much larger than the
+ * current maximum value defined by the SVE architecture. While arrays
+ * or static allocations can be sized based on this value, watch out!
+ * It will waste a surprisingly large amount of memory.
+ *
+ * Dynamic sizing based on the actual runtime vector length is likely to
+ * be preferable for most purposes.
+ */
#define __SVE_VQ_MIN 1
#define __SVE_VQ_MAX 512
diff --git a/arch/arm64/include/uapi/asm/unistd.h b/arch/arm64/include/uapi/asm/unistd.h
index ce2ee8f1e361..df36f23876e8 100644
--- a/arch/arm64/include/uapi/asm/unistd.h
+++ b/arch/arm64/include/uapi/asm/unistd.h
@@ -1,25 +1,2 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#define __ARCH_WANT_RENAMEAT
-#define __ARCH_WANT_NEW_STAT
-#define __ARCH_WANT_SET_GET_RLIMIT
-#define __ARCH_WANT_TIME32_SYSCALLS
-#define __ARCH_WANT_SYS_CLONE3
-#define __ARCH_WANT_MEMFD_SECRET
-
-#include <asm-generic/unistd.h>
+#include <asm/unistd_64.h>