aboutsummaryrefslogtreecommitdiffstats
path: root/arch/arm64/kernel
diff options
context:
space:
mode:
authorMarc Zyngier <maz@kernel.org>2020-12-09 10:00:24 +0000
committerMarc Zyngier <maz@kernel.org>2020-12-09 10:00:24 +0000
commit3a514592b698588326924625b6948a10c35fadd5 (patch)
tree1209f5cb0a780f87c41e3299b22776a221766892 /arch/arm64/kernel
parentMerge remote-tracking branch 'origin/kvm-arm64/misc-5.11' into kvmarm-master/queue (diff)
parentKVM: arm64: Fix nVHE boot on VHE systems (diff)
downloadlinux-dev-3a514592b698588326924625b6948a10c35fadd5.tar.xz
linux-dev-3a514592b698588326924625b6948a10c35fadd5.zip
Merge remote-tracking branch 'origin/kvm-arm64/psci-relay' into kvmarm-master/next
Signed-off-by: Marc Zyngier <maz@kernel.org>
Diffstat (limited to 'arch/arm64/kernel')
-rw-r--r--arch/arm64/kernel/alternative.c7
-rw-r--r--arch/arm64/kernel/armv8_deprecated.c4
-rw-r--r--arch/arm64/kernel/asm-offsets.c8
-rw-r--r--arch/arm64/kernel/cpufeature.c68
-rw-r--r--arch/arm64/kernel/entry.S19
-rw-r--r--arch/arm64/kernel/head.S180
-rw-r--r--arch/arm64/kernel/image-vars.h6
-rw-r--r--arch/arm64/kernel/process.c29
-rw-r--r--arch/arm64/kernel/proton-pack.c5
-rw-r--r--arch/arm64/kernel/sdei.c33
-rw-r--r--arch/arm64/kernel/setup.c2
-rw-r--r--arch/arm64/kernel/signal.c3
-rw-r--r--arch/arm64/kernel/sleep.S2
-rw-r--r--arch/arm64/kernel/suspend.c1
-rw-r--r--arch/arm64/kernel/vdso/Makefile2
-rw-r--r--arch/arm64/kernel/vdso32/Makefile2
-rw-r--r--arch/arm64/kernel/vmlinux.lds.S12
17 files changed, 151 insertions, 232 deletions
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index 73039949b5ce..a57cffb752e8 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -21,7 +21,8 @@
#define ALT_ORIG_PTR(a) __ALT_PTR(a, orig_offset)
#define ALT_REPL_PTR(a) __ALT_PTR(a, alt_offset)
-static int all_alternatives_applied;
+/* Volatile, as we may be patching the guts of READ_ONCE() */
+static volatile int all_alternatives_applied;
static DECLARE_BITMAP(applied_alternatives, ARM64_NCAPS);
@@ -205,7 +206,7 @@ static int __apply_alternatives_multi_stop(void *unused)
/* We always have a CPU 0 at this point (__init) */
if (smp_processor_id()) {
- while (!READ_ONCE(all_alternatives_applied))
+ while (!all_alternatives_applied)
cpu_relax();
isb();
} else {
@@ -217,7 +218,7 @@ static int __apply_alternatives_multi_stop(void *unused)
BUG_ON(all_alternatives_applied);
__apply_alternatives(&region, false, remaining_capabilities);
/* Barriers provided by the cache flushing */
- WRITE_ONCE(all_alternatives_applied, 1);
+ all_alternatives_applied = 1;
}
return 0;
diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
index 7364de008bab..0e86e8b9cedd 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -277,7 +277,7 @@ static void __init register_insn_emulation_sysctl(void)
#define __user_swpX_asm(data, addr, res, temp, temp2, B) \
do { \
- uaccess_enable(); \
+ uaccess_enable_privileged(); \
__asm__ __volatile__( \
" mov %w3, %w7\n" \
"0: ldxr"B" %w2, [%4]\n" \
@@ -302,7 +302,7 @@ do { \
"i" (-EFAULT), \
"i" (__SWP_LL_SC_LOOPS) \
: "memory"); \
- uaccess_disable(); \
+ uaccess_disable_privileged(); \
} while (0)
#define __user_swp_asm(data, addr, res, temp, temp2) \
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 7d32fc959b1a..5e82488f1b82 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -30,7 +30,6 @@ int main(void)
BLANK();
DEFINE(TSK_TI_FLAGS, offsetof(struct task_struct, thread_info.flags));
DEFINE(TSK_TI_PREEMPT, offsetof(struct task_struct, thread_info.preempt_count));
- DEFINE(TSK_TI_ADDR_LIMIT, offsetof(struct task_struct, thread_info.addr_limit));
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0));
#endif
@@ -70,7 +69,7 @@ int main(void)
DEFINE(S_PSTATE, offsetof(struct pt_regs, pstate));
DEFINE(S_PC, offsetof(struct pt_regs, pc));
DEFINE(S_SYSCALLNO, offsetof(struct pt_regs, syscallno));
- DEFINE(S_ORIG_ADDR_LIMIT, offsetof(struct pt_regs, orig_addr_limit));
+ DEFINE(S_SDEI_TTBR1, offsetof(struct pt_regs, sdei_ttbr1));
DEFINE(S_PMR_SAVE, offsetof(struct pt_regs, pmr_save));
DEFINE(S_STACKFRAME, offsetof(struct pt_regs, stackframe));
DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs));
@@ -110,6 +109,11 @@ int main(void)
DEFINE(CPU_APGAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APGAKEYLO_EL1]));
DEFINE(HOST_CONTEXT_VCPU, offsetof(struct kvm_cpu_context, __hyp_running_vcpu));
DEFINE(HOST_DATA_CONTEXT, offsetof(struct kvm_host_data, host_ctxt));
+ DEFINE(NVHE_INIT_MAIR_EL2, offsetof(struct kvm_nvhe_init_params, mair_el2));
+ DEFINE(NVHE_INIT_TCR_EL2, offsetof(struct kvm_nvhe_init_params, tcr_el2));
+ DEFINE(NVHE_INIT_TPIDR_EL2, offsetof(struct kvm_nvhe_init_params, tpidr_el2));
+ DEFINE(NVHE_INIT_STACK_HYP_VA, offsetof(struct kvm_nvhe_init_params, stack_hyp_va));
+ DEFINE(NVHE_INIT_PGD_PA, offsetof(struct kvm_nvhe_init_params, pgd_pa));
#endif
#ifdef CONFIG_CPU_PM
DEFINE(CPU_CTX_SP, offsetof(struct cpu_suspend_ctx, sp));
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 280b10762f6b..d96f4554282d 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -74,6 +74,7 @@
#include <asm/cpufeature.h>
#include <asm/cpu_ops.h>
#include <asm/fpsimd.h>
+#include <asm/kvm_host.h>
#include <asm/mmu_context.h>
#include <asm/mte.h>
#include <asm/processor.h>
@@ -153,10 +154,6 @@ EXPORT_SYMBOL(cpu_hwcap_keys);
.width = 0, \
}
-/* meta feature for alternatives */
-static bool __maybe_unused
-cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused);
-
static void cpu_enable_cnp(struct arm64_cpu_capabilities const *cap);
static bool __system_matches_cap(unsigned int n);
@@ -1600,7 +1597,7 @@ static void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused)
WARN_ON_ONCE(in_interrupt());
sysreg_clear_set(sctlr_el1, SCTLR_EL1_SPAN, 0);
- asm(SET_PSTATE_PAN(1));
+ set_pstate_pan(1);
}
#endif /* CONFIG_ARM64_PAN */
@@ -1709,6 +1706,21 @@ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
}
#endif /* CONFIG_ARM64_MTE */
+#ifdef CONFIG_KVM
+static bool is_kvm_protected_mode(const struct arm64_cpu_capabilities *entry, int __unused)
+{
+ if (kvm_get_mode() != KVM_MODE_PROTECTED)
+ return false;
+
+ if (is_kernel_in_hyp_mode()) {
+ pr_warn("Protected KVM not available with VHE\n");
+ return false;
+ }
+
+ return true;
+}
+#endif /* CONFIG_KVM */
+
/* Internal helper functions to match cpu capability type */
static bool
cpucap_late_cpu_optional(const struct arm64_cpu_capabilities *cap)
@@ -1770,28 +1782,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE,
.matches = has_no_hw_prefetch,
},
-#ifdef CONFIG_ARM64_UAO
- {
- .desc = "User Access Override",
- .capability = ARM64_HAS_UAO,
- .type = ARM64_CPUCAP_SYSTEM_FEATURE,
- .matches = has_cpuid_feature,
- .sys_reg = SYS_ID_AA64MMFR2_EL1,
- .field_pos = ID_AA64MMFR2_UAO_SHIFT,
- .min_field_value = 1,
- /*
- * We rely on stop_machine() calling uao_thread_switch() to set
- * UAO immediately after patching.
- */
- },
-#endif /* CONFIG_ARM64_UAO */
-#ifdef CONFIG_ARM64_PAN
- {
- .capability = ARM64_ALT_PAN_NOT_UAO,
- .type = ARM64_CPUCAP_SYSTEM_FEATURE,
- .matches = cpufeature_pan_not_uao,
- },
-#endif /* CONFIG_ARM64_PAN */
#ifdef CONFIG_ARM64_VHE
{
.desc = "Virtualization Host Extensions",
@@ -1822,6 +1812,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.field_pos = ID_AA64PFR0_EL1_SHIFT,
.min_field_value = ID_AA64PFR0_EL1_32BIT_64BIT,
},
+ {
+ .desc = "Protected KVM",
+ .capability = ARM64_KVM_PROTECTED_MODE,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = is_kvm_protected_mode,
+ },
#endif
{
.desc = "Kernel page table isolation (KPTI)",
@@ -2138,6 +2134,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.cpu_enable = cpu_enable_mte,
},
#endif /* CONFIG_ARM64_MTE */
+ {
+ .desc = "RCpc load-acquire (LDAPR)",
+ .capability = ARM64_HAS_LDAPR,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .sys_reg = SYS_ID_AA64ISAR1_EL1,
+ .sign = FTR_UNSIGNED,
+ .field_pos = ID_AA64ISAR1_LRCPC_SHIFT,
+ .matches = has_cpuid_feature,
+ .min_field_value = 1,
+ },
{},
};
@@ -2652,7 +2658,7 @@ bool this_cpu_has_cap(unsigned int n)
* - The SYSTEM_FEATURE cpu_hwcaps may not have been set.
* In all other cases cpus_have_{const_}cap() should be used.
*/
-static bool __system_matches_cap(unsigned int n)
+static bool __maybe_unused __system_matches_cap(unsigned int n)
{
if (n < ARM64_NCAPS) {
const struct arm64_cpu_capabilities *cap = cpu_hwcaps_ptrs[n];
@@ -2732,12 +2738,6 @@ void __init setup_cpu_features(void)
ARCH_DMA_MINALIGN);
}
-static bool __maybe_unused
-cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused)
-{
- return (__system_matches_cap(ARM64_HAS_PAN) && !__system_matches_cap(ARM64_HAS_UAO));
-}
-
static void __maybe_unused cpu_enable_cnp(struct arm64_cpu_capabilities const *cap)
{
cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index b295fb912b12..bdd3b57b12f5 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -216,12 +216,6 @@ alternative_else_nop_endif
.else
add x21, sp, #S_FRAME_SIZE
get_current_task tsk
- /* Save the task's original addr_limit and set USER_DS */
- ldr x20, [tsk, #TSK_TI_ADDR_LIMIT]
- str x20, [sp, #S_ORIG_ADDR_LIMIT]
- mov x20, #USER_DS
- str x20, [tsk, #TSK_TI_ADDR_LIMIT]
- /* No need to reset PSTATE.UAO, hardware's already set it to 0 for us */
.endif /* \el == 0 */
mrs x22, elr_el1
mrs x23, spsr_el1
@@ -279,12 +273,6 @@ alternative_else_nop_endif
.macro kernel_exit, el
.if \el != 0
disable_daif
-
- /* Restore the task's original addr_limit. */
- ldr x20, [sp, #S_ORIG_ADDR_LIMIT]
- str x20, [tsk, #TSK_TI_ADDR_LIMIT]
-
- /* No need to restore UAO, it will be restored from SPSR_EL1 */
.endif
/* Restore pmr */
@@ -999,10 +987,9 @@ SYM_CODE_START(__sdei_asm_entry_trampoline)
mov x4, xzr
/*
- * Use reg->interrupted_regs.addr_limit to remember whether to unmap
- * the kernel on exit.
+ * Remember whether to unmap the kernel on exit.
*/
-1: str x4, [x1, #(SDEI_EVENT_INTREGS + S_ORIG_ADDR_LIMIT)]
+1: str x4, [x1, #(SDEI_EVENT_INTREGS + S_SDEI_TTBR1)]
#ifdef CONFIG_RANDOMIZE_BASE
adr x4, tramp_vectors + PAGE_SIZE
@@ -1023,7 +1010,7 @@ NOKPROBE(__sdei_asm_entry_trampoline)
* x4: struct sdei_registered_event argument from registration time.
*/
SYM_CODE_START(__sdei_asm_exit_trampoline)
- ldr x4, [x4, #(SDEI_EVENT_INTREGS + S_ORIG_ADDR_LIMIT)]
+ ldr x4, [x4, #(SDEI_EVENT_INTREGS + S_SDEI_TTBR1)]
cbnz x4, 1f
tramp_unmap_kernel tmp=x4
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index d8d9caf02834..957683029438 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -11,7 +11,6 @@
#include <linux/linkage.h>
#include <linux/init.h>
-#include <linux/irqchip/arm-gic-v3.h>
#include <linux/pgtable.h>
#include <asm/asm_pointer_auth.h>
@@ -21,6 +20,7 @@
#include <asm/asm-offsets.h>
#include <asm/cache.h>
#include <asm/cputype.h>
+#include <asm/el2_setup.h>
#include <asm/elf.h>
#include <asm/image.h>
#include <asm/kernel-pgtable.h>
@@ -104,7 +104,7 @@ pe_header:
*/
SYM_CODE_START(primary_entry)
bl preserve_boot_args
- bl el2_setup // Drop to EL1, w0=cpu_boot_mode
+ bl init_kernel_el // w0=cpu_boot_mode
adrp x23, __PHYS_OFFSET
and x23, x23, MIN_KIMG_ALIGN - 1 // KASLR offset, defaults to 0
bl set_cpu_boot_mode_flag
@@ -482,174 +482,86 @@ EXPORT_SYMBOL(kimage_vaddr)
.section ".idmap.text","awx"
/*
- * If we're fortunate enough to boot at EL2, ensure that the world is
- * sane before dropping to EL1.
+ * Starting from EL2 or EL1, configure the CPU to execute at the highest
+ * reachable EL supported by the kernel in a chosen default state. If dropping
+ * from EL2 to EL1, configure EL2 before configuring EL1.
+ *
+ * Since we cannot always rely on ERET synchronizing writes to sysregs (e.g. if
+ * SCTLR_ELx.EOS is clear), we place an ISB prior to ERET.
*
* Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in w0 if
* booted in EL1 or EL2 respectively.
*/
-SYM_FUNC_START(el2_setup)
- msr SPsel, #1 // We want to use SP_EL{1,2}
+SYM_FUNC_START(init_kernel_el)
mrs x0, CurrentEL
cmp x0, #CurrentEL_EL2
- b.eq 1f
- mov_q x0, (SCTLR_EL1_RES1 | ENDIAN_SET_EL1)
+ b.eq init_el2
+
+SYM_INNER_LABEL(init_el1, SYM_L_LOCAL)
+ mov_q x0, INIT_SCTLR_EL1_MMU_OFF
msr sctlr_el1, x0
- mov w0, #BOOT_CPU_MODE_EL1 // This cpu booted in EL1
isb
- ret
-
-1: mov_q x0, (SCTLR_EL2_RES1 | ENDIAN_SET_EL2)
- msr sctlr_el2, x0
+ mov_q x0, INIT_PSTATE_EL1
+ msr spsr_el1, x0
+ msr elr_el1, lr
+ mov w0, #BOOT_CPU_MODE_EL1
+ eret
+SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
#ifdef CONFIG_ARM64_VHE
/*
- * Check for VHE being present. For the rest of the EL2 setup,
- * x2 being non-zero indicates that we do have VHE, and that the
- * kernel is intended to run at EL2.
+ * Check for VHE being present. x2 being non-zero indicates that we
+ * do have VHE, and that the kernel is intended to run at EL2.
*/
mrs x2, id_aa64mmfr1_el1
ubfx x2, x2, #ID_AA64MMFR1_VHE_SHIFT, #4
#else
mov x2, xzr
#endif
+ cbz x2, init_el2_nvhe
- /* Hyp configuration. */
- mov_q x0, HCR_HOST_NVHE_FLAGS
- cbz x2, set_hcr
+ /*
+ * When VHE _is_ in use, EL1 will not be used in the host and
+ * requires no configuration, and all non-hyp-specific EL2 setup
+ * will be done via the _EL1 system register aliases in __cpu_setup.
+ */
mov_q x0, HCR_HOST_VHE_FLAGS
-set_hcr:
msr hcr_el2, x0
isb
- /*
- * Allow Non-secure EL1 and EL0 to access physical timer and counter.
- * This is not necessary for VHE, since the host kernel runs in EL2,
- * and EL0 accesses are configured in the later stage of boot process.
- * Note that when HCR_EL2.E2H == 1, CNTHCTL_EL2 has the same bit layout
- * as CNTKCTL_EL1, and CNTKCTL_EL1 accessing instructions are redefined
- * to access CNTHCTL_EL2. This allows the kernel designed to run at EL1
- * to transparently mess with the EL0 bits via CNTKCTL_EL1 access in
- * EL2.
- */
- cbnz x2, 1f
- mrs x0, cnthctl_el2
- orr x0, x0, #3 // Enable EL1 physical timers
- msr cnthctl_el2, x0
-1:
- msr cntvoff_el2, xzr // Clear virtual offset
-
-#ifdef CONFIG_ARM_GIC_V3
- /* GICv3 system register access */
- mrs x0, id_aa64pfr0_el1
- ubfx x0, x0, #ID_AA64PFR0_GIC_SHIFT, #4
- cbz x0, 3f
-
- mrs_s x0, SYS_ICC_SRE_EL2
- orr x0, x0, #ICC_SRE_EL2_SRE // Set ICC_SRE_EL2.SRE==1
- orr x0, x0, #ICC_SRE_EL2_ENABLE // Set ICC_SRE_EL2.Enable==1
- msr_s SYS_ICC_SRE_EL2, x0
- isb // Make sure SRE is now set
- mrs_s x0, SYS_ICC_SRE_EL2 // Read SRE back,
- tbz x0, #0, 3f // and check that it sticks
- msr_s SYS_ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults
-
-3:
-#endif
-
- /* Populate ID registers. */
- mrs x0, midr_el1
- mrs x1, mpidr_el1
- msr vpidr_el2, x0
- msr vmpidr_el2, x1
-
-#ifdef CONFIG_COMPAT
- msr hstr_el2, xzr // Disable CP15 traps to EL2
-#endif
-
- /* EL2 debug */
- mrs x1, id_aa64dfr0_el1
- sbfx x0, x1, #ID_AA64DFR0_PMUVER_SHIFT, #4
- cmp x0, #1
- b.lt 4f // Skip if no PMU present
- mrs x0, pmcr_el0 // Disable debug access traps
- ubfx x0, x0, #11, #5 // to EL2 and allow access to
-4:
- csel x3, xzr, x0, lt // all PMU counters from EL1
-
- /* Statistical profiling */
- ubfx x0, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4
- cbz x0, 7f // Skip if SPE not present
- cbnz x2, 6f // VHE?
- mrs_s x4, SYS_PMBIDR_EL1 // If SPE available at EL2,
- and x4, x4, #(1 << SYS_PMBIDR_EL1_P_SHIFT)
- cbnz x4, 5f // then permit sampling of physical
- mov x4, #(1 << SYS_PMSCR_EL2_PCT_SHIFT | \
- 1 << SYS_PMSCR_EL2_PA_SHIFT)
- msr_s SYS_PMSCR_EL2, x4 // addresses and physical counter
-5:
- mov x1, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT)
- orr x3, x3, x1 // If we don't have VHE, then
- b 7f // use EL1&0 translation.
-6: // For VHE, use EL2 translation
- orr x3, x3, #MDCR_EL2_TPMS // and disable access from EL1
-7:
- msr mdcr_el2, x3 // Configure debug traps
+ init_el2_state vhe
- /* LORegions */
- mrs x1, id_aa64mmfr1_el1
- ubfx x0, x1, #ID_AA64MMFR1_LOR_SHIFT, 4
- cbz x0, 1f
- msr_s SYS_LORC_EL1, xzr
-1:
-
- /* Stage-2 translation */
- msr vttbr_el2, xzr
-
- cbz x2, install_el2_stub
-
- mov w0, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2
isb
- ret
-SYM_INNER_LABEL(install_el2_stub, SYM_L_LOCAL)
+ mov_q x0, INIT_PSTATE_EL2
+ msr spsr_el2, x0
+ msr elr_el2, lr
+ mov w0, #BOOT_CPU_MODE_EL2
+ eret
+
+SYM_INNER_LABEL(init_el2_nvhe, SYM_L_LOCAL)
/*
* When VHE is not in use, early init of EL2 and EL1 needs to be
* done here.
- * When VHE _is_ in use, EL1 will not be used in the host and
- * requires no configuration, and all non-hyp-specific EL2 setup
- * will be done via the _EL1 system register aliases in __cpu_setup.
*/
- mov_q x0, (SCTLR_EL1_RES1 | ENDIAN_SET_EL1)
+ mov_q x0, INIT_SCTLR_EL1_MMU_OFF
msr sctlr_el1, x0
- /* Coprocessor traps. */
- mov x0, #0x33ff
- msr cptr_el2, x0 // Disable copro. traps to EL2
-
- /* SVE register access */
- mrs x1, id_aa64pfr0_el1
- ubfx x1, x1, #ID_AA64PFR0_SVE_SHIFT, #4
- cbz x1, 7f
-
- bic x0, x0, #CPTR_EL2_TZ // Also disable SVE traps
- msr cptr_el2, x0 // Disable copro. traps to EL2
+ mov_q x0, HCR_HOST_NVHE_FLAGS
+ msr hcr_el2, x0
isb
- mov x1, #ZCR_ELx_LEN_MASK // SVE: Enable full vector
- msr_s SYS_ZCR_EL2, x1 // length for EL1.
+
+ init_el2_state nvhe
/* Hypervisor stub */
-7: adr_l x0, __hyp_stub_vectors
+ adr_l x0, __hyp_stub_vectors
msr vbar_el2, x0
+ isb
- /* spsr */
- mov x0, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
- PSR_MODE_EL1h)
- msr spsr_el2, x0
msr elr_el2, lr
- mov w0, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2
+ mov w0, #BOOT_CPU_MODE_EL2
eret
-SYM_FUNC_END(el2_setup)
+SYM_FUNC_END(init_kernel_el)
/*
* Sets the __boot_cpu_mode flag depending on the CPU boot mode passed
@@ -699,7 +611,7 @@ SYM_DATA_END(__early_cpu_boot_status)
* cores are held until we're ready for them to initialise.
*/
SYM_FUNC_START(secondary_holding_pen)
- bl el2_setup // Drop to EL1, w0=cpu_boot_mode
+ bl init_kernel_el // w0=cpu_boot_mode
bl set_cpu_boot_mode_flag
mrs x0, mpidr_el1
mov_q x1, MPIDR_HWID_BITMASK
@@ -717,7 +629,7 @@ SYM_FUNC_END(secondary_holding_pen)
* be used where CPUs are brought online dynamically by the kernel.
*/
SYM_FUNC_START(secondary_entry)
- bl el2_setup // Drop to EL1
+ bl init_kernel_el // w0=cpu_boot_mode
bl set_cpu_boot_mode_flag
b secondary_startup
SYM_FUNC_END(secondary_entry)
diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
index 4b32588918d9..39289d75118d 100644
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -77,9 +77,6 @@ KVM_NVHE_ALIAS(panic);
/* Vectors installed by hyp-init on reset HVC. */
KVM_NVHE_ALIAS(__hyp_stub_vectors);
-/* IDMAP TCR_EL1.T0SZ as computed by the EL1 init code */
-KVM_NVHE_ALIAS(idmap_t0sz);
-
/* Kernel symbol used by icache_is_vpipt(). */
KVM_NVHE_ALIAS(__icache_flags);
@@ -102,6 +99,9 @@ KVM_NVHE_ALIAS(gic_nonsecure_priorities);
KVM_NVHE_ALIAS(__start___kvm_ex_table);
KVM_NVHE_ALIAS(__stop___kvm_ex_table);
+/* Array containing bases of nVHE per-CPU memory regions. */
+KVM_NVHE_ALIAS(kvm_arm_hyp_percpu_base);
+
#endif /* CONFIG_KVM */
#endif /* __ARM64_KERNEL_IMAGE_VARS_H */
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index a47a40ec6ad9..71005cb0f4e0 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -422,16 +422,15 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
if (clone_flags & CLONE_SETTLS)
p->thread.uw.tp_value = tls;
} else {
+ /*
+ * A kthread has no context to ERET to, so ensure any buggy
+ * ERET is treated as an illegal exception return.
+ *
+ * When a user task is created from a kthread, childregs will
+ * be initialized by start_thread() or start_compat_thread().
+ */
memset(childregs, 0, sizeof(struct pt_regs));
- childregs->pstate = PSR_MODE_EL1h;
- if (IS_ENABLED(CONFIG_ARM64_UAO) &&
- cpus_have_const_cap(ARM64_HAS_UAO))
- childregs->pstate |= PSR_UAO_BIT;
-
- spectre_v4_enable_task_mitigation(p);
-
- if (system_uses_irq_prio_masking())
- childregs->pmr_save = GIC_PRIO_IRQON;
+ childregs->pstate = PSR_MODE_EL1h | PSR_IL_BIT;
p->thread.cpu_context.x19 = stack_start;
p->thread.cpu_context.x20 = stk_sz;
@@ -461,17 +460,6 @@ static void tls_thread_switch(struct task_struct *next)
write_sysreg(*task_user_tls(next), tpidr_el0);
}
-/* Restore the UAO state depending on next's addr_limit */
-void uao_thread_switch(struct task_struct *next)
-{
- if (IS_ENABLED(CONFIG_ARM64_UAO)) {
- if (task_thread_info(next)->addr_limit == KERNEL_DS)
- asm(ALTERNATIVE("nop", SET_PSTATE_UAO(1), ARM64_HAS_UAO));
- else
- asm(ALTERNATIVE("nop", SET_PSTATE_UAO(0), ARM64_HAS_UAO));
- }
-}
-
/*
* Force SSBS state on context-switch, since it may be lost after migrating
* from a CPU which treats the bit as RES0 in a heterogeneous system.
@@ -554,7 +542,6 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
hw_breakpoint_thread_switch(next);
contextidr_thread_switch(next);
entry_task_switch(next);
- uao_thread_switch(next);
ssbs_thread_switch(next);
erratum_1418040_thread_switch(prev, next);
diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c
index fca03648e270..902e4084c477 100644
--- a/arch/arm64/kernel/proton-pack.c
+++ b/arch/arm64/kernel/proton-pack.c
@@ -24,6 +24,7 @@
#include <linux/prctl.h>
#include <linux/sched/task_stack.h>
+#include <asm/insn.h>
#include <asm/spectre.h>
#include <asm/traps.h>
#include <asm/virt.h>
@@ -520,12 +521,12 @@ static enum mitigation_state spectre_v4_enable_hw_mitigation(void)
if (spectre_v4_mitigations_off()) {
sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_DSSBS);
- asm volatile(SET_PSTATE_SSBS(1));
+ set_pstate_ssbs(1);
return SPECTRE_VULNERABLE;
}
/* SCTLR_EL1.DSSBS was initialised to 0 during boot */
- asm volatile(SET_PSTATE_SSBS(0));
+ set_pstate_ssbs(0);
return SPECTRE_MITIGATED;
}
diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c
index 7689f2031c0c..e04b3e90c003 100644
--- a/arch/arm64/kernel/sdei.c
+++ b/arch/arm64/kernel/sdei.c
@@ -178,12 +178,6 @@ static __kprobes unsigned long _sdei_handler(struct pt_regs *regs,
sdei_api_event_context(i, &regs->regs[i]);
}
- /*
- * We didn't take an exception to get here, set PAN. UAO will be cleared
- * by sdei_event_handler()s force_uaccess_begin() call.
- */
- __uaccess_enable_hw_pan();
-
err = sdei_event_handler(regs, arg);
if (err)
return SDEI_EV_FAILED;
@@ -222,12 +216,39 @@ static __kprobes unsigned long _sdei_handler(struct pt_regs *regs,
return vbar + 0x480;
}
+static void __kprobes notrace __sdei_pstate_entry(void)
+{
+ /*
+ * The original SDEI spec (ARM DEN 0054A) can be read ambiguously as to
+ * whether PSTATE bits are inherited unchanged or generated from
+ * scratch, and the TF-A implementation always clears PAN and always
+ * clears UAO. There are no other known implementations.
+ *
+ * Subsequent revisions (ARM DEN 0054B) follow the usual rules for how
+ * PSTATE is modified upon architectural exceptions, and so PAN is
+ * either inherited or set per SCTLR_ELx.SPAN, and UAO is always
+ * cleared.
+ *
+ * We must explicitly reset PAN to the expected state, including
+ * clearing it when the host isn't using it, in case a VM had it set.
+ */
+ if (system_uses_hw_pan())
+ set_pstate_pan(1);
+ else if (cpu_has_pan())
+ set_pstate_pan(0);
+}
asmlinkage __kprobes notrace unsigned long
__sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg)
{
unsigned long ret;
+ /*
+ * We didn't take an exception to get here, so the HW hasn't
+ * set/cleared bits in PSTATE that we may rely on. Initialize PAN.
+ */
+ __sdei_pstate_entry();
+
nmi_enter();
ret = _sdei_handler(regs, arg);
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 133257ffd859..2f2973bc67c7 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -276,7 +276,7 @@ arch_initcall(reserve_memblock_reserved_regions);
u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID };
-u64 cpu_logical_map(int cpu)
+u64 cpu_logical_map(unsigned int cpu)
{
return __cpu_logical_map[cpu];
}
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index a8184cad8890..af5c6c6638f7 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -922,9 +922,6 @@ asmlinkage void do_notify_resume(struct pt_regs *regs,
trace_hardirqs_off();
do {
- /* Check valid user FS if needed */
- addr_limit_user_check();
-
if (thread_flags & _TIF_NEED_RESCHED) {
/* Unmask Debug and SError for the next task */
local_daif_restore(DAIF_PROCCTX_NOIRQ);
diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
index ba40d57757d6..4be7f7eed875 100644
--- a/arch/arm64/kernel/sleep.S
+++ b/arch/arm64/kernel/sleep.S
@@ -99,7 +99,7 @@ SYM_FUNC_END(__cpu_suspend_enter)
.pushsection ".idmap.text", "awx"
SYM_CODE_START(cpu_resume)
- bl el2_setup // if in EL2 drop to EL1 cleanly
+ bl init_kernel_el
bl __cpu_setup
/* enable the MMU early - so we can access sleep_save_stash by va */
adrp x1, swapper_pg_dir
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index 96cd347c7a46..a67b37a7a47e 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -58,7 +58,6 @@ void notrace __cpu_suspend_exit(void)
* features that might not have been set correctly.
*/
__uaccess_enable_hw_pan();
- uao_thread_switch(current);
/*
* Restore HW breakpoint registers to sane values
diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index d65f52264aba..a8f8e409e2bf 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -28,7 +28,7 @@ ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 --hash-style=sysv \
$(btildflags-y) -T
ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18
-ccflags-y += -DDISABLE_BRANCH_PROFILING
+ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO
CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) $(GCC_PLUGINS_CFLAGS)
KASAN_SANITIZE := n
diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile
index 79280c53b9a6..a1e0f91e6cea 100644
--- a/arch/arm64/kernel/vdso32/Makefile
+++ b/arch/arm64/kernel/vdso32/Makefile
@@ -48,7 +48,7 @@ cc32-as-instr = $(call try-run,\
# As a result we set our own flags here.
# KBUILD_CPPFLAGS and NOSTDINC_FLAGS from top-level Makefile
-VDSO_CPPFLAGS := -D__KERNEL__ -nostdinc -isystem $(shell $(CC_COMPAT) -print-file-name=include)
+VDSO_CPPFLAGS := -DBUILD_VDSO -D__KERNEL__ -nostdinc -isystem $(shell $(CC_COMPAT) -print-file-name=include)
VDSO_CPPFLAGS += $(LINUXINCLUDE)
# Common C and assembly flags
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 1bda604f4c70..43af13968dfd 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -30,6 +30,13 @@ jiffies = jiffies_64;
*(__kvm_ex_table) \
__stop___kvm_ex_table = .;
+#define HYPERVISOR_DATA_SECTIONS \
+ HYP_SECTION_NAME(.data..ro_after_init) : { \
+ __hyp_data_ro_after_init_start = .; \
+ *(HYP_SECTION_NAME(.data..ro_after_init)) \
+ __hyp_data_ro_after_init_end = .; \
+ }
+
#define HYPERVISOR_PERCPU_SECTION \
. = ALIGN(PAGE_SIZE); \
HYP_SECTION_NAME(.data..percpu) : { \
@@ -37,6 +44,7 @@ jiffies = jiffies_64;
}
#else /* CONFIG_KVM */
#define HYPERVISOR_EXTABLE
+#define HYPERVISOR_DATA_SECTIONS
#define HYPERVISOR_PERCPU_SECTION
#endif
@@ -201,7 +209,7 @@ SECTIONS
INIT_CALLS
CON_INITCALL
INIT_RAM_FS
- *(.init.rodata.* .init.bss) /* from the EFI stub */
+ *(.init.altinstructions .init.rodata.* .init.bss) /* from the EFI stub */
}
.exit.data : {
EXIT_DATA
@@ -234,6 +242,8 @@ SECTIONS
_sdata = .;
RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_ALIGN)
+ HYPERVISOR_DATA_SECTIONS
+
/*
* Data written with the MMU off but read with the MMU on requires
* cache lines to be invalidated, discarding up to a Cache Writeback