aboutsummaryrefslogtreecommitdiffstats
path: root/arch/arm64/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64/kernel')
-rw-r--r--arch/arm64/kernel/Makefile7
-rw-r--r--arch/arm64/kernel/acpi_numa.c12
-rw-r--r--arch/arm64/kernel/alternative.c2
-rw-r--r--arch/arm64/kernel/asm-offsets.c3
-rw-r--r--arch/arm64/kernel/cpu_errata.c2
-rw-r--r--arch/arm64/kernel/cpufeature.c75
-rw-r--r--arch/arm64/kernel/entry-common.c54
-rw-r--r--arch/arm64/kernel/entry.S14
-rw-r--r--arch/arm64/kernel/head.S84
-rw-r--r--arch/arm64/kernel/hibernate.c271
-rw-r--r--arch/arm64/kernel/hyp-stub.S129
-rw-r--r--arch/arm64/kernel/idreg-override.c219
-rw-r--r--arch/arm64/kernel/image-vars.h4
-rw-r--r--arch/arm64/kernel/kaslr.c43
-rw-r--r--arch/arm64/kernel/machine_kexec.c57
-rw-r--r--arch/arm64/kernel/machine_kexec_file.c4
-rw-r--r--arch/arm64/kernel/module-plts.c2
-rw-r--r--arch/arm64/kernel/mte.c58
-rw-r--r--arch/arm64/kernel/perf_event.c17
-rw-r--r--arch/arm64/kernel/probes/uprobes.c2
-rw-r--r--arch/arm64/kernel/process.c6
-rw-r--r--arch/arm64/kernel/ptrace.c3
-rw-r--r--arch/arm64/kernel/relocate_kernel.S48
-rw-r--r--arch/arm64/kernel/setup.c15
-rw-r--r--arch/arm64/kernel/sleep.S1
-rw-r--r--arch/arm64/kernel/smp.c4
-rw-r--r--arch/arm64/kernel/stacktrace.c13
-rw-r--r--arch/arm64/kernel/suspend.c2
-rw-r--r--arch/arm64/kernel/syscall.c30
-rw-r--r--arch/arm64/kernel/topology.c115
-rw-r--r--arch/arm64/kernel/traps.c2
-rw-r--r--arch/arm64/kernel/vdso-wrap.S (renamed from arch/arm64/kernel/vdso/vdso.S)0
-rw-r--r--arch/arm64/kernel/vdso/Makefile4
-rwxr-xr-xarch/arm64/kernel/vdso/gen_vdso_offsets.sh2
-rw-r--r--arch/arm64/kernel/vdso32-wrap.S (renamed from arch/arm64/kernel/vdso32/vdso.S)0
-rw-r--r--arch/arm64/kernel/vdso32/Makefile1
-rw-r--r--arch/arm64/kernel/vmlinux.lds.S26
37 files changed, 705 insertions, 626 deletions
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 86364ab6f13f..ed65576ce710 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -17,7 +17,7 @@ obj-y := debug-monitors.o entry.o irq.o fpsimd.o \
return_address.o cpuinfo.o cpu_errata.o \
cpufeature.o alternative.o cacheinfo.o \
smp.o smp_spin_table.o topology.o smccc-call.o \
- syscall.o proton-pack.o
+ syscall.o proton-pack.o idreg-override.o
targets += efi-entry.o
@@ -59,9 +59,10 @@ obj-$(CONFIG_CRASH_CORE) += crash_core.o
obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o
obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o
obj-$(CONFIG_ARM64_MTE) += mte.o
+obj-y += vdso-wrap.o
+obj-$(CONFIG_COMPAT_VDSO) += vdso32-wrap.o
-obj-y += vdso/ probes/
-obj-$(CONFIG_COMPAT_VDSO) += vdso32/
+obj-y += probes/
head-y := head.o
extra-y += $(head-y) vmlinux.lds
diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c
index 7ff800045434..fdfecf0991ce 100644
--- a/arch/arm64/kernel/acpi_numa.c
+++ b/arch/arm64/kernel/acpi_numa.c
@@ -118,15 +118,3 @@ void __init acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa)
node_set(node, numa_nodes_parsed);
}
-int __init arm64_acpi_numa_init(void)
-{
- int ret;
-
- ret = acpi_numa_init();
- if (ret) {
- pr_info("Failed to initialise from firmware\n");
- return ret;
- }
-
- return srat_disabled() ? -EINVAL : 0;
-}
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index a57cffb752e8..1184c44ea2c7 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -17,7 +17,7 @@
#include <asm/sections.h>
#include <linux/stop_machine.h>
-#define __ALT_PTR(a,f) ((void *)&(a)->f + (a)->f)
+#define __ALT_PTR(a, f) ((void *)&(a)->f + (a)->f)
#define ALT_ORIG_PTR(a) __ALT_PTR(a, orig_offset)
#define ALT_REPL_PTR(a) __ALT_PTR(a, alt_offset)
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 301784463587..a36e2fc330d4 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -99,6 +99,9 @@ int main(void)
DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack));
DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task));
BLANK();
+ DEFINE(FTR_OVR_VAL_OFFSET, offsetof(struct arm64_ftr_override, val));
+ DEFINE(FTR_OVR_MASK_OFFSET, offsetof(struct arm64_ftr_override, mask));
+ BLANK();
#ifdef CONFIG_KVM
DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt));
DEFINE(VCPU_FAULT_DISR, offsetof(struct kvm_vcpu, arch.fault.disr_el1));
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index a63428301f42..506a1cd37973 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -107,8 +107,6 @@ cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *cap)
}
#ifdef CONFIG_ARM64_ERRATUM_1463225
-DEFINE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa);
-
static bool
has_cortex_a76_erratum_1463225(const struct arm64_cpu_capabilities *entry,
int scope)
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 3e6331b64932..066030717a4c 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -352,9 +352,12 @@ static const struct arm64_ftr_bits ftr_ctr[] = {
ARM64_FTR_END,
};
+static struct arm64_ftr_override __ro_after_init no_override = { };
+
struct arm64_ftr_reg arm64_ftr_reg_ctrel0 = {
.name = "SYS_CTR_EL0",
- .ftr_bits = ftr_ctr
+ .ftr_bits = ftr_ctr,
+ .override = &no_override,
};
static const struct arm64_ftr_bits ftr_id_mmfr0[] = {
@@ -544,13 +547,20 @@ static const struct arm64_ftr_bits ftr_raz[] = {
ARM64_FTR_END,
};
-#define ARM64_FTR_REG(id, table) { \
- .sys_id = id, \
- .reg = &(struct arm64_ftr_reg){ \
- .name = #id, \
- .ftr_bits = &((table)[0]), \
+#define ARM64_FTR_REG_OVERRIDE(id, table, ovr) { \
+ .sys_id = id, \
+ .reg = &(struct arm64_ftr_reg){ \
+ .name = #id, \
+ .override = (ovr), \
+ .ftr_bits = &((table)[0]), \
}}
+#define ARM64_FTR_REG(id, table) ARM64_FTR_REG_OVERRIDE(id, table, &no_override)
+
+struct arm64_ftr_override __ro_after_init id_aa64mmfr1_override;
+struct arm64_ftr_override __ro_after_init id_aa64pfr1_override;
+struct arm64_ftr_override __ro_after_init id_aa64isar1_override;
+
static const struct __ftr_reg_entry {
u32 sys_id;
struct arm64_ftr_reg *reg;
@@ -585,7 +595,8 @@ static const struct __ftr_reg_entry {
/* Op1 = 0, CRn = 0, CRm = 4 */
ARM64_FTR_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0),
- ARM64_FTR_REG(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1),
+ ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1,
+ &id_aa64pfr1_override),
ARM64_FTR_REG(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0),
/* Op1 = 0, CRn = 0, CRm = 5 */
@@ -594,11 +605,13 @@ static const struct __ftr_reg_entry {
/* Op1 = 0, CRn = 0, CRm = 6 */
ARM64_FTR_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0),
- ARM64_FTR_REG(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1),
+ ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1,
+ &id_aa64isar1_override),
/* Op1 = 0, CRn = 0, CRm = 7 */
ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0),
- ARM64_FTR_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1),
+ ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1,
+ &id_aa64mmfr1_override),
ARM64_FTR_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2),
/* Op1 = 0, CRn = 1, CRm = 2 */
@@ -770,6 +783,33 @@ static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new)
for (ftrp = reg->ftr_bits; ftrp->width; ftrp++) {
u64 ftr_mask = arm64_ftr_mask(ftrp);
s64 ftr_new = arm64_ftr_value(ftrp, new);
+ s64 ftr_ovr = arm64_ftr_value(ftrp, reg->override->val);
+
+ if ((ftr_mask & reg->override->mask) == ftr_mask) {
+ s64 tmp = arm64_ftr_safe_value(ftrp, ftr_ovr, ftr_new);
+ char *str = NULL;
+
+ if (ftr_ovr != tmp) {
+ /* Unsafe, remove the override */
+ reg->override->mask &= ~ftr_mask;
+ reg->override->val &= ~ftr_mask;
+ tmp = ftr_ovr;
+ str = "ignoring override";
+ } else if (ftr_new != tmp) {
+ /* Override was valid */
+ ftr_new = tmp;
+ str = "forced";
+ } else if (ftr_ovr == tmp) {
+ /* Override was the safe value */
+ str = "already set";
+ }
+
+ if (str)
+ pr_warn("%s[%d:%d]: %s to %llx\n",
+ reg->name,
+ ftrp->shift + ftrp->width - 1,
+ ftrp->shift, str, tmp);
+ }
val = arm64_ftr_set_value(ftrp, val, ftr_new);
@@ -1115,14 +1155,17 @@ u64 read_sanitised_ftr_reg(u32 id)
EXPORT_SYMBOL_GPL(read_sanitised_ftr_reg);
#define read_sysreg_case(r) \
- case r: return read_sysreg_s(r)
+ case r: val = read_sysreg_s(r); break;
/*
* __read_sysreg_by_encoding() - Used by a STARTING cpu before cpuinfo is populated.
* Read the system register on the current CPU
*/
-static u64 __read_sysreg_by_encoding(u32 sys_id)
+u64 __read_sysreg_by_encoding(u32 sys_id)
{
+ struct arm64_ftr_reg *regp;
+ u64 val;
+
switch (sys_id) {
read_sysreg_case(SYS_ID_PFR0_EL1);
read_sysreg_case(SYS_ID_PFR1_EL1);
@@ -1165,6 +1208,14 @@ static u64 __read_sysreg_by_encoding(u32 sys_id)
BUG();
return 0;
}
+
+ regp = get_arm64_ftr_reg(sys_id);
+ if (regp) {
+ val &= ~regp->override->mask;
+ val |= (regp->override->val & regp->override->mask);
+ }
+
+ return val;
}
#include <linux/irqchip/arm-gic-v3.h>
@@ -1455,7 +1506,7 @@ static bool cpu_has_broken_dbm(void)
/* List of CPUs which have broken DBM support. */
static const struct midr_range cpus[] = {
#ifdef CONFIG_ARM64_ERRATUM_1024718
- MIDR_RANGE(MIDR_CORTEX_A55, 0, 0, 1, 0), // A55 r0p0 -r1p0
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A55),
/* Kryo4xx Silver (rdpe => r1p0) */
MIDR_REV(MIDR_QCOM_KRYO_4XX_SILVER, 0xd, 0xe),
#endif
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index 5346953e4382..9d3588450473 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -109,6 +109,55 @@ asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs)
exit_to_kernel_mode(regs);
}
+#ifdef CONFIG_ARM64_ERRATUM_1463225
+static DEFINE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa);
+
+static void cortex_a76_erratum_1463225_svc_handler(void)
+{
+ u32 reg, val;
+
+ if (!unlikely(test_thread_flag(TIF_SINGLESTEP)))
+ return;
+
+ if (!unlikely(this_cpu_has_cap(ARM64_WORKAROUND_1463225)))
+ return;
+
+ __this_cpu_write(__in_cortex_a76_erratum_1463225_wa, 1);
+ reg = read_sysreg(mdscr_el1);
+ val = reg | DBG_MDSCR_SS | DBG_MDSCR_KDE;
+ write_sysreg(val, mdscr_el1);
+ asm volatile("msr daifclr, #8");
+ isb();
+
+ /* We will have taken a single-step exception by this point */
+
+ write_sysreg(reg, mdscr_el1);
+ __this_cpu_write(__in_cortex_a76_erratum_1463225_wa, 0);
+}
+
+static bool cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
+{
+ if (!__this_cpu_read(__in_cortex_a76_erratum_1463225_wa))
+ return false;
+
+ /*
+ * We've taken a dummy step exception from the kernel to ensure
+ * that interrupts are re-enabled on the syscall path. Return back
+ * to cortex_a76_erratum_1463225_svc_handler() with debug exceptions
+ * masked so that we can safely restore the mdscr and get on with
+ * handling the syscall.
+ */
+ regs->pstate |= PSR_D_BIT;
+ return true;
+}
+#else /* CONFIG_ARM64_ERRATUM_1463225 */
+static void cortex_a76_erratum_1463225_svc_handler(void) { }
+static bool cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
+{
+ return false;
+}
+#endif /* CONFIG_ARM64_ERRATUM_1463225 */
+
static void noinstr el1_abort(struct pt_regs *regs, unsigned long esr)
{
unsigned long far = read_sysreg(far_el1);
@@ -186,7 +235,8 @@ static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr)
gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
arm64_enter_el1_dbg(regs);
- do_debug_exception(far, esr, regs);
+ if (!cortex_a76_erratum_1463225_debug_handler(regs))
+ do_debug_exception(far, esr, regs);
arm64_exit_el1_dbg(regs);
}
@@ -362,6 +412,7 @@ static void noinstr el0_svc(struct pt_regs *regs)
gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
enter_from_user_mode();
+ cortex_a76_erratum_1463225_svc_handler();
do_el0_svc(regs);
}
@@ -439,6 +490,7 @@ static void noinstr el0_svc_compat(struct pt_regs *regs)
gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
enter_from_user_mode();
+ cortex_a76_erratum_1463225_svc_handler();
do_el0_svc_compat(regs);
}
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index c9bae73f2621..a31a0a713c85 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -261,16 +261,16 @@ alternative_else_nop_endif
stp lr, x21, [sp, #S_LR]
/*
- * In order to be able to dump the contents of struct pt_regs at the
- * time the exception was taken (in case we attempt to walk the call
- * stack later), chain it together with the stack frames.
+ * For exceptions from EL0, terminate the callchain here.
+ * For exceptions from EL1, create a synthetic frame record so the
+ * interrupted code shows up in the backtrace.
*/
.if \el == 0
- stp xzr, xzr, [sp, #S_STACKFRAME]
+ mov x29, xzr
.else
stp x29, x22, [sp, #S_STACKFRAME]
- .endif
add x29, sp, #S_STACKFRAME
+ .endif
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
alternative_if_not ARM64_HAS_PAN
@@ -805,7 +805,7 @@ SYM_CODE_END(ret_to_user)
// Move from tramp_pg_dir to swapper_pg_dir
.macro tramp_map_kernel, tmp
mrs \tmp, ttbr1_el1
- add \tmp, \tmp, #(2 * PAGE_SIZE)
+ add \tmp, \tmp, #TRAMP_SWAPPER_OFFSET
bic \tmp, \tmp, #USER_ASID_FLAG
msr ttbr1_el1, \tmp
#ifdef CONFIG_QCOM_FALKOR_ERRATUM_1003
@@ -825,7 +825,7 @@ alternative_else_nop_endif
// Move from swapper_pg_dir to tramp_pg_dir
.macro tramp_unmap_kernel, tmp
mrs \tmp, ttbr1_el1
- sub \tmp, \tmp, #(2 * PAGE_SIZE)
+ sub \tmp, \tmp, #TRAMP_SWAPPER_OFFSET
orr \tmp, \tmp, #USER_ASID_FLAG
msr ttbr1_el1, \tmp
/*
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index a0dc987724ed..840bda1869e9 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -319,7 +319,7 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
*/
adrp x5, __idmap_text_end
clz x5, x5
- cmp x5, TCR_T0SZ(VA_BITS) // default T0SZ small enough?
+ cmp x5, TCR_T0SZ(VA_BITS_MIN) // default T0SZ small enough?
b.ge 1f // .. then skip VA range extension
adr_l x6, idmap_t0sz
@@ -404,10 +404,6 @@ SYM_FUNC_START_LOCAL(__primary_switched)
adr_l x5, init_task
msr sp_el0, x5 // Save thread_info
-#ifdef CONFIG_ARM64_PTR_AUTH
- __ptrauth_keys_init_cpu x5, x6, x7, x8
-#endif
-
adr_l x8, vectors // load VBAR_EL1 with virtual
msr vbar_el1, x8 // vector table address
isb
@@ -436,10 +432,12 @@ SYM_FUNC_START_LOCAL(__primary_switched)
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
bl kasan_early_init
#endif
+ mov x0, x21 // pass FDT address in x0
+ bl early_fdt_map // Try mapping the FDT early
+ bl init_feature_override // Parse cpu feature overrides
#ifdef CONFIG_RANDOMIZE_BASE
tst x23, ~(MIN_KIMG_ALIGN - 1) // already running randomized?
b.ne 0f
- mov x0, x21 // pass FDT address in x0
bl kaslr_early_init // parse FDT for KASLR options
cbz x0, 0f // KASLR disabled? just proceed
orr x23, x23, x0 // record KASLR offset
@@ -447,6 +445,7 @@ SYM_FUNC_START_LOCAL(__primary_switched)
ret // to __primary_switch()
0:
#endif
+ bl switch_to_vhe // Prefer VHE if possible
add sp, sp, #16
mov x29, #0
mov x30, #0
@@ -478,13 +477,14 @@ EXPORT_SYMBOL(kimage_vaddr)
* booted in EL1 or EL2 respectively.
*/
SYM_FUNC_START(init_kernel_el)
+ mov_q x0, INIT_SCTLR_EL1_MMU_OFF
+ msr sctlr_el1, x0
+
mrs x0, CurrentEL
cmp x0, #CurrentEL_EL2
b.eq init_el2
SYM_INNER_LABEL(init_el1, SYM_L_LOCAL)
- mov_q x0, INIT_SCTLR_EL1_MMU_OFF
- msr sctlr_el1, x0
isb
mov_q x0, INIT_PSTATE_EL1
msr spsr_el1, x0
@@ -493,50 +493,11 @@ SYM_INNER_LABEL(init_el1, SYM_L_LOCAL)
eret
SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
-#ifdef CONFIG_ARM64_VHE
- /*
- * Check for VHE being present. x2 being non-zero indicates that we
- * do have VHE, and that the kernel is intended to run at EL2.
- */
- mrs x2, id_aa64mmfr1_el1
- ubfx x2, x2, #ID_AA64MMFR1_VHE_SHIFT, #4
-#else
- mov x2, xzr
-#endif
- cbz x2, init_el2_nvhe
-
- /*
- * When VHE _is_ in use, EL1 will not be used in the host and
- * requires no configuration, and all non-hyp-specific EL2 setup
- * will be done via the _EL1 system register aliases in __cpu_setup.
- */
- mov_q x0, HCR_HOST_VHE_FLAGS
- msr hcr_el2, x0
- isb
-
- init_el2_state vhe
-
- isb
-
- mov_q x0, INIT_PSTATE_EL2
- msr spsr_el2, x0
- msr elr_el2, lr
- mov w0, #BOOT_CPU_MODE_EL2
- eret
-
-SYM_INNER_LABEL(init_el2_nvhe, SYM_L_LOCAL)
- /*
- * When VHE is not in use, early init of EL2 and EL1 needs to be
- * done here.
- */
- mov_q x0, INIT_SCTLR_EL1_MMU_OFF
- msr sctlr_el1, x0
-
mov_q x0, HCR_HOST_NVHE_FLAGS
msr hcr_el2, x0
isb
- init_el2_state nvhe
+ init_el2_state
/* Hypervisor stub */
adr_l x0, __hyp_stub_vectors
@@ -623,6 +584,7 @@ SYM_FUNC_START_LOCAL(secondary_startup)
/*
* Common entry point for secondary CPUs.
*/
+ bl switch_to_vhe
bl __cpu_secondary_check52bitva
bl __cpu_setup // initialise processor
adrp x1, swapper_pg_dir
@@ -693,8 +655,10 @@ SYM_FUNC_END(__secondary_too_slow)
SYM_FUNC_START(__enable_mmu)
mrs x2, ID_AA64MMFR0_EL1
ubfx x2, x2, #ID_AA64MMFR0_TGRAN_SHIFT, 4
- cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED
- b.ne __no_granule_support
+ cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED_MIN
+ b.lt __no_granule_support
+ cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED_MAX
+ b.gt __no_granule_support
update_early_cpu_boot_status 0, x2, x3
adrp x2, idmap_pg_dir
phys_to_ttbr x1, x1
@@ -703,16 +667,9 @@ SYM_FUNC_START(__enable_mmu)
offset_ttbr1 x1, x3
msr ttbr1_el1, x1 // load TTBR1
isb
- msr sctlr_el1, x0
- isb
- /*
- * Invalidate the local I-cache so that any instructions fetched
- * speculatively from the PoC are discarded, since they may have
- * been dynamically patched at the PoU.
- */
- ic iallu
- dsb nsh
- isb
+
+ set_sctlr_el1 x0
+
ret
SYM_FUNC_END(__enable_mmu)
@@ -882,13 +839,10 @@ SYM_FUNC_START_LOCAL(__primary_switch)
tlbi vmalle1 // Remove any stale TLB entries
dsb nsh
-
- msr sctlr_el1, x19 // re-enable the MMU
- isb
- ic iallu // flush instructions fetched
- dsb nsh // via old mapping
isb
+ set_sctlr_el1 x19 // re-enable the MMU
+
bl __relocate_kernel
#endif
#endif
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index 9c9f47e9f7f4..b1cef371df2b 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -16,7 +16,6 @@
#define pr_fmt(x) "hibernate: " x
#include <linux/cpu.h>
#include <linux/kvm_host.h>
-#include <linux/mm.h>
#include <linux/pm.h>
#include <linux/sched.h>
#include <linux/suspend.h>
@@ -31,13 +30,12 @@
#include <asm/memory.h>
#include <asm/mmu_context.h>
#include <asm/mte.h>
-#include <asm/pgalloc.h>
-#include <asm/pgtable-hwdef.h>
#include <asm/sections.h>
#include <asm/smp.h>
#include <asm/smp_plat.h>
#include <asm/suspend.h>
#include <asm/sysreg.h>
+#include <asm/trans_pgd.h>
#include <asm/virt.h>
/*
@@ -178,52 +176,9 @@ int arch_hibernation_header_restore(void *addr)
}
EXPORT_SYMBOL(arch_hibernation_header_restore);
-static int trans_pgd_map_page(pgd_t *trans_pgd, void *page,
- unsigned long dst_addr,
- pgprot_t pgprot)
+static void *hibernate_page_alloc(void *arg)
{
- pgd_t *pgdp;
- p4d_t *p4dp;
- pud_t *pudp;
- pmd_t *pmdp;
- pte_t *ptep;
-
- pgdp = pgd_offset_pgd(trans_pgd, dst_addr);
- if (pgd_none(READ_ONCE(*pgdp))) {
- pudp = (void *)get_safe_page(GFP_ATOMIC);
- if (!pudp)
- return -ENOMEM;
- pgd_populate(&init_mm, pgdp, pudp);
- }
-
- p4dp = p4d_offset(pgdp, dst_addr);
- if (p4d_none(READ_ONCE(*p4dp))) {
- pudp = (void *)get_safe_page(GFP_ATOMIC);
- if (!pudp)
- return -ENOMEM;
- p4d_populate(&init_mm, p4dp, pudp);
- }
-
- pudp = pud_offset(p4dp, dst_addr);
- if (pud_none(READ_ONCE(*pudp))) {
- pmdp = (void *)get_safe_page(GFP_ATOMIC);
- if (!pmdp)
- return -ENOMEM;
- pud_populate(&init_mm, pudp, pmdp);
- }
-
- pmdp = pmd_offset(pudp, dst_addr);
- if (pmd_none(READ_ONCE(*pmdp))) {
- ptep = (void *)get_safe_page(GFP_ATOMIC);
- if (!ptep)
- return -ENOMEM;
- pmd_populate_kernel(&init_mm, pmdp, ptep);
- }
-
- ptep = pte_offset_kernel(pmdp, dst_addr);
- set_pte(ptep, pfn_pte(virt_to_pfn(page), PAGE_KERNEL_EXEC));
-
- return 0;
+ return (void *)get_safe_page((__force gfp_t)(unsigned long)arg);
}
/*
@@ -239,11 +194,16 @@ static int trans_pgd_map_page(pgd_t *trans_pgd, void *page,
* page system.
*/
static int create_safe_exec_page(void *src_start, size_t length,
- unsigned long dst_addr,
phys_addr_t *phys_dst_addr)
{
+ struct trans_pgd_info trans_info = {
+ .trans_alloc_page = hibernate_page_alloc,
+ .trans_alloc_arg = (__force void *)GFP_ATOMIC,
+ };
+
void *page = (void *)get_safe_page(GFP_ATOMIC);
- pgd_t *trans_pgd;
+ phys_addr_t trans_ttbr0;
+ unsigned long t0sz;
int rc;
if (!page)
@@ -251,13 +211,7 @@ static int create_safe_exec_page(void *src_start, size_t length,
memcpy(page, src_start, length);
__flush_icache_range((unsigned long)page, (unsigned long)page + length);
-
- trans_pgd = (void *)get_safe_page(GFP_ATOMIC);
- if (!trans_pgd)
- return -ENOMEM;
-
- rc = trans_pgd_map_page(trans_pgd, page, dst_addr,
- PAGE_KERNEL_EXEC);
+ rc = trans_pgd_idmap_page(&trans_info, &trans_ttbr0, &t0sz, page);
if (rc)
return rc;
@@ -270,12 +224,15 @@ static int create_safe_exec_page(void *src_start, size_t length,
* page, but TLBs may contain stale ASID-tagged entries (e.g. for EFI
* runtime services), while for a userspace-driven test_resume cycle it
* points to userspace page tables (and we must point it at a zero page
- * ourselves). Elsewhere we only (un)install the idmap with preemption
- * disabled, so T0SZ should be as required regardless.
+ * ourselves).
+ *
+ * We change T0SZ as part of installing the idmap. This is undone by
+ * cpu_uninstall_idmap() in __cpu_suspend_exit().
*/
cpu_set_reserved_ttbr0();
local_flush_tlb_all();
- write_sysreg(phys_to_ttbr(virt_to_phys(trans_pgd)), ttbr0_el1);
+ __cpu_set_tcr_t0sz(t0sz);
+ write_sysreg(trans_ttbr0, ttbr0_el1);
isb();
*phys_dst_addr = virt_to_phys(page);
@@ -462,182 +419,6 @@ int swsusp_arch_suspend(void)
return ret;
}
-static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr)
-{
- pte_t pte = READ_ONCE(*src_ptep);
-
- if (pte_valid(pte)) {
- /*
- * Resume will overwrite areas that may be marked
- * read only (code, rodata). Clear the RDONLY bit from
- * the temporary mappings we use during restore.
- */
- set_pte(dst_ptep, pte_mkwrite(pte));
- } else if (debug_pagealloc_enabled() && !pte_none(pte)) {
- /*
- * debug_pagealloc will removed the PTE_VALID bit if
- * the page isn't in use by the resume kernel. It may have
- * been in use by the original kernel, in which case we need
- * to put it back in our copy to do the restore.
- *
- * Before marking this entry valid, check the pfn should
- * be mapped.
- */
- BUG_ON(!pfn_valid(pte_pfn(pte)));
-
- set_pte(dst_ptep, pte_mkpresent(pte_mkwrite(pte)));
- }
-}
-
-static int copy_pte(pmd_t *dst_pmdp, pmd_t *src_pmdp, unsigned long start,
- unsigned long end)
-{
- pte_t *src_ptep;
- pte_t *dst_ptep;
- unsigned long addr = start;
-
- dst_ptep = (pte_t *)get_safe_page(GFP_ATOMIC);
- if (!dst_ptep)
- return -ENOMEM;
- pmd_populate_kernel(&init_mm, dst_pmdp, dst_ptep);
- dst_ptep = pte_offset_kernel(dst_pmdp, start);
-
- src_ptep = pte_offset_kernel(src_pmdp, start);
- do {
- _copy_pte(dst_ptep, src_ptep, addr);
- } while (dst_ptep++, src_ptep++, addr += PAGE_SIZE, addr != end);
-
- return 0;
-}
-
-static int copy_pmd(pud_t *dst_pudp, pud_t *src_pudp, unsigned long start,
- unsigned long end)
-{
- pmd_t *src_pmdp;
- pmd_t *dst_pmdp;
- unsigned long next;
- unsigned long addr = start;
-
- if (pud_none(READ_ONCE(*dst_pudp))) {
- dst_pmdp = (pmd_t *)get_safe_page(GFP_ATOMIC);
- if (!dst_pmdp)
- return -ENOMEM;
- pud_populate(&init_mm, dst_pudp, dst_pmdp);
- }
- dst_pmdp = pmd_offset(dst_pudp, start);
-
- src_pmdp = pmd_offset(src_pudp, start);
- do {
- pmd_t pmd = READ_ONCE(*src_pmdp);
-
- next = pmd_addr_end(addr, end);
- if (pmd_none(pmd))
- continue;
- if (pmd_table(pmd)) {
- if (copy_pte(dst_pmdp, src_pmdp, addr, next))
- return -ENOMEM;
- } else {
- set_pmd(dst_pmdp,
- __pmd(pmd_val(pmd) & ~PMD_SECT_RDONLY));
- }
- } while (dst_pmdp++, src_pmdp++, addr = next, addr != end);
-
- return 0;
-}
-
-static int copy_pud(p4d_t *dst_p4dp, p4d_t *src_p4dp, unsigned long start,
- unsigned long end)
-{
- pud_t *dst_pudp;
- pud_t *src_pudp;
- unsigned long next;
- unsigned long addr = start;
-
- if (p4d_none(READ_ONCE(*dst_p4dp))) {
- dst_pudp = (pud_t *)get_safe_page(GFP_ATOMIC);
- if (!dst_pudp)
- return -ENOMEM;
- p4d_populate(&init_mm, dst_p4dp, dst_pudp);
- }
- dst_pudp = pud_offset(dst_p4dp, start);
-
- src_pudp = pud_offset(src_p4dp, start);
- do {
- pud_t pud = READ_ONCE(*src_pudp);
-
- next = pud_addr_end(addr, end);
- if (pud_none(pud))
- continue;
- if (pud_table(pud)) {
- if (copy_pmd(dst_pudp, src_pudp, addr, next))
- return -ENOMEM;
- } else {
- set_pud(dst_pudp,
- __pud(pud_val(pud) & ~PUD_SECT_RDONLY));
- }
- } while (dst_pudp++, src_pudp++, addr = next, addr != end);
-
- return 0;
-}
-
-static int copy_p4d(pgd_t *dst_pgdp, pgd_t *src_pgdp, unsigned long start,
- unsigned long end)
-{
- p4d_t *dst_p4dp;
- p4d_t *src_p4dp;
- unsigned long next;
- unsigned long addr = start;
-
- dst_p4dp = p4d_offset(dst_pgdp, start);
- src_p4dp = p4d_offset(src_pgdp, start);
- do {
- next = p4d_addr_end(addr, end);
- if (p4d_none(READ_ONCE(*src_p4dp)))
- continue;
- if (copy_pud(dst_p4dp, src_p4dp, addr, next))
- return -ENOMEM;
- } while (dst_p4dp++, src_p4dp++, addr = next, addr != end);
-
- return 0;
-}
-
-static int copy_page_tables(pgd_t *dst_pgdp, unsigned long start,
- unsigned long end)
-{
- unsigned long next;
- unsigned long addr = start;
- pgd_t *src_pgdp = pgd_offset_k(start);
-
- dst_pgdp = pgd_offset_pgd(dst_pgdp, start);
- do {
- next = pgd_addr_end(addr, end);
- if (pgd_none(READ_ONCE(*src_pgdp)))
- continue;
- if (copy_p4d(dst_pgdp, src_pgdp, addr, next))
- return -ENOMEM;
- } while (dst_pgdp++, src_pgdp++, addr = next, addr != end);
-
- return 0;
-}
-
-static int trans_pgd_create_copy(pgd_t **dst_pgdp, unsigned long start,
- unsigned long end)
-{
- int rc;
- pgd_t *trans_pgd = (pgd_t *)get_safe_page(GFP_ATOMIC);
-
- if (!trans_pgd) {
- pr_err("Failed to allocate memory for temporary page tables.\n");
- return -ENOMEM;
- }
-
- rc = copy_page_tables(trans_pgd, start, end);
- if (!rc)
- *dst_pgdp = trans_pgd;
-
- return rc;
-}
-
/*
* Setup then Resume from the hibernate image using swsusp_arch_suspend_exit().
*
@@ -650,16 +431,20 @@ int swsusp_arch_resume(void)
void *zero_page;
size_t exit_size;
pgd_t *tmp_pg_dir;
- phys_addr_t phys_hibernate_exit;
void __noreturn (*hibernate_exit)(phys_addr_t, phys_addr_t, void *,
void *, phys_addr_t, phys_addr_t);
+ struct trans_pgd_info trans_info = {
+ .trans_alloc_page = hibernate_page_alloc,
+ .trans_alloc_arg = (void *)GFP_ATOMIC,
+ };
/*
* Restoring the memory image will overwrite the ttbr1 page tables.
* Create a second copy of just the linear map, and use this when
* restoring.
*/
- rc = trans_pgd_create_copy(&tmp_pg_dir, PAGE_OFFSET, PAGE_END);
+ rc = trans_pgd_create_copy(&trans_info, &tmp_pg_dir, PAGE_OFFSET,
+ PAGE_END);
if (rc)
return rc;
@@ -673,19 +458,13 @@ int swsusp_arch_resume(void)
return -ENOMEM;
}
- /*
- * Locate the exit code in the bottom-but-one page, so that *NULL
- * still has disastrous affects.
- */
- hibernate_exit = (void *)PAGE_SIZE;
exit_size = __hibernate_exit_text_end - __hibernate_exit_text_start;
/*
* Copy swsusp_arch_suspend_exit() to a safe page. This will generate
* a new set of ttbr0 page tables and load them.
*/
rc = create_safe_exec_page(__hibernate_exit_text_start, exit_size,
- (unsigned long)hibernate_exit,
- &phys_hibernate_exit);
+ (phys_addr_t *)&hibernate_exit);
if (rc) {
pr_err("Failed to create safe executable page for hibernate_exit code.\n");
return rc;
@@ -704,7 +483,7 @@ int swsusp_arch_resume(void)
* We can skip this step if we booted at EL1, or are running with VHE.
*/
if (el2_reset_needed()) {
- phys_addr_t el2_vectors = phys_hibernate_exit; /* base */
+ phys_addr_t el2_vectors = (phys_addr_t)hibernate_exit;
el2_vectors += hibernate_el2_vectors -
__hibernate_exit_text_start; /* offset */
diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index 160f5881a0b7..5eccbd62fec8 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -8,9 +8,9 @@
#include <linux/init.h>
#include <linux/linkage.h>
-#include <linux/irqchip/arm-gic-v3.h>
#include <asm/assembler.h>
+#include <asm/el2_setup.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
#include <asm/ptrace.h>
@@ -47,10 +47,13 @@ SYM_CODE_END(__hyp_stub_vectors)
SYM_CODE_START_LOCAL(el1_sync)
cmp x0, #HVC_SET_VECTORS
- b.ne 2f
+ b.ne 1f
msr vbar_el2, x1
b 9f
+1: cmp x0, #HVC_VHE_RESTART
+ b.eq mutate_to_vhe
+
2: cmp x0, #HVC_SOFT_RESTART
b.ne 3f
mov x0, x2
@@ -70,6 +73,102 @@ SYM_CODE_START_LOCAL(el1_sync)
eret
SYM_CODE_END(el1_sync)
+// nVHE? No way! Give me the real thing!
+SYM_CODE_START_LOCAL(mutate_to_vhe)
+ // Sanity check: MMU *must* be off
+ mrs x1, sctlr_el2
+ tbnz x1, #0, 1f
+
+ // Needs to be VHE capable, obviously
+ mrs x1, id_aa64mmfr1_el1
+ ubfx x1, x1, #ID_AA64MMFR1_VHE_SHIFT, #4
+ cbz x1, 1f
+
+ // Check whether VHE is disabled from the command line
+ adr_l x1, id_aa64mmfr1_override
+ ldr x2, [x1, FTR_OVR_VAL_OFFSET]
+ ldr x1, [x1, FTR_OVR_MASK_OFFSET]
+ ubfx x2, x2, #ID_AA64MMFR1_VHE_SHIFT, #4
+ ubfx x1, x1, #ID_AA64MMFR1_VHE_SHIFT, #4
+ cmp x1, xzr
+ and x2, x2, x1
+ csinv x2, x2, xzr, ne
+ cbnz x2, 2f
+
+1: mov_q x0, HVC_STUB_ERR
+ eret
+2:
+ // Engage the VHE magic!
+ mov_q x0, HCR_HOST_VHE_FLAGS
+ msr hcr_el2, x0
+ isb
+
+ // Use the EL1 allocated stack, per-cpu offset
+ mrs x0, sp_el1
+ mov sp, x0
+ mrs x0, tpidr_el1
+ msr tpidr_el2, x0
+
+ // FP configuration, vectors
+ mrs_s x0, SYS_CPACR_EL12
+ msr cpacr_el1, x0
+ mrs_s x0, SYS_VBAR_EL12
+ msr vbar_el1, x0
+
+ // Use EL2 translations for SPE and disable access from EL1
+ mrs x0, mdcr_el2
+ bic x0, x0, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT)
+ msr mdcr_el2, x0
+
+ // Transfer the MM state from EL1 to EL2
+ mrs_s x0, SYS_TCR_EL12
+ msr tcr_el1, x0
+ mrs_s x0, SYS_TTBR0_EL12
+ msr ttbr0_el1, x0
+ mrs_s x0, SYS_TTBR1_EL12
+ msr ttbr1_el1, x0
+ mrs_s x0, SYS_MAIR_EL12
+ msr mair_el1, x0
+ isb
+
+ // Hack the exception return to stay at EL2
+ mrs x0, spsr_el1
+ and x0, x0, #~PSR_MODE_MASK
+ mov x1, #PSR_MODE_EL2h
+ orr x0, x0, x1
+ msr spsr_el1, x0
+
+ b enter_vhe
+SYM_CODE_END(mutate_to_vhe)
+
+ // At the point where we reach enter_vhe(), we run with
+ // the MMU off (which is enforced by mutate_to_vhe()).
+ // We thus need to be in the idmap, or everything will
+ // explode when enabling the MMU.
+
+ .pushsection .idmap.text, "ax"
+
+SYM_CODE_START_LOCAL(enter_vhe)
+ // Invalidate TLBs before enabling the MMU
+ tlbi vmalle1
+ dsb nsh
+ isb
+
+ // Enable the EL2 S1 MMU, as set up from EL1
+ mrs_s x0, SYS_SCTLR_EL12
+ set_sctlr_el1 x0
+
+ // Disable the EL1 S1 MMU for a good measure
+ mov_q x0, INIT_SCTLR_EL1_MMU_OFF
+ msr_s SYS_SCTLR_EL12, x0
+
+ mov x0, xzr
+
+ eret
+SYM_CODE_END(enter_vhe)
+
+ .popsection
+
.macro invalid_vector label
SYM_CODE_START_LOCAL(\label)
b \label
@@ -85,6 +184,8 @@ SYM_CODE_END(\label)
invalid_vector el1_fiq_invalid
invalid_vector el1_error_invalid
+ .popsection
+
/*
* __hyp_set_vectors: Call this after boot to set the initial hypervisor
* vectors as part of hypervisor installation. On an SMP system, this should
@@ -118,3 +219,27 @@ SYM_FUNC_START(__hyp_reset_vectors)
hvc #0
ret
SYM_FUNC_END(__hyp_reset_vectors)
+
+/*
+ * Entry point to switch to VHE if deemed capable
+ */
+SYM_FUNC_START(switch_to_vhe)
+#ifdef CONFIG_ARM64_VHE
+ // Need to have booted at EL2
+ adr_l x1, __boot_cpu_mode
+ ldr w0, [x1]
+ cmp w0, #BOOT_CPU_MODE_EL2
+ b.ne 1f
+
+ // and still be at EL1
+ mrs x0, CurrentEL
+ cmp x0, #CurrentEL_EL1
+ b.ne 1f
+
+ // Turn the world upside down
+ mov x0, #HVC_VHE_RESTART
+ hvc #0
+1:
+#endif
+ ret
+SYM_FUNC_END(switch_to_vhe)
diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c
new file mode 100644
index 000000000000..83f1c4b92095
--- /dev/null
+++ b/arch/arm64/kernel/idreg-override.c
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Early cpufeature override framework
+ *
+ * Copyright (C) 2020 Google LLC
+ * Author: Marc Zyngier <maz@kernel.org>
+ */
+
+#include <linux/ctype.h>
+#include <linux/kernel.h>
+#include <linux/libfdt.h>
+
+#include <asm/cacheflush.h>
+#include <asm/cpufeature.h>
+#include <asm/setup.h>
+
+#define FTR_DESC_NAME_LEN 20
+#define FTR_DESC_FIELD_LEN 10
+#define FTR_ALIAS_NAME_LEN 30
+#define FTR_ALIAS_OPTION_LEN 80
+
+struct ftr_set_desc {
+ char name[FTR_DESC_NAME_LEN];
+ struct arm64_ftr_override *override;
+ struct {
+ char name[FTR_DESC_FIELD_LEN];
+ u8 shift;
+ } fields[];
+};
+
+static const struct ftr_set_desc mmfr1 __initconst = {
+ .name = "id_aa64mmfr1",
+ .override = &id_aa64mmfr1_override,
+ .fields = {
+ { "vh", ID_AA64MMFR1_VHE_SHIFT },
+ {}
+ },
+};
+
+static const struct ftr_set_desc pfr1 __initconst = {
+ .name = "id_aa64pfr1",
+ .override = &id_aa64pfr1_override,
+ .fields = {
+ { "bt", ID_AA64PFR1_BT_SHIFT },
+ {}
+ },
+};
+
+static const struct ftr_set_desc isar1 __initconst = {
+ .name = "id_aa64isar1",
+ .override = &id_aa64isar1_override,
+ .fields = {
+ { "gpi", ID_AA64ISAR1_GPI_SHIFT },
+ { "gpa", ID_AA64ISAR1_GPA_SHIFT },
+ { "api", ID_AA64ISAR1_API_SHIFT },
+ { "apa", ID_AA64ISAR1_APA_SHIFT },
+ {}
+ },
+};
+
+extern struct arm64_ftr_override kaslr_feature_override;
+
+static const struct ftr_set_desc kaslr __initconst = {
+ .name = "kaslr",
+#ifdef CONFIG_RANDOMIZE_BASE
+ .override = &kaslr_feature_override,
+#endif
+ .fields = {
+ { "disabled", 0 },
+ {}
+ },
+};
+
+static const struct ftr_set_desc * const regs[] __initconst = {
+ &mmfr1,
+ &pfr1,
+ &isar1,
+ &kaslr,
+};
+
+static const struct {
+ char alias[FTR_ALIAS_NAME_LEN];
+ char feature[FTR_ALIAS_OPTION_LEN];
+} aliases[] __initconst = {
+ { "kvm-arm.mode=nvhe", "id_aa64mmfr1.vh=0" },
+ { "kvm-arm.mode=protected", "id_aa64mmfr1.vh=0" },
+ { "arm64.nobti", "id_aa64pfr1.bt=0" },
+ { "arm64.nopauth",
+ "id_aa64isar1.gpi=0 id_aa64isar1.gpa=0 "
+ "id_aa64isar1.api=0 id_aa64isar1.apa=0" },
+ { "nokaslr", "kaslr.disabled=1" },
+};
+
+static int __init find_field(const char *cmdline,
+ const struct ftr_set_desc *reg, int f, u64 *v)
+{
+ char opt[FTR_DESC_NAME_LEN + FTR_DESC_FIELD_LEN + 2];
+ int len;
+
+ len = snprintf(opt, ARRAY_SIZE(opt), "%s.%s=",
+ reg->name, reg->fields[f].name);
+
+ if (!parameqn(cmdline, opt, len))
+ return -1;
+
+ return kstrtou64(cmdline + len, 0, v);
+}
+
+static void __init match_options(const char *cmdline)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(regs); i++) {
+ int f;
+
+ if (!regs[i]->override)
+ continue;
+
+ for (f = 0; strlen(regs[i]->fields[f].name); f++) {
+ u64 shift = regs[i]->fields[f].shift;
+ u64 mask = 0xfUL << shift;
+ u64 v;
+
+ if (find_field(cmdline, regs[i], f, &v))
+ continue;
+
+ regs[i]->override->val &= ~mask;
+ regs[i]->override->val |= (v << shift) & mask;
+ regs[i]->override->mask |= mask;
+
+ return;
+ }
+ }
+}
+
+static __init void __parse_cmdline(const char *cmdline, bool parse_aliases)
+{
+ do {
+ char buf[256];
+ size_t len;
+ int i;
+
+ cmdline = skip_spaces(cmdline);
+
+ for (len = 0; cmdline[len] && !isspace(cmdline[len]); len++);
+ if (!len)
+ return;
+
+ len = min(len, ARRAY_SIZE(buf) - 1);
+ strncpy(buf, cmdline, len);
+ buf[len] = 0;
+
+ if (strcmp(buf, "--") == 0)
+ return;
+
+ cmdline += len;
+
+ match_options(buf);
+
+ for (i = 0; parse_aliases && i < ARRAY_SIZE(aliases); i++)
+ if (parameq(buf, aliases[i].alias))
+ __parse_cmdline(aliases[i].feature, false);
+ } while (1);
+}
+
+static __init const u8 *get_bootargs_cmdline(void)
+{
+ const u8 *prop;
+ void *fdt;
+ int node;
+
+ fdt = get_early_fdt_ptr();
+ if (!fdt)
+ return NULL;
+
+ node = fdt_path_offset(fdt, "/chosen");
+ if (node < 0)
+ return NULL;
+
+ prop = fdt_getprop(fdt, node, "bootargs", NULL);
+ if (!prop)
+ return NULL;
+
+ return strlen(prop) ? prop : NULL;
+}
+
+static __init void parse_cmdline(void)
+{
+ const u8 *prop = get_bootargs_cmdline();
+
+ if (IS_ENABLED(CONFIG_CMDLINE_FORCE) || !prop)
+ __parse_cmdline(CONFIG_CMDLINE, true);
+
+ if (!IS_ENABLED(CONFIG_CMDLINE_FORCE) && prop)
+ __parse_cmdline(prop, true);
+}
+
+/* Keep checkers quiet */
+void init_feature_override(void);
+
+asmlinkage void __init init_feature_override(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(regs); i++) {
+ if (regs[i]->override) {
+ regs[i]->override->val = 0;
+ regs[i]->override->mask = 0;
+ }
+ }
+
+ parse_cmdline();
+
+ for (i = 0; i < ARRAY_SIZE(regs); i++) {
+ if (regs[i]->override)
+ __flush_dcache_area(regs[i]->override,
+ sizeof(*regs[i]->override));
+ }
+}
diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
index f676243abac6..5aa9ed1e9ec6 100644
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -64,7 +64,6 @@ __efistub__ctype = _ctype;
/* Alternative callbacks for init-time patching of nVHE hyp code. */
KVM_NVHE_ALIAS(kvm_patch_vector_branch);
KVM_NVHE_ALIAS(kvm_update_va_mask);
-KVM_NVHE_ALIAS(kvm_update_kimg_phys_offset);
KVM_NVHE_ALIAS(kvm_get_kimage_voffset);
/* Global kernel state accessed by nVHE hyp code. */
@@ -102,6 +101,9 @@ KVM_NVHE_ALIAS(__stop___kvm_ex_table);
/* Array containing bases of nVHE per-CPU memory regions. */
KVM_NVHE_ALIAS(kvm_arm_hyp_percpu_base);
+/* PMU available static key */
+KVM_NVHE_ALIAS(kvm_arm_pmu_available);
+
#endif /* CONFIG_KVM */
#endif /* __ARM64_KERNEL_IMAGE_VARS_H */
diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
index 1c74c45b9494..27f8939deb1b 100644
--- a/arch/arm64/kernel/kaslr.c
+++ b/arch/arm64/kernel/kaslr.c
@@ -19,6 +19,7 @@
#include <asm/memory.h>
#include <asm/mmu.h>
#include <asm/sections.h>
+#include <asm/setup.h>
enum kaslr_status {
KASLR_ENABLED,
@@ -50,39 +51,7 @@ static __init u64 get_kaslr_seed(void *fdt)
return ret;
}
-static __init bool cmdline_contains_nokaslr(const u8 *cmdline)
-{
- const u8 *str;
-
- str = strstr(cmdline, "nokaslr");
- return str == cmdline || (str > cmdline && *(str - 1) == ' ');
-}
-
-static __init bool is_kaslr_disabled_cmdline(void *fdt)
-{
- if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) {
- int node;
- const u8 *prop;
-
- node = fdt_path_offset(fdt, "/chosen");
- if (node < 0)
- goto out;
-
- prop = fdt_getprop(fdt, node, "bootargs", NULL);
- if (!prop)
- goto out;
-
- if (cmdline_contains_nokaslr(prop))
- return true;
-
- if (IS_ENABLED(CONFIG_CMDLINE_EXTEND))
- goto out;
-
- return false;
- }
-out:
- return cmdline_contains_nokaslr(CONFIG_CMDLINE);
-}
+struct arm64_ftr_override kaslr_feature_override __initdata;
/*
* This routine will be executed with the kernel mapped at its default virtual
@@ -92,12 +61,11 @@ out:
* containing function pointers) to be reinitialized, and zero-initialized
* .bss variables will be reset to 0.
*/
-u64 __init kaslr_early_init(u64 dt_phys)
+u64 __init kaslr_early_init(void)
{
void *fdt;
u64 seed, offset, mask, module_range;
unsigned long raw;
- int size;
/*
* Set a reasonable default for module_alloc_base in case
@@ -111,8 +79,7 @@ u64 __init kaslr_early_init(u64 dt_phys)
* and proceed with KASLR disabled. We will make another
* attempt at mapping the FDT in setup_machine()
*/
- early_fixmap_init();
- fdt = fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL);
+ fdt = get_early_fdt_ptr();
if (!fdt) {
kaslr_status = KASLR_DISABLED_FDT_REMAP;
return 0;
@@ -127,7 +94,7 @@ u64 __init kaslr_early_init(u64 dt_phys)
* Check if 'nokaslr' appears on the command line, and
* return 0 if that is the case.
*/
- if (is_kaslr_disabled_cmdline(fdt)) {
+ if (kaslr_feature_override.val & kaslr_feature_override.mask & 0xf) {
kaslr_status = KASLR_DISABLED_CMDLINE;
return 0;
}
diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
index a0b144cfaea7..90a335c74442 100644
--- a/arch/arm64/kernel/machine_kexec.c
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -42,6 +42,7 @@ static void _kexec_image_info(const char *func, int line,
pr_debug(" start: %lx\n", kimage->start);
pr_debug(" head: %lx\n", kimage->head);
pr_debug(" nr_segments: %lu\n", kimage->nr_segments);
+ pr_debug(" kern_reloc: %pa\n", &kimage->arch.kern_reloc);
for (i = 0; i < kimage->nr_segments; i++) {
pr_debug(" segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n",
@@ -58,6 +59,23 @@ void machine_kexec_cleanup(struct kimage *kimage)
/* Empty routine needed to avoid build errors. */
}
+int machine_kexec_post_load(struct kimage *kimage)
+{
+ void *reloc_code = page_to_virt(kimage->control_code_page);
+
+ memcpy(reloc_code, arm64_relocate_new_kernel,
+ arm64_relocate_new_kernel_size);
+ kimage->arch.kern_reloc = __pa(reloc_code);
+ kexec_image_info(kimage);
+
+ /* Flush the reloc_code in preparation for its execution. */
+ __flush_dcache_area(reloc_code, arm64_relocate_new_kernel_size);
+ flush_icache_range((uintptr_t)reloc_code, (uintptr_t)reloc_code +
+ arm64_relocate_new_kernel_size);
+
+ return 0;
+}
+
/**
* machine_kexec_prepare - Prepare for a kexec reboot.
*
@@ -67,8 +85,6 @@ void machine_kexec_cleanup(struct kimage *kimage)
*/
int machine_kexec_prepare(struct kimage *kimage)
{
- kexec_image_info(kimage);
-
if (kimage->type != KEXEC_TYPE_CRASH && cpus_are_stuck_in_kernel()) {
pr_err("Can't kexec: CPUs are stuck in the kernel.\n");
return -EBUSY;
@@ -143,8 +159,6 @@ static void kexec_segment_flush(const struct kimage *kimage)
*/
void machine_kexec(struct kimage *kimage)
{
- phys_addr_t reboot_code_buffer_phys;
- void *reboot_code_buffer;
bool in_kexec_crash = (kimage == kexec_crash_image);
bool stuck_cpus = cpus_are_stuck_in_kernel();
@@ -155,31 +169,6 @@ void machine_kexec(struct kimage *kimage)
WARN(in_kexec_crash && (stuck_cpus || smp_crash_stop_failed()),
"Some CPUs may be stale, kdump will be unreliable.\n");
- reboot_code_buffer_phys = page_to_phys(kimage->control_code_page);
- reboot_code_buffer = phys_to_virt(reboot_code_buffer_phys);
-
- kexec_image_info(kimage);
-
- /*
- * Copy arm64_relocate_new_kernel to the reboot_code_buffer for use
- * after the kernel is shut down.
- */
- memcpy(reboot_code_buffer, arm64_relocate_new_kernel,
- arm64_relocate_new_kernel_size);
-
- /* Flush the reboot_code_buffer in preparation for its execution. */
- __flush_dcache_area(reboot_code_buffer, arm64_relocate_new_kernel_size);
-
- /*
- * Although we've killed off the secondary CPUs, we don't update
- * the online mask if we're handling a crash kernel and consequently
- * need to avoid flush_icache_range(), which will attempt to IPI
- * the offline CPUs. Therefore, we must use the __* variant here.
- */
- __flush_icache_range((uintptr_t)reboot_code_buffer,
- (uintptr_t)reboot_code_buffer +
- arm64_relocate_new_kernel_size);
-
/* Flush the kimage list and its buffers. */
kexec_list_flush(kimage);
@@ -193,7 +182,7 @@ void machine_kexec(struct kimage *kimage)
/*
* cpu_soft_restart will shutdown the MMU, disable data caches, then
- * transfer control to the reboot_code_buffer which contains a copy of
+ * transfer control to the kern_reloc which contains a copy of
* the arm64_relocate_new_kernel routine. arm64_relocate_new_kernel
* uses physical addressing to relocate the new image to its final
* position and transfers control to the image entry point when the
@@ -203,12 +192,8 @@ void machine_kexec(struct kimage *kimage)
* userspace (kexec-tools).
* In kexec_file case, the kernel starts directly without purgatory.
*/
- cpu_soft_restart(reboot_code_buffer_phys, kimage->head, kimage->start,
-#ifdef CONFIG_KEXEC_FILE
- kimage->arch.dtb_mem);
-#else
- 0);
-#endif
+ cpu_soft_restart(kimage->arch.kern_reloc, kimage->head, kimage->start,
+ kimage->arch.dtb_mem);
BUG(); /* Should never get here. */
}
diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c
index 03210f644790..0cde47a63beb 100644
--- a/arch/arm64/kernel/machine_kexec_file.c
+++ b/arch/arm64/kernel/machine_kexec_file.c
@@ -182,8 +182,10 @@ static int create_dtb(struct kimage *image,
/* duplicate a device tree blob */
ret = fdt_open_into(initial_boot_params, buf, buf_size);
- if (ret)
+ if (ret) {
+ vfree(buf);
return -EINVAL;
+ }
ret = setup_dtb(image, initrd_load_addr, initrd_len,
cmdline, buf);
diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c
index 2e224435c024..e53493d8b208 100644
--- a/arch/arm64/kernel/module-plts.c
+++ b/arch/arm64/kernel/module-plts.c
@@ -131,7 +131,7 @@ u64 module_emit_veneer_for_adrp(struct module *mod, Elf64_Shdr *sechdrs,
}
#endif
-#define cmp_3way(a,b) ((a) < (b) ? -1 : (a) > (b))
+#define cmp_3way(a, b) ((a) < (b) ? -1 : (a) > (b))
static int cmp_rela(const void *a, const void *b)
{
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index 80b62fe49dcf..b3c70a612c7a 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -19,12 +19,13 @@
#include <asm/barrier.h>
#include <asm/cpufeature.h>
#include <asm/mte.h>
-#include <asm/mte-kasan.h>
#include <asm/ptrace.h>
#include <asm/sysreg.h>
u64 gcr_kernel_excl __ro_after_init;
+static bool report_fault_once = true;
+
static void mte_sync_page_tags(struct page *page, pte_t *ptep, bool check_swap)
{
pte_t old_pte = READ_ONCE(*ptep);
@@ -86,51 +87,6 @@ int memcmp_pages(struct page *page1, struct page *page2)
return ret;
}
-u8 mte_get_mem_tag(void *addr)
-{
- if (!system_supports_mte())
- return 0xFF;
-
- asm(__MTE_PREAMBLE "ldg %0, [%0]"
- : "+r" (addr));
-
- return mte_get_ptr_tag(addr);
-}
-
-u8 mte_get_random_tag(void)
-{
- void *addr;
-
- if (!system_supports_mte())
- return 0xFF;
-
- asm(__MTE_PREAMBLE "irg %0, %0"
- : "+r" (addr));
-
- return mte_get_ptr_tag(addr);
-}
-
-void *mte_set_mem_tag_range(void *addr, size_t size, u8 tag)
-{
- void *ptr = addr;
-
- if ((!system_supports_mte()) || (size == 0))
- return addr;
-
- /* Make sure that size is MTE granule aligned. */
- WARN_ON(size & (MTE_GRANULE_SIZE - 1));
-
- /* Make sure that the address is MTE granule aligned. */
- WARN_ON((u64)addr & (MTE_GRANULE_SIZE - 1));
-
- tag = 0xF0 | tag;
- ptr = (void *)__tag_set(ptr, tag);
-
- mte_assign_mem_tag_range(ptr, size);
-
- return ptr;
-}
-
void mte_init_tags(u64 max_tag)
{
static bool gcr_kernel_excl_initialized;
@@ -158,6 +114,16 @@ void mte_enable_kernel(void)
isb();
}
+void mte_set_report_once(bool state)
+{
+ WRITE_ONCE(report_fault_once, state);
+}
+
+bool mte_report_once(void)
+{
+ return READ_ONCE(report_fault_once);
+}
+
static void update_sctlr_el1_tcf0(u64 tcf0)
{
/* ISB required for the kernel uaccess routines */
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 3605f77ad4df..4658fcf88c2b 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -280,7 +280,7 @@ armv8pmu_event_attr_is_visible(struct kobject *kobj,
return 0;
}
-static struct attribute_group armv8_pmuv3_events_attr_group = {
+static const struct attribute_group armv8_pmuv3_events_attr_group = {
.name = "events",
.attrs = armv8_pmuv3_event_attrs,
.is_visible = armv8pmu_event_attr_is_visible,
@@ -300,7 +300,7 @@ static struct attribute *armv8_pmuv3_format_attrs[] = {
NULL,
};
-static struct attribute_group armv8_pmuv3_format_attr_group = {
+static const struct attribute_group armv8_pmuv3_format_attr_group = {
.name = "format",
.attrs = armv8_pmuv3_format_attrs,
};
@@ -322,7 +322,7 @@ static struct attribute *armv8_pmuv3_caps_attrs[] = {
NULL,
};
-static struct attribute_group armv8_pmuv3_caps_attr_group = {
+static const struct attribute_group armv8_pmuv3_caps_attr_group = {
.name = "caps",
.attrs = armv8_pmuv3_caps_attrs,
};
@@ -460,7 +460,7 @@ static inline int armv8pmu_counter_has_overflowed(u32 pmnc, int idx)
return pmnc & BIT(ARMV8_IDX_TO_COUNTER(idx));
}
-static inline u32 armv8pmu_read_evcntr(int idx)
+static inline u64 armv8pmu_read_evcntr(int idx)
{
u32 counter = ARMV8_IDX_TO_COUNTER(idx);
@@ -810,7 +810,7 @@ static int armv8pmu_get_single_idx(struct pmu_hw_events *cpuc,
{
int idx;
- for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; idx ++) {
+ for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; idx++) {
if (!test_and_set_bit(idx, cpuc->used_mask))
return idx;
}
@@ -1188,6 +1188,12 @@ static int armv8_a77_pmu_init(struct arm_pmu *cpu_pmu)
armv8_pmuv3_map_event);
}
+static int armv8_a78_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a78",
+ armv8_pmuv3_map_event);
+}
+
static int armv8_e1_pmu_init(struct arm_pmu *cpu_pmu)
{
return armv8_pmu_init_nogroups(cpu_pmu, "armv8_neoverse_e1",
@@ -1225,6 +1231,7 @@ static const struct of_device_id armv8_pmu_of_device_ids[] = {
{.compatible = "arm,cortex-a75-pmu", .data = armv8_a75_pmu_init},
{.compatible = "arm,cortex-a76-pmu", .data = armv8_a76_pmu_init},
{.compatible = "arm,cortex-a77-pmu", .data = armv8_a77_pmu_init},
+ {.compatible = "arm,cortex-a78-pmu", .data = armv8_a78_pmu_init},
{.compatible = "arm,neoverse-e1-pmu", .data = armv8_e1_pmu_init},
{.compatible = "arm,neoverse-n1-pmu", .data = armv8_n1_pmu_init},
{.compatible = "cavium,thunder-pmu", .data = armv8_thunder_pmu_init},
diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c
index a412d8edbcd2..2c247634552b 100644
--- a/arch/arm64/kernel/probes/uprobes.c
+++ b/arch/arm64/kernel/probes/uprobes.c
@@ -38,7 +38,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
/* TODO: Currently we do not support AARCH32 instruction probing */
if (mm->context.flags & MMCF_AARCH32)
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
else if (!IS_ALIGNED(addr, AARCH64_INSN_SIZE))
return -EINVAL;
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 6616486a58fe..325c83b1a24d 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -304,7 +304,7 @@ void __show_regs(struct pt_regs *regs)
}
}
-void show_regs(struct pt_regs * regs)
+void show_regs(struct pt_regs *regs)
{
__show_regs(regs);
dump_backtrace(regs, NULL, KERN_DEFAULT);
@@ -398,7 +398,7 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
ptrauth_thread_init_kernel(p);
- if (likely(!(p->flags & PF_KTHREAD))) {
+ if (likely(!(p->flags & (PF_KTHREAD | PF_IO_WORKER)))) {
*childregs = *current_pt_regs();
childregs->regs[0] = 0;
@@ -587,7 +587,7 @@ unsigned long get_wchan(struct task_struct *p)
ret = frame.pc;
goto out;
}
- } while (count ++ < 16);
+ } while (count++ < 16);
out:
put_task_stack(p);
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 8ac487c84e37..170f42fd6101 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -194,6 +194,7 @@ static void ptrace_hbptriggered(struct perf_event *bp,
}
arm64_force_sig_ptrace_errno_trap(si_errno, bkpt->trigger,
desc);
+ return;
}
#endif
arm64_force_sig_fault(SIGTRAP, TRAP_HWBKPT, bkpt->trigger, desc);
@@ -1796,7 +1797,7 @@ int syscall_trace_enter(struct pt_regs *regs)
if (flags & (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE)) {
tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER);
- if (!in_syscall(regs) || (flags & _TIF_SYSCALL_EMU))
+ if (flags & _TIF_SYSCALL_EMU)
return NO_SYSCALL;
}
diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S
index 84eec95ec06c..b78ea5de97a4 100644
--- a/arch/arm64/kernel/relocate_kernel.S
+++ b/arch/arm64/kernel/relocate_kernel.S
@@ -17,28 +17,24 @@
/*
* arm64_relocate_new_kernel - Put a 2nd stage image in place and boot it.
*
- * The memory that the old kernel occupies may be overwritten when coping the
+ * The memory that the old kernel occupies may be overwritten when copying the
* new image to its final location. To assure that the
* arm64_relocate_new_kernel routine which does that copy is not overwritten,
* all code and data needed by arm64_relocate_new_kernel must be between the
* symbols arm64_relocate_new_kernel and arm64_relocate_new_kernel_end. The
* machine_kexec() routine will copy arm64_relocate_new_kernel to the kexec
- * control_code_page, a special page which has been set up to be preserved
- * during the copy operation.
+ * safe memory that has been set up to be preserved during the copy operation.
*/
SYM_CODE_START(arm64_relocate_new_kernel)
-
/* Setup the list loop variables. */
mov x18, x2 /* x18 = dtb address */
mov x17, x1 /* x17 = kimage_start */
mov x16, x0 /* x16 = kimage_head */
- raw_dcache_line_size x15, x0 /* x15 = dcache line size */
mov x14, xzr /* x14 = entry ptr */
mov x13, xzr /* x13 = copy dest */
-
/* Check if the new image needs relocation. */
tbnz x16, IND_DONE_BIT, .Ldone
-
+ raw_dcache_line_size x15, x1 /* x15 = dcache line size */
.Lloop:
and x12, x16, PAGE_MASK /* x12 = addr */
@@ -47,44 +43,28 @@ SYM_CODE_START(arm64_relocate_new_kernel)
tbz x16, IND_SOURCE_BIT, .Ltest_indirection
/* Invalidate dest page to PoC. */
- mov x0, x13
- add x20, x0, #PAGE_SIZE
+ mov x2, x13
+ add x20, x2, #PAGE_SIZE
sub x1, x15, #1
- bic x0, x0, x1
-2: dc ivac, x0
- add x0, x0, x15
- cmp x0, x20
+ bic x2, x2, x1
+2: dc ivac, x2
+ add x2, x2, x15
+ cmp x2, x20
b.lo 2b
dsb sy
- mov x20, x13
- mov x21, x12
- copy_page x20, x21, x0, x1, x2, x3, x4, x5, x6, x7
-
- /* dest += PAGE_SIZE */
- add x13, x13, PAGE_SIZE
+ copy_page x13, x12, x1, x2, x3, x4, x5, x6, x7, x8
b .Lnext
-
.Ltest_indirection:
tbz x16, IND_INDIRECTION_BIT, .Ltest_destination
-
- /* ptr = addr */
- mov x14, x12
+ mov x14, x12 /* ptr = addr */
b .Lnext
-
.Ltest_destination:
tbz x16, IND_DESTINATION_BIT, .Lnext
-
- /* dest = addr */
- mov x13, x12
-
+ mov x13, x12 /* dest = addr */
.Lnext:
- /* entry = *ptr++ */
- ldr x16, [x14], #8
-
- /* while (!(entry & DONE)) */
- tbz x16, IND_DONE_BIT, .Lloop
-
+ ldr x16, [x14], #8 /* entry = *ptr++ */
+ tbz x16, IND_DONE_BIT, .Lloop /* while (!(entry & DONE)) */
.Ldone:
/* wait for writes from copy_page to finish */
dsb nsh
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index c18aacde8bb0..61845c0821d9 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -168,6 +168,21 @@ static void __init smp_build_mpidr_hash(void)
pr_warn("Large number of MPIDR hash buckets detected\n");
}
+static void *early_fdt_ptr __initdata;
+
+void __init *get_early_fdt_ptr(void)
+{
+ return early_fdt_ptr;
+}
+
+asmlinkage void __init early_fdt_map(u64 dt_phys)
+{
+ int fdt_size;
+
+ early_fixmap_init();
+ early_fdt_ptr = fixmap_remap_fdt(dt_phys, &fdt_size, PAGE_KERNEL);
+}
+
static void __init setup_machine_fdt(phys_addr_t dt_phys)
{
int size;
diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
index 6bdef7362c0e..5bfd9b87f85d 100644
--- a/arch/arm64/kernel/sleep.S
+++ b/arch/arm64/kernel/sleep.S
@@ -100,6 +100,7 @@ SYM_FUNC_END(__cpu_suspend_enter)
.pushsection ".idmap.text", "awx"
SYM_CODE_START(cpu_resume)
bl init_kernel_el
+ bl switch_to_vhe
bl __cpu_setup
/* enable the MMU early - so we can access sleep_save_stash by va */
adrp x1, swapper_pg_dir
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index ad00f99ee9b0..357590beaabb 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -434,8 +434,10 @@ static void __init hyp_mode_check(void)
"CPU: CPUs started in inconsistent modes");
else
pr_info("CPU: All CPU(s) started at EL1\n");
- if (IS_ENABLED(CONFIG_KVM) && !is_kernel_in_hyp_mode())
+ if (IS_ENABLED(CONFIG_KVM) && !is_kernel_in_hyp_mode()) {
kvm_compute_layout();
+ kvm_apply_hyp_relocations();
+ }
}
void __init smp_cpus_done(unsigned int max_cpus)
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index fa56af1a59c3..ad20981dfda4 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -44,6 +44,10 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
unsigned long fp = frame->fp;
struct stack_info info;
+ /* Terminal record; nothing to unwind */
+ if (!fp)
+ return -ENOENT;
+
if (fp & 0xf)
return -EINVAL;
@@ -104,15 +108,6 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
frame->pc = ptrauth_strip_insn_pac(frame->pc);
- /*
- * Frames created upon entry from EL0 have NULL FP and PC values, so
- * don't bother reporting these. Frames created by __noreturn functions
- * might have a valid FP even if PC is bogus, so only terminate where
- * both are NULL.
- */
- if (!frame->fp && !frame->pc)
- return -EINVAL;
-
return 0;
}
NOKPROBE_SYMBOL(unwind_frame);
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index a67b37a7a47e..d7564891ffe1 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -119,7 +119,7 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
if (!ret)
ret = -EOPNOTSUPP;
} else {
- __cpu_suspend_exit();
+ RCU_NONIDLE(__cpu_suspend_exit());
}
unpause_graph_tracing();
diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c
index c2877c332f2d..b9cf12b271d7 100644
--- a/arch/arm64/kernel/syscall.c
+++ b/arch/arm64/kernel/syscall.c
@@ -65,35 +65,6 @@ static inline bool has_syscall_work(unsigned long flags)
int syscall_trace_enter(struct pt_regs *regs);
void syscall_trace_exit(struct pt_regs *regs);
-#ifdef CONFIG_ARM64_ERRATUM_1463225
-DECLARE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa);
-
-static void cortex_a76_erratum_1463225_svc_handler(void)
-{
- u32 reg, val;
-
- if (!unlikely(test_thread_flag(TIF_SINGLESTEP)))
- return;
-
- if (!unlikely(this_cpu_has_cap(ARM64_WORKAROUND_1463225)))
- return;
-
- __this_cpu_write(__in_cortex_a76_erratum_1463225_wa, 1);
- reg = read_sysreg(mdscr_el1);
- val = reg | DBG_MDSCR_SS | DBG_MDSCR_KDE;
- write_sysreg(val, mdscr_el1);
- asm volatile("msr daifclr, #8");
- isb();
-
- /* We will have taken a single-step exception by this point */
-
- write_sysreg(reg, mdscr_el1);
- __this_cpu_write(__in_cortex_a76_erratum_1463225_wa, 0);
-}
-#else
-static void cortex_a76_erratum_1463225_svc_handler(void) { }
-#endif /* CONFIG_ARM64_ERRATUM_1463225 */
-
static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
const syscall_fn_t syscall_table[])
{
@@ -120,7 +91,6 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
* (Similarly for HVC and SMC elsewhere.)
*/
- cortex_a76_erratum_1463225_svc_handler();
local_daif_restore(DAIF_PROCCTX);
if (flags & _TIF_MTE_ASYNC_FAULT) {
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index f6faa697e83e..e08a4126453a 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -199,76 +199,38 @@ static int freq_inv_set_max_ratio(int cpu, u64 max_rate, u64 ref_rate)
return 0;
}
-static inline bool
-enable_policy_freq_counters(int cpu, cpumask_var_t valid_cpus)
-{
- struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
-
- if (!policy) {
- pr_debug("CPU%d: No cpufreq policy found.\n", cpu);
- return false;
- }
-
- if (cpumask_subset(policy->related_cpus, valid_cpus))
- cpumask_or(amu_fie_cpus, policy->related_cpus,
- amu_fie_cpus);
-
- cpufreq_cpu_put(policy);
-
- return true;
-}
-
static DEFINE_STATIC_KEY_FALSE(amu_fie_key);
#define amu_freq_invariant() static_branch_unlikely(&amu_fie_key)
-static int __init init_amu_fie(void)
+static void amu_fie_setup(const struct cpumask *cpus)
{
- bool invariance_status = topology_scale_freq_invariant();
- cpumask_var_t valid_cpus;
- bool have_policy = false;
- int ret = 0;
+ bool invariant;
int cpu;
- if (!zalloc_cpumask_var(&valid_cpus, GFP_KERNEL))
- return -ENOMEM;
-
- if (!zalloc_cpumask_var(&amu_fie_cpus, GFP_KERNEL)) {
- ret = -ENOMEM;
- goto free_valid_mask;
- }
+ /* We are already set since the last insmod of cpufreq driver */
+ if (unlikely(cpumask_subset(cpus, amu_fie_cpus)))
+ return;
- for_each_present_cpu(cpu) {
+ for_each_cpu(cpu, cpus) {
if (!freq_counters_valid(cpu) ||
freq_inv_set_max_ratio(cpu,
cpufreq_get_hw_max_freq(cpu) * 1000,
arch_timer_get_rate()))
- continue;
-
- cpumask_set_cpu(cpu, valid_cpus);
- have_policy |= enable_policy_freq_counters(cpu, valid_cpus);
+ return;
}
- /*
- * If we are not restricted by cpufreq policies, we only enable
- * the use of the AMU feature for FIE if all CPUs support AMU.
- * Otherwise, enable_policy_freq_counters has already enabled
- * policy cpus.
- */
- if (!have_policy && cpumask_equal(valid_cpus, cpu_present_mask))
- cpumask_or(amu_fie_cpus, amu_fie_cpus, valid_cpus);
+ cpumask_or(amu_fie_cpus, amu_fie_cpus, cpus);
- if (!cpumask_empty(amu_fie_cpus)) {
- pr_info("CPUs[%*pbl]: counters will be used for FIE.",
- cpumask_pr_args(amu_fie_cpus));
- static_branch_enable(&amu_fie_key);
- }
+ invariant = topology_scale_freq_invariant();
- /*
- * If the system is not fully invariant after AMU init, disable
- * partial use of counters for frequency invariance.
- */
- if (!topology_scale_freq_invariant())
- static_branch_disable(&amu_fie_key);
+ /* We aren't fully invariant yet */
+ if (!invariant && !cpumask_equal(amu_fie_cpus, cpu_present_mask))
+ return;
+
+ static_branch_enable(&amu_fie_key);
+
+ pr_debug("CPUs[%*pbl]: counters will be used for FIE.",
+ cpumask_pr_args(cpus));
/*
* Task scheduler behavior depends on frequency invariance support,
@@ -276,15 +238,50 @@ static int __init init_amu_fie(void)
* a result of counter initialisation and use, retrigger the build of
* scheduling domains to ensure the information is propagated properly.
*/
- if (invariance_status != topology_scale_freq_invariant())
+ if (!invariant)
rebuild_sched_domains_energy();
+}
+
+static int init_amu_fie_callback(struct notifier_block *nb, unsigned long val,
+ void *data)
+{
+ struct cpufreq_policy *policy = data;
+
+ if (val == CPUFREQ_CREATE_POLICY)
+ amu_fie_setup(policy->related_cpus);
+
+ /*
+ * We don't need to handle CPUFREQ_REMOVE_POLICY event as the AMU
+ * counters don't have any dependency on cpufreq driver once we have
+ * initialized AMU support and enabled invariance. The AMU counters will
+ * keep on working just fine in the absence of the cpufreq driver, and
+ * for the CPUs for which there are no counters available, the last set
+ * value of freq_scale will remain valid as that is the frequency those
+ * CPUs are running at.
+ */
+
+ return 0;
+}
+
+static struct notifier_block init_amu_fie_notifier = {
+ .notifier_call = init_amu_fie_callback,
+};
+
+static int __init init_amu_fie(void)
+{
+ int ret;
+
+ if (!zalloc_cpumask_var(&amu_fie_cpus, GFP_KERNEL))
+ return -ENOMEM;
-free_valid_mask:
- free_cpumask_var(valid_cpus);
+ ret = cpufreq_register_notifier(&init_amu_fie_notifier,
+ CPUFREQ_POLICY_NOTIFIER);
+ if (ret)
+ free_cpumask_var(amu_fie_cpus);
return ret;
}
-late_initcall_sync(init_amu_fie);
+core_initcall(init_amu_fie);
bool arch_freq_counters_available(const struct cpumask *cpus)
{
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 6895ce777e7f..a05d34f0e82a 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -45,7 +45,7 @@
#include <asm/system_misc.h>
#include <asm/sysreg.h>
-static const char *handler[]= {
+static const char *handler[] = {
"Synchronous Abort",
"IRQ",
"FIQ",
diff --git a/arch/arm64/kernel/vdso/vdso.S b/arch/arm64/kernel/vdso-wrap.S
index c4b1990bf2be..c4b1990bf2be 100644
--- a/arch/arm64/kernel/vdso/vdso.S
+++ b/arch/arm64/kernel/vdso-wrap.S
diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index cd9c3fa25902..945e6bb326e3 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -29,7 +29,8 @@ ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 --hash-style=sysv \
ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18
ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO
-CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) $(GCC_PLUGINS_CFLAGS)
+CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) $(GCC_PLUGINS_CFLAGS) \
+ $(CC_FLAGS_LTO)
KASAN_SANITIZE := n
UBSAN_SANITIZE := n
OBJECT_FILES_NON_STANDARD := y
@@ -44,7 +45,6 @@ endif
# Disable gcov profiling for VDSO code
GCOV_PROFILE := n
-obj-y += vdso.o
targets += vdso.lds
CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
diff --git a/arch/arm64/kernel/vdso/gen_vdso_offsets.sh b/arch/arm64/kernel/vdso/gen_vdso_offsets.sh
index 8b806eacd0a6..0387209d65b1 100755
--- a/arch/arm64/kernel/vdso/gen_vdso_offsets.sh
+++ b/arch/arm64/kernel/vdso/gen_vdso_offsets.sh
@@ -13,4 +13,4 @@
LC_ALL=C
sed -n -e 's/^00*/0/' -e \
-'s/^\([0-9a-fA-F]*\) . VDSO_\([a-zA-Z0-9_]*\)$/\#define vdso_offset_\2\t0x\1/p'
+'s/^\([0-9a-fA-F]*\) . VDSO_\([a-zA-Z0-9_]*\)$/\#define vdso_offset_\2 0x\1/p'
diff --git a/arch/arm64/kernel/vdso32/vdso.S b/arch/arm64/kernel/vdso32-wrap.S
index e72ac7bc4c04..e72ac7bc4c04 100644
--- a/arch/arm64/kernel/vdso32/vdso.S
+++ b/arch/arm64/kernel/vdso32-wrap.S
diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile
index a1e0f91e6cea..789ad420f16b 100644
--- a/arch/arm64/kernel/vdso32/Makefile
+++ b/arch/arm64/kernel/vdso32/Makefile
@@ -155,7 +155,6 @@ c-obj-vdso-gettimeofday := $(addprefix $(obj)/, $(c-obj-vdso-gettimeofday))
asm-obj-vdso := $(addprefix $(obj)/, $(asm-obj-vdso))
obj-vdso := $(c-obj-vdso) $(c-obj-vdso-gettimeofday) $(asm-obj-vdso)
-obj-y += vdso.o
targets += vdso.lds
CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 4c0b0c89ad59..7eea7888bb02 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -31,10 +31,11 @@ jiffies = jiffies_64;
__stop___kvm_ex_table = .;
#define HYPERVISOR_DATA_SECTIONS \
- HYP_SECTION_NAME(.data..ro_after_init) : { \
- __hyp_data_ro_after_init_start = .; \
+ HYP_SECTION_NAME(.rodata) : { \
+ __hyp_rodata_start = .; \
*(HYP_SECTION_NAME(.data..ro_after_init)) \
- __hyp_data_ro_after_init_end = .; \
+ *(HYP_SECTION_NAME(.rodata)) \
+ __hyp_rodata_end = .; \
}
#define HYPERVISOR_PERCPU_SECTION \
@@ -42,10 +43,19 @@ jiffies = jiffies_64;
HYP_SECTION_NAME(.data..percpu) : { \
*(HYP_SECTION_NAME(.data..percpu)) \
}
+
+#define HYPERVISOR_RELOC_SECTION \
+ .hyp.reloc : ALIGN(4) { \
+ __hyp_reloc_begin = .; \
+ *(.hyp.reloc) \
+ __hyp_reloc_end = .; \
+ }
+
#else /* CONFIG_KVM */
#define HYPERVISOR_EXTABLE
#define HYPERVISOR_DATA_SECTIONS
#define HYPERVISOR_PERCPU_SECTION
+#define HYPERVISOR_RELOC_SECTION
#endif
#define HYPERVISOR_TEXT \
@@ -216,6 +226,8 @@ SECTIONS
PERCPU_SECTION(L1_CACHE_BYTES)
HYPERVISOR_PERCPU_SECTION
+ HYPERVISOR_RELOC_SECTION
+
.rela.dyn : ALIGN(8) {
*(.rela .rela*)
}
@@ -316,3 +328,11 @@ ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) == PAGE_SIZE,
* If padding is applied before .head.text, virt<->phys conversions will fail.
*/
ASSERT(_text == KIMAGE_VADDR, "HEAD is misaligned")
+
+ASSERT(swapper_pg_dir - reserved_pg_dir == RESERVED_SWAPPER_OFFSET,
+ "RESERVED_SWAPPER_OFFSET is wrong!")
+
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+ASSERT(swapper_pg_dir - tramp_pg_dir == TRAMP_SWAPPER_OFFSET,
+ "TRAMP_SWAPPER_OFFSET is wrong!")
+#endif