aboutsummaryrefslogtreecommitdiffstats
path: root/arch/arm64/kvm/hyp/nvhe
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-05-26 14:20:14 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2022-05-26 14:20:14 -0700
commitbf9095424d027e942e1d1ee74977e17b7df8e455 (patch)
tree57659cf68b7df09005bc5ada4d315d66472cebf3 /arch/arm64/kvm/hyp/nvhe
parentMerge tag 'mm-stable-2022-05-25' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm (diff)
parentKVM: x86: Fix the intel_pt PMI handling wrongly considered from guest (diff)
downloadlinux-dev-bf9095424d027e942e1d1ee74977e17b7df8e455.tar.xz
linux-dev-bf9095424d027e942e1d1ee74977e17b7df8e455.zip
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Paolo Bonzini: "S390: - ultravisor communication device driver - fix TEID on terminating storage key ops RISC-V: - Added Sv57x4 support for G-stage page table - Added range based local HFENCE functions - Added remote HFENCE functions based on VCPU requests - Added ISA extension registers in ONE_REG interface - Updated KVM RISC-V maintainers entry to cover selftests support ARM: - Add support for the ARMv8.6 WFxT extension - Guard pages for the EL2 stacks - Trap and emulate AArch32 ID registers to hide unsupported features - Ability to select and save/restore the set of hypercalls exposed to the guest - Support for PSCI-initiated suspend in collaboration with userspace - GICv3 register-based LPI invalidation support - Move host PMU event merging into the vcpu data structure - GICv3 ITS save/restore fixes - The usual set of small-scale cleanups and fixes x86: - New ioctls to get/set TSC frequency for a whole VM - Allow userspace to opt out of hypercall patching - Only do MSR filtering for MSRs accessed by rdmsr/wrmsr AMD SEV improvements: - Add KVM_EXIT_SHUTDOWN metadata for SEV-ES - V_TSC_AUX support Nested virtualization improvements for AMD: - Support for "nested nested" optimizations (nested vVMLOAD/VMSAVE, nested vGIF) - Allow AVIC to co-exist with a nested guest running - Fixes for LBR virtualizations when a nested guest is running, and nested LBR virtualization support - PAUSE filtering for nested hypervisors Guest support: - Decoupling of vcpu_is_preempted from PV spinlocks" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (199 commits) KVM: x86: Fix the intel_pt PMI handling wrongly considered from guest KVM: selftests: x86: Sync the new name of the test case to .gitignore Documentation: kvm: reorder ARM-specific section about KVM_SYSTEM_EVENT_SUSPEND x86, kvm: use correct GFP flags for preemption disabled KVM: LAPIC: Drop pending LAPIC timer injection when canceling the timer x86/kvm: Alloc dummy async #PF token outside of raw spinlock KVM: x86: avoid calling x86 emulator without a decoded instruction KVM: SVM: Use kzalloc for sev ioctl interfaces to prevent kernel data leak x86/fpu: KVM: Set the base guest FPU uABI size to sizeof(struct kvm_xsave) s390/uv_uapi: depend on CONFIG_S390 KVM: selftests: x86: Fix test failure on arch lbr capable platforms KVM: LAPIC: Trace LAPIC timer expiration on every vmentry KVM: s390: selftest: Test suppression indication on key prot exception KVM: s390: Don't indicate suppression on dirtying, failing memop selftests: drivers/s390x: Add uvdevice tests drivers/s390/char: Add Ultravisor io device MAINTAINERS: Update KVM RISC-V entry to cover selftests support RISC-V: KVM: Introduce ISA extension register RISC-V: KVM: Cleanup stale TLB entries when host CPU changes RISC-V: KVM: Add remote HFENCE functions based on VCPU requests ...
Diffstat (limited to 'arch/arm64/kvm/hyp/nvhe')
-rw-r--r--arch/arm64/kvm/hyp/nvhe/host.S32
-rw-r--r--arch/arm64/kvm/hyp/nvhe/hyp-main.c18
-rw-r--r--arch/arm64/kvm/hyp/nvhe/mm.c78
-rw-r--r--arch/arm64/kvm/hyp/nvhe/setup.c31
-rw-r--r--arch/arm64/kvm/hyp/nvhe/switch.c57
-rw-r--r--arch/arm64/kvm/hyp/nvhe/sys_regs.c3
6 files changed, 144 insertions, 75 deletions
diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S
index 727c979b2b69..ea6a397b64a6 100644
--- a/arch/arm64/kvm/hyp/nvhe/host.S
+++ b/arch/arm64/kvm/hyp/nvhe/host.S
@@ -80,7 +80,7 @@ SYM_FUNC_START(__hyp_do_panic)
mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
PSR_MODE_EL1h)
msr spsr_el2, lr
- ldr lr, =nvhe_hyp_panic_handler
+ adr_l lr, nvhe_hyp_panic_handler
hyp_kimg_va lr, x6
msr elr_el2, lr
@@ -125,13 +125,11 @@ alternative_else_nop_endif
add sp, sp, #16
/*
* Compute the idmap address of __kvm_handle_stub_hvc and
- * jump there. Since we use kimage_voffset, do not use the
- * HYP VA for __kvm_handle_stub_hvc, but the kernel VA instead
- * (by loading it from the constant pool).
+ * jump there.
*
* Preserve x0-x4, which may contain stub parameters.
*/
- ldr x5, =__kvm_handle_stub_hvc
+ adr_l x5, __kvm_handle_stub_hvc
hyp_pa x5, x6
br x5
SYM_FUNC_END(__host_hvc)
@@ -153,6 +151,18 @@ SYM_FUNC_END(__host_hvc)
.macro invalid_host_el2_vect
.align 7
+
+ /*
+ * Test whether the SP has overflowed, without corrupting a GPR.
+ * nVHE hypervisor stacks are aligned so that the PAGE_SHIFT bit
+ * of SP should always be 1.
+ */
+ add sp, sp, x0 // sp' = sp + x0
+ sub x0, sp, x0 // x0' = sp' - x0 = (sp + x0) - x0 = sp
+ tbz x0, #PAGE_SHIFT, .L__hyp_sp_overflow\@
+ sub x0, sp, x0 // x0'' = sp' - x0' = (sp + x0) - sp = x0
+ sub sp, sp, x0 // sp'' = sp' - x0 = (sp + x0) - x0 = sp
+
/* If a guest is loaded, panic out of it. */
stp x0, x1, [sp, #-16]!
get_loaded_vcpu x0, x1
@@ -165,6 +175,18 @@ SYM_FUNC_END(__host_hvc)
* been partially clobbered by __host_enter.
*/
b hyp_panic
+
+.L__hyp_sp_overflow\@:
+ /*
+ * Reset SP to the top of the stack, to allow handling the hyp_panic.
+ * This corrupts the stack but is ok, since we won't be attempting
+ * any unwinding here.
+ */
+ ldr_this_cpu x0, kvm_init_params + NVHE_INIT_STACK_HYP_VA, x1
+ mov sp, x0
+
+ b hyp_panic_bad_stack
+ ASM_BUG()
.endm
.macro invalid_host_el1_vect
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index 5e2197db0d32..3cea4b6ac23e 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -160,7 +160,23 @@ static void handle___pkvm_create_private_mapping(struct kvm_cpu_context *host_ct
DECLARE_REG(size_t, size, host_ctxt, 2);
DECLARE_REG(enum kvm_pgtable_prot, prot, host_ctxt, 3);
- cpu_reg(host_ctxt, 1) = __pkvm_create_private_mapping(phys, size, prot);
+ /*
+ * __pkvm_create_private_mapping() populates a pointer with the
+ * hypervisor start address of the allocation.
+ *
+ * However, handle___pkvm_create_private_mapping() hypercall crosses the
+ * EL1/EL2 boundary so the pointer would not be valid in this context.
+ *
+ * Instead pass the allocation address as the return value (or return
+ * ERR_PTR() on failure).
+ */
+ unsigned long haddr;
+ int err = __pkvm_create_private_mapping(phys, size, prot, &haddr);
+
+ if (err)
+ haddr = (unsigned long)ERR_PTR(err);
+
+ cpu_reg(host_ctxt, 1) = haddr;
}
static void handle___pkvm_prot_finalize(struct kvm_cpu_context *host_ctxt)
diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c
index cdbe8e246418..96193cb31a39 100644
--- a/arch/arm64/kvm/hyp/nvhe/mm.c
+++ b/arch/arm64/kvm/hyp/nvhe/mm.c
@@ -37,36 +37,60 @@ static int __pkvm_create_mappings(unsigned long start, unsigned long size,
return err;
}
-unsigned long __pkvm_create_private_mapping(phys_addr_t phys, size_t size,
- enum kvm_pgtable_prot prot)
+/**
+ * pkvm_alloc_private_va_range - Allocates a private VA range.
+ * @size: The size of the VA range to reserve.
+ * @haddr: The hypervisor virtual start address of the allocation.
+ *
+ * The private virtual address (VA) range is allocated above __io_map_base
+ * and aligned based on the order of @size.
+ *
+ * Return: 0 on success or negative error code on failure.
+ */
+int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr)
{
- unsigned long addr;
- int err;
+ unsigned long base, addr;
+ int ret = 0;
hyp_spin_lock(&pkvm_pgd_lock);
- size = PAGE_ALIGN(size + offset_in_page(phys));
- addr = __io_map_base;
- __io_map_base += size;
+ /* Align the allocation based on the order of its size */
+ addr = ALIGN(__io_map_base, PAGE_SIZE << get_order(size));
- /* Are we overflowing on the vmemmap ? */
- if (__io_map_base > __hyp_vmemmap) {
- __io_map_base -= size;
- addr = (unsigned long)ERR_PTR(-ENOMEM);
- goto out;
- }
+ /* The allocated size is always a multiple of PAGE_SIZE */
+ base = addr + PAGE_ALIGN(size);
- err = kvm_pgtable_hyp_map(&pkvm_pgtable, addr, size, phys, prot);
- if (err) {
- addr = (unsigned long)ERR_PTR(err);
- goto out;
+ /* Are we overflowing on the vmemmap ? */
+ if (!addr || base > __hyp_vmemmap)
+ ret = -ENOMEM;
+ else {
+ __io_map_base = base;
+ *haddr = addr;
}
- addr = addr + offset_in_page(phys);
-out:
hyp_spin_unlock(&pkvm_pgd_lock);
- return addr;
+ return ret;
+}
+
+int __pkvm_create_private_mapping(phys_addr_t phys, size_t size,
+ enum kvm_pgtable_prot prot,
+ unsigned long *haddr)
+{
+ unsigned long addr;
+ int err;
+
+ size = PAGE_ALIGN(size + offset_in_page(phys));
+ err = pkvm_alloc_private_va_range(size, &addr);
+ if (err)
+ return err;
+
+ err = __pkvm_create_mappings(addr, size, phys, prot);
+ if (err)
+ return err;
+
+ *haddr = addr + offset_in_page(phys);
+ return err;
}
int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot)
@@ -146,7 +170,8 @@ int pkvm_cpu_set_vector(enum arm64_hyp_spectre_vector slot)
int hyp_map_vectors(void)
{
phys_addr_t phys;
- void *bp_base;
+ unsigned long bp_base;
+ int ret;
if (!kvm_system_needs_idmapped_vectors()) {
__hyp_bp_vect_base = __bp_harden_hyp_vecs;
@@ -154,13 +179,12 @@ int hyp_map_vectors(void)
}
phys = __hyp_pa(__bp_harden_hyp_vecs);
- bp_base = (void *)__pkvm_create_private_mapping(phys,
- __BP_HARDEN_HYP_VECS_SZ,
- PAGE_HYP_EXEC);
- if (IS_ERR_OR_NULL(bp_base))
- return PTR_ERR(bp_base);
+ ret = __pkvm_create_private_mapping(phys, __BP_HARDEN_HYP_VECS_SZ,
+ PAGE_HYP_EXEC, &bp_base);
+ if (ret)
+ return ret;
- __hyp_bp_vect_base = bp_base;
+ __hyp_bp_vect_base = (void *)bp_base;
return 0;
}
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index 27af337f9fea..e8d4ea2fcfa0 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -99,17 +99,42 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
return ret;
for (i = 0; i < hyp_nr_cpus; i++) {
+ struct kvm_nvhe_init_params *params = per_cpu_ptr(&kvm_init_params, i);
+ unsigned long hyp_addr;
+
start = (void *)kern_hyp_va(per_cpu_base[i]);
end = start + PAGE_ALIGN(hyp_percpu_size);
ret = pkvm_create_mappings(start, end, PAGE_HYP);
if (ret)
return ret;
- end = (void *)per_cpu_ptr(&kvm_init_params, i)->stack_hyp_va;
- start = end - PAGE_SIZE;
- ret = pkvm_create_mappings(start, end, PAGE_HYP);
+ /*
+ * Allocate a contiguous HYP private VA range for the stack
+ * and guard page. The allocation is also aligned based on
+ * the order of its size.
+ */
+ ret = pkvm_alloc_private_va_range(PAGE_SIZE * 2, &hyp_addr);
+ if (ret)
+ return ret;
+
+ /*
+ * Since the stack grows downwards, map the stack to the page
+ * at the higher address and leave the lower guard page
+ * unbacked.
+ *
+ * Any valid stack address now has the PAGE_SHIFT bit as 1
+ * and addresses corresponding to the guard page have the
+ * PAGE_SHIFT bit as 0 - this is used for overflow detection.
+ */
+ hyp_spin_lock(&pkvm_pgd_lock);
+ ret = kvm_pgtable_hyp_map(&pkvm_pgtable, hyp_addr + PAGE_SIZE,
+ PAGE_SIZE, params->stack_pa, PAGE_HYP);
+ hyp_spin_unlock(&pkvm_pgd_lock);
if (ret)
return ret;
+
+ /* Update stack_hyp_va to end of the stack's private VA range */
+ params->stack_hyp_va = hyp_addr + (2 * PAGE_SIZE);
}
/*
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index caace61ea459..6db801db8f27 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -150,16 +150,13 @@ static void __hyp_vgic_restore_state(struct kvm_vcpu *vcpu)
}
}
-/**
+/*
* Disable host events, enable guest events
*/
-static bool __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt)
+#ifdef CONFIG_HW_PERF_EVENTS
+static bool __pmu_switch_to_guest(struct kvm_vcpu *vcpu)
{
- struct kvm_host_data *host;
- struct kvm_pmu_events *pmu;
-
- host = container_of(host_ctxt, struct kvm_host_data, host_ctxt);
- pmu = &host->pmu_events;
+ struct kvm_pmu_events *pmu = &vcpu->arch.pmu.events;
if (pmu->events_host)
write_sysreg(pmu->events_host, pmcntenclr_el0);
@@ -170,16 +167,12 @@ static bool __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt)
return (pmu->events_host || pmu->events_guest);
}
-/**
+/*
* Disable guest events, enable host events
*/
-static void __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt)
+static void __pmu_switch_to_host(struct kvm_vcpu *vcpu)
{
- struct kvm_host_data *host;
- struct kvm_pmu_events *pmu;
-
- host = container_of(host_ctxt, struct kvm_host_data, host_ctxt);
- pmu = &host->pmu_events;
+ struct kvm_pmu_events *pmu = &vcpu->arch.pmu.events;
if (pmu->events_guest)
write_sysreg(pmu->events_guest, pmcntenclr_el0);
@@ -187,8 +180,12 @@ static void __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt)
if (pmu->events_host)
write_sysreg(pmu->events_host, pmcntenset_el0);
}
+#else
+#define __pmu_switch_to_guest(v) ({ false; })
+#define __pmu_switch_to_host(v) do {} while (0)
+#endif
-/**
+/*
* Handler for protected VM MSR, MRS or System instruction execution in AArch64.
*
* Returns true if the hypervisor has handled the exit, and control should go
@@ -205,23 +202,6 @@ static bool kvm_handle_pvm_sys64(struct kvm_vcpu *vcpu, u64 *exit_code)
kvm_handle_pvm_sysreg(vcpu, exit_code));
}
-/**
- * Handler for protected floating-point and Advanced SIMD accesses.
- *
- * Returns true if the hypervisor has handled the exit, and control should go
- * back to the guest, or false if it hasn't.
- */
-static bool kvm_handle_pvm_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
-{
- /* Linux guests assume support for floating-point and Advanced SIMD. */
- BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_FP),
- PVM_ID_AA64PFR0_ALLOW));
- BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_ASIMD),
- PVM_ID_AA64PFR0_ALLOW));
-
- return kvm_hyp_handle_fpsimd(vcpu, exit_code);
-}
-
static const exit_handler_fn hyp_exit_handlers[] = {
[0 ... ESR_ELx_EC_MAX] = NULL,
[ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32,
@@ -237,7 +217,7 @@ static const exit_handler_fn pvm_exit_handlers[] = {
[0 ... ESR_ELx_EC_MAX] = NULL,
[ESR_ELx_EC_SYS64] = kvm_handle_pvm_sys64,
[ESR_ELx_EC_SVE] = kvm_handle_pvm_restricted,
- [ESR_ELx_EC_FP_ASIMD] = kvm_handle_pvm_fpsimd,
+ [ESR_ELx_EC_FP_ASIMD] = kvm_hyp_handle_fpsimd,
[ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low,
[ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low,
[ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
@@ -304,7 +284,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
host_ctxt->__hyp_running_vcpu = vcpu;
guest_ctxt = &vcpu->arch.ctxt;
- pmu_switch_needed = __pmu_switch_to_guest(host_ctxt);
+ pmu_switch_needed = __pmu_switch_to_guest(vcpu);
__sysreg_save_state_nvhe(host_ctxt);
/*
@@ -366,7 +346,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
__debug_restore_host_buffers_nvhe(vcpu);
if (pmu_switch_needed)
- __pmu_switch_to_host(host_ctxt);
+ __pmu_switch_to_host(vcpu);
/* Returning to host will clear PSR.I, remask PMR if needed */
if (system_uses_irq_prio_masking())
@@ -377,7 +357,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
return exit_code;
}
-void __noreturn hyp_panic(void)
+asmlinkage void __noreturn hyp_panic(void)
{
u64 spsr = read_sysreg_el2(SYS_SPSR);
u64 elr = read_sysreg_el2(SYS_ELR);
@@ -399,6 +379,11 @@ void __noreturn hyp_panic(void)
unreachable();
}
+asmlinkage void __noreturn hyp_panic_bad_stack(void)
+{
+ hyp_panic();
+}
+
asmlinkage void kvm_unexpected_el2_exception(void)
{
return __kvm_unexpected_el2_exception();
diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
index 619f94fc95fa..b6d86e423319 100644
--- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c
+++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
@@ -90,9 +90,6 @@ static u64 get_pvm_id_aa64pfr0(const struct kvm_vcpu *vcpu)
u64 set_mask = 0;
u64 allow_mask = PVM_ID_AA64PFR0_ALLOW;
- if (!vcpu_has_sve(vcpu))
- allow_mask &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_SVE);
-
set_mask |= get_restricted_features_unsigned(id_aa64pfr0_el1_sys_val,
PVM_ID_AA64PFR0_RESTRICT_UNSIGNED);