about summary refs log tree commit diff stats
path: root/arch/arm64/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64/kvm')
-rw-r--r-- arch/arm64/kvm/.gitignore 2
-rw-r--r-- arch/arm64/kvm/Kconfig 2
-rw-r--r-- arch/arm64/kvm/Makefile 24
-rw-r--r-- arch/arm64/kvm/arch_timer.c 13
-rw-r--r-- arch/arm64/kvm/arm.c 140
-rw-r--r-- arch/arm64/kvm/fpsimd.c 79
-rw-r--r-- arch/arm64/kvm/handle_exit.c 10
-rw-r--r-- arch/arm64/kvm/hyp/Makefile 2
-rw-r--r-- arch/arm64/kvm/hyp/exception.c 5
-rw-r--r-- arch/arm64/kvm/hyp/fpsimd.S 6
-rw-r--r-- arch/arm64/kvm/hyp/hyp-constants.c 10
-rw-r--r-- arch/arm64/kvm/hyp/include/hyp/switch.h 44
-rw-r--r-- arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h 7
-rw-r--r-- arch/arm64/kvm/hyp/include/nvhe/mem_protect.h 6
-rw-r--r-- arch/arm64/kvm/hyp/include/nvhe/mm.h 59
-rw-r--r-- arch/arm64/kvm/hyp/nvhe/Makefile 1
-rw-r--r-- arch/arm64/kvm/hyp/nvhe/early_alloc.c 5
-rw-r--r-- arch/arm64/kvm/hyp/nvhe/hyp-main.c 8
-rw-r--r-- arch/arm64/kvm/hyp/nvhe/mem_protect.c 505
-rw-r--r-- arch/arm64/kvm/hyp/nvhe/mm.c 4
-rw-r--r-- arch/arm64/kvm/hyp/nvhe/page_alloc.c 2
-rw-r--r-- arch/arm64/kvm/hyp/nvhe/setup.c 25
-rw-r--r-- arch/arm64/kvm/hyp/nvhe/switch.c 9
-rw-r--r-- arch/arm64/kvm/hyp/pgtable.c 126
-rw-r--r-- arch/arm64/kvm/hyp/vgic-v3-sr.c 3
-rw-r--r-- arch/arm64/kvm/hyp/vhe/switch.c 5
-rw-r--r-- arch/arm64/kvm/mmu.c 177
-rw-r--r-- arch/arm64/kvm/perf.c 59
-rw-r--r-- arch/arm64/kvm/pkvm.c (renamed from arch/arm64/kvm/hyp/reserved_mem.c) 8
-rw-r--r-- arch/arm64/kvm/pmu-emul.c 5
-rw-r--r-- arch/arm64/kvm/psci.c 10
-rw-r--r-- arch/arm64/kvm/reset.c 38
-rw-r--r-- arch/arm64/kvm/sys_regs.c 2
-rw-r--r-- arch/arm64/kvm/vgic/vgic-init.c 12
-rw-r--r-- arch/arm64/kvm/vgic/vgic-kvm-device.c 2
-rw-r--r-- arch/arm64/kvm/vgic/vgic-mmio-v2.c 3
-rw-r--r-- arch/arm64/kvm/vgic/vgic-mmio-v3.c 15
-rw-r--r-- arch/arm64/kvm/vgic/vgic-mmio.c 2
-rw-r--r-- arch/arm64/kvm/vgic/vgic-mmio.h 2
-rw-r--r-- arch/arm64/kvm/vgic/vgic-v2.c 9
-rw-r--r-- arch/arm64/kvm/vgic/vgic-v3.c 27
-rw-r--r-- arch/arm64/kvm/vgic/vgic-v4.c 5
-rw-r--r-- arch/arm64/kvm/vgic/vgic.c 2
43 files changed, 1011 insertions, 469 deletions
diff --git a/arch/arm64/kvm/.gitignore b/arch/arm64/kvm/.gitignore
new file mode 100644
index 000000000000..6182aefb8302
--- /dev/null
+++ b/arch/arm64/kvm/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+hyp_constants.h
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 8ffcbe29395e..8a5fbbf084df 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -39,6 +39,8 @@ menuconfig KVM
select HAVE_KVM_IRQ_BYPASS
select HAVE_KVM_VCPU_RUN_PID_CHANGE
select SCHED_INFO
+ select GUEST_PERF_EVENTS if PERF_EVENTS
+ select INTERVAL_TREE
help
Support hosting virtualized guest machines.
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 989bb5dad2c8..91861fd8b897 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -5,17 +5,15 @@
ccflags-y += -I $(srctree)/$(src)
-KVM=../../../virt/kvm
+include $(srctree)/virt/kvm/Makefile.kvm
obj-$(CONFIG_KVM) += kvm.o
obj-$(CONFIG_KVM) += hyp/
-kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \
- $(KVM)/vfio.o $(KVM)/irqchip.o $(KVM)/binary_stats.o \
- arm.o mmu.o mmio.o psci.o perf.o hypercalls.o pvtime.o \
+kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
inject_fault.o va_layout.o handle_exit.o \
guest.o debug.o reset.o sys_regs.o \
- vgic-sys-reg-v3.o fpsimd.o pmu.o \
+ vgic-sys-reg-v3.o fpsimd.o pmu.o pkvm.o \
arch_timer.o trng.o\
vgic/vgic.o vgic/vgic-init.o \
vgic/vgic-irqfd.o vgic/vgic-v2.o \
@@ -25,3 +23,19 @@ kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \
vgic/vgic-its.o vgic/vgic-debug.o
kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o
+
+always-y := hyp_constants.h hyp-constants.s
+
+define rule_gen_hyp_constants
+ $(call filechk,offsets,__HYP_CONSTANTS_H__)
+endef
+
+CFLAGS_hyp-constants.o = -I $(srctree)/$(src)/hyp/include
+$(obj)/hyp-constants.s: $(src)/hyp/hyp-constants.c FORCE
+ $(call if_changed_dep,cc_s_c)
+
+$(obj)/hyp_constants.h: $(obj)/hyp-constants.s FORCE
+ $(call if_changed_rule,gen_hyp_constants)
+
+obj-kvm := $(addprefix $(obj)/, $(kvm-y))
+$(obj-kvm): $(obj)/hyp_constants.h
diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index 3df67c127489..6e542e2eae32 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -467,7 +467,7 @@ out:
}
/*
- * Schedule the background timer before calling kvm_vcpu_block, so that this
+ * Schedule the background timer before calling kvm_vcpu_halt, so that this
* thread is removed from its waitqueue and made runnable when there's a timer
* interrupt to handle.
*/
@@ -649,7 +649,6 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = vcpu_timer(vcpu);
struct timer_map map;
- struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);
if (unlikely(!timer->enabled))
return;
@@ -672,7 +671,7 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
if (map.emul_ptimer)
soft_timer_cancel(&map.emul_ptimer->hrtimer);
- if (rcuwait_active(wait))
+ if (kvm_vcpu_is_blocking(vcpu))
kvm_timer_blocking(vcpu);
/*
@@ -750,7 +749,7 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
/* Make the updates of cntvoff for all vtimer contexts atomic */
static void update_vtimer_cntvoff(struct kvm_vcpu *vcpu, u64 cntvoff)
{
- int i;
+ unsigned long i;
struct kvm *kvm = vcpu->kvm;
struct kvm_vcpu *tmp;
@@ -1189,8 +1188,8 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu)
{
- int vtimer_irq, ptimer_irq;
- int i, ret;
+ int vtimer_irq, ptimer_irq, ret;
+ unsigned long i;
vtimer_irq = vcpu_vtimer(vcpu)->irq.irq;
ret = kvm_vgic_set_owner(vcpu, vtimer_irq, vcpu_vtimer(vcpu));
@@ -1297,7 +1296,7 @@ void kvm_timer_init_vhe(void)
static void set_timer_irqs(struct kvm *kvm, int vtimer_irq, int ptimer_irq)
{
struct kvm_vcpu *vcpu;
- int i;
+ unsigned long i;
kvm_for_each_vcpu(i, vcpu, kvm) {
vcpu_vtimer(vcpu)->irq.irq = vtimer_irq;
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index e4727dc771bf..a4a0063df456 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -146,7 +146,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
if (ret)
return ret;
- ret = create_hyp_mappings(kvm, kvm + 1, PAGE_HYP);
+ ret = kvm_share_hyp(kvm, kvm + 1);
if (ret)
goto out_free_stage2_pgd;
@@ -175,19 +175,13 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
*/
void kvm_arch_destroy_vm(struct kvm *kvm)
{
- int i;
-
bitmap_free(kvm->arch.pmu_filter);
kvm_vgic_destroy(kvm);
- for (i = 0; i < KVM_MAX_VCPUS; ++i) {
- if (kvm->vcpus[i]) {
- kvm_vcpu_destroy(kvm->vcpus[i]);
- kvm->vcpus[i] = NULL;
- }
- }
- atomic_set(&kvm->online_vcpus, 0);
+ kvm_destroy_vcpus(kvm);
+
+ kvm_unshare_hyp(kvm, kvm + 1);
}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
@@ -342,7 +336,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
if (err)
return err;
- return create_hyp_mappings(vcpu, vcpu + 1, PAGE_HYP);
+ return kvm_share_hyp(vcpu, vcpu + 1);
}
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
@@ -351,7 +345,7 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
- if (vcpu->arch.has_run_once && unlikely(!irqchip_in_kernel(vcpu->kvm)))
+ if (vcpu_has_run_once(vcpu) && unlikely(!irqchip_in_kernel(vcpu->kvm)))
static_branch_dec(&userspace_irqchip_in_use);
kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
@@ -368,27 +362,12 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
{
- /*
- * If we're about to block (most likely because we've just hit a
- * WFI), we need to sync back the state of the GIC CPU interface
- * so that we have the latest PMR and group enables. This ensures
- * that kvm_arch_vcpu_runnable has up-to-date data to decide
- * whether we have pending interrupts.
- *
- * For the same reason, we want to tell GICv4 that we need
- * doorbells to be signalled, should an interrupt become pending.
- */
- preempt_disable();
- kvm_vgic_vmcr_sync(vcpu);
- vgic_v4_put(vcpu, true);
- preempt_enable();
+
}
void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
{
- preempt_disable();
- vgic_v4_load(vcpu);
- preempt_enable();
+
}
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -503,6 +482,13 @@ bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
return vcpu_mode_priv(vcpu);
}
+#ifdef CONFIG_GUEST_PERF_EVENTS
+/*
+ * Return the value currently held in the vCPU's program counter, as read
+ * through vcpu_pc(). Only built when CONFIG_GUEST_PERF_EVENTS is set.
+ */
+unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu)
+{
+ return *vcpu_pc(vcpu);
+}
+#endif
+
/* Just ensure a guest exit from a particular CPU */
static void exit_vm_noop(void *info)
{
@@ -584,18 +570,33 @@ static void update_vmid(struct kvm_vmid *vmid)
spin_unlock(&kvm_vmid_lock);
}
-static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
+static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.target >= 0;
+}
+
+/*
+ * Handle both the initialisation that is being done when the vcpu is
+ * run for the first time, as well as the updates that must be
+ * performed each time we get a new thread dealing with this vcpu.
+ */
+int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
{
struct kvm *kvm = vcpu->kvm;
- int ret = 0;
+ int ret;
- if (likely(vcpu->arch.has_run_once))
- return 0;
+ if (!kvm_vcpu_initialized(vcpu))
+ return -ENOEXEC;
if (!kvm_arm_vcpu_is_finalized(vcpu))
return -EPERM;
- vcpu->arch.has_run_once = true;
+ ret = kvm_arch_vcpu_run_map_fp(vcpu);
+ if (ret)
+ return ret;
+
+ if (likely(vcpu_has_run_once(vcpu)))
+ return 0;
kvm_arm_vcpu_init_debug(vcpu);
@@ -607,12 +608,6 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
ret = kvm_vgic_map_resources(kvm);
if (ret)
return ret;
- } else {
- /*
- * Tell the rest of the code that there are userspace irqchip
- * VMs in the wild.
- */
- static_branch_inc(&userspace_irqchip_in_use);
}
ret = kvm_timer_enable(vcpu);
@@ -620,6 +615,16 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
return ret;
ret = kvm_arm_pmu_v3_enable(vcpu);
+ if (ret)
+ return ret;
+
+ if (!irqchip_in_kernel(kvm)) {
+ /*
+ * Tell the rest of the code that there are userspace irqchip
+ * VMs in the wild.
+ */
+ static_branch_inc(&userspace_irqchip_in_use);
+ }
/*
* Initialize traps for protected VMs.
@@ -639,7 +644,7 @@ bool kvm_arch_intc_initialized(struct kvm *kvm)
void kvm_arm_halt_guest(struct kvm *kvm)
{
- int i;
+ unsigned long i;
struct kvm_vcpu *vcpu;
kvm_for_each_vcpu(i, vcpu, kvm)
@@ -649,12 +654,12 @@ void kvm_arm_halt_guest(struct kvm *kvm)
void kvm_arm_resume_guest(struct kvm *kvm)
{
- int i;
+ unsigned long i;
struct kvm_vcpu *vcpu;
kvm_for_each_vcpu(i, vcpu, kvm) {
vcpu->arch.pause = false;
- rcuwait_wake_up(kvm_arch_vcpu_get_wait(vcpu));
+ __kvm_vcpu_wake_up(vcpu);
}
}
@@ -679,9 +684,37 @@ static void vcpu_req_sleep(struct kvm_vcpu *vcpu)
smp_rmb();
}
-static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
+/**
+ * kvm_vcpu_wfi - emulate Wait-For-Interrupt behavior
+ * @vcpu: The VCPU pointer
+ *
+ * Suspend execution of a vCPU until a valid wake event is detected, i.e. until
+ * the vCPU is runnable. The vCPU may or may not be scheduled out, depending
+ * on when a wake event arrives, e.g. there may already be a pending wake event.
+ */
+void kvm_vcpu_wfi(struct kvm_vcpu *vcpu)
{
- return vcpu->arch.target >= 0;
+ /*
+ * Sync back the state of the GIC CPU interface so that we have
+ * the latest PMR and group enables. This ensures that
+ * kvm_arch_vcpu_runnable has up-to-date data to decide whether
+ * we have pending interrupts, e.g. when determining if the
+ * vCPU should block.
+ *
+ * For the same reason, we want to tell GICv4 that we need
+ * doorbells to be signalled, should an interrupt become pending.
+ */
+ preempt_disable();
+ kvm_vgic_vmcr_sync(vcpu);
+ vgic_v4_put(vcpu, true);
+ preempt_enable();
+
+ kvm_vcpu_halt(vcpu);
+ kvm_clear_request(KVM_REQ_UNHALT, vcpu);
+
+ preempt_disable();
+ vgic_v4_load(vcpu);
+ preempt_enable();
}
static void check_vcpu_requests(struct kvm_vcpu *vcpu)
@@ -779,13 +812,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
struct kvm_run *run = vcpu->run;
int ret;
- if (unlikely(!kvm_vcpu_initialized(vcpu)))
- return -ENOEXEC;
-
- ret = kvm_vcpu_first_run_init(vcpu);
- if (ret)
- return ret;
-
if (run->exit_reason == KVM_EXIT_MMIO) {
ret = kvm_handle_mmio_return(vcpu);
if (ret)
@@ -849,6 +875,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
}
kvm_arm_setup_debug(vcpu);
+ kvm_arch_vcpu_ctxflush_fp(vcpu);
/**************************************************************
* Enter the guest
@@ -1123,7 +1150,7 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
* need to invalidate the I-cache though, as FWB does *not*
* imply CTR_EL0.DIC.
*/
- if (vcpu->arch.has_run_once) {
+ if (vcpu_has_run_once(vcpu)) {
if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
stage2_unmap_vm(vcpu->kvm);
else
@@ -1775,7 +1802,8 @@ static int init_subsystems(void)
if (err)
goto out;
- kvm_perf_init();
+ kvm_register_perf_callbacks(NULL);
+
kvm_sys_reg_table_init();
out:
@@ -2035,7 +2063,7 @@ static int finalize_hyp_mode(void)
struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr)
{
struct kvm_vcpu *vcpu;
- int i;
+ unsigned long i;
mpidr &= MPIDR_HWID_BITMASK;
kvm_for_each_vcpu(i, vcpu, kvm) {
@@ -2163,7 +2191,7 @@ out_err:
/* NOP: Compiling as a module not supported */
void kvm_arch_exit(void)
{
- kvm_perf_teardown();
+ kvm_unregister_perf_callbacks();
}
static int __init early_kvm_mode_cfg(char *arg)
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index 5621020b28de..2f48fd362a8c 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -7,7 +7,6 @@
*/
#include <linux/irqflags.h>
#include <linux/sched.h>
-#include <linux/thread_info.h>
#include <linux/kvm_host.h>
#include <asm/fpsimd.h>
#include <asm/kvm_asm.h>
@@ -15,6 +14,19 @@
#include <asm/kvm_mmu.h>
#include <asm/sysreg.h>
+/*
+ * Stop sharing the recorded parent task's FPSIMD state with the hypervisor
+ * and drop the task reference held through vcpu->arch.parent_task.
+ *
+ * Does nothing unless protected KVM is enabled and a parent task has been
+ * recorded. The pointer is cleared before the reference is dropped, so that
+ * a later call (for instance after kvm_share_hyp() fails in
+ * kvm_arch_vcpu_run_map_fp(), which leaves parent_task untouched) can
+ * neither unshare the state nor put the task a second time.
+ */
+void kvm_vcpu_unshare_task_fp(struct kvm_vcpu *vcpu)
+{
+	struct task_struct *p = vcpu->arch.parent_task;
+	struct user_fpsimd_state *fpsimd;
+
+	if (!is_protected_kvm_enabled() || !p)
+		return;
+
+	fpsimd = &p->thread.uw.fpsimd_state;
+	kvm_unshare_hyp(fpsimd, fpsimd + 1);
+
+	/* The stored pointer must not outlive the reference that backs it. */
+	vcpu->arch.parent_task = NULL;
+	put_task_struct(p);
+}
+
/*
* Called on entry to KVM_RUN unless this vcpu previously ran at least
* once and the most recent prior KVM_RUN for this vcpu was called from
@@ -28,36 +40,29 @@ int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu)
{
int ret;
- struct thread_info *ti = &current->thread_info;
struct user_fpsimd_state *fpsimd = &current->thread.uw.fpsimd_state;
- /*
- * Make sure the host task thread flags and fpsimd state are
- * visible to hyp:
- */
- ret = create_hyp_mappings(ti, ti + 1, PAGE_HYP);
- if (ret)
- goto error;
+ kvm_vcpu_unshare_task_fp(vcpu);
- ret = create_hyp_mappings(fpsimd, fpsimd + 1, PAGE_HYP);
+ /* Make sure the host task fpsimd state is visible to hyp: */
+ ret = kvm_share_hyp(fpsimd, fpsimd + 1);
if (ret)
- goto error;
-
- if (vcpu->arch.sve_state) {
- void *sve_end;
+ return ret;
- sve_end = vcpu->arch.sve_state + vcpu_sve_state_size(vcpu);
+ vcpu->arch.host_fpsimd_state = kern_hyp_va(fpsimd);
- ret = create_hyp_mappings(vcpu->arch.sve_state, sve_end,
- PAGE_HYP);
- if (ret)
- goto error;
+ /*
+ * We need to keep current's task_struct pinned until its data has been
+ * unshared with the hypervisor to make sure it is not re-used by the
+ * kernel and donated to someone else while already shared -- see
+ * kvm_vcpu_unshare_task_fp() for the matching put_task_struct().
+ */
+ if (is_protected_kvm_enabled()) {
+ get_task_struct(current);
+ vcpu->arch.parent_task = current;
}
- vcpu->arch.host_thread_info = kern_hyp_va(ti);
- vcpu->arch.host_fpsimd_state = kern_hyp_va(fpsimd);
-error:
- return ret;
+ return 0;
}
/*
@@ -66,26 +71,27 @@ error:
*
* Here, we just set the correct metadata to indicate that the FPSIMD
* state in the cpu regs (if any) belongs to current on the host.
- *
- * TIF_SVE is backed up here, since it may get clobbered with guest state.
- * This flag is restored by kvm_arch_vcpu_put_fp(vcpu).
*/
void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
{
BUG_ON(!current->mm);
+ BUG_ON(test_thread_flag(TIF_SVE));
- vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED |
- KVM_ARM64_HOST_SVE_IN_USE |
- KVM_ARM64_HOST_SVE_ENABLED);
+ vcpu->arch.flags &= ~KVM_ARM64_FP_ENABLED;
vcpu->arch.flags |= KVM_ARM64_FP_HOST;
- if (test_thread_flag(TIF_SVE))
- vcpu->arch.flags |= KVM_ARM64_HOST_SVE_IN_USE;
-
if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
vcpu->arch.flags |= KVM_ARM64_HOST_SVE_ENABLED;
}
+/*
+ * Mirror the current thread's TIF_FOREIGN_FPSTATE flag into the
+ * KVM_ARM64_FP_FOREIGN_FPSTATE bit of vcpu->arch.flags.
+ */
+void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu)
+{
+	/* Start from a cleared bit, then set it only if the thread flag is set. */
+	vcpu->arch.flags &= ~KVM_ARM64_FP_FOREIGN_FPSTATE;
+
+	if (test_thread_flag(TIF_FOREIGN_FPSTATE))
+		vcpu->arch.flags |= KVM_ARM64_FP_FOREIGN_FPSTATE;
+}
+
/*
* If the guest FPSIMD state was loaded, update the host's context
* tracking data mark the CPU FPSIMD regs as dirty and belonging to vcpu
@@ -115,13 +121,11 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
{
unsigned long flags;
- bool host_has_sve = system_supports_sve();
- bool guest_has_sve = vcpu_has_sve(vcpu);
local_irq_save(flags);
if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
- if (guest_has_sve) {
+ if (vcpu_has_sve(vcpu)) {
__vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR);
/* Restore the VL that was saved when bound to the CPU */
@@ -131,7 +135,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
}
fpsimd_save_and_flush_cpu_state();
- } else if (has_vhe() && host_has_sve) {
+ } else if (has_vhe() && system_supports_sve()) {
/*
* The FPSIMD/SVE state in the CPU has not been touched, and we
* have SVE (and VHE): CPACR_EL1 (alias CPTR_EL2) has been
@@ -145,8 +149,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
sysreg_clear_set(CPACR_EL1, CPACR_EL1_ZEN_EL0EN, 0);
}
- update_thread_flag(TIF_SVE,
- vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE);
+ update_thread_flag(TIF_SVE, 0);
local_irq_restore(flags);
}
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 275a27368a04..fd2dd26caf91 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -82,7 +82,7 @@ static int handle_no_fpsimd(struct kvm_vcpu *vcpu)
*
* WFE: Yield the CPU and come back to this vcpu when the scheduler
* decides to.
- * WFI: Simply call kvm_vcpu_block(), which will halt execution of
+ * WFI: Simply call kvm_vcpu_halt(), which will halt execution of
* world-switches and schedule other host processes until there is an
* incoming IRQ or FIQ to the VM.
*/
@@ -95,8 +95,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu)
} else {
trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false);
vcpu->stat.wfi_exit_stat++;
- kvm_vcpu_block(vcpu);
- kvm_clear_request(KVM_REQ_UNHALT, vcpu);
+ kvm_vcpu_wfi(vcpu);
}
kvm_incr_pc(vcpu);
@@ -140,9 +139,12 @@ static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu)
return 1;
}
+/*
+ * Guest access to SVE registers should be routed to this handler only
+ * when the system doesn't support SVE.
+ */
static int handle_sve(struct kvm_vcpu *vcpu)
{
- /* Until SVE is supported for guests: */
kvm_inject_undefined(vcpu);
return 1;
}
diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile
index b726332eec49..687598e41b21 100644
--- a/arch/arm64/kvm/hyp/Makefile
+++ b/arch/arm64/kvm/hyp/Makefile
@@ -10,4 +10,4 @@ subdir-ccflags-y := -I$(incdir) \
-DDISABLE_BRANCH_PROFILING \
$(DISABLE_STACKLEAK_PLUGIN)
-obj-$(CONFIG_KVM) += vhe/ nvhe/ pgtable.o reserved_mem.o
+obj-$(CONFIG_KVM) += vhe/ nvhe/ pgtable.o
diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c
index 0418399e0a20..c5d009715402 100644
--- a/arch/arm64/kvm/hyp/exception.c
+++ b/arch/arm64/kvm/hyp/exception.c
@@ -38,7 +38,10 @@ static inline void __vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
+/*
+ * Store @val as SPSR_EL1 for @vcpu: with VHE it is written with
+ * write_sysreg_el1(), otherwise it is stored through __vcpu_sys_reg().
+ */
static void __vcpu_write_spsr(struct kvm_vcpu *vcpu, u64 val)
{
- write_sysreg_el1(val, SYS_SPSR);
+ if (has_vhe())
+ write_sysreg_el1(val, SYS_SPSR);
+ else
+ __vcpu_sys_reg(vcpu, SPSR_EL1) = val;
}
static void __vcpu_write_spsr_abt(struct kvm_vcpu *vcpu, u64 val)
diff --git a/arch/arm64/kvm/hyp/fpsimd.S b/arch/arm64/kvm/hyp/fpsimd.S
index e950875e31ce..61e6f3ba7b7d 100644
--- a/arch/arm64/kvm/hyp/fpsimd.S
+++ b/arch/arm64/kvm/hyp/fpsimd.S
@@ -25,9 +25,3 @@ SYM_FUNC_START(__sve_restore_state)
sve_load 0, x1, x2, 3
ret
SYM_FUNC_END(__sve_restore_state)
-
-SYM_FUNC_START(__sve_save_state)
- mov x2, #1
- sve_save 0, x1, x2, 3
- ret
-SYM_FUNC_END(__sve_save_state)
diff --git a/arch/arm64/kvm/hyp/hyp-constants.c b/arch/arm64/kvm/hyp/hyp-constants.c
new file mode 100644
index 000000000000..b3742a6691e8
--- /dev/null
+++ b/arch/arm64/kvm/hyp/hyp-constants.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/kbuild.h>
+#include <nvhe/memory.h>
+
+/*
+ * Holder for DEFINE() entries: the kvm Makefile compiles this file to
+ * hyp-constants.s and generates hyp_constants.h from that output.
+ */
+int main(void)
+{
+ /* Exports sizeof(struct hyp_page) under the name STRUCT_HYP_PAGE_SIZE. */
+ DEFINE(STRUCT_HYP_PAGE_SIZE, sizeof(struct hyp_page));
+ return 0;
+}
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 7a0af1d39303..58e14f8ead23 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -29,7 +29,6 @@
#include <asm/fpsimd.h>
#include <asm/debug-monitors.h>
#include <asm/processor.h>
-#include <asm/thread_info.h>
struct kvm_exception_table_entry {
int insn, fixup;
@@ -49,7 +48,7 @@ static inline bool update_fp_enabled(struct kvm_vcpu *vcpu)
* trap the accesses.
*/
if (!system_supports_fpsimd() ||
- vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE)
+ vcpu->arch.flags & KVM_ARM64_FP_FOREIGN_FPSTATE)
vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED |
KVM_ARM64_FP_HOST);
@@ -143,16 +142,6 @@ static inline bool __populate_fault_info(struct kvm_vcpu *vcpu)
return __get_fault_info(vcpu->arch.fault.esr_el2, &vcpu->arch.fault);
}
-static inline void __hyp_sve_save_host(struct kvm_vcpu *vcpu)
-{
- struct thread_struct *thread;
-
- thread = container_of(vcpu->arch.host_fpsimd_state, struct thread_struct,
- uw.fpsimd_state);
-
- __sve_save_state(sve_pffr(thread), &vcpu->arch.host_fpsimd_state->fpsr);
-}
-
static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
{
sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2);
@@ -169,21 +158,14 @@ static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
*/
static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
{
- bool sve_guest, sve_host;
+ bool sve_guest;
u8 esr_ec;
u64 reg;
if (!system_supports_fpsimd())
return false;
- if (system_supports_sve()) {
- sve_guest = vcpu_has_sve(vcpu);
- sve_host = vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE;
- } else {
- sve_guest = false;
- sve_host = false;
- }
-
+ sve_guest = vcpu_has_sve(vcpu);
esr_ec = kvm_vcpu_trap_get_class(vcpu);
/* Don't handle SVE traps for non-SVE vcpus here: */
@@ -207,11 +189,7 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
isb();
if (vcpu->arch.flags & KVM_ARM64_FP_HOST) {
- if (sve_host)
- __hyp_sve_save_host(vcpu);
- else
- __fpsimd_save_state(vcpu->arch.host_fpsimd_state);
-
+ __fpsimd_save_state(vcpu->arch.host_fpsimd_state);
vcpu->arch.flags &= ~KVM_ARM64_FP_HOST;
}
@@ -403,6 +381,8 @@ typedef bool (*exit_handler_fn)(struct kvm_vcpu *, u64 *);
static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu);
+static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code);
+
/*
* Allow the hypervisor to handle the exit with an exit handler if it has one.
*
@@ -429,6 +409,18 @@ static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
*/
static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
{
+ /*
+ * Save PSTATE early so that we can evaluate the vcpu mode
+ * early on.
+ */
+ vcpu->arch.ctxt.regs.pstate = read_sysreg_el2(SYS_SPSR);
+
+ /*
+ * Check whether we want to repaint the state one way or
+ * another.
+ */
+ early_exit_filter(vcpu, exit_code);
+
if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ)
vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR);
diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
index de7e14c862e6..7ecca8b07851 100644
--- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
+++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
@@ -70,7 +70,12 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
static inline void __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt)
{
ctxt->regs.pc = read_sysreg_el2(SYS_ELR);
- ctxt->regs.pstate = read_sysreg_el2(SYS_SPSR);
+ /*
+ * Guest PSTATE gets saved at guest fixup time in all
+ * cases. We still need to handle the nVHE host side here.
+ */
+ if (!has_vhe() && ctxt->__hyp_running_vcpu)
+ ctxt->regs.pstate = read_sysreg_el2(SYS_SPSR);
if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN))
ctxt_sys_reg(ctxt, DISR_EL1) = read_sysreg_s(SYS_VDISR_EL2);
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
index b58c910babaf..80e99836eac7 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
@@ -24,6 +24,11 @@ enum pkvm_page_state {
PKVM_PAGE_OWNED = 0ULL,
PKVM_PAGE_SHARED_OWNED = KVM_PGTABLE_PROT_SW0,
PKVM_PAGE_SHARED_BORROWED = KVM_PGTABLE_PROT_SW1,
+ __PKVM_PAGE_RESERVED = KVM_PGTABLE_PROT_SW0 |
+ KVM_PGTABLE_PROT_SW1,
+
+ /* Meta-states which aren't encoded directly in the PTE's SW bits */
+ PKVM_NOPAGE,
};
#define PKVM_PAGE_STATE_PROT_MASK (KVM_PGTABLE_PROT_SW0 | KVM_PGTABLE_PROT_SW1)
@@ -50,6 +55,7 @@ extern const u8 pkvm_hyp_id;
int __pkvm_prot_finalize(void);
int __pkvm_host_share_hyp(u64 pfn);
+int __pkvm_host_unshare_hyp(u64 pfn);
bool addr_is_memory(phys_addr_t phys);
int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot);
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h
index c9a8f535212e..2d08510c6cc1 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h
@@ -10,13 +10,8 @@
#include <nvhe/memory.h>
#include <nvhe/spinlock.h>
-#define HYP_MEMBLOCK_REGIONS 128
-extern struct memblock_region kvm_nvhe_sym(hyp_memory)[];
-extern unsigned int kvm_nvhe_sym(hyp_memblock_nr);
extern struct kvm_pgtable pkvm_pgtable;
extern hyp_spinlock_t pkvm_pgd_lock;
-extern struct hyp_pool hpool;
-extern u64 __io_map_base;
int hyp_create_idmap(u32 hyp_va_bits);
int hyp_map_vectors(void);
@@ -39,58 +34,4 @@ static inline void hyp_vmemmap_range(phys_addr_t phys, unsigned long size,
*end = ALIGN(*end, PAGE_SIZE);
}
-static inline unsigned long __hyp_pgtable_max_pages(unsigned long nr_pages)
-{
- unsigned long total = 0, i;
-
- /* Provision the worst case scenario */
- for (i = 0; i < KVM_PGTABLE_MAX_LEVELS; i++) {
- nr_pages = DIV_ROUND_UP(nr_pages, PTRS_PER_PTE);
- total += nr_pages;
- }
-
- return total;
-}
-
-static inline unsigned long __hyp_pgtable_total_pages(void)
-{
- unsigned long res = 0, i;
-
- /* Cover all of memory with page-granularity */
- for (i = 0; i < kvm_nvhe_sym(hyp_memblock_nr); i++) {
- struct memblock_region *reg = &kvm_nvhe_sym(hyp_memory)[i];
- res += __hyp_pgtable_max_pages(reg->size >> PAGE_SHIFT);
- }
-
- return res;
-}
-
-static inline unsigned long hyp_s1_pgtable_pages(void)
-{
- unsigned long res;
-
- res = __hyp_pgtable_total_pages();
-
- /* Allow 1 GiB for private mappings */
- res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT);
-
- return res;
-}
-
-static inline unsigned long host_s2_pgtable_pages(void)
-{
- unsigned long res;
-
- /*
- * Include an extra 16 pages to safely upper-bound the worst case of
- * concatenated pgds.
- */
- res = __hyp_pgtable_total_pages() + 16;
-
- /* Allow 1 GiB for MMIO mappings */
- res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT);
-
- return res;
-}
-
#endif /* __KVM_HYP_MM_H */
diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
index c3c11974fa3b..24b2c2425b38 100644
--- a/arch/arm64/kvm/hyp/nvhe/Makefile
+++ b/arch/arm64/kvm/hyp/nvhe/Makefile
@@ -89,6 +89,7 @@ KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS) $(CC_FLAGS_CFI)
# cause crashes. Just disable it.
GCOV_PROFILE := n
KASAN_SANITIZE := n
+KCSAN_SANITIZE := n
UBSAN_SANITIZE := n
KCOV_INSTRUMENT := n
diff --git a/arch/arm64/kvm/hyp/nvhe/early_alloc.c b/arch/arm64/kvm/hyp/nvhe/early_alloc.c
index 1306c430ab87..00de04153cc6 100644
--- a/arch/arm64/kvm/hyp/nvhe/early_alloc.c
+++ b/arch/arm64/kvm/hyp/nvhe/early_alloc.c
@@ -43,6 +43,9 @@ void *hyp_early_alloc_page(void *arg)
return hyp_early_alloc_contig(1);
}
+/*
+ * Empty get_page/put_page callbacks; hyp_early_alloc_init() installs them in
+ * hyp_early_alloc_mm_ops. Both ignore @addr.
+ */
+static void hyp_early_alloc_get_page(void *addr) { }
+static void hyp_early_alloc_put_page(void *addr) { }
+
void hyp_early_alloc_init(void *virt, unsigned long size)
{
base = cur = (unsigned long)virt;
@@ -51,4 +54,6 @@ void hyp_early_alloc_init(void *virt, unsigned long size)
hyp_early_alloc_mm_ops.zalloc_page = hyp_early_alloc_page;
hyp_early_alloc_mm_ops.phys_to_virt = hyp_phys_to_virt;
hyp_early_alloc_mm_ops.virt_to_phys = hyp_virt_to_phys;
+ hyp_early_alloc_mm_ops.get_page = hyp_early_alloc_get_page;
+ hyp_early_alloc_mm_ops.put_page = hyp_early_alloc_put_page;
}
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index b096bf009144..5e2197db0d32 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -147,6 +147,13 @@ static void handle___pkvm_host_share_hyp(struct kvm_cpu_context *host_ctxt)
cpu_reg(host_ctxt, 1) = __pkvm_host_share_hyp(pfn);
}
+/*
+ * Host hypercall handler (registered in host_hcall[]): takes the pfn
+ * argument from host register 1, calls __pkvm_host_unshare_hyp() on it and
+ * writes the return value back to host register 1.
+ */
+static void handle___pkvm_host_unshare_hyp(struct kvm_cpu_context *host_ctxt)
+{
+ DECLARE_REG(u64, pfn, host_ctxt, 1);
+
+ cpu_reg(host_ctxt, 1) = __pkvm_host_unshare_hyp(pfn);
+}
+
static void handle___pkvm_create_private_mapping(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(phys_addr_t, phys, host_ctxt, 1);
@@ -184,6 +191,7 @@ static const hcall_t host_hcall[] = {
HANDLE_FUNC(__pkvm_prot_finalize),
HANDLE_FUNC(__pkvm_host_share_hyp),
+ HANDLE_FUNC(__pkvm_host_unshare_hyp),
HANDLE_FUNC(__kvm_adjust_pc),
HANDLE_FUNC(__kvm_vcpu_run),
HANDLE_FUNC(__kvm_flush_vm_context),
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index c1a90dd022b8..674f10564373 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -9,6 +9,7 @@
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pgtable.h>
+#include <asm/kvm_pkvm.h>
#include <asm/stage2_pgtable.h>
#include <hyp/fault.h>
@@ -27,6 +28,26 @@ static struct hyp_pool host_s2_pool;
const u8 pkvm_hyp_id = 1;
+/* Lock the host component: takes host_kvm.lock. */
+static void host_lock_component(void)
+{
+ hyp_spin_lock(&host_kvm.lock);
+}
+
+/* Unlock the host component: releases host_kvm.lock. */
+static void host_unlock_component(void)
+{
+ hyp_spin_unlock(&host_kvm.lock);
+}
+
+/* Lock the hyp component: takes pkvm_pgd_lock. */
+static void hyp_lock_component(void)
+{
+ hyp_spin_lock(&pkvm_pgd_lock);
+}
+
+/* Unlock the hyp component: releases pkvm_pgd_lock. */
+static void hyp_unlock_component(void)
+{
+ hyp_spin_unlock(&pkvm_pgd_lock);
+}
+
static void *host_s2_zalloc_pages_exact(size_t size)
{
void *addr = hyp_alloc_pages(&host_s2_pool, get_order(size));
@@ -103,19 +124,19 @@ int kvm_host_prepare_stage2(void *pgt_pool_base)
prepare_host_vtcr();
hyp_spin_lock_init(&host_kvm.lock);
+ mmu->arch = &host_kvm.arch;
ret = prepare_s2_pool(pgt_pool_base);
if (ret)
return ret;
- ret = __kvm_pgtable_stage2_init(&host_kvm.pgt, &host_kvm.arch,
+ ret = __kvm_pgtable_stage2_init(&host_kvm.pgt, mmu,
&host_kvm.mm_ops, KVM_HOST_S2_FLAGS,
host_stage2_force_pte_cb);
if (ret)
return ret;
mmu->pgd_phys = __hyp_pa(host_kvm.pgt.pgd);
- mmu->arch = &host_kvm.arch;
mmu->pgt = &host_kvm.pgt;
WRITE_ONCE(mmu->vmid.vmid_gen, 0);
WRITE_ONCE(mmu->vmid.vmid, 0);
@@ -338,116 +359,446 @@ static int host_stage2_idmap(u64 addr)
prot = is_memory ? PKVM_HOST_MEM_PROT : PKVM_HOST_MMIO_PROT;
- hyp_spin_lock(&host_kvm.lock);
+ host_lock_component();
ret = host_stage2_adjust_range(addr, &range);
if (ret)
goto unlock;
ret = host_stage2_idmap_locked(range.start, range.end - range.start, prot);
unlock:
- hyp_spin_unlock(&host_kvm.lock);
+ host_unlock_component();
return ret;
}
-static inline bool check_prot(enum kvm_pgtable_prot prot,
- enum kvm_pgtable_prot required,
- enum kvm_pgtable_prot denied)
+void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
{
- return (prot & (required | denied)) == required;
+ struct kvm_vcpu_fault_info fault;
+ u64 esr, addr;
+ int ret = 0;
+
+ esr = read_sysreg_el2(SYS_ESR);
+ BUG_ON(!__get_fault_info(esr, &fault));
+
+ addr = (fault.hpfar_el2 & HPFAR_MASK) << 8;
+ ret = host_stage2_idmap(addr);
+ BUG_ON(ret && ret != -EAGAIN);
}
-int __pkvm_host_share_hyp(u64 pfn)
+/*
+ * Identifies a component taking part in a memory transition.
+ *
+ * The enumerator order corresponds to the locking order: the share and
+ * unshare entry points in this file take the host lock before the hyp lock.
+ */
+enum pkvm_component_id {
+ PKVM_ID_HOST,
+ PKVM_ID_HYP,
+};
+
+/*
+ * Describes one memory transition between an initiator and a completer.
+ * The range covers nr_pages pages (nr_pages * PAGE_SIZE bytes) starting at
+ * initiator.addr.
+ */
+struct pkvm_mem_transition {
+ u64 nr_pages;
+
+ struct {
+ enum pkvm_component_id id;
+ /* Address in the initiator's address space */
+ u64 addr;
+
+ /* Per-initiator data; only the host variant exists so far. */
+ union {
+ struct {
+ /* Address in the completer's address space */
+ u64 completer_addr;
+ } host;
+ };
+ } initiator;
+
+ struct {
+ enum pkvm_component_id id;
+ } completer;
+};
+
+/*
+ * A share (or unshare) request: the transition itself plus the permissions
+ * the completer is asked to map the range with.
+ */
+struct pkvm_mem_share {
+ const struct pkvm_mem_transition tx;
+ const enum kvm_pgtable_prot completer_prot;
+};
+
+/*
+ * Argument handed to __check_page_state_visitor(): the state every visited
+ * PTE must be in, and the callback that decodes a PTE into a page state.
+ */
+struct check_walk_data {
+ enum pkvm_page_state desired;
+ enum pkvm_page_state (*get_page_state)(kvm_pte_t pte);
+};
+
+static int __check_page_state_visitor(u64 addr, u64 end, u32 level,
+ kvm_pte_t *ptep,
+ enum kvm_pgtable_walk_flags flag,
+ void * const arg)
{
- phys_addr_t addr = hyp_pfn_to_phys(pfn);
- enum kvm_pgtable_prot prot, cur;
- void *virt = __hyp_va(addr);
- enum pkvm_page_state state;
- kvm_pte_t pte;
- int ret;
+ struct check_walk_data *d = arg;
+ kvm_pte_t pte = *ptep;
- if (!addr_is_memory(addr))
+ if (kvm_pte_valid(pte) && !addr_is_memory(kvm_pte_to_phys(pte)))
return -EINVAL;
- hyp_spin_lock(&host_kvm.lock);
- hyp_spin_lock(&pkvm_pgd_lock);
+ return d->get_page_state(pte) == d->desired ? 0 : -EPERM;
+}
+
+/*
+ * Walk the leaf entries of @pgt covering [@addr, @addr + @size) and check
+ * each of them with __check_page_state_visitor(), passing @data through as
+ * the walker argument. Returns whatever kvm_pgtable_walk() returns.
+ */
+static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size,
+				  struct check_walk_data *data)
+{
+	struct kvm_pgtable_walker state_walker = {
+		.flags	= KVM_PGTABLE_WALK_LEAF,
+		.arg	= data,
+		.cb	= __check_page_state_visitor,
+	};
+
+	return kvm_pgtable_walk(pgt, addr, size, &state_walker);
+}
+
+
+/*
+ * Decode the page state held in a host stage-2 PTE.
+ *
+ * A non-zero but invalid PTE is reported as PKVM_NOPAGE; every other PTE
+ * (including an all-zero one) has its state extracted from the stage-2
+ * protection bits.
+ */
+static enum pkvm_page_state host_get_page_state(kvm_pte_t pte)
+{
+	const bool invalid_nonzero = pte && !kvm_pte_valid(pte);
+
+	if (invalid_nonzero)
+		return PKVM_NOPAGE;
+
+	return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte));
+}
+
+
+/*
+ * Check that every page of [@addr, @addr + @size) is in @state according
+ * to the host stage-2 page-table. Must be called with host_kvm.lock held.
+ */
+static int __host_check_page_state_range(u64 addr, u64 size,
+					 enum pkvm_page_state state)
+{
+	struct check_walk_data host_walk = {
+		.get_page_state	= host_get_page_state,
+		.desired	= state,
+	};
+
+	hyp_assert_lock_held(&host_kvm.lock);
+
+	return check_page_state_range(&host_kvm.pgt, addr, size, &host_walk);
+}
+
+
+/*
+ * Re-map [@addr, @addr + @size) in the host stage-2 with the default host
+ * memory permissions tagged with @state.
+ */
+static int __host_set_page_state_range(u64 addr, u64 size,
+				       enum pkvm_page_state state)
+{
+	return host_stage2_idmap_locked(addr, size,
+					pkvm_mkstate(PKVM_HOST_MEM_PROT, state));
+}
+
+
+/*
+ * Host-side check for a transition that requires the host to exclusively
+ * own the range: reports the completer's address through @completer_addr
+ * and verifies that every page is PKVM_PAGE_OWNED.
+ */
+static int host_request_owned_transition(u64 *completer_addr,
+					 const struct pkvm_mem_transition *tx)
+{
+	const u64 start = tx->initiator.addr;
+	const u64 len = tx->nr_pages * PAGE_SIZE;
+
+	*completer_addr = tx->initiator.host.completer_addr;
+
+	return __host_check_page_state_range(start, len, PKVM_PAGE_OWNED);
+}
+
+
+/*
+ * Host-side check for an unshare: reports the completer's address through
+ * @completer_addr and verifies that every page is PKVM_PAGE_SHARED_OWNED.
+ */
+static int host_request_unshare(u64 *completer_addr,
+				const struct pkvm_mem_transition *tx)
+{
+	const u64 start = tx->initiator.addr;
+	const u64 len = tx->nr_pages * PAGE_SIZE;
+
+	*completer_addr = tx->initiator.host.completer_addr;
+
+	return __host_check_page_state_range(start, len,
+					     PKVM_PAGE_SHARED_OWNED);
+}
+
+
+/*
+ * Host-side half of a share: reports the completer's address through
+ * @completer_addr and marks the range PKVM_PAGE_SHARED_OWNED in the host
+ * stage-2.
+ */
+static int host_initiate_share(u64 *completer_addr,
+			       const struct pkvm_mem_transition *tx)
+{
+	const u64 start = tx->initiator.addr;
+	const u64 len = tx->nr_pages * PAGE_SIZE;
+
+	*completer_addr = tx->initiator.host.completer_addr;
+
+	return __host_set_page_state_range(start, len, PKVM_PAGE_SHARED_OWNED);
+}
+
+
+/*
+ * Host-side half of an unshare: reports the completer's address through
+ * @completer_addr and marks the range PKVM_PAGE_OWNED again in the host
+ * stage-2.
+ */
+static int host_initiate_unshare(u64 *completer_addr,
+				 const struct pkvm_mem_transition *tx)
+{
+	const u64 start = tx->initiator.addr;
+	const u64 len = tx->nr_pages * PAGE_SIZE;
+
+	*completer_addr = tx->initiator.host.completer_addr;
+
+	return __host_set_page_state_range(start, len, PKVM_PAGE_OWNED);
+}
+
+
+/*
+ * Decode the page state held in a hypervisor stage-1 PTE.
+ *
+ * An invalid PTE is reported as PKVM_NOPAGE. Valid PTEs are decoded with
+ * the hyp (stage-1) protection helper, since this callback is only used
+ * to walk pkvm_pgtable; the stage-2 helper interprets a different
+ * attribute layout and must not be used on stage-1 descriptors.
+ */
+static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte)
+{
+	if (!kvm_pte_valid(pte))
+		return PKVM_NOPAGE;
+
+	return pkvm_getstate(kvm_pgtable_hyp_pte_prot(pte));
+}
+
+
+/*
+ * Check that every page of [@addr, @addr + @size) is in @state according
+ * to the hypervisor page-table. Must be called with pkvm_pgd_lock held.
+ */
+static int __hyp_check_page_state_range(u64 addr, u64 size,
+					enum pkvm_page_state state)
+{
+	struct check_walk_data hyp_walk = {
+		.get_page_state	= hyp_get_page_state,
+		.desired	= state,
+	};
+
+	hyp_assert_lock_held(&pkvm_pgd_lock);
+
+	return check_page_state_range(&pkvm_pgtable, addr, size, &hyp_walk);
+}
+
+
+/*
+ * The hyp page-table check may be skipped only when the transition was
+ * initiated by the host and CONFIG_NVHE_EL2_DEBUG is disabled.
+ */
+static bool __hyp_ack_skip_pgtable_check(const struct pkvm_mem_transition *tx)
+{
+	return !IS_ENABLED(CONFIG_NVHE_EL2_DEBUG) &&
+	       tx->initiator.id == PKVM_ID_HOST;
+}
+
+
+/*
+ * Hypervisor-side check for an incoming share at @addr.
+ *
+ * Only PAGE_HYP permissions are accepted (-EPERM otherwise). Unless the
+ * page-table check can be skipped, the target range must currently be
+ * PKVM_NOPAGE in the hypervisor page-table.
+ */
+static int hyp_ack_share(u64 addr, const struct pkvm_mem_transition *tx,
+			 enum kvm_pgtable_prot perms)
+{
+	if (perms != PAGE_HYP)
+		return -EPERM;
+
+	if (!__hyp_ack_skip_pgtable_check(tx))
+		return __hyp_check_page_state_range(addr,
+						    tx->nr_pages * PAGE_SIZE,
+						    PKVM_NOPAGE);
+
+	return 0;
+}
+
+
+/*
+ * Hypervisor-side check for an incoming unshare at @addr: unless the
+ * page-table check can be skipped, the range must currently be
+ * PKVM_PAGE_SHARED_BORROWED in the hypervisor page-table.
+ */
+static int hyp_ack_unshare(u64 addr, const struct pkvm_mem_transition *tx)
+{
+	if (!__hyp_ack_skip_pgtable_check(tx))
+		return __hyp_check_page_state_range(addr,
+						    tx->nr_pages * PAGE_SIZE,
+						    PKVM_PAGE_SHARED_BORROWED);
+
+	return 0;
+}
+
+
+/*
+ * Hypervisor-side half of a share: map the range starting at @addr with
+ * @perms, tagged PKVM_PAGE_SHARED_BORROWED.
+ */
+static int hyp_complete_share(u64 addr, const struct pkvm_mem_transition *tx,
+			      enum kvm_pgtable_prot perms)
+{
+	void *from = (void *)addr;
+	void *to = from + (tx->nr_pages * PAGE_SIZE);
+
+	return pkvm_create_mappings_locked(from, to,
+			pkvm_mkstate(perms, PKVM_PAGE_SHARED_BORROWED));
+}
+
+
+/*
+ * Hypervisor-side half of an unshare: remove the mapping of the range
+ * starting at @addr from the hypervisor page-table.
+ *
+ * kvm_pgtable_hyp_unmap() returns the number of bytes it unmapped as a
+ * u64, so keep it in a u64: storing it in an int would truncate large
+ * ranges and make the comparison against the requested size unreliable.
+ *
+ * Returns 0 if the whole range was unmapped, -EFAULT otherwise.
+ */
+static int hyp_complete_unshare(u64 addr, const struct pkvm_mem_transition *tx)
+{
+	u64 size = tx->nr_pages * PAGE_SIZE;
+	u64 unmapped = kvm_pgtable_hyp_unmap(&pkvm_pgtable, addr, size);
+
+	return (unmapped != size) ? -EFAULT : 0;
+}
+
+
+static int check_share(struct pkvm_mem_share *share)
+{
+ const struct pkvm_mem_transition *tx = &share->tx;
+ u64 completer_addr;
+ int ret;
+
+ switch (tx->initiator.id) {
+ case PKVM_ID_HOST:
+ ret = host_request_owned_transition(&completer_addr, tx);
+ break;
+ default:
+ ret = -EINVAL;
+ }
- ret = kvm_pgtable_get_leaf(&host_kvm.pgt, addr, &pte, NULL);
if (ret)
- goto unlock;
- if (!pte)
- goto map_shared;
+ return ret;
- /*
- * Check attributes in the host stage-2 PTE. We need the page to be:
- * - mapped RWX as we're sharing memory;
- * - not borrowed, as that implies absence of ownership.
- * Otherwise, we can't let it got through
- */
- cur = kvm_pgtable_stage2_pte_prot(pte);
- prot = pkvm_mkstate(0, PKVM_PAGE_SHARED_BORROWED);
- if (!check_prot(cur, PKVM_HOST_MEM_PROT, prot)) {
- ret = -EPERM;
- goto unlock;
+ switch (tx->completer.id) {
+ case PKVM_ID_HYP:
+ ret = hyp_ack_share(completer_addr, tx, share->completer_prot);
+ break;
+ default:
+ ret = -EINVAL;
}
- state = pkvm_getstate(cur);
- if (state == PKVM_PAGE_OWNED)
- goto map_shared;
+ return ret;
+}
- /*
- * Tolerate double-sharing the same page, but this requires
- * cross-checking the hypervisor stage-1.
- */
- if (state != PKVM_PAGE_SHARED_OWNED) {
- ret = -EPERM;
- goto unlock;
+static int __do_share(struct pkvm_mem_share *share)
+{
+ const struct pkvm_mem_transition *tx = &share->tx;
+ u64 completer_addr;
+ int ret;
+
+ switch (tx->initiator.id) {
+ case PKVM_ID_HOST:
+ ret = host_initiate_share(&completer_addr, tx);
+ break;
+ default:
+ ret = -EINVAL;
}
- ret = kvm_pgtable_get_leaf(&pkvm_pgtable, (u64)virt, &pte, NULL);
if (ret)
- goto unlock;
+ return ret;
- /*
- * If the page has been shared with the hypervisor, it must be
- * already mapped as SHARED_BORROWED in its stage-1.
- */
- cur = kvm_pgtable_hyp_pte_prot(pte);
- prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED);
- if (!check_prot(cur, prot, ~prot))
- ret = -EPERM;
- goto unlock;
+ switch (tx->completer.id) {
+ case PKVM_ID_HYP:
+ ret = hyp_complete_share(completer_addr, tx, share->completer_prot);
+ break;
+ default:
+ ret = -EINVAL;
+ }
-map_shared:
- /*
- * If the page is not yet shared, adjust mappings in both page-tables
- * while both locks are held.
- */
- prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED);
- ret = pkvm_create_mappings_locked(virt, virt + PAGE_SIZE, prot);
- BUG_ON(ret);
+ return ret;
+}
+
+/*
+ * do_share():
+ *
+ * The page owner grants access to another component with a given set
+ * of permissions.
+ *
+ * Initiator: OWNED => SHARED_OWNED
+ * Completer: NOPAGE => SHARED_BORROWED
+ *
+ * Returns 0 on success or a negative error code. A failure of the commit
+ * phase after a successful check is unexpected and triggers a warning.
+ */
+static int do_share(struct pkvm_mem_share *share)
+{
+	int ret;
- prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_OWNED);
- ret = host_stage2_idmap_locked(addr, PAGE_SIZE, prot);
- BUG_ON(ret);
+	ret = check_share(share);
+	if (ret)
+		return ret;
-unlock:
- hyp_spin_unlock(&pkvm_pgd_lock);
- hyp_spin_unlock(&host_kvm.lock);
+	/*
+	 * WARN_ON() evaluates to a boolean, so returning it directly would
+	 * replace the error code with 1. Warn, then propagate the real value.
+	 */
+	ret = __do_share(share);
+	WARN_ON(ret);
+
+	return ret;
+}
+
+/*
+ * Validate an unshare request without modifying any page-table: the
+ * initiator must agree to give up the sharing and the completer must
+ * acknowledge it. Unknown initiator or completer IDs yield -EINVAL.
+ */
+static int check_unshare(struct pkvm_mem_share *share)
+{
+	const struct pkvm_mem_transition *tx = &share->tx;
+	u64 completer_addr;
+	int ret;
+
+	if (tx->initiator.id != PKVM_ID_HOST)
+		return -EINVAL;
+
+	ret = host_request_unshare(&completer_addr, tx);
+	if (ret)
+		return ret;
+
+	if (tx->completer.id == PKVM_ID_HYP)
+		ret = hyp_ack_unshare(completer_addr, tx);
+	else
+		ret = -EINVAL;
+
return ret;
}
-void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
+static int __do_unshare(struct pkvm_mem_share *share)
{
- struct kvm_vcpu_fault_info fault;
- u64 esr, addr;
- int ret = 0;
+ const struct pkvm_mem_transition *tx = &share->tx;
+ u64 completer_addr;
+ int ret;
- esr = read_sysreg_el2(SYS_ESR);
- BUG_ON(!__get_fault_info(esr, &fault));
+ switch (tx->initiator.id) {
+ case PKVM_ID_HOST:
+ ret = host_initiate_unshare(&completer_addr, tx);
+ break;
+ default:
+ ret = -EINVAL;
+ }
- addr = (fault.hpfar_el2 & HPFAR_MASK) << 8;
- ret = host_stage2_idmap(addr);
- BUG_ON(ret && ret != -EAGAIN);
+ if (ret)
+ return ret;
+
+ switch (tx->completer.id) {
+ case PKVM_ID_HYP:
+ ret = hyp_complete_unshare(completer_addr, tx);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+/*
+ * do_unshare():
+ *
+ * The page owner revokes access from another component for a range of
+ * pages which were previously shared using do_share().
+ *
+ * Initiator: SHARED_OWNED => OWNED
+ * Completer: SHARED_BORROWED => NOPAGE
+ *
+ * Returns 0 on success or a negative error code. A failure of the commit
+ * phase after a successful check is unexpected and triggers a warning.
+ */
+static int do_unshare(struct pkvm_mem_share *share)
+{
+	int ret;
+
+	ret = check_unshare(share);
+	if (ret)
+		return ret;
+
+	/*
+	 * WARN_ON() evaluates to a boolean, so returning it directly would
+	 * replace the error code with 1. Warn, then propagate the real value.
+	 */
+	ret = __do_unshare(share);
+	WARN_ON(ret);
+
+	return ret;
+}
+
+
+/*
+ * Share the single page @pfn, owned by the host, with the hypervisor.
+ *
+ * The page is identified by its physical address on the host side and by
+ * its __hyp_va() alias on the hypervisor side, and is mapped PAGE_HYP
+ * there. Both component locks are held across the transition, host first.
+ */
+int __pkvm_host_share_hyp(u64 pfn)
+{
+	u64 phys = hyp_pfn_to_phys(pfn);
+	u64 virt = (u64)__hyp_va(phys);
+	struct pkvm_mem_share share = {
+		.completer_prot	= PAGE_HYP,
+		.tx		= {
+			.nr_pages	= 1,
+			.initiator	= {
+				.id	= PKVM_ID_HOST,
+				.addr	= phys,
+				.host	= {
+					.completer_addr = virt,
+				},
+			},
+			.completer	= {
+				.id	= PKVM_ID_HYP,
+			},
+		},
+	};
+	int err;
+
+	host_lock_component();
+	hyp_lock_component();
+
+	err = do_share(&share);
+
+	hyp_unlock_component();
+	host_unlock_component();
+
+	return err;
+}
+
+
+/*
+ * Revoke the hypervisor's access to the single page @pfn, previously
+ * shared by the host.
+ *
+ * The page is identified by its physical address on the host side and by
+ * its __hyp_va() alias on the hypervisor side. Both component locks are
+ * held across the transition, host first.
+ */
+int __pkvm_host_unshare_hyp(u64 pfn)
+{
+	u64 phys = hyp_pfn_to_phys(pfn);
+	u64 virt = (u64)__hyp_va(phys);
+	struct pkvm_mem_share share = {
+		.completer_prot	= PAGE_HYP,
+		.tx		= {
+			.nr_pages	= 1,
+			.initiator	= {
+				.id	= PKVM_ID_HOST,
+				.addr	= phys,
+				.host	= {
+					.completer_addr = virt,
+				},
+			},
+			.completer	= {
+				.id	= PKVM_ID_HYP,
+			},
+		},
+	};
+	int err;
+
+	host_lock_component();
+	hyp_lock_component();
+
+	err = do_unshare(&share);
+
+	hyp_unlock_component();
+	host_unlock_component();
+
+	return err;
}
diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c
index 2fabeceb889a..526a7d6fa86f 100644
--- a/arch/arm64/kvm/hyp/nvhe/mm.c
+++ b/arch/arm64/kvm/hyp/nvhe/mm.c
@@ -8,6 +8,7 @@
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pgtable.h>
+#include <asm/kvm_pkvm.h>
#include <asm/spectre.h>
#include <nvhe/early_alloc.h>
@@ -18,11 +19,12 @@
struct kvm_pgtable pkvm_pgtable;
hyp_spinlock_t pkvm_pgd_lock;
-u64 __io_map_base;
struct memblock_region hyp_memory[HYP_MEMBLOCK_REGIONS];
unsigned int hyp_memblock_nr;
+static u64 __io_map_base;
+
static int __pkvm_create_mappings(unsigned long start, unsigned long size,
unsigned long phys, enum kvm_pgtable_prot prot)
{
diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
index 0bd7701ad1df..543cad6c376a 100644
--- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c
+++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
@@ -241,7 +241,7 @@ int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages,
int i;
hyp_spin_lock_init(&pool->lock);
- pool->max_order = min(MAX_ORDER, get_order(nr_pages << PAGE_SHIFT));
+ pool->max_order = min(MAX_ORDER, get_order((nr_pages + 1) << PAGE_SHIFT));
for (i = 0; i < pool->max_order; i++)
INIT_LIST_HEAD(&pool->free_area[i]);
pool->range_start = phys;
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index 578f71798c2e..27af337f9fea 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -8,6 +8,7 @@
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pgtable.h>
+#include <asm/kvm_pkvm.h>
#include <nvhe/early_alloc.h>
#include <nvhe/fixed_config.h>
@@ -17,7 +18,6 @@
#include <nvhe/mm.h>
#include <nvhe/trap_handler.h>
-struct hyp_pool hpool;
unsigned long hyp_nr_cpus;
#define hyp_percpu_size ((unsigned long)__per_cpu_end - \
@@ -27,6 +27,7 @@ static void *vmemmap_base;
static void *hyp_pgt_base;
static void *host_s2_pgt_base;
static struct kvm_pgtable_mm_ops pkvm_pgtable_mm_ops;
+static struct hyp_pool hpool;
static int divide_memory_pool(void *virt, unsigned long size)
{
@@ -165,6 +166,7 @@ static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level,
enum kvm_pgtable_walk_flags flag,
void * const arg)
{
+ struct kvm_pgtable_mm_ops *mm_ops = arg;
enum kvm_pgtable_prot prot;
enum pkvm_page_state state;
kvm_pte_t pte = *ptep;
@@ -173,6 +175,15 @@ static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level,
if (!kvm_pte_valid(pte))
return 0;
+ /*
+ * Fix-up the refcount for the page-table pages as the early allocator
+ * was unable to access the hyp_vmemmap and so the buddy allocator has
+ * initialised the refcount to '1'.
+ */
+ mm_ops->get_page(ptep);
+ if (flag != KVM_PGTABLE_WALK_LEAF)
+ return 0;
+
if (level != (KVM_PGTABLE_MAX_LEVELS - 1))
return -EINVAL;
@@ -205,7 +216,8 @@ static int finalize_host_mappings(void)
{
struct kvm_pgtable_walker walker = {
.cb = finalize_host_mappings_walker,
- .flags = KVM_PGTABLE_WALK_LEAF,
+ .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
+ .arg = pkvm_pgtable.mm_ops,
};
int i, ret;
@@ -240,19 +252,20 @@ void __noreturn __pkvm_init_finalise(void)
if (ret)
goto out;
- ret = finalize_host_mappings();
- if (ret)
- goto out;
-
pkvm_pgtable_mm_ops = (struct kvm_pgtable_mm_ops) {
.zalloc_page = hyp_zalloc_hyp_page,
.phys_to_virt = hyp_phys_to_virt,
.virt_to_phys = hyp_virt_to_phys,
.get_page = hpool_get_page,
.put_page = hpool_put_page,
+ .page_count = hyp_page_count,
};
pkvm_pgtable.mm_ops = &pkvm_pgtable_mm_ops;
+ ret = finalize_host_mappings();
+ if (ret)
+ goto out;
+
out:
/*
* We tail-called to here from handle___pkvm_init() and will not return,
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index c0e3fed26d93..6410d21d8695 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -25,7 +25,6 @@
#include <asm/fpsimd.h>
#include <asm/debug-monitors.h>
#include <asm/processor.h>
-#include <asm/thread_info.h>
#include <nvhe/fixed_config.h>
#include <nvhe/mem_protect.h>
@@ -233,7 +232,7 @@ static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu)
* Returns false if the guest ran in AArch32 when it shouldn't have, and
* thus should exit to the host, or true if a the guest run loop can continue.
*/
-static bool handle_aarch32_guest(struct kvm_vcpu *vcpu, u64 *exit_code)
+static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
{
struct kvm *kvm = kern_hyp_va(vcpu->kvm);
@@ -248,10 +247,7 @@ static bool handle_aarch32_guest(struct kvm_vcpu *vcpu, u64 *exit_code)
vcpu->arch.target = -1;
*exit_code &= BIT(ARM_EXIT_WITH_SERROR_BIT);
*exit_code |= ARM_EXCEPTION_IL;
- return false;
}
-
- return true;
}
/* Switch to the guest for legacy non-VHE systems */
@@ -316,9 +312,6 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
/* Jump in the fire! */
exit_code = __guest_enter(vcpu);
- if (unlikely(!handle_aarch32_guest(vcpu, &exit_code)))
- break;
-
/* And we're baaack! */
} while (fixup_guest_exit(vcpu, &exit_code));
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index f8ceebe4982e..2cb3867eb7c2 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -383,21 +383,6 @@ enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte)
return prot;
}
-static bool hyp_pte_needs_update(kvm_pte_t old, kvm_pte_t new)
-{
- /*
- * Tolerate KVM recreating the exact same mapping, or changing software
- * bits if the existing mapping was valid.
- */
- if (old == new)
- return false;
-
- if (!kvm_pte_valid(old))
- return true;
-
- return !WARN_ON((old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW);
-}
-
static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level,
kvm_pte_t *ptep, struct hyp_map_data *data)
{
@@ -407,11 +392,16 @@ static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level,
if (!kvm_block_mapping_supported(addr, end, phys, level))
return false;
+ data->phys += granule;
new = kvm_init_valid_leaf_pte(phys, data->attr, level);
- if (hyp_pte_needs_update(old, new))
- smp_store_release(ptep, new);
+ if (old == new)
+ return true;
+ if (!kvm_pte_valid(old))
+ data->mm_ops->get_page(ptep);
+ else if (WARN_ON((old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW))
+ return false;
- data->phys += granule;
+ smp_store_release(ptep, new);
return true;
}
@@ -433,6 +423,7 @@ static int hyp_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
return -ENOMEM;
kvm_set_table_pte(ptep, childp, mm_ops);
+ mm_ops->get_page(ptep);
return 0;
}
@@ -460,6 +451,69 @@ int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
return ret;
}
+/*
+ * State shared between kvm_pgtable_hyp_unmap() and hyp_unmap_walker():
+ * @unmapped accumulates the granule size of every leaf that was cleared,
+ * @mm_ops is the page-table's memory-management callback table.
+ */
+struct hyp_unmap_data {
+ u64 unmapped;
+ struct kvm_pgtable_mm_ops *mm_ops;
+};
+
+/*
+ * Walker callback used by kvm_pgtable_hyp_unmap(), which registers it for
+ * both KVM_PGTABLE_WALK_LEAF and KVM_PGTABLE_WALK_TABLE_POST visits.
+ *
+ * A valid leaf entry is cleared and its granule size is added to
+ * data->unmapped. A table entry is cleared only when page_count() of the
+ * child table is 1; otherwise it is left installed.
+ *
+ * Returns 0 on success, or -EINVAL if the entry is invalid or if the
+ * remaining range (end - addr) is smaller than the leaf's granule.
+ */
+static int hyp_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
+ enum kvm_pgtable_walk_flags flag, void * const arg)
+{
+ kvm_pte_t pte = *ptep, *childp = NULL;
+ u64 granule = kvm_granule_size(level);
+ struct hyp_unmap_data *data = arg;
+ struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
+
+ if (!kvm_pte_valid(pte))
+ return -EINVAL;
+
+ if (kvm_pte_table(pte, level)) {
+ childp = kvm_pte_follow(pte, mm_ops);
+
+ /* NOTE(review): a count other than 1 presumably means the child still holds live entries - confirm */
+ if (mm_ops->page_count(childp) != 1)
+ return 0;
+
+ /* Clear the table entry, then invalidate by VA (vae2is). */
+ kvm_clear_pte(ptep);
+ dsb(ishst);
+ __tlbi_level(vae2is, __TLBI_VADDR(addr, 0), level);
+ } else {
+ /* Refuse to unmap only part of a leaf mapping. */
+ if (end - addr < granule)
+ return -EINVAL;
+
+ /* Clear the leaf, then invalidate its last-level entry (vale2is). */
+ kvm_clear_pte(ptep);
+ dsb(ishst);
+ __tlbi_level(vale2is, __TLBI_VADDR(addr, 0), level);
+ data->unmapped += granule;
+ }
+
+ /* Wait for the invalidation to complete before dropping references. */
+ dsb(ish);
+ isb();
+ /* Drop the reference on the page holding the entry just cleared. */
+ mm_ops->put_page(ptep);
+
+ /* For a removed table entry, also drop the child table's reference. */
+ if (childp)
+ mm_ops->put_page(childp);
+
+ return 0;
+}
+
+
+/*
+ * Remove the hypervisor mappings covering [@addr, @addr + @size) from @pgt.
+ *
+ * Returns the number of bytes unmapped, as accumulated by
+ * hyp_unmap_walker(). Returns 0 without walking when the page-table's
+ * mm_ops provides no page_count() callback. The return value of the walk
+ * itself is not reported.
+ */
+u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
+{
+	struct hyp_unmap_data state = {
+		.mm_ops	= pgt->mm_ops,
+	};
+	struct kvm_pgtable_walker unmap_walker = {
+		.flags	= KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
+		.arg	= &state,
+		.cb	= hyp_unmap_walker,
+	};
+
+	if (pgt->mm_ops->page_count)
+		kvm_pgtable_walk(pgt, addr, size, &unmap_walker);
+
+	return state.unmapped;
+}
+
+
int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits,
struct kvm_pgtable_mm_ops *mm_ops)
{
@@ -482,8 +536,16 @@ static int hyp_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
enum kvm_pgtable_walk_flags flag, void * const arg)
{
struct kvm_pgtable_mm_ops *mm_ops = arg;
+ kvm_pte_t pte = *ptep;
+
+ if (!kvm_pte_valid(pte))
+ return 0;
+
+ mm_ops->put_page(ptep);
+
+ if (kvm_pte_table(pte, level))
+ mm_ops->put_page(kvm_pte_follow(pte, mm_ops));
- mm_ops->put_page((void *)kvm_pte_follow(*ptep, mm_ops));
return 0;
}
@@ -491,7 +553,7 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt)
{
struct kvm_pgtable_walker walker = {
.cb = hyp_free_walker,
- .flags = KVM_PGTABLE_WALK_TABLE_POST,
+ .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
.arg = pgt->mm_ops,
};
@@ -921,13 +983,9 @@ static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
*/
stage2_put_pte(ptep, mmu, addr, level, mm_ops);
- if (need_flush) {
- kvm_pte_t *pte_follow = kvm_pte_follow(pte, mm_ops);
-
- dcache_clean_inval_poc((unsigned long)pte_follow,
- (unsigned long)pte_follow +
- kvm_granule_size(level));
- }
+ if (need_flush && mm_ops->dcache_clean_inval_poc)
+ mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops),
+ kvm_granule_size(level));
if (childp)
mm_ops->put_page(childp);
@@ -1089,15 +1147,13 @@ static int stage2_flush_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
struct kvm_pgtable *pgt = arg;
struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
kvm_pte_t pte = *ptep;
- kvm_pte_t *pte_follow;
if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pgt, pte))
return 0;
- pte_follow = kvm_pte_follow(pte, mm_ops);
- dcache_clean_inval_poc((unsigned long)pte_follow,
- (unsigned long)pte_follow +
- kvm_granule_size(level));
+ if (mm_ops->dcache_clean_inval_poc)
+ mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops),
+ kvm_granule_size(level));
return 0;
}
@@ -1116,13 +1172,13 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
}
-int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch,
+int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
struct kvm_pgtable_mm_ops *mm_ops,
enum kvm_pgtable_stage2_flags flags,
kvm_pgtable_force_pte_cb_t force_pte_cb)
{
size_t pgd_sz;
- u64 vtcr = arch->vtcr;
+ u64 vtcr = mmu->arch->vtcr;
u32 ia_bits = VTCR_EL2_IPA(vtcr);
u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;
@@ -1135,7 +1191,7 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch,
pgt->ia_bits = ia_bits;
pgt->start_level = start_level;
pgt->mm_ops = mm_ops;
- pgt->mmu = &arch->mmu;
+ pgt->mmu = mmu;
pgt->flags = flags;
pgt->force_pte_cb = force_pte_cb;
diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c
index 20db2f281cf2..4fb419f7b8b6 100644
--- a/arch/arm64/kvm/hyp/vgic-v3-sr.c
+++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c
@@ -983,6 +983,9 @@ static void __vgic_v3_read_ctlr(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
val = ((vtr >> 29) & 7) << ICC_CTLR_EL1_PRI_BITS_SHIFT;
/* IDbits */
val |= ((vtr >> 23) & 7) << ICC_CTLR_EL1_ID_BITS_SHIFT;
+ /* SEIS */
+ if (kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_SEIS_MASK)
+ val |= BIT(ICC_CTLR_EL1_SEIS_SHIFT);
/* A3V */
val |= ((vtr >> 21) & 1) << ICC_CTLR_EL1_A3V_SHIFT;
/* EOImode */
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 5a2cb5d9bc4b..11d053fdd604 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -24,7 +24,6 @@
#include <asm/fpsimd.h>
#include <asm/debug-monitors.h>
#include <asm/processor.h>
-#include <asm/thread_info.h>
/* VHE specific context */
DEFINE_PER_CPU(struct kvm_host_data, kvm_host_data);
@@ -112,6 +111,10 @@ static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu)
return hyp_exit_handlers;
}
+/*
+ * VHE has no early exit filtering to do, so this is an empty stub. The
+ * nVHE function of the same name (hyp/nvhe/switch.c) is the one that may
+ * rewrite *exit_code.
+ */
+static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+}
+
/* Switch to the guest for VHE systems running in EL2 */
static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
{
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 326cdfec74a1..bc2aba953299 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -210,13 +210,13 @@ static void stage2_flush_vm(struct kvm *kvm)
{
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
- int idx;
+ int idx, bkt;
idx = srcu_read_lock(&kvm->srcu);
spin_lock(&kvm->mmu_lock);
slots = kvm_memslots(kvm);
- kvm_for_each_memslot(memslot, slots)
+ kvm_for_each_memslot(memslot, bkt, slots)
stage2_flush_memslot(kvm, memslot);
spin_unlock(&kvm->mmu_lock);
@@ -239,6 +239,9 @@ void free_hyp_pgds(void)
static bool kvm_host_owns_hyp_mappings(void)
{
+ if (is_kernel_in_hyp_mode())
+ return false;
+
if (static_branch_likely(&kvm_protected_mode_initialized))
return false;
@@ -281,14 +284,117 @@ static phys_addr_t kvm_kaddr_to_phys(void *kaddr)
}
}
-static int pkvm_share_hyp(phys_addr_t start, phys_addr_t end)
+/*
+ * Tracks one page shared with the hypervisor: @pfn is the key, @count is
+ * the number of outstanding share requests for it and @node links the
+ * entry into the hyp_shared_pfns rb-tree.
+ */
+struct hyp_shared_pfn {
+ u64 pfn;
+ int count;
+ struct rb_node node;
+};
+
+/* share_pfn_hyp() and unshare_pfn_hyp() take this mutex around every use of the tree. */
+static DEFINE_MUTEX(hyp_shared_pfns_lock);
+static struct rb_root hyp_shared_pfns = RB_ROOT;
+
+static struct hyp_shared_pfn *find_shared_pfn(u64 pfn, struct rb_node ***node,
+ struct rb_node **parent)
{
- phys_addr_t addr;
+ struct hyp_shared_pfn *this;
+
+ *node = &hyp_shared_pfns.rb_node;
+ *parent = NULL;
+ while (**node) {
+ this = container_of(**node, struct hyp_shared_pfn, node);
+ *parent = **node;
+ if (this->pfn < pfn)
+ *node = &((**node)->rb_left);
+ else if (this->pfn > pfn)
+ *node = &((**node)->rb_right);
+ else
+ return this;
+ }
+
+ return NULL;
+}
+
+/*
+ * Take a reference on the sharing of @pfn with the hypervisor.
+ *
+ * If the page is already tracked, only its count is incremented. Otherwise
+ * the share hypercall is issued and, only if it succeeds, a tracking entry
+ * with a count of 1 is inserted in hyp_shared_pfns. Inserting the entry
+ * before knowing the outcome would leave a stale node behind on failure,
+ * making later calls for the same pfn report success for a page that was
+ * never shared.
+ *
+ * Returns 0 on success, -ENOMEM if the tracking entry cannot be allocated,
+ * or the error returned by the hypercall.
+ */
+static int share_pfn_hyp(u64 pfn)
+{
+	struct rb_node **node, *parent;
+	struct hyp_shared_pfn *this;
+	int ret = 0;
+
+	mutex_lock(&hyp_shared_pfns_lock);
+	this = find_shared_pfn(pfn, &node, &parent);
+	if (this) {
+		this->count++;
+		goto unlock;
+	}
+
+	this = kzalloc(sizeof(*this), GFP_KERNEL);
+	if (!this) {
+		ret = -ENOMEM;
+		goto unlock;
+	}
+
+	ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp, pfn, 1);
+	if (ret) {
+		kfree(this);
+		goto unlock;
+	}
+
+	/*
+	 * The mutex has been held since the lookup, so the insertion point
+	 * returned by find_shared_pfn() is still valid.
+	 */
+	this->pfn = pfn;
+	this->count = 1;
+	rb_link_node(&this->node, parent, node);
+	rb_insert_color(&this->node, &hyp_shared_pfns);
+unlock:
+	mutex_unlock(&hyp_shared_pfns_lock);
+
+	return ret;
+}
+
+
+/*
+ * Drop one reference on the sharing of @pfn with the hypervisor.
+ *
+ * Warns and returns -ENOENT if the page is not tracked. When the last
+ * reference goes away, the tracking entry is removed and freed and the
+ * unshare hypercall is issued; its result is returned.
+ */
+static int unshare_pfn_hyp(u64 pfn)
+{
+	struct rb_node **link, *parent;
+	struct hyp_shared_pfn *entry;
+	int ret = 0;
+
+	mutex_lock(&hyp_shared_pfns_lock);
+
+	entry = find_shared_pfn(pfn, &link, &parent);
+	if (WARN_ON(!entry)) {
+		ret = -ENOENT;
+		goto unlock;
+	}
+
+	/* Other users still need the page shared. */
+	if (--entry->count)
+		goto unlock;
+
+	rb_erase(&entry->node, &hyp_shared_pfns);
+	kfree(entry);
+	ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp, pfn, 1);
+unlock:
+	mutex_unlock(&hyp_shared_pfns_lock);
+
+	return ret;
+}
+
+
+int kvm_share_hyp(void *from, void *to)
+{
+ phys_addr_t start, end, cur;
+ u64 pfn;
int ret;
- for (addr = ALIGN_DOWN(start, PAGE_SIZE); addr < end; addr += PAGE_SIZE) {
- ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp,
- __phys_to_pfn(addr));
+ if (is_kernel_in_hyp_mode())
+ return 0;
+
+ /*
+ * The share hcall maps things in the 'fixed-offset' region of the hyp
+ * VA space, so we can only share physically contiguous data-structures
+ * for now.
+ */
+ if (is_vmalloc_or_module_addr(from) || is_vmalloc_or_module_addr(to))
+ return -EINVAL;
+
+ if (kvm_host_owns_hyp_mappings())
+ return create_hyp_mappings(from, to, PAGE_HYP);
+
+ start = ALIGN_DOWN(__pa(from), PAGE_SIZE);
+ end = PAGE_ALIGN(__pa(to));
+ for (cur = start; cur < end; cur += PAGE_SIZE) {
+ pfn = __phys_to_pfn(cur);
+ ret = share_pfn_hyp(pfn);
if (ret)
return ret;
}
@@ -296,6 +402,22 @@ static int pkvm_share_hyp(phys_addr_t start, phys_addr_t end)
return 0;
}
+/*
+ * Undo kvm_share_hyp() for the kernel VA range [@from, @to).
+ *
+ * Does nothing when the kernel runs in hyp mode, when the host still owns
+ * the hyp mappings, or when @from is NULL. Otherwise every page overlapping
+ * the range is unshared, warning on each failure.
+ */
+void kvm_unshare_hyp(void *from, void *to)
+{
+	phys_addr_t cur, end;
+
+	if (is_kernel_in_hyp_mode() || kvm_host_owns_hyp_mappings() || !from)
+		return;
+
+	cur = ALIGN_DOWN(__pa(from), PAGE_SIZE);
+	end = PAGE_ALIGN(__pa(to));
+
+	while (cur < end) {
+		WARN_ON(unshare_pfn_hyp(__phys_to_pfn(cur)));
+		cur += PAGE_SIZE;
+	}
+}
+
+
/**
* create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode
* @from: The virtual kernel start address of the range
@@ -316,12 +438,8 @@ int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot)
if (is_kernel_in_hyp_mode())
return 0;
- if (!kvm_host_owns_hyp_mappings()) {
- if (WARN_ON(prot != PAGE_HYP))
- return -EPERM;
- return pkvm_share_hyp(kvm_kaddr_to_phys(from),
- kvm_kaddr_to_phys(to));
- }
+ if (!kvm_host_owns_hyp_mappings())
+ return -EPERM;
start = start & PAGE_MASK;
end = PAGE_ALIGN(end);
@@ -407,6 +525,9 @@ int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
unsigned long addr;
int ret;
+ if (is_protected_kvm_enabled())
+ return -EPERM;
+
*kaddr = ioremap(phys_addr, size);
if (!*kaddr)
return -ENOMEM;
@@ -516,7 +637,8 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu)
if (!pgt)
return -ENOMEM;
- err = kvm_pgtable_stage2_init(pgt, &kvm->arch, &kvm_s2_mm_ops);
+ mmu->arch = &kvm->arch;
+ err = kvm_pgtable_stage2_init(pgt, mmu, &kvm_s2_mm_ops);
if (err)
goto out_free_pgtable;
@@ -529,7 +651,6 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu)
for_each_possible_cpu(cpu)
*per_cpu_ptr(mmu->last_vcpu_ran, cpu) = -1;
- mmu->arch = &kvm->arch;
mmu->pgt = pgt;
mmu->pgd_phys = __pa(pgt->pgd);
WRITE_ONCE(mmu->vmid.vmid_gen, 0);
@@ -595,14 +716,14 @@ void stage2_unmap_vm(struct kvm *kvm)
{
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
- int idx;
+ int idx, bkt;
idx = srcu_read_lock(&kvm->srcu);
mmap_read_lock(current->mm);
spin_lock(&kvm->mmu_lock);
slots = kvm_memslots(kvm);
- kvm_for_each_memslot(memslot, slots)
+ kvm_for_each_memslot(memslot, bkt, slots)
stage2_unmap_memslot(kvm, memslot);
spin_unlock(&kvm->mmu_lock);
@@ -650,6 +771,9 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
KVM_PGTABLE_PROT_R |
(writable ? KVM_PGTABLE_PROT_W : 0);
+ if (is_protected_kvm_enabled())
+ return -EPERM;
+
size += offset_in_page(guest_ipa);
guest_ipa &= PAGE_MASK;
@@ -1463,7 +1587,6 @@ out:
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
- const struct kvm_userspace_memory_region *mem,
struct kvm_memory_slot *old,
const struct kvm_memory_slot *new,
enum kvm_mr_change change)
@@ -1473,25 +1596,24 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
* allocated dirty_bitmap[], dirty pages will be tracked while the
* memory slot is write protected.
*/
- if (change != KVM_MR_DELETE && mem->flags & KVM_MEM_LOG_DIRTY_PAGES) {
+ if (change != KVM_MR_DELETE && new->flags & KVM_MEM_LOG_DIRTY_PAGES) {
/*
* If we're with initial-all-set, we don't need to write
* protect any pages because they're all reported as dirty.
* Huge pages and normal pages will be write protect gradually.
*/
if (!kvm_dirty_log_manual_protect_and_init_set(kvm)) {
- kvm_mmu_wp_memory_region(kvm, mem->slot);
+ kvm_mmu_wp_memory_region(kvm, new->id);
}
}
}
int kvm_arch_prepare_memory_region(struct kvm *kvm,
- struct kvm_memory_slot *memslot,
- const struct kvm_userspace_memory_region *mem,
+ const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
- hva_t hva = mem->userspace_addr;
- hva_t reg_end = hva + mem->memory_size;
+ hva_t hva, reg_end;
int ret = 0;
if (change != KVM_MR_CREATE && change != KVM_MR_MOVE &&
@@ -1502,9 +1624,12 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
* Prevent userspace from creating a memory region outside of the IPA
* space addressable by the KVM guest IPA space.
*/
- if ((memslot->base_gfn + memslot->npages) > (kvm_phys_size(kvm) >> PAGE_SHIFT))
+ if ((new->base_gfn + new->npages) > (kvm_phys_size(kvm) >> PAGE_SHIFT))
return -EFAULT;
+ hva = new->userspace_addr;
+ reg_end = hva + (new->npages << PAGE_SHIFT);
+
mmap_read_lock(current->mm);
/*
* A memory region could potentially cover multiple VMAs, and any holes
@@ -1536,7 +1661,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
if (vma->vm_flags & VM_PFNMAP) {
/* IO region dirty page logging not allowed */
- if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) {
+ if (new->flags & KVM_MEM_LOG_DIRTY_PAGES) {
ret = -EINVAL;
break;
}
diff --git a/arch/arm64/kvm/perf.c b/arch/arm64/kvm/perf.c
deleted file mode 100644
index c84fe24b2ea1..000000000000
--- a/arch/arm64/kvm/perf.c
+++ /dev/null
@@ -1,59 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Based on the x86 implementation.
- *
- * Copyright (C) 2012 ARM Ltd.
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- */
-
-#include <linux/perf_event.h>
-#include <linux/kvm_host.h>
-
-#include <asm/kvm_emulate.h>
-
-DEFINE_STATIC_KEY_FALSE(kvm_arm_pmu_available);
-
-static int kvm_is_in_guest(void)
-{
- return kvm_get_running_vcpu() != NULL;
-}
-
-static int kvm_is_user_mode(void)
-{
- struct kvm_vcpu *vcpu;
-
- vcpu = kvm_get_running_vcpu();
-
- if (vcpu)
- return !vcpu_mode_priv(vcpu);
-
- return 0;
-}
-
-static unsigned long kvm_get_guest_ip(void)
-{
- struct kvm_vcpu *vcpu;
-
- vcpu = kvm_get_running_vcpu();
-
- if (vcpu)
- return *vcpu_pc(vcpu);
-
- return 0;
-}
-
-static struct perf_guest_info_callbacks kvm_guest_cbs = {
- .is_in_guest = kvm_is_in_guest,
- .is_user_mode = kvm_is_user_mode,
- .get_guest_ip = kvm_get_guest_ip,
-};
-
-int kvm_perf_init(void)
-{
- return perf_register_guest_info_callbacks(&kvm_guest_cbs);
-}
-
-int kvm_perf_teardown(void)
-{
- return perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
-}
diff --git a/arch/arm64/kvm/hyp/reserved_mem.c b/arch/arm64/kvm/pkvm.c
index 578670e3f608..ebecb7c045f4 100644
--- a/arch/arm64/kvm/hyp/reserved_mem.c
+++ b/arch/arm64/kvm/pkvm.c
@@ -8,10 +8,9 @@
#include <linux/memblock.h>
#include <linux/sort.h>
-#include <asm/kvm_host.h>
+#include <asm/kvm_pkvm.h>
-#include <nvhe/memory.h>
-#include <nvhe/mm.h>
+#include "hyp_constants.h"
static struct memblock_region *hyp_memory = kvm_nvhe_sym(hyp_memory);
static unsigned int *hyp_memblock_nr_ptr = &kvm_nvhe_sym(hyp_memblock_nr);
@@ -82,7 +81,8 @@ void __init kvm_hyp_reserve(void)
do {
prev = nr_pages;
nr_pages = hyp_mem_pages + prev;
- nr_pages = DIV_ROUND_UP(nr_pages * sizeof(struct hyp_page), PAGE_SIZE);
+ nr_pages = DIV_ROUND_UP(nr_pages * STRUCT_HYP_PAGE_SIZE,
+ PAGE_SIZE);
nr_pages += __hyp_pgtable_max_pages(nr_pages);
} while (nr_pages != prev);
hyp_mem_pages += nr_pages;
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index a5e4bbf5e68f..fbcfd4ec6f92 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -14,6 +14,8 @@
#include <kvm/arm_pmu.h>
#include <kvm/arm_vgic.h>
+DEFINE_STATIC_KEY_FALSE(kvm_arm_pmu_available);
+
static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx);
static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx);
static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc);
@@ -28,6 +30,7 @@ static u32 kvm_pmu_event_mask(struct kvm *kvm)
case ID_AA64DFR0_PMUVER_8_1:
case ID_AA64DFR0_PMUVER_8_4:
case ID_AA64DFR0_PMUVER_8_5:
+ case ID_AA64DFR0_PMUVER_8_7:
return GENMASK(15, 0);
default: /* Shouldn't be here, just for sanity */
WARN_ONCE(1, "Unknown PMU version %d\n", kvm->arch.pmuver);
@@ -900,7 +903,7 @@ static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
*/
static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
{
- int i;
+ unsigned long i;
struct kvm_vcpu *vcpu;
kvm_for_each_vcpu(i, vcpu, kvm) {
diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c
index 74c47d420253..3eae32876897 100644
--- a/arch/arm64/kvm/psci.c
+++ b/arch/arm64/kvm/psci.c
@@ -46,7 +46,7 @@ static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu)
* specification (ARM DEN 0022A). This means all suspend states
* for KVM will preserve the register state.
*/
- kvm_vcpu_block(vcpu);
+ kvm_vcpu_halt(vcpu);
kvm_clear_request(KVM_REQ_UNHALT, vcpu);
return PSCI_RET_SUCCESS;
@@ -109,7 +109,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
/*
* Make sure the reset request is observed if the change to
- * power_state is observed.
+ * power_off is observed.
*/
smp_wmb();
@@ -121,8 +121,8 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
{
- int i, matching_cpus = 0;
- unsigned long mpidr;
+ int matching_cpus = 0;
+ unsigned long i, mpidr;
unsigned long target_affinity;
unsigned long target_affinity_mask;
unsigned long lowest_affinity_level;
@@ -164,7 +164,7 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type)
{
- int i;
+ unsigned long i;
struct kvm_vcpu *tmp;
/*
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 426bd7fbc3fd..ecc40c8cd6f6 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -52,10 +52,10 @@ int kvm_arm_init_sve(void)
* The get_sve_reg()/set_sve_reg() ioctl interface will need
* to be extended with multiple register slice support in
* order to support vector lengths greater than
- * SVE_VL_ARCH_MAX:
+ * VL_ARCH_MAX:
*/
- if (WARN_ON(kvm_sve_max_vl > SVE_VL_ARCH_MAX))
- kvm_sve_max_vl = SVE_VL_ARCH_MAX;
+ if (WARN_ON(kvm_sve_max_vl > VL_ARCH_MAX))
+ kvm_sve_max_vl = VL_ARCH_MAX;
/*
* Don't even try to make use of vector lengths that
@@ -94,22 +94,31 @@ static int kvm_vcpu_finalize_sve(struct kvm_vcpu *vcpu)
{
void *buf;
unsigned int vl;
+ size_t reg_sz;
+ int ret;
vl = vcpu->arch.sve_max_vl;
/*
* Responsibility for these properties is shared between
- * kvm_arm_init_arch_resources(), kvm_vcpu_enable_sve() and
+ * kvm_arm_init_sve(), kvm_vcpu_enable_sve() and
* set_sve_vls(). Double-check here just to be sure:
*/
if (WARN_ON(!sve_vl_valid(vl) || vl > sve_max_virtualisable_vl() ||
- vl > SVE_VL_ARCH_MAX))
+ vl > VL_ARCH_MAX))
return -EIO;
- buf = kzalloc(SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl)), GFP_KERNEL_ACCOUNT);
+ reg_sz = vcpu_sve_state_size(vcpu);
+ buf = kzalloc(reg_sz, GFP_KERNEL_ACCOUNT);
if (!buf)
return -ENOMEM;
+ ret = kvm_share_hyp(buf, buf + reg_sz);
+ if (ret) {
+ kfree(buf);
+ return ret;
+ }
+
vcpu->arch.sve_state = buf;
vcpu->arch.flags |= KVM_ARM64_VCPU_SVE_FINALIZED;
return 0;
@@ -141,7 +150,13 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu)
void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu)
{
- kfree(vcpu->arch.sve_state);
+ void *sve_state = vcpu->arch.sve_state;
+
+ kvm_vcpu_unshare_task_fp(vcpu);
+ kvm_unshare_hyp(vcpu, vcpu + 1);
+ if (sve_state)
+ kvm_unshare_hyp(sve_state, sve_state + vcpu_sve_state_size(vcpu));
+ kfree(sve_state);
}
static void kvm_vcpu_reset_sve(struct kvm_vcpu *vcpu)
@@ -170,7 +185,7 @@ static bool vcpu_allowed_register_width(struct kvm_vcpu *vcpu)
{
struct kvm_vcpu *tmp;
bool is32bit;
- int i;
+ unsigned long i;
is32bit = vcpu_has_feature(vcpu, KVM_ARM_VCPU_EL1_32BIT);
if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1) && is32bit)
@@ -193,10 +208,9 @@ static bool vcpu_allowed_register_width(struct kvm_vcpu *vcpu)
* kvm_reset_vcpu - sets core registers and sys_regs to reset value
* @vcpu: The VCPU pointer
*
- * This function finds the right table above and sets the registers on
- * the virtual CPU struct to their architecturally defined reset
- * values, except for registers whose reset is deferred until
- * kvm_arm_vcpu_finalize().
+ * This function sets the registers on the virtual CPU struct to their
+ * architecturally defined reset values, except for registers whose reset is
+ * deferred until kvm_arm_vcpu_finalize().
*
* Note: This function can be called from two paths: The KVM_ARM_VCPU_INIT
* ioctl or as part of handling a request issued by another VCPU in the PSCI
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index e3ec1a44f94d..4dc2fba316ff 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1525,7 +1525,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
/* CRm=6 */
ID_SANITISED(ID_AA64ISAR0_EL1),
ID_SANITISED(ID_AA64ISAR1_EL1),
- ID_UNALLOCATED(6,2),
+ ID_SANITISED(ID_AA64ISAR2_EL1),
ID_UNALLOCATED(6,3),
ID_UNALLOCATED(6,4),
ID_UNALLOCATED(6,5),
diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
index 0a06d0648970..fc00304fe7d8 100644
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -70,8 +70,9 @@ void kvm_vgic_early_init(struct kvm *kvm)
*/
int kvm_vgic_create(struct kvm *kvm, u32 type)
{
- int i, ret;
struct kvm_vcpu *vcpu;
+ unsigned long i;
+ int ret;
if (irqchip_in_kernel(kvm))
return -EEXIST;
@@ -91,7 +92,7 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
return ret;
kvm_for_each_vcpu(i, vcpu, kvm) {
- if (vcpu->arch.has_run_once)
+ if (vcpu_has_run_once(vcpu))
goto out_unlock;
}
ret = 0;
@@ -255,7 +256,8 @@ int vgic_init(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
struct kvm_vcpu *vcpu;
- int ret = 0, i, idx;
+ int ret = 0, i;
+ unsigned long idx;
if (vgic_initialized(kvm))
return 0;
@@ -308,7 +310,7 @@ int vgic_init(struct kvm *kvm)
goto out;
}
- kvm_for_each_vcpu(i, vcpu, kvm)
+ kvm_for_each_vcpu(idx, vcpu, kvm)
kvm_vgic_vcpu_enable(vcpu);
ret = kvm_vgic_setup_default_irq_routing(kvm);
@@ -370,7 +372,7 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
static void __kvm_vgic_destroy(struct kvm *kvm)
{
struct kvm_vcpu *vcpu;
- int i;
+ unsigned long i;
vgic_debug_destroy(kvm);
diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c
index 0d000d2fe8d2..c6d52a1fd9c8 100644
--- a/arch/arm64/kvm/vgic/vgic-kvm-device.c
+++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c
@@ -325,7 +325,7 @@ void unlock_all_vcpus(struct kvm *kvm)
bool lock_all_vcpus(struct kvm *kvm)
{
struct kvm_vcpu *tmp_vcpu;
- int c;
+ unsigned long c;
/*
* Any time a vcpu is run, vcpu_load is called which tries to grab the
diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v2.c b/arch/arm64/kvm/vgic/vgic-mmio-v2.c
index 5f9014ae595b..12e4c223e6b8 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio-v2.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio-v2.c
@@ -113,9 +113,8 @@ static void vgic_mmio_write_sgir(struct kvm_vcpu *source_vcpu,
int intid = val & 0xf;
int targets = (val >> 16) & 0xff;
int mode = (val >> 24) & 0x03;
- int c;
struct kvm_vcpu *vcpu;
- unsigned long flags;
+ unsigned long flags, c;
switch (mode) {
case 0x0: /* as specified by targets */
diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
index bf7ec4a78497..58e40b4874f8 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
@@ -754,7 +754,8 @@ static void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu)
static int vgic_register_all_redist_iodevs(struct kvm *kvm)
{
struct kvm_vcpu *vcpu;
- int c, ret = 0;
+ unsigned long c;
+ int ret = 0;
kvm_for_each_vcpu(c, vcpu, kvm) {
ret = vgic_register_redist_iodev(vcpu);
@@ -763,10 +764,12 @@ static int vgic_register_all_redist_iodevs(struct kvm *kvm)
}
if (ret) {
- /* The current c failed, so we start with the previous one. */
+ /* The current c failed, so iterate over the previous ones. */
+ int i;
+
mutex_lock(&kvm->slots_lock);
- for (c--; c >= 0; c--) {
- vcpu = kvm_get_vcpu(kvm, c);
+ for (i = 0; i < c; i++) {
+ vcpu = kvm_get_vcpu(kvm, i);
vgic_unregister_redist_iodev(vcpu);
}
mutex_unlock(&kvm->slots_lock);
@@ -995,10 +998,10 @@ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, bool allow_group1)
struct kvm_vcpu *c_vcpu;
u16 target_cpus;
u64 mpidr;
- int sgi, c;
+ int sgi;
int vcpu_id = vcpu->vcpu_id;
bool broadcast;
- unsigned long flags;
+ unsigned long c, flags;
sgi = (reg & ICC_SGI1R_SGI_ID_MASK) >> ICC_SGI1R_SGI_ID_SHIFT;
broadcast = reg & BIT_ULL(ICC_SGI1R_IRQ_ROUTING_MODE_BIT);
diff --git a/arch/arm64/kvm/vgic/vgic-mmio.c b/arch/arm64/kvm/vgic/vgic-mmio.c
index 48c6067fc5ec..7068da080799 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio.c
@@ -1050,7 +1050,7 @@ static int dispatch_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
return 0;
}
-struct kvm_io_device_ops kvm_io_gic_ops = {
+const struct kvm_io_device_ops kvm_io_gic_ops = {
.read = dispatch_mmio_read,
.write = dispatch_mmio_write,
};
diff --git a/arch/arm64/kvm/vgic/vgic-mmio.h b/arch/arm64/kvm/vgic/vgic-mmio.h
index fefcca2b14dc..3fa696f198a3 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio.h
+++ b/arch/arm64/kvm/vgic/vgic-mmio.h
@@ -34,7 +34,7 @@ struct vgic_register_region {
};
};
-extern struct kvm_io_device_ops kvm_io_gic_ops;
+extern const struct kvm_io_device_ops kvm_io_gic_ops;
#define VGIC_ACCESS_8bit 1
#define VGIC_ACCESS_32bit 2
diff --git a/arch/arm64/kvm/vgic/vgic-v2.c b/arch/arm64/kvm/vgic/vgic-v2.c
index 95a18cec14a3..645648349c99 100644
--- a/arch/arm64/kvm/vgic/vgic-v2.c
+++ b/arch/arm64/kvm/vgic/vgic-v2.c
@@ -293,12 +293,12 @@ int vgic_v2_map_resources(struct kvm *kvm)
if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) ||
IS_VGIC_ADDR_UNDEF(dist->vgic_cpu_base)) {
- kvm_err("Need to set vgic cpu and dist addresses first\n");
+ kvm_debug("Need to set vgic cpu and dist addresses first\n");
return -ENXIO;
}
if (!vgic_v2_check_base(dist->vgic_dist_base, dist->vgic_cpu_base)) {
- kvm_err("VGIC CPU and dist frames overlap\n");
+ kvm_debug("VGIC CPU and dist frames overlap\n");
return -EINVAL;
}
@@ -345,6 +345,11 @@ int vgic_v2_probe(const struct gic_kvm_info *info)
int ret;
u32 vtr;
+ if (is_protected_kvm_enabled()) {
+ kvm_err("GICv2 not supported in protected mode\n");
+ return -ENXIO;
+ }
+
if (!info->vctrl.start) {
kvm_err("GICH not present in the firmware table\n");
return -ENXIO;
diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c
index 04f62c4b07fb..b549af8b1dc2 100644
--- a/arch/arm64/kvm/vgic/vgic-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-v3.c
@@ -542,24 +542,24 @@ int vgic_v3_map_resources(struct kvm *kvm)
struct vgic_dist *dist = &kvm->arch.vgic;
struct kvm_vcpu *vcpu;
int ret = 0;
- int c;
+ unsigned long c;
kvm_for_each_vcpu(c, vcpu, kvm) {
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
if (IS_VGIC_ADDR_UNDEF(vgic_cpu->rd_iodev.base_addr)) {
- kvm_debug("vcpu %d redistributor base not set\n", c);
+ kvm_debug("vcpu %ld redistributor base not set\n", c);
return -ENXIO;
}
}
if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base)) {
- kvm_err("Need to set vgic distributor addresses first\n");
+ kvm_debug("Need to set vgic distributor addresses first\n");
return -ENXIO;
}
if (!vgic_v3_check_base(kvm)) {
- kvm_err("VGIC redist and dist frames overlap\n");
+ kvm_debug("VGIC redist and dist frames overlap\n");
return -EINVAL;
}
@@ -609,6 +609,18 @@ static int __init early_gicv4_enable(char *buf)
}
early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable);
+static const struct midr_range broken_seis[] = {
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM),
+ {},
+};
+
+static bool vgic_v3_broken_seis(void)
+{
+ return ((kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_SEIS_MASK) &&
+ is_midr_in_range_list(read_cpuid_id(), broken_seis));
+}
+
/**
* vgic_v3_probe - probe for a VGICv3 compatible interrupt controller
* @info: pointer to the GIC description
@@ -651,7 +663,7 @@ int vgic_v3_probe(const struct gic_kvm_info *info)
} else if (!PAGE_ALIGNED(info->vcpu.start)) {
pr_warn("GICV physical address 0x%llx not page aligned\n",
(unsigned long long)info->vcpu.start);
- } else {
+ } else if (kvm_get_mode() != KVM_MODE_PROTECTED) {
kvm_vgic_global_state.vcpu_base = info->vcpu.start;
kvm_vgic_global_state.can_emulate_gicv2 = true;
ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V2);
@@ -676,9 +688,10 @@ int vgic_v3_probe(const struct gic_kvm_info *info)
group1_trap = true;
}
- if (kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_SEIS_MASK) {
- kvm_info("GICv3 with locally generated SEI\n");
+ if (vgic_v3_broken_seis()) {
+ kvm_info("GICv3 with broken locally generated SEI\n");
+ kvm_vgic_global_state.ich_vtr_el2 &= ~ICH_VTR_SEIS_MASK;
group0_trap = true;
group1_trap = true;
if (ich_vtr_el2 & ICH_VTR_TDS_MASK)
diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c
index 772dd15a22c7..ad06ba6c9b00 100644
--- a/arch/arm64/kvm/vgic/vgic-v4.c
+++ b/arch/arm64/kvm/vgic/vgic-v4.c
@@ -189,7 +189,7 @@ void vgic_v4_configure_vsgis(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
struct kvm_vcpu *vcpu;
- int i;
+ unsigned long i;
kvm_arm_halt_guest(kvm);
@@ -235,7 +235,8 @@ int vgic_v4_init(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
struct kvm_vcpu *vcpu;
- int i, nr_vcpus, ret;
+ int nr_vcpus, ret;
+ unsigned long i;
if (!kvm_vgic_global_state.has_gicv4)
return 0; /* Nothing to see here... move along. */
diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c
index 5dad4996cfb2..9b98876a8a93 100644
--- a/arch/arm64/kvm/vgic/vgic.c
+++ b/arch/arm64/kvm/vgic/vgic.c
@@ -990,7 +990,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
void vgic_kick_vcpus(struct kvm *kvm)
{
struct kvm_vcpu *vcpu;
- int c;
+ unsigned long c;
/*
* We've injected an interrupt, time to find out who deserves