Diffstat (limited to 'arch/arm64/kvm')
-rw-r--r--  arch/arm64/kvm/.gitignore  2
-rw-r--r--  arch/arm64/kvm/Kconfig  15
-rw-r--r--  arch/arm64/kvm/Makefile  30
-rw-r--r--  arch/arm64/kvm/arch_timer.c  63
-rw-r--r--  arch/arm64/kvm/arm.c  537
-rw-r--r--  arch/arm64/kvm/debug.c  87
-rw-r--r--  arch/arm64/kvm/fpsimd.c  147
-rw-r--r--  arch/arm64/kvm/guest.c  15
-rw-r--r--  arch/arm64/kvm/handle_exit.c  90
-rw-r--r--  arch/arm64/kvm/hyp/Makefile  7
-rw-r--r--  arch/arm64/kvm/hyp/exception.c  31
-rw-r--r--  arch/arm64/kvm/hyp/fpsimd.S  6
-rw-r--r--  arch/arm64/kvm/hyp/hyp-constants.c  10
-rw-r--r--  arch/arm64/kvm/hyp/hyp-entry.S  11
-rw-r--r--  arch/arm64/kvm/hyp/include/hyp/debug-sr.h  6
-rw-r--r--  arch/arm64/kvm/hyp/include/hyp/switch.h  105
-rw-r--r--  arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h  4
-rw-r--r--  arch/arm64/kvm/hyp/include/nvhe/fixed_config.h  121
-rw-r--r--  arch/arm64/kvm/hyp/include/nvhe/mem_protect.h  6
-rw-r--r--  arch/arm64/kvm/hyp/include/nvhe/mm.h  65
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/Makefile  29
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/cache.S  5
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/debug-sr.c  8
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/early_alloc.c  5
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/host.S  43
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/hyp-main.c  26
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/list_debug.c  54
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/mem_protect.c  512
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/mm.c  86
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/page_alloc.c  6
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/pkvm.c  38
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/setup.c  56
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/stacktrace.c  158
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/stub.c  22
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/switch.c  78
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/sys_regs.c  83
-rw-r--r--  arch/arm64/kvm/hyp/pgtable.c  128
-rw-r--r--  arch/arm64/kvm/hyp/vgic-v3-sr.c  7
-rw-r--r--  arch/arm64/kvm/hyp/vhe/switch.c  22
-rw-r--r--  arch/arm64/kvm/hyp/vhe/sysreg-sr.c  4
-rw-r--r--  arch/arm64/kvm/hypercalls.c  339
-rw-r--r--  arch/arm64/kvm/inject_fault.c  49
-rw-r--r--  arch/arm64/kvm/mmio.c  3
-rw-r--r--  arch/arm64/kvm/mmu.c  374
-rw-r--r--  arch/arm64/kvm/perf.c  59
-rw-r--r--  arch/arm64/kvm/pkvm.c (renamed from arch/arm64/kvm/hyp/reserved_mem.c)  8
-rw-r--r--  arch/arm64/kvm/pmu-emul.c  176
-rw-r--r--  arch/arm64/kvm/pmu.c  40
-rw-r--r--  arch/arm64/kvm/psci.c  317
-rw-r--r--  arch/arm64/kvm/reset.c  111
-rw-r--r--  arch/arm64/kvm/stacktrace.c  245
-rw-r--r--  arch/arm64/kvm/sys_regs.c  849
-rw-r--r--  arch/arm64/kvm/sys_regs.h  51
-rw-r--r--  arch/arm64/kvm/va_layout.c  5
-rw-r--r--  arch/arm64/kvm/vgic-sys-reg-v3.c  462
-rw-r--r--  arch/arm64/kvm/vgic/vgic-debug.c  10
-rw-r--r--  arch/arm64/kvm/vgic/vgic-init.c  25
-rw-r--r--  arch/arm64/kvm/vgic/vgic-its.c  169
-rw-r--r--  arch/arm64/kvm/vgic/vgic-kvm-device.c  344
-rw-r--r--  arch/arm64/kvm/vgic/vgic-mmio-v2.c  25
-rw-r--r--  arch/arm64/kvm/vgic/vgic-mmio-v3.c  190
-rw-r--r--  arch/arm64/kvm/vgic/vgic-mmio.c  48
-rw-r--r--  arch/arm64/kvm/vgic/vgic-mmio.h  9
-rw-r--r--  arch/arm64/kvm/vgic/vgic-v2.c  9
-rw-r--r--  arch/arm64/kvm/vgic/vgic-v3.c  31
-rw-r--r--  arch/arm64/kvm/vgic/vgic-v4.c  5
-rw-r--r--  arch/arm64/kvm/vgic/vgic.c  4
-rw-r--r--  arch/arm64/kvm/vgic/vgic.h  19
-rw-r--r--  arch/arm64/kvm/vmid.c  196
69 files changed, 4663 insertions, 2237 deletions
diff --git a/arch/arm64/kvm/.gitignore b/arch/arm64/kvm/.gitignore
new file mode 100644
index 000000000000..6182aefb8302
--- /dev/null
+++ b/arch/arm64/kvm/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+hyp_constants.h
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 8ffcbe29395e..815cc118c675 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -39,6 +39,8 @@ menuconfig KVM
select HAVE_KVM_IRQ_BYPASS
select HAVE_KVM_VCPU_RUN_PID_CHANGE
select SCHED_INFO
+ select GUEST_PERF_EVENTS if PERF_EVENTS
+ select INTERVAL_TREE
help
Support hosting virtualized guest machines.
@@ -54,4 +56,17 @@ config NVHE_EL2_DEBUG
If unsure, say N.
+config PROTECTED_NVHE_STACKTRACE
+ bool "Protected KVM hypervisor stacktraces"
+ depends on NVHE_EL2_DEBUG
+ default n
+ help
+ Say Y here to enable pKVM hypervisor stacktraces on hyp_panic()
+
+ If using protected nVHE mode, but cannot afford the associated
+ memory cost (less than 0.75 page per CPU) of pKVM stacktraces,
+ say N.
+
+ If unsure, or not using protected nVHE (pKVM), say N.
+
endif # VIRTUALIZATION
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 989bb5dad2c8..5e33c2d4645a 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -5,18 +5,16 @@
ccflags-y += -I $(srctree)/$(src)
-KVM=../../../virt/kvm
+include $(srctree)/virt/kvm/Makefile.kvm
obj-$(CONFIG_KVM) += kvm.o
obj-$(CONFIG_KVM) += hyp/
-kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \
- $(KVM)/vfio.o $(KVM)/irqchip.o $(KVM)/binary_stats.o \
- arm.o mmu.o mmio.o psci.o perf.o hypercalls.o pvtime.o \
+kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
inject_fault.o va_layout.o handle_exit.o \
- guest.o debug.o reset.o sys_regs.o \
- vgic-sys-reg-v3.o fpsimd.o pmu.o \
- arch_timer.o trng.o\
+ guest.o debug.o reset.o sys_regs.o stacktrace.o \
+ vgic-sys-reg-v3.o fpsimd.o pkvm.o \
+ arch_timer.o trng.o vmid.o \
vgic/vgic.o vgic/vgic-init.o \
vgic/vgic-irqfd.o vgic/vgic-v2.o \
vgic/vgic-v3.o vgic/vgic-v4.o \
@@ -24,4 +22,20 @@ kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \
vgic/vgic-mmio-v3.o vgic/vgic-kvm-device.o \
vgic/vgic-its.o vgic/vgic-debug.o
-kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o
+kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o pmu.o
+
+always-y := hyp_constants.h hyp-constants.s
+
+define rule_gen_hyp_constants
+ $(call filechk,offsets,__HYP_CONSTANTS_H__)
+endef
+
+CFLAGS_hyp-constants.o = -I $(srctree)/$(src)/hyp/include
+$(obj)/hyp-constants.s: $(src)/hyp/hyp-constants.c FORCE
+ $(call if_changed_dep,cc_s_c)
+
+$(obj)/hyp_constants.h: $(obj)/hyp-constants.s FORCE
+ $(call if_changed_rule,gen_hyp_constants)
+
+obj-kvm := $(addprefix $(obj)/, $(kvm-y))
+$(obj-kvm): $(obj)/hyp_constants.h
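The hyp_constants.h rules above follow the usual kbuild offsets pattern: hyp/hyp-constants.c (new in this series, listed in the diffstat but not shown here) is compiled to assembly, and the offsets filechk rule turns its kbuild DEFINE()s into a header that the rest of kvm-y depends on. A minimal sketch of such a generator, assuming it only needs to export a hyp structure size:
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/kbuild.h>
#include <nvhe/memory.h>

int main(void)
{
	/* Becomes "#define STRUCT_HYP_PAGE_SIZE <n>" in the generated hyp_constants.h. */
	DEFINE(STRUCT_HYP_PAGE_SIZE, sizeof(struct hyp_page));
	return 0;
}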
diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index 3df67c127489..bb24a76b4224 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -208,18 +208,16 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
return IRQ_HANDLED;
}
-static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx)
+static u64 kvm_counter_compute_delta(struct arch_timer_context *timer_ctx,
+ u64 val)
{
- u64 cval, now;
+ u64 now = kvm_phys_timer_read() - timer_get_offset(timer_ctx);
- cval = timer_get_cval(timer_ctx);
- now = kvm_phys_timer_read() - timer_get_offset(timer_ctx);
-
- if (now < cval) {
+ if (now < val) {
u64 ns;
ns = cyclecounter_cyc2ns(timecounter->cc,
- cval - now,
+ val - now,
timecounter->mask,
&timecounter->frac);
return ns;
@@ -228,6 +226,11 @@ static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx)
return 0;
}
+static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx)
+{
+ return kvm_counter_compute_delta(timer_ctx, timer_get_cval(timer_ctx));
+}
+
static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx)
{
WARN_ON(timer_ctx && timer_ctx->loaded);
@@ -236,6 +239,20 @@ static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx)
(ARCH_TIMER_CTRL_IT_MASK | ARCH_TIMER_CTRL_ENABLE)) == ARCH_TIMER_CTRL_ENABLE);
}
+static bool vcpu_has_wfit_active(struct kvm_vcpu *vcpu)
+{
+ return (cpus_have_final_cap(ARM64_HAS_WFXT) &&
+ vcpu_get_flag(vcpu, IN_WFIT));
+}
+
+static u64 wfit_delay_ns(struct kvm_vcpu *vcpu)
+{
+ struct arch_timer_context *ctx = vcpu_vtimer(vcpu);
+ u64 val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu));
+
+ return kvm_counter_compute_delta(ctx, val);
+}
+
/*
* Returns the earliest expiration time in ns among guest timers.
* Note that it will return 0 if none of timers can fire.
@@ -253,6 +270,9 @@ static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu)
min_delta = min(min_delta, kvm_timer_compute_delta(ctx));
}
+ if (vcpu_has_wfit_active(vcpu))
+ min_delta = min(min_delta, wfit_delay_ns(vcpu));
+
/* If none of timers can fire, then return 0 */
if (min_delta == ULLONG_MAX)
return 0;
@@ -350,15 +370,9 @@ static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
return cval <= now;
}
-bool kvm_timer_is_pending(struct kvm_vcpu *vcpu)
+int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
- struct timer_map map;
-
- get_timer_map(vcpu, &map);
-
- return kvm_timer_should_fire(map.direct_vtimer) ||
- kvm_timer_should_fire(map.direct_ptimer) ||
- kvm_timer_should_fire(map.emul_ptimer);
+ return vcpu_has_wfit_active(vcpu) && wfit_delay_ns(vcpu) == 0;
}
/*
@@ -467,7 +481,7 @@ out:
}
/*
- * Schedule the background timer before calling kvm_vcpu_block, so that this
+ * Schedule the background timer before calling kvm_vcpu_halt, so that this
* thread is removed from its waitqueue and made runnable when there's a timer
* interrupt to handle.
*/
@@ -484,7 +498,8 @@ static void kvm_timer_blocking(struct kvm_vcpu *vcpu)
*/
if (!kvm_timer_irq_can_fire(map.direct_vtimer) &&
!kvm_timer_irq_can_fire(map.direct_ptimer) &&
- !kvm_timer_irq_can_fire(map.emul_ptimer))
+ !kvm_timer_irq_can_fire(map.emul_ptimer) &&
+ !vcpu_has_wfit_active(vcpu))
return;
/*
@@ -649,7 +664,6 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = vcpu_timer(vcpu);
struct timer_map map;
- struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);
if (unlikely(!timer->enabled))
return;
@@ -672,7 +686,7 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
if (map.emul_ptimer)
soft_timer_cancel(&map.emul_ptimer->hrtimer);
- if (rcuwait_active(wait))
+ if (kvm_vcpu_is_blocking(vcpu))
kvm_timer_blocking(vcpu);
/*
@@ -750,7 +764,7 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
/* Make the updates of cntvoff for all vtimer contexts atomic */
static void update_vtimer_cntvoff(struct kvm_vcpu *vcpu, u64 cntvoff)
{
- int i;
+ unsigned long i;
struct kvm *kvm = vcpu->kvm;
struct kvm_vcpu *tmp;
@@ -1189,8 +1203,8 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu)
{
- int vtimer_irq, ptimer_irq;
- int i, ret;
+ int vtimer_irq, ptimer_irq, ret;
+ unsigned long i;
vtimer_irq = vcpu_vtimer(vcpu)->irq.irq;
ret = kvm_vgic_set_owner(vcpu, vtimer_irq, vcpu_vtimer(vcpu));
@@ -1216,6 +1230,9 @@ bool kvm_arch_timer_get_input_level(int vintid)
struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
struct arch_timer_context *timer;
+ if (WARN(!vcpu, "No vcpu context!\n"))
+ return false;
+
if (vintid == vcpu_vtimer(vcpu)->irq.irq)
timer = vcpu_vtimer(vcpu);
else if (vintid == vcpu_ptimer(vcpu)->irq.irq)
@@ -1297,7 +1314,7 @@ void kvm_timer_init_vhe(void)
static void set_timer_irqs(struct kvm *kvm, int vtimer_irq, int ptimer_irq)
{
struct kvm_vcpu *vcpu;
- int i;
+ unsigned long i;
kvm_for_each_vcpu(i, vcpu, kvm) {
vcpu_vtimer(vcpu)->irq.irq = vtimer_irq;
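With kvm_counter_compute_delta() factored out, a pending WFIT deadline is folded into kvm_timer_earliest_exp() and handled like any other timer expiry when the vCPU blocks. A simplified recap of how the blocking path arms the background timer (kvm_timer_blocking() in this file already does the equivalent; the snippet is illustrative only):
static void arm_bg_timer_sketch(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
	/* Earliest of the enabled timers and, if set, the WFIT deadline; 0 if none can fire. */
	u64 sleep_ns = kvm_timer_earliest_exp(vcpu);

	if (sleep_ns)
		soft_timer_start(&timer->bg_timer, sleep_ns);
}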
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index e4727dc771bf..94d33e296e10 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -49,15 +49,10 @@ DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);
DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector);
-static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
+DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
unsigned long kvm_arm_hyp_percpu_base[NR_CPUS];
DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
-/* The VMID used in the VTTBR */
-static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
-static u32 kvm_next_vmid;
-static DEFINE_SPINLOCK(kvm_vmid_lock);
-
static bool vgic_present;
static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled);
@@ -89,7 +84,8 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
switch (cap->cap) {
case KVM_CAP_ARM_NISV_TO_USER:
r = 0;
- kvm->arch.return_nisv_io_abort_to_user = true;
+ set_bit(KVM_ARCH_FLAG_RETURN_NISV_IO_ABORT_TO_USER,
+ &kvm->arch.flags);
break;
case KVM_CAP_ARM_MTE:
mutex_lock(&kvm->lock);
@@ -97,10 +93,14 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
r = -EINVAL;
} else {
r = 0;
- kvm->arch.mte_enabled = true;
+ set_bit(KVM_ARCH_FLAG_MTE_ENABLED, &kvm->arch.flags);
}
mutex_unlock(&kvm->lock);
break;
+ case KVM_CAP_ARM_SYSTEM_SUSPEND:
+ r = 0;
+ set_bit(KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED, &kvm->arch.flags);
+ break;
default:
r = -EINVAL;
break;
@@ -146,16 +146,23 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
if (ret)
return ret;
- ret = create_hyp_mappings(kvm, kvm + 1, PAGE_HYP);
+ ret = kvm_share_hyp(kvm, kvm + 1);
if (ret)
goto out_free_stage2_pgd;
+ if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL)) {
+ ret = -ENOMEM;
+ goto out_free_stage2_pgd;
+ }
+ cpumask_copy(kvm->arch.supported_cpus, cpu_possible_mask);
+
kvm_vgic_early_init(kvm);
/* The maximum number of VCPUs is limited by the host's GIC model */
- kvm->arch.max_vcpus = kvm_arm_default_max_vcpus();
+ kvm->max_vcpus = kvm_arm_default_max_vcpus();
set_default_spectre(kvm);
+ kvm_arm_init_hypercalls(kvm);
return ret;
out_free_stage2_pgd:
@@ -175,19 +182,14 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
*/
void kvm_arch_destroy_vm(struct kvm *kvm)
{
- int i;
-
bitmap_free(kvm->arch.pmu_filter);
+ free_cpumask_var(kvm->arch.supported_cpus);
kvm_vgic_destroy(kvm);
- for (i = 0; i < KVM_MAX_VCPUS; ++i) {
- if (kvm->vcpus[i]) {
- kvm_vcpu_destroy(kvm->vcpus[i]);
- kvm->vcpus[i] = NULL;
- }
- }
- atomic_set(&kvm->online_vcpus, 0);
+ kvm_destroy_vcpus(kvm);
+
+ kvm_unshare_hyp(kvm, kvm + 1);
}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
@@ -215,6 +217,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_SET_GUEST_DEBUG:
case KVM_CAP_VCPU_ATTRIBUTES:
case KVM_CAP_PTP_KVM:
+ case KVM_CAP_ARM_SYSTEM_SUSPEND:
r = 1;
break;
case KVM_CAP_SET_GUEST_DEBUG2:
@@ -235,7 +238,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_MAX_VCPUS:
case KVM_CAP_MAX_VCPU_ID:
if (kvm)
- r = kvm->arch.max_vcpus;
+ r = kvm->max_vcpus;
else
r = kvm_arm_default_max_vcpus();
break;
@@ -311,7 +314,7 @@ int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
if (irqchip_in_kernel(kvm) && vgic_initialized(kvm))
return -EBUSY;
- if (id >= kvm->arch.max_vcpus)
+ if (id >= kvm->max_vcpus)
return -EINVAL;
return 0;
@@ -327,6 +330,12 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;
+ /*
+ * Default value for the FP state, will be overloaded at load
+ * time if we support FP (pretty likely)
+ */
+ vcpu->arch.fp_state = FP_STATE_FREE;
+
/* Set up the timer */
kvm_timer_vcpu_init(vcpu);
@@ -342,7 +351,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
if (err)
return err;
- return create_hyp_mappings(vcpu, vcpu + 1, PAGE_HYP);
+ return kvm_share_hyp(vcpu, vcpu + 1);
}
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
@@ -351,7 +360,7 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
- if (vcpu->arch.has_run_once && unlikely(!irqchip_in_kernel(vcpu->kvm)))
+ if (vcpu_has_run_once(vcpu) && unlikely(!irqchip_in_kernel(vcpu->kvm)))
static_branch_dec(&userspace_irqchip_in_use);
kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
@@ -361,34 +370,14 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
kvm_arm_vcpu_destroy(vcpu);
}
-int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
-{
- return kvm_timer_is_pending(vcpu);
-}
-
void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
{
- /*
- * If we're about to block (most likely because we've just hit a
- * WFI), we need to sync back the state of the GIC CPU interface
- * so that we have the latest PMR and group enables. This ensures
- * that kvm_arch_vcpu_runnable has up-to-date data to decide
- * whether we have pending interrupts.
- *
- * For the same reason, we want to tell GICv4 that we need
- * doorbells to be signalled, should an interrupt become pending.
- */
- preempt_disable();
- kvm_vgic_vmcr_sync(vcpu);
- vgic_v4_put(vcpu, true);
- preempt_enable();
+
}
void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
{
- preempt_disable();
- vgic_v4_load(vcpu);
- preempt_enable();
+
}
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -432,6 +421,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
if (vcpu_has_ptrauth(vcpu))
vcpu_ptrauth_disable(vcpu);
kvm_arch_vcpu_load_debug_state_flags(vcpu);
+
+ if (!cpumask_test_cpu(smp_processor_id(), vcpu->kvm->arch.supported_cpus))
+ vcpu_set_on_unsupported_cpu(vcpu);
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -443,24 +435,40 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
kvm_timer_vcpu_put(vcpu);
kvm_vgic_put(vcpu);
kvm_vcpu_pmu_restore_host(vcpu);
+ kvm_arm_vmid_clear_active();
+ vcpu_clear_on_unsupported_cpu(vcpu);
vcpu->cpu = -1;
}
-static void vcpu_power_off(struct kvm_vcpu *vcpu)
+void kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu)
{
- vcpu->arch.power_off = true;
+ vcpu->arch.mp_state.mp_state = KVM_MP_STATE_STOPPED;
kvm_make_request(KVM_REQ_SLEEP, vcpu);
kvm_vcpu_kick(vcpu);
}
+bool kvm_arm_vcpu_stopped(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.mp_state.mp_state == KVM_MP_STATE_STOPPED;
+}
+
+static void kvm_arm_vcpu_suspend(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.mp_state.mp_state = KVM_MP_STATE_SUSPENDED;
+ kvm_make_request(KVM_REQ_SUSPEND, vcpu);
+ kvm_vcpu_kick(vcpu);
+}
+
+static bool kvm_arm_vcpu_suspended(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.mp_state.mp_state == KVM_MP_STATE_SUSPENDED;
+}
+
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
- if (vcpu->arch.power_off)
- mp_state->mp_state = KVM_MP_STATE_STOPPED;
- else
- mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
+ *mp_state = vcpu->arch.mp_state;
return 0;
}
@@ -472,10 +480,13 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
switch (mp_state->mp_state) {
case KVM_MP_STATE_RUNNABLE:
- vcpu->arch.power_off = false;
+ vcpu->arch.mp_state = *mp_state;
break;
case KVM_MP_STATE_STOPPED:
- vcpu_power_off(vcpu);
+ kvm_arm_vcpu_power_off(vcpu);
+ break;
+ case KVM_MP_STATE_SUSPENDED:
+ kvm_arm_vcpu_suspend(vcpu);
break;
default:
ret = -EINVAL;
@@ -495,7 +506,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
{
bool irq_lines = *vcpu_hcr(v) & (HCR_VI | HCR_VF);
return ((irq_lines || kvm_vgic_vcpu_pending_irq(v))
- && !v->arch.power_off && !v->arch.pause);
+ && !kvm_arm_vcpu_stopped(v) && !v->arch.pause);
}
bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
@@ -503,99 +514,40 @@ bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
return vcpu_mode_priv(vcpu);
}
-/* Just ensure a guest exit from a particular CPU */
-static void exit_vm_noop(void *info)
+#ifdef CONFIG_GUEST_PERF_EVENTS
+unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu)
{
+ return *vcpu_pc(vcpu);
}
+#endif
-void force_vm_exit(const cpumask_t *mask)
-{
- preempt_disable();
- smp_call_function_many(mask, exit_vm_noop, NULL, true);
- preempt_enable();
-}
-
-/**
- * need_new_vmid_gen - check that the VMID is still valid
- * @vmid: The VMID to check
- *
- * return true if there is a new generation of VMIDs being used
- *
- * The hardware supports a limited set of values with the value zero reserved
- * for the host, so we check if an assigned value belongs to a previous
- * generation, which requires us to assign a new value. If we're the first to
- * use a VMID for the new generation, we must flush necessary caches and TLBs
- * on all CPUs.
- */
-static bool need_new_vmid_gen(struct kvm_vmid *vmid)
+static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
{
- u64 current_vmid_gen = atomic64_read(&kvm_vmid_gen);
- smp_rmb(); /* Orders read of kvm_vmid_gen and kvm->arch.vmid */
- return unlikely(READ_ONCE(vmid->vmid_gen) != current_vmid_gen);
+ return vcpu->arch.target >= 0;
}
-/**
- * update_vmid - Update the vmid with a valid VMID for the current generation
- * @vmid: The stage-2 VMID information struct
+/*
+ * Handle both the initialisation that is being done when the vcpu is
+ * run for the first time, as well as the updates that must be
+ * performed each time we get a new thread dealing with this vcpu.
*/
-static void update_vmid(struct kvm_vmid *vmid)
-{
- if (!need_new_vmid_gen(vmid))
- return;
-
- spin_lock(&kvm_vmid_lock);
-
- /*
- * We need to re-check the vmid_gen here to ensure that if another vcpu
- * already allocated a valid vmid for this vm, then this vcpu should
- * use the same vmid.
- */
- if (!need_new_vmid_gen(vmid)) {
- spin_unlock(&kvm_vmid_lock);
- return;
- }
-
- /* First user of a new VMID generation? */
- if (unlikely(kvm_next_vmid == 0)) {
- atomic64_inc(&kvm_vmid_gen);
- kvm_next_vmid = 1;
-
- /*
- * On SMP we know no other CPUs can use this CPU's or each
- * other's VMID after force_vm_exit returns since the
- * kvm_vmid_lock blocks them from reentry to the guest.
- */
- force_vm_exit(cpu_all_mask);
- /*
- * Now broadcast TLB + ICACHE invalidation over the inner
- * shareable domain to make sure all data structures are
- * clean.
- */
- kvm_call_hyp(__kvm_flush_vm_context);
- }
-
- WRITE_ONCE(vmid->vmid, kvm_next_vmid);
- kvm_next_vmid++;
- kvm_next_vmid &= (1 << kvm_get_vmid_bits()) - 1;
-
- smp_wmb();
- WRITE_ONCE(vmid->vmid_gen, atomic64_read(&kvm_vmid_gen));
-
- spin_unlock(&kvm_vmid_lock);
-}
-
-static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
+int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
{
struct kvm *kvm = vcpu->kvm;
- int ret = 0;
+ int ret;
- if (likely(vcpu->arch.has_run_once))
- return 0;
+ if (!kvm_vcpu_initialized(vcpu))
+ return -ENOEXEC;
if (!kvm_arm_vcpu_is_finalized(vcpu))
return -EPERM;
- vcpu->arch.has_run_once = true;
+ ret = kvm_arch_vcpu_run_map_fp(vcpu);
+ if (ret)
+ return ret;
+
+ if (likely(vcpu_has_run_once(vcpu)))
+ return 0;
kvm_arm_vcpu_init_debug(vcpu);
@@ -607,12 +559,6 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
ret = kvm_vgic_map_resources(kvm);
if (ret)
return ret;
- } else {
- /*
- * Tell the rest of the code that there are userspace irqchip
- * VMs in the wild.
- */
- static_branch_inc(&userspace_irqchip_in_use);
}
ret = kvm_timer_enable(vcpu);
@@ -620,6 +566,16 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
return ret;
ret = kvm_arm_pmu_v3_enable(vcpu);
+ if (ret)
+ return ret;
+
+ if (!irqchip_in_kernel(kvm)) {
+ /*
+ * Tell the rest of the code that there are userspace irqchip
+ * VMs in the wild.
+ */
+ static_branch_inc(&userspace_irqchip_in_use);
+ }
/*
* Initialize traps for protected VMs.
@@ -629,6 +585,10 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
if (kvm_vm_is_protected(kvm))
kvm_call_hyp_nvhe(__pkvm_vcpu_init_traps, vcpu);
+ mutex_lock(&kvm->lock);
+ set_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags);
+ mutex_unlock(&kvm->lock);
+
return ret;
}
@@ -639,7 +599,7 @@ bool kvm_arch_intc_initialized(struct kvm *kvm)
void kvm_arm_halt_guest(struct kvm *kvm)
{
- int i;
+ unsigned long i;
struct kvm_vcpu *vcpu;
kvm_for_each_vcpu(i, vcpu, kvm)
@@ -649,24 +609,24 @@ void kvm_arm_halt_guest(struct kvm *kvm)
void kvm_arm_resume_guest(struct kvm *kvm)
{
- int i;
+ unsigned long i;
struct kvm_vcpu *vcpu;
kvm_for_each_vcpu(i, vcpu, kvm) {
vcpu->arch.pause = false;
- rcuwait_wake_up(kvm_arch_vcpu_get_wait(vcpu));
+ __kvm_vcpu_wake_up(vcpu);
}
}
-static void vcpu_req_sleep(struct kvm_vcpu *vcpu)
+static void kvm_vcpu_sleep(struct kvm_vcpu *vcpu)
{
struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);
rcuwait_wait_event(wait,
- (!vcpu->arch.power_off) &&(!vcpu->arch.pause),
+ (!kvm_arm_vcpu_stopped(vcpu)) && (!vcpu->arch.pause),
TASK_INTERRUPTIBLE);
- if (vcpu->arch.power_off || vcpu->arch.pause) {
+ if (kvm_arm_vcpu_stopped(vcpu) || vcpu->arch.pause) {
/* Awaken to handle a signal, request we sleep again later. */
kvm_make_request(KVM_REQ_SLEEP, vcpu);
}
@@ -679,16 +639,86 @@ static void vcpu_req_sleep(struct kvm_vcpu *vcpu)
smp_rmb();
}
-static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
+/**
+ * kvm_vcpu_wfi - emulate Wait-For-Interrupt behavior
+ * @vcpu: The VCPU pointer
+ *
+ * Suspend execution of a vCPU until a valid wake event is detected, i.e. until
+ * the vCPU is runnable. The vCPU may or may not be scheduled out, depending
+ * on when a wake event arrives, e.g. there may already be a pending wake event.
+ */
+void kvm_vcpu_wfi(struct kvm_vcpu *vcpu)
{
- return vcpu->arch.target >= 0;
+ /*
+ * Sync back the state of the GIC CPU interface so that we have
+ * the latest PMR and group enables. This ensures that
+ * kvm_arch_vcpu_runnable has up-to-date data to decide whether
+ * we have pending interrupts, e.g. when determining if the
+ * vCPU should block.
+ *
+ * For the same reason, we want to tell GICv4 that we need
+ * doorbells to be signalled, should an interrupt become pending.
+ */
+ preempt_disable();
+ kvm_vgic_vmcr_sync(vcpu);
+ vgic_v4_put(vcpu, true);
+ preempt_enable();
+
+ kvm_vcpu_halt(vcpu);
+ vcpu_clear_flag(vcpu, IN_WFIT);
+
+ preempt_disable();
+ vgic_v4_load(vcpu);
+ preempt_enable();
}
-static void check_vcpu_requests(struct kvm_vcpu *vcpu)
+static int kvm_vcpu_suspend(struct kvm_vcpu *vcpu)
+{
+ if (!kvm_arm_vcpu_suspended(vcpu))
+ return 1;
+
+ kvm_vcpu_wfi(vcpu);
+
+ /*
+ * The suspend state is sticky; we do not leave it until userspace
+ * explicitly marks the vCPU as runnable. Request that we suspend again
+ * later.
+ */
+ kvm_make_request(KVM_REQ_SUSPEND, vcpu);
+
+ /*
+ * Check to make sure the vCPU is actually runnable. If so, exit to
+ * userspace informing it of the wakeup condition.
+ */
+ if (kvm_arch_vcpu_runnable(vcpu)) {
+ memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event));
+ vcpu->run->system_event.type = KVM_SYSTEM_EVENT_WAKEUP;
+ vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
+ return 0;
+ }
+
+ /*
+ * Otherwise, we were unblocked to process a different event, such as a
+ * pending signal. Return 1 and allow kvm_arch_vcpu_ioctl_run() to
+ * process the event.
+ */
+ return 1;
+}
+
+/**
+ * check_vcpu_requests - check and handle pending vCPU requests
+ * @vcpu: the VCPU pointer
+ *
+ * Return: 1 if we should enter the guest
+ * 0 if we should exit to userspace
+ * < 0 if we should exit to userspace, where the return value indicates
+ * an error
+ */
+static int check_vcpu_requests(struct kvm_vcpu *vcpu)
{
if (kvm_request_pending(vcpu)) {
if (kvm_check_request(KVM_REQ_SLEEP, vcpu))
- vcpu_req_sleep(vcpu);
+ kvm_vcpu_sleep(vcpu);
if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu))
kvm_reset_vcpu(vcpu);
@@ -713,7 +743,12 @@ static void check_vcpu_requests(struct kvm_vcpu *vcpu)
if (kvm_check_request(KVM_REQ_RELOAD_PMU, vcpu))
kvm_pmu_handle_pmcr(vcpu,
__vcpu_sys_reg(vcpu, PMCR_EL0));
+
+ if (kvm_check_request(KVM_REQ_SUSPEND, vcpu))
+ return kvm_vcpu_suspend(vcpu);
}
+
+ return 1;
}
static bool vcpu_mode_is_bad_32bit(struct kvm_vcpu *vcpu)
@@ -721,8 +756,7 @@ static bool vcpu_mode_is_bad_32bit(struct kvm_vcpu *vcpu)
if (likely(!vcpu_mode_is_32bit(vcpu)))
return false;
- return !system_supports_32bit_el0() ||
- static_branch_unlikely(&arm64_mismatched_32bit_el0);
+ return !kvm_supports_32bit_el0();
}
/**
@@ -759,11 +793,36 @@ static bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu, int *ret)
}
}
+ if (unlikely(vcpu_on_unsupported_cpu(vcpu))) {
+ run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+ run->fail_entry.hardware_entry_failure_reason = KVM_EXIT_FAIL_ENTRY_CPU_UNSUPPORTED;
+ run->fail_entry.cpu = smp_processor_id();
+ *ret = 0;
+ return true;
+ }
+
return kvm_request_pending(vcpu) ||
- need_new_vmid_gen(&vcpu->arch.hw_mmu->vmid) ||
xfer_to_guest_mode_work_pending();
}
+/*
+ * Actually run the vCPU, entering an RCU extended quiescent state (EQS) while
+ * the vCPU is running.
+ *
+ * This must be noinstr as instrumentation may make use of RCU, and this is not
+ * safe during the EQS.
+ */
+static int noinstr kvm_arm_vcpu_enter_exit(struct kvm_vcpu *vcpu)
+{
+ int ret;
+
+ guest_state_enter_irqoff();
+ ret = kvm_call_hyp_ret(__kvm_vcpu_run, vcpu);
+ guest_state_exit_irqoff();
+
+ return ret;
+}
+
/**
* kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code
* @vcpu: The VCPU pointer
@@ -779,13 +838,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
struct kvm_run *run = vcpu->run;
int ret;
- if (unlikely(!kvm_vcpu_initialized(vcpu)))
- return -ENOEXEC;
-
- ret = kvm_vcpu_first_run_init(vcpu);
- if (ret)
- return ret;
-
if (run->exit_reason == KVM_EXIT_MMIO) {
ret = kvm_handle_mmio_return(vcpu);
if (ret)
@@ -803,6 +855,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
ret = 1;
run->exit_reason = KVM_EXIT_UNKNOWN;
+ run->flags = 0;
while (ret > 0) {
/*
* Check conditions before entering the guest
@@ -811,9 +864,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
if (!ret)
ret = 1;
- update_vmid(&vcpu->arch.hw_mmu->vmid);
-
- check_vcpu_requests(vcpu);
+ if (ret > 0)
+ ret = check_vcpu_requests(vcpu);
/*
* Preparing the interrupts to be injected also
@@ -822,12 +874,23 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
*/
preempt_disable();
+ /*
+ * The VMID allocator only tracks active VMIDs per
+ * physical CPU, and therefore the VMID allocated may not be
+ * preserved on VMID roll-over if the task was preempted,
+ * making a thread's VMID inactive. So we need to call
+ * kvm_arm_vmid_update() in non-premptible context.
+ */
+ kvm_arm_vmid_update(&vcpu->arch.hw_mmu->vmid);
+
kvm_pmu_flush_hwstate(vcpu);
local_irq_disable();
kvm_vgic_flush_hwstate(vcpu);
+ kvm_pmu_update_vcpu_events(vcpu);
+
/*
* Ensure we set mode to IN_GUEST_MODE after we disable
* interrupts and before the final VCPU requests check.
@@ -849,14 +912,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
}
kvm_arm_setup_debug(vcpu);
+ kvm_arch_vcpu_ctxflush_fp(vcpu);
/**************************************************************
* Enter the guest
*/
trace_kvm_entry(*vcpu_pc(vcpu));
- guest_enter_irqoff();
+ guest_timing_enter_irqoff();
- ret = kvm_call_hyp_ret(__kvm_vcpu_run, vcpu);
+ ret = kvm_arm_vcpu_enter_exit(vcpu);
vcpu->mode = OUTSIDE_GUEST_MODE;
vcpu->stat.exits++;
@@ -891,26 +955,25 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
kvm_arch_vcpu_ctxsync_fp(vcpu);
/*
- * We may have taken a host interrupt in HYP mode (ie
- * while executing the guest). This interrupt is still
- * pending, as we haven't serviced it yet!
+ * We must ensure that any pending interrupts are taken before
+ * we exit guest timing so that timer ticks are accounted as
+ * guest time. Transiently unmask interrupts so that any
+ * pending interrupts are taken.
*
- * We're now back in SVC mode, with interrupts
- * disabled. Enabling the interrupts now will have
- * the effect of taking the interrupt again, in SVC
- * mode this time.
+ * Per ARM DDI 0487G.b section D1.13.4, an ISB (or other
+ * context synchronization event) is necessary to ensure that
+ * pending interrupts are taken.
*/
+ if (ARM_EXCEPTION_CODE(ret) == ARM_EXCEPTION_IRQ) {
+ local_irq_enable();
+ isb();
+ local_irq_disable();
+ }
+
+ guest_timing_exit_irqoff();
+
local_irq_enable();
- /*
- * We do local_irq_enable() before calling guest_exit() so
- * that if a timer interrupt hits while running the guest we
- * account that tick as being spent in the guest. We enable
- * preemption after calling guest_exit() so that if we get
- * preempted we make sure ticks after that is not counted as
- * guest time.
- */
- guest_exit();
trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
/* Exit types that need handling before we can be preempted */
@@ -956,8 +1019,8 @@ out:
* the vcpu state. Note that this relies on __kvm_adjust_pc()
* being preempt-safe on VHE.
*/
- if (unlikely(vcpu->arch.flags & (KVM_ARM64_PENDING_EXCEPTION |
- KVM_ARM64_INCREMENT_PC)))
+ if (unlikely(vcpu_get_flag(vcpu, PENDING_EXCEPTION) ||
+ vcpu_get_flag(vcpu, INCREMENT_PC)))
kvm_call_hyp(__kvm_adjust_pc, vcpu);
vcpu_put(vcpu);
@@ -1123,7 +1186,7 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
* need to invalidate the I-cache though, as FWB does *not*
* imply CTR_EL0.DIC.
*/
- if (vcpu->arch.has_run_once) {
+ if (vcpu_has_run_once(vcpu)) {
if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
stage2_unmap_vm(vcpu->kvm);
else
@@ -1137,9 +1200,9 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
* Handle the "start in power-off" case.
*/
if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
- vcpu_power_off(vcpu);
+ kvm_arm_vcpu_power_off(vcpu);
else
- vcpu->arch.power_off = false;
+ vcpu->arch.mp_state.mp_state = KVM_MP_STATE_RUNNABLE;
return 0;
}
@@ -1355,18 +1418,11 @@ void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
struct kvm_arm_device_addr *dev_addr)
{
- unsigned long dev_id, type;
-
- dev_id = (dev_addr->id & KVM_ARM_DEVICE_ID_MASK) >>
- KVM_ARM_DEVICE_ID_SHIFT;
- type = (dev_addr->id & KVM_ARM_DEVICE_TYPE_MASK) >>
- KVM_ARM_DEVICE_TYPE_SHIFT;
-
- switch (dev_id) {
+ switch (FIELD_GET(KVM_ARM_DEVICE_ID_MASK, dev_addr->id)) {
case KVM_ARM_DEVICE_VGIC_V2:
if (!vgic_present)
return -ENXIO;
- return kvm_vgic_addr(kvm, type, &dev_addr->addr, true);
+ return kvm_set_legacy_vgic_v2_addr(kvm, dev_addr);
default:
return -ENODEV;
}
@@ -1449,10 +1505,8 @@ static int kvm_init_vector_slots(void)
base = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs));
kvm_init_vector_slot(base, HYP_VECTOR_SPECTRE_DIRECT);
- if (!cpus_have_const_cap(ARM64_SPECTRE_V3A))
- return 0;
-
- if (!has_vhe()) {
+ if (kvm_system_needs_idmapped_vectors() &&
+ !is_protected_kvm_enabled()) {
err = create_hyp_exec_mappings(__pa_symbol(__bp_harden_hyp_vecs),
__BP_HARDEN_HYP_VECS_SZ, &base);
if (err)
@@ -1499,7 +1553,6 @@ static void cpu_prepare_hyp_mode(int cpu)
tcr |= (idmap_t0sz & GENMASK(TCR_TxSZ_WIDTH - 1, 0)) << TCR_T0SZ_OFFSET;
params->tcr_el2 = tcr;
- params->stack_hyp_va = kern_hyp_va(per_cpu(kvm_arm_hyp_stack_page, cpu) + PAGE_SIZE);
params->pgd_pa = kvm_mmu_get_httbr();
if (is_protected_kvm_enabled())
params->hcr_el2 = HCR_HOST_NVHE_PROTECTED_FLAGS;
@@ -1703,7 +1756,7 @@ static void init_cpu_logical_map(void)
/*
* Copy the MPIDR <-> logical CPU ID mapping to hyp.
- * Only copy the set of online CPUs whose features have been chacked
+ * Only copy the set of online CPUs whose features have been checked
* against the finalized system capabilities. The hypervisor will not
* allow any other CPUs from the `possible` set to boot.
*/
@@ -1775,8 +1828,7 @@ static int init_subsystems(void)
if (err)
goto out;
- kvm_perf_init();
- kvm_sys_reg_table_init();
+ kvm_register_perf_callbacks(NULL);
out:
if (err || !is_protected_kvm_enabled())
@@ -1827,6 +1879,7 @@ static int kvm_hyp_init_protection(u32 hyp_va_bits)
kvm_nvhe_sym(id_aa64pfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1);
kvm_nvhe_sym(id_aa64isar0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR0_EL1);
kvm_nvhe_sym(id_aa64isar1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR1_EL1);
+ kvm_nvhe_sym(id_aa64isar2_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR2_EL1);
kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR2_EL1);
@@ -1947,14 +2000,46 @@ static int init_hyp_mode(void)
* Map the Hyp stack pages
*/
for_each_possible_cpu(cpu) {
+ struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu);
char *stack_page = (char *)per_cpu(kvm_arm_hyp_stack_page, cpu);
- err = create_hyp_mappings(stack_page, stack_page + PAGE_SIZE,
- PAGE_HYP);
+ unsigned long hyp_addr;
+ /*
+ * Allocate a contiguous HYP private VA range for the stack
+ * and guard page. The allocation is also aligned based on
+ * the order of its size.
+ */
+ err = hyp_alloc_private_va_range(PAGE_SIZE * 2, &hyp_addr);
+ if (err) {
+ kvm_err("Cannot allocate hyp stack guard page\n");
+ goto out_err;
+ }
+
+ /*
+ * Since the stack grows downwards, map the stack to the page
+ * at the higher address and leave the lower guard page
+ * unbacked.
+ *
+ * Any valid stack address now has the PAGE_SHIFT bit as 1
+ * and addresses corresponding to the guard page have the
+ * PAGE_SHIFT bit as 0 - this is used for overflow detection.
+ */
+ err = __create_hyp_mappings(hyp_addr + PAGE_SIZE, PAGE_SIZE,
+ __pa(stack_page), PAGE_HYP);
if (err) {
kvm_err("Cannot map hyp stack\n");
goto out_err;
}
+
+ /*
+ * Save the stack PA in nvhe_init_params. This will be needed
+ * to recreate the stack mapping in protected nVHE mode.
+ * __hyp_pa() won't do the right thing there, since the stack
+ * has been mapped in the flexible private VA space.
+ */
+ params->stack_pa = __pa(stack_page);
+
+ params->stack_hyp_va = hyp_addr + (2 * PAGE_SIZE);
}
for_each_possible_cpu(cpu) {
@@ -2024,18 +2109,18 @@ static int finalize_hyp_mode(void)
return 0;
/*
- * Exclude HYP BSS from kmemleak so that it doesn't get peeked
- * at, which would end badly once the section is inaccessible.
- * None of other sections should ever be introspected.
+ * Exclude HYP sections from kmemleak so that they don't get peeked
+ * at, which would end badly once inaccessible.
*/
kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
+ kmemleak_free_part_phys(hyp_mem_base, hyp_mem_size);
return pkvm_drop_host_privileges();
}
struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr)
{
struct kvm_vcpu *vcpu;
- int i;
+ unsigned long i;
mpidr &= MPIDR_HWID_BITMASK;
kvm_for_each_vcpu(i, vcpu, kvm) {
@@ -2103,6 +2188,12 @@ int kvm_arch_init(void *opaque)
return -ENODEV;
}
+ err = kvm_sys_reg_table_init();
+ if (err) {
+ kvm_info("Error initializing system register tables");
+ return err;
+ }
+
in_hyp_mode = is_kernel_in_hyp_mode();
if (cpus_have_final_cap(ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE) ||
@@ -2118,6 +2209,12 @@ int kvm_arch_init(void *opaque)
if (err)
return err;
+ err = kvm_arm_vmid_alloc_init();
+ if (err) {
+ kvm_err("Failed to initialize VMID allocator.\n");
+ return err;
+ }
+
if (!in_hyp_mode) {
err = init_hyp_mode();
if (err)
@@ -2157,13 +2254,14 @@ out_hyp:
if (!in_hyp_mode)
teardown_hyp_mode();
out_err:
+ kvm_arm_vmid_alloc_free();
return err;
}
/* NOP: Compiling as a module not supported */
void kvm_arch_exit(void)
{
- kvm_perf_teardown();
+ kvm_unregister_perf_callbacks();
}
static int __init early_kvm_mode_cfg(char *arg)
@@ -2171,18 +2269,27 @@ static int __init early_kvm_mode_cfg(char *arg)
if (!arg)
return -EINVAL;
- if (strcmp(arg, "protected") == 0) {
- kvm_mode = KVM_MODE_PROTECTED;
+ if (strcmp(arg, "none") == 0) {
+ kvm_mode = KVM_MODE_NONE;
return 0;
}
- if (strcmp(arg, "nvhe") == 0 && !WARN_ON(is_kernel_in_hyp_mode())) {
- kvm_mode = KVM_MODE_DEFAULT;
+ if (!is_hyp_mode_available()) {
+ pr_warn_once("KVM is not available. Ignoring kvm-arm.mode\n");
return 0;
}
- if (strcmp(arg, "none") == 0) {
- kvm_mode = KVM_MODE_NONE;
+ if (strcmp(arg, "protected") == 0) {
+ if (!is_kernel_in_hyp_mode())
+ kvm_mode = KVM_MODE_PROTECTED;
+ else
+ pr_warn_once("Protected KVM not available with VHE\n");
+
+ return 0;
+ }
+
+ if (strcmp(arg, "nvhe") == 0 && !WARN_ON(is_kernel_in_hyp_mode())) {
+ kvm_mode = KVM_MODE_DEFAULT;
return 0;
}
diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
index db9361338b2a..fccf9ec01813 100644
--- a/arch/arm64/kvm/debug.c
+++ b/arch/arm64/kvm/debug.c
@@ -32,6 +32,10 @@ static DEFINE_PER_CPU(u64, mdcr_el2);
*
* Guest access to MDSCR_EL1 is trapped by the hypervisor and handled
* after we have restored the preserved value to the main context.
+ *
+ * When single-step is enabled by userspace, we tweak PSTATE.SS on every
+ * guest entry. Preserve PSTATE.SS so we can restore the original value
+ * for the vcpu after the single-step is disabled.
*/
static void save_guest_debug_regs(struct kvm_vcpu *vcpu)
{
@@ -41,6 +45,9 @@ static void save_guest_debug_regs(struct kvm_vcpu *vcpu)
trace_kvm_arm_set_dreg32("Saved MDSCR_EL1",
vcpu->arch.guest_debug_preserved.mdscr_el1);
+
+ vcpu->arch.guest_debug_preserved.pstate_ss =
+ (*vcpu_cpsr(vcpu) & DBG_SPSR_SS);
}
static void restore_guest_debug_regs(struct kvm_vcpu *vcpu)
@@ -51,6 +58,11 @@ static void restore_guest_debug_regs(struct kvm_vcpu *vcpu)
trace_kvm_arm_set_dreg32("Restored MDSCR_EL1",
vcpu_read_sys_reg(vcpu, MDSCR_EL1));
+
+ if (vcpu->arch.guest_debug_preserved.pstate_ss)
+ *vcpu_cpsr(vcpu) |= DBG_SPSR_SS;
+ else
+ *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS;
}
/**
@@ -104,10 +116,12 @@ static void kvm_arm_setup_mdcr_el2(struct kvm_vcpu *vcpu)
* Trap debug register access when one of the following is true:
* - Userspace is using the hardware to debug the guest
* (KVM_GUESTDBG_USE_HW is set).
- * - The guest is not using debug (KVM_ARM64_DEBUG_DIRTY is clear).
+ * - The guest is not using debug (DEBUG_DIRTY clear).
+ * - The guest has enabled the OS Lock (debug exceptions are blocked).
*/
if ((vcpu->guest_debug & KVM_GUESTDBG_USE_HW) ||
- !(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY))
+ !vcpu_get_flag(vcpu, DEBUG_DIRTY) ||
+ kvm_vcpu_os_lock_enabled(vcpu))
vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA;
trace_kvm_arm_set_dreg32("MDCR_EL2", vcpu->arch.mdcr_el2);
@@ -145,8 +159,8 @@ void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu)
* debug related registers.
*
* Additionally, KVM only traps guest accesses to the debug registers if
- * the guest is not actively using them (see the KVM_ARM64_DEBUG_DIRTY
- * flag on vcpu->arch.flags). Since the guest must not interfere
+ * the guest is not actively using them (see the DEBUG_DIRTY
+ * flag on vcpu->arch.iflags). Since the guest must not interfere
* with the hardware state when debugging the guest, we must ensure that
* trapping is enabled whenever we are debugging the guest using the
* debug registers.
@@ -160,8 +174,8 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
kvm_arm_setup_mdcr_el2(vcpu);
- /* Is Guest debugging in effect? */
- if (vcpu->guest_debug) {
+ /* Check if we need to use the debug registers. */
+ if (vcpu->guest_debug || kvm_vcpu_os_lock_enabled(vcpu)) {
/* Save guest debug state */
save_guest_debug_regs(vcpu);
@@ -186,7 +200,18 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
* debugging the system.
*/
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
- *vcpu_cpsr(vcpu) |= DBG_SPSR_SS;
+ /*
+ * If the software step state at the last guest exit
+ * was Active-pending, we don't set DBG_SPSR_SS so
+ * that the state is maintained (to not run another
+ * single-step until the pending Software Step
+ * exception is taken).
+ */
+ if (!vcpu_get_flag(vcpu, DBG_SS_ACTIVE_PENDING))
+ *vcpu_cpsr(vcpu) |= DBG_SPSR_SS;
+ else
+ *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS;
+
mdscr = vcpu_read_sys_reg(vcpu, MDSCR_EL1);
mdscr |= DBG_MDSCR_SS;
vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1);
@@ -203,9 +228,8 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
*
* We simply switch the debug_ptr to point to our new
* external_debug_state which has been populated by the
- * debug ioctl. The existing KVM_ARM64_DEBUG_DIRTY
- * mechanism ensures the registers are updated on the
- * world switch.
+ * debug ioctl. The existing DEBUG_DIRTY mechanism ensures
+ * the registers are updated on the world switch.
*/
if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW) {
/* Enable breakpoints/watchpoints */
@@ -214,7 +238,7 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1);
vcpu->arch.debug_ptr = &vcpu->arch.external_debug_state;
- vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY;
+ vcpu_set_flag(vcpu, DEBUG_DIRTY);
trace_kvm_arm_set_regset("BKPTS", get_num_brps(),
&vcpu->arch.debug_ptr->dbg_bcr[0],
@@ -223,6 +247,19 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
trace_kvm_arm_set_regset("WAPTS", get_num_wrps(),
&vcpu->arch.debug_ptr->dbg_wcr[0],
&vcpu->arch.debug_ptr->dbg_wvr[0]);
+
+ /*
+ * The OS Lock blocks debug exceptions in all ELs when it is
+ * enabled. If the guest has enabled the OS Lock, constrain its
+ * effects to the guest. Emulate the behavior by clearing
+ * MDSCR_EL1.MDE. In so doing, we ensure that host debug
+ * exceptions are unaffected by guest configuration of the OS
+ * Lock.
+ */
+ } else if (kvm_vcpu_os_lock_enabled(vcpu)) {
+ mdscr = vcpu_read_sys_reg(vcpu, MDSCR_EL1);
+ mdscr &= ~DBG_MDSCR_MDE;
+ vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1);
}
}
@@ -231,7 +268,7 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
/* If KDE or MDE are set, perform a full save/restore cycle. */
if (vcpu_read_sys_reg(vcpu, MDSCR_EL1) & (DBG_MDSCR_KDE | DBG_MDSCR_MDE))
- vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY;
+ vcpu_set_flag(vcpu, DEBUG_DIRTY);
/* Write mdcr_el2 changes since vcpu_load on VHE systems */
if (has_vhe() && orig_mdcr_el2 != vcpu->arch.mdcr_el2)
@@ -244,7 +281,19 @@ void kvm_arm_clear_debug(struct kvm_vcpu *vcpu)
{
trace_kvm_arm_clear_debug(vcpu->guest_debug);
- if (vcpu->guest_debug) {
+ /*
+ * Restore the guest's debug registers if we were using them.
+ */
+ if (vcpu->guest_debug || kvm_vcpu_os_lock_enabled(vcpu)) {
+ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
+ if (!(*vcpu_cpsr(vcpu) & DBG_SPSR_SS))
+ /*
+ * Mark the vcpu as ACTIVE_PENDING
+ * until Software Step exception is taken.
+ */
+ vcpu_set_flag(vcpu, DBG_SS_ACTIVE_PENDING);
+ }
+
restore_guest_debug_regs(vcpu);
/*
@@ -278,18 +327,18 @@ void kvm_arch_vcpu_load_debug_state_flags(struct kvm_vcpu *vcpu)
* If SPE is present on this CPU and is available at current EL,
* we may need to check if the host state needs to be saved.
*/
- if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_PMSVER_SHIFT) &&
+ if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_PMSVer_SHIFT) &&
!(read_sysreg_s(SYS_PMBIDR_EL1) & BIT(SYS_PMBIDR_EL1_P_SHIFT)))
- vcpu->arch.flags |= KVM_ARM64_DEBUG_STATE_SAVE_SPE;
+ vcpu_set_flag(vcpu, DEBUG_STATE_SAVE_SPE);
/* Check if we have TRBE implemented and available at the host */
- if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_TRBE_SHIFT) &&
+ if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_TraceBuffer_SHIFT) &&
!(read_sysreg_s(SYS_TRBIDR_EL1) & TRBIDR_PROG))
- vcpu->arch.flags |= KVM_ARM64_DEBUG_STATE_SAVE_TRBE;
+ vcpu_set_flag(vcpu, DEBUG_STATE_SAVE_TRBE);
}
void kvm_arch_vcpu_put_debug_state_flags(struct kvm_vcpu *vcpu)
{
- vcpu->arch.flags &= ~(KVM_ARM64_DEBUG_STATE_SAVE_SPE |
- KVM_ARM64_DEBUG_STATE_SAVE_TRBE);
+ vcpu_clear_flag(vcpu, DEBUG_STATE_SAVE_SPE);
+ vcpu_clear_flag(vcpu, DEBUG_STATE_SAVE_TRBE);
}
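Several hunks above key off kvm_vcpu_os_lock_enabled(), which is defined outside this file. Under the OS Lock emulation it is expected to amount to testing the guest's shadow OSLSR_EL1.OSLK bit; a rough sketch (helper name illustrative, bit 1 is OSLK per the architecture):
static inline bool vcpu_os_lock_enabled_sketch(struct kvm_vcpu *vcpu)
{
	/* OSLSR_EL1.OSLK (bit 1): set when the guest has locked the OS Lock. */
	return !!(__vcpu_sys_reg(vcpu, OSLSR_EL1) & BIT(1));
}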
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index 5621020b28de..ec8e4494873d 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -7,7 +7,6 @@
*/
#include <linux/irqflags.h>
#include <linux/sched.h>
-#include <linux/thread_info.h>
#include <linux/kvm_host.h>
#include <asm/fpsimd.h>
#include <asm/kvm_asm.h>
@@ -15,6 +14,19 @@
#include <asm/kvm_mmu.h>
#include <asm/sysreg.h>
+void kvm_vcpu_unshare_task_fp(struct kvm_vcpu *vcpu)
+{
+ struct task_struct *p = vcpu->arch.parent_task;
+ struct user_fpsimd_state *fpsimd;
+
+ if (!is_protected_kvm_enabled() || !p)
+ return;
+
+ fpsimd = &p->thread.uw.fpsimd_state;
+ kvm_unshare_hyp(fpsimd, fpsimd + 1);
+ put_task_struct(p);
+}
+
/*
* Called on entry to KVM_RUN unless this vcpu previously ran at least
* once and the most recent prior KVM_RUN for this vcpu was called from
@@ -28,36 +40,29 @@ int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu)
{
int ret;
- struct thread_info *ti = &current->thread_info;
struct user_fpsimd_state *fpsimd = &current->thread.uw.fpsimd_state;
- /*
- * Make sure the host task thread flags and fpsimd state are
- * visible to hyp:
- */
- ret = create_hyp_mappings(ti, ti + 1, PAGE_HYP);
- if (ret)
- goto error;
+ kvm_vcpu_unshare_task_fp(vcpu);
- ret = create_hyp_mappings(fpsimd, fpsimd + 1, PAGE_HYP);
+ /* Make sure the host task fpsimd state is visible to hyp: */
+ ret = kvm_share_hyp(fpsimd, fpsimd + 1);
if (ret)
- goto error;
+ return ret;
- if (vcpu->arch.sve_state) {
- void *sve_end;
-
- sve_end = vcpu->arch.sve_state + vcpu_sve_state_size(vcpu);
+ vcpu->arch.host_fpsimd_state = kern_hyp_va(fpsimd);
- ret = create_hyp_mappings(vcpu->arch.sve_state, sve_end,
- PAGE_HYP);
- if (ret)
- goto error;
+ /*
+ * We need to keep current's task_struct pinned until its data has been
+ * unshared with the hypervisor to make sure it is not re-used by the
+ * kernel and donated to someone else while already shared -- see
+ * kvm_vcpu_unshare_task_fp() for the matching put_task_struct().
+ */
+ if (is_protected_kvm_enabled()) {
+ get_task_struct(current);
+ vcpu->arch.parent_task = current;
}
- vcpu->arch.host_thread_info = kern_hyp_va(ti);
- vcpu->arch.host_fpsimd_state = kern_hyp_va(fpsimd);
-error:
- return ret;
+ return 0;
}
/*
@@ -66,40 +71,75 @@ error:
*
* Here, we just set the correct metadata to indicate that the FPSIMD
* state in the cpu regs (if any) belongs to current on the host.
- *
- * TIF_SVE is backed up here, since it may get clobbered with guest state.
- * This flag is restored by kvm_arch_vcpu_put_fp(vcpu).
*/
void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
{
BUG_ON(!current->mm);
+ BUG_ON(test_thread_flag(TIF_SVE));
- vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED |
- KVM_ARM64_HOST_SVE_IN_USE |
- KVM_ARM64_HOST_SVE_ENABLED);
- vcpu->arch.flags |= KVM_ARM64_FP_HOST;
+ if (!system_supports_fpsimd())
+ return;
- if (test_thread_flag(TIF_SVE))
- vcpu->arch.flags |= KVM_ARM64_HOST_SVE_IN_USE;
+ vcpu->arch.fp_state = FP_STATE_HOST_OWNED;
+ vcpu_clear_flag(vcpu, HOST_SVE_ENABLED);
if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
- vcpu->arch.flags |= KVM_ARM64_HOST_SVE_ENABLED;
+ vcpu_set_flag(vcpu, HOST_SVE_ENABLED);
+
+ /*
+ * We don't currently support SME guests but if we leave
+ * things in streaming mode then when the guest starts running
+ * FPSIMD or SVE code it may generate SME traps so as a
+ * special case if we are in streaming mode we force the host
+ * state to be saved now and exit streaming mode so that we
+ * don't have to handle any SME traps for valid guest
+ * operations. Do this for ZA as well for now for simplicity.
+ */
+ if (system_supports_sme()) {
+ vcpu_clear_flag(vcpu, HOST_SME_ENABLED);
+ if (read_sysreg(cpacr_el1) & CPACR_EL1_SMEN_EL0EN)
+ vcpu_set_flag(vcpu, HOST_SME_ENABLED);
+
+ if (read_sysreg_s(SYS_SVCR) & (SVCR_SM_MASK | SVCR_ZA_MASK)) {
+ vcpu->arch.fp_state = FP_STATE_FREE;
+ fpsimd_save_and_flush_cpu_state();
+ }
+ }
+}
+
+/*
+ * Called just before entering the guest once we are no longer preemptable
+ * and interrupts are disabled. If we have managed to run anything using
+ * FP while we were preemptible (such as off the back of an interrupt),
+ * then neither the host nor the guest own the FP hardware (and it was the
+ * responsibility of the code that used FP to save the existing state).
+ */
+void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu)
+{
+ if (test_thread_flag(TIF_FOREIGN_FPSTATE))
+ vcpu->arch.fp_state = FP_STATE_FREE;
}
/*
- * If the guest FPSIMD state was loaded, update the host's context
- * tracking data mark the CPU FPSIMD regs as dirty and belonging to vcpu
- * so that they will be written back if the kernel clobbers them due to
- * kernel-mode NEON before re-entry into the guest.
+ * Called just after exiting the guest. If the guest FPSIMD state
+ * was loaded, update the host's context tracking data mark the CPU
+ * FPSIMD regs as dirty and belonging to vcpu so that they will be
+ * written back if the kernel clobbers them due to kernel-mode NEON
+ * before re-entry into the guest.
*/
void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
{
WARN_ON_ONCE(!irqs_disabled());
- if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
+ if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED) {
+ /*
+ * Currently we do not support SME guests so SVCR is
+ * always 0 and we just need a variable to point to.
+ */
fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.fp_regs,
vcpu->arch.sve_state,
- vcpu->arch.sve_max_vl);
+ vcpu->arch.sve_max_vl,
+ NULL, 0, &vcpu->arch.svcr);
clear_thread_flag(TIF_FOREIGN_FPSTATE);
update_thread_flag(TIF_SVE, vcpu_has_sve(vcpu));
@@ -115,13 +155,27 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
{
unsigned long flags;
- bool host_has_sve = system_supports_sve();
- bool guest_has_sve = vcpu_has_sve(vcpu);
local_irq_save(flags);
- if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
- if (guest_has_sve) {
+ /*
+ * If we have VHE then the Hyp code will reset CPACR_EL1 to
+ * CPACR_EL1_DEFAULT and we need to reenable SME.
+ */
+ if (has_vhe() && system_supports_sme()) {
+ /* Also restore EL0 state seen on entry */
+ if (vcpu_get_flag(vcpu, HOST_SME_ENABLED))
+ sysreg_clear_set(CPACR_EL1, 0,
+ CPACR_EL1_SMEN_EL0EN |
+ CPACR_EL1_SMEN_EL1EN);
+ else
+ sysreg_clear_set(CPACR_EL1,
+ CPACR_EL1_SMEN_EL0EN,
+ CPACR_EL1_SMEN_EL1EN);
+ }
+
+ if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED) {
+ if (vcpu_has_sve(vcpu)) {
__vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR);
/* Restore the VL that was saved when bound to the CPU */
@@ -131,7 +185,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
}
fpsimd_save_and_flush_cpu_state();
- } else if (has_vhe() && host_has_sve) {
+ } else if (has_vhe() && system_supports_sve()) {
/*
* The FPSIMD/SVE state in the CPU has not been touched, and we
* have SVE (and VHE): CPACR_EL1 (alias CPTR_EL2) has been
@@ -139,14 +193,13 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
* for EL0. To avoid spurious traps, restore the trap state
* seen by kvm_arch_vcpu_load_fp():
*/
- if (vcpu->arch.flags & KVM_ARM64_HOST_SVE_ENABLED)
+ if (vcpu_get_flag(vcpu, HOST_SVE_ENABLED))
sysreg_clear_set(CPACR_EL1, 0, CPACR_EL1_ZEN_EL0EN);
else
sysreg_clear_set(CPACR_EL1, CPACR_EL1_ZEN_EL0EN, 0);
}
- update_thread_flag(TIF_SVE,
- vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE);
+ update_thread_flag(TIF_SVE, 0);
local_irq_restore(flags);
}
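The fp_state field used throughout this file replaces the old KVM_ARM64_FP_ENABLED/FP_HOST flag pair with a three-way ownership tracker. Its definition is not part of this hunk; conceptually it is just an enum along these lines (comments illustrative):
enum fp_state {
	FP_STATE_FREE,		/* neither host nor guest state is live in the FP regs */
	FP_STATE_HOST_OWNED,	/* the host task's FPSIMD/SVE state is loaded */
	FP_STATE_GUEST_OWNED,	/* the vcpu's FPSIMD/SVE state is loaded */
};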
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index e116c7767730..2ff13a3f8479 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -18,7 +18,7 @@
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <linux/fs.h>
-#include <kvm/arm_psci.h>
+#include <kvm/arm_hypercalls.h>
#include <asm/cputype.h>
#include <linux/uaccess.h>
#include <asm/fpsimd.h>
@@ -242,7 +242,7 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
u64 mode = (*(u64 *)valp) & PSR_AA32_MODE_MASK;
switch (mode) {
case PSR_AA32_MODE_USR:
- if (!system_supports_32bit_el0())
+ if (!kvm_supports_32bit_el0())
return -EINVAL;
break;
case PSR_AA32_MODE_FIQ:
@@ -282,7 +282,7 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
break;
/*
- * Otherwide, this is a priviledged mode, and *all* the
+ * Otherwise, this is a privileged mode, and *all* the
* registers must be narrowed to 32bit.
*/
default:
@@ -756,7 +756,9 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
switch (reg->id & KVM_REG_ARM_COPROC_MASK) {
case KVM_REG_ARM_CORE: return get_core_reg(vcpu, reg);
- case KVM_REG_ARM_FW: return kvm_arm_get_fw_reg(vcpu, reg);
+ case KVM_REG_ARM_FW:
+ case KVM_REG_ARM_FW_FEAT_BMAP:
+ return kvm_arm_get_fw_reg(vcpu, reg);
case KVM_REG_ARM64_SVE: return get_sve_reg(vcpu, reg);
}
@@ -774,7 +776,9 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
switch (reg->id & KVM_REG_ARM_COPROC_MASK) {
case KVM_REG_ARM_CORE: return set_core_reg(vcpu, reg);
- case KVM_REG_ARM_FW: return kvm_arm_set_fw_reg(vcpu, reg);
+ case KVM_REG_ARM_FW:
+ case KVM_REG_ARM_FW_FEAT_BMAP:
+ return kvm_arm_set_fw_reg(vcpu, reg);
case KVM_REG_ARM64_SVE: return set_sve_reg(vcpu, reg);
}
@@ -933,6 +937,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
} else {
/* If not enabled clear all flags */
vcpu->guest_debug = 0;
+ vcpu_clear_flag(vcpu, DBG_SS_ACTIVE_PENDING);
}
out:
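Both the legacy KVM_REG_ARM_FW registers and the new KVM_REG_ARM_FW_FEAT_BMAP group are now routed through kvm_arm_get_fw_reg()/kvm_arm_set_fw_reg(). From userspace they remain ordinary ONE_REG accesses; as an example, reading firmware register 0 (the PSCI version pseudo-register) looks roughly like this (error handling omitted):
#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <stdint.h>

static uint64_t read_psci_version(int vcpu_fd)
{
	uint64_t val = 0;
	struct kvm_one_reg reg = {
		.id   = KVM_REG_ARM_FW_REG(0),	/* firmware register 0: PSCI version */
		.addr = (uint64_t)&val,
	};

	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);	/* check the return value in real code */
	return val;
}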
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 5abe0617f2af..e778eefcf214 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -17,6 +17,7 @@
#include <asm/kvm_emulate.h>
#include <asm/kvm_mmu.h>
#include <asm/debug-monitors.h>
+#include <asm/stacktrace/nvhe.h>
#include <asm/traps.h>
#include <kvm/arm_hypercalls.h>
@@ -26,7 +27,7 @@
typedef int (*exit_handle_fn)(struct kvm_vcpu *);
-static void kvm_handle_guest_serror(struct kvm_vcpu *vcpu, u32 esr)
+static void kvm_handle_guest_serror(struct kvm_vcpu *vcpu, u64 esr)
{
if (!arm64_is_ras_serror(esr) || arm64_is_fatal_ras_serror(NULL, esr))
kvm_inject_vabt(vcpu);
@@ -80,25 +81,51 @@ static int handle_no_fpsimd(struct kvm_vcpu *vcpu)
*
* @vcpu: the vcpu pointer
*
- * WFE: Yield the CPU and come back to this vcpu when the scheduler
+ * WFE[T]: Yield the CPU and come back to this vcpu when the scheduler
* decides to.
- * WFI: Simply call kvm_vcpu_block(), which will halt execution of
+ * WFI: Simply call kvm_vcpu_halt(), which will halt execution of
* world-switches and schedule other host processes until there is an
* incoming IRQ or FIQ to the VM.
+ * WFIT: Same as WFI, with a timed wakeup implemented as a background timer
+ *
+ * WF{I,E}T can immediately return if the deadline has already expired.
*/
static int kvm_handle_wfx(struct kvm_vcpu *vcpu)
{
- if (kvm_vcpu_get_esr(vcpu) & ESR_ELx_WFx_ISS_WFE) {
+ u64 esr = kvm_vcpu_get_esr(vcpu);
+
+ if (esr & ESR_ELx_WFx_ISS_WFE) {
trace_kvm_wfx_arm64(*vcpu_pc(vcpu), true);
vcpu->stat.wfe_exit_stat++;
- kvm_vcpu_on_spin(vcpu, vcpu_mode_priv(vcpu));
} else {
trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false);
vcpu->stat.wfi_exit_stat++;
- kvm_vcpu_block(vcpu);
- kvm_clear_request(KVM_REQ_UNHALT, vcpu);
}
+ if (esr & ESR_ELx_WFx_ISS_WFxT) {
+ if (esr & ESR_ELx_WFx_ISS_RV) {
+ u64 val, now;
+
+ now = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_TIMER_CNT);
+ val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu));
+
+ if (now >= val)
+ goto out;
+ } else {
+ /* Treat WFxT as WFx if RN is invalid */
+ esr &= ~ESR_ELx_WFx_ISS_WFxT;
+ }
+ }
+
+ if (esr & ESR_ELx_WFx_ISS_WFE) {
+ kvm_vcpu_on_spin(vcpu, vcpu_mode_priv(vcpu));
+ } else {
+ if (esr & ESR_ELx_WFx_ISS_WFxT)
+ vcpu_set_flag(vcpu, IN_WFIT);
+
+ kvm_vcpu_wfi(vcpu);
+ }
+out:
kvm_incr_pc(vcpu);
return 1;
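
A condensed C model of the WFx dispatch implemented by this hunk may help when reading it; it reuses only helpers that appear above and is an illustration, not a drop-in replacement for the handler:

	/* Illustration only: decision logic of kvm_handle_wfx() in one place. */
	bool is_wfe  = esr & ESR_ELx_WFx_ISS_WFE;
	bool is_wfxt = esr & ESR_ELx_WFx_ISS_WFxT;

	if (is_wfxt && (esr & ESR_ELx_WFx_ISS_RV) &&
	    kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_TIMER_CNT) >=
	    vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu))) {
		/* Deadline already reached: don't block at all. */
	} else if (is_wfe) {
		kvm_vcpu_on_spin(vcpu, vcpu_mode_priv(vcpu));
	} else {
		if (is_wfxt && (esr & ESR_ELx_WFx_ISS_RV))
			vcpu_set_flag(vcpu, IN_WFIT);	/* arm the timed wakeup */
		kvm_vcpu_wfi(vcpu);			/* plain WFI otherwise */
	}
	kvm_incr_pc(vcpu);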
@@ -118,22 +145,30 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu)
static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu)
{
struct kvm_run *run = vcpu->run;
- u32 esr = kvm_vcpu_get_esr(vcpu);
+ u64 esr = kvm_vcpu_get_esr(vcpu);
run->exit_reason = KVM_EXIT_DEBUG;
- run->debug.arch.hsr = esr;
+ run->debug.arch.hsr = lower_32_bits(esr);
+ run->debug.arch.hsr_high = upper_32_bits(esr);
+ run->flags = KVM_DEBUG_ARCH_HSR_HIGH_VALID;
- if (ESR_ELx_EC(esr) == ESR_ELx_EC_WATCHPT_LOW)
+ switch (ESR_ELx_EC(esr)) {
+ case ESR_ELx_EC_WATCHPT_LOW:
run->debug.arch.far = vcpu->arch.fault.far_el2;
+ break;
+ case ESR_ELx_EC_SOFTSTP_LOW:
+ vcpu_clear_flag(vcpu, DBG_SS_ACTIVE_PENDING);
+ break;
+ }
return 0;
}
static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu)
{
- u32 esr = kvm_vcpu_get_esr(vcpu);
+ u64 esr = kvm_vcpu_get_esr(vcpu);
- kvm_pr_unimpl("Unknown exception class: esr: %#08x -- %s\n",
+ kvm_pr_unimpl("Unknown exception class: esr: %#016llx -- %s\n",
esr, esr_get_class_string(esr));
kvm_inject_undefined(vcpu);
@@ -168,6 +203,7 @@ static exit_handle_fn arm_exit_handlers[] = {
[ESR_ELx_EC_CP15_64] = kvm_handle_cp15_64,
[ESR_ELx_EC_CP14_MR] = kvm_handle_cp14_32,
[ESR_ELx_EC_CP14_LS] = kvm_handle_cp14_load_store,
+ [ESR_ELx_EC_CP10_ID] = kvm_handle_cp10_id,
[ESR_ELx_EC_CP14_64] = kvm_handle_cp14_64,
[ESR_ELx_EC_HVC32] = handle_hvc,
[ESR_ELx_EC_SMC32] = handle_smc,
@@ -188,7 +224,7 @@ static exit_handle_fn arm_exit_handlers[] = {
static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
{
- u32 esr = kvm_vcpu_get_esr(vcpu);
+ u64 esr = kvm_vcpu_get_esr(vcpu);
u8 esr_ec = ESR_ELx_EC(esr);
return arm_exit_handlers[esr_ec];
@@ -229,6 +265,14 @@ int handle_exit(struct kvm_vcpu *vcpu, int exception_index)
{
struct kvm_run *run = vcpu->run;
+ if (ARM_SERROR_PENDING(exception_index)) {
+ /*
+ * The SError is handled by handle_exit_early(). If the guest
+ * survives it will re-execute the original instruction.
+ */
+ return 1;
+ }
+
exception_index = ARM_EXCEPTION_CODE(exception_index);
switch (exception_index) {
@@ -241,7 +285,7 @@ int handle_exit(struct kvm_vcpu *vcpu, int exception_index)
case ARM_EXCEPTION_HYP_GONE:
/*
* EL2 has been reset to the hyp-stub. This happens when a guest
- * is pre-empted by kvm_reboot()'s shutdown call.
+ * is pre-emptied by kvm_reboot()'s shutdown call.
*/
run->exit_reason = KVM_EXIT_FAIL_ENTRY;
return 0;
@@ -288,13 +332,8 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr,
u64 elr_in_kimg = __phys_to_kimg(elr_phys);
u64 hyp_offset = elr_in_kimg - kaslr_offset() - elr_virt;
u64 mode = spsr & PSR_MODE_MASK;
+ u64 panic_addr = elr_virt + hyp_offset;
- /*
- * The nVHE hyp symbols are not included by kallsyms to avoid issues
- * with aliasing. That means that the symbols cannot be printed with the
- * "%pS" format specifier, so fall back to the vmlinux address if
- * there's no better option.
- */
if (mode != PSR_MODE_EL2t && mode != PSR_MODE_EL2h) {
kvm_err("Invalid host exception to nVHE hyp!\n");
} else if (ESR_ELx_EC(esr) == ESR_ELx_EC_BRK64 &&
@@ -314,11 +353,16 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr,
if (file)
kvm_err("nVHE hyp BUG at: %s:%u!\n", file, line);
else
- kvm_err("nVHE hyp BUG at: %016llx!\n", elr_virt + hyp_offset);
+ kvm_err("nVHE hyp BUG at: [<%016llx>] %pB!\n", panic_addr,
+ (void *)(panic_addr + kaslr_offset()));
} else {
- kvm_err("nVHE hyp panic at: %016llx!\n", elr_virt + hyp_offset);
+ kvm_err("nVHE hyp panic at: [<%016llx>] %pB!\n", panic_addr,
+ (void *)(panic_addr + kaslr_offset()));
}
+ /* Dump the nVHE hypervisor backtrace */
+ kvm_nvhe_dump_backtrace(hyp_offset);
+
/*
* Hyp has panicked and we're going to handle that by panicking the
* kernel. The kernel offset will be revealed in the panic so we're
@@ -327,6 +371,6 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr,
*/
kvm_err("Hyp Offset: 0x%llx\n", hyp_offset);
- panic("HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%016lx\n",
+ panic("HYP panic:\nPS:%08llx PC:%016llx ESR:%016llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%016lx\n",
spsr, elr_virt, esr, far, hpfar, par, vcpu);
}
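
The %pB conversions above rest on a small piece of address arithmetic, sketched here for illustration from the values computed at the top of the function:

	hyp_offset  = elr_in_kimg - kaslr_offset() - elr_virt
	panic_addr  = elr_virt + hyp_offset
	            = elr_in_kimg - kaslr_offset()
	panic_addr + kaslr_offset() == elr_in_kimg

so the value handed to %pB is the relocated kernel-image address of the shared hyp text, which kallsyms can symbolize, while the bracketed value that is actually printed stays unrelocated and therefore does not reveal the KASLR offset.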
diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile
index b726332eec49..a38dea6186c9 100644
--- a/arch/arm64/kvm/hyp/Makefile
+++ b/arch/arm64/kvm/hyp/Makefile
@@ -5,9 +5,6 @@
incdir := $(srctree)/$(src)/include
subdir-asflags-y := -I$(incdir)
-subdir-ccflags-y := -I$(incdir) \
- -fno-stack-protector \
- -DDISABLE_BRANCH_PROFILING \
- $(DISABLE_STACKLEAK_PLUGIN)
+subdir-ccflags-y := -I$(incdir)
-obj-$(CONFIG_KVM) += vhe/ nvhe/ pgtable.o reserved_mem.o
+obj-$(CONFIG_KVM) += vhe/ nvhe/ pgtable.o
diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c
index 0418399e0a20..791d3de76771 100644
--- a/arch/arm64/kvm/hyp/exception.c
+++ b/arch/arm64/kvm/hyp/exception.c
@@ -13,6 +13,7 @@
#include <hyp/adjust_pc.h>
#include <linux/kvm_host.h>
#include <asm/kvm_emulate.h>
+#include <asm/kvm_mmu.h>
#if !defined (__KVM_NVHE_HYPERVISOR__) && !defined (__KVM_VHE_HYPERVISOR__)
#error Hypervisor code only!
@@ -38,7 +39,10 @@ static inline void __vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
static void __vcpu_write_spsr(struct kvm_vcpu *vcpu, u64 val)
{
- write_sysreg_el1(val, SYS_SPSR);
+ if (has_vhe())
+ write_sysreg_el1(val, SYS_SPSR);
+ else
+ __vcpu_sys_reg(vcpu, SPSR_EL1) = val;
}
static void __vcpu_write_spsr_abt(struct kvm_vcpu *vcpu, u64 val)
@@ -112,7 +116,7 @@ static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode,
new |= (old & PSR_C_BIT);
new |= (old & PSR_V_BIT);
- if (kvm_has_mte(vcpu->kvm))
+ if (kvm_has_mte(kern_hyp_va(vcpu->kvm)))
new |= PSR_TCO_BIT;
new |= (old & PSR_DIT_BIT);
@@ -300,14 +304,14 @@ static void enter_exception32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset)
static void kvm_inject_exception(struct kvm_vcpu *vcpu)
{
if (vcpu_el1_is_32bit(vcpu)) {
- switch (vcpu->arch.flags & KVM_ARM64_EXCEPT_MASK) {
- case KVM_ARM64_EXCEPT_AA32_UND:
+ switch (vcpu_get_flag(vcpu, EXCEPT_MASK)) {
+ case unpack_vcpu_flag(EXCEPT_AA32_UND):
enter_exception32(vcpu, PSR_AA32_MODE_UND, 4);
break;
- case KVM_ARM64_EXCEPT_AA32_IABT:
+ case unpack_vcpu_flag(EXCEPT_AA32_IABT):
enter_exception32(vcpu, PSR_AA32_MODE_ABT, 12);
break;
- case KVM_ARM64_EXCEPT_AA32_DABT:
+ case unpack_vcpu_flag(EXCEPT_AA32_DABT):
enter_exception32(vcpu, PSR_AA32_MODE_ABT, 16);
break;
default:
@@ -315,9 +319,8 @@ static void kvm_inject_exception(struct kvm_vcpu *vcpu)
break;
}
} else {
- switch (vcpu->arch.flags & KVM_ARM64_EXCEPT_MASK) {
- case (KVM_ARM64_EXCEPT_AA64_ELx_SYNC |
- KVM_ARM64_EXCEPT_AA64_EL1):
+ switch (vcpu_get_flag(vcpu, EXCEPT_MASK)) {
+ case unpack_vcpu_flag(EXCEPT_AA64_EL1_SYNC):
enter_exception64(vcpu, PSR_MODE_EL1h, except_type_sync);
break;
default:
@@ -337,12 +340,12 @@ static void kvm_inject_exception(struct kvm_vcpu *vcpu)
*/
void __kvm_adjust_pc(struct kvm_vcpu *vcpu)
{
- if (vcpu->arch.flags & KVM_ARM64_PENDING_EXCEPTION) {
+ if (vcpu_get_flag(vcpu, PENDING_EXCEPTION)) {
kvm_inject_exception(vcpu);
- vcpu->arch.flags &= ~(KVM_ARM64_PENDING_EXCEPTION |
- KVM_ARM64_EXCEPT_MASK);
- } else if (vcpu->arch.flags & KVM_ARM64_INCREMENT_PC) {
+ vcpu_clear_flag(vcpu, PENDING_EXCEPTION);
+ vcpu_clear_flag(vcpu, EXCEPT_MASK);
+ } else if (vcpu_get_flag(vcpu, INCREMENT_PC)) {
kvm_skip_instr(vcpu);
- vcpu->arch.flags &= ~KVM_ARM64_INCREMENT_PC;
+ vcpu_clear_flag(vcpu, INCREMENT_PC);
}
}
diff --git a/arch/arm64/kvm/hyp/fpsimd.S b/arch/arm64/kvm/hyp/fpsimd.S
index e950875e31ce..61e6f3ba7b7d 100644
--- a/arch/arm64/kvm/hyp/fpsimd.S
+++ b/arch/arm64/kvm/hyp/fpsimd.S
@@ -25,9 +25,3 @@ SYM_FUNC_START(__sve_restore_state)
sve_load 0, x1, x2, 3
ret
SYM_FUNC_END(__sve_restore_state)
-
-SYM_FUNC_START(__sve_save_state)
- mov x2, #1
- sve_save 0, x1, x2, 3
- ret
-SYM_FUNC_END(__sve_save_state)
diff --git a/arch/arm64/kvm/hyp/hyp-constants.c b/arch/arm64/kvm/hyp/hyp-constants.c
new file mode 100644
index 000000000000..b3742a6691e8
--- /dev/null
+++ b/arch/arm64/kvm/hyp/hyp-constants.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/kbuild.h>
+#include <nvhe/memory.h>
+
+int main(void)
+{
+ DEFINE(STRUCT_HYP_PAGE_SIZE, sizeof(struct hyp_page));
+ return 0;
+}
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index b6b6801d96d5..8f3f93fa119e 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -62,6 +62,10 @@ el1_sync: // Guest trapped into EL2
/* ARM_SMCCC_ARCH_WORKAROUND_2 handling */
eor w1, w1, #(ARM_SMCCC_ARCH_WORKAROUND_1 ^ \
ARM_SMCCC_ARCH_WORKAROUND_2)
+ cbz w1, wa_epilogue
+
+ eor w1, w1, #(ARM_SMCCC_ARCH_WORKAROUND_2 ^ \
+ ARM_SMCCC_ARCH_WORKAROUND_3)
cbnz w1, el1_trap
wa_epilogue:
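
The EOR chain added above is a branchless compare against successive SMCCC function IDs. A rough C equivalent, for illustration only and assuming (as in the unmodified lines just before this hunk) that w1 arrives holding function_id ^ ARM_SMCCC_ARCH_WORKAROUND_1:

	w1 ^= ARM_SMCCC_ARCH_WORKAROUND_1 ^ ARM_SMCCC_ARCH_WORKAROUND_2;
	if (w1 == 0)		/* function_id == ARCH_WORKAROUND_2 */
		goto wa_epilogue;

	w1 ^= ARM_SMCCC_ARCH_WORKAROUND_2 ^ ARM_SMCCC_ARCH_WORKAROUND_3;
	if (w1 != 0)		/* function_id != ARCH_WORKAROUND_3 */
		goto el1_trap;
	/* falls through to wa_epilogue for ARCH_WORKAROUND_3 */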
@@ -192,7 +196,10 @@ SYM_CODE_END(__kvm_hyp_vector)
sub sp, sp, #(8 * 4)
stp x2, x3, [sp, #(8 * 0)]
stp x0, x1, [sp, #(8 * 2)]
+ alternative_cb ARM64_ALWAYS_SYSTEM, spectre_bhb_patch_wa3
+ /* Patched to mov WA3 when supported */
mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1
+ alternative_cb_end
smc #0
ldp x2, x3, [sp, #(8 * 0)]
add sp, sp, #(8 * 2)
@@ -205,9 +212,11 @@ SYM_CODE_END(__kvm_hyp_vector)
spectrev2_smccc_wa1_smc
.else
stp x0, x1, [sp, #-16]!
+ mitigate_spectre_bhb_loop x0
+ mitigate_spectre_bhb_clear_insn
.endif
.if \indirect != 0
- alternative_cb kvm_patch_vector_branch
+ alternative_cb ARM64_ALWAYS_SYSTEM, kvm_patch_vector_branch
/*
* For ARM64_SPECTRE_V3A configurations, these NOPs get replaced with:
*
diff --git a/arch/arm64/kvm/hyp/include/hyp/debug-sr.h b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h
index 4ebe9f558f3a..961bbef104a6 100644
--- a/arch/arm64/kvm/hyp/include/hyp/debug-sr.h
+++ b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h
@@ -132,7 +132,7 @@ static inline void __debug_switch_to_guest_common(struct kvm_vcpu *vcpu)
struct kvm_guest_debug_arch *host_dbg;
struct kvm_guest_debug_arch *guest_dbg;
- if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY))
+ if (!vcpu_get_flag(vcpu, DEBUG_DIRTY))
return;
host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
@@ -151,7 +151,7 @@ static inline void __debug_switch_to_host_common(struct kvm_vcpu *vcpu)
struct kvm_guest_debug_arch *host_dbg;
struct kvm_guest_debug_arch *guest_dbg;
- if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY))
+ if (!vcpu_get_flag(vcpu, DEBUG_DIRTY))
return;
host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
@@ -162,7 +162,7 @@ static inline void __debug_switch_to_host_common(struct kvm_vcpu *vcpu)
__debug_save_state(guest_dbg, guest_ctxt);
__debug_restore_state(host_dbg, host_ctxt);
- vcpu->arch.flags &= ~KVM_ARM64_DEBUG_DIRTY;
+ vcpu_clear_flag(vcpu, DEBUG_DIRTY);
}
#endif /* __ARM64_KVM_HYP_DEBUG_SR_H__ */
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 96c5f3fb7838..3330d1b76bdd 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -29,7 +29,6 @@
#include <asm/fpsimd.h>
#include <asm/debug-monitors.h>
#include <asm/processor.h>
-#include <asm/thread_info.h>
struct kvm_exception_table_entry {
int insn, fixup;
@@ -38,22 +37,10 @@ struct kvm_exception_table_entry {
extern struct kvm_exception_table_entry __start___kvm_ex_table;
extern struct kvm_exception_table_entry __stop___kvm_ex_table;
-/* Check whether the FP regs were dirtied while in the host-side run loop: */
-static inline bool update_fp_enabled(struct kvm_vcpu *vcpu)
+/* Check whether the FP regs are owned by the guest */
+static inline bool guest_owns_fp_regs(struct kvm_vcpu *vcpu)
{
- /*
- * When the system doesn't support FP/SIMD, we cannot rely on
- * the _TIF_FOREIGN_FPSTATE flag. However, we always inject an
- * abort on the very first access to FP and thus we should never
- * see KVM_ARM64_FP_ENABLED. For added safety, make sure we always
- * trap the accesses.
- */
- if (!system_supports_fpsimd() ||
- vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE)
- vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED |
- KVM_ARM64_FP_HOST);
-
- return !!(vcpu->arch.flags & KVM_ARM64_FP_ENABLED);
+ return vcpu->arch.fp_state == FP_STATE_GUEST_OWNED;
}
/* Save the 32-bit only FPSIMD system register state */
@@ -100,6 +87,17 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
vcpu->arch.mdcr_el2_host = read_sysreg(mdcr_el2);
write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
+
+ if (cpus_have_final_cap(ARM64_SME)) {
+ sysreg_clear_set_s(SYS_HFGRTR_EL2,
+ HFGxTR_EL2_nSMPRI_EL1_MASK |
+ HFGxTR_EL2_nTPIDR2_EL0_MASK,
+ 0);
+ sysreg_clear_set_s(SYS_HFGWTR_EL2,
+ HFGxTR_EL2_nSMPRI_EL1_MASK |
+ HFGxTR_EL2_nTPIDR2_EL0_MASK,
+ 0);
+ }
}
static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu)
@@ -109,6 +107,15 @@ static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu)
write_sysreg(0, hstr_el2);
if (kvm_arm_support_pmu_v3())
write_sysreg(0, pmuserenr_el0);
+
+ if (cpus_have_final_cap(ARM64_SME)) {
+ sysreg_clear_set_s(SYS_HFGRTR_EL2, 0,
+ HFGxTR_EL2_nSMPRI_EL1_MASK |
+ HFGxTR_EL2_nTPIDR2_EL0_MASK);
+ sysreg_clear_set_s(SYS_HFGWTR_EL2, 0,
+ HFGxTR_EL2_nSMPRI_EL1_MASK |
+ HFGxTR_EL2_nTPIDR2_EL0_MASK);
+ }
}
static inline void ___activate_traps(struct kvm_vcpu *vcpu)
@@ -143,16 +150,6 @@ static inline bool __populate_fault_info(struct kvm_vcpu *vcpu)
return __get_fault_info(vcpu->arch.fault.esr_el2, &vcpu->arch.fault);
}
-static inline void __hyp_sve_save_host(struct kvm_vcpu *vcpu)
-{
- struct thread_struct *thread;
-
- thread = container_of(vcpu->arch.host_fpsimd_state, struct thread_struct,
- uw.fpsimd_state);
-
- __sve_save_state(sve_pffr(thread), &vcpu->arch.host_fpsimd_state->fpsr);
-}
-
static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
{
sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2);
@@ -169,21 +166,14 @@ static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
*/
static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
{
- bool sve_guest, sve_host;
+ bool sve_guest;
u8 esr_ec;
u64 reg;
if (!system_supports_fpsimd())
return false;
- if (system_supports_sve()) {
- sve_guest = vcpu_has_sve(vcpu);
- sve_host = vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE;
- } else {
- sve_guest = false;
- sve_host = false;
- }
-
+ sve_guest = vcpu_has_sve(vcpu);
esr_ec = kvm_vcpu_trap_get_class(vcpu);
/* Don't handle SVE traps for non-SVE vcpus here: */
@@ -191,10 +181,12 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
return false;
/* Valid trap. Switch the context: */
+
+ /* First disable enough traps to allow us to update the registers */
if (has_vhe()) {
- reg = CPACR_EL1_FPEN;
+ reg = CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN;
if (sve_guest)
- reg |= CPACR_EL1_ZEN;
+ reg |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
sysreg_clear_set(cpacr_el1, 0, reg);
} else {
@@ -206,15 +198,11 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
}
isb();
- if (vcpu->arch.flags & KVM_ARM64_FP_HOST) {
- if (sve_host)
- __hyp_sve_save_host(vcpu);
- else
- __fpsimd_save_state(vcpu->arch.host_fpsimd_state);
-
- vcpu->arch.flags &= ~KVM_ARM64_FP_HOST;
- }
+ /* Write out the host state if it's in the registers */
+ if (vcpu->arch.fp_state == FP_STATE_HOST_OWNED)
+ __fpsimd_save_state(vcpu->arch.host_fpsimd_state);
+ /* Restore the guest state */
if (sve_guest)
__hyp_sve_restore_guest(vcpu);
else
@@ -224,7 +212,7 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
if (!(read_sysreg(hcr_el2) & HCR_RW))
write_sysreg(__vcpu_sys_reg(vcpu, FPEXC32_EL2), fpexc32_el2);
- vcpu->arch.flags |= KVM_ARM64_FP_ENABLED;
+ vcpu->arch.fp_state = FP_STATE_GUEST_OWNED;
return true;
}
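
The flag-based FP bookkeeping is replaced here by a single ownership tristate. As an assumption for reference only, the series appears to define it in kvm_host.h roughly as:

	enum fp_state {
		FP_STATE_FREE,
		FP_STATE_HOST_OWNED,
		FP_STATE_GUEST_OWNED,
	};

With that, guest_owns_fp_regs() and the trap handler above only ever need to ask which of the three owners currently has its state live in the registers, instead of juggling FP_ENABLED/FP_HOST flag bits.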
@@ -284,7 +272,7 @@ static inline bool handle_tx2_tvm(struct kvm_vcpu *vcpu)
return true;
}
-static inline bool esr_is_ptrauth_trap(u32 esr)
+static inline bool esr_is_ptrauth_trap(u64 esr)
{
switch (esr_sys64_to_sysreg(esr)) {
case SYS_APIAKEYLO_EL1:
@@ -424,6 +412,24 @@ static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
return false;
}
+static inline void synchronize_vcpu_pstate(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ /*
+ * Check for the conditions of Cortex-A510's #2077057. When these occur
+ * SPSR_EL2 can't be trusted, but isn't needed either as it is
+ * unchanged from the value in vcpu_gp_regs(vcpu)->pstate.
+ * Are we single-stepping the guest, and took a PAC exception from the
+ * active-not-pending state?
+ */
+ if (cpus_have_final_cap(ARM64_WORKAROUND_2077057) &&
+ vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP &&
+ *vcpu_cpsr(vcpu) & DBG_SPSR_SS &&
+ ESR_ELx_EC(read_sysreg_el2(SYS_ESR)) == ESR_ELx_EC_PAC)
+ write_sysreg_el2(*vcpu_cpsr(vcpu), SYS_SPSR);
+
+ vcpu->arch.ctxt.regs.pstate = read_sysreg_el2(SYS_SPSR);
+}
+
/*
* Return true when we were able to fixup the guest exit and should return to
* the guest, false when we should restore the host state and return to the
@@ -435,7 +441,7 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
* Save PSTATE early so that we can evaluate the vcpu mode
* early on.
*/
- vcpu->arch.ctxt.regs.pstate = read_sysreg_el2(SYS_SPSR);
+ synchronize_vcpu_pstate(vcpu, exit_code);
/*
* Check whether we want to repaint the state one way or
@@ -446,7 +452,8 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ)
vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR);
- if (ARM_SERROR_PENDING(*exit_code)) {
+ if (ARM_SERROR_PENDING(*exit_code) &&
+ ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ) {
u8 esr_ec = kvm_vcpu_trap_get_class(vcpu);
/*
diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
index 7ecca8b07851..baa5b9b3dde5 100644
--- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
+++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
@@ -195,7 +195,7 @@ static inline void __sysreg32_save_state(struct kvm_vcpu *vcpu)
__vcpu_sys_reg(vcpu, DACR32_EL2) = read_sysreg(dacr32_el2);
__vcpu_sys_reg(vcpu, IFSR32_EL2) = read_sysreg(ifsr32_el2);
- if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)
+ if (has_vhe() || vcpu_get_flag(vcpu, DEBUG_DIRTY))
__vcpu_sys_reg(vcpu, DBGVCR32_EL2) = read_sysreg(dbgvcr32_el2);
}
@@ -212,7 +212,7 @@ static inline void __sysreg32_restore_state(struct kvm_vcpu *vcpu)
write_sysreg(__vcpu_sys_reg(vcpu, DACR32_EL2), dacr32_el2);
write_sysreg(__vcpu_sys_reg(vcpu, IFSR32_EL2), ifsr32_el2);
- if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)
+ if (has_vhe() || vcpu_get_flag(vcpu, DEBUG_DIRTY))
write_sysreg(__vcpu_sys_reg(vcpu, DBGVCR32_EL2), dbgvcr32_el2);
}
diff --git a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
index eea1f6a53723..07edfc7524c9 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
@@ -35,9 +35,9 @@
* - Data Independent Timing
*/
#define PVM_ID_AA64PFR0_ALLOW (\
- ARM64_FEATURE_MASK(ID_AA64PFR0_FP) | \
- ARM64_FEATURE_MASK(ID_AA64PFR0_ASIMD) | \
- ARM64_FEATURE_MASK(ID_AA64PFR0_DIT) \
+ ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_FP) | \
+ ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AdvSIMD) | \
+ ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_DIT) \
)
/*
@@ -49,11 +49,11 @@
* Supported by KVM
*/
#define PVM_ID_AA64PFR0_RESTRICT_UNSIGNED (\
- FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL0), ID_AA64PFR0_ELx_64BIT_ONLY) | \
- FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1), ID_AA64PFR0_ELx_64BIT_ONLY) | \
- FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL2), ID_AA64PFR0_ELx_64BIT_ONLY) | \
- FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL3), ID_AA64PFR0_ELx_64BIT_ONLY) | \
- FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_RAS), ID_AA64PFR0_RAS_V1) \
+ FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \
+ FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL1), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \
+ FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL2), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \
+ FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL3), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \
+ FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_RAS), ID_AA64PFR0_EL1_RAS_IMP) \
)
/*
@@ -62,8 +62,8 @@
* - Speculative Store Bypassing
*/
#define PVM_ID_AA64PFR1_ALLOW (\
- ARM64_FEATURE_MASK(ID_AA64PFR1_BT) | \
- ARM64_FEATURE_MASK(ID_AA64PFR1_SSBS) \
+ ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_BT) | \
+ ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_SSBS) \
)
/*
@@ -74,10 +74,10 @@
* - Non-context synchronizing exception entry and exit
*/
#define PVM_ID_AA64MMFR0_ALLOW (\
- ARM64_FEATURE_MASK(ID_AA64MMFR0_BIGENDEL) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR0_SNSMEM) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR0_BIGENDEL0) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR0_EXS) \
+ ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_BIGEND) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_SNSMEM) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_BIGENDEL0) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_EXS) \
)
/*
@@ -86,8 +86,8 @@
* - 16-bit ASID
*/
#define PVM_ID_AA64MMFR0_RESTRICT_UNSIGNED (\
- FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_PARANGE), ID_AA64MMFR0_PARANGE_40) | \
- FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_ASID), ID_AA64MMFR0_ASID_16) \
+ FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_PARANGE), ID_AA64MMFR0_EL1_PARANGE_40) | \
+ FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_ASIDBITS), ID_AA64MMFR0_EL1_ASIDBITS_16) \
)
/*
@@ -100,12 +100,12 @@
* - Enhanced Translation Synchronization
*/
#define PVM_ID_AA64MMFR1_ALLOW (\
- ARM64_FEATURE_MASK(ID_AA64MMFR1_HADBS) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR1_VMIDBITS) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR1_HPD) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR1_PAN) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR1_SPECSEI) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR1_ETS) \
+ ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HAFDBS) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_VMIDBits) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HPDS) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_PAN) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_SpecSEI) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_ETS) \
)
/*
@@ -120,14 +120,14 @@
* - E0PDx mechanism
*/
#define PVM_ID_AA64MMFR2_ALLOW (\
- ARM64_FEATURE_MASK(ID_AA64MMFR2_CNP) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR2_UAO) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR2_IESB) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR2_AT) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR2_IDS) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR2_TTL) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR2_BBM) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR2_E0PD) \
+ ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_CnP) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_UAO) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_IESB) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_AT) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_IDS) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_TTL) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_BBM) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_E0PD) \
)
/*
@@ -159,37 +159,42 @@
* No restrictions on instructions implemented in AArch64.
*/
#define PVM_ID_AA64ISAR0_ALLOW (\
- ARM64_FEATURE_MASK(ID_AA64ISAR0_AES) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA1) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA2) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_CRC32) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_ATOMICS) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_RDM) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA3) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_SM3) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_SM4) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_DP) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_FHM) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_TS) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_TLB) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_RNDR) \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_AES) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SHA1) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SHA2) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_CRC32) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_ATOMIC) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_RDM) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SHA3) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SM3) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SM4) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_DP) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_FHM) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_TS) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_TLB) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_RNDR) \
)
#define PVM_ID_AA64ISAR1_ALLOW (\
- ARM64_FEATURE_MASK(ID_AA64ISAR1_DPB) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_APA) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_API) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_JSCVT) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_FCMA) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_LRCPC) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_FRINTTS) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_SB) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_SPECRES) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_BF16) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_DGH) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_I8MM) \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_DPB) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_JSCVT) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_FCMA) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_LRCPC) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPA) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPI) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_FRINTTS) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_SB) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_SPECRES) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_BF16) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_DGH) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_I8MM) \
+ )
+
+#define PVM_ID_AA64ISAR2_ALLOW (\
+ ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3) \
)
u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id);
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
index b58c910babaf..80e99836eac7 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
@@ -24,6 +24,11 @@ enum pkvm_page_state {
PKVM_PAGE_OWNED = 0ULL,
PKVM_PAGE_SHARED_OWNED = KVM_PGTABLE_PROT_SW0,
PKVM_PAGE_SHARED_BORROWED = KVM_PGTABLE_PROT_SW1,
+ __PKVM_PAGE_RESERVED = KVM_PGTABLE_PROT_SW0 |
+ KVM_PGTABLE_PROT_SW1,
+
+ /* Meta-states which aren't encoded directly in the PTE's SW bits */
+ PKVM_NOPAGE,
};
#define PKVM_PAGE_STATE_PROT_MASK (KVM_PGTABLE_PROT_SW0 | KVM_PGTABLE_PROT_SW1)
@@ -50,6 +55,7 @@ extern const u8 pkvm_hyp_id;
int __pkvm_prot_finalize(void);
int __pkvm_host_share_hyp(u64 pfn);
+int __pkvm_host_unshare_hyp(u64 pfn);
bool addr_is_memory(phys_addr_t phys);
int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot);
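
The new states ride in the two PTE software bits covered by PKVM_PAGE_STATE_PROT_MASK. The pack/unpack helpers already provided by this header look roughly like the following sketch (reproduced from memory, as an illustration of the encoding rather than as part of this patch):

	static inline enum kvm_pgtable_prot pkvm_mkstate(enum kvm_pgtable_prot prot,
							 enum pkvm_page_state state)
	{
		return (prot & ~PKVM_PAGE_STATE_PROT_MASK) | state;
	}

	static inline enum pkvm_page_state pkvm_getstate(enum kvm_pgtable_prot prot)
	{
		return prot & PKVM_PAGE_STATE_PROT_MASK;
	}

PKVM_NOPAGE is the exception: it is never written into a PTE and only exists as the "no mapping at all" answer returned by the page-state walkers.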
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h
index c9a8f535212e..42d8eb9bfe72 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h
@@ -10,13 +10,8 @@
#include <nvhe/memory.h>
#include <nvhe/spinlock.h>
-#define HYP_MEMBLOCK_REGIONS 128
-extern struct memblock_region kvm_nvhe_sym(hyp_memory)[];
-extern unsigned int kvm_nvhe_sym(hyp_memblock_nr);
extern struct kvm_pgtable pkvm_pgtable;
extern hyp_spinlock_t pkvm_pgd_lock;
-extern struct hyp_pool hpool;
-extern u64 __io_map_base;
int hyp_create_idmap(u32 hyp_va_bits);
int hyp_map_vectors(void);
@@ -24,8 +19,10 @@ int hyp_back_vmemmap(phys_addr_t phys, unsigned long size, phys_addr_t back);
int pkvm_cpu_set_vector(enum arm64_hyp_spectre_vector slot);
int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot);
int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot);
-unsigned long __pkvm_create_private_mapping(phys_addr_t phys, size_t size,
- enum kvm_pgtable_prot prot);
+int __pkvm_create_private_mapping(phys_addr_t phys, size_t size,
+ enum kvm_pgtable_prot prot,
+ unsigned long *haddr);
+int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr);
static inline void hyp_vmemmap_range(phys_addr_t phys, unsigned long size,
unsigned long *start, unsigned long *end)
@@ -39,58 +36,4 @@ static inline void hyp_vmemmap_range(phys_addr_t phys, unsigned long size,
*end = ALIGN(*end, PAGE_SIZE);
}
-static inline unsigned long __hyp_pgtable_max_pages(unsigned long nr_pages)
-{
- unsigned long total = 0, i;
-
- /* Provision the worst case scenario */
- for (i = 0; i < KVM_PGTABLE_MAX_LEVELS; i++) {
- nr_pages = DIV_ROUND_UP(nr_pages, PTRS_PER_PTE);
- total += nr_pages;
- }
-
- return total;
-}
-
-static inline unsigned long __hyp_pgtable_total_pages(void)
-{
- unsigned long res = 0, i;
-
- /* Cover all of memory with page-granularity */
- for (i = 0; i < kvm_nvhe_sym(hyp_memblock_nr); i++) {
- struct memblock_region *reg = &kvm_nvhe_sym(hyp_memory)[i];
- res += __hyp_pgtable_max_pages(reg->size >> PAGE_SHIFT);
- }
-
- return res;
-}
-
-static inline unsigned long hyp_s1_pgtable_pages(void)
-{
- unsigned long res;
-
- res = __hyp_pgtable_total_pages();
-
- /* Allow 1 GiB for private mappings */
- res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT);
-
- return res;
-}
-
-static inline unsigned long host_s2_pgtable_pages(void)
-{
- unsigned long res;
-
- /*
- * Include an extra 16 pages to safely upper-bound the worst case of
- * concatenated pgds.
- */
- res = __hyp_pgtable_total_pages() + 16;
-
- /* Allow 1 GiB for MMIO mappings */
- res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT);
-
- return res;
-}
-
#endif /* __KVM_HYP_MM_H */
diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
index 24b2c2425b38..be0a2bc3e20d 100644
--- a/arch/arm64/kvm/hyp/nvhe/Makefile
+++ b/arch/arm64/kvm/hyp/nvhe/Makefile
@@ -4,7 +4,15 @@
#
asflags-y := -D__KVM_NVHE_HYPERVISOR__ -D__DISABLE_EXPORTS
-ccflags-y := -D__KVM_NVHE_HYPERVISOR__ -D__DISABLE_EXPORTS
+
+# Tracepoint and MMIO logging symbols should not be visible at nVHE KVM as
+# there is no way to execute them and any such MMIO access from nVHE KVM
+# will explode instantly (Words of Marc Zyngier). So introduce a generic flag
+# __DISABLE_TRACE_MMIO__ to disable MMIO tracing for nVHE KVM.
+ccflags-y := -D__KVM_NVHE_HYPERVISOR__ -D__DISABLE_EXPORTS -D__DISABLE_TRACE_MMIO__
+ccflags-y += -fno-stack-protector \
+ -DDISABLE_BRANCH_PROFILING \
+ $(DISABLE_STACKLEAK_PLUGIN)
hostprogs := gen-hyprel
HOST_EXTRACFLAGS += -I$(objtree)/include
@@ -12,12 +20,13 @@ HOST_EXTRACFLAGS += -I$(objtree)/include
lib-objs := clear_page.o copy_page.o memcpy.o memset.o
lib-objs := $(addprefix ../../../lib/, $(lib-objs))
-obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \
- hyp-main.o hyp-smp.o psci-relay.o early_alloc.o stub.o page_alloc.o \
- cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o
-obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
+hyp-obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \
+ hyp-main.o hyp-smp.o psci-relay.o early_alloc.o page_alloc.o \
+ cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o stacktrace.o
+hyp-obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o
-obj-y += $(lib-objs)
+hyp-obj-$(CONFIG_DEBUG_LIST) += list_debug.o
+hyp-obj-y += $(lib-objs)
##
## Build rules for compiling nVHE hyp code
@@ -25,9 +34,9 @@ obj-y += $(lib-objs)
## file containing all nVHE hyp code and data.
##
-hyp-obj := $(patsubst %.o,%.nvhe.o,$(obj-y))
+hyp-obj := $(patsubst %.o,%.nvhe.o,$(hyp-obj-y))
obj-y := kvm_nvhe.o
-extra-y := $(hyp-obj) kvm_nvhe.tmp.o kvm_nvhe.rel.o hyp.lds hyp-reloc.S hyp-reloc.o
+targets += $(hyp-obj) kvm_nvhe.tmp.o kvm_nvhe.rel.o hyp.lds hyp-reloc.S hyp-reloc.o
# 1) Compile all source files to `.nvhe.o` object files. The file extension
# avoids file name clashes for files shared with VHE.
@@ -83,6 +92,10 @@ quiet_cmd_hypcopy = HYPCOPY $@
# Remove ftrace, Shadow Call Stack, and CFI CFLAGS.
# This is equivalent to the 'notrace', '__noscs', and '__nocfi' annotations.
KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS) $(CC_FLAGS_CFI), $(KBUILD_CFLAGS))
+# Starting from 13.0.0 llvm emits SHT_REL section '.llvm.call-graph-profile'
+# when profile optimization is applied. gen-hyprel does not support SHT_REL and
+# causes a build failure. Remove profile optimization flags.
+KBUILD_CFLAGS := $(filter-out -fprofile-sample-use=% -fprofile-use=%, $(KBUILD_CFLAGS))
# KVM nVHE code is run at a different exception code with a different map, so
# compiler instrumentation that inserts callbacks or checks into the code may
diff --git a/arch/arm64/kvm/hyp/nvhe/cache.S b/arch/arm64/kvm/hyp/nvhe/cache.S
index 958734f4d6b0..0c367eb5f4e2 100644
--- a/arch/arm64/kvm/hyp/nvhe/cache.S
+++ b/arch/arm64/kvm/hyp/nvhe/cache.S
@@ -7,7 +7,8 @@
#include <asm/assembler.h>
#include <asm/alternative.h>
-SYM_FUNC_START_PI(dcache_clean_inval_poc)
+SYM_FUNC_START(__pi_dcache_clean_inval_poc)
dcache_by_line_op civac, sy, x0, x1, x2, x3
ret
-SYM_FUNC_END_PI(dcache_clean_inval_poc)
+SYM_FUNC_END(__pi_dcache_clean_inval_poc)
+SYM_FUNC_ALIAS(dcache_clean_inval_poc, __pi_dcache_clean_inval_poc)
diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
index df361d839902..e17455773b98 100644
--- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c
+++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
@@ -84,10 +84,10 @@ static void __debug_restore_trace(u64 trfcr_el1)
void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu)
{
/* Disable and flush SPE data generation */
- if (vcpu->arch.flags & KVM_ARM64_DEBUG_STATE_SAVE_SPE)
+ if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_SPE))
__debug_save_spe(&vcpu->arch.host_debug_state.pmscr_el1);
/* Disable and flush Self-Hosted Trace generation */
- if (vcpu->arch.flags & KVM_ARM64_DEBUG_STATE_SAVE_TRBE)
+ if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_TRBE))
__debug_save_trace(&vcpu->arch.host_debug_state.trfcr_el1);
}
@@ -98,9 +98,9 @@ void __debug_switch_to_guest(struct kvm_vcpu *vcpu)
void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu)
{
- if (vcpu->arch.flags & KVM_ARM64_DEBUG_STATE_SAVE_SPE)
+ if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_SPE))
__debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1);
- if (vcpu->arch.flags & KVM_ARM64_DEBUG_STATE_SAVE_TRBE)
+ if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_TRBE))
__debug_restore_trace(vcpu->arch.host_debug_state.trfcr_el1);
}
diff --git a/arch/arm64/kvm/hyp/nvhe/early_alloc.c b/arch/arm64/kvm/hyp/nvhe/early_alloc.c
index 1306c430ab87..00de04153cc6 100644
--- a/arch/arm64/kvm/hyp/nvhe/early_alloc.c
+++ b/arch/arm64/kvm/hyp/nvhe/early_alloc.c
@@ -43,6 +43,9 @@ void *hyp_early_alloc_page(void *arg)
return hyp_early_alloc_contig(1);
}
+static void hyp_early_alloc_get_page(void *addr) { }
+static void hyp_early_alloc_put_page(void *addr) { }
+
void hyp_early_alloc_init(void *virt, unsigned long size)
{
base = cur = (unsigned long)virt;
@@ -51,4 +54,6 @@ void hyp_early_alloc_init(void *virt, unsigned long size)
hyp_early_alloc_mm_ops.zalloc_page = hyp_early_alloc_page;
hyp_early_alloc_mm_ops.phys_to_virt = hyp_phys_to_virt;
hyp_early_alloc_mm_ops.virt_to_phys = hyp_virt_to_phys;
+ hyp_early_alloc_mm_ops.get_page = hyp_early_alloc_get_page;
+ hyp_early_alloc_mm_ops.put_page = hyp_early_alloc_put_page;
}
diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S
index 3d613e721a75..b6c0188c4b35 100644
--- a/arch/arm64/kvm/hyp/nvhe/host.S
+++ b/arch/arm64/kvm/hyp/nvhe/host.S
@@ -80,7 +80,7 @@ SYM_FUNC_START(__hyp_do_panic)
mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
PSR_MODE_EL1h)
msr spsr_el2, lr
- ldr lr, =nvhe_hyp_panic_handler
+ adr_l lr, nvhe_hyp_panic_handler
hyp_kimg_va lr, x6
msr elr_el2, lr
@@ -125,13 +125,11 @@ alternative_else_nop_endif
add sp, sp, #16
/*
* Compute the idmap address of __kvm_handle_stub_hvc and
- * jump there. Since we use kimage_voffset, do not use the
- * HYP VA for __kvm_handle_stub_hvc, but the kernel VA instead
- * (by loading it from the constant pool).
+ * jump there.
*
* Preserve x0-x4, which may contain stub parameters.
*/
- ldr x5, =__kvm_handle_stub_hvc
+ adr_l x5, __kvm_handle_stub_hvc
hyp_pa x5, x6
br x5
SYM_FUNC_END(__host_hvc)
@@ -153,6 +151,18 @@ SYM_FUNC_END(__host_hvc)
.macro invalid_host_el2_vect
.align 7
+
+ /*
+ * Test whether the SP has overflowed, without corrupting a GPR.
+ * nVHE hypervisor stacks are aligned so that the PAGE_SHIFT bit
+ * of SP should always be 1.
+ */
+ add sp, sp, x0 // sp' = sp + x0
+ sub x0, sp, x0 // x0' = sp' - x0 = (sp + x0) - x0 = sp
+ tbz x0, #PAGE_SHIFT, .L__hyp_sp_overflow\@
+ sub x0, sp, x0 // x0'' = sp' - x0' = (sp + x0) - sp = x0
+ sub sp, sp, x0 // sp'' = sp' - x0 = (sp + x0) - x0 = sp
+
/* If a guest is loaded, panic out of it. */
stp x0, x1, [sp, #-16]!
get_loaded_vcpu x0, x1
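
A rough C model of the SP-overflow probe above, for illustration only; it assumes the hypervisor stacks remain PAGE_SIZE-sized with an unmapped guard page below, as set up elsewhere in this series:

	/*
	 * Within a valid stack, bit PAGE_SHIFT of SP reads as 1; once SP has
	 * fallen through into the guard page below, that bit reads as 0.
	 */
	static bool hyp_sp_overflowed(unsigned long sp)
	{
		return !(sp & BIT(PAGE_SHIFT));
	}

The add/sub/tbz/sub/sub dance in the macro performs exactly this test while leaving every general-purpose register with its original value on the non-overflow path.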
@@ -165,6 +175,13 @@ SYM_FUNC_END(__host_hvc)
* been partially clobbered by __host_enter.
*/
b hyp_panic
+
+.L__hyp_sp_overflow\@:
+ /* Switch to the overflow stack */
+ adr_this_cpu sp, overflow_stack + OVERFLOW_STACK_SIZE, x0
+
+ b hyp_panic_bad_stack
+ ASM_BUG()
.endm
.macro invalid_host_el1_vect
@@ -198,15 +215,15 @@ SYM_CODE_START(__kvm_hyp_host_vector)
invalid_host_el2_vect // FIQ EL2h
invalid_host_el2_vect // Error EL2h
- host_el1_sync_vect // Synchronous 64-bit EL1
- invalid_host_el1_vect // IRQ 64-bit EL1
- invalid_host_el1_vect // FIQ 64-bit EL1
- invalid_host_el1_vect // Error 64-bit EL1
+ host_el1_sync_vect // Synchronous 64-bit EL1/EL0
+ invalid_host_el1_vect // IRQ 64-bit EL1/EL0
+ invalid_host_el1_vect // FIQ 64-bit EL1/EL0
+ invalid_host_el1_vect // Error 64-bit EL1/EL0
- invalid_host_el1_vect // Synchronous 32-bit EL1
- invalid_host_el1_vect // IRQ 32-bit EL1
- invalid_host_el1_vect // FIQ 32-bit EL1
- invalid_host_el1_vect // Error 32-bit EL1
+ host_el1_sync_vect // Synchronous 32-bit EL1/EL0
+ invalid_host_el1_vect // IRQ 32-bit EL1/EL0
+ invalid_host_el1_vect // FIQ 32-bit EL1/EL0
+ invalid_host_el1_vect // Error 32-bit EL1/EL0
SYM_CODE_END(__kvm_hyp_host_vector)
/*
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index b096bf009144..3cea4b6ac23e 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -147,13 +147,36 @@ static void handle___pkvm_host_share_hyp(struct kvm_cpu_context *host_ctxt)
cpu_reg(host_ctxt, 1) = __pkvm_host_share_hyp(pfn);
}
+static void handle___pkvm_host_unshare_hyp(struct kvm_cpu_context *host_ctxt)
+{
+ DECLARE_REG(u64, pfn, host_ctxt, 1);
+
+ cpu_reg(host_ctxt, 1) = __pkvm_host_unshare_hyp(pfn);
+}
+
static void handle___pkvm_create_private_mapping(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(phys_addr_t, phys, host_ctxt, 1);
DECLARE_REG(size_t, size, host_ctxt, 2);
DECLARE_REG(enum kvm_pgtable_prot, prot, host_ctxt, 3);
- cpu_reg(host_ctxt, 1) = __pkvm_create_private_mapping(phys, size, prot);
+ /*
+ * __pkvm_create_private_mapping() populates a pointer with the
+ * hypervisor start address of the allocation.
+ *
+ * However, handle___pkvm_create_private_mapping() hypercall crosses the
+ * EL1/EL2 boundary so the pointer would not be valid in this context.
+ *
+ * Instead pass the allocation address as the return value (or return
+ * ERR_PTR() on failure).
+ */
+ unsigned long haddr;
+ int err = __pkvm_create_private_mapping(phys, size, prot, &haddr);
+
+ if (err)
+ haddr = (unsigned long)ERR_PTR(err);
+
+ cpu_reg(host_ctxt, 1) = haddr;
}
static void handle___pkvm_prot_finalize(struct kvm_cpu_context *host_ctxt)
@@ -184,6 +207,7 @@ static const hcall_t host_hcall[] = {
HANDLE_FUNC(__pkvm_prot_finalize),
HANDLE_FUNC(__pkvm_host_share_hyp),
+ HANDLE_FUNC(__pkvm_host_unshare_hyp),
HANDLE_FUNC(__kvm_adjust_pc),
HANDLE_FUNC(__kvm_vcpu_run),
HANDLE_FUNC(__kvm_flush_vm_context),
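
On the host side, the calling convention described in the comment above ends up looking roughly like this (illustration only; the real caller lives in the host's mmu code, and the variable names here are placeholders):

	unsigned long haddr;

	haddr = kvm_call_hyp_nvhe(__pkvm_create_private_mapping,
				  phys, size, prot);
	if (IS_ERR_VALUE(haddr))
		return PTR_ERR((void *)haddr);	/* ERR_PTR() round-tripped via a GPR */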
diff --git a/arch/arm64/kvm/hyp/nvhe/list_debug.c b/arch/arm64/kvm/hyp/nvhe/list_debug.c
new file mode 100644
index 000000000000..d68abd7ea124
--- /dev/null
+++ b/arch/arm64/kvm/hyp/nvhe/list_debug.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2022 - Google LLC
+ * Author: Keir Fraser <keirf@google.com>
+ */
+
+#include <linux/list.h>
+#include <linux/bug.h>
+
+static inline __must_check bool nvhe_check_data_corruption(bool v)
+{
+ return v;
+}
+
+#define NVHE_CHECK_DATA_CORRUPTION(condition) \
+ nvhe_check_data_corruption(({ \
+ bool corruption = unlikely(condition); \
+ if (corruption) { \
+ if (IS_ENABLED(CONFIG_BUG_ON_DATA_CORRUPTION)) { \
+ BUG_ON(1); \
+ } else \
+ WARN_ON(1); \
+ } \
+ corruption; \
+ }))
+
+/* The predicates checked here are taken from lib/list_debug.c. */
+
+bool __list_add_valid(struct list_head *new, struct list_head *prev,
+ struct list_head *next)
+{
+ if (NVHE_CHECK_DATA_CORRUPTION(next->prev != prev) ||
+ NVHE_CHECK_DATA_CORRUPTION(prev->next != next) ||
+ NVHE_CHECK_DATA_CORRUPTION(new == prev || new == next))
+ return false;
+
+ return true;
+}
+
+bool __list_del_entry_valid(struct list_head *entry)
+{
+ struct list_head *prev, *next;
+
+ prev = entry->prev;
+ next = entry->next;
+
+ if (NVHE_CHECK_DATA_CORRUPTION(next == LIST_POISON1) ||
+ NVHE_CHECK_DATA_CORRUPTION(prev == LIST_POISON2) ||
+ NVHE_CHECK_DATA_CORRUPTION(prev->next != entry) ||
+ NVHE_CHECK_DATA_CORRUPTION(next->prev != entry))
+ return false;
+
+ return true;
+}
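
For illustration, a hypothetical stale node demonstrates how these checks behave at EL2: the validator rejects the deletion, and NVHE_CHECK_DATA_CORRUPTION() either WARNs or BUGs depending on CONFIG_BUG_ON_DATA_CORRUPTION.

	struct list_head stale = {
		.next = LIST_POISON1,
		.prev = LIST_POISON2,
	};

	if (__list_del_entry_valid(&stale))
		list_del(&stale);	/* never reached: the check fires first */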
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index c1a90dd022b8..07f9dc9848ef 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -9,6 +9,7 @@
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pgtable.h>
+#include <asm/kvm_pkvm.h>
#include <asm/stage2_pgtable.h>
#include <hyp/fault.h>
@@ -27,6 +28,26 @@ static struct hyp_pool host_s2_pool;
const u8 pkvm_hyp_id = 1;
+static void host_lock_component(void)
+{
+ hyp_spin_lock(&host_kvm.lock);
+}
+
+static void host_unlock_component(void)
+{
+ hyp_spin_unlock(&host_kvm.lock);
+}
+
+static void hyp_lock_component(void)
+{
+ hyp_spin_lock(&pkvm_pgd_lock);
+}
+
+static void hyp_unlock_component(void)
+{
+ hyp_spin_unlock(&pkvm_pgd_lock);
+}
+
static void *host_s2_zalloc_pages_exact(size_t size)
{
void *addr = hyp_alloc_pages(&host_s2_pool, get_order(size));
@@ -103,22 +124,21 @@ int kvm_host_prepare_stage2(void *pgt_pool_base)
prepare_host_vtcr();
hyp_spin_lock_init(&host_kvm.lock);
+ mmu->arch = &host_kvm.arch;
ret = prepare_s2_pool(pgt_pool_base);
if (ret)
return ret;
- ret = __kvm_pgtable_stage2_init(&host_kvm.pgt, &host_kvm.arch,
+ ret = __kvm_pgtable_stage2_init(&host_kvm.pgt, mmu,
&host_kvm.mm_ops, KVM_HOST_S2_FLAGS,
host_stage2_force_pte_cb);
if (ret)
return ret;
mmu->pgd_phys = __hyp_pa(host_kvm.pgt.pgd);
- mmu->arch = &host_kvm.arch;
mmu->pgt = &host_kvm.pgt;
- WRITE_ONCE(mmu->vmid.vmid_gen, 0);
- WRITE_ONCE(mmu->vmid.vmid, 0);
+ atomic64_set(&mmu->vmid.id, 0);
return 0;
}
@@ -294,15 +314,11 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
int host_stage2_idmap_locked(phys_addr_t addr, u64 size,
enum kvm_pgtable_prot prot)
{
- hyp_assert_lock_held(&host_kvm.lock);
-
return host_stage2_try(__host_stage2_idmap, addr, addr + size, prot);
}
int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id)
{
- hyp_assert_lock_held(&host_kvm.lock);
-
return host_stage2_try(kvm_pgtable_stage2_set_owner, &host_kvm.pgt,
addr, size, &host_s2_pool, owner_id);
}
@@ -338,116 +354,446 @@ static int host_stage2_idmap(u64 addr)
prot = is_memory ? PKVM_HOST_MEM_PROT : PKVM_HOST_MMIO_PROT;
- hyp_spin_lock(&host_kvm.lock);
+ host_lock_component();
ret = host_stage2_adjust_range(addr, &range);
if (ret)
goto unlock;
ret = host_stage2_idmap_locked(range.start, range.end - range.start, prot);
unlock:
- hyp_spin_unlock(&host_kvm.lock);
+ host_unlock_component();
return ret;
}
-static inline bool check_prot(enum kvm_pgtable_prot prot,
- enum kvm_pgtable_prot required,
- enum kvm_pgtable_prot denied)
+void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
{
- return (prot & (required | denied)) == required;
+ struct kvm_vcpu_fault_info fault;
+ u64 esr, addr;
+ int ret = 0;
+
+ esr = read_sysreg_el2(SYS_ESR);
+ BUG_ON(!__get_fault_info(esr, &fault));
+
+ addr = (fault.hpfar_el2 & HPFAR_MASK) << 8;
+ ret = host_stage2_idmap(addr);
+ BUG_ON(ret && ret != -EAGAIN);
}
-int __pkvm_host_share_hyp(u64 pfn)
+/* This corresponds to locking order */
+enum pkvm_component_id {
+ PKVM_ID_HOST,
+ PKVM_ID_HYP,
+};
+
+struct pkvm_mem_transition {
+ u64 nr_pages;
+
+ struct {
+ enum pkvm_component_id id;
+ /* Address in the initiator's address space */
+ u64 addr;
+
+ union {
+ struct {
+ /* Address in the completer's address space */
+ u64 completer_addr;
+ } host;
+ };
+ } initiator;
+
+ struct {
+ enum pkvm_component_id id;
+ } completer;
+};
+
+struct pkvm_mem_share {
+ const struct pkvm_mem_transition tx;
+ const enum kvm_pgtable_prot completer_prot;
+};
+
+struct check_walk_data {
+ enum pkvm_page_state desired;
+ enum pkvm_page_state (*get_page_state)(kvm_pte_t pte);
+};
+
+static int __check_page_state_visitor(u64 addr, u64 end, u32 level,
+ kvm_pte_t *ptep,
+ enum kvm_pgtable_walk_flags flag,
+ void * const arg)
{
- phys_addr_t addr = hyp_pfn_to_phys(pfn);
- enum kvm_pgtable_prot prot, cur;
- void *virt = __hyp_va(addr);
- enum pkvm_page_state state;
- kvm_pte_t pte;
- int ret;
+ struct check_walk_data *d = arg;
+ kvm_pte_t pte = *ptep;
- if (!addr_is_memory(addr))
+ if (kvm_pte_valid(pte) && !addr_is_memory(kvm_pte_to_phys(pte)))
return -EINVAL;
- hyp_spin_lock(&host_kvm.lock);
- hyp_spin_lock(&pkvm_pgd_lock);
+ return d->get_page_state(pte) == d->desired ? 0 : -EPERM;
+}
+
+static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size,
+ struct check_walk_data *data)
+{
+ struct kvm_pgtable_walker walker = {
+ .cb = __check_page_state_visitor,
+ .arg = data,
+ .flags = KVM_PGTABLE_WALK_LEAF,
+ };
+
+ return kvm_pgtable_walk(pgt, addr, size, &walker);
+}
+
+static enum pkvm_page_state host_get_page_state(kvm_pte_t pte)
+{
+ if (!kvm_pte_valid(pte) && pte)
+ return PKVM_NOPAGE;
+
+ return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte));
+}
+
+static int __host_check_page_state_range(u64 addr, u64 size,
+ enum pkvm_page_state state)
+{
+ struct check_walk_data d = {
+ .desired = state,
+ .get_page_state = host_get_page_state,
+ };
+
+ hyp_assert_lock_held(&host_kvm.lock);
+ return check_page_state_range(&host_kvm.pgt, addr, size, &d);
+}
+
+static int __host_set_page_state_range(u64 addr, u64 size,
+ enum pkvm_page_state state)
+{
+ enum kvm_pgtable_prot prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, state);
+
+ return host_stage2_idmap_locked(addr, size, prot);
+}
+
+static int host_request_owned_transition(u64 *completer_addr,
+ const struct pkvm_mem_transition *tx)
+{
+ u64 size = tx->nr_pages * PAGE_SIZE;
+ u64 addr = tx->initiator.addr;
+
+ *completer_addr = tx->initiator.host.completer_addr;
+ return __host_check_page_state_range(addr, size, PKVM_PAGE_OWNED);
+}
+
+static int host_request_unshare(u64 *completer_addr,
+ const struct pkvm_mem_transition *tx)
+{
+ u64 size = tx->nr_pages * PAGE_SIZE;
+ u64 addr = tx->initiator.addr;
+
+ *completer_addr = tx->initiator.host.completer_addr;
+ return __host_check_page_state_range(addr, size, PKVM_PAGE_SHARED_OWNED);
+}
+
+static int host_initiate_share(u64 *completer_addr,
+ const struct pkvm_mem_transition *tx)
+{
+ u64 size = tx->nr_pages * PAGE_SIZE;
+ u64 addr = tx->initiator.addr;
+
+ *completer_addr = tx->initiator.host.completer_addr;
+ return __host_set_page_state_range(addr, size, PKVM_PAGE_SHARED_OWNED);
+}
+
+static int host_initiate_unshare(u64 *completer_addr,
+ const struct pkvm_mem_transition *tx)
+{
+ u64 size = tx->nr_pages * PAGE_SIZE;
+ u64 addr = tx->initiator.addr;
+
+ *completer_addr = tx->initiator.host.completer_addr;
+ return __host_set_page_state_range(addr, size, PKVM_PAGE_OWNED);
+}
+
+static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte)
+{
+ if (!kvm_pte_valid(pte))
+ return PKVM_NOPAGE;
+
+ return pkvm_getstate(kvm_pgtable_hyp_pte_prot(pte));
+}
+
+static int __hyp_check_page_state_range(u64 addr, u64 size,
+ enum pkvm_page_state state)
+{
+ struct check_walk_data d = {
+ .desired = state,
+ .get_page_state = hyp_get_page_state,
+ };
+
+ hyp_assert_lock_held(&pkvm_pgd_lock);
+ return check_page_state_range(&pkvm_pgtable, addr, size, &d);
+}
+
+static bool __hyp_ack_skip_pgtable_check(const struct pkvm_mem_transition *tx)
+{
+ return !(IS_ENABLED(CONFIG_NVHE_EL2_DEBUG) ||
+ tx->initiator.id != PKVM_ID_HOST);
+}
+
+static int hyp_ack_share(u64 addr, const struct pkvm_mem_transition *tx,
+ enum kvm_pgtable_prot perms)
+{
+ u64 size = tx->nr_pages * PAGE_SIZE;
+
+ if (perms != PAGE_HYP)
+ return -EPERM;
+
+ if (__hyp_ack_skip_pgtable_check(tx))
+ return 0;
+
+ return __hyp_check_page_state_range(addr, size, PKVM_NOPAGE);
+}
+
+static int hyp_ack_unshare(u64 addr, const struct pkvm_mem_transition *tx)
+{
+ u64 size = tx->nr_pages * PAGE_SIZE;
+
+ if (__hyp_ack_skip_pgtable_check(tx))
+ return 0;
+
+ return __hyp_check_page_state_range(addr, size,
+ PKVM_PAGE_SHARED_BORROWED);
+}
+
+static int hyp_complete_share(u64 addr, const struct pkvm_mem_transition *tx,
+ enum kvm_pgtable_prot perms)
+{
+ void *start = (void *)addr, *end = start + (tx->nr_pages * PAGE_SIZE);
+ enum kvm_pgtable_prot prot;
+
+ prot = pkvm_mkstate(perms, PKVM_PAGE_SHARED_BORROWED);
+ return pkvm_create_mappings_locked(start, end, prot);
+}
+
+static int hyp_complete_unshare(u64 addr, const struct pkvm_mem_transition *tx)
+{
+ u64 size = tx->nr_pages * PAGE_SIZE;
+ int ret = kvm_pgtable_hyp_unmap(&pkvm_pgtable, addr, size);
+
+ return (ret != size) ? -EFAULT : 0;
+}
+
+static int check_share(struct pkvm_mem_share *share)
+{
+ const struct pkvm_mem_transition *tx = &share->tx;
+ u64 completer_addr;
+ int ret;
+
+ switch (tx->initiator.id) {
+ case PKVM_ID_HOST:
+ ret = host_request_owned_transition(&completer_addr, tx);
+ break;
+ default:
+ ret = -EINVAL;
+ }
- ret = kvm_pgtable_get_leaf(&host_kvm.pgt, addr, &pte, NULL);
if (ret)
- goto unlock;
- if (!pte)
- goto map_shared;
+ return ret;
- /*
- * Check attributes in the host stage-2 PTE. We need the page to be:
- * - mapped RWX as we're sharing memory;
- * - not borrowed, as that implies absence of ownership.
- * Otherwise, we can't let it got through
- */
- cur = kvm_pgtable_stage2_pte_prot(pte);
- prot = pkvm_mkstate(0, PKVM_PAGE_SHARED_BORROWED);
- if (!check_prot(cur, PKVM_HOST_MEM_PROT, prot)) {
- ret = -EPERM;
- goto unlock;
+ switch (tx->completer.id) {
+ case PKVM_ID_HYP:
+ ret = hyp_ack_share(completer_addr, tx, share->completer_prot);
+ break;
+ default:
+ ret = -EINVAL;
}
- state = pkvm_getstate(cur);
- if (state == PKVM_PAGE_OWNED)
- goto map_shared;
+ return ret;
+}
- /*
- * Tolerate double-sharing the same page, but this requires
- * cross-checking the hypervisor stage-1.
- */
- if (state != PKVM_PAGE_SHARED_OWNED) {
- ret = -EPERM;
- goto unlock;
+static int __do_share(struct pkvm_mem_share *share)
+{
+ const struct pkvm_mem_transition *tx = &share->tx;
+ u64 completer_addr;
+ int ret;
+
+ switch (tx->initiator.id) {
+ case PKVM_ID_HOST:
+ ret = host_initiate_share(&completer_addr, tx);
+ break;
+ default:
+ ret = -EINVAL;
}
- ret = kvm_pgtable_get_leaf(&pkvm_pgtable, (u64)virt, &pte, NULL);
if (ret)
- goto unlock;
+ return ret;
- /*
- * If the page has been shared with the hypervisor, it must be
- * already mapped as SHARED_BORROWED in its stage-1.
- */
- cur = kvm_pgtable_hyp_pte_prot(pte);
- prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED);
- if (!check_prot(cur, prot, ~prot))
- ret = -EPERM;
- goto unlock;
+ switch (tx->completer.id) {
+ case PKVM_ID_HYP:
+ ret = hyp_complete_share(completer_addr, tx, share->completer_prot);
+ break;
+ default:
+ ret = -EINVAL;
+ }
-map_shared:
- /*
- * If the page is not yet shared, adjust mappings in both page-tables
- * while both locks are held.
- */
- prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED);
- ret = pkvm_create_mappings_locked(virt, virt + PAGE_SIZE, prot);
- BUG_ON(ret);
+ return ret;
+}
+
+/*
+ * do_share():
+ *
+ * The page owner grants access to another component with a given set
+ * of permissions.
+ *
+ * Initiator: OWNED => SHARED_OWNED
+ * Completer: NOPAGE => SHARED_BORROWED
+ */
+static int do_share(struct pkvm_mem_share *share)
+{
+ int ret;
- prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_OWNED);
- ret = host_stage2_idmap_locked(addr, PAGE_SIZE, prot);
- BUG_ON(ret);
+ ret = check_share(share);
+ if (ret)
+ return ret;
-unlock:
- hyp_spin_unlock(&pkvm_pgd_lock);
- hyp_spin_unlock(&host_kvm.lock);
+ return WARN_ON(__do_share(share));
+}
+
+static int check_unshare(struct pkvm_mem_share *share)
+{
+ const struct pkvm_mem_transition *tx = &share->tx;
+ u64 completer_addr;
+ int ret;
+
+ switch (tx->initiator.id) {
+ case PKVM_ID_HOST:
+ ret = host_request_unshare(&completer_addr, tx);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ if (ret)
+ return ret;
+
+ switch (tx->completer.id) {
+ case PKVM_ID_HYP:
+ ret = hyp_ack_unshare(completer_addr, tx);
+ break;
+ default:
+ ret = -EINVAL;
+ }
return ret;
}
-void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
+static int __do_unshare(struct pkvm_mem_share *share)
{
- struct kvm_vcpu_fault_info fault;
- u64 esr, addr;
- int ret = 0;
+ const struct pkvm_mem_transition *tx = &share->tx;
+ u64 completer_addr;
+ int ret;
- esr = read_sysreg_el2(SYS_ESR);
- BUG_ON(!__get_fault_info(esr, &fault));
+ switch (tx->initiator.id) {
+ case PKVM_ID_HOST:
+ ret = host_initiate_unshare(&completer_addr, tx);
+ break;
+ default:
+ ret = -EINVAL;
+ }
- addr = (fault.hpfar_el2 & HPFAR_MASK) << 8;
- ret = host_stage2_idmap(addr);
- BUG_ON(ret && ret != -EAGAIN);
+ if (ret)
+ return ret;
+
+ switch (tx->completer.id) {
+ case PKVM_ID_HYP:
+ ret = hyp_complete_unshare(completer_addr, tx);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+/*
+ * do_unshare():
+ *
+ * The page owner revokes access from another component for a range of
+ * pages which were previously shared using do_share().
+ *
+ * Initiator: SHARED_OWNED => OWNED
+ * Completer: SHARED_BORROWED => NOPAGE
+ */
+static int do_unshare(struct pkvm_mem_share *share)
+{
+ int ret;
+
+ ret = check_unshare(share);
+ if (ret)
+ return ret;
+
+ return WARN_ON(__do_unshare(share));
+}
+
+int __pkvm_host_share_hyp(u64 pfn)
+{
+ int ret;
+ u64 host_addr = hyp_pfn_to_phys(pfn);
+ u64 hyp_addr = (u64)__hyp_va(host_addr);
+ struct pkvm_mem_share share = {
+ .tx = {
+ .nr_pages = 1,
+ .initiator = {
+ .id = PKVM_ID_HOST,
+ .addr = host_addr,
+ .host = {
+ .completer_addr = hyp_addr,
+ },
+ },
+ .completer = {
+ .id = PKVM_ID_HYP,
+ },
+ },
+ .completer_prot = PAGE_HYP,
+ };
+
+ host_lock_component();
+ hyp_lock_component();
+
+ ret = do_share(&share);
+
+ hyp_unlock_component();
+ host_unlock_component();
+
+ return ret;
+}
+
+int __pkvm_host_unshare_hyp(u64 pfn)
+{
+ int ret;
+ u64 host_addr = hyp_pfn_to_phys(pfn);
+ u64 hyp_addr = (u64)__hyp_va(host_addr);
+ struct pkvm_mem_share share = {
+ .tx = {
+ .nr_pages = 1,
+ .initiator = {
+ .id = PKVM_ID_HOST,
+ .addr = host_addr,
+ .host = {
+ .completer_addr = hyp_addr,
+ },
+ },
+ .completer = {
+ .id = PKVM_ID_HYP,
+ },
+ },
+ .completer_prot = PAGE_HYP,
+ };
+
+ host_lock_component();
+ hyp_lock_component();
+
+ ret = do_unshare(&share);
+
+ hyp_unlock_component();
+ host_unlock_component();
+
+ return ret;
}
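
The transitions implemented above can be summarized, together with the host-side entry points that drive them; the kvm_call_hyp_nvhe() lines are shown loosely for illustration of how the host's mmu code issues these hypercalls, not verbatim:

	/*
	 *                host stage-2               hyp stage-1
	 *   share:       OWNED -> SHARED_OWNED      NOPAGE -> SHARED_BORROWED
	 *   unshare:     SHARED_OWNED -> OWNED      SHARED_BORROWED -> NOPAGE
	 *
	 * check_share()/check_unshare() validate the "before" column under
	 * both locks, then __do_share()/__do_unshare() apply the "after" one.
	 */
	ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp, pfn);
	...
	WARN_ON(kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp, pfn));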
diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c
index 2fabeceb889a..96193cb31a39 100644
--- a/arch/arm64/kvm/hyp/nvhe/mm.c
+++ b/arch/arm64/kvm/hyp/nvhe/mm.c
@@ -8,6 +8,7 @@
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pgtable.h>
+#include <asm/kvm_pkvm.h>
#include <asm/spectre.h>
#include <nvhe/early_alloc.h>
@@ -18,11 +19,12 @@
struct kvm_pgtable pkvm_pgtable;
hyp_spinlock_t pkvm_pgd_lock;
-u64 __io_map_base;
struct memblock_region hyp_memory[HYP_MEMBLOCK_REGIONS];
unsigned int hyp_memblock_nr;
+static u64 __io_map_base;
+
static int __pkvm_create_mappings(unsigned long start, unsigned long size,
unsigned long phys, enum kvm_pgtable_prot prot)
{
@@ -35,36 +37,60 @@ static int __pkvm_create_mappings(unsigned long start, unsigned long size,
return err;
}
-unsigned long __pkvm_create_private_mapping(phys_addr_t phys, size_t size,
- enum kvm_pgtable_prot prot)
+/**
+ * pkvm_alloc_private_va_range - Allocates a private VA range.
+ * @size: The size of the VA range to reserve.
+ * @haddr: The hypervisor virtual start address of the allocation.
+ *
+ * The private virtual address (VA) range is allocated above __io_map_base
+ * and aligned based on the order of @size.
+ *
+ * Return: 0 on success or negative error code on failure.
+ */
+int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr)
{
- unsigned long addr;
- int err;
+ unsigned long base, addr;
+ int ret = 0;
hyp_spin_lock(&pkvm_pgd_lock);
- size = PAGE_ALIGN(size + offset_in_page(phys));
- addr = __io_map_base;
- __io_map_base += size;
+ /* Align the allocation based on the order of its size */
+ addr = ALIGN(__io_map_base, PAGE_SIZE << get_order(size));
- /* Are we overflowing on the vmemmap ? */
- if (__io_map_base > __hyp_vmemmap) {
- __io_map_base -= size;
- addr = (unsigned long)ERR_PTR(-ENOMEM);
- goto out;
- }
+ /* The allocated size is always a multiple of PAGE_SIZE */
+ base = addr + PAGE_ALIGN(size);
- err = kvm_pgtable_hyp_map(&pkvm_pgtable, addr, size, phys, prot);
- if (err) {
- addr = (unsigned long)ERR_PTR(err);
- goto out;
+ /* Are we overflowing on the vmemmap ? */
+ if (!addr || base > __hyp_vmemmap)
+ ret = -ENOMEM;
+ else {
+ __io_map_base = base;
+ *haddr = addr;
}
- addr = addr + offset_in_page(phys);
-out:
hyp_spin_unlock(&pkvm_pgd_lock);
- return addr;
+ return ret;
+}
+
+int __pkvm_create_private_mapping(phys_addr_t phys, size_t size,
+ enum kvm_pgtable_prot prot,
+ unsigned long *haddr)
+{
+ unsigned long addr;
+ int err;
+
+ size = PAGE_ALIGN(size + offset_in_page(phys));
+ err = pkvm_alloc_private_va_range(size, &addr);
+ if (err)
+ return err;
+
+ err = __pkvm_create_mappings(addr, size, phys, prot);
+ if (err)
+ return err;
+
+ *haddr = addr + offset_in_page(phys);
+ return err;
}
int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot)
@@ -144,19 +170,21 @@ int pkvm_cpu_set_vector(enum arm64_hyp_spectre_vector slot)
int hyp_map_vectors(void)
{
phys_addr_t phys;
- void *bp_base;
+ unsigned long bp_base;
+ int ret;
- if (!cpus_have_const_cap(ARM64_SPECTRE_V3A))
+ if (!kvm_system_needs_idmapped_vectors()) {
+ __hyp_bp_vect_base = __bp_harden_hyp_vecs;
return 0;
+ }
phys = __hyp_pa(__bp_harden_hyp_vecs);
- bp_base = (void *)__pkvm_create_private_mapping(phys,
- __BP_HARDEN_HYP_VECS_SZ,
- PAGE_HYP_EXEC);
- if (IS_ERR_OR_NULL(bp_base))
- return PTR_ERR(bp_base);
+ ret = __pkvm_create_private_mapping(phys, __BP_HARDEN_HYP_VECS_SZ,
+ PAGE_HYP_EXEC, &bp_base);
+ if (ret)
+ return ret;
- __hyp_bp_vect_base = bp_base;
+ __hyp_bp_vect_base = (void *)bp_base;
return 0;
}
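pkvm_alloc_private_va_range() rounds the base up to the natural alignment of the request before bumping __io_map_base, and the mapped size is always page aligned. A standalone sketch of that arithmetic follows; PAGE_SIZE, the ALIGN_UP macro and the get_order stand-in are defined locally here since this is not built in the hypervisor environment, and the sample addresses are arbitrary.

/* Standalone sketch of the order-based alignment used by
 * pkvm_alloc_private_va_range(); values are illustrative. */
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT	12UL
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((a) - 1))

/* Smallest order such that (PAGE_SIZE << order) >= size. */
static unsigned int get_order_model(uint64_t size)
{
	unsigned int order = 0;

	while ((PAGE_SIZE << order) < size)
		order++;
	return order;
}

int main(void)
{
	uint64_t io_map_base = 0x0000800000003000UL;	/* arbitrary example */
	uint64_t size = 3 * PAGE_SIZE;			/* arbitrary request */
	uint64_t align = PAGE_SIZE << get_order_model(size);
	uint64_t addr = ALIGN_UP(io_map_base, align);
	uint64_t base = addr + ALIGN_UP(size, PAGE_SIZE);

	printf("allocated VA %#llx..%#llx\n",
	       (unsigned long long)addr, (unsigned long long)base);
	return 0;
}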
diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
index 0bd7701ad1df..d40f0b30b534 100644
--- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c
+++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
@@ -102,7 +102,7 @@ static void __hyp_attach_page(struct hyp_pool *pool,
* Only the first struct hyp_page of a high-order page (otherwise known
* as the 'head') should have p->order set. The non-head pages should
* have p->order = HYP_NO_ORDER. Here @p may no longer be the head
- * after coallescing, so make sure to mark it HYP_NO_ORDER proactively.
+ * after coalescing, so make sure to mark it HYP_NO_ORDER proactively.
*/
p->order = HYP_NO_ORDER;
for (; (order + 1) < pool->max_order; order++) {
@@ -110,7 +110,7 @@ static void __hyp_attach_page(struct hyp_pool *pool,
if (!buddy)
break;
- /* Take the buddy out of its list, and coallesce with @p */
+ /* Take the buddy out of its list, and coalesce with @p */
page_remove_from_list(buddy);
buddy->order = HYP_NO_ORDER;
p = min(p, buddy);
@@ -241,7 +241,7 @@ int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages,
int i;
hyp_spin_lock_init(&pool->lock);
- pool->max_order = min(MAX_ORDER, get_order(nr_pages << PAGE_SHIFT));
+ pool->max_order = min(MAX_ORDER, get_order((nr_pages + 1) << PAGE_SHIFT));
for (i = 0; i < pool->max_order; i++)
INIT_LIST_HEAD(&pool->free_area[i]);
pool->range_start = phys;
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index 99c8d8b73e70..85d3b7ae720f 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -20,35 +20,35 @@ static void pvm_init_traps_aa64pfr0(struct kvm_vcpu *vcpu)
u64 cptr_set = 0;
/* Protected KVM does not support AArch32 guests. */
- BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL0),
- PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_ELx_64BIT_ONLY);
- BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1),
- PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_ELx_64BIT_ONLY);
+ BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0),
+ PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_EL1_ELx_64BIT_ONLY);
+ BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL1),
+ PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_EL1_ELx_64BIT_ONLY);
/*
* Linux guests assume support for floating-point and Advanced SIMD. Do
* not change the trapping behavior for these from the KVM default.
*/
- BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_FP),
+ BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_FP),
PVM_ID_AA64PFR0_ALLOW));
- BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_ASIMD),
+ BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AdvSIMD),
PVM_ID_AA64PFR0_ALLOW));
/* Trap RAS unless all current versions are supported */
- if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_RAS), feature_ids) <
- ID_AA64PFR0_RAS_V1P1) {
+ if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_RAS), feature_ids) <
+ ID_AA64PFR0_EL1_RAS_V1P1) {
hcr_set |= HCR_TERR | HCR_TEA;
hcr_clear |= HCR_FIEN;
}
/* Trap AMU */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_AMU), feature_ids)) {
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AMU), feature_ids)) {
hcr_clear |= HCR_AMVOFFEN;
cptr_set |= CPTR_EL2_TAM;
}
/* Trap SVE */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_SVE), feature_ids))
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_SVE), feature_ids))
cptr_set |= CPTR_EL2_TZ;
vcpu->arch.hcr_el2 |= hcr_set;
@@ -66,7 +66,7 @@ static void pvm_init_traps_aa64pfr1(struct kvm_vcpu *vcpu)
u64 hcr_clear = 0;
/* Memory Tagging: Trap and Treat as Untagged if not supported. */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR1_MTE), feature_ids)) {
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE), feature_ids)) {
hcr_set |= HCR_TID5;
hcr_clear |= HCR_DCT | HCR_ATA;
}
@@ -86,32 +86,32 @@ static void pvm_init_traps_aa64dfr0(struct kvm_vcpu *vcpu)
u64 cptr_set = 0;
/* Trap/constrain PMU */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_PMUVER), feature_ids)) {
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), feature_ids)) {
mdcr_set |= MDCR_EL2_TPM | MDCR_EL2_TPMCR;
mdcr_clear |= MDCR_EL2_HPME | MDCR_EL2_MTPME |
MDCR_EL2_HPMN_MASK;
}
/* Trap Debug */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_DEBUGVER), feature_ids))
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer), feature_ids))
mdcr_set |= MDCR_EL2_TDRA | MDCR_EL2_TDA | MDCR_EL2_TDE;
/* Trap OS Double Lock */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_DOUBLELOCK), feature_ids))
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DoubleLock), feature_ids))
mdcr_set |= MDCR_EL2_TDOSA;
/* Trap SPE */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_PMSVER), feature_ids)) {
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMSVer), feature_ids)) {
mdcr_set |= MDCR_EL2_TPMS;
mdcr_clear |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT;
}
/* Trap Trace Filter */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_TRACE_FILT), feature_ids))
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_TraceFilt), feature_ids))
mdcr_set |= MDCR_EL2_TTRF;
/* Trap Trace */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_TRACEVER), feature_ids))
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_TraceVer), feature_ids))
cptr_set |= CPTR_EL2_TTA;
vcpu->arch.mdcr_el2 |= mdcr_set;
@@ -128,7 +128,7 @@ static void pvm_init_traps_aa64mmfr0(struct kvm_vcpu *vcpu)
u64 mdcr_set = 0;
/* Trap Debug Communications Channel registers */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_FGT), feature_ids))
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_FGT), feature_ids))
mdcr_set |= MDCR_EL2_TDCC;
vcpu->arch.mdcr_el2 |= mdcr_set;
@@ -143,7 +143,7 @@ static void pvm_init_traps_aa64mmfr1(struct kvm_vcpu *vcpu)
u64 hcr_set = 0;
/* Trap LOR */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_LOR), feature_ids))
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_LO), feature_ids))
hcr_set |= HCR_TLOR;
vcpu->arch.hcr_el2 |= hcr_set;
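Each pvm_init_traps_*() helper above extracts a feature field from the sanitised ID register value and, when the feature is not exposed to the protected VM, sets the corresponding trap bit. Reduced to a standalone sketch (the field offset and trap flag below are illustrative, not the real ID register or CPTR/MDCR layouts):

/* Sketch of the "trap it if the feature field reads as zero" pattern
 * used by pvm_init_traps_*(); masks and flags are illustrative. */
#include <stdint.h>

#define FIELD_SHIFT	20	/* example field position within an ID register */
#define FIELD_MASK	(0xfULL << FIELD_SHIFT)
#define TRAP_FLAG	(1ULL << 30)	/* stand-in for e.g. CPTR_EL2_TZ */

static uint64_t traps_for(uint64_t id_reg_val)
{
	uint64_t traps = 0;

	/* Feature not exposed to the protected guest: trap its use. */
	if (!((id_reg_val & FIELD_MASK) >> FIELD_SHIFT))
		traps |= TRAP_FLAG;

	return traps;
}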
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index 578f71798c2e..e8d4ea2fcfa0 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -8,6 +8,7 @@
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pgtable.h>
+#include <asm/kvm_pkvm.h>
#include <nvhe/early_alloc.h>
#include <nvhe/fixed_config.h>
@@ -17,7 +18,6 @@
#include <nvhe/mm.h>
#include <nvhe/trap_handler.h>
-struct hyp_pool hpool;
unsigned long hyp_nr_cpus;
#define hyp_percpu_size ((unsigned long)__per_cpu_end - \
@@ -27,6 +27,7 @@ static void *vmemmap_base;
static void *hyp_pgt_base;
static void *host_s2_pgt_base;
static struct kvm_pgtable_mm_ops pkvm_pgtable_mm_ops;
+static struct hyp_pool hpool;
static int divide_memory_pool(void *virt, unsigned long size)
{
@@ -98,17 +99,42 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
return ret;
for (i = 0; i < hyp_nr_cpus; i++) {
+ struct kvm_nvhe_init_params *params = per_cpu_ptr(&kvm_init_params, i);
+ unsigned long hyp_addr;
+
start = (void *)kern_hyp_va(per_cpu_base[i]);
end = start + PAGE_ALIGN(hyp_percpu_size);
ret = pkvm_create_mappings(start, end, PAGE_HYP);
if (ret)
return ret;
- end = (void *)per_cpu_ptr(&kvm_init_params, i)->stack_hyp_va;
- start = end - PAGE_SIZE;
- ret = pkvm_create_mappings(start, end, PAGE_HYP);
+ /*
+ * Allocate a contiguous HYP private VA range for the stack
+ * and guard page. The allocation is also aligned based on
+ * the order of its size.
+ */
+ ret = pkvm_alloc_private_va_range(PAGE_SIZE * 2, &hyp_addr);
+ if (ret)
+ return ret;
+
+ /*
+ * Since the stack grows downwards, map the stack to the page
+ * at the higher address and leave the lower guard page
+ * unbacked.
+ *
+ * Any valid stack address now has the PAGE_SHIFT bit as 1
+ * and addresses corresponding to the guard page have the
+ * PAGE_SHIFT bit as 0 - this is used for overflow detection.
+ */
+ hyp_spin_lock(&pkvm_pgd_lock);
+ ret = kvm_pgtable_hyp_map(&pkvm_pgtable, hyp_addr + PAGE_SIZE,
+ PAGE_SIZE, params->stack_pa, PAGE_HYP);
+ hyp_spin_unlock(&pkvm_pgd_lock);
if (ret)
return ret;
+
+ /* Update stack_hyp_va to end of the stack's private VA range */
+ params->stack_hyp_va = hyp_addr + (2 * PAGE_SIZE);
}
/*
@@ -165,6 +191,7 @@ static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level,
enum kvm_pgtable_walk_flags flag,
void * const arg)
{
+ struct kvm_pgtable_mm_ops *mm_ops = arg;
enum kvm_pgtable_prot prot;
enum pkvm_page_state state;
kvm_pte_t pte = *ptep;
@@ -173,6 +200,15 @@ static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level,
if (!kvm_pte_valid(pte))
return 0;
+ /*
+ * Fix-up the refcount for the page-table pages as the early allocator
+ * was unable to access the hyp_vmemmap and so the buddy allocator has
+ * initialised the refcount to '1'.
+ */
+ mm_ops->get_page(ptep);
+ if (flag != KVM_PGTABLE_WALK_LEAF)
+ return 0;
+
if (level != (KVM_PGTABLE_MAX_LEVELS - 1))
return -EINVAL;
@@ -205,7 +241,8 @@ static int finalize_host_mappings(void)
{
struct kvm_pgtable_walker walker = {
.cb = finalize_host_mappings_walker,
- .flags = KVM_PGTABLE_WALK_LEAF,
+ .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
+ .arg = pkvm_pgtable.mm_ops,
};
int i, ret;
@@ -240,19 +277,20 @@ void __noreturn __pkvm_init_finalise(void)
if (ret)
goto out;
- ret = finalize_host_mappings();
- if (ret)
- goto out;
-
pkvm_pgtable_mm_ops = (struct kvm_pgtable_mm_ops) {
.zalloc_page = hyp_zalloc_hyp_page,
.phys_to_virt = hyp_phys_to_virt,
.virt_to_phys = hyp_virt_to_phys,
.get_page = hpool_get_page,
.put_page = hpool_put_page,
+ .page_count = hyp_page_count,
};
pkvm_pgtable.mm_ops = &pkvm_pgtable_mm_ops;
+ ret = finalize_host_mappings();
+ if (ret)
+ goto out;
+
out:
/*
* We tail-called to here from handle___pkvm_init() and will not return,
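Because the per-CPU stack page is mapped at the higher address of the two-page private VA range and the allocation is aligned to that range, any in-bounds stack pointer has the PAGE_SHIFT bit set, while a pointer that has run into the unbacked guard page has it clear. A hedged sketch of that test is shown below; the hypervisor's real check lives in the EL2 entry assembly, not in this form.

/* Sketch of the guard-page overflow test implied by the comment in
 * recreate_hyp_mappings(); the PAGE_SIZE value is illustrative. */
#include <stdbool.h>
#include <stdint.h>

#define PAGE_SHIFT	12UL
#define PAGE_SIZE	(1UL << PAGE_SHIFT)

/*
 * Stack page:  [hyp_addr + PAGE_SIZE, hyp_addr + 2 * PAGE_SIZE) -> bit set
 * Guard page:  [hyp_addr,             hyp_addr + PAGE_SIZE)     -> bit clear
 */
static bool hyp_stack_overflowed(uint64_t sp)
{
	return !(sp & PAGE_SIZE);	/* tests the PAGE_SHIFT bit of sp */
}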
diff --git a/arch/arm64/kvm/hyp/nvhe/stacktrace.c b/arch/arm64/kvm/hyp/nvhe/stacktrace.c
new file mode 100644
index 000000000000..ed6b58b19cfa
--- /dev/null
+++ b/arch/arm64/kvm/hyp/nvhe/stacktrace.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * KVM nVHE hypervisor stack tracing support.
+ *
+ * Copyright (C) 2022 Google LLC
+ */
+#include <asm/kvm_asm.h>
+#include <asm/kvm_hyp.h>
+#include <asm/memory.h>
+#include <asm/percpu.h>
+
+DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack)
+ __aligned(16);
+
+DEFINE_PER_CPU(struct kvm_nvhe_stacktrace_info, kvm_stacktrace_info);
+
+/*
+ * hyp_prepare_backtrace - Prepare non-protected nVHE backtrace.
+ *
+ * @fp : frame pointer at which to start the unwinding.
+ * @pc : program counter at which to start the unwinding.
+ *
+ * Save the information needed by the host to unwind the non-protected
+ * nVHE hypervisor stack in EL1.
+ */
+static void hyp_prepare_backtrace(unsigned long fp, unsigned long pc)
+{
+ struct kvm_nvhe_stacktrace_info *stacktrace_info = this_cpu_ptr(&kvm_stacktrace_info);
+ struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params);
+
+ stacktrace_info->stack_base = (unsigned long)(params->stack_hyp_va - PAGE_SIZE);
+ stacktrace_info->overflow_stack_base = (unsigned long)this_cpu_ptr(overflow_stack);
+ stacktrace_info->fp = fp;
+ stacktrace_info->pc = pc;
+}
+
+#ifdef CONFIG_PROTECTED_NVHE_STACKTRACE
+#include <asm/stacktrace/nvhe.h>
+
+DEFINE_PER_CPU(unsigned long [NVHE_STACKTRACE_SIZE/sizeof(long)], pkvm_stacktrace);
+
+static struct stack_info stackinfo_get_overflow(void)
+{
+ unsigned long low = (unsigned long)this_cpu_ptr(overflow_stack);
+ unsigned long high = low + OVERFLOW_STACK_SIZE;
+
+ return (struct stack_info) {
+ .low = low,
+ .high = high,
+ };
+}
+
+static struct stack_info stackinfo_get_hyp(void)
+{
+ struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params);
+ unsigned long high = params->stack_hyp_va;
+ unsigned long low = high - PAGE_SIZE;
+
+ return (struct stack_info) {
+ .low = low,
+ .high = high,
+ };
+}
+
+static int unwind_next(struct unwind_state *state)
+{
+ return unwind_next_frame_record(state);
+}
+
+static void notrace unwind(struct unwind_state *state,
+ stack_trace_consume_fn consume_entry,
+ void *cookie)
+{
+ while (1) {
+ int ret;
+
+ if (!consume_entry(cookie, state->pc))
+ break;
+ ret = unwind_next(state);
+ if (ret < 0)
+ break;
+ }
+}
+
+/*
+ * pkvm_save_backtrace_entry - Saves a protected nVHE HYP stacktrace entry
+ *
+ * @arg : index of the entry in the stacktrace buffer
+ * @where : the program counter corresponding to the stack frame
+ *
+ * Save the return address of a stack frame to the shared stacktrace buffer.
+ * The host can access this shared buffer from EL1 to dump the backtrace.
+ */
+static bool pkvm_save_backtrace_entry(void *arg, unsigned long where)
+{
+ unsigned long *stacktrace = this_cpu_ptr(pkvm_stacktrace);
+ int *idx = (int *)arg;
+
+ /*
+ * Need 2 free slots: 1 for current entry and 1 for the
+ * delimiter.
+ */
+ if (*idx > ARRAY_SIZE(pkvm_stacktrace) - 2)
+ return false;
+
+ stacktrace[*idx] = where;
+ stacktrace[++*idx] = 0UL;
+
+ return true;
+}
+
+/*
+ * pkvm_save_backtrace - Saves the protected nVHE HYP stacktrace
+ *
+ * @fp : frame pointer at which to start the unwinding.
+ * @pc : program counter at which to start the unwinding.
+ *
+ * Save the unwound stack addresses to the shared stacktrace buffer.
+ * The host can access this shared buffer from EL1 to dump the backtrace.
+ */
+static void pkvm_save_backtrace(unsigned long fp, unsigned long pc)
+{
+ struct stack_info stacks[] = {
+ stackinfo_get_overflow(),
+ stackinfo_get_hyp(),
+ };
+ struct unwind_state state = {
+ .stacks = stacks,
+ .nr_stacks = ARRAY_SIZE(stacks),
+ };
+ int idx = 0;
+
+ kvm_nvhe_unwind_init(&state, fp, pc);
+
+ unwind(&state, pkvm_save_backtrace_entry, &idx);
+}
+#else /* !CONFIG_PROTECTED_NVHE_STACKTRACE */
+static void pkvm_save_backtrace(unsigned long fp, unsigned long pc)
+{
+}
+#endif /* CONFIG_PROTECTED_NVHE_STACKTRACE */
+
+/*
+ * kvm_nvhe_prepare_backtrace - prepare to dump the nVHE backtrace
+ *
+ * @fp : frame pointer at which to start the unwinding.
+ * @pc : program counter at which to start the unwinding.
+ *
+ * Saves the information needed by the host to dump the nVHE hypervisor
+ * backtrace.
+ */
+void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc)
+{
+ if (is_protected_kvm_enabled())
+ pkvm_save_backtrace(fp, pc);
+ else
+ hyp_prepare_backtrace(fp, pc);
+}
diff --git a/arch/arm64/kvm/hyp/nvhe/stub.c b/arch/arm64/kvm/hyp/nvhe/stub.c
deleted file mode 100644
index c0aa6bbfd79d..000000000000
--- a/arch/arm64/kvm/hyp/nvhe/stub.c
+++ /dev/null
@@ -1,22 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Stubs for out-of-line function calls caused by re-using kernel
- * infrastructure at EL2.
- *
- * Copyright (C) 2020 - Google LLC
- */
-
-#include <linux/list.h>
-
-#ifdef CONFIG_DEBUG_LIST
-bool __list_add_valid(struct list_head *new, struct list_head *prev,
- struct list_head *next)
-{
- return true;
-}
-
-bool __list_del_entry_valid(struct list_head *entry)
-{
- return true;
-}
-#endif
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index d13115a12434..c2cb46ca4fb6 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -25,7 +25,6 @@
#include <asm/fpsimd.h>
#include <asm/debug-monitors.h>
#include <asm/processor.h>
-#include <asm/thread_info.h>
#include <nvhe/fixed_config.h>
#include <nvhe/mem_protect.h>
@@ -35,6 +34,8 @@ DEFINE_PER_CPU(struct kvm_host_data, kvm_host_data);
DEFINE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);
DEFINE_PER_CPU(unsigned long, kvm_hyp_vector);
+extern void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc);
+
static void __activate_traps(struct kvm_vcpu *vcpu)
{
u64 val;
@@ -44,10 +45,12 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
val = vcpu->arch.cptr_el2;
val |= CPTR_EL2_TTA | CPTR_EL2_TAM;
- if (!update_fp_enabled(vcpu)) {
+ if (!guest_owns_fp_regs(vcpu)) {
val |= CPTR_EL2_TFP | CPTR_EL2_TZ;
__activate_traps_fpsimd32(vcpu);
}
+ if (cpus_have_final_cap(ARM64_SME))
+ val |= CPTR_EL2_TSM;
write_sysreg(val, cptr_el2);
write_sysreg(__this_cpu_read(kvm_hyp_vector), vbar_el2);
@@ -96,8 +99,10 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
write_sysreg(this_cpu_ptr(&kvm_init_params)->hcr_el2, hcr_el2);
cptr = CPTR_EL2_DEFAULT;
- if (vcpu_has_sve(vcpu) && (vcpu->arch.flags & KVM_ARM64_FP_ENABLED))
+ if (vcpu_has_sve(vcpu) && (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED))
cptr |= CPTR_EL2_TZ;
+ if (cpus_have_final_cap(ARM64_SME))
+ cptr &= ~CPTR_EL2_TSM;
write_sysreg(cptr, cptr_el2);
write_sysreg(__kvm_hyp_host_vector, vbar_el2);
@@ -112,7 +117,7 @@ static void __hyp_vgic_save_state(struct kvm_vcpu *vcpu)
}
}
-/* Restore VGICv3 state on non_VEH systems */
+/* Restore VGICv3 state on non-VHE systems */
static void __hyp_vgic_restore_state(struct kvm_vcpu *vcpu)
{
if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
@@ -121,16 +126,13 @@ static void __hyp_vgic_restore_state(struct kvm_vcpu *vcpu)
}
}
-/**
+/*
* Disable host events, enable guest events
*/
-static bool __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt)
+#ifdef CONFIG_HW_PERF_EVENTS
+static bool __pmu_switch_to_guest(struct kvm_vcpu *vcpu)
{
- struct kvm_host_data *host;
- struct kvm_pmu_events *pmu;
-
- host = container_of(host_ctxt, struct kvm_host_data, host_ctxt);
- pmu = &host->pmu_events;
+ struct kvm_pmu_events *pmu = &vcpu->arch.pmu.events;
if (pmu->events_host)
write_sysreg(pmu->events_host, pmcntenclr_el0);
@@ -141,16 +143,12 @@ static bool __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt)
return (pmu->events_host || pmu->events_guest);
}
-/**
+/*
* Disable guest events, enable host events
*/
-static void __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt)
+static void __pmu_switch_to_host(struct kvm_vcpu *vcpu)
{
- struct kvm_host_data *host;
- struct kvm_pmu_events *pmu;
-
- host = container_of(host_ctxt, struct kvm_host_data, host_ctxt);
- pmu = &host->pmu_events;
+ struct kvm_pmu_events *pmu = &vcpu->arch.pmu.events;
if (pmu->events_guest)
write_sysreg(pmu->events_guest, pmcntenclr_el0);
@@ -158,8 +156,12 @@ static void __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt)
if (pmu->events_host)
write_sysreg(pmu->events_host, pmcntenset_el0);
}
+#else
+#define __pmu_switch_to_guest(v) ({ false; })
+#define __pmu_switch_to_host(v) do {} while (0)
+#endif
-/**
+/*
* Handler for protected VM MSR, MRS or System instruction execution in AArch64.
*
* Returns true if the hypervisor has handled the exit, and control should go
@@ -176,23 +178,6 @@ static bool kvm_handle_pvm_sys64(struct kvm_vcpu *vcpu, u64 *exit_code)
kvm_handle_pvm_sysreg(vcpu, exit_code));
}
-/**
- * Handler for protected floating-point and Advanced SIMD accesses.
- *
- * Returns true if the hypervisor has handled the exit, and control should go
- * back to the guest, or false if it hasn't.
- */
-static bool kvm_handle_pvm_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
-{
- /* Linux guests assume support for floating-point and Advanced SIMD. */
- BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_FP),
- PVM_ID_AA64PFR0_ALLOW));
- BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_ASIMD),
- PVM_ID_AA64PFR0_ALLOW));
-
- return kvm_hyp_handle_fpsimd(vcpu, exit_code);
-}
-
static const exit_handler_fn hyp_exit_handlers[] = {
[0 ... ESR_ELx_EC_MAX] = NULL,
[ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32,
@@ -208,7 +193,7 @@ static const exit_handler_fn pvm_exit_handlers[] = {
[0 ... ESR_ELx_EC_MAX] = NULL,
[ESR_ELx_EC_SYS64] = kvm_handle_pvm_sys64,
[ESR_ELx_EC_SVE] = kvm_handle_pvm_restricted,
- [ESR_ELx_EC_FP_ASIMD] = kvm_handle_pvm_fpsimd,
+ [ESR_ELx_EC_FP_ASIMD] = kvm_hyp_handle_fpsimd,
[ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low,
[ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low,
[ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
@@ -275,7 +260,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
host_ctxt->__hyp_running_vcpu = vcpu;
guest_ctxt = &vcpu->arch.ctxt;
- pmu_switch_needed = __pmu_switch_to_guest(host_ctxt);
+ pmu_switch_needed = __pmu_switch_to_guest(vcpu);
__sysreg_save_state_nvhe(host_ctxt);
/*
@@ -326,7 +311,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
__sysreg_restore_state_nvhe(host_ctxt);
- if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)
+ if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED)
__fpsimd_save_fpexc32(vcpu);
__debug_switch_to_host(vcpu);
@@ -337,7 +322,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
__debug_restore_host_buffers_nvhe(vcpu);
if (pmu_switch_needed)
- __pmu_switch_to_host(host_ctxt);
+ __pmu_switch_to_host(vcpu);
/* Returning to host will clear PSR.I, remask PMR if needed */
if (system_uses_irq_prio_masking())
@@ -348,7 +333,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
return exit_code;
}
-void __noreturn hyp_panic(void)
+asmlinkage void __noreturn hyp_panic(void)
{
u64 spsr = read_sysreg_el2(SYS_SPSR);
u64 elr = read_sysreg_el2(SYS_ELR);
@@ -366,11 +351,20 @@ void __noreturn hyp_panic(void)
__sysreg_restore_state_nvhe(host_ctxt);
}
+ /* Prepare to dump kvm nvhe hyp stacktrace */
+ kvm_nvhe_prepare_backtrace((unsigned long)__builtin_frame_address(0),
+ _THIS_IP_);
+
__hyp_do_panic(host_ctxt, spsr, elr, par);
unreachable();
}
+asmlinkage void __noreturn hyp_panic_bad_stack(void)
+{
+ hyp_panic();
+}
+
asmlinkage void kvm_unexpected_el2_exception(void)
{
- return __kvm_unexpected_el2_exception();
+ __kvm_unexpected_el2_exception();
}
diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
index 792cf6e6ac92..0f9ac25afdf4 100644
--- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c
+++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
@@ -22,6 +22,7 @@ u64 id_aa64pfr0_el1_sys_val;
u64 id_aa64pfr1_el1_sys_val;
u64 id_aa64isar0_el1_sys_val;
u64 id_aa64isar1_el1_sys_val;
+u64 id_aa64isar2_el1_sys_val;
u64 id_aa64mmfr0_el1_sys_val;
u64 id_aa64mmfr1_el1_sys_val;
u64 id_aa64mmfr2_el1_sys_val;
@@ -32,14 +33,12 @@ u64 id_aa64mmfr2_el1_sys_val;
*/
static void inject_undef64(struct kvm_vcpu *vcpu)
{
- u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);
+ u64 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);
*vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR);
*vcpu_cpsr(vcpu) = read_sysreg_el2(SYS_SPSR);
- vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA64_EL1 |
- KVM_ARM64_EXCEPT_AA64_ELx_SYNC |
- KVM_ARM64_PENDING_EXCEPTION);
+ kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC);
__kvm_adjust_pc(vcpu);
@@ -89,16 +88,13 @@ static u64 get_pvm_id_aa64pfr0(const struct kvm_vcpu *vcpu)
u64 set_mask = 0;
u64 allow_mask = PVM_ID_AA64PFR0_ALLOW;
- if (!vcpu_has_sve(vcpu))
- allow_mask &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_SVE);
-
set_mask |= get_restricted_features_unsigned(id_aa64pfr0_el1_sys_val,
PVM_ID_AA64PFR0_RESTRICT_UNSIGNED);
/* Spectre and Meltdown mitigation in KVM */
- set_mask |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV2),
+ set_mask |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2),
(u64)kvm->arch.pfr0_csv2);
- set_mask |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3),
+ set_mask |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3),
(u64)kvm->arch.pfr0_csv3);
return (id_aa64pfr0_el1_sys_val & allow_mask) | set_mask;
@@ -110,7 +106,7 @@ static u64 get_pvm_id_aa64pfr1(const struct kvm_vcpu *vcpu)
u64 allow_mask = PVM_ID_AA64PFR1_ALLOW;
if (!kvm_has_mte(kvm))
- allow_mask &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_MTE);
+ allow_mask &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE);
return id_aa64pfr1_el1_sys_val & allow_mask;
}
@@ -175,14 +171,25 @@ static u64 get_pvm_id_aa64isar1(const struct kvm_vcpu *vcpu)
u64 allow_mask = PVM_ID_AA64ISAR1_ALLOW;
if (!vcpu_has_ptrauth(vcpu))
- allow_mask &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR1_APA) |
- ARM64_FEATURE_MASK(ID_AA64ISAR1_API) |
- ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA) |
- ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI));
+ allow_mask &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA) |
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API) |
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPA) |
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPI));
return id_aa64isar1_el1_sys_val & allow_mask;
}
+static u64 get_pvm_id_aa64isar2(const struct kvm_vcpu *vcpu)
+{
+ u64 allow_mask = PVM_ID_AA64ISAR2_ALLOW;
+
+ if (!vcpu_has_ptrauth(vcpu))
+ allow_mask &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3) |
+ ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3));
+
+ return id_aa64isar2_el1_sys_val & allow_mask;
+}
+
static u64 get_pvm_id_aa64mmfr0(const struct kvm_vcpu *vcpu)
{
u64 set_mask;
@@ -225,6 +232,8 @@ u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id)
return get_pvm_id_aa64isar0(vcpu);
case SYS_ID_AA64ISAR1_EL1:
return get_pvm_id_aa64isar1(vcpu);
+ case SYS_ID_AA64ISAR2_EL1:
+ return get_pvm_id_aa64isar2(vcpu);
case SYS_ID_AA64MMFR0_EL1:
return get_pvm_id_aa64mmfr0(vcpu);
case SYS_ID_AA64MMFR1_EL1:
@@ -232,15 +241,9 @@ u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id)
case SYS_ID_AA64MMFR2_EL1:
return get_pvm_id_aa64mmfr2(vcpu);
default:
- /*
- * Should never happen because all cases are covered in
- * pvm_sys_reg_descs[].
- */
- WARN_ON(1);
- break;
+ /* Unhandled ID register, RAZ */
+ return 0;
}
-
- return 0;
}
static u64 read_id_reg(const struct kvm_vcpu *vcpu,
@@ -278,8 +281,8 @@ static bool pvm_access_id_aarch32(struct kvm_vcpu *vcpu,
* No support for AArch32 guests, therefore, pKVM has no sanitized copy
* of AArch32 feature id registers.
*/
- BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1),
- PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) > ID_AA64PFR0_ELx_64BIT_ONLY);
+ BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL1),
+ PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) > ID_AA64PFR0_EL1_ELx_64BIT_ONLY);
return pvm_access_raz_wi(vcpu, p, r);
}
@@ -321,6 +324,16 @@ static bool pvm_gic_read_sre(struct kvm_vcpu *vcpu,
/* Mark the specified system register as an AArch64 feature id register. */
#define AARCH64(REG) { SYS_DESC(REG), .access = pvm_access_id_aarch64 }
+/*
+ * sys_reg_desc initialiser for architecturally unallocated cpufeature ID
+ * register with encoding Op0=3, Op1=0, CRn=0, CRm=crm, Op2=op2
+ * (1 <= crm < 8, 0 <= Op2 < 8).
+ */
+#define ID_UNALLOCATED(crm, op2) { \
+ Op0(3), Op1(0), CRn(0), CRm(crm), Op2(op2), \
+ .access = pvm_access_id_aarch64, \
+}
+
/* Mark the specified system register as Read-As-Zero/Write-Ignored */
#define RAZ_WI(REG) { SYS_DESC(REG), .access = pvm_access_raz_wi }
@@ -364,24 +377,46 @@ static const struct sys_reg_desc pvm_sys_reg_descs[] = {
AARCH32(SYS_MVFR0_EL1),
AARCH32(SYS_MVFR1_EL1),
AARCH32(SYS_MVFR2_EL1),
+ ID_UNALLOCATED(3,3),
AARCH32(SYS_ID_PFR2_EL1),
AARCH32(SYS_ID_DFR1_EL1),
AARCH32(SYS_ID_MMFR5_EL1),
+ ID_UNALLOCATED(3,7),
/* AArch64 ID registers */
/* CRm=4 */
AARCH64(SYS_ID_AA64PFR0_EL1),
AARCH64(SYS_ID_AA64PFR1_EL1),
+ ID_UNALLOCATED(4,2),
+ ID_UNALLOCATED(4,3),
AARCH64(SYS_ID_AA64ZFR0_EL1),
+ ID_UNALLOCATED(4,5),
+ ID_UNALLOCATED(4,6),
+ ID_UNALLOCATED(4,7),
AARCH64(SYS_ID_AA64DFR0_EL1),
AARCH64(SYS_ID_AA64DFR1_EL1),
+ ID_UNALLOCATED(5,2),
+ ID_UNALLOCATED(5,3),
AARCH64(SYS_ID_AA64AFR0_EL1),
AARCH64(SYS_ID_AA64AFR1_EL1),
+ ID_UNALLOCATED(5,6),
+ ID_UNALLOCATED(5,7),
AARCH64(SYS_ID_AA64ISAR0_EL1),
AARCH64(SYS_ID_AA64ISAR1_EL1),
+ AARCH64(SYS_ID_AA64ISAR2_EL1),
+ ID_UNALLOCATED(6,3),
+ ID_UNALLOCATED(6,4),
+ ID_UNALLOCATED(6,5),
+ ID_UNALLOCATED(6,6),
+ ID_UNALLOCATED(6,7),
AARCH64(SYS_ID_AA64MMFR0_EL1),
AARCH64(SYS_ID_AA64MMFR1_EL1),
AARCH64(SYS_ID_AA64MMFR2_EL1),
+ ID_UNALLOCATED(7,3),
+ ID_UNALLOCATED(7,4),
+ ID_UNALLOCATED(7,5),
+ ID_UNALLOCATED(7,6),
+ ID_UNALLOCATED(7,7),
/* Scalable Vector Registers are restricted. */
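The get_pvm_id_*() helpers above all follow the same shape: start from the system-wide sanitised register value, keep only the fields on an allow list, then OR in any values that must be reported regardless (unhandled registers simply read as zero). Reduced to a sketch, with purely illustrative masks:

/* Sketch of the allow-mask/set-mask composition used by the
 * get_pvm_id_*() helpers; the masks passed in are illustrative. */
#include <stdint.h>

static uint64_t sanitise_id_reg(uint64_t sys_val,
				uint64_t allow_mask, uint64_t set_mask)
{
	/* Hide everything outside the allow list, then force required fields. */
	return (sys_val & allow_mask) | set_mask;
}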
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index f8ceebe4982e..cdf8e76b0be1 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -61,7 +61,7 @@ struct kvm_pgtable_walk_data {
static bool kvm_phys_is_valid(u64 phys)
{
- return phys < BIT(id_aa64mmfr0_parange_to_phys_shift(ID_AA64MMFR0_PARANGE_MAX));
+ return phys < BIT(id_aa64mmfr0_parange_to_phys_shift(ID_AA64MMFR0_EL1_PARANGE_MAX));
}
static bool kvm_block_mapping_supported(u64 addr, u64 end, u64 phys, u32 level)
@@ -383,21 +383,6 @@ enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte)
return prot;
}
-static bool hyp_pte_needs_update(kvm_pte_t old, kvm_pte_t new)
-{
- /*
- * Tolerate KVM recreating the exact same mapping, or changing software
- * bits if the existing mapping was valid.
- */
- if (old == new)
- return false;
-
- if (!kvm_pte_valid(old))
- return true;
-
- return !WARN_ON((old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW);
-}
-
static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level,
kvm_pte_t *ptep, struct hyp_map_data *data)
{
@@ -407,11 +392,16 @@ static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level,
if (!kvm_block_mapping_supported(addr, end, phys, level))
return false;
+ data->phys += granule;
new = kvm_init_valid_leaf_pte(phys, data->attr, level);
- if (hyp_pte_needs_update(old, new))
- smp_store_release(ptep, new);
+ if (old == new)
+ return true;
+ if (!kvm_pte_valid(old))
+ data->mm_ops->get_page(ptep);
+ else if (WARN_ON((old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW))
+ return false;
- data->phys += granule;
+ smp_store_release(ptep, new);
return true;
}
@@ -433,6 +423,7 @@ static int hyp_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
return -ENOMEM;
kvm_set_table_pte(ptep, childp, mm_ops);
+ mm_ops->get_page(ptep);
return 0;
}
@@ -460,6 +451,69 @@ int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
return ret;
}
+struct hyp_unmap_data {
+ u64 unmapped;
+ struct kvm_pgtable_mm_ops *mm_ops;
+};
+
+static int hyp_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
+ enum kvm_pgtable_walk_flags flag, void * const arg)
+{
+ kvm_pte_t pte = *ptep, *childp = NULL;
+ u64 granule = kvm_granule_size(level);
+ struct hyp_unmap_data *data = arg;
+ struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
+
+ if (!kvm_pte_valid(pte))
+ return -EINVAL;
+
+ if (kvm_pte_table(pte, level)) {
+ childp = kvm_pte_follow(pte, mm_ops);
+
+ if (mm_ops->page_count(childp) != 1)
+ return 0;
+
+ kvm_clear_pte(ptep);
+ dsb(ishst);
+ __tlbi_level(vae2is, __TLBI_VADDR(addr, 0), level);
+ } else {
+ if (end - addr < granule)
+ return -EINVAL;
+
+ kvm_clear_pte(ptep);
+ dsb(ishst);
+ __tlbi_level(vale2is, __TLBI_VADDR(addr, 0), level);
+ data->unmapped += granule;
+ }
+
+ dsb(ish);
+ isb();
+ mm_ops->put_page(ptep);
+
+ if (childp)
+ mm_ops->put_page(childp);
+
+ return 0;
+}
+
+u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
+{
+ struct hyp_unmap_data unmap_data = {
+ .mm_ops = pgt->mm_ops,
+ };
+ struct kvm_pgtable_walker walker = {
+ .cb = hyp_unmap_walker,
+ .arg = &unmap_data,
+ .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
+ };
+
+ if (!pgt->mm_ops->page_count)
+ return 0;
+
+ kvm_pgtable_walk(pgt, addr, size, &walker);
+ return unmap_data.unmapped;
+}
+
int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits,
struct kvm_pgtable_mm_ops *mm_ops)
{
@@ -482,8 +536,16 @@ static int hyp_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
enum kvm_pgtable_walk_flags flag, void * const arg)
{
struct kvm_pgtable_mm_ops *mm_ops = arg;
+ kvm_pte_t pte = *ptep;
+
+ if (!kvm_pte_valid(pte))
+ return 0;
+
+ mm_ops->put_page(ptep);
+
+ if (kvm_pte_table(pte, level))
+ mm_ops->put_page(kvm_pte_follow(pte, mm_ops));
- mm_ops->put_page((void *)kvm_pte_follow(*ptep, mm_ops));
return 0;
}
@@ -491,7 +553,7 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt)
{
struct kvm_pgtable_walker walker = {
.cb = hyp_free_walker,
- .flags = KVM_PGTABLE_WALK_TABLE_POST,
+ .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
.arg = pgt->mm_ops,
};
@@ -921,13 +983,9 @@ static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
*/
stage2_put_pte(ptep, mmu, addr, level, mm_ops);
- if (need_flush) {
- kvm_pte_t *pte_follow = kvm_pte_follow(pte, mm_ops);
-
- dcache_clean_inval_poc((unsigned long)pte_follow,
- (unsigned long)pte_follow +
- kvm_granule_size(level));
- }
+ if (need_flush && mm_ops->dcache_clean_inval_poc)
+ mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops),
+ kvm_granule_size(level));
if (childp)
mm_ops->put_page(childp);
@@ -1089,15 +1147,13 @@ static int stage2_flush_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
struct kvm_pgtable *pgt = arg;
struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
kvm_pte_t pte = *ptep;
- kvm_pte_t *pte_follow;
if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pgt, pte))
return 0;
- pte_follow = kvm_pte_follow(pte, mm_ops);
- dcache_clean_inval_poc((unsigned long)pte_follow,
- (unsigned long)pte_follow +
- kvm_granule_size(level));
+ if (mm_ops->dcache_clean_inval_poc)
+ mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops),
+ kvm_granule_size(level));
return 0;
}
@@ -1116,13 +1172,13 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
}
-int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch,
+int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
struct kvm_pgtable_mm_ops *mm_ops,
enum kvm_pgtable_stage2_flags flags,
kvm_pgtable_force_pte_cb_t force_pte_cb)
{
size_t pgd_sz;
- u64 vtcr = arch->vtcr;
+ u64 vtcr = mmu->arch->vtcr;
u32 ia_bits = VTCR_EL2_IPA(vtcr);
u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;
@@ -1135,7 +1191,7 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch,
pgt->ia_bits = ia_bits;
pgt->start_level = start_level;
pgt->mm_ops = mm_ops;
- pgt->mmu = &arch->mmu;
+ pgt->mmu = mmu;
pgt->flags = flags;
pgt->force_pte_cb = force_pte_cb;
diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c
index 20db2f281cf2..6cb638b184b1 100644
--- a/arch/arm64/kvm/hyp/vgic-v3-sr.c
+++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c
@@ -473,7 +473,7 @@ static int __vgic_v3_bpr_min(void)
static int __vgic_v3_get_group(struct kvm_vcpu *vcpu)
{
- u32 esr = kvm_vcpu_get_esr(vcpu);
+ u64 esr = kvm_vcpu_get_esr(vcpu);
u8 crm = (esr & ESR_ELx_SYS64_ISS_CRM_MASK) >> ESR_ELx_SYS64_ISS_CRM_SHIFT;
return crm != 8;
@@ -983,6 +983,9 @@ static void __vgic_v3_read_ctlr(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
val = ((vtr >> 29) & 7) << ICC_CTLR_EL1_PRI_BITS_SHIFT;
/* IDbits */
val |= ((vtr >> 23) & 7) << ICC_CTLR_EL1_ID_BITS_SHIFT;
+ /* SEIS */
+ if (kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_SEIS_MASK)
+ val |= BIT(ICC_CTLR_EL1_SEIS_SHIFT);
/* A3V */
val |= ((vtr >> 21) & 1) << ICC_CTLR_EL1_A3V_SHIFT;
/* EOImode */
@@ -1013,7 +1016,7 @@ static void __vgic_v3_write_ctlr(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu)
{
int rt;
- u32 esr;
+ u64 esr;
u32 vmcr;
void (*fn)(struct kvm_vcpu *, u32, int);
bool is_read;
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index fbb26b93c347..1a97391fedd2 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -10,6 +10,7 @@
#include <linux/kvm_host.h>
#include <linux/types.h>
#include <linux/jump_label.h>
+#include <linux/percpu.h>
#include <uapi/linux/psci.h>
#include <kvm/arm_psci.h>
@@ -25,6 +26,7 @@
#include <asm/debug-monitors.h>
#include <asm/processor.h>
#include <asm/thread_info.h>
+#include <asm/vectors.h>
/* VHE specific context */
DEFINE_PER_CPU(struct kvm_host_data, kvm_host_data);
@@ -39,7 +41,8 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
val = read_sysreg(cpacr_el1);
val |= CPACR_EL1_TTA;
- val &= ~CPACR_EL1_ZEN;
+ val &= ~(CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN |
+ CPACR_EL1_SMEN_EL0EN | CPACR_EL1_SMEN_EL1EN);
/*
* With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to
@@ -52,11 +55,11 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
val |= CPTR_EL2_TAM;
- if (update_fp_enabled(vcpu)) {
+ if (guest_owns_fp_regs(vcpu)) {
if (vcpu_has_sve(vcpu))
- val |= CPACR_EL1_ZEN;
+ val |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
} else {
- val &= ~CPACR_EL1_FPEN;
+ val &= ~(CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN);
__activate_traps_fpsimd32(vcpu);
}
@@ -68,7 +71,7 @@ NOKPROBE_SYMBOL(__activate_traps);
static void __deactivate_traps(struct kvm_vcpu *vcpu)
{
- extern char vectors[]; /* kernel exception vectors */
+ const char *host_vectors = vectors;
___deactivate_traps(vcpu);
@@ -82,7 +85,10 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));
write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1);
- write_sysreg(vectors, vbar_el1);
+
+ if (!arm64_kernel_unmapped_at_el0())
+ host_vectors = __this_cpu_read(this_cpu_vector);
+ write_sysreg(host_vectors, vbar_el1);
}
NOKPROBE_SYMBOL(__deactivate_traps);
@@ -161,7 +167,7 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
sysreg_restore_host_state_vhe(host_ctxt);
- if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)
+ if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED)
__fpsimd_save_fpexc32(vcpu);
__debug_switch_to_host(vcpu);
@@ -235,5 +241,5 @@ void __noreturn hyp_panic(void)
asmlinkage void kvm_unexpected_el2_exception(void)
{
- return __kvm_unexpected_el2_exception();
+ __kvm_unexpected_el2_exception();
}
diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
index 007a12dd4351..7b44f6b3b547 100644
--- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
@@ -79,7 +79,7 @@ void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu)
__sysreg_restore_user_state(guest_ctxt);
__sysreg_restore_el1_state(guest_ctxt);
- vcpu->arch.sysregs_loaded_on_cpu = true;
+ vcpu_set_flag(vcpu, SYSREGS_ON_CPU);
activate_traps_vhe_load(vcpu);
}
@@ -110,5 +110,5 @@ void kvm_vcpu_put_sysregs_vhe(struct kvm_vcpu *vcpu)
/* Restore host user state */
__sysreg_restore_user_state(host_ctxt);
- vcpu->arch.sysregs_loaded_on_cpu = false;
+ vcpu_clear_flag(vcpu, SYSREGS_ON_CPU);
}
diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c
index 30da78f72b3b..c9f401fa01a9 100644
--- a/arch/arm64/kvm/hypercalls.c
+++ b/arch/arm64/kvm/hypercalls.c
@@ -9,6 +9,13 @@
#include <kvm/arm_hypercalls.h>
#include <kvm/arm_psci.h>
+#define KVM_ARM_SMCCC_STD_FEATURES \
+ GENMASK(KVM_REG_ARM_STD_BMAP_BIT_COUNT - 1, 0)
+#define KVM_ARM_SMCCC_STD_HYP_FEATURES \
+ GENMASK(KVM_REG_ARM_STD_HYP_BMAP_BIT_COUNT - 1, 0)
+#define KVM_ARM_SMCCC_VENDOR_HYP_FEATURES \
+ GENMASK(KVM_REG_ARM_VENDOR_HYP_BMAP_BIT_COUNT - 1, 0)
+
static void kvm_ptp_get_time(struct kvm_vcpu *vcpu, u64 *val)
{
struct system_time_snapshot systime_snapshot;
@@ -58,13 +65,73 @@ static void kvm_ptp_get_time(struct kvm_vcpu *vcpu, u64 *val)
val[3] = lower_32_bits(cycles);
}
+static bool kvm_hvc_call_default_allowed(u32 func_id)
+{
+ switch (func_id) {
+ /*
+ * List of function-ids that are not gated with the bitmapped
+ * feature firmware registers, and are to be allowed for
+ * servicing the call by default.
+ */
+ case ARM_SMCCC_VERSION_FUNC_ID:
+ case ARM_SMCCC_ARCH_FEATURES_FUNC_ID:
+ return true;
+ default:
+ /* PSCI 0.2 and up is in the 0:0x1f range */
+ if (ARM_SMCCC_OWNER_NUM(func_id) == ARM_SMCCC_OWNER_STANDARD &&
+ ARM_SMCCC_FUNC_NUM(func_id) <= 0x1f)
+ return true;
+
+ /*
+ * KVM's PSCI 0.1 doesn't comply with SMCCC, and has
+ * its own function-id base and range
+ */
+ if (func_id >= KVM_PSCI_FN(0) && func_id <= KVM_PSCI_FN(3))
+ return true;
+
+ return false;
+ }
+}
+
+static bool kvm_hvc_call_allowed(struct kvm_vcpu *vcpu, u32 func_id)
+{
+ struct kvm_smccc_features *smccc_feat = &vcpu->kvm->arch.smccc_feat;
+
+ switch (func_id) {
+ case ARM_SMCCC_TRNG_VERSION:
+ case ARM_SMCCC_TRNG_FEATURES:
+ case ARM_SMCCC_TRNG_GET_UUID:
+ case ARM_SMCCC_TRNG_RND32:
+ case ARM_SMCCC_TRNG_RND64:
+ return test_bit(KVM_REG_ARM_STD_BIT_TRNG_V1_0,
+ &smccc_feat->std_bmap);
+ case ARM_SMCCC_HV_PV_TIME_FEATURES:
+ case ARM_SMCCC_HV_PV_TIME_ST:
+ return test_bit(KVM_REG_ARM_STD_HYP_BIT_PV_TIME,
+ &smccc_feat->std_hyp_bmap);
+ case ARM_SMCCC_VENDOR_HYP_KVM_FEATURES_FUNC_ID:
+ case ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID:
+ return test_bit(KVM_REG_ARM_VENDOR_HYP_BIT_FUNC_FEAT,
+ &smccc_feat->vendor_hyp_bmap);
+ case ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID:
+ return test_bit(KVM_REG_ARM_VENDOR_HYP_BIT_PTP,
+ &smccc_feat->vendor_hyp_bmap);
+ default:
+ return kvm_hvc_call_default_allowed(func_id);
+ }
+}
+
int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
{
+ struct kvm_smccc_features *smccc_feat = &vcpu->kvm->arch.smccc_feat;
u32 func_id = smccc_get_function(vcpu);
u64 val[4] = {SMCCC_RET_NOT_SUPPORTED};
u32 feature;
gpa_t gpa;
+ if (!kvm_hvc_call_allowed(vcpu, func_id))
+ goto out;
+
switch (func_id) {
case ARM_SMCCC_VERSION_FUNC_ID:
val[0] = ARM_SMCCC_VERSION_1_1;
@@ -107,8 +174,22 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
break;
}
break;
+ case ARM_SMCCC_ARCH_WORKAROUND_3:
+ switch (arm64_get_spectre_bhb_state()) {
+ case SPECTRE_VULNERABLE:
+ break;
+ case SPECTRE_MITIGATED:
+ val[0] = SMCCC_RET_SUCCESS;
+ break;
+ case SPECTRE_UNAFFECTED:
+ val[0] = SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED;
+ break;
+ }
+ break;
case ARM_SMCCC_HV_PV_TIME_FEATURES:
- val[0] = SMCCC_RET_SUCCESS;
+ if (test_bit(KVM_REG_ARM_STD_HYP_BIT_PV_TIME,
+ &smccc_feat->std_hyp_bmap))
+ val[0] = SMCCC_RET_SUCCESS;
break;
}
break;
@@ -127,8 +208,7 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
val[3] = ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_3;
break;
case ARM_SMCCC_VENDOR_HYP_KVM_FEATURES_FUNC_ID:
- val[0] = BIT(ARM_SMCCC_KVM_FUNC_FEATURES);
- val[0] |= BIT(ARM_SMCCC_KVM_FUNC_PTP);
+ val[0] = smccc_feat->vendor_hyp_bmap;
break;
case ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID:
kvm_ptp_get_time(vcpu, val);
@@ -143,6 +223,259 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
return kvm_psci_call(vcpu);
}
+out:
smccc_set_retval(vcpu, val[0], val[1], val[2], val[3]);
return 1;
}
+
+static const u64 kvm_arm_fw_reg_ids[] = {
+ KVM_REG_ARM_PSCI_VERSION,
+ KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1,
+ KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2,
+ KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3,
+ KVM_REG_ARM_STD_BMAP,
+ KVM_REG_ARM_STD_HYP_BMAP,
+ KVM_REG_ARM_VENDOR_HYP_BMAP,
+};
+
+void kvm_arm_init_hypercalls(struct kvm *kvm)
+{
+ struct kvm_smccc_features *smccc_feat = &kvm->arch.smccc_feat;
+
+ smccc_feat->std_bmap = KVM_ARM_SMCCC_STD_FEATURES;
+ smccc_feat->std_hyp_bmap = KVM_ARM_SMCCC_STD_HYP_FEATURES;
+ smccc_feat->vendor_hyp_bmap = KVM_ARM_SMCCC_VENDOR_HYP_FEATURES;
+}
+
+int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu)
+{
+ return ARRAY_SIZE(kvm_arm_fw_reg_ids);
+}
+
+int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(kvm_arm_fw_reg_ids); i++) {
+ if (put_user(kvm_arm_fw_reg_ids[i], uindices++))
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+#define KVM_REG_FEATURE_LEVEL_MASK GENMASK(3, 0)
+
+/*
+ * Convert the workaround level into an easy-to-compare number, where higher
+ * values mean better protection.
+ */
+static int get_kernel_wa_level(u64 regid)
+{
+ switch (regid) {
+ case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
+ switch (arm64_get_spectre_v2_state()) {
+ case SPECTRE_VULNERABLE:
+ return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL;
+ case SPECTRE_MITIGATED:
+ return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL;
+ case SPECTRE_UNAFFECTED:
+ return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED;
+ }
+ return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL;
+ case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
+ switch (arm64_get_spectre_v4_state()) {
+ case SPECTRE_MITIGATED:
+ /*
+ * As for the hypercall discovery, we pretend we
+ * don't have any FW mitigation if SSBS is there at
+ * all times.
+ */
+ if (cpus_have_final_cap(ARM64_SSBS))
+ return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL;
+ fallthrough;
+ case SPECTRE_UNAFFECTED:
+ return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED;
+ case SPECTRE_VULNERABLE:
+ return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL;
+ }
+ break;
+ case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3:
+ switch (arm64_get_spectre_bhb_state()) {
+ case SPECTRE_VULNERABLE:
+ return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_AVAIL;
+ case SPECTRE_MITIGATED:
+ return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_AVAIL;
+ case SPECTRE_UNAFFECTED:
+ return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_REQUIRED;
+ }
+ return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_AVAIL;
+ }
+
+ return -EINVAL;
+}
+
+int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+ struct kvm_smccc_features *smccc_feat = &vcpu->kvm->arch.smccc_feat;
+ void __user *uaddr = (void __user *)(long)reg->addr;
+ u64 val;
+
+ switch (reg->id) {
+ case KVM_REG_ARM_PSCI_VERSION:
+ val = kvm_psci_version(vcpu);
+ break;
+ case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
+ case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
+ case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3:
+ val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK;
+ break;
+ case KVM_REG_ARM_STD_BMAP:
+ val = READ_ONCE(smccc_feat->std_bmap);
+ break;
+ case KVM_REG_ARM_STD_HYP_BMAP:
+ val = READ_ONCE(smccc_feat->std_hyp_bmap);
+ break;
+ case KVM_REG_ARM_VENDOR_HYP_BMAP:
+ val = READ_ONCE(smccc_feat->vendor_hyp_bmap);
+ break;
+ default:
+ return -ENOENT;
+ }
+
+ if (copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int kvm_arm_set_fw_reg_bmap(struct kvm_vcpu *vcpu, u64 reg_id, u64 val)
+{
+ int ret = 0;
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_smccc_features *smccc_feat = &kvm->arch.smccc_feat;
+ unsigned long *fw_reg_bmap, fw_reg_features;
+
+ switch (reg_id) {
+ case KVM_REG_ARM_STD_BMAP:
+ fw_reg_bmap = &smccc_feat->std_bmap;
+ fw_reg_features = KVM_ARM_SMCCC_STD_FEATURES;
+ break;
+ case KVM_REG_ARM_STD_HYP_BMAP:
+ fw_reg_bmap = &smccc_feat->std_hyp_bmap;
+ fw_reg_features = KVM_ARM_SMCCC_STD_HYP_FEATURES;
+ break;
+ case KVM_REG_ARM_VENDOR_HYP_BMAP:
+ fw_reg_bmap = &smccc_feat->vendor_hyp_bmap;
+ fw_reg_features = KVM_ARM_SMCCC_VENDOR_HYP_FEATURES;
+ break;
+ default:
+ return -ENOENT;
+ }
+
+ /* Check for unsupported bit */
+ if (val & ~fw_reg_features)
+ return -EINVAL;
+
+ mutex_lock(&kvm->lock);
+
+ if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags) &&
+ val != *fw_reg_bmap) {
+ ret = -EBUSY;
+ goto out;
+ }
+
+ WRITE_ONCE(*fw_reg_bmap, val);
+out:
+ mutex_unlock(&kvm->lock);
+ return ret;
+}
+
+int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+ void __user *uaddr = (void __user *)(long)reg->addr;
+ u64 val;
+ int wa_level;
+
+ if (copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)))
+ return -EFAULT;
+
+ switch (reg->id) {
+ case KVM_REG_ARM_PSCI_VERSION:
+ {
+ bool wants_02;
+
+ wants_02 = test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features);
+
+ switch (val) {
+ case KVM_ARM_PSCI_0_1:
+ if (wants_02)
+ return -EINVAL;
+ vcpu->kvm->arch.psci_version = val;
+ return 0;
+ case KVM_ARM_PSCI_0_2:
+ case KVM_ARM_PSCI_1_0:
+ case KVM_ARM_PSCI_1_1:
+ if (!wants_02)
+ return -EINVAL;
+ vcpu->kvm->arch.psci_version = val;
+ return 0;
+ }
+ break;
+ }
+
+ case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
+ case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3:
+ if (val & ~KVM_REG_FEATURE_LEVEL_MASK)
+ return -EINVAL;
+
+ if (get_kernel_wa_level(reg->id) < val)
+ return -EINVAL;
+
+ return 0;
+
+ case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
+ if (val & ~(KVM_REG_FEATURE_LEVEL_MASK |
+ KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED))
+ return -EINVAL;
+
+ /* The enabled bit must not be set unless the level is AVAIL. */
+ if ((val & KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED) &&
+ (val & KVM_REG_FEATURE_LEVEL_MASK) != KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL)
+ return -EINVAL;
+
+ /*
+ * Map all the possible incoming states to the only two we
+ * really want to deal with.
+ */
+ switch (val & KVM_REG_FEATURE_LEVEL_MASK) {
+ case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL:
+ case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN:
+ wa_level = KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL;
+ break;
+ case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL:
+ case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED:
+ wa_level = KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /*
+ * We can deal with NOT_AVAIL on NOT_REQUIRED, but not the
+ * other way around.
+ */
+ if (get_kernel_wa_level(reg->id) < wa_level)
+ return -EINVAL;
+
+ return 0;
+ case KVM_REG_ARM_STD_BMAP:
+ case KVM_REG_ARM_STD_HYP_BMAP:
+ case KVM_REG_ARM_VENDOR_HYP_BMAP:
+ return kvm_arm_set_fw_reg_bmap(vcpu, reg->id, val);
+ default:
+ return -ENOENT;
+ }
+
+ return -EINVAL;
+}
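With the bitmapped firmware registers, a VMM can restrict which hypercall groups a guest sees before the first vCPU runs (a later write that changes the value returns -EBUSY). Below is a hedged userspace sketch using the standard KVM_GET_ONE_REG/KVM_SET_ONE_REG path; it assumes a vCPU fd obtained via KVM_CREATE_VCPU and a uapi header providing the register IDs added by this series, and keeps error handling minimal.

/* Hedged sketch: clear the TRNG bit in KVM_REG_ARM_STD_BMAP from a VMM. */
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int disable_guest_trng(int vcpu_fd)
{
	uint64_t bmap;
	struct kvm_one_reg reg = {
		.id   = KVM_REG_ARM_STD_BMAP,
		.addr = (uint64_t)&bmap,
	};

	if (ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg))
		return -1;

	bmap &= ~(1UL << KVM_REG_ARM_STD_BIT_TRNG_V1_0);

	return ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
}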
diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
index b47df73e98d7..f32f4a2a347f 100644
--- a/arch/arm64/kvm/inject_fault.c
+++ b/arch/arm64/kvm/inject_fault.c
@@ -18,11 +18,9 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
{
unsigned long cpsr = *vcpu_cpsr(vcpu);
bool is_aarch32 = vcpu_mode_is_32bit(vcpu);
- u32 esr = 0;
+ u64 esr = 0;
- vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA64_EL1 |
- KVM_ARM64_EXCEPT_AA64_ELx_SYNC |
- KVM_ARM64_PENDING_EXCEPTION);
+ kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC);
vcpu_write_sys_reg(vcpu, addr, FAR_EL1);
@@ -50,11 +48,9 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
static void inject_undef64(struct kvm_vcpu *vcpu)
{
- u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);
+ u64 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);
- vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA64_EL1 |
- KVM_ARM64_EXCEPT_AA64_ELx_SYNC |
- KVM_ARM64_PENDING_EXCEPTION);
+ kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC);
/*
* Build an unknown exception, depending on the instruction
@@ -73,8 +69,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
static void inject_undef32(struct kvm_vcpu *vcpu)
{
- vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA32_UND |
- KVM_ARM64_PENDING_EXCEPTION);
+ kvm_pend_exception(vcpu, EXCEPT_AA32_UND);
}
/*
@@ -97,14 +92,12 @@ static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, u32 addr)
far = vcpu_read_sys_reg(vcpu, FAR_EL1);
if (is_pabt) {
- vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA32_IABT |
- KVM_ARM64_PENDING_EXCEPTION);
+ kvm_pend_exception(vcpu, EXCEPT_AA32_IABT);
far &= GENMASK(31, 0);
far |= (u64)addr << 32;
vcpu_write_sys_reg(vcpu, fsr, IFSR32_EL2);
} else { /* !iabt */
- vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA32_DABT |
- KVM_ARM64_PENDING_EXCEPTION);
+ kvm_pend_exception(vcpu, EXCEPT_AA32_DABT);
far &= GENMASK(63, 32);
far |= addr;
vcpu_write_sys_reg(vcpu, fsr, ESR_EL1);
@@ -145,6 +138,34 @@ void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr)
inject_abt64(vcpu, true, addr);
}
+void kvm_inject_size_fault(struct kvm_vcpu *vcpu)
+{
+ unsigned long addr, esr;
+
+ addr = kvm_vcpu_get_fault_ipa(vcpu);
+ addr |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0);
+
+ if (kvm_vcpu_trap_is_iabt(vcpu))
+ kvm_inject_pabt(vcpu, addr);
+ else
+ kvm_inject_dabt(vcpu, addr);
+
+ /*
+ * If AArch64 or LPAE, set FSC to 0 to indicate an Address
+ * Size Fault at level 0, as if exceeding PARange.
+ *
+ * Non-LPAE guests will only get the external abort, as there
+ * is no way to describe the ASF.
+ */
+ if (vcpu_el1_is_32bit(vcpu) &&
+ !(vcpu_read_sys_reg(vcpu, TCR_EL1) & TTBCR_EAE))
+ return;
+
+ esr = vcpu_read_sys_reg(vcpu, ESR_EL1);
+ esr &= ~GENMASK_ULL(5, 0);
+ vcpu_write_sys_reg(vcpu, esr, ESR_EL1);
+}
+
/**
* kvm_inject_undefined - inject an undefined instruction into the guest
* @vcpu: The vCPU in which to inject the exception
diff --git a/arch/arm64/kvm/mmio.c b/arch/arm64/kvm/mmio.c
index 3e2d8ba11a02..3dd38a151d2a 100644
--- a/arch/arm64/kvm/mmio.c
+++ b/arch/arm64/kvm/mmio.c
@@ -135,7 +135,8 @@ int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
* volunteered to do so, and bail out otherwise.
*/
if (!kvm_vcpu_dabt_isvalid(vcpu)) {
- if (vcpu->kvm->arch.return_nisv_io_abort_to_user) {
+ if (test_bit(KVM_ARCH_FLAG_RETURN_NISV_IO_ABORT_TO_USER,
+ &vcpu->kvm->arch.flags)) {
run->exit_reason = KVM_EXIT_ARM_NISV;
run->arm_nisv.esr_iss = kvm_vcpu_dabt_iss_nisv_sanitized(vcpu);
run->arm_nisv.fault_ipa = fault_ipa;
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 326cdfec74a1..60ee3d9f01f8 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -31,6 +31,13 @@ static phys_addr_t hyp_idmap_vector;
static unsigned long io_map_base;
+static phys_addr_t stage2_range_addr_end(phys_addr_t addr, phys_addr_t end)
+{
+ phys_addr_t size = kvm_granule_size(KVM_PGTABLE_MIN_BLOCK_LEVEL);
+ phys_addr_t boundary = ALIGN_DOWN(addr + size, size);
+
+ return (boundary - 1 < end - 1) ? boundary : end;
+}
/*
* Release kvm_mmu_lock periodically if the memory region is large. Otherwise,
@@ -52,13 +59,13 @@ static int stage2_apply_range(struct kvm *kvm, phys_addr_t addr,
if (!pgt)
return -EINVAL;
- next = stage2_pgd_addr_end(kvm, addr, end);
+ next = stage2_range_addr_end(addr, end);
ret = fn(pgt, addr, next - addr);
if (ret)
break;
if (resched && next != end)
- cond_resched_lock(&kvm->mmu_lock);
+ cond_resched_rwlock_write(&kvm->mmu_lock);
} while (addr = next, addr != end);
return ret;
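stage2_range_addr_end() caps each iteration of stage2_apply_range() at the next block-size boundary so the (now rwlock-protected) mmu_lock can be dropped periodically on large regions. The boundary arithmetic, as a standalone sketch with an assumed block size, since the real value depends on KVM_PGTABLE_MIN_BLOCK_LEVEL at runtime:

/* Standalone sketch of stage2_range_addr_end(); the block size is an
 * assumption for illustration only. */
#include <stdint.h>

#define BLOCK_SIZE		(1UL << 30)	/* e.g. a 1GiB block */
#define ALIGN_DOWN(x, a)	((x) & ~((a) - 1))

static uint64_t range_addr_end(uint64_t addr, uint64_t end)
{
	uint64_t boundary = ALIGN_DOWN(addr + BLOCK_SIZE, BLOCK_SIZE);

	/* The "- 1" form avoids overflow when end sits at the top of the space. */
	return (boundary - 1 < end - 1) ? boundary : end;
}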
@@ -92,9 +99,13 @@ static bool kvm_is_device_pfn(unsigned long pfn)
static void *stage2_memcache_zalloc_page(void *arg)
{
struct kvm_mmu_memory_cache *mc = arg;
+ void *virt;
/* Allocated with __GFP_ZERO, so no need to zero */
- return kvm_mmu_memory_cache_alloc(mc);
+ virt = kvm_mmu_memory_cache_alloc(mc);
+ if (virt)
+ kvm_account_pgtable_pages(virt, 1);
+ return virt;
}
static void *kvm_host_zalloc_pages_exact(size_t size)
@@ -102,6 +113,21 @@ static void *kvm_host_zalloc_pages_exact(size_t size)
return alloc_pages_exact(size, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
}
+static void *kvm_s2_zalloc_pages_exact(size_t size)
+{
+ void *virt = kvm_host_zalloc_pages_exact(size);
+
+ if (virt)
+ kvm_account_pgtable_pages(virt, (size >> PAGE_SHIFT));
+ return virt;
+}
+
+static void kvm_s2_free_pages_exact(void *virt, size_t size)
+{
+ kvm_account_pgtable_pages(virt, -(size >> PAGE_SHIFT));
+ free_pages_exact(virt, size);
+}
+
static void kvm_host_get_page(void *addr)
{
get_page(virt_to_page(addr));
@@ -112,6 +138,15 @@ static void kvm_host_put_page(void *addr)
put_page(virt_to_page(addr));
}
+static void kvm_s2_put_page(void *addr)
+{
+ struct page *p = virt_to_page(addr);
+ /* Dropping last refcount, the page will be freed */
+ if (page_count(p) == 1)
+ kvm_account_pgtable_pages(addr, -1);
+ put_page(p);
+}
+
static int kvm_host_page_count(void *addr)
{
return page_count(virt_to_page(addr));
@@ -179,7 +214,7 @@ static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64
struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu);
phys_addr_t end = start + size;
- assert_spin_locked(&kvm->mmu_lock);
+ lockdep_assert_held_write(&kvm->mmu_lock);
WARN_ON(size & ~PAGE_MASK);
WARN_ON(stage2_apply_range(kvm, start, end, kvm_pgtable_stage2_unmap,
may_block));
@@ -210,16 +245,16 @@ static void stage2_flush_vm(struct kvm *kvm)
{
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
- int idx;
+ int idx, bkt;
idx = srcu_read_lock(&kvm->srcu);
- spin_lock(&kvm->mmu_lock);
+ write_lock(&kvm->mmu_lock);
slots = kvm_memslots(kvm);
- kvm_for_each_memslot(memslot, slots)
+ kvm_for_each_memslot(memslot, bkt, slots)
stage2_flush_memslot(kvm, memslot);
- spin_unlock(&kvm->mmu_lock);
+ write_unlock(&kvm->mmu_lock);
srcu_read_unlock(&kvm->srcu, idx);
}
@@ -239,6 +274,9 @@ void free_hyp_pgds(void)
static bool kvm_host_owns_hyp_mappings(void)
{
+ if (is_kernel_in_hyp_mode())
+ return false;
+
if (static_branch_likely(&kvm_protected_mode_initialized))
return false;
@@ -255,8 +293,8 @@ static bool kvm_host_owns_hyp_mappings(void)
return true;
}
-static int __create_hyp_mappings(unsigned long start, unsigned long size,
- unsigned long phys, enum kvm_pgtable_prot prot)
+int __create_hyp_mappings(unsigned long start, unsigned long size,
+ unsigned long phys, enum kvm_pgtable_prot prot)
{
int err;
@@ -281,14 +319,117 @@ static phys_addr_t kvm_kaddr_to_phys(void *kaddr)
}
}
-static int pkvm_share_hyp(phys_addr_t start, phys_addr_t end)
+struct hyp_shared_pfn {
+ u64 pfn;
+ int count;
+ struct rb_node node;
+};
+
+static DEFINE_MUTEX(hyp_shared_pfns_lock);
+static struct rb_root hyp_shared_pfns = RB_ROOT;
+
+static struct hyp_shared_pfn *find_shared_pfn(u64 pfn, struct rb_node ***node,
+ struct rb_node **parent)
{
- phys_addr_t addr;
+ struct hyp_shared_pfn *this;
+
+ *node = &hyp_shared_pfns.rb_node;
+ *parent = NULL;
+ while (**node) {
+ this = container_of(**node, struct hyp_shared_pfn, node);
+ *parent = **node;
+ if (this->pfn < pfn)
+ *node = &((**node)->rb_left);
+ else if (this->pfn > pfn)
+ *node = &((**node)->rb_right);
+ else
+ return this;
+ }
+
+ return NULL;
+}
+
+static int share_pfn_hyp(u64 pfn)
+{
+ struct rb_node **node, *parent;
+ struct hyp_shared_pfn *this;
+ int ret = 0;
+
+ mutex_lock(&hyp_shared_pfns_lock);
+ this = find_shared_pfn(pfn, &node, &parent);
+ if (this) {
+ this->count++;
+ goto unlock;
+ }
+
+ this = kzalloc(sizeof(*this), GFP_KERNEL);
+ if (!this) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
+
+ this->pfn = pfn;
+ this->count = 1;
+ rb_link_node(&this->node, parent, node);
+ rb_insert_color(&this->node, &hyp_shared_pfns);
+ ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp, pfn, 1);
+unlock:
+ mutex_unlock(&hyp_shared_pfns_lock);
+
+ return ret;
+}
+
+static int unshare_pfn_hyp(u64 pfn)
+{
+ struct rb_node **node, *parent;
+ struct hyp_shared_pfn *this;
+ int ret = 0;
+
+ mutex_lock(&hyp_shared_pfns_lock);
+ this = find_shared_pfn(pfn, &node, &parent);
+ if (WARN_ON(!this)) {
+ ret = -ENOENT;
+ goto unlock;
+ }
+
+ this->count--;
+ if (this->count)
+ goto unlock;
+
+ rb_erase(&this->node, &hyp_shared_pfns);
+ kfree(this);
+ ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp, pfn, 1);
+unlock:
+ mutex_unlock(&hyp_shared_pfns_lock);
+
+ return ret;
+}
+
+int kvm_share_hyp(void *from, void *to)
+{
+ phys_addr_t start, end, cur;
+ u64 pfn;
int ret;
- for (addr = ALIGN_DOWN(start, PAGE_SIZE); addr < end; addr += PAGE_SIZE) {
- ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp,
- __phys_to_pfn(addr));
+ if (is_kernel_in_hyp_mode())
+ return 0;
+
+ /*
+ * The share hcall maps things in the 'fixed-offset' region of the hyp
+ * VA space, so we can only share physically contiguous data-structures
+ * for now.
+ */
+ if (is_vmalloc_or_module_addr(from) || is_vmalloc_or_module_addr(to))
+ return -EINVAL;
+
+ if (kvm_host_owns_hyp_mappings())
+ return create_hyp_mappings(from, to, PAGE_HYP);
+
+ start = ALIGN_DOWN(__pa(from), PAGE_SIZE);
+ end = PAGE_ALIGN(__pa(to));
+ for (cur = start; cur < end; cur += PAGE_SIZE) {
+ pfn = __phys_to_pfn(cur);
+ ret = share_pfn_hyp(pfn);
if (ret)
return ret;
}
@@ -296,6 +437,22 @@ static int pkvm_share_hyp(phys_addr_t start, phys_addr_t end)
return 0;
}
+void kvm_unshare_hyp(void *from, void *to)
+{
+ phys_addr_t start, end, cur;
+ u64 pfn;
+
+ if (is_kernel_in_hyp_mode() || kvm_host_owns_hyp_mappings() || !from)
+ return;
+
+ start = ALIGN_DOWN(__pa(from), PAGE_SIZE);
+ end = PAGE_ALIGN(__pa(to));
+ for (cur = start; cur < end; cur += PAGE_SIZE) {
+ pfn = __phys_to_pfn(cur);
+ WARN_ON(unshare_pfn_hyp(pfn));
+ }
+}
+
/**
* create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode
* @from: The virtual kernel start address of the range
@@ -316,12 +473,8 @@ int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot)
if (is_kernel_in_hyp_mode())
return 0;
- if (!kvm_host_owns_hyp_mappings()) {
- if (WARN_ON(prot != PAGE_HYP))
- return -EPERM;
- return pkvm_share_hyp(kvm_kaddr_to_phys(from),
- kvm_kaddr_to_phys(to));
- }
+ if (!kvm_host_owns_hyp_mappings())
+ return -EPERM;
start = start & PAGE_MASK;
end = PAGE_ALIGN(end);
@@ -339,23 +492,22 @@ int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot)
return 0;
}
-static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size,
- unsigned long *haddr,
- enum kvm_pgtable_prot prot)
+
+/**
+ * hyp_alloc_private_va_range - Allocates a private VA range.
+ * @size: The size of the VA range to reserve.
+ * @haddr: The hypervisor virtual start address of the allocation.
+ *
+ * The private virtual address (VA) range is allocated below io_map_base
+ * and aligned based on the order of @size.
+ *
+ * Return: 0 on success or negative error code on failure.
+ */
+int hyp_alloc_private_va_range(size_t size, unsigned long *haddr)
{
unsigned long base;
int ret = 0;
- if (!kvm_host_owns_hyp_mappings()) {
- base = kvm_call_hyp_nvhe(__pkvm_create_private_mapping,
- phys_addr, size, prot);
- if (IS_ERR_OR_NULL((void *)base))
- return PTR_ERR((void *)base);
- *haddr = base;
-
- return 0;
- }
-
mutex_lock(&kvm_hyp_pgd_mutex);
/*
@@ -366,8 +518,10 @@ static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size,
*
* The allocated size is always a multiple of PAGE_SIZE.
*/
- size = PAGE_ALIGN(size + offset_in_page(phys_addr));
- base = io_map_base - size;
+ base = io_map_base - PAGE_ALIGN(size);
+
+ /* Align the allocation based on the order of its size */
+ base = ALIGN_DOWN(base, PAGE_SIZE << get_order(size));
/*
* Verify that BIT(VA_BITS - 1) hasn't been flipped by
@@ -377,19 +531,40 @@ static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size,
if ((base ^ io_map_base) & BIT(VA_BITS - 1))
ret = -ENOMEM;
else
- io_map_base = base;
+ *haddr = io_map_base = base;
mutex_unlock(&kvm_hyp_pgd_mutex);
+ return ret;
+}
+
+static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size,
+ unsigned long *haddr,
+ enum kvm_pgtable_prot prot)
+{
+ unsigned long addr;
+ int ret = 0;
+
+ if (!kvm_host_owns_hyp_mappings()) {
+ addr = kvm_call_hyp_nvhe(__pkvm_create_private_mapping,
+ phys_addr, size, prot);
+ if (IS_ERR_VALUE(addr))
+ return addr;
+ *haddr = addr;
+
+ return 0;
+ }
+
+ size = PAGE_ALIGN(size + offset_in_page(phys_addr));
+ ret = hyp_alloc_private_va_range(size, &addr);
if (ret)
- goto out;
+ return ret;
- ret = __create_hyp_mappings(base, size, phys_addr, prot);
+ ret = __create_hyp_mappings(addr, size, phys_addr, prot);
if (ret)
- goto out;
+ return ret;
- *haddr = base + offset_in_page(phys_addr);
-out:
+ *haddr = addr + offset_in_page(phys_addr);
return ret;
}
@@ -407,6 +582,9 @@ int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
unsigned long addr;
int ret;
+ if (is_protected_kvm_enabled())
+ return -EPERM;
+
*kaddr = ioremap(phys_addr, size);
if (!*kaddr)
return -ENOMEM;
@@ -482,10 +660,10 @@ static int get_user_mapping_size(struct kvm *kvm, u64 addr)
static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = {
.zalloc_page = stage2_memcache_zalloc_page,
- .zalloc_pages_exact = kvm_host_zalloc_pages_exact,
- .free_pages_exact = free_pages_exact,
+ .zalloc_pages_exact = kvm_s2_zalloc_pages_exact,
+ .free_pages_exact = kvm_s2_free_pages_exact,
.get_page = kvm_host_get_page,
- .put_page = kvm_host_put_page,
+ .put_page = kvm_s2_put_page,
.page_count = kvm_host_page_count,
.phys_to_virt = kvm_host_va,
.virt_to_phys = kvm_host_pa,
@@ -494,7 +672,7 @@ static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = {
};
/**
- * kvm_init_stage2_mmu - Initialise a S2 MMU strucrure
+ * kvm_init_stage2_mmu - Initialise a S2 MMU structure
* @kvm: The pointer to the KVM structure
* @mmu: The pointer to the s2 MMU structure
*
@@ -516,7 +694,8 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu)
if (!pgt)
return -ENOMEM;
- err = kvm_pgtable_stage2_init(pgt, &kvm->arch, &kvm_s2_mm_ops);
+ mmu->arch = &kvm->arch;
+ err = kvm_pgtable_stage2_init(pgt, mmu, &kvm_s2_mm_ops);
if (err)
goto out_free_pgtable;
@@ -529,10 +708,8 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu)
for_each_possible_cpu(cpu)
*per_cpu_ptr(mmu->last_vcpu_ran, cpu) = -1;
- mmu->arch = &kvm->arch;
mmu->pgt = pgt;
mmu->pgd_phys = __pa(pgt->pgd);
- WRITE_ONCE(mmu->vmid.vmid_gen, 0);
return 0;
out_destroy_pgtable:
@@ -595,17 +772,17 @@ void stage2_unmap_vm(struct kvm *kvm)
{
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
- int idx;
+ int idx, bkt;
idx = srcu_read_lock(&kvm->srcu);
mmap_read_lock(current->mm);
- spin_lock(&kvm->mmu_lock);
+ write_lock(&kvm->mmu_lock);
slots = kvm_memslots(kvm);
- kvm_for_each_memslot(memslot, slots)
+ kvm_for_each_memslot(memslot, bkt, slots)
stage2_unmap_memslot(kvm, memslot);
- spin_unlock(&kvm->mmu_lock);
+ write_unlock(&kvm->mmu_lock);
mmap_read_unlock(current->mm);
srcu_read_unlock(&kvm->srcu, idx);
}
@@ -615,14 +792,14 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu);
struct kvm_pgtable *pgt = NULL;
- spin_lock(&kvm->mmu_lock);
+ write_lock(&kvm->mmu_lock);
pgt = mmu->pgt;
if (pgt) {
mmu->pgd_phys = 0;
mmu->pgt = NULL;
free_percpu(mmu->last_vcpu_ran);
}
- spin_unlock(&kvm->mmu_lock);
+ write_unlock(&kvm->mmu_lock);
if (pgt) {
kvm_pgtable_stage2_destroy(pgt);
@@ -644,12 +821,15 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
{
phys_addr_t addr;
int ret = 0;
- struct kvm_mmu_memory_cache cache = { 0, __GFP_ZERO, NULL, };
+ struct kvm_mmu_memory_cache cache = { .gfp_zero = __GFP_ZERO };
struct kvm_pgtable *pgt = kvm->arch.mmu.pgt;
enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_DEVICE |
KVM_PGTABLE_PROT_R |
(writable ? KVM_PGTABLE_PROT_W : 0);
+ if (is_protected_kvm_enabled())
+ return -EPERM;
+
size += offset_in_page(guest_ipa);
guest_ipa &= PAGE_MASK;
@@ -659,10 +839,10 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
if (ret)
break;
- spin_lock(&kvm->mmu_lock);
+ write_lock(&kvm->mmu_lock);
ret = kvm_pgtable_stage2_map(pgt, addr, PAGE_SIZE, pa, prot,
&cache);
- spin_unlock(&kvm->mmu_lock);
+ write_unlock(&kvm->mmu_lock);
if (ret)
break;
@@ -710,9 +890,9 @@ static void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
start = memslot->base_gfn << PAGE_SHIFT;
end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;
- spin_lock(&kvm->mmu_lock);
+ write_lock(&kvm->mmu_lock);
stage2_wp_range(&kvm->arch.mmu, start, end);
- spin_unlock(&kvm->mmu_lock);
+ write_unlock(&kvm->mmu_lock);
kvm_flush_remote_tlbs(kvm);
}
@@ -848,7 +1028,7 @@ transparent_hugepage_adjust(struct kvm *kvm, struct kvm_memory_slot *memslot,
* THP doesn't start to split while we are adjusting the
* refcounts.
*
- * We are sure this doesn't happen, because mmu_notifier_retry
+ * We are sure this doesn't happen, because mmu_invalidate_retry
* was successful and we are holding the mmu_lock, so if this
* THP is trying to split, it will be blocked in the mmu
* notifier before touching any of the pages, specifically
@@ -956,6 +1136,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
gfn_t gfn;
kvm_pfn_t pfn;
bool logging_active = memslot_is_logging(memslot);
+ bool use_read_lock = false;
unsigned long fault_level = kvm_vcpu_trap_get_fault_level(vcpu);
unsigned long vma_pagesize, fault_granule;
enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
@@ -990,6 +1171,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
if (logging_active) {
force_pte = true;
vma_shift = PAGE_SHIFT;
+ use_read_lock = (fault_status == FSC_PERM && write_fault &&
+ fault_granule == PAGE_SIZE);
} else {
vma_shift = get_vma_page_shift(vma, hva);
}
@@ -1040,9 +1223,9 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
return ret;
}
- mmu_seq = vcpu->kvm->mmu_notifier_seq;
+ mmu_seq = vcpu->kvm->mmu_invalidate_seq;
/*
- * Ensure the read of mmu_notifier_seq happens before we call
+ * Ensure the read of mmu_invalidate_seq happens before we call
* gfn_to_pfn_prot (which calls get_user_pages), so that we don't risk
* the page we just got a reference to gets unmapped before we have a
* chance to grab the mmu_lock, which ensure that if the page gets
@@ -1088,9 +1271,17 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
if (exec_fault && device)
return -ENOEXEC;
- spin_lock(&kvm->mmu_lock);
+ /*
+	 * To reduce MMU contentions and enhance concurrency during dirty
+	 * logging, only acquire read lock for permission relaxation.
+ */
+ if (use_read_lock)
+ read_lock(&kvm->mmu_lock);
+ else
+ write_lock(&kvm->mmu_lock);
pgt = vcpu->arch.hw_mmu->pgt;
- if (mmu_notifier_retry(kvm, mmu_seq))
+ if (mmu_invalidate_retry(kvm, mmu_seq))
goto out_unlock;
/*
@@ -1135,6 +1326,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
if (fault_status == FSC_PERM && vma_pagesize == fault_granule) {
ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot);
} else {
+ WARN_ONCE(use_read_lock, "Attempted stage-2 map outside of write lock\n");
+
ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize,
__pfn_to_phys(pfn), prot,
memcache);
@@ -1147,7 +1340,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
}
out_unlock:
- spin_unlock(&kvm->mmu_lock);
+ if (use_read_lock)
+ read_unlock(&kvm->mmu_lock);
+ else
+ write_unlock(&kvm->mmu_lock);
kvm_set_pfn_accessed(pfn);
kvm_release_pfn_clean(pfn);
return ret != -EAGAIN ? ret : 0;
@@ -1162,10 +1358,10 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
trace_kvm_access_fault(fault_ipa);
- spin_lock(&vcpu->kvm->mmu_lock);
+ write_lock(&vcpu->kvm->mmu_lock);
mmu = vcpu->arch.hw_mmu;
kpte = kvm_pgtable_stage2_mkyoung(mmu->pgt, fault_ipa);
- spin_unlock(&vcpu->kvm->mmu_lock);
+ write_unlock(&vcpu->kvm->mmu_lock);
pte = __pte(kpte);
if (pte_valid(pte))
@@ -1198,6 +1394,25 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
+ if (fault_status == FSC_FAULT) {
+ /* Beyond sanitised PARange (which is the IPA limit) */
+ if (fault_ipa >= BIT_ULL(get_kvm_ipa_limit())) {
+ kvm_inject_size_fault(vcpu);
+ return 1;
+ }
+
+ /* Falls between the IPA range and the PARange? */
+ if (fault_ipa >= BIT_ULL(vcpu->arch.hw_mmu->pgt->ia_bits)) {
+ fault_ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0);
+
+ if (is_iabt)
+ kvm_inject_pabt(vcpu, fault_ipa);
+ else
+ kvm_inject_dabt(vcpu, fault_ipa);
+ return 1;
+ }
+ }
+
/* Synchronous External Abort? */
if (kvm_vcpu_abt_issea(vcpu)) {
/*
@@ -1463,7 +1678,6 @@ out:
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
- const struct kvm_userspace_memory_region *mem,
struct kvm_memory_slot *old,
const struct kvm_memory_slot *new,
enum kvm_mr_change change)
@@ -1473,25 +1687,24 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
* allocated dirty_bitmap[], dirty pages will be tracked while the
* memory slot is write protected.
*/
- if (change != KVM_MR_DELETE && mem->flags & KVM_MEM_LOG_DIRTY_PAGES) {
+ if (change != KVM_MR_DELETE && new->flags & KVM_MEM_LOG_DIRTY_PAGES) {
/*
* If we're with initial-all-set, we don't need to write
* protect any pages because they're all reported as dirty.
* Huge pages and normal pages will be write protect gradually.
*/
if (!kvm_dirty_log_manual_protect_and_init_set(kvm)) {
- kvm_mmu_wp_memory_region(kvm, mem->slot);
+ kvm_mmu_wp_memory_region(kvm, new->id);
}
}
}
int kvm_arch_prepare_memory_region(struct kvm *kvm,
- struct kvm_memory_slot *memslot,
- const struct kvm_userspace_memory_region *mem,
+ const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
- hva_t hva = mem->userspace_addr;
- hva_t reg_end = hva + mem->memory_size;
+ hva_t hva, reg_end;
int ret = 0;
if (change != KVM_MR_CREATE && change != KVM_MR_MOVE &&
@@ -1502,9 +1715,12 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
* Prevent userspace from creating a memory region outside of the IPA
* space addressable by the KVM guest IPA space.
*/
- if ((memslot->base_gfn + memslot->npages) > (kvm_phys_size(kvm) >> PAGE_SHIFT))
+ if ((new->base_gfn + new->npages) > (kvm_phys_size(kvm) >> PAGE_SHIFT))
return -EFAULT;
+ hva = new->userspace_addr;
+ reg_end = hva + (new->npages << PAGE_SHIFT);
+
mmap_read_lock(current->mm);
/*
* A memory region could potentially cover multiple VMAs, and any holes
@@ -1536,7 +1752,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
if (vma->vm_flags & VM_PFNMAP) {
/* IO region dirty page logging not allowed */
- if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) {
+ if (new->flags & KVM_MEM_LOG_DIRTY_PAGES) {
ret = -EINVAL;
break;
}
@@ -1567,9 +1783,9 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
gpa_t gpa = slot->base_gfn << PAGE_SHIFT;
phys_addr_t size = slot->npages << PAGE_SHIFT;
- spin_lock(&kvm->mmu_lock);
+ write_lock(&kvm->mmu_lock);
unmap_stage2_range(&kvm->arch.mmu, gpa, size);
- spin_unlock(&kvm->mmu_lock);
+ write_unlock(&kvm->mmu_lock);
}
/*
diff --git a/arch/arm64/kvm/perf.c b/arch/arm64/kvm/perf.c
deleted file mode 100644
index c84fe24b2ea1..000000000000
--- a/arch/arm64/kvm/perf.c
+++ /dev/null
@@ -1,59 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Based on the x86 implementation.
- *
- * Copyright (C) 2012 ARM Ltd.
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- */
-
-#include <linux/perf_event.h>
-#include <linux/kvm_host.h>
-
-#include <asm/kvm_emulate.h>
-
-DEFINE_STATIC_KEY_FALSE(kvm_arm_pmu_available);
-
-static int kvm_is_in_guest(void)
-{
- return kvm_get_running_vcpu() != NULL;
-}
-
-static int kvm_is_user_mode(void)
-{
- struct kvm_vcpu *vcpu;
-
- vcpu = kvm_get_running_vcpu();
-
- if (vcpu)
- return !vcpu_mode_priv(vcpu);
-
- return 0;
-}
-
-static unsigned long kvm_get_guest_ip(void)
-{
- struct kvm_vcpu *vcpu;
-
- vcpu = kvm_get_running_vcpu();
-
- if (vcpu)
- return *vcpu_pc(vcpu);
-
- return 0;
-}
-
-static struct perf_guest_info_callbacks kvm_guest_cbs = {
- .is_in_guest = kvm_is_in_guest,
- .is_user_mode = kvm_is_user_mode,
- .get_guest_ip = kvm_get_guest_ip,
-};
-
-int kvm_perf_init(void)
-{
- return perf_register_guest_info_callbacks(&kvm_guest_cbs);
-}
-
-int kvm_perf_teardown(void)
-{
- return perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
-}
diff --git a/arch/arm64/kvm/hyp/reserved_mem.c b/arch/arm64/kvm/pkvm.c
index 578670e3f608..ebecb7c045f4 100644
--- a/arch/arm64/kvm/hyp/reserved_mem.c
+++ b/arch/arm64/kvm/pkvm.c
@@ -8,10 +8,9 @@
#include <linux/memblock.h>
#include <linux/sort.h>
-#include <asm/kvm_host.h>
+#include <asm/kvm_pkvm.h>
-#include <nvhe/memory.h>
-#include <nvhe/mm.h>
+#include "hyp_constants.h"
static struct memblock_region *hyp_memory = kvm_nvhe_sym(hyp_memory);
static unsigned int *hyp_memblock_nr_ptr = &kvm_nvhe_sym(hyp_memblock_nr);
@@ -82,7 +81,8 @@ void __init kvm_hyp_reserve(void)
do {
prev = nr_pages;
nr_pages = hyp_mem_pages + prev;
- nr_pages = DIV_ROUND_UP(nr_pages * sizeof(struct hyp_page), PAGE_SIZE);
+ nr_pages = DIV_ROUND_UP(nr_pages * STRUCT_HYP_PAGE_SIZE,
+ PAGE_SIZE);
nr_pages += __hyp_pgtable_max_pages(nr_pages);
} while (nr_pages != prev);
hyp_mem_pages += nr_pages;
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index a5e4bbf5e68f..0003c7d37533 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -7,6 +7,7 @@
#include <linux/cpu.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
+#include <linux/list.h>
#include <linux/perf_event.h>
#include <linux/perf/arm_pmu.h>
#include <linux/uaccess.h>
@@ -14,6 +15,11 @@
#include <kvm/arm_pmu.h>
#include <kvm/arm_vgic.h>
+DEFINE_STATIC_KEY_FALSE(kvm_arm_pmu_available);
+
+static LIST_HEAD(arm_pmus);
+static DEFINE_MUTEX(arm_pmus_lock);
+
static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx);
static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx);
static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc);
@@ -22,15 +28,20 @@ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc);
static u32 kvm_pmu_event_mask(struct kvm *kvm)
{
- switch (kvm->arch.pmuver) {
- case ID_AA64DFR0_PMUVER_8_0:
+ unsigned int pmuver;
+
+ pmuver = kvm->arch.arm_pmu->pmuver;
+
+ switch (pmuver) {
+ case ID_AA64DFR0_EL1_PMUVer_IMP:
return GENMASK(9, 0);
- case ID_AA64DFR0_PMUVER_8_1:
- case ID_AA64DFR0_PMUVER_8_4:
- case ID_AA64DFR0_PMUVER_8_5:
+ case ID_AA64DFR0_EL1_PMUVer_V3P1:
+ case ID_AA64DFR0_EL1_PMUVer_V3P4:
+ case ID_AA64DFR0_EL1_PMUVer_V3P5:
+ case ID_AA64DFR0_EL1_PMUVer_V3P7:
return GENMASK(15, 0);
default: /* Shouldn't be here, just for sanity */
- WARN_ONCE(1, "Unknown PMU version %d\n", kvm->arch.pmuver);
+ WARN_ONCE(1, "Unknown PMU version %d\n", pmuver);
return 0;
}
}
@@ -166,6 +177,9 @@ u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
struct kvm_pmu *pmu = &vcpu->arch.pmu;
struct kvm_pmc *pmc = &pmu->pmc[select_idx];
+ if (!kvm_vcpu_has_pmu(vcpu))
+ return 0;
+
counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
if (kvm_pmu_pmc_is_chained(pmc) &&
@@ -187,6 +201,9 @@ void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
{
u64 reg;
+ if (!kvm_vcpu_has_pmu(vcpu))
+ return;
+
reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx;
__vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx);
@@ -311,6 +328,9 @@ void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
struct kvm_pmu *pmu = &vcpu->arch.pmu;
struct kvm_pmc *pmc;
+ if (!kvm_vcpu_has_pmu(vcpu))
+ return;
+
if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val)
return;
@@ -346,7 +366,7 @@ void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
struct kvm_pmu *pmu = &vcpu->arch.pmu;
struct kvm_pmc *pmc;
- if (!val)
+ if (!kvm_vcpu_has_pmu(vcpu) || !val)
return;
for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
@@ -516,6 +536,9 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
struct kvm_pmu *pmu = &vcpu->arch.pmu;
int i;
+ if (!kvm_vcpu_has_pmu(vcpu))
+ return;
+
if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E))
return;
@@ -565,6 +588,9 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
{
int i;
+ if (!kvm_vcpu_has_pmu(vcpu))
+ return;
+
if (val & ARMV8_PMU_PMCR_E) {
kvm_pmu_enable_counter_mask(vcpu,
__vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
@@ -597,6 +623,7 @@ static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx)
*/
static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
{
+ struct arm_pmu *arm_pmu = vcpu->kvm->arch.arm_pmu;
struct kvm_pmu *pmu = &vcpu->arch.pmu;
struct kvm_pmc *pmc;
struct perf_event *event;
@@ -633,7 +660,7 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
return;
memset(&attr, 0, sizeof(struct perf_event_attr));
- attr.type = PERF_TYPE_RAW;
+ attr.type = arm_pmu->pmu.type;
attr.size = sizeof(attr);
attr.pinned = 1;
attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, pmc->idx);
@@ -727,6 +754,9 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
{
u64 reg, mask;
+ if (!kvm_vcpu_has_pmu(vcpu))
+ return;
+
mask = ARMV8_PMU_EVTYPE_MASK;
mask &= ~ARMV8_PMU_EVTYPE_EVENT;
mask |= kvm_pmu_event_mask(vcpu->kvm);
@@ -742,17 +772,32 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
void kvm_host_pmu_init(struct arm_pmu *pmu)
{
- if (pmu->pmuver != 0 && pmu->pmuver != ID_AA64DFR0_PMUVER_IMP_DEF &&
- !kvm_arm_support_pmu_v3() && !is_protected_kvm_enabled())
+ struct arm_pmu_entry *entry;
+
+ if (pmu->pmuver == 0 || pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
+ return;
+
+ mutex_lock(&arm_pmus_lock);
+
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ goto out_unlock;
+
+ entry->arm_pmu = pmu;
+ list_add_tail(&entry->entry, &arm_pmus);
+
+ if (list_is_singular(&arm_pmus))
static_branch_enable(&kvm_arm_pmu_available);
+
+out_unlock:
+ mutex_unlock(&arm_pmus_lock);
}
-static int kvm_pmu_probe_pmuver(void)
+static struct arm_pmu *kvm_pmu_probe_armpmu(void)
{
struct perf_event_attr attr = { };
struct perf_event *event;
- struct arm_pmu *pmu;
- int pmuver = ID_AA64DFR0_PMUVER_IMP_DEF;
+ struct arm_pmu *pmu = NULL;
/*
* Create a dummy event that only counts user cycles. As we'll never
@@ -777,19 +822,20 @@ static int kvm_pmu_probe_pmuver(void)
if (IS_ERR(event)) {
pr_err_once("kvm: pmu event creation failed %ld\n",
PTR_ERR(event));
- return ID_AA64DFR0_PMUVER_IMP_DEF;
+ return NULL;
}
if (event->pmu) {
pmu = to_arm_pmu(event->pmu);
- if (pmu->pmuver)
- pmuver = pmu->pmuver;
+ if (pmu->pmuver == 0 ||
+ pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
+ pmu = NULL;
}
perf_event_disable(event);
perf_event_release_kernel(event);
- return pmuver;
+ return pmu;
}
u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
@@ -798,6 +844,9 @@ u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
u64 val, mask = 0;
int base, i, nr_events;
+ if (!kvm_vcpu_has_pmu(vcpu))
+ return 0;
+
if (!pmceid1) {
val = read_sysreg(pmceid0_el0);
base = 0;
@@ -807,7 +856,7 @@ u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
* Don't advertise STALL_SLOT, as PMMIR_EL0 is handled
* as RAZ
*/
- if (vcpu->kvm->arch.pmuver >= ID_AA64DFR0_PMUVER_8_4)
+ if (vcpu->kvm->arch.arm_pmu->pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P4)
val &= ~BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32);
base = 32;
}
@@ -900,7 +949,7 @@ static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
*/
static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
{
- int i;
+ unsigned long i;
struct kvm_vcpu *vcpu;
kvm_for_each_vcpu(i, vcpu, kvm) {
@@ -919,26 +968,64 @@ static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
return true;
}
+static int kvm_arm_pmu_v3_set_pmu(struct kvm_vcpu *vcpu, int pmu_id)
+{
+ struct kvm *kvm = vcpu->kvm;
+ struct arm_pmu_entry *entry;
+ struct arm_pmu *arm_pmu;
+ int ret = -ENXIO;
+
+ mutex_lock(&kvm->lock);
+ mutex_lock(&arm_pmus_lock);
+
+ list_for_each_entry(entry, &arm_pmus, entry) {
+ arm_pmu = entry->arm_pmu;
+ if (arm_pmu->pmu.type == pmu_id) {
+ if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags) ||
+ (kvm->arch.pmu_filter && kvm->arch.arm_pmu != arm_pmu)) {
+ ret = -EBUSY;
+ break;
+ }
+
+ kvm->arch.arm_pmu = arm_pmu;
+ cpumask_copy(kvm->arch.supported_cpus, &arm_pmu->supported_cpus);
+ ret = 0;
+ break;
+ }
+ }
+
+ mutex_unlock(&arm_pmus_lock);
+ mutex_unlock(&kvm->lock);
+ return ret;
+}
+
int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
+ struct kvm *kvm = vcpu->kvm;
+
if (!kvm_vcpu_has_pmu(vcpu))
return -ENODEV;
if (vcpu->arch.pmu.created)
return -EBUSY;
- if (!vcpu->kvm->arch.pmuver)
- vcpu->kvm->arch.pmuver = kvm_pmu_probe_pmuver();
-
- if (vcpu->kvm->arch.pmuver == ID_AA64DFR0_PMUVER_IMP_DEF)
- return -ENODEV;
+ mutex_lock(&kvm->lock);
+ if (!kvm->arch.arm_pmu) {
+ /* No PMU set, get the default one */
+ kvm->arch.arm_pmu = kvm_pmu_probe_armpmu();
+ if (!kvm->arch.arm_pmu) {
+ mutex_unlock(&kvm->lock);
+ return -ENODEV;
+ }
+ }
+ mutex_unlock(&kvm->lock);
switch (attr->attr) {
case KVM_ARM_VCPU_PMU_V3_IRQ: {
int __user *uaddr = (int __user *)(long)attr->addr;
int irq;
- if (!irqchip_in_kernel(vcpu->kvm))
+ if (!irqchip_in_kernel(kvm))
return -EINVAL;
if (get_user(irq, uaddr))
@@ -948,7 +1035,7 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
if (!(irq_is_ppi(irq) || irq_is_spi(irq)))
return -EINVAL;
- if (!pmu_irq_is_valid(vcpu->kvm, irq))
+ if (!pmu_irq_is_valid(kvm, irq))
return -EINVAL;
if (kvm_arm_pmu_irq_initialized(vcpu))
@@ -963,7 +1050,7 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
struct kvm_pmu_event_filter filter;
int nr_events;
- nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1;
+ nr_events = kvm_pmu_event_mask(kvm) + 1;
uaddr = (struct kvm_pmu_event_filter __user *)(long)attr->addr;
@@ -975,12 +1062,17 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
filter.action != KVM_PMU_EVENT_DENY))
return -EINVAL;
- mutex_lock(&vcpu->kvm->lock);
+ mutex_lock(&kvm->lock);
+
+ if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags)) {
+ mutex_unlock(&kvm->lock);
+ return -EBUSY;
+ }
- if (!vcpu->kvm->arch.pmu_filter) {
- vcpu->kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL_ACCOUNT);
- if (!vcpu->kvm->arch.pmu_filter) {
- mutex_unlock(&vcpu->kvm->lock);
+ if (!kvm->arch.pmu_filter) {
+ kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL_ACCOUNT);
+ if (!kvm->arch.pmu_filter) {
+ mutex_unlock(&kvm->lock);
return -ENOMEM;
}
@@ -991,20 +1083,29 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
* events, the default is to allow.
*/
if (filter.action == KVM_PMU_EVENT_ALLOW)
- bitmap_zero(vcpu->kvm->arch.pmu_filter, nr_events);
+ bitmap_zero(kvm->arch.pmu_filter, nr_events);
else
- bitmap_fill(vcpu->kvm->arch.pmu_filter, nr_events);
+ bitmap_fill(kvm->arch.pmu_filter, nr_events);
}
if (filter.action == KVM_PMU_EVENT_ALLOW)
- bitmap_set(vcpu->kvm->arch.pmu_filter, filter.base_event, filter.nevents);
+ bitmap_set(kvm->arch.pmu_filter, filter.base_event, filter.nevents);
else
- bitmap_clear(vcpu->kvm->arch.pmu_filter, filter.base_event, filter.nevents);
+ bitmap_clear(kvm->arch.pmu_filter, filter.base_event, filter.nevents);
- mutex_unlock(&vcpu->kvm->lock);
+ mutex_unlock(&kvm->lock);
return 0;
}
+ case KVM_ARM_VCPU_PMU_V3_SET_PMU: {
+ int __user *uaddr = (int __user *)(long)attr->addr;
+ int pmu_id;
+
+ if (get_user(pmu_id, uaddr))
+ return -EFAULT;
+
+ return kvm_arm_pmu_v3_set_pmu(vcpu, pmu_id);
+ }
case KVM_ARM_VCPU_PMU_V3_INIT:
return kvm_arm_pmu_v3_init(vcpu);
}
@@ -1042,6 +1143,7 @@ int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
case KVM_ARM_VCPU_PMU_V3_IRQ:
case KVM_ARM_VCPU_PMU_V3_INIT:
case KVM_ARM_VCPU_PMU_V3_FILTER:
+ case KVM_ARM_VCPU_PMU_V3_SET_PMU:
if (kvm_vcpu_has_pmu(vcpu))
return 0;
}
diff --git a/arch/arm64/kvm/pmu.c b/arch/arm64/kvm/pmu.c
index 03a6c1f4a09a..7887133d15f0 100644
--- a/arch/arm64/kvm/pmu.c
+++ b/arch/arm64/kvm/pmu.c
@@ -5,7 +5,8 @@
*/
#include <linux/kvm_host.h>
#include <linux/perf_event.h>
-#include <asm/kvm_hyp.h>
+
+static DEFINE_PER_CPU(struct kvm_pmu_events, kvm_pmu_events);
/*
* Given the perf event attributes and system type, determine
@@ -25,21 +26,26 @@ static bool kvm_pmu_switch_needed(struct perf_event_attr *attr)
return (attr->exclude_host != attr->exclude_guest);
}
+struct kvm_pmu_events *kvm_get_pmu_events(void)
+{
+ return this_cpu_ptr(&kvm_pmu_events);
+}
+
/*
* Add events to track that we may want to switch at guest entry/exit
* time.
*/
void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr)
{
- struct kvm_host_data *ctx = this_cpu_ptr_hyp_sym(kvm_host_data);
+ struct kvm_pmu_events *pmu = kvm_get_pmu_events();
- if (!kvm_arm_support_pmu_v3() || !ctx || !kvm_pmu_switch_needed(attr))
+ if (!kvm_arm_support_pmu_v3() || !pmu || !kvm_pmu_switch_needed(attr))
return;
if (!attr->exclude_host)
- ctx->pmu_events.events_host |= set;
+ pmu->events_host |= set;
if (!attr->exclude_guest)
- ctx->pmu_events.events_guest |= set;
+ pmu->events_guest |= set;
}
/*
@@ -47,13 +53,13 @@ void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr)
*/
void kvm_clr_pmu_events(u32 clr)
{
- struct kvm_host_data *ctx = this_cpu_ptr_hyp_sym(kvm_host_data);
+ struct kvm_pmu_events *pmu = kvm_get_pmu_events();
- if (!kvm_arm_support_pmu_v3() || !ctx)
+ if (!kvm_arm_support_pmu_v3() || !pmu)
return;
- ctx->pmu_events.events_host &= ~clr;
- ctx->pmu_events.events_guest &= ~clr;
+ pmu->events_host &= ~clr;
+ pmu->events_guest &= ~clr;
}
#define PMEVTYPER_READ_CASE(idx) \
@@ -169,16 +175,16 @@ static void kvm_vcpu_pmu_disable_el0(unsigned long events)
*/
void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu)
{
- struct kvm_host_data *host;
+ struct kvm_pmu_events *pmu;
u32 events_guest, events_host;
if (!kvm_arm_support_pmu_v3() || !has_vhe())
return;
preempt_disable();
- host = this_cpu_ptr_hyp_sym(kvm_host_data);
- events_guest = host->pmu_events.events_guest;
- events_host = host->pmu_events.events_host;
+ pmu = kvm_get_pmu_events();
+ events_guest = pmu->events_guest;
+ events_host = pmu->events_host;
kvm_vcpu_pmu_enable_el0(events_guest);
kvm_vcpu_pmu_disable_el0(events_host);
@@ -190,15 +196,15 @@ void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu)
*/
void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu)
{
- struct kvm_host_data *host;
+ struct kvm_pmu_events *pmu;
u32 events_guest, events_host;
if (!kvm_arm_support_pmu_v3() || !has_vhe())
return;
- host = this_cpu_ptr_hyp_sym(kvm_host_data);
- events_guest = host->pmu_events.events_guest;
- events_host = host->pmu_events.events_host;
+ pmu = kvm_get_pmu_events();
+ events_guest = pmu->events_guest;
+ events_host = pmu->events_host;
kvm_vcpu_pmu_enable_el0(events_host);
kvm_vcpu_pmu_disable_el0(events_guest);
diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c
index 74c47d420253..7fbc4c1b9df0 100644
--- a/arch/arm64/kvm/psci.c
+++ b/arch/arm64/kvm/psci.c
@@ -46,19 +46,11 @@ static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu)
* specification (ARM DEN 0022A). This means all suspend states
* for KVM will preserve the register state.
*/
- kvm_vcpu_block(vcpu);
- kvm_clear_request(KVM_REQ_UNHALT, vcpu);
+ kvm_vcpu_wfi(vcpu);
return PSCI_RET_SUCCESS;
}
-static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu)
-{
- vcpu->arch.power_off = true;
- kvm_make_request(KVM_REQ_SLEEP, vcpu);
- kvm_vcpu_kick(vcpu);
-}
-
static inline bool kvm_psci_valid_affinity(struct kvm_vcpu *vcpu,
unsigned long affinity)
{
@@ -84,8 +76,8 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
*/
if (!vcpu)
return PSCI_RET_INVALID_PARAMS;
- if (!vcpu->arch.power_off) {
- if (kvm_psci_version(source_vcpu, kvm) != KVM_ARM_PSCI_0_1)
+ if (!kvm_arm_vcpu_stopped(vcpu)) {
+ if (kvm_psci_version(source_vcpu) != KVM_ARM_PSCI_0_1)
return PSCI_RET_ALREADY_ON;
else
return PSCI_RET_INVALID_PARAMS;
@@ -108,12 +100,12 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
kvm_make_request(KVM_REQ_VCPU_RESET, vcpu);
/*
- * Make sure the reset request is observed if the change to
- * power_state is observed.
+ * Make sure the reset request is observed if the RUNNABLE mp_state is
+ * observed.
*/
smp_wmb();
- vcpu->arch.power_off = false;
+ vcpu->arch.mp_state.mp_state = KVM_MP_STATE_RUNNABLE;
kvm_vcpu_wake_up(vcpu);
return PSCI_RET_SUCCESS;
@@ -121,8 +113,8 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
{
- int i, matching_cpus = 0;
- unsigned long mpidr;
+ int matching_cpus = 0;
+ unsigned long i, mpidr;
unsigned long target_affinity;
unsigned long target_affinity_mask;
unsigned long lowest_affinity_level;
@@ -151,7 +143,7 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
mpidr = kvm_vcpu_get_mpidr_aff(tmp);
if ((mpidr & target_affinity_mask) == target_affinity) {
matching_cpus++;
- if (!tmp->arch.power_off)
+ if (!kvm_arm_vcpu_stopped(tmp))
return PSCI_0_2_AFFINITY_LEVEL_ON;
}
}
@@ -162,9 +154,9 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
return PSCI_0_2_AFFINITY_LEVEL_OFF;
}
-static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type)
+static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type, u64 flags)
{
- int i;
+ unsigned long i;
struct kvm_vcpu *tmp;
/*
@@ -177,22 +169,39 @@ static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type)
* re-initialized.
*/
kvm_for_each_vcpu(i, tmp, vcpu->kvm)
- tmp->arch.power_off = true;
+ tmp->arch.mp_state.mp_state = KVM_MP_STATE_STOPPED;
kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP);
memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event));
vcpu->run->system_event.type = type;
+ vcpu->run->system_event.ndata = 1;
+ vcpu->run->system_event.data[0] = flags;
vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
}
static void kvm_psci_system_off(struct kvm_vcpu *vcpu)
{
- kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_SHUTDOWN);
+ kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_SHUTDOWN, 0);
}
static void kvm_psci_system_reset(struct kvm_vcpu *vcpu)
{
- kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_RESET);
+ kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_RESET, 0);
+}
+
+static void kvm_psci_system_reset2(struct kvm_vcpu *vcpu)
+{
+ kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_RESET,
+ KVM_SYSTEM_EVENT_RESET_FLAG_PSCI_RESET2);
+}
+
+static void kvm_psci_system_suspend(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+
+ memset(&run->system_event, 0, sizeof(vcpu->run->system_event));
+ run->system_event.type = KVM_SYSTEM_EVENT_SUSPEND;
+ run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
}
static void kvm_psci_narrow_to_32bit(struct kvm_vcpu *vcpu)
@@ -209,15 +218,11 @@ static void kvm_psci_narrow_to_32bit(struct kvm_vcpu *vcpu)
static unsigned long kvm_psci_check_allowed_function(struct kvm_vcpu *vcpu, u32 fn)
{
- switch(fn) {
- case PSCI_0_2_FN64_CPU_SUSPEND:
- case PSCI_0_2_FN64_CPU_ON:
- case PSCI_0_2_FN64_AFFINITY_INFO:
- /* Disallow these functions for 32bit guests */
- if (vcpu_mode_is_32bit(vcpu))
- return PSCI_RET_NOT_SUPPORTED;
- break;
- }
+ /*
+ * Prevent 32 bit guests from calling 64 bit PSCI functions.
+ */
+ if ((fn & PSCI_0_2_64BIT) && vcpu_mode_is_32bit(vcpu))
+ return PSCI_RET_NOT_SUPPORTED;
return 0;
}
@@ -229,10 +234,6 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
unsigned long val;
int ret = 1;
- val = kvm_psci_check_allowed_function(vcpu, psci_fn);
- if (val)
- goto out;
-
switch (psci_fn) {
case PSCI_0_2_FN_PSCI_VERSION:
/*
@@ -246,7 +247,7 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
val = kvm_psci_vcpu_suspend(vcpu);
break;
case PSCI_0_2_FN_CPU_OFF:
- kvm_psci_vcpu_off(vcpu);
+ kvm_arm_vcpu_power_off(vcpu);
val = PSCI_RET_SUCCESS;
break;
case PSCI_0_2_FN_CPU_ON:
@@ -300,29 +301,31 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
break;
}
-out:
smccc_set_retval(vcpu, val, 0, 0, 0);
return ret;
}
-static int kvm_psci_1_0_call(struct kvm_vcpu *vcpu)
+static int kvm_psci_1_x_call(struct kvm_vcpu *vcpu, u32 minor)
{
+ unsigned long val = PSCI_RET_NOT_SUPPORTED;
u32 psci_fn = smccc_get_function(vcpu);
- u32 feature;
- unsigned long val;
+ struct kvm *kvm = vcpu->kvm;
+ u32 arg;
int ret = 1;
switch(psci_fn) {
case PSCI_0_2_FN_PSCI_VERSION:
- val = KVM_ARM_PSCI_1_0;
+ val = minor == 0 ? KVM_ARM_PSCI_1_0 : KVM_ARM_PSCI_1_1;
break;
case PSCI_1_0_FN_PSCI_FEATURES:
- feature = smccc_get_arg1(vcpu);
- val = kvm_psci_check_allowed_function(vcpu, feature);
+ arg = smccc_get_arg1(vcpu);
+ val = kvm_psci_check_allowed_function(vcpu, arg);
if (val)
break;
- switch(feature) {
+ val = PSCI_RET_NOT_SUPPORTED;
+
+ switch(arg) {
case PSCI_0_2_FN_PSCI_VERSION:
case PSCI_0_2_FN_CPU_SUSPEND:
case PSCI_0_2_FN64_CPU_SUSPEND:
@@ -338,8 +341,47 @@ static int kvm_psci_1_0_call(struct kvm_vcpu *vcpu)
case ARM_SMCCC_VERSION_FUNC_ID:
val = 0;
break;
- default:
- val = PSCI_RET_NOT_SUPPORTED;
+ case PSCI_1_0_FN_SYSTEM_SUSPEND:
+ case PSCI_1_0_FN64_SYSTEM_SUSPEND:
+ if (test_bit(KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED, &kvm->arch.flags))
+ val = 0;
+ break;
+ case PSCI_1_1_FN_SYSTEM_RESET2:
+ case PSCI_1_1_FN64_SYSTEM_RESET2:
+ if (minor >= 1)
+ val = 0;
+ break;
+ }
+ break;
+ case PSCI_1_0_FN_SYSTEM_SUSPEND:
+ kvm_psci_narrow_to_32bit(vcpu);
+ fallthrough;
+ case PSCI_1_0_FN64_SYSTEM_SUSPEND:
+ /*
+ * Return directly to userspace without changing the vCPU's
+ * registers. Userspace depends on reading the SMCCC parameters
+ * to implement SYSTEM_SUSPEND.
+ */
+ if (test_bit(KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED, &kvm->arch.flags)) {
+ kvm_psci_system_suspend(vcpu);
+ return 0;
+ }
+ break;
+ case PSCI_1_1_FN_SYSTEM_RESET2:
+ kvm_psci_narrow_to_32bit(vcpu);
+ fallthrough;
+ case PSCI_1_1_FN64_SYSTEM_RESET2:
+ if (minor >= 1) {
+ arg = smccc_get_arg1(vcpu);
+
+ if (arg <= PSCI_1_1_RESET_TYPE_SYSTEM_WARM_RESET ||
+ arg >= PSCI_1_1_RESET_TYPE_VENDOR_START) {
+ kvm_psci_system_reset2(vcpu);
+ vcpu_set_reg(vcpu, 0, PSCI_RET_INTERNAL_FAILURE);
+ return 0;
+ }
+
+ val = PSCI_RET_INVALID_PARAMS;
break;
}
break;
@@ -359,7 +401,7 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
switch (psci_fn) {
case KVM_PSCI_FN_CPU_OFF:
- kvm_psci_vcpu_off(vcpu);
+ kvm_arm_vcpu_power_off(vcpu);
val = PSCI_RET_SUCCESS;
break;
case KVM_PSCI_FN_CPU_ON:
@@ -392,180 +434,25 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
*/
int kvm_psci_call(struct kvm_vcpu *vcpu)
{
- switch (kvm_psci_version(vcpu, vcpu->kvm)) {
+ u32 psci_fn = smccc_get_function(vcpu);
+ unsigned long val;
+
+ val = kvm_psci_check_allowed_function(vcpu, psci_fn);
+ if (val) {
+ smccc_set_retval(vcpu, val, 0, 0, 0);
+ return 1;
+ }
+
+ switch (kvm_psci_version(vcpu)) {
+ case KVM_ARM_PSCI_1_1:
+ return kvm_psci_1_x_call(vcpu, 1);
case KVM_ARM_PSCI_1_0:
- return kvm_psci_1_0_call(vcpu);
+ return kvm_psci_1_x_call(vcpu, 0);
case KVM_ARM_PSCI_0_2:
return kvm_psci_0_2_call(vcpu);
case KVM_ARM_PSCI_0_1:
return kvm_psci_0_1_call(vcpu);
default:
return -EINVAL;
- };
-}
-
-int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu)
-{
- return 3; /* PSCI version and two workaround registers */
-}
-
-int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
-{
- if (put_user(KVM_REG_ARM_PSCI_VERSION, uindices++))
- return -EFAULT;
-
- if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1, uindices++))
- return -EFAULT;
-
- if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2, uindices++))
- return -EFAULT;
-
- return 0;
-}
-
-#define KVM_REG_FEATURE_LEVEL_WIDTH 4
-#define KVM_REG_FEATURE_LEVEL_MASK (BIT(KVM_REG_FEATURE_LEVEL_WIDTH) - 1)
-
-/*
- * Convert the workaround level into an easy-to-compare number, where higher
- * values mean better protection.
- */
-static int get_kernel_wa_level(u64 regid)
-{
- switch (regid) {
- case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
- switch (arm64_get_spectre_v2_state()) {
- case SPECTRE_VULNERABLE:
- return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL;
- case SPECTRE_MITIGATED:
- return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL;
- case SPECTRE_UNAFFECTED:
- return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED;
- }
- return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL;
- case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
- switch (arm64_get_spectre_v4_state()) {
- case SPECTRE_MITIGATED:
- /*
- * As for the hypercall discovery, we pretend we
- * don't have any FW mitigation if SSBS is there at
- * all times.
- */
- if (cpus_have_final_cap(ARM64_SSBS))
- return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL;
- fallthrough;
- case SPECTRE_UNAFFECTED:
- return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED;
- case SPECTRE_VULNERABLE:
- return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL;
- }
- }
-
- return -EINVAL;
-}
-
-int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
-{
- void __user *uaddr = (void __user *)(long)reg->addr;
- u64 val;
-
- switch (reg->id) {
- case KVM_REG_ARM_PSCI_VERSION:
- val = kvm_psci_version(vcpu, vcpu->kvm);
- break;
- case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
- case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
- val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK;
- break;
- default:
- return -ENOENT;
}
-
- if (copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)))
- return -EFAULT;
-
- return 0;
-}
-
-int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
-{
- void __user *uaddr = (void __user *)(long)reg->addr;
- u64 val;
- int wa_level;
-
- if (copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)))
- return -EFAULT;
-
- switch (reg->id) {
- case KVM_REG_ARM_PSCI_VERSION:
- {
- bool wants_02;
-
- wants_02 = test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features);
-
- switch (val) {
- case KVM_ARM_PSCI_0_1:
- if (wants_02)
- return -EINVAL;
- vcpu->kvm->arch.psci_version = val;
- return 0;
- case KVM_ARM_PSCI_0_2:
- case KVM_ARM_PSCI_1_0:
- if (!wants_02)
- return -EINVAL;
- vcpu->kvm->arch.psci_version = val;
- return 0;
- }
- break;
- }
-
- case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
- if (val & ~KVM_REG_FEATURE_LEVEL_MASK)
- return -EINVAL;
-
- if (get_kernel_wa_level(reg->id) < val)
- return -EINVAL;
-
- return 0;
-
- case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
- if (val & ~(KVM_REG_FEATURE_LEVEL_MASK |
- KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED))
- return -EINVAL;
-
- /* The enabled bit must not be set unless the level is AVAIL. */
- if ((val & KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED) &&
- (val & KVM_REG_FEATURE_LEVEL_MASK) != KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL)
- return -EINVAL;
-
- /*
- * Map all the possible incoming states to the only two we
- * really want to deal with.
- */
- switch (val & KVM_REG_FEATURE_LEVEL_MASK) {
- case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL:
- case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN:
- wa_level = KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL;
- break;
- case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL:
- case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED:
- wa_level = KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED;
- break;
- default:
- return -EINVAL;
- }
-
- /*
- * We can deal with NOT_AVAIL on NOT_REQUIRED, but not the
- * other way around.
- */
- if (get_kernel_wa_level(reg->id) < wa_level)
- return -EINVAL;
-
- return 0;
- default:
- return -ENOENT;
- }
-
- return -EINVAL;
}
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 27386f0d81e4..5ae18472205a 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -81,7 +81,7 @@ static int kvm_vcpu_enable_sve(struct kvm_vcpu *vcpu)
* KVM_REG_ARM64_SVE_VLS. Allocation is deferred until
* kvm_arm_vcpu_finalize(), which freezes the configuration.
*/
- vcpu->arch.flags |= KVM_ARM64_GUEST_HAS_SVE;
+ vcpu_set_flag(vcpu, GUEST_HAS_SVE);
return 0;
}
@@ -94,24 +94,33 @@ static int kvm_vcpu_finalize_sve(struct kvm_vcpu *vcpu)
{
void *buf;
unsigned int vl;
+ size_t reg_sz;
+ int ret;
vl = vcpu->arch.sve_max_vl;
/*
* Responsibility for these properties is shared between
- * kvm_arm_init_arch_resources(), kvm_vcpu_enable_sve() and
+ * kvm_arm_init_sve(), kvm_vcpu_enable_sve() and
* set_sve_vls(). Double-check here just to be sure:
*/
if (WARN_ON(!sve_vl_valid(vl) || vl > sve_max_virtualisable_vl() ||
vl > VL_ARCH_MAX))
return -EIO;
- buf = kzalloc(SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl)), GFP_KERNEL_ACCOUNT);
+ reg_sz = vcpu_sve_state_size(vcpu);
+ buf = kzalloc(reg_sz, GFP_KERNEL_ACCOUNT);
if (!buf)
return -ENOMEM;
+ ret = kvm_share_hyp(buf, buf + reg_sz);
+ if (ret) {
+ kfree(buf);
+ return ret;
+ }
+
vcpu->arch.sve_state = buf;
- vcpu->arch.flags |= KVM_ARM64_VCPU_SVE_FINALIZED;
+ vcpu_set_flag(vcpu, VCPU_SVE_FINALIZED);
return 0;
}
@@ -141,7 +150,13 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu)
void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu)
{
- kfree(vcpu->arch.sve_state);
+ void *sve_state = vcpu->arch.sve_state;
+
+ kvm_vcpu_unshare_task_fp(vcpu);
+ kvm_unshare_hyp(vcpu, vcpu + 1);
+ if (sve_state)
+ kvm_unshare_hyp(sve_state, sve_state + vcpu_sve_state_size(vcpu));
+ kfree(sve_state);
}
static void kvm_vcpu_reset_sve(struct kvm_vcpu *vcpu)
@@ -162,41 +177,64 @@ static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu)
!system_has_full_ptr_auth())
return -EINVAL;
- vcpu->arch.flags |= KVM_ARM64_GUEST_HAS_PTRAUTH;
+ vcpu_set_flag(vcpu, GUEST_HAS_PTRAUTH);
return 0;
}
-static bool vcpu_allowed_register_width(struct kvm_vcpu *vcpu)
+/**
+ * kvm_set_vm_width() - set the register width for the guest
+ * @vcpu: Pointer to the vcpu being configured
+ *
+ * Set both KVM_ARCH_FLAG_EL1_32BIT and KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED
+ * in the VM flags based on the vcpu's requested register width, the HW
+ * capabilities and other options (such as MTE).
+ * When REG_WIDTH_CONFIGURED is already set, the vcpu settings must be
+ * consistent with the value of the FLAG_EL1_32BIT bit in the flags.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+static int kvm_set_vm_width(struct kvm_vcpu *vcpu)
{
- struct kvm_vcpu *tmp;
+ struct kvm *kvm = vcpu->kvm;
bool is32bit;
- int i;
is32bit = vcpu_has_feature(vcpu, KVM_ARM_VCPU_EL1_32BIT);
+
+ lockdep_assert_held(&kvm->lock);
+
+ if (test_bit(KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED, &kvm->arch.flags)) {
+ /*
+ * The guest's register width is already configured.
+ * Make sure that the vcpu is consistent with it.
+ */
+ if (is32bit == test_bit(KVM_ARCH_FLAG_EL1_32BIT, &kvm->arch.flags))
+ return 0;
+
+ return -EINVAL;
+ }
+
if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1) && is32bit)
- return false;
+ return -EINVAL;
/* MTE is incompatible with AArch32 */
- if (kvm_has_mte(vcpu->kvm) && is32bit)
- return false;
+ if (kvm_has_mte(kvm) && is32bit)
+ return -EINVAL;
- /* Check that the vcpus are either all 32bit or all 64bit */
- kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
- if (vcpu_has_feature(tmp, KVM_ARM_VCPU_EL1_32BIT) != is32bit)
- return false;
- }
+ if (is32bit)
+ set_bit(KVM_ARCH_FLAG_EL1_32BIT, &kvm->arch.flags);
- return true;
+ set_bit(KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED, &kvm->arch.flags);
+
+ return 0;
}
/**
* kvm_reset_vcpu - sets core registers and sys_regs to reset value
* @vcpu: The VCPU pointer
*
- * This function finds the right table above and sets the registers on
- * the virtual CPU struct to their architecturally defined reset
- * values, except for registers whose reset is deferred until
- * kvm_arm_vcpu_finalize().
+ * This function sets the registers on the virtual CPU struct to their
+ * architecturally defined reset values, except for registers whose reset is
+ * deferred until kvm_arm_vcpu_finalize().
*
* Note: This function can be called from two paths: The KVM_ARM_VCPU_INIT
* ioctl or as part of handling a request issued by another VCPU in the PSCI
@@ -216,10 +254,16 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
u32 pstate;
mutex_lock(&vcpu->kvm->lock);
- reset_state = vcpu->arch.reset_state;
- WRITE_ONCE(vcpu->arch.reset_state.reset, false);
+ ret = kvm_set_vm_width(vcpu);
+ if (!ret) {
+ reset_state = vcpu->arch.reset_state;
+ WRITE_ONCE(vcpu->arch.reset_state.reset, false);
+ }
mutex_unlock(&vcpu->kvm->lock);
+ if (ret)
+ return ret;
+
/* Reset PMU outside of the non-preemptible section */
kvm_pmu_vcpu_reset(vcpu);
@@ -246,14 +290,9 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
}
}
- if (!vcpu_allowed_register_width(vcpu)) {
- ret = -EINVAL;
- goto out;
- }
-
switch (vcpu->arch.target) {
default:
- if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) {
+ if (vcpu_el1_is_32bit(vcpu)) {
pstate = VCPU_RESET_PSTATE_SVC;
} else {
pstate = VCPU_RESET_PSTATE_EL1;
@@ -320,7 +359,7 @@ int kvm_set_ipa_limit(void)
mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
parange = cpuid_feature_extract_unsigned_field(mmfr0,
- ID_AA64MMFR0_PARANGE_SHIFT);
+ ID_AA64MMFR0_EL1_PARANGE_SHIFT);
/*
* IPA size beyond 48 bits could not be supported
* on either 4K or 16K page size. Hence let's cap
@@ -328,20 +367,20 @@ int kvm_set_ipa_limit(void)
* on the system.
*/
if (PAGE_SIZE != SZ_64K)
- parange = min(parange, (unsigned int)ID_AA64MMFR0_PARANGE_48);
+ parange = min(parange, (unsigned int)ID_AA64MMFR0_EL1_PARANGE_48);
/*
* Check with ARMv8.5-GTG that our PAGE_SIZE is supported at
* Stage-2. If not, things will stop very quickly.
*/
- switch (cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_TGRAN_2_SHIFT)) {
- case ID_AA64MMFR0_TGRAN_2_SUPPORTED_NONE:
+ switch (cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_EL1_TGRAN_2_SHIFT)) {
+ case ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_NONE:
kvm_err("PAGE_SIZE not supported at Stage-2, giving up\n");
return -EINVAL;
- case ID_AA64MMFR0_TGRAN_2_SUPPORTED_DEFAULT:
+ case ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_DEFAULT:
kvm_debug("PAGE_SIZE supported at Stage-2 (default)\n");
break;
- case ID_AA64MMFR0_TGRAN_2_SUPPORTED_MIN ... ID_AA64MMFR0_TGRAN_2_SUPPORTED_MAX:
+ case ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_MIN ... ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_MAX:
kvm_debug("PAGE_SIZE supported at Stage-2 (advertised)\n");
break;
default:
diff --git a/arch/arm64/kvm/stacktrace.c b/arch/arm64/kvm/stacktrace.c
new file mode 100644
index 000000000000..3ace5b75813b
--- /dev/null
+++ b/arch/arm64/kvm/stacktrace.c
@@ -0,0 +1,245 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * KVM nVHE hypervisor stack tracing support.
+ *
+ * The unwinder implementation depends on the nVHE mode:
+ *
+ * 1) Non-protected nVHE mode - the host can directly access the
+ * HYP stack pages and unwind the HYP stack in EL1. This saves having
+ * to allocate shared buffers for the host to read the unwound
+ * stacktrace.
+ *
+ * 2) pKVM (protected nVHE) mode - the host cannot directly access
+ * the HYP memory. The stack is unwound in EL2 and dumped to a shared
+ * buffer where the host can read and print the stacktrace.
+ *
+ * Copyright (C) 2022 Google LLC
+ */
+
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+
+#include <asm/stacktrace/nvhe.h>
+
+static struct stack_info stackinfo_get_overflow(void)
+{
+ struct kvm_nvhe_stacktrace_info *stacktrace_info
+ = this_cpu_ptr_nvhe_sym(kvm_stacktrace_info);
+ unsigned long low = (unsigned long)stacktrace_info->overflow_stack_base;
+ unsigned long high = low + OVERFLOW_STACK_SIZE;
+
+ return (struct stack_info) {
+ .low = low,
+ .high = high,
+ };
+}
+
+static struct stack_info stackinfo_get_overflow_kern_va(void)
+{
+ unsigned long low = (unsigned long)this_cpu_ptr_nvhe_sym(overflow_stack);
+ unsigned long high = low + OVERFLOW_STACK_SIZE;
+
+ return (struct stack_info) {
+ .low = low,
+ .high = high,
+ };
+}
+
+static struct stack_info stackinfo_get_hyp(void)
+{
+ struct kvm_nvhe_stacktrace_info *stacktrace_info
+ = this_cpu_ptr_nvhe_sym(kvm_stacktrace_info);
+ unsigned long low = (unsigned long)stacktrace_info->stack_base;
+ unsigned long high = low + PAGE_SIZE;
+
+ return (struct stack_info) {
+ .low = low,
+ .high = high,
+ };
+}
+
+static struct stack_info stackinfo_get_hyp_kern_va(void)
+{
+ unsigned long low = (unsigned long)*this_cpu_ptr(&kvm_arm_hyp_stack_page);
+ unsigned long high = low + PAGE_SIZE;
+
+ return (struct stack_info) {
+ .low = low,
+ .high = high,
+ };
+}
+
+/*
+ * kvm_nvhe_stack_kern_va - Convert KVM nVHE HYP stack addresses to kernel VAs
+ *
+ * The nVHE hypervisor stack is mapped in the flexible 'private' VA range, to
+ * allow for guard pages below the stack. Consequently, the fixed offset address
+ * translation macros won't work here.
+ *
+ * The kernel VA is calculated as an offset from the kernel VA of the hypervisor
+ * stack base.
+ *
+ * Returns true on success and updates @addr to its corresponding kernel VA;
+ * otherwise returns false.
+ */
+static bool kvm_nvhe_stack_kern_va(unsigned long *addr, unsigned long size)
+{
+ struct stack_info stack_hyp, stack_kern;
+
+ stack_hyp = stackinfo_get_hyp();
+ stack_kern = stackinfo_get_hyp_kern_va();
+ if (stackinfo_on_stack(&stack_hyp, *addr, size))
+ goto found;
+
+ stack_hyp = stackinfo_get_overflow();
+ stack_kern = stackinfo_get_overflow_kern_va();
+ if (stackinfo_on_stack(&stack_hyp, *addr, size))
+ goto found;
+
+ return false;
+
+found:
+ *addr = *addr - stack_hyp.low + stack_kern.low;
+ return true;
+}
+
+/*
+ * Convert a KVM nVHE HYP frame record address to a kernel VA
+ */
+static bool kvm_nvhe_stack_kern_record_va(unsigned long *addr)
+{
+ return kvm_nvhe_stack_kern_va(addr, 16);
+}
+
+static int unwind_next(struct unwind_state *state)
+{
+ /*
+ * The FP is in the hypervisor VA space. Convert it to the kernel VA
+ * space so it can be unwound by the regular unwind functions.
+ */
+ if (!kvm_nvhe_stack_kern_record_va(&state->fp))
+ return -EINVAL;
+
+ return unwind_next_frame_record(state);
+}
+
+static void unwind(struct unwind_state *state,
+ stack_trace_consume_fn consume_entry, void *cookie)
+{
+ while (1) {
+ int ret;
+
+ if (!consume_entry(cookie, state->pc))
+ break;
+ ret = unwind_next(state);
+ if (ret < 0)
+ break;
+ }
+}
+
+/*
+ * kvm_nvhe_dump_backtrace_entry - Symbolize and print an nVHE backtrace entry
+ *
+ * @arg : the hypervisor offset, used for address translation
+ * @where : the program counter corresponding to the stack frame
+ */
+static bool kvm_nvhe_dump_backtrace_entry(void *arg, unsigned long where)
+{
+ unsigned long va_mask = GENMASK_ULL(vabits_actual - 1, 0);
+ unsigned long hyp_offset = (unsigned long)arg;
+
+ /* Mask tags and convert to kern addr */
+ where = (where & va_mask) + hyp_offset;
+ kvm_err(" [<%016lx>] %pB\n", where, (void *)(where + kaslr_offset()));
+
+ return true;
+}
+
+static void kvm_nvhe_dump_backtrace_start(void)
+{
+ kvm_err("nVHE call trace:\n");
+}
+
+static void kvm_nvhe_dump_backtrace_end(void)
+{
+ kvm_err("---[ end nVHE call trace ]---\n");
+}
+
+/*
+ * hyp_dump_backtrace - Dump the non-protected nVHE backtrace.
+ *
+ * @hyp_offset: hypervisor offset, used for address translation.
+ *
+ * The host can directly access HYP stack pages in non-protected
+ * mode, so the unwinding is done directly from EL1. This removes
+ * the need for shared buffers between host and hypervisor for
+ * the stacktrace.
+ */
+static void hyp_dump_backtrace(unsigned long hyp_offset)
+{
+ struct kvm_nvhe_stacktrace_info *stacktrace_info;
+ struct stack_info stacks[] = {
+ stackinfo_get_overflow_kern_va(),
+ stackinfo_get_hyp_kern_va(),
+ };
+ struct unwind_state state = {
+ .stacks = stacks,
+ .nr_stacks = ARRAY_SIZE(stacks),
+ };
+
+ stacktrace_info = this_cpu_ptr_nvhe_sym(kvm_stacktrace_info);
+
+ kvm_nvhe_unwind_init(&state, stacktrace_info->fp, stacktrace_info->pc);
+
+ kvm_nvhe_dump_backtrace_start();
+ unwind(&state, kvm_nvhe_dump_backtrace_entry, (void *)hyp_offset);
+ kvm_nvhe_dump_backtrace_end();
+}
+
+#ifdef CONFIG_PROTECTED_NVHE_STACKTRACE
+DECLARE_KVM_NVHE_PER_CPU(unsigned long [NVHE_STACKTRACE_SIZE/sizeof(long)],
+ pkvm_stacktrace);
+
+/*
+ * pkvm_dump_backtrace - Dump the protected nVHE HYP backtrace.
+ *
+ * @hyp_offset: hypervisor offset, used for address translation.
+ *
+ * Dumping of the pKVM HYP backtrace is done by reading the
+ * stack addresses from the shared stacktrace buffer, since the
+ * host cannot directly access hypervisor memory in protected
+ * mode.
+ */
+static void pkvm_dump_backtrace(unsigned long hyp_offset)
+{
+ unsigned long *stacktrace
+ = (unsigned long *) this_cpu_ptr_nvhe_sym(pkvm_stacktrace);
+ int i;
+
+ kvm_nvhe_dump_backtrace_start();
+ /* The saved stacktrace is terminated by a null entry */
+ for (i = 0;
+ i < ARRAY_SIZE(kvm_nvhe_sym(pkvm_stacktrace)) && stacktrace[i];
+ i++)
+ kvm_nvhe_dump_backtrace_entry((void *)hyp_offset, stacktrace[i]);
+ kvm_nvhe_dump_backtrace_end();
+}
+#else /* !CONFIG_PROTECTED_NVHE_STACKTRACE */
+static void pkvm_dump_backtrace(unsigned long hyp_offset)
+{
+ kvm_err("Cannot dump pKVM nVHE stacktrace: !CONFIG_PROTECTED_NVHE_STACKTRACE\n");
+}
+#endif /* CONFIG_PROTECTED_NVHE_STACKTRACE */
+
+/*
+ * kvm_nvhe_dump_backtrace - Dump KVM nVHE hypervisor backtrace.
+ *
+ * @hyp_offset: hypervisor offset, used for address translation.
+ */
+void kvm_nvhe_dump_backtrace(unsigned long hyp_offset)
+{
+ if (is_protected_kvm_enabled())
+ pkvm_dump_backtrace(hyp_offset);
+ else
+ hyp_dump_backtrace(hyp_offset);
+}
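The non-protected unwinder above walks standard AArch64 frame records: each saved FP points at a { next_fp, lr } pair, and every FP is translated from a HYP VA to a kernel VA before it is dereferenced. The following is a minimal, standalone sketch of that walk under those assumptions; the names (demo_unwind, fake_translate) are hypothetical stand-ins for unwind_next_frame_record() and kvm_nvhe_stack_kern_va(), and the sketch ignores stack-bounds checking.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* AArch64 frame record layout assumed by the unwinder: { next FP, LR }. */
struct frame_record {
	uint64_t next_fp;	/* FP of the caller's frame */
	uint64_t lr;		/* return address into the caller */
};

/* Stand-in for kvm_nvhe_stack_kern_va(): validate/rebase an on-stack FP. */
static bool fake_translate(uint64_t *addr)
{
	/* Identity mapping for the demo; the real code rebases HYP VAs. */
	return *addr != 0;
}

static void demo_unwind(uint64_t fp, uint64_t pc)
{
	while (pc) {
		const struct frame_record *rec;

		printf("  [<%016llx>]\n", (unsigned long long)pc);

		if (!fake_translate(&fp))
			break;

		rec = (const struct frame_record *)(uintptr_t)fp;
		pc = rec->lr;		/* caller's return address */
		fp = rec->next_fp;	/* step to the caller's frame */
	}
}

int main(void)
{
	/* Build a tiny fake call chain: leaf -> mid -> root. */
	struct frame_record root = { .next_fp = 0, .lr = 0 };
	struct frame_record mid  = { .next_fp = (uint64_t)(uintptr_t)&root, .lr = 0x3000 };
	struct frame_record leaf = { .next_fp = (uint64_t)(uintptr_t)&mid,  .lr = 0x2000 };

	demo_unwind((uint64_t)(uintptr_t)&leaf, 0x1000);
	return 0;
}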
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 4dc2fba316ff..f4a7c5abcbca 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -34,16 +34,13 @@
#include "trace.h"
/*
- * All of this file is extremely similar to the ARM coproc.c, but the
- * types are different. My gut feeling is that it should be pretty
- * easy to merge, but that would be an ABI breakage -- again. VFP
- * would also need to be abstracted.
- *
* For AArch32, we only take care of what is being trapped. Anything
* that has to do with init and userspace access has to go via the
* 64bit interface.
*/
+static u64 sys_reg_to_index(const struct sys_reg_desc *reg);
+
static bool read_from_write_only(struct kvm_vcpu *vcpu,
struct sys_reg_params *params,
const struct sys_reg_desc *r)
@@ -68,7 +65,7 @@ u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
{
u64 val = 0x8badf00d8badf00d;
- if (vcpu->arch.sysregs_loaded_on_cpu &&
+ if (vcpu_get_flag(vcpu, SYSREGS_ON_CPU) &&
__vcpu_read_sys_reg_from_cpu(reg, &val))
return val;
@@ -77,7 +74,7 @@ u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
{
- if (vcpu->arch.sysregs_loaded_on_cpu &&
+ if (vcpu_get_flag(vcpu, SYSREGS_ON_CPU) &&
__vcpu_write_sys_reg_to_cpu(val, reg))
return;
@@ -276,7 +273,7 @@ static bool trap_loregion(struct kvm_vcpu *vcpu,
u64 val = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
u32 sr = reg_to_encoding(r);
- if (!(val & (0xfUL << ID_AA64MMFR1_LOR_SHIFT))) {
+ if (!(val & (0xfUL << ID_AA64MMFR1_EL1_LO_SHIFT))) {
kvm_inject_undefined(vcpu);
return false;
}
@@ -287,16 +284,47 @@ static bool trap_loregion(struct kvm_vcpu *vcpu,
return trap_raz_wi(vcpu, p, r);
}
+static bool trap_oslar_el1(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ u64 oslsr;
+
+ if (!p->is_write)
+ return read_from_write_only(vcpu, p, r);
+
+ /* Forward the OSLK bit to OSLSR */
+ oslsr = __vcpu_sys_reg(vcpu, OSLSR_EL1) & ~SYS_OSLSR_OSLK;
+ if (p->regval & SYS_OSLAR_OSLK)
+ oslsr |= SYS_OSLSR_OSLK;
+
+ __vcpu_sys_reg(vcpu, OSLSR_EL1) = oslsr;
+ return true;
+}
+
static bool trap_oslsr_el1(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
- if (p->is_write) {
- return ignore_write(vcpu, p);
- } else {
- p->regval = (1 << 3);
- return true;
- }
+ if (p->is_write)
+ return write_to_read_only(vcpu, p, r);
+
+ p->regval = __vcpu_sys_reg(vcpu, r->reg);
+ return true;
+}
+
+static int set_oslsr_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+ u64 val)
+{
+ /*
+ * The only modifiable bit is the OSLK bit. Refuse the write if
+ * userspace attempts to change any other bit in the register.
+ */
+ if ((val ^ rd->val) & ~SYS_OSLSR_OSLK)
+ return -EINVAL;
+
+ __vcpu_sys_reg(vcpu, rd->reg) = val;
+ return 0;
}
static bool trap_dbgauthstatus_el1(struct kvm_vcpu *vcpu,
@@ -344,7 +372,7 @@ static bool trap_debug_regs(struct kvm_vcpu *vcpu,
{
if (p->is_write) {
vcpu_write_sys_reg(vcpu, p->regval, r->reg);
- vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY;
+ vcpu_set_flag(vcpu, DEBUG_DIRTY);
} else {
p->regval = vcpu_read_sys_reg(vcpu, r->reg);
}
@@ -360,8 +388,8 @@ static bool trap_debug_regs(struct kvm_vcpu *vcpu,
* A 32 bit write to a debug register leave top bits alone
* A 32 bit read from a debug register only returns the bottom bits
*
- * All writes will set the KVM_ARM64_DEBUG_DIRTY flag to ensure the
- * hyp.S code switches between host and guest values in future.
+ * All writes will set the DEBUG_DIRTY flag to ensure the hyp code
+ * switches between host and guest values in future.
*/
static void reg_to_dbg(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
@@ -377,7 +405,7 @@ static void reg_to_dbg(struct kvm_vcpu *vcpu,
val |= (p->regval & (mask >> shift)) << shift;
*dbg_reg = val;
- vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY;
+ vcpu_set_flag(vcpu, DEBUG_DIRTY);
}
static void dbg_to_reg(struct kvm_vcpu *vcpu,
@@ -408,22 +436,16 @@ static bool trap_bvr(struct kvm_vcpu *vcpu,
}
static int set_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
- const struct kvm_one_reg *reg, void __user *uaddr)
+ u64 val)
{
- __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm];
-
- if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0)
- return -EFAULT;
+ vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm] = val;
return 0;
}
static int get_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
- const struct kvm_one_reg *reg, void __user *uaddr)
+ u64 *val)
{
- __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm];
-
- if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
- return -EFAULT;
+ *val = vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm];
return 0;
}
@@ -450,23 +472,16 @@ static bool trap_bcr(struct kvm_vcpu *vcpu,
}
static int set_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
- const struct kvm_one_reg *reg, void __user *uaddr)
+ u64 val)
{
- __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm];
-
- if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0)
- return -EFAULT;
-
+ vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm] = val;
return 0;
}
static int get_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
- const struct kvm_one_reg *reg, void __user *uaddr)
+ u64 *val)
{
- __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm];
-
- if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
- return -EFAULT;
+ *val = vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm];
return 0;
}
@@ -494,22 +509,16 @@ static bool trap_wvr(struct kvm_vcpu *vcpu,
}
static int set_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
- const struct kvm_one_reg *reg, void __user *uaddr)
+ u64 val)
{
- __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm];
-
- if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0)
- return -EFAULT;
+ vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm] = val;
return 0;
}
static int get_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
- const struct kvm_one_reg *reg, void __user *uaddr)
+ u64 *val)
{
- __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm];
-
- if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
- return -EFAULT;
+ *val = vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm];
return 0;
}
@@ -536,22 +545,16 @@ static bool trap_wcr(struct kvm_vcpu *vcpu,
}
static int set_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
- const struct kvm_one_reg *reg, void __user *uaddr)
+ u64 val)
{
- __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm];
-
- if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0)
- return -EFAULT;
+ vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm] = val;
return 0;
}
static int get_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
- const struct kvm_one_reg *reg, void __user *uaddr)
+ u64 *val)
{
- __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm];
-
- if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
- return -EFAULT;
+ *val = vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm];
return 0;
}
@@ -649,7 +652,7 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
*/
val = ((pmcr & ~ARMV8_PMU_PMCR_MASK)
| (ARMV8_PMU_PMCR_MASK & 0xdecafbad)) & (~ARMV8_PMU_PMCR_E);
- if (!system_supports_32bit_el0())
+ if (!kvm_supports_32bit_el0())
val |= ARMV8_PMU_PMCR_LC;
__vcpu_sys_reg(vcpu, r->reg) = val;
}
@@ -698,7 +701,7 @@ static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
val = __vcpu_sys_reg(vcpu, PMCR_EL0);
val &= ~ARMV8_PMU_PMCR_MASK;
val |= p->regval & ARMV8_PMU_PMCR_MASK;
- if (!system_supports_32bit_el0())
+ if (!kvm_supports_32bit_el0())
val |= ARMV8_PMU_PMCR_LC;
__vcpu_sys_reg(vcpu, PMCR_EL0) = val;
kvm_pmu_handle_pmcr(vcpu, val);
@@ -1060,13 +1063,12 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu,
}
/* Read a sanitised cpufeature ID register by sys_reg_desc */
-static u64 read_id_reg(const struct kvm_vcpu *vcpu,
- struct sys_reg_desc const *r, bool raz)
+static u64 read_id_reg(const struct kvm_vcpu *vcpu, struct sys_reg_desc const *r)
{
u32 id = reg_to_encoding(r);
u64 val;
- if (raz)
+ if (sysreg_visible_as_raz(vcpu, r))
return 0;
val = read_sanitised_ftr_reg(id);
@@ -1074,39 +1076,47 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu,
switch (id) {
case SYS_ID_AA64PFR0_EL1:
if (!vcpu_has_sve(vcpu))
- val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_SVE);
- val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_AMU);
- val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_CSV2);
- val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV2), (u64)vcpu->kvm->arch.pfr0_csv2);
- val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3);
- val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3), (u64)vcpu->kvm->arch.pfr0_csv3);
- if (irqchip_in_kernel(vcpu->kvm) &&
- vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
- val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_GIC);
- val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_GIC), 1);
+ val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_SVE);
+ val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AMU);
+ val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2);
+ val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2), (u64)vcpu->kvm->arch.pfr0_csv2);
+ val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3);
+ val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3), (u64)vcpu->kvm->arch.pfr0_csv3);
+ if (kvm_vgic_global_state.type == VGIC_V3) {
+ val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC);
+ val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC), 1);
}
break;
case SYS_ID_AA64PFR1_EL1:
if (!kvm_has_mte(vcpu->kvm))
- val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_MTE);
+ val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE);
+
+ val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_SME);
break;
case SYS_ID_AA64ISAR1_EL1:
if (!vcpu_has_ptrauth(vcpu))
- val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR1_APA) |
- ARM64_FEATURE_MASK(ID_AA64ISAR1_API) |
- ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA) |
- ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI));
+ val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA) |
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API) |
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPA) |
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPI));
+ break;
+ case SYS_ID_AA64ISAR2_EL1:
+ if (!vcpu_has_ptrauth(vcpu))
+ val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3) |
+ ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3));
+ if (!cpus_have_final_cap(ARM64_HAS_WFXT))
+ val &= ~ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_WFxT);
break;
case SYS_ID_AA64DFR0_EL1:
/* Limit debug to ARMv8.0 */
- val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_DEBUGVER);
- val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64DFR0_DEBUGVER), 6);
+ val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer);
+ val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer), 6);
/* Limit guests to PMUv3 for ARMv8.4 */
val = cpuid_feature_cap_perfmon_field(val,
- ID_AA64DFR0_PMUVER_SHIFT,
- kvm_vcpu_has_pmu(vcpu) ? ID_AA64DFR0_PMUVER_8_4 : 0);
+ ID_AA64DFR0_EL1_PMUVer_SHIFT,
+ kvm_vcpu_has_pmu(vcpu) ? ID_AA64DFR0_EL1_PMUVer_V3P4 : 0);
/* Hide SPE from guests */
- val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_PMSVER);
+ val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMSVer);
break;
case SYS_ID_DFR0_EL1:
/* Limit guests to PMUv3 for ARMv8.4 */
@@ -1134,40 +1144,39 @@ static unsigned int id_visibility(const struct kvm_vcpu *vcpu,
return 0;
}
-/* cpufeature ID register access trap handlers */
-
-static bool __access_id_reg(struct kvm_vcpu *vcpu,
- struct sys_reg_params *p,
- const struct sys_reg_desc *r,
- bool raz)
+static unsigned int aa32_id_visibility(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *r)
{
- if (p->is_write)
- return write_to_read_only(vcpu, p, r);
+ /*
+ * AArch32 ID registers are UNKNOWN if AArch32 isn't implemented at any
+ * EL. Promote to RAZ/WI in order to guarantee consistency between
+ * systems.
+ */
+ if (!kvm_supports_32bit_el0())
+ return REG_RAZ | REG_USER_WI;
- p->regval = read_id_reg(vcpu, r, raz);
- return true;
+ return id_visibility(vcpu, r);
}
+static unsigned int raz_visibility(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *r)
+{
+ return REG_RAZ;
+}
+
+/* cpufeature ID register access trap handlers */
+
static bool access_id_reg(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
- bool raz = sysreg_visible_as_raz(vcpu, r);
-
- return __access_id_reg(vcpu, p, r, raz);
-}
+ if (p->is_write)
+ return write_to_read_only(vcpu, p, r);
-static bool access_raz_id_reg(struct kvm_vcpu *vcpu,
- struct sys_reg_params *p,
- const struct sys_reg_desc *r)
-{
- return __access_id_reg(vcpu, p, r, true);
+ p->regval = read_id_reg(vcpu, r);
+ return true;
}
-static int reg_from_user(u64 *val, const void __user *uaddr, u64 id);
-static int reg_to_user(void __user *uaddr, const u64 *val, u64 id);
-static u64 sys_reg_to_index(const struct sys_reg_desc *reg);
-
/* Visibility overrides for SVE-specific control registers */
static unsigned int sve_visibility(const struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd)
@@ -1180,42 +1189,35 @@ static unsigned int sve_visibility(const struct kvm_vcpu *vcpu,
static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd,
- const struct kvm_one_reg *reg, void __user *uaddr)
+ u64 val)
{
- const u64 id = sys_reg_to_index(rd);
u8 csv2, csv3;
- int err;
- u64 val;
-
- err = reg_from_user(&val, uaddr, id);
- if (err)
- return err;
/*
* Allow AA64PFR0_EL1.CSV2 to be set from userspace as long as
* it doesn't promise more than what is actually provided (the
* guest could otherwise be covered in ectoplasmic residue).
*/
- csv2 = cpuid_feature_extract_unsigned_field(val, ID_AA64PFR0_CSV2_SHIFT);
+ csv2 = cpuid_feature_extract_unsigned_field(val, ID_AA64PFR0_EL1_CSV2_SHIFT);
if (csv2 > 1 ||
(csv2 && arm64_get_spectre_v2_state() != SPECTRE_UNAFFECTED))
return -EINVAL;
/* Same thing for CSV3 */
- csv3 = cpuid_feature_extract_unsigned_field(val, ID_AA64PFR0_CSV3_SHIFT);
+ csv3 = cpuid_feature_extract_unsigned_field(val, ID_AA64PFR0_EL1_CSV3_SHIFT);
if (csv3 > 1 ||
(csv3 && arm64_get_meltdown_state() != SPECTRE_UNAFFECTED))
return -EINVAL;
/* We can only differ with CSV[23], and anything else is an error */
- val ^= read_id_reg(vcpu, rd, false);
- val &= ~((0xFUL << ID_AA64PFR0_CSV2_SHIFT) |
- (0xFUL << ID_AA64PFR0_CSV3_SHIFT));
+ val ^= read_id_reg(vcpu, rd);
+ val &= ~(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2) |
+ ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3));
if (val)
return -EINVAL;
vcpu->kvm->arch.pfr0_csv2 = csv2;
- vcpu->kvm->arch.pfr0_csv3 = csv3 ;
+ vcpu->kvm->arch.pfr0_csv3 = csv3;
return 0;
}
@@ -1227,77 +1229,33 @@ static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu,
* are stored, and for set_id_reg() we don't allow the effective value
* to be changed.
*/
-static int __get_id_reg(const struct kvm_vcpu *vcpu,
- const struct sys_reg_desc *rd, void __user *uaddr,
- bool raz)
+static int get_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+ u64 *val)
{
- const u64 id = sys_reg_to_index(rd);
- const u64 val = read_id_reg(vcpu, rd, raz);
-
- return reg_to_user(uaddr, &val, id);
+ *val = read_id_reg(vcpu, rd);
+ return 0;
}
-static int __set_id_reg(const struct kvm_vcpu *vcpu,
- const struct sys_reg_desc *rd, void __user *uaddr,
- bool raz)
+static int set_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+ u64 val)
{
- const u64 id = sys_reg_to_index(rd);
- int err;
- u64 val;
-
- err = reg_from_user(&val, uaddr, id);
- if (err)
- return err;
-
/* This is what we mean by invariant: you can't change it. */
- if (val != read_id_reg(vcpu, rd, raz))
+ if (val != read_id_reg(vcpu, rd))
return -EINVAL;
return 0;
}
-static int get_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
- const struct kvm_one_reg *reg, void __user *uaddr)
-{
- bool raz = sysreg_visible_as_raz(vcpu, rd);
-
- return __get_id_reg(vcpu, rd, uaddr, raz);
-}
-
-static int set_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
- const struct kvm_one_reg *reg, void __user *uaddr)
-{
- bool raz = sysreg_visible_as_raz(vcpu, rd);
-
- return __set_id_reg(vcpu, rd, uaddr, raz);
-}
-
-static int set_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
- const struct kvm_one_reg *reg, void __user *uaddr)
-{
- return __set_id_reg(vcpu, rd, uaddr, true);
-}
-
static int get_raz_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
- const struct kvm_one_reg *reg, void __user *uaddr)
+ u64 *val)
{
- const u64 id = sys_reg_to_index(rd);
- const u64 val = 0;
-
- return reg_to_user(uaddr, &val, id);
+ *val = 0;
+ return 0;
}
static int set_wi_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
- const struct kvm_one_reg *reg, void __user *uaddr)
+ u64 val)
{
- int err;
- u64 val;
-
- /* Perform the access even if we are going to ignore the value */
- err = reg_from_user(&val, uaddr, sys_reg_to_index(rd));
- if (err)
- return err;
-
return 0;
}
@@ -1387,6 +1345,15 @@ static unsigned int mte_visibility(const struct kvm_vcpu *vcpu,
.visibility = id_visibility, \
}
+/* sys_reg_desc initialiser for known cpufeature ID registers */
+#define AA32_ID_SANITISED(name) { \
+ SYS_DESC(SYS_##name), \
+ .access = access_id_reg, \
+ .get_user = get_id_reg, \
+ .set_user = set_id_reg, \
+ .visibility = aa32_id_visibility, \
+}
+
/*
* sys_reg_desc initialiser for architecturally unallocated cpufeature ID
* register with encoding Op0=3, Op1=0, CRn=0, CRm=crm, Op2=op2
@@ -1394,9 +1361,10 @@ static unsigned int mte_visibility(const struct kvm_vcpu *vcpu,
*/
#define ID_UNALLOCATED(crm, op2) { \
Op0(3), Op1(0), CRn(0), CRm(crm), Op2(op2), \
- .access = access_raz_id_reg, \
- .get_user = get_raz_reg, \
- .set_user = set_raz_id_reg, \
+ .access = access_id_reg, \
+ .get_user = get_id_reg, \
+ .set_user = set_id_reg, \
+ .visibility = raz_visibility \
}
/*
@@ -1406,9 +1374,10 @@ static unsigned int mte_visibility(const struct kvm_vcpu *vcpu,
*/
#define ID_HIDDEN(name) { \
SYS_DESC(SYS_##name), \
- .access = access_raz_id_reg, \
- .get_user = get_raz_reg, \
- .set_user = set_raz_id_reg, \
+ .access = access_id_reg, \
+ .get_user = get_id_reg, \
+ .set_user = set_id_reg, \
+ .visibility = raz_visibility, \
}
/*
@@ -1418,9 +1387,9 @@ static unsigned int mte_visibility(const struct kvm_vcpu *vcpu,
* Debug handling: We do trap most, if not all debug related system
* registers. The implementation is good enough to ensure that a guest
* can use these with minimal performance degradation. The drawback is
- * that we don't implement any of the external debug, none of the
- * OSlock protocol. This should be revisited if we ever encounter a
- * more demanding guest...
+ * that we don't implement any of the external debug architecture.
+ * This should be revisited if we ever encounter a more demanding
+ * guest...
*/
static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_DC_ISW), access_dcsw },
@@ -1447,8 +1416,9 @@ static const struct sys_reg_desc sys_reg_descs[] = {
DBG_BCR_BVR_WCR_WVR_EL1(15),
{ SYS_DESC(SYS_MDRAR_EL1), trap_raz_wi },
- { SYS_DESC(SYS_OSLAR_EL1), trap_raz_wi },
- { SYS_DESC(SYS_OSLSR_EL1), trap_oslsr_el1 },
+ { SYS_DESC(SYS_OSLAR_EL1), trap_oslar_el1 },
+ { SYS_DESC(SYS_OSLSR_EL1), trap_oslsr_el1, reset_val, OSLSR_EL1,
+ SYS_OSLSR_OSLM_IMPLEMENTED, .set_user = set_oslsr_el1, },
{ SYS_DESC(SYS_OSDLR_EL1), trap_raz_wi },
{ SYS_DESC(SYS_DBGPRCR_EL1), trap_raz_wi },
{ SYS_DESC(SYS_DBGCLAIMSET_EL1), trap_raz_wi },
@@ -1471,33 +1441,33 @@ static const struct sys_reg_desc sys_reg_descs[] = {
/* AArch64 mappings of the AArch32 ID registers */
/* CRm=1 */
- ID_SANITISED(ID_PFR0_EL1),
- ID_SANITISED(ID_PFR1_EL1),
- ID_SANITISED(ID_DFR0_EL1),
+ AA32_ID_SANITISED(ID_PFR0_EL1),
+ AA32_ID_SANITISED(ID_PFR1_EL1),
+ AA32_ID_SANITISED(ID_DFR0_EL1),
ID_HIDDEN(ID_AFR0_EL1),
- ID_SANITISED(ID_MMFR0_EL1),
- ID_SANITISED(ID_MMFR1_EL1),
- ID_SANITISED(ID_MMFR2_EL1),
- ID_SANITISED(ID_MMFR3_EL1),
+ AA32_ID_SANITISED(ID_MMFR0_EL1),
+ AA32_ID_SANITISED(ID_MMFR1_EL1),
+ AA32_ID_SANITISED(ID_MMFR2_EL1),
+ AA32_ID_SANITISED(ID_MMFR3_EL1),
/* CRm=2 */
- ID_SANITISED(ID_ISAR0_EL1),
- ID_SANITISED(ID_ISAR1_EL1),
- ID_SANITISED(ID_ISAR2_EL1),
- ID_SANITISED(ID_ISAR3_EL1),
- ID_SANITISED(ID_ISAR4_EL1),
- ID_SANITISED(ID_ISAR5_EL1),
- ID_SANITISED(ID_MMFR4_EL1),
- ID_SANITISED(ID_ISAR6_EL1),
+ AA32_ID_SANITISED(ID_ISAR0_EL1),
+ AA32_ID_SANITISED(ID_ISAR1_EL1),
+ AA32_ID_SANITISED(ID_ISAR2_EL1),
+ AA32_ID_SANITISED(ID_ISAR3_EL1),
+ AA32_ID_SANITISED(ID_ISAR4_EL1),
+ AA32_ID_SANITISED(ID_ISAR5_EL1),
+ AA32_ID_SANITISED(ID_MMFR4_EL1),
+ AA32_ID_SANITISED(ID_ISAR6_EL1),
/* CRm=3 */
- ID_SANITISED(MVFR0_EL1),
- ID_SANITISED(MVFR1_EL1),
- ID_SANITISED(MVFR2_EL1),
+ AA32_ID_SANITISED(MVFR0_EL1),
+ AA32_ID_SANITISED(MVFR1_EL1),
+ AA32_ID_SANITISED(MVFR2_EL1),
ID_UNALLOCATED(3,3),
- ID_SANITISED(ID_PFR2_EL1),
+ AA32_ID_SANITISED(ID_PFR2_EL1),
ID_HIDDEN(ID_DFR1_EL1),
- ID_SANITISED(ID_MMFR5_EL1),
+ AA32_ID_SANITISED(ID_MMFR5_EL1),
ID_UNALLOCATED(3,7),
/* AArch64 ID registers */
@@ -1508,7 +1478,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
ID_UNALLOCATED(4,2),
ID_UNALLOCATED(4,3),
ID_SANITISED(ID_AA64ZFR0_EL1),
- ID_UNALLOCATED(4,5),
+ ID_HIDDEN(ID_AA64SMFR0_EL1),
ID_UNALLOCATED(4,6),
ID_UNALLOCATED(4,7),
@@ -1551,6 +1521,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_ZCR_EL1), NULL, reset_val, ZCR_EL1, 0, .visibility = sve_visibility },
{ SYS_DESC(SYS_TRFCR_EL1), undef_access },
+ { SYS_DESC(SYS_SMPRI_EL1), undef_access },
+ { SYS_DESC(SYS_SMCR_EL1), undef_access },
{ SYS_DESC(SYS_TTBR0_EL1), access_vm_reg, reset_unknown, TTBR0_EL1 },
{ SYS_DESC(SYS_TTBR1_EL1), access_vm_reg, reset_unknown, TTBR1_EL1 },
{ SYS_DESC(SYS_TCR_EL1), access_vm_reg, reset_val, TCR_EL1, 0 },
@@ -1633,8 +1605,10 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_CCSIDR_EL1), access_ccsidr },
{ SYS_DESC(SYS_CLIDR_EL1), access_clidr },
+ { SYS_DESC(SYS_SMIDR_EL1), undef_access },
{ SYS_DESC(SYS_CSSELR_EL1), access_csselr, reset_unknown, CSSELR_EL1 },
{ SYS_DESC(SYS_CTR_EL0), access_ctr },
+ { SYS_DESC(SYS_SVCR), undef_access },
{ PMU_SYS_REG(SYS_PMCR_EL0), .access = access_pmcr,
.reset = reset_pmcr, .reg = PMCR_EL0 },
@@ -1674,6 +1648,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_TPIDR_EL0), NULL, reset_unknown, TPIDR_EL0 },
{ SYS_DESC(SYS_TPIDRRO_EL0), NULL, reset_unknown, TPIDRRO_EL0 },
+ { SYS_DESC(SYS_TPIDR2_EL0), undef_access },
{ SYS_DESC(SYS_SCXTNUM_EL0), undef_access },
@@ -1839,11 +1814,11 @@ static bool trap_dbgdidr(struct kvm_vcpu *vcpu,
} else {
u64 dfr = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
u64 pfr = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
- u32 el3 = !!cpuid_feature_extract_unsigned_field(pfr, ID_AA64PFR0_EL3_SHIFT);
+ u32 el3 = !!cpuid_feature_extract_unsigned_field(pfr, ID_AA64PFR0_EL1_EL3_SHIFT);
- p->regval = ((((dfr >> ID_AA64DFR0_WRPS_SHIFT) & 0xf) << 28) |
- (((dfr >> ID_AA64DFR0_BRPS_SHIFT) & 0xf) << 24) |
- (((dfr >> ID_AA64DFR0_CTX_CMPS_SHIFT) & 0xf) << 20)
+ p->regval = ((((dfr >> ID_AA64DFR0_EL1_WRPs_SHIFT) & 0xf) << 28) |
+ (((dfr >> ID_AA64DFR0_EL1_BRPs_SHIFT) & 0xf) << 24) |
+ (((dfr >> ID_AA64DFR0_EL1_CTX_CMPs_SHIFT) & 0xf) << 20)
| (6 << 16) | (1 << 15) | (el3 << 14) | (el3 << 12));
return true;
}
@@ -1920,10 +1895,10 @@ static const struct sys_reg_desc cp14_regs[] = {
DBGBXVR(0),
/* DBGOSLAR */
- { Op1( 0), CRn( 1), CRm( 0), Op2( 4), trap_raz_wi },
+ { Op1( 0), CRn( 1), CRm( 0), Op2( 4), trap_oslar_el1 },
DBGBXVR(1),
/* DBGOSLSR */
- { Op1( 0), CRn( 1), CRm( 1), Op2( 4), trap_oslsr_el1 },
+ { Op1( 0), CRn( 1), CRm( 1), Op2( 4), trap_oslsr_el1, NULL, OSLSR_EL1 },
DBGBXVR(2),
DBGBXVR(3),
/* DBGOSDLR */
@@ -1969,20 +1944,22 @@ static const struct sys_reg_desc cp14_64_regs[] = {
{ Op1( 0), CRm( 2), .access = trap_raz_wi },
};
+#define CP15_PMU_SYS_REG(_map, _Op1, _CRn, _CRm, _Op2) \
+ AA32(_map), \
+ Op1(_Op1), CRn(_CRn), CRm(_CRm), Op2(_Op2), \
+ .visibility = pmu_visibility
+
/* Macro to expand the PMEVCNTRn register */
#define PMU_PMEVCNTR(n) \
- /* PMEVCNTRn */ \
- { Op1(0), CRn(0b1110), \
- CRm((0b1000 | (((n) >> 3) & 0x3))), Op2(((n) & 0x7)), \
- access_pmu_evcntr }
+ { CP15_PMU_SYS_REG(DIRECT, 0, 0b1110, \
+ (0b1000 | (((n) >> 3) & 0x3)), ((n) & 0x7)), \
+ .access = access_pmu_evcntr }
/* Macro to expand the PMEVTYPERn register */
#define PMU_PMEVTYPER(n) \
- /* PMEVTYPERn */ \
- { Op1(0), CRn(0b1110), \
- CRm((0b1100 | (((n) >> 3) & 0x3))), Op2(((n) & 0x7)), \
- access_pmu_evtyper }
-
+ { CP15_PMU_SYS_REG(DIRECT, 0, 0b1110, \
+ (0b1100 | (((n) >> 3) & 0x3)), ((n) & 0x7)), \
+ .access = access_pmu_evtyper }
/*
* Trapped cp15 registers. TTBR0/TTBR1 get a double encoding,
* depending on the way they are accessed (as a 32bit or a 64bit
@@ -2022,25 +1999,25 @@ static const struct sys_reg_desc cp15_regs[] = {
{ Op1( 0), CRn( 7), CRm(14), Op2( 2), access_dcsw },
/* PMU */
- { Op1( 0), CRn( 9), CRm(12), Op2( 0), access_pmcr },
- { Op1( 0), CRn( 9), CRm(12), Op2( 1), access_pmcnten },
- { Op1( 0), CRn( 9), CRm(12), Op2( 2), access_pmcnten },
- { Op1( 0), CRn( 9), CRm(12), Op2( 3), access_pmovs },
- { Op1( 0), CRn( 9), CRm(12), Op2( 4), access_pmswinc },
- { Op1( 0), CRn( 9), CRm(12), Op2( 5), access_pmselr },
- { AA32(LO), Op1( 0), CRn( 9), CRm(12), Op2( 6), access_pmceid },
- { AA32(LO), Op1( 0), CRn( 9), CRm(12), Op2( 7), access_pmceid },
- { Op1( 0), CRn( 9), CRm(13), Op2( 0), access_pmu_evcntr },
- { Op1( 0), CRn( 9), CRm(13), Op2( 1), access_pmu_evtyper },
- { Op1( 0), CRn( 9), CRm(13), Op2( 2), access_pmu_evcntr },
- { Op1( 0), CRn( 9), CRm(14), Op2( 0), access_pmuserenr },
- { Op1( 0), CRn( 9), CRm(14), Op2( 1), access_pminten },
- { Op1( 0), CRn( 9), CRm(14), Op2( 2), access_pminten },
- { Op1( 0), CRn( 9), CRm(14), Op2( 3), access_pmovs },
- { AA32(HI), Op1( 0), CRn( 9), CRm(14), Op2( 4), access_pmceid },
- { AA32(HI), Op1( 0), CRn( 9), CRm(14), Op2( 5), access_pmceid },
+ { CP15_PMU_SYS_REG(DIRECT, 0, 9, 12, 0), .access = access_pmcr },
+ { CP15_PMU_SYS_REG(DIRECT, 0, 9, 12, 1), .access = access_pmcnten },
+ { CP15_PMU_SYS_REG(DIRECT, 0, 9, 12, 2), .access = access_pmcnten },
+ { CP15_PMU_SYS_REG(DIRECT, 0, 9, 12, 3), .access = access_pmovs },
+ { CP15_PMU_SYS_REG(DIRECT, 0, 9, 12, 4), .access = access_pmswinc },
+ { CP15_PMU_SYS_REG(DIRECT, 0, 9, 12, 5), .access = access_pmselr },
+ { CP15_PMU_SYS_REG(LO, 0, 9, 12, 6), .access = access_pmceid },
+ { CP15_PMU_SYS_REG(LO, 0, 9, 12, 7), .access = access_pmceid },
+ { CP15_PMU_SYS_REG(DIRECT, 0, 9, 13, 0), .access = access_pmu_evcntr },
+ { CP15_PMU_SYS_REG(DIRECT, 0, 9, 13, 1), .access = access_pmu_evtyper },
+ { CP15_PMU_SYS_REG(DIRECT, 0, 9, 13, 2), .access = access_pmu_evcntr },
+ { CP15_PMU_SYS_REG(DIRECT, 0, 9, 14, 0), .access = access_pmuserenr },
+ { CP15_PMU_SYS_REG(DIRECT, 0, 9, 14, 1), .access = access_pminten },
+ { CP15_PMU_SYS_REG(DIRECT, 0, 9, 14, 2), .access = access_pminten },
+ { CP15_PMU_SYS_REG(DIRECT, 0, 9, 14, 3), .access = access_pmovs },
+ { CP15_PMU_SYS_REG(HI, 0, 9, 14, 4), .access = access_pmceid },
+ { CP15_PMU_SYS_REG(HI, 0, 9, 14, 5), .access = access_pmceid },
/* PMMIR */
- { Op1( 0), CRn( 9), CRm(14), Op2( 6), trap_raz_wi },
+ { CP15_PMU_SYS_REG(DIRECT, 0, 9, 14, 6), .access = trap_raz_wi },
/* PRRR/MAIR0 */
{ AA32(LO), Op1( 0), CRn(10), CRm( 2), Op2( 0), access_vm_reg, NULL, MAIR_EL1 },
@@ -2125,7 +2102,7 @@ static const struct sys_reg_desc cp15_regs[] = {
PMU_PMEVTYPER(29),
PMU_PMEVTYPER(30),
/* PMCCFILTR */
- { Op1(0), CRn(14), CRm(15), Op2(7), access_pmu_evtyper },
+ { CP15_PMU_SYS_REG(DIRECT, 0, 14, 15, 7), .access = access_pmu_evtyper },
{ Op1(1), CRn( 0), CRm( 0), Op2(0), access_ccsidr },
{ Op1(1), CRn( 0), CRm( 0), Op2(1), access_clidr },
@@ -2134,7 +2111,7 @@ static const struct sys_reg_desc cp15_regs[] = {
static const struct sys_reg_desc cp15_64_regs[] = {
{ Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, TTBR0_EL1 },
- { Op1( 0), CRn( 0), CRm( 9), Op2( 0), access_pmu_evcntr },
+ { CP15_PMU_SYS_REG(DIRECT, 0, 0, 9, 0), .access = access_pmu_evcntr },
{ Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI1R */
{ Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, TTBR1_EL1 },
{ Op1( 1), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_ASGI1R */
@@ -2142,25 +2119,24 @@ static const struct sys_reg_desc cp15_64_regs[] = {
{ SYS_DESC(SYS_AARCH32_CNTP_CVAL), access_arch_timer },
};
-static int check_sysreg_table(const struct sys_reg_desc *table, unsigned int n,
- bool is_32)
+static bool check_sysreg_table(const struct sys_reg_desc *table, unsigned int n,
+ bool is_32)
{
unsigned int i;
for (i = 0; i < n; i++) {
if (!is_32 && table[i].reg && !table[i].reset) {
- kvm_err("sys_reg table %p entry %d has lacks reset\n",
- table, i);
- return 1;
+ kvm_err("sys_reg table %pS entry %d lacks reset\n", &table[i], i);
+ return false;
}
if (i && cmp_sys_reg(&table[i-1], &table[i]) >= 0) {
- kvm_err("sys_reg table %p out of order (%d)\n", table, i - 1);
- return 1;
+ kvm_err("sys_reg table %pS entry %d out of order\n", &table[i - 1], i - 1);
+ return false;
}
}
- return 0;
+ return true;
}
int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu)
@@ -2201,27 +2177,27 @@ static void perform_access(struct kvm_vcpu *vcpu,
* @table: array of trap descriptors
* @num: size of the trap descriptor array
*
- * Return 0 if the access has been handled, and -1 if not.
+ * Return true if the access has been handled, false if not.
*/
-static int emulate_cp(struct kvm_vcpu *vcpu,
- struct sys_reg_params *params,
- const struct sys_reg_desc *table,
- size_t num)
+static bool emulate_cp(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *params,
+ const struct sys_reg_desc *table,
+ size_t num)
{
const struct sys_reg_desc *r;
if (!table)
- return -1; /* Not handled */
+ return false; /* Not handled */
r = find_reg(params, table, num);
if (r) {
perform_access(vcpu, params, r);
- return 0;
+ return true;
}
/* Not handled */
- return -1;
+ return false;
}
static void unhandled_cp_access(struct kvm_vcpu *vcpu,
@@ -2259,7 +2235,7 @@ static int kvm_handle_cp_64(struct kvm_vcpu *vcpu,
size_t nr_global)
{
struct sys_reg_params params;
- u32 esr = kvm_vcpu_get_esr(vcpu);
+ u64 esr = kvm_vcpu_get_esr(vcpu);
int Rt = kvm_vcpu_sys_get_rt(vcpu);
int Rt2 = (esr >> 10) & 0x1f;
@@ -2285,7 +2261,7 @@ static int kvm_handle_cp_64(struct kvm_vcpu *vcpu,
* potential register operation in the case of a read and return
* with success.
*/
- if (!emulate_cp(vcpu, &params, global, nr_global)) {
+ if (emulate_cp(vcpu, &params, global, nr_global)) {
/* Split up the value between registers for the read side */
if (!params.is_write) {
vcpu_set_reg(vcpu, Rt, lower_32_bits(params.regval));
@@ -2299,34 +2275,144 @@ static int kvm_handle_cp_64(struct kvm_vcpu *vcpu,
return 1;
}
+static bool emulate_sys_reg(struct kvm_vcpu *vcpu, struct sys_reg_params *params);
+
+/*
+ * The CP10 ID registers are architecturally mapped to AArch64 feature
+ * registers. Abuse that fact so we can rely on the AArch64 handler for accesses
+ * from AArch32.
+ */
+static bool kvm_esr_cp10_id_to_sys64(u64 esr, struct sys_reg_params *params)
+{
+ u8 reg_id = (esr >> 10) & 0xf;
+ bool valid;
+
+ params->is_write = ((esr & 1) == 0);
+ params->Op0 = 3;
+ params->Op1 = 0;
+ params->CRn = 0;
+ params->CRm = 3;
+
+ /* CP10 ID registers are read-only */
+ valid = !params->is_write;
+
+ switch (reg_id) {
+ /* MVFR0 */
+ case 0b0111:
+ params->Op2 = 0;
+ break;
+ /* MVFR1 */
+ case 0b0110:
+ params->Op2 = 1;
+ break;
+ /* MVFR2 */
+ case 0b0101:
+ params->Op2 = 2;
+ break;
+ default:
+ valid = false;
+ }
+
+ if (valid)
+ return true;
+
+ kvm_pr_unimpl("Unhandled cp10 register %s: %u\n",
+ params->is_write ? "write" : "read", reg_id);
+ return false;
+}
+
+/**
+ * kvm_handle_cp10_id() - Handles a VMRS trap on guest access to a 'Media and
+ * VFP Register' from AArch32.
+ * @vcpu: The vCPU pointer
+ *
+ * MVFR{0-2} are architecturally mapped to the AArch64 MVFR{0-2}_EL1 registers.
+ * Work out the correct AArch64 system register encoding and reroute to the
+ * AArch64 system register emulation.
+ */
+int kvm_handle_cp10_id(struct kvm_vcpu *vcpu)
+{
+ int Rt = kvm_vcpu_sys_get_rt(vcpu);
+ u64 esr = kvm_vcpu_get_esr(vcpu);
+ struct sys_reg_params params;
+
+ /* UNDEF on any unhandled register access */
+ if (!kvm_esr_cp10_id_to_sys64(esr, &params)) {
+ kvm_inject_undefined(vcpu);
+ return 1;
+ }
+
+ if (emulate_sys_reg(vcpu, &params))
+ vcpu_set_reg(vcpu, Rt, params.regval);
+
+ return 1;
+}
+
+/**
+ * kvm_emulate_cp15_id_reg() - Handles an MRC trap on a guest CP15 access where
+ * CRn=0, which corresponds to the AArch32 feature
+ * registers.
+ * @vcpu: the vCPU pointer
+ * @params: the system register access parameters.
+ *
+ * Our cp15 system register tables do not enumerate the AArch32 feature
+ * registers. Conveniently, our AArch64 table does, and the AArch32 system
+ * register encoding can be trivially remapped into the AArch64 encoding for
+ * the feature registers: append op0=3, leaving op1, CRn, CRm, and op2 the same.
+ *
+ * According to DDI0487G.b G7.3.1, paragraph "Behavior of VMSAv8-32 32-bit
+ * System registers with (coproc=0b1111, CRn==c0)", read accesses from this
+ * range are either UNKNOWN or RES0. Rerouting remains architectural as we
+ * treat undefined registers in this range as RAZ.
+ */
+static int kvm_emulate_cp15_id_reg(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *params)
+{
+ int Rt = kvm_vcpu_sys_get_rt(vcpu);
+
+ /* Treat impossible writes to RO registers as UNDEFINED */
+ if (params->is_write) {
+ unhandled_cp_access(vcpu, params);
+ return 1;
+ }
+
+ params->Op0 = 3;
+
+ /*
+ * All registers where CRm > 3 are known to be UNKNOWN/RAZ from AArch32.
+ * Avoid conflicting with future expansion of AArch64 feature registers
+ * and simply treat them as RAZ here.
+ */
+ if (params->CRm > 3)
+ params->regval = 0;
+ else if (!emulate_sys_reg(vcpu, params))
+ return 1;
+
+ vcpu_set_reg(vcpu, Rt, params->regval);
+ return 1;
+}
+
/**
* kvm_handle_cp_32 -- handles a mrc/mcr trap on a guest CP14/CP15 access
* @vcpu: The VCPU pointer
* @run: The kvm_run struct
*/
static int kvm_handle_cp_32(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *params,
const struct sys_reg_desc *global,
size_t nr_global)
{
- struct sys_reg_params params;
- u32 esr = kvm_vcpu_get_esr(vcpu);
int Rt = kvm_vcpu_sys_get_rt(vcpu);
- params.CRm = (esr >> 1) & 0xf;
- params.regval = vcpu_get_reg(vcpu, Rt);
- params.is_write = ((esr & 1) == 0);
- params.CRn = (esr >> 10) & 0xf;
- params.Op0 = 0;
- params.Op1 = (esr >> 14) & 0x7;
- params.Op2 = (esr >> 17) & 0x7;
+ params->regval = vcpu_get_reg(vcpu, Rt);
- if (!emulate_cp(vcpu, &params, global, nr_global)) {
- if (!params.is_write)
- vcpu_set_reg(vcpu, Rt, params.regval);
+ if (emulate_cp(vcpu, params, global, nr_global)) {
+ if (!params->is_write)
+ vcpu_set_reg(vcpu, Rt, params->regval);
return 1;
}
- unhandled_cp_access(vcpu, &params);
+ unhandled_cp_access(vcpu, params);
return 1;
}
@@ -2337,7 +2423,20 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu)
int kvm_handle_cp15_32(struct kvm_vcpu *vcpu)
{
- return kvm_handle_cp_32(vcpu, cp15_regs, ARRAY_SIZE(cp15_regs));
+ struct sys_reg_params params;
+
+ params = esr_cp1x_32_to_params(kvm_vcpu_get_esr(vcpu));
+
+ /*
+ * Certain AArch32 ID registers are handled by rerouting to the AArch64
+ * system register table. Registers in the ID range where CRm=0 are
+ * excluded from this scheme as they do not trivially map into AArch64
+ * system register encodings.
+ */
+ if (params.Op1 == 0 && params.CRn == 0 && params.CRm)
+ return kvm_emulate_cp15_id_reg(vcpu, &params);
+
+ return kvm_handle_cp_32(vcpu, &params, cp15_regs, ARRAY_SIZE(cp15_regs));
}
int kvm_handle_cp14_64(struct kvm_vcpu *vcpu)
@@ -2347,7 +2446,11 @@ int kvm_handle_cp14_64(struct kvm_vcpu *vcpu)
int kvm_handle_cp14_32(struct kvm_vcpu *vcpu)
{
- return kvm_handle_cp_32(vcpu, cp14_regs, ARRAY_SIZE(cp14_regs));
+ struct sys_reg_params params;
+
+ params = esr_cp1x_32_to_params(kvm_vcpu_get_esr(vcpu));
+
+ return kvm_handle_cp_32(vcpu, &params, cp14_regs, ARRAY_SIZE(cp14_regs));
}
static bool is_imp_def_sys_reg(struct sys_reg_params *params)
@@ -2356,7 +2459,14 @@ static bool is_imp_def_sys_reg(struct sys_reg_params *params)
return params->Op0 == 3 && (params->CRn & 0b1011) == 0b1011;
}
-static int emulate_sys_reg(struct kvm_vcpu *vcpu,
+/**
+ * emulate_sys_reg - Emulate a guest access to an AArch64 system register
+ * @vcpu: The VCPU pointer
+ * @params: Decoded system register parameters
+ *
+ * Return: true if the system register access was successful, false otherwise.
+ */
+static bool emulate_sys_reg(struct kvm_vcpu *vcpu,
struct sys_reg_params *params)
{
const struct sys_reg_desc *r;
@@ -2365,7 +2475,10 @@ static int emulate_sys_reg(struct kvm_vcpu *vcpu,
if (likely(r)) {
perform_access(vcpu, params, r);
- } else if (is_imp_def_sys_reg(params)) {
+ return true;
+ }
+
+ if (is_imp_def_sys_reg(params)) {
kvm_inject_undefined(vcpu);
} else {
print_sys_reg_msg(params,
@@ -2373,7 +2486,7 @@ static int emulate_sys_reg(struct kvm_vcpu *vcpu,
*vcpu_pc(vcpu), *vcpu_cpsr(vcpu));
kvm_inject_undefined(vcpu);
}
- return 1;
+ return false;
}
/**
@@ -2401,18 +2514,18 @@ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu)
struct sys_reg_params params;
unsigned long esr = kvm_vcpu_get_esr(vcpu);
int Rt = kvm_vcpu_sys_get_rt(vcpu);
- int ret;
trace_kvm_handle_sys_reg(esr);
params = esr_sys64_to_params(esr);
params.regval = vcpu_get_reg(vcpu, Rt);
- ret = emulate_sys_reg(vcpu, &params);
+ if (!emulate_sys_reg(vcpu, &params))
+ return 1;
if (!params.is_write)
vcpu_set_reg(vcpu, Rt, params.regval);
- return ret;
+ return 1;
}
/******************************************************************************
@@ -2448,35 +2561,34 @@ static bool index_to_params(u64 id, struct sys_reg_params *params)
}
}
-const struct sys_reg_desc *find_reg_by_id(u64 id,
- struct sys_reg_params *params,
- const struct sys_reg_desc table[],
- unsigned int num)
+const struct sys_reg_desc *get_reg_by_id(u64 id,
+ const struct sys_reg_desc table[],
+ unsigned int num)
{
- if (!index_to_params(id, params))
+ struct sys_reg_params params;
+
+ if (!index_to_params(id, &params))
return NULL;
- return find_reg(params, table, num);
+ return find_reg(&params, table, num);
}
/* Decode an index value, and find the sys_reg_desc entry. */
-static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu,
- u64 id)
+static const struct sys_reg_desc *
+id_to_sys_reg_desc(struct kvm_vcpu *vcpu, u64 id,
+ const struct sys_reg_desc table[], unsigned int num)
+
{
const struct sys_reg_desc *r;
- struct sys_reg_params params;
/* We only do sys_reg for now. */
if ((id & KVM_REG_ARM_COPROC_MASK) != KVM_REG_ARM64_SYSREG)
return NULL;
- if (!index_to_params(id, &params))
- return NULL;
-
- r = find_reg(&params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
+ r = get_reg_by_id(id, table, num);
/* Not saved in the sys_reg array and not otherwise accessible? */
- if (r && !(r->reg || r->get_user))
+ if (r && (!(r->reg || r->get_user) || sysreg_hidden(vcpu, r)))
r = NULL;
return r;
@@ -2516,48 +2628,30 @@ static struct sys_reg_desc invariant_sys_regs[] = {
{ SYS_DESC(SYS_CTR_EL0), NULL, get_ctr_el0 },
};
-static int reg_from_user(u64 *val, const void __user *uaddr, u64 id)
-{
- if (copy_from_user(val, uaddr, KVM_REG_SIZE(id)) != 0)
- return -EFAULT;
- return 0;
-}
-
-static int reg_to_user(void __user *uaddr, const u64 *val, u64 id)
+static int get_invariant_sys_reg(u64 id, u64 __user *uaddr)
{
- if (copy_to_user(uaddr, val, KVM_REG_SIZE(id)) != 0)
- return -EFAULT;
- return 0;
-}
-
-static int get_invariant_sys_reg(u64 id, void __user *uaddr)
-{
- struct sys_reg_params params;
const struct sys_reg_desc *r;
- r = find_reg_by_id(id, &params, invariant_sys_regs,
- ARRAY_SIZE(invariant_sys_regs));
+ r = get_reg_by_id(id, invariant_sys_regs,
+ ARRAY_SIZE(invariant_sys_regs));
if (!r)
return -ENOENT;
- return reg_to_user(uaddr, &r->val, id);
+ return put_user(r->val, uaddr);
}
-static int set_invariant_sys_reg(u64 id, void __user *uaddr)
+static int set_invariant_sys_reg(u64 id, u64 __user *uaddr)
{
- struct sys_reg_params params;
const struct sys_reg_desc *r;
- int err;
- u64 val = 0; /* Make sure high bits are 0 for 32-bit regs */
+ u64 val;
- r = find_reg_by_id(id, &params, invariant_sys_regs,
- ARRAY_SIZE(invariant_sys_regs));
+ r = get_reg_by_id(id, invariant_sys_regs,
+ ARRAY_SIZE(invariant_sys_regs));
if (!r)
return -ENOENT;
- err = reg_from_user(&val, uaddr, id);
- if (err)
- return err;
+ if (get_user(val, uaddr))
+ return -EFAULT;
/* This is what we mean by invariant: you can't change it. */
if (r->val != val)
@@ -2648,54 +2742,89 @@ static int demux_c15_set(u64 id, void __user *uaddr)
}
}
-int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+int kvm_sys_reg_get_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg,
+ const struct sys_reg_desc table[], unsigned int num)
{
+ u64 __user *uaddr = (u64 __user *)(unsigned long)reg->addr;
const struct sys_reg_desc *r;
+ u64 val;
+ int ret;
+
+ r = id_to_sys_reg_desc(vcpu, reg->id, table, num);
+ if (!r)
+ return -ENOENT;
+
+ if (r->get_user) {
+ ret = (r->get_user)(vcpu, r, &val);
+ } else {
+ val = __vcpu_sys_reg(vcpu, r->reg);
+ ret = 0;
+ }
+
+ if (!ret)
+ ret = put_user(val, uaddr);
+
+ return ret;
+}
+
+int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
void __user *uaddr = (void __user *)(unsigned long)reg->addr;
+ int err;
if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
return demux_c15_get(reg->id, uaddr);
- if (KVM_REG_SIZE(reg->id) != sizeof(__u64))
- return -ENOENT;
+ err = get_invariant_sys_reg(reg->id, uaddr);
+ if (err != -ENOENT)
+ return err;
- r = index_to_sys_reg_desc(vcpu, reg->id);
- if (!r)
- return get_invariant_sys_reg(reg->id, uaddr);
+ return kvm_sys_reg_get_user(vcpu, reg,
+ sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
+}
- /* Check for regs disabled by runtime config */
- if (sysreg_hidden(vcpu, r))
+int kvm_sys_reg_set_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg,
+ const struct sys_reg_desc table[], unsigned int num)
+{
+ u64 __user *uaddr = (u64 __user *)(unsigned long)reg->addr;
+ const struct sys_reg_desc *r;
+ u64 val;
+ int ret;
+
+ if (get_user(val, uaddr))
+ return -EFAULT;
+
+ r = id_to_sys_reg_desc(vcpu, reg->id, table, num);
+ if (!r)
return -ENOENT;
- if (r->get_user)
- return (r->get_user)(vcpu, r, reg, uaddr);
+ if (sysreg_user_write_ignore(vcpu, r))
+ return 0;
+
+ if (r->set_user) {
+ ret = (r->set_user)(vcpu, r, val);
+ } else {
+ __vcpu_sys_reg(vcpu, r->reg) = val;
+ ret = 0;
+ }
- return reg_to_user(uaddr, &__vcpu_sys_reg(vcpu, r->reg), reg->id);
+ return ret;
}
int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
{
- const struct sys_reg_desc *r;
void __user *uaddr = (void __user *)(unsigned long)reg->addr;
+ int err;
if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
return demux_c15_set(reg->id, uaddr);
- if (KVM_REG_SIZE(reg->id) != sizeof(__u64))
- return -ENOENT;
-
- r = index_to_sys_reg_desc(vcpu, reg->id);
- if (!r)
- return set_invariant_sys_reg(reg->id, uaddr);
-
- /* Check for regs disabled by runtime config */
- if (sysreg_hidden(vcpu, r))
- return -ENOENT;
-
- if (r->set_user)
- return (r->set_user)(vcpu, r, reg, uaddr);
+ err = set_invariant_sys_reg(reg->id, uaddr);
+ if (err != -ENOENT)
+ return err;
- return reg_from_user(&__vcpu_sys_reg(vcpu, r->reg), uaddr, reg->id);
+ return kvm_sys_reg_set_user(vcpu, reg,
+ sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
}
static unsigned int num_demux_regs(void)
@@ -2815,18 +2944,22 @@ int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
return write_demux_regids(uindices);
}
-void kvm_sys_reg_table_init(void)
+int kvm_sys_reg_table_init(void)
{
+ bool valid = true;
unsigned int i;
struct sys_reg_desc clidr;
/* Make sure tables are unique and in order. */
- BUG_ON(check_sysreg_table(sys_reg_descs, ARRAY_SIZE(sys_reg_descs), false));
- BUG_ON(check_sysreg_table(cp14_regs, ARRAY_SIZE(cp14_regs), true));
- BUG_ON(check_sysreg_table(cp14_64_regs, ARRAY_SIZE(cp14_64_regs), true));
- BUG_ON(check_sysreg_table(cp15_regs, ARRAY_SIZE(cp15_regs), true));
- BUG_ON(check_sysreg_table(cp15_64_regs, ARRAY_SIZE(cp15_64_regs), true));
- BUG_ON(check_sysreg_table(invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs), false));
+ valid &= check_sysreg_table(sys_reg_descs, ARRAY_SIZE(sys_reg_descs), false);
+ valid &= check_sysreg_table(cp14_regs, ARRAY_SIZE(cp14_regs), true);
+ valid &= check_sysreg_table(cp14_64_regs, ARRAY_SIZE(cp14_64_regs), true);
+ valid &= check_sysreg_table(cp15_regs, ARRAY_SIZE(cp15_regs), true);
+ valid &= check_sysreg_table(cp15_64_regs, ARRAY_SIZE(cp15_64_regs), true);
+ valid &= check_sysreg_table(invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs), false);
+
+ if (!valid)
+ return -EINVAL;
/* We abuse the reset function to overwrite the table itself. */
for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++)
@@ -2849,4 +2982,6 @@ void kvm_sys_reg_table_init(void)
break;
/* Clear all higher bits. */
cache_levels &= (1 << (i*3))-1;
+
+ return 0;
}
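With the sys_regs.c conversion above, user-space copying lives only in kvm_sys_reg_get_user()/kvm_sys_reg_set_user(), and the per-register callbacks (such as set_oslsr_el1()) take a plain u64. A host-userspace sketch of exercising the new OSLSR_EL1 handling through KVM_GET_ONE_REG/KVM_SET_ONE_REG follows; it is illustrative only, assumes an arm64 host with an already-created vCPU fd, and assumes the usual OSLSR_EL1 encoding (op0=2, op1=0, CRn=1, CRm=1, op2=4) with OSLK at bit 1.

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Assumed OSLSR_EL1 encoding; ARM64_SYS_REG() comes from the arm64 uapi headers. */
#define OSLSR_EL1_ID	ARM64_SYS_REG(2, 0, 1, 1, 4)
#define OSLSR_OSLK	(1ULL << 1)

/*
 * Toggle OSLSR_EL1.OSLK for a vCPU. Per set_oslsr_el1(), only the OSLK bit
 * may differ from the reset value; changing any other bit returns -EINVAL.
 */
static int toggle_oslk(int vcpu_fd)
{
	uint64_t val;
	struct kvm_one_reg reg = {
		.id   = OSLSR_EL1_ID,
		.addr = (uint64_t)(uintptr_t)&val,
	};

	if (ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg) < 0)
		return -1;

	val ^= OSLSR_OSLK;	/* flip the only userspace-writable bit */

	return ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
}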
diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
index cc0cc95a0280..e4ebb3a379fd 100644
--- a/arch/arm64/kvm/sys_regs.h
+++ b/arch/arm64/kvm/sys_regs.h
@@ -35,12 +35,19 @@ struct sys_reg_params {
.Op2 = ((esr) >> 17) & 0x7, \
.is_write = !((esr) & 1) })
+#define esr_cp1x_32_to_params(esr) \
+ ((struct sys_reg_params){ .Op1 = ((esr) >> 14) & 0x7, \
+ .CRn = ((esr) >> 10) & 0xf, \
+ .CRm = ((esr) >> 1) & 0xf, \
+ .Op2 = ((esr) >> 17) & 0x7, \
+ .is_write = !((esr) & 1) })
+
struct sys_reg_desc {
/* Sysreg string for debug */
const char *name;
enum {
- AA32_ZEROHIGH,
+ AA32_DIRECT,
AA32_LO,
AA32_HI,
} aarch32_map;
@@ -68,9 +75,9 @@ struct sys_reg_desc {
/* Custom get/set_user functions, fallback to generic if NULL */
int (*get_user)(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
- const struct kvm_one_reg *reg, void __user *uaddr);
+ u64 *val);
int (*set_user)(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
- const struct kvm_one_reg *reg, void __user *uaddr);
+ u64 val);
/* Return mask of REG_* runtime visibility overrides */
unsigned int (*visibility)(const struct kvm_vcpu *vcpu,
@@ -79,6 +86,7 @@ struct sys_reg_desc {
#define REG_HIDDEN (1 << 0) /* hidden from userspace and guest */
#define REG_RAZ (1 << 1) /* RAZ from userspace and guest */
+#define REG_USER_WI (1 << 2) /* WI from userspace only */
static __printf(2, 3)
inline void print_sys_reg_msg(const struct sys_reg_params *p,
@@ -129,22 +137,31 @@ static inline void reset_val(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r
__vcpu_sys_reg(vcpu, r->reg) = r->val;
}
-static inline bool sysreg_hidden(const struct kvm_vcpu *vcpu,
- const struct sys_reg_desc *r)
+static inline unsigned int sysreg_visibility(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *r)
{
if (likely(!r->visibility))
- return false;
+ return 0;
- return r->visibility(vcpu, r) & REG_HIDDEN;
+ return r->visibility(vcpu, r);
+}
+
+static inline bool sysreg_hidden(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *r)
+{
+ return sysreg_visibility(vcpu, r) & REG_HIDDEN;
}
static inline bool sysreg_visible_as_raz(const struct kvm_vcpu *vcpu,
const struct sys_reg_desc *r)
{
- if (likely(!r->visibility))
- return false;
+ return sysreg_visibility(vcpu, r) & REG_RAZ;
+}
- return r->visibility(vcpu, r) & REG_RAZ;
+static inline bool sysreg_user_write_ignore(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *r)
+{
+ return sysreg_visibility(vcpu, r) & REG_USER_WI;
}
static inline int cmp_sys_reg(const struct sys_reg_desc *i1,
@@ -183,10 +200,16 @@ find_reg(const struct sys_reg_params *params, const struct sys_reg_desc table[],
return __inline_bsearch((void *)pval, table, num, sizeof(table[0]), match_sys_reg);
}
-const struct sys_reg_desc *find_reg_by_id(u64 id,
- struct sys_reg_params *params,
- const struct sys_reg_desc table[],
- unsigned int num);
+const struct sys_reg_desc *get_reg_by_id(u64 id,
+ const struct sys_reg_desc table[],
+ unsigned int num);
+
+int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
+int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
+int kvm_sys_reg_get_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg,
+ const struct sys_reg_desc table[], unsigned int num);
+int kvm_sys_reg_set_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg,
+ const struct sys_reg_desc table[], unsigned int num);
#define AA32(_x) .aarch32_map = AA32_##_x
#define Op0(_x) .Op0 = _x
diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c
index acdb7b3cc97d..91b22a014610 100644
--- a/arch/arm64/kvm/va_layout.c
+++ b/arch/arm64/kvm/va_layout.c
@@ -169,7 +169,7 @@ void __init kvm_update_va_mask(struct alt_instr *alt,
* dictates it and we don't have any spare bits in the
* address), NOP everything after masking the kernel VA.
*/
- if (has_vhe() || (!tag_val && i > 0)) {
+ if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN) || (!tag_val && i > 0)) {
updptr[i] = cpu_to_le32(aarch64_insn_gen_nop());
continue;
}
@@ -193,7 +193,8 @@ void kvm_patch_vector_branch(struct alt_instr *alt,
BUG_ON(nr_inst != 4);
- if (!cpus_have_const_cap(ARM64_SPECTRE_V3A) || WARN_ON_ONCE(has_vhe()))
+ if (!cpus_have_cap(ARM64_SPECTRE_V3A) ||
+ WARN_ON_ONCE(cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN)))
return;
/*
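The vgic-sys-reg-v3.c diff below replaces open-coded shift-and-mask sequences with FIELD_GET()/FIELD_PREP() from <linux/bitfield.h>. A simplified, standalone model of those helpers (assumed semantics; the real macros are additionally type- and constant-checked at build time), using ICC_CTLR_EL1.PRIbits as the example field, which is assumed here to occupy bits [10:8]:

#include <stdint.h>
#include <stdio.h>

/* Contiguous bitmask from bit h down to bit l, modelled on GENMASK_ULL(). */
#define DEMO_GENMASK(h, l)	((~0ULL >> (63 - (h))) & (~0ULL << (l)))
/* Extract a field described by a contiguous mask. */
#define DEMO_FIELD_GET(mask, reg)	(((reg) & (mask)) >> __builtin_ctzll(mask))
/* Place a value into the field described by a contiguous mask. */
#define DEMO_FIELD_PREP(mask, val)	(((uint64_t)(val) << __builtin_ctzll(mask)) & (mask))

int main(void)
{
	uint64_t pri_bits_mask = DEMO_GENMASK(10, 8);	/* assumed PRIbits position */
	uint64_t ctlr = 0;

	ctlr |= DEMO_FIELD_PREP(pri_bits_mask, 7);	/* 8 priority bits - 1 */
	printf("ctlr = %#llx, PRIbits = %llu\n",
	       (unsigned long long)ctlr,
	       (unsigned long long)DEMO_FIELD_GET(pri_bits_mask, ctlr));
	return 0;
}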
diff --git a/arch/arm64/kvm/vgic-sys-reg-v3.c b/arch/arm64/kvm/vgic-sys-reg-v3.c
index 07d5271e9f05..9e7c486b48c2 100644
--- a/arch/arm64/kvm/vgic-sys-reg-v3.c
+++ b/arch/arm64/kvm/vgic-sys-reg-v3.c
@@ -10,293 +10,357 @@
#include "vgic/vgic.h"
#include "sys_regs.h"
-static bool access_gic_ctlr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
- const struct sys_reg_desc *r)
+static int set_gic_ctlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
+ u64 val)
{
u32 host_pri_bits, host_id_bits, host_seis, host_a3v, seis, a3v;
struct vgic_cpu *vgic_v3_cpu = &vcpu->arch.vgic_cpu;
struct vgic_vmcr vmcr;
+
+ vgic_get_vmcr(vcpu, &vmcr);
+
+ /*
+ * Disallow restoring VM state if not supported by this
+ * hardware.
+ */
+ host_pri_bits = FIELD_GET(ICC_CTLR_EL1_PRI_BITS_MASK, val) + 1;
+ if (host_pri_bits > vgic_v3_cpu->num_pri_bits)
+ return -EINVAL;
+
+ vgic_v3_cpu->num_pri_bits = host_pri_bits;
+
+ host_id_bits = FIELD_GET(ICC_CTLR_EL1_ID_BITS_MASK, val);
+ if (host_id_bits > vgic_v3_cpu->num_id_bits)
+ return -EINVAL;
+
+ vgic_v3_cpu->num_id_bits = host_id_bits;
+
+ host_seis = FIELD_GET(ICH_VTR_SEIS_MASK, kvm_vgic_global_state.ich_vtr_el2);
+ seis = FIELD_GET(ICC_CTLR_EL1_SEIS_MASK, val);
+ if (host_seis != seis)
+ return -EINVAL;
+
+ host_a3v = FIELD_GET(ICH_VTR_A3V_MASK, kvm_vgic_global_state.ich_vtr_el2);
+ a3v = FIELD_GET(ICC_CTLR_EL1_A3V_MASK, val);
+ if (host_a3v != a3v)
+ return -EINVAL;
+
+ /*
+ * Set VMCR.CTLR here using the ICC_CTLR_EL1 layout;
+ * vgic_set_vmcr() will convert it to the ICH_VMCR layout.
+ */
+ vmcr.cbpr = FIELD_GET(ICC_CTLR_EL1_CBPR_MASK, val);
+ vmcr.eoim = FIELD_GET(ICC_CTLR_EL1_EOImode_MASK, val);
+ vgic_set_vmcr(vcpu, &vmcr);
+
+ return 0;
+}
+
+static int get_gic_ctlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
+ u64 *valp)
+{
+ struct vgic_cpu *vgic_v3_cpu = &vcpu->arch.vgic_cpu;
+ struct vgic_vmcr vmcr;
u64 val;
vgic_get_vmcr(vcpu, &vmcr);
- if (p->is_write) {
- val = p->regval;
-
- /*
- * Disallow restoring VM state if not supported by this
- * hardware.
- */
- host_pri_bits = ((val & ICC_CTLR_EL1_PRI_BITS_MASK) >>
- ICC_CTLR_EL1_PRI_BITS_SHIFT) + 1;
- if (host_pri_bits > vgic_v3_cpu->num_pri_bits)
- return false;
-
- vgic_v3_cpu->num_pri_bits = host_pri_bits;
-
- host_id_bits = (val & ICC_CTLR_EL1_ID_BITS_MASK) >>
- ICC_CTLR_EL1_ID_BITS_SHIFT;
- if (host_id_bits > vgic_v3_cpu->num_id_bits)
- return false;
-
- vgic_v3_cpu->num_id_bits = host_id_bits;
-
- host_seis = ((kvm_vgic_global_state.ich_vtr_el2 &
- ICH_VTR_SEIS_MASK) >> ICH_VTR_SEIS_SHIFT);
- seis = (val & ICC_CTLR_EL1_SEIS_MASK) >>
- ICC_CTLR_EL1_SEIS_SHIFT;
- if (host_seis != seis)
- return false;
-
- host_a3v = ((kvm_vgic_global_state.ich_vtr_el2 &
- ICH_VTR_A3V_MASK) >> ICH_VTR_A3V_SHIFT);
- a3v = (val & ICC_CTLR_EL1_A3V_MASK) >> ICC_CTLR_EL1_A3V_SHIFT;
- if (host_a3v != a3v)
- return false;
-
- /*
- * Here set VMCR.CTLR in ICC_CTLR_EL1 layout.
- * The vgic_set_vmcr() will convert to ICH_VMCR layout.
- */
- vmcr.cbpr = (val & ICC_CTLR_EL1_CBPR_MASK) >> ICC_CTLR_EL1_CBPR_SHIFT;
- vmcr.eoim = (val & ICC_CTLR_EL1_EOImode_MASK) >> ICC_CTLR_EL1_EOImode_SHIFT;
- vgic_set_vmcr(vcpu, &vmcr);
- } else {
- val = 0;
- val |= (vgic_v3_cpu->num_pri_bits - 1) <<
- ICC_CTLR_EL1_PRI_BITS_SHIFT;
- val |= vgic_v3_cpu->num_id_bits << ICC_CTLR_EL1_ID_BITS_SHIFT;
- val |= ((kvm_vgic_global_state.ich_vtr_el2 &
- ICH_VTR_SEIS_MASK) >> ICH_VTR_SEIS_SHIFT) <<
- ICC_CTLR_EL1_SEIS_SHIFT;
- val |= ((kvm_vgic_global_state.ich_vtr_el2 &
- ICH_VTR_A3V_MASK) >> ICH_VTR_A3V_SHIFT) <<
- ICC_CTLR_EL1_A3V_SHIFT;
- /*
- * The VMCR.CTLR value is in ICC_CTLR_EL1 layout.
- * Extract it directly using ICC_CTLR_EL1 reg definitions.
- */
- val |= (vmcr.cbpr << ICC_CTLR_EL1_CBPR_SHIFT) & ICC_CTLR_EL1_CBPR_MASK;
- val |= (vmcr.eoim << ICC_CTLR_EL1_EOImode_SHIFT) & ICC_CTLR_EL1_EOImode_MASK;
-
- p->regval = val;
- }
+ val = 0;
+ val |= FIELD_PREP(ICC_CTLR_EL1_PRI_BITS_MASK, vgic_v3_cpu->num_pri_bits - 1);
+ val |= FIELD_PREP(ICC_CTLR_EL1_ID_BITS_MASK, vgic_v3_cpu->num_id_bits);
+ val |= FIELD_PREP(ICC_CTLR_EL1_SEIS_MASK,
+ FIELD_GET(ICH_VTR_SEIS_MASK,
+ kvm_vgic_global_state.ich_vtr_el2));
+ val |= FIELD_PREP(ICC_CTLR_EL1_A3V_MASK,
+ FIELD_GET(ICH_VTR_A3V_MASK, kvm_vgic_global_state.ich_vtr_el2));
+ /*
+ * The VMCR.CTLR value is in ICC_CTLR_EL1 layout.
+ * Extract it directly using ICC_CTLR_EL1 reg definitions.
+ */
+ val |= FIELD_PREP(ICC_CTLR_EL1_CBPR_MASK, vmcr.cbpr);
+ val |= FIELD_PREP(ICC_CTLR_EL1_EOImode_MASK, vmcr.eoim);
+
+ *valp = val;
- return true;
+ return 0;
}
-static bool access_gic_pmr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
- const struct sys_reg_desc *r)
+static int set_gic_pmr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
+ u64 val)
{
struct vgic_vmcr vmcr;
vgic_get_vmcr(vcpu, &vmcr);
- if (p->is_write) {
- vmcr.pmr = (p->regval & ICC_PMR_EL1_MASK) >> ICC_PMR_EL1_SHIFT;
- vgic_set_vmcr(vcpu, &vmcr);
- } else {
- p->regval = (vmcr.pmr << ICC_PMR_EL1_SHIFT) & ICC_PMR_EL1_MASK;
- }
+ vmcr.pmr = FIELD_GET(ICC_PMR_EL1_MASK, val);
+ vgic_set_vmcr(vcpu, &vmcr);
- return true;
+ return 0;
}
-static bool access_gic_bpr0(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
- const struct sys_reg_desc *r)
+static int get_gic_pmr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
+ u64 *val)
{
struct vgic_vmcr vmcr;
vgic_get_vmcr(vcpu, &vmcr);
- if (p->is_write) {
- vmcr.bpr = (p->regval & ICC_BPR0_EL1_MASK) >>
- ICC_BPR0_EL1_SHIFT;
- vgic_set_vmcr(vcpu, &vmcr);
- } else {
- p->regval = (vmcr.bpr << ICC_BPR0_EL1_SHIFT) &
- ICC_BPR0_EL1_MASK;
- }
+ *val = FIELD_PREP(ICC_PMR_EL1_MASK, vmcr.pmr);
- return true;
+ return 0;
}
-static bool access_gic_bpr1(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
- const struct sys_reg_desc *r)
+static int set_gic_bpr0(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
+ u64 val)
{
struct vgic_vmcr vmcr;
- if (!p->is_write)
- p->regval = 0;
+ vgic_get_vmcr(vcpu, &vmcr);
+ vmcr.bpr = FIELD_GET(ICC_BPR0_EL1_MASK, val);
+ vgic_set_vmcr(vcpu, &vmcr);
+
+ return 0;
+}
+
+static int get_gic_bpr0(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
+ u64 *val)
+{
+ struct vgic_vmcr vmcr;
vgic_get_vmcr(vcpu, &vmcr);
- if (!vmcr.cbpr) {
- if (p->is_write) {
- vmcr.abpr = (p->regval & ICC_BPR1_EL1_MASK) >>
- ICC_BPR1_EL1_SHIFT;
- vgic_set_vmcr(vcpu, &vmcr);
- } else {
- p->regval = (vmcr.abpr << ICC_BPR1_EL1_SHIFT) &
- ICC_BPR1_EL1_MASK;
- }
- } else {
- if (!p->is_write)
- p->regval = min((vmcr.bpr + 1), 7U);
- }
+ *val = FIELD_PREP(ICC_BPR0_EL1_MASK, vmcr.bpr);
- return true;
+ return 0;
}
-static bool access_gic_grpen0(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
- const struct sys_reg_desc *r)
+static int set_gic_bpr1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
+ u64 val)
{
struct vgic_vmcr vmcr;
vgic_get_vmcr(vcpu, &vmcr);
- if (p->is_write) {
- vmcr.grpen0 = (p->regval & ICC_IGRPEN0_EL1_MASK) >>
- ICC_IGRPEN0_EL1_SHIFT;
+ if (!vmcr.cbpr) {
+ vmcr.abpr = FIELD_GET(ICC_BPR1_EL1_MASK, val);
vgic_set_vmcr(vcpu, &vmcr);
- } else {
- p->regval = (vmcr.grpen0 << ICC_IGRPEN0_EL1_SHIFT) &
- ICC_IGRPEN0_EL1_MASK;
}
- return true;
+ return 0;
}
-static bool access_gic_grpen1(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
- const struct sys_reg_desc *r)
+static int get_gic_bpr1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
+ u64 *val)
{
struct vgic_vmcr vmcr;
vgic_get_vmcr(vcpu, &vmcr);
- if (p->is_write) {
- vmcr.grpen1 = (p->regval & ICC_IGRPEN1_EL1_MASK) >>
- ICC_IGRPEN1_EL1_SHIFT;
- vgic_set_vmcr(vcpu, &vmcr);
- } else {
- p->regval = (vmcr.grpen1 << ICC_IGRPEN1_EL1_SHIFT) &
- ICC_IGRPEN1_EL1_MASK;
- }
+ if (!vmcr.cbpr)
+ *val = FIELD_PREP(ICC_BPR1_EL1_MASK, vmcr.abpr);
+ else
+ *val = min((vmcr.bpr + 1), 7U);
+
+ return 0;
+}
+
+static int set_gic_grpen0(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
+ u64 val)
+{
+ struct vgic_vmcr vmcr;
+
+ vgic_get_vmcr(vcpu, &vmcr);
+ vmcr.grpen0 = FIELD_GET(ICC_IGRPEN0_EL1_MASK, val);
+ vgic_set_vmcr(vcpu, &vmcr);
+
+ return 0;
+}
+
+static int get_gic_grpen0(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
+ u64 *val)
+{
+ struct vgic_vmcr vmcr;
+
+ vgic_get_vmcr(vcpu, &vmcr);
+ *val = FIELD_PREP(ICC_IGRPEN0_EL1_MASK, vmcr.grpen0);
- return true;
+ return 0;
}
-static void vgic_v3_access_apr_reg(struct kvm_vcpu *vcpu,
- struct sys_reg_params *p, u8 apr, u8 idx)
+static int set_gic_grpen1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
+ u64 val)
+{
+ struct vgic_vmcr vmcr;
+
+ vgic_get_vmcr(vcpu, &vmcr);
+ vmcr.grpen1 = FIELD_GET(ICC_IGRPEN1_EL1_MASK, val);
+ vgic_set_vmcr(vcpu, &vmcr);
+
+ return 0;
+}
+
+static int get_gic_grpen1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
+ u64 *val)
+{
+ struct vgic_vmcr vmcr;
+
+ vgic_get_vmcr(vcpu, &vmcr);
+ *val = FIELD_GET(ICC_IGRPEN1_EL1_MASK, vmcr.grpen1);
+
+ return 0;
+}
+
+static void set_apr_reg(struct kvm_vcpu *vcpu, u64 val, u8 apr, u8 idx)
{
struct vgic_v3_cpu_if *vgicv3 = &vcpu->arch.vgic_cpu.vgic_v3;
- uint32_t *ap_reg;
if (apr)
- ap_reg = &vgicv3->vgic_ap1r[idx];
+ vgicv3->vgic_ap1r[idx] = val;
else
- ap_reg = &vgicv3->vgic_ap0r[idx];
+ vgicv3->vgic_ap0r[idx] = val;
+}
+
+static u64 get_apr_reg(struct kvm_vcpu *vcpu, u8 apr, u8 idx)
+{
+ struct vgic_v3_cpu_if *vgicv3 = &vcpu->arch.vgic_cpu.vgic_v3;
- if (p->is_write)
- *ap_reg = p->regval;
+ if (apr)
+ return vgicv3->vgic_ap1r[idx];
else
- p->regval = *ap_reg;
+ return vgicv3->vgic_ap0r[idx];
+}
+
+static int set_gic_ap0r(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
+ u64 val)
+
+{
+ u8 idx = r->Op2 & 3;
+
+ if (idx > vgic_v3_max_apr_idx(vcpu))
+ return -EINVAL;
+
+ set_apr_reg(vcpu, val, 0, idx);
+ return 0;
}
-static bool access_gic_aprn(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
- const struct sys_reg_desc *r, u8 apr)
+static int get_gic_ap0r(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
+ u64 *val)
{
u8 idx = r->Op2 & 3;
if (idx > vgic_v3_max_apr_idx(vcpu))
- goto err;
+ return -EINVAL;
- vgic_v3_access_apr_reg(vcpu, p, apr, idx);
- return true;
-err:
- if (!p->is_write)
- p->regval = 0;
+ *val = get_apr_reg(vcpu, 0, idx);
- return false;
+ return 0;
}
-static bool access_gic_ap0r(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
- const struct sys_reg_desc *r)
+static int set_gic_ap1r(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
+ u64 val)
+
+{
+ u8 idx = r->Op2 & 3;
+
+ if (idx > vgic_v3_max_apr_idx(vcpu))
+ return -EINVAL;
+
+ set_apr_reg(vcpu, val, 1, idx);
+ return 0;
+}
+static int get_gic_ap1r(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
+ u64 *val)
{
- return access_gic_aprn(vcpu, p, r, 0);
+ u8 idx = r->Op2 & 3;
+
+ if (idx > vgic_v3_max_apr_idx(vcpu))
+ return -EINVAL;
+
+ *val = get_apr_reg(vcpu, 1, idx);
+
+ return 0;
}
-static bool access_gic_ap1r(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
- const struct sys_reg_desc *r)
+static int set_gic_sre(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
+ u64 val)
{
- return access_gic_aprn(vcpu, p, r, 1);
+ /* Validate SRE bit */
+ if (!(val & ICC_SRE_EL1_SRE))
+ return -EINVAL;
+
+ return 0;
}
-static bool access_gic_sre(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
- const struct sys_reg_desc *r)
+static int get_gic_sre(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
+ u64 *val)
{
struct vgic_v3_cpu_if *vgicv3 = &vcpu->arch.vgic_cpu.vgic_v3;
- /* Validate SRE bit */
- if (p->is_write) {
- if (!(p->regval & ICC_SRE_EL1_SRE))
- return false;
- } else {
- p->regval = vgicv3->vgic_sre;
- }
+ *val = vgicv3->vgic_sre;
- return true;
+ return 0;
}
+
static const struct sys_reg_desc gic_v3_icc_reg_descs[] = {
- { SYS_DESC(SYS_ICC_PMR_EL1), access_gic_pmr },
- { SYS_DESC(SYS_ICC_BPR0_EL1), access_gic_bpr0 },
- { SYS_DESC(SYS_ICC_AP0R0_EL1), access_gic_ap0r },
- { SYS_DESC(SYS_ICC_AP0R1_EL1), access_gic_ap0r },
- { SYS_DESC(SYS_ICC_AP0R2_EL1), access_gic_ap0r },
- { SYS_DESC(SYS_ICC_AP0R3_EL1), access_gic_ap0r },
- { SYS_DESC(SYS_ICC_AP1R0_EL1), access_gic_ap1r },
- { SYS_DESC(SYS_ICC_AP1R1_EL1), access_gic_ap1r },
- { SYS_DESC(SYS_ICC_AP1R2_EL1), access_gic_ap1r },
- { SYS_DESC(SYS_ICC_AP1R3_EL1), access_gic_ap1r },
- { SYS_DESC(SYS_ICC_BPR1_EL1), access_gic_bpr1 },
- { SYS_DESC(SYS_ICC_CTLR_EL1), access_gic_ctlr },
- { SYS_DESC(SYS_ICC_SRE_EL1), access_gic_sre },
- { SYS_DESC(SYS_ICC_IGRPEN0_EL1), access_gic_grpen0 },
- { SYS_DESC(SYS_ICC_IGRPEN1_EL1), access_gic_grpen1 },
+ { SYS_DESC(SYS_ICC_PMR_EL1),
+ .set_user = set_gic_pmr, .get_user = get_gic_pmr, },
+ { SYS_DESC(SYS_ICC_BPR0_EL1),
+ .set_user = set_gic_bpr0, .get_user = get_gic_bpr0, },
+ { SYS_DESC(SYS_ICC_AP0R0_EL1),
+ .set_user = set_gic_ap0r, .get_user = get_gic_ap0r, },
+ { SYS_DESC(SYS_ICC_AP0R1_EL1),
+ .set_user = set_gic_ap0r, .get_user = get_gic_ap0r, },
+ { SYS_DESC(SYS_ICC_AP0R2_EL1),
+ .set_user = set_gic_ap0r, .get_user = get_gic_ap0r, },
+ { SYS_DESC(SYS_ICC_AP0R3_EL1),
+ .set_user = set_gic_ap0r, .get_user = get_gic_ap0r, },
+ { SYS_DESC(SYS_ICC_AP1R0_EL1),
+ .set_user = set_gic_ap1r, .get_user = get_gic_ap1r, },
+ { SYS_DESC(SYS_ICC_AP1R1_EL1),
+ .set_user = set_gic_ap1r, .get_user = get_gic_ap1r, },
+ { SYS_DESC(SYS_ICC_AP1R2_EL1),
+ .set_user = set_gic_ap1r, .get_user = get_gic_ap1r, },
+ { SYS_DESC(SYS_ICC_AP1R3_EL1),
+ .set_user = set_gic_ap1r, .get_user = get_gic_ap1r, },
+ { SYS_DESC(SYS_ICC_BPR1_EL1),
+ .set_user = set_gic_bpr1, .get_user = get_gic_bpr1, },
+ { SYS_DESC(SYS_ICC_CTLR_EL1),
+ .set_user = set_gic_ctlr, .get_user = get_gic_ctlr, },
+ { SYS_DESC(SYS_ICC_SRE_EL1),
+ .set_user = set_gic_sre, .get_user = get_gic_sre, },
+ { SYS_DESC(SYS_ICC_IGRPEN0_EL1),
+ .set_user = set_gic_grpen0, .get_user = get_gic_grpen0, },
+ { SYS_DESC(SYS_ICC_IGRPEN1_EL1),
+ .set_user = set_gic_grpen1, .get_user = get_gic_grpen1, },
};
-int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, bool is_write, u64 id,
- u64 *reg)
+static u64 attr_to_id(u64 attr)
{
- struct sys_reg_params params;
- u64 sysreg = (id & KVM_DEV_ARM_VGIC_SYSREG_MASK) | KVM_REG_SIZE_U64;
-
- params.regval = *reg;
- params.is_write = is_write;
+ return ARM64_SYS_REG(FIELD_GET(KVM_REG_ARM_VGIC_SYSREG_OP0_MASK, attr),
+ FIELD_GET(KVM_REG_ARM_VGIC_SYSREG_OP1_MASK, attr),
+ FIELD_GET(KVM_REG_ARM_VGIC_SYSREG_CRN_MASK, attr),
+ FIELD_GET(KVM_REG_ARM_VGIC_SYSREG_CRM_MASK, attr),
+ FIELD_GET(KVM_REG_ARM_VGIC_SYSREG_OP2_MASK, attr));
+}
- if (find_reg_by_id(sysreg, &params, gic_v3_icc_reg_descs,
- ARRAY_SIZE(gic_v3_icc_reg_descs)))
+int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
+{
+ if (get_reg_by_id(attr_to_id(attr->attr), gic_v3_icc_reg_descs,
+ ARRAY_SIZE(gic_v3_icc_reg_descs)))
return 0;
return -ENXIO;
}
-int vgic_v3_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu, bool is_write, u64 id,
- u64 *reg)
+int vgic_v3_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr,
+ bool is_write)
{
- struct sys_reg_params params;
- const struct sys_reg_desc *r;
- u64 sysreg = (id & KVM_DEV_ARM_VGIC_SYSREG_MASK) | KVM_REG_SIZE_U64;
+ struct kvm_one_reg reg = {
+ .id = attr_to_id(attr->attr),
+ .addr = attr->addr,
+ };
if (is_write)
- params.regval = *reg;
- params.is_write = is_write;
-
- r = find_reg_by_id(sysreg, &params, gic_v3_icc_reg_descs,
- ARRAY_SIZE(gic_v3_icc_reg_descs));
- if (!r)
- return -ENXIO;
-
- if (!r->access(vcpu, &params, r))
- return -EINVAL;
-
- if (!is_write)
- *reg = params.regval;
-
- return 0;
+ return kvm_sys_reg_set_user(vcpu, &reg, gic_v3_icc_reg_descs,
+ ARRAY_SIZE(gic_v3_icc_reg_descs));
+ else
+ return kvm_sys_reg_get_user(vcpu, &reg, gic_v3_icc_reg_descs,
+ ARRAY_SIZE(gic_v3_icc_reg_descs));
}
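A quick aside on the FIELD_GET()/FIELD_PREP() conversion used throughout this file: the helpers from <linux/bitfield.h> take the mask itself and derive the shift from it, which is what lets the explicit *_SHIFT constants disappear above. A minimal sketch, using a made-up field (EXAMPLE_FIELD_MASK and the two helpers below are illustrative only, not part of this patch):

#include <linux/bitfield.h>
#include <linux/bits.h>
#include <linux/types.h>

/* Hypothetical 3-bit field occupying bits [10:8] of a 64-bit register. */
#define EXAMPLE_FIELD_MASK	GENMASK_ULL(10, 8)

static inline u64 example_pack(u64 reg, u64 field)
{
	/* FIELD_PREP() shifts @field into position under the mask. */
	reg &= ~EXAMPLE_FIELD_MASK;
	return reg | FIELD_PREP(EXAMPLE_FIELD_MASK, field);
}

static inline u64 example_unpack(u64 reg)
{
	/* FIELD_GET() masks @reg and shifts the field back down to bit 0. */
	return FIELD_GET(EXAMPLE_FIELD_MASK, reg);
}

So FIELD_GET(ICC_CTLR_EL1_PRI_BITS_MASK, val) + 1 above is the direct equivalent of the old mask-then-shift-then-add sequence.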
diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c
index f38c40a76251..78cde687383c 100644
--- a/arch/arm64/kvm/vgic/vgic-debug.c
+++ b/arch/arm64/kvm/vgic/vgic-debug.c
@@ -82,7 +82,7 @@ static bool end_of_vgic(struct vgic_state_iter *iter)
static void *vgic_debug_start(struct seq_file *s, loff_t *pos)
{
- struct kvm *kvm = (struct kvm *)s->private;
+ struct kvm *kvm = s->private;
struct vgic_state_iter *iter;
mutex_lock(&kvm->lock);
@@ -110,7 +110,7 @@ out:
static void *vgic_debug_next(struct seq_file *s, void *v, loff_t *pos)
{
- struct kvm *kvm = (struct kvm *)s->private;
+ struct kvm *kvm = s->private;
struct vgic_state_iter *iter = kvm->arch.vgic.iter;
++*pos;
@@ -122,7 +122,7 @@ static void *vgic_debug_next(struct seq_file *s, void *v, loff_t *pos)
static void vgic_debug_stop(struct seq_file *s, void *v)
{
- struct kvm *kvm = (struct kvm *)s->private;
+ struct kvm *kvm = s->private;
struct vgic_state_iter *iter;
/*
@@ -229,8 +229,8 @@ static void print_irq_state(struct seq_file *s, struct vgic_irq *irq,
static int vgic_debug_show(struct seq_file *s, void *v)
{
- struct kvm *kvm = (struct kvm *)s->private;
- struct vgic_state_iter *iter = (struct vgic_state_iter *)v;
+ struct kvm *kvm = s->private;
+ struct vgic_state_iter *iter = v;
struct vgic_irq *irq;
struct kvm_vcpu *vcpu = NULL;
unsigned long flags;
diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
index 0a06d0648970..f6d4f4052555 100644
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -70,8 +70,9 @@ void kvm_vgic_early_init(struct kvm *kvm)
*/
int kvm_vgic_create(struct kvm *kvm, u32 type)
{
- int i, ret;
struct kvm_vcpu *vcpu;
+ unsigned long i;
+ int ret;
if (irqchip_in_kernel(kvm))
return -EEXIST;
@@ -91,17 +92,17 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
return ret;
kvm_for_each_vcpu(i, vcpu, kvm) {
- if (vcpu->arch.has_run_once)
+ if (vcpu_has_run_once(vcpu))
goto out_unlock;
}
ret = 0;
if (type == KVM_DEV_TYPE_ARM_VGIC_V2)
- kvm->arch.max_vcpus = VGIC_V2_MAX_CPUS;
+ kvm->max_vcpus = VGIC_V2_MAX_CPUS;
else
- kvm->arch.max_vcpus = VGIC_V3_MAX_CPUS;
+ kvm->max_vcpus = VGIC_V3_MAX_CPUS;
- if (atomic_read(&kvm->online_vcpus) > kvm->arch.max_vcpus) {
+ if (atomic_read(&kvm->online_vcpus) > kvm->max_vcpus) {
ret = -E2BIG;
goto out_unlock;
}
@@ -255,7 +256,8 @@ int vgic_init(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
struct kvm_vcpu *vcpu;
- int ret = 0, i, idx;
+ int ret = 0, i;
+ unsigned long idx;
if (vgic_initialized(kvm))
return 0;
@@ -308,7 +310,7 @@ int vgic_init(struct kvm *kvm)
goto out;
}
- kvm_for_each_vcpu(i, vcpu, kvm)
+ kvm_for_each_vcpu(idx, vcpu, kvm)
kvm_vgic_vcpu_enable(vcpu);
ret = kvm_vgic_setup_default_irq_routing(kvm);
@@ -317,7 +319,12 @@ int vgic_init(struct kvm *kvm)
vgic_debug_init(kvm);
- dist->implementation_rev = 2;
+ /*
+ * If userspace didn't set the GIC implementation revision,
+ * default to the latest and greatest. You know you want it.
+ */
+ if (!dist->implementation_rev)
+ dist->implementation_rev = KVM_VGIC_IMP_REV_LATEST;
dist->initialized = true;
out:
@@ -370,7 +377,7 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
static void __kvm_vgic_destroy(struct kvm *kvm)
{
struct kvm_vcpu *vcpu;
- int i;
+ unsigned long i;
vgic_debug_destroy(kvm);
diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
index 089fc2ffcb43..733b53055f97 100644
--- a/arch/arm64/kvm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
@@ -406,7 +406,7 @@ static void update_affinity_collection(struct kvm *kvm, struct vgic_its *its,
struct its_ite *ite;
for_each_lpi_its(device, ite, its) {
- if (!ite->collection || coll != ite->collection)
+ if (ite->collection != coll)
continue;
update_affinity_ite(kvm, ite);
@@ -683,7 +683,7 @@ int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its,
if (!vcpu)
return E_ITS_INT_UNMAPPED_INTERRUPT;
- if (!vcpu->arch.vgic_cpu.lpis_enabled)
+ if (!vgic_lpis_enabled(vcpu))
return -EBUSY;
vgic_its_cache_translation(kvm, its, devid, eventid, ite->irq);
@@ -894,6 +894,18 @@ static int vgic_its_cmd_handle_movi(struct kvm *kvm, struct vgic_its *its,
return update_affinity(ite->irq, vcpu);
}
+static bool __is_visible_gfn_locked(struct vgic_its *its, gpa_t gpa)
+{
+ gfn_t gfn = gpa >> PAGE_SHIFT;
+ int idx;
+ bool ret;
+
+ idx = srcu_read_lock(&its->dev->kvm->srcu);
+ ret = kvm_is_visible_gfn(its->dev->kvm, gfn);
+ srcu_read_unlock(&its->dev->kvm->srcu, idx);
+ return ret;
+}
+
/*
* Check whether an ID can be stored into the corresponding guest table.
* For a direct table this is pretty easy, but gets a bit nasty for
@@ -908,9 +920,7 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id,
u64 indirect_ptr, type = GITS_BASER_TYPE(baser);
phys_addr_t base = GITS_BASER_ADDR_48_to_52(baser);
int esz = GITS_BASER_ENTRY_SIZE(baser);
- int index, idx;
- gfn_t gfn;
- bool ret;
+ int index;
switch (type) {
case GITS_BASER_TYPE_DEVICE:
@@ -933,12 +943,11 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id,
return false;
addr = base + id * esz;
- gfn = addr >> PAGE_SHIFT;
if (eaddr)
*eaddr = addr;
- goto out;
+ return __is_visible_gfn_locked(its, addr);
}
/* calculate and check the index into the 1st level */
@@ -964,27 +973,42 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id,
/* Find the address of the actual entry */
index = id % (SZ_64K / esz);
indirect_ptr += index * esz;
- gfn = indirect_ptr >> PAGE_SHIFT;
if (eaddr)
*eaddr = indirect_ptr;
-out:
- idx = srcu_read_lock(&its->dev->kvm->srcu);
- ret = kvm_is_visible_gfn(its->dev->kvm, gfn);
- srcu_read_unlock(&its->dev->kvm->srcu, idx);
- return ret;
+ return __is_visible_gfn_locked(its, indirect_ptr);
+}
+
+/*
+ * Check whether an event ID can be stored in the corresponding Interrupt
+ * Translation Table, which starts at device->itt_addr.
+ */
+static bool vgic_its_check_event_id(struct vgic_its *its, struct its_device *device,
+ u32 event_id)
+{
+ const struct vgic_its_abi *abi = vgic_its_get_abi(its);
+ int ite_esz = abi->ite_esz;
+ gpa_t gpa;
+
+ /* max table size is: BIT_ULL(device->num_eventid_bits) * ite_esz */
+ if (event_id >= BIT_ULL(device->num_eventid_bits))
+ return false;
+
+ gpa = device->itt_addr + event_id * ite_esz;
+ return __is_visible_gfn_locked(its, gpa);
}
+/*
+ * Add a new collection into the ITS collection table.
+ * Returns 0 on success, and a negative error value for generic errors.
+ */
static int vgic_its_alloc_collection(struct vgic_its *its,
struct its_collection **colp,
u32 coll_id)
{
struct its_collection *collection;
- if (!vgic_its_check_id(its, its->baser_coll_table, coll_id, NULL))
- return E_ITS_MAPC_COLLECTION_OOR;
-
collection = kzalloc(sizeof(*collection), GFP_KERNEL_ACCOUNT);
if (!collection)
return -ENOMEM;
@@ -1061,7 +1085,7 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its,
if (!device)
return E_ITS_MAPTI_UNMAPPED_DEVICE;
- if (event_id >= BIT_ULL(device->num_eventid_bits))
+ if (!vgic_its_check_event_id(its, device, event_id))
return E_ITS_MAPTI_ID_OOR;
if (its_cmd_get_command(its_cmd) == GITS_CMD_MAPTI)
@@ -1078,7 +1102,12 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its,
collection = find_collection(its, coll_id);
if (!collection) {
- int ret = vgic_its_alloc_collection(its, &collection, coll_id);
+ int ret;
+
+ if (!vgic_its_check_id(its, its->baser_coll_table, coll_id, NULL))
+ return E_ITS_MAPC_COLLECTION_OOR;
+
+ ret = vgic_its_alloc_collection(its, &collection, coll_id);
if (ret)
return ret;
new_coll = collection;
@@ -1233,6 +1262,10 @@ static int vgic_its_cmd_handle_mapc(struct kvm *kvm, struct vgic_its *its,
if (!collection) {
int ret;
+ if (!vgic_its_check_id(its, its->baser_coll_table,
+ coll_id, NULL))
+ return E_ITS_MAPC_COLLECTION_OOR;
+
ret = vgic_its_alloc_collection(its, &collection,
coll_id);
if (ret)
@@ -1272,6 +1305,11 @@ static int vgic_its_cmd_handle_clear(struct kvm *kvm, struct vgic_its *its,
return 0;
}
+int vgic_its_inv_lpi(struct kvm *kvm, struct vgic_irq *irq)
+{
+ return update_lpi_config(kvm, irq, NULL, true);
+}
+
/*
* The INV command syncs the configuration bits from the memory table.
* Must be called with the its_lock mutex held.
@@ -1288,7 +1326,41 @@ static int vgic_its_cmd_handle_inv(struct kvm *kvm, struct vgic_its *its,
if (!ite)
return E_ITS_INV_UNMAPPED_INTERRUPT;
- return update_lpi_config(kvm, ite->irq, NULL, true);
+ return vgic_its_inv_lpi(kvm, ite->irq);
+}
+
+/**
+ * vgic_its_invall - invalidate all LPIs targeting a given vcpu
+ * @vcpu: the vcpu for which the RD is targeted by an invalidation
+ *
+ * Contrary to the INVALL command, this targets an RD instead of a
+ * collection, and we don't need to hold the its_lock, since no ITS is
+ * involved here.
+ */
+int vgic_its_invall(struct kvm_vcpu *vcpu)
+{
+ struct kvm *kvm = vcpu->kvm;
+ int irq_count, i = 0;
+ u32 *intids;
+
+ irq_count = vgic_copy_lpi_list(kvm, vcpu, &intids);
+ if (irq_count < 0)
+ return irq_count;
+
+ for (i = 0; i < irq_count; i++) {
+ struct vgic_irq *irq = vgic_get_irq(kvm, NULL, intids[i]);
+ if (!irq)
+ continue;
+ update_lpi_config(kvm, irq, vcpu, false);
+ vgic_put_irq(kvm, irq);
+ }
+
+ kfree(intids);
+
+ if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.its_vm)
+ its_invall_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe);
+
+ return 0;
}
/*
@@ -1305,32 +1377,13 @@ static int vgic_its_cmd_handle_invall(struct kvm *kvm, struct vgic_its *its,
u32 coll_id = its_cmd_get_collection(its_cmd);
struct its_collection *collection;
struct kvm_vcpu *vcpu;
- struct vgic_irq *irq;
- u32 *intids;
- int irq_count, i;
collection = find_collection(its, coll_id);
if (!its_is_collection_mapped(collection))
return E_ITS_INVALL_UNMAPPED_COLLECTION;
vcpu = kvm_get_vcpu(kvm, collection->target_addr);
-
- irq_count = vgic_copy_lpi_list(kvm, vcpu, &intids);
- if (irq_count < 0)
- return irq_count;
-
- for (i = 0; i < irq_count; i++) {
- irq = vgic_get_irq(kvm, NULL, intids[i]);
- if (!irq)
- continue;
- update_lpi_config(kvm, irq, vcpu, false);
- vgic_put_irq(kvm, irq);
- }
-
- kfree(intids);
-
- if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.its_vm)
- its_invall_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe);
+ vgic_its_invall(vcpu);
return 0;
}
@@ -2096,7 +2149,7 @@ static int scan_its_table(struct vgic_its *its, gpa_t base, int size, u32 esz,
memset(entry, 0, esz);
- while (len > 0) {
+ while (true) {
int next_offset;
size_t byte_offset;
@@ -2109,6 +2162,9 @@ static int scan_its_table(struct vgic_its *its, gpa_t base, int size, u32 esz,
return next_offset;
byte_offset = next_offset * esz;
+ if (byte_offset >= len)
+ break;
+
id += next_offset;
gpa += byte_offset;
len -= byte_offset;
@@ -2143,7 +2199,7 @@ static int vgic_its_save_ite(struct vgic_its *its, struct its_device *dev,
static int vgic_its_restore_ite(struct vgic_its *its, u32 event_id,
void *ptr, void *opaque)
{
- struct its_device *dev = (struct its_device *)opaque;
+ struct its_device *dev = opaque;
struct its_collection *collection;
struct kvm *kvm = its->dev->kvm;
struct kvm_vcpu *vcpu = NULL;
@@ -2175,6 +2231,9 @@ static int vgic_its_restore_ite(struct vgic_its *its, u32 event_id,
if (!collection)
return -EINVAL;
+ if (!vgic_its_check_event_id(its, dev, event_id))
+ return -EINVAL;
+
ite = vgic_its_alloc_ite(dev, collection, event_id);
if (IS_ERR(ite))
return PTR_ERR(ite);
@@ -2183,8 +2242,10 @@ static int vgic_its_restore_ite(struct vgic_its *its, u32 event_id,
vcpu = kvm_get_vcpu(kvm, collection->target_addr);
irq = vgic_add_lpi(kvm, lpi_id, vcpu);
- if (IS_ERR(irq))
+ if (IS_ERR(irq)) {
+ its_free_ite(kvm, ite);
return PTR_ERR(irq);
+ }
ite->irq = irq;
return offset;
@@ -2296,6 +2357,7 @@ static int vgic_its_restore_dte(struct vgic_its *its, u32 id,
void *ptr, void *opaque)
{
struct its_device *dev;
+ u64 baser = its->baser_device_table;
gpa_t itt_addr;
u8 num_eventid_bits;
u64 entry = *(u64 *)ptr;
@@ -2316,6 +2378,9 @@ static int vgic_its_restore_dte(struct vgic_its *its, u32 id,
/* dte entry is valid */
offset = (entry & KVM_ITS_DTE_NEXT_MASK) >> KVM_ITS_DTE_NEXT_SHIFT;
+ if (!vgic_its_check_id(its, baser, id, NULL))
+ return -EINVAL;
+
dev = vgic_its_alloc_device(its, id, itt_addr, num_eventid_bits);
if (IS_ERR(dev))
return PTR_ERR(dev);
@@ -2445,6 +2510,9 @@ static int vgic_its_restore_device_tables(struct vgic_its *its)
if (ret > 0)
ret = 0;
+ if (ret < 0)
+ vgic_its_free_device_list(its->dev->kvm, its);
+
return ret;
}
@@ -2461,6 +2529,11 @@ static int vgic_its_save_cte(struct vgic_its *its,
return kvm_write_guest_lock(its->dev->kvm, gpa, &val, esz);
}
+/*
+ * Restore a collection entry into the ITS collection table.
+ * Return +1 on success, 0 if the entry was invalid (which should be
+ * interpreted as end-of-table), and a negative error value for generic errors.
+ */
static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa, int esz)
{
struct its_collection *collection;
@@ -2487,6 +2560,10 @@ static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa, int esz)
collection = find_collection(its, coll_id);
if (collection)
return -EEXIST;
+
+ if (!vgic_its_check_id(its, its->baser_coll_table, coll_id, NULL))
+ return -EINVAL;
+
ret = vgic_its_alloc_collection(its, &collection, coll_id);
if (ret)
return ret;
@@ -2566,6 +2643,9 @@ static int vgic_its_restore_collection_table(struct vgic_its *its)
if (ret > 0)
return 0;
+ if (ret < 0)
+ vgic_its_free_collection_list(its->dev->kvm, its);
+
return ret;
}
@@ -2597,7 +2677,10 @@ static int vgic_its_restore_tables_v0(struct vgic_its *its)
if (ret)
return ret;
- return vgic_its_restore_device_tables(its);
+ ret = vgic_its_restore_device_tables(its);
+ if (ret)
+ vgic_its_free_collection_list(its->dev->kvm, its);
+ return ret;
}
static int vgic_its_commit_v0(struct vgic_its *its)
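For the scan_its_table() change above (the while (true) loop with the byte_offset >= len check), a stripped-down sketch of the walk pattern may help; scan_table() and the callback contract in the comment are illustrative stand-ins, not the kernel helpers themselves:

#include <stddef.h>

/*
 * Walk a table of fixed-size entries. The callback returns the offset
 * (in entries) to the next valid entry, 0 for "last entry", or a
 * negative error. Stop as soon as the next offset would leave the
 * region, instead of trusting the remaining length alone.
 */
static int scan_table(const char *base, size_t len, size_t esz,
		      int (*fn)(const char *entry, void *opaque),
		      void *opaque)
{
	const char *ptr = base;

	while (1) {
		size_t byte_offset;
		int next = fn(ptr, opaque);

		if (next <= 0)
			return next;	/* end of table, or error */

		byte_offset = (size_t)next * esz;
		if (byte_offset >= len)
			break;		/* would run past the table */

		ptr += byte_offset;
		len -= byte_offset;
	}

	return 0;
}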
diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c
index 0d000d2fe8d2..edeac2380591 100644
--- a/arch/arm64/kvm/vgic/vgic-kvm-device.c
+++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c
@@ -41,11 +41,42 @@ static int vgic_check_type(struct kvm *kvm, int type_needed)
return 0;
}
+int kvm_set_legacy_vgic_v2_addr(struct kvm *kvm, struct kvm_arm_device_addr *dev_addr)
+{
+ struct vgic_dist *vgic = &kvm->arch.vgic;
+ int r;
+
+ mutex_lock(&kvm->lock);
+ switch (FIELD_GET(KVM_ARM_DEVICE_TYPE_MASK, dev_addr->id)) {
+ case KVM_VGIC_V2_ADDR_TYPE_DIST:
+ r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
+ if (!r)
+ r = vgic_check_iorange(kvm, vgic->vgic_dist_base, dev_addr->addr,
+ SZ_4K, KVM_VGIC_V2_DIST_SIZE);
+ if (!r)
+ vgic->vgic_dist_base = dev_addr->addr;
+ break;
+ case KVM_VGIC_V2_ADDR_TYPE_CPU:
+ r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
+ if (!r)
+ r = vgic_check_iorange(kvm, vgic->vgic_cpu_base, dev_addr->addr,
+ SZ_4K, KVM_VGIC_V2_CPU_SIZE);
+ if (!r)
+ vgic->vgic_cpu_base = dev_addr->addr;
+ break;
+ default:
+ r = -ENODEV;
+ }
+
+ mutex_unlock(&kvm->lock);
+
+ return r;
+}
+
/**
* kvm_vgic_addr - set or get vgic VM base addresses
* @kvm: pointer to the vm struct
- * @type: the VGIC addr type, one of KVM_VGIC_V[23]_ADDR_TYPE_XXX
- * @addr: pointer to address value
+ * @attr: pointer to the attribute being retrieved/updated
* @write: if true set the address in the VM address space, if false read the
* address
*
@@ -57,15 +88,22 @@ static int vgic_check_type(struct kvm *kvm, int type_needed)
* overlapping regions in case of a virtual GICv3 here, since we don't know
* the number of VCPUs yet, so we defer this check to map_resources().
*/
-int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
+static int kvm_vgic_addr(struct kvm *kvm, struct kvm_device_attr *attr, bool write)
{
- int r = 0;
+ u64 __user *uaddr = (u64 __user *)attr->addr;
struct vgic_dist *vgic = &kvm->arch.vgic;
phys_addr_t *addr_ptr, alignment, size;
u64 undef_value = VGIC_ADDR_UNDEF;
+ u64 addr;
+ int r;
+
+ /* Reading a redistributor region addr implies getting the index */
+ if (write || attr->attr == KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION)
+ if (get_user(addr, uaddr))
+ return -EFAULT;
mutex_lock(&kvm->lock);
- switch (type) {
+ switch (attr->attr) {
case KVM_VGIC_V2_ADDR_TYPE_DIST:
r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
addr_ptr = &vgic->vgic_dist_base;
@@ -91,7 +129,7 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
if (r)
break;
if (write) {
- r = vgic_v3_set_redist_base(kvm, 0, *addr, 0);
+ r = vgic_v3_set_redist_base(kvm, 0, addr, 0);
goto out;
}
rdreg = list_first_entry_or_null(&vgic->rd_regions,
@@ -111,14 +149,12 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
if (r)
break;
- index = *addr & KVM_VGIC_V3_RDIST_INDEX_MASK;
+ index = addr & KVM_VGIC_V3_RDIST_INDEX_MASK;
if (write) {
- gpa_t base = *addr & KVM_VGIC_V3_RDIST_BASE_MASK;
- u32 count = (*addr & KVM_VGIC_V3_RDIST_COUNT_MASK)
- >> KVM_VGIC_V3_RDIST_COUNT_SHIFT;
- u8 flags = (*addr & KVM_VGIC_V3_RDIST_FLAGS_MASK)
- >> KVM_VGIC_V3_RDIST_FLAGS_SHIFT;
+ gpa_t base = addr & KVM_VGIC_V3_RDIST_BASE_MASK;
+ u32 count = FIELD_GET(KVM_VGIC_V3_RDIST_COUNT_MASK, addr);
+ u8 flags = FIELD_GET(KVM_VGIC_V3_RDIST_FLAGS_MASK, addr);
if (!count || flags)
r = -EINVAL;
@@ -134,9 +170,9 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
goto out;
}
- *addr = index;
- *addr |= rdreg->base;
- *addr |= (u64)rdreg->count << KVM_VGIC_V3_RDIST_COUNT_SHIFT;
+ addr = index;
+ addr |= rdreg->base;
+ addr |= (u64)rdreg->count << KVM_VGIC_V3_RDIST_COUNT_SHIFT;
goto out;
}
default:
@@ -147,15 +183,19 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
goto out;
if (write) {
- r = vgic_check_iorange(kvm, *addr_ptr, *addr, alignment, size);
+ r = vgic_check_iorange(kvm, *addr_ptr, addr, alignment, size);
if (!r)
- *addr_ptr = *addr;
+ *addr_ptr = addr;
} else {
- *addr = *addr_ptr;
+ addr = *addr_ptr;
}
out:
mutex_unlock(&kvm->lock);
+
+ if (!r && !write)
+ r = put_user(addr, uaddr);
+
return r;
}
@@ -165,17 +205,9 @@ static int vgic_set_common_attr(struct kvm_device *dev,
int r;
switch (attr->group) {
- case KVM_DEV_ARM_VGIC_GRP_ADDR: {
- u64 __user *uaddr = (u64 __user *)(long)attr->addr;
- u64 addr;
- unsigned long type = (unsigned long)attr->attr;
-
- if (copy_from_user(&addr, uaddr, sizeof(addr)))
- return -EFAULT;
-
- r = kvm_vgic_addr(dev->kvm, type, &addr, true);
+ case KVM_DEV_ARM_VGIC_GRP_ADDR:
+ r = kvm_vgic_addr(dev->kvm, attr, true);
return (r == -ENODEV) ? -ENXIO : r;
- }
case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: {
u32 __user *uaddr = (u32 __user *)(long)attr->addr;
u32 val;
@@ -214,6 +246,24 @@ static int vgic_set_common_attr(struct kvm_device *dev,
r = vgic_init(dev->kvm);
mutex_unlock(&dev->kvm->lock);
return r;
+ case KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES:
+ /*
+ * OK, this one isn't common at all, but we
+ * want to handle all control group attributes
+ * in a single place.
+ */
+ if (vgic_check_type(dev->kvm, KVM_DEV_TYPE_ARM_VGIC_V3))
+ return -ENXIO;
+ mutex_lock(&dev->kvm->lock);
+
+ if (!lock_all_vcpus(dev->kvm)) {
+ mutex_unlock(&dev->kvm->lock);
+ return -EBUSY;
+ }
+ r = vgic_v3_save_pending_tables(dev->kvm);
+ unlock_all_vcpus(dev->kvm);
+ mutex_unlock(&dev->kvm->lock);
+ return r;
}
break;
}
@@ -228,22 +278,9 @@ static int vgic_get_common_attr(struct kvm_device *dev,
int r = -ENXIO;
switch (attr->group) {
- case KVM_DEV_ARM_VGIC_GRP_ADDR: {
- u64 __user *uaddr = (u64 __user *)(long)attr->addr;
- u64 addr;
- unsigned long type = (unsigned long)attr->attr;
-
- if (copy_from_user(&addr, uaddr, sizeof(addr)))
- return -EFAULT;
-
- r = kvm_vgic_addr(dev->kvm, type, &addr, false);
- if (r)
- return (r == -ENODEV) ? -ENXIO : r;
-
- if (copy_to_user(uaddr, &addr, sizeof(addr)))
- return -EFAULT;
- break;
- }
+ case KVM_DEV_ARM_VGIC_GRP_ADDR:
+ r = kvm_vgic_addr(dev->kvm, attr, false);
+ return (r == -ENODEV) ? -ENXIO : r;
case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: {
u32 __user *uaddr = (u32 __user *)(long)attr->addr;
@@ -325,7 +362,7 @@ void unlock_all_vcpus(struct kvm *kvm)
bool lock_all_vcpus(struct kvm *kvm)
{
struct kvm_vcpu *tmp_vcpu;
- int c;
+ unsigned long c;
/*
* Any time a vcpu is run, vcpu_load is called which tries to grab the
@@ -348,17 +385,18 @@ bool lock_all_vcpus(struct kvm *kvm)
*
* @dev: kvm device handle
* @attr: kvm device attribute
- * @reg: address the value is read or written
* @is_write: true if userspace is writing a register
*/
static int vgic_v2_attr_regs_access(struct kvm_device *dev,
struct kvm_device_attr *attr,
- u32 *reg, bool is_write)
+ bool is_write)
{
+ u32 __user *uaddr = (u32 __user *)(unsigned long)attr->addr;
struct vgic_reg_attr reg_attr;
gpa_t addr;
struct kvm_vcpu *vcpu;
int ret;
+ u32 val;
ret = vgic_v2_parse_attr(dev, attr, &reg_attr);
if (ret)
@@ -367,6 +405,10 @@ static int vgic_v2_attr_regs_access(struct kvm_device *dev,
vcpu = reg_attr.vcpu;
addr = reg_attr.addr;
+ if (is_write)
+ if (get_user(val, uaddr))
+ return -EFAULT;
+
mutex_lock(&dev->kvm->lock);
ret = vgic_init(dev->kvm);
@@ -380,10 +422,10 @@ static int vgic_v2_attr_regs_access(struct kvm_device *dev,
switch (attr->group) {
case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
- ret = vgic_v2_cpuif_uaccess(vcpu, is_write, addr, reg);
+ ret = vgic_v2_cpuif_uaccess(vcpu, is_write, addr, &val);
break;
case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
- ret = vgic_v2_dist_uaccess(vcpu, is_write, addr, reg);
+ ret = vgic_v2_dist_uaccess(vcpu, is_write, addr, &val);
break;
default:
ret = -EINVAL;
@@ -393,57 +435,35 @@ static int vgic_v2_attr_regs_access(struct kvm_device *dev,
unlock_all_vcpus(dev->kvm);
out:
mutex_unlock(&dev->kvm->lock);
+
+ if (!ret && !is_write)
+ ret = put_user(val, uaddr);
+
return ret;
}
static int vgic_v2_set_attr(struct kvm_device *dev,
struct kvm_device_attr *attr)
{
- int ret;
-
- ret = vgic_set_common_attr(dev, attr);
- if (ret != -ENXIO)
- return ret;
-
switch (attr->group) {
case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
- case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: {
- u32 __user *uaddr = (u32 __user *)(long)attr->addr;
- u32 reg;
-
- if (get_user(reg, uaddr))
- return -EFAULT;
-
- return vgic_v2_attr_regs_access(dev, attr, &reg, true);
- }
+ case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
+ return vgic_v2_attr_regs_access(dev, attr, true);
+ default:
+ return vgic_set_common_attr(dev, attr);
}
-
- return -ENXIO;
}
static int vgic_v2_get_attr(struct kvm_device *dev,
struct kvm_device_attr *attr)
{
- int ret;
-
- ret = vgic_get_common_attr(dev, attr);
- if (ret != -ENXIO)
- return ret;
-
switch (attr->group) {
case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
- case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: {
- u32 __user *uaddr = (u32 __user *)(long)attr->addr;
- u32 reg = 0;
-
- ret = vgic_v2_attr_regs_access(dev, attr, &reg, false);
- if (ret)
- return ret;
- return put_user(reg, uaddr);
- }
+ case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
+ return vgic_v2_attr_regs_access(dev, attr, false);
+ default:
+ return vgic_get_common_attr(dev, attr);
}
-
- return -ENXIO;
}
static int vgic_v2_has_attr(struct kvm_device *dev,
@@ -512,18 +532,18 @@ int vgic_v3_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr,
*
* @dev: kvm device handle
* @attr: kvm device attribute
- * @reg: address the value is read or written
* @is_write: true if userspace is writing a register
*/
static int vgic_v3_attr_regs_access(struct kvm_device *dev,
struct kvm_device_attr *attr,
- u64 *reg, bool is_write)
+ bool is_write)
{
struct vgic_reg_attr reg_attr;
gpa_t addr;
struct kvm_vcpu *vcpu;
+ bool uaccess;
+ u32 val;
int ret;
- u32 tmp32;
ret = vgic_v3_parse_attr(dev, attr, &reg_attr);
if (ret)
@@ -532,6 +552,21 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev,
vcpu = reg_attr.vcpu;
addr = reg_attr.addr;
+ switch (attr->group) {
+ case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS:
+ /* Sysregs uaccess is performed by the sysreg handling code */
+ uaccess = false;
+ break;
+ default:
+ uaccess = true;
+ }
+
+ if (uaccess && is_write) {
+ u32 __user *uaddr = (u32 __user *)(unsigned long)attr->addr;
+ if (get_user(val, uaddr))
+ return -EFAULT;
+ }
+
mutex_lock(&dev->kvm->lock);
if (unlikely(!vgic_initialized(dev->kvm))) {
@@ -546,29 +581,14 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev,
switch (attr->group) {
case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
- if (is_write)
- tmp32 = *reg;
-
- ret = vgic_v3_dist_uaccess(vcpu, is_write, addr, &tmp32);
- if (!is_write)
- *reg = tmp32;
+ ret = vgic_v3_dist_uaccess(vcpu, is_write, addr, &val);
break;
case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS:
- if (is_write)
- tmp32 = *reg;
-
- ret = vgic_v3_redist_uaccess(vcpu, is_write, addr, &tmp32);
- if (!is_write)
- *reg = tmp32;
+ ret = vgic_v3_redist_uaccess(vcpu, is_write, addr, &val);
break;
- case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: {
- u64 regid;
-
- regid = (attr->attr & KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK);
- ret = vgic_v3_cpu_sysregs_uaccess(vcpu, is_write,
- regid, reg);
+ case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS:
+ ret = vgic_v3_cpu_sysregs_uaccess(vcpu, attr, is_write);
break;
- }
case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: {
unsigned int info, intid;
@@ -578,7 +598,7 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev,
intid = attr->attr &
KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK;
ret = vgic_v3_line_level_info_uaccess(vcpu, is_write,
- intid, reg);
+ intid, &val);
} else {
ret = -EINVAL;
}
@@ -592,117 +612,41 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev,
unlock_all_vcpus(dev->kvm);
out:
mutex_unlock(&dev->kvm->lock);
+
+ if (!ret && uaccess && !is_write) {
+ u32 __user *uaddr = (u32 __user *)(unsigned long)attr->addr;
+ ret = put_user(val, uaddr);
+ }
+
return ret;
}
static int vgic_v3_set_attr(struct kvm_device *dev,
struct kvm_device_attr *attr)
{
- int ret;
-
- ret = vgic_set_common_attr(dev, attr);
- if (ret != -ENXIO)
- return ret;
-
switch (attr->group) {
case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
- case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS: {
- u32 __user *uaddr = (u32 __user *)(long)attr->addr;
- u32 tmp32;
- u64 reg;
-
- if (get_user(tmp32, uaddr))
- return -EFAULT;
-
- reg = tmp32;
- return vgic_v3_attr_regs_access(dev, attr, &reg, true);
- }
- case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: {
- u64 __user *uaddr = (u64 __user *)(long)attr->addr;
- u64 reg;
-
- if (get_user(reg, uaddr))
- return -EFAULT;
-
- return vgic_v3_attr_regs_access(dev, attr, &reg, true);
- }
- case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: {
- u32 __user *uaddr = (u32 __user *)(long)attr->addr;
- u64 reg;
- u32 tmp32;
-
- if (get_user(tmp32, uaddr))
- return -EFAULT;
-
- reg = tmp32;
- return vgic_v3_attr_regs_access(dev, attr, &reg, true);
- }
- case KVM_DEV_ARM_VGIC_GRP_CTRL: {
- int ret;
-
- switch (attr->attr) {
- case KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES:
- mutex_lock(&dev->kvm->lock);
-
- if (!lock_all_vcpus(dev->kvm)) {
- mutex_unlock(&dev->kvm->lock);
- return -EBUSY;
- }
- ret = vgic_v3_save_pending_tables(dev->kvm);
- unlock_all_vcpus(dev->kvm);
- mutex_unlock(&dev->kvm->lock);
- return ret;
- }
- break;
- }
+ case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS:
+ case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS:
+ case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO:
+ return vgic_v3_attr_regs_access(dev, attr, true);
+ default:
+ return vgic_set_common_attr(dev, attr);
}
- return -ENXIO;
}
static int vgic_v3_get_attr(struct kvm_device *dev,
struct kvm_device_attr *attr)
{
- int ret;
-
- ret = vgic_get_common_attr(dev, attr);
- if (ret != -ENXIO)
- return ret;
-
switch (attr->group) {
case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
- case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS: {
- u32 __user *uaddr = (u32 __user *)(long)attr->addr;
- u64 reg;
- u32 tmp32;
-
- ret = vgic_v3_attr_regs_access(dev, attr, &reg, false);
- if (ret)
- return ret;
- tmp32 = reg;
- return put_user(tmp32, uaddr);
- }
- case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: {
- u64 __user *uaddr = (u64 __user *)(long)attr->addr;
- u64 reg;
-
- ret = vgic_v3_attr_regs_access(dev, attr, &reg, false);
- if (ret)
- return ret;
- return put_user(reg, uaddr);
- }
- case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: {
- u32 __user *uaddr = (u32 __user *)(long)attr->addr;
- u64 reg;
- u32 tmp32;
-
- ret = vgic_v3_attr_regs_access(dev, attr, &reg, false);
- if (ret)
- return ret;
- tmp32 = reg;
- return put_user(tmp32, uaddr);
- }
+ case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS:
+ case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS:
+ case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO:
+ return vgic_v3_attr_regs_access(dev, attr, false);
+ default:
+ return vgic_get_common_attr(dev, attr);
}
- return -ENXIO;
}
static int vgic_v3_has_attr(struct kvm_device *dev,
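With kvm_vgic_addr() now doing the get_user()/put_user() itself, the userspace side of the KVM_DEV_ARM_VGIC_GRP_ADDR group is unchanged. A rough VMM-side sketch, assuming the standard device-attr UAPI from <linux/kvm.h>; the vgic device fd creation and error handling are elided, and set_dist_base() is just an illustrative name:

#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <stdint.h>

/* Set the GICv3 distributor base address on an existing vgic device fd. */
static int set_dist_base(int vgic_fd, uint64_t gpa)
{
	struct kvm_device_attr attr = {
		.group	= KVM_DEV_ARM_VGIC_GRP_ADDR,
		.attr	= KVM_VGIC_V3_ADDR_TYPE_DIST,
		/* The kernel reads this u64 directly with get_user(). */
		.addr	= (uint64_t)&gpa,
	};

	return ioctl(vgic_fd, KVM_SET_DEVICE_ATTR, &attr);
}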
diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v2.c b/arch/arm64/kvm/vgic/vgic-mmio-v2.c
index 5f9014ae595b..e070cda86e12 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio-v2.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio-v2.c
@@ -73,9 +73,13 @@ static int vgic_mmio_uaccess_write_v2_misc(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len,
unsigned long val)
{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+ u32 reg;
+
switch (addr & 0x0c) {
case GIC_DIST_IIDR:
- if (val != vgic_mmio_read_v2_misc(vcpu, addr, len))
+ reg = vgic_mmio_read_v2_misc(vcpu, addr, len);
+ if ((reg ^ val) & ~GICD_IIDR_REVISION_MASK)
return -EINVAL;
/*
@@ -87,8 +91,16 @@ static int vgic_mmio_uaccess_write_v2_misc(struct kvm_vcpu *vcpu,
* migration from old kernels to new kernels with legacy
* userspace.
*/
- vcpu->kvm->arch.vgic.v2_groups_user_writable = true;
- return 0;
+ reg = FIELD_GET(GICD_IIDR_REVISION_MASK, reg);
+ switch (reg) {
+ case KVM_VGIC_IMP_REV_2:
+ case KVM_VGIC_IMP_REV_3:
+ vcpu->kvm->arch.vgic.v2_groups_user_writable = true;
+ dist->implementation_rev = reg;
+ return 0;
+ default:
+ return -EINVAL;
+ }
}
vgic_mmio_write_v2_misc(vcpu, addr, len, val);
@@ -113,9 +125,8 @@ static void vgic_mmio_write_sgir(struct kvm_vcpu *source_vcpu,
int intid = val & 0xf;
int targets = (val >> 16) & 0xff;
int mode = (val >> 24) & 0x03;
- int c;
struct kvm_vcpu *vcpu;
- unsigned long flags;
+ unsigned long flags, c;
switch (mode) {
case 0x0: /* as specified by targets */
@@ -418,11 +429,11 @@ static const struct vgic_register_region vgic_v2_dist_registers[] = {
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_SET,
vgic_mmio_read_pending, vgic_mmio_write_spending,
- NULL, vgic_uaccess_write_spending, 1,
+ vgic_uaccess_read_pending, vgic_uaccess_write_spending, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_CLEAR,
vgic_mmio_read_pending, vgic_mmio_write_cpending,
- NULL, vgic_uaccess_write_cpending, 1,
+ vgic_uaccess_read_pending, vgic_uaccess_write_cpending, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_SET,
vgic_mmio_read_active, vgic_mmio_write_sactive,
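The GICD_IIDR handler above now tolerates a userspace write whose value differs from the emulated register only in the Revision field; (reg ^ val) & ~GICD_IIDR_REVISION_MASK is the usual "equal everywhere outside this field" idiom. A tiny sketch of that check in isolation (equal_outside_field() is an illustrative helper, not in the patch):

#include <linux/types.h>

/*
 * Return true if @val matches @reg in every bit outside @field_mask,
 * i.e. only the masked field is allowed to differ.
 */
static inline bool equal_outside_field(u32 reg, u32 val, u32 field_mask)
{
	return !((reg ^ val) & ~field_mask);
}

This is exactly the test that gates the implementation_rev update in both the GICv2 and GICv3 IIDR handlers.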
diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
index bf7ec4a78497..91201f743033 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
@@ -155,13 +155,27 @@ static int vgic_mmio_uaccess_write_v3_misc(struct kvm_vcpu *vcpu,
unsigned long val)
{
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+ u32 reg;
switch (addr & 0x0c) {
case GICD_TYPER2:
- case GICD_IIDR:
if (val != vgic_mmio_read_v3_misc(vcpu, addr, len))
return -EINVAL;
return 0;
+ case GICD_IIDR:
+ reg = vgic_mmio_read_v3_misc(vcpu, addr, len);
+ if ((reg ^ val) & ~GICD_IIDR_REVISION_MASK)
+ return -EINVAL;
+
+ reg = FIELD_GET(GICD_IIDR_REVISION_MASK, reg);
+ switch (reg) {
+ case KVM_VGIC_IMP_REV_2:
+ case KVM_VGIC_IMP_REV_3:
+ dist->implementation_rev = reg;
+ return 0;
+ default:
+ return -EINVAL;
+ }
case GICD_CTLR:
/* Not a GICv4.1? No HW SGIs */
if (!kvm_vgic_global_state.has_gicv4_1)
@@ -221,34 +235,58 @@ static void vgic_mmio_write_irouter(struct kvm_vcpu *vcpu,
vgic_put_irq(vcpu->kvm, irq);
}
+bool vgic_lpis_enabled(struct kvm_vcpu *vcpu)
+{
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+
+ return atomic_read(&vgic_cpu->ctlr) == GICR_CTLR_ENABLE_LPIS;
+}
+
static unsigned long vgic_mmio_read_v3r_ctlr(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+ unsigned long val;
- return vgic_cpu->lpis_enabled ? GICR_CTLR_ENABLE_LPIS : 0;
-}
+ val = atomic_read(&vgic_cpu->ctlr);
+ if (vgic_get_implementation_rev(vcpu) >= KVM_VGIC_IMP_REV_3)
+ val |= GICR_CTLR_IR | GICR_CTLR_CES;
+ return val;
+}
static void vgic_mmio_write_v3r_ctlr(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len,
unsigned long val)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
- bool was_enabled = vgic_cpu->lpis_enabled;
+ u32 ctlr;
if (!vgic_has_its(vcpu->kvm))
return;
- vgic_cpu->lpis_enabled = val & GICR_CTLR_ENABLE_LPIS;
+ if (!(val & GICR_CTLR_ENABLE_LPIS)) {
+ /*
+ * Don't disable if RWP is set, as there is already an
+ * ongoing disable. Funky guest...
+ */
+ ctlr = atomic_cmpxchg_acquire(&vgic_cpu->ctlr,
+ GICR_CTLR_ENABLE_LPIS,
+ GICR_CTLR_RWP);
+ if (ctlr != GICR_CTLR_ENABLE_LPIS)
+ return;
- if (was_enabled && !vgic_cpu->lpis_enabled) {
vgic_flush_pending_lpis(vcpu);
vgic_its_invalidate_cache(vcpu->kvm);
- }
+ atomic_set_release(&vgic_cpu->ctlr, 0);
+ } else {
+ ctlr = atomic_cmpxchg_acquire(&vgic_cpu->ctlr, 0,
+ GICR_CTLR_ENABLE_LPIS);
+ if (ctlr != 0)
+ return;
- if (!was_enabled && vgic_cpu->lpis_enabled)
vgic_enable_lpis(vcpu);
+ }
}
static bool vgic_mmio_vcpu_rdist_is_last(struct kvm_vcpu *vcpu)
@@ -315,42 +353,6 @@ static unsigned long vgic_mmio_read_v3_idregs(struct kvm_vcpu *vcpu,
return 0;
}
-static unsigned long vgic_v3_uaccess_read_pending(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len)
-{
- u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
- u32 value = 0;
- int i;
-
- /*
- * pending state of interrupt is latched in pending_latch variable.
- * Userspace will save and restore pending state and line_level
- * separately.
- * Refer to Documentation/virt/kvm/devices/arm-vgic-v3.rst
- * for handling of ISPENDR and ICPENDR.
- */
- for (i = 0; i < len * 8; i++) {
- struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
- bool state = irq->pending_latch;
-
- if (irq->hw && vgic_irq_is_sgi(irq->intid)) {
- int err;
-
- err = irq_get_irqchip_state(irq->host_irq,
- IRQCHIP_STATE_PENDING,
- &state);
- WARN_ON(err);
- }
-
- if (state)
- value |= (1U << i);
-
- vgic_put_irq(vcpu->kvm, irq);
- }
-
- return value;
-}
-
static int vgic_v3_uaccess_write_pending(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len,
unsigned long val)
@@ -478,11 +480,10 @@ static void vgic_mmio_write_propbase(struct kvm_vcpu *vcpu,
unsigned long val)
{
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
- struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
u64 old_propbaser, propbaser;
/* Storing a value with LPIs already enabled is undefined */
- if (vgic_cpu->lpis_enabled)
+ if (vgic_lpis_enabled(vcpu))
return;
do {
@@ -513,7 +514,7 @@ static void vgic_mmio_write_pendbase(struct kvm_vcpu *vcpu,
u64 old_pendbaser, pendbaser;
/* Storing a value with LPIs already enabled is undefined */
- if (vgic_cpu->lpis_enabled)
+ if (vgic_lpis_enabled(vcpu))
return;
do {
@@ -525,6 +526,63 @@ static void vgic_mmio_write_pendbase(struct kvm_vcpu *vcpu,
pendbaser) != old_pendbaser);
}
+static unsigned long vgic_mmio_read_sync(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len)
+{
+ return !!atomic_read(&vcpu->arch.vgic_cpu.syncr_busy);
+}
+
+static void vgic_set_rdist_busy(struct kvm_vcpu *vcpu, bool busy)
+{
+ if (busy) {
+ atomic_inc(&vcpu->arch.vgic_cpu.syncr_busy);
+ smp_mb__after_atomic();
+ } else {
+ smp_mb__before_atomic();
+ atomic_dec(&vcpu->arch.vgic_cpu.syncr_busy);
+ }
+}
+
+static void vgic_mmio_write_invlpi(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val)
+{
+ struct vgic_irq *irq;
+
+ /*
+ * If the guest wrote only to the upper 32bit part of the
+ * register, drop the write on the floor, as it is only for
+ * vPEs (which we don't support for obvious reasons).
+ *
+ * Also discard the access if LPIs are not enabled.
+ */
+ if ((addr & 4) || !vgic_lpis_enabled(vcpu))
+ return;
+
+ vgic_set_rdist_busy(vcpu, true);
+
+ irq = vgic_get_irq(vcpu->kvm, NULL, lower_32_bits(val));
+ if (irq) {
+ vgic_its_inv_lpi(vcpu->kvm, irq);
+ vgic_put_irq(vcpu->kvm, irq);
+ }
+
+ vgic_set_rdist_busy(vcpu, false);
+}
+
+static void vgic_mmio_write_invall(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val)
+{
+ /* See vgic_mmio_write_invlpi() for the early return rationale */
+ if ((addr & 4) || !vgic_lpis_enabled(vcpu))
+ return;
+
+ vgic_set_rdist_busy(vcpu, true);
+ vgic_its_invall(vcpu);
+ vgic_set_rdist_busy(vcpu, false);
+}
+
/*
* The GICv3 per-IRQ registers are split to control PPIs and SGIs in the
* redistributors, while SPIs are covered by registers in the distributor
@@ -572,7 +630,7 @@ static const struct vgic_register_region vgic_v3_dist_registers[] = {
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISPENDR,
vgic_mmio_read_pending, vgic_mmio_write_spending,
- vgic_v3_uaccess_read_pending, vgic_v3_uaccess_write_pending, 1,
+ vgic_uaccess_read_pending, vgic_v3_uaccess_write_pending, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICPENDR,
vgic_mmio_read_pending, vgic_mmio_write_cpending,
@@ -630,6 +688,15 @@ static const struct vgic_register_region vgic_v3_rd_registers[] = {
REGISTER_DESC_WITH_LENGTH(GICR_PENDBASER,
vgic_mmio_read_pendbase, vgic_mmio_write_pendbase, 8,
VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_LENGTH(GICR_INVLPIR,
+ vgic_mmio_read_raz, vgic_mmio_write_invlpi, 8,
+ VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_LENGTH(GICR_INVALLR,
+ vgic_mmio_read_raz, vgic_mmio_write_invall, 8,
+ VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_LENGTH(GICR_SYNCR,
+ vgic_mmio_read_sync, vgic_mmio_write_wi, 4,
+ VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_LENGTH(GICR_IDREGS,
vgic_mmio_read_v3_idregs, vgic_mmio_write_wi, 48,
VGIC_ACCESS_32bit),
@@ -647,7 +714,7 @@ static const struct vgic_register_region vgic_v3_rd_registers[] = {
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ISPENDR0,
vgic_mmio_read_pending, vgic_mmio_write_spending,
- vgic_v3_uaccess_read_pending, vgic_v3_uaccess_write_pending, 4,
+ vgic_uaccess_read_pending, vgic_v3_uaccess_write_pending, 4,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ICPENDR0,
vgic_mmio_read_pending, vgic_mmio_write_cpending,
@@ -754,7 +821,8 @@ static void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu)
static int vgic_register_all_redist_iodevs(struct kvm *kvm)
{
struct kvm_vcpu *vcpu;
- int c, ret = 0;
+ unsigned long c;
+ int ret = 0;
kvm_for_each_vcpu(c, vcpu, kvm) {
ret = vgic_register_redist_iodev(vcpu);
@@ -763,10 +831,12 @@ static int vgic_register_all_redist_iodevs(struct kvm *kvm)
}
if (ret) {
- /* The current c failed, so we start with the previous one. */
+ /* The current c failed, so iterate over the previous ones. */
+ int i;
+
mutex_lock(&kvm->slots_lock);
- for (c--; c >= 0; c--) {
- vcpu = kvm_get_vcpu(kvm, c);
+ for (i = 0; i < c; i++) {
+ vcpu = kvm_get_vcpu(kvm, i);
vgic_unregister_redist_iodev(vcpu);
}
mutex_unlock(&kvm->slots_lock);
@@ -916,12 +986,8 @@ int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr)
iodev.base_addr = 0;
break;
}
- case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: {
- u64 reg, id;
-
- id = (attr->attr & KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK);
- return vgic_v3_has_cpu_sysregs_attr(vcpu, 0, id, &reg);
- }
+ case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS:
+ return vgic_v3_has_cpu_sysregs_attr(vcpu, attr);
default:
return -ENXIO;
}
@@ -995,10 +1061,10 @@ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, bool allow_group1)
struct kvm_vcpu *c_vcpu;
u16 target_cpus;
u64 mpidr;
- int sgi, c;
+ int sgi;
int vcpu_id = vcpu->vcpu_id;
bool broadcast;
- unsigned long flags;
+ unsigned long c, flags;
sgi = (reg & ICC_SGI1R_SGI_ID_MASK) >> ICC_SGI1R_SGI_ID_SHIFT;
broadcast = reg & BIT_ULL(ICC_SGI1R_IRQ_ROUTING_MODE_BIT);
@@ -1088,7 +1154,7 @@ int vgic_v3_redist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
}
int vgic_v3_line_level_info_uaccess(struct kvm_vcpu *vcpu, bool is_write,
- u32 intid, u64 *val)
+ u32 intid, u32 *val)
{
if (intid % 32)
return -EINVAL;
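The GICR_CTLR rework above serialises enabling and disabling LPIs with a small atomic state machine: the only legal transitions are idle -> enabled and enabled -> draining -> idle, and a writer that loses the cmpxchg simply backs off. A stripped-down sketch of the pattern, with illustrative state names in place of the GICR_CTLR bit values:

#include <linux/atomic.h>

#define STATE_IDLE	0U
#define STATE_ENABLED	1U
#define STATE_DRAINING	2U

static atomic_t example_state = ATOMIC_INIT(STATE_IDLE);

static void example_enable(void (*setup)(void))
{
	/* Only the writer that observes IDLE performs the enable. */
	if (atomic_cmpxchg_acquire(&example_state, STATE_IDLE,
				   STATE_ENABLED) != STATE_IDLE)
		return;

	setup();
}

static void example_disable(void (*drain)(void))
{
	/* Losing the cmpxchg means a disable is already in flight. */
	if (atomic_cmpxchg_acquire(&example_state, STATE_ENABLED,
				   STATE_DRAINING) != STATE_ENABLED)
		return;

	drain();
	/* Publish the drained state only after the flush is visible. */
	atomic_set_release(&example_state, STATE_IDLE);
}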
diff --git a/arch/arm64/kvm/vgic/vgic-mmio.c b/arch/arm64/kvm/vgic/vgic-mmio.c
index 48c6067fc5ec..b32d434c1d4a 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio.c
@@ -226,8 +226,9 @@ int vgic_uaccess_write_cenable(struct kvm_vcpu *vcpu,
return 0;
}
-unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len)
+static unsigned long __read_pending(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ bool is_user)
{
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
u32 value = 0;
@@ -239,6 +240,15 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
unsigned long flags;
bool val;
+ /*
+ * When used from userspace with a GICv3 model:
+ *
+ * The pending state of an interrupt is latched in the
+ * pending_latch variable. Userspace will save and restore
+ * the pending state and line_level separately.
+ * Refer to Documentation/virt/kvm/devices/arm-vgic-v3.rst
+ * for handling of ISPENDR and ICPENDR.
+ */
raw_spin_lock_irqsave(&irq->irq_lock, flags);
if (irq->hw && vgic_irq_is_sgi(irq->intid)) {
int err;
@@ -248,8 +258,20 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
IRQCHIP_STATE_PENDING,
&val);
WARN_RATELIMIT(err, "IRQ %d", irq->host_irq);
+ } else if (!is_user && vgic_irq_is_mapped_level(irq)) {
+ val = vgic_get_phys_line_level(irq);
} else {
- val = irq_is_pending(irq);
+ switch (vcpu->kvm->arch.vgic.vgic_model) {
+ case KVM_DEV_TYPE_ARM_VGIC_V3:
+ if (is_user) {
+ val = irq->pending_latch;
+ break;
+ }
+ fallthrough;
+ default:
+ val = irq_is_pending(irq);
+ break;
+ }
}
value |= ((u32)val << i);
@@ -261,6 +283,18 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
return value;
}
+unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len)
+{
+ return __read_pending(vcpu, addr, len, false);
+}
+
+unsigned long vgic_uaccess_read_pending(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len)
+{
+ return __read_pending(vcpu, addr, len, true);
+}
+
static bool is_vgic_v2_sgi(struct kvm_vcpu *vcpu, struct vgic_irq *irq)
{
return (vgic_irq_is_sgi(irq->intid) &&
@@ -741,10 +775,10 @@ void vgic_mmio_write_config(struct kvm_vcpu *vcpu,
}
}
-u64 vgic_read_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid)
+u32 vgic_read_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid)
{
int i;
- u64 val = 0;
+ u32 val = 0;
int nr_irqs = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS;
for (i = 0; i < 32; i++) {
@@ -764,7 +798,7 @@ u64 vgic_read_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid)
}
void vgic_write_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid,
- const u64 val)
+ const u32 val)
{
int i;
int nr_irqs = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS;
@@ -1050,7 +1084,7 @@ static int dispatch_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
return 0;
}
-struct kvm_io_device_ops kvm_io_gic_ops = {
+const struct kvm_io_device_ops kvm_io_gic_ops = {
.read = dispatch_mmio_read,
.write = dispatch_mmio_write,
};
diff --git a/arch/arm64/kvm/vgic/vgic-mmio.h b/arch/arm64/kvm/vgic/vgic-mmio.h
index fefcca2b14dc..5b490a4dfa5e 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio.h
+++ b/arch/arm64/kvm/vgic/vgic-mmio.h
@@ -34,7 +34,7 @@ struct vgic_register_region {
};
};
-extern struct kvm_io_device_ops kvm_io_gic_ops;
+extern const struct kvm_io_device_ops kvm_io_gic_ops;
#define VGIC_ACCESS_8bit 1
#define VGIC_ACCESS_32bit 2
@@ -149,6 +149,9 @@ int vgic_uaccess_write_cenable(struct kvm_vcpu *vcpu,
unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len);
+unsigned long vgic_uaccess_read_pending(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len);
+
void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len,
unsigned long val);
@@ -204,10 +207,10 @@ void vgic_mmio_write_config(struct kvm_vcpu *vcpu,
int vgic_uaccess(struct kvm_vcpu *vcpu, struct vgic_io_device *dev,
bool is_write, int offset, u32 *val);
-u64 vgic_read_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid);
+u32 vgic_read_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid);
void vgic_write_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid,
- const u64 val);
+ const u32 val);
unsigned int vgic_v2_init_dist_iodev(struct vgic_io_device *dev);
diff --git a/arch/arm64/kvm/vgic/vgic-v2.c b/arch/arm64/kvm/vgic/vgic-v2.c
index 95a18cec14a3..645648349c99 100644
--- a/arch/arm64/kvm/vgic/vgic-v2.c
+++ b/arch/arm64/kvm/vgic/vgic-v2.c
@@ -293,12 +293,12 @@ int vgic_v2_map_resources(struct kvm *kvm)
if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) ||
IS_VGIC_ADDR_UNDEF(dist->vgic_cpu_base)) {
- kvm_err("Need to set vgic cpu and dist addresses first\n");
+ kvm_debug("Need to set vgic cpu and dist addresses first\n");
return -ENXIO;
}
if (!vgic_v2_check_base(dist->vgic_dist_base, dist->vgic_cpu_base)) {
- kvm_err("VGIC CPU and dist frames overlap\n");
+ kvm_debug("VGIC CPU and dist frames overlap\n");
return -EINVAL;
}
@@ -345,6 +345,11 @@ int vgic_v2_probe(const struct gic_kvm_info *info)
int ret;
u32 vtr;
+ if (is_protected_kvm_enabled()) {
+ kvm_err("GICv2 not supported in protected mode\n");
+ return -ENXIO;
+ }
+
if (!info->vctrl.start) {
kvm_err("GICH not present in the firmware table\n");
return -ENXIO;
diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c
index 04f62c4b07fb..826ff6f2a4e7 100644
--- a/arch/arm64/kvm/vgic/vgic-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-v3.c
@@ -542,24 +542,24 @@ int vgic_v3_map_resources(struct kvm *kvm)
struct vgic_dist *dist = &kvm->arch.vgic;
struct kvm_vcpu *vcpu;
int ret = 0;
- int c;
+ unsigned long c;
kvm_for_each_vcpu(c, vcpu, kvm) {
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
if (IS_VGIC_ADDR_UNDEF(vgic_cpu->rd_iodev.base_addr)) {
- kvm_debug("vcpu %d redistributor base not set\n", c);
+ kvm_debug("vcpu %ld redistributor base not set\n", c);
return -ENXIO;
}
}
if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base)) {
- kvm_err("Need to set vgic distributor addresses first\n");
+ kvm_debug("Need to set vgic distributor addresses first\n");
return -ENXIO;
}
if (!vgic_v3_check_base(kvm)) {
- kvm_err("VGIC redist and dist frames overlap\n");
+ kvm_debug("VGIC redist and dist frames overlap\n");
return -EINVAL;
}
@@ -609,6 +609,22 @@ static int __init early_gicv4_enable(char *buf)
}
early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable);
+static const struct midr_range broken_seis[] = {
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_PRO),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_PRO),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_MAX),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_MAX),
+ {},
+};
+
+static bool vgic_v3_broken_seis(void)
+{
+ return ((kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_SEIS_MASK) &&
+ is_midr_in_range_list(read_cpuid_id(), broken_seis));
+}
+
/**
* vgic_v3_probe - probe for a VGICv3 compatible interrupt controller
* @info: pointer to the GIC description
@@ -651,7 +667,7 @@ int vgic_v3_probe(const struct gic_kvm_info *info)
} else if (!PAGE_ALIGNED(info->vcpu.start)) {
pr_warn("GICV physical address 0x%llx not page aligned\n",
(unsigned long long)info->vcpu.start);
- } else {
+ } else if (kvm_get_mode() != KVM_MODE_PROTECTED) {
kvm_vgic_global_state.vcpu_base = info->vcpu.start;
kvm_vgic_global_state.can_emulate_gicv2 = true;
ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V2);
@@ -676,9 +692,10 @@ int vgic_v3_probe(const struct gic_kvm_info *info)
group1_trap = true;
}
- if (kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_SEIS_MASK) {
- kvm_info("GICv3 with locally generated SEI\n");
+ if (vgic_v3_broken_seis()) {
+ kvm_info("GICv3 with broken locally generated SEI\n");
+ kvm_vgic_global_state.ich_vtr_el2 &= ~ICH_VTR_SEIS_MASK;
group0_trap = true;
group1_trap = true;
if (ich_vtr_el2 & ICH_VTR_TDS_MASK)
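
The vgic-v3.c hunk above adds a quirk list: when the running CPU's MIDR matches one of the listed Apple M1 implementations and ICH_VTR_EL2 advertises SEIS, the SEIS bit is masked out and the group traps are enabled instead. A simplified standalone sketch of that match-and-mask pattern follows; the struct and function names are assumptions for illustration and are not the kernel's MIDR helpers.

/* Simplified sketch: check a CPU id against a quirk list, then hide the
 * broken feature from the rest of the driver on a match. */
#include <stdbool.h>
#include <stdint.h>

struct midr_quirk_range {
	uint32_t midr_min;
	uint32_t midr_max;	/* inclusive; { 0, 0 } terminates the list */
};

static bool midr_in_quirk_list(uint32_t midr, const struct midr_quirk_range *list)
{
	for (; list->midr_min || list->midr_max; list++)
		if (midr >= list->midr_min && midr <= list->midr_max)
			return true;
	return false;
}

static uint32_t effective_vtr(uint32_t midr, uint32_t ich_vtr,
			      const struct midr_quirk_range *broken_seis,
			      uint32_t seis_mask)
{
	if ((ich_vtr & seis_mask) && midr_in_quirk_list(midr, broken_seis))
		ich_vtr &= ~seis_mask;	/* pretend SEIS is not implemented */
	return ich_vtr;
}

int main(void)
{
	static const struct midr_quirk_range quirks[] = {
		{ 0x61000000, 0x61ffffff },	/* hypothetical affected range */
		{ 0, 0 },
	};
	uint32_t vtr = effective_vtr(0x61000123, 0x80000000, quirks, 0x80000000);

	return vtr != 0;	/* 0: SEIS masked out as expected */
}
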
diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c
index 772dd15a22c7..ad06ba6c9b00 100644
--- a/arch/arm64/kvm/vgic/vgic-v4.c
+++ b/arch/arm64/kvm/vgic/vgic-v4.c
@@ -189,7 +189,7 @@ void vgic_v4_configure_vsgis(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
struct kvm_vcpu *vcpu;
- int i;
+ unsigned long i;
kvm_arm_halt_guest(kvm);
@@ -235,7 +235,8 @@ int vgic_v4_init(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
struct kvm_vcpu *vcpu;
- int i, nr_vcpus, ret;
+ int nr_vcpus, ret;
+ unsigned long i;
if (!kvm_vgic_global_state.has_gicv4)
return 0; /* Nothing to see here... move along. */
diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c
index 5dad4996cfb2..d97e6080b421 100644
--- a/arch/arm64/kvm/vgic/vgic.c
+++ b/arch/arm64/kvm/vgic/vgic.c
@@ -37,7 +37,7 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = {
* If you need to take multiple locks, always take the upper lock first,
* then the lower ones, e.g. first take the its_lock, then the irq_lock.
* If you are already holding a lock and need to take a higher one, you
- * have to drop the lower ranking lock first and re-aquire it after having
+ * have to drop the lower ranking lock first and re-acquire it after having
* taken the upper one.
*
* When taking more than one ap_list_lock at the same time, always take the
@@ -990,7 +990,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
void vgic_kick_vcpus(struct kvm *kvm)
{
struct kvm_vcpu *vcpu;
- int c;
+ unsigned long c;
/*
* We've injected an interrupt, time to find out who deserves
diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h
index 3fd6c86a7ef3..0c8da72953f0 100644
--- a/arch/arm64/kvm/vgic/vgic.h
+++ b/arch/arm64/kvm/vgic/vgic.h
@@ -98,6 +98,11 @@
#define DEBUG_SPINLOCK_BUG_ON(p)
#endif
+static inline u32 vgic_get_implementation_rev(struct kvm_vcpu *vcpu)
+{
+ return vcpu->kvm->arch.vgic.implementation_rev;
+}
+
/* Requires the irq_lock to be held by the caller. */
static inline bool irq_is_pending(struct vgic_irq *irq)
{
@@ -240,12 +245,11 @@ int vgic_v3_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
int offset, u32 *val);
int vgic_v3_redist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
int offset, u32 *val);
-int vgic_v3_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu, bool is_write,
- u64 id, u64 *val);
-int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, bool is_write, u64 id,
- u64 *reg);
+int vgic_v3_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr, bool is_write);
+int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr);
int vgic_v3_line_level_info_uaccess(struct kvm_vcpu *vcpu, bool is_write,
- u32 intid, u64 *val);
+ u32 intid, u32 *val);
int kvm_register_vgic_device(unsigned long type);
void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
@@ -308,6 +312,7 @@ static inline bool vgic_dist_overlap(struct kvm *kvm, gpa_t base, size_t size)
(base < d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE);
}
+bool vgic_lpis_enabled(struct kvm_vcpu *vcpu);
int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr);
int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its,
u32 devid, u32 eventid, struct vgic_irq **irq);
@@ -317,6 +322,10 @@ void vgic_lpi_translation_cache_init(struct kvm *kvm);
void vgic_lpi_translation_cache_destroy(struct kvm *kvm);
void vgic_its_invalidate_cache(struct kvm *kvm);
+/* GICv4.1 MMIO interface */
+int vgic_its_inv_lpi(struct kvm *kvm, struct vgic_irq *irq);
+int vgic_its_invall(struct kvm_vcpu *vcpu);
+
bool vgic_supports_direct_msis(struct kvm *kvm);
int vgic_v4_init(struct kvm *kvm);
void vgic_v4_teardown(struct kvm *kvm);
diff --git a/arch/arm64/kvm/vmid.c b/arch/arm64/kvm/vmid.c
new file mode 100644
index 000000000000..d78ae63d7c15
--- /dev/null
+++ b/arch/arm64/kvm/vmid.c
@@ -0,0 +1,196 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * VMID allocator.
+ *
+ * Based on the arm64 ASID allocator algorithm.
+ * Please refer to arch/arm64/mm/context.c for detailed
+ * comments on the algorithm.
+ *
+ * Copyright (C) 2002-2003 Deep Blue Solutions Ltd, all rights reserved.
+ * Copyright (C) 2012 ARM Ltd.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/bitops.h>
+
+#include <asm/kvm_asm.h>
+#include <asm/kvm_mmu.h>
+
+unsigned int kvm_arm_vmid_bits;
+static DEFINE_RAW_SPINLOCK(cpu_vmid_lock);
+
+static atomic64_t vmid_generation;
+static unsigned long *vmid_map;
+
+static DEFINE_PER_CPU(atomic64_t, active_vmids);
+static DEFINE_PER_CPU(u64, reserved_vmids);
+
+#define VMID_MASK (~GENMASK(kvm_arm_vmid_bits - 1, 0))
+#define VMID_FIRST_VERSION (1UL << kvm_arm_vmid_bits)
+
+#define NUM_USER_VMIDS VMID_FIRST_VERSION
+#define vmid2idx(vmid) ((vmid) & ~VMID_MASK)
+#define idx2vmid(idx) vmid2idx(idx)
+
+/*
+ * As VMID #0 is always reserved, it will never be allocated,
+ * so the value below can be treated as invalid. It is used to
+ * set active_vmids on vCPU schedule out.
+ */
+#define VMID_ACTIVE_INVALID VMID_FIRST_VERSION
+
+#define vmid_gen_match(vmid) \
+ (!(((vmid) ^ atomic64_read(&vmid_generation)) >> kvm_arm_vmid_bits))
+
+static void flush_context(void)
+{
+ int cpu;
+ u64 vmid;
+
+ bitmap_clear(vmid_map, 0, NUM_USER_VMIDS);
+
+ for_each_possible_cpu(cpu) {
+ vmid = atomic64_xchg_relaxed(&per_cpu(active_vmids, cpu), 0);
+
+ /* Preserve reserved VMID */
+ if (vmid == 0)
+ vmid = per_cpu(reserved_vmids, cpu);
+ __set_bit(vmid2idx(vmid), vmid_map);
+ per_cpu(reserved_vmids, cpu) = vmid;
+ }
+
+ /*
+ * Unlike the ASID allocator, we expect rollovers to be less
+ * frequent for VMIDs. Hence, instead of marking the CPU as
+ * flush_pending and issuing a local context invalidation on
+ * the next context switch, we broadcast a TLB flush + I-cache
+ * invalidation over the inner shareable domain on rollover.
+ */
+ kvm_call_hyp(__kvm_flush_vm_context);
+}
+
+static bool check_update_reserved_vmid(u64 vmid, u64 newvmid)
+{
+ int cpu;
+ bool hit = false;
+
+ /*
+ * Iterate over the set of reserved VMIDs looking for a match
+ * and update to use newvmid (i.e. the same VMID in the current
+ * generation).
+ */
+ for_each_possible_cpu(cpu) {
+ if (per_cpu(reserved_vmids, cpu) == vmid) {
+ hit = true;
+ per_cpu(reserved_vmids, cpu) = newvmid;
+ }
+ }
+
+ return hit;
+}
+
+static u64 new_vmid(struct kvm_vmid *kvm_vmid)
+{
+ static u32 cur_idx = 1;
+ u64 vmid = atomic64_read(&kvm_vmid->id);
+ u64 generation = atomic64_read(&vmid_generation);
+
+ if (vmid != 0) {
+ u64 newvmid = generation | (vmid & ~VMID_MASK);
+
+ if (check_update_reserved_vmid(vmid, newvmid)) {
+ atomic64_set(&kvm_vmid->id, newvmid);
+ return newvmid;
+ }
+
+ if (!__test_and_set_bit(vmid2idx(vmid), vmid_map)) {
+ atomic64_set(&kvm_vmid->id, newvmid);
+ return newvmid;
+ }
+ }
+
+ vmid = find_next_zero_bit(vmid_map, NUM_USER_VMIDS, cur_idx);
+ if (vmid != NUM_USER_VMIDS)
+ goto set_vmid;
+
+ /* We're out of VMIDs, so increment the global generation count */
+ generation = atomic64_add_return_relaxed(VMID_FIRST_VERSION,
+ &vmid_generation);
+ flush_context();
+
+ /* We have more VMIDs than CPUs, so this will always succeed */
+ vmid = find_next_zero_bit(vmid_map, NUM_USER_VMIDS, 1);
+
+set_vmid:
+ __set_bit(vmid, vmid_map);
+ cur_idx = vmid;
+ vmid = idx2vmid(vmid) | generation;
+ atomic64_set(&kvm_vmid->id, vmid);
+ return vmid;
+}
+
+/* Called from vCPU sched out with preemption disabled */
+void kvm_arm_vmid_clear_active(void)
+{
+ atomic64_set(this_cpu_ptr(&active_vmids), VMID_ACTIVE_INVALID);
+}
+
+void kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid)
+{
+ unsigned long flags;
+ u64 vmid, old_active_vmid;
+
+ vmid = atomic64_read(&kvm_vmid->id);
+
+ /*
+ * Please refer to the comments in check_and_switch_context()
+ * in arch/arm64/mm/context.c.
+ *
+ * Unlike the ASID allocator, we set active_vmids to
+ * VMID_ACTIVE_INVALID on vCPU schedule out to avoid
+ * reserving the VMID space needlessly on rollover.
+ * Hence we explicitly check for "!= 0" here to handle
+ * the sync with a concurrent rollover.
+ */
+ old_active_vmid = atomic64_read(this_cpu_ptr(&active_vmids));
+ if (old_active_vmid != 0 && vmid_gen_match(vmid) &&
+ 0 != atomic64_cmpxchg_relaxed(this_cpu_ptr(&active_vmids),
+ old_active_vmid, vmid))
+ return;
+
+ raw_spin_lock_irqsave(&cpu_vmid_lock, flags);
+
+ /* Check that our VMID belongs to the current generation. */
+ vmid = atomic64_read(&kvm_vmid->id);
+ if (!vmid_gen_match(vmid))
+ vmid = new_vmid(kvm_vmid);
+
+ atomic64_set(this_cpu_ptr(&active_vmids), vmid);
+ raw_spin_unlock_irqrestore(&cpu_vmid_lock, flags);
+}
+
+/*
+ * Initialize the VMID allocator
+ */
+int kvm_arm_vmid_alloc_init(void)
+{
+ kvm_arm_vmid_bits = kvm_get_vmid_bits();
+
+ /*
+ * Allocation after a rollover can fail unless we have at least
+ * one more VMID than there are CPUs. VMID #0 is always reserved.
+ */
+ WARN_ON(NUM_USER_VMIDS - 1 <= num_possible_cpus());
+ atomic64_set(&vmid_generation, VMID_FIRST_VERSION);
+ vmid_map = kcalloc(BITS_TO_LONGS(NUM_USER_VMIDS),
+ sizeof(*vmid_map), GFP_KERNEL);
+ if (!vmid_map)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void kvm_arm_vmid_alloc_free(void)
+{
+ kfree(vmid_map);
+}
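
The allocator above splits the 64-bit kvm_vmid value into a hardware VMID in the low kvm_arm_vmid_bits bits and a rollover generation in the bits above; vmid_gen_match() compares only the generation part. Below is a standalone worked example of that split (not kernel code), assuming 16 VMID bits purely for illustration.

/* Sketch of the generation/index split used by the VMID allocator. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define VMID_BITS		16
#define VMID_FIRST_VERSION	(1ULL << VMID_BITS)
#define VMID_IDX_MASK		(VMID_FIRST_VERSION - 1)

static bool gen_match(uint64_t vmid, uint64_t current_gen)
{
	/* Same check as vmid_gen_match(): compare the high bits only. */
	return !((vmid ^ current_gen) >> VMID_BITS);
}

int main(void)
{
	uint64_t gen = VMID_FIRST_VERSION;	/* generation 1 */
	uint64_t vmid = gen | 5;		/* hardware VMID 5, generation 1 */

	printf("hw vmid = %llu\n", (unsigned long long)(vmid & VMID_IDX_MASK));
	printf("valid now?            %d\n", gen_match(vmid, gen));

	gen += VMID_FIRST_VERSION;		/* rollover: generation 2 */
	printf("valid after rollover? %d\n", gen_match(vmid, gen));
	return 0;
}
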