aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/arch
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-03-16 11:32:12 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2023-03-16 11:32:12 -0700
commit0ddc84d2dd43e2c2c3f634baa05ea10abd31197e (patch)
tree8f49aea42e45692574290f7b2e12022e32409521 /arch
parentMerge tag 'linux-kselftest-fixes-6.3-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest (diff)
parentKVM: selftests: Sync KVM exit reasons in selftests (diff)
downloadwireguard-linux-0ddc84d2dd43e2c2c3f634baa05ea10abd31197e.tar.xz
wireguard-linux-0ddc84d2dd43e2c2c3f634baa05ea10abd31197e.zip
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini: "ARM64: - Address a rather annoying bug w.r.t. guest timer offsetting. The synchronization of timer offsets between vCPUs was broken, leading to inconsistent timer reads within the VM. x86: - New tests for the slow path of the EVTCHNOP_send Xen hypercall - Add missing nVMX consistency checks for CR0 and CR4 - Fix bug that broke AMD GATag on 512 vCPU machines Selftests: - Skip hugetlb tests if huge pages are not available - Sync KVM exit reasons" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: KVM: selftests: Sync KVM exit reasons in selftests KVM: selftests: Add macro to generate KVM exit reason strings KVM: selftests: Print expected and actual exit reason in KVM exit reason assert KVM: selftests: Make vCPU exit reason test assertion common KVM: selftests: Add EVTCHNOP_send slow path test to xen_shinfo_test KVM: selftests: Use enum for test numbers in xen_shinfo_test KVM: selftests: Add helpers to make Xen-style VMCALL/VMMCALL hypercalls KVM: selftests: Move the guts of kvm_hypercall() to a separate macro KVM: SVM: WARN if GATag generation drops VM or vCPU ID information KVM: SVM: Modify AVIC GATag to support max number of 512 vCPUs KVM: SVM: Fix a benign off-by-one bug in AVIC physical table mask selftests: KVM: skip hugetlb tests if huge pages are not available KVM: VMX: Use tabs instead of spaces for indentation KVM: VMX: Fix indentation coding style issue KVM: nVMX: remove unnecessary #ifdef KVM: nVMX: add missing consistency checks for CR0 and CR4 KVM: arm64: timers: Convert per-vcpu virtual offset to a global value
Diffstat (limited to 'arch')
-rw-r--r--arch/arm64/include/asm/kvm_host.h3
-rw-r--r--arch/arm64/kvm/arch_timer.c45
-rw-r--r--arch/arm64/kvm/hypercalls.c2
-rw-r--r--arch/x86/include/asm/svm.h12
-rw-r--r--arch/x86/kvm/svm/avic.c37
-rw-r--r--arch/x86/kvm/vmx/nested.c18
-rw-r--r--arch/x86/kvm/vmx/vmenter.S4
-rw-r--r--arch/x86/kvm/vmx/vmx.c12
8 files changed, 66 insertions, 67 deletions
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index a1892a8f6032..bcd774d74f34 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -193,6 +193,9 @@ struct kvm_arch {
/* Interrupt controller */
struct vgic_dist vgic;
+ /* Timers */
+ struct arch_timer_vm_data timer_data;
+
/* Mandated version of PSCI */
u32 psci_version;
diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index 00610477ec7b..e1af4301b913 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -84,14 +84,10 @@ u64 timer_get_cval(struct arch_timer_context *ctxt)
static u64 timer_get_offset(struct arch_timer_context *ctxt)
{
- struct kvm_vcpu *vcpu = ctxt->vcpu;
+ if (ctxt->offset.vm_offset)
+ return *ctxt->offset.vm_offset;
- switch(arch_timer_ctx_index(ctxt)) {
- case TIMER_VTIMER:
- return __vcpu_sys_reg(vcpu, CNTVOFF_EL2);
- default:
- return 0;
- }
+ return 0;
}
static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl)
@@ -128,15 +124,12 @@ static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval)
static void timer_set_offset(struct arch_timer_context *ctxt, u64 offset)
{
- struct kvm_vcpu *vcpu = ctxt->vcpu;
-
- switch(arch_timer_ctx_index(ctxt)) {
- case TIMER_VTIMER:
- __vcpu_sys_reg(vcpu, CNTVOFF_EL2) = offset;
- break;
- default:
+ if (!ctxt->offset.vm_offset) {
WARN(offset, "timer %ld\n", arch_timer_ctx_index(ctxt));
+ return;
}
+
+ WRITE_ONCE(*ctxt->offset.vm_offset, offset);
}
u64 kvm_phys_timer_read(void)
@@ -765,25 +758,6 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
return 0;
}
-/* Make the updates of cntvoff for all vtimer contexts atomic */
-static void update_vtimer_cntvoff(struct kvm_vcpu *vcpu, u64 cntvoff)
-{
- unsigned long i;
- struct kvm *kvm = vcpu->kvm;
- struct kvm_vcpu *tmp;
-
- mutex_lock(&kvm->lock);
- kvm_for_each_vcpu(i, tmp, kvm)
- timer_set_offset(vcpu_vtimer(tmp), cntvoff);
-
- /*
- * When called from the vcpu create path, the CPU being created is not
- * included in the loop above, so we just set it here as well.
- */
- timer_set_offset(vcpu_vtimer(vcpu), cntvoff);
- mutex_unlock(&kvm->lock);
-}
-
void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = vcpu_timer(vcpu);
@@ -791,10 +765,11 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
vtimer->vcpu = vcpu;
+ vtimer->offset.vm_offset = &vcpu->kvm->arch.timer_data.voffset;
ptimer->vcpu = vcpu;
/* Synchronize cntvoff across all vtimers of a VM. */
- update_vtimer_cntvoff(vcpu, kvm_phys_timer_read());
+ timer_set_offset(vtimer, kvm_phys_timer_read());
timer_set_offset(ptimer, 0);
hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
@@ -840,7 +815,7 @@ int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
break;
case KVM_REG_ARM_TIMER_CNT:
timer = vcpu_vtimer(vcpu);
- update_vtimer_cntvoff(vcpu, kvm_phys_timer_read() - value);
+ timer_set_offset(timer, kvm_phys_timer_read() - value);
break;
case KVM_REG_ARM_TIMER_CVAL:
timer = vcpu_vtimer(vcpu);
diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c
index 64c086c02c60..5da884e11337 100644
--- a/arch/arm64/kvm/hypercalls.c
+++ b/arch/arm64/kvm/hypercalls.c
@@ -44,7 +44,7 @@ static void kvm_ptp_get_time(struct kvm_vcpu *vcpu, u64 *val)
feature = smccc_get_arg1(vcpu);
switch (feature) {
case KVM_PTP_VIRT_COUNTER:
- cycles = systime_snapshot.cycles - vcpu_read_sys_reg(vcpu, CNTVOFF_EL2);
+ cycles = systime_snapshot.cycles - vcpu->kvm->arch.timer_data.voffset;
break;
case KVM_PTP_PHYS_COUNTER:
cycles = systime_snapshot.cycles;
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index cb1ee53ad3b1..770dcf75eaa9 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -261,20 +261,22 @@ enum avic_ipi_failure_cause {
AVIC_IPI_FAILURE_INVALID_BACKING_PAGE,
};
-#define AVIC_PHYSICAL_MAX_INDEX_MASK GENMASK_ULL(9, 0)
+#define AVIC_PHYSICAL_MAX_INDEX_MASK GENMASK_ULL(8, 0)
/*
- * For AVIC, the max index allowed for physical APIC ID
- * table is 0xff (255).
+ * For AVIC, the max index allowed for physical APIC ID table is 0xfe (254), as
+ * 0xff is a broadcast to all CPUs, i.e. can't be targeted individually.
*/
#define AVIC_MAX_PHYSICAL_ID 0XFEULL
/*
- * For x2AVIC, the max index allowed for physical APIC ID
- * table is 0x1ff (511).
+ * For x2AVIC, the max index allowed for physical APIC ID table is 0x1ff (511).
*/
#define X2AVIC_MAX_PHYSICAL_ID 0x1FFUL
+static_assert((AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == AVIC_MAX_PHYSICAL_ID);
+static_assert((X2AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == X2AVIC_MAX_PHYSICAL_ID);
+
#define AVIC_HPA_MASK ~((0xFFFULL << 52) | 0xFFF)
#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index ca684979e90d..cfc8ab773025 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -27,19 +27,38 @@
#include "irq.h"
#include "svm.h"
-/* AVIC GATAG is encoded using VM and VCPU IDs */
-#define AVIC_VCPU_ID_BITS 8
-#define AVIC_VCPU_ID_MASK ((1 << AVIC_VCPU_ID_BITS) - 1)
+/*
+ * Encode the arbitrary VM ID and the vCPU's default APIC ID, i.e the vCPU ID,
+ * into the GATag so that KVM can retrieve the correct vCPU from a GALog entry
+ * if an interrupt can't be delivered, e.g. because the vCPU isn't running.
+ *
+ * For the vCPU ID, use however many bits are currently allowed for the max
+ * guest physical APIC ID (limited by the size of the physical ID table), and
+ * use whatever bits remain to assign arbitrary AVIC IDs to VMs. Note, the
+ * size of the GATag is defined by hardware (32 bits), but is an opaque value
+ * as far as hardware is concerned.
+ */
+#define AVIC_VCPU_ID_MASK AVIC_PHYSICAL_MAX_INDEX_MASK
-#define AVIC_VM_ID_BITS 24
-#define AVIC_VM_ID_NR (1 << AVIC_VM_ID_BITS)
-#define AVIC_VM_ID_MASK ((1 << AVIC_VM_ID_BITS) - 1)
+#define AVIC_VM_ID_SHIFT HWEIGHT32(AVIC_PHYSICAL_MAX_INDEX_MASK)
+#define AVIC_VM_ID_MASK (GENMASK(31, AVIC_VM_ID_SHIFT) >> AVIC_VM_ID_SHIFT)
-#define AVIC_GATAG(x, y) (((x & AVIC_VM_ID_MASK) << AVIC_VCPU_ID_BITS) | \
- (y & AVIC_VCPU_ID_MASK))
-#define AVIC_GATAG_TO_VMID(x) ((x >> AVIC_VCPU_ID_BITS) & AVIC_VM_ID_MASK)
+#define AVIC_GATAG_TO_VMID(x) ((x >> AVIC_VM_ID_SHIFT) & AVIC_VM_ID_MASK)
#define AVIC_GATAG_TO_VCPUID(x) (x & AVIC_VCPU_ID_MASK)
+#define __AVIC_GATAG(vm_id, vcpu_id) ((((vm_id) & AVIC_VM_ID_MASK) << AVIC_VM_ID_SHIFT) | \
+ ((vcpu_id) & AVIC_VCPU_ID_MASK))
+#define AVIC_GATAG(vm_id, vcpu_id) \
+({ \
+ u32 ga_tag = __AVIC_GATAG(vm_id, vcpu_id); \
+ \
+ WARN_ON_ONCE(AVIC_GATAG_TO_VCPUID(ga_tag) != (vcpu_id)); \
+ WARN_ON_ONCE(AVIC_GATAG_TO_VMID(ga_tag) != (vm_id)); \
+ ga_tag; \
+})
+
+static_assert(__AVIC_GATAG(AVIC_VM_ID_MASK, AVIC_VCPU_ID_MASK) == -1u);
+
static bool force_avic;
module_param_unsafe(force_avic, bool, 0444);
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 7c4f5ca405c7..1bc2b80273c9 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2903,7 +2903,7 @@ static int nested_vmx_check_address_space_size(struct kvm_vcpu *vcpu,
static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
- bool ia32e;
+ bool ia32e = !!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE);
if (CC(!nested_host_cr0_valid(vcpu, vmcs12->host_cr0)) ||
CC(!nested_host_cr4_valid(vcpu, vmcs12->host_cr4)) ||
@@ -2923,12 +2923,6 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
vmcs12->host_ia32_perf_global_ctrl)))
return -EINVAL;
-#ifdef CONFIG_X86_64
- ia32e = !!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE);
-#else
- ia32e = false;
-#endif
-
if (ia32e) {
if (CC(!(vmcs12->host_cr4 & X86_CR4_PAE)))
return -EINVAL;
@@ -3022,7 +3016,7 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12,
enum vm_entry_failure_code *entry_failure_code)
{
- bool ia32e;
+ bool ia32e = !!(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE);
*entry_failure_code = ENTRY_FAIL_DEFAULT;
@@ -3048,6 +3042,13 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
vmcs12->guest_ia32_perf_global_ctrl)))
return -EINVAL;
+ if (CC((vmcs12->guest_cr0 & (X86_CR0_PG | X86_CR0_PE)) == X86_CR0_PG))
+ return -EINVAL;
+
+ if (CC(ia32e && !(vmcs12->guest_cr4 & X86_CR4_PAE)) ||
+ CC(ia32e && !(vmcs12->guest_cr0 & X86_CR0_PG)))
+ return -EINVAL;
+
/*
* If the load IA32_EFER VM-entry control is 1, the following checks
* are performed on the field for the IA32_EFER MSR:
@@ -3059,7 +3060,6 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
*/
if (to_vmx(vcpu)->nested.nested_run_pending &&
(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)) {
- ia32e = (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) != 0;
if (CC(!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer)) ||
CC(ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA)) ||
CC(((vmcs12->guest_cr0 & X86_CR0_PG) &&
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index f550540ed54e..631fd7da2bc3 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -262,7 +262,7 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL)
* eIBRS has its own protection against poisoned RSB, so it doesn't
* need the RSB filling sequence. But it does need to be enabled, and a
* single call to retire, before the first unbalanced RET.
- */
+ */
FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT,\
X86_FEATURE_RSB_VMEXIT_LITE
@@ -311,7 +311,7 @@ SYM_FUNC_END(vmx_do_nmi_irqoff)
* vmread_error_trampoline - Trampoline from inline asm to vmread_error()
* @field: VMCS field encoding that failed
* @fault: %true if the VMREAD faulted, %false if it failed
-
+ *
* Save and restore volatile registers across a call to vmread_error(). Note,
* all parameters are passed on the stack.
*/
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index bcac3efcde41..d2d6e1b6c788 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -874,7 +874,7 @@ void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu)
*/
if (is_guest_mode(vcpu))
eb |= get_vmcs12(vcpu)->exception_bitmap;
- else {
+ else {
int mask = 0, match = 0;
if (enable_ept && (eb & (1u << PF_VECTOR))) {
@@ -1282,7 +1282,7 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
}
}
- if (vmx->nested.need_vmcs12_to_shadow_sync)
+ if (vmx->nested.need_vmcs12_to_shadow_sync)
nested_sync_vmcs12_to_shadow(vcpu);
if (vmx->guest_state_loaded)
@@ -5049,10 +5049,10 @@ static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection)
if (to_vmx(vcpu)->nested.nested_run_pending)
return -EBUSY;
- /*
- * An IRQ must not be injected into L2 if it's supposed to VM-Exit,
- * e.g. if the IRQ arrived asynchronously after checking nested events.
- */
+ /*
+ * An IRQ must not be injected into L2 if it's supposed to VM-Exit,
+ * e.g. if the IRQ arrived asynchronously after checking nested events.
+ */
if (for_injection && is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
return -EBUSY;