diff options
author | 2024-12-10 17:33:02 -0800 | |
---|---|---|
committer | 2025-02-12 10:16:33 -0800 | |
commit | 93da6af3ae563b7dbc60d97b5872109822d7bf0a (patch) | |
tree | c9bdd5e7369ec4de8e2198948391ae3949ca9b32 | |
parent | KVM: x86: Query X86_FEATURE_MWAIT iff userspace owns the CPUID feature bit (diff) | |
download | wireguard-linux-93da6af3ae563b7dbc60d97b5872109822d7bf0a.tar.xz wireguard-linux-93da6af3ae563b7dbc60d97b5872109822d7bf0a.zip |
KVM: x86: Defer runtime updates of dynamic CPUID bits until CPUID emulation
Defer runtime CPUID updates until the next non-faulting CPUID emulation
or KVM_GET_CPUID2, which are the only paths in KVM that consume the
dynamic entries. Deferring the updates is especially beneficial to
nested VM-Enter/VM-Exit, as KVM will almost always detect multiple state
changes, not to mention the updates don't need to be realized while L2 is
active if CPUID is being intercepted by L1 (CPUID is a mandatory intercept
on Intel, but not AMD).
Deferring CPUID updates shaves several hundred cycles from nested VMX
roundtrips, as measured from L2 executing CPUID in a tight loop:
SKX 6850 => 6450
ICX 9000 => 8800
EMR 7900 => 7700
Alternatively, KVM could update only the CPUID leaves that are affected
by the state change, e.g. update XSAVE info only if XCR0 or XSS changes,
but that adds non-trivial complexity and doesn't solve the underlying
problem of nested transitions potentially changing both XCR0 and XSS, on
both nested VM-Enter and VM-Exit.
Skipping updates entirely if L2 is active and CPUID is being intercepted
by L1 could work for the common case. However, simply skipping updates if
L2 is active is *very* subtly dangerous and complex. Most KVM updates are
triggered by changes to the current vCPU state, which may be L2 state,
whereas performing updates only for L1 would requiring detecting changes
to L1 state. KVM would need to either track relevant L1 state, or defer
runtime CPUID updates until the next nested VM-Exit. The former is ugly
and complex, while the latter comes with similar dangers to deferring all
CPUID updates, and would only address the nested VM-Enter path.
To guard against using stale data, disallow querying dynamic CPUID feature
bits, i.e. features that KVM updates at runtime, via a compile-time
assertion in guest_cpu_cap_has(). Exempt MWAIT from the rule, as the
MISC_ENABLE_NO_MWAIT means that MWAIT is _conditionally_ a dynamic CPUID
feature.
Note, the rule could be enforced for MWAIT as well, e.g. by querying guest
CPUID in kvm_emulate_monitor_mwait, but there's no obvious advtantage to
doing so, and allowing MWAIT for guest_cpuid_has() opens up a different can
of worms. MONITOR/MWAIT can't be virtualized (for a reasonable definition),
and the nature of the MWAIT_NEVER_UD_FAULTS and MISC_ENABLE_NO_MWAIT quirks
means checking X86_FEATURE_MWAIT outside of kvm_emulate_monitor_mwait() is
wrong for other reasons.
Beyond the aforementioned feature bits, the only other dynamic CPUID
(sub)leaves are the XSAVE sizes, and similar to MWAIT, consuming those
CPUID entries in KVM is all but guaranteed to be a bug. The layout for an
actual XSAVE buffer depends on the format (compacted or not) and
potentially the features that are actually enabled. E.g. see the logic in
fpstate_clear_xstate_component() needed to poke into the guest's effective
XSAVE state to clear MPX state on INIT. KVM does consume
CPUID.0xD.0.{EAX,EDX} in kvm_check_cpuid() and cpuid_get_supported_xcr0(),
but not EBX, which is the only dynamic output register in the leaf.
Link: https://lore.kernel.org/r/20241211013302.1347853-6-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
Diffstat (limited to '')
-rw-r--r-- | arch/x86/include/asm/kvm_host.h | 1 | ||||
-rw-r--r-- | arch/x86/kvm/cpuid.c | 12 | ||||
-rw-r--r-- | arch/x86/kvm/cpuid.h | 9 | ||||
-rw-r--r-- | arch/x86/kvm/lapic.c | 2 | ||||
-rw-r--r-- | arch/x86/kvm/smm.c | 2 | ||||
-rw-r--r-- | arch/x86/kvm/svm/sev.c | 2 | ||||
-rw-r--r-- | arch/x86/kvm/svm/svm.c | 2 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/vmx.c | 2 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 6 |
9 files changed, 27 insertions, 11 deletions
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c4e844b60b0f..94b14b3a3b8d 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -879,6 +879,7 @@ struct kvm_vcpu_arch { int cpuid_nent; struct kvm_cpuid_entry2 *cpuid_entries; + bool cpuid_dynamic_bits_dirty; bool is_amd_compatible; /* diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 0de3f581ef12..97a90689a9dc 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -195,6 +195,7 @@ static int kvm_check_cpuid(struct kvm_vcpu *vcpu) } static u32 kvm_apply_cpuid_pv_features_quirk(struct kvm_vcpu *vcpu); +static void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu); /* Check whether the supplied CPUID data is equal to what is already set for the vCPU. */ static int kvm_cpuid_check_equal(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2, @@ -299,10 +300,12 @@ static __always_inline void kvm_update_feature_runtime(struct kvm_vcpu *vcpu, guest_cpu_cap_change(vcpu, x86_feature, has_feature); } -void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu) +static void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; + vcpu->arch.cpuid_dynamic_bits_dirty = false; + best = kvm_find_cpuid_entry(vcpu, 1); if (best) { kvm_update_feature_runtime(vcpu, best, X86_FEATURE_OSXSAVE, @@ -332,7 +335,6 @@ void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu) cpuid_entry_has(best, X86_FEATURE_XSAVEC))) best->ebx = xstate_required_size(vcpu->arch.xcr0, true); } -EXPORT_SYMBOL_GPL(kvm_update_cpuid_runtime); static bool kvm_cpuid_has_hyperv(struct kvm_vcpu *vcpu) { @@ -645,6 +647,9 @@ int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, if (cpuid->nent < vcpu->arch.cpuid_nent) return -E2BIG; + if (vcpu->arch.cpuid_dynamic_bits_dirty) + kvm_update_cpuid_runtime(vcpu); + if (copy_to_user(entries, vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2))) return -EFAULT; @@ -1984,6 +1989,9 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, struct kvm_cpuid_entry2 *entry; bool exact, used_max_basic = false; + if (vcpu->arch.cpuid_dynamic_bits_dirty) + kvm_update_cpuid_runtime(vcpu); + entry = kvm_find_cpuid_entry_index(vcpu, function, index); exact = !!entry; diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index 67d80aa72d50..d2884162a46a 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -11,7 +11,6 @@ extern u32 kvm_cpu_caps[NR_KVM_CPU_CAPS] __read_mostly; void kvm_set_cpu_caps(void); void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu); -void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu); struct kvm_cpuid_entry2 *kvm_find_cpuid_entry_index(struct kvm_vcpu *vcpu, u32 function, u32 index); struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, @@ -232,6 +231,14 @@ static __always_inline bool guest_cpu_cap_has(struct kvm_vcpu *vcpu, { unsigned int x86_leaf = __feature_leaf(x86_feature); + /* + * Except for MWAIT, querying dynamic feature bits is disallowed, so + * that KVM can defer runtime updates until the next CPUID emulation. + */ + BUILD_BUG_ON(x86_feature == X86_FEATURE_APIC || + x86_feature == X86_FEATURE_OSXSAVE || + x86_feature == X86_FEATURE_OSPKE); + return vcpu->arch.cpu_caps[x86_leaf] & __feature_bit(x86_feature); } diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 750bb89b20ac..9dbc0f5d9865 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2586,7 +2586,7 @@ static void __kvm_apic_set_base(struct kvm_vcpu *vcpu, u64 value) vcpu->arch.apic_base = value; if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE) - kvm_update_cpuid_runtime(vcpu); + vcpu->arch.cpuid_dynamic_bits_dirty = true; if (!apic) return; diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c index e0ab7df27b66..699e551ec93b 100644 --- a/arch/x86/kvm/smm.c +++ b/arch/x86/kvm/smm.c @@ -358,7 +358,7 @@ void enter_smm(struct kvm_vcpu *vcpu) goto error; #endif - kvm_update_cpuid_runtime(vcpu); + vcpu->arch.cpuid_dynamic_bits_dirty = true; kvm_mmu_reset_context(vcpu); return; error: diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 8bc62e994138..42a6d8bb1024 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -3261,7 +3261,7 @@ static void sev_es_sync_from_ghcb(struct vcpu_svm *svm) if (kvm_ghcb_xcr0_is_valid(svm)) { vcpu->arch.xcr0 = ghcb_get_xcr0(ghcb); - kvm_update_cpuid_runtime(vcpu); + vcpu->arch.cpuid_dynamic_bits_dirty = true; } /* Copy the GHCB exit information into the VMCB fields */ diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 7640a84e554a..88dd61389a2d 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1932,7 +1932,7 @@ void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) vmcb_mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR); if ((cr4 ^ old_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE)) - kvm_update_cpuid_runtime(vcpu); + vcpu->arch.cpuid_dynamic_bits_dirty = true; } static void svm_set_segment(struct kvm_vcpu *vcpu, diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index f72835e85b6d..b60de7f8e563 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -3523,7 +3523,7 @@ void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) vmcs_writel(GUEST_CR4, hw_cr4); if ((cr4 ^ old_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE)) - kvm_update_cpuid_runtime(vcpu); + vcpu->arch.cpuid_dynamic_bits_dirty = true; } void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9883f0565baf..263ab2ec6f71 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1264,7 +1264,7 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) vcpu->arch.xcr0 = xcr0; if ((xcr0 ^ old_xcr0) & XFEATURE_MASK_EXTEND) - kvm_update_cpuid_runtime(vcpu); + vcpu->arch.cpuid_dynamic_bits_dirty = true; return 0; } @@ -3899,7 +3899,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (!guest_cpu_cap_has(vcpu, X86_FEATURE_XMM3)) return 1; vcpu->arch.ia32_misc_enable_msr = data; - kvm_update_cpuid_runtime(vcpu); + vcpu->arch.cpuid_dynamic_bits_dirty = true; } else { vcpu->arch.ia32_misc_enable_msr = data; } @@ -3934,7 +3934,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (data & ~kvm_caps.supported_xss) return 1; vcpu->arch.ia32_xss = data; - kvm_update_cpuid_runtime(vcpu); + vcpu->arch.cpuid_dynamic_bits_dirty = true; break; case MSR_SMI_COUNT: if (!msr_info->host_initiated) |