aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/arch/x86/kvm/cpuid.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm/cpuid.c')
-rw-r--r--arch/x86/kvm/cpuid.c441
1 files changed, 291 insertions, 150 deletions
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index de6d44e07e34..adba49afb5fe 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -8,8 +8,10 @@
* Copyright 2011 Red Hat, Inc. and/or its affiliates.
* Copyright IBM Corporation, 2008
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/kvm_host.h>
+#include "linux/lockdep.h"
#include <linux/export.h>
#include <linux/vmalloc.h>
#include <linux/uaccess.h>
@@ -25,6 +27,7 @@
#include "mmu.h"
#include "trace.h"
#include "pmu.h"
+#include "xen.h"
/*
* Unlike "struct cpuinfo_x86.x86_capability", kvm_cpu_caps doesn't need to be
@@ -58,28 +61,70 @@ u32 xstate_required_size(u64 xstate_bv, bool compacted)
return ret;
}
-/*
- * This one is tied to SSB in the user API, and not
- * visible in /proc/cpuinfo.
- */
-#define KVM_X86_FEATURE_PSFD (13*32+28) /* Predictive Store Forwarding Disable */
-
#define F feature_bit
-#define SF(name) (boot_cpu_has(X86_FEATURE_##name) ? F(name) : 0)
+/* Scattered Flag - For features that are scattered by cpufeatures.h. */
+#define SF(name) \
+({ \
+ BUILD_BUG_ON(X86_FEATURE_##name >= MAX_CPU_FEATURES); \
+ (boot_cpu_has(X86_FEATURE_##name) ? F(name) : 0); \
+})
+
+/*
+ * Magic value used by KVM when querying userspace-provided CPUID entries and
+ * doesn't care about the CPIUD index because the index of the function in
+ * question is not significant. Note, this magic value must have at least one
+ * bit set in bits[63:32] and must be consumed as a u64 by cpuid_entry2_find()
+ * to avoid false positives when processing guest CPUID input.
+ */
+#define KVM_CPUID_INDEX_NOT_SIGNIFICANT -1ull
static inline struct kvm_cpuid_entry2 *cpuid_entry2_find(
- struct kvm_cpuid_entry2 *entries, int nent, u32 function, u32 index)
+ struct kvm_cpuid_entry2 *entries, int nent, u32 function, u64 index)
{
struct kvm_cpuid_entry2 *e;
int i;
+ /*
+ * KVM has a semi-arbitrary rule that querying the guest's CPUID model
+ * with IRQs disabled is disallowed. The CPUID model can legitimately
+ * have over one hundred entries, i.e. the lookup is slow, and IRQs are
+ * typically disabled in KVM only when KVM is in a performance critical
+ * path, e.g. the core VM-Enter/VM-Exit run loop. Nothing will break
+ * if this rule is violated, this assertion is purely to flag potential
+ * performance issues. If this fires, consider moving the lookup out
+ * of the hotpath, e.g. by caching information during CPUID updates.
+ */
+ lockdep_assert_irqs_enabled();
+
for (i = 0; i < nent; i++) {
e = &entries[i];
- if (e->function == function &&
- (!(e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) || e->index == index))
+ if (e->function != function)
+ continue;
+
+ /*
+ * If the index isn't significant, use the first entry with a
+ * matching function. It's userspace's responsibility to not
+ * provide "duplicate" entries in all cases.
+ */
+ if (!(e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) || e->index == index)
return e;
+
+
+ /*
+ * Similarly, use the first matching entry if KVM is doing a
+ * lookup (as opposed to emulating CPUID) for a function that's
+ * architecturally defined as not having a significant index.
+ */
+ if (index == KVM_CPUID_INDEX_NOT_SIGNIFICANT) {
+ /*
+ * Direct lookups from KVM should not diverge from what
+ * KVM defines internally (the architectural behavior).
+ */
+ WARN_ON_ONCE(cpuid_function_is_indexed(function));
+ return e;
+ }
}
return NULL;
@@ -96,7 +141,8 @@ static int kvm_check_cpuid(struct kvm_vcpu *vcpu,
* The existing code assumes virtual address is 48-bit or 57-bit in the
* canonical address checks; exit if it is ever changed.
*/
- best = cpuid_entry2_find(entries, nent, 0x80000008, 0);
+ best = cpuid_entry2_find(entries, nent, 0x80000008,
+ KVM_CPUID_INDEX_NOT_SIGNIFICANT);
if (best) {
int vaddr_bits = (best->eax & 0xff00) >> 8;
@@ -143,15 +189,15 @@ static int kvm_cpuid_check_equal(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2
return 0;
}
-static void kvm_update_kvm_cpuid_base(struct kvm_vcpu *vcpu)
+static struct kvm_hypervisor_cpuid kvm_get_hypervisor_cpuid(struct kvm_vcpu *vcpu,
+ const char *sig)
{
- u32 function;
+ struct kvm_hypervisor_cpuid cpuid = {};
struct kvm_cpuid_entry2 *entry;
+ u32 base;
- vcpu->arch.kvm_cpuid_base = 0;
-
- for_each_possible_hypervisor_cpuid_base(function) {
- entry = kvm_find_cpuid_entry(vcpu, function, 0);
+ for_each_possible_hypervisor_cpuid_base(base) {
+ entry = kvm_find_cpuid_entry(vcpu, base);
if (entry) {
u32 signature[3];
@@ -160,24 +206,27 @@ static void kvm_update_kvm_cpuid_base(struct kvm_vcpu *vcpu)
signature[1] = entry->ecx;
signature[2] = entry->edx;
- BUILD_BUG_ON(sizeof(signature) > sizeof(KVM_SIGNATURE));
- if (!memcmp(signature, KVM_SIGNATURE, sizeof(signature))) {
- vcpu->arch.kvm_cpuid_base = function;
+ if (!memcmp(signature, sig, sizeof(signature))) {
+ cpuid.base = base;
+ cpuid.limit = entry->eax;
break;
}
}
}
+
+ return cpuid;
}
static struct kvm_cpuid_entry2 *__kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu,
struct kvm_cpuid_entry2 *entries, int nent)
{
- u32 base = vcpu->arch.kvm_cpuid_base;
+ u32 base = vcpu->arch.kvm_cpuid.base;
if (!base)
return NULL;
- return cpuid_entry2_find(entries, nent, base | KVM_CPUID_FEATURES, 0);
+ return cpuid_entry2_find(entries, nent, base | KVM_CPUID_FEATURES,
+ KVM_CPUID_INDEX_NOT_SIGNIFICANT);
}
static struct kvm_cpuid_entry2 *kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu)
@@ -200,7 +249,7 @@ void kvm_update_pv_runtime(struct kvm_vcpu *vcpu)
/*
* Calculate guest's supported XCR0 taking into account guest CPUID data and
- * supported_xcr0 (comprised of host configuration and KVM_SUPPORTED_XCR0).
+ * KVM's supported XCR0 (comprised of host's XCR0 and KVM_SUPPORTED_XCR0).
*/
static u64 cpuid_get_supported_xcr0(struct kvm_cpuid_entry2 *entries, int nent)
{
@@ -210,21 +259,20 @@ static u64 cpuid_get_supported_xcr0(struct kvm_cpuid_entry2 *entries, int nent)
if (!best)
return 0;
- return (best->eax | ((u64)best->edx << 32)) & supported_xcr0;
+ return (best->eax | ((u64)best->edx << 32)) & kvm_caps.supported_xcr0;
}
static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *entries,
int nent)
{
struct kvm_cpuid_entry2 *best;
- u64 guest_supported_xcr0 = cpuid_get_supported_xcr0(entries, nent);
- best = cpuid_entry2_find(entries, nent, 1, 0);
+ best = cpuid_entry2_find(entries, nent, 1, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
if (best) {
/* Update OSXSAVE bit */
if (boot_cpu_has(X86_FEATURE_XSAVE))
cpuid_entry_change(best, X86_FEATURE_OSXSAVE,
- kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE));
+ kvm_is_cr4_bit_set(vcpu, X86_CR4_OSXSAVE));
cpuid_entry_change(best, X86_FEATURE_APIC,
vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
@@ -233,7 +281,7 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e
best = cpuid_entry2_find(entries, nent, 7, 0);
if (best && boot_cpu_has(X86_FEATURE_PKU) && best->function == 0x7)
cpuid_entry_change(best, X86_FEATURE_OSPKE,
- kvm_read_cr4_bits(vcpu, X86_CR4_PKE));
+ kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE));
best = cpuid_entry2_find(entries, nent, 0xD, 0);
if (best)
@@ -250,27 +298,12 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e
best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT);
if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT)) {
- best = cpuid_entry2_find(entries, nent, 0x1, 0);
+ best = cpuid_entry2_find(entries, nent, 0x1, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
if (best)
cpuid_entry_change(best, X86_FEATURE_MWAIT,
vcpu->arch.ia32_misc_enable_msr &
MSR_IA32_MISC_ENABLE_MWAIT);
}
-
- /*
- * Bits 127:0 of the allowed SECS.ATTRIBUTES (CPUID.0x12.0x1) enumerate
- * the supported XSAVE Feature Request Mask (XFRM), i.e. the enclave's
- * requested XCR0 value. The enclave's XFRM must be a subset of XCRO
- * at the time of EENTER, thus adjust the allowed XFRM by the guest's
- * supported XCR0. Similar to XCR0 handling, FP and SSE are forced to
- * '1' even on CPUs that don't support XSAVE.
- */
- best = cpuid_entry2_find(entries, nent, 0x12, 0x1);
- if (best) {
- best->ecx &= guest_supported_xcr0 & 0xffffffff;
- best->edx &= guest_supported_xcr0 >> 32;
- best->ecx |= XFEATURE_MASK_FPSSE;
- }
}
void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
@@ -279,13 +312,46 @@ void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_update_cpuid_runtime);
+static bool kvm_cpuid_has_hyperv(struct kvm_cpuid_entry2 *entries, int nent)
+{
+#ifdef CONFIG_KVM_HYPERV
+ struct kvm_cpuid_entry2 *entry;
+
+ entry = cpuid_entry2_find(entries, nent, HYPERV_CPUID_INTERFACE,
+ KVM_CPUID_INDEX_NOT_SIGNIFICANT);
+ return entry && entry->eax == HYPERV_CPUID_SIGNATURE_EAX;
+#else
+ return false;
+#endif
+}
+
static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
struct kvm_cpuid_entry2 *best;
- u64 guest_supported_xcr0;
+ bool allow_gbpages;
+
+ BUILD_BUG_ON(KVM_NR_GOVERNED_FEATURES > KVM_MAX_NR_GOVERNED_FEATURES);
+ bitmap_zero(vcpu->arch.governed_features.enabled,
+ KVM_MAX_NR_GOVERNED_FEATURES);
+
+ /*
+ * If TDP is enabled, let the guest use GBPAGES if they're supported in
+ * hardware. The hardware page walker doesn't let KVM disable GBPAGES,
+ * i.e. won't treat them as reserved, and KVM doesn't redo the GVA->GPA
+ * walk for performance and complexity reasons. Not to mention KVM
+ * _can't_ solve the problem because GVA->GPA walks aren't visible to
+ * KVM once a TDP translation is installed. Mimic hardware behavior so
+ * that KVM's is at least consistent, i.e. doesn't randomly inject #PF.
+ * If TDP is disabled, honor *only* guest CPUID as KVM has full control
+ * and can install smaller shadow pages if the host lacks 1GiB support.
+ */
+ allow_gbpages = tdp_enabled ? boot_cpu_has(X86_FEATURE_GBPAGES) :
+ guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES);
+ if (allow_gbpages)
+ kvm_governed_feature_set(vcpu, X86_FEATURE_GBPAGES);
- best = kvm_find_cpuid_entry(vcpu, 1, 0);
+ best = kvm_find_cpuid_entry(vcpu, 1);
if (best && apic) {
if (cpuid_entry_has(best, X86_FEATURE_TSC_DEADLINE_TIMER))
apic->lapic_timer.timer_mode_mask = 3 << 17;
@@ -295,11 +361,9 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
kvm_apic_set_version(vcpu);
}
- guest_supported_xcr0 =
+ vcpu->arch.guest_supported_xcr0 =
cpuid_get_supported_xcr0(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent);
- vcpu->arch.guest_fpu.fpstate->user_xfeatures = guest_supported_xcr0;
-
kvm_update_pv_runtime(vcpu);
vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
@@ -309,7 +373,8 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
vcpu->arch.cr4_guest_rsvd_bits =
__cr4_reserved_bits(guest_cpuid_has, vcpu);
- kvm_hv_set_cpuid(vcpu);
+ kvm_hv_set_cpuid(vcpu, kvm_cpuid_has_hyperv(vcpu->arch.cpuid_entries,
+ vcpu->arch.cpuid_nent));
/* Invoke the vendor callback only after the above state is updated. */
static_call(kvm_x86_vcpu_after_set_cpuid)(vcpu);
@@ -325,10 +390,10 @@ int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
- best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0);
+ best = kvm_find_cpuid_entry(vcpu, 0x80000000);
if (!best || best->eax < 0x80000008)
goto not_found;
- best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
+ best = kvm_find_cpuid_entry(vcpu, 0x80000008);
if (best)
return best->eax & 0xff;
not_found:
@@ -363,7 +428,7 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
* KVM_SET_CPUID{,2} again. To support this legacy behavior, check
* whether the supplied CPUID data is equal to what's already set.
*/
- if (vcpu->arch.last_vmentry_cpu != -1) {
+ if (kvm_vcpu_has_run(vcpu)) {
r = kvm_cpuid_check_equal(vcpu, e2, nent);
if (r)
return r;
@@ -372,6 +437,14 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
return 0;
}
+#ifdef CONFIG_KVM_HYPERV
+ if (kvm_cpuid_has_hyperv(e2, nent)) {
+ r = kvm_hv_vcpu_init(vcpu);
+ if (r)
+ return r;
+ }
+#endif
+
r = kvm_check_cpuid(vcpu, e2, nent);
if (r)
return r;
@@ -380,7 +453,10 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
vcpu->arch.cpuid_entries = e2;
vcpu->arch.cpuid_nent = nent;
- kvm_update_kvm_cpuid_base(vcpu);
+ vcpu->arch.kvm_cpuid = kvm_get_hypervisor_cpuid(vcpu, KVM_SIGNATURE);
+#ifdef CONFIG_KVM_XEN
+ vcpu->arch.xen.cpuid = kvm_get_hypervisor_cpuid(vcpu, XEN_SIGNATURE);
+#endif
kvm_vcpu_after_set_cpuid(vcpu);
return 0;
@@ -399,7 +475,7 @@ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
return -E2BIG;
if (cpuid->nent) {
- e = vmemdup_user(entries, array_size(sizeof(*e), cpuid->nent));
+ e = vmemdup_array_user(entries, cpuid->nent, sizeof(*e));
if (IS_ERR(e))
return PTR_ERR(e);
@@ -443,7 +519,7 @@ int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
return -E2BIG;
if (cpuid->nent) {
- e2 = vmemdup_user(entries, array_size(sizeof(*e2), cpuid->nent));
+ e2 = vmemdup_array_user(entries, cpuid->nent, sizeof(*e2));
if (IS_ERR(e2))
return PTR_ERR(e2);
}
@@ -459,20 +535,15 @@ int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
struct kvm_cpuid2 *cpuid,
struct kvm_cpuid_entry2 __user *entries)
{
- int r;
-
- r = -E2BIG;
if (cpuid->nent < vcpu->arch.cpuid_nent)
- goto out;
- r = -EFAULT;
+ return -E2BIG;
+
if (copy_to_user(entries, vcpu->arch.cpuid_entries,
vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2)))
- goto out;
- return 0;
+ return -EFAULT;
-out:
cpuid->nent = vcpu->arch.cpuid_nent;
- return r;
+ return 0;
}
/* Mask kvm_cpu_caps for @leaf with the raw CPUID capabilities of this CPU. */
@@ -490,9 +561,9 @@ static __always_inline void __kvm_cpu_cap_mask(unsigned int leaf)
}
static __always_inline
-void kvm_cpu_cap_init_scattered(enum kvm_only_cpuid_leafs leaf, u32 mask)
+void kvm_cpu_cap_init_kvm_defined(enum kvm_only_cpuid_leafs leaf, u32 mask)
{
- /* Use kvm_cpu_cap_mask for non-scattered leafs. */
+ /* Use kvm_cpu_cap_mask for leafs that aren't KVM-only. */
BUILD_BUG_ON(leaf < NCAPINTS);
kvm_cpu_caps[leaf] = mask;
@@ -502,7 +573,7 @@ void kvm_cpu_cap_init_scattered(enum kvm_only_cpuid_leafs leaf, u32 mask)
static __always_inline void kvm_cpu_cap_mask(enum cpuid_leafs leaf, u32 mask)
{
- /* Use kvm_cpu_cap_init_scattered for scattered leafs. */
+ /* Use kvm_cpu_cap_init_kvm_defined for KVM-only leafs. */
BUILD_BUG_ON(leaf >= NCAPINTS);
kvm_cpu_caps[leaf] &= mask;
@@ -589,7 +660,7 @@ void kvm_set_cpu_caps(void)
F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
F(MD_CLEAR) | F(AVX512_VP2INTERSECT) | F(FSRM) |
F(SERIALIZE) | F(TSXLDTRK) | F(AVX512_FP16) |
- F(AMX_TILE) | F(AMX_INT8) | F(AMX_BF16)
+ F(AMX_TILE) | F(AMX_INT8) | F(AMX_BF16) | F(FLUSH_L1D)
);
/* TSC_ADJUST and ARCH_CAPABILITIES are emulated in software. */
@@ -604,15 +675,27 @@ void kvm_set_cpu_caps(void)
kvm_cpu_cap_set(X86_FEATURE_SPEC_CTRL_SSBD);
kvm_cpu_cap_mask(CPUID_7_1_EAX,
- F(AVX_VNNI) | F(AVX512_BF16)
+ F(AVX_VNNI) | F(AVX512_BF16) | F(CMPCCXADD) |
+ F(FZRM) | F(FSRS) | F(FSRC) |
+ F(AMX_FP16) | F(AVX_IFMA) | F(LAM)
+ );
+
+ kvm_cpu_cap_init_kvm_defined(CPUID_7_1_EDX,
+ F(AVX_VNNI_INT8) | F(AVX_NE_CONVERT) | F(PREFETCHITI) |
+ F(AMX_COMPLEX)
+ );
+
+ kvm_cpu_cap_init_kvm_defined(CPUID_7_2_EDX,
+ F(INTEL_PSFD) | F(IPRED_CTRL) | F(RRSBA_CTRL) | F(DDPD_U) |
+ F(BHI_CTRL) | F(MCDT_NO)
);
kvm_cpu_cap_mask(CPUID_D_1_EAX,
F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | F(XSAVES) | f_xfd
);
- kvm_cpu_cap_init_scattered(CPUID_12_EAX,
- SF(SGX1) | SF(SGX2)
+ kvm_cpu_cap_init_kvm_defined(CPUID_12_EAX,
+ SF(SGX1) | SF(SGX2) | SF(SGX_EDECCSSA)
);
kvm_cpu_cap_mask(CPUID_8000_0001_ECX,
@@ -637,11 +720,15 @@ void kvm_set_cpu_caps(void)
if (!tdp_enabled && IS_ENABLED(CONFIG_X86_64))
kvm_cpu_cap_set(X86_FEATURE_GBPAGES);
+ kvm_cpu_cap_init_kvm_defined(CPUID_8000_0007_EDX,
+ SF(CONSTANT_TSC)
+ );
+
kvm_cpu_cap_mask(CPUID_8000_0008_EBX,
F(CLZERO) | F(XSAVEERPTR) |
F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) |
F(AMD_SSB_NO) | F(AMD_STIBP) | F(AMD_STIBP_ALWAYS_ON) |
- __feature_bit(KVM_X86_FEATURE_PSFD)
+ F(AMD_PSFD)
);
/*
@@ -677,6 +764,36 @@ void kvm_set_cpu_caps(void)
0 /* SME */ | F(SEV) | 0 /* VM_PAGE_FLUSH */ | F(SEV_ES) |
F(SME_COHERENT));
+ kvm_cpu_cap_mask(CPUID_8000_0021_EAX,
+ F(NO_NESTED_DATA_BP) | F(LFENCE_RDTSC) | 0 /* SmmPgCfgLock */ |
+ F(NULL_SEL_CLR_BASE) | F(AUTOIBRS) | 0 /* PrefetchCtlMsr */ |
+ F(WRMSR_XX_BASE_NS)
+ );
+
+ kvm_cpu_cap_check_and_set(X86_FEATURE_SBPB);
+ kvm_cpu_cap_check_and_set(X86_FEATURE_IBPB_BRTYPE);
+ kvm_cpu_cap_check_and_set(X86_FEATURE_SRSO_NO);
+
+ kvm_cpu_cap_init_kvm_defined(CPUID_8000_0022_EAX,
+ F(PERFMON_V2)
+ );
+
+ /*
+ * Synthesize "LFENCE is serializing" into the AMD-defined entry in
+ * KVM's supported CPUID if the feature is reported as supported by the
+ * kernel. LFENCE_RDTSC was a Linux-defined synthetic feature long
+ * before AMD joined the bandwagon, e.g. LFENCE is serializing on most
+ * CPUs that support SSE2. On CPUs that don't support AMD's leaf,
+ * kvm_cpu_cap_mask() will unfortunately drop the flag due to ANDing
+ * the mask with the raw host CPUID, and reporting support in AMD's
+ * leaf can make it easier for userspace to detect the feature.
+ */
+ if (cpu_feature_enabled(X86_FEATURE_LFENCE_RDTSC))
+ kvm_cpu_cap_set(X86_FEATURE_LFENCE_RDTSC);
+ if (!static_cpu_has_bug(X86_BUG_NULL_SEG))
+ kvm_cpu_cap_set(X86_FEATURE_NULL_SEL_CLR_BASE);
+ kvm_cpu_cap_set(X86_FEATURE_NO_SMM_CTL_MSR);
+
kvm_cpu_cap_mask(CPUID_C000_0001_EDX,
F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) |
F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) |
@@ -706,16 +823,22 @@ struct kvm_cpuid_array {
int nent;
};
+static struct kvm_cpuid_entry2 *get_next_cpuid(struct kvm_cpuid_array *array)
+{
+ if (array->nent >= array->maxnent)
+ return NULL;
+
+ return &array->entries[array->nent++];
+}
+
static struct kvm_cpuid_entry2 *do_host_cpuid(struct kvm_cpuid_array *array,
u32 function, u32 index)
{
- struct kvm_cpuid_entry2 *entry;
+ struct kvm_cpuid_entry2 *entry = get_next_cpuid(array);
- if (array->nent >= array->maxnent)
+ if (!entry)
return NULL;
- entry = &array->entries[array->nent++];
-
memset(entry, 0, sizeof(*entry));
entry->function = function;
entry->index = index;
@@ -848,83 +971,71 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
break;
/* function 7 has additional index. */
case 7:
- entry->eax = min(entry->eax, 1u);
+ max_idx = entry->eax = min(entry->eax, 2u);
cpuid_entry_override(entry, CPUID_7_0_EBX);
cpuid_entry_override(entry, CPUID_7_ECX);
cpuid_entry_override(entry, CPUID_7_EDX);
- /* KVM only supports 0x7.0 and 0x7.1, capped above via min(). */
- if (entry->eax == 1) {
+ /* KVM only supports up to 0x7.2, capped above via min(). */
+ if (max_idx >= 1) {
entry = do_host_cpuid(array, function, 1);
if (!entry)
goto out;
cpuid_entry_override(entry, CPUID_7_1_EAX);
+ cpuid_entry_override(entry, CPUID_7_1_EDX);
entry->ebx = 0;
entry->ecx = 0;
- entry->edx = 0;
}
- break;
- case 9:
+ if (max_idx >= 2) {
+ entry = do_host_cpuid(array, function, 2);
+ if (!entry)
+ goto out;
+
+ cpuid_entry_override(entry, CPUID_7_2_EDX);
+ entry->ecx = 0;
+ entry->ebx = 0;
+ entry->eax = 0;
+ }
break;
case 0xa: { /* Architectural Performance Monitoring */
- struct x86_pmu_capability cap;
union cpuid10_eax eax;
union cpuid10_edx edx;
- if (!static_cpu_has(X86_FEATURE_ARCH_PERFMON)) {
+ if (!enable_pmu || !static_cpu_has(X86_FEATURE_ARCH_PERFMON)) {
entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
break;
}
- perf_get_x86_pmu_capability(&cap);
+ eax.split.version_id = kvm_pmu_cap.version;
+ eax.split.num_counters = kvm_pmu_cap.num_counters_gp;
+ eax.split.bit_width = kvm_pmu_cap.bit_width_gp;
+ eax.split.mask_length = kvm_pmu_cap.events_mask_len;
+ edx.split.num_counters_fixed = kvm_pmu_cap.num_counters_fixed;
+ edx.split.bit_width_fixed = kvm_pmu_cap.bit_width_fixed;
- /*
- * The guest architecture pmu is only supported if the architecture
- * pmu exists on the host and the module parameters allow it.
- */
- if (!cap.version || !enable_pmu)
- memset(&cap, 0, sizeof(cap));
-
- eax.split.version_id = min(cap.version, 2);
- eax.split.num_counters = cap.num_counters_gp;
- eax.split.bit_width = cap.bit_width_gp;
- eax.split.mask_length = cap.events_mask_len;
-
- edx.split.num_counters_fixed =
- min(cap.num_counters_fixed, KVM_PMC_MAX_FIXED);
- edx.split.bit_width_fixed = cap.bit_width_fixed;
- if (cap.version)
+ if (kvm_pmu_cap.version)
edx.split.anythread_deprecated = 1;
edx.split.reserved1 = 0;
edx.split.reserved2 = 0;
entry->eax = eax.full;
- entry->ebx = cap.events_mask;
+ entry->ebx = kvm_pmu_cap.events_mask;
entry->ecx = 0;
entry->edx = edx.full;
break;
}
- /*
- * Per Intel's SDM, the 0x1f is a superset of 0xb,
- * thus they can be handled by common code.
- */
case 0x1f:
case 0xb:
/*
- * Populate entries until the level type (ECX[15:8]) of the
- * previous entry is zero. Note, CPUID EAX.{0x1f,0xb}.0 is
- * the starting entry, filled by the primary do_host_cpuid().
+ * No topology; a valid topology is indicated by the presence
+ * of subleaf 1.
*/
- for (i = 1; entry->ecx & 0xff00; ++i) {
- entry = do_host_cpuid(array, function, i);
- if (!entry)
- goto out;
- }
+ entry->eax = entry->ebx = entry->ecx = 0;
break;
case 0xd: {
- u64 permitted_xcr0 = supported_xcr0 & xstate_get_guest_group_perm();
- u64 permitted_xss = supported_xss;
+ u64 permitted_xcr0 = kvm_get_filtered_xcr0();
+ u64 permitted_xss = kvm_caps.supported_xss;
entry->eax &= permitted_xcr0;
entry->ebx = xstate_required_size(permitted_xcr0, false);
@@ -1007,9 +1118,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
* userspace. ATTRIBUTES.XFRM is not adjusted as userspace is
* expected to derive it from supported XCR0.
*/
- entry->eax &= SGX_ATTR_DEBUG | SGX_ATTR_MODE64BIT |
- SGX_ATTR_PROVISIONKEY | SGX_ATTR_EINITTOKENKEY |
- SGX_ATTR_KSS;
+ entry->eax &= SGX_ATTR_PRIV_MASK | SGX_ATTR_UNPRIV_MASK;
entry->ebx &= 0;
break;
/* Intel PT */
@@ -1073,7 +1182,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
entry->edx = 0;
break;
case 0x80000000:
- entry->eax = min(entry->eax, 0x80000021);
+ entry->eax = min(entry->eax, 0x80000022);
/*
* Serializing LFENCE is reported in a multitude of ways, and
* NullSegClearsBase is not reported in CPUID on Zen2; help
@@ -1093,15 +1202,20 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
entry->eax = max(entry->eax, 0x80000021);
break;
case 0x80000001:
+ entry->ebx &= ~GENMASK(27, 16);
cpuid_entry_override(entry, CPUID_8000_0001_EDX);
cpuid_entry_override(entry, CPUID_8000_0001_ECX);
break;
+ case 0x80000005:
+ /* Pass host L1 cache and TLB info. */
+ break;
case 0x80000006:
- /* L2 cache and TLB: pass through host info. */
+ /* Drop reserved bits, pass host L2 cache and TLB info. */
+ entry->edx &= ~GENMASK(17, 16);
break;
case 0x80000007: /* Advanced power management */
- /* invariant TSC is CPUID.80000007H:EDX[8] */
- entry->edx &= (1 << 8);
+ cpuid_entry_override(entry, CPUID_8000_0007_EDX);
+
/* mask against host */
entry->edx &= boot_cpu_data.x86_power;
entry->eax = entry->ebx = entry->ecx = 0;
@@ -1127,6 +1241,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
g_phys_as = phys_as;
entry->eax = g_phys_as | (virt_as << 8);
+ entry->ecx &= ~(GENMASK(31, 16) | GENMASK(11, 8));
entry->edx = 0;
cpuid_entry_override(entry, CPUID_8000_0008_EBX);
break;
@@ -1146,14 +1261,21 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
entry->ecx = entry->edx = 0;
break;
case 0x8000001a:
+ entry->eax &= GENMASK(2, 0);
+ entry->ebx = entry->ecx = entry->edx = 0;
+ break;
case 0x8000001e:
+ /* Do not return host topology information. */
+ entry->eax = entry->ebx = entry->ecx = 0;
+ entry->edx = 0; /* reserved */
break;
case 0x8000001F:
if (!kvm_cpu_cap_has(X86_FEATURE_SEV)) {
entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
} else {
cpuid_entry_override(entry, CPUID_8000_001F_EAX);
-
+ /* Clear NumVMPL since KVM does not support VMPL. */
+ entry->ebx &= ~GENMASK(31, 12);
/*
* Enumerate '0' for "PA bits reduction", the adjusted
* MAXPHYADDR is enumerated directly (see 0x80000008).
@@ -1166,22 +1288,30 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
break;
case 0x80000021:
entry->ebx = entry->ecx = entry->edx = 0;
- /*
- * Pass down these bits:
- * EAX 0 NNDBP, Processor ignores nested data breakpoints
- * EAX 2 LAS, LFENCE always serializing
- * EAX 6 NSCB, Null selector clear base
- *
- * Other defined bits are for MSRs that KVM does not expose:
- * EAX 3 SPCL, SMM page configuration lock
- * EAX 13 PCMSR, Prefetch control MSR
- */
- entry->eax &= BIT(0) | BIT(2) | BIT(6);
- if (static_cpu_has(X86_FEATURE_LFENCE_RDTSC))
- entry->eax |= BIT(2);
- if (!static_cpu_has_bug(X86_BUG_NULL_SEG))
- entry->eax |= BIT(6);
+ cpuid_entry_override(entry, CPUID_8000_0021_EAX);
break;
+ /* AMD Extended Performance Monitoring and Debug */
+ case 0x80000022: {
+ union cpuid_0x80000022_ebx ebx;
+
+ entry->ecx = entry->edx = 0;
+ if (!enable_pmu || !kvm_cpu_cap_has(X86_FEATURE_PERFMON_V2)) {
+ entry->eax = entry->ebx;
+ break;
+ }
+
+ cpuid_entry_override(entry, CPUID_8000_0022_EAX);
+
+ if (kvm_cpu_cap_has(X86_FEATURE_PERFMON_V2))
+ ebx.split.num_core_pmc = kvm_pmu_cap.num_counters_gp;
+ else if (kvm_cpu_cap_has(X86_FEATURE_PERFCTR_CORE))
+ ebx.split.num_core_pmc = AMD64_NUM_COUNTERS_CORE;
+ else
+ ebx.split.num_core_pmc = AMD64_NUM_COUNTERS;
+
+ entry->ebx = ebx.full;
+ break;
+ }
/*Add support for Centaur's CPUID instruction*/
case 0xC0000000:
/*Just support up to 0xC0000004 now*/
@@ -1291,7 +1421,7 @@ int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
if (sanity_check_entries(entries, cpuid->nent, type))
return -EINVAL;
- array.entries = kvcalloc(sizeof(struct kvm_cpuid_entry2), cpuid->nent, GFP_KERNEL);
+ array.entries = kvcalloc(cpuid->nent, sizeof(struct kvm_cpuid_entry2), GFP_KERNEL);
if (!array.entries)
return -ENOMEM;
@@ -1313,12 +1443,20 @@ out_free:
return r;
}
-struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
- u32 function, u32 index)
+struct kvm_cpuid_entry2 *kvm_find_cpuid_entry_index(struct kvm_vcpu *vcpu,
+ u32 function, u32 index)
{
return cpuid_entry2_find(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent,
function, index);
}
+EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry_index);
+
+struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
+ u32 function)
+{
+ return cpuid_entry2_find(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent,
+ function, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
+}
EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry);
/*
@@ -1355,7 +1493,7 @@ get_out_of_range_cpuid_entry(struct kvm_vcpu *vcpu, u32 *fn_ptr, u32 index)
struct kvm_cpuid_entry2 *basic, *class;
u32 function = *fn_ptr;
- basic = kvm_find_cpuid_entry(vcpu, 0, 0);
+ basic = kvm_find_cpuid_entry(vcpu, 0);
if (!basic)
return NULL;
@@ -1364,11 +1502,11 @@ get_out_of_range_cpuid_entry(struct kvm_vcpu *vcpu, u32 *fn_ptr, u32 index)
return NULL;
if (function >= 0x40000000 && function <= 0x4fffffff)
- class = kvm_find_cpuid_entry(vcpu, function & 0xffffff00, 0);
+ class = kvm_find_cpuid_entry(vcpu, function & 0xffffff00);
else if (function >= 0xc0000000)
- class = kvm_find_cpuid_entry(vcpu, 0xc0000000, 0);
+ class = kvm_find_cpuid_entry(vcpu, 0xc0000000);
else
- class = kvm_find_cpuid_entry(vcpu, function & 0x80000000, 0);
+ class = kvm_find_cpuid_entry(vcpu, function & 0x80000000);
if (class && function <= class->eax)
return NULL;
@@ -1386,7 +1524,7 @@ get_out_of_range_cpuid_entry(struct kvm_vcpu *vcpu, u32 *fn_ptr, u32 index)
* the effective CPUID entry is the max basic leaf. Note, the index of
* the original requested leaf is observed!
*/
- return kvm_find_cpuid_entry(vcpu, basic->eax, index);
+ return kvm_find_cpuid_entry_index(vcpu, basic->eax, index);
}
bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
@@ -1396,7 +1534,7 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
struct kvm_cpuid_entry2 *entry;
bool exact, used_max_basic = false;
- entry = kvm_find_cpuid_entry(vcpu, function, index);
+ entry = kvm_find_cpuid_entry_index(vcpu, function, index);
exact = !!entry;
if (!entry && !exact_only) {
@@ -1414,6 +1552,9 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
if (!__kvm_get_msr(vcpu, MSR_IA32_TSX_CTRL, &data, true) &&
(data & TSX_CTRL_CPUID_CLEAR))
*ebx &= ~(F(RTM) | F(HLE));
+ } else if (function == 0x80000007) {
+ if (kvm_hv_invtsc_suppressed(vcpu))
+ *edx &= ~SF(CONSTANT_TSC);
}
} else {
*eax = *ebx = *ecx = *edx = 0;
@@ -1425,7 +1566,7 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
* exists. EDX can be copied from any existing index.
*/
if (function == 0xb || function == 0x1f) {
- entry = kvm_find_cpuid_entry(vcpu, function, 1);
+ entry = kvm_find_cpuid_entry_index(vcpu, function, 1);
if (entry) {
*ecx = index & 0xff;
*edx = entry->edx;