aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kvm/x86.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r--arch/x86/kvm/x86.c1176
1 files changed, 650 insertions, 526 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7fa89c39c64f..25f19078b321 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6,6 +6,7 @@
* Copyright (C) 2006 Qumranet, Inc.
* Copyright (C) 2008 Qumranet, Inc.
* Copyright IBM Corporation, 2008
+ * Copyright 2010 Red Hat, Inc. and/or its affilates.
*
* Authors:
* Avi Kivity <avi@qumranet.com>
@@ -41,17 +42,19 @@
#include <linux/srcu.h>
#include <linux/slab.h>
#include <linux/perf_event.h>
+#include <linux/uaccess.h>
#include <trace/events/kvm.h>
#define CREATE_TRACE_POINTS
#include "trace.h"
#include <asm/debugreg.h>
-#include <asm/uaccess.h>
#include <asm/msr.h>
#include <asm/desc.h>
#include <asm/mtrr.h>
#include <asm/mce.h>
+#include <asm/i387.h>
+#include <asm/xcr.h>
#define MAX_IO_MSRS 256
#define CR0_RESERVED_BITS \
@@ -62,6 +65,7 @@
(~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
| X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \
| X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \
+ | X86_CR4_OSXSAVE \
| X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
@@ -147,6 +151,13 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ NULL }
};
+u64 __read_mostly host_xcr0;
+
+static inline u32 bit(int bitno)
+{
+ return 1 << (bitno & 31);
+}
+
static void kvm_on_user_return(struct user_return_notifier *urn)
{
unsigned slot;
@@ -285,7 +296,7 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
prev_nr = vcpu->arch.exception.nr;
if (prev_nr == DF_VECTOR) {
/* triple fault -> shutdown */
- set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
+ kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
return;
}
class1 = exception_class(prev_nr);
@@ -414,121 +425,163 @@ out:
return changed;
}
-void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
+int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
+ unsigned long old_cr0 = kvm_read_cr0(vcpu);
+ unsigned long update_bits = X86_CR0_PG | X86_CR0_WP |
+ X86_CR0_CD | X86_CR0_NW;
+
cr0 |= X86_CR0_ET;
#ifdef CONFIG_X86_64
- if (cr0 & 0xffffffff00000000UL) {
- kvm_inject_gp(vcpu, 0);
- return;
- }
+ if (cr0 & 0xffffffff00000000UL)
+ return 1;
#endif
cr0 &= ~CR0_RESERVED_BITS;
- if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) {
- kvm_inject_gp(vcpu, 0);
- return;
- }
+ if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD))
+ return 1;
- if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) {
- kvm_inject_gp(vcpu, 0);
- return;
- }
+ if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE))
+ return 1;
if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
#ifdef CONFIG_X86_64
if ((vcpu->arch.efer & EFER_LME)) {
int cs_db, cs_l;
- if (!is_pae(vcpu)) {
- kvm_inject_gp(vcpu, 0);
- return;
- }
+ if (!is_pae(vcpu))
+ return 1;
kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
- if (cs_l) {
- kvm_inject_gp(vcpu, 0);
- return;
-
- }
+ if (cs_l)
+ return 1;
} else
#endif
- if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3)) {
- kvm_inject_gp(vcpu, 0);
- return;
- }
-
+ if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3))
+ return 1;
}
kvm_x86_ops->set_cr0(vcpu, cr0);
- kvm_mmu_reset_context(vcpu);
- return;
+ if ((cr0 ^ old_cr0) & update_bits)
+ kvm_mmu_reset_context(vcpu);
+ return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_cr0);
void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
{
- kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f));
+ (void)kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f));
}
EXPORT_SYMBOL_GPL(kvm_lmsw);
-void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
{
- unsigned long old_cr4 = kvm_read_cr4(vcpu);
- unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE;
+ u64 xcr0;
- if (cr4 & CR4_RESERVED_BITS) {
+ /* Only support XCR_XFEATURE_ENABLED_MASK(xcr0) now */
+ if (index != XCR_XFEATURE_ENABLED_MASK)
+ return 1;
+ xcr0 = xcr;
+ if (kvm_x86_ops->get_cpl(vcpu) != 0)
+ return 1;
+ if (!(xcr0 & XSTATE_FP))
+ return 1;
+ if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE))
+ return 1;
+ if (xcr0 & ~host_xcr0)
+ return 1;
+ vcpu->arch.xcr0 = xcr0;
+ vcpu->guest_xcr0_loaded = 0;
+ return 0;
+}
+
+int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
+{
+ if (__kvm_set_xcr(vcpu, index, xcr)) {
kvm_inject_gp(vcpu, 0);
+ return 1;
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_set_xcr);
+
+static bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *best;
+
+ best = kvm_find_cpuid_entry(vcpu, 1, 0);
+ return best && (best->ecx & bit(X86_FEATURE_XSAVE));
+}
+
+static void update_cpuid(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *best;
+
+ best = kvm_find_cpuid_entry(vcpu, 1, 0);
+ if (!best)
return;
+
+ /* Update OSXSAVE bit */
+ if (cpu_has_xsave && best->function == 0x1) {
+ best->ecx &= ~(bit(X86_FEATURE_OSXSAVE));
+ if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE))
+ best->ecx |= bit(X86_FEATURE_OSXSAVE);
}
+}
+
+int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+{
+ unsigned long old_cr4 = kvm_read_cr4(vcpu);
+ unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE;
+
+ if (cr4 & CR4_RESERVED_BITS)
+ return 1;
+
+ if (!guest_cpuid_has_xsave(vcpu) && (cr4 & X86_CR4_OSXSAVE))
+ return 1;
if (is_long_mode(vcpu)) {
- if (!(cr4 & X86_CR4_PAE)) {
- kvm_inject_gp(vcpu, 0);
- return;
- }
+ if (!(cr4 & X86_CR4_PAE))
+ return 1;
} else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)
&& ((cr4 ^ old_cr4) & pdptr_bits)
- && !load_pdptrs(vcpu, vcpu->arch.cr3)) {
- kvm_inject_gp(vcpu, 0);
- return;
- }
+ && !load_pdptrs(vcpu, vcpu->arch.cr3))
+ return 1;
+
+ if (cr4 & X86_CR4_VMXE)
+ return 1;
- if (cr4 & X86_CR4_VMXE) {
- kvm_inject_gp(vcpu, 0);
- return;
- }
kvm_x86_ops->set_cr4(vcpu, cr4);
- vcpu->arch.cr4 = cr4;
- kvm_mmu_reset_context(vcpu);
+
+ if ((cr4 ^ old_cr4) & pdptr_bits)
+ kvm_mmu_reset_context(vcpu);
+
+ if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE)
+ update_cpuid(vcpu);
+
+ return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_cr4);
-void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
+int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
{
if (cr3 == vcpu->arch.cr3 && !pdptrs_changed(vcpu)) {
kvm_mmu_sync_roots(vcpu);
kvm_mmu_flush_tlb(vcpu);
- return;
+ return 0;
}
if (is_long_mode(vcpu)) {
- if (cr3 & CR3_L_MODE_RESERVED_BITS) {
- kvm_inject_gp(vcpu, 0);
- return;
- }
+ if (cr3 & CR3_L_MODE_RESERVED_BITS)
+ return 1;
} else {
if (is_pae(vcpu)) {
- if (cr3 & CR3_PAE_RESERVED_BITS) {
- kvm_inject_gp(vcpu, 0);
- return;
- }
- if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) {
- kvm_inject_gp(vcpu, 0);
- return;
- }
+ if (cr3 & CR3_PAE_RESERVED_BITS)
+ return 1;
+ if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3))
+ return 1;
}
/*
* We don't check reserved bits in nonpae mode, because
@@ -546,24 +599,28 @@ void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
* to debug) behavior on the guest side.
*/
if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT)))
- kvm_inject_gp(vcpu, 0);
- else {
- vcpu->arch.cr3 = cr3;
- vcpu->arch.mmu.new_cr3(vcpu);
- }
+ return 1;
+ vcpu->arch.cr3 = cr3;
+ vcpu->arch.mmu.new_cr3(vcpu);
+ return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_cr3);
-void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
+int __kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
{
- if (cr8 & CR8_RESERVED_BITS) {
- kvm_inject_gp(vcpu, 0);
- return;
- }
+ if (cr8 & CR8_RESERVED_BITS)
+ return 1;
if (irqchip_in_kernel(vcpu->kvm))
kvm_lapic_set_tpr(vcpu, cr8);
else
vcpu->arch.cr8 = cr8;
+ return 0;
+}
+
+void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
+{
+ if (__kvm_set_cr8(vcpu, cr8))
+ kvm_inject_gp(vcpu, 0);
}
EXPORT_SYMBOL_GPL(kvm_set_cr8);
@@ -576,7 +633,7 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_get_cr8);
-int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
+static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
{
switch (dr) {
case 0 ... 3:
@@ -585,29 +642,21 @@ int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
vcpu->arch.eff_db[dr] = val;
break;
case 4:
- if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) {
- kvm_queue_exception(vcpu, UD_VECTOR);
- return 1;
- }
+ if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
+ return 1; /* #UD */
/* fall through */
case 6:
- if (val & 0xffffffff00000000ULL) {
- kvm_inject_gp(vcpu, 0);
- return 1;
- }
+ if (val & 0xffffffff00000000ULL)
+ return -1; /* #GP */
vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1;
break;
case 5:
- if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) {
- kvm_queue_exception(vcpu, UD_VECTOR);
- return 1;
- }
+ if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
+ return 1; /* #UD */
/* fall through */
default: /* 7 */
- if (val & 0xffffffff00000000ULL) {
- kvm_inject_gp(vcpu, 0);
- return 1;
- }
+ if (val & 0xffffffff00000000ULL)
+ return -1; /* #GP */
vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
kvm_x86_ops->set_dr7(vcpu, vcpu->arch.dr7);
@@ -618,28 +667,37 @@ int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
return 0;
}
+
+int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
+{
+ int res;
+
+ res = __kvm_set_dr(vcpu, dr, val);
+ if (res > 0)
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ else if (res < 0)
+ kvm_inject_gp(vcpu, 0);
+
+ return res;
+}
EXPORT_SYMBOL_GPL(kvm_set_dr);
-int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
+static int _kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
{
switch (dr) {
case 0 ... 3:
*val = vcpu->arch.db[dr];
break;
case 4:
- if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) {
- kvm_queue_exception(vcpu, UD_VECTOR);
+ if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
return 1;
- }
/* fall through */
case 6:
*val = vcpu->arch.dr6;
break;
case 5:
- if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) {
- kvm_queue_exception(vcpu, UD_VECTOR);
+ if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
return 1;
- }
/* fall through */
default: /* 7 */
*val = vcpu->arch.dr7;
@@ -648,12 +706,16 @@ int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_get_dr);
-static inline u32 bit(int bitno)
+int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
{
- return 1 << (bitno & 31);
+ if (_kvm_get_dr(vcpu, dr, val)) {
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ return 1;
+ }
+ return 0;
}
+EXPORT_SYMBOL_GPL(kvm_get_dr);
/*
* List of msr numbers which we expose to userspace through KVM_GET_MSRS
@@ -671,7 +733,7 @@ static u32 msrs_to_save[] = {
HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
HV_X64_MSR_APIC_ASSIST_PAGE,
MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
- MSR_K6_STAR,
+ MSR_STAR,
#ifdef CONFIG_X86_64
MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
#endif
@@ -682,10 +744,14 @@ static unsigned num_msrs_to_save;
static u32 emulated_msrs[] = {
MSR_IA32_MISC_ENABLE,
+ MSR_IA32_MCG_STATUS,
+ MSR_IA32_MCG_CTL,
};
static int set_efer(struct kvm_vcpu *vcpu, u64 efer)
{
+ u64 old_efer = vcpu->arch.efer;
+
if (efer & efer_reserved_bits)
return 1;
@@ -714,11 +780,13 @@ static int set_efer(struct kvm_vcpu *vcpu, u64 efer)
kvm_x86_ops->set_efer(vcpu, efer);
- vcpu->arch.efer = efer;
-
vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled;
kvm_mmu_reset_context(vcpu);
+ /* Update reserved bits */
+ if ((efer ^ old_efer) & EFER_NX)
+ kvm_mmu_reset_context(vcpu);
+
return 0;
}
@@ -882,7 +950,7 @@ static int kvm_request_guest_time_update(struct kvm_vcpu *v)
if (!vcpu->time_page)
return 0;
- set_bit(KVM_REQ_KVMCLOCK_UPDATE, &v->requests);
+ kvm_make_request(KVM_REQ_KVMCLOCK_UPDATE, v);
return 1;
}
@@ -1524,16 +1592,12 @@ static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
{
int i, idx;
- vcpu_load(vcpu);
-
idx = srcu_read_lock(&vcpu->kvm->srcu);
for (i = 0; i < msrs->nmsrs; ++i)
if (do_msr(vcpu, entries[i].index, &entries[i].data))
break;
srcu_read_unlock(&vcpu->kvm->srcu, idx);
- vcpu_put(vcpu);
-
return i;
}
@@ -1618,6 +1682,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_PCI_SEGMENT:
case KVM_CAP_DEBUGREGS:
case KVM_CAP_X86_ROBUST_SINGLESTEP:
+ case KVM_CAP_XSAVE:
r = 1;
break;
case KVM_CAP_COALESCED_MMIO:
@@ -1641,6 +1706,9 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_MCE:
r = KVM_MAX_MCE_BANKS;
break;
+ case KVM_CAP_XCRS:
+ r = cpu_has_xsave;
+ break;
default:
r = 0;
break;
@@ -1717,8 +1785,28 @@ out:
return r;
}
+static void wbinvd_ipi(void *garbage)
+{
+ wbinvd();
+}
+
+static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)
+{
+ return vcpu->kvm->arch.iommu_domain &&
+ !(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY);
+}
+
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
+ /* Address WBINVD may be executed by guest */
+ if (need_emulate_wbinvd(vcpu)) {
+ if (kvm_x86_ops->has_wbinvd_exit())
+ cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
+ else if (vcpu->cpu != -1 && vcpu->cpu != cpu)
+ smp_call_function_single(vcpu->cpu,
+ wbinvd_ipi, NULL, 1);
+ }
+
kvm_x86_ops->vcpu_load(vcpu, cpu);
if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0)) {
unsigned long khz = cpufreq_quick_get(cpu);
@@ -1731,8 +1819,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
- kvm_put_guest_fpu(vcpu);
kvm_x86_ops->vcpu_put(vcpu);
+ kvm_put_guest_fpu(vcpu);
}
static int is_efer_nx(void)
@@ -1781,7 +1869,6 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
if (copy_from_user(cpuid_entries, entries,
cpuid->nent * sizeof(struct kvm_cpuid_entry)))
goto out_free;
- vcpu_load(vcpu);
for (i = 0; i < cpuid->nent; i++) {
vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function;
vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax;
@@ -1799,7 +1886,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
r = 0;
kvm_apic_set_version(vcpu);
kvm_x86_ops->cpuid_update(vcpu);
- vcpu_put(vcpu);
+ update_cpuid(vcpu);
out_free:
vfree(cpuid_entries);
@@ -1820,11 +1907,10 @@ static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
if (copy_from_user(&vcpu->arch.cpuid_entries, entries,
cpuid->nent * sizeof(struct kvm_cpuid_entry2)))
goto out;
- vcpu_load(vcpu);
vcpu->arch.cpuid_nent = cpuid->nent;
kvm_apic_set_version(vcpu);
kvm_x86_ops->cpuid_update(vcpu);
- vcpu_put(vcpu);
+ update_cpuid(vcpu);
return 0;
out:
@@ -1837,7 +1923,6 @@ static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
{
int r;
- vcpu_load(vcpu);
r = -E2BIG;
if (cpuid->nent < vcpu->arch.cpuid_nent)
goto out;
@@ -1849,7 +1934,6 @@ static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
out:
cpuid->nent = vcpu->arch.cpuid_nent;
- vcpu_put(vcpu);
return r;
}
@@ -1901,13 +1985,13 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
/* cpuid 1.ecx */
const u32 kvm_supported_word4_x86_features =
- F(XMM3) | 0 /* Reserved, DTES64, MONITOR */ |
+ F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ |
0 /* DS-CPL, VMX, SMX, EST */ |
0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ |
0 /* Reserved, DCA */ | F(XMM4_1) |
F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
- 0 /* Reserved, XSAVE, OSXSAVE */;
+ 0 /* Reserved, AES */ | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX);
/* cpuid 0x80000001.ecx */
const u32 kvm_supported_word6_x86_features =
F(LAHF_LM) | F(CMP_LEGACY) | F(SVM) | 0 /* ExtApicSpace */ |
@@ -1922,7 +2006,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
switch (function) {
case 0:
- entry->eax = min(entry->eax, (u32)0xb);
+ entry->eax = min(entry->eax, (u32)0xd);
break;
case 1:
entry->edx &= kvm_supported_word0_x86_features;
@@ -1980,6 +2064,20 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
}
break;
}
+ case 0xd: {
+ int i;
+
+ entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+ for (i = 1; *nent < maxnent; ++i) {
+ if (entry[i - 1].eax == 0 && i != 2)
+ break;
+ do_cpuid_1_ent(&entry[i], function, i);
+ entry[i].flags |=
+ KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+ ++*nent;
+ }
+ break;
+ }
case KVM_CPUID_SIGNATURE: {
char signature[12] = "KVMKVMKVM\0\0";
u32 *sigptr = (u32 *)signature;
@@ -2081,9 +2179,7 @@ out:
static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
struct kvm_lapic_state *s)
{
- vcpu_load(vcpu);
memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
- vcpu_put(vcpu);
return 0;
}
@@ -2091,11 +2187,9 @@ static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
struct kvm_lapic_state *s)
{
- vcpu_load(vcpu);
memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s);
kvm_apic_post_state_restore(vcpu);
update_cr8_intercept(vcpu);
- vcpu_put(vcpu);
return 0;
}
@@ -2107,20 +2201,15 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
return -EINVAL;
if (irqchip_in_kernel(vcpu->kvm))
return -ENXIO;
- vcpu_load(vcpu);
kvm_queue_interrupt(vcpu, irq->irq, false);
- vcpu_put(vcpu);
-
return 0;
}
static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
{
- vcpu_load(vcpu);
kvm_inject_nmi(vcpu);
- vcpu_put(vcpu);
return 0;
}
@@ -2140,7 +2229,6 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
int r;
unsigned bank_num = mcg_cap & 0xff, bank;
- vcpu_load(vcpu);
r = -EINVAL;
if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS)
goto out;
@@ -2155,7 +2243,6 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
for (bank = 0; bank < bank_num; bank++)
vcpu->arch.mce_banks[bank*4] = ~(u64)0;
out:
- vcpu_put(vcpu);
return r;
}
@@ -2188,7 +2275,7 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
printk(KERN_DEBUG "kvm: set_mce: "
"injects mce exception while "
"previous one is in progress!\n");
- set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
+ kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
return 0;
}
if (banks[1] & MCI_STATUS_VAL)
@@ -2213,8 +2300,6 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
struct kvm_vcpu_events *events)
{
- vcpu_load(vcpu);
-
events->exception.injected =
vcpu->arch.exception.pending &&
!kvm_exception_is_soft(vcpu->arch.exception.nr);
@@ -2239,8 +2324,6 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
| KVM_VCPUEVENT_VALID_SIPI_VECTOR
| KVM_VCPUEVENT_VALID_SHADOW);
-
- vcpu_put(vcpu);
}
static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
@@ -2251,8 +2334,6 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
| KVM_VCPUEVENT_VALID_SHADOW))
return -EINVAL;
- vcpu_load(vcpu);
-
vcpu->arch.exception.pending = events->exception.injected;
vcpu->arch.exception.nr = events->exception.nr;
vcpu->arch.exception.has_error_code = events->exception.has_error_code;
@@ -2275,22 +2356,16 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR)
vcpu->arch.sipi_vector = events->sipi_vector;
- vcpu_put(vcpu);
-
return 0;
}
static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
struct kvm_debugregs *dbgregs)
{
- vcpu_load(vcpu);
-
memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
dbgregs->dr6 = vcpu->arch.dr6;
dbgregs->dr7 = vcpu->arch.dr7;
dbgregs->flags = 0;
-
- vcpu_put(vcpu);
}
static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
@@ -2299,40 +2374,113 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
if (dbgregs->flags)
return -EINVAL;
- vcpu_load(vcpu);
-
memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
vcpu->arch.dr6 = dbgregs->dr6;
vcpu->arch.dr7 = dbgregs->dr7;
- vcpu_put(vcpu);
+ return 0;
+}
+
+static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
+ struct kvm_xsave *guest_xsave)
+{
+ if (cpu_has_xsave)
+ memcpy(guest_xsave->region,
+ &vcpu->arch.guest_fpu.state->xsave,
+ sizeof(struct xsave_struct));
+ else {
+ memcpy(guest_xsave->region,
+ &vcpu->arch.guest_fpu.state->fxsave,
+ sizeof(struct i387_fxsave_struct));
+ *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] =
+ XSTATE_FPSSE;
+ }
+}
+
+static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
+ struct kvm_xsave *guest_xsave)
+{
+ u64 xstate_bv =
+ *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
+ if (cpu_has_xsave)
+ memcpy(&vcpu->arch.guest_fpu.state->xsave,
+ guest_xsave->region, sizeof(struct xsave_struct));
+ else {
+ if (xstate_bv & ~XSTATE_FPSSE)
+ return -EINVAL;
+ memcpy(&vcpu->arch.guest_fpu.state->fxsave,
+ guest_xsave->region, sizeof(struct i387_fxsave_struct));
+ }
return 0;
}
+static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu,
+ struct kvm_xcrs *guest_xcrs)
+{
+ if (!cpu_has_xsave) {
+ guest_xcrs->nr_xcrs = 0;
+ return;
+ }
+
+ guest_xcrs->nr_xcrs = 1;
+ guest_xcrs->flags = 0;
+ guest_xcrs->xcrs[0].xcr = XCR_XFEATURE_ENABLED_MASK;
+ guest_xcrs->xcrs[0].value = vcpu->arch.xcr0;
+}
+
+static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
+ struct kvm_xcrs *guest_xcrs)
+{
+ int i, r = 0;
+
+ if (!cpu_has_xsave)
+ return -EINVAL;
+
+ if (guest_xcrs->nr_xcrs > KVM_MAX_XCRS || guest_xcrs->flags)
+ return -EINVAL;
+
+ for (i = 0; i < guest_xcrs->nr_xcrs; i++)
+ /* Only support XCR0 currently */
+ if (guest_xcrs->xcrs[0].xcr == XCR_XFEATURE_ENABLED_MASK) {
+ r = __kvm_set_xcr(vcpu, XCR_XFEATURE_ENABLED_MASK,
+ guest_xcrs->xcrs[0].value);
+ break;
+ }
+ if (r)
+ r = -EINVAL;
+ return r;
+}
+
long kvm_arch_vcpu_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
struct kvm_vcpu *vcpu = filp->private_data;
void __user *argp = (void __user *)arg;
int r;
- struct kvm_lapic_state *lapic = NULL;
+ union {
+ struct kvm_lapic_state *lapic;
+ struct kvm_xsave *xsave;
+ struct kvm_xcrs *xcrs;
+ void *buffer;
+ } u;
+ u.buffer = NULL;
switch (ioctl) {
case KVM_GET_LAPIC: {
r = -EINVAL;
if (!vcpu->arch.apic)
goto out;
- lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
+ u.lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
r = -ENOMEM;
- if (!lapic)
+ if (!u.lapic)
goto out;
- r = kvm_vcpu_ioctl_get_lapic(vcpu, lapic);
+ r = kvm_vcpu_ioctl_get_lapic(vcpu, u.lapic);
if (r)
goto out;
r = -EFAULT;
- if (copy_to_user(argp, lapic, sizeof(struct kvm_lapic_state)))
+ if (copy_to_user(argp, u.lapic, sizeof(struct kvm_lapic_state)))
goto out;
r = 0;
break;
@@ -2341,14 +2489,14 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = -EINVAL;
if (!vcpu->arch.apic)
goto out;
- lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
+ u.lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
r = -ENOMEM;
- if (!lapic)
+ if (!u.lapic)
goto out;
r = -EFAULT;
- if (copy_from_user(lapic, argp, sizeof(struct kvm_lapic_state)))
+ if (copy_from_user(u.lapic, argp, sizeof(struct kvm_lapic_state)))
goto out;
- r = kvm_vcpu_ioctl_set_lapic(vcpu, lapic);
+ r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic);
if (r)
goto out;
r = 0;
@@ -2464,9 +2612,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = -EFAULT;
if (copy_from_user(&mce, argp, sizeof mce))
goto out;
- vcpu_load(vcpu);
r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce);
- vcpu_put(vcpu);
break;
}
case KVM_GET_VCPU_EVENTS: {
@@ -2513,11 +2659,67 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs);
break;
}
+ case KVM_GET_XSAVE: {
+ u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL);
+ r = -ENOMEM;
+ if (!u.xsave)
+ break;
+
+ kvm_vcpu_ioctl_x86_get_xsave(vcpu, u.xsave);
+
+ r = -EFAULT;
+ if (copy_to_user(argp, u.xsave, sizeof(struct kvm_xsave)))
+ break;
+ r = 0;
+ break;
+ }
+ case KVM_SET_XSAVE: {
+ u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL);
+ r = -ENOMEM;
+ if (!u.xsave)
+ break;
+
+ r = -EFAULT;
+ if (copy_from_user(u.xsave, argp, sizeof(struct kvm_xsave)))
+ break;
+
+ r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave);
+ break;
+ }
+ case KVM_GET_XCRS: {
+ u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL);
+ r = -ENOMEM;
+ if (!u.xcrs)
+ break;
+
+ kvm_vcpu_ioctl_x86_get_xcrs(vcpu, u.xcrs);
+
+ r = -EFAULT;
+ if (copy_to_user(argp, u.xcrs,
+ sizeof(struct kvm_xcrs)))
+ break;
+ r = 0;
+ break;
+ }
+ case KVM_SET_XCRS: {
+ u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL);
+ r = -ENOMEM;
+ if (!u.xcrs)
+ break;
+
+ r = -EFAULT;
+ if (copy_from_user(u.xcrs, argp,
+ sizeof(struct kvm_xcrs)))
+ break;
+
+ r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs);
+ break;
+ }
default:
r = -EINVAL;
}
out:
- kfree(lapic);
+ kfree(u.buffer);
return r;
}
@@ -2560,115 +2762,6 @@ static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
return kvm->arch.n_alloc_mmu_pages;
}
-gfn_t unalias_gfn_instantiation(struct kvm *kvm, gfn_t gfn)
-{
- int i;
- struct kvm_mem_alias *alias;
- struct kvm_mem_aliases *aliases;
-
- aliases = kvm_aliases(kvm);
-
- for (i = 0; i < aliases->naliases; ++i) {
- alias = &aliases->aliases[i];
- if (alias->flags & KVM_ALIAS_INVALID)
- continue;
- if (gfn >= alias->base_gfn
- && gfn < alias->base_gfn + alias->npages)
- return alias->target_gfn + gfn - alias->base_gfn;
- }
- return gfn;
-}
-
-gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
-{
- int i;
- struct kvm_mem_alias *alias;
- struct kvm_mem_aliases *aliases;
-
- aliases = kvm_aliases(kvm);
-
- for (i = 0; i < aliases->naliases; ++i) {
- alias = &aliases->aliases[i];
- if (gfn >= alias->base_gfn
- && gfn < alias->base_gfn + alias->npages)
- return alias->target_gfn + gfn - alias->base_gfn;
- }
- return gfn;
-}
-
-/*
- * Set a new alias region. Aliases map a portion of physical memory into
- * another portion. This is useful for memory windows, for example the PC
- * VGA region.
- */
-static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
- struct kvm_memory_alias *alias)
-{
- int r, n;
- struct kvm_mem_alias *p;
- struct kvm_mem_aliases *aliases, *old_aliases;
-
- r = -EINVAL;
- /* General sanity checks */
- if (alias->memory_size & (PAGE_SIZE - 1))
- goto out;
- if (alias->guest_phys_addr & (PAGE_SIZE - 1))
- goto out;
- if (alias->slot >= KVM_ALIAS_SLOTS)
- goto out;
- if (alias->guest_phys_addr + alias->memory_size
- < alias->guest_phys_addr)
- goto out;
- if (alias->target_phys_addr + alias->memory_size
- < alias->target_phys_addr)
- goto out;
-
- r = -ENOMEM;
- aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL);
- if (!aliases)
- goto out;
-
- mutex_lock(&kvm->slots_lock);
-
- /* invalidate any gfn reference in case of deletion/shrinking */
- memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases));
- aliases->aliases[alias->slot].flags |= KVM_ALIAS_INVALID;
- old_aliases = kvm->arch.aliases;
- rcu_assign_pointer(kvm->arch.aliases, aliases);
- synchronize_srcu_expedited(&kvm->srcu);
- kvm_mmu_zap_all(kvm);
- kfree(old_aliases);
-
- r = -ENOMEM;
- aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL);
- if (!aliases)
- goto out_unlock;
-
- memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases));
-
- p = &aliases->aliases[alias->slot];
- p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT;
- p->npages = alias->memory_size >> PAGE_SHIFT;
- p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT;
- p->flags &= ~(KVM_ALIAS_INVALID);
-
- for (n = KVM_ALIAS_SLOTS; n > 0; --n)
- if (aliases->aliases[n - 1].npages)
- break;
- aliases->naliases = n;
-
- old_aliases = kvm->arch.aliases;
- rcu_assign_pointer(kvm->arch.aliases, aliases);
- synchronize_srcu_expedited(&kvm->srcu);
- kfree(old_aliases);
- r = 0;
-
-out_unlock:
- mutex_unlock(&kvm->slots_lock);
-out:
- return r;
-}
-
static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
{
int r;
@@ -2797,7 +2890,6 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
struct kvm_memory_slot *memslot;
unsigned long n;
unsigned long is_dirty = 0;
- unsigned long *dirty_bitmap = NULL;
mutex_lock(&kvm->slots_lock);
@@ -2812,27 +2904,30 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
n = kvm_dirty_bitmap_bytes(memslot);
- r = -ENOMEM;
- dirty_bitmap = vmalloc(n);
- if (!dirty_bitmap)
- goto out;
- memset(dirty_bitmap, 0, n);
-
for (i = 0; !is_dirty && i < n/sizeof(long); i++)
is_dirty = memslot->dirty_bitmap[i];
/* If nothing is dirty, don't bother messing with page tables. */
if (is_dirty) {
struct kvm_memslots *slots, *old_slots;
+ unsigned long *dirty_bitmap;
spin_lock(&kvm->mmu_lock);
kvm_mmu_slot_remove_write_access(kvm, log->slot);
spin_unlock(&kvm->mmu_lock);
- slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
- if (!slots)
- goto out_free;
+ r = -ENOMEM;
+ dirty_bitmap = vmalloc(n);
+ if (!dirty_bitmap)
+ goto out;
+ memset(dirty_bitmap, 0, n);
+ r = -ENOMEM;
+ slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
+ if (!slots) {
+ vfree(dirty_bitmap);
+ goto out;
+ }
memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
slots->memslots[log->slot].dirty_bitmap = dirty_bitmap;
@@ -2841,13 +2936,20 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
synchronize_srcu_expedited(&kvm->srcu);
dirty_bitmap = old_slots->memslots[log->slot].dirty_bitmap;
kfree(old_slots);
+
+ r = -EFAULT;
+ if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n)) {
+ vfree(dirty_bitmap);
+ goto out;
+ }
+ vfree(dirty_bitmap);
+ } else {
+ r = -EFAULT;
+ if (clear_user(log->dirty_bitmap, n))
+ goto out;
}
r = 0;
- if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n))
- r = -EFAULT;
-out_free:
- vfree(dirty_bitmap);
out:
mutex_unlock(&kvm->slots_lock);
return r;
@@ -2867,7 +2969,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
union {
struct kvm_pit_state ps;
struct kvm_pit_state2 ps2;
- struct kvm_memory_alias alias;
struct kvm_pit_config pit_config;
} u;
@@ -2888,22 +2989,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
goto out;
break;
}
- case KVM_SET_MEMORY_REGION: {
- struct kvm_memory_region kvm_mem;
- struct kvm_userspace_memory_region kvm_userspace_mem;
-
- r = -EFAULT;
- if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem))
- goto out;
- kvm_userspace_mem.slot = kvm_mem.slot;
- kvm_userspace_mem.flags = kvm_mem.flags;
- kvm_userspace_mem.guest_phys_addr = kvm_mem.guest_phys_addr;
- kvm_userspace_mem.memory_size = kvm_mem.memory_size;
- r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 0);
- if (r)
- goto out;
- break;
- }
case KVM_SET_NR_MMU_PAGES:
r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
if (r)
@@ -2912,14 +2997,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
case KVM_GET_NR_MMU_PAGES:
r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
break;
- case KVM_SET_MEMORY_ALIAS:
- r = -EFAULT;
- if (copy_from_user(&u.alias, argp, sizeof(struct kvm_memory_alias)))
- goto out;
- r = kvm_vm_ioctl_set_memory_alias(kvm, &u.alias);
- if (r)
- goto out;
- break;
case KVM_CREATE_IRQCHIP: {
struct kvm_pic *vpic;
@@ -3259,7 +3336,7 @@ static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
}
ret = kvm_read_guest(vcpu->kvm, gpa, data, toread);
if (ret < 0) {
- r = X86EMUL_UNHANDLEABLE;
+ r = X86EMUL_IO_NEEDED;
goto out;
}
@@ -3315,7 +3392,7 @@ static int kvm_write_guest_virt_system(gva_t addr, void *val,
}
ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite);
if (ret < 0) {
- r = X86EMUL_UNHANDLEABLE;
+ r = X86EMUL_IO_NEEDED;
goto out;
}
@@ -3330,10 +3407,10 @@ out:
static int emulator_read_emulated(unsigned long addr,
void *val,
unsigned int bytes,
+ unsigned int *error_code,
struct kvm_vcpu *vcpu)
{
gpa_t gpa;
- u32 error_code;
if (vcpu->mmio_read_completed) {
memcpy(val, vcpu->mmio_data, bytes);
@@ -3343,12 +3420,10 @@ static int emulator_read_emulated(unsigned long addr,
return X86EMUL_CONTINUE;
}
- gpa = kvm_mmu_gva_to_gpa_read(vcpu, addr, &error_code);
+ gpa = kvm_mmu_gva_to_gpa_read(vcpu, addr, error_code);
- if (gpa == UNMAPPED_GVA) {
- kvm_inject_page_fault(vcpu, addr, error_code);
+ if (gpa == UNMAPPED_GVA)
return X86EMUL_PROPAGATE_FAULT;
- }
/* For APIC access vmexit */
if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
@@ -3370,11 +3445,12 @@ mmio:
trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
vcpu->mmio_needed = 1;
- vcpu->mmio_phys_addr = gpa;
- vcpu->mmio_size = bytes;
- vcpu->mmio_is_write = 0;
+ vcpu->run->exit_reason = KVM_EXIT_MMIO;
+ vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
+ vcpu->run->mmio.len = vcpu->mmio_size = bytes;
+ vcpu->run->mmio.is_write = vcpu->mmio_is_write = 0;
- return X86EMUL_UNHANDLEABLE;
+ return X86EMUL_IO_NEEDED;
}
int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
@@ -3392,17 +3468,15 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
static int emulator_write_emulated_onepage(unsigned long addr,
const void *val,
unsigned int bytes,
+ unsigned int *error_code,
struct kvm_vcpu *vcpu)
{
gpa_t gpa;
- u32 error_code;
- gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, &error_code);
+ gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, error_code);
- if (gpa == UNMAPPED_GVA) {
- kvm_inject_page_fault(vcpu, addr, error_code);
+ if (gpa == UNMAPPED_GVA)
return X86EMUL_PROPAGATE_FAULT;
- }
/* For APIC access vmexit */
if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
@@ -3420,10 +3494,11 @@ mmio:
return X86EMUL_CONTINUE;
vcpu->mmio_needed = 1;
- vcpu->mmio_phys_addr = gpa;
- vcpu->mmio_size = bytes;
- vcpu->mmio_is_write = 1;
- memcpy(vcpu->mmio_data, val, bytes);
+ vcpu->run->exit_reason = KVM_EXIT_MMIO;
+ vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
+ vcpu->run->mmio.len = vcpu->mmio_size = bytes;
+ vcpu->run->mmio.is_write = vcpu->mmio_is_write = 1;
+ memcpy(vcpu->run->mmio.data, val, bytes);
return X86EMUL_CONTINUE;
}
@@ -3431,6 +3506,7 @@ mmio:
int emulator_write_emulated(unsigned long addr,
const void *val,
unsigned int bytes,
+ unsigned int *error_code,
struct kvm_vcpu *vcpu)
{
/* Crossing a page boundary? */
@@ -3438,16 +3514,17 @@ int emulator_write_emulated(unsigned long addr,
int rc, now;
now = -addr & ~PAGE_MASK;
- rc = emulator_write_emulated_onepage(addr, val, now, vcpu);
+ rc = emulator_write_emulated_onepage(addr, val, now, error_code,
+ vcpu);
if (rc != X86EMUL_CONTINUE)
return rc;
addr += now;
val += now;
bytes -= now;
}
- return emulator_write_emulated_onepage(addr, val, bytes, vcpu);
+ return emulator_write_emulated_onepage(addr, val, bytes, error_code,
+ vcpu);
}
-EXPORT_SYMBOL_GPL(emulator_write_emulated);
#define CMPXCHG_TYPE(t, ptr, old, new) \
(cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old))
@@ -3463,6 +3540,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
const void *old,
const void *new,
unsigned int bytes,
+ unsigned int *error_code,
struct kvm_vcpu *vcpu)
{
gpa_t gpa;
@@ -3484,6 +3562,10 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
goto emul_write;
page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
+ if (is_error_page(page)) {
+ kvm_release_page_clean(page);
+ goto emul_write;
+ }
kaddr = kmap_atomic(page, KM_USER0);
kaddr += offset_in_page(gpa);
@@ -3516,7 +3598,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
emul_write:
printk_once(KERN_WARNING "kvm: emulating exchange as write\n");
- return emulator_write_emulated(addr, new, bytes, vcpu);
+ return emulator_write_emulated(addr, new, bytes, error_code, vcpu);
}
static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
@@ -3604,42 +3686,38 @@ int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address)
return X86EMUL_CONTINUE;
}
-int emulate_clts(struct kvm_vcpu *vcpu)
+int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
{
- kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS));
- kvm_x86_ops->fpu_activate(vcpu);
+ if (!need_emulate_wbinvd(vcpu))
+ return X86EMUL_CONTINUE;
+
+ if (kvm_x86_ops->has_wbinvd_exit()) {
+ smp_call_function_many(vcpu->arch.wbinvd_dirty_mask,
+ wbinvd_ipi, NULL, 1);
+ cpumask_clear(vcpu->arch.wbinvd_dirty_mask);
+ }
+ wbinvd();
return X86EMUL_CONTINUE;
}
+EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd);
-int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
+int emulate_clts(struct kvm_vcpu *vcpu)
{
- return kvm_get_dr(ctxt->vcpu, dr, dest);
+ kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS));
+ kvm_x86_ops->fpu_activate(vcpu);
+ return X86EMUL_CONTINUE;
}
-int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
+int emulator_get_dr(int dr, unsigned long *dest, struct kvm_vcpu *vcpu)
{
- unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U;
-
- return kvm_set_dr(ctxt->vcpu, dr, value & mask);
+ return _kvm_get_dr(vcpu, dr, dest);
}
-void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
+int emulator_set_dr(int dr, unsigned long value, struct kvm_vcpu *vcpu)
{
- u8 opcodes[4];
- unsigned long rip = kvm_rip_read(vcpu);
- unsigned long rip_linear;
-
- if (!printk_ratelimit())
- return;
- rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS);
-
- kvm_read_guest_virt(rip_linear, (void *)opcodes, 4, vcpu, NULL);
-
- printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n",
- context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]);
+ return __kvm_set_dr(vcpu, dr, value);
}
-EXPORT_SYMBOL_GPL(kvm_report_emulation_failure);
static u64 mk_cr_64(u64 curr_cr, u32 new_val)
{
@@ -3674,27 +3752,32 @@ static unsigned long emulator_get_cr(int cr, struct kvm_vcpu *vcpu)
return value;
}
-static void emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu)
+static int emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu)
{
+ int res = 0;
+
switch (cr) {
case 0:
- kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val));
+ res = kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val));
break;
case 2:
vcpu->arch.cr2 = val;
break;
case 3:
- kvm_set_cr3(vcpu, val);
+ res = kvm_set_cr3(vcpu, val);
break;
case 4:
- kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val));
+ res = kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val));
break;
case 8:
- kvm_set_cr8(vcpu, val & 0xfUL);
+ res = __kvm_set_cr8(vcpu, val & 0xfUL);
break;
default:
vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
+ res = -1;
}
+
+ return res;
}
static int emulator_get_cpl(struct kvm_vcpu *vcpu)
@@ -3707,6 +3790,12 @@ static void emulator_get_gdt(struct desc_ptr *dt, struct kvm_vcpu *vcpu)
kvm_x86_ops->get_gdt(vcpu, dt);
}
+static unsigned long emulator_get_cached_segment_base(int seg,
+ struct kvm_vcpu *vcpu)
+{
+ return get_segment_base(vcpu, seg);
+}
+
static bool emulator_get_cached_descriptor(struct desc_struct *desc, int seg,
struct kvm_vcpu *vcpu)
{
@@ -3779,11 +3868,6 @@ static void emulator_set_segment_selector(u16 sel, int seg,
kvm_set_segment(vcpu, &kvm_seg, seg);
}
-static void emulator_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
-{
- kvm_x86_ops->set_rflags(vcpu, rflags);
-}
-
static struct x86_emulate_ops emulate_ops = {
.read_std = kvm_read_guest_virt_system,
.write_std = kvm_write_guest_virt_system,
@@ -3797,11 +3881,15 @@ static struct x86_emulate_ops emulate_ops = {
.set_cached_descriptor = emulator_set_cached_descriptor,
.get_segment_selector = emulator_get_segment_selector,
.set_segment_selector = emulator_set_segment_selector,
+ .get_cached_segment_base = emulator_get_cached_segment_base,
.get_gdt = emulator_get_gdt,
.get_cr = emulator_get_cr,
.set_cr = emulator_set_cr,
.cpl = emulator_get_cpl,
- .set_rflags = emulator_set_rflags,
+ .get_dr = emulator_get_dr,
+ .set_dr = emulator_set_dr,
+ .set_msr = kvm_set_msr,
+ .get_msr = kvm_get_msr,
};
static void cache_all_regs(struct kvm_vcpu *vcpu)
@@ -3812,14 +3900,75 @@ static void cache_all_regs(struct kvm_vcpu *vcpu)
vcpu->arch.regs_dirty = ~0;
}
+static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
+{
+ u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu, mask);
+ /*
+ * an sti; sti; sequence only disable interrupts for the first
+ * instruction. So, if the last instruction, be it emulated or
+ * not, left the system with the INT_STI flag enabled, it
+ * means that the last instruction is an sti. We should not
+ * leave the flag on in this case. The same goes for mov ss
+ */
+ if (!(int_shadow & mask))
+ kvm_x86_ops->set_interrupt_shadow(vcpu, mask);
+}
+
+static void inject_emulated_exception(struct kvm_vcpu *vcpu)
+{
+ struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
+ if (ctxt->exception == PF_VECTOR)
+ kvm_inject_page_fault(vcpu, ctxt->cr2, ctxt->error_code);
+ else if (ctxt->error_code_valid)
+ kvm_queue_exception_e(vcpu, ctxt->exception, ctxt->error_code);
+ else
+ kvm_queue_exception(vcpu, ctxt->exception);
+}
+
+static int handle_emulation_failure(struct kvm_vcpu *vcpu)
+{
+ ++vcpu->stat.insn_emulation_fail;
+ trace_kvm_emulate_insn_failed(vcpu);
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+ vcpu->run->internal.ndata = 0;
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ return EMULATE_FAIL;
+}
+
+static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
+{
+ gpa_t gpa;
+
+ if (tdp_enabled)
+ return false;
+
+ /*
+ * if emulation was due to access to shadowed page table
+ * and it failed try to unshadow page and re-entetr the
+ * guest to let CPU execute the instruction.
+ */
+ if (kvm_mmu_unprotect_page_virt(vcpu, gva))
+ return true;
+
+ gpa = kvm_mmu_gva_to_gpa_system(vcpu, gva, NULL);
+
+ if (gpa == UNMAPPED_GVA)
+ return true; /* let cpu generate fault */
+
+ if (!kvm_is_error_hva(gfn_to_hva(vcpu->kvm, gpa >> PAGE_SHIFT)))
+ return true;
+
+ return false;
+}
+
int emulate_instruction(struct kvm_vcpu *vcpu,
unsigned long cr2,
u16 error_code,
int emulation_type)
{
- int r, shadow_mask;
- struct decode_cache *c;
- struct kvm_run *run = vcpu->run;
+ int r;
+ struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;
kvm_clear_exception_queue(vcpu);
vcpu->arch.mmio_fault_cr2 = cr2;
@@ -3831,8 +3980,6 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
*/
cache_all_regs(vcpu);
- vcpu->mmio_is_write = 0;
-
if (!(emulation_type & EMULTYPE_NO_DECODE)) {
int cs_db, cs_l;
kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
@@ -3846,13 +3993,16 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
? X86EMUL_MODE_VM86 : cs_l
? X86EMUL_MODE_PROT64 : cs_db
? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
+ memset(c, 0, sizeof(struct decode_cache));
+ memcpy(c->regs, vcpu->arch.regs, sizeof c->regs);
+ vcpu->arch.emulate_ctxt.interruptibility = 0;
+ vcpu->arch.emulate_ctxt.exception = -1;
r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
trace_kvm_emulate_insn_start(vcpu);
/* Only allow emulation of specific instructions on #UD
* (namely VMMCALL, sysenter, sysexit, syscall)*/
- c = &vcpu->arch.emulate_ctxt.decode;
if (emulation_type & EMULTYPE_TRAP_UD) {
if (!c->twobyte)
return EMULATE_FAIL;
@@ -3880,11 +4030,11 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
++vcpu->stat.insn_emulation;
if (r) {
- ++vcpu->stat.insn_emulation_fail;
- trace_kvm_emulate_insn_failed(vcpu);
- if (kvm_mmu_unprotect_page_virt(vcpu, cr2))
+ if (reexecute_instruction(vcpu, cr2))
return EMULATE_DONE;
- return EMULATE_FAIL;
+ if (emulation_type & EMULTYPE_SKIP)
+ return EMULATE_FAIL;
+ return handle_emulation_failure(vcpu);
}
}
@@ -3893,48 +4043,42 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
return EMULATE_DONE;
}
+ /* this is needed for vmware backdor interface to work since it
+ changes registers values during IO operation */
+ memcpy(c->regs, vcpu->arch.regs, sizeof c->regs);
+
restart:
r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
- shadow_mask = vcpu->arch.emulate_ctxt.interruptibility;
- if (r == 0)
- kvm_x86_ops->set_interrupt_shadow(vcpu, shadow_mask);
+ if (r) { /* emulation failed */
+ if (reexecute_instruction(vcpu, cr2))
+ return EMULATE_DONE;
- if (vcpu->arch.pio.count) {
- if (!vcpu->arch.pio.in)
- vcpu->arch.pio.count = 0;
- return EMULATE_DO_MMIO;
+ return handle_emulation_failure(vcpu);
}
- if (r || vcpu->mmio_is_write) {
- run->exit_reason = KVM_EXIT_MMIO;
- run->mmio.phys_addr = vcpu->mmio_phys_addr;
- memcpy(run->mmio.data, vcpu->mmio_data, 8);
- run->mmio.len = vcpu->mmio_size;
- run->mmio.is_write = vcpu->mmio_is_write;
+ toggle_interruptibility(vcpu, vcpu->arch.emulate_ctxt.interruptibility);
+ kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
+ memcpy(vcpu->arch.regs, c->regs, sizeof c->regs);
+ kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip);
+
+ if (vcpu->arch.emulate_ctxt.exception >= 0) {
+ inject_emulated_exception(vcpu);
+ return EMULATE_DONE;
}
- if (r) {
- if (kvm_mmu_unprotect_page_virt(vcpu, cr2))
- goto done;
- if (!vcpu->mmio_needed) {
- ++vcpu->stat.insn_emulation_fail;
- trace_kvm_emulate_insn_failed(vcpu);
- kvm_report_emulation_failure(vcpu, "mmio");
- return EMULATE_FAIL;
- }
+ if (vcpu->arch.pio.count) {
+ if (!vcpu->arch.pio.in)
+ vcpu->arch.pio.count = 0;
return EMULATE_DO_MMIO;
}
- if (vcpu->mmio_is_write) {
- vcpu->mmio_needed = 0;
+ if (vcpu->mmio_needed) {
+ if (vcpu->mmio_is_write)
+ vcpu->mmio_needed = 0;
return EMULATE_DO_MMIO;
}
-done:
- if (vcpu->arch.exception.pending)
- vcpu->arch.emulate_ctxt.restart = false;
-
if (vcpu->arch.emulate_ctxt.restart)
goto restart;
@@ -4108,6 +4252,9 @@ int kvm_arch_init(void *opaque)
perf_register_guest_info_callbacks(&kvm_guest_cbs);
+ if (cpu_has_xsave)
+ host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
+
return 0;
out:
@@ -4270,7 +4417,7 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
kvm_x86_ops->patch_hypercall(vcpu, instruction);
- return emulator_write_emulated(rip, instruction, 3, vcpu);
+ return emulator_write_emulated(rip, instruction, 3, NULL, vcpu);
}
void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
@@ -4506,59 +4653,78 @@ static void inject_pending_event(struct kvm_vcpu *vcpu)
}
}
+static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
+{
+ if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) &&
+ !vcpu->guest_xcr0_loaded) {
+ /* kvm_set_xcr() also depends on this */
+ xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
+ vcpu->guest_xcr0_loaded = 1;
+ }
+}
+
+static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->guest_xcr0_loaded) {
+ if (vcpu->arch.xcr0 != host_xcr0)
+ xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
+ vcpu->guest_xcr0_loaded = 0;
+ }
+}
+
static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
{
int r;
bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
vcpu->run->request_interrupt_window;
- if (vcpu->requests)
- if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
- kvm_mmu_unload(vcpu);
-
- r = kvm_mmu_reload(vcpu);
- if (unlikely(r))
- goto out;
-
if (vcpu->requests) {
- if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests))
+ if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
+ kvm_mmu_unload(vcpu);
+ if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
__kvm_migrate_timers(vcpu);
- if (test_and_clear_bit(KVM_REQ_KVMCLOCK_UPDATE, &vcpu->requests))
+ if (kvm_check_request(KVM_REQ_KVMCLOCK_UPDATE, vcpu))
kvm_write_guest_time(vcpu);
- if (test_and_clear_bit(KVM_REQ_MMU_SYNC, &vcpu->requests))
+ if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
kvm_mmu_sync_roots(vcpu);
- if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
+ if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
kvm_x86_ops->tlb_flush(vcpu);
- if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS,
- &vcpu->requests)) {
+ if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
r = 0;
goto out;
}
- if (test_and_clear_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests)) {
+ if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
r = 0;
goto out;
}
- if (test_and_clear_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests)) {
+ if (kvm_check_request(KVM_REQ_DEACTIVATE_FPU, vcpu)) {
vcpu->fpu_active = 0;
kvm_x86_ops->fpu_deactivate(vcpu);
}
}
+ r = kvm_mmu_reload(vcpu);
+ if (unlikely(r))
+ goto out;
+
preempt_disable();
kvm_x86_ops->prepare_guest_switch(vcpu);
if (vcpu->fpu_active)
kvm_load_guest_fpu(vcpu);
+ kvm_load_guest_xcr0(vcpu);
- local_irq_disable();
+ atomic_set(&vcpu->guest_mode, 1);
+ smp_wmb();
- clear_bit(KVM_REQ_KICK, &vcpu->requests);
- smp_mb__after_clear_bit();
+ local_irq_disable();
- if (vcpu->requests || need_resched() || signal_pending(current)) {
- set_bit(KVM_REQ_KICK, &vcpu->requests);
+ if (!atomic_read(&vcpu->guest_mode) || vcpu->requests
+ || need_resched() || signal_pending(current)) {
+ atomic_set(&vcpu->guest_mode, 0);
+ smp_wmb();
local_irq_enable();
preempt_enable();
r = 1;
@@ -4603,7 +4769,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (hw_breakpoint_active())
hw_breakpoint_restore();
- set_bit(KVM_REQ_KICK, &vcpu->requests);
+ atomic_set(&vcpu->guest_mode, 0);
+ smp_wmb();
local_irq_enable();
++vcpu->stat.exits;
@@ -4665,7 +4832,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
kvm_vcpu_block(vcpu);
vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
- if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests))
+ if (kvm_check_request(KVM_REQ_UNHALT, vcpu))
{
switch(vcpu->arch.mp_state) {
case KVM_MP_STATE_HALTED:
@@ -4717,8 +4884,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
int r;
sigset_t sigsaved;
- vcpu_load(vcpu);
-
if (vcpu->sigset_active)
sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
@@ -4743,7 +4908,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
r = emulate_instruction(vcpu, 0, 0, EMULTYPE_NO_DECODE);
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
- if (r == EMULATE_DO_MMIO) {
+ if (r != EMULATE_DONE) {
r = 0;
goto out;
}
@@ -4759,14 +4924,11 @@ out:
if (vcpu->sigset_active)
sigprocmask(SIG_SETMASK, &sigsaved, NULL);
- vcpu_put(vcpu);
return r;
}
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
- vcpu_load(vcpu);
-
regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX);
regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX);
@@ -4789,15 +4951,11 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
regs->rip = kvm_rip_read(vcpu);
regs->rflags = kvm_get_rflags(vcpu);
- vcpu_put(vcpu);
-
return 0;
}
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
- vcpu_load(vcpu);
-
kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax);
kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx);
kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx);
@@ -4822,8 +4980,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
vcpu->arch.exception.pending = false;
- vcpu_put(vcpu);
-
return 0;
}
@@ -4842,8 +4998,6 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
{
struct desc_ptr dt;
- vcpu_load(vcpu);
-
kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
@@ -4875,32 +5029,27 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
set_bit(vcpu->arch.interrupt.nr,
(unsigned long *)sregs->interrupt_bitmap);
- vcpu_put(vcpu);
-
return 0;
}
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
- vcpu_load(vcpu);
mp_state->mp_state = vcpu->arch.mp_state;
- vcpu_put(vcpu);
return 0;
}
int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
- vcpu_load(vcpu);
vcpu->arch.mp_state = mp_state->mp_state;
- vcpu_put(vcpu);
return 0;
}
int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason,
bool has_error_code, u32 error_code)
{
+ struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;
int cs_db, cs_l, ret;
cache_all_regs(vcpu);
@@ -4915,6 +5064,8 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason,
? X86EMUL_MODE_VM86 : cs_l
? X86EMUL_MODE_PROT64 : cs_db
? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
+ memset(c, 0, sizeof(struct decode_cache));
+ memcpy(c->regs, vcpu->arch.regs, sizeof c->regs);
ret = emulator_task_switch(&vcpu->arch.emulate_ctxt, &emulate_ops,
tss_selector, reason, has_error_code,
@@ -4923,6 +5074,8 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason,
if (ret)
return EMULATE_FAIL;
+ memcpy(vcpu->arch.regs, c->regs, sizeof c->regs);
+ kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip);
kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
return EMULATE_DONE;
}
@@ -4935,8 +5088,6 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
int pending_vec, max_bits;
struct desc_ptr dt;
- vcpu_load(vcpu);
-
dt.size = sregs->idt.limit;
dt.address = sregs->idt.base;
kvm_x86_ops->set_idt(vcpu, &dt);
@@ -4996,8 +5147,6 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
!is_protmode(vcpu))
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
- vcpu_put(vcpu);
-
return 0;
}
@@ -5007,12 +5156,10 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
unsigned long rflags;
int i, r;
- vcpu_load(vcpu);
-
if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {
r = -EBUSY;
if (vcpu->arch.exception.pending)
- goto unlock_out;
+ goto out;
if (dbg->control & KVM_GUESTDBG_INJECT_DB)
kvm_queue_exception(vcpu, DB_VECTOR);
else
@@ -5054,34 +5201,12 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
r = 0;
-unlock_out:
- vcpu_put(vcpu);
+out:
return r;
}
/*
- * fxsave fpu state. Taken from x86_64/processor.h. To be killed when
- * we have asm/x86/processor.h
- */
-struct fxsave {
- u16 cwd;
- u16 swd;
- u16 twd;
- u16 fop;
- u64 rip;
- u64 rdp;
- u32 mxcsr;
- u32 mxcsr_mask;
- u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */
-#ifdef CONFIG_X86_64
- u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 256 bytes */
-#else
- u32 xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */
-#endif
-};
-
-/*
* Translate a guest virtual address to a guest physical address.
*/
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
@@ -5091,7 +5216,6 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
gpa_t gpa;
int idx;
- vcpu_load(vcpu);
idx = srcu_read_lock(&vcpu->kvm->srcu);
gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL);
srcu_read_unlock(&vcpu->kvm->srcu, idx);
@@ -5099,16 +5223,14 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
tr->valid = gpa != UNMAPPED_GVA;
tr->writeable = 1;
tr->usermode = 0;
- vcpu_put(vcpu);
return 0;
}
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
- struct fxsave *fxsave = (struct fxsave *)&vcpu->arch.guest_fx_image;
-
- vcpu_load(vcpu);
+ struct i387_fxsave_struct *fxsave =
+ &vcpu->arch.guest_fpu.state->fxsave;
memcpy(fpu->fpr, fxsave->st_space, 128);
fpu->fcw = fxsave->cwd;
@@ -5119,16 +5241,13 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
fpu->last_dp = fxsave->rdp;
memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space);
- vcpu_put(vcpu);
-
return 0;
}
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
- struct fxsave *fxsave = (struct fxsave *)&vcpu->arch.guest_fx_image;
-
- vcpu_load(vcpu);
+ struct i387_fxsave_struct *fxsave =
+ &vcpu->arch.guest_fpu.state->fxsave;
memcpy(fxsave->st_space, fpu->fpr, 128);
fxsave->cwd = fpu->fcw;
@@ -5139,61 +5258,63 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
fxsave->rdp = fpu->last_dp;
memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space);
- vcpu_put(vcpu);
-
return 0;
}
-void fx_init(struct kvm_vcpu *vcpu)
+int fx_init(struct kvm_vcpu *vcpu)
{
- unsigned after_mxcsr_mask;
+ int err;
+
+ err = fpu_alloc(&vcpu->arch.guest_fpu);
+ if (err)
+ return err;
+
+ fpu_finit(&vcpu->arch.guest_fpu);
/*
- * Touch the fpu the first time in non atomic context as if
- * this is the first fpu instruction the exception handler
- * will fire before the instruction returns and it'll have to
- * allocate ram with GFP_KERNEL.
+ * Ensure guest xcr0 is valid for loading
*/
- if (!used_math())
- kvm_fx_save(&vcpu->arch.host_fx_image);
-
- /* Initialize guest FPU by resetting ours and saving into guest's */
- preempt_disable();
- kvm_fx_save(&vcpu->arch.host_fx_image);
- kvm_fx_finit();
- kvm_fx_save(&vcpu->arch.guest_fx_image);
- kvm_fx_restore(&vcpu->arch.host_fx_image);
- preempt_enable();
+ vcpu->arch.xcr0 = XSTATE_FP;
vcpu->arch.cr0 |= X86_CR0_ET;
- after_mxcsr_mask = offsetof(struct i387_fxsave_struct, st_space);
- vcpu->arch.guest_fx_image.mxcsr = 0x1f80;
- memset((void *)&vcpu->arch.guest_fx_image + after_mxcsr_mask,
- 0, sizeof(struct i387_fxsave_struct) - after_mxcsr_mask);
+
+ return 0;
}
EXPORT_SYMBOL_GPL(fx_init);
+static void fx_free(struct kvm_vcpu *vcpu)
+{
+ fpu_free(&vcpu->arch.guest_fpu);
+}
+
void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
{
if (vcpu->guest_fpu_loaded)
return;
+ /*
+ * Restore all possible states in the guest,
+ * and assume host would use all available bits.
+ * Guest xcr0 would be loaded later.
+ */
+ kvm_put_guest_xcr0(vcpu);
vcpu->guest_fpu_loaded = 1;
- kvm_fx_save(&vcpu->arch.host_fx_image);
- kvm_fx_restore(&vcpu->arch.guest_fx_image);
+ unlazy_fpu(current);
+ fpu_restore_checking(&vcpu->arch.guest_fpu);
trace_kvm_fpu(1);
}
void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
{
+ kvm_put_guest_xcr0(vcpu);
+
if (!vcpu->guest_fpu_loaded)
return;
vcpu->guest_fpu_loaded = 0;
- kvm_fx_save(&vcpu->arch.guest_fx_image);
- kvm_fx_restore(&vcpu->arch.host_fx_image);
+ fpu_save_init(&vcpu->arch.guest_fpu);
++vcpu->stat.fpu_reload;
- set_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests);
+ kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
trace_kvm_fpu(0);
}
@@ -5204,6 +5325,8 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
vcpu->arch.time_page = NULL;
}
+ free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
+ fx_free(vcpu);
kvm_x86_ops->vcpu_free(vcpu);
}
@@ -5217,9 +5340,6 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
int r;
- /* We do fxsave: this must be aligned. */
- BUG_ON((unsigned long)&vcpu->arch.host_fx_image & 0xF);
-
vcpu->arch.mtrr_state.have_fixed = 1;
vcpu_load(vcpu);
r = kvm_arch_vcpu_reset(vcpu);
@@ -5241,6 +5361,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
kvm_mmu_unload(vcpu);
vcpu_put(vcpu);
+ fx_free(vcpu);
kvm_x86_ops->vcpu_free(vcpu);
}
@@ -5334,7 +5455,12 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
}
vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
+ if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL))
+ goto fail_free_mce_banks;
+
return 0;
+fail_free_mce_banks:
+ kfree(vcpu->arch.mce_banks);
fail_free_lapic:
kvm_free_lapic(vcpu);
fail_mmu_destroy:
@@ -5364,12 +5490,6 @@ struct kvm *kvm_arch_create_vm(void)
if (!kvm)
return ERR_PTR(-ENOMEM);
- kvm->arch.aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL);
- if (!kvm->arch.aliases) {
- kfree(kvm);
- return ERR_PTR(-ENOMEM);
- }
-
INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
@@ -5412,12 +5532,12 @@ static void kvm_free_vcpus(struct kvm *kvm)
void kvm_arch_sync_events(struct kvm *kvm)
{
kvm_free_all_assigned_devices(kvm);
+ kvm_free_pit(kvm);
}
void kvm_arch_destroy_vm(struct kvm *kvm)
{
kvm_iommu_unmap_guest(kvm);
- kvm_free_pit(kvm);
kfree(kvm->arch.vpic);
kfree(kvm->arch.vioapic);
kvm_free_vcpus(kvm);
@@ -5427,7 +5547,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
if (kvm->arch.ept_identity_pagetable)
put_page(kvm->arch.ept_identity_pagetable);
cleanup_srcu_struct(&kvm->srcu);
- kfree(kvm->arch.aliases);
kfree(kvm);
}
@@ -5438,6 +5557,11 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
int user_alloc)
{
int npages = memslot->npages;
+ int map_flags = MAP_PRIVATE | MAP_ANONYMOUS;
+
+ /* Prevent internal slot pages from being moved by fork()/COW. */
+ if (memslot->id >= KVM_MEMORY_SLOTS)
+ map_flags = MAP_SHARED | MAP_ANONYMOUS;
/*To keep backward compatibility with older userspace,
*x86 needs to hanlde !user_alloc case.
@@ -5450,7 +5574,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
userspace_addr = do_mmap(NULL, 0,
npages * PAGE_SIZE,
PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS,
+ map_flags,
0);
up_write(&current->mm->mmap_sem);
@@ -5523,7 +5647,7 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
me = get_cpu();
if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
- if (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests))
+ if (atomic_xchg(&vcpu->guest_mode, 0))
smp_send_reschedule(cpu);
put_cpu();
}