aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kvm/vmx.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm/vmx.c')
-rw-r--r--arch/x86/kvm/vmx.c350
1 files changed, 274 insertions, 76 deletions
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index bf89ec2cfb82..4c3fa0f67469 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -93,14 +93,14 @@ module_param(yield_on_hlt, bool, S_IRUGO);
* These 2 parameters are used to config the controls for Pause-Loop Exiting:
* ple_gap: upper bound on the amount of time between two successive
* executions of PAUSE in a loop. Also indicate if ple enabled.
- * According to test, this time is usually small than 41 cycles.
+ * According to test, this time is usually smaller than 128 cycles.
* ple_window: upper bound on the amount of time a guest is allowed to execute
* in a PAUSE loop. Tests indicate that most spinlocks are held for
* less than 2^12 cycles
* Time is measured based on a counter that runs at the same rate as the TSC,
* refer SDM volume 3b section 21.6.13 & 22.1.3.
*/
-#define KVM_VMX_DEFAULT_PLE_GAP 41
+#define KVM_VMX_DEFAULT_PLE_GAP 128
#define KVM_VMX_DEFAULT_PLE_WINDOW 4096
static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP;
module_param(ple_gap, int, S_IRUGO);
@@ -128,8 +128,11 @@ struct vcpu_vmx {
unsigned long host_rsp;
int launched;
u8 fail;
+ u8 cpl;
+ bool nmi_known_unmasked;
u32 exit_intr_info;
u32 idt_vectoring_info;
+ ulong rflags;
struct shared_msr_entry *guest_msrs;
int nmsrs;
int save_nmsrs;
@@ -159,6 +162,10 @@ struct vcpu_vmx {
u32 ar;
} tr, es, ds, fs, gs;
} rmode;
+ struct {
+ u32 bitmask; /* 4 bits per segment (1 bit per field) */
+ struct kvm_save_segment seg[8];
+ } segment_cache;
int vpid;
bool emulation_required;
@@ -171,16 +178,25 @@ struct vcpu_vmx {
bool rdtscp_enabled;
};
+enum segment_cache_field {
+ SEG_FIELD_SEL = 0,
+ SEG_FIELD_BASE = 1,
+ SEG_FIELD_LIMIT = 2,
+ SEG_FIELD_AR = 3,
+
+ SEG_FIELD_NR = 4
+};
+
static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
{
return container_of(vcpu, struct vcpu_vmx, vcpu);
}
-static int init_rmode(struct kvm *kvm);
static u64 construct_eptp(unsigned long root_hpa);
static void kvm_cpu_vmxon(u64 addr);
static void kvm_cpu_vmxoff(void);
static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3);
+static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
static DEFINE_PER_CPU(struct vmcs *, vmxarea);
static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -643,6 +659,62 @@ static void vmcs_set_bits(unsigned long field, u32 mask)
vmcs_writel(field, vmcs_readl(field) | mask);
}
+static void vmx_segment_cache_clear(struct vcpu_vmx *vmx)
+{
+ vmx->segment_cache.bitmask = 0;
+}
+
+static bool vmx_segment_cache_test_set(struct vcpu_vmx *vmx, unsigned seg,
+ unsigned field)
+{
+ bool ret;
+ u32 mask = 1 << (seg * SEG_FIELD_NR + field);
+
+ if (!(vmx->vcpu.arch.regs_avail & (1 << VCPU_EXREG_SEGMENTS))) {
+ vmx->vcpu.arch.regs_avail |= (1 << VCPU_EXREG_SEGMENTS);
+ vmx->segment_cache.bitmask = 0;
+ }
+ ret = vmx->segment_cache.bitmask & mask;
+ vmx->segment_cache.bitmask |= mask;
+ return ret;
+}
+
+static u16 vmx_read_guest_seg_selector(struct vcpu_vmx *vmx, unsigned seg)
+{
+ u16 *p = &vmx->segment_cache.seg[seg].selector;
+
+ if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_SEL))
+ *p = vmcs_read16(kvm_vmx_segment_fields[seg].selector);
+ return *p;
+}
+
+static ulong vmx_read_guest_seg_base(struct vcpu_vmx *vmx, unsigned seg)
+{
+ ulong *p = &vmx->segment_cache.seg[seg].base;
+
+ if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_BASE))
+ *p = vmcs_readl(kvm_vmx_segment_fields[seg].base);
+ return *p;
+}
+
+static u32 vmx_read_guest_seg_limit(struct vcpu_vmx *vmx, unsigned seg)
+{
+ u32 *p = &vmx->segment_cache.seg[seg].limit;
+
+ if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_LIMIT))
+ *p = vmcs_read32(kvm_vmx_segment_fields[seg].limit);
+ return *p;
+}
+
+static u32 vmx_read_guest_seg_ar(struct vcpu_vmx *vmx, unsigned seg)
+{
+ u32 *p = &vmx->segment_cache.seg[seg].ar;
+
+ if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_AR))
+ *p = vmcs_read32(kvm_vmx_segment_fields[seg].ar_bytes);
+ return *p;
+}
+
static void update_exception_bitmap(struct kvm_vcpu *vcpu)
{
u32 eb;
@@ -970,17 +1042,24 @@ static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
{
unsigned long rflags, save_rflags;
- rflags = vmcs_readl(GUEST_RFLAGS);
- if (to_vmx(vcpu)->rmode.vm86_active) {
- rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
- save_rflags = to_vmx(vcpu)->rmode.save_rflags;
- rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
+ if (!test_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail)) {
+ __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail);
+ rflags = vmcs_readl(GUEST_RFLAGS);
+ if (to_vmx(vcpu)->rmode.vm86_active) {
+ rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
+ save_rflags = to_vmx(vcpu)->rmode.save_rflags;
+ rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
+ }
+ to_vmx(vcpu)->rflags = rflags;
}
- return rflags;
+ return to_vmx(vcpu)->rflags;
}
static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
{
+ __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail);
+ __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
+ to_vmx(vcpu)->rflags = rflags;
if (to_vmx(vcpu)->rmode.vm86_active) {
to_vmx(vcpu)->rmode.save_rflags = rflags;
rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
@@ -1053,7 +1132,10 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
}
if (vmx->rmode.vm86_active) {
- if (kvm_inject_realmode_interrupt(vcpu, nr) != EMULATE_DONE)
+ int inc_eip = 0;
+ if (kvm_exception_is_soft(nr))
+ inc_eip = vcpu->arch.event_exit_inst_len;
+ if (kvm_inject_realmode_interrupt(vcpu, nr, inc_eip) != EMULATE_DONE)
kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
return;
}
@@ -1151,6 +1233,16 @@ static u64 guest_read_tsc(void)
}
/*
+ * Empty call-back. Needs to be implemented when VMX enables the SET_TSC_KHZ
+ * ioctl. In this case the call-back should update internal vmx state to make
+ * the changes effective.
+ */
+static void vmx_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
+{
+ /* Nothing to do here */
+}
+
+/*
* writes 'offset' into guest's timestamp counter offset register
*/
static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
@@ -1164,6 +1256,11 @@ static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment)
vmcs_write64(TSC_OFFSET, offset + adjustment);
}
+static u64 vmx_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
+{
+ return target_tsc - native_read_tsc();
+}
+
/*
* Reads an msr value (of 'msr_index') into 'pdata'.
* Returns 0 on success, non-0 otherwise.
@@ -1243,9 +1340,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
break;
#ifdef CONFIG_X86_64
case MSR_FS_BASE:
+ vmx_segment_cache_clear(vmx);
vmcs_writel(GUEST_FS_BASE, data);
break;
case MSR_GS_BASE:
+ vmx_segment_cache_clear(vmx);
vmcs_writel(GUEST_GS_BASE, data);
break;
case MSR_KERNEL_GS_BASE:
@@ -1333,19 +1432,25 @@ static __init int vmx_disabled_by_bios(void)
rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);
if (msr & FEATURE_CONTROL_LOCKED) {
+ /* launched w/ TXT and VMX disabled */
if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX)
&& tboot_enabled())
return 1;
+ /* launched w/o TXT and VMX only enabled w/ TXT */
if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX)
+ && (msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX)
&& !tboot_enabled()) {
printk(KERN_WARNING "kvm: disable TXT in the BIOS or "
- " activate TXT before enabling KVM\n");
+ "activate TXT before enabling KVM\n");
return 1;
}
+ /* launched w/o TXT and VMX disabled */
+ if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX)
+ && !tboot_enabled())
+ return 1;
}
return 0;
- /* locked but not enabled */
}
static void kvm_cpu_vmxon(u64 addr)
@@ -1683,6 +1788,9 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
vmx->emulation_required = 1;
vmx->rmode.vm86_active = 0;
+ vmx_segment_cache_clear(vmx);
+
+ vmcs_write16(GUEST_TR_SELECTOR, vmx->rmode.tr.selector);
vmcs_writel(GUEST_TR_BASE, vmx->rmode.tr.base);
vmcs_write32(GUEST_TR_LIMIT, vmx->rmode.tr.limit);
vmcs_write32(GUEST_TR_AR_BYTES, vmx->rmode.tr.ar);
@@ -1705,6 +1813,8 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
fix_pmode_dataseg(VCPU_SREG_GS, &vmx->rmode.gs);
fix_pmode_dataseg(VCPU_SREG_FS, &vmx->rmode.fs);
+ vmx_segment_cache_clear(vmx);
+
vmcs_write16(GUEST_SS_SELECTOR, 0);
vmcs_write32(GUEST_SS_AR_BYTES, 0x93);
@@ -1756,6 +1866,21 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
vmx->emulation_required = 1;
vmx->rmode.vm86_active = 1;
+ /*
+ * Very old userspace does not call KVM_SET_TSS_ADDR before entering
+ * vcpu. Call it here with phys address pointing 16M below 4G.
+ */
+ if (!vcpu->kvm->arch.tss_addr) {
+ printk_once(KERN_WARNING "kvm: KVM_SET_TSS_ADDR need to be "
+ "called before entering vcpu\n");
+ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+ vmx_set_tss_addr(vcpu->kvm, 0xfeffd000);
+ vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+ }
+
+ vmx_segment_cache_clear(vmx);
+
+ vmx->rmode.tr.selector = vmcs_read16(GUEST_TR_SELECTOR);
vmx->rmode.tr.base = vmcs_readl(GUEST_TR_BASE);
vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm));
@@ -1794,7 +1919,6 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
continue_rmode:
kvm_mmu_reset_context(vcpu);
- init_rmode(vcpu->kvm);
}
static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
@@ -1832,6 +1956,8 @@ static void enter_lmode(struct kvm_vcpu *vcpu)
{
u32 guest_tr_ar;
+ vmx_segment_cache_clear(to_vmx(vcpu));
+
guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES);
if ((guest_tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_64_TSS) {
printk(KERN_DEBUG "%s: tss fixup for long mode. \n",
@@ -1979,6 +2105,7 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
vmcs_writel(CR0_READ_SHADOW, cr0);
vmcs_writel(GUEST_CR0, hw_cr0);
vcpu->arch.cr0 = cr0;
+ __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
}
static u64 construct_eptp(unsigned long root_hpa)
@@ -2030,23 +2157,39 @@ static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
vmcs_writel(GUEST_CR4, hw_cr4);
}
-static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg)
-{
- struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
-
- return vmcs_readl(sf->base);
-}
-
static void vmx_get_segment(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg)
{
- struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct kvm_save_segment *save;
u32 ar;
- var->base = vmcs_readl(sf->base);
- var->limit = vmcs_read32(sf->limit);
- var->selector = vmcs_read16(sf->selector);
- ar = vmcs_read32(sf->ar_bytes);
+ if (vmx->rmode.vm86_active
+ && (seg == VCPU_SREG_TR || seg == VCPU_SREG_ES
+ || seg == VCPU_SREG_DS || seg == VCPU_SREG_FS
+ || seg == VCPU_SREG_GS)
+ && !emulate_invalid_guest_state) {
+ switch (seg) {
+ case VCPU_SREG_TR: save = &vmx->rmode.tr; break;
+ case VCPU_SREG_ES: save = &vmx->rmode.es; break;
+ case VCPU_SREG_DS: save = &vmx->rmode.ds; break;
+ case VCPU_SREG_FS: save = &vmx->rmode.fs; break;
+ case VCPU_SREG_GS: save = &vmx->rmode.gs; break;
+ default: BUG();
+ }
+ var->selector = save->selector;
+ var->base = save->base;
+ var->limit = save->limit;
+ ar = save->ar;
+ if (seg == VCPU_SREG_TR
+ || var->selector == vmx_read_guest_seg_selector(vmx, seg))
+ goto use_saved_rmode_seg;
+ }
+ var->base = vmx_read_guest_seg_base(vmx, seg);
+ var->limit = vmx_read_guest_seg_limit(vmx, seg);
+ var->selector = vmx_read_guest_seg_selector(vmx, seg);
+ ar = vmx_read_guest_seg_ar(vmx, seg);
+use_saved_rmode_seg:
if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state)
ar = 0;
var->type = ar & 15;
@@ -2060,17 +2203,39 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
var->unusable = (ar >> 16) & 1;
}
-static int vmx_get_cpl(struct kvm_vcpu *vcpu)
+static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg)
+{
+ struct kvm_segment s;
+
+ if (to_vmx(vcpu)->rmode.vm86_active) {
+ vmx_get_segment(vcpu, &s, seg);
+ return s.base;
+ }
+ return vmx_read_guest_seg_base(to_vmx(vcpu), seg);
+}
+
+static int __vmx_get_cpl(struct kvm_vcpu *vcpu)
{
if (!is_protmode(vcpu))
return 0;
- if (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) /* if virtual 8086 */
+ if (!is_long_mode(vcpu)
+ && (kvm_get_rflags(vcpu) & X86_EFLAGS_VM)) /* if virtual 8086 */
return 3;
- return vmcs_read16(GUEST_CS_SELECTOR) & 3;
+ return vmx_read_guest_seg_selector(to_vmx(vcpu), VCPU_SREG_CS) & 3;
+}
+
+static int vmx_get_cpl(struct kvm_vcpu *vcpu)
+{
+ if (!test_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail)) {
+ __set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
+ to_vmx(vcpu)->cpl = __vmx_get_cpl(vcpu);
+ }
+ return to_vmx(vcpu)->cpl;
}
+
static u32 vmx_segment_access_rights(struct kvm_segment *var)
{
u32 ar;
@@ -2100,7 +2265,10 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
u32 ar;
+ vmx_segment_cache_clear(vmx);
+
if (vmx->rmode.vm86_active && seg == VCPU_SREG_TR) {
+ vmcs_write16(sf->selector, var->selector);
vmx->rmode.tr.selector = var->selector;
vmx->rmode.tr.base = var->base;
vmx->rmode.tr.limit = var->limit;
@@ -2135,11 +2303,12 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
ar |= 0x1; /* Accessed */
vmcs_write32(sf->ar_bytes, ar);
+ __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
}
static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
{
- u32 ar = vmcs_read32(GUEST_CS_AR_BYTES);
+ u32 ar = vmx_read_guest_seg_ar(to_vmx(vcpu), VCPU_SREG_CS);
*db = (ar >> 14) & 1;
*l = (ar >> 13) & 1;
@@ -2361,11 +2530,12 @@ static bool guest_state_valid(struct kvm_vcpu *vcpu)
static int init_rmode_tss(struct kvm *kvm)
{
- gfn_t fn = rmode_tss_base(kvm) >> PAGE_SHIFT;
+ gfn_t fn;
u16 data = 0;
- int ret = 0;
- int r;
+ int r, idx, ret = 0;
+ idx = srcu_read_lock(&kvm->srcu);
+ fn = rmode_tss_base(kvm) >> PAGE_SHIFT;
r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
if (r < 0)
goto out;
@@ -2389,12 +2559,13 @@ static int init_rmode_tss(struct kvm *kvm)
ret = 1;
out:
+ srcu_read_unlock(&kvm->srcu, idx);
return ret;
}
static int init_rmode_identity_map(struct kvm *kvm)
{
- int i, r, ret;
+ int i, idx, r, ret;
pfn_t identity_map_pfn;
u32 tmp;
@@ -2409,6 +2580,7 @@ static int init_rmode_identity_map(struct kvm *kvm)
return 1;
ret = 0;
identity_map_pfn = kvm->arch.ept_identity_map_addr >> PAGE_SHIFT;
+ idx = srcu_read_lock(&kvm->srcu);
r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE);
if (r < 0)
goto out;
@@ -2424,6 +2596,7 @@ static int init_rmode_identity_map(struct kvm *kvm)
kvm->arch.ept_identity_pagetable_done = true;
ret = 1;
out:
+ srcu_read_unlock(&kvm->srcu, idx);
return ret;
}
@@ -2699,22 +2872,6 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
return 0;
}
-static int init_rmode(struct kvm *kvm)
-{
- int idx, ret = 0;
-
- idx = srcu_read_lock(&kvm->srcu);
- if (!init_rmode_tss(kvm))
- goto exit;
- if (!init_rmode_identity_map(kvm))
- goto exit;
-
- ret = 1;
-exit:
- srcu_read_unlock(&kvm->srcu, idx);
- return ret;
-}
-
static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -2722,10 +2879,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
int ret;
vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP));
- if (!init_rmode(vmx->vcpu.kvm)) {
- ret = -ENOMEM;
- goto out;
- }
vmx->rmode.vm86_active = 0;
@@ -2742,6 +2895,8 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
if (ret != 0)
goto out;
+ vmx_segment_cache_clear(vmx);
+
seg_setup(VCPU_SREG_CS);
/*
* GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode
@@ -2805,7 +2960,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0);
if (vm_need_tpr_shadow(vmx->vcpu.kvm))
vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
- page_to_phys(vmx->vcpu.arch.apic->regs_page));
+ __pa(vmx->vcpu.arch.apic->regs));
vmcs_write32(TPR_THRESHOLD, 0);
}
@@ -2871,7 +3026,10 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu)
++vcpu->stat.irq_injections;
if (vmx->rmode.vm86_active) {
- if (kvm_inject_realmode_interrupt(vcpu, irq) != EMULATE_DONE)
+ int inc_eip = 0;
+ if (vcpu->arch.interrupt.soft)
+ inc_eip = vcpu->arch.event_exit_inst_len;
+ if (kvm_inject_realmode_interrupt(vcpu, irq, inc_eip) != EMULATE_DONE)
kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
return;
}
@@ -2904,8 +3062,9 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
}
++vcpu->stat.nmi_injections;
+ vmx->nmi_known_unmasked = false;
if (vmx->rmode.vm86_active) {
- if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR) != EMULATE_DONE)
+ if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0) != EMULATE_DONE)
kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
return;
}
@@ -2928,6 +3087,8 @@ static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
{
if (!cpu_has_virtual_nmis())
return to_vmx(vcpu)->soft_vnmi_blocked;
+ if (to_vmx(vcpu)->nmi_known_unmasked)
+ return false;
return vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI;
}
@@ -2941,6 +3102,7 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
vmx->vnmi_blocked_time = 0;
}
} else {
+ vmx->nmi_known_unmasked = !masked;
if (masked)
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
GUEST_INTR_STATE_NMI);
@@ -2971,6 +3133,9 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
if (ret)
return ret;
kvm->arch.tss_addr = addr;
+ if (!init_rmode_tss(kvm))
+ return -ENOMEM;
+
return 0;
}
@@ -3055,7 +3220,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
enum emulation_result er;
vect_info = vmx->idt_vectoring_info;
- intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+ intr_info = vmx->exit_intr_info;
if (is_machine_check(intr_info))
return handle_machine_check(vcpu);
@@ -3086,7 +3251,6 @@ static int handle_exception(struct kvm_vcpu *vcpu)
}
error_code = 0;
- rip = kvm_rip_read(vcpu);
if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
if (is_page_fault(intr_info)) {
@@ -3133,6 +3297,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
vmx->vcpu.arch.event_exit_inst_len =
vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
kvm_run->exit_reason = KVM_EXIT_DEBUG;
+ rip = kvm_rip_read(vcpu);
kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
kvm_run->debug.arch.exception = ex_no;
break;
@@ -3469,9 +3634,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu)
switch (type) {
case INTR_TYPE_NMI_INTR:
vcpu->arch.nmi_injected = false;
- if (cpu_has_virtual_nmis())
- vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
- GUEST_INTR_STATE_NMI);
+ vmx_set_nmi_mask(vcpu, true);
break;
case INTR_TYPE_EXT_INTR:
case INTR_TYPE_SOFT_INTR:
@@ -3831,12 +3994,17 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
{
- u32 exit_intr_info = vmx->exit_intr_info;
+ u32 exit_intr_info;
+
+ if (!(vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY
+ || vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI))
+ return;
+
+ vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+ exit_intr_info = vmx->exit_intr_info;
/* Handle machine checks before interrupts are enabled */
- if ((vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY)
- || (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI
- && is_machine_check(exit_intr_info)))
+ if (is_machine_check(exit_intr_info))
kvm_machine_check();
/* We need to handle NMIs before interrupts are enabled */
@@ -3850,7 +4018,7 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
{
- u32 exit_intr_info = vmx->exit_intr_info;
+ u32 exit_intr_info;
bool unblock_nmi;
u8 vector;
bool idtv_info_valid;
@@ -3858,6 +4026,13 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK;
if (cpu_has_virtual_nmis()) {
+ if (vmx->nmi_known_unmasked)
+ return;
+ /*
+ * Can't use vmx->exit_intr_info since we're not sure what
+ * the exit reason is.
+ */
+ exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
/*
@@ -3874,6 +4049,10 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
vector != DF_VECTOR && !idtv_info_valid)
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
GUEST_INTR_STATE_NMI);
+ else
+ vmx->nmi_known_unmasked =
+ !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
+ & GUEST_INTR_STATE_NMI);
} else if (unlikely(vmx->soft_vnmi_blocked))
vmx->vnmi_blocked_time +=
ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time));
@@ -3910,8 +4089,7 @@ static void __vmx_complete_interrupts(struct vcpu_vmx *vmx,
* Clear bit "block by NMI" before VM entry if a NMI
* delivery faulted.
*/
- vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
- GUEST_INTR_STATE_NMI);
+ vmx_set_nmi_mask(&vmx->vcpu, false);
break;
case INTR_TYPE_SOFT_EXCEPTION:
vmx->vcpu.arch.event_exit_inst_len =
@@ -3962,7 +4140,7 @@ static void vmx_cancel_injection(struct kvm_vcpu *vcpu)
#define Q "l"
#endif
-static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
+static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -3991,6 +4169,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
asm(
/* Store host registers */
"push %%"R"dx; push %%"R"bp;"
+ "push %%"R"cx \n\t" /* placeholder for guest rcx */
"push %%"R"cx \n\t"
"cmp %%"R"sp, %c[host_rsp](%0) \n\t"
"je 1f \n\t"
@@ -4032,10 +4211,11 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
".Llaunched: " __ex(ASM_VMX_VMRESUME) "\n\t"
".Lkvm_vmx_return: "
/* Save guest registers, load host registers, keep flags */
- "xchg %0, (%%"R"sp) \n\t"
+ "mov %0, %c[wordsize](%%"R"sp) \n\t"
+ "pop %0 \n\t"
"mov %%"R"ax, %c[rax](%0) \n\t"
"mov %%"R"bx, %c[rbx](%0) \n\t"
- "push"Q" (%%"R"sp); pop"Q" %c[rcx](%0) \n\t"
+ "pop"Q" %c[rcx](%0) \n\t"
"mov %%"R"dx, %c[rdx](%0) \n\t"
"mov %%"R"si, %c[rsi](%0) \n\t"
"mov %%"R"di, %c[rdi](%0) \n\t"
@@ -4053,7 +4233,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
"mov %%cr2, %%"R"ax \n\t"
"mov %%"R"ax, %c[cr2](%0) \n\t"
- "pop %%"R"bp; pop %%"R"bp; pop %%"R"dx \n\t"
+ "pop %%"R"bp; pop %%"R"dx \n\t"
"setbe %c[fail](%0) \n\t"
: : "c"(vmx), "d"((unsigned long)HOST_RSP),
[launched]"i"(offsetof(struct vcpu_vmx, launched)),
@@ -4076,7 +4256,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
[r14]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R14])),
[r15]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R15])),
#endif
- [cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2))
+ [cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)),
+ [wordsize]"i"(sizeof(ulong))
: "cc", "memory"
, R"ax", R"bx", R"di", R"si"
#ifdef CONFIG_X86_64
@@ -4085,7 +4266,10 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
);
vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)
+ | (1 << VCPU_EXREG_RFLAGS)
+ | (1 << VCPU_EXREG_CPL)
| (1 << VCPU_EXREG_PDPTR)
+ | (1 << VCPU_EXREG_SEGMENTS)
| (1 << VCPU_EXREG_CR3));
vcpu->arch.regs_dirty = 0;
@@ -4095,7 +4279,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmx->launched = 1;
vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
- vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
vmx_complete_atomic_exit(vmx);
vmx_recover_nmi_blocking(vmx);
@@ -4156,8 +4339,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
goto free_vcpu;
vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ err = -ENOMEM;
if (!vmx->guest_msrs) {
- err = -ENOMEM;
goto uninit_vcpu;
}
@@ -4176,15 +4359,19 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
if (err)
goto free_vmcs;
if (vm_need_virtualize_apic_accesses(kvm))
- if (alloc_apic_access_page(kvm) != 0)
+ err = alloc_apic_access_page(kvm);
+ if (err)
goto free_vmcs;
if (enable_ept) {
if (!kvm->arch.ept_identity_map_addr)
kvm->arch.ept_identity_map_addr =
VMX_EPT_IDENTITY_PAGETABLE_ADDR;
+ err = -ENOMEM;
if (alloc_identity_pagetable(kvm) != 0)
goto free_vmcs;
+ if (!init_rmode_identity_map(kvm))
+ goto free_vmcs;
}
return &vmx->vcpu;
@@ -4326,6 +4513,13 @@ static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
{
}
+static int vmx_check_intercept(struct kvm_vcpu *vcpu,
+ struct x86_instruction_info *info,
+ enum x86_intercept_stage stage)
+{
+ return X86EMUL_CONTINUE;
+}
+
static struct kvm_x86_ops vmx_x86_ops = {
.cpu_has_kvm_support = cpu_has_kvm_support,
.disabled_by_bios = vmx_disabled_by_bios,
@@ -4407,10 +4601,14 @@ static struct kvm_x86_ops vmx_x86_ops = {
.has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
+ .set_tsc_khz = vmx_set_tsc_khz,
.write_tsc_offset = vmx_write_tsc_offset,
.adjust_tsc_offset = vmx_adjust_tsc_offset,
+ .compute_tsc_offset = vmx_compute_tsc_offset,
.set_tdp_cr3 = vmx_set_cr3,
+
+ .check_intercept = vmx_check_intercept,
};
static int __init vmx_init(void)