Diffstat (limited to 'arch/x86/kvm/vmx.c')
-rw-r--r--  arch/x86/kvm/vmx.c  103
1 file changed, 76 insertions(+), 27 deletions(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 6ffd643d1a64..0acac81f198b 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -472,6 +472,7 @@ struct vcpu_vmx {
int gs_ldt_reload_needed;
int fs_reload_needed;
u64 msr_host_bndcfgs;
+ unsigned long vmcs_host_cr4; /* May not match real cr4 */
} host_state;
struct {
int vm86_active;
@@ -1626,7 +1627,7 @@ static void reload_tss(void)
/*
* VT restores TR but not its size. Useless.
*/
- struct desc_ptr *gdt = &__get_cpu_var(host_gdt);
+ struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
struct desc_struct *descs;
descs = (void *)gdt->address;
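The hunk above is the first of several in this diff that make the same mechanical conversion: the older __get_cpu_var() lvalue macro, which was being removed from the tree, is replaced by this_cpu_ptr(), which takes the address of the per-cpu variable and returns a pointer into the current CPU's per-cpu area. A minimal fragment illustrating the pattern (host_gdt is declared with DEFINE_PER_CPU in vmx.c; the wrapper function here exists only for illustration):

#include <linux/percpu.h>
#include <asm/desc.h>

static DEFINE_PER_CPU(struct desc_ptr, host_gdt);

static void example_use_host_gdt(void)
{
        /* Old form: expand the per-cpu variable to an lvalue, then take
         * its address:
         *     struct desc_ptr *gdt = &__get_cpu_var(host_gdt);
         */

        /* New form: pass the per-cpu symbol's address directly;
         * this_cpu_ptr() yields a pointer into this CPU's per-cpu area. */
        struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);

        load_gdt(gdt);
}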
@@ -1672,7 +1673,7 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
static unsigned long segment_base(u16 selector)
{
- struct desc_ptr *gdt = &__get_cpu_var(host_gdt);
+ struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
struct desc_struct *d;
unsigned long table_base;
unsigned long v;
@@ -1802,7 +1803,7 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
*/
if (!user_has_fpu() && !vmx->vcpu.guest_fpu_loaded)
stts();
- load_gdt(&__get_cpu_var(host_gdt));
+ load_gdt(this_cpu_ptr(&host_gdt));
}
static void vmx_load_host_state(struct vcpu_vmx *vmx)
@@ -1832,7 +1833,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
}
if (vmx->loaded_vmcs->cpu != cpu) {
- struct desc_ptr *gdt = &__get_cpu_var(host_gdt);
+ struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
unsigned long sysenter_esp;
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
@@ -2626,6 +2627,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
case MSR_IA32_CR_PAT:
if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
+ if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
+ return 1;
vmcs_write64(GUEST_IA32_PAT, data);
vcpu->arch.pat = data;
break;
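The MSR_IA32_CR_PAT change above rejects guest writes that select a reserved memory type before the value reaches the VMCS. The stand-alone sketch below illustrates the kind of validation involved; the real check is performed by kvm_mtrr_valid(), so read the accepted set here (each PAT byte must be 0, 1, 4, 5, 6 or 7; types 2 and 3 are architecturally reserved) as an illustration of the idea rather than a quote of that function.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-in for the PAT portion of kvm_mtrr_valid(): every
 * byte of the MSR selects a memory type, and types 2 and 3 are reserved
 * (0=UC, 1=WC, 4=WT, 5=WP, 6=WB, 7=UC-). */
static bool pat_value_valid(uint64_t data)
{
        for (int i = 0; i < 8; i++) {
                uint8_t type = (data >> (i * 8)) & 0xff;

                if (type >= 8 || type == 2 || type == 3)
                        return false;
        }
        return true;
}

int main(void)
{
        printf("%d\n", pat_value_valid(0x0007040600070406ULL)); /* power-on default PAT: valid */
        printf("%d\n", pat_value_valid(0x0000000000000002ULL)); /* reserved type in entry 0: invalid */
        return 0;
}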
@@ -2769,7 +2772,7 @@ static int hardware_enable(void)
ept_sync_global();
}
- native_store_gdt(&__get_cpu_var(host_gdt));
+ native_store_gdt(this_cpu_ptr(&host_gdt));
return 0;
}
@@ -3132,9 +3135,17 @@ static __init int hardware_setup(void)
if (!cpu_has_vmx_unrestricted_guest())
enable_unrestricted_guest = 0;
- if (!cpu_has_vmx_flexpriority())
+ if (!cpu_has_vmx_flexpriority()) {
flexpriority_enabled = 0;
+ /*
+ * set_apic_access_page_addr() is used to reload apic access
+ * page upon invalidation. No need to do anything if the
+ * processor does not have the APIC_ACCESS_ADDR VMCS field.
+ */
+ kvm_x86_ops->set_apic_access_page_addr = NULL;
+ }
+
if (!cpu_has_vmx_tpr_shadow())
kvm_x86_ops->update_cr8_intercept = NULL;
@@ -3930,7 +3941,7 @@ static int init_rmode_tss(struct kvm *kvm)
{
gfn_t fn;
u16 data = 0;
- int r, idx, ret = 0;
+ int idx, r;
idx = srcu_read_lock(&kvm->srcu);
fn = kvm->arch.tss_addr >> PAGE_SHIFT;
@@ -3952,13 +3963,9 @@ static int init_rmode_tss(struct kvm *kvm)
r = kvm_write_guest_page(kvm, fn, &data,
RMODE_TSS_SIZE - 2 * PAGE_SIZE - 1,
sizeof(u8));
- if (r < 0)
- goto out;
-
- ret = 1;
out:
srcu_read_unlock(&kvm->srcu, idx);
- return ret;
+ return r;
}
static int init_rmode_identity_map(struct kvm *kvm)
@@ -4027,7 +4034,7 @@ static int alloc_apic_access_page(struct kvm *kvm)
int r = 0;
mutex_lock(&kvm->slots_lock);
- if (kvm->arch.apic_access_page)
+ if (kvm->arch.apic_access_page_done)
goto out;
kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT;
kvm_userspace_mem.flags = 0;
@@ -4043,7 +4050,12 @@ static int alloc_apic_access_page(struct kvm *kvm)
goto out;
}
- kvm->arch.apic_access_page = page;
+ /*
+ * Do not pin the page in memory, so that memory hot-unplug
+ * is able to migrate it.
+ */
+ put_page(page);
+ kvm->arch.apic_access_page_done = true;
out:
mutex_unlock(&kvm->slots_lock);
return r;
@@ -4256,11 +4268,16 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
u32 low32, high32;
unsigned long tmpl;
struct desc_ptr dt;
+ unsigned long cr4;
vmcs_writel(HOST_CR0, read_cr0() & ~X86_CR0_TS); /* 22.2.3 */
- vmcs_writel(HOST_CR4, read_cr4()); /* 22.2.3, 22.2.5 */
vmcs_writel(HOST_CR3, read_cr3()); /* 22.2.3 FIXME: shadow tables */
+ /* Save the most likely value for this task's CR4 in the VMCS. */
+ cr4 = read_cr4();
+ vmcs_writel(HOST_CR4, cr4); /* 22.2.3, 22.2.5 */
+ vmx->host_state.vmcs_host_cr4 = cr4;
+
vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */
#ifdef CONFIG_X86_64
/*
@@ -4559,9 +4576,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
vmcs_write32(TPR_THRESHOLD, 0);
}
- if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm))
- vmcs_write64(APIC_ACCESS_ADDR,
- page_to_phys(vmx->vcpu.kvm->arch.apic_access_page));
+ kvm_vcpu_reload_apic_access_page(vcpu);
if (vmx_vm_has_apicv(vcpu->kvm))
memset(&vmx->pi_desc, 0, sizeof(struct pi_desc));
@@ -4751,10 +4766,7 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
if (ret)
return ret;
kvm->arch.tss_addr = addr;
- if (!init_rmode_tss(kvm))
- return -ENOMEM;
-
- return 0;
+ return init_rmode_tss(kvm);
}
static bool rmode_exception(struct kvm_vcpu *vcpu, int vec)
@@ -6718,7 +6730,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
switch (type) {
case VMX_EPT_EXTENT_GLOBAL:
kvm_mmu_sync_roots(vcpu);
- kvm_mmu_flush_tlb(vcpu);
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
nested_vmx_succeed(vcpu);
break;
default:
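In the INVEPT handler above, the direct kvm_mmu_flush_tlb() call is replaced by queueing KVM_REQ_TLB_FLUSH, so the flush is performed once on the next entry into the guest rather than immediately. A small userspace sketch of that deferred-request pattern follows; the names and the plain (non-atomic) bit manipulation are illustrative only, not KVM's implementation.

#include <stdbool.h>
#include <stdio.h>

#define REQ_TLB_FLUSH 0        /* stand-in for KVM_REQ_TLB_FLUSH */

struct vcpu {
        unsigned long requests;        /* KVM uses atomic bitops on a field like this */
};

static void make_request(struct vcpu *vcpu, int req)
{
        vcpu->requests |= 1UL << req;
}

static bool check_and_clear_request(struct vcpu *vcpu, int req)
{
        if (!(vcpu->requests & (1UL << req)))
                return false;
        vcpu->requests &= ~(1UL << req);
        return true;
}

int main(void)
{
        struct vcpu vcpu = { 0 };

        make_request(&vcpu, REQ_TLB_FLUSH);                /* e.g. from the INVEPT handler */

        /* Later, on the vcpu-entry path, the pending request is serviced once. */
        if (check_and_clear_request(&vcpu, REQ_TLB_FLUSH))
                puts("flush the TLB before resuming the guest");

        return 0;
}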
@@ -6993,6 +7005,8 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
case EXIT_REASON_TASK_SWITCH:
return 1;
case EXIT_REASON_CPUID:
+ if (kvm_register_read(vcpu, VCPU_REGS_RAX) == 0xa)
+ return 0;
return 1;
case EXIT_REASON_HLT:
return nested_cpu_has(vmcs12, CPU_BASED_HLT_EXITING);
@@ -7201,6 +7215,29 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
vmx_set_msr_bitmap(vcpu);
}
+static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ /*
+ * Currently we do not handle the nested case where L2 has an
+ * APIC access page of its own; that page is still pinned.
+ * Hence, we skip the case where the VCPU is in guest mode _and_
+ * L1 prepared an APIC access page for L2.
+ *
+ * For the case where L1 and L2 share the same APIC access page
+ * (flexpriority=Y but SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES clear
+ * in the vmcs12), this function will only update either the vmcs01
+ * or the vmcs02. If the former, the vmcs02 will be updated by
+ * prepare_vmcs02. If the latter, the vmcs01 will be updated in
+ * the next L2->L1 exit.
+ */
+ if (!is_guest_mode(vcpu) ||
+ !nested_cpu_has2(vmx->nested.current_vmcs12,
+ SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
+ vmcs_write64(APIC_ACCESS_ADDR, hpa);
+}
+
static void vmx_hwapic_isr_update(struct kvm *kvm, int isr)
{
u16 status;
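The new vmx_set_apic_access_page_addr() callback above only does the VMX-specific half of the job; the common-code side, kvm_vcpu_reload_apic_access_page(), lives in x86.c and is not part of this file's diff. The fragment below is a hedged reconstruction of what such a helper plausibly looks like, pieced together from what this diff does show (hardware_setup() NULLs the hook when APIC_ACCESS_ADDR is unsupported, and alloc_apic_access_page() no longer keeps a reference so the page may be migrated); treat it as a sketch, not the committed code.

/* Sketch only -- the real helper is in arch/x86/kvm/x86.c. */
#include <linux/kvm_host.h>
#include <asm/apicdef.h>

void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
{
        struct page *page;

        /* hardware_setup() clears the hook when there is no APIC_ACCESS_ADDR
         * VMCS field, so there is nothing to reload in that case. */
        if (!kvm_x86_ops->set_apic_access_page_addr)
                return;

        page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
        kvm_x86_ops->set_apic_access_page_addr(vcpu, page_to_phys(page));

        /* Mirror alloc_apic_access_page(): drop the reference right away so
         * the page stays migratable; the mmu_notifier triggers another
         * reload if it moves. */
        put_page(page);
}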
@@ -7483,7 +7520,7 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- unsigned long debugctlmsr;
+ unsigned long debugctlmsr, cr4;
/* Record the guest's net vcpu time for enforced NMI injections. */
if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
@@ -7509,6 +7546,12 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
+ cr4 = read_cr4();
+ if (unlikely(cr4 != vmx->host_state.vmcs_host_cr4)) {
+ vmcs_writel(HOST_CR4, cr4);
+ vmx->host_state.vmcs_host_cr4 = cr4;
+ }
+
/* When single-stepping over STI and MOV SS, we must clear the
* corresponding interruptibility bits in the guest state. Otherwise
* vmentry fails as it then expects bit 14 (BS) in pending debug
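Taken together, the vmx_set_constant_host_state() hunk and the vmx_vcpu_run() hunk above form a small write-avoidance cache: the CR4 value last written to HOST_CR4 is remembered in vmcs_host_cr4, and the VMWRITE on the hot entry path is issued only when the current CR4 differs (some CR4 bits, such as TSD, can change at runtime). Below is a stand-alone sketch of the same pattern, with a stub standing in for vmcs_writel(HOST_CR4, ...):

#include <stdio.h>

static unsigned long current_cr4 = 0x001406f0;        /* pretend value of read_cr4() */
static unsigned long vmcs_host_cr4;                    /* cached copy of the HOST_CR4 field */

/* Stand-in for vmcs_writel(HOST_CR4, val): the operation we want to avoid
 * when nothing has changed. */
static void write_host_cr4(unsigned long val)
{
        printf("VMWRITE HOST_CR4 = %#lx\n", val);
        vmcs_host_cr4 = val;
}

/* Hot path, analogous to the check added in vmx_vcpu_run(). */
static void vcpu_run_once(void)
{
        unsigned long cr4 = current_cr4;        /* read_cr4() */

        if (cr4 != vmcs_host_cr4)               /* only pay for the write on change */
                write_host_cr4(cr4);
}

int main(void)
{
        write_host_cr4(current_cr4);    /* vmx_set_constant_host_state(): prime the cache */

        vcpu_run_once();                /* no change -> no VMWRITE */
        current_cr4 |= 1UL << 2;        /* e.g. CR4.TSD flipped at runtime */
        vcpu_run_once();                /* change detected -> one VMWRITE */

        return 0;
}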
@@ -8008,7 +8051,7 @@ static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu)
/*
* prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
* L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
- * with L0's requirements for its guest (a.k.a. vmsc01), so we can run the L2
+ * with L0's requirements for its guest (a.k.a. vmcs01), so we can run the L2
* guest in a way that will both be appropriate to L1's requests, and our
* needs. In addition to modifying the active vmcs (which is vmcs02), this
* function also has additional necessary side-effects, like setting various
@@ -8143,8 +8186,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
} else if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm)) {
exec_control |=
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
- vmcs_write64(APIC_ACCESS_ADDR,
- page_to_phys(vcpu->kvm->arch.apic_access_page));
+ kvm_vcpu_reload_apic_access_page(vcpu);
}
vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
@@ -8953,6 +8995,12 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
}
/*
+ * We are now running in L2, mmu_notifier will force to reload the
+ * page's hpa for L2 vmcs. Need to reload it for L1 before entering L1.
+ */
+ kvm_vcpu_reload_apic_access_page(vcpu);
+
+ /*
* Exiting from L2 to L1, we're now back to L1 which thinks it just
* finished a VMLAUNCH or VMRESUME instruction, so we need to set the
* success or failure flag accordingly.
@@ -9077,6 +9125,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
.enable_irq_window = enable_irq_window,
.update_cr8_intercept = update_cr8_intercept,
.set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode,
+ .set_apic_access_page_addr = vmx_set_apic_access_page_addr,
.vm_has_apicv = vmx_vm_has_apicv,
.load_eoi_exitmap = vmx_load_eoi_exitmap,
.hwapic_irr_update = vmx_hwapic_irr_update,