aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/arch/x86/kvm/vmx/nested.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm/vmx/nested.c')
-rw-r--r--arch/x86/kvm/vmx/nested.c310
1 files changed, 196 insertions, 114 deletions
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 657c2eda357c..de232306561a 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -224,7 +224,7 @@ static inline void nested_release_evmcs(struct kvm_vcpu *vcpu)
return;
kvm_vcpu_unmap(vcpu, &vmx->nested.hv_evmcs_map, true);
- vmx->nested.hv_evmcs_vmptr = -1ull;
+ vmx->nested.hv_evmcs_vmptr = 0;
vmx->nested.hv_evmcs = NULL;
}
@@ -353,9 +353,8 @@ static void nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
to_vmx(vcpu)->nested.msrs.ept_caps &
VMX_EPT_EXECUTE_ONLY_BIT,
nested_ept_ad_enabled(vcpu),
- nested_ept_get_cr3(vcpu));
- vcpu->arch.mmu->set_cr3 = vmx_set_cr3;
- vcpu->arch.mmu->get_cr3 = nested_ept_get_cr3;
+ nested_ept_get_eptp(vcpu));
+ vcpu->arch.mmu->get_guest_pgd = nested_ept_get_eptp;
vcpu->arch.mmu->inject_page_fault = nested_ept_inject_page_fault;
vcpu->arch.mmu->get_pdptr = kvm_pdptr_read;
@@ -544,7 +543,8 @@ static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1,
}
}
-static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap) {
+static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap)
+{
int msr;
for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
@@ -1909,20 +1909,21 @@ static int copy_vmcs12_to_enlightened(struct vcpu_vmx *vmx)
* This is an equivalent of the nested hypervisor executing the vmptrld
* instruction.
*/
-static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu,
- bool from_launch)
+static enum nested_evmptrld_status nested_vmx_handle_enlightened_vmptrld(
+ struct kvm_vcpu *vcpu, bool from_launch)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
bool evmcs_gpa_changed = false;
u64 evmcs_gpa;
if (likely(!vmx->nested.enlightened_vmcs_enabled))
- return 1;
+ return EVMPTRLD_DISABLED;
if (!nested_enlightened_vmentry(vcpu, &evmcs_gpa))
- return 1;
+ return EVMPTRLD_DISABLED;
- if (unlikely(evmcs_gpa != vmx->nested.hv_evmcs_vmptr)) {
+ if (unlikely(!vmx->nested.hv_evmcs ||
+ evmcs_gpa != vmx->nested.hv_evmcs_vmptr)) {
if (!vmx->nested.hv_evmcs)
vmx->nested.current_vmptr = -1ull;
@@ -1930,7 +1931,7 @@ static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu,
if (kvm_vcpu_map(vcpu, gpa_to_gfn(evmcs_gpa),
&vmx->nested.hv_evmcs_map))
- return 0;
+ return EVMPTRLD_ERROR;
vmx->nested.hv_evmcs = vmx->nested.hv_evmcs_map.hva;
@@ -1959,7 +1960,7 @@ static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu,
if ((vmx->nested.hv_evmcs->revision_id != KVM_EVMCS_VERSION) &&
(vmx->nested.hv_evmcs->revision_id != VMCS12_REVISION)) {
nested_release_evmcs(vcpu);
- return 0;
+ return EVMPTRLD_VMFAIL;
}
vmx->nested.dirty_vmcs12 = true;
@@ -1981,28 +1982,20 @@ static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu,
}
/*
- * Clean fields data can't de used on VMLAUNCH and when we switch
+ * Clean fields data can't be used on VMLAUNCH and when we switch
* between different L2 guests as KVM keeps a single VMCS12 per L1.
*/
if (from_launch || evmcs_gpa_changed)
vmx->nested.hv_evmcs->hv_clean_fields &=
~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
- return 1;
+ return EVMPTRLD_SUCCEEDED;
}
void nested_sync_vmcs12_to_shadow(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- /*
- * hv_evmcs may end up being not mapped after migration (when
- * L2 was running), map it here to make sure vmcs12 changes are
- * properly reflected.
- */
- if (vmx->nested.enlightened_vmcs_enabled && !vmx->nested.hv_evmcs)
- nested_vmx_handle_enlightened_vmptrld(vcpu, false);
-
if (vmx->nested.hv_evmcs) {
copy_vmcs12_to_enlightened(vmx);
/* All fields are clean */
@@ -2473,9 +2466,9 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
* If L1 use EPT, then L0 needs to execute INVEPT on
* EPTP02 instead of EPTP01. Therefore, delay TLB
* flush until vmcs02->eptp is fully updated by
- * KVM_REQ_LOAD_CR3. Note that this assumes
+ * KVM_REQ_LOAD_MMU_PGD. Note that this assumes
* KVM_REQ_TLB_FLUSH is evaluated after
- * KVM_REQ_LOAD_CR3 in vcpu_enter_guest().
+ * KVM_REQ_LOAD_MMU_PGD in vcpu_enter_guest().
*/
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
}
@@ -2520,7 +2513,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
/*
* Immediately write vmcs02.GUEST_CR3. It will be propagated to vmcs12
* on nested VM-Exit, which can occur without actually running L2 and
- * thus without hitting vmx_set_cr3(), e.g. if L1 is entering L2 with
+ * thus without hitting vmx_load_mmu_pgd(), e.g. if L1 is entering L2 with
* vmcs12.GUEST_ACTIVITYSTATE=HLT, in which case KVM will intercept the
* transition to HLT instead of running L2.
*/
@@ -2562,13 +2555,13 @@ static int nested_vmx_check_nmi_controls(struct vmcs12 *vmcs12)
return 0;
}
-static bool valid_ept_address(struct kvm_vcpu *vcpu, u64 address)
+static bool nested_vmx_check_eptp(struct kvm_vcpu *vcpu, u64 new_eptp)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
int maxphyaddr = cpuid_maxphyaddr(vcpu);
/* Check for memory type validity */
- switch (address & VMX_EPTP_MT_MASK) {
+ switch (new_eptp & VMX_EPTP_MT_MASK) {
case VMX_EPTP_MT_UC:
if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPTP_UC_BIT)))
return false;
@@ -2581,16 +2574,26 @@ static bool valid_ept_address(struct kvm_vcpu *vcpu, u64 address)
return false;
}
- /* only 4 levels page-walk length are valid */
- if (CC((address & VMX_EPTP_PWL_MASK) != VMX_EPTP_PWL_4))
+ /* Page-walk levels validity. */
+ switch (new_eptp & VMX_EPTP_PWL_MASK) {
+ case VMX_EPTP_PWL_5:
+ if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPT_PAGE_WALK_5_BIT)))
+ return false;
+ break;
+ case VMX_EPTP_PWL_4:
+ if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPT_PAGE_WALK_4_BIT)))
+ return false;
+ break;
+ default:
return false;
+ }
/* Reserved bits should not be set */
- if (CC(address >> maxphyaddr || ((address >> 7) & 0x1f)))
+ if (CC(new_eptp >> maxphyaddr || ((new_eptp >> 7) & 0x1f)))
return false;
/* AD, if set, should be supported */
- if (address & VMX_EPTP_AD_ENABLE_BIT) {
+ if (new_eptp & VMX_EPTP_AD_ENABLE_BIT) {
if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPT_AD_BIT)))
return false;
}
@@ -2639,7 +2642,7 @@ static int nested_check_vm_execution_controls(struct kvm_vcpu *vcpu,
return -EINVAL;
if (nested_cpu_has_ept(vmcs12) &&
- CC(!valid_ept_address(vcpu, vmcs12->ept_pointer)))
+ CC(!nested_vmx_check_eptp(vcpu, vmcs12->ept_pointer)))
return -EINVAL;
if (nested_cpu_has_vmfunc(vmcs12)) {
@@ -2959,7 +2962,7 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
/*
* Induce a consistency check VMExit by clearing bit 1 in GUEST_RFLAGS,
* which is reserved to '1' by hardware. GUEST_RFLAGS is guaranteed to
- * be written (by preparve_vmcs02()) before the "real" VMEnter, i.e.
+ * be written (by prepare_vmcs02()) before the "real" VMEnter, i.e.
* there is no need to preserve other bits or save/restore the field.
*/
vmcs_writel(GUEST_RFLAGS, 0);
@@ -3051,6 +3054,27 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
struct page *page;
u64 hpa;
+ /*
+ * hv_evmcs may end up being not mapped after migration (when
+ * L2 was running), map it here to make sure vmcs12 changes are
+ * properly reflected.
+ */
+ if (vmx->nested.enlightened_vmcs_enabled && !vmx->nested.hv_evmcs) {
+ enum nested_evmptrld_status evmptrld_status =
+ nested_vmx_handle_enlightened_vmptrld(vcpu, false);
+
+ if (evmptrld_status == EVMPTRLD_VMFAIL ||
+ evmptrld_status == EVMPTRLD_ERROR) {
+ pr_debug_ratelimited("%s: enlightened vmptrld failed\n",
+ __func__);
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ vcpu->run->internal.suberror =
+ KVM_INTERNAL_ERROR_EMULATION;
+ vcpu->run->internal.ndata = 0;
+ return false;
+ }
+ }
+
if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
/*
* Translate L1 physical address to host physical
@@ -3160,10 +3184,10 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
* or KVM_SET_NESTED_STATE). Otherwise it's called from vmlaunch/vmresume.
*
* Returns:
- * NVMX_ENTRY_SUCCESS: Entered VMX non-root mode
- * NVMX_ENTRY_VMFAIL: Consistency check VMFail
- * NVMX_ENTRY_VMEXIT: Consistency check VMExit
- * NVMX_ENTRY_KVM_INTERNAL_ERROR: KVM internal error
+ * NVMX_VMENTRY_SUCCESS: Entered VMX non-root mode
+ * NVMX_VMENTRY_VMFAIL: Consistency check VMFail
+ * NVMX_VMENTRY_VMEXIT: Consistency check VMExit
+ * NVMX_VMENTRY_KVM_INTERNAL_ERROR: KVM internal error
*/
enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
bool from_vmentry)
@@ -3314,12 +3338,18 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
enum nvmx_vmentry_status status;
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 interrupt_shadow = vmx_get_interrupt_shadow(vcpu);
+ enum nested_evmptrld_status evmptrld_status;
if (!nested_vmx_check_permission(vcpu))
return 1;
- if (!nested_vmx_handle_enlightened_vmptrld(vcpu, launch))
+ evmptrld_status = nested_vmx_handle_enlightened_vmptrld(vcpu, launch);
+ if (evmptrld_status == EVMPTRLD_ERROR) {
+ kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
+ } else if (evmptrld_status == EVMPTRLD_VMFAIL) {
+ return nested_vmx_failInvalid(vcpu);
+ }
if (!vmx->nested.hv_evmcs && vmx->nested.current_vmptr == -1ull)
return nested_vmx_failInvalid(vcpu);
@@ -3497,7 +3527,7 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
}
-static void nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu)
+void nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu)
{
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
gfn_t gfn;
@@ -3575,25 +3605,80 @@ static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu,
nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, intr_info, exit_qual);
}
-static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
+/*
+ * Returns true if a debug trap is pending delivery.
+ *
+ * In KVM, debug traps bear an exception payload. As such, the class of a #DB
+ * exception may be inferred from the presence of an exception payload.
+ */
+static inline bool vmx_pending_dbg_trap(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.exception.pending &&
+ vcpu->arch.exception.nr == DB_VECTOR &&
+ vcpu->arch.exception.payload;
+}
+
+/*
+ * Certain VM-exits set the 'pending debug exceptions' field to indicate a
+ * recognized #DB (data or single-step) that has yet to be delivered. Since KVM
+ * represents these debug traps with a payload that is said to be compatible
+ * with the 'pending debug exceptions' field, write the payload to the VMCS
+ * field if a VM-exit is delivered before the debug trap.
+ */
+static void nested_vmx_update_pending_dbg(struct kvm_vcpu *vcpu)
+{
+ if (vmx_pending_dbg_trap(vcpu))
+ vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
+ vcpu->arch.exception.payload);
+}
+
+static int vmx_check_nested_events(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long exit_qual;
bool block_nested_events =
vmx->nested.nested_run_pending || kvm_event_needs_reinjection(vcpu);
+ bool mtf_pending = vmx->nested.mtf_pending;
struct kvm_lapic *apic = vcpu->arch.apic;
+ /*
+ * Clear the MTF state. If a higher priority VM-exit is delivered first,
+ * this state is discarded.
+ */
+ vmx->nested.mtf_pending = false;
+
if (lapic_in_kernel(vcpu) &&
test_bit(KVM_APIC_INIT, &apic->pending_events)) {
if (block_nested_events)
return -EBUSY;
+ nested_vmx_update_pending_dbg(vcpu);
clear_bit(KVM_APIC_INIT, &apic->pending_events);
nested_vmx_vmexit(vcpu, EXIT_REASON_INIT_SIGNAL, 0, 0);
return 0;
}
+ /*
+ * Process any exceptions that are not debug traps before MTF.
+ */
if (vcpu->arch.exception.pending &&
- nested_vmx_check_exception(vcpu, &exit_qual)) {
+ !vmx_pending_dbg_trap(vcpu) &&
+ nested_vmx_check_exception(vcpu, &exit_qual)) {
+ if (block_nested_events)
+ return -EBUSY;
+ nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
+ return 0;
+ }
+
+ if (mtf_pending) {
+ if (block_nested_events)
+ return -EBUSY;
+ nested_vmx_update_pending_dbg(vcpu);
+ nested_vmx_vmexit(vcpu, EXIT_REASON_MONITOR_TRAP_FLAG, 0, 0);
+ return 0;
+ }
+
+ if (vcpu->arch.exception.pending &&
+ nested_vmx_check_exception(vcpu, &exit_qual)) {
if (block_nested_events)
return -EBUSY;
nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
@@ -3623,8 +3708,7 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
return 0;
}
- if ((kvm_cpu_has_interrupt(vcpu) || external_intr) &&
- nested_exit_on_intr(vcpu)) {
+ if (kvm_cpu_has_interrupt(vcpu) && nested_exit_on_intr(vcpu)) {
if (block_nested_events)
return -EBUSY;
nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0);
@@ -3967,7 +4051,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
*
* If vmcs12 uses EPT, we need to execute this flush on EPTP01
* and therefore we request the TLB flush to happen only after VMCS EPTP
- * has been set by KVM_REQ_LOAD_CR3.
+ * has been set by KVM_REQ_LOAD_MMU_PGD.
*/
if (enable_vpid &&
(!nested_cpu_has_vpid(vmcs12) || !nested_has_guest_tlb_tag(vcpu))) {
@@ -4272,17 +4356,8 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
if (likely(!vmx->fail)) {
- /*
- * TODO: SDM says that with acknowledge interrupt on
- * exit, bit 31 of the VM-exit interrupt information
- * (valid interrupt) is always set to 1 on
- * EXIT_REASON_EXTERNAL_INTERRUPT, so we shouldn't
- * need kvm_cpu_has_interrupt(). See the commit
- * message for details.
- */
- if (nested_exit_intr_ack_set(vcpu) &&
- exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT &&
- kvm_cpu_has_interrupt(vcpu)) {
+ if (exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT &&
+ nested_exit_intr_ack_set(vcpu)) {
int irq = kvm_cpu_get_interrupt(vcpu);
WARN_ON(irq < 0);
vmcs12->vm_exit_intr_info = irq |
@@ -4326,7 +4401,7 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
* Decode the memory-address operand of a vmx instruction, as recorded on an
* exit caused by such an instruction (run by a guest hypervisor).
* On success, returns 0. When the operand is invalid, returns 1 and throws
- * #UD or #GP.
+ * #UD, #GP, or #SS.
*/
int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
u32 vmx_instruction_info, bool wr, int len, gva_t *ret)
@@ -4367,7 +4442,7 @@ int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
if (base_is_valid)
off += kvm_register_read(vcpu, base_reg);
if (index_is_valid)
- off += kvm_register_read(vcpu, index_reg)<<scaling;
+ off += kvm_register_read(vcpu, index_reg) << scaling;
vmx_get_segment(vcpu, &s, seg_reg);
/*
@@ -4460,7 +4535,7 @@ void nested_vmx_pmu_entry_exit_ctls_update(struct kvm_vcpu *vcpu)
return;
vmx = to_vmx(vcpu);
- if (kvm_x86_ops->pmu_ops->is_valid_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL)) {
+ if (kvm_x86_ops.pmu_ops->is_valid_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL)) {
vmx->nested.msrs.entry_ctls_high |=
VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
vmx->nested.msrs.exit_ctls_high |=
@@ -4546,7 +4621,7 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
vmx->nested.vmcs02_initialized = false;
vmx->nested.vmxon = true;
- if (pt_mode == PT_MODE_HOST_GUEST) {
+ if (vmx_pt_mode_is_host_guest()) {
vmx->pt_desc.guest.ctl = 0;
pt_update_intercept_for_msr(vmx);
}
@@ -5178,7 +5253,7 @@ static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
u32 index = kvm_rcx_read(vcpu);
- u64 address;
+ u64 new_eptp;
bool accessed_dirty;
struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
@@ -5191,23 +5266,23 @@ static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu,
if (kvm_vcpu_read_guest_page(vcpu, vmcs12->eptp_list_address >> PAGE_SHIFT,
- &address, index * 8, 8))
+ &new_eptp, index * 8, 8))
return 1;
- accessed_dirty = !!(address & VMX_EPTP_AD_ENABLE_BIT);
+ accessed_dirty = !!(new_eptp & VMX_EPTP_AD_ENABLE_BIT);
/*
* If the (L2) guest does a vmfunc to the currently
* active ept pointer, we don't have to do anything else
*/
- if (vmcs12->ept_pointer != address) {
- if (!valid_ept_address(vcpu, address))
+ if (vmcs12->ept_pointer != new_eptp) {
+ if (!nested_vmx_check_eptp(vcpu, new_eptp))
return 1;
kvm_mmu_unload(vcpu);
mmu->ept_ad = accessed_dirty;
mmu->mmu_role.base.ad_disabled = !accessed_dirty;
- vmcs12->ept_pointer = address;
+ vmcs12->ept_pointer = new_eptp;
/*
* TODO: Check what's the correct approach in case
* mmu reload fails. Currently, we just let the next
@@ -5256,24 +5331,17 @@ fail:
return 1;
}
-
-static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
- struct vmcs12 *vmcs12)
+/*
+ * Return true if an IO instruction with the specified port and size should cause
+ * a VM-exit into L1.
+ */
+bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, unsigned int port,
+ int size)
{
- unsigned long exit_qualification;
+ struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
gpa_t bitmap, last_bitmap;
- unsigned int port;
- int size;
u8 b;
- if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
- return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING);
-
- exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
-
- port = exit_qualification >> 16;
- size = (exit_qualification & 7) + 1;
-
last_bitmap = (gpa_t)-1;
b = -1;
@@ -5300,8 +5368,26 @@ static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
return false;
}
+static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
+ struct vmcs12 *vmcs12)
+{
+ unsigned long exit_qualification;
+ unsigned short port;
+ int size;
+
+ if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
+ return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING);
+
+ exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+
+ port = exit_qualification >> 16;
+ size = (exit_qualification & 7) + 1;
+
+ return nested_vmx_check_io_bitmaps(vcpu, port, size);
+}
+
/*
- * Return 1 if we should exit from L2 to L1 to handle an MSR access access,
+ * Return 1 if we should exit from L2 to L1 to handle an MSR access,
* rather than handle it ourselves in L0. I.e., check whether L1 expressed
* disinterest in the current event (read or write a specific MSR) by using an
* MSR bitmap. This may be the case even when L0 doesn't use MSR bitmaps.
@@ -5457,8 +5543,7 @@ bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
- if (vmx->nested.nested_run_pending)
- return false;
+ WARN_ON_ONCE(vmx->nested.nested_run_pending);
if (unlikely(vmx->fail)) {
trace_kvm_nested_vmenter_failed(
@@ -5467,19 +5552,6 @@ bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
return true;
}
- /*
- * The host physical addresses of some pages of guest memory
- * are loaded into the vmcs02 (e.g. vmcs12's Virtual APIC
- * Page). The CPU may write to these pages via their host
- * physical address while L2 is running, bypassing any
- * address-translation-based dirty tracking (e.g. EPT write
- * protection).
- *
- * Mark them dirty on every exit from L2 to prevent them from
- * getting out of sync with dirty tracking.
- */
- nested_mark_vmcs12_pages_dirty(vcpu);
-
trace_kvm_nested_vmexit(kvm_rip_read(vcpu), exit_reason,
vmcs_readl(EXIT_QUALIFICATION),
vmx->idt_vectoring_info,
@@ -5560,7 +5632,7 @@ bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
case EXIT_REASON_MWAIT_INSTRUCTION:
return nested_cpu_has(vmcs12, CPU_BASED_MWAIT_EXITING);
case EXIT_REASON_MONITOR_TRAP_FLAG:
- return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_TRAP_FLAG);
+ return nested_cpu_has_mtf(vmcs12);
case EXIT_REASON_MONITOR_INSTRUCTION:
return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_EXITING);
case EXIT_REASON_PAUSE_INSTRUCTION:
@@ -5683,6 +5755,9 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
if (vmx->nested.nested_run_pending)
kvm_state.flags |= KVM_STATE_NESTED_RUN_PENDING;
+
+ if (vmx->nested.mtf_pending)
+ kvm_state.flags |= KVM_STATE_NESTED_MTF_PENDING;
}
}
@@ -5834,10 +5909,12 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
set_current_vmptr(vmx, kvm_state->hdr.vmx.vmcs12_pa);
} else if (kvm_state->flags & KVM_STATE_NESTED_EVMCS) {
/*
- * Sync eVMCS upon entry as we may not have
- * HV_X64_MSR_VP_ASSIST_PAGE set up yet.
+ * nested_vmx_handle_enlightened_vmptrld() cannot be called
+ * directly from here as HV_X64_MSR_VP_ASSIST_PAGE may not be
+ * restored yet. EVMCS will be mapped from
+ * nested_get_vmcs12_pages().
*/
- vmx->nested.need_vmcs12_to_shadow_sync = true;
+ kvm_make_request(KVM_REQ_GET_VMCS12_PAGES, vcpu);
} else {
return -EINVAL;
}
@@ -5863,6 +5940,9 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
vmx->nested.nested_run_pending =
!!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
+ vmx->nested.mtf_pending =
+ !!(kvm_state->flags & KVM_STATE_NESTED_MTF_PENDING);
+
ret = -EINVAL;
if (nested_cpu_has_shadow_vmcs(vmcs12) &&
vmcs12->vmcs_link_pointer != -1ull) {
@@ -5920,8 +6000,7 @@ void nested_vmx_set_vmcs_shadowing_bitmap(void)
* bit in the high half is on if the corresponding bit in the control field
* may be on. See also vmx_control_verify().
*/
-void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps,
- bool apicv)
+void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps)
{
/*
* Note that as a general rule, the high half of the MSRs (bits in
@@ -5948,7 +6027,7 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps,
PIN_BASED_EXT_INTR_MASK |
PIN_BASED_NMI_EXITING |
PIN_BASED_VIRTUAL_NMIS |
- (apicv ? PIN_BASED_POSTED_INTR : 0);
+ (enable_apicv ? PIN_BASED_POSTED_INTR : 0);
msrs->pinbased_ctls_high |=
PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
PIN_BASED_VMX_PREEMPTION_TIMER;
@@ -6057,11 +6136,13 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps,
/* nested EPT: emulate EPT also to L1 */
msrs->secondary_ctls_high |=
SECONDARY_EXEC_ENABLE_EPT;
- msrs->ept_caps = VMX_EPT_PAGE_WALK_4_BIT |
- VMX_EPTP_WB_BIT | VMX_EPT_INVEPT_BIT;
- if (cpu_has_vmx_ept_execute_only())
- msrs->ept_caps |=
- VMX_EPT_EXECUTE_ONLY_BIT;
+ msrs->ept_caps =
+ VMX_EPT_PAGE_WALK_4_BIT |
+ VMX_EPT_PAGE_WALK_5_BIT |
+ VMX_EPTP_WB_BIT |
+ VMX_EPT_INVEPT_BIT |
+ VMX_EPT_EXECUTE_ONLY_BIT;
+
msrs->ept_caps &= ept_caps;
msrs->ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT |
VMX_EPT_EXTENT_CONTEXT_BIT | VMX_EPT_2MB_PAGE_BIT |
@@ -6160,7 +6241,8 @@ void nested_vmx_hardware_unsetup(void)
}
}
-__init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *))
+__init int nested_vmx_hardware_setup(struct kvm_x86_ops *ops,
+ int (*exit_handlers[])(struct kvm_vcpu *))
{
int i;
@@ -6196,12 +6278,12 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *))
exit_handlers[EXIT_REASON_INVVPID] = handle_invvpid;
exit_handlers[EXIT_REASON_VMFUNC] = handle_vmfunc;
- kvm_x86_ops->check_nested_events = vmx_check_nested_events;
- kvm_x86_ops->get_nested_state = vmx_get_nested_state;
- kvm_x86_ops->set_nested_state = vmx_set_nested_state;
- kvm_x86_ops->get_vmcs12_pages = nested_get_vmcs12_pages;
- kvm_x86_ops->nested_enable_evmcs = nested_enable_evmcs;
- kvm_x86_ops->nested_get_evmcs_version = nested_get_evmcs_version;
+ ops->check_nested_events = vmx_check_nested_events;
+ ops->get_nested_state = vmx_get_nested_state;
+ ops->set_nested_state = vmx_set_nested_state;
+ ops->get_vmcs12_pages = nested_get_vmcs12_pages;
+ ops->nested_enable_evmcs = nested_enable_evmcs;
+ ops->nested_get_evmcs_version = nested_get_evmcs_version;
return 0;
}