aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/arch/x86/kvm/svm/nested.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-06-28 15:40:51 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-06-28 15:40:51 -0700
commit36824f198c621cebeb22966b5e244378fa341295 (patch)
treeee1e358a4ed0cd022ae12b4b7ba1fa3d0e5746d5 /arch/x86/kvm/svm/nested.c
parentMerge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux (diff)
parentMerge tag 'kvmarm-5.14' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD (diff)
downloadwireguard-linux-36824f198c621cebeb22966b5e244378fa341295.tar.xz
wireguard-linux-36824f198c621cebeb22966b5e244378fa341295.zip
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Paolo Bonzini: "This covers all architectures (except MIPS) so I don't expect any other feature pull requests this merge window. ARM: - Add MTE support in guests, complete with tag save/restore interface - Reduce the impact of CMOs by moving them in the page-table code - Allow device block mappings at stage-2 - Reduce the footprint of the vmemmap in protected mode - Support the vGIC on dumb systems such as the Apple M1 - Add selftest infrastructure to support multiple configuration and apply that to PMU/non-PMU setups - Add selftests for the debug architecture - The usual crop of PMU fixes PPC: - Support for the H_RPT_INVALIDATE hypercall - Conversion of Book3S entry/exit to C - Bug fixes S390: - new HW facilities for guests - make inline assembly more robust with KASAN and co x86: - Allow userspace to handle emulation errors (unknown instructions) - Lazy allocation of the rmap (host physical -> guest physical address) - Support for virtualizing TSC scaling on VMX machines - Optimizations to avoid shattering huge pages at the beginning of live migration - Support for initializing the PDPTRs without loading them from memory - Many TLB flushing cleanups - Refuse to load if two-stage paging is available but NX is not (this has been a requirement in practice for over a year) - A large series that separates the MMU mode (WP/SMAP/SMEP etc.) from CR0/CR4/EFER, using the MMU mode everywhere once it is computed from the CPU registers - Use PM notifier to notify the guest about host suspend or hibernate - Support for passing arguments to Hyper-V hypercalls using XMM registers - Support for Hyper-V TLB flush hypercalls and enlightened MSR bitmap on AMD processors - Hide Hyper-V hypercalls that are not included in the guest CPUID - Fixes for live migration of virtual machines that use the Hyper-V "enlightened VMCS" optimization of nested virtualization - Bugfixes (not many) Generic: - Support for retrieving statistics without debugfs - Cleanups for the KVM selftests API" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (314 commits) KVM: x86: rename apic_access_page_done to apic_access_memslot_enabled kvm: x86: disable the narrow guest module parameter on unload selftests: kvm: Allows userspace to handle emulation errors. kvm: x86: Allow userspace to handle emulation errors KVM: x86/mmu: Let guest use GBPAGES if supported in hardware and TDP is on KVM: x86/mmu: Get CR4.SMEP from MMU, not vCPU, in shadow page fault KVM: x86/mmu: Get CR0.WP from MMU, not vCPU, in shadow page fault KVM: x86/mmu: Drop redundant rsvd bits reset for nested NPT KVM: x86/mmu: Optimize and clean up so called "last nonleaf level" logic KVM: x86: Enhance comments for MMU roles and nested transition trickiness KVM: x86/mmu: WARN on any reserved SPTE value when making a valid SPTE KVM: x86/mmu: Add helpers to do full reserved SPTE checks w/ generic MMU KVM: x86/mmu: Use MMU's role to determine PTTYPE KVM: x86/mmu: Collapse 32-bit PAE and 64-bit statements for helpers KVM: x86/mmu: Add a helper to calculate root from role_regs KVM: x86/mmu: Add helper to update paging metadata KVM: x86/mmu: Don't update nested guest's paging bitmasks if CR0.PG=0 KVM: x86/mmu: Consolidate reset_rsvds_bits_mask() calls KVM: x86/mmu: Use MMU role_regs to get LA57, and drop vCPU LA57 helper KVM: x86/mmu: Get nested MMU's root level from the MMU's role ...
Diffstat (limited to '')
-rw-r--r--arch/x86/kvm/svm/nested.c91
1 files changed, 66 insertions, 25 deletions
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 5e8d8443154e..21d03e3a5dfd 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -98,13 +98,18 @@ static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
WARN_ON(mmu_is_nested(vcpu));
vcpu->arch.mmu = &vcpu->arch.guest_mmu;
+
+ /*
+ * The NPT format depends on L1's CR4 and EFER, which is in vmcb01. Note,
+ * when called via KVM_SET_NESTED_STATE, that state may _not_ match current
+ * vCPU state. CR0.WP is explicitly ignored, while CR0.PG is required.
+ */
kvm_init_shadow_npt_mmu(vcpu, X86_CR0_PG, svm->vmcb01.ptr->save.cr4,
svm->vmcb01.ptr->save.efer,
svm->nested.ctl.nested_cr3);
vcpu->arch.mmu->get_guest_pgd = nested_svm_get_tdp_cr3;
vcpu->arch.mmu->get_pdptr = nested_svm_get_tdp_pdptr;
vcpu->arch.mmu->inject_page_fault = nested_svm_inject_npf_exit;
- reset_shadow_zero_bits_mask(vcpu, vcpu->arch.mmu);
vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
}
@@ -380,33 +385,47 @@ static inline bool nested_npt_enabled(struct vcpu_svm *svm)
return svm->nested.ctl.nested_ctl & SVM_NESTED_CTL_NP_ENABLE;
}
+static void nested_svm_transition_tlb_flush(struct kvm_vcpu *vcpu)
+{
+ /*
+ * TODO: optimize unconditional TLB flush/MMU sync. A partial list of
+ * things to fix before this can be conditional:
+ *
+ * - Flush TLBs for both L1 and L2 remote TLB flush
+ * - Honor L1's request to flush an ASID on nested VMRUN
+ * - Sync nested NPT MMU on VMRUN that flushes L2's ASID[*]
+ * - Don't crush a pending TLB flush in vmcb02 on nested VMRUN
+ * - Flush L1's ASID on KVM_REQ_TLB_FLUSH_GUEST
+ *
+ * [*] Unlike nested EPT, SVM's ASID management can invalidate nested
+ * NPT guest-physical mappings on VMRUN.
+ */
+ kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
+ kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
+}
+
/*
* Load guest's/host's cr3 on nested vmentry or vmexit. @nested_npt is true
* if we are emulating VM-Entry into a guest with NPT enabled.
*/
static int nested_svm_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
- bool nested_npt)
+ bool nested_npt, bool reload_pdptrs)
{
if (CC(kvm_vcpu_is_illegal_gpa(vcpu, cr3)))
return -EINVAL;
- if (!nested_npt && is_pae_paging(vcpu) &&
- (cr3 != kvm_read_cr3(vcpu) || pdptrs_changed(vcpu))) {
- if (CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)))
- return -EINVAL;
- }
+ if (reload_pdptrs && !nested_npt && is_pae_paging(vcpu) &&
+ CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)))
+ return -EINVAL;
- /*
- * TODO: optimize unconditional TLB flush/MMU sync here and in
- * kvm_init_shadow_npt_mmu().
- */
if (!nested_npt)
- kvm_mmu_new_pgd(vcpu, cr3, false, false);
+ kvm_mmu_new_pgd(vcpu, cr3);
vcpu->arch.cr3 = cr3;
kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
- kvm_init_mmu(vcpu, false);
+ /* Re-initialize the MMU, e.g. to pick up CR4 MMU role changes. */
+ kvm_init_mmu(vcpu);
return 0;
}
@@ -481,6 +500,7 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
{
const u32 mask = V_INTR_MASKING_MASK | V_GIF_ENABLE_MASK | V_GIF_MASK;
+ struct kvm_vcpu *vcpu = &svm->vcpu;
/*
* Filled at exit: exit_code, exit_code_hi, exit_info_1, exit_info_2,
@@ -505,10 +525,10 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
/* nested_cr3. */
if (nested_npt_enabled(svm))
- nested_svm_init_mmu_context(&svm->vcpu);
+ nested_svm_init_mmu_context(vcpu);
- svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset =
- svm->vcpu.arch.l1_tsc_offset + svm->nested.ctl.tsc_offset;
+ svm->vmcb->control.tsc_offset = vcpu->arch.tsc_offset =
+ vcpu->arch.l1_tsc_offset + svm->nested.ctl.tsc_offset;
svm->vmcb->control.int_ctl =
(svm->nested.ctl.int_ctl & ~mask) |
@@ -523,8 +543,10 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
svm->vmcb->control.pause_filter_count = svm->nested.ctl.pause_filter_count;
svm->vmcb->control.pause_filter_thresh = svm->nested.ctl.pause_filter_thresh;
+ nested_svm_transition_tlb_flush(vcpu);
+
/* Enter Guest-Mode */
- enter_guest_mode(&svm->vcpu);
+ enter_guest_mode(vcpu);
/*
* Merge guest and host intercepts - must be called with vcpu in
@@ -576,7 +598,7 @@ int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa,
nested_vmcb02_prepare_save(svm, vmcb12);
ret = nested_svm_load_cr3(&svm->vcpu, vmcb12->save.cr3,
- nested_npt_enabled(svm));
+ nested_npt_enabled(svm), true);
if (ret)
return ret;
@@ -596,8 +618,6 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
struct kvm_host_map map;
u64 vmcb12_gpa;
- ++vcpu->stat.nested_run;
-
if (is_smm(vcpu)) {
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
@@ -803,9 +823,11 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
kvm_vcpu_unmap(vcpu, &map, true);
+ nested_svm_transition_tlb_flush(vcpu);
+
nested_svm_uninit_mmu_context(vcpu);
- rc = nested_svm_load_cr3(vcpu, svm->vmcb->save.cr3, false);
+ rc = nested_svm_load_cr3(vcpu, svm->vmcb->save.cr3, false, true);
if (rc)
return 1;
@@ -1228,8 +1250,8 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
&user_kvm_nested_state->data.svm[0];
struct vmcb_control_area *ctl;
struct vmcb_save_area *save;
+ unsigned long cr0;
int ret;
- u32 cr0;
BUILD_BUG_ON(sizeof(struct vmcb_control_area) + sizeof(struct vmcb_save_area) >
KVM_STATE_NESTED_SVM_VMCB_SIZE);
@@ -1302,6 +1324,19 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
goto out_free;
/*
+ * While the nested guest CR3 is already checked and set by
+ * KVM_SET_SREGS, it was set when nested state was yet loaded,
+ * thus MMU might not be initialized correctly.
+ * Set it again to fix this.
+ */
+
+ ret = nested_svm_load_cr3(&svm->vcpu, vcpu->arch.cr3,
+ nested_npt_enabled(svm), false);
+ if (WARN_ON_ONCE(ret))
+ goto out_free;
+
+
+ /*
* All checks done, we can enter guest mode. Userspace provides
* vmcb12.control, which will be combined with L1 and stored into
* vmcb02, and the L1 save state which we store in vmcb01.
@@ -1358,9 +1393,15 @@ static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu)
if (WARN_ON(!is_guest_mode(vcpu)))
return true;
- if (nested_svm_load_cr3(&svm->vcpu, vcpu->arch.cr3,
- nested_npt_enabled(svm)))
- return false;
+ if (!vcpu->arch.pdptrs_from_userspace &&
+ !nested_npt_enabled(svm) && is_pae_paging(vcpu))
+ /*
+ * Reload the guest's PDPTRs since after a migration
+ * the guest CR3 might be restored prior to setting the nested
+ * state which can lead to a load of wrong PDPTRs.
+ */
+ if (CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3)))
+ return false;
if (!nested_svm_vmrun_msrpm(svm)) {
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;