aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kvm/vmx/vmx.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm/vmx/vmx.c')
-rw-r--r--arch/x86/kvm/vmx/vmx.c432
1 files changed, 241 insertions, 191 deletions
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index bcbf0d2139e9..cbe0cdade38a 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -57,6 +57,7 @@
#include "mmu.h"
#include "nested.h"
#include "pmu.h"
+#include "sgx.h"
#include "trace.h"
#include "vmcs.h"
#include "vmcs12.h"
@@ -156,9 +157,11 @@ static u32 vmx_possible_passthrough_msrs[MAX_POSSIBLE_PASSTHROUGH_MSRS] = {
MSR_IA32_SPEC_CTRL,
MSR_IA32_PRED_CMD,
MSR_IA32_TSC,
+#ifdef CONFIG_X86_64
MSR_FS_BASE,
MSR_GS_BASE,
MSR_KERNEL_GS_BASE,
+#endif
MSR_IA32_SYSENTER_CS,
MSR_IA32_SYSENTER_ESP,
MSR_IA32_SYSENTER_EIP,
@@ -361,8 +364,6 @@ static const struct kernel_param_ops vmentry_l1d_flush_ops = {
module_param_cb(vmentry_l1d_flush, &vmentry_l1d_flush_ops, NULL, 0644);
static u32 vmx_segment_access_rights(struct kvm_segment *var);
-static __always_inline void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu,
- u32 msr, int type);
void vmx_vmexit(void);
@@ -472,26 +473,6 @@ static const u32 vmx_uret_msrs_list[] = {
static bool __read_mostly enlightened_vmcs = true;
module_param(enlightened_vmcs, bool, 0444);
-/* check_ept_pointer() should be under protection of ept_pointer_lock. */
-static void check_ept_pointer_match(struct kvm *kvm)
-{
- struct kvm_vcpu *vcpu;
- u64 tmp_eptp = INVALID_PAGE;
- int i;
-
- kvm_for_each_vcpu(i, vcpu, kvm) {
- if (!VALID_PAGE(tmp_eptp)) {
- tmp_eptp = to_vmx(vcpu)->ept_pointer;
- } else if (tmp_eptp != to_vmx(vcpu)->ept_pointer) {
- to_kvm_vmx(kvm)->ept_pointers_match
- = EPT_POINTERS_MISMATCH;
- return;
- }
- }
-
- to_kvm_vmx(kvm)->ept_pointers_match = EPT_POINTERS_MATCH;
-}
-
static int kvm_fill_hv_flush_list_func(struct hv_guest_mapping_flush_list *flush,
void *data)
{
@@ -501,47 +482,70 @@ static int kvm_fill_hv_flush_list_func(struct hv_guest_mapping_flush_list *flush
range->pages);
}
-static inline int __hv_remote_flush_tlb_with_range(struct kvm *kvm,
- struct kvm_vcpu *vcpu, struct kvm_tlb_range *range)
+static inline int hv_remote_flush_root_ept(hpa_t root_ept,
+ struct kvm_tlb_range *range)
{
- u64 ept_pointer = to_vmx(vcpu)->ept_pointer;
-
- /*
- * FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE hypercall needs address
- * of the base of EPT PML4 table, strip off EPT configuration
- * information.
- */
if (range)
- return hyperv_flush_guest_mapping_range(ept_pointer & PAGE_MASK,
+ return hyperv_flush_guest_mapping_range(root_ept,
kvm_fill_hv_flush_list_func, (void *)range);
else
- return hyperv_flush_guest_mapping(ept_pointer & PAGE_MASK);
+ return hyperv_flush_guest_mapping(root_ept);
}
static int hv_remote_flush_tlb_with_range(struct kvm *kvm,
struct kvm_tlb_range *range)
{
+ struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm);
struct kvm_vcpu *vcpu;
- int ret = 0, i;
+ int ret = 0, i, nr_unique_valid_roots;
+ hpa_t root;
- spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock);
+ spin_lock(&kvm_vmx->hv_root_ept_lock);
- if (to_kvm_vmx(kvm)->ept_pointers_match == EPT_POINTERS_CHECK)
- check_ept_pointer_match(kvm);
+ if (!VALID_PAGE(kvm_vmx->hv_root_ept)) {
+ nr_unique_valid_roots = 0;
- if (to_kvm_vmx(kvm)->ept_pointers_match != EPT_POINTERS_MATCH) {
+ /*
+ * Flush all valid roots, and see if all vCPUs have converged
+ * on a common root, in which case future flushes can skip the
+ * loop and flush the common root.
+ */
kvm_for_each_vcpu(i, vcpu, kvm) {
- /* If ept_pointer is invalid pointer, bypass flush request. */
- if (VALID_PAGE(to_vmx(vcpu)->ept_pointer))
- ret |= __hv_remote_flush_tlb_with_range(
- kvm, vcpu, range);
+ root = to_vmx(vcpu)->hv_root_ept;
+ if (!VALID_PAGE(root) || root == kvm_vmx->hv_root_ept)
+ continue;
+
+ /*
+ * Set the tracked root to the first valid root. Keep
+ * this root for the entirety of the loop even if more
+ * roots are encountered as a low effort optimization
+ * to avoid flushing the same (first) root again.
+ */
+ if (++nr_unique_valid_roots == 1)
+ kvm_vmx->hv_root_ept = root;
+
+ if (!ret)
+ ret = hv_remote_flush_root_ept(root, range);
+
+ /*
+ * Stop processing roots if a failure occurred and
+ * multiple valid roots have already been detected.
+ */
+ if (ret && nr_unique_valid_roots > 1)
+ break;
}
+
+ /*
+ * The optimized flush of a single root can't be used if there
+ * are multiple valid roots (obviously).
+ */
+ if (nr_unique_valid_roots > 1)
+ kvm_vmx->hv_root_ept = INVALID_PAGE;
} else {
- ret = __hv_remote_flush_tlb_with_range(kvm,
- kvm_get_vcpu(kvm, 0), range);
+ ret = hv_remote_flush_root_ept(kvm_vmx->hv_root_ept, range);
}
- spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);
+ spin_unlock(&kvm_vmx->hv_root_ept_lock);
return ret;
}
static int hv_remote_flush_tlb(struct kvm *kvm)
@@ -559,7 +563,7 @@ static int hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu)
* evmcs in singe VM shares same assist page.
*/
if (!*p_hv_pa_pg)
- *p_hv_pa_pg = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ *p_hv_pa_pg = kzalloc(PAGE_SIZE, GFP_KERNEL_ACCOUNT);
if (!*p_hv_pa_pg)
return -ENOMEM;
@@ -576,6 +580,21 @@ static int hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu)
#endif /* IS_ENABLED(CONFIG_HYPERV) */
+static void hv_track_root_ept(struct kvm_vcpu *vcpu, hpa_t root_ept)
+{
+#if IS_ENABLED(CONFIG_HYPERV)
+ struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm);
+
+ if (kvm_x86_ops.tlb_remote_flush == hv_remote_flush_tlb) {
+ spin_lock(&kvm_vmx->hv_root_ept_lock);
+ to_vmx(vcpu)->hv_root_ept = root_ept;
+ if (root_ept != kvm_vmx->hv_root_ept)
+ kvm_vmx->hv_root_ept = INVALID_PAGE;
+ spin_unlock(&kvm_vmx->hv_root_ept_lock);
+ }
+#endif
+}
+
/*
* Comment's format: document - errata name - stepping - processor name.
* Refer from
@@ -1570,12 +1589,25 @@ static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data)
static bool vmx_can_emulate_instruction(struct kvm_vcpu *vcpu, void *insn, int insn_len)
{
+ /*
+ * Emulation of instructions in SGX enclaves is impossible as RIP does
+ * not point tthe failing instruction, and even if it did, the code
+ * stream is inaccessible. Inject #UD instead of exiting to userspace
+ * so that guest userspace can't DoS the guest simply by triggering
+ * emulation (enclaves are CPL3 only).
+ */
+ if (to_vmx(vcpu)->exit_reason.enclave_mode) {
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ return false;
+ }
return true;
}
static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
{
+ union vmx_exit_reason exit_reason = to_vmx(vcpu)->exit_reason;
unsigned long rip, orig_rip;
+ u32 instr_len;
/*
* Using VMCS.VM_EXIT_INSTRUCTION_LEN on EPT misconfig depends on
@@ -1586,9 +1618,33 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
* i.e. we end up advancing IP with some random value.
*/
if (!static_cpu_has(X86_FEATURE_HYPERVISOR) ||
- to_vmx(vcpu)->exit_reason.basic != EXIT_REASON_EPT_MISCONFIG) {
+ exit_reason.basic != EXIT_REASON_EPT_MISCONFIG) {
+ instr_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
+
+ /*
+ * Emulating an enclave's instructions isn't supported as KVM
+ * cannot access the enclave's memory or its true RIP, e.g. the
+ * vmcs.GUEST_RIP points at the exit point of the enclave, not
+ * the RIP that actually triggered the VM-Exit. But, because
+ * most instructions that cause VM-Exit will #UD in an enclave,
+ * most instruction-based VM-Exits simply do not occur.
+ *
+ * There are a few exceptions, notably the debug instructions
+ * INT1ICEBRK and INT3, as they are allowed in debug enclaves
+ * and generate #DB/#BP as expected, which KVM might intercept.
+ * But again, the CPU does the dirty work and saves an instr
+ * length of zero so VMMs don't shoot themselves in the foot.
+ * WARN if KVM tries to skip a non-zero length instruction on
+ * a VM-Exit from an enclave.
+ */
+ if (!instr_len)
+ goto rip_updated;
+
+ WARN(exit_reason.enclave_mode,
+ "KVM: skipping instruction after SGX enclave VM-Exit");
+
orig_rip = kvm_rip_read(vcpu);
- rip = orig_rip + vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
+ rip = orig_rip + instr_len;
#ifdef CONFIG_X86_64
/*
* We need to mask out the high 32 bits of RIP if not in 64-bit
@@ -1604,6 +1660,7 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
return 0;
}
+rip_updated:
/* skipping an emulated instruction also counts */
vmx_set_interrupt_shadow(vcpu, 0);
@@ -1865,6 +1922,13 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_FEAT_CTL:
msr_info->data = vmx->msr_ia32_feature_control;
break;
+ case MSR_IA32_SGXLEPUBKEYHASH0 ... MSR_IA32_SGXLEPUBKEYHASH3:
+ if (!msr_info->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_SGX_LC))
+ return 1;
+ msr_info->data = to_vmx(vcpu)->msr_ia32_sgxlepubkeyhash
+ [msr_info->index - MSR_IA32_SGXLEPUBKEYHASH0];
+ break;
case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
if (!nested_vmx_allowed(vcpu))
return 1;
@@ -2158,6 +2222,29 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
vmx->msr_ia32_feature_control = data;
if (msr_info->host_initiated && data == 0)
vmx_leave_nested(vcpu);
+
+ /* SGX may be enabled/disabled by guest's firmware */
+ vmx_write_encls_bitmap(vcpu, NULL);
+ break;
+ case MSR_IA32_SGXLEPUBKEYHASH0 ... MSR_IA32_SGXLEPUBKEYHASH3:
+ /*
+ * On real hardware, the LE hash MSRs are writable before
+ * the firmware sets bit 0 in MSR 0x7a ("activating" SGX),
+ * at which point SGX related bits in IA32_FEATURE_CONTROL
+ * become writable.
+ *
+ * KVM does not emulate SGX activation for simplicity, so
+ * allow writes to the LE hash MSRs if IA32_FEATURE_CONTROL
+ * is unlocked. This is technically not architectural
+ * behavior, but it's close enough.
+ */
+ if (!msr_info->host_initiated &&
+ (!guest_cpuid_has(vcpu, X86_FEATURE_SGX_LC) ||
+ ((vmx->msr_ia32_feature_control & FEAT_CTL_LOCKED) &&
+ !(vmx->msr_ia32_feature_control & FEAT_CTL_SGX_LC_ENABLED))))
+ return 1;
+ vmx->msr_ia32_sgxlepubkeyhash
+ [msr_index - MSR_IA32_SGXLEPUBKEYHASH0] = data;
break;
case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
if (!msr_info->host_initiated)
@@ -3088,8 +3175,7 @@ static int vmx_get_max_tdp_level(void)
return 4;
}
-u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa,
- int root_level)
+u64 construct_eptp(struct kvm_vcpu *vcpu, hpa_t root_hpa, int root_level)
{
u64 eptp = VMX_EPTP_MT_WB;
@@ -3098,13 +3184,13 @@ u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa,
if (enable_ept_ad_bits &&
(!is_guest_mode(vcpu) || nested_ept_ad_enabled(vcpu)))
eptp |= VMX_EPTP_AD_ENABLE_BIT;
- eptp |= (root_hpa & PAGE_MASK);
+ eptp |= root_hpa;
return eptp;
}
-static void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long pgd,
- int pgd_level)
+static void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa,
+ int root_level)
{
struct kvm *kvm = vcpu->kvm;
bool update_guest_cr3 = true;
@@ -3112,16 +3198,10 @@ static void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long pgd,
u64 eptp;
if (enable_ept) {
- eptp = construct_eptp(vcpu, pgd, pgd_level);
+ eptp = construct_eptp(vcpu, root_hpa, root_level);
vmcs_write64(EPT_POINTER, eptp);
- if (kvm_x86_ops.tlb_remote_flush) {
- spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock);
- to_vmx(vcpu)->ept_pointer = eptp;
- to_kvm_vmx(kvm)->ept_pointers_match
- = EPT_POINTERS_CHECK;
- spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);
- }
+ hv_track_root_ept(vcpu, root_hpa);
if (!enable_unrestricted_guest && !is_paging(vcpu))
guest_cr3 = to_kvm_vmx(kvm)->ept_identity_map_addr;
@@ -3131,7 +3211,7 @@ static void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long pgd,
update_guest_cr3 = false;
vmx_ept_load_pdptrs(vcpu);
} else {
- guest_cr3 = pgd;
+ guest_cr3 = root_hpa | kvm_get_active_pcid(vcpu);
}
if (update_guest_cr3)
@@ -3738,8 +3818,7 @@ static void vmx_set_msr_bitmap_write(ulong *msr_bitmap, u32 msr)
__set_bit(msr & 0x1fff, msr_bitmap + 0xc00 / f);
}
-static __always_inline void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu,
- u32 msr, int type)
+void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
@@ -3784,8 +3863,7 @@ static __always_inline void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu,
vmx_clear_msr_bitmap_write(msr_bitmap, msr);
}
-static __always_inline void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu,
- u32 msr, int type)
+void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
@@ -3818,15 +3896,6 @@ static __always_inline void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu,
vmx_set_msr_bitmap_write(msr_bitmap, msr);
}
-void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu,
- u32 msr, int type, bool value)
-{
- if (value)
- vmx_enable_intercept_for_msr(vcpu, msr, type);
- else
- vmx_disable_intercept_for_msr(vcpu, msr, type);
-}
-
static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu)
{
u8 mode = 0;
@@ -4314,15 +4383,6 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
vmx->secondary_exec_control = exec_control;
}
-static void ept_set_mmio_spte_mask(void)
-{
- /*
- * EPT Misconfigurations can be generated if the value of bits 2:0
- * of an EPT paging-structure entry is 110b (write/execute).
- */
- kvm_mmu_set_mmio_spte_mask(VMX_EPT_MISCONFIG_WX_VALUE, 0);
-}
-
#define VMX_XSS_EXIT_BITMAP 0
/*
@@ -4410,8 +4470,7 @@ static void init_vmcs(struct vcpu_vmx *vmx)
vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
}
- if (cpu_has_vmx_encls_vmexit())
- vmcs_write64(ENCLS_EXITING_BITMAP, -1ull);
+ vmx_write_encls_bitmap(&vmx->vcpu, NULL);
if (vmx_pt_mode_is_host_guest()) {
memset(&vmx->pt_desc, 0, sizeof(vmx->pt_desc));
@@ -5020,7 +5079,7 @@ static int handle_cr(struct kvm_vcpu *vcpu)
reg = (exit_qualification >> 8) & 15;
switch ((exit_qualification >> 4) & 3) {
case 0: /* mov to cr */
- val = kvm_register_readl(vcpu, reg);
+ val = kvm_register_read(vcpu, reg);
trace_kvm_cr_write(cr, val);
switch (cr) {
case 0:
@@ -5143,7 +5202,7 @@ static int handle_dr(struct kvm_vcpu *vcpu)
kvm_register_write(vcpu, reg, val);
err = 0;
} else {
- err = kvm_set_dr(vcpu, dr, kvm_register_readl(vcpu, reg));
+ err = kvm_set_dr(vcpu, dr, kvm_register_read(vcpu, reg));
}
out:
@@ -5184,17 +5243,6 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu)
return 1;
}
-static int handle_vmcall(struct kvm_vcpu *vcpu)
-{
- return kvm_emulate_hypercall(vcpu);
-}
-
-static int handle_invd(struct kvm_vcpu *vcpu)
-{
- /* Treat an INVD instruction as a NOP and just skip it. */
- return kvm_skip_emulated_instruction(vcpu);
-}
-
static int handle_invlpg(struct kvm_vcpu *vcpu)
{
unsigned long exit_qualification = vmx_get_exit_qual(vcpu);
@@ -5203,28 +5251,6 @@ static int handle_invlpg(struct kvm_vcpu *vcpu)
return kvm_skip_emulated_instruction(vcpu);
}
-static int handle_rdpmc(struct kvm_vcpu *vcpu)
-{
- int err;
-
- err = kvm_rdpmc(vcpu);
- return kvm_complete_insn_gp(vcpu, err);
-}
-
-static int handle_wbinvd(struct kvm_vcpu *vcpu)
-{
- return kvm_emulate_wbinvd(vcpu);
-}
-
-static int handle_xsetbv(struct kvm_vcpu *vcpu)
-{
- u64 new_bv = kvm_read_edx_eax(vcpu);
- u32 index = kvm_rcx_read(vcpu);
-
- int err = kvm_set_xcr(vcpu, index, new_bv);
- return kvm_complete_insn_gp(vcpu, err);
-}
-
static int handle_apic_access(struct kvm_vcpu *vcpu)
{
if (likely(fasteoi)) {
@@ -5361,7 +5387,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
EPT_VIOLATION_EXECUTABLE))
? PFERR_PRESENT_MASK : 0;
- error_code |= (exit_qualification & 0x100) != 0 ?
+ error_code |= (exit_qualification & EPT_VIOLATION_GVA_TRANSLATED) != 0 ?
PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK;
vcpu->arch.exit_qualification = exit_qualification;
@@ -5384,6 +5410,9 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
{
gpa_t gpa;
+ if (!vmx_can_emulate_instruction(vcpu, NULL, 0))
+ return 1;
+
/*
* A nested guest cannot optimize MMIO vmexits, because we have an
* nGPA here instead of the required GPA.
@@ -5485,18 +5514,6 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu)
}
}
-static void vmx_enable_tdp(void)
-{
- kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK,
- enable_ept_ad_bits ? VMX_EPT_ACCESS_BIT : 0ull,
- enable_ept_ad_bits ? VMX_EPT_DIRTY_BIT : 0ull,
- 0ull, VMX_EPT_EXECUTABLE_MASK,
- cpu_has_vmx_ept_execute_only() ? 0ull : VMX_EPT_READABLE_MASK,
- VMX_EPT_RWX_MASK, 0ull);
-
- ept_set_mmio_spte_mask();
-}
-
/*
* Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE
* exiting, so only get here on cpu with PAUSE-Loop-Exiting.
@@ -5516,34 +5533,11 @@ static int handle_pause(struct kvm_vcpu *vcpu)
return kvm_skip_emulated_instruction(vcpu);
}
-static int handle_nop(struct kvm_vcpu *vcpu)
-{
- return kvm_skip_emulated_instruction(vcpu);
-}
-
-static int handle_mwait(struct kvm_vcpu *vcpu)
-{
- printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n");
- return handle_nop(vcpu);
-}
-
-static int handle_invalid_op(struct kvm_vcpu *vcpu)
-{
- kvm_queue_exception(vcpu, UD_VECTOR);
- return 1;
-}
-
static int handle_monitor_trap(struct kvm_vcpu *vcpu)
{
return 1;
}
-static int handle_monitor(struct kvm_vcpu *vcpu)
-{
- printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n");
- return handle_nop(vcpu);
-}
-
static int handle_invpcid(struct kvm_vcpu *vcpu)
{
u32 vmx_instruction_info;
@@ -5560,7 +5554,7 @@ static int handle_invpcid(struct kvm_vcpu *vcpu)
}
vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
- type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
+ type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf);
if (type > 3) {
kvm_inject_gp(vcpu, 0);
@@ -5632,16 +5626,18 @@ static int handle_vmx_instruction(struct kvm_vcpu *vcpu)
return 1;
}
+#ifndef CONFIG_X86_SGX_KVM
static int handle_encls(struct kvm_vcpu *vcpu)
{
/*
- * SGX virtualization is not yet supported. There is no software
- * enable bit for SGX, so we have to trap ENCLS and inject a #UD
- * to prevent the guest from executing ENCLS.
+ * SGX virtualization is disabled. There is no software enable bit for
+ * SGX, so KVM intercepts all ENCLS leafs and injects a #UD to prevent
+ * the guest from executing ENCLS (when SGX is supported by hardware).
*/
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
}
+#endif /* CONFIG_X86_SGX_KVM */
static int handle_bus_lock_vmexit(struct kvm_vcpu *vcpu)
{
@@ -5668,10 +5664,10 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[EXIT_REASON_MSR_WRITE] = kvm_emulate_wrmsr,
[EXIT_REASON_INTERRUPT_WINDOW] = handle_interrupt_window,
[EXIT_REASON_HLT] = kvm_emulate_halt,
- [EXIT_REASON_INVD] = handle_invd,
+ [EXIT_REASON_INVD] = kvm_emulate_invd,
[EXIT_REASON_INVLPG] = handle_invlpg,
- [EXIT_REASON_RDPMC] = handle_rdpmc,
- [EXIT_REASON_VMCALL] = handle_vmcall,
+ [EXIT_REASON_RDPMC] = kvm_emulate_rdpmc,
+ [EXIT_REASON_VMCALL] = kvm_emulate_hypercall,
[EXIT_REASON_VMCLEAR] = handle_vmx_instruction,
[EXIT_REASON_VMLAUNCH] = handle_vmx_instruction,
[EXIT_REASON_VMPTRLD] = handle_vmx_instruction,
@@ -5685,8 +5681,8 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[EXIT_REASON_APIC_ACCESS] = handle_apic_access,
[EXIT_REASON_APIC_WRITE] = handle_apic_write,
[EXIT_REASON_EOI_INDUCED] = handle_apic_eoi_induced,
- [EXIT_REASON_WBINVD] = handle_wbinvd,
- [EXIT_REASON_XSETBV] = handle_xsetbv,
+ [EXIT_REASON_WBINVD] = kvm_emulate_wbinvd,
+ [EXIT_REASON_XSETBV] = kvm_emulate_xsetbv,
[EXIT_REASON_TASK_SWITCH] = handle_task_switch,
[EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check,
[EXIT_REASON_GDTR_IDTR] = handle_desc,
@@ -5694,13 +5690,13 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[EXIT_REASON_EPT_VIOLATION] = handle_ept_violation,
[EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig,
[EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause,
- [EXIT_REASON_MWAIT_INSTRUCTION] = handle_mwait,
+ [EXIT_REASON_MWAIT_INSTRUCTION] = kvm_emulate_mwait,
[EXIT_REASON_MONITOR_TRAP_FLAG] = handle_monitor_trap,
- [EXIT_REASON_MONITOR_INSTRUCTION] = handle_monitor,
+ [EXIT_REASON_MONITOR_INSTRUCTION] = kvm_emulate_monitor,
[EXIT_REASON_INVEPT] = handle_vmx_instruction,
[EXIT_REASON_INVVPID] = handle_vmx_instruction,
- [EXIT_REASON_RDRAND] = handle_invalid_op,
- [EXIT_REASON_RDSEED] = handle_invalid_op,
+ [EXIT_REASON_RDRAND] = kvm_handle_invalid_op,
+ [EXIT_REASON_RDSEED] = kvm_handle_invalid_op,
[EXIT_REASON_PML_FULL] = handle_pml_full,
[EXIT_REASON_INVPCID] = handle_invpcid,
[EXIT_REASON_VMFUNC] = handle_vmx_instruction,
@@ -5787,12 +5783,23 @@ static void vmx_dump_dtsel(char *name, uint32_t limit)
vmcs_readl(limit + GUEST_GDTR_BASE - GUEST_GDTR_LIMIT));
}
-void dump_vmcs(void)
+static void vmx_dump_msrs(char *name, struct vmx_msrs *m)
+{
+ unsigned int i;
+ struct vmx_msr_entry *e;
+
+ pr_err("MSR %s:\n", name);
+ for (i = 0, e = m->val; i < m->nr; ++i, ++e)
+ pr_err(" %2d: msr=0x%08x value=0x%016llx\n", i, e->index, e->value);
+}
+
+void dump_vmcs(struct kvm_vcpu *vcpu)
{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 vmentry_ctl, vmexit_ctl;
u32 cpu_based_exec_ctrl, pin_based_exec_ctrl, secondary_exec_control;
unsigned long cr4;
- u64 efer;
+ int efer_slot;
if (!dump_invalid_vmcs) {
pr_warn_ratelimited("set kvm_intel.dump_invalid_vmcs=1 to dump internal KVM state.\n");
@@ -5804,7 +5811,6 @@ void dump_vmcs(void)
cpu_based_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
pin_based_exec_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL);
cr4 = vmcs_readl(GUEST_CR4);
- efer = vmcs_read64(GUEST_IA32_EFER);
secondary_exec_control = 0;
if (cpu_has_secondary_exec_ctrls())
secondary_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
@@ -5816,9 +5822,7 @@ void dump_vmcs(void)
pr_err("CR4: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n",
cr4, vmcs_readl(CR4_READ_SHADOW), vmcs_readl(CR4_GUEST_HOST_MASK));
pr_err("CR3 = 0x%016lx\n", vmcs_readl(GUEST_CR3));
- if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT) &&
- (cr4 & X86_CR4_PAE) && !(efer & EFER_LMA))
- {
+ if (cpu_has_vmx_ept()) {
pr_err("PDPTR0 = 0x%016llx PDPTR1 = 0x%016llx\n",
vmcs_read64(GUEST_PDPTR0), vmcs_read64(GUEST_PDPTR1));
pr_err("PDPTR2 = 0x%016llx PDPTR3 = 0x%016llx\n",
@@ -5841,10 +5845,20 @@ void dump_vmcs(void)
vmx_dump_sel("LDTR:", GUEST_LDTR_SELECTOR);
vmx_dump_dtsel("IDTR:", GUEST_IDTR_LIMIT);
vmx_dump_sel("TR: ", GUEST_TR_SELECTOR);
- if ((vmexit_ctl & (VM_EXIT_SAVE_IA32_PAT | VM_EXIT_SAVE_IA32_EFER)) ||
- (vmentry_ctl & (VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_IA32_EFER)))
- pr_err("EFER = 0x%016llx PAT = 0x%016llx\n",
- efer, vmcs_read64(GUEST_IA32_PAT));
+ efer_slot = vmx_find_loadstore_msr_slot(&vmx->msr_autoload.guest, MSR_EFER);
+ if (vmentry_ctl & VM_ENTRY_LOAD_IA32_EFER)
+ pr_err("EFER= 0x%016llx\n", vmcs_read64(GUEST_IA32_EFER));
+ else if (efer_slot >= 0)
+ pr_err("EFER= 0x%016llx (autoload)\n",
+ vmx->msr_autoload.guest.val[efer_slot].value);
+ else if (vmentry_ctl & VM_ENTRY_IA32E_MODE)
+ pr_err("EFER= 0x%016llx (effective)\n",
+ vcpu->arch.efer | (EFER_LMA | EFER_LME));
+ else
+ pr_err("EFER= 0x%016llx (effective)\n",
+ vcpu->arch.efer & ~(EFER_LMA | EFER_LME));
+ if (vmentry_ctl & VM_ENTRY_LOAD_IA32_PAT)
+ pr_err("PAT = 0x%016llx\n", vmcs_read64(GUEST_IA32_PAT));
pr_err("DebugCtl = 0x%016llx DebugExceptions = 0x%016lx\n",
vmcs_read64(GUEST_IA32_DEBUGCTL),
vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS));
@@ -5860,6 +5874,10 @@ void dump_vmcs(void)
if (secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)
pr_err("InterruptStatus = %04x\n",
vmcs_read16(GUEST_INTR_STATUS));
+ if (vmcs_read32(VM_ENTRY_MSR_LOAD_COUNT) > 0)
+ vmx_dump_msrs("guest autoload", &vmx->msr_autoload.guest);
+ if (vmcs_read32(VM_EXIT_MSR_STORE_COUNT) > 0)
+ vmx_dump_msrs("guest autostore", &vmx->msr_autostore.guest);
pr_err("*** Host State ***\n");
pr_err("RIP = 0x%016lx RSP = 0x%016lx\n",
@@ -5881,14 +5899,16 @@ void dump_vmcs(void)
vmcs_readl(HOST_IA32_SYSENTER_ESP),
vmcs_read32(HOST_IA32_SYSENTER_CS),
vmcs_readl(HOST_IA32_SYSENTER_EIP));
- if (vmexit_ctl & (VM_EXIT_LOAD_IA32_PAT | VM_EXIT_LOAD_IA32_EFER))
- pr_err("EFER = 0x%016llx PAT = 0x%016llx\n",
- vmcs_read64(HOST_IA32_EFER),
- vmcs_read64(HOST_IA32_PAT));
+ if (vmexit_ctl & VM_EXIT_LOAD_IA32_EFER)
+ pr_err("EFER= 0x%016llx\n", vmcs_read64(HOST_IA32_EFER));
+ if (vmexit_ctl & VM_EXIT_LOAD_IA32_PAT)
+ pr_err("PAT = 0x%016llx\n", vmcs_read64(HOST_IA32_PAT));
if (cpu_has_load_perf_global_ctrl() &&
vmexit_ctl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
pr_err("PerfGlobCtl = 0x%016llx\n",
vmcs_read64(HOST_IA32_PERF_GLOBAL_CTRL));
+ if (vmcs_read32(VM_EXIT_MSR_LOAD_COUNT) > 0)
+ vmx_dump_msrs("host autoload", &vmx->msr_autoload.host);
pr_err("*** Control State ***\n");
pr_err("PinBased=%08x CPUBased=%08x SecondaryExec=%08x\n",
@@ -5997,7 +6017,7 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
}
if (exit_reason.failed_vmentry) {
- dump_vmcs();
+ dump_vmcs(vcpu);
vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
vcpu->run->fail_entry.hardware_entry_failure_reason
= exit_reason.full;
@@ -6006,7 +6026,7 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
}
if (unlikely(vmx->fail)) {
- dump_vmcs();
+ dump_vmcs(vcpu);
vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
vcpu->run->fail_entry.hardware_entry_failure_reason
= vmcs_read32(VM_INSTRUCTION_ERROR);
@@ -6092,7 +6112,7 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
unexpected_vmexit:
vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n",
exit_reason.full);
- dump_vmcs();
+ dump_vmcs(vcpu);
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->internal.suberror =
KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
@@ -6938,9 +6958,11 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
bitmap_fill(vmx->shadow_msr_intercept.write, MAX_POSSIBLE_PASSTHROUGH_MSRS);
vmx_disable_intercept_for_msr(vcpu, MSR_IA32_TSC, MSR_TYPE_R);
+#ifdef CONFIG_X86_64
vmx_disable_intercept_for_msr(vcpu, MSR_FS_BASE, MSR_TYPE_RW);
vmx_disable_intercept_for_msr(vcpu, MSR_GS_BASE, MSR_TYPE_RW);
vmx_disable_intercept_for_msr(vcpu, MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
+#endif
vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
@@ -6976,6 +6998,8 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
else
memset(&vmx->nested.msrs, 0, sizeof(vmx->nested.msrs));
+ vcpu_setup_sgx_lepubkeyhash(vcpu);
+
vmx->nested.posted_intr_nv = -1;
vmx->nested.current_vmptr = -1ull;
@@ -6989,8 +7013,9 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
vmx->pi_desc.nv = POSTED_INTR_VECTOR;
vmx->pi_desc.sn = 1;
- vmx->ept_pointer = INVALID_PAGE;
-
+#if IS_ENABLED(CONFIG_HYPERV)
+ vmx->hv_root_ept = INVALID_PAGE;
+#endif
return 0;
free_vmcs:
@@ -7007,7 +7032,9 @@ free_vpid:
static int vmx_vm_init(struct kvm *kvm)
{
- spin_lock_init(&to_kvm_vmx(kvm)->ept_pointer_lock);
+#if IS_ENABLED(CONFIG_HYPERV)
+ spin_lock_init(&to_kvm_vmx(kvm)->hv_root_ept_lock);
+#endif
if (!ple_gap)
kvm->arch.pause_in_guest = true;
@@ -7302,6 +7329,19 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
set_cr4_guest_host_mask(vmx);
+ vmx_write_encls_bitmap(vcpu, NULL);
+ if (guest_cpuid_has(vcpu, X86_FEATURE_SGX))
+ vmx->msr_ia32_feature_control_valid_bits |= FEAT_CTL_SGX_ENABLED;
+ else
+ vmx->msr_ia32_feature_control_valid_bits &= ~FEAT_CTL_SGX_ENABLED;
+
+ if (guest_cpuid_has(vcpu, X86_FEATURE_SGX_LC))
+ vmx->msr_ia32_feature_control_valid_bits |=
+ FEAT_CTL_SGX_LC_ENABLED;
+ else
+ vmx->msr_ia32_feature_control_valid_bits &=
+ ~FEAT_CTL_SGX_LC_ENABLED;
+
/* Refresh #PF interception to account for MAXPHYADDR changes. */
vmx_update_exception_bitmap(vcpu);
}
@@ -7322,6 +7362,13 @@ static __init void vmx_set_cpu_caps(void)
if (vmx_pt_mode_is_host_guest())
kvm_cpu_cap_check_and_set(X86_FEATURE_INTEL_PT);
+ if (!enable_sgx) {
+ kvm_cpu_cap_clear(X86_FEATURE_SGX);
+ kvm_cpu_cap_clear(X86_FEATURE_SGX_LC);
+ kvm_cpu_cap_clear(X86_FEATURE_SGX1);
+ kvm_cpu_cap_clear(X86_FEATURE_SGX2);
+ }
+
if (vmx_umip_emulated())
kvm_cpu_cap_set(X86_FEATURE_UMIP);
@@ -7848,7 +7895,8 @@ static __init int hardware_setup(void)
set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
if (enable_ept)
- vmx_enable_tdp();
+ kvm_mmu_set_ept_masks(enable_ept_ad_bits,
+ cpu_has_vmx_ept_execute_only());
if (!enable_ept)
ept_lpage_level = 0;
@@ -7909,6 +7957,8 @@ static __init int hardware_setup(void)
if (!enable_ept || !cpu_has_vmx_intel_pt())
pt_mode = PT_MODE_SYSTEM;
+ setup_default_sgx_lepubkeyhash();
+
if (nested) {
nested_vmx_setup_ctls_msrs(&vmcs_config.nested,
vmx_capability.ept);