Diffstat (limited to 'arch/x86/kvm/x86.c')
 arch/x86/kvm/x86.c | 214 +++++++++++++++++++++++++++++++++++---------------
 1 file changed, 169 insertions(+), 45 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index efc7a82ab140..cebdaa1e3cf5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -75,6 +75,7 @@
#include <asm/tlbflush.h>
#include <asm/intel_pt.h>
#include <asm/emulate_prefix.h>
+#include <asm/sgx.h>
#include <clocksource/hyperv_timer.h>
#define CREATE_TRACE_POINTS
@@ -245,6 +246,9 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
VCPU_STAT("l1d_flush", l1d_flush),
VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
+ VCPU_STAT("nested_run", nested_run),
+ VCPU_STAT("directed_yield_attempted", directed_yield_attempted),
+ VCPU_STAT("directed_yield_successful", directed_yield_successful),
VM_STAT("mmu_shadow_zapped", mmu_shadow_zapped),
VM_STAT("mmu_pte_write", mmu_pte_write),
VM_STAT("mmu_pde_zapped", mmu_pde_zapped),
@@ -543,8 +547,6 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
if (!vcpu->arch.exception.pending && !vcpu->arch.exception.injected) {
queue:
- if (has_error && !is_protmode(vcpu))
- has_error = false;
if (reinject) {
/*
* On vmentry, vcpu->arch.exception.pending is only
@@ -983,14 +985,17 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
return 0;
}
-int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
+int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu)
{
- if (static_call(kvm_x86_get_cpl)(vcpu) == 0)
- return __kvm_set_xcr(vcpu, index, xcr);
+ if (static_call(kvm_x86_get_cpl)(vcpu) != 0 ||
+ __kvm_set_xcr(vcpu, kvm_rcx_read(vcpu), kvm_read_edx_eax(vcpu))) {
+ kvm_inject_gp(vcpu, 0);
+ return 1;
+ }
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
-EXPORT_SYMBOL_GPL(kvm_set_xcr);
+EXPORT_SYMBOL_GPL(kvm_emulate_xsetbv);
bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
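
For context, the rename reflects that all of XSETBV emulation now lives in common x86 code: the CPL 0 check, reading the XCR index from RCX and the value from EDX:EAX, #GP injection on failure, and skipping the instruction on success. A minimal sketch of what a vendor exit handler reduces to, assuming VMX-style wiring (the handler name here is illustrative, not from this patch):

	/*
	 * Hypothetical vendor exit handler: everything interesting happens
	 * in kvm_emulate_xsetbv(), which returns 1 (resume the guest) after
	 * injecting #GP, or the result of skipping the instruction.
	 */
	static int handle_xsetbv(struct kvm_vcpu *vcpu)
	{
		return kvm_emulate_xsetbv(vcpu);
	}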
@@ -1072,10 +1077,15 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
return 0;
}
- if (is_long_mode(vcpu) && kvm_vcpu_is_illegal_gpa(vcpu, cr3))
+ /*
+ * Do not condition the GPA check on long mode; this helper is used to
+ * stuff CR3, e.g. for RSM emulation, and there is no guarantee that
+ * the current vCPU mode is accurate.
+ */
+ if (kvm_vcpu_is_illegal_gpa(vcpu, cr3))
return 1;
- else if (is_pae_paging(vcpu) &&
- !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
+
+ if (is_pae_paging(vcpu) && !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
return 1;
kvm_mmu_new_pgd(vcpu, cr3, skip_tlb_flush, skip_tlb_flush);
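
For reference, the GPA legality check that is now unconditional boils down to a guest MAXPHYADDR comparison; a simplified sketch of the helper (the real one lives in KVM's cpuid header):

	/* A CR3 value is illegal if it sets bits above the guest's MAXPHYADDR. */
	static inline bool kvm_vcpu_is_illegal_gpa(struct kvm_vcpu *vcpu, gpa_t gpa)
	{
		return !!(gpa >> cpuid_maxphyaddr(vcpu));
	}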
@@ -1191,20 +1201,21 @@ void kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
}
EXPORT_SYMBOL_GPL(kvm_get_dr);
-bool kvm_rdpmc(struct kvm_vcpu *vcpu)
+int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu)
{
u32 ecx = kvm_rcx_read(vcpu);
u64 data;
- int err;
- err = kvm_pmu_rdpmc(vcpu, ecx, &data);
- if (err)
- return err;
+ if (kvm_pmu_rdpmc(vcpu, ecx, &data)) {
+ kvm_inject_gp(vcpu, 0);
+ return 1;
+ }
+
kvm_rax_write(vcpu, (u32)data);
kvm_rdx_write(vcpu, data >> 32);
- return err;
+ return kvm_skip_emulated_instruction(vcpu);
}
-EXPORT_SYMBOL_GPL(kvm_rdpmc);
+EXPORT_SYMBOL_GPL(kvm_emulate_rdpmc);
/*
* List of msr numbers which we expose to userspace through KVM_GET_MSRS
@@ -1791,6 +1802,40 @@ int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr);
+int kvm_emulate_as_nop(struct kvm_vcpu *vcpu)
+{
+ return kvm_skip_emulated_instruction(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvm_emulate_as_nop);
+
+int kvm_emulate_invd(struct kvm_vcpu *vcpu)
+{
+ /* Treat an INVD instruction as a NOP and just skip it. */
+ return kvm_emulate_as_nop(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvm_emulate_invd);
+
+int kvm_emulate_mwait(struct kvm_vcpu *vcpu)
+{
+ pr_warn_once("kvm: MWAIT instruction emulated as NOP!\n");
+ return kvm_emulate_as_nop(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvm_emulate_mwait);
+
+int kvm_handle_invalid_op(struct kvm_vcpu *vcpu)
+{
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ return 1;
+}
+EXPORT_SYMBOL_GPL(kvm_handle_invalid_op);
+
+int kvm_emulate_monitor(struct kvm_vcpu *vcpu)
+{
+ pr_warn_once("kvm: MONITOR instruction emulated as NOP!\n");
+ return kvm_emulate_as_nop(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvm_emulate_monitor);
+
static inline bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu)
{
xfer_to_guest_mode_prepare();
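
These helpers give vendor code one common way to complete an exit without emulating anything. A sketch of how an exit handler might choose between them, using the usual guest_cpuid_has() check; the wiring is illustrative and not part of this patch:

	static int handle_monitor(struct kvm_vcpu *vcpu)
	{
		/* Guests that weren't told about MONITOR/MWAIT should see #UD. */
		if (!guest_cpuid_has(vcpu, X86_FEATURE_MWAIT))
			return kvm_handle_invalid_op(vcpu);

		/* Otherwise warn once and treat the instruction as a NOP. */
		return kvm_emulate_monitor(vcpu);
	}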
@@ -3382,6 +3427,12 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = 0;
break;
case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5:
+ if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
+ return kvm_pmu_get_msr(vcpu, msr_info);
+ if (!msr_info->host_initiated)
+ return 1;
+ msr_info->data = 0;
+ break;
case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
@@ -3771,8 +3822,14 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_X86_USER_SPACE_MSR:
case KVM_CAP_X86_MSR_FILTER:
case KVM_CAP_ENFORCE_PV_FEATURE_CPUID:
+#ifdef CONFIG_X86_SGX_KVM
+ case KVM_CAP_SGX_ATTRIBUTE:
+#endif
+ case KVM_CAP_VM_COPY_ENC_CONTEXT_FROM:
r = 1;
break;
+ case KVM_CAP_SET_GUEST_DEBUG2:
+ return KVM_GUESTDBG_VALID_MASK;
#ifdef CONFIG_KVM_XEN
case KVM_CAP_XEN_HVM:
r = KVM_XEN_HVM_CONFIG_HYPERCALL_MSR |
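
Unlike most capabilities, KVM_CAP_SET_GUEST_DEBUG2 does not answer 0/1; it returns the mask of KVM_GUESTDBG_* flags the kernel accepts, so a VMM can probe supported debug features up front. A userspace sketch (error handling elided):

	#include <linux/kvm.h>
	#include <stdbool.h>
	#include <sys/ioctl.h>

	static bool have_hw_breakpoints(int kvm_fd)
	{
		int mask = ioctl(kvm_fd, KVM_CHECK_EXTENSION,
				 KVM_CAP_SET_GUEST_DEBUG2);

		return mask > 0 && (mask & KVM_GUESTDBG_USE_HW_BP);
	}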
@@ -4673,7 +4730,6 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
kvm_update_pv_runtime(vcpu);
return 0;
-
default:
return -EINVAL;
}
@@ -5355,6 +5411,28 @@ split_irqchip_unlock:
kvm->arch.bus_lock_detection_enabled = true;
r = 0;
break;
+#ifdef CONFIG_X86_SGX_KVM
+ case KVM_CAP_SGX_ATTRIBUTE: {
+ unsigned long allowed_attributes = 0;
+
+ r = sgx_set_attribute(&allowed_attributes, cap->args[0]);
+ if (r)
+ break;
+
+ /* KVM only supports the PROVISIONKEY privileged attribute. */
+ if ((allowed_attributes & SGX_ATTR_PROVISIONKEY) &&
+ !(allowed_attributes & ~SGX_ATTR_PROVISIONKEY))
+ kvm->arch.sgx_provisioning_allowed = true;
+ else
+ r = -EINVAL;
+ break;
+ }
+#endif
+ case KVM_CAP_VM_COPY_ENC_CONTEXT_FROM:
+ r = -EINVAL;
+ if (kvm_x86_ops.vm_copy_enc_context_from)
+ r = kvm_x86_ops.vm_copy_enc_context_from(kvm, cap->args[0]);
+ return r;
default:
r = -EINVAL;
break;
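
On the userspace side, KVM_CAP_SGX_ATTRIBUTE is enabled per VM by passing a file descriptor that proves access to the SGX provisioning device; sgx_set_attribute() validates the fd and reports which attributes it unlocks. A sketch, assuming the /dev/sgx_provision device node and eliding error handling:

	#include <fcntl.h>
	#include <linux/kvm.h>
	#include <sys/ioctl.h>

	static int allow_sgx_provisioning(int vm_fd)
	{
		struct kvm_enable_cap cap = {
			.cap = KVM_CAP_SGX_ATTRIBUTE,
			/* fd for the SGX provisioning device; path assumed */
			.args[0] = open("/dev/sgx_provision", O_RDONLY),
		};

		return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
	}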
@@ -5999,6 +6077,7 @@ gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
u32 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0;
return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
}
+EXPORT_SYMBOL_GPL(kvm_mmu_gva_to_gpa_read);
gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva,
struct x86_exception *exception)
@@ -6015,6 +6094,7 @@ gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
access |= PFERR_WRITE_MASK;
return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
}
+EXPORT_SYMBOL_GPL(kvm_mmu_gva_to_gpa_write);
/* uses this to access any guest's mapped memory without checking CPL */
gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
@@ -6934,12 +7014,12 @@ static bool emulator_guest_has_fxsr(struct x86_emulate_ctxt *ctxt)
static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg)
{
- return kvm_register_read(emul_to_vcpu(ctxt), reg);
+ return kvm_register_read_raw(emul_to_vcpu(ctxt), reg);
}
static void emulator_write_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg, ulong val)
{
- kvm_register_write(emul_to_vcpu(ctxt), reg, val);
+ kvm_register_write_raw(emul_to_vcpu(ctxt), reg, val);
}
static void emulator_set_nmi_mask(struct x86_emulate_ctxt *ctxt, bool masked)
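
The _raw suffix marks these accessors as returning the full architectural 64-bit value. The emulator applies operand-size rules itself, so it wants raw reads; the plain names are freed up for mode-aware wrappers along these lines (a sketch of the distinction, matching the helper shape in KVM's x86.h):

	/*
	 * Raw accessors return the full 64-bit register; the mode-aware
	 * wrapper truncates to 32 bits outside 64-bit mode.
	 */
	static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu, int reg)
	{
		unsigned long val = kvm_register_read_raw(vcpu, reg);

		return is_64_bit_mode(vcpu) ? val : (u32)val;
	}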
@@ -8043,9 +8123,6 @@ int kvm_arch_init(void *opaque)
if (r)
goto out_free_percpu;
- kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
- PT_DIRTY_MASK, PT64_NX_MASK, 0,
- PT_PRESENT_MASK, 0, sme_me_mask);
kvm_timer_init();
perf_register_guest_info_callbacks(&kvm_guest_cbs);
@@ -8205,21 +8282,35 @@ void kvm_apicv_init(struct kvm *kvm, bool enable)
}
EXPORT_SYMBOL_GPL(kvm_apicv_init);
-static void kvm_sched_yield(struct kvm *kvm, unsigned long dest_id)
+static void kvm_sched_yield(struct kvm_vcpu *vcpu, unsigned long dest_id)
{
struct kvm_vcpu *target = NULL;
struct kvm_apic_map *map;
+ vcpu->stat.directed_yield_attempted++;
+
rcu_read_lock();
- map = rcu_dereference(kvm->arch.apic_map);
+ map = rcu_dereference(vcpu->kvm->arch.apic_map);
if (likely(map) && dest_id <= map->max_apic_id && map->phys_map[dest_id])
target = map->phys_map[dest_id]->vcpu;
rcu_read_unlock();
- if (target && READ_ONCE(target->ready))
- kvm_vcpu_yield_to(target);
+ if (!target || !READ_ONCE(target->ready))
+ goto no_yield;
+
+ /* Ignore requests to yield to self */
+ if (vcpu == target)
+ goto no_yield;
+
+ if (kvm_vcpu_yield_to(target) <= 0)
+ goto no_yield;
+
+ vcpu->stat.directed_yield_successful++;
+
+no_yield:
+ return;
}
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
@@ -8266,7 +8357,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
break;
kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1);
- kvm_sched_yield(vcpu->kvm, a1);
+ kvm_sched_yield(vcpu, a1);
ret = 0;
break;
#ifdef CONFIG_X86_64
@@ -8284,7 +8375,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
if (!guest_pv_has(vcpu, KVM_FEATURE_PV_SCHED_YIELD))
break;
- kvm_sched_yield(vcpu->kvm, a0);
+ kvm_sched_yield(vcpu, a0);
ret = 0;
break;
default:
@@ -8367,6 +8458,27 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
static_call(kvm_x86_update_cr8_intercept)(vcpu, tpr, max_irr);
}
+
+int kvm_check_nested_events(struct kvm_vcpu *vcpu)
+{
+ if (WARN_ON_ONCE(!is_guest_mode(vcpu)))
+ return -EIO;
+
+ if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
+ kvm_x86_ops.nested_ops->triple_fault(vcpu);
+ return 1;
+ }
+
+ return kvm_x86_ops.nested_ops->check_events(vcpu);
+}
+
+static void kvm_inject_exception(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->arch.exception.error_code && !is_protmode(vcpu))
+ vcpu->arch.exception.error_code = false;
+ static_call(kvm_x86_queue_exception)(vcpu);
+}
+
static void inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit)
{
int r;
@@ -8375,7 +8487,7 @@ static void inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit
/* try to reinject previous events if any */
if (vcpu->arch.exception.injected) {
- static_call(kvm_x86_queue_exception)(vcpu);
+ kvm_inject_exception(vcpu);
can_inject = false;
}
/*
@@ -8412,7 +8524,7 @@ static void inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit
* from L2 to L1.
*/
if (is_guest_mode(vcpu)) {
- r = kvm_x86_ops.nested_ops->check_events(vcpu);
+ r = kvm_check_nested_events(vcpu);
if (r < 0)
goto busy;
}
@@ -8438,7 +8550,7 @@ static void inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit
}
}
- static_call(kvm_x86_queue_exception)(vcpu);
+ kvm_inject_exception(vcpu);
can_inject = false;
}
@@ -8587,7 +8699,7 @@ static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf)
put_smstate(u32, buf, 0x7ff0, kvm_rip_read(vcpu));
for (i = 0; i < 8; i++)
- put_smstate(u32, buf, 0x7fd0 + i * 4, kvm_register_read(vcpu, i));
+ put_smstate(u32, buf, 0x7fd0 + i * 4, kvm_register_read_raw(vcpu, i));
kvm_get_dr(vcpu, 6, &val);
put_smstate(u32, buf, 0x7fcc, (u32)val);
@@ -8633,7 +8745,7 @@ static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf)
int i;
for (i = 0; i < 16; i++)
- put_smstate(u64, buf, 0x7ff8 - i * 8, kvm_register_read(vcpu, i));
+ put_smstate(u64, buf, 0x7ff8 - i * 8, kvm_register_read_raw(vcpu, i));
put_smstate(u64, buf, 0x7f78, kvm_rip_read(vcpu));
put_smstate(u32, buf, 0x7f70, kvm_get_rflags(vcpu));
@@ -8975,10 +9087,14 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
goto out;
}
if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
- vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
- vcpu->mmio_needed = 0;
- r = 0;
- goto out;
+ if (is_guest_mode(vcpu)) {
+ kvm_x86_ops.nested_ops->triple_fault(vcpu);
+ } else {
+ vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
+ vcpu->mmio_needed = 0;
+ r = 0;
+ goto out;
+ }
}
if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) {
/* Page is swapped out. Do synthetic halt */
@@ -9276,7 +9392,7 @@ static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu)
{
if (is_guest_mode(vcpu))
- kvm_x86_ops.nested_ops->check_events(vcpu);
+ kvm_check_nested_events(vcpu);
return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
!vcpu->arch.apf.halted);
@@ -11002,6 +11118,14 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu);
}
+bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->arch.apicv_active && static_call(kvm_x86_dy_apicv_has_pending_interrupt)(vcpu))
+ return true;
+
+ return false;
+}
+
bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu)
{
if (READ_ONCE(vcpu->arch.pv.pv_unhalted))
@@ -11012,14 +11136,14 @@ bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu)
kvm_test_request(KVM_REQ_EVENT, vcpu))
return true;
- if (vcpu->arch.apicv_active && static_call(kvm_x86_dy_apicv_has_pending_interrupt)(vcpu))
- return true;
-
- return false;
+ return kvm_arch_dy_has_pending_interrupt(vcpu);
}
bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
{
+ if (vcpu->arch.guest_state_protected)
+ return true;
+
return vcpu->arch.preempted_in_kernel;
}
@@ -11290,7 +11414,7 @@ bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
if (!kvm_pv_async_pf_enabled(vcpu))
return true;
else
- return apf_pageready_slot_free(vcpu);
+ return kvm_lapic_enabled(vcpu) && apf_pageready_slot_free(vcpu);
}
void kvm_arch_start_assignment(struct kvm *kvm)
@@ -11539,7 +11663,7 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva)
fallthrough;
case INVPCID_TYPE_ALL_INCL_GLOBAL:
- kvm_mmu_unload(vcpu);
+ kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
return kvm_skip_emulated_instruction(vcpu);
default:
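
KVM_REQ_MMU_RELOAD defers the teardown: rather than unloading the MMU synchronously in the INVPCID handler, the request bit is serviced on the next trip through vcpu_enter_guest(), roughly (simplified sketch of the request-handling pattern):

	/* Serviced before VM-entry; simplified from vcpu_enter_guest(). */
	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
		kvm_mmu_unload(vcpu);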