aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/arch/x86/kvm/x86.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r--arch/x86/kvm/x86.c703
1 files changed, 423 insertions, 280 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3bf2ecafd027..9e41b5135340 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -18,6 +18,7 @@
#include <linux/kvm_host.h>
#include "irq.h"
+#include "ioapic.h"
#include "mmu.h"
#include "i8254.h"
#include "tss.h"
@@ -97,9 +98,6 @@ static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
static u64 __read_mostly cr4_reserved_bits = CR4_RESERVED_BITS;
-#define VM_STAT(x, ...) offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__
-#define VCPU_STAT(x, ...) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__
-
#define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \
KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
@@ -194,45 +192,46 @@ u64 __read_mostly supported_xss;
EXPORT_SYMBOL_GPL(supported_xss);
struct kvm_stats_debugfs_item debugfs_entries[] = {
- { "pf_fixed", VCPU_STAT(pf_fixed) },
- { "pf_guest", VCPU_STAT(pf_guest) },
- { "tlb_flush", VCPU_STAT(tlb_flush) },
- { "invlpg", VCPU_STAT(invlpg) },
- { "exits", VCPU_STAT(exits) },
- { "io_exits", VCPU_STAT(io_exits) },
- { "mmio_exits", VCPU_STAT(mmio_exits) },
- { "signal_exits", VCPU_STAT(signal_exits) },
- { "irq_window", VCPU_STAT(irq_window_exits) },
- { "nmi_window", VCPU_STAT(nmi_window_exits) },
- { "halt_exits", VCPU_STAT(halt_exits) },
- { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
- { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
- { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
- { "halt_wakeup", VCPU_STAT(halt_wakeup) },
- { "hypercalls", VCPU_STAT(hypercalls) },
- { "request_irq", VCPU_STAT(request_irq_exits) },
- { "irq_exits", VCPU_STAT(irq_exits) },
- { "host_state_reload", VCPU_STAT(host_state_reload) },
- { "fpu_reload", VCPU_STAT(fpu_reload) },
- { "insn_emulation", VCPU_STAT(insn_emulation) },
- { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
- { "irq_injections", VCPU_STAT(irq_injections) },
- { "nmi_injections", VCPU_STAT(nmi_injections) },
- { "req_event", VCPU_STAT(req_event) },
- { "l1d_flush", VCPU_STAT(l1d_flush) },
- { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) },
- { "mmu_pte_write", VM_STAT(mmu_pte_write) },
- { "mmu_pte_updated", VM_STAT(mmu_pte_updated) },
- { "mmu_pde_zapped", VM_STAT(mmu_pde_zapped) },
- { "mmu_flooded", VM_STAT(mmu_flooded) },
- { "mmu_recycled", VM_STAT(mmu_recycled) },
- { "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
- { "mmu_unsync", VM_STAT(mmu_unsync) },
- { "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
- { "largepages", VM_STAT(lpages, .mode = 0444) },
- { "nx_largepages_splitted", VM_STAT(nx_lpage_splits, .mode = 0444) },
- { "max_mmu_page_hash_collisions",
- VM_STAT(max_mmu_page_hash_collisions) },
+ VCPU_STAT("pf_fixed", pf_fixed),
+ VCPU_STAT("pf_guest", pf_guest),
+ VCPU_STAT("tlb_flush", tlb_flush),
+ VCPU_STAT("invlpg", invlpg),
+ VCPU_STAT("exits", exits),
+ VCPU_STAT("io_exits", io_exits),
+ VCPU_STAT("mmio_exits", mmio_exits),
+ VCPU_STAT("signal_exits", signal_exits),
+ VCPU_STAT("irq_window", irq_window_exits),
+ VCPU_STAT("nmi_window", nmi_window_exits),
+ VCPU_STAT("halt_exits", halt_exits),
+ VCPU_STAT("halt_successful_poll", halt_successful_poll),
+ VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
+ VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
+ VCPU_STAT("halt_wakeup", halt_wakeup),
+ VCPU_STAT("hypercalls", hypercalls),
+ VCPU_STAT("request_irq", request_irq_exits),
+ VCPU_STAT("irq_exits", irq_exits),
+ VCPU_STAT("host_state_reload", host_state_reload),
+ VCPU_STAT("fpu_reload", fpu_reload),
+ VCPU_STAT("insn_emulation", insn_emulation),
+ VCPU_STAT("insn_emulation_fail", insn_emulation_fail),
+ VCPU_STAT("irq_injections", irq_injections),
+ VCPU_STAT("nmi_injections", nmi_injections),
+ VCPU_STAT("req_event", req_event),
+ VCPU_STAT("l1d_flush", l1d_flush),
+ VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
+ VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
+ VM_STAT("mmu_shadow_zapped", mmu_shadow_zapped),
+ VM_STAT("mmu_pte_write", mmu_pte_write),
+ VM_STAT("mmu_pte_updated", mmu_pte_updated),
+ VM_STAT("mmu_pde_zapped", mmu_pde_zapped),
+ VM_STAT("mmu_flooded", mmu_flooded),
+ VM_STAT("mmu_recycled", mmu_recycled),
+ VM_STAT("mmu_cache_miss", mmu_cache_miss),
+ VM_STAT("mmu_unsync", mmu_unsync),
+ VM_STAT("remote_tlb_flush", remote_tlb_flush),
+ VM_STAT("largepages", lpages, .mode = 0444),
+ VM_STAT("nx_largepages_splitted", nx_lpage_splits, .mode = 0444),
+ VM_STAT("max_mmu_page_hash_collisions", max_mmu_page_hash_collisions),
{ NULL }
};
@@ -261,7 +260,7 @@ static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
{
int i;
- for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU); i++)
+ for (i = 0; i < ASYNC_PF_PER_VCPU; i++)
vcpu->arch.apf.gfns[i] = ~0;
}
@@ -572,11 +571,12 @@ void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr)
}
EXPORT_SYMBOL_GPL(kvm_requeue_exception);
-static void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr,
- unsigned long payload)
+void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr,
+ unsigned long payload)
{
kvm_multiple_exception(vcpu, nr, false, 0, true, payload, false);
}
+EXPORT_SYMBOL_GPL(kvm_queue_exception_p);
static void kvm_queue_exception_e_p(struct kvm_vcpu *vcpu, unsigned nr,
u32 error_code, unsigned long payload)
@@ -611,15 +611,28 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
}
EXPORT_SYMBOL_GPL(kvm_inject_page_fault);
-static bool kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
+bool kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
+ struct x86_exception *fault)
{
- if (mmu_is_nested(vcpu) && !fault->nested_page_fault)
- vcpu->arch.nested_mmu.inject_page_fault(vcpu, fault);
- else
- vcpu->arch.mmu->inject_page_fault(vcpu, fault);
+ struct kvm_mmu *fault_mmu;
+ WARN_ON_ONCE(fault->vector != PF_VECTOR);
+
+ fault_mmu = fault->nested_page_fault ? vcpu->arch.mmu :
+ vcpu->arch.walk_mmu;
+
+ /*
+ * Invalidate the TLB entry for the faulting address, if it exists,
+ * else the access will fault indefinitely (and to emulate hardware).
+ */
+ if ((fault->error_code & PFERR_PRESENT_MASK) &&
+ !(fault->error_code & PFERR_RSVD_MASK))
+ kvm_mmu_invalidate_gva(vcpu, fault_mmu, fault->address,
+ fault_mmu->root_hpa);
+ fault_mmu->inject_page_fault(vcpu, fault);
return fault->nested_page_fault;
}
+EXPORT_SYMBOL_GPL(kvm_inject_emulated_page_fault);
void kvm_inject_nmi(struct kvm_vcpu *vcpu)
{
@@ -836,11 +849,25 @@ void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu)
vcpu->arch.ia32_xss != host_xss)
wrmsrl(MSR_IA32_XSS, vcpu->arch.ia32_xss);
}
+
+ if (static_cpu_has(X86_FEATURE_PKU) &&
+ (kvm_read_cr4_bits(vcpu, X86_CR4_PKE) ||
+ (vcpu->arch.xcr0 & XFEATURE_MASK_PKRU)) &&
+ vcpu->arch.pkru != vcpu->arch.host_pkru)
+ __write_pkru(vcpu->arch.pkru);
}
EXPORT_SYMBOL_GPL(kvm_load_guest_xsave_state);
void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu)
{
+ if (static_cpu_has(X86_FEATURE_PKU) &&
+ (kvm_read_cr4_bits(vcpu, X86_CR4_PKE) ||
+ (vcpu->arch.xcr0 & XFEATURE_MASK_PKRU))) {
+ vcpu->arch.pkru = rdpkru();
+ if (vcpu->arch.pkru != vcpu->arch.host_pkru)
+ __write_pkru(vcpu->arch.host_pkru);
+ }
+
if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) {
if (vcpu->arch.xcr0 != host_xcr0)
@@ -926,19 +953,6 @@ EXPORT_SYMBOL_GPL(kvm_set_xcr);
__reserved_bits; \
})
-static u64 kvm_host_cr4_reserved_bits(struct cpuinfo_x86 *c)
-{
- u64 reserved_bits = __cr4_reserved_bits(cpu_has, c);
-
- if (kvm_cpu_cap_has(X86_FEATURE_LA57))
- reserved_bits &= ~X86_CR4_LA57;
-
- if (kvm_cpu_cap_has(X86_FEATURE_UMIP))
- reserved_bits &= ~X86_CR4_UMIP;
-
- return reserved_bits;
-}
-
static int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
if (cr4 & cr4_reserved_bits)
@@ -1006,7 +1020,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) {
if (!skip_tlb_flush) {
kvm_mmu_sync_roots(vcpu);
- kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+ kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
}
return 0;
}
@@ -1018,7 +1032,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
return 1;
- kvm_mmu_new_cr3(vcpu, cr3, skip_tlb_flush);
+ kvm_mmu_new_pgd(vcpu, cr3, skip_tlb_flush, skip_tlb_flush);
vcpu->arch.cr3 = cr3;
kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
@@ -1058,13 +1072,7 @@ static void kvm_update_dr0123(struct kvm_vcpu *vcpu)
}
}
-static void kvm_update_dr6(struct kvm_vcpu *vcpu)
-{
- if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
- kvm_x86_ops.set_dr6(vcpu, vcpu->arch.dr6);
-}
-
-static void kvm_update_dr7(struct kvm_vcpu *vcpu)
+void kvm_update_dr7(struct kvm_vcpu *vcpu)
{
unsigned long dr7;
@@ -1077,6 +1085,7 @@ static void kvm_update_dr7(struct kvm_vcpu *vcpu)
if (dr7 & DR7_BP_EN_MASK)
vcpu->arch.switch_db_regs |= KVM_DEBUGREG_BP_ENABLED;
}
+EXPORT_SYMBOL_GPL(kvm_update_dr7);
static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu)
{
@@ -1103,7 +1112,6 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
if (val & 0xffffffff00000000ULL)
return -1; /* #GP */
vcpu->arch.dr6 = (val & DR6_VOLATILE) | kvm_dr6_fixed(vcpu);
- kvm_update_dr6(vcpu);
break;
case 5:
/* fall through */
@@ -1139,10 +1147,7 @@ int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
case 4:
/* fall through */
case 6:
- if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
- *val = vcpu->arch.dr6;
- else
- *val = kvm_x86_ops.get_dr6(vcpu);
+ *val = vcpu->arch.dr6;
break;
case 5:
/* fall through */
@@ -1241,13 +1246,18 @@ static const u32 emulated_msrs_all[] = {
HV_X64_MSR_VP_ASSIST_PAGE,
HV_X64_MSR_REENLIGHTENMENT_CONTROL, HV_X64_MSR_TSC_EMULATION_CONTROL,
HV_X64_MSR_TSC_EMULATION_STATUS,
+ HV_X64_MSR_SYNDBG_OPTIONS,
+ HV_X64_MSR_SYNDBG_CONTROL, HV_X64_MSR_SYNDBG_STATUS,
+ HV_X64_MSR_SYNDBG_SEND_BUFFER, HV_X64_MSR_SYNDBG_RECV_BUFFER,
+ HV_X64_MSR_SYNDBG_PENDING_BUFFER,
MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
- MSR_KVM_PV_EOI_EN,
+ MSR_KVM_PV_EOI_EN, MSR_KVM_ASYNC_PF_INT, MSR_KVM_ASYNC_PF_ACK,
MSR_IA32_TSC_ADJUST,
MSR_IA32_TSCDEADLINE,
MSR_IA32_ARCH_CAPABILITIES,
+ MSR_IA32_PERF_CAPABILITIES,
MSR_IA32_MISC_ENABLE,
MSR_IA32_MCG_STATUS,
MSR_IA32_MCG_CTL,
@@ -1314,6 +1324,7 @@ static const u32 msr_based_features_all[] = {
MSR_F10H_DECFG,
MSR_IA32_UCODE_REV,
MSR_IA32_ARCH_CAPABILITIES,
+ MSR_IA32_PERF_CAPABILITIES,
};
static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all)];
@@ -1572,6 +1583,13 @@ int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr);
+bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu)
+{
+ return vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu) ||
+ need_resched() || signal_pending(current);
+}
+EXPORT_SYMBOL_GPL(kvm_vcpu_exit_request);
+
/*
* The fast path for frequent and performance sensitive wrmsr emulation,
* i.e. the sending of IPI, sending IPI early in the VM-Exit flow reduces
@@ -1600,27 +1618,44 @@ static int handle_fastpath_set_x2apic_icr_irqoff(struct kvm_vcpu *vcpu, u64 data
return 1;
}
-enum exit_fastpath_completion handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu)
+static int handle_fastpath_set_tscdeadline(struct kvm_vcpu *vcpu, u64 data)
+{
+ if (!kvm_can_use_hv_timer(vcpu))
+ return 1;
+
+ kvm_set_lapic_tscdeadline_msr(vcpu, data);
+ return 0;
+}
+
+fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu)
{
u32 msr = kvm_rcx_read(vcpu);
u64 data;
- int ret = 0;
+ fastpath_t ret = EXIT_FASTPATH_NONE;
switch (msr) {
case APIC_BASE_MSR + (APIC_ICR >> 4):
data = kvm_read_edx_eax(vcpu);
- ret = handle_fastpath_set_x2apic_icr_irqoff(vcpu, data);
+ if (!handle_fastpath_set_x2apic_icr_irqoff(vcpu, data)) {
+ kvm_skip_emulated_instruction(vcpu);
+ ret = EXIT_FASTPATH_EXIT_HANDLED;
+ }
+ break;
+ case MSR_IA32_TSCDEADLINE:
+ data = kvm_read_edx_eax(vcpu);
+ if (!handle_fastpath_set_tscdeadline(vcpu, data)) {
+ kvm_skip_emulated_instruction(vcpu);
+ ret = EXIT_FASTPATH_REENTER_GUEST;
+ }
break;
default:
- return EXIT_FASTPATH_NONE;
+ break;
}
- if (!ret) {
+ if (ret != EXIT_FASTPATH_NONE)
trace_kvm_msr_write(msr, data);
- return EXIT_FASTPATH_SKIP_EMUL_INS;
- }
- return EXIT_FASTPATH_NONE;
+ return ret;
}
EXPORT_SYMBOL_GPL(handle_fastpath_set_msr_irqoff);
@@ -1909,7 +1944,7 @@ static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
static void update_ia32_tsc_adjust_msr(struct kvm_vcpu *vcpu, s64 offset)
{
- u64 curr_offset = kvm_x86_ops.read_l1_tsc_offset(vcpu);
+ u64 curr_offset = vcpu->arch.l1_tsc_offset;
vcpu->arch.ia32_tsc_adjust_msr += offset - curr_offset;
}
@@ -1951,14 +1986,13 @@ static u64 kvm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
{
- u64 tsc_offset = kvm_x86_ops.read_l1_tsc_offset(vcpu);
-
- return tsc_offset + kvm_scale_tsc(vcpu, host_tsc);
+ return vcpu->arch.l1_tsc_offset + kvm_scale_tsc(vcpu, host_tsc);
}
EXPORT_SYMBOL_GPL(kvm_read_l1_tsc);
static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
{
+ vcpu->arch.l1_tsc_offset = offset;
vcpu->arch.tsc_offset = kvm_x86_ops.write_l1_tsc_offset(vcpu, offset);
}
@@ -2083,7 +2117,7 @@ EXPORT_SYMBOL_GPL(kvm_write_tsc);
static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
s64 adjustment)
{
- u64 tsc_offset = kvm_x86_ops.read_l1_tsc_offset(vcpu);
+ u64 tsc_offset = vcpu->arch.l1_tsc_offset;
kvm_vcpu_write_tsc_offset(vcpu, tsc_offset + adjustment);
}
@@ -2645,29 +2679,54 @@ out:
return r;
}
+static inline bool kvm_pv_async_pf_enabled(struct kvm_vcpu *vcpu)
+{
+ u64 mask = KVM_ASYNC_PF_ENABLED | KVM_ASYNC_PF_DELIVERY_AS_INT;
+
+ return (vcpu->arch.apf.msr_en_val & mask) == mask;
+}
+
static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
{
gpa_t gpa = data & ~0x3f;
- /* Bits 3:5 are reserved, Should be zero */
- if (data & 0x38)
+ /* Bits 4:5 are reserved, Should be zero */
+ if (data & 0x30)
return 1;
- vcpu->arch.apf.msr_val = data;
+ vcpu->arch.apf.msr_en_val = data;
- if (!(data & KVM_ASYNC_PF_ENABLED)) {
+ if (!kvm_pv_async_pf_enabled(vcpu)) {
kvm_clear_async_pf_completion_queue(vcpu);
kvm_async_pf_hash_reset(vcpu);
return 0;
}
if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa,
- sizeof(u32)))
+ sizeof(u64)))
return 1;
vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS);
vcpu->arch.apf.delivery_as_pf_vmexit = data & KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;
+
kvm_async_pf_wakeup_all(vcpu);
+
+ return 0;
+}
+
+static int kvm_pv_enable_async_pf_int(struct kvm_vcpu *vcpu, u64 data)
+{
+ /* Bits 8-63 are reserved */
+ if (data >> 8)
+ return 1;
+
+ if (!lapic_in_kernel(vcpu))
+ return 1;
+
+ vcpu->arch.apf.msr_int_val = data;
+
+ vcpu->arch.apf.vec = data & KVM_ASYNC_PF_VEC_MASK;
+
return 0;
}
@@ -2677,10 +2736,16 @@ static void kvmclock_reset(struct kvm_vcpu *vcpu)
vcpu->arch.time = 0;
}
-static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
+static void kvm_vcpu_flush_tlb_all(struct kvm_vcpu *vcpu)
{
++vcpu->stat.tlb_flush;
- kvm_x86_ops.tlb_flush(vcpu, invalidate_gpa);
+ kvm_x86_ops.tlb_flush_all(vcpu);
+}
+
+static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu)
+{
+ ++vcpu->stat.tlb_flush;
+ kvm_x86_ops.tlb_flush_guest(vcpu);
}
static void record_steal_time(struct kvm_vcpu *vcpu)
@@ -2706,7 +2771,7 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
st->preempted & KVM_VCPU_FLUSH_TLB);
if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB)
- kvm_vcpu_flush_tlb(vcpu, false);
+ kvm_vcpu_flush_tlb_guest(vcpu);
vcpu->arch.st.preempted = 0;
@@ -2883,6 +2948,16 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (kvm_pv_enable_async_pf(vcpu, data))
return 1;
break;
+ case MSR_KVM_ASYNC_PF_INT:
+ if (kvm_pv_enable_async_pf_int(vcpu, data))
+ return 1;
+ break;
+ case MSR_KVM_ASYNC_PF_ACK:
+ if (data & 0x1) {
+ vcpu->arch.apf.pageready_pending = false;
+ kvm_check_async_pf_completion(vcpu);
+ }
+ break;
case MSR_KVM_STEAL_TIME:
if (unlikely(!sched_info_on()))
@@ -2940,6 +3015,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
*/
break;
case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
+ case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
+ case HV_X64_MSR_SYNDBG_OPTIONS:
case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
case HV_X64_MSR_CRASH_CTL:
case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT:
@@ -3060,6 +3137,17 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_PERF_CTL:
case MSR_AMD64_DC_CFG:
case MSR_F15H_EX_CFG:
+ /*
+ * Intel Sandy Bridge CPUs must support the RAPL (running average power
+ * limit) MSRs. Just return 0, as we do not want to expose the host
+ * data here. Do not conditionalize this on CPUID, as KVM does not do
+ * so for existing CPU-specific MSRs.
+ */
+ case MSR_RAPL_POWER_UNIT:
+ case MSR_PP0_ENERGY_STATUS: /* Power plane 0 (core) */
+ case MSR_PP1_ENERGY_STATUS: /* Power plane 1 (graphics uncore) */
+ case MSR_PKG_ENERGY_STATUS: /* Total package */
+ case MSR_DRAM_ENERGY_STATUS: /* DRAM controller */
msr_info->data = 0;
break;
case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5:
@@ -3068,7 +3156,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1:
if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
- return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data);
+ return kvm_pmu_get_msr(vcpu, msr_info);
msr_info->data = 0;
break;
case MSR_IA32_UCODE_REV:
@@ -3146,7 +3234,13 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = vcpu->arch.time;
break;
case MSR_KVM_ASYNC_PF_EN:
- msr_info->data = vcpu->arch.apf.msr_val;
+ msr_info->data = vcpu->arch.apf.msr_en_val;
+ break;
+ case MSR_KVM_ASYNC_PF_INT:
+ msr_info->data = vcpu->arch.apf.msr_int_val;
+ break;
+ case MSR_KVM_ASYNC_PF_ACK:
+ msr_info->data = 0;
break;
case MSR_KVM_STEAL_TIME:
msr_info->data = vcpu->arch.st.msr_val;
@@ -3184,6 +3278,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = 0x20000000;
break;
case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
+ case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
+ case HV_X64_MSR_SYNDBG_OPTIONS:
case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
case HV_X64_MSR_CRASH_CTL:
case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT:
@@ -3230,7 +3326,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
default:
if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
- return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data);
+ return kvm_pmu_get_msr(vcpu, msr_info);
if (!ignore_msrs) {
vcpu_debug_ratelimited(vcpu, "unhandled rdmsr: 0x%x\n",
msr_info->index);
@@ -3360,6 +3456,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_X86_ROBUST_SINGLESTEP:
case KVM_CAP_XSAVE:
case KVM_CAP_ASYNC_PF:
+ case KVM_CAP_ASYNC_PF_INT:
case KVM_CAP_GET_TSC_KHZ:
case KVM_CAP_KVMCLOCK_CTRL:
case KVM_CAP_READONLY_MEM:
@@ -3374,6 +3471,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_GET_MSR_FEATURES:
case KVM_CAP_MSR_PLATFORM_INFO:
case KVM_CAP_EXCEPTION_PAYLOAD:
+ case KVM_CAP_SET_GUEST_DEBUG:
r = 1;
break;
case KVM_CAP_SYNC_REGS:
@@ -3427,14 +3525,14 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = KVM_X2APIC_API_VALID_FLAGS;
break;
case KVM_CAP_NESTED_STATE:
- r = kvm_x86_ops.get_nested_state ?
- kvm_x86_ops.get_nested_state(NULL, NULL, 0) : 0;
+ r = kvm_x86_ops.nested_ops->get_state ?
+ kvm_x86_ops.nested_ops->get_state(NULL, NULL, 0) : 0;
break;
case KVM_CAP_HYPERV_DIRECT_TLBFLUSH:
r = kvm_x86_ops.enable_direct_tlbflush != NULL;
break;
case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
- r = kvm_x86_ops.nested_enable_evmcs != NULL;
+ r = kvm_x86_ops.nested_ops->enable_evmcs != NULL;
break;
default:
break;
@@ -3559,6 +3657,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
kvm_x86_ops.vcpu_load(vcpu, cpu);
+ /* Save host pkru register if supported */
+ vcpu->arch.host_pkru = read_pkru();
+
/* Apply any externally detected TSC adjustments (due to suspend) */
if (unlikely(vcpu->arch.tsc_offset_adjustment)) {
adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment);
@@ -3752,7 +3853,7 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
unsigned bank_num = mcg_cap & 0xff, bank;
r = -EINVAL;
- if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS)
+ if (!bank_num || bank_num > KVM_MAX_MCE_BANKS)
goto out;
if (mcg_cap & ~(kvm_mce_cap_supported | 0xff | 0xff0000))
goto out;
@@ -4010,7 +4111,6 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
kvm_update_dr0123(vcpu);
vcpu->arch.dr6 = dbgregs->dr6;
- kvm_update_dr6(vcpu);
vcpu->arch.dr7 = dbgregs->dr7;
kvm_update_dr7(vcpu);
@@ -4220,9 +4320,9 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
return kvm_hv_activate_synic(vcpu, cap->cap ==
KVM_CAP_HYPERV_SYNIC2);
case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
- if (!kvm_x86_ops.nested_enable_evmcs)
+ if (!kvm_x86_ops.nested_ops->enable_evmcs)
return -ENOTTY;
- r = kvm_x86_ops.nested_enable_evmcs(vcpu, &vmcs_version);
+ r = kvm_x86_ops.nested_ops->enable_evmcs(vcpu, &vmcs_version);
if (!r) {
user_ptr = (void __user *)(uintptr_t)cap->args[0];
if (copy_to_user(user_ptr, &vmcs_version,
@@ -4537,7 +4637,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
u32 user_data_size;
r = -EINVAL;
- if (!kvm_x86_ops.get_nested_state)
+ if (!kvm_x86_ops.nested_ops->get_state)
break;
BUILD_BUG_ON(sizeof(user_data_size) != sizeof(user_kvm_nested_state->size));
@@ -4545,8 +4645,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
if (get_user(user_data_size, &user_kvm_nested_state->size))
break;
- r = kvm_x86_ops.get_nested_state(vcpu, user_kvm_nested_state,
- user_data_size);
+ r = kvm_x86_ops.nested_ops->get_state(vcpu, user_kvm_nested_state,
+ user_data_size);
if (r < 0)
break;
@@ -4567,7 +4667,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
int idx;
r = -EINVAL;
- if (!kvm_x86_ops.set_nested_state)
+ if (!kvm_x86_ops.nested_ops->set_state)
break;
r = -EFAULT;
@@ -4580,7 +4680,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
if (kvm_state.flags &
~(KVM_STATE_NESTED_RUN_PENDING | KVM_STATE_NESTED_GUEST_MODE
- | KVM_STATE_NESTED_EVMCS))
+ | KVM_STATE_NESTED_EVMCS | KVM_STATE_NESTED_MTF_PENDING
+ | KVM_STATE_NESTED_GIF_SET))
break;
/* nested_run_pending implies guest_mode. */
@@ -4589,7 +4690,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
break;
idx = srcu_read_lock(&vcpu->kvm->srcu);
- r = kvm_x86_ops.set_nested_state(vcpu, user_kvm_nested_state, &kvm_state);
+ r = kvm_x86_ops.nested_ops->set_state(vcpu, user_kvm_nested_state, &kvm_state);
srcu_read_unlock(&vcpu->kvm->srcu, idx);
break;
}
@@ -5049,10 +5150,13 @@ set_identity_unlock:
r = -EFAULT;
if (copy_from_user(&u.ps, argp, sizeof(u.ps)))
goto out;
+ mutex_lock(&kvm->lock);
r = -ENXIO;
if (!kvm->arch.vpit)
- goto out;
+ goto set_pit_out;
r = kvm_vm_ioctl_set_pit(kvm, &u.ps);
+set_pit_out:
+ mutex_unlock(&kvm->lock);
break;
}
case KVM_GET_PIT2: {
@@ -5072,10 +5176,13 @@ set_identity_unlock:
r = -EFAULT;
if (copy_from_user(&u.ps2, argp, sizeof(u.ps2)))
goto out;
+ mutex_lock(&kvm->lock);
r = -ENXIO;
if (!kvm->arch.vpit)
- goto out;
+ goto set_pit2_out;
r = kvm_vm_ioctl_set_pit2(kvm, &u.ps2);
+set_pit2_out:
+ mutex_unlock(&kvm->lock);
break;
}
case KVM_REINJECT_CONTROL: {
@@ -5230,6 +5337,10 @@ static void kvm_init_msr_list(void)
if (!kvm_cpu_cap_has(X86_FEATURE_RDTSCP))
continue;
break;
+ case MSR_IA32_UMWAIT_CONTROL:
+ if (!kvm_cpu_cap_has(X86_FEATURE_WAITPKG))
+ continue;
+ break;
case MSR_IA32_RTIT_CTL:
case MSR_IA32_RTIT_STATUS:
if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT))
@@ -5247,7 +5358,7 @@ static void kvm_init_msr_list(void)
!intel_pt_validate_hw_cap(PT_CAP_single_range_output)))
continue;
break;
- case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B: {
+ case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B:
if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT) ||
msrs_to_save_all[i] - MSR_IA32_RTIT_ADDR0_A >=
intel_pt_validate_hw_cap(PT_CAP_num_address_ranges) * 2)
@@ -5262,7 +5373,7 @@ static void kvm_init_msr_list(void)
if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_EVENTSEL0 >=
min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp))
continue;
- }
+ break;
default:
break;
}
@@ -6391,7 +6502,7 @@ static bool inject_emulated_exception(struct kvm_vcpu *vcpu)
{
struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
if (ctxt->exception.vector == PF_VECTOR)
- return kvm_propagate_fault(vcpu, &ctxt->exception);
+ return kvm_inject_emulated_page_fault(vcpu, &ctxt->exception);
if (ctxt->exception.error_code_valid)
kvm_queue_exception_e(vcpu, ctxt->exception.vector,
@@ -6654,7 +6765,7 @@ static int kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu)
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | DR6_RTM;
- kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
+ kvm_run->debug.arch.pc = kvm_get_linear_rip(vcpu);
kvm_run->debug.arch.exception = DB_VECTOR;
kvm_run->exit_reason = KVM_EXIT_DEBUG;
return 0;
@@ -6714,9 +6825,7 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
vcpu->arch.db);
if (dr6 != 0) {
- vcpu->arch.dr6 &= ~DR_TRAP_BITS;
- vcpu->arch.dr6 |= dr6 | DR6_RTM;
- kvm_queue_exception(vcpu, DB_VECTOR);
+ kvm_queue_exception_p(vcpu, DB_VECTOR, dr6);
*r = 1;
return true;
}
@@ -7659,14 +7768,17 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
kvm_x86_ops.update_cr8_intercept(vcpu, tpr, max_irr);
}
-static int inject_pending_event(struct kvm_vcpu *vcpu)
+static void inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit)
{
int r;
+ bool can_inject = true;
/* try to reinject previous events if any */
- if (vcpu->arch.exception.injected)
+ if (vcpu->arch.exception.injected) {
kvm_x86_ops.queue_exception(vcpu);
+ can_inject = false;
+ }
/*
* Do not inject an NMI or interrupt if there is a pending
* exception. Exceptions and interrupts are recognized at
@@ -7682,22 +7794,28 @@ static int inject_pending_event(struct kvm_vcpu *vcpu)
* fully complete the previous instruction.
*/
else if (!vcpu->arch.exception.pending) {
- if (vcpu->arch.nmi_injected)
+ if (vcpu->arch.nmi_injected) {
kvm_x86_ops.set_nmi(vcpu);
- else if (vcpu->arch.interrupt.injected)
+ can_inject = false;
+ } else if (vcpu->arch.interrupt.injected) {
kvm_x86_ops.set_irq(vcpu);
+ can_inject = false;
+ }
}
+ WARN_ON_ONCE(vcpu->arch.exception.injected &&
+ vcpu->arch.exception.pending);
+
/*
* Call check_nested_events() even if we reinjected a previous event
* in order for caller to determine if it should require immediate-exit
* from L2 to L1 due to pending L1 events which require exit
* from L2 to L1.
*/
- if (is_guest_mode(vcpu) && kvm_x86_ops.check_nested_events) {
- r = kvm_x86_ops.check_nested_events(vcpu);
- if (r != 0)
- return r;
+ if (is_guest_mode(vcpu)) {
+ r = kvm_x86_ops.nested_ops->check_events(vcpu);
+ if (r < 0)
+ goto busy;
}
/* try to inject new event if pending */
@@ -7706,7 +7824,6 @@ static int inject_pending_event(struct kvm_vcpu *vcpu)
vcpu->arch.exception.has_error_code,
vcpu->arch.exception.error_code);
- WARN_ON_ONCE(vcpu->arch.exception.injected);
vcpu->arch.exception.pending = false;
vcpu->arch.exception.injected = true;
@@ -7715,16 +7832,6 @@ static int inject_pending_event(struct kvm_vcpu *vcpu)
X86_EFLAGS_RF);
if (vcpu->arch.exception.nr == DB_VECTOR) {
- /*
- * This code assumes that nSVM doesn't use
- * check_nested_events(). If it does, the
- * DR6/DR7 changes should happen before L1
- * gets a #VMEXIT for an intercepted #DB in
- * L2. (Under VMX, on the other hand, the
- * DR6/DR7 changes should not happen in the
- * event of a VM-exit to L1 for an intercepted
- * #DB in L2.)
- */
kvm_deliver_exception_payload(vcpu);
if (vcpu->arch.dr7 & DR7_GD) {
vcpu->arch.dr7 &= ~DR7_GD;
@@ -7733,42 +7840,72 @@ static int inject_pending_event(struct kvm_vcpu *vcpu)
}
kvm_x86_ops.queue_exception(vcpu);
+ can_inject = false;
}
- /* Don't consider new event if we re-injected an event */
- if (kvm_event_needs_reinjection(vcpu))
- return 0;
+ /*
+ * Finally, inject interrupt events. If an event cannot be injected
+ * due to architectural conditions (e.g. IF=0) a window-open exit
+ * will re-request KVM_REQ_EVENT. Sometimes however an event is pending
+ * and can architecturally be injected, but we cannot do it right now:
+ * an interrupt could have arrived just now and we have to inject it
+ * as a vmexit, or there could already an event in the queue, which is
+ * indicated by can_inject. In that case we request an immediate exit
+ * in order to make progress and get back here for another iteration.
+ * The kvm_x86_ops hooks communicate this by returning -EBUSY.
+ */
+ if (vcpu->arch.smi_pending) {
+ r = can_inject ? kvm_x86_ops.smi_allowed(vcpu, true) : -EBUSY;
+ if (r < 0)
+ goto busy;
+ if (r) {
+ vcpu->arch.smi_pending = false;
+ ++vcpu->arch.smi_count;
+ enter_smm(vcpu);
+ can_inject = false;
+ } else
+ kvm_x86_ops.enable_smi_window(vcpu);
+ }
- if (vcpu->arch.smi_pending && !is_smm(vcpu) &&
- kvm_x86_ops.smi_allowed(vcpu)) {
- vcpu->arch.smi_pending = false;
- ++vcpu->arch.smi_count;
- enter_smm(vcpu);
- } else if (vcpu->arch.nmi_pending && kvm_x86_ops.nmi_allowed(vcpu)) {
- --vcpu->arch.nmi_pending;
- vcpu->arch.nmi_injected = true;
- kvm_x86_ops.set_nmi(vcpu);
- } else if (kvm_cpu_has_injectable_intr(vcpu)) {
- /*
- * Because interrupts can be injected asynchronously, we are
- * calling check_nested_events again here to avoid a race condition.
- * See https://lkml.org/lkml/2014/7/2/60 for discussion about this
- * proposal and current concerns. Perhaps we should be setting
- * KVM_REQ_EVENT only on certain events and not unconditionally?
- */
- if (is_guest_mode(vcpu) && kvm_x86_ops.check_nested_events) {
- r = kvm_x86_ops.check_nested_events(vcpu);
- if (r != 0)
- return r;
+ if (vcpu->arch.nmi_pending) {
+ r = can_inject ? kvm_x86_ops.nmi_allowed(vcpu, true) : -EBUSY;
+ if (r < 0)
+ goto busy;
+ if (r) {
+ --vcpu->arch.nmi_pending;
+ vcpu->arch.nmi_injected = true;
+ kvm_x86_ops.set_nmi(vcpu);
+ can_inject = false;
+ WARN_ON(kvm_x86_ops.nmi_allowed(vcpu, true) < 0);
}
- if (kvm_x86_ops.interrupt_allowed(vcpu)) {
- kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
- false);
+ if (vcpu->arch.nmi_pending)
+ kvm_x86_ops.enable_nmi_window(vcpu);
+ }
+
+ if (kvm_cpu_has_injectable_intr(vcpu)) {
+ r = can_inject ? kvm_x86_ops.interrupt_allowed(vcpu, true) : -EBUSY;
+ if (r < 0)
+ goto busy;
+ if (r) {
+ kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), false);
kvm_x86_ops.set_irq(vcpu);
+ WARN_ON(kvm_x86_ops.interrupt_allowed(vcpu, true) < 0);
}
+ if (kvm_cpu_has_injectable_intr(vcpu))
+ kvm_x86_ops.enable_irq_window(vcpu);
}
- return 0;
+ if (is_guest_mode(vcpu) &&
+ kvm_x86_ops.nested_ops->hv_timer_pending &&
+ kvm_x86_ops.nested_ops->hv_timer_pending(vcpu))
+ *req_immediate_exit = true;
+
+ WARN_ON(vcpu->arch.exception.pending);
+ return;
+
+busy:
+ *req_immediate_exit = true;
+ return;
}
static void process_nmi(struct kvm_vcpu *vcpu)
@@ -8037,7 +8174,7 @@ void kvm_make_scan_ioapic_request_mask(struct kvm *kvm,
zalloc_cpumask_var(&cpus, GFP_ATOMIC);
kvm_make_vcpus_request_mask(kvm, KVM_REQ_SCAN_IOAPIC,
- vcpu_bitmap, cpus);
+ NULL, vcpu_bitmap, cpus);
free_cpumask_var(cpus);
}
@@ -8067,6 +8204,7 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv);
*/
void kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit)
{
+ struct kvm_vcpu *except;
unsigned long old, new, expected;
if (!kvm_x86_ops.check_apicv_inhibit_reasons ||
@@ -8091,7 +8229,17 @@ void kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit)
trace_kvm_apicv_update_request(activate, bit);
if (kvm_x86_ops.pre_update_apicv_exec_ctrl)
kvm_x86_ops.pre_update_apicv_exec_ctrl(kvm, activate);
- kvm_make_all_cpus_request(kvm, KVM_REQ_APICV_UPDATE);
+
+ /*
+ * Sending request to update APICV for all other vcpus,
+ * while update the calling vcpu immediately instead of
+ * waiting for another #VMEXIT to handle the request.
+ */
+ except = kvm_get_running_vcpu();
+ kvm_make_all_cpus_request_except(kvm, KVM_REQ_APICV_UPDATE,
+ except);
+ if (except)
+ kvm_vcpu_update_apicv(except);
}
EXPORT_SYMBOL_GPL(kvm_request_apicv_update);
@@ -8148,24 +8296,13 @@ int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
{
- struct page *page = NULL;
-
if (!lapic_in_kernel(vcpu))
return;
if (!kvm_x86_ops.set_apic_access_page_addr)
return;
- page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
- if (is_error_page(page))
- return;
- kvm_x86_ops.set_apic_access_page_addr(vcpu, page_to_phys(page));
-
- /*
- * Do not pin apic access page in memory, the MMU notifier
- * will call us again if it is migrated or swapped out.
- */
- put_page(page);
+ kvm_x86_ops.set_apic_access_page_addr(vcpu);
}
void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu)
@@ -8185,13 +8322,13 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
bool req_int_win =
dm_request_for_irq_injection(vcpu) &&
kvm_cpu_accept_dm_intr(vcpu);
- enum exit_fastpath_completion exit_fastpath = EXIT_FASTPATH_NONE;
+ fastpath_t exit_fastpath;
bool req_immediate_exit = false;
if (kvm_request_pending(vcpu)) {
if (kvm_check_request(KVM_REQ_GET_VMCS12_PAGES, vcpu)) {
- if (unlikely(!kvm_x86_ops.get_vmcs12_pages(vcpu))) {
+ if (unlikely(!kvm_x86_ops.nested_ops->get_vmcs12_pages(vcpu))) {
r = 0;
goto out;
}
@@ -8213,8 +8350,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_mmu_sync_roots(vcpu);
if (kvm_check_request(KVM_REQ_LOAD_MMU_PGD, vcpu))
kvm_mmu_load_pgd(vcpu);
- if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
- kvm_vcpu_flush_tlb(vcpu, true);
+ if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
+ kvm_vcpu_flush_tlb_all(vcpu);
+
+ /* Flushing all ASIDs flushes the current ASID... */
+ kvm_clear_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
+ }
+ if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
+ kvm_vcpu_flush_tlb_current(vcpu);
+ if (kvm_check_request(KVM_REQ_HV_TLB_FLUSH, vcpu))
+ kvm_vcpu_flush_tlb_guest(vcpu);
+
if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
r = 0;
@@ -8287,6 +8433,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_hv_process_stimers(vcpu);
if (kvm_check_request(KVM_REQ_APICV_UPDATE, vcpu))
kvm_vcpu_update_apicv(vcpu);
+ if (kvm_check_request(KVM_REQ_APF_READY, vcpu))
+ kvm_check_async_pf_completion(vcpu);
}
if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
@@ -8297,32 +8445,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
goto out;
}
- if (inject_pending_event(vcpu) != 0)
- req_immediate_exit = true;
- else {
- /* Enable SMI/NMI/IRQ window open exits if needed.
- *
- * SMIs have three cases:
- * 1) They can be nested, and then there is nothing to
- * do here because RSM will cause a vmexit anyway.
- * 2) There is an ISA-specific reason why SMI cannot be
- * injected, and the moment when this changes can be
- * intercepted.
- * 3) Or the SMI can be pending because
- * inject_pending_event has completed the injection
- * of an IRQ or NMI from the previous vmexit, and
- * then we request an immediate exit to inject the
- * SMI.
- */
- if (vcpu->arch.smi_pending && !is_smm(vcpu))
- if (!kvm_x86_ops.enable_smi_window(vcpu))
- req_immediate_exit = true;
- if (vcpu->arch.nmi_pending)
- kvm_x86_ops.enable_nmi_window(vcpu);
- if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
- kvm_x86_ops.enable_irq_window(vcpu);
- WARN_ON(vcpu->arch.exception.pending);
- }
+ inject_pending_event(vcpu, &req_immediate_exit);
+ if (req_int_win)
+ kvm_x86_ops.enable_irq_window(vcpu);
if (kvm_lapic_enabled(vcpu)) {
update_cr8_intercept(vcpu);
@@ -8370,8 +8495,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (kvm_lapic_enabled(vcpu) && vcpu->arch.apicv_active)
kvm_x86_ops.sync_pir_to_irr(vcpu);
- if (vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu)
- || need_resched() || signal_pending(current)) {
+ if (kvm_vcpu_exit_request(vcpu)) {
vcpu->mode = OUTSIDE_GUEST_MODE;
smp_wmb();
local_irq_enable();
@@ -8403,7 +8527,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
}
- kvm_x86_ops.run(vcpu);
+ exit_fastpath = kvm_x86_ops.run(vcpu);
/*
* Do this here before restoring debug registers on the host. And
@@ -8415,7 +8539,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
WARN_ON(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP);
kvm_x86_ops.sync_dirty_debug_regs(vcpu);
kvm_update_dr0123(vcpu);
- kvm_update_dr6(vcpu);
kvm_update_dr7(vcpu);
vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
}
@@ -8435,7 +8558,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
vcpu->mode = OUTSIDE_GUEST_MODE;
smp_wmb();
- kvm_x86_ops.handle_exit_irqoff(vcpu, &exit_fastpath);
+ kvm_x86_ops.handle_exit_irqoff(vcpu);
/*
* Consume any pending interrupts, including the possible source of
@@ -8482,6 +8605,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
return r;
cancel_injection:
+ if (req_immediate_exit)
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
kvm_x86_ops.cancel_injection(vcpu);
if (unlikely(vcpu->arch.apic_attention))
kvm_lapic_sync_from_vapic(vcpu);
@@ -8524,8 +8649,8 @@ static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu)
{
- if (is_guest_mode(vcpu) && kvm_x86_ops.check_nested_events)
- kvm_x86_ops.check_nested_events(vcpu);
+ if (is_guest_mode(vcpu))
+ kvm_x86_ops.nested_ops->check_events(vcpu);
return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
!vcpu->arch.apf.halted);
@@ -8561,8 +8686,6 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
break;
}
- kvm_check_async_pf_completion(vcpu);
-
if (signal_pending(current)) {
r = -EINTR;
vcpu->run->exit_reason = KVM_EXIT_INTR;
@@ -8707,8 +8830,9 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
trace_kvm_fpu(0);
}
-int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{
+ struct kvm_run *kvm_run = vcpu->run;
int r;
vcpu_load(vcpu);
@@ -8726,18 +8850,18 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
r = -EAGAIN;
if (signal_pending(current)) {
r = -EINTR;
- vcpu->run->exit_reason = KVM_EXIT_INTR;
+ kvm_run->exit_reason = KVM_EXIT_INTR;
++vcpu->stat.signal_exits;
}
goto out;
}
- if (vcpu->run->kvm_valid_regs & ~KVM_SYNC_X86_VALID_FIELDS) {
+ if (kvm_run->kvm_valid_regs & ~KVM_SYNC_X86_VALID_FIELDS) {
r = -EINVAL;
goto out;
}
- if (vcpu->run->kvm_dirty_regs) {
+ if (kvm_run->kvm_dirty_regs) {
r = sync_regs(vcpu);
if (r != 0)
goto out;
@@ -8767,7 +8891,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
out:
kvm_put_guest_fpu(vcpu);
- if (vcpu->run->kvm_valid_regs)
+ if (kvm_run->kvm_valid_regs)
store_regs(vcpu);
post_kvm_run_save(vcpu);
kvm_sigset_deactivate(vcpu);
@@ -9359,9 +9483,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
}
fx_init(vcpu);
- vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
-
vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
+ vcpu->arch.tdp_level = kvm_x86_ops.get_tdp_level(vcpu);
vcpu->arch.pat = MSR_IA32_CR_PAT_DEFAULT;
@@ -9476,14 +9599,14 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
kvm_update_dr0123(vcpu);
vcpu->arch.dr6 = DR6_INIT;
- kvm_update_dr6(vcpu);
vcpu->arch.dr7 = DR7_FIXED_1;
kvm_update_dr7(vcpu);
vcpu->arch.cr2 = 0;
kvm_make_request(KVM_REQ_EVENT, vcpu);
- vcpu->arch.apf.msr_val = 0;
+ vcpu->arch.apf.msr_en_val = 0;
+ vcpu->arch.apf.msr_int_val = 0;
vcpu->arch.st.msr_val = 0;
kvmclock_reset(vcpu);
@@ -9658,7 +9781,9 @@ int kvm_arch_hardware_setup(void *opaque)
if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
supported_xss = 0;
- cr4_reserved_bits = kvm_host_cr4_reserved_bits(&boot_cpu_data);
+#define __kvm_cpu_cap_has(UNUSED_, f) kvm_cpu_cap_has(f)
+ cr4_reserved_bits = __cr4_reserved_bits(__kvm_cpu_cap_has, UNUSED_);
+#undef __kvm_cpu_cap_has
if (kvm_has_tsc_control) {
/*
@@ -9690,7 +9815,8 @@ int kvm_arch_check_processor_compat(void *opaque)
WARN_ON(!irqs_disabled());
- if (kvm_host_cr4_reserved_bits(c) != cr4_reserved_bits)
+ if (__cr4_reserved_bits(cpu_has, c) !=
+ __cr4_reserved_bits(cpu_has, &boot_cpu_data))
return -EIO;
return ops->check_processor_compatibility();
@@ -10018,7 +10144,7 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
{
/* Still write protect RO slot */
if (new->flags & KVM_MEM_READONLY) {
- kvm_mmu_slot_remove_write_access(kvm, new, PT_PAGE_TABLE_LEVEL);
+ kvm_mmu_slot_remove_write_access(kvm, new, PG_LEVEL_4K);
return;
}
@@ -10058,7 +10184,7 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
} else {
int level =
kvm_dirty_log_manual_protect_and_init_set(kvm) ?
- PT_DIRECTORY_LEVEL : PT_PAGE_TABLE_LEVEL;
+ PG_LEVEL_2M : PG_LEVEL_4K;
/*
* If we're with initial-all-set, we don't need
@@ -10160,11 +10286,12 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
(vcpu->arch.nmi_pending &&
- kvm_x86_ops.nmi_allowed(vcpu)))
+ kvm_x86_ops.nmi_allowed(vcpu, false)))
return true;
if (kvm_test_request(KVM_REQ_SMI, vcpu) ||
- (vcpu->arch.smi_pending && !is_smm(vcpu)))
+ (vcpu->arch.smi_pending &&
+ kvm_x86_ops.smi_allowed(vcpu, false)))
return true;
if (kvm_arch_interrupt_allowed(vcpu) &&
@@ -10175,6 +10302,11 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
if (kvm_hv_has_stimer_pending(vcpu))
return true;
+ if (is_guest_mode(vcpu) &&
+ kvm_x86_ops.nested_ops->hv_timer_pending &&
+ kvm_x86_ops.nested_ops->hv_timer_pending(vcpu))
+ return true;
+
return false;
}
@@ -10211,7 +10343,7 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
{
- return kvm_x86_ops.interrupt_allowed(vcpu);
+ return kvm_x86_ops.interrupt_allowed(vcpu, false);
}
unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu)
@@ -10276,12 +10408,14 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
static inline u32 kvm_async_pf_hash_fn(gfn_t gfn)
{
+ BUILD_BUG_ON(!is_power_of_2(ASYNC_PF_PER_VCPU));
+
return hash_32(gfn & 0xffffffff, order_base_2(ASYNC_PF_PER_VCPU));
}
static inline u32 kvm_async_pf_next_probe(u32 key)
{
- return (key + 1) & (roundup_pow_of_two(ASYNC_PF_PER_VCPU) - 1);
+ return (key + 1) & (ASYNC_PF_PER_VCPU - 1);
}
static void kvm_add_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
@@ -10299,7 +10433,7 @@ static u32 kvm_async_pf_gfn_slot(struct kvm_vcpu *vcpu, gfn_t gfn)
int i;
u32 key = kvm_async_pf_hash_fn(gfn);
- for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU) &&
+ for (i = 0; i < ASYNC_PF_PER_VCPU &&
(vcpu->arch.apf.gfns[key] != gfn &&
vcpu->arch.apf.gfns[key] != ~0); i++)
key = kvm_async_pf_next_probe(key);
@@ -10317,6 +10451,10 @@ static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
u32 i, j, k;
i = j = kvm_async_pf_gfn_slot(vcpu, gfn);
+
+ if (WARN_ON_ONCE(vcpu->arch.apf.gfns[i] != gfn))
+ return;
+
while (true) {
vcpu->arch.apf.gfns[i] = ~0;
do {
@@ -10335,18 +10473,32 @@ static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
}
}
-static int apf_put_user(struct kvm_vcpu *vcpu, u32 val)
+static inline int apf_put_user_notpresent(struct kvm_vcpu *vcpu)
{
+ u32 reason = KVM_PV_REASON_PAGE_NOT_PRESENT;
- return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, &val,
- sizeof(val));
+ return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, &reason,
+ sizeof(reason));
}
-static int apf_get_user(struct kvm_vcpu *vcpu, u32 *val)
+static inline int apf_put_user_ready(struct kvm_vcpu *vcpu, u32 token)
{
+ unsigned int offset = offsetof(struct kvm_vcpu_pv_apf_data, token);
- return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, val,
- sizeof(u32));
+ return kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.apf.data,
+ &token, offset, sizeof(token));
+}
+
+static inline bool apf_pageready_slot_free(struct kvm_vcpu *vcpu)
+{
+ unsigned int offset = offsetof(struct kvm_vcpu_pv_apf_data, token);
+ u32 val;
+
+ if (kvm_read_guest_offset_cached(vcpu->kvm, &vcpu->arch.apf.data,
+ &val, offset, sizeof(val)))
+ return false;
+
+ return !val;
}
static bool kvm_can_deliver_async_pf(struct kvm_vcpu *vcpu)
@@ -10354,9 +10506,8 @@ static bool kvm_can_deliver_async_pf(struct kvm_vcpu *vcpu)
if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu))
return false;
- if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) ||
- (vcpu->arch.apf.send_user_only &&
- kvm_x86_ops.get_cpl(vcpu) == 0))
+ if (!kvm_pv_async_pf_enabled(vcpu) ||
+ (vcpu->arch.apf.send_user_only && kvm_x86_ops.get_cpl(vcpu) == 0))
return false;
return true;
@@ -10376,7 +10527,7 @@ bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
* If interrupts are off we cannot even use an artificial
* halt state.
*/
- return kvm_x86_ops.interrupt_allowed(vcpu);
+ return kvm_arch_interrupt_allowed(vcpu);
}
void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
@@ -10388,7 +10539,7 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
kvm_add_async_pf_gfn(vcpu, work->arch.gfn);
if (kvm_can_deliver_async_pf(vcpu) &&
- !apf_put_user(vcpu, KVM_PV_REASON_PAGE_NOT_PRESENT)) {
+ !apf_put_user_notpresent(vcpu)) {
fault.vector = PF_VECTOR;
fault.error_code_valid = true;
fault.error_code = 0;
@@ -10412,8 +10563,10 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work)
{
- struct x86_exception fault;
- u32 val;
+ struct kvm_lapic_irq irq = {
+ .delivery_mode = APIC_DM_FIXED,
+ .vector = vcpu->arch.apf.vec
+ };
if (work->wakeup_all)
work->arch.token = ~0; /* broadcast wakeup */
@@ -10421,39 +10574,29 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
trace_kvm_async_pf_ready(work->arch.token, work->cr2_or_gpa);
- if (vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED &&
- !apf_get_user(vcpu, &val)) {
- if (val == KVM_PV_REASON_PAGE_NOT_PRESENT &&
- vcpu->arch.exception.pending &&
- vcpu->arch.exception.nr == PF_VECTOR &&
- !apf_put_user(vcpu, 0)) {
- vcpu->arch.exception.injected = false;
- vcpu->arch.exception.pending = false;
- vcpu->arch.exception.nr = 0;
- vcpu->arch.exception.has_error_code = false;
- vcpu->arch.exception.error_code = 0;
- vcpu->arch.exception.has_payload = false;
- vcpu->arch.exception.payload = 0;
- } else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) {
- fault.vector = PF_VECTOR;
- fault.error_code_valid = true;
- fault.error_code = 0;
- fault.nested_page_fault = false;
- fault.address = work->arch.token;
- fault.async_page_fault = true;
- kvm_inject_page_fault(vcpu, &fault);
- }
+ if (kvm_pv_async_pf_enabled(vcpu) &&
+ !apf_put_user_ready(vcpu, work->arch.token)) {
+ vcpu->arch.apf.pageready_pending = true;
+ kvm_apic_set_irq(vcpu, &irq, NULL);
}
+
vcpu->arch.apf.halted = false;
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
}
-bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
+void kvm_arch_async_page_present_queued(struct kvm_vcpu *vcpu)
+{
+ kvm_make_request(KVM_REQ_APF_READY, vcpu);
+ if (!vcpu->arch.apf.pageready_pending)
+ kvm_vcpu_kick(vcpu);
+}
+
+bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
{
- if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED))
+ if (!kvm_pv_async_pf_enabled(vcpu))
return true;
else
- return kvm_can_do_async_pf(vcpu);
+ return apf_pageready_slot_free(vcpu);
}
void kvm_arch_start_assignment(struct kvm *kvm)