diff options
Diffstat (limited to 'arch/x86/kvm/lapic.c')
-rw-r--r-- | arch/x86/kvm/lapic.c | 917 |
1 files changed, 601 insertions, 316 deletions
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index e3099c642fec..d7639d126e6c 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -27,6 +27,7 @@ #include <linux/math64.h> #include <linux/slab.h> #include <asm/processor.h> +#include <asm/mce.h> #include <asm/msr.h> #include <asm/page.h> #include <asm/current.h> @@ -36,6 +37,7 @@ #include <linux/jump_label.h> #include "kvm_cache_regs.h" #include "irq.h" +#include "ioapic.h" #include "trace.h" #include "x86.h" #include "cpuid.h" @@ -53,15 +55,12 @@ #define PRIo64 "o" /* 14 is the version for Xeon and Pentium 8.4.8*/ -#define APIC_VERSION (0x14UL | ((KVM_APIC_LVT_NUM - 1) << 16)) +#define APIC_VERSION 0x14UL #define LAPIC_MMIO_LENGTH (1 << 12) /* followed define is not in apicdef.h */ #define MAX_APIC_VECTOR 256 #define APIC_VECTORS_PER_REG 32 -#define APIC_BROADCAST 0xFF -#define X2APIC_BROADCAST 0xFFFFFFFFul - static bool lapic_timer_advance_dynamic __read_mostly; #define LAPIC_TIMER_ADVANCE_ADJUST_MIN 100 /* clock cycles */ #define LAPIC_TIMER_ADVANCE_ADJUST_MAX 10000 /* clock cycles */ @@ -69,6 +68,41 @@ static bool lapic_timer_advance_dynamic __read_mostly; #define LAPIC_TIMER_ADVANCE_NS_MAX 5000 /* step-by-step approximation to mitigate fluctuation */ #define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8 +static int kvm_lapic_msr_read(struct kvm_lapic *apic, u32 reg, u64 *data); +static int kvm_lapic_msr_write(struct kvm_lapic *apic, u32 reg, u64 data); + +static inline void __kvm_lapic_set_reg(char *regs, int reg_off, u32 val) +{ + *((u32 *) (regs + reg_off)) = val; +} + +static inline void kvm_lapic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val) +{ + __kvm_lapic_set_reg(apic->regs, reg_off, val); +} + +static __always_inline u64 __kvm_lapic_get_reg64(char *regs, int reg) +{ + BUILD_BUG_ON(reg != APIC_ICR); + return *((u64 *) (regs + reg)); +} + +static __always_inline u64 kvm_lapic_get_reg64(struct kvm_lapic *apic, int reg) +{ + return __kvm_lapic_get_reg64(apic->regs, reg); +} + +static __always_inline void __kvm_lapic_set_reg64(char *regs, int reg, u64 val) +{ + BUILD_BUG_ON(reg != APIC_ICR); + *((u64 *) (regs + reg)) = val; +} + +static __always_inline void kvm_lapic_set_reg64(struct kvm_lapic *apic, + int reg, u64 val) +{ + __kvm_lapic_set_reg64(apic->regs, reg, val); +} static inline int apic_test_vector(int vec, void *bitmap) { @@ -93,8 +127,8 @@ static inline int __apic_test_and_clear_vector(int vec, void *bitmap) return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); } -struct static_key_deferred apic_hw_disabled __read_mostly; -struct static_key_deferred apic_sw_disabled __read_mostly; +__read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(apic_hw_disabled, HZ); +__read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(apic_sw_disabled, HZ); static inline int apic_enabled(struct kvm_lapic *apic) { @@ -113,11 +147,19 @@ static inline u32 kvm_x2apic_id(struct kvm_lapic *apic) return apic->vcpu->vcpu_id; } -bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu) +static bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu) { - return pi_inject_timer && kvm_vcpu_apicv_active(vcpu); + return pi_inject_timer && kvm_vcpu_apicv_active(vcpu) && + (kvm_mwait_in_guest(vcpu->kvm) || kvm_hlt_in_guest(vcpu->kvm)); } -EXPORT_SYMBOL_GPL(kvm_can_post_timer_interrupt); + +bool kvm_can_use_hv_timer(struct kvm_vcpu *vcpu) +{ + return kvm_x86_ops.set_hv_timer + && !(kvm_mwait_in_guest(vcpu->kvm) || + kvm_can_post_timer_interrupt(vcpu)); +} +EXPORT_SYMBOL_GPL(kvm_can_use_hv_timer); static bool kvm_use_posted_timer_interrupt(struct kvm_vcpu *vcpu) { @@ -164,14 +206,43 @@ static void kvm_apic_map_free(struct rcu_head *rcu) kvfree(map); } -static void recalculate_apic_map(struct kvm *kvm) +/* + * CLEAN -> DIRTY and UPDATE_IN_PROGRESS -> DIRTY changes happen without a lock. + * + * DIRTY -> UPDATE_IN_PROGRESS and UPDATE_IN_PROGRESS -> CLEAN happen with + * apic_map_lock_held. + */ +enum { + CLEAN, + UPDATE_IN_PROGRESS, + DIRTY +}; + +void kvm_recalculate_apic_map(struct kvm *kvm) { struct kvm_apic_map *new, *old = NULL; struct kvm_vcpu *vcpu; - int i; + unsigned long i; u32 max_id = 255; /* enough space for any xAPIC ID */ + /* Read kvm->arch.apic_map_dirty before kvm->arch.apic_map. */ + if (atomic_read_acquire(&kvm->arch.apic_map_dirty) == CLEAN) + return; + + WARN_ONCE(!irqchip_in_kernel(kvm), + "Dirty APIC map without an in-kernel local APIC"); + mutex_lock(&kvm->arch.apic_map_lock); + /* + * Read kvm->arch.apic_map_dirty before kvm->arch.apic_map + * (if clean) or the APIC registers (if dirty). + */ + if (atomic_cmpxchg_acquire(&kvm->arch.apic_map_dirty, + DIRTY, UPDATE_IN_PROGRESS) == CLEAN) { + /* Someone else has updated the map. */ + mutex_unlock(&kvm->arch.apic_map_lock); + return; + } kvm_for_each_vcpu(i, vcpu, kvm) if (kvm_apic_present(vcpu)) @@ -236,6 +307,12 @@ out: old = rcu_dereference_protected(kvm->arch.apic_map, lockdep_is_held(&kvm->arch.apic_map_lock)); rcu_assign_pointer(kvm->arch.apic_map, new); + /* + * Write kvm->arch.apic_map before clearing apic->apic_map_dirty. + * If another update has come in, leave it DIRTY. + */ + atomic_cmpxchg_release(&kvm->arch.apic_map_dirty, + UPDATE_IN_PROGRESS, CLEAN); mutex_unlock(&kvm->arch.apic_map_lock); if (old) @@ -253,24 +330,34 @@ static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val) if (enabled != apic->sw_enabled) { apic->sw_enabled = enabled; if (enabled) - static_key_slow_dec_deferred(&apic_sw_disabled); + static_branch_slow_dec_deferred(&apic_sw_disabled); else - static_key_slow_inc(&apic_sw_disabled.key); + static_branch_inc(&apic_sw_disabled.key); - recalculate_apic_map(apic->vcpu->kvm); + atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); } + + /* Check if there are APF page ready requests pending */ + if (enabled) + kvm_make_request(KVM_REQ_APF_READY, apic->vcpu); } static inline void kvm_apic_set_xapic_id(struct kvm_lapic *apic, u8 id) { kvm_lapic_set_reg(apic, APIC_ID, id << 24); - recalculate_apic_map(apic->vcpu->kvm); + atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); } static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id) { kvm_lapic_set_reg(apic, APIC_LDR, id); - recalculate_apic_map(apic->vcpu->kvm); + atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); +} + +static inline void kvm_apic_set_dfr(struct kvm_lapic *apic, u32 val) +{ + kvm_lapic_set_reg(apic, APIC_DFR, val); + atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); } static inline u32 kvm_apic_calc_x2apic_ldr(u32 id) @@ -286,7 +373,7 @@ static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id) kvm_lapic_set_reg(apic, APIC_ID, id); kvm_lapic_set_reg(apic, APIC_LDR, ldr); - recalculate_apic_map(apic->vcpu->kvm); + atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); } static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type) @@ -294,11 +381,6 @@ static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type) return !(kvm_lapic_get_reg(apic, lvt_type) & APIC_LVT_MASKED); } -static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type) -{ - return kvm_lapic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK; -} - static inline int apic_lvtt_oneshot(struct kvm_lapic *apic) { return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_ONESHOT; @@ -319,15 +401,26 @@ static inline int apic_lvt_nmi_mode(u32 lvt_val) return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI; } +static inline bool kvm_lapic_lvt_supported(struct kvm_lapic *apic, int lvt_index) +{ + return apic->nr_lvt_entries > lvt_index; +} + +static inline int kvm_apic_calc_nr_lvt_entries(struct kvm_vcpu *vcpu) +{ + return KVM_APIC_MAX_NR_LVT_ENTRIES - !(vcpu->arch.mcg_cap & MCG_CMCI_P); +} + void kvm_apic_set_version(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; - struct kvm_cpuid_entry2 *feat; - u32 v = APIC_VERSION; + u32 v = 0; if (!lapic_in_kernel(vcpu)) return; + v = APIC_VERSION | ((apic->nr_lvt_entries - 1) << 16); + /* * KVM emulates 82093AA datasheet (with in-kernel IOAPIC implementation) * which doesn't have EOI register; Some buggy OSes (e.g. Windows with @@ -335,19 +428,39 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu) * version first and level-triggered interrupts never get EOIed in * IOAPIC. */ - feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0); - if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31))) && + if (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) && !ioapic_in_kernel(vcpu->kvm)) v |= APIC_LVR_DIRECTED_EOI; kvm_lapic_set_reg(apic, APIC_LVR, v); } -static const unsigned int apic_lvt_mask[KVM_APIC_LVT_NUM] = { - LVT_MASK , /* part LVTT mask, timer mode mask added at runtime */ - LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */ - LVT_MASK | APIC_MODE_MASK, /* LVTPC */ - LINT_MASK, LINT_MASK, /* LVT0-1 */ - LVT_MASK /* LVTERR */ +void kvm_apic_after_set_mcg_cap(struct kvm_vcpu *vcpu) +{ + int nr_lvt_entries = kvm_apic_calc_nr_lvt_entries(vcpu); + struct kvm_lapic *apic = vcpu->arch.apic; + int i; + + if (!lapic_in_kernel(vcpu) || nr_lvt_entries == apic->nr_lvt_entries) + return; + + /* Initialize/mask any "new" LVT entries. */ + for (i = apic->nr_lvt_entries; i < nr_lvt_entries; i++) + kvm_lapic_set_reg(apic, APIC_LVTx(i), APIC_LVT_MASKED); + + apic->nr_lvt_entries = nr_lvt_entries; + + /* The number of LVT entries is reflected in the version register. */ + kvm_apic_set_version(vcpu); +} + +static const unsigned int apic_lvt_mask[KVM_APIC_MAX_NR_LVT_ENTRIES] = { + [LVT_TIMER] = LVT_MASK, /* timer mode mask added at runtime */ + [LVT_THERMAL_MONITOR] = LVT_MASK | APIC_MODE_MASK, + [LVT_PERFORMANCE_COUNTER] = LVT_MASK | APIC_MODE_MASK, + [LVT_LINT0] = LINT_MASK, + [LVT_LINT1] = LINT_MASK, + [LVT_ERROR] = LVT_MASK, + [LVT_CMCI] = LVT_MASK | APIC_MODE_MASK }; static int find_highest_vector(void *bitmap) @@ -441,15 +554,11 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic) static inline void apic_clear_irr(int vec, struct kvm_lapic *apic) { - struct kvm_vcpu *vcpu; - - vcpu = apic->vcpu; - - if (unlikely(vcpu->arch.apicv_active)) { + if (unlikely(apic->apicv_active)) { /* need to update RVI */ kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR); - kvm_x86_ops->hwapic_irr_update(vcpu, - apic_find_highest_irr(apic)); + static_call_cond(kvm_x86_hwapic_irr_update)(apic->vcpu, + apic_find_highest_irr(apic)); } else { apic->irr_pending = false; kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR); @@ -458,22 +567,24 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic) } } -static inline void apic_set_isr(int vec, struct kvm_lapic *apic) +void kvm_apic_clear_irr(struct kvm_vcpu *vcpu, int vec) { - struct kvm_vcpu *vcpu; + apic_clear_irr(vec, vcpu->arch.apic); +} +EXPORT_SYMBOL_GPL(kvm_apic_clear_irr); +static inline void apic_set_isr(int vec, struct kvm_lapic *apic) +{ if (__apic_test_and_set_vector(vec, apic->regs + APIC_ISR)) return; - vcpu = apic->vcpu; - /* * With APIC virtualization enabled, all caching is disabled * because the processor can modify ISR under the hood. Instead * just set SVI. */ - if (unlikely(vcpu->arch.apicv_active)) - kvm_x86_ops->hwapic_isr_update(vcpu, vec); + if (unlikely(apic->apicv_active)) + static_call_cond(kvm_x86_hwapic_isr_update)(vec); else { ++apic->isr_count; BUG_ON(apic->isr_count > MAX_APIC_VECTOR); @@ -507,12 +618,9 @@ static inline int apic_find_highest_isr(struct kvm_lapic *apic) static inline void apic_clear_isr(int vec, struct kvm_lapic *apic) { - struct kvm_vcpu *vcpu; if (!__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR)) return; - vcpu = apic->vcpu; - /* * We do get here for APIC virtualization enabled if the guest * uses the Hyper-V APIC enlightenment. In this case we may need @@ -520,9 +628,8 @@ static inline void apic_clear_isr(int vec, struct kvm_lapic *apic) * on the other hand isr_count and highest_isr_cache are unused * and must be left alone. */ - if (unlikely(vcpu->arch.apicv_active)) - kvm_x86_ops->hwapic_isr_update(vcpu, - apic_find_highest_isr(apic)); + if (unlikely(apic->apicv_active)) + static_call_cond(kvm_x86_hwapic_isr_update)(apic_find_highest_isr(apic)); else { --apic->isr_count; BUG_ON(apic->isr_count < 0); @@ -624,42 +731,41 @@ static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu) return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED; } -static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu) -{ - u8 val; - if (pv_eoi_get_user(vcpu, &val) < 0) { - printk(KERN_WARNING "Can't read EOI MSR value: 0x%llx\n", - (unsigned long long)vcpu->arch.pv_eoi.msr_val); - return false; - } - return val & 0x1; -} - static void pv_eoi_set_pending(struct kvm_vcpu *vcpu) { - if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) { - printk(KERN_WARNING "Can't set EOI MSR value: 0x%llx\n", - (unsigned long long)vcpu->arch.pv_eoi.msr_val); + if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) return; - } + __set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); } -static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu) +static bool pv_eoi_test_and_clr_pending(struct kvm_vcpu *vcpu) { - if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) { - printk(KERN_WARNING "Can't clear EOI MSR value: 0x%llx\n", - (unsigned long long)vcpu->arch.pv_eoi.msr_val); - return; - } + u8 val; + + if (pv_eoi_get_user(vcpu, &val) < 0) + return false; + + val &= KVM_PV_EOI_ENABLED; + + if (val && pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) + return false; + + /* + * Clear pending bit in any case: it will be set again on vmentry. + * While this might not be ideal from performance point of view, + * this makes sure pv eoi is only enabled when we know it's safe. + */ __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); + + return val; } static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr) { int highest_irr; - if (apic->vcpu->arch.apicv_active) - highest_irr = kvm_x86_ops->sync_pir_to_irr(apic->vcpu); + if (kvm_x86_ops.sync_pir_to_irr) + highest_irr = static_call(kvm_x86_sync_pir_to_irr)(apic->vcpu); else highest_irr = apic_find_highest_irr(apic); if (highest_irr == -1 || (highest_irr & 0xF0) <= ppr) @@ -721,17 +827,17 @@ static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda) if (kvm_apic_broadcast(apic, mda)) return true; - if (apic_x2apic_mode(apic)) - return mda == kvm_x2apic_id(apic); - /* - * Hotplug hack: Make LAPIC in xAPIC mode also accept interrupts as if - * it were in x2APIC mode. Hotplugged VCPUs start in xAPIC mode and - * this allows unique addressing of VCPUs with APIC ID over 0xff. - * The 0xff condition is needed because writeable xAPIC ID. + * Hotplug hack: Accept interrupts for vCPUs in xAPIC mode as if they + * were in x2APIC mode if the target APIC ID can't be encoded as an + * xAPIC ID. This allows unique addressing of hotplugged vCPUs (which + * start in xAPIC mode) with an APIC ID that is unaddressable in xAPIC + * mode. Match the x2APIC ID if and only if the target APIC ID can't + * be encoded in xAPIC to avoid spurious matches against a vCPU that + * changed its (addressable) xAPIC ID (which is writable). */ - if (kvm_x2apic_id(apic) > 0xff && mda == kvm_x2apic_id(apic)) - return true; + if (apic_x2apic_mode(apic) || mda > 0xff) + return mda == kvm_x2apic_id(apic); return mda == kvm_xapic_id(apic); } @@ -944,6 +1050,10 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, *r = -1; if (irq->shorthand == APIC_DEST_SELF) { + if (KVM_BUG_ON(!src, kvm)) { + *r = 0; + return true; + } *r = kvm_apic_set_irq(src->vcpu, irq, dest_map); return true; } @@ -1023,7 +1133,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, switch (delivery_mode) { case APIC_DM_LOWEST: vcpu->arch.apic_arb_prio++; - /* fall through */ + fallthrough; case APIC_DM_FIXED: if (unlikely(trig_mode && !level)) break; @@ -1048,11 +1158,8 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, apic->regs + APIC_TMR); } - if (kvm_x86_ops->deliver_posted_interrupt(vcpu, vector)) { - kvm_lapic_set_irr(vector, apic); - kvm_make_request(KVM_REQ_EVENT, vcpu); - kvm_vcpu_kick(vcpu); - } + static_call(kvm_x86_deliver_interrupt)(apic, delivery_mode, + trig_mode, vector); break; case APIC_DM_REMRD: @@ -1123,8 +1230,8 @@ void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq, struct kvm_lapic *src = NULL; struct kvm_apic_map *map; struct kvm_vcpu *vcpu; - unsigned long bitmap; - int i, vcpu_idx; + unsigned long bitmap, i; + int vcpu_idx; bool ret; rcu_read_lock(); @@ -1203,7 +1310,8 @@ static int apic_set_eoi(struct kvm_lapic *apic) apic_clear_isr(vector, apic); apic_update_ppr(apic); - if (test_bit(vector, vcpu_to_synic(apic->vcpu)->vec_bitmap)) + if (to_hv_vcpu(apic->vcpu) && + test_bit(vector, to_hv_synic(apic->vcpu)->vec_bitmap)) kvm_hv_synic_send_eoi(apic->vcpu, vector); kvm_ioapic_send_eoi(apic, vector); @@ -1226,10 +1334,13 @@ void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector) } EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated); -static void apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high) +void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high) { struct kvm_lapic_irq irq; + /* KVM has no delay and should always clear the BUSY/PENDING flag. */ + WARN_ON_ONCE(icr_low & APIC_ICR_BUSY); + irq.vector = icr_low & APIC_VECTOR_MASK; irq.delivery_mode = icr_low & APIC_MODE_MASK; irq.dest_mode = icr_low & APIC_DEST_MASK; @@ -1240,12 +1351,13 @@ static void apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high) if (apic_x2apic_mode(apic)) irq.dest_id = icr_high; else - irq.dest_id = GET_APIC_DEST_FIELD(icr_high); + irq.dest_id = GET_XAPIC_DEST_FIELD(icr_high); trace_kvm_apic_ipi(icr_low, irq.dest_id); kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL); } +EXPORT_SYMBOL_GPL(kvm_apic_send_ipi); static u32 apic_get_tmcct(struct kvm_lapic *apic) { @@ -1311,7 +1423,7 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset) break; case APIC_TASKPRI: report_tpr_access(apic, false); - /* fall thru */ + fallthrough; default: val = kvm_lapic_get_reg(apic, offset); break; @@ -1329,8 +1441,8 @@ static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev) #define APIC_REGS_MASK(first, count) \ (APIC_REG_MASK(first) * ((1ull << (count)) - 1)) -int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len, - void *data) +static int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len, + void *data) { unsigned char alignment = offset & 0xf; u32 result; @@ -1348,7 +1460,6 @@ int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len, APIC_REGS_MASK(APIC_IRR, APIC_ISR_NR) | APIC_REG_MASK(APIC_ESR) | APIC_REG_MASK(APIC_ICR) | - APIC_REG_MASK(APIC_ICR2) | APIC_REG_MASK(APIC_LVTT) | APIC_REG_MASK(APIC_LVTTHMR) | APIC_REG_MASK(APIC_LVTPC) | @@ -1359,9 +1470,22 @@ int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len, APIC_REG_MASK(APIC_TMCCT) | APIC_REG_MASK(APIC_TDCR); - /* ARBPRI is not valid on x2APIC */ + if (kvm_lapic_lvt_supported(apic, LVT_CMCI)) + valid_reg_mask |= APIC_REG_MASK(APIC_LVTCMCI); + + /* + * ARBPRI and ICR2 are not valid in x2APIC mode. WARN if KVM reads ICR + * in x2APIC mode as it's an 8-byte register in x2APIC and needs to be + * manually handled by the caller. + */ if (!apic_x2apic_mode(apic)) - valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI); + valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI) | + APIC_REG_MASK(APIC_ICR2); + else + WARN_ON_ONCE(offset == APIC_ICR); + + if (alignment + len > 4) + return 1; if (offset > 0x3f0 || !(valid_reg_mask & APIC_REG_MASK(offset))) return 1; @@ -1383,7 +1507,6 @@ int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len, } return 0; } -EXPORT_SYMBOL_GPL(kvm_lapic_reg_read); static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr) { @@ -1445,6 +1568,18 @@ static void limit_periodic_timer_frequency(struct kvm_lapic *apic) } } +static void cancel_hv_timer(struct kvm_lapic *apic); + +static void cancel_apic_timer(struct kvm_lapic *apic) +{ + hrtimer_cancel(&apic->lapic_timer.timer); + preempt_disable(); + if (apic->lapic_timer.hv_timer_in_use) + cancel_hv_timer(apic); + preempt_enable(); + atomic_set(&apic->lapic_timer.pending, 0); +} + static void apic_update_lvtt(struct kvm_lapic *apic) { u32 timer_mode = kvm_lapic_get_reg(apic, APIC_LVTT) & @@ -1453,7 +1588,7 @@ static void apic_update_lvtt(struct kvm_lapic *apic) if (apic->lapic_timer.timer_mode != timer_mode) { if (apic_lvtt_tscdeadline(apic) != (timer_mode == APIC_LVT_TIMER_TSCDEADLINE)) { - hrtimer_cancel(&apic->lapic_timer.timer); + cancel_apic_timer(apic); kvm_lapic_set_reg(apic, APIC_TMICT, 0); apic->lapic_timer.period = 0; apic->lapic_timer.tscdeadline = 0; @@ -1477,7 +1612,7 @@ static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu) int vec = reg & APIC_VECTOR_MASK; void *bitmap = apic->regs + APIC_ISR; - if (vcpu->arch.apicv_active) + if (apic->apicv_active) bitmap = apic->regs + APIC_IRR; if (apic_test_vector(vec, bitmap)) @@ -1496,7 +1631,7 @@ static inline void __wait_lapic_expire(struct kvm_vcpu *vcpu, u64 guest_cycles) * that __delay() uses delay_tsc whenever the hardware has TSC, thus * always for VMX enabled hardware. */ - if (vcpu->arch.tsc_scaling_ratio == kvm_default_tsc_scaling_ratio) { + if (vcpu->arch.tsc_scaling_ratio == kvm_caps.default_tsc_scaling_ratio) { __delay(min(guest_cycles, nsec_to_cycles(vcpu, timer_advance_ns))); } else { @@ -1540,24 +1675,32 @@ static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu) struct kvm_lapic *apic = vcpu->arch.apic; u64 guest_tsc, tsc_deadline; - if (apic->lapic_timer.expired_tscdeadline == 0) - return; - tsc_deadline = apic->lapic_timer.expired_tscdeadline; apic->lapic_timer.expired_tscdeadline = 0; guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); - apic->lapic_timer.advance_expire_delta = guest_tsc - tsc_deadline; + trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline); + + if (lapic_timer_advance_dynamic) { + adjust_lapic_timer_advance(vcpu, guest_tsc - tsc_deadline); + /* + * If the timer fired early, reread the TSC to account for the + * overhead of the above adjustment to avoid waiting longer + * than is necessary. + */ + if (guest_tsc < tsc_deadline) + guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); + } if (guest_tsc < tsc_deadline) __wait_lapic_expire(vcpu, tsc_deadline - guest_tsc); - - if (lapic_timer_advance_dynamic) - adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta); } void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu) { - if (lapic_timer_int_injected(vcpu)) + if (lapic_in_kernel(vcpu) && + vcpu->arch.apic->lapic_timer.expired_tscdeadline && + vcpu->arch.apic->lapic_timer.timer_advance_ns && + lapic_timer_int_injected(vcpu)) __kvm_wait_lapic_expire(vcpu); } EXPORT_SYMBOL_GPL(kvm_wait_lapic_expire); @@ -1575,7 +1718,7 @@ static void kvm_apic_inject_pending_timer_irqs(struct kvm_lapic *apic) } } -static void apic_timer_expired(struct kvm_lapic *apic) +static void apic_timer_expired(struct kvm_lapic *apic, bool from_timer_fn) { struct kvm_vcpu *vcpu = apic->vcpu; struct kvm_timer *ktimer = &apic->lapic_timer; @@ -1586,15 +1729,31 @@ static void apic_timer_expired(struct kvm_lapic *apic) if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use) ktimer->expired_tscdeadline = ktimer->tscdeadline; + if (!from_timer_fn && apic->apicv_active) { + WARN_ON(kvm_get_running_vcpu() != vcpu); + kvm_apic_inject_pending_timer_irqs(apic); + return; + } + if (kvm_use_posted_timer_interrupt(apic->vcpu)) { - if (apic->lapic_timer.timer_advance_ns) + /* + * Ensure the guest's timer has truly expired before posting an + * interrupt. Open code the relevant checks to avoid querying + * lapic_timer_int_injected(), which will be false since the + * interrupt isn't yet injected. Waiting until after injecting + * is not an option since that won't help a posted interrupt. + */ + if (vcpu->arch.apic->lapic_timer.expired_tscdeadline && + vcpu->arch.apic->lapic_timer.timer_advance_ns) __kvm_wait_lapic_expire(vcpu); kvm_apic_inject_pending_timer_irqs(apic); return; } atomic_inc(&apic->lapic_timer.pending); - kvm_set_pending_timer(vcpu); + kvm_make_request(KVM_REQ_UNBLOCK, vcpu); + if (from_timer_fn) + kvm_vcpu_kick(vcpu); } static void start_sw_tscdeadline(struct kvm_lapic *apic) @@ -1625,18 +1784,23 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic) expire = ktime_sub_ns(expire, ktimer->timer_advance_ns); hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS_HARD); } else - apic_timer_expired(apic); + apic_timer_expired(apic, false); local_irq_restore(flags); } +static inline u64 tmict_to_ns(struct kvm_lapic *apic, u32 tmict) +{ + return (u64)tmict * APIC_BUS_CYCLE_NS * (u64)apic->divide_count; +} + static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_divisor) { ktime_t now, remaining; u64 ns_remaining_old, ns_remaining_new; - apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT) - * APIC_BUS_CYCLE_NS * apic->divide_count; + apic->lapic_timer.period = + tmict_to_ns(apic, kvm_lapic_get_reg(apic, APIC_TMICT)); limit_periodic_timer_frequency(apic); now = ktime_get(); @@ -1654,14 +1818,15 @@ static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_diviso apic->lapic_timer.target_expiration = ktime_add_ns(now, ns_remaining_new); } -static bool set_target_expiration(struct kvm_lapic *apic) +static bool set_target_expiration(struct kvm_lapic *apic, u32 count_reg) { ktime_t now; u64 tscl = rdtsc(); + s64 deadline; now = ktime_get(); - apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT) - * APIC_BUS_CYCLE_NS * apic->divide_count; + apic->lapic_timer.period = + tmict_to_ns(apic, kvm_lapic_get_reg(apic, APIC_TMICT)); if (!apic->lapic_timer.period) { apic->lapic_timer.tscdeadline = 0; @@ -1669,10 +1834,32 @@ static bool set_target_expiration(struct kvm_lapic *apic) } limit_periodic_timer_frequency(apic); + deadline = apic->lapic_timer.period; + + if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) { + if (unlikely(count_reg != APIC_TMICT)) { + deadline = tmict_to_ns(apic, + kvm_lapic_get_reg(apic, count_reg)); + if (unlikely(deadline <= 0)) + deadline = apic->lapic_timer.period; + else if (unlikely(deadline > apic->lapic_timer.period)) { + pr_info_ratelimited( + "kvm: vcpu %i: requested lapic timer restore with " + "starting count register %#x=%u (%lld ns) > initial count (%lld ns). " + "Using initial count to start timer.\n", + apic->vcpu->vcpu_id, + count_reg, + kvm_lapic_get_reg(apic, count_reg), + deadline, apic->lapic_timer.period); + kvm_lapic_set_reg(apic, count_reg, 0); + deadline = apic->lapic_timer.period; + } + } + } apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) + - nsec_to_cycles(apic->vcpu, apic->lapic_timer.period); - apic->lapic_timer.target_expiration = ktime_add_ns(now, apic->lapic_timer.period); + nsec_to_cycles(apic->vcpu, deadline); + apic->lapic_timer.target_expiration = ktime_add_ns(now, deadline); return true; } @@ -1705,7 +1892,7 @@ static void start_sw_period(struct kvm_lapic *apic) if (ktime_after(ktime_get(), apic->lapic_timer.target_expiration)) { - apic_timer_expired(apic); + apic_timer_expired(apic, false); if (apic_lvtt_oneshot(apic)) return; @@ -1715,7 +1902,7 @@ static void start_sw_period(struct kvm_lapic *apic) hrtimer_start(&apic->lapic_timer.timer, apic->lapic_timer.target_expiration, - HRTIMER_MODE_ABS); + HRTIMER_MODE_ABS_HARD); } bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu) @@ -1731,7 +1918,7 @@ static void cancel_hv_timer(struct kvm_lapic *apic) { WARN_ON(preemptible()); WARN_ON(!apic->lapic_timer.hv_timer_in_use); - kvm_x86_ops->cancel_hv_timer(apic->vcpu); + static_call(kvm_x86_cancel_hv_timer)(apic->vcpu); apic->lapic_timer.hv_timer_in_use = false; } @@ -1742,13 +1929,13 @@ static bool start_hv_timer(struct kvm_lapic *apic) bool expired; WARN_ON(preemptible()); - if (!kvm_x86_ops->set_hv_timer) + if (!kvm_can_use_hv_timer(vcpu)) return false; if (!ktimer->tscdeadline) return false; - if (kvm_x86_ops->set_hv_timer(vcpu, ktimer->tscdeadline, &expired)) + if (static_call(kvm_x86_set_hv_timer)(vcpu, ktimer->tscdeadline, &expired)) return false; ktimer->hv_timer_in_use = true; @@ -1767,7 +1954,7 @@ static bool start_hv_timer(struct kvm_lapic *apic) if (atomic_read(&ktimer->pending)) { cancel_hv_timer(apic); } else if (expired) { - apic_timer_expired(apic); + apic_timer_expired(apic, false); cancel_hv_timer(apic); } } @@ -1815,9 +2002,9 @@ void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu) /* If the preempt notifier has already run, it also called apic_timer_expired */ if (!apic->lapic_timer.hv_timer_in_use) goto out; - WARN_ON(swait_active(&vcpu->wq)); + WARN_ON(kvm_vcpu_is_blocking(vcpu)); + apic_timer_expired(apic, false); cancel_hv_timer(apic); - apic_timer_expired(apic); if (apic_lvtt_period(apic) && apic->lapic_timer.period) { advance_periodic_target_expiration(apic); @@ -1832,7 +2019,6 @@ void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu) { restart_apic_timer(vcpu->arch.apic); } -EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_hv_timer); void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu) { @@ -1844,7 +2030,6 @@ void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu) start_sw_timer(apic); preempt_enable(); } -EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_sw_timer); void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu) { @@ -1854,17 +2039,22 @@ void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu) restart_apic_timer(apic); } -static void start_apic_timer(struct kvm_lapic *apic) +static void __start_apic_timer(struct kvm_lapic *apic, u32 count_reg) { atomic_set(&apic->lapic_timer.pending, 0); if ((apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) - && !set_target_expiration(apic)) + && !set_target_expiration(apic, count_reg)) return; restart_apic_timer(apic); } +static void start_apic_timer(struct kvm_lapic *apic) +{ + __start_apic_timer(apic, APIC_TMICT); +} + static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val) { bool lvt0_in_nmi_mode = apic_lvt_nmi_mode(lvt0_val); @@ -1878,7 +2068,30 @@ static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val) } } -int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) +static void kvm_lapic_xapic_id_updated(struct kvm_lapic *apic) +{ + struct kvm *kvm = apic->vcpu->kvm; + + if (KVM_BUG_ON(apic_x2apic_mode(apic), kvm)) + return; + + if (kvm_xapic_id(apic) == apic->vcpu->vcpu_id) + return; + + kvm_set_apicv_inhibit(apic->vcpu->kvm, APICV_INHIBIT_REASON_APIC_ID_MODIFIED); +} + +static int get_lvt_index(u32 reg) +{ + if (reg == APIC_LVTCMCI) + return LVT_CMCI; + if (reg < APIC_LVTT || reg > APIC_LVTERR) + return -1; + return array_index_nospec( + (reg - APIC_LVTT) >> 4, KVM_APIC_MAX_NR_LVT_ENTRIES); +} + +static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) { int ret = 0; @@ -1886,10 +2099,12 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) switch (reg) { case APIC_ID: /* Local APIC ID */ - if (!apic_x2apic_mode(apic)) + if (!apic_x2apic_mode(apic)) { kvm_apic_set_xapic_id(apic, val >> 24); - else + kvm_lapic_xapic_id_updated(apic); + } else { ret = 1; + } break; case APIC_TASKPRI: @@ -1909,10 +2124,9 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) break; case APIC_DFR: - if (!apic_x2apic_mode(apic)) { - kvm_lapic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF); - recalculate_apic_map(apic->vcpu->kvm); - } else + if (!apic_x2apic_mode(apic)) + kvm_apic_set_dfr(apic, val | 0x0FFFFFFF); + else ret = 1; break; @@ -1923,13 +2137,10 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) apic_set_spiv(apic, val & mask); if (!(val & APIC_SPIV_APIC_ENABLED)) { int i; - u32 lvt_val; - for (i = 0; i < KVM_APIC_LVT_NUM; i++) { - lvt_val = kvm_lapic_get_reg(apic, - APIC_LVTT + 0x10 * i); - kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i, - lvt_val | APIC_LVT_MASKED); + for (i = 0; i < apic->nr_lvt_entries; i++) { + kvm_lapic_set_reg(apic, APIC_LVTx(i), + kvm_lapic_get_reg(apic, APIC_LVTx(i)) | APIC_LVT_MASKED); } apic_update_lvtt(apic); atomic_set(&apic->lapic_timer.pending, 0); @@ -1938,34 +2149,35 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) break; } case APIC_ICR: + WARN_ON_ONCE(apic_x2apic_mode(apic)); + /* No delay here, so we always clear the pending bit */ - val &= ~(1 << 12); - apic_send_ipi(apic, val, kvm_lapic_get_reg(apic, APIC_ICR2)); + val &= ~APIC_ICR_BUSY; + kvm_apic_send_ipi(apic, val, kvm_lapic_get_reg(apic, APIC_ICR2)); kvm_lapic_set_reg(apic, APIC_ICR, val); break; - case APIC_ICR2: - if (!apic_x2apic_mode(apic)) - val &= 0xff000000; - kvm_lapic_set_reg(apic, APIC_ICR2, val); + if (apic_x2apic_mode(apic)) + ret = 1; + else + kvm_lapic_set_reg(apic, APIC_ICR2, val & 0xff000000); break; case APIC_LVT0: apic_manage_nmi_watchdog(apic, val); - /* fall through */ + fallthrough; case APIC_LVTTHMR: case APIC_LVTPC: case APIC_LVT1: - case APIC_LVTERR: { - /* TODO: Check vector */ - size_t size; - u32 index; - + case APIC_LVTERR: + case APIC_LVTCMCI: { + u32 index = get_lvt_index(reg); + if (!kvm_lapic_lvt_supported(apic, index)) { + ret = 1; + break; + } if (!kvm_apic_sw_enabled(apic)) val |= APIC_LVT_MASKED; - size = ARRAY_SIZE(apic_lvt_mask); - index = array_index_nospec( - (reg - APIC_LVTT) >> 4, size); val &= apic_lvt_mask[index]; kvm_lapic_set_reg(apic, reg, val); break; @@ -1983,7 +2195,7 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) if (apic_lvtt_tscdeadline(apic)) break; - hrtimer_cancel(&apic->lapic_timer.timer); + cancel_apic_timer(apic); kvm_lapic_set_reg(apic, APIC_TMICT, val); start_apic_timer(apic); break; @@ -1991,7 +2203,7 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) case APIC_TDCR: { uint32_t old_divisor = apic->divide_count; - kvm_lapic_set_reg(apic, APIC_TDCR, val); + kvm_lapic_set_reg(apic, APIC_TDCR, val & 0xb); update_divide_count(apic); if (apic->divide_count != old_divisor && apic->lapic_timer.period) { @@ -2007,9 +2219,9 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) break; case APIC_SELF_IPI: - if (apic_x2apic_mode(apic)) { - kvm_lapic_reg_write(apic, APIC_ICR, 0x40000 | (val & 0xff)); - } else + if (apic_x2apic_mode(apic)) + kvm_apic_send_ipi(apic, APIC_DEST_SELF | (val & APIC_VECTOR_MASK), 0); + else ret = 1; break; default: @@ -2017,9 +2229,15 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) break; } + /* + * Recalculate APIC maps if necessary, e.g. if the software enable bit + * was toggled, the APIC ID changed, etc... The maps are marked dirty + * on relevant changes, i.e. this is a nop for most writes. + */ + kvm_recalculate_apic_map(apic->vcpu->kvm); + return ret; } -EXPORT_SYMBOL_GPL(kvm_lapic_reg_write); static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t address, int len, const void *data) @@ -2063,15 +2281,28 @@ EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi); /* emulate APIC access in a trap manner */ void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset) { - u32 val = 0; - - /* hw has done the conditional check and inst decode */ - offset &= 0xff0; + struct kvm_lapic *apic = vcpu->arch.apic; + u64 val; - kvm_lapic_reg_read(vcpu->arch.apic, offset, 4, &val); + if (apic_x2apic_mode(apic)) { + if (KVM_BUG_ON(kvm_lapic_msr_read(apic, offset, &val), vcpu->kvm)) + return; + } else { + val = kvm_lapic_get_reg(apic, offset); + } - /* TODO: optimize to just emulate side effect w/o one more write */ - kvm_lapic_reg_write(vcpu->arch.apic, offset, val); + /* + * ICR is a single 64-bit register when x2APIC is enabled. For legacy + * xAPIC, ICR writes need to go down the common (slightly slower) path + * to get the upper half from ICR2. + */ + if (apic_x2apic_mode(apic) && offset == APIC_ICR) { + kvm_apic_send_ipi(apic, (u32)val, (u32)(val >> 32)); + trace_kvm_apic_write(APIC_ICR, val); + } else { + /* TODO: optimize to just emulate side effect w/o one more write */ + kvm_lapic_reg_write(apic, offset, (u32)val); + } } EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode); @@ -2085,10 +2316,10 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu) hrtimer_cancel(&apic->lapic_timer.timer); if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE)) - static_key_slow_dec_deferred(&apic_hw_disabled); + static_branch_slow_dec_deferred(&apic_hw_disabled); if (!apic->sw_enabled) - static_key_slow_dec_deferred(&apic_sw_disabled); + static_branch_slow_dec_deferred(&apic_sw_disabled); if (apic->regs) free_page((unsigned long)apic->regs); @@ -2105,8 +2336,7 @@ u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; - if (!lapic_in_kernel(vcpu) || - !apic_lvtt_tscdeadline(apic)) + if (!kvm_apic_present(vcpu) || !apic_lvtt_tscdeadline(apic)) return 0; return apic->lapic_timer.tscdeadline; @@ -2116,8 +2346,7 @@ void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data) { struct kvm_lapic *apic = vcpu->arch.apic; - if (!lapic_in_kernel(vcpu) || apic_lvtt_oneshot(apic) || - apic_lvtt_period(apic)) + if (!kvm_apic_present(vcpu) || !apic_lvtt_tscdeadline(apic)) return; hrtimer_cancel(&apic->lapic_timer.timer); @@ -2127,10 +2356,7 @@ void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data) void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8) { - struct kvm_lapic *apic = vcpu->arch.apic; - - apic_set_tpr(apic, ((cr8 & 0x0f) << 4) - | (kvm_lapic_get_reg(apic, APIC_TASKPRI) & 4)); + apic_set_tpr(vcpu->arch.apic, (cr8 & 0x0f) << 4); } u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu) @@ -2147,13 +2373,10 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) u64 old_value = vcpu->arch.apic_base; struct kvm_lapic *apic = vcpu->arch.apic; - if (!apic) - value |= MSR_IA32_APICBASE_BSP; - vcpu->arch.apic_base = value; if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE) - kvm_update_cpuid(vcpu); + kvm_update_cpuid_runtime(vcpu); if (!apic) return; @@ -2162,37 +2385,48 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE) { if (value & MSR_IA32_APICBASE_ENABLE) { kvm_apic_set_xapic_id(apic, vcpu->vcpu_id); - static_key_slow_dec_deferred(&apic_hw_disabled); + static_branch_slow_dec_deferred(&apic_hw_disabled); + /* Check if there are APF page ready requests pending */ + kvm_make_request(KVM_REQ_APF_READY, vcpu); } else { - static_key_slow_inc(&apic_hw_disabled.key); - recalculate_apic_map(vcpu->kvm); + static_branch_inc(&apic_hw_disabled.key); + atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); } } if (((old_value ^ value) & X2APIC_ENABLE) && (value & X2APIC_ENABLE)) kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id); - if ((old_value ^ value) & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)) - kvm_x86_ops->set_virtual_apic_mode(vcpu); + if ((old_value ^ value) & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)) { + kvm_vcpu_update_apicv(vcpu); + static_call_cond(kvm_x86_set_virtual_apic_mode)(vcpu); + } apic->base_address = apic->vcpu->arch.apic_base & MSR_IA32_APICBASE_BASE; if ((value & MSR_IA32_APICBASE_ENABLE) && - apic->base_address != APIC_DEFAULT_PHYS_BASE) - pr_warn_once("APIC base relocation is unsupported by KVM"); + apic->base_address != APIC_DEFAULT_PHYS_BASE) { + kvm_set_apicv_inhibit(apic->vcpu->kvm, + APICV_INHIBIT_REASON_APIC_BASE_MODIFIED); + } } void kvm_apic_update_apicv(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; - if (vcpu->arch.apicv_active) { + if (apic->apicv_active) { /* irr_pending is always true when apicv is activated. */ apic->irr_pending = true; apic->isr_count = 1; } else { - apic->irr_pending = (apic_search_irr(apic) != -1); + /* + * Don't clear irr_pending, searching the IRR can race with + * updates from the CPU as APICv is still active from hardware's + * perspective. The flag will be cleared as appropriate when + * KVM injects the interrupt. + */ apic->isr_count = count_vectors(apic->regs + APIC_ISR); } } @@ -2201,23 +2435,29 @@ EXPORT_SYMBOL_GPL(kvm_apic_update_apicv); void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) { struct kvm_lapic *apic = vcpu->arch.apic; + u64 msr_val; int i; + if (!init_event) { + msr_val = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE; + if (kvm_vcpu_is_reset_bsp(vcpu)) + msr_val |= MSR_IA32_APICBASE_BSP; + kvm_lapic_set_base(vcpu, msr_val); + } + if (!apic) return; /* Stop the timer in case it's a reset to an active apic */ hrtimer_cancel(&apic->lapic_timer.timer); - if (!init_event) { - kvm_lapic_set_base(vcpu, APIC_DEFAULT_PHYS_BASE | - MSR_IA32_APICBASE_ENABLE); + /* The xAPIC ID is set at RESET even if the APIC was already enabled. */ + if (!init_event) kvm_apic_set_xapic_id(apic, vcpu->vcpu_id); - } kvm_apic_set_version(apic->vcpu); - for (i = 0; i < KVM_APIC_LVT_NUM; i++) - kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED); + for (i = 0; i < apic->nr_lvt_entries; i++) + kvm_lapic_set_reg(apic, APIC_LVTx(i), APIC_LVT_MASKED); apic_update_lvtt(apic); if (kvm_vcpu_is_reset_bsp(vcpu) && kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED)) @@ -2225,14 +2465,18 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT)); apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0)); - kvm_lapic_set_reg(apic, APIC_DFR, 0xffffffffU); + kvm_apic_set_dfr(apic, 0xffffffffU); apic_set_spiv(apic, 0xff); kvm_lapic_set_reg(apic, APIC_TASKPRI, 0); if (!apic_x2apic_mode(apic)) kvm_apic_set_ldr(apic, 0); kvm_lapic_set_reg(apic, APIC_ESR, 0); - kvm_lapic_set_reg(apic, APIC_ICR, 0); - kvm_lapic_set_reg(apic, APIC_ICR2, 0); + if (!apic_x2apic_mode(apic)) { + kvm_lapic_set_reg(apic, APIC_ICR, 0); + kvm_lapic_set_reg(apic, APIC_ICR2, 0); + } else { + kvm_lapic_set_reg64(apic, APIC_ICR, 0); + } kvm_lapic_set_reg(apic, APIC_TDCR, 0); kvm_lapic_set_reg(apic, APIC_TMICT, 0); for (i = 0; i < 8; i++) { @@ -2244,19 +2488,19 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) apic->highest_isr_cache = -1; update_divide_count(apic); atomic_set(&apic->lapic_timer.pending, 0); - if (kvm_vcpu_is_bsp(vcpu)) - kvm_lapic_set_base(vcpu, - vcpu->arch.apic_base | MSR_IA32_APICBASE_BSP); + vcpu->arch.pv_eoi.msr_val = 0; apic_update_ppr(apic); - if (vcpu->arch.apicv_active) { - kvm_x86_ops->apicv_post_state_restore(vcpu); - kvm_x86_ops->hwapic_irr_update(vcpu, -1); - kvm_x86_ops->hwapic_isr_update(vcpu, -1); + if (apic->apicv_active) { + static_call_cond(kvm_x86_apicv_post_state_restore)(vcpu); + static_call_cond(kvm_x86_hwapic_irr_update)(vcpu, -1); + static_call_cond(kvm_x86_hwapic_isr_update)(-1); } vcpu->arch.apic_arb_prio = 0; vcpu->arch.apic_attention = 0; + + kvm_recalculate_apic_map(vcpu->kvm); } /* @@ -2313,7 +2557,7 @@ static enum hrtimer_restart apic_timer_fn(struct hrtimer *data) struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer); struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer); - apic_timer_expired(apic); + apic_timer_expired(apic, true); if (lapic_is_periodic(apic)) { advance_periodic_target_expiration(apic); @@ -2343,6 +2587,8 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns) } apic->vcpu = vcpu; + apic->nr_lvt_entries = kvm_apic_calc_nr_lvt_entries(vcpu); + hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); apic->lapic_timer.timer.function = apic_timer_fn; @@ -2355,11 +2601,11 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns) } /* - * APIC is created enabled. This will prevent kvm_lapic_set_base from - * thinking that APIC state has changed. + * Stuff the APIC ENABLE bit in lieu of temporarily incrementing + * apic_hw_disabled; the full RESET value is set by kvm_lapic_reset(). */ vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE; - static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */ + static_branch_inc(&apic_sw_disabled.key); /* sw disabled at reset */ kvm_iodevice_init(&apic->dev, &apic_mmio_ops); return 0; @@ -2375,12 +2621,13 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu) struct kvm_lapic *apic = vcpu->arch.apic; u32 ppr; - if (!kvm_apic_hw_enabled(apic)) + if (!kvm_apic_present(vcpu)) return -1; __apic_update_ppr(apic, &ppr); return apic_has_interrupt_for_ppr(apic, ppr); } +EXPORT_SYMBOL_GPL(kvm_apic_has_interrupt); int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu) { @@ -2421,7 +2668,7 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) */ apic_clear_irr(vector, apic); - if (test_bit(vector, vcpu_to_synic(vcpu)->auto_eoi_bitmap)) { + if (to_hv_vcpu(vcpu) && test_bit(vector, to_hv_synic(vcpu)->auto_eoi_bitmap)) { /* * For auto-EOI interrupts, there might be another pending * interrupt above PPR, so check whether to raise another @@ -2448,6 +2695,7 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu, if (apic_x2apic_mode(vcpu->arch.apic)) { u32 *id = (u32 *)(s->regs + APIC_ID); u32 *ldr = (u32 *)(s->regs + APIC_LDR); + u64 icr; if (vcpu->kvm->arch.x2apic_format) { if (*id != vcpu->vcpu_id) @@ -2459,9 +2707,23 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu, *id <<= 24; } - /* In x2APIC mode, the LDR is fixed and based on the id */ - if (set) + /* + * In x2APIC mode, the LDR is fixed and based on the id. And + * ICR is internally a single 64-bit register, but needs to be + * split to ICR+ICR2 in userspace for backwards compatibility. + */ + if (set) { *ldr = kvm_apic_calc_x2apic_ldr(*id); + + icr = __kvm_lapic_get_reg(s->regs, APIC_ICR) | + (u64)__kvm_lapic_get_reg(s->regs, APIC_ICR2) << 32; + __kvm_lapic_set_reg64(s->regs, APIC_ICR, icr); + } else { + icr = __kvm_lapic_get_reg64(s->regs, APIC_ICR); + __kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32); + } + } else { + kvm_lapic_xapic_id_updated(vcpu->arch.apic); } return 0; @@ -2470,6 +2732,14 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu, int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) { memcpy(s->regs, vcpu->arch.apic->regs, sizeof(*s)); + + /* + * Get calculated timer current count for remaining timer period (if + * any) and store it in the returned register set. + */ + __kvm_lapic_set_reg(s->regs, APIC_TMCCT, + __apic_read(vcpu->arch.apic, APIC_TMCCT)); + return kvm_apic_state_fixup(vcpu, s, false); } @@ -2478,33 +2748,35 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) struct kvm_lapic *apic = vcpu->arch.apic; int r; - kvm_lapic_set_base(vcpu, vcpu->arch.apic_base); /* set SPIV separately to get count of SW disabled APICs right */ apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV))); r = kvm_apic_state_fixup(vcpu, s, true); - if (r) + if (r) { + kvm_recalculate_apic_map(vcpu->kvm); return r; + } memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s)); - recalculate_apic_map(vcpu->kvm); + atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); + kvm_recalculate_apic_map(vcpu->kvm); kvm_apic_set_version(vcpu); apic_update_ppr(apic); - hrtimer_cancel(&apic->lapic_timer.timer); + cancel_apic_timer(apic); + apic->lapic_timer.expired_tscdeadline = 0; apic_update_lvtt(apic); apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0)); update_divide_count(apic); - start_apic_timer(apic); + __start_apic_timer(apic, APIC_TMCCT); + kvm_lapic_set_reg(apic, APIC_TMCCT, 0); kvm_apic_update_apicv(vcpu); apic->highest_isr_cache = -1; - if (vcpu->arch.apicv_active) { - kvm_x86_ops->apicv_post_state_restore(vcpu); - kvm_x86_ops->hwapic_irr_update(vcpu, - apic_find_highest_irr(apic)); - kvm_x86_ops->hwapic_isr_update(vcpu, - apic_find_highest_isr(apic)); + if (apic->apicv_active) { + static_call_cond(kvm_x86_apicv_post_state_restore)(vcpu); + static_call_cond(kvm_x86_hwapic_irr_update)(vcpu, apic_find_highest_irr(apic)); + static_call_cond(kvm_x86_hwapic_isr_update)(apic_find_highest_isr(apic)); } kvm_make_request(KVM_REQ_EVENT, vcpu); if (ioapic_in_kernel(vcpu->kvm)) @@ -2538,7 +2810,6 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu, struct kvm_lapic *apic) { - bool pending; int vector; /* * PV EOI state is derived from KVM_APIC_PV_EOI_PENDING in host @@ -2552,14 +2823,8 @@ static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu, * -> host enabled PV EOI, guest executed EOI. */ BUG_ON(!pv_eoi_enabled(vcpu)); - pending = pv_eoi_get_pending(vcpu); - /* - * Clear pending bit in any case: it will be set again on vmentry. - * While this might not be ideal from performance point of view, - * this makes sure pv eoi is only enabled when we know it's safe. - */ - pv_eoi_clr_pending(vcpu); - if (pending) + + if (pv_eoi_test_and_clr_pending(vcpu)) return; vector = apic_set_eoi(apic); trace_kvm_pv_eoi(apic, vector); @@ -2648,147 +2913,167 @@ int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr) return 0; } -int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data) +int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data) { - struct kvm_lapic *apic = vcpu->arch.apic; - u32 reg = (msr - APIC_BASE_MSR) << 4; + data &= ~APIC_ICR_BUSY; - if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic)) - return 1; + kvm_apic_send_ipi(apic, (u32)data, (u32)(data >> 32)); + kvm_lapic_set_reg64(apic, APIC_ICR, data); + trace_kvm_apic_write(APIC_ICR, data); + return 0; +} + +static int kvm_lapic_msr_read(struct kvm_lapic *apic, u32 reg, u64 *data) +{ + u32 low; - if (reg == APIC_ICR2) + if (reg == APIC_ICR) { + *data = kvm_lapic_get_reg64(apic, APIC_ICR); + return 0; + } + + if (kvm_lapic_reg_read(apic, reg, 4, &low)) return 1; - /* if this is ICR write vector before command */ + *data = low; + + return 0; +} + +static int kvm_lapic_msr_write(struct kvm_lapic *apic, u32 reg, u64 data) +{ + /* + * ICR is a 64-bit register in x2APIC mode (and Hyper'v PV vAPIC) and + * can be written as such, all other registers remain accessible only + * through 32-bit reads/writes. + */ if (reg == APIC_ICR) - kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(data >> 32)); + return kvm_x2apic_icr_write(apic, data); + return kvm_lapic_reg_write(apic, reg, (u32)data); } -int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data) +int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data) { struct kvm_lapic *apic = vcpu->arch.apic; - u32 reg = (msr - APIC_BASE_MSR) << 4, low, high = 0; + u32 reg = (msr - APIC_BASE_MSR) << 4; if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic)) return 1; - if (reg == APIC_DFR || reg == APIC_ICR2) - return 1; + return kvm_lapic_msr_write(apic, reg, data); +} - if (kvm_lapic_reg_read(apic, reg, 4, &low)) +int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data) +{ + struct kvm_lapic *apic = vcpu->arch.apic; + u32 reg = (msr - APIC_BASE_MSR) << 4; + + if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic)) return 1; - if (reg == APIC_ICR) - kvm_lapic_reg_read(apic, APIC_ICR2, 4, &high); - *data = (((u64)high) << 32) | low; + if (reg == APIC_DFR) + return 1; - return 0; + return kvm_lapic_msr_read(apic, reg, data); } int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data) { - struct kvm_lapic *apic = vcpu->arch.apic; - if (!lapic_in_kernel(vcpu)) return 1; - /* if this is ICR write vector before command */ - if (reg == APIC_ICR) - kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(data >> 32)); - return kvm_lapic_reg_write(apic, reg, (u32)data); + return kvm_lapic_msr_write(vcpu->arch.apic, reg, data); } int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data) { - struct kvm_lapic *apic = vcpu->arch.apic; - u32 low, high = 0; - if (!lapic_in_kernel(vcpu)) return 1; - if (kvm_lapic_reg_read(apic, reg, 4, &low)) - return 1; - if (reg == APIC_ICR) - kvm_lapic_reg_read(apic, APIC_ICR2, 4, &high); - - *data = (((u64)high) << 32) | low; - - return 0; + return kvm_lapic_msr_read(vcpu->arch.apic, reg, data); } -int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len) +int kvm_lapic_set_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len) { u64 addr = data & ~KVM_MSR_ENABLED; struct gfn_to_hva_cache *ghc = &vcpu->arch.pv_eoi.data; unsigned long new_len; + int ret; if (!IS_ALIGNED(addr, 4)) return 1; - vcpu->arch.pv_eoi.msr_val = data; - if (!pv_eoi_enabled(vcpu)) - return 0; + if (data & KVM_MSR_ENABLED) { + if (addr == ghc->gpa && len <= ghc->len) + new_len = ghc->len; + else + new_len = len; - if (addr == ghc->gpa && len <= ghc->len) - new_len = ghc->len; - else - new_len = len; + ret = kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, addr, new_len); + if (ret) + return ret; + } + + vcpu->arch.pv_eoi.msr_val = data; - return kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, addr, new_len); + return 0; } -void kvm_apic_accept_events(struct kvm_vcpu *vcpu) +int kvm_apic_accept_events(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; u8 sipi_vector; - unsigned long pe; + int r; - if (!lapic_in_kernel(vcpu) || !apic->pending_events) - return; + if (!kvm_apic_has_pending_init_or_sipi(vcpu)) + return 0; + + if (is_guest_mode(vcpu)) { + r = kvm_check_nested_events(vcpu); + if (r < 0) + return r == -EBUSY ? 0 : r; + /* + * Continue processing INIT/SIPI even if a nested VM-Exit + * occurred, e.g. pending SIPIs should be dropped if INIT+SIPI + * are blocked as a result of transitioning to VMX root mode. + */ + } /* - * INITs are latched while CPU is in specific states - * (SMM, VMX non-root mode, SVM with GIF=0). - * Because a CPU cannot be in these states immediately - * after it has processed an INIT signal (and thus in - * KVM_MP_STATE_INIT_RECEIVED state), just eat SIPIs - * and leave the INIT pending. + * INITs are blocked while CPU is in specific states (SMM, VMX root + * mode, SVM with GIF=0), while SIPIs are dropped if the CPU isn't in + * wait-for-SIPI (WFS). */ - if (kvm_vcpu_latch_init(vcpu)) { + if (!kvm_apic_init_sipi_allowed(vcpu)) { WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED); - if (test_bit(KVM_APIC_SIPI, &apic->pending_events)) - clear_bit(KVM_APIC_SIPI, &apic->pending_events); - return; + clear_bit(KVM_APIC_SIPI, &apic->pending_events); + return 0; } - pe = xchg(&apic->pending_events, 0); - if (test_bit(KVM_APIC_INIT, &pe)) { + if (test_and_clear_bit(KVM_APIC_INIT, &apic->pending_events)) { kvm_vcpu_reset(vcpu, true); if (kvm_vcpu_is_bsp(apic->vcpu)) vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; else vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; } - if (test_bit(KVM_APIC_SIPI, &pe) && - vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { - /* evaluate pending_events before reading the vector */ - smp_rmb(); - sipi_vector = apic->sipi_vector; - kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector); - vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; + if (test_and_clear_bit(KVM_APIC_SIPI, &apic->pending_events)) { + if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { + /* evaluate pending_events before reading the vector */ + smp_rmb(); + sipi_vector = apic->sipi_vector; + static_call(kvm_x86_vcpu_deliver_sipi_vector)(vcpu, sipi_vector); + vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; + } } -} - -void kvm_lapic_init(void) -{ - /* do not patch jump label more than once per second */ - jump_label_rate_limit(&apic_hw_disabled, HZ); - jump_label_rate_limit(&apic_sw_disabled, HZ); + return 0; } void kvm_lapic_exit(void) { static_key_deferred_flush(&apic_hw_disabled); + WARN_ON(static_branch_unlikely(&apic_hw_disabled.key)); static_key_deferred_flush(&apic_sw_disabled); + WARN_ON(static_branch_unlikely(&apic_sw_disabled.key)); } |