Diffstat (limited to 'arch/x86/kvm/lapic.c')
-rw-r--r--	arch/x86/kvm/lapic.c	917
1 file changed, 601 insertions(+), 316 deletions(-)
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index e3099c642fec..d7639d126e6c 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -27,6 +27,7 @@
#include <linux/math64.h>
#include <linux/slab.h>
#include <asm/processor.h>
+#include <asm/mce.h>
#include <asm/msr.h>
#include <asm/page.h>
#include <asm/current.h>
@@ -36,6 +37,7 @@
#include <linux/jump_label.h>
#include "kvm_cache_regs.h"
#include "irq.h"
+#include "ioapic.h"
#include "trace.h"
#include "x86.h"
#include "cpuid.h"
@@ -53,15 +55,12 @@
#define PRIo64 "o"
/* 14 is the version for Xeon and Pentium 8.4.8 */
-#define APIC_VERSION (0x14UL | ((KVM_APIC_LVT_NUM - 1) << 16))
+#define APIC_VERSION 0x14UL
#define LAPIC_MMIO_LENGTH (1 << 12)
/* the following defines are not in apicdef.h */
#define MAX_APIC_VECTOR 256
#define APIC_VECTORS_PER_REG 32
-#define APIC_BROADCAST 0xFF
-#define X2APIC_BROADCAST 0xFFFFFFFFul
-
static bool lapic_timer_advance_dynamic __read_mostly;
#define LAPIC_TIMER_ADVANCE_ADJUST_MIN 100 /* clock cycles */
#define LAPIC_TIMER_ADVANCE_ADJUST_MAX 10000 /* clock cycles */
@@ -69,6 +68,41 @@ static bool lapic_timer_advance_dynamic __read_mostly;
#define LAPIC_TIMER_ADVANCE_NS_MAX 5000
/* step-by-step approximation to mitigate fluctuation */
#define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8
+static int kvm_lapic_msr_read(struct kvm_lapic *apic, u32 reg, u64 *data);
+static int kvm_lapic_msr_write(struct kvm_lapic *apic, u32 reg, u64 data);
+
+static inline void __kvm_lapic_set_reg(char *regs, int reg_off, u32 val)
+{
+ *((u32 *) (regs + reg_off)) = val;
+}
+
+static inline void kvm_lapic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val)
+{
+ __kvm_lapic_set_reg(apic->regs, reg_off, val);
+}
+
+static __always_inline u64 __kvm_lapic_get_reg64(char *regs, int reg)
+{
+ BUILD_BUG_ON(reg != APIC_ICR);
+ return *((u64 *) (regs + reg));
+}
+
+static __always_inline u64 kvm_lapic_get_reg64(struct kvm_lapic *apic, int reg)
+{
+ return __kvm_lapic_get_reg64(apic->regs, reg);
+}
+
+static __always_inline void __kvm_lapic_set_reg64(char *regs, int reg, u64 val)
+{
+ BUILD_BUG_ON(reg != APIC_ICR);
+ *((u64 *) (regs + reg)) = val;
+}
+
+static __always_inline void kvm_lapic_set_reg64(struct kvm_lapic *apic,
+ int reg, u64 val)
+{
+ __kvm_lapic_set_reg64(apic->regs, reg, val);
+}
static inline int apic_test_vector(int vec, void *bitmap)
{
@@ -93,8 +127,8 @@ static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}
-struct static_key_deferred apic_hw_disabled __read_mostly;
-struct static_key_deferred apic_sw_disabled __read_mostly;
+__read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(apic_hw_disabled, HZ);
+__read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(apic_sw_disabled, HZ);
static inline int apic_enabled(struct kvm_lapic *apic)
{
@@ -113,11 +147,19 @@ static inline u32 kvm_x2apic_id(struct kvm_lapic *apic)
return apic->vcpu->vcpu_id;
}
-bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu)
+static bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu)
{
- return pi_inject_timer && kvm_vcpu_apicv_active(vcpu);
+ return pi_inject_timer && kvm_vcpu_apicv_active(vcpu) &&
+ (kvm_mwait_in_guest(vcpu->kvm) || kvm_hlt_in_guest(vcpu->kvm));
}
-EXPORT_SYMBOL_GPL(kvm_can_post_timer_interrupt);
+
+bool kvm_can_use_hv_timer(struct kvm_vcpu *vcpu)
+{
+ return kvm_x86_ops.set_hv_timer
+ && !(kvm_mwait_in_guest(vcpu->kvm) ||
+ kvm_can_post_timer_interrupt(vcpu));
+}
+EXPORT_SYMBOL_GPL(kvm_can_use_hv_timer);
static bool kvm_use_posted_timer_interrupt(struct kvm_vcpu *vcpu)
{
@@ -164,14 +206,43 @@ static void kvm_apic_map_free(struct rcu_head *rcu)
kvfree(map);
}
-static void recalculate_apic_map(struct kvm *kvm)
+/*
+ * CLEAN -> DIRTY and UPDATE_IN_PROGRESS -> DIRTY changes happen without a lock.
+ *
+ * DIRTY -> UPDATE_IN_PROGRESS and UPDATE_IN_PROGRESS -> CLEAN happen with
+ * apic_map_lock_held.
+ */
+enum {
+ CLEAN,
+ UPDATE_IN_PROGRESS,
+ DIRTY
+};
+
+void kvm_recalculate_apic_map(struct kvm *kvm)
{
struct kvm_apic_map *new, *old = NULL;
struct kvm_vcpu *vcpu;
- int i;
+ unsigned long i;
u32 max_id = 255; /* enough space for any xAPIC ID */
+ /* Read kvm->arch.apic_map_dirty before kvm->arch.apic_map. */
+ if (atomic_read_acquire(&kvm->arch.apic_map_dirty) == CLEAN)
+ return;
+
+ WARN_ONCE(!irqchip_in_kernel(kvm),
+ "Dirty APIC map without an in-kernel local APIC");
+
mutex_lock(&kvm->arch.apic_map_lock);
+ /*
+ * Read kvm->arch.apic_map_dirty before kvm->arch.apic_map
+ * (if clean) or the APIC registers (if dirty).
+ */
+ if (atomic_cmpxchg_acquire(&kvm->arch.apic_map_dirty,
+ DIRTY, UPDATE_IN_PROGRESS) == CLEAN) {
+ /* Someone else has updated the map. */
+ mutex_unlock(&kvm->arch.apic_map_lock);
+ return;
+ }
kvm_for_each_vcpu(i, vcpu, kvm)
if (kvm_apic_present(vcpu))
@@ -236,6 +307,12 @@ out:
old = rcu_dereference_protected(kvm->arch.apic_map,
lockdep_is_held(&kvm->arch.apic_map_lock));
rcu_assign_pointer(kvm->arch.apic_map, new);
+ /*
+ * Write kvm->arch.apic_map before clearing apic->apic_map_dirty.
+ * If another update has come in, leave it DIRTY.
+ */
+ atomic_cmpxchg_release(&kvm->arch.apic_map_dirty,
+ UPDATE_IN_PROGRESS, CLEAN);
mutex_unlock(&kvm->arch.apic_map_lock);
if (old)
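
The recalculation flow above boils down to a lock-free dirty flag paired with a mutex-protected rebuild. A minimal standalone C11 sketch of the same CLEAN/UPDATE_IN_PROGRESS/DIRTY pattern, using stdatomic and a pthread mutex in place of the kernel's atomic_t helpers and apic_map_lock (illustration only, not part of the patch):

#include <stdatomic.h>
#include <pthread.h>

enum { CLEAN, UPDATE_IN_PROGRESS, DIRTY };

static _Atomic int map_dirty = CLEAN;
static pthread_mutex_t map_lock = PTHREAD_MUTEX_INITIALIZER;

/* Writers flag the map as stale without taking the lock. */
static void mark_map_dirty(void)
{
	atomic_store_explicit(&map_dirty, DIRTY, memory_order_release);
}

/* Readers that need a fresh map claim the update under the lock. */
static void recalculate_map(void)
{
	int expected = DIRTY;

	/* Pairs with the release store in mark_map_dirty(). */
	if (atomic_load_explicit(&map_dirty, memory_order_acquire) == CLEAN)
		return;

	pthread_mutex_lock(&map_lock);
	if (!atomic_compare_exchange_strong_explicit(&map_dirty, &expected,
						     UPDATE_IN_PROGRESS,
						     memory_order_acquire,
						     memory_order_relaxed)) {
		/* Someone else already rebuilt the map. */
		pthread_mutex_unlock(&map_lock);
		return;
	}

	/* ... rebuild and publish the new map here ... */

	/* Leave the flag DIRTY if another writer raced with the rebuild. */
	expected = UPDATE_IN_PROGRESS;
	atomic_compare_exchange_strong_explicit(&map_dirty, &expected, CLEAN,
						memory_order_release,
						memory_order_relaxed);
	pthread_mutex_unlock(&map_lock);
}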
@@ -253,24 +330,34 @@ static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
if (enabled != apic->sw_enabled) {
apic->sw_enabled = enabled;
if (enabled)
- static_key_slow_dec_deferred(&apic_sw_disabled);
+ static_branch_slow_dec_deferred(&apic_sw_disabled);
else
- static_key_slow_inc(&apic_sw_disabled.key);
+ static_branch_inc(&apic_sw_disabled.key);
- recalculate_apic_map(apic->vcpu->kvm);
+ atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
}
+
+ /* Check if there are APF page ready requests pending */
+ if (enabled)
+ kvm_make_request(KVM_REQ_APF_READY, apic->vcpu);
}
static inline void kvm_apic_set_xapic_id(struct kvm_lapic *apic, u8 id)
{
kvm_lapic_set_reg(apic, APIC_ID, id << 24);
- recalculate_apic_map(apic->vcpu->kvm);
+ atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
}
static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
{
kvm_lapic_set_reg(apic, APIC_LDR, id);
- recalculate_apic_map(apic->vcpu->kvm);
+ atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
+}
+
+static inline void kvm_apic_set_dfr(struct kvm_lapic *apic, u32 val)
+{
+ kvm_lapic_set_reg(apic, APIC_DFR, val);
+ atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
}
static inline u32 kvm_apic_calc_x2apic_ldr(u32 id)
@@ -286,7 +373,7 @@ static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id)
kvm_lapic_set_reg(apic, APIC_ID, id);
kvm_lapic_set_reg(apic, APIC_LDR, ldr);
- recalculate_apic_map(apic->vcpu->kvm);
+ atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
}
static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
@@ -294,11 +381,6 @@ static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
return !(kvm_lapic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
}
-static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type)
-{
- return kvm_lapic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK;
-}
-
static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
{
return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_ONESHOT;
@@ -319,15 +401,26 @@ static inline int apic_lvt_nmi_mode(u32 lvt_val)
return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
}
+static inline bool kvm_lapic_lvt_supported(struct kvm_lapic *apic, int lvt_index)
+{
+ return apic->nr_lvt_entries > lvt_index;
+}
+
+static inline int kvm_apic_calc_nr_lvt_entries(struct kvm_vcpu *vcpu)
+{
+ return KVM_APIC_MAX_NR_LVT_ENTRIES - !(vcpu->arch.mcg_cap & MCG_CMCI_P);
+}
+
void kvm_apic_set_version(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
- struct kvm_cpuid_entry2 *feat;
- u32 v = APIC_VERSION;
+ u32 v = 0;
if (!lapic_in_kernel(vcpu))
return;
+ v = APIC_VERSION | ((apic->nr_lvt_entries - 1) << 16);
+
/*
* KVM emulates 82093AA datasheet (with in-kernel IOAPIC implementation)
* which doesn't have EOI register; Some buggy OSes (e.g. Windows with
@@ -335,19 +428,39 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu)
* version first and level-triggered interrupts never get EOIed in
* IOAPIC.
*/
- feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0);
- if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31))) &&
+ if (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) &&
!ioapic_in_kernel(vcpu->kvm))
v |= APIC_LVR_DIRECTED_EOI;
kvm_lapic_set_reg(apic, APIC_LVR, v);
}
-static const unsigned int apic_lvt_mask[KVM_APIC_LVT_NUM] = {
- LVT_MASK , /* part LVTT mask, timer mode mask added at runtime */
- LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */
- LVT_MASK | APIC_MODE_MASK, /* LVTPC */
- LINT_MASK, LINT_MASK, /* LVT0-1 */
- LVT_MASK /* LVTERR */
+void kvm_apic_after_set_mcg_cap(struct kvm_vcpu *vcpu)
+{
+ int nr_lvt_entries = kvm_apic_calc_nr_lvt_entries(vcpu);
+ struct kvm_lapic *apic = vcpu->arch.apic;
+ int i;
+
+ if (!lapic_in_kernel(vcpu) || nr_lvt_entries == apic->nr_lvt_entries)
+ return;
+
+ /* Initialize/mask any "new" LVT entries. */
+ for (i = apic->nr_lvt_entries; i < nr_lvt_entries; i++)
+ kvm_lapic_set_reg(apic, APIC_LVTx(i), APIC_LVT_MASKED);
+
+ apic->nr_lvt_entries = nr_lvt_entries;
+
+ /* The number of LVT entries is reflected in the version register. */
+ kvm_apic_set_version(vcpu);
+}
+
+static const unsigned int apic_lvt_mask[KVM_APIC_MAX_NR_LVT_ENTRIES] = {
+ [LVT_TIMER] = LVT_MASK, /* timer mode mask added at runtime */
+ [LVT_THERMAL_MONITOR] = LVT_MASK | APIC_MODE_MASK,
+ [LVT_PERFORMANCE_COUNTER] = LVT_MASK | APIC_MODE_MASK,
+ [LVT_LINT0] = LINT_MASK,
+ [LVT_LINT1] = LINT_MASK,
+ [LVT_ERROR] = LVT_MASK,
+ [LVT_CMCI] = LVT_MASK | APIC_MODE_MASK
};
static int find_highest_vector(void *bitmap)
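
A standalone sketch of how the LVT count above is derived and folded into the version register; KVM_APIC_MAX_NR_LVT_ENTRIES (7) and MCG_CMCI_P (bit 10 of MCG_CAP) are assumed values from the headers this patch builds on, not definitions introduced here:

#include <stdint.h>

#define KVM_APIC_MAX_NR_LVT_ENTRIES	7		/* assumed: LVTT..LVTERR + CMCI */
#define MCG_CMCI_P			(1ULL << 10)	/* assumed: CMCI capability bit */

static int calc_nr_lvt_entries(uint64_t mcg_cap)
{
	/* Hide the CMCI LVT when the vCPU's MCG_CAP doesn't advertise CMCI. */
	return KVM_APIC_MAX_NR_LVT_ENTRIES - !(mcg_cap & MCG_CMCI_P);
}

static uint32_t apic_version(uint64_t mcg_cap)
{
	/* The max LVT index is reported in bits 16-23 of APIC_LVR. */
	return 0x14u | ((uint32_t)(calc_nr_lvt_entries(mcg_cap) - 1) << 16);
}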
@@ -441,15 +554,11 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic)
static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
{
- struct kvm_vcpu *vcpu;
-
- vcpu = apic->vcpu;
-
- if (unlikely(vcpu->arch.apicv_active)) {
+ if (unlikely(apic->apicv_active)) {
/* need to update RVI */
kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
- kvm_x86_ops->hwapic_irr_update(vcpu,
- apic_find_highest_irr(apic));
+ static_call_cond(kvm_x86_hwapic_irr_update)(apic->vcpu,
+ apic_find_highest_irr(apic));
} else {
apic->irr_pending = false;
kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
@@ -458,22 +567,24 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
}
}
-static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
+void kvm_apic_clear_irr(struct kvm_vcpu *vcpu, int vec)
{
- struct kvm_vcpu *vcpu;
+ apic_clear_irr(vec, vcpu->arch.apic);
+}
+EXPORT_SYMBOL_GPL(kvm_apic_clear_irr);
+static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
+{
if (__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
return;
- vcpu = apic->vcpu;
-
/*
* With APIC virtualization enabled, all caching is disabled
* because the processor can modify ISR under the hood. Instead
* just set SVI.
*/
- if (unlikely(vcpu->arch.apicv_active))
- kvm_x86_ops->hwapic_isr_update(vcpu, vec);
+ if (unlikely(apic->apicv_active))
+ static_call_cond(kvm_x86_hwapic_isr_update)(vec);
else {
++apic->isr_count;
BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
@@ -507,12 +618,9 @@ static inline int apic_find_highest_isr(struct kvm_lapic *apic)
static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
{
- struct kvm_vcpu *vcpu;
if (!__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR))
return;
- vcpu = apic->vcpu;
-
/*
* We do get here for APIC virtualization enabled if the guest
* uses the Hyper-V APIC enlightenment. In this case we may need
@@ -520,9 +628,8 @@ static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
* on the other hand isr_count and highest_isr_cache are unused
* and must be left alone.
*/
- if (unlikely(vcpu->arch.apicv_active))
- kvm_x86_ops->hwapic_isr_update(vcpu,
- apic_find_highest_isr(apic));
+ if (unlikely(apic->apicv_active))
+ static_call_cond(kvm_x86_hwapic_isr_update)(apic_find_highest_isr(apic));
else {
--apic->isr_count;
BUG_ON(apic->isr_count < 0);
@@ -624,42 +731,41 @@ static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu)
return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
}
-static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
-{
- u8 val;
- if (pv_eoi_get_user(vcpu, &val) < 0) {
- printk(KERN_WARNING "Can't read EOI MSR value: 0x%llx\n",
- (unsigned long long)vcpu->arch.pv_eoi.msr_val);
- return false;
- }
- return val & 0x1;
-}
-
static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
{
- if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) {
- printk(KERN_WARNING "Can't set EOI MSR value: 0x%llx\n",
- (unsigned long long)vcpu->arch.pv_eoi.msr_val);
+ if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0)
return;
- }
+
__set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
}
-static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
+static bool pv_eoi_test_and_clr_pending(struct kvm_vcpu *vcpu)
{
- if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) {
- printk(KERN_WARNING "Can't clear EOI MSR value: 0x%llx\n",
- (unsigned long long)vcpu->arch.pv_eoi.msr_val);
- return;
- }
+ u8 val;
+
+ if (pv_eoi_get_user(vcpu, &val) < 0)
+ return false;
+
+ val &= KVM_PV_EOI_ENABLED;
+
+ if (val && pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0)
+ return false;
+
+ /*
+ * Clear pending bit in any case: it will be set again on vmentry.
+ * While this might not be ideal from performance point of view,
+ * this makes sure pv eoi is only enabled when we know it's safe.
+ */
__clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
+
+ return val;
}
static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr)
{
int highest_irr;
- if (apic->vcpu->arch.apicv_active)
- highest_irr = kvm_x86_ops->sync_pir_to_irr(apic->vcpu);
+ if (kvm_x86_ops.sync_pir_to_irr)
+ highest_irr = static_call(kvm_x86_sync_pir_to_irr)(apic->vcpu);
else
highest_irr = apic_find_highest_irr(apic);
if (highest_irr == -1 || (highest_irr & 0xF0) <= ppr)
@@ -721,17 +827,17 @@ static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda)
if (kvm_apic_broadcast(apic, mda))
return true;
- if (apic_x2apic_mode(apic))
- return mda == kvm_x2apic_id(apic);
-
/*
- * Hotplug hack: Make LAPIC in xAPIC mode also accept interrupts as if
- * it were in x2APIC mode. Hotplugged VCPUs start in xAPIC mode and
- * this allows unique addressing of VCPUs with APIC ID over 0xff.
- * The 0xff condition is needed because writeable xAPIC ID.
+ * Hotplug hack: Accept interrupts for vCPUs in xAPIC mode as if they
+ * were in x2APIC mode if the target APIC ID can't be encoded as an
+ * xAPIC ID. This allows unique addressing of hotplugged vCPUs (which
+ * start in xAPIC mode) with an APIC ID that is unaddressable in xAPIC
+ * mode. Match the x2APIC ID if and only if the target APIC ID can't
+ * be encoded in xAPIC to avoid spurious matches against a vCPU that
+ * changed its (addressable) xAPIC ID (which is writable).
*/
- if (kvm_x2apic_id(apic) > 0xff && mda == kvm_x2apic_id(apic))
- return true;
+ if (apic_x2apic_mode(apic) || mda > 0xff)
+ return mda == kvm_x2apic_id(apic);
return mda == kvm_xapic_id(apic);
}
@@ -944,6 +1050,10 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
*r = -1;
if (irq->shorthand == APIC_DEST_SELF) {
+ if (KVM_BUG_ON(!src, kvm)) {
+ *r = 0;
+ return true;
+ }
*r = kvm_apic_set_irq(src->vcpu, irq, dest_map);
return true;
}
@@ -1023,7 +1133,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
switch (delivery_mode) {
case APIC_DM_LOWEST:
vcpu->arch.apic_arb_prio++;
- /* fall through */
+ fallthrough;
case APIC_DM_FIXED:
if (unlikely(trig_mode && !level))
break;
@@ -1048,11 +1158,8 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
apic->regs + APIC_TMR);
}
- if (kvm_x86_ops->deliver_posted_interrupt(vcpu, vector)) {
- kvm_lapic_set_irr(vector, apic);
- kvm_make_request(KVM_REQ_EVENT, vcpu);
- kvm_vcpu_kick(vcpu);
- }
+ static_call(kvm_x86_deliver_interrupt)(apic, delivery_mode,
+ trig_mode, vector);
break;
case APIC_DM_REMRD:
@@ -1123,8 +1230,8 @@ void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq,
struct kvm_lapic *src = NULL;
struct kvm_apic_map *map;
struct kvm_vcpu *vcpu;
- unsigned long bitmap;
- int i, vcpu_idx;
+ unsigned long bitmap, i;
+ int vcpu_idx;
bool ret;
rcu_read_lock();
@@ -1203,7 +1310,8 @@ static int apic_set_eoi(struct kvm_lapic *apic)
apic_clear_isr(vector, apic);
apic_update_ppr(apic);
- if (test_bit(vector, vcpu_to_synic(apic->vcpu)->vec_bitmap))
+ if (to_hv_vcpu(apic->vcpu) &&
+ test_bit(vector, to_hv_synic(apic->vcpu)->vec_bitmap))
kvm_hv_synic_send_eoi(apic->vcpu, vector);
kvm_ioapic_send_eoi(apic, vector);
@@ -1226,10 +1334,13 @@ void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector)
}
EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated);
-static void apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high)
+void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high)
{
struct kvm_lapic_irq irq;
+ /* KVM has no delay and should always clear the BUSY/PENDING flag. */
+ WARN_ON_ONCE(icr_low & APIC_ICR_BUSY);
+
irq.vector = icr_low & APIC_VECTOR_MASK;
irq.delivery_mode = icr_low & APIC_MODE_MASK;
irq.dest_mode = icr_low & APIC_DEST_MASK;
@@ -1240,12 +1351,13 @@ static void apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high)
if (apic_x2apic_mode(apic))
irq.dest_id = icr_high;
else
- irq.dest_id = GET_APIC_DEST_FIELD(icr_high);
+ irq.dest_id = GET_XAPIC_DEST_FIELD(icr_high);
trace_kvm_apic_ipi(icr_low, irq.dest_id);
kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL);
}
+EXPORT_SYMBOL_GPL(kvm_apic_send_ipi);
static u32 apic_get_tmcct(struct kvm_lapic *apic)
{
@@ -1311,7 +1423,7 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
break;
case APIC_TASKPRI:
report_tpr_access(apic, false);
- /* fall thru */
+ fallthrough;
default:
val = kvm_lapic_get_reg(apic, offset);
break;
@@ -1329,8 +1441,8 @@ static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev)
#define APIC_REGS_MASK(first, count) \
(APIC_REG_MASK(first) * ((1ull << (count)) - 1))
-int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
- void *data)
+static int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
+ void *data)
{
unsigned char alignment = offset & 0xf;
u32 result;
@@ -1348,7 +1460,6 @@ int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
APIC_REGS_MASK(APIC_IRR, APIC_ISR_NR) |
APIC_REG_MASK(APIC_ESR) |
APIC_REG_MASK(APIC_ICR) |
- APIC_REG_MASK(APIC_ICR2) |
APIC_REG_MASK(APIC_LVTT) |
APIC_REG_MASK(APIC_LVTTHMR) |
APIC_REG_MASK(APIC_LVTPC) |
@@ -1359,9 +1470,22 @@ int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
APIC_REG_MASK(APIC_TMCCT) |
APIC_REG_MASK(APIC_TDCR);
- /* ARBPRI is not valid on x2APIC */
+ if (kvm_lapic_lvt_supported(apic, LVT_CMCI))
+ valid_reg_mask |= APIC_REG_MASK(APIC_LVTCMCI);
+
+ /*
+ * ARBPRI and ICR2 are not valid in x2APIC mode. WARN if KVM reads ICR
+ * in x2APIC mode as it's an 8-byte register in x2APIC and needs to be
+ * manually handled by the caller.
+ */
if (!apic_x2apic_mode(apic))
- valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI);
+ valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI) |
+ APIC_REG_MASK(APIC_ICR2);
+ else
+ WARN_ON_ONCE(offset == APIC_ICR);
+
+ if (alignment + len > 4)
+ return 1;
if (offset > 0x3f0 || !(valid_reg_mask & APIC_REG_MASK(offset)))
return 1;
@@ -1383,7 +1507,6 @@ int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
}
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_lapic_reg_read);
static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
{
@@ -1445,6 +1568,18 @@ static void limit_periodic_timer_frequency(struct kvm_lapic *apic)
}
}
+static void cancel_hv_timer(struct kvm_lapic *apic);
+
+static void cancel_apic_timer(struct kvm_lapic *apic)
+{
+ hrtimer_cancel(&apic->lapic_timer.timer);
+ preempt_disable();
+ if (apic->lapic_timer.hv_timer_in_use)
+ cancel_hv_timer(apic);
+ preempt_enable();
+ atomic_set(&apic->lapic_timer.pending, 0);
+}
+
static void apic_update_lvtt(struct kvm_lapic *apic)
{
u32 timer_mode = kvm_lapic_get_reg(apic, APIC_LVTT) &
@@ -1453,7 +1588,7 @@ static void apic_update_lvtt(struct kvm_lapic *apic)
if (apic->lapic_timer.timer_mode != timer_mode) {
if (apic_lvtt_tscdeadline(apic) != (timer_mode ==
APIC_LVT_TIMER_TSCDEADLINE)) {
- hrtimer_cancel(&apic->lapic_timer.timer);
+ cancel_apic_timer(apic);
kvm_lapic_set_reg(apic, APIC_TMICT, 0);
apic->lapic_timer.period = 0;
apic->lapic_timer.tscdeadline = 0;
@@ -1477,7 +1612,7 @@ static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu)
int vec = reg & APIC_VECTOR_MASK;
void *bitmap = apic->regs + APIC_ISR;
- if (vcpu->arch.apicv_active)
+ if (apic->apicv_active)
bitmap = apic->regs + APIC_IRR;
if (apic_test_vector(vec, bitmap))
@@ -1496,7 +1631,7 @@ static inline void __wait_lapic_expire(struct kvm_vcpu *vcpu, u64 guest_cycles)
* that __delay() uses delay_tsc whenever the hardware has TSC, thus
* always for VMX enabled hardware.
*/
- if (vcpu->arch.tsc_scaling_ratio == kvm_default_tsc_scaling_ratio) {
+ if (vcpu->arch.tsc_scaling_ratio == kvm_caps.default_tsc_scaling_ratio) {
__delay(min(guest_cycles,
nsec_to_cycles(vcpu, timer_advance_ns)));
} else {
@@ -1540,24 +1675,32 @@ static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
struct kvm_lapic *apic = vcpu->arch.apic;
u64 guest_tsc, tsc_deadline;
- if (apic->lapic_timer.expired_tscdeadline == 0)
- return;
-
tsc_deadline = apic->lapic_timer.expired_tscdeadline;
apic->lapic_timer.expired_tscdeadline = 0;
guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
- apic->lapic_timer.advance_expire_delta = guest_tsc - tsc_deadline;
+ trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline);
+
+ if (lapic_timer_advance_dynamic) {
+ adjust_lapic_timer_advance(vcpu, guest_tsc - tsc_deadline);
+ /*
+ * If the timer fired early, reread the TSC to account for the
+ * overhead of the above adjustment to avoid waiting longer
+ * than is necessary.
+ */
+ if (guest_tsc < tsc_deadline)
+ guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
+ }
if (guest_tsc < tsc_deadline)
__wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
-
- if (lapic_timer_advance_dynamic)
- adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta);
}
void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
{
- if (lapic_timer_int_injected(vcpu))
+ if (lapic_in_kernel(vcpu) &&
+ vcpu->arch.apic->lapic_timer.expired_tscdeadline &&
+ vcpu->arch.apic->lapic_timer.timer_advance_ns &&
+ lapic_timer_int_injected(vcpu))
__kvm_wait_lapic_expire(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_wait_lapic_expire);
@@ -1575,7 +1718,7 @@ static void kvm_apic_inject_pending_timer_irqs(struct kvm_lapic *apic)
}
}
-static void apic_timer_expired(struct kvm_lapic *apic)
+static void apic_timer_expired(struct kvm_lapic *apic, bool from_timer_fn)
{
struct kvm_vcpu *vcpu = apic->vcpu;
struct kvm_timer *ktimer = &apic->lapic_timer;
@@ -1586,15 +1729,31 @@ static void apic_timer_expired(struct kvm_lapic *apic)
if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use)
ktimer->expired_tscdeadline = ktimer->tscdeadline;
+ if (!from_timer_fn && apic->apicv_active) {
+ WARN_ON(kvm_get_running_vcpu() != vcpu);
+ kvm_apic_inject_pending_timer_irqs(apic);
+ return;
+ }
+
if (kvm_use_posted_timer_interrupt(apic->vcpu)) {
- if (apic->lapic_timer.timer_advance_ns)
+ /*
+ * Ensure the guest's timer has truly expired before posting an
+ * interrupt. Open code the relevant checks to avoid querying
+ * lapic_timer_int_injected(), which will be false since the
+ * interrupt isn't yet injected. Waiting until after injecting
+ * is not an option since that won't help a posted interrupt.
+ */
+ if (vcpu->arch.apic->lapic_timer.expired_tscdeadline &&
+ vcpu->arch.apic->lapic_timer.timer_advance_ns)
__kvm_wait_lapic_expire(vcpu);
kvm_apic_inject_pending_timer_irqs(apic);
return;
}
atomic_inc(&apic->lapic_timer.pending);
- kvm_set_pending_timer(vcpu);
+ kvm_make_request(KVM_REQ_UNBLOCK, vcpu);
+ if (from_timer_fn)
+ kvm_vcpu_kick(vcpu);
}
static void start_sw_tscdeadline(struct kvm_lapic *apic)
@@ -1625,18 +1784,23 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)
expire = ktime_sub_ns(expire, ktimer->timer_advance_ns);
hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS_HARD);
} else
- apic_timer_expired(apic);
+ apic_timer_expired(apic, false);
local_irq_restore(flags);
}
+static inline u64 tmict_to_ns(struct kvm_lapic *apic, u32 tmict)
+{
+ return (u64)tmict * APIC_BUS_CYCLE_NS * (u64)apic->divide_count;
+}
+
static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_divisor)
{
ktime_t now, remaining;
u64 ns_remaining_old, ns_remaining_new;
- apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT)
- * APIC_BUS_CYCLE_NS * apic->divide_count;
+ apic->lapic_timer.period =
+ tmict_to_ns(apic, kvm_lapic_get_reg(apic, APIC_TMICT));
limit_periodic_timer_frequency(apic);
now = ktime_get();
@@ -1654,14 +1818,15 @@ static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_diviso
apic->lapic_timer.target_expiration = ktime_add_ns(now, ns_remaining_new);
}
-static bool set_target_expiration(struct kvm_lapic *apic)
+static bool set_target_expiration(struct kvm_lapic *apic, u32 count_reg)
{
ktime_t now;
u64 tscl = rdtsc();
+ s64 deadline;
now = ktime_get();
- apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT)
- * APIC_BUS_CYCLE_NS * apic->divide_count;
+ apic->lapic_timer.period =
+ tmict_to_ns(apic, kvm_lapic_get_reg(apic, APIC_TMICT));
if (!apic->lapic_timer.period) {
apic->lapic_timer.tscdeadline = 0;
@@ -1669,10 +1834,32 @@ static bool set_target_expiration(struct kvm_lapic *apic)
}
limit_periodic_timer_frequency(apic);
+ deadline = apic->lapic_timer.period;
+
+ if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
+ if (unlikely(count_reg != APIC_TMICT)) {
+ deadline = tmict_to_ns(apic,
+ kvm_lapic_get_reg(apic, count_reg));
+ if (unlikely(deadline <= 0))
+ deadline = apic->lapic_timer.period;
+ else if (unlikely(deadline > apic->lapic_timer.period)) {
+ pr_info_ratelimited(
+ "kvm: vcpu %i: requested lapic timer restore with "
+ "starting count register %#x=%u (%lld ns) > initial count (%lld ns). "
+ "Using initial count to start timer.\n",
+ apic->vcpu->vcpu_id,
+ count_reg,
+ kvm_lapic_get_reg(apic, count_reg),
+ deadline, apic->lapic_timer.period);
+ kvm_lapic_set_reg(apic, count_reg, 0);
+ deadline = apic->lapic_timer.period;
+ }
+ }
+ }
apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
- nsec_to_cycles(apic->vcpu, apic->lapic_timer.period);
- apic->lapic_timer.target_expiration = ktime_add_ns(now, apic->lapic_timer.period);
+ nsec_to_cycles(apic->vcpu, deadline);
+ apic->lapic_timer.target_expiration = ktime_add_ns(now, deadline);
return true;
}
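
A standalone sketch of the tmict_to_ns() arithmetic used by set_target_expiration() above, assuming APIC_BUS_CYCLE_NS is 1 as in lapic.h; the figures in main() are purely illustrative:

#include <stdint.h>
#include <stdio.h>

#define APIC_BUS_CYCLE_NS 1	/* assumed value from lapic.h */

static uint64_t tmict_to_ns(uint32_t tmict, uint32_t divide_count)
{
	/* Initial count scaled by the bus cycle time and divide configuration. */
	return (uint64_t)tmict * APIC_BUS_CYCLE_NS * divide_count;
}

int main(void)
{
	/* TMICT = 1,000,000 with divide-by-16 -> a 16 ms period. */
	printf("%llu ns\n", (unsigned long long)tmict_to_ns(1000000, 16));
	return 0;
}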
@@ -1705,7 +1892,7 @@ static void start_sw_period(struct kvm_lapic *apic)
if (ktime_after(ktime_get(),
apic->lapic_timer.target_expiration)) {
- apic_timer_expired(apic);
+ apic_timer_expired(apic, false);
if (apic_lvtt_oneshot(apic))
return;
@@ -1715,7 +1902,7 @@ static void start_sw_period(struct kvm_lapic *apic)
hrtimer_start(&apic->lapic_timer.timer,
apic->lapic_timer.target_expiration,
- HRTIMER_MODE_ABS);
+ HRTIMER_MODE_ABS_HARD);
}
bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
@@ -1731,7 +1918,7 @@ static void cancel_hv_timer(struct kvm_lapic *apic)
{
WARN_ON(preemptible());
WARN_ON(!apic->lapic_timer.hv_timer_in_use);
- kvm_x86_ops->cancel_hv_timer(apic->vcpu);
+ static_call(kvm_x86_cancel_hv_timer)(apic->vcpu);
apic->lapic_timer.hv_timer_in_use = false;
}
@@ -1742,13 +1929,13 @@ static bool start_hv_timer(struct kvm_lapic *apic)
bool expired;
WARN_ON(preemptible());
- if (!kvm_x86_ops->set_hv_timer)
+ if (!kvm_can_use_hv_timer(vcpu))
return false;
if (!ktimer->tscdeadline)
return false;
- if (kvm_x86_ops->set_hv_timer(vcpu, ktimer->tscdeadline, &expired))
+ if (static_call(kvm_x86_set_hv_timer)(vcpu, ktimer->tscdeadline, &expired))
return false;
ktimer->hv_timer_in_use = true;
@@ -1767,7 +1954,7 @@ static bool start_hv_timer(struct kvm_lapic *apic)
if (atomic_read(&ktimer->pending)) {
cancel_hv_timer(apic);
} else if (expired) {
- apic_timer_expired(apic);
+ apic_timer_expired(apic, false);
cancel_hv_timer(apic);
}
}
@@ -1815,9 +2002,9 @@ void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
/* If the preempt notifier has already run, it also called apic_timer_expired */
if (!apic->lapic_timer.hv_timer_in_use)
goto out;
- WARN_ON(swait_active(&vcpu->wq));
+ WARN_ON(kvm_vcpu_is_blocking(vcpu));
+ apic_timer_expired(apic, false);
cancel_hv_timer(apic);
- apic_timer_expired(apic);
if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
advance_periodic_target_expiration(apic);
@@ -1832,7 +2019,6 @@ void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu)
{
restart_apic_timer(vcpu->arch.apic);
}
-EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_hv_timer);
void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu)
{
@@ -1844,7 +2030,6 @@ void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu)
start_sw_timer(apic);
preempt_enable();
}
-EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_sw_timer);
void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu)
{
@@ -1854,17 +2039,22 @@ void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu)
restart_apic_timer(apic);
}
-static void start_apic_timer(struct kvm_lapic *apic)
+static void __start_apic_timer(struct kvm_lapic *apic, u32 count_reg)
{
atomic_set(&apic->lapic_timer.pending, 0);
if ((apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
- && !set_target_expiration(apic))
+ && !set_target_expiration(apic, count_reg))
return;
restart_apic_timer(apic);
}
+static void start_apic_timer(struct kvm_lapic *apic)
+{
+ __start_apic_timer(apic, APIC_TMICT);
+}
+
static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
{
bool lvt0_in_nmi_mode = apic_lvt_nmi_mode(lvt0_val);
@@ -1878,7 +2068,30 @@ static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
}
}
-int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
+static void kvm_lapic_xapic_id_updated(struct kvm_lapic *apic)
+{
+ struct kvm *kvm = apic->vcpu->kvm;
+
+ if (KVM_BUG_ON(apic_x2apic_mode(apic), kvm))
+ return;
+
+ if (kvm_xapic_id(apic) == apic->vcpu->vcpu_id)
+ return;
+
+ kvm_set_apicv_inhibit(apic->vcpu->kvm, APICV_INHIBIT_REASON_APIC_ID_MODIFIED);
+}
+
+static int get_lvt_index(u32 reg)
+{
+ if (reg == APIC_LVTCMCI)
+ return LVT_CMCI;
+ if (reg < APIC_LVTT || reg > APIC_LVTERR)
+ return -1;
+ return array_index_nospec(
+ (reg - APIC_LVTT) >> 4, KVM_APIC_MAX_NR_LVT_ENTRIES);
+}
+
+static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
{
int ret = 0;
@@ -1886,10 +2099,12 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
switch (reg) {
case APIC_ID: /* Local APIC ID */
- if (!apic_x2apic_mode(apic))
+ if (!apic_x2apic_mode(apic)) {
kvm_apic_set_xapic_id(apic, val >> 24);
- else
+ kvm_lapic_xapic_id_updated(apic);
+ } else {
ret = 1;
+ }
break;
case APIC_TASKPRI:
@@ -1909,10 +2124,9 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
break;
case APIC_DFR:
- if (!apic_x2apic_mode(apic)) {
- kvm_lapic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF);
- recalculate_apic_map(apic->vcpu->kvm);
- } else
+ if (!apic_x2apic_mode(apic))
+ kvm_apic_set_dfr(apic, val | 0x0FFFFFFF);
+ else
ret = 1;
break;
@@ -1923,13 +2137,10 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
apic_set_spiv(apic, val & mask);
if (!(val & APIC_SPIV_APIC_ENABLED)) {
int i;
- u32 lvt_val;
- for (i = 0; i < KVM_APIC_LVT_NUM; i++) {
- lvt_val = kvm_lapic_get_reg(apic,
- APIC_LVTT + 0x10 * i);
- kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i,
- lvt_val | APIC_LVT_MASKED);
+ for (i = 0; i < apic->nr_lvt_entries; i++) {
+ kvm_lapic_set_reg(apic, APIC_LVTx(i),
+ kvm_lapic_get_reg(apic, APIC_LVTx(i)) | APIC_LVT_MASKED);
}
apic_update_lvtt(apic);
atomic_set(&apic->lapic_timer.pending, 0);
@@ -1938,34 +2149,35 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
break;
}
case APIC_ICR:
+ WARN_ON_ONCE(apic_x2apic_mode(apic));
+
/* No delay here, so we always clear the pending bit */
- val &= ~(1 << 12);
- apic_send_ipi(apic, val, kvm_lapic_get_reg(apic, APIC_ICR2));
+ val &= ~APIC_ICR_BUSY;
+ kvm_apic_send_ipi(apic, val, kvm_lapic_get_reg(apic, APIC_ICR2));
kvm_lapic_set_reg(apic, APIC_ICR, val);
break;
-
case APIC_ICR2:
- if (!apic_x2apic_mode(apic))
- val &= 0xff000000;
- kvm_lapic_set_reg(apic, APIC_ICR2, val);
+ if (apic_x2apic_mode(apic))
+ ret = 1;
+ else
+ kvm_lapic_set_reg(apic, APIC_ICR2, val & 0xff000000);
break;
case APIC_LVT0:
apic_manage_nmi_watchdog(apic, val);
- /* fall through */
+ fallthrough;
case APIC_LVTTHMR:
case APIC_LVTPC:
case APIC_LVT1:
- case APIC_LVTERR: {
- /* TODO: Check vector */
- size_t size;
- u32 index;
-
+ case APIC_LVTERR:
+ case APIC_LVTCMCI: {
+ u32 index = get_lvt_index(reg);
+ if (!kvm_lapic_lvt_supported(apic, index)) {
+ ret = 1;
+ break;
+ }
if (!kvm_apic_sw_enabled(apic))
val |= APIC_LVT_MASKED;
- size = ARRAY_SIZE(apic_lvt_mask);
- index = array_index_nospec(
- (reg - APIC_LVTT) >> 4, size);
val &= apic_lvt_mask[index];
kvm_lapic_set_reg(apic, reg, val);
break;
@@ -1983,7 +2195,7 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
if (apic_lvtt_tscdeadline(apic))
break;
- hrtimer_cancel(&apic->lapic_timer.timer);
+ cancel_apic_timer(apic);
kvm_lapic_set_reg(apic, APIC_TMICT, val);
start_apic_timer(apic);
break;
@@ -1991,7 +2203,7 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
case APIC_TDCR: {
uint32_t old_divisor = apic->divide_count;
- kvm_lapic_set_reg(apic, APIC_TDCR, val);
+ kvm_lapic_set_reg(apic, APIC_TDCR, val & 0xb);
update_divide_count(apic);
if (apic->divide_count != old_divisor &&
apic->lapic_timer.period) {
@@ -2007,9 +2219,9 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
break;
case APIC_SELF_IPI:
- if (apic_x2apic_mode(apic)) {
- kvm_lapic_reg_write(apic, APIC_ICR, 0x40000 | (val & 0xff));
- } else
+ if (apic_x2apic_mode(apic))
+ kvm_apic_send_ipi(apic, APIC_DEST_SELF | (val & APIC_VECTOR_MASK), 0);
+ else
ret = 1;
break;
default:
@@ -2017,9 +2229,15 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
break;
}
+ /*
+ * Recalculate APIC maps if necessary, e.g. if the software enable bit
+ * was toggled, the APIC ID changed, etc... The maps are marked dirty
+ * on relevant changes, i.e. this is a nop for most writes.
+ */
+ kvm_recalculate_apic_map(apic->vcpu->kvm);
+
return ret;
}
-EXPORT_SYMBOL_GPL(kvm_lapic_reg_write);
static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
gpa_t address, int len, const void *data)
@@ -2063,15 +2281,28 @@ EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);
/* emulate APIC access in a trap manner */
void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
{
- u32 val = 0;
-
- /* hw has done the conditional check and inst decode */
- offset &= 0xff0;
+ struct kvm_lapic *apic = vcpu->arch.apic;
+ u64 val;
- kvm_lapic_reg_read(vcpu->arch.apic, offset, 4, &val);
+ if (apic_x2apic_mode(apic)) {
+ if (KVM_BUG_ON(kvm_lapic_msr_read(apic, offset, &val), vcpu->kvm))
+ return;
+ } else {
+ val = kvm_lapic_get_reg(apic, offset);
+ }
- /* TODO: optimize to just emulate side effect w/o one more write */
- kvm_lapic_reg_write(vcpu->arch.apic, offset, val);
+ /*
+ * ICR is a single 64-bit register when x2APIC is enabled. For legacy
+ * xAPIC, ICR writes need to go down the common (slightly slower) path
+ * to get the upper half from ICR2.
+ */
+ if (apic_x2apic_mode(apic) && offset == APIC_ICR) {
+ kvm_apic_send_ipi(apic, (u32)val, (u32)(val >> 32));
+ trace_kvm_apic_write(APIC_ICR, val);
+ } else {
+ /* TODO: optimize to just emulate side effect w/o one more write */
+ kvm_lapic_reg_write(apic, offset, (u32)val);
+ }
}
EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode);
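
A hypothetical helper pair (not part of the patch) showing how the single 64-bit x2APIC ICR value corresponds to the legacy ICR/ICR2 halves used on the xAPIC path and in the userspace register layout:

#include <stdint.h>

static inline void icr64_to_xapic(uint64_t icr, uint32_t *icr_lo, uint32_t *icr2)
{
	*icr_lo = (uint32_t)icr;		/* vector, delivery mode, flags */
	*icr2   = (uint32_t)(icr >> 32);	/* destination field */
}

static inline uint64_t xapic_to_icr64(uint32_t icr_lo, uint32_t icr2)
{
	return ((uint64_t)icr2 << 32) | icr_lo;
}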
@@ -2085,10 +2316,10 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu)
hrtimer_cancel(&apic->lapic_timer.timer);
if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE))
- static_key_slow_dec_deferred(&apic_hw_disabled);
+ static_branch_slow_dec_deferred(&apic_hw_disabled);
if (!apic->sw_enabled)
- static_key_slow_dec_deferred(&apic_sw_disabled);
+ static_branch_slow_dec_deferred(&apic_sw_disabled);
if (apic->regs)
free_page((unsigned long)apic->regs);
@@ -2105,8 +2336,7 @@ u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
- if (!lapic_in_kernel(vcpu) ||
- !apic_lvtt_tscdeadline(apic))
+ if (!kvm_apic_present(vcpu) || !apic_lvtt_tscdeadline(apic))
return 0;
return apic->lapic_timer.tscdeadline;
@@ -2116,8 +2346,7 @@ void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
{
struct kvm_lapic *apic = vcpu->arch.apic;
- if (!lapic_in_kernel(vcpu) || apic_lvtt_oneshot(apic) ||
- apic_lvtt_period(apic))
+ if (!kvm_apic_present(vcpu) || !apic_lvtt_tscdeadline(apic))
return;
hrtimer_cancel(&apic->lapic_timer.timer);
@@ -2127,10 +2356,7 @@ void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
{
- struct kvm_lapic *apic = vcpu->arch.apic;
-
- apic_set_tpr(apic, ((cr8 & 0x0f) << 4)
- | (kvm_lapic_get_reg(apic, APIC_TASKPRI) & 4));
+ apic_set_tpr(vcpu->arch.apic, (cr8 & 0x0f) << 4);
}
u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
@@ -2147,13 +2373,10 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
u64 old_value = vcpu->arch.apic_base;
struct kvm_lapic *apic = vcpu->arch.apic;
- if (!apic)
- value |= MSR_IA32_APICBASE_BSP;
-
vcpu->arch.apic_base = value;
if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE)
- kvm_update_cpuid(vcpu);
+ kvm_update_cpuid_runtime(vcpu);
if (!apic)
return;
@@ -2162,37 +2385,48 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE) {
if (value & MSR_IA32_APICBASE_ENABLE) {
kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
- static_key_slow_dec_deferred(&apic_hw_disabled);
+ static_branch_slow_dec_deferred(&apic_hw_disabled);
+ /* Check if there are APF page ready requests pending */
+ kvm_make_request(KVM_REQ_APF_READY, vcpu);
} else {
- static_key_slow_inc(&apic_hw_disabled.key);
- recalculate_apic_map(vcpu->kvm);
+ static_branch_inc(&apic_hw_disabled.key);
+ atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
}
}
if (((old_value ^ value) & X2APIC_ENABLE) && (value & X2APIC_ENABLE))
kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id);
- if ((old_value ^ value) & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE))
- kvm_x86_ops->set_virtual_apic_mode(vcpu);
+ if ((old_value ^ value) & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)) {
+ kvm_vcpu_update_apicv(vcpu);
+ static_call_cond(kvm_x86_set_virtual_apic_mode)(vcpu);
+ }
apic->base_address = apic->vcpu->arch.apic_base &
MSR_IA32_APICBASE_BASE;
if ((value & MSR_IA32_APICBASE_ENABLE) &&
- apic->base_address != APIC_DEFAULT_PHYS_BASE)
- pr_warn_once("APIC base relocation is unsupported by KVM");
+ apic->base_address != APIC_DEFAULT_PHYS_BASE) {
+ kvm_set_apicv_inhibit(apic->vcpu->kvm,
+ APICV_INHIBIT_REASON_APIC_BASE_MODIFIED);
+ }
}
void kvm_apic_update_apicv(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
- if (vcpu->arch.apicv_active) {
+ if (apic->apicv_active) {
/* irr_pending is always true when apicv is activated. */
apic->irr_pending = true;
apic->isr_count = 1;
} else {
- apic->irr_pending = (apic_search_irr(apic) != -1);
+ /*
+ * Don't clear irr_pending, searching the IRR can race with
+ * updates from the CPU as APICv is still active from hardware's
+ * perspective. The flag will be cleared as appropriate when
+ * KVM injects the interrupt.
+ */
apic->isr_count = count_vectors(apic->regs + APIC_ISR);
}
}
@@ -2201,23 +2435,29 @@ EXPORT_SYMBOL_GPL(kvm_apic_update_apicv);
void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
{
struct kvm_lapic *apic = vcpu->arch.apic;
+ u64 msr_val;
int i;
+ if (!init_event) {
+ msr_val = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE;
+ if (kvm_vcpu_is_reset_bsp(vcpu))
+ msr_val |= MSR_IA32_APICBASE_BSP;
+ kvm_lapic_set_base(vcpu, msr_val);
+ }
+
if (!apic)
return;
/* Stop the timer in case it's a reset to an active apic */
hrtimer_cancel(&apic->lapic_timer.timer);
- if (!init_event) {
- kvm_lapic_set_base(vcpu, APIC_DEFAULT_PHYS_BASE |
- MSR_IA32_APICBASE_ENABLE);
+ /* The xAPIC ID is set at RESET even if the APIC was already enabled. */
+ if (!init_event)
kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
- }
kvm_apic_set_version(apic->vcpu);
- for (i = 0; i < KVM_APIC_LVT_NUM; i++)
- kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
+ for (i = 0; i < apic->nr_lvt_entries; i++)
+ kvm_lapic_set_reg(apic, APIC_LVTx(i), APIC_LVT_MASKED);
apic_update_lvtt(apic);
if (kvm_vcpu_is_reset_bsp(vcpu) &&
kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED))
@@ -2225,14 +2465,18 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
- kvm_lapic_set_reg(apic, APIC_DFR, 0xffffffffU);
+ kvm_apic_set_dfr(apic, 0xffffffffU);
apic_set_spiv(apic, 0xff);
kvm_lapic_set_reg(apic, APIC_TASKPRI, 0);
if (!apic_x2apic_mode(apic))
kvm_apic_set_ldr(apic, 0);
kvm_lapic_set_reg(apic, APIC_ESR, 0);
- kvm_lapic_set_reg(apic, APIC_ICR, 0);
- kvm_lapic_set_reg(apic, APIC_ICR2, 0);
+ if (!apic_x2apic_mode(apic)) {
+ kvm_lapic_set_reg(apic, APIC_ICR, 0);
+ kvm_lapic_set_reg(apic, APIC_ICR2, 0);
+ } else {
+ kvm_lapic_set_reg64(apic, APIC_ICR, 0);
+ }
kvm_lapic_set_reg(apic, APIC_TDCR, 0);
kvm_lapic_set_reg(apic, APIC_TMICT, 0);
for (i = 0; i < 8; i++) {
@@ -2244,19 +2488,19 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
apic->highest_isr_cache = -1;
update_divide_count(apic);
atomic_set(&apic->lapic_timer.pending, 0);
- if (kvm_vcpu_is_bsp(vcpu))
- kvm_lapic_set_base(vcpu,
- vcpu->arch.apic_base | MSR_IA32_APICBASE_BSP);
+
vcpu->arch.pv_eoi.msr_val = 0;
apic_update_ppr(apic);
- if (vcpu->arch.apicv_active) {
- kvm_x86_ops->apicv_post_state_restore(vcpu);
- kvm_x86_ops->hwapic_irr_update(vcpu, -1);
- kvm_x86_ops->hwapic_isr_update(vcpu, -1);
+ if (apic->apicv_active) {
+ static_call_cond(kvm_x86_apicv_post_state_restore)(vcpu);
+ static_call_cond(kvm_x86_hwapic_irr_update)(vcpu, -1);
+ static_call_cond(kvm_x86_hwapic_isr_update)(-1);
}
vcpu->arch.apic_arb_prio = 0;
vcpu->arch.apic_attention = 0;
+
+ kvm_recalculate_apic_map(vcpu->kvm);
}
/*
@@ -2313,7 +2557,7 @@ static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer);
- apic_timer_expired(apic);
+ apic_timer_expired(apic, true);
if (lapic_is_periodic(apic)) {
advance_periodic_target_expiration(apic);
@@ -2343,6 +2587,8 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
}
apic->vcpu = vcpu;
+ apic->nr_lvt_entries = kvm_apic_calc_nr_lvt_entries(vcpu);
+
hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
HRTIMER_MODE_ABS_HARD);
apic->lapic_timer.timer.function = apic_timer_fn;
@@ -2355,11 +2601,11 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
}
/*
- * APIC is created enabled. This will prevent kvm_lapic_set_base from
- * thinking that APIC state has changed.
+ * Stuff the APIC ENABLE bit in lieu of temporarily incrementing
+ * apic_hw_disabled; the full RESET value is set by kvm_lapic_reset().
*/
vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
- static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */
+ static_branch_inc(&apic_sw_disabled.key); /* sw disabled at reset */
kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
return 0;
@@ -2375,12 +2621,13 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
struct kvm_lapic *apic = vcpu->arch.apic;
u32 ppr;
- if (!kvm_apic_hw_enabled(apic))
+ if (!kvm_apic_present(vcpu))
return -1;
__apic_update_ppr(apic, &ppr);
return apic_has_interrupt_for_ppr(apic, ppr);
}
+EXPORT_SYMBOL_GPL(kvm_apic_has_interrupt);
int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
{
@@ -2421,7 +2668,7 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
*/
apic_clear_irr(vector, apic);
- if (test_bit(vector, vcpu_to_synic(vcpu)->auto_eoi_bitmap)) {
+ if (to_hv_vcpu(vcpu) && test_bit(vector, to_hv_synic(vcpu)->auto_eoi_bitmap)) {
/*
* For auto-EOI interrupts, there might be another pending
* interrupt above PPR, so check whether to raise another
@@ -2448,6 +2695,7 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
if (apic_x2apic_mode(vcpu->arch.apic)) {
u32 *id = (u32 *)(s->regs + APIC_ID);
u32 *ldr = (u32 *)(s->regs + APIC_LDR);
+ u64 icr;
if (vcpu->kvm->arch.x2apic_format) {
if (*id != vcpu->vcpu_id)
@@ -2459,9 +2707,23 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
*id <<= 24;
}
- /* In x2APIC mode, the LDR is fixed and based on the id */
- if (set)
+ /*
+ * In x2APIC mode, the LDR is fixed and based on the id. And
+ * ICR is internally a single 64-bit register, but needs to be
+ * split to ICR+ICR2 in userspace for backwards compatibility.
+ */
+ if (set) {
*ldr = kvm_apic_calc_x2apic_ldr(*id);
+
+ icr = __kvm_lapic_get_reg(s->regs, APIC_ICR) |
+ (u64)__kvm_lapic_get_reg(s->regs, APIC_ICR2) << 32;
+ __kvm_lapic_set_reg64(s->regs, APIC_ICR, icr);
+ } else {
+ icr = __kvm_lapic_get_reg64(s->regs, APIC_ICR);
+ __kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32);
+ }
+ } else {
+ kvm_lapic_xapic_id_updated(vcpu->arch.apic);
}
return 0;
@@ -2470,6 +2732,14 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
{
memcpy(s->regs, vcpu->arch.apic->regs, sizeof(*s));
+
+ /*
+ * Get calculated timer current count for remaining timer period (if
+ * any) and store it in the returned register set.
+ */
+ __kvm_lapic_set_reg(s->regs, APIC_TMCCT,
+ __apic_read(vcpu->arch.apic, APIC_TMCCT));
+
return kvm_apic_state_fixup(vcpu, s, false);
}
@@ -2478,33 +2748,35 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
struct kvm_lapic *apic = vcpu->arch.apic;
int r;
-
kvm_lapic_set_base(vcpu, vcpu->arch.apic_base);
/* set SPIV separately to get count of SW disabled APICs right */
apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV)));
r = kvm_apic_state_fixup(vcpu, s, true);
- if (r)
+ if (r) {
+ kvm_recalculate_apic_map(vcpu->kvm);
return r;
+ }
memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s));
- recalculate_apic_map(vcpu->kvm);
+ atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
+ kvm_recalculate_apic_map(vcpu->kvm);
kvm_apic_set_version(vcpu);
apic_update_ppr(apic);
- hrtimer_cancel(&apic->lapic_timer.timer);
+ cancel_apic_timer(apic);
+ apic->lapic_timer.expired_tscdeadline = 0;
apic_update_lvtt(apic);
apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
update_divide_count(apic);
- start_apic_timer(apic);
+ __start_apic_timer(apic, APIC_TMCCT);
+ kvm_lapic_set_reg(apic, APIC_TMCCT, 0);
kvm_apic_update_apicv(vcpu);
apic->highest_isr_cache = -1;
- if (vcpu->arch.apicv_active) {
- kvm_x86_ops->apicv_post_state_restore(vcpu);
- kvm_x86_ops->hwapic_irr_update(vcpu,
- apic_find_highest_irr(apic));
- kvm_x86_ops->hwapic_isr_update(vcpu,
- apic_find_highest_isr(apic));
+ if (apic->apicv_active) {
+ static_call_cond(kvm_x86_apicv_post_state_restore)(vcpu);
+ static_call_cond(kvm_x86_hwapic_irr_update)(vcpu, apic_find_highest_irr(apic));
+ static_call_cond(kvm_x86_hwapic_isr_update)(apic_find_highest_isr(apic));
}
kvm_make_request(KVM_REQ_EVENT, vcpu);
if (ioapic_in_kernel(vcpu->kvm))
@@ -2538,7 +2810,6 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu,
struct kvm_lapic *apic)
{
- bool pending;
int vector;
/*
* PV EOI state is derived from KVM_APIC_PV_EOI_PENDING in host
@@ -2552,14 +2823,8 @@ static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu,
* -> host enabled PV EOI, guest executed EOI.
*/
BUG_ON(!pv_eoi_enabled(vcpu));
- pending = pv_eoi_get_pending(vcpu);
- /*
- * Clear pending bit in any case: it will be set again on vmentry.
- * While this might not be ideal from performance point of view,
- * this makes sure pv eoi is only enabled when we know it's safe.
- */
- pv_eoi_clr_pending(vcpu);
- if (pending)
+
+ if (pv_eoi_test_and_clr_pending(vcpu))
return;
vector = apic_set_eoi(apic);
trace_kvm_pv_eoi(apic, vector);
@@ -2648,147 +2913,167 @@ int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
return 0;
}
-int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data)
{
- struct kvm_lapic *apic = vcpu->arch.apic;
- u32 reg = (msr - APIC_BASE_MSR) << 4;
+ data &= ~APIC_ICR_BUSY;
- if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
- return 1;
+ kvm_apic_send_ipi(apic, (u32)data, (u32)(data >> 32));
+ kvm_lapic_set_reg64(apic, APIC_ICR, data);
+ trace_kvm_apic_write(APIC_ICR, data);
+ return 0;
+}
+
+static int kvm_lapic_msr_read(struct kvm_lapic *apic, u32 reg, u64 *data)
+{
+ u32 low;
- if (reg == APIC_ICR2)
+ if (reg == APIC_ICR) {
+ *data = kvm_lapic_get_reg64(apic, APIC_ICR);
+ return 0;
+ }
+
+ if (kvm_lapic_reg_read(apic, reg, 4, &low))
return 1;
- /* if this is ICR write vector before command */
+ *data = low;
+
+ return 0;
+}
+
+static int kvm_lapic_msr_write(struct kvm_lapic *apic, u32 reg, u64 data)
+{
+ /*
+ * ICR is a 64-bit register in x2APIC mode (and Hyper-V PV vAPIC) and
+ * can be written as such, all other registers remain accessible only
+ * through 32-bit reads/writes.
+ */
if (reg == APIC_ICR)
- kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
+ return kvm_x2apic_icr_write(apic, data);
+
return kvm_lapic_reg_write(apic, reg, (u32)data);
}
-int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
+int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
struct kvm_lapic *apic = vcpu->arch.apic;
- u32 reg = (msr - APIC_BASE_MSR) << 4, low, high = 0;
+ u32 reg = (msr - APIC_BASE_MSR) << 4;
if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
return 1;
- if (reg == APIC_DFR || reg == APIC_ICR2)
- return 1;
+ return kvm_lapic_msr_write(apic, reg, data);
+}
- if (kvm_lapic_reg_read(apic, reg, 4, &low))
+int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
+{
+ struct kvm_lapic *apic = vcpu->arch.apic;
+ u32 reg = (msr - APIC_BASE_MSR) << 4;
+
+ if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
return 1;
- if (reg == APIC_ICR)
- kvm_lapic_reg_read(apic, APIC_ICR2, 4, &high);
- *data = (((u64)high) << 32) | low;
+ if (reg == APIC_DFR)
+ return 1;
- return 0;
+ return kvm_lapic_msr_read(apic, reg, data);
}
int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data)
{
- struct kvm_lapic *apic = vcpu->arch.apic;
-
if (!lapic_in_kernel(vcpu))
return 1;
- /* if this is ICR write vector before command */
- if (reg == APIC_ICR)
- kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
- return kvm_lapic_reg_write(apic, reg, (u32)data);
+ return kvm_lapic_msr_write(vcpu->arch.apic, reg, data);
}
int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
{
- struct kvm_lapic *apic = vcpu->arch.apic;
- u32 low, high = 0;
-
if (!lapic_in_kernel(vcpu))
return 1;
- if (kvm_lapic_reg_read(apic, reg, 4, &low))
- return 1;
- if (reg == APIC_ICR)
- kvm_lapic_reg_read(apic, APIC_ICR2, 4, &high);
-
- *data = (((u64)high) << 32) | low;
-
- return 0;
+ return kvm_lapic_msr_read(vcpu->arch.apic, reg, data);
}
-int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len)
+int kvm_lapic_set_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len)
{
u64 addr = data & ~KVM_MSR_ENABLED;
struct gfn_to_hva_cache *ghc = &vcpu->arch.pv_eoi.data;
unsigned long new_len;
+ int ret;
if (!IS_ALIGNED(addr, 4))
return 1;
- vcpu->arch.pv_eoi.msr_val = data;
- if (!pv_eoi_enabled(vcpu))
- return 0;
+ if (data & KVM_MSR_ENABLED) {
+ if (addr == ghc->gpa && len <= ghc->len)
+ new_len = ghc->len;
+ else
+ new_len = len;
- if (addr == ghc->gpa && len <= ghc->len)
- new_len = ghc->len;
- else
- new_len = len;
+ ret = kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, addr, new_len);
+ if (ret)
+ return ret;
+ }
+
+ vcpu->arch.pv_eoi.msr_val = data;
- return kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, addr, new_len);
+ return 0;
}
-void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
+int kvm_apic_accept_events(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
u8 sipi_vector;
- unsigned long pe;
+ int r;
- if (!lapic_in_kernel(vcpu) || !apic->pending_events)
- return;
+ if (!kvm_apic_has_pending_init_or_sipi(vcpu))
+ return 0;
+
+ if (is_guest_mode(vcpu)) {
+ r = kvm_check_nested_events(vcpu);
+ if (r < 0)
+ return r == -EBUSY ? 0 : r;
+ /*
+ * Continue processing INIT/SIPI even if a nested VM-Exit
+ * occurred, e.g. pending SIPIs should be dropped if INIT+SIPI
+ * are blocked as a result of transitioning to VMX root mode.
+ */
+ }
/*
- * INITs are latched while CPU is in specific states
- * (SMM, VMX non-root mode, SVM with GIF=0).
- * Because a CPU cannot be in these states immediately
- * after it has processed an INIT signal (and thus in
- * KVM_MP_STATE_INIT_RECEIVED state), just eat SIPIs
- * and leave the INIT pending.
+ * INITs are blocked while CPU is in specific states (SMM, VMX root
+ * mode, SVM with GIF=0), while SIPIs are dropped if the CPU isn't in
+ * wait-for-SIPI (WFS).
*/
- if (kvm_vcpu_latch_init(vcpu)) {
+ if (!kvm_apic_init_sipi_allowed(vcpu)) {
WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED);
- if (test_bit(KVM_APIC_SIPI, &apic->pending_events))
- clear_bit(KVM_APIC_SIPI, &apic->pending_events);
- return;
+ clear_bit(KVM_APIC_SIPI, &apic->pending_events);
+ return 0;
}
- pe = xchg(&apic->pending_events, 0);
- if (test_bit(KVM_APIC_INIT, &pe)) {
+ if (test_and_clear_bit(KVM_APIC_INIT, &apic->pending_events)) {
kvm_vcpu_reset(vcpu, true);
if (kvm_vcpu_is_bsp(apic->vcpu))
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
else
vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
}
- if (test_bit(KVM_APIC_SIPI, &pe) &&
- vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
- /* evaluate pending_events before reading the vector */
- smp_rmb();
- sipi_vector = apic->sipi_vector;
- kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector);
- vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+ if (test_and_clear_bit(KVM_APIC_SIPI, &apic->pending_events)) {
+ if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
+ /* evaluate pending_events before reading the vector */
+ smp_rmb();
+ sipi_vector = apic->sipi_vector;
+ static_call(kvm_x86_vcpu_deliver_sipi_vector)(vcpu, sipi_vector);
+ vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+ }
}
-}
-
-void kvm_lapic_init(void)
-{
- /* do not patch jump label more than once per second */
- jump_label_rate_limit(&apic_hw_disabled, HZ);
- jump_label_rate_limit(&apic_sw_disabled, HZ);
+ return 0;
}
void kvm_lapic_exit(void)
{
static_key_deferred_flush(&apic_hw_disabled);
+ WARN_ON(static_branch_unlikely(&apic_hw_disabled.key));
static_key_deferred_flush(&apic_sw_disabled);
+ WARN_ON(static_branch_unlikely(&apic_sw_disabled.key));
}