aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kvm/x86.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r--arch/x86/kvm/x86.c115
1 files changed, 79 insertions, 36 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a0d1fc80ac5a..b9591abde62a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -136,10 +136,14 @@ EXPORT_SYMBOL_GPL(kvm_default_tsc_scaling_ratio);
static u32 __read_mostly tsc_tolerance_ppm = 250;
module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
-/* lapic timer advance (tscdeadline mode only) in nanoseconds */
-unsigned int __read_mostly lapic_timer_advance_ns = 1000;
+/*
+ * lapic timer advance (tscdeadline mode only) in nanoseconds. '-1' enables
+ * adaptive tuning starting from default advancment of 1000ns. '0' disables
+ * advancement entirely. Any other value is used as-is and disables adaptive
+ * tuning, i.e. allows priveleged userspace to set an exact advancement time.
+ */
+static int __read_mostly lapic_timer_advance_ns = -1;
module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR);
-EXPORT_SYMBOL_GPL(lapic_timer_advance_ns);
static bool __read_mostly vector_hashing = true;
module_param(vector_hashing, bool, S_IRUGO);
@@ -3677,15 +3681,15 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
*/
valid = xstate_bv & ~XFEATURE_MASK_FPSSE;
while (valid) {
- u64 feature = valid & -valid;
- int index = fls64(feature) - 1;
- void *src = get_xsave_addr(xsave, feature);
+ u64 xfeature_mask = valid & -valid;
+ int xfeature_nr = fls64(xfeature_mask) - 1;
+ void *src = get_xsave_addr(xsave, xfeature_nr);
if (src) {
u32 size, offset, ecx, edx;
- cpuid_count(XSTATE_CPUID, index,
+ cpuid_count(XSTATE_CPUID, xfeature_nr,
&size, &offset, &ecx, &edx);
- if (feature == XFEATURE_MASK_PKRU)
+ if (xfeature_nr == XFEATURE_PKRU)
memcpy(dest + offset, &vcpu->arch.pkru,
sizeof(vcpu->arch.pkru));
else
@@ -3693,7 +3697,7 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
}
- valid -= feature;
+ valid -= xfeature_mask;
}
}
@@ -3720,22 +3724,22 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
*/
valid = xstate_bv & ~XFEATURE_MASK_FPSSE;
while (valid) {
- u64 feature = valid & -valid;
- int index = fls64(feature) - 1;
- void *dest = get_xsave_addr(xsave, feature);
+ u64 xfeature_mask = valid & -valid;
+ int xfeature_nr = fls64(xfeature_mask) - 1;
+ void *dest = get_xsave_addr(xsave, xfeature_nr);
if (dest) {
u32 size, offset, ecx, edx;
- cpuid_count(XSTATE_CPUID, index,
+ cpuid_count(XSTATE_CPUID, xfeature_nr,
&size, &offset, &ecx, &edx);
- if (feature == XFEATURE_MASK_PKRU)
+ if (xfeature_nr == XFEATURE_PKRU)
memcpy(&vcpu->arch.pkru, src + offset,
sizeof(vcpu->arch.pkru));
else
memcpy(dest, src + offset, size);
}
- valid -= feature;
+ valid -= xfeature_mask;
}
}
@@ -6535,6 +6539,12 @@ int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
}
EXPORT_SYMBOL_GPL(kvm_emulate_instruction_from_buffer);
+static int complete_fast_pio_out_port_0x7e(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.pio.count = 0;
+ return 1;
+}
+
static int complete_fast_pio_out(struct kvm_vcpu *vcpu)
{
vcpu->arch.pio.count = 0;
@@ -6551,12 +6561,23 @@ static int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size,
unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX);
int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt,
size, port, &val, 1);
+ if (ret)
+ return ret;
- if (!ret) {
+ /*
+ * Workaround userspace that relies on old KVM behavior of %rip being
+ * incremented prior to exiting to userspace to handle "OUT 0x7e".
+ */
+ if (port == 0x7e &&
+ kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_OUT_7E_INC_RIP)) {
+ vcpu->arch.complete_userspace_io =
+ complete_fast_pio_out_port_0x7e;
+ kvm_skip_emulated_instruction(vcpu);
+ } else {
vcpu->arch.pio.linear_rip = kvm_get_linear_rip(vcpu);
vcpu->arch.complete_userspace_io = complete_fast_pio_out;
}
- return ret;
+ return 0;
}
static int complete_fast_pio_in(struct kvm_vcpu *vcpu)
@@ -6677,10 +6698,8 @@ static void kvm_hyperv_tsc_notifier(void)
}
#endif
-static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
- void *data)
+static void __kvmclock_cpufreq_notifier(struct cpufreq_freqs *freq, int cpu)
{
- struct cpufreq_freqs *freq = data;
struct kvm *kvm;
struct kvm_vcpu *vcpu;
int i, send_ipi = 0;
@@ -6724,17 +6743,12 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
*
*/
- if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
- return 0;
- if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
- return 0;
-
- smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
+ smp_call_function_single(cpu, tsc_khz_changed, freq, 1);
spin_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
kvm_for_each_vcpu(i, vcpu, kvm) {
- if (vcpu->cpu != freq->cpu)
+ if (vcpu->cpu != cpu)
continue;
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
if (vcpu->cpu != smp_processor_id())
@@ -6756,8 +6770,24 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
* guest context is entered kvmclock will be updated,
* so the guest will not see stale values.
*/
- smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
+ smp_call_function_single(cpu, tsc_khz_changed, freq, 1);
}
+}
+
+static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
+ void *data)
+{
+ struct cpufreq_freqs *freq = data;
+ int cpu;
+
+ if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
+ return 0;
+ if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
+ return 0;
+
+ for_each_cpu(cpu, freq->policy->cpus)
+ __kvmclock_cpufreq_notifier(freq, cpu);
+
return 0;
}
@@ -7873,10 +7903,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
}
trace_kvm_entry(vcpu->vcpu_id);
- if (lapic_timer_advance_ns)
+ if (lapic_in_kernel(vcpu) &&
+ vcpu->arch.apic->lapic_timer.timer_advance_ns)
wait_lapic_expire(vcpu);
guest_enter_irqoff();
+ fpregs_assert_state_consistent();
+ if (test_thread_flag(TIF_NEED_FPU_LOAD))
+ switch_fpu_return();
+
if (unlikely(vcpu->arch.switch_db_regs)) {
set_debugreg(0, 7);
set_debugreg(vcpu->arch.eff_db[0], 0);
@@ -8135,22 +8170,30 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
/* Swap (qemu) user FPU context for the guest FPU context. */
static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
{
- preempt_disable();
+ fpregs_lock();
+
copy_fpregs_to_fpstate(&current->thread.fpu);
/* PKRU is separately restored in kvm_x86_ops->run. */
__copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state,
~XFEATURE_MASK_PKRU);
- preempt_enable();
+
+ fpregs_mark_activate();
+ fpregs_unlock();
+
trace_kvm_fpu(1);
}
/* When vcpu_run ends, restore user space FPU context. */
static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
{
- preempt_disable();
+ fpregs_lock();
+
copy_fpregs_to_fpstate(vcpu->arch.guest_fpu);
copy_kernel_to_fpregs(&current->thread.fpu.state);
- preempt_enable();
+
+ fpregs_mark_activate();
+ fpregs_unlock();
+
++vcpu->stat.fpu_reload;
trace_kvm_fpu(0);
}
@@ -8848,11 +8891,11 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
if (init_event)
kvm_put_guest_fpu(vcpu);
mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu->state.xsave,
- XFEATURE_MASK_BNDREGS);
+ XFEATURE_BNDREGS);
if (mpx_state_buffer)
memset(mpx_state_buffer, 0, sizeof(struct mpx_bndreg_state));
mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu->state.xsave,
- XFEATURE_MASK_BNDCSR);
+ XFEATURE_BNDCSR);
if (mpx_state_buffer)
memset(mpx_state_buffer, 0, sizeof(struct mpx_bndcsr));
if (init_event)
@@ -9061,7 +9104,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
if (irqchip_in_kernel(vcpu->kvm)) {
vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu);
- r = kvm_create_lapic(vcpu);
+ r = kvm_create_lapic(vcpu, lapic_timer_advance_ns);
if (r < 0)
goto fail_mmu_destroy;
} else