Diffstat (limited to 'arch/x86/kvm/svm.c')
-rw-r--r-- | arch/x86/kvm/svm.c | 310
1 file changed, 57 insertions(+), 253 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index fdb8cb63a6c0..83a1c643f9a5 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -158,7 +158,8 @@ struct vcpu_svm {
         unsigned long int3_rip;
         u32 apf_reason;
 
-        u64 tsc_ratio;
+        /* cached guest cpuid flags for faster access */
+        bool nrips_enabled      : 1;
 };
 
 static DEFINE_PER_CPU(u64, current_tsc_ratio);
@@ -202,6 +203,7 @@ module_param(npt, int, S_IRUGO);
 static int nested = true;
 module_param(nested, int, S_IRUGO);
 
+static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
 static void svm_flush_tlb(struct kvm_vcpu *vcpu);
 static void svm_complete_interrupts(struct vcpu_svm *svm);
 
@@ -210,7 +212,6 @@ static int nested_svm_intercept(struct vcpu_svm *svm);
 static int nested_svm_vmexit(struct vcpu_svm *svm);
 static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
                                       bool has_error_code, u32 error_code);
-static u64 __scale_tsc(u64 ratio, u64 tsc);
 
 enum {
         VMCB_INTERCEPTS, /* Intercept vectors, TSC offset,
@@ -513,7 +514,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
         struct vcpu_svm *svm = to_svm(vcpu);
 
         if (svm->vmcb->control.next_rip != 0) {
-                WARN_ON(!static_cpu_has(X86_FEATURE_NRIPS));
+                WARN_ON_ONCE(!static_cpu_has(X86_FEATURE_NRIPS));
                 svm->next_rip = svm->vmcb->control.next_rip;
         }
 
@@ -865,64 +866,6 @@ static void svm_disable_lbrv(struct vcpu_svm *svm)
         set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
 }
 
-#define MTRR_TYPE_UC_MINUS 7
-#define MTRR2PROTVAL_INVALID 0xff
-
-static u8 mtrr2protval[8];
-
-static u8 fallback_mtrr_type(int mtrr)
-{
-        /*
-         * WT and WP aren't always available in the host PAT. Treat
-         * them as UC and UC- respectively. Everything else should be
-         * there.
-         */
-        switch (mtrr)
-        {
-        case MTRR_TYPE_WRTHROUGH:
-                return MTRR_TYPE_UNCACHABLE;
-        case MTRR_TYPE_WRPROT:
-                return MTRR_TYPE_UC_MINUS;
-        default:
-                BUG();
-        }
-}
-
-static void build_mtrr2protval(void)
-{
-        int i;
-        u64 pat;
-
-        for (i = 0; i < 8; i++)
-                mtrr2protval[i] = MTRR2PROTVAL_INVALID;
-
-        /* Ignore the invalid MTRR types. */
-        mtrr2protval[2] = 0;
-        mtrr2protval[3] = 0;
-
-        /*
-         * Use host PAT value to figure out the mapping from guest MTRR
-         * values to nested page table PAT/PCD/PWT values. We do not
-         * want to change the host PAT value every time we enter the
-         * guest.
-         */
-        rdmsrl(MSR_IA32_CR_PAT, pat);
-        for (i = 0; i < 8; i++) {
-                u8 mtrr = pat >> (8 * i);
-
-                if (mtrr2protval[mtrr] == MTRR2PROTVAL_INVALID)
-                        mtrr2protval[mtrr] = __cm_idx2pte(i);
-        }
-
-        for (i = 0; i < 8; i++) {
-                if (mtrr2protval[i] == MTRR2PROTVAL_INVALID) {
-                        u8 fallback = fallback_mtrr_type(i);
-                        mtrr2protval[i] = mtrr2protval[fallback];
-                        BUG_ON(mtrr2protval[i] == MTRR2PROTVAL_INVALID);
-                }
-        }
-}
-
 static __init int svm_hardware_setup(void)
 {
         int cpu;
@@ -948,20 +891,9 @@ static __init int svm_hardware_setup(void)
         kvm_enable_efer_bits(EFER_FFXSR);
 
         if (boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
-                u64 max;
-
                 kvm_has_tsc_control = true;
-
-                /*
-                 * Make sure the user can only configure tsc_khz values that
-                 * fit into a signed integer.
-                 * A min value is not calculated needed because it will always
-                 * be 1 on all machines and a value of 0 is used to disable
-                 * tsc-scaling for the vcpu.
-                 */
-                max = min(0x7fffffffULL, __scale_tsc(tsc_khz, TSC_RATIO_MAX));
-
-                kvm_max_guest_tsc_khz = max;
+                kvm_max_tsc_scaling_ratio = TSC_RATIO_MAX;
+                kvm_tsc_scaling_ratio_frac_bits = 32;
         }
 
         if (nested) {
@@ -989,7 +921,6 @@
         } else
                 kvm_disable_tdp();
 
-        build_mtrr2protval();
         return 0;
 
 err:
@@ -1026,68 +957,6 @@ static void init_sys_seg(struct vmcb_seg *seg, uint32_t type)
         seg->base = 0;
 }
 
-static u64 __scale_tsc(u64 ratio, u64 tsc)
-{
-        u64 mult, frac, _tsc;
-
-        mult = ratio >> 32;
-        frac = ratio & ((1ULL << 32) - 1);
-
-        _tsc = tsc;
-        _tsc *= mult;
-        _tsc += (tsc >> 32) * frac;
-        _tsc += ((tsc & ((1ULL << 32) - 1)) * frac) >> 32;
-
-        return _tsc;
-}
-
-static u64 svm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc)
-{
-        struct vcpu_svm *svm = to_svm(vcpu);
-        u64 _tsc = tsc;
-
-        if (svm->tsc_ratio != TSC_RATIO_DEFAULT)
-                _tsc = __scale_tsc(svm->tsc_ratio, tsc);
-
-        return _tsc;
-}
-
-static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
-{
-        struct vcpu_svm *svm = to_svm(vcpu);
-        u64 ratio;
-        u64 khz;
-
-        /* Guest TSC same frequency as host TSC? */
-        if (!scale) {
-                svm->tsc_ratio = TSC_RATIO_DEFAULT;
-                return;
-        }
-
-        /* TSC scaling supported? */
-        if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
-                if (user_tsc_khz > tsc_khz) {
-                        vcpu->arch.tsc_catchup = 1;
-                        vcpu->arch.tsc_always_catchup = 1;
-                } else
-                        WARN(1, "user requested TSC rate below hardware speed\n");
-                return;
-        }
-
-        khz = user_tsc_khz;
-
-        /* TSC scaling required - calculate ratio */
-        ratio = khz << 32;
-        do_div(ratio, tsc_khz);
-
-        if (ratio == 0 || ratio & TSC_RATIO_RSVD) {
-                WARN_ONCE(1, "Invalid TSC ratio - virtual-tsc-khz=%u\n",
-                          user_tsc_khz);
-                return;
-        }
-        svm->tsc_ratio = ratio;
-}
-
 static u64 svm_read_tsc_offset(struct kvm_vcpu *vcpu)
 {
         struct vcpu_svm *svm = to_svm(vcpu);
@@ -1114,16 +983,10 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
         mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
 }
 
-static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool host)
+static void svm_adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, s64 adjustment)
 {
         struct vcpu_svm *svm = to_svm(vcpu);
 
-        if (host) {
-                if (svm->tsc_ratio != TSC_RATIO_DEFAULT)
-                        WARN_ON(adjustment < 0);
-                adjustment = svm_scale_tsc(vcpu, (u64)adjustment);
-        }
-
         svm->vmcb->control.tsc_offset += adjustment;
         if (is_guest_mode(vcpu))
                 svm->nested.hsave->control.tsc_offset += adjustment;
@@ -1135,53 +998,7 @@ static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool ho
         mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
 }
 
-static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
-{
-        u64 tsc;
-
-        tsc = svm_scale_tsc(vcpu, rdtsc());
-
-        return target_tsc - tsc;
-}
-
-static void svm_set_guest_pat(struct vcpu_svm *svm, u64 *g_pat)
-{
-        struct kvm_vcpu *vcpu = &svm->vcpu;
-
-        /* Unlike Intel, AMD takes the guest's CR0.CD into account.
-         *
-         * AMD does not have IPAT. To emulate it for the case of guests
-         * with no assigned devices, just set everything to WB. If guests
-         * have assigned devices, however, we cannot force WB for RAM
-         * pages only, so use the guest PAT directly.
-         */
-        if (!kvm_arch_has_assigned_device(vcpu->kvm))
-                *g_pat = 0x0606060606060606;
-        else
-                *g_pat = vcpu->arch.pat;
-}
-
-static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
-{
-        u8 mtrr;
-
-        /*
-         * 1. MMIO: trust guest MTRR, so same as item 3.
-         * 2. No passthrough: always map as WB, and force guest PAT to WB as well
-         * 3. Passthrough: can't guarantee the result, try to trust guest.
-         */
-        if (!is_mmio && !kvm_arch_has_assigned_device(vcpu->kvm))
-                return 0;
-
-        if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED) &&
-            kvm_read_cr0(vcpu) & X86_CR0_CD)
-                return _PAGE_NOCACHE;
-
-        mtrr = kvm_mtrr_get_guest_memory_type(vcpu, gfn);
-        return mtrr2protval[mtrr];
-}
-
-static void init_vmcb(struct vcpu_svm *svm, bool init_event)
+static void init_vmcb(struct vcpu_svm *svm)
 {
         struct vmcb_control_area *control = &svm->vmcb->control;
         struct vmcb_save_area *save = &svm->vmcb->save;
@@ -1202,6 +1019,8 @@ static void init_vmcb(struct vcpu_svm *svm, bool init_event)
         set_exception_intercept(svm, PF_VECTOR);
         set_exception_intercept(svm, UD_VECTOR);
         set_exception_intercept(svm, MC_VECTOR);
+        set_exception_intercept(svm, AC_VECTOR);
+        set_exception_intercept(svm, DB_VECTOR);
 
         set_intercept(svm, INTERCEPT_INTR);
         set_intercept(svm, INTERCEPT_NMI);
@@ -1252,8 +1071,7 @@
         init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
         init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
 
-        if (!init_event)
-                svm_set_efer(&svm->vcpu, 0);
+        svm_set_efer(&svm->vcpu, 0);
         save->dr6 = 0xffff0ff0;
         kvm_set_rflags(&svm->vcpu, 2);
         save->rip = 0x0000fff0;
@@ -1263,7 +1081,8 @@
          * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0.
          * It also updates the guest-visible cr0 value.
          */
-        (void)kvm_set_cr0(&svm->vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET);
+        svm_set_cr0(&svm->vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET);
+        kvm_mmu_reset_context(&svm->vcpu);
 
         save->cr4 = X86_CR4_PAE;
         /* rdx = ?? */
@@ -1276,7 +1095,6 @@
                 clr_cr_intercept(svm, INTERCEPT_CR3_READ);
                 clr_cr_intercept(svm, INTERCEPT_CR3_WRITE);
                 save->g_pat = svm->vcpu.arch.pat;
-                svm_set_guest_pat(svm, &save->g_pat);
                 save->cr3 = 0;
                 save->cr4 = 0;
         }
@@ -1307,7 +1125,7 @@
                 if (kvm_vcpu_is_reset_bsp(&svm->vcpu))
                         svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
         }
-        init_vmcb(svm, init_event);
+        init_vmcb(svm);
 
         kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy);
         kvm_register_write(vcpu, VCPU_REGS_RDX, eax);
@@ -1328,8 +1146,6 @@
                 goto out;
         }
 
-        svm->tsc_ratio = TSC_RATIO_DEFAULT;
-
         err = kvm_vcpu_init(&svm->vcpu, kvm, id);
         if (err)
                 goto free_svm;
@@ -1363,7 +1179,7 @@
         clear_page(svm->vmcb);
         svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
         svm->asid_generation = 0;
-        init_vmcb(svm, false);
+        init_vmcb(svm);
 
         svm_init_osvw(&svm->vcpu);
 
@@ -1415,10 +1231,12 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
         for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
                 rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
 
-        if (static_cpu_has(X86_FEATURE_TSCRATEMSR) &&
-            svm->tsc_ratio != __this_cpu_read(current_tsc_ratio)) {
-                __this_cpu_write(current_tsc_ratio, svm->tsc_ratio);
-                wrmsrl(MSR_AMD64_TSC_RATIO, svm->tsc_ratio);
+        if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
+                u64 tsc_ratio = vcpu->arch.tsc_scaling_ratio;
+                if (tsc_ratio != __this_cpu_read(current_tsc_ratio)) {
+                        __this_cpu_write(current_tsc_ratio, tsc_ratio);
+                        wrmsrl(MSR_AMD64_TSC_RATIO, tsc_ratio);
+                }
         }
 }
 
@@ -1671,10 +1489,13 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 
         if (!vcpu->fpu_active)
                 cr0 |= X86_CR0_TS;
-
-        /* These are emulated via page tables. */
-        cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
-
+        /*
+         * re-enable caching here because the QEMU bios
+         * does not do it - this results in some delay at
+         * reboot
+         */
+        if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
+                cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
         svm->vmcb->save.cr0 = cr0;
         mark_dirty(svm->vmcb, VMCB_CR);
         update_cr0_intercept(svm);
@@ -1734,20 +1555,13 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
         mark_dirty(svm->vmcb, VMCB_SEG);
 }
 
-static void update_db_bp_intercept(struct kvm_vcpu *vcpu)
+static void update_bp_intercept(struct kvm_vcpu *vcpu)
 {
         struct vcpu_svm *svm = to_svm(vcpu);
 
-        clr_exception_intercept(svm, DB_VECTOR);
         clr_exception_intercept(svm, BP_VECTOR);
 
-        if (svm->nmi_singlestep)
-                set_exception_intercept(svm, DB_VECTOR);
-
         if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
-                if (vcpu->guest_debug &
-                    (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
-                        set_exception_intercept(svm, DB_VECTOR);
                 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
                         set_exception_intercept(svm, BP_VECTOR);
         } else
@@ -1853,7 +1667,6 @@ static int db_interception(struct vcpu_svm *svm)
                 if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP))
                         svm->vmcb->save.rflags &=
                                 ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
-                update_db_bp_intercept(&svm->vcpu);
         }
 
         if (svm->vcpu.guest_debug &
@@ -1888,6 +1701,12 @@ static int ud_interception(struct vcpu_svm *svm)
         return 1;
 }
 
+static int ac_interception(struct vcpu_svm *svm)
+{
+        kvm_queue_exception_e(&svm->vcpu, AC_VECTOR, 0);
+        return 1;
+}
+
 static void svm_fpu_activate(struct kvm_vcpu *vcpu)
 {
         struct vcpu_svm *svm = to_svm(vcpu);
@@ -1982,7 +1801,7 @@ static int shutdown_interception(struct vcpu_svm *svm)
          * so reinitialize it.
          */
         clear_page(svm->vmcb);
-        init_vmcb(svm, false);
+        init_vmcb(svm);
 
         kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
         return 0;
@@ -2457,7 +2276,9 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
         nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2;
         nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info;
         nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err;
-        nested_vmcb->control.next_rip = vmcb->control.next_rip;
+
+        if (svm->nrips_enabled)
+                nested_vmcb->control.next_rip = vmcb->control.next_rip;
 
         /*
          * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have
@@ -3152,7 +2973,7 @@ static int cr8_write_interception(struct vcpu_svm *svm)
         u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
         /* instruction emulation calls kvm_set_cr8() */
         r = cr_interception(svm);
-        if (irqchip_in_kernel(svm->vcpu.kvm))
+        if (lapic_in_kernel(&svm->vcpu))
                 return r;
         if (cr8_prev <= kvm_get_cr8(&svm->vcpu))
                 return r;
@@ -3163,8 +2984,7 @@
 
 static u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
 {
         struct vmcb *vmcb = get_host_vmcb(to_svm(vcpu));
-        return vmcb->control.tsc_offset +
-                svm_scale_tsc(vcpu, host_tsc);
+        return vmcb->control.tsc_offset + host_tsc;
 }
 
@@ -3174,7 +2994,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
         switch (msr_info->index) {
         case MSR_IA32_TSC: {
                 msr_info->data = svm->vmcb->control.tsc_offset +
-                        svm_scale_tsc(vcpu, rdtsc());
+                        kvm_scale_tsc(vcpu, rdtsc());
 
                 break;
         }
@@ -3349,16 +3169,6 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
         case MSR_VM_IGNNE:
                 vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
                 break;
-        case MSR_IA32_CR_PAT:
-                if (npt_enabled) {
-                        if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
-                                return 1;
-                        vcpu->arch.pat = data;
-                        svm_set_guest_pat(svm, &svm->vmcb->save.g_pat);
-                        mark_dirty(svm->vmcb, VMCB_NPT);
-                        break;
-                }
-                /* fall through */
         default:
                 return kvm_set_msr_common(vcpu, msr);
         }
@@ -3396,24 +3206,11 @@ static int msr_interception(struct vcpu_svm *svm)
 
 static int interrupt_window_interception(struct vcpu_svm *svm)
 {
-        struct kvm_run *kvm_run = svm->vcpu.run;
-
         kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
         svm_clear_vintr(svm);
         svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
         mark_dirty(svm->vmcb, VMCB_INTR);
         ++svm->vcpu.stat.irq_window_exits;
-        /*
-         * If the user space waits to inject interrupts, exit as soon as
-         * possible
-         */
-        if (!irqchip_in_kernel(svm->vcpu.kvm) &&
-            kvm_run->request_interrupt_window &&
-            !kvm_cpu_has_interrupt(&svm->vcpu)) {
-                kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
-                return 0;
-        }
-
         return 1;
 }
 
@@ -3473,6 +3270,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
         [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception,
         [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception,
         [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception,
+        [SVM_EXIT_EXCP_BASE + AC_VECTOR] = ac_interception,
         [SVM_EXIT_INTR] = intr_interception,
         [SVM_EXIT_NMI] = nmi_interception,
         [SVM_EXIT_SMI] = nop_on_interception,
@@ -3761,12 +3559,12 @@ static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
         return;
 }
 
-static int svm_vm_has_apicv(struct kvm *kvm)
+static int svm_cpu_uses_apicv(struct kvm_vcpu *vcpu)
 {
         return 0;
 }
 
-static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
+static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu)
 {
         return;
 }
@@ -3856,7 +3654,6 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
          */
         svm->nmi_singlestep = true;
         svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
-        update_db_bp_intercept(vcpu);
 }
 
 static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
@@ -4193,8 +3990,17 @@ static bool svm_has_high_real_mode_segbase(void)
         return true;
 }
 
+static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
+{
+        return 0;
+}
+
 static void svm_cpuid_update(struct kvm_vcpu *vcpu)
 {
+        struct vcpu_svm *svm = to_svm(vcpu);
+
+        /* Update nrips enabled cache */
+        svm->nrips_enabled = !!guest_cpuid_has_nrips(&svm->vcpu);
 }
 
 static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
@@ -4473,7 +4279,7 @@
         .vcpu_load = svm_vcpu_load,
         .vcpu_put = svm_vcpu_put,
 
-        .update_db_bp_intercept = update_db_bp_intercept,
+        .update_bp_intercept = update_bp_intercept,
         .get_msr = svm_get_msr,
         .set_msr = svm_set_msr,
         .get_segment_base = svm_get_segment_base,
@@ -4522,7 +4328,7 @@
         .enable_irq_window = enable_irq_window,
         .update_cr8_intercept = update_cr8_intercept,
         .set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode,
-        .vm_has_apicv = svm_vm_has_apicv,
+        .cpu_uses_apicv = svm_cpu_uses_apicv,
         .load_eoi_exitmap = svm_load_eoi_exitmap,
         .sync_pir_to_irr = svm_sync_pir_to_irr,
 
@@ -4545,11 +4351,9 @@
         .has_wbinvd_exit = svm_has_wbinvd_exit,
 
-        .set_tsc_khz = svm_set_tsc_khz,
         .read_tsc_offset = svm_read_tsc_offset,
         .write_tsc_offset = svm_write_tsc_offset,
-        .adjust_tsc_offset = svm_adjust_tsc_offset,
-        .compute_tsc_offset = svm_compute_tsc_offset,
+        .adjust_tsc_offset_guest = svm_adjust_tsc_offset_guest,
         .read_l1_tsc = svm_read_l1_tsc,
 
         .set_tdp_cr3 = set_tdp_cr3,
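
Note (editorial addition, not part of the commit): the removed __scale_tsc() is the 32.32 fixed-point multiply that the common kvm_scale_tsc() now performs for SVM, matching the kvm_tsc_scaling_ratio_frac_bits = 32 setting in svm_hardware_setup() above. A minimal standalone C sketch of that arithmetic follows; scale_tsc() mirrors the removed kernel helper, while the frequencies in main() (2.6 GHz host, 1.3 GHz guest) are illustrative values, not taken from the patch.

/*
 * Standalone sketch of 32.32 fixed-point TSC scaling (userspace, not
 * kernel code): guest_tsc = host_tsc * ratio, where the upper 32 bits
 * of ratio are the integer part and the lower 32 bits the fraction.
 * Splitting the product into three parts avoids needing a 128-bit type.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t scale_tsc(uint64_t ratio, uint64_t tsc)
{
        uint64_t mult = ratio >> 32;            /* integer part */
        uint64_t frac = ratio & 0xffffffffULL;  /* fractional part */
        uint64_t res;

        res  = tsc * mult;                            /* integer contribution */
        res += (tsc >> 32) * frac;                    /* high half * fraction */
        res += ((tsc & 0xffffffffULL) * frac) >> 32;  /* low half * fraction */
        return res;
}

int main(void)
{
        /* ratio = user_tsc_khz / tsc_khz in 32.32 format, as the removed
         * svm_set_tsc_khz() computed it; 1300000/2600000 kHz = 0.5. */
        uint64_t ratio = ((uint64_t)1300000 << 32) / 2600000;

        /* 1000000 host cycles at ratio 0.5 -> prints 500000 */
        printf("%llu\n", (unsigned long long)scale_tsc(ratio, 1000000ULL));
        return 0;
}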