aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kvm/svm/svm.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm/svm/svm.c')
-rw-r--r--arch/x86/kvm/svm/svm.c174
1 files changed, 102 insertions, 72 deletions
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index b649f92287a2..8834822c00cd 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -43,6 +43,9 @@
#include "svm.h"
#include "svm_ops.h"
+#include "kvm_onhyperv.h"
+#include "svm_onhyperv.h"
+
#define __ex(x) __kvm_handle_fault_on_reboot(x)
MODULE_AUTHOR("Qumranet");
@@ -185,6 +188,13 @@ module_param(vls, int, 0444);
static int vgif = true;
module_param(vgif, int, 0444);
+/*
+ * enable / disable AVIC. Because the defaults differ for APICv
+ * support between VMX and SVM we cannot use module_param_named.
+ */
+static bool avic;
+module_param(avic, bool, 0444);
+
bool __read_mostly dump_invalid_vmcb;
module_param(dump_invalid_vmcb, bool, 0644);
@@ -212,7 +222,7 @@ DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
* RDTSCP and RDPID are not used in the kernel, specifically to allow KVM to
* defer the restoration of TSC_AUX until the CPU returns to userspace.
*/
-#define TSC_AUX_URET_SLOT 0
+static int tsc_aux_uret_slot __read_mostly = -1;
static const u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000};
@@ -447,6 +457,11 @@ static int has_svm(void)
return 0;
}
+ if (pgtable_l5_enabled()) {
+ pr_info("KVM doesn't yet support 5-level paging on AMD SVM\n");
+ return 0;
+ }
+
return 1;
}
@@ -668,6 +683,9 @@ static void set_msr_interception_bitmap(struct kvm_vcpu *vcpu, u32 *msrpm,
write ? clear_bit(bit_write, &tmp) : set_bit(bit_write, &tmp);
msrpm[offset] = tmp;
+
+ svm_hv_vmcb_dirty_nested_enlightenments(vcpu);
+
}
void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr,
@@ -858,8 +876,8 @@ static __init void svm_adjust_mmio_mask(void)
return;
/* If memory encryption is not enabled, use existing mask */
- rdmsrl(MSR_K8_SYSCFG, msr);
- if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
+ rdmsrl(MSR_AMD64_SYSCFG, msr);
+ if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT))
return;
enc_bit = cpuid_ebx(0x8000001f) & 0x3f;
@@ -934,6 +952,16 @@ static __init int svm_hardware_setup(void)
int r;
unsigned int order = get_order(IOPM_SIZE);
+ /*
+ * NX is required for shadow paging and for NPT if the NX huge pages
+ * mitigation is enabled.
+ */
+ if (!boot_cpu_has(X86_FEATURE_NX)) {
+ pr_err_ratelimited("NX (Execute Disable) not supported\n");
+ return -EOPNOTSUPP;
+ }
+ kvm_enable_efer_bits(EFER_NX);
+
iopm_pages = alloc_pages(GFP_KERNEL, order);
if (!iopm_pages)
@@ -947,9 +975,6 @@ static __init int svm_hardware_setup(void)
supported_xcr0 &= ~(XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR);
- if (boot_cpu_has(X86_FEATURE_NX))
- kvm_enable_efer_bits(EFER_NX);
-
if (boot_cpu_has(X86_FEATURE_FXSR_OPT))
kvm_enable_efer_bits(EFER_FFXSR);
@@ -959,8 +984,7 @@ static __init int svm_hardware_setup(void)
kvm_tsc_scaling_ratio_frac_bits = 32;
}
- if (boot_cpu_has(X86_FEATURE_RDTSCP))
- kvm_define_user_return_msr(TSC_AUX_URET_SLOT, MSR_TSC_AUX);
+ tsc_aux_uret_slot = kvm_add_user_return_msr(MSR_TSC_AUX);
/* Check for pause filtering support */
if (!boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
@@ -992,6 +1016,8 @@ static __init int svm_hardware_setup(void)
/* Note, SEV setup consumes npt_enabled. */
sev_hardware_setup();
+ svm_hv_hardware_setup();
+
svm_adjust_mmio_mask();
for_each_possible_cpu(cpu) {
@@ -1005,16 +1031,12 @@ static __init int svm_hardware_setup(void)
nrips = false;
}
- if (avic) {
- if (!npt_enabled ||
- !boot_cpu_has(X86_FEATURE_AVIC) ||
- !IS_ENABLED(CONFIG_X86_LOCAL_APIC)) {
- avic = false;
- } else {
- pr_info("AVIC enabled\n");
+ enable_apicv = avic = avic && npt_enabled && boot_cpu_has(X86_FEATURE_AVIC);
- amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
- }
+ if (enable_apicv) {
+ pr_info("AVIC enabled\n");
+
+ amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
}
if (vls) {
@@ -1078,29 +1100,35 @@ static void init_sys_seg(struct vmcb_seg *seg, uint32_t type)
seg->base = 0;
}
-static u64 svm_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
+static u64 svm_get_l2_tsc_offset(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
- u64 g_tsc_offset = 0;
- if (is_guest_mode(vcpu)) {
- /* Write L1's TSC offset. */
- g_tsc_offset = svm->vmcb->control.tsc_offset -
- svm->vmcb01.ptr->control.tsc_offset;
- svm->vmcb01.ptr->control.tsc_offset = offset;
- }
+ return svm->nested.ctl.tsc_offset;
+}
- trace_kvm_write_tsc_offset(vcpu->vcpu_id,
- svm->vmcb->control.tsc_offset - g_tsc_offset,
- offset);
+static u64 svm_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu)
+{
+ return kvm_default_tsc_scaling_ratio;
+}
- svm->vmcb->control.tsc_offset = offset + g_tsc_offset;
+static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+ svm->vmcb01.ptr->control.tsc_offset = vcpu->arch.l1_tsc_offset;
+ svm->vmcb->control.tsc_offset = offset;
vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
- return svm->vmcb->control.tsc_offset;
}
-static void svm_check_invpcid(struct vcpu_svm *svm)
+static void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 multiplier)
+{
+ wrmsrl(MSR_AMD64_TSC_RATIO, multiplier);
+}
+
+/* Evaluate instruction intercepts that depend on guest CPUID features. */
+static void svm_recalc_instruction_intercepts(struct kvm_vcpu *vcpu,
+ struct vcpu_svm *svm)
{
/*
* Intercept INVPCID if shadow paging is enabled to sync/free shadow
@@ -1113,6 +1141,13 @@ static void svm_check_invpcid(struct vcpu_svm *svm)
else
svm_clr_intercept(svm, INTERCEPT_INVPCID);
}
+
+ if (kvm_cpu_cap_has(X86_FEATURE_RDTSCP)) {
+ if (guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
+ svm_clr_intercept(svm, INTERCEPT_RDTSCP);
+ else
+ svm_set_intercept(svm, INTERCEPT_RDTSCP);
+ }
}
static void init_vmcb(struct kvm_vcpu *vcpu)
@@ -1235,8 +1270,8 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
svm->current_vmcb->asid_generation = 0;
svm->asid = 0;
- svm->nested.vmcb12_gpa = 0;
- svm->nested.last_vmcb12_gpa = 0;
+ svm->nested.vmcb12_gpa = INVALID_GPA;
+ svm->nested.last_vmcb12_gpa = INVALID_GPA;
vcpu->arch.hflags = 0;
if (!kvm_pause_in_guest(vcpu->kvm)) {
@@ -1248,7 +1283,7 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
svm_clr_intercept(svm, INTERCEPT_PAUSE);
}
- svm_check_invpcid(svm);
+ svm_recalc_instruction_intercepts(vcpu, svm);
/*
* If the host supports V_SPEC_CTRL then disable the interception
@@ -1276,6 +1311,8 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
}
}
+ svm_hv_init_vmcb(svm->vmcb);
+
vmcb_mark_all_dirty(svm->vmcb);
enable_gif(svm);
@@ -1424,6 +1461,9 @@ static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
struct vcpu_svm *svm = to_svm(vcpu);
struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu);
+ if (sev_es_guest(vcpu->kvm))
+ sev_es_unmap_ghcb(svm);
+
if (svm->guest_state_loaded)
return;
@@ -1445,8 +1485,8 @@ static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
}
}
- if (static_cpu_has(X86_FEATURE_RDTSCP))
- kvm_set_user_return_msr(TSC_AUX_URET_SLOT, svm->tsc_aux, -1ull);
+ if (likely(tsc_aux_uret_slot >= 0))
+ kvm_set_user_return_msr(tsc_aux_uret_slot, svm->tsc_aux, -1ull);
svm->guest_state_loaded = true;
}
@@ -2655,11 +2695,6 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data |= (u64)svm->sysenter_esp_hi << 32;
break;
case MSR_TSC_AUX:
- if (!boot_cpu_has(X86_FEATURE_RDTSCP))
- return 1;
- if (!msr_info->host_initiated &&
- !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
- return 1;
msr_info->data = svm->tsc_aux;
break;
/*
@@ -2876,30 +2911,13 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
svm->sysenter_esp_hi = guest_cpuid_is_intel(vcpu) ? (data >> 32) : 0;
break;
case MSR_TSC_AUX:
- if (!boot_cpu_has(X86_FEATURE_RDTSCP))
- return 1;
-
- if (!msr->host_initiated &&
- !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
- return 1;
-
- /*
- * Per Intel's SDM, bits 63:32 are reserved, but AMD's APM has
- * incomplete and conflicting architectural behavior. Current
- * AMD CPUs completely ignore bits 63:32, i.e. they aren't
- * reserved and always read as zeros. Emulate AMD CPU behavior
- * to avoid explosions if the vCPU is migrated from an AMD host
- * to an Intel host.
- */
- data = (u32)data;
-
/*
* TSC_AUX is usually changed only during boot and never read
* directly. Intercept TSC_AUX instead of exposing it to the
* guest via direct_access_msrs, and switch it via user return.
*/
preempt_disable();
- r = kvm_set_user_return_msr(TSC_AUX_URET_SLOT, data, -1ull);
+ r = kvm_set_user_return_msr(tsc_aux_uret_slot, data, -1ull);
preempt_enable();
if (r)
return 1;
@@ -3084,6 +3102,7 @@ static int (*const svm_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[SVM_EXIT_STGI] = stgi_interception,
[SVM_EXIT_CLGI] = clgi_interception,
[SVM_EXIT_SKINIT] = skinit_interception,
+ [SVM_EXIT_RDTSCP] = kvm_handle_invalid_op,
[SVM_EXIT_WBINVD] = kvm_emulate_wbinvd,
[SVM_EXIT_MONITOR] = kvm_emulate_monitor,
[SVM_EXIT_MWAIT] = kvm_emulate_mwait,
@@ -3113,6 +3132,8 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
return;
}
+ pr_err("VMCB %p, last attempted VMRUN on CPU %d\n",
+ svm->current_vmcb->ptr, vcpu->arch.last_vmentry_cpu);
pr_err("VMCB Control Area:\n");
pr_err("%-20s%04x\n", "cr_read:", control->intercepts[INTERCEPT_CR] & 0xffff);
pr_err("%-20s%04x\n", "cr_write:", control->intercepts[INTERCEPT_CR] >> 16);
@@ -3769,6 +3790,8 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
}
svm->vmcb->save.cr2 = vcpu->arch.cr2;
+ svm_hv_update_vp_id(svm->vmcb, vcpu);
+
/*
* Run with all-zero DR6 unless needed, so that we can get the exact cause
* of a #DB.
@@ -3842,6 +3865,12 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
svm->next_rip = 0;
if (is_guest_mode(vcpu)) {
nested_sync_control_from_vmcb02(svm);
+
+ /* Track VMRUNs that have made past consistency checking */
+ if (svm->nested.nested_run_pending &&
+ svm->vmcb->control.exit_code != SVM_EXIT_ERR)
+ ++vcpu->stat.nested_run;
+
svm->nested.nested_run_pending = 0;
}
@@ -3853,10 +3882,8 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
vcpu->arch.apf.host_apf_flags =
kvm_read_and_reset_apf_flags();
- if (npt_enabled) {
- vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR);
- vcpu->arch.regs_dirty &= ~(1 << VCPU_EXREG_PDPTR);
- }
+ if (npt_enabled)
+ kvm_register_clear_available(vcpu, VCPU_EXREG_PDPTR);
/*
* We need to handle MC intercepts here before the vcpu has a chance to
@@ -3884,6 +3911,8 @@ static void svm_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa,
svm->vmcb->control.nested_cr3 = __sme_set(root_hpa);
vmcb_mark_dirty(svm->vmcb, VMCB_NPT);
+ hv_track_root_tdp(vcpu, root_hpa);
+
/* Loading L2's CR3 is handled by enter_svm_guest_mode. */
if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail))
return;
@@ -3972,8 +4001,7 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
svm->nrips_enabled = kvm_cpu_cap_has(X86_FEATURE_NRIPS) &&
guest_cpuid_has(vcpu, X86_FEATURE_NRIPS);
- /* Check again if INVPCID interception if required */
- svm_check_invpcid(svm);
+ svm_recalc_instruction_intercepts(vcpu, svm);
/* For sev guests, the memory encryption bit is not reserved in CR3. */
if (sev_guest(vcpu->kvm)) {
@@ -4257,7 +4285,7 @@ static int svm_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
return !svm_smi_blocked(vcpu);
}
-static int svm_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
+static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
{
struct vcpu_svm *svm = to_svm(vcpu);
int ret;
@@ -4279,7 +4307,7 @@ static int svm_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
return 0;
}
-static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
+static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
{
struct vcpu_svm *svm = to_svm(vcpu);
struct kvm_host_map map;
@@ -4435,13 +4463,12 @@ static int svm_vm_init(struct kvm *kvm)
if (!pause_filter_count || !pause_filter_thresh)
kvm->arch.pause_in_guest = true;
- if (avic) {
+ if (enable_apicv) {
int ret = avic_vm_init(kvm);
if (ret)
return ret;
}
- kvm_apicv_init(kvm, avic);
return 0;
}
@@ -4532,7 +4559,10 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.has_wbinvd_exit = svm_has_wbinvd_exit,
- .write_l1_tsc_offset = svm_write_l1_tsc_offset,
+ .get_l2_tsc_offset = svm_get_l2_tsc_offset,
+ .get_l2_tsc_multiplier = svm_get_l2_tsc_multiplier,
+ .write_tsc_offset = svm_write_tsc_offset,
+ .write_tsc_multiplier = svm_write_tsc_multiplier,
.load_mmu_pgd = svm_load_mmu_pgd,
@@ -4552,8 +4582,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.setup_mce = svm_setup_mce,
.smi_allowed = svm_smi_allowed,
- .pre_enter_smm = svm_pre_enter_smm,
- .pre_leave_smm = svm_pre_leave_smm,
+ .enter_smm = svm_enter_smm,
+ .leave_smm = svm_leave_smm,
.enable_smi_window = svm_enable_smi_window,
.mem_enc_op = svm_mem_enc_op,