author     2020-12-28 14:20:00 +0000
committer  2020-12-28 14:20:00 +0000
commit     2ae6f64ce1ce304b502461fdfe0b96c8171ae2cc (patch)
tree       88e987c447daf2c29e2d4c15e58d1029b0cc78c2 /arch/x86/kvm/svm
parent     regulator: bd718x7: Add enable times (diff)
parent     Linux 5.11-rc1 (diff)
Merge tag 'v5.11-rc1' into regulator-5.11
Linux 5.11-rc1
Diffstat (limited to 'arch/x86/kvm/svm')
-rw-r--r-- arch/x86/kvm/svm/avic.c    |   9
-rw-r--r-- arch/x86/kvm/svm/nested.c  |  11
-rw-r--r-- arch/x86/kvm/svm/sev.c     | 917
-rw-r--r-- arch/x86/kvm/svm/svm.c     | 485
-rw-r--r-- arch/x86/kvm/svm/svm.h     | 167
-rw-r--r-- arch/x86/kvm/svm/vmenter.S |  50
6 files changed, 1430 insertions(+), 209 deletions(-)
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index 8c550999ace0..0ef84d57b72e 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -233,7 +233,8 @@ static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu, */ static int avic_update_access_page(struct kvm *kvm, bool activate) { - int ret = 0; + void __user *ret; + int r = 0; mutex_lock(&kvm->slots_lock); /* @@ -249,13 +250,15 @@ static int avic_update_access_page(struct kvm *kvm, bool activate) APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, APIC_DEFAULT_PHYS_BASE, activate ? PAGE_SIZE : 0); - if (ret) + if (IS_ERR(ret)) { + r = PTR_ERR(ret); goto out; + } kvm->arch.apic_access_page_done = activate; out: mutex_unlock(&kvm->slots_lock); - return ret; + return r; } static int avic_init_backing_page(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 9e4c226dbf7d..b0b667456b2e 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -254,7 +254,7 @@ static bool nested_vmcb_checks(struct vcpu_svm *svm, struct vmcb *vmcb12) (vmcb12->save.cr3 & MSR_CR3_LONG_MBZ_MASK)) return false; } - if (kvm_valid_cr4(&svm->vcpu, vmcb12->save.cr4)) + if (!kvm_is_valid_cr4(&svm->vcpu, vmcb12->save.cr4)) return false; return nested_vmcb_check_controls(&vmcb12->control); @@ -381,7 +381,7 @@ static void nested_prepare_vmcb_save(struct vcpu_svm *svm, struct vmcb *vmcb12) svm->vmcb->save.ds = vmcb12->save.ds; svm->vmcb->save.gdtr = vmcb12->save.gdtr; svm->vmcb->save.idtr = vmcb12->save.idtr; - kvm_set_rflags(&svm->vcpu, vmcb12->save.rflags); + kvm_set_rflags(&svm->vcpu, vmcb12->save.rflags | X86_EFLAGS_FIXED); svm_set_efer(&svm->vcpu, vmcb12->save.efer); svm_set_cr0(&svm->vcpu, vmcb12->save.cr0); svm_set_cr4(&svm->vcpu, vmcb12->save.cr4); @@ -394,8 +394,8 @@ static void nested_prepare_vmcb_save(struct vcpu_svm *svm, struct vmcb *vmcb12) svm->vmcb->save.rax = vmcb12->save.rax; svm->vmcb->save.rsp = vmcb12->save.rsp; svm->vmcb->save.rip = vmcb12->save.rip; - svm->vmcb->save.dr7 = vmcb12->save.dr7; - svm->vcpu.arch.dr6 = vmcb12->save.dr6; + svm->vmcb->save.dr7 = vmcb12->save.dr7 | DR7_FIXED_1; + svm->vcpu.arch.dr6 = vmcb12->save.dr6 | DR6_FIXED_1 | DR6_RTM; svm->vmcb->save.cpl = vmcb12->save.cpl; } @@ -660,13 +660,14 @@ int nested_svm_vmexit(struct vcpu_svm *svm) svm->vmcb->save.gdtr = hsave->save.gdtr; svm->vmcb->save.idtr = hsave->save.idtr; kvm_set_rflags(&svm->vcpu, hsave->save.rflags); + kvm_set_rflags(&svm->vcpu, hsave->save.rflags | X86_EFLAGS_FIXED); svm_set_efer(&svm->vcpu, hsave->save.efer); svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE); svm_set_cr4(&svm->vcpu, hsave->save.cr4); kvm_rax_write(&svm->vcpu, hsave->save.rax); kvm_rsp_write(&svm->vcpu, hsave->save.rsp); kvm_rip_write(&svm->vcpu, hsave->save.rip); - svm->vmcb->save.dr7 = 0; + svm->vmcb->save.dr7 = DR7_FIXED_1; svm->vmcb->save.cpl = 0; svm->vmcb->control.exit_int_info = 0; diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index c0b14106258a..9858d5ae9ddd 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -14,10 +14,20 @@ #include <linux/psp-sev.h> #include <linux/pagemap.h> #include <linux/swap.h> +#include <linux/processor.h> +#include <linux/trace_events.h> +#include <asm/fpu/internal.h> + +#include <asm/trapnr.h> #include "x86.h" #include "svm.h" +#include "cpuid.h" +#include "trace.h" + +#define __ex(x) __kvm_handle_fault_on_reboot(x) +static u8 sev_enc_bit; static int sev_flush_asids(void); static DECLARE_RWSEM(sev_deactivate_lock); static DEFINE_MUTEX(sev_bitmap_lock); @@ 
-25,7 +35,6 @@ unsigned int max_sev_asid; static unsigned int min_sev_asid; static unsigned long *sev_asid_bitmap; static unsigned long *sev_reclaim_asid_bitmap; -#define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT) struct enc_region { struct list_head list; @@ -57,19 +66,19 @@ static int sev_flush_asids(void) } /* Must be called with the sev_bitmap_lock held */ -static bool __sev_recycle_asids(void) +static bool __sev_recycle_asids(int min_asid, int max_asid) { int pos; /* Check if there are any ASIDs to reclaim before performing a flush */ - pos = find_next_bit(sev_reclaim_asid_bitmap, - max_sev_asid, min_sev_asid - 1); - if (pos >= max_sev_asid) + pos = find_next_bit(sev_reclaim_asid_bitmap, max_sev_asid, min_asid); + if (pos >= max_asid) return false; if (sev_flush_asids()) return false; + /* The flush process will flush all reclaimable SEV and SEV-ES ASIDs */ bitmap_xor(sev_asid_bitmap, sev_asid_bitmap, sev_reclaim_asid_bitmap, max_sev_asid); bitmap_zero(sev_reclaim_asid_bitmap, max_sev_asid); @@ -77,20 +86,23 @@ static bool __sev_recycle_asids(void) return true; } -static int sev_asid_new(void) +static int sev_asid_new(struct kvm_sev_info *sev) { + int pos, min_asid, max_asid; bool retry = true; - int pos; mutex_lock(&sev_bitmap_lock); /* - * SEV-enabled guest must use asid from min_sev_asid to max_sev_asid. + * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid. + * SEV-ES-enabled guest can use from 1 to min_sev_asid - 1. */ + min_asid = sev->es_active ? 0 : min_sev_asid - 1; + max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid; again: - pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_sev_asid - 1); - if (pos >= max_sev_asid) { - if (retry && __sev_recycle_asids()) { + pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_asid); + if (pos >= max_asid) { + if (retry && __sev_recycle_asids(min_asid, max_asid)) { retry = false; goto again; } @@ -172,7 +184,7 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) if (unlikely(sev->active)) return ret; - asid = sev_asid_new(); + asid = sev_asid_new(sev); if (asid < 0) return ret; @@ -191,6 +203,16 @@ e_free: return ret; } +static int sev_es_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) +{ + if (!sev_es) + return -ENOTTY; + + to_kvm_svm(kvm)->sev_info.es_active = true; + + return sev_guest_init(kvm, argp); +} + static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error) { struct sev_data_activate *data; @@ -490,6 +512,96 @@ e_free: return ret; } +static int sev_es_sync_vmsa(struct vcpu_svm *svm) +{ + struct vmcb_save_area *save = &svm->vmcb->save; + + /* Check some debug related fields before encrypting the VMSA */ + if (svm->vcpu.guest_debug || (save->dr7 & ~DR7_FIXED_1)) + return -EINVAL; + + /* Sync registgers */ + save->rax = svm->vcpu.arch.regs[VCPU_REGS_RAX]; + save->rbx = svm->vcpu.arch.regs[VCPU_REGS_RBX]; + save->rcx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; + save->rdx = svm->vcpu.arch.regs[VCPU_REGS_RDX]; + save->rsp = svm->vcpu.arch.regs[VCPU_REGS_RSP]; + save->rbp = svm->vcpu.arch.regs[VCPU_REGS_RBP]; + save->rsi = svm->vcpu.arch.regs[VCPU_REGS_RSI]; + save->rdi = svm->vcpu.arch.regs[VCPU_REGS_RDI]; +#ifdef CONFIG_X86_64 + save->r8 = svm->vcpu.arch.regs[VCPU_REGS_R8]; + save->r9 = svm->vcpu.arch.regs[VCPU_REGS_R9]; + save->r10 = svm->vcpu.arch.regs[VCPU_REGS_R10]; + save->r11 = svm->vcpu.arch.regs[VCPU_REGS_R11]; + save->r12 = svm->vcpu.arch.regs[VCPU_REGS_R12]; + save->r13 = svm->vcpu.arch.regs[VCPU_REGS_R13]; + save->r14 = 
svm->vcpu.arch.regs[VCPU_REGS_R14]; + save->r15 = svm->vcpu.arch.regs[VCPU_REGS_R15]; +#endif + save->rip = svm->vcpu.arch.regs[VCPU_REGS_RIP]; + + /* Sync some non-GPR registers before encrypting */ + save->xcr0 = svm->vcpu.arch.xcr0; + save->pkru = svm->vcpu.arch.pkru; + save->xss = svm->vcpu.arch.ia32_xss; + + /* + * SEV-ES will use a VMSA that is pointed to by the VMCB, not + * the traditional VMSA that is part of the VMCB. Copy the + * traditional VMSA as it has been built so far (in prep + * for LAUNCH_UPDATE_VMSA) to be the initial SEV-ES state. + */ + memcpy(svm->vmsa, save, sizeof(*save)); + + return 0; +} + +static int sev_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp) +{ + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; + struct sev_data_launch_update_vmsa *vmsa; + int i, ret; + + if (!sev_es_guest(kvm)) + return -ENOTTY; + + vmsa = kzalloc(sizeof(*vmsa), GFP_KERNEL); + if (!vmsa) + return -ENOMEM; + + for (i = 0; i < kvm->created_vcpus; i++) { + struct vcpu_svm *svm = to_svm(kvm->vcpus[i]); + + /* Perform some pre-encryption checks against the VMSA */ + ret = sev_es_sync_vmsa(svm); + if (ret) + goto e_free; + + /* + * The LAUNCH_UPDATE_VMSA command will perform in-place + * encryption of the VMSA memory content (i.e it will write + * the same memory region with the guest's key), so invalidate + * it first. + */ + clflush_cache_range(svm->vmsa, PAGE_SIZE); + + vmsa->handle = sev->handle; + vmsa->address = __sme_pa(svm->vmsa); + vmsa->len = PAGE_SIZE; + ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, vmsa, + &argp->error); + if (ret) + goto e_free; + + svm->vcpu.arch.guest_state_protected = true; + } + +e_free: + kfree(vmsa); + return ret; +} + static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp) { void __user *measure = (void __user *)(uintptr_t)argp->data; @@ -642,8 +754,8 @@ static int __sev_dbg_decrypt(struct kvm *kvm, unsigned long src_paddr, * Its safe to read more than we are asked, caller should ensure that * destination has enough space. */ - src_paddr = round_down(src_paddr, 16); offset = src_paddr & 15; + src_paddr = round_down(src_paddr, 16); sz = round_up(sz + offset, 16); return __sev_issue_dbg_cmd(kvm, src_paddr, dst_paddr, sz, err, false); @@ -932,7 +1044,7 @@ int svm_mem_enc_op(struct kvm *kvm, void __user *argp) struct kvm_sev_cmd sev_cmd; int r; - if (!svm_sev_enabled()) + if (!svm_sev_enabled() || !sev) return -ENOTTY; if (!argp) @@ -947,12 +1059,18 @@ int svm_mem_enc_op(struct kvm *kvm, void __user *argp) case KVM_SEV_INIT: r = sev_guest_init(kvm, &sev_cmd); break; + case KVM_SEV_ES_INIT: + r = sev_es_guest_init(kvm, &sev_cmd); + break; case KVM_SEV_LAUNCH_START: r = sev_launch_start(kvm, &sev_cmd); break; case KVM_SEV_LAUNCH_UPDATE_DATA: r = sev_launch_update_data(kvm, &sev_cmd); break; + case KVM_SEV_LAUNCH_UPDATE_VMSA: + r = sev_launch_update_vmsa(kvm, &sev_cmd); + break; case KVM_SEV_LAUNCH_MEASURE: r = sev_launch_measure(kvm, &sev_cmd); break; @@ -1125,49 +1243,61 @@ void sev_vm_destroy(struct kvm *kvm) sev_asid_free(sev->asid); } -int __init sev_hardware_setup(void) +void __init sev_hardware_setup(void) { - struct sev_user_data_status *status; - int rc; + unsigned int eax, ebx, ecx, edx; + bool sev_es_supported = false; + bool sev_supported = false; + + /* Does the CPU support SEV? 
*/ + if (!boot_cpu_has(X86_FEATURE_SEV)) + goto out; + + /* Retrieve SEV CPUID information */ + cpuid(0x8000001f, &eax, &ebx, &ecx, &edx); + + /* Set encryption bit location for SEV-ES guests */ + sev_enc_bit = ebx & 0x3f; /* Maximum number of encrypted guests supported simultaneously */ - max_sev_asid = cpuid_ecx(0x8000001F); + max_sev_asid = ecx; if (!svm_sev_enabled()) - return 1; + goto out; /* Minimum ASID value that should be used for SEV guest */ - min_sev_asid = cpuid_edx(0x8000001F); + min_sev_asid = edx; /* Initialize SEV ASID bitmaps */ sev_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL); if (!sev_asid_bitmap) - return 1; + goto out; sev_reclaim_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL); if (!sev_reclaim_asid_bitmap) - return 1; + goto out; - status = kmalloc(sizeof(*status), GFP_KERNEL); - if (!status) - return 1; + pr_info("SEV supported: %u ASIDs\n", max_sev_asid - min_sev_asid + 1); + sev_supported = true; - /* - * Check SEV platform status. - * - * PLATFORM_STATUS can be called in any state, if we failed to query - * the PLATFORM status then either PSP firmware does not support SEV - * feature or SEV firmware is dead. - */ - rc = sev_platform_status(status, NULL); - if (rc) - goto err; + /* SEV-ES support requested? */ + if (!sev_es) + goto out; - pr_info("SEV supported\n"); + /* Does the CPU support SEV-ES? */ + if (!boot_cpu_has(X86_FEATURE_SEV_ES)) + goto out; -err: - kfree(status); - return rc; + /* Has the system been allocated ASIDs for SEV-ES? */ + if (min_sev_asid == 1) + goto out; + + pr_info("SEV-ES supported: %u ASIDs\n", min_sev_asid - 1); + sev_es_supported = true; + +out: + sev = sev_supported; + sev_es = sev_es_supported; } void sev_hardware_teardown(void) @@ -1181,13 +1311,329 @@ void sev_hardware_teardown(void) sev_flush_asids(); } +/* + * Pages used by hardware to hold guest encrypted state must be flushed before + * returning them to the system. + */ +static void sev_flush_guest_memory(struct vcpu_svm *svm, void *va, + unsigned long len) +{ + /* + * If hardware enforced cache coherency for encrypted mappings of the + * same physical page is supported, nothing to do. + */ + if (boot_cpu_has(X86_FEATURE_SME_COHERENT)) + return; + + /* + * If the VM Page Flush MSR is supported, use it to flush the page + * (using the page virtual address and the guest ASID). + */ + if (boot_cpu_has(X86_FEATURE_VM_PAGE_FLUSH)) { + struct kvm_sev_info *sev; + unsigned long va_start; + u64 start, stop; + + /* Align start and stop to page boundaries. */ + va_start = (unsigned long)va; + start = (u64)va_start & PAGE_MASK; + stop = PAGE_ALIGN((u64)va_start + len); + + if (start < stop) { + sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info; + + while (start < stop) { + wrmsrl(MSR_AMD64_VM_PAGE_FLUSH, + start | sev->asid); + + start += PAGE_SIZE; + } + + return; + } + + WARN(1, "Address overflow, using WBINVD\n"); + } + + /* + * Hardware should always have one of the above features, + * but if not, use WBINVD and issue a warning. 
+ */ + WARN_ONCE(1, "Using WBINVD to flush guest memory\n"); + wbinvd_on_all_cpus(); +} + +void sev_free_vcpu(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm; + + if (!sev_es_guest(vcpu->kvm)) + return; + + svm = to_svm(vcpu); + + if (vcpu->arch.guest_state_protected) + sev_flush_guest_memory(svm, svm->vmsa, PAGE_SIZE); + __free_page(virt_to_page(svm->vmsa)); + + if (svm->ghcb_sa_free) + kfree(svm->ghcb_sa); +} + +static void dump_ghcb(struct vcpu_svm *svm) +{ + struct ghcb *ghcb = svm->ghcb; + unsigned int nbits; + + /* Re-use the dump_invalid_vmcb module parameter */ + if (!dump_invalid_vmcb) { + pr_warn_ratelimited("set kvm_amd.dump_invalid_vmcb=1 to dump internal KVM state.\n"); + return; + } + + nbits = sizeof(ghcb->save.valid_bitmap) * 8; + + pr_err("GHCB (GPA=%016llx):\n", svm->vmcb->control.ghcb_gpa); + pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_code", + ghcb->save.sw_exit_code, ghcb_sw_exit_code_is_valid(ghcb)); + pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_1", + ghcb->save.sw_exit_info_1, ghcb_sw_exit_info_1_is_valid(ghcb)); + pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_2", + ghcb->save.sw_exit_info_2, ghcb_sw_exit_info_2_is_valid(ghcb)); + pr_err("%-20s%016llx is_valid: %u\n", "sw_scratch", + ghcb->save.sw_scratch, ghcb_sw_scratch_is_valid(ghcb)); + pr_err("%-20s%*pb\n", "valid_bitmap", nbits, ghcb->save.valid_bitmap); +} + +static void sev_es_sync_to_ghcb(struct vcpu_svm *svm) +{ + struct kvm_vcpu *vcpu = &svm->vcpu; + struct ghcb *ghcb = svm->ghcb; + + /* + * The GHCB protocol so far allows for the following data + * to be returned: + * GPRs RAX, RBX, RCX, RDX + * + * Copy their values to the GHCB if they are dirty. + */ + if (kvm_register_is_dirty(vcpu, VCPU_REGS_RAX)) + ghcb_set_rax(ghcb, vcpu->arch.regs[VCPU_REGS_RAX]); + if (kvm_register_is_dirty(vcpu, VCPU_REGS_RBX)) + ghcb_set_rbx(ghcb, vcpu->arch.regs[VCPU_REGS_RBX]); + if (kvm_register_is_dirty(vcpu, VCPU_REGS_RCX)) + ghcb_set_rcx(ghcb, vcpu->arch.regs[VCPU_REGS_RCX]); + if (kvm_register_is_dirty(vcpu, VCPU_REGS_RDX)) + ghcb_set_rdx(ghcb, vcpu->arch.regs[VCPU_REGS_RDX]); +} + +static void sev_es_sync_from_ghcb(struct vcpu_svm *svm) +{ + struct vmcb_control_area *control = &svm->vmcb->control; + struct kvm_vcpu *vcpu = &svm->vcpu; + struct ghcb *ghcb = svm->ghcb; + u64 exit_code; + + /* + * The GHCB protocol so far allows for the following data + * to be supplied: + * GPRs RAX, RBX, RCX, RDX + * XCR0 + * CPL + * + * VMMCALL allows the guest to provide extra registers. KVM also + * expects RSI for hypercalls, so include that, too. + * + * Copy their values to the appropriate location if supplied. 
+ */ + memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs)); + + vcpu->arch.regs[VCPU_REGS_RAX] = ghcb_get_rax_if_valid(ghcb); + vcpu->arch.regs[VCPU_REGS_RBX] = ghcb_get_rbx_if_valid(ghcb); + vcpu->arch.regs[VCPU_REGS_RCX] = ghcb_get_rcx_if_valid(ghcb); + vcpu->arch.regs[VCPU_REGS_RDX] = ghcb_get_rdx_if_valid(ghcb); + vcpu->arch.regs[VCPU_REGS_RSI] = ghcb_get_rsi_if_valid(ghcb); + + svm->vmcb->save.cpl = ghcb_get_cpl_if_valid(ghcb); + + if (ghcb_xcr0_is_valid(ghcb)) { + vcpu->arch.xcr0 = ghcb_get_xcr0(ghcb); + kvm_update_cpuid_runtime(vcpu); + } + + /* Copy the GHCB exit information into the VMCB fields */ + exit_code = ghcb_get_sw_exit_code(ghcb); + control->exit_code = lower_32_bits(exit_code); + control->exit_code_hi = upper_32_bits(exit_code); + control->exit_info_1 = ghcb_get_sw_exit_info_1(ghcb); + control->exit_info_2 = ghcb_get_sw_exit_info_2(ghcb); + + /* Clear the valid entries fields */ + memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap)); +} + +static int sev_es_validate_vmgexit(struct vcpu_svm *svm) +{ + struct kvm_vcpu *vcpu; + struct ghcb *ghcb; + u64 exit_code = 0; + + ghcb = svm->ghcb; + + /* Only GHCB Usage code 0 is supported */ + if (ghcb->ghcb_usage) + goto vmgexit_err; + + /* + * Retrieve the exit code now even though is may not be marked valid + * as it could help with debugging. + */ + exit_code = ghcb_get_sw_exit_code(ghcb); + + if (!ghcb_sw_exit_code_is_valid(ghcb) || + !ghcb_sw_exit_info_1_is_valid(ghcb) || + !ghcb_sw_exit_info_2_is_valid(ghcb)) + goto vmgexit_err; + + switch (ghcb_get_sw_exit_code(ghcb)) { + case SVM_EXIT_READ_DR7: + break; + case SVM_EXIT_WRITE_DR7: + if (!ghcb_rax_is_valid(ghcb)) + goto vmgexit_err; + break; + case SVM_EXIT_RDTSC: + break; + case SVM_EXIT_RDPMC: + if (!ghcb_rcx_is_valid(ghcb)) + goto vmgexit_err; + break; + case SVM_EXIT_CPUID: + if (!ghcb_rax_is_valid(ghcb) || + !ghcb_rcx_is_valid(ghcb)) + goto vmgexit_err; + if (ghcb_get_rax(ghcb) == 0xd) + if (!ghcb_xcr0_is_valid(ghcb)) + goto vmgexit_err; + break; + case SVM_EXIT_INVD: + break; + case SVM_EXIT_IOIO: + if (ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_STR_MASK) { + if (!ghcb_sw_scratch_is_valid(ghcb)) + goto vmgexit_err; + } else { + if (!(ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_TYPE_MASK)) + if (!ghcb_rax_is_valid(ghcb)) + goto vmgexit_err; + } + break; + case SVM_EXIT_MSR: + if (!ghcb_rcx_is_valid(ghcb)) + goto vmgexit_err; + if (ghcb_get_sw_exit_info_1(ghcb)) { + if (!ghcb_rax_is_valid(ghcb) || + !ghcb_rdx_is_valid(ghcb)) + goto vmgexit_err; + } + break; + case SVM_EXIT_VMMCALL: + if (!ghcb_rax_is_valid(ghcb) || + !ghcb_cpl_is_valid(ghcb)) + goto vmgexit_err; + break; + case SVM_EXIT_RDTSCP: + break; + case SVM_EXIT_WBINVD: + break; + case SVM_EXIT_MONITOR: + if (!ghcb_rax_is_valid(ghcb) || + !ghcb_rcx_is_valid(ghcb) || + !ghcb_rdx_is_valid(ghcb)) + goto vmgexit_err; + break; + case SVM_EXIT_MWAIT: + if (!ghcb_rax_is_valid(ghcb) || + !ghcb_rcx_is_valid(ghcb)) + goto vmgexit_err; + break; + case SVM_VMGEXIT_MMIO_READ: + case SVM_VMGEXIT_MMIO_WRITE: + if (!ghcb_sw_scratch_is_valid(ghcb)) + goto vmgexit_err; + break; + case SVM_VMGEXIT_NMI_COMPLETE: + case SVM_VMGEXIT_AP_JUMP_TABLE: + case SVM_VMGEXIT_UNSUPPORTED_EVENT: + break; + default: + goto vmgexit_err; + } + + return 0; + +vmgexit_err: + vcpu = &svm->vcpu; + + if (ghcb->ghcb_usage) { + vcpu_unimpl(vcpu, "vmgexit: ghcb usage %#x is not valid\n", + ghcb->ghcb_usage); + } else { + vcpu_unimpl(vcpu, "vmgexit: exit reason %#llx is not valid\n", + exit_code); + dump_ghcb(svm); + } + + vcpu->run->exit_reason = 
KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON; + vcpu->run->internal.ndata = 2; + vcpu->run->internal.data[0] = exit_code; + vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu; + + return -EINVAL; +} + +static void pre_sev_es_run(struct vcpu_svm *svm) +{ + if (!svm->ghcb) + return; + + if (svm->ghcb_sa_free) { + /* + * The scratch area lives outside the GHCB, so there is a + * buffer that, depending on the operation performed, may + * need to be synced, then freed. + */ + if (svm->ghcb_sa_sync) { + kvm_write_guest(svm->vcpu.kvm, + ghcb_get_sw_scratch(svm->ghcb), + svm->ghcb_sa, svm->ghcb_sa_len); + svm->ghcb_sa_sync = false; + } + + kfree(svm->ghcb_sa); + svm->ghcb_sa = NULL; + svm->ghcb_sa_free = false; + } + + trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, svm->ghcb); + + sev_es_sync_to_ghcb(svm); + + kvm_vcpu_unmap(&svm->vcpu, &svm->ghcb_map, true); + svm->ghcb = NULL; +} + void pre_sev_run(struct vcpu_svm *svm, int cpu) { struct svm_cpu_data *sd = per_cpu(svm_data, cpu); int asid = sev_get_asid(svm->vcpu.kvm); + /* Perform any SEV-ES pre-run actions */ + pre_sev_es_run(svm); + /* Assign the asid allocated with this SEV guest */ - svm->vmcb->control.asid = asid; + svm->asid = asid; /* * Flush guest TLB: @@ -1203,3 +1649,394 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu) svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID; vmcb_mark_dirty(svm->vmcb, VMCB_ASID); } + +#define GHCB_SCRATCH_AREA_LIMIT (16ULL * PAGE_SIZE) +static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len) +{ + struct vmcb_control_area *control = &svm->vmcb->control; + struct ghcb *ghcb = svm->ghcb; + u64 ghcb_scratch_beg, ghcb_scratch_end; + u64 scratch_gpa_beg, scratch_gpa_end; + void *scratch_va; + + scratch_gpa_beg = ghcb_get_sw_scratch(ghcb); + if (!scratch_gpa_beg) { + pr_err("vmgexit: scratch gpa not provided\n"); + return false; + } + + scratch_gpa_end = scratch_gpa_beg + len; + if (scratch_gpa_end < scratch_gpa_beg) { + pr_err("vmgexit: scratch length (%#llx) not valid for scratch address (%#llx)\n", + len, scratch_gpa_beg); + return false; + } + + if ((scratch_gpa_beg & PAGE_MASK) == control->ghcb_gpa) { + /* Scratch area begins within GHCB */ + ghcb_scratch_beg = control->ghcb_gpa + + offsetof(struct ghcb, shared_buffer); + ghcb_scratch_end = control->ghcb_gpa + + offsetof(struct ghcb, reserved_1); + + /* + * If the scratch area begins within the GHCB, it must be + * completely contained in the GHCB shared buffer area. + */ + if (scratch_gpa_beg < ghcb_scratch_beg || + scratch_gpa_end > ghcb_scratch_end) { + pr_err("vmgexit: scratch area is outside of GHCB shared buffer area (%#llx - %#llx)\n", + scratch_gpa_beg, scratch_gpa_end); + return false; + } + + scratch_va = (void *)svm->ghcb; + scratch_va += (scratch_gpa_beg - control->ghcb_gpa); + } else { + /* + * The guest memory must be read into a kernel buffer, so + * limit the size + */ + if (len > GHCB_SCRATCH_AREA_LIMIT) { + pr_err("vmgexit: scratch area exceeds KVM limits (%#llx requested, %#llx limit)\n", + len, GHCB_SCRATCH_AREA_LIMIT); + return false; + } + scratch_va = kzalloc(len, GFP_KERNEL); + if (!scratch_va) + return false; + + if (kvm_read_guest(svm->vcpu.kvm, scratch_gpa_beg, scratch_va, len)) { + /* Unable to copy scratch area from guest */ + pr_err("vmgexit: kvm_read_guest for scratch area failed\n"); + + kfree(scratch_va); + return false; + } + + /* + * The scratch area is outside the GHCB. 
The operation will + * dictate whether the buffer needs to be synced before running + * the vCPU next time (i.e. a read was requested so the data + * must be written back to the guest memory). + */ + svm->ghcb_sa_sync = sync; + svm->ghcb_sa_free = true; + } + + svm->ghcb_sa = scratch_va; + svm->ghcb_sa_len = len; + + return true; +} + +static void set_ghcb_msr_bits(struct vcpu_svm *svm, u64 value, u64 mask, + unsigned int pos) +{ + svm->vmcb->control.ghcb_gpa &= ~(mask << pos); + svm->vmcb->control.ghcb_gpa |= (value & mask) << pos; +} + +static u64 get_ghcb_msr_bits(struct vcpu_svm *svm, u64 mask, unsigned int pos) +{ + return (svm->vmcb->control.ghcb_gpa >> pos) & mask; +} + +static void set_ghcb_msr(struct vcpu_svm *svm, u64 value) +{ + svm->vmcb->control.ghcb_gpa = value; +} + +static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm) +{ + struct vmcb_control_area *control = &svm->vmcb->control; + struct kvm_vcpu *vcpu = &svm->vcpu; + u64 ghcb_info; + int ret = 1; + + ghcb_info = control->ghcb_gpa & GHCB_MSR_INFO_MASK; + + trace_kvm_vmgexit_msr_protocol_enter(svm->vcpu.vcpu_id, + control->ghcb_gpa); + + switch (ghcb_info) { + case GHCB_MSR_SEV_INFO_REQ: + set_ghcb_msr(svm, GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX, + GHCB_VERSION_MIN, + sev_enc_bit)); + break; + case GHCB_MSR_CPUID_REQ: { + u64 cpuid_fn, cpuid_reg, cpuid_value; + + cpuid_fn = get_ghcb_msr_bits(svm, + GHCB_MSR_CPUID_FUNC_MASK, + GHCB_MSR_CPUID_FUNC_POS); + + /* Initialize the registers needed by the CPUID intercept */ + vcpu->arch.regs[VCPU_REGS_RAX] = cpuid_fn; + vcpu->arch.regs[VCPU_REGS_RCX] = 0; + + ret = svm_invoke_exit_handler(svm, SVM_EXIT_CPUID); + if (!ret) { + ret = -EINVAL; + break; + } + + cpuid_reg = get_ghcb_msr_bits(svm, + GHCB_MSR_CPUID_REG_MASK, + GHCB_MSR_CPUID_REG_POS); + if (cpuid_reg == 0) + cpuid_value = vcpu->arch.regs[VCPU_REGS_RAX]; + else if (cpuid_reg == 1) + cpuid_value = vcpu->arch.regs[VCPU_REGS_RBX]; + else if (cpuid_reg == 2) + cpuid_value = vcpu->arch.regs[VCPU_REGS_RCX]; + else + cpuid_value = vcpu->arch.regs[VCPU_REGS_RDX]; + + set_ghcb_msr_bits(svm, cpuid_value, + GHCB_MSR_CPUID_VALUE_MASK, + GHCB_MSR_CPUID_VALUE_POS); + + set_ghcb_msr_bits(svm, GHCB_MSR_CPUID_RESP, + GHCB_MSR_INFO_MASK, + GHCB_MSR_INFO_POS); + break; + } + case GHCB_MSR_TERM_REQ: { + u64 reason_set, reason_code; + + reason_set = get_ghcb_msr_bits(svm, + GHCB_MSR_TERM_REASON_SET_MASK, + GHCB_MSR_TERM_REASON_SET_POS); + reason_code = get_ghcb_msr_bits(svm, + GHCB_MSR_TERM_REASON_MASK, + GHCB_MSR_TERM_REASON_POS); + pr_info("SEV-ES guest requested termination: %#llx:%#llx\n", + reason_set, reason_code); + fallthrough; + } + default: + ret = -EINVAL; + } + + trace_kvm_vmgexit_msr_protocol_exit(svm->vcpu.vcpu_id, + control->ghcb_gpa, ret); + + return ret; +} + +int sev_handle_vmgexit(struct vcpu_svm *svm) +{ + struct vmcb_control_area *control = &svm->vmcb->control; + u64 ghcb_gpa, exit_code; + struct ghcb *ghcb; + int ret; + + /* Validate the GHCB */ + ghcb_gpa = control->ghcb_gpa; + if (ghcb_gpa & GHCB_MSR_INFO_MASK) + return sev_handle_vmgexit_msr_protocol(svm); + + if (!ghcb_gpa) { + vcpu_unimpl(&svm->vcpu, "vmgexit: GHCB gpa is not set\n"); + return -EINVAL; + } + + if (kvm_vcpu_map(&svm->vcpu, ghcb_gpa >> PAGE_SHIFT, &svm->ghcb_map)) { + /* Unable to map GHCB from guest */ + vcpu_unimpl(&svm->vcpu, "vmgexit: error mapping GHCB [%#llx] from guest\n", + ghcb_gpa); + return -EINVAL; + } + + svm->ghcb = svm->ghcb_map.hva; + ghcb = svm->ghcb_map.hva; + + trace_kvm_vmgexit_enter(svm->vcpu.vcpu_id, ghcb); + + exit_code = 
ghcb_get_sw_exit_code(ghcb); + + ret = sev_es_validate_vmgexit(svm); + if (ret) + return ret; + + sev_es_sync_from_ghcb(svm); + ghcb_set_sw_exit_info_1(ghcb, 0); + ghcb_set_sw_exit_info_2(ghcb, 0); + + ret = -EINVAL; + switch (exit_code) { + case SVM_VMGEXIT_MMIO_READ: + if (!setup_vmgexit_scratch(svm, true, control->exit_info_2)) + break; + + ret = kvm_sev_es_mmio_read(&svm->vcpu, + control->exit_info_1, + control->exit_info_2, + svm->ghcb_sa); + break; + case SVM_VMGEXIT_MMIO_WRITE: + if (!setup_vmgexit_scratch(svm, false, control->exit_info_2)) + break; + + ret = kvm_sev_es_mmio_write(&svm->vcpu, + control->exit_info_1, + control->exit_info_2, + svm->ghcb_sa); + break; + case SVM_VMGEXIT_NMI_COMPLETE: + ret = svm_invoke_exit_handler(svm, SVM_EXIT_IRET); + break; + case SVM_VMGEXIT_AP_JUMP_TABLE: { + struct kvm_sev_info *sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info; + + switch (control->exit_info_1) { + case 0: + /* Set AP jump table address */ + sev->ap_jump_table = control->exit_info_2; + break; + case 1: + /* Get AP jump table address */ + ghcb_set_sw_exit_info_2(ghcb, sev->ap_jump_table); + break; + default: + pr_err("svm: vmgexit: unsupported AP jump table request - exit_info_1=%#llx\n", + control->exit_info_1); + ghcb_set_sw_exit_info_1(ghcb, 1); + ghcb_set_sw_exit_info_2(ghcb, + X86_TRAP_UD | + SVM_EVTINJ_TYPE_EXEPT | + SVM_EVTINJ_VALID); + } + + ret = 1; + break; + } + case SVM_VMGEXIT_UNSUPPORTED_EVENT: + vcpu_unimpl(&svm->vcpu, + "vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n", + control->exit_info_1, control->exit_info_2); + break; + default: + ret = svm_invoke_exit_handler(svm, exit_code); + } + + return ret; +} + +int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in) +{ + if (!setup_vmgexit_scratch(svm, in, svm->vmcb->control.exit_info_2)) + return -EINVAL; + + return kvm_sev_es_string_io(&svm->vcpu, size, port, + svm->ghcb_sa, svm->ghcb_sa_len, in); +} + +void sev_es_init_vmcb(struct vcpu_svm *svm) +{ + struct kvm_vcpu *vcpu = &svm->vcpu; + + svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ES_ENABLE; + svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK; + + /* + * An SEV-ES guest requires a VMSA area that is a separate from the + * VMCB page. Do not include the encryption mask on the VMSA physical + * address since hardware will access it using the guest key. 
+ */ + svm->vmcb->control.vmsa_pa = __pa(svm->vmsa); + + /* Can't intercept CR register access, HV can't modify CR registers */ + svm_clr_intercept(svm, INTERCEPT_CR0_READ); + svm_clr_intercept(svm, INTERCEPT_CR4_READ); + svm_clr_intercept(svm, INTERCEPT_CR8_READ); + svm_clr_intercept(svm, INTERCEPT_CR0_WRITE); + svm_clr_intercept(svm, INTERCEPT_CR4_WRITE); + svm_clr_intercept(svm, INTERCEPT_CR8_WRITE); + + svm_clr_intercept(svm, INTERCEPT_SELECTIVE_CR0); + + /* Track EFER/CR register changes */ + svm_set_intercept(svm, TRAP_EFER_WRITE); + svm_set_intercept(svm, TRAP_CR0_WRITE); + svm_set_intercept(svm, TRAP_CR4_WRITE); + svm_set_intercept(svm, TRAP_CR8_WRITE); + + /* No support for enable_vmware_backdoor */ + clr_exception_intercept(svm, GP_VECTOR); + + /* Can't intercept XSETBV, HV can't modify XCR0 directly */ + svm_clr_intercept(svm, INTERCEPT_XSETBV); + + /* Clear intercepts on selected MSRs */ + set_msr_interception(vcpu, svm->msrpm, MSR_EFER, 1, 1); + set_msr_interception(vcpu, svm->msrpm, MSR_IA32_CR_PAT, 1, 1); + set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1); + set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1); + set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 1, 1); + set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 1, 1); +} + +void sev_es_create_vcpu(struct vcpu_svm *svm) +{ + /* + * Set the GHCB MSR value as per the GHCB specification when creating + * a vCPU for an SEV-ES guest. + */ + set_ghcb_msr(svm, GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX, + GHCB_VERSION_MIN, + sev_enc_bit)); +} + +void sev_es_vcpu_load(struct vcpu_svm *svm, int cpu) +{ + struct svm_cpu_data *sd = per_cpu(svm_data, cpu); + struct vmcb_save_area *hostsa; + unsigned int i; + + /* + * As an SEV-ES guest, hardware will restore the host state on VMEXIT, + * of which one step is to perform a VMLOAD. Since hardware does not + * perform a VMSAVE on VMRUN, the host savearea must be updated. + */ + asm volatile(__ex("vmsave") : : "a" (__sme_page_pa(sd->save_area)) : "memory"); + + /* + * Certain MSRs are restored on VMEXIT, only save ones that aren't + * restored. + */ + for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) { + if (host_save_user_msrs[i].sev_es_restored) + continue; + + rdmsrl(host_save_user_msrs[i].index, svm->host_user_msrs[i]); + } + + /* XCR0 is restored on VMEXIT, save the current host value */ + hostsa = (struct vmcb_save_area *)(page_address(sd->save_area) + 0x400); + hostsa->xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); + + /* PKRU is restored on VMEXIT, save the curent host value */ + hostsa->pkru = read_pkru(); + + /* MSR_IA32_XSS is restored on VMEXIT, save the currnet host value */ + hostsa->xss = host_xss; +} + +void sev_es_vcpu_put(struct vcpu_svm *svm) +{ + unsigned int i; + + /* + * Certain MSRs are restored on VMEXIT and were saved with vmsave in + * sev_es_vcpu_load() above. Only restore ones that weren't. 
+ */ + for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) { + if (host_save_user_msrs[i].sev_es_restored) + continue; + + wrmsrl(host_save_user_msrs[i].index, svm->host_user_msrs[i]); + } +} diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 2f32fd09e259..cce0143a6f80 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -33,9 +33,9 @@ #include <asm/debugreg.h> #include <asm/kvm_para.h> #include <asm/irq_remapping.h> -#include <asm/mce.h> #include <asm/spec-ctrl.h> #include <asm/cpu_device_id.h> +#include <asm/traps.h> #include <asm/virtext.h> #include "trace.h" @@ -90,7 +90,7 @@ static DEFINE_PER_CPU(u64, current_tsc_ratio); static const struct svm_direct_access_msrs { u32 index; /* Index of the MSR */ - bool always; /* True if intercept is always on */ + bool always; /* True if intercept is initially cleared */ } direct_access_msrs[MAX_DIRECT_ACCESS_MSRS] = { { .index = MSR_STAR, .always = true }, { .index = MSR_IA32_SYSENTER_CS, .always = true }, @@ -108,6 +108,9 @@ static const struct svm_direct_access_msrs { { .index = MSR_IA32_LASTBRANCHTOIP, .always = false }, { .index = MSR_IA32_LASTINTFROMIP, .always = false }, { .index = MSR_IA32_LASTINTTOIP, .always = false }, + { .index = MSR_EFER, .always = false }, + { .index = MSR_IA32_CR_PAT, .always = false }, + { .index = MSR_AMD64_SEV_ES_GHCB, .always = true }, { .index = MSR_INVALID, .always = false }, }; @@ -187,10 +190,14 @@ static int vgif = true; module_param(vgif, int, 0444); /* enable/disable SEV support */ -static int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT); +int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT); module_param(sev, int, 0444); -static bool __read_mostly dump_invalid_vmcb = 0; +/* enable/disable SEV-ES support */ +int sev_es = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT); +module_param(sev_es, int, 0444); + +bool __read_mostly dump_invalid_vmcb; module_param(dump_invalid_vmcb, bool, 0644); static u8 rsm_ins_bytes[] = "\x0f\xaa"; @@ -336,6 +343,13 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); + /* + * SEV-ES does not expose the next RIP. The RIP update is controlled by + * the type of exit and the #VC handler in the guest. 
+ */ + if (sev_es_guest(vcpu->kvm)) + goto done; + if (nrips && svm->vmcb->control.next_rip != 0) { WARN_ON_ONCE(!static_cpu_has(X86_FEATURE_NRIPS)); svm->next_rip = svm->vmcb->control.next_rip; @@ -347,6 +361,8 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu) } else { kvm_rip_write(vcpu, svm->next_rip); } + +done: svm_set_interrupt_shadow(vcpu, 0); return 1; @@ -484,7 +500,7 @@ static int svm_hardware_enable(void) wrmsrl(MSR_EFER, efer | EFER_SVME); - wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT); + wrmsrl(MSR_VM_HSAVE_PA, __sme_page_pa(sd->save_area)); if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) { wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT); @@ -530,12 +546,12 @@ static int svm_hardware_enable(void) static void svm_cpu_uninit(int cpu) { - struct svm_cpu_data *sd = per_cpu(svm_data, raw_smp_processor_id()); + struct svm_cpu_data *sd = per_cpu(svm_data, cpu); if (!sd) return; - per_cpu(svm_data, raw_smp_processor_id()) = NULL; + per_cpu(svm_data, cpu) = NULL; kfree(sd->sev_vmcbs); __free_page(sd->save_area); kfree(sd); @@ -552,6 +568,7 @@ static int svm_cpu_init(int cpu) sd->save_area = alloc_page(GFP_KERNEL); if (!sd->save_area) goto free_cpu_data; + clear_page(page_address(sd->save_area)); if (svm_sev_enabled()) { sd->sev_vmcbs = kmalloc_array(max_sev_asid + 1, @@ -662,8 +679,8 @@ static void set_msr_interception_bitmap(struct kvm_vcpu *vcpu, u32 *msrpm, msrpm[offset] = tmp; } -static void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr, - int read, int write) +void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr, + int read, int write) { set_shadow_msr_intercept(vcpu, msr, read, write); set_msr_interception_bitmap(vcpu, msrpm, msr, read, write); @@ -959,15 +976,11 @@ static __init int svm_hardware_setup(void) kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE); } - if (sev) { - if (boot_cpu_has(X86_FEATURE_SEV) && - IS_ENABLED(CONFIG_KVM_AMD_SEV)) { - r = sev_hardware_setup(); - if (r) - sev = false; - } else { - sev = false; - } + if (IS_ENABLED(CONFIG_KVM_AMD_SEV) && sev) { + sev_hardware_setup(); + } else { + sev = false; + sev_es = false; } svm_adjust_mmio_mask(); @@ -1215,6 +1228,7 @@ static void init_vmcb(struct vcpu_svm *svm) save->cr4 = 0; } svm->asid_generation = 0; + svm->asid = 0; svm->nested.vmcb12_gpa = 0; svm->vcpu.arch.hflags = 0; @@ -1252,6 +1266,11 @@ static void init_vmcb(struct vcpu_svm *svm) if (sev_guest(svm->vcpu.kvm)) { svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE; clr_exception_intercept(svm, UD_VECTOR); + + if (sev_es_guest(svm->vcpu.kvm)) { + /* Perform SEV-ES specific VMCB updates */ + sev_es_init_vmcb(svm); + } } vmcb_mark_all_dirty(svm->vmcb); @@ -1288,6 +1307,7 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm; struct page *vmcb_page; + struct page *vmsa_page = NULL; int err; BUILD_BUG_ON(offsetof(struct vcpu_svm, vcpu) != 0); @@ -1298,9 +1318,27 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) if (!vmcb_page) goto out; + if (sev_es_guest(svm->vcpu.kvm)) { + /* + * SEV-ES guests require a separate VMSA page used to contain + * the encrypted register state of the guest. + */ + vmsa_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); + if (!vmsa_page) + goto error_free_vmcb_page; + + /* + * SEV-ES guests maintain an encrypted version of their FPU + * state which is restored and saved on VMRUN and VMEXIT. + * Free the fpu structure to prevent KVM from attempting to + * access the FPU state. 
+ */ + kvm_free_guest_fpu(vcpu); + } + err = avic_init_vcpu(svm); if (err) - goto error_free_vmcb_page; + goto error_free_vmsa_page; /* We initialize this flag to true to make sure that the is_running * bit would be set the first time the vcpu is loaded. @@ -1309,21 +1347,34 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) svm->avic_is_running = true; svm->msrpm = svm_vcpu_alloc_msrpm(); - if (!svm->msrpm) - goto error_free_vmcb_page; + if (!svm->msrpm) { + err = -ENOMEM; + goto error_free_vmsa_page; + } svm_vcpu_init_msrpm(vcpu, svm->msrpm); svm->vmcb = page_address(vmcb_page); svm->vmcb_pa = __sme_set(page_to_pfn(vmcb_page) << PAGE_SHIFT); + + if (vmsa_page) + svm->vmsa = page_address(vmsa_page); + svm->asid_generation = 0; init_vmcb(svm); svm_init_osvw(vcpu); vcpu->arch.microcode_version = 0x01000065; + if (sev_es_guest(svm->vcpu.kvm)) + /* Perform SEV-ES specific VMCB creation updates */ + sev_es_create_vcpu(svm); + return 0; +error_free_vmsa_page: + if (vmsa_page) + __free_page(vmsa_page); error_free_vmcb_page: __free_page(vmcb_page); out: @@ -1351,6 +1402,8 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu) svm_free_nested(svm); + sev_free_vcpu(vcpu); + __free_page(pfn_to_page(__sme_clr(svm->vmcb_pa) >> PAGE_SHIFT)); __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER); } @@ -1366,15 +1419,20 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) vmcb_mark_all_dirty(svm->vmcb); } + if (sev_es_guest(svm->vcpu.kvm)) { + sev_es_vcpu_load(svm, cpu); + } else { #ifdef CONFIG_X86_64 - rdmsrl(MSR_GS_BASE, to_svm(vcpu)->host.gs_base); + rdmsrl(MSR_GS_BASE, to_svm(vcpu)->host.gs_base); #endif - savesegment(fs, svm->host.fs); - savesegment(gs, svm->host.gs); - svm->host.ldt = kvm_read_ldt(); + savesegment(fs, svm->host.fs); + savesegment(gs, svm->host.gs); + svm->host.ldt = kvm_read_ldt(); - for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) - rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); + for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) + rdmsrl(host_save_user_msrs[i].index, + svm->host_user_msrs[i]); + } if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) { u64 tsc_ratio = vcpu->arch.tsc_scaling_ratio; @@ -1402,18 +1460,24 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu) avic_vcpu_put(vcpu); ++vcpu->stat.host_state_reload; - kvm_load_ldt(svm->host.ldt); + if (sev_es_guest(svm->vcpu.kvm)) { + sev_es_vcpu_put(svm); + } else { + kvm_load_ldt(svm->host.ldt); #ifdef CONFIG_X86_64 - loadsegment(fs, svm->host.fs); - wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gsbase); - load_gs_index(svm->host.gs); + loadsegment(fs, svm->host.fs); + wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gsbase); + load_gs_index(svm->host.gs); #else #ifdef CONFIG_X86_32_LAZY_GS - loadsegment(gs, svm->host.gs); + loadsegment(gs, svm->host.gs); #endif #endif - for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) - wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); + + for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) + wrmsrl(host_save_user_msrs[i].index, + svm->host_user_msrs[i]); + } } static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu) @@ -1631,9 +1695,18 @@ static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) static void update_cr0_intercept(struct vcpu_svm *svm) { - ulong gcr0 = svm->vcpu.arch.cr0; - u64 *hcr0 = &svm->vmcb->save.cr0; + ulong gcr0; + u64 *hcr0; + /* + * SEV-ES guests must always keep the CR intercepts cleared. CR + * tracking is done using the CR write traps. 
+ */ + if (sev_es_guest(svm->vcpu.kvm)) + return; + + gcr0 = svm->vcpu.arch.cr0; + hcr0 = &svm->vmcb->save.cr0; *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK) | (gcr0 & SVM_CR0_SELECTIVE_MASK); @@ -1653,7 +1726,7 @@ void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) struct vcpu_svm *svm = to_svm(vcpu); #ifdef CONFIG_X86_64 - if (vcpu->arch.efer & EFER_LME) { + if (vcpu->arch.efer & EFER_LME && !vcpu->arch.guest_state_protected) { if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { vcpu->arch.efer |= EFER_LMA; svm->vmcb->save.efer |= EFER_LMA | EFER_LME; @@ -1682,13 +1755,15 @@ void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) update_cr0_intercept(svm); } -int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) +static bool svm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { - unsigned long host_cr4_mce = cr4_read_shadow() & X86_CR4_MCE; - unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4; + return true; +} - if (cr4 & X86_CR4_VMXE) - return 1; +void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) +{ + unsigned long host_cr4_mce = cr4_read_shadow() & X86_CR4_MCE; + unsigned long old_cr4 = vcpu->arch.cr4; if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE)) svm_flush_tlb(vcpu); @@ -1699,7 +1774,9 @@ int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) cr4 |= host_cr4_mce; to_svm(vcpu)->vmcb->save.cr4 = cr4; vmcb_mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR); - return 0; + + if ((cr4 ^ old_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE)) + kvm_update_cpuid_runtime(vcpu); } static void svm_set_segment(struct kvm_vcpu *vcpu, @@ -1751,18 +1828,20 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd) ++sd->asid_generation; sd->next_asid = sd->min_asid; svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID; + vmcb_mark_dirty(svm->vmcb, VMCB_ASID); } svm->asid_generation = sd->asid_generation; - svm->vmcb->control.asid = sd->next_asid++; - - vmcb_mark_dirty(svm->vmcb, VMCB_ASID); + svm->asid = sd->next_asid++; } static void svm_set_dr6(struct vcpu_svm *svm, unsigned long value) { struct vmcb *vmcb = svm->vmcb; + if (svm->vcpu.arch.guest_state_protected) + return; + if (unlikely(value != vmcb->save.dr6)) { vmcb->save.dr6 = value; vmcb_mark_dirty(vmcb, VMCB_DR); @@ -1773,6 +1852,9 @@ static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); + if (vcpu->arch.guest_state_protected) + return; + get_debugreg(vcpu->arch.db[0], 0); get_debugreg(vcpu->arch.db[1], 1); get_debugreg(vcpu->arch.db[2], 2); @@ -1791,6 +1873,9 @@ static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value) { struct vcpu_svm *svm = to_svm(vcpu); + if (vcpu->arch.guest_state_protected) + return; + svm->vmcb->save.dr7 = value; vmcb_mark_dirty(svm->vmcb, VMCB_DR); } @@ -1929,25 +2014,6 @@ static bool is_erratum_383(void) return true; } -/* - * Trigger machine check on the host. We assume all the MSRs are already set up - * by the CPU and that we still run on the same CPU as the MCE occurred on. - * We pass a fake environment to the machine check handler because we want - * the guest to be always treated like user space, no matter what context - * it used internally. 
- */ -static void kvm_machine_check(void) -{ -#if defined(CONFIG_X86_MCE) - struct pt_regs regs = { - .cs = 3, /* Fake ring 3 no matter what the guest ran on */ - .flags = X86_EFLAGS_IF, - }; - - do_machine_check(®s); -#endif -} - static void svm_handle_mce(struct vcpu_svm *svm) { if (is_erratum_383()) { @@ -1979,6 +2045,13 @@ static int shutdown_interception(struct vcpu_svm *svm) struct kvm_run *kvm_run = svm->vcpu.run; /* + * The VM save area has already been encrypted so it + * cannot be reinitialized - just terminate. + */ + if (sev_es_guest(svm->vcpu.kvm)) + return -EINVAL; + + /* * VMCB is undefined after a SHUTDOWN intercept * so reinitialize it. */ @@ -1999,11 +2072,16 @@ static int io_interception(struct vcpu_svm *svm) ++svm->vcpu.stat.io_exits; string = (io_info & SVM_IOIO_STR_MASK) != 0; in = (io_info & SVM_IOIO_TYPE_MASK) != 0; - if (string) - return kvm_emulate_instruction(vcpu, 0); - port = io_info >> 16; size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; + + if (string) { + if (sev_es_guest(vcpu->kvm)) + return sev_es_string_io(svm, size, port, in); + else + return kvm_emulate_instruction(vcpu, 0); + } + svm->next_rip = svm->vmcb->control.exit_info_2; return kvm_fast_pio(&svm->vcpu, size, port, in); @@ -2267,9 +2345,11 @@ static int cpuid_interception(struct vcpu_svm *svm) static int iret_interception(struct vcpu_svm *svm) { ++svm->vcpu.stat.nmi_window_exits; - svm_clr_intercept(svm, INTERCEPT_IRET); svm->vcpu.arch.hflags |= HF_IRET_MASK; - svm->nmi_iret_rip = kvm_rip_read(&svm->vcpu); + if (!sev_es_guest(svm->vcpu.kvm)) { + svm_clr_intercept(svm, INTERCEPT_IRET); + svm->nmi_iret_rip = kvm_rip_read(&svm->vcpu); + } kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); return 1; } @@ -2406,6 +2486,41 @@ static int cr_interception(struct vcpu_svm *svm) return kvm_complete_insn_gp(&svm->vcpu, err); } +static int cr_trap(struct vcpu_svm *svm) +{ + struct kvm_vcpu *vcpu = &svm->vcpu; + unsigned long old_value, new_value; + unsigned int cr; + int ret = 0; + + new_value = (unsigned long)svm->vmcb->control.exit_info_1; + + cr = svm->vmcb->control.exit_code - SVM_EXIT_CR0_WRITE_TRAP; + switch (cr) { + case 0: + old_value = kvm_read_cr0(vcpu); + svm_set_cr0(vcpu, new_value); + + kvm_post_set_cr0(vcpu, old_value, new_value); + break; + case 4: + old_value = kvm_read_cr4(vcpu); + svm_set_cr4(vcpu, new_value); + + kvm_post_set_cr4(vcpu, old_value, new_value); + break; + case 8: + ret = kvm_set_cr8(&svm->vcpu, new_value); + break; + default: + WARN(1, "unhandled CR%d write trap", cr); + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; + } + + return kvm_complete_insn_gp(vcpu, ret); +} + static int dr_interception(struct vcpu_svm *svm) { int reg, dr; @@ -2459,6 +2574,25 @@ static int cr8_write_interception(struct vcpu_svm *svm) return 0; } +static int efer_trap(struct vcpu_svm *svm) +{ + struct msr_data msr_info; + int ret; + + /* + * Clear the EFER_SVME bit from EFER. The SVM code always sets this + * bit in svm_set_efer(), but __kvm_valid_efer() checks it against + * whether the guest has X86_FEATURE_SVM - this avoids a failure if + * the guest doesn't have X86_FEATURE_SVM. 
+ */ + msr_info.host_initiated = false; + msr_info.index = MSR_EFER; + msr_info.data = svm->vmcb->control.exit_info_1 & ~EFER_SVME; + ret = kvm_set_msr_common(&svm->vcpu, &msr_info); + + return kvm_complete_insn_gp(&svm->vcpu, ret); +} + static int svm_get_msr_feature(struct kvm_msr_entry *msr) { msr->data = 0; @@ -2541,10 +2675,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) && - !guest_cpuid_has(vcpu, X86_FEATURE_AMD_STIBP) && - !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) && - !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD)) + !guest_has_spec_ctrl_msr(vcpu)) return 1; msr_info->data = svm->spec_ctrl; @@ -2582,6 +2713,20 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 0; } +static int svm_complete_emulated_msr(struct kvm_vcpu *vcpu, int err) +{ + struct vcpu_svm *svm = to_svm(vcpu); + if (!sev_es_guest(svm->vcpu.kvm) || !err) + return kvm_complete_insn_gp(&svm->vcpu, err); + + ghcb_set_sw_exit_info_1(svm->ghcb, 1); + ghcb_set_sw_exit_info_2(svm->ghcb, + X86_TRAP_GP | + SVM_EVTINJ_TYPE_EXEPT | + SVM_EVTINJ_VALID); + return 1; +} + static int rdmsr_interception(struct vcpu_svm *svm) { return kvm_emulate_rdmsr(&svm->vcpu); @@ -2628,10 +2773,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) break; case MSR_IA32_SPEC_CTRL: if (!msr->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) && - !guest_cpuid_has(vcpu, X86_FEATURE_AMD_STIBP) && - !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) && - !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD)) + !guest_has_spec_ctrl_msr(vcpu)) return 1; if (kvm_spec_ctrl_test_value(data)) @@ -2656,12 +2798,12 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) break; case MSR_IA32_PRED_CMD: if (!msr->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBPB)) + !guest_has_pred_cmd_msr(vcpu)) return 1; if (data & ~PRED_CMD_IBPB) return 1; - if (!boot_cpu_has(X86_FEATURE_AMD_IBPB)) + if (!boot_cpu_has(X86_FEATURE_IBPB)) return 1; if (!data) break; @@ -2803,7 +2945,14 @@ static int interrupt_window_interception(struct vcpu_svm *svm) static int pause_interception(struct vcpu_svm *svm) { struct kvm_vcpu *vcpu = &svm->vcpu; - bool in_kernel = (svm_get_cpl(vcpu) == 0); + bool in_kernel; + + /* + * CPL is not made available for an SEV-ES guest, therefore + * vcpu->arch.preempted_in_kernel can never be true. Just + * set in_kernel to false as well. 
+ */ + in_kernel = !sev_es_guest(svm->vcpu.kvm) && svm_get_cpl(vcpu) == 0; if (!kvm_pause_in_guest(vcpu->kvm)) grow_ple_window(vcpu); @@ -2918,11 +3067,16 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_MWAIT] = mwait_interception, [SVM_EXIT_XSETBV] = xsetbv_interception, [SVM_EXIT_RDPRU] = rdpru_interception, + [SVM_EXIT_EFER_WRITE_TRAP] = efer_trap, + [SVM_EXIT_CR0_WRITE_TRAP] = cr_trap, + [SVM_EXIT_CR4_WRITE_TRAP] = cr_trap, + [SVM_EXIT_CR8_WRITE_TRAP] = cr_trap, [SVM_EXIT_INVPCID] = invpcid_interception, [SVM_EXIT_NPF] = npf_interception, [SVM_EXIT_RSM] = rsm_interception, [SVM_EXIT_AVIC_INCOMPLETE_IPI] = avic_incomplete_ipi_interception, [SVM_EXIT_AVIC_UNACCELERATED_ACCESS] = avic_unaccelerated_access_interception, + [SVM_EXIT_VMGEXIT] = sev_handle_vmgexit, }; static void dump_vmcb(struct kvm_vcpu *vcpu) @@ -2964,6 +3118,7 @@ static void dump_vmcb(struct kvm_vcpu *vcpu) pr_err("%-20s%lld\n", "nested_ctl:", control->nested_ctl); pr_err("%-20s%016llx\n", "nested_cr3:", control->nested_cr3); pr_err("%-20s%016llx\n", "avic_vapic_bar:", control->avic_vapic_bar); + pr_err("%-20s%016llx\n", "ghcb:", control->ghcb_gpa); pr_err("%-20s%08x\n", "event_inj:", control->event_inj); pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err); pr_err("%-20s%lld\n", "virt_ext:", control->virt_ext); @@ -2971,6 +3126,7 @@ static void dump_vmcb(struct kvm_vcpu *vcpu) pr_err("%-20s%016llx\n", "avic_backing_page:", control->avic_backing_page); pr_err("%-20s%016llx\n", "avic_logical_id:", control->avic_logical_id); pr_err("%-20s%016llx\n", "avic_physical_id:", control->avic_physical_id); + pr_err("%-20s%016llx\n", "vmsa_pa:", control->vmsa_pa); pr_err("VMCB State Save Area:\n"); pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", "es:", @@ -3043,6 +3199,43 @@ static void dump_vmcb(struct kvm_vcpu *vcpu) "excp_to:", save->last_excp_to); } +static int svm_handle_invalid_exit(struct kvm_vcpu *vcpu, u64 exit_code) +{ + if (exit_code < ARRAY_SIZE(svm_exit_handlers) && + svm_exit_handlers[exit_code]) + return 0; + + vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%llx\n", exit_code); + dump_vmcb(vcpu); + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON; + vcpu->run->internal.ndata = 2; + vcpu->run->internal.data[0] = exit_code; + vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu; + + return -EINVAL; +} + +int svm_invoke_exit_handler(struct vcpu_svm *svm, u64 exit_code) +{ + if (svm_handle_invalid_exit(&svm->vcpu, exit_code)) + return 0; + +#ifdef CONFIG_RETPOLINE + if (exit_code == SVM_EXIT_MSR) + return msr_interception(svm); + else if (exit_code == SVM_EXIT_VINTR) + return interrupt_window_interception(svm); + else if (exit_code == SVM_EXIT_INTR) + return intr_interception(svm); + else if (exit_code == SVM_EXIT_HLT) + return halt_interception(svm); + else if (exit_code == SVM_EXIT_NPF) + return npf_interception(svm); +#endif + return svm_exit_handlers[exit_code](svm); +} + static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2, u32 *intr_info, u32 *error_code) { @@ -3066,10 +3259,13 @@ static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM); - if (!svm_is_intercept(svm, INTERCEPT_CR0_WRITE)) - vcpu->arch.cr0 = svm->vmcb->save.cr0; - if (npt_enabled) - vcpu->arch.cr3 = svm->vmcb->save.cr3; + /* SEV-ES guests must use the CR write traps to track CR registers. 
*/ + if (!sev_es_guest(vcpu->kvm)) { + if (!svm_is_intercept(svm, INTERCEPT_CR0_WRITE)) + vcpu->arch.cr0 = svm->vmcb->save.cr0; + if (npt_enabled) + vcpu->arch.cr3 = svm->vmcb->save.cr3; + } if (is_guest_mode(vcpu)) { int vmexit; @@ -3106,32 +3302,7 @@ static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) if (exit_fastpath != EXIT_FASTPATH_NONE) return 1; - if (exit_code >= ARRAY_SIZE(svm_exit_handlers) - || !svm_exit_handlers[exit_code]) { - vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%x\n", exit_code); - dump_vmcb(vcpu); - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - vcpu->run->internal.suberror = - KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON; - vcpu->run->internal.ndata = 2; - vcpu->run->internal.data[0] = exit_code; - vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu; - return 0; - } - -#ifdef CONFIG_RETPOLINE - if (exit_code == SVM_EXIT_MSR) - return msr_interception(svm); - else if (exit_code == SVM_EXIT_VINTR) - return interrupt_window_interception(svm); - else if (exit_code == SVM_EXIT_INTR) - return intr_interception(svm); - else if (exit_code == SVM_EXIT_HLT) - return halt_interception(svm); - else if (exit_code == SVM_EXIT_NPF) - return npf_interception(svm); -#endif - return svm_exit_handlers[exit_code](svm); + return svm_invoke_exit_handler(svm, exit_code); } static void reload_tss(struct kvm_vcpu *vcpu) @@ -3160,7 +3331,8 @@ static void svm_inject_nmi(struct kvm_vcpu *vcpu) svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI; vcpu->arch.hflags |= HF_NMI_MASK; - svm_set_intercept(svm, INTERCEPT_IRET); + if (!sev_es_guest(svm->vcpu.kvm)) + svm_set_intercept(svm, INTERCEPT_IRET); ++vcpu->stat.nmi_injections; } @@ -3181,6 +3353,13 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) { struct vcpu_svm *svm = to_svm(vcpu); + /* + * SEV-ES guests must always keep the CR intercepts cleared. CR + * tracking is done using the CR write traps. + */ + if (sev_es_guest(vcpu->kvm)) + return; + if (nested_svm_virtualize_tpr(vcpu)) return; @@ -3237,10 +3416,12 @@ static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) if (masked) { svm->vcpu.arch.hflags |= HF_NMI_MASK; - svm_set_intercept(svm, INTERCEPT_IRET); + if (!sev_es_guest(svm->vcpu.kvm)) + svm_set_intercept(svm, INTERCEPT_IRET); } else { svm->vcpu.arch.hflags &= ~HF_NMI_MASK; - svm_clr_intercept(svm, INTERCEPT_IRET); + if (!sev_es_guest(svm->vcpu.kvm)) + svm_clr_intercept(svm, INTERCEPT_IRET); } } @@ -3252,7 +3433,14 @@ bool svm_interrupt_blocked(struct kvm_vcpu *vcpu) if (!gif_set(svm)) return true; - if (is_guest_mode(vcpu)) { + if (sev_es_guest(svm->vcpu.kvm)) { + /* + * SEV-ES guests to not expose RFLAGS. Use the VMCB interrupt mask + * bit to determine the state of the IF flag. + */ + if (!(vmcb->control.int_state & SVM_GUEST_INTERRUPT_MASK)) + return true; + } else if (is_guest_mode(vcpu)) { /* As long as interrupts are being delivered... */ if ((svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK) ? !(svm->nested.hsave->save.rflags & X86_EFLAGS_IF) @@ -3411,8 +3599,9 @@ static void svm_complete_interrupts(struct vcpu_svm *svm) * If we've made progress since setting HF_IRET_MASK, we've * executed an IRET and can allow NMI injection. 
*/ - if ((svm->vcpu.arch.hflags & HF_IRET_MASK) - && kvm_rip_read(&svm->vcpu) != svm->nmi_iret_rip) { + if ((svm->vcpu.arch.hflags & HF_IRET_MASK) && + (sev_es_guest(svm->vcpu.kvm) || + kvm_rip_read(&svm->vcpu) != svm->nmi_iret_rip)) { svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK); kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); } @@ -3435,6 +3624,12 @@ static void svm_complete_interrupts(struct vcpu_svm *svm) break; case SVM_EXITINTINFO_TYPE_EXEPT: /* + * Never re-inject a #VC exception. + */ + if (vector == X86_TRAP_VC) + break; + + /* * In case of software exceptions, do not reinject the vector, * but re-execute the instruction instead. Rewind RIP first * if we emulated INT3 before. @@ -3507,16 +3702,20 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, guest_enter_irqoff(); lockdep_hardirqs_on(CALLER_ADDR0); - __svm_vcpu_run(svm->vmcb_pa, (unsigned long *)&svm->vcpu.arch.regs); + if (sev_es_guest(svm->vcpu.kvm)) { + __svm_sev_es_vcpu_run(svm->vmcb_pa); + } else { + __svm_vcpu_run(svm->vmcb_pa, (unsigned long *)&svm->vcpu.arch.regs); #ifdef CONFIG_X86_64 - native_wrmsrl(MSR_GS_BASE, svm->host.gs_base); + native_wrmsrl(MSR_GS_BASE, svm->host.gs_base); #else - loadsegment(fs, svm->host.fs); + loadsegment(fs, svm->host.fs); #ifndef CONFIG_X86_32_LAZY_GS - loadsegment(gs, svm->host.gs); + loadsegment(gs, svm->host.gs); #endif #endif + } /* * VMEXIT disables interrupts (host state), but tracing and lockdep @@ -3566,6 +3765,10 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) sync_lapic_to_cr8(vcpu); + if (unlikely(svm->asid != svm->vmcb->control.asid)) { + svm->vmcb->control.asid = svm->asid; + vmcb_mark_dirty(svm->vmcb, VMCB_ASID); + } svm->vmcb->save.cr2 = vcpu->arch.cr2; /* @@ -3610,14 +3813,17 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); - reload_tss(vcpu); + if (!sev_es_guest(svm->vcpu.kvm)) + reload_tss(vcpu); x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl); - vcpu->arch.cr2 = svm->vmcb->save.cr2; - vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax; - vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; - vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; + if (!sev_es_guest(svm->vcpu.kvm)) { + vcpu->arch.cr2 = svm->vmcb->save.cr2; + vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax; + vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; + vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; + } if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) kvm_before_interrupt(&svm->vcpu); @@ -3720,12 +3926,21 @@ static bool svm_cpu_has_accelerated_tpr(void) return false; } -static bool svm_has_emulated_msr(u32 index) +/* + * The kvm parameter can be NULL (module initialization, or invocation before + * VM creation). Be sure to check the kvm parameter before using it. + */ +static bool svm_has_emulated_msr(struct kvm *kvm, u32 index) { switch (index) { case MSR_IA32_MCG_EXT_CTL: case MSR_IA32_VMX_BASIC ... 
MSR_IA32_VMX_VMFUNC: return false; + case MSR_IA32_SMBASE: + /* SEV-ES guests do not support SMM, so report false */ + if (kvm && sev_es_guest(kvm)) + return false; + break; default: break; } @@ -3741,6 +3956,7 @@ static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); + struct kvm_cpuid_entry2 *best; vcpu->arch.xsaves_enabled = guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && boot_cpu_has(X86_FEATURE_XSAVE) && @@ -3753,6 +3969,13 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) /* Check again if INVPCID interception if required */ svm_check_invpcid(svm); + /* For sev guests, the memory encryption bit is not reserved in CR3. */ + if (sev_guest(vcpu->kvm)) { + best = kvm_find_cpuid_entry(vcpu, 0x8000001F, 0); + if (best) + vcpu->arch.cr3_lm_rsvd_bits &= ~(1UL << (best->ebx & 0x3f)); + } + if (!kvm_vcpu_apicv_active(vcpu)) return; @@ -4076,6 +4299,12 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, void *insn, int i unsigned long cr4; /* + * When the guest is an SEV-ES guest, emulation is not possible. + */ + if (sev_es_guest(vcpu->kvm)) + return false; + + /* * Detect and workaround Errata 1096 Fam_17h_00_0Fh. * * Errata: @@ -4207,6 +4436,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .get_cpl = svm_get_cpl, .get_cs_db_l_bits = kvm_get_cs_db_l_bits, .set_cr0 = svm_set_cr0, + .is_valid_cr4 = svm_is_valid_cr4, .set_cr4 = svm_set_cr4, .set_efer = svm_set_efer, .get_idt = svm_get_idt, @@ -4295,6 +4525,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .apic_init_signal_blocked = svm_apic_init_signal_blocked, .msr_filter_changed = svm_msr_filter_changed, + .complete_emulated_msr = svm_complete_emulated_msr, }; static struct kvm_x86_init_ops svm_init_ops __initdata = { diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 1d853fe4c778..5431e6335e2e 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -17,21 +17,32 @@ #include <linux/kvm_types.h> #include <linux/kvm_host.h> +#include <linux/bits.h> #include <asm/svm.h> -static const u32 host_save_user_msrs[] = { +#define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT) + +static const struct svm_host_save_msrs { + u32 index; /* Index of the MSR */ + bool sev_es_restored; /* True if MSR is restored on SEV-ES VMEXIT */ +} host_save_user_msrs[] = { #ifdef CONFIG_X86_64 - MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE, - MSR_FS_BASE, + { .index = MSR_STAR, .sev_es_restored = true }, + { .index = MSR_LSTAR, .sev_es_restored = true }, + { .index = MSR_CSTAR, .sev_es_restored = true }, + { .index = MSR_SYSCALL_MASK, .sev_es_restored = true }, + { .index = MSR_KERNEL_GS_BASE, .sev_es_restored = true }, + { .index = MSR_FS_BASE, .sev_es_restored = true }, #endif - MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, - MSR_TSC_AUX, + { .index = MSR_IA32_SYSENTER_CS, .sev_es_restored = true }, + { .index = MSR_IA32_SYSENTER_ESP, .sev_es_restored = true }, + { .index = MSR_IA32_SYSENTER_EIP, .sev_es_restored = true }, + { .index = MSR_TSC_AUX, .sev_es_restored = false }, }; - #define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs) -#define MAX_DIRECT_ACCESS_MSRS 15 +#define MAX_DIRECT_ACCESS_MSRS 18 #define MSRPM_OFFSETS 16 extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly; extern bool npt_enabled; @@ -61,11 +72,13 @@ enum { struct kvm_sev_info { bool active; /* SEV enabled guest */ + bool es_active; /* SEV-ES enabled 
guest */ unsigned int asid; /* ASID used for this guest */ unsigned int handle; /* SEV firmware handle */ int fd; /* SEV device fd */ unsigned long pages_locked; /* Number of pages locked */ struct list_head regions_list; /* List of registered regions */ + u64 ap_jump_table; /* SEV-ES AP Jump Table address */ }; struct kvm_svm { @@ -106,6 +119,7 @@ struct vcpu_svm { struct vmcb *vmcb; unsigned long vmcb_pa; struct svm_cpu_data *svm_data; + u32 asid; uint64_t asid_generation; uint64_t sysenter_esp; uint64_t sysenter_eip; @@ -166,6 +180,17 @@ struct vcpu_svm { DECLARE_BITMAP(read, MAX_DIRECT_ACCESS_MSRS); DECLARE_BITMAP(write, MAX_DIRECT_ACCESS_MSRS); } shadow_msr_intercept; + + /* SEV-ES support */ + struct vmcb_save_area *vmsa; + struct ghcb *ghcb; + struct kvm_host_map ghcb_map; + + /* SEV-ES scratch area support */ + void *ghcb_sa; + u64 ghcb_sa_len; + bool ghcb_sa_sync; + bool ghcb_sa_free; }; struct svm_cpu_data { @@ -193,6 +218,28 @@ static inline struct kvm_svm *to_kvm_svm(struct kvm *kvm) return container_of(kvm, struct kvm_svm, kvm); } +static inline bool sev_guest(struct kvm *kvm) +{ +#ifdef CONFIG_KVM_AMD_SEV + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; + + return sev->active; +#else + return false; +#endif +} + +static inline bool sev_es_guest(struct kvm *kvm) +{ +#ifdef CONFIG_KVM_AMD_SEV + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; + + return sev_guest(kvm) && sev->es_active; +#else + return false; +#endif +} + static inline void vmcb_mark_all_dirty(struct vmcb *vmcb) { vmcb->control.clean = 0; @@ -244,21 +291,24 @@ static inline void set_dr_intercepts(struct vcpu_svm *svm) { struct vmcb *vmcb = get_host_vmcb(svm); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ); + if (!sev_es_guest(svm->vcpu.kvm)) { + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE); + } + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE); vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE); recalc_intercepts(svm); @@ -270,6 +320,12 @@ static inline void 
clr_dr_intercepts(struct vcpu_svm *svm) vmcb->control.intercepts[INTERCEPT_DR] = 0; + /* DR7 access must remain intercepted for an SEV-ES guest */ + if (sev_es_guest(svm->vcpu.kvm)) { + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE); + } + recalc_intercepts(svm); } @@ -351,6 +407,10 @@ static inline bool gif_set(struct vcpu_svm *svm) #define MSR_CR3_LONG_MBZ_MASK 0xfff0000000000000U #define MSR_INVALID 0xffffffffU +extern int sev; +extern int sev_es; +extern bool dump_invalid_vmcb; + u32 svm_msrpm_offset(u32 msr); u32 *svm_vcpu_alloc_msrpm(void); void svm_vcpu_init_msrpm(struct kvm_vcpu *vcpu, u32 *msrpm); @@ -358,13 +418,16 @@ void svm_vcpu_free_msrpm(u32 *msrpm); int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer); void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); -int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); +void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); void svm_flush_tlb(struct kvm_vcpu *vcpu); void disable_nmi_singlestep(struct vcpu_svm *svm); bool svm_smi_blocked(struct kvm_vcpu *vcpu); bool svm_nmi_blocked(struct kvm_vcpu *vcpu); bool svm_interrupt_blocked(struct kvm_vcpu *vcpu); void svm_set_gif(struct vcpu_svm *svm, bool value); +int svm_invoke_exit_handler(struct vcpu_svm *svm, u64 exit_code); +void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr, + int read, int write); /* nested.c */ @@ -470,18 +533,42 @@ void svm_vcpu_unblocking(struct kvm_vcpu *vcpu); /* sev.c */ -extern unsigned int max_sev_asid; +#define GHCB_VERSION_MAX 1ULL +#define GHCB_VERSION_MIN 1ULL + +#define GHCB_MSR_INFO_POS 0 +#define GHCB_MSR_INFO_MASK (BIT_ULL(12) - 1) + +#define GHCB_MSR_SEV_INFO_RESP 0x001 +#define GHCB_MSR_SEV_INFO_REQ 0x002 +#define GHCB_MSR_VER_MAX_POS 48 +#define GHCB_MSR_VER_MAX_MASK 0xffff +#define GHCB_MSR_VER_MIN_POS 32 +#define GHCB_MSR_VER_MIN_MASK 0xffff +#define GHCB_MSR_CBIT_POS 24 +#define GHCB_MSR_CBIT_MASK 0xff +#define GHCB_MSR_SEV_INFO(_max, _min, _cbit) \ + ((((_max) & GHCB_MSR_VER_MAX_MASK) << GHCB_MSR_VER_MAX_POS) | \ + (((_min) & GHCB_MSR_VER_MIN_MASK) << GHCB_MSR_VER_MIN_POS) | \ + (((_cbit) & GHCB_MSR_CBIT_MASK) << GHCB_MSR_CBIT_POS) | \ + GHCB_MSR_SEV_INFO_RESP) + +#define GHCB_MSR_CPUID_REQ 0x004 +#define GHCB_MSR_CPUID_RESP 0x005 +#define GHCB_MSR_CPUID_FUNC_POS 32 +#define GHCB_MSR_CPUID_FUNC_MASK 0xffffffff +#define GHCB_MSR_CPUID_VALUE_POS 32 +#define GHCB_MSR_CPUID_VALUE_MASK 0xffffffff +#define GHCB_MSR_CPUID_REG_POS 30 +#define GHCB_MSR_CPUID_REG_MASK 0x3 + +#define GHCB_MSR_TERM_REQ 0x100 +#define GHCB_MSR_TERM_REASON_SET_POS 12 +#define GHCB_MSR_TERM_REASON_SET_MASK 0xf +#define GHCB_MSR_TERM_REASON_POS 16 +#define GHCB_MSR_TERM_REASON_MASK 0xff -static inline bool sev_guest(struct kvm *kvm) -{ -#ifdef CONFIG_KVM_AMD_SEV - struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; - - return sev->active; -#else - return false; -#endif -} +extern unsigned int max_sev_asid; static inline bool svm_sev_enabled(void) { @@ -495,7 +582,19 @@ int svm_register_enc_region(struct kvm *kvm, int svm_unregister_enc_region(struct kvm *kvm, struct kvm_enc_region *range); void pre_sev_run(struct vcpu_svm *svm, int cpu); -int __init sev_hardware_setup(void); +void __init sev_hardware_setup(void); void sev_hardware_teardown(void); +void sev_free_vcpu(struct kvm_vcpu *vcpu); +int sev_handle_vmgexit(struct vcpu_svm *svm); +int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in); +void sev_es_init_vmcb(struct vcpu_svm *svm); +void 
sev_es_create_vcpu(struct vcpu_svm *svm); +void sev_es_vcpu_load(struct vcpu_svm *svm, int cpu); +void sev_es_vcpu_put(struct vcpu_svm *svm); + +/* vmenter.S */ + +void __svm_sev_es_vcpu_run(unsigned long vmcb_pa); +void __svm_vcpu_run(unsigned long vmcb_pa, unsigned long *regs); #endif diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index 1ec1ac40e328..6feb8c08f45a 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -168,3 +168,53 @@ SYM_FUNC_START(__svm_vcpu_run) pop %_ASM_BP ret SYM_FUNC_END(__svm_vcpu_run) + +/** + * __svm_sev_es_vcpu_run - Run a SEV-ES vCPU via a transition to SVM guest mode + * @vmcb_pa: unsigned long + */ +SYM_FUNC_START(__svm_sev_es_vcpu_run) + push %_ASM_BP +#ifdef CONFIG_X86_64 + push %r15 + push %r14 + push %r13 + push %r12 +#else + push %edi + push %esi +#endif + push %_ASM_BX + + /* Enter guest mode */ + mov %_ASM_ARG1, %_ASM_AX + sti + +1: vmrun %_ASM_AX + jmp 3f +2: cmpb $0, kvm_rebooting + jne 3f + ud2 + _ASM_EXTABLE(1b, 2b) + +3: cli + +#ifdef CONFIG_RETPOLINE + /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */ + FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE +#endif + + pop %_ASM_BX + +#ifdef CONFIG_X86_64 + pop %r12 + pop %r13 + pop %r14 + pop %r15 +#else + pop %esi + pop %edi +#endif + pop %_ASM_BP + ret +SYM_FUNC_END(__svm_sev_es_vcpu_run) |
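
The GHCB_MSR_* definitions added to svm.h above implement the MSR-based GHCB protocol: the low 12 bits of the GHCB MSR carry the request/response code, and the upper bits carry payload fields such as the supported protocol version range and the page-encryption (C-bit) position. The standalone C sketch below (userspace, not kernel code; the helper name and the C-bit value 51 are illustrative assumptions, not taken from this diff) shows how a value built with GHCB_MSR_SEV_INFO would be packed and unpacked:

    /*
     * Illustrative sketch of the GHCB MSR SEV_INFO encoding.  The constants
     * mirror the GHCB_MSR_* definitions added to svm.h in this diff; the
     * helper ghcb_msr_sev_info() and the example C-bit position are
     * assumptions made for the example only.
     */
    #include <stdint.h>
    #include <stdio.h>

    #define GHCB_MSR_INFO_MASK      ((1ULL << 12) - 1)  /* low 12 bits: code */
    #define GHCB_MSR_SEV_INFO_RESP  0x001ULL
    #define GHCB_MSR_VER_MAX_POS    48
    #define GHCB_MSR_VER_MAX_MASK   0xffffULL
    #define GHCB_MSR_VER_MIN_POS    32
    #define GHCB_MSR_VER_MIN_MASK   0xffffULL
    #define GHCB_MSR_CBIT_POS       24
    #define GHCB_MSR_CBIT_MASK      0xffULL

    /* Pack max/min supported GHCB protocol version and the C-bit position. */
    static uint64_t ghcb_msr_sev_info(uint64_t max, uint64_t min, uint64_t cbit)
    {
            return ((max & GHCB_MSR_VER_MAX_MASK) << GHCB_MSR_VER_MAX_POS) |
                   ((min & GHCB_MSR_VER_MIN_MASK) << GHCB_MSR_VER_MIN_POS) |
                   ((cbit & GHCB_MSR_CBIT_MASK) << GHCB_MSR_CBIT_POS) |
                   GHCB_MSR_SEV_INFO_RESP;
    }

    int main(void)
    {
            /* GHCB protocol versions 1..1, C-bit at position 51 (example value). */
            uint64_t msr = ghcb_msr_sev_info(1, 1, 51);

            printf("GHCB MSR value: 0x%016llx\n", (unsigned long long)msr);
            printf("info code:      0x%03llx\n",
                   (unsigned long long)(msr & GHCB_MSR_INFO_MASK));
            printf("max version:    %llu\n",
                   (unsigned long long)((msr >> GHCB_MSR_VER_MAX_POS) & GHCB_MSR_VER_MAX_MASK));
            printf("min version:    %llu\n",
                   (unsigned long long)((msr >> GHCB_MSR_VER_MIN_POS) & GHCB_MSR_VER_MIN_MASK));
            printf("C-bit position: %llu\n",
                   (unsigned long long)((msr >> GHCB_MSR_CBIT_POS) & GHCB_MSR_CBIT_MASK));
            return 0;
    }

In the kernel, such a SEV_INFO response is what an SEV-ES guest reads back from the GHCB MSR to discover the supported protocol range and the encryption-bit position before switching to the full GHCB page protocol.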
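
The handle_exit() rework above moves exit dispatch into svm_invoke_exit_handler(), which first rejects exit codes that have no registered handler and then dispatches either through direct comparisons for hot exits (avoiding an indirect call when CONFIG_RETPOLINE is enabled) or through the handler table. A minimal userspace sketch of that dispatch pattern, with placeholder handler names and exit codes rather than the real SVM definitions:

    /*
     * Sketch of a bounds-checked, table-driven exit dispatcher with direct
     * calls for hot exit codes.  EXIT_* values and handler names are
     * placeholders, not the kernel's SVM_EXIT_* constants.
     */
    #include <stddef.h>
    #include <stdio.h>

    #define EXIT_HLT 0x078
    #define EXIT_MSR 0x07c

    struct vcpu { unsigned long long exit_code; };

    static int handle_hlt(struct vcpu *v) { (void)v; puts("hlt"); return 1; }
    static int handle_msr(struct vcpu *v) { (void)v; puts("msr"); return 1; }

    static int (*const exit_handlers[])(struct vcpu *) = {
            [EXIT_HLT] = handle_hlt,
            [EXIT_MSR] = handle_msr,
    };

    static int invoke_exit_handler(struct vcpu *v, unsigned long long exit_code)
    {
            /* Reject exit codes with no registered handler, as the kernel does. */
            if (exit_code >= sizeof(exit_handlers) / sizeof(exit_handlers[0]) ||
                !exit_handlers[exit_code])
                    return -1;

            /* Hot exits can use direct calls instead of an indirect branch. */
            if (exit_code == EXIT_MSR)
                    return handle_msr(v);

            return exit_handlers[exit_code](v);
    }

    int main(void)
    {
            struct vcpu v = { .exit_code = EXIT_HLT };

            return invoke_exit_handler(&v, v.exit_code) == 1 ? 0 : 1;
    }

Splitting validation into its own helper also lets the VMGEXIT path reuse the same dispatcher for exit codes forwarded from the guest's GHCB, which is why svm_invoke_exit_handler() is exported through svm.h in this diff.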