aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kvm/svm
diff options
context:
space:
mode:
authorMark Brown <broonie@kernel.org>2020-12-28 14:20:00 +0000
committerMark Brown <broonie@kernel.org>2020-12-28 14:20:00 +0000
commit2ae6f64ce1ce304b502461fdfe0b96c8171ae2cc (patch)
tree88e987c447daf2c29e2d4c15e58d1029b0cc78c2 /arch/x86/kvm/svm
parentregulator: bd718x7: Add enable times (diff)
parentLinux 5.11-rc1 (diff)
downloadlinux-dev-2ae6f64ce1ce304b502461fdfe0b96c8171ae2cc.tar.xz
linux-dev-2ae6f64ce1ce304b502461fdfe0b96c8171ae2cc.zip
Merge tag 'v5.11-rc1' into regulator-5.11
Linux 5.11-rc1
Diffstat (limited to 'arch/x86/kvm/svm')
-rw-r--r--arch/x86/kvm/svm/avic.c9
-rw-r--r--arch/x86/kvm/svm/nested.c11
-rw-r--r--arch/x86/kvm/svm/sev.c917
-rw-r--r--arch/x86/kvm/svm/svm.c485
-rw-r--r--arch/x86/kvm/svm/svm.h167
-rw-r--r--arch/x86/kvm/svm/vmenter.S50
6 files changed, 1430 insertions, 209 deletions
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 8c550999ace0..0ef84d57b72e 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -233,7 +233,8 @@ static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
*/
static int avic_update_access_page(struct kvm *kvm, bool activate)
{
- int ret = 0;
+ void __user *ret;
+ int r = 0;
mutex_lock(&kvm->slots_lock);
/*
@@ -249,13 +250,15 @@ static int avic_update_access_page(struct kvm *kvm, bool activate)
APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
APIC_DEFAULT_PHYS_BASE,
activate ? PAGE_SIZE : 0);
- if (ret)
+ if (IS_ERR(ret)) {
+ r = PTR_ERR(ret);
goto out;
+ }
kvm->arch.apic_access_page_done = activate;
out:
mutex_unlock(&kvm->slots_lock);
- return ret;
+ return r;
}
static int avic_init_backing_page(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 9e4c226dbf7d..b0b667456b2e 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -254,7 +254,7 @@ static bool nested_vmcb_checks(struct vcpu_svm *svm, struct vmcb *vmcb12)
(vmcb12->save.cr3 & MSR_CR3_LONG_MBZ_MASK))
return false;
}
- if (kvm_valid_cr4(&svm->vcpu, vmcb12->save.cr4))
+ if (!kvm_is_valid_cr4(&svm->vcpu, vmcb12->save.cr4))
return false;
return nested_vmcb_check_controls(&vmcb12->control);
@@ -381,7 +381,7 @@ static void nested_prepare_vmcb_save(struct vcpu_svm *svm, struct vmcb *vmcb12)
svm->vmcb->save.ds = vmcb12->save.ds;
svm->vmcb->save.gdtr = vmcb12->save.gdtr;
svm->vmcb->save.idtr = vmcb12->save.idtr;
- kvm_set_rflags(&svm->vcpu, vmcb12->save.rflags);
+ kvm_set_rflags(&svm->vcpu, vmcb12->save.rflags | X86_EFLAGS_FIXED);
svm_set_efer(&svm->vcpu, vmcb12->save.efer);
svm_set_cr0(&svm->vcpu, vmcb12->save.cr0);
svm_set_cr4(&svm->vcpu, vmcb12->save.cr4);
@@ -394,8 +394,8 @@ static void nested_prepare_vmcb_save(struct vcpu_svm *svm, struct vmcb *vmcb12)
svm->vmcb->save.rax = vmcb12->save.rax;
svm->vmcb->save.rsp = vmcb12->save.rsp;
svm->vmcb->save.rip = vmcb12->save.rip;
- svm->vmcb->save.dr7 = vmcb12->save.dr7;
- svm->vcpu.arch.dr6 = vmcb12->save.dr6;
+ svm->vmcb->save.dr7 = vmcb12->save.dr7 | DR7_FIXED_1;
+ svm->vcpu.arch.dr6 = vmcb12->save.dr6 | DR6_FIXED_1 | DR6_RTM;
svm->vmcb->save.cpl = vmcb12->save.cpl;
}
@@ -660,13 +660,14 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
svm->vmcb->save.gdtr = hsave->save.gdtr;
svm->vmcb->save.idtr = hsave->save.idtr;
kvm_set_rflags(&svm->vcpu, hsave->save.rflags);
+ kvm_set_rflags(&svm->vcpu, hsave->save.rflags | X86_EFLAGS_FIXED);
svm_set_efer(&svm->vcpu, hsave->save.efer);
svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE);
svm_set_cr4(&svm->vcpu, hsave->save.cr4);
kvm_rax_write(&svm->vcpu, hsave->save.rax);
kvm_rsp_write(&svm->vcpu, hsave->save.rsp);
kvm_rip_write(&svm->vcpu, hsave->save.rip);
- svm->vmcb->save.dr7 = 0;
+ svm->vmcb->save.dr7 = DR7_FIXED_1;
svm->vmcb->save.cpl = 0;
svm->vmcb->control.exit_int_info = 0;
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index c0b14106258a..9858d5ae9ddd 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -14,10 +14,20 @@
#include <linux/psp-sev.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
+#include <linux/processor.h>
+#include <linux/trace_events.h>
+#include <asm/fpu/internal.h>
+
+#include <asm/trapnr.h>
#include "x86.h"
#include "svm.h"
+#include "cpuid.h"
+#include "trace.h"
+
+#define __ex(x) __kvm_handle_fault_on_reboot(x)
+static u8 sev_enc_bit;
static int sev_flush_asids(void);
static DECLARE_RWSEM(sev_deactivate_lock);
static DEFINE_MUTEX(sev_bitmap_lock);
@@ -25,7 +35,6 @@ unsigned int max_sev_asid;
static unsigned int min_sev_asid;
static unsigned long *sev_asid_bitmap;
static unsigned long *sev_reclaim_asid_bitmap;
-#define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT)
struct enc_region {
struct list_head list;
@@ -57,19 +66,19 @@ static int sev_flush_asids(void)
}
/* Must be called with the sev_bitmap_lock held */
-static bool __sev_recycle_asids(void)
+static bool __sev_recycle_asids(int min_asid, int max_asid)
{
int pos;
/* Check if there are any ASIDs to reclaim before performing a flush */
- pos = find_next_bit(sev_reclaim_asid_bitmap,
- max_sev_asid, min_sev_asid - 1);
- if (pos >= max_sev_asid)
+ pos = find_next_bit(sev_reclaim_asid_bitmap, max_sev_asid, min_asid);
+ if (pos >= max_asid)
return false;
if (sev_flush_asids())
return false;
+ /* The flush process will flush all reclaimable SEV and SEV-ES ASIDs */
bitmap_xor(sev_asid_bitmap, sev_asid_bitmap, sev_reclaim_asid_bitmap,
max_sev_asid);
bitmap_zero(sev_reclaim_asid_bitmap, max_sev_asid);
@@ -77,20 +86,23 @@ static bool __sev_recycle_asids(void)
return true;
}
-static int sev_asid_new(void)
+static int sev_asid_new(struct kvm_sev_info *sev)
{
+ int pos, min_asid, max_asid;
bool retry = true;
- int pos;
mutex_lock(&sev_bitmap_lock);
/*
- * SEV-enabled guest must use asid from min_sev_asid to max_sev_asid.
+ * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid.
+ * SEV-ES-enabled guest can use from 1 to min_sev_asid - 1.
*/
+ min_asid = sev->es_active ? 0 : min_sev_asid - 1;
+ max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid;
again:
- pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_sev_asid - 1);
- if (pos >= max_sev_asid) {
- if (retry && __sev_recycle_asids()) {
+ pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_asid);
+ if (pos >= max_asid) {
+ if (retry && __sev_recycle_asids(min_asid, max_asid)) {
retry = false;
goto again;
}
@@ -172,7 +184,7 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
if (unlikely(sev->active))
return ret;
- asid = sev_asid_new();
+ asid = sev_asid_new(sev);
if (asid < 0)
return ret;
@@ -191,6 +203,16 @@ e_free:
return ret;
}
+static int sev_es_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
+{
+ if (!sev_es)
+ return -ENOTTY;
+
+ to_kvm_svm(kvm)->sev_info.es_active = true;
+
+ return sev_guest_init(kvm, argp);
+}
+
static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error)
{
struct sev_data_activate *data;
@@ -490,6 +512,96 @@ e_free:
return ret;
}
+static int sev_es_sync_vmsa(struct vcpu_svm *svm)
+{
+ struct vmcb_save_area *save = &svm->vmcb->save;
+
+ /* Check some debug related fields before encrypting the VMSA */
+ if (svm->vcpu.guest_debug || (save->dr7 & ~DR7_FIXED_1))
+ return -EINVAL;
+
+ /* Sync registgers */
+ save->rax = svm->vcpu.arch.regs[VCPU_REGS_RAX];
+ save->rbx = svm->vcpu.arch.regs[VCPU_REGS_RBX];
+ save->rcx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
+ save->rdx = svm->vcpu.arch.regs[VCPU_REGS_RDX];
+ save->rsp = svm->vcpu.arch.regs[VCPU_REGS_RSP];
+ save->rbp = svm->vcpu.arch.regs[VCPU_REGS_RBP];
+ save->rsi = svm->vcpu.arch.regs[VCPU_REGS_RSI];
+ save->rdi = svm->vcpu.arch.regs[VCPU_REGS_RDI];
+#ifdef CONFIG_X86_64
+ save->r8 = svm->vcpu.arch.regs[VCPU_REGS_R8];
+ save->r9 = svm->vcpu.arch.regs[VCPU_REGS_R9];
+ save->r10 = svm->vcpu.arch.regs[VCPU_REGS_R10];
+ save->r11 = svm->vcpu.arch.regs[VCPU_REGS_R11];
+ save->r12 = svm->vcpu.arch.regs[VCPU_REGS_R12];
+ save->r13 = svm->vcpu.arch.regs[VCPU_REGS_R13];
+ save->r14 = svm->vcpu.arch.regs[VCPU_REGS_R14];
+ save->r15 = svm->vcpu.arch.regs[VCPU_REGS_R15];
+#endif
+ save->rip = svm->vcpu.arch.regs[VCPU_REGS_RIP];
+
+ /* Sync some non-GPR registers before encrypting */
+ save->xcr0 = svm->vcpu.arch.xcr0;
+ save->pkru = svm->vcpu.arch.pkru;
+ save->xss = svm->vcpu.arch.ia32_xss;
+
+ /*
+ * SEV-ES will use a VMSA that is pointed to by the VMCB, not
+ * the traditional VMSA that is part of the VMCB. Copy the
+ * traditional VMSA as it has been built so far (in prep
+ * for LAUNCH_UPDATE_VMSA) to be the initial SEV-ES state.
+ */
+ memcpy(svm->vmsa, save, sizeof(*save));
+
+ return 0;
+}
+
+static int sev_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
+{
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ struct sev_data_launch_update_vmsa *vmsa;
+ int i, ret;
+
+ if (!sev_es_guest(kvm))
+ return -ENOTTY;
+
+ vmsa = kzalloc(sizeof(*vmsa), GFP_KERNEL);
+ if (!vmsa)
+ return -ENOMEM;
+
+ for (i = 0; i < kvm->created_vcpus; i++) {
+ struct vcpu_svm *svm = to_svm(kvm->vcpus[i]);
+
+ /* Perform some pre-encryption checks against the VMSA */
+ ret = sev_es_sync_vmsa(svm);
+ if (ret)
+ goto e_free;
+
+ /*
+ * The LAUNCH_UPDATE_VMSA command will perform in-place
+ * encryption of the VMSA memory content (i.e it will write
+ * the same memory region with the guest's key), so invalidate
+ * it first.
+ */
+ clflush_cache_range(svm->vmsa, PAGE_SIZE);
+
+ vmsa->handle = sev->handle;
+ vmsa->address = __sme_pa(svm->vmsa);
+ vmsa->len = PAGE_SIZE;
+ ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, vmsa,
+ &argp->error);
+ if (ret)
+ goto e_free;
+
+ svm->vcpu.arch.guest_state_protected = true;
+ }
+
+e_free:
+ kfree(vmsa);
+ return ret;
+}
+
static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
void __user *measure = (void __user *)(uintptr_t)argp->data;
@@ -642,8 +754,8 @@ static int __sev_dbg_decrypt(struct kvm *kvm, unsigned long src_paddr,
* Its safe to read more than we are asked, caller should ensure that
* destination has enough space.
*/
- src_paddr = round_down(src_paddr, 16);
offset = src_paddr & 15;
+ src_paddr = round_down(src_paddr, 16);
sz = round_up(sz + offset, 16);
return __sev_issue_dbg_cmd(kvm, src_paddr, dst_paddr, sz, err, false);
@@ -932,7 +1044,7 @@ int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
struct kvm_sev_cmd sev_cmd;
int r;
- if (!svm_sev_enabled())
+ if (!svm_sev_enabled() || !sev)
return -ENOTTY;
if (!argp)
@@ -947,12 +1059,18 @@ int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
case KVM_SEV_INIT:
r = sev_guest_init(kvm, &sev_cmd);
break;
+ case KVM_SEV_ES_INIT:
+ r = sev_es_guest_init(kvm, &sev_cmd);
+ break;
case KVM_SEV_LAUNCH_START:
r = sev_launch_start(kvm, &sev_cmd);
break;
case KVM_SEV_LAUNCH_UPDATE_DATA:
r = sev_launch_update_data(kvm, &sev_cmd);
break;
+ case KVM_SEV_LAUNCH_UPDATE_VMSA:
+ r = sev_launch_update_vmsa(kvm, &sev_cmd);
+ break;
case KVM_SEV_LAUNCH_MEASURE:
r = sev_launch_measure(kvm, &sev_cmd);
break;
@@ -1125,49 +1243,61 @@ void sev_vm_destroy(struct kvm *kvm)
sev_asid_free(sev->asid);
}
-int __init sev_hardware_setup(void)
+void __init sev_hardware_setup(void)
{
- struct sev_user_data_status *status;
- int rc;
+ unsigned int eax, ebx, ecx, edx;
+ bool sev_es_supported = false;
+ bool sev_supported = false;
+
+ /* Does the CPU support SEV? */
+ if (!boot_cpu_has(X86_FEATURE_SEV))
+ goto out;
+
+ /* Retrieve SEV CPUID information */
+ cpuid(0x8000001f, &eax, &ebx, &ecx, &edx);
+
+ /* Set encryption bit location for SEV-ES guests */
+ sev_enc_bit = ebx & 0x3f;
/* Maximum number of encrypted guests supported simultaneously */
- max_sev_asid = cpuid_ecx(0x8000001F);
+ max_sev_asid = ecx;
if (!svm_sev_enabled())
- return 1;
+ goto out;
/* Minimum ASID value that should be used for SEV guest */
- min_sev_asid = cpuid_edx(0x8000001F);
+ min_sev_asid = edx;
/* Initialize SEV ASID bitmaps */
sev_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
if (!sev_asid_bitmap)
- return 1;
+ goto out;
sev_reclaim_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
if (!sev_reclaim_asid_bitmap)
- return 1;
+ goto out;
- status = kmalloc(sizeof(*status), GFP_KERNEL);
- if (!status)
- return 1;
+ pr_info("SEV supported: %u ASIDs\n", max_sev_asid - min_sev_asid + 1);
+ sev_supported = true;
- /*
- * Check SEV platform status.
- *
- * PLATFORM_STATUS can be called in any state, if we failed to query
- * the PLATFORM status then either PSP firmware does not support SEV
- * feature or SEV firmware is dead.
- */
- rc = sev_platform_status(status, NULL);
- if (rc)
- goto err;
+ /* SEV-ES support requested? */
+ if (!sev_es)
+ goto out;
- pr_info("SEV supported\n");
+ /* Does the CPU support SEV-ES? */
+ if (!boot_cpu_has(X86_FEATURE_SEV_ES))
+ goto out;
-err:
- kfree(status);
- return rc;
+ /* Has the system been allocated ASIDs for SEV-ES? */
+ if (min_sev_asid == 1)
+ goto out;
+
+ pr_info("SEV-ES supported: %u ASIDs\n", min_sev_asid - 1);
+ sev_es_supported = true;
+
+out:
+ sev = sev_supported;
+ sev_es = sev_es_supported;
}
void sev_hardware_teardown(void)
@@ -1181,13 +1311,329 @@ void sev_hardware_teardown(void)
sev_flush_asids();
}
+/*
+ * Pages used by hardware to hold guest encrypted state must be flushed before
+ * returning them to the system.
+ */
+static void sev_flush_guest_memory(struct vcpu_svm *svm, void *va,
+ unsigned long len)
+{
+ /*
+ * If hardware enforced cache coherency for encrypted mappings of the
+ * same physical page is supported, nothing to do.
+ */
+ if (boot_cpu_has(X86_FEATURE_SME_COHERENT))
+ return;
+
+ /*
+ * If the VM Page Flush MSR is supported, use it to flush the page
+ * (using the page virtual address and the guest ASID).
+ */
+ if (boot_cpu_has(X86_FEATURE_VM_PAGE_FLUSH)) {
+ struct kvm_sev_info *sev;
+ unsigned long va_start;
+ u64 start, stop;
+
+ /* Align start and stop to page boundaries. */
+ va_start = (unsigned long)va;
+ start = (u64)va_start & PAGE_MASK;
+ stop = PAGE_ALIGN((u64)va_start + len);
+
+ if (start < stop) {
+ sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info;
+
+ while (start < stop) {
+ wrmsrl(MSR_AMD64_VM_PAGE_FLUSH,
+ start | sev->asid);
+
+ start += PAGE_SIZE;
+ }
+
+ return;
+ }
+
+ WARN(1, "Address overflow, using WBINVD\n");
+ }
+
+ /*
+ * Hardware should always have one of the above features,
+ * but if not, use WBINVD and issue a warning.
+ */
+ WARN_ONCE(1, "Using WBINVD to flush guest memory\n");
+ wbinvd_on_all_cpus();
+}
+
+void sev_free_vcpu(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm;
+
+ if (!sev_es_guest(vcpu->kvm))
+ return;
+
+ svm = to_svm(vcpu);
+
+ if (vcpu->arch.guest_state_protected)
+ sev_flush_guest_memory(svm, svm->vmsa, PAGE_SIZE);
+ __free_page(virt_to_page(svm->vmsa));
+
+ if (svm->ghcb_sa_free)
+ kfree(svm->ghcb_sa);
+}
+
+static void dump_ghcb(struct vcpu_svm *svm)
+{
+ struct ghcb *ghcb = svm->ghcb;
+ unsigned int nbits;
+
+ /* Re-use the dump_invalid_vmcb module parameter */
+ if (!dump_invalid_vmcb) {
+ pr_warn_ratelimited("set kvm_amd.dump_invalid_vmcb=1 to dump internal KVM state.\n");
+ return;
+ }
+
+ nbits = sizeof(ghcb->save.valid_bitmap) * 8;
+
+ pr_err("GHCB (GPA=%016llx):\n", svm->vmcb->control.ghcb_gpa);
+ pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_code",
+ ghcb->save.sw_exit_code, ghcb_sw_exit_code_is_valid(ghcb));
+ pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_1",
+ ghcb->save.sw_exit_info_1, ghcb_sw_exit_info_1_is_valid(ghcb));
+ pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_2",
+ ghcb->save.sw_exit_info_2, ghcb_sw_exit_info_2_is_valid(ghcb));
+ pr_err("%-20s%016llx is_valid: %u\n", "sw_scratch",
+ ghcb->save.sw_scratch, ghcb_sw_scratch_is_valid(ghcb));
+ pr_err("%-20s%*pb\n", "valid_bitmap", nbits, ghcb->save.valid_bitmap);
+}
+
+static void sev_es_sync_to_ghcb(struct vcpu_svm *svm)
+{
+ struct kvm_vcpu *vcpu = &svm->vcpu;
+ struct ghcb *ghcb = svm->ghcb;
+
+ /*
+ * The GHCB protocol so far allows for the following data
+ * to be returned:
+ * GPRs RAX, RBX, RCX, RDX
+ *
+ * Copy their values to the GHCB if they are dirty.
+ */
+ if (kvm_register_is_dirty(vcpu, VCPU_REGS_RAX))
+ ghcb_set_rax(ghcb, vcpu->arch.regs[VCPU_REGS_RAX]);
+ if (kvm_register_is_dirty(vcpu, VCPU_REGS_RBX))
+ ghcb_set_rbx(ghcb, vcpu->arch.regs[VCPU_REGS_RBX]);
+ if (kvm_register_is_dirty(vcpu, VCPU_REGS_RCX))
+ ghcb_set_rcx(ghcb, vcpu->arch.regs[VCPU_REGS_RCX]);
+ if (kvm_register_is_dirty(vcpu, VCPU_REGS_RDX))
+ ghcb_set_rdx(ghcb, vcpu->arch.regs[VCPU_REGS_RDX]);
+}
+
+static void sev_es_sync_from_ghcb(struct vcpu_svm *svm)
+{
+ struct vmcb_control_area *control = &svm->vmcb->control;
+ struct kvm_vcpu *vcpu = &svm->vcpu;
+ struct ghcb *ghcb = svm->ghcb;
+ u64 exit_code;
+
+ /*
+ * The GHCB protocol so far allows for the following data
+ * to be supplied:
+ * GPRs RAX, RBX, RCX, RDX
+ * XCR0
+ * CPL
+ *
+ * VMMCALL allows the guest to provide extra registers. KVM also
+ * expects RSI for hypercalls, so include that, too.
+ *
+ * Copy their values to the appropriate location if supplied.
+ */
+ memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
+
+ vcpu->arch.regs[VCPU_REGS_RAX] = ghcb_get_rax_if_valid(ghcb);
+ vcpu->arch.regs[VCPU_REGS_RBX] = ghcb_get_rbx_if_valid(ghcb);
+ vcpu->arch.regs[VCPU_REGS_RCX] = ghcb_get_rcx_if_valid(ghcb);
+ vcpu->arch.regs[VCPU_REGS_RDX] = ghcb_get_rdx_if_valid(ghcb);
+ vcpu->arch.regs[VCPU_REGS_RSI] = ghcb_get_rsi_if_valid(ghcb);
+
+ svm->vmcb->save.cpl = ghcb_get_cpl_if_valid(ghcb);
+
+ if (ghcb_xcr0_is_valid(ghcb)) {
+ vcpu->arch.xcr0 = ghcb_get_xcr0(ghcb);
+ kvm_update_cpuid_runtime(vcpu);
+ }
+
+ /* Copy the GHCB exit information into the VMCB fields */
+ exit_code = ghcb_get_sw_exit_code(ghcb);
+ control->exit_code = lower_32_bits(exit_code);
+ control->exit_code_hi = upper_32_bits(exit_code);
+ control->exit_info_1 = ghcb_get_sw_exit_info_1(ghcb);
+ control->exit_info_2 = ghcb_get_sw_exit_info_2(ghcb);
+
+ /* Clear the valid entries fields */
+ memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
+}
+
+static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
+{
+ struct kvm_vcpu *vcpu;
+ struct ghcb *ghcb;
+ u64 exit_code = 0;
+
+ ghcb = svm->ghcb;
+
+ /* Only GHCB Usage code 0 is supported */
+ if (ghcb->ghcb_usage)
+ goto vmgexit_err;
+
+ /*
+ * Retrieve the exit code now even though is may not be marked valid
+ * as it could help with debugging.
+ */
+ exit_code = ghcb_get_sw_exit_code(ghcb);
+
+ if (!ghcb_sw_exit_code_is_valid(ghcb) ||
+ !ghcb_sw_exit_info_1_is_valid(ghcb) ||
+ !ghcb_sw_exit_info_2_is_valid(ghcb))
+ goto vmgexit_err;
+
+ switch (ghcb_get_sw_exit_code(ghcb)) {
+ case SVM_EXIT_READ_DR7:
+ break;
+ case SVM_EXIT_WRITE_DR7:
+ if (!ghcb_rax_is_valid(ghcb))
+ goto vmgexit_err;
+ break;
+ case SVM_EXIT_RDTSC:
+ break;
+ case SVM_EXIT_RDPMC:
+ if (!ghcb_rcx_is_valid(ghcb))
+ goto vmgexit_err;
+ break;
+ case SVM_EXIT_CPUID:
+ if (!ghcb_rax_is_valid(ghcb) ||
+ !ghcb_rcx_is_valid(ghcb))
+ goto vmgexit_err;
+ if (ghcb_get_rax(ghcb) == 0xd)
+ if (!ghcb_xcr0_is_valid(ghcb))
+ goto vmgexit_err;
+ break;
+ case SVM_EXIT_INVD:
+ break;
+ case SVM_EXIT_IOIO:
+ if (ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_STR_MASK) {
+ if (!ghcb_sw_scratch_is_valid(ghcb))
+ goto vmgexit_err;
+ } else {
+ if (!(ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_TYPE_MASK))
+ if (!ghcb_rax_is_valid(ghcb))
+ goto vmgexit_err;
+ }
+ break;
+ case SVM_EXIT_MSR:
+ if (!ghcb_rcx_is_valid(ghcb))
+ goto vmgexit_err;
+ if (ghcb_get_sw_exit_info_1(ghcb)) {
+ if (!ghcb_rax_is_valid(ghcb) ||
+ !ghcb_rdx_is_valid(ghcb))
+ goto vmgexit_err;
+ }
+ break;
+ case SVM_EXIT_VMMCALL:
+ if (!ghcb_rax_is_valid(ghcb) ||
+ !ghcb_cpl_is_valid(ghcb))
+ goto vmgexit_err;
+ break;
+ case SVM_EXIT_RDTSCP:
+ break;
+ case SVM_EXIT_WBINVD:
+ break;
+ case SVM_EXIT_MONITOR:
+ if (!ghcb_rax_is_valid(ghcb) ||
+ !ghcb_rcx_is_valid(ghcb) ||
+ !ghcb_rdx_is_valid(ghcb))
+ goto vmgexit_err;
+ break;
+ case SVM_EXIT_MWAIT:
+ if (!ghcb_rax_is_valid(ghcb) ||
+ !ghcb_rcx_is_valid(ghcb))
+ goto vmgexit_err;
+ break;
+ case SVM_VMGEXIT_MMIO_READ:
+ case SVM_VMGEXIT_MMIO_WRITE:
+ if (!ghcb_sw_scratch_is_valid(ghcb))
+ goto vmgexit_err;
+ break;
+ case SVM_VMGEXIT_NMI_COMPLETE:
+ case SVM_VMGEXIT_AP_JUMP_TABLE:
+ case SVM_VMGEXIT_UNSUPPORTED_EVENT:
+ break;
+ default:
+ goto vmgexit_err;
+ }
+
+ return 0;
+
+vmgexit_err:
+ vcpu = &svm->vcpu;
+
+ if (ghcb->ghcb_usage) {
+ vcpu_unimpl(vcpu, "vmgexit: ghcb usage %#x is not valid\n",
+ ghcb->ghcb_usage);
+ } else {
+ vcpu_unimpl(vcpu, "vmgexit: exit reason %#llx is not valid\n",
+ exit_code);
+ dump_ghcb(svm);
+ }
+
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
+ vcpu->run->internal.ndata = 2;
+ vcpu->run->internal.data[0] = exit_code;
+ vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;
+
+ return -EINVAL;
+}
+
+static void pre_sev_es_run(struct vcpu_svm *svm)
+{
+ if (!svm->ghcb)
+ return;
+
+ if (svm->ghcb_sa_free) {
+ /*
+ * The scratch area lives outside the GHCB, so there is a
+ * buffer that, depending on the operation performed, may
+ * need to be synced, then freed.
+ */
+ if (svm->ghcb_sa_sync) {
+ kvm_write_guest(svm->vcpu.kvm,
+ ghcb_get_sw_scratch(svm->ghcb),
+ svm->ghcb_sa, svm->ghcb_sa_len);
+ svm->ghcb_sa_sync = false;
+ }
+
+ kfree(svm->ghcb_sa);
+ svm->ghcb_sa = NULL;
+ svm->ghcb_sa_free = false;
+ }
+
+ trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, svm->ghcb);
+
+ sev_es_sync_to_ghcb(svm);
+
+ kvm_vcpu_unmap(&svm->vcpu, &svm->ghcb_map, true);
+ svm->ghcb = NULL;
+}
+
void pre_sev_run(struct vcpu_svm *svm, int cpu)
{
struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
int asid = sev_get_asid(svm->vcpu.kvm);
+ /* Perform any SEV-ES pre-run actions */
+ pre_sev_es_run(svm);
+
/* Assign the asid allocated with this SEV guest */
- svm->vmcb->control.asid = asid;
+ svm->asid = asid;
/*
* Flush guest TLB:
@@ -1203,3 +1649,394 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu)
svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
vmcb_mark_dirty(svm->vmcb, VMCB_ASID);
}
+
+#define GHCB_SCRATCH_AREA_LIMIT (16ULL * PAGE_SIZE)
+static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
+{
+ struct vmcb_control_area *control = &svm->vmcb->control;
+ struct ghcb *ghcb = svm->ghcb;
+ u64 ghcb_scratch_beg, ghcb_scratch_end;
+ u64 scratch_gpa_beg, scratch_gpa_end;
+ void *scratch_va;
+
+ scratch_gpa_beg = ghcb_get_sw_scratch(ghcb);
+ if (!scratch_gpa_beg) {
+ pr_err("vmgexit: scratch gpa not provided\n");
+ return false;
+ }
+
+ scratch_gpa_end = scratch_gpa_beg + len;
+ if (scratch_gpa_end < scratch_gpa_beg) {
+ pr_err("vmgexit: scratch length (%#llx) not valid for scratch address (%#llx)\n",
+ len, scratch_gpa_beg);
+ return false;
+ }
+
+ if ((scratch_gpa_beg & PAGE_MASK) == control->ghcb_gpa) {
+ /* Scratch area begins within GHCB */
+ ghcb_scratch_beg = control->ghcb_gpa +
+ offsetof(struct ghcb, shared_buffer);
+ ghcb_scratch_end = control->ghcb_gpa +
+ offsetof(struct ghcb, reserved_1);
+
+ /*
+ * If the scratch area begins within the GHCB, it must be
+ * completely contained in the GHCB shared buffer area.
+ */
+ if (scratch_gpa_beg < ghcb_scratch_beg ||
+ scratch_gpa_end > ghcb_scratch_end) {
+ pr_err("vmgexit: scratch area is outside of GHCB shared buffer area (%#llx - %#llx)\n",
+ scratch_gpa_beg, scratch_gpa_end);
+ return false;
+ }
+
+ scratch_va = (void *)svm->ghcb;
+ scratch_va += (scratch_gpa_beg - control->ghcb_gpa);
+ } else {
+ /*
+ * The guest memory must be read into a kernel buffer, so
+ * limit the size
+ */
+ if (len > GHCB_SCRATCH_AREA_LIMIT) {
+ pr_err("vmgexit: scratch area exceeds KVM limits (%#llx requested, %#llx limit)\n",
+ len, GHCB_SCRATCH_AREA_LIMIT);
+ return false;
+ }
+ scratch_va = kzalloc(len, GFP_KERNEL);
+ if (!scratch_va)
+ return false;
+
+ if (kvm_read_guest(svm->vcpu.kvm, scratch_gpa_beg, scratch_va, len)) {
+ /* Unable to copy scratch area from guest */
+ pr_err("vmgexit: kvm_read_guest for scratch area failed\n");
+
+ kfree(scratch_va);
+ return false;
+ }
+
+ /*
+ * The scratch area is outside the GHCB. The operation will
+ * dictate whether the buffer needs to be synced before running
+ * the vCPU next time (i.e. a read was requested so the data
+ * must be written back to the guest memory).
+ */
+ svm->ghcb_sa_sync = sync;
+ svm->ghcb_sa_free = true;
+ }
+
+ svm->ghcb_sa = scratch_va;
+ svm->ghcb_sa_len = len;
+
+ return true;
+}
+
+static void set_ghcb_msr_bits(struct vcpu_svm *svm, u64 value, u64 mask,
+ unsigned int pos)
+{
+ svm->vmcb->control.ghcb_gpa &= ~(mask << pos);
+ svm->vmcb->control.ghcb_gpa |= (value & mask) << pos;
+}
+
+static u64 get_ghcb_msr_bits(struct vcpu_svm *svm, u64 mask, unsigned int pos)
+{
+ return (svm->vmcb->control.ghcb_gpa >> pos) & mask;
+}
+
+static void set_ghcb_msr(struct vcpu_svm *svm, u64 value)
+{
+ svm->vmcb->control.ghcb_gpa = value;
+}
+
+static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
+{
+ struct vmcb_control_area *control = &svm->vmcb->control;
+ struct kvm_vcpu *vcpu = &svm->vcpu;
+ u64 ghcb_info;
+ int ret = 1;
+
+ ghcb_info = control->ghcb_gpa & GHCB_MSR_INFO_MASK;
+
+ trace_kvm_vmgexit_msr_protocol_enter(svm->vcpu.vcpu_id,
+ control->ghcb_gpa);
+
+ switch (ghcb_info) {
+ case GHCB_MSR_SEV_INFO_REQ:
+ set_ghcb_msr(svm, GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX,
+ GHCB_VERSION_MIN,
+ sev_enc_bit));
+ break;
+ case GHCB_MSR_CPUID_REQ: {
+ u64 cpuid_fn, cpuid_reg, cpuid_value;
+
+ cpuid_fn = get_ghcb_msr_bits(svm,
+ GHCB_MSR_CPUID_FUNC_MASK,
+ GHCB_MSR_CPUID_FUNC_POS);
+
+ /* Initialize the registers needed by the CPUID intercept */
+ vcpu->arch.regs[VCPU_REGS_RAX] = cpuid_fn;
+ vcpu->arch.regs[VCPU_REGS_RCX] = 0;
+
+ ret = svm_invoke_exit_handler(svm, SVM_EXIT_CPUID);
+ if (!ret) {
+ ret = -EINVAL;
+ break;
+ }
+
+ cpuid_reg = get_ghcb_msr_bits(svm,
+ GHCB_MSR_CPUID_REG_MASK,
+ GHCB_MSR_CPUID_REG_POS);
+ if (cpuid_reg == 0)
+ cpuid_value = vcpu->arch.regs[VCPU_REGS_RAX];
+ else if (cpuid_reg == 1)
+ cpuid_value = vcpu->arch.regs[VCPU_REGS_RBX];
+ else if (cpuid_reg == 2)
+ cpuid_value = vcpu->arch.regs[VCPU_REGS_RCX];
+ else
+ cpuid_value = vcpu->arch.regs[VCPU_REGS_RDX];
+
+ set_ghcb_msr_bits(svm, cpuid_value,
+ GHCB_MSR_CPUID_VALUE_MASK,
+ GHCB_MSR_CPUID_VALUE_POS);
+
+ set_ghcb_msr_bits(svm, GHCB_MSR_CPUID_RESP,
+ GHCB_MSR_INFO_MASK,
+ GHCB_MSR_INFO_POS);
+ break;
+ }
+ case GHCB_MSR_TERM_REQ: {
+ u64 reason_set, reason_code;
+
+ reason_set = get_ghcb_msr_bits(svm,
+ GHCB_MSR_TERM_REASON_SET_MASK,
+ GHCB_MSR_TERM_REASON_SET_POS);
+ reason_code = get_ghcb_msr_bits(svm,
+ GHCB_MSR_TERM_REASON_MASK,
+ GHCB_MSR_TERM_REASON_POS);
+ pr_info("SEV-ES guest requested termination: %#llx:%#llx\n",
+ reason_set, reason_code);
+ fallthrough;
+ }
+ default:
+ ret = -EINVAL;
+ }
+
+ trace_kvm_vmgexit_msr_protocol_exit(svm->vcpu.vcpu_id,
+ control->ghcb_gpa, ret);
+
+ return ret;
+}
+
+int sev_handle_vmgexit(struct vcpu_svm *svm)
+{
+ struct vmcb_control_area *control = &svm->vmcb->control;
+ u64 ghcb_gpa, exit_code;
+ struct ghcb *ghcb;
+ int ret;
+
+ /* Validate the GHCB */
+ ghcb_gpa = control->ghcb_gpa;
+ if (ghcb_gpa & GHCB_MSR_INFO_MASK)
+ return sev_handle_vmgexit_msr_protocol(svm);
+
+ if (!ghcb_gpa) {
+ vcpu_unimpl(&svm->vcpu, "vmgexit: GHCB gpa is not set\n");
+ return -EINVAL;
+ }
+
+ if (kvm_vcpu_map(&svm->vcpu, ghcb_gpa >> PAGE_SHIFT, &svm->ghcb_map)) {
+ /* Unable to map GHCB from guest */
+ vcpu_unimpl(&svm->vcpu, "vmgexit: error mapping GHCB [%#llx] from guest\n",
+ ghcb_gpa);
+ return -EINVAL;
+ }
+
+ svm->ghcb = svm->ghcb_map.hva;
+ ghcb = svm->ghcb_map.hva;
+
+ trace_kvm_vmgexit_enter(svm->vcpu.vcpu_id, ghcb);
+
+ exit_code = ghcb_get_sw_exit_code(ghcb);
+
+ ret = sev_es_validate_vmgexit(svm);
+ if (ret)
+ return ret;
+
+ sev_es_sync_from_ghcb(svm);
+ ghcb_set_sw_exit_info_1(ghcb, 0);
+ ghcb_set_sw_exit_info_2(ghcb, 0);
+
+ ret = -EINVAL;
+ switch (exit_code) {
+ case SVM_VMGEXIT_MMIO_READ:
+ if (!setup_vmgexit_scratch(svm, true, control->exit_info_2))
+ break;
+
+ ret = kvm_sev_es_mmio_read(&svm->vcpu,
+ control->exit_info_1,
+ control->exit_info_2,
+ svm->ghcb_sa);
+ break;
+ case SVM_VMGEXIT_MMIO_WRITE:
+ if (!setup_vmgexit_scratch(svm, false, control->exit_info_2))
+ break;
+
+ ret = kvm_sev_es_mmio_write(&svm->vcpu,
+ control->exit_info_1,
+ control->exit_info_2,
+ svm->ghcb_sa);
+ break;
+ case SVM_VMGEXIT_NMI_COMPLETE:
+ ret = svm_invoke_exit_handler(svm, SVM_EXIT_IRET);
+ break;
+ case SVM_VMGEXIT_AP_JUMP_TABLE: {
+ struct kvm_sev_info *sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info;
+
+ switch (control->exit_info_1) {
+ case 0:
+ /* Set AP jump table address */
+ sev->ap_jump_table = control->exit_info_2;
+ break;
+ case 1:
+ /* Get AP jump table address */
+ ghcb_set_sw_exit_info_2(ghcb, sev->ap_jump_table);
+ break;
+ default:
+ pr_err("svm: vmgexit: unsupported AP jump table request - exit_info_1=%#llx\n",
+ control->exit_info_1);
+ ghcb_set_sw_exit_info_1(ghcb, 1);
+ ghcb_set_sw_exit_info_2(ghcb,
+ X86_TRAP_UD |
+ SVM_EVTINJ_TYPE_EXEPT |
+ SVM_EVTINJ_VALID);
+ }
+
+ ret = 1;
+ break;
+ }
+ case SVM_VMGEXIT_UNSUPPORTED_EVENT:
+ vcpu_unimpl(&svm->vcpu,
+ "vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n",
+ control->exit_info_1, control->exit_info_2);
+ break;
+ default:
+ ret = svm_invoke_exit_handler(svm, exit_code);
+ }
+
+ return ret;
+}
+
+int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)
+{
+ if (!setup_vmgexit_scratch(svm, in, svm->vmcb->control.exit_info_2))
+ return -EINVAL;
+
+ return kvm_sev_es_string_io(&svm->vcpu, size, port,
+ svm->ghcb_sa, svm->ghcb_sa_len, in);
+}
+
+void sev_es_init_vmcb(struct vcpu_svm *svm)
+{
+ struct kvm_vcpu *vcpu = &svm->vcpu;
+
+ svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ES_ENABLE;
+ svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
+
+ /*
+ * An SEV-ES guest requires a VMSA area that is a separate from the
+ * VMCB page. Do not include the encryption mask on the VMSA physical
+ * address since hardware will access it using the guest key.
+ */
+ svm->vmcb->control.vmsa_pa = __pa(svm->vmsa);
+
+ /* Can't intercept CR register access, HV can't modify CR registers */
+ svm_clr_intercept(svm, INTERCEPT_CR0_READ);
+ svm_clr_intercept(svm, INTERCEPT_CR4_READ);
+ svm_clr_intercept(svm, INTERCEPT_CR8_READ);
+ svm_clr_intercept(svm, INTERCEPT_CR0_WRITE);
+ svm_clr_intercept(svm, INTERCEPT_CR4_WRITE);
+ svm_clr_intercept(svm, INTERCEPT_CR8_WRITE);
+
+ svm_clr_intercept(svm, INTERCEPT_SELECTIVE_CR0);
+
+ /* Track EFER/CR register changes */
+ svm_set_intercept(svm, TRAP_EFER_WRITE);
+ svm_set_intercept(svm, TRAP_CR0_WRITE);
+ svm_set_intercept(svm, TRAP_CR4_WRITE);
+ svm_set_intercept(svm, TRAP_CR8_WRITE);
+
+ /* No support for enable_vmware_backdoor */
+ clr_exception_intercept(svm, GP_VECTOR);
+
+ /* Can't intercept XSETBV, HV can't modify XCR0 directly */
+ svm_clr_intercept(svm, INTERCEPT_XSETBV);
+
+ /* Clear intercepts on selected MSRs */
+ set_msr_interception(vcpu, svm->msrpm, MSR_EFER, 1, 1);
+ set_msr_interception(vcpu, svm->msrpm, MSR_IA32_CR_PAT, 1, 1);
+ set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
+ set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
+ set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
+ set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
+}
+
+void sev_es_create_vcpu(struct vcpu_svm *svm)
+{
+ /*
+ * Set the GHCB MSR value as per the GHCB specification when creating
+ * a vCPU for an SEV-ES guest.
+ */
+ set_ghcb_msr(svm, GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX,
+ GHCB_VERSION_MIN,
+ sev_enc_bit));
+}
+
+void sev_es_vcpu_load(struct vcpu_svm *svm, int cpu)
+{
+ struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
+ struct vmcb_save_area *hostsa;
+ unsigned int i;
+
+ /*
+ * As an SEV-ES guest, hardware will restore the host state on VMEXIT,
+ * of which one step is to perform a VMLOAD. Since hardware does not
+ * perform a VMSAVE on VMRUN, the host savearea must be updated.
+ */
+ asm volatile(__ex("vmsave") : : "a" (__sme_page_pa(sd->save_area)) : "memory");
+
+ /*
+ * Certain MSRs are restored on VMEXIT, only save ones that aren't
+ * restored.
+ */
+ for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) {
+ if (host_save_user_msrs[i].sev_es_restored)
+ continue;
+
+ rdmsrl(host_save_user_msrs[i].index, svm->host_user_msrs[i]);
+ }
+
+ /* XCR0 is restored on VMEXIT, save the current host value */
+ hostsa = (struct vmcb_save_area *)(page_address(sd->save_area) + 0x400);
+ hostsa->xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
+
+ /* PKRU is restored on VMEXIT, save the curent host value */
+ hostsa->pkru = read_pkru();
+
+ /* MSR_IA32_XSS is restored on VMEXIT, save the currnet host value */
+ hostsa->xss = host_xss;
+}
+
+void sev_es_vcpu_put(struct vcpu_svm *svm)
+{
+ unsigned int i;
+
+ /*
+ * Certain MSRs are restored on VMEXIT and were saved with vmsave in
+ * sev_es_vcpu_load() above. Only restore ones that weren't.
+ */
+ for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) {
+ if (host_save_user_msrs[i].sev_es_restored)
+ continue;
+
+ wrmsrl(host_save_user_msrs[i].index, svm->host_user_msrs[i]);
+ }
+}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 2f32fd09e259..cce0143a6f80 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -33,9 +33,9 @@
#include <asm/debugreg.h>
#include <asm/kvm_para.h>
#include <asm/irq_remapping.h>
-#include <asm/mce.h>
#include <asm/spec-ctrl.h>
#include <asm/cpu_device_id.h>
+#include <asm/traps.h>
#include <asm/virtext.h>
#include "trace.h"
@@ -90,7 +90,7 @@ static DEFINE_PER_CPU(u64, current_tsc_ratio);
static const struct svm_direct_access_msrs {
u32 index; /* Index of the MSR */
- bool always; /* True if intercept is always on */
+ bool always; /* True if intercept is initially cleared */
} direct_access_msrs[MAX_DIRECT_ACCESS_MSRS] = {
{ .index = MSR_STAR, .always = true },
{ .index = MSR_IA32_SYSENTER_CS, .always = true },
@@ -108,6 +108,9 @@ static const struct svm_direct_access_msrs {
{ .index = MSR_IA32_LASTBRANCHTOIP, .always = false },
{ .index = MSR_IA32_LASTINTFROMIP, .always = false },
{ .index = MSR_IA32_LASTINTTOIP, .always = false },
+ { .index = MSR_EFER, .always = false },
+ { .index = MSR_IA32_CR_PAT, .always = false },
+ { .index = MSR_AMD64_SEV_ES_GHCB, .always = true },
{ .index = MSR_INVALID, .always = false },
};
@@ -187,10 +190,14 @@ static int vgif = true;
module_param(vgif, int, 0444);
/* enable/disable SEV support */
-static int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT);
+int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT);
module_param(sev, int, 0444);
-static bool __read_mostly dump_invalid_vmcb = 0;
+/* enable/disable SEV-ES support */
+int sev_es = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT);
+module_param(sev_es, int, 0444);
+
+bool __read_mostly dump_invalid_vmcb;
module_param(dump_invalid_vmcb, bool, 0644);
static u8 rsm_ins_bytes[] = "\x0f\xaa";
@@ -336,6 +343,13 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ /*
+ * SEV-ES does not expose the next RIP. The RIP update is controlled by
+ * the type of exit and the #VC handler in the guest.
+ */
+ if (sev_es_guest(vcpu->kvm))
+ goto done;
+
if (nrips && svm->vmcb->control.next_rip != 0) {
WARN_ON_ONCE(!static_cpu_has(X86_FEATURE_NRIPS));
svm->next_rip = svm->vmcb->control.next_rip;
@@ -347,6 +361,8 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
} else {
kvm_rip_write(vcpu, svm->next_rip);
}
+
+done:
svm_set_interrupt_shadow(vcpu, 0);
return 1;
@@ -484,7 +500,7 @@ static int svm_hardware_enable(void)
wrmsrl(MSR_EFER, efer | EFER_SVME);
- wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT);
+ wrmsrl(MSR_VM_HSAVE_PA, __sme_page_pa(sd->save_area));
if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);
@@ -530,12 +546,12 @@ static int svm_hardware_enable(void)
static void svm_cpu_uninit(int cpu)
{
- struct svm_cpu_data *sd = per_cpu(svm_data, raw_smp_processor_id());
+ struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
if (!sd)
return;
- per_cpu(svm_data, raw_smp_processor_id()) = NULL;
+ per_cpu(svm_data, cpu) = NULL;
kfree(sd->sev_vmcbs);
__free_page(sd->save_area);
kfree(sd);
@@ -552,6 +568,7 @@ static int svm_cpu_init(int cpu)
sd->save_area = alloc_page(GFP_KERNEL);
if (!sd->save_area)
goto free_cpu_data;
+ clear_page(page_address(sd->save_area));
if (svm_sev_enabled()) {
sd->sev_vmcbs = kmalloc_array(max_sev_asid + 1,
@@ -662,8 +679,8 @@ static void set_msr_interception_bitmap(struct kvm_vcpu *vcpu, u32 *msrpm,
msrpm[offset] = tmp;
}
-static void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr,
- int read, int write)
+void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr,
+ int read, int write)
{
set_shadow_msr_intercept(vcpu, msr, read, write);
set_msr_interception_bitmap(vcpu, msrpm, msr, read, write);
@@ -959,15 +976,11 @@ static __init int svm_hardware_setup(void)
kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
}
- if (sev) {
- if (boot_cpu_has(X86_FEATURE_SEV) &&
- IS_ENABLED(CONFIG_KVM_AMD_SEV)) {
- r = sev_hardware_setup();
- if (r)
- sev = false;
- } else {
- sev = false;
- }
+ if (IS_ENABLED(CONFIG_KVM_AMD_SEV) && sev) {
+ sev_hardware_setup();
+ } else {
+ sev = false;
+ sev_es = false;
}
svm_adjust_mmio_mask();
@@ -1215,6 +1228,7 @@ static void init_vmcb(struct vcpu_svm *svm)
save->cr4 = 0;
}
svm->asid_generation = 0;
+ svm->asid = 0;
svm->nested.vmcb12_gpa = 0;
svm->vcpu.arch.hflags = 0;
@@ -1252,6 +1266,11 @@ static void init_vmcb(struct vcpu_svm *svm)
if (sev_guest(svm->vcpu.kvm)) {
svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE;
clr_exception_intercept(svm, UD_VECTOR);
+
+ if (sev_es_guest(svm->vcpu.kvm)) {
+ /* Perform SEV-ES specific VMCB updates */
+ sev_es_init_vmcb(svm);
+ }
}
vmcb_mark_all_dirty(svm->vmcb);
@@ -1288,6 +1307,7 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm;
struct page *vmcb_page;
+ struct page *vmsa_page = NULL;
int err;
BUILD_BUG_ON(offsetof(struct vcpu_svm, vcpu) != 0);
@@ -1298,9 +1318,27 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
if (!vmcb_page)
goto out;
+ if (sev_es_guest(svm->vcpu.kvm)) {
+ /*
+ * SEV-ES guests require a separate VMSA page used to contain
+ * the encrypted register state of the guest.
+ */
+ vmsa_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+ if (!vmsa_page)
+ goto error_free_vmcb_page;
+
+ /*
+ * SEV-ES guests maintain an encrypted version of their FPU
+ * state which is restored and saved on VMRUN and VMEXIT.
+ * Free the fpu structure to prevent KVM from attempting to
+ * access the FPU state.
+ */
+ kvm_free_guest_fpu(vcpu);
+ }
+
err = avic_init_vcpu(svm);
if (err)
- goto error_free_vmcb_page;
+ goto error_free_vmsa_page;
/* We initialize this flag to true to make sure that the is_running
* bit would be set the first time the vcpu is loaded.
@@ -1309,21 +1347,34 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
svm->avic_is_running = true;
svm->msrpm = svm_vcpu_alloc_msrpm();
- if (!svm->msrpm)
- goto error_free_vmcb_page;
+ if (!svm->msrpm) {
+ err = -ENOMEM;
+ goto error_free_vmsa_page;
+ }
svm_vcpu_init_msrpm(vcpu, svm->msrpm);
svm->vmcb = page_address(vmcb_page);
svm->vmcb_pa = __sme_set(page_to_pfn(vmcb_page) << PAGE_SHIFT);
+
+ if (vmsa_page)
+ svm->vmsa = page_address(vmsa_page);
+
svm->asid_generation = 0;
init_vmcb(svm);
svm_init_osvw(vcpu);
vcpu->arch.microcode_version = 0x01000065;
+ if (sev_es_guest(svm->vcpu.kvm))
+ /* Perform SEV-ES specific VMCB creation updates */
+ sev_es_create_vcpu(svm);
+
return 0;
+error_free_vmsa_page:
+ if (vmsa_page)
+ __free_page(vmsa_page);
error_free_vmcb_page:
__free_page(vmcb_page);
out:
@@ -1351,6 +1402,8 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
svm_free_nested(svm);
+ sev_free_vcpu(vcpu);
+
__free_page(pfn_to_page(__sme_clr(svm->vmcb_pa) >> PAGE_SHIFT));
__free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
}
@@ -1366,15 +1419,20 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
vmcb_mark_all_dirty(svm->vmcb);
}
+ if (sev_es_guest(svm->vcpu.kvm)) {
+ sev_es_vcpu_load(svm, cpu);
+ } else {
#ifdef CONFIG_X86_64
- rdmsrl(MSR_GS_BASE, to_svm(vcpu)->host.gs_base);
+ rdmsrl(MSR_GS_BASE, to_svm(vcpu)->host.gs_base);
#endif
- savesegment(fs, svm->host.fs);
- savesegment(gs, svm->host.gs);
- svm->host.ldt = kvm_read_ldt();
+ savesegment(fs, svm->host.fs);
+ savesegment(gs, svm->host.gs);
+ svm->host.ldt = kvm_read_ldt();
- for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
- rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
+ for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
+ rdmsrl(host_save_user_msrs[i].index,
+ svm->host_user_msrs[i]);
+ }
if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
u64 tsc_ratio = vcpu->arch.tsc_scaling_ratio;
@@ -1402,18 +1460,24 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu)
avic_vcpu_put(vcpu);
++vcpu->stat.host_state_reload;
- kvm_load_ldt(svm->host.ldt);
+ if (sev_es_guest(svm->vcpu.kvm)) {
+ sev_es_vcpu_put(svm);
+ } else {
+ kvm_load_ldt(svm->host.ldt);
#ifdef CONFIG_X86_64
- loadsegment(fs, svm->host.fs);
- wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gsbase);
- load_gs_index(svm->host.gs);
+ loadsegment(fs, svm->host.fs);
+ wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gsbase);
+ load_gs_index(svm->host.gs);
#else
#ifdef CONFIG_X86_32_LAZY_GS
- loadsegment(gs, svm->host.gs);
+ loadsegment(gs, svm->host.gs);
#endif
#endif
- for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
- wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
+
+ for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
+ wrmsrl(host_save_user_msrs[i].index,
+ svm->host_user_msrs[i]);
+ }
}
static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
@@ -1631,9 +1695,18 @@ static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
static void update_cr0_intercept(struct vcpu_svm *svm)
{
- ulong gcr0 = svm->vcpu.arch.cr0;
- u64 *hcr0 = &svm->vmcb->save.cr0;
+ ulong gcr0;
+ u64 *hcr0;
+ /*
+ * SEV-ES guests must always keep the CR intercepts cleared. CR
+ * tracking is done using the CR write traps.
+ */
+ if (sev_es_guest(svm->vcpu.kvm))
+ return;
+
+ gcr0 = svm->vcpu.arch.cr0;
+ hcr0 = &svm->vmcb->save.cr0;
*hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK)
| (gcr0 & SVM_CR0_SELECTIVE_MASK);
@@ -1653,7 +1726,7 @@ void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
struct vcpu_svm *svm = to_svm(vcpu);
#ifdef CONFIG_X86_64
- if (vcpu->arch.efer & EFER_LME) {
+ if (vcpu->arch.efer & EFER_LME && !vcpu->arch.guest_state_protected) {
if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
vcpu->arch.efer |= EFER_LMA;
svm->vmcb->save.efer |= EFER_LMA | EFER_LME;
@@ -1682,13 +1755,15 @@ void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
update_cr0_intercept(svm);
}
-int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+static bool svm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
- unsigned long host_cr4_mce = cr4_read_shadow() & X86_CR4_MCE;
- unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4;
+ return true;
+}
- if (cr4 & X86_CR4_VMXE)
- return 1;
+void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+{
+ unsigned long host_cr4_mce = cr4_read_shadow() & X86_CR4_MCE;
+ unsigned long old_cr4 = vcpu->arch.cr4;
if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE))
svm_flush_tlb(vcpu);
@@ -1699,7 +1774,9 @@ int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
cr4 |= host_cr4_mce;
to_svm(vcpu)->vmcb->save.cr4 = cr4;
vmcb_mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
- return 0;
+
+ if ((cr4 ^ old_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE))
+ kvm_update_cpuid_runtime(vcpu);
}
static void svm_set_segment(struct kvm_vcpu *vcpu,
@@ -1751,18 +1828,20 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd)
++sd->asid_generation;
sd->next_asid = sd->min_asid;
svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
+ vmcb_mark_dirty(svm->vmcb, VMCB_ASID);
}
svm->asid_generation = sd->asid_generation;
- svm->vmcb->control.asid = sd->next_asid++;
-
- vmcb_mark_dirty(svm->vmcb, VMCB_ASID);
+ svm->asid = sd->next_asid++;
}
static void svm_set_dr6(struct vcpu_svm *svm, unsigned long value)
{
struct vmcb *vmcb = svm->vmcb;
+ if (svm->vcpu.arch.guest_state_protected)
+ return;
+
if (unlikely(value != vmcb->save.dr6)) {
vmcb->save.dr6 = value;
vmcb_mark_dirty(vmcb, VMCB_DR);
@@ -1773,6 +1852,9 @@ static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ if (vcpu->arch.guest_state_protected)
+ return;
+
get_debugreg(vcpu->arch.db[0], 0);
get_debugreg(vcpu->arch.db[1], 1);
get_debugreg(vcpu->arch.db[2], 2);
@@ -1791,6 +1873,9 @@ static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ if (vcpu->arch.guest_state_protected)
+ return;
+
svm->vmcb->save.dr7 = value;
vmcb_mark_dirty(svm->vmcb, VMCB_DR);
}
@@ -1929,25 +2014,6 @@ static bool is_erratum_383(void)
return true;
}
-/*
- * Trigger machine check on the host. We assume all the MSRs are already set up
- * by the CPU and that we still run on the same CPU as the MCE occurred on.
- * We pass a fake environment to the machine check handler because we want
- * the guest to be always treated like user space, no matter what context
- * it used internally.
- */
-static void kvm_machine_check(void)
-{
-#if defined(CONFIG_X86_MCE)
- struct pt_regs regs = {
- .cs = 3, /* Fake ring 3 no matter what the guest ran on */
- .flags = X86_EFLAGS_IF,
- };
-
- do_machine_check(&regs);
-#endif
-}
-
static void svm_handle_mce(struct vcpu_svm *svm)
{
if (is_erratum_383()) {
@@ -1979,6 +2045,13 @@ static int shutdown_interception(struct vcpu_svm *svm)
struct kvm_run *kvm_run = svm->vcpu.run;
/*
+ * The VM save area has already been encrypted so it
+ * cannot be reinitialized - just terminate.
+ */
+ if (sev_es_guest(svm->vcpu.kvm))
+ return -EINVAL;
+
+ /*
* VMCB is undefined after a SHUTDOWN intercept
* so reinitialize it.
*/
@@ -1999,11 +2072,16 @@ static int io_interception(struct vcpu_svm *svm)
++svm->vcpu.stat.io_exits;
string = (io_info & SVM_IOIO_STR_MASK) != 0;
in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
- if (string)
- return kvm_emulate_instruction(vcpu, 0);
-
port = io_info >> 16;
size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
+
+ if (string) {
+ if (sev_es_guest(vcpu->kvm))
+ return sev_es_string_io(svm, size, port, in);
+ else
+ return kvm_emulate_instruction(vcpu, 0);
+ }
+
svm->next_rip = svm->vmcb->control.exit_info_2;
return kvm_fast_pio(&svm->vcpu, size, port, in);
@@ -2267,9 +2345,11 @@ static int cpuid_interception(struct vcpu_svm *svm)
static int iret_interception(struct vcpu_svm *svm)
{
++svm->vcpu.stat.nmi_window_exits;
- svm_clr_intercept(svm, INTERCEPT_IRET);
svm->vcpu.arch.hflags |= HF_IRET_MASK;
- svm->nmi_iret_rip = kvm_rip_read(&svm->vcpu);
+ if (!sev_es_guest(svm->vcpu.kvm)) {
+ svm_clr_intercept(svm, INTERCEPT_IRET);
+ svm->nmi_iret_rip = kvm_rip_read(&svm->vcpu);
+ }
kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
return 1;
}
@@ -2406,6 +2486,41 @@ static int cr_interception(struct vcpu_svm *svm)
return kvm_complete_insn_gp(&svm->vcpu, err);
}
+static int cr_trap(struct vcpu_svm *svm)
+{
+ struct kvm_vcpu *vcpu = &svm->vcpu;
+ unsigned long old_value, new_value;
+ unsigned int cr;
+ int ret = 0;
+
+ new_value = (unsigned long)svm->vmcb->control.exit_info_1;
+
+ cr = svm->vmcb->control.exit_code - SVM_EXIT_CR0_WRITE_TRAP;
+ switch (cr) {
+ case 0:
+ old_value = kvm_read_cr0(vcpu);
+ svm_set_cr0(vcpu, new_value);
+
+ kvm_post_set_cr0(vcpu, old_value, new_value);
+ break;
+ case 4:
+ old_value = kvm_read_cr4(vcpu);
+ svm_set_cr4(vcpu, new_value);
+
+ kvm_post_set_cr4(vcpu, old_value, new_value);
+ break;
+ case 8:
+ ret = kvm_set_cr8(&svm->vcpu, new_value);
+ break;
+ default:
+ WARN(1, "unhandled CR%d write trap", cr);
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ return 1;
+ }
+
+ return kvm_complete_insn_gp(vcpu, ret);
+}
+
static int dr_interception(struct vcpu_svm *svm)
{
int reg, dr;
@@ -2459,6 +2574,25 @@ static int cr8_write_interception(struct vcpu_svm *svm)
return 0;
}
+static int efer_trap(struct vcpu_svm *svm)
+{
+ struct msr_data msr_info;
+ int ret;
+
+ /*
+ * Clear the EFER_SVME bit from EFER. The SVM code always sets this
+ * bit in svm_set_efer(), but __kvm_valid_efer() checks it against
+ * whether the guest has X86_FEATURE_SVM - this avoids a failure if
+ * the guest doesn't have X86_FEATURE_SVM.
+ */
+ msr_info.host_initiated = false;
+ msr_info.index = MSR_EFER;
+ msr_info.data = svm->vmcb->control.exit_info_1 & ~EFER_SVME;
+ ret = kvm_set_msr_common(&svm->vcpu, &msr_info);
+
+ return kvm_complete_insn_gp(&svm->vcpu, ret);
+}
+
static int svm_get_msr_feature(struct kvm_msr_entry *msr)
{
msr->data = 0;
@@ -2541,10 +2675,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
case MSR_IA32_SPEC_CTRL:
if (!msr_info->host_initiated &&
- !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) &&
- !guest_cpuid_has(vcpu, X86_FEATURE_AMD_STIBP) &&
- !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) &&
- !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD))
+ !guest_has_spec_ctrl_msr(vcpu))
return 1;
msr_info->data = svm->spec_ctrl;
@@ -2582,6 +2713,20 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 0;
}
+static int svm_complete_emulated_msr(struct kvm_vcpu *vcpu, int err)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+ if (!sev_es_guest(svm->vcpu.kvm) || !err)
+ return kvm_complete_insn_gp(&svm->vcpu, err);
+
+ ghcb_set_sw_exit_info_1(svm->ghcb, 1);
+ ghcb_set_sw_exit_info_2(svm->ghcb,
+ X86_TRAP_GP |
+ SVM_EVTINJ_TYPE_EXEPT |
+ SVM_EVTINJ_VALID);
+ return 1;
+}
+
static int rdmsr_interception(struct vcpu_svm *svm)
{
return kvm_emulate_rdmsr(&svm->vcpu);
@@ -2628,10 +2773,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
break;
case MSR_IA32_SPEC_CTRL:
if (!msr->host_initiated &&
- !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) &&
- !guest_cpuid_has(vcpu, X86_FEATURE_AMD_STIBP) &&
- !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) &&
- !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD))
+ !guest_has_spec_ctrl_msr(vcpu))
return 1;
if (kvm_spec_ctrl_test_value(data))
@@ -2656,12 +2798,12 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
break;
case MSR_IA32_PRED_CMD:
if (!msr->host_initiated &&
- !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBPB))
+ !guest_has_pred_cmd_msr(vcpu))
return 1;
if (data & ~PRED_CMD_IBPB)
return 1;
- if (!boot_cpu_has(X86_FEATURE_AMD_IBPB))
+ if (!boot_cpu_has(X86_FEATURE_IBPB))
return 1;
if (!data)
break;
@@ -2803,7 +2945,14 @@ static int interrupt_window_interception(struct vcpu_svm *svm)
static int pause_interception(struct vcpu_svm *svm)
{
struct kvm_vcpu *vcpu = &svm->vcpu;
- bool in_kernel = (svm_get_cpl(vcpu) == 0);
+ bool in_kernel;
+
+ /*
+ * CPL is not made available for an SEV-ES guest, therefore
+ * vcpu->arch.preempted_in_kernel can never be true. Just
+ * set in_kernel to false as well.
+ */
+ in_kernel = !sev_es_guest(svm->vcpu.kvm) && svm_get_cpl(vcpu) == 0;
if (!kvm_pause_in_guest(vcpu->kvm))
grow_ple_window(vcpu);
@@ -2918,11 +3067,16 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_MWAIT] = mwait_interception,
[SVM_EXIT_XSETBV] = xsetbv_interception,
[SVM_EXIT_RDPRU] = rdpru_interception,
+ [SVM_EXIT_EFER_WRITE_TRAP] = efer_trap,
+ [SVM_EXIT_CR0_WRITE_TRAP] = cr_trap,
+ [SVM_EXIT_CR4_WRITE_TRAP] = cr_trap,
+ [SVM_EXIT_CR8_WRITE_TRAP] = cr_trap,
[SVM_EXIT_INVPCID] = invpcid_interception,
[SVM_EXIT_NPF] = npf_interception,
[SVM_EXIT_RSM] = rsm_interception,
[SVM_EXIT_AVIC_INCOMPLETE_IPI] = avic_incomplete_ipi_interception,
[SVM_EXIT_AVIC_UNACCELERATED_ACCESS] = avic_unaccelerated_access_interception,
+ [SVM_EXIT_VMGEXIT] = sev_handle_vmgexit,
};
static void dump_vmcb(struct kvm_vcpu *vcpu)
@@ -2964,6 +3118,7 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
pr_err("%-20s%lld\n", "nested_ctl:", control->nested_ctl);
pr_err("%-20s%016llx\n", "nested_cr3:", control->nested_cr3);
pr_err("%-20s%016llx\n", "avic_vapic_bar:", control->avic_vapic_bar);
+ pr_err("%-20s%016llx\n", "ghcb:", control->ghcb_gpa);
pr_err("%-20s%08x\n", "event_inj:", control->event_inj);
pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err);
pr_err("%-20s%lld\n", "virt_ext:", control->virt_ext);
@@ -2971,6 +3126,7 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
pr_err("%-20s%016llx\n", "avic_backing_page:", control->avic_backing_page);
pr_err("%-20s%016llx\n", "avic_logical_id:", control->avic_logical_id);
pr_err("%-20s%016llx\n", "avic_physical_id:", control->avic_physical_id);
+ pr_err("%-20s%016llx\n", "vmsa_pa:", control->vmsa_pa);
pr_err("VMCB State Save Area:\n");
pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
"es:",
@@ -3043,6 +3199,43 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
"excp_to:", save->last_excp_to);
}
+static int svm_handle_invalid_exit(struct kvm_vcpu *vcpu, u64 exit_code)
+{
+ if (exit_code < ARRAY_SIZE(svm_exit_handlers) &&
+ svm_exit_handlers[exit_code])
+ return 0;
+
+ vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%llx\n", exit_code);
+ dump_vmcb(vcpu);
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
+ vcpu->run->internal.ndata = 2;
+ vcpu->run->internal.data[0] = exit_code;
+ vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;
+
+ return -EINVAL;
+}
+
+int svm_invoke_exit_handler(struct vcpu_svm *svm, u64 exit_code)
+{
+ if (svm_handle_invalid_exit(&svm->vcpu, exit_code))
+ return 0;
+
+#ifdef CONFIG_RETPOLINE
+ if (exit_code == SVM_EXIT_MSR)
+ return msr_interception(svm);
+ else if (exit_code == SVM_EXIT_VINTR)
+ return interrupt_window_interception(svm);
+ else if (exit_code == SVM_EXIT_INTR)
+ return intr_interception(svm);
+ else if (exit_code == SVM_EXIT_HLT)
+ return halt_interception(svm);
+ else if (exit_code == SVM_EXIT_NPF)
+ return npf_interception(svm);
+#endif
+ return svm_exit_handlers[exit_code](svm);
+}
+
static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2,
u32 *intr_info, u32 *error_code)
{
@@ -3066,10 +3259,13 @@ static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM);
- if (!svm_is_intercept(svm, INTERCEPT_CR0_WRITE))
- vcpu->arch.cr0 = svm->vmcb->save.cr0;
- if (npt_enabled)
- vcpu->arch.cr3 = svm->vmcb->save.cr3;
+ /* SEV-ES guests must use the CR write traps to track CR registers. */
+ if (!sev_es_guest(vcpu->kvm)) {
+ if (!svm_is_intercept(svm, INTERCEPT_CR0_WRITE))
+ vcpu->arch.cr0 = svm->vmcb->save.cr0;
+ if (npt_enabled)
+ vcpu->arch.cr3 = svm->vmcb->save.cr3;
+ }
if (is_guest_mode(vcpu)) {
int vmexit;
@@ -3106,32 +3302,7 @@ static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
if (exit_fastpath != EXIT_FASTPATH_NONE)
return 1;
- if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
- || !svm_exit_handlers[exit_code]) {
- vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%x\n", exit_code);
- dump_vmcb(vcpu);
- vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
- vcpu->run->internal.suberror =
- KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
- vcpu->run->internal.ndata = 2;
- vcpu->run->internal.data[0] = exit_code;
- vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;
- return 0;
- }
-
-#ifdef CONFIG_RETPOLINE
- if (exit_code == SVM_EXIT_MSR)
- return msr_interception(svm);
- else if (exit_code == SVM_EXIT_VINTR)
- return interrupt_window_interception(svm);
- else if (exit_code == SVM_EXIT_INTR)
- return intr_interception(svm);
- else if (exit_code == SVM_EXIT_HLT)
- return halt_interception(svm);
- else if (exit_code == SVM_EXIT_NPF)
- return npf_interception(svm);
-#endif
- return svm_exit_handlers[exit_code](svm);
+ return svm_invoke_exit_handler(svm, exit_code);
}
static void reload_tss(struct kvm_vcpu *vcpu)
@@ -3160,7 +3331,8 @@ static void svm_inject_nmi(struct kvm_vcpu *vcpu)
svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
vcpu->arch.hflags |= HF_NMI_MASK;
- svm_set_intercept(svm, INTERCEPT_IRET);
+ if (!sev_es_guest(svm->vcpu.kvm))
+ svm_set_intercept(svm, INTERCEPT_IRET);
++vcpu->stat.nmi_injections;
}
@@ -3181,6 +3353,13 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ /*
+ * SEV-ES guests must always keep the CR intercepts cleared. CR
+ * tracking is done using the CR write traps.
+ */
+ if (sev_es_guest(vcpu->kvm))
+ return;
+
if (nested_svm_virtualize_tpr(vcpu))
return;
@@ -3237,10 +3416,12 @@ static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
if (masked) {
svm->vcpu.arch.hflags |= HF_NMI_MASK;
- svm_set_intercept(svm, INTERCEPT_IRET);
+ if (!sev_es_guest(svm->vcpu.kvm))
+ svm_set_intercept(svm, INTERCEPT_IRET);
} else {
svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
- svm_clr_intercept(svm, INTERCEPT_IRET);
+ if (!sev_es_guest(svm->vcpu.kvm))
+ svm_clr_intercept(svm, INTERCEPT_IRET);
}
}
@@ -3252,7 +3433,14 @@ bool svm_interrupt_blocked(struct kvm_vcpu *vcpu)
if (!gif_set(svm))
return true;
- if (is_guest_mode(vcpu)) {
+ if (sev_es_guest(svm->vcpu.kvm)) {
+ /*
+ * SEV-ES guests to not expose RFLAGS. Use the VMCB interrupt mask
+ * bit to determine the state of the IF flag.
+ */
+ if (!(vmcb->control.int_state & SVM_GUEST_INTERRUPT_MASK))
+ return true;
+ } else if (is_guest_mode(vcpu)) {
/* As long as interrupts are being delivered... */
if ((svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK)
? !(svm->nested.hsave->save.rflags & X86_EFLAGS_IF)
@@ -3411,8 +3599,9 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
* If we've made progress since setting HF_IRET_MASK, we've
* executed an IRET and can allow NMI injection.
*/
- if ((svm->vcpu.arch.hflags & HF_IRET_MASK)
- && kvm_rip_read(&svm->vcpu) != svm->nmi_iret_rip) {
+ if ((svm->vcpu.arch.hflags & HF_IRET_MASK) &&
+ (sev_es_guest(svm->vcpu.kvm) ||
+ kvm_rip_read(&svm->vcpu) != svm->nmi_iret_rip)) {
svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK);
kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
}
@@ -3435,6 +3624,12 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
break;
case SVM_EXITINTINFO_TYPE_EXEPT:
/*
+ * Never re-inject a #VC exception.
+ */
+ if (vector == X86_TRAP_VC)
+ break;
+
+ /*
* In case of software exceptions, do not reinject the vector,
* but re-execute the instruction instead. Rewind RIP first
* if we emulated INT3 before.
@@ -3507,16 +3702,20 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu,
guest_enter_irqoff();
lockdep_hardirqs_on(CALLER_ADDR0);
- __svm_vcpu_run(svm->vmcb_pa, (unsigned long *)&svm->vcpu.arch.regs);
+ if (sev_es_guest(svm->vcpu.kvm)) {
+ __svm_sev_es_vcpu_run(svm->vmcb_pa);
+ } else {
+ __svm_vcpu_run(svm->vmcb_pa, (unsigned long *)&svm->vcpu.arch.regs);
#ifdef CONFIG_X86_64
- native_wrmsrl(MSR_GS_BASE, svm->host.gs_base);
+ native_wrmsrl(MSR_GS_BASE, svm->host.gs_base);
#else
- loadsegment(fs, svm->host.fs);
+ loadsegment(fs, svm->host.fs);
#ifndef CONFIG_X86_32_LAZY_GS
- loadsegment(gs, svm->host.gs);
+ loadsegment(gs, svm->host.gs);
#endif
#endif
+ }
/*
* VMEXIT disables interrupts (host state), but tracing and lockdep
@@ -3566,6 +3765,10 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
sync_lapic_to_cr8(vcpu);
+ if (unlikely(svm->asid != svm->vmcb->control.asid)) {
+ svm->vmcb->control.asid = svm->asid;
+ vmcb_mark_dirty(svm->vmcb, VMCB_ASID);
+ }
svm->vmcb->save.cr2 = vcpu->arch.cr2;
/*
@@ -3610,14 +3813,17 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
- reload_tss(vcpu);
+ if (!sev_es_guest(svm->vcpu.kvm))
+ reload_tss(vcpu);
x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl);
- vcpu->arch.cr2 = svm->vmcb->save.cr2;
- vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
- vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
- vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
+ if (!sev_es_guest(svm->vcpu.kvm)) {
+ vcpu->arch.cr2 = svm->vmcb->save.cr2;
+ vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
+ vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
+ vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
+ }
if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
kvm_before_interrupt(&svm->vcpu);
@@ -3720,12 +3926,21 @@ static bool svm_cpu_has_accelerated_tpr(void)
return false;
}
-static bool svm_has_emulated_msr(u32 index)
+/*
+ * The kvm parameter can be NULL (module initialization, or invocation before
+ * VM creation). Be sure to check the kvm parameter before using it.
+ */
+static bool svm_has_emulated_msr(struct kvm *kvm, u32 index)
{
switch (index) {
case MSR_IA32_MCG_EXT_CTL:
case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
return false;
+ case MSR_IA32_SMBASE:
+ /* SEV-ES guests do not support SMM, so report false */
+ if (kvm && sev_es_guest(kvm))
+ return false;
+ break;
default:
break;
}
@@ -3741,6 +3956,7 @@ static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ struct kvm_cpuid_entry2 *best;
vcpu->arch.xsaves_enabled = guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
boot_cpu_has(X86_FEATURE_XSAVE) &&
@@ -3753,6 +3969,13 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
/* Check again if INVPCID interception if required */
svm_check_invpcid(svm);
+ /* For sev guests, the memory encryption bit is not reserved in CR3. */
+ if (sev_guest(vcpu->kvm)) {
+ best = kvm_find_cpuid_entry(vcpu, 0x8000001F, 0);
+ if (best)
+ vcpu->arch.cr3_lm_rsvd_bits &= ~(1UL << (best->ebx & 0x3f));
+ }
+
if (!kvm_vcpu_apicv_active(vcpu))
return;
@@ -4076,6 +4299,12 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, void *insn, int i
unsigned long cr4;
/*
+ * When the guest is an SEV-ES guest, emulation is not possible.
+ */
+ if (sev_es_guest(vcpu->kvm))
+ return false;
+
+ /*
* Detect and workaround Errata 1096 Fam_17h_00_0Fh.
*
* Errata:
@@ -4207,6 +4436,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.get_cpl = svm_get_cpl,
.get_cs_db_l_bits = kvm_get_cs_db_l_bits,
.set_cr0 = svm_set_cr0,
+ .is_valid_cr4 = svm_is_valid_cr4,
.set_cr4 = svm_set_cr4,
.set_efer = svm_set_efer,
.get_idt = svm_get_idt,
@@ -4295,6 +4525,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.apic_init_signal_blocked = svm_apic_init_signal_blocked,
.msr_filter_changed = svm_msr_filter_changed,
+ .complete_emulated_msr = svm_complete_emulated_msr,
};
static struct kvm_x86_init_ops svm_init_ops __initdata = {
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 1d853fe4c778..5431e6335e2e 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -17,21 +17,32 @@
#include <linux/kvm_types.h>
#include <linux/kvm_host.h>
+#include <linux/bits.h>
#include <asm/svm.h>
-static const u32 host_save_user_msrs[] = {
+#define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT)
+
+static const struct svm_host_save_msrs {
+ u32 index; /* Index of the MSR */
+ bool sev_es_restored; /* True if MSR is restored on SEV-ES VMEXIT */
+} host_save_user_msrs[] = {
#ifdef CONFIG_X86_64
- MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE,
- MSR_FS_BASE,
+ { .index = MSR_STAR, .sev_es_restored = true },
+ { .index = MSR_LSTAR, .sev_es_restored = true },
+ { .index = MSR_CSTAR, .sev_es_restored = true },
+ { .index = MSR_SYSCALL_MASK, .sev_es_restored = true },
+ { .index = MSR_KERNEL_GS_BASE, .sev_es_restored = true },
+ { .index = MSR_FS_BASE, .sev_es_restored = true },
#endif
- MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
- MSR_TSC_AUX,
+ { .index = MSR_IA32_SYSENTER_CS, .sev_es_restored = true },
+ { .index = MSR_IA32_SYSENTER_ESP, .sev_es_restored = true },
+ { .index = MSR_IA32_SYSENTER_EIP, .sev_es_restored = true },
+ { .index = MSR_TSC_AUX, .sev_es_restored = false },
};
-
#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
-#define MAX_DIRECT_ACCESS_MSRS 15
+#define MAX_DIRECT_ACCESS_MSRS 18
#define MSRPM_OFFSETS 16
extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
extern bool npt_enabled;
@@ -61,11 +72,13 @@ enum {
struct kvm_sev_info {
bool active; /* SEV enabled guest */
+ bool es_active; /* SEV-ES enabled guest */
unsigned int asid; /* ASID used for this guest */
unsigned int handle; /* SEV firmware handle */
int fd; /* SEV device fd */
unsigned long pages_locked; /* Number of pages locked */
struct list_head regions_list; /* List of registered regions */
+ u64 ap_jump_table; /* SEV-ES AP Jump Table address */
};
struct kvm_svm {
@@ -106,6 +119,7 @@ struct vcpu_svm {
struct vmcb *vmcb;
unsigned long vmcb_pa;
struct svm_cpu_data *svm_data;
+ u32 asid;
uint64_t asid_generation;
uint64_t sysenter_esp;
uint64_t sysenter_eip;
@@ -166,6 +180,17 @@ struct vcpu_svm {
DECLARE_BITMAP(read, MAX_DIRECT_ACCESS_MSRS);
DECLARE_BITMAP(write, MAX_DIRECT_ACCESS_MSRS);
} shadow_msr_intercept;
+
+ /* SEV-ES support */
+ struct vmcb_save_area *vmsa;
+ struct ghcb *ghcb;
+ struct kvm_host_map ghcb_map;
+
+ /* SEV-ES scratch area support */
+ void *ghcb_sa;
+ u64 ghcb_sa_len;
+ bool ghcb_sa_sync;
+ bool ghcb_sa_free;
};
struct svm_cpu_data {
@@ -193,6 +218,28 @@ static inline struct kvm_svm *to_kvm_svm(struct kvm *kvm)
return container_of(kvm, struct kvm_svm, kvm);
}
+static inline bool sev_guest(struct kvm *kvm)
+{
+#ifdef CONFIG_KVM_AMD_SEV
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+
+ return sev->active;
+#else
+ return false;
+#endif
+}
+
+static inline bool sev_es_guest(struct kvm *kvm)
+{
+#ifdef CONFIG_KVM_AMD_SEV
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+
+ return sev_guest(kvm) && sev->es_active;
+#else
+ return false;
+#endif
+}
+
static inline void vmcb_mark_all_dirty(struct vmcb *vmcb)
{
vmcb->control.clean = 0;
@@ -244,21 +291,24 @@ static inline void set_dr_intercepts(struct vcpu_svm *svm)
{
struct vmcb *vmcb = get_host_vmcb(svm);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ);
+ if (!sev_es_guest(svm->vcpu.kvm)) {
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE);
+ }
+
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
recalc_intercepts(svm);
@@ -270,6 +320,12 @@ static inline void clr_dr_intercepts(struct vcpu_svm *svm)
vmcb->control.intercepts[INTERCEPT_DR] = 0;
+ /* DR7 access must remain intercepted for an SEV-ES guest */
+ if (sev_es_guest(svm->vcpu.kvm)) {
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
+ }
+
recalc_intercepts(svm);
}
@@ -351,6 +407,10 @@ static inline bool gif_set(struct vcpu_svm *svm)
#define MSR_CR3_LONG_MBZ_MASK 0xfff0000000000000U
#define MSR_INVALID 0xffffffffU
+extern int sev;
+extern int sev_es;
+extern bool dump_invalid_vmcb;
+
u32 svm_msrpm_offset(u32 msr);
u32 *svm_vcpu_alloc_msrpm(void);
void svm_vcpu_init_msrpm(struct kvm_vcpu *vcpu, u32 *msrpm);
@@ -358,13 +418,16 @@ void svm_vcpu_free_msrpm(u32 *msrpm);
int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer);
void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
-int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
+void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
void svm_flush_tlb(struct kvm_vcpu *vcpu);
void disable_nmi_singlestep(struct vcpu_svm *svm);
bool svm_smi_blocked(struct kvm_vcpu *vcpu);
bool svm_nmi_blocked(struct kvm_vcpu *vcpu);
bool svm_interrupt_blocked(struct kvm_vcpu *vcpu);
void svm_set_gif(struct vcpu_svm *svm, bool value);
+int svm_invoke_exit_handler(struct vcpu_svm *svm, u64 exit_code);
+void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr,
+ int read, int write);
/* nested.c */
@@ -470,18 +533,42 @@ void svm_vcpu_unblocking(struct kvm_vcpu *vcpu);
/* sev.c */
-extern unsigned int max_sev_asid;
+#define GHCB_VERSION_MAX 1ULL
+#define GHCB_VERSION_MIN 1ULL
+
+#define GHCB_MSR_INFO_POS 0
+#define GHCB_MSR_INFO_MASK (BIT_ULL(12) - 1)
+
+#define GHCB_MSR_SEV_INFO_RESP 0x001
+#define GHCB_MSR_SEV_INFO_REQ 0x002
+#define GHCB_MSR_VER_MAX_POS 48
+#define GHCB_MSR_VER_MAX_MASK 0xffff
+#define GHCB_MSR_VER_MIN_POS 32
+#define GHCB_MSR_VER_MIN_MASK 0xffff
+#define GHCB_MSR_CBIT_POS 24
+#define GHCB_MSR_CBIT_MASK 0xff
+#define GHCB_MSR_SEV_INFO(_max, _min, _cbit) \
+ ((((_max) & GHCB_MSR_VER_MAX_MASK) << GHCB_MSR_VER_MAX_POS) | \
+ (((_min) & GHCB_MSR_VER_MIN_MASK) << GHCB_MSR_VER_MIN_POS) | \
+ (((_cbit) & GHCB_MSR_CBIT_MASK) << GHCB_MSR_CBIT_POS) | \
+ GHCB_MSR_SEV_INFO_RESP)
+
+#define GHCB_MSR_CPUID_REQ 0x004
+#define GHCB_MSR_CPUID_RESP 0x005
+#define GHCB_MSR_CPUID_FUNC_POS 32
+#define GHCB_MSR_CPUID_FUNC_MASK 0xffffffff
+#define GHCB_MSR_CPUID_VALUE_POS 32
+#define GHCB_MSR_CPUID_VALUE_MASK 0xffffffff
+#define GHCB_MSR_CPUID_REG_POS 30
+#define GHCB_MSR_CPUID_REG_MASK 0x3
+
+#define GHCB_MSR_TERM_REQ 0x100
+#define GHCB_MSR_TERM_REASON_SET_POS 12
+#define GHCB_MSR_TERM_REASON_SET_MASK 0xf
+#define GHCB_MSR_TERM_REASON_POS 16
+#define GHCB_MSR_TERM_REASON_MASK 0xff
-static inline bool sev_guest(struct kvm *kvm)
-{
-#ifdef CONFIG_KVM_AMD_SEV
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
-
- return sev->active;
-#else
- return false;
-#endif
-}
+extern unsigned int max_sev_asid;
static inline bool svm_sev_enabled(void)
{
@@ -495,7 +582,19 @@ int svm_register_enc_region(struct kvm *kvm,
int svm_unregister_enc_region(struct kvm *kvm,
struct kvm_enc_region *range);
void pre_sev_run(struct vcpu_svm *svm, int cpu);
-int __init sev_hardware_setup(void);
+void __init sev_hardware_setup(void);
void sev_hardware_teardown(void);
+void sev_free_vcpu(struct kvm_vcpu *vcpu);
+int sev_handle_vmgexit(struct vcpu_svm *svm);
+int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in);
+void sev_es_init_vmcb(struct vcpu_svm *svm);
+void sev_es_create_vcpu(struct vcpu_svm *svm);
+void sev_es_vcpu_load(struct vcpu_svm *svm, int cpu);
+void sev_es_vcpu_put(struct vcpu_svm *svm);
+
+/* vmenter.S */
+
+void __svm_sev_es_vcpu_run(unsigned long vmcb_pa);
+void __svm_vcpu_run(unsigned long vmcb_pa, unsigned long *regs);
#endif
diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S
index 1ec1ac40e328..6feb8c08f45a 100644
--- a/arch/x86/kvm/svm/vmenter.S
+++ b/arch/x86/kvm/svm/vmenter.S
@@ -168,3 +168,53 @@ SYM_FUNC_START(__svm_vcpu_run)
pop %_ASM_BP
ret
SYM_FUNC_END(__svm_vcpu_run)
+
+/**
+ * __svm_sev_es_vcpu_run - Run a SEV-ES vCPU via a transition to SVM guest mode
+ * @vmcb_pa: unsigned long
+ */
+SYM_FUNC_START(__svm_sev_es_vcpu_run)
+ push %_ASM_BP
+#ifdef CONFIG_X86_64
+ push %r15
+ push %r14
+ push %r13
+ push %r12
+#else
+ push %edi
+ push %esi
+#endif
+ push %_ASM_BX
+
+ /* Enter guest mode */
+ mov %_ASM_ARG1, %_ASM_AX
+ sti
+
+1: vmrun %_ASM_AX
+ jmp 3f
+2: cmpb $0, kvm_rebooting
+ jne 3f
+ ud2
+ _ASM_EXTABLE(1b, 2b)
+
+3: cli
+
+#ifdef CONFIG_RETPOLINE
+ /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */
+ FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
+#endif
+
+ pop %_ASM_BX
+
+#ifdef CONFIG_X86_64
+ pop %r12
+ pop %r13
+ pop %r14
+ pop %r15
+#else
+ pop %esi
+ pop %edi
+#endif
+ pop %_ASM_BP
+ ret
+SYM_FUNC_END(__svm_sev_es_vcpu_run)