53 files changed, 9925 insertions, 490 deletions
diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86_64/amx_test.c
new file mode 100644
index 000000000000..dadcbad10a1d
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/amx_test.c
@@ -0,0 +1,416 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * amx tests
+ *
+ * Copyright (C) 2021, Intel, Inc.
+ *
+ * Tests for amx #NM exception and save/restore.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/syscall.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#ifndef __x86_64__
+# error This test is 64-bit only
+#endif
+
+#define NUM_TILES			8
+#define TILE_SIZE			1024
+#define XSAVE_SIZE			((NUM_TILES * TILE_SIZE) + PAGE_SIZE)
+
+/* Tile configuration associated: */
+#define MAX_TILES			16
+#define RESERVED_BYTES			14
+
+#define XFEATURE_XTILECFG		17
+#define XFEATURE_XTILEDATA		18
+#define XFEATURE_MASK_XTILECFG		(1 << XFEATURE_XTILECFG)
+#define XFEATURE_MASK_XTILEDATA		(1 << XFEATURE_XTILEDATA)
+#define XFEATURE_MASK_XTILE		(XFEATURE_MASK_XTILECFG | XFEATURE_MASK_XTILEDATA)
+
+#define TILE_CPUID			0x1d
+#define XSTATE_CPUID			0xd
+#define TILE_PALETTE_CPUID_SUBLEAVE	0x1
+#define XSTATE_USER_STATE_SUBLEAVE	0x0
+
+#define XSAVE_HDR_OFFSET		512
+
+struct xsave_data {
+	u8 area[XSAVE_SIZE];
+} __aligned(64);
+
+struct tile_config {
+	u8  palette_id;
+	u8  start_row;
+	u8  reserved[RESERVED_BYTES];
+	u16 colsb[MAX_TILES];
+	u8  rows[MAX_TILES];
+};
+
+struct tile_data {
+	u8 data[NUM_TILES * TILE_SIZE];
+};
+
+struct xtile_info {
+	u16 bytes_per_tile;
+	u16 bytes_per_row;
+	u16 max_names;
+	u16 max_rows;
+	u32 xsave_offset;
+	u32 xsave_size;
+};
+
+static struct xtile_info xtile;
+
+static inline u64 __xgetbv(u32 index)
+{
+	u32 eax, edx;
+
+	asm volatile("xgetbv;"
+		     : "=a" (eax), "=d" (edx)
+		     : "c" (index));
+	return eax + ((u64)edx << 32);
+}
+
+static inline void __xsetbv(u32 index, u64 value)
+{
+	u32 eax = value;
+	u32 edx = value >> 32;
+
+	asm volatile("xsetbv" :: "a" (eax), "d" (edx), "c" (index));
+}
+
+static inline void __ldtilecfg(void *cfg)
+{
+	asm volatile(".byte 0xc4,0xe2,0x78,0x49,0x00"
+		     : : "a"(cfg));
+}
+
+static inline void __tileloadd(void *tile)
+{
+	asm volatile(".byte 0xc4,0xe2,0x7b,0x4b,0x04,0x10"
+		     : : "a"(tile), "d"(0));
+}
+
+static inline void __tilerelease(void)
+{
+	asm volatile(".byte 0xc4, 0xe2, 0x78, 0x49, 0xc0" ::);
+}
+
+static inline void __xsavec(struct xsave_data *data, uint64_t rfbm)
+{
+	uint32_t rfbm_lo = rfbm;
+	uint32_t rfbm_hi = rfbm >> 32;
+
+	asm volatile("xsavec (%%rdi)"
+		     : : "D" (data), "a" (rfbm_lo), "d" (rfbm_hi)
+		     : "memory");
+}
+
+static inline void check_cpuid_xsave(void)
+{
+	GUEST_ASSERT(this_cpu_has(X86_FEATURE_XSAVE));
+	GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE));
+}
+
+static bool check_xsave_supports_xtile(void)
+{
+	return (__xgetbv(0) & XFEATURE_MASK_XTILE) == XFEATURE_MASK_XTILE; /* CFG and DATA */
+}
+
+static bool enum_xtile_config(void)
+{
+	u32 eax, ebx, ecx, edx;
+
+	__cpuid(TILE_CPUID, TILE_PALETTE_CPUID_SUBLEAVE, &eax, &ebx, &ecx, &edx);
+	if (!eax || !ebx || !ecx)
+		return false;
+
+	xtile.max_names = ebx >> 16;
+	if (xtile.max_names < NUM_TILES)
+		return false;
+
+	xtile.bytes_per_tile = eax >> 16;
+	if (xtile.bytes_per_tile < TILE_SIZE)
+		return false;
+
+	xtile.bytes_per_row = ebx;
+	xtile.max_rows = ecx;
+
+	return true;
+}
+
+static bool enum_xsave_tile(void)
+{
+	u32 eax, ebx, ecx, edx;
+
+	__cpuid(XSTATE_CPUID, XFEATURE_XTILEDATA, &eax, &ebx, &ecx, &edx);
+	if (!eax || !ebx)
+		return false;
+
+	xtile.xsave_offset = ebx;
+	xtile.xsave_size = eax;
+
+	return true;
+}
+
+static bool check_xsave_size(void)
+{
+	u32 eax, ebx, ecx, edx;
+	bool valid = false;
+
+	__cpuid(XSTATE_CPUID, XSTATE_USER_STATE_SUBLEAVE, &eax, &ebx, &ecx, &edx);
+	if (ebx && ebx <= XSAVE_SIZE)
+		valid = true;
+
+	return valid;
+}
+
+static bool check_xtile_info(void)
+{
+	bool ret = false;
+
+	if (!check_xsave_size())
+		return ret;
+
+	if (!enum_xsave_tile())
+		return ret;
+
+	if (!enum_xtile_config())
+		return ret;
+
+	if (sizeof(struct tile_data) >= xtile.xsave_size)
+		ret = true;
+
+	return ret;
+}
+
+static void set_tilecfg(struct tile_config *cfg)
+{
+	int i;
+
+	/* Only palette id 1 */
+	cfg->palette_id = 1;
+	for (i = 0; i < xtile.max_names; i++) {
+		cfg->colsb[i] = xtile.bytes_per_row;
+		cfg->rows[i] = xtile.max_rows;
+	}
+}
+
+static void set_xstatebv(void *data, uint64_t bv)
+{
+	*(uint64_t *)(data + XSAVE_HDR_OFFSET) = bv;
+}
+
+static u64 get_xstatebv(void *data)
+{
+	return *(u64 *)(data + XSAVE_HDR_OFFSET);
+}
+
+static void init_regs(void)
+{
+	uint64_t cr4, xcr0;
+
+	/* turn on CR4.OSXSAVE */
+	cr4 = get_cr4();
+	cr4 |= X86_CR4_OSXSAVE;
+	set_cr4(cr4);
+
+	xcr0 = __xgetbv(0);
+	xcr0 |= XFEATURE_MASK_XTILE;
+	__xsetbv(0x0, xcr0);
+}
+
+static void __attribute__((__flatten__)) guest_code(struct tile_config *amx_cfg,
+						    struct tile_data *tiledata,
+						    struct xsave_data *xsave_data)
+{
+	init_regs();
+	check_cpuid_xsave();
+	GUEST_ASSERT(check_xsave_supports_xtile());
+	GUEST_ASSERT(check_xtile_info());
+
+	/* The enumerated xtile layout must match the values this test assumes */
+	GUEST_ASSERT(xtile.xsave_offset == 2816);
+	GUEST_ASSERT(xtile.xsave_size == 8192);
+	GUEST_ASSERT(xtile.max_names == 8);
+	GUEST_ASSERT(xtile.bytes_per_tile == 1024);
+	GUEST_ASSERT(xtile.bytes_per_row == 64);
+	GUEST_ASSERT(xtile.max_rows == 16);
+	GUEST_SYNC(1);
+
+	/* xfd=0, enable amx */
+	wrmsr(MSR_IA32_XFD, 0);
+	GUEST_SYNC(2);
+	GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == 0);
+	set_tilecfg(amx_cfg);
+	__ldtilecfg(amx_cfg);
+	GUEST_SYNC(3);
+	/* Load tile data; the host compares it against saved state at sync 4 */
+	__tileloadd(tiledata);
+	GUEST_SYNC(4);
+	__tilerelease();
+	GUEST_SYNC(5);
+	/* bit 18 must be clear in XSTATE_BV (header offset 512) after xsavec() */
+	set_xstatebv(xsave_data, XFEATURE_MASK_XTILEDATA);
+	__xsavec(xsave_data, XFEATURE_MASK_XTILEDATA);
+	GUEST_ASSERT((get_xstatebv(xsave_data) & XFEATURE_MASK_XTILEDATA) == 0);
+
+	/* xfd=0x40000, disable amx tiledata */
+	wrmsr(MSR_IA32_XFD, XFEATURE_MASK_XTILEDATA);
+	GUEST_SYNC(6);
+	GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILEDATA);
+	set_tilecfg(amx_cfg);
+	__ldtilecfg(amx_cfg);
+	/* Trigger #NM exception; guest_nm_handler() issues syncs 7-9 */
+	__tileloadd(tiledata);
+	GUEST_SYNC(10);
+
+	GUEST_DONE();
+}
+
+void guest_nm_handler(struct ex_regs *regs)
+{
+	/* Check if #NM is triggered by XFEATURE_MASK_XTILEDATA */
+	GUEST_SYNC(7);
+	GUEST_ASSERT(rdmsr(MSR_IA32_XFD_ERR) == XFEATURE_MASK_XTILEDATA);
+	GUEST_SYNC(8);
+	GUEST_ASSERT(rdmsr(MSR_IA32_XFD_ERR) == XFEATURE_MASK_XTILEDATA);
+	/* Clear xfd_err */
+	wrmsr(MSR_IA32_XFD_ERR, 0);
+	/* xfd=0, enable amx */
+	wrmsr(MSR_IA32_XFD, 0);
+	GUEST_SYNC(9);
+}
+
+int main(int argc, char *argv[])
+{
+	struct kvm_regs regs1, regs2;
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+	struct kvm_run *run;
+	struct kvm_x86_state *state;
+	int xsave_restore_size;
+	vm_vaddr_t amx_cfg, tiledata, xsavedata;
+	struct ucall uc;
+	u32 amx_offset;
+	int stage, ret;
+
+	vm_xsave_require_permission(XSTATE_XTILE_DATA_BIT);
+
+	/* Create VM */
+	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE));
+	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_AMX_TILE));
+	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILECFG));
+	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILEDATA));
+
+	/* Get xsave/restore max size */
+	xsave_restore_size = kvm_get_supported_cpuid_entry(0xd)->ecx;
+
+	run = vcpu->run;
+	vcpu_regs_get(vcpu, &regs1);
+
+	/* Register #NM handler */
+	vm_init_descriptor_tables(vm);
+	vcpu_init_descriptor_tables(vcpu);
+	vm_install_exception_handler(vm, NM_VECTOR, guest_nm_handler);
+
+	/* amx cfg for guest_code */
+	amx_cfg = vm_vaddr_alloc_page(vm);
+	memset(addr_gva2hva(vm, amx_cfg), 0x0, getpagesize());
+
+	/* amx tiledata for guest_code */
+	tiledata = vm_vaddr_alloc_pages(vm, 2);
+	memset(addr_gva2hva(vm, tiledata), rand() | 1, 2 * getpagesize());
+
+	/* xsave data for guest_code */
+	xsavedata = vm_vaddr_alloc_pages(vm, 3);
+	memset(addr_gva2hva(vm, xsavedata), 0, 3 * getpagesize());
+	vcpu_args_set(vcpu, 3, amx_cfg, tiledata, xsavedata);
+
+	for (stage = 1; ; stage++) {
+		vcpu_run(vcpu);
+		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+			    "Stage %d: unexpected exit reason: %u (%s),\n",
+			    stage, run->exit_reason,
+			    exit_reason_str(run->exit_reason));
+
+		switch (get_ucall(vcpu, &uc)) {
+		case UCALL_ABORT:
+			REPORT_GUEST_ASSERT(uc);
+			/* NOT REACHED */
+		case UCALL_SYNC:
+			switch (uc.args[1]) {
+			case 1:
+			case 2:
+			case 3:
+			case 5:
+			case 6:
+			case 7:
+			case 8:
+				fprintf(stderr, "GUEST_SYNC(%ld)\n", uc.args[1]);
+				break;
+			case 4:
+			case 10:
+				fprintf(stderr,
+				"GUEST_SYNC(%ld), check save/restore status\n", uc.args[1]);
+
+				/* Compacted mode, get amx offset by xsave area
+				 * size subtract 8K amx size.
+				 */
+				amx_offset = xsave_restore_size - NUM_TILES*TILE_SIZE;
+				state = vcpu_save_state(vcpu);
+				void *amx_start = (void *)state->xsave + amx_offset;
+				void *tiles_data = (void *)addr_gva2hva(vm, tiledata);
+				/* Only check TMM0 register, 1 tile */
+				ret = memcmp(amx_start, tiles_data, TILE_SIZE);
+				TEST_ASSERT(ret == 0, "memcmp failed, ret=%d\n", ret);
+				kvm_x86_state_cleanup(state);
+				break;
+			case 9:
+				fprintf(stderr,
+				"GUEST_SYNC(%ld), #NM exception and enable amx\n", uc.args[1]);
+				break;
+			}
+			break;
+		case UCALL_DONE:
+			fprintf(stderr, "UCALL_DONE\n");
+			goto done;
+		default:
+			TEST_FAIL("Unknown ucall %lu", uc.cmd);
+		}
+
+		state = vcpu_save_state(vcpu);
+		memset(&regs1, 0, sizeof(regs1));
+		vcpu_regs_get(vcpu, &regs1);
+
+		kvm_vm_release(vm);
+
+		/* Restore state in a new VM.  */
+		vcpu = vm_recreate_with_one_vcpu(vm);
+		vcpu_load_state(vcpu, state);
+		run = vcpu->run;
+		kvm_x86_state_cleanup(state);
+
+		memset(&regs2, 0, sizeof(regs2));
+		vcpu_regs_get(vcpu, &regs2);
+		TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
+			    "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
+			    (ulong) regs2.rdi, (ulong) regs2.rsi);
+	}
+done:
+	kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/cpuid_test.c b/tools/testing/selftests/kvm/x86_64/cpuid_test.c
new file mode 100644
index 000000000000..a6aeee2e62e4
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/cpuid_test.c
@@ -0,0 +1,196 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021, Red Hat Inc.
+ *
+ * Generic tests for KVM CPUID set/get ioctls
+ */
+#include <asm/kvm_para.h>
+#include <linux/kvm_para.h>
+#include <stdint.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+/* CPUIDs known to differ */
+struct {
+	u32 function;
+	u32 index;
+} mangled_cpuids[] = {
+	/*
+	 * These entries depend on the vCPU's XCR0 register and IA32_XSS MSR,
+	 * which are not controlled for by this test.
+	 */
+	{.function = 0xd, .index = 0},
+	{.function = 0xd, .index = 1},
+};
+
+static void test_guest_cpuids(struct kvm_cpuid2 *guest_cpuid)
+{
+	int i;
+	u32 eax, ebx, ecx, edx;
+
+	for (i = 0; i < guest_cpuid->nent; i++) {
+		__cpuid(guest_cpuid->entries[i].function,
+			guest_cpuid->entries[i].index,
+			&eax, &ebx, &ecx, &edx);
+
+		GUEST_ASSERT(eax == guest_cpuid->entries[i].eax &&
+			     ebx == guest_cpuid->entries[i].ebx &&
+			     ecx == guest_cpuid->entries[i].ecx &&
+			     edx == guest_cpuid->entries[i].edx);
+	}
+
+}
+
+static void test_cpuid_40000000(struct kvm_cpuid2 *guest_cpuid)
+{
+	u32 eax, ebx, ecx, edx;
+
+	cpuid(0x40000000, &eax, &ebx, &ecx, &edx);
+
+	GUEST_ASSERT(eax == 0x40000001);
+}
+
+static void guest_main(struct kvm_cpuid2 *guest_cpuid)
+{
+	GUEST_SYNC(1);
+
+	test_guest_cpuids(guest_cpuid);
+
+	GUEST_SYNC(2);
+
+	test_cpuid_40000000(guest_cpuid);
+
+	GUEST_DONE();
+}
+
+static bool is_cpuid_mangled(const struct kvm_cpuid_entry2 *entrie)
+{
+	int i;
+
+	/* Bound the scan by element count; sizeof() alone is a byte count. */
+	for (i = 0; i < sizeof(mangled_cpuids) / sizeof(mangled_cpuids[0]); i++) {
+		if (mangled_cpuids[i].function == entrie->function &&
+		    mangled_cpuids[i].index == entrie->index)
+			return true;
+	}
+	return false;
+}
+
+static void compare_cpuids(const struct kvm_cpuid2 *cpuid1,
+			   const struct kvm_cpuid2 *cpuid2)
+{
+	const struct kvm_cpuid_entry2 *e1, *e2;
+	int i;
+
+	TEST_ASSERT(cpuid1->nent == cpuid2->nent,
+		    "CPUID nent mismatch: %d vs. %d", cpuid1->nent, cpuid2->nent);
+
+	for (i = 0; i < cpuid1->nent; i++) {
+		e1 = &cpuid1->entries[i];
+		e2 = &cpuid2->entries[i];
+
+		TEST_ASSERT(e1->function == e2->function &&
+			    e1->index == e2->index && e1->flags == e2->flags,
+			    "CPUID entries[%d] mismtach: 0x%x.%d.%x vs. 0x%x.%d.%x\n",
+			    i, e1->function, e1->index, e1->flags,
+			    e2->function, e2->index, e2->flags);
+
+		if (is_cpuid_mangled(e1))
+			continue;
+
+		TEST_ASSERT(e1->eax == e2->eax && e1->ebx == e2->ebx &&
+			    e1->ecx == e2->ecx && e1->edx == e2->edx,
+			    "CPUID 0x%x.%x differ: 0x%x:0x%x:0x%x:0x%x vs 0x%x:0x%x:0x%x:0x%x",
+			    e1->function, e1->index,
+			    e1->eax, e1->ebx, e1->ecx, e1->edx,
+			    e2->eax, e2->ebx, e2->ecx, e2->edx);
+	}
+}
+
+static void run_vcpu(struct kvm_vcpu *vcpu, int stage)
+{
+	struct ucall uc;
+
+	vcpu_run(vcpu);
+
+	switch (get_ucall(vcpu, &uc)) {
+	case UCALL_SYNC:
+		TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+			    uc.args[1] == stage + 1,
+			    "Stage %d: Unexpected register values vmexit, got %lx",
+			    stage + 1, (ulong)uc.args[1]);
+		return;
+	case UCALL_DONE:
+		return;
+	case UCALL_ABORT:
+		REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx");
+	default:
+		TEST_ASSERT(false, "Unexpected exit: %s",
+			    exit_reason_str(vcpu->run->exit_reason));
+	}
+}
+
+struct kvm_cpuid2 *vcpu_alloc_cpuid(struct kvm_vm *vm, vm_vaddr_t *p_gva, struct kvm_cpuid2 *cpuid)
+{
+	int size = sizeof(*cpuid) + cpuid->nent * sizeof(cpuid->entries[0]);
+	vm_vaddr_t gva = vm_vaddr_alloc(vm, size, KVM_UTIL_MIN_VADDR);
+	struct kvm_cpuid2 *guest_cpuids = addr_gva2hva(vm, gva);
+
+	memcpy(guest_cpuids, cpuid, size);
+
+	*p_gva = gva;
+	return guest_cpuids;
+}
+
+static void set_cpuid_after_run(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpuid_entry2 *ent;
+	int rc;
+	u32 eax, ebx, x;
+
+	/* Setting unmodified CPUID is allowed */
+	rc = __vcpu_set_cpuid(vcpu);
+	TEST_ASSERT(!rc, "Setting unmodified CPUID after KVM_RUN failed: %d", rc);
+
+	/* Changing CPU features is forbidden */
+	ent = vcpu_get_cpuid_entry(vcpu, 0x7);
+	ebx = ent->ebx;
+	ent->ebx--;
+	rc = __vcpu_set_cpuid(vcpu);
+	TEST_ASSERT(rc, "Changing CPU features should fail");
+	ent->ebx = ebx;
+
+	/* Changing MAXPHYADDR is forbidden */
+	ent = vcpu_get_cpuid_entry(vcpu, 0x80000008);
+	eax = ent->eax;
+	x = eax & 0xff;
+	ent->eax = (eax & ~0xffu) | (x - 1);
+	rc = __vcpu_set_cpuid(vcpu);
+	TEST_ASSERT(rc, "Changing MAXPHYADDR should fail");
+	ent->eax = eax;
+}
+
+int main(void)
+{
+	struct kvm_vcpu *vcpu;
+	vm_vaddr_t cpuid_gva;
+	struct kvm_vm *vm;
+	int stage;
+
+	vm = vm_create_with_one_vcpu(&vcpu, guest_main);
+
+	compare_cpuids(kvm_get_supported_cpuid(), vcpu->cpuid);
+
+	vcpu_alloc_cpuid(vm, &cpuid_gva, vcpu->cpuid);
+
+	vcpu_args_set(vcpu, 1, cpuid_gva);
+
+	for (stage = 0; stage < 3; stage++)
+		run_vcpu(vcpu, stage);
+
+	set_cpuid_after_run(vcpu);
+
+	kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
index 63cc9c3f5ab6..4208487652f8 100644
--- a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
+++ b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
@@ -19,25 +19,11 @@
 #include "kvm_util.h"
 #include "processor.h"
 
-#define X86_FEATURE_XSAVE	(1<<26)
-#define X86_FEATURE_OSXSAVE	(1<<27)
-#define VCPU_ID			1
-
 static inline bool cr4_cpuid_is_sync(void)
 {
-	int func, subfunc;
-	uint32_t eax, ebx, ecx, edx;
-	uint64_t cr4;
-
-	func = 0x1;
-	subfunc = 0x0;
-	__asm__ __volatile__("cpuid"
-			     : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
-			     : "a"(func), "c"(subfunc));
-
-	cr4 = get_cr4();
+	uint64_t cr4 = get_cr4();
 
-	return (!!(ecx & X86_FEATURE_OSXSAVE)) == (!!(cr4 & X86_CR4_OSXSAVE));
+	return (this_cpu_has(X86_FEATURE_OSXSAVE) == !!(cr4 & X86_CR4_OSXSAVE));
 }
 
 static void guest_code(void)
@@ -63,55 +49,46 @@ static void guest_code(void)
 
 int main(int argc, char *argv[])
 {
+	struct kvm_vcpu *vcpu;
 	struct kvm_run *run;
 	struct kvm_vm *vm;
 	struct kvm_sregs sregs;
-	struct kvm_cpuid_entry2 *entry;
 	struct ucall uc;
-	int rc;
 
-	entry = kvm_get_supported_cpuid_entry(1);
-	if (!(entry->ecx & X86_FEATURE_XSAVE)) {
-		printf("XSAVE feature not supported, skipping test\n");
-		return 0;
-	}
+	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE));
 
 	/* Tell stdout not to buffer its content */
 	setbuf(stdout, NULL);
 
-	/* Create VM */
-	vm = vm_create_default(VCPU_ID, 0, guest_code);
-	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
-	run = vcpu_state(vm, VCPU_ID);
+	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+	run = vcpu->run;
 
 	while (1) {
-		rc = _vcpu_run(vm, VCPU_ID);
+		vcpu_run(vcpu);
 
-		TEST_ASSERT(rc == 0, "vcpu_run failed: %d\n", rc);
 		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
 			    "Unexpected exit reason: %u (%s),\n",
 			    run->exit_reason,
 			    exit_reason_str(run->exit_reason));
 
-		switch (get_ucall(vm, VCPU_ID, &uc)) {
+		switch (get_ucall(vcpu, &uc)) {
 		case UCALL_SYNC:
 			/* emulate hypervisor clearing CR4.OSXSAVE */
-			vcpu_sregs_get(vm, VCPU_ID, &sregs);
+			vcpu_sregs_get(vcpu, &sregs);
 			sregs.cr4 &= ~X86_CR4_OSXSAVE;
-			vcpu_sregs_set(vm, VCPU_ID, &sregs);
+			vcpu_sregs_set(vcpu, &sregs);
 			break;
 		case UCALL_ABORT:
-			TEST_ASSERT(false, "Guest CR4 bit (OSXSAVE) unsynchronized with CPUID bit.");
+			REPORT_GUEST_ASSERT(uc);
 			break;
 		case UCALL_DONE:
 			goto done;
 		default:
-			TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd);
+			TEST_FAIL("Unknown ucall %lu", uc.cmd);
 		}
 	}
 
-	kvm_vm_free(vm);
-
 done:
+	kvm_vm_free(vm);
 	return 0;
 }
diff --git a/tools/testing/selftests/kvm/x86_64/debug_regs.c b/tools/testing/selftests/kvm/x86_64/debug_regs.c
new file mode 100644
index 000000000000..7ef99c3359a0
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/debug_regs.c
@@ -0,0 +1,214 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM guest debug register tests
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+#include <stdio.h>
+#include <string.h>
+#include "kvm_util.h"
+#include "processor.h"
+#include "apic.h"
+
+#define DR6_BD		(1 << 13)
+#define DR7_GD		(1 << 13)
+
+#define IRQ_VECTOR 0xAA
+
+/* For testing data access debug BP */
+uint32_t guest_value;
+
+extern unsigned char sw_bp, hw_bp, write_data, ss_start, bd_start;
+
+static void guest_code(void)
+{
+	/* Create a pending interrupt on current vCPU */
+	x2apic_enable();
+	x2apic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_INT_ASSERT |
+			 APIC_DM_FIXED | IRQ_VECTOR);
+
+	/*
+	 * Software BP tests.
+	 *
+	 * NOTE: sw_bp need to be before the cmd here, because int3 is an
+	 * exception rather than a normal trap for KVM_SET_GUEST_DEBUG (we
+	 * capture it using the vcpu exception bitmap).
+	 */
+	asm volatile("sw_bp: int3");
+
+	/* Hardware instruction BP test */
+	asm volatile("hw_bp: nop");
+
+	/* Hardware data BP test */
+	asm volatile("mov $1234,%%rax;\n\t"
+		     "mov %%rax,%0;\n\t write_data:"
+		     : "=m" (guest_value) : : "rax");
+
+	/*
+	 * Single step test, covers 2 basic instructions and 2 emulated
+	 *
+	 * Enable interrupts during the single stepping to see that
+	 * pending interrupt we raised is not handled due to KVM_GUESTDBG_BLOCKIRQ
+	 */
+	asm volatile("ss_start: "
+		     "sti\n\t"
+		     "xor %%eax,%%eax\n\t"
+		     "cpuid\n\t"
+		     "movl $0x1a0,%%ecx\n\t"
+		     "rdmsr\n\t"
+		     "cli\n\t"
+		     : : : "eax", "ebx", "ecx", "edx");
+
+	/* DR6.BD test */
+	asm volatile("bd_start: mov %%dr0, %%rax" : : : "rax");
+	GUEST_DONE();
+}
+
+#define  CAST_TO_RIP(v)  ((unsigned long long)&(v))
+
+static void vcpu_skip_insn(struct kvm_vcpu *vcpu, int insn_len)
+{
+	struct kvm_regs regs;
+
+	vcpu_regs_get(vcpu, &regs);
+	regs.rip += insn_len;
+	vcpu_regs_set(vcpu, &regs);
+}
+
+int main(void)
+{
+	struct kvm_guest_debug debug;
+	unsigned long long target_dr6, target_rip;
+	struct kvm_vcpu *vcpu;
+	struct kvm_run *run;
+	struct kvm_vm *vm;
+	struct ucall uc;
+	uint64_t cmd;
+	int i;
+	/* Instruction lengths starting at ss_start */
+	int ss_size[6] = {
+		1,		/* sti */
+		2,		/* xor */
+		2,		/* cpuid */
+		5,		/* mov */
+		2,		/* rdmsr */
+		1,		/* cli */
+	};
+
+	TEST_REQUIRE(kvm_has_cap(KVM_CAP_SET_GUEST_DEBUG));
+
+	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+	run = vcpu->run;
+
+	/* Test software BPs - int3 */
+	memset(&debug, 0, sizeof(debug));
+	debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
+	vcpu_guest_debug_set(vcpu, &debug);
+	vcpu_run(vcpu);
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
+		    run->debug.arch.exception == BP_VECTOR &&
+		    run->debug.arch.pc == CAST_TO_RIP(sw_bp),
+		    "INT3: exit %d exception %d rip 0x%llx (should be 0x%llx)",
+		    run->exit_reason, run->debug.arch.exception,
+		    run->debug.arch.pc, CAST_TO_RIP(sw_bp));
+	vcpu_skip_insn(vcpu, 1);
+
+	/* Test instruction HW BP over DR[0-3] */
+	for (i = 0; i < 4; i++) {
+		memset(&debug, 0, sizeof(debug));
+		debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
+		debug.arch.debugreg[i] = CAST_TO_RIP(hw_bp);
+		debug.arch.debugreg[7] = 0x400 | (1UL << (2*i+1));
+		vcpu_guest_debug_set(vcpu, &debug);
+		vcpu_run(vcpu);
+		target_dr6 = 0xffff0ff0 | (1UL << i);
+		TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
+			    run->debug.arch.exception == DB_VECTOR &&
+			    run->debug.arch.pc == CAST_TO_RIP(hw_bp) &&
+			    run->debug.arch.dr6 == target_dr6,
+			    "INS_HW_BP (DR%d): exit %d exception %d rip 0x%llx "
+			    "(should be 0x%llx) dr6 0x%llx (should be 0x%llx)",
+			    i, run->exit_reason, run->debug.arch.exception,
+			    run->debug.arch.pc, CAST_TO_RIP(hw_bp),
+			    run->debug.arch.dr6, target_dr6);
+	}
+	/* Skip "nop" */
+	vcpu_skip_insn(vcpu, 1);
+
+	/* Test data access HW BP over DR[0-3] */
+	for (i = 0; i < 4; i++) {
+		memset(&debug, 0, sizeof(debug));
+		debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
+		debug.arch.debugreg[i] = CAST_TO_RIP(guest_value);
+		debug.arch.debugreg[7] = 0x00000400 | (1UL << (2*i+1)) |
+		    (0x000d0000UL << (4*i));
+		vcpu_guest_debug_set(vcpu, &debug);
+		vcpu_run(vcpu);
+		target_dr6 = 0xffff0ff0 | (1UL << i);
+		TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
+			    run->debug.arch.exception == DB_VECTOR &&
+			    run->debug.arch.pc == CAST_TO_RIP(write_data) &&
+			    run->debug.arch.dr6 == target_dr6,
+			    "DATA_HW_BP (DR%d): exit %d exception %d rip 0x%llx "
+			    "(should be 0x%llx) dr6 0x%llx (should be 0x%llx)",
+			    i, run->exit_reason, run->debug.arch.exception,
+			    run->debug.arch.pc, CAST_TO_RIP(write_data),
+			    run->debug.arch.dr6, target_dr6);
+		/* Roll back the 7-byte store "mov" so the next DR re-runs it */
+		vcpu_skip_insn(vcpu, -7);
+	}
+	/* Skip the 7-byte store "mov" */
+	vcpu_skip_insn(vcpu, 7);
+
+	/* Test single step */
+	target_rip = CAST_TO_RIP(ss_start);
+	target_dr6 = 0xffff4ff0ULL;
+	for (i = 0; i < (sizeof(ss_size) / sizeof(ss_size[0])); i++) {
+		target_rip += ss_size[i];
+		memset(&debug, 0, sizeof(debug));
+		debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP |
+				KVM_GUESTDBG_BLOCKIRQ;
+		debug.arch.debugreg[7] = 0x00000400;
+		vcpu_guest_debug_set(vcpu, &debug);
+		vcpu_run(vcpu);
+		TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
+			    run->debug.arch.exception == DB_VECTOR &&
+			    run->debug.arch.pc == target_rip &&
+			    run->debug.arch.dr6 == target_dr6,
+			    "SINGLE_STEP[%d]: exit %d exception %d rip 0x%llx "
+			    "(should be 0x%llx) dr6 0x%llx (should be 0x%llx)",
+			    i, run->exit_reason, run->debug.arch.exception,
+			    run->debug.arch.pc, target_rip, run->debug.arch.dr6,
+			    target_dr6);
+	}
+
+	/* Finally test global disable */
+	memset(&debug, 0, sizeof(debug));
+	debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
+	debug.arch.debugreg[7] = 0x400 | DR7_GD;
+	vcpu_guest_debug_set(vcpu, &debug);
+	vcpu_run(vcpu);
+	target_dr6 = 0xffff0ff0 | DR6_BD;
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
+		    run->debug.arch.exception == DB_VECTOR &&
+		    run->debug.arch.pc == CAST_TO_RIP(bd_start) &&
+		    run->debug.arch.dr6 == target_dr6,
+		    "DR7.GD: exit %d exception %d rip 0x%llx "
+		    "(should be 0x%llx) dr6 0x%llx (should be 0x%llx)",
+		    run->exit_reason, run->debug.arch.exception,
+		    run->debug.arch.pc, CAST_TO_RIP(bd_start),
+		    run->debug.arch.dr6, target_dr6);
+
+	/* Disable all debug controls, run to the end */
+	memset(&debug, 0, sizeof(debug));
+	vcpu_guest_debug_set(vcpu, &debug);
+
+	vcpu_run(vcpu);
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "KVM_EXIT_IO");
+	cmd = get_ucall(vcpu, &uc);
+	TEST_ASSERT(cmd == UCALL_DONE, "UCALL_DONE");
+
+	kvm_vm_free(vm);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/emulator_error_test.c b/tools/testing/selftests/kvm/x86_64/emulator_error_test.c
new file mode 100644
index 000000000000..236e11755ba6
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/emulator_error_test.c
@@ -0,0 +1,193 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020, Google LLC.
+ *
+ * Tests for KVM_CAP_EXIT_ON_EMULATION_FAILURE capability.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "vmx.h"
+
+#define MAXPHYADDR 36
+
+#define MEM_REGION_GVA	0x0000123456789000
+#define MEM_REGION_GPA	0x0000000700000000
+#define MEM_REGION_SLOT	10
+#define MEM_REGION_SIZE PAGE_SIZE
+
+static void guest_code(void)
+{
+	__asm__ __volatile__("flds (%[addr])"
+			     :: [addr]"r"(MEM_REGION_GVA));
+
+	GUEST_DONE();
+}
+
+/*
+ * Accessors to get R/M, REG, and Mod bits described in the SDM vol 2,
+ * figure 2-2 "Table Interpretation of ModR/M Byte (C8H)".
+ */
+#define GET_RM(insn_byte) ((insn_byte) & 0x7)		/* ModR/M bits 2:0 */
+#define GET_REG(insn_byte) (((insn_byte) & 0x38) >> 3)	/* ModR/M bits 5:3 */
+#define GET_MOD(insn_byte) (((insn_byte) & 0xc0) >> 6)	/* ModR/M bits 7:6 */
+
+/* Ensure we are dealing with a simple 2-byte flds instruction. */
+static bool is_flds(uint8_t *insn_bytes, uint8_t insn_size)
+{
+	return insn_size >= 2 &&
+	       insn_bytes[0] == 0xd9 &&
+	       GET_REG(insn_bytes[1]) == 0x0 &&
+	       GET_MOD(insn_bytes[1]) == 0x0 &&
+	       /* Ensure there is no SIB byte. */
+	       GET_RM(insn_bytes[1]) != 0x4 &&
+	       /* Ensure there is no displacement byte. */
+	       GET_RM(insn_bytes[1]) != 0x5;
+}
+
+static void process_exit_on_emulation_error(struct kvm_vcpu *vcpu)
+{
+	struct kvm_run *run = vcpu->run;
+	struct kvm_regs regs;
+	uint8_t *insn_bytes;
+	uint8_t insn_size;
+	uint64_t flags;
+
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_INTERNAL_ERROR,
+		    "Unexpected exit reason: %u (%s)",
+		    run->exit_reason,
+		    exit_reason_str(run->exit_reason));
+
+	TEST_ASSERT(run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION,
+		    "Unexpected suberror: %u",
+		    run->emulation_failure.suberror);
+
+	if (run->emulation_failure.ndata >= 1) {
+		flags = run->emulation_failure.flags;
+		if ((flags & KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES) &&
+		    run->emulation_failure.ndata >= 3) {
+			insn_size = run->emulation_failure.insn_size;
+			insn_bytes = run->emulation_failure.insn_bytes;
+
+			TEST_ASSERT(insn_size <= 15 && insn_size > 0,
+				    "Unexpected instruction size: %u",
+				    insn_size);
+
+			TEST_ASSERT(is_flds(insn_bytes, insn_size),
+				    "Unexpected instruction.  Expected 'flds' (0xd9 /0)");
+
+			/*
+			 * If is_flds() succeeded then the instruction bytes
+			 * contained an flds instruction that is 2-bytes in
+			 * length (ie: no prefix, no SIB, no displacement).
+			 */
+			vcpu_regs_get(vcpu, &regs);
+			regs.rip += 2;
+			vcpu_regs_set(vcpu, &regs);
+		}
+	}
+}
+
+static void do_guest_assert(struct ucall *uc)
+{
+	REPORT_GUEST_ASSERT(*uc);
+}
+
+static void check_for_guest_assert(struct kvm_vcpu *vcpu)
+{
+	struct ucall uc;
+
+	if (vcpu->run->exit_reason == KVM_EXIT_IO &&
+	    get_ucall(vcpu, &uc) == UCALL_ABORT) {
+		do_guest_assert(&uc);
+	}
+}
+
+static void process_ucall_done(struct kvm_vcpu *vcpu)
+{
+	struct kvm_run *run = vcpu->run;
+	struct ucall uc;
+
+	check_for_guest_assert(vcpu);
+
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+		    "Unexpected exit reason: %u (%s)",
+		    run->exit_reason,
+		    exit_reason_str(run->exit_reason));
+
+	TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_DONE,
+		    "Unexpected ucall command: %lu, expected UCALL_DONE (%d)",
+		    uc.cmd, UCALL_DONE);
+}
+
+static uint64_t process_ucall(struct kvm_vcpu *vcpu)
+{
+	struct kvm_run *run = vcpu->run;
+	struct ucall uc;
+
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+		    "Unexpected exit reason: %u (%s)",
+		    run->exit_reason,
+		    exit_reason_str(run->exit_reason));
+
+	switch (get_ucall(vcpu, &uc)) {
+	case UCALL_SYNC:
+		break;
+	case UCALL_ABORT:
+		do_guest_assert(&uc);
+		break;
+	case UCALL_DONE:
+		process_ucall_done(vcpu);
+		break;
+	default:
+		TEST_ASSERT(false, "Unexpected ucall");
+	}
+
+	return uc.cmd;
+}
+
+int main(int argc, char *argv[])
+{
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+	uint64_t gpa, pte;
+	uint64_t *hva;
+	int rc;
+
+	/* Tell stdout not to buffer its content */
+	setbuf(stdout, NULL);
+
+	TEST_REQUIRE(kvm_has_cap(KVM_CAP_SMALLER_MAXPHYADDR));
+
+	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+	vcpu_set_cpuid_maxphyaddr(vcpu, MAXPHYADDR);
+
+	rc = kvm_check_cap(KVM_CAP_EXIT_ON_EMULATION_FAILURE);
+	TEST_ASSERT(rc, "KVM_CAP_EXIT_ON_EMULATION_FAILURE is unavailable");
+	vm_enable_cap(vm, KVM_CAP_EXIT_ON_EMULATION_FAILURE, 1);
+
+	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+				    MEM_REGION_GPA, MEM_REGION_SLOT,
+				    MEM_REGION_SIZE / PAGE_SIZE, 0);
+	gpa = vm_phy_pages_alloc(vm, MEM_REGION_SIZE / PAGE_SIZE,
+				 MEM_REGION_GPA, MEM_REGION_SLOT);
+	TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc\n");
+	virt_map(vm, MEM_REGION_GVA, MEM_REGION_GPA, 1);
+	hva = addr_gpa2hva(vm, MEM_REGION_GPA);
+	memset(hva, 0, PAGE_SIZE);
+	pte = vm_get_page_table_entry(vm, vcpu, MEM_REGION_GVA);
+	vm_set_page_table_entry(vm, vcpu, MEM_REGION_GVA, pte | (1ull << MAXPHYADDR));
+
+	vcpu_run(vcpu);
+	process_exit_on_emulation_error(vcpu);
+	vcpu_run(vcpu);
+
+	TEST_ASSERT(process_ucall(vcpu) == UCALL_DONE, "Expected UCALL_DONE");
+
+	kvm_vm_free(vm);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
index 92915e6408e7..99bc202243d2 100644
--- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
@@ -10,6 +10,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <sys/ioctl.h>
+#include <linux/bitmap.h>
 
 #include "test_util.h"
 
@@ -17,23 +18,66 @@
 
 #include "vmx.h"
 
-#define VCPU_ID		5
+static int ud_count;
 
-void l2_guest_code(void)
+static void guest_ud_handler(struct ex_regs *regs)
 {
-	GUEST_SYNC(6);
+	ud_count++;
+	regs->rip += 3; /* VMLAUNCH */
+}
+
+static void guest_nmi_handler(struct ex_regs *regs)
+{
+}
+
+/* Exits to L1 destroy GPRs! */
+static inline void rdmsr_fs_base(void)
+{
+	__asm__ __volatile__ ("mov $0xc0000100, %%rcx; rdmsr" : : :
+			      "rax", "rbx", "rcx", "rdx",
+			      "rsi", "rdi", "r8", "r9", "r10", "r11", "r12",
+			      "r13", "r14", "r15");
+}
+static inline void rdmsr_gs_base(void)
+{
+	__asm__ __volatile__ ("mov $0xc0000101, %%rcx; rdmsr" : : :
+			      "rax", "rbx", "rcx", "rdx",
+			      "rsi", "rdi", "r8", "r9", "r10", "r11", "r12",
+			      "r13", "r14", "r15");
+}
 
+void l2_guest_code(void)
+{
 	GUEST_SYNC(7);
 
+	GUEST_SYNC(8);
+
+	/* Forced exit to L1 upon restore */
+	GUEST_SYNC(9);
+
+	vmcall();
+
+	/* MSR-Bitmap tests */
+	rdmsr_fs_base(); /* intercepted */
+	rdmsr_fs_base(); /* intercepted */
+	rdmsr_gs_base(); /* not intercepted */
+	vmcall();
+	rdmsr_gs_base(); /* intercepted */
+
 	/* Done, exit to L1 and never come back.  */
 	vmcall();
 }
 
-void l1_guest_code(struct vmx_pages *vmx_pages)
+void guest_code(struct vmx_pages *vmx_pages)
 {
 #define L2_GUEST_STACK_SIZE 64
 	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
 
+	x2apic_enable();
+
+	GUEST_SYNC(1);
+	GUEST_SYNC(2);
+
 	enable_vp_assist(vmx_pages->vp_assist_gpa, vmx_pages->vp_assist);
 
 	GUEST_ASSERT(vmx_pages->vmcs_gpa);
@@ -50,101 +94,180 @@ void l1_guest_code(struct vmx_pages *vmx_pages)
 
 	GUEST_SYNC(5);
 	GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa);
+	current_evmcs->revision_id = -1u;
+	GUEST_ASSERT(vmlaunch());
+	current_evmcs->revision_id = EVMCS_VERSION;
+	GUEST_SYNC(6);
+
+	vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmreadz(PIN_BASED_VM_EXEC_CONTROL) |
+		PIN_BASED_NMI_EXITING);
+
 	GUEST_ASSERT(!vmlaunch());
 	GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa);
-	GUEST_SYNC(8);
+
+	/*
+	 * NMI forces an L2->L1 exit; resume L2 and hope that the EVMCS is
+	 * up-to-date (RIP points where it should and not at the beginning
+	 * of l2_guest_code()). GUEST_SYNC(9) checks that.
+	 */
 	GUEST_ASSERT(!vmresume());
+
+	GUEST_SYNC(10);
+
 	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
-	GUEST_SYNC(9);
+	current_evmcs->guest_rip += 3; /* vmcall */
+
+	/* Intercept RDMSR 0xc0000100 */
+	vmwrite(CPU_BASED_VM_EXEC_CONTROL, vmreadz(CPU_BASED_VM_EXEC_CONTROL) |
+		CPU_BASED_USE_MSR_BITMAPS);
+	set_bit(MSR_FS_BASE & 0x1fff, vmx_pages->msr + 0x400);
+	GUEST_ASSERT(!vmresume());
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
+	current_evmcs->guest_rip += 2; /* rdmsr */
+
+	/* Enable enlightened MSR bitmap */
+	current_evmcs->hv_enlightenments_control.msr_bitmap = 1;
+	GUEST_ASSERT(!vmresume());
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
+	current_evmcs->guest_rip += 2; /* rdmsr */
+
+	/* Intercept RDMSR 0xc0000101 without telling KVM about it */
+	set_bit(MSR_GS_BASE & 0x1fff, vmx_pages->msr + 0x400);
+	/* Make sure HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP is set */
+	current_evmcs->hv_clean_fields |= HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP;
+	GUEST_ASSERT(!vmresume());
+	/* Make sure we don't see EXIT_REASON_MSR_READ here so eMSR bitmap works */
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+	current_evmcs->guest_rip += 3; /* vmcall */
+
+	/* Now tell KVM we've changed MSR-Bitmap */
+	current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP;
+	GUEST_ASSERT(!vmresume());
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
+	current_evmcs->guest_rip += 2; /* rdmsr */
+
+	GUEST_ASSERT(!vmresume());
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+	GUEST_SYNC(11);
+
+	/* Try enlightened vmptrld with an incorrect GPA */
+	evmcs_vmptrld(0xdeadbeef, vmx_pages->enlightened_vmcs);
+	GUEST_ASSERT(vmlaunch());
+	GUEST_ASSERT(ud_count == 1);
+	GUEST_DONE();
 }
 
-void guest_code(struct vmx_pages *vmx_pages)
+void inject_nmi(struct kvm_vcpu *vcpu)
 {
-	GUEST_SYNC(1);
-	GUEST_SYNC(2);
+	struct kvm_vcpu_events events;
 
-	if (vmx_pages)
-		l1_guest_code(vmx_pages);
+	vcpu_events_get(vcpu, &events);
 
-	GUEST_DONE();
+	events.nmi.pending = 1;
+	events.flags |= KVM_VCPUEVENT_VALID_NMI_PENDING;
+
+	vcpu_events_set(vcpu, &events);
+}
+
+static struct kvm_vcpu *save_restore_vm(struct kvm_vm *vm,
+					struct kvm_vcpu *vcpu)
+{
+	struct kvm_regs regs1, regs2;
+	struct kvm_x86_state *state;
+
+	state = vcpu_save_state(vcpu);
+	memset(&regs1, 0, sizeof(regs1));
+	vcpu_regs_get(vcpu, &regs1);
+
+	kvm_vm_release(vm);
+
+	/* Restore state in a new VM.  */
+	vcpu = vm_recreate_with_one_vcpu(vm);
+	vcpu_set_hv_cpuid(vcpu);
+	vcpu_enable_evmcs(vcpu);
+	vcpu_load_state(vcpu, state);
+	kvm_x86_state_cleanup(state);
+
+	memset(&regs2, 0, sizeof(regs2));
+	vcpu_regs_get(vcpu, &regs2);
+	TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
+		    "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
+		    (ulong) regs2.rdi, (ulong) regs2.rsi);
+	return vcpu;
 }
 
 int main(int argc, char *argv[])
 {
 	vm_vaddr_t vmx_pages_gva = 0;
 
-	struct kvm_regs regs1, regs2;
+	struct kvm_vcpu *vcpu;
 	struct kvm_vm *vm;
 	struct kvm_run *run;
-	struct kvm_x86_state *state;
 	struct ucall uc;
 	int stage;
 
-	/* Create VM */
-	vm = vm_create_default(VCPU_ID, 0, guest_code);
+	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
 
-	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+	TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
+	TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS));
 
-	if (!kvm_check_cap(KVM_CAP_NESTED_STATE) ||
-	    !kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
-		printf("capabilities not available, skipping test\n");
-		exit(KSFT_SKIP);
-	}
+	vcpu_set_hv_cpuid(vcpu);
+	vcpu_enable_evmcs(vcpu);
 
-	vcpu_enable_evmcs(vm, VCPU_ID);
-
-	run = vcpu_state(vm, VCPU_ID);
+	vcpu_alloc_vmx(vm, &vmx_pages_gva);
+	vcpu_args_set(vcpu, 1, vmx_pages_gva);
 
-	vcpu_regs_get(vm, VCPU_ID, &regs1);
+	vm_init_descriptor_tables(vm);
+	vcpu_init_descriptor_tables(vcpu);
+	vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
+	vm_install_exception_handler(vm, NMI_VECTOR, guest_nmi_handler);
 
-	vcpu_alloc_vmx(vm, &vmx_pages_gva);
-	vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
+	pr_info("Running L1 which uses EVMCS to run L2\n");
 
 	for (stage = 1;; stage++) {
-		_vcpu_run(vm, VCPU_ID);
+		run = vcpu->run;
+
+		vcpu_run(vcpu);
 		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
 			    "Stage %d: unexpected exit reason: %u (%s),\n",
 			    stage, run->exit_reason,
 			    exit_reason_str(run->exit_reason));
 
-		switch (get_ucall(vm, VCPU_ID, &uc)) {
+		switch (get_ucall(vcpu, &uc)) {
 		case UCALL_ABORT:
-			TEST_ASSERT(false, "%s at %s:%d", (const char *)uc.args[0],
-				    __FILE__, uc.args[1]);
+			REPORT_GUEST_ASSERT(uc);
 			/* NOT REACHED */
 		case UCALL_SYNC:
 			break;
 		case UCALL_DONE:
 			goto done;
 		default:
-			TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd);
+			TEST_FAIL("Unknown ucall %lu", uc.cmd);
 		}
 
 		/* UCALL_SYNC is handled here.  */
 		TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
-			    uc.args[1] == stage, "Unexpected register values vmexit #%lx, got %lx",
+			    uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
 			    stage, (ulong)uc.args[1]);
 
-		state = vcpu_save_state(vm, VCPU_ID);
-		memset(&regs1, 0, sizeof(regs1));
-		vcpu_regs_get(vm, VCPU_ID, &regs1);
-
-		kvm_vm_release(vm);
-
-		/* Restore state in a new VM.  */
-		kvm_vm_restart(vm, O_RDWR);
-		vm_vcpu_add(vm, VCPU_ID);
-		vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
-		vcpu_enable_evmcs(vm, VCPU_ID);
-		vcpu_load_state(vm, VCPU_ID, state);
-		run = vcpu_state(vm, VCPU_ID);
-		free(state);
-
-		memset(&regs2, 0, sizeof(regs2));
-		vcpu_regs_get(vm, VCPU_ID, &regs2);
-		TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
-			    "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
-			    (ulong) regs2.rdi, (ulong) regs2.rsi);
+		vcpu = save_restore_vm(vm, vcpu);
+
+		/* Force immediate L2->L1 exit before resuming */
+		if (stage == 8) {
+			pr_info("Injecting NMI into L1 before L2 had a chance to run after restore\n");
+			inject_nmi(vcpu);
+		}
+
+		/*
+		 * Do KVM_GET_NESTED_STATE/KVM_SET_NESTED_STATE for a freshly
+		 * restored VM (before the first KVM_RUN) to check that
+		 * KVM_STATE_NESTED_EVMCS is not lost.
+		 */
+		if (stage == 9) {
+			pr_info("Trying extra KVM_GET_NESTED_STATE/KVM_SET_NESTED_STATE cycle\n");
+			vcpu = save_restore_vm(vm, vcpu);
+		}
 	}
 
 done:
diff --git a/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c b/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c
new file mode 100644
index 000000000000..32f7e09ef67c
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020, Google LLC.
+ *
+ * Tests for KVM's hypercall instruction patching quirk
+ */
+#include <asm/kvm_para.h>
+#include <linux/kvm_para.h>
+#include <linux/stringify.h>
+#include <stdint.h>
+
+#include "apic.h"
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+/* VMCALL and VMMCALL are both 3-byte opcodes. */
+#define HYPERCALL_INSN_SIZE	3
+
+static bool quirk_disabled;
+
+static void guest_ud_handler(struct ex_regs *regs)
+{
+	regs->rax = -EFAULT;
+	regs->rip += HYPERCALL_INSN_SIZE;
+}
+
+static const uint8_t vmx_vmcall[HYPERCALL_INSN_SIZE]  = { 0x0f, 0x01, 0xc1 };
+static const uint8_t svm_vmmcall[HYPERCALL_INSN_SIZE] = { 0x0f, 0x01, 0xd9 };
+
+extern uint8_t hypercall_insn[HYPERCALL_INSN_SIZE];
+static uint64_t do_sched_yield(uint8_t apic_id)
+{
+	uint64_t ret;
+
+	asm volatile("hypercall_insn:\n\t"
+		     ".byte 0xcc,0xcc,0xcc\n\t"
+		     : "=a"(ret)
+		     : "a"((uint64_t)KVM_HC_SCHED_YIELD), "b"((uint64_t)apic_id)
+		     : "memory");
+
+	return ret;
+}
+
+static void guest_main(void)
+{
+	const uint8_t *native_hypercall_insn;
+	const uint8_t *other_hypercall_insn;
+	uint64_t ret;
+
+	if (is_intel_cpu()) {
+		native_hypercall_insn = vmx_vmcall;
+		other_hypercall_insn  = svm_vmmcall;
+	} else if (is_amd_cpu()) {
+		native_hypercall_insn = svm_vmmcall;
+		other_hypercall_insn  = vmx_vmcall;
+	} else {
+		GUEST_ASSERT(0);
+		/* unreachable */
+		return;
+	}
+
+	memcpy(hypercall_insn, other_hypercall_insn, HYPERCALL_INSN_SIZE);
+
+	ret = do_sched_yield(GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID)));
+
+	/*
+	 * If the quirk is disabled, verify that guest_ud_handler() "returned"
+	 * -EFAULT and that KVM did NOT patch the hypercall.  If the quirk is
+	 * enabled, verify that the hypercall succeeded and that KVM patched in
+	 * the "right" hypercall.
+	 */
+	if (quirk_disabled) {
+		GUEST_ASSERT(ret == (uint64_t)-EFAULT);
+		GUEST_ASSERT(!memcmp(other_hypercall_insn, hypercall_insn,
+			     HYPERCALL_INSN_SIZE));
+	} else {
+		GUEST_ASSERT(!ret);
+		GUEST_ASSERT(!memcmp(native_hypercall_insn, hypercall_insn,
+			     HYPERCALL_INSN_SIZE));
+	}
+
+	GUEST_DONE();
+}
+
+static void enter_guest(struct kvm_vcpu *vcpu)
+{
+	struct kvm_run *run = vcpu->run;
+	struct ucall uc;
+
+	vcpu_run(vcpu);
+	switch (get_ucall(vcpu, &uc)) {
+	case UCALL_SYNC:
+		pr_info("%s: %016lx\n", (const char *)uc.args[2], uc.args[3]);
+		break;
+	case UCALL_DONE:
+		return;
+	case UCALL_ABORT:
+		REPORT_GUEST_ASSERT(uc);
+	default:
+		TEST_FAIL("Unhandled ucall: %ld\nexit_reason: %u (%s)",
+			  uc.cmd, run->exit_reason, exit_reason_str(run->exit_reason));
+	}
+}
+
+static void test_fix_hypercall(bool disable_quirk)
+{
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+
+	vm = vm_create_with_one_vcpu(&vcpu, guest_main);
+
+	vm_init_descriptor_tables(vcpu->vm);
+	vcpu_init_descriptor_tables(vcpu);
+	vm_install_exception_handler(vcpu->vm, UD_VECTOR, guest_ud_handler);
+
+	if (disable_quirk)
+		vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2,
+			      KVM_X86_QUIRK_FIX_HYPERCALL_INSN);
+
+	quirk_disabled = disable_quirk;
+	sync_global_to_guest(vm, quirk_disabled);
+
+	virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
+
+	enter_guest(vcpu);
+}
+
+int main(void)
+{
+	TEST_REQUIRE(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & KVM_X86_QUIRK_FIX_HYPERCALL_INSN);
+
+	test_fix_hypercall(false);
+	test_fix_hypercall(true);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c b/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c
new file mode 100644
index 000000000000..d09b3cbcadc6
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test that KVM_GET_MSR_INDEX_LIST and
+ * KVM_GET_MSR_FEATURE_INDEX_LIST work as intended
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+int main(int argc, char *argv[])
+{
+	const struct kvm_msr_list *feature_list;
+	int i;
+
+	/*
+	 * Skip the entire test if MSR_FEATURES isn't supported, other tests
+	 * will cover the "regular" list of MSRs, the coverage here is purely
+	 * opportunistic and not interesting on its own.
+	 */
+	TEST_REQUIRE(kvm_has_cap(KVM_CAP_GET_MSR_FEATURES));
+
+	(void)kvm_get_msr_index_list();
+
+	feature_list = kvm_get_feature_msr_index_list();
+	for (i = 0; i < feature_list->nmsrs; i++)
+		kvm_get_feature_msr(feature_list->indices[i]);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
new file mode 100644
index 000000000000..d576bc8ce823
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
@@ -0,0 +1,265 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021, Red Hat, Inc.
+ *
+ * Tests for Hyper-V clocksources
+ */
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "hyperv.h"
+
+struct ms_hyperv_tsc_page {
+	volatile u32 tsc_sequence;
+	u32 reserved1;
+	volatile u64 tsc_scale;
+	volatile s64 tsc_offset;
+} __packed;
+
+/* Simplified mul_u64_u64_shr(): approximates (a * b) >> 64 */
+static inline u64 mul_u64_u64_shr64(u64 a, u64 b)
+{
+	union {
+		u64 ll;
+		struct {
+			u32 low, high;
+		} l;
+	} rm, rn, rh, a0, b0;
+	u64 c;
+
+	a0.ll = a;
+	b0.ll = b;
+
+	rm.ll = (u64)a0.l.low * b0.l.high;
+	rn.ll = (u64)a0.l.high * b0.l.low;
+	rh.ll = (u64)a0.l.high * b0.l.high;
+	/* Sum in 64 bits so the carry out of bit 31 ends up in c >> 32 */
+	rh.l.low = c = (u64)rm.l.high + rn.l.high + rh.l.low;
+	rh.l.high = (c >> 32) + rh.l.high;
+
+	return rh.ll;
+}
+
+static inline void nop_loop(void)
+{
+	int i;
+
+	for (i = 0; i < 100000000; i++)
+		asm volatile("nop");
+}
+
+static inline void check_tsc_msr_rdtsc(void)
+{
+	u64 tsc_freq, r1, r2, t1, t2;
+	s64 delta_ns;
+
+	tsc_freq = rdmsr(HV_X64_MSR_TSC_FREQUENCY);
+	GUEST_ASSERT(tsc_freq > 0);
+
+	/* For increased accuracy, take mean rdtsc() before and after rdmsr() */
+	r1 = rdtsc();
+	t1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+	r1 = (r1 + rdtsc()) / 2;
+	nop_loop();
+	r2 = rdtsc();
+	t2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+	r2 = (r2 + rdtsc()) / 2;
+
+	GUEST_ASSERT(r2 > r1 && t2 > t1);
+
+	/* HV_X64_MSR_TIME_REF_COUNT is in 100ns */
+	delta_ns = ((t2 - t1) * 100) - ((r2 - r1) * 1000000000 / tsc_freq);
+	if (delta_ns < 0)
+		delta_ns = -delta_ns;
+
+	/* 1% tolerance */
+	GUEST_ASSERT(delta_ns * 100 < (t2 - t1) * 100);
+}
+
+static inline u64 get_tscpage_ts(struct ms_hyperv_tsc_page *tsc_page)
+{
+	return mul_u64_u64_shr64(rdtsc(), tsc_page->tsc_scale) + tsc_page->tsc_offset;
+}
+
+static inline void check_tsc_msr_tsc_page(struct ms_hyperv_tsc_page *tsc_page)
+{
+	u64 r1, r2, t1, t2;
+
+	/* Compare TSC page clocksource with HV_X64_MSR_TIME_REF_COUNT */
+	t1 = get_tscpage_ts(tsc_page);
+	r1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+
+	/* 10 ms tolerance */
+	GUEST_ASSERT(r1 >= t1 && r1 - t1 < 100000);
+	nop_loop();
+	/* Repeat the same comparison after the delay loop */
+	t2 = get_tscpage_ts(tsc_page);
+	r2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+	GUEST_ASSERT(r2 >= t2 && r2 - t2 < 100000);
+}
+
+static void guest_main(struct ms_hyperv_tsc_page *tsc_page, vm_paddr_t tsc_page_gpa)
+{
+	u64 tsc_scale, tsc_offset;
+
+	/* Set Guest OS id to enable Hyper-V emulation */
+	GUEST_SYNC(1);
+	wrmsr(HV_X64_MSR_GUEST_OS_ID, (u64)0x8100 << 48);
+	GUEST_SYNC(2);
+
+	check_tsc_msr_rdtsc();
+
+	GUEST_SYNC(3);
+
+	/* Set up TSC page in disabled state, check that it's clean */
+	wrmsr(HV_X64_MSR_REFERENCE_TSC, tsc_page_gpa);
+	GUEST_ASSERT(tsc_page->tsc_sequence == 0);
+	GUEST_ASSERT(tsc_page->tsc_scale == 0);
+	GUEST_ASSERT(tsc_page->tsc_offset == 0);
+
+	GUEST_SYNC(4);
+
+	/* Set up TSC page in enabled state */
+	wrmsr(HV_X64_MSR_REFERENCE_TSC, tsc_page_gpa | 0x1);
+	GUEST_ASSERT(tsc_page->tsc_sequence != 0);
+
+	GUEST_SYNC(5);
+
+	check_tsc_msr_tsc_page(tsc_page);
+
+	GUEST_SYNC(6);
+
+	tsc_offset = tsc_page->tsc_offset;
+	/* Call KVM_SET_CLOCK from userspace, check that TSC page was updated */
+
+	GUEST_SYNC(7);
+	/* Sanity check TSC page timestamp, it should be close to 0 */
+	GUEST_ASSERT(get_tscpage_ts(tsc_page) < 100000);
+
+	GUEST_ASSERT(tsc_page->tsc_offset != tsc_offset);
+
+	nop_loop();
+
+	/*
+	 * Enable Re-enlightenment and check that TSC page stays constant across
+	 * KVM_SET_CLOCK.
+	 */
+	wrmsr(HV_X64_MSR_REENLIGHTENMENT_CONTROL, 0x1 << 16 | 0xff);
+	wrmsr(HV_X64_MSR_TSC_EMULATION_CONTROL, 0x1);
+	tsc_offset = tsc_page->tsc_offset;
+	tsc_scale = tsc_page->tsc_scale;
+	GUEST_SYNC(8);
+	GUEST_ASSERT(tsc_page->tsc_offset == tsc_offset);
+	GUEST_ASSERT(tsc_page->tsc_scale == tsc_scale);
+
+	GUEST_SYNC(9);
+
+	check_tsc_msr_tsc_page(tsc_page);
+
+	/*
+	 * Disable re-enlightenment and TSC page, check that KVM doesn't update
+	 * it anymore.
+	 */
+	wrmsr(HV_X64_MSR_REENLIGHTENMENT_CONTROL, 0);
+	wrmsr(HV_X64_MSR_TSC_EMULATION_CONTROL, 0);
+	wrmsr(HV_X64_MSR_REFERENCE_TSC, 0);
+	memset(tsc_page, 0, sizeof(*tsc_page));
+
+	GUEST_SYNC(10);
+	GUEST_ASSERT(tsc_page->tsc_sequence == 0);
+	GUEST_ASSERT(tsc_page->tsc_offset == 0);
+	GUEST_ASSERT(tsc_page->tsc_scale == 0);
+
+	GUEST_DONE();
+}
+
+static void host_check_tsc_msr_rdtsc(struct kvm_vcpu *vcpu)
+{
+	u64 tsc_freq, r1, r2, t1, t2;
+	s64 delta_ns;
+
+	tsc_freq = vcpu_get_msr(vcpu, HV_X64_MSR_TSC_FREQUENCY);
+	TEST_ASSERT(tsc_freq > 0, "TSC frequency must be nonzero");
+
+	/* For increased accuracy, take mean rdtsc() before and after ioctl */
+	r1 = rdtsc();
+	t1 = vcpu_get_msr(vcpu, HV_X64_MSR_TIME_REF_COUNT);
+	r1 = (r1 + rdtsc()) / 2;
+	nop_loop();
+	r2 = rdtsc();
+	t2 = vcpu_get_msr(vcpu, HV_X64_MSR_TIME_REF_COUNT);
+	r2 = (r2 + rdtsc()) / 2;
+
+	TEST_ASSERT(t2 > t1, "Time reference MSR is not monotonic (%ld <= %ld)", t2, t1);
+
+	/* HV_X64_MSR_TIME_REF_COUNT is in 100ns */
+	delta_ns = ((t2 - t1) * 100) - ((r2 - r1) * 1000000000 / tsc_freq);
+	if (delta_ns < 0)
+		delta_ns = -delta_ns;
+
+	/* 1% tolerance */
+	TEST_ASSERT(delta_ns * 100 < (t2 - t1) * 100,
+		    "Elapsed time does not match (MSR=%ld, TSC=%ld)",
+		    (t2 - t1) * 100, (r2 - r1) * 1000000000 / tsc_freq);
+}
+
+int main(void)
+{
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+	struct kvm_run *run;
+	struct ucall uc;
+	vm_vaddr_t tsc_page_gva;
+	int stage;
+
+	vm = vm_create_with_one_vcpu(&vcpu, guest_main);
+	run = vcpu->run;
+
+	vcpu_set_hv_cpuid(vcpu);
+
+	tsc_page_gva = vm_vaddr_alloc_page(vm);
+	memset(addr_gva2hva(vm, tsc_page_gva), 0x0, getpagesize());
+	TEST_ASSERT((addr_gva2gpa(vm, tsc_page_gva) & (getpagesize() - 1)) == 0,
+		"TSC page has to be page aligned\n");
+	vcpu_args_set(vcpu, 2, tsc_page_gva, addr_gva2gpa(vm, tsc_page_gva));
+
+	host_check_tsc_msr_rdtsc(vcpu);
+
+	for (stage = 1;; stage++) {
+		vcpu_run(vcpu);
+		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+			    "Stage %d: unexpected exit reason: %u (%s),\n",
+			    stage, run->exit_reason,
+			    exit_reason_str(run->exit_reason));
+
+		switch (get_ucall(vcpu, &uc)) {
+		case UCALL_ABORT:
+			REPORT_GUEST_ASSERT(uc);
+			/* NOT REACHED */
+		case UCALL_SYNC:
+			break;
+		case UCALL_DONE:
+			/* Keep in sync with guest_main() */
+			TEST_ASSERT(stage == 11, "Testing ended prematurely, stage %d\n",
+				    stage);
+			goto out;
+		default:
+			TEST_FAIL("Unknown ucall %lu", uc.cmd);
+		}
+
+		TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+			    uc.args[1] == stage,
+			    "Stage %d: Unexpected register values vmexit, got %lx",
+			    stage, (ulong)uc.args[1]);
+
+		/* Reset kvmclock triggering TSC page update */
+		if (stage == 7 || stage == 8 || stage == 10) {
+			struct kvm_clock_data clock = {0};
+
+			vm_ioctl(vm, KVM_SET_CLOCK, &clock);
+		}
+	}
+
+out:
+	kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
index 443a2b54645b..e804eb08dff9 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
@@ -20,24 +20,22 @@
 #include "processor.h"
 #include "vmx.h"
 
-#define VCPU_ID 0
-
 static void guest_code(void)
 {
 }
 
-static int smt_possible(void)
+static bool smt_possible(void)
 {
 	char buf[16];
 	FILE *f;
-	bool res = 1;
+	bool res = true;
 
 	f = fopen("/sys/devices/system/cpu/smt/control", "r");
 	if (f) {
 		if (fread(buf, sizeof(*buf), sizeof(buf), f) > 0) {
 			if (!strncmp(buf, "forceoff", 8) ||
 			    !strncmp(buf, "notsupported", 12))
-				res = 0;
+				res = false;
 		}
 		fclose(f);
 	}
@@ -45,28 +43,24 @@ static int smt_possible(void)
 	return res;
 }
 
-static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries,
-			  int evmcs_enabled)
+static void test_hv_cpuid(const struct kvm_cpuid2 *hv_cpuid_entries,
+			  bool evmcs_expected)
 {
 	int i;
+	int nent_expected = 10;
+	u32 test_val;
 
-	if (!evmcs_enabled)
-		TEST_ASSERT(hv_cpuid_entries->nent == 6,
-			    "KVM_GET_SUPPORTED_HV_CPUID should return 6 entries"
-			    " when Enlightened VMCS is disabled (returned %d)",
-			    hv_cpuid_entries->nent);
-	else
-		TEST_ASSERT(hv_cpuid_entries->nent == 7,
-			    "KVM_GET_SUPPORTED_HV_CPUID should return 7 entries"
-			    " when Enlightened VMCS is enabled (returned %d)",
-			    hv_cpuid_entries->nent);
+	TEST_ASSERT(hv_cpuid_entries->nent == nent_expected,
+		    "KVM_GET_SUPPORTED_HV_CPUID should return %d entries"
+		    " (returned %d)",
+		    nent_expected, hv_cpuid_entries->nent);
 
 	for (i = 0; i < hv_cpuid_entries->nent; i++) {
-		struct kvm_cpuid_entry2 *entry = &hv_cpuid_entries->entries[i];
+		const struct kvm_cpuid_entry2 *entry = &hv_cpuid_entries->entries[i];
 
 		TEST_ASSERT((entry->function >= 0x40000000) &&
-			    (entry->function <= 0x4000000A),
-			    "function %lx is our of supported range",
+			    (entry->function <= 0x40000082),
+			    "function %x is out of supported range",
 			    entry->function);
 
 		TEST_ASSERT(entry->index == 0,
@@ -78,14 +72,37 @@ static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries,
 		TEST_ASSERT(!entry->padding[0] && !entry->padding[1] &&
 			    !entry->padding[2], "padding should be zero");
 
-		if (entry->function == 0x40000004) {
-			int nononarchcs = !!(entry->eax & (1UL << 18));
+		switch (entry->function) {
+		case 0x40000000:
+			test_val = 0x40000082;
+
+			TEST_ASSERT(entry->eax == test_val,
+				    "Wrong max leaf report in 0x40000000.EAX: %x"
+				    " (evmcs=%d)",
+				    entry->eax, evmcs_expected
+				);
+			break;
+		case 0x40000004:
+			test_val = entry->eax & (1UL << 18);
 
-			TEST_ASSERT(nononarchcs == !smt_possible(),
+			TEST_ASSERT(!!test_val == !smt_possible(),
 				    "NoNonArchitecturalCoreSharing bit"
 				    " doesn't reflect SMT setting");
-		}
+			break;
+		case 0x4000000A:
+			TEST_ASSERT(entry->eax & (1UL << 19),
+				    "Enlightened MSR-Bitmap should always be supported"
+				    " 0x4000000A.EAX: %x", entry->eax);
+			if (evmcs_expected)
+				TEST_ASSERT((entry->eax & 0xffff) == 0x101,
+				    "Supported Enlightened VMCS version range is supposed to be 1:1"
+				    " 0x4000000A.EAX: %x", entry->eax);
+
+			break;
+		default:
+			break;
 
+		}
 		/*
 		 * If needed for debug:
 		 * fprintf(stdout,
@@ -94,88 +111,66 @@ static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries,
 		 *	entry->edx);
 		 */
 	}
-
 }
 
-void test_hv_cpuid_e2big(struct kvm_vm *vm)
+void test_hv_cpuid_e2big(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
 {
 	static struct kvm_cpuid2 cpuid = {.nent = 0};
 	int ret;
 
-	ret = _vcpu_ioctl(vm, VCPU_ID, KVM_GET_SUPPORTED_HV_CPUID, &cpuid);
+	if (vcpu)
+		ret = __vcpu_ioctl(vcpu, KVM_GET_SUPPORTED_HV_CPUID, &cpuid);
+	else
+		ret = __kvm_ioctl(vm->kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, &cpuid);
 
 	TEST_ASSERT(ret == -1 && errno == E2BIG,
-		    "KVM_GET_SUPPORTED_HV_CPUID didn't fail with -E2BIG when"
-		    " it should have: %d %d", ret, errno);
-}
-
-
-struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(struct kvm_vm *vm)
-{
-	int nent = 20; /* should be enough */
-	static struct kvm_cpuid2 *cpuid;
-
-	cpuid = malloc(sizeof(*cpuid) + nent * sizeof(struct kvm_cpuid_entry2));
-
-	if (!cpuid) {
-		perror("malloc");
-		abort();
-	}
-
-	cpuid->nent = nent;
-
-	vcpu_ioctl(vm, VCPU_ID, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
-
-	return cpuid;
+		    "%s KVM_GET_SUPPORTED_HV_CPUID didn't fail with -E2BIG when"
+		    " it should have: %d %d", !vcpu ? "KVM" : "vCPU", ret, errno);
 }
 
-
 int main(int argc, char *argv[])
 {
 	struct kvm_vm *vm;
-	int rv;
-	struct kvm_cpuid2 *hv_cpuid_entries;
+	const struct kvm_cpuid2 *hv_cpuid_entries;
+	struct kvm_vcpu *vcpu;
 
 	/* Tell stdout not to buffer its content */
 	setbuf(stdout, NULL);
 
-	rv = kvm_check_cap(KVM_CAP_HYPERV_CPUID);
-	if (!rv) {
-		fprintf(stderr,
-			"KVM_CAP_HYPERV_CPUID not supported, skip test\n");
-		exit(KSFT_SKIP);
-	}
-
-	/* Create VM */
-	vm = vm_create_default(VCPU_ID, 0, guest_code);
-
-	test_hv_cpuid_e2big(vm);
+	TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_CPUID));
 
-	hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm);
-	if (!hv_cpuid_entries)
-		return 1;
+	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
 
-	test_hv_cpuid(hv_cpuid_entries, 0);
+	/* Test vCPU ioctl version */
+	test_hv_cpuid_e2big(vm, vcpu);
 
-	free(hv_cpuid_entries);
+	hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vcpu);
+	test_hv_cpuid(hv_cpuid_entries, false);
+	free((void *)hv_cpuid_entries);
 
-	if (!kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
-		fprintf(stderr,
-			"Enlightened VMCS is unsupported, skip related test\n");
-		goto vm_free;
+	if (!kvm_cpu_has(X86_FEATURE_VMX) ||
+	    !kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
+		print_skip("Enlightened VMCS is unsupported");
+		goto do_sys;
+	}
+	vcpu_enable_evmcs(vcpu);
+	hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vcpu);
+	test_hv_cpuid(hv_cpuid_entries, true);
+	free((void *)hv_cpuid_entries);
+
+do_sys:
+	/* Test system ioctl version */
+	if (!kvm_has_cap(KVM_CAP_SYS_HYPERV_CPUID)) {
+		print_skip("KVM_CAP_SYS_HYPERV_CPUID not supported");
+		goto out;
 	}
 
-	vcpu_enable_evmcs(vm, VCPU_ID);
-
-	hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm);
-	if (!hv_cpuid_entries)
-		return 1;
-
-	test_hv_cpuid(hv_cpuid_entries, 1);
+	test_hv_cpuid_e2big(vm, NULL);
 
-	free(hv_cpuid_entries);
+	hv_cpuid_entries = kvm_get_supported_hv_cpuid();
+	test_hv_cpuid(hv_cpuid_entries, kvm_cpu_has(X86_FEATURE_VMX));
 
-vm_free:
+out:
 	kvm_vm_free(vm);
 
 	return 0;
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
new file mode 100644
index 000000000000..05b32e550a80
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
@@ -0,0 +1,643 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021, Red Hat, Inc.
+ *
+ * Tests for Hyper-V features enablement
+ */
+#include <asm/kvm_para.h>
+#include <linux/kvm_para.h>
+#include <stdint.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "hyperv.h"
+
+#define LINUX_OS_ID ((u64)0x8100 << 48)
+
+static inline uint8_t hypercall(u64 control, vm_vaddr_t input_address,
+				vm_vaddr_t output_address, uint64_t *hv_status)
+{
+	uint8_t vector;
+
+	/* Note both the hypercall and the "asm safe" clobber r9-r11. */
+	asm volatile("mov %[output_address], %%r8\n\t"
+		     KVM_ASM_SAFE("vmcall")
+		     : "=a" (*hv_status),
+		       "+c" (control), "+d" (input_address),
+		       KVM_ASM_SAFE_OUTPUTS(vector)
+		     : [output_address] "r"(output_address),
+		       "a" (-EFAULT)
+		     : "cc", "memory", "r8", KVM_ASM_SAFE_CLOBBERS);
+	return vector;
+}
+
+struct msr_data {
+	uint32_t idx;
+	bool available;
+	bool write;
+	u64 write_val;
+};
+
+struct hcall_data {
+	uint64_t control;
+	uint64_t expect;
+	bool ud_expected;
+};
+
+static void guest_msr(struct msr_data *msr)
+{
+	uint64_t ignored;
+	uint8_t vector;
+
+	GUEST_ASSERT(msr->idx);
+
+	if (!msr->write)
+		vector = rdmsr_safe(msr->idx, &ignored);
+	else
+		vector = wrmsr_safe(msr->idx, msr->write_val);
+
+	if (msr->available)
+		GUEST_ASSERT_2(!vector, msr->idx, vector);
+	else
+		GUEST_ASSERT_2(vector == GP_VECTOR, msr->idx, vector);
+	GUEST_DONE();
+}
+
+static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall)
+{
+	u64 res, input, output;
+	uint8_t vector;
+
+	GUEST_ASSERT(hcall->control);
+
+	wrmsr(HV_X64_MSR_GUEST_OS_ID, LINUX_OS_ID);
+	wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa);
+
+	if (!(hcall->control & HV_HYPERCALL_FAST_BIT)) {
+		input = pgs_gpa;
+		output = pgs_gpa + 4096;
+	} else {
+		input = output = 0;
+	}
+
+	vector = hypercall(hcall->control, input, output, &res);
+	if (hcall->ud_expected) {
+		GUEST_ASSERT_2(vector == UD_VECTOR, hcall->control, vector);
+	} else {
+		GUEST_ASSERT_2(!vector, hcall->control, vector);
+		GUEST_ASSERT_2(res == hcall->expect, hcall->expect, res);
+	}
+
+	GUEST_DONE();
+}
+
+static void vcpu_reset_hv_cpuid(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * Enable all supported Hyper-V features, then clear the leafs holding
+	 * the features that will be tested one by one.
+	 */
+	vcpu_set_hv_cpuid(vcpu);
+
+	vcpu_clear_cpuid_entry(vcpu, HYPERV_CPUID_FEATURES);
+	vcpu_clear_cpuid_entry(vcpu, HYPERV_CPUID_ENLIGHTMENT_INFO);
+	vcpu_clear_cpuid_entry(vcpu, HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES);
+}
+
+static void guest_test_msrs_access(void)
+{
+	struct kvm_cpuid2 *prev_cpuid = NULL;
+	struct kvm_cpuid_entry2 *feat, *dbg;
+	struct kvm_vcpu *vcpu;
+	struct kvm_run *run;
+	struct kvm_vm *vm;
+	struct ucall uc;
+	int stage = 0;
+	vm_vaddr_t msr_gva;
+	struct msr_data *msr;
+
+	while (true) {
+		vm = vm_create_with_one_vcpu(&vcpu, guest_msr);
+
+		msr_gva = vm_vaddr_alloc_page(vm);
+		memset(addr_gva2hva(vm, msr_gva), 0x0, getpagesize());
+		msr = addr_gva2hva(vm, msr_gva);
+
+		vcpu_args_set(vcpu, 1, msr_gva);
+		vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_ENFORCE_CPUID, 1);
+
+		if (!prev_cpuid) {
+			vcpu_reset_hv_cpuid(vcpu);
+
+			prev_cpuid = allocate_kvm_cpuid2(vcpu->cpuid->nent);
+		} else {
+			vcpu_init_cpuid(vcpu, prev_cpuid);
+		}
+
+		feat = vcpu_get_cpuid_entry(vcpu, HYPERV_CPUID_FEATURES);
+		dbg = vcpu_get_cpuid_entry(vcpu, HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES);
+
+		vm_init_descriptor_tables(vm);
+		vcpu_init_descriptor_tables(vcpu);
+
+		run = vcpu->run;
+
+		/* TODO: Make this entire test easier to maintain. */
+		if (stage >= 21)
+			vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_SYNIC2, 0);
+
+		switch (stage) {
+		case 0:
+			/*
+			 * Only available when Hyper-V identification is set
+			 */
+			msr->idx = HV_X64_MSR_GUEST_OS_ID;
+			msr->write = 0;
+			msr->available = 0;
+			break;
+		case 1:
+			msr->idx = HV_X64_MSR_HYPERCALL;
+			msr->write = 0;
+			msr->available = 0;
+			break;
+		case 2:
+			feat->eax |= HV_MSR_HYPERCALL_AVAILABLE;
+			/*
+			 * HV_X64_MSR_GUEST_OS_ID has to be written first to make
+			 * HV_X64_MSR_HYPERCALL available.
+			 */
+			msr->idx = HV_X64_MSR_GUEST_OS_ID;
+			msr->write = 1;
+			msr->write_val = LINUX_OS_ID;
+			msr->available = 1;
+			break;
+		case 3:
+			msr->idx = HV_X64_MSR_GUEST_OS_ID;
+			msr->write = 0;
+			msr->available = 1;
+			break;
+		case 4:
+			msr->idx = HV_X64_MSR_HYPERCALL;
+			msr->write = 0;
+			msr->available = 1;
+			break;
+
+		case 5:
+			msr->idx = HV_X64_MSR_VP_RUNTIME;
+			msr->write = 0;
+			msr->available = 0;
+			break;
+		case 6:
+			feat->eax |= HV_MSR_VP_RUNTIME_AVAILABLE;
+			msr->idx = HV_X64_MSR_VP_RUNTIME;
+			msr->write = 0;
+			msr->available = 1;
+			break;
+		case 7:
+			/* Read only */
+			msr->idx = HV_X64_MSR_VP_RUNTIME;
+			msr->write = 1;
+			msr->write_val = 1;
+			msr->available = 0;
+			break;
+
+		case 8:
+			msr->idx = HV_X64_MSR_TIME_REF_COUNT;
+			msr->write = 0;
+			msr->available = 0;
+			break;
+		case 9:
+			feat->eax |= HV_MSR_TIME_REF_COUNT_AVAILABLE;
+			msr->idx = HV_X64_MSR_TIME_REF_COUNT;
+			msr->write = 0;
+			msr->available = 1;
+			break;
+		case 10:
+			/* Read only */
+			msr->idx = HV_X64_MSR_TIME_REF_COUNT;
+			msr->write = 1;
+			msr->write_val = 1;
+			msr->available = 0;
+			break;
+
+		case 11:
+			msr->idx = HV_X64_MSR_VP_INDEX;
+			msr->write = 0;
+			msr->available = 0;
+			break;
+		case 12:
+			feat->eax |= HV_MSR_VP_INDEX_AVAILABLE;
+			msr->idx = HV_X64_MSR_VP_INDEX;
+			msr->write = 0;
+			msr->available = 1;
+			break;
+		case 13:
+			/* Read only */
+			msr->idx = HV_X64_MSR_VP_INDEX;
+			msr->write = 1;
+			msr->write_val = 1;
+			msr->available = 0;
+			break;
+
+		case 14:
+			msr->idx = HV_X64_MSR_RESET;
+			msr->write = 0;
+			msr->available = 0;
+			break;
+		case 15:
+			feat->eax |= HV_MSR_RESET_AVAILABLE;
+			msr->idx = HV_X64_MSR_RESET;
+			msr->write = 0;
+			msr->available = 1;
+			break;
+		case 16:
+			msr->idx = HV_X64_MSR_RESET;
+			msr->write = 1;
+			msr->write_val = 0;
+			msr->available = 1;
+			break;
+
+		case 17:
+			msr->idx = HV_X64_MSR_REFERENCE_TSC;
+			msr->write = 0;
+			msr->available = 0;
+			break;
+		case 18:
+			feat->eax |= HV_MSR_REFERENCE_TSC_AVAILABLE;
+			msr->idx = HV_X64_MSR_REFERENCE_TSC;
+			msr->write = 0;
+			msr->available = 1;
+			break;
+		case 19:
+			msr->idx = HV_X64_MSR_REFERENCE_TSC;
+			msr->write = 1;
+			msr->write_val = 0;
+			msr->available = 1;
+			break;
+
+		case 20:
+			msr->idx = HV_X64_MSR_EOM;
+			msr->write = 0;
+			msr->available = 0;
+			break;
+		case 21:
+			/*
+			 * Remains unavailable even with KVM_CAP_HYPERV_SYNIC2
+			 * capability enabled and guest visible CPUID bit unset.
+			 */
+			msr->idx = HV_X64_MSR_EOM;
+			msr->write = 0;
+			msr->available = 0;
+			break;
+		case 22:
+			feat->eax |= HV_MSR_SYNIC_AVAILABLE;
+			msr->idx = HV_X64_MSR_EOM;
+			msr->write = 0;
+			msr->available = 1;
+			break;
+		case 23:
+			msr->idx = HV_X64_MSR_EOM;
+			msr->write = 1;
+			msr->write_val = 0;
+			msr->available = 1;
+			break;
+
+		case 24:
+			msr->idx = HV_X64_MSR_STIMER0_CONFIG;
+			msr->write = 0;
+			msr->available = 0;
+			break;
+		case 25:
+			feat->eax |= HV_MSR_SYNTIMER_AVAILABLE;
+			msr->idx = HV_X64_MSR_STIMER0_CONFIG;
+			msr->write = 0;
+			msr->available = 1;
+			break;
+		case 26:
+			msr->idx = HV_X64_MSR_STIMER0_CONFIG;
+			msr->write = 1;
+			msr->write_val = 0;
+			msr->available = 1;
+			break;
+		case 27:
+			/* Direct mode test */
+			msr->idx = HV_X64_MSR_STIMER0_CONFIG;
+			msr->write = 1;
+			msr->write_val = 1 << 12;
+			msr->available = 0;
+			break;
+		case 28:
+			feat->edx |= HV_STIMER_DIRECT_MODE_AVAILABLE;
+			msr->idx = HV_X64_MSR_STIMER0_CONFIG;
+			msr->write = 1;
+			msr->write_val = 1 << 12;
+			msr->available = 1;
+			break;
+
+		case 29:
+			msr->idx = HV_X64_MSR_EOI;
+			msr->write = 0;
+			msr->available = 0;
+			break;
+		case 30:
+			feat->eax |= HV_MSR_APIC_ACCESS_AVAILABLE;
+			msr->idx = HV_X64_MSR_EOI;
+			msr->write = 1;
+			msr->write_val = 1;
+			msr->available = 1;
+			break;
+
+		case 31:
+			msr->idx = HV_X64_MSR_TSC_FREQUENCY;
+			msr->write = 0;
+			msr->available = 0;
+			break;
+		case 32:
+			feat->eax |= HV_ACCESS_FREQUENCY_MSRS;
+			msr->idx = HV_X64_MSR_TSC_FREQUENCY;
+			msr->write = 0;
+			msr->available = 1;
+			break;
+		case 33:
+			/* Read only */
+			msr->idx = HV_X64_MSR_TSC_FREQUENCY;
+			msr->write = 1;
+			msr->write_val = 1;
+			msr->available = 0;
+			break;
+
+		case 34:
+			msr->idx = HV_X64_MSR_REENLIGHTENMENT_CONTROL;
+			msr->write = 0;
+			msr->available = 0;
+			break;
+		case 35:
+			feat->eax |= HV_ACCESS_REENLIGHTENMENT;
+			msr->idx = HV_X64_MSR_REENLIGHTENMENT_CONTROL;
+			msr->write = 0;
+			msr->available = 1;
+			break;
+		case 36:
+			msr->idx = HV_X64_MSR_REENLIGHTENMENT_CONTROL;
+			msr->write = 1;
+			msr->write_val = 1;
+			msr->available = 1;
+			break;
+		case 37:
+			/* Can only write '0' */
+			msr->idx = HV_X64_MSR_TSC_EMULATION_STATUS;
+			msr->write = 1;
+			msr->write_val = 1;
+			msr->available = 0;
+			break;
+
+		case 38:
+			msr->idx = HV_X64_MSR_CRASH_P0;
+			msr->write = 0;
+			msr->available = 0;
+			break;
+		case 39:
+			feat->edx |= HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE;
+			msr->idx = HV_X64_MSR_CRASH_P0;
+			msr->write = 0;
+			msr->available = 1;
+			break;
+		case 40:
+			msr->idx = HV_X64_MSR_CRASH_P0;
+			msr->write = 1;
+			msr->write_val = 1;
+			msr->available = 1;
+			break;
+
+		case 41:
+			msr->idx = HV_X64_MSR_SYNDBG_STATUS;
+			msr->write = 0;
+			msr->available = 0;
+			break;
+		case 42:
+			feat->edx |= HV_FEATURE_DEBUG_MSRS_AVAILABLE;
+			dbg->eax |= HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING;
+			msr->idx = HV_X64_MSR_SYNDBG_STATUS;
+			msr->write = 0;
+			msr->available = 1;
+			break;
+		case 43:
+			msr->idx = HV_X64_MSR_SYNDBG_STATUS;
+			msr->write = 1;
+			msr->write_val = 0;
+			msr->available = 1;
+			break;
+
+		case 44:
+			kvm_vm_free(vm);
+			return;
+		}
+
+		vcpu_set_cpuid(vcpu);
+
+		memcpy(prev_cpuid, vcpu->cpuid, kvm_cpuid2_size(vcpu->cpuid->nent));
+
+		pr_debug("Stage %d: testing msr: 0x%x for %s\n", stage,
+			 msr->idx, msr->write ? "write" : "read");
+
+		vcpu_run(vcpu);
+		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+			    "unexpected exit reason: %u (%s)",
+			    run->exit_reason, exit_reason_str(run->exit_reason));
+
+		switch (get_ucall(vcpu, &uc)) {
+		case UCALL_ABORT:
+			REPORT_GUEST_ASSERT_2(uc, "MSR = %lx, vector = %lx");
+			return;
+		case UCALL_DONE:
+			break;
+		default:
+			TEST_FAIL("Unhandled ucall: %ld", uc.cmd);
+			return;
+		}
+
+		stage++;
+		kvm_vm_free(vm);
+	}
+}
+
+static void guest_test_hcalls_access(void)
+{
+	struct kvm_cpuid_entry2 *feat, *recomm, *dbg;
+	struct kvm_cpuid2 *prev_cpuid = NULL;
+	struct kvm_vcpu *vcpu;
+	struct kvm_run *run;
+	struct kvm_vm *vm;
+	struct ucall uc;
+	int stage = 0;
+	vm_vaddr_t hcall_page, hcall_params;
+	struct hcall_data *hcall;
+	/* One VM per stage; prev_cpuid carries the accumulated CPUID forward. */
+	while (true) {
+		vm = vm_create_with_one_vcpu(&vcpu, guest_hcall);
+
+		vm_init_descriptor_tables(vm);
+		vcpu_init_descriptor_tables(vcpu);
+
+		/* Hypercall input/output */
+		hcall_page = vm_vaddr_alloc_pages(vm, 2);
+		memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize());
+
+		hcall_params = vm_vaddr_alloc_page(vm);
+		memset(addr_gva2hva(vm, hcall_params), 0x0, getpagesize());
+		hcall = addr_gva2hva(vm, hcall_params); /* host pointer to the zeroed params page */
+
+		vcpu_args_set(vcpu, 2, addr_gva2gpa(vm, hcall_page), hcall_params);
+		vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_ENFORCE_CPUID, 1);
+		/* First pass: reset Hyper-V CPUID and allocate the snapshot buffer. */
+		if (!prev_cpuid) {
+			vcpu_reset_hv_cpuid(vcpu);
+
+			prev_cpuid = allocate_kvm_cpuid2(vcpu->cpuid->nent);
+		} else {
+			vcpu_init_cpuid(vcpu, prev_cpuid); /* later passes: reuse the snapshot */
+		}
+
+		feat = vcpu_get_cpuid_entry(vcpu, HYPERV_CPUID_FEATURES);
+		recomm = vcpu_get_cpuid_entry(vcpu, HYPERV_CPUID_ENLIGHTMENT_INFO);
+		dbg = vcpu_get_cpuid_entry(vcpu, HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES);
+
+		run = vcpu->run;
+		/* Per stage: optional CPUID tweak, hypercall code, expected status. */
+		switch (stage) {
+		case 0:
+			feat->eax |= HV_MSR_HYPERCALL_AVAILABLE;
+			hcall->control = 0xbeef;
+			hcall->expect = HV_STATUS_INVALID_HYPERCALL_CODE;
+			break;
+
+		case 1:
+			hcall->control = HVCALL_POST_MESSAGE;
+			hcall->expect = HV_STATUS_ACCESS_DENIED;
+			break;
+		case 2:
+			feat->ebx |= HV_POST_MESSAGES;
+			hcall->control = HVCALL_POST_MESSAGE;
+			hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT;
+			break;
+
+		case 3:
+			hcall->control = HVCALL_SIGNAL_EVENT;
+			hcall->expect = HV_STATUS_ACCESS_DENIED;
+			break;
+		case 4:
+			feat->ebx |= HV_SIGNAL_EVENTS;
+			hcall->control = HVCALL_SIGNAL_EVENT;
+			hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT;
+			break;
+
+		case 5:
+			hcall->control = HVCALL_RESET_DEBUG_SESSION;
+			hcall->expect = HV_STATUS_INVALID_HYPERCALL_CODE;
+			break;
+		case 6:
+			dbg->eax |= HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING;
+			hcall->control = HVCALL_RESET_DEBUG_SESSION;
+			hcall->expect = HV_STATUS_ACCESS_DENIED;
+			break;
+		case 7:
+			feat->ebx |= HV_DEBUGGING;
+			hcall->control = HVCALL_RESET_DEBUG_SESSION;
+			hcall->expect = HV_STATUS_OPERATION_DENIED;
+			break;
+
+		case 8:
+			hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE;
+			hcall->expect = HV_STATUS_ACCESS_DENIED;
+			break;
+		case 9:
+			recomm->eax |= HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED;
+			hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE;
+			hcall->expect = HV_STATUS_SUCCESS;
+			break;
+		case 10:
+			hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX;
+			hcall->expect = HV_STATUS_ACCESS_DENIED;
+			break;
+		case 11:
+			recomm->eax |= HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED;
+			hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX;
+			hcall->expect = HV_STATUS_SUCCESS;
+			break;
+
+		case 12:
+			hcall->control = HVCALL_SEND_IPI;
+			hcall->expect = HV_STATUS_ACCESS_DENIED;
+			break;
+		case 13:
+			recomm->eax |= HV_X64_CLUSTER_IPI_RECOMMENDED;
+			hcall->control = HVCALL_SEND_IPI;
+			hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT;
+			break;
+		case 14:
+			/* Nothing in 'sparse banks' -> success */
+			hcall->control = HVCALL_SEND_IPI_EX;
+			hcall->expect = HV_STATUS_SUCCESS;
+			break;
+
+		case 15:
+			hcall->control = HVCALL_NOTIFY_LONG_SPIN_WAIT;
+			hcall->expect = HV_STATUS_ACCESS_DENIED;
+			break;
+		case 16:
+			recomm->ebx = 0xfff;
+			hcall->control = HVCALL_NOTIFY_LONG_SPIN_WAIT;
+			hcall->expect = HV_STATUS_SUCCESS;
+			break;
+		case 17:
+			/* XMM fast hypercall */
+			hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT;
+			hcall->ud_expected = true;
+			break;
+		case 18:
+			feat->edx |= HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE;
+			hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT;
+			hcall->ud_expected = false;
+			hcall->expect = HV_STATUS_SUCCESS;
+			break;
+		case 19:
+			kvm_vm_free(vm); /* all stages done; drop the VM created for this pass */
+			return; /* NOTE(review): prev_cpuid is not freed before returning */
+		}
+
+		vcpu_set_cpuid(vcpu);
+		/* Snapshot the CPUID just applied so the next stage's VM inherits it. */
+		memcpy(prev_cpuid, vcpu->cpuid, kvm_cpuid2_size(vcpu->cpuid->nent));
+
+		pr_debug("Stage %d: testing hcall: 0x%lx\n", stage, hcall->control);
+
+		vcpu_run(vcpu);
+		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+			    "unexpected exit reason: %u (%s)",
+			    run->exit_reason, exit_reason_str(run->exit_reason));
+
+		switch (get_ucall(vcpu, &uc)) {
+		case UCALL_ABORT:
+			REPORT_GUEST_ASSERT_2(uc, "arg1 = %lx, arg2 = %lx");
+			return;
+		case UCALL_DONE:
+			break;
+		default:
+			TEST_FAIL("Unhandled ucall: %ld", uc.cmd);
+			return;
+		}
+
+		stage++;
+		kvm_vm_free(vm);
+	}
+}
+
+int main(void)
+{
+	pr_info("Testing access to Hyper-V specific MSRs\n");
+	guest_test_msrs_access();	/* MSR stages run first */
+
+	pr_info("Testing access to Hyper-V hypercalls\n");
+	guest_test_hcalls_access();	/* then the hypercall stages */
+}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
new file mode 100644
index 000000000000..a380ad7bb9b3
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
@@ -0,0 +1,167 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Hyper-V SVM enlightenment tests
+ *
+ * Copyright (C) 2022, Red Hat, Inc.
+ *
+ * Tests for Hyper-V extensions to SVM.
+ */
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <linux/bitmap.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "hyperv.h"
+
+#define L2_GUEST_STACK_SIZE 256
+
+struct hv_enlightenments {	/* guest_code() overlays this on vmcb->control.reserved_sw */
+	struct __packed hv_enlightenments_control {
+		u32 nested_flush_hypercall:1;
+		u32 msr_bitmap:1;	/* guest_code() sets this for the eMSR-bitmap checks */
+		u32 enlightened_npt_tlb: 1;
+		u32 reserved:29;
+	} __packed hv_enlightenments_control;
+	u32 hv_vp_id;
+	u64 hv_vm_id;
+	u64 partition_assist_page;
+	u64 reserved;
+} __packed;
+
+/*
+ * Hyper-V uses the software reserved clean bit in VMCB
+ */
+#define VMCB_HV_NESTED_ENLIGHTENMENTS (1U << 31)
+
+void l2_guest_code(void)
+{
+	GUEST_SYNC(3);
+	/* Exit to L1, which expects SVM_EXIT_VMMCALL and steps RIP past it */
+	vmmcall();
+
+	/* MSR-Bitmap tests; the per-line notes are the outcomes L1 asserts */
+	rdmsr(MSR_FS_BASE); /* intercepted */
+	rdmsr(MSR_FS_BASE); /* intercepted */
+	rdmsr(MSR_GS_BASE); /* not intercepted */
+	vmmcall();
+	rdmsr(MSR_GS_BASE); /* intercepted */
+
+	GUEST_SYNC(5);
+
+	/* Done, exit to L1 and never come back.  */
+	vmmcall();
+}
+
+static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm)
+{
+	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+	struct vmcb *vmcb = svm->vmcb;
+	struct hv_enlightenments *hve =
+		(struct hv_enlightenments *)vmcb->control.reserved_sw;
+
+	GUEST_SYNC(1);
+
+	wrmsr(HV_X64_MSR_GUEST_OS_ID, (u64)0x8100 << 48);
+
+	GUEST_ASSERT(svm->vmcb_gpa);
+	/* Prepare for L2 execution. */
+	generic_svm_setup(svm, l2_guest_code,
+			  &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+	GUEST_SYNC(2);
+	run_guest(vmcb, svm->vmcb_gpa);
+	GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+	GUEST_SYNC(4);
+	vmcb->save.rip += 3; /* vmmcall */
+
+	/* Intercept RDMSR 0xc0000100 */
+	vmcb->control.intercept |= 1ULL << INTERCEPT_MSR_PROT;
+	set_bit(2 * (MSR_FS_BASE & 0x1fff), svm->msr + 0x800);
+	run_guest(vmcb, svm->vmcb_gpa);
+	GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR);
+	vmcb->save.rip += 2; /* rdmsr */
+
+	/* Enable enlightened MSR bitmap */
+	hve->hv_enlightenments_control.msr_bitmap = 1;
+	run_guest(vmcb, svm->vmcb_gpa);
+	GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR);
+	vmcb->save.rip += 2; /* rdmsr */
+
+	/* Intercept RDMSR 0xc0000101 without telling KVM about it */
+	set_bit(2 * (MSR_GS_BASE & 0x1fff), svm->msr + 0x800);
+	/* Flag VMCB_HV_NESTED_ENLIGHTENMENTS as clean, i.e. claim the bitmap is unchanged */
+	vmcb->control.clean |= VMCB_HV_NESTED_ENLIGHTENMENTS;
+	run_guest(vmcb, svm->vmcb_gpa);
+	/* Make sure we don't see SVM_EXIT_MSR here so eMSR bitmap works */
+	GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+	vmcb->save.rip += 3; /* vmmcall */
+
+	/* Now tell KVM we've changed MSR-Bitmap */
+	vmcb->control.clean &= ~VMCB_HV_NESTED_ENLIGHTENMENTS;
+	run_guest(vmcb, svm->vmcb_gpa);
+	GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR);
+	vmcb->save.rip += 2; /* rdmsr */
+
+	run_guest(vmcb, svm->vmcb_gpa);
+	GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+	GUEST_SYNC(6);
+
+	GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+	vm_vaddr_t nested_gva = 0;
+
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+	struct kvm_run *run;
+	struct ucall uc;
+	int stage;
+
+	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
+
+	/* Create VM */
+	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+	vcpu_set_hv_cpuid(vcpu);
+	run = vcpu->run;
+	vcpu_alloc_svm(vm, &nested_gva);
+	vcpu_args_set(vcpu, 1, nested_gva);
+	/* Drive the guest; the n-th sync it reports must carry stage number n. */
+	for (stage = 1;; stage++) {
+		vcpu_run(vcpu);
+		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+			    "Stage %d: unexpected exit reason: %u (%s),\n",
+			    stage, run->exit_reason,
+			    exit_reason_str(run->exit_reason));
+
+		switch (get_ucall(vcpu, &uc)) {
+		case UCALL_ABORT:
+			REPORT_GUEST_ASSERT(uc);
+			/* NOT REACHED */
+		case UCALL_SYNC:
+			break;
+		case UCALL_DONE:
+			goto done;
+		default:
+			TEST_FAIL("Unknown ucall %lu", uc.cmd);
+		}
+
+		/* UCALL_SYNC is handled here.  */
+		TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+			    uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
+			    stage, (ulong)uc.args[1]);
+
+	}
+
+done:
+	kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c b/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c
new file mode 100644
index 000000000000..813ce282cf56
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c
@@ -0,0 +1,195 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021, Google LLC.
+ *
+ * Tests for adjusting the KVM clock from userspace
+ */
+#include <asm/kvm_para.h>
+#include <asm/pvclock.h>
+#include <asm/pvclock-abi.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <time.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+struct test_case {
+	uint64_t kvmclock_base;		/* value setup_clock() stores in data.clock */
+	int64_t realtime_offset;	/* nonzero: offset applied to data.realtime */
+};
+
+static struct test_case test_cases[] = {	/* host and guest each loop over every entry */
+	{ .kvmclock_base = 0 },
+	{ .kvmclock_base = 180 * NSEC_PER_SEC },
+	{ .kvmclock_base = 0, .realtime_offset = -180 * NSEC_PER_SEC },
+	{ .kvmclock_base = 0, .realtime_offset = 180 * NSEC_PER_SEC },
+};
+
+#define GUEST_SYNC_CLOCK(__stage, __val)			\
+		GUEST_SYNC_ARGS(__stage, __val, 0, 0, 0)
+
+static void guest_main(vm_paddr_t pvti_pa, struct pvclock_vcpu_time_info *pvti)
+{
+	int i;
+	/* Enable MSR_KVM_SYSTEM_TIME_NEW at pvti_pa, then sync once per test case. */
+	wrmsr(MSR_KVM_SYSTEM_TIME_NEW, pvti_pa | KVM_MSR_ENABLED);
+	for (i = 0; i < ARRAY_SIZE(test_cases); i++)
+		GUEST_SYNC_CLOCK(i, __pvclock_read_cycles(pvti, rdtsc()));
+}
+
+#define EXPECTED_FLAGS (KVM_CLOCK_REALTIME | KVM_CLOCK_HOST_TSC)
+
+static inline void assert_flags(struct kvm_clock_data *data)
+{	/* Fails unless every bit of EXPECTED_FLAGS is set in data->flags. */
+	TEST_ASSERT((data->flags & EXPECTED_FLAGS) == EXPECTED_FLAGS,
+		    "unexpected clock data flags: %x (want set: %x)",
+		    data->flags, EXPECTED_FLAGS);
+}
+
+static void handle_sync(struct ucall *uc, struct kvm_clock_data *start,
+			struct kvm_clock_data *end)
+{
+	uint64_t obs, exp_lo, exp_hi;
+	/* obs: guest's clock reading (presumably __val of GUEST_SYNC_CLOCK - confirm) */
+	obs = uc->args[2];
+	exp_lo = start->clock;	/* host clock read before the vCPU ran */
+	exp_hi = end->clock;	/* host clock read after the vCPU exited */
+
+	assert_flags(start);
+	assert_flags(end);
+	/* The guest's reading must lie within [start, end], inclusive. */
+	TEST_ASSERT(exp_lo <= obs && obs <= exp_hi,
+		    "unexpected kvm-clock value: %"PRIu64" expected range: [%"PRIu64", %"PRIu64"]",
+		    obs, exp_lo, exp_hi);
+
+	pr_info("kvm-clock value: %"PRIu64" expected range [%"PRIu64", %"PRIu64"]\n",
+		obs, exp_lo, exp_hi);
+}
+
+static void handle_abort(struct ucall *uc)
+{	/* Forwards the guest's UCALL_ABORT to REPORT_GUEST_ASSERT(). */
+	REPORT_GUEST_ASSERT(*uc);
+}
+
+static void setup_clock(struct kvm_vm *vm, struct test_case *test_case)
+{
+	struct kvm_clock_data data;
+
+	memset(&data, 0, sizeof(data));
+	/* Build the KVM_SET_CLOCK payload from the test case. */
+	data.clock = test_case->kvmclock_base;
+	if (test_case->realtime_offset) {
+		struct timespec ts;
+		int r;
+
+		data.flags |= KVM_CLOCK_REALTIME;
+		do {	/* retry only while the call fails with EINTR */
+			r = clock_gettime(CLOCK_REALTIME, &ts);
+			if (!r)
+				break;
+		} while (errno == EINTR);
+
+		TEST_ASSERT(!r, "clock_gettime() failed: %d\n", r);
+		/* realtime = current CLOCK_REALTIME in ns + the test case's offset */
+		data.realtime = ts.tv_sec * NSEC_PER_SEC;
+		data.realtime += ts.tv_nsec;
+		data.realtime += test_case->realtime_offset;
+	}
+
+	vm_ioctl(vm, KVM_SET_CLOCK, &data);
+}
+
+static void enter_guest(struct kvm_vcpu *vcpu)
+{
+	struct kvm_clock_data start, end;
+	struct kvm_run *run = vcpu->run;
+	struct kvm_vm *vm = vcpu->vm;
+	struct ucall uc;
+	int i;
+	/* One guest run per test case; the guest syncs once per case as well. */
+	for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
+		setup_clock(vm, &test_cases[i]);
+		/* Bracket the run with host clock reads: start before, end after. */
+		vm_ioctl(vm, KVM_GET_CLOCK, &start);
+
+		vcpu_run(vcpu);
+		vm_ioctl(vm, KVM_GET_CLOCK, &end);
+
+		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+			    "unexpected exit reason: %u (%s)",
+			    run->exit_reason, exit_reason_str(run->exit_reason));
+
+		switch (get_ucall(vcpu, &uc)) {
+		case UCALL_SYNC:
+			handle_sync(&uc, &start, &end);
+			break;
+		case UCALL_ABORT:
+			handle_abort(&uc);
+			return;
+		default:
+			TEST_ASSERT(0, "unhandled ucall: %ld\n", uc.cmd);
+		}
+	}
+}
+
+#define CLOCKSOURCE_PATH "/sys/devices/system/clocksource/clocksource0/current_clocksource"
+
+static void check_clocksource(void)
+{
+	char *clk_name = NULL;	/* released at 'out'; free(NULL) is a no-op */
+	struct stat st;
+	FILE *fp;
+
+	fp = fopen(CLOCKSOURCE_PATH, "r");
+	if (!fp) {
+		pr_info("failed to open clocksource file: %d; assuming TSC.\n",
+			errno);
+		return;
+	}
+	/* A failed fopen/fstat/fgets is tolerated; only a non-"tsc" name fails. */
+	if (fstat(fileno(fp), &st)) {
+		pr_info("failed to stat clocksource file: %d; assuming TSC.\n",
+			errno);
+		goto out;
+	}
+
+	clk_name = malloc(st.st_size);
+	TEST_ASSERT(clk_name, "failed to allocate buffer to read file\n");
+	if (!fgets(clk_name, st.st_size, fp)) {
+		pr_info("failed to read clocksource file: %d; assuming TSC.\n",
+			ferror(fp));
+		goto out;
+	}
+
+	TEST_ASSERT(!strncmp(clk_name, "tsc\n", st.st_size),
+		    "clocksource not supported: %s", clk_name);
+out:
+	free(clk_name);	/* the buffer was previously leaked on every path */
+	fclose(fp);
+}
+
+int main(void)
+{
+	struct kvm_vcpu *vcpu;
+	vm_vaddr_t pvti_gva;
+	vm_paddr_t pvti_gpa;
+	struct kvm_vm *vm;
+	int flags;
+	/* Requires KVM_CLOCK_REALTIME among the KVM_CAP_ADJUST_CLOCK flags. */
+	flags = kvm_check_cap(KVM_CAP_ADJUST_CLOCK);
+	TEST_REQUIRE(flags & KVM_CLOCK_REALTIME);
+
+	check_clocksource();
+
+	vm = vm_create_with_one_vcpu(&vcpu, guest_main);
+	/* The guest receives the same page twice: as a GPA and as a GVA. */
+	pvti_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000);
+	pvti_gpa = addr_gva2gpa(vm, pvti_gva);
+	vcpu_args_set(vcpu, 2, pvti_gpa, pvti_gva);
+
+	enter_guest(vcpu);
+	kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
new file mode 100644
index 000000000000..619655c1a1f3
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020, Google LLC.
+ *
+ * Tests for KVM paravirtual feature disablement
+ */
+#include <asm/kvm_para.h>
+#include <linux/kvm_para.h>
+#include <stdint.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+struct msr_data {
+	uint32_t idx;		/* MSR index handed to rdmsr_safe()/wrmsr_safe() */
+	const char *name;	/* stringified macro name, filled in by TEST_MSR() */
+};
+
+#define TEST_MSR(msr) { .idx = msr, .name = #msr }
+#define UCALL_PR_MSR 0xdeadbeef
+#define PR_MSR(msr) ucall(UCALL_PR_MSR, 1, msr)
+
+/*
+ * KVM paravirtual msrs to test. Expect a #GP if any of these msrs are read or
+ * written, as the KVM_CPUID_FEATURES leaf is cleared.
+ */
+static struct msr_data msrs_to_test[] = {
+	TEST_MSR(MSR_KVM_SYSTEM_TIME),
+	TEST_MSR(MSR_KVM_SYSTEM_TIME_NEW),
+	TEST_MSR(MSR_KVM_WALL_CLOCK),
+	TEST_MSR(MSR_KVM_WALL_CLOCK_NEW),
+	TEST_MSR(MSR_KVM_ASYNC_PF_EN),
+	TEST_MSR(MSR_KVM_STEAL_TIME),
+	TEST_MSR(MSR_KVM_PV_EOI_EN),
+	TEST_MSR(MSR_KVM_POLL_CONTROL),
+	TEST_MSR(MSR_KVM_ASYNC_PF_INT),
+	TEST_MSR(MSR_KVM_ASYNC_PF_ACK),
+};
+
+static void test_msr(struct msr_data *msr)
+{
+	uint64_t ignored;
+	uint8_t vector;
+
+	PR_MSR(msr);	/* ucall so the host can print which MSR is under test */
+	/* Both the read and the write of the MSR must raise #GP. */
+	vector = rdmsr_safe(msr->idx, &ignored);
+	GUEST_ASSERT_1(vector == GP_VECTOR, vector);
+
+	vector = wrmsr_safe(msr->idx, 0);
+	GUEST_ASSERT_1(vector == GP_VECTOR, vector);
+}
+
+struct hcall_data {
+	uint64_t nr;		/* hypercall number handed to kvm_hypercall() */
+	const char *name;	/* stringified macro name, filled in by TEST_HCALL() */
+};
+
+#define TEST_HCALL(hc) { .nr = hc, .name = #hc }
+#define UCALL_PR_HCALL 0xdeadc0de
+#define PR_HCALL(hc) ucall(UCALL_PR_HCALL, 1, hc)
+
+/*
+ * KVM hypercalls to test. Expect -KVM_ENOSYS when called, as the corresponding
+ * features have been cleared in KVM_CPUID_FEATURES.
+ */
+static struct hcall_data hcalls_to_test[] = {
+	TEST_HCALL(KVM_HC_KICK_CPU),
+	TEST_HCALL(KVM_HC_SEND_IPI),
+	TEST_HCALL(KVM_HC_SCHED_YIELD),
+};
+
+static void test_hcall(struct hcall_data *hc)
+{
+	uint64_t r;
+
+	PR_HCALL(hc);	/* ucall so the host can print which hypercall is under test */
+	r = kvm_hypercall(hc->nr, 0, 0, 0, 0);	/* all four arguments are zero */
+	GUEST_ASSERT(r == -KVM_ENOSYS);
+}
+
+static void guest_main(void)
+{
+	struct msr_data *msr = msrs_to_test;
+	struct hcall_data *hc = hcalls_to_test;
+
+	/* Walk both tables in order: every PV MSR first, then every hypercall. */
+	while (msr < msrs_to_test + ARRAY_SIZE(msrs_to_test))
+		test_msr(msr++);
+
+	while (hc < hcalls_to_test + ARRAY_SIZE(hcalls_to_test))
+		test_hcall(hc++);
+
+	GUEST_DONE();
+}
+
+static void pr_msr(struct ucall *uc)
+{
+	const struct msr_data *entry = (const struct msr_data *)uc->args[0];
+	/* args[0] is the table entry pointer that the guest passed to PR_MSR(). */
+	pr_info("testing msr: %s (%#x)\n", entry->name, entry->idx);
+}
+
+static void pr_hcall(struct ucall *uc)
+{
+	const struct hcall_data *entry = (const struct hcall_data *)uc->args[0];
+	/* args[0] is the table entry pointer that the guest passed to PR_HCALL(). */
+	pr_info("testing hcall: %s (%lu)\n", entry->name, entry->nr);
+}
+
+static void enter_guest(struct kvm_vcpu *vcpu)
+{
+	struct kvm_run *run = vcpu->run;
+	struct ucall uc;
+	/* Re-enter the guest until it reports UCALL_DONE or UCALL_ABORT. */
+	while (true) {
+		vcpu_run(vcpu);
+		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+			    "unexpected exit reason: %u (%s)",
+			    run->exit_reason, exit_reason_str(run->exit_reason));
+
+		switch (get_ucall(vcpu, &uc)) {	/* NOTE(review): no default; other ucalls just loop */
+		case UCALL_PR_MSR:
+			pr_msr(&uc);
+			break;
+		case UCALL_PR_HCALL:
+			pr_hcall(&uc);
+			break;
+		case UCALL_ABORT:
+			REPORT_GUEST_ASSERT_1(uc, "vector = %lu");
+			return;
+		case UCALL_DONE:
+			return;
+		}
+	}
+}
+
+int main(void)
+{
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+
+	TEST_REQUIRE(kvm_has_cap(KVM_CAP_ENFORCE_PV_FEATURE_CPUID));
+
+	vm = vm_create_with_one_vcpu(&vcpu, guest_main);
+	/* Enable PV-feature CPUID enforcement, then clear the KVM_CPUID_FEATURES leaf. */
+	vcpu_enable_cap(vcpu, KVM_CAP_ENFORCE_PV_FEATURE_CPUID, 1);
+
+	vcpu_clear_cpuid_entry(vcpu, KVM_CPUID_FEATURES);
+
+	vm_init_descriptor_tables(vm);
+	vcpu_init_descriptor_tables(vcpu);
+
+	enter_guest(vcpu);
+	kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c b/tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c
new file mode 100644
index 000000000000..3cc4b86832fe
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * maximum APIC ID capability tests
+ *
+ * Copyright (C) 2022, Intel, Inc.
+ *
+ * Tests for getting/setting maximum APIC ID capability
+ */
+
+#include "kvm_util.h"
+
+#define MAX_VCPU_ID	2
+
+int main(int argc, char *argv[])
+{
+	struct kvm_vm *vm;
+	int ret;
+
+	vm = vm_create_barebones();
+
+	/* Get KVM_CAP_MAX_VCPU_ID cap supported in KVM */
+	ret = vm_check_cap(vm, KVM_CAP_MAX_VCPU_ID);
+
+	/* Try to set KVM_CAP_MAX_VCPU_ID beyond KVM cap */
+	ret = __vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, ret + 1);
+	TEST_ASSERT(ret < 0,
+		    "Setting KVM_CAP_MAX_VCPU_ID beyond KVM cap should fail");
+
+	/* Set KVM_CAP_MAX_VCPU_ID */
+	vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID);
+
+
+	/* Try to set KVM_CAP_MAX_VCPU_ID again */
+	ret = __vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID + 1);
+	TEST_ASSERT(ret < 0,
+		    "Setting KVM_CAP_MAX_VCPU_ID multiple times should fail");
+
+	/* Create vCPU with id MAX_VCPU_ID; NOTE(review): message says '>' but id == cap */
+	ret = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)MAX_VCPU_ID);
+	TEST_ASSERT(ret < 0, "Creating vCPU with ID > MAX_VCPU_ID should fail");
+
+	kvm_vm_free(vm);
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c b/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c
index 00bb97d76000..fb02581953a3 100644
--- a/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c
+++ b/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c
@@ -44,7 +44,7 @@ void *thr(void *arg)
 	struct kvm_run *run = tc->run;
 
 	res = ioctl(kvmcpu, KVM_RUN, 0);
-	printf("ret1=%d exit_reason=%d suberror=%d\n",
+	pr_info("ret1=%d exit_reason=%d suberror=%d\n",
 		res, run->exit_reason, run->internal.suberror);
 
 	return 0;
@@ -59,10 +59,10 @@ void test(void)
 
 	kvm = open("/dev/kvm", O_RDWR);
 	TEST_ASSERT(kvm != -1, "failed to open /dev/kvm");
-	kvmvm = ioctl(kvm, KVM_CREATE_VM, 0);
-	TEST_ASSERT(kvmvm != -1, "KVM_CREATE_VM failed");
+	kvmvm = __kvm_ioctl(kvm, KVM_CREATE_VM, NULL);
+	TEST_ASSERT(kvmvm > 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, kvmvm));
 	kvmcpu = ioctl(kvmvm, KVM_CREATE_VCPU, 0);
-	TEST_ASSERT(kvmcpu != -1, "KVM_CREATE_VCPU failed");
+	TEST_ASSERT(kvmcpu != -1, KVM_IOCTL_ERROR(KVM_CREATE_VCPU, kvmcpu));
 	run = (struct kvm_run *)mmap(0, 4096, PROT_READ|PROT_WRITE, MAP_SHARED,
 				    kvmcpu, 0);
 	tc.kvmcpu = kvmcpu;
@@ -82,8 +82,9 @@ int get_warnings_count(void)
 	FILE *f;
 
 	f = popen("dmesg | grep \"WARNING:\" | wc -l", "r");
-	fscanf(f, "%d", &warnings);
-	fclose(f);
+	if (fscanf(f, "%d", &warnings) < 1)
+		warnings = 0;
+	pclose(f);
 
 	return warnings;
 }
@@ -92,15 +93,9 @@ int main(void)
 {
 	int warnings_before, warnings_after;
 
-	if (!is_intel_cpu()) {
-		printf("Must be run on an Intel CPU, skipping test\n");
-		exit(KSFT_SKIP);
-	}
+	TEST_REQUIRE(is_intel_cpu());
 
-	if (vm_is_unrestricted_guest(NULL)) {
-		printf("Unrestricted guest must be disabled, skipping test\n");
-		exit(KSFT_SKIP);
-	}
+	TEST_REQUIRE(!vm_is_unrestricted_guest(NULL));
 
 	warnings_before = get_warnings_count();
 
diff --git a/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c
new file mode 100644
index 000000000000..016070cad36e
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+
+#define CPUID_MWAIT (1u << 3)
+
+enum monitor_mwait_testcases {
+	MWAIT_QUIRK_DISABLED = BIT(0),	/* main() disables KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS */
+	MISC_ENABLES_QUIRK_DISABLED = BIT(1), /* main() disables KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT */
+	MWAIT_DISABLED = BIT(2),	/* main() writes MISC_ENABLE without the MWAIT bit */
+};
+
+static void guest_monitor_wait(int testcase)
+{
+	/*
+	 * If both MWAIT and its quirk are disabled, MONITOR/MWAIT should #UD,
+	 * in all other scenarios KVM should emulate them as nops.
+	 */
+	bool fault_wanted = (testcase & MWAIT_QUIRK_DISABLED) &&
+			    (testcase & MWAIT_DISABLED);
+	u8 vector;
+
+	GUEST_SYNC(testcase);	/* host applies this testcase's quirks/MSR before resuming */
+
+	/*
+	 * Arbitrarily MONITOR this function, SVM performs fault checks before
+	 * intercept checks, so the inputs for MONITOR and MWAIT must be valid.
+	 */
+	vector = kvm_asm_safe("monitor", "a"(guest_monitor_wait), "c"(0), "d"(0));
+	if (fault_wanted)
+		GUEST_ASSERT_2(vector == UD_VECTOR, testcase, vector);
+	else
+		GUEST_ASSERT_2(!vector, testcase, vector);
+	/* MWAIT is held to the same expectation as MONITOR above. */
+	vector = kvm_asm_safe("mwait", "a"(guest_monitor_wait), "c"(0), "d"(0));
+	if (fault_wanted)
+		GUEST_ASSERT_2(vector == UD_VECTOR, testcase, vector);
+	else
+		GUEST_ASSERT_2(!vector, testcase, vector);
+}
+
+static void guest_code(void)
+{
+	guest_monitor_wait(MWAIT_DISABLED);	/* neither quirk disabled */
+
+	guest_monitor_wait(MWAIT_QUIRK_DISABLED | MWAIT_DISABLED);
+	/* MISC_ENABLES quirk disabled: MWAIT both off and on. */
+	guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_DISABLED);
+	guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED);
+	/* Both quirks disabled: MWAIT both off and on. */
+	guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED | MWAIT_DISABLED);
+	guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED);
+
+	GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+	uint64_t disabled_quirks;
+	struct kvm_vcpu *vcpu;
+	struct kvm_run *run;
+	struct kvm_vm *vm;
+	struct ucall uc;
+	int testcase;
+
+	TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2));
+
+	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+	vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_MWAIT);
+
+	run = vcpu->run;
+
+	vm_init_descriptor_tables(vm);
+	vcpu_init_descriptor_tables(vcpu);
+	/* Each UCALL_SYNC carries the testcase bits to configure before resuming. */
+	while (1) {
+		vcpu_run(vcpu);
+
+		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+			    "Unexpected exit reason: %u (%s),\n",
+			    run->exit_reason,
+			    exit_reason_str(run->exit_reason));
+
+		switch (get_ucall(vcpu, &uc)) {
+		case UCALL_SYNC:
+			testcase = uc.args[1];
+			break;
+		case UCALL_ABORT:
+			REPORT_GUEST_ASSERT_2(uc, "testcase = %lx, vector = %ld");
+			goto done;
+		case UCALL_DONE:
+			goto done;
+		default:
+			TEST_FAIL("Unknown ucall %lu", uc.cmd);
+			goto done;
+		}
+		/* Translate the testcase bits reported by the guest into quirk flags. */
+		disabled_quirks = 0;
+		if (testcase & MWAIT_QUIRK_DISABLED)
+			disabled_quirks |= KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS;
+		if (testcase & MISC_ENABLES_QUIRK_DISABLED)
+			disabled_quirks |= KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT;
+		vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, disabled_quirks);
+
+		/*
+		 * If the MISC_ENABLES quirk (KVM neglects to update CPUID to
+		 * enable/disable MWAIT) is disabled, toggle the ENABLE_MWAIT
+		 * bit in MISC_ENABLES accordingly.  If the quirk is enabled,
+		 * the only valid configuration is MWAIT disabled, as CPUID
+		 * can't be manually changed after running the vCPU.
+		 */
+		if (!(testcase & MISC_ENABLES_QUIRK_DISABLED)) {
+			TEST_ASSERT(testcase & MWAIT_DISABLED,
+				    "Can't toggle CPUID features after running vCPU");
+			continue;
+		}
+
+		vcpu_set_msr(vcpu, MSR_IA32_MISC_ENABLE,
+			     (testcase & MWAIT_DISABLED) ? 0 : MSR_IA32_MISC_ENABLE_MWAIT);
+	}
+
+done:
+	kvm_vm_free(vm);
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c b/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c
new file mode 100644
index 000000000000..ac33835f78f4
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c
@@ -0,0 +1,295 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#define _GNU_SOURCE /* for program_invocation_short_name */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+#include "svm_util.h"
+
+#define L2_GUEST_STACK_SIZE 256
+
+/*
+ * Arbitrary, never shoved into KVM/hardware, just need to avoid conflict with
+ * the "real" exceptions used, #SS/#GP/#DF (12/13/8).
+ */
+#define FAKE_TRIPLE_FAULT_VECTOR	0xaa
+
+/* Arbitrary 32-bit error code injected by this test. */
+#define SS_ERROR_CODE 0xdeadbeef
+
+/*
+ * Bit '0' is set on Intel if the exception occurs while delivering a previous
+ * event/exception.  AMD's wording is ambiguous, but presumably the bit is set
+ * if the exception occurs while delivering an external event, e.g. NMI or INTR,
+ * but not for exceptions that occur when delivering other exceptions or
+ * software interrupts.
+ *
+ * Note, Intel's name for it, "External event", is misleading and much more
+ * aligned with AMD's behavior, but the SDM is quite clear on its behavior.
+ */
+#define ERROR_CODE_EXT_FLAG	BIT(0)
+
+/*
+ * Bit '1' is set if the fault occurred when looking up a descriptor in the
+ * IDT, which is the case here as the IDT is empty/NULL.
+ */
+#define ERROR_CODE_IDT_FLAG	BIT(1)
+
+/*
+ * The #GP that occurs when vectoring #SS should show the index into the IDT
+ * for #SS, plus have the "IDT flag" set.
+ */
+#define GP_ERROR_CODE_AMD ((SS_VECTOR * 8) | ERROR_CODE_IDT_FLAG)
+#define GP_ERROR_CODE_INTEL ((SS_VECTOR * 8) | ERROR_CODE_IDT_FLAG | ERROR_CODE_EXT_FLAG)
+
+/*
+ * Intel and AMD both shove '0' into the error code on #DF, regardless of what
+ * led to the double fault.
+ */
+#define DF_ERROR_CODE 0
+
+#define INTERCEPT_SS		(BIT_ULL(SS_VECTOR))
+#define INTERCEPT_SS_DF		(INTERCEPT_SS | BIT_ULL(DF_VECTOR))
+#define INTERCEPT_SS_GP_DF	(INTERCEPT_SS_DF | BIT_ULL(GP_VECTOR))
+
+static void l2_ss_pending_test(void)
+{
+	GUEST_SYNC(SS_VECTOR);
+}
+
+static void l2_ss_injected_gp_test(void)
+{
+	GUEST_SYNC(GP_VECTOR);
+}
+
+static void l2_ss_injected_df_test(void)
+{
+	GUEST_SYNC(DF_VECTOR);
+}
+
+static void l2_ss_injected_tf_test(void)
+{
+	GUEST_SYNC(FAKE_TRIPLE_FAULT_VECTOR);
+}
+
+static void svm_run_l2(struct svm_test_data *svm, void *l2_code, int vector,
+		       uint32_t error_code)
+{
+	struct vmcb *vmcb = svm->vmcb;
+	struct vmcb_control_area *ctrl = &vmcb->control;
+
+	vmcb->save.rip = (u64)l2_code;
+	run_guest(vmcb, svm->vmcb_gpa);
+
+	if (vector == FAKE_TRIPLE_FAULT_VECTOR)
+		return;
+
+	GUEST_ASSERT_EQ(ctrl->exit_code, (SVM_EXIT_EXCP_BASE + vector));
+	GUEST_ASSERT_EQ(ctrl->exit_info_1, error_code);
+}
+
+static void l1_svm_code(struct svm_test_data *svm)
+{
+	struct vmcb_control_area *ctrl = &svm->vmcb->control;
+	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+	generic_svm_setup(svm, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+	svm->vmcb->save.idtr.limit = 0;
+	ctrl->intercept |= BIT_ULL(INTERCEPT_SHUTDOWN);
+
+	ctrl->intercept_exceptions = INTERCEPT_SS_GP_DF;
+	svm_run_l2(svm, l2_ss_pending_test, SS_VECTOR, SS_ERROR_CODE);
+	svm_run_l2(svm, l2_ss_injected_gp_test, GP_VECTOR, GP_ERROR_CODE_AMD);
+
+	ctrl->intercept_exceptions = INTERCEPT_SS_DF;
+	svm_run_l2(svm, l2_ss_injected_df_test, DF_VECTOR, DF_ERROR_CODE);
+
+	ctrl->intercept_exceptions = INTERCEPT_SS;
+	svm_run_l2(svm, l2_ss_injected_tf_test, FAKE_TRIPLE_FAULT_VECTOR, 0);
+	GUEST_ASSERT_EQ(ctrl->exit_code, SVM_EXIT_SHUTDOWN);
+
+	GUEST_DONE();
+}
+
+static void vmx_run_l2(void *l2_code, int vector, uint32_t error_code)
+{
+	GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_code));
+
+	GUEST_ASSERT_EQ(vector == SS_VECTOR ? vmlaunch() : vmresume(), 0);
+
+	if (vector == FAKE_TRIPLE_FAULT_VECTOR)
+		return;
+
+	GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_EXCEPTION_NMI);
+	GUEST_ASSERT_EQ((vmreadz(VM_EXIT_INTR_INFO) & 0xff), vector);
+	GUEST_ASSERT_EQ(vmreadz(VM_EXIT_INTR_ERROR_CODE), error_code);
+}
+
+static void l1_vmx_code(struct vmx_pages *vmx)
+{
+	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+	GUEST_ASSERT_EQ(prepare_for_vmx_operation(vmx), true);
+
+	GUEST_ASSERT_EQ(load_vmcs(vmx), true);
+
+	prepare_vmcs(vmx, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+	GUEST_ASSERT_EQ(vmwrite(GUEST_IDTR_LIMIT, 0), 0);
+
+	/*
+	 * VMX disallows injecting an exception with error_code[31:16] != 0,
+	 * and hardware will never generate a VM-Exit with bits 31:16 set.
+	 * KVM should likewise truncate the "bad" userspace value.
+	 */
+	GUEST_ASSERT_EQ(vmwrite(EXCEPTION_BITMAP, INTERCEPT_SS_GP_DF), 0);
+	vmx_run_l2(l2_ss_pending_test, SS_VECTOR, (u16)SS_ERROR_CODE);
+	vmx_run_l2(l2_ss_injected_gp_test, GP_VECTOR, GP_ERROR_CODE_INTEL);
+
+	GUEST_ASSERT_EQ(vmwrite(EXCEPTION_BITMAP, INTERCEPT_SS_DF), 0);
+	vmx_run_l2(l2_ss_injected_df_test, DF_VECTOR, DF_ERROR_CODE);
+
+	GUEST_ASSERT_EQ(vmwrite(EXCEPTION_BITMAP, INTERCEPT_SS), 0);
+	vmx_run_l2(l2_ss_injected_tf_test, FAKE_TRIPLE_FAULT_VECTOR, 0);
+	GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_TRIPLE_FAULT);
+
+	GUEST_DONE();
+}
+
+static void __attribute__((__flatten__)) l1_guest_code(void *test_data)
+{
+	if (this_cpu_has(X86_FEATURE_SVM))
+		l1_svm_code(test_data);
+	else
+		l1_vmx_code(test_data);
+}
+
+static void assert_ucall_vector(struct kvm_vcpu *vcpu, int vector)
+{
+	struct kvm_run *run = vcpu->run;
+	struct ucall uc;
+
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+		    "Unexpected exit reason: %u (%s),\n",
+		    run->exit_reason, exit_reason_str(run->exit_reason));
+
+	switch (get_ucall(vcpu, &uc)) {
+	case UCALL_SYNC:
+		TEST_ASSERT(vector == uc.args[1],
+			    "Expected L2 to ask for %d, got %ld", vector, uc.args[1]);
+		break;
+	case UCALL_DONE:
+		TEST_ASSERT(vector == -1,
+			    "Expected L2 to ask for %d, L2 says it's done", vector);
+		break;
+	case UCALL_ABORT:
+		TEST_FAIL("%s at %s:%ld (0x%lx != 0x%lx)",
+			  (const char *)uc.args[0], __FILE__, uc.args[1],
+			  uc.args[2], uc.args[3]);
+		break;
+	default:
+		TEST_FAIL("Expected L2 to ask for %d, got unexpected ucall %lu", vector, uc.cmd);
+	}
+}
+
+static void queue_ss_exception(struct kvm_vcpu *vcpu, bool inject)
+{
+	struct kvm_vcpu_events events;
+
+	vcpu_events_get(vcpu, &events);
+
+	TEST_ASSERT(!events.exception.pending,
+		    "Vector %d unexpectedly pending", events.exception.nr);
+	TEST_ASSERT(!events.exception.injected,
+		    "Vector %d unexpectedly injected", events.exception.nr);
+	/* Queue #SS as either pending (!inject) or injected, never both. */
+	events.flags = KVM_VCPUEVENT_VALID_PAYLOAD;
+	events.exception.pending = !inject;
+	events.exception.injected = inject;
+	events.exception.nr = SS_VECTOR;
+	events.exception.has_error_code = true;
+	events.exception.error_code = SS_ERROR_CODE;
+	vcpu_events_set(vcpu, &events);
+}
+
+/*
+ * Verify KVM_{G,S}ET_EVENTS play nice with pending vs. injected exceptions
+ * when an exception is being queued for L2.  Specifically, verify that KVM
+ * honors L1 exception intercept controls when a #SS is pending/injected,
+ * triggers a #GP on vectoring the #SS, morphs to #DF if #GP isn't intercepted
+ * by L1, and finally causes (nested) SHUTDOWN if #DF isn't intercepted by L1.
+ */
+int main(int argc, char *argv[])
+{
+	vm_vaddr_t nested_test_data_gva;
+	struct kvm_vcpu_events events;
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+
+	TEST_REQUIRE(kvm_has_cap(KVM_CAP_EXCEPTION_PAYLOAD));
+	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX));
+
+	vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+	vm_enable_cap(vm, KVM_CAP_EXCEPTION_PAYLOAD, -2ul);
+
+	if (kvm_cpu_has(X86_FEATURE_SVM))
+		vcpu_alloc_svm(vm, &nested_test_data_gva);
+	else
+		vcpu_alloc_vmx(vm, &nested_test_data_gva);
+
+	vcpu_args_set(vcpu, 1, nested_test_data_gva);
+
+	/* Run L1 => L2.  L2 should sync and request #SS. */
+	vcpu_run(vcpu);
+	assert_ucall_vector(vcpu, SS_VECTOR);
+
+	/* Pend #SS and request immediate exit.  #SS should still be pending. */
+	queue_ss_exception(vcpu, false);
+	vcpu->run->immediate_exit = true;
+	vcpu_run_complete_io(vcpu);
+
+	/* Verify the pending event comes back out the same as it went in. */
+	vcpu_events_get(vcpu, &events);
+	ASSERT_EQ(events.flags & KVM_VCPUEVENT_VALID_PAYLOAD,
+		  KVM_VCPUEVENT_VALID_PAYLOAD);
+	ASSERT_EQ(events.exception.pending, true);
+	ASSERT_EQ(events.exception.nr, SS_VECTOR);
+	ASSERT_EQ(events.exception.has_error_code, true);
+	ASSERT_EQ(events.exception.error_code, SS_ERROR_CODE);
+
+	/*
+	 * Run for real with the pending #SS, L1 should get a VM-Exit due to
+	 * #SS interception and re-enter L2 to request #GP (via injected #SS).
+	 */
+	vcpu->run->immediate_exit = false;
+	vcpu_run(vcpu);
+	assert_ucall_vector(vcpu, GP_VECTOR);
+
+	/*
+	 * Inject #SS, the #SS should bypass interception and cause #GP, which
+	 * L1 should intercept before KVM morphs it to #DF.  L1 should then
+	 * disable #GP interception and run L2 to request #DF (via #SS => #GP).
+	 */
+	queue_ss_exception(vcpu, true);
+	vcpu_run(vcpu);
+	assert_ucall_vector(vcpu, DF_VECTOR);
+
+	/*
+	 * Inject #SS, the #SS should bypass interception and cause #GP, which
+	 * L1 is no longer intercepting, and so should see a #DF VM-Exit.  L1
+	 * should then signal that it is done.
+	 */
+	queue_ss_exception(vcpu, true);
+	vcpu_run(vcpu);
+	assert_ucall_vector(vcpu, FAKE_TRIPLE_FAULT_VECTOR);
+
+	/*
+	 * Inject #SS yet again.  L1 is not intercepting #GP or #DF, and so
+	 * should see nested TRIPLE_FAULT / SHUTDOWN.
+	 */
+	queue_ss_exception(vcpu, true);
+	vcpu_run(vcpu);
+	assert_ucall_vector(vcpu, -1);
+
+	kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
new file mode 100644
index 000000000000..59ffe7fd354f
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
@@ -0,0 +1,272 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
+ *
+ * Usage: to be run via nx_huge_pages_test.sh, which does the necessary
+ * environment setup and teardown
+ *
+ * Copyright (C) 2022, Google LLC.
+ */
+
+#define _GNU_SOURCE
+
+#include <fcntl.h>
+#include <stdint.h>
+#include <time.h>
+
+#include <test_util.h>
+#include "kvm_util.h"
+#include "processor.h"
+
+#define HPAGE_SLOT		10
+#define HPAGE_GPA		(4UL << 30) /* 4G prevents collision w/ slot 0 */
+#define HPAGE_GVA		HPAGE_GPA /* GVA is arbitrary, so use GPA. */
+#define PAGES_PER_2MB_HUGE_PAGE 512
+#define HPAGE_SLOT_NPAGES	(3 * PAGES_PER_2MB_HUGE_PAGE)
+
+/*
+ * Passed by nx_huge_pages_test.sh to provide an easy warning if this test is
+ * being run without it.
+ */
+#define MAGIC_TOKEN 887563923
+
+/*
+ * x86 opcode for the return instruction. Used to call into, and then
+ * immediately return from, memory backed with hugepages.
+ */
+#define RETURN_OPCODE 0xC3
+
+/* Call the specified memory address. */
+static void guest_do_CALL(uint64_t target)
+{
+	((void (*)(void)) target)();
+}
+
+/*
+ * Exit the VM after each memory access so that the userspace component of the
+ * test can make assertions about the pages backing the VM.
+ *
+ * See the below for an explanation of how each access should affect the
+ * backing mappings.
+ */
+void guest_code(void)
+{
+	uint64_t hpage_1 = HPAGE_GVA;
+	uint64_t hpage_2 = hpage_1 + (PAGE_SIZE * 512);
+	uint64_t hpage_3 = hpage_2 + (PAGE_SIZE * 512);
+
+	READ_ONCE(*(uint64_t *)hpage_1);
+	GUEST_SYNC(1);
+
+	READ_ONCE(*(uint64_t *)hpage_2);
+	GUEST_SYNC(2);
+
+	guest_do_CALL(hpage_1);
+	GUEST_SYNC(3);
+
+	guest_do_CALL(hpage_3);
+	GUEST_SYNC(4);
+
+	READ_ONCE(*(uint64_t *)hpage_1);
+	GUEST_SYNC(5);
+
+	READ_ONCE(*(uint64_t *)hpage_3);
+	GUEST_SYNC(6);
+}
+
+static void check_2m_page_count(struct kvm_vm *vm, int expected_pages_2m)
+{
+	int actual_pages_2m;
+
+	actual_pages_2m = vm_get_stat(vm, "pages_2m");
+
+	TEST_ASSERT(actual_pages_2m == expected_pages_2m,
+		    "Unexpected 2m page count. Expected %d, got %d",
+		    expected_pages_2m, actual_pages_2m);
+}
+
+static void check_split_count(struct kvm_vm *vm, int expected_splits)
+{
+	int actual_splits;
+
+	actual_splits = vm_get_stat(vm, "nx_lpage_splits");
+
+	TEST_ASSERT(actual_splits == expected_splits,
+		    "Unexpected NX huge page split count. Expected %d, got %d",
+		    expected_splits, actual_splits);
+}
+
+static void wait_for_reclaim(int reclaim_period_ms)
+{
+	long reclaim_wait_ms;
+	struct timespec ts;
+
+	reclaim_wait_ms = reclaim_period_ms * 5;
+	ts.tv_sec = reclaim_wait_ms / 1000;
+	ts.tv_nsec = (reclaim_wait_ms - (ts.tv_sec * 1000)) * 1000000;
+	nanosleep(&ts, NULL);
+}
+
+void run_test(int reclaim_period_ms, bool disable_nx_huge_pages,
+	      bool reboot_permissions)
+{
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+	uint64_t nr_bytes;
+	void *hva;
+	int r;
+
+	vm = vm_create(1);
+
+	if (disable_nx_huge_pages) {
+		r = __vm_disable_nx_huge_pages(vm);
+		if (reboot_permissions) {
+			TEST_ASSERT(!r, "Disabling NX huge pages should succeed if process has reboot permissions");
+		} else {
+			TEST_ASSERT(r == -1 && errno == EPERM,
+				    "This process should not have permission to disable NX huge pages");
+			return;
+		}
+	}
+
+	vcpu = vm_vcpu_add(vm, 0, guest_code);
+
+	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_HUGETLB,
+				    HPAGE_GPA, HPAGE_SLOT,
+				    HPAGE_SLOT_NPAGES, 0);
+
+	nr_bytes = HPAGE_SLOT_NPAGES * vm->page_size;
+
+	/*
+	 * Ensure that KVM can map HPAGE_SLOT with huge pages by mapping the
+	 * region into the guest with 2MiB pages whenever TDP is disabled (i.e.
+	 * whenever KVM is shadowing the guest page tables).
+	 *
+	 * When TDP is enabled, KVM should be able to map HPAGE_SLOT with huge
+	 * pages irrespective of the guest page size, so map with 4KiB pages
+	 * to test that that is the case.
+	 */
+	if (kvm_is_tdp_enabled())
+		virt_map_level(vm, HPAGE_GVA, HPAGE_GPA, nr_bytes, PG_LEVEL_4K);
+	else
+		virt_map_level(vm, HPAGE_GVA, HPAGE_GPA, nr_bytes, PG_LEVEL_2M);
+
+	hva = addr_gpa2hva(vm, HPAGE_GPA);
+	memset(hva, RETURN_OPCODE, nr_bytes);
+
+	check_2m_page_count(vm, 0);
+	check_split_count(vm, 0);
+
+	/*
+	 * The guest code will first read from the first hugepage, resulting
+	 * in a huge page mapping being created.
+	 */
+	vcpu_run(vcpu);
+	check_2m_page_count(vm, 1);
+	check_split_count(vm, 0);
+
+	/*
+	 * Then the guest code will read from the second hugepage, resulting
+	 * in another huge page mapping being created.
+	 */
+	vcpu_run(vcpu);
+	check_2m_page_count(vm, 2);
+	check_split_count(vm, 0);
+
+	/*
+	 * Next, the guest will execute from the first huge page, causing it
+	 * to be remapped at 4k.
+	 *
+	 * If NX huge pages are disabled, this should have no effect.
+	 */
+	vcpu_run(vcpu);
+	check_2m_page_count(vm, disable_nx_huge_pages ? 2 : 1);
+	check_split_count(vm, disable_nx_huge_pages ? 0 : 1);
+
+	/*
+	 * Executing from the third huge page (previously unaccessed) will
+	 * cause part to be mapped at 4k.
+	 *
+	 * If NX huge pages are disabled, it should be mapped at 2M.
+	 */
+	vcpu_run(vcpu);
+	check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 1);
+	check_split_count(vm, disable_nx_huge_pages ? 0 : 2);
+
+	/* Reading from the first huge page again should have no effect. */
+	vcpu_run(vcpu);
+	check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 1);
+	check_split_count(vm, disable_nx_huge_pages ? 0 : 2);
+
+	/* Give recovery thread time to run. */
+	wait_for_reclaim(reclaim_period_ms);
+
+	/*
+	 * Now that the reclaimer has run, all the split pages should be gone.
+	 *
+	 * If NX huge pages are disabled, the reclaimer will not run, so
+	 * nothing should change from here on.
+	 */
+	check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 1);
+	check_split_count(vm, 0);
+
+	/*
+	 * The 4k mapping on hpage 3 should have been removed, so check that
+	 * reading from it causes a huge page mapping to be installed.
+	 */
+	vcpu_run(vcpu);
+	check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 2);
+	check_split_count(vm, 0);
+
+	kvm_vm_free(vm);
+}
+
+static void help(char *name)
+{
+	puts("");
+	printf("usage: %s [-h] [-p period_ms] [-t token] [-r]\n", name);
+	puts("");
+	printf(" -p: The NX reclaim period in milliseconds.\n");
+	printf(" -t: The magic token to indicate environment setup is done.\n");
+	printf(" -r: The test has reboot permissions and can disable NX huge pages.\n");
+	puts("");
+	exit(0);
+}
+
+int main(int argc, char **argv)
+{
+	int reclaim_period_ms = 0, token = 0, opt;
+	bool reboot_permissions = false;
+
+	while ((opt = getopt(argc, argv, "hp:t:r")) != -1) {
+		switch (opt) {
+		case 'p':
+			reclaim_period_ms = atoi(optarg);
+			break;
+		case 't':
+			token = atoi(optarg);
+			break;
+		case 'r':
+			reboot_permissions = true;
+			break;
+		case 'h':
+		default:
+			help(argv[0]);
+			break;
+		}
+	}
+
+	TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_DISABLE_NX_HUGE_PAGES));
+	TEST_REQUIRE(reclaim_period_ms > 0);
+
+	__TEST_REQUIRE(token == MAGIC_TOKEN,
+		       "This test must be run with the magic token %d.\n"
+		       "This is done by nx_huge_pages_test.sh, which\n"
+		       "also handles environment setup for the test.", MAGIC_TOKEN);
+	/* First pass leaves NX huge pages enabled, second pass tries to disable them. */
+	run_test(reclaim_period_ms, false, reboot_permissions);
+	run_test(reclaim_period_ms, true, reboot_permissions);
+
+	return 0;
+}
+
diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh
new file mode 100755
index 000000000000..0560149e66ed
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh
@@ -0,0 +1,59 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Wrapper script which performs setup and cleanup for nx_huge_pages_test.
+# Makes use of root privileges to set up huge pages and KVM module parameters.
+#
+# tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh
+# Copyright (C) 2022, Google LLC.
+
+set -e
+
+NX_HUGE_PAGES=$(cat /sys/module/kvm/parameters/nx_huge_pages)
+NX_HUGE_PAGES_RECOVERY_RATIO=$(cat /sys/module/kvm/parameters/nx_huge_pages_recovery_ratio)
+NX_HUGE_PAGES_RECOVERY_PERIOD=$(cat /sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms)
+HUGE_PAGES=$(cat /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages)
+
+set +e
+
+function sudo_echo () {
+	echo "$1" | sudo tee -a "$2" > /dev/null
+}
+
+NXECUTABLE="$(dirname "$0")/nx_huge_pages_test"
+
+sudo_echo test /dev/null || exit 4 # KSFT_SKIP=4
+
+(
+	set -e
+
+	sudo_echo 1 /sys/module/kvm/parameters/nx_huge_pages
+	sudo_echo 1 /sys/module/kvm/parameters/nx_huge_pages_recovery_ratio
+	sudo_echo 100 /sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms
+	sudo_echo "$(( $HUGE_PAGES + 3 ))" /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
+
+	# Test with reboot permissions (quoted so paths with spaces still work)
+	if [ "$(whoami)" == "root" ] || sudo setcap cap_sys_boot+ep "$NXECUTABLE" 2> /dev/null; then
+		echo Running test with CAP_SYS_BOOT enabled
+		"$NXECUTABLE" -t 887563923 -p 100 -r
+		test "$(whoami)" == "root" || sudo setcap cap_sys_boot-ep "$NXECUTABLE"
+	else
+		echo setcap failed, skipping nx_huge_pages_test with CAP_SYS_BOOT enabled
+	fi
+
+	# Test without reboot permissions
+	if [ "$(whoami)" != "root" ] ; then
+		echo Running test with CAP_SYS_BOOT disabled
+		"$NXECUTABLE" -t 887563923 -p 100
+	else
+		echo Running as root, skipping nx_huge_pages_test with CAP_SYS_BOOT disabled
+	fi
+)
+RET=$?
+
+sudo_echo "$NX_HUGE_PAGES" /sys/module/kvm/parameters/nx_huge_pages
+sudo_echo "$NX_HUGE_PAGES_RECOVERY_RATIO" /sys/module/kvm/parameters/nx_huge_pages_recovery_ratio
+sudo_echo "$NX_HUGE_PAGES_RECOVERY_PERIOD" /sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms
+sudo_echo "$HUGE_PAGES" /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
+
+exit $RET
diff --git a/tools/testing/selftests/kvm/x86_64/platform_info_test.c b/tools/testing/selftests/kvm/x86_64/platform_info_test.c
index f9334bd3cce9..76417c7d687b 100644
--- a/tools/testing/selftests/kvm/x86_64/platform_info_test.c
+++ b/tools/testing/selftests/kvm/x86_64/platform_info_test.c
@@ -21,7 +21,6 @@
 #include "kvm_util.h"
 #include "processor.h"
 
-#define VCPU_ID 0
 #define MSR_PLATFORM_INFO_MAX_TURBO_RATIO 0xff00
 
 static void guest_code(void)
@@ -35,43 +34,32 @@ static void guest_code(void)
 	}
 }
 
-static void set_msr_platform_info_enabled(struct kvm_vm *vm, bool enable)
+static void test_msr_platform_info_enabled(struct kvm_vcpu *vcpu)
 {
-	struct kvm_enable_cap cap = {};
-
-	cap.cap = KVM_CAP_MSR_PLATFORM_INFO;
-	cap.flags = 0;
-	cap.args[0] = (int)enable;
-	vm_enable_cap(vm, &cap);
-}
-
-static void test_msr_platform_info_enabled(struct kvm_vm *vm)
-{
-	struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+	struct kvm_run *run = vcpu->run;
 	struct ucall uc;
 
-	set_msr_platform_info_enabled(vm, true);
-	vcpu_run(vm, VCPU_ID);
+	vm_enable_cap(vcpu->vm, KVM_CAP_MSR_PLATFORM_INFO, true);
+	vcpu_run(vcpu);
 	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
 			"Exit_reason other than KVM_EXIT_IO: %u (%s),\n",
 			run->exit_reason,
 			exit_reason_str(run->exit_reason));
-	get_ucall(vm, VCPU_ID, &uc);
+	get_ucall(vcpu, &uc);
 	TEST_ASSERT(uc.cmd == UCALL_SYNC,
-			"Received ucall other than UCALL_SYNC: %u\n",
-			ucall);
+			"Received ucall other than UCALL_SYNC: %lu\n", uc.cmd);
 	TEST_ASSERT((uc.args[1] & MSR_PLATFORM_INFO_MAX_TURBO_RATIO) ==
 		MSR_PLATFORM_INFO_MAX_TURBO_RATIO,
 		"Expected MSR_PLATFORM_INFO to have max turbo ratio mask: %i.",
 		MSR_PLATFORM_INFO_MAX_TURBO_RATIO);
 }
 
-static void test_msr_platform_info_disabled(struct kvm_vm *vm)
+static void test_msr_platform_info_disabled(struct kvm_vcpu *vcpu)
 {
-	struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+	struct kvm_run *run = vcpu->run;
 
-	set_msr_platform_info_enabled(vm, false);
-	vcpu_run(vm, VCPU_ID);
+	vm_enable_cap(vcpu->vm, KVM_CAP_MSR_PLATFORM_INFO, false);
+	vcpu_run(vcpu);
 	TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN,
 			"Exit_reason other than KVM_EXIT_SHUTDOWN: %u (%s)\n",
 			run->exit_reason,
@@ -80,28 +68,23 @@ static void test_msr_platform_info_disabled(struct kvm_vm *vm)
 
 int main(int argc, char *argv[])
 {
+	struct kvm_vcpu *vcpu;
 	struct kvm_vm *vm;
-	int rv;
 	uint64_t msr_platform_info;
 
 	/* Tell stdout not to buffer its content */
 	setbuf(stdout, NULL);
 
-	rv = kvm_check_cap(KVM_CAP_MSR_PLATFORM_INFO);
-	if (!rv) {
-		fprintf(stderr,
-			"KVM_CAP_MSR_PLATFORM_INFO not supported, skip test\n");
-		exit(KSFT_SKIP);
-	}
+	TEST_REQUIRE(kvm_has_cap(KVM_CAP_MSR_PLATFORM_INFO));
 
-	vm = vm_create_default(VCPU_ID, 0, guest_code);
+	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
 
-	msr_platform_info = vcpu_get_msr(vm, VCPU_ID, MSR_PLATFORM_INFO);
-	vcpu_set_msr(vm, VCPU_ID, MSR_PLATFORM_INFO,
-		msr_platform_info | MSR_PLATFORM_INFO_MAX_TURBO_RATIO);
-	test_msr_platform_info_enabled(vm);
-	test_msr_platform_info_disabled(vm);
-	vcpu_set_msr(vm, VCPU_ID, MSR_PLATFORM_INFO, msr_platform_info);
+	msr_platform_info = vcpu_get_msr(vcpu, MSR_PLATFORM_INFO);
+	vcpu_set_msr(vcpu, MSR_PLATFORM_INFO,
+		     msr_platform_info | MSR_PLATFORM_INFO_MAX_TURBO_RATIO);
+	test_msr_platform_info_enabled(vcpu);
+	test_msr_platform_info_disabled(vcpu);
+	vcpu_set_msr(vcpu, MSR_PLATFORM_INFO, msr_platform_info);
 
 	kvm_vm_free(vm);
 
diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
new file mode 100644
index 000000000000..ea4e259a1e2e
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
@@ -0,0 +1,479 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test for x86 KVM_SET_PMU_EVENT_FILTER.
+ *
+ * Copyright (C) 2022, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Verifies the expected behavior of allow lists and deny lists for
+ * virtual PMU events.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+/*
+ * In lieu of copying perf_event.h into tools...
+ */
+#define ARCH_PERFMON_EVENTSEL_OS			(1ULL << 17)
+#define ARCH_PERFMON_EVENTSEL_ENABLE			(1ULL << 22)
+
+union cpuid10_eax {
+	struct {
+		unsigned int version_id:8;
+		unsigned int num_counters:8;
+		unsigned int bit_width:8;
+		unsigned int mask_length:8;
+	} split;
+	unsigned int full;
+};
+
+union cpuid10_ebx {
+	struct {
+		unsigned int no_unhalted_core_cycles:1;
+		unsigned int no_instructions_retired:1;
+		unsigned int no_unhalted_reference_cycles:1;
+		unsigned int no_llc_reference:1;
+		unsigned int no_llc_misses:1;
+		unsigned int no_branch_instruction_retired:1;
+		unsigned int no_branch_misses_retired:1;
+	} split;
+	unsigned int full;
+};
+
+/* End of stuff taken from perf_event.h. */
+
+/* Oddly, this isn't in perf_event.h. */
+#define ARCH_PERFMON_BRANCHES_RETIRED		5
+
+#define NUM_BRANCHES 42
+
+/*
+ * This is how the event selector and unit mask are stored in an AMD
+ * core performance event-select register. Intel's format is similar,
+ * but the event selector is only 8 bits.
+ */
+#define EVENT(select, umask) (((select) & 0xf00UL) << 24 | ((select) & 0xff) | \
+			      ((umask) & 0xff) << 8)
+
+/*
+ * "Branch instructions retired", from the Intel SDM, volume 3,
+ * "Pre-defined Architectural Performance Events."
+ */
+
+#define INTEL_BR_RETIRED EVENT(0xc4, 0)
+
+/*
+ * "Retired branch instructions", from Processor Programming Reference
+ * (PPR) for AMD Family 17h Model 01h, Revision B1 Processors,
+ * Preliminary Processor Programming Reference (PPR) for AMD Family
+ * 17h Model 31h, Revision B0 Processors, and Preliminary Processor
+ * Programming Reference (PPR) for AMD Family 19h Model 01h, Revision
+ * B1 Processors Volume 1 of 2.
+ */
+
+#define AMD_ZEN_BR_RETIRED EVENT(0xc2, 0)
+
+/*
+ * This event list comprises Intel's eight architectural events plus
+ * AMD's "retired branch instructions" for Zen[123] (and possibly
+ * other AMD CPUs).
+ */
+static const uint64_t event_list[] = {
+	EVENT(0x3c, 0),
+	EVENT(0xc0, 0),
+	EVENT(0x3c, 1),
+	EVENT(0x2e, 0x4f),
+	EVENT(0x2e, 0x41),
+	EVENT(0xc4, 0),
+	EVENT(0xc5, 0),
+	EVENT(0xa4, 1),
+	AMD_ZEN_BR_RETIRED,
+};
+
+/*
+ * If we encounter a #GP during the guest PMU sanity check, then the guest
+ * PMU is not functional. Inform the hypervisor via GUEST_SYNC(0).
+ */
+static void guest_gp_handler(struct ex_regs *regs)
+{
+	GUEST_SYNC(0);
+}
+
+/*
+ * Check that we can write a new value to the given MSR and read it back.
+ * The caller should provide a non-empty set of bits that are safe to flip.
+ *
+ * Return on success. GUEST_SYNC(0) on error.
+ */
+static void check_msr(uint32_t msr, uint64_t bits_to_flip)
+{
+	uint64_t v = rdmsr(msr) ^ bits_to_flip;
+
+	wrmsr(msr, v);
+	if (rdmsr(msr) != v)
+		GUEST_SYNC(0);
+
+	v ^= bits_to_flip;
+	wrmsr(msr, v);
+	if (rdmsr(msr) != v)
+		GUEST_SYNC(0);
+}
+
+static void intel_guest_code(void)
+{
+	check_msr(MSR_CORE_PERF_GLOBAL_CTRL, 1);
+	check_msr(MSR_P6_EVNTSEL0, 0xffff);
+	check_msr(MSR_IA32_PMC0, 0xffff);
+	GUEST_SYNC(1);
+
+	for (;;) {
+		uint64_t br0, br1;
+
+		wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+		wrmsr(MSR_P6_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
+		      ARCH_PERFMON_EVENTSEL_OS | INTEL_BR_RETIRED);
+		wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 1);
+		br0 = rdmsr(MSR_IA32_PMC0);
+		__asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
+		br1 = rdmsr(MSR_IA32_PMC0);
+		GUEST_SYNC(br1 - br0);
+	}
+}
+
+/*
+ * To avoid needing a check for CPUID.80000001:ECX.PerfCtrExtCore[bit 23],
+ * this code uses the always-available, legacy K7 PMU MSRs, which alias to
+ * the first four of the six extended core PMU MSRs.
+ */
+static void amd_guest_code(void)
+{
+	check_msr(MSR_K7_EVNTSEL0, 0xffff);
+	check_msr(MSR_K7_PERFCTR0, 0xffff);
+	GUEST_SYNC(1);
+
+	for (;;) {
+		uint64_t br0, br1;
+
+		wrmsr(MSR_K7_EVNTSEL0, 0);
+		wrmsr(MSR_K7_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
+		      ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_BR_RETIRED);
+		br0 = rdmsr(MSR_K7_PERFCTR0);
+		__asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
+		br1 = rdmsr(MSR_K7_PERFCTR0);
+		GUEST_SYNC(br1 - br0);
+	}
+}
+
+/*
+ * Run the VM to the next GUEST_SYNC(value), and return the value passed
+ * to the sync. Any other exit from the guest is fatal.
+ */
+static uint64_t run_vcpu_to_sync(struct kvm_vcpu *vcpu)
+{
+	struct kvm_run *run = vcpu->run;
+	struct ucall uc;
+
+	vcpu_run(vcpu);
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+		    "Exit_reason other than KVM_EXIT_IO: %u (%s)\n",
+		    run->exit_reason,
+		    exit_reason_str(run->exit_reason));
+	get_ucall(vcpu, &uc);
+	TEST_ASSERT(uc.cmd == UCALL_SYNC,
+		    "Received ucall other than UCALL_SYNC: %lu", uc.cmd);
+	return uc.args[1];
+}
+
+/*
+ * In a nested environment or if the vPMU is disabled, the guest PMU
+ * might not work as architected (accessing the PMU MSRs may raise
+ * #GP, or writes could simply be discarded). In those situations,
+ * there is no point in running these tests. The guest code will perform
+ * a sanity check and then GUEST_SYNC(success). In the case of failure,
+ * the behavior of the guest on resumption is undefined.
+ */
+static bool sanity_check_pmu(struct kvm_vcpu *vcpu)
+{
+	bool success;
+
+	vm_install_exception_handler(vcpu->vm, GP_VECTOR, guest_gp_handler);
+	success = run_vcpu_to_sync(vcpu);
+	vm_install_exception_handler(vcpu->vm, GP_VECTOR, NULL);
+
+	return success;
+}
+
+static struct kvm_pmu_event_filter *alloc_pmu_event_filter(uint32_t nevents)
+{
+	struct kvm_pmu_event_filter *f;
+	int size = sizeof(*f) + nevents * sizeof(f->events[0]);
+
+	f = malloc(size);
+	TEST_ASSERT(f, "Out of memory");
+	memset(f, 0, size);
+	f->nevents = nevents;
+	return f;
+}
+
+
+static struct kvm_pmu_event_filter *
+create_pmu_event_filter(const uint64_t event_list[],
+			int nevents, uint32_t action)
+{
+	struct kvm_pmu_event_filter *f;
+	int i;
+
+	f = alloc_pmu_event_filter(nevents);
+	f->action = action;
+	for (i = 0; i < nevents; i++)
+		f->events[i] = event_list[i];
+
+	return f;
+}
+
+static struct kvm_pmu_event_filter *event_filter(uint32_t action)
+{
+	return create_pmu_event_filter(event_list,
+				       ARRAY_SIZE(event_list),
+				       action);
+}
+
+/*
+ * Remove the first occurrence of 'event' (if any) from the filter's
+ * event list.
+ */
+static struct kvm_pmu_event_filter *remove_event(struct kvm_pmu_event_filter *f,
+						 uint64_t event)
+{
+	bool found = false;
+	int i;
+
+	for (i = 0; i < f->nevents; i++) {
+		if (found)
+			f->events[i - 1] = f->events[i];
+		else
+			found = f->events[i] == event;
+	}
+	if (found)
+		f->nevents--;
+	return f;
+}
+
+static void test_without_filter(struct kvm_vcpu *vcpu)
+{
+	uint64_t count = run_vcpu_to_sync(vcpu);
+
+	if (count != NUM_BRANCHES)
+		pr_info("%s: Branch instructions retired = %lu (expected %u)\n",
+			__func__, count, NUM_BRANCHES);
+	TEST_ASSERT(count, "Allowed PMU event is not counting");
+}
+
+static uint64_t test_with_filter(struct kvm_vcpu *vcpu,
+				 struct kvm_pmu_event_filter *f)
+{
+	vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f);
+	return run_vcpu_to_sync(vcpu);
+}
+
+static void test_amd_deny_list(struct kvm_vcpu *vcpu)
+{
+	uint64_t event = EVENT(0x1C2, 0);
+	struct kvm_pmu_event_filter *f;
+	uint64_t count;
+
+	f = create_pmu_event_filter(&event, 1, KVM_PMU_EVENT_DENY);
+	count = test_with_filter(vcpu, f);
+
+	free(f);
+	if (count != NUM_BRANCHES)
+		pr_info("%s: Branch instructions retired = %lu (expected %u)\n",
+			__func__, count, NUM_BRANCHES);
+	TEST_ASSERT(count, "Allowed PMU event is not counting");
+}
+
+static void test_member_deny_list(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY);
+	uint64_t count = test_with_filter(vcpu, f);
+
+	free(f);
+	if (count)
+		pr_info("%s: Branch instructions retired = %lu (expected 0)\n",
+			__func__, count);
+	TEST_ASSERT(!count, "Disallowed PMU Event is counting");
+}
+
+static void test_member_allow_list(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_ALLOW);
+	uint64_t count = test_with_filter(vcpu, f);
+	/* The guest ran under event_filter(KVM_PMU_EVENT_ALLOW): a zero count is a failure. */
+	free(f);
+	if (count != NUM_BRANCHES)
+		pr_info("%s: Branch instructions retired = %lu (expected %u)\n",
+			__func__, count, NUM_BRANCHES);
+	TEST_ASSERT(count, "Allowed PMU event is not counting");
+}
+
+static void test_not_member_deny_list(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY);
+	uint64_t count;
+	/* Strip both branch-retired events from the deny list; counting must then work. */
+	remove_event(f, INTEL_BR_RETIRED);
+	remove_event(f, AMD_ZEN_BR_RETIRED);
+	count = test_with_filter(vcpu, f);
+	free(f);
+	if (count != NUM_BRANCHES)
+		pr_info("%s: Branch instructions retired = %lu (expected %u)\n",
+			__func__, count, NUM_BRANCHES);
+	TEST_ASSERT(count, "Allowed PMU event is not counting");
+}
+
+static void test_not_member_allow_list(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_ALLOW);
+	uint64_t count;
+	/* Strip both branch-retired events from the allow list; nothing may be counted. */
+	remove_event(f, INTEL_BR_RETIRED);
+	remove_event(f, AMD_ZEN_BR_RETIRED);
+	count = test_with_filter(vcpu, f);
+	free(f);
+	if (count)
+		pr_info("%s: Branch instructions retired = %lu (expected 0)\n",
+			__func__, count);
+	TEST_ASSERT(!count, "Disallowed PMU Event is counting");
+}
+
+/*
+ * Verify that setting KVM_PMU_CAP_DISABLE prevents the use of the PMU; the
+ * test silently returns if KVM_CAP_PMU_CAPABILITY does not report that flag.
+ * Note that KVM_CAP_PMU_CAPABILITY must be invoked prior to creating VCPUs.
+ */
+static void test_pmu_config_disable(void (*guest_code)(void))
+{
+	struct kvm_vcpu *vcpu;
+	int r;
+	struct kvm_vm *vm;
+
+	r = kvm_check_cap(KVM_CAP_PMU_CAPABILITY);
+	if (!(r & KVM_PMU_CAP_DISABLE))
+		return;
+
+	vm = vm_create(1);
+	/* Disable the PMU first; the vCPU is only added afterwards. */
+	vm_enable_cap(vm, KVM_CAP_PMU_CAPABILITY, KVM_PMU_CAP_DISABLE);
+
+	vcpu = vm_vcpu_add(vm, 0, guest_code);
+	vm_init_descriptor_tables(vm);
+	vcpu_init_descriptor_tables(vcpu);
+
+	TEST_ASSERT(!sanity_check_pmu(vcpu),
+		    "Guest should not be able to use disabled PMU.");
+
+	kvm_vm_free(vm);
+}
+
+/*
+ * Usable leaf: non-zero PMU version, at least one general-purpose counter,
+ * an EBX bit vector of length greater than 5, and EBX[5] clear.
+ */
+static bool check_intel_pmu_leaf(const struct kvm_cpuid_entry2 *entry)
+{
+	union cpuid10_eax eax = { .full = entry->eax };
+	union cpuid10_ebx ebx = { .full = entry->ebx };
+
+	if (!eax.split.version_id || eax.split.num_counters <= 0 ||
+	    eax.split.mask_length <= ARCH_PERFMON_BRANCHES_RETIRED)
+		return false;
+	return !ebx.split.no_branch_instruction_retired;
+}
+
+/*
+ * True on an Intel CPU whose leaf 0xa passes check_intel_pmu_leaf(). Note that
+ * CPUID leaf 0xa is Intel-specific and should be clear on AMD hardware.
+ */
+static bool use_intel_pmu(void)
+{
+	const struct kvm_cpuid_entry2 *entry;
+	/* The leaf is fetched unconditionally, i.e. before the vendor check. */
+	entry = kvm_get_supported_cpuid_entry(0xa);
+	return is_intel_cpu() && check_intel_pmu_leaf(entry);
+}
+
+/* Family/model matchers for the CPUID leaf 1 EAX value handed in by use_amd_pmu(). */
+static bool is_zen1(uint32_t eax)
+{
+	return x86_family(eax) == 0x17 && x86_model(eax) <= 0x0f;
+}
+
+static bool is_zen2(uint32_t eax)
+{
+	return x86_family(eax) == 0x17 && x86_model(eax) >= 0x30 && x86_model(eax) <= 0x3f;
+}
+
+static bool is_zen3(uint32_t eax)
+{
+	return x86_family(eax) == 0x19 && x86_model(eax) <= 0x0f;
+}
+
+/*
+ * True on an AMD CPU that is_zen1(), is_zen2() or is_zen3() recognizes.
+ * Determining AMD support for a PMU event requires consulting the AMD PPR for
+ * the CPU or reference material derived therefrom; the AMD test code herein
+ * has been verified to work on Zen1, Zen2, and Zen3.
+ * Feel free to add more AMD CPUs that are documented to support event
+ * select 0xc2 umask 0 as "retired branch instructions."
+ */
+static bool use_amd_pmu(void)
+{
+	const struct kvm_cpuid_entry2 *entry;
+	/* Family and model are decoded from EAX of KVM's supported CPUID leaf 1. */
+	entry = kvm_get_supported_cpuid_entry(1);
+	return is_amd_cpu() &&
+		(is_zen1(entry->eax) ||
+		 is_zen2(entry->eax) ||
+		 is_zen3(entry->eax));
+}
+
+int main(int argc, char *argv[])
+{
+	void (*guest_code)(void);
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+
+	/* Tell stdout not to buffer its content */
+	setbuf(stdout, NULL);
+
+	TEST_REQUIRE(kvm_has_cap(KVM_CAP_PMU_EVENT_FILTER));
+	/* The rest of the test needs a CPU that use_intel_pmu() or use_amd_pmu() accepts. */
+	TEST_REQUIRE(use_intel_pmu() || use_amd_pmu());
+	guest_code = use_intel_pmu() ? intel_guest_code : amd_guest_code;
+
+	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+	vm_init_descriptor_tables(vm);
+	vcpu_init_descriptor_tables(vcpu);
+
+	TEST_REQUIRE(sanity_check_pmu(vcpu));
+
+	if (use_amd_pmu())
+		test_amd_deny_list(vcpu);
+
+	test_without_filter(vcpu);
+	test_member_deny_list(vcpu);
+	test_member_allow_list(vcpu);
+	test_not_member_deny_list(vcpu);
+	test_not_member_allow_list(vcpu);
+
+	kvm_vm_free(vm);
+	/* The disable test creates its own VM, so this one is freed first. */
+	test_pmu_config_disable(guest_code);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c
new file mode 100644
index 000000000000..b25d7556b638
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test that KVM_SET_BOOT_CPU_ID works as intended
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+#define _GNU_SOURCE /* for program_invocation_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "apic.h"
+
+static void guest_bsp_vcpu(void *arg)
+{
+	GUEST_SYNC(1);
+	/* Entry point for the vCPU create_vm() selected as BSP: the BSP flag must be set. */
+	GUEST_ASSERT(get_bsp_flag() != 0);
+
+	GUEST_DONE();
+}
+
+static void guest_not_bsp_vcpu(void *arg)
+{
+	GUEST_SYNC(1);
+	/* Entry point for every vCPU other than the BSP: the BSP flag must be clear. */
+	GUEST_ASSERT(get_bsp_flag() == 0);
+
+	GUEST_DONE();
+}
+
+static void test_set_bsp_busy(struct kvm_vcpu *vcpu, const char *msg)
+{
+	int r = __vm_ioctl(vcpu->vm, KVM_SET_BOOT_CPU_ID,
+			   (void *)(unsigned long)vcpu->id);
+	/* Callers expect a rejection: the ioctl must fail with errno == EBUSY. */
+	TEST_ASSERT(r == -1 && errno == EBUSY, "KVM_SET_BOOT_CPU_ID set %s", msg);
+}
+
+static void run_vcpu(struct kvm_vcpu *vcpu)
+{
+	struct ucall uc;
+	int stage;
+
+	for (stage = 0; stage < 2; stage++) {
+		/* Both guest functions sync once (first pass) and then signal done (second). */
+		vcpu_run(vcpu);
+
+		switch (get_ucall(vcpu, &uc)) {
+		case UCALL_SYNC:
+			TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+					uc.args[1] == stage + 1,
+					"Stage %d: Unexpected register values vmexit, got %lx",
+					stage + 1, (ulong)uc.args[1]);
+			test_set_bsp_busy(vcpu, "while running vm");
+			break;
+		case UCALL_DONE:
+			TEST_ASSERT(stage == 1,
+					"Expected GUEST_DONE in stage 2, got stage %d",
+					stage);
+			break;
+		case UCALL_ABORT:
+			REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx"); /* no break: falls into default */
+		default:
+			TEST_ASSERT(false, "Unexpected exit: %s",
+				    exit_reason_str(vcpu->run->exit_reason));
+		}
+	}
+}
+
+static struct kvm_vm *create_vm(uint32_t nr_vcpus, uint32_t bsp_vcpu_id,
+				struct kvm_vcpu *vcpus[])
+{
+	struct kvm_vm *vm;
+	uint32_t i;
+
+	vm = vm_create(nr_vcpus);
+	/* Choose the BSP before any vCPU is added; each vCPU gets the matching guest code. */
+	vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)(unsigned long)bsp_vcpu_id);
+
+	for (i = 0; i < nr_vcpus; i++)
+		vcpus[i] = vm_vcpu_add(vm, i, i == bsp_vcpu_id ? guest_bsp_vcpu :
+								 guest_not_bsp_vcpu);
+	return vm;
+}
+
+static void run_vm_bsp(uint32_t bsp_vcpu_id)
+{
+	struct kvm_vcpu *vcpus[2];
+	struct kvm_vm *vm;
+
+	vm = create_vm(ARRAY_SIZE(vcpus), bsp_vcpu_id, vcpus);
+	/* Run both vCPUs to completion; each guest function checks its own BSP flag. */
+	run_vcpu(vcpus[0]);
+	run_vcpu(vcpus[1]);
+
+	kvm_vm_free(vm);
+}
+
+static void check_set_bsp_busy(void)
+{
+	struct kvm_vcpu *vcpus[2];
+	struct kvm_vm *vm;
+	/* KVM_SET_BOOT_CPU_ID must be refused once vCPUs exist, and again after they ran. */
+	vm = create_vm(ARRAY_SIZE(vcpus), 0, vcpus);
+
+	test_set_bsp_busy(vcpus[1], "after adding vcpu");
+
+	run_vcpu(vcpus[0]);
+	run_vcpu(vcpus[1]);
+
+	test_set_bsp_busy(vcpus[1], "to a terminated vcpu");
+
+	kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+	TEST_REQUIRE(kvm_has_cap(KVM_CAP_SET_BOOT_CPU_ID));
+	/* Exercise BSP = vCPU 0, then 1, then 0 again, each in a freshly created VM. */
+	run_vm_bsp(0);
+	run_vm_bsp(1);
+	run_vm_bsp(0);
+	check_set_bsp_busy();
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c
index 9f7656184f31..2bb08bf2125d 100644
--- a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c
@@ -22,27 +22,112 @@
 #include "kvm_util.h"
 #include "processor.h"
 
-#define VCPU_ID                  5
+static void test_cr4_feature_bit(struct kvm_vcpu *vcpu, struct kvm_sregs *orig,
+				 uint64_t feature_bit)
+{
+	struct kvm_sregs sregs;
+	int rc;
+
+	/* Skip the sub-test, the feature is supported. */
+	if (orig->cr4 & feature_bit)
+		return;
+	/* Try the bit on a copy of @orig; KVM is expected to reject the update. */
+	memcpy(&sregs, orig, sizeof(sregs));
+	sregs.cr4 |= feature_bit;
+
+	rc = _vcpu_sregs_set(vcpu, &sregs);
+	TEST_ASSERT(rc, "KVM allowed unsupported CR4 bit (0x%lx)", feature_bit);
+
+	/* Sanity check that KVM didn't change anything. */
+	vcpu_sregs_get(vcpu, &sregs);
+	TEST_ASSERT(!memcmp(&sregs, orig, sizeof(sregs)), "KVM modified sregs");
+}
+
+static uint64_t calc_supported_cr4_feature_bits(void)
+{
+	uint64_t cr4;
+	/* Start from the bits set unconditionally, then add one per kvm_cpu_has() feature. */
+	cr4 = X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE |
+	      X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE | X86_CR4_PGE |
+	      X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT;
+	if (kvm_cpu_has(X86_FEATURE_UMIP))
+		cr4 |= X86_CR4_UMIP;
+	if (kvm_cpu_has(X86_FEATURE_LA57))
+		cr4 |= X86_CR4_LA57;
+	if (kvm_cpu_has(X86_FEATURE_VMX))
+		cr4 |= X86_CR4_VMXE;
+	if (kvm_cpu_has(X86_FEATURE_SMX))
+		cr4 |= X86_CR4_SMXE;
+	if (kvm_cpu_has(X86_FEATURE_FSGSBASE))
+		cr4 |= X86_CR4_FSGSBASE;
+	if (kvm_cpu_has(X86_FEATURE_PCID))
+		cr4 |= X86_CR4_PCIDE;
+	if (kvm_cpu_has(X86_FEATURE_XSAVE))
+		cr4 |= X86_CR4_OSXSAVE;
+	if (kvm_cpu_has(X86_FEATURE_SMEP))
+		cr4 |= X86_CR4_SMEP;
+	if (kvm_cpu_has(X86_FEATURE_SMAP))
+		cr4 |= X86_CR4_SMAP;
+	if (kvm_cpu_has(X86_FEATURE_PKU))
+		cr4 |= X86_CR4_PKE;
+
+	return cr4;
+}
 
 int main(int argc, char *argv[])
 {
 	struct kvm_sregs sregs;
+	struct kvm_vcpu *vcpu;
 	struct kvm_vm *vm;
+	uint64_t cr4;
 	int rc;
 
 	/* Tell stdout not to buffer its content */
 	setbuf(stdout, NULL);
 
-	/* Create VM */
-	vm = vm_create_default(VCPU_ID, 0, NULL);
+	/*
+	 * Create a dummy VM, specifically to avoid doing KVM_SET_CPUID2, and
+	 * use it to verify all supported CR4 bits can be set prior to defining
+	 * the vCPU model, i.e. without doing KVM_SET_CPUID2.
+	 */
+	vm = vm_create_barebones();
+	vcpu = __vm_vcpu_add(vm, 0);
+
+	vcpu_sregs_get(vcpu, &sregs);
+
+	sregs.cr4 |= calc_supported_cr4_feature_bits();
+	cr4 = sregs.cr4;
+
+	rc = _vcpu_sregs_set(vcpu, &sregs);
+	TEST_ASSERT(!rc, "Failed to set supported CR4 bits (0x%lx)", cr4);
+
+	vcpu_sregs_get(vcpu, &sregs);
+	TEST_ASSERT(sregs.cr4 == cr4, "sregs.CR4 (0x%llx) != CR4 (0x%lx)",
+		    sregs.cr4, cr4);
+
+	/* Verify all unsupported features are rejected by KVM. */
+	test_cr4_feature_bit(vcpu, &sregs, X86_CR4_UMIP);
+	test_cr4_feature_bit(vcpu, &sregs, X86_CR4_LA57);
+	test_cr4_feature_bit(vcpu, &sregs, X86_CR4_VMXE);
+	test_cr4_feature_bit(vcpu, &sregs, X86_CR4_SMXE);
+	test_cr4_feature_bit(vcpu, &sregs, X86_CR4_FSGSBASE);
+	test_cr4_feature_bit(vcpu, &sregs, X86_CR4_PCIDE);
+	test_cr4_feature_bit(vcpu, &sregs, X86_CR4_OSXSAVE);
+	test_cr4_feature_bit(vcpu, &sregs, X86_CR4_SMEP);
+	test_cr4_feature_bit(vcpu, &sregs, X86_CR4_SMAP);
+	test_cr4_feature_bit(vcpu, &sregs, X86_CR4_PKE);
+	kvm_vm_free(vm);
+
+	/* Create a "real" VM and verify APIC_BASE can be set. */
+	vm = vm_create_with_one_vcpu(&vcpu, NULL);
 
-	vcpu_sregs_get(vm, VCPU_ID, &sregs);
+	vcpu_sregs_get(vcpu, &sregs);
 	sregs.apic_base = 1 << 10;
-	rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs);
+	rc = _vcpu_sregs_set(vcpu, &sregs);
 	TEST_ASSERT(rc, "Set IA32_APIC_BASE to %llx (invalid)",
 		    sregs.apic_base);
 	sregs.apic_base = 1 << 11;
-	rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs);
+	rc = _vcpu_sregs_set(vcpu, &sregs);
 	TEST_ASSERT(!rc, "Couldn't set IA32_APIC_BASE to %llx (valid)",
 		    sregs.apic_base);
 
diff --git a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c b/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c
new file mode 100644
index 000000000000..c7ef97561038
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c
@@ -0,0 +1,421 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/kvm.h>
+#include <linux/psp-sev.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <pthread.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "kselftest.h"
+
+#define SEV_POLICY_ES 0b100	/* OR-ed into the launch policy for SEV-ES guests */
+
+#define NR_MIGRATE_TEST_VCPUS 4	/* vCPUs added to each test VM */
+#define NR_MIGRATE_TEST_VMS 3	/* length of the migration chain */
+#define NR_LOCK_TESTING_THREADS 3	/* threads (and VMs) in the locking test */
+#define NR_LOCK_TESTING_ITERATIONS 10000	/* migration attempts per thread */
+
+bool have_sev_es;	/* set in main() from X86_FEATURE_SEV_ES */
+
+static int __sev_ioctl(int vm_fd, int cmd_id, void *data, __u32 *fw_error)
+{
+	struct kvm_sev_cmd cmd = {
+		.id = cmd_id,
+		.data = (uint64_t)data,
+		.sev_fd = open_sev_dev_path_or_exit(),
+	};
+	int ret;
+	/* NOTE(review): sev_fd is freshly obtained on every call and never closed here. */
+	ret = ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);
+	*fw_error = cmd.error;
+	return ret;
+}
+
+static void sev_ioctl(int vm_fd, int cmd_id, void *data)
+{
+	int ret;
+	__u32 fw_error;
+	/* Passes only if the ioctl returns 0 and the reported error is SEV_RET_SUCCESS. */
+	ret = __sev_ioctl(vm_fd, cmd_id, data, &fw_error);
+	TEST_ASSERT(ret == 0 && fw_error == SEV_RET_SUCCESS,
+		    "%d failed: return code: %d, errno: %d, fw error: %d",
+		    cmd_id, ret, errno, fw_error);
+}
+
+static struct kvm_vm *sev_vm_create(bool es)
+{
+	struct kvm_vm *vm;
+	struct kvm_sev_launch_start start = { 0 };
+	int i;
+	/* Order: SEV (or SEV-ES) init, add the vCPUs, LAUNCH_START, then the VMSA for ES. */
+	vm = vm_create_barebones();
+	sev_ioctl(vm->fd, es ? KVM_SEV_ES_INIT : KVM_SEV_INIT, NULL);
+	for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i)
+		__vm_vcpu_add(vm, i);
+	if (es)
+		start.policy |= SEV_POLICY_ES;
+	sev_ioctl(vm->fd, KVM_SEV_LAUNCH_START, &start);
+	if (es)
+		sev_ioctl(vm->fd, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
+	return vm;
+}
+
+/*
+ * Create a VM on which no SEV command is issued, optionally adding vCPUs.
+ */
+static struct kvm_vm *aux_vm_create(bool with_vcpus)
+{
+	struct kvm_vm *aux = vm_create_barebones();
+	int nr_vcpus = with_vcpus ? NR_MIGRATE_TEST_VCPUS : 0;
+	int id;
+
+	for (id = 0; id < nr_vcpus; id++)
+		__vm_vcpu_add(aux, id);
+
+	return aux;
+}
+
+static int __sev_migrate_from(struct kvm_vm *dst, struct kvm_vm *src)
+{
+	/* Unchecked variant: the result of enabling the cap goes straight to the caller. */
+	return __vm_enable_cap(dst, KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM, src->fd);
+}
+
+static void sev_migrate_from(struct kvm_vm *dst, struct kvm_vm *src)
+{
+	int ret;
+	/* Checked variant: a non-zero result from __sev_migrate_from() fails the test. */
+	ret = __sev_migrate_from(dst, src);
+	TEST_ASSERT(!ret, "Migration failed, ret: %d, errno: %d\n", ret, errno);
+}
+
+static void test_sev_migrate_from(bool es)
+{
+	struct kvm_vm *src_vm;
+	struct kvm_vm *dst_vms[NR_MIGRATE_TEST_VMS];
+	int i, ret;
+	/* Chain src -> dst[0] -> ... -> dst[last]; migrating back into src must then fail. */
+	src_vm = sev_vm_create(es);
+	for (i = 0; i < NR_MIGRATE_TEST_VMS; ++i)
+		dst_vms[i] = aux_vm_create(true);
+
+	/* Initial migration from the src to the first dst. */
+	sev_migrate_from(dst_vms[0], src_vm);
+
+	for (i = 1; i < NR_MIGRATE_TEST_VMS; i++)
+		sev_migrate_from(dst_vms[i], dst_vms[i - 1]);
+
+	/* Migrate the guest back to the original VM. */
+	ret = __sev_migrate_from(src_vm, dst_vms[NR_MIGRATE_TEST_VMS - 1]);
+	TEST_ASSERT(ret == -1 && errno == EIO,
+		    "VM that was migrated from should be dead. ret %d, errno: %d\n", ret,
+		    errno);
+
+	kvm_vm_free(src_vm);
+	for (i = 0; i < NR_MIGRATE_TEST_VMS; ++i)
+		kvm_vm_free(dst_vms[i]);
+}
+
+struct locking_thread_input {
+	struct kvm_vm *vm;	/* destination this thread migrates into */
+	struct kvm_vm *source_vms[NR_LOCK_TESTING_THREADS];	/* sources, picked round-robin */
+};
+
+static void *locking_test_thread(void *arg)
+{
+	int i, j;
+	struct locking_thread_input *input = (struct locking_thread_input *)arg;
+	/* Repeatedly try to migrate into input->vm, cycling the sources; results are ignored. */
+	for (i = 0; i < NR_LOCK_TESTING_ITERATIONS; ++i) {
+		j = i % NR_LOCK_TESTING_THREADS;
+		__sev_migrate_from(input->vm, input->source_vms[j]);
+	}
+
+	return NULL;
+}
+
+static void test_sev_migrate_locking(void)
+{
+	struct locking_thread_input input[NR_LOCK_TESTING_THREADS];
+	pthread_t pt[NR_LOCK_TESTING_THREADS];
+	int i;
+
+	for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i) {
+		input[i].vm = sev_vm_create(/* es= */ false);
+		input[0].source_vms[i] = input[i].vm;
+	}
+	for (i = 1; i < NR_LOCK_TESTING_THREADS; ++i)
+		memcpy(input[i].source_vms, input[0].source_vms, sizeof(input[i].source_vms));
+	/* Race the threads; a failed create would leave pt[i] unset for pthread_join(). */
+	for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i)
+		TEST_ASSERT(!pthread_create(&pt[i], NULL, locking_test_thread, &input[i]),
+			    "Failed to create locking test thread %d", i);
+
+	for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i)
+		pthread_join(pt[i], NULL);
+	for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i)
+		kvm_vm_free(input[i].vm);
+}
+
+static void test_sev_migrate_parameters(void)
+{
+	struct kvm_vm *sev_vm, *sev_es_vm, *vm_no_vcpu, *vm_no_sev,
+		*sev_es_vm_no_vmsa;
+	int ret;
+	/* Every migration attempted below is invalid and must fail with EINVAL. */
+	vm_no_vcpu = vm_create_barebones();
+	vm_no_sev = aux_vm_create(true);
+	ret = __sev_migrate_from(vm_no_vcpu, vm_no_sev);
+	TEST_ASSERT(ret == -1 && errno == EINVAL,
+		    "Migrations require SEV enabled. ret %d, errno: %d\n", ret,
+		    errno);
+
+	if (!have_sev_es)
+		goto out;
+
+	sev_vm = sev_vm_create(/* es= */ false);
+	sev_es_vm = sev_vm_create(/* es= */ true);
+	sev_es_vm_no_vmsa = vm_create_barebones();
+	sev_ioctl(sev_es_vm_no_vmsa->fd, KVM_SEV_ES_INIT, NULL);
+	__vm_vcpu_add(sev_es_vm_no_vmsa, 1);
+
+	ret = __sev_migrate_from(sev_vm, sev_es_vm);
+	TEST_ASSERT(
+		ret == -1 && errno == EINVAL,
+		"Should not be able migrate to SEV enabled VM. ret: %d, errno: %d\n",
+		ret, errno);
+
+	ret = __sev_migrate_from(sev_es_vm, sev_vm);
+	TEST_ASSERT(
+		ret == -1 && errno == EINVAL,
+		"Should not be able migrate to SEV-ES enabled VM. ret: %d, errno: %d\n",
+		ret, errno);
+
+	ret = __sev_migrate_from(vm_no_vcpu, sev_es_vm);
+	TEST_ASSERT(
+		ret == -1 && errno == EINVAL,
+		"SEV-ES migrations require same number of vCPUS. ret: %d, errno: %d\n",
+		ret, errno);
+
+	ret = __sev_migrate_from(vm_no_vcpu, sev_es_vm_no_vmsa);
+	TEST_ASSERT(
+		ret == -1 && errno == EINVAL,
+		"SEV-ES migrations require UPDATE_VMSA. ret %d, errno: %d\n",
+		ret, errno);
+
+	kvm_vm_free(sev_vm);
+	kvm_vm_free(sev_es_vm);
+	kvm_vm_free(sev_es_vm_no_vmsa);
+out:
+	kvm_vm_free(vm_no_vcpu);
+	kvm_vm_free(vm_no_sev);
+}
+
+static int __sev_mirror_create(struct kvm_vm *dst, struct kvm_vm *src)
+{
+	/* Unchecked variant: the result of enabling the cap goes straight to the caller. */
+	return __vm_enable_cap(dst, KVM_CAP_VM_COPY_ENC_CONTEXT_FROM, src->fd);
+}
+
+static void sev_mirror_create(struct kvm_vm *dst, struct kvm_vm *src)
+{
+	int ret;
+	/* Checked variant: a non-zero result from __sev_mirror_create() fails the test. */
+	ret = __sev_mirror_create(dst, src);
+	TEST_ASSERT(!ret, "Copying context failed, ret: %d, errno: %d\n", ret, errno);
+}
+
+static void verify_mirror_allowed_cmds(int vm_fd)
+{
+	struct kvm_sev_guest_status status;
+	/* Every command that is not skipped by the switch below must fail with EINVAL. */
+	for (int cmd_id = KVM_SEV_INIT; cmd_id < KVM_SEV_NR_MAX; ++cmd_id) {
+		int ret;
+		__u32 fw_error;
+
+		/*
+		 * These commands are allowed for mirror VMs, all others are
+		 * not.
+		 */
+		switch (cmd_id) {
+		case KVM_SEV_LAUNCH_UPDATE_VMSA:
+		case KVM_SEV_GUEST_STATUS:
+		case KVM_SEV_DBG_DECRYPT:
+		case KVM_SEV_DBG_ENCRYPT:
+			continue;
+		default:
+			break;
+		}
+
+		/*
+		 * These commands should be disallowed before the data
+		 * parameter is examined so NULL is OK here.
+		 */
+		ret = __sev_ioctl(vm_fd, cmd_id, NULL, &fw_error);
+		TEST_ASSERT(
+			ret == -1 && errno == EINVAL,
+			"Should not be able call command: %d. ret: %d, errno: %d\n",
+			cmd_id, ret, errno);
+	}
+	/* One of the allowed commands is then issued and must succeed. */
+	sev_ioctl(vm_fd, KVM_SEV_GUEST_STATUS, &status);
+}
+
+static void test_sev_mirror(bool es)
+{
+	struct kvm_vm *src_vm, *dst_vm;
+	int i;
+	/* The destination is created without vCPUs; they are added after the copy. */
+	src_vm = sev_vm_create(es);
+	dst_vm = aux_vm_create(false);
+
+	sev_mirror_create(dst_vm, src_vm);
+
+	/* Check that we can complete creation of the mirror VM.  */
+	for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i)
+		__vm_vcpu_add(dst_vm, i);
+
+	if (es)
+		sev_ioctl(dst_vm->fd, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
+
+	verify_mirror_allowed_cmds(dst_vm->fd);
+
+	kvm_vm_free(src_vm);
+	kvm_vm_free(dst_vm);
+}
+
+static void test_sev_mirror_parameters(void)
+{
+	struct kvm_vm *sev_vm, *sev_es_vm, *vm_no_vcpu, *vm_with_vcpu;
+	int ret;
+	/* Every context copy attempted below is invalid and must fail with EINVAL. */
+	sev_vm = sev_vm_create(/* es= */ false);
+	vm_with_vcpu = aux_vm_create(true);
+	vm_no_vcpu = aux_vm_create(false);
+
+	ret = __sev_mirror_create(sev_vm, sev_vm);
+	TEST_ASSERT(
+		ret == -1 && errno == EINVAL,
+		"Should not be able copy context to self. ret: %d, errno: %d\n",
+		ret, errno);
+
+	ret = __sev_mirror_create(vm_no_vcpu, vm_with_vcpu);
+	TEST_ASSERT(ret == -1 && errno == EINVAL,
+		    "Copy context requires SEV enabled. ret %d, errno: %d\n", ret,
+		    errno);
+
+	ret = __sev_mirror_create(vm_with_vcpu, sev_vm);
+	TEST_ASSERT(
+		ret == -1 && errno == EINVAL,
+		"SEV copy context requires no vCPUS on the destination. ret: %d, errno: %d\n",
+		ret, errno);
+
+	if (!have_sev_es)
+		goto out;
+
+	sev_es_vm = sev_vm_create(/* es= */ true);
+	ret = __sev_mirror_create(sev_vm, sev_es_vm);
+	TEST_ASSERT(
+		ret == -1 && errno == EINVAL,
+		"Should not be able copy context to SEV enabled VM. ret: %d, errno: %d\n",
+		ret, errno);
+
+	ret = __sev_mirror_create(sev_es_vm, sev_vm);
+	TEST_ASSERT(
+		ret == -1 && errno == EINVAL,
+		"Should not be able copy context to SEV-ES enabled VM. ret: %d, errno: %d\n",
+		ret, errno);
+
+	kvm_vm_free(sev_es_vm);
+
+out:
+	kvm_vm_free(sev_vm);
+	kvm_vm_free(vm_with_vcpu);
+	kvm_vm_free(vm_no_vcpu);
+}
+
+static void test_sev_move_copy(void)
+{
+	struct kvm_vm *dst_vm, *dst2_vm, *dst3_vm, *sev_vm, *mirror_vm,
+		      *dst_mirror_vm, *dst2_mirror_vm, *dst3_mirror_vm;
+	/* Migrate a VM and its mirror along parallel chains, then free them all. */
+	sev_vm = sev_vm_create(/* es= */ false);
+	dst_vm = aux_vm_create(true);
+	dst2_vm = aux_vm_create(true);
+	dst3_vm = aux_vm_create(true);
+	mirror_vm = aux_vm_create(false);
+	dst_mirror_vm = aux_vm_create(false);
+	dst2_mirror_vm = aux_vm_create(false);
+	dst3_mirror_vm = aux_vm_create(false);
+
+	sev_mirror_create(mirror_vm, sev_vm);
+
+	sev_migrate_from(dst_mirror_vm, mirror_vm);
+	sev_migrate_from(dst_vm, sev_vm);
+
+	sev_migrate_from(dst2_vm, dst_vm);
+	sev_migrate_from(dst2_mirror_vm, dst_mirror_vm);
+
+	sev_migrate_from(dst3_mirror_vm, dst2_mirror_vm);
+	sev_migrate_from(dst3_vm, dst2_vm);
+
+	kvm_vm_free(dst_vm);
+	kvm_vm_free(sev_vm);
+	kvm_vm_free(dst2_vm);
+	kvm_vm_free(dst3_vm);
+	kvm_vm_free(mirror_vm);
+	kvm_vm_free(dst_mirror_vm);
+	kvm_vm_free(dst2_mirror_vm);
+	kvm_vm_free(dst3_mirror_vm);
+
+	/*
+	 * Run a similar test, but destroy mirrors before mirrored VMs to ensure
+	 * destruction is done safely.
+	 */
+	sev_vm = sev_vm_create(/* es= */ false);
+	dst_vm = aux_vm_create(true);
+	mirror_vm = aux_vm_create(false);
+	dst_mirror_vm = aux_vm_create(false);
+
+	sev_mirror_create(mirror_vm, sev_vm);
+
+	sev_migrate_from(dst_mirror_vm, mirror_vm);
+	sev_migrate_from(dst_vm, sev_vm);
+
+	kvm_vm_free(mirror_vm);
+	kvm_vm_free(dst_mirror_vm);
+	kvm_vm_free(dst_vm);
+	kvm_vm_free(sev_vm);
+}
+
+int main(int argc, char *argv[])
+{
+	TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM));
+	TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM));
+
+	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV));
+	/* SEV-ES is optional: its variants of the tests run only when it is reported. */
+	have_sev_es = kvm_cpu_has(X86_FEATURE_SEV_ES);
+	/* NOTE(review): both caps are already required above; the checks below look redundant. */
+	if (kvm_has_cap(KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM)) {
+		test_sev_migrate_from(/* es= */ false);
+		if (have_sev_es)
+			test_sev_migrate_from(/* es= */ true);
+		test_sev_migrate_locking();
+		test_sev_migrate_parameters();
+		if (kvm_has_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM))
+			test_sev_move_copy();
+	}
+	if (kvm_has_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM)) {
+		test_sev_mirror(/* es= */ false);
+		if (have_sev_es)
+			test_sev_mirror(/* es= */ true);
+		test_sev_mirror_parameters();
+	}
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c
index 8c063646f2a0..1f136a81858e 100644
--- a/tools/testing/selftests/kvm/x86_64/smm_test.c
+++ b/tools/testing/selftests/kvm/x86_64/smm_test.c
@@ -17,10 +17,7 @@
 #include "kvm_util.h"
 
 #include "vmx.h"
-
-#define VCPU_ID	      1
-
-#define PAGE_SIZE  4096
+#include "svm_util.h"
 
 #define SMRAM_SIZE 65536
 #define SMRAM_MEMSLOT ((1 << 16) | 1)
@@ -46,21 +43,34 @@ uint8_t smi_handler[] = {
 	0x0f, 0xaa,           /* rsm */
 };
 
-void sync_with_host(uint64_t phase)
+static inline void sync_with_host(uint64_t phase)
 {
 	asm volatile("in $" XSTR(SYNC_PORT)", %%al \n"
-		     : : "a" (phase));
+		     : "+a" (phase));
+}
+
+/* Write an SMI-mode, self-destined command to the x2APIC ICR. */
+static void self_smi(void)
+{
+	x2apic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_SMI);
+}
 
-void self_smi(void)
+static void l2_guest_code(void)
 {
-	wrmsr(APIC_BASE_MSR + (APIC_ICR >> 4),
-	      APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_SMI);
+	sync_with_host(8);
+
+	sync_with_host(10);
+
+	vmcall();
 }
 
-void guest_code(struct vmx_pages *vmx_pages)
+static void guest_code(void *arg)
 {
+	#define L2_GUEST_STACK_SIZE 64
+	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
 	uint64_t apicbase = rdmsr(MSR_IA32_APICBASE);
+	struct svm_test_data *svm = arg;
+	struct vmx_pages *vmx_pages = arg;
 
 	sync_with_host(1);
 
@@ -72,23 +82,55 @@ void guest_code(struct vmx_pages *vmx_pages)
 
 	sync_with_host(4);
 
-	if (vmx_pages) {
-		GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+	if (arg) {
+		if (this_cpu_has(X86_FEATURE_SVM)) {
+			generic_svm_setup(svm, l2_guest_code,
+					  &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+		} else {
+			GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+			GUEST_ASSERT(load_vmcs(vmx_pages));
+			prepare_vmcs(vmx_pages, l2_guest_code,
+				     &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+		}
 
 		sync_with_host(5);
 
 		self_smi();
 
 		sync_with_host(7);
+
+		if (this_cpu_has(X86_FEATURE_SVM)) {
+			run_guest(svm->vmcb, svm->vmcb_gpa);
+			run_guest(svm->vmcb, svm->vmcb_gpa);
+		} else {
+			vmlaunch();
+			vmresume();
+		}
+
+		/* Stages 8-11 are eaten by SMM (SMRAM_STAGE reported instead) */
+		sync_with_host(12);
 	}
 
 	sync_with_host(DONE);
 }
 
+void inject_smi(struct kvm_vcpu *vcpu)
+{
+	struct kvm_vcpu_events events;
+	/* Read-modify-write the vCPU events: mark an SMI pending and flag the SMM fields valid. */
+	vcpu_events_get(vcpu, &events);
+
+	events.smi.pending = 1;
+	events.flags |= KVM_VCPUEVENT_VALID_SMM;
+
+	vcpu_events_set(vcpu, &events);
+}
+
 int main(int argc, char *argv[])
 {
-	vm_vaddr_t vmx_pages_gva = 0;
+	vm_vaddr_t nested_gva = 0;
 
+	struct kvm_vcpu *vcpu;
 	struct kvm_regs regs;
 	struct kvm_vm *vm;
 	struct kvm_run *run;
@@ -96,11 +138,9 @@ int main(int argc, char *argv[])
 	int stage, stage_reported;
 
 	/* Create VM */
-	vm = vm_create_default(VCPU_ID, 0, guest_code);
+	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
 
-	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
-
-	run = vcpu_state(vm, VCPU_ID);
+	run = vcpu->run;
 
 	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, SMRAM_GPA,
 				    SMRAM_MEMSLOT, SMRAM_PAGES, 0);
@@ -111,25 +151,29 @@ int main(int argc, char *argv[])
 	memcpy(addr_gpa2hva(vm, SMRAM_GPA) + 0x8000, smi_handler,
 	       sizeof(smi_handler));
 
-	vcpu_set_msr(vm, VCPU_ID, MSR_IA32_SMBASE, SMRAM_GPA);
+	vcpu_set_msr(vcpu, MSR_IA32_SMBASE, SMRAM_GPA);
 
-	if (kvm_check_cap(KVM_CAP_NESTED_STATE)) {
-		vcpu_alloc_vmx(vm, &vmx_pages_gva);
-		vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
-	} else {
-		printf("will skip SMM test with VMX enabled\n");
-		vcpu_args_set(vm, VCPU_ID, 1, 0);
+	if (kvm_has_cap(KVM_CAP_NESTED_STATE)) {
+		if (kvm_cpu_has(X86_FEATURE_SVM))
+			vcpu_alloc_svm(vm, &nested_gva);
+		else if (kvm_cpu_has(X86_FEATURE_VMX))
+			vcpu_alloc_vmx(vm, &nested_gva);
 	}
 
+	if (!nested_gva)
+		pr_info("will skip SMM test with VMX enabled\n");
+
+	vcpu_args_set(vcpu, 1, nested_gva);
+
 	for (stage = 1;; stage++) {
-		_vcpu_run(vm, VCPU_ID);
+		vcpu_run(vcpu);
 		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
 			    "Stage %d: unexpected exit reason: %u (%s),\n",
 			    stage, run->exit_reason,
 			    exit_reason_str(run->exit_reason));
 
 		memset(&regs, 0, sizeof(regs));
-		vcpu_regs_get(vm, VCPU_ID, &regs);
+		vcpu_regs_get(vcpu, &regs);
 
 		stage_reported = regs.rax & 0xff;
 
@@ -141,14 +185,29 @@ int main(int argc, char *argv[])
 			    "Unexpected stage: #%x, got %x",
 			    stage, stage_reported);
 
-		state = vcpu_save_state(vm, VCPU_ID);
+		/*
+		 * Enter SMM during L2 execution and check that we correctly
+		 * return from it. Do not perform save/restore while in SMM yet.
+		 */
+		if (stage == 8) {
+			inject_smi(vcpu);
+			continue;
+		}
+
+		/*
+		 * Perform save/restore while the guest is in SMM triggered
+		 * during L2 execution.
+		 */
+		if (stage == 10)
+			inject_smi(vcpu);
+
+		state = vcpu_save_state(vcpu);
 		kvm_vm_release(vm);
-		kvm_vm_restart(vm, O_RDWR);
-		vm_vcpu_add(vm, VCPU_ID);
-		vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
-		vcpu_load_state(vm, VCPU_ID, state);
-		run = vcpu_state(vm, VCPU_ID);
-		free(state);
+
+		vcpu = vm_recreate_with_one_vcpu(vm);
+		vcpu_load_state(vcpu, state);
+		run = vcpu->run;
+		kvm_x86_state_cleanup(state);
 	}
 
 done:
diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c
index 3ab5ec3da9f4..ea578971fb9f 100644
--- a/tools/testing/selftests/kvm/x86_64/state_test.c
+++ b/tools/testing/selftests/kvm/x86_64/state_test.c
@@ -18,14 +18,45 @@
 #include "kvm_util.h"
 #include "processor.h"
 #include "vmx.h"
+#include "svm_util.h"
 
-#define VCPU_ID		5
+#define L2_GUEST_STACK_SIZE 256
 
-void l2_guest_code(void)
+void svm_l2_guest_code(void)
 {
+	GUEST_SYNC(4);
+	/* Exit to L1 */
+	vmcall();
 	GUEST_SYNC(6);
+	/* Done, exit to L1 and never come back.  */
+	vmcall();
+}
 
-        /* Exit to L1 */
+static void svm_l1_guest_code(struct svm_test_data *svm)
+{
+	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+	struct vmcb *vmcb = svm->vmcb;
+	/* L1 reports stages 3, 5 and 7; L2 (svm_l2_guest_code) reports 4 and 6 in between. */
+	GUEST_ASSERT(svm->vmcb_gpa);
+	/* Prepare for L2 execution. */
+	generic_svm_setup(svm, svm_l2_guest_code,
+			  &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+	GUEST_SYNC(3);
+	run_guest(vmcb, svm->vmcb_gpa);
+	GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+	GUEST_SYNC(5);
+	vmcb->save.rip += 3;	/* NOTE(review): presumably steps over the 3-byte vmcall in L2 */
+	run_guest(vmcb, svm->vmcb_gpa);
+	GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+	GUEST_SYNC(7);
+}
+
+void vmx_l2_guest_code(void)
+{
+	GUEST_SYNC(6);
+
+	/* Exit to L1 */
 	vmcall();
 
 	/* L1 has now set up a shadow VMCS for us.  */
@@ -42,10 +73,9 @@ void l2_guest_code(void)
 	vmcall();
 }
 
-void l1_guest_code(struct vmx_pages *vmx_pages)
+static void vmx_l1_guest_code(struct vmx_pages *vmx_pages)
 {
-#define L2_GUEST_STACK_SIZE 64
-        unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
 
 	GUEST_ASSERT(vmx_pages->vmcs_gpa);
 	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
@@ -56,7 +86,7 @@ void l1_guest_code(struct vmx_pages *vmx_pages)
 	GUEST_SYNC(4);
 	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
 
-	prepare_vmcs(vmx_pages, l2_guest_code,
+	prepare_vmcs(vmx_pages, vmx_l2_guest_code,
 		     &l2_guest_stack[L2_GUEST_STACK_SIZE]);
 
 	GUEST_SYNC(5);
@@ -106,22 +136,27 @@ void l1_guest_code(struct vmx_pages *vmx_pages)
 	GUEST_ASSERT(vmresume());
 }
 
-void guest_code(struct vmx_pages *vmx_pages)
+static void __attribute__((__flatten__)) guest_code(void *arg)
 {
 	GUEST_SYNC(1);
 	GUEST_SYNC(2);
 
-	if (vmx_pages)
-		l1_guest_code(vmx_pages);
+	if (arg) {
+		if (this_cpu_has(X86_FEATURE_SVM))
+			svm_l1_guest_code(arg);
+		else
+			vmx_l1_guest_code(arg);
+	}
 
 	GUEST_DONE();
 }
 
 int main(int argc, char *argv[])
 {
-	vm_vaddr_t vmx_pages_gva = 0;
+	vm_vaddr_t nested_gva = 0;
 
 	struct kvm_regs regs1, regs2;
+	struct kvm_vcpu *vcpu;
 	struct kvm_vm *vm;
 	struct kvm_run *run;
 	struct kvm_x86_state *state;
@@ -129,61 +164,61 @@ int main(int argc, char *argv[])
 	int stage;
 
 	/* Create VM */
-	vm = vm_create_default(VCPU_ID, 0, guest_code);
-	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
-	run = vcpu_state(vm, VCPU_ID);
-
-	vcpu_regs_get(vm, VCPU_ID, &regs1);
-
-	if (kvm_check_cap(KVM_CAP_NESTED_STATE)) {
-		vcpu_alloc_vmx(vm, &vmx_pages_gva);
-		vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
-	} else {
-		printf("will skip nested state checks\n");
-		vcpu_args_set(vm, VCPU_ID, 1, 0);
+	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+	run = vcpu->run;
+
+	vcpu_regs_get(vcpu, &regs1);
+
+	if (kvm_has_cap(KVM_CAP_NESTED_STATE)) {
+		if (kvm_cpu_has(X86_FEATURE_SVM))
+			vcpu_alloc_svm(vm, &nested_gva);
+		else if (kvm_cpu_has(X86_FEATURE_VMX))
+			vcpu_alloc_vmx(vm, &nested_gva);
 	}
 
+	if (!nested_gva)
+		pr_info("will skip nested state checks\n");
+
+	vcpu_args_set(vcpu, 1, nested_gva);
+
 	for (stage = 1;; stage++) {
-		_vcpu_run(vm, VCPU_ID);
+		vcpu_run(vcpu);
 		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
 			    "Stage %d: unexpected exit reason: %u (%s),\n",
 			    stage, run->exit_reason,
 			    exit_reason_str(run->exit_reason));
 
-		switch (get_ucall(vm, VCPU_ID, &uc)) {
+		switch (get_ucall(vcpu, &uc)) {
 		case UCALL_ABORT:
-			TEST_ASSERT(false, "%s at %s:%d", (const char *)uc.args[0],
-				    __FILE__, uc.args[1]);
+			REPORT_GUEST_ASSERT(uc);
 			/* NOT REACHED */
 		case UCALL_SYNC:
 			break;
 		case UCALL_DONE:
 			goto done;
 		default:
-			TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd);
+			TEST_FAIL("Unknown ucall %lu", uc.cmd);
 		}
 
 		/* UCALL_SYNC is handled here.  */
 		TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
-			    uc.args[1] == stage, "Unexpected register values vmexit #%lx, got %lx",
+			    uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
 			    stage, (ulong)uc.args[1]);
 
-		state = vcpu_save_state(vm, VCPU_ID);
+		state = vcpu_save_state(vcpu);
 		memset(&regs1, 0, sizeof(regs1));
-		vcpu_regs_get(vm, VCPU_ID, &regs1);
+		vcpu_regs_get(vcpu, &regs1);
 
 		kvm_vm_release(vm);
 
 		/* Restore state in a new VM.  */
-		kvm_vm_restart(vm, O_RDWR);
-		vm_vcpu_add(vm, VCPU_ID);
-		vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
-		vcpu_load_state(vm, VCPU_ID, state);
-		run = vcpu_state(vm, VCPU_ID);
-		free(state);
+		vcpu = vm_recreate_with_one_vcpu(vm);
+		vcpu_load_state(vcpu, state);
+		run = vcpu->run;
+		kvm_x86_state_cleanup(state);
 
 		memset(&regs2, 0, sizeof(regs2));
-		vcpu_regs_get(vm, VCPU_ID, &regs2);
+		vcpu_regs_get(vcpu, &regs2);
 		TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
 			    "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
 			    (ulong) regs2.rdi, (ulong) regs2.rsi);
diff --git a/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c b/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c
new file mode 100644
index 000000000000..4a07ba227b99
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c
@@ -0,0 +1,127 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * svm_int_ctl_test
+ *
+ * Copyright (C) 2021, Red Hat, Inc.
+ *
+ * Nested SVM testing: test simultaneous use of V_IRQ from L1 and L0.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "apic.h"
+
+bool vintr_irq_called;	/* set by vintr_irq_handler(), asserted by L2 */
+bool intr_irq_called;	/* set by intr_irq_handler(), asserted by L2 */
+
+#define VINTR_IRQ_NUMBER 0x20	/* vector L1 makes pending for L2 via V_IRQ */
+#define INTR_IRQ_NUMBER 0x30	/* vector L2 sends to itself via the x2APIC ICR */
+
+static void vintr_irq_handler(struct ex_regs *regs)
+{
+	vintr_irq_called = true;	/* record delivery of VINTR_IRQ_NUMBER */
+}
+
+static void intr_irq_handler(struct ex_regs *regs)
+{
+	x2apic_write_reg(APIC_EOI, 0x00);	/* write the x2APIC EOI register */
+	intr_irq_called = true;			/* record delivery of INTR_IRQ_NUMBER */
+}
+
+static void l2_guest_code(struct svm_test_data *svm)
+{
+	/*
+	 * Raise INTR_IRQ_NUMBER through L1's LAPIC by sending it to
+	 * ourselves.  L1 did not enable virtual interrupt masking, so
+	 * L2 -- not L1 -- should receive it.
+	 *
+	 * VINTR_IRQ_NUMBER is also pending for L2 in V_IRQ, so it should
+	 * be delivered as well after the 'sti' below.
+	 */
+	x2apic_write_reg(APIC_ICR,
+			 APIC_DEST_SELF | APIC_INT_ASSERT | INTR_IRQ_NUMBER);
+
+	/* Enable interrupts; both pending interrupts should be taken here. */
+	__asm__ __volatile__("sti\n"
+			     "nop\n");
+
+	/* Both interrupt handlers must have run by now. */
+	GUEST_ASSERT(vintr_irq_called);
+	GUEST_ASSERT(intr_irq_called);
+
+	/* L1 expects the resulting exit to be SVM_EXIT_VMMCALL. */
+	__asm__ __volatile__("vmcall\n");
+}
+
+static void l1_guest_code(struct svm_test_data *svm)
+{
+	#define L2_GUEST_STACK_SIZE 64
+	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+	struct vmcb *vmcb = svm->vmcb;
+
+	x2apic_enable();
+
+	/* Prepare for L2 execution. */
+	generic_svm_setup(svm, l2_guest_code,
+			  &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+	/* No virtual interrupt masking: clear V_INTR_MASKING in int_ctl */
+	vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
+
+	/* No intercepts for real (INTR) and virtual (VINTR) interrupts */
+	vmcb->control.intercept &= ~(BIT(INTERCEPT_INTR) | BIT(INTERCEPT_VINTR));
+
+	/* Make a virtual interrupt VINTR_IRQ_NUMBER pending, with priority 1 */
+	vmcb->control.int_ctl |= V_IRQ_MASK | (0x1 << V_INTR_PRIO_SHIFT);
+	vmcb->control.int_vector = VINTR_IRQ_NUMBER;
+
+	run_guest(vmcb, svm->vmcb_gpa);
+	GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);	/* L2 ends with 'vmcall' */
+	GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+	struct kvm_run *run;
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+	vm_vaddr_t svm_gva;
+	struct ucall uc;
+
+	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
+
+	vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+	run = vcpu->run;
+
+	vm_init_descriptor_tables(vm);
+	vcpu_init_descriptor_tables(vcpu);
+
+	vm_install_exception_handler(vm, VINTR_IRQ_NUMBER, vintr_irq_handler);
+	vm_install_exception_handler(vm, INTR_IRQ_NUMBER, intr_irq_handler);
+
+	vcpu_alloc_svm(vm, &svm_gva);
+	vcpu_args_set(vcpu, 1, svm_gva);
+
+	vcpu_run(vcpu);
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+		    "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
+		    run->exit_reason,
+		    exit_reason_str(run->exit_reason));
+
+	/* A single run is expected to end in GUEST_DONE(). */
+	switch (get_ucall(vcpu, &uc)) {
+	case UCALL_DONE:
+		break;
+	case UCALL_ABORT:
+		REPORT_GUEST_ASSERT(uc);
+		/* NOT REACHED */
+		break;
+	default:
+		TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+	}
+
+	kvm_vm_free(vm);
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c b/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c
new file mode 100644
index 000000000000..e637d7736012
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c
@@ -0,0 +1,211 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2022 Oracle and/or its affiliates.
+ *
+ * Based on:
+ *   svm_int_ctl_test
+ *
+ *   Copyright (C) 2021, Red Hat, Inc.
+ *
+ */
+
+#include <stdatomic.h>
+#include <stdio.h>
+#include <unistd.h>
+#include "apic.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "test_util.h"
+
+#define INT_NR			0x20	/* vector L1 injects into L2 as a soft interrupt */
+
+static_assert(ATOMIC_INT_LOCK_FREE == 2, "atomic int is not lockless");
+
+static unsigned int bp_fired;	/* times the BP_VECTOR handler has run */
+static void guest_bp_handler(struct ex_regs *regs)
+{
+	bp_fired++;
+}
+
+static unsigned int int_fired;	/* times the INT_NR handler has run */
+static void l2_guest_code_int(void);
+
+static void guest_int_handler(struct ex_regs *regs)
+{
+	int_fired++;	/* counted before the check so a failure still shows up in L2 */
+	GUEST_ASSERT_2(regs->rip == (unsigned long)l2_guest_code_int,
+		       regs->rip, (unsigned long)l2_guest_code_int);
+}
+
+static void l2_guest_code_int(void)
+{
+	GUEST_ASSERT_1(int_fired == 1, int_fired);	/* INT_NR was injected on entry */
+	vmmcall();	/* first exit: L1 expects SVM_EXIT_VMMCALL */
+	ud2();		/* L1 points next_rip past this when it injects #BP */
+
+	GUEST_ASSERT_1(bp_fired == 1, bp_fired);	/* the injected #BP was handled */
+	hlt();		/* second exit: L1 expects SVM_EXIT_HLT */
+}
+
+static atomic_int nmi_stage;	/* bumped by each NMI taken, and once by L1 in between */
+#define nmi_stage_get() atomic_load_explicit(&nmi_stage, memory_order_acquire)
+#define nmi_stage_inc() atomic_fetch_add_explicit(&nmi_stage, 1, memory_order_acq_rel)
+static void guest_nmi_handler(struct ex_regs *regs)
+{
+	nmi_stage_inc();
+
+	if (nmi_stage_get() == 1) {	/* first NMI: the one L1 injects via event_inj */
+		vmmcall();
+		GUEST_ASSERT(false);	/* L1 does not re-enter L2 after this exit */
+	} else {	/* second NMI: L1's self-NMI, sent after L1 bumped the stage to 2 */
+		GUEST_ASSERT_1(nmi_stage_get() == 3, nmi_stage_get());
+		GUEST_DONE();
+	}
+}
+
+static void l2_guest_code_nmi(void)
+{
+	ud2();	/* #UD is intercepted by L1, which would then fail its VMMCALL exit check */
+}
+
+static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t idt_alt)
+{
+	#define L2_GUEST_STACK_SIZE 64
+	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+	struct vmcb *vmcb = svm->vmcb;
+
+	if (is_nmi)
+		x2apic_enable();	/* the self-NMI below is sent through the x2APIC ICR */
+
+	/* Prepare for L2 execution; the L2 entry point depends on the test. */
+	generic_svm_setup(svm,
+			  is_nmi ? l2_guest_code_nmi : l2_guest_code_int,
+			  &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+	vmcb->control.intercept_exceptions |= BIT(PF_VECTOR) | BIT(UD_VECTOR);
+	vmcb->control.intercept |= BIT(INTERCEPT_NMI) | BIT(INTERCEPT_HLT);
+
+	if (is_nmi) {
+		vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
+	} else {
+		vmcb->control.event_inj = INT_NR | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_SOFT;
+		/* The return address pushed on stack: L2's current (entry) RIP */
+		vmcb->control.next_rip = vmcb->save.rip;
+	}
+
+	run_guest(vmcb, svm->vmcb_gpa);
+	GUEST_ASSERT_3(vmcb->control.exit_code == SVM_EXIT_VMMCALL,
+		       vmcb->control.exit_code,
+		       vmcb->control.exit_info_1, vmcb->control.exit_info_2);
+
+	if (is_nmi) {
+		clgi();
+		x2apic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_NMI);
+
+		GUEST_ASSERT_1(nmi_stage_get() == 1, nmi_stage_get());
+		nmi_stage_inc();	/* stage 2: guest_nmi_handler() expects 3 after its own bump */
+
+		stgi();
+		/* self-NMI happens here */
+		while (true)
+			cpu_relax();
+	}
+
+	/* Skip over VMMCALL (advance L2's RIP by 3 bytes) */
+	vmcb->save.rip += 3;
+
+	/* Switch to alternate IDT to cause intervening NPF again */
+	vmcb->save.idtr.base = idt_alt;
+	vmcb->control.clean = 0; /* &= ~BIT(VMCB_DT) would be enough */
+
+	vmcb->control.event_inj = BP_VECTOR | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_EXEPT;
+	/* The return address pushed on stack, skip over UD2 */
+	vmcb->control.next_rip = vmcb->save.rip + 2;
+
+	run_guest(vmcb, svm->vmcb_gpa);
+	GUEST_ASSERT_3(vmcb->control.exit_code == SVM_EXIT_HLT,
+		       vmcb->control.exit_code,
+		       vmcb->control.exit_info_1, vmcb->control.exit_info_2);
+
+	GUEST_DONE();
+}
+
+static void run_test(bool is_nmi)
+{
+	struct kvm_guest_debug debug;
+	vm_vaddr_t idt_alt_vm = 0;
+	struct kvm_vcpu *vcpu;
+	vm_vaddr_t svm_gva;
+	struct kvm_run *run;
+	struct kvm_vm *vm;
+	struct ucall uc;
+
+	pr_info("Running %s test\n", is_nmi ? "NMI" : "soft int");
+
+	vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+	run = vcpu->run;
+
+	vm_init_descriptor_tables(vm);
+	vcpu_init_descriptor_tables(vcpu);
+
+	vm_install_exception_handler(vm, NMI_VECTOR, guest_nmi_handler);
+	vm_install_exception_handler(vm, BP_VECTOR, guest_bp_handler);
+	vm_install_exception_handler(vm, INT_NR, guest_int_handler);
+
+	vcpu_alloc_svm(vm, &svm_gva);
+
+	/* Only the soft-interrupt test gets a second copy of the IDT. */
+	if (!is_nmi) {
+		void *idt, *idt_alt;
+
+		idt_alt_vm = vm_vaddr_alloc_page(vm);
+		idt_alt = addr_gva2hva(vm, idt_alt_vm);
+		idt = addr_gva2hva(vm, vm->idt);
+		memcpy(idt_alt, idt, getpagesize());
+	}
+	vcpu_args_set(vcpu, 3, svm_gva, (uint64_t)is_nmi, (uint64_t)idt_alt_vm);
+
+	memset(&debug, 0, sizeof(debug));
+	vcpu_guest_debug_set(vcpu, &debug);
+
+	/* Arm a 2 second alarm around the run; disarm it once KVM_RUN returns. */
+	alarm(2);
+	vcpu_run(vcpu);
+	alarm(0);
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+		    "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
+		    run->exit_reason,
+		    exit_reason_str(run->exit_reason));
+
+	switch (get_ucall(vcpu, &uc)) {
+	case UCALL_DONE:
+		break;
+	case UCALL_ABORT:
+		REPORT_GUEST_ASSERT_3(uc, "vals = 0x%lx 0x%lx 0x%lx");
+		/* NOT REACHED */
+		break;
+	default:
+		TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+	}
+
+	kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+	/* Tell stdout not to buffer its content */
+	setbuf(stdout, NULL);
+
+	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
+
+	TEST_ASSERT(kvm_cpu_has(X86_FEATURE_NRIPS),
+		    "KVM with nSVM is supposed to unconditionally advertise nRIP Save");
+
+	atomic_init(&nmi_stage, 0);
+
+	run_test(false);	/* soft interrupt, then #BP injection */
+	run_test(true);		/* NMI injection */
+
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c b/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c
index e280f68f6365..c3ac45df7483 100644
--- a/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c
+++ b/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c
@@ -12,10 +12,6 @@
 #include "processor.h"
 #include "svm_util.h"
 
-#define VCPU_ID		5
-
-static struct kvm_vm *vm;
-
 static void l2_guest_code(struct svm_test_data *svm)
 {
 	__asm__ __volatile__("vmcall");
@@ -39,38 +35,37 @@ static void l1_guest_code(struct svm_test_data *svm)
 
 int main(int argc, char *argv[])
 {
+	struct kvm_vcpu *vcpu;
 	vm_vaddr_t svm_gva;
+	struct kvm_vm *vm;
 
-	nested_svm_check_supported();
+	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
 
-	vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
-	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+	vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
 
 	vcpu_alloc_svm(vm, &svm_gva);
-	vcpu_args_set(vm, VCPU_ID, 1, svm_gva);
+	vcpu_args_set(vcpu, 1, svm_gva);
 
 	for (;;) {
-		volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+		volatile struct kvm_run *run = vcpu->run;
 		struct ucall uc;
 
-		vcpu_run(vm, VCPU_ID);
+		vcpu_run(vcpu);
 		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
 			    "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
 			    run->exit_reason,
 			    exit_reason_str(run->exit_reason));
 
-		switch (get_ucall(vm, VCPU_ID, &uc)) {
+		switch (get_ucall(vcpu, &uc)) {
 		case UCALL_ABORT:
-			TEST_ASSERT(false, "%s",
-				    (const char *)uc.args[0]);
+			REPORT_GUEST_ASSERT(uc);
 			/* NOT REACHED */
 		case UCALL_SYNC:
 			break;
 		case UCALL_DONE:
 			goto done;
 		default:
-			TEST_ASSERT(false,
-				    "Unknown ucall 0x%x.", uc.cmd);
+			TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
 		}
 	}
 done:
diff --git a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
index 5c8224256294..9b6db0b0b13e 100644
--- a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
@@ -20,10 +20,12 @@
 #include "kvm_util.h"
 #include "processor.h"
 
-#define VCPU_ID 5
-
 #define UCALL_PIO_PORT ((uint16_t)0x1000)
 
+struct ucall uc_none = {
+	.cmd = UCALL_NONE,
+};
+
 /*
  * ucall is embedded here to protect against compiler reshuffling registers
  * before calling a function. In this test we only need to get KVM_EXIT_IO
@@ -34,7 +36,8 @@ void guest_code(void)
 	asm volatile("1: in %[port], %%al\n"
 		     "add $0x1, %%rbx\n"
 		     "jmp 1b"
-		     : : [port] "d" (UCALL_PIO_PORT) : "rax", "rbx");
+		     : : [port] "d" (UCALL_PIO_PORT), "D" (&uc_none)
+		     : "rax", "rbx");
 }
 
 static void compare_regs(struct kvm_regs *left, struct kvm_regs *right)
@@ -79,6 +82,7 @@ static void compare_vcpu_events(struct kvm_vcpu_events *left,
 
 int main(int argc, char *argv[])
 {
+	struct kvm_vcpu *vcpu;
 	struct kvm_vm *vm;
 	struct kvm_run *run;
 	struct kvm_regs regs;
@@ -90,66 +94,59 @@ int main(int argc, char *argv[])
 	setbuf(stdout, NULL);
 
 	cap = kvm_check_cap(KVM_CAP_SYNC_REGS);
-	if ((cap & TEST_SYNC_FIELDS) != TEST_SYNC_FIELDS) {
-		fprintf(stderr, "KVM_CAP_SYNC_REGS not supported, skipping test\n");
-		exit(KSFT_SKIP);
-	}
-	if ((cap & INVALID_SYNC_FIELD) != 0) {
-		fprintf(stderr, "The \"invalid\" field is not invalid, skipping test\n");
-		exit(KSFT_SKIP);
-	}
+	TEST_REQUIRE((cap & TEST_SYNC_FIELDS) == TEST_SYNC_FIELDS);
+	TEST_REQUIRE(!(cap & INVALID_SYNC_FIELD));
 
-	/* Create VM */
-	vm = vm_create_default(VCPU_ID, 0, guest_code);
+	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
 
-	run = vcpu_state(vm, VCPU_ID);
+	run = vcpu->run;
 
 	/* Request reading invalid register set from VCPU. */
 	run->kvm_valid_regs = INVALID_SYNC_FIELD;
-	rv = _vcpu_run(vm, VCPU_ID);
+	rv = _vcpu_run(vcpu);
 	TEST_ASSERT(rv < 0 && errno == EINVAL,
 		    "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n",
 		    rv);
-	vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0;
+	run->kvm_valid_regs = 0;
 
 	run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
-	rv = _vcpu_run(vm, VCPU_ID);
+	rv = _vcpu_run(vcpu);
 	TEST_ASSERT(rv < 0 && errno == EINVAL,
 		    "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n",
 		    rv);
-	vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0;
+	run->kvm_valid_regs = 0;
 
 	/* Request setting invalid register set into VCPU. */
 	run->kvm_dirty_regs = INVALID_SYNC_FIELD;
-	rv = _vcpu_run(vm, VCPU_ID);
+	rv = _vcpu_run(vcpu);
 	TEST_ASSERT(rv < 0 && errno == EINVAL,
 		    "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n",
 		    rv);
-	vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0;
+	run->kvm_dirty_regs = 0;
 
 	run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
-	rv = _vcpu_run(vm, VCPU_ID);
+	rv = _vcpu_run(vcpu);
 	TEST_ASSERT(rv < 0 && errno == EINVAL,
 		    "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n",
 		    rv);
-	vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0;
+	run->kvm_dirty_regs = 0;
 
 	/* Request and verify all valid register sets. */
 	/* TODO: BUILD TIME CHECK: TEST_ASSERT(KVM_SYNC_X86_NUM_FIELDS != 3); */
 	run->kvm_valid_regs = TEST_SYNC_FIELDS;
-	rv = _vcpu_run(vm, VCPU_ID);
+	rv = _vcpu_run(vcpu);
 	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
 		    "Unexpected exit reason: %u (%s),\n",
 		    run->exit_reason,
 		    exit_reason_str(run->exit_reason));
 
-	vcpu_regs_get(vm, VCPU_ID, &regs);
+	vcpu_regs_get(vcpu, &regs);
 	compare_regs(&regs, &run->s.regs.regs);
 
-	vcpu_sregs_get(vm, VCPU_ID, &sregs);
+	vcpu_sregs_get(vcpu, &sregs);
 	compare_sregs(&sregs, &run->s.regs.sregs);
 
-	vcpu_events_get(vm, VCPU_ID, &events);
+	vcpu_events_get(vcpu, &events);
 	compare_vcpu_events(&events, &run->s.regs.events);
 
 	/* Set and verify various register values. */
@@ -159,7 +156,7 @@ int main(int argc, char *argv[])
 
 	run->kvm_valid_regs = TEST_SYNC_FIELDS;
 	run->kvm_dirty_regs = KVM_SYNC_X86_REGS | KVM_SYNC_X86_SREGS;
-	rv = _vcpu_run(vm, VCPU_ID);
+	rv = _vcpu_run(vcpu);
 	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
 		    "Unexpected exit reason: %u (%s),\n",
 		    run->exit_reason,
@@ -171,13 +168,13 @@ int main(int argc, char *argv[])
 		    "apic_base sync regs value incorrect 0x%llx.",
 		    run->s.regs.sregs.apic_base);
 
-	vcpu_regs_get(vm, VCPU_ID, &regs);
+	vcpu_regs_get(vcpu, &regs);
 	compare_regs(&regs, &run->s.regs.regs);
 
-	vcpu_sregs_get(vm, VCPU_ID, &sregs);
+	vcpu_sregs_get(vcpu, &sregs);
 	compare_sregs(&sregs, &run->s.regs.sregs);
 
-	vcpu_events_get(vm, VCPU_ID, &events);
+	vcpu_events_get(vcpu, &events);
 	compare_vcpu_events(&events, &run->s.regs.events);
 
 	/* Clear kvm_dirty_regs bits, verify new s.regs values are
@@ -186,7 +183,7 @@ int main(int argc, char *argv[])
 	run->kvm_valid_regs = TEST_SYNC_FIELDS;
 	run->kvm_dirty_regs = 0;
 	run->s.regs.regs.rbx = 0xDEADBEEF;
-	rv = _vcpu_run(vm, VCPU_ID);
+	rv = _vcpu_run(vcpu);
 	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
 		    "Unexpected exit reason: %u (%s),\n",
 		    run->exit_reason,
@@ -203,8 +200,8 @@ int main(int argc, char *argv[])
 	run->kvm_dirty_regs = 0;
 	run->s.regs.regs.rbx = 0xAAAA;
 	regs.rbx = 0xBAC0;
-	vcpu_regs_set(vm, VCPU_ID, &regs);
-	rv = _vcpu_run(vm, VCPU_ID);
+	vcpu_regs_set(vcpu, &regs);
+	rv = _vcpu_run(vcpu);
 	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
 		    "Unexpected exit reason: %u (%s),\n",
 		    run->exit_reason,
@@ -212,7 +209,7 @@ int main(int argc, char *argv[])
 	TEST_ASSERT(run->s.regs.regs.rbx == 0xAAAA,
 		    "rbx sync regs value incorrect 0x%llx.",
 		    run->s.regs.regs.rbx);
-	vcpu_regs_get(vm, VCPU_ID, &regs);
+	vcpu_regs_get(vcpu, &regs);
 	TEST_ASSERT(regs.rbx == 0xBAC0 + 1,
 		    "rbx guest value incorrect 0x%llx.",
 		    regs.rbx);
@@ -224,7 +221,7 @@ int main(int argc, char *argv[])
 	run->kvm_valid_regs = 0;
 	run->kvm_dirty_regs = TEST_SYNC_FIELDS;
 	run->s.regs.regs.rbx = 0xBBBB;
-	rv = _vcpu_run(vm, VCPU_ID);
+	rv = _vcpu_run(vcpu);
 	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
 		    "Unexpected exit reason: %u (%s),\n",
 		    run->exit_reason,
@@ -232,7 +229,7 @@ int main(int argc, char *argv[])
 	TEST_ASSERT(run->s.regs.regs.rbx == 0xBBBB,
 		    "rbx sync regs value incorrect 0x%llx.",
 		    run->s.regs.regs.rbx);
-	vcpu_regs_get(vm, VCPU_ID, &regs);
+	vcpu_regs_get(vcpu, &regs);
 	TEST_ASSERT(regs.rbx == 0xBBBB + 1,
 		    "rbx guest value incorrect 0x%llx.",
 		    regs.rbx);
diff --git a/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c b/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c
new file mode 100644
index 000000000000..70b44f0b52fe
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "kselftest.h"
+
+#define ARBITRARY_IO_PORT	0x2000	/* port L2 reads; main() expects it in run->io.port */
+
+/* The virtual machine object. */
+static struct kvm_vm *vm;
+
+static void l2_guest_code(void)
+{
+	asm volatile("inb %%dx, %%al"	/* main() expects KVM_EXIT_IO for this read */
+		     : : [port] "d" (ARBITRARY_IO_PORT) : "rax");
+}
+
+void l1_guest_code(struct vmx_pages *vmx)
+{
+#define L2_GUEST_STACK_SIZE 64
+	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+	GUEST_ASSERT(vmx->vmcs_gpa);
+	GUEST_ASSERT(prepare_for_vmx_operation(vmx));
+	GUEST_ASSERT(load_vmcs(vmx));
+
+	prepare_vmcs(vmx, l2_guest_code,
+		     &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+	GUEST_ASSERT(!vmlaunch());
+	/* L2 should exit with a triple fault once userspace injects the event. */
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_TRIPLE_FAULT);
+	GUEST_DONE();
+}
+
+int main(void)
+{
+	struct kvm_vcpu *vcpu;
+	struct kvm_run *run;
+	struct kvm_vcpu_events events;
+	vm_vaddr_t vmx_pages_gva;
+	struct ucall uc;
+
+	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+	TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_TRIPLE_FAULT_EVENT));
+
+	vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+	vm_enable_cap(vm, KVM_CAP_X86_TRIPLE_FAULT_EVENT, 1);
+
+	run = vcpu->run;
+	vcpu_alloc_vmx(vm, &vmx_pages_gva);
+	vcpu_args_set(vcpu, 1, vmx_pages_gva);
+	vcpu_run(vcpu);
+
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+		    "Expected KVM_EXIT_IO, got: %u (%s)\n",
+		    run->exit_reason, exit_reason_str(run->exit_reason));
+	TEST_ASSERT(run->io.port == ARBITRARY_IO_PORT,
+		    "Expected IN from port %d from L2, got port %d",
+		    ARBITRARY_IO_PORT, run->io.port);
+	/* Mark a triple fault as pending while L2's port read is outstanding. */
+	vcpu_events_get(vcpu, &events);
+	events.flags |= KVM_VCPUEVENT_VALID_TRIPLE_FAULT;
+	events.triple_fault.pending = true;
+	vcpu_events_set(vcpu, &events);
+	run->immediate_exit = true;
+	vcpu_run_complete_io(vcpu);
+
+	vcpu_events_get(vcpu, &events);
+	TEST_ASSERT(events.flags & KVM_VCPUEVENT_VALID_TRIPLE_FAULT,
+		    "Triple fault event invalid");
+	TEST_ASSERT(events.triple_fault.pending, "No triple fault pending");
+	vcpu_run(vcpu);
+
+	switch (get_ucall(vcpu, &uc)) {
+	case UCALL_DONE:
+		break;
+	case UCALL_ABORT:
+		REPORT_GUEST_ASSERT(uc);
+	default:
+		TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+	}
+	kvm_vm_free(vm);
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c
new file mode 100644
index 000000000000..22d366c697f7
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c
@@ -0,0 +1,153 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Tests for MSR_IA32_TSC and MSR_IA32_TSC_ADJUST.
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+#include <stdio.h>
+#include <string.h>
+#include "kvm_util.h"
+#include "processor.h"
+
+#define UNITY                  (1ull << 30)
+#define HOST_ADJUST            (UNITY * 64)
+#define GUEST_STEP             (UNITY * 4)
+#define ROUND(x)               (((x) + UNITY / 2) & -UNITY)	/* to nearest multiple of UNITY */
+#define rounded_rdmsr(x)       ROUND(rdmsr(x))
+#define rounded_host_rdmsr(x)  ROUND(vcpu_get_msr(vcpu, x))	/* needs 'vcpu' in scope */
+
+static void guest_code(void)
+{
+	u64 val = 0;
+
+	GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val);
+	GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+	/* Guest: writes to MSR_IA32_TSC affect both MSRs.  */
+	val = 1ull * GUEST_STEP;
+	wrmsr(MSR_IA32_TSC, val);
+	GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val);
+	GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+	/* Guest: writes to MSR_IA32_TSC_ADJUST affect both MSRs.  */
+	GUEST_SYNC(2);	/* ends the host's run_vcpu(vcpu, 1) */
+	val = 2ull * GUEST_STEP;
+	wrmsr(MSR_IA32_TSC_ADJUST, val);
+	GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val);
+	GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+	/* Host: setting the TSC offset.  */
+	GUEST_SYNC(3);	/* ends the host's run_vcpu(vcpu, 2) */
+	GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+	GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+	/*
+	 * Guest: writes to MSR_IA32_TSC_ADJUST do not destroy the
+	 * host-side offset and affect both MSRs.
+	 */
+	GUEST_SYNC(4);	/* ends the host's run_vcpu(vcpu, 3) */
+	val = 3ull * GUEST_STEP;
+	wrmsr(MSR_IA32_TSC_ADJUST, val);
+	GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+	GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+	/*
+	 * Guest: writes to MSR_IA32_TSC affect both MSRs, so the host-side
+	 * offset is now visible in MSR_IA32_TSC_ADJUST.
+	 */
+	GUEST_SYNC(5);	/* ends the host's run_vcpu(vcpu, 4) */
+	val = 4ull * GUEST_STEP;
+	wrmsr(MSR_IA32_TSC, val);
+	GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val);
+	GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val - HOST_ADJUST);
+
+	GUEST_DONE();
+}
+
+static void run_vcpu(struct kvm_vcpu *vcpu, int stage)
+{
+	struct ucall uc;
+
+	vcpu_run(vcpu);
+
+	switch (get_ucall(vcpu, &uc)) {
+	case UCALL_SYNC:	/* guest must have stopped at GUEST_SYNC(stage + 1) */
+		TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+			    uc.args[1] == stage + 1, "Stage %d: Unexpected register values vmexit, got %lx",
+			    stage + 1, (ulong)uc.args[1]);
+		return;
+	case UCALL_DONE:	/* guest ran to GUEST_DONE() */
+		return;
+	case UCALL_ABORT:	/* a guest-side assertion failed */
+		REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx");
+	default:
+		TEST_ASSERT(false, "Unexpected exit: %s",
+			    exit_reason_str(vcpu->run->exit_reason));
+	}
+}
+
+int main(void)
+{
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+	uint64_t val;
+
+	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+	val = 0;
+	ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
+	ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+	/* Guest: writes to MSR_IA32_TSC affect both MSRs.  */
+	run_vcpu(vcpu, 1);	/* guest runs up to GUEST_SYNC(2) */
+	val = 1ull * GUEST_STEP;
+	ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
+	ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+	/* Guest: writes to MSR_IA32_TSC_ADJUST affect both MSRs.  */
+	run_vcpu(vcpu, 2);	/* guest runs up to GUEST_SYNC(3) */
+	val = 2ull * GUEST_STEP;
+	ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
+	ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+	/*
+	 * Host: writes to MSR_IA32_TSC set the host-side offset
+	 * and therefore do not change MSR_IA32_TSC_ADJUST.
+	 */
+	vcpu_set_msr(vcpu, MSR_IA32_TSC, HOST_ADJUST + val);
+	ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+	ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+	run_vcpu(vcpu, 3);	/* guest checks the host-set values, up to GUEST_SYNC(4) */
+
+	/* Host: writes to MSR_IA32_TSC_ADJUST do not modify the TSC.  */
+	vcpu_set_msr(vcpu, MSR_IA32_TSC_ADJUST, UNITY * 123456);
+	ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+	ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_TSC_ADJUST), UNITY * 123456);
+
+	/* Restore previous value.  */
+	vcpu_set_msr(vcpu, MSR_IA32_TSC_ADJUST, val);
+	ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+	ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+	/*
+	 * Guest: writes to MSR_IA32_TSC_ADJUST do not destroy the
+	 * host-side offset and affect both MSRs.
+	 */
+	run_vcpu(vcpu, 4);	/* guest runs up to GUEST_SYNC(5) */
+	val = 3ull * GUEST_STEP;
+	ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+	ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+	/*
+	 * Guest: writes to MSR_IA32_TSC affect both MSRs, so the host-side
+	 * offset is now visible in MSR_IA32_TSC_ADJUST.
+	 */
+	run_vcpu(vcpu, 5);	/* guest runs to GUEST_DONE() */
+	val = 4ull * GUEST_STEP;
+	ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
+	ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val - HOST_ADJUST);
+
+	kvm_vm_free(vm);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c b/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c
new file mode 100644
index 000000000000..47139aab7408
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * tsc_scaling_sync
+ *
+ * Copyright © 2021 Amazon.com, Inc. or its affiliates.
+ *
+ * Cross-vCPU TSC synchronization test with a VM-wide scaled TSC frequency
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#include <stdint.h>
+#include <time.h>
+#include <sched.h>
+#include <signal.h>
+#include <pthread.h>
+
+#define NR_TEST_VCPUS 20	/* one host thread is started per vCPU */
+
+static struct kvm_vm *vm;
+pthread_spinlock_t create_lock;	/* serializes vm_vcpu_add() across threads */
+
+#define TEST_TSC_KHZ    2345678UL	/* passed to the VM-level KVM_SET_TSC_KHZ */
+#define TEST_TSC_OFFSET 200000000	/* written to MSR_IA32_TSC on the first vCPU */
+
+uint64_t tsc_sync;	/* most recent TSC value published by any vCPU */
+static void guest_code(void)
+{
+	uint64_t first, now, prev;
+
+	first = rdtsc();
+	do {
+		/* Publish our TSC; report it if it is behind the previous one. */
+		prev = READ_ONCE(tsc_sync);
+		now = rdtsc();
+		WRITE_ONCE(tsc_sync, now);
+		if (unlikely(now < prev))
+			GUEST_SYNC_ARGS(0, now, prev, 0, 0);
+	} while (now - first < 5000 * TEST_TSC_KHZ);
+
+	GUEST_DONE();
+}
+
+
+/* Thread body: create and run one vCPU; returns its failure count as void *. */
+static void *run_vcpu(void *_cpu_nr)
+{
+	unsigned long vcpu_id = (unsigned long)_cpu_nr;
+	unsigned long failures = 0;
+	static bool first_cpu_done;	/* only touched under create_lock */
+	struct kvm_vcpu *vcpu;
+
+	/* The kernel is fine, but vm_vcpu_add() needs locking */
+	pthread_spin_lock(&create_lock);
+
+	vcpu = vm_vcpu_add(vm, vcpu_id, guest_code);
+
+	if (!first_cpu_done) {
+		first_cpu_done = true;
+		vcpu_set_msr(vcpu, MSR_IA32_TSC, TEST_TSC_OFFSET);
+	}
+
+	pthread_spin_unlock(&create_lock);
+
+	for (;;) {
+		volatile struct kvm_run *run = vcpu->run;
+		struct ucall uc;
+
+		vcpu_run(vcpu);
+		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+			    "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
+			    run->exit_reason,
+			    exit_reason_str(run->exit_reason));
+
+		switch (get_ucall(vcpu, &uc)) {
+		case UCALL_DONE:
+			goto out;
+		case UCALL_SYNC:
+			/* The guest only syncs when it saw the TSC go backwards. */
+			printf("Guest %d sync %lx %lx %ld\n", vcpu->id,
+			       uc.args[2], uc.args[3], uc.args[2] - uc.args[3]);
+			failures++;
+			break;
+		default:
+			TEST_FAIL("Unknown ucall %lu", uc.cmd);
+		}
+	}
+out:
+	return (void *)failures;
+}
+
+int main(int argc, char *argv[])
+{
+	pthread_t cpu_threads[NR_TEST_VCPUS];
+	unsigned long cpu, failures = 0;
+
+	TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_TSC_CONTROL));
+
+	vm = vm_create(NR_TEST_VCPUS);
+	vm_ioctl(vm, KVM_SET_TSC_KHZ, (void *) TEST_TSC_KHZ);	/* before any vCPU is added */
+
+	pthread_spin_init(&create_lock, PTHREAD_PROCESS_PRIVATE);
+	for (cpu = 0; cpu < NR_TEST_VCPUS; cpu++)
+		pthread_create(&cpu_threads[cpu], NULL, run_vcpu, (void *)cpu);
+
+	for (cpu = 0; cpu < NR_TEST_VCPUS; cpu++) {
+		void *this_cpu_failures;
+		pthread_join(cpu_threads[cpu], &this_cpu_failures);
+		failures += (unsigned long)this_cpu_failures;	/* thread result is its count */
+	}
+
+	TEST_ASSERT(!failures, "TSC sync failed");
+	pthread_spin_destroy(&create_lock);
+	kvm_vm_free(vm);
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c b/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c
new file mode 100644
index 000000000000..a897c7fd8abe
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c
@@ -0,0 +1,316 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ucna_injection_test
+ *
+ * Copyright (C) 2022, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Test that user space can inject UnCorrectable No Action required (UCNA)
+ * memory errors to the guest.
+ *
+ * The test starts one vCPU with the MCG_CMCI_P enabled. It verifies that
+ * proper UCNA errors can be injected to a vCPU with MCG_CMCI_P and
+ * corresponding per-bank control register (MCI_CTL2) bit enabled.
+ * The test also checks that the UCNA errors get recorded in the
+ * Machine Check bank registers regardless of whether the error signal
+ * interrupts get delivered to the guest.
+ *
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <pthread.h>
+#include <inttypes.h>
+#include <string.h>
+#include <time.h>
+
+#include "kvm_util_base.h"
+#include "kvm_util.h"
+#include "mce.h"
+#include "processor.h"
+#include "test_util.h"
+#include "apic.h"
+
+#define SYNC_FIRST_UCNA 9
+#define SYNC_SECOND_UCNA 10
+#define SYNC_GP 11
+#define FIRST_UCNA_ADDR 0xdeadbeef
+#define SECOND_UCNA_ADDR 0xcafeb0ba
+
+/*
+ * Vector for the CMCI interrupt.
+ * Value is arbitrary. Any value in 0x20-0xFF should work:
+ * https://wiki.osdev.org/Interrupt_Vector_Table
+ */
+#define CMCI_VECTOR  0xa9
+
+#define UCNA_BANK  0x7	// IMC0 bank
+
+#define MCI_CTL2_RESERVED_BIT BIT_ULL(29)
+
+static uint64_t supported_mcg_caps;
+
+/*
+ * Record state about the injected UCNA.
+ * Variables with the 'i_' prefix are recorded in the interrupt handler.
+ * Variables without the 'i_' prefix are recorded in the guest's main
+ * execution thread.
+ */
+static volatile uint64_t i_ucna_rcvd;
+static volatile uint64_t i_ucna_addr;
+static volatile uint64_t ucna_addr;
+static volatile uint64_t ucna_addr2;
+
+/*
+ * Host-side state for one UCNA injection run: the vCPU under test plus host
+ * pointers to the guest's recording variables.  test_ucna_injection() fills
+ * the pointers in with addr_gva2hva().
+ */
+struct thread_params {
+	struct kvm_vcpu *vcpu;
+	uint64_t *p_i_ucna_rcvd;	/* host alias of i_ucna_rcvd */
+	uint64_t *p_i_ucna_addr;	/* host alias of i_ucna_addr */
+	uint64_t *p_ucna_addr;		/* host alias of ucna_addr */
+	uint64_t *p_ucna_addr2;		/* host alias of ucna_addr2 */
+};
+
+/* Guest helper: assert that the APIC base MSR points at APIC_DEFAULT_GPA. */
+static void verify_apic_base_addr(void)
+{
+	uint64_t base;
+
+	base = GET_APIC_BASE(rdmsr(MSR_IA32_APICBASE));
+	GUEST_ASSERT(base == APIC_DEFAULT_GPA);
+}
+
+/*
+ * Guest code for the UCNA injection vCPU.
+ *
+ * Enables the xAPIC, routes CMCI to CMCI_VECTOR and enables per-bank CMCI on
+ * UCNA_BANK, then syncs with the host twice.  After each sync it records the
+ * bank's ADDR MSR: the first time with per-bank CMCI enabled (ucna_addr), the
+ * second time with it disabled (ucna_addr2).
+ */
+static void ucna_injection_guest_code(void)
+{
+	uint64_t ctl2;
+	verify_apic_base_addr();
+	xapic_enable();
+
+	/* Sets up the interrupt vector and enables per-bank CMCI signaling. */
+	xapic_write_reg(APIC_LVTCMCI, CMCI_VECTOR | APIC_DM_FIXED);
+	ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
+	wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_CMCI_EN);
+
+	/* Enables interrupt in guest. */
+	asm volatile("sti");
+
+	/* Let user space inject the first UCNA */
+	GUEST_SYNC(SYNC_FIRST_UCNA);
+
+	ucna_addr = rdmsr(MSR_IA32_MCx_ADDR(UCNA_BANK));
+
+	/* Disables the per-bank CMCI signaling. */
+	ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
+	wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 & ~MCI_CTL2_CMCI_EN);
+
+	/* Let the user space inject the second UCNA */
+	GUEST_SYNC(SYNC_SECOND_UCNA);
+
+	ucna_addr2 = rdmsr(MSR_IA32_MCx_ADDR(UCNA_BANK));
+	GUEST_DONE();
+}
+
+/*
+ * Guest code that tries to set MCI_CTL2_CMCI_EN on UCNA_BANK.
+ *
+ * main() runs this on a vCPU created with enable_cmci_p == false and passes
+ * it to run_vcpu_expect_gp(), i.e. the wrmsr is expected to raise #GP.
+ */
+static void cmci_disabled_guest_code(void)
+{
+	uint64_t ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
+	wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_CMCI_EN);
+
+	GUEST_DONE();
+}
+
+/*
+ * Guest code that tries to set MCI_CTL2_RESERVED_BIT on UCNA_BANK.
+ *
+ * main() runs this on a vCPU created with enable_cmci_p == true and passes
+ * it to run_vcpu_expect_gp(), i.e. the wrmsr is expected to raise #GP.
+ */
+static void cmci_enabled_guest_code(void)
+{
+	uint64_t ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
+	wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_RESERVED_BIT);
+
+	GUEST_DONE();
+}
+
+/*
+ * Guest handler installed on CMCI_VECTOR: counts the interrupt, records the
+ * bank's ADDR MSR and writes the xAPIC EOI register.
+ */
+static void guest_cmci_handler(struct ex_regs *regs)
+{
+	i_ucna_rcvd++;
+	i_ucna_addr = rdmsr(MSR_IA32_MCx_ADDR(UCNA_BANK));
+	xapic_write_reg(APIC_EOI, 0);
+}
+
+/* Guest #GP handler: report the fault to the host via GUEST_SYNC(SYNC_GP). */
+static void guest_gp_handler(struct ex_regs *regs)
+{
+	GUEST_SYNC(SYNC_GP);
+}
+
+/*
+ * Run @vcpu once and assert that it exits with KVM_EXIT_IO carrying a
+ * UCALL_SYNC whose stage (uc.args[1]) is SYNC_GP, i.e. that the guest's #GP
+ * handler ran.
+ */
+static void run_vcpu_expect_gp(struct kvm_vcpu *vcpu)
+{
+	unsigned int exit_reason;
+	struct ucall uc;
+
+	vcpu_run(vcpu);
+
+	exit_reason = vcpu->run->exit_reason;
+	TEST_ASSERT(exit_reason == KVM_EXIT_IO,
+		    "exited with unexpected exit reason %u-%s, expected KVM_EXIT_IO",
+		    exit_reason, exit_reason_str(exit_reason));
+	TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_SYNC,
+		    "Expect UCALL_SYNC\n");
+	TEST_ASSERT(uc.args[1] == SYNC_GP, "#GP is expected.");
+	printf("vCPU received GP in guest.\n");
+}
+
+/* Inject a UCNA machine check for @addr into UCNA_BANK of @vcpu. */
+static void inject_ucna(struct kvm_vcpu *vcpu, uint64_t addr)
+{
+	struct kvm_x86_mce mce = {
+		/*
+		 * A UCNA error is indicated with VAL=1, UC=1, PCC=0, S=0 and
+		 * AR=0 in the IA32_MCi_STATUS register.
+		 * MSCOD=1 (BIT[16] - MscodDataRdErr).
+		 * MCACOD=0x0090 (Memory controller error format, channel 0)
+		 */
+		.status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN |
+			  MCI_STATUS_MISCV | MCI_STATUS_ADDRV | 0x10090,
+		.addr = addr,
+		/*
+		 * MCM_ADDR_PHYS indicates the reported address is a physical
+		 * address.  Lowest 6 bits is the recoverable address LSB,
+		 * i.e., the injected MCE is at 4KB granularity.
+		 */
+		.misc = (MCM_ADDR_PHYS << 6) | 0xc,
+		.mcg_status = 0,
+		.bank = UCNA_BANK,
+	};
+
+	vcpu_ioctl(vcpu, KVM_X86_SET_MCE, &mce);
+}
+
+/*
+ * Drive the UCNA injection vCPU through its two sync points.
+ *
+ * @arg is a struct thread_params.  The vCPU is run until it requests the
+ * first UCNA, FIRST_UCNA_ADDR is injected, the vCPU is run until it requests
+ * the second UCNA, SECOND_UCNA_ADDR is injected, and the vCPU is run once
+ * more.  A guest UCALL_ABORT on the final exit fails the test.
+ *
+ * Always returns NULL.
+ */
+static void *run_ucna_injection(void *arg)
+{
+	struct thread_params *params = (struct thread_params *)arg;
+	struct ucall uc;
+	int old;
+	int r;
+	unsigned int exit_reason;
+
+	r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
+	TEST_ASSERT(r == 0,
+		    "pthread_setcanceltype failed with errno=%d",
+		    r);
+
+	/* First run: the guest stops at GUEST_SYNC(SYNC_FIRST_UCNA). */
+	vcpu_run(params->vcpu);
+
+	exit_reason = params->vcpu->run->exit_reason;
+	TEST_ASSERT(exit_reason == KVM_EXIT_IO,
+		    "unexpected exit reason %u-%s, expected KVM_EXIT_IO",
+		    exit_reason, exit_reason_str(exit_reason));
+	TEST_ASSERT(get_ucall(params->vcpu, &uc) == UCALL_SYNC,
+		    "Expect UCALL_SYNC\n");
+	TEST_ASSERT(uc.args[1] == SYNC_FIRST_UCNA, "Injecting first UCNA.");
+
+	printf("Injecting first UCNA at %#x.\n", FIRST_UCNA_ADDR);
+
+	inject_ucna(params->vcpu, FIRST_UCNA_ADDR);
+	/* Second run: the guest stops at GUEST_SYNC(SYNC_SECOND_UCNA). */
+	vcpu_run(params->vcpu);
+
+	exit_reason = params->vcpu->run->exit_reason;
+	TEST_ASSERT(exit_reason == KVM_EXIT_IO,
+		    "unexpected exit reason %u-%s, expected KVM_EXIT_IO",
+		    exit_reason, exit_reason_str(exit_reason));
+	TEST_ASSERT(get_ucall(params->vcpu, &uc) == UCALL_SYNC,
+		    "Expect UCALL_SYNC\n");
+	TEST_ASSERT(uc.args[1] == SYNC_SECOND_UCNA, "Injecting second UCNA.");
+
+	printf("Injecting second UCNA at %#x.\n", SECOND_UCNA_ADDR);
+
+	inject_ucna(params->vcpu, SECOND_UCNA_ADDR);
+	/* Final run: only a guest assertion (UCALL_ABORT) is treated as failure. */
+	vcpu_run(params->vcpu);
+
+	exit_reason = params->vcpu->run->exit_reason;
+	TEST_ASSERT(exit_reason == KVM_EXIT_IO,
+		    "unexpected exit reason %u-%s, expected KVM_EXIT_IO",
+		    exit_reason, exit_reason_str(exit_reason));
+	if (get_ucall(params->vcpu, &uc) == UCALL_ABORT) {
+		TEST_ASSERT(false, "vCPU assertion failure: %s.\n",
+			    (const char *)uc.args[0]);
+	}
+
+	return NULL;
+}
+
+/*
+ * Run the UCNA injection scenario on @vcpu and verify what the guest recorded.
+ *
+ * @params is filled in with @vcpu and with host pointers to the guest's
+ * recording variables, then handed to run_ucna_injection().  Afterwards the
+ * test asserts that exactly one CMCI was received (for FIRST_UCNA_ADDR) and
+ * that both injected addresses were visible through the bank's ADDR MSR.
+ */
+static void test_ucna_injection(struct kvm_vcpu *vcpu, struct thread_params *params)
+{
+	struct kvm_vm *vm = vcpu->vm;
+
+	params->vcpu = vcpu;
+	params->p_i_ucna_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&i_ucna_rcvd);
+	params->p_i_ucna_addr = (uint64_t *)addr_gva2hva(vm, (uint64_t)&i_ucna_addr);
+	params->p_ucna_addr = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ucna_addr);
+	params->p_ucna_addr2 = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ucna_addr2);
+
+	run_ucna_injection(params);
+
+	TEST_ASSERT(*params->p_i_ucna_rcvd == 1, "Only first UCNA get signaled.");
+	TEST_ASSERT(*params->p_i_ucna_addr == FIRST_UCNA_ADDR,
+		    "Only first UCNA reported addr get recorded via interrupt.");
+	TEST_ASSERT(*params->p_ucna_addr == FIRST_UCNA_ADDR,
+		    "First injected UCNAs should get exposed via registers.");
+	TEST_ASSERT(*params->p_ucna_addr2 == SECOND_UCNA_ADDR,
+		    "Second injected UCNAs should get exposed via registers.");
+
+	/* The values are uint64_t, so use the matching <inttypes.h> specifiers. */
+	printf("Test successful.\n"
+	       "UCNA CMCI interrupts received: %" PRIu64 "\n"
+	       "Last UCNA address received via CMCI: %" PRIx64 "\n"
+	       "First UCNA address in vCPU thread: %" PRIx64 "\n"
+	       "Second UCNA address in vCPU thread: %" PRIx64 "\n",
+	       *params->p_i_ucna_rcvd, *params->p_i_ucna_addr,
+	       *params->p_ucna_addr, *params->p_ucna_addr2);
+}
+
+/*
+ * Configure MCE on @vcpu: request CTL_P, SER_P, LMCE_P and KVM_MAX_MCE_BANKS
+ * banks (plus CMCI_P when @enable_cmci_p), restricted to what
+ * supported_mcg_caps reports while always keeping the bank-count field.
+ */
+static void setup_mce_cap(struct kvm_vcpu *vcpu, bool enable_cmci_p)
+{
+	const uint64_t allowed = supported_mcg_caps | MCG_CAP_BANKS_MASK;
+	uint64_t mcg_caps;
+
+	mcg_caps = MCG_CTL_P | MCG_SER_P | MCG_LMCE_P | KVM_MAX_MCE_BANKS;
+	if (enable_cmci_p)
+		mcg_caps |= MCG_CMCI_P;
+	mcg_caps &= allowed;
+
+	vcpu_ioctl(vcpu, KVM_X86_SETUP_MCE, &mcg_caps);
+}
+
+/* Add vCPU @vcpuid running @guest_code to @vm and set up its MCE caps. */
+static struct kvm_vcpu *create_vcpu_with_mce_cap(struct kvm_vm *vm, uint32_t vcpuid,
+						 bool enable_cmci_p, void *guest_code)
+{
+	struct kvm_vcpu *new_vcpu;
+
+	new_vcpu = vm_vcpu_add(vm, vcpuid, guest_code);
+	setup_mce_cap(new_vcpu, enable_cmci_p);
+
+	return new_vcpu;
+}
+
+/*
+ * Create three vCPUs (UCNA injection, CMCI-disabled #GP check, reserved-bit
+ * #GP check), install the guest handlers and run the three scenarios.
+ *
+ * The test is skipped when KVM lacks KVM_CAP_MCE or does not report
+ * MCG_CMCI_P as supported.  Returns 0 on success.
+ */
+int main(int argc, char *argv[])
+{
+	struct thread_params params;
+	struct kvm_vm *vm;
+	struct kvm_vcpu *ucna_vcpu;
+	struct kvm_vcpu *cmcidis_vcpu;
+	struct kvm_vcpu *cmci_vcpu;
+
+	/* Skip instead of silently ignoring the capability probe result. */
+	TEST_REQUIRE(kvm_has_cap(KVM_CAP_MCE));
+
+	vm = __vm_create(VM_MODE_DEFAULT, 3, 0);
+
+	kvm_ioctl(vm->kvm_fd, KVM_X86_GET_MCE_CAP_SUPPORTED,
+		  &supported_mcg_caps);
+
+	if (!(supported_mcg_caps & MCG_CMCI_P)) {
+		print_skip("MCG_CMCI_P is not supported");
+		exit(KSFT_SKIP);
+	}
+
+	ucna_vcpu = create_vcpu_with_mce_cap(vm, 0, true, ucna_injection_guest_code);
+	cmcidis_vcpu = create_vcpu_with_mce_cap(vm, 1, false, cmci_disabled_guest_code);
+	cmci_vcpu = create_vcpu_with_mce_cap(vm, 2, true, cmci_enabled_guest_code);
+
+	vm_init_descriptor_tables(vm);
+	vcpu_init_descriptor_tables(ucna_vcpu);
+	vcpu_init_descriptor_tables(cmcidis_vcpu);
+	vcpu_init_descriptor_tables(cmci_vcpu);
+	vm_install_exception_handler(vm, CMCI_VECTOR, guest_cmci_handler);
+	vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
+
+	/* Identity-map the default APIC page for the guest's xAPIC accesses. */
+	virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
+
+	test_ucna_injection(ucna_vcpu, &params);
+	run_vcpu_expect_gp(cmcidis_vcpu);
+	run_vcpu_expect_gp(cmci_vcpu);
+
+	kvm_vm_free(vm);
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/userspace_io_test.c b/tools/testing/selftests/kvm/x86_64/userspace_io_test.c
new file mode 100644
index 000000000000..7316521428f8
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/userspace_io_test.c
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+
+/*
+ * Guest helper: execute "rep insb" from port 0x80 into @buffer with @count
+ * iterations, then assert that the count reached zero and that the
+ * destination pointer ended where expected.
+ *
+ * The expected end is @buffer + 1 for @count == 2 and @buffer + 8192
+ * otherwise, matching the RCX rewrites done by main() (2 => 1, 3 => 8192).
+ */
+static void guest_ins_port80(uint8_t *buffer, unsigned int count)
+{
+	unsigned long end;
+
+	if (count == 2)
+		end = (unsigned long)buffer + 1;
+	else
+		end = (unsigned long)buffer + 8192;
+
+	/* RDI (buffer) and RCX (count) are both updated by the instruction. */
+	asm volatile("cld; rep; insb" : "+D"(buffer), "+c"(count) : "d"(0x80) : "memory");
+	GUEST_ASSERT_1(count == 0, count);
+	GUEST_ASSERT_2((unsigned long)buffer == end, buffer, end);
+}
+
+/*
+ * Guest entry point: issue string reads from port 0x80 with counts 2, 3 and
+ * 8192, and check that the final, unmodified 8192-byte read filled the buffer
+ * with 0xaa (the pattern main() writes into the I/O data area).
+ */
+static void guest_code(void)
+{
+	uint8_t buffer[8192];
+	int i;
+
+	/*
+	 * Special case tests.  main() will adjust RCX 2 => 1 and 3 => 8192 to
+	 * test that KVM doesn't explode when userspace modifies the "count" on
+	 * a userspace I/O exit.  KVM isn't required to play nice with the I/O
+	 * itself as KVM doesn't support manipulating the count, it just needs
+	 * to not explode or overflow a buffer.
+	 */
+	guest_ins_port80(buffer, 2);
+	guest_ins_port80(buffer, 3);
+
+	/* Verify KVM fills the buffer correctly when not stuffing RCX. */
+	memset(buffer, 0, sizeof(buffer));
+	guest_ins_port80(buffer, 8192);
+	for (i = 0; i < 8192; i++)
+		GUEST_ASSERT_2(buffer[i] == 0xaa, i, buffer[i]);
+
+	GUEST_DONE();
+}
+
+/*
+ * Host side of the userspace I/O test: service every port 0x80 exit by
+ * filling the I/O data area with 0xaa, rewriting RCX for the two special
+ * counts, and stop at the first ucall.
+ */
+int main(int argc, char *argv[])
+{
+	struct kvm_vcpu *vcpu;
+	struct kvm_regs regs;
+	struct kvm_run *run;
+	struct kvm_vm *vm;
+	struct ucall uc;
+
+	/* Tell stdout not to buffer its content */
+	setbuf(stdout, NULL);
+
+	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+	run = vcpu->run;
+
+	memset(&regs, 0, sizeof(regs));
+
+	for (;;) {
+		vcpu_run(vcpu);
+
+		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+			    "Unexpected exit reason: %u (%s),\n",
+			    run->exit_reason,
+			    exit_reason_str(run->exit_reason));
+
+		/* Any ucall ends the I/O servicing loop. */
+		if (get_ucall(vcpu, &uc))
+			break;
+
+		TEST_ASSERT(run->io.port == 0x80,
+			    "Expected I/O at port 0x80, got port 0x%x\n", run->io.port);
+
+		/*
+		 * Modify the rep string count in RCX: 2 => 1 and 3 => 8192.
+		 * Note, this abuses KVM's batching of rep string I/O to avoid
+		 * getting stuck in an infinite loop.  That behavior isn't in
+		 * scope from a testing perspective as it's not ABI in any way,
+		 * i.e. it really is abusing internal KVM knowledge.
+		 */
+		vcpu_regs_get(vcpu, &regs);
+		switch (regs.rcx) {
+		case 2:
+			regs.rcx = 1;
+			break;
+		case 3:
+			regs.rcx = 8192;
+			break;
+		default:
+			break;
+		}
+		memset((void *)run + run->io.data_offset, 0xaa, 4096);
+		vcpu_regs_set(vcpu, &regs);
+	}
+
+	switch (uc.cmd) {
+	case UCALL_DONE:
+		break;
+	case UCALL_ABORT:
+		REPORT_GUEST_ASSERT_2(uc, "argN+1 = 0x%lx, argN+2 = 0x%lx");
+	default:
+		TEST_FAIL("Unknown ucall %lu", uc.cmd);
+	}
+
+	kvm_vm_free(vm);
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
new file mode 100644
index 000000000000..a4f06370a245
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
@@ -0,0 +1,748 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020, Google LLC.
+ *
+ * Tests for exiting into userspace on registered MSRs
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "vmx.h"
+
+/* Forced emulation prefix, used to invoke the emulator unconditionally. */
+#define KVM_FEP "ud2; .byte 'k', 'v', 'm';"
+#define KVM_FEP_LENGTH 5
+static int fep_available = 1;
+
+#define MSR_NON_EXISTENT 0x474f4f00
+
+static u64 deny_bits = 0;
+/*
+ * Default-allow filter with three single-MSR ranges (MSR_IA32_XSS,
+ * MSR_IA32_FLUSH_CMD and MSR_NON_EXISTENT).  Each range covers reads and
+ * writes and uses deny_bits, which is 0, as its one-bit bitmap.
+ */
+struct kvm_msr_filter filter_allow = {
+	.flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
+	.ranges = {
+		{
+			.flags = KVM_MSR_FILTER_READ |
+				 KVM_MSR_FILTER_WRITE,
+			.nmsrs = 1,
+			/* Test an MSR the kernel knows about. */
+			.base = MSR_IA32_XSS,
+			.bitmap = (uint8_t*)&deny_bits,
+		}, {
+			.flags = KVM_MSR_FILTER_READ |
+				 KVM_MSR_FILTER_WRITE,
+			.nmsrs = 1,
+			/* Test an MSR the kernel doesn't know about. */
+			.base = MSR_IA32_FLUSH_CMD,
+			.bitmap = (uint8_t*)&deny_bits,
+		}, {
+			.flags = KVM_MSR_FILTER_READ |
+				 KVM_MSR_FILTER_WRITE,
+			.nmsrs = 1,
+			/* Test a fabricated MSR that no one knows about. */
+			.base = MSR_NON_EXISTENT,
+			.bitmap = (uint8_t*)&deny_bits,
+		},
+	},
+};
+
+/*
+ * Default-allow filter with a single read-only range for MSR_FS_BASE whose
+ * one-bit bitmap is deny_bits (0).  Used by test_msr_permission_bitmap().
+ */
+struct kvm_msr_filter filter_fs = {
+	.flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
+	.ranges = {
+		{
+			.flags = KVM_MSR_FILTER_READ,
+			.nmsrs = 1,
+			.base = MSR_FS_BASE,
+			.bitmap = (uint8_t*)&deny_bits,
+		},
+	},
+};
+
+/*
+ * Default-allow filter with a single read-only range for MSR_GS_BASE whose
+ * one-bit bitmap is deny_bits (0).  Used by test_msr_permission_bitmap().
+ */
+struct kvm_msr_filter filter_gs = {
+	.flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
+	.ranges = {
+		{
+			.flags = KVM_MSR_FILTER_READ,
+			.nmsrs = 1,
+			.base = MSR_GS_BASE,
+			.bitmap = (uint8_t*)&deny_bits,
+		},
+	},
+};
+
+static uint64_t msr_non_existent_data;
+static int guest_exception_count;
+static u32 msr_reads, msr_writes;
+
+static u8 bitmap_00000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_00000000_write[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_40000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_c0000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_c0000000_read[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_deadbeef[1] = { 0x1 };
+
+/*
+ * Clear the bit for @msr in @bitmap.  The bit position is the MSR index
+ * masked with (KVM_MSR_FILTER_MAX_BITMAP_SIZE - 1).
+ */
+static void deny_msr(uint8_t *bitmap, u32 msr)
+{
+	const u32 bit = msr & (KVM_MSR_FILTER_MAX_BITMAP_SIZE - 1);
+	const uint8_t mask = 1 << (bit % 8);
+
+	bitmap[bit / 8] &= ~mask;
+}
+
+/*
+ * Set every bit in the five full-size bitmaps, then clear the bits for the
+ * MSRs the deny test wants trapped: writes to MSR_IA32_POWER_CTL and reads
+ * of MSR_SYSCALL_MASK and MSR_GS_BASE.
+ */
+static void prepare_bitmaps(void)
+{
+	/* Ranges that stay fully set. */
+	memset(bitmap_00000000, 0xff, sizeof(bitmap_00000000));
+	memset(bitmap_40000000, 0xff, sizeof(bitmap_40000000));
+	memset(bitmap_c0000000, 0xff, sizeof(bitmap_c0000000));
+
+	/* Write bitmap for the 0x00000000 range. */
+	memset(bitmap_00000000_write, 0xff, sizeof(bitmap_00000000_write));
+	deny_msr(bitmap_00000000_write, MSR_IA32_POWER_CTL);
+
+	/* Read bitmap for the 0xc0000000 range. */
+	memset(bitmap_c0000000_read, 0xff, sizeof(bitmap_c0000000_read));
+	deny_msr(bitmap_c0000000_read, MSR_SYSCALL_MASK);
+	deny_msr(bitmap_c0000000_read, MSR_GS_BASE);
+}
+
+/*
+ * Default-deny filter used by test_msr_filter_deny().  The five full-size
+ * ranges take their bitmaps from prepare_bitmaps(); the last range covers the
+ * single MSR 0xdeadbeef with its bit set in bitmap_deadbeef.
+ */
+struct kvm_msr_filter filter_deny = {
+	.flags = KVM_MSR_FILTER_DEFAULT_DENY,
+	.ranges = {
+		{
+			.flags = KVM_MSR_FILTER_READ,
+			.base = 0x00000000,
+			.nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+			.bitmap = bitmap_00000000,
+		}, {
+			.flags = KVM_MSR_FILTER_WRITE,
+			.base = 0x00000000,
+			.nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+			.bitmap = bitmap_00000000_write,
+		}, {
+			.flags = KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE,
+			.base = 0x40000000,
+			.nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+			.bitmap = bitmap_40000000,
+		}, {
+			.flags = KVM_MSR_FILTER_READ,
+			.base = 0xc0000000,
+			.nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+			.bitmap = bitmap_c0000000_read,
+		}, {
+			.flags = KVM_MSR_FILTER_WRITE,
+			.base = 0xc0000000,
+			.nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+			.bitmap = bitmap_c0000000,
+		}, {
+			.flags = KVM_MSR_FILTER_WRITE | KVM_MSR_FILTER_READ,
+			.base = 0xdeadbeef,
+			.nmsrs = 1,
+			.bitmap = bitmap_deadbeef,
+		},
+	},
+};
+
+/*
+ * Default-allow filter with no ranges; handle_ucall() installs it on
+ * UCALL_SYNC to turn MSR filtering off for the second round.
+ */
+struct kvm_msr_filter no_filter_deny = {
+	.flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
+};
+
+/*
+ * Reset guest_exception_count, execute rdmsr on @msr between the labels
+ * rdmsr_start and rdmsr_end, and return the result as EDX:EAX.
+ *
+ * Note: Force test_rdmsr() to not be inlined to prevent the labels,
+ * rdmsr_start and rdmsr_end, from being defined multiple times.
+ */
+static noinline uint64_t test_rdmsr(uint32_t msr)
+{
+	uint32_t a, d;
+
+	guest_exception_count = 0;
+
+	__asm__ __volatile__("rdmsr_start: rdmsr; rdmsr_end:" :
+			"=a"(a), "=d"(d) : "c"(msr) : "memory");
+
+	return a | ((uint64_t) d << 32);
+}
+
+/*
+ * Reset guest_exception_count and execute wrmsr of @value to @msr between
+ * the labels wrmsr_start and wrmsr_end.
+ *
+ * Note: Force test_wrmsr() to not be inlined to prevent the labels,
+ * wrmsr_start and wrmsr_end, from being defined multiple times.
+ */
+static noinline void test_wrmsr(uint32_t msr, uint64_t value)
+{
+	uint32_t a = value;
+	uint32_t d = value >> 32;
+
+	guest_exception_count = 0;
+
+	__asm__ __volatile__("wrmsr_start: wrmsr; wrmsr_end:" ::
+			"a"(a), "d"(d), "c"(msr) : "memory");
+}
+
+extern char rdmsr_start, rdmsr_end;
+extern char wrmsr_start, wrmsr_end;
+
+/*
+ * Same as test_rdmsr(), but the rdmsr is preceded by KVM_FEP (the forced
+ * emulation prefix) and sits between em_rdmsr_start and em_rdmsr_end.
+ *
+ * Note: Force test_em_rdmsr() to not be inlined to prevent the labels,
+ * em_rdmsr_start and em_rdmsr_end, from being defined multiple times.
+ */
+static noinline uint64_t test_em_rdmsr(uint32_t msr)
+{
+	uint32_t a, d;
+
+	guest_exception_count = 0;
+
+	__asm__ __volatile__(KVM_FEP "em_rdmsr_start: rdmsr; em_rdmsr_end:" :
+			"=a"(a), "=d"(d) : "c"(msr) : "memory");
+
+	return a | ((uint64_t) d << 32);
+}
+
+/*
+ * Same as test_wrmsr(), but the wrmsr is preceded by KVM_FEP (the forced
+ * emulation prefix) and sits between em_wrmsr_start and em_wrmsr_end.
+ *
+ * Note: Force test_em_wrmsr() to not be inlined to prevent the labels,
+ * em_wrmsr_start and em_wrmsr_end, from being defined multiple times.
+ */
+static noinline void test_em_wrmsr(uint32_t msr, uint64_t value)
+{
+	uint32_t a = value;
+	uint32_t d = value >> 32;
+
+	guest_exception_count = 0;
+
+	__asm__ __volatile__(KVM_FEP "em_wrmsr_start: wrmsr; em_wrmsr_end:" ::
+			"a"(a), "d"(d), "c"(msr) : "memory");
+}
+
+extern char em_rdmsr_start, em_rdmsr_end;
+extern char em_wrmsr_start, em_wrmsr_end;
+
+/*
+ * Guest code for test_msr_filter_allow().
+ *
+ * Reads and writes MSR_IA32_XSS, MSR_IA32_FLUSH_CMD and MSR_NON_EXISTENT and
+ * checks the returned data and the number of exceptions taken after each
+ * access.  If the forced emulation prefix is usable (fep_available is still
+ * set after the probe), the same sequence is repeated through the emulated
+ * variants after a GUEST_SYNC(0).
+ */
+static void guest_code_filter_allow(void)
+{
+	uint64_t data;
+
+	/*
+	 * Test userspace intercepting rdmsr / wrmsr for MSR_IA32_XSS.
+	 *
+	 * A GP is thrown if anything other than 0 is written to
+	 * MSR_IA32_XSS.
+	 */
+	data = test_rdmsr(MSR_IA32_XSS);
+	GUEST_ASSERT(data == 0);
+	GUEST_ASSERT(guest_exception_count == 0);
+
+	test_wrmsr(MSR_IA32_XSS, 0);
+	GUEST_ASSERT(guest_exception_count == 0);
+
+	test_wrmsr(MSR_IA32_XSS, 1);
+	GUEST_ASSERT(guest_exception_count == 1);
+
+	/*
+	 * Test userspace intercepting rdmsr / wrmsr for MSR_IA32_FLUSH_CMD.
+	 *
+	 * A GP is thrown if MSR_IA32_FLUSH_CMD is read
+	 * from or if a value other than 1 is written to it.
+	 */
+	test_rdmsr(MSR_IA32_FLUSH_CMD);
+	GUEST_ASSERT(guest_exception_count == 1);
+
+	test_wrmsr(MSR_IA32_FLUSH_CMD, 0);
+	GUEST_ASSERT(guest_exception_count == 1);
+
+	test_wrmsr(MSR_IA32_FLUSH_CMD, 1);
+	GUEST_ASSERT(guest_exception_count == 0);
+
+	/*
+	 * Test userspace intercepting rdmsr / wrmsr for MSR_NON_EXISTENT.
+	 *
+	 * Test that a fabricated MSR can pass through the kernel
+	 * and be handled in userspace.
+	 */
+	test_wrmsr(MSR_NON_EXISTENT, 2);
+	GUEST_ASSERT(guest_exception_count == 0);
+
+	data = test_rdmsr(MSR_NON_EXISTENT);
+	GUEST_ASSERT(data == 2);
+	GUEST_ASSERT(guest_exception_count == 0);
+
+	/*
+	 * Test to see if the instruction emulator is available (ie: the module
+	 * parameter 'kvm.force_emulation_prefix=1' is set).  This instruction
+	 * will #UD if it isn't available.
+	 */
+	__asm__ __volatile__(KVM_FEP "nop");
+
+	/* guest_ud_handler() clears fep_available when the probe above faults. */
+	if (fep_available) {
+		/* Let userspace know we aren't done. */
+		GUEST_SYNC(0);
+
+		/*
+		 * Now run the same tests with the instruction emulator.
+		 */
+		data = test_em_rdmsr(MSR_IA32_XSS);
+		GUEST_ASSERT(data == 0);
+		GUEST_ASSERT(guest_exception_count == 0);
+		test_em_wrmsr(MSR_IA32_XSS, 0);
+		GUEST_ASSERT(guest_exception_count == 0);
+		test_em_wrmsr(MSR_IA32_XSS, 1);
+		GUEST_ASSERT(guest_exception_count == 1);
+
+		test_em_rdmsr(MSR_IA32_FLUSH_CMD);
+		GUEST_ASSERT(guest_exception_count == 1);
+		test_em_wrmsr(MSR_IA32_FLUSH_CMD, 0);
+		GUEST_ASSERT(guest_exception_count == 1);
+		test_em_wrmsr(MSR_IA32_FLUSH_CMD, 1);
+		GUEST_ASSERT(guest_exception_count == 0);
+
+		test_em_wrmsr(MSR_NON_EXISTENT, 2);
+		GUEST_ASSERT(guest_exception_count == 0);
+		data = test_em_rdmsr(MSR_NON_EXISTENT);
+		GUEST_ASSERT(data == 2);
+		GUEST_ASSERT(guest_exception_count == 0);
+	}
+
+	GUEST_DONE();
+}
+
+/*
+ * Guest helper for the deny test: issue a fixed sequence of MSR accesses.
+ *
+ * @trapped: true when the filter is installed, in which case reads of
+ *	     MSR_SYSCALL_MASK and MSR_GS_BASE must return the MSR index (the
+ *	     value handle_rdmsr() supplies); false when they must not.
+ */
+static void guest_msr_calls(bool trapped)
+{
+	/* This goes into the in-kernel emulation */
+	wrmsr(MSR_SYSCALL_MASK, 0);
+
+	if (trapped) {
+		/* This goes into user space emulation */
+		GUEST_ASSERT(rdmsr(MSR_SYSCALL_MASK) == MSR_SYSCALL_MASK);
+		GUEST_ASSERT(rdmsr(MSR_GS_BASE) == MSR_GS_BASE);
+	} else {
+		GUEST_ASSERT(rdmsr(MSR_SYSCALL_MASK) != MSR_SYSCALL_MASK);
+		GUEST_ASSERT(rdmsr(MSR_GS_BASE) != MSR_GS_BASE);
+	}
+
+	/* If trapped == true, this goes into user space emulation */
+	wrmsr(MSR_IA32_POWER_CTL, 0x1234);
+
+	/* This goes into the in-kernel emulation */
+	rdmsr(MSR_IA32_POWER_CTL);
+
+	/* Invalid MSR, should always be handled by user space exit */
+	GUEST_ASSERT(rdmsr(0xdeadbeef) == 0xdeadbeef);
+	wrmsr(0xdeadbeef, 0x1234);
+}
+
+/*
+ * Guest code for test_msr_filter_deny(): run the MSR sequence once with the
+ * filter installed, sync with the host, then run it again without filtering.
+ */
+static void guest_code_filter_deny(void)
+{
+	guest_msr_calls(true);
+
+	/*
+	 * Disable msr filtering, so that the kernel
+	 * handles everything in the next round
+	 */
+	GUEST_SYNC(0);
+
+	guest_msr_calls(false);
+
+	GUEST_DONE();
+}
+
+/*
+ * Guest code for test_msr_permission_bitmap().
+ *
+ * Before the sync, a read of MSR_FS_BASE must return the MSR index (the value
+ * process_rdmsr() supplies) while MSR_GS_BASE must not; after the host
+ * switches filters the expectations are swapped.
+ */
+static void guest_code_permission_bitmap(void)
+{
+	uint64_t data;
+
+	data = test_rdmsr(MSR_FS_BASE);
+	GUEST_ASSERT(data == MSR_FS_BASE);
+	data = test_rdmsr(MSR_GS_BASE);
+	GUEST_ASSERT(data != MSR_GS_BASE);
+
+	/* Let userspace know to switch the filter */
+	GUEST_SYNC(0);
+
+	data = test_rdmsr(MSR_FS_BASE);
+	GUEST_ASSERT(data != MSR_FS_BASE);
+	data = test_rdmsr(MSR_GS_BASE);
+	GUEST_ASSERT(data == MSR_GS_BASE);
+
+	GUEST_DONE();
+}
+
+/*
+ * Common #GP fixup.  If the fault hit the rdmsr at @r_start, resume at @r_end
+ * with RAX and RDX zeroed; if it hit the wrmsr at @w_start, resume at @w_end.
+ * Any other RIP is a guest assertion failure.  Each handled fault bumps
+ * guest_exception_count.
+ */
+static void __guest_gp_handler(struct ex_regs *regs,
+			       char *r_start, char *r_end,
+			       char *w_start, char *w_end)
+{
+	const uintptr_t fault_rip = regs->rip;
+
+	if (fault_rip == (uintptr_t)r_start) {
+		/* A faulting read yields 0 in EDX:EAX. */
+		regs->rax = 0;
+		regs->rdx = 0;
+		regs->rip = (uintptr_t)r_end;
+	} else if (fault_rip == (uintptr_t)w_start) {
+		regs->rip = (uintptr_t)w_end;
+	} else {
+		GUEST_ASSERT(!"RIP is at an unknown location!");
+	}
+
+	guest_exception_count++;
+}
+
+/* #GP handler for the non-emulated test_rdmsr()/test_wrmsr() labels. */
+static void guest_gp_handler(struct ex_regs *regs)
+{
+	__guest_gp_handler(regs, &rdmsr_start, &rdmsr_end,
+			   &wrmsr_start, &wrmsr_end);
+}
+
+/* #GP handler for the emulated test_em_rdmsr()/test_em_wrmsr() labels. */
+static void guest_fep_gp_handler(struct ex_regs *regs)
+{
+	__guest_gp_handler(regs, &em_rdmsr_start, &em_rdmsr_end,
+			   &em_wrmsr_start, &em_wrmsr_end);
+}
+
+/*
+ * #UD handler for the forced-emulation probe: record that the prefix is
+ * unavailable and advance RIP by KVM_FEP_LENGTH to step over it.
+ */
+static void guest_ud_handler(struct ex_regs *regs)
+{
+	fep_available = 0;
+	regs->rip += KVM_FEP_LENGTH;
+}
+
+/*
+ * Report a guest assertion if the last exit of @vcpu was an I/O exit carrying
+ * UCALL_ABORT; otherwise do nothing.
+ */
+static void check_for_guest_assert(struct kvm_vcpu *vcpu)
+{
+	struct ucall uc;
+
+	/* get_ucall() is only consulted for I/O exits. */
+	if (vcpu->run->exit_reason != KVM_EXIT_IO)
+		return;
+
+	if (get_ucall(vcpu, &uc) == UCALL_ABORT) {
+		REPORT_GUEST_ASSERT(uc);
+	}
+}
+
+/*
+ * Handle an expected KVM_EXIT_X86_RDMSR for @msr_index.
+ *
+ * Asserts the exit reason and MSR index, then supplies the read result:
+ * 0 for MSR_IA32_XSS, an error for MSR_IA32_FLUSH_CMD, the last value written
+ * for MSR_NON_EXISTENT, and the MSR index itself for MSR_FS_BASE and
+ * MSR_GS_BASE.  Any other MSR fails the test.
+ */
+static void process_rdmsr(struct kvm_vcpu *vcpu, uint32_t msr_index)
+{
+	struct kvm_run *run = vcpu->run;
+
+	check_for_guest_assert(vcpu);
+
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_X86_RDMSR,
+		    "Unexpected exit reason: %u (%s),\n",
+		    run->exit_reason,
+		    exit_reason_str(run->exit_reason));
+	TEST_ASSERT(run->msr.index == msr_index,
+			"Unexpected msr (0x%04x), expected 0x%04x",
+			run->msr.index, msr_index);
+
+	switch (run->msr.index) {
+	case MSR_IA32_XSS:
+		run->msr.data = 0;
+		break;
+	case MSR_IA32_FLUSH_CMD:
+		run->msr.error = 1;
+		break;
+	case MSR_NON_EXISTENT:
+		run->msr.data = msr_non_existent_data;
+		break;
+	case MSR_FS_BASE:
+		run->msr.data = MSR_FS_BASE;
+		break;
+	case MSR_GS_BASE:
+		run->msr.data = MSR_GS_BASE;
+		break;
+	default:
+		TEST_ASSERT(false, "Unexpected MSR: 0x%04x", run->msr.index);
+	}
+}
+
+/*
+ * Handle an expected KVM_EXIT_X86_WRMSR for @msr_index.
+ *
+ * Asserts the exit reason and MSR index, then emulates the write: an error is
+ * flagged for MSR_IA32_XSS unless the data is 0 and for MSR_IA32_FLUSH_CMD
+ * unless the data is 1; the data for MSR_NON_EXISTENT is remembered for later
+ * reads.  Any other MSR fails the test.
+ */
+static void process_wrmsr(struct kvm_vcpu *vcpu, uint32_t msr_index)
+{
+	struct kvm_run *run = vcpu->run;
+
+	check_for_guest_assert(vcpu);
+
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_X86_WRMSR,
+		    "Unexpected exit reason: %u (%s),\n",
+		    run->exit_reason,
+		    exit_reason_str(run->exit_reason));
+	TEST_ASSERT(run->msr.index == msr_index,
+			"Unexpected msr (0x%04x), expected 0x%04x",
+			run->msr.index, msr_index);
+
+	switch (run->msr.index) {
+	case MSR_IA32_XSS:
+		if (run->msr.data != 0)
+			run->msr.error = 1;
+		break;
+	case MSR_IA32_FLUSH_CMD:
+		if (run->msr.data != 1)
+			run->msr.error = 1;
+		break;
+	case MSR_NON_EXISTENT:
+		msr_non_existent_data = run->msr.data;
+		break;
+	default:
+		TEST_ASSERT(false, "Unexpected MSR: 0x%04x", run->msr.index);
+	}
+}
+
+/*
+ * Assert that the last exit of @vcpu was an I/O exit carrying UCALL_DONE.
+ * A pending guest assertion is reported first.
+ */
+static void process_ucall_done(struct kvm_vcpu *vcpu)
+{
+	struct kvm_run *run = vcpu->run;
+	struct ucall uc;
+
+	check_for_guest_assert(vcpu);
+
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+		    "Unexpected exit reason: %u (%s)",
+		    run->exit_reason,
+		    exit_reason_str(run->exit_reason));
+
+	TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_DONE,
+		    "Unexpected ucall command: %lu, expected UCALL_DONE (%d)",
+		    uc.cmd, UCALL_DONE);
+}
+
+/*
+ * Decode the ucall carried by the last I/O exit of @vcpu and return its
+ * command.  UCALL_SYNC is accepted as is, UCALL_ABORT is reported as a guest
+ * assertion, UCALL_DONE is validated by process_ucall_done(), and any other
+ * command fails the test.
+ */
+static uint64_t process_ucall(struct kvm_vcpu *vcpu)
+{
+	struct kvm_run *run = vcpu->run;
+	struct ucall uc = {};
+
+	check_for_guest_assert(vcpu);
+
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+		    "Unexpected exit reason: %u (%s)",
+		    run->exit_reason,
+		    exit_reason_str(run->exit_reason));
+
+	switch (get_ucall(vcpu, &uc)) {
+	case UCALL_SYNC:
+		break;
+	case UCALL_ABORT:
+		check_for_guest_assert(vcpu);
+		break;
+	case UCALL_DONE:
+		process_ucall_done(vcpu);
+		break;
+	default:
+		TEST_ASSERT(false, "Unexpected ucall");
+	}
+
+	return uc.cmd;
+}
+
+/* Run @vcpu once and handle the resulting exit as a rdmsr of @msr_index. */
+static void run_guest_then_process_rdmsr(struct kvm_vcpu *vcpu,
+					 uint32_t msr_index)
+{
+	vcpu_run(vcpu);
+	process_rdmsr(vcpu, msr_index);
+}
+
+/* Run @vcpu once and handle the resulting exit as a wrmsr of @msr_index. */
+static void run_guest_then_process_wrmsr(struct kvm_vcpu *vcpu,
+					 uint32_t msr_index)
+{
+	vcpu_run(vcpu);
+	process_wrmsr(vcpu, msr_index);
+}
+
+/* Run @vcpu once and return the command of the ucall it exited with. */
+static uint64_t run_guest_then_process_ucall(struct kvm_vcpu *vcpu)
+{
+	vcpu_run(vcpu);
+	return process_ucall(vcpu);
+}
+
+/* Run @vcpu once and assert that it exited with UCALL_DONE. */
+static void run_guest_then_process_ucall_done(struct kvm_vcpu *vcpu)
+{
+	vcpu_run(vcpu);
+	process_ucall_done(vcpu);
+}
+
+/*
+ * Host side of the filter_allow test.
+ *
+ * Installs filter_allow with KVM_MSR_EXIT_REASON_FILTER exits enabled and
+ * then services, in order, the rdmsr/wrmsr exits that
+ * guest_code_filter_allow() is expected to generate.  If the guest reports
+ * that the forced emulation prefix works (it syncs instead of finishing), the
+ * same exit sequence is serviced a second time for the emulated accesses.
+ */
+static void test_msr_filter_allow(void)
+{
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+	int rc;
+
+	vm = vm_create_with_one_vcpu(&vcpu, guest_code_filter_allow);
+
+	rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
+	TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
+	vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_FILTER);
+
+	rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
+	TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
+
+	vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_allow);
+
+	vm_init_descriptor_tables(vm);
+	vcpu_init_descriptor_tables(vcpu);
+
+	vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
+
+	/* Process guest code userspace exits. */
+	run_guest_then_process_rdmsr(vcpu, MSR_IA32_XSS);
+	run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS);
+	run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS);
+
+	run_guest_then_process_rdmsr(vcpu, MSR_IA32_FLUSH_CMD);
+	run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD);
+	run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD);
+
+	run_guest_then_process_wrmsr(vcpu, MSR_NON_EXISTENT);
+	run_guest_then_process_rdmsr(vcpu, MSR_NON_EXISTENT);
+
+	/* The #UD handler is only installed around the guest's FEP probe. */
+	vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
+	vcpu_run(vcpu);
+	vm_install_exception_handler(vm, UD_VECTOR, NULL);
+
+	if (process_ucall(vcpu) != UCALL_DONE) {
+		vm_install_exception_handler(vm, GP_VECTOR, guest_fep_gp_handler);
+
+		/* Process emulated rdmsr and wrmsr instructions. */
+		run_guest_then_process_rdmsr(vcpu, MSR_IA32_XSS);
+		run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS);
+		run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS);
+
+		run_guest_then_process_rdmsr(vcpu, MSR_IA32_FLUSH_CMD);
+		run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD);
+		run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD);
+
+		run_guest_then_process_wrmsr(vcpu, MSR_NON_EXISTENT);
+		run_guest_then_process_rdmsr(vcpu, MSR_NON_EXISTENT);
+
+		/* Confirm the guest completed without issues. */
+		run_guest_then_process_ucall_done(vcpu);
+	} else {
+		printf("To run the instruction emulated tests set the module parameter 'kvm.force_emulation_prefix=1'\n");
+	}
+
+	kvm_vm_free(vm);
+}
+
+/*
+ * Handle a ucall in the deny test.
+ *
+ * UCALL_ABORT is reported as a guest assertion, UCALL_SYNC installs
+ * no_filter_deny on the VM, and any unknown command fails the test.
+ *
+ * Returns 1 on UCALL_DONE and 0 otherwise.
+ */
+static int handle_ucall(struct kvm_vcpu *vcpu)
+{
+	struct ucall uc;
+
+	switch (get_ucall(vcpu, &uc)) {
+	case UCALL_ABORT:
+		REPORT_GUEST_ASSERT(uc);
+		break;
+	case UCALL_SYNC:
+		vm_ioctl(vcpu->vm, KVM_X86_SET_MSR_FILTER, &no_filter_deny);
+		break;
+	case UCALL_DONE:
+		return 1;
+	default:
+		TEST_FAIL("Unknown ucall %lu", uc.cmd);
+	}
+
+	return 0;
+}
+
+/*
+ * Handle a rdmsr exit in the deny test: return the MSR index as the read
+ * value, count the read, and check the reported exit reason (FILTER for
+ * MSR_SYSCALL_MASK and MSR_GS_BASE, UNKNOWN for 0xdeadbeef).
+ */
+static void handle_rdmsr(struct kvm_run *run)
+{
+	run->msr.data = run->msr.index;
+	msr_reads++;
+
+	if (run->msr.index == MSR_SYSCALL_MASK ||
+	    run->msr.index == MSR_GS_BASE) {
+		TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER,
+			    "MSR read trap w/o access fault");
+	}
+
+	if (run->msr.index == 0xdeadbeef) {
+		TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_UNKNOWN,
+			    "MSR deadbeef read trap w/o inval fault");
+	}
+}
+
+/*
+ * Handle a wrmsr exit in the deny test: count the write and, for
+ * MSR_IA32_POWER_CTL and 0xdeadbeef, check the written data (0x1234) and the
+ * reported exit reason (FILTER and UNKNOWN respectively).  The written value
+ * itself is discarded.
+ */
+static void handle_wrmsr(struct kvm_run *run)
+{
+	/* ignore */
+	msr_writes++;
+
+	if (run->msr.index == MSR_IA32_POWER_CTL) {
+		TEST_ASSERT(run->msr.data == 0x1234,
+			    "MSR data for MSR_IA32_POWER_CTL incorrect");
+		TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER,
+			    "MSR_IA32_POWER_CTL trap w/o access fault");
+	}
+
+	if (run->msr.index == 0xdeadbeef) {
+		TEST_ASSERT(run->msr.data == 0x1234,
+			    "MSR data for deadbeef incorrect");
+		TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_UNKNOWN,
+			    "deadbeef trap w/o inval fault");
+	}
+}
+
+/*
+ * Host side of the filter_deny test.
+ *
+ * Installs filter_deny with INVAL, UNKNOWN and FILTER MSR exits enabled, then
+ * services rdmsr, wrmsr and ucall exits until the guest signals UCALL_DONE.
+ * Finally checks that exactly 4 reads and 3 writes reached userspace.
+ */
+static void test_msr_filter_deny(void)
+{
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+	struct kvm_run *run;
+	int rc;
+
+	vm = vm_create_with_one_vcpu(&vcpu, guest_code_filter_deny);
+	run = vcpu->run;
+
+	rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
+	TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
+	vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_INVAL |
+						      KVM_MSR_EXIT_REASON_UNKNOWN |
+						      KVM_MSR_EXIT_REASON_FILTER);
+
+	rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
+	TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
+
+	prepare_bitmaps();
+	vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_deny);
+
+	while (1) {
+		vcpu_run(vcpu);
+
+		switch (run->exit_reason) {
+		case KVM_EXIT_X86_RDMSR:
+			handle_rdmsr(run);
+			break;
+		case KVM_EXIT_X86_WRMSR:
+			handle_wrmsr(run);
+			break;
+		case KVM_EXIT_IO:
+			if (handle_ucall(vcpu))
+				goto done;
+			break;
+		default:
+			/*
+			 * Fail on any other exit instead of silently
+			 * re-entering the guest forever.
+			 */
+			TEST_FAIL("Unexpected exit reason: %u (%s)",
+				  run->exit_reason,
+				  exit_reason_str(run->exit_reason));
+		}
+
+	}
+
+done:
+	TEST_ASSERT(msr_reads == 4, "Handled 4 rdmsr in user space");
+	TEST_ASSERT(msr_writes == 3, "Handled 3 wrmsr in user space");
+
+	kvm_vm_free(vm);
+}
+
+/*
+ * Host side of the permission bitmap test.
+ *
+ * With filter_fs installed the guest's MSR_FS_BASE read must exit to
+ * userspace; after the guest syncs, filter_gs is installed and the
+ * MSR_GS_BASE read must exit instead.  The guest must then finish with
+ * UCALL_DONE.
+ */
+static void test_msr_permission_bitmap(void)
+{
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+	int rc;
+
+	vm = vm_create_with_one_vcpu(&vcpu, guest_code_permission_bitmap);
+
+	rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
+	TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
+	vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_FILTER);
+
+	rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
+	TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
+
+	vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_fs);
+	run_guest_then_process_rdmsr(vcpu, MSR_FS_BASE);
+	TEST_ASSERT(run_guest_then_process_ucall(vcpu) == UCALL_SYNC,
+		    "Expected ucall state to be UCALL_SYNC.");
+	vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_gs);
+	run_guest_then_process_rdmsr(vcpu, MSR_GS_BASE);
+	run_guest_then_process_ucall_done(vcpu);
+
+	kvm_vm_free(vm);
+}
+
+/*
+ * Run the three MSR exit subtests in sequence; each one creates and frees
+ * its own VM.
+ */
+int main(int argc, char *argv[])
+{
+	/* Tell stdout not to buffer its content */
+	setbuf(stdout, NULL);
+
+	test_msr_filter_allow();
+
+	test_msr_filter_deny();
+
+	test_msr_permission_bitmap();
+
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c b/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c
new file mode 100644
index 000000000000..5abecf06329e
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vmx_apic_access_test
+ *
+ * Copyright (C) 2020, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * The first subtest simply checks to see that an L2 guest can be
+ * launched with a valid APIC-access address that is backed by a
+ * page of L1 physical memory.
+ *
+ * The second subtest sets the APIC-access address to a (valid) L1
+ * physical address that is not backed by memory. KVM can't handle
+ * this situation, so resuming L2 should result in a KVM exit for
+ * internal error (emulation). This is not an architectural
+ * requirement. It is just a shortcoming of KVM. The internal error
+ * is unfortunate, but it's better than what used to happen!
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "kselftest.h"
+
+static void l2_guest_code(void)
+{
+	/* Exit to L1, which expects to see EXIT_REASON_VMCALL. */
+	__asm__ __volatile__("vmcall");
+}
+
+static void l1_guest_code(struct vmx_pages *vmx_pages, unsigned long high_gpa)
+{
+#define L2_GUEST_STACK_SIZE 64
+	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+	uint32_t control;
+
+	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+	GUEST_ASSERT(load_vmcs(vmx_pages));
+
+	/* Prepare the VMCS for L2 and enable "virtualize APIC accesses". */
+	prepare_vmcs(vmx_pages, l2_guest_code,
+		     &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+	control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
+	control |= CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
+	vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
+	control = vmreadz(SECONDARY_VM_EXEC_CONTROL);
+	control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+	vmwrite(SECONDARY_VM_EXEC_CONTROL, control);
+	vmwrite(APIC_ACCESS_ADDR, vmx_pages->apic_access_gpa);
+
+	/* Sync the address to main(), then launch L2 with the memory-backed page. */
+	GUEST_SYNC(vmreadz(APIC_ACCESS_ADDR));
+	GUEST_ASSERT(!vmlaunch());
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+	vmwrite(APIC_ACCESS_ADDR, high_gpa);
+
+	/* Sync again, then try to resume L2 with the unbacked APIC-access address. */
+	GUEST_SYNC(vmreadz(APIC_ACCESS_ADDR));
+	GUEST_ASSERT(!vmresume());
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+	/* In a passing run main() stops at the KVM internal error before this. */
+	GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+	/* Last value L1 reported through GUEST_SYNC(); ~0ul until the first sync. */
+	unsigned long apic_access_addr = ~0ul;
+	vm_vaddr_t vmx_pages_gva;
+	unsigned long high_gpa;
+	struct vmx_pages *vmx;
+	bool done = false;
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+
+	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+
+	vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+
+	/* GPA of frame (max_gfn - 1): L1's second, unbacked APIC-access address. */
+	high_gpa = (vm->max_gfn - 1) << vm->page_shift;
+
+	vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva);
+	prepare_virtualize_apic_accesses(vmx, vm);
+	vcpu_args_set(vcpu, 2, vmx_pages_gva, high_gpa);
+
+	while (!done) {
+		volatile struct kvm_run *run = vcpu->run;
+		struct ucall uc;
+
+		vcpu_run(vcpu);
+		/* After L1 syncs high_gpa, resuming L2 must hit an emulation error. */
+		if (apic_access_addr == high_gpa) {
+			TEST_ASSERT(run->exit_reason == KVM_EXIT_INTERNAL_ERROR,
+				    "Got exit reason other than KVM_EXIT_INTERNAL_ERROR: %u (%s)\n",
+				    run->exit_reason,
+				    exit_reason_str(run->exit_reason));
+			TEST_ASSERT(run->internal.suberror == KVM_INTERNAL_ERROR_EMULATION,
+				    "Got internal suberror other than KVM_INTERNAL_ERROR_EMULATION: %u\n",
+				    run->internal.suberror);
+			break;
+		}
+		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+			    "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
+			    run->exit_reason,
+			    exit_reason_str(run->exit_reason));
+
+		switch (get_ucall(vcpu, &uc)) {
+		case UCALL_ABORT:
+			REPORT_GUEST_ASSERT(uc);
+			/* NOT REACHED */
+		case UCALL_SYNC:
+			apic_access_addr = uc.args[1];
+			break;
+		case UCALL_DONE:
+			done = true;
+			break;
+		default:
+			TEST_FAIL("Unknown ucall %lu", uc.cmd);
+		}
+	}
+	kvm_vm_free(vm);
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c
index 5dfb53546a26..d79651b02740 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c
@@ -18,20 +18,15 @@
 
 #include "kselftest.h"
 
-#define VCPU_ID		5
-
 enum {
 	PORT_L0_EXIT = 0x2000,
 };
 
-/* The virtual machine object. */
-static struct kvm_vm *vm;
-
 static void l2_guest_code(void)
 {
 	/* Exit to L0 */
-        asm volatile("inb %%dx, %%al"
-                     : : [port] "d" (PORT_L0_EXIT) : "rax");
+	asm volatile("inb %%dx, %%al"
+		     : : [port] "d" (PORT_L0_EXIT) : "rax");
 }
 
 static void l1_guest_code(struct vmx_pages *vmx_pages)
@@ -53,21 +48,22 @@ static void l1_guest_code(struct vmx_pages *vmx_pages)
 int main(int argc, char *argv[])
 {
 	vm_vaddr_t vmx_pages_gva;
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
 
-	nested_vmx_check_supported();
+	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
 
-	vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
-	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+	vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
 
 	/* Allocate VMX pages and shared descriptors (vmx_pages). */
 	vcpu_alloc_vmx(vm, &vmx_pages_gva);
-	vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
+	vcpu_args_set(vcpu, 1, vmx_pages_gva);
 
 	for (;;) {
-		volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+		volatile struct kvm_run *run = vcpu->run;
 		struct ucall uc;
 
-		vcpu_run(vm, VCPU_ID);
+		vcpu_run(vcpu);
 		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
 			    "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
 			    run->exit_reason,
@@ -76,12 +72,12 @@ int main(int argc, char *argv[])
 		if (run->io.port == PORT_L0_EXIT)
 			break;
 
-		switch (get_ucall(vm, VCPU_ID, &uc)) {
+		switch (get_ucall(vcpu, &uc)) {
 		case UCALL_ABORT:
-			TEST_ASSERT(false, "%s", (const char *)uc.args[0]);
+			REPORT_GUEST_ASSERT(uc);
 			/* NOT REACHED */
 		default:
-			TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd);
+			TEST_FAIL("Unknown ucall %lu", uc.cmd);
 		}
 	}
 }
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
index a223a6401258..2d8c23d639f7 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
@@ -17,11 +17,9 @@
 #include "processor.h"
 #include "vmx.h"
 
-#define VCPU_ID				1
-
 /* The memory slot index to track dirty pages */
 #define TEST_MEM_SLOT_INDEX		1
-#define TEST_MEM_SIZE			3
+#define TEST_MEM_PAGES			3
 
 /* L1 guest test virtual memory offset */
 #define GUEST_TEST_MEM			0xc0000000
@@ -73,33 +71,32 @@ int main(int argc, char *argv[])
 	unsigned long *bmap;
 	uint64_t *host_test_mem;
 
+	struct kvm_vcpu *vcpu;
 	struct kvm_vm *vm;
 	struct kvm_run *run;
 	struct ucall uc;
 	bool done = false;
 
-	nested_vmx_check_supported();
+	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
 
 	/* Create VM */
-	vm = vm_create_default(VCPU_ID, 0, l1_guest_code);
-	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+	vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
 	vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva);
-	vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
-	run = vcpu_state(vm, VCPU_ID);
+	vcpu_args_set(vcpu, 1, vmx_pages_gva);
+	run = vcpu->run;
 
 	/* Add an extra memory slot for testing dirty logging */
 	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
 				    GUEST_TEST_MEM,
 				    TEST_MEM_SLOT_INDEX,
-				    TEST_MEM_SIZE,
+				    TEST_MEM_PAGES,
 				    KVM_MEM_LOG_DIRTY_PAGES);
 
 	/*
 	 * Add an identity map for GVA range [0xc0000000, 0xc0002000).  This
 	 * affects both L1 and L2.  However...
 	 */
-	virt_map(vm, GUEST_TEST_MEM, GUEST_TEST_MEM,
-		 TEST_MEM_SIZE * 4096, 0);
+	virt_map(vm, GUEST_TEST_MEM, GUEST_TEST_MEM, TEST_MEM_PAGES);
 
 	/*
 	 * ... pages in the L2 GPA range [0xc0001000, 0xc0003000) will map to
@@ -109,25 +106,24 @@ int main(int argc, char *argv[])
 	 * meaning after the last call to virt_map.
 	 */
 	prepare_eptp(vmx, vm, 0);
-	nested_map_memslot(vmx, vm, 0, 0);
-	nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096, 0);
-	nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096, 0);
+	nested_map_memslot(vmx, vm, 0);
+	nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096);
+	nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096);
 
-	bmap = bitmap_alloc(TEST_MEM_SIZE);
+	bmap = bitmap_zalloc(TEST_MEM_PAGES);
 	host_test_mem = addr_gpa2hva(vm, GUEST_TEST_MEM);
 
 	while (!done) {
-		memset(host_test_mem, 0xaa, TEST_MEM_SIZE * 4096);
-		_vcpu_run(vm, VCPU_ID);
+		memset(host_test_mem, 0xaa, TEST_MEM_PAGES * 4096);
+		vcpu_run(vcpu);
 		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
 			    "Unexpected exit reason: %u (%s),\n",
 			    run->exit_reason,
 			    exit_reason_str(run->exit_reason));
 
-		switch (get_ucall(vm, VCPU_ID, &uc)) {
+		switch (get_ucall(vcpu, &uc)) {
 		case UCALL_ABORT:
-			TEST_ASSERT(false, "%s at %s:%d", (const char *)uc.args[0],
-				    __FILE__, uc.args[1]);
+			REPORT_GUEST_ASSERT(uc);
 			/* NOT REACHED */
 		case UCALL_SYNC:
 			/*
@@ -152,7 +148,7 @@ int main(int argc, char *argv[])
 			done = true;
 			break;
 		default:
-			TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd);
+			TEST_FAIL("Unknown ucall %lu", uc.cmd);
 		}
 	}
 }
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c b/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c
new file mode 100644
index 000000000000..2641b286b4ed
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c
@@ -0,0 +1,147 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#include <signal.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+
+#include "kselftest.h"
+
+static void guest_ud_handler(struct ex_regs *regs)
+{
+	/* Deliberately empty: loop on the ud2 until guest state is made invalid. */
+}
+
+static void guest_code(void)
+{
+	asm volatile("ud2");	/* main() installs guest_ud_handler() for UD_VECTOR */
+}
+
+static void __run_vcpu_with_invalid_state(struct kvm_vcpu *vcpu)
+{
+	struct kvm_run *run = vcpu->run;
+
+	vcpu_run(vcpu);
+	/* KVM_RUN must bail to userspace with an emulation-failure internal error. */
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_INTERNAL_ERROR,
+		    "Expected KVM_EXIT_INTERNAL_ERROR, got %u (%s)\n",
+		    run->exit_reason, exit_reason_str(run->exit_reason));
+	TEST_ASSERT(run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION,
+		    "Expected emulation failure, got %u\n",
+		    run->emulation_failure.suberror);
+}
+
+static void run_vcpu_with_invalid_state(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * Always run twice: the second KVM_RUN covers the case where _KVM_
+	 * queued an exception with invalid state and then exited to userspace,
+	 * i.e. that KVM doesn't explode if userspace ignores the initial error.
+	 */
+	__run_vcpu_with_invalid_state(vcpu);
+	__run_vcpu_with_invalid_state(vcpu);
+}
+
+static void set_timer(void)
+{
+	struct itimerval timer;
+
+	timer.it_value.tv_sec  = 0;
+	timer.it_value.tv_usec = 200;	/* first expiry after 200us */
+	timer.it_interval = timer.it_value;	/* then periodic at the same rate */
+	ASSERT_EQ(setitimer(ITIMER_REAL, &timer, NULL), 0);
+}
+
+static void set_or_clear_invalid_guest_state(struct kvm_vcpu *vcpu, bool set)
+{
+	static struct kvm_sregs sregs;	/* zero-initialized; filled on first use */
+
+	if (!sregs.cr0)	/* CR0 == 0 is used as the "not fetched yet" marker */
+		vcpu_sregs_get(vcpu, &sregs);
+	sregs.tr.unusable = !!set;	/* an unusable TR is the "invalid state" */
+	vcpu_sregs_set(vcpu, &sregs);
+}
+
+static void set_invalid_guest_state(struct kvm_vcpu *vcpu)
+{
+	set_or_clear_invalid_guest_state(vcpu, true);	/* mark TR unusable */
+}
+
+static void clear_invalid_guest_state(struct kvm_vcpu *vcpu)
+{
+	set_or_clear_invalid_guest_state(vcpu, false);	/* mark TR usable again */
+}
+
+static struct kvm_vcpu *get_set_sigalrm_vcpu(struct kvm_vcpu *__vcpu)
+{
+	static struct kvm_vcpu *vcpu = NULL;	/* vCPU remembered for sigalrm_handler() */
+
+	if (__vcpu)	/* non-NULL: store it; NULL: just return the stored one */
+		vcpu = __vcpu;
+	return vcpu;
+}
+
+static void sigalrm_handler(int sig)
+{
+	struct kvm_vcpu *vcpu = get_set_sigalrm_vcpu(NULL);	/* NULL: lookup only */
+	struct kvm_vcpu_events events;
+
+	TEST_ASSERT(sig == SIGALRM, "Unexpected signal = %d", sig);
+
+	vcpu_events_get(vcpu, &events);
+
+	/*
+	 * If an exception is pending, attempt KVM_RUN with invalid guest,
+	 * otherwise rearm the timer and keep doing so until the timer fires
+	 * between KVM queueing an exception and re-entering the guest.
+	 */
+	if (events.exception.pending) {
+		set_invalid_guest_state(vcpu);
+		run_vcpu_with_invalid_state(vcpu);
+	} else {
+		set_timer();
+	}
+}
+
+int main(int argc, char *argv[])
+{
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+
+	TEST_REQUIRE(is_intel_cpu());
+	TEST_REQUIRE(!vm_is_unrestricted_guest(NULL));
+
+	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+	get_set_sigalrm_vcpu(vcpu);
+
+	vm_init_descriptor_tables(vm);
+	vcpu_init_descriptor_tables(vcpu);
+
+	vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
+
+	/*
+	 * Stuff invalid guest state by making TR unusable.  The next KVM_RUNs
+	 * are expected to exit to userspace with KVM_EXIT_INTERNAL_ERROR and
+	 * an emulation-failure suberror, see __run_vcpu_with_invalid_state().
+	 */
+	set_invalid_guest_state(vcpu);
+	run_vcpu_with_invalid_state(vcpu);
+
+	/*
+	 * Verify KVM also handles the case where userspace gains control while
+	 * an exception is pending and stuffs invalid state.  Run with valid
+	 * guest state and a timer firing every 200us, and attempt to enter the
+	 * guest with invalid state when the handler interrupts KVM with an
+	 * exception pending.
+	 */
+	clear_invalid_guest_state(vcpu);
+	TEST_ASSERT(signal(SIGALRM, sigalrm_handler) != SIG_ERR,
+		    "Failed to register SIGALRM handler, errno = %d (%s)",
+		    errno, strerror(errno));
+
+	set_timer();
+	run_vcpu_with_invalid_state(vcpu);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c b/tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c
new file mode 100644
index 000000000000..6bfb4bb471ca
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "kselftest.h"
+
+#define ARBITRARY_IO_PORT 0x2000
+
+static struct kvm_vm *vm;
+
+static void l2_guest_code(void)
+{
+	/*
+	 * Generate an exit to L0 userspace, i.e. main(), via an IN from an
+	 * arbitrary port; main() checks for KVM_EXIT_IO on that port.
+	 */
+	asm volatile("inb %%dx, %%al"
+		     : : [port] "d" (ARBITRARY_IO_PORT) : "rax");
+}
+
+static void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+	GUEST_ASSERT(load_vmcs(vmx_pages));
+
+	/* Prepare the VMCS for L2, which runs l2_guest_code(). */
+	prepare_vmcs(vmx_pages, l2_guest_code,
+		     &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+	/*
+	 * L2 must be run without unrestricted guest, verify that the selftests
+	 * library hasn't enabled it.  Because KVM selftests jump directly to
+	 * 64-bit mode, unrestricted guest support isn't required.
+	 */
+	GUEST_ASSERT(!(vmreadz(CPU_BASED_VM_EXEC_CONTROL) & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) ||
+		     !(vmreadz(SECONDARY_VM_EXEC_CONTROL) & SECONDARY_EXEC_UNRESTRICTED_GUEST));
+
+	GUEST_ASSERT(!vmlaunch());
+
+	/* L2 should triple fault after main() makes TR unusable (invalid state). */
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_TRIPLE_FAULT);
+	GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+	vm_vaddr_t vmx_pages_gva;
+	struct kvm_sregs sregs;
+	struct kvm_vcpu *vcpu;
+	struct kvm_run *run;
+	struct ucall uc;
+
+	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+
+	vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+
+	/* Allocate VMX pages and shared descriptors (vmx_pages). */
+	vcpu_alloc_vmx(vm, &vmx_pages_gva);
+	vcpu_args_set(vcpu, 1, vmx_pages_gva);
+
+	vcpu_run(vcpu);
+
+	run = vcpu->run;
+
+	/*
+	 * The first exit to L0 userspace should be an I/O access from L2.
+	 * Running L1 should launch L2 without triggering an exit to userspace.
+	 */
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+		    "Expected KVM_EXIT_IO, got: %u (%s)\n",
+		    run->exit_reason, exit_reason_str(run->exit_reason));
+
+	TEST_ASSERT(run->io.port == ARBITRARY_IO_PORT,
+		    "Expected IN from port %d from L2, got port %d",
+		    ARBITRARY_IO_PORT, run->io.port);
+
+	/*
+	 * Stuff invalid guest state for L2 by making TR unusable.  The next
+	 * KVM_RUN should induce a TRIPLE_FAULT in L2 as KVM doesn't support
+	 * emulating invalid guest state for L2.
+	 */
+	memset(&sregs, 0, sizeof(sregs));
+	vcpu_sregs_get(vcpu, &sregs);
+	sregs.tr.unusable = 1;
+	vcpu_sregs_set(vcpu, &sregs);
+
+	vcpu_run(vcpu);
+
+	switch (get_ucall(vcpu, &uc)) {
+	case UCALL_DONE:
+		break;
+	case UCALL_ABORT:
+		REPORT_GUEST_ASSERT(uc);	/* NOT REACHED */
+	default:
+		TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+	}
+}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c b/tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c
new file mode 100644
index 000000000000..322d561b4260
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * VMX control MSR test
+ *
+ * Copyright (C) 2022 Google LLC.
+ *
+ * Tests for KVM ownership of bits in the VMX entry/exit control MSRs. Checks
+ * that KVM will set owned bits where appropriate, and will not if
+ * KVM_X86_QUIRK_TWEAK_VMX_CTRL_MSRS is disabled.
+ */
+#include <linux/bitmap.h>
+#include "kvm_util.h"
+#include "vmx.h"
+
+static void vmx_fixed1_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index,
+				  uint64_t mask)
+{
+	uint64_t val = vcpu_get_msr(vcpu, msr_index);
+	uint64_t bit;
+
+	mask &= val;	/* keep only the mask bits currently set in the MSR */
+	/* Write the value with each such bit cleared, then restore it. */
+	for_each_set_bit(bit, &mask, 64) {
+		vcpu_set_msr(vcpu, msr_index, val & ~BIT_ULL(bit));
+		vcpu_set_msr(vcpu, msr_index, val);
+	}
+}
+
+static void vmx_fixed0_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index,
+				uint64_t mask)
+{
+	uint64_t val = vcpu_get_msr(vcpu, msr_index);
+	uint64_t bit;
+
+	mask = ~mask | val;	/* clear bits = requested bits not set in the MSR */
+	/* Write the value with each such bit set, then restore it. */
+	for_each_clear_bit(bit, &mask, 64) {
+		vcpu_set_msr(vcpu, msr_index, val | BIT_ULL(bit));
+		vcpu_set_msr(vcpu, msr_index, val);
+	}
+}
+
+static void vmx_fixed0and1_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index)
+{
+	vmx_fixed0_msr_test(vcpu, msr_index, GENMASK_ULL(31, 0));	/* low half */
+	vmx_fixed1_msr_test(vcpu, msr_index, GENMASK_ULL(63, 32));	/* high half */
+}
+
+static void vmx_save_restore_msrs_test(struct kvm_vcpu *vcpu)
+{
+	vcpu_set_msr(vcpu, MSR_IA32_VMX_VMCS_ENUM, 0);
+	vcpu_set_msr(vcpu, MSR_IA32_VMX_VMCS_ENUM, -1ull);
+	/* Toggle the listed bits of each VMX capability MSR, then restore them. */
+	vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_BASIC,
+			    BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55));
+
+	vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_MISC,
+			    BIT_ULL(5) | GENMASK_ULL(8, 6) | BIT_ULL(14) |
+			    BIT_ULL(15) | BIT_ULL(28) | BIT_ULL(29) | BIT_ULL(30));
+
+	vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_PROCBASED_CTLS2);
+	vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_EPT_VPID_CAP, -1ull);
+	vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_PINBASED_CTLS);
+	vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_PROCBASED_CTLS);
+	vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_EXIT_CTLS);
+	vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_ENTRY_CTLS);
+	vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_VMFUNC, -1ull);
+}
+
+int main(void)
+{
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+
+	TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2));
+	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+
+	/* No need to actually do KVM_RUN, thus no guest code (NULL entry point). */
+	vm = vm_create_with_one_vcpu(&vcpu, NULL);
+
+	vmx_save_restore_msrs_test(vcpu);
+
+	kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c b/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c
new file mode 100644
index 000000000000..465a9434d61c
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c
@@ -0,0 +1,231 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vmx_nested_tsc_scaling_test
+ *
+ * Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * This test case verifies that nested TSC scaling behaves as expected when
+ * both L1 and L2 are scaled using different ratios. For this test we scale
+ * L1 down and scale L2 up.
+ */
+
+#include <time.h>
+
+#include "kvm_util.h"
+#include "vmx.h"
+#include "kselftest.h"
+
+/* L2 is scaled up (from L1's perspective) by this factor */
+#define L2_SCALE_FACTOR 4ULL
+
+#define TSC_OFFSET_L2 ((uint64_t) -33125236320908)
+#define TSC_MULTIPLIER_L2 (L2_SCALE_FACTOR << 48)
+
+#define L2_GUEST_STACK_SIZE 64
+
+enum { USLEEP, UCHECK_L1, UCHECK_L2 };
+#define GUEST_SLEEP(sec)         ucall(UCALL_SYNC, 2, USLEEP, sec)
+#define GUEST_CHECK(level, freq) ucall(UCALL_SYNC, 2, level, freq)
+
+
+/*
+ * This function checks whether the "actual" TSC frequency of a guest matches
+ * its expected frequency. In order to account for delays in taking the TSC
+ * measurements, a difference of 1% between the actual and the expected value
+ * is tolerated.
+ */
+static void compare_tsc_freq(uint64_t actual, uint64_t expected)
+{
+	/* Accepted window is strictly inside expected -/+ 1% (integer math). */
+	const uint64_t tolerance = expected / 100;
+	const uint64_t thresh_low = expected - tolerance;
+	const uint64_t thresh_high = expected + tolerance;
+
+	/* Lower bound first, then upper bound; both report the full window. */
+	TEST_ASSERT(thresh_low < actual,
+		"TSC freq is expected to be between %"PRIu64" and %"PRIu64
+		" but it actually is %"PRIu64,
+		thresh_low, thresh_high, actual);
+	TEST_ASSERT(thresh_high > actual,
+		"TSC freq is expected to be between %"PRIu64" and %"PRIu64
+		" but it actually is %"PRIu64,
+		thresh_low, thresh_high, actual);
+}
+
+static void check_tsc_freq(int level)
+{
+	uint64_t tsc_before;
+	uint64_t tsc_after;
+
+	/*
+	 * Sample the TSC on either side of a roughly one-second sleep; the
+	 * delta approximates the TSC frequency as seen by this guest level.
+	 * It won't be exact, but it is close enough for the 1% tolerance
+	 * applied by compare_tsc_freq().
+	 */
+	tsc_before = rdmsr(MSR_IA32_TSC);
+	GUEST_SLEEP(1);
+	tsc_after = rdmsr(MSR_IA32_TSC);
+
+	/* Report the delta to the host, tagged with the level being checked. */
+	GUEST_CHECK(level, tsc_after - tsc_before);
+}
+
+static void l2_guest_code(void)
+{
+	check_tsc_freq(UCHECK_L2);
+
+	/* exit to L1, which expects EXIT_REASON_VMCALL */
+	__asm__ __volatile__("vmcall");
+}
+
+static void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+	uint32_t control;
+
+	/* check that L1's frequency looks alright before launching L2 */
+	check_tsc_freq(UCHECK_L1);
+
+	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+	GUEST_ASSERT(load_vmcs(vmx_pages));
+
+	/* prepare the VMCS for L2 execution */
+	prepare_vmcs(vmx_pages, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+	/* enable TSC offsetting and TSC scaling for L2 */
+	control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
+	control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING;
+	vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
+
+	control = vmreadz(SECONDARY_VM_EXEC_CONTROL);
+	control |= SECONDARY_EXEC_TSC_SCALING;
+	vmwrite(SECONDARY_VM_EXEC_CONTROL, control);
+
+	vmwrite(TSC_OFFSET, TSC_OFFSET_L2);
+	vmwrite(TSC_MULTIPLIER, TSC_MULTIPLIER_L2);
+	vmwrite(TSC_MULTIPLIER_HIGH, TSC_MULTIPLIER_L2 >> 32);	/* upper 32 bits */
+
+	/* launch L2; control returns here once L2 executes its vmcall */
+	GUEST_ASSERT(!vmlaunch());
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+	/* check that L1's frequency still looks good */
+	check_tsc_freq(UCHECK_L1);
+
+	GUEST_DONE();
+}
+
+/* Skip the test unless the host's current clocksource starts with "tsc". */
+static void stable_tsc_check_supported(void)
+{
+	char buf[4] = "";	/* room for "tsc" + NUL; empty means "unknown" */
+	FILE *fp;
+
+	fp = fopen("/sys/devices/system/clocksource/clocksource0/current_clocksource", "r");
+	if (fp) {
+		/* A failed read is treated like a missing file: buf stays empty. */
+		if (fgets(buf, sizeof(buf), fp) == NULL)
+			buf[0] = '\0';
+		fclose(fp);	/* release the stream on every path */
+	}
+
+	if (!strncmp(buf, "tsc", sizeof(buf)))
+		return;
+
+	print_skip("Kernel does not use TSC clocksource - assuming that host TSC is not stable");
+	exit(KSFT_SKIP);
+}
+
+int main(int argc, char *argv[])
+{
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+	vm_vaddr_t vmx_pages_gva;
+
+	uint64_t tsc_start, tsc_end;
+	uint64_t tsc_khz;
+	uint64_t l1_scale_factor;
+	uint64_t l0_tsc_freq = 0;
+	uint64_t l1_tsc_freq = 0;
+	uint64_t l2_tsc_freq = 0;
+
+	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+	TEST_REQUIRE(kvm_has_cap(KVM_CAP_TSC_CONTROL));
+	stable_tsc_check_supported();
+
+	/*
+	 * We set L1's scale factor to be a random number from 2 to 10.
+	 * Ideally we would do the same for L2's factor but that one is
+	 * referenced by both main() and l1_guest_code() and using a global
+	 * variable does not work.
+	 */
+	srand(time(NULL));
+	l1_scale_factor = (rand() % 9) + 2;	/* 2..10 inclusive */
+	printf("L1's scale down factor is: %"PRIu64"\n", l1_scale_factor);
+	printf("L2's scale up factor is: %llu\n", L2_SCALE_FACTOR);
+
+	tsc_start = rdtsc();
+	sleep(1);
+	tsc_end = rdtsc();
+
+	l0_tsc_freq = tsc_end - tsc_start;	/* host ticks across the 1s sleep */
+	printf("real TSC frequency is around: %"PRIu64"\n", l0_tsc_freq);
+
+	vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+	vcpu_alloc_vmx(vm, &vmx_pages_gva);
+	vcpu_args_set(vcpu, 1, vmx_pages_gva);
+
+	tsc_khz = __vcpu_ioctl(vcpu, KVM_GET_TSC_KHZ, NULL);
+	TEST_ASSERT(tsc_khz != -1, "vcpu ioctl KVM_GET_TSC_KHZ failed");
+
+	/* scale down L1's TSC frequency */
+	vcpu_ioctl(vcpu, KVM_SET_TSC_KHZ, (void *) (tsc_khz / l1_scale_factor));
+
+	for (;;) {
+		volatile struct kvm_run *run = vcpu->run;
+		struct ucall uc;
+
+		vcpu_run(vcpu);
+		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+			    "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
+			    run->exit_reason,
+			    exit_reason_str(run->exit_reason));
+
+		switch (get_ucall(vcpu, &uc)) {
+		case UCALL_ABORT:
+			REPORT_GUEST_ASSERT(uc);	/* NOT REACHED */
+		case UCALL_SYNC:
+			switch (uc.args[0]) {	/* USLEEP / UCHECK_L1 / UCHECK_L2 */
+			case USLEEP:
+				sleep(uc.args[1]);
+				break;
+			case UCHECK_L1:
+				l1_tsc_freq = uc.args[1];
+				printf("L1's TSC frequency is around: %"PRIu64
+				       "\n", l1_tsc_freq);
+
+				compare_tsc_freq(l1_tsc_freq,
+						 l0_tsc_freq / l1_scale_factor);
+				break;
+			case UCHECK_L2:
+				l2_tsc_freq = uc.args[1];
+				printf("L2's TSC frequency is around: %"PRIu64
+				       "\n", l2_tsc_freq);
+
+				compare_tsc_freq(l2_tsc_freq,
+						 l1_tsc_freq * L2_SCALE_FACTOR);
+				break;
+			}
+			break;
+		case UCALL_DONE:
+			goto done;
+		default:
+			TEST_FAIL("Unknown ucall %lu", uc.cmd);
+		}
+	}
+
+done:
+	kvm_vm_free(vm);
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c
new file mode 100644
index 000000000000..069589c52f41
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c
@@ -0,0 +1,109 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test for VMX-pmu perf capability msr
+ *
+ * Copyright (C) 2021 Intel Corporation
+ *
+ * Test to check the effect of various CPUID settings on
+ * MSR_IA32_PERF_CAPABILITIES MSR, and check that what
+ * we write with KVM_SET_MSR is _not_ modified by the guest
+ * and check it can be retrieved with KVM_GET_MSR. Also test
+ * that invalid LBR formats are rejected.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <sys/ioctl.h>
+
+#include "kvm_util.h"
+#include "vmx.h"
+
+#define PMU_CAP_FW_WRITES	(1ULL << 13)
+#define PMU_CAP_LBR_FMT		0x3f
+
+union cpuid10_eax {
+	struct {
+		unsigned int version_id:8;	/* 0 => main() skips the test */
+		unsigned int num_counters:8;
+		unsigned int bit_width:8;
+		unsigned int mask_length:8;
+	} split;
+	unsigned int full;	/* raw EAX of CPUID leaf 0xa, as loaded in main() */
+};
+
+union perf_capabilities {
+	struct {
+		u64	lbr_format:6;	/* main() treats this as the PMU_CAP_LBR_FMT bits */
+		u64	pebs_trap:1;
+		u64	pebs_arch_reg:1;
+		u64	pebs_format:4;
+		u64	smm_freeze:1;
+		u64	full_width_write:1;	/* presumably PMU_CAP_FW_WRITES (bit 13) - confirm */
+		u64 pebs_baseline:1;
+		u64	perf_metrics:1;
+		u64	pebs_output_pt_available:1;
+		u64	anythread_deprecated:1;
+	};
+	u64	capabilities;	/* raw MSR_IA32_PERF_CAPABILITIES value */
+};
+
+static void guest_code(void)
+{
+	wrmsr(MSR_IA32_PERF_CAPABILITIES, PMU_CAP_LBR_FMT);	/* main() checks the MSR is unchanged */
+}
+
+int main(int argc, char *argv[])
+{
+	const struct kvm_cpuid_entry2 *entry_a_0;
+	struct kvm_vm *vm;
+	struct kvm_vcpu *vcpu;
+	int ret;
+	union cpuid10_eax eax;
+	union perf_capabilities host_cap;
+	uint64_t val;
+
+	/* Check every prerequisite up front so unsupported hosts skip early. */
+	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_PDCM));
+	TEST_REQUIRE(kvm_get_cpuid_max_basic() >= 0xa);
+	entry_a_0 = kvm_get_supported_cpuid_entry(0xa);
+	eax.full = entry_a_0->eax;
+	__TEST_REQUIRE(eax.split.version_id, "PMU is not supported by the vCPU");
+
+	/* Only the FW_WRITES and LBR-format bits of the host value are used. */
+	host_cap.capabilities = kvm_get_feature_msr(MSR_IA32_PERF_CAPABILITIES);
+	host_cap.capabilities &= (PMU_CAP_FW_WRITES | PMU_CAP_LBR_FMT);
+
+	/* Create VM */
+	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+	/* testcase 1, set capabilities when we have PDCM bit */
+	vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, PMU_CAP_FW_WRITES);
+
+	/* check capabilities can be retrieved with KVM_GET_MSR */
+	ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES), PMU_CAP_FW_WRITES);
+
+	/* check whatever we write with KVM_SET_MSR is _not_ modified */
+	vcpu_run(vcpu);
+	ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES), PMU_CAP_FW_WRITES);
+
+	/* testcase 2, check valid LBR formats are accepted */
+	vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0);
+	ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES), 0);
+
+	vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.lbr_format);
+	ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES), (u64)host_cap.lbr_format);
+
+	/*
+	 * Testcase 3, check that an "invalid" LBR format is rejected.  Only an
+	 * exact match of the host's format (and 0/disabled) is allowed.
+	 */
+	for (val = 1; val <= PMU_CAP_LBR_FMT; val++) {
+		if (val == (host_cap.capabilities & PMU_CAP_LBR_FMT))
+			continue;
+
+		ret = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, val);
+		TEST_ASSERT(!ret, "Bad LBR FMT = 0x%lx didn't fail", val);
+	}
+
+	printf("Completed perf capability tests.\n");
+	kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
new file mode 100644
index 000000000000..0efdc05969a5
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
@@ -0,0 +1,252 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * VMX-preemption timer test
+ *
+ * Copyright (C) 2020, Google, LLC.
+ *
+ * Test to ensure the VM-Enter after migration doesn't
+ * incorrectly restart the timer with the full timer
+ * value instead of the partially decayed timer value
+ *
+ */
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#define PREEMPTION_TIMER_VALUE			100000000ull
+#define PREEMPTION_TIMER_VALUE_THRESHOLD1	 80000000ull
+
+u32 vmx_pt_rate;
+bool l2_save_restore_done;
+static u64 l2_vmx_pt_start;
+volatile u64 l2_vmx_pt_finish;
+
+union vmx_basic basic;
+union vmx_ctrl_msr ctrl_pin_rev;
+union vmx_ctrl_msr ctrl_exit_rev;
+
+void l2_guest_code(void)
+{
+	u64 vmx_pt_delta;
+
+	vmcall();	/* exit to L1 so it can arm the preemption timer */
+	l2_vmx_pt_start = (rdtsc() >> vmx_pt_rate) << vmx_pt_rate;
+
+	/*
+	 * Spin until THRESHOLD1 timer ticks (TSC delta >> rate) have passed
+	 */
+	do {
+		l2_vmx_pt_finish = rdtsc();
+		vmx_pt_delta = (l2_vmx_pt_finish - l2_vmx_pt_start) >>
+				vmx_pt_rate;
+	} while (vmx_pt_delta < PREEMPTION_TIMER_VALUE_THRESHOLD1);
+
+	/*
+	 * Force L2 through a save and restore cycle (done by the host on sync)
+	 */
+	GUEST_SYNC(1);
+
+	l2_save_restore_done = 1;
+
+	/*
+	 * Keep sampling the TSC until the preemption timer fires and forces
+	 * an exit to L1; the last sample is reported as L2's expiry time
+	 */
+	while ((l2_vmx_pt_finish = rdtsc()))
+		;
+}
+
+void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+	u64 l1_vmx_pt_start;
+	u64 l1_vmx_pt_finish;
+	u64 l1_tsc_deadline, l2_tsc_deadline;
+
+	GUEST_ASSERT(vmx_pages->vmcs_gpa);
+	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+	GUEST_ASSERT(load_vmcs(vmx_pages));
+	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
+
+	prepare_vmcs(vmx_pages, l2_guest_code,
+		     &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+	/*
+	 * Check for preemption timer support; return (test ends) if missing
+	 */
+	basic.val = rdmsr(MSR_IA32_VMX_BASIC);
+	ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PINBASED_CTLS
+			: MSR_IA32_VMX_PINBASED_CTLS);
+	ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT_CTLS
+			: MSR_IA32_VMX_EXIT_CTLS);
+
+	if (!(ctrl_pin_rev.clr & PIN_BASED_VMX_PREEMPTION_TIMER) ||
+	    !(ctrl_exit_rev.clr & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER))
+		return;
+
+	GUEST_ASSERT(!vmlaunch());
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+	vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + vmreadz(VM_EXIT_INSTRUCTION_LEN));
+
+	/*
+	 * Turn on the preemption timer pin control, program it, and resume L2
+	 */
+	GUEST_ASSERT(!vmwrite(PIN_BASED_VM_EXEC_CONTROL,
+			      vmreadz(PIN_BASED_VM_EXEC_CONTROL) |
+			      PIN_BASED_VMX_PREEMPTION_TIMER));
+
+	GUEST_ASSERT(!vmwrite(VMX_PREEMPTION_TIMER_VALUE,
+			      PREEMPTION_TIMER_VALUE));
+
+	vmx_pt_rate = rdmsr(MSR_IA32_VMX_MISC) & 0x1F;	/* used as a TSC shift */
+
+	l2_save_restore_done = 0;
+
+	l1_vmx_pt_start = (rdtsc() >> vmx_pt_rate) << vmx_pt_rate;
+
+	GUEST_ASSERT(!vmresume());
+
+	l1_vmx_pt_finish = rdtsc();
+
+	/*
+	 * Ensure exit from L2 happens after L2 goes through
+	 * save and restore
+	 */
+	GUEST_ASSERT(l2_save_restore_done);
+
+	/*
+	 * Ensure the exit from L2 is due to preemption timer expiry
+	 */
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_PREEMPTION_TIMER);
+
+	l1_tsc_deadline = l1_vmx_pt_start +
+		(PREEMPTION_TIMER_VALUE << vmx_pt_rate);
+
+	l2_tsc_deadline = l2_vmx_pt_start +
+		(PREEMPTION_TIMER_VALUE << vmx_pt_rate);
+
+	/*
+	 * Sync with the host (stage 2) and pass the L1/L2 expiry TSC values
+	 * and TSC deadlines so that the host can verify they are as expected
+	 */
+	GUEST_SYNC_ARGS(2, l1_vmx_pt_finish, l1_tsc_deadline,
+		l2_vmx_pt_finish, l2_tsc_deadline);
+}
+
+void guest_code(struct vmx_pages *vmx_pages)
+{
+	if (vmx_pages)	/* skip the nested test if no VMX pages were passed */
+		l1_guest_code(vmx_pages);
+
+	GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+	vm_vaddr_t vmx_pages_gva = 0;
+
+	struct kvm_regs regs1, regs2;
+	struct kvm_vm *vm;
+	struct kvm_run *run;
+	struct kvm_vcpu *vcpu;
+	struct kvm_x86_state *state;
+	struct ucall uc;
+	int stage;
+
+	/*
+	 * AMD currently does not implement any VMX features, so for now we
+	 * just early out.
+	 */
+	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+
+	TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
+
+	/* Create VM */
+	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+	run = vcpu->run;
+
+	vcpu_regs_get(vcpu, &regs1);
+
+	vcpu_alloc_vmx(vm, &vmx_pages_gva);
+	vcpu_args_set(vcpu, 1, vmx_pages_gva);
+
+	for (stage = 1;; stage++) {
+		vcpu_run(vcpu);
+		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+			    "Stage %d: unexpected exit reason: %u (%s),\n",
+			    stage, run->exit_reason,
+			    exit_reason_str(run->exit_reason));
+
+		switch (get_ucall(vcpu, &uc)) {
+		case UCALL_ABORT:
+			REPORT_GUEST_ASSERT(uc);
+			/* NOT REACHED */
+		case UCALL_SYNC:
+			break;
+		case UCALL_DONE:
+			goto done;
+		default:
+			TEST_FAIL("Unknown ucall %lu", uc.cmd);
+		}
+
+		/* UCALL_SYNC is handled here.  */
+		TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+			    uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
+			    stage, (ulong)uc.args[1]);
+		/*
+		 * If this is stage 2 then we should verify the vmx pt expiry
+		 * is as expected.
+		 * From L1's perspective verify Preemption timer hasn't
+		 * expired too early.
+		 * From L2's perspective verify Preemption timer hasn't
+		 * expired too late.
+		 */
+		if (stage == 2) {
+
+			pr_info("Stage %d: L1 PT expiry TSC (%lu) , L1 TSC deadline (%lu)\n",
+				stage, uc.args[2], uc.args[3]);
+
+			pr_info("Stage %d: L2 PT expiry TSC (%lu) , L2 TSC deadline (%lu)\n",
+				stage, uc.args[4], uc.args[5]);
+
+			TEST_ASSERT(uc.args[2] >= uc.args[3],
+				"Stage %d: L1 PT expiry TSC (%lu) < L1 TSC deadline (%lu)",
+				stage, uc.args[2], uc.args[3]);
+
+			TEST_ASSERT(uc.args[4] < uc.args[5],
+				"Stage %d: L2 PT expiry TSC (%lu) > L2 TSC deadline (%lu)",
+				stage, uc.args[4], uc.args[5]);
+		}
+
+		state = vcpu_save_state(vcpu);
+		memset(&regs1, 0, sizeof(regs1));
+		vcpu_regs_get(vcpu, &regs1);
+
+		kvm_vm_release(vm);
+
+		/* Restore state in a new VM after every sync (simulated migration). */
+		vcpu = vm_recreate_with_one_vcpu(vm);
+		vcpu_load_state(vcpu, state);
+		run = vcpu->run;
+		kvm_x86_state_cleanup(state);
+
+		memset(&regs2, 0, sizeof(regs2));
+		vcpu_regs_get(vcpu, &regs2);
+		TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
+			    "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
+			    (ulong) regs2.rdi, (ulong) regs2.rsi);
+	}
+
+done:
+	kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c
index 9ef7fab39d48..41ea7028a1f8 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c
@@ -23,38 +23,37 @@
  * changes this should be updated.
  */
 #define VMCS12_REVISION 0x11e57ed0
-#define VCPU_ID 5
 
 bool have_evmcs;
 
-void test_nested_state(struct kvm_vm *vm, struct kvm_nested_state *state)
+void test_nested_state(struct kvm_vcpu *vcpu, struct kvm_nested_state *state)
 {
-	vcpu_nested_state_set(vm, VCPU_ID, state, false);
+	vcpu_nested_state_set(vcpu, state);
 }
 
-void test_nested_state_expect_errno(struct kvm_vm *vm,
+void test_nested_state_expect_errno(struct kvm_vcpu *vcpu,
 				    struct kvm_nested_state *state,
 				    int expected_errno)
 {
 	int rv;
 
-	rv = vcpu_nested_state_set(vm, VCPU_ID, state, true);
+	rv = __vcpu_nested_state_set(vcpu, state);
 	TEST_ASSERT(rv == -1 && errno == expected_errno,
 		"Expected %s (%d) from vcpu_nested_state_set but got rv: %i errno: %s (%d)",
 		strerror(expected_errno), expected_errno, rv, strerror(errno),
 		errno);
 }
 
-void test_nested_state_expect_einval(struct kvm_vm *vm,
+void test_nested_state_expect_einval(struct kvm_vcpu *vcpu,
 				     struct kvm_nested_state *state)
 {
-	test_nested_state_expect_errno(vm, state, EINVAL);
+	test_nested_state_expect_errno(vcpu, state, EINVAL);
 }
 
-void test_nested_state_expect_efault(struct kvm_vm *vm,
+void test_nested_state_expect_efault(struct kvm_vcpu *vcpu,
 				     struct kvm_nested_state *state)
 {
-	test_nested_state_expect_errno(vm, state, EFAULT);
+	test_nested_state_expect_errno(vcpu, state, EFAULT);
 }
 
 void set_revision_id_for_vmcs12(struct kvm_nested_state *state,
@@ -76,10 +75,8 @@ void set_default_state(struct kvm_nested_state *state)
 void set_default_vmx_state(struct kvm_nested_state *state, int size)
 {
 	memset(state, 0, size);
-	state->flags = KVM_STATE_NESTED_GUEST_MODE  |
-			KVM_STATE_NESTED_RUN_PENDING;
 	if (have_evmcs)
-		state->flags |= KVM_STATE_NESTED_EVMCS;
+		state->flags = KVM_STATE_NESTED_EVMCS;
 	state->format = 0;
 	state->size = size;
 	state->hdr.vmx.vmxon_pa = 0x1000;
@@ -88,7 +85,7 @@ void set_default_vmx_state(struct kvm_nested_state *state, int size)
 	set_revision_id_for_vmcs12(state, VMCS12_REVISION);
 }
 
-void test_vmx_nested_state(struct kvm_vm *vm)
+void test_vmx_nested_state(struct kvm_vcpu *vcpu)
 {
 	/* Add a page for VMCS12. */
 	const int state_sz = sizeof(struct kvm_nested_state) + getpagesize();
@@ -98,14 +95,14 @@ void test_vmx_nested_state(struct kvm_vm *vm)
 	/* The format must be set to 0. 0 for VMX, 1 for SVM. */
 	set_default_vmx_state(state, state_sz);
 	state->format = 1;
-	test_nested_state_expect_einval(vm, state);
+	test_nested_state_expect_einval(vcpu, state);
 
 	/*
 	 * We cannot virtualize anything if the guest does not have VMX
 	 * enabled.
 	 */
 	set_default_vmx_state(state, state_sz);
-	test_nested_state_expect_einval(vm, state);
+	test_nested_state_expect_einval(vcpu, state);
 
 	/*
 	 * We cannot virtualize anything if the guest does not have VMX
@@ -114,17 +111,17 @@ void test_vmx_nested_state(struct kvm_vm *vm)
 	 */
 	set_default_vmx_state(state, state_sz);
 	state->hdr.vmx.vmxon_pa = -1ull;
-	test_nested_state_expect_einval(vm, state);
+	test_nested_state_expect_einval(vcpu, state);
 
 	state->hdr.vmx.vmcs12_pa = -1ull;
 	state->flags = KVM_STATE_NESTED_EVMCS;
-	test_nested_state_expect_einval(vm, state);
+	test_nested_state_expect_einval(vcpu, state);
 
 	state->flags = 0;
-	test_nested_state(vm, state);
+	test_nested_state(vcpu, state);
 
 	/* Enable VMX in the guest CPUID. */
-	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+	vcpu_set_cpuid_feature(vcpu, X86_FEATURE_VMX);
 
 	/*
 	 * Setting vmxon_pa == -1ull and vmcs_pa == -1ull exits early without
@@ -135,29 +132,34 @@ void test_vmx_nested_state(struct kvm_vm *vm)
 	set_default_vmx_state(state, state_sz);
 	state->hdr.vmx.vmxon_pa = -1ull;
 	state->hdr.vmx.vmcs12_pa = -1ull;
-	test_nested_state_expect_einval(vm, state);
+	test_nested_state_expect_einval(vcpu, state);
 
 	state->flags &= KVM_STATE_NESTED_EVMCS;
 	if (have_evmcs) {
-		test_nested_state_expect_einval(vm, state);
-		vcpu_enable_evmcs(vm, VCPU_ID);
+		test_nested_state_expect_einval(vcpu, state);
+		vcpu_enable_evmcs(vcpu);
 	}
-	test_nested_state(vm, state);
+	test_nested_state(vcpu, state);
 
 	/* It is invalid to have vmxon_pa == -1ull and SMM flags non-zero. */
 	state->hdr.vmx.smm.flags = 1;
-	test_nested_state_expect_einval(vm, state);
+	test_nested_state_expect_einval(vcpu, state);
+
+	/* Invalid flags are rejected. */
+	set_default_vmx_state(state, state_sz);
+	state->hdr.vmx.flags = ~0;
+	test_nested_state_expect_einval(vcpu, state);
 
 	/* It is invalid to have vmxon_pa == -1ull and vmcs_pa != -1ull. */
 	set_default_vmx_state(state, state_sz);
 	state->hdr.vmx.vmxon_pa = -1ull;
 	state->flags = 0;
-	test_nested_state_expect_einval(vm, state);
+	test_nested_state_expect_einval(vcpu, state);
 
 	/* It is invalid to have vmxon_pa set to a non-page aligned address. */
 	set_default_vmx_state(state, state_sz);
 	state->hdr.vmx.vmxon_pa = 1;
-	test_nested_state_expect_einval(vm, state);
+	test_nested_state_expect_einval(vcpu, state);
 
 	/*
 	 * It is invalid to have KVM_STATE_NESTED_SMM_GUEST_MODE and
@@ -167,7 +169,7 @@ void test_vmx_nested_state(struct kvm_vm *vm)
 	state->flags = KVM_STATE_NESTED_GUEST_MODE  |
 		      KVM_STATE_NESTED_RUN_PENDING;
 	state->hdr.vmx.smm.flags = KVM_STATE_NESTED_SMM_GUEST_MODE;
-	test_nested_state_expect_einval(vm, state);
+	test_nested_state_expect_einval(vcpu, state);
 
 	/*
 	 * It is invalid to have any of the SMM flags set besides:
@@ -177,29 +179,50 @@ void test_vmx_nested_state(struct kvm_vm *vm)
 	set_default_vmx_state(state, state_sz);
 	state->hdr.vmx.smm.flags = ~(KVM_STATE_NESTED_SMM_GUEST_MODE |
 				KVM_STATE_NESTED_SMM_VMXON);
-	test_nested_state_expect_einval(vm, state);
+	test_nested_state_expect_einval(vcpu, state);
 
 	/* Outside SMM, SMM flags must be zero. */
 	set_default_vmx_state(state, state_sz);
 	state->flags = 0;
 	state->hdr.vmx.smm.flags = KVM_STATE_NESTED_SMM_GUEST_MODE;
-	test_nested_state_expect_einval(vm, state);
+	test_nested_state_expect_einval(vcpu, state);
 
-	/* Size must be large enough to fit kvm_nested_state and vmcs12. */
+	/*
+	 * Size must be large enough to fit kvm_nested_state and vmcs12
+	 * if VMCS12 physical address is set
+	 */
 	set_default_vmx_state(state, state_sz);
 	state->size = sizeof(*state);
-	test_nested_state(vm, state);
+	state->flags = 0;
+	test_nested_state_expect_einval(vcpu, state);
+
+	set_default_vmx_state(state, state_sz);
+	state->size = sizeof(*state);
+	state->flags = 0;
+	state->hdr.vmx.vmcs12_pa = -1;
+	test_nested_state(vcpu, state);
+
+	/*
+	 * KVM_SET_NESTED_STATE succeeds with invalid VMCS
+	 * contents but L2 not running.
+	 */
+	set_default_vmx_state(state, state_sz);
+	state->flags = 0;
+	test_nested_state(vcpu, state);
+
+	/* Invalid flags are rejected, even if no VMCS loaded. */
+	set_default_vmx_state(state, state_sz);
+	state->size = sizeof(*state);
+	state->flags = 0;
+	state->hdr.vmx.vmcs12_pa = -1;
+	state->hdr.vmx.flags = ~0;
+	test_nested_state_expect_einval(vcpu, state);
 
 	/* vmxon_pa cannot be the same address as vmcs_pa. */
 	set_default_vmx_state(state, state_sz);
 	state->hdr.vmx.vmxon_pa = 0;
 	state->hdr.vmx.vmcs12_pa = 0;
-	test_nested_state_expect_einval(vm, state);
-
-	/* The revision id for vmcs12 must be VMCS12_REVISION. */
-	set_default_vmx_state(state, state_sz);
-	set_revision_id_for_vmcs12(state, 0);
-	test_nested_state_expect_einval(vm, state);
+	test_nested_state_expect_einval(vcpu, state);
 
 	/*
 	 * Test that if we leave nesting the state reflects that when we get
@@ -209,10 +232,10 @@ void test_vmx_nested_state(struct kvm_vm *vm)
 	state->hdr.vmx.vmxon_pa = -1ull;
 	state->hdr.vmx.vmcs12_pa = -1ull;
 	state->flags = 0;
-	test_nested_state(vm, state);
-	vcpu_nested_state_get(vm, VCPU_ID, state);
+	test_nested_state(vcpu, state);
+	vcpu_nested_state_get(vcpu, state);
 	TEST_ASSERT(state->size >= sizeof(*state) && state->size <= state_sz,
-		    "Size must be between %d and %d.  The size returned was %d.",
+		    "Size must be between %ld and %d.  The size returned was %d.",
 		    sizeof(*state), state_sz, state->size);
 	TEST_ASSERT(state->hdr.vmx.vmxon_pa == -1ull, "vmxon_pa must be -1ull.");
 	TEST_ASSERT(state->hdr.vmx.vmcs12_pa == -1ull, "vmcs_pa must be -1ull.");
@@ -224,29 +247,32 @@ int main(int argc, char *argv[])
 {
 	struct kvm_vm *vm;
 	struct kvm_nested_state state;
+	struct kvm_vcpu *vcpu;
 
 	have_evmcs = kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS);
 
-	if (!kvm_check_cap(KVM_CAP_NESTED_STATE)) {
-		printf("KVM_CAP_NESTED_STATE not available, skipping test\n");
-		exit(KSFT_SKIP);
-	}
+	TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
 
 	/*
 	 * AMD currently does not implement set_nested_state, so for now we
 	 * just early out.
 	 */
-	nested_vmx_check_supported();
+	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
 
-	vm = vm_create_default(VCPU_ID, 0, 0);
+	vm = vm_create_with_one_vcpu(&vcpu, NULL);
+
+	/*
+	 * First run tests with VMX disabled to check error handling.
+	 */
+	vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_VMX);
 
 	/* Passing a NULL kvm_nested_state causes a EFAULT. */
-	test_nested_state_expect_efault(vm, NULL);
+	test_nested_state_expect_efault(vcpu, NULL);
 
 	/* 'size' cannot be smaller than sizeof(kvm_nested_state). */
 	set_default_state(&state);
 	state.size = 0;
-	test_nested_state_expect_einval(vm, &state);
+	test_nested_state_expect_einval(vcpu, &state);
 
 	/*
 	 * Setting the flags 0xf fails the flags check.  The only flags that
@@ -257,7 +283,7 @@ int main(int argc, char *argv[])
 	 */
 	set_default_state(&state);
 	state.flags = 0xf;
-	test_nested_state_expect_einval(vm, &state);
+	test_nested_state_expect_einval(vcpu, &state);
 
 	/*
 	 * If KVM_STATE_NESTED_RUN_PENDING is set then
@@ -265,9 +291,9 @@ int main(int argc, char *argv[])
 	 */
 	set_default_state(&state);
 	state.flags = KVM_STATE_NESTED_RUN_PENDING;
-	test_nested_state_expect_einval(vm, &state);
+	test_nested_state_expect_einval(vcpu, &state);
 
-	test_vmx_nested_state(vm);
+	test_vmx_nested_state(vcpu);
 
 	kvm_vm_free(vm);
 	return 0;
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
index 69e482a95c47..5943187e8594 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
@@ -32,9 +32,6 @@
 #define MSR_IA32_TSC_ADJUST 0x3b
 #endif
 
-#define PAGE_SIZE	4096
-#define VCPU_ID		5
-
 #define TSC_ADJUST_VALUE (1ll << 32)
 #define TSC_OFFSET_VALUE -(1ll << 48)
 
@@ -121,36 +118,36 @@ static void l1_guest_code(struct vmx_pages *vmx_pages)
 
 static void report(int64_t val)
 {
-	printf("IA32_TSC_ADJUST is %ld (%lld * TSC_ADJUST_VALUE + %lld).\n",
-	       val, val / TSC_ADJUST_VALUE, val % TSC_ADJUST_VALUE);
+	pr_info("IA32_TSC_ADJUST is %ld (%lld * TSC_ADJUST_VALUE + %lld).\n",
+		val, val / TSC_ADJUST_VALUE, val % TSC_ADJUST_VALUE);
 }
 
 int main(int argc, char *argv[])
 {
 	vm_vaddr_t vmx_pages_gva;
+	struct kvm_vcpu *vcpu;
 
-	nested_vmx_check_supported();
+	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
 
-	vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
-	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+	vm = vm_create_with_one_vcpu(&vcpu, (void *) l1_guest_code);
 
 	/* Allocate VMX pages and shared descriptors (vmx_pages). */
 	vcpu_alloc_vmx(vm, &vmx_pages_gva);
-	vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
+	vcpu_args_set(vcpu, 1, vmx_pages_gva);
 
 	for (;;) {
-		volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+		volatile struct kvm_run *run = vcpu->run;
 		struct ucall uc;
 
-		vcpu_run(vm, VCPU_ID);
+		vcpu_run(vcpu);
 		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
 			    "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
 			    run->exit_reason,
 			    exit_reason_str(run->exit_reason));
 
-		switch (get_ucall(vm, VCPU_ID, &uc)) {
+		switch (get_ucall(vcpu, &uc)) {
 		case UCALL_ABORT:
-			TEST_ASSERT(false, "%s", (const char *)uc.args[0]);
+			REPORT_GUEST_ASSERT(uc);
 			/* NOT REACHED */
 		case UCALL_SYNC:
 			report(uc.args[1]);
@@ -158,11 +155,11 @@ int main(int argc, char *argv[])
 		case UCALL_DONE:
 			goto done;
 		default:
-			TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd);
+			TEST_FAIL("Unknown ucall %lu", uc.cmd);
 		}
 	}
 
-	kvm_vm_free(vm);
 done:
+	kvm_vm_free(vm);
 	return 0;
 }
diff --git a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
new file mode 100644
index 000000000000..3d272d7f961e
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
@@ -0,0 +1,495 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * xapic_ipi_test
+ *
+ * Copyright (C) 2020, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Test that when the APIC is in xAPIC mode, a vCPU can send an IPI to wake
+ * another vCPU that is halted when KVM's backing page for the APIC access
+ * address has been moved by mm.
+ *
+ * The test starts two vCPUs: one that sends IPIs and one that continually
+ * executes HLT. The sender checks that the halter has woken from the HLT and
+ * has reentered HLT before sending the next IPI. While the vCPUs are running,
+ * the host continually calls migrate_pages to move all of the process' pages
+ * amongst the available numa nodes on the machine.
+ *
+ * Migration is a command line option. When used on non-NUMA machines it will
+ * exit with error. The test is still useful on non-NUMA for testing IPIs.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <getopt.h>
+#include <pthread.h>
+#include <inttypes.h>
+#include <string.h>
+#include <time.h>
+
+#include "kvm_util.h"
+#include "numaif.h"
+#include "processor.h"
+#include "test_util.h"
+#include "vmx.h"
+
+/* Default running time for the test */
+#define DEFAULT_RUN_SECS 3
+
+/* Default delay between migrate_pages calls (microseconds) */
+#define DEFAULT_DELAY_USECS 500000
+
+/*
+ * Vector for IPI from sender vCPU to halting vCPU.
+ * Value is arbitrary and was chosen for the alternating bit pattern. Any
+ * value should work.
+ */
+#define IPI_VECTOR	 0xa5
+
+/*
+ * Incremented in the IPI handler. Provides evidence to the sender that the IPI
+ * arrived at the destination
+ */
+static volatile uint64_t ipis_rcvd;
+
+/* Data struct shared between host main thread and vCPUs */
+struct test_data_page {
+	uint32_t halter_apic_id;	/* set by halter, used for sender's ICR2 */
+	volatile uint64_t hlt_count;	/* bumped by halter before each HLT */
+	volatile uint64_t wake_count;	/* bumped by halter after each HLT */
+	uint64_t ipis_sent;		/* bumped by sender per IPI written */
+	uint64_t migrations_attempted;
+	uint64_t migrations_completed;
+	uint32_t icr;			/* ICR value the sender writes */
+	uint32_t icr2;			/* ICR2 value the sender writes */
+	uint32_t halter_tpr;
+	uint32_t halter_ppr;
+
+	/*
+	 *  Record local version register as a cross-check that APIC access
+	 *  worked. Value should match what KVM reports (APIC_VERSION in
+	 *  arch/x86/kvm/lapic.c). If test is failing, check that values match
+	 *  to determine whether APIC access exits are working.
+	 */
+	uint32_t halter_lvr;
+};
+
+struct thread_params {
+	struct test_data_page *data;	/* host mapping of the shared data page */
+	struct kvm_vcpu *vcpu;		/* vCPU this thread runs */
+	uint64_t *pipis_rcvd; /* host address of ipis_rcvd global */
+};
+
+void verify_apic_base_addr(void)
+{
+	uint64_t msr = rdmsr(MSR_IA32_APICBASE);
+	uint64_t base = GET_APIC_BASE(msr);
+
+	GUEST_ASSERT(base == APIC_DEFAULT_GPA);	/* main() maps only this GPA */
+}
+
+static void halter_guest_code(struct test_data_page *data)
+{
+	verify_apic_base_addr();
+	xapic_enable();
+
+	data->halter_apic_id = GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID));
+	data->halter_lvr = xapic_read_reg(APIC_LVR);
+
+	/*
+	 * Loop forever HLTing and recording halts & wakes. Disable interrupts
+	 * each time around to minimize window between signaling the pending
+	 * halt to the sender vCPU and executing the halt. No need to disable on
+	 * first run as this vCPU executes first and the host waits for it to
+	 * signal going into first halt before starting the sender vCPU. Record
+	 * TPR and PPR for diagnostic purposes in case the test fails.
+	 */
+	for (;;) {
+		data->halter_tpr = xapic_read_reg(APIC_TASKPRI);
+		data->halter_ppr = xapic_read_reg(APIC_PROCPRI);
+		data->hlt_count++;	/* signals the pending halt */
+		asm volatile("sti; hlt; cli");	/* IRQs enabled only around the HLT */
+		data->wake_count++;
+	}
+}
+
+/*
+ * Runs on halter vCPU when IPI arrives. Write an arbitrary non-zero value to
+ * enable diagnosing errant writes to the APIC access address backing page in
+ * case of test failure.
+ */
+static void guest_ipi_handler(struct ex_regs *regs)
+{
+	ipis_rcvd++;	/* polled by sender_guest_code() */
+	xapic_write_reg(APIC_EOI, 77);
+}
+
+static void sender_guest_code(struct test_data_page *data)
+{
+	uint64_t last_wake_count;
+	uint64_t last_hlt_count;
+	uint64_t last_ipis_rcvd_count;
+	uint32_t icr_val;
+	uint32_t icr2_val;
+	uint64_t tsc_start;
+
+	verify_apic_base_addr();
+	xapic_enable();
+
+	/*
+	 * Init interrupt command register for sending IPIs
+	 *
+	 * Delivery mode=fixed, per SDM:
+	 *   "Delivers the interrupt specified in the vector field to the target
+	 *    processor."
+	 *
+	 * Destination mode=physical i.e. specify target by its local APIC
+	 * ID. This vCPU assumes that the halter vCPU has already started and
+	 * set data->halter_apic_id.
+	 */
+	icr_val = (APIC_DEST_PHYSICAL | APIC_DM_FIXED | IPI_VECTOR);
+	icr2_val = SET_APIC_DEST_FIELD(data->halter_apic_id);
+	data->icr = icr_val;
+	data->icr2 = icr2_val;
+
+	last_wake_count = data->wake_count;
+	last_hlt_count = data->hlt_count;
+	last_ipis_rcvd_count = ipis_rcvd;
+	for (;;) {
+		/*
+		 * Send IPI to halter vCPU.
+		 * First IPI can be sent unconditionally because halter vCPU
+		 * starts earlier.
+		 */
+		xapic_write_reg(APIC_ICR2, icr2_val);
+		xapic_write_reg(APIC_ICR, icr_val);
+		data->ipis_sent++;
+
+		/*
+		 * Wait up to ~1 sec for halter to indicate that it has:
+		 * 1. Received the IPI
+		 * 2. Woken up from the halt
+		 * 3. Gone back into halt
+		 * The timeout is 2 billion TSC ticks; current CPUs typically
+		 * run at 2.x GHz. On timeout the GUEST_ASSERT below fails.
+		 */
+		tsc_start = rdtsc();
+		while (rdtsc() - tsc_start < 2000000000) {
+			if ((ipis_rcvd != last_ipis_rcvd_count) &&
+			    (data->wake_count != last_wake_count) &&
+			    (data->hlt_count != last_hlt_count))
+				break;
+		}
+
+		GUEST_ASSERT((ipis_rcvd != last_ipis_rcvd_count) &&
+			     (data->wake_count != last_wake_count) &&
+			     (data->hlt_count != last_hlt_count));
+
+		last_wake_count = data->wake_count;
+		last_hlt_count = data->hlt_count;
+		last_ipis_rcvd_count = ipis_rcvd;
+	}
+}
+
+static void *vcpu_thread(void *arg)
+{
+	struct thread_params *params = (struct thread_params *)arg;
+	struct kvm_vcpu *vcpu = params->vcpu;
+	struct ucall uc;
+	int old;
+	int r;
+	unsigned int exit_reason;
+
+	r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
+	TEST_ASSERT(r == 0,
+		    "pthread_setcanceltype failed on vcpu_id=%u with errno=%d",
+		    vcpu->id, r);
+
+	fprintf(stderr, "vCPU thread running vCPU %u\n", vcpu->id);
+	vcpu_run(vcpu);	/* guest code loops forever; returning means a guest exit */
+	exit_reason = vcpu->run->exit_reason;
+
+	TEST_ASSERT(exit_reason == KVM_EXIT_IO,
+		    "vCPU %u exited with unexpected exit reason %u-%s, expected KVM_EXIT_IO",
+		    vcpu->id, exit_reason, exit_reason_str(exit_reason));
+
+	if (get_ucall(vcpu, &uc) == UCALL_ABORT) {	/* guest assertion failed */
+		TEST_ASSERT(false,
+			    "vCPU %u exited with error: %s.\n"
+			    "Sending vCPU sent %lu IPIs to halting vCPU\n"
+			    "Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n"
+			    "Halter TPR=%#x PPR=%#x LVR=%#x\n"
+			    "Migrations attempted: %lu\n"
+			    "Migrations completed: %lu\n",
+			    vcpu->id, (const char *)uc.args[0],
+			    params->data->ipis_sent, params->data->hlt_count,
+			    params->data->wake_count,
+			    *params->pipis_rcvd, params->data->halter_tpr,
+			    params->data->halter_ppr, params->data->halter_lvr,
+			    params->data->migrations_attempted,
+			    params->data->migrations_completed);
+	}
+
+	return NULL;
+}
+
+static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu)
+{
+	void *retval;
+	int r;
+
+	r = pthread_cancel(thread);
+	TEST_ASSERT(r == 0,
+		    "pthread_cancel on vcpu_id=%d failed with errno=%d",
+		    vcpu->id, r);
+
+	r = pthread_join(thread, &retval);	/* retval tells how the thread ended */
+	TEST_ASSERT(r == 0,
+		    "pthread_join on vcpu_id=%d failed with errno=%d",
+		    vcpu->id, r);
+	TEST_ASSERT(retval == PTHREAD_CANCELED,
+		    "expected retval=%p, got %p", PTHREAD_CANCELED,
+		    retval);
+}
+
+void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs,
+		   uint64_t *pipis_rcvd)
+{
+	long pages_not_moved;
+	unsigned long nodemask = 0;
+	unsigned long nodemasks[sizeof(nodemask) * 8];
+	int nodes = 0;
+	time_t start_time, last_update, now;
+	time_t interval_secs = 1;
+	int i, r;
+	int from, to;
+	unsigned long bit;
+	uint64_t hlt_count;
+	uint64_t wake_count;
+	uint64_t ipis_sent;
+
+	fprintf(stderr, "Calling migrate_pages every %d microseconds\n",
+		delay_usecs);
+
+	/* Get set of first 64 numa nodes available */
+	r = get_mempolicy(NULL, &nodemask, sizeof(nodemask) * 8,
+			  0, MPOL_F_MEMS_ALLOWED);
+	TEST_ASSERT(r == 0, "get_mempolicy failed errno=%d", errno);
+
+	fprintf(stderr, "Numa nodes found amongst first %lu possible nodes "
+		"(each 1-bit indicates node is present): %#lx\n",
+		sizeof(nodemask) * 8, nodemask);
+
+	/* Init array of masks containing a single-bit in each, one for each
+	 * available node. migrate_pages called below requires specifying nodes
+	 * as bit masks.
+	 */
+	for (i = 0, bit = 1; i < sizeof(nodemask) * 8; i++, bit <<= 1) {
+		if (nodemask & bit) {
+			nodemasks[nodes] = nodemask & bit;
+			nodes++;
+		}
+	}
+
+	TEST_ASSERT(nodes > 1,
+		    "Did not find at least 2 numa nodes. Can't do migration\n");
+
+	fprintf(stderr, "Migrating amongst %d nodes found\n", nodes);
+
+	from = 0;
+	to = 1;
+	start_time = time(NULL);
+	last_update = start_time;
+
+	ipis_sent = data->ipis_sent;
+	hlt_count = data->hlt_count;
+	wake_count = data->wake_count;
+
+	while ((int)(time(NULL) - start_time) < run_secs) {
+		data->migrations_attempted++;
+
+		/*
+		 * migrate_pages with PID=0 migrates all pages of this process
+		 * between the nodes given as bitmasks (maxnode counts bits,
+		 * not bytes). The page backing the APIC access address belongs
+		 * to this process because KVM allocates it in the context of
+		 * the KVM_CREATE_VCPU ioctl. If that assumption ever changes
+		 * this test may break or give a false positive signal.
+		 */
+		pages_not_moved = migrate_pages(0, sizeof(nodemasks[from]) * 8,
+						&nodemasks[from],
+						&nodemasks[to]);
+		if (pages_not_moved < 0)
+			fprintf(stderr,
+				"migrate_pages failed, errno=%d\n", errno);
+		else if (pages_not_moved > 0)
+			fprintf(stderr,
+				"migrate_pages could not move %ld pages\n",
+				pages_not_moved);
+		else
+			data->migrations_completed++;
+
+		from = to;
+		to++;
+		if (to == nodes)
+			to = 0;
+
+		now = time(NULL);
+		if (((now - start_time) % interval_secs == 0) &&
+		    (now != last_update)) {
+			last_update = now;
+			fprintf(stderr,
+				"%lu seconds: Migrations attempted=%lu completed=%lu, "
+				"IPIs sent=%lu received=%lu, HLTs=%lu wakes=%lu\n",
+				now - start_time, data->migrations_attempted,
+				data->migrations_completed,
+				data->ipis_sent, *pipis_rcvd,
+				data->hlt_count, data->wake_count);
+
+			TEST_ASSERT(ipis_sent != data->ipis_sent &&
+				    hlt_count != data->hlt_count &&
+				    wake_count != data->wake_count,
+				    "IPI, HLT and wake count have not increased "
+				    "in the last %lu seconds. "
+				    "HLTer is likely hung.\n", interval_secs);
+
+			ipis_sent = data->ipis_sent;
+			hlt_count = data->hlt_count;
+			wake_count = data->wake_count;
+		}
+		usleep(delay_usecs);
+	}
+}
+
+void get_cmdline_args(int argc, char *argv[], int *run_secs,
+		      bool *migrate, int *delay_usecs)
+{
+	for (;;) {
+		int opt = getopt(argc, argv, "s:d:m");
+
+		if (opt == -1)	/* no more options */
+			break;
+		switch (opt) {
+		case 's':
+			*run_secs = parse_size(optarg);
+			break;
+		case 'm':
+			*migrate = true;
+			break;
+		case 'd':
+			*delay_usecs = parse_size(optarg);
+			break;
+		default:	/* any other getopt result: fail with the usage text */
+			TEST_ASSERT(false,
+				    "Usage: -s <runtime seconds>. Default is %d seconds.\n"
+				    "-m adds calls to migrate_pages while vCPUs are running."
+				    " Default is no migrations.\n"
+				    "-d <delay microseconds> - delay between migrate_pages() calls."
+				    " Default is %d microseconds.\n",
+				    DEFAULT_RUN_SECS, DEFAULT_DELAY_USECS);
+		}
+	}
+}
+
+int main(int argc, char *argv[])
+{
+	int r;
+	int wait_secs;
+	const int max_halter_wait = 10;
+	int run_secs = 0;
+	int delay_usecs = 0;
+	struct test_data_page *data;
+	vm_vaddr_t test_data_page_vaddr;
+	bool migrate = false;
+	pthread_t threads[2];
+	struct thread_params params[2];
+	struct kvm_vm *vm;
+	uint64_t *pipis_rcvd;
+
+	get_cmdline_args(argc, argv, &run_secs, &migrate, &delay_usecs);
+	if (run_secs <= 0)
+		run_secs = DEFAULT_RUN_SECS;
+	if (delay_usecs <= 0)
+		delay_usecs = DEFAULT_DELAY_USECS;
+
+	vm = vm_create_with_one_vcpu(&params[0].vcpu, halter_guest_code);
+
+	vm_init_descriptor_tables(vm);
+	vcpu_init_descriptor_tables(params[0].vcpu);
+	vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler);
+
+	virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
+
+	params[1].vcpu = vm_vcpu_add(vm, 1, sender_guest_code);
+
+	test_data_page_vaddr = vm_vaddr_alloc_page(vm);
+	data = addr_gva2hva(vm, test_data_page_vaddr);
+	memset(data, 0, sizeof(*data));
+	params[0].data = data;
+	params[1].data = data;
+
+	vcpu_args_set(params[0].vcpu, 1, test_data_page_vaddr);
+	vcpu_args_set(params[1].vcpu, 1, test_data_page_vaddr);
+
+	pipis_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ipis_rcvd);
+	params[0].pipis_rcvd = pipis_rcvd;
+	params[1].pipis_rcvd = pipis_rcvd;
+
+	/* Start the halter first and poll hlt_count until its first HLT. */
+	r = pthread_create(&threads[0], NULL, vcpu_thread, &params[0]);
+	TEST_ASSERT(r == 0,
+		    "pthread_create halter failed errno=%d", r);
+	fprintf(stderr, "Halter vCPU thread started\n");
+
+	wait_secs = 0;
+	while ((wait_secs < max_halter_wait) && !data->hlt_count) {
+		sleep(1);
+		wait_secs++;
+	}
+
+	TEST_ASSERT(data->hlt_count,
+		    "Halter vCPU did not execute first HLT within %d seconds",
+		    max_halter_wait);
+
+	fprintf(stderr,
+		"Halter vCPU thread reported its APIC ID: %u after %d seconds.\n",
+		data->halter_apic_id, wait_secs);
+
+	r = pthread_create(&threads[1], NULL, vcpu_thread, &params[1]);
+	TEST_ASSERT(r == 0, "pthread_create sender failed errno=%d", r);
+
+	fprintf(stderr,
+		"IPI sender vCPU thread started. Letting vCPUs run for %d seconds.\n",
+		run_secs);
+
+	if (!migrate)
+		sleep(run_secs);
+	else
+		do_migrations(data, run_secs, delay_usecs, pipis_rcvd);
+
+	/*
+	 * Guests never finish on their own: cancel both threads and join them.
+	 */
+	cancel_join_vcpu_thread(threads[0], params[0].vcpu);
+	cancel_join_vcpu_thread(threads[1], params[1].vcpu);
+
+	fprintf(stderr,
+		"Test successful after running for %d seconds.\n"
+		"Sending vCPU sent %lu IPIs to halting vCPU\n"
+		"Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n"
+		"Halter APIC ID=%#x\n"
+		"Sender ICR value=%#x ICR2 value=%#x\n"
+		"Halter TPR=%#x PPR=%#x LVR=%#x\n"
+		"Migrations attempted: %lu\n"
+		"Migrations completed: %lu\n",
+		run_secs, data->ipis_sent,
+		data->hlt_count, data->wake_count, *pipis_rcvd,
+		data->halter_apic_id,
+		data->icr, data->icr2,
+		data->halter_tpr, data->halter_ppr, data->halter_lvr,
+		data->migrations_attempted, data->migrations_completed);
+
+	kvm_vm_free(vm);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c
new file mode 100644
index 000000000000..6f7a5ef66718
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "apic.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+
+struct xapic_vcpu {
+	struct kvm_vcpu *vcpu;	/* vCPU the ICR checks are run against */
+	bool is_x2apic;		/* selects the x2APIC paths in __test_icr()/____test_icr() */
+};
+
+/* xAPIC guest: forever copy the 64-bit value found in IRR0/IRR1 into ICR2:ICR. */
+static void xapic_guest_code(void)
+{
+	asm volatile("cli");
+	xapic_enable();
+
+	for (;;) {
+		uint64_t lo = xapic_read_reg(APIC_IRR);
+		uint64_t hi = xapic_read_reg(APIC_IRR + 0x10);
+
+		xapic_write_reg(APIC_ICR2, hi);
+		xapic_write_reg(APIC_ICR, lo);
+		GUEST_SYNC(lo | hi << 32);
+	}
+}
+
+/* x2APIC guest: forever copy the 64-bit value found in IRR0/IRR1 into the ICR. */
+static void x2apic_guest_code(void)
+{
+	asm volatile("cli");
+	x2apic_enable();
+
+	for (;;) {
+		uint64_t lo = x2apic_read_reg(APIC_IRR);
+		uint64_t hi = x2apic_read_reg(APIC_IRR + 0x10);
+
+		x2apic_write_reg(APIC_ICR, lo | hi << 32);
+		GUEST_SYNC(lo | hi << 32);
+	}
+}
+
+static void ____test_icr(struct xapic_vcpu *x, uint64_t val)
+{
+	struct kvm_vcpu *vcpu = x->vcpu;
+	struct kvm_lapic_state xapic;
+	struct ucall uc;
+	uint64_t icr;
+	/*
+	 * Have the guest write @val to its ICR, then check KVM's view of it.
+	 * Tell the guest what ICR value to write via the IRR: all IRR bits are
+	 * valid and should not be modified by KVM (ignoring the fact that
+	 * vectors 0-15 are technically illegal).
+	 */
+	vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic);
+	*((u32 *)&xapic.regs[APIC_IRR]) = val;
+	*((u32 *)&xapic.regs[APIC_IRR + 0x10]) = val >> 32;
+	vcpu_ioctl(vcpu, KVM_SET_LAPIC, &xapic);
+
+	vcpu_run(vcpu);
+	ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC);
+	ASSERT_EQ(uc.args[1], val);	/* the guest echoes the value it wrote */
+
+	vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic);
+	icr = (u64)(*((u32 *)&xapic.regs[APIC_ICR])) |
+	      (u64)(*((u32 *)&xapic.regs[APIC_ICR2])) << 32;
+	if (!x->is_x2apic) {
+		val &= (-1u | (0xffull << (32 + 24)));	/* keep bits 31:0 and 63:56 only */
+		ASSERT_EQ(icr, val & ~APIC_ICR_BUSY);
+	} else {
+		ASSERT_EQ(icr & ~APIC_ICR_BUSY, val & ~APIC_ICR_BUSY);	/* BUSY ignored on both sides */
+	}
+}
+
+#define X2APIC_RSVED_BITS_MASK  (GENMASK_ULL(31,20) | \
+				 GENMASK_ULL(17,16) | \
+				 GENMASK_ULL(13,13))
+/* Check @val with APIC_ICR_BUSY set and then clear; x2APIC reserved bits are stripped first. */
+static void __test_icr(struct xapic_vcpu *x, uint64_t val)
+{
+	if (x->is_x2apic) {
+		/*
+		 * Hardware writing the vICR requires reserved bits 31:20, 17:16
+		 * and 13 to be zero to avoid a #GP, so mask them out of @val.
+		 */
+		val &= ~X2APIC_RSVED_BITS_MASK;
+	}
+	____test_icr(x, val | APIC_ICR_BUSY);
+	____test_icr(x, val & ~(u64)APIC_ICR_BUSY);
+}
+
+/* Feed a range of fixed, directed, shorthand and garbage ICR values to __test_icr(). */
+static void test_icr(struct xapic_vcpu *x)
+{
+	const uint64_t first_absent = x->vcpu->id + 1;
+	uint64_t base, vec, dest, mode;
+
+	base = APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_FIXED;
+	for (vec = 0; vec < 0x100; vec++)
+		__test_icr(x, base | vec);
+
+	base = APIC_INT_ASSERT | APIC_DM_FIXED;
+	for (vec = 0; vec < 0x100; vec++)
+		__test_icr(x, base | vec);
+
+	/*
+	 * Send all flavors of IPIs to non-existent vCPUs.  TODO: use number of
+	 * vCPUs, not vcpu.id + 1.  Arbitrarily use vector 0xff.
+	 */
+	base = APIC_INT_ASSERT | 0xff;
+	for (dest = first_absent; dest < 0xff; dest++)
+		for (mode = 0; mode < 8; mode++)
+			__test_icr(x, dest << (32 + 24) | base | (mode << 8));
+
+	/* And again with a shorthand destination for all types of IPIs. */
+	base = APIC_DEST_ALLBUT | APIC_INT_ASSERT;
+	for (mode = 0; mode < 8; mode++)
+		__test_icr(x, base | (mode << 8));
+
+	/* Finally a few garbage values, with the delivery-mode bits cleared. */
+	__test_icr(x, 0xa5a5a5a5a5a5a5a5 & ~APIC_DM_FIXED_MASK);
+	__test_icr(x, 0x5a5a5a5a5a5a5a5a & ~APIC_DM_FIXED_MASK);
+	__test_icr(x, -1ull & ~APIC_DM_FIXED_MASK);
+}
+
+/* Run the ICR checks first on an x2APIC guest, then on a fresh xAPIC-only guest. */
+int main(int argc, char *argv[])
+{
+	struct xapic_vcpu x = { .vcpu = NULL, .is_x2apic = true };
+	struct kvm_vm *vm;
+
+	/* Pass 1: guest running x2apic_guest_code(). */
+	vm = vm_create_with_one_vcpu(&x.vcpu, x2apic_guest_code);
+	test_icr(&x);
+	kvm_vm_free(vm);
+
+	/*
+	 * Use a second VM for the xAPIC test so that x2APIC can be hidden from
+	 * the guest in order to test AVIC.  KVM disallows changing CPUID after
+	 * KVM_RUN and AVIC is disabled if _any_ vCPU is allowed to use x2APIC.
+	 */
+	x.is_x2apic = false;
+	vm = vm_create_with_one_vcpu(&x.vcpu, xapic_guest_code);
+	vcpu_clear_cpuid_feature(x.vcpu, X86_FEATURE_X2APIC);
+
+	/* Identity-map the default APIC address for the guest. */
+	virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
+	test_icr(&x);
+	kvm_vm_free(vm);
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
new file mode 100644
index 000000000000..2a5727188c8d
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
@@ -0,0 +1,1020 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * xen_shinfo_test
+ *
+ * Copyright © 2021 Amazon.com, Inc. or its affiliates.
+ *
+ * Xen shared_info / pvclock testing
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#include <stdint.h>
+#include <time.h>
+#include <sched.h>
+#include <signal.h>
+#include <pthread.h>
+
+#include <sys/eventfd.h>
+
+/* Defined in include/linux/kvm_types.h */
+#define GPA_INVALID		(~(ulong)0)
+/* The shared_info region: two pages, using the same value as guest virtual and physical address. */
+#define SHINFO_REGION_GVA	0xc0000000ULL
+#define SHINFO_REGION_GPA	0xc0000000ULL
+#define SHINFO_REGION_SLOT	10
+/* Extra memslot that main() adds mid-test (step 9) before delivering an event. */
+#define DUMMY_REGION_GPA	(SHINFO_REGION_GPA + (2 * PAGE_SIZE))
+#define DUMMY_REGION_SLOT	11
+/* Guest-physical addresses of the structures placed inside the shared_info region. */
+#define SHINFO_ADDR	(SHINFO_REGION_GPA)
+#define PVTIME_ADDR	(SHINFO_REGION_GPA + PAGE_SIZE)
+#define RUNSTATE_ADDR	(SHINFO_REGION_GPA + PAGE_SIZE + 0x20)
+#define VCPU_INFO_ADDR	(SHINFO_REGION_GPA + 0x40)
+/* Guest-virtual addresses of the same structures, dereferenced by the guest code. */
+#define SHINFO_VADDR	(SHINFO_REGION_GVA)
+#define RUNSTATE_VADDR	(SHINFO_REGION_GVA + PAGE_SIZE + 0x20)
+#define VCPU_INFO_VADDR	(SHINFO_REGION_GVA + 0x40)
+/* Vector registered as the Xen upcall vector and handled by evtchn_handler(). */
+#define EVTCHN_VECTOR	0x10
+/* Event channel ports used by the two irqfd routes and by the vCPU timer. */
+#define EVTCHN_TEST1 15
+#define EVTCHN_TEST2 66
+#define EVTCHN_TIMER 13
+/* MSR number passed to KVM_XEN_HVM_CONFIG in main(). */
+#define XEN_HYPERCALL_MSR	0x40000000
+/* Minimum 'runnable' time the guest must observe after the host yields in step 6. */
+#define MIN_STEAL_TIME		50000
+
+#define SHINFO_RACE_TIMEOUT	2	/* seconds */
+/* Hypercall numbers and sub-operations that guest_code() issues via vmcall. */
+#define __HYPERVISOR_set_timer_op	15
+#define __HYPERVISOR_sched_op		29
+#define __HYPERVISOR_event_channel_op	32
+
+#define SCHEDOP_poll			3
+
+#define EVTCHNOP_send			4
+
+#define EVTCHNSTAT_interdomain		2
+
+struct evtchn_send {
+	u32 port;	/* port the guest asks to signal with EVTCHNOP_send */
+};
+
+struct sched_poll {
+	u32 *ports;		/* array of event channel ports to poll */
+	unsigned int nr_ports;	/* number of entries in @ports */
+	u64 timeout;		/* guest_code() passes 0 and 100000000; units not shown here */
+};
+
+struct pvclock_vcpu_time_info {
+	u32   version;	/* main() requires a non-zero, even value */
+	u32   pad0;
+	u64   tsc_timestamp;
+	u64   system_time;
+	u32   tsc_to_system_mul;
+	s8    tsc_shift;	/* signed */
+	u8    flags;
+	u8    pad[2];
+} __attribute__((__packed__)); /* 32 bytes */
+
+struct pvclock_wall_clock {
+	u32   version;	/* main() requires a non-zero, even value */
+	u32   sec;	/* with nsec, compared against host CLOCK_REALTIME samples */
+	u32   nsec;
+} __attribute__((__packed__));
+
+struct vcpu_runstate_info {
+	uint32_t state;			/* current RUNSTATE_* value */
+	uint64_t state_entry_time;	/* main() checks it equals the sum of time[] */
+	uint64_t time[4];		/* per-state time, indexed by RUNSTATE_* */
+};
+
+struct arch_vcpu_info {
+	unsigned long cr2;
+	unsigned long pad;	/* sizeof(vcpu_info_t) == 64 */
+};
+
+struct vcpu_info {
+	uint8_t evtchn_upcall_pending;	/* set by the host in step 0, cleared by evtchn_handler() */
+	uint8_t evtchn_upcall_mask;
+	unsigned long evtchn_pending_sel;	/* cleared by evtchn_handler() */
+	struct arch_vcpu_info arch;
+	struct pvclock_vcpu_time_info time;	/* main() reads this at offset 0x20 */
+}; /* 64 bytes (x86) */
+
+struct shared_info {
+	struct vcpu_info vcpu_info[32];
+	unsigned long evtchn_pending[64];	/* word 0 is polled by guest_code() in step 7 */
+	unsigned long evtchn_mask[64];		/* main() sets bits here to mask ports */
+	struct pvclock_wall_clock wc;		/* main() reads this at offset 0xc00 */
+	uint32_t wc_sec_hi;
+	/* arch_shared_info here */
+};
+
+#define RUNSTATE_running  0
+#define RUNSTATE_runnable 1
+#define RUNSTATE_blocked  2
+#define RUNSTATE_offline  3
+/* Printable names, indexed by the RUNSTATE_* values above. */
+static const char *runstate_names[] = {
+	"running",
+	"runnable",
+	"blocked",
+	"offline"
+};
+
+struct {	/* passed to KVM_SET_GSI_ROUTING via &irq_routes.info */
+	struct kvm_irq_routing info;
+	struct kvm_irq_routing_entry entries[2];
+} irq_routes;
+/* Set by evtchn_handler(); polled and cleared by the guest's wait loops. */
+static volatile bool guest_saw_irq;
+
+/* Upcall handler: clear the pending state in vcpu_info, flag the IRQ, sync 0x20. */
+static void evtchn_handler(struct ex_regs *regs)
+{
+	struct vcpu_info *info = (struct vcpu_info *)VCPU_INFO_VADDR;
+	info->evtchn_upcall_pending = 0;
+	info->evtchn_pending_sel = 0;
+	guest_saw_irq = true;
+	GUEST_SYNC(0x20);
+}
+
+static void guest_wait_for_irq(void)	/* block until evtchn_handler() has run */
+{
+	while (guest_saw_irq == false)
+		asm volatile("rep nop" : : : "memory");	/* PAUSE, then re-read the flag */
+	guest_saw_irq = false;	/* consume the notification */
+}
+
+/*
+ * Guest side: every step is announced with GUEST_SYNC(n) so main() can react.
+ */
+static void guest_code(void)
+{
+	struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR;
+	int i;
+
+	__asm__ __volatile__("sti\n" "nop\n");
+
+	/* Trigger an interrupt injection */
+	GUEST_SYNC(0);
+
+	guest_wait_for_irq();
+
+	/* Test having the host set runstates manually */
+	GUEST_SYNC(RUNSTATE_runnable);
+	GUEST_ASSERT(rs->time[RUNSTATE_runnable] != 0);
+	GUEST_ASSERT(rs->state == 0);
+
+	GUEST_SYNC(RUNSTATE_blocked);
+	GUEST_ASSERT(rs->time[RUNSTATE_blocked] != 0);
+	GUEST_ASSERT(rs->state == 0);
+
+	GUEST_SYNC(RUNSTATE_offline);
+	GUEST_ASSERT(rs->time[RUNSTATE_offline] != 0);
+	GUEST_ASSERT(rs->state == 0);
+
+	/* Test runstate time adjust */
+	GUEST_SYNC(4);
+	GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x5a);
+	GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x6b6b);
+
+	/* Test runstate time set */
+	GUEST_SYNC(5);
+	GUEST_ASSERT(rs->state_entry_time >= 0x8000);
+	GUEST_ASSERT(rs->time[RUNSTATE_runnable] == 0);
+	GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x6b6b);
+	GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x5a);
+
+	/* sched_yield() should result in some 'runnable' time */
+	GUEST_SYNC(6);
+	GUEST_ASSERT(rs->time[RUNSTATE_runnable] >= MIN_STEAL_TIME);
+
+	/* Attempt to deliver a *masked* interrupt */
+	GUEST_SYNC(7);
+
+	/* Wait until we see the bit set */
+	struct shared_info *si = (void *)SHINFO_VADDR;
+	while (!si->evtchn_pending[0])
+		__asm__ __volatile__ ("rep nop" : : : "memory");
+
+	/* Now deliver an *unmasked* interrupt */
+	GUEST_SYNC(8);
+
+	guest_wait_for_irq();
+
+	/* Change memslots and deliver an interrupt */
+	GUEST_SYNC(9);
+
+	guest_wait_for_irq();
+
+	/* Deliver event channel with KVM_XEN_HVM_EVTCHN_SEND */
+	GUEST_SYNC(10);
+
+	guest_wait_for_irq();
+
+	GUEST_SYNC(11);
+
+	/* Our turn. Deliver event channel (to ourselves) with EVTCHNOP_send.
+	 * The "memory" clobber makes sure 's' is really stored before vmcall. */
+	unsigned long rax;
+	struct evtchn_send s = { .port = 127 };
+	__asm__ __volatile__ ("vmcall" :
+			      "=a" (rax) :
+			      "a" (__HYPERVISOR_event_channel_op),
+			      "D" (EVTCHNOP_send),
+			      "S" (&s) : "memory");
+
+	GUEST_ASSERT(rax == 0);
+
+	guest_wait_for_irq();
+
+	GUEST_SYNC(12);
+
+	/* Deliver "outbound" event channel to an eventfd which
+	 * happens to be one of our own irqfds. */
+	s.port = 197;
+	__asm__ __volatile__ ("vmcall" :
+			      "=a" (rax) :
+			      "a" (__HYPERVISOR_event_channel_op),
+			      "D" (EVTCHNOP_send),
+			      "S" (&s) : "memory");
+
+	GUEST_ASSERT(rax == 0);
+
+	guest_wait_for_irq();
+
+	GUEST_SYNC(13);
+
+	/* Set a timer 100ms in the future. */
+	__asm__ __volatile__ ("vmcall" :
+			      "=a" (rax) :
+			      "a" (__HYPERVISOR_set_timer_op),
+			      "D" (rs->state_entry_time + 100000000));
+	GUEST_ASSERT(rax == 0);
+
+	GUEST_SYNC(14);
+
+	/* Now wait for the timer */
+	guest_wait_for_irq();
+
+	GUEST_SYNC(15);
+
+	/* The host has 'restored' the timer. Just wait for it. */
+	guest_wait_for_irq();
+
+	GUEST_SYNC(16);
+
+	/* Poll for an event channel port which is already set */
+	u32 ports[1] = { EVTCHN_TIMER };
+	struct sched_poll p = {
+		.ports = ports,
+		.nr_ports = 1,
+		.timeout = 0,
+	};
+
+	__asm__ __volatile__ ("vmcall" :
+			      "=a" (rax) :
+			      "a" (__HYPERVISOR_sched_op),
+			      "D" (SCHEDOP_poll),
+			      "S" (&p) : "memory");
+
+	GUEST_ASSERT(rax == 0);
+
+	GUEST_SYNC(17);
+
+	/* Poll for an unset port and wait for the timeout. */
+	p.timeout = 100000000;
+	__asm__ __volatile__ ("vmcall" :
+			      "=a" (rax) :
+			      "a" (__HYPERVISOR_sched_op),
+			      "D" (SCHEDOP_poll),
+			      "S" (&p) : "memory");
+
+	GUEST_ASSERT(rax == 0);
+
+	GUEST_SYNC(18);
+
+	/* A timer will wake the masked port we're waiting on, while we poll */
+	p.timeout = 0;
+	__asm__ __volatile__ ("vmcall" :
+			      "=a" (rax) :
+			      "a" (__HYPERVISOR_sched_op),
+			      "D" (SCHEDOP_poll),
+			      "S" (&p) : "memory");
+
+	GUEST_ASSERT(rax == 0);
+
+	GUEST_SYNC(19);
+
+	/* A timer wake an *unmasked* port which should wake us with an
+	 * actual interrupt, while we're polling on a different port. */
+	ports[0]++;
+	p.timeout = 0;
+	__asm__ __volatile__ ("vmcall" :
+			      "=a" (rax) :
+			      "a" (__HYPERVISOR_sched_op),
+			      "D" (SCHEDOP_poll),
+			      "S" (&p) : "memory");
+
+	GUEST_ASSERT(rax == 0);
+
+	guest_wait_for_irq();
+
+	GUEST_SYNC(20);
+
+	/* Timer should have fired already */
+	guest_wait_for_irq();
+
+	GUEST_SYNC(21);
+	/* Racing host ioctls */
+
+	guest_wait_for_irq();
+
+	GUEST_SYNC(22);
+	/* Racing vmcall against host ioctl */
+
+	ports[0] = 0;
+
+	p = (struct sched_poll) {
+		.ports = ports,
+		.nr_ports = 1,
+		.timeout = 0
+	};
+
+wait_for_timer:
+	/*
+	 * Poll for a timer wake event while the worker thread is mucking with
+	 * the shared info.  KVM XEN drops timer IRQs if the shared info is
+	 * invalid when the timer expires.  Arbitrarily poll 100 times before
+	 * giving up and asking the VMM to re-arm the timer.  100 polls should
+	 * consume enough time to beat on KVM without taking too long if the
+	 * timer IRQ is dropped due to an invalid event channel.
+	 */
+	for (i = 0; i < 100 && !guest_saw_irq; i++)
+		asm volatile("vmcall"
+			     : "=a" (rax)
+			     : "a" (__HYPERVISOR_sched_op),
+			       "D" (SCHEDOP_poll),
+			       "S" (&p)
+			     : "memory");
+
+	/*
+	 * Re-send the timer IRQ if it was (likely) dropped due to the timer
+	 * expiring while the event channel was invalid.
+	 */
+	if (!guest_saw_irq) {
+		GUEST_SYNC(23);
+		goto wait_for_timer;
+	}
+	guest_saw_irq = false;
+
+	GUEST_SYNC(24);
+}
+
+/*
+ * Three-way compare of two timespecs: returns 1 if *a is later than *b,
+ * -1 if it is earlier and 0 if they are equal.
+ */
+static int cmp_timespec(struct timespec *a, struct timespec *b)
+{
+	/* Seconds dominate; nanoseconds only break ties. */
+	if (a->tv_sec != b->tv_sec)
+		return a->tv_sec > b->tv_sec ? 1 : -1;
+	if (a->tv_nsec != b->tv_nsec)
+		return a->tv_nsec > b->tv_nsec ? 1 : -1;
+	return 0;
+}
+
+static struct vcpu_info *vinfo;	/* host pointer to the vCPU info, set in main() */
+static struct kvm_vcpu *vcpu;	/* the single vCPU, created in main() */
+/* SIGALRM handler: print the pending flag (if mapped), dump the vCPU and fail the test. */
+static void handle_alrm(int sig)
+{
+	if (vinfo)
+		printf("evtchn_upcall_pending 0x%x\n", vinfo->evtchn_upcall_pending);
+	vcpu_dump(stdout, vcpu, 0);
+	TEST_FAIL("IRQ delivery timed out");
+}
+
+/*
+ * Worker thread: flip the Xen shared_info between valid and invalid until cancelled.
+ */
+static void *juggle_shinfo_state(void *arg)
+{
+	struct kvm_vm *vm = arg;
+	struct kvm_xen_hvm_attr shinfo_on = {
+		.type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
+		.u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE
+	};
+	struct kvm_xen_hvm_attr shinfo_off = {
+		.type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
+		.u.shared_info.gfn = GPA_INVALID
+	};
+
+	while (1) {
+		__vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &shinfo_on);
+		__vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &shinfo_off);
+		pthread_testcancel();
+	}
+	return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+	struct timespec min_ts, max_ts, vm_ts;
+	struct kvm_vm *vm;
+	pthread_t thread;
+	bool verbose;
+	int ret;
+	/* -v / --verbose prints a progress line for each test step. */
+	verbose = argc > 1 && (!strncmp(argv[1], "-v", 3) ||
+			       !strncmp(argv[1], "--verbose", 10));
+	/* Which optional parts run depends on the flags KVM_CAP_XEN_HVM reports. */
+	int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
+	TEST_REQUIRE(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO);
+
+	bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE);
+	bool do_eventfd_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL);
+	bool do_evtchn_tests = do_eventfd_tests && !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND);
+	/* min_ts/max_ts bracket the run for the wall clock sanity check at the end. */
+	clock_gettime(CLOCK_REALTIME, &min_ts);
+
+	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+	/* Map a region for the shared_info page */
+	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+				    SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 2, 0);
+	virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 2);
+
+	struct shared_info *shinfo = addr_gpa2hva(vm, SHINFO_ADDR);
+
+	int zero_fd = open("/dev/zero", O_RDONLY);
+	TEST_ASSERT(zero_fd != -1, "Failed to open /dev/zero");
+
+	struct kvm_xen_hvm_config hvmc = {
+		.flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
+		.msr = XEN_HYPERCALL_MSR,
+	};
+
+	/* Let the kernel know that we *will* use it for sending all
+	 * event channels, which lets it intercept SCHEDOP_poll */
+	if (do_evtchn_tests)
+		hvmc.flags |= KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
+
+	vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc);
+
+	struct kvm_xen_hvm_attr lm = {
+		.type = KVM_XEN_ATTR_TYPE_LONG_MODE,
+		.u.long_mode = 1,
+	};
+	vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);
+
+	struct kvm_xen_hvm_attr ha = {
+		.type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
+		.u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE,
+	};
+	vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ha);
+
+	/*
+	 * Test what happens when the HVA of the shinfo page is remapped after
+	 * the kernel has a reference to it. But make sure we copy the clock
+	 * info over since that's only set at setup time, and we test it later.
+	 */
+	struct pvclock_wall_clock wc_copy = shinfo->wc;
+	void *m = mmap(shinfo, PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_PRIVATE, zero_fd, 0);
+	TEST_ASSERT(m == shinfo, "Failed to map /dev/zero over shared info");
+	shinfo->wc = wc_copy;
+
+	struct kvm_xen_vcpu_attr vi = {
+		.type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO,
+		.u.gpa = VCPU_INFO_ADDR,
+	};
+	vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &vi);
+
+	struct kvm_xen_vcpu_attr pvclock = {
+		.type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
+		.u.gpa = PVTIME_ADDR,
+	};
+	vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &pvclock);
+
+	struct kvm_xen_hvm_attr vec = {
+		.type = KVM_XEN_ATTR_TYPE_UPCALL_VECTOR,
+		.u.vector = EVTCHN_VECTOR,
+	};
+	vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &vec);
+
+	vm_init_descriptor_tables(vm);
+	vcpu_init_descriptor_tables(vcpu);
+	vm_install_exception_handler(vm, EVTCHN_VECTOR, evtchn_handler);
+
+	if (do_runstate_tests) {
+		struct kvm_xen_vcpu_attr st = {
+			.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
+			.u.gpa = RUNSTATE_ADDR,
+		};
+		vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &st);
+	}
+
+	int irq_fd[2] = { -1, -1 };
+
+	if (do_eventfd_tests) {
+		irq_fd[0] = eventfd(0, 0);
+		irq_fd[1] = eventfd(0, 0);
+
+		/* Unexpected, but not a KVM failure */
+		if (irq_fd[0] == -1 || irq_fd[1] == -1)
+			do_evtchn_tests = do_eventfd_tests = false;
+	}
+
+	if (do_eventfd_tests) {
+		irq_routes.info.nr = 2;
+
+		irq_routes.entries[0].gsi = 32;
+		irq_routes.entries[0].type = KVM_IRQ_ROUTING_XEN_EVTCHN;
+		irq_routes.entries[0].u.xen_evtchn.port = EVTCHN_TEST1;
+		irq_routes.entries[0].u.xen_evtchn.vcpu = vcpu->id;
+		irq_routes.entries[0].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
+
+		irq_routes.entries[1].gsi = 33;
+		irq_routes.entries[1].type = KVM_IRQ_ROUTING_XEN_EVTCHN;
+		irq_routes.entries[1].u.xen_evtchn.port = EVTCHN_TEST2;
+		irq_routes.entries[1].u.xen_evtchn.vcpu = vcpu->id;
+		irq_routes.entries[1].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
+
+		vm_ioctl(vm, KVM_SET_GSI_ROUTING, &irq_routes.info);
+
+		struct kvm_irqfd ifd = { };
+
+		ifd.fd = irq_fd[0];
+		ifd.gsi = 32;
+		vm_ioctl(vm, KVM_IRQFD, &ifd);
+
+		ifd.fd = irq_fd[1];
+		ifd.gsi = 33;
+		vm_ioctl(vm, KVM_IRQFD, &ifd);
+
+		struct sigaction sa = { };
+		sa.sa_handler = handle_alrm;
+		sigaction(SIGALRM, &sa, NULL);
+	}
+
+	struct kvm_xen_vcpu_attr tmr = {
+		.type = KVM_XEN_VCPU_ATTR_TYPE_TIMER,
+		.u.timer.port = EVTCHN_TIMER,
+		.u.timer.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
+		.u.timer.expires_ns = 0
+	};
+
+	if (do_evtchn_tests) {
+		struct kvm_xen_hvm_attr inj = {
+			.type = KVM_XEN_ATTR_TYPE_EVTCHN,
+			.u.evtchn.send_port = 127,
+			.u.evtchn.type = EVTCHNSTAT_interdomain,
+			.u.evtchn.flags = 0,
+			.u.evtchn.deliver.port.port = EVTCHN_TEST1,
+			.u.evtchn.deliver.port.vcpu = vcpu->id + 1,
+			.u.evtchn.deliver.port.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
+		};
+		vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);
+
+		/* Test migration to a different vCPU */
+		inj.u.evtchn.flags = KVM_XEN_EVTCHN_UPDATE;
+		inj.u.evtchn.deliver.port.vcpu = vcpu->id;
+		vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);
+
+		inj.u.evtchn.send_port = 197;
+		inj.u.evtchn.deliver.eventfd.port = 0;
+		inj.u.evtchn.deliver.eventfd.fd = irq_fd[1];
+		inj.u.evtchn.flags = 0;
+		vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);
+
+		vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
+	}
+	vinfo = addr_gpa2hva(vm, VCPU_INFO_ADDR);
+	vinfo->evtchn_upcall_pending = 0;
+
+	struct vcpu_runstate_info *rs = addr_gpa2hva(vm, RUNSTATE_ADDR);
+	rs->state = 0x5a;
+
+	bool evtchn_irq_expected = false;
+	/* Main loop: run the guest to its next GUEST_SYNC(n) and act on step n. */
+	for (;;) {
+		volatile struct kvm_run *run = vcpu->run;
+		struct ucall uc;
+
+		vcpu_run(vcpu);
+
+		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+			    "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
+			    run->exit_reason,
+			    exit_reason_str(run->exit_reason));
+
+		switch (get_ucall(vcpu, &uc)) {
+		case UCALL_ABORT:
+			REPORT_GUEST_ASSERT(uc);
+			/* NOT REACHED */
+		case UCALL_SYNC: {
+			struct kvm_xen_vcpu_attr rst;
+			long rundelay;
+
+			if (do_runstate_tests)
+				TEST_ASSERT(rs->state_entry_time == rs->time[0] +
+					    rs->time[1] + rs->time[2] + rs->time[3],
+					    "runstate times don't add up");
+
+			switch (uc.args[1]) {
+			case 0:
+				if (verbose)
+					printf("Delivering evtchn upcall\n");
+				evtchn_irq_expected = true;
+				vinfo->evtchn_upcall_pending = 1;
+				break;
+
+			case RUNSTATE_runnable...RUNSTATE_offline:
+				TEST_ASSERT(!evtchn_irq_expected, "Event channel IRQ not seen");
+				if (!do_runstate_tests)
+					goto done;
+				if (verbose)
+					printf("Testing runstate %s\n", runstate_names[uc.args[1]]);
+				rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT;
+				rst.u.runstate.state = uc.args[1];
+				vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst);
+				break;
+
+			case 4:
+				if (verbose)
+					printf("Testing RUNSTATE_ADJUST\n");
+				rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST;
+				memset(&rst.u, 0, sizeof(rst.u));
+				rst.u.runstate.state = (uint64_t)-1;
+				rst.u.runstate.time_blocked =
+					0x5a - rs->time[RUNSTATE_blocked];
+				rst.u.runstate.time_offline =
+					0x6b6b - rs->time[RUNSTATE_offline];
+				rst.u.runstate.time_runnable = -rst.u.runstate.time_blocked -
+					rst.u.runstate.time_offline;
+				vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst);
+				break;
+
+			case 5:
+				if (verbose)
+					printf("Testing RUNSTATE_DATA\n");
+				rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA;
+				memset(&rst.u, 0, sizeof(rst.u));
+				rst.u.runstate.state = RUNSTATE_running;
+				rst.u.runstate.state_entry_time = 0x6b6b + 0x5a;
+				rst.u.runstate.time_blocked = 0x6b6b;
+				rst.u.runstate.time_offline = 0x5a;
+				vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst);
+				break;
+
+			case 6:
+				if (verbose)
+					printf("Testing steal time\n");
+				/* Yield until scheduler delay exceeds target */
+				rundelay = get_run_delay() + MIN_STEAL_TIME;
+				do {
+					sched_yield();
+				} while (get_run_delay() < rundelay);
+				break;
+
+			case 7:
+				if (!do_eventfd_tests)
+					goto done;
+				if (verbose)
+					printf("Testing masked event channel\n");
+				shinfo->evtchn_mask[0] = 1UL << EVTCHN_TEST1;
+				eventfd_write(irq_fd[0], 1UL);
+				alarm(1);
+				break;
+
+			case 8:
+				if (verbose)
+					printf("Testing unmasked event channel\n");
+				/* Unmask that, but deliver the other one */
+				shinfo->evtchn_pending[0] = 0;
+				shinfo->evtchn_mask[0] = 0;
+				eventfd_write(irq_fd[1], 1UL);
+				evtchn_irq_expected = true;
+				alarm(1);
+				break;
+
+			case 9:
+				TEST_ASSERT(!evtchn_irq_expected,
+					    "Expected event channel IRQ but it didn't happen");
+				shinfo->evtchn_pending[1] = 0;
+				if (verbose)
+					printf("Testing event channel after memslot change\n");
+				vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+							    DUMMY_REGION_GPA, DUMMY_REGION_SLOT, 1, 0);
+				eventfd_write(irq_fd[0], 1UL);
+				evtchn_irq_expected = true;
+				alarm(1);
+				break;
+
+			case 10:
+				TEST_ASSERT(!evtchn_irq_expected,
+					    "Expected event channel IRQ but it didn't happen");
+				if (!do_evtchn_tests)
+					goto done;
+
+				shinfo->evtchn_pending[0] = 0;
+				if (verbose)
+					printf("Testing injection with KVM_XEN_HVM_EVTCHN_SEND\n");
+
+				struct kvm_irq_routing_xen_evtchn e;
+				e.port = EVTCHN_TEST2;
+				e.vcpu = vcpu->id;
+				e.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
+
+				vm_ioctl(vm, KVM_XEN_HVM_EVTCHN_SEND, &e);
+				evtchn_irq_expected = true;
+				alarm(1);
+				break;
+
+			case 11:
+				TEST_ASSERT(!evtchn_irq_expected,
+					    "Expected event channel IRQ but it didn't happen");
+				shinfo->evtchn_pending[1] = 0;
+
+				if (verbose)
+					printf("Testing guest EVTCHNOP_send direct to evtchn\n");
+				evtchn_irq_expected = true;
+				alarm(1);
+				break;
+
+			case 12:
+				TEST_ASSERT(!evtchn_irq_expected,
+					    "Expected event channel IRQ but it didn't happen");
+				shinfo->evtchn_pending[0] = 0;
+
+				if (verbose)
+					printf("Testing guest EVTCHNOP_send to eventfd\n");
+				evtchn_irq_expected = true;
+				alarm(1);
+				break;
+
+			case 13:
+				TEST_ASSERT(!evtchn_irq_expected,
+					    "Expected event channel IRQ but it didn't happen");
+				shinfo->evtchn_pending[1] = 0;
+
+				if (verbose)
+					printf("Testing guest oneshot timer\n");
+				break;
+
+			case 14:
+				memset(&tmr, 0, sizeof(tmr));
+				tmr.type = KVM_XEN_VCPU_ATTR_TYPE_TIMER;
+				vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
+				TEST_ASSERT(tmr.u.timer.port == EVTCHN_TIMER,
+					    "Timer port not returned");
+				TEST_ASSERT(tmr.u.timer.priority == KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
+					    "Timer priority not returned");
+				TEST_ASSERT(tmr.u.timer.expires_ns > rs->state_entry_time,
+					    "Timer expiry not returned");
+				evtchn_irq_expected = true;
+				alarm(1);
+				break;
+
+			case 15:
+				TEST_ASSERT(!evtchn_irq_expected,
+					    "Expected event channel IRQ but it didn't happen");
+				shinfo->evtchn_pending[0] = 0;
+
+				if (verbose)
+					printf("Testing restored oneshot timer\n");
+
+				tmr.u.timer.expires_ns = rs->state_entry_time + 100000000;
+				vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
+				evtchn_irq_expected = true;
+				alarm(1);
+				break;
+
+			case 16:
+				TEST_ASSERT(!evtchn_irq_expected,
+					    "Expected event channel IRQ but it didn't happen");
+
+				if (verbose)
+					printf("Testing SCHEDOP_poll with already pending event\n");
+				shinfo->evtchn_pending[0] = shinfo->evtchn_mask[0] = 1UL << EVTCHN_TIMER;
+				alarm(1);
+				break;
+
+			case 17:
+				if (verbose)
+					printf("Testing SCHEDOP_poll timeout\n");
+				shinfo->evtchn_pending[0] = 0;
+				alarm(1);
+				break;
+
+			case 18:
+				if (verbose)
+					printf("Testing SCHEDOP_poll wake on masked event\n");
+
+				tmr.u.timer.expires_ns = rs->state_entry_time + 100000000;
+				vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
+				alarm(1);
+				break;
+
+			case 19:
+				shinfo->evtchn_pending[0] = shinfo->evtchn_mask[0] = 0;
+				if (verbose)
+					printf("Testing SCHEDOP_poll wake on unmasked event\n");
+
+				evtchn_irq_expected = true;
+				tmr.u.timer.expires_ns = rs->state_entry_time + 100000000;
+				vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
+
+				/* Read it back and check the pending time is reported correctly */
+				tmr.u.timer.expires_ns = 0;
+				vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
+				TEST_ASSERT(tmr.u.timer.expires_ns == rs->state_entry_time + 100000000,
+					    "Timer not reported pending");
+				alarm(1);
+				break;
+
+			case 20:
+				TEST_ASSERT(!evtchn_irq_expected,
+					    "Expected event channel IRQ but it didn't happen");
+				/* Read timer and check it is no longer pending */
+				vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
+				TEST_ASSERT(!tmr.u.timer.expires_ns, "Timer still reported pending");
+
+				shinfo->evtchn_pending[0] = 0;
+				if (verbose)
+					printf("Testing timer in the past\n");
+
+				evtchn_irq_expected = true;
+				tmr.u.timer.expires_ns = rs->state_entry_time - 100000000ULL;
+				vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
+				alarm(1);
+				break;
+
+			case 21:
+				TEST_ASSERT(!evtchn_irq_expected,
+					    "Expected event channel IRQ but it didn't happen");
+				alarm(0);
+
+				if (verbose)
+					printf("Testing shinfo lock corruption (KVM_XEN_HVM_EVTCHN_SEND)\n");
+
+				ret = pthread_create(&thread, NULL, &juggle_shinfo_state, (void *)vm);
+				TEST_ASSERT(ret == 0, "pthread_create() failed: %s", strerror(ret));
+
+				struct kvm_irq_routing_xen_evtchn uxe = {
+					.port = 1,
+					.vcpu = vcpu->id,
+					.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL
+				};
+
+				evtchn_irq_expected = true;
+				for (time_t t = time(NULL) + SHINFO_RACE_TIMEOUT; time(NULL) < t;)
+					__vm_ioctl(vm, KVM_XEN_HVM_EVTCHN_SEND, &uxe);
+				break;
+
+			case 22:
+				TEST_ASSERT(!evtchn_irq_expected,
+					    "Expected event channel IRQ but it didn't happen");
+
+				if (verbose)
+					printf("Testing shinfo lock corruption (SCHEDOP_poll)\n");
+
+				shinfo->evtchn_pending[0] = 1;
+
+				evtchn_irq_expected = true;
+				tmr.u.timer.expires_ns = rs->state_entry_time +
+							 SHINFO_RACE_TIMEOUT * 1000000000ULL;
+				vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
+				break;
+
+			case 23:
+				/*
+				 * Optional and possibly repeated sync point.
+				 * Injecting the timer IRQ may fail if the
+				 * shinfo is invalid when the timer expires.
+				 * If the timer has expired but the IRQ hasn't
+				 * been delivered, rearm the timer and retry.
+				 */
+				vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
+
+				/* Resume the guest if the timer is still pending. */
+				if (tmr.u.timer.expires_ns)
+					break;
+
+				/* All done if the IRQ was delivered. */
+				if (!evtchn_irq_expected)
+					break;
+
+				tmr.u.timer.expires_ns = rs->state_entry_time +
+							 SHINFO_RACE_TIMEOUT * 1000000000ULL;
+				vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
+				break;
+			case 24:
+				TEST_ASSERT(!evtchn_irq_expected,
+					    "Expected event channel IRQ but it didn't happen");
+
+				ret = pthread_cancel(thread);
+				TEST_ASSERT(ret == 0, "pthread_cancel() failed: %s", strerror(ret));
+
+				ret = pthread_join(thread, 0);
+				TEST_ASSERT(ret == 0, "pthread_join() failed: %s", strerror(ret));
+				goto done;
+
+			case 0x20:
+				TEST_ASSERT(evtchn_irq_expected, "Unexpected event channel IRQ");
+				evtchn_irq_expected = false;
+				break;
+			}
+			break;
+		}
+		case UCALL_DONE:
+			goto done;
+		default:
+			TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+		}
+	}
+
+ done:
+	alarm(0);
+	clock_gettime(CLOCK_REALTIME, &max_ts);
+
+	/*
+	 * Just a *really* basic check that things are being put in the
+	 * right place. The actual calculations are much the same for
+	 * Xen as they are for the KVM variants, so no need to check.
+	 */
+	struct pvclock_wall_clock *wc;
+	struct pvclock_vcpu_time_info *ti, *ti2;
+
+	wc = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0xc00);
+	ti = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0x40 + 0x20);
+	ti2 = addr_gpa2hva(vm, PVTIME_ADDR);
+
+	if (verbose) {
+		printf("Wall clock (v %u) %u.%09u\n", wc->version, wc->sec, wc->nsec);
+		printf("Time info 1: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %d flags %x\n",
+		       ti->version, ti->tsc_timestamp, ti->system_time, ti->tsc_to_system_mul,
+		       ti->tsc_shift, ti->flags);
+		printf("Time info 2: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %d flags %x\n",
+		       ti2->version, ti2->tsc_timestamp, ti2->system_time, ti2->tsc_to_system_mul,
+		       ti2->tsc_shift, ti2->flags);
+	}
+
+	vm_ts.tv_sec = wc->sec;
+	vm_ts.tv_nsec = wc->nsec;
+	TEST_ASSERT(wc->version && !(wc->version & 1),
+		    "Bad wallclock version %x", wc->version);
+	TEST_ASSERT(cmp_timespec(&min_ts, &vm_ts) <= 0, "VM time too old");
+	TEST_ASSERT(cmp_timespec(&max_ts, &vm_ts) >= 0, "VM time too new");
+
+	TEST_ASSERT(ti->version && !(ti->version & 1),
+		    "Bad time_info version %x", ti->version);
+	TEST_ASSERT(ti2->version && !(ti2->version & 1),
+		    "Bad time_info version %x", ti2->version);
+
+	if (do_runstate_tests) {
+		/*
+		 * Fetch runstate and check sanity. Strictly speaking in the
+		 * general case we might not expect the numbers to be identical
+		 * but in this case we know we aren't running the vCPU any more.
+		 */
+		struct kvm_xen_vcpu_attr rst = {
+			.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA,
+		};
+		vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &rst);
+
+		if (verbose) {
+			printf("Runstate: %s(%u), entry %" PRIu64 " ns\n",
+			       rs->state <= RUNSTATE_offline ? runstate_names[rs->state] : "unknown",
+			       rs->state, rs->state_entry_time);
+			for (int i = RUNSTATE_running; i <= RUNSTATE_offline; i++) {
+				printf("State %s: %" PRIu64 " ns\n",
+				       runstate_names[i], rs->time[i]);
+			}
+		}
+		TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch");
+		TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time,
+			    "State entry time mismatch");
+		TEST_ASSERT(rs->time[RUNSTATE_running] == rst.u.runstate.time_running,
+			    "Running time mismatch");
+		TEST_ASSERT(rs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable,
+			    "Runnable time mismatch");
+		TEST_ASSERT(rs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked,
+			    "Blocked time mismatch");
+		TEST_ASSERT(rs->time[RUNSTATE_offline] == rst.u.runstate.time_offline,
+			    "Offline time mismatch");
+
+		TEST_ASSERT(rs->state_entry_time == rs->time[0] +
+			    rs->time[1] + rs->time[2] + rs->time[3],
+			    "runstate times don't add up");
+	}
+	kvm_vm_free(vm);
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
new file mode 100644
index 000000000000..88914d48c65e
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * xen_vmcall_test
+ *
+ * Copyright © 2020 Amazon.com, Inc. or its affiliates.
+ *
+ * Userspace hypercall testing
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#define HCALL_REGION_GPA	0xc0000000ULL
+#define HCALL_REGION_SLOT	10
+
+#define INPUTVALUE 17
+#define ARGVALUE(x) (0xdeadbeef5a5a0000UL + x)
+#define RETVALUE 0xcafef00dfbfbffffUL
+
+#define XEN_HYPERCALL_MSR	0x40000200
+#define HV_GUEST_OS_ID_MSR	0x40000000
+#define HV_HYPERCALL_MSR	0x40000001
+
+#define HVCALL_SIGNAL_EVENT		0x005d
+#define HV_STATUS_INVALID_ALIGNMENT	4
+
+static void guest_code(void)
+{
+	unsigned long rax = INPUTVALUE;	/* host checks this as hcall.input */
+	unsigned long rdi = ARGVALUE(1);	/* host checks this as params[0] */
+	unsigned long rsi = ARGVALUE(2);	/* host checks this as params[1] */
+	unsigned long rdx = ARGVALUE(3);	/* host checks this as params[2] */
+	unsigned long rcx;
+	register unsigned long r10 __asm__("r10") = ARGVALUE(4);	/* params[3] */
+	register unsigned long r8 __asm__("r8") = ARGVALUE(5);	/* params[4] */
+	register unsigned long r9 __asm__("r9") = ARGVALUE(6);	/* params[5] */
+
+	/* Three hypercalls follow; first a direct invocation of 'vmcall' */
+	__asm__ __volatile__("vmcall" :
+			     "=a"(rax) :
+			     "a"(rax), "D"(rdi), "S"(rsi), "d"(rdx),
+			     "r"(r10), "r"(r8), "r"(r9));
+	GUEST_ASSERT(rax == RETVALUE);	/* host stores RETVALUE in hcall.result */
+
+	/* Fill in the Xen hypercall page: write its GPA to XEN_HYPERCALL_MSR */
+	__asm__ __volatile__("wrmsr" : : "c" (XEN_HYPERCALL_MSR),
+			     "a" (HCALL_REGION_GPA & 0xffffffff),
+			     "d" (HCALL_REGION_GPA >> 32));
+
+	/* Set Hyper-V Guest OS ID to 0x5a (edx:eax = 0:0x5a) */
+	__asm__ __volatile__("wrmsr" : : "c" (HV_GUEST_OS_ID_MSR),
+			     "a" (0x5a), "d" (0));
+
+	/* Hyper-V hypercall page: 2nd page; "+ 1" presumably the enable bit (confirm) */
+	u64 msrval = HCALL_REGION_GPA + PAGE_SIZE + 1;
+	__asm__ __volatile__("wrmsr" : : "c" (HV_HYPERCALL_MSR),
+			     "a" (msrval & 0xffffffff),
+			     "d" (msrval >> 32));
+
+	/* Invoke a Xen hypercall; presumably 32-byte stubs per number (confirm) */
+	__asm__ __volatile__("call *%1" : "=a"(rax) :
+			     "r"(HCALL_REGION_GPA + INPUTVALUE * 32),
+			     "a"(rax), "D"(rdi), "S"(rsi), "d"(rdx),
+			     "r"(r10), "r"(r8), "r"(r9));
+	GUEST_ASSERT(rax == RETVALUE);
+
+	/* Invoke a Hyper-V hypercall through its page; bad ingpa must be refused */
+	rax = 0;
+	rcx = HVCALL_SIGNAL_EVENT;	/* code */
+	rdx = 0x5a5a5a5a;		/* ingpa (badly aligned) */
+	__asm__ __volatile__("call *%1" : "=a"(rax) :
+			     "r"(HCALL_REGION_GPA + PAGE_SIZE),
+			     "a"(rax), "c"(rcx), "d"(rdx),
+			     "r"(r8));
+	GUEST_ASSERT(rax == HV_STATUS_INVALID_ALIGNMENT);
+
+	GUEST_DONE();	/* host loop leaves on UCALL_DONE */
+}
+
+int main(int argc, char *argv[])
+{
+	unsigned int xen_caps;
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+
+	xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);	/* tested for hcall intercept */
+	TEST_REQUIRE(xen_caps & KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL);
+
+	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+	vcpu_set_hv_cpuid(vcpu);	/* guest_code also issues a Hyper-V hypercall */
+
+	struct kvm_xen_hvm_config hvmc = {
+		.flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
+		.msr = XEN_HYPERCALL_MSR,	/* MSR the guest writes the page GPA to */
+	};
+	vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc);
+
+	/* Map two pages for the hypercall pages, with guest VA == GPA */
+	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+				    HCALL_REGION_GPA, HCALL_REGION_SLOT, 2, 0);
+	virt_map(vm, HCALL_REGION_GPA, HCALL_REGION_GPA, 2);
+
+	for (;;) {	/* run until the guest reports UCALL_DONE */
+		volatile struct kvm_run *run = vcpu->run;
+		struct ucall uc;
+
+		vcpu_run(vcpu);
+
+		if (run->exit_reason == KVM_EXIT_XEN) {	/* Xen hypercall exit */
+			ASSERT_EQ(run->xen.type, KVM_EXIT_XEN_HCALL);
+			ASSERT_EQ(run->xen.u.hcall.cpl, 0);
+			ASSERT_EQ(run->xen.u.hcall.longmode, 1);
+			ASSERT_EQ(run->xen.u.hcall.input, INPUTVALUE);
+			ASSERT_EQ(run->xen.u.hcall.params[0], ARGVALUE(1));
+			ASSERT_EQ(run->xen.u.hcall.params[1], ARGVALUE(2));
+			ASSERT_EQ(run->xen.u.hcall.params[2], ARGVALUE(3));
+			ASSERT_EQ(run->xen.u.hcall.params[3], ARGVALUE(4));
+			ASSERT_EQ(run->xen.u.hcall.params[4], ARGVALUE(5));
+			ASSERT_EQ(run->xen.u.hcall.params[5], ARGVALUE(6));
+			run->xen.u.hcall.result = RETVALUE;	/* guest asserts on this */
+			continue;	/* re-enter the guest */
+		}
+
+		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+			    "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
+			    run->exit_reason,
+			    exit_reason_str(run->exit_reason));
+
+		switch (get_ucall(vcpu, &uc)) {
+		case UCALL_ABORT:
+			REPORT_GUEST_ASSERT(uc);
+			/* NOT REACHED */
+		case UCALL_SYNC:
+			break;
+		case UCALL_DONE:
+			goto done;
+		default:
+			TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+		}
+	}
+done:
+	kvm_vm_free(vm);
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c b/tools/testing/selftests/kvm/x86_64/xss_msr_test.c
index 851ea81b9d9f..e0ddf47362e7 100644
--- a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xss_msr_test.c
@@ -12,64 +12,44 @@
 #include "kvm_util.h"
 #include "vmx.h"
 
-#define VCPU_ID	      1
 #define MSR_BITS      64
 
-#define X86_FEATURE_XSAVES	(1<<3)
-
-bool is_supported_msr(u32 msr_index)
-{
-	struct kvm_msr_list *list;
-	bool found = false;
-	int i;
-
-	list = kvm_get_msr_index_list();
-	for (i = 0; i < list->nmsrs; ++i) {
-		if (list->indices[i] == msr_index) {
-			found = true;
-			break;
-		}
-	}
-
-	free(list);
-	return found;
-}
-
 int main(int argc, char *argv[])
 {
-	struct kvm_cpuid_entry2 *entry;
-	bool xss_supported = false;
+	bool xss_in_msr_list;
 	struct kvm_vm *vm;
+	struct kvm_vcpu *vcpu;
 	uint64_t xss_val;
 	int i, r;
 
 	/* Create VM */
-	vm = vm_create_default(VCPU_ID, 0, 0);
+	vm = vm_create_with_one_vcpu(&vcpu, NULL);
 
-	if (kvm_get_cpuid_max_basic() >= 0xd) {
-		entry = kvm_get_supported_cpuid_index(0xd, 1);
-		xss_supported = entry && !!(entry->eax & X86_FEATURE_XSAVES);
-	}
-	if (!xss_supported) {
-		printf("IA32_XSS is not supported by the vCPU.\n");
-		exit(KSFT_SKIP);
-	}
+	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVES));
 
-	xss_val = vcpu_get_msr(vm, VCPU_ID, MSR_IA32_XSS);
+	xss_val = vcpu_get_msr(vcpu, MSR_IA32_XSS);
 	TEST_ASSERT(xss_val == 0,
 		    "MSR_IA32_XSS should be initialized to zero\n");
 
-	vcpu_set_msr(vm, VCPU_ID, MSR_IA32_XSS, xss_val);
+	vcpu_set_msr(vcpu, MSR_IA32_XSS, xss_val);
+
 	/*
 	 * At present, KVM only supports a guest IA32_XSS value of 0. Verify
 	 * that trying to set the guest IA32_XSS to an unsupported value fails.
 	 * Also, in the future when a non-zero value succeeds check that
-	 * IA32_XSS is in the KVM_GET_MSR_INDEX_LIST.
+	 * IA32_XSS is in the list of MSRs to save/restore.
 	 */
+	xss_in_msr_list = kvm_msr_is_in_save_restore_list(MSR_IA32_XSS);
 	for (i = 0; i < MSR_BITS; ++i) {
-		r = _vcpu_set_msr(vm, VCPU_ID, MSR_IA32_XSS, 1ull << i);
-		TEST_ASSERT(r == 0 || is_supported_msr(MSR_IA32_XSS),
-			    "IA32_XSS was able to be set, but was not found in KVM_GET_MSR_INDEX_LIST.\n");
+		r = _vcpu_set_msr(vcpu, MSR_IA32_XSS, 1ull << i);
+
+		/*
+		 * Setting a list of MSRs returns the entry that "faulted", or
+		 * the last entry +1 if all MSRs were successfully written.
+		 */
+		TEST_ASSERT(!r || r == 1, KVM_IOCTL_ERROR(KVM_SET_MSRS, r));
+		TEST_ASSERT(r != 1 || xss_in_msr_list,
+			    "IA32_XSS was able to be set, but was not in save/restore list");
 	}
 
 	kvm_vm_free(vm);