Diffstat (limited to 'tools/testing/selftests/kvm/x86_64')
-rw-r--r--  tools/testing/selftests/kvm/x86_64/amx_test.c  334
-rw-r--r--  tools/testing/selftests/kvm/x86_64/cpuid_test.c  208
-rw-r--r--  tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c  56
-rw-r--r--  tools/testing/selftests/kvm/x86_64/debug_regs.c  110
-rw-r--r--  tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c  262
-rw-r--r--  tools/testing/selftests/kvm/x86_64/evmcs_test.c  166
-rw-r--r--  tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c  42
-rw-r--r--  tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c  144
-rw-r--r--  tools/testing/selftests/kvm/x86_64/flds_emulation.h  52
-rw-r--r--  tools/testing/selftests/kvm/x86_64/get_msr_index_features.c  35
-rw-r--r--  tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c  47
-rw-r--r--  tools/testing/selftests/kvm/x86_64/hyperv_clock.c  263
-rw-r--r--  tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c  134
-rw-r--r--  tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c  310
-rw-r--r--  tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c  98
-rw-r--r--  tools/testing/selftests/kvm/x86_64/hyperv_features.c  701
-rw-r--r--  tools/testing/selftests/kvm/x86_64/hyperv_ipi.c  313
-rw-r--r--  tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c  200
-rw-r--r--  tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c  682
-rw-r--r--  tools/testing/selftests/kvm/x86_64/kvm_clock_test.c  156
-rw-r--r--  tools/testing/selftests/kvm/x86_64/kvm_pv_test.c  154
-rw-r--r--  tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c  44
-rw-r--r--  tools/testing/selftests/kvm/x86_64/mmio_warning_test.c  126
-rw-r--r--  tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c  131
-rw-r--r--  tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c  290
-rw-r--r--  tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c  269
-rwxr-xr-x  tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh  58
-rw-r--r--  tools/testing/selftests/kvm/x86_64/platform_info_test.c  66
-rw-r--r--  tools/testing/selftests/kvm/x86_64/pmu_counters_test.c  620
-rw-r--r--  tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c  910
-rw-r--r--  tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c  484
-rw-r--r--  tools/testing/selftests/kvm/x86_64/private_mem_kvm_exits_test.c  120
-rw-r--r--  tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c  74
-rw-r--r--  tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c  131
-rw-r--r--  tools/testing/selftests/kvm/x86_64/set_sregs_test.c  108
-rw-r--r--  tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c  397
-rw-r--r--  tools/testing/selftests/kvm/x86_64/sev_smoke_test.c  88
-rw-r--r--  tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c  111
-rw-r--r--  tools/testing/selftests/kvm/x86_64/smm_test.c  118
-rw-r--r--  tools/testing/selftests/kvm/x86_64/state_test.c  154
-rw-r--r--  tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c  121
-rw-r--r--  tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c  62
-rw-r--r--  tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c  213
-rw-r--r--  tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c  25
-rw-r--r--  tools/testing/selftests/kvm/x86_64/sync_regs_test.c  312
-rw-r--r--  tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c  124
-rw-r--r--  tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c  161
-rw-r--r--  tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c  110
-rw-r--r--  tools/testing/selftests/kvm/x86_64/ucna_injection_test.c  302
-rw-r--r--  tools/testing/selftests/kvm/x86_64/userspace_io_test.c  103
-rw-r--r--  tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c  780
-rw-r--r--  tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c  124
-rw-r--r--  tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c  31
-rw-r--r--  tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c  51
-rw-r--r--  tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c  145
-rw-r--r--  tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c  103
-rw-r--r--  tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c  131
-rw-r--r--  tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c  206
-rw-r--r--  tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c  228
-rw-r--r--  tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c  53
-rw-r--r--  tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c  138
-rw-r--r--  tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c  30
-rw-r--r--  tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c  491
-rw-r--r--  tools/testing/selftests/kvm/x86_64/xapic_state_test.c  215
-rw-r--r--  tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c  137
-rw-r--r--  tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c  1156
-rw-r--r--  tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c  142
-rw-r--r--  tools/testing/selftests/kvm/x86_64/xss_msr_test.c  58
68 files changed, 13656 insertions, 862 deletions
diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86_64/amx_test.c
new file mode 100644
index 000000000000..eae521f050e0
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/amx_test.c
@@ -0,0 +1,334 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * amx tests
+ *
+ * Copyright (C) 2021, Intel, Inc.
+ *
+ * Tests for amx #NM exception and save/restore.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/syscall.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#ifndef __x86_64__
+# error This test is 64-bit only
+#endif
+
+#define NUM_TILES 8
+#define TILE_SIZE 1024
+#define XSAVE_SIZE ((NUM_TILES * TILE_SIZE) + PAGE_SIZE)
+
+/* Constants associated with the tile configuration: */
+#define PALETTE_TABLE_INDEX 1
+#define MAX_TILES 16
+#define RESERVED_BYTES 14
+
+#define XSAVE_HDR_OFFSET 512
+
+struct tile_config {
+ u8 palette_id;
+ u8 start_row;
+ u8 reserved[RESERVED_BYTES];
+ u16 colsb[MAX_TILES];
+ u8 rows[MAX_TILES];
+};
+
+struct tile_data {
+ u8 data[NUM_TILES * TILE_SIZE];
+};
+
+struct xtile_info {
+ u16 bytes_per_tile;
+ u16 bytes_per_row;
+ u16 max_names;
+ u16 max_rows;
+ u32 xsave_offset;
+ u32 xsave_size;
+};
+
+static struct xtile_info xtile;
+
+static inline void __ldtilecfg(void *cfg)
+{
+ asm volatile(".byte 0xc4,0xe2,0x78,0x49,0x00"
+ : : "a"(cfg));
+}
+
+static inline void __tileloadd(void *tile)
+{
+ asm volatile(".byte 0xc4,0xe2,0x7b,0x4b,0x04,0x10"
+ : : "a"(tile), "d"(0));
+}
+
+static inline void __tilerelease(void)
+{
+ asm volatile(".byte 0xc4, 0xe2, 0x78, 0x49, 0xc0" ::);
+}
+
+static inline void __xsavec(struct xstate *xstate, uint64_t rfbm)
+{
+ uint32_t rfbm_lo = rfbm;
+ uint32_t rfbm_hi = rfbm >> 32;
+
+ asm volatile("xsavec (%%rdi)"
+ : : "D" (xstate), "a" (rfbm_lo), "d" (rfbm_hi)
+ : "memory");
+}
+
+static void check_xtile_info(void)
+{
+ GUEST_ASSERT(this_cpu_has_p(X86_PROPERTY_XSTATE_MAX_SIZE_XCR0));
+ GUEST_ASSERT(this_cpu_property(X86_PROPERTY_XSTATE_MAX_SIZE_XCR0) <= XSAVE_SIZE);
+
+ xtile.xsave_offset = this_cpu_property(X86_PROPERTY_XSTATE_TILE_OFFSET);
+ GUEST_ASSERT(xtile.xsave_offset == 2816);
+ xtile.xsave_size = this_cpu_property(X86_PROPERTY_XSTATE_TILE_SIZE);
+ GUEST_ASSERT(xtile.xsave_size == 8192);
+ GUEST_ASSERT(sizeof(struct tile_data) >= xtile.xsave_size);
+
+ GUEST_ASSERT(this_cpu_has_p(X86_PROPERTY_AMX_MAX_PALETTE_TABLES));
+ GUEST_ASSERT(this_cpu_property(X86_PROPERTY_AMX_MAX_PALETTE_TABLES) >=
+ PALETTE_TABLE_INDEX);
+
+ GUEST_ASSERT(this_cpu_has_p(X86_PROPERTY_AMX_NR_TILE_REGS));
+ xtile.max_names = this_cpu_property(X86_PROPERTY_AMX_NR_TILE_REGS);
+ GUEST_ASSERT(xtile.max_names == 8);
+ xtile.bytes_per_tile = this_cpu_property(X86_PROPERTY_AMX_BYTES_PER_TILE);
+ GUEST_ASSERT(xtile.bytes_per_tile == 1024);
+ xtile.bytes_per_row = this_cpu_property(X86_PROPERTY_AMX_BYTES_PER_ROW);
+ GUEST_ASSERT(xtile.bytes_per_row == 64);
+ xtile.max_rows = this_cpu_property(X86_PROPERTY_AMX_MAX_ROWS);
+ GUEST_ASSERT(xtile.max_rows == 16);
+}
+
+static void set_tilecfg(struct tile_config *cfg)
+{
+ int i;
+
+ /* Only palette id 1 */
+ cfg->palette_id = 1;
+ for (i = 0; i < xtile.max_names; i++) {
+ cfg->colsb[i] = xtile.bytes_per_row;
+ cfg->rows[i] = xtile.max_rows;
+ }
+}
+
+static void init_regs(void)
+{
+ uint64_t cr4, xcr0;
+
+ GUEST_ASSERT(this_cpu_has(X86_FEATURE_XSAVE));
+
+ /* turn on CR4.OSXSAVE */
+ cr4 = get_cr4();
+ cr4 |= X86_CR4_OSXSAVE;
+ set_cr4(cr4);
+ GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE));
+
+ xcr0 = xgetbv(0);
+ xcr0 |= XFEATURE_MASK_XTILE;
+ xsetbv(0x0, xcr0);
+ GUEST_ASSERT((xgetbv(0) & XFEATURE_MASK_XTILE) == XFEATURE_MASK_XTILE);
+}
+
+static void __attribute__((__flatten__)) guest_code(struct tile_config *amx_cfg,
+ struct tile_data *tiledata,
+ struct xstate *xstate)
+{
+ init_regs();
+ check_xtile_info();
+ GUEST_SYNC(1);
+
+ /* xfd=0, enable amx */
+ wrmsr(MSR_IA32_XFD, 0);
+ GUEST_SYNC(2);
+ GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == 0);
+ set_tilecfg(amx_cfg);
+ __ldtilecfg(amx_cfg);
+ GUEST_SYNC(3);
+ /* Check save/restore when trap to userspace */
+ __tileloadd(tiledata);
+ GUEST_SYNC(4);
+ __tilerelease();
+ GUEST_SYNC(5);
+ /*
+ * After XSAVEC, XTILEDATA is cleared in the xstate_bv but is set in
+ * the xcomp_bv.
+ */
+ xstate->header.xstate_bv = XFEATURE_MASK_XTILE_DATA;
+ __xsavec(xstate, XFEATURE_MASK_XTILE_DATA);
+ GUEST_ASSERT(!(xstate->header.xstate_bv & XFEATURE_MASK_XTILE_DATA));
+ GUEST_ASSERT(xstate->header.xcomp_bv & XFEATURE_MASK_XTILE_DATA);
+
+ /* xfd=0x40000, disable amx tiledata */
+ wrmsr(MSR_IA32_XFD, XFEATURE_MASK_XTILE_DATA);
+
+ /*
+ * XTILEDATA is cleared in xstate_bv but set in xcomp_bv, this property
+ * remains the same even when amx tiledata is disabled by IA32_XFD.
+ */
+ xstate->header.xstate_bv = XFEATURE_MASK_XTILE_DATA;
+ __xsavec(xstate, XFEATURE_MASK_XTILE_DATA);
+ GUEST_ASSERT(!(xstate->header.xstate_bv & XFEATURE_MASK_XTILE_DATA));
+ GUEST_ASSERT((xstate->header.xcomp_bv & XFEATURE_MASK_XTILE_DATA));
+
+ GUEST_SYNC(6);
+ GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA);
+ set_tilecfg(amx_cfg);
+ __ldtilecfg(amx_cfg);
+ /* Trigger #NM exception */
+ __tileloadd(tiledata);
+ GUEST_SYNC(10);
+
+ GUEST_DONE();
+}
+
+void guest_nm_handler(struct ex_regs *regs)
+{
+ /* Check if #NM is triggered by XFEATURE_MASK_XTILE_DATA */
+ GUEST_SYNC(7);
+ GUEST_ASSERT(!(get_cr0() & X86_CR0_TS));
+ GUEST_ASSERT(rdmsr(MSR_IA32_XFD_ERR) == XFEATURE_MASK_XTILE_DATA);
+ GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA);
+ GUEST_SYNC(8);
+ GUEST_ASSERT(rdmsr(MSR_IA32_XFD_ERR) == XFEATURE_MASK_XTILE_DATA);
+ GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA);
+ /* Clear xfd_err */
+ wrmsr(MSR_IA32_XFD_ERR, 0);
+ /* xfd=0, enable amx */
+ wrmsr(MSR_IA32_XFD, 0);
+ GUEST_SYNC(9);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_regs regs1, regs2;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct kvm_x86_state *state;
+ int xsave_restore_size;
+ vm_vaddr_t amx_cfg, tiledata, xstate;
+ struct ucall uc;
+ u32 amx_offset;
+ int ret;
+
+ /*
+ * Note, all off-by-default features must be enabled before anything
+ * caches KVM_GET_SUPPORTED_CPUID, e.g. before using kvm_cpu_has().
+ */
+ vm_xsave_require_permission(XFEATURE_MASK_XTILE_DATA);
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XFD));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_AMX_TILE));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILECFG));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILEDATA));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILEDATA_XFD));
+
+ /* Create VM */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ TEST_ASSERT(kvm_cpu_has_p(X86_PROPERTY_XSTATE_MAX_SIZE),
+ "KVM should enumerate max XSAVE size when XSAVE is supported");
+ xsave_restore_size = kvm_cpu_property(X86_PROPERTY_XSTATE_MAX_SIZE);
+
+ vcpu_regs_get(vcpu, &regs1);
+
+ /* Register #NM handler */
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+ vm_install_exception_handler(vm, NM_VECTOR, guest_nm_handler);
+
+ /* amx cfg for guest_code */
+ amx_cfg = vm_vaddr_alloc_page(vm);
+ memset(addr_gva2hva(vm, amx_cfg), 0x0, getpagesize());
+
+ /* amx tiledata for guest_code */
+ tiledata = vm_vaddr_alloc_pages(vm, 2);
+ memset(addr_gva2hva(vm, tiledata), rand() | 1, 2 * getpagesize());
+
+ /* XSAVE state for guest_code */
+ xstate = vm_vaddr_alloc_pages(vm, DIV_ROUND_UP(XSAVE_SIZE, PAGE_SIZE));
+ memset(addr_gva2hva(vm, xstate), 0, PAGE_SIZE * DIV_ROUND_UP(XSAVE_SIZE, PAGE_SIZE));
+ vcpu_args_set(vcpu, 3, amx_cfg, tiledata, xstate);
+
+ for (;;) {
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ case UCALL_SYNC:
+ switch (uc.args[1]) {
+ case 1:
+ case 2:
+ case 3:
+ case 5:
+ case 6:
+ case 7:
+ case 8:
+ fprintf(stderr, "GUEST_SYNC(%ld)\n", uc.args[1]);
+ break;
+ case 4:
+ case 10:
+ fprintf(stderr,
+ "GUEST_SYNC(%ld), check save/restore status\n", uc.args[1]);
+
+ /*
+  * Compacted format: the AMX tile data is the last XSAVE
+  * component, so its offset is the XSAVE area size minus
+  * the 8K of tile data.
+  */
+ amx_offset = xsave_restore_size - NUM_TILES*TILE_SIZE;
+ state = vcpu_save_state(vcpu);
+ void *amx_start = (void *)state->xsave + amx_offset;
+ void *tiles_data = (void *)addr_gva2hva(vm, tiledata);
+ /* Only check TMM0 register, 1 tile */
+ ret = memcmp(amx_start, tiles_data, TILE_SIZE);
+ TEST_ASSERT(ret == 0, "memcmp failed, ret=%d", ret);
+ kvm_x86_state_cleanup(state);
+ break;
+ case 9:
+ fprintf(stderr,
+ "GUEST_SYNC(%ld), #NM exception and enable amx\n", uc.args[1]);
+ break;
+ }
+ break;
+ case UCALL_DONE:
+ fprintf(stderr, "UCALL_DONE\n");
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+
+ state = vcpu_save_state(vcpu);
+ memset(&regs1, 0, sizeof(regs1));
+ vcpu_regs_get(vcpu, &regs1);
+
+ kvm_vm_release(vm);
+
+ /* Restore state in a new VM. */
+ vcpu = vm_recreate_with_one_vcpu(vm);
+ vcpu_load_state(vcpu, state);
+ kvm_x86_state_cleanup(state);
+
+ memset(&regs2, 0, sizeof(regs2));
+ vcpu_regs_get(vcpu, &regs2);
+ TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
+ "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
+ (ulong) regs2.rdi, (ulong) regs2.rsi);
+ }
+done:
+ kvm_vm_free(vm);
+}
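
The save/restore check above depends on XTILEDATA being the final component of the compacted XSAVE image, so its offset is simply the enumerated XSAVE size minus the 8 KiB of tile data. A minimal standalone sketch of that arithmetic, where the total image size is an assumed example value rather than something read from CPUID:

#include <stdint.h>
#include <stdio.h>

#define NUM_TILES 8
#define TILE_SIZE 1024

int main(void)
{
        uint32_t xsave_restore_size = 11008;    /* assumed example value */
        /* XTILEDATA sits at the end of the compacted image. */
        uint32_t amx_offset = xsave_restore_size - NUM_TILES * TILE_SIZE;

        printf("XTILEDATA expected at offset %u of the compacted XSAVE image\n",
               amx_offset);
        return 0;
}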
diff --git a/tools/testing/selftests/kvm/x86_64/cpuid_test.c b/tools/testing/selftests/kvm/x86_64/cpuid_test.c
new file mode 100644
index 000000000000..8c579ce714e9
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/cpuid_test.c
@@ -0,0 +1,208 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021, Red Hat Inc.
+ *
+ * Generic tests for KVM CPUID set/get ioctls
+ */
+#include <asm/kvm_para.h>
+#include <linux/kvm_para.h>
+#include <stdint.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+/* CPUIDs known to differ */
+struct {
+ u32 function;
+ u32 index;
+} mangled_cpuids[] = {
+ /*
+ * These entries depend on the vCPU's XCR0 register and IA32_XSS MSR,
+ * which are not controlled for by this test.
+ */
+ {.function = 0xd, .index = 0},
+ {.function = 0xd, .index = 1},
+};
+
+static void test_guest_cpuids(struct kvm_cpuid2 *guest_cpuid)
+{
+ int i;
+ u32 eax, ebx, ecx, edx;
+
+ for (i = 0; i < guest_cpuid->nent; i++) {
+ __cpuid(guest_cpuid->entries[i].function,
+ guest_cpuid->entries[i].index,
+ &eax, &ebx, &ecx, &edx);
+
+ GUEST_ASSERT_EQ(eax, guest_cpuid->entries[i].eax);
+ GUEST_ASSERT_EQ(ebx, guest_cpuid->entries[i].ebx);
+ GUEST_ASSERT_EQ(ecx, guest_cpuid->entries[i].ecx);
+ GUEST_ASSERT_EQ(edx, guest_cpuid->entries[i].edx);
+ }
+
+}
+
+static void guest_main(struct kvm_cpuid2 *guest_cpuid)
+{
+ GUEST_SYNC(1);
+
+ test_guest_cpuids(guest_cpuid);
+
+ GUEST_SYNC(2);
+
+ GUEST_ASSERT_EQ(this_cpu_property(X86_PROPERTY_MAX_KVM_LEAF), 0x40000001);
+
+ GUEST_DONE();
+}
+
+static bool is_cpuid_mangled(const struct kvm_cpuid_entry2 *entrie)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(mangled_cpuids); i++) {
+ if (mangled_cpuids[i].function == entrie->function &&
+ mangled_cpuids[i].index == entrie->index)
+ return true;
+ }
+
+ return false;
+}
+
+static void compare_cpuids(const struct kvm_cpuid2 *cpuid1,
+ const struct kvm_cpuid2 *cpuid2)
+{
+ const struct kvm_cpuid_entry2 *e1, *e2;
+ int i;
+
+ TEST_ASSERT(cpuid1->nent == cpuid2->nent,
+ "CPUID nent mismatch: %d vs. %d", cpuid1->nent, cpuid2->nent);
+
+ for (i = 0; i < cpuid1->nent; i++) {
+ e1 = &cpuid1->entries[i];
+ e2 = &cpuid2->entries[i];
+
+ TEST_ASSERT(e1->function == e2->function &&
+ e1->index == e2->index && e1->flags == e2->flags,
+ "CPUID entries[%d] mismtach: 0x%x.%d.%x vs. 0x%x.%d.%x",
+ i, e1->function, e1->index, e1->flags,
+ e2->function, e2->index, e2->flags);
+
+ if (is_cpuid_mangled(e1))
+ continue;
+
+ TEST_ASSERT(e1->eax == e2->eax && e1->ebx == e2->ebx &&
+ e1->ecx == e2->ecx && e1->edx == e2->edx,
+ "CPUID 0x%x.%x differ: 0x%x:0x%x:0x%x:0x%x vs 0x%x:0x%x:0x%x:0x%x",
+ e1->function, e1->index,
+ e1->eax, e1->ebx, e1->ecx, e1->edx,
+ e2->eax, e2->ebx, e2->ecx, e2->edx);
+ }
+}
+
+static void run_vcpu(struct kvm_vcpu *vcpu, int stage)
+{
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+ uc.args[1] == stage + 1,
+ "Stage %d: Unexpected register values vmexit, got %lx",
+ stage + 1, (ulong)uc.args[1]);
+ return;
+ case UCALL_DONE:
+ return;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ default:
+ TEST_ASSERT(false, "Unexpected exit: %s",
+ exit_reason_str(vcpu->run->exit_reason));
+ }
+}
+
+struct kvm_cpuid2 *vcpu_alloc_cpuid(struct kvm_vm *vm, vm_vaddr_t *p_gva, struct kvm_cpuid2 *cpuid)
+{
+ int size = sizeof(*cpuid) + cpuid->nent * sizeof(cpuid->entries[0]);
+ vm_vaddr_t gva = vm_vaddr_alloc(vm, size, KVM_UTIL_MIN_VADDR);
+ struct kvm_cpuid2 *guest_cpuids = addr_gva2hva(vm, gva);
+
+ memcpy(guest_cpuids, cpuid, size);
+
+ *p_gva = gva;
+ return guest_cpuids;
+}
+
+static void set_cpuid_after_run(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *ent;
+ int rc;
+ u32 eax, ebx, x;
+
+ /* Setting unmodified CPUID is allowed */
+ rc = __vcpu_set_cpuid(vcpu);
+ TEST_ASSERT(!rc, "Setting unmodified CPUID after KVM_RUN failed: %d", rc);
+
+ /* Changing CPU features is forbidden */
+ ent = vcpu_get_cpuid_entry(vcpu, 0x7);
+ ebx = ent->ebx;
+ ent->ebx--;
+ rc = __vcpu_set_cpuid(vcpu);
+ TEST_ASSERT(rc, "Changing CPU features should fail");
+ ent->ebx = ebx;
+
+ /* Changing MAXPHYADDR is forbidden */
+ ent = vcpu_get_cpuid_entry(vcpu, 0x80000008);
+ eax = ent->eax;
+ x = eax & 0xff;
+ ent->eax = (eax & ~0xffu) | (x - 1);
+ rc = __vcpu_set_cpuid(vcpu);
+ TEST_ASSERT(rc, "Changing MAXPHYADDR should fail");
+ ent->eax = eax;
+}
+
+static void test_get_cpuid2(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid2 *cpuid = allocate_kvm_cpuid2(vcpu->cpuid->nent + 1);
+ int i, r;
+
+ vcpu_ioctl(vcpu, KVM_GET_CPUID2, cpuid);
+ TEST_ASSERT(cpuid->nent == vcpu->cpuid->nent,
+ "KVM didn't update nent on success, wanted %u, got %u",
+ vcpu->cpuid->nent, cpuid->nent);
+
+ for (i = 0; i < vcpu->cpuid->nent; i++) {
+ cpuid->nent = i;
+ r = __vcpu_ioctl(vcpu, KVM_GET_CPUID2, cpuid);
+ TEST_ASSERT(r && errno == E2BIG, KVM_IOCTL_ERROR(KVM_GET_CPUID2, r));
+ TEST_ASSERT(cpuid->nent == i, "KVM modified nent on failure");
+ }
+ free(cpuid);
+}
+
+int main(void)
+{
+ struct kvm_vcpu *vcpu;
+ vm_vaddr_t cpuid_gva;
+ struct kvm_vm *vm;
+ int stage;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_main);
+
+ compare_cpuids(kvm_get_supported_cpuid(), vcpu->cpuid);
+
+ vcpu_alloc_cpuid(vm, &cpuid_gva, vcpu->cpuid);
+
+ vcpu_args_set(vcpu, 1, cpuid_gva);
+
+ for (stage = 0; stage < 3; stage++)
+ run_vcpu(vcpu, stage);
+
+ set_cpuid_after_run(vcpu);
+
+ test_get_cpuid2(vcpu);
+
+ kvm_vm_free(vm);
+}
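
set_cpuid_after_run() relies on MAXPHYADDR living in bits 7:0 of CPUID.0x80000008.EAX. A small standalone sketch of that encoding, using an assumed example EAX value instead of a real CPUID read:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint32_t eax = 0x00003028;      /* assumed example: MAXPHYADDR = 40 bits */
        uint32_t maxphyaddr = eax & 0xff;
        /* Drop one address bit, mirroring what set_cpuid_after_run() attempts. */
        uint32_t patched = (eax & ~0xffu) | (maxphyaddr - 1);

        printf("MAXPHYADDR %u -> %u (EAX 0x%x -> 0x%x)\n",
               maxphyaddr, patched & 0xff, eax, patched);
        return 0;
}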
diff --git a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
index 140e91901582..624dc725e14d 100644
--- a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
+++ b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
@@ -19,25 +19,11 @@
#include "kvm_util.h"
#include "processor.h"
-#define X86_FEATURE_XSAVE (1<<26)
-#define X86_FEATURE_OSXSAVE (1<<27)
-#define VCPU_ID 1
-
static inline bool cr4_cpuid_is_sync(void)
{
- int func, subfunc;
- uint32_t eax, ebx, ecx, edx;
- uint64_t cr4;
-
- func = 0x1;
- subfunc = 0x0;
- __asm__ __volatile__("cpuid"
- : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
- : "a"(func), "c"(subfunc));
+ uint64_t cr4 = get_cr4();
- cr4 = get_cr4();
-
- return (!!(ecx & X86_FEATURE_OSXSAVE)) == (!!(cr4 & X86_CR4_OSXSAVE));
+ return (this_cpu_has(X86_FEATURE_OSXSAVE) == !!(cr4 & X86_CR4_OSXSAVE));
}
static void guest_code(void)
@@ -63,45 +49,28 @@ static void guest_code(void)
int main(int argc, char *argv[])
{
- struct kvm_run *run;
+ struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
struct kvm_sregs sregs;
- struct kvm_cpuid_entry2 *entry;
struct ucall uc;
- int rc;
- entry = kvm_get_supported_cpuid_entry(1);
- if (!(entry->ecx & X86_FEATURE_XSAVE)) {
- print_skip("XSAVE feature not supported");
- return 0;
- }
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE));
- /* Tell stdout not to buffer its content */
- setbuf(stdout, NULL);
-
- /* Create VM */
- vm = vm_create_default(VCPU_ID, 0, guest_code);
- vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
- run = vcpu_state(vm, VCPU_ID);
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
while (1) {
- rc = _vcpu_run(vm, VCPU_ID);
-
- TEST_ASSERT(rc == 0, "vcpu_run failed: %d\n", rc);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Unexpected exit reason: %u (%s),\n",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
- switch (get_ucall(vm, VCPU_ID, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_SYNC:
/* emulate hypervisor clearing CR4.OSXSAVE */
- vcpu_sregs_get(vm, VCPU_ID, &sregs);
+ vcpu_sregs_get(vcpu, &sregs);
sregs.cr4 &= ~X86_CR4_OSXSAVE;
- vcpu_sregs_set(vm, VCPU_ID, &sregs);
+ vcpu_sregs_set(vcpu, &sregs);
break;
case UCALL_ABORT:
- TEST_FAIL("Guest CR4 bit (OSXSAVE) unsynchronized with CPUID bit.");
+ REPORT_GUEST_ASSERT(uc);
break;
case UCALL_DONE:
goto done;
@@ -110,8 +79,7 @@ int main(int argc, char *argv[])
}
}
- kvm_vm_free(vm);
-
done:
+ kvm_vm_free(vm);
return 0;
}
diff --git a/tools/testing/selftests/kvm/x86_64/debug_regs.c b/tools/testing/selftests/kvm/x86_64/debug_regs.c
index 8162c58a1234..f6b295e0b2d2 100644
--- a/tools/testing/selftests/kvm/x86_64/debug_regs.c
+++ b/tools/testing/selftests/kvm/x86_64/debug_regs.c
@@ -8,12 +8,13 @@
#include <string.h>
#include "kvm_util.h"
#include "processor.h"
-
-#define VCPU_ID 0
+#include "apic.h"
#define DR6_BD (1 << 13)
#define DR7_GD (1 << 13)
+#define IRQ_VECTOR 0xAA
+
/* For testing data access debug BP */
uint32_t guest_value;
@@ -21,6 +22,11 @@ extern unsigned char sw_bp, hw_bp, write_data, ss_start, bd_start;
static void guest_code(void)
{
+ /* Create a pending interrupt on current vCPU */
+ x2apic_enable();
+ x2apic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_INT_ASSERT |
+ APIC_DM_FIXED | IRQ_VECTOR);
+
/*
* Software BP tests.
*
@@ -38,77 +44,83 @@ static void guest_code(void)
"mov %%rax,%0;\n\t write_data:"
: "=m" (guest_value) : : "rax");
- /* Single step test, covers 2 basic instructions and 2 emulated */
+ /*
+ * Single step test, covers 2 basic instructions and 2 emulated
+ *
+ * Enable interrupts during the single stepping to verify that the
+ * pending interrupt we raised is not delivered while KVM_GUESTDBG_BLOCKIRQ is set
+ */
asm volatile("ss_start: "
- "xor %%rax,%%rax\n\t"
+ "sti\n\t"
+ "xor %%eax,%%eax\n\t"
"cpuid\n\t"
"movl $0x1a0,%%ecx\n\t"
"rdmsr\n\t"
- : : : "rax", "ecx");
+ "cli\n\t"
+ : : : "eax", "ebx", "ecx", "edx");
/* DR6.BD test */
asm volatile("bd_start: mov %%dr0, %%rax" : : : "rax");
GUEST_DONE();
}
-#define CLEAR_DEBUG() memset(&debug, 0, sizeof(debug))
-#define APPLY_DEBUG() vcpu_set_guest_debug(vm, VCPU_ID, &debug)
#define CAST_TO_RIP(v) ((unsigned long long)&(v))
-#define SET_RIP(v) do { \
- vcpu_regs_get(vm, VCPU_ID, &regs); \
- regs.rip = (v); \
- vcpu_regs_set(vm, VCPU_ID, &regs); \
- } while (0)
-#define MOVE_RIP(v) SET_RIP(regs.rip + (v));
+
+static void vcpu_skip_insn(struct kvm_vcpu *vcpu, int insn_len)
+{
+ struct kvm_regs regs;
+
+ vcpu_regs_get(vcpu, &regs);
+ regs.rip += insn_len;
+ vcpu_regs_set(vcpu, &regs);
+}
int main(void)
{
struct kvm_guest_debug debug;
unsigned long long target_dr6, target_rip;
- struct kvm_regs regs;
+ struct kvm_vcpu *vcpu;
struct kvm_run *run;
struct kvm_vm *vm;
struct ucall uc;
uint64_t cmd;
int i;
/* Instruction lengths starting at ss_start */
- int ss_size[4] = {
- 3, /* xor */
+ int ss_size[6] = {
+ 1, /* sti */
+ 2, /* xor */
2, /* cpuid */
5, /* mov */
2, /* rdmsr */
+ 1, /* cli */
};
- if (!kvm_check_cap(KVM_CAP_SET_GUEST_DEBUG)) {
- print_skip("KVM_CAP_SET_GUEST_DEBUG not supported");
- return 0;
- }
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_SET_GUEST_DEBUG));
- vm = vm_create_default(VCPU_ID, 0, guest_code);
- vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
- run = vcpu_state(vm, VCPU_ID);
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ run = vcpu->run;
/* Test software BPs - int3 */
- CLEAR_DEBUG();
+ memset(&debug, 0, sizeof(debug));
debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
- APPLY_DEBUG();
- vcpu_run(vm, VCPU_ID);
+ vcpu_guest_debug_set(vcpu, &debug);
+ vcpu_run(vcpu);
TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
run->debug.arch.exception == BP_VECTOR &&
run->debug.arch.pc == CAST_TO_RIP(sw_bp),
"INT3: exit %d exception %d rip 0x%llx (should be 0x%llx)",
run->exit_reason, run->debug.arch.exception,
run->debug.arch.pc, CAST_TO_RIP(sw_bp));
- MOVE_RIP(1);
+ vcpu_skip_insn(vcpu, 1);
/* Test instruction HW BP over DR[0-3] */
for (i = 0; i < 4; i++) {
- CLEAR_DEBUG();
+ memset(&debug, 0, sizeof(debug));
debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
debug.arch.debugreg[i] = CAST_TO_RIP(hw_bp);
debug.arch.debugreg[7] = 0x400 | (1UL << (2*i+1));
- APPLY_DEBUG();
- vcpu_run(vm, VCPU_ID);
+ vcpu_guest_debug_set(vcpu, &debug);
+ vcpu_run(vcpu);
target_dr6 = 0xffff0ff0 | (1UL << i);
TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
run->debug.arch.exception == DB_VECTOR &&
@@ -121,17 +133,17 @@ int main(void)
run->debug.arch.dr6, target_dr6);
}
/* Skip "nop" */
- MOVE_RIP(1);
+ vcpu_skip_insn(vcpu, 1);
/* Test data access HW BP over DR[0-3] */
for (i = 0; i < 4; i++) {
- CLEAR_DEBUG();
+ memset(&debug, 0, sizeof(debug));
debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
debug.arch.debugreg[i] = CAST_TO_RIP(guest_value);
debug.arch.debugreg[7] = 0x00000400 | (1UL << (2*i+1)) |
(0x000d0000UL << (4*i));
- APPLY_DEBUG();
- vcpu_run(vm, VCPU_ID);
+ vcpu_guest_debug_set(vcpu, &debug);
+ vcpu_run(vcpu);
target_dr6 = 0xffff0ff0 | (1UL << i);
TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
run->debug.arch.exception == DB_VECTOR &&
@@ -143,22 +155,22 @@ int main(void)
run->debug.arch.pc, CAST_TO_RIP(write_data),
run->debug.arch.dr6, target_dr6);
/* Rollback the 4-bytes "mov" */
- MOVE_RIP(-7);
+ vcpu_skip_insn(vcpu, -7);
}
/* Skip the 4-bytes "mov" */
- MOVE_RIP(7);
+ vcpu_skip_insn(vcpu, 7);
/* Test single step */
target_rip = CAST_TO_RIP(ss_start);
target_dr6 = 0xffff4ff0ULL;
- vcpu_regs_get(vm, VCPU_ID, &regs);
for (i = 0; i < (sizeof(ss_size) / sizeof(ss_size[0])); i++) {
target_rip += ss_size[i];
- CLEAR_DEBUG();
- debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
+ memset(&debug, 0, sizeof(debug));
+ debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP |
+ KVM_GUESTDBG_BLOCKIRQ;
debug.arch.debugreg[7] = 0x00000400;
- APPLY_DEBUG();
- vcpu_run(vm, VCPU_ID);
+ vcpu_guest_debug_set(vcpu, &debug);
+ vcpu_run(vcpu);
TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
run->debug.arch.exception == DB_VECTOR &&
run->debug.arch.pc == target_rip &&
@@ -171,11 +183,11 @@ int main(void)
}
/* Finally test global disable */
- CLEAR_DEBUG();
+ memset(&debug, 0, sizeof(debug));
debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
debug.arch.debugreg[7] = 0x400 | DR7_GD;
- APPLY_DEBUG();
- vcpu_run(vm, VCPU_ID);
+ vcpu_guest_debug_set(vcpu, &debug);
+ vcpu_run(vcpu);
target_dr6 = 0xffff0ff0 | DR6_BD;
TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
run->debug.arch.exception == DB_VECTOR &&
@@ -188,12 +200,12 @@ int main(void)
target_dr6);
/* Disable all debug controls, run to the end */
- CLEAR_DEBUG();
- APPLY_DEBUG();
+ memset(&debug, 0, sizeof(debug));
+ vcpu_guest_debug_set(vcpu, &debug);
- vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "KVM_EXIT_IO");
- cmd = get_ucall(vm, VCPU_ID, &uc);
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+ cmd = get_ucall(vcpu, &uc);
TEST_ASSERT(cmd == UCALL_DONE, "UCALL_DONE");
kvm_vm_free(vm);
diff --git a/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c b/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c
new file mode 100644
index 000000000000..ee3b384b991c
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c
@@ -0,0 +1,262 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM dirty logging page splitting test
+ *
+ * Based on dirty_log_perf.c
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ * Copyright (C) 2023, Google, Inc.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <linux/bitmap.h>
+
+#include "kvm_util.h"
+#include "test_util.h"
+#include "memstress.h"
+#include "guest_modes.h"
+
+#define VCPUS 2
+#define SLOTS 2
+#define ITERATIONS 2
+
+static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
+
+static enum vm_mem_backing_src_type backing_src = VM_MEM_SRC_ANONYMOUS_HUGETLB;
+
+static u64 dirty_log_manual_caps;
+static bool host_quit;
+static int iteration;
+static int vcpu_last_completed_iteration[KVM_MAX_VCPUS];
+
+struct kvm_page_stats {
+ uint64_t pages_4k;
+ uint64_t pages_2m;
+ uint64_t pages_1g;
+ uint64_t hugepages;
+};
+
+static void get_page_stats(struct kvm_vm *vm, struct kvm_page_stats *stats, const char *stage)
+{
+ stats->pages_4k = vm_get_stat(vm, "pages_4k");
+ stats->pages_2m = vm_get_stat(vm, "pages_2m");
+ stats->pages_1g = vm_get_stat(vm, "pages_1g");
+ stats->hugepages = stats->pages_2m + stats->pages_1g;
+
+ pr_debug("\nPage stats after %s: 4K: %ld 2M: %ld 1G: %ld huge: %ld\n",
+ stage, stats->pages_4k, stats->pages_2m, stats->pages_1g,
+ stats->hugepages);
+}
+
+static void run_vcpu_iteration(struct kvm_vm *vm)
+{
+ int i;
+
+ iteration++;
+ for (i = 0; i < VCPUS; i++) {
+ while (READ_ONCE(vcpu_last_completed_iteration[i]) !=
+ iteration)
+ ;
+ }
+}
+
+static void vcpu_worker(struct memstress_vcpu_args *vcpu_args)
+{
+ struct kvm_vcpu *vcpu = vcpu_args->vcpu;
+ int vcpu_idx = vcpu_args->vcpu_idx;
+
+ while (!READ_ONCE(host_quit)) {
+ int current_iteration = READ_ONCE(iteration);
+
+ vcpu_run(vcpu);
+
+ TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_SYNC);
+
+ vcpu_last_completed_iteration[vcpu_idx] = current_iteration;
+
+ /* Wait for the start of the next iteration to be signaled. */
+ while (current_iteration == READ_ONCE(iteration) &&
+ READ_ONCE(iteration) >= 0 &&
+ !READ_ONCE(host_quit))
+ ;
+ }
+}
+
+static void run_test(enum vm_guest_mode mode, void *unused)
+{
+ struct kvm_vm *vm;
+ unsigned long **bitmaps;
+ uint64_t guest_num_pages;
+ uint64_t host_num_pages;
+ uint64_t pages_per_slot;
+ int i;
+ struct kvm_page_stats stats_populated;
+ struct kvm_page_stats stats_dirty_logging_enabled;
+ struct kvm_page_stats stats_dirty_pass[ITERATIONS];
+ struct kvm_page_stats stats_clear_pass[ITERATIONS];
+ struct kvm_page_stats stats_dirty_logging_disabled;
+ struct kvm_page_stats stats_repopulated;
+
+ vm = memstress_create_vm(mode, VCPUS, guest_percpu_mem_size,
+ SLOTS, backing_src, false);
+
+ guest_num_pages = (VCPUS * guest_percpu_mem_size) >> vm->page_shift;
+ guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
+ host_num_pages = vm_num_host_pages(mode, guest_num_pages);
+ pages_per_slot = host_num_pages / SLOTS;
+ TEST_ASSERT_EQ(host_num_pages, pages_per_slot * SLOTS);
+ TEST_ASSERT(!(host_num_pages % 512),
+ "Number of pages, '%lu' not a multiple of 2MiB", host_num_pages);
+
+ bitmaps = memstress_alloc_bitmaps(SLOTS, pages_per_slot);
+
+ if (dirty_log_manual_caps)
+ vm_enable_cap(vm, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2,
+ dirty_log_manual_caps);
+
+ /* Start the iterations */
+ iteration = -1;
+ host_quit = false;
+
+ for (i = 0; i < VCPUS; i++)
+ vcpu_last_completed_iteration[i] = -1;
+
+ memstress_start_vcpu_threads(VCPUS, vcpu_worker);
+
+ run_vcpu_iteration(vm);
+ get_page_stats(vm, &stats_populated, "populating memory");
+
+ /* Enable dirty logging */
+ memstress_enable_dirty_logging(vm, SLOTS);
+
+ get_page_stats(vm, &stats_dirty_logging_enabled, "enabling dirty logging");
+
+ while (iteration < ITERATIONS) {
+ run_vcpu_iteration(vm);
+ get_page_stats(vm, &stats_dirty_pass[iteration - 1],
+ "dirtying memory");
+
+ memstress_get_dirty_log(vm, bitmaps, SLOTS);
+
+ if (dirty_log_manual_caps) {
+ memstress_clear_dirty_log(vm, bitmaps, SLOTS, pages_per_slot);
+
+ get_page_stats(vm, &stats_clear_pass[iteration - 1], "clearing dirty log");
+ }
+ }
+
+ /* Disable dirty logging */
+ memstress_disable_dirty_logging(vm, SLOTS);
+
+ get_page_stats(vm, &stats_dirty_logging_disabled, "disabling dirty logging");
+
+ /* Run vCPUs again to fault pages back in. */
+ run_vcpu_iteration(vm);
+ get_page_stats(vm, &stats_repopulated, "repopulating memory");
+
+ /*
+ * Tell the vCPU threads to quit. No need to manually check that vCPUs
+ * have stopped running after disabling dirty logging, the join will
+ * wait for them to exit.
+ */
+ host_quit = true;
+ memstress_join_vcpu_threads(VCPUS);
+
+ memstress_free_bitmaps(bitmaps, SLOTS);
+ memstress_destroy_vm(vm);
+
+ TEST_ASSERT_EQ((stats_populated.pages_2m * 512 +
+ stats_populated.pages_1g * 512 * 512), host_num_pages);
+
+ /*
+ * Check that all huge pages were split. Since large pages can only
+ * exist in the data slot, and the vCPUs should have dirtied all pages
+ * in the data slot, there should be no huge pages left after splitting.
+ * Splitting happens at dirty log enable time without
+ * KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 and after the first clear pass
+ * with that capability.
+ */
+ if (dirty_log_manual_caps) {
+ TEST_ASSERT_EQ(stats_clear_pass[0].hugepages, 0);
+ TEST_ASSERT(stats_clear_pass[0].pages_4k >= host_num_pages,
+ "Expected at least '%lu' 4KiB pages, found only '%lu'",
+ host_num_pages, stats_clear_pass[0].pages_4k);
+ TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, stats_populated.hugepages);
+ } else {
+ TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, 0);
+ TEST_ASSERT(stats_dirty_logging_enabled.pages_4k >= host_num_pages,
+ "Expected at least '%lu' 4KiB pages, found only '%lu'",
+ host_num_pages, stats_dirty_logging_enabled.pages_4k);
+ }
+
+ /*
+ * Once dirty logging is disabled and the vCPUs have touched all their
+ * memory again, the hugepage counts should be the same as they were
+ * right after initial population of memory.
+ */
+ TEST_ASSERT_EQ(stats_populated.pages_2m, stats_repopulated.pages_2m);
+ TEST_ASSERT_EQ(stats_populated.pages_1g, stats_repopulated.pages_1g);
+}
+
+static void help(char *name)
+{
+ puts("");
+ printf("usage: %s [-h] [-b vcpu bytes] [-s mem type]\n",
+ name);
+ puts("");
+ printf(" -b: specify the size of the memory region which should be\n"
+ " dirtied by each vCPU. e.g. 10M or 3G.\n"
+ " (default: 1G)\n");
+ backing_src_help("-s");
+ puts("");
+}
+
+int main(int argc, char *argv[])
+{
+ int opt;
+
+ TEST_REQUIRE(get_kvm_param_bool("eager_page_split"));
+ TEST_REQUIRE(get_kvm_param_bool("tdp_mmu"));
+
+ while ((opt = getopt(argc, argv, "b:hs:")) != -1) {
+ switch (opt) {
+ case 'b':
+ guest_percpu_mem_size = parse_size(optarg);
+ break;
+ case 'h':
+ help(argv[0]);
+ exit(0);
+ case 's':
+ backing_src = parse_backing_src_type(optarg);
+ break;
+ default:
+ help(argv[0]);
+ exit(1);
+ }
+ }
+
+ if (!is_backing_src_hugetlb(backing_src)) {
+ pr_info("This test will only work reliably with HugeTLB memory. "
+ "It can work with THP, but that is best effort.\n");
+ }
+
+ guest_modes_append_default();
+
+ dirty_log_manual_caps = 0;
+ for_each_guest_mode(run_test, NULL);
+
+ dirty_log_manual_caps =
+ kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
+
+ if (dirty_log_manual_caps) {
+ dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
+ KVM_DIRTY_LOG_INITIALLY_SET);
+ for_each_guest_mode(run_test, NULL);
+ } else {
+ pr_info("Skipping testing with MANUAL_PROTECT as it is not supported");
+ }
+
+ return 0;
+}
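
The final assertions above hinge on simple hugepage accounting: a 2M page covers 512 4K pages and a 1G page covers 512 * 512 of them, so after population the hugepage-backed total must equal the whole region. A standalone sketch of that check with assumed example stat values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        /* Assumed example stats: 2 GiB of guest memory backed by 2M pages. */
        uint64_t pages_2m = 1024;
        uint64_t pages_1g = 0;
        uint64_t host_num_pages = 2ull * 1024 * 1024 * 1024 / 4096;

        uint64_t covered = pages_2m * 512 + pages_1g * 512 * 512;

        printf("%s: %llu of %llu 4K pages are hugepage-backed\n",
               covered == host_num_pages ? "OK" : "MISMATCH",
               (unsigned long long)covered,
               (unsigned long long)host_num_pages);
        return 0;
}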
diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
deleted file mode 100644
index 757928199f19..000000000000
--- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c
+++ /dev/null
@@ -1,166 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2018, Red Hat, Inc.
- *
- * Tests for Enlightened VMCS, including nested guest state.
- */
-#define _GNU_SOURCE /* for program_invocation_short_name */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-
-#include "kvm_util.h"
-
-#include "vmx.h"
-
-#define VCPU_ID 5
-
-void l2_guest_code(void)
-{
- GUEST_SYNC(7);
-
- GUEST_SYNC(8);
-
- /* Done, exit to L1 and never come back. */
- vmcall();
-}
-
-void l1_guest_code(struct vmx_pages *vmx_pages)
-{
-#define L2_GUEST_STACK_SIZE 64
- unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-
- enable_vp_assist(vmx_pages->vp_assist_gpa, vmx_pages->vp_assist);
-
- GUEST_ASSERT(vmx_pages->vmcs_gpa);
- GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
- GUEST_SYNC(3);
- GUEST_ASSERT(load_vmcs(vmx_pages));
- GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa);
-
- GUEST_SYNC(4);
- GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa);
-
- prepare_vmcs(vmx_pages, l2_guest_code,
- &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-
- GUEST_SYNC(5);
- GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa);
- current_evmcs->revision_id = -1u;
- GUEST_ASSERT(vmlaunch());
- current_evmcs->revision_id = EVMCS_VERSION;
- GUEST_SYNC(6);
-
- GUEST_ASSERT(!vmlaunch());
- GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa);
- GUEST_SYNC(9);
- GUEST_ASSERT(!vmresume());
- GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
- GUEST_SYNC(10);
-}
-
-void guest_code(struct vmx_pages *vmx_pages)
-{
- GUEST_SYNC(1);
- GUEST_SYNC(2);
-
- if (vmx_pages)
- l1_guest_code(vmx_pages);
-
- GUEST_DONE();
-
- /* Try enlightened vmptrld with an incorrect GPA */
- evmcs_vmptrld(0xdeadbeef, vmx_pages->enlightened_vmcs);
- GUEST_ASSERT(vmlaunch());
-}
-
-int main(int argc, char *argv[])
-{
- vm_vaddr_t vmx_pages_gva = 0;
-
- struct kvm_regs regs1, regs2;
- struct kvm_vm *vm;
- struct kvm_run *run;
- struct kvm_x86_state *state;
- struct ucall uc;
- int stage;
-
- /* Create VM */
- vm = vm_create_default(VCPU_ID, 0, guest_code);
-
- vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
-
- if (!nested_vmx_supported() ||
- !kvm_check_cap(KVM_CAP_NESTED_STATE) ||
- !kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
- print_skip("Enlightened VMCS is unsupported");
- exit(KSFT_SKIP);
- }
-
- vcpu_enable_evmcs(vm, VCPU_ID);
-
- run = vcpu_state(vm, VCPU_ID);
-
- vcpu_regs_get(vm, VCPU_ID, &regs1);
-
- vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
-
- for (stage = 1;; stage++) {
- _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Stage %d: unexpected exit reason: %u (%s),\n",
- stage, run->exit_reason,
- exit_reason_str(run->exit_reason));
-
- switch (get_ucall(vm, VCPU_ID, &uc)) {
- case UCALL_ABORT:
- TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
- __FILE__, uc.args[1]);
- /* NOT REACHED */
- case UCALL_SYNC:
- break;
- case UCALL_DONE:
- goto part1_done;
- default:
- TEST_FAIL("Unknown ucall %lu", uc.cmd);
- }
-
- /* UCALL_SYNC is handled here. */
- TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
- uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
- stage, (ulong)uc.args[1]);
-
- state = vcpu_save_state(vm, VCPU_ID);
- memset(&regs1, 0, sizeof(regs1));
- vcpu_regs_get(vm, VCPU_ID, &regs1);
-
- kvm_vm_release(vm);
-
- /* Restore state in a new VM. */
- kvm_vm_restart(vm, O_RDWR);
- vm_vcpu_add(vm, VCPU_ID);
- vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
- vcpu_enable_evmcs(vm, VCPU_ID);
- vcpu_load_state(vm, VCPU_ID, state);
- run = vcpu_state(vm, VCPU_ID);
- free(state);
-
- memset(&regs2, 0, sizeof(regs2));
- vcpu_regs_get(vm, VCPU_ID, &regs2);
- TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
- "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
- (ulong) regs2.rdi, (ulong) regs2.rsi);
- }
-
-part1_done:
- _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN,
- "Unexpected successful VMEnter with invalid eVMCS pointer!");
-
- kvm_vm_free(vm);
-}
diff --git a/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c b/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c
new file mode 100644
index 000000000000..6c2e5e0ceb1f
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022, Google LLC.
+ *
+ * Test for KVM_CAP_EXIT_ON_EMULATION_FAILURE.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+
+#include "flds_emulation.h"
+
+#include "test_util.h"
+
+#define MMIO_GPA 0x700000000
+#define MMIO_GVA MMIO_GPA
+
+static void guest_code(void)
+{
+ /* Execute flds with an MMIO address to force KVM to emulate it. */
+ flds(MMIO_GVA);
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_EXIT_ON_EMULATION_FAILURE));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ vm_enable_cap(vm, KVM_CAP_EXIT_ON_EMULATION_FAILURE, 1);
+ virt_map(vm, MMIO_GVA, MMIO_GPA, 1);
+
+ vcpu_run(vcpu);
+ handle_flds_emulation_failure_exit(vcpu);
+ vcpu_run(vcpu);
+ TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c b/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c
new file mode 100644
index 000000000000..f3c2239228b1
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020, Google LLC.
+ *
+ * Tests for KVM paravirtual feature disablement
+ */
+#include <asm/kvm_para.h>
+#include <linux/kvm_para.h>
+#include <linux/stringify.h>
+#include <stdint.h>
+
+#include "kvm_test_harness.h"
+#include "apic.h"
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+/* VMCALL and VMMCALL are both 3-byte opcodes. */
+#define HYPERCALL_INSN_SIZE 3
+
+static bool quirk_disabled;
+
+static void guest_ud_handler(struct ex_regs *regs)
+{
+ regs->rax = -EFAULT;
+ regs->rip += HYPERCALL_INSN_SIZE;
+}
+
+static const uint8_t vmx_vmcall[HYPERCALL_INSN_SIZE] = { 0x0f, 0x01, 0xc1 };
+static const uint8_t svm_vmmcall[HYPERCALL_INSN_SIZE] = { 0x0f, 0x01, 0xd9 };
+
+extern uint8_t hypercall_insn[HYPERCALL_INSN_SIZE];
+static uint64_t do_sched_yield(uint8_t apic_id)
+{
+ uint64_t ret;
+
+ asm volatile("hypercall_insn:\n\t"
+ ".byte 0xcc,0xcc,0xcc\n\t"
+ : "=a"(ret)
+ : "a"((uint64_t)KVM_HC_SCHED_YIELD), "b"((uint64_t)apic_id)
+ : "memory");
+
+ return ret;
+}
+
+static void guest_main(void)
+{
+ const uint8_t *native_hypercall_insn;
+ const uint8_t *other_hypercall_insn;
+ uint64_t ret;
+
+ if (host_cpu_is_intel) {
+ native_hypercall_insn = vmx_vmcall;
+ other_hypercall_insn = svm_vmmcall;
+ } else if (host_cpu_is_amd) {
+ native_hypercall_insn = svm_vmmcall;
+ other_hypercall_insn = vmx_vmcall;
+ } else {
+ GUEST_ASSERT(0);
+ /* unreachable */
+ return;
+ }
+
+ memcpy(hypercall_insn, other_hypercall_insn, HYPERCALL_INSN_SIZE);
+
+ ret = do_sched_yield(GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID)));
+
+ /*
+ * If the quirk is disabled, verify that guest_ud_handler() "returned"
+ * -EFAULT and that KVM did NOT patch the hypercall. If the quirk is
+ * enabled, verify that the hypercall succeeded and that KVM patched in
+ * the "right" hypercall.
+ */
+ if (quirk_disabled) {
+ GUEST_ASSERT(ret == (uint64_t)-EFAULT);
+ GUEST_ASSERT(!memcmp(other_hypercall_insn, hypercall_insn,
+ HYPERCALL_INSN_SIZE));
+ } else {
+ GUEST_ASSERT(!ret);
+ GUEST_ASSERT(!memcmp(native_hypercall_insn, hypercall_insn,
+ HYPERCALL_INSN_SIZE));
+ }
+
+ GUEST_DONE();
+}
+
+KVM_ONE_VCPU_TEST_SUITE(fix_hypercall);
+
+static void enter_guest(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ pr_info("%s: %016lx\n", (const char *)uc.args[2], uc.args[3]);
+ break;
+ case UCALL_DONE:
+ return;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ default:
+ TEST_FAIL("Unhandled ucall: %ld\nexit_reason: %u (%s)",
+ uc.cmd, run->exit_reason, exit_reason_str(run->exit_reason));
+ }
+}
+
+static void test_fix_hypercall(struct kvm_vcpu *vcpu, bool disable_quirk)
+{
+ struct kvm_vm *vm = vcpu->vm;
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+ vm_install_exception_handler(vcpu->vm, UD_VECTOR, guest_ud_handler);
+
+ if (disable_quirk)
+ vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2,
+ KVM_X86_QUIRK_FIX_HYPERCALL_INSN);
+
+ quirk_disabled = disable_quirk;
+ sync_global_to_guest(vm, quirk_disabled);
+
+ virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
+
+ enter_guest(vcpu);
+}
+
+KVM_ONE_VCPU_TEST(fix_hypercall, enable_quirk, guest_main)
+{
+ test_fix_hypercall(vcpu, false);
+}
+
+KVM_ONE_VCPU_TEST(fix_hypercall, disable_quirk, guest_main)
+{
+ test_fix_hypercall(vcpu, true);
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & KVM_X86_QUIRK_FIX_HYPERCALL_INSN);
+
+ return test_harness_run(argc, argv);
+}
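
The patching check above works because VMCALL and VMMCALL are both 3-byte opcodes that differ only in the last byte. A standalone sketch that mimics the before/after state of the instruction buffer; no KVM is involved, the second memcpy simply stands in for what the quirk does:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static const uint8_t vmx_vmcall[3]  = { 0x0f, 0x01, 0xc1 };     /* VMCALL */
static const uint8_t svm_vmmcall[3] = { 0x0f, 0x01, 0xd9 };     /* VMMCALL */

int main(void)
{
        uint8_t insn[3];

        /* The guest deliberately writes the "other" vendor's instruction... */
        memcpy(insn, svm_vmmcall, sizeof(insn));

        /* ...and, with the quirk enabled, KVM rewrites it to the native one. */
        memcpy(insn, vmx_vmcall, sizeof(insn));

        printf("buffer now holds %s\n",
               !memcmp(insn, vmx_vmcall, 3) ? "VMCALL" : "VMMCALL");
        return 0;
}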
diff --git a/tools/testing/selftests/kvm/x86_64/flds_emulation.h b/tools/testing/selftests/kvm/x86_64/flds_emulation.h
new file mode 100644
index 000000000000..37b1a9f52864
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/flds_emulation.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_FLDS_EMULATION_H
+#define SELFTEST_KVM_FLDS_EMULATION_H
+
+#include "kvm_util.h"
+
+#define FLDS_MEM_EAX ".byte 0xd9, 0x00"
+
+/*
+ * flds is an instruction that the KVM instruction emulator is known not to
+ * support. This can be used in guest code along with a mechanism to force
+ * KVM to emulate the instruction (e.g. by providing an MMIO address) to
+ * exercise emulation failures.
+ */
+static inline void flds(uint64_t address)
+{
+ __asm__ __volatile__(FLDS_MEM_EAX :: "a"(address));
+}
+
+static inline void handle_flds_emulation_failure_exit(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+ struct kvm_regs regs;
+ uint8_t *insn_bytes;
+ uint64_t flags;
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR);
+
+ TEST_ASSERT(run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION,
+ "Unexpected suberror: %u",
+ run->emulation_failure.suberror);
+
+ flags = run->emulation_failure.flags;
+ TEST_ASSERT(run->emulation_failure.ndata >= 3 &&
+ flags & KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES,
+ "run->emulation_failure is missing instruction bytes");
+
+ TEST_ASSERT(run->emulation_failure.insn_size >= 2,
+ "Expected a 2-byte opcode for 'flds', got %d bytes",
+ run->emulation_failure.insn_size);
+
+ insn_bytes = run->emulation_failure.insn_bytes;
+ TEST_ASSERT(insn_bytes[0] == 0xd9 && insn_bytes[1] == 0,
+ "Expected 'flds [eax]', opcode '0xd9 0x00', got opcode 0x%02x 0x%02x",
+ insn_bytes[0], insn_bytes[1]);
+
+ vcpu_regs_get(vcpu, &regs);
+ regs.rip += 2;
+ vcpu_regs_set(vcpu, &regs);
+}
+
+#endif /* !SELFTEST_KVM_FLDS_EMULATION_H */
diff --git a/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c b/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c
new file mode 100644
index 000000000000..d09b3cbcadc6
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test that KVM_GET_MSR_INDEX_LIST and
+ * KVM_GET_MSR_FEATURE_INDEX_LIST work as intended
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+int main(int argc, char *argv[])
+{
+ const struct kvm_msr_list *feature_list;
+ int i;
+
+ /*
+ * Skip the entire test if MSR_FEATURES isn't supported; other tests
+ * will cover the "regular" list of MSRs, and the coverage here is
+ * purely opportunistic and not interesting on its own.
+ */
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_GET_MSR_FEATURES));
+
+ (void)kvm_get_msr_index_list();
+
+ feature_list = kvm_get_feature_msr_index_list();
+ for (i = 0; i < feature_list->nmsrs; i++)
+ kvm_get_feature_msr(feature_list->indices[i]);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c b/tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c
new file mode 100644
index 000000000000..df351ae17029
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023, Google LLC.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "vmx.h"
+
+void test_hwcr_bit(struct kvm_vcpu *vcpu, unsigned int bit)
+{
+ const uint64_t ignored = BIT_ULL(3) | BIT_ULL(6) | BIT_ULL(8);
+ const uint64_t valid = BIT_ULL(18) | BIT_ULL(24);
+ const uint64_t legal = ignored | valid;
+ uint64_t val = BIT_ULL(bit);
+ uint64_t actual;
+ int r;
+
+ r = _vcpu_set_msr(vcpu, MSR_K7_HWCR, val);
+ TEST_ASSERT(val & ~legal ? !r : r == 1,
+ "Expected KVM_SET_MSRS(MSR_K7_HWCR) = 0x%lx to %s",
+ val, val & ~legal ? "fail" : "succeed");
+
+ actual = vcpu_get_msr(vcpu, MSR_K7_HWCR);
+ TEST_ASSERT(actual == (val & valid),
+ "Bit %u: unexpected HWCR 0x%lx; expected 0x%lx",
+ bit, actual, (val & valid));
+
+ vcpu_set_msr(vcpu, MSR_K7_HWCR, 0);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+ unsigned int bit;
+
+ vm = vm_create_with_one_vcpu(&vcpu, NULL);
+
+ for (bit = 0; bit < BITS_PER_LONG; bit++)
+ test_hwcr_bit(vcpu, bit);
+
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
new file mode 100644
index 000000000000..e058bc676cd6
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
@@ -0,0 +1,263 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021, Red Hat, Inc.
+ *
+ * Tests for Hyper-V clocksources
+ */
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "hyperv.h"
+
+struct ms_hyperv_tsc_page {
+ volatile u32 tsc_sequence;
+ u32 reserved1;
+ volatile u64 tsc_scale;
+ volatile s64 tsc_offset;
+} __packed;
+
+/* Simplified mul_u64_u64_shr() */
+static inline u64 mul_u64_u64_shr64(u64 a, u64 b)
+{
+ union {
+ u64 ll;
+ struct {
+ u32 low, high;
+ } l;
+ } rm, rn, rh, a0, b0;
+ u64 c;
+
+ a0.ll = a;
+ b0.ll = b;
+
+ rm.ll = (u64)a0.l.low * b0.l.high;
+ rn.ll = (u64)a0.l.high * b0.l.low;
+ rh.ll = (u64)a0.l.high * b0.l.high;
+
+ rh.l.low = c = rm.l.high + rn.l.high + rh.l.low;
+ rh.l.high = (c >> 32) + rh.l.high;
+
+ return rh.ll;
+}
+
+static inline void nop_loop(void)
+{
+ int i;
+
+ for (i = 0; i < 100000000; i++)
+ asm volatile("nop");
+}
+
+static inline void check_tsc_msr_rdtsc(void)
+{
+ u64 tsc_freq, r1, r2, t1, t2;
+ s64 delta_ns;
+
+ tsc_freq = rdmsr(HV_X64_MSR_TSC_FREQUENCY);
+ GUEST_ASSERT(tsc_freq > 0);
+
+ /* For increased accuracy, take the mean of rdtsc() before and after rdmsr() */
+ r1 = rdtsc();
+ t1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+ r1 = (r1 + rdtsc()) / 2;
+ nop_loop();
+ r2 = rdtsc();
+ t2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+ r2 = (r2 + rdtsc()) / 2;
+
+ GUEST_ASSERT(r2 > r1 && t2 > t1);
+
+ /* HV_X64_MSR_TIME_REF_COUNT is in 100ns */
+ delta_ns = ((t2 - t1) * 100) - ((r2 - r1) * 1000000000 / tsc_freq);
+ if (delta_ns < 0)
+ delta_ns = -delta_ns;
+
+ /* 1% tolerance */
+ GUEST_ASSERT(delta_ns * 100 < (t2 - t1) * 100);
+}
+
+static inline u64 get_tscpage_ts(struct ms_hyperv_tsc_page *tsc_page)
+{
+ return mul_u64_u64_shr64(rdtsc(), tsc_page->tsc_scale) + tsc_page->tsc_offset;
+}
+
+static inline void check_tsc_msr_tsc_page(struct ms_hyperv_tsc_page *tsc_page)
+{
+ u64 r1, r2, t1, t2;
+
+ /* Compare TSC page clocksource with HV_X64_MSR_TIME_REF_COUNT */
+ t1 = get_tscpage_ts(tsc_page);
+ r1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+
+ /* 10 ms tolerance */
+ GUEST_ASSERT(r1 >= t1 && r1 - t1 < 100000);
+ nop_loop();
+
+ t2 = get_tscpage_ts(tsc_page);
+ r2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+ GUEST_ASSERT(r2 >= t1 && r2 - t2 < 100000);
+}
+
+static void guest_main(struct ms_hyperv_tsc_page *tsc_page, vm_paddr_t tsc_page_gpa)
+{
+ u64 tsc_scale, tsc_offset;
+
+ /* Set Guest OS id to enable Hyper-V emulation */
+ GUEST_SYNC(1);
+ wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+ GUEST_SYNC(2);
+
+ check_tsc_msr_rdtsc();
+
+ GUEST_SYNC(3);
+
+ /* Set up the TSC page in disabled state, check that it's clean */
+ wrmsr(HV_X64_MSR_REFERENCE_TSC, tsc_page_gpa);
+ GUEST_ASSERT(tsc_page->tsc_sequence == 0);
+ GUEST_ASSERT(tsc_page->tsc_scale == 0);
+ GUEST_ASSERT(tsc_page->tsc_offset == 0);
+
+ GUEST_SYNC(4);
+
+ /* Set up the TSC page in enabled state */
+ wrmsr(HV_X64_MSR_REFERENCE_TSC, tsc_page_gpa | 0x1);
+ GUEST_ASSERT(tsc_page->tsc_sequence != 0);
+
+ GUEST_SYNC(5);
+
+ check_tsc_msr_tsc_page(tsc_page);
+
+ GUEST_SYNC(6);
+
+ tsc_offset = tsc_page->tsc_offset;
+ /* Call KVM_SET_CLOCK from userspace, check that TSC page was updated */
+
+ GUEST_SYNC(7);
+ /* Sanity check TSC page timestamp, it should be close to 0 */
+ GUEST_ASSERT(get_tscpage_ts(tsc_page) < 100000);
+
+ GUEST_ASSERT(tsc_page->tsc_offset != tsc_offset);
+
+ nop_loop();
+
+ /*
+ * Enable Re-enlightenment and check that TSC page stays constant across
+ * KVM_SET_CLOCK.
+ */
+ wrmsr(HV_X64_MSR_REENLIGHTENMENT_CONTROL, 0x1 << 16 | 0xff);
+ wrmsr(HV_X64_MSR_TSC_EMULATION_CONTROL, 0x1);
+ tsc_offset = tsc_page->tsc_offset;
+ tsc_scale = tsc_page->tsc_scale;
+ GUEST_SYNC(8);
+ GUEST_ASSERT(tsc_page->tsc_offset == tsc_offset);
+ GUEST_ASSERT(tsc_page->tsc_scale == tsc_scale);
+
+ GUEST_SYNC(9);
+
+ check_tsc_msr_tsc_page(tsc_page);
+
+ /*
+ * Disable re-enlightenment and TSC page, check that KVM doesn't update
+ * it anymore.
+ */
+ wrmsr(HV_X64_MSR_REENLIGHTENMENT_CONTROL, 0);
+ wrmsr(HV_X64_MSR_TSC_EMULATION_CONTROL, 0);
+ wrmsr(HV_X64_MSR_REFERENCE_TSC, 0);
+ memset(tsc_page, 0, sizeof(*tsc_page));
+
+ GUEST_SYNC(10);
+ GUEST_ASSERT(tsc_page->tsc_sequence == 0);
+ GUEST_ASSERT(tsc_page->tsc_offset == 0);
+ GUEST_ASSERT(tsc_page->tsc_scale == 0);
+
+ GUEST_DONE();
+}
+
+static void host_check_tsc_msr_rdtsc(struct kvm_vcpu *vcpu)
+{
+ u64 tsc_freq, r1, r2, t1, t2;
+ s64 delta_ns;
+
+ tsc_freq = vcpu_get_msr(vcpu, HV_X64_MSR_TSC_FREQUENCY);
+ TEST_ASSERT(tsc_freq > 0, "TSC frequency must be nonzero");
+
+ /* For increased accuracy, take the mean of rdtsc() before and after the ioctl */
+ r1 = rdtsc();
+ t1 = vcpu_get_msr(vcpu, HV_X64_MSR_TIME_REF_COUNT);
+ r1 = (r1 + rdtsc()) / 2;
+ nop_loop();
+ r2 = rdtsc();
+ t2 = vcpu_get_msr(vcpu, HV_X64_MSR_TIME_REF_COUNT);
+ r2 = (r2 + rdtsc()) / 2;
+
+ TEST_ASSERT(t2 > t1, "Time reference MSR is not monotonic (%ld <= %ld)", t1, t2);
+
+ /* HV_X64_MSR_TIME_REF_COUNT is in 100ns */
+ delta_ns = ((t2 - t1) * 100) - ((r2 - r1) * 1000000000 / tsc_freq);
+ if (delta_ns < 0)
+ delta_ns = -delta_ns;
+
+ /* 1% tolerance */
+ TEST_ASSERT(delta_ns * 100 < (t2 - t1) * 100,
+ "Elapsed time does not match (MSR=%ld, TSC=%ld)",
+ (t2 - t1) * 100, (r2 - r1) * 1000000000 / tsc_freq);
+}
+
+int main(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ vm_vaddr_t tsc_page_gva;
+ int stage;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_TIME));
+ TEST_REQUIRE(sys_clocksource_is_based_on_tsc());
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_main);
+
+ vcpu_set_hv_cpuid(vcpu);
+
+ tsc_page_gva = vm_vaddr_alloc_page(vm);
+ memset(addr_gva2hva(vm, tsc_page_gva), 0x0, getpagesize());
+ TEST_ASSERT((addr_gva2gpa(vm, tsc_page_gva) & (getpagesize() - 1)) == 0,
+ "TSC page has to be page aligned");
+ vcpu_args_set(vcpu, 2, tsc_page_gva, addr_gva2gpa(vm, tsc_page_gva));
+
+ host_check_tsc_msr_rdtsc(vcpu);
+
+ for (stage = 1;; stage++) {
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ case UCALL_SYNC:
+ break;
+ case UCALL_DONE:
+ /* Keep in sync with guest_main() */
+ TEST_ASSERT(stage == 11, "Testing ended prematurely, stage %d",
+ stage);
+ goto out;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+
+ TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+ uc.args[1] == stage,
+ "Stage %d: Unexpected register values vmexit, got %lx",
+ stage, (ulong)uc.args[1]);
+
+ /* Reset kvmclock triggering TSC page update */
+ if (stage == 7 || stage == 8 || stage == 10) {
+ struct kvm_clock_data clock = {0};
+
+ vm_ioctl(vm, KVM_SET_CLOCK, &clock);
+ }
+ }
+
+out:
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
index 745b708c2d3b..5c27efbf405e 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
@@ -20,8 +20,6 @@
#include "processor.h"
#include "vmx.h"
-#define VCPU_ID 0
-
static void guest_code(void)
{
}
@@ -45,32 +43,26 @@ static bool smt_possible(void)
return res;
}
-static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries,
- bool evmcs_enabled)
+static void test_hv_cpuid(const struct kvm_cpuid2 *hv_cpuid_entries,
+ bool evmcs_expected)
{
int i;
- int nent = 9;
+ int nent_expected = 10;
u32 test_val;
- if (evmcs_enabled)
- nent += 1; /* 0x4000000A */
-
- TEST_ASSERT(hv_cpuid_entries->nent == nent,
+ TEST_ASSERT(hv_cpuid_entries->nent == nent_expected,
"KVM_GET_SUPPORTED_HV_CPUID should return %d entries"
- " with evmcs=%d (returned %d)",
- nent, evmcs_enabled, hv_cpuid_entries->nent);
+ " (returned %d)",
+ nent_expected, hv_cpuid_entries->nent);
for (i = 0; i < hv_cpuid_entries->nent; i++) {
- struct kvm_cpuid_entry2 *entry = &hv_cpuid_entries->entries[i];
+ const struct kvm_cpuid_entry2 *entry = &hv_cpuid_entries->entries[i];
TEST_ASSERT((entry->function >= 0x40000000) &&
(entry->function <= 0x40000082),
"function %x is our of supported range",
entry->function);
- TEST_ASSERT(evmcs_enabled || (entry->function != 0x4000000A),
- "0x4000000A leaf should not be reported");
-
TEST_ASSERT(entry->index == 0,
".index field should be zero");
@@ -87,7 +79,7 @@ static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries,
TEST_ASSERT(entry->eax == test_val,
"Wrong max leaf report in 0x40000000.EAX: %x"
" (evmcs=%d)",
- entry->eax, evmcs_enabled
+ entry->eax, evmcs_expected
);
break;
case 0x40000004:
@@ -97,8 +89,20 @@ static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries,
"NoNonArchitecturalCoreSharing bit"
" doesn't reflect SMT setting");
break;
- }
+ case 0x4000000A:
+ TEST_ASSERT(entry->eax & (1UL << 19),
+ "Enlightened MSR-Bitmap should always be supported"
+			       " 0x4000000A.EAX: %x", entry->eax);
+ if (evmcs_expected)
+ TEST_ASSERT((entry->eax & 0xffff) == 0x101,
+ "Supported Enlightened VMCS version range is supposed to be 1:1"
+				    " 0x4000000A.EAX: %x", entry->eax);
+
+ break;
+ default:
+ break;
+ }
/*
* If needed for debug:
* fprintf(stdout,
@@ -107,84 +111,64 @@ static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries,
* entry->edx);
*/
}
-
}
-void test_hv_cpuid_e2big(struct kvm_vm *vm)
+void test_hv_cpuid_e2big(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
{
static struct kvm_cpuid2 cpuid = {.nent = 0};
int ret;
- ret = _vcpu_ioctl(vm, VCPU_ID, KVM_GET_SUPPORTED_HV_CPUID, &cpuid);
+ if (vcpu)
+ ret = __vcpu_ioctl(vcpu, KVM_GET_SUPPORTED_HV_CPUID, &cpuid);
+ else
+ ret = __kvm_ioctl(vm->kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, &cpuid);
TEST_ASSERT(ret == -1 && errno == E2BIG,
- "KVM_GET_SUPPORTED_HV_CPUID didn't fail with -E2BIG when"
- " it should have: %d %d", ret, errno);
+ "%s KVM_GET_SUPPORTED_HV_CPUID didn't fail with -E2BIG when"
+ " it should have: %d %d", !vcpu ? "KVM" : "vCPU", ret, errno);
}
-
-struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(struct kvm_vm *vm)
+int main(int argc, char *argv[])
{
- int nent = 20; /* should be enough */
- static struct kvm_cpuid2 *cpuid;
-
- cpuid = malloc(sizeof(*cpuid) + nent * sizeof(struct kvm_cpuid_entry2));
-
- if (!cpuid) {
- perror("malloc");
- abort();
- }
-
- cpuid->nent = nent;
+ struct kvm_vm *vm;
+ const struct kvm_cpuid2 *hv_cpuid_entries;
+ struct kvm_vcpu *vcpu;
- vcpu_ioctl(vm, VCPU_ID, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_CPUID));
- return cpuid;
-}
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ /* Test vCPU ioctl version */
+ test_hv_cpuid_e2big(vm, vcpu);
-int main(int argc, char *argv[])
-{
- struct kvm_vm *vm;
- int rv, stage;
- struct kvm_cpuid2 *hv_cpuid_entries;
- bool evmcs_enabled;
+ hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vcpu);
+ test_hv_cpuid(hv_cpuid_entries, false);
+ free((void *)hv_cpuid_entries);
- /* Tell stdout not to buffer its content */
- setbuf(stdout, NULL);
-
- rv = kvm_check_cap(KVM_CAP_HYPERV_CPUID);
- if (!rv) {
- print_skip("KVM_CAP_HYPERV_CPUID not supported");
- exit(KSFT_SKIP);
+ if (!kvm_cpu_has(X86_FEATURE_VMX) ||
+ !kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
+ print_skip("Enlightened VMCS is unsupported");
+ goto do_sys;
+ }
+ vcpu_enable_evmcs(vcpu);
+ hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vcpu);
+ test_hv_cpuid(hv_cpuid_entries, true);
+ free((void *)hv_cpuid_entries);
+
+do_sys:
+ /* Test system ioctl version */
+ if (!kvm_has_cap(KVM_CAP_SYS_HYPERV_CPUID)) {
+ print_skip("KVM_CAP_SYS_HYPERV_CPUID not supported");
+ goto out;
}
- for (stage = 0; stage < 3; stage++) {
- evmcs_enabled = false;
+ test_hv_cpuid_e2big(vm, NULL);
- vm = vm_create_default(VCPU_ID, 0, guest_code);
- switch (stage) {
- case 0:
- test_hv_cpuid_e2big(vm);
- continue;
- case 1:
- break;
- case 2:
- if (!nested_vmx_supported() ||
- !kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
- print_skip("Enlightened VMCS is unsupported");
- continue;
- }
- vcpu_enable_evmcs(vm, VCPU_ID);
- evmcs_enabled = true;
- break;
- }
+ hv_cpuid_entries = kvm_get_supported_hv_cpuid();
+ test_hv_cpuid(hv_cpuid_entries, kvm_cpu_has(X86_FEATURE_VMX));
- hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm);
- test_hv_cpuid(hv_cpuid_entries, evmcs_enabled);
- free(hv_cpuid_entries);
- kvm_vm_free(vm);
- }
+out:
+ kvm_vm_free(vm);
return 0;
}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c b/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c
new file mode 100644
index 000000000000..4c7257ecd2a6
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c
@@ -0,0 +1,310 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2018, Red Hat, Inc.
+ *
+ * Tests for Enlightened VMCS, including nested guest state.
+ */
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <linux/bitmap.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+
+#include "hyperv.h"
+#include "vmx.h"
+
+static int ud_count;
+
+static void guest_ud_handler(struct ex_regs *regs)
+{
+ ud_count++;
+ regs->rip += 3; /* VMLAUNCH */
+}
+
+static void guest_nmi_handler(struct ex_regs *regs)
+{
+}
+
+static inline void rdmsr_from_l2(uint32_t msr)
+{
+ /* Currently, L1 doesn't preserve GPRs during vmexits. */
+ __asm__ __volatile__ ("rdmsr" : : "c"(msr) :
+ "rax", "rbx", "rdx", "rsi", "rdi", "r8", "r9",
+ "r10", "r11", "r12", "r13", "r14", "r15");
+}
+
+/* Exit to L1 from L2 with RDMSR instruction */
+void l2_guest_code(void)
+{
+ u64 unused;
+
+ GUEST_SYNC(7);
+
+ GUEST_SYNC(8);
+
+ /* Forced exit to L1 upon restore */
+ GUEST_SYNC(9);
+
+ vmcall();
+
+ /* MSR-Bitmap tests */
+ rdmsr_from_l2(MSR_FS_BASE); /* intercepted */
+ rdmsr_from_l2(MSR_FS_BASE); /* intercepted */
+ rdmsr_from_l2(MSR_GS_BASE); /* not intercepted */
+ vmcall();
+ rdmsr_from_l2(MSR_GS_BASE); /* intercepted */
+
+ /* L2 TLB flush tests */
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT, 0x0,
+ HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | HV_FLUSH_ALL_PROCESSORS);
+ rdmsr_from_l2(MSR_FS_BASE);
+ /*
+ * Note: hypercall status (RAX) is not preserved correctly by L1 after
+ * synthetic vmexit, use unchecked version.
+ */
+ __hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT, 0x0,
+ HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | HV_FLUSH_ALL_PROCESSORS,
+ &unused);
+
+ /* Done, exit to L1 and never come back. */
+ vmcall();
+}
+
+void guest_code(struct vmx_pages *vmx_pages, struct hyperv_test_pages *hv_pages,
+ vm_vaddr_t hv_hcall_page_gpa)
+{
+#define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+ wrmsr(HV_X64_MSR_HYPERCALL, hv_hcall_page_gpa);
+
+ x2apic_enable();
+
+ GUEST_SYNC(1);
+ GUEST_SYNC(2);
+
+ enable_vp_assist(hv_pages->vp_assist_gpa, hv_pages->vp_assist);
+ evmcs_enable();
+
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_SYNC(3);
+ GUEST_ASSERT(load_evmcs(hv_pages));
+ GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa);
+
+ GUEST_SYNC(4);
+ GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa);
+
+ prepare_vmcs(vmx_pages, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ GUEST_SYNC(5);
+ GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa);
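+	/* An invalid eVMCS revision id must make the subsequent VMLAUNCH fail */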
+ current_evmcs->revision_id = -1u;
+ GUEST_ASSERT(vmlaunch());
+ current_evmcs->revision_id = EVMCS_VERSION;
+ GUEST_SYNC(6);
+
+ vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmreadz(PIN_BASED_VM_EXEC_CONTROL) |
+ PIN_BASED_NMI_EXITING);
+
+ /* L2 TLB flush setup */
+ current_evmcs->partition_assist_page = hv_pages->partition_assist_gpa;
+ current_evmcs->hv_enlightenments_control.nested_flush_hypercall = 1;
+ current_evmcs->hv_vm_id = 1;
+ current_evmcs->hv_vp_id = 1;
+ current_vp_assist->nested_control.features.directhypercall = 1;
+ *(u32 *)(hv_pages->partition_assist) = 0;
+
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_EXCEPTION_NMI);
+ GUEST_ASSERT_EQ((vmreadz(VM_EXIT_INTR_INFO) & 0xff), NMI_VECTOR);
+ GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa);
+
+ /*
+	 * NMI forces an L2->L1 exit; resume L2 and hope that the eVMCS is
+	 * up-to-date (RIP points where it should, not at the beginning
+	 * of l2_guest_code()). GUEST_SYNC(9) checks that.
+ */
+ GUEST_ASSERT(!vmresume());
+
+ GUEST_SYNC(10);
+
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+ current_evmcs->guest_rip += 3; /* vmcall */
+
+ /* Intercept RDMSR 0xc0000100 */
+ vmwrite(CPU_BASED_VM_EXEC_CONTROL, vmreadz(CPU_BASED_VM_EXEC_CONTROL) |
+ CPU_BASED_USE_MSR_BITMAPS);
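+	/* Offset 0x400 in the VMX MSR bitmap is the read bitmap for high MSRs (0xc0000000-0xc0001fff) */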
+ __set_bit(MSR_FS_BASE & 0x1fff, vmx_pages->msr + 0x400);
+ GUEST_ASSERT(!vmresume());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
+ current_evmcs->guest_rip += 2; /* rdmsr */
+
+ /* Enable enlightened MSR bitmap */
+ current_evmcs->hv_enlightenments_control.msr_bitmap = 1;
+ GUEST_ASSERT(!vmresume());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
+ current_evmcs->guest_rip += 2; /* rdmsr */
+
+ /* Intercept RDMSR 0xc0000101 without telling KVM about it */
+ __set_bit(MSR_GS_BASE & 0x1fff, vmx_pages->msr + 0x400);
+ /* Make sure HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP is set */
+ current_evmcs->hv_clean_fields |= HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP;
+ GUEST_ASSERT(!vmresume());
+ /* Make sure we don't see EXIT_REASON_MSR_READ here so eMSR bitmap works */
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+ current_evmcs->guest_rip += 3; /* vmcall */
+
+ /* Now tell KVM we've changed MSR-Bitmap */
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP;
+ GUEST_ASSERT(!vmresume());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
+ current_evmcs->guest_rip += 2; /* rdmsr */
+
+ /*
+ * L2 TLB flush test. First VMCALL should be handled directly by L0,
+ * no VMCALL exit expected.
+ */
+ GUEST_ASSERT(!vmresume());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
+ current_evmcs->guest_rip += 2; /* rdmsr */
+ /* Enable synthetic vmexit */
+ *(u32 *)(hv_pages->partition_assist) = 1;
+ GUEST_ASSERT(!vmresume());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == HV_VMX_SYNTHETIC_EXIT_REASON_TRAP_AFTER_FLUSH);
+
+ GUEST_ASSERT(!vmresume());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+ GUEST_SYNC(11);
+
+ /* Try enlightened vmptrld with an incorrect GPA */
+ evmcs_vmptrld(0xdeadbeef, hv_pages->enlightened_vmcs);
+ GUEST_ASSERT(vmlaunch());
+ GUEST_ASSERT(ud_count == 1);
+ GUEST_DONE();
+}
+
+void inject_nmi(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_events events;
+
+ vcpu_events_get(vcpu, &events);
+
+ events.nmi.pending = 1;
+ events.flags |= KVM_VCPUEVENT_VALID_NMI_PENDING;
+
+ vcpu_events_set(vcpu, &events);
+}
+
+static struct kvm_vcpu *save_restore_vm(struct kvm_vm *vm,
+ struct kvm_vcpu *vcpu)
+{
+ struct kvm_regs regs1, regs2;
+ struct kvm_x86_state *state;
+
+ state = vcpu_save_state(vcpu);
+ memset(&regs1, 0, sizeof(regs1));
+ vcpu_regs_get(vcpu, &regs1);
+
+ kvm_vm_release(vm);
+
+ /* Restore state in a new VM. */
+ vcpu = vm_recreate_with_one_vcpu(vm);
+ vcpu_set_hv_cpuid(vcpu);
+ vcpu_enable_evmcs(vcpu);
+ vcpu_load_state(vcpu, state);
+ kvm_x86_state_cleanup(state);
+
+ memset(&regs2, 0, sizeof(regs2));
+ vcpu_regs_get(vcpu, &regs2);
+ TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
+ "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
+ (ulong) regs2.rdi, (ulong) regs2.rsi);
+ return vcpu;
+}
+
+int main(int argc, char *argv[])
+{
+ vm_vaddr_t vmx_pages_gva = 0, hv_pages_gva = 0;
+ vm_vaddr_t hcall_page;
+
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ int stage;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_DIRECT_TLBFLUSH));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ hcall_page = vm_vaddr_alloc_pages(vm, 1);
+ memset(addr_gva2hva(vm, hcall_page), 0x0, getpagesize());
+
+ vcpu_set_hv_cpuid(vcpu);
+ vcpu_enable_evmcs(vcpu);
+
+ vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ vcpu_alloc_hyperv_test_pages(vm, &hv_pages_gva);
+ vcpu_args_set(vcpu, 3, vmx_pages_gva, hv_pages_gva, addr_gva2gpa(vm, hcall_page));
+ vcpu_set_msr(vcpu, HV_X64_MSR_VP_INDEX, vcpu->id);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+ vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
+ vm_install_exception_handler(vm, NMI_VECTOR, guest_nmi_handler);
+
+ pr_info("Running L1 which uses EVMCS to run L2\n");
+
+ for (stage = 1;; stage++) {
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ case UCALL_SYNC:
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+
+ /* UCALL_SYNC is handled here. */
+ TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+ uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
+ stage, (ulong)uc.args[1]);
+
+ vcpu = save_restore_vm(vm, vcpu);
+
+ /* Force immediate L2->L1 exit before resuming */
+ if (stage == 8) {
+ pr_info("Injecting NMI into L1 before L2 had a chance to run after restore\n");
+ inject_nmi(vcpu);
+ }
+
+ /*
+ * Do KVM_GET_NESTED_STATE/KVM_SET_NESTED_STATE for a freshly
+ * restored VM (before the first KVM_RUN) to check that
+ * KVM_STATE_NESTED_EVMCS is not lost.
+ */
+ if (stage == 9) {
+ pr_info("Trying extra KVM_GET_NESTED_STATE/KVM_SET_NESTED_STATE cycle\n");
+ vcpu = save_restore_vm(vm, vcpu);
+ }
+ }
+
+done:
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c b/tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c
new file mode 100644
index 000000000000..949e08e98f31
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test Hyper-V extended hypercall, HV_EXT_CALL_QUERY_CAPABILITIES (0x8001),
+ * exit to userspace and receive result in guest.
+ *
+ * Negative tests are present in hyperv_features.c
+ *
+ * Copyright 2022 Google LLC
+ * Author: Vipin Sharma <vipinsh@google.com>
+ */
+#include "kvm_util.h"
+#include "processor.h"
+#include "hyperv.h"
+
+/* Any value is fine */
+#define EXT_CAPABILITIES 0xbull
+
+static void guest_code(vm_paddr_t in_pg_gpa, vm_paddr_t out_pg_gpa,
+ vm_vaddr_t out_pg_gva)
+{
+ uint64_t *output_gva;
+
+ wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+ wrmsr(HV_X64_MSR_HYPERCALL, in_pg_gpa);
+
+ output_gva = (uint64_t *)out_pg_gva;
+
+ hyperv_hypercall(HV_EXT_CALL_QUERY_CAPABILITIES, in_pg_gpa, out_pg_gpa);
+
+ /* TLFS states output will be a uint64_t value */
+ GUEST_ASSERT_EQ(*output_gva, EXT_CAPABILITIES);
+
+ GUEST_DONE();
+}
+
+int main(void)
+{
+ vm_vaddr_t hcall_out_page;
+ vm_vaddr_t hcall_in_page;
+ struct kvm_vcpu *vcpu;
+ struct kvm_run *run;
+ struct kvm_vm *vm;
+ uint64_t *outval;
+ struct ucall uc;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_CPUID));
+
+ /* Verify if extended hypercalls are supported */
+ if (!kvm_cpuid_has(kvm_get_supported_hv_cpuid(),
+ HV_ENABLE_EXTENDED_HYPERCALLS)) {
+ print_skip("Extended calls not supported by the kernel");
+ exit(KSFT_SKIP);
+ }
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ run = vcpu->run;
+ vcpu_set_hv_cpuid(vcpu);
+
+ /* Hypercall input */
+ hcall_in_page = vm_vaddr_alloc_pages(vm, 1);
+ memset(addr_gva2hva(vm, hcall_in_page), 0x0, vm->page_size);
+
+ /* Hypercall output */
+ hcall_out_page = vm_vaddr_alloc_pages(vm, 1);
+ memset(addr_gva2hva(vm, hcall_out_page), 0x0, vm->page_size);
+
+ vcpu_args_set(vcpu, 3, addr_gva2gpa(vm, hcall_in_page),
+ addr_gva2gpa(vm, hcall_out_page), hcall_out_page);
+
+ vcpu_run(vcpu);
+
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_HYPERV,
+ "Unexpected exit reason: %u (%s)",
+ run->exit_reason, exit_reason_str(run->exit_reason));
+
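+	/* params[1] is the hypercall output page GPA; fill in the capabilities and report success */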
+ outval = addr_gpa2hva(vm, run->hyperv.u.hcall.params[1]);
+ *outval = EXT_CAPABILITIES;
+ run->hyperv.u.hcall.result = HV_STATUS_SUCCESS;
+
+ vcpu_run(vcpu);
+
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Unexpected exit reason: %u (%s)",
+ run->exit_reason, exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unhandled ucall: %ld", uc.cmd);
+ }
+
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
new file mode 100644
index 000000000000..b923a285e96f
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
@@ -0,0 +1,701 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021, Red Hat, Inc.
+ *
+ * Tests for Hyper-V features enablement
+ */
+#include <asm/kvm_para.h>
+#include <linux/kvm_para.h>
+#include <stdint.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "hyperv.h"
+
+/*
+ * HYPERV_CPUID_ENLIGHTMENT_INFO.EBX is not a 'feature' CPUID leaf
+ * but to activate the feature it is sufficient to set it to a non-zero
+ * value. Use BIT(0) for that.
+ */
+#define HV_PV_SPINLOCKS_TEST \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EBX, 0)
+
+struct msr_data {
+ uint32_t idx;
+ bool fault_expected;
+ bool write;
+ u64 write_val;
+};
+
+struct hcall_data {
+ uint64_t control;
+ uint64_t expect;
+ bool ud_expected;
+};
+
+static bool is_write_only_msr(uint32_t msr)
+{
+ return msr == HV_X64_MSR_EOI;
+}
+
+static void guest_msr(struct msr_data *msr)
+{
+ uint8_t vector = 0;
+ uint64_t msr_val = 0;
+
+ GUEST_ASSERT(msr->idx);
+
+ if (msr->write)
+ vector = wrmsr_safe(msr->idx, msr->write_val);
+
+ if (!vector && (!msr->write || !is_write_only_msr(msr->idx)))
+ vector = rdmsr_safe(msr->idx, &msr_val);
+
+ if (msr->fault_expected)
+ __GUEST_ASSERT(vector == GP_VECTOR,
+ "Expected #GP on %sMSR(0x%x), got vector '0x%x'",
+ msr->write ? "WR" : "RD", msr->idx, vector);
+ else
+ __GUEST_ASSERT(!vector,
+ "Expected success on %sMSR(0x%x), got vector '0x%x'",
+ msr->write ? "WR" : "RD", msr->idx, vector);
+
+ if (vector || is_write_only_msr(msr->idx))
+ goto done;
+
+ if (msr->write)
+ __GUEST_ASSERT(!vector,
+ "WRMSR(0x%x) to '0x%lx', RDMSR read '0x%lx'",
+ msr->idx, msr->write_val, msr_val);
+
+ /* Invariant TSC bit appears when TSC invariant control MSR is written to */
+ if (msr->idx == HV_X64_MSR_TSC_INVARIANT_CONTROL) {
+ if (!this_cpu_has(HV_ACCESS_TSC_INVARIANT))
+ GUEST_ASSERT(this_cpu_has(X86_FEATURE_INVTSC));
+ else
+ GUEST_ASSERT(this_cpu_has(X86_FEATURE_INVTSC) ==
+ !!(msr_val & HV_INVARIANT_TSC_EXPOSED));
+ }
+
+done:
+ GUEST_DONE();
+}
+
+static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall)
+{
+ u64 res, input, output;
+ uint8_t vector;
+
+ GUEST_ASSERT_NE(hcall->control, 0);
+
+ wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+ wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa);
+
+ if (!(hcall->control & HV_HYPERCALL_FAST_BIT)) {
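+		/* Non-fast hypercalls pass input/output by GPA; the output page follows the input page */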
+ input = pgs_gpa;
+ output = pgs_gpa + 4096;
+ } else {
+ input = output = 0;
+ }
+
+ vector = __hyperv_hypercall(hcall->control, input, output, &res);
+ if (hcall->ud_expected) {
+ __GUEST_ASSERT(vector == UD_VECTOR,
+ "Expected #UD for control '%lu', got vector '0x%x'",
+ hcall->control, vector);
+ } else {
+ __GUEST_ASSERT(!vector,
+ "Expected no exception for control '%lu', got vector '0x%x'",
+ hcall->control, vector);
+ GUEST_ASSERT_EQ(res, hcall->expect);
+ }
+
+ GUEST_DONE();
+}
+
+static void vcpu_reset_hv_cpuid(struct kvm_vcpu *vcpu)
+{
+ /*
+ * Enable all supported Hyper-V features, then clear the leafs holding
+ * the features that will be tested one by one.
+ */
+ vcpu_set_hv_cpuid(vcpu);
+
+ vcpu_clear_cpuid_entry(vcpu, HYPERV_CPUID_FEATURES);
+ vcpu_clear_cpuid_entry(vcpu, HYPERV_CPUID_ENLIGHTMENT_INFO);
+ vcpu_clear_cpuid_entry(vcpu, HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES);
+}
+
+static void guest_test_msrs_access(void)
+{
+ struct kvm_cpuid2 *prev_cpuid = NULL;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ int stage = 0;
+ vm_vaddr_t msr_gva;
+ struct msr_data *msr;
+ bool has_invtsc = kvm_cpu_has(X86_FEATURE_INVTSC);
+
+ while (true) {
+ vm = vm_create_with_one_vcpu(&vcpu, guest_msr);
+
+ msr_gva = vm_vaddr_alloc_page(vm);
+ memset(addr_gva2hva(vm, msr_gva), 0x0, getpagesize());
+ msr = addr_gva2hva(vm, msr_gva);
+
+ vcpu_args_set(vcpu, 1, msr_gva);
+ vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_ENFORCE_CPUID, 1);
+
+ if (!prev_cpuid) {
+ vcpu_reset_hv_cpuid(vcpu);
+
+ prev_cpuid = allocate_kvm_cpuid2(vcpu->cpuid->nent);
+ } else {
+ vcpu_init_cpuid(vcpu, prev_cpuid);
+ }
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+
+ /* TODO: Make this entire test easier to maintain. */
+ if (stage >= 21)
+ vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_SYNIC2, 0);
+
+ switch (stage) {
+ case 0:
+ /*
+ * Only available when Hyper-V identification is set
+ */
+ msr->idx = HV_X64_MSR_GUEST_OS_ID;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 1:
+ msr->idx = HV_X64_MSR_HYPERCALL;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 2:
+ vcpu_set_cpuid_feature(vcpu, HV_MSR_HYPERCALL_AVAILABLE);
+ /*
+ * HV_X64_MSR_GUEST_OS_ID has to be written first to make
+ * HV_X64_MSR_HYPERCALL available.
+ */
+ msr->idx = HV_X64_MSR_GUEST_OS_ID;
+ msr->write = true;
+ msr->write_val = HYPERV_LINUX_OS_ID;
+ msr->fault_expected = false;
+ break;
+ case 3:
+ msr->idx = HV_X64_MSR_GUEST_OS_ID;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 4:
+ msr->idx = HV_X64_MSR_HYPERCALL;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+
+ case 5:
+ msr->idx = HV_X64_MSR_VP_RUNTIME;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 6:
+ vcpu_set_cpuid_feature(vcpu, HV_MSR_VP_RUNTIME_AVAILABLE);
+ msr->idx = HV_X64_MSR_VP_RUNTIME;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 7:
+ /* Read only */
+ msr->idx = HV_X64_MSR_VP_RUNTIME;
+ msr->write = true;
+ msr->write_val = 1;
+ msr->fault_expected = true;
+ break;
+
+ case 8:
+ msr->idx = HV_X64_MSR_TIME_REF_COUNT;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 9:
+ vcpu_set_cpuid_feature(vcpu, HV_MSR_TIME_REF_COUNT_AVAILABLE);
+ msr->idx = HV_X64_MSR_TIME_REF_COUNT;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 10:
+ /* Read only */
+ msr->idx = HV_X64_MSR_TIME_REF_COUNT;
+ msr->write = true;
+ msr->write_val = 1;
+ msr->fault_expected = true;
+ break;
+
+ case 11:
+ msr->idx = HV_X64_MSR_VP_INDEX;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 12:
+ vcpu_set_cpuid_feature(vcpu, HV_MSR_VP_INDEX_AVAILABLE);
+ msr->idx = HV_X64_MSR_VP_INDEX;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 13:
+ /* Read only */
+ msr->idx = HV_X64_MSR_VP_INDEX;
+ msr->write = true;
+ msr->write_val = 1;
+ msr->fault_expected = true;
+ break;
+
+ case 14:
+ msr->idx = HV_X64_MSR_RESET;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 15:
+ vcpu_set_cpuid_feature(vcpu, HV_MSR_RESET_AVAILABLE);
+ msr->idx = HV_X64_MSR_RESET;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 16:
+ msr->idx = HV_X64_MSR_RESET;
+ msr->write = true;
+ /*
+ * TODO: the test only writes '0' to HV_X64_MSR_RESET
+ * at the moment, writing some other value there will
+ * trigger real vCPU reset and the code is not prepared
+ * to handle it yet.
+ */
+ msr->write_val = 0;
+ msr->fault_expected = false;
+ break;
+
+ case 17:
+ msr->idx = HV_X64_MSR_REFERENCE_TSC;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 18:
+ vcpu_set_cpuid_feature(vcpu, HV_MSR_REFERENCE_TSC_AVAILABLE);
+ msr->idx = HV_X64_MSR_REFERENCE_TSC;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 19:
+ msr->idx = HV_X64_MSR_REFERENCE_TSC;
+ msr->write = true;
+ msr->write_val = 0;
+ msr->fault_expected = false;
+ break;
+
+ case 20:
+ msr->idx = HV_X64_MSR_EOM;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 21:
+ /*
+			 * Remains unavailable even with the KVM_CAP_HYPERV_SYNIC2
+			 * capability enabled, as the guest visible CPUID bit is unset.
+ */
+ msr->idx = HV_X64_MSR_EOM;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 22:
+ vcpu_set_cpuid_feature(vcpu, HV_MSR_SYNIC_AVAILABLE);
+ msr->idx = HV_X64_MSR_EOM;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 23:
+ msr->idx = HV_X64_MSR_EOM;
+ msr->write = true;
+ msr->write_val = 0;
+ msr->fault_expected = false;
+ break;
+
+ case 24:
+ msr->idx = HV_X64_MSR_STIMER0_CONFIG;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 25:
+ vcpu_set_cpuid_feature(vcpu, HV_MSR_SYNTIMER_AVAILABLE);
+ msr->idx = HV_X64_MSR_STIMER0_CONFIG;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 26:
+ msr->idx = HV_X64_MSR_STIMER0_CONFIG;
+ msr->write = true;
+ msr->write_val = 0;
+ msr->fault_expected = false;
+ break;
+ case 27:
+ /* Direct mode test */
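+			/* Bit 12 of the synthetic timer config MSR is DirectMode */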
+ msr->idx = HV_X64_MSR_STIMER0_CONFIG;
+ msr->write = true;
+ msr->write_val = 1 << 12;
+ msr->fault_expected = true;
+ break;
+ case 28:
+ vcpu_set_cpuid_feature(vcpu, HV_STIMER_DIRECT_MODE_AVAILABLE);
+ msr->idx = HV_X64_MSR_STIMER0_CONFIG;
+ msr->write = true;
+ msr->write_val = 1 << 12;
+ msr->fault_expected = false;
+ break;
+
+ case 29:
+ msr->idx = HV_X64_MSR_EOI;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 30:
+ vcpu_set_cpuid_feature(vcpu, HV_MSR_APIC_ACCESS_AVAILABLE);
+ msr->idx = HV_X64_MSR_EOI;
+ msr->write = true;
+ msr->write_val = 1;
+ msr->fault_expected = false;
+ break;
+
+ case 31:
+ msr->idx = HV_X64_MSR_TSC_FREQUENCY;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 32:
+ vcpu_set_cpuid_feature(vcpu, HV_ACCESS_FREQUENCY_MSRS);
+ msr->idx = HV_X64_MSR_TSC_FREQUENCY;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 33:
+ /* Read only */
+ msr->idx = HV_X64_MSR_TSC_FREQUENCY;
+ msr->write = true;
+ msr->write_val = 1;
+ msr->fault_expected = true;
+ break;
+
+ case 34:
+ msr->idx = HV_X64_MSR_REENLIGHTENMENT_CONTROL;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 35:
+ vcpu_set_cpuid_feature(vcpu, HV_ACCESS_REENLIGHTENMENT);
+ msr->idx = HV_X64_MSR_REENLIGHTENMENT_CONTROL;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 36:
+ msr->idx = HV_X64_MSR_REENLIGHTENMENT_CONTROL;
+ msr->write = true;
+ msr->write_val = 1;
+ msr->fault_expected = false;
+ break;
+ case 37:
+ /* Can only write '0' */
+ msr->idx = HV_X64_MSR_TSC_EMULATION_STATUS;
+ msr->write = true;
+ msr->write_val = 1;
+ msr->fault_expected = true;
+ break;
+
+ case 38:
+ msr->idx = HV_X64_MSR_CRASH_P0;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 39:
+ vcpu_set_cpuid_feature(vcpu, HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE);
+ msr->idx = HV_X64_MSR_CRASH_P0;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 40:
+ msr->idx = HV_X64_MSR_CRASH_P0;
+ msr->write = true;
+ msr->write_val = 1;
+ msr->fault_expected = false;
+ break;
+
+ case 41:
+ msr->idx = HV_X64_MSR_SYNDBG_STATUS;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 42:
+ vcpu_set_cpuid_feature(vcpu, HV_FEATURE_DEBUG_MSRS_AVAILABLE);
+ vcpu_set_cpuid_feature(vcpu, HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING);
+ msr->idx = HV_X64_MSR_SYNDBG_STATUS;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 43:
+ msr->idx = HV_X64_MSR_SYNDBG_STATUS;
+ msr->write = true;
+ msr->write_val = 0;
+ msr->fault_expected = false;
+ break;
+
+ case 44:
+ /* MSR is not available when CPUID feature bit is unset */
+ if (!has_invtsc)
+ goto next_stage;
+ msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 45:
+			/* MSR is available when the CPUID feature bit is set */
+ if (!has_invtsc)
+ goto next_stage;
+ vcpu_set_cpuid_feature(vcpu, HV_ACCESS_TSC_INVARIANT);
+ msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 46:
+			/* Writing bits other than bit 0 is forbidden */
+ if (!has_invtsc)
+ goto next_stage;
+ msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
+ msr->write = true;
+ msr->write_val = 0xdeadbeef;
+ msr->fault_expected = true;
+ break;
+ case 47:
+ /* Setting bit 0 enables the feature */
+ if (!has_invtsc)
+ goto next_stage;
+ msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
+ msr->write = true;
+ msr->write_val = 1;
+ msr->fault_expected = false;
+ break;
+
+ default:
+ kvm_vm_free(vm);
+ return;
+ }
+
+ vcpu_set_cpuid(vcpu);
+
+ memcpy(prev_cpuid, vcpu->cpuid, kvm_cpuid2_size(vcpu->cpuid->nent));
+
+ pr_debug("Stage %d: testing msr: 0x%x for %s\n", stage,
+ msr->idx, msr->write ? "write" : "read");
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ return;
+ case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unhandled ucall: %ld", uc.cmd);
+ return;
+ }
+
+next_stage:
+ stage++;
+ kvm_vm_free(vm);
+ }
+}
+
+static void guest_test_hcalls_access(void)
+{
+ struct kvm_cpuid2 *prev_cpuid = NULL;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ int stage = 0;
+ vm_vaddr_t hcall_page, hcall_params;
+ struct hcall_data *hcall;
+
+ while (true) {
+ vm = vm_create_with_one_vcpu(&vcpu, guest_hcall);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+
+ /* Hypercall input/output */
+ hcall_page = vm_vaddr_alloc_pages(vm, 2);
+ memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize());
+
+ hcall_params = vm_vaddr_alloc_page(vm);
+ memset(addr_gva2hva(vm, hcall_params), 0x0, getpagesize());
+ hcall = addr_gva2hva(vm, hcall_params);
+
+ vcpu_args_set(vcpu, 2, addr_gva2gpa(vm, hcall_page), hcall_params);
+ vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_ENFORCE_CPUID, 1);
+
+ if (!prev_cpuid) {
+ vcpu_reset_hv_cpuid(vcpu);
+
+ prev_cpuid = allocate_kvm_cpuid2(vcpu->cpuid->nent);
+ } else {
+ vcpu_init_cpuid(vcpu, prev_cpuid);
+ }
+
+ switch (stage) {
+ case 0:
+ vcpu_set_cpuid_feature(vcpu, HV_MSR_HYPERCALL_AVAILABLE);
+ hcall->control = 0xbeef;
+ hcall->expect = HV_STATUS_INVALID_HYPERCALL_CODE;
+ break;
+
+ case 1:
+ hcall->control = HVCALL_POST_MESSAGE;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 2:
+ vcpu_set_cpuid_feature(vcpu, HV_POST_MESSAGES);
+ hcall->control = HVCALL_POST_MESSAGE;
+ hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT;
+ break;
+
+ case 3:
+ hcall->control = HVCALL_SIGNAL_EVENT;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 4:
+ vcpu_set_cpuid_feature(vcpu, HV_SIGNAL_EVENTS);
+ hcall->control = HVCALL_SIGNAL_EVENT;
+ hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT;
+ break;
+
+ case 5:
+ hcall->control = HVCALL_RESET_DEBUG_SESSION;
+ hcall->expect = HV_STATUS_INVALID_HYPERCALL_CODE;
+ break;
+ case 6:
+ vcpu_set_cpuid_feature(vcpu, HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING);
+ hcall->control = HVCALL_RESET_DEBUG_SESSION;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 7:
+ vcpu_set_cpuid_feature(vcpu, HV_DEBUGGING);
+ hcall->control = HVCALL_RESET_DEBUG_SESSION;
+ hcall->expect = HV_STATUS_OPERATION_DENIED;
+ break;
+
+ case 8:
+ hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 9:
+ vcpu_set_cpuid_feature(vcpu, HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED);
+ hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE;
+ hcall->expect = HV_STATUS_SUCCESS;
+ break;
+ case 10:
+ hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 11:
+ vcpu_set_cpuid_feature(vcpu, HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED);
+ hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX;
+ hcall->expect = HV_STATUS_SUCCESS;
+ break;
+
+ case 12:
+ hcall->control = HVCALL_SEND_IPI;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 13:
+ vcpu_set_cpuid_feature(vcpu, HV_X64_CLUSTER_IPI_RECOMMENDED);
+ hcall->control = HVCALL_SEND_IPI;
+ hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT;
+ break;
+ case 14:
+ /* Nothing in 'sparse banks' -> success */
+ hcall->control = HVCALL_SEND_IPI_EX;
+ hcall->expect = HV_STATUS_SUCCESS;
+ break;
+
+ case 15:
+ hcall->control = HVCALL_NOTIFY_LONG_SPIN_WAIT;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 16:
+ vcpu_set_cpuid_feature(vcpu, HV_PV_SPINLOCKS_TEST);
+ hcall->control = HVCALL_NOTIFY_LONG_SPIN_WAIT;
+ hcall->expect = HV_STATUS_SUCCESS;
+ break;
+ case 17:
+ /* XMM fast hypercall */
+ hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT;
+ hcall->ud_expected = true;
+ break;
+ case 18:
+ vcpu_set_cpuid_feature(vcpu, HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE);
+ hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT;
+ hcall->ud_expected = false;
+ hcall->expect = HV_STATUS_SUCCESS;
+ break;
+ case 19:
+ hcall->control = HV_EXT_CALL_QUERY_CAPABILITIES;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 20:
+ vcpu_set_cpuid_feature(vcpu, HV_ENABLE_EXTENDED_HYPERCALLS);
+ hcall->control = HV_EXT_CALL_QUERY_CAPABILITIES | HV_HYPERCALL_FAST_BIT;
+ hcall->expect = HV_STATUS_INVALID_PARAMETER;
+ break;
+ case 21:
+ kvm_vm_free(vm);
+ return;
+ }
+
+ vcpu_set_cpuid(vcpu);
+
+ memcpy(prev_cpuid, vcpu->cpuid, kvm_cpuid2_size(vcpu->cpuid->nent));
+
+ pr_debug("Stage %d: testing hcall: 0x%lx\n", stage, hcall->control);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ return;
+ case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unhandled ucall: %ld", uc.cmd);
+ return;
+ }
+
+ stage++;
+ kvm_vm_free(vm);
+ }
+}
+
+int main(void)
+{
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_ENFORCE_CPUID));
+
+ pr_info("Testing access to Hyper-V specific MSRs\n");
+ guest_test_msrs_access();
+
+ pr_info("Testing access to Hyper-V hypercalls\n");
+ guest_test_hcalls_access();
+}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c b/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c
new file mode 100644
index 000000000000..f1617762c22f
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c
@@ -0,0 +1,313 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hyper-V HvCallSendSyntheticClusterIpi{,Ex} tests
+ *
+ * Copyright (C) 2022, Red Hat, Inc.
+ *
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <pthread.h>
+#include <inttypes.h>
+
+#include "kvm_util.h"
+#include "hyperv.h"
+#include "test_util.h"
+#include "vmx.h"
+
+#define RECEIVER_VCPU_ID_1 2
+#define RECEIVER_VCPU_ID_2 65
+
+#define IPI_VECTOR 0xfe
+
+static volatile uint64_t ipis_rcvd[RECEIVER_VCPU_ID_2 + 1];
+
+struct hv_vpset {
+ u64 format;
+ u64 valid_bank_mask;
+ u64 bank_contents[2];
+};
+
+enum HV_GENERIC_SET_FORMAT {
+ HV_GENERIC_SET_SPARSE_4K,
+ HV_GENERIC_SET_ALL,
+};
+
+/* HvCallSendSyntheticClusterIpi hypercall */
+struct hv_send_ipi {
+ u32 vector;
+ u32 reserved;
+ u64 cpu_mask;
+};
+
+/* HvCallSendSyntheticClusterIpiEx hypercall */
+struct hv_send_ipi_ex {
+ u32 vector;
+ u32 reserved;
+ struct hv_vpset vp_set;
+};
+
+static inline void hv_init(vm_vaddr_t pgs_gpa)
+{
+ wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+ wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa);
+}
+
+static void receiver_code(void *hcall_page, vm_vaddr_t pgs_gpa)
+{
+ u32 vcpu_id;
+
+ x2apic_enable();
+ hv_init(pgs_gpa);
+
+ vcpu_id = rdmsr(HV_X64_MSR_VP_INDEX);
+
+ /* Signal sender vCPU we're ready */
+ ipis_rcvd[vcpu_id] = (u64)-1;
+
+ for (;;)
+ asm volatile("sti; hlt; cli");
+}
+
+static void guest_ipi_handler(struct ex_regs *regs)
+{
+ u32 vcpu_id = rdmsr(HV_X64_MSR_VP_INDEX);
+
+ ipis_rcvd[vcpu_id]++;
+ wrmsr(HV_X64_MSR_EOI, 1);
+}
+
+static inline void nop_loop(void)
+{
+ int i;
+
+ for (i = 0; i < 100000000; i++)
+ asm volatile("nop");
+}
+
+static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa)
+{
+ struct hv_send_ipi *ipi = (struct hv_send_ipi *)hcall_page;
+ struct hv_send_ipi_ex *ipi_ex = (struct hv_send_ipi_ex *)hcall_page;
+ int stage = 1, ipis_expected[2] = {0};
+
+ hv_init(pgs_gpa);
+ GUEST_SYNC(stage++);
+
+ /* Wait for receiver vCPUs to come up */
+ while (!ipis_rcvd[RECEIVER_VCPU_ID_1] || !ipis_rcvd[RECEIVER_VCPU_ID_2])
+ nop_loop();
+ ipis_rcvd[RECEIVER_VCPU_ID_1] = ipis_rcvd[RECEIVER_VCPU_ID_2] = 0;
+
+ /* 'Slow' HvCallSendSyntheticClusterIpi to RECEIVER_VCPU_ID_1 */
+ ipi->vector = IPI_VECTOR;
+ ipi->cpu_mask = 1 << RECEIVER_VCPU_ID_1;
+ hyperv_hypercall(HVCALL_SEND_IPI, pgs_gpa, pgs_gpa + 4096);
+ nop_loop();
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]);
+ GUEST_SYNC(stage++);
+ /* 'Fast' HvCallSendSyntheticClusterIpi to RECEIVER_VCPU_ID_1 */
+ hyperv_hypercall(HVCALL_SEND_IPI | HV_HYPERCALL_FAST_BIT,
+ IPI_VECTOR, 1 << RECEIVER_VCPU_ID_1);
+ nop_loop();
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]);
+ GUEST_SYNC(stage++);
+
+ /* 'Slow' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_1 */
+ memset(hcall_page, 0, 4096);
+ ipi_ex->vector = IPI_VECTOR;
+ ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+ ipi_ex->vp_set.valid_bank_mask = 1 << 0;
+ ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_1);
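+	/* The sparse bank list is a variable-size header, 1 qword here, encoded via HV_HYPERCALL_VARHEAD_OFFSET */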
+ hyperv_hypercall(HVCALL_SEND_IPI_EX | (1 << HV_HYPERCALL_VARHEAD_OFFSET),
+ pgs_gpa, pgs_gpa + 4096);
+ nop_loop();
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]);
+ GUEST_SYNC(stage++);
+ /* 'XMM Fast' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_1 */
+ hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 1);
+ hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT |
+ (1 << HV_HYPERCALL_VARHEAD_OFFSET),
+ IPI_VECTOR, HV_GENERIC_SET_SPARSE_4K);
+ nop_loop();
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]);
+ GUEST_SYNC(stage++);
+
+ /* 'Slow' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_2 */
+ memset(hcall_page, 0, 4096);
+ ipi_ex->vector = IPI_VECTOR;
+ ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
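+	/* RECEIVER_VCPU_ID_2 (65) is in bank 1, which covers VPs 64-127 */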
+ ipi_ex->vp_set.valid_bank_mask = 1 << 1;
+ ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_2 - 64);
+ hyperv_hypercall(HVCALL_SEND_IPI_EX | (1 << HV_HYPERCALL_VARHEAD_OFFSET),
+ pgs_gpa, pgs_gpa + 4096);
+ nop_loop();
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ipis_expected[0]);
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
+ GUEST_SYNC(stage++);
+ /* 'XMM Fast' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_2 */
+ hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 1);
+ hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT |
+ (1 << HV_HYPERCALL_VARHEAD_OFFSET),
+ IPI_VECTOR, HV_GENERIC_SET_SPARSE_4K);
+ nop_loop();
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ipis_expected[0]);
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
+ GUEST_SYNC(stage++);
+
+ /* 'Slow' HvCallSendSyntheticClusterIpiEx to both RECEIVER_VCPU_ID_{1,2} */
+ memset(hcall_page, 0, 4096);
+ ipi_ex->vector = IPI_VECTOR;
+ ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+ ipi_ex->vp_set.valid_bank_mask = 1 << 1 | 1;
+ ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_1);
+ ipi_ex->vp_set.bank_contents[1] = BIT(RECEIVER_VCPU_ID_2 - 64);
+ hyperv_hypercall(HVCALL_SEND_IPI_EX | (2 << HV_HYPERCALL_VARHEAD_OFFSET),
+ pgs_gpa, pgs_gpa + 4096);
+ nop_loop();
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
+ GUEST_SYNC(stage++);
+ /* 'XMM Fast' HvCallSendSyntheticClusterIpiEx to both RECEIVER_VCPU_ID_{1, 2} */
+ hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 2);
+ hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT |
+ (2 << HV_HYPERCALL_VARHEAD_OFFSET),
+ IPI_VECTOR, HV_GENERIC_SET_SPARSE_4K);
+ nop_loop();
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
+ GUEST_SYNC(stage++);
+
+ /* 'Slow' HvCallSendSyntheticClusterIpiEx to HV_GENERIC_SET_ALL */
+ memset(hcall_page, 0, 4096);
+ ipi_ex->vector = IPI_VECTOR;
+ ipi_ex->vp_set.format = HV_GENERIC_SET_ALL;
+ hyperv_hypercall(HVCALL_SEND_IPI_EX, pgs_gpa, pgs_gpa + 4096);
+ nop_loop();
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
+ GUEST_SYNC(stage++);
+ /*
+ * 'XMM Fast' HvCallSendSyntheticClusterIpiEx to HV_GENERIC_SET_ALL.
+ */
+ ipi_ex->vp_set.valid_bank_mask = 0;
+ hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 2);
+ hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT,
+ IPI_VECTOR, HV_GENERIC_SET_ALL);
+ nop_loop();
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
+ GUEST_SYNC(stage++);
+
+ GUEST_DONE();
+}
+
+static void *vcpu_thread(void *arg)
+{
+ struct kvm_vcpu *vcpu = (struct kvm_vcpu *)arg;
+ int old, r;
+
+ r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
+ TEST_ASSERT(!r, "pthread_setcanceltype failed on vcpu_id=%u with errno=%d",
+ vcpu->id, r);
+
+ vcpu_run(vcpu);
+
+ TEST_FAIL("vCPU %u exited unexpectedly", vcpu->id);
+
+ return NULL;
+}
+
+static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu)
+{
+ void *retval;
+ int r;
+
+ r = pthread_cancel(thread);
+ TEST_ASSERT(!r, "pthread_cancel on vcpu_id=%d failed with errno=%d",
+ vcpu->id, r);
+
+ r = pthread_join(thread, &retval);
+ TEST_ASSERT(!r, "pthread_join on vcpu_id=%d failed with errno=%d",
+ vcpu->id, r);
+ TEST_ASSERT(retval == PTHREAD_CANCELED,
+ "expected retval=%p, got %p", PTHREAD_CANCELED,
+ retval);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu[3];
+ vm_vaddr_t hcall_page;
+ pthread_t threads[2];
+ int stage = 1, r;
+ struct ucall uc;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_SEND_IPI));
+
+ vm = vm_create_with_one_vcpu(&vcpu[0], sender_guest_code);
+
+ /* Hypercall input/output */
+ hcall_page = vm_vaddr_alloc_pages(vm, 2);
+ memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize());
+
+ vm_init_descriptor_tables(vm);
+
+ vcpu[1] = vm_vcpu_add(vm, RECEIVER_VCPU_ID_1, receiver_code);
+ vcpu_init_descriptor_tables(vcpu[1]);
+ vcpu_args_set(vcpu[1], 2, hcall_page, addr_gva2gpa(vm, hcall_page));
+ vcpu_set_msr(vcpu[1], HV_X64_MSR_VP_INDEX, RECEIVER_VCPU_ID_1);
+ vcpu_set_hv_cpuid(vcpu[1]);
+
+ vcpu[2] = vm_vcpu_add(vm, RECEIVER_VCPU_ID_2, receiver_code);
+ vcpu_init_descriptor_tables(vcpu[2]);
+ vcpu_args_set(vcpu[2], 2, hcall_page, addr_gva2gpa(vm, hcall_page));
+ vcpu_set_msr(vcpu[2], HV_X64_MSR_VP_INDEX, RECEIVER_VCPU_ID_2);
+ vcpu_set_hv_cpuid(vcpu[2]);
+
+ vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler);
+
+ vcpu_args_set(vcpu[0], 2, hcall_page, addr_gva2gpa(vm, hcall_page));
+ vcpu_set_hv_cpuid(vcpu[0]);
+
+ r = pthread_create(&threads[0], NULL, vcpu_thread, vcpu[1]);
+	TEST_ASSERT(!r, "pthread_create failed with error %d", r);
+
+ r = pthread_create(&threads[1], NULL, vcpu_thread, vcpu[2]);
+	TEST_ASSERT(!r, "pthread_create failed with error %d", r);
+
+ while (true) {
+ vcpu_run(vcpu[0]);
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu[0], KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu[0], &uc)) {
+ case UCALL_SYNC:
+ TEST_ASSERT(uc.args[1] == stage,
+ "Unexpected stage: %ld (%d expected)",
+ uc.args[1], stage);
+ break;
+ case UCALL_DONE:
+ goto done;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+
+ stage++;
+ }
+
+done:
+ cancel_join_vcpu_thread(threads[0], vcpu[1]);
+ cancel_join_vcpu_thread(threads[1], vcpu[2]);
+ kvm_vm_free(vm);
+
+ return r;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
new file mode 100644
index 000000000000..c9b18707edc0
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
@@ -0,0 +1,200 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2022, Red Hat, Inc.
+ *
+ * Tests for Hyper-V extensions to SVM.
+ */
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <linux/bitmap.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "hyperv.h"
+
+#define L2_GUEST_STACK_SIZE 256
+
+/* Exit to L1 from L2 with RDMSR instruction */
+static inline void rdmsr_from_l2(uint32_t msr)
+{
+ /* Currently, L1 doesn't preserve GPRs during vmexits. */
+ __asm__ __volatile__ ("rdmsr" : : "c"(msr) :
+ "rax", "rbx", "rdx", "rsi", "rdi", "r8", "r9",
+ "r10", "r11", "r12", "r13", "r14", "r15");
+}
+
+void l2_guest_code(void)
+{
+ u64 unused;
+
+ GUEST_SYNC(3);
+ /* Exit to L1 */
+ vmmcall();
+
+ /* MSR-Bitmap tests */
+ rdmsr_from_l2(MSR_FS_BASE); /* intercepted */
+ rdmsr_from_l2(MSR_FS_BASE); /* intercepted */
+ rdmsr_from_l2(MSR_GS_BASE); /* not intercepted */
+ vmmcall();
+ rdmsr_from_l2(MSR_GS_BASE); /* intercepted */
+
+ GUEST_SYNC(5);
+
+ /* L2 TLB flush tests */
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE |
+ HV_HYPERCALL_FAST_BIT, 0x0,
+ HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
+ HV_FLUSH_ALL_PROCESSORS);
+ rdmsr_from_l2(MSR_FS_BASE);
+ /*
+ * Note: hypercall status (RAX) is not preserved correctly by L1 after
+ * synthetic vmexit, use unchecked version.
+ */
+ __hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE |
+ HV_HYPERCALL_FAST_BIT, 0x0,
+ HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
+ HV_FLUSH_ALL_PROCESSORS, &unused);
+
+ /* Done, exit to L1 and never come back. */
+ vmmcall();
+}
+
+static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm,
+ struct hyperv_test_pages *hv_pages,
+ vm_vaddr_t pgs_gpa)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ struct vmcb *vmcb = svm->vmcb;
+ struct hv_vmcb_enlightenments *hve = &vmcb->control.hv_enlightenments;
+
+ GUEST_SYNC(1);
+
+ wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+ wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa);
+ enable_vp_assist(hv_pages->vp_assist_gpa, hv_pages->vp_assist);
+
+ GUEST_ASSERT(svm->vmcb_gpa);
+ /* Prepare for L2 execution. */
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /* L2 TLB flush setup */
+ hve->partition_assist_page = hv_pages->partition_assist_gpa;
+ hve->hv_enlightenments_control.nested_flush_hypercall = 1;
+ hve->hv_vm_id = 1;
+ hve->hv_vp_id = 1;
+ current_vp_assist->nested_control.features.directhypercall = 1;
+ *(u32 *)(hv_pages->partition_assist) = 0;
+
+ GUEST_SYNC(2);
+ run_guest(vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+ GUEST_SYNC(4);
+ vmcb->save.rip += 3;
+
+ /* Intercept RDMSR 0xc0000100 */
+ vmcb->control.intercept |= 1ULL << INTERCEPT_MSR_PROT;
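+	/* SVM MSRPM: MSRs 0xc0000000+ start at byte offset 0x800, two bits (read/write) per MSR */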
+ __set_bit(2 * (MSR_FS_BASE & 0x1fff), svm->msr + 0x800);
+ run_guest(vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR);
+ vmcb->save.rip += 2; /* rdmsr */
+
+ /* Enable enlightened MSR bitmap */
+ hve->hv_enlightenments_control.msr_bitmap = 1;
+ run_guest(vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR);
+ vmcb->save.rip += 2; /* rdmsr */
+
+ /* Intercept RDMSR 0xc0000101 without telling KVM about it */
+ __set_bit(2 * (MSR_GS_BASE & 0x1fff), svm->msr + 0x800);
+ /* Make sure HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP is set */
+ vmcb->control.clean |= HV_VMCB_NESTED_ENLIGHTENMENTS;
+ run_guest(vmcb, svm->vmcb_gpa);
+ /* Make sure we don't see SVM_EXIT_MSR here so eMSR bitmap works */
+ GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+ vmcb->save.rip += 3; /* vmcall */
+
+ /* Now tell KVM we've changed MSR-Bitmap */
+ vmcb->control.clean &= ~HV_VMCB_NESTED_ENLIGHTENMENTS;
+ run_guest(vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR);
+ vmcb->save.rip += 2; /* rdmsr */
+
+
+ /*
+ * L2 TLB flush test. First VMCALL should be handled directly by L0,
+ * no VMCALL exit expected.
+ */
+ run_guest(vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR);
+ vmcb->save.rip += 2; /* rdmsr */
+ /* Enable synthetic vmexit */
+ *(u32 *)(hv_pages->partition_assist) = 1;
+ run_guest(vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(vmcb->control.exit_code == HV_SVM_EXITCODE_ENL);
+ GUEST_ASSERT(vmcb->control.exit_info_1 == HV_SVM_ENL_EXITCODE_TRAP_AFTER_FLUSH);
+
+ run_guest(vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+ GUEST_SYNC(6);
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ vm_vaddr_t nested_gva = 0, hv_pages_gva = 0;
+ vm_vaddr_t hcall_page;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ int stage;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_DIRECT_TLBFLUSH));
+
+ /* Create VM */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ vcpu_set_hv_cpuid(vcpu);
+ vcpu_alloc_svm(vm, &nested_gva);
+ vcpu_alloc_hyperv_test_pages(vm, &hv_pages_gva);
+
+ hcall_page = vm_vaddr_alloc_pages(vm, 1);
+ memset(addr_gva2hva(vm, hcall_page), 0x0, getpagesize());
+
+ vcpu_args_set(vcpu, 3, nested_gva, hv_pages_gva, addr_gva2gpa(vm, hcall_page));
+ vcpu_set_msr(vcpu, HV_X64_MSR_VP_INDEX, vcpu->id);
+
+ for (stage = 1;; stage++) {
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ case UCALL_SYNC:
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+
+ /* UCALL_SYNC is handled here. */
+ TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+ uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
+ stage, (ulong)uc.args[1]);
+
+ }
+
+done:
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c b/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c
new file mode 100644
index 000000000000..05b56095cf76
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c
@@ -0,0 +1,682 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hyper-V HvFlushVirtualAddress{List,Space}{,Ex} tests
+ *
+ * Copyright (C) 2022, Red Hat, Inc.
+ *
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <asm/barrier.h>
+#include <pthread.h>
+#include <inttypes.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "hyperv.h"
+#include "test_util.h"
+#include "vmx.h"
+
+#define WORKER_VCPU_ID_1 2
+#define WORKER_VCPU_ID_2 65
+
+#define NTRY 100
+#define NTEST_PAGES 2
+
+struct hv_vpset {
+ u64 format;
+ u64 valid_bank_mask;
+ u64 bank_contents[];
+};
+
+enum HV_GENERIC_SET_FORMAT {
+ HV_GENERIC_SET_SPARSE_4K,
+ HV_GENERIC_SET_ALL,
+};
+
+#define HV_FLUSH_ALL_PROCESSORS BIT(0)
+#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES BIT(1)
+#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY BIT(2)
+#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3)
+
+/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */
+struct hv_tlb_flush {
+ u64 address_space;
+ u64 flags;
+ u64 processor_mask;
+ u64 gva_list[];
+} __packed;
+
+/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */
+struct hv_tlb_flush_ex {
+ u64 address_space;
+ u64 flags;
+ struct hv_vpset hv_vp_set;
+ u64 gva_list[];
+} __packed;
+
+/*
+ * Pass the following info to 'workers' and 'sender'
+ * - Hypercall page's GVA
+ * - Hypercall page's GPA
+ * - Test pages GVA
+ * - GVAs of the test pages' PTEs
+ */
+struct test_data {
+ vm_vaddr_t hcall_gva;
+ vm_paddr_t hcall_gpa;
+ vm_vaddr_t test_pages;
+ vm_vaddr_t test_pages_pte[NTEST_PAGES];
+};
+
+/* 'Worker' vCPU code checking the contents of the test page */
+static void worker_guest_code(vm_vaddr_t test_data)
+{
+ struct test_data *data = (struct test_data *)test_data;
+ u32 vcpu_id = rdmsr(HV_X64_MSR_VP_INDEX);
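+	/* Per-vCPU expected values live in the page right after the NTEST_PAGES test pages */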
+ void *exp_page = (void *)data->test_pages + PAGE_SIZE * NTEST_PAGES;
+ u64 *this_cpu = (u64 *)(exp_page + vcpu_id * sizeof(u64));
+ u64 expected, val;
+
+ x2apic_enable();
+ wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+
+ for (;;) {
+ cpu_relax();
+
+ expected = READ_ONCE(*this_cpu);
+
+ /*
+ * Make sure the value in the test page is read after reading
+ * the expectation for the first time. Pairs with wmb() in
+ * prepare_to_test().
+ */
+ rmb();
+
+ val = READ_ONCE(*(u64 *)data->test_pages);
+
+ /*
+		 * Make sure the value in the test page is read before reading
+		 * the expectation for the second time. Pairs with wmb() in
+		 * post_test().
+ */
+ rmb();
+
+ /*
+ * '0' indicates the sender is between iterations, wait until
+ * the sender is ready for this vCPU to start checking again.
+ */
+ if (!expected)
+ continue;
+
+ /*
+		 * Re-read the per-vCPU value to ensure the sender didn't move
+ * onto a new iteration.
+ */
+ if (expected != READ_ONCE(*this_cpu))
+ continue;
+
+ GUEST_ASSERT(val == expected);
+ }
+}
+
+/*
+ * Write per-CPU info indicating what each 'worker' CPU is supposed to see in
+ * test page. '0' means don't check.
+ */
+static void set_expected_val(void *addr, u64 val, int vcpu_id)
+{
+ void *exp_page = addr + PAGE_SIZE * NTEST_PAGES;
+
+ *(u64 *)(exp_page + vcpu_id * sizeof(u64)) = val;
+}
+
+/*
+ * Update PTEs swapping two test pages.
+ * TODO: use swap()/xchg() when these are provided.
+ */
+static void swap_two_test_pages(vm_paddr_t pte_gva1, vm_paddr_t pte_gva2)
+{
+ uint64_t tmp = *(uint64_t *)pte_gva1;
+
+ *(uint64_t *)pte_gva1 = *(uint64_t *)pte_gva2;
+ *(uint64_t *)pte_gva2 = tmp;
+}
+
+/*
+ * TODO: replace the silly NOP loop with a proper udelay() implementation.
+ */
+static inline void do_delay(void)
+{
+ int i;
+
+ for (i = 0; i < 1000000; i++)
+ asm volatile("nop");
+}
+
+/*
+ * Prepare to test: 'disable' workers by setting the expectation to '0',
+ * clear hypercall input page and then swap two test pages.
+ */
+static inline void prepare_to_test(struct test_data *data)
+{
+ /* Clear hypercall input page */
+ memset((void *)data->hcall_gva, 0, PAGE_SIZE);
+
+ /* 'Disable' workers */
+ set_expected_val((void *)data->test_pages, 0x0, WORKER_VCPU_ID_1);
+ set_expected_val((void *)data->test_pages, 0x0, WORKER_VCPU_ID_2);
+
+ /* Make sure workers are 'disabled' before we swap PTEs. */
+ wmb();
+
+ /* Make sure workers have enough time to notice */
+ do_delay();
+
+ /* Swap test page mappings */
+ swap_two_test_pages(data->test_pages_pte[0], data->test_pages_pte[1]);
+}
+
+/*
+ * Finalize the test: to check the hypercall's effect, set the expected val
+ * for 'worker' CPUs and give them some time to test.
+ */
+static inline void post_test(struct test_data *data, u64 exp1, u64 exp2)
+{
+ /* Make sure we change the expectation after swapping PTEs */
+ wmb();
+
+ /* Set the expectation for workers, '0' means don't test */
+ set_expected_val((void *)data->test_pages, exp1, WORKER_VCPU_ID_1);
+ set_expected_val((void *)data->test_pages, exp2, WORKER_VCPU_ID_2);
+
+ /* Make sure workers have enough time to test */
+ do_delay();
+}
+
+#define TESTVAL1 0x0101010101010101
+#define TESTVAL2 0x0202020202020202
+
+/* Main vCPU doing the test */
+static void sender_guest_code(vm_vaddr_t test_data)
+{
+ struct test_data *data = (struct test_data *)test_data;
+ struct hv_tlb_flush *flush = (struct hv_tlb_flush *)data->hcall_gva;
+ struct hv_tlb_flush_ex *flush_ex = (struct hv_tlb_flush_ex *)data->hcall_gva;
+ vm_paddr_t hcall_gpa = data->hcall_gpa;
+ int i, stage = 1;
+
+ wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+ wrmsr(HV_X64_MSR_HYPERCALL, data->hcall_gpa);
+
+ /* "Slow" hypercalls */
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for WORKER_VCPU_ID_1 */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+ flush->processor_mask = BIT(WORKER_VCPU_ID_1);
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE, hcall_gpa,
+ hcall_gpa + PAGE_SIZE);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for WORKER_VCPU_ID_1 */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+ flush->processor_mask = BIT(WORKER_VCPU_ID_1);
+ flush->gva_list[0] = (u64)data->test_pages;
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST |
+ (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+ hcall_gpa, hcall_gpa + PAGE_SIZE);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for HV_FLUSH_ALL_PROCESSORS */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
+ HV_FLUSH_ALL_PROCESSORS;
+ flush->processor_mask = 0;
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE, hcall_gpa,
+ hcall_gpa + PAGE_SIZE);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for HV_FLUSH_ALL_PROCESSORS */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
+ HV_FLUSH_ALL_PROCESSORS;
+ flush->gva_list[0] = (u64)data->test_pages;
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST |
+ (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+ hcall_gpa, hcall_gpa + PAGE_SIZE);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+ i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for WORKER_VCPU_ID_2 */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+ flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+ flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64);
+ flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
+ (1 << HV_HYPERCALL_VARHEAD_OFFSET),
+ hcall_gpa, hcall_gpa + PAGE_SIZE);
+ post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for WORKER_VCPU_ID_2 */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+ flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+ flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64);
+ flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+ /* bank_contents and gva_list occupy the same space, thus [1] */
+ flush_ex->gva_list[1] = (u64)data->test_pages;
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
+ (1 << HV_HYPERCALL_VARHEAD_OFFSET) |
+ (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+ hcall_gpa, hcall_gpa + PAGE_SIZE);
+ post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for both vCPUs */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+ flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+ flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64) |
+ BIT_ULL(WORKER_VCPU_ID_1 / 64);
+ flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64);
+ flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
+ (2 << HV_HYPERCALL_VARHEAD_OFFSET),
+ hcall_gpa, hcall_gpa + PAGE_SIZE);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+ i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for both vCPUs */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+ flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+ flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_1 / 64) |
+ BIT_ULL(WORKER_VCPU_ID_2 / 64);
+ flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64);
+ flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+ /* bank_contents and gva_list occupy the same space, thus [2] */
+ flush_ex->gva_list[2] = (u64)data->test_pages;
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
+ (2 << HV_HYPERCALL_VARHEAD_OFFSET) |
+ (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+ hcall_gpa, hcall_gpa + PAGE_SIZE);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+ i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for HV_GENERIC_SET_ALL */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+ flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL;
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
+ hcall_gpa, hcall_gpa + PAGE_SIZE);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+ i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for HV_GENERIC_SET_ALL */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+ flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL;
+ flush_ex->gva_list[0] = (u64)data->test_pages;
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
+ (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+ hcall_gpa, hcall_gpa + PAGE_SIZE);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+ i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ /* "Fast" hypercalls */
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for WORKER_VCPU_ID_1 */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush->processor_mask = BIT(WORKER_VCPU_ID_1);
+ hyperv_write_xmm_input(&flush->processor_mask, 1);
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE |
+ HV_HYPERCALL_FAST_BIT, 0x0,
+ HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for WORKER_VCPU_ID_1 */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush->processor_mask = BIT(WORKER_VCPU_ID_1);
+ flush->gva_list[0] = (u64)data->test_pages;
+ hyperv_write_xmm_input(&flush->processor_mask, 1);
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST |
+ HV_HYPERCALL_FAST_BIT |
+ (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+ 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for HV_FLUSH_ALL_PROCESSORS */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ hyperv_write_xmm_input(&flush->processor_mask, 1);
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE |
+ HV_HYPERCALL_FAST_BIT, 0x0,
+ HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
+ HV_FLUSH_ALL_PROCESSORS);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+ i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for HV_FLUSH_ALL_PROCESSORS */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush->gva_list[0] = (u64)data->test_pages;
+ hyperv_write_xmm_input(&flush->processor_mask, 1);
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST |
+ HV_HYPERCALL_FAST_BIT |
+ (1UL << HV_HYPERCALL_REP_COMP_OFFSET), 0x0,
+ HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
+ HV_FLUSH_ALL_PROCESSORS);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+ i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for WORKER_VCPU_ID_2 */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+ flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64);
+ flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+ hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2);
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
+ HV_HYPERCALL_FAST_BIT |
+ (1 << HV_HYPERCALL_VARHEAD_OFFSET),
+ 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+ post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for WORKER_VCPU_ID_2 */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+ flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64);
+ flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+ /* bank_contents and gva_list occupy the same space, thus [1] */
+ flush_ex->gva_list[1] = (u64)data->test_pages;
+ hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2);
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
+ HV_HYPERCALL_FAST_BIT |
+ (1 << HV_HYPERCALL_VARHEAD_OFFSET) |
+ (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+ 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+ post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for both vCPUs */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+ flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64) |
+ BIT_ULL(WORKER_VCPU_ID_1 / 64);
+ flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64);
+ flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+ hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2);
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
+ HV_HYPERCALL_FAST_BIT |
+ (2 << HV_HYPERCALL_VARHEAD_OFFSET),
+ 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+		post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+			  i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for both vCPUs */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+ flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_1 / 64) |
+ BIT_ULL(WORKER_VCPU_ID_2 / 64);
+ flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64);
+ flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+ /* bank_contents and gva_list occupy the same space, thus [2] */
+ flush_ex->gva_list[2] = (u64)data->test_pages;
+ hyperv_write_xmm_input(&flush_ex->hv_vp_set, 3);
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
+ HV_HYPERCALL_FAST_BIT |
+ (2 << HV_HYPERCALL_VARHEAD_OFFSET) |
+ (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+ 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+ i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for HV_GENERIC_SET_ALL */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+ flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL;
+ hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2);
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
+ HV_HYPERCALL_FAST_BIT,
+ 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+ i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for HV_GENERIC_SET_ALL */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+ flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL;
+ flush_ex->gva_list[0] = (u64)data->test_pages;
+ hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2);
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
+ HV_HYPERCALL_FAST_BIT |
+ (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+ 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+ i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_DONE();
+}
+
+static void *vcpu_thread(void *arg)
+{
+ struct kvm_vcpu *vcpu = (struct kvm_vcpu *)arg;
+ struct ucall uc;
+ int old;
+ int r;
+
+ r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
+ TEST_ASSERT(!r, "pthread_setcanceltype failed on vcpu_id=%u with errno=%d",
+ vcpu->id, r);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ default:
+ TEST_FAIL("Unexpected ucall %lu, vCPU %d", uc.cmd, vcpu->id);
+ }
+
+ return NULL;
+}
+
+static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu)
+{
+ void *retval;
+ int r;
+
+ r = pthread_cancel(thread);
+ TEST_ASSERT(!r, "pthread_cancel on vcpu_id=%d failed with errno=%d",
+ vcpu->id, r);
+
+ r = pthread_join(thread, &retval);
+ TEST_ASSERT(!r, "pthread_join on vcpu_id=%d failed with errno=%d",
+ vcpu->id, r);
+ TEST_ASSERT(retval == PTHREAD_CANCELED,
+ "expected retval=%p, got %p", PTHREAD_CANCELED,
+ retval);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu[3];
+ pthread_t threads[2];
+ vm_vaddr_t test_data_page, gva;
+ vm_paddr_t gpa;
+ uint64_t *pte;
+ struct test_data *data;
+ struct ucall uc;
+ int stage = 1, r, i;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_TLBFLUSH));
+
+ vm = vm_create_with_one_vcpu(&vcpu[0], sender_guest_code);
+
+ /* Test data page */
+ test_data_page = vm_vaddr_alloc_page(vm);
+ data = (struct test_data *)addr_gva2hva(vm, test_data_page);
+
+ /* Hypercall input/output */
+ data->hcall_gva = vm_vaddr_alloc_pages(vm, 2);
+ data->hcall_gpa = addr_gva2gpa(vm, data->hcall_gva);
+ memset(addr_gva2hva(vm, data->hcall_gva), 0x0, 2 * PAGE_SIZE);
+
+ /*
+	 * Test pages: the first one is filled with '0x01's, the second with
+	 * '0x02's, and the test will swap their mappings. The third page holds
+	 * the per-vCPU expectations, i.e. what each worker should currently see.
+ */
+ data->test_pages = vm_vaddr_alloc_pages(vm, NTEST_PAGES + 1);
+ for (i = 0; i < NTEST_PAGES; i++)
+ memset(addr_gva2hva(vm, data->test_pages + PAGE_SIZE * i),
+ (u8)(i + 1), PAGE_SIZE);
+ set_expected_val(addr_gva2hva(vm, data->test_pages), 0x0, WORKER_VCPU_ID_1);
+ set_expected_val(addr_gva2hva(vm, data->test_pages), 0x0, WORKER_VCPU_ID_2);
+
+ /*
+ * Get PTE pointers for test pages and map them inside the guest.
+	 * Use a separate page for each PTE for simplicity.
+ */
+ gva = vm_vaddr_unused_gap(vm, NTEST_PAGES * PAGE_SIZE, KVM_UTIL_MIN_VADDR);
+ for (i = 0; i < NTEST_PAGES; i++) {
+ pte = vm_get_page_table_entry(vm, data->test_pages + i * PAGE_SIZE);
+ gpa = addr_hva2gpa(vm, pte);
+ __virt_pg_map(vm, gva + PAGE_SIZE * i, gpa & PAGE_MASK, PG_LEVEL_4K);
+ data->test_pages_pte[i] = gva + (gpa & ~PAGE_MASK);
+ }
+
+ /*
+ * Sender vCPU which performs the test: swaps test pages, sets expectation
+ * for 'workers' and issues TLB flush hypercalls.
+ */
+ vcpu_args_set(vcpu[0], 1, test_data_page);
+ vcpu_set_hv_cpuid(vcpu[0]);
+
+ /* Create worker vCPUs which check the contents of the test pages */
+ vcpu[1] = vm_vcpu_add(vm, WORKER_VCPU_ID_1, worker_guest_code);
+ vcpu_args_set(vcpu[1], 1, test_data_page);
+ vcpu_set_msr(vcpu[1], HV_X64_MSR_VP_INDEX, WORKER_VCPU_ID_1);
+ vcpu_set_hv_cpuid(vcpu[1]);
+
+ vcpu[2] = vm_vcpu_add(vm, WORKER_VCPU_ID_2, worker_guest_code);
+ vcpu_args_set(vcpu[2], 1, test_data_page);
+ vcpu_set_msr(vcpu[2], HV_X64_MSR_VP_INDEX, WORKER_VCPU_ID_2);
+ vcpu_set_hv_cpuid(vcpu[2]);
+
+ r = pthread_create(&threads[0], NULL, vcpu_thread, vcpu[1]);
+ TEST_ASSERT(!r, "pthread_create() failed");
+
+ r = pthread_create(&threads[1], NULL, vcpu_thread, vcpu[2]);
+ TEST_ASSERT(!r, "pthread_create() failed");
+
+ while (true) {
+ vcpu_run(vcpu[0]);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu[0], KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu[0], &uc)) {
+ case UCALL_SYNC:
+ TEST_ASSERT(uc.args[1] == stage,
+ "Unexpected stage: %ld (%d expected)",
+ uc.args[1], stage);
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+
+ stage++;
+ }
+
+done:
+ cancel_join_vcpu_thread(threads[0], vcpu[1]);
+ cancel_join_vcpu_thread(threads[1], vcpu[2]);
+ kvm_vm_free(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c b/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c
new file mode 100644
index 000000000000..5bc12222d87a
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c
@@ -0,0 +1,156 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021, Google LLC.
+ *
+ * Tests for adjusting the KVM clock from userspace
+ */
+#include <asm/kvm_para.h>
+#include <asm/pvclock.h>
+#include <asm/pvclock-abi.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <time.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+struct test_case {
+ uint64_t kvmclock_base;
+ int64_t realtime_offset;
+};
+
+static struct test_case test_cases[] = {
+ { .kvmclock_base = 0 },
+ { .kvmclock_base = 180 * NSEC_PER_SEC },
+ { .kvmclock_base = 0, .realtime_offset = -180 * NSEC_PER_SEC },
+ { .kvmclock_base = 0, .realtime_offset = 180 * NSEC_PER_SEC },
+};
+
+#define GUEST_SYNC_CLOCK(__stage, __val) \
+ GUEST_SYNC_ARGS(__stage, __val, 0, 0, 0)
+
+static void guest_main(vm_paddr_t pvti_pa, struct pvclock_vcpu_time_info *pvti)
+{
+ int i;
+
+ wrmsr(MSR_KVM_SYSTEM_TIME_NEW, pvti_pa | KVM_MSR_ENABLED);
+ for (i = 0; i < ARRAY_SIZE(test_cases); i++)
+ GUEST_SYNC_CLOCK(i, __pvclock_read_cycles(pvti, rdtsc()));
+}
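For context on what the guest is reporting: __pvclock_read_cycles() converts a raw TSC value to nanoseconds using the scale and base that KVM publishes in the pvclock page. A simplified sketch of the computation (it omits the version/seqlock retry and flags handling done by the real helper):

	static uint64_t pvclock_cycles_sketch(const struct pvclock_vcpu_time_info *pvti,
					      uint64_t tsc)
	{
		uint64_t delta = tsc - pvti->tsc_timestamp;

		/* Scale the TSC delta to nanoseconds (shift, then 64x32->96 multiply). */
		if (pvti->tsc_shift >= 0)
			delta <<= pvti->tsc_shift;
		else
			delta >>= -pvti->tsc_shift;

		return pvti->system_time +
		       (uint64_t)(((unsigned __int128)delta * pvti->tsc_to_system_mul) >> 32);
	}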
+
+#define EXPECTED_FLAGS (KVM_CLOCK_REALTIME | KVM_CLOCK_HOST_TSC)
+
+static inline void assert_flags(struct kvm_clock_data *data)
+{
+ TEST_ASSERT((data->flags & EXPECTED_FLAGS) == EXPECTED_FLAGS,
+ "unexpected clock data flags: %x (want set: %x)",
+ data->flags, EXPECTED_FLAGS);
+}
+
+static void handle_sync(struct ucall *uc, struct kvm_clock_data *start,
+ struct kvm_clock_data *end)
+{
+ uint64_t obs, exp_lo, exp_hi;
+
+ obs = uc->args[2];
+ exp_lo = start->clock;
+ exp_hi = end->clock;
+
+ assert_flags(start);
+ assert_flags(end);
+
+ TEST_ASSERT(exp_lo <= obs && obs <= exp_hi,
+ "unexpected kvm-clock value: %"PRIu64" expected range: [%"PRIu64", %"PRIu64"]",
+ obs, exp_lo, exp_hi);
+
+ pr_info("kvm-clock value: %"PRIu64" expected range [%"PRIu64", %"PRIu64"]\n",
+ obs, exp_lo, exp_hi);
+}
+
+static void handle_abort(struct ucall *uc)
+{
+ REPORT_GUEST_ASSERT(*uc);
+}
+
+static void setup_clock(struct kvm_vm *vm, struct test_case *test_case)
+{
+ struct kvm_clock_data data;
+
+ memset(&data, 0, sizeof(data));
+
+ data.clock = test_case->kvmclock_base;
+ if (test_case->realtime_offset) {
+ struct timespec ts;
+ int r;
+
+ data.flags |= KVM_CLOCK_REALTIME;
+ do {
+ r = clock_gettime(CLOCK_REALTIME, &ts);
+ if (!r)
+ break;
+ } while (errno == EINTR);
+
+ TEST_ASSERT(!r, "clock_gettime() failed: %d", r);
+
+ data.realtime = ts.tv_sec * NSEC_PER_SEC;
+ data.realtime += ts.tv_nsec;
+ data.realtime += test_case->realtime_offset;
+ }
+
+ vm_ioctl(vm, KVM_SET_CLOCK, &data);
+}
+
+static void enter_guest(struct kvm_vcpu *vcpu)
+{
+ struct kvm_clock_data start, end;
+ struct kvm_vm *vm = vcpu->vm;
+ struct ucall uc;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
+ setup_clock(vm, &test_cases[i]);
+
+ vm_ioctl(vm, KVM_GET_CLOCK, &start);
+
+ vcpu_run(vcpu);
+ vm_ioctl(vm, KVM_GET_CLOCK, &end);
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ handle_sync(&uc, &start, &end);
+ break;
+ case UCALL_ABORT:
+ handle_abort(&uc);
+ return;
+ default:
+ TEST_ASSERT(0, "unhandled ucall: %ld", uc.cmd);
+ }
+ }
+}
+
+int main(void)
+{
+ struct kvm_vcpu *vcpu;
+ vm_vaddr_t pvti_gva;
+ vm_paddr_t pvti_gpa;
+ struct kvm_vm *vm;
+ int flags;
+
+ flags = kvm_check_cap(KVM_CAP_ADJUST_CLOCK);
+ TEST_REQUIRE(flags & KVM_CLOCK_REALTIME);
+
+ TEST_REQUIRE(sys_clocksource_is_based_on_tsc());
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_main);
+
+ pvti_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000);
+ pvti_gpa = addr_gva2gpa(vm, pvti_gva);
+ vcpu_args_set(vcpu, 2, pvti_gpa, pvti_gva);
+
+ enter_guest(vcpu);
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
new file mode 100644
index 000000000000..9e2879af7c20
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
@@ -0,0 +1,154 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020, Google LLC.
+ *
+ * Tests for KVM paravirtual feature disablement
+ */
+#include <asm/kvm_para.h>
+#include <linux/kvm_para.h>
+#include <stdint.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+struct msr_data {
+ uint32_t idx;
+ const char *name;
+};
+
+#define TEST_MSR(msr) { .idx = msr, .name = #msr }
+#define UCALL_PR_MSR 0xdeadbeef
+#define PR_MSR(msr) ucall(UCALL_PR_MSR, 1, msr)
+
+/*
+ * KVM paravirtual msrs to test. Expect a #GP if any of these msrs are read or
+ * written, as the KVM_CPUID_FEATURES leaf is cleared.
+ */
+static struct msr_data msrs_to_test[] = {
+ TEST_MSR(MSR_KVM_SYSTEM_TIME),
+ TEST_MSR(MSR_KVM_SYSTEM_TIME_NEW),
+ TEST_MSR(MSR_KVM_WALL_CLOCK),
+ TEST_MSR(MSR_KVM_WALL_CLOCK_NEW),
+ TEST_MSR(MSR_KVM_ASYNC_PF_EN),
+ TEST_MSR(MSR_KVM_STEAL_TIME),
+ TEST_MSR(MSR_KVM_PV_EOI_EN),
+ TEST_MSR(MSR_KVM_POLL_CONTROL),
+ TEST_MSR(MSR_KVM_ASYNC_PF_INT),
+ TEST_MSR(MSR_KVM_ASYNC_PF_ACK),
+};
+
+static void test_msr(struct msr_data *msr)
+{
+ uint64_t ignored;
+ uint8_t vector;
+
+ PR_MSR(msr);
+
+ vector = rdmsr_safe(msr->idx, &ignored);
+ GUEST_ASSERT_EQ(vector, GP_VECTOR);
+
+ vector = wrmsr_safe(msr->idx, 0);
+ GUEST_ASSERT_EQ(vector, GP_VECTOR);
+}
+
+struct hcall_data {
+ uint64_t nr;
+ const char *name;
+};
+
+#define TEST_HCALL(hc) { .nr = hc, .name = #hc }
+#define UCALL_PR_HCALL 0xdeadc0de
+#define PR_HCALL(hc) ucall(UCALL_PR_HCALL, 1, hc)
+
+/*
+ * KVM hypercalls to test. Expect -KVM_ENOSYS when called, as the corresponding
+ * features have been cleared in KVM_CPUID_FEATURES.
+ */
+static struct hcall_data hcalls_to_test[] = {
+ TEST_HCALL(KVM_HC_KICK_CPU),
+ TEST_HCALL(KVM_HC_SEND_IPI),
+ TEST_HCALL(KVM_HC_SCHED_YIELD),
+};
+
+static void test_hcall(struct hcall_data *hc)
+{
+ uint64_t r;
+
+ PR_HCALL(hc);
+ r = kvm_hypercall(hc->nr, 0, 0, 0, 0);
+ GUEST_ASSERT_EQ(r, -KVM_ENOSYS);
+}
+
+static void guest_main(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(msrs_to_test); i++) {
+ test_msr(&msrs_to_test[i]);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(hcalls_to_test); i++) {
+ test_hcall(&hcalls_to_test[i]);
+ }
+
+ GUEST_DONE();
+}
+
+static void pr_msr(struct ucall *uc)
+{
+ struct msr_data *msr = (struct msr_data *)uc->args[0];
+
+ pr_info("testing msr: %s (%#x)\n", msr->name, msr->idx);
+}
+
+static void pr_hcall(struct ucall *uc)
+{
+ struct hcall_data *hc = (struct hcall_data *)uc->args[0];
+
+ pr_info("testing hcall: %s (%lu)\n", hc->name, hc->nr);
+}
+
+static void enter_guest(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ while (true) {
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_PR_MSR:
+ pr_msr(&uc);
+ break;
+ case UCALL_PR_HCALL:
+ pr_hcall(&uc);
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ return;
+ case UCALL_DONE:
+ return;
+ }
+ }
+}
+
+int main(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_ENFORCE_PV_FEATURE_CPUID));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_main);
+
+ vcpu_enable_cap(vcpu, KVM_CAP_ENFORCE_PV_FEATURE_CPUID, 1);
+
+ vcpu_clear_cpuid_entry(vcpu, KVM_CPUID_FEATURES);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+
+ enter_guest(vcpu);
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c b/tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c
new file mode 100644
index 000000000000..3cc4b86832fe
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * maximum APIC ID capability tests
+ *
+ * Copyright (C) 2022, Intel, Inc.
+ *
+ * Tests for getting/setting maximum APIC ID capability
+ */
+
+#include "kvm_util.h"
+
+#define MAX_VCPU_ID 2
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vm *vm;
+ int ret;
+
+ vm = vm_create_barebones();
+
+ /* Get KVM_CAP_MAX_VCPU_ID cap supported in KVM */
+ ret = vm_check_cap(vm, KVM_CAP_MAX_VCPU_ID);
+
+ /* Try to set KVM_CAP_MAX_VCPU_ID beyond KVM cap */
+ ret = __vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, ret + 1);
+ TEST_ASSERT(ret < 0,
+ "Setting KVM_CAP_MAX_VCPU_ID beyond KVM cap should fail");
+
+ /* Set KVM_CAP_MAX_VCPU_ID */
+ vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID);
+
+
+ /* Try to set KVM_CAP_MAX_VCPU_ID again */
+ ret = __vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID + 1);
+ TEST_ASSERT(ret < 0,
+ "Setting KVM_CAP_MAX_VCPU_ID multiple times should fail");
+
+	/* Create a vCPU with an ID beyond the KVM_CAP_MAX_VCPU_ID cap */
+ ret = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)MAX_VCPU_ID);
+ TEST_ASSERT(ret < 0, "Creating vCPU with ID > MAX_VCPU_ID should fail");
+
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c b/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c
deleted file mode 100644
index e6480fd5c4bd..000000000000
--- a/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * mmio_warning_test
- *
- * Copyright (C) 2019, Google LLC.
- *
- * This work is licensed under the terms of the GNU GPL, version 2.
- *
- * Test that we don't get a kernel warning when we call KVM_RUN after a
- * triple fault occurs. To get the triple fault to occur we call KVM_RUN
- * on a VCPU that hasn't been properly setup.
- *
- */
-
-#define _GNU_SOURCE
-#include <fcntl.h>
-#include <kvm_util.h>
-#include <linux/kvm.h>
-#include <processor.h>
-#include <pthread.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-#include <sys/mman.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <sys/wait.h>
-#include <test_util.h>
-#include <unistd.h>
-
-#define NTHREAD 4
-#define NPROCESS 5
-
-struct thread_context {
- int kvmcpu;
- struct kvm_run *run;
-};
-
-void *thr(void *arg)
-{
- struct thread_context *tc = (struct thread_context *)arg;
- int res;
- int kvmcpu = tc->kvmcpu;
- struct kvm_run *run = tc->run;
-
- res = ioctl(kvmcpu, KVM_RUN, 0);
- pr_info("ret1=%d exit_reason=%d suberror=%d\n",
- res, run->exit_reason, run->internal.suberror);
-
- return 0;
-}
-
-void test(void)
-{
- int i, kvm, kvmvm, kvmcpu;
- pthread_t th[NTHREAD];
- struct kvm_run *run;
- struct thread_context tc;
-
- kvm = open("/dev/kvm", O_RDWR);
- TEST_ASSERT(kvm != -1, "failed to open /dev/kvm");
- kvmvm = ioctl(kvm, KVM_CREATE_VM, 0);
- TEST_ASSERT(kvmvm != -1, "KVM_CREATE_VM failed");
- kvmcpu = ioctl(kvmvm, KVM_CREATE_VCPU, 0);
- TEST_ASSERT(kvmcpu != -1, "KVM_CREATE_VCPU failed");
- run = (struct kvm_run *)mmap(0, 4096, PROT_READ|PROT_WRITE, MAP_SHARED,
- kvmcpu, 0);
- tc.kvmcpu = kvmcpu;
- tc.run = run;
- srand(getpid());
- for (i = 0; i < NTHREAD; i++) {
- pthread_create(&th[i], NULL, thr, (void *)(uintptr_t)&tc);
- usleep(rand() % 10000);
- }
- for (i = 0; i < NTHREAD; i++)
- pthread_join(th[i], NULL);
-}
-
-int get_warnings_count(void)
-{
- int warnings;
- FILE *f;
-
- f = popen("dmesg | grep \"WARNING:\" | wc -l", "r");
- fscanf(f, "%d", &warnings);
- fclose(f);
-
- return warnings;
-}
-
-int main(void)
-{
- int warnings_before, warnings_after;
-
- if (!is_intel_cpu()) {
- print_skip("Must be run on an Intel CPU");
- exit(KSFT_SKIP);
- }
-
- if (vm_is_unrestricted_guest(NULL)) {
- print_skip("Unrestricted guest must be disabled");
- exit(KSFT_SKIP);
- }
-
- warnings_before = get_warnings_count();
-
- for (int i = 0; i < NPROCESS; ++i) {
- int status;
- int pid = fork();
-
- if (pid < 0)
- exit(1);
- if (pid == 0) {
- test();
- exit(0);
- }
- while (waitpid(pid, &status, __WALL) != pid)
- ;
- }
-
- warnings_after = get_warnings_count();
- TEST_ASSERT(warnings_before == warnings_after,
- "Warnings found in kernel. Run 'dmesg' to inspect them.");
-
- return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c
new file mode 100644
index 000000000000..853802641e1e
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+
+#define CPUID_MWAIT (1u << 3)
+
+enum monitor_mwait_testcases {
+ MWAIT_QUIRK_DISABLED = BIT(0),
+ MISC_ENABLES_QUIRK_DISABLED = BIT(1),
+ MWAIT_DISABLED = BIT(2),
+};
+
+/*
+ * If both MWAIT and its quirk are disabled, MONITOR/MWAIT should #UD; in all
+ * other scenarios KVM should emulate them as nops.
+ */
+#define GUEST_ASSERT_MONITOR_MWAIT(insn, testcase, vector) \
+do { \
+ bool fault_wanted = ((testcase) & MWAIT_QUIRK_DISABLED) && \
+ ((testcase) & MWAIT_DISABLED); \
+ \
+ if (fault_wanted) \
+ __GUEST_ASSERT((vector) == UD_VECTOR, \
+ "Expected #UD on " insn " for testcase '0x%x', got '0x%x'", \
+ testcase, vector); \
+ else \
+ __GUEST_ASSERT(!(vector), \
+ "Expected success on " insn " for testcase '0x%x', got '0x%x'", \
+ testcase, vector); \
+} while (0)
+
+static void guest_monitor_wait(int testcase)
+{
+ u8 vector;
+
+ GUEST_SYNC(testcase);
+
+ /*
+	 * Arbitrarily MONITOR this function; SVM performs fault checks before
+ * intercept checks, so the inputs for MONITOR and MWAIT must be valid.
+ */
+ vector = kvm_asm_safe("monitor", "a"(guest_monitor_wait), "c"(0), "d"(0));
+ GUEST_ASSERT_MONITOR_MWAIT("MONITOR", testcase, vector);
+
+ vector = kvm_asm_safe("mwait", "a"(guest_monitor_wait), "c"(0), "d"(0));
+ GUEST_ASSERT_MONITOR_MWAIT("MWAIT", testcase, vector);
+}
+
+static void guest_code(void)
+{
+ guest_monitor_wait(MWAIT_DISABLED);
+
+ guest_monitor_wait(MWAIT_QUIRK_DISABLED | MWAIT_DISABLED);
+
+ guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_DISABLED);
+ guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED);
+
+ guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED | MWAIT_DISABLED);
+ guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED);
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ uint64_t disabled_quirks;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ int testcase;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_MWAIT);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+
+ while (1) {
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ testcase = uc.args[1];
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ goto done;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ goto done;
+ }
+
+ disabled_quirks = 0;
+ if (testcase & MWAIT_QUIRK_DISABLED)
+ disabled_quirks |= KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS;
+ if (testcase & MISC_ENABLES_QUIRK_DISABLED)
+ disabled_quirks |= KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT;
+ vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, disabled_quirks);
+
+ /*
+ * If the MISC_ENABLES quirk (KVM neglects to update CPUID to
+ * enable/disable MWAIT) is disabled, toggle the ENABLE_MWAIT
+ * bit in MISC_ENABLES accordingly. If the quirk is enabled,
+ * the only valid configuration is MWAIT disabled, as CPUID
+ * can't be manually changed after running the vCPU.
+ */
+ if (!(testcase & MISC_ENABLES_QUIRK_DISABLED)) {
+ TEST_ASSERT(testcase & MWAIT_DISABLED,
+ "Can't toggle CPUID features after running vCPU");
+ continue;
+ }
+
+ vcpu_set_msr(vcpu, MSR_IA32_MISC_ENABLE,
+ (testcase & MWAIT_DISABLED) ? 0 : MSR_IA32_MISC_ENABLE_MWAIT);
+ }
+
+done:
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c b/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c
new file mode 100644
index 000000000000..3670331adf21
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c
@@ -0,0 +1,290 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#define _GNU_SOURCE /* for program_invocation_short_name */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+#include "svm_util.h"
+
+#define L2_GUEST_STACK_SIZE 256
+
+/*
+ * Arbitrary, never shoved into KVM/hardware, just need to avoid conflict with
+ * the "real" exceptions used, #SS/#GP/#DF (12/13/8).
+ */
+#define FAKE_TRIPLE_FAULT_VECTOR 0xaa
+
+/* Arbitrary 32-bit error code injected by this test. */
+#define SS_ERROR_CODE 0xdeadbeef
+
+/*
+ * Bit '0' is set on Intel if the exception occurs while delivering a previous
+ * event/exception. AMD's wording is ambiguous, but presumably the bit is set
+ * if the exception occurs while delivering an external event, e.g. NMI or INTR,
+ * but not for exceptions that occur when delivering other exceptions or
+ * software interrupts.
+ *
+ * Note, Intel's name for it, "External event", is misleading and much more
+ * aligned with AMD's behavior, but the SDM is quite clear on its behavior.
+ */
+#define ERROR_CODE_EXT_FLAG BIT(0)
+
+/*
+ * Bit '1' is set if the fault occurred when looking up a descriptor in the
+ * IDT, which is the case here as the IDT is empty/NULL.
+ */
+#define ERROR_CODE_IDT_FLAG BIT(1)
+
+/*
+ * The #GP that occurs when vectoring #SS should show the index into the IDT
+ * for #SS, plus have the "IDT flag" set.
+ */
+#define GP_ERROR_CODE_AMD ((SS_VECTOR * 8) | ERROR_CODE_IDT_FLAG)
+#define GP_ERROR_CODE_INTEL ((SS_VECTOR * 8) | ERROR_CODE_IDT_FLAG | ERROR_CODE_EXT_FLAG)
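Worked out numerically: #SS is vector 12, so the AMD error code is (12 * 8) | IDT = 0x62, and the Intel variant adds the EXT flag for 0x63. Purely for illustration (not part of the patch), the arithmetic can be pinned down at compile time:

	_Static_assert(GP_ERROR_CODE_AMD == 0x62, "#GP error code when vectoring #SS (AMD)");
	_Static_assert(GP_ERROR_CODE_INTEL == 0x63, "#GP error code when vectoring #SS (Intel)");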
+
+/*
+ * Intel and AMD both shove '0' into the error code on #DF, regardless of what
+ * led to the double fault.
+ */
+#define DF_ERROR_CODE 0
+
+#define INTERCEPT_SS (BIT_ULL(SS_VECTOR))
+#define INTERCEPT_SS_DF (INTERCEPT_SS | BIT_ULL(DF_VECTOR))
+#define INTERCEPT_SS_GP_DF (INTERCEPT_SS_DF | BIT_ULL(GP_VECTOR))
+
+static void l2_ss_pending_test(void)
+{
+ GUEST_SYNC(SS_VECTOR);
+}
+
+static void l2_ss_injected_gp_test(void)
+{
+ GUEST_SYNC(GP_VECTOR);
+}
+
+static void l2_ss_injected_df_test(void)
+{
+ GUEST_SYNC(DF_VECTOR);
+}
+
+static void l2_ss_injected_tf_test(void)
+{
+ GUEST_SYNC(FAKE_TRIPLE_FAULT_VECTOR);
+}
+
+static void svm_run_l2(struct svm_test_data *svm, void *l2_code, int vector,
+ uint32_t error_code)
+{
+ struct vmcb *vmcb = svm->vmcb;
+ struct vmcb_control_area *ctrl = &vmcb->control;
+
+ vmcb->save.rip = (u64)l2_code;
+ run_guest(vmcb, svm->vmcb_gpa);
+
+ if (vector == FAKE_TRIPLE_FAULT_VECTOR)
+ return;
+
+ GUEST_ASSERT_EQ(ctrl->exit_code, (SVM_EXIT_EXCP_BASE + vector));
+ GUEST_ASSERT_EQ(ctrl->exit_info_1, error_code);
+}
+
+static void l1_svm_code(struct svm_test_data *svm)
+{
+ struct vmcb_control_area *ctrl = &svm->vmcb->control;
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ generic_svm_setup(svm, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ svm->vmcb->save.idtr.limit = 0;
+ ctrl->intercept |= BIT_ULL(INTERCEPT_SHUTDOWN);
+
+ ctrl->intercept_exceptions = INTERCEPT_SS_GP_DF;
+ svm_run_l2(svm, l2_ss_pending_test, SS_VECTOR, SS_ERROR_CODE);
+ svm_run_l2(svm, l2_ss_injected_gp_test, GP_VECTOR, GP_ERROR_CODE_AMD);
+
+ ctrl->intercept_exceptions = INTERCEPT_SS_DF;
+ svm_run_l2(svm, l2_ss_injected_df_test, DF_VECTOR, DF_ERROR_CODE);
+
+ ctrl->intercept_exceptions = INTERCEPT_SS;
+ svm_run_l2(svm, l2_ss_injected_tf_test, FAKE_TRIPLE_FAULT_VECTOR, 0);
+ GUEST_ASSERT_EQ(ctrl->exit_code, SVM_EXIT_SHUTDOWN);
+
+ GUEST_DONE();
+}
+
+static void vmx_run_l2(void *l2_code, int vector, uint32_t error_code)
+{
+ GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_code));
+
+ GUEST_ASSERT_EQ(vector == SS_VECTOR ? vmlaunch() : vmresume(), 0);
+
+ if (vector == FAKE_TRIPLE_FAULT_VECTOR)
+ return;
+
+ GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_EXCEPTION_NMI);
+ GUEST_ASSERT_EQ((vmreadz(VM_EXIT_INTR_INFO) & 0xff), vector);
+ GUEST_ASSERT_EQ(vmreadz(VM_EXIT_INTR_ERROR_CODE), error_code);
+}
+
+static void l1_vmx_code(struct vmx_pages *vmx)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ GUEST_ASSERT_EQ(prepare_for_vmx_operation(vmx), true);
+
+ GUEST_ASSERT_EQ(load_vmcs(vmx), true);
+
+ prepare_vmcs(vmx, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ GUEST_ASSERT_EQ(vmwrite(GUEST_IDTR_LIMIT, 0), 0);
+
+ /*
+ * VMX disallows injecting an exception with error_code[31:16] != 0,
+ * and hardware will never generate a VM-Exit with bits 31:16 set.
+ * KVM should likewise truncate the "bad" userspace value.
+ */
+ GUEST_ASSERT_EQ(vmwrite(EXCEPTION_BITMAP, INTERCEPT_SS_GP_DF), 0);
+ vmx_run_l2(l2_ss_pending_test, SS_VECTOR, (u16)SS_ERROR_CODE);
+ vmx_run_l2(l2_ss_injected_gp_test, GP_VECTOR, GP_ERROR_CODE_INTEL);
+
+ GUEST_ASSERT_EQ(vmwrite(EXCEPTION_BITMAP, INTERCEPT_SS_DF), 0);
+ vmx_run_l2(l2_ss_injected_df_test, DF_VECTOR, DF_ERROR_CODE);
+
+ GUEST_ASSERT_EQ(vmwrite(EXCEPTION_BITMAP, INTERCEPT_SS), 0);
+ vmx_run_l2(l2_ss_injected_tf_test, FAKE_TRIPLE_FAULT_VECTOR, 0);
+ GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_TRIPLE_FAULT);
+
+ GUEST_DONE();
+}
+
+static void __attribute__((__flatten__)) l1_guest_code(void *test_data)
+{
+ if (this_cpu_has(X86_FEATURE_SVM))
+ l1_svm_code(test_data);
+ else
+ l1_vmx_code(test_data);
+}
+
+static void assert_ucall_vector(struct kvm_vcpu *vcpu, int vector)
+{
+ struct ucall uc;
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ TEST_ASSERT(vector == uc.args[1],
+ "Expected L2 to ask for %d, got %ld", vector, uc.args[1]);
+ break;
+ case UCALL_DONE:
+ TEST_ASSERT(vector == -1,
+ "Expected L2 to ask for %d, L2 says it's done", vector);
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ default:
+ TEST_FAIL("Expected L2 to ask for %d, got unexpected ucall %lu", vector, uc.cmd);
+ }
+}
+
+static void queue_ss_exception(struct kvm_vcpu *vcpu, bool inject)
+{
+ struct kvm_vcpu_events events;
+
+ vcpu_events_get(vcpu, &events);
+
+ TEST_ASSERT(!events.exception.pending,
+		    "Vector %d unexpectedly pending", events.exception.nr);
+ TEST_ASSERT(!events.exception.injected,
+ "Vector %d unexpectedly injected", events.exception.nr);
+
+ events.flags = KVM_VCPUEVENT_VALID_PAYLOAD;
+ events.exception.pending = !inject;
+ events.exception.injected = inject;
+ events.exception.nr = SS_VECTOR;
+ events.exception.has_error_code = true;
+ events.exception.error_code = SS_ERROR_CODE;
+ vcpu_events_set(vcpu, &events);
+}
+
+/*
+ * Verify KVM_{G,S}ET_VCPU_EVENTS play nice with pending vs. injected exceptions
+ * when an exception is being queued for L2. Specifically, verify that KVM
+ * honors L1 exception intercept controls when a #SS is pending/injected,
+ * triggers a #GP on vectoring the #SS, morphs to #DF if #GP isn't intercepted
+ * by L1, and finally causes (nested) SHUTDOWN if #DF isn't intercepted by L1.
+ */
+int main(int argc, char *argv[])
+{
+ vm_vaddr_t nested_test_data_gva;
+ struct kvm_vcpu_events events;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_EXCEPTION_PAYLOAD));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX));
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+ vm_enable_cap(vm, KVM_CAP_EXCEPTION_PAYLOAD, -2ul);
+
+ if (kvm_cpu_has(X86_FEATURE_SVM))
+ vcpu_alloc_svm(vm, &nested_test_data_gva);
+ else
+ vcpu_alloc_vmx(vm, &nested_test_data_gva);
+
+ vcpu_args_set(vcpu, 1, nested_test_data_gva);
+
+ /* Run L1 => L2. L2 should sync and request #SS. */
+ vcpu_run(vcpu);
+ assert_ucall_vector(vcpu, SS_VECTOR);
+
+ /* Pend #SS and request immediate exit. #SS should still be pending. */
+ queue_ss_exception(vcpu, false);
+ vcpu->run->immediate_exit = true;
+ vcpu_run_complete_io(vcpu);
+
+	/* Verify the pending event comes back out the same as it went in. */
+ vcpu_events_get(vcpu, &events);
+ TEST_ASSERT_EQ(events.flags & KVM_VCPUEVENT_VALID_PAYLOAD,
+ KVM_VCPUEVENT_VALID_PAYLOAD);
+ TEST_ASSERT_EQ(events.exception.pending, true);
+ TEST_ASSERT_EQ(events.exception.nr, SS_VECTOR);
+ TEST_ASSERT_EQ(events.exception.has_error_code, true);
+ TEST_ASSERT_EQ(events.exception.error_code, SS_ERROR_CODE);
+
+ /*
+ * Run for real with the pending #SS, L1 should get a VM-Exit due to
+ * #SS interception and re-enter L2 to request #GP (via injected #SS).
+ */
+ vcpu->run->immediate_exit = false;
+ vcpu_run(vcpu);
+ assert_ucall_vector(vcpu, GP_VECTOR);
+
+ /*
+ * Inject #SS, the #SS should bypass interception and cause #GP, which
+ * L1 should intercept before KVM morphs it to #DF. L1 should then
+ * disable #GP interception and run L2 to request #DF (via #SS => #GP).
+ */
+ queue_ss_exception(vcpu, true);
+ vcpu_run(vcpu);
+ assert_ucall_vector(vcpu, DF_VECTOR);
+
+ /*
+	 * Inject #SS, the #SS should bypass interception and cause #GP, which
+	 * L1 is no longer intercepting, and so L1 should see a #DF VM-Exit.
+	 * L1 should then signal that it is done.
+ */
+ queue_ss_exception(vcpu, true);
+ vcpu_run(vcpu);
+ assert_ucall_vector(vcpu, FAKE_TRIPLE_FAULT_VECTOR);
+
+ /*
+ * Inject #SS yet again. L1 is not intercepting #GP or #DF, and so
+ * should see nested TRIPLE_FAULT / SHUTDOWN.
+ */
+ queue_ss_exception(vcpu, true);
+ vcpu_run(vcpu);
+ assert_ucall_vector(vcpu, -1);
+
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
new file mode 100644
index 000000000000..17bbb96fc4df
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
@@ -0,0 +1,269 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Usage: to be run via nx_huge_pages_test.sh, which does the necessary
+ * environment setup and teardown
+ *
+ * Copyright (C) 2022, Google LLC.
+ */
+
+#define _GNU_SOURCE
+
+#include <fcntl.h>
+#include <stdint.h>
+#include <time.h>
+
+#include <test_util.h>
+#include "kvm_util.h"
+#include "processor.h"
+
+#define HPAGE_SLOT 10
+#define HPAGE_GPA (4UL << 30) /* 4G prevents collision w/ slot 0 */
+#define HPAGE_GVA HPAGE_GPA /* GVA is arbitrary, so use GPA. */
+#define PAGES_PER_2MB_HUGE_PAGE 512
+#define HPAGE_SLOT_NPAGES (3 * PAGES_PER_2MB_HUGE_PAGE)
+
+/*
+ * Passed by nx_huge_pages_test.sh to provide an easy warning if this test is
+ * being run without it.
+ */
+#define MAGIC_TOKEN 887563923
+
+/*
+ * x86 opcode for the return instruction. Used to call into, and then
+ * immediately return from, memory backed with hugepages.
+ */
+#define RETURN_OPCODE 0xC3
+
+/* Call the specified memory address. */
+static void guest_do_CALL(uint64_t target)
+{
+ ((void (*)(void)) target)();
+}
+
+/*
+ * Exit the VM after each memory access so that the userspace component of the
+ * test can make assertions about the pages backing the VM.
+ *
+ * See the below for an explanation of how each access should affect the
+ * backing mappings.
+ */
+void guest_code(void)
+{
+ uint64_t hpage_1 = HPAGE_GVA;
+ uint64_t hpage_2 = hpage_1 + (PAGE_SIZE * 512);
+ uint64_t hpage_3 = hpage_2 + (PAGE_SIZE * 512);
+
+ READ_ONCE(*(uint64_t *)hpage_1);
+ GUEST_SYNC(1);
+
+ READ_ONCE(*(uint64_t *)hpage_2);
+ GUEST_SYNC(2);
+
+ guest_do_CALL(hpage_1);
+ GUEST_SYNC(3);
+
+ guest_do_CALL(hpage_3);
+ GUEST_SYNC(4);
+
+ READ_ONCE(*(uint64_t *)hpage_1);
+ GUEST_SYNC(5);
+
+ READ_ONCE(*(uint64_t *)hpage_3);
+ GUEST_SYNC(6);
+}
+
+static void check_2m_page_count(struct kvm_vm *vm, int expected_pages_2m)
+{
+ int actual_pages_2m;
+
+ actual_pages_2m = vm_get_stat(vm, "pages_2m");
+
+ TEST_ASSERT(actual_pages_2m == expected_pages_2m,
+ "Unexpected 2m page count. Expected %d, got %d",
+ expected_pages_2m, actual_pages_2m);
+}
+
+static void check_split_count(struct kvm_vm *vm, int expected_splits)
+{
+ int actual_splits;
+
+ actual_splits = vm_get_stat(vm, "nx_lpage_splits");
+
+ TEST_ASSERT(actual_splits == expected_splits,
+ "Unexpected NX huge page split count. Expected %d, got %d",
+ expected_splits, actual_splits);
+}
+
+static void wait_for_reclaim(int reclaim_period_ms)
+{
+ long reclaim_wait_ms;
+ struct timespec ts;
+
+ reclaim_wait_ms = reclaim_period_ms * 5;
+ ts.tv_sec = reclaim_wait_ms / 1000;
+ ts.tv_nsec = (reclaim_wait_ms - (ts.tv_sec * 1000)) * 1000000;
+ nanosleep(&ts, NULL);
+}
+
+void run_test(int reclaim_period_ms, bool disable_nx_huge_pages,
+ bool reboot_permissions)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ uint64_t nr_bytes;
+ void *hva;
+ int r;
+
+ vm = vm_create(1);
+
+ if (disable_nx_huge_pages) {
+ r = __vm_disable_nx_huge_pages(vm);
+ if (reboot_permissions) {
+ TEST_ASSERT(!r, "Disabling NX huge pages should succeed if process has reboot permissions");
+ } else {
+ TEST_ASSERT(r == -1 && errno == EPERM,
+ "This process should not have permission to disable NX huge pages");
+ return;
+ }
+ }
+
+ vcpu = vm_vcpu_add(vm, 0, guest_code);
+
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_HUGETLB,
+ HPAGE_GPA, HPAGE_SLOT,
+ HPAGE_SLOT_NPAGES, 0);
+
+ nr_bytes = HPAGE_SLOT_NPAGES * vm->page_size;
+
+ /*
+ * Ensure that KVM can map HPAGE_SLOT with huge pages by mapping the
+ * region into the guest with 2MiB pages whenever TDP is disabled (i.e.
+ * whenever KVM is shadowing the guest page tables).
+ *
+ * When TDP is enabled, KVM should be able to map HPAGE_SLOT with huge
+ * pages irrespective of the guest page size, so map with 4KiB pages
+ * to test that that is the case.
+ */
+ if (kvm_is_tdp_enabled())
+ virt_map_level(vm, HPAGE_GVA, HPAGE_GPA, nr_bytes, PG_LEVEL_4K);
+ else
+ virt_map_level(vm, HPAGE_GVA, HPAGE_GPA, nr_bytes, PG_LEVEL_2M);
+
+ hva = addr_gpa2hva(vm, HPAGE_GPA);
+ memset(hva, RETURN_OPCODE, nr_bytes);
+
+ check_2m_page_count(vm, 0);
+ check_split_count(vm, 0);
+
+ /*
+ * The guest code will first read from the first hugepage, resulting
+ * in a huge page mapping being created.
+ */
+ vcpu_run(vcpu);
+ check_2m_page_count(vm, 1);
+ check_split_count(vm, 0);
+
+ /*
+ * Then the guest code will read from the second hugepage, resulting
+ * in another huge page mapping being created.
+ */
+ vcpu_run(vcpu);
+ check_2m_page_count(vm, 2);
+ check_split_count(vm, 0);
+
+ /*
+ * Next, the guest will execute from the first huge page, causing it
+ * to be remapped at 4k.
+ *
+ * If NX huge pages are disabled, this should have no effect.
+ */
+ vcpu_run(vcpu);
+ check_2m_page_count(vm, disable_nx_huge_pages ? 2 : 1);
+ check_split_count(vm, disable_nx_huge_pages ? 0 : 1);
+
+ /*
+ * Executing from the third huge page (previously unaccessed) will
+	 * cause part of it to be mapped at 4k.
+ *
+ * If NX huge pages are disabled, it should be mapped at 2M.
+ */
+ vcpu_run(vcpu);
+ check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 1);
+ check_split_count(vm, disable_nx_huge_pages ? 0 : 2);
+
+ /* Reading from the first huge page again should have no effect. */
+ vcpu_run(vcpu);
+ check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 1);
+ check_split_count(vm, disable_nx_huge_pages ? 0 : 2);
+
+ /* Give recovery thread time to run. */
+ wait_for_reclaim(reclaim_period_ms);
+
+ /*
+ * Now that the reclaimer has run, all the split pages should be gone.
+ *
+	 * If NX huge pages are disabled, the reclaimer will not run, so
+ * nothing should change from here on.
+ */
+ check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 1);
+ check_split_count(vm, 0);
+
+ /*
+ * The 4k mapping on hpage 3 should have been removed, so check that
+ * reading from it causes a huge page mapping to be installed.
+ */
+ vcpu_run(vcpu);
+ check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 2);
+ check_split_count(vm, 0);
+
+ kvm_vm_free(vm);
+}
+
+static void help(char *name)
+{
+ puts("");
+	printf("usage: %s [-h] [-p period_ms] [-t token] [-r]\n", name);
+ puts("");
+ printf(" -p: The NX reclaim period in milliseconds.\n");
+ printf(" -t: The magic token to indicate environment setup is done.\n");
+ printf(" -r: The test has reboot permissions and can disable NX huge pages.\n");
+ puts("");
+ exit(0);
+}
+
+int main(int argc, char **argv)
+{
+ int reclaim_period_ms = 0, token = 0, opt;
+ bool reboot_permissions = false;
+
+ while ((opt = getopt(argc, argv, "hp:t:r")) != -1) {
+ switch (opt) {
+ case 'p':
+ reclaim_period_ms = atoi_positive("Reclaim period", optarg);
+ break;
+ case 't':
+ token = atoi_paranoid(optarg);
+ break;
+ case 'r':
+ reboot_permissions = true;
+ break;
+ case 'h':
+ default:
+ help(argv[0]);
+ break;
+ }
+ }
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_DISABLE_NX_HUGE_PAGES));
+
+ __TEST_REQUIRE(token == MAGIC_TOKEN,
+ "This test must be run with the magic token via '-t %d'.\n"
+ "Running via nx_huge_pages_test.sh, which also handles "
+ "environment setup, is strongly recommended.", MAGIC_TOKEN);
+
+ run_test(reclaim_period_ms, false, reboot_permissions);
+ run_test(reclaim_period_ms, true, reboot_permissions);
+
+ return 0;
+}
+
diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh
new file mode 100755
index 000000000000..7cbb409801ee
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Wrapper script which performs setup and cleanup for nx_huge_pages_test.
+# Makes use of root privileges to set up huge pages and KVM module parameters.
+#
+# Copyright (C) 2022, Google LLC.
+
+set -e
+
+NX_HUGE_PAGES=$(cat /sys/module/kvm/parameters/nx_huge_pages)
+NX_HUGE_PAGES_RECOVERY_RATIO=$(cat /sys/module/kvm/parameters/nx_huge_pages_recovery_ratio)
+NX_HUGE_PAGES_RECOVERY_PERIOD=$(cat /sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms)
+HUGE_PAGES=$(cat /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages)
+
+set +e
+
+function sudo_echo () {
+ echo "$1" | sudo tee -a "$2" > /dev/null
+}
+
+NXECUTABLE="$(dirname $0)/nx_huge_pages_test"
+
+sudo_echo test /dev/null || exit 4 # KSFT_SKIP=4
+
+(
+ set -e
+
+ sudo_echo 1 /sys/module/kvm/parameters/nx_huge_pages
+ sudo_echo 1 /sys/module/kvm/parameters/nx_huge_pages_recovery_ratio
+ sudo_echo 100 /sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms
+ sudo_echo "$(( $HUGE_PAGES + 3 ))" /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
+
+ # Test with reboot permissions
+ if [ $(whoami) == "root" ] || sudo setcap cap_sys_boot+ep $NXECUTABLE 2> /dev/null; then
+ echo Running test with CAP_SYS_BOOT enabled
+ $NXECUTABLE -t 887563923 -p 100 -r
+ test $(whoami) == "root" || sudo setcap cap_sys_boot-ep $NXECUTABLE
+ else
+ echo setcap failed, skipping nx_huge_pages_test with CAP_SYS_BOOT enabled
+ fi
+
+ # Test without reboot permissions
+ if [ $(whoami) != "root" ] ; then
+ echo Running test with CAP_SYS_BOOT disabled
+ $NXECUTABLE -t 887563923 -p 100
+ else
+ echo Running as root, skipping nx_huge_pages_test with CAP_SYS_BOOT disabled
+ fi
+)
+RET=$?
+
+sudo_echo "$NX_HUGE_PAGES" /sys/module/kvm/parameters/nx_huge_pages
+sudo_echo "$NX_HUGE_PAGES_RECOVERY_RATIO" /sys/module/kvm/parameters/nx_huge_pages_recovery_ratio
+sudo_echo "$NX_HUGE_PAGES_RECOVERY_PERIOD" /sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms
+sudo_echo "$HUGE_PAGES" /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
+
+exit $RET
diff --git a/tools/testing/selftests/kvm/x86_64/platform_info_test.c b/tools/testing/selftests/kvm/x86_64/platform_info_test.c
index 1e89688cbbbf..87011965dc41 100644
--- a/tools/testing/selftests/kvm/x86_64/platform_info_test.c
+++ b/tools/testing/selftests/kvm/x86_64/platform_info_test.c
@@ -21,7 +21,6 @@
#include "kvm_util.h"
#include "processor.h"
-#define VCPU_ID 0
#define MSR_PLATFORM_INFO_MAX_TURBO_RATIO 0xff00
static void guest_code(void)
@@ -35,71 +34,46 @@ static void guest_code(void)
}
}
-static void set_msr_platform_info_enabled(struct kvm_vm *vm, bool enable)
+static void test_msr_platform_info_enabled(struct kvm_vcpu *vcpu)
{
- struct kvm_enable_cap cap = {};
-
- cap.cap = KVM_CAP_MSR_PLATFORM_INFO;
- cap.flags = 0;
- cap.args[0] = (int)enable;
- vm_enable_cap(vm, &cap);
-}
-
-static void test_msr_platform_info_enabled(struct kvm_vm *vm)
-{
- struct kvm_run *run = vcpu_state(vm, VCPU_ID);
struct ucall uc;
- set_msr_platform_info_enabled(vm, true);
- vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Exit_reason other than KVM_EXIT_IO: %u (%s),\n",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
- get_ucall(vm, VCPU_ID, &uc);
+ vm_enable_cap(vcpu->vm, KVM_CAP_MSR_PLATFORM_INFO, true);
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ get_ucall(vcpu, &uc);
TEST_ASSERT(uc.cmd == UCALL_SYNC,
- "Received ucall other than UCALL_SYNC: %lu\n", uc.cmd);
+ "Received ucall other than UCALL_SYNC: %lu", uc.cmd);
TEST_ASSERT((uc.args[1] & MSR_PLATFORM_INFO_MAX_TURBO_RATIO) ==
MSR_PLATFORM_INFO_MAX_TURBO_RATIO,
"Expected MSR_PLATFORM_INFO to have max turbo ratio mask: %i.",
MSR_PLATFORM_INFO_MAX_TURBO_RATIO);
}
-static void test_msr_platform_info_disabled(struct kvm_vm *vm)
+static void test_msr_platform_info_disabled(struct kvm_vcpu *vcpu)
{
- struct kvm_run *run = vcpu_state(vm, VCPU_ID);
-
- set_msr_platform_info_enabled(vm, false);
- vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN,
- "Exit_reason other than KVM_EXIT_SHUTDOWN: %u (%s)\n",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
+ vm_enable_cap(vcpu->vm, KVM_CAP_MSR_PLATFORM_INFO, false);
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SHUTDOWN);
}
int main(int argc, char *argv[])
{
+ struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
- int rv;
uint64_t msr_platform_info;
- /* Tell stdout not to buffer its content */
- setbuf(stdout, NULL);
-
- rv = kvm_check_cap(KVM_CAP_MSR_PLATFORM_INFO);
- if (!rv) {
- print_skip("KVM_CAP_MSR_PLATFORM_INFO not supported");
- exit(KSFT_SKIP);
- }
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_MSR_PLATFORM_INFO));
- vm = vm_create_default(VCPU_ID, 0, guest_code);
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
- msr_platform_info = vcpu_get_msr(vm, VCPU_ID, MSR_PLATFORM_INFO);
- vcpu_set_msr(vm, VCPU_ID, MSR_PLATFORM_INFO,
- msr_platform_info | MSR_PLATFORM_INFO_MAX_TURBO_RATIO);
- test_msr_platform_info_enabled(vm);
- test_msr_platform_info_disabled(vm);
- vcpu_set_msr(vm, VCPU_ID, MSR_PLATFORM_INFO, msr_platform_info);
+ msr_platform_info = vcpu_get_msr(vcpu, MSR_PLATFORM_INFO);
+ vcpu_set_msr(vcpu, MSR_PLATFORM_INFO,
+ msr_platform_info | MSR_PLATFORM_INFO_MAX_TURBO_RATIO);
+ test_msr_platform_info_enabled(vcpu);
+ test_msr_platform_info_disabled(vcpu);
+ vcpu_set_msr(vcpu, MSR_PLATFORM_INFO, msr_platform_info);
kvm_vm_free(vm);
diff --git a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
new file mode 100644
index 000000000000..29609b52f8fa
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
@@ -0,0 +1,620 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023, Tencent, Inc.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <x86intrin.h>
+
+#include "pmu.h"
+#include "processor.h"
+
+/* Number of LOOP instructions for the guest measurement payload. */
+#define NUM_BRANCHES 10
+/*
+ * Number of "extra" instructions that will be counted, i.e. the number of
+ * instructions that are needed to set up the loop and then disabled the
+ * counter. 1 CLFLUSH/CLFLUSHOPT/NOP, 1 MFENCE, 2 MOV, 2 XOR, 1 WRMSR.
+ */
+#define NUM_EXTRA_INSNS 7
+#define NUM_INSNS_RETIRED (NUM_BRANCHES + NUM_EXTRA_INSNS)
+
+static uint8_t kvm_pmu_version;
+static bool kvm_has_perf_caps;
+static bool is_forced_emulation_enabled;
+
+static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
+ void *guest_code,
+ uint8_t pmu_version,
+ uint64_t perf_capabilities)
+{
+ struct kvm_vm *vm;
+
+ vm = vm_create_with_one_vcpu(vcpu, guest_code);
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(*vcpu);
+
+ sync_global_to_guest(vm, kvm_pmu_version);
+ sync_global_to_guest(vm, is_forced_emulation_enabled);
+
+ /*
+ * Set PERF_CAPABILITIES before PMU version as KVM disallows enabling
+ * features via PERF_CAPABILITIES if the guest doesn't have a vPMU.
+ */
+ if (kvm_has_perf_caps)
+ vcpu_set_msr(*vcpu, MSR_IA32_PERF_CAPABILITIES, perf_capabilities);
+
+ vcpu_set_cpuid_property(*vcpu, X86_PROPERTY_PMU_VERSION, pmu_version);
+ return vm;
+}
+
+static void run_vcpu(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ do {
+ vcpu_run(vcpu);
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_PRINTF:
+ pr_info("%s", uc.buffer);
+ break;
+ case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+ }
+ } while (uc.cmd != UCALL_DONE);
+}
+
+static uint8_t guest_get_pmu_version(void)
+{
+ /*
+ * Return the effective PMU version, i.e. the minimum between what KVM
+ * supports and what is enumerated to the guest. The host deliberately
+ * advertises a PMU version to the guest beyond what is actually
+ * supported by KVM to verify KVM doesn't freak out and do something
+ * bizarre with an architecturally valid, but unsupported, version.
+ */
+ return min_t(uint8_t, kvm_pmu_version, this_cpu_property(X86_PROPERTY_PMU_VERSION));
+}
+
+/*
+ * If an architectural event is supported and guaranteed to generate at least
+ * one "hit, assert that its count is non-zero. If an event isn't supported or
+ * the test can't guarantee the associated action will occur, then all bets are
+ * off regarding the count, i.e. no checks can be done.
+ *
+ * Sanity check that in all cases, the event doesn't count when it's disabled,
+ * and that KVM correctly emulates the write of an arbitrary value.
+ */
+static void guest_assert_event_count(uint8_t idx,
+ struct kvm_x86_pmu_feature event,
+ uint32_t pmc, uint32_t pmc_msr)
+{
+ uint64_t count;
+
+ count = _rdpmc(pmc);
+ if (!this_pmu_has(event))
+ goto sanity_checks;
+
+ switch (idx) {
+ case INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX:
+ GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED);
+ break;
+ case INTEL_ARCH_BRANCHES_RETIRED_INDEX:
+ GUEST_ASSERT_EQ(count, NUM_BRANCHES);
+ break;
+ case INTEL_ARCH_LLC_REFERENCES_INDEX:
+ case INTEL_ARCH_LLC_MISSES_INDEX:
+ if (!this_cpu_has(X86_FEATURE_CLFLUSHOPT) &&
+ !this_cpu_has(X86_FEATURE_CLFLUSH))
+ break;
+ fallthrough;
+ case INTEL_ARCH_CPU_CYCLES_INDEX:
+ case INTEL_ARCH_REFERENCE_CYCLES_INDEX:
+ GUEST_ASSERT_NE(count, 0);
+ break;
+ case INTEL_ARCH_TOPDOWN_SLOTS_INDEX:
+ GUEST_ASSERT(count >= NUM_INSNS_RETIRED);
+ break;
+ default:
+ break;
+ }
+
+sanity_checks:
+ __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
+ GUEST_ASSERT_EQ(_rdpmc(pmc), count);
+
+ wrmsr(pmc_msr, 0xdead);
+ GUEST_ASSERT_EQ(_rdpmc(pmc), 0xdead);
+}
+
+/*
+ * Enable and disable the PMC in a monolithic asm blob to ensure that the
+ * compiler can't insert _any_ code into the measured sequence. Note, ECX
+ * doesn't need to be clobbered as the input value, @_msr, is restored
+ * before the end of the sequence.
+ *
+ * If CLFLUSH{,OPT} is supported, flush the cacheline containing (at least) the
+ * start of the loop to force LLC references and misses, i.e. to allow testing
+ * that those events actually count.
+ *
+ * If forced emulation is enabled (and specified), force emulation on a subset
+ * of the measured code to verify that KVM correctly emulates instructions and
+ * branches retired events in conjunction with hardware also counting said
+ * events.
+ */
+#define GUEST_MEASURE_EVENT(_msr, _value, clflush, FEP) \
+do { \
+ __asm__ __volatile__("wrmsr\n\t" \
+ clflush "\n\t" \
+ "mfence\n\t" \
+ "1: mov $" __stringify(NUM_BRANCHES) ", %%ecx\n\t" \
+ FEP "loop .\n\t" \
+ FEP "mov %%edi, %%ecx\n\t" \
+ FEP "xor %%eax, %%eax\n\t" \
+ FEP "xor %%edx, %%edx\n\t" \
+ "wrmsr\n\t" \
+ :: "a"((uint32_t)_value), "d"(_value >> 32), \
+ "c"(_msr), "D"(_msr) \
+ ); \
+} while (0)
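+
+/*
+ * Reader's note (derived from the constraints above, not from the changelog):
+ * the first WRMSR consumes ECX=_msr and EDX:EAX=_value to start the event,
+ * LOOP then burns ECX down from NUM_BRANCHES, and the trailing MOV/XOR/WRMSR
+ * restore ECX from EDI (which also holds _msr) and write '0' to stop the
+ * event.  Together with the CLFLUSH{,OPT}/NOP, MFENCE, and the ECX setup MOV,
+ * those instructions make up the NUM_EXTRA_INSNS counted on top of the LOOP's
+ * branches.
+ */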
+
+#define GUEST_TEST_EVENT(_idx, _event, _pmc, _pmc_msr, _ctrl_msr, _value, FEP) \
+do { \
+ wrmsr(_pmc_msr, 0); \
+ \
+ if (this_cpu_has(X86_FEATURE_CLFLUSHOPT)) \
+ GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt 1f", FEP); \
+ else if (this_cpu_has(X86_FEATURE_CLFLUSH)) \
+ GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush 1f", FEP); \
+ else \
+ GUEST_MEASURE_EVENT(_ctrl_msr, _value, "nop", FEP); \
+ \
+ guest_assert_event_count(_idx, _event, _pmc, _pmc_msr); \
+} while (0)
+
+static void __guest_test_arch_event(uint8_t idx, struct kvm_x86_pmu_feature event,
+ uint32_t pmc, uint32_t pmc_msr,
+ uint32_t ctrl_msr, uint64_t ctrl_msr_value)
+{
+ GUEST_TEST_EVENT(idx, event, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, "");
+
+ if (is_forced_emulation_enabled)
+ GUEST_TEST_EVENT(idx, event, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, KVM_FEP);
+}
+
+#define X86_PMU_FEATURE_NULL \
+({ \
+ struct kvm_x86_pmu_feature feature = {}; \
+ \
+ feature; \
+})
+
+static bool pmu_is_null_feature(struct kvm_x86_pmu_feature event)
+{
+ return !(*(u64 *)&event);
+}
+
+static void guest_test_arch_event(uint8_t idx)
+{
+ const struct {
+ struct kvm_x86_pmu_feature gp_event;
+ struct kvm_x86_pmu_feature fixed_event;
+ } intel_event_to_feature[] = {
+ [INTEL_ARCH_CPU_CYCLES_INDEX] = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED },
+ [INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX] = { X86_PMU_FEATURE_INSNS_RETIRED, X86_PMU_FEATURE_INSNS_RETIRED_FIXED },
+ /*
+ * Note, the fixed counter for reference cycles is NOT the same
+ * as the general purpose architectural event. The fixed counter
+ * explicitly counts at the same frequency as the TSC, whereas
+ * the GP event counts at a fixed, but uarch specific, frequency.
+ * Bundle them here for simplicity.
+ */
+ [INTEL_ARCH_REFERENCE_CYCLES_INDEX] = { X86_PMU_FEATURE_REFERENCE_CYCLES, X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED },
+ [INTEL_ARCH_LLC_REFERENCES_INDEX] = { X86_PMU_FEATURE_LLC_REFERENCES, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_LLC_MISSES_INDEX] = { X86_PMU_FEATURE_LLC_MISSES, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_BRANCHES_RETIRED_INDEX] = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_TOPDOWN_SLOTS_INDEX] = { X86_PMU_FEATURE_TOPDOWN_SLOTS, X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED },
+ };
+
+ uint32_t nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
+ uint32_t pmu_version = guest_get_pmu_version();
+ /* PERF_GLOBAL_CTRL exists only for Architectural PMU Version 2+. */
+ bool guest_has_perf_global_ctrl = pmu_version >= 2;
+ struct kvm_x86_pmu_feature gp_event, fixed_event;
+ uint32_t base_pmc_msr;
+ unsigned int i;
+
+ /* The host side shouldn't invoke this without a guest PMU. */
+ GUEST_ASSERT(pmu_version);
+
+ if (this_cpu_has(X86_FEATURE_PDCM) &&
+ rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
+ base_pmc_msr = MSR_IA32_PMC0;
+ else
+ base_pmc_msr = MSR_IA32_PERFCTR0;
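+ /*
+ * Reader's note: PDCM + PMU_CAP_FW_WRITES is taken to mean full-width
+ * writable counters, i.e. that the MSR_IA32_PMC0 aliases exist; without
+ * it, only the legacy MSR_IA32_PERFCTR0 range is architecturally defined.
+ */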
+
+ gp_event = intel_event_to_feature[idx].gp_event;
+ GUEST_ASSERT_EQ(idx, gp_event.f.bit);
+
+ GUEST_ASSERT(nr_gp_counters);
+
+ for (i = 0; i < nr_gp_counters; i++) {
+ uint64_t eventsel = ARCH_PERFMON_EVENTSEL_OS |
+ ARCH_PERFMON_EVENTSEL_ENABLE |
+ intel_pmu_arch_events[idx];
+
+ wrmsr(MSR_P6_EVNTSEL0 + i, 0);
+ if (guest_has_perf_global_ctrl)
+ wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, BIT_ULL(i));
+
+ __guest_test_arch_event(idx, gp_event, i, base_pmc_msr + i,
+ MSR_P6_EVNTSEL0 + i, eventsel);
+ }
+
+ if (!guest_has_perf_global_ctrl)
+ return;
+
+ fixed_event = intel_event_to_feature[idx].fixed_event;
+ if (pmu_is_null_feature(fixed_event) || !this_pmu_has(fixed_event))
+ return;
+
+ i = fixed_event.f.bit;
+
+ wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
+
+ __guest_test_arch_event(idx, fixed_event, i | INTEL_RDPMC_FIXED,
+ MSR_CORE_PERF_FIXED_CTR0 + i,
+ MSR_CORE_PERF_GLOBAL_CTRL,
+ FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
+}
+
+static void guest_test_arch_events(void)
+{
+ uint8_t i;
+
+ for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++)
+ guest_test_arch_event(i);
+
+ GUEST_DONE();
+}
+
+static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities,
+ uint8_t length, uint8_t unavailable_mask)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ /* Testing arch events requires a vPMU (there are no negative tests). */
+ if (!pmu_version)
+ return;
+
+ vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_arch_events,
+ pmu_version, perf_capabilities);
+
+ vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH,
+ length);
+ vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EVENTS_MASK,
+ unavailable_mask);
+
+ run_vcpu(vcpu);
+
+ kvm_vm_free(vm);
+}
+
+/*
+ * Limit testing to MSRs that are actually defined by Intel (in the SDM). MSRs
+ * that aren't defined as counter MSRs *probably* don't exist, but there's no
+ * guarantee that currently undefined MSR indices won't be used for something
+ * other than PMCs in the future.
+ */
+#define MAX_NR_GP_COUNTERS 8
+#define MAX_NR_FIXED_COUNTERS 3
+
+#define GUEST_ASSERT_PMC_MSR_ACCESS(insn, msr, expect_gp, vector) \
+__GUEST_ASSERT(expect_gp ? vector == GP_VECTOR : !vector, \
+ "Expected %s on " #insn "(0x%x), got vector %u", \
+ expect_gp ? "#GP" : "no fault", msr, vector) \
+
+#define GUEST_ASSERT_PMC_VALUE(insn, msr, val, expected) \
+ __GUEST_ASSERT(val == expected, \
+ "Expected " #insn "(0x%x) to yield 0x%lx, got 0x%lx", \
+ msr, expected, val);
+
+static void guest_test_rdpmc(uint32_t rdpmc_idx, bool expect_success,
+ uint64_t expected_val)
+{
+ uint8_t vector;
+ uint64_t val;
+
+ vector = rdpmc_safe(rdpmc_idx, &val);
+ GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector);
+ if (expect_success)
+ GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val);
+
+ if (!is_forced_emulation_enabled)
+ return;
+
+ vector = rdpmc_safe_fep(rdpmc_idx, &val);
+ GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector);
+ if (expect_success)
+ GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val);
+}
+
+static void guest_rd_wr_counters(uint32_t base_msr, uint8_t nr_possible_counters,
+ uint8_t nr_counters, uint32_t or_mask)
+{
+ const bool pmu_has_fast_mode = !guest_get_pmu_version();
+ uint8_t i;
+
+ for (i = 0; i < nr_possible_counters; i++) {
+ /*
+ * TODO: Test a value that validates full-width writes and the
+ * width of the counters.
+ */
+ const uint64_t test_val = 0xffff;
+ const uint32_t msr = base_msr + i;
+
+ /*
+ * Fixed counters are supported if the counter is less than the
+ * number of enumerated contiguous counters *or* the counter is
+ * explicitly enumerated in the supported counters mask.
+ */
+ const bool expect_success = i < nr_counters || (or_mask & BIT(i));
+
+ /*
+ * KVM drops writes to MSR_P6_PERFCTR[0|1] if the counters are
+ * unsupported, i.e. doesn't #GP and reads back '0'.
+ */
+ const uint64_t expected_val = expect_success ? test_val : 0;
+ const bool expect_gp = !expect_success && msr != MSR_P6_PERFCTR0 &&
+ msr != MSR_P6_PERFCTR1;
+ uint32_t rdpmc_idx;
+ uint8_t vector;
+ uint64_t val;
+
+ vector = wrmsr_safe(msr, test_val);
+ GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector);
+
+ vector = rdmsr_safe(msr, &val);
+ GUEST_ASSERT_PMC_MSR_ACCESS(RDMSR, msr, expect_gp, vector);
+
+ /* On #GP, the result of RDMSR is undefined. */
+ if (!expect_gp)
+ GUEST_ASSERT_PMC_VALUE(RDMSR, msr, val, expected_val);
+
+ /*
+ * Redo the read tests with RDPMC, which has different indexing
+ * semantics and additional capabilities.
+ */
+ rdpmc_idx = i;
+ if (base_msr == MSR_CORE_PERF_FIXED_CTR0)
+ rdpmc_idx |= INTEL_RDPMC_FIXED;
+
+ guest_test_rdpmc(rdpmc_idx, expect_success, expected_val);
+
+ /*
+ * KVM doesn't support non-architectural PMUs, i.e. it should be
+ * impossible to have fast mode RDPMC. Verify that attempting
+ * to use fast RDPMC always #GPs.
+ */
+ GUEST_ASSERT(!expect_success || !pmu_has_fast_mode);
+ rdpmc_idx |= INTEL_RDPMC_FAST;
+ guest_test_rdpmc(rdpmc_idx, false, -1ull);
+
+ vector = wrmsr_safe(msr, 0);
+ GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector);
+ }
+}
+
+static void guest_test_gp_counters(void)
+{
+ uint8_t nr_gp_counters = 0;
+ uint32_t base_msr;
+
+ if (guest_get_pmu_version())
+ nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
+
+ if (this_cpu_has(X86_FEATURE_PDCM) &&
+ rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
+ base_msr = MSR_IA32_PMC0;
+ else
+ base_msr = MSR_IA32_PERFCTR0;
+
+ guest_rd_wr_counters(base_msr, MAX_NR_GP_COUNTERS, nr_gp_counters, 0);
+ GUEST_DONE();
+}
+
+static void test_gp_counters(uint8_t pmu_version, uint64_t perf_capabilities,
+ uint8_t nr_gp_counters)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_gp_counters,
+ pmu_version, perf_capabilities);
+
+ vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_NR_GP_COUNTERS,
+ nr_gp_counters);
+
+ run_vcpu(vcpu);
+
+ kvm_vm_free(vm);
+}
+
+static void guest_test_fixed_counters(void)
+{
+ uint64_t supported_bitmask = 0;
+ uint8_t nr_fixed_counters = 0;
+ uint8_t i;
+
+ /* Fixed counters require Architectural vPMU Version 2+. */
+ if (guest_get_pmu_version() >= 2)
+ nr_fixed_counters = this_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+
+ /*
+ * The supported bitmask for fixed counters was introduced in PMU
+ * version 5.
+ */
+ if (guest_get_pmu_version() >= 5)
+ supported_bitmask = this_cpu_property(X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK);
+
+ guest_rd_wr_counters(MSR_CORE_PERF_FIXED_CTR0, MAX_NR_FIXED_COUNTERS,
+ nr_fixed_counters, supported_bitmask);
+
+ for (i = 0; i < MAX_NR_FIXED_COUNTERS; i++) {
+ uint8_t vector;
+ uint64_t val;
+
+ if (i >= nr_fixed_counters && !(supported_bitmask & BIT_ULL(i))) {
+ vector = wrmsr_safe(MSR_CORE_PERF_FIXED_CTR_CTRL,
+ FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
+ __GUEST_ASSERT(vector == GP_VECTOR,
+ "Expected #GP for counter %u in FIXED_CTR_CTRL", i);
+
+ vector = wrmsr_safe(MSR_CORE_PERF_GLOBAL_CTRL,
+ FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
+ __GUEST_ASSERT(vector == GP_VECTOR,
+ "Expected #GP for counter %u in PERF_GLOBAL_CTRL", i);
+ continue;
+ }
+
+ wrmsr(MSR_CORE_PERF_FIXED_CTR0 + i, 0);
+ wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
+ wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
+ __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
+ wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+ val = rdmsr(MSR_CORE_PERF_FIXED_CTR0 + i);
+
+ GUEST_ASSERT_NE(val, 0);
+ }
+ GUEST_DONE();
+}
+
+static void test_fixed_counters(uint8_t pmu_version, uint64_t perf_capabilities,
+ uint8_t nr_fixed_counters,
+ uint32_t supported_bitmask)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_fixed_counters,
+ pmu_version, perf_capabilities);
+
+ vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK,
+ supported_bitmask);
+ vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_NR_FIXED_COUNTERS,
+ nr_fixed_counters);
+
+ run_vcpu(vcpu);
+
+ kvm_vm_free(vm);
+}
+
+static void test_intel_counters(void)
+{
+ uint8_t nr_arch_events = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
+ uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+ uint8_t nr_gp_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
+ uint8_t pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
+ unsigned int i;
+ uint8_t v, j;
+ uint32_t k;
+
+ const uint64_t perf_caps[] = {
+ 0,
+ PMU_CAP_FW_WRITES,
+ };
+
+ /*
+ * Test up to PMU v5, which is the current maximum version defined by
+ * Intel, i.e. the last version that is guaranteed to be backwards
+ * compatible with KVM's existing behavior.
+ */
+ uint8_t max_pmu_version = max_t(typeof(pmu_version), pmu_version, 5);
+
+ /*
+ * Detect the existence of events that aren't supported by selftests.
+ * This will (obviously) fail any time the kernel adds support for a
+ * new event, but it's worth paying that price to keep the test fresh.
+ */
+ TEST_ASSERT(nr_arch_events <= NR_INTEL_ARCH_EVENTS,
+ "New architectural event(s) detected; please update this test (length = %u, mask = %x)",
+ nr_arch_events, kvm_cpu_property(X86_PROPERTY_PMU_EVENTS_MASK));
+
+ /*
+ * Force iterating over known arch events regardless of whether or not
+ * KVM/hardware supports a given event.
+ */
+ nr_arch_events = max_t(typeof(nr_arch_events), nr_arch_events, NR_INTEL_ARCH_EVENTS);
+
+ for (v = 0; v <= max_pmu_version; v++) {
+ for (i = 0; i < ARRAY_SIZE(perf_caps); i++) {
+ if (!kvm_has_perf_caps && perf_caps[i])
+ continue;
+
+ pr_info("Testing arch events, PMU version %u, perf_caps = %lx\n",
+ v, perf_caps[i]);
+ /*
+ * To keep the total runtime reasonable, test every
+ * possible non-zero, non-reserved bitmap combination
+ * only with the native PMU version and the full bit
+ * vector length.
+ */
+ if (v == pmu_version) {
+ for (k = 1; k < (BIT(nr_arch_events) - 1); k++)
+ test_arch_events(v, perf_caps[i], nr_arch_events, k);
+ }
+ /*
+ * Test single bits for all PMU versions and lengths up to
+ * the number of events + 1 (to verify KVM doesn't do
+ * weird things if the guest length is greater than the
+ * host length). Explicitly test a mask of '0' and all
+ * ones, i.e. all events being available and unavailable.
+ */
+ for (j = 0; j <= nr_arch_events + 1; j++) {
+ test_arch_events(v, perf_caps[i], j, 0);
+ test_arch_events(v, perf_caps[i], j, 0xff);
+
+ for (k = 0; k < nr_arch_events; k++)
+ test_arch_events(v, perf_caps[i], j, BIT(k));
+ }
+
+ pr_info("Testing GP counters, PMU version %u, perf_caps = %lx\n",
+ v, perf_caps[i]);
+ for (j = 0; j <= nr_gp_counters; j++)
+ test_gp_counters(v, perf_caps[i], j);
+
+ pr_info("Testing fixed counters, PMU version %u, perf_caps = %lx\n",
+ v, perf_caps[i]);
+ for (j = 0; j <= nr_fixed_counters; j++) {
+ for (k = 0; k <= (BIT(nr_fixed_counters) - 1); k++)
+ test_fixed_counters(v, perf_caps[i], j, k);
+ }
+ }
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_is_pmu_enabled());
+
+ TEST_REQUIRE(host_cpu_is_intel);
+ TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION));
+ TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0);
+
+ kvm_pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
+ kvm_has_perf_caps = kvm_cpu_has(X86_FEATURE_PDCM);
+ is_forced_emulation_enabled = kvm_is_forced_emulation_enabled();
+
+ test_intel_counters();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
new file mode 100644
index 000000000000..3c85d1ae9893
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
@@ -0,0 +1,910 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test for x86 KVM_SET_PMU_EVENT_FILTER.
+ *
+ * Copyright (C) 2022, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Verifies the expected behavior of allow lists and deny lists for
+ * virtual PMU events.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+
+#include "kvm_util.h"
+#include "pmu.h"
+#include "processor.h"
+#include "test_util.h"
+
+#define NUM_BRANCHES 42
+#define MAX_TEST_EVENTS 10
+
+#define PMU_EVENT_FILTER_INVALID_ACTION (KVM_PMU_EVENT_DENY + 1)
+#define PMU_EVENT_FILTER_INVALID_FLAGS (KVM_PMU_EVENT_FLAGS_VALID_MASK << 1)
+#define PMU_EVENT_FILTER_INVALID_NEVENTS (KVM_PMU_EVENT_FILTER_MAX_EVENTS + 1)
+
+struct __kvm_pmu_event_filter {
+ __u32 action;
+ __u32 nevents;
+ __u32 fixed_counter_bitmap;
+ __u32 flags;
+ __u32 pad[4];
+ __u64 events[KVM_PMU_EVENT_FILTER_MAX_EVENTS];
+};
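+
+/*
+ * Reader's note: this mirrors the UAPI struct kvm_pmu_event_filter, but with
+ * a fixed-size events[] array so that filters can be built on the stack and
+ * tweaked per testcase; it is cast to the UAPI type when actually passed to
+ * KVM_SET_PMU_EVENT_FILTER (see test_with_filter()).
+ */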
+
+/*
+ * This event list comprises Intel's known architectural events, plus AMD's
+ * "retired branch instructions" for Zen1-Zen3 (and* possibly other AMD CPUs).
+ * Note, AMD and Intel use the same encoding for instructions retired.
+ */
+kvm_static_assert(INTEL_ARCH_INSTRUCTIONS_RETIRED == AMD_ZEN_INSTRUCTIONS_RETIRED);
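+/*
+ * (The shared encoding is event select 0xc0, unit mask 0x0: Intel's
+ * architectural "Instructions Retired" and AMD's PMCx0C0 "Retired
+ * Instructions", hence the assert above.)
+ */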
+
+static const struct __kvm_pmu_event_filter base_event_filter = {
+ .nevents = ARRAY_SIZE(base_event_filter.events),
+ .events = {
+ INTEL_ARCH_CPU_CYCLES,
+ INTEL_ARCH_INSTRUCTIONS_RETIRED,
+ INTEL_ARCH_REFERENCE_CYCLES,
+ INTEL_ARCH_LLC_REFERENCES,
+ INTEL_ARCH_LLC_MISSES,
+ INTEL_ARCH_BRANCHES_RETIRED,
+ INTEL_ARCH_BRANCHES_MISPREDICTED,
+ INTEL_ARCH_TOPDOWN_SLOTS,
+ AMD_ZEN_BRANCHES_RETIRED,
+ },
+};
+
+struct {
+ uint64_t loads;
+ uint64_t stores;
+ uint64_t loads_stores;
+ uint64_t branches_retired;
+ uint64_t instructions_retired;
+} pmc_results;
+
+/*
+ * If we encounter a #GP during the guest PMU sanity check, then the guest
+ * PMU is not functional. Inform the hypervisor via GUEST_SYNC(-EFAULT).
+ */
+static void guest_gp_handler(struct ex_regs *regs)
+{
+ GUEST_SYNC(-EFAULT);
+}
+
+/*
+ * Check that we can write a new value to the given MSR and read it back.
+ * The caller should provide a non-empty set of bits that are safe to flip.
+ *
+ * Return on success. GUEST_SYNC(-EIO) on error.
+ */
+static void check_msr(uint32_t msr, uint64_t bits_to_flip)
+{
+ uint64_t v = rdmsr(msr) ^ bits_to_flip;
+
+ wrmsr(msr, v);
+ if (rdmsr(msr) != v)
+ GUEST_SYNC(-EIO);
+
+ v ^= bits_to_flip;
+ wrmsr(msr, v);
+ if (rdmsr(msr) != v)
+ GUEST_SYNC(-EIO);
+}
+
+static void run_and_measure_loop(uint32_t msr_base)
+{
+ const uint64_t branches_retired = rdmsr(msr_base + 0);
+ const uint64_t insn_retired = rdmsr(msr_base + 1);
+
+ __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
+
+ pmc_results.branches_retired = rdmsr(msr_base + 0) - branches_retired;
+ pmc_results.instructions_retired = rdmsr(msr_base + 1) - insn_retired;
+}
+
+static void intel_guest_code(void)
+{
+ check_msr(MSR_CORE_PERF_GLOBAL_CTRL, 1);
+ check_msr(MSR_P6_EVNTSEL0, 0xffff);
+ check_msr(MSR_IA32_PMC0, 0xffff);
+ GUEST_SYNC(0);
+
+ for (;;) {
+ wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+ wrmsr(MSR_P6_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
+ ARCH_PERFMON_EVENTSEL_OS | INTEL_ARCH_BRANCHES_RETIRED);
+ wrmsr(MSR_P6_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE |
+ ARCH_PERFMON_EVENTSEL_OS | INTEL_ARCH_INSTRUCTIONS_RETIRED);
+ wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0x3);
+
+ run_and_measure_loop(MSR_IA32_PMC0);
+ GUEST_SYNC(0);
+ }
+}
+
+/*
+ * To avoid needing a check for CPUID.80000001:ECX.PerfCtrExtCore[bit 23],
+ * this code uses the always-available, legacy K7 PMU MSRs, which alias to
+ * the first four of the six extended core PMU MSRs.
+ */
+static void amd_guest_code(void)
+{
+ check_msr(MSR_K7_EVNTSEL0, 0xffff);
+ check_msr(MSR_K7_PERFCTR0, 0xffff);
+ GUEST_SYNC(0);
+
+ for (;;) {
+ wrmsr(MSR_K7_EVNTSEL0, 0);
+ wrmsr(MSR_K7_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
+ ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_BRANCHES_RETIRED);
+ wrmsr(MSR_K7_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE |
+ ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_INSTRUCTIONS_RETIRED);
+
+ run_and_measure_loop(MSR_K7_PERFCTR0);
+ GUEST_SYNC(0);
+ }
+}
+
+/*
+ * Run the VM to the next GUEST_SYNC(value), and return the value passed
+ * to the sync. Any other exit from the guest is fatal.
+ */
+static uint64_t run_vcpu_to_sync(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+ get_ucall(vcpu, &uc);
+ TEST_ASSERT(uc.cmd == UCALL_SYNC,
+ "Received ucall other than UCALL_SYNC: %lu", uc.cmd);
+ return uc.args[1];
+}
+
+static void run_vcpu_and_sync_pmc_results(struct kvm_vcpu *vcpu)
+{
+ uint64_t r;
+
+ memset(&pmc_results, 0, sizeof(pmc_results));
+ sync_global_to_guest(vcpu->vm, pmc_results);
+
+ r = run_vcpu_to_sync(vcpu);
+ TEST_ASSERT(!r, "Unexpected sync value: 0x%lx", r);
+
+ sync_global_from_guest(vcpu->vm, pmc_results);
+}
+
+/*
+ * In a nested environment or if the vPMU is disabled, the guest PMU
+ * might not work as architected (accessing the PMU MSRs may raise
+ * #GP, or writes could simply be discarded). In those situations,
+ * there is no point in running these tests. The guest code will perform
+ * a sanity check and then GUEST_SYNC(success). In the case of failure,
+ * the behavior of the guest on resumption is undefined.
+ */
+static bool sanity_check_pmu(struct kvm_vcpu *vcpu)
+{
+ uint64_t r;
+
+ vm_install_exception_handler(vcpu->vm, GP_VECTOR, guest_gp_handler);
+ r = run_vcpu_to_sync(vcpu);
+ vm_install_exception_handler(vcpu->vm, GP_VECTOR, NULL);
+
+ return !r;
+}
+
+/*
+ * Remove the first occurrence of 'event' (if any) from the filter's
+ * event list.
+ */
+static void remove_event(struct __kvm_pmu_event_filter *f, uint64_t event)
+{
+ bool found = false;
+ int i;
+
+ for (i = 0; i < f->nevents; i++) {
+ if (found)
+ f->events[i - 1] = f->events[i];
+ else
+ found = f->events[i] == event;
+ }
+ if (found)
+ f->nevents--;
+}
+
+#define ASSERT_PMC_COUNTING_INSTRUCTIONS() \
+do { \
+ uint64_t br = pmc_results.branches_retired; \
+ uint64_t ir = pmc_results.instructions_retired; \
+ \
+ if (br && br != NUM_BRANCHES) \
+ pr_info("%s: Branch instructions retired = %lu (expected %u)\n", \
+ __func__, br, NUM_BRANCHES); \
+ TEST_ASSERT(br, "%s: Branch instructions retired = %lu (expected > 0)", \
+ __func__, br); \
+ TEST_ASSERT(ir, "%s: Instructions retired = %lu (expected > 0)", \
+ __func__, ir); \
+} while (0)
+
+#define ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS() \
+do { \
+ uint64_t br = pmc_results.branches_retired; \
+ uint64_t ir = pmc_results.instructions_retired; \
+ \
+ TEST_ASSERT(!br, "%s: Branch instructions retired = %lu (expected 0)", \
+ __func__, br); \
+ TEST_ASSERT(!ir, "%s: Instructions retired = %lu (expected 0)", \
+ __func__, ir); \
+} while (0)
+
+static void test_without_filter(struct kvm_vcpu *vcpu)
+{
+ run_vcpu_and_sync_pmc_results(vcpu);
+
+ ASSERT_PMC_COUNTING_INSTRUCTIONS();
+}
+
+static void test_with_filter(struct kvm_vcpu *vcpu,
+ struct __kvm_pmu_event_filter *__f)
+{
+ struct kvm_pmu_event_filter *f = (void *)__f;
+
+ vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f);
+ run_vcpu_and_sync_pmc_results(vcpu);
+}
+
+static void test_amd_deny_list(struct kvm_vcpu *vcpu)
+{
+ struct __kvm_pmu_event_filter f = {
+ .action = KVM_PMU_EVENT_DENY,
+ .nevents = 1,
+ .events = {
+ RAW_EVENT(0x1C2, 0),
+ },
+ };
+
+ test_with_filter(vcpu, &f);
+
+ ASSERT_PMC_COUNTING_INSTRUCTIONS();
+}
+
+static void test_member_deny_list(struct kvm_vcpu *vcpu)
+{
+ struct __kvm_pmu_event_filter f = base_event_filter;
+
+ f.action = KVM_PMU_EVENT_DENY;
+ test_with_filter(vcpu, &f);
+
+ ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS();
+}
+
+static void test_member_allow_list(struct kvm_vcpu *vcpu)
+{
+ struct __kvm_pmu_event_filter f = base_event_filter;
+
+ f.action = KVM_PMU_EVENT_ALLOW;
+ test_with_filter(vcpu, &f);
+
+ ASSERT_PMC_COUNTING_INSTRUCTIONS();
+}
+
+static void test_not_member_deny_list(struct kvm_vcpu *vcpu)
+{
+ struct __kvm_pmu_event_filter f = base_event_filter;
+
+ f.action = KVM_PMU_EVENT_DENY;
+
+ remove_event(&f, INTEL_ARCH_INSTRUCTIONS_RETIRED);
+ remove_event(&f, INTEL_ARCH_BRANCHES_RETIRED);
+ remove_event(&f, AMD_ZEN_BRANCHES_RETIRED);
+ test_with_filter(vcpu, &f);
+
+ ASSERT_PMC_COUNTING_INSTRUCTIONS();
+}
+
+static void test_not_member_allow_list(struct kvm_vcpu *vcpu)
+{
+ struct __kvm_pmu_event_filter f = base_event_filter;
+
+ f.action = KVM_PMU_EVENT_ALLOW;
+
+ remove_event(&f, INTEL_ARCH_INSTRUCTIONS_RETIRED);
+ remove_event(&f, INTEL_ARCH_BRANCHES_RETIRED);
+ remove_event(&f, AMD_ZEN_BRANCHES_RETIRED);
+ test_with_filter(vcpu, &f);
+
+ ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS();
+}
+
+/*
+ * Verify that setting KVM_PMU_CAP_DISABLE prevents the use of the PMU.
+ *
+ * Note that KVM_CAP_PMU_CAPABILITY must be enabled prior to creating VCPUs.
+ */
+static void test_pmu_config_disable(void (*guest_code)(void))
+{
+ struct kvm_vcpu *vcpu;
+ int r;
+ struct kvm_vm *vm;
+
+ r = kvm_check_cap(KVM_CAP_PMU_CAPABILITY);
+ if (!(r & KVM_PMU_CAP_DISABLE))
+ return;
+
+ vm = vm_create(1);
+
+ vm_enable_cap(vm, KVM_CAP_PMU_CAPABILITY, KVM_PMU_CAP_DISABLE);
+
+ vcpu = vm_vcpu_add(vm, 0, guest_code);
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+
+ TEST_ASSERT(!sanity_check_pmu(vcpu),
+ "Guest should not be able to use disabled PMU.");
+
+ kvm_vm_free(vm);
+}
+
+/*
+ * On Intel, check for a non-zero PMU version, at least one general-purpose
+ * counter per logical processor, and support for counting the number of branch
+ * instructions retired.
+ */
+static bool use_intel_pmu(void)
+{
+ return host_cpu_is_intel &&
+ kvm_cpu_property(X86_PROPERTY_PMU_VERSION) &&
+ kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS) &&
+ kvm_pmu_has(X86_PMU_FEATURE_BRANCH_INSNS_RETIRED);
+}
+
+static bool is_zen1(uint32_t family, uint32_t model)
+{
+ return family == 0x17 && model <= 0x0f;
+}
+
+static bool is_zen2(uint32_t family, uint32_t model)
+{
+ return family == 0x17 && model >= 0x30 && model <= 0x3f;
+}
+
+static bool is_zen3(uint32_t family, uint32_t model)
+{
+ return family == 0x19 && model <= 0x0f;
+}
+
+/*
+ * Determining AMD support for a PMU event requires consulting the AMD
+ * PPR for the CPU or reference material derived therefrom. The AMD
+ * test code herein has been verified to work on Zen1, Zen2, and Zen3.
+ *
+ * Feel free to add more AMD CPUs that are documented to support event
+ * select 0xc2 umask 0 as "retired branch instructions."
+ */
+static bool use_amd_pmu(void)
+{
+ uint32_t family = kvm_cpu_family();
+ uint32_t model = kvm_cpu_model();
+
+ return host_cpu_is_amd &&
+ (is_zen1(family, model) ||
+ is_zen2(family, model) ||
+ is_zen3(family, model));
+}
+
+/*
+ * "MEM_INST_RETIRED.ALL_LOADS", "MEM_INST_RETIRED.ALL_STORES", and
+ * "MEM_INST_RETIRED.ANY" from https://perfmon-events.intel.com/
+ * supported on Intel Xeon processors:
+ * - Sapphire Rapids, Ice Lake, Cascade Lake, Skylake.
+ */
+#define MEM_INST_RETIRED 0xD0
+#define MEM_INST_RETIRED_LOAD RAW_EVENT(MEM_INST_RETIRED, 0x81)
+#define MEM_INST_RETIRED_STORE RAW_EVENT(MEM_INST_RETIRED, 0x82)
+#define MEM_INST_RETIRED_LOAD_STORE RAW_EVENT(MEM_INST_RETIRED, 0x83)
+
+static bool supports_event_mem_inst_retired(void)
+{
+ uint32_t eax, ebx, ecx, edx;
+
+ cpuid(1, &eax, &ebx, &ecx, &edx);
+ if (x86_family(eax) == 0x6) {
+ switch (x86_model(eax)) {
+ /* Sapphire Rapids */
+ case 0x8F:
+ /* Ice Lake */
+ case 0x6A:
+ /* Skylake */
+ /* Cascade Lake */
+ case 0x55:
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/*
+ * "LS Dispatch", from Processor Programming Reference
+ * (PPR) for AMD Family 17h Model 01h, Revision B1 Processors,
+ * Preliminary Processor Programming Reference (PPR) for AMD Family
+ * 17h Model 31h, Revision B0 Processors, and Preliminary Processor
+ * Programming Reference (PPR) for AMD Family 19h Model 01h, Revision
+ * B1 Processors Volume 1 of 2.
+ */
+#define LS_DISPATCH 0x29
+#define LS_DISPATCH_LOAD RAW_EVENT(LS_DISPATCH, BIT(0))
+#define LS_DISPATCH_STORE RAW_EVENT(LS_DISPATCH, BIT(1))
+#define LS_DISPATCH_LOAD_STORE RAW_EVENT(LS_DISPATCH, BIT(2))
+
+#define INCLUDE_MASKED_ENTRY(event_select, mask, match) \
+ KVM_PMU_ENCODE_MASKED_ENTRY(event_select, mask, match, false)
+#define EXCLUDE_MASKED_ENTRY(event_select, mask, match) \
+ KVM_PMU_ENCODE_MASKED_ENTRY(event_select, mask, match, true)
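+
+/*
+ * Reader's note on masked-event semantics (as documented for
+ * KVM_SET_PMU_EVENT_FILTER): a programmed event matches an entry when its
+ * event select matches and (unit_mask & mask) == match; "exclude" entries
+ * veto events that would otherwise match an include entry.  E.g. for the
+ * test cases below, INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x81)
+ * matches only unit mask 0x81 (loads), whereas a mask of 0x7C with match 0
+ * matches 0x81, 0x82 and 0x83, i.e. loads, stores, and loads + stores.
+ */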
+
+static void masked_events_guest_test(uint32_t msr_base)
+{
+ /*
+ * The actual values of the counters don't determine the outcome of
+ * the test, only whether they are zero or non-zero.
+ */
+ const uint64_t loads = rdmsr(msr_base + 0);
+ const uint64_t stores = rdmsr(msr_base + 1);
+ const uint64_t loads_stores = rdmsr(msr_base + 2);
+ int val;
+
+ __asm__ __volatile__("movl $0, %[v];"
+ "movl %[v], %%eax;"
+ "incl %[v];"
+ : [v]"+m"(val) :: "eax");
+
+ pmc_results.loads = rdmsr(msr_base + 0) - loads;
+ pmc_results.stores = rdmsr(msr_base + 1) - stores;
+ pmc_results.loads_stores = rdmsr(msr_base + 2) - loads_stores;
+}
+
+static void intel_masked_events_guest_code(void)
+{
+ for (;;) {
+ wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+
+ wrmsr(MSR_P6_EVNTSEL0 + 0, ARCH_PERFMON_EVENTSEL_ENABLE |
+ ARCH_PERFMON_EVENTSEL_OS | MEM_INST_RETIRED_LOAD);
+ wrmsr(MSR_P6_EVNTSEL0 + 1, ARCH_PERFMON_EVENTSEL_ENABLE |
+ ARCH_PERFMON_EVENTSEL_OS | MEM_INST_RETIRED_STORE);
+ wrmsr(MSR_P6_EVNTSEL0 + 2, ARCH_PERFMON_EVENTSEL_ENABLE |
+ ARCH_PERFMON_EVENTSEL_OS | MEM_INST_RETIRED_LOAD_STORE);
+
+ wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0x7);
+
+ masked_events_guest_test(MSR_IA32_PMC0);
+ GUEST_SYNC(0);
+ }
+}
+
+static void amd_masked_events_guest_code(void)
+{
+ for (;;) {
+ wrmsr(MSR_K7_EVNTSEL0, 0);
+ wrmsr(MSR_K7_EVNTSEL1, 0);
+ wrmsr(MSR_K7_EVNTSEL2, 0);
+
+ wrmsr(MSR_K7_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
+ ARCH_PERFMON_EVENTSEL_OS | LS_DISPATCH_LOAD);
+ wrmsr(MSR_K7_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE |
+ ARCH_PERFMON_EVENTSEL_OS | LS_DISPATCH_STORE);
+ wrmsr(MSR_K7_EVNTSEL2, ARCH_PERFMON_EVENTSEL_ENABLE |
+ ARCH_PERFMON_EVENTSEL_OS | LS_DISPATCH_LOAD_STORE);
+
+ masked_events_guest_test(MSR_K7_PERFCTR0);
+ GUEST_SYNC(0);
+ }
+}
+
+static void run_masked_events_test(struct kvm_vcpu *vcpu,
+ const uint64_t masked_events[],
+ const int nmasked_events)
+{
+ struct __kvm_pmu_event_filter f = {
+ .nevents = nmasked_events,
+ .action = KVM_PMU_EVENT_ALLOW,
+ .flags = KVM_PMU_EVENT_FLAG_MASKED_EVENTS,
+ };
+
+ memcpy(f.events, masked_events, sizeof(uint64_t) * nmasked_events);
+ test_with_filter(vcpu, &f);
+}
+
+#define ALLOW_LOADS BIT(0)
+#define ALLOW_STORES BIT(1)
+#define ALLOW_LOADS_STORES BIT(2)
+
+struct masked_events_test {
+ uint64_t intel_events[MAX_TEST_EVENTS];
+ uint64_t intel_event_end;
+ uint64_t amd_events[MAX_TEST_EVENTS];
+ uint64_t amd_event_end;
+ const char *msg;
+ uint32_t flags;
+};
+
+/*
+ * These are the test cases for the masked events tests.
+ *
+ * For each test, the guest enables 3 PMU counters (loads, stores,
+ * loads + stores). The filter is then set in KVM with the masked events
+ * provided. The test then verifies that the counters agree with which
+ * ones should be counting and which ones should be filtered.
+ */
+const struct masked_events_test test_cases[] = {
+ {
+ .intel_events = {
+ INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x81),
+ },
+ .amd_events = {
+ INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(0)),
+ },
+ .msg = "Only allow loads.",
+ .flags = ALLOW_LOADS,
+ }, {
+ .intel_events = {
+ INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x82),
+ },
+ .amd_events = {
+ INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(1)),
+ },
+ .msg = "Only allow stores.",
+ .flags = ALLOW_STORES,
+ }, {
+ .intel_events = {
+ INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x83),
+ },
+ .amd_events = {
+ INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(2)),
+ },
+ .msg = "Only allow loads + stores.",
+ .flags = ALLOW_LOADS_STORES,
+ }, {
+ .intel_events = {
+ INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0x7C, 0),
+ EXCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x83),
+ },
+ .amd_events = {
+ INCLUDE_MASKED_ENTRY(LS_DISPATCH, ~(BIT(0) | BIT(1)), 0),
+ },
+ .msg = "Only allow loads and stores.",
+ .flags = ALLOW_LOADS | ALLOW_STORES,
+ }, {
+ .intel_events = {
+ INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0x7C, 0),
+ EXCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x82),
+ },
+ .amd_events = {
+ INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xF8, 0),
+ EXCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(1)),
+ },
+ .msg = "Only allow loads and loads + stores.",
+ .flags = ALLOW_LOADS | ALLOW_LOADS_STORES
+ }, {
+ .intel_events = {
+ INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFE, 0x82),
+ },
+ .amd_events = {
+ INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xF8, 0),
+ EXCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(0)),
+ },
+ .msg = "Only allow stores and loads + stores.",
+ .flags = ALLOW_STORES | ALLOW_LOADS_STORES
+ }, {
+ .intel_events = {
+ INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0x7C, 0),
+ },
+ .amd_events = {
+ INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xF8, 0),
+ },
+ .msg = "Only allow loads, stores, and loads + stores.",
+ .flags = ALLOW_LOADS | ALLOW_STORES | ALLOW_LOADS_STORES
+ },
+};
+
+static int append_test_events(const struct masked_events_test *test,
+ uint64_t *events, int nevents)
+{
+ const uint64_t *evts;
+ int i;
+
+ evts = use_intel_pmu() ? test->intel_events : test->amd_events;
+ for (i = 0; i < MAX_TEST_EVENTS; i++) {
+ if (evts[i] == 0)
+ break;
+
+ events[nevents + i] = evts[i];
+ }
+
+ return nevents + i;
+}
+
+static bool bool_eq(bool a, bool b)
+{
+ return a == b;
+}
+
+static void run_masked_events_tests(struct kvm_vcpu *vcpu, uint64_t *events,
+ int nevents)
+{
+ int ntests = ARRAY_SIZE(test_cases);
+ int i, n;
+
+ for (i = 0; i < ntests; i++) {
+ const struct masked_events_test *test = &test_cases[i];
+
+ /* Do any test case events overflow MAX_TEST_EVENTS? */
+ assert(test->intel_event_end == 0);
+ assert(test->amd_event_end == 0);
+
+ n = append_test_events(test, events, nevents);
+
+ run_masked_events_test(vcpu, events, n);
+
+ TEST_ASSERT(bool_eq(pmc_results.loads, test->flags & ALLOW_LOADS) &&
+ bool_eq(pmc_results.stores, test->flags & ALLOW_STORES) &&
+ bool_eq(pmc_results.loads_stores,
+ test->flags & ALLOW_LOADS_STORES),
+ "%s loads: %lu, stores: %lu, loads + stores: %lu",
+ test->msg, pmc_results.loads, pmc_results.stores,
+ pmc_results.loads_stores);
+ }
+}
+
+static void add_dummy_events(uint64_t *events, int nevents)
+{
+ int i;
+
+ for (i = 0; i < nevents; i++) {
+ int event_select = i % 0xFF;
+ bool exclude = ((i % 4) == 0);
+
+ if (event_select == MEM_INST_RETIRED ||
+ event_select == LS_DISPATCH)
+ event_select++;
+
+ events[i] = KVM_PMU_ENCODE_MASKED_ENTRY(event_select, 0,
+ 0, exclude);
+ }
+}
+
+static void test_masked_events(struct kvm_vcpu *vcpu)
+{
+ int nevents = KVM_PMU_EVENT_FILTER_MAX_EVENTS - MAX_TEST_EVENTS;
+ uint64_t events[KVM_PMU_EVENT_FILTER_MAX_EVENTS];
+
+ /* Run the test cases against a sparse PMU event filter. */
+ run_masked_events_tests(vcpu, events, 0);
+
+ /* Run the test cases against a dense PMU event filter. */
+ add_dummy_events(events, KVM_PMU_EVENT_FILTER_MAX_EVENTS);
+ run_masked_events_tests(vcpu, events, nevents);
+}
+
+static int set_pmu_event_filter(struct kvm_vcpu *vcpu,
+ struct __kvm_pmu_event_filter *__f)
+{
+ struct kvm_pmu_event_filter *f = (void *)__f;
+
+ return __vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f);
+}
+
+static int set_pmu_single_event_filter(struct kvm_vcpu *vcpu, uint64_t event,
+ uint32_t flags, uint32_t action)
+{
+ struct __kvm_pmu_event_filter f = {
+ .nevents = 1,
+ .flags = flags,
+ .action = action,
+ .events = {
+ event,
+ },
+ };
+
+ return set_pmu_event_filter(vcpu, &f);
+}
+
+static void test_filter_ioctl(struct kvm_vcpu *vcpu)
+{
+ uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+ struct __kvm_pmu_event_filter f;
+ uint64_t e = ~0ul;
+ int r;
+
+ /*
+ * Unfortunately, a filter event with invalid bits set in its data
+ * (bits other than eventsel+umask) is expected to be accepted when
+ * flags == 0.
+ */
+ r = set_pmu_single_event_filter(vcpu, e, 0, KVM_PMU_EVENT_ALLOW);
+ TEST_ASSERT(r == 0, "Valid PMU Event Filter is failing");
+
+ r = set_pmu_single_event_filter(vcpu, e,
+ KVM_PMU_EVENT_FLAG_MASKED_EVENTS,
+ KVM_PMU_EVENT_ALLOW);
+ TEST_ASSERT(r != 0, "Invalid PMU Event Filter is expected to fail");
+
+ e = KVM_PMU_ENCODE_MASKED_ENTRY(0xff, 0xff, 0xff, 0xf);
+ r = set_pmu_single_event_filter(vcpu, e,
+ KVM_PMU_EVENT_FLAG_MASKED_EVENTS,
+ KVM_PMU_EVENT_ALLOW);
+ TEST_ASSERT(r == 0, "Valid PMU Event Filter is failing");
+
+ f = base_event_filter;
+ f.action = PMU_EVENT_FILTER_INVALID_ACTION;
+ r = set_pmu_event_filter(vcpu, &f);
+ TEST_ASSERT(r, "Set invalid action is expected to fail");
+
+ f = base_event_filter;
+ f.flags = PMU_EVENT_FILTER_INVALID_FLAGS;
+ r = set_pmu_event_filter(vcpu, &f);
+ TEST_ASSERT(r, "Set invalid flags is expected to fail");
+
+ f = base_event_filter;
+ f.nevents = PMU_EVENT_FILTER_INVALID_NEVENTS;
+ r = set_pmu_event_filter(vcpu, &f);
+ TEST_ASSERT(r, "Exceeding the max number of filter events should fail");
+
+ f = base_event_filter;
+ f.fixed_counter_bitmap = ~GENMASK_ULL(nr_fixed_counters, 0);
+ r = set_pmu_event_filter(vcpu, &f);
+ TEST_ASSERT(!r, "Masking non-existent fixed counters should be allowed");
+}
+
+static void intel_run_fixed_counter_guest_code(uint8_t idx)
+{
+ for (;;) {
+ wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+ wrmsr(MSR_CORE_PERF_FIXED_CTR0 + idx, 0);
+
+ /* Only OS_EN bit is enabled for fixed counter[idx]. */
+ wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(idx, FIXED_PMC_KERNEL));
+ wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, FIXED_PMC_GLOBAL_CTRL_ENABLE(idx));
+ __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
+ wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+
+ GUEST_SYNC(rdmsr(MSR_CORE_PERF_FIXED_CTR0 + idx));
+ }
+}
+
+static uint64_t test_with_fixed_counter_filter(struct kvm_vcpu *vcpu,
+ uint32_t action, uint32_t bitmap)
+{
+ struct __kvm_pmu_event_filter f = {
+ .action = action,
+ .fixed_counter_bitmap = bitmap,
+ };
+ set_pmu_event_filter(vcpu, &f);
+
+ return run_vcpu_to_sync(vcpu);
+}
+
+static uint64_t test_set_gp_and_fixed_event_filter(struct kvm_vcpu *vcpu,
+ uint32_t action,
+ uint32_t bitmap)
+{
+ struct __kvm_pmu_event_filter f = base_event_filter;
+
+ f.action = action;
+ f.fixed_counter_bitmap = bitmap;
+ set_pmu_event_filter(vcpu, &f);
+
+ return run_vcpu_to_sync(vcpu);
+}
+
+static void __test_fixed_counter_bitmap(struct kvm_vcpu *vcpu, uint8_t idx,
+ uint8_t nr_fixed_counters)
+{
+ unsigned int i;
+ uint32_t bitmap;
+ uint64_t count;
+
+ TEST_ASSERT(nr_fixed_counters < sizeof(bitmap) * 8,
+ "Invalid nr_fixed_counters");
+
+ /*
+ * Check that the fixed performance counter can count normally when
+ * KVM userspace doesn't set any PMU event filter.
+ */
+ count = run_vcpu_to_sync(vcpu);
+ TEST_ASSERT(count, "Unexpected count value: %ld", count);
+
+ for (i = 0; i < BIT(nr_fixed_counters); i++) {
+ bitmap = BIT(i);
+ count = test_with_fixed_counter_filter(vcpu, KVM_PMU_EVENT_ALLOW,
+ bitmap);
+ TEST_ASSERT_EQ(!!count, !!(bitmap & BIT(idx)));
+
+ count = test_with_fixed_counter_filter(vcpu, KVM_PMU_EVENT_DENY,
+ bitmap);
+ TEST_ASSERT_EQ(!!count, !(bitmap & BIT(idx)));
+
+ /*
+ * Check that fixed_counter_bitmap has higher priority than
+ * events[] when both are set.
+ */
+ count = test_set_gp_and_fixed_event_filter(vcpu,
+ KVM_PMU_EVENT_ALLOW,
+ bitmap);
+ TEST_ASSERT_EQ(!!count, !!(bitmap & BIT(idx)));
+
+ count = test_set_gp_and_fixed_event_filter(vcpu,
+ KVM_PMU_EVENT_DENY,
+ bitmap);
+ TEST_ASSERT_EQ(!!count, !(bitmap & BIT(idx)));
+ }
+}
+
+static void test_fixed_counter_bitmap(void)
+{
+ uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+ uint8_t idx;
+
+ /*
+ * Check that pmu_event_filter works as expected when it's applied to
+ * fixed performance counters.
+ */
+ for (idx = 0; idx < nr_fixed_counters; idx++) {
+ vm = vm_create_with_one_vcpu(&vcpu,
+ intel_run_fixed_counter_guest_code);
+ vcpu_args_set(vcpu, 1, idx);
+ __test_fixed_counter_bitmap(vcpu, idx, nr_fixed_counters);
+ kvm_vm_free(vm);
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ void (*guest_code)(void);
+ struct kvm_vcpu *vcpu, *vcpu2 = NULL;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(kvm_is_pmu_enabled());
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_PMU_EVENT_FILTER));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_PMU_EVENT_MASKED_EVENTS));
+
+ TEST_REQUIRE(use_intel_pmu() || use_amd_pmu());
+ guest_code = use_intel_pmu() ? intel_guest_code : amd_guest_code;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+
+ TEST_REQUIRE(sanity_check_pmu(vcpu));
+
+ if (use_amd_pmu())
+ test_amd_deny_list(vcpu);
+
+ test_without_filter(vcpu);
+ test_member_deny_list(vcpu);
+ test_member_allow_list(vcpu);
+ test_not_member_deny_list(vcpu);
+ test_not_member_allow_list(vcpu);
+
+ if (use_intel_pmu() &&
+ supports_event_mem_inst_retired() &&
+ kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS) >= 3)
+ vcpu2 = vm_vcpu_add(vm, 2, intel_masked_events_guest_code);
+ else if (use_amd_pmu())
+ vcpu2 = vm_vcpu_add(vm, 2, amd_masked_events_guest_code);
+
+ if (vcpu2)
+ test_masked_events(vcpu2);
+ test_filter_ioctl(vcpu);
+
+ kvm_vm_free(vm);
+
+ test_pmu_config_disable(guest_code);
+ test_fixed_counter_bitmap();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c b/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c
new file mode 100644
index 000000000000..e0f642d2a3c4
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c
@@ -0,0 +1,484 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022, Google LLC.
+ */
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <limits.h>
+#include <pthread.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/kvm_para.h>
+#include <linux/memfd.h>
+#include <linux/sizes.h>
+
+#include <test_util.h>
+#include <kvm_util.h>
+#include <processor.h>
+
+#define BASE_DATA_SLOT 10
+#define BASE_DATA_GPA ((uint64_t)(1ull << 32))
+#define PER_CPU_DATA_SIZE ((uint64_t)(SZ_2M + PAGE_SIZE))
+
+/* Horrific macro so that the line info is captured accurately :-( */
+#define memcmp_g(gpa, pattern, size) \
+do { \
+ uint8_t *mem = (uint8_t *)gpa; \
+ size_t i; \
+ \
+ for (i = 0; i < size; i++) \
+ __GUEST_ASSERT(mem[i] == pattern, \
+ "Guest expected 0x%x at offset %lu (gpa 0x%lx), got 0x%x", \
+ pattern, i, gpa + i, mem[i]); \
+} while (0)
+
+static void memcmp_h(uint8_t *mem, uint64_t gpa, uint8_t pattern, size_t size)
+{
+ size_t i;
+
+ for (i = 0; i < size; i++)
+ TEST_ASSERT(mem[i] == pattern,
+ "Host expected 0x%x at gpa 0x%lx, got 0x%x",
+ pattern, gpa + i, mem[i]);
+}
+
+/*
+ * Run memory conversion tests with explicit conversion:
+ * Execute the KVM hypercall to map/unmap a gpa range, which causes an exit to
+ * userspace so that it can back/unback the private memory. Subsequent guest
+ * accesses to the gpa range will not exit to userspace.
+ *
+ * Test memory conversion scenarios with the following steps:
+ * 1) Access private memory using private access and verify that memory contents
+ * are not visible to userspace.
+ * 2) Convert memory to shared using explicit conversions and ensure that
+ * userspace is able to access the shared regions.
+ * 3) Convert memory back to private using explicit conversions and ensure that
+ * userspace is again not able to access converted private regions.
+ */
+
+#define GUEST_STAGE(o, s) { .offset = o, .size = s }
+
+enum ucall_syncs {
+ SYNC_SHARED,
+ SYNC_PRIVATE,
+};
+
+static void guest_sync_shared(uint64_t gpa, uint64_t size,
+ uint8_t current_pattern, uint8_t new_pattern)
+{
+ GUEST_SYNC5(SYNC_SHARED, gpa, size, current_pattern, new_pattern);
+}
+
+static void guest_sync_private(uint64_t gpa, uint64_t size, uint8_t pattern)
+{
+ GUEST_SYNC4(SYNC_PRIVATE, gpa, size, pattern);
+}
+
+/* Arbitrary values, KVM doesn't care about the attribute flags. */
+#define MAP_GPA_SET_ATTRIBUTES BIT(0)
+#define MAP_GPA_SHARED BIT(1)
+#define MAP_GPA_DO_FALLOCATE BIT(2)
+
+static void guest_map_mem(uint64_t gpa, uint64_t size, bool map_shared,
+ bool do_fallocate)
+{
+ uint64_t flags = MAP_GPA_SET_ATTRIBUTES;
+
+ if (map_shared)
+ flags |= MAP_GPA_SHARED;
+ if (do_fallocate)
+ flags |= MAP_GPA_DO_FALLOCATE;
+ kvm_hypercall_map_gpa_range(gpa, size, flags);
+}
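+
+/*
+ * Reader's note: the MAP_GPA_* flags above are private to this test; KVM
+ * simply forwards the hypercall to userspace (via KVM_CAP_EXIT_HYPERCALL),
+ * and it's handle_exit_hypercall() on the host side that translates them
+ * into fallocate() on the guest_memfd and/or memory attribute updates.
+ */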
+
+static void guest_map_shared(uint64_t gpa, uint64_t size, bool do_fallocate)
+{
+ guest_map_mem(gpa, size, true, do_fallocate);
+}
+
+static void guest_map_private(uint64_t gpa, uint64_t size, bool do_fallocate)
+{
+ guest_map_mem(gpa, size, false, do_fallocate);
+}
+
+struct {
+ uint64_t offset;
+ uint64_t size;
+} static const test_ranges[] = {
+ GUEST_STAGE(0, PAGE_SIZE),
+ GUEST_STAGE(0, SZ_2M),
+ GUEST_STAGE(PAGE_SIZE, PAGE_SIZE),
+ GUEST_STAGE(PAGE_SIZE, SZ_2M),
+ GUEST_STAGE(SZ_2M, PAGE_SIZE),
+};
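+
+/*
+ * Reader's note: the ranges mix 4KiB and 2MiB sizes at offsets 0, PAGE_SIZE
+ * and SZ_2M within the SZ_2M + PAGE_SIZE per-vCPU region, i.e. they cover
+ * both partial and whole hugepage-sized chunks at aligned and unaligned
+ * offsets.
+ */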
+
+static void guest_test_explicit_conversion(uint64_t base_gpa, bool do_fallocate)
+{
+ const uint8_t def_p = 0xaa;
+ const uint8_t init_p = 0xcc;
+ uint64_t j;
+ int i;
+
+ /* Memory should be shared by default. */
+ memset((void *)base_gpa, def_p, PER_CPU_DATA_SIZE);
+ memcmp_g(base_gpa, def_p, PER_CPU_DATA_SIZE);
+ guest_sync_shared(base_gpa, PER_CPU_DATA_SIZE, def_p, init_p);
+
+ memcmp_g(base_gpa, init_p, PER_CPU_DATA_SIZE);
+
+ for (i = 0; i < ARRAY_SIZE(test_ranges); i++) {
+ uint64_t gpa = base_gpa + test_ranges[i].offset;
+ uint64_t size = test_ranges[i].size;
+ uint8_t p1 = 0x11;
+ uint8_t p2 = 0x22;
+ uint8_t p3 = 0x33;
+ uint8_t p4 = 0x44;
+
+ /*
+ * Set the test region to pattern one to differentiate it from
+ * the data range as a whole (which holds the initial pattern).
+ */
+ memset((void *)gpa, p1, size);
+
+ /*
+ * Convert to private, set and verify the private data, and
+ * then verify that the rest of the data (still mapped shared)
+ * holds the initial pattern, and that the host always sees the
+ * shared memory (initial pattern). Unlike shared memory,
+ * punching a hole in private memory is destructive, i.e.
+ * previous values aren't guaranteed to be preserved.
+ */
+ guest_map_private(gpa, size, do_fallocate);
+
+ if (size > PAGE_SIZE) {
+ memset((void *)gpa, p2, PAGE_SIZE);
+ goto skip;
+ }
+
+ memset((void *)gpa, p2, size);
+ guest_sync_private(gpa, size, p1);
+
+ /*
+ * Verify that the private memory was set to pattern two, and
+ * that shared memory still holds the initial pattern.
+ */
+ memcmp_g(gpa, p2, size);
+ if (gpa > base_gpa)
+ memcmp_g(base_gpa, init_p, gpa - base_gpa);
+ if (gpa + size < base_gpa + PER_CPU_DATA_SIZE)
+ memcmp_g(gpa + size, init_p,
+ (base_gpa + PER_CPU_DATA_SIZE) - (gpa + size));
+
+ /*
+ * Convert odd-number page frames back to shared to verify KVM
+ * also correctly handles holes in private ranges.
+ */
+ for (j = 0; j < size; j += PAGE_SIZE) {
+ if ((j >> PAGE_SHIFT) & 1) {
+ guest_map_shared(gpa + j, PAGE_SIZE, do_fallocate);
+ guest_sync_shared(gpa + j, PAGE_SIZE, p1, p3);
+
+ memcmp_g(gpa + j, p3, PAGE_SIZE);
+ } else {
+ guest_sync_private(gpa + j, PAGE_SIZE, p1);
+ }
+ }
+
+skip:
+ /*
+ * Convert the entire region back to shared, explicitly write
+ * pattern three to fill in the even-number frames before
+ * asking the host to verify (and write pattern four).
+ */
+ guest_map_shared(gpa, size, do_fallocate);
+ memset((void *)gpa, p3, size);
+ guest_sync_shared(gpa, size, p3, p4);
+ memcmp_g(gpa, p4, size);
+
+ /* Reset the shared memory back to the initial pattern. */
+ memset((void *)gpa, init_p, size);
+
+ /*
+ * Free (via PUNCH_HOLE) *all* private memory so that the next
+ * iteration starts from a clean slate, e.g. with respect to
+ * whether or not there are pages/folios in guest_mem.
+ */
+ guest_map_shared(base_gpa, PER_CPU_DATA_SIZE, true);
+ }
+}
+
+static void guest_punch_hole(uint64_t gpa, uint64_t size)
+{
+ /* "Mapping" memory shared via fallocate() is done via PUNCH_HOLE. */
+ uint64_t flags = MAP_GPA_SHARED | MAP_GPA_DO_FALLOCATE;
+
+ kvm_hypercall_map_gpa_range(gpa, size, flags);
+}
+
+/*
+ * Test that PUNCH_HOLE actually frees memory by punching holes without doing a
+ * proper conversion. Freeing (PUNCH_HOLE) should zap SPTEs, and reallocating
+ * (subsequent fault) should zero memory.
+ */
+static void guest_test_punch_hole(uint64_t base_gpa, bool precise)
+{
+ const uint8_t init_p = 0xcc;
+ int i;
+
+ /*
+ * Convert the entire range to private, this testcase is all about
+ * punching holes in guest_memfd, i.e. shared mappings aren't needed.
+ */
+ guest_map_private(base_gpa, PER_CPU_DATA_SIZE, false);
+
+ for (i = 0; i < ARRAY_SIZE(test_ranges); i++) {
+ uint64_t gpa = base_gpa + test_ranges[i].offset;
+ uint64_t size = test_ranges[i].size;
+
+ /*
+ * Free all memory before each iteration, even for the !precise
+ * case where the memory will be faulted back in. Freeing and
+ * reallocating should obviously work, and freeing all memory
+ * minimizes the probability of cross-testcase influence.
+ */
+ guest_punch_hole(base_gpa, PER_CPU_DATA_SIZE);
+
+ /* Fault-in and initialize memory, and verify the pattern. */
+ if (precise) {
+ memset((void *)gpa, init_p, size);
+ memcmp_g(gpa, init_p, size);
+ } else {
+ memset((void *)base_gpa, init_p, PER_CPU_DATA_SIZE);
+ memcmp_g(base_gpa, init_p, PER_CPU_DATA_SIZE);
+ }
+
+ /*
+ * Punch a hole at the target range and verify that reads from
+ * the guest succeed and return zeroes.
+ */
+ guest_punch_hole(gpa, size);
+ memcmp_g(gpa, 0, size);
+ }
+}
+
+static void guest_code(uint64_t base_gpa)
+{
+ /*
+ * Run the conversion test twice, with and without doing fallocate() on
+ * the guest_memfd backing when converting between shared and private.
+ */
+ guest_test_explicit_conversion(base_gpa, false);
+ guest_test_explicit_conversion(base_gpa, true);
+
+ /*
+ * Run the PUNCH_HOLE test twice too, once with the entire guest_memfd
+ * faulted in, once with only the target range faulted in.
+ */
+ guest_test_punch_hole(base_gpa, false);
+ guest_test_punch_hole(base_gpa, true);
+ GUEST_DONE();
+}
+
+static void handle_exit_hypercall(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+ uint64_t gpa = run->hypercall.args[0];
+ uint64_t size = run->hypercall.args[1] * PAGE_SIZE;
+ bool set_attributes = run->hypercall.args[2] & MAP_GPA_SET_ATTRIBUTES;
+ bool map_shared = run->hypercall.args[2] & MAP_GPA_SHARED;
+ bool do_fallocate = run->hypercall.args[2] & MAP_GPA_DO_FALLOCATE;
+ struct kvm_vm *vm = vcpu->vm;
+
+ TEST_ASSERT(run->hypercall.nr == KVM_HC_MAP_GPA_RANGE,
+ "Wanted MAP_GPA_RANGE (%u), got '%llu'",
+ KVM_HC_MAP_GPA_RANGE, run->hypercall.nr);
+
+ if (do_fallocate)
+ vm_guest_mem_fallocate(vm, gpa, size, map_shared);
+
+ if (set_attributes)
+ vm_set_memory_attributes(vm, gpa, size,
+ map_shared ? 0 : KVM_MEMORY_ATTRIBUTE_PRIVATE);
+ run->hypercall.ret = 0;
+}
+
+static bool run_vcpus;
+
+static void *__test_mem_conversions(void *__vcpu)
+{
+ struct kvm_vcpu *vcpu = __vcpu;
+ struct kvm_run *run = vcpu->run;
+ struct kvm_vm *vm = vcpu->vm;
+ struct ucall uc;
+
+ while (!READ_ONCE(run_vcpus))
+ ;
+
+ for ( ;; ) {
+ vcpu_run(vcpu);
+
+ if (run->exit_reason == KVM_EXIT_HYPERCALL) {
+ handle_exit_hypercall(vcpu);
+ continue;
+ }
+
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Wanted KVM_EXIT_IO, got exit reason: %u (%s)",
+ run->exit_reason, exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ case UCALL_SYNC: {
+ uint64_t gpa = uc.args[1];
+ size_t size = uc.args[2];
+ size_t i;
+
+ TEST_ASSERT(uc.args[0] == SYNC_SHARED ||
+ uc.args[0] == SYNC_PRIVATE,
+ "Unknown sync command '%ld'", uc.args[0]);
+
+ for (i = 0; i < size; i += vm->page_size) {
+ size_t nr_bytes = min_t(size_t, vm->page_size, size - i);
+ uint8_t *hva = addr_gpa2hva(vm, gpa + i);
+
+ /* In all cases, the host should observe the shared data. */
+ memcmp_h(hva, gpa + i, uc.args[3], nr_bytes);
+
+ /* For shared, write the new pattern to guest memory. */
+ if (uc.args[0] == SYNC_SHARED)
+ memset(hva, uc.args[4], nr_bytes);
+ }
+ break;
+ }
+ case UCALL_DONE:
+ return NULL;
+ default:
+ TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+ }
+ }
+}
+
+static void test_mem_conversions(enum vm_mem_backing_src_type src_type, uint32_t nr_vcpus,
+ uint32_t nr_memslots)
+{
+ /*
+ * Allocate enough memory so that each vCPU's chunk of memory can be
+ * naturally aligned with respect to the size of the backing store.
+ */
+ const size_t alignment = max_t(size_t, SZ_2M, get_backing_src_pagesz(src_type));
+ const size_t per_cpu_size = align_up(PER_CPU_DATA_SIZE, alignment);
+ const size_t memfd_size = per_cpu_size * nr_vcpus;
+ const size_t slot_size = memfd_size / nr_memslots;
+ struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
+ pthread_t threads[KVM_MAX_VCPUS];
+ struct kvm_vm *vm;
+ int memfd, i, r;
+
+ const struct vm_shape shape = {
+ .mode = VM_MODE_DEFAULT,
+ .type = KVM_X86_SW_PROTECTED_VM,
+ };
+
+ TEST_ASSERT(slot_size * nr_memslots == memfd_size,
+ "The memfd size (0x%lx) needs to be cleanly divisible by the number of memslots (%u)",
+ memfd_size, nr_memslots);
+ vm = __vm_create_with_vcpus(shape, nr_vcpus, 0, guest_code, vcpus);
+
+ vm_enable_cap(vm, KVM_CAP_EXIT_HYPERCALL, (1 << KVM_HC_MAP_GPA_RANGE));
+
+ memfd = vm_create_guest_memfd(vm, memfd_size, 0);
+
+ for (i = 0; i < nr_memslots; i++)
+ vm_mem_add(vm, src_type, BASE_DATA_GPA + slot_size * i,
+ BASE_DATA_SLOT + i, slot_size / vm->page_size,
+ KVM_MEM_GUEST_MEMFD, memfd, slot_size * i);
+
+ for (i = 0; i < nr_vcpus; i++) {
+ uint64_t gpa = BASE_DATA_GPA + i * per_cpu_size;
+
+ vcpu_args_set(vcpus[i], 1, gpa);
+
+ /*
+ * Map only what is needed so that an out-of-bounds access
+ * results in a #PF => SHUTDOWN instead of data corruption.
+ */
+ virt_map(vm, gpa, gpa, PER_CPU_DATA_SIZE / vm->page_size);
+
+ pthread_create(&threads[i], NULL, __test_mem_conversions, vcpus[i]);
+ }
+
+ WRITE_ONCE(run_vcpus, true);
+
+ for (i = 0; i < nr_vcpus; i++)
+ pthread_join(threads[i], NULL);
+
+ kvm_vm_free(vm);
+
+ /*
+ * Allocate and free memory from the guest_memfd after closing the VM
+ * fd. The guest_memfd is gifted a reference to its owning VM, i.e.
+ * should prevent the VM from being fully destroyed until the last
+ * reference to the guest_memfd is also put.
+ */
+ r = fallocate(memfd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, memfd_size);
+ TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));
+
+ r = fallocate(memfd, FALLOC_FL_KEEP_SIZE, 0, memfd_size);
+ TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));
+
+ close(memfd);
+}
+
+static void usage(const char *cmd)
+{
+ puts("");
+ printf("usage: %s [-h] [-m nr_memslots] [-s mem_type] [-n nr_vcpus]\n", cmd);
+ puts("");
+ backing_src_help("-s");
+ puts("");
+ puts(" -n: specify the number of vcpus (default: 1)");
+ puts("");
+ puts(" -m: specify the number of memslots (default: 1)");
+ puts("");
+}
+
+int main(int argc, char *argv[])
+{
+ enum vm_mem_backing_src_type src_type = DEFAULT_VM_MEM_SRC;
+ uint32_t nr_memslots = 1;
+ uint32_t nr_vcpus = 1;
+ int opt;
+
+ TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM));
+
+ while ((opt = getopt(argc, argv, "hm:s:n:")) != -1) {
+ switch (opt) {
+ case 's':
+ src_type = parse_backing_src_type(optarg);
+ break;
+ case 'n':
+ nr_vcpus = atoi_positive("nr_vcpus", optarg);
+ break;
+ case 'm':
+ nr_memslots = atoi_positive("nr_memslots", optarg);
+ break;
+ case 'h':
+ default:
+ usage(argv[0]);
+ exit(0);
+ }
+ }
+
+ test_mem_conversions(src_type, nr_vcpus, nr_memslots);
+
+ return 0;
+}
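
For reference, the MAP_GPA_RANGE plumbing exercised above reduces to two plain ioctls on the VM fd once the selftest helpers are stripped away. A minimal sketch against the raw UAPI (requires kernel headers with guest_memfd/memory-attributes support; the vm_fd parameter and helper names are illustrative, not part of the test):

    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>
    #include <linux/kvm_para.h>	/* KVM_HC_MAP_GPA_RANGE */

    /* Forward MAP_GPA_RANGE hypercalls to userspace as KVM_EXIT_HYPERCALL. */
    static int enable_map_gpa_exits(int vm_fd)
    {
            struct kvm_enable_cap cap = {
                    .cap = KVM_CAP_EXIT_HYPERCALL,
                    .args[0] = 1ULL << KVM_HC_MAP_GPA_RANGE,
            };

            return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
    }

    /* Flip a GPA range between shared and private, as vm_set_memory_attributes() does. */
    static int set_range_private(int vm_fd, uint64_t gpa, uint64_t size, int private)
    {
            struct kvm_memory_attributes attr = {
                    .address = gpa,
                    .size = size,
                    .attributes = private ? KVM_MEMORY_ATTRIBUTE_PRIVATE : 0,
            };

            return ioctl(vm_fd, KVM_SET_MEMORY_ATTRIBUTES, &attr);
    }

Both ioctls return 0 on success, which is all the selftest's wrappers check for.
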
diff --git a/tools/testing/selftests/kvm/x86_64/private_mem_kvm_exits_test.c b/tools/testing/selftests/kvm/x86_64/private_mem_kvm_exits_test.c
new file mode 100644
index 000000000000..13e72fcec8dd
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/private_mem_kvm_exits_test.c
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2023, Google LLC.
+ */
+#include <linux/kvm.h>
+#include <pthread.h>
+#include <stdint.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+
+/* Arbitrarily selected to avoid overlaps with anything else */
+#define EXITS_TEST_GVA 0xc0000000
+#define EXITS_TEST_GPA EXITS_TEST_GVA
+#define EXITS_TEST_NPAGES 1
+#define EXITS_TEST_SIZE (EXITS_TEST_NPAGES * PAGE_SIZE)
+#define EXITS_TEST_SLOT 10
+
+static uint64_t guest_repeatedly_read(void)
+{
+ volatile uint64_t value;
+
+ while (true)
+ value = *((uint64_t *) EXITS_TEST_GVA);
+
+ return value;
+}
+
+static uint32_t run_vcpu_get_exit_reason(struct kvm_vcpu *vcpu)
+{
+ int r;
+
+ r = _vcpu_run(vcpu);
+ if (r) {
+ TEST_ASSERT(errno == EFAULT, KVM_IOCTL_ERROR(KVM_RUN, r));
+ TEST_ASSERT_EQ(vcpu->run->exit_reason, KVM_EXIT_MEMORY_FAULT);
+ }
+ return vcpu->run->exit_reason;
+}
+
+const struct vm_shape protected_vm_shape = {
+ .mode = VM_MODE_DEFAULT,
+ .type = KVM_X86_SW_PROTECTED_VM,
+};
+
+static void test_private_access_memslot_deleted(void)
+{
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+ pthread_t vm_thread;
+ void *thread_return;
+ uint32_t exit_reason;
+
+ vm = vm_create_shape_with_one_vcpu(protected_vm_shape, &vcpu,
+ guest_repeatedly_read);
+
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ EXITS_TEST_GPA, EXITS_TEST_SLOT,
+ EXITS_TEST_NPAGES,
+ KVM_MEM_GUEST_MEMFD);
+
+ virt_map(vm, EXITS_TEST_GVA, EXITS_TEST_GPA, EXITS_TEST_NPAGES);
+
+ /* Request to access page privately */
+ vm_mem_set_private(vm, EXITS_TEST_GPA, EXITS_TEST_SIZE);
+
+ pthread_create(&vm_thread, NULL,
+ (void *(*)(void *))run_vcpu_get_exit_reason,
+ (void *)vcpu);
+
+ vm_mem_region_delete(vm, EXITS_TEST_SLOT);
+
+ pthread_join(vm_thread, &thread_return);
+ exit_reason = (uint32_t)(uint64_t)thread_return;
+
+ TEST_ASSERT_EQ(exit_reason, KVM_EXIT_MEMORY_FAULT);
+ TEST_ASSERT_EQ(vcpu->run->memory_fault.flags, KVM_MEMORY_EXIT_FLAG_PRIVATE);
+ TEST_ASSERT_EQ(vcpu->run->memory_fault.gpa, EXITS_TEST_GPA);
+ TEST_ASSERT_EQ(vcpu->run->memory_fault.size, EXITS_TEST_SIZE);
+
+ kvm_vm_free(vm);
+}
+
+static void test_private_access_memslot_not_private(void)
+{
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+ uint32_t exit_reason;
+
+ vm = vm_create_shape_with_one_vcpu(protected_vm_shape, &vcpu,
+ guest_repeatedly_read);
+
+ /* Add a non-private memslot (flags = 0) */
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ EXITS_TEST_GPA, EXITS_TEST_SLOT,
+ EXITS_TEST_NPAGES, 0);
+
+ virt_map(vm, EXITS_TEST_GVA, EXITS_TEST_GPA, EXITS_TEST_NPAGES);
+
+ /* Request to access page privately */
+ vm_mem_set_private(vm, EXITS_TEST_GPA, EXITS_TEST_SIZE);
+
+ exit_reason = run_vcpu_get_exit_reason(vcpu);
+
+ TEST_ASSERT_EQ(exit_reason, KVM_EXIT_MEMORY_FAULT);
+ TEST_ASSERT_EQ(vcpu->run->memory_fault.flags, KVM_MEMORY_EXIT_FLAG_PRIVATE);
+ TEST_ASSERT_EQ(vcpu->run->memory_fault.gpa, EXITS_TEST_GPA);
+ TEST_ASSERT_EQ(vcpu->run->memory_fault.size, EXITS_TEST_SIZE);
+
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM));
+
+ test_private_access_memslot_deleted();
+ test_private_access_memslot_not_private();
+}
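
The flow run_vcpu_get_exit_reason() relies on is worth spelling out: for a private-memory fault, KVM_RUN returns -1 with errno set to EFAULT, yet the exit information in the shared kvm_run page is still valid. A hedged sketch of decoding it with the raw UAPI (vcpu_fd and the mmap'ed run pointer are assumed to already exist; headers with KVM_EXIT_MEMORY_FAULT support are required):

    #include <errno.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Run once and report a private-memory fault, mirroring run_vcpu_get_exit_reason(). */
    static void run_and_dump_memory_fault(int vcpu_fd, struct kvm_run *run)
    {
            int r = ioctl(vcpu_fd, KVM_RUN, 0);

            /* KVM_RUN fails with EFAULT but still fills in run->memory_fault. */
            if (r && errno == EFAULT && run->exit_reason == KVM_EXIT_MEMORY_FAULT)
                    printf("memory fault: gpa=0x%llx size=0x%llx %s\n",
                           run->memory_fault.gpa, run->memory_fault.size,
                           (run->memory_fault.flags & KVM_MEMORY_EXIT_FLAG_PRIVATE) ?
                           "private" : "shared");
    }
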
diff --git a/tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c b/tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c
new file mode 100644
index 000000000000..cbc92a862ea9
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test edge cases and race conditions in kvm_recalculate_apic_map().
+ */
+
+#include <sys/ioctl.h>
+#include <pthread.h>
+#include <time.h>
+
+#include "processor.h"
+#include "test_util.h"
+#include "kvm_util.h"
+#include "apic.h"
+
+#define TIMEOUT 5 /* seconds */
+
+#define LAPIC_DISABLED 0
+#define LAPIC_X2APIC (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)
+#define MAX_XAPIC_ID 0xff
+
+static void *race(void *arg)
+{
+ struct kvm_lapic_state lapic = {};
+ struct kvm_vcpu *vcpu = arg;
+
+ while (1) {
+ /* Trigger kvm_recalculate_apic_map(). */
+ vcpu_ioctl(vcpu, KVM_SET_LAPIC, &lapic);
+ pthread_testcancel();
+ }
+
+ return NULL;
+}
+
+int main(void)
+{
+ struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
+ struct kvm_vcpu *vcpuN;
+ struct kvm_vm *vm;
+ pthread_t thread;
+ time_t t;
+ int i;
+
+ kvm_static_assert(KVM_MAX_VCPUS > MAX_XAPIC_ID);
+
+ /*
+ * Create the max number of vCPUs supported by selftests so that KVM
+	 * has a decent amount of work to do when recalculating the map, i.e. to
+ * make the problematic window large enough to hit.
+ */
+ vm = vm_create_with_vcpus(KVM_MAX_VCPUS, NULL, vcpus);
+
+ /*
+ * Enable x2APIC on all vCPUs so that KVM doesn't bail from the recalc
+ * due to vCPUs having aliased xAPIC IDs (truncated to 8 bits).
+ */
+ for (i = 0; i < KVM_MAX_VCPUS; i++)
+ vcpu_set_msr(vcpus[i], MSR_IA32_APICBASE, LAPIC_X2APIC);
+
+ TEST_ASSERT_EQ(pthread_create(&thread, NULL, race, vcpus[0]), 0);
+
+ vcpuN = vcpus[KVM_MAX_VCPUS - 1];
+ for (t = time(NULL) + TIMEOUT; time(NULL) < t;) {
+ vcpu_set_msr(vcpuN, MSR_IA32_APICBASE, LAPIC_X2APIC);
+ vcpu_set_msr(vcpuN, MSR_IA32_APICBASE, LAPIC_DISABLED);
+ }
+
+ TEST_ASSERT_EQ(pthread_cancel(thread), 0);
+ TEST_ASSERT_EQ(pthread_join(thread, NULL), 0);
+
+ kvm_vm_free(vm);
+
+ return 0;
+}
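
The race boils down to two interfaces: KVM_SET_LAPIC on one thread (each call forces a recalculation of the APIC map) and the APIC base MSR toggle on the other. A sketch of the MSR half using the raw KVM_SET_MSRS ioctl, roughly what vcpu_set_msr() wraps (vcpu_fd is an assumed, already-created vCPU descriptor; the MSR index and bit definitions are spelled out because linux/kvm.h does not provide them):

    #include <stdint.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    #define MSR_IA32_APICBASE       0x0000001b
    #define APICBASE_ENABLE         (1ULL << 11)    /* xAPIC global enable */
    #define APICBASE_X2APIC         (1ULL << 10)    /* x2APIC mode */

    /* Toggle the local APIC between x2APIC and disabled, as the main loop does. */
    static int set_apic_base(int vcpu_fd, uint64_t val)
    {
            struct {
                    struct kvm_msrs header;
                    struct kvm_msr_entry entry;
            } msrs;

            memset(&msrs, 0, sizeof(msrs));
            msrs.header.nmsrs = 1;
            msrs.entry.index = MSR_IA32_APICBASE;
            msrs.entry.data = val;

            /* KVM_SET_MSRS returns the number of MSRs processed, i.e. 1 on success. */
            return ioctl(vcpu_fd, KVM_SET_MSRS, &msrs);
    }
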
diff --git a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c
new file mode 100644
index 000000000000..366cf18600bc
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test that KVM_SET_BOOT_CPU_ID works as intended
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+#define _GNU_SOURCE /* for program_invocation_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "apic.h"
+
+static void guest_bsp_vcpu(void *arg)
+{
+ GUEST_SYNC(1);
+
+ GUEST_ASSERT_NE(get_bsp_flag(), 0);
+
+ GUEST_DONE();
+}
+
+static void guest_not_bsp_vcpu(void *arg)
+{
+ GUEST_SYNC(1);
+
+ GUEST_ASSERT_EQ(get_bsp_flag(), 0);
+
+ GUEST_DONE();
+}
+
+static void test_set_bsp_busy(struct kvm_vcpu *vcpu, const char *msg)
+{
+ int r = __vm_ioctl(vcpu->vm, KVM_SET_BOOT_CPU_ID,
+ (void *)(unsigned long)vcpu->id);
+
+ TEST_ASSERT(r == -1 && errno == EBUSY, "KVM_SET_BOOT_CPU_ID set %s", msg);
+}
+
+static void run_vcpu(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+ int stage;
+
+ for (stage = 0; stage < 2; stage++) {
+
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+ uc.args[1] == stage + 1,
+ "Stage %d: Unexpected register values vmexit, got %lx",
+ stage + 1, (ulong)uc.args[1]);
+ test_set_bsp_busy(vcpu, "while running vm");
+ break;
+ case UCALL_DONE:
+ TEST_ASSERT(stage == 1,
+ "Expected GUEST_DONE in stage 2, got stage %d",
+ stage);
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ default:
+ TEST_ASSERT(false, "Unexpected exit: %s",
+ exit_reason_str(vcpu->run->exit_reason));
+ }
+ }
+}
+
+static struct kvm_vm *create_vm(uint32_t nr_vcpus, uint32_t bsp_vcpu_id,
+ struct kvm_vcpu *vcpus[])
+{
+ struct kvm_vm *vm;
+ uint32_t i;
+
+ vm = vm_create(nr_vcpus);
+
+ vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)(unsigned long)bsp_vcpu_id);
+
+ for (i = 0; i < nr_vcpus; i++)
+ vcpus[i] = vm_vcpu_add(vm, i, i == bsp_vcpu_id ? guest_bsp_vcpu :
+ guest_not_bsp_vcpu);
+ return vm;
+}
+
+static void run_vm_bsp(uint32_t bsp_vcpu_id)
+{
+ struct kvm_vcpu *vcpus[2];
+ struct kvm_vm *vm;
+
+ vm = create_vm(ARRAY_SIZE(vcpus), bsp_vcpu_id, vcpus);
+
+ run_vcpu(vcpus[0]);
+ run_vcpu(vcpus[1]);
+
+ kvm_vm_free(vm);
+}
+
+static void check_set_bsp_busy(void)
+{
+ struct kvm_vcpu *vcpus[2];
+ struct kvm_vm *vm;
+
+ vm = create_vm(ARRAY_SIZE(vcpus), 0, vcpus);
+
+ test_set_bsp_busy(vcpus[1], "after adding vcpu");
+
+ run_vcpu(vcpus[0]);
+ run_vcpu(vcpus[1]);
+
+ test_set_bsp_busy(vcpus[1], "to a terminated vcpu");
+
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_SET_BOOT_CPU_ID));
+
+ run_vm_bsp(0);
+ run_vm_bsp(1);
+ run_vm_bsp(0);
+
+ check_set_bsp_busy();
+}
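
Note the ordering contract being tested: KVM_SET_BOOT_CPU_ID is a VM ioctl that takes the vCPU id by value and must be issued before any vCPU is created; afterwards it fails with EBUSY, which is exactly what test_set_bsp_busy() checks. A bare-bones sketch of the happy path (error handling mostly omitted; kvm_fd is an assumed open /dev/kvm descriptor):

    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Pick vCPU 1 as the BSP; must happen before any KVM_CREATE_VCPU. */
    static int create_vm_with_bsp1(int kvm_fd)
    {
            int vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, 0);

            if (vm_fd < 0)
                    return vm_fd;

            ioctl(vm_fd, KVM_SET_BOOT_CPU_ID, (unsigned long)1);

            ioctl(vm_fd, KVM_CREATE_VCPU, 0);       /* AP */
            ioctl(vm_fd, KVM_CREATE_VCPU, 1);       /* BSP */

            return vm_fd;
    }
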
diff --git a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c
index 9f7656184f31..3610981d9162 100644
--- a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c
@@ -22,27 +22,117 @@
#include "kvm_util.h"
#include "processor.h"
-#define VCPU_ID 5
+#define TEST_INVALID_CR_BIT(vcpu, cr, orig, bit) \
+do { \
+ struct kvm_sregs new; \
+ int rc; \
+ \
+	/* Skip the sub-test if the feature/bit is supported. */		\
+ if (orig.cr & bit) \
+ break; \
+ \
+ memcpy(&new, &orig, sizeof(sregs)); \
+ new.cr |= bit; \
+ \
+ rc = _vcpu_sregs_set(vcpu, &new); \
+ TEST_ASSERT(rc, "KVM allowed invalid " #cr " bit (0x%lx)", bit); \
+ \
+ /* Sanity check that KVM didn't change anything. */ \
+ vcpu_sregs_get(vcpu, &new); \
+ TEST_ASSERT(!memcmp(&new, &orig, sizeof(new)), "KVM modified sregs"); \
+} while (0)
+
+static uint64_t calc_supported_cr4_feature_bits(void)
+{
+ uint64_t cr4;
+
+ cr4 = X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE |
+ X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE | X86_CR4_PGE |
+ X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT;
+ if (kvm_cpu_has(X86_FEATURE_UMIP))
+ cr4 |= X86_CR4_UMIP;
+ if (kvm_cpu_has(X86_FEATURE_LA57))
+ cr4 |= X86_CR4_LA57;
+ if (kvm_cpu_has(X86_FEATURE_VMX))
+ cr4 |= X86_CR4_VMXE;
+ if (kvm_cpu_has(X86_FEATURE_SMX))
+ cr4 |= X86_CR4_SMXE;
+ if (kvm_cpu_has(X86_FEATURE_FSGSBASE))
+ cr4 |= X86_CR4_FSGSBASE;
+ if (kvm_cpu_has(X86_FEATURE_PCID))
+ cr4 |= X86_CR4_PCIDE;
+ if (kvm_cpu_has(X86_FEATURE_XSAVE))
+ cr4 |= X86_CR4_OSXSAVE;
+ if (kvm_cpu_has(X86_FEATURE_SMEP))
+ cr4 |= X86_CR4_SMEP;
+ if (kvm_cpu_has(X86_FEATURE_SMAP))
+ cr4 |= X86_CR4_SMAP;
+ if (kvm_cpu_has(X86_FEATURE_PKU))
+ cr4 |= X86_CR4_PKE;
+
+ return cr4;
+}
int main(int argc, char *argv[])
{
struct kvm_sregs sregs;
+ struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
- int rc;
+ uint64_t cr4;
+ int rc, i;
+
+ /*
+	 * Create a dummy VM and use it to verify that all supported CR4 bits
+	 * can be set prior to defining the vCPU model, i.e. without doing
+	 * KVM_SET_CPUID2.
+ */
+ vm = vm_create_barebones();
+ vcpu = __vm_vcpu_add(vm, 0);
+
+ vcpu_sregs_get(vcpu, &sregs);
- /* Tell stdout not to buffer its content */
- setbuf(stdout, NULL);
+ sregs.cr0 = 0;
+ sregs.cr4 |= calc_supported_cr4_feature_bits();
+ cr4 = sregs.cr4;
+
+ rc = _vcpu_sregs_set(vcpu, &sregs);
+ TEST_ASSERT(!rc, "Failed to set supported CR4 bits (0x%lx)", cr4);
+
+ vcpu_sregs_get(vcpu, &sregs);
+ TEST_ASSERT(sregs.cr4 == cr4, "sregs.CR4 (0x%llx) != CR4 (0x%lx)",
+ sregs.cr4, cr4);
+
+ /* Verify all unsupported features are rejected by KVM. */
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_UMIP);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_LA57);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_VMXE);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_SMXE);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_FSGSBASE);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_PCIDE);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_OSXSAVE);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_SMEP);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_SMAP);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_PKE);
+
+ for (i = 32; i < 64; i++)
+ TEST_INVALID_CR_BIT(vcpu, cr0, sregs, BIT(i));
+
+ /* NW without CD is illegal, as is PG without PE. */
+ TEST_INVALID_CR_BIT(vcpu, cr0, sregs, X86_CR0_NW);
+ TEST_INVALID_CR_BIT(vcpu, cr0, sregs, X86_CR0_PG);
+
+ kvm_vm_free(vm);
- /* Create VM */
- vm = vm_create_default(VCPU_ID, 0, NULL);
+ /* Create a "real" VM and verify APIC_BASE can be set. */
+ vm = vm_create_with_one_vcpu(&vcpu, NULL);
- vcpu_sregs_get(vm, VCPU_ID, &sregs);
+ vcpu_sregs_get(vcpu, &sregs);
sregs.apic_base = 1 << 10;
- rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs);
+ rc = _vcpu_sregs_set(vcpu, &sregs);
TEST_ASSERT(rc, "Set IA32_APIC_BASE to %llx (invalid)",
sregs.apic_base);
sregs.apic_base = 1 << 11;
- rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs);
+ rc = _vcpu_sregs_set(vcpu, &sregs);
TEST_ASSERT(!rc, "Couldn't set IA32_APIC_BASE to %llx (valid)",
sregs.apic_base);
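
The TEST_INVALID_CR_BIT() macro above is, at bottom, a get/modify/set/verify cycle over KVM_GET_SREGS and KVM_SET_SREGS. A standalone sketch of one such probe for CR4.LA57, assuming the bit is not exposed to the vCPU (the CR4 define is spelled out since the KVM UAPI headers do not carry it; vcpu_fd is an assumed descriptor):

    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    #define X86_CR4_LA57    (1ULL << 12)

    /* Returns 0 if KVM rejected the unsupported CR4 bit and left state untouched. */
    static int check_cr4_bit_rejected(int vcpu_fd, unsigned long long bit)
    {
            struct kvm_sregs orig, new;

            ioctl(vcpu_fd, KVM_GET_SREGS, &orig);

            new = orig;
            new.cr4 |= bit;
            if (!ioctl(vcpu_fd, KVM_SET_SREGS, &new))
                    return -1;      /* KVM accepted an invalid bit */

            /* Sanity check that the failed set did not modify anything. */
            ioctl(vcpu_fd, KVM_GET_SREGS, &new);
            return memcmp(&new, &orig, sizeof(new)) ? -1 : 0;
    }
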
diff --git a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c b/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c
new file mode 100644
index 000000000000..0a6dfba3905b
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c
@@ -0,0 +1,397 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/kvm.h>
+#include <linux/psp-sev.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <pthread.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "sev.h"
+#include "kselftest.h"
+
+#define NR_MIGRATE_TEST_VCPUS 4
+#define NR_MIGRATE_TEST_VMS 3
+#define NR_LOCK_TESTING_THREADS 3
+#define NR_LOCK_TESTING_ITERATIONS 10000
+
+bool have_sev_es;
+
+static struct kvm_vm *sev_vm_create(bool es)
+{
+ struct kvm_vm *vm;
+ int i;
+
+ vm = vm_create_barebones();
+ if (!es)
+ sev_vm_init(vm);
+ else
+ sev_es_vm_init(vm);
+
+ for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i)
+ __vm_vcpu_add(vm, i);
+
+ sev_vm_launch(vm, es ? SEV_POLICY_ES : 0);
+
+ if (es)
+ vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
+ return vm;
+}
+
+static struct kvm_vm *aux_vm_create(bool with_vcpus)
+{
+ struct kvm_vm *vm;
+ int i;
+
+ vm = vm_create_barebones();
+ if (!with_vcpus)
+ return vm;
+
+ for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i)
+ __vm_vcpu_add(vm, i);
+
+ return vm;
+}
+
+static int __sev_migrate_from(struct kvm_vm *dst, struct kvm_vm *src)
+{
+ return __vm_enable_cap(dst, KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM, src->fd);
+}
+
+
+static void sev_migrate_from(struct kvm_vm *dst, struct kvm_vm *src)
+{
+ int ret;
+
+ ret = __sev_migrate_from(dst, src);
+ TEST_ASSERT(!ret, "Migration failed, ret: %d, errno: %d", ret, errno);
+}
+
+static void test_sev_migrate_from(bool es)
+{
+ struct kvm_vm *src_vm;
+ struct kvm_vm *dst_vms[NR_MIGRATE_TEST_VMS];
+ int i, ret;
+
+ src_vm = sev_vm_create(es);
+ for (i = 0; i < NR_MIGRATE_TEST_VMS; ++i)
+ dst_vms[i] = aux_vm_create(true);
+
+ /* Initial migration from the src to the first dst. */
+ sev_migrate_from(dst_vms[0], src_vm);
+
+ for (i = 1; i < NR_MIGRATE_TEST_VMS; i++)
+ sev_migrate_from(dst_vms[i], dst_vms[i - 1]);
+
+ /* Migrate the guest back to the original VM. */
+ ret = __sev_migrate_from(src_vm, dst_vms[NR_MIGRATE_TEST_VMS - 1]);
+ TEST_ASSERT(ret == -1 && errno == EIO,
+ "VM that was migrated from should be dead. ret %d, errno: %d", ret,
+ errno);
+
+ kvm_vm_free(src_vm);
+ for (i = 0; i < NR_MIGRATE_TEST_VMS; ++i)
+ kvm_vm_free(dst_vms[i]);
+}
+
+struct locking_thread_input {
+ struct kvm_vm *vm;
+ struct kvm_vm *source_vms[NR_LOCK_TESTING_THREADS];
+};
+
+static void *locking_test_thread(void *arg)
+{
+ int i, j;
+ struct locking_thread_input *input = (struct locking_thread_input *)arg;
+
+ for (i = 0; i < NR_LOCK_TESTING_ITERATIONS; ++i) {
+ j = i % NR_LOCK_TESTING_THREADS;
+ __sev_migrate_from(input->vm, input->source_vms[j]);
+ }
+
+ return NULL;
+}
+
+static void test_sev_migrate_locking(void)
+{
+ struct locking_thread_input input[NR_LOCK_TESTING_THREADS];
+ pthread_t pt[NR_LOCK_TESTING_THREADS];
+ int i;
+
+ for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i) {
+ input[i].vm = sev_vm_create(/* es= */ false);
+ input[0].source_vms[i] = input[i].vm;
+ }
+ for (i = 1; i < NR_LOCK_TESTING_THREADS; ++i)
+ memcpy(input[i].source_vms, input[0].source_vms,
+ sizeof(input[i].source_vms));
+
+ for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i)
+ pthread_create(&pt[i], NULL, locking_test_thread, &input[i]);
+
+ for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i)
+ pthread_join(pt[i], NULL);
+ for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i)
+ kvm_vm_free(input[i].vm);
+}
+
+static void test_sev_migrate_parameters(void)
+{
+ struct kvm_vm *sev_vm, *sev_es_vm, *vm_no_vcpu, *vm_no_sev,
+ *sev_es_vm_no_vmsa;
+ int ret;
+
+ vm_no_vcpu = vm_create_barebones();
+ vm_no_sev = aux_vm_create(true);
+ ret = __sev_migrate_from(vm_no_vcpu, vm_no_sev);
+ TEST_ASSERT(ret == -1 && errno == EINVAL,
+ "Migrations require SEV enabled. ret %d, errno: %d", ret,
+ errno);
+
+ if (!have_sev_es)
+ goto out;
+
+ sev_vm = sev_vm_create(/* es= */ false);
+ sev_es_vm = sev_vm_create(/* es= */ true);
+ sev_es_vm_no_vmsa = vm_create_barebones();
+ sev_es_vm_init(sev_es_vm_no_vmsa);
+ __vm_vcpu_add(sev_es_vm_no_vmsa, 1);
+
+ ret = __sev_migrate_from(sev_vm, sev_es_vm);
+ TEST_ASSERT(
+ ret == -1 && errno == EINVAL,
+		"Should not be able to migrate to SEV enabled VM. ret: %d, errno: %d",
+ ret, errno);
+
+ ret = __sev_migrate_from(sev_es_vm, sev_vm);
+ TEST_ASSERT(
+ ret == -1 && errno == EINVAL,
+		"Should not be able to migrate to SEV-ES enabled VM. ret: %d, errno: %d",
+ ret, errno);
+
+ ret = __sev_migrate_from(vm_no_vcpu, sev_es_vm);
+ TEST_ASSERT(
+ ret == -1 && errno == EINVAL,
+		"SEV-ES migrations require the same number of vCPUs. ret: %d, errno: %d",
+ ret, errno);
+
+ ret = __sev_migrate_from(vm_no_vcpu, sev_es_vm_no_vmsa);
+ TEST_ASSERT(
+ ret == -1 && errno == EINVAL,
+ "SEV-ES migrations require UPDATE_VMSA. ret %d, errno: %d",
+ ret, errno);
+
+ kvm_vm_free(sev_vm);
+ kvm_vm_free(sev_es_vm);
+ kvm_vm_free(sev_es_vm_no_vmsa);
+out:
+ kvm_vm_free(vm_no_vcpu);
+ kvm_vm_free(vm_no_sev);
+}
+
+static int __sev_mirror_create(struct kvm_vm *dst, struct kvm_vm *src)
+{
+ return __vm_enable_cap(dst, KVM_CAP_VM_COPY_ENC_CONTEXT_FROM, src->fd);
+}
+
+
+static void sev_mirror_create(struct kvm_vm *dst, struct kvm_vm *src)
+{
+ int ret;
+
+ ret = __sev_mirror_create(dst, src);
+ TEST_ASSERT(!ret, "Copying context failed, ret: %d, errno: %d", ret, errno);
+}
+
+static void verify_mirror_allowed_cmds(struct kvm_vm *vm)
+{
+ struct kvm_sev_guest_status status;
+ int cmd_id;
+
+ for (cmd_id = KVM_SEV_INIT; cmd_id < KVM_SEV_NR_MAX; ++cmd_id) {
+ int ret;
+
+ /*
+		 * These commands are allowed for mirror VMs; all others are not.
+ */
+ switch (cmd_id) {
+ case KVM_SEV_LAUNCH_UPDATE_VMSA:
+ case KVM_SEV_GUEST_STATUS:
+ case KVM_SEV_DBG_DECRYPT:
+ case KVM_SEV_DBG_ENCRYPT:
+ continue;
+ default:
+ break;
+ }
+
+ /*
+ * These commands should be disallowed before the data
+ * parameter is examined so NULL is OK here.
+ */
+ ret = __vm_sev_ioctl(vm, cmd_id, NULL);
+ TEST_ASSERT(
+ ret == -1 && errno == EINVAL,
+			"Should not be able to call command: %d. ret: %d, errno: %d",
+ cmd_id, ret, errno);
+ }
+
+ vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status);
+}
+
+static void test_sev_mirror(bool es)
+{
+ struct kvm_vm *src_vm, *dst_vm;
+ int i;
+
+ src_vm = sev_vm_create(es);
+ dst_vm = aux_vm_create(false);
+
+ sev_mirror_create(dst_vm, src_vm);
+
+ /* Check that we can complete creation of the mirror VM. */
+ for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i)
+ __vm_vcpu_add(dst_vm, i);
+
+ if (es)
+ vm_sev_ioctl(dst_vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
+
+ verify_mirror_allowed_cmds(dst_vm);
+
+ kvm_vm_free(src_vm);
+ kvm_vm_free(dst_vm);
+}
+
+static void test_sev_mirror_parameters(void)
+{
+ struct kvm_vm *sev_vm, *sev_es_vm, *vm_no_vcpu, *vm_with_vcpu;
+ int ret;
+
+ sev_vm = sev_vm_create(/* es= */ false);
+ vm_with_vcpu = aux_vm_create(true);
+ vm_no_vcpu = aux_vm_create(false);
+
+ ret = __sev_mirror_create(sev_vm, sev_vm);
+ TEST_ASSERT(
+ ret == -1 && errno == EINVAL,
+		"Should not be able to copy context to self. ret: %d, errno: %d",
+ ret, errno);
+
+ ret = __sev_mirror_create(vm_no_vcpu, vm_with_vcpu);
+ TEST_ASSERT(ret == -1 && errno == EINVAL,
+ "Copy context requires SEV enabled. ret %d, errno: %d", ret,
+ errno);
+
+ ret = __sev_mirror_create(vm_with_vcpu, sev_vm);
+ TEST_ASSERT(
+ ret == -1 && errno == EINVAL,
+		"SEV copy context requires no vCPUs on the destination. ret: %d, errno: %d",
+ ret, errno);
+
+ if (!have_sev_es)
+ goto out;
+
+ sev_es_vm = sev_vm_create(/* es= */ true);
+ ret = __sev_mirror_create(sev_vm, sev_es_vm);
+ TEST_ASSERT(
+ ret == -1 && errno == EINVAL,
+		"Should not be able to copy context to SEV enabled VM. ret: %d, errno: %d",
+ ret, errno);
+
+ ret = __sev_mirror_create(sev_es_vm, sev_vm);
+ TEST_ASSERT(
+ ret == -1 && errno == EINVAL,
+		"Should not be able to copy context to SEV-ES enabled VM. ret: %d, errno: %d",
+ ret, errno);
+
+ kvm_vm_free(sev_es_vm);
+
+out:
+ kvm_vm_free(sev_vm);
+ kvm_vm_free(vm_with_vcpu);
+ kvm_vm_free(vm_no_vcpu);
+}
+
+static void test_sev_move_copy(void)
+{
+ struct kvm_vm *dst_vm, *dst2_vm, *dst3_vm, *sev_vm, *mirror_vm,
+ *dst_mirror_vm, *dst2_mirror_vm, *dst3_mirror_vm;
+
+ sev_vm = sev_vm_create(/* es= */ false);
+ dst_vm = aux_vm_create(true);
+ dst2_vm = aux_vm_create(true);
+ dst3_vm = aux_vm_create(true);
+ mirror_vm = aux_vm_create(false);
+ dst_mirror_vm = aux_vm_create(false);
+ dst2_mirror_vm = aux_vm_create(false);
+ dst3_mirror_vm = aux_vm_create(false);
+
+ sev_mirror_create(mirror_vm, sev_vm);
+
+ sev_migrate_from(dst_mirror_vm, mirror_vm);
+ sev_migrate_from(dst_vm, sev_vm);
+
+ sev_migrate_from(dst2_vm, dst_vm);
+ sev_migrate_from(dst2_mirror_vm, dst_mirror_vm);
+
+ sev_migrate_from(dst3_mirror_vm, dst2_mirror_vm);
+ sev_migrate_from(dst3_vm, dst2_vm);
+
+ kvm_vm_free(dst_vm);
+ kvm_vm_free(sev_vm);
+ kvm_vm_free(dst2_vm);
+ kvm_vm_free(dst3_vm);
+ kvm_vm_free(mirror_vm);
+ kvm_vm_free(dst_mirror_vm);
+ kvm_vm_free(dst2_mirror_vm);
+ kvm_vm_free(dst3_mirror_vm);
+
+ /*
+	 * Run a similar test but destroy the mirrors before the mirrored VMs to
+	 * ensure destruction is done safely.
+ * destruction is done safely.
+ */
+ sev_vm = sev_vm_create(/* es= */ false);
+ dst_vm = aux_vm_create(true);
+ mirror_vm = aux_vm_create(false);
+ dst_mirror_vm = aux_vm_create(false);
+
+ sev_mirror_create(mirror_vm, sev_vm);
+
+ sev_migrate_from(dst_mirror_vm, mirror_vm);
+ sev_migrate_from(dst_vm, sev_vm);
+
+ kvm_vm_free(mirror_vm);
+ kvm_vm_free(dst_mirror_vm);
+ kvm_vm_free(dst_vm);
+ kvm_vm_free(sev_vm);
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM));
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV));
+
+ have_sev_es = kvm_cpu_has(X86_FEATURE_SEV_ES);
+
+ if (kvm_has_cap(KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM)) {
+ test_sev_migrate_from(/* es= */ false);
+ if (have_sev_es)
+ test_sev_migrate_from(/* es= */ true);
+ test_sev_migrate_locking();
+ test_sev_migrate_parameters();
+ if (kvm_has_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM))
+ test_sev_move_copy();
+ }
+ if (kvm_has_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM)) {
+ test_sev_mirror(/* es= */ false);
+ if (have_sev_es)
+ test_sev_mirror(/* es= */ true);
+ test_sev_mirror_parameters();
+ }
+ return 0;
+}
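
Both intra-host migration and mirroring used above are a single capability enable on the destination VM, with the source VM's file descriptor passed in args[0]; __sev_migrate_from() and __sev_mirror_create() differ only in the capability number. A raw-ioctl sketch under that assumption (the fd parameters and helper name are illustrative):

    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Move the SEV context from src_vm_fd into dst_vm_fd; use
     * KVM_CAP_VM_COPY_ENC_CONTEXT_FROM instead to create a mirror. */
    static int sev_move_context(int dst_vm_fd, int src_vm_fd)
    {
            struct kvm_enable_cap cap = {
                    .cap = KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM,
                    .args[0] = src_vm_fd,
            };

            return ioctl(dst_vm_fd, KVM_ENABLE_CAP, &cap);
    }
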
diff --git a/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c b/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c
new file mode 100644
index 000000000000..026779f3ed06
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "linux/psp-sev.h"
+#include "sev.h"
+
+
+static void guest_sev_es_code(void)
+{
+ /* TODO: Check CPUID after GHCB-based hypercall support is added. */
+ GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ENABLED);
+ GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ES_ENABLED);
+
+ /*
+ * TODO: Add GHCB and ucall support for SEV-ES guests. For now, simply
+ * force "termination" to signal "done" via the GHCB MSR protocol.
+ */
+ wrmsr(MSR_AMD64_SEV_ES_GHCB, GHCB_MSR_TERM_REQ);
+ __asm__ __volatile__("rep; vmmcall");
+}
+
+static void guest_sev_code(void)
+{
+ GUEST_ASSERT(this_cpu_has(X86_FEATURE_SEV));
+ GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ENABLED);
+
+ GUEST_DONE();
+}
+
+static void test_sev(void *guest_code, uint64_t policy)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+
+ vm = vm_sev_create_with_one_vcpu(policy, guest_code, &vcpu);
+
+ for (;;) {
+ vcpu_run(vcpu);
+
+ if (policy & SEV_POLICY_ES) {
+ TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SYSTEM_EVENT,
+ "Wanted SYSTEM_EVENT, got %s",
+ exit_reason_str(vcpu->run->exit_reason));
+ TEST_ASSERT_EQ(vcpu->run->system_event.type, KVM_SYSTEM_EVENT_SEV_TERM);
+ TEST_ASSERT_EQ(vcpu->run->system_event.ndata, 1);
+ TEST_ASSERT_EQ(vcpu->run->system_event.data[0], GHCB_MSR_TERM_REQ);
+ break;
+ }
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ continue;
+ case UCALL_DONE:
+ return;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ default:
+ TEST_FAIL("Unexpected exit: %s",
+ exit_reason_str(vcpu->run->exit_reason));
+ }
+ }
+
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV));
+
+ test_sev(guest_sev_code, SEV_POLICY_NO_DBG);
+ test_sev(guest_sev_code, 0);
+
+ if (kvm_cpu_has(X86_FEATURE_SEV_ES)) {
+ test_sev(guest_sev_es_code, SEV_POLICY_ES | SEV_POLICY_NO_DBG);
+ test_sev(guest_sev_es_code, SEV_POLICY_ES);
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c b/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c
new file mode 100644
index 000000000000..416207c38a17
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020, Google LLC.
+ *
+ * Test that KVM emulates instructions in response to EPT violations when
+ * allow_smaller_maxphyaddr is enabled and guest.MAXPHYADDR < host.MAXPHYADDR.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+
+#include "flds_emulation.h"
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "vmx.h"
+
+#define MAXPHYADDR 36
+
+#define MEM_REGION_GVA 0x0000123456789000
+#define MEM_REGION_GPA 0x0000000700000000
+#define MEM_REGION_SLOT 10
+#define MEM_REGION_SIZE PAGE_SIZE
+
+static void guest_code(bool tdp_enabled)
+{
+ uint64_t error_code;
+ uint64_t vector;
+
+ vector = kvm_asm_safe_ec(FLDS_MEM_EAX, error_code, "a"(MEM_REGION_GVA));
+
+ /*
+ * When TDP is enabled, flds will trigger an emulation failure, exit to
+ * userspace, and then the selftest host "VMM" skips the instruction.
+ *
+ * When TDP is disabled, no instruction emulation is required so flds
+ * should generate #PF(RSVD).
+ */
+ if (tdp_enabled) {
+ GUEST_ASSERT(!vector);
+ } else {
+ GUEST_ASSERT_EQ(vector, PF_VECTOR);
+ GUEST_ASSERT(error_code & PFERR_RSVD_MASK);
+ }
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ uint64_t *pte;
+ uint64_t *hva;
+ uint64_t gpa;
+ int rc;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_SMALLER_MAXPHYADDR));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ vcpu_args_set(vcpu, 1, kvm_is_tdp_enabled());
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+
+ vcpu_set_cpuid_property(vcpu, X86_PROPERTY_MAX_PHY_ADDR, MAXPHYADDR);
+
+ rc = kvm_check_cap(KVM_CAP_EXIT_ON_EMULATION_FAILURE);
+ TEST_ASSERT(rc, "KVM_CAP_EXIT_ON_EMULATION_FAILURE is unavailable");
+ vm_enable_cap(vm, KVM_CAP_EXIT_ON_EMULATION_FAILURE, 1);
+
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ MEM_REGION_GPA, MEM_REGION_SLOT,
+ MEM_REGION_SIZE / PAGE_SIZE, 0);
+ gpa = vm_phy_pages_alloc(vm, MEM_REGION_SIZE / PAGE_SIZE,
+ MEM_REGION_GPA, MEM_REGION_SLOT);
+ TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc");
+ virt_map(vm, MEM_REGION_GVA, MEM_REGION_GPA, 1);
+ hva = addr_gpa2hva(vm, MEM_REGION_GPA);
+ memset(hva, 0, PAGE_SIZE);
+
+ pte = vm_get_page_table_entry(vm, MEM_REGION_GVA);
+ *pte |= BIT_ULL(MAXPHYADDR);
+
+ vcpu_run(vcpu);
+
+ /*
+	 * When TDP is enabled, KVM must emulate in response to an access to a
+	 * guest physical address that is illegal from the guest's perspective
+	 * but legal from hardware's perspective. This should result in an emulation
+ * failure exit to userspace since KVM doesn't support emulating flds.
+ */
+ if (kvm_is_tdp_enabled()) {
+ handle_flds_emulation_failure_exit(vcpu);
+ vcpu_run(vcpu);
+ }
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unrecognized ucall: %lu", uc.cmd);
+ }
+
+ kvm_vm_free(vm);
+
+ return 0;
+}
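
The trick the test relies on: with guest.MAXPHYADDR forced to 36, bit 36 of a PTE is a reserved physical-address bit from the guest's point of view while remaining a legal address bit for the host, so the access either raises #PF(RSVD) (shadow paging) or forces KVM to emulate (TDP). A small, purely illustrative helper for the reserved-bit mask involved (not part of the test):

    #include <stdint.h>

    /* Guest-reserved physical-address bits in a PTE: bits [51, guest_maxphyaddr]. */
    static uint64_t guest_rsvd_pa_mask(unsigned int guest_maxphyaddr)
    {
            return ((1ULL << (52 - guest_maxphyaddr)) - 1) << guest_maxphyaddr;
    }

    /* With guest_maxphyaddr = 36, BIT_ULL(36), the bit set by the test, falls
     * inside this mask, so the guest sees it as reserved. */
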
diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c
index ae39a220609f..e18b86666e1f 100644
--- a/tools/testing/selftests/kvm/x86_64/smm_test.c
+++ b/tools/testing/selftests/kvm/x86_64/smm_test.c
@@ -19,10 +19,6 @@
#include "vmx.h"
#include "svm_util.h"
-#define VCPU_ID 1
-
-#define PAGE_SIZE 4096
-
#define SMRAM_SIZE 65536
#define SMRAM_MEMSLOT ((1 << 16) | 1)
#define SMRAM_PAGES (SMRAM_SIZE / PAGE_SIZE)
@@ -53,15 +49,28 @@ static inline void sync_with_host(uint64_t phase)
: "+a" (phase));
}
-void self_smi(void)
+static void self_smi(void)
+{
+ x2apic_write_reg(APIC_ICR,
+ APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_SMI);
+}
+
+static void l2_guest_code(void)
{
- wrmsr(APIC_BASE_MSR + (APIC_ICR >> 4),
- APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_SMI);
+ sync_with_host(8);
+
+ sync_with_host(10);
+
+ vmcall();
}
-void guest_code(void *arg)
+static void guest_code(void *arg)
{
+ #define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
uint64_t apicbase = rdmsr(MSR_IA32_APICBASE);
+ struct svm_test_data *svm = arg;
+ struct vmx_pages *vmx_pages = arg;
sync_with_host(1);
@@ -74,37 +83,63 @@ void guest_code(void *arg)
sync_with_host(4);
if (arg) {
- if (cpu_has_svm())
- generic_svm_setup(arg, NULL, NULL);
- else
- GUEST_ASSERT(prepare_for_vmx_operation(arg));
+ if (this_cpu_has(X86_FEATURE_SVM)) {
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ } else {
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+ prepare_vmcs(vmx_pages, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ }
sync_with_host(5);
self_smi();
sync_with_host(7);
+
+ if (this_cpu_has(X86_FEATURE_SVM)) {
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ } else {
+ vmlaunch();
+ vmresume();
+ }
+
+ /* Stages 8-11 are eaten by SMM (SMRAM_STAGE reported instead) */
+ sync_with_host(12);
}
sync_with_host(DONE);
}
+void inject_smi(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_events events;
+
+ vcpu_events_get(vcpu, &events);
+
+ events.smi.pending = 1;
+ events.flags |= KVM_VCPUEVENT_VALID_SMM;
+
+ vcpu_events_set(vcpu, &events);
+}
+
int main(int argc, char *argv[])
{
vm_vaddr_t nested_gva = 0;
+ struct kvm_vcpu *vcpu;
struct kvm_regs regs;
struct kvm_vm *vm;
- struct kvm_run *run;
struct kvm_x86_state *state;
int stage, stage_reported;
- /* Create VM */
- vm = vm_create_default(VCPU_ID, 0, guest_code);
-
- vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_SMM));
- run = vcpu_state(vm, VCPU_ID);
+ /* Create VM */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, SMRAM_GPA,
SMRAM_MEMSLOT, SMRAM_PAGES, 0);
@@ -115,29 +150,26 @@ int main(int argc, char *argv[])
memcpy(addr_gpa2hva(vm, SMRAM_GPA) + 0x8000, smi_handler,
sizeof(smi_handler));
- vcpu_set_msr(vm, VCPU_ID, MSR_IA32_SMBASE, SMRAM_GPA);
+ vcpu_set_msr(vcpu, MSR_IA32_SMBASE, SMRAM_GPA);
- if (kvm_check_cap(KVM_CAP_NESTED_STATE)) {
- if (nested_svm_supported())
+ if (kvm_has_cap(KVM_CAP_NESTED_STATE)) {
+ if (kvm_cpu_has(X86_FEATURE_SVM))
vcpu_alloc_svm(vm, &nested_gva);
- else if (nested_vmx_supported())
+ else if (kvm_cpu_has(X86_FEATURE_VMX))
vcpu_alloc_vmx(vm, &nested_gva);
}
if (!nested_gva)
pr_info("will skip SMM test with VMX enabled\n");
- vcpu_args_set(vm, VCPU_ID, 1, nested_gva);
+ vcpu_args_set(vcpu, 1, nested_gva);
for (stage = 1;; stage++) {
- _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Stage %d: unexpected exit reason: %u (%s),\n",
- stage, run->exit_reason,
- exit_reason_str(run->exit_reason));
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
memset(&regs, 0, sizeof(regs));
- vcpu_regs_get(vm, VCPU_ID, &regs);
+ vcpu_regs_get(vcpu, &regs);
stage_reported = regs.rax & 0xff;
@@ -149,14 +181,28 @@ int main(int argc, char *argv[])
"Unexpected stage: #%x, got %x",
stage, stage_reported);
- state = vcpu_save_state(vm, VCPU_ID);
+ /*
+ * Enter SMM during L2 execution and check that we correctly
+ * return from it. Do not perform save/restore while in SMM yet.
+ */
+ if (stage == 8) {
+ inject_smi(vcpu);
+ continue;
+ }
+
+ /*
+ * Perform save/restore while the guest is in SMM triggered
+ * during L2 execution.
+ */
+ if (stage == 10)
+ inject_smi(vcpu);
+
+ state = vcpu_save_state(vcpu);
kvm_vm_release(vm);
- kvm_vm_restart(vm, O_RDWR);
- vm_vcpu_add(vm, VCPU_ID);
- vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
- vcpu_load_state(vm, VCPU_ID, state);
- run = vcpu_state(vm, VCPU_ID);
- free(state);
+
+ vcpu = vm_recreate_with_one_vcpu(vm);
+ vcpu_load_state(vcpu, state);
+ kvm_x86_state_cleanup(state);
}
done:
diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c
index f6c8b9042f8a..88b58aab7207 100644
--- a/tools/testing/selftests/kvm/x86_64/state_test.c
+++ b/tools/testing/selftests/kvm/x86_64/state_test.c
@@ -20,7 +20,6 @@
#include "vmx.h"
#include "svm_util.h"
-#define VCPU_ID 5
#define L2_GUEST_STACK_SIZE 256
void svm_l2_guest_code(void)
@@ -140,10 +139,87 @@ static void vmx_l1_guest_code(struct vmx_pages *vmx_pages)
static void __attribute__((__flatten__)) guest_code(void *arg)
{
GUEST_SYNC(1);
+
+ if (this_cpu_has(X86_FEATURE_XSAVE)) {
+ uint64_t supported_xcr0 = this_cpu_supported_xcr0();
+ uint8_t buffer[4096];
+
+ memset(buffer, 0xcc, sizeof(buffer));
+
+ set_cr4(get_cr4() | X86_CR4_OSXSAVE);
+ GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE));
+
+ xsetbv(0, xgetbv(0) | supported_xcr0);
+
+ /*
+ * Modify state for all supported xfeatures to take them out of
+ * their "init" state, i.e. to make them show up in XSTATE_BV.
+ *
+ * Note off-by-default features, e.g. AMX, are out of scope for
+ * this particular testcase as they have a different ABI.
+ */
+ GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_FP);
+ asm volatile ("fincstp");
+
+ GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_SSE);
+ asm volatile ("vmovdqu %0, %%xmm0" :: "m" (buffer));
+
+ if (supported_xcr0 & XFEATURE_MASK_YMM)
+ asm volatile ("vmovdqu %0, %%ymm0" :: "m" (buffer));
+
+ if (supported_xcr0 & XFEATURE_MASK_AVX512) {
+ asm volatile ("kmovq %0, %%k1" :: "r" (-1ull));
+ asm volatile ("vmovupd %0, %%zmm0" :: "m" (buffer));
+ asm volatile ("vmovupd %0, %%zmm16" :: "m" (buffer));
+ }
+
+ if (this_cpu_has(X86_FEATURE_MPX)) {
+ uint64_t bounds[2] = { 10, 0xffffffffull };
+ uint64_t output[2] = { };
+
+ GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDREGS);
+ GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDCSR);
+
+ /*
+ * Don't bother trying to get BNDCSR into the INUSE
+ * state. MSR_IA32_BNDCFGS doesn't count as it isn't
+ * managed via XSAVE/XRSTOR, and BNDCFGU can only be
+ * modified by XRSTOR. Stuffing XSTATE_BV in the host
+ * is simpler than doing XRSTOR here in the guest.
+ *
+ * However, temporarily enable MPX in BNDCFGS so that
+ * BNDMOV actually loads BND1. If MPX isn't *fully*
+ * enabled, all MPX instructions are treated as NOPs.
+ *
+ * Hand encode "bndmov (%rax),%bnd1" as support for MPX
+ * mnemonics/registers has been removed from gcc and
+ * clang (and was never fully supported by clang).
+ */
+ wrmsr(MSR_IA32_BNDCFGS, BIT_ULL(0));
+ asm volatile (".byte 0x66,0x0f,0x1a,0x08" :: "a" (bounds));
+ /*
+ * Hand encode "bndmov %bnd1, (%rax)" to sanity check
+ * that BND1 actually got loaded.
+ */
+ asm volatile (".byte 0x66,0x0f,0x1b,0x08" :: "a" (output));
+ wrmsr(MSR_IA32_BNDCFGS, 0);
+
+ GUEST_ASSERT_EQ(bounds[0], output[0]);
+ GUEST_ASSERT_EQ(bounds[1], output[1]);
+ }
+ if (this_cpu_has(X86_FEATURE_PKU)) {
+ GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_PKRU);
+ set_cr4(get_cr4() | X86_CR4_PKE);
+ GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSPKE));
+
+ wrpkru(-1u);
+ }
+ }
+
GUEST_SYNC(2);
if (arg) {
- if (cpu_has_svm())
+ if (this_cpu_has(X86_FEATURE_SVM))
svm_l1_guest_code(arg);
else
vmx_l1_guest_code(arg);
@@ -154,45 +230,40 @@ static void __attribute__((__flatten__)) guest_code(void *arg)
int main(int argc, char *argv[])
{
+ uint64_t *xstate_bv, saved_xstate_bv;
vm_vaddr_t nested_gva = 0;
-
+ struct kvm_cpuid2 empty_cpuid = {};
struct kvm_regs regs1, regs2;
+ struct kvm_vcpu *vcpu, *vcpuN;
struct kvm_vm *vm;
- struct kvm_run *run;
struct kvm_x86_state *state;
struct ucall uc;
int stage;
/* Create VM */
- vm = vm_create_default(VCPU_ID, 0, guest_code);
- vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
- run = vcpu_state(vm, VCPU_ID);
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
- vcpu_regs_get(vm, VCPU_ID, &regs1);
+ vcpu_regs_get(vcpu, &regs1);
- if (kvm_check_cap(KVM_CAP_NESTED_STATE)) {
- if (nested_svm_supported())
+ if (kvm_has_cap(KVM_CAP_NESTED_STATE)) {
+ if (kvm_cpu_has(X86_FEATURE_SVM))
vcpu_alloc_svm(vm, &nested_gva);
- else if (nested_vmx_supported())
+ else if (kvm_cpu_has(X86_FEATURE_VMX))
vcpu_alloc_vmx(vm, &nested_gva);
}
if (!nested_gva)
pr_info("will skip nested state checks\n");
- vcpu_args_set(vm, VCPU_ID, 1, nested_gva);
+ vcpu_args_set(vcpu, 1, nested_gva);
for (stage = 1;; stage++) {
- _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Stage %d: unexpected exit reason: %u (%s),\n",
- stage, run->exit_reason,
- exit_reason_str(run->exit_reason));
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
- switch (get_ucall(vm, VCPU_ID, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_ABORT:
- TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
- __FILE__, uc.args[1]);
+ REPORT_GUEST_ASSERT(uc);
/* NOT REACHED */
case UCALL_SYNC:
break;
@@ -207,22 +278,47 @@ int main(int argc, char *argv[])
uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
stage, (ulong)uc.args[1]);
- state = vcpu_save_state(vm, VCPU_ID);
+ state = vcpu_save_state(vcpu);
memset(&regs1, 0, sizeof(regs1));
- vcpu_regs_get(vm, VCPU_ID, &regs1);
+ vcpu_regs_get(vcpu, &regs1);
kvm_vm_release(vm);
/* Restore state in a new VM. */
- kvm_vm_restart(vm, O_RDWR);
- vm_vcpu_add(vm, VCPU_ID);
- vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
- vcpu_load_state(vm, VCPU_ID, state);
- run = vcpu_state(vm, VCPU_ID);
- free(state);
+ vcpu = vm_recreate_with_one_vcpu(vm);
+ vcpu_load_state(vcpu, state);
+
+ /*
+ * Restore XSAVE state in a dummy vCPU, first without doing
+ * KVM_SET_CPUID2, and then with an empty guest CPUID. Except
+ * for off-by-default xfeatures, e.g. AMX, KVM is supposed to
+ * allow KVM_SET_XSAVE regardless of guest CPUID. Manually
+ * load only XSAVE state, MSRs in particular have a much more
+ * convoluted ABI.
+ *
+ * Load two versions of XSAVE state: one with the actual guest
+ * XSAVE state, and one with all supported features forced "on"
+ * in xstate_bv, e.g. to ensure that KVM allows loading all
+ * supported features, even if something goes awry in saving
+ * the original snapshot.
+ */
+ xstate_bv = (void *)&((uint8_t *)state->xsave->region)[512];
+ saved_xstate_bv = *xstate_bv;
+
+ vcpuN = __vm_vcpu_add(vm, vcpu->id + 1);
+ vcpu_xsave_set(vcpuN, state->xsave);
+ *xstate_bv = kvm_cpu_supported_xcr0();
+ vcpu_xsave_set(vcpuN, state->xsave);
+
+ vcpu_init_cpuid(vcpuN, &empty_cpuid);
+ vcpu_xsave_set(vcpuN, state->xsave);
+ *xstate_bv = saved_xstate_bv;
+ vcpu_xsave_set(vcpuN, state->xsave);
+
+ kvm_x86_state_cleanup(state);
memset(&regs2, 0, sizeof(regs2));
- vcpu_regs_get(vm, VCPU_ID, &regs2);
+ vcpu_regs_get(vcpu, &regs2);
TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
"Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
(ulong) regs2.rdi, (ulong) regs2.rsi);
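
The pointer arithmetic on state->xsave->region above depends on the architectural XSAVE layout: the 512-byte legacy FXSAVE area is followed by the 64-byte XSAVE header, whose first 8 bytes are XSTATE_BV. A hedged helper that extracts the same field from a struct kvm_xsave as returned by KVM_GET_XSAVE (the helper name is illustrative):

    #include <stdint.h>
    #include <string.h>
    #include <linux/kvm.h>

    /* XSTATE_BV lives in the first 8 bytes of the XSAVE header at byte offset 512. */
    static uint64_t xsave_get_xstate_bv(const struct kvm_xsave *xsave)
    {
            uint64_t xstate_bv;

            memcpy(&xstate_bv, (const uint8_t *)xsave->region + 512, sizeof(xstate_bv));
            return xstate_bv;
    }
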
diff --git a/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c b/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c
new file mode 100644
index 000000000000..32bef39bec21
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * svm_int_ctl_test
+ *
+ * Copyright (C) 2021, Red Hat, Inc.
+ *
+ * Nested SVM testing: test simultaneous use of V_IRQ from L1 and L0.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "apic.h"
+
+bool vintr_irq_called;
+bool intr_irq_called;
+
+#define VINTR_IRQ_NUMBER 0x20
+#define INTR_IRQ_NUMBER 0x30
+
+static void vintr_irq_handler(struct ex_regs *regs)
+{
+ vintr_irq_called = true;
+}
+
+static void intr_irq_handler(struct ex_regs *regs)
+{
+ x2apic_write_reg(APIC_EOI, 0x00);
+ intr_irq_called = true;
+}
+
+static void l2_guest_code(struct svm_test_data *svm)
+{
+ /* This code raises interrupt INTR_IRQ_NUMBER in the L1's LAPIC,
+ * and since L1 didn't enable virtual interrupt masking,
+ * L2 should receive it and not L1.
+ *
+ * L2 also has virtual interrupt 'VINTR_IRQ_NUMBER' pending in V_IRQ
+ * so it should also receive it after the following 'sti'.
+ */
+ x2apic_write_reg(APIC_ICR,
+ APIC_DEST_SELF | APIC_INT_ASSERT | INTR_IRQ_NUMBER);
+
+ __asm__ __volatile__(
+ "sti\n"
+ "nop\n"
+ );
+
+ GUEST_ASSERT(vintr_irq_called);
+ GUEST_ASSERT(intr_irq_called);
+
+ __asm__ __volatile__(
+ "vmcall\n"
+ );
+}
+
+static void l1_guest_code(struct svm_test_data *svm)
+{
+ #define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ struct vmcb *vmcb = svm->vmcb;
+
+ x2apic_enable();
+
+ /* Prepare for L2 execution. */
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /* No virtual interrupt masking */
+ vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
+
+ /* No intercepts for real and virtual interrupts */
+ vmcb->control.intercept &= ~(BIT(INTERCEPT_INTR) | BIT(INTERCEPT_VINTR));
+
+ /* Make a virtual interrupt VINTR_IRQ_NUMBER pending */
+ vmcb->control.int_ctl |= V_IRQ_MASK | (0x1 << V_INTR_PRIO_SHIFT);
+ vmcb->control.int_vector = VINTR_IRQ_NUMBER;
+
+ run_guest(vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ vm_vaddr_t svm_gva;
+ struct kvm_vm *vm;
+ struct ucall uc;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+
+ vm_install_exception_handler(vm, VINTR_IRQ_NUMBER, vintr_irq_handler);
+ vm_install_exception_handler(vm, INTR_IRQ_NUMBER, intr_irq_handler);
+
+ vcpu_alloc_svm(vm, &svm_gva);
+ vcpu_args_set(vcpu, 1, svm_gva);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ /* NOT REACHED */
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+ }
+done:
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c b/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c
new file mode 100644
index 000000000000..d6fcdcc3af31
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * svm_nested_shutdown_test
+ *
+ * Copyright (C) 2022, Red Hat, Inc.
+ *
+ * Nested SVM testing: test that unintercepted shutdown in L2 doesn't crash the host
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+
+static void l2_guest_code(struct svm_test_data *svm)
+{
+ __asm__ __volatile__("ud2");
+}
+
+static void l1_guest_code(struct svm_test_data *svm, struct idt_entry *idt)
+{
+ #define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ struct vmcb *vmcb = svm->vmcb;
+
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ vmcb->control.intercept &= ~(BIT(INTERCEPT_SHUTDOWN));
+
+ idt[6].p = 0; // #UD is intercepted but its injection will cause #NP
+ idt[11].p = 0; // #NP is not intercepted and will cause another
+ // #NP that will be converted to #DF
+ idt[8].p = 0; // #DF will cause #NP which will cause SHUTDOWN
+
+ run_guest(vmcb, svm->vmcb_gpa);
+
+ /* should not reach here */
+ GUEST_ASSERT(0);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ vm_vaddr_t svm_gva;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+
+ vcpu_alloc_svm(vm, &svm_gva);
+
+ vcpu_args_set(vcpu, 2, svm_gva, vm->idt);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SHUTDOWN);
+
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c b/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c
new file mode 100644
index 000000000000..0c7ce3d4e83a
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c
@@ -0,0 +1,213 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2022 Oracle and/or its affiliates.
+ *
+ * Based on:
+ * svm_int_ctl_test
+ *
+ * Copyright (C) 2021, Red Hat, Inc.
+ *
+ */
+#include <stdatomic.h>
+#include <stdio.h>
+#include <unistd.h>
+#include "apic.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "test_util.h"
+
+#define INT_NR 0x20
+
+static_assert(ATOMIC_INT_LOCK_FREE == 2, "atomic int is not lockless");
+
+static unsigned int bp_fired;
+static void guest_bp_handler(struct ex_regs *regs)
+{
+ bp_fired++;
+}
+
+static unsigned int int_fired;
+static void l2_guest_code_int(void);
+
+static void guest_int_handler(struct ex_regs *regs)
+{
+ int_fired++;
+ GUEST_ASSERT_EQ(regs->rip, (unsigned long)l2_guest_code_int);
+}
+
+static void l2_guest_code_int(void)
+{
+ GUEST_ASSERT_EQ(int_fired, 1);
+
+ /*
+ * Same as the vmmcall() function, but with a ud2 sneaked after the
+ * vmmcall. The caller injects an exception with the return address
+ * increased by 2, so the "pop rbp" must be after the ud2 and we cannot
+ * use vmmcall() directly.
+ */
+ __asm__ __volatile__("push %%rbp; vmmcall; ud2; pop %%rbp"
+ : : "a"(0xdeadbeef), "c"(0xbeefdead)
+ : "rbx", "rdx", "rsi", "rdi", "r8", "r9",
+ "r10", "r11", "r12", "r13", "r14", "r15");
+
+ GUEST_ASSERT_EQ(bp_fired, 1);
+ hlt();
+}
+
+static atomic_int nmi_stage;
+#define nmi_stage_get() atomic_load_explicit(&nmi_stage, memory_order_acquire)
+#define nmi_stage_inc() atomic_fetch_add_explicit(&nmi_stage, 1, memory_order_acq_rel)
+static void guest_nmi_handler(struct ex_regs *regs)
+{
+ nmi_stage_inc();
+
+ if (nmi_stage_get() == 1) {
+ vmmcall();
+ GUEST_FAIL("Unexpected resume after VMMCALL");
+ } else {
+ GUEST_ASSERT_EQ(nmi_stage_get(), 3);
+ GUEST_DONE();
+ }
+}
+
+static void l2_guest_code_nmi(void)
+{
+ ud2();
+}
+
+static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t idt_alt)
+{
+ #define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ struct vmcb *vmcb = svm->vmcb;
+
+ if (is_nmi)
+ x2apic_enable();
+
+ /* Prepare for L2 execution. */
+ generic_svm_setup(svm,
+ is_nmi ? l2_guest_code_nmi : l2_guest_code_int,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ vmcb->control.intercept_exceptions |= BIT(PF_VECTOR) | BIT(UD_VECTOR);
+ vmcb->control.intercept |= BIT(INTERCEPT_NMI) | BIT(INTERCEPT_HLT);
+
+ if (is_nmi) {
+ vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
+ } else {
+ vmcb->control.event_inj = INT_NR | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_SOFT;
+ /* The return address pushed on stack */
+ vmcb->control.next_rip = vmcb->save.rip;
+ }
+
+ run_guest(vmcb, svm->vmcb_gpa);
+ __GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL,
+		       "Expected VMMCALL #VMEXIT, got '0x%x', info1 = '0x%lx', info2 = '0x%lx'",
+ vmcb->control.exit_code,
+ vmcb->control.exit_info_1, vmcb->control.exit_info_2);
+
+ if (is_nmi) {
+ clgi();
+ x2apic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_NMI);
+
+ GUEST_ASSERT_EQ(nmi_stage_get(), 1);
+ nmi_stage_inc();
+
+ stgi();
+ /* self-NMI happens here */
+ while (true)
+ cpu_relax();
+ }
+
+ /* Skip over VMMCALL */
+ vmcb->save.rip += 3;
+
+ /* Switch to alternate IDT to cause intervening NPF again */
+ vmcb->save.idtr.base = idt_alt;
+ vmcb->control.clean = 0; /* &= ~BIT(VMCB_DT) would be enough */
+
+ vmcb->control.event_inj = BP_VECTOR | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_EXEPT;
+ /* The return address pushed on stack, skip over UD2 */
+ vmcb->control.next_rip = vmcb->save.rip + 2;
+
+ run_guest(vmcb, svm->vmcb_gpa);
+ __GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_HLT,
+		       "Expected HLT #VMEXIT, got '0x%x', info1 = '0x%lx', info2 = '0x%lx'",
+ vmcb->control.exit_code,
+ vmcb->control.exit_info_1, vmcb->control.exit_info_2);
+
+ GUEST_DONE();
+}
+
+static void run_test(bool is_nmi)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ vm_vaddr_t svm_gva;
+ vm_vaddr_t idt_alt_vm;
+ struct kvm_guest_debug debug;
+
+ pr_info("Running %s test\n", is_nmi ? "NMI" : "soft int");
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+
+ vm_install_exception_handler(vm, NMI_VECTOR, guest_nmi_handler);
+ vm_install_exception_handler(vm, BP_VECTOR, guest_bp_handler);
+ vm_install_exception_handler(vm, INT_NR, guest_int_handler);
+
+ vcpu_alloc_svm(vm, &svm_gva);
+
+ if (!is_nmi) {
+ void *idt, *idt_alt;
+
+ idt_alt_vm = vm_vaddr_alloc_page(vm);
+ idt_alt = addr_gva2hva(vm, idt_alt_vm);
+ idt = addr_gva2hva(vm, vm->idt);
+ memcpy(idt_alt, idt, getpagesize());
+ } else {
+ idt_alt_vm = 0;
+ }
+ vcpu_args_set(vcpu, 3, svm_gva, (uint64_t)is_nmi, (uint64_t)idt_alt_vm);
+
+ memset(&debug, 0, sizeof(debug));
+ vcpu_guest_debug_set(vcpu, &debug);
+
+ struct ucall uc;
+
+ alarm(2);
+ vcpu_run(vcpu);
+ alarm(0);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ /* NOT REACHED */
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+ }
+done:
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
+
+ TEST_ASSERT(kvm_cpu_has(X86_FEATURE_NRIPS),
+ "KVM with nSVM is supposed to unconditionally advertise nRIP Save");
+
+ atomic_init(&nmi_stage, 0);
+
+ run_test(false);
+ run_test(true);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c b/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c
index 0e1adb4e3199..8a62cca28cfb 100644
--- a/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c
+++ b/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c
@@ -12,10 +12,6 @@
#include "processor.h"
#include "svm_util.h"
-#define VCPU_ID 5
-
-static struct kvm_vm *vm;
-
static void l2_guest_code(struct svm_test_data *svm)
{
__asm__ __volatile__("vmcall");
@@ -39,29 +35,26 @@ static void l1_guest_code(struct svm_test_data *svm)
int main(int argc, char *argv[])
{
+ struct kvm_vcpu *vcpu;
vm_vaddr_t svm_gva;
+ struct kvm_vm *vm;
- nested_svm_check_supported();
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
- vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
- vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
vcpu_alloc_svm(vm, &svm_gva);
- vcpu_args_set(vm, VCPU_ID, 1, svm_gva);
+ vcpu_args_set(vcpu, 1, svm_gva);
for (;;) {
- volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
struct ucall uc;
- vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
- switch (get_ucall(vm, VCPU_ID, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_ABORT:
- TEST_FAIL("%s", (const char *)uc.args[0]);
+ REPORT_GUEST_ASSERT(uc);
/* NOT REACHED */
case UCALL_SYNC:
break;
diff --git a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
index d672f0a473f8..adb5593daf48 100644
--- a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
@@ -15,15 +15,19 @@
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
+#include <pthread.h>
+#include "kvm_test_harness.h"
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"
-#define VCPU_ID 5
-
#define UCALL_PIO_PORT ((uint16_t)0x1000)
+struct ucall uc_none = {
+ .cmd = UCALL_NONE,
+};
+
/*
* ucall is embedded here to protect against compiler reshuffling registers
* before calling a function. In this test we only need to get KVM_EXIT_IO
@@ -34,15 +38,18 @@ void guest_code(void)
asm volatile("1: in %[port], %%al\n"
"add $0x1, %%rbx\n"
"jmp 1b"
- : : [port] "d" (UCALL_PIO_PORT) : "rax", "rbx");
+ : : [port] "d" (UCALL_PIO_PORT), "D" (&uc_none)
+ : "rax", "rbx");
}
+KVM_ONE_VCPU_TEST_SUITE(sync_regs_test);
+
static void compare_regs(struct kvm_regs *left, struct kvm_regs *right)
{
#define REG_COMPARE(reg) \
TEST_ASSERT(left->reg == right->reg, \
"Register " #reg \
- " values did not match: 0x%llx, 0x%llx\n", \
+ " values did not match: 0x%llx, 0x%llx", \
left->reg, right->reg)
REG_COMPARE(rax);
REG_COMPARE(rbx);
@@ -77,80 +84,205 @@ static void compare_vcpu_events(struct kvm_vcpu_events *left,
#define TEST_SYNC_FIELDS (KVM_SYNC_X86_REGS|KVM_SYNC_X86_SREGS|KVM_SYNC_X86_EVENTS)
#define INVALID_SYNC_FIELD 0x80000000
-int main(int argc, char *argv[])
+/*
+ * Set an exception as pending *and* injected while KVM is processing events.
+ * KVM is supposed to ignore/drop pending exceptions if userspace is also
+ * requesting that an exception be injected.
+ */
+static void *race_events_inj_pen(void *arg)
{
- struct kvm_vm *vm;
- struct kvm_run *run;
- struct kvm_regs regs;
- struct kvm_sregs sregs;
- struct kvm_vcpu_events events;
- int rv, cap;
+ struct kvm_run *run = (struct kvm_run *)arg;
+ struct kvm_vcpu_events *events = &run->s.regs.events;
- /* Tell stdout not to buffer its content */
- setbuf(stdout, NULL);
+ WRITE_ONCE(events->exception.nr, UD_VECTOR);
- cap = kvm_check_cap(KVM_CAP_SYNC_REGS);
- if ((cap & TEST_SYNC_FIELDS) != TEST_SYNC_FIELDS) {
- print_skip("KVM_CAP_SYNC_REGS not supported");
- exit(KSFT_SKIP);
+ for (;;) {
+ WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_EVENTS);
+ WRITE_ONCE(events->flags, 0);
+ WRITE_ONCE(events->exception.injected, 1);
+ WRITE_ONCE(events->exception.pending, 1);
+
+ pthread_testcancel();
+ }
+
+ return NULL;
+}
+
+/*
+ * Set an invalid exception vector while KVM is processing events. KVM is
+ * supposed to reject any vector >= 32, as well as NMIs (vector 2).
+ */
+static void *race_events_exc(void *arg)
+{
+ struct kvm_run *run = (struct kvm_run *)arg;
+ struct kvm_vcpu_events *events = &run->s.regs.events;
+
+ for (;;) {
+ WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_EVENTS);
+ WRITE_ONCE(events->flags, 0);
+ WRITE_ONCE(events->exception.nr, UD_VECTOR);
+ WRITE_ONCE(events->exception.pending, 1);
+ WRITE_ONCE(events->exception.nr, 255);
+
+ pthread_testcancel();
+ }
+
+ return NULL;
+}
+
+/*
+ * Toggle CR4.PAE while KVM is processing SREGS; EFER.LME=1 with CR4.PAE=0 is
+ * illegal, and KVM's MMU relies heavily on vCPU state being valid.
+ */
+static noinline void *race_sregs_cr4(void *arg)
+{
+ struct kvm_run *run = (struct kvm_run *)arg;
+ __u64 *cr4 = &run->s.regs.sregs.cr4;
+ __u64 pae_enabled = *cr4;
+ __u64 pae_disabled = *cr4 & ~X86_CR4_PAE;
+
+ for (;;) {
+ WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_SREGS);
+ WRITE_ONCE(*cr4, pae_enabled);
+ asm volatile(".rept 512\n\t"
+ "nop\n\t"
+ ".endr");
+ WRITE_ONCE(*cr4, pae_disabled);
+
+ pthread_testcancel();
}
- if ((cap & INVALID_SYNC_FIELD) != 0) {
- print_skip("The \"invalid\" field is not invalid");
- exit(KSFT_SKIP);
+
+ return NULL;
+}
+
+static void race_sync_regs(struct kvm_vcpu *vcpu, void *racer)
+{
+ const time_t TIMEOUT = 2; /* seconds, roughly */
+ struct kvm_x86_state *state;
+ struct kvm_translation tr;
+ struct kvm_run *run;
+ pthread_t thread;
+ time_t t;
+
+ run = vcpu->run;
+
+ run->kvm_valid_regs = KVM_SYNC_X86_SREGS;
+ vcpu_run(vcpu);
+ run->kvm_valid_regs = 0;
+
+ /* Save state *before* spawning the thread that mucks with vCPU state. */
+ state = vcpu_save_state(vcpu);
+
+ /*
+ * Selftests run 64-bit guests by default, so both EFER.LME and CR4.PAE
+ * should already be set in guest state.
+ */
+ TEST_ASSERT((run->s.regs.sregs.cr4 & X86_CR4_PAE) &&
+ (run->s.regs.sregs.efer & EFER_LME),
+ "vCPU should be in long mode, CR4.PAE=%d, EFER.LME=%d",
+ !!(run->s.regs.sregs.cr4 & X86_CR4_PAE),
+ !!(run->s.regs.sregs.efer & EFER_LME));
+
+ TEST_ASSERT_EQ(pthread_create(&thread, NULL, racer, (void *)run), 0);
+
+ for (t = time(NULL) + TIMEOUT; time(NULL) < t;) {
+ /*
+ * Reload known good state if the vCPU triple faults, e.g. due
+ * to the unhandled #GPs being injected. VMX preserves state
+ * on shutdown, but SVM synthesizes an INIT as the VMCB state
+ * is architecturally undefined on triple fault.
+ */
+ if (!__vcpu_run(vcpu) && run->exit_reason == KVM_EXIT_SHUTDOWN)
+ vcpu_load_state(vcpu, state);
+
+ if (racer == race_sregs_cr4) {
+ tr = (struct kvm_translation) { .linear_address = 0 };
+ __vcpu_ioctl(vcpu, KVM_TRANSLATE, &tr);
+ }
}
- /* Create VM */
- vm = vm_create_default(VCPU_ID, 0, guest_code);
+ TEST_ASSERT_EQ(pthread_cancel(thread), 0);
+ TEST_ASSERT_EQ(pthread_join(thread, NULL), 0);
- run = vcpu_state(vm, VCPU_ID);
+ kvm_x86_state_cleanup(state);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, read_invalid, guest_code)
+{
+ struct kvm_run *run = vcpu->run;
+ int rv;
/* Request reading invalid register set from VCPU. */
run->kvm_valid_regs = INVALID_SYNC_FIELD;
- rv = _vcpu_run(vm, VCPU_ID);
+ rv = _vcpu_run(vcpu);
TEST_ASSERT(rv < 0 && errno == EINVAL,
- "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n",
+ "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d",
rv);
- vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0;
+ run->kvm_valid_regs = 0;
run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
- rv = _vcpu_run(vm, VCPU_ID);
+ rv = _vcpu_run(vcpu);
TEST_ASSERT(rv < 0 && errno == EINVAL,
- "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n",
+ "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d",
rv);
- vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0;
+ run->kvm_valid_regs = 0;
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, set_invalid, guest_code)
+{
+ struct kvm_run *run = vcpu->run;
+ int rv;
/* Request setting invalid register set into VCPU. */
run->kvm_dirty_regs = INVALID_SYNC_FIELD;
- rv = _vcpu_run(vm, VCPU_ID);
+ rv = _vcpu_run(vcpu);
TEST_ASSERT(rv < 0 && errno == EINVAL,
- "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n",
+ "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d",
rv);
- vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0;
+ run->kvm_dirty_regs = 0;
run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
- rv = _vcpu_run(vm, VCPU_ID);
+ rv = _vcpu_run(vcpu);
TEST_ASSERT(rv < 0 && errno == EINVAL,
- "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n",
+ "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d",
rv);
- vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0;
+ run->kvm_dirty_regs = 0;
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, req_and_verify_all_valid, guest_code)
+{
+ struct kvm_run *run = vcpu->run;
+ struct kvm_vcpu_events events;
+ struct kvm_sregs sregs;
+ struct kvm_regs regs;
/* Request and verify all valid register sets. */
/* TODO: BUILD TIME CHECK: TEST_ASSERT(KVM_SYNC_X86_NUM_FIELDS != 3); */
run->kvm_valid_regs = TEST_SYNC_FIELDS;
- rv = _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Unexpected exit reason: %u (%s),\n",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
- vcpu_regs_get(vm, VCPU_ID, &regs);
+ vcpu_regs_get(vcpu, &regs);
compare_regs(&regs, &run->s.regs.regs);
- vcpu_sregs_get(vm, VCPU_ID, &sregs);
+ vcpu_sregs_get(vcpu, &sregs);
compare_sregs(&sregs, &run->s.regs.sregs);
- vcpu_events_get(vm, VCPU_ID, &events);
+ vcpu_events_get(vcpu, &events);
compare_vcpu_events(&events, &run->s.regs.events);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, set_and_verify_various, guest_code)
+{
+ struct kvm_run *run = vcpu->run;
+ struct kvm_vcpu_events events;
+ struct kvm_sregs sregs;
+ struct kvm_regs regs;
+
+ /* Run once to get register set */
+ run->kvm_valid_regs = TEST_SYNC_FIELDS;
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
/* Set and verify various register values. */
run->s.regs.regs.rbx = 0xBAD1DEA;
@@ -159,11 +291,8 @@ int main(int argc, char *argv[])
run->kvm_valid_regs = TEST_SYNC_FIELDS;
run->kvm_dirty_regs = KVM_SYNC_X86_REGS | KVM_SYNC_X86_SREGS;
- rv = _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Unexpected exit reason: %u (%s),\n",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
TEST_ASSERT(run->s.regs.regs.rbx == 0xBAD1DEA + 1,
"rbx sync regs value incorrect 0x%llx.",
run->s.regs.regs.rbx);
@@ -171,14 +300,19 @@ int main(int argc, char *argv[])
"apic_base sync regs value incorrect 0x%llx.",
run->s.regs.sregs.apic_base);
- vcpu_regs_get(vm, VCPU_ID, &regs);
+ vcpu_regs_get(vcpu, &regs);
compare_regs(&regs, &run->s.regs.regs);
- vcpu_sregs_get(vm, VCPU_ID, &sregs);
+ vcpu_sregs_get(vcpu, &sregs);
compare_sregs(&sregs, &run->s.regs.sregs);
- vcpu_events_get(vm, VCPU_ID, &events);
+ vcpu_events_get(vcpu, &events);
compare_vcpu_events(&events, &run->s.regs.events);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, clear_kvm_dirty_regs_bits, guest_code)
+{
+ struct kvm_run *run = vcpu->run;
/* Clear kvm_dirty_regs bits, verify new s.regs values are
* overwritten with existing guest values.
@@ -186,14 +320,22 @@ int main(int argc, char *argv[])
run->kvm_valid_regs = TEST_SYNC_FIELDS;
run->kvm_dirty_regs = 0;
run->s.regs.regs.rbx = 0xDEADBEEF;
- rv = _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Unexpected exit reason: %u (%s),\n",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
TEST_ASSERT(run->s.regs.regs.rbx != 0xDEADBEEF,
"rbx sync regs value incorrect 0x%llx.",
run->s.regs.regs.rbx);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, clear_kvm_valid_and_dirty_regs, guest_code)
+{
+ struct kvm_run *run = vcpu->run;
+ struct kvm_regs regs;
+
+ /* Run once to get register set */
+ run->kvm_valid_regs = TEST_SYNC_FIELDS;
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
/* Clear kvm_valid_regs bits and kvm_dirty_bits.
* Verify s.regs values are not overwritten with existing guest values
@@ -202,20 +344,29 @@ int main(int argc, char *argv[])
run->kvm_valid_regs = 0;
run->kvm_dirty_regs = 0;
run->s.regs.regs.rbx = 0xAAAA;
+ vcpu_regs_get(vcpu, &regs);
regs.rbx = 0xBAC0;
- vcpu_regs_set(vm, VCPU_ID, &regs);
- rv = _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Unexpected exit reason: %u (%s),\n",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
+ vcpu_regs_set(vcpu, &regs);
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
TEST_ASSERT(run->s.regs.regs.rbx == 0xAAAA,
"rbx sync regs value incorrect 0x%llx.",
run->s.regs.regs.rbx);
- vcpu_regs_get(vm, VCPU_ID, &regs);
+ vcpu_regs_get(vcpu, &regs);
TEST_ASSERT(regs.rbx == 0xBAC0 + 1,
"rbx guest value incorrect 0x%llx.",
regs.rbx);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, clear_kvm_valid_regs_bits, guest_code)
+{
+ struct kvm_run *run = vcpu->run;
+ struct kvm_regs regs;
+
+ /* Run once to get register set */
+ run->kvm_valid_regs = TEST_SYNC_FIELDS;
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
/* Clear kvm_valid_regs bits. Verify s.regs values are not overwritten
* with existing guest values but that guest values are overwritten
@@ -224,20 +375,39 @@ int main(int argc, char *argv[])
run->kvm_valid_regs = 0;
run->kvm_dirty_regs = TEST_SYNC_FIELDS;
run->s.regs.regs.rbx = 0xBBBB;
- rv = _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Unexpected exit reason: %u (%s),\n",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
TEST_ASSERT(run->s.regs.regs.rbx == 0xBBBB,
"rbx sync regs value incorrect 0x%llx.",
run->s.regs.regs.rbx);
- vcpu_regs_get(vm, VCPU_ID, &regs);
+ vcpu_regs_get(vcpu, &regs);
TEST_ASSERT(regs.rbx == 0xBBBB + 1,
"rbx guest value incorrect 0x%llx.",
regs.rbx);
+}
- kvm_vm_free(vm);
+KVM_ONE_VCPU_TEST(sync_regs_test, race_cr4, guest_code)
+{
+ race_sync_regs(vcpu, race_sregs_cr4);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, race_exc, guest_code)
+{
+ race_sync_regs(vcpu, race_events_exc);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, race_inj_pen, guest_code)
+{
+ race_sync_regs(vcpu, race_events_inj_pen);
+}
+
+int main(int argc, char *argv[])
+{
+ int cap;
+
+ cap = kvm_check_cap(KVM_CAP_SYNC_REGS);
+ TEST_REQUIRE((cap & TEST_SYNC_FIELDS) == TEST_SYNC_FIELDS);
+ TEST_REQUIRE(!(cap & INVALID_SYNC_FIELD));
- return 0;
+ return test_harness_run(argc, argv);
}
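
The converted test above drives KVM_CAP_SYNC_REGS entirely through the shared kvm_run page. As a minimal sketch (not part of the patch, and assuming the selftest helpers shown above, e.g. vm_create_with_one_vcpu() and vcpu_run(), are available), the core round-trip looks like this:

static void sync_regs_round_trip(void)
{
	struct kvm_vcpu *vcpu;
	struct kvm_vm *vm;
	struct kvm_run *run;

	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
	run = vcpu->run;

	/* Ask KVM to mirror the GPRs into run->s.regs on the next exit. */
	run->kvm_valid_regs = KVM_SYNC_X86_REGS;
	vcpu_run(vcpu);

	/* Tweak a register in place and push it back in on the next KVM_RUN. */
	run->s.regs.regs.rbx += 1;
	run->kvm_dirty_regs = KVM_SYNC_X86_REGS;
	vcpu_run(vcpu);

	kvm_vm_free(vm);
}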
diff --git a/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c b/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c
new file mode 100644
index 000000000000..56306a19144a
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+#include "svm_util.h"
+
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "kselftest.h"
+
+#define ARBITRARY_IO_PORT 0x2000
+
+/* The virtual machine object. */
+static struct kvm_vm *vm;
+
+static void l2_guest_code(void)
+{
+ asm volatile("inb %%dx, %%al"
+ : : [port] "d" (ARBITRARY_IO_PORT) : "rax");
+}
+
+#define L2_GUEST_STACK_SIZE 64
+unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+void l1_guest_code_vmx(struct vmx_pages *vmx)
+{
+
+ GUEST_ASSERT(vmx->vmcs_gpa);
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx));
+ GUEST_ASSERT(load_vmcs(vmx));
+
+ prepare_vmcs(vmx, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ GUEST_ASSERT(!vmlaunch());
+ /* L2 should triple fault after a triple fault event is injected. */
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_TRIPLE_FAULT);
+ GUEST_DONE();
+}
+
+void l1_guest_code_svm(struct svm_test_data *svm)
+{
+ struct vmcb *vmcb = svm->vmcb;
+
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /* Don't intercept shutdown, to test the case where SVM allows L1 not to. */
+ vmcb->control.intercept &= ~(BIT(INTERCEPT_SHUTDOWN));
+
+ run_guest(vmcb, svm->vmcb_gpa);
+
+ /* should not reach here, L1 should crash */
+ GUEST_ASSERT(0);
+}
+
+int main(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_run *run;
+ struct kvm_vcpu_events events;
+ struct ucall uc;
+
+ bool has_vmx = kvm_cpu_has(X86_FEATURE_VMX);
+ bool has_svm = kvm_cpu_has(X86_FEATURE_SVM);
+
+ TEST_REQUIRE(has_vmx || has_svm);
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_TRIPLE_FAULT_EVENT));
+
+
+ if (has_vmx) {
+ vm_vaddr_t vmx_pages_gva;
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code_vmx);
+ vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ vcpu_args_set(vcpu, 1, vmx_pages_gva);
+ } else {
+ vm_vaddr_t svm_gva;
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code_svm);
+ vcpu_alloc_svm(vm, &svm_gva);
+ vcpu_args_set(vcpu, 1, svm_gva);
+ }
+
+ vm_enable_cap(vm, KVM_CAP_X86_TRIPLE_FAULT_EVENT, 1);
+ run = vcpu->run;
+ vcpu_run(vcpu);
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+ TEST_ASSERT(run->io.port == ARBITRARY_IO_PORT,
+ "Expected IN from port %d from L2, got port %d",
+ ARBITRARY_IO_PORT, run->io.port);
+ vcpu_events_get(vcpu, &events);
+ events.flags |= KVM_VCPUEVENT_VALID_TRIPLE_FAULT;
+ events.triple_fault.pending = true;
+ vcpu_events_set(vcpu, &events);
+ run->immediate_exit = true;
+ vcpu_run_complete_io(vcpu);
+
+ vcpu_events_get(vcpu, &events);
+ TEST_ASSERT(events.flags & KVM_VCPUEVENT_VALID_TRIPLE_FAULT,
+ "Triple fault event invalid");
+ TEST_ASSERT(events.triple_fault.pending,
+ "No triple fault pending");
+ vcpu_run(vcpu);
+
+
+ if (has_svm) {
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SHUTDOWN);
+ } else {
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_DONE:
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ default:
+ TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+ }
+ }
+ return 0;
+}
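
For reference, the userspace-side triple-fault injection that main() performs between the two KVM_RUNs boils down to the following sketch (queue_triple_fault() is hypothetical; it assumes the vcpu_events_get()/vcpu_events_set() wrappers used above):

static void queue_triple_fault(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_events events;

	/* Requires KVM_CAP_X86_TRIPLE_FAULT_EVENT to be enabled on the VM. */
	vcpu_events_get(vcpu, &events);
	events.flags |= KVM_VCPUEVENT_VALID_TRIPLE_FAULT;
	events.triple_fault.pending = true;
	vcpu_events_set(vcpu, &events);
}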
diff --git a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c
new file mode 100644
index 000000000000..12b0964f4f13
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Tests for MSR_IA32_TSC and MSR_IA32_TSC_ADJUST.
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+#include <stdio.h>
+#include <string.h>
+#include "kvm_util.h"
+#include "processor.h"
+
+#define UNITY (1ull << 30)
+#define HOST_ADJUST (UNITY * 64)
+#define GUEST_STEP (UNITY * 4)
+#define ROUND(x) ((x + UNITY / 2) & -UNITY)
+#define rounded_rdmsr(x) ROUND(rdmsr(x))
+#define rounded_host_rdmsr(x) ROUND(vcpu_get_msr(vcpu, x))
+
+static void guest_code(void)
+{
+ u64 val = 0;
+
+ GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val);
+ GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+ /* Guest: writes to MSR_IA32_TSC affect both MSRs. */
+ val = 1ull * GUEST_STEP;
+ wrmsr(MSR_IA32_TSC, val);
+ GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val);
+ GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+ /* Guest: writes to MSR_IA32_TSC_ADJUST affect both MSRs. */
+ GUEST_SYNC(2);
+ val = 2ull * GUEST_STEP;
+ wrmsr(MSR_IA32_TSC_ADJUST, val);
+ GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val);
+ GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+ /* Host: setting the TSC offset. */
+ GUEST_SYNC(3);
+ GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+ GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+ /*
+ * Guest: writes to MSR_IA32_TSC_ADJUST do not destroy the
+ * host-side offset and affect both MSRs.
+ */
+ GUEST_SYNC(4);
+ val = 3ull * GUEST_STEP;
+ wrmsr(MSR_IA32_TSC_ADJUST, val);
+ GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+ GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+ /*
+ * Guest: writes to MSR_IA32_TSC affect both MSRs, so the host-side
+ * offset is now visible in MSR_IA32_TSC_ADJUST.
+ */
+ GUEST_SYNC(5);
+ val = 4ull * GUEST_STEP;
+ wrmsr(MSR_IA32_TSC, val);
+ GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val);
+ GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val - HOST_ADJUST);
+
+ GUEST_DONE();
+}
+
+static void run_vcpu(struct kvm_vcpu *vcpu, int stage)
+{
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ if (!strcmp((const char *)uc.args[0], "hello") &&
+ uc.args[1] == stage + 1)
+ ksft_test_result_pass("stage %d passed\n", stage + 1);
+ else
+ ksft_test_result_fail(
+ "stage %d: Unexpected register values vmexit, got %lx",
+ stage + 1, (ulong)uc.args[1]);
+ return;
+ case UCALL_DONE:
+ ksft_test_result_pass("stage %d passed\n", stage + 1);
+ return;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ default:
+ TEST_ASSERT(false, "Unexpected exit: %s",
+ exit_reason_str(vcpu->run->exit_reason));
+ }
+}
+
+int main(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ uint64_t val;
+
+ ksft_print_header();
+ ksft_set_plan(5);
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ val = 0;
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+ /* Guest: writes to MSR_IA32_TSC affect both MSRs. */
+ run_vcpu(vcpu, 1);
+ val = 1ull * GUEST_STEP;
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+ /* Guest: writes to MSR_IA32_TSC_ADJUST affect both MSRs. */
+ run_vcpu(vcpu, 2);
+ val = 2ull * GUEST_STEP;
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+ /*
+ * Host: writes to MSR_IA32_TSC set the host-side offset
+ * and therefore do not change MSR_IA32_TSC_ADJUST.
+ */
+ vcpu_set_msr(vcpu, MSR_IA32_TSC, HOST_ADJUST + val);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+ run_vcpu(vcpu, 3);
+
+ /* Host: writes to MSR_IA32_TSC_ADJUST do not modify the TSC. */
+ vcpu_set_msr(vcpu, MSR_IA32_TSC_ADJUST, UNITY * 123456);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+ TEST_ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_TSC_ADJUST), UNITY * 123456);
+
+ /* Restore previous value. */
+ vcpu_set_msr(vcpu, MSR_IA32_TSC_ADJUST, val);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+ /*
+ * Guest: writes to MSR_IA32_TSC_ADJUST do not destroy the
+ * host-side offset and affect both MSRs.
+ */
+ run_vcpu(vcpu, 4);
+ val = 3ull * GUEST_STEP;
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+ /*
+ * Guest: writes to MSR_IA32_TSC affect both MSRs, so the host-side
+ * offset is now visible in MSR_IA32_TSC_ADJUST.
+ */
+ run_vcpu(vcpu, 5);
+ val = 4ull * GUEST_STEP;
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val - HOST_ADJUST);
+
+ kvm_vm_free(vm);
+
+ ksft_finished(); /* Print results and exit() accordingly */
+}
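
The ROUND() macro above masks out small guest/host TSC drift by rounding to the nearest multiple of UNITY (2^30 cycles), which is what makes the equality assertions stable. A standalone sketch of the arithmetic (plain C, independent of the test harness):

#include <stdio.h>
#include <stdint.h>

#define UNITY (1ull << 30)
#define ROUND(x) (((x) + UNITY / 2) & -UNITY)

int main(void)
{
	/* Values within half a UNITY of a multiple round to that multiple. */
	uint64_t just_below = 4 * UNITY - 123456;
	uint64_t just_above = 4 * UNITY + 123456;

	printf("%llu -> %llu\n", (unsigned long long)just_below,
	       (unsigned long long)ROUND(just_below));	/* 4 * UNITY */
	printf("%llu -> %llu\n", (unsigned long long)just_above,
	       (unsigned long long)ROUND(just_above));	/* 4 * UNITY */
	return 0;
}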
diff --git a/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c b/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c
new file mode 100644
index 000000000000..59c7304f805e
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright © 2021 Amazon.com, Inc. or its affiliates.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#include <stdint.h>
+#include <time.h>
+#include <sched.h>
+#include <signal.h>
+#include <pthread.h>
+
+#define NR_TEST_VCPUS 20
+
+static struct kvm_vm *vm;
+pthread_spinlock_t create_lock;
+
+#define TEST_TSC_KHZ 2345678UL
+#define TEST_TSC_OFFSET 200000000
+
+uint64_t tsc_sync;
+static void guest_code(void)
+{
+ uint64_t start_tsc, local_tsc, tmp;
+
+ start_tsc = rdtsc();
+ do {
+ tmp = READ_ONCE(tsc_sync);
+ local_tsc = rdtsc();
+ WRITE_ONCE(tsc_sync, local_tsc);
+ if (unlikely(local_tsc < tmp))
+ GUEST_SYNC_ARGS(0, local_tsc, tmp, 0, 0);
+
+ } while (local_tsc - start_tsc < 5000 * TEST_TSC_KHZ);
+
+ GUEST_DONE();
+}
+
+
+static void *run_vcpu(void *_cpu_nr)
+{
+ unsigned long vcpu_id = (unsigned long)_cpu_nr;
+ unsigned long failures = 0;
+ static bool first_cpu_done;
+ struct kvm_vcpu *vcpu;
+
+ /* The kernel is fine, but vm_vcpu_add() needs locking */
+ pthread_spin_lock(&create_lock);
+
+ vcpu = vm_vcpu_add(vm, vcpu_id, guest_code);
+
+ if (!first_cpu_done) {
+ first_cpu_done = true;
+ vcpu_set_msr(vcpu, MSR_IA32_TSC, TEST_TSC_OFFSET);
+ }
+
+ pthread_spin_unlock(&create_lock);
+
+ for (;;) {
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_DONE:
+ goto out;
+
+ case UCALL_SYNC:
+ printf("Guest %d sync %lx %lx %ld\n", vcpu->id,
+ uc.args[2], uc.args[3], uc.args[2] - uc.args[3]);
+ failures++;
+ break;
+
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+ out:
+ return (void *)failures;
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_TSC_CONTROL));
+
+ vm = vm_create(NR_TEST_VCPUS);
+ vm_ioctl(vm, KVM_SET_TSC_KHZ, (void *) TEST_TSC_KHZ);
+
+ pthread_spin_init(&create_lock, PTHREAD_PROCESS_PRIVATE);
+ pthread_t cpu_threads[NR_TEST_VCPUS];
+ unsigned long cpu;
+ for (cpu = 0; cpu < NR_TEST_VCPUS; cpu++)
+ pthread_create(&cpu_threads[cpu], NULL, run_vcpu, (void *)cpu);
+
+ unsigned long failures = 0;
+ for (cpu = 0; cpu < NR_TEST_VCPUS; cpu++) {
+ void *this_cpu_failures;
+ pthread_join(cpu_threads[cpu], &this_cpu_failures);
+ failures += (unsigned long)this_cpu_failures;
+ }
+
+ TEST_ASSERT(!failures, "TSC sync failed");
+ pthread_spin_destroy(&create_lock);
+ kvm_vm_free(vm);
+ return 0;
+}
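
Each vCPU thread above reports its failure count by returning it through pthread_join()'s void * result, which main() then accumulates. A standalone illustration of that aggregation pattern (not KVM-specific; build with -pthread):

#include <pthread.h>
#include <stdio.h>

#define NR_THREADS 4

/* Each worker returns its "failure" count cast through void *. */
static void *worker(void *arg)
{
	unsigned long id = (unsigned long)arg;

	/* Pretend odd-numbered workers each saw one failure. */
	return (void *)(id & 1);
}

int main(void)
{
	pthread_t threads[NR_THREADS];
	unsigned long failures = 0;
	unsigned long i;

	for (i = 0; i < NR_THREADS; i++)
		pthread_create(&threads[i], NULL, worker, (void *)i);

	for (i = 0; i < NR_THREADS; i++) {
		void *ret;

		pthread_join(threads[i], &ret);
		failures += (unsigned long)ret;
	}

	printf("total failures: %lu\n", failures);
	return 0;
}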
diff --git a/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c b/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c
new file mode 100644
index 000000000000..dcbb3c29fb8e
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c
@@ -0,0 +1,302 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ucna_injection_test
+ *
+ * Copyright (C) 2022, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Test that user space can inject UnCorrectable No Action required (UCNA)
+ * memory errors into the guest.
+ *
+ * The test starts one vCPU with MCG_CMCI_P enabled. It verifies that
+ * proper UCNA errors can be injected into a vCPU with MCG_CMCI_P and the
+ * corresponding per-bank control register (MCI_CTL2) bit enabled.
+ * The test also checks that the UCNA errors get recorded in the
+ * Machine Check bank registers regardless of whether the error signal
+ * interrupts are delivered to the guest.
+ *
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <pthread.h>
+#include <inttypes.h>
+#include <string.h>
+#include <time.h>
+
+#include "kvm_util_base.h"
+#include "kvm_util.h"
+#include "mce.h"
+#include "processor.h"
+#include "test_util.h"
+#include "apic.h"
+
+#define SYNC_FIRST_UCNA 9
+#define SYNC_SECOND_UCNA 10
+#define SYNC_GP 11
+#define FIRST_UCNA_ADDR 0xdeadbeef
+#define SECOND_UCNA_ADDR 0xcafeb0ba
+
+/*
+ * Vector for the CMCI interrupt.
+ * Value is arbitrary. Any value in 0x20-0xFF should work:
+ * https://wiki.osdev.org/Interrupt_Vector_Table
+ */
+#define CMCI_VECTOR 0xa9
+
+#define UCNA_BANK 0x7 // IMC0 bank
+
+#define MCI_CTL2_RESERVED_BIT BIT_ULL(29)
+
+static uint64_t supported_mcg_caps;
+
+/*
+ * Record state about the injected UCNA.
+ * Variables with the 'i_' prefix are recorded in the interrupt handler;
+ * variables without it are recorded in the guest's main execution
+ * thread.
+ */
+static volatile uint64_t i_ucna_rcvd;
+static volatile uint64_t i_ucna_addr;
+static volatile uint64_t ucna_addr;
+static volatile uint64_t ucna_addr2;
+
+struct thread_params {
+ struct kvm_vcpu *vcpu;
+ uint64_t *p_i_ucna_rcvd;
+ uint64_t *p_i_ucna_addr;
+ uint64_t *p_ucna_addr;
+ uint64_t *p_ucna_addr2;
+};
+
+static void verify_apic_base_addr(void)
+{
+ uint64_t msr = rdmsr(MSR_IA32_APICBASE);
+ uint64_t base = GET_APIC_BASE(msr);
+
+ GUEST_ASSERT(base == APIC_DEFAULT_GPA);
+}
+
+static void ucna_injection_guest_code(void)
+{
+ uint64_t ctl2;
+ verify_apic_base_addr();
+ xapic_enable();
+
+ /* Sets up the interrupt vector and enables per-bank CMCI signaling. */
+ xapic_write_reg(APIC_LVTCMCI, CMCI_VECTOR | APIC_DM_FIXED);
+ ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
+ wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_CMCI_EN);
+
+ /* Enables interrupt in guest. */
+ asm volatile("sti");
+
+ /* Let user space inject the first UCNA */
+ GUEST_SYNC(SYNC_FIRST_UCNA);
+
+ ucna_addr = rdmsr(MSR_IA32_MCx_ADDR(UCNA_BANK));
+
+ /* Disables the per-bank CMCI signaling. */
+ ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
+ wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 & ~MCI_CTL2_CMCI_EN);
+
+ /* Let the user space inject the second UCNA */
+ GUEST_SYNC(SYNC_SECOND_UCNA);
+
+ ucna_addr2 = rdmsr(MSR_IA32_MCx_ADDR(UCNA_BANK));
+ GUEST_DONE();
+}
+
+static void cmci_disabled_guest_code(void)
+{
+ uint64_t ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
+ wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_CMCI_EN);
+
+ GUEST_DONE();
+}
+
+static void cmci_enabled_guest_code(void)
+{
+ uint64_t ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
+ wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_RESERVED_BIT);
+
+ GUEST_DONE();
+}
+
+static void guest_cmci_handler(struct ex_regs *regs)
+{
+ i_ucna_rcvd++;
+ i_ucna_addr = rdmsr(MSR_IA32_MCx_ADDR(UCNA_BANK));
+ xapic_write_reg(APIC_EOI, 0);
+}
+
+static void guest_gp_handler(struct ex_regs *regs)
+{
+ GUEST_SYNC(SYNC_GP);
+}
+
+static void run_vcpu_expect_gp(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+ TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_SYNC,
+ "Expect UCALL_SYNC");
+ TEST_ASSERT(uc.args[1] == SYNC_GP, "#GP is expected.");
+ printf("vCPU received GP in guest.\n");
+}
+
+static void inject_ucna(struct kvm_vcpu *vcpu, uint64_t addr) {
+ /*
+ * A UCNA error is indicated with VAL=1, UC=1, PCC=0, S=0 and AR=0 in
+ * the IA32_MCi_STATUS register.
+ * MSCOD=1 (BIT[16] - MscodDataRdErr).
+ * MCACOD=0x0090 (Memory controller error format, channel 0)
+ */
+ uint64_t status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN |
+ MCI_STATUS_MISCV | MCI_STATUS_ADDRV | 0x10090;
+ struct kvm_x86_mce mce = {};
+ mce.status = status;
+ mce.mcg_status = 0;
+ /*
+ * MCM_ADDR_PHYS indicates the reported address is a physical address.
+ * The lowest 6 bits are the recoverable address LSB, i.e., the injected
+ * MCE is at 4KB granularity.
+ */
+ mce.misc = (MCM_ADDR_PHYS << 6) | 0xc;
+ mce.addr = addr;
+ mce.bank = UCNA_BANK;
+
+ vcpu_ioctl(vcpu, KVM_X86_SET_MCE, &mce);
+}
+
+static void *run_ucna_injection(void *arg)
+{
+ struct thread_params *params = (struct thread_params *)arg;
+ struct ucall uc;
+ int old;
+ int r;
+
+ r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
+ TEST_ASSERT(r == 0,
+ "pthread_setcanceltype failed with errno=%d",
+ r);
+
+ vcpu_run(params->vcpu);
+
+ TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO);
+ TEST_ASSERT(get_ucall(params->vcpu, &uc) == UCALL_SYNC,
+ "Expect UCALL_SYNC");
+ TEST_ASSERT(uc.args[1] == SYNC_FIRST_UCNA, "Injecting first UCNA.");
+
+ printf("Injecting first UCNA at %#x.\n", FIRST_UCNA_ADDR);
+
+ inject_ucna(params->vcpu, FIRST_UCNA_ADDR);
+ vcpu_run(params->vcpu);
+
+ TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO);
+ TEST_ASSERT(get_ucall(params->vcpu, &uc) == UCALL_SYNC,
+ "Expect UCALL_SYNC");
+ TEST_ASSERT(uc.args[1] == SYNC_SECOND_UCNA, "Injecting second UCNA.");
+
+ printf("Injecting second UCNA at %#x.\n", SECOND_UCNA_ADDR);
+
+ inject_ucna(params->vcpu, SECOND_UCNA_ADDR);
+ vcpu_run(params->vcpu);
+
+ TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO);
+ if (get_ucall(params->vcpu, &uc) == UCALL_ABORT) {
+ TEST_ASSERT(false, "vCPU assertion failure: %s.",
+ (const char *)uc.args[0]);
+ }
+
+ return NULL;
+}
+
+static void test_ucna_injection(struct kvm_vcpu *vcpu, struct thread_params *params)
+{
+ struct kvm_vm *vm = vcpu->vm;
+ params->vcpu = vcpu;
+ params->p_i_ucna_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&i_ucna_rcvd);
+ params->p_i_ucna_addr = (uint64_t *)addr_gva2hva(vm, (uint64_t)&i_ucna_addr);
+ params->p_ucna_addr = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ucna_addr);
+ params->p_ucna_addr2 = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ucna_addr2);
+
+ run_ucna_injection(params);
+
+ TEST_ASSERT(*params->p_i_ucna_rcvd == 1, "Only the first UCNA should be signaled.");
+ TEST_ASSERT(*params->p_i_ucna_addr == FIRST_UCNA_ADDR,
+ "Only the first UCNA's address should be recorded via the interrupt.");
+ TEST_ASSERT(*params->p_ucna_addr == FIRST_UCNA_ADDR,
+ "The first injected UCNA should be exposed via the registers.");
+ TEST_ASSERT(*params->p_ucna_addr2 == SECOND_UCNA_ADDR,
+ "The second injected UCNA should be exposed via the registers.");
+
+ printf("Test successful.\n"
+ "UCNA CMCI interrupts received: %ld\n"
+ "Last UCNA address received via CMCI: %lx\n"
+ "First UCNA address in vCPU thread: %lx\n"
+ "Second UCNA address in vCPU thread: %lx\n",
+ *params->p_i_ucna_rcvd, *params->p_i_ucna_addr,
+ *params->p_ucna_addr, *params->p_ucna_addr2);
+}
+
+static void setup_mce_cap(struct kvm_vcpu *vcpu, bool enable_cmci_p)
+{
+ uint64_t mcg_caps = MCG_CTL_P | MCG_SER_P | MCG_LMCE_P | KVM_MAX_MCE_BANKS;
+ if (enable_cmci_p)
+ mcg_caps |= MCG_CMCI_P;
+
+ mcg_caps &= supported_mcg_caps | MCG_CAP_BANKS_MASK;
+ vcpu_ioctl(vcpu, KVM_X86_SETUP_MCE, &mcg_caps);
+}
+
+static struct kvm_vcpu *create_vcpu_with_mce_cap(struct kvm_vm *vm, uint32_t vcpuid,
+ bool enable_cmci_p, void *guest_code)
+{
+ struct kvm_vcpu *vcpu = vm_vcpu_add(vm, vcpuid, guest_code);
+ setup_mce_cap(vcpu, enable_cmci_p);
+ return vcpu;
+}
+
+int main(int argc, char *argv[])
+{
+ struct thread_params params;
+ struct kvm_vm *vm;
+ struct kvm_vcpu *ucna_vcpu;
+ struct kvm_vcpu *cmcidis_vcpu;
+ struct kvm_vcpu *cmci_vcpu;
+
+ kvm_check_cap(KVM_CAP_MCE);
+
+ vm = __vm_create(VM_SHAPE_DEFAULT, 3, 0);
+
+ kvm_ioctl(vm->kvm_fd, KVM_X86_GET_MCE_CAP_SUPPORTED,
+ &supported_mcg_caps);
+
+ if (!(supported_mcg_caps & MCG_CMCI_P)) {
+ print_skip("MCG_CMCI_P is not supported");
+ exit(KSFT_SKIP);
+ }
+
+ ucna_vcpu = create_vcpu_with_mce_cap(vm, 0, true, ucna_injection_guest_code);
+ cmcidis_vcpu = create_vcpu_with_mce_cap(vm, 1, false, cmci_disabled_guest_code);
+ cmci_vcpu = create_vcpu_with_mce_cap(vm, 2, true, cmci_enabled_guest_code);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(ucna_vcpu);
+ vcpu_init_descriptor_tables(cmcidis_vcpu);
+ vcpu_init_descriptor_tables(cmci_vcpu);
+ vm_install_exception_handler(vm, CMCI_VECTOR, guest_cmci_handler);
+ vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
+
+ virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
+
+ test_ucna_injection(ucna_vcpu, &params);
+ run_vcpu_expect_gp(cmcidis_vcpu);
+ run_vcpu_expect_gp(cmci_vcpu);
+
+ kvm_vm_free(vm);
+}
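
inject_ucna() above encodes the error entirely through IA32_MCi_STATUS bits: a UCNA is "uncorrected but no action required", i.e. VAL and UC set while PCC, S and AR stay clear. A standalone sketch of that composition; the mask names match those used in the patch, but the bit positions below are stated per the x86 MCA architecture and are an assumption here, not taken from the patch:

#include <stdio.h>
#include <stdint.h>

/* IA32_MCi_STATUS bit positions per the x86 MCA architecture. */
#define MCI_STATUS_VAL   (1ULL << 63)	/* register contains valid data */
#define MCI_STATUS_UC    (1ULL << 61)	/* uncorrected error */
#define MCI_STATUS_EN    (1ULL << 60)	/* error reporting enabled */
#define MCI_STATUS_MISCV (1ULL << 59)	/* IA32_MCi_MISC is valid */
#define MCI_STATUS_ADDRV (1ULL << 58)	/* IA32_MCi_ADDR is valid */
#define MCI_STATUS_PCC   (1ULL << 57)	/* processor context corrupt */
#define MCI_STATUS_S     (1ULL << 56)	/* signaled via MCE */
#define MCI_STATUS_AR    (1ULL << 55)	/* action required */

int main(void)
{
	/*
	 * UCNA: VAL=1, UC=1, PCC=0, S=0, AR=0, with MSCOD=1 (bits 31:16)
	 * and MCACOD=0x0090 (bits 15:0), i.e. the 0x10090 used by the test.
	 */
	uint64_t status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN |
			  MCI_STATUS_MISCV | MCI_STATUS_ADDRV | 0x10090;

	printf("IA32_MCi_STATUS = %#llx\n", (unsigned long long)status);
	printf("PCC/S/AR clear: %d\n",
	       !(status & (MCI_STATUS_PCC | MCI_STATUS_S | MCI_STATUS_AR)));
	return 0;
}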
diff --git a/tools/testing/selftests/kvm/x86_64/userspace_io_test.c b/tools/testing/selftests/kvm/x86_64/userspace_io_test.c
new file mode 100644
index 000000000000..9481cbcf284f
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/userspace_io_test.c
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+
+static void guest_ins_port80(uint8_t *buffer, unsigned int count)
+{
+ unsigned long end;
+
+ if (count == 2)
+ end = (unsigned long)buffer + 1;
+ else
+ end = (unsigned long)buffer + 8192;
+
+ asm volatile("cld; rep; insb" : "+D"(buffer), "+c"(count) : "d"(0x80) : "memory");
+ GUEST_ASSERT_EQ(count, 0);
+ GUEST_ASSERT_EQ((unsigned long)buffer, end);
+}
+
+static void guest_code(void)
+{
+ uint8_t buffer[8192];
+ int i;
+
+ /*
+ * Special case tests. main() will adjust RCX 2 => 1 and 3 => 8192 to
+ * test that KVM doesn't explode when userspace modifies the "count" on
+ * a userspace I/O exit. KVM isn't required to play nice with the I/O
+ * itself as KVM doesn't support manipulating the count, it just needs
+ * to not explode or overflow a buffer.
+ */
+ guest_ins_port80(buffer, 2);
+ guest_ins_port80(buffer, 3);
+
+ /* Verify KVM fills the buffer correctly when not stuffing RCX. */
+ memset(buffer, 0, sizeof(buffer));
+ guest_ins_port80(buffer, 8192);
+ for (i = 0; i < 8192; i++)
+ __GUEST_ASSERT(buffer[i] == 0xaa,
+ "Expected '0xaa', got '0x%x' at buffer[%u]",
+ buffer[i], i);
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_regs regs;
+ struct kvm_run *run;
+ struct kvm_vm *vm;
+ struct ucall uc;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ run = vcpu->run;
+
+ memset(&regs, 0, sizeof(regs));
+
+ while (1) {
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ if (get_ucall(vcpu, &uc))
+ break;
+
+ TEST_ASSERT(run->io.port == 0x80,
+ "Expected I/O at port 0x80, got port 0x%x", run->io.port);
+
+ /*
+ * Modify the rep string count in RCX: 2 => 1 and 3 => 8192.
+ * Note, this abuses KVM's batching of rep string I/O to avoid
+ * getting stuck in an infinite loop. That behavior isn't in
+ * scope from a testing perspective as it's not ABI in any way,
+ * i.e. it really is abusing internal KVM knowledge.
+ */
+ vcpu_regs_get(vcpu, &regs);
+ if (regs.rcx == 2)
+ regs.rcx = 1;
+ if (regs.rcx == 3)
+ regs.rcx = 8192;
+ memset((void *)run + run->io.data_offset, 0xaa, 4096);
+ vcpu_regs_set(vcpu, &regs);
+ }
+
+ switch (uc.cmd) {
+ case UCALL_DONE:
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+
+ kvm_vm_free(vm);
+ return 0;
+}
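
When KVM exits with KVM_EXIT_IO for a rep INS, the data for the current batch lives inside the shared kvm_run page at run->io.data_offset, which is exactly where main() above memsets the 0xaa pattern. A hedged sketch of that host-side fill, assuming the selftest helpers used above (fill_pio_in_data() itself is hypothetical):

static void fill_pio_in_data(struct kvm_vcpu *vcpu, uint8_t pattern)
{
	struct kvm_run *run = vcpu->run;

	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
	TEST_ASSERT(run->io.direction == KVM_EXIT_IO_IN, "Expected an IN exit");

	/* One batch is io.size * io.count bytes starting at data_offset. */
	memset((void *)run + run->io.data_offset, pattern,
	       run->io.size * run->io.count);
}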
diff --git a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
new file mode 100644
index 000000000000..f4f61a2d2464
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
@@ -0,0 +1,780 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020, Google LLC.
+ *
+ * Tests for exiting into userspace on registered MSRs
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <sys/ioctl.h>
+
+#include "kvm_test_harness.h"
+#include "test_util.h"
+#include "kvm_util.h"
+#include "vmx.h"
+
+static bool fep_available;
+
+#define MSR_NON_EXISTENT 0x474f4f00
+
+static u64 deny_bits = 0;
+struct kvm_msr_filter filter_allow = {
+ .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
+ .ranges = {
+ {
+ .flags = KVM_MSR_FILTER_READ |
+ KVM_MSR_FILTER_WRITE,
+ .nmsrs = 1,
+ /* Test an MSR the kernel knows about. */
+ .base = MSR_IA32_XSS,
+ .bitmap = (uint8_t*)&deny_bits,
+ }, {
+ .flags = KVM_MSR_FILTER_READ |
+ KVM_MSR_FILTER_WRITE,
+ .nmsrs = 1,
+ /* Test an MSR the kernel doesn't know about. */
+ .base = MSR_IA32_FLUSH_CMD,
+ .bitmap = (uint8_t*)&deny_bits,
+ }, {
+ .flags = KVM_MSR_FILTER_READ |
+ KVM_MSR_FILTER_WRITE,
+ .nmsrs = 1,
+ /* Test a fabricated MSR that no one knows about. */
+ .base = MSR_NON_EXISTENT,
+ .bitmap = (uint8_t*)&deny_bits,
+ },
+ },
+};
+
+struct kvm_msr_filter filter_fs = {
+ .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
+ .ranges = {
+ {
+ .flags = KVM_MSR_FILTER_READ,
+ .nmsrs = 1,
+ .base = MSR_FS_BASE,
+ .bitmap = (uint8_t*)&deny_bits,
+ },
+ },
+};
+
+struct kvm_msr_filter filter_gs = {
+ .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
+ .ranges = {
+ {
+ .flags = KVM_MSR_FILTER_READ,
+ .nmsrs = 1,
+ .base = MSR_GS_BASE,
+ .bitmap = (uint8_t*)&deny_bits,
+ },
+ },
+};
+
+static uint64_t msr_non_existent_data;
+static int guest_exception_count;
+static u32 msr_reads, msr_writes;
+
+static u8 bitmap_00000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_00000000_write[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_40000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_c0000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_c0000000_read[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_deadbeef[1] = { 0x1 };
+
+static void deny_msr(uint8_t *bitmap, u32 msr)
+{
+ u32 idx = msr & (KVM_MSR_FILTER_MAX_BITMAP_SIZE - 1);
+
+ bitmap[idx / 8] &= ~(1 << (idx % 8));
+}
+
+static void prepare_bitmaps(void)
+{
+ memset(bitmap_00000000, 0xff, sizeof(bitmap_00000000));
+ memset(bitmap_00000000_write, 0xff, sizeof(bitmap_00000000_write));
+ memset(bitmap_40000000, 0xff, sizeof(bitmap_40000000));
+ memset(bitmap_c0000000, 0xff, sizeof(bitmap_c0000000));
+ memset(bitmap_c0000000_read, 0xff, sizeof(bitmap_c0000000_read));
+
+ deny_msr(bitmap_00000000_write, MSR_IA32_POWER_CTL);
+ deny_msr(bitmap_c0000000_read, MSR_SYSCALL_MASK);
+ deny_msr(bitmap_c0000000_read, MSR_GS_BASE);
+}
+
+struct kvm_msr_filter filter_deny = {
+ .flags = KVM_MSR_FILTER_DEFAULT_DENY,
+ .ranges = {
+ {
+ .flags = KVM_MSR_FILTER_READ,
+ .base = 0x00000000,
+ .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+ .bitmap = bitmap_00000000,
+ }, {
+ .flags = KVM_MSR_FILTER_WRITE,
+ .base = 0x00000000,
+ .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+ .bitmap = bitmap_00000000_write,
+ }, {
+ .flags = KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE,
+ .base = 0x40000000,
+ .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+ .bitmap = bitmap_40000000,
+ }, {
+ .flags = KVM_MSR_FILTER_READ,
+ .base = 0xc0000000,
+ .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+ .bitmap = bitmap_c0000000_read,
+ }, {
+ .flags = KVM_MSR_FILTER_WRITE,
+ .base = 0xc0000000,
+ .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+ .bitmap = bitmap_c0000000,
+ }, {
+ .flags = KVM_MSR_FILTER_WRITE | KVM_MSR_FILTER_READ,
+ .base = 0xdeadbeef,
+ .nmsrs = 1,
+ .bitmap = bitmap_deadbeef,
+ },
+ },
+};
+
+struct kvm_msr_filter no_filter_deny = {
+ .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
+};
+
+/*
+ * Note: Force test_rdmsr() to not be inlined to prevent the labels,
+ * rdmsr_start and rdmsr_end, from being defined multiple times.
+ */
+static noinline uint64_t test_rdmsr(uint32_t msr)
+{
+ uint32_t a, d;
+
+ guest_exception_count = 0;
+
+ __asm__ __volatile__("rdmsr_start: rdmsr; rdmsr_end:" :
+ "=a"(a), "=d"(d) : "c"(msr) : "memory");
+
+ return a | ((uint64_t) d << 32);
+}
+
+/*
+ * Note: Force test_wrmsr() to not be inlined to prevent the labels,
+ * wrmsr_start and wrmsr_end, from being defined multiple times.
+ */
+static noinline void test_wrmsr(uint32_t msr, uint64_t value)
+{
+ uint32_t a = value;
+ uint32_t d = value >> 32;
+
+ guest_exception_count = 0;
+
+ __asm__ __volatile__("wrmsr_start: wrmsr; wrmsr_end:" ::
+ "a"(a), "d"(d), "c"(msr) : "memory");
+}
+
+extern char rdmsr_start, rdmsr_end;
+extern char wrmsr_start, wrmsr_end;
+
+/*
+ * Note: Force test_em_rdmsr() to not be inlined to prevent the labels,
+ * em_rdmsr_start and em_rdmsr_end, from being defined multiple times.
+ */
+static noinline uint64_t test_em_rdmsr(uint32_t msr)
+{
+ uint32_t a, d;
+
+ guest_exception_count = 0;
+
+ __asm__ __volatile__(KVM_FEP "em_rdmsr_start: rdmsr; em_rdmsr_end:" :
+ "=a"(a), "=d"(d) : "c"(msr) : "memory");
+
+ return a | ((uint64_t) d << 32);
+}
+
+/*
+ * Note: Force test_em_wrmsr() to not be inlined to prevent the labels,
+ * em_wrmsr_start and em_wrmsr_end, from being defined multiple times.
+ */
+static noinline void test_em_wrmsr(uint32_t msr, uint64_t value)
+{
+ uint32_t a = value;
+ uint32_t d = value >> 32;
+
+ guest_exception_count = 0;
+
+ __asm__ __volatile__(KVM_FEP "em_wrmsr_start: wrmsr; em_wrmsr_end:" ::
+ "a"(a), "d"(d), "c"(msr) : "memory");
+}
+
+extern char em_rdmsr_start, em_rdmsr_end;
+extern char em_wrmsr_start, em_wrmsr_end;
+
+static void guest_code_filter_allow(void)
+{
+ uint64_t data;
+
+ /*
+ * Test userspace intercepting rdmsr / wrmsr for MSR_IA32_XSS.
+ *
+ * A GP is thrown if anything other than 0 is written to
+ * MSR_IA32_XSS.
+ */
+ data = test_rdmsr(MSR_IA32_XSS);
+ GUEST_ASSERT(data == 0);
+ GUEST_ASSERT(guest_exception_count == 0);
+
+ test_wrmsr(MSR_IA32_XSS, 0);
+ GUEST_ASSERT(guest_exception_count == 0);
+
+ test_wrmsr(MSR_IA32_XSS, 1);
+ GUEST_ASSERT(guest_exception_count == 1);
+
+ /*
+ * Test userspace intercepting rdmsr / wrmsr for MSR_IA32_FLUSH_CMD.
+ *
+ * A GP is thrown if MSR_IA32_FLUSH_CMD is read
+ * from or if a value other than 1 is written to it.
+ */
+ test_rdmsr(MSR_IA32_FLUSH_CMD);
+ GUEST_ASSERT(guest_exception_count == 1);
+
+ test_wrmsr(MSR_IA32_FLUSH_CMD, 0);
+ GUEST_ASSERT(guest_exception_count == 1);
+
+ test_wrmsr(MSR_IA32_FLUSH_CMD, 1);
+ GUEST_ASSERT(guest_exception_count == 0);
+
+ /*
+ * Test userspace intercepting rdmsr / wrmsr for MSR_NON_EXISTENT.
+ *
+ * Test that a fabricated MSR can pass through the kernel
+ * and be handled in userspace.
+ */
+ test_wrmsr(MSR_NON_EXISTENT, 2);
+ GUEST_ASSERT(guest_exception_count == 0);
+
+ data = test_rdmsr(MSR_NON_EXISTENT);
+ GUEST_ASSERT(data == 2);
+ GUEST_ASSERT(guest_exception_count == 0);
+
+ if (fep_available) {
+ /* Let userspace know we aren't done. */
+ GUEST_SYNC(0);
+
+ /*
+ * Now run the same tests with the instruction emulator.
+ */
+ data = test_em_rdmsr(MSR_IA32_XSS);
+ GUEST_ASSERT(data == 0);
+ GUEST_ASSERT(guest_exception_count == 0);
+ test_em_wrmsr(MSR_IA32_XSS, 0);
+ GUEST_ASSERT(guest_exception_count == 0);
+ test_em_wrmsr(MSR_IA32_XSS, 1);
+ GUEST_ASSERT(guest_exception_count == 1);
+
+ test_em_rdmsr(MSR_IA32_FLUSH_CMD);
+ GUEST_ASSERT(guest_exception_count == 1);
+ test_em_wrmsr(MSR_IA32_FLUSH_CMD, 0);
+ GUEST_ASSERT(guest_exception_count == 1);
+ test_em_wrmsr(MSR_IA32_FLUSH_CMD, 1);
+ GUEST_ASSERT(guest_exception_count == 0);
+
+ test_em_wrmsr(MSR_NON_EXISTENT, 2);
+ GUEST_ASSERT(guest_exception_count == 0);
+ data = test_em_rdmsr(MSR_NON_EXISTENT);
+ GUEST_ASSERT(data == 2);
+ GUEST_ASSERT(guest_exception_count == 0);
+ }
+
+ GUEST_DONE();
+}
+
+static void guest_msr_calls(bool trapped)
+{
+ /* This goes into the in-kernel emulation */
+ wrmsr(MSR_SYSCALL_MASK, 0);
+
+ if (trapped) {
+ /* This goes into user space emulation */
+ GUEST_ASSERT(rdmsr(MSR_SYSCALL_MASK) == MSR_SYSCALL_MASK);
+ GUEST_ASSERT(rdmsr(MSR_GS_BASE) == MSR_GS_BASE);
+ } else {
+ GUEST_ASSERT(rdmsr(MSR_SYSCALL_MASK) != MSR_SYSCALL_MASK);
+ GUEST_ASSERT(rdmsr(MSR_GS_BASE) != MSR_GS_BASE);
+ }
+
+ /* If trapped == true, this goes into user space emulation */
+ wrmsr(MSR_IA32_POWER_CTL, 0x1234);
+
+ /* This goes into the in-kernel emulation */
+ rdmsr(MSR_IA32_POWER_CTL);
+
+ /* Invalid MSR, should always be handled by user space exit */
+ GUEST_ASSERT(rdmsr(0xdeadbeef) == 0xdeadbeef);
+ wrmsr(0xdeadbeef, 0x1234);
+}
+
+static void guest_code_filter_deny(void)
+{
+ guest_msr_calls(true);
+
+ /*
+ * Disable msr filtering, so that the kernel
+ * handles everything in the next round
+ */
+ GUEST_SYNC(0);
+
+ guest_msr_calls(false);
+
+ GUEST_DONE();
+}
+
+static void guest_code_permission_bitmap(void)
+{
+ uint64_t data;
+
+ data = test_rdmsr(MSR_FS_BASE);
+ GUEST_ASSERT(data == MSR_FS_BASE);
+ data = test_rdmsr(MSR_GS_BASE);
+ GUEST_ASSERT(data != MSR_GS_BASE);
+
+ /* Let userspace know to switch the filter */
+ GUEST_SYNC(0);
+
+ data = test_rdmsr(MSR_FS_BASE);
+ GUEST_ASSERT(data != MSR_FS_BASE);
+ data = test_rdmsr(MSR_GS_BASE);
+ GUEST_ASSERT(data == MSR_GS_BASE);
+
+ GUEST_DONE();
+}
+
+static void __guest_gp_handler(struct ex_regs *regs,
+ char *r_start, char *r_end,
+ char *w_start, char *w_end)
+{
+ if (regs->rip == (uintptr_t)r_start) {
+ regs->rip = (uintptr_t)r_end;
+ regs->rax = 0;
+ regs->rdx = 0;
+ } else if (regs->rip == (uintptr_t)w_start) {
+ regs->rip = (uintptr_t)w_end;
+ } else {
+ GUEST_ASSERT(!"RIP is at an unknown location!");
+ }
+
+ ++guest_exception_count;
+}
+
+static void guest_gp_handler(struct ex_regs *regs)
+{
+ __guest_gp_handler(regs, &rdmsr_start, &rdmsr_end,
+ &wrmsr_start, &wrmsr_end);
+}
+
+static void guest_fep_gp_handler(struct ex_regs *regs)
+{
+ __guest_gp_handler(regs, &em_rdmsr_start, &em_rdmsr_end,
+ &em_wrmsr_start, &em_wrmsr_end);
+}
+
+static void check_for_guest_assert(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ if (vcpu->run->exit_reason == KVM_EXIT_IO &&
+ get_ucall(vcpu, &uc) == UCALL_ABORT) {
+ REPORT_GUEST_ASSERT(uc);
+ }
+}
+
+static void process_rdmsr(struct kvm_vcpu *vcpu, uint32_t msr_index)
+{
+ struct kvm_run *run = vcpu->run;
+
+ check_for_guest_assert(vcpu);
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_X86_RDMSR);
+ TEST_ASSERT(run->msr.index == msr_index,
+ "Unexpected msr (0x%04x), expected 0x%04x",
+ run->msr.index, msr_index);
+
+ switch (run->msr.index) {
+ case MSR_IA32_XSS:
+ run->msr.data = 0;
+ break;
+ case MSR_IA32_FLUSH_CMD:
+ run->msr.error = 1;
+ break;
+ case MSR_NON_EXISTENT:
+ run->msr.data = msr_non_existent_data;
+ break;
+ case MSR_FS_BASE:
+ run->msr.data = MSR_FS_BASE;
+ break;
+ case MSR_GS_BASE:
+ run->msr.data = MSR_GS_BASE;
+ break;
+ default:
+ TEST_ASSERT(false, "Unexpected MSR: 0x%04x", run->msr.index);
+ }
+}
+
+static void process_wrmsr(struct kvm_vcpu *vcpu, uint32_t msr_index)
+{
+ struct kvm_run *run = vcpu->run;
+
+ check_for_guest_assert(vcpu);
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_X86_WRMSR);
+ TEST_ASSERT(run->msr.index == msr_index,
+ "Unexpected msr (0x%04x), expected 0x%04x",
+ run->msr.index, msr_index);
+
+ switch (run->msr.index) {
+ case MSR_IA32_XSS:
+ if (run->msr.data != 0)
+ run->msr.error = 1;
+ break;
+ case MSR_IA32_FLUSH_CMD:
+ if (run->msr.data != 1)
+ run->msr.error = 1;
+ break;
+ case MSR_NON_EXISTENT:
+ msr_non_existent_data = run->msr.data;
+ break;
+ default:
+ TEST_ASSERT(false, "Unexpected MSR: 0x%04x", run->msr.index);
+ }
+}
+
+static void process_ucall_done(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ check_for_guest_assert(vcpu);
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_DONE,
+ "Unexpected ucall command: %lu, expected UCALL_DONE (%d)",
+ uc.cmd, UCALL_DONE);
+}
+
+static uint64_t process_ucall(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc = {};
+
+ check_for_guest_assert(vcpu);
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ break;
+ case UCALL_ABORT:
+ check_for_guest_assert(vcpu);
+ break;
+ case UCALL_DONE:
+ process_ucall_done(vcpu);
+ break;
+ default:
+ TEST_ASSERT(false, "Unexpected ucall");
+ }
+
+ return uc.cmd;
+}
+
+static void run_guest_then_process_rdmsr(struct kvm_vcpu *vcpu,
+ uint32_t msr_index)
+{
+ vcpu_run(vcpu);
+ process_rdmsr(vcpu, msr_index);
+}
+
+static void run_guest_then_process_wrmsr(struct kvm_vcpu *vcpu,
+ uint32_t msr_index)
+{
+ vcpu_run(vcpu);
+ process_wrmsr(vcpu, msr_index);
+}
+
+static uint64_t run_guest_then_process_ucall(struct kvm_vcpu *vcpu)
+{
+ vcpu_run(vcpu);
+ return process_ucall(vcpu);
+}
+
+static void run_guest_then_process_ucall_done(struct kvm_vcpu *vcpu)
+{
+ vcpu_run(vcpu);
+ process_ucall_done(vcpu);
+}
+
+KVM_ONE_VCPU_TEST_SUITE(user_msr);
+
+KVM_ONE_VCPU_TEST(user_msr, msr_filter_allow, guest_code_filter_allow)
+{
+ struct kvm_vm *vm = vcpu->vm;
+ uint64_t cmd;
+ int rc;
+
+ sync_global_to_guest(vm, fep_available);
+
+ rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
+ TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
+ vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_FILTER);
+
+ rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
+ TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
+
+ vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_allow);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+
+ vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
+
+ /* Process guest code userspace exits. */
+ run_guest_then_process_rdmsr(vcpu, MSR_IA32_XSS);
+ run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS);
+ run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS);
+
+ run_guest_then_process_rdmsr(vcpu, MSR_IA32_FLUSH_CMD);
+ run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD);
+ run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD);
+
+ run_guest_then_process_wrmsr(vcpu, MSR_NON_EXISTENT);
+ run_guest_then_process_rdmsr(vcpu, MSR_NON_EXISTENT);
+
+ vcpu_run(vcpu);
+ cmd = process_ucall(vcpu);
+
+ if (fep_available) {
+ TEST_ASSERT_EQ(cmd, UCALL_SYNC);
+ vm_install_exception_handler(vm, GP_VECTOR, guest_fep_gp_handler);
+
+ /* Process emulated rdmsr and wrmsr instructions. */
+ run_guest_then_process_rdmsr(vcpu, MSR_IA32_XSS);
+ run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS);
+ run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS);
+
+ run_guest_then_process_rdmsr(vcpu, MSR_IA32_FLUSH_CMD);
+ run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD);
+ run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD);
+
+ run_guest_then_process_wrmsr(vcpu, MSR_NON_EXISTENT);
+ run_guest_then_process_rdmsr(vcpu, MSR_NON_EXISTENT);
+
+ /* Confirm the guest completed without issues. */
+ run_guest_then_process_ucall_done(vcpu);
+ } else {
+ TEST_ASSERT_EQ(cmd, UCALL_DONE);
+ printf("To run the instruction emulation tests, set the module parameter 'kvm.force_emulation_prefix=1'\n");
+ }
+}
+
+static int handle_ucall(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_SYNC:
+ vm_ioctl(vcpu->vm, KVM_X86_SET_MSR_FILTER, &no_filter_deny);
+ break;
+ case UCALL_DONE:
+ return 1;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+
+ return 0;
+}
+
+static void handle_rdmsr(struct kvm_run *run)
+{
+ run->msr.data = run->msr.index;
+ msr_reads++;
+
+ if (run->msr.index == MSR_SYSCALL_MASK ||
+ run->msr.index == MSR_GS_BASE) {
+ TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER,
+ "MSR read trap w/o access fault");
+ }
+
+ if (run->msr.index == 0xdeadbeef) {
+ TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_UNKNOWN,
+ "MSR deadbeef read trap w/o inval fault");
+ }
+}
+
+static void handle_wrmsr(struct kvm_run *run)
+{
+ /* ignore */
+ msr_writes++;
+
+ if (run->msr.index == MSR_IA32_POWER_CTL) {
+ TEST_ASSERT(run->msr.data == 0x1234,
+ "MSR data for MSR_IA32_POWER_CTL incorrect");
+ TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER,
+ "MSR_IA32_POWER_CTL trap w/o access fault");
+ }
+
+ if (run->msr.index == 0xdeadbeef) {
+ TEST_ASSERT(run->msr.data == 0x1234,
+ "MSR data for deadbeef incorrect");
+ TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_UNKNOWN,
+ "deadbeef trap w/o inval fault");
+ }
+}
+
+KVM_ONE_VCPU_TEST(user_msr, msr_filter_deny, guest_code_filter_deny)
+{
+ struct kvm_vm *vm = vcpu->vm;
+ struct kvm_run *run = vcpu->run;
+ int rc;
+
+ rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
+ TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
+ vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_INVAL |
+ KVM_MSR_EXIT_REASON_UNKNOWN |
+ KVM_MSR_EXIT_REASON_FILTER);
+
+ rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
+ TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
+
+ prepare_bitmaps();
+ vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_deny);
+
+ while (1) {
+ vcpu_run(vcpu);
+
+ switch (run->exit_reason) {
+ case KVM_EXIT_X86_RDMSR:
+ handle_rdmsr(run);
+ break;
+ case KVM_EXIT_X86_WRMSR:
+ handle_wrmsr(run);
+ break;
+ case KVM_EXIT_IO:
+ if (handle_ucall(vcpu))
+ goto done;
+ break;
+ }
+
+ }
+
+done:
+ TEST_ASSERT(msr_reads == 4, "Handled 4 rdmsr in user space");
+ TEST_ASSERT(msr_writes == 3, "Handled 3 wrmsr in user space");
+}
+
+KVM_ONE_VCPU_TEST(user_msr, msr_permission_bitmap, guest_code_permission_bitmap)
+{
+ struct kvm_vm *vm = vcpu->vm;
+ int rc;
+
+ rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
+ TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
+ vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_FILTER);
+
+ rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
+ TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
+
+ vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_fs);
+ run_guest_then_process_rdmsr(vcpu, MSR_FS_BASE);
+ TEST_ASSERT(run_guest_then_process_ucall(vcpu) == UCALL_SYNC,
+ "Expected ucall state to be UCALL_SYNC.");
+ vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_gs);
+ run_guest_then_process_rdmsr(vcpu, MSR_GS_BASE);
+ run_guest_then_process_ucall_done(vcpu);
+}
+
+#define test_user_exit_msr_ioctl(vm, cmd, arg, flag, valid_mask) \
+({ \
+ int r = __vm_ioctl(vm, cmd, arg); \
+ \
+ if (flag & valid_mask) \
+ TEST_ASSERT(!r, __KVM_IOCTL_ERROR(#cmd, r)); \
+ else \
+ TEST_ASSERT(r == -1 && errno == EINVAL, \
+ "Wanted EINVAL for %s with flag = 0x%llx, got rc: %i errno: %i (%s)", \
+ #cmd, flag, r, errno, strerror(errno)); \
+})
+
+static void run_user_space_msr_flag_test(struct kvm_vm *vm)
+{
+ struct kvm_enable_cap cap = { .cap = KVM_CAP_X86_USER_SPACE_MSR };
+ int nflags = sizeof(cap.args[0]) * BITS_PER_BYTE;
+ int rc;
+ int i;
+
+ rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
+ TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
+
+ for (i = 0; i < nflags; i++) {
+ cap.args[0] = BIT_ULL(i);
+ test_user_exit_msr_ioctl(vm, KVM_ENABLE_CAP, &cap,
+ BIT_ULL(i), KVM_MSR_EXIT_REASON_VALID_MASK);
+ }
+}
+
+static void run_msr_filter_flag_test(struct kvm_vm *vm)
+{
+ u64 deny_bits = 0;
+ struct kvm_msr_filter filter = {
+ .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
+ .ranges = {
+ {
+ .flags = KVM_MSR_FILTER_READ,
+ .nmsrs = 1,
+ .base = 0,
+ .bitmap = (uint8_t *)&deny_bits,
+ },
+ },
+ };
+ int nflags;
+ int rc;
+ int i;
+
+ rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
+ TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
+
+ nflags = sizeof(filter.flags) * BITS_PER_BYTE;
+ for (i = 0; i < nflags; i++) {
+ filter.flags = BIT_ULL(i);
+ test_user_exit_msr_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter,
+ BIT_ULL(i), KVM_MSR_FILTER_VALID_MASK);
+ }
+
+ filter.flags = KVM_MSR_FILTER_DEFAULT_ALLOW;
+ nflags = sizeof(filter.ranges[0].flags) * BITS_PER_BYTE;
+ for (i = 0; i < nflags; i++) {
+ filter.ranges[0].flags = BIT_ULL(i);
+ test_user_exit_msr_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter,
+ BIT_ULL(i), KVM_MSR_FILTER_RANGE_VALID_MASK);
+ }
+}
+
+/* Test that attempts to write to the unused bits in a flag fail. */
+KVM_ONE_VCPU_TEST(user_msr, user_exit_msr_flags, NULL)
+{
+ struct kvm_vm *vm = vcpu->vm;
+
+ /* Test flags for KVM_CAP_X86_USER_SPACE_MSR. */
+ run_user_space_msr_flag_test(vm);
+
+ /* Test flags and range flags for KVM_X86_SET_MSR_FILTER. */
+ run_msr_filter_flag_test(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ fep_available = kvm_is_forced_emulation_enabled();
+
+ return test_harness_run(argc, argv);
+}
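
The deny-filter setup above relies on the per-range bitmap convention: bit N of a range's bitmap covers MSR (base + N), a set bit lets KVM handle the access in-kernel, and a cleared bit denies it, which bounces the access to userspace once KVM_CAP_X86_USER_SPACE_MSR is enabled with the filter exit reason. A standalone sketch of the bit manipulation (the test's own deny_msr() indexes via a mask trick; this version uses the explicit base offset):

#include <stdio.h>
#include <stdint.h>
#include <string.h>

#define BITMAP_BYTES 0x600	/* matches KVM_MSR_FILTER_MAX_BITMAP_SIZE */

/* Clear the per-MSR bit: within a range, bit N covers MSR (base + N). */
static void deny_msr(uint8_t *bitmap, uint32_t base, uint32_t msr)
{
	uint32_t idx = msr - base;	/* bit index within this range */

	bitmap[idx / 8] &= ~(1u << (idx % 8));
}

int main(void)
{
	uint8_t bitmap[BITMAP_BYTES];
	uint32_t base = 0xc0000000;
	uint32_t msr = 0xc0000101;	/* MSR_GS_BASE */
	uint32_t idx = msr - base;

	memset(bitmap, 0xff, sizeof(bitmap));	/* allow all MSRs in range */
	deny_msr(bitmap, base, msr);		/* ...but trap this one */

	printf("bit %u is now %u\n", idx,
	       (unsigned)((bitmap[idx / 8] >> (idx % 8)) & 1));
	return 0;
}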
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c b/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c
new file mode 100644
index 000000000000..a81a24761aac
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vmx_apic_access_test
+ *
+ * Copyright (C) 2020, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * The first subtest simply checks to see that an L2 guest can be
+ * launched with a valid APIC-access address that is backed by a
+ * page of L1 physical memory.
+ *
+ * The second subtest sets the APIC-access address to a (valid) L1
+ * physical address that is not backed by memory. KVM can't handle
+ * this situation, so resuming L2 should result in a KVM exit for
+ * internal error (emulation). This is not an architectural
+ * requirement. It is just a shortcoming of KVM. The internal error
+ * is unfortunate, but it's better than what used to happen!
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "kselftest.h"
+
+static void l2_guest_code(void)
+{
+ /* Exit to L1 */
+ __asm__ __volatile__("vmcall");
+}
+
+static void l1_guest_code(struct vmx_pages *vmx_pages, unsigned long high_gpa)
+{
+#define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ uint32_t control;
+
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+
+ /* Prepare the VMCS for L2 execution. */
+ prepare_vmcs(vmx_pages, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
+ control |= CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
+ vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
+ control = vmreadz(SECONDARY_VM_EXEC_CONTROL);
+ control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+ vmwrite(SECONDARY_VM_EXEC_CONTROL, control);
+ vmwrite(APIC_ACCESS_ADDR, vmx_pages->apic_access_gpa);
+
+ /* Try to launch L2 with the memory-backed APIC-access address. */
+ GUEST_SYNC(vmreadz(APIC_ACCESS_ADDR));
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+ vmwrite(APIC_ACCESS_ADDR, high_gpa);
+
+ /* Try to resume L2 with the unbacked APIC-access address. */
+ GUEST_SYNC(vmreadz(APIC_ACCESS_ADDR));
+ GUEST_ASSERT(!vmresume());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ unsigned long apic_access_addr = ~0ul;
+ vm_vaddr_t vmx_pages_gva;
+ unsigned long high_gpa;
+ struct vmx_pages *vmx;
+ bool done = false;
+
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+
+ high_gpa = (vm->max_gfn - 1) << vm->page_shift;
+
+ vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ prepare_virtualize_apic_accesses(vmx, vm);
+ vcpu_args_set(vcpu, 2, vmx_pages_gva, high_gpa);
+
+ while (!done) {
+ volatile struct kvm_run *run = vcpu->run;
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ if (apic_access_addr == high_gpa) {
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR);
+ TEST_ASSERT(run->internal.suberror ==
+ KVM_INTERNAL_ERROR_EMULATION,
+ "Got internal suberror other than KVM_INTERNAL_ERROR_EMULATION: %u",
+ run->internal.suberror);
+ break;
+ }
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ case UCALL_SYNC:
+ apic_access_addr = uc.args[1];
+ break;
+ case UCALL_DONE:
+ done = true;
+ break;
+ default:
+ TEST_ASSERT(false, "Unknown ucall %lu", uc.cmd);
+ }
+ }
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c
index fe40ade06a49..dad988351493 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c
@@ -18,20 +18,15 @@
#include "kselftest.h"
-#define VCPU_ID 5
-
enum {
PORT_L0_EXIT = 0x2000,
};
-/* The virtual machine object. */
-static struct kvm_vm *vm;
-
static void l2_guest_code(void)
{
/* Exit to L0 */
- asm volatile("inb %%dx, %%al"
- : : [port] "d" (PORT_L0_EXIT) : "rax");
+ asm volatile("inb %%dx, %%al"
+ : : [port] "d" (PORT_L0_EXIT) : "rax");
}
static void l1_guest_code(struct vmx_pages *vmx_pages)
@@ -53,32 +48,30 @@ static void l1_guest_code(struct vmx_pages *vmx_pages)
int main(int argc, char *argv[])
{
vm_vaddr_t vmx_pages_gva;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
- nested_vmx_check_supported();
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
- vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
- vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
/* Allocate VMX pages and shared descriptors (vmx_pages). */
vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
+ vcpu_args_set(vcpu, 1, vmx_pages_gva);
for (;;) {
- volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ volatile struct kvm_run *run = vcpu->run;
struct ucall uc;
- vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
if (run->io.port == PORT_L0_EXIT)
break;
- switch (get_ucall(vm, VCPU_ID, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_ABORT:
- TEST_FAIL("%s", (const char *)uc.args[0]);
+ REPORT_GUEST_ASSERT(uc);
/* NOT REACHED */
default:
TEST_FAIL("Unknown ucall %lu", uc.cmd);
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
index e894a638a155..7f6f5f23fb9b 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
@@ -17,8 +17,6 @@
#include "processor.h"
#include "vmx.h"
-#define VCPU_ID 1
-
/* The memory slot index to track dirty pages */
#define TEST_MEM_SLOT_INDEX 1
#define TEST_MEM_PAGES 3
@@ -73,19 +71,18 @@ int main(int argc, char *argv[])
unsigned long *bmap;
uint64_t *host_test_mem;
+ struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
- struct kvm_run *run;
struct ucall uc;
bool done = false;
- nested_vmx_check_supported();
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+ TEST_REQUIRE(kvm_cpu_has_ept());
/* Create VM */
- vm = vm_create_default(VCPU_ID, 0, l1_guest_code);
- vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
- run = vcpu_state(vm, VCPU_ID);
+ vcpu_args_set(vcpu, 1, vmx_pages_gva);
/* Add an extra memory slot for testing dirty logging */
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
@@ -98,7 +95,7 @@ int main(int argc, char *argv[])
* Add an identity map for GVA range [0xc0000000, 0xc0002000). This
* affects both L1 and L2. However...
*/
- virt_map(vm, GUEST_TEST_MEM, GUEST_TEST_MEM, TEST_MEM_PAGES, 0);
+ virt_map(vm, GUEST_TEST_MEM, GUEST_TEST_MEM, TEST_MEM_PAGES);
/*
* ... pages in the L2 GPA range [0xc0001000, 0xc0003000) will map to
@@ -108,25 +105,21 @@ int main(int argc, char *argv[])
* meaning after the last call to virt_map.
*/
prepare_eptp(vmx, vm, 0);
- nested_map_memslot(vmx, vm, 0, 0);
- nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096, 0);
- nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096, 0);
+ nested_map_memslot(vmx, vm, 0);
+ nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096);
+ nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096);
- bmap = bitmap_alloc(TEST_MEM_PAGES);
+ bmap = bitmap_zalloc(TEST_MEM_PAGES);
host_test_mem = addr_gpa2hva(vm, GUEST_TEST_MEM);
while (!done) {
memset(host_test_mem, 0xaa, TEST_MEM_PAGES * 4096);
- _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Unexpected exit reason: %u (%s),\n",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
- switch (get_ucall(vm, VCPU_ID, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_ABORT:
- TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
- __FILE__, uc.args[1]);
+ REPORT_GUEST_ASSERT(uc);
/* NOT REACHED */
case UCALL_SYNC:
/*
@@ -135,17 +128,17 @@ int main(int argc, char *argv[])
*/
kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
if (uc.args[1]) {
- TEST_ASSERT(test_bit(0, bmap), "Page 0 incorrectly reported clean\n");
- TEST_ASSERT(host_test_mem[0] == 1, "Page 0 not written by guest\n");
+ TEST_ASSERT(test_bit(0, bmap), "Page 0 incorrectly reported clean");
+ TEST_ASSERT(host_test_mem[0] == 1, "Page 0 not written by guest");
} else {
- TEST_ASSERT(!test_bit(0, bmap), "Page 0 incorrectly reported dirty\n");
- TEST_ASSERT(host_test_mem[0] == 0xaaaaaaaaaaaaaaaaULL, "Page 0 written by guest\n");
+ TEST_ASSERT(!test_bit(0, bmap), "Page 0 incorrectly reported dirty");
+ TEST_ASSERT(host_test_mem[0] == 0xaaaaaaaaaaaaaaaaULL, "Page 0 written by guest");
}
- TEST_ASSERT(!test_bit(1, bmap), "Page 1 incorrectly reported dirty\n");
- TEST_ASSERT(host_test_mem[4096 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest\n");
- TEST_ASSERT(!test_bit(2, bmap), "Page 2 incorrectly reported dirty\n");
- TEST_ASSERT(host_test_mem[8192 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest\n");
+ TEST_ASSERT(!test_bit(1, bmap), "Page 1 incorrectly reported dirty");
+ TEST_ASSERT(host_test_mem[4096 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest");
+ TEST_ASSERT(!test_bit(2, bmap), "Page 2 incorrectly reported dirty");
+ TEST_ASSERT(host_test_mem[8192 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest");
break;
case UCALL_DONE:
done = true;
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c b/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c
new file mode 100644
index 000000000000..fad3634fd9eb
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#include <signal.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+
+#include "kselftest.h"
+
+static void guest_ud_handler(struct ex_regs *regs)
+{
+ /* Loop on the ud2 until guest state is made invalid. */
+}
+
+static void guest_code(void)
+{
+ asm volatile("ud2");
+}
+
+static void __run_vcpu_with_invalid_state(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+
+ vcpu_run(vcpu);
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR);
+ TEST_ASSERT(run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION,
+ "Expected emulation failure, got %d",
+ run->emulation_failure.suberror);
+}
+
+static void run_vcpu_with_invalid_state(struct kvm_vcpu *vcpu)
+{
+ /*
+ * Always run twice to verify KVM handles the case where _KVM_ queues
+ * an exception with invalid state and then exits to userspace, i.e.
+ * that KVM doesn't explode if userspace ignores the initial error.
+ */
+ __run_vcpu_with_invalid_state(vcpu);
+ __run_vcpu_with_invalid_state(vcpu);
+}
+
+static void set_timer(void)
+{
+ struct itimerval timer;
+
+ timer.it_value.tv_sec = 0;
+ timer.it_value.tv_usec = 200;
+ timer.it_interval = timer.it_value;
+ TEST_ASSERT_EQ(setitimer(ITIMER_REAL, &timer, NULL), 0);
+}
+
+static void set_or_clear_invalid_guest_state(struct kvm_vcpu *vcpu, bool set)
+{
+ static struct kvm_sregs sregs;
+
+ if (!sregs.cr0)
+ vcpu_sregs_get(vcpu, &sregs);
+ sregs.tr.unusable = !!set;
+ vcpu_sregs_set(vcpu, &sregs);
+}
+
+static void set_invalid_guest_state(struct kvm_vcpu *vcpu)
+{
+ set_or_clear_invalid_guest_state(vcpu, true);
+}
+
+static void clear_invalid_guest_state(struct kvm_vcpu *vcpu)
+{
+ set_or_clear_invalid_guest_state(vcpu, false);
+}
+
+static struct kvm_vcpu *get_set_sigalrm_vcpu(struct kvm_vcpu *__vcpu)
+{
+ static struct kvm_vcpu *vcpu = NULL;
+
+ if (__vcpu)
+ vcpu = __vcpu;
+ return vcpu;
+}
+
+static void sigalrm_handler(int sig)
+{
+ struct kvm_vcpu *vcpu = get_set_sigalrm_vcpu(NULL);
+ struct kvm_vcpu_events events;
+
+ TEST_ASSERT(sig == SIGALRM, "Unexpected signal = %d", sig);
+
+ vcpu_events_get(vcpu, &events);
+
+ /*
+	 * If an exception is pending, attempt KVM_RUN with invalid guest state,
+ * otherwise rearm the timer and keep doing so until the timer fires
+ * between KVM queueing an exception and re-entering the guest.
+ */
+ if (events.exception.pending) {
+ set_invalid_guest_state(vcpu);
+ run_vcpu_with_invalid_state(vcpu);
+ } else {
+ set_timer();
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(host_cpu_is_intel);
+ TEST_REQUIRE(!vm_is_unrestricted_guest(NULL));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ get_set_sigalrm_vcpu(vcpu);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+
+ vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
+
+ /*
+	 * Stuff invalid guest state by making TR unusable.  Without
+	 * unrestricted guest, KVM must emulate the guest and can't fully do
+	 * so, thus the next KVM_RUN should exit with an emulation error.
+ */
+ set_invalid_guest_state(vcpu);
+ run_vcpu_with_invalid_state(vcpu);
+
+ /*
+ * Verify KVM also handles the case where userspace gains control while
+ * an exception is pending and stuffs invalid state. Run with valid
+ * guest state and a timer firing every 200us, and attempt to enter the
+ * guest with invalid state when the handler interrupts KVM with an
+ * exception pending.
+ */
+ clear_invalid_guest_state(vcpu);
+ TEST_ASSERT(signal(SIGALRM, sigalrm_handler) != SIG_ERR,
+ "Failed to register SIGALRM handler, errno = %d (%s)",
+ errno, strerror(errno));
+
+ set_timer();
+ run_vcpu_with_invalid_state(vcpu);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c b/tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c
new file mode 100644
index 000000000000..a100ee5f0009
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "kselftest.h"
+
+#define ARBITRARY_IO_PORT 0x2000
+
+static struct kvm_vm *vm;
+
+static void l2_guest_code(void)
+{
+ /*
+ * Generate an exit to L0 userspace, i.e. main(), via I/O to an
+ * arbitrary port.
+ */
+ asm volatile("inb %%dx, %%al"
+ : : [port] "d" (ARBITRARY_IO_PORT) : "rax");
+}
+
+static void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+
+ /* Prepare the VMCS for L2 execution. */
+ prepare_vmcs(vmx_pages, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /*
+ * L2 must be run without unrestricted guest, verify that the selftests
+ * library hasn't enabled it. Because KVM selftests jump directly to
+ * 64-bit mode, unrestricted guest support isn't required.
+ */
+ GUEST_ASSERT(!(vmreadz(CPU_BASED_VM_EXEC_CONTROL) & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) ||
+ !(vmreadz(SECONDARY_VM_EXEC_CONTROL) & SECONDARY_EXEC_UNRESTRICTED_GUEST));
+
+ GUEST_ASSERT(!vmlaunch());
+
+ /* L2 should triple fault after main() stuffs invalid guest state. */
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_TRIPLE_FAULT);
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ vm_vaddr_t vmx_pages_gva;
+ struct kvm_sregs sregs;
+ struct kvm_vcpu *vcpu;
+ struct kvm_run *run;
+ struct ucall uc;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+
+ /* Allocate VMX pages and shared descriptors (vmx_pages). */
+ vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ vcpu_args_set(vcpu, 1, vmx_pages_gva);
+
+ vcpu_run(vcpu);
+
+ run = vcpu->run;
+
+ /*
+ * The first exit to L0 userspace should be an I/O access from L2.
+ * Running L1 should launch L2 without triggering an exit to userspace.
+ */
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ TEST_ASSERT(run->io.port == ARBITRARY_IO_PORT,
+ "Expected IN from port %d from L2, got port %d",
+ ARBITRARY_IO_PORT, run->io.port);
+
+ /*
+	 * Stuff invalid guest state for L2 by making TR unusable. The next
+ * KVM_RUN should induce a TRIPLE_FAULT in L2 as KVM doesn't support
+ * emulating invalid guest state for L2.
+ */
+ memset(&sregs, 0, sizeof(sregs));
+ vcpu_sregs_get(vcpu, &sregs);
+ sregs.tr.unusable = 1;
+ vcpu_sregs_set(vcpu, &sregs);
+
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_DONE:
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ default:
+ TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+ }
+}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c b/tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c
new file mode 100644
index 000000000000..90720b6205f4
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * VMX control MSR test
+ *
+ * Copyright (C) 2022 Google LLC.
+ *
+ * Tests for KVM ownership of bits in the VMX entry/exit control MSRs. Checks
+ * that KVM will set owned bits where appropriate, and will not if
+ * KVM_X86_QUIRK_TWEAK_VMX_CTRL_MSRS is disabled.
+ */
+#include <linux/bitmap.h>
+#include "kvm_util.h"
+#include "vmx.h"
+
+static void vmx_fixed1_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index,
+ uint64_t mask)
+{
+ uint64_t val = vcpu_get_msr(vcpu, msr_index);
+ uint64_t bit;
+
+ mask &= val;
+
+ for_each_set_bit(bit, &mask, 64) {
+ vcpu_set_msr(vcpu, msr_index, val & ~BIT_ULL(bit));
+ vcpu_set_msr(vcpu, msr_index, val);
+ }
+}
+
+static void vmx_fixed0_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index,
+ uint64_t mask)
+{
+ uint64_t val = vcpu_get_msr(vcpu, msr_index);
+ uint64_t bit;
+
+ mask = ~mask | val;
+
+ for_each_clear_bit(bit, &mask, 64) {
+ vcpu_set_msr(vcpu, msr_index, val | BIT_ULL(bit));
+ vcpu_set_msr(vcpu, msr_index, val);
+ }
+}
+
+static void vmx_fixed0and1_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index)
+{
+ vmx_fixed0_msr_test(vcpu, msr_index, GENMASK_ULL(31, 0));
+ vmx_fixed1_msr_test(vcpu, msr_index, GENMASK_ULL(63, 32));
+}
+
+static void vmx_save_restore_msrs_test(struct kvm_vcpu *vcpu)
+{
+ vcpu_set_msr(vcpu, MSR_IA32_VMX_VMCS_ENUM, 0);
+ vcpu_set_msr(vcpu, MSR_IA32_VMX_VMCS_ENUM, -1ull);
+
+ vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_BASIC,
+ BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55));
+
+ vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_MISC,
+ BIT_ULL(5) | GENMASK_ULL(8, 6) | BIT_ULL(14) |
+ BIT_ULL(15) | BIT_ULL(28) | BIT_ULL(29) | BIT_ULL(30));
+
+ vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_PROCBASED_CTLS2);
+ vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_EPT_VPID_CAP, -1ull);
+ vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_PINBASED_CTLS);
+ vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_PROCBASED_CTLS);
+ vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_EXIT_CTLS);
+ vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_ENTRY_CTLS);
+ vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_VMFUNC, -1ull);
+}
+
+static void __ia32_feature_control_msr_test(struct kvm_vcpu *vcpu,
+ uint64_t msr_bit,
+ struct kvm_x86_cpu_feature feature)
+{
+ uint64_t val;
+
+ vcpu_clear_cpuid_feature(vcpu, feature);
+
+ val = vcpu_get_msr(vcpu, MSR_IA32_FEAT_CTL);
+ vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, val | msr_bit | FEAT_CTL_LOCKED);
+ vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, (val & ~msr_bit) | FEAT_CTL_LOCKED);
+ vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, val | msr_bit | FEAT_CTL_LOCKED);
+ vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, (val & ~msr_bit) | FEAT_CTL_LOCKED);
+ vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, val);
+
+ if (!kvm_cpu_has(feature))
+ return;
+
+ vcpu_set_cpuid_feature(vcpu, feature);
+}
+
+static void ia32_feature_control_msr_test(struct kvm_vcpu *vcpu)
+{
+ uint64_t supported_bits = FEAT_CTL_LOCKED |
+ FEAT_CTL_VMX_ENABLED_INSIDE_SMX |
+ FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX |
+ FEAT_CTL_SGX_LC_ENABLED |
+ FEAT_CTL_SGX_ENABLED |
+ FEAT_CTL_LMCE_ENABLED;
+ int bit, r;
+
+ __ia32_feature_control_msr_test(vcpu, FEAT_CTL_VMX_ENABLED_INSIDE_SMX, X86_FEATURE_SMX);
+ __ia32_feature_control_msr_test(vcpu, FEAT_CTL_VMX_ENABLED_INSIDE_SMX, X86_FEATURE_VMX);
+ __ia32_feature_control_msr_test(vcpu, FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX, X86_FEATURE_VMX);
+ __ia32_feature_control_msr_test(vcpu, FEAT_CTL_SGX_LC_ENABLED, X86_FEATURE_SGX_LC);
+ __ia32_feature_control_msr_test(vcpu, FEAT_CTL_SGX_LC_ENABLED, X86_FEATURE_SGX);
+ __ia32_feature_control_msr_test(vcpu, FEAT_CTL_SGX_ENABLED, X86_FEATURE_SGX);
+ __ia32_feature_control_msr_test(vcpu, FEAT_CTL_LMCE_ENABLED, X86_FEATURE_MCE);
+
+ for_each_clear_bit(bit, &supported_bits, 64) {
+ r = _vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, BIT(bit));
+ TEST_ASSERT(r == 0,
+ "Setting reserved bit %d in IA32_FEATURE_CONTROL should fail", bit);
+ }
+}
+
+int main(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+
+ /* No need to actually do KVM_RUN, thus no guest code. */
+ vm = vm_create_with_one_vcpu(&vcpu, NULL);
+
+ vmx_save_restore_msrs_test(vcpu);
+ ia32_feature_control_msr_test(vcpu);
+
+ kvm_vm_free(vm);
+}
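
Aside (not part of the patch): the vmx_fixed0and1_msr_test() calls above follow
the VMX control-MSR encoding in which bits 31:0 report the allowed-0 settings (a
set bit means the corresponding control must be 1) and bits 63:32 report the
allowed-1 settings (a clear bit means the control must be 0).  A small sketch of
how a desired 32-bit control word would be validated against such an MSR:

/* Sketch only: check a desired control value against a VMX capability MSR. */
static bool vmx_ctrl_is_valid(uint64_t cap_msr, uint32_t want)
{
	uint32_t must_be_one = (uint32_t)cap_msr;		/* allowed-0 settings */
	uint32_t may_be_one = (uint32_t)(cap_msr >> 32);	/* allowed-1 settings */

	return !(must_be_one & ~want) && !(want & ~may_be_one);
}
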
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c b/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c
new file mode 100644
index 000000000000..1759fa5cb3f2
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c
@@ -0,0 +1,206 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vmx_nested_tsc_scaling_test
+ *
+ * Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * This test case verifies that nested TSC scaling behaves as expected when
+ * both L1 and L2 are scaled using different ratios. For this test we scale
+ * L1 down and scale L2 up.
+ */
+
+#include <time.h>
+
+#include "kvm_util.h"
+#include "vmx.h"
+#include "kselftest.h"
+
+/* L2 is scaled up (from L1's perspective) by this factor */
+#define L2_SCALE_FACTOR 4ULL
+
+#define TSC_OFFSET_L2 ((uint64_t) -33125236320908)
+#define TSC_MULTIPLIER_L2 (L2_SCALE_FACTOR << 48)
+
+#define L2_GUEST_STACK_SIZE 64
+
+enum { USLEEP, UCHECK_L1, UCHECK_L2 };
+#define GUEST_SLEEP(sec) ucall(UCALL_SYNC, 2, USLEEP, sec)
+#define GUEST_CHECK(level, freq) ucall(UCALL_SYNC, 2, level, freq)
+
+
+/*
+ * This function checks whether the "actual" TSC frequency of a guest matches
+ * its expected frequency. In order to account for delays in taking the TSC
+ * measurements, a difference of 1% between the actual and the expected value
+ * is tolerated.
+ */
+static void compare_tsc_freq(uint64_t actual, uint64_t expected)
+{
+ uint64_t tolerance, thresh_low, thresh_high;
+
+ tolerance = expected / 100;
+ thresh_low = expected - tolerance;
+ thresh_high = expected + tolerance;
+
+ TEST_ASSERT(thresh_low < actual,
+ "TSC freq is expected to be between %"PRIu64" and %"PRIu64
+ " but it actually is %"PRIu64,
+ thresh_low, thresh_high, actual);
+ TEST_ASSERT(thresh_high > actual,
+ "TSC freq is expected to be between %"PRIu64" and %"PRIu64
+ " but it actually is %"PRIu64,
+ thresh_low, thresh_high, actual);
+}
+
+static void check_tsc_freq(int level)
+{
+ uint64_t tsc_start, tsc_end, tsc_freq;
+
+ /*
+ * Reading the TSC twice with about a second's difference should give
+ * us an approximation of the TSC frequency from the guest's
+ * perspective. Now, this won't be completely accurate, but it should
+ * be good enough for the purposes of this test.
+ */
+ tsc_start = rdmsr(MSR_IA32_TSC);
+ GUEST_SLEEP(1);
+ tsc_end = rdmsr(MSR_IA32_TSC);
+
+ tsc_freq = tsc_end - tsc_start;
+
+ GUEST_CHECK(level, tsc_freq);
+}
+
+static void l2_guest_code(void)
+{
+ check_tsc_freq(UCHECK_L2);
+
+ /* exit to L1 */
+ __asm__ __volatile__("vmcall");
+}
+
+static void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ uint32_t control;
+
+ /* check that L1's frequency looks alright before launching L2 */
+ check_tsc_freq(UCHECK_L1);
+
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+
+ /* prepare the VMCS for L2 execution */
+ prepare_vmcs(vmx_pages, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /* enable TSC offsetting and TSC scaling for L2 */
+ control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
+ control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING;
+ vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
+
+ control = vmreadz(SECONDARY_VM_EXEC_CONTROL);
+ control |= SECONDARY_EXEC_TSC_SCALING;
+ vmwrite(SECONDARY_VM_EXEC_CONTROL, control);
+
+ vmwrite(TSC_OFFSET, TSC_OFFSET_L2);
+ vmwrite(TSC_MULTIPLIER, TSC_MULTIPLIER_L2);
+ vmwrite(TSC_MULTIPLIER_HIGH, TSC_MULTIPLIER_L2 >> 32);
+
+ /* launch L2 */
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+ /* check that L1's frequency still looks good */
+ check_tsc_freq(UCHECK_L1);
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ vm_vaddr_t vmx_pages_gva;
+
+ uint64_t tsc_start, tsc_end;
+ uint64_t tsc_khz;
+ uint64_t l1_scale_factor;
+ uint64_t l0_tsc_freq = 0;
+ uint64_t l1_tsc_freq = 0;
+ uint64_t l2_tsc_freq = 0;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_TSC_CONTROL));
+ TEST_REQUIRE(sys_clocksource_is_based_on_tsc());
+
+ /*
+ * We set L1's scale factor to be a random number from 2 to 10.
+ * Ideally we would do the same for L2's factor but that one is
+ * referenced by both main() and l1_guest_code() and using a global
+ * variable does not work.
+ */
+ srand(time(NULL));
+ l1_scale_factor = (rand() % 9) + 2;
+ printf("L1's scale down factor is: %"PRIu64"\n", l1_scale_factor);
+ printf("L2's scale up factor is: %llu\n", L2_SCALE_FACTOR);
+
+ tsc_start = rdtsc();
+ sleep(1);
+ tsc_end = rdtsc();
+
+ l0_tsc_freq = tsc_end - tsc_start;
+ printf("real TSC frequency is around: %"PRIu64"\n", l0_tsc_freq);
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+ vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ vcpu_args_set(vcpu, 1, vmx_pages_gva);
+
+ tsc_khz = __vcpu_ioctl(vcpu, KVM_GET_TSC_KHZ, NULL);
+ TEST_ASSERT(tsc_khz != -1, "vcpu ioctl KVM_GET_TSC_KHZ failed");
+
+ /* scale down L1's TSC frequency */
+ vcpu_ioctl(vcpu, KVM_SET_TSC_KHZ, (void *) (tsc_khz / l1_scale_factor));
+
+ for (;;) {
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ case UCALL_SYNC:
+ switch (uc.args[0]) {
+ case USLEEP:
+ sleep(uc.args[1]);
+ break;
+ case UCHECK_L1:
+ l1_tsc_freq = uc.args[1];
+ printf("L1's TSC frequency is around: %"PRIu64
+ "\n", l1_tsc_freq);
+
+ compare_tsc_freq(l1_tsc_freq,
+ l0_tsc_freq / l1_scale_factor);
+ break;
+ case UCHECK_L2:
+ l2_tsc_freq = uc.args[1];
+ printf("L2's TSC frequency is around: %"PRIu64
+ "\n", l2_tsc_freq);
+
+ compare_tsc_freq(l2_tsc_freq,
+ l1_tsc_freq * L2_SCALE_FACTOR);
+ break;
+ }
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+
+done:
+ kvm_vm_free(vm);
+ return 0;
+}
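
Aside (not part of the patch): TSC_MULTIPLIER_L2 above encodes the scale factor
in the architectural fixed-point format with 48 fractional bits, i.e.
L2_SCALE_FACTOR << 48 scales L2's TSC by exactly L2_SCALE_FACTOR relative to L1,
which is what the UCHECK_L2 comparison expects.  A sketch of the scaling step,
using a 128-bit intermediate to avoid overflow; the constant TSC_OFFSET_L2 shifts
the raw readings but cancels out of the frequency deltas taken by
check_tsc_freq():

/* Sketch only: apply a TSC multiplier that has 48 fractional bits. */
static uint64_t scale_tsc(uint64_t tsc, uint64_t multiplier)
{
	return (uint64_t)(((unsigned __int128)tsc * multiplier) >> 48);
}
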
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c
new file mode 100644
index 000000000000..ea0cb3cae0f7
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c
@@ -0,0 +1,228 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test for VMX-pmu perf capability msr
+ *
+ * Copyright (C) 2021 Intel Corporation
+ *
+ * Test the effect of various CPUID settings on the
+ * MSR_IA32_PERF_CAPABILITIES MSR.  Check that what we write with
+ * KVM_SET_MSR is _not_ modified by the guest, that it can be
+ * retrieved with KVM_GET_MSR, and that invalid LBR formats are
+ * rejected.
+ */
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <sys/ioctl.h>
+
+#include <linux/bitmap.h>
+
+#include "kvm_test_harness.h"
+#include "kvm_util.h"
+#include "vmx.h"
+
+static union perf_capabilities {
+ struct {
+ u64 lbr_format:6;
+ u64 pebs_trap:1;
+ u64 pebs_arch_reg:1;
+ u64 pebs_format:4;
+ u64 smm_freeze:1;
+ u64 full_width_write:1;
+ u64 pebs_baseline:1;
+ u64 perf_metrics:1;
+ u64 pebs_output_pt_available:1;
+ u64 anythread_deprecated:1;
+ };
+ u64 capabilities;
+} host_cap;
+
+/*
+ * The LBR format and most PEBS features are immutable, all other features are
+ * fungible (if supported by the host and KVM).
+ */
+static const union perf_capabilities immutable_caps = {
+ .lbr_format = -1,
+ .pebs_trap = 1,
+ .pebs_arch_reg = 1,
+ .pebs_format = -1,
+ .pebs_baseline = 1,
+};
+
+static const union perf_capabilities format_caps = {
+ .lbr_format = -1,
+ .pebs_format = -1,
+};
+
+static void guest_test_perf_capabilities_gp(uint64_t val)
+{
+ uint8_t vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, val);
+
+ __GUEST_ASSERT(vector == GP_VECTOR,
+ "Expected #GP for value '0x%lx', got vector '0x%x'",
+ val, vector);
+}
+
+static void guest_code(uint64_t current_val)
+{
+ int i;
+
+ guest_test_perf_capabilities_gp(current_val);
+ guest_test_perf_capabilities_gp(0);
+
+ for (i = 0; i < 64; i++)
+ guest_test_perf_capabilities_gp(current_val ^ BIT_ULL(i));
+
+ GUEST_DONE();
+}
+
+KVM_ONE_VCPU_TEST_SUITE(vmx_pmu_caps);
+
+/*
+ * Verify that guest WRMSRs to PERF_CAPABILITIES #GP regardless of the value
+ * written, that the guest always sees the userspace controlled value, and that
+ * PERF_CAPABILITIES is immutable after KVM_RUN.
+ */
+KVM_ONE_VCPU_TEST(vmx_pmu_caps, guest_wrmsr_perf_capabilities, guest_code)
+{
+ struct ucall uc;
+ int r, i;
+
+ vm_init_descriptor_tables(vcpu->vm);
+ vcpu_init_descriptor_tables(vcpu);
+
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
+
+ vcpu_args_set(vcpu, 1, host_cap.capabilities);
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+ }
+
+ TEST_ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES),
+ host_cap.capabilities);
+
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
+
+ r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0);
+ TEST_ASSERT(!r, "Post-KVM_RUN write '0' didn't fail");
+
+ for (i = 0; i < 64; i++) {
+ r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES,
+ host_cap.capabilities ^ BIT_ULL(i));
+		TEST_ASSERT(!r, "Post-KVM_RUN write '0x%llx' didn't fail",
+ host_cap.capabilities ^ BIT_ULL(i));
+ }
+}
+
+/*
+ * Verify KVM allows writing PERF_CAPABILITIES with all KVM-supported features
+ * enabled, as well as '0' (to disable all features).
+ */
+KVM_ONE_VCPU_TEST(vmx_pmu_caps, basic_perf_capabilities, guest_code)
+{
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0);
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
+}
+
+KVM_ONE_VCPU_TEST(vmx_pmu_caps, fungible_perf_capabilities, guest_code)
+{
+ const uint64_t fungible_caps = host_cap.capabilities & ~immutable_caps.capabilities;
+ int bit;
+
+ for_each_set_bit(bit, &fungible_caps, 64) {
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, BIT_ULL(bit));
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES,
+ host_cap.capabilities & ~BIT_ULL(bit));
+ }
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
+}
+
+/*
+ * Verify KVM rejects attempts to set unsupported and/or immutable features in
+ * PERF_CAPABILITIES. Note, LBR format and PEBS format need to be validated
+ * separately as they are multi-bit values, e.g. toggling or setting a single
+ * bit can generate a false positive without dedicated safeguards.
+ */
+KVM_ONE_VCPU_TEST(vmx_pmu_caps, immutable_perf_capabilities, guest_code)
+{
+ const uint64_t reserved_caps = (~host_cap.capabilities |
+ immutable_caps.capabilities) &
+ ~format_caps.capabilities;
+ union perf_capabilities val = host_cap;
+ int r, bit;
+
+ for_each_set_bit(bit, &reserved_caps, 64) {
+ r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES,
+ host_cap.capabilities ^ BIT_ULL(bit));
+ TEST_ASSERT(!r, "%s immutable feature 0x%llx (bit %d) didn't fail",
+ host_cap.capabilities & BIT_ULL(bit) ? "Setting" : "Clearing",
+ BIT_ULL(bit), bit);
+ }
+
+ /*
+ * KVM only supports the host's native LBR format, as well as '0' (to
+ * disable LBR support). Verify KVM rejects all other LBR formats.
+ */
+ for (val.lbr_format = 1; val.lbr_format; val.lbr_format++) {
+ if (val.lbr_format == host_cap.lbr_format)
+ continue;
+
+ r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, val.capabilities);
+ TEST_ASSERT(!r, "Bad LBR FMT = 0x%x didn't fail, host = 0x%x",
+ val.lbr_format, host_cap.lbr_format);
+ }
+
+ /* Ditto for the PEBS format. */
+ for (val.pebs_format = 1; val.pebs_format; val.pebs_format++) {
+ if (val.pebs_format == host_cap.pebs_format)
+ continue;
+
+ r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, val.capabilities);
+ TEST_ASSERT(!r, "Bad PEBS FMT = 0x%x didn't fail, host = 0x%x",
+ val.pebs_format, host_cap.pebs_format);
+ }
+}
+
+/*
+ * Test that LBR MSRs are writable when LBRs are enabled, and then verify that
+ * disabling the vPMU via CPUID also disables LBR support. Set bits 2:0 of
+ * LBR_TOS as those bits are writable across all uarch implementations (arch
+ * LBRs will need to poke a different MSR).
+ */
+KVM_ONE_VCPU_TEST(vmx_pmu_caps, lbr_perf_capabilities, guest_code)
+{
+ int r;
+
+ if (!host_cap.lbr_format)
+ return;
+
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
+ vcpu_set_msr(vcpu, MSR_LBR_TOS, 7);
+
+ vcpu_clear_cpuid_entry(vcpu, X86_PROPERTY_PMU_VERSION.function);
+
+ r = _vcpu_set_msr(vcpu, MSR_LBR_TOS, 7);
+ TEST_ASSERT(!r, "Writing LBR_TOS should fail after disabling vPMU");
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_is_pmu_enabled());
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_PDCM));
+
+ TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION));
+ TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0);
+
+ host_cap.capabilities = kvm_get_feature_msr(MSR_IA32_PERF_CAPABILITIES);
+
+ TEST_ASSERT(host_cap.full_width_write,
+ "Full-width writes should always be supported");
+
+ return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
index a7737af1224f..affc32800158 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
@@ -22,7 +22,6 @@
#include "processor.h"
#include "vmx.h"
-#define VCPU_ID 5
#define PREEMPTION_TIMER_VALUE 100000000ull
#define PREEMPTION_TIMER_VALUE_THRESHOLD1 80000000ull
@@ -158,7 +157,7 @@ int main(int argc, char *argv[])
struct kvm_regs regs1, regs2;
struct kvm_vm *vm;
- struct kvm_run *run;
+ struct kvm_vcpu *vcpu;
struct kvm_x86_state *state;
struct ucall uc;
int stage;
@@ -167,34 +166,25 @@ int main(int argc, char *argv[])
* AMD currently does not implement any VMX features, so for now we
* just early out.
*/
- nested_vmx_check_supported();
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
/* Create VM */
- vm = vm_create_default(VCPU_ID, 0, guest_code);
- vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
- run = vcpu_state(vm, VCPU_ID);
-
- vcpu_regs_get(vm, VCPU_ID, &regs1);
-
- if (kvm_check_cap(KVM_CAP_NESTED_STATE)) {
- vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
- } else {
- pr_info("will skip vmx preemption timer checks\n");
- goto done;
- }
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ vcpu_regs_get(vcpu, &regs1);
+
+ vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ vcpu_args_set(vcpu, 1, vmx_pages_gva);
for (stage = 1;; stage++) {
- _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Stage %d: unexpected exit reason: %u (%s),\n",
- stage, run->exit_reason,
- exit_reason_str(run->exit_reason));
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
- switch (get_ucall(vm, VCPU_ID, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_ABORT:
- TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
- __FILE__, uc.args[1]);
+ REPORT_GUEST_ASSERT(uc);
/* NOT REACHED */
case UCALL_SYNC:
break;
@@ -233,22 +223,19 @@ int main(int argc, char *argv[])
stage, uc.args[4], uc.args[5]);
}
- state = vcpu_save_state(vm, VCPU_ID);
+ state = vcpu_save_state(vcpu);
memset(&regs1, 0, sizeof(regs1));
- vcpu_regs_get(vm, VCPU_ID, &regs1);
+ vcpu_regs_get(vcpu, &regs1);
kvm_vm_release(vm);
/* Restore state in a new VM. */
- kvm_vm_restart(vm, O_RDWR);
- vm_vcpu_add(vm, VCPU_ID);
- vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
- vcpu_load_state(vm, VCPU_ID, state);
- run = vcpu_state(vm, VCPU_ID);
- free(state);
+ vcpu = vm_recreate_with_one_vcpu(vm);
+ vcpu_load_state(vcpu, state);
+ kvm_x86_state_cleanup(state);
memset(&regs2, 0, sizeof(regs2));
- vcpu_regs_get(vm, VCPU_ID, &regs2);
+ vcpu_regs_get(vcpu, &regs2);
TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
"Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
(ulong) regs2.rdi, (ulong) regs2.rsi);
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c
index 54cdefdfb49d..67a62a5a8895 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c
@@ -23,38 +23,37 @@
* changes this should be updated.
*/
#define VMCS12_REVISION 0x11e57ed0
-#define VCPU_ID 5
bool have_evmcs;
-void test_nested_state(struct kvm_vm *vm, struct kvm_nested_state *state)
+void test_nested_state(struct kvm_vcpu *vcpu, struct kvm_nested_state *state)
{
- vcpu_nested_state_set(vm, VCPU_ID, state, false);
+ vcpu_nested_state_set(vcpu, state);
}
-void test_nested_state_expect_errno(struct kvm_vm *vm,
+void test_nested_state_expect_errno(struct kvm_vcpu *vcpu,
struct kvm_nested_state *state,
int expected_errno)
{
int rv;
- rv = vcpu_nested_state_set(vm, VCPU_ID, state, true);
+ rv = __vcpu_nested_state_set(vcpu, state);
TEST_ASSERT(rv == -1 && errno == expected_errno,
"Expected %s (%d) from vcpu_nested_state_set but got rv: %i errno: %s (%d)",
strerror(expected_errno), expected_errno, rv, strerror(errno),
errno);
}
-void test_nested_state_expect_einval(struct kvm_vm *vm,
+void test_nested_state_expect_einval(struct kvm_vcpu *vcpu,
struct kvm_nested_state *state)
{
- test_nested_state_expect_errno(vm, state, EINVAL);
+ test_nested_state_expect_errno(vcpu, state, EINVAL);
}
-void test_nested_state_expect_efault(struct kvm_vm *vm,
+void test_nested_state_expect_efault(struct kvm_vcpu *vcpu,
struct kvm_nested_state *state)
{
- test_nested_state_expect_errno(vm, state, EFAULT);
+ test_nested_state_expect_errno(vcpu, state, EFAULT);
}
void set_revision_id_for_vmcs12(struct kvm_nested_state *state,
@@ -76,10 +75,8 @@ void set_default_state(struct kvm_nested_state *state)
void set_default_vmx_state(struct kvm_nested_state *state, int size)
{
memset(state, 0, size);
- state->flags = KVM_STATE_NESTED_GUEST_MODE |
- KVM_STATE_NESTED_RUN_PENDING;
if (have_evmcs)
- state->flags |= KVM_STATE_NESTED_EVMCS;
+ state->flags = KVM_STATE_NESTED_EVMCS;
state->format = 0;
state->size = size;
state->hdr.vmx.vmxon_pa = 0x1000;
@@ -88,7 +85,7 @@ void set_default_vmx_state(struct kvm_nested_state *state, int size)
set_revision_id_for_vmcs12(state, VMCS12_REVISION);
}
-void test_vmx_nested_state(struct kvm_vm *vm)
+void test_vmx_nested_state(struct kvm_vcpu *vcpu)
{
/* Add a page for VMCS12. */
const int state_sz = sizeof(struct kvm_nested_state) + getpagesize();
@@ -98,14 +95,14 @@ void test_vmx_nested_state(struct kvm_vm *vm)
/* The format must be set to 0. 0 for VMX, 1 for SVM. */
set_default_vmx_state(state, state_sz);
state->format = 1;
- test_nested_state_expect_einval(vm, state);
+ test_nested_state_expect_einval(vcpu, state);
/*
* We cannot virtualize anything if the guest does not have VMX
* enabled.
*/
set_default_vmx_state(state, state_sz);
- test_nested_state_expect_einval(vm, state);
+ test_nested_state_expect_einval(vcpu, state);
/*
* We cannot virtualize anything if the guest does not have VMX
@@ -114,50 +111,59 @@ void test_vmx_nested_state(struct kvm_vm *vm)
*/
set_default_vmx_state(state, state_sz);
state->hdr.vmx.vmxon_pa = -1ull;
- test_nested_state_expect_einval(vm, state);
+ test_nested_state_expect_einval(vcpu, state);
state->hdr.vmx.vmcs12_pa = -1ull;
state->flags = KVM_STATE_NESTED_EVMCS;
- test_nested_state_expect_einval(vm, state);
+ test_nested_state_expect_einval(vcpu, state);
state->flags = 0;
- test_nested_state(vm, state);
+ test_nested_state(vcpu, state);
/* Enable VMX in the guest CPUID. */
- vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+ vcpu_set_cpuid_feature(vcpu, X86_FEATURE_VMX);
/*
* Setting vmxon_pa == -1ull and vmcs_pa == -1ull exits early without
- * setting the nested state but flags other than eVMCS must be clear.
- * The eVMCS flag can be set if the enlightened VMCS capability has
- * been enabled.
+ * setting the nested state. When the eVMCS flag is not set, the
+ * expected return value is '0'.
*/
set_default_vmx_state(state, state_sz);
+ state->flags = 0;
state->hdr.vmx.vmxon_pa = -1ull;
state->hdr.vmx.vmcs12_pa = -1ull;
- test_nested_state_expect_einval(vm, state);
+ test_nested_state(vcpu, state);
- state->flags &= KVM_STATE_NESTED_EVMCS;
+ /*
+ * When eVMCS is supported, the eVMCS flag can only be set if the
+ * enlightened VMCS capability has been enabled.
+ */
if (have_evmcs) {
- test_nested_state_expect_einval(vm, state);
- vcpu_enable_evmcs(vm, VCPU_ID);
+ state->flags = KVM_STATE_NESTED_EVMCS;
+ test_nested_state_expect_einval(vcpu, state);
+ vcpu_enable_evmcs(vcpu);
+ test_nested_state(vcpu, state);
}
- test_nested_state(vm, state);
/* It is invalid to have vmxon_pa == -1ull and SMM flags non-zero. */
state->hdr.vmx.smm.flags = 1;
- test_nested_state_expect_einval(vm, state);
+ test_nested_state_expect_einval(vcpu, state);
+
+ /* Invalid flags are rejected. */
+ set_default_vmx_state(state, state_sz);
+ state->hdr.vmx.flags = ~0;
+ test_nested_state_expect_einval(vcpu, state);
/* It is invalid to have vmxon_pa == -1ull and vmcs_pa != -1ull. */
set_default_vmx_state(state, state_sz);
state->hdr.vmx.vmxon_pa = -1ull;
state->flags = 0;
- test_nested_state_expect_einval(vm, state);
+ test_nested_state_expect_einval(vcpu, state);
/* It is invalid to have vmxon_pa set to a non-page aligned address. */
set_default_vmx_state(state, state_sz);
state->hdr.vmx.vmxon_pa = 1;
- test_nested_state_expect_einval(vm, state);
+ test_nested_state_expect_einval(vcpu, state);
/*
* It is invalid to have KVM_STATE_NESTED_SMM_GUEST_MODE and
@@ -167,7 +173,7 @@ void test_vmx_nested_state(struct kvm_vm *vm)
state->flags = KVM_STATE_NESTED_GUEST_MODE |
KVM_STATE_NESTED_RUN_PENDING;
state->hdr.vmx.smm.flags = KVM_STATE_NESTED_SMM_GUEST_MODE;
- test_nested_state_expect_einval(vm, state);
+ test_nested_state_expect_einval(vcpu, state);
/*
* It is invalid to have any of the SMM flags set besides:
@@ -177,29 +183,50 @@ void test_vmx_nested_state(struct kvm_vm *vm)
set_default_vmx_state(state, state_sz);
state->hdr.vmx.smm.flags = ~(KVM_STATE_NESTED_SMM_GUEST_MODE |
KVM_STATE_NESTED_SMM_VMXON);
- test_nested_state_expect_einval(vm, state);
+ test_nested_state_expect_einval(vcpu, state);
/* Outside SMM, SMM flags must be zero. */
set_default_vmx_state(state, state_sz);
state->flags = 0;
state->hdr.vmx.smm.flags = KVM_STATE_NESTED_SMM_GUEST_MODE;
- test_nested_state_expect_einval(vm, state);
+ test_nested_state_expect_einval(vcpu, state);
+
+ /*
+ * Size must be large enough to fit kvm_nested_state and vmcs12
+ * if VMCS12 physical address is set
+ */
+ set_default_vmx_state(state, state_sz);
+ state->size = sizeof(*state);
+ state->flags = 0;
+ test_nested_state_expect_einval(vcpu, state);
+
+ set_default_vmx_state(state, state_sz);
+ state->size = sizeof(*state);
+ state->flags = 0;
+ state->hdr.vmx.vmcs12_pa = -1;
+ test_nested_state(vcpu, state);
+
+ /*
+ * KVM_SET_NESTED_STATE succeeds with invalid VMCS
+ * contents but L2 not running.
+ */
+ set_default_vmx_state(state, state_sz);
+ state->flags = 0;
+ test_nested_state(vcpu, state);
- /* Size must be large enough to fit kvm_nested_state and vmcs12. */
+ /* Invalid flags are rejected, even if no VMCS loaded. */
set_default_vmx_state(state, state_sz);
state->size = sizeof(*state);
- test_nested_state(vm, state);
+ state->flags = 0;
+ state->hdr.vmx.vmcs12_pa = -1;
+ state->hdr.vmx.flags = ~0;
+ test_nested_state_expect_einval(vcpu, state);
/* vmxon_pa cannot be the same address as vmcs_pa. */
set_default_vmx_state(state, state_sz);
state->hdr.vmx.vmxon_pa = 0;
state->hdr.vmx.vmcs12_pa = 0;
- test_nested_state_expect_einval(vm, state);
-
- /* The revision id for vmcs12 must be VMCS12_REVISION. */
- set_default_vmx_state(state, state_sz);
- set_revision_id_for_vmcs12(state, 0);
- test_nested_state_expect_einval(vm, state);
+ test_nested_state_expect_einval(vcpu, state);
/*
* Test that if we leave nesting the state reflects that when we get
@@ -209,8 +236,8 @@ void test_vmx_nested_state(struct kvm_vm *vm)
state->hdr.vmx.vmxon_pa = -1ull;
state->hdr.vmx.vmcs12_pa = -1ull;
state->flags = 0;
- test_nested_state(vm, state);
- vcpu_nested_state_get(vm, VCPU_ID, state);
+ test_nested_state(vcpu, state);
+ vcpu_nested_state_get(vcpu, state);
TEST_ASSERT(state->size >= sizeof(*state) && state->size <= state_sz,
"Size must be between %ld and %d. The size returned was %d.",
sizeof(*state), state_sz, state->size);
@@ -224,29 +251,32 @@ int main(int argc, char *argv[])
{
struct kvm_vm *vm;
struct kvm_nested_state state;
+ struct kvm_vcpu *vcpu;
have_evmcs = kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS);
- if (!kvm_check_cap(KVM_CAP_NESTED_STATE)) {
- print_skip("KVM_CAP_NESTED_STATE not available");
- exit(KSFT_SKIP);
- }
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
/*
* AMD currently does not implement set_nested_state, so for now we
* just early out.
*/
- nested_vmx_check_supported();
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
- vm = vm_create_default(VCPU_ID, 0, 0);
+ vm = vm_create_with_one_vcpu(&vcpu, NULL);
+
+ /*
+ * First run tests with VMX disabled to check error handling.
+ */
+ vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_VMX);
/* Passing a NULL kvm_nested_state causes a EFAULT. */
- test_nested_state_expect_efault(vm, NULL);
+ test_nested_state_expect_efault(vcpu, NULL);
/* 'size' cannot be smaller than sizeof(kvm_nested_state). */
set_default_state(&state);
state.size = 0;
- test_nested_state_expect_einval(vm, &state);
+ test_nested_state_expect_einval(vcpu, &state);
/*
* Setting the flags 0xf fails the flags check. The only flags that
@@ -257,7 +287,7 @@ int main(int argc, char *argv[])
*/
set_default_state(&state);
state.flags = 0xf;
- test_nested_state_expect_einval(vm, &state);
+ test_nested_state_expect_einval(vcpu, &state);
/*
* If KVM_STATE_NESTED_RUN_PENDING is set then
@@ -265,9 +295,9 @@ int main(int argc, char *argv[])
*/
set_default_state(&state);
state.flags = KVM_STATE_NESTED_RUN_PENDING;
- test_nested_state_expect_einval(vm, &state);
+ test_nested_state_expect_einval(vcpu, &state);
- test_vmx_nested_state(vm);
+ test_vmx_nested_state(vcpu);
kvm_vm_free(vm);
return 0;
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
index fbe8417cbc2c..2ceb5c78c442 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
@@ -32,9 +32,6 @@
#define MSR_IA32_TSC_ADJUST 0x3b
#endif
-#define PAGE_SIZE 4096
-#define VCPU_ID 5
-
#define TSC_ADJUST_VALUE (1ll << 32)
#define TSC_OFFSET_VALUE -(1ll << 48)
@@ -52,11 +49,6 @@ enum {
NUM_VMX_PAGES,
};
-struct kvm_single_msr {
- struct kvm_msrs header;
- struct kvm_msr_entry entry;
-} __attribute__((packed));
-
/* The virtual machine object. */
static struct kvm_vm *vm;
@@ -128,29 +120,25 @@ static void report(int64_t val)
int main(int argc, char *argv[])
{
vm_vaddr_t vmx_pages_gva;
+ struct kvm_vcpu *vcpu;
- nested_vmx_check_supported();
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
- vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
- vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+ vm = vm_create_with_one_vcpu(&vcpu, (void *) l1_guest_code);
/* Allocate VMX pages and shared descriptors (vmx_pages). */
vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
+ vcpu_args_set(vcpu, 1, vmx_pages_gva);
for (;;) {
- volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
struct ucall uc;
- vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
- switch (get_ucall(vm, VCPU_ID, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_ABORT:
- TEST_FAIL("%s", (const char *)uc.args[0]);
+ REPORT_GUEST_ASSERT(uc);
/* NOT REACHED */
case UCALL_SYNC:
report(uc.args[1]);
@@ -162,7 +150,7 @@ int main(int argc, char *argv[])
}
}
- kvm_vm_free(vm);
done:
+ kvm_vm_free(vm);
return 0;
}
diff --git a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
new file mode 100644
index 000000000000..725c206ba0b9
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
@@ -0,0 +1,491 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * xapic_ipi_test
+ *
+ * Copyright (C) 2020, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Test that when the APIC is in xAPIC mode, a vCPU can send an IPI to wake
+ * another vCPU that is halted when KVM's backing page for the APIC access
+ * address has been moved by mm.
+ *
+ * The test starts two vCPUs: one that sends IPIs and one that continually
+ * executes HLT. The sender checks that the halter has woken from the HLT and
+ * has reentered HLT before sending the next IPI. While the vCPUs are running,
+ * the host continually calls migrate_pages to move all of the process' pages
+ * amongst the available numa nodes on the machine.
+ *
+ * Migration is a command line option; on non-numa machines the test will
+ * exit with an error.  It is still useful on non-numa machines for testing IPIs.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <getopt.h>
+#include <pthread.h>
+#include <inttypes.h>
+#include <string.h>
+#include <time.h>
+
+#include "kvm_util.h"
+#include "numaif.h"
+#include "processor.h"
+#include "test_util.h"
+#include "vmx.h"
+
+/* Default running time for the test */
+#define DEFAULT_RUN_SECS 3
+
+/* Default delay between migrate_pages calls (microseconds) */
+#define DEFAULT_DELAY_USECS 500000
+
+/*
+ * Vector for IPI from sender vCPU to halting vCPU.
+ * Value is arbitrary and was chosen for the alternating bit pattern. Any
+ * value should work.
+ */
+#define IPI_VECTOR 0xa5
+
+/*
+ * Incremented in the IPI handler. Provides evidence to the sender that the IPI
+ * arrived at the destination
+ */
+static volatile uint64_t ipis_rcvd;
+
+/* Data struct shared between host main thread and vCPUs */
+struct test_data_page {
+ uint32_t halter_apic_id;
+ volatile uint64_t hlt_count;
+ volatile uint64_t wake_count;
+ uint64_t ipis_sent;
+ uint64_t migrations_attempted;
+ uint64_t migrations_completed;
+ uint32_t icr;
+ uint32_t icr2;
+ uint32_t halter_tpr;
+ uint32_t halter_ppr;
+
+ /*
+ * Record local version register as a cross-check that APIC access
+ * worked. Value should match what KVM reports (APIC_VERSION in
+ * arch/x86/kvm/lapic.c). If test is failing, check that values match
+ * to determine whether APIC access exits are working.
+ */
+ uint32_t halter_lvr;
+};
+
+struct thread_params {
+ struct test_data_page *data;
+ struct kvm_vcpu *vcpu;
+ uint64_t *pipis_rcvd; /* host address of ipis_rcvd global */
+};
+
+void verify_apic_base_addr(void)
+{
+ uint64_t msr = rdmsr(MSR_IA32_APICBASE);
+ uint64_t base = GET_APIC_BASE(msr);
+
+ GUEST_ASSERT(base == APIC_DEFAULT_GPA);
+}
+
+static void halter_guest_code(struct test_data_page *data)
+{
+ verify_apic_base_addr();
+ xapic_enable();
+
+ data->halter_apic_id = GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID));
+ data->halter_lvr = xapic_read_reg(APIC_LVR);
+
+ /*
+ * Loop forever HLTing and recording halts & wakes. Disable interrupts
+ * each time around to minimize window between signaling the pending
+ * halt to the sender vCPU and executing the halt. No need to disable on
+ * first run as this vCPU executes first and the host waits for it to
+ * signal going into first halt before starting the sender vCPU. Record
+ * TPR and PPR for diagnostic purposes in case the test fails.
+ */
+ for (;;) {
+ data->halter_tpr = xapic_read_reg(APIC_TASKPRI);
+ data->halter_ppr = xapic_read_reg(APIC_PROCPRI);
+ data->hlt_count++;
+ asm volatile("sti; hlt; cli");
+ data->wake_count++;
+ }
+}
+
+/*
+ * Runs on halter vCPU when IPI arrives. Write an arbitrary non-zero value to
+ * enable diagnosing errant writes to the APIC access address backing page in
+ * case of test failure.
+ */
+static void guest_ipi_handler(struct ex_regs *regs)
+{
+ ipis_rcvd++;
+ xapic_write_reg(APIC_EOI, 77);
+}
+
+static void sender_guest_code(struct test_data_page *data)
+{
+ uint64_t last_wake_count;
+ uint64_t last_hlt_count;
+ uint64_t last_ipis_rcvd_count;
+ uint32_t icr_val;
+ uint32_t icr2_val;
+ uint64_t tsc_start;
+
+ verify_apic_base_addr();
+ xapic_enable();
+
+ /*
+ * Init interrupt command register for sending IPIs
+ *
+ * Delivery mode=fixed, per SDM:
+ * "Delivers the interrupt specified in the vector field to the target
+ * processor."
+ *
+ * Destination mode=physical i.e. specify target by its local APIC
+ * ID. This vCPU assumes that the halter vCPU has already started and
+ * set data->halter_apic_id.
+ */
+ icr_val = (APIC_DEST_PHYSICAL | APIC_DM_FIXED | IPI_VECTOR);
+ icr2_val = SET_APIC_DEST_FIELD(data->halter_apic_id);
+ data->icr = icr_val;
+ data->icr2 = icr2_val;
+
+ last_wake_count = data->wake_count;
+ last_hlt_count = data->hlt_count;
+ last_ipis_rcvd_count = ipis_rcvd;
+ for (;;) {
+ /*
+ * Send IPI to halter vCPU.
+ * First IPI can be sent unconditionally because halter vCPU
+ * starts earlier.
+ */
+ xapic_write_reg(APIC_ICR2, icr2_val);
+ xapic_write_reg(APIC_ICR, icr_val);
+ data->ipis_sent++;
+
+ /*
+ * Wait up to ~1 sec for halter to indicate that it has:
+ * 1. Received the IPI
+ * 2. Woken up from the halt
+ * 3. Gone back into halt
+		 * Current CPUs typically run at 2.x GHz which is ~2
+ * billion ticks per second.
+ */
+ tsc_start = rdtsc();
+ while (rdtsc() - tsc_start < 2000000000) {
+ if ((ipis_rcvd != last_ipis_rcvd_count) &&
+ (data->wake_count != last_wake_count) &&
+ (data->hlt_count != last_hlt_count))
+ break;
+ }
+
+ GUEST_ASSERT((ipis_rcvd != last_ipis_rcvd_count) &&
+ (data->wake_count != last_wake_count) &&
+ (data->hlt_count != last_hlt_count));
+
+ last_wake_count = data->wake_count;
+ last_hlt_count = data->hlt_count;
+ last_ipis_rcvd_count = ipis_rcvd;
+ }
+}
+
+static void *vcpu_thread(void *arg)
+{
+ struct thread_params *params = (struct thread_params *)arg;
+ struct kvm_vcpu *vcpu = params->vcpu;
+ struct ucall uc;
+ int old;
+ int r;
+
+ r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
+ TEST_ASSERT(r == 0,
+ "pthread_setcanceltype failed on vcpu_id=%u with errno=%d",
+ vcpu->id, r);
+
+ fprintf(stderr, "vCPU thread running vCPU %u\n", vcpu->id);
+ vcpu_run(vcpu);
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ if (get_ucall(vcpu, &uc) == UCALL_ABORT) {
+ TEST_ASSERT(false,
+ "vCPU %u exited with error: %s.\n"
+ "Sending vCPU sent %lu IPIs to halting vCPU\n"
+ "Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n"
+ "Halter TPR=%#x PPR=%#x LVR=%#x\n"
+ "Migrations attempted: %lu\n"
+ "Migrations completed: %lu",
+ vcpu->id, (const char *)uc.args[0],
+ params->data->ipis_sent, params->data->hlt_count,
+ params->data->wake_count,
+ *params->pipis_rcvd, params->data->halter_tpr,
+ params->data->halter_ppr, params->data->halter_lvr,
+ params->data->migrations_attempted,
+ params->data->migrations_completed);
+ }
+
+ return NULL;
+}
+
+static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu)
+{
+ void *retval;
+ int r;
+
+ r = pthread_cancel(thread);
+ TEST_ASSERT(r == 0,
+ "pthread_cancel on vcpu_id=%d failed with errno=%d",
+ vcpu->id, r);
+
+ r = pthread_join(thread, &retval);
+ TEST_ASSERT(r == 0,
+ "pthread_join on vcpu_id=%d failed with errno=%d",
+ vcpu->id, r);
+ TEST_ASSERT(retval == PTHREAD_CANCELED,
+ "expected retval=%p, got %p", PTHREAD_CANCELED,
+ retval);
+}
+
+void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs,
+ uint64_t *pipis_rcvd)
+{
+ long pages_not_moved;
+ unsigned long nodemask = 0;
+ unsigned long nodemasks[sizeof(nodemask) * 8];
+ int nodes = 0;
+ time_t start_time, last_update, now;
+ time_t interval_secs = 1;
+ int i, r;
+ int from, to;
+ unsigned long bit;
+ uint64_t hlt_count;
+ uint64_t wake_count;
+ uint64_t ipis_sent;
+
+ fprintf(stderr, "Calling migrate_pages every %d microseconds\n",
+ delay_usecs);
+
+ /* Get set of first 64 numa nodes available */
+ r = get_mempolicy(NULL, &nodemask, sizeof(nodemask) * 8,
+ 0, MPOL_F_MEMS_ALLOWED);
+ TEST_ASSERT(r == 0, "get_mempolicy failed errno=%d", errno);
+
+ fprintf(stderr, "Numa nodes found amongst first %lu possible nodes "
+ "(each 1-bit indicates node is present): %#lx\n",
+ sizeof(nodemask) * 8, nodemask);
+
+ /* Init array of masks containing a single-bit in each, one for each
+ * available node. migrate_pages called below requires specifying nodes
+ * as bit masks.
+ */
+ for (i = 0, bit = 1; i < sizeof(nodemask) * 8; i++, bit <<= 1) {
+ if (nodemask & bit) {
+ nodemasks[nodes] = nodemask & bit;
+ nodes++;
+ }
+ }
+
+ TEST_ASSERT(nodes > 1,
+ "Did not find at least 2 numa nodes. Can't do migration");
+
+ fprintf(stderr, "Migrating amongst %d nodes found\n", nodes);
+
+ from = 0;
+ to = 1;
+ start_time = time(NULL);
+ last_update = start_time;
+
+ ipis_sent = data->ipis_sent;
+ hlt_count = data->hlt_count;
+ wake_count = data->wake_count;
+
+ while ((int)(time(NULL) - start_time) < run_secs) {
+ data->migrations_attempted++;
+
+ /*
+ * migrate_pages with PID=0 will migrate all pages of this
+ * process between the nodes specified as bitmasks. The page
+ * backing the APIC access address belongs to this process
+ * because it is allocated by KVM in the context of the
+		 * KVM_CREATE_VCPU ioctl.  If that assumption ever changes, this
+		 * test may break or give a false positive signal.
+ */
+ pages_not_moved = migrate_pages(0, sizeof(nodemasks[from]),
+ &nodemasks[from],
+ &nodemasks[to]);
+ if (pages_not_moved < 0)
+ fprintf(stderr,
+ "migrate_pages failed, errno=%d\n", errno);
+ else if (pages_not_moved > 0)
+ fprintf(stderr,
+ "migrate_pages could not move %ld pages\n",
+ pages_not_moved);
+ else
+ data->migrations_completed++;
+
+ from = to;
+ to++;
+ if (to == nodes)
+ to = 0;
+
+ now = time(NULL);
+ if (((now - start_time) % interval_secs == 0) &&
+ (now != last_update)) {
+ last_update = now;
+ fprintf(stderr,
+ "%lu seconds: Migrations attempted=%lu completed=%lu, "
+ "IPIs sent=%lu received=%lu, HLTs=%lu wakes=%lu\n",
+ now - start_time, data->migrations_attempted,
+ data->migrations_completed,
+ data->ipis_sent, *pipis_rcvd,
+ data->hlt_count, data->wake_count);
+
+ TEST_ASSERT(ipis_sent != data->ipis_sent &&
+ hlt_count != data->hlt_count &&
+ wake_count != data->wake_count,
+ "IPI, HLT and wake count have not increased "
+ "in the last %lu seconds. "
+ "HLTer is likely hung.", interval_secs);
+
+ ipis_sent = data->ipis_sent;
+ hlt_count = data->hlt_count;
+ wake_count = data->wake_count;
+ }
+ usleep(delay_usecs);
+ }
+}
+
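The loop above leans on a few migrate_pages() details: pid 0 means the calling process, nodes are passed as single-bit masks, and the return value counts pages that could not be moved. As a standalone refresher, here is a minimal sketch (the helper name and node numbers are illustrative assumptions, not part of the patch); link with -lnuma:

#include <numaif.h>	/* migrate_pages() */
#include <stdio.h>

static void migrate_self_node0_to_node1(void)
{
	unsigned long from = 1UL << 0;	/* single-bit mask: node 0 */
	unsigned long to   = 1UL << 1;	/* single-bit mask: node 1 */
	long not_moved;

	/* pid 0 == the calling process; maxnode is the width of the masks in bits. */
	not_moved = migrate_pages(0, sizeof(from) * 8, &from, &to);
	if (not_moved < 0)
		perror("migrate_pages");
	else if (not_moved > 0)
		fprintf(stderr, "%ld pages could not be moved\n", not_moved);
}
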
+void get_cmdline_args(int argc, char *argv[], int *run_secs,
+ bool *migrate, int *delay_usecs)
+{
+ for (;;) {
+ int opt = getopt(argc, argv, "s:d:m");
+
+ if (opt == -1)
+ break;
+ switch (opt) {
+ case 's':
+ *run_secs = parse_size(optarg);
+ break;
+ case 'm':
+ *migrate = true;
+ break;
+ case 'd':
+ *delay_usecs = parse_size(optarg);
+ break;
+ default:
+ TEST_ASSERT(false,
+ "Usage: -s <runtime seconds>. Default is %d seconds.\n"
+ "-m adds calls to migrate_pages while vCPUs are running."
+ " Default is no migrations.\n"
+ "-d <delay microseconds> - delay between migrate_pages() calls."
+ " Default is %d microseconds.",
+ DEFAULT_RUN_SECS, DEFAULT_DELAY_USECS);
+ }
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ int r;
+ int wait_secs;
+ const int max_halter_wait = 10;
+ int run_secs = 0;
+ int delay_usecs = 0;
+ struct test_data_page *data;
+ vm_vaddr_t test_data_page_vaddr;
+ bool migrate = false;
+ pthread_t threads[2];
+ struct thread_params params[2];
+ struct kvm_vm *vm;
+ uint64_t *pipis_rcvd;
+
+ get_cmdline_args(argc, argv, &run_secs, &migrate, &delay_usecs);
+ if (run_secs <= 0)
+ run_secs = DEFAULT_RUN_SECS;
+ if (delay_usecs <= 0)
+ delay_usecs = DEFAULT_DELAY_USECS;
+
+ vm = vm_create_with_one_vcpu(&params[0].vcpu, halter_guest_code);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(params[0].vcpu);
+ vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler);
+
+ virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
+
+ params[1].vcpu = vm_vcpu_add(vm, 1, sender_guest_code);
+
+ test_data_page_vaddr = vm_vaddr_alloc_page(vm);
+ data = addr_gva2hva(vm, test_data_page_vaddr);
+ memset(data, 0, sizeof(*data));
+ params[0].data = data;
+ params[1].data = data;
+
+ vcpu_args_set(params[0].vcpu, 1, test_data_page_vaddr);
+ vcpu_args_set(params[1].vcpu, 1, test_data_page_vaddr);
+
+ pipis_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ipis_rcvd);
+ params[0].pipis_rcvd = pipis_rcvd;
+ params[1].pipis_rcvd = pipis_rcvd;
+
+ /* Start halter vCPU thread and wait for it to execute first HLT. */
+ r = pthread_create(&threads[0], NULL, vcpu_thread, &params[0]);
+ TEST_ASSERT(r == 0,
+ "pthread_create halter failed errno=%d", errno);
+ fprintf(stderr, "Halter vCPU thread started\n");
+
+ wait_secs = 0;
+ while ((wait_secs < max_halter_wait) && !data->hlt_count) {
+ sleep(1);
+ wait_secs++;
+ }
+
+ TEST_ASSERT(data->hlt_count,
+ "Halter vCPU did not execute first HLT within %d seconds",
+ max_halter_wait);
+
+ fprintf(stderr,
+ "Halter vCPU thread reported its APIC ID: %u after %d seconds.\n",
+ data->halter_apic_id, wait_secs);
+
+ r = pthread_create(&threads[1], NULL, vcpu_thread, &params[1]);
+ TEST_ASSERT(r == 0, "pthread_create sender failed errno=%d", errno);
+
+ fprintf(stderr,
+ "IPI sender vCPU thread started. Letting vCPUs run for %d seconds.\n",
+ run_secs);
+
+ if (!migrate)
+ sleep(run_secs);
+ else
+ do_migrations(data, run_secs, delay_usecs, pipis_rcvd);
+
+ /*
+ * Cancel threads and wait for them to stop.
+ */
+ cancel_join_vcpu_thread(threads[0], params[0].vcpu);
+ cancel_join_vcpu_thread(threads[1], params[1].vcpu);
+
+ fprintf(stderr,
+ "Test successful after running for %d seconds.\n"
+ "Sending vCPU sent %lu IPIs to halting vCPU\n"
+ "Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n"
+ "Halter APIC ID=%#x\n"
+ "Sender ICR value=%#x ICR2 value=%#x\n"
+ "Halter TPR=%#x PPR=%#x LVR=%#x\n"
+ "Migrations attempted: %lu\n"
+ "Migrations completed: %lu\n",
+ run_secs, data->ipis_sent,
+ data->hlt_count, data->wake_count, *pipis_rcvd,
+ data->halter_apic_id,
+ data->icr, data->icr2,
+ data->halter_tpr, data->halter_ppr, data->halter_lvr,
+ data->migrations_attempted, data->migrations_completed);
+
+ kvm_vm_free(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c
new file mode 100644
index 000000000000..ab75b873a4ad
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c
@@ -0,0 +1,215 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "apic.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+
+struct xapic_vcpu {
+ struct kvm_vcpu *vcpu;
+ bool is_x2apic;
+};
+
+static void xapic_guest_code(void)
+{
+ asm volatile("cli");
+
+ xapic_enable();
+
+ while (1) {
+ uint64_t val = (u64)xapic_read_reg(APIC_IRR) |
+ (u64)xapic_read_reg(APIC_IRR + 0x10) << 32;
+
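+		/* Writing ICR (low dword) sends the IPI, so set ICR2 first. */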
+ xapic_write_reg(APIC_ICR2, val >> 32);
+ xapic_write_reg(APIC_ICR, val);
+ GUEST_SYNC(val);
+ }
+}
+
+static void x2apic_guest_code(void)
+{
+ asm volatile("cli");
+
+ x2apic_enable();
+
+ do {
+ uint64_t val = x2apic_read_reg(APIC_IRR) |
+ x2apic_read_reg(APIC_IRR + 0x10) << 32;
+
+ x2apic_write_reg(APIC_ICR, val);
+ GUEST_SYNC(val);
+ } while (1);
+}
+
+static void ____test_icr(struct xapic_vcpu *x, uint64_t val)
+{
+ struct kvm_vcpu *vcpu = x->vcpu;
+ struct kvm_lapic_state xapic;
+ struct ucall uc;
+ uint64_t icr;
+
+ /*
+	 * Tell the guest what ICR value to write.  Use the IRR to pass info;
+ * all bits are valid and should not be modified by KVM (ignoring the
+ * fact that vectors 0-15 are technically illegal).
+ */
+ vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic);
+ *((u32 *)&xapic.regs[APIC_IRR]) = val;
+ *((u32 *)&xapic.regs[APIC_IRR + 0x10]) = val >> 32;
+ vcpu_ioctl(vcpu, KVM_SET_LAPIC, &xapic);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC);
+ TEST_ASSERT_EQ(uc.args[1], val);
+
+ vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic);
+ icr = (u64)(*((u32 *)&xapic.regs[APIC_ICR])) |
+ (u64)(*((u32 *)&xapic.regs[APIC_ICR2])) << 32;
+ if (!x->is_x2apic) {
+ val &= (-1u | (0xffull << (32 + 24)));
+ TEST_ASSERT_EQ(icr, val & ~APIC_ICR_BUSY);
+ } else {
+ TEST_ASSERT_EQ(icr & ~APIC_ICR_BUSY, val & ~APIC_ICR_BUSY);
+ }
+}
+
+#define X2APIC_RSVED_BITS_MASK (GENMASK_ULL(31,20) | \
+ GENMASK_ULL(17,16) | \
+ GENMASK_ULL(13,13))
+
+static void __test_icr(struct xapic_vcpu *x, uint64_t val)
+{
+ if (x->is_x2apic) {
+		/*
+		 * Hardware requires reserved bits 31:20, 17:16 and 13 to be
+		 * zero when the vICR register is written, otherwise it raises
+		 * a #GP exception.  Mask those bits out of the value before
+		 * writing it.
+		 */
+ val &= ~X2APIC_RSVED_BITS_MASK;
+ }
+ ____test_icr(x, val | APIC_ICR_BUSY);
+ ____test_icr(x, val & ~(u64)APIC_ICR_BUSY);
+}
+
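For reference, the GENMASK_ULL() terms above are 0xfff00000, 0x00030000 and 0x00002000, so X2APIC_RSVED_BITS_MASK works out to 0xfff32000. A compile-time sanity check along these lines (a sketch, not part of the patch) could sit next to the #define:

/* Sketch: the reserved-bit mask should cover exactly bits 31:20, 17:16 and 13. */
_Static_assert(X2APIC_RSVED_BITS_MASK == 0xfff32000ULL,
	       "unexpected x2APIC reserved-bit mask");
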
+static void test_icr(struct xapic_vcpu *x)
+{
+ struct kvm_vcpu *vcpu = x->vcpu;
+ uint64_t icr, i, j;
+
+ icr = APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_FIXED;
+ for (i = 0; i <= 0xff; i++)
+ __test_icr(x, icr | i);
+
+ icr = APIC_INT_ASSERT | APIC_DM_FIXED;
+ for (i = 0; i <= 0xff; i++)
+ __test_icr(x, icr | i);
+
+ /*
+ * Send all flavors of IPIs to non-existent vCPUs. TODO: use number of
+ * vCPUs, not vcpu.id + 1. Arbitrarily use vector 0xff.
+ */
+ icr = APIC_INT_ASSERT | 0xff;
+ for (i = 0; i < 0xff; i++) {
+ if (i == vcpu->id)
+ continue;
+ for (j = 0; j < 8; j++)
+ __test_icr(x, i << (32 + 24) | icr | (j << 8));
+ }
+
+ /* And again with a shorthand destination for all types of IPIs. */
+ icr = APIC_DEST_ALLBUT | APIC_INT_ASSERT;
+ for (i = 0; i < 8; i++)
+ __test_icr(x, icr | (i << 8));
+
+	/* And a few garbage values; just make sure they're IRQs (blocked). */
+ __test_icr(x, 0xa5a5a5a5a5a5a5a5 & ~APIC_DM_FIXED_MASK);
+ __test_icr(x, 0x5a5a5a5a5a5a5a5a & ~APIC_DM_FIXED_MASK);
+ __test_icr(x, -1ull & ~APIC_DM_FIXED_MASK);
+}
+
+static void __test_apic_id(struct kvm_vcpu *vcpu, uint64_t apic_base)
+{
+ uint32_t apic_id, expected;
+ struct kvm_lapic_state xapic;
+
+ vcpu_set_msr(vcpu, MSR_IA32_APICBASE, apic_base);
+
+ vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic);
+
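+	/*
+	 * xAPIC keeps the ID in bits 31:24 of the APIC_ID register; x2APIC
+	 * (with 32-bit IDs enabled) uses the full 32 bits.
+	 */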
+ expected = apic_base & X2APIC_ENABLE ? vcpu->id : vcpu->id << 24;
+ apic_id = *((u32 *)&xapic.regs[APIC_ID]);
+
+ TEST_ASSERT(apic_id == expected,
+ "APIC_ID not set back to %s format; wanted = %x, got = %x",
+ (apic_base & X2APIC_ENABLE) ? "x2APIC" : "xAPIC",
+ expected, apic_id);
+}
+
+/*
+ * Verify that KVM switches the APIC_ID between xAPIC and x2APIC when userspace
+ * stuffs MSR_IA32_APICBASE. Setting the APIC_ID when x2APIC is enabled and
+ * when the APIC transitions from DISABLED to ENABLED is architectural behavior
+ * (on Intel), whereas the x2APIC => xAPIC transition behavior is KVM ABI, since
+ * attempting to transition from x2APIC to xAPIC without disabling the APIC is
+ * architecturally disallowed.
+ */
+static void test_apic_id(void)
+{
+ const uint32_t NR_VCPUS = 3;
+ struct kvm_vcpu *vcpus[NR_VCPUS];
+ uint64_t apic_base;
+ struct kvm_vm *vm;
+ int i;
+
+ vm = vm_create_with_vcpus(NR_VCPUS, NULL, vcpus);
+ vm_enable_cap(vm, KVM_CAP_X2APIC_API, KVM_X2APIC_API_USE_32BIT_IDS);
+
+ for (i = 0; i < NR_VCPUS; i++) {
+ apic_base = vcpu_get_msr(vcpus[i], MSR_IA32_APICBASE);
+
+ TEST_ASSERT(apic_base & MSR_IA32_APICBASE_ENABLE,
+ "APIC not in ENABLED state at vCPU RESET");
+ TEST_ASSERT(!(apic_base & X2APIC_ENABLE),
+ "APIC not in xAPIC mode at vCPU RESET");
+
+ __test_apic_id(vcpus[i], apic_base);
+ __test_apic_id(vcpus[i], apic_base | X2APIC_ENABLE);
+ __test_apic_id(vcpus[i], apic_base);
+ }
+
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ struct xapic_vcpu x = {
+ .vcpu = NULL,
+ .is_x2apic = true,
+ };
+ struct kvm_vm *vm;
+
+ vm = vm_create_with_one_vcpu(&x.vcpu, x2apic_guest_code);
+ test_icr(&x);
+ kvm_vm_free(vm);
+
+ /*
+ * Use a second VM for the xAPIC test so that x2APIC can be hidden from
+ * the guest in order to test AVIC. KVM disallows changing CPUID after
+ * KVM_RUN and AVIC is disabled if _any_ vCPU is allowed to use x2APIC.
+ */
+ vm = vm_create_with_one_vcpu(&x.vcpu, xapic_guest_code);
+ x.is_x2apic = false;
+
+ vcpu_clear_cpuid_feature(x.vcpu, X86_FEATURE_X2APIC);
+
+ virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
+ test_icr(&x);
+ kvm_vm_free(vm);
+
+ test_apic_id();
+}
diff --git a/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c b/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c
new file mode 100644
index 000000000000..25a0b0db5c3c
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c
@@ -0,0 +1,137 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * XCR0 cpuid test
+ *
+ * Copyright (C) 2022, Google LLC.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+
+/*
+ * Assert that architectural dependency rules are satisfied, e.g. that AVX is
+ * supported if and only if SSE is supported.
+ */
+#define ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0, xfeatures, dependencies) \
+do { \
+ uint64_t __supported = (supported_xcr0) & ((xfeatures) | (dependencies)); \
+ \
+ __GUEST_ASSERT((__supported & (xfeatures)) != (xfeatures) || \
+ __supported == ((xfeatures) | (dependencies)), \
+ "supported = 0x%lx, xfeatures = 0x%llx, dependencies = 0x%llx", \
+ __supported, (xfeatures), (dependencies)); \
+} while (0)
+
+/*
+ * Assert that KVM reports a sane, usable as-is XCR0. Architecturally, a CPU
+ * isn't strictly required to _support_ all XFeatures related to a feature, but
+ * at the same time XSETBV will #GP if bundled XFeatures aren't enabled and
+ * disabled coherently.  E.g. a CPU can technically enumerate support for
+ * XTILE_CFG but not XTILE_DATA, but attempting to enable XTILE_CFG without
+ * XTILE_DATA will #GP.
+ */
+#define ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0, xfeatures) \
+do { \
+ uint64_t __supported = (supported_xcr0) & (xfeatures); \
+ \
+ __GUEST_ASSERT(!__supported || __supported == (xfeatures), \
+ "supported = 0x%lx, xfeatures = 0x%llx", \
+ __supported, (xfeatures)); \
+} while (0)
+
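To make the first macro concrete, here is a worked example (not from the patch) for the AVX check in guest_code() below, where xfeatures = XFEATURE_MASK_YMM and dependencies = XFEATURE_MASK_SSE:

/*
 * supported_xcr0 contains ...   __supported    assertion
 * neither SSE nor YMM           0              passes (YMM not claimed)
 * SSE only                      SSE            passes (YMM not claimed)
 * SSE and YMM                   SSE | YMM      passes (dependency satisfied)
 * YMM only                      YMM            fails  (AVX without SSE)
 */
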
+static void guest_code(void)
+{
+ uint64_t xcr0_reset;
+ uint64_t supported_xcr0;
+ int i, vector;
+
+ set_cr4(get_cr4() | X86_CR4_OSXSAVE);
+
+ xcr0_reset = xgetbv(0);
+ supported_xcr0 = this_cpu_supported_xcr0();
+
+ GUEST_ASSERT(xcr0_reset == XFEATURE_MASK_FP);
+
+ /* Check AVX */
+ ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0,
+ XFEATURE_MASK_YMM,
+ XFEATURE_MASK_SSE);
+
+ /* Check MPX */
+ ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0,
+ XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR);
+
+ /* Check AVX-512 */
+ ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0,
+ XFEATURE_MASK_AVX512,
+ XFEATURE_MASK_SSE | XFEATURE_MASK_YMM);
+ ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0,
+ XFEATURE_MASK_AVX512);
+
+ /* Check AMX */
+ ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0,
+ XFEATURE_MASK_XTILE);
+
+ vector = xsetbv_safe(0, supported_xcr0);
+ __GUEST_ASSERT(!vector,
+ "Expected success on XSETBV(0x%lx), got vector '0x%x'",
+ supported_xcr0, vector);
+
+ for (i = 0; i < 64; i++) {
+ if (supported_xcr0 & BIT_ULL(i))
+ continue;
+
+ vector = xsetbv_safe(0, supported_xcr0 | BIT_ULL(i));
+ __GUEST_ASSERT(vector == GP_VECTOR,
+ "Expected #GP on XSETBV(0x%llx), supported XCR0 = %lx, got vector '0x%x'",
+ BIT_ULL(i), supported_xcr0, vector);
+ }
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_run *run;
+ struct kvm_vm *vm;
+ struct ucall uc;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ run = vcpu->run;
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+
+ while (1) {
+ vcpu_run(vcpu);
+
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Unexpected exit reason: %u (%s),",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+
+done:
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
new file mode 100644
index 000000000000..d2ea0435f4f7
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
@@ -0,0 +1,1156 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright © 2021 Amazon.com, Inc. or its affiliates.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#include <stdint.h>
+#include <time.h>
+#include <sched.h>
+#include <signal.h>
+#include <pthread.h>
+
+#include <sys/eventfd.h>
+
+#define SHINFO_REGION_GVA 0xc0000000ULL
+#define SHINFO_REGION_GPA 0xc0000000ULL
+#define SHINFO_REGION_SLOT 10
+
+#define DUMMY_REGION_GPA (SHINFO_REGION_GPA + (3 * PAGE_SIZE))
+#define DUMMY_REGION_SLOT 11
+
+#define DUMMY_REGION_GPA_2 (SHINFO_REGION_GPA + (4 * PAGE_SIZE))
+#define DUMMY_REGION_SLOT_2 12
+
+#define SHINFO_ADDR (SHINFO_REGION_GPA)
+#define VCPU_INFO_ADDR (SHINFO_REGION_GPA + 0x40)
+#define PVTIME_ADDR (SHINFO_REGION_GPA + PAGE_SIZE)
+#define RUNSTATE_ADDR (SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE - 15)
+
+#define SHINFO_VADDR (SHINFO_REGION_GVA)
+#define VCPU_INFO_VADDR (SHINFO_REGION_GVA + 0x40)
+#define RUNSTATE_VADDR (SHINFO_REGION_GVA + PAGE_SIZE + PAGE_SIZE - 15)
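+/* Note: "- 15" places the runstate area across a page boundary on purpose. */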
+
+#define EVTCHN_VECTOR 0x10
+
+#define EVTCHN_TEST1 15
+#define EVTCHN_TEST2 66
+#define EVTCHN_TIMER 13
+
+enum {
+ TEST_INJECT_VECTOR = 0,
+ TEST_RUNSTATE_runnable,
+ TEST_RUNSTATE_blocked,
+ TEST_RUNSTATE_offline,
+ TEST_RUNSTATE_ADJUST,
+ TEST_RUNSTATE_DATA,
+ TEST_STEAL_TIME,
+ TEST_EVTCHN_MASKED,
+ TEST_EVTCHN_UNMASKED,
+ TEST_EVTCHN_SLOWPATH,
+ TEST_EVTCHN_SEND_IOCTL,
+ TEST_EVTCHN_HCALL,
+ TEST_EVTCHN_HCALL_SLOWPATH,
+ TEST_EVTCHN_HCALL_EVENTFD,
+ TEST_TIMER_SETUP,
+ TEST_TIMER_WAIT,
+ TEST_TIMER_RESTORE,
+ TEST_POLL_READY,
+ TEST_POLL_TIMEOUT,
+ TEST_POLL_MASKED,
+ TEST_POLL_WAKE,
+ SET_VCPU_INFO,
+ TEST_TIMER_PAST,
+ TEST_LOCKING_SEND_RACE,
+ TEST_LOCKING_POLL_RACE,
+ TEST_LOCKING_POLL_TIMEOUT,
+ TEST_DONE,
+
+ TEST_GUEST_SAW_IRQ,
+};
+
+#define XEN_HYPERCALL_MSR 0x40000000
+
+#define MIN_STEAL_TIME 50000
+
+#define SHINFO_RACE_TIMEOUT 2 /* seconds */
+
+#define __HYPERVISOR_set_timer_op 15
+#define __HYPERVISOR_sched_op 29
+#define __HYPERVISOR_event_channel_op 32
+
+#define SCHEDOP_poll 3
+
+#define EVTCHNOP_send 4
+
+#define EVTCHNSTAT_interdomain 2
+
+struct evtchn_send {
+ u32 port;
+};
+
+struct sched_poll {
+ u32 *ports;
+ unsigned int nr_ports;
+ u64 timeout;
+};
+
+struct pvclock_vcpu_time_info {
+ u32 version;
+ u32 pad0;
+ u64 tsc_timestamp;
+ u64 system_time;
+ u32 tsc_to_system_mul;
+ s8 tsc_shift;
+ u8 flags;
+ u8 pad[2];
+} __attribute__((__packed__)); /* 32 bytes */
+
+struct pvclock_wall_clock {
+ u32 version;
+ u32 sec;
+ u32 nsec;
+} __attribute__((__packed__));
+
+struct vcpu_runstate_info {
+ uint32_t state;
+ uint64_t state_entry_time;
+ uint64_t time[5]; /* Extra field for overrun check */
+};
+
+struct compat_vcpu_runstate_info {
+ uint32_t state;
+ uint64_t state_entry_time;
+ uint64_t time[5];
+} __attribute__((__packed__));
+
+struct arch_vcpu_info {
+ unsigned long cr2;
+ unsigned long pad; /* sizeof(vcpu_info_t) == 64 */
+};
+
+struct vcpu_info {
+ uint8_t evtchn_upcall_pending;
+ uint8_t evtchn_upcall_mask;
+ unsigned long evtchn_pending_sel;
+ struct arch_vcpu_info arch;
+ struct pvclock_vcpu_time_info time;
+}; /* 64 bytes (x86) */
+
+struct shared_info {
+ struct vcpu_info vcpu_info[32];
+ unsigned long evtchn_pending[64];
+ unsigned long evtchn_mask[64];
+ struct pvclock_wall_clock wc;
+ uint32_t wc_sec_hi;
+ /* arch_shared_info here */
+};
+
+#define RUNSTATE_running 0
+#define RUNSTATE_runnable 1
+#define RUNSTATE_blocked 2
+#define RUNSTATE_offline 3
+
+static const char *runstate_names[] = {
+ "running",
+ "runnable",
+ "blocked",
+ "offline"
+};
+
+struct {
+ struct kvm_irq_routing info;
+ struct kvm_irq_routing_entry entries[2];
+} irq_routes;
+
+static volatile bool guest_saw_irq;
+
+static void evtchn_handler(struct ex_regs *regs)
+{
+ struct vcpu_info *vi = (void *)VCPU_INFO_VADDR;
+ vi->evtchn_upcall_pending = 0;
+ vi->evtchn_pending_sel = 0;
+ guest_saw_irq = true;
+
+ GUEST_SYNC(TEST_GUEST_SAW_IRQ);
+}
+
+static void guest_wait_for_irq(void)
+{
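+	/* "rep nop" is PAUSE; spin until the IRQ handler flips guest_saw_irq. */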
+ while (!guest_saw_irq)
+ __asm__ __volatile__ ("rep nop" : : : "memory");
+ guest_saw_irq = false;
+}
+
+static void guest_code(void)
+{
+ struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR;
+ int i;
+
+ __asm__ __volatile__(
+ "sti\n"
+ "nop\n"
+ );
+
+ /* Trigger an interrupt injection */
+ GUEST_SYNC(TEST_INJECT_VECTOR);
+
+ guest_wait_for_irq();
+
+ /* Test having the host set runstates manually */
+ GUEST_SYNC(TEST_RUNSTATE_runnable);
+ GUEST_ASSERT(rs->time[RUNSTATE_runnable] != 0);
+ GUEST_ASSERT(rs->state == 0);
+
+ GUEST_SYNC(TEST_RUNSTATE_blocked);
+ GUEST_ASSERT(rs->time[RUNSTATE_blocked] != 0);
+ GUEST_ASSERT(rs->state == 0);
+
+ GUEST_SYNC(TEST_RUNSTATE_offline);
+ GUEST_ASSERT(rs->time[RUNSTATE_offline] != 0);
+ GUEST_ASSERT(rs->state == 0);
+
+ /* Test runstate time adjust */
+ GUEST_SYNC(TEST_RUNSTATE_ADJUST);
+ GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x5a);
+ GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x6b6b);
+
+ /* Test runstate time set */
+ GUEST_SYNC(TEST_RUNSTATE_DATA);
+ GUEST_ASSERT(rs->state_entry_time >= 0x8000);
+ GUEST_ASSERT(rs->time[RUNSTATE_runnable] == 0);
+ GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x6b6b);
+ GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x5a);
+
+ /* sched_yield() should result in some 'runnable' time */
+ GUEST_SYNC(TEST_STEAL_TIME);
+ GUEST_ASSERT(rs->time[RUNSTATE_runnable] >= MIN_STEAL_TIME);
+
+ /* Attempt to deliver a *masked* interrupt */
+ GUEST_SYNC(TEST_EVTCHN_MASKED);
+
+ /* Wait until we see the bit set */
+ struct shared_info *si = (void *)SHINFO_VADDR;
+ while (!si->evtchn_pending[0])
+ __asm__ __volatile__ ("rep nop" : : : "memory");
+
+ /* Now deliver an *unmasked* interrupt */
+ GUEST_SYNC(TEST_EVTCHN_UNMASKED);
+
+ guest_wait_for_irq();
+
+ /* Change memslots and deliver an interrupt */
+ GUEST_SYNC(TEST_EVTCHN_SLOWPATH);
+
+ guest_wait_for_irq();
+
+ /* Deliver event channel with KVM_XEN_HVM_EVTCHN_SEND */
+ GUEST_SYNC(TEST_EVTCHN_SEND_IOCTL);
+
+ guest_wait_for_irq();
+
+ GUEST_SYNC(TEST_EVTCHN_HCALL);
+
+	/*
+	 * Our turn.  Deliver an event channel (to ourselves) with the
+	 * EVTCHNOP_send hypercall.
+	 */
+ struct evtchn_send s = { .port = 127 };
+ xen_hypercall(__HYPERVISOR_event_channel_op, EVTCHNOP_send, &s);
+
+ guest_wait_for_irq();
+
+ GUEST_SYNC(TEST_EVTCHN_HCALL_SLOWPATH);
+
+ /*
+ * Same again, but this time the host has messed with memslots so it
+ * should take the slow path in kvm_xen_set_evtchn().
+ */
+ xen_hypercall(__HYPERVISOR_event_channel_op, EVTCHNOP_send, &s);
+
+ guest_wait_for_irq();
+
+ GUEST_SYNC(TEST_EVTCHN_HCALL_EVENTFD);
+
+	/*
+	 * Deliver an "outbound" event channel to an eventfd which happens to
+	 * be one of our own irqfds.
+	 */
+ s.port = 197;
+ xen_hypercall(__HYPERVISOR_event_channel_op, EVTCHNOP_send, &s);
+
+ guest_wait_for_irq();
+
+ GUEST_SYNC(TEST_TIMER_SETUP);
+
+ /* Set a timer 100ms in the future. */
+ xen_hypercall(__HYPERVISOR_set_timer_op,
+ rs->state_entry_time + 100000000, NULL);
+
+ GUEST_SYNC(TEST_TIMER_WAIT);
+
+ /* Now wait for the timer */
+ guest_wait_for_irq();
+
+ GUEST_SYNC(TEST_TIMER_RESTORE);
+
+ /* The host has 'restored' the timer. Just wait for it. */
+ guest_wait_for_irq();
+
+ GUEST_SYNC(TEST_POLL_READY);
+
+ /* Poll for an event channel port which is already set */
+ u32 ports[1] = { EVTCHN_TIMER };
+ struct sched_poll p = {
+ .ports = ports,
+ .nr_ports = 1,
+ .timeout = 0,
+ };
+
+ xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);
+
+ GUEST_SYNC(TEST_POLL_TIMEOUT);
+
+ /* Poll for an unset port and wait for the timeout. */
+ p.timeout = 100000000;
+ xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);
+
+ GUEST_SYNC(TEST_POLL_MASKED);
+
+ /* A timer will wake the masked port we're waiting on, while we poll */
+ p.timeout = 0;
+ xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);
+
+ GUEST_SYNC(TEST_POLL_WAKE);
+
+	/*
+	 * Set the vcpu_info to point at exactly the place it already is, to
+	 * make sure the attribute is functional.
+	 */
+ GUEST_SYNC(SET_VCPU_INFO);
+
+	/*
+	 * A timer will wake an *unmasked* port, which should wake us with an
+	 * actual interrupt, while we're polling on a different port.
+	 */
+ ports[0]++;
+ p.timeout = 0;
+ xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);
+
+ guest_wait_for_irq();
+
+ GUEST_SYNC(TEST_TIMER_PAST);
+
+ /* Timer should have fired already */
+ guest_wait_for_irq();
+
+ GUEST_SYNC(TEST_LOCKING_SEND_RACE);
+ /* Racing host ioctls */
+
+ guest_wait_for_irq();
+
+ GUEST_SYNC(TEST_LOCKING_POLL_RACE);
+ /* Racing vmcall against host ioctl */
+
+ ports[0] = 0;
+
+ p = (struct sched_poll) {
+ .ports = ports,
+ .nr_ports = 1,
+ .timeout = 0
+ };
+
+wait_for_timer:
+ /*
+ * Poll for a timer wake event while the worker thread is mucking with
+ * the shared info. KVM XEN drops timer IRQs if the shared info is
+ * invalid when the timer expires. Arbitrarily poll 100 times before
+ * giving up and asking the VMM to re-arm the timer. 100 polls should
+ * consume enough time to beat on KVM without taking too long if the
+ * timer IRQ is dropped due to an invalid event channel.
+ */
+ for (i = 0; i < 100 && !guest_saw_irq; i++)
+ __xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);
+
+ /*
+ * Re-send the timer IRQ if it was (likely) dropped due to the timer
+ * expiring while the event channel was invalid.
+ */
+ if (!guest_saw_irq) {
+ GUEST_SYNC(TEST_LOCKING_POLL_TIMEOUT);
+ goto wait_for_timer;
+ }
+ guest_saw_irq = false;
+
+ GUEST_SYNC(TEST_DONE);
+}
+
+static int cmp_timespec(struct timespec *a, struct timespec *b)
+{
+ if (a->tv_sec > b->tv_sec)
+ return 1;
+ else if (a->tv_sec < b->tv_sec)
+ return -1;
+ else if (a->tv_nsec > b->tv_nsec)
+ return 1;
+ else if (a->tv_nsec < b->tv_nsec)
+ return -1;
+ else
+ return 0;
+}
+
+static struct shared_info *shinfo;
+static struct vcpu_info *vinfo;
+static struct kvm_vcpu *vcpu;
+
+static void handle_alrm(int sig)
+{
+ if (vinfo)
+ printf("evtchn_upcall_pending 0x%x\n", vinfo->evtchn_upcall_pending);
+ vcpu_dump(stdout, vcpu, 0);
+ TEST_FAIL("IRQ delivery timed out");
+}
+
+static void *juggle_shinfo_state(void *arg)
+{
+ struct kvm_vm *vm = (struct kvm_vm *)arg;
+
+ struct kvm_xen_hvm_attr cache_activate_gfn = {
+ .type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
+ .u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE
+ };
+
+ struct kvm_xen_hvm_attr cache_deactivate_gfn = {
+ .type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
+ .u.shared_info.gfn = KVM_XEN_INVALID_GFN
+ };
+
+ struct kvm_xen_hvm_attr cache_activate_hva = {
+ .type = KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA,
+ .u.shared_info.hva = (unsigned long)shinfo
+ };
+
+ struct kvm_xen_hvm_attr cache_deactivate_hva = {
+ .type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
+ .u.shared_info.hva = 0
+ };
+
+ int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
+
+ for (;;) {
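+		/*
+		 * pthread_testcancel() provides cancellation points for the
+		 * deferred pthread_cancel() issued by the main thread.
+		 */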
+ __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_activate_gfn);
+ pthread_testcancel();
+ __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_deactivate_gfn);
+
+ if (xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA) {
+ __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_activate_hva);
+ pthread_testcancel();
+ __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_deactivate_hva);
+ }
+ }
+
+ return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+ struct timespec min_ts, max_ts, vm_ts;
+ struct kvm_xen_hvm_attr evt_reset;
+ struct kvm_vm *vm;
+ pthread_t thread;
+ bool verbose;
+ int ret;
+
+ verbose = argc > 1 && (!strncmp(argv[1], "-v", 3) ||
+ !strncmp(argv[1], "--verbose", 10));
+
+ int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
+ TEST_REQUIRE(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO);
+
+ bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE);
+ bool do_runstate_flag = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG);
+ bool do_eventfd_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL);
+ bool do_evtchn_tests = do_eventfd_tests && !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND);
+ bool has_shinfo_hva = !!(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA);
+
+ clock_gettime(CLOCK_REALTIME, &min_ts);
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ /* Map a region for the shared_info page */
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 3, 0);
+ virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 3);
+
+ shinfo = addr_gpa2hva(vm, SHINFO_VADDR);
+
+ int zero_fd = open("/dev/zero", O_RDONLY);
+ TEST_ASSERT(zero_fd != -1, "Failed to open /dev/zero");
+
+ struct kvm_xen_hvm_config hvmc = {
+ .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
+ .msr = XEN_HYPERCALL_MSR,
+ };
+
+	/*
+	 * Let the kernel know that we *will* use it for sending all event
+	 * channels, which lets it intercept SCHEDOP_poll.
+	 */
+ if (do_evtchn_tests)
+ hvmc.flags |= KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
+
+ vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc);
+
+ struct kvm_xen_hvm_attr lm = {
+ .type = KVM_XEN_ATTR_TYPE_LONG_MODE,
+ .u.long_mode = 1,
+ };
+ vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);
+
+ if (do_runstate_flag) {
+ struct kvm_xen_hvm_attr ruf = {
+ .type = KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG,
+ .u.runstate_update_flag = 1,
+ };
+ vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ruf);
+
+ ruf.u.runstate_update_flag = 0;
+ vm_ioctl(vm, KVM_XEN_HVM_GET_ATTR, &ruf);
+ TEST_ASSERT(ruf.u.runstate_update_flag == 1,
+ "Failed to read back RUNSTATE_UPDATE_FLAG attr");
+ }
+
+ struct kvm_xen_hvm_attr ha = {};
+
+ if (has_shinfo_hva) {
+ ha.type = KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA;
+ ha.u.shared_info.hva = (unsigned long)shinfo;
+ } else {
+ ha.type = KVM_XEN_ATTR_TYPE_SHARED_INFO;
+ ha.u.shared_info.gfn = SHINFO_ADDR / PAGE_SIZE;
+ }
+
+ vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ha);
+
+ /*
+ * Test what happens when the HVA of the shinfo page is remapped after
+ * the kernel has a reference to it. But make sure we copy the clock
+ * info over since that's only set at setup time, and we test it later.
+ */
+ struct pvclock_wall_clock wc_copy = shinfo->wc;
+ void *m = mmap(shinfo, PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_PRIVATE, zero_fd, 0);
+ TEST_ASSERT(m == shinfo, "Failed to map /dev/zero over shared info");
+ shinfo->wc = wc_copy;
+
+ struct kvm_xen_vcpu_attr vi = {
+ .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO,
+ .u.gpa = VCPU_INFO_ADDR,
+ };
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &vi);
+
+ struct kvm_xen_vcpu_attr pvclock = {
+ .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
+ .u.gpa = PVTIME_ADDR,
+ };
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &pvclock);
+
+ struct kvm_xen_hvm_attr vec = {
+ .type = KVM_XEN_ATTR_TYPE_UPCALL_VECTOR,
+ .u.vector = EVTCHN_VECTOR,
+ };
+ vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &vec);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+ vm_install_exception_handler(vm, EVTCHN_VECTOR, evtchn_handler);
+
+ if (do_runstate_tests) {
+ struct kvm_xen_vcpu_attr st = {
+ .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
+ .u.gpa = RUNSTATE_ADDR,
+ };
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &st);
+ }
+
+ int irq_fd[2] = { -1, -1 };
+
+ if (do_eventfd_tests) {
+ irq_fd[0] = eventfd(0, 0);
+ irq_fd[1] = eventfd(0, 0);
+
+ /* Unexpected, but not a KVM failure */
+ if (irq_fd[0] == -1 || irq_fd[1] == -1)
+ do_evtchn_tests = do_eventfd_tests = false;
+ }
+
+ if (do_eventfd_tests) {
+ irq_routes.info.nr = 2;
+
+ irq_routes.entries[0].gsi = 32;
+ irq_routes.entries[0].type = KVM_IRQ_ROUTING_XEN_EVTCHN;
+ irq_routes.entries[0].u.xen_evtchn.port = EVTCHN_TEST1;
+ irq_routes.entries[0].u.xen_evtchn.vcpu = vcpu->id;
+ irq_routes.entries[0].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
+
+ irq_routes.entries[1].gsi = 33;
+ irq_routes.entries[1].type = KVM_IRQ_ROUTING_XEN_EVTCHN;
+ irq_routes.entries[1].u.xen_evtchn.port = EVTCHN_TEST2;
+ irq_routes.entries[1].u.xen_evtchn.vcpu = vcpu->id;
+ irq_routes.entries[1].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
+
+ vm_ioctl(vm, KVM_SET_GSI_ROUTING, &irq_routes.info);
+
+ struct kvm_irqfd ifd = { };
+
+ ifd.fd = irq_fd[0];
+ ifd.gsi = 32;
+ vm_ioctl(vm, KVM_IRQFD, &ifd);
+
+ ifd.fd = irq_fd[1];
+ ifd.gsi = 33;
+ vm_ioctl(vm, KVM_IRQFD, &ifd);
+
+ struct sigaction sa = { };
+ sa.sa_handler = handle_alrm;
+ sigaction(SIGALRM, &sa, NULL);
+ }
+
+ struct kvm_xen_vcpu_attr tmr = {
+ .type = KVM_XEN_VCPU_ATTR_TYPE_TIMER,
+ .u.timer.port = EVTCHN_TIMER,
+ .u.timer.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
+ .u.timer.expires_ns = 0
+ };
+
+ if (do_evtchn_tests) {
+ struct kvm_xen_hvm_attr inj = {
+ .type = KVM_XEN_ATTR_TYPE_EVTCHN,
+ .u.evtchn.send_port = 127,
+ .u.evtchn.type = EVTCHNSTAT_interdomain,
+ .u.evtchn.flags = 0,
+ .u.evtchn.deliver.port.port = EVTCHN_TEST1,
+ .u.evtchn.deliver.port.vcpu = vcpu->id + 1,
+ .u.evtchn.deliver.port.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
+ };
+ vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);
+
+ /* Test migration to a different vCPU */
+ inj.u.evtchn.flags = KVM_XEN_EVTCHN_UPDATE;
+ inj.u.evtchn.deliver.port.vcpu = vcpu->id;
+ vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);
+
+ inj.u.evtchn.send_port = 197;
+ inj.u.evtchn.deliver.eventfd.port = 0;
+ inj.u.evtchn.deliver.eventfd.fd = irq_fd[1];
+ inj.u.evtchn.flags = 0;
+ vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);
+
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
+ }
+ vinfo = addr_gpa2hva(vm, VCPU_INFO_VADDR);
+ vinfo->evtchn_upcall_pending = 0;
+
+ struct vcpu_runstate_info *rs = addr_gpa2hva(vm, RUNSTATE_ADDR);
+ rs->state = 0x5a;
+
+ bool evtchn_irq_expected = false;
+
+ for (;;) {
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ case UCALL_SYNC: {
+ struct kvm_xen_vcpu_attr rst;
+ long rundelay;
+
+ if (do_runstate_tests)
+ TEST_ASSERT(rs->state_entry_time == rs->time[0] +
+ rs->time[1] + rs->time[2] + rs->time[3],
+ "runstate times don't add up");
+
+ switch (uc.args[1]) {
+ case TEST_INJECT_VECTOR:
+ if (verbose)
+ printf("Delivering evtchn upcall\n");
+ evtchn_irq_expected = true;
+ vinfo->evtchn_upcall_pending = 1;
+ break;
+
+ case TEST_RUNSTATE_runnable...TEST_RUNSTATE_offline:
+ TEST_ASSERT(!evtchn_irq_expected, "Event channel IRQ not seen");
+ if (!do_runstate_tests)
+ goto done;
+ if (verbose)
+ printf("Testing runstate %s\n", runstate_names[uc.args[1]]);
+ rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT;
+ rst.u.runstate.state = uc.args[1] + RUNSTATE_runnable -
+ TEST_RUNSTATE_runnable;
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst);
+ break;
+
+ case TEST_RUNSTATE_ADJUST:
+ if (verbose)
+ printf("Testing RUNSTATE_ADJUST\n");
+ rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST;
+ memset(&rst.u, 0, sizeof(rst.u));
+ rst.u.runstate.state = (uint64_t)-1;
+ rst.u.runstate.time_blocked =
+ 0x5a - rs->time[RUNSTATE_blocked];
+ rst.u.runstate.time_offline =
+ 0x6b6b - rs->time[RUNSTATE_offline];
+ rst.u.runstate.time_runnable = -rst.u.runstate.time_blocked -
+ rst.u.runstate.time_offline;
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst);
+ break;
+
+ case TEST_RUNSTATE_DATA:
+ if (verbose)
+ printf("Testing RUNSTATE_DATA\n");
+ rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA;
+ memset(&rst.u, 0, sizeof(rst.u));
+ rst.u.runstate.state = RUNSTATE_running;
+ rst.u.runstate.state_entry_time = 0x6b6b + 0x5a;
+ rst.u.runstate.time_blocked = 0x6b6b;
+ rst.u.runstate.time_offline = 0x5a;
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst);
+ break;
+
+ case TEST_STEAL_TIME:
+ if (verbose)
+ printf("Testing steal time\n");
+ /* Yield until scheduler delay exceeds target */
+ rundelay = get_run_delay() + MIN_STEAL_TIME;
+ do {
+ sched_yield();
+ } while (get_run_delay() < rundelay);
+ break;
+
+ case TEST_EVTCHN_MASKED:
+ if (!do_eventfd_tests)
+ goto done;
+ if (verbose)
+ printf("Testing masked event channel\n");
+ shinfo->evtchn_mask[0] = 1UL << EVTCHN_TEST1;
+ eventfd_write(irq_fd[0], 1UL);
+ alarm(1);
+ break;
+
+ case TEST_EVTCHN_UNMASKED:
+ if (verbose)
+ printf("Testing unmasked event channel\n");
+ /* Unmask that, but deliver the other one */
+ shinfo->evtchn_pending[0] = 0;
+ shinfo->evtchn_mask[0] = 0;
+ eventfd_write(irq_fd[1], 1UL);
+ evtchn_irq_expected = true;
+ alarm(1);
+ break;
+
+ case TEST_EVTCHN_SLOWPATH:
+ TEST_ASSERT(!evtchn_irq_expected,
+ "Expected event channel IRQ but it didn't happen");
+ shinfo->evtchn_pending[1] = 0;
+ if (verbose)
+ printf("Testing event channel after memslot change\n");
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ DUMMY_REGION_GPA, DUMMY_REGION_SLOT, 1, 0);
+ eventfd_write(irq_fd[0], 1UL);
+ evtchn_irq_expected = true;
+ alarm(1);
+ break;
+
+ case TEST_EVTCHN_SEND_IOCTL:
+ TEST_ASSERT(!evtchn_irq_expected,
+ "Expected event channel IRQ but it didn't happen");
+ if (!do_evtchn_tests)
+ goto done;
+
+ shinfo->evtchn_pending[0] = 0;
+ if (verbose)
+ printf("Testing injection with KVM_XEN_HVM_EVTCHN_SEND\n");
+
+ struct kvm_irq_routing_xen_evtchn e;
+ e.port = EVTCHN_TEST2;
+ e.vcpu = vcpu->id;
+ e.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
+
+ vm_ioctl(vm, KVM_XEN_HVM_EVTCHN_SEND, &e);
+ evtchn_irq_expected = true;
+ alarm(1);
+ break;
+
+ case TEST_EVTCHN_HCALL:
+ TEST_ASSERT(!evtchn_irq_expected,
+ "Expected event channel IRQ but it didn't happen");
+ shinfo->evtchn_pending[1] = 0;
+
+ if (verbose)
+ printf("Testing guest EVTCHNOP_send direct to evtchn\n");
+ evtchn_irq_expected = true;
+ alarm(1);
+ break;
+
+ case TEST_EVTCHN_HCALL_SLOWPATH:
+ TEST_ASSERT(!evtchn_irq_expected,
+ "Expected event channel IRQ but it didn't happen");
+ shinfo->evtchn_pending[0] = 0;
+
+ if (verbose)
+ printf("Testing guest EVTCHNOP_send direct to evtchn after memslot change\n");
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ DUMMY_REGION_GPA_2, DUMMY_REGION_SLOT_2, 1, 0);
+ evtchn_irq_expected = true;
+ alarm(1);
+ break;
+
+ case TEST_EVTCHN_HCALL_EVENTFD:
+ TEST_ASSERT(!evtchn_irq_expected,
+ "Expected event channel IRQ but it didn't happen");
+ shinfo->evtchn_pending[0] = 0;
+
+ if (verbose)
+ printf("Testing guest EVTCHNOP_send to eventfd\n");
+ evtchn_irq_expected = true;
+ alarm(1);
+ break;
+
+ case TEST_TIMER_SETUP:
+ TEST_ASSERT(!evtchn_irq_expected,
+ "Expected event channel IRQ but it didn't happen");
+ shinfo->evtchn_pending[1] = 0;
+
+ if (verbose)
+ printf("Testing guest oneshot timer\n");
+ break;
+
+ case TEST_TIMER_WAIT:
+ memset(&tmr, 0, sizeof(tmr));
+ tmr.type = KVM_XEN_VCPU_ATTR_TYPE_TIMER;
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
+ TEST_ASSERT(tmr.u.timer.port == EVTCHN_TIMER,
+ "Timer port not returned");
+ TEST_ASSERT(tmr.u.timer.priority == KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
+ "Timer priority not returned");
+ TEST_ASSERT(tmr.u.timer.expires_ns > rs->state_entry_time,
+ "Timer expiry not returned");
+ evtchn_irq_expected = true;
+ alarm(1);
+ break;
+
+ case TEST_TIMER_RESTORE:
+ TEST_ASSERT(!evtchn_irq_expected,
+ "Expected event channel IRQ but it didn't happen");
+ shinfo->evtchn_pending[0] = 0;
+
+ if (verbose)
+ printf("Testing restored oneshot timer\n");
+
+ tmr.u.timer.expires_ns = rs->state_entry_time + 100000000;
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
+ evtchn_irq_expected = true;
+ alarm(1);
+ break;
+
+ case TEST_POLL_READY:
+ TEST_ASSERT(!evtchn_irq_expected,
+ "Expected event channel IRQ but it didn't happen");
+
+ if (verbose)
+ printf("Testing SCHEDOP_poll with already pending event\n");
+ shinfo->evtchn_pending[0] = shinfo->evtchn_mask[0] = 1UL << EVTCHN_TIMER;
+ alarm(1);
+ break;
+
+ case TEST_POLL_TIMEOUT:
+ if (verbose)
+ printf("Testing SCHEDOP_poll timeout\n");
+ shinfo->evtchn_pending[0] = 0;
+ alarm(1);
+ break;
+
+ case TEST_POLL_MASKED:
+ if (verbose)
+ printf("Testing SCHEDOP_poll wake on masked event\n");
+
+ tmr.u.timer.expires_ns = rs->state_entry_time + 100000000;
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
+ alarm(1);
+ break;
+
+ case TEST_POLL_WAKE:
+ shinfo->evtchn_pending[0] = shinfo->evtchn_mask[0] = 0;
+ if (verbose)
+ printf("Testing SCHEDOP_poll wake on unmasked event\n");
+
+ evtchn_irq_expected = true;
+ tmr.u.timer.expires_ns = rs->state_entry_time + 100000000;
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
+
+ /* Read it back and check the pending time is reported correctly */
+ tmr.u.timer.expires_ns = 0;
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
+ TEST_ASSERT(tmr.u.timer.expires_ns == rs->state_entry_time + 100000000,
+ "Timer not reported pending");
+ alarm(1);
+ break;
+
+ case SET_VCPU_INFO:
+ if (has_shinfo_hva) {
+ struct kvm_xen_vcpu_attr vih = {
+ .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO_HVA,
+ .u.hva = (unsigned long)vinfo
+ };
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &vih);
+ }
+ break;
+
+ case TEST_TIMER_PAST:
+ TEST_ASSERT(!evtchn_irq_expected,
+ "Expected event channel IRQ but it didn't happen");
+ /* Read timer and check it is no longer pending */
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
+ TEST_ASSERT(!tmr.u.timer.expires_ns, "Timer still reported pending");
+
+ shinfo->evtchn_pending[0] = 0;
+ if (verbose)
+ printf("Testing timer in the past\n");
+
+ evtchn_irq_expected = true;
+ tmr.u.timer.expires_ns = rs->state_entry_time - 100000000ULL;
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
+ alarm(1);
+ break;
+
+ case TEST_LOCKING_SEND_RACE:
+ TEST_ASSERT(!evtchn_irq_expected,
+ "Expected event channel IRQ but it didn't happen");
+ alarm(0);
+
+ if (verbose)
+ printf("Testing shinfo lock corruption (KVM_XEN_HVM_EVTCHN_SEND)\n");
+
+ ret = pthread_create(&thread, NULL, &juggle_shinfo_state, (void *)vm);
+ TEST_ASSERT(ret == 0, "pthread_create() failed: %s", strerror(ret));
+
+ struct kvm_irq_routing_xen_evtchn uxe = {
+ .port = 1,
+ .vcpu = vcpu->id,
+ .priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL
+ };
+
+ evtchn_irq_expected = true;
+ for (time_t t = time(NULL) + SHINFO_RACE_TIMEOUT; time(NULL) < t;)
+ __vm_ioctl(vm, KVM_XEN_HVM_EVTCHN_SEND, &uxe);
+ break;
+
+ case TEST_LOCKING_POLL_RACE:
+ TEST_ASSERT(!evtchn_irq_expected,
+ "Expected event channel IRQ but it didn't happen");
+
+ if (verbose)
+ printf("Testing shinfo lock corruption (SCHEDOP_poll)\n");
+
+ shinfo->evtchn_pending[0] = 1;
+
+ evtchn_irq_expected = true;
+ tmr.u.timer.expires_ns = rs->state_entry_time +
+ SHINFO_RACE_TIMEOUT * 1000000000ULL;
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
+ break;
+
+ case TEST_LOCKING_POLL_TIMEOUT:
+ /*
+ * Optional and possibly repeated sync point.
+ * Injecting the timer IRQ may fail if the
+ * shinfo is invalid when the timer expires.
+ * If the timer has expired but the IRQ hasn't
+ * been delivered, rearm the timer and retry.
+ */
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
+
+ /* Resume the guest if the timer is still pending. */
+ if (tmr.u.timer.expires_ns)
+ break;
+
+ /* All done if the IRQ was delivered. */
+ if (!evtchn_irq_expected)
+ break;
+
+ tmr.u.timer.expires_ns = rs->state_entry_time +
+ SHINFO_RACE_TIMEOUT * 1000000000ULL;
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
+ break;
+ case TEST_DONE:
+ TEST_ASSERT(!evtchn_irq_expected,
+ "Expected event channel IRQ but it didn't happen");
+
+ ret = pthread_cancel(thread);
+ TEST_ASSERT(ret == 0, "pthread_cancel() failed: %s", strerror(ret));
+
+ ret = pthread_join(thread, 0);
+ TEST_ASSERT(ret == 0, "pthread_join() failed: %s", strerror(ret));
+ goto done;
+
+ case TEST_GUEST_SAW_IRQ:
+ TEST_ASSERT(evtchn_irq_expected, "Unexpected event channel IRQ");
+ evtchn_irq_expected = false;
+ break;
+ }
+ break;
+ }
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+ }
+ }
+
+ done:
+ evt_reset.type = KVM_XEN_ATTR_TYPE_EVTCHN;
+ evt_reset.u.evtchn.flags = KVM_XEN_EVTCHN_RESET;
+ vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &evt_reset);
+
+ alarm(0);
+ clock_gettime(CLOCK_REALTIME, &max_ts);
+
+ /*
+ * Just a *really* basic check that things are being put in the
+ * right place. The actual calculations are much the same for
+ * Xen as they are for the KVM variants, so no need to check.
+ */
+ struct pvclock_wall_clock *wc;
+ struct pvclock_vcpu_time_info *ti, *ti2;
+
+ wc = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0xc00);
+ ti = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0x40 + 0x20);
+ ti2 = addr_gpa2hva(vm, PVTIME_ADDR);
+
+ if (verbose) {
+ printf("Wall clock (v %d) %d.%09d\n", wc->version, wc->sec, wc->nsec);
+ printf("Time info 1: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n",
+ ti->version, ti->tsc_timestamp, ti->system_time, ti->tsc_to_system_mul,
+ ti->tsc_shift, ti->flags);
+ printf("Time info 2: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n",
+ ti2->version, ti2->tsc_timestamp, ti2->system_time, ti2->tsc_to_system_mul,
+ ti2->tsc_shift, ti2->flags);
+ }
+
+ vm_ts.tv_sec = wc->sec;
+ vm_ts.tv_nsec = wc->nsec;
+ TEST_ASSERT(wc->version && !(wc->version & 1),
+ "Bad wallclock version %x", wc->version);
+ TEST_ASSERT(cmp_timespec(&min_ts, &vm_ts) <= 0, "VM time too old");
+ TEST_ASSERT(cmp_timespec(&max_ts, &vm_ts) >= 0, "VM time too new");
+
+ TEST_ASSERT(ti->version && !(ti->version & 1),
+ "Bad time_info version %x", ti->version);
+ TEST_ASSERT(ti2->version && !(ti2->version & 1),
+		    "Bad time_info version %x", ti2->version);
+
+ if (do_runstate_tests) {
+ /*
+ * Fetch runstate and check sanity. Strictly speaking in the
+ * general case we might not expect the numbers to be identical
+ * but in this case we know we aren't running the vCPU any more.
+ */
+ struct kvm_xen_vcpu_attr rst = {
+ .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA,
+ };
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &rst);
+
+ if (verbose) {
+ printf("Runstate: %s(%d), entry %" PRIu64 " ns\n",
+ rs->state <= RUNSTATE_offline ? runstate_names[rs->state] : "unknown",
+ rs->state, rs->state_entry_time);
+ for (int i = RUNSTATE_running; i <= RUNSTATE_offline; i++) {
+ printf("State %s: %" PRIu64 " ns\n",
+ runstate_names[i], rs->time[i]);
+ }
+ }
+
+ /*
+ * Exercise runstate info at all points across the page boundary, in
+ * 32-bit and 64-bit mode. In particular, test the case where it is
+ * configured in 32-bit mode and then switched to 64-bit mode while
+ * active, which takes it onto the second page.
+ */
+ unsigned long runstate_addr;
+ struct compat_vcpu_runstate_info *crs;
+ for (runstate_addr = SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE - sizeof(*rs) - 4;
+ runstate_addr < SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE + 4; runstate_addr++) {
+
+ rs = addr_gpa2hva(vm, runstate_addr);
+ crs = (void *)rs;
+
+ memset(rs, 0xa5, sizeof(*rs));
+
+ /* Set to compatibility mode */
+ lm.u.long_mode = 0;
+ vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);
+
+ /* Set runstate to new address (kernel will write it) */
+ struct kvm_xen_vcpu_attr st = {
+ .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
+ .u.gpa = runstate_addr,
+ };
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &st);
+
+ if (verbose)
+ printf("Compatibility runstate at %08lx\n", runstate_addr);
+
+ TEST_ASSERT(crs->state == rst.u.runstate.state, "Runstate mismatch");
+ TEST_ASSERT(crs->state_entry_time == rst.u.runstate.state_entry_time,
+ "State entry time mismatch");
+ TEST_ASSERT(crs->time[RUNSTATE_running] == rst.u.runstate.time_running,
+ "Running time mismatch");
+ TEST_ASSERT(crs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable,
+ "Runnable time mismatch");
+ TEST_ASSERT(crs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked,
+ "Blocked time mismatch");
+ TEST_ASSERT(crs->time[RUNSTATE_offline] == rst.u.runstate.time_offline,
+ "Offline time mismatch");
+ TEST_ASSERT(crs->time[RUNSTATE_offline + 1] == 0xa5a5a5a5a5a5a5a5ULL,
+ "Structure overrun");
+ TEST_ASSERT(crs->state_entry_time == crs->time[0] +
+ crs->time[1] + crs->time[2] + crs->time[3],
+ "runstate times don't add up");
+
+
+ /* Now switch to 64-bit mode */
+ lm.u.long_mode = 1;
+ vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);
+
+ memset(rs, 0xa5, sizeof(*rs));
+
+ /* Don't change the address, just trigger a write */
+ struct kvm_xen_vcpu_attr adj = {
+ .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST,
+ .u.runstate.state = (uint64_t)-1
+ };
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &adj);
+
+ if (verbose)
+ printf("64-bit runstate at %08lx\n", runstate_addr);
+
+ TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch");
+ TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time,
+ "State entry time mismatch");
+ TEST_ASSERT(rs->time[RUNSTATE_running] == rst.u.runstate.time_running,
+ "Running time mismatch");
+ TEST_ASSERT(rs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable,
+ "Runnable time mismatch");
+ TEST_ASSERT(rs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked,
+ "Blocked time mismatch");
+ TEST_ASSERT(rs->time[RUNSTATE_offline] == rst.u.runstate.time_offline,
+ "Offline time mismatch");
+ TEST_ASSERT(rs->time[RUNSTATE_offline + 1] == 0xa5a5a5a5a5a5a5a5ULL,
+ "Structure overrun");
+
+ TEST_ASSERT(rs->state_entry_time == rs->time[0] +
+ rs->time[1] + rs->time[2] + rs->time[3],
+ "runstate times don't add up");
+ }
+ }
+
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
new file mode 100644
index 000000000000..e149d0574961
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * xen_vmcall_test
+ *
+ * Copyright © 2020 Amazon.com, Inc. or its affiliates.
+ *
+ * Userspace hypercall testing
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#define HCALL_REGION_GPA 0xc0000000ULL
+#define HCALL_REGION_SLOT 10
+
+#define INPUTVALUE 17
+#define ARGVALUE(x) (0xdeadbeef5a5a0000UL + x)
+#define RETVALUE 0xcafef00dfbfbffffUL
+
+#define XEN_HYPERCALL_MSR 0x40000200
+#define HV_GUEST_OS_ID_MSR 0x40000000
+#define HV_HYPERCALL_MSR 0x40000001
+
+#define HVCALL_SIGNAL_EVENT 0x005d
+#define HV_STATUS_INVALID_ALIGNMENT 4
+
+static void guest_code(void)
+{
+ unsigned long rax = INPUTVALUE;
+ unsigned long rdi = ARGVALUE(1);
+ unsigned long rsi = ARGVALUE(2);
+ unsigned long rdx = ARGVALUE(3);
+ unsigned long rcx;
+ register unsigned long r10 __asm__("r10") = ARGVALUE(4);
+ register unsigned long r8 __asm__("r8") = ARGVALUE(5);
+ register unsigned long r9 __asm__("r9") = ARGVALUE(6);
+
+ /* First a direct invocation of 'vmcall' */
+ __asm__ __volatile__("vmcall" :
+ "=a"(rax) :
+ "a"(rax), "D"(rdi), "S"(rsi), "d"(rdx),
+ "r"(r10), "r"(r8), "r"(r9));
+ GUEST_ASSERT(rax == RETVALUE);
+
+ /* Fill in the Xen hypercall page */
+ __asm__ __volatile__("wrmsr" : : "c" (XEN_HYPERCALL_MSR),
+ "a" (HCALL_REGION_GPA & 0xffffffff),
+ "d" (HCALL_REGION_GPA >> 32));
+
+ /* Set Hyper-V Guest OS ID */
+ __asm__ __volatile__("wrmsr" : : "c" (HV_GUEST_OS_ID_MSR),
+ "a" (0x5a), "d" (0));
+
+ /* Hyper-V hypercall page */
+ u64 msrval = HCALL_REGION_GPA + PAGE_SIZE + 1;
+ __asm__ __volatile__("wrmsr" : : "c" (HV_HYPERCALL_MSR),
+ "a" (msrval & 0xffffffff),
+ "d" (msrval >> 32));
+
+ /* Invoke a Xen hypercall */
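+	/* Each hypercall gets a 32-byte stub in the page, hence nr * 32. */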
+ __asm__ __volatile__("call *%1" : "=a"(rax) :
+ "r"(HCALL_REGION_GPA + INPUTVALUE * 32),
+ "a"(rax), "D"(rdi), "S"(rsi), "d"(rdx),
+ "r"(r10), "r"(r8), "r"(r9));
+ GUEST_ASSERT(rax == RETVALUE);
+
+ /* Invoke a Hyper-V hypercall */
+ rax = 0;
+ rcx = HVCALL_SIGNAL_EVENT; /* code */
+ rdx = 0x5a5a5a5a; /* ingpa (badly aligned) */
+ __asm__ __volatile__("call *%1" : "=a"(rax) :
+ "r"(HCALL_REGION_GPA + PAGE_SIZE),
+ "a"(rax), "c"(rcx), "d"(rdx),
+ "r"(r8));
+ GUEST_ASSERT(rax == HV_STATUS_INVALID_ALIGNMENT);
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ unsigned int xen_caps;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
+ TEST_REQUIRE(xen_caps & KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL);
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ vcpu_set_hv_cpuid(vcpu);
+
+ struct kvm_xen_hvm_config hvmc = {
+ .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
+ .msr = XEN_HYPERCALL_MSR,
+ };
+ vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc);
+
+ /* Map a region for the hypercall pages */
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ HCALL_REGION_GPA, HCALL_REGION_SLOT, 2, 0);
+ virt_map(vm, HCALL_REGION_GPA, HCALL_REGION_GPA, 2);
+
+ for (;;) {
+ volatile struct kvm_run *run = vcpu->run;
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+
+ if (run->exit_reason == KVM_EXIT_XEN) {
+ TEST_ASSERT_EQ(run->xen.type, KVM_EXIT_XEN_HCALL);
+ TEST_ASSERT_EQ(run->xen.u.hcall.cpl, 0);
+ TEST_ASSERT_EQ(run->xen.u.hcall.longmode, 1);
+ TEST_ASSERT_EQ(run->xen.u.hcall.input, INPUTVALUE);
+ TEST_ASSERT_EQ(run->xen.u.hcall.params[0], ARGVALUE(1));
+ TEST_ASSERT_EQ(run->xen.u.hcall.params[1], ARGVALUE(2));
+ TEST_ASSERT_EQ(run->xen.u.hcall.params[2], ARGVALUE(3));
+ TEST_ASSERT_EQ(run->xen.u.hcall.params[3], ARGVALUE(4));
+ TEST_ASSERT_EQ(run->xen.u.hcall.params[4], ARGVALUE(5));
+ TEST_ASSERT_EQ(run->xen.u.hcall.params[5], ARGVALUE(6));
+ run->xen.u.hcall.result = RETVALUE;
+ continue;
+ }
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ case UCALL_SYNC:
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+ }
+ }
+done:
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c b/tools/testing/selftests/kvm/x86_64/xss_msr_test.c
index 3529376747c2..167c97abff1b 100644
--- a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xss_msr_test.c
@@ -12,64 +12,44 @@
#include "kvm_util.h"
#include "vmx.h"
-#define VCPU_ID 1
#define MSR_BITS 64
-#define X86_FEATURE_XSAVES (1<<3)
-
-bool is_supported_msr(u32 msr_index)
-{
- struct kvm_msr_list *list;
- bool found = false;
- int i;
-
- list = kvm_get_msr_index_list();
- for (i = 0; i < list->nmsrs; ++i) {
- if (list->indices[i] == msr_index) {
- found = true;
- break;
- }
- }
-
- free(list);
- return found;
-}
-
int main(int argc, char *argv[])
{
- struct kvm_cpuid_entry2 *entry;
- bool xss_supported = false;
+ bool xss_in_msr_list;
struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
uint64_t xss_val;
int i, r;
/* Create VM */
- vm = vm_create_default(VCPU_ID, 0, 0);
+ vm = vm_create_with_one_vcpu(&vcpu, NULL);
- if (kvm_get_cpuid_max_basic() >= 0xd) {
- entry = kvm_get_supported_cpuid_index(0xd, 1);
- xss_supported = entry && !!(entry->eax & X86_FEATURE_XSAVES);
- }
- if (!xss_supported) {
- print_skip("IA32_XSS is not supported by the vCPU");
- exit(KSFT_SKIP);
- }
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVES));
- xss_val = vcpu_get_msr(vm, VCPU_ID, MSR_IA32_XSS);
+ xss_val = vcpu_get_msr(vcpu, MSR_IA32_XSS);
TEST_ASSERT(xss_val == 0,
- "MSR_IA32_XSS should be initialized to zero\n");
+ "MSR_IA32_XSS should be initialized to zero");
+
+ vcpu_set_msr(vcpu, MSR_IA32_XSS, xss_val);
- vcpu_set_msr(vm, VCPU_ID, MSR_IA32_XSS, xss_val);
/*
* At present, KVM only supports a guest IA32_XSS value of 0. Verify
* that trying to set the guest IA32_XSS to an unsupported value fails.
* Also, in the future when a non-zero value succeeds check that
- * IA32_XSS is in the KVM_GET_MSR_INDEX_LIST.
+ * IA32_XSS is in the list of MSRs to save/restore.
*/
+ xss_in_msr_list = kvm_msr_is_in_save_restore_list(MSR_IA32_XSS);
for (i = 0; i < MSR_BITS; ++i) {
- r = _vcpu_set_msr(vm, VCPU_ID, MSR_IA32_XSS, 1ull << i);
- TEST_ASSERT(r == 0 || is_supported_msr(MSR_IA32_XSS),
- "IA32_XSS was able to be set, but was not found in KVM_GET_MSR_INDEX_LIST.\n");
+ r = _vcpu_set_msr(vcpu, MSR_IA32_XSS, 1ull << i);
+
+ /*
+ * Setting a list of MSRs returns the entry that "faulted", or
+ * the last entry +1 if all MSRs were successfully written.
+ */
+ TEST_ASSERT(!r || r == 1, KVM_IOCTL_ERROR(KVM_SET_MSRS, r));
+ TEST_ASSERT(r != 1 || xss_in_msr_list,
+ "IA32_XSS was able to be set, but was not in save/restore list");
}
kvm_vm_free(vm);