aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/tools/testing/selftests/kvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'tools/testing/selftests/kvm/lib')
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/gic.c161
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/gic_private.h32
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/gic_v3.c398
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/handlers.S126
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/processor.c477
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/spinlock.c27
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/ucall.c102
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/vgic.c170
-rw-r--r--tools/testing/selftests/kvm/lib/assert.c24
-rw-r--r--tools/testing/selftests/kvm/lib/elf.c17
-rw-r--r--tools/testing/selftests/kvm/lib/guest_modes.c129
-rw-r--r--tools/testing/selftests/kvm/lib/guest_sprintf.c314
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c2026
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util_internal.h111
-rw-r--r--tools/testing/selftests/kvm/lib/memstress.c398
-rw-r--r--tools/testing/selftests/kvm/lib/rbtree.c1
-rw-r--r--tools/testing/selftests/kvm/lib/riscv/handlers.S101
-rw-r--r--tools/testing/selftests/kvm/lib/riscv/processor.c504
-rw-r--r--tools/testing/selftests/kvm/lib/riscv/ucall.c31
-rw-r--r--tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c80
-rw-r--r--tools/testing/selftests/kvm/lib/s390x/processor.c88
-rw-r--r--tools/testing/selftests/kvm/lib/s390x/ucall.c42
-rw-r--r--tools/testing/selftests/kvm/lib/sparsebit.c52
-rw-r--r--tools/testing/selftests/kvm/lib/string_override.c48
-rw-r--r--tools/testing/selftests/kvm/lib/test_util.c330
-rw-r--r--tools/testing/selftests/kvm/lib/ucall_common.c160
-rw-r--r--tools/testing/selftests/kvm/lib/userfaultfd_util.c186
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/apic.c43
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/handlers.S81
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/hyperv.c46
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/memstress.c112
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/pmu.c31
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/processor.c1450
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/sev.c114
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/svm.c47
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/ucall.c72
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/vmx.c266
37 files changed, 6417 insertions, 1980 deletions
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic.c b/tools/testing/selftests/kvm/lib/aarch64/gic.c
new file mode 100644
index 000000000000..55668631d546
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/aarch64/gic.c
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM Generic Interrupt Controller (GIC) support
+ */
+
+#include <errno.h>
+#include <linux/bits.h>
+#include <linux/sizes.h>
+
+#include "kvm_util.h"
+
+#include <gic.h>
+#include "gic_private.h"
+#include "processor.h"
+#include "spinlock.h"
+
+static const struct gic_common_ops *gic_common_ops;
+static struct spinlock gic_lock;
+
+static void gic_cpu_init(unsigned int cpu, void *redist_base)
+{
+ gic_common_ops->gic_cpu_init(cpu, redist_base);
+}
+
+static void
+gic_dist_init(enum gic_type type, unsigned int nr_cpus, void *dist_base)
+{
+ const struct gic_common_ops *gic_ops = NULL;
+
+ spin_lock(&gic_lock);
+
+ /* Distributor initialization is needed only once per VM */
+ if (gic_common_ops) {
+ spin_unlock(&gic_lock);
+ return;
+ }
+
+ if (type == GIC_V3)
+ gic_ops = &gicv3_ops;
+
+ GUEST_ASSERT(gic_ops);
+
+ gic_ops->gic_init(nr_cpus, dist_base);
+ gic_common_ops = gic_ops;
+
+ /* Make sure that the initialized data is visible to all the vCPUs */
+ dsb(sy);
+
+ spin_unlock(&gic_lock);
+}
+
+void gic_init(enum gic_type type, unsigned int nr_cpus,
+ void *dist_base, void *redist_base)
+{
+ uint32_t cpu = guest_get_vcpuid();
+
+ GUEST_ASSERT(type < GIC_TYPE_MAX);
+ GUEST_ASSERT(dist_base);
+ GUEST_ASSERT(redist_base);
+ GUEST_ASSERT(nr_cpus);
+
+ gic_dist_init(type, nr_cpus, dist_base);
+ gic_cpu_init(cpu, redist_base);
+}
+
+void gic_irq_enable(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_irq_enable(intid);
+}
+
+void gic_irq_disable(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_irq_disable(intid);
+}
+
+unsigned int gic_get_and_ack_irq(void)
+{
+ uint64_t irqstat;
+ unsigned int intid;
+
+ GUEST_ASSERT(gic_common_ops);
+
+ irqstat = gic_common_ops->gic_read_iar();
+ intid = irqstat & GENMASK(23, 0);
+
+ return intid;
+}
+
+void gic_set_eoi(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_write_eoir(intid);
+}
+
+void gic_set_dir(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_write_dir(intid);
+}
+
+void gic_set_eoi_split(bool split)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_set_eoi_split(split);
+}
+
+void gic_set_priority_mask(uint64_t pmr)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_set_priority_mask(pmr);
+}
+
+void gic_set_priority(unsigned int intid, unsigned int prio)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_set_priority(intid, prio);
+}
+
+void gic_irq_set_active(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_irq_set_active(intid);
+}
+
+void gic_irq_clear_active(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_irq_clear_active(intid);
+}
+
+bool gic_irq_get_active(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ return gic_common_ops->gic_irq_get_active(intid);
+}
+
+void gic_irq_set_pending(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_irq_set_pending(intid);
+}
+
+void gic_irq_clear_pending(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_irq_clear_pending(intid);
+}
+
+bool gic_irq_get_pending(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ return gic_common_ops->gic_irq_get_pending(intid);
+}
+
+void gic_irq_set_config(unsigned int intid, bool is_edge)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_irq_set_config(intid, is_edge);
+}
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_private.h b/tools/testing/selftests/kvm/lib/aarch64/gic_private.h
new file mode 100644
index 000000000000..75d07313c893
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/aarch64/gic_private.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ARM Generic Interrupt Controller (GIC) private defines that's only
+ * shared among the GIC library code.
+ */
+
+#ifndef SELFTEST_KVM_GIC_PRIVATE_H
+#define SELFTEST_KVM_GIC_PRIVATE_H
+
+struct gic_common_ops {
+ void (*gic_init)(unsigned int nr_cpus, void *dist_base);
+ void (*gic_cpu_init)(unsigned int cpu, void *redist_base);
+ void (*gic_irq_enable)(unsigned int intid);
+ void (*gic_irq_disable)(unsigned int intid);
+ uint64_t (*gic_read_iar)(void);
+ void (*gic_write_eoir)(uint32_t irq);
+ void (*gic_write_dir)(uint32_t irq);
+ void (*gic_set_eoi_split)(bool split);
+ void (*gic_set_priority_mask)(uint64_t mask);
+ void (*gic_set_priority)(uint32_t intid, uint32_t prio);
+ void (*gic_irq_set_active)(uint32_t intid);
+ void (*gic_irq_clear_active)(uint32_t intid);
+ bool (*gic_irq_get_active)(uint32_t intid);
+ void (*gic_irq_set_pending)(uint32_t intid);
+ void (*gic_irq_clear_pending)(uint32_t intid);
+ bool (*gic_irq_get_pending)(uint32_t intid);
+ void (*gic_irq_set_config)(uint32_t intid, bool is_edge);
+};
+
+extern const struct gic_common_ops gicv3_ops;
+
+#endif /* SELFTEST_KVM_GIC_PRIVATE_H */
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c b/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c
new file mode 100644
index 000000000000..263bf3ed8fd5
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c
@@ -0,0 +1,398 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM Generic Interrupt Controller (GIC) v3 support
+ */
+
+#include <linux/sizes.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "delay.h"
+
+#include "gic_v3.h"
+#include "gic_private.h"
+
+struct gicv3_data {
+ void *dist_base;
+ void *redist_base[GICV3_MAX_CPUS];
+ unsigned int nr_cpus;
+ unsigned int nr_spis;
+};
+
+#define sgi_base_from_redist(redist_base) (redist_base + SZ_64K)
+#define DIST_BIT (1U << 31)
+
+enum gicv3_intid_range {
+ SGI_RANGE,
+ PPI_RANGE,
+ SPI_RANGE,
+ INVALID_RANGE,
+};
+
+static struct gicv3_data gicv3_data;
+
+static void gicv3_gicd_wait_for_rwp(void)
+{
+ unsigned int count = 100000; /* 1s */
+
+ while (readl(gicv3_data.dist_base + GICD_CTLR) & GICD_CTLR_RWP) {
+ GUEST_ASSERT(count--);
+ udelay(10);
+ }
+}
+
+static void gicv3_gicr_wait_for_rwp(void *redist_base)
+{
+ unsigned int count = 100000; /* 1s */
+
+ while (readl(redist_base + GICR_CTLR) & GICR_CTLR_RWP) {
+ GUEST_ASSERT(count--);
+ udelay(10);
+ }
+}
+
+static void gicv3_wait_for_rwp(uint32_t cpu_or_dist)
+{
+ if (cpu_or_dist & DIST_BIT)
+ gicv3_gicd_wait_for_rwp();
+ else
+ gicv3_gicr_wait_for_rwp(gicv3_data.redist_base[cpu_or_dist]);
+}
+
+static enum gicv3_intid_range get_intid_range(unsigned int intid)
+{
+ switch (intid) {
+ case 0 ... 15:
+ return SGI_RANGE;
+ case 16 ... 31:
+ return PPI_RANGE;
+ case 32 ... 1019:
+ return SPI_RANGE;
+ }
+
+ /* We should not be reaching here */
+ GUEST_ASSERT(0);
+
+ return INVALID_RANGE;
+}
+
+static uint64_t gicv3_read_iar(void)
+{
+ uint64_t irqstat = read_sysreg_s(SYS_ICC_IAR1_EL1);
+
+ dsb(sy);
+ return irqstat;
+}
+
+static void gicv3_write_eoir(uint32_t irq)
+{
+ write_sysreg_s(irq, SYS_ICC_EOIR1_EL1);
+ isb();
+}
+
+static void gicv3_write_dir(uint32_t irq)
+{
+ write_sysreg_s(irq, SYS_ICC_DIR_EL1);
+ isb();
+}
+
+static void gicv3_set_priority_mask(uint64_t mask)
+{
+ write_sysreg_s(mask, SYS_ICC_PMR_EL1);
+}
+
+static void gicv3_set_eoi_split(bool split)
+{
+ uint32_t val;
+
+ /*
+ * All other fields are read-only, so no need to read CTLR first. In
+ * fact, the kernel does the same.
+ */
+ val = split ? (1U << 1) : 0;
+ write_sysreg_s(val, SYS_ICC_CTLR_EL1);
+ isb();
+}
+
+uint32_t gicv3_reg_readl(uint32_t cpu_or_dist, uint64_t offset)
+{
+ void *base = cpu_or_dist & DIST_BIT ? gicv3_data.dist_base
+ : sgi_base_from_redist(gicv3_data.redist_base[cpu_or_dist]);
+ return readl(base + offset);
+}
+
+void gicv3_reg_writel(uint32_t cpu_or_dist, uint64_t offset, uint32_t reg_val)
+{
+ void *base = cpu_or_dist & DIST_BIT ? gicv3_data.dist_base
+ : sgi_base_from_redist(gicv3_data.redist_base[cpu_or_dist]);
+ writel(reg_val, base + offset);
+}
+
+uint32_t gicv3_getl_fields(uint32_t cpu_or_dist, uint64_t offset, uint32_t mask)
+{
+ return gicv3_reg_readl(cpu_or_dist, offset) & mask;
+}
+
+void gicv3_setl_fields(uint32_t cpu_or_dist, uint64_t offset,
+ uint32_t mask, uint32_t reg_val)
+{
+ uint32_t tmp = gicv3_reg_readl(cpu_or_dist, offset) & ~mask;
+
+ tmp |= (reg_val & mask);
+ gicv3_reg_writel(cpu_or_dist, offset, tmp);
+}
+
+/*
+ * We use a single offset for the distributor and redistributor maps as they
+ * have the same value in both. The only exceptions are registers that only
+ * exist in one and not the other, like GICR_WAKER that doesn't exist in the
+ * distributor map. Such registers are conveniently marked as reserved in the
+ * map that doesn't implement it; like GICR_WAKER's offset of 0x0014 being
+ * marked as "Reserved" in the Distributor map.
+ */
+static void gicv3_access_reg(uint32_t intid, uint64_t offset,
+ uint32_t reg_bits, uint32_t bits_per_field,
+ bool write, uint32_t *val)
+{
+ uint32_t cpu = guest_get_vcpuid();
+ enum gicv3_intid_range intid_range = get_intid_range(intid);
+ uint32_t fields_per_reg, index, mask, shift;
+ uint32_t cpu_or_dist;
+
+ GUEST_ASSERT(bits_per_field <= reg_bits);
+ GUEST_ASSERT(!write || *val < (1U << bits_per_field));
+ /*
+ * This function does not support 64 bit accesses. Just asserting here
+ * until we implement readq/writeq.
+ */
+ GUEST_ASSERT(reg_bits == 32);
+
+ fields_per_reg = reg_bits / bits_per_field;
+ index = intid % fields_per_reg;
+ shift = index * bits_per_field;
+ mask = ((1U << bits_per_field) - 1) << shift;
+
+ /* Set offset to the actual register holding intid's config. */
+ offset += (intid / fields_per_reg) * (reg_bits / 8);
+
+ cpu_or_dist = (intid_range == SPI_RANGE) ? DIST_BIT : cpu;
+
+ if (write)
+ gicv3_setl_fields(cpu_or_dist, offset, mask, *val << shift);
+ *val = gicv3_getl_fields(cpu_or_dist, offset, mask) >> shift;
+}
+
+static void gicv3_write_reg(uint32_t intid, uint64_t offset,
+ uint32_t reg_bits, uint32_t bits_per_field, uint32_t val)
+{
+ gicv3_access_reg(intid, offset, reg_bits,
+ bits_per_field, true, &val);
+}
+
+static uint32_t gicv3_read_reg(uint32_t intid, uint64_t offset,
+ uint32_t reg_bits, uint32_t bits_per_field)
+{
+ uint32_t val;
+
+ gicv3_access_reg(intid, offset, reg_bits,
+ bits_per_field, false, &val);
+ return val;
+}
+
+static void gicv3_set_priority(uint32_t intid, uint32_t prio)
+{
+ gicv3_write_reg(intid, GICD_IPRIORITYR, 32, 8, prio);
+}
+
+/* Sets the intid to be level-sensitive or edge-triggered. */
+static void gicv3_irq_set_config(uint32_t intid, bool is_edge)
+{
+ uint32_t val;
+
+ /* N/A for private interrupts. */
+ GUEST_ASSERT(get_intid_range(intid) == SPI_RANGE);
+ val = is_edge ? 2 : 0;
+ gicv3_write_reg(intid, GICD_ICFGR, 32, 2, val);
+}
+
+static void gicv3_irq_enable(uint32_t intid)
+{
+ bool is_spi = get_intid_range(intid) == SPI_RANGE;
+ uint32_t cpu = guest_get_vcpuid();
+
+ gicv3_write_reg(intid, GICD_ISENABLER, 32, 1, 1);
+ gicv3_wait_for_rwp(is_spi ? DIST_BIT : cpu);
+}
+
+static void gicv3_irq_disable(uint32_t intid)
+{
+ bool is_spi = get_intid_range(intid) == SPI_RANGE;
+ uint32_t cpu = guest_get_vcpuid();
+
+ gicv3_write_reg(intid, GICD_ICENABLER, 32, 1, 1);
+ gicv3_wait_for_rwp(is_spi ? DIST_BIT : cpu);
+}
+
+static void gicv3_irq_set_active(uint32_t intid)
+{
+ gicv3_write_reg(intid, GICD_ISACTIVER, 32, 1, 1);
+}
+
+static void gicv3_irq_clear_active(uint32_t intid)
+{
+ gicv3_write_reg(intid, GICD_ICACTIVER, 32, 1, 1);
+}
+
+static bool gicv3_irq_get_active(uint32_t intid)
+{
+ return gicv3_read_reg(intid, GICD_ISACTIVER, 32, 1);
+}
+
+static void gicv3_irq_set_pending(uint32_t intid)
+{
+ gicv3_write_reg(intid, GICD_ISPENDR, 32, 1, 1);
+}
+
+static void gicv3_irq_clear_pending(uint32_t intid)
+{
+ gicv3_write_reg(intid, GICD_ICPENDR, 32, 1, 1);
+}
+
+static bool gicv3_irq_get_pending(uint32_t intid)
+{
+ return gicv3_read_reg(intid, GICD_ISPENDR, 32, 1);
+}
+
+static void gicv3_enable_redist(void *redist_base)
+{
+ uint32_t val = readl(redist_base + GICR_WAKER);
+ unsigned int count = 100000; /* 1s */
+
+ val &= ~GICR_WAKER_ProcessorSleep;
+ writel(val, redist_base + GICR_WAKER);
+
+ /* Wait until the processor is 'active' */
+ while (readl(redist_base + GICR_WAKER) & GICR_WAKER_ChildrenAsleep) {
+ GUEST_ASSERT(count--);
+ udelay(10);
+ }
+}
+
+static inline void *gicr_base_cpu(void *redist_base, uint32_t cpu)
+{
+ /* Align all the redistributors sequentially */
+ return redist_base + cpu * SZ_64K * 2;
+}
+
+static void gicv3_cpu_init(unsigned int cpu, void *redist_base)
+{
+ void *sgi_base;
+ unsigned int i;
+ void *redist_base_cpu;
+
+ GUEST_ASSERT(cpu < gicv3_data.nr_cpus);
+
+ redist_base_cpu = gicr_base_cpu(redist_base, cpu);
+ sgi_base = sgi_base_from_redist(redist_base_cpu);
+
+ gicv3_enable_redist(redist_base_cpu);
+
+ /*
+ * Mark all the SGI and PPI interrupts as non-secure Group-1.
+ * Also, deactivate and disable them.
+ */
+ writel(~0, sgi_base + GICR_IGROUPR0);
+ writel(~0, sgi_base + GICR_ICACTIVER0);
+ writel(~0, sgi_base + GICR_ICENABLER0);
+
+ /* Set a default priority for all the SGIs and PPIs */
+ for (i = 0; i < 32; i += 4)
+ writel(GICD_INT_DEF_PRI_X4,
+ sgi_base + GICR_IPRIORITYR0 + i);
+
+ gicv3_gicr_wait_for_rwp(redist_base_cpu);
+
+ /* Enable the GIC system register (ICC_*) access */
+ write_sysreg_s(read_sysreg_s(SYS_ICC_SRE_EL1) | ICC_SRE_EL1_SRE,
+ SYS_ICC_SRE_EL1);
+
+ /* Set a default priority threshold */
+ write_sysreg_s(ICC_PMR_DEF_PRIO, SYS_ICC_PMR_EL1);
+
+ /* Enable non-secure Group-1 interrupts */
+ write_sysreg_s(ICC_IGRPEN1_EL1_ENABLE, SYS_ICC_GRPEN1_EL1);
+
+ gicv3_data.redist_base[cpu] = redist_base_cpu;
+}
+
+static void gicv3_dist_init(void)
+{
+ void *dist_base = gicv3_data.dist_base;
+ unsigned int i;
+
+ /* Disable the distributor until we set things up */
+ writel(0, dist_base + GICD_CTLR);
+ gicv3_gicd_wait_for_rwp();
+
+ /*
+ * Mark all the SPI interrupts as non-secure Group-1.
+ * Also, deactivate and disable them.
+ */
+ for (i = 32; i < gicv3_data.nr_spis; i += 32) {
+ writel(~0, dist_base + GICD_IGROUPR + i / 8);
+ writel(~0, dist_base + GICD_ICACTIVER + i / 8);
+ writel(~0, dist_base + GICD_ICENABLER + i / 8);
+ }
+
+ /* Set a default priority for all the SPIs */
+ for (i = 32; i < gicv3_data.nr_spis; i += 4)
+ writel(GICD_INT_DEF_PRI_X4,
+ dist_base + GICD_IPRIORITYR + i);
+
+ /* Wait for the settings to sync-in */
+ gicv3_gicd_wait_for_rwp();
+
+ /* Finally, enable the distributor globally with ARE */
+ writel(GICD_CTLR_ARE_NS | GICD_CTLR_ENABLE_G1A |
+ GICD_CTLR_ENABLE_G1, dist_base + GICD_CTLR);
+ gicv3_gicd_wait_for_rwp();
+}
+
+static void gicv3_init(unsigned int nr_cpus, void *dist_base)
+{
+ GUEST_ASSERT(nr_cpus <= GICV3_MAX_CPUS);
+
+ gicv3_data.nr_cpus = nr_cpus;
+ gicv3_data.dist_base = dist_base;
+ gicv3_data.nr_spis = GICD_TYPER_SPIS(
+ readl(gicv3_data.dist_base + GICD_TYPER));
+ if (gicv3_data.nr_spis > 1020)
+ gicv3_data.nr_spis = 1020;
+
+ /*
+ * Initialize only the distributor for now.
+ * The redistributor and CPU interfaces are initialized
+ * later for every PE.
+ */
+ gicv3_dist_init();
+}
+
+const struct gic_common_ops gicv3_ops = {
+ .gic_init = gicv3_init,
+ .gic_cpu_init = gicv3_cpu_init,
+ .gic_irq_enable = gicv3_irq_enable,
+ .gic_irq_disable = gicv3_irq_disable,
+ .gic_read_iar = gicv3_read_iar,
+ .gic_write_eoir = gicv3_write_eoir,
+ .gic_write_dir = gicv3_write_dir,
+ .gic_set_priority_mask = gicv3_set_priority_mask,
+ .gic_set_eoi_split = gicv3_set_eoi_split,
+ .gic_set_priority = gicv3_set_priority,
+ .gic_irq_set_active = gicv3_irq_set_active,
+ .gic_irq_clear_active = gicv3_irq_clear_active,
+ .gic_irq_get_active = gicv3_irq_get_active,
+ .gic_irq_set_pending = gicv3_irq_set_pending,
+ .gic_irq_clear_pending = gicv3_irq_clear_pending,
+ .gic_irq_get_pending = gicv3_irq_get_pending,
+ .gic_irq_set_config = gicv3_irq_set_config,
+};
diff --git a/tools/testing/selftests/kvm/lib/aarch64/handlers.S b/tools/testing/selftests/kvm/lib/aarch64/handlers.S
new file mode 100644
index 000000000000..0e443eadfac6
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/aarch64/handlers.S
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+.macro save_registers
+ add sp, sp, #-16 * 17
+
+ stp x0, x1, [sp, #16 * 0]
+ stp x2, x3, [sp, #16 * 1]
+ stp x4, x5, [sp, #16 * 2]
+ stp x6, x7, [sp, #16 * 3]
+ stp x8, x9, [sp, #16 * 4]
+ stp x10, x11, [sp, #16 * 5]
+ stp x12, x13, [sp, #16 * 6]
+ stp x14, x15, [sp, #16 * 7]
+ stp x16, x17, [sp, #16 * 8]
+ stp x18, x19, [sp, #16 * 9]
+ stp x20, x21, [sp, #16 * 10]
+ stp x22, x23, [sp, #16 * 11]
+ stp x24, x25, [sp, #16 * 12]
+ stp x26, x27, [sp, #16 * 13]
+ stp x28, x29, [sp, #16 * 14]
+
+ /*
+ * This stores sp_el1 into ex_regs.sp so exception handlers can "look"
+ * at it. It will _not_ be used to restore the sp on return from the
+ * exception so handlers can not update it.
+ */
+ add x1, sp, #16 * 17
+ stp x30, x1, [sp, #16 * 15] /* x30, SP */
+
+ mrs x1, elr_el1
+ mrs x2, spsr_el1
+ stp x1, x2, [sp, #16 * 16] /* PC, PSTATE */
+.endm
+
+.macro restore_registers
+ ldp x1, x2, [sp, #16 * 16] /* PC, PSTATE */
+ msr elr_el1, x1
+ msr spsr_el1, x2
+
+ /* sp is not restored */
+ ldp x30, xzr, [sp, #16 * 15] /* x30, SP */
+
+ ldp x28, x29, [sp, #16 * 14]
+ ldp x26, x27, [sp, #16 * 13]
+ ldp x24, x25, [sp, #16 * 12]
+ ldp x22, x23, [sp, #16 * 11]
+ ldp x20, x21, [sp, #16 * 10]
+ ldp x18, x19, [sp, #16 * 9]
+ ldp x16, x17, [sp, #16 * 8]
+ ldp x14, x15, [sp, #16 * 7]
+ ldp x12, x13, [sp, #16 * 6]
+ ldp x10, x11, [sp, #16 * 5]
+ ldp x8, x9, [sp, #16 * 4]
+ ldp x6, x7, [sp, #16 * 3]
+ ldp x4, x5, [sp, #16 * 2]
+ ldp x2, x3, [sp, #16 * 1]
+ ldp x0, x1, [sp, #16 * 0]
+
+ add sp, sp, #16 * 17
+
+ eret
+.endm
+
+.pushsection ".entry.text", "ax"
+.balign 0x800
+.global vectors
+vectors:
+.popsection
+
+.set vector, 0
+
+/*
+ * Build an exception handler for vector and append a jump to it into
+ * vectors (while making sure that it's 0x80 aligned).
+ */
+.macro HANDLER, label
+handler_\label:
+ save_registers
+ mov x0, sp
+ mov x1, #vector
+ bl route_exception
+ restore_registers
+
+.pushsection ".entry.text", "ax"
+.balign 0x80
+ b handler_\label
+.popsection
+
+.set vector, vector + 1
+.endm
+
+.macro HANDLER_INVALID
+.pushsection ".entry.text", "ax"
+.balign 0x80
+/* This will abort so no need to save and restore registers. */
+ mov x0, #vector
+ mov x1, #0 /* ec */
+ mov x2, #0 /* valid_ec */
+ b kvm_exit_unexpected_exception
+.popsection
+
+.set vector, vector + 1
+.endm
+
+/*
+ * Caution: be sure to not add anything between the declaration of vectors
+ * above and these macro calls that will build the vectors table below it.
+ */
+ HANDLER_INVALID // Synchronous EL1t
+ HANDLER_INVALID // IRQ EL1t
+ HANDLER_INVALID // FIQ EL1t
+ HANDLER_INVALID // Error EL1t
+
+ HANDLER el1h_sync // Synchronous EL1h
+ HANDLER el1h_irq // IRQ EL1h
+ HANDLER el1h_fiq // FIQ EL1h
+ HANDLER el1h_error // Error EL1h
+
+ HANDLER el0_sync_64 // Synchronous 64-bit EL0
+ HANDLER el0_irq_64 // IRQ 64-bit EL0
+ HANDLER el0_fiq_64 // FIQ 64-bit EL0
+ HANDLER el0_error_64 // Error 64-bit EL0
+
+ HANDLER el0_sync_32 // Synchronous 32-bit EL0
+ HANDLER el0_irq_32 // IRQ 32-bit EL0
+ HANDLER el0_fiq_32 // FIQ 32-bit EL0
+ HANDLER el0_error_32 // Error 32-bit EL0
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
index 2afa6618b396..a9eb17295be4 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/processor.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c
@@ -5,17 +5,19 @@
* Copyright (C) 2018, Red Hat, Inc.
*/
-#define _GNU_SOURCE /* for program_invocation_name */
-
#include <linux/compiler.h>
+#include <assert.h>
+#include "guest_modes.h"
#include "kvm_util.h"
-#include "../kvm_util_internal.h"
#include "processor.h"
+#include <linux/bitfield.h>
+#include <linux/sizes.h>
-#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
#define DEFAULT_ARM64_GUEST_STACK_VADDR_MIN 0xac0000
+static vm_vaddr_t exception_handlers;
+
static uint64_t page_align(struct kvm_vm *vm, uint64_t v)
{
return (v + vm->page_size) & ~(vm->page_size - 1);
@@ -57,10 +59,44 @@ static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva)
return (gva >> vm->page_shift) & mask;
}
-static uint64_t pte_addr(struct kvm_vm *vm, uint64_t entry)
+static inline bool use_lpa2_pte_format(struct kvm_vm *vm)
+{
+ return (vm->page_size == SZ_4K || vm->page_size == SZ_16K) &&
+ (vm->pa_bits > 48 || vm->va_bits > 48);
+}
+
+static uint64_t addr_pte(struct kvm_vm *vm, uint64_t pa, uint64_t attrs)
+{
+ uint64_t pte;
+
+ if (use_lpa2_pte_format(vm)) {
+ pte = pa & GENMASK(49, vm->page_shift);
+ pte |= FIELD_GET(GENMASK(51, 50), pa) << 8;
+ attrs &= ~GENMASK(9, 8);
+ } else {
+ pte = pa & GENMASK(47, vm->page_shift);
+ if (vm->page_shift == 16)
+ pte |= FIELD_GET(GENMASK(51, 48), pa) << 12;
+ }
+ pte |= attrs;
+
+ return pte;
+}
+
+static uint64_t pte_addr(struct kvm_vm *vm, uint64_t pte)
{
- uint64_t mask = ((1UL << (vm->va_bits - vm->page_shift)) - 1) << vm->page_shift;
- return entry & mask;
+ uint64_t pa;
+
+ if (use_lpa2_pte_format(vm)) {
+ pa = pte & GENMASK(49, vm->page_shift);
+ pa |= FIELD_GET(GENMASK(9, 8), pte) << 50;
+ } else {
+ pa = pte & GENMASK(47, vm->page_shift);
+ if (vm->page_shift == 16)
+ pa |= FIELD_GET(GENMASK(15, 12), pte) << 48;
+ }
+
+ return pa;
}
static uint64_t ptrs_per_pgd(struct kvm_vm *vm)
@@ -74,19 +110,21 @@ static uint64_t __maybe_unused ptrs_per_pte(struct kvm_vm *vm)
return 1 << (vm->page_shift - 3);
}
-void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot)
+void virt_arch_pgd_alloc(struct kvm_vm *vm)
{
- if (!vm->pgd_created) {
- vm_paddr_t paddr = vm_phy_pages_alloc(vm,
- page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size,
- KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
- vm->pgd = paddr;
- vm->pgd_created = true;
- }
+ size_t nr_pages = page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size;
+
+ if (vm->pgd_created)
+ return;
+
+ vm->pgd = vm_phy_pages_alloc(vm, nr_pages,
+ KVM_GUEST_PAGE_TABLE_MIN_PADDR,
+ vm->memslots[MEM_REGION_PT]);
+ vm->pgd_created = true;
}
-void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- uint32_t pgd_memslot, uint64_t flags)
+static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+ uint64_t flags)
{
uint8_t attr_idx = flags & 7;
uint64_t *ptep;
@@ -106,25 +144,19 @@ void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
paddr, vm->max_gfn, vm->page_size);
ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, vaddr) * 8;
- if (!*ptep) {
- *ptep = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
- *ptep |= 3;
- }
+ if (!*ptep)
+ *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3);
switch (vm->pgtable_levels) {
case 4:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, vaddr) * 8;
- if (!*ptep) {
- *ptep = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
- *ptep |= 3;
- }
+ if (!*ptep)
+ *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3);
/* fall through */
case 3:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, vaddr) * 8;
- if (!*ptep) {
- *ptep = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
- *ptep |= 3;
- }
+ if (!*ptep)
+ *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3);
/* fall through */
case 2:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, vaddr) * 8;
@@ -133,19 +165,17 @@ void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
TEST_FAIL("Page table levels must be 2, 3, or 4");
}
- *ptep = paddr | 3;
- *ptep |= (attr_idx << 2) | (1 << 10) /* Access Flag */;
+ *ptep = addr_pte(vm, paddr, (attr_idx << 2) | (1 << 10) | 3); /* AF */
}
-void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- uint32_t pgd_memslot)
+void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
{
- uint64_t attr_idx = 4; /* NORMAL (See DEFAULT_MAIR_EL1) */
+ uint64_t attr_idx = MT_NORMAL;
- _virt_pg_map(vm, vaddr, paddr, pgd_memslot, attr_idx);
+ _virt_pg_map(vm, vaddr, paddr, attr_idx);
}
-vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva)
{
uint64_t *ptep;
@@ -176,11 +206,18 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
TEST_FAIL("Page table levels must be 2, 3, or 4");
}
- return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1));
+ return ptep;
unmapped_gva:
TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva);
- exit(1);
+ exit(EXIT_FAILURE);
+}
+
+vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+ uint64_t *ptep = virt_get_pte_hva(vm, gva);
+
+ return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1));
}
static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t page, int level)
@@ -202,7 +239,7 @@ static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t p
#endif
}
-void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
int level = 4 - (vm->pgtable_levels - 1);
uint64_t pgd, *ptep;
@@ -219,25 +256,11 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
}
}
-struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
- void *guest_code)
-{
- uint64_t ptrs_per_4k_pte = 512;
- uint64_t extra_pg_pages = (extra_mem_pages / ptrs_per_4k_pte) * 2;
- struct kvm_vm *vm;
-
- vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
-
- kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
- vm_vcpu_add_default(vm, vcpuid, guest_code);
-
- return vm;
-}
-
-void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, struct kvm_vcpu_init *init)
+void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init)
{
struct kvm_vcpu_init default_init = { .target = -1, };
- uint64_t sctlr_el1, tcr_el1;
+ struct kvm_vm *vm = vcpu->vm;
+ uint64_t sctlr_el1, tcr_el1, ttbr0_el1;
if (!init)
init = &default_init;
@@ -248,44 +271,71 @@ void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, struct kvm_vcpu_init *ini
init->target = preferred.target;
}
- vcpu_ioctl(vm, vcpuid, KVM_ARM_VCPU_INIT, init);
+ vcpu_ioctl(vcpu, KVM_ARM_VCPU_INIT, init);
/*
* Enable FP/ASIMD to avoid trapping when accessing Q0-Q15
* registers, which the variable argument list macros do.
*/
- set_reg(vm, vcpuid, ARM64_SYS_REG(CPACR_EL1), 3 << 20);
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CPACR_EL1), 3 << 20);
- get_reg(vm, vcpuid, ARM64_SYS_REG(SCTLR_EL1), &sctlr_el1);
- get_reg(vm, vcpuid, ARM64_SYS_REG(TCR_EL1), &tcr_el1);
+ vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), &sctlr_el1);
+ vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1), &tcr_el1);
+ /* Configure base granule size */
switch (vm->mode) {
- case VM_MODE_P52V48_4K:
- TEST_FAIL("AArch64 does not support 4K sized pages "
- "with 52-bit physical address ranges");
case VM_MODE_PXXV48_4K:
TEST_FAIL("AArch64 does not support 4K sized pages "
"with ANY-bit physical address ranges");
case VM_MODE_P52V48_64K:
+ case VM_MODE_P48V48_64K:
+ case VM_MODE_P40V48_64K:
+ case VM_MODE_P36V48_64K:
tcr_el1 |= 1ul << 14; /* TG0 = 64KB */
- tcr_el1 |= 6ul << 32; /* IPS = 52 bits */
break;
+ case VM_MODE_P52V48_16K:
+ case VM_MODE_P48V48_16K:
+ case VM_MODE_P40V48_16K:
+ case VM_MODE_P36V48_16K:
+ case VM_MODE_P36V47_16K:
+ tcr_el1 |= 2ul << 14; /* TG0 = 16KB */
+ break;
+ case VM_MODE_P52V48_4K:
case VM_MODE_P48V48_4K:
+ case VM_MODE_P40V48_4K:
+ case VM_MODE_P36V48_4K:
tcr_el1 |= 0ul << 14; /* TG0 = 4KB */
- tcr_el1 |= 5ul << 32; /* IPS = 48 bits */
break;
+ default:
+ TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
+ }
+
+ ttbr0_el1 = vm->pgd & GENMASK(47, vm->page_shift);
+
+ /* Configure output size */
+ switch (vm->mode) {
+ case VM_MODE_P52V48_4K:
+ case VM_MODE_P52V48_16K:
+ case VM_MODE_P52V48_64K:
+ tcr_el1 |= 6ul << 32; /* IPS = 52 bits */
+ ttbr0_el1 |= FIELD_GET(GENMASK(51, 48), vm->pgd) << 2;
+ break;
+ case VM_MODE_P48V48_4K:
+ case VM_MODE_P48V48_16K:
case VM_MODE_P48V48_64K:
- tcr_el1 |= 1ul << 14; /* TG0 = 64KB */
tcr_el1 |= 5ul << 32; /* IPS = 48 bits */
break;
case VM_MODE_P40V48_4K:
- tcr_el1 |= 0ul << 14; /* TG0 = 4KB */
- tcr_el1 |= 2ul << 32; /* IPS = 40 bits */
- break;
+ case VM_MODE_P40V48_16K:
case VM_MODE_P40V48_64K:
- tcr_el1 |= 1ul << 14; /* TG0 = 64KB */
tcr_el1 |= 2ul << 32; /* IPS = 40 bits */
break;
+ case VM_MODE_P36V48_4K:
+ case VM_MODE_P36V48_16K:
+ case VM_MODE_P36V48_64K:
+ case VM_MODE_P36V47_16K:
+ tcr_el1 |= 1ul << 32; /* IPS = 36 bits */
+ break;
default:
TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
}
@@ -294,59 +344,296 @@ void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, struct kvm_vcpu_init *ini
/* TCR_EL1 |= IRGN0:WBWA | ORGN0:WBWA | SH0:Inner-Shareable */;
tcr_el1 |= (1 << 8) | (1 << 10) | (3 << 12);
tcr_el1 |= (64 - vm->va_bits) /* T0SZ */;
-
- set_reg(vm, vcpuid, ARM64_SYS_REG(SCTLR_EL1), sctlr_el1);
- set_reg(vm, vcpuid, ARM64_SYS_REG(TCR_EL1), tcr_el1);
- set_reg(vm, vcpuid, ARM64_SYS_REG(MAIR_EL1), DEFAULT_MAIR_EL1);
- set_reg(vm, vcpuid, ARM64_SYS_REG(TTBR0_EL1), vm->pgd);
+ if (use_lpa2_pte_format(vm))
+ tcr_el1 |= (1ul << 59) /* DS */;
+
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), sctlr_el1);
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1), tcr_el1);
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MAIR_EL1), DEFAULT_MAIR_EL1);
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TTBR0_EL1), ttbr0_el1);
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TPIDR_EL1), vcpu->id);
}
-void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
+void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
{
uint64_t pstate, pc;
- get_reg(vm, vcpuid, ARM64_CORE_REG(regs.pstate), &pstate);
- get_reg(vm, vcpuid, ARM64_CORE_REG(regs.pc), &pc);
+ vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pstate), &pstate);
+ vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc), &pc);
fprintf(stream, "%*spstate: 0x%.16lx pc: 0x%.16lx\n",
indent, "", pstate, pc);
}
-void aarch64_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_vcpu_init *init, void *guest_code)
+void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
+{
+ vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code);
+}
+
+static struct kvm_vcpu *__aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
+ struct kvm_vcpu_init *init)
+{
+ size_t stack_size;
+ uint64_t stack_vaddr;
+ struct kvm_vcpu *vcpu = __vm_vcpu_add(vm, vcpu_id);
+
+ stack_size = vm->page_size == 4096 ? DEFAULT_STACK_PGS * vm->page_size :
+ vm->page_size;
+ stack_vaddr = __vm_vaddr_alloc(vm, stack_size,
+ DEFAULT_ARM64_GUEST_STACK_VADDR_MIN,
+ MEM_REGION_DATA);
+
+ aarch64_vcpu_setup(vcpu, init);
+
+ vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), stack_vaddr + stack_size);
+ return vcpu;
+}
+
+struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
+ struct kvm_vcpu_init *init, void *guest_code)
{
- size_t stack_size = vm->page_size == 4096 ?
- DEFAULT_STACK_PGS * vm->page_size :
- vm->page_size;
- uint64_t stack_vaddr = vm_vaddr_alloc(vm, stack_size,
- DEFAULT_ARM64_GUEST_STACK_VADDR_MIN, 0, 0);
+ struct kvm_vcpu *vcpu = __aarch64_vcpu_add(vm, vcpu_id, init);
- vm_vcpu_add(vm, vcpuid);
- aarch64_vcpu_setup(vm, vcpuid, init);
+ vcpu_arch_set_entry_point(vcpu, guest_code);
- set_reg(vm, vcpuid, ARM64_CORE_REG(sp_el1), stack_vaddr + stack_size);
- set_reg(vm, vcpuid, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code);
+ return vcpu;
}
-void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
+struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
{
- aarch64_vcpu_add_default(vm, vcpuid, NULL, guest_code);
+ return __aarch64_vcpu_add(vm, vcpu_id, NULL);
}
-void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
+void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
{
va_list ap;
int i;
TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n"
- " num: %u\n", num);
+ " num: %u", num);
va_start(ap, num);
for (i = 0; i < num; i++) {
- set_reg(vm, vcpuid, ARM64_CORE_REG(regs.regs[i]),
- va_arg(ap, uint64_t));
+ vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.regs[i]),
+ va_arg(ap, uint64_t));
}
va_end(ap);
}
+
+void kvm_exit_unexpected_exception(int vector, uint64_t ec, bool valid_ec)
+{
+ ucall(UCALL_UNHANDLED, 3, vector, ec, valid_ec);
+ while (1)
+ ;
+}
+
+void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ if (get_ucall(vcpu, &uc) != UCALL_UNHANDLED)
+ return;
+
+ if (uc.args[2]) /* valid_ec */ {
+ assert(VECTOR_IS_SYNC(uc.args[0]));
+ TEST_FAIL("Unexpected exception (vector:0x%lx, ec:0x%lx)",
+ uc.args[0], uc.args[1]);
+ } else {
+ assert(!VECTOR_IS_SYNC(uc.args[0]));
+ TEST_FAIL("Unexpected exception (vector:0x%lx)",
+ uc.args[0]);
+ }
+}
+
+struct handlers {
+ handler_fn exception_handlers[VECTOR_NUM][ESR_EC_NUM];
+};
+
+void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu)
+{
+ extern char vectors;
+
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_VBAR_EL1), (uint64_t)&vectors);
+}
+
+void route_exception(struct ex_regs *regs, int vector)
+{
+ struct handlers *handlers = (struct handlers *)exception_handlers;
+ bool valid_ec;
+ int ec = 0;
+
+ switch (vector) {
+ case VECTOR_SYNC_CURRENT:
+ case VECTOR_SYNC_LOWER_64:
+ ec = (read_sysreg(esr_el1) >> ESR_EC_SHIFT) & ESR_EC_MASK;
+ valid_ec = true;
+ break;
+ case VECTOR_IRQ_CURRENT:
+ case VECTOR_IRQ_LOWER_64:
+ case VECTOR_FIQ_CURRENT:
+ case VECTOR_FIQ_LOWER_64:
+ case VECTOR_ERROR_CURRENT:
+ case VECTOR_ERROR_LOWER_64:
+ ec = 0;
+ valid_ec = false;
+ break;
+ default:
+ valid_ec = false;
+ goto unexpected_exception;
+ }
+
+ if (handlers && handlers->exception_handlers[vector][ec])
+ return handlers->exception_handlers[vector][ec](regs);
+
+unexpected_exception:
+ kvm_exit_unexpected_exception(vector, ec, valid_ec);
+}
+
+void vm_init_descriptor_tables(struct kvm_vm *vm)
+{
+ vm->handlers = __vm_vaddr_alloc(vm, sizeof(struct handlers),
+ vm->page_size, MEM_REGION_DATA);
+
+ *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
+}
+
+void vm_install_sync_handler(struct kvm_vm *vm, int vector, int ec,
+ void (*handler)(struct ex_regs *))
+{
+ struct handlers *handlers = addr_gva2hva(vm, vm->handlers);
+
+ assert(VECTOR_IS_SYNC(vector));
+ assert(vector < VECTOR_NUM);
+ assert(ec < ESR_EC_NUM);
+ handlers->exception_handlers[vector][ec] = handler;
+}
+
+void vm_install_exception_handler(struct kvm_vm *vm, int vector,
+ void (*handler)(struct ex_regs *))
+{
+ struct handlers *handlers = addr_gva2hva(vm, vm->handlers);
+
+ assert(!VECTOR_IS_SYNC(vector));
+ assert(vector < VECTOR_NUM);
+ handlers->exception_handlers[vector][0] = handler;
+}
+
+uint32_t guest_get_vcpuid(void)
+{
+ return read_sysreg(tpidr_el1);
+}
+
+static uint32_t max_ipa_for_page_size(uint32_t vm_ipa, uint32_t gran,
+ uint32_t not_sup_val, uint32_t ipa52_min_val)
+{
+ if (gran == not_sup_val)
+ return 0;
+ else if (gran >= ipa52_min_val && vm_ipa >= 52)
+ return 52;
+ else
+ return min(vm_ipa, 48U);
+}
+
+void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k,
+ uint32_t *ipa16k, uint32_t *ipa64k)
+{
+ struct kvm_vcpu_init preferred_init;
+ int kvm_fd, vm_fd, vcpu_fd, err;
+ uint64_t val;
+ uint32_t gran;
+ struct kvm_one_reg reg = {
+ .id = KVM_ARM64_SYS_REG(SYS_ID_AA64MMFR0_EL1),
+ .addr = (uint64_t)&val,
+ };
+
+ kvm_fd = open_kvm_dev_path_or_exit();
+ vm_fd = __kvm_ioctl(kvm_fd, KVM_CREATE_VM, (void *)(unsigned long)ipa);
+ TEST_ASSERT(vm_fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, vm_fd));
+
+ vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);
+ TEST_ASSERT(vcpu_fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VCPU, vcpu_fd));
+
+ err = ioctl(vm_fd, KVM_ARM_PREFERRED_TARGET, &preferred_init);
+ TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_ARM_PREFERRED_TARGET, err));
+ err = ioctl(vcpu_fd, KVM_ARM_VCPU_INIT, &preferred_init);
+ TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_ARM_VCPU_INIT, err));
+
+ err = ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
+ TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_GET_ONE_REG, vcpu_fd));
+
+ gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN4), val);
+ *ipa4k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN4_NI,
+ ID_AA64MMFR0_EL1_TGRAN4_52_BIT);
+
+ gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN64), val);
+ *ipa64k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN64_NI,
+ ID_AA64MMFR0_EL1_TGRAN64_IMP);
+
+ gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN16), val);
+ *ipa16k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN16_NI,
+ ID_AA64MMFR0_EL1_TGRAN16_52_BIT);
+
+ close(vcpu_fd);
+ close(vm_fd);
+ close(kvm_fd);
+}
+
+#define __smccc_call(insn, function_id, arg0, arg1, arg2, arg3, arg4, arg5, \
+ arg6, res) \
+ asm volatile("mov w0, %w[function_id]\n" \
+ "mov x1, %[arg0]\n" \
+ "mov x2, %[arg1]\n" \
+ "mov x3, %[arg2]\n" \
+ "mov x4, %[arg3]\n" \
+ "mov x5, %[arg4]\n" \
+ "mov x6, %[arg5]\n" \
+ "mov x7, %[arg6]\n" \
+ #insn "#0\n" \
+ "mov %[res0], x0\n" \
+ "mov %[res1], x1\n" \
+ "mov %[res2], x2\n" \
+ "mov %[res3], x3\n" \
+ : [res0] "=r"(res->a0), [res1] "=r"(res->a1), \
+ [res2] "=r"(res->a2), [res3] "=r"(res->a3) \
+ : [function_id] "r"(function_id), [arg0] "r"(arg0), \
+ [arg1] "r"(arg1), [arg2] "r"(arg2), [arg3] "r"(arg3), \
+ [arg4] "r"(arg4), [arg5] "r"(arg5), [arg6] "r"(arg6) \
+ : "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7")
+
+
+void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
+ uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
+ uint64_t arg6, struct arm_smccc_res *res)
+{
+ __smccc_call(hvc, function_id, arg0, arg1, arg2, arg3, arg4, arg5,
+ arg6, res);
+}
+
+void smccc_smc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
+ uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
+ uint64_t arg6, struct arm_smccc_res *res)
+{
+ __smccc_call(smc, function_id, arg0, arg1, arg2, arg3, arg4, arg5,
+ arg6, res);
+}
+
+void kvm_selftest_arch_init(void)
+{
+ /*
+ * arm64 doesn't have a true default mode, so start by computing the
+ * available IPA space and page sizes early.
+ */
+ guest_modes_append_default();
+}
+
+void vm_vaddr_populate_bitmap(struct kvm_vm *vm)
+{
+ /*
+ * arm64 selftests use only TTBR0_EL1, meaning that the valid VA space
+ * is [0, 2^(64 - TCR_EL1.T0SZ)).
+ */
+ sparsebit_set_num(vm->vpages_valid, 0,
+ (1ULL << vm->va_bits) >> vm->page_shift);
+}
diff --git a/tools/testing/selftests/kvm/lib/aarch64/spinlock.c b/tools/testing/selftests/kvm/lib/aarch64/spinlock.c
new file mode 100644
index 000000000000..a076e780be5d
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/aarch64/spinlock.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM64 Spinlock support
+ */
+#include <stdint.h>
+
+#include "spinlock.h"
+
+void spin_lock(struct spinlock *lock)
+{
+ int val, res;
+
+ asm volatile(
+ "1: ldaxr %w0, [%2]\n"
+ " cbnz %w0, 1b\n"
+ " mov %w0, #1\n"
+ " stxr %w1, %w0, [%2]\n"
+ " cbnz %w1, 1b\n"
+ : "=&r" (val), "=&r" (res)
+ : "r" (&lock->v)
+ : "memory");
+}
+
+void spin_unlock(struct spinlock *lock)
+{
+ asm volatile("stlr wzr, [%0]\n" : : "r" (&lock->v) : "memory");
+}
diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
index c8e0ec20d3bf..ddab0ce89d4d 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
@@ -5,108 +5,30 @@
* Copyright (C) 2018, Red Hat, Inc.
*/
#include "kvm_util.h"
-#include "../kvm_util_internal.h"
-static vm_vaddr_t *ucall_exit_mmio_addr;
+vm_vaddr_t *ucall_exit_mmio_addr;
-static bool ucall_mmio_init(struct kvm_vm *vm, vm_paddr_t gpa)
+void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
{
- if (kvm_userspace_memory_region_find(vm, gpa, gpa + 1))
- return false;
+ vm_vaddr_t mmio_gva = vm_vaddr_unused_gap(vm, vm->page_size, KVM_UTIL_MIN_VADDR);
- virt_pg_map(vm, gpa, gpa, 0);
+ virt_map(vm, mmio_gva, mmio_gpa, 1);
- ucall_exit_mmio_addr = (vm_vaddr_t *)gpa;
- sync_global_to_guest(vm, ucall_exit_mmio_addr);
+ vm->ucall_mmio_addr = mmio_gpa;
- return true;
+ write_guest_global(vm, ucall_exit_mmio_addr, (vm_vaddr_t *)mmio_gva);
}
-void ucall_init(struct kvm_vm *vm, void *arg)
+void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
{
- vm_paddr_t gpa, start, end, step, offset;
- unsigned int bits;
- bool ret;
-
- if (arg) {
- gpa = (vm_paddr_t)arg;
- ret = ucall_mmio_init(vm, gpa);
- TEST_ASSERT(ret, "Can't set ucall mmio address to %lx", gpa);
- return;
- }
-
- /*
- * Find an address within the allowed physical and virtual address
- * spaces, that does _not_ have a KVM memory region associated with
- * it. Identity mapping an address like this allows the guest to
- * access it, but as KVM doesn't know what to do with it, it
- * will assume it's something userspace handles and exit with
- * KVM_EXIT_MMIO. Well, at least that's how it works for AArch64.
- * Here we start with a guess that the addresses around 5/8th
- * of the allowed space are unmapped and then work both down and
- * up from there in 1/16th allowed space sized steps.
- *
- * Note, we need to use VA-bits - 1 when calculating the allowed
- * virtual address space for an identity mapping because the upper
- * half of the virtual address space is the two's complement of the
- * lower and won't match physical addresses.
- */
- bits = vm->va_bits - 1;
- bits = vm->pa_bits < bits ? vm->pa_bits : bits;
- end = 1ul << bits;
- start = end * 5 / 8;
- step = end / 16;
- for (offset = 0; offset < end - start; offset += step) {
- if (ucall_mmio_init(vm, start - offset))
- return;
- if (ucall_mmio_init(vm, start + offset))
- return;
- }
- TEST_FAIL("Can't find a ucall mmio address");
-}
-
-void ucall_uninit(struct kvm_vm *vm)
-{
- ucall_exit_mmio_addr = 0;
- sync_global_to_guest(vm, ucall_exit_mmio_addr);
-}
-
-void ucall(uint64_t cmd, int nargs, ...)
-{
- struct ucall uc = {
- .cmd = cmd,
- };
- va_list va;
- int i;
-
- nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS;
-
- va_start(va, nargs);
- for (i = 0; i < nargs; ++i)
- uc.args[i] = va_arg(va, uint64_t);
- va_end(va);
-
- *ucall_exit_mmio_addr = (vm_vaddr_t)&uc;
-}
-
-uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
-{
- struct kvm_run *run = vcpu_state(vm, vcpu_id);
- struct ucall ucall = {};
+ struct kvm_run *run = vcpu->run;
if (run->exit_reason == KVM_EXIT_MMIO &&
- run->mmio.phys_addr == (uint64_t)ucall_exit_mmio_addr) {
- vm_vaddr_t gva;
-
- TEST_ASSERT(run->mmio.is_write && run->mmio.len == 8,
+ run->mmio.phys_addr == vcpu->vm->ucall_mmio_addr) {
+ TEST_ASSERT(run->mmio.is_write && run->mmio.len == sizeof(uint64_t),
"Unexpected ucall exit mmio address access");
- memcpy(&gva, run->mmio.data, sizeof(gva));
- memcpy(&ucall, addr_gva2hva(vm, gva), sizeof(ucall));
-
- vcpu_run_complete_io(vm, vcpu_id);
- if (uc)
- memcpy(uc, &ucall, sizeof(ucall));
+ return (void *)(*((uint64_t *)run->mmio.data));
}
- return ucall.cmd;
+ return NULL;
}
diff --git a/tools/testing/selftests/kvm/lib/aarch64/vgic.c b/tools/testing/selftests/kvm/lib/aarch64/vgic.c
new file mode 100644
index 000000000000..184378d593e9
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/aarch64/vgic.c
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM Generic Interrupt Controller (GIC) v3 host support
+ */
+
+#include <linux/kvm.h>
+#include <linux/sizes.h>
+#include <asm/kvm_para.h>
+#include <asm/kvm.h>
+
+#include "kvm_util.h"
+#include "vgic.h"
+#include "gic.h"
+#include "gic_v3.h"
+
+/*
+ * vGIC-v3 default host setup
+ *
+ * Input args:
+ * vm - KVM VM
+ * nr_vcpus - Number of vCPUs supported by this VM
+ * gicd_base_gpa - Guest Physical Address of the Distributor region
+ * gicr_base_gpa - Guest Physical Address of the Redistributor region
+ *
+ * Output args: None
+ *
+ * Return: GIC file-descriptor or negative error code upon failure
+ *
+ * The function creates a vGIC-v3 device and maps the distributor and
+ * redistributor regions of the guest. Since it depends on the number of
+ * vCPUs for the VM, it must be called after all the vCPUs have been created.
+ */
+int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs,
+ uint64_t gicd_base_gpa, uint64_t gicr_base_gpa)
+{
+ int gic_fd;
+ uint64_t redist_attr;
+ struct list_head *iter;
+ unsigned int nr_gic_pages, nr_vcpus_created = 0;
+
+ TEST_ASSERT(nr_vcpus, "Number of vCPUs cannot be empty");
+
+ /*
+ * Make sure that the caller is infact calling this
+ * function after all the vCPUs are added.
+ */
+ list_for_each(iter, &vm->vcpus)
+ nr_vcpus_created++;
+ TEST_ASSERT(nr_vcpus == nr_vcpus_created,
+ "Number of vCPUs requested (%u) doesn't match with the ones created for the VM (%u)",
+ nr_vcpus, nr_vcpus_created);
+
+ /* Distributor setup */
+ gic_fd = __kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3);
+ if (gic_fd < 0)
+ return gic_fd;
+
+ kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_NR_IRQS, 0, &nr_irqs);
+
+ kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+ kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_DIST, &gicd_base_gpa);
+ nr_gic_pages = vm_calc_num_guest_pages(vm->mode, KVM_VGIC_V3_DIST_SIZE);
+ virt_map(vm, gicd_base_gpa, gicd_base_gpa, nr_gic_pages);
+
+ /* Redistributor setup */
+ redist_attr = REDIST_REGION_ATTR_ADDR(nr_vcpus, gicr_base_gpa, 0, 0);
+ kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &redist_attr);
+ nr_gic_pages = vm_calc_num_guest_pages(vm->mode,
+ KVM_VGIC_V3_REDIST_SIZE * nr_vcpus);
+ virt_map(vm, gicr_base_gpa, gicr_base_gpa, nr_gic_pages);
+
+ kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+ return gic_fd;
+}
+
+/* should only work for level sensitive interrupts */
+int _kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level)
+{
+ uint64_t attr = 32 * (intid / 32);
+ uint64_t index = intid % 32;
+ uint64_t val;
+ int ret;
+
+ ret = __kvm_device_attr_get(gic_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO,
+ attr, &val);
+ if (ret != 0)
+ return ret;
+
+ val |= 1U << index;
+ ret = __kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO,
+ attr, &val);
+ return ret;
+}
+
+void kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level)
+{
+ int ret = _kvm_irq_set_level_info(gic_fd, intid, level);
+
+ TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO, ret));
+}
+
+int _kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level)
+{
+ uint32_t irq = intid & KVM_ARM_IRQ_NUM_MASK;
+
+ TEST_ASSERT(!INTID_IS_SGI(intid), "KVM_IRQ_LINE's interface itself "
+ "doesn't allow injecting SGIs. There's no mask for it.");
+
+ if (INTID_IS_PPI(intid))
+ irq |= KVM_ARM_IRQ_TYPE_PPI << KVM_ARM_IRQ_TYPE_SHIFT;
+ else
+ irq |= KVM_ARM_IRQ_TYPE_SPI << KVM_ARM_IRQ_TYPE_SHIFT;
+
+ return _kvm_irq_line(vm, irq, level);
+}
+
+void kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level)
+{
+ int ret = _kvm_arm_irq_line(vm, intid, level);
+
+ TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_IRQ_LINE, ret));
+}
+
+static void vgic_poke_irq(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu,
+ uint64_t reg_off)
+{
+ uint64_t reg = intid / 32;
+ uint64_t index = intid % 32;
+ uint64_t attr = reg_off + reg * 4;
+ uint64_t val;
+ bool intid_is_private = INTID_IS_SGI(intid) || INTID_IS_PPI(intid);
+
+ uint32_t group = intid_is_private ? KVM_DEV_ARM_VGIC_GRP_REDIST_REGS
+ : KVM_DEV_ARM_VGIC_GRP_DIST_REGS;
+
+ if (intid_is_private) {
+ /* TODO: only vcpu 0 implemented for now. */
+ assert(vcpu->id == 0);
+ attr += SZ_64K;
+ }
+
+ /* Check that the addr part of the attr is within 32 bits. */
+ assert((attr & ~KVM_DEV_ARM_VGIC_OFFSET_MASK) == 0);
+
+ /*
+ * All calls will succeed, even with invalid intid's, as long as the
+ * addr part of the attr is within 32 bits (checked above). An invalid
+ * intid will just make the read/writes point to above the intended
+ * register space (i.e., ICPENDR after ISPENDR).
+ */
+ kvm_device_attr_get(gic_fd, group, attr, &val);
+ val |= 1ULL << index;
+ kvm_device_attr_set(gic_fd, group, attr, &val);
+}
+
+void kvm_irq_write_ispendr(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu)
+{
+ vgic_poke_irq(gic_fd, intid, vcpu, GICD_ISPENDR);
+}
+
+void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu)
+{
+ vgic_poke_irq(gic_fd, intid, vcpu, GICD_ISACTIVER);
+}
diff --git a/tools/testing/selftests/kvm/lib/assert.c b/tools/testing/selftests/kvm/lib/assert.c
index 5ebbd0d6b472..2bd25b191d15 100644
--- a/tools/testing/selftests/kvm/lib/assert.c
+++ b/tools/testing/selftests/kvm/lib/assert.c
@@ -22,7 +22,7 @@ static void test_dump_stack(void)
* Build and run this command:
*
* addr2line -s -e /proc/$PPID/exe -fpai {backtrace addresses} | \
- * grep -v test_dump_stack | cat -n 1>&2
+ * cat -n 1>&2
*
* Note that the spacing is different and there's no newline.
*/
@@ -36,18 +36,24 @@ static void test_dump_stack(void)
n * (((sizeof(void *)) * 2) + 1) +
/* Null terminator: */
1];
- char *c;
+ char *c = cmd;
n = backtrace(stack, n);
- c = &cmd[0];
- c += sprintf(c, "%s", addr2line);
/*
- * Skip the first 3 frames: backtrace, test_dump_stack, and
- * test_assert. We hope that backtrace isn't inlined and the other two
- * we've declared noinline.
+ * Skip the first 2 frames, which should be test_dump_stack() and
+ * test_assert(); both of which are declared noinline. Bail if the
+ * resulting stack trace would be empty. Otherwise, addr2line will block
+ * waiting for addresses to be passed in via stdin.
*/
+ if (n <= 2) {
+ fputs(" (stack trace empty)\n", stderr);
+ return;
+ }
+
+ c += sprintf(c, "%s", addr2line);
for (i = 2; i < n; i++)
c += sprintf(c, " %lx", ((unsigned long) stack[i]) - 1);
+
c += sprintf(c, "%s", pipeline);
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-result"
@@ -71,9 +77,9 @@ test_assert(bool exp, const char *exp_str,
fprintf(stderr, "==== Test Assertion Failure ====\n"
" %s:%u: %s\n"
- " pid=%d tid=%d - %s\n",
+ " pid=%d tid=%d errno=%d - %s\n",
file, line, exp_str, getpid(), _gettid(),
- strerror(errno));
+ errno, strerror(errno));
test_dump_stack();
if (fmt) {
fputs(" ", stderr);
diff --git a/tools/testing/selftests/kvm/lib/elf.c b/tools/testing/selftests/kvm/lib/elf.c
index bc75a91e00a6..f34d926d9735 100644
--- a/tools/testing/selftests/kvm/lib/elf.c
+++ b/tools/testing/selftests/kvm/lib/elf.c
@@ -11,7 +11,6 @@
#include <linux/elf.h>
#include "kvm_util.h"
-#include "kvm_util_internal.h"
static void elfhdr_get(const char *filename, Elf64_Ehdr *hdrp)
{
@@ -91,6 +90,7 @@ static void elfhdr_get(const char *filename, Elf64_Ehdr *hdrp)
" hdrp->e_shentsize: %x\n"
" expected: %zx",
hdrp->e_shentsize, sizeof(Elf64_Shdr));
+ close(fd);
}
/* VM ELF Load
@@ -111,8 +111,7 @@ static void elfhdr_get(const char *filename, Elf64_Ehdr *hdrp)
* by the image and it needs to have sufficient available physical pages, to
* back the virtual pages used to load the image.
*/
-void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename,
- uint32_t data_memslot, uint32_t pgd_memslot)
+void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename)
{
off_t offset, offset_rv;
Elf64_Ehdr hdr;
@@ -140,7 +139,7 @@ void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename,
offset = hdr.e_phoff + (n1 * hdr.e_phentsize);
offset_rv = lseek(fd, offset, SEEK_SET);
TEST_ASSERT(offset_rv == offset,
- "Failed to seek to begining of program header %u,\n"
+ "Failed to seek to beginning of program header %u,\n"
" filename: %s\n"
" rv: %jd errno: %i",
n1, filename, (intmax_t) offset_rv, errno);
@@ -158,14 +157,13 @@ void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename,
"memsize of 0,\n"
" phdr index: %u p_memsz: 0x%" PRIx64,
n1, (uint64_t) phdr.p_memsz);
- vm_vaddr_t seg_vstart = phdr.p_vaddr;
- seg_vstart &= ~(vm_vaddr_t)(vm->page_size - 1);
+ vm_vaddr_t seg_vstart = align_down(phdr.p_vaddr, vm->page_size);
vm_vaddr_t seg_vend = phdr.p_vaddr + phdr.p_memsz - 1;
seg_vend |= vm->page_size - 1;
size_t seg_size = seg_vend - seg_vstart + 1;
- vm_vaddr_t vaddr = vm_vaddr_alloc(vm, seg_size, seg_vstart,
- data_memslot, pgd_memslot);
+ vm_vaddr_t vaddr = __vm_vaddr_alloc(vm, seg_size, seg_vstart,
+ MEM_REGION_CODE);
TEST_ASSERT(vaddr == seg_vstart, "Unable to allocate "
"virtual memory for segment at requested min addr,\n"
" segment idx: %u\n"
@@ -186,11 +184,12 @@ void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename,
"Seek to program segment offset failed,\n"
" program header idx: %u errno: %i\n"
" offset_rv: 0x%jx\n"
- " expected: 0x%jx\n",
+ " expected: 0x%jx",
n1, errno, (intmax_t) offset_rv,
(intmax_t) phdr.p_offset);
test_read(fd, addr_gva2hva(vm, phdr.p_vaddr),
phdr.p_filesz);
}
}
+ close(fd);
}
diff --git a/tools/testing/selftests/kvm/lib/guest_modes.c b/tools/testing/selftests/kvm/lib/guest_modes.c
new file mode 100644
index 000000000000..b04901e55138
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/guest_modes.c
@@ -0,0 +1,129 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+#include "guest_modes.h"
+
+#ifdef __aarch64__
+#include "processor.h"
+enum vm_guest_mode vm_mode_default;
+#endif
+
+struct guest_mode guest_modes[NUM_VM_MODES];
+
+void guest_modes_append_default(void)
+{
+#ifndef __aarch64__
+ guest_mode_append(VM_MODE_DEFAULT, true);
+#else
+ {
+ unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE);
+ uint32_t ipa4k, ipa16k, ipa64k;
+ int i;
+
+ aarch64_get_supported_page_sizes(limit, &ipa4k, &ipa16k, &ipa64k);
+
+ guest_mode_append(VM_MODE_P52V48_4K, ipa4k >= 52);
+ guest_mode_append(VM_MODE_P52V48_16K, ipa16k >= 52);
+ guest_mode_append(VM_MODE_P52V48_64K, ipa64k >= 52);
+
+ guest_mode_append(VM_MODE_P48V48_4K, ipa4k >= 48);
+ guest_mode_append(VM_MODE_P48V48_16K, ipa16k >= 48);
+ guest_mode_append(VM_MODE_P48V48_64K, ipa64k >= 48);
+
+ guest_mode_append(VM_MODE_P40V48_4K, ipa4k >= 40);
+ guest_mode_append(VM_MODE_P40V48_16K, ipa16k >= 40);
+ guest_mode_append(VM_MODE_P40V48_64K, ipa64k >= 40);
+
+ guest_mode_append(VM_MODE_P36V48_4K, ipa4k >= 36);
+ guest_mode_append(VM_MODE_P36V48_16K, ipa16k >= 36);
+ guest_mode_append(VM_MODE_P36V48_64K, ipa64k >= 36);
+ guest_mode_append(VM_MODE_P36V47_16K, ipa16k >= 36);
+
+ vm_mode_default = ipa4k >= 40 ? VM_MODE_P40V48_4K : NUM_VM_MODES;
+
+ /*
+ * Pick the first supported IPA size if the default
+ * isn't available.
+ */
+ for (i = 0; vm_mode_default == NUM_VM_MODES && i < NUM_VM_MODES; i++) {
+ if (guest_modes[i].supported && guest_modes[i].enabled)
+ vm_mode_default = i;
+ }
+
+ TEST_ASSERT(vm_mode_default != NUM_VM_MODES,
+ "No supported mode!");
+ }
+#endif
+#ifdef __s390x__
+ {
+ int kvm_fd, vm_fd;
+ struct kvm_s390_vm_cpu_processor info;
+
+ kvm_fd = open_kvm_dev_path_or_exit();
+ vm_fd = __kvm_ioctl(kvm_fd, KVM_CREATE_VM, NULL);
+ kvm_device_attr_get(vm_fd, KVM_S390_VM_CPU_MODEL,
+ KVM_S390_VM_CPU_PROCESSOR, &info);
+ close(vm_fd);
+ close(kvm_fd);
+ /* Starting with z13 we have 47bits of physical address */
+ if (info.ibc >= 0x30)
+ guest_mode_append(VM_MODE_P47V64_4K, true);
+ }
+#endif
+#ifdef __riscv
+ {
+ unsigned int sz = kvm_check_cap(KVM_CAP_VM_GPA_BITS);
+
+ if (sz >= 52)
+ guest_mode_append(VM_MODE_P52V48_4K, true);
+ if (sz >= 48)
+ guest_mode_append(VM_MODE_P48V48_4K, true);
+ }
+#endif
+}
+
+void for_each_guest_mode(void (*func)(enum vm_guest_mode, void *), void *arg)
+{
+ int i;
+
+ for (i = 0; i < NUM_VM_MODES; ++i) {
+ if (!guest_modes[i].enabled)
+ continue;
+ TEST_ASSERT(guest_modes[i].supported,
+ "Guest mode ID %d (%s) not supported.",
+ i, vm_guest_mode_string(i));
+ func(i, arg);
+ }
+}
+
+void guest_modes_help(void)
+{
+ int i;
+
+ printf(" -m: specify the guest mode ID to test\n"
+ " (default: test all supported modes)\n"
+ " This option may be used multiple times.\n"
+ " Guest mode IDs:\n");
+ for (i = 0; i < NUM_VM_MODES; ++i) {
+ printf(" %d: %s%s\n", i, vm_guest_mode_string(i),
+ guest_modes[i].supported ? " (supported)" : "");
+ }
+}
+
+void guest_modes_cmdline(const char *arg)
+{
+ static bool mode_selected;
+ unsigned int mode;
+ int i;
+
+ if (!mode_selected) {
+ for (i = 0; i < NUM_VM_MODES; ++i)
+ guest_modes[i].enabled = false;
+ mode_selected = true;
+ }
+
+ mode = atoi_non_negative("Guest mode ID", arg);
+ TEST_ASSERT(mode < NUM_VM_MODES, "Guest mode ID %d too big", mode);
+ guest_modes[mode].enabled = true;
+}
diff --git a/tools/testing/selftests/kvm/lib/guest_sprintf.c b/tools/testing/selftests/kvm/lib/guest_sprintf.c
new file mode 100644
index 000000000000..74627514c4d4
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/guest_sprintf.c
@@ -0,0 +1,314 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "ucall_common.h"
+
+#define APPEND_BUFFER_SAFE(str, end, v) \
+do { \
+ GUEST_ASSERT(str < end); \
+ *str++ = (v); \
+} while (0)
+
+static int isdigit(int ch)
+{
+ return (ch >= '0') && (ch <= '9');
+}
+
+static int skip_atoi(const char **s)
+{
+ int i = 0;
+
+ while (isdigit(**s))
+ i = i * 10 + *((*s)++) - '0';
+ return i;
+}
+
+#define ZEROPAD 1 /* pad with zero */
+#define SIGN 2 /* unsigned/signed long */
+#define PLUS 4 /* show plus */
+#define SPACE 8 /* space if plus */
+#define LEFT 16 /* left justified */
+#define SMALL 32 /* Must be 32 == 0x20 */
+#define SPECIAL 64 /* 0x */
+
+#define __do_div(n, base) \
+({ \
+ int __res; \
+ \
+ __res = ((uint64_t) n) % (uint32_t) base; \
+ n = ((uint64_t) n) / (uint32_t) base; \
+ __res; \
+})
+
+static char *number(char *str, const char *end, long num, int base, int size,
+ int precision, int type)
+{
+ /* we are called with base 8, 10 or 16, only, thus don't need "G..." */
+ static const char digits[16] = "0123456789ABCDEF"; /* "GHIJKLMNOPQRSTUVWXYZ"; */
+
+ char tmp[66];
+ char c, sign, locase;
+ int i;
+
+ /*
+ * locase = 0 or 0x20. ORing digits or letters with 'locase'
+ * produces same digits or (maybe lowercased) letters
+ */
+ locase = (type & SMALL);
+ if (type & LEFT)
+ type &= ~ZEROPAD;
+ if (base < 2 || base > 16)
+ return NULL;
+ c = (type & ZEROPAD) ? '0' : ' ';
+ sign = 0;
+ if (type & SIGN) {
+ if (num < 0) {
+ sign = '-';
+ num = -num;
+ size--;
+ } else if (type & PLUS) {
+ sign = '+';
+ size--;
+ } else if (type & SPACE) {
+ sign = ' ';
+ size--;
+ }
+ }
+ if (type & SPECIAL) {
+ if (base == 16)
+ size -= 2;
+ else if (base == 8)
+ size--;
+ }
+ i = 0;
+ if (num == 0)
+ tmp[i++] = '0';
+ else
+ while (num != 0)
+ tmp[i++] = (digits[__do_div(num, base)] | locase);
+ if (i > precision)
+ precision = i;
+ size -= precision;
+ if (!(type & (ZEROPAD + LEFT)))
+ while (size-- > 0)
+ APPEND_BUFFER_SAFE(str, end, ' ');
+ if (sign)
+ APPEND_BUFFER_SAFE(str, end, sign);
+ if (type & SPECIAL) {
+ if (base == 8)
+ APPEND_BUFFER_SAFE(str, end, '0');
+ else if (base == 16) {
+ APPEND_BUFFER_SAFE(str, end, '0');
+ APPEND_BUFFER_SAFE(str, end, 'x');
+ }
+ }
+ if (!(type & LEFT))
+ while (size-- > 0)
+ APPEND_BUFFER_SAFE(str, end, c);
+ while (i < precision--)
+ APPEND_BUFFER_SAFE(str, end, '0');
+ while (i-- > 0)
+ APPEND_BUFFER_SAFE(str, end, tmp[i]);
+ while (size-- > 0)
+ APPEND_BUFFER_SAFE(str, end, ' ');
+
+ return str;
+}
+
+int guest_vsnprintf(char *buf, int n, const char *fmt, va_list args)
+{
+ char *str, *end;
+ const char *s;
+ uint64_t num;
+ int i, base;
+ int len;
+
+ int flags; /* flags to number() */
+
+ int field_width; /* width of output field */
+ int precision; /*
+ * min. # of digits for integers; max
+ * number of chars for from string
+ */
+ int qualifier; /* 'h', 'l', or 'L' for integer fields */
+
+ end = buf + n;
+ GUEST_ASSERT(buf < end);
+ GUEST_ASSERT(n > 0);
+
+ for (str = buf; *fmt; ++fmt) {
+ if (*fmt != '%') {
+ APPEND_BUFFER_SAFE(str, end, *fmt);
+ continue;
+ }
+
+ /* process flags */
+ flags = 0;
+repeat:
+ ++fmt; /* this also skips first '%' */
+ switch (*fmt) {
+ case '-':
+ flags |= LEFT;
+ goto repeat;
+ case '+':
+ flags |= PLUS;
+ goto repeat;
+ case ' ':
+ flags |= SPACE;
+ goto repeat;
+ case '#':
+ flags |= SPECIAL;
+ goto repeat;
+ case '0':
+ flags |= ZEROPAD;
+ goto repeat;
+ }
+
+ /* get field width */
+ field_width = -1;
+ if (isdigit(*fmt))
+ field_width = skip_atoi(&fmt);
+ else if (*fmt == '*') {
+ ++fmt;
+ /* it's the next argument */
+ field_width = va_arg(args, int);
+ if (field_width < 0) {
+ field_width = -field_width;
+ flags |= LEFT;
+ }
+ }
+
+ /* get the precision */
+ precision = -1;
+ if (*fmt == '.') {
+ ++fmt;
+ if (isdigit(*fmt))
+ precision = skip_atoi(&fmt);
+ else if (*fmt == '*') {
+ ++fmt;
+ /* it's the next argument */
+ precision = va_arg(args, int);
+ }
+ if (precision < 0)
+ precision = 0;
+ }
+
+ /* get the conversion qualifier */
+ qualifier = -1;
+ if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L') {
+ qualifier = *fmt;
+ ++fmt;
+ }
+
+ /*
+ * Play nice with %llu, %llx, etc. KVM selftests only support
+ * 64-bit builds, so just treat %ll* the same as %l*.
+ */
+ if (qualifier == 'l' && *fmt == 'l')
+ ++fmt;
+
+ /* default base */
+ base = 10;
+
+ switch (*fmt) {
+ case 'c':
+ if (!(flags & LEFT))
+ while (--field_width > 0)
+ APPEND_BUFFER_SAFE(str, end, ' ');
+ APPEND_BUFFER_SAFE(str, end,
+ (uint8_t)va_arg(args, int));
+ while (--field_width > 0)
+ APPEND_BUFFER_SAFE(str, end, ' ');
+ continue;
+
+ case 's':
+ s = va_arg(args, char *);
+ len = strnlen(s, precision);
+
+ if (!(flags & LEFT))
+ while (len < field_width--)
+ APPEND_BUFFER_SAFE(str, end, ' ');
+ for (i = 0; i < len; ++i)
+ APPEND_BUFFER_SAFE(str, end, *s++);
+ while (len < field_width--)
+ APPEND_BUFFER_SAFE(str, end, ' ');
+ continue;
+
+ case 'p':
+ if (field_width == -1) {
+ field_width = 2 * sizeof(void *);
+ flags |= SPECIAL | SMALL | ZEROPAD;
+ }
+ str = number(str, end,
+ (uint64_t)va_arg(args, void *), 16,
+ field_width, precision, flags);
+ continue;
+
+ case 'n':
+ if (qualifier == 'l') {
+ long *ip = va_arg(args, long *);
+ *ip = (str - buf);
+ } else {
+ int *ip = va_arg(args, int *);
+ *ip = (str - buf);
+ }
+ continue;
+
+ case '%':
+ APPEND_BUFFER_SAFE(str, end, '%');
+ continue;
+
+ /* integer number formats - set up the flags and "break" */
+ case 'o':
+ base = 8;
+ break;
+
+ case 'x':
+ flags |= SMALL;
+ case 'X':
+ base = 16;
+ break;
+
+ case 'd':
+ case 'i':
+ flags |= SIGN;
+ case 'u':
+ break;
+
+ default:
+ APPEND_BUFFER_SAFE(str, end, '%');
+ if (*fmt)
+ APPEND_BUFFER_SAFE(str, end, *fmt);
+ else
+ --fmt;
+ continue;
+ }
+ if (qualifier == 'l')
+ num = va_arg(args, uint64_t);
+ else if (qualifier == 'h') {
+ num = (uint16_t)va_arg(args, int);
+ if (flags & SIGN)
+ num = (int16_t)num;
+ } else if (flags & SIGN)
+ num = va_arg(args, int);
+ else
+ num = va_arg(args, uint32_t);
+ str = number(str, end, num, base, field_width, precision, flags);
+ }
+
+ GUEST_ASSERT(str < end);
+ *str = '\0';
+ return str - buf;
+}
+
+int guest_snprintf(char *buf, int n, const char *fmt, ...)
+{
+ va_list va;
+ int len;
+
+ va_start(va, fmt);
+ len = guest_vsnprintf(buf, n, fmt, va);
+ va_end(va);
+
+ return len;
+}
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 74776ee228f2..b2262b5fad9e 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -5,172 +5,283 @@
* Copyright (C) 2018, Google LLC.
*/
+#define _GNU_SOURCE /* for program_invocation_name */
#include "test_util.h"
#include "kvm_util.h"
-#include "kvm_util_internal.h"
#include "processor.h"
#include <assert.h>
+#include <sched.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
+#include <unistd.h>
#include <linux/kernel.h>
-#define KVM_UTIL_PGS_PER_HUGEPG 512
#define KVM_UTIL_MIN_PFN 2
-/* Aligns x up to the next multiple of size. Size must be a power of 2. */
-static void *align(void *x, size_t size)
+static int vcpu_mmap_sz(void);
+
+int open_path_or_exit(const char *path, int flags)
{
- size_t mask = size - 1;
- TEST_ASSERT(size != 0 && !(size & (size - 1)),
- "size not a power of 2: %lu", size);
- return (void *) (((size_t) x + mask) & ~mask);
+ int fd;
+
+ fd = open(path, flags);
+ __TEST_REQUIRE(fd >= 0 || errno != ENOENT, "Cannot open %s: %s", path, strerror(errno));
+ TEST_ASSERT(fd >= 0, "Failed to open '%s'", path);
+
+ return fd;
}
/*
- * Capability
+ * Open KVM_DEV_PATH if available, otherwise exit the entire program.
*
* Input Args:
- * cap - Capability
- *
- * Output Args: None
+ * flags - The flags to pass when opening KVM_DEV_PATH.
*
* Return:
- * On success, the Value corresponding to the capability (KVM_CAP_*)
- * specified by the value of cap. On failure a TEST_ASSERT failure
- * is produced.
- *
- * Looks up and returns the value corresponding to the capability
- * (KVM_CAP_*) given by cap.
+ * The opened file descriptor of /dev/kvm.
*/
-int kvm_check_cap(long cap)
+static int _open_kvm_dev_path_or_exit(int flags)
{
- int ret;
- int kvm_fd;
+ return open_path_or_exit(KVM_DEV_PATH, flags);
+}
- kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
- if (kvm_fd < 0)
- exit(KSFT_SKIP);
+int open_kvm_dev_path_or_exit(void)
+{
+ return _open_kvm_dev_path_or_exit(O_RDONLY);
+}
- ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap);
- TEST_ASSERT(ret != -1, "KVM_CHECK_EXTENSION IOCTL failed,\n"
- " rc: %i errno: %i", ret, errno);
+static ssize_t get_module_param(const char *module_name, const char *param,
+ void *buffer, size_t buffer_size)
+{
+ const int path_size = 128;
+ char path[path_size];
+ ssize_t bytes_read;
+ int fd, r;
- close(kvm_fd);
+ r = snprintf(path, path_size, "/sys/module/%s/parameters/%s",
+ module_name, param);
+ TEST_ASSERT(r < path_size,
+ "Failed to construct sysfs path in %d bytes.", path_size);
- return ret;
+ fd = open_path_or_exit(path, O_RDONLY);
+
+ bytes_read = read(fd, buffer, buffer_size);
+ TEST_ASSERT(bytes_read > 0, "read(%s) returned %ld, wanted %ld bytes",
+ path, bytes_read, buffer_size);
+
+ r = close(fd);
+ TEST_ASSERT(!r, "close(%s) failed", path);
+ return bytes_read;
}
-/* VM Enable Capability
- *
- * Input Args:
- * vm - Virtual Machine
- * cap - Capability
- *
- * Output Args: None
- *
- * Return: On success, 0. On failure a TEST_ASSERT failure is produced.
- *
- * Enables a capability (KVM_CAP_*) on the VM.
- */
-int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap)
+static int get_module_param_integer(const char *module_name, const char *param)
{
- int ret;
+ /*
+ * 16 bytes to hold a 64-bit value (1 byte per char), 1 byte for the
+ * NUL char, and 1 byte because the kernel sucks and inserts a newline
+ * at the end.
+ */
+ char value[16 + 1 + 1];
+ ssize_t r;
- ret = ioctl(vm->fd, KVM_ENABLE_CAP, cap);
- TEST_ASSERT(ret == 0, "KVM_ENABLE_CAP IOCTL failed,\n"
- " rc: %i errno: %i", ret, errno);
+ memset(value, '\0', sizeof(value));
- return ret;
+ r = get_module_param(module_name, param, value, sizeof(value));
+ TEST_ASSERT(value[r - 1] == '\n',
+ "Expected trailing newline, got char '%c'", value[r - 1]);
+
+ /*
+ * Squash the newline, otherwise atoi_paranoid() will complain about
+ * trailing non-NUL characters in the string.
+ */
+ value[r - 1] = '\0';
+ return atoi_paranoid(value);
}
-static void vm_open(struct kvm_vm *vm, int perm)
+static bool get_module_param_bool(const char *module_name, const char *param)
{
- vm->kvm_fd = open(KVM_DEV_PATH, perm);
- if (vm->kvm_fd < 0)
- exit(KSFT_SKIP);
+ char value;
+ ssize_t r;
- if (!kvm_check_cap(KVM_CAP_IMMEDIATE_EXIT)) {
- print_skip("immediate_exit not available");
- exit(KSFT_SKIP);
- }
+ r = get_module_param(module_name, param, &value, sizeof(value));
+ TEST_ASSERT_EQ(r, 1);
- vm->fd = ioctl(vm->kvm_fd, KVM_CREATE_VM, vm->type);
- TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, "
- "rc: %i errno: %i", vm->fd, errno);
+ if (value == 'Y')
+ return true;
+ else if (value == 'N')
+ return false;
+
+ TEST_FAIL("Unrecognized value '%c' for boolean module param", value);
}
-const char * const vm_guest_mode_string[] = {
- "PA-bits:52, VA-bits:48, 4K pages",
- "PA-bits:52, VA-bits:48, 64K pages",
- "PA-bits:48, VA-bits:48, 4K pages",
- "PA-bits:48, VA-bits:48, 64K pages",
- "PA-bits:40, VA-bits:48, 4K pages",
- "PA-bits:40, VA-bits:48, 64K pages",
- "PA-bits:ANY, VA-bits:48, 4K pages",
-};
-_Static_assert(sizeof(vm_guest_mode_string)/sizeof(char *) == NUM_VM_MODES,
- "Missing new mode strings?");
-
-struct vm_guest_mode_params {
- unsigned int pa_bits;
- unsigned int va_bits;
- unsigned int page_size;
- unsigned int page_shift;
-};
+bool get_kvm_param_bool(const char *param)
+{
+ return get_module_param_bool("kvm", param);
+}
-static const struct vm_guest_mode_params vm_guest_mode_params[] = {
- { 52, 48, 0x1000, 12 },
- { 52, 48, 0x10000, 16 },
- { 48, 48, 0x1000, 12 },
- { 48, 48, 0x10000, 16 },
- { 40, 48, 0x1000, 12 },
- { 40, 48, 0x10000, 16 },
- { 0, 0, 0x1000, 12 },
-};
-_Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
- "Missing new mode params?");
+bool get_kvm_intel_param_bool(const char *param)
+{
+ return get_module_param_bool("kvm_intel", param);
+}
+
+bool get_kvm_amd_param_bool(const char *param)
+{
+ return get_module_param_bool("kvm_amd", param);
+}
+
+int get_kvm_param_integer(const char *param)
+{
+ return get_module_param_integer("kvm", param);
+}
+
+int get_kvm_intel_param_integer(const char *param)
+{
+ return get_module_param_integer("kvm_intel", param);
+}
+
+int get_kvm_amd_param_integer(const char *param)
+{
+ return get_module_param_integer("kvm_amd", param);
+}
/*
- * VM Create
+ * Capability
*
* Input Args:
- * mode - VM Mode (e.g. VM_MODE_P52V48_4K)
- * phy_pages - Physical memory pages
- * perm - permission
+ * cap - Capability
*
* Output Args: None
*
* Return:
- * Pointer to opaque structure that describes the created VM.
+ * On success, the Value corresponding to the capability (KVM_CAP_*)
+ * specified by the value of cap. On failure a TEST_ASSERT failure
+ * is produced.
*
- * Creates a VM with the mode specified by mode (e.g. VM_MODE_P52V48_4K).
- * When phy_pages is non-zero, a memory region of phy_pages physical pages
- * is created and mapped starting at guest physical address 0. The file
- * descriptor to control the created VM is created with the permissions
- * given by perm (e.g. O_RDWR).
+ * Looks up and returns the value corresponding to the capability
+ * (KVM_CAP_*) given by cap.
*/
-struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
+unsigned int kvm_check_cap(long cap)
{
- struct kvm_vm *vm;
+ int ret;
+ int kvm_fd;
+
+ kvm_fd = open_kvm_dev_path_or_exit();
+ ret = __kvm_ioctl(kvm_fd, KVM_CHECK_EXTENSION, (void *)cap);
+ TEST_ASSERT(ret >= 0, KVM_IOCTL_ERROR(KVM_CHECK_EXTENSION, ret));
+
+ close(kvm_fd);
- pr_debug("%s: mode='%s' pages='%ld' perm='%d'\n", __func__,
- vm_guest_mode_string(mode), phy_pages, perm);
+ return (unsigned int)ret;
+}
+
+void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size)
+{
+ if (vm_check_cap(vm, KVM_CAP_DIRTY_LOG_RING_ACQ_REL))
+ vm_enable_cap(vm, KVM_CAP_DIRTY_LOG_RING_ACQ_REL, ring_size);
+ else
+ vm_enable_cap(vm, KVM_CAP_DIRTY_LOG_RING, ring_size);
+ vm->dirty_ring_size = ring_size;
+}
+
+static void vm_open(struct kvm_vm *vm)
+{
+ vm->kvm_fd = _open_kvm_dev_path_or_exit(O_RDWR);
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_IMMEDIATE_EXIT));
+
+ vm->fd = __kvm_ioctl(vm->kvm_fd, KVM_CREATE_VM, (void *)vm->type);
+ TEST_ASSERT(vm->fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, vm->fd));
+}
+
+const char *vm_guest_mode_string(uint32_t i)
+{
+ static const char * const strings[] = {
+ [VM_MODE_P52V48_4K] = "PA-bits:52, VA-bits:48, 4K pages",
+ [VM_MODE_P52V48_16K] = "PA-bits:52, VA-bits:48, 16K pages",
+ [VM_MODE_P52V48_64K] = "PA-bits:52, VA-bits:48, 64K pages",
+ [VM_MODE_P48V48_4K] = "PA-bits:48, VA-bits:48, 4K pages",
+ [VM_MODE_P48V48_16K] = "PA-bits:48, VA-bits:48, 16K pages",
+ [VM_MODE_P48V48_64K] = "PA-bits:48, VA-bits:48, 64K pages",
+ [VM_MODE_P40V48_4K] = "PA-bits:40, VA-bits:48, 4K pages",
+ [VM_MODE_P40V48_16K] = "PA-bits:40, VA-bits:48, 16K pages",
+ [VM_MODE_P40V48_64K] = "PA-bits:40, VA-bits:48, 64K pages",
+ [VM_MODE_PXXV48_4K] = "PA-bits:ANY, VA-bits:48, 4K pages",
+ [VM_MODE_P47V64_4K] = "PA-bits:47, VA-bits:64, 4K pages",
+ [VM_MODE_P44V64_4K] = "PA-bits:44, VA-bits:64, 4K pages",
+ [VM_MODE_P36V48_4K] = "PA-bits:36, VA-bits:48, 4K pages",
+ [VM_MODE_P36V48_16K] = "PA-bits:36, VA-bits:48, 16K pages",
+ [VM_MODE_P36V48_64K] = "PA-bits:36, VA-bits:48, 64K pages",
+ [VM_MODE_P36V47_16K] = "PA-bits:36, VA-bits:47, 16K pages",
+ };
+ _Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES,
+ "Missing new mode strings?");
+
+ TEST_ASSERT(i < NUM_VM_MODES, "Guest mode ID %d too big", i);
+
+ return strings[i];
+}
+
+const struct vm_guest_mode_params vm_guest_mode_params[] = {
+ [VM_MODE_P52V48_4K] = { 52, 48, 0x1000, 12 },
+ [VM_MODE_P52V48_16K] = { 52, 48, 0x4000, 14 },
+ [VM_MODE_P52V48_64K] = { 52, 48, 0x10000, 16 },
+ [VM_MODE_P48V48_4K] = { 48, 48, 0x1000, 12 },
+ [VM_MODE_P48V48_16K] = { 48, 48, 0x4000, 14 },
+ [VM_MODE_P48V48_64K] = { 48, 48, 0x10000, 16 },
+ [VM_MODE_P40V48_4K] = { 40, 48, 0x1000, 12 },
+ [VM_MODE_P40V48_16K] = { 40, 48, 0x4000, 14 },
+ [VM_MODE_P40V48_64K] = { 40, 48, 0x10000, 16 },
+ [VM_MODE_PXXV48_4K] = { 0, 0, 0x1000, 12 },
+ [VM_MODE_P47V64_4K] = { 47, 64, 0x1000, 12 },
+ [VM_MODE_P44V64_4K] = { 44, 64, 0x1000, 12 },
+ [VM_MODE_P36V48_4K] = { 36, 48, 0x1000, 12 },
+ [VM_MODE_P36V48_16K] = { 36, 48, 0x4000, 14 },
+ [VM_MODE_P36V48_64K] = { 36, 48, 0x10000, 16 },
+ [VM_MODE_P36V47_16K] = { 36, 47, 0x4000, 14 },
+};
+_Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
+ "Missing new mode params?");
+
+/*
+ * Initializes vm->vpages_valid to match the canonical VA space of the
+ * architecture.
+ *
+ * The default implementation is valid for architectures which split the
+ * range addressed by a single page table into a low and high region
+ * based on the MSB of the VA. On architectures with this behavior
+ * the VA region spans [0, 2^(va_bits - 1)), [-(2^(va_bits - 1), -1].
+ */
+__weak void vm_vaddr_populate_bitmap(struct kvm_vm *vm)
+{
+ sparsebit_set_num(vm->vpages_valid,
+ 0, (1ULL << (vm->va_bits - 1)) >> vm->page_shift);
+ sparsebit_set_num(vm->vpages_valid,
+ (~((1ULL << (vm->va_bits - 1)) - 1)) >> vm->page_shift,
+ (1ULL << (vm->va_bits - 1)) >> vm->page_shift);
+}
+
+struct kvm_vm *____vm_create(struct vm_shape shape)
+{
+ struct kvm_vm *vm;
vm = calloc(1, sizeof(*vm));
TEST_ASSERT(vm != NULL, "Insufficient Memory");
INIT_LIST_HEAD(&vm->vcpus);
- INIT_LIST_HEAD(&vm->userspace_mem_regions);
+ vm->regions.gpa_tree = RB_ROOT;
+ vm->regions.hva_tree = RB_ROOT;
+ hash_init(vm->regions.slot_hash);
- vm->mode = mode;
- vm->type = 0;
+ vm->mode = shape.mode;
+ vm->type = shape.type;
+ vm->subtype = shape.subtype;
- vm->pa_bits = vm_guest_mode_params[mode].pa_bits;
- vm->va_bits = vm_guest_mode_params[mode].va_bits;
- vm->page_size = vm_guest_mode_params[mode].page_size;
- vm->page_shift = vm_guest_mode_params[mode].page_shift;
+ vm->pa_bits = vm_guest_mode_params[vm->mode].pa_bits;
+ vm->va_bits = vm_guest_mode_params[vm->mode].va_bits;
+ vm->page_size = vm_guest_mode_params[vm->mode].page_size;
+ vm->page_shift = vm_guest_mode_params[vm->mode].page_shift;
/* Setup mode specific traits. */
switch (vm->mode) {
@@ -187,18 +298,30 @@ struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
vm->pgtable_levels = 3;
break;
case VM_MODE_P40V48_4K:
+ case VM_MODE_P36V48_4K:
vm->pgtable_levels = 4;
break;
case VM_MODE_P40V48_64K:
+ case VM_MODE_P36V48_64K:
+ vm->pgtable_levels = 3;
+ break;
+ case VM_MODE_P52V48_16K:
+ case VM_MODE_P48V48_16K:
+ case VM_MODE_P40V48_16K:
+ case VM_MODE_P36V48_16K:
+ vm->pgtable_levels = 4;
+ break;
+ case VM_MODE_P36V47_16K:
vm->pgtable_levels = 3;
break;
case VM_MODE_PXXV48_4K:
#ifdef __x86_64__
kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits);
+ kvm_init_vm_address_properties(vm);
/*
* Ignore KVM support for 5-level paging (vm->va_bits == 57),
* it doesn't take effect unless a CR4.LA57 is set, which it
- * isn't for this VM_MODE.
+ * isn't for this mode (48-bit virtual address space).
*/
TEST_ASSERT(vm->va_bits == 48 || vm->va_bits == 57,
"Linear address width (%d bits) not supported",
@@ -211,40 +334,158 @@ struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
TEST_FAIL("VM_MODE_PXXV48_4K not supported on non-x86 platforms");
#endif
break;
+ case VM_MODE_P47V64_4K:
+ vm->pgtable_levels = 5;
+ break;
+ case VM_MODE_P44V64_4K:
+ vm->pgtable_levels = 5;
+ break;
default:
- TEST_FAIL("Unknown guest mode, mode: 0x%x", mode);
+ TEST_FAIL("Unknown guest mode: 0x%x", vm->mode);
}
#ifdef __aarch64__
+ TEST_ASSERT(!vm->type, "ARM doesn't support test-provided types");
if (vm->pa_bits != 40)
vm->type = KVM_VM_TYPE_ARM_IPA_SIZE(vm->pa_bits);
#endif
- vm_open(vm, perm);
+ vm_open(vm);
/* Limit to VA-bit canonical virtual addresses. */
vm->vpages_valid = sparsebit_alloc();
- sparsebit_set_num(vm->vpages_valid,
- 0, (1ULL << (vm->va_bits - 1)) >> vm->page_shift);
- sparsebit_set_num(vm->vpages_valid,
- (~((1ULL << (vm->va_bits - 1)) - 1)) >> vm->page_shift,
- (1ULL << (vm->va_bits - 1)) >> vm->page_shift);
+ vm_vaddr_populate_bitmap(vm);
/* Limit physical addresses to PA-bits. */
- vm->max_gfn = ((1ULL << vm->pa_bits) >> vm->page_shift) - 1;
+ vm->max_gfn = vm_compute_max_gfn(vm);
/* Allocate and setup memory for guest. */
vm->vpages_mapped = sparsebit_alloc();
- if (phy_pages != 0)
- vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
- 0, 0, phy_pages, 0);
return vm;
}
-struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
+static uint64_t vm_nr_pages_required(enum vm_guest_mode mode,
+ uint32_t nr_runnable_vcpus,
+ uint64_t extra_mem_pages)
+{
+ uint64_t page_size = vm_guest_mode_params[mode].page_size;
+ uint64_t nr_pages;
+
+ TEST_ASSERT(nr_runnable_vcpus,
+ "Use vm_create_barebones() for VMs that _never_ have vCPUs");
+
+ TEST_ASSERT(nr_runnable_vcpus <= kvm_check_cap(KVM_CAP_MAX_VCPUS),
+ "nr_vcpus = %d too large for host, max-vcpus = %d",
+ nr_runnable_vcpus, kvm_check_cap(KVM_CAP_MAX_VCPUS));
+
+ /*
+ * Arbitrarily allocate 512 pages (2mb when page size is 4kb) for the
+ * test code and other per-VM assets that will be loaded into memslot0.
+ */
+ nr_pages = 512;
+
+ /* Account for the per-vCPU stacks on behalf of the test. */
+ nr_pages += nr_runnable_vcpus * DEFAULT_STACK_PGS;
+
+ /*
+ * Account for the number of pages needed for the page tables. The
+ * maximum page table size for a memory region will be when the
+ * smallest page size is used. Considering each page contains x page
+ * table descriptors, the total extra size for page tables (for extra
+ * N pages) will be: N/x+N/x^2+N/x^3+... which is definitely smaller
+ * than N/x*2.
+ */
+ nr_pages += (nr_pages + extra_mem_pages) / PTES_PER_MIN_PAGE * 2;
+
+ /* Account for the number of pages needed by ucall. */
+ nr_pages += ucall_nr_pages_required(page_size);
+
+ return vm_adjust_num_guest_pages(mode, nr_pages);
+}
+
+struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus,
+ uint64_t nr_extra_pages)
+{
+ uint64_t nr_pages = vm_nr_pages_required(shape.mode, nr_runnable_vcpus,
+ nr_extra_pages);
+ struct userspace_mem_region *slot0;
+ struct kvm_vm *vm;
+ int i;
+
+ pr_debug("%s: mode='%s' type='%d', pages='%ld'\n", __func__,
+ vm_guest_mode_string(shape.mode), shape.type, nr_pages);
+
+ vm = ____vm_create(shape);
+
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, nr_pages, 0);
+ for (i = 0; i < NR_MEM_REGIONS; i++)
+ vm->memslots[i] = 0;
+
+ kvm_vm_elf_load(vm, program_invocation_name);
+
+ /*
+ * TODO: Add proper defines to protect the library's memslots, and then
+ * carve out memslot1 for the ucall MMIO address. KVM treats writes to
+ * read-only memslots as MMIO, and creating a read-only memslot for the
+ * MMIO region would prevent silently clobbering the MMIO region.
+ */
+ slot0 = memslot2region(vm, 0);
+ ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size);
+
+ kvm_arch_vm_post_create(vm);
+
+ return vm;
+}
+
+/*
+ * VM Create with customized parameters
+ *
+ * Input Args:
+ * mode - VM Mode (e.g. VM_MODE_P52V48_4K)
+ * nr_vcpus - VCPU count
+ * extra_mem_pages - Non-slot0 physical memory total size
+ * guest_code - Guest entry point
+ * vcpuids - VCPU IDs
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Pointer to opaque structure that describes the created VM.
+ *
+ * Creates a VM with the mode specified by mode (e.g. VM_MODE_P52V48_4K).
+ * extra_mem_pages is only used to calculate the maximum page table size,
+ * no real memory allocation for non-slot0 memory in this function.
+ */
+struct kvm_vm *__vm_create_with_vcpus(struct vm_shape shape, uint32_t nr_vcpus,
+ uint64_t extra_mem_pages,
+ void *guest_code, struct kvm_vcpu *vcpus[])
+{
+ struct kvm_vm *vm;
+ int i;
+
+ TEST_ASSERT(!nr_vcpus || vcpus, "Must provide vCPU array");
+
+ vm = __vm_create(shape, nr_vcpus, extra_mem_pages);
+
+ for (i = 0; i < nr_vcpus; ++i)
+ vcpus[i] = vm_vcpu_add(vm, i, guest_code);
+
+ return vm;
+}
+
+struct kvm_vm *__vm_create_shape_with_one_vcpu(struct vm_shape shape,
+ struct kvm_vcpu **vcpu,
+ uint64_t extra_mem_pages,
+ void *guest_code)
{
- return _vm_create(mode, phy_pages, perm);
+ struct kvm_vcpu *vcpus[1];
+ struct kvm_vm *vm;
+
+ vm = __vm_create_with_vcpus(shape, 1, extra_mem_pages, guest_code, vcpus);
+
+ *vcpu = vcpus[0];
+ return vm;
}
/*
@@ -252,7 +493,6 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
*
* Input Args:
* vm - VM that has been released before
- * perm - permission
*
* Output Args: None
*
@@ -260,17 +500,19 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
* global state, such as the irqchip and the memory regions that are mapped
* into the guest.
*/
-void kvm_vm_restart(struct kvm_vm *vmp, int perm)
+void kvm_vm_restart(struct kvm_vm *vmp)
{
+ int ctr;
struct userspace_mem_region *region;
- vm_open(vmp, perm);
+ vm_open(vmp);
if (vmp->has_irqchip)
vm_create_irqchip(vmp);
- list_for_each_entry(region, &vmp->userspace_mem_regions, list) {
- int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
- TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
+ hash_for_each(vmp->regions.slot_hash, ctr, region, slot_node) {
+ int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION2, &region->region);
+
+ TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION2 IOCTL failed,\n"
" rc: %i errno: %i\n"
" slot: %u flags: 0x%x\n"
" guest_phys_addr: 0x%llx size: 0x%llx",
@@ -281,27 +523,87 @@ void kvm_vm_restart(struct kvm_vm *vmp, int perm)
}
}
-void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log)
+__weak struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm,
+ uint32_t vcpu_id)
{
- struct kvm_dirty_log args = { .dirty_bitmap = log, .slot = slot };
- int ret;
+ return __vm_vcpu_add(vm, vcpu_id);
+}
- ret = ioctl(vm->fd, KVM_GET_DIRTY_LOG, &args);
- TEST_ASSERT(ret == 0, "%s: KVM_GET_DIRTY_LOG failed: %s",
- __func__, strerror(-ret));
+struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm)
+{
+ kvm_vm_restart(vm);
+
+ return vm_vcpu_recreate(vm, 0);
}
-void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
- uint64_t first_page, uint32_t num_pages)
+void kvm_pin_this_task_to_pcpu(uint32_t pcpu)
{
- struct kvm_clear_dirty_log args = { .dirty_bitmap = log, .slot = slot,
- .first_page = first_page,
- .num_pages = num_pages };
- int ret;
+ cpu_set_t mask;
+ int r;
- ret = ioctl(vm->fd, KVM_CLEAR_DIRTY_LOG, &args);
- TEST_ASSERT(ret == 0, "%s: KVM_CLEAR_DIRTY_LOG failed: %s",
- __func__, strerror(-ret));
+ CPU_ZERO(&mask);
+ CPU_SET(pcpu, &mask);
+ r = sched_setaffinity(0, sizeof(mask), &mask);
+ TEST_ASSERT(!r, "sched_setaffinity() failed for pCPU '%u'.", pcpu);
+}
+
+static uint32_t parse_pcpu(const char *cpu_str, const cpu_set_t *allowed_mask)
+{
+ uint32_t pcpu = atoi_non_negative("CPU number", cpu_str);
+
+ TEST_ASSERT(CPU_ISSET(pcpu, allowed_mask),
+ "Not allowed to run on pCPU '%d', check cgroups?", pcpu);
+ return pcpu;
+}
+
+void kvm_print_vcpu_pinning_help(void)
+{
+ const char *name = program_invocation_name;
+
+ printf(" -c: Pin tasks to physical CPUs. Takes a list of comma separated\n"
+ " values (target pCPU), one for each vCPU, plus an optional\n"
+ " entry for the main application task (specified via entry\n"
+ " <nr_vcpus + 1>). If used, entries must be provided for all\n"
+ " vCPUs, i.e. pinning vCPUs is all or nothing.\n\n"
+ " E.g. to create 3 vCPUs, pin vCPU0=>pCPU22, vCPU1=>pCPU23,\n"
+ " vCPU2=>pCPU24, and pin the application task to pCPU50:\n\n"
+ " %s -v 3 -c 22,23,24,50\n\n"
+ " To leave the application task unpinned, drop the final entry:\n\n"
+ " %s -v 3 -c 22,23,24\n\n"
+ " (default: no pinning)\n", name, name);
+}
+
+void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[],
+ int nr_vcpus)
+{
+ cpu_set_t allowed_mask;
+ char *cpu, *cpu_list;
+ char delim[2] = ",";
+ int i, r;
+
+ cpu_list = strdup(pcpus_string);
+ TEST_ASSERT(cpu_list, "strdup() allocation failed.");
+
+ r = sched_getaffinity(0, sizeof(allowed_mask), &allowed_mask);
+ TEST_ASSERT(!r, "sched_getaffinity() failed");
+
+ cpu = strtok(cpu_list, delim);
+
+ /* 1. Get all pcpus for vcpus. */
+ for (i = 0; i < nr_vcpus; i++) {
+ TEST_ASSERT(cpu, "pCPU not provided for vCPU '%d'", i);
+ vcpu_to_pcpu[i] = parse_pcpu(cpu, &allowed_mask);
+ cpu = strtok(NULL, delim);
+ }
+
+ /* 2. Check if the main worker needs to be pinned. */
+ if (cpu) {
+ kvm_pin_this_task_to_pcpu(parse_pcpu(cpu, &allowed_mask));
+ cpu = strtok(NULL, delim);
+ }
+
+ TEST_ASSERT(!cpu, "pCPU list contains trailing garbage characters '%s'", cpu);
+ free(cpu_list);
}
/*
@@ -326,74 +628,29 @@ void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
static struct userspace_mem_region *
userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end)
{
- struct userspace_mem_region *region;
+ struct rb_node *node;
- list_for_each_entry(region, &vm->userspace_mem_regions, list) {
+ for (node = vm->regions.gpa_tree.rb_node; node; ) {
+ struct userspace_mem_region *region =
+ container_of(node, struct userspace_mem_region, gpa_node);
uint64_t existing_start = region->region.guest_phys_addr;
uint64_t existing_end = region->region.guest_phys_addr
+ region->region.memory_size - 1;
if (start <= existing_end && end >= existing_start)
return region;
+
+ if (start < existing_start)
+ node = node->rb_left;
+ else
+ node = node->rb_right;
}
return NULL;
}
-/*
- * KVM Userspace Memory Region Find
- *
- * Input Args:
- * vm - Virtual Machine
- * start - Starting VM physical address
- * end - Ending VM physical address, inclusive.
- *
- * Output Args: None
- *
- * Return:
- * Pointer to overlapping region, NULL if no such region.
- *
- * Public interface to userspace_mem_region_find. Allows tests to look up
- * the memslot datastructure for a given range of guest physical memory.
- */
-struct kvm_userspace_memory_region *
-kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
- uint64_t end)
-{
- struct userspace_mem_region *region;
-
- region = userspace_mem_region_find(vm, start, end);
- if (!region)
- return NULL;
-
- return &region->region;
-}
-
-/*
- * VCPU Find
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- *
- * Output Args: None
- *
- * Return:
- * Pointer to VCPU structure
- *
- * Locates a vcpu structure that describes the VCPU specified by vcpuid and
- * returns a pointer to it. Returns NULL if the VM doesn't contain a VCPU
- * for the specified vcpuid.
- */
-struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid)
+__weak void vcpu_arch_free(struct kvm_vcpu *vcpu)
{
- struct vcpu *vcpu;
- list_for_each_entry(vcpu, &vm->vcpus, list) {
- if (vcpu->id == vcpuid)
- return vcpu;
- }
-
- return NULL;
}
/*
@@ -408,53 +665,70 @@ struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid)
*
* Removes a vCPU from a VM and frees its resources.
*/
-static void vm_vcpu_rm(struct vcpu *vcpu)
+static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
{
int ret;
- ret = munmap(vcpu->state, sizeof(*vcpu->state));
- TEST_ASSERT(ret == 0, "munmap of VCPU fd failed, rc: %i "
- "errno: %i", ret, errno);
- close(vcpu->fd);
- TEST_ASSERT(ret == 0, "Close of VCPU fd failed, rc: %i "
- "errno: %i", ret, errno);
+ if (vcpu->dirty_gfns) {
+ ret = munmap(vcpu->dirty_gfns, vm->dirty_ring_size);
+ TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
+ vcpu->dirty_gfns = NULL;
+ }
+
+ ret = munmap(vcpu->run, vcpu_mmap_sz());
+ TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
+
+ ret = close(vcpu->fd);
+ TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));
list_del(&vcpu->list);
+
+ vcpu_arch_free(vcpu);
free(vcpu);
}
void kvm_vm_release(struct kvm_vm *vmp)
{
- struct vcpu *vcpu, *tmp;
+ struct kvm_vcpu *vcpu, *tmp;
int ret;
list_for_each_entry_safe(vcpu, tmp, &vmp->vcpus, list)
- vm_vcpu_rm(vcpu);
+ vm_vcpu_rm(vmp, vcpu);
ret = close(vmp->fd);
- TEST_ASSERT(ret == 0, "Close of vm fd failed,\n"
- " vmp->fd: %i rc: %i errno: %i", vmp->fd, ret, errno);
+ TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));
- close(vmp->kvm_fd);
- TEST_ASSERT(ret == 0, "Close of /dev/kvm fd failed,\n"
- " vmp->kvm_fd: %i rc: %i errno: %i", vmp->kvm_fd, ret, errno);
+ ret = close(vmp->kvm_fd);
+ TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));
}
static void __vm_mem_region_delete(struct kvm_vm *vm,
- struct userspace_mem_region *region)
+ struct userspace_mem_region *region,
+ bool unlink)
{
int ret;
- list_del(&region->list);
+ if (unlink) {
+ rb_erase(&region->gpa_node, &vm->regions.gpa_tree);
+ rb_erase(&region->hva_node, &vm->regions.hva_tree);
+ hash_del(&region->slot_node);
+ }
region->region.memory_size = 0;
- ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
- TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed, "
- "rc: %i errno: %i", ret, errno);
+ vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &region->region);
sparsebit_free(&region->unused_phy_pages);
+ sparsebit_free(&region->protected_phy_pages);
ret = munmap(region->mmap_start, region->mmap_size);
- TEST_ASSERT(ret == 0, "munmap failed, rc: %i errno: %i", ret, errno);
+ TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
+ if (region->fd >= 0) {
+ /* There's an extra map when using shared memory. */
+ ret = munmap(region->mmap_alias, region->mmap_size);
+ TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
+ close(region->fd);
+ }
+ if (region->region.guest_memfd >= 0)
+ close(region->region.guest_memfd);
free(region);
}
@@ -464,14 +738,22 @@ static void __vm_mem_region_delete(struct kvm_vm *vm,
*/
void kvm_vm_free(struct kvm_vm *vmp)
{
- struct userspace_mem_region *region, *tmp;
+ int ctr;
+ struct hlist_node *node;
+ struct userspace_mem_region *region;
if (vmp == NULL)
return;
+ /* Free cached stats metadata and close FD */
+ if (vmp->stats_fd) {
+ free(vmp->stats_desc);
+ close(vmp->stats_fd);
+ }
+
/* Free userspace_mem_regions. */
- list_for_each_entry_safe(region, tmp, &vmp->userspace_mem_regions, list)
- __vm_mem_region_delete(vmp, region);
+ hash_for_each_safe(vmp->regions.slot_hash, ctr, node, region, slot_node)
+ __vm_mem_region_delete(vmp, region, false);
/* Free sparsebit arrays. */
sparsebit_free(&vmp->vpages_valid);
@@ -483,6 +765,26 @@ void kvm_vm_free(struct kvm_vm *vmp)
free(vmp);
}
+int kvm_memfd_alloc(size_t size, bool hugepages)
+{
+ int memfd_flags = MFD_CLOEXEC;
+ int fd, r;
+
+ if (hugepages)
+ memfd_flags |= MFD_HUGETLB;
+
+ fd = memfd_create("kvm_selftest", memfd_flags);
+ TEST_ASSERT(fd != -1, __KVM_SYSCALL_ERROR("memfd_create()", fd));
+
+ r = ftruncate(fd, size);
+ TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("ftruncate()", r));
+
+ r = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, size);
+ TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));
+
+ return fd;
+}
+
/*
* Memory Compare, host virtual to guest virtual
*
@@ -553,36 +855,119 @@ int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, vm_vaddr_t gva, size_t len)
return 0;
}
-/*
- * VM Userspace Memory Region Add
- *
- * Input Args:
- * vm - Virtual Machine
- * backing_src - Storage source for this region.
- * NULL to use anonymous memory.
- * guest_paddr - Starting guest physical address
- * slot - KVM region slot
- * npages - Number of physical pages
- * flags - KVM memory region flags (e.g. KVM_MEM_LOG_DIRTY_PAGES)
- *
- * Output Args: None
- *
- * Return: None
- *
- * Allocates a memory area of the number of pages specified by npages
- * and maps it to the VM specified by vm, at a starting physical address
- * given by guest_paddr. The region is created with a KVM region slot
- * given by slot, which must be unique and < KVM_MEM_SLOTS_NUM. The
- * region is created with the flags given by flags.
- */
-void vm_userspace_mem_region_add(struct kvm_vm *vm,
- enum vm_mem_backing_src_type src_type,
- uint64_t guest_paddr, uint32_t slot, uint64_t npages,
- uint32_t flags)
+static void vm_userspace_mem_region_gpa_insert(struct rb_root *gpa_tree,
+ struct userspace_mem_region *region)
+{
+ struct rb_node **cur, *parent;
+
+ for (cur = &gpa_tree->rb_node, parent = NULL; *cur; ) {
+ struct userspace_mem_region *cregion;
+
+ cregion = container_of(*cur, typeof(*cregion), gpa_node);
+ parent = *cur;
+ if (region->region.guest_phys_addr <
+ cregion->region.guest_phys_addr)
+ cur = &(*cur)->rb_left;
+ else {
+ TEST_ASSERT(region->region.guest_phys_addr !=
+ cregion->region.guest_phys_addr,
+ "Duplicate GPA in region tree");
+
+ cur = &(*cur)->rb_right;
+ }
+ }
+
+ rb_link_node(&region->gpa_node, parent, cur);
+ rb_insert_color(&region->gpa_node, gpa_tree);
+}
+
+static void vm_userspace_mem_region_hva_insert(struct rb_root *hva_tree,
+ struct userspace_mem_region *region)
+{
+ struct rb_node **cur, *parent;
+
+ for (cur = &hva_tree->rb_node, parent = NULL; *cur; ) {
+ struct userspace_mem_region *cregion;
+
+ cregion = container_of(*cur, typeof(*cregion), hva_node);
+ parent = *cur;
+ if (region->host_mem < cregion->host_mem)
+ cur = &(*cur)->rb_left;
+ else {
+ TEST_ASSERT(region->host_mem !=
+ cregion->host_mem,
+ "Duplicate HVA in region tree");
+
+ cur = &(*cur)->rb_right;
+ }
+ }
+
+ rb_link_node(&region->hva_node, parent, cur);
+ rb_insert_color(&region->hva_node, hva_tree);
+}
+
+
+int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
+ uint64_t gpa, uint64_t size, void *hva)
+{
+ struct kvm_userspace_memory_region region = {
+ .slot = slot,
+ .flags = flags,
+ .guest_phys_addr = gpa,
+ .memory_size = size,
+ .userspace_addr = (uintptr_t)hva,
+ };
+
+ return ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region);
+}
+
+void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
+ uint64_t gpa, uint64_t size, void *hva)
+{
+ int ret = __vm_set_user_memory_region(vm, slot, flags, gpa, size, hva);
+
+ TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION failed, errno = %d (%s)",
+ errno, strerror(errno));
+}
+
+int __vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
+ uint64_t gpa, uint64_t size, void *hva,
+ uint32_t guest_memfd, uint64_t guest_memfd_offset)
+{
+ struct kvm_userspace_memory_region2 region = {
+ .slot = slot,
+ .flags = flags,
+ .guest_phys_addr = gpa,
+ .memory_size = size,
+ .userspace_addr = (uintptr_t)hva,
+ .guest_memfd = guest_memfd,
+ .guest_memfd_offset = guest_memfd_offset,
+ };
+
+ return ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION2, &region);
+}
+
+void vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
+ uint64_t gpa, uint64_t size, void *hva,
+ uint32_t guest_memfd, uint64_t guest_memfd_offset)
+{
+ int ret = __vm_set_user_memory_region2(vm, slot, flags, gpa, size, hva,
+ guest_memfd, guest_memfd_offset);
+
+ TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION2 failed, errno = %d (%s)",
+ errno, strerror(errno));
+}
+
+
+/* FIXME: This thing needs to be ripped apart and rewritten. */
+void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
+ uint64_t guest_paddr, uint32_t slot, uint64_t npages,
+ uint32_t flags, int guest_memfd, uint64_t guest_memfd_offset)
{
int ret;
struct userspace_mem_region *region;
- size_t huge_page_size = KVM_UTIL_PGS_PER_HUGEPG * vm->page_size;
+ size_t backing_src_pagesz = get_backing_src_pagesz(src_type);
+ size_t mem_size = npages * vm->page_size;
size_t alignment;
TEST_ASSERT(vm_adjust_num_guest_pages(vm->mode, npages) == npages,
@@ -617,7 +1002,8 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
(uint64_t) region->region.memory_size);
/* Confirm no region with the requested slot already exists. */
- list_for_each_entry(region, &vm->userspace_mem_regions, list) {
+ hash_for_each_possible(vm->regions.slot_hash, region, slot_node,
+ slot) {
if (region->region.slot != slot)
continue;
@@ -634,7 +1020,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
/* Allocate and initialize new mem region structure. */
region = calloc(1, sizeof(*region));
TEST_ASSERT(region != NULL, "Insufficient Memory");
- region->mmap_size = npages * vm->page_size;
+ region->mmap_size = mem_size;
#ifdef __s390x__
/* On s390x, the host address must be aligned to 1M (due to PGSTEs) */
@@ -643,37 +1029,79 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
alignment = 1;
#endif
+ /*
+ * When using THP mmap is not guaranteed to returned a hugepage aligned
+ * address so we have to pad the mmap. Padding is not needed for HugeTLB
+ * because mmap will always return an address aligned to the HugeTLB
+ * page size.
+ */
if (src_type == VM_MEM_SRC_ANONYMOUS_THP)
- alignment = max(huge_page_size, alignment);
+ alignment = max(backing_src_pagesz, alignment);
+
+ TEST_ASSERT_EQ(guest_paddr, align_up(guest_paddr, backing_src_pagesz));
/* Add enough memory to align up if necessary */
if (alignment > 1)
region->mmap_size += alignment;
+ region->fd = -1;
+ if (backing_src_is_shared(src_type))
+ region->fd = kvm_memfd_alloc(region->mmap_size,
+ src_type == VM_MEM_SRC_SHARED_HUGETLB);
+
region->mmap_start = mmap(NULL, region->mmap_size,
PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS
- | (src_type == VM_MEM_SRC_ANONYMOUS_HUGETLB ? MAP_HUGETLB : 0),
- -1, 0);
+ vm_mem_backing_src_alias(src_type)->flag,
+ region->fd, 0);
TEST_ASSERT(region->mmap_start != MAP_FAILED,
- "test_malloc failed, mmap_start: %p errno: %i",
- region->mmap_start, errno);
+ __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED));
+
+ TEST_ASSERT(!is_backing_src_hugetlb(src_type) ||
+ region->mmap_start == align_ptr_up(region->mmap_start, backing_src_pagesz),
+ "mmap_start %p is not aligned to HugeTLB page size 0x%lx",
+ region->mmap_start, backing_src_pagesz);
/* Align host address */
- region->host_mem = align(region->mmap_start, alignment);
+ region->host_mem = align_ptr_up(region->mmap_start, alignment);
/* As needed perform madvise */
- if (src_type == VM_MEM_SRC_ANONYMOUS || src_type == VM_MEM_SRC_ANONYMOUS_THP) {
- ret = madvise(region->host_mem, npages * vm->page_size,
- src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE);
- TEST_ASSERT(ret == 0, "madvise failed,\n"
- " addr: %p\n"
- " length: 0x%lx\n"
- " src_type: %x",
- region->host_mem, npages * vm->page_size, src_type);
+ if ((src_type == VM_MEM_SRC_ANONYMOUS ||
+ src_type == VM_MEM_SRC_ANONYMOUS_THP) && thp_configured()) {
+ ret = madvise(region->host_mem, mem_size,
+ src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE);
+ TEST_ASSERT(ret == 0, "madvise failed, addr: %p length: 0x%lx src_type: %s",
+ region->host_mem, mem_size,
+ vm_mem_backing_src_alias(src_type)->name);
+ }
+
+ region->backing_src_type = src_type;
+
+ if (flags & KVM_MEM_GUEST_MEMFD) {
+ if (guest_memfd < 0) {
+ uint32_t guest_memfd_flags = 0;
+ TEST_ASSERT(!guest_memfd_offset,
+ "Offset must be zero when creating new guest_memfd");
+ guest_memfd = vm_create_guest_memfd(vm, mem_size, guest_memfd_flags);
+ } else {
+ /*
+ * Install a unique fd for each memslot so that the fd
+ * can be closed when the region is deleted without
+ * needing to track if the fd is owned by the framework
+ * or by the caller.
+ */
+ guest_memfd = dup(guest_memfd);
+ TEST_ASSERT(guest_memfd >= 0, __KVM_SYSCALL_ERROR("dup()", guest_memfd));
+ }
+
+ region->region.guest_memfd = guest_memfd;
+ region->region.guest_memfd_offset = guest_memfd_offset;
+ } else {
+ region->region.guest_memfd = -1;
}
region->unused_phy_pages = sparsebit_alloc();
+ if (vm_arch_has_protected_memory(vm))
+ region->protected_phy_pages = sparsebit_alloc();
sparsebit_set_num(region->unused_phy_pages,
guest_paddr >> vm->page_shift, npages);
region->region.slot = slot;
@@ -681,16 +1109,40 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
region->region.guest_phys_addr = guest_paddr;
region->region.memory_size = npages * vm->page_size;
region->region.userspace_addr = (uintptr_t) region->host_mem;
- ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
- TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
+ ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &region->region);
+ TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION2 IOCTL failed,\n"
" rc: %i errno: %i\n"
" slot: %u flags: 0x%x\n"
- " guest_phys_addr: 0x%lx size: 0x%lx",
+ " guest_phys_addr: 0x%lx size: 0x%lx guest_memfd: %d",
ret, errno, slot, flags,
- guest_paddr, (uint64_t) region->region.memory_size);
+ guest_paddr, (uint64_t) region->region.memory_size,
+ region->region.guest_memfd);
+
+ /* Add to quick lookup data structures */
+ vm_userspace_mem_region_gpa_insert(&vm->regions.gpa_tree, region);
+ vm_userspace_mem_region_hva_insert(&vm->regions.hva_tree, region);
+ hash_add(vm->regions.slot_hash, &region->slot_node, slot);
+
+ /* If shared memory, create an alias. */
+ if (region->fd >= 0) {
+ region->mmap_alias = mmap(NULL, region->mmap_size,
+ PROT_READ | PROT_WRITE,
+ vm_mem_backing_src_alias(src_type)->flag,
+ region->fd, 0);
+ TEST_ASSERT(region->mmap_alias != MAP_FAILED,
+ __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED));
+
+ /* Align host alias address */
+ region->host_alias = align_ptr_up(region->mmap_alias, alignment);
+ }
+}
- /* Add to linked-list of memory regions. */
- list_add(&region->list, &vm->userspace_mem_regions);
+void vm_userspace_mem_region_add(struct kvm_vm *vm,
+ enum vm_mem_backing_src_type src_type,
+ uint64_t guest_paddr, uint32_t slot,
+ uint64_t npages, uint32_t flags)
+{
+ vm_mem_add(vm, src_type, guest_paddr, slot, npages, flags, -1, 0);
}
/*
@@ -713,10 +1165,10 @@ memslot2region(struct kvm_vm *vm, uint32_t memslot)
{
struct userspace_mem_region *region;
- list_for_each_entry(region, &vm->userspace_mem_regions, list) {
+ hash_for_each_possible(vm->regions.slot_hash, region, slot_node,
+ memslot)
if (region->region.slot == memslot)
return region;
- }
fprintf(stderr, "No mem region with the requested slot found,\n"
" requested slot: %u\n", memslot);
@@ -749,9 +1201,9 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)
region->region.flags = flags;
- ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
+ ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &region->region);
- TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
+ TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION2 IOCTL failed,\n"
" rc: %i errno: %i slot: %u flags: 0x%x",
ret, errno, slot, flags);
}
@@ -779,9 +1231,9 @@ void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa)
region->region.guest_phys_addr = new_gpa;
- ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
+ ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &region->region);
- TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION failed\n"
+ TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION2 failed\n"
"ret: %i errno: %i slot: %u new_gpa: 0x%lx",
ret, errno, slot, new_gpa);
}
@@ -801,85 +1253,97 @@ void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa)
*/
void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot)
{
- __vm_mem_region_delete(vm, memslot2region(vm, slot));
+ __vm_mem_region_delete(vm, memslot2region(vm, slot), true);
}
-/*
- * VCPU mmap Size
- *
- * Input Args: None
- *
- * Output Args: None
- *
- * Return:
- * Size of VCPU state
- *
- * Returns the size of the structure pointed to by the return value
- * of vcpu_state().
- */
+void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t base, uint64_t size,
+ bool punch_hole)
+{
+ const int mode = FALLOC_FL_KEEP_SIZE | (punch_hole ? FALLOC_FL_PUNCH_HOLE : 0);
+ struct userspace_mem_region *region;
+ uint64_t end = base + size;
+ uint64_t gpa, len;
+ off_t fd_offset;
+ int ret;
+
+ for (gpa = base; gpa < end; gpa += len) {
+ uint64_t offset;
+
+ region = userspace_mem_region_find(vm, gpa, gpa);
+ TEST_ASSERT(region && region->region.flags & KVM_MEM_GUEST_MEMFD,
+ "Private memory region not found for GPA 0x%lx", gpa);
+
+ offset = gpa - region->region.guest_phys_addr;
+ fd_offset = region->region.guest_memfd_offset + offset;
+ len = min_t(uint64_t, end - gpa, region->region.memory_size - offset);
+
+ ret = fallocate(region->region.guest_memfd, mode, fd_offset, len);
+ TEST_ASSERT(!ret, "fallocate() failed to %s at %lx (len = %lu), fd = %d, mode = %x, offset = %lx",
+ punch_hole ? "punch hole" : "allocate", gpa, len,
+ region->region.guest_memfd, mode, fd_offset);
+ }
+}
+
+/* Returns the size of a vCPU's kvm_run structure. */
static int vcpu_mmap_sz(void)
{
int dev_fd, ret;
- dev_fd = open(KVM_DEV_PATH, O_RDONLY);
- if (dev_fd < 0)
- exit(KSFT_SKIP);
+ dev_fd = open_kvm_dev_path_or_exit();
ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
TEST_ASSERT(ret >= sizeof(struct kvm_run),
- "%s KVM_GET_VCPU_MMAP_SIZE ioctl failed, rc: %i errno: %i",
- __func__, ret, errno);
+ KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, ret));
close(dev_fd);
return ret;
}
+static bool vcpu_exists(struct kvm_vm *vm, uint32_t vcpu_id)
+{
+ struct kvm_vcpu *vcpu;
+
+ list_for_each_entry(vcpu, &vm->vcpus, list) {
+ if (vcpu->id == vcpu_id)
+ return true;
+ }
+
+ return false;
+}
+
/*
- * VM VCPU Add
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- *
- * Output Args: None
- *
- * Return: None
- *
- * Adds a virtual CPU to the VM specified by vm with the ID given by vcpuid.
- * No additional VCPU setup is done.
+ * Adds a virtual CPU to the VM specified by vm with the ID given by vcpu_id.
+ * No additional vCPU setup is done. Returns the vCPU.
*/
-void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid)
+struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
{
- struct vcpu *vcpu;
+ struct kvm_vcpu *vcpu;
/* Confirm a vcpu with the specified id doesn't already exist. */
- vcpu = vcpu_find(vm, vcpuid);
- if (vcpu != NULL)
- TEST_FAIL("vcpu with the specified id "
- "already exists,\n"
- " requested vcpuid: %u\n"
- " existing vcpuid: %u state: %p",
- vcpuid, vcpu->id, vcpu->state);
+ TEST_ASSERT(!vcpu_exists(vm, vcpu_id), "vCPU%d already exists", vcpu_id);
/* Allocate and initialize new vcpu structure. */
vcpu = calloc(1, sizeof(*vcpu));
TEST_ASSERT(vcpu != NULL, "Insufficient Memory");
- vcpu->id = vcpuid;
- vcpu->fd = ioctl(vm->fd, KVM_CREATE_VCPU, vcpuid);
- TEST_ASSERT(vcpu->fd >= 0, "KVM_CREATE_VCPU failed, rc: %i errno: %i",
- vcpu->fd, errno);
- TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->state), "vcpu mmap size "
+ vcpu->vm = vm;
+ vcpu->id = vcpu_id;
+ vcpu->fd = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)(unsigned long)vcpu_id);
+ TEST_ASSERT_VM_VCPU_IOCTL(vcpu->fd >= 0, KVM_CREATE_VCPU, vcpu->fd, vm);
+
+ TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->run), "vcpu mmap size "
"smaller than expected, vcpu_mmap_sz: %i expected_min: %zi",
- vcpu_mmap_sz(), sizeof(*vcpu->state));
- vcpu->state = (struct kvm_run *) mmap(NULL, sizeof(*vcpu->state),
+ vcpu_mmap_sz(), sizeof(*vcpu->run));
+ vcpu->run = (struct kvm_run *) mmap(NULL, vcpu_mmap_sz(),
PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0);
- TEST_ASSERT(vcpu->state != MAP_FAILED, "mmap vcpu_state failed, "
- "vcpu id: %u errno: %i", vcpuid, errno);
+ TEST_ASSERT(vcpu->run != MAP_FAILED,
+ __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED));
/* Add to linked-list of VCPUs. */
list_add(&vcpu->list, &vm->vcpus);
+
+ return vcpu;
}
/*
@@ -902,8 +1366,8 @@ void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid)
* TEST_ASSERT failure occurs for invalid input or no area of at least
* sz unallocated bytes >= vaddr_min is available.
*/
-static vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz,
- vm_vaddr_t vaddr_min)
+vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz,
+ vm_vaddr_t vaddr_min)
{
uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift;
@@ -968,6 +1432,50 @@ va_found:
return pgidx_start * vm->page_size;
}
+static vm_vaddr_t ____vm_vaddr_alloc(struct kvm_vm *vm, size_t sz,
+ vm_vaddr_t vaddr_min,
+ enum kvm_mem_region_type type,
+ bool protected)
+{
+ uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0);
+
+ virt_pgd_alloc(vm);
+ vm_paddr_t paddr = __vm_phy_pages_alloc(vm, pages,
+ KVM_UTIL_MIN_PFN * vm->page_size,
+ vm->memslots[type], protected);
+
+ /*
+ * Find an unused range of virtual page addresses of at least
+ * pages in length.
+ */
+ vm_vaddr_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min);
+
+ /* Map the virtual pages. */
+ for (vm_vaddr_t vaddr = vaddr_start; pages > 0;
+ pages--, vaddr += vm->page_size, paddr += vm->page_size) {
+
+ virt_pg_map(vm, vaddr, paddr);
+
+ sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift);
+ }
+
+ return vaddr_start;
+}
+
+vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
+ enum kvm_mem_region_type type)
+{
+ return ____vm_vaddr_alloc(vm, sz, vaddr_min, type,
+ vm_arch_has_protected_memory(vm));
+}
+
+vm_vaddr_t vm_vaddr_alloc_shared(struct kvm_vm *vm, size_t sz,
+ vm_vaddr_t vaddr_min,
+ enum kvm_mem_region_type type)
+{
+ return ____vm_vaddr_alloc(vm, sz, vaddr_min, type, false);
+}
+
/*
* VM Virtual Address Allocate
*
@@ -975,8 +1483,6 @@ va_found:
* vm - Virtual Machine
* sz - Size in bytes
* vaddr_min - Minimum starting virtual address
- * data_memslot - Memory region slot for data pages
- * pgd_memslot - Memory region slot for new virtual translation tables
*
* Output Args: None
*
@@ -987,36 +1493,54 @@ va_found:
* given by vm. The allocated bytes are mapped to a virtual address >=
* the address given by vaddr_min. Note that each allocation uses a
* a unique set of pages, with the minimum real allocation being at least
- * a page.
+ * a page. The allocated physical space comes from the TEST_DATA memory region.
*/
-vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
- uint32_t data_memslot, uint32_t pgd_memslot)
+vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min)
{
- uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0);
-
- virt_pgd_alloc(vm, pgd_memslot);
-
- /*
- * Find an unused range of virtual page addresses of at least
- * pages in length.
- */
- vm_vaddr_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min);
-
- /* Map the virtual pages. */
- for (vm_vaddr_t vaddr = vaddr_start; pages > 0;
- pages--, vaddr += vm->page_size) {
- vm_paddr_t paddr;
-
- paddr = vm_phy_page_alloc(vm,
- KVM_UTIL_MIN_PFN * vm->page_size, data_memslot);
+ return __vm_vaddr_alloc(vm, sz, vaddr_min, MEM_REGION_TEST_DATA);
+}
- virt_pg_map(vm, vaddr, paddr, pgd_memslot);
+/*
+ * VM Virtual Address Allocate Pages
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Starting guest virtual address
+ *
+ * Allocates at least N system pages worth of bytes within the virtual address
+ * space of the vm.
+ */
+vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages)
+{
+ return vm_vaddr_alloc(vm, nr_pages * getpagesize(), KVM_UTIL_MIN_VADDR);
+}
- sparsebit_set(vm->vpages_mapped,
- vaddr >> vm->page_shift);
- }
+vm_vaddr_t __vm_vaddr_alloc_page(struct kvm_vm *vm, enum kvm_mem_region_type type)
+{
+ return __vm_vaddr_alloc(vm, getpagesize(), KVM_UTIL_MIN_VADDR, type);
+}
- return vaddr_start;
+/*
+ * VM Virtual Address Allocate Page
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Starting guest virtual address
+ *
+ * Allocates at least one system page worth of bytes within the virtual address
+ * space of the vm.
+ */
+vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm)
+{
+ return vm_vaddr_alloc_pages(vm, 1);
}
/*
@@ -1027,7 +1551,6 @@ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
* vaddr - Virtuall address to map
* paddr - VM Physical Address
* npages - The number of pages to map
- * pgd_memslot - Memory region slot for new virtual translation tables
*
* Output Args: None
*
@@ -1037,7 +1560,7 @@ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
* @npages starting at @vaddr to the page range starting at @paddr.
*/
void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- unsigned int npages, uint32_t pgd_memslot)
+ unsigned int npages)
{
size_t page_size = vm->page_size;
size_t size = npages * page_size;
@@ -1046,7 +1569,9 @@ void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
while (npages--) {
- virt_pg_map(vm, vaddr, paddr, pgd_memslot);
+ virt_pg_map(vm, vaddr, paddr);
+ sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift);
+
vaddr += page_size;
paddr += page_size;
}
@@ -1073,16 +1598,16 @@ void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
{
struct userspace_mem_region *region;
- list_for_each_entry(region, &vm->userspace_mem_regions, list) {
- if ((gpa >= region->region.guest_phys_addr)
- && (gpa <= (region->region.guest_phys_addr
- + region->region.memory_size - 1)))
- return (void *) ((uintptr_t) region->host_mem
- + (gpa - region->region.guest_phys_addr));
+ gpa = vm_untag_gpa(vm, gpa);
+
+ region = userspace_mem_region_find(vm, gpa, gpa);
+ if (!region) {
+ TEST_FAIL("No vm physical memory at 0x%lx", gpa);
+ return NULL;
}
- TEST_FAIL("No vm physical memory at 0x%lx", gpa);
- return NULL;
+ return (void *)((uintptr_t)region->host_mem
+ + (gpa - region->region.guest_phys_addr));
}
/*
@@ -1104,15 +1629,22 @@ void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
*/
vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
{
- struct userspace_mem_region *region;
+ struct rb_node *node;
+
+ for (node = vm->regions.hva_tree.rb_node; node; ) {
+ struct userspace_mem_region *region =
+ container_of(node, struct userspace_mem_region, hva_node);
- list_for_each_entry(region, &vm->userspace_mem_regions, list) {
- if ((hva >= region->host_mem)
- && (hva <= (region->host_mem
- + region->region.memory_size - 1)))
- return (vm_paddr_t) ((uintptr_t)
- region->region.guest_phys_addr
- + (hva - (uintptr_t) region->host_mem));
+ if (hva >= region->host_mem) {
+ if (hva <= (region->host_mem
+ + region->region.memory_size - 1))
+ return (vm_paddr_t)((uintptr_t)
+ region->region.guest_phys_addr
+ + (hva - (uintptr_t)region->host_mem));
+
+ node = node->rb_right;
+ } else
+ node = node->rb_left;
}
TEST_FAIL("No mapping to a guest physical address, hva: %p", hva);
@@ -1120,402 +1652,265 @@ vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
}
/*
- * VM Create IRQ Chip
+ * Address VM physical to Host Virtual *alias*.
*
* Input Args:
* vm - Virtual Machine
+ * gpa - VM physical address
*
* Output Args: None
*
- * Return: None
- *
- * Creates an interrupt controller chip for the VM specified by vm.
+ * Return:
+ * Equivalent address within the host virtual *alias* area, or NULL
+ * (without failing the test) if the guest memory is not shared (so
+ * no alias exists).
+ *
+ * Create a writable, shared virtual=>physical alias for the specific GPA.
+ * The primary use case is to allow the host selftest to manipulate guest
+ * memory without mapping said memory in the guest's address space. And, for
+ * userfaultfd-based demand paging, to do so without triggering userfaults.
*/
-void vm_create_irqchip(struct kvm_vm *vm)
+void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa)
{
- int ret;
-
- ret = ioctl(vm->fd, KVM_CREATE_IRQCHIP, 0);
- TEST_ASSERT(ret == 0, "KVM_CREATE_IRQCHIP IOCTL failed, "
- "rc: %i errno: %i", ret, errno);
+ struct userspace_mem_region *region;
+ uintptr_t offset;
- vm->has_irqchip = true;
-}
+ region = userspace_mem_region_find(vm, gpa, gpa);
+ if (!region)
+ return NULL;
-/*
- * VM VCPU State
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- *
- * Output Args: None
- *
- * Return:
- * Pointer to structure that describes the state of the VCPU.
- *
- * Locates and returns a pointer to a structure that describes the
- * state of the VCPU with the given vcpuid.
- */
-struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid)
-{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+ if (!region->host_alias)
+ return NULL;
- return vcpu->state;
+ offset = gpa - region->region.guest_phys_addr;
+ return (void *) ((uintptr_t) region->host_alias + offset);
}
-/*
- * VM VCPU Run
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- *
- * Output Args: None
- *
- * Return: None
- *
- * Switch to executing the code for the VCPU given by vcpuid, within the VM
- * given by vm.
- */
-void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
+/* Create an interrupt controller chip for the specified VM. */
+void vm_create_irqchip(struct kvm_vm *vm)
{
- int ret = _vcpu_run(vm, vcpuid);
- TEST_ASSERT(ret == 0, "KVM_RUN IOCTL failed, "
- "rc: %i errno: %i", ret, errno);
+ vm_ioctl(vm, KVM_CREATE_IRQCHIP, NULL);
+
+ vm->has_irqchip = true;
}
-int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
+int _vcpu_run(struct kvm_vcpu *vcpu)
{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
int rc;
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
do {
- rc = ioctl(vcpu->fd, KVM_RUN, NULL);
+ rc = __vcpu_run(vcpu);
} while (rc == -1 && errno == EINTR);
- return rc;
-}
-
-void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid)
-{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- int ret;
-
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
-
- vcpu->state->immediate_exit = 1;
- ret = ioctl(vcpu->fd, KVM_RUN, NULL);
- vcpu->state->immediate_exit = 0;
-
- TEST_ASSERT(ret == -1 && errno == EINTR,
- "KVM_RUN IOCTL didn't exit immediately, rc: %i, errno: %i",
- ret, errno);
-}
-void vcpu_set_guest_debug(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_guest_debug *debug)
-{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- int ret = ioctl(vcpu->fd, KVM_SET_GUEST_DEBUG, debug);
+ assert_on_unhandled_exception(vcpu);
- TEST_ASSERT(ret == 0, "KVM_SET_GUEST_DEBUG failed: %d", ret);
+ return rc;
}
/*
- * VM VCPU Set MP State
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- * mp_state - mp_state to be set
- *
- * Output Args: None
- *
- * Return: None
- *
- * Sets the MP state of the VCPU given by vcpuid, to the state given
- * by mp_state.
+ * Invoke KVM_RUN on a vCPU until KVM returns something other than -EINTR.
+ * Assert if the KVM returns an error (other than -EINTR).
*/
-void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_mp_state *mp_state)
+void vcpu_run(struct kvm_vcpu *vcpu)
{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- int ret;
-
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+ int ret = _vcpu_run(vcpu);
- ret = ioctl(vcpu->fd, KVM_SET_MP_STATE, mp_state);
- TEST_ASSERT(ret == 0, "KVM_SET_MP_STATE IOCTL failed, "
- "rc: %i errno: %i", ret, errno);
+ TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_RUN, ret));
}
-/*
- * VM VCPU Regs Get
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- *
- * Output Args:
- * regs - current state of VCPU regs
- *
- * Return: None
- *
- * Obtains the current register state for the VCPU specified by vcpuid
- * and stores it at the location given by regs.
- */
-void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs)
+void vcpu_run_complete_io(struct kvm_vcpu *vcpu)
{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
int ret;
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+ vcpu->run->immediate_exit = 1;
+ ret = __vcpu_run(vcpu);
+ vcpu->run->immediate_exit = 0;
- ret = ioctl(vcpu->fd, KVM_GET_REGS, regs);
- TEST_ASSERT(ret == 0, "KVM_GET_REGS failed, rc: %i errno: %i",
- ret, errno);
+ TEST_ASSERT(ret == -1 && errno == EINTR,
+ "KVM_RUN IOCTL didn't exit immediately, rc: %i, errno: %i",
+ ret, errno);
}
/*
- * VM VCPU Regs Set
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- * regs - Values to set VCPU regs to
- *
- * Output Args: None
- *
- * Return: None
- *
- * Sets the regs of the VCPU specified by vcpuid to the values
- * given by regs.
+ * Get the list of guest registers which are supported for
+ * KVM_GET_ONE_REG/KVM_SET_ONE_REG ioctls. Returns a kvm_reg_list pointer,
+ * it is the caller's responsibility to free the list.
*/
-void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs)
+struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vcpu *vcpu)
{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ struct kvm_reg_list reg_list_n = { .n = 0 }, *reg_list;
int ret;
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+ ret = __vcpu_ioctl(vcpu, KVM_GET_REG_LIST, &reg_list_n);
+ TEST_ASSERT(ret == -1 && errno == E2BIG, "KVM_GET_REG_LIST n=0");
- ret = ioctl(vcpu->fd, KVM_SET_REGS, regs);
- TEST_ASSERT(ret == 0, "KVM_SET_REGS failed, rc: %i errno: %i",
- ret, errno);
+ reg_list = calloc(1, sizeof(*reg_list) + reg_list_n.n * sizeof(__u64));
+ reg_list->n = reg_list_n.n;
+ vcpu_ioctl(vcpu, KVM_GET_REG_LIST, reg_list);
+ return reg_list;
}
-#ifdef __KVM_HAVE_VCPU_EVENTS
-void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_vcpu_events *events)
+void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu)
{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- int ret;
+ uint32_t page_size = getpagesize();
+ uint32_t size = vcpu->vm->dirty_ring_size;
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+ TEST_ASSERT(size > 0, "Should enable dirty ring first");
- ret = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, events);
- TEST_ASSERT(ret == 0, "KVM_GET_VCPU_EVENTS, failed, rc: %i errno: %i",
- ret, errno);
-}
+ if (!vcpu->dirty_gfns) {
+ void *addr;
-void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_vcpu_events *events)
-{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- int ret;
+ addr = mmap(NULL, size, PROT_READ, MAP_PRIVATE, vcpu->fd,
+ page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
+ TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped private");
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+ addr = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_PRIVATE, vcpu->fd,
+ page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
+ TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped exec");
- ret = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, events);
- TEST_ASSERT(ret == 0, "KVM_SET_VCPU_EVENTS, failed, rc: %i errno: %i",
- ret, errno);
-}
-#endif
-
-#ifdef __x86_64__
-void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_nested_state *state)
-{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- int ret;
+ addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd,
+ page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
+ TEST_ASSERT(addr != MAP_FAILED, "Dirty ring map failed");
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
-
- ret = ioctl(vcpu->fd, KVM_GET_NESTED_STATE, state);
- TEST_ASSERT(ret == 0,
- "KVM_SET_NESTED_STATE failed, ret: %i errno: %i",
- ret, errno);
-}
-
-int vcpu_nested_state_set(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_nested_state *state, bool ignore_error)
-{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- int ret;
-
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
-
- ret = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, state);
- if (!ignore_error) {
- TEST_ASSERT(ret == 0,
- "KVM_SET_NESTED_STATE failed, ret: %i errno: %i",
- ret, errno);
+ vcpu->dirty_gfns = addr;
+ vcpu->dirty_gfns_count = size / sizeof(struct kvm_dirty_gfn);
}
- return ret;
+ return vcpu->dirty_gfns;
}
-#endif
/*
- * VM VCPU System Regs Get
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- *
- * Output Args:
- * sregs - current state of VCPU system regs
- *
- * Return: None
- *
- * Obtains the current system register state for the VCPU specified by
- * vcpuid and stores it at the location given by sregs.
+ * Device Ioctl
*/
-void vcpu_sregs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
-{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- int ret;
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+int __kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr)
+{
+ struct kvm_device_attr attribute = {
+ .group = group,
+ .attr = attr,
+ .flags = 0,
+ };
- ret = ioctl(vcpu->fd, KVM_GET_SREGS, sregs);
- TEST_ASSERT(ret == 0, "KVM_GET_SREGS failed, rc: %i errno: %i",
- ret, errno);
+ return ioctl(dev_fd, KVM_HAS_DEVICE_ATTR, &attribute);
}
-/*
- * VM VCPU System Regs Set
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- * sregs - Values to set VCPU system regs to
- *
- * Output Args: None
- *
- * Return: None
- *
- * Sets the system regs of the VCPU specified by vcpuid to the values
- * given by sregs.
- */
-void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
+int __kvm_test_create_device(struct kvm_vm *vm, uint64_t type)
{
- int ret = _vcpu_sregs_set(vm, vcpuid, sregs);
- TEST_ASSERT(ret == 0, "KVM_RUN IOCTL failed, "
- "rc: %i errno: %i", ret, errno);
+ struct kvm_create_device create_dev = {
+ .type = type,
+ .flags = KVM_CREATE_DEVICE_TEST,
+ };
+
+ return __vm_ioctl(vm, KVM_CREATE_DEVICE, &create_dev);
}
-int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
+int __kvm_create_device(struct kvm_vm *vm, uint64_t type)
{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ struct kvm_create_device create_dev = {
+ .type = type,
+ .fd = -1,
+ .flags = 0,
+ };
+ int err;
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+ err = __vm_ioctl(vm, KVM_CREATE_DEVICE, &create_dev);
+ TEST_ASSERT(err <= 0, "KVM_CREATE_DEVICE shouldn't return a positive value");
+ return err ? : create_dev.fd;
+}
- return ioctl(vcpu->fd, KVM_SET_SREGS, sregs);
+int __kvm_device_attr_get(int dev_fd, uint32_t group, uint64_t attr, void *val)
+{
+ struct kvm_device_attr kvmattr = {
+ .group = group,
+ .attr = attr,
+ .flags = 0,
+ .addr = (uintptr_t)val,
+ };
+
+ return __kvm_ioctl(dev_fd, KVM_GET_DEVICE_ATTR, &kvmattr);
}
-void vcpu_fpu_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_fpu *fpu)
+int __kvm_device_attr_set(int dev_fd, uint32_t group, uint64_t attr, void *val)
{
- int ret;
+ struct kvm_device_attr kvmattr = {
+ .group = group,
+ .attr = attr,
+ .flags = 0,
+ .addr = (uintptr_t)val,
+ };
- ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_FPU, fpu);
- TEST_ASSERT(ret == 0, "KVM_GET_FPU failed, rc: %i errno: %i (%s)",
- ret, errno, strerror(errno));
+ return __kvm_ioctl(dev_fd, KVM_SET_DEVICE_ATTR, &kvmattr);
}
-void vcpu_fpu_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_fpu *fpu)
+/*
+ * IRQ related functions.
+ */
+
+int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level)
{
- int ret;
+ struct kvm_irq_level irq_level = {
+ .irq = irq,
+ .level = level,
+ };
- ret = _vcpu_ioctl(vm, vcpuid, KVM_SET_FPU, fpu);
- TEST_ASSERT(ret == 0, "KVM_SET_FPU failed, rc: %i errno: %i (%s)",
- ret, errno, strerror(errno));
+ return __vm_ioctl(vm, KVM_IRQ_LINE, &irq_level);
}
-void vcpu_get_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg)
+void kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level)
{
- int ret;
+ int ret = _kvm_irq_line(vm, irq, level);
- ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_ONE_REG, reg);
- TEST_ASSERT(ret == 0, "KVM_GET_ONE_REG failed, rc: %i errno: %i (%s)",
- ret, errno, strerror(errno));
+ TEST_ASSERT(ret >= 0, KVM_IOCTL_ERROR(KVM_IRQ_LINE, ret));
}
-void vcpu_set_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg)
+struct kvm_irq_routing *kvm_gsi_routing_create(void)
{
- int ret;
+ struct kvm_irq_routing *routing;
+ size_t size;
- ret = _vcpu_ioctl(vm, vcpuid, KVM_SET_ONE_REG, reg);
- TEST_ASSERT(ret == 0, "KVM_SET_ONE_REG failed, rc: %i errno: %i (%s)",
- ret, errno, strerror(errno));
+ size = sizeof(struct kvm_irq_routing);
+ /* Allocate space for the max number of entries: this wastes 196 KBs. */
+ size += KVM_MAX_IRQ_ROUTES * sizeof(struct kvm_irq_routing_entry);
+ routing = calloc(1, size);
+ assert(routing);
+
+ return routing;
}
-/*
- * VCPU Ioctl
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- * cmd - Ioctl number
- * arg - Argument to pass to the ioctl
- *
- * Return: None
- *
- * Issues an arbitrary ioctl on a VCPU fd.
- */
-void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid,
- unsigned long cmd, void *arg)
+void kvm_gsi_routing_irqchip_add(struct kvm_irq_routing *routing,
+ uint32_t gsi, uint32_t pin)
{
- int ret;
+ int i;
- ret = _vcpu_ioctl(vm, vcpuid, cmd, arg);
- TEST_ASSERT(ret == 0, "vcpu ioctl %lu failed, rc: %i errno: %i (%s)",
- cmd, ret, errno, strerror(errno));
+ assert(routing);
+ assert(routing->nr < KVM_MAX_IRQ_ROUTES);
+
+ i = routing->nr;
+ routing->entries[i].gsi = gsi;
+ routing->entries[i].type = KVM_IRQ_ROUTING_IRQCHIP;
+ routing->entries[i].flags = 0;
+ routing->entries[i].u.irqchip.irqchip = 0;
+ routing->entries[i].u.irqchip.pin = pin;
+ routing->nr++;
}
-int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid,
- unsigned long cmd, void *arg)
+int _kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing)
{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
int ret;
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
-
- ret = ioctl(vcpu->fd, cmd, arg);
+ assert(routing);
+ ret = __vm_ioctl(vm, KVM_SET_GSI_ROUTING, routing);
+ free(routing);
return ret;
}
-/*
- * VM Ioctl
- *
- * Input Args:
- * vm - Virtual Machine
- * cmd - Ioctl number
- * arg - Argument to pass to the ioctl
- *
- * Return: None
- *
- * Issues an arbitrary ioctl on a VM fd.
- */
-void vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
+void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing)
{
int ret;
- ret = ioctl(vm->fd, cmd, arg);
- TEST_ASSERT(ret == 0, "vm ioctl %lu failed, rc: %i errno: %i (%s)",
- cmd, ret, errno, strerror(errno));
+ ret = _kvm_gsi_routing_write(vm, routing);
+ TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_SET_GSI_ROUTING, ret));
}
/*
@@ -1535,14 +1930,15 @@ void vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
*/
void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
+ int ctr;
struct userspace_mem_region *region;
- struct vcpu *vcpu;
+ struct kvm_vcpu *vcpu;
fprintf(stream, "%*smode: 0x%x\n", indent, "", vm->mode);
fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd);
fprintf(stream, "%*spage_size: 0x%x\n", indent, "", vm->page_size);
fprintf(stream, "%*sMem Regions:\n", indent, "");
- list_for_each_entry(region, &vm->userspace_mem_regions, list) {
+ hash_for_each(vm->regions.slot_hash, ctr, region, slot_node) {
fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx "
"host_virt: %p\n", indent + 2, "",
(uint64_t) region->region.guest_phys_addr,
@@ -1550,6 +1946,10 @@ void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
region->host_mem);
fprintf(stream, "%*sunused_phy_pages: ", indent + 2, "");
sparsebit_dump(stream, region->unused_phy_pages, 0);
+ if (region->protected_phy_pages) {
+ fprintf(stream, "%*sprotected_phy_pages: ", indent + 2, "");
+ sparsebit_dump(stream, region->protected_phy_pages, 0);
+ }
}
fprintf(stream, "%*sMapped Virtual Pages:\n", indent, "");
sparsebit_dump(stream, vm->vpages_mapped, indent + 2);
@@ -1561,37 +1961,58 @@ void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
virt_dump(stream, vm, indent + 4);
}
fprintf(stream, "%*sVCPUs:\n", indent, "");
+
list_for_each_entry(vcpu, &vm->vcpus, list)
- vcpu_dump(stream, vm, vcpu->id, indent + 2);
+ vcpu_dump(stream, vcpu, indent + 2);
}
+#define KVM_EXIT_STRING(x) {KVM_EXIT_##x, #x}
+
/* Known KVM exit reasons */
static struct exit_reason {
unsigned int reason;
const char *name;
} exit_reasons_known[] = {
- {KVM_EXIT_UNKNOWN, "UNKNOWN"},
- {KVM_EXIT_EXCEPTION, "EXCEPTION"},
- {KVM_EXIT_IO, "IO"},
- {KVM_EXIT_HYPERCALL, "HYPERCALL"},
- {KVM_EXIT_DEBUG, "DEBUG"},
- {KVM_EXIT_HLT, "HLT"},
- {KVM_EXIT_MMIO, "MMIO"},
- {KVM_EXIT_IRQ_WINDOW_OPEN, "IRQ_WINDOW_OPEN"},
- {KVM_EXIT_SHUTDOWN, "SHUTDOWN"},
- {KVM_EXIT_FAIL_ENTRY, "FAIL_ENTRY"},
- {KVM_EXIT_INTR, "INTR"},
- {KVM_EXIT_SET_TPR, "SET_TPR"},
- {KVM_EXIT_TPR_ACCESS, "TPR_ACCESS"},
- {KVM_EXIT_S390_SIEIC, "S390_SIEIC"},
- {KVM_EXIT_S390_RESET, "S390_RESET"},
- {KVM_EXIT_DCR, "DCR"},
- {KVM_EXIT_NMI, "NMI"},
- {KVM_EXIT_INTERNAL_ERROR, "INTERNAL_ERROR"},
- {KVM_EXIT_OSI, "OSI"},
- {KVM_EXIT_PAPR_HCALL, "PAPR_HCALL"},
+ KVM_EXIT_STRING(UNKNOWN),
+ KVM_EXIT_STRING(EXCEPTION),
+ KVM_EXIT_STRING(IO),
+ KVM_EXIT_STRING(HYPERCALL),
+ KVM_EXIT_STRING(DEBUG),
+ KVM_EXIT_STRING(HLT),
+ KVM_EXIT_STRING(MMIO),
+ KVM_EXIT_STRING(IRQ_WINDOW_OPEN),
+ KVM_EXIT_STRING(SHUTDOWN),
+ KVM_EXIT_STRING(FAIL_ENTRY),
+ KVM_EXIT_STRING(INTR),
+ KVM_EXIT_STRING(SET_TPR),
+ KVM_EXIT_STRING(TPR_ACCESS),
+ KVM_EXIT_STRING(S390_SIEIC),
+ KVM_EXIT_STRING(S390_RESET),
+ KVM_EXIT_STRING(DCR),
+ KVM_EXIT_STRING(NMI),
+ KVM_EXIT_STRING(INTERNAL_ERROR),
+ KVM_EXIT_STRING(OSI),
+ KVM_EXIT_STRING(PAPR_HCALL),
+ KVM_EXIT_STRING(S390_UCONTROL),
+ KVM_EXIT_STRING(WATCHDOG),
+ KVM_EXIT_STRING(S390_TSCH),
+ KVM_EXIT_STRING(EPR),
+ KVM_EXIT_STRING(SYSTEM_EVENT),
+ KVM_EXIT_STRING(S390_STSI),
+ KVM_EXIT_STRING(IOAPIC_EOI),
+ KVM_EXIT_STRING(HYPERV),
+ KVM_EXIT_STRING(ARM_NISV),
+ KVM_EXIT_STRING(X86_RDMSR),
+ KVM_EXIT_STRING(X86_WRMSR),
+ KVM_EXIT_STRING(DIRTY_RING_FULL),
+ KVM_EXIT_STRING(AP_RESET_HOLD),
+ KVM_EXIT_STRING(X86_BUS_LOCK),
+ KVM_EXIT_STRING(XEN),
+ KVM_EXIT_STRING(RISCV_SBI),
+ KVM_EXIT_STRING(RISCV_CSR),
+ KVM_EXIT_STRING(NOTIFY),
#ifdef KVM_EXIT_MEMORY_NOT_PRESENT
- {KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"},
+ KVM_EXIT_STRING(MEMORY_NOT_PRESENT),
#endif
};
@@ -1630,6 +2051,7 @@ const char *exit_reason_str(unsigned int exit_reason)
* num - number of pages
* paddr_min - Physical address minimum
* memslot - Memory region to allocate page from
+ * protected - True if the pages will be used as protected/private memory
*
* Output Args: None
*
@@ -1641,8 +2063,9 @@ const char *exit_reason_str(unsigned int exit_reason)
* and their base address is returned. A TEST_ASSERT failure occurs if
* not enough pages are available at or above paddr_min.
*/
-vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
- vm_paddr_t paddr_min, uint32_t memslot)
+vm_paddr_t __vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
+ vm_paddr_t paddr_min, uint32_t memslot,
+ bool protected)
{
struct userspace_mem_region *region;
sparsebit_idx_t pg, base;
@@ -1655,8 +2078,10 @@ vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
paddr_min, vm->page_size);
region = memslot2region(vm, memslot);
- base = pg = paddr_min >> vm->page_shift;
+ TEST_ASSERT(!protected || region->protected_phy_pages,
+ "Region doesn't support protected memory");
+ base = pg = paddr_min >> vm->page_shift;
do {
for (; pg < base + num; ++pg) {
if (!sparsebit_is_set(region->unused_phy_pages, pg)) {
@@ -1675,8 +2100,11 @@ vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
abort();
}
- for (pg = base; pg < base + num; ++pg)
+ for (pg = base; pg < base + num; ++pg) {
sparsebit_clear(region->unused_phy_pages, pg);
+ if (protected)
+ sparsebit_set(region->protected_phy_pages, pg);
+ }
return base * vm->page_size;
}
@@ -1687,6 +2115,12 @@ vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
return vm_phy_pages_alloc(vm, 1, paddr_min, memslot);
}
+vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm)
+{
+ return vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR,
+ vm->memslots[MEM_REGION_PT]);
+}
+
/*
* Address Guest Virtual to Host Virtual
*
@@ -1704,60 +2138,9 @@ void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva)
return addr_gpa2hva(vm, addr_gva2gpa(vm, gva));
}
-/*
- * Is Unrestricted Guest
- *
- * Input Args:
- * vm - Virtual Machine
- *
- * Output Args: None
- *
- * Return: True if the unrestricted guest is set to 'Y', otherwise return false.
- *
- * Check if the unrestricted guest flag is enabled.
- */
-bool vm_is_unrestricted_guest(struct kvm_vm *vm)
-{
- char val = 'N';
- size_t count;
- FILE *f;
-
- if (vm == NULL) {
- /* Ensure that the KVM vendor-specific module is loaded. */
- f = fopen(KVM_DEV_PATH, "r");
- TEST_ASSERT(f != NULL, "Error in opening KVM dev file: %d",
- errno);
- fclose(f);
- }
-
- f = fopen("/sys/module/kvm_intel/parameters/unrestricted_guest", "r");
- if (f) {
- count = fread(&val, sizeof(char), 1, f);
- TEST_ASSERT(count == 1, "Unable to read from param file.");
- fclose(f);
- }
-
- return val == 'Y';
-}
-
-unsigned int vm_get_page_size(struct kvm_vm *vm)
+unsigned long __weak vm_compute_max_gfn(struct kvm_vm *vm)
{
- return vm->page_size;
-}
-
-unsigned int vm_get_page_shift(struct kvm_vm *vm)
-{
- return vm->page_shift;
-}
-
-unsigned int vm_get_max_gfn(struct kvm_vm *vm)
-{
- return vm->max_gfn;
-}
-
-int vm_get_fd(struct kvm_vm *vm)
-{
- return vm->fd;
+ return ((1ULL << vm->pa_bits) >> vm->page_shift) - 1;
}
static unsigned int vm_calc_num_pages(unsigned int num_pages,
@@ -1799,3 +2182,144 @@ unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size)
n = DIV_ROUND_UP(size, vm_guest_mode_params[mode].page_size);
return vm_adjust_num_guest_pages(mode, n);
}
+
+/*
+ * Read binary stats descriptors
+ *
+ * Input Args:
+ * stats_fd - the file descriptor for the binary stats file from which to read
+ * header - the binary stats metadata header corresponding to the given FD
+ *
+ * Output Args: None
+ *
+ * Return:
+ * A pointer to a newly allocated series of stat descriptors.
+ * Caller is responsible for freeing the returned kvm_stats_desc.
+ *
+ * Read the stats descriptors from the binary stats interface.
+ */
+struct kvm_stats_desc *read_stats_descriptors(int stats_fd,
+ struct kvm_stats_header *header)
+{
+ struct kvm_stats_desc *stats_desc;
+ ssize_t desc_size, total_size, ret;
+
+ desc_size = get_stats_descriptor_size(header);
+ total_size = header->num_desc * desc_size;
+
+ stats_desc = calloc(header->num_desc, desc_size);
+ TEST_ASSERT(stats_desc, "Allocate memory for stats descriptors");
+
+ ret = pread(stats_fd, stats_desc, total_size, header->desc_offset);
+ TEST_ASSERT(ret == total_size, "Read KVM stats descriptors");
+
+ return stats_desc;
+}
+
+/*
+ * Read stat data for a particular stat
+ *
+ * Input Args:
+ * stats_fd - the file descriptor for the binary stats file from which to read
+ * header - the binary stats metadata header corresponding to the given FD
+ * desc - the binary stat metadata for the particular stat to be read
+ * max_elements - the maximum number of 8-byte values to read into data
+ *
+ * Output Args:
+ * data - the buffer into which stat data should be read
+ *
+ * Read the data values of a specified stat from the binary stats interface.
+ */
+void read_stat_data(int stats_fd, struct kvm_stats_header *header,
+ struct kvm_stats_desc *desc, uint64_t *data,
+ size_t max_elements)
+{
+ size_t nr_elements = min_t(ssize_t, desc->size, max_elements);
+ size_t size = nr_elements * sizeof(*data);
+ ssize_t ret;
+
+ TEST_ASSERT(desc->size, "No elements in stat '%s'", desc->name);
+ TEST_ASSERT(max_elements, "Zero elements requested for stat '%s'", desc->name);
+
+ ret = pread(stats_fd, data, size,
+ header->data_offset + desc->offset);
+
+ TEST_ASSERT(ret >= 0, "pread() failed on stat '%s', errno: %i (%s)",
+ desc->name, errno, strerror(errno));
+ TEST_ASSERT(ret == size,
+ "pread() on stat '%s' read %ld bytes, wanted %lu bytes",
+ desc->name, size, ret);
+}
+
+/*
+ * Read the data of the named stat
+ *
+ * Input Args:
+ * vm - the VM for which the stat should be read
+ * stat_name - the name of the stat to read
+ * max_elements - the maximum number of 8-byte values to read into data
+ *
+ * Output Args:
+ * data - the buffer into which stat data should be read
+ *
+ * Read the data values of a specified stat from the binary stats interface.
+ */
+void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data,
+ size_t max_elements)
+{
+ struct kvm_stats_desc *desc;
+ size_t size_desc;
+ int i;
+
+ if (!vm->stats_fd) {
+ vm->stats_fd = vm_get_stats_fd(vm);
+ read_stats_header(vm->stats_fd, &vm->stats_header);
+ vm->stats_desc = read_stats_descriptors(vm->stats_fd,
+ &vm->stats_header);
+ }
+
+ size_desc = get_stats_descriptor_size(&vm->stats_header);
+
+ for (i = 0; i < vm->stats_header.num_desc; ++i) {
+ desc = (void *)vm->stats_desc + (i * size_desc);
+
+ if (strcmp(desc->name, stat_name))
+ continue;
+
+ read_stat_data(vm->stats_fd, &vm->stats_header, desc,
+ data, max_elements);
+
+ break;
+ }
+}
+
+__weak void kvm_arch_vm_post_create(struct kvm_vm *vm)
+{
+}
+
+__weak void kvm_selftest_arch_init(void)
+{
+}
+
+void __attribute((constructor)) kvm_selftest_init(void)
+{
+ /* Tell stdout not to buffer its content. */
+ setbuf(stdout, NULL);
+
+ kvm_selftest_arch_init();
+}
+
+bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr)
+{
+ sparsebit_idx_t pg = 0;
+ struct userspace_mem_region *region;
+
+ if (!vm_arch_has_protected_memory(vm))
+ return false;
+
+ region = userspace_mem_region_find(vm, paddr, paddr);
+ TEST_ASSERT(region, "No vm physical memory at 0x%lx", paddr);
+
+ pg = paddr >> vm->page_shift;
+ return sparsebit_is_set(region->protected_phy_pages, pg);
+}
diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
deleted file mode 100644
index 2ef446520748..000000000000
--- a/tools/testing/selftests/kvm/lib/kvm_util_internal.h
+++ /dev/null
@@ -1,111 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * tools/testing/selftests/kvm/lib/kvm_util_internal.h
- *
- * Copyright (C) 2018, Google LLC.
- */
-
-#ifndef SELFTEST_KVM_UTIL_INTERNAL_H
-#define SELFTEST_KVM_UTIL_INTERNAL_H
-
-#include "sparsebit.h"
-
-#define KVM_DEV_PATH "/dev/kvm"
-
-struct userspace_mem_region {
- struct kvm_userspace_memory_region region;
- struct sparsebit *unused_phy_pages;
- int fd;
- off_t offset;
- void *host_mem;
- void *mmap_start;
- size_t mmap_size;
- struct list_head list;
-};
-
-struct vcpu {
- struct list_head list;
- uint32_t id;
- int fd;
- struct kvm_run *state;
-};
-
-struct kvm_vm {
- int mode;
- unsigned long type;
- int kvm_fd;
- int fd;
- unsigned int pgtable_levels;
- unsigned int page_size;
- unsigned int page_shift;
- unsigned int pa_bits;
- unsigned int va_bits;
- uint64_t max_gfn;
- struct list_head vcpus;
- struct list_head userspace_mem_regions;
- struct sparsebit *vpages_valid;
- struct sparsebit *vpages_mapped;
- bool has_irqchip;
- bool pgd_created;
- vm_paddr_t pgd;
- vm_vaddr_t gdt;
- vm_vaddr_t tss;
-};
-
-struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid);
-
-/*
- * Virtual Translation Tables Dump
- *
- * Input Args:
- * stream - Output FILE stream
- * vm - Virtual Machine
- * indent - Left margin indent amount
- *
- * Output Args: None
- *
- * Return: None
- *
- * Dumps to the FILE stream given by @stream, the contents of all the
- * virtual translation tables for the VM given by @vm.
- */
-void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
-
-/*
- * Register Dump
- *
- * Input Args:
- * stream - Output FILE stream
- * regs - Registers
- * indent - Left margin indent amount
- *
- * Output Args: None
- *
- * Return: None
- *
- * Dumps the state of the registers given by @regs, to the FILE stream
- * given by @stream.
- */
-void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent);
-
-/*
- * System Register Dump
- *
- * Input Args:
- * stream - Output FILE stream
- * sregs - System registers
- * indent - Left margin indent amount
- *
- * Output Args: None
- *
- * Return: None
- *
- * Dumps the state of the system registers given by @sregs, to the FILE stream
- * given by @stream.
- */
-void sregs_dump(FILE *stream, struct kvm_sregs *sregs, uint8_t indent);
-
-struct userspace_mem_region *
-memslot2region(struct kvm_vm *vm, uint32_t memslot);
-
-#endif /* SELFTEST_KVM_UTIL_INTERNAL_H */
diff --git a/tools/testing/selftests/kvm/lib/memstress.c b/tools/testing/selftests/kvm/lib/memstress.c
new file mode 100644
index 000000000000..cf2c73971308
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/memstress.c
@@ -0,0 +1,398 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020, Google LLC.
+ */
+#define _GNU_SOURCE
+
+#include <inttypes.h>
+#include <linux/bitmap.h>
+
+#include "kvm_util.h"
+#include "memstress.h"
+#include "processor.h"
+
+struct memstress_args memstress_args;
+
+/*
+ * Guest virtual memory offset of the testing memory slot.
+ * Must not conflict with identity mapped test code.
+ */
+static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;
+
+struct vcpu_thread {
+ /* The index of the vCPU. */
+ int vcpu_idx;
+
+ /* The pthread backing the vCPU. */
+ pthread_t thread;
+
+ /* Set to true once the vCPU thread is up and running. */
+ bool running;
+};
+
+/* The vCPU threads involved in this test. */
+static struct vcpu_thread vcpu_threads[KVM_MAX_VCPUS];
+
+/* The function run by each vCPU thread, as provided by the test. */
+static void (*vcpu_thread_fn)(struct memstress_vcpu_args *);
+
+/* Set to true once all vCPU threads are up and running. */
+static bool all_vcpu_threads_running;
+
+static struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
+
+/*
+ * Continuously write to the first 8 bytes of each page in the
+ * specified region.
+ */
+void memstress_guest_code(uint32_t vcpu_idx)
+{
+ struct memstress_args *args = &memstress_args;
+ struct memstress_vcpu_args *vcpu_args = &args->vcpu_args[vcpu_idx];
+ struct guest_random_state rand_state;
+ uint64_t gva;
+ uint64_t pages;
+ uint64_t addr;
+ uint64_t page;
+ int i;
+
+ rand_state = new_guest_random_state(args->random_seed + vcpu_idx);
+
+ gva = vcpu_args->gva;
+ pages = vcpu_args->pages;
+
+ /* Make sure vCPU args data structure is not corrupt. */
+ GUEST_ASSERT(vcpu_args->vcpu_idx == vcpu_idx);
+
+ while (true) {
+ for (i = 0; i < sizeof(memstress_args); i += args->guest_page_size)
+ (void) *((volatile char *)args + i);
+
+ for (i = 0; i < pages; i++) {
+ if (args->random_access)
+ page = guest_random_u32(&rand_state) % pages;
+ else
+ page = i;
+
+ addr = gva + (page * args->guest_page_size);
+
+ if (guest_random_u32(&rand_state) % 100 < args->write_percent)
+ *(uint64_t *)addr = 0x0123456789ABCDEF;
+ else
+ READ_ONCE(*(uint64_t *)addr);
+ }
+
+ GUEST_SYNC(1);
+ }
+}
+
+void memstress_setup_vcpus(struct kvm_vm *vm, int nr_vcpus,
+ struct kvm_vcpu *vcpus[],
+ uint64_t vcpu_memory_bytes,
+ bool partition_vcpu_memory_access)
+{
+ struct memstress_args *args = &memstress_args;
+ struct memstress_vcpu_args *vcpu_args;
+ int i;
+
+ for (i = 0; i < nr_vcpus; i++) {
+ vcpu_args = &args->vcpu_args[i];
+
+ vcpu_args->vcpu = vcpus[i];
+ vcpu_args->vcpu_idx = i;
+
+ if (partition_vcpu_memory_access) {
+ vcpu_args->gva = guest_test_virt_mem +
+ (i * vcpu_memory_bytes);
+ vcpu_args->pages = vcpu_memory_bytes /
+ args->guest_page_size;
+ vcpu_args->gpa = args->gpa + (i * vcpu_memory_bytes);
+ } else {
+ vcpu_args->gva = guest_test_virt_mem;
+ vcpu_args->pages = (nr_vcpus * vcpu_memory_bytes) /
+ args->guest_page_size;
+ vcpu_args->gpa = args->gpa;
+ }
+
+ vcpu_args_set(vcpus[i], 1, i);
+
+ pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n",
+ i, vcpu_args->gpa, vcpu_args->gpa +
+ (vcpu_args->pages * args->guest_page_size));
+ }
+}
+
+struct kvm_vm *memstress_create_vm(enum vm_guest_mode mode, int nr_vcpus,
+ uint64_t vcpu_memory_bytes, int slots,
+ enum vm_mem_backing_src_type backing_src,
+ bool partition_vcpu_memory_access)
+{
+ struct memstress_args *args = &memstress_args;
+ struct kvm_vm *vm;
+ uint64_t guest_num_pages, slot0_pages = 0;
+ uint64_t backing_src_pagesz = get_backing_src_pagesz(backing_src);
+ uint64_t region_end_gfn;
+ int i;
+
+ pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
+
+ /* By default vCPUs will write to memory. */
+ args->write_percent = 100;
+
+ /*
+ * Snapshot the non-huge page size. This is used by the guest code to
+ * access/dirty pages at the logging granularity.
+ */
+ args->guest_page_size = vm_guest_mode_params[mode].page_size;
+
+ guest_num_pages = vm_adjust_num_guest_pages(mode,
+ (nr_vcpus * vcpu_memory_bytes) / args->guest_page_size);
+
+ TEST_ASSERT(vcpu_memory_bytes % getpagesize() == 0,
+ "Guest memory size is not host page size aligned.");
+ TEST_ASSERT(vcpu_memory_bytes % args->guest_page_size == 0,
+ "Guest memory size is not guest page size aligned.");
+ TEST_ASSERT(guest_num_pages % slots == 0,
+ "Guest memory cannot be evenly divided into %d slots.",
+ slots);
+
+ /*
+ * If using nested, allocate extra pages for the nested page tables and
+ * in-memory data structures.
+ */
+ if (args->nested)
+ slot0_pages += memstress_nested_pages(nr_vcpus);
+
+ /*
+ * Pass guest_num_pages to populate the page tables for test memory.
+ * The memory is also added to memslot 0, but that's a benign side
+ * effect as KVM allows aliasing HVAs in meslots.
+ */
+ vm = __vm_create_with_vcpus(VM_SHAPE(mode), nr_vcpus,
+ slot0_pages + guest_num_pages,
+ memstress_guest_code, vcpus);
+
+ args->vm = vm;
+
+ /* Put the test region at the top guest physical memory. */
+ region_end_gfn = vm->max_gfn + 1;
+
+#ifdef __x86_64__
+ /*
+ * When running vCPUs in L2, restrict the test region to 48 bits to
+ * avoid needing 5-level page tables to identity map L2.
+ */
+ if (args->nested)
+ region_end_gfn = min(region_end_gfn, (1UL << 48) / args->guest_page_size);
+#endif
+ /*
+ * If there should be more memory in the guest test region than there
+ * can be pages in the guest, it will definitely cause problems.
+ */
+ TEST_ASSERT(guest_num_pages < region_end_gfn,
+ "Requested more guest memory than address space allows.\n"
+ " guest pages: %" PRIx64 " max gfn: %" PRIx64
+ " nr_vcpus: %d wss: %" PRIx64 "]",
+ guest_num_pages, region_end_gfn - 1, nr_vcpus, vcpu_memory_bytes);
+
+ args->gpa = (region_end_gfn - guest_num_pages - 1) * args->guest_page_size;
+ args->gpa = align_down(args->gpa, backing_src_pagesz);
+#ifdef __s390x__
+ /* Align to 1M (segment size) */
+ args->gpa = align_down(args->gpa, 1 << 20);
+#endif
+ args->size = guest_num_pages * args->guest_page_size;
+ pr_info("guest physical test memory: [0x%lx, 0x%lx)\n",
+ args->gpa, args->gpa + args->size);
+
+ /* Add extra memory slots for testing */
+ for (i = 0; i < slots; i++) {
+ uint64_t region_pages = guest_num_pages / slots;
+ vm_paddr_t region_start = args->gpa + region_pages * args->guest_page_size * i;
+
+ vm_userspace_mem_region_add(vm, backing_src, region_start,
+ MEMSTRESS_MEM_SLOT_INDEX + i,
+ region_pages, 0);
+ }
+
+ /* Do mapping for the demand paging memory slot */
+ virt_map(vm, guest_test_virt_mem, args->gpa, guest_num_pages);
+
+ memstress_setup_vcpus(vm, nr_vcpus, vcpus, vcpu_memory_bytes,
+ partition_vcpu_memory_access);
+
+ if (args->nested) {
+ pr_info("Configuring vCPUs to run in L2 (nested).\n");
+ memstress_setup_nested(vm, nr_vcpus, vcpus);
+ }
+
+ /* Export the shared variables to the guest. */
+ sync_global_to_guest(vm, memstress_args);
+
+ return vm;
+}
+
+void memstress_destroy_vm(struct kvm_vm *vm)
+{
+ kvm_vm_free(vm);
+}
+
+void memstress_set_write_percent(struct kvm_vm *vm, uint32_t write_percent)
+{
+ memstress_args.write_percent = write_percent;
+ sync_global_to_guest(vm, memstress_args.write_percent);
+}
+
+void memstress_set_random_seed(struct kvm_vm *vm, uint32_t random_seed)
+{
+ memstress_args.random_seed = random_seed;
+ sync_global_to_guest(vm, memstress_args.random_seed);
+}
+
+void memstress_set_random_access(struct kvm_vm *vm, bool random_access)
+{
+ memstress_args.random_access = random_access;
+ sync_global_to_guest(vm, memstress_args.random_access);
+}
+
+uint64_t __weak memstress_nested_pages(int nr_vcpus)
+{
+ return 0;
+}
+
+void __weak memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu **vcpus)
+{
+ pr_info("%s() not support on this architecture, skipping.\n", __func__);
+ exit(KSFT_SKIP);
+}
+
+static void *vcpu_thread_main(void *data)
+{
+ struct vcpu_thread *vcpu = data;
+ int vcpu_idx = vcpu->vcpu_idx;
+
+ if (memstress_args.pin_vcpus)
+ kvm_pin_this_task_to_pcpu(memstress_args.vcpu_to_pcpu[vcpu_idx]);
+
+ WRITE_ONCE(vcpu->running, true);
+
+ /*
+ * Wait for all vCPU threads to be up and running before calling the test-
+ * provided vCPU thread function. This prevents thread creation (which
+ * requires taking the mmap_sem in write mode) from interfering with the
+ * guest faulting in its memory.
+ */
+ while (!READ_ONCE(all_vcpu_threads_running))
+ ;
+
+ vcpu_thread_fn(&memstress_args.vcpu_args[vcpu_idx]);
+
+ return NULL;
+}
+
+void memstress_start_vcpu_threads(int nr_vcpus,
+ void (*vcpu_fn)(struct memstress_vcpu_args *))
+{
+ int i;
+
+ vcpu_thread_fn = vcpu_fn;
+ WRITE_ONCE(all_vcpu_threads_running, false);
+ WRITE_ONCE(memstress_args.stop_vcpus, false);
+
+ for (i = 0; i < nr_vcpus; i++) {
+ struct vcpu_thread *vcpu = &vcpu_threads[i];
+
+ vcpu->vcpu_idx = i;
+ WRITE_ONCE(vcpu->running, false);
+
+ pthread_create(&vcpu->thread, NULL, vcpu_thread_main, vcpu);
+ }
+
+ for (i = 0; i < nr_vcpus; i++) {
+ while (!READ_ONCE(vcpu_threads[i].running))
+ ;
+ }
+
+ WRITE_ONCE(all_vcpu_threads_running, true);
+}
+
+void memstress_join_vcpu_threads(int nr_vcpus)
+{
+ int i;
+
+ WRITE_ONCE(memstress_args.stop_vcpus, true);
+
+ for (i = 0; i < nr_vcpus; i++)
+ pthread_join(vcpu_threads[i].thread, NULL);
+}
+
+static void toggle_dirty_logging(struct kvm_vm *vm, int slots, bool enable)
+{
+ int i;
+
+ for (i = 0; i < slots; i++) {
+ int slot = MEMSTRESS_MEM_SLOT_INDEX + i;
+ int flags = enable ? KVM_MEM_LOG_DIRTY_PAGES : 0;
+
+ vm_mem_region_set_flags(vm, slot, flags);
+ }
+}
+
+void memstress_enable_dirty_logging(struct kvm_vm *vm, int slots)
+{
+ toggle_dirty_logging(vm, slots, true);
+}
+
+void memstress_disable_dirty_logging(struct kvm_vm *vm, int slots)
+{
+ toggle_dirty_logging(vm, slots, false);
+}
+
+void memstress_get_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[], int slots)
+{
+ int i;
+
+ for (i = 0; i < slots; i++) {
+ int slot = MEMSTRESS_MEM_SLOT_INDEX + i;
+
+ kvm_vm_get_dirty_log(vm, slot, bitmaps[i]);
+ }
+}
+
+void memstress_clear_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[],
+ int slots, uint64_t pages_per_slot)
+{
+ int i;
+
+ for (i = 0; i < slots; i++) {
+ int slot = MEMSTRESS_MEM_SLOT_INDEX + i;
+
+ kvm_vm_clear_dirty_log(vm, slot, bitmaps[i], 0, pages_per_slot);
+ }
+}
+
+unsigned long **memstress_alloc_bitmaps(int slots, uint64_t pages_per_slot)
+{
+ unsigned long **bitmaps;
+ int i;
+
+ bitmaps = malloc(slots * sizeof(bitmaps[0]));
+ TEST_ASSERT(bitmaps, "Failed to allocate bitmaps array.");
+
+ for (i = 0; i < slots; i++) {
+ bitmaps[i] = bitmap_zalloc(pages_per_slot);
+ TEST_ASSERT(bitmaps[i], "Failed to allocate slot bitmap.");
+ }
+
+ return bitmaps;
+}
+
+void memstress_free_bitmaps(unsigned long *bitmaps[], int slots)
+{
+ int i;
+
+ for (i = 0; i < slots; i++)
+ free(bitmaps[i]);
+
+ free(bitmaps);
+}
diff --git a/tools/testing/selftests/kvm/lib/rbtree.c b/tools/testing/selftests/kvm/lib/rbtree.c
new file mode 100644
index 000000000000..a703f0194ea3
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/rbtree.c
@@ -0,0 +1 @@
+#include "../../../../lib/rbtree.c"
diff --git a/tools/testing/selftests/kvm/lib/riscv/handlers.S b/tools/testing/selftests/kvm/lib/riscv/handlers.S
new file mode 100644
index 000000000000..aa0abd3f35bb
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/riscv/handlers.S
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2023 Intel Corporation
+ */
+
+#ifndef __ASSEMBLY__
+#define __ASSEMBLY__
+#endif
+
+#include <asm/csr.h>
+
+.macro save_context
+ addi sp, sp, (-8*34)
+ sd x1, 0(sp)
+ sd x2, 8(sp)
+ sd x3, 16(sp)
+ sd x4, 24(sp)
+ sd x5, 32(sp)
+ sd x6, 40(sp)
+ sd x7, 48(sp)
+ sd x8, 56(sp)
+ sd x9, 64(sp)
+ sd x10, 72(sp)
+ sd x11, 80(sp)
+ sd x12, 88(sp)
+ sd x13, 96(sp)
+ sd x14, 104(sp)
+ sd x15, 112(sp)
+ sd x16, 120(sp)
+ sd x17, 128(sp)
+ sd x18, 136(sp)
+ sd x19, 144(sp)
+ sd x20, 152(sp)
+ sd x21, 160(sp)
+ sd x22, 168(sp)
+ sd x23, 176(sp)
+ sd x24, 184(sp)
+ sd x25, 192(sp)
+ sd x26, 200(sp)
+ sd x27, 208(sp)
+ sd x28, 216(sp)
+ sd x29, 224(sp)
+ sd x30, 232(sp)
+ sd x31, 240(sp)
+ csrr s0, CSR_SEPC
+ csrr s1, CSR_SSTATUS
+ csrr s2, CSR_SCAUSE
+ sd s0, 248(sp)
+ sd s1, 256(sp)
+ sd s2, 264(sp)
+.endm
+
+.macro restore_context
+ ld s2, 264(sp)
+ ld s1, 256(sp)
+ ld s0, 248(sp)
+ csrw CSR_SCAUSE, s2
+ csrw CSR_SSTATUS, s1
+ csrw CSR_SEPC, s0
+ ld x31, 240(sp)
+ ld x30, 232(sp)
+ ld x29, 224(sp)
+ ld x28, 216(sp)
+ ld x27, 208(sp)
+ ld x26, 200(sp)
+ ld x25, 192(sp)
+ ld x24, 184(sp)
+ ld x23, 176(sp)
+ ld x22, 168(sp)
+ ld x21, 160(sp)
+ ld x20, 152(sp)
+ ld x19, 144(sp)
+ ld x18, 136(sp)
+ ld x17, 128(sp)
+ ld x16, 120(sp)
+ ld x15, 112(sp)
+ ld x14, 104(sp)
+ ld x13, 96(sp)
+ ld x12, 88(sp)
+ ld x11, 80(sp)
+ ld x10, 72(sp)
+ ld x9, 64(sp)
+ ld x8, 56(sp)
+ ld x7, 48(sp)
+ ld x6, 40(sp)
+ ld x5, 32(sp)
+ ld x4, 24(sp)
+ ld x3, 16(sp)
+ ld x2, 8(sp)
+ ld x1, 0(sp)
+ addi sp, sp, (8*34)
+.endm
+
+.balign 4
+.global exception_vectors
+exception_vectors:
+ save_context
+ move a0, sp
+ call route_exception
+ restore_context
+ sret
diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c
new file mode 100644
index 000000000000..e8211f5d6863
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/riscv/processor.c
@@ -0,0 +1,504 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * RISC-V code
+ *
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
+ */
+
+#include <linux/compiler.h>
+#include <assert.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+
+#define DEFAULT_RISCV_GUEST_STACK_VADDR_MIN 0xac0000
+
+static vm_vaddr_t exception_handlers;
+
+bool __vcpu_has_ext(struct kvm_vcpu *vcpu, uint64_t ext)
+{
+ unsigned long value = 0;
+ int ret;
+
+ ret = __vcpu_get_reg(vcpu, ext, &value);
+
+ return !ret && !!value;
+}
+
+static uint64_t page_align(struct kvm_vm *vm, uint64_t v)
+{
+ return (v + vm->page_size) & ~(vm->page_size - 1);
+}
+
+static uint64_t pte_addr(struct kvm_vm *vm, uint64_t entry)
+{
+ return ((entry & PGTBL_PTE_ADDR_MASK) >> PGTBL_PTE_ADDR_SHIFT) <<
+ PGTBL_PAGE_SIZE_SHIFT;
+}
+
+static uint64_t ptrs_per_pte(struct kvm_vm *vm)
+{
+ return PGTBL_PAGE_SIZE / sizeof(uint64_t);
+}
+
+static uint64_t pte_index_mask[] = {
+ PGTBL_L0_INDEX_MASK,
+ PGTBL_L1_INDEX_MASK,
+ PGTBL_L2_INDEX_MASK,
+ PGTBL_L3_INDEX_MASK,
+};
+
+static uint32_t pte_index_shift[] = {
+ PGTBL_L0_INDEX_SHIFT,
+ PGTBL_L1_INDEX_SHIFT,
+ PGTBL_L2_INDEX_SHIFT,
+ PGTBL_L3_INDEX_SHIFT,
+};
+
+static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva, int level)
+{
+ TEST_ASSERT(level > -1,
+ "Negative page table level (%d) not possible", level);
+ TEST_ASSERT(level < vm->pgtable_levels,
+ "Invalid page table level (%d)", level);
+
+ return (gva & pte_index_mask[level]) >> pte_index_shift[level];
+}
+
+void virt_arch_pgd_alloc(struct kvm_vm *vm)
+{
+ size_t nr_pages = page_align(vm, ptrs_per_pte(vm) * 8) / vm->page_size;
+
+ if (vm->pgd_created)
+ return;
+
+ vm->pgd = vm_phy_pages_alloc(vm, nr_pages,
+ KVM_GUEST_PAGE_TABLE_MIN_PADDR,
+ vm->memslots[MEM_REGION_PT]);
+ vm->pgd_created = true;
+}
+
+void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
+{
+ uint64_t *ptep, next_ppn;
+ int level = vm->pgtable_levels - 1;
+
+ TEST_ASSERT((vaddr % vm->page_size) == 0,
+ "Virtual address not on page boundary,\n"
+ " vaddr: 0x%lx vm->page_size: 0x%x", vaddr, vm->page_size);
+ TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
+ (vaddr >> vm->page_shift)),
+ "Invalid virtual address, vaddr: 0x%lx", vaddr);
+ TEST_ASSERT((paddr % vm->page_size) == 0,
+ "Physical address not on page boundary,\n"
+ " paddr: 0x%lx vm->page_size: 0x%x", paddr, vm->page_size);
+ TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
+ "Physical address beyond maximum supported,\n"
+ " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+ paddr, vm->max_gfn, vm->page_size);
+
+ ptep = addr_gpa2hva(vm, vm->pgd) + pte_index(vm, vaddr, level) * 8;
+ if (!*ptep) {
+ next_ppn = vm_alloc_page_table(vm) >> PGTBL_PAGE_SIZE_SHIFT;
+ *ptep = (next_ppn << PGTBL_PTE_ADDR_SHIFT) |
+ PGTBL_PTE_VALID_MASK;
+ }
+ level--;
+
+ while (level > -1) {
+ ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) +
+ pte_index(vm, vaddr, level) * 8;
+ if (!*ptep && level > 0) {
+ next_ppn = vm_alloc_page_table(vm) >>
+ PGTBL_PAGE_SIZE_SHIFT;
+ *ptep = (next_ppn << PGTBL_PTE_ADDR_SHIFT) |
+ PGTBL_PTE_VALID_MASK;
+ }
+ level--;
+ }
+
+ paddr = paddr >> PGTBL_PAGE_SIZE_SHIFT;
+ *ptep = (paddr << PGTBL_PTE_ADDR_SHIFT) |
+ PGTBL_PTE_PERM_MASK | PGTBL_PTE_VALID_MASK;
+}
+
+vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+ uint64_t *ptep;
+ int level = vm->pgtable_levels - 1;
+
+ if (!vm->pgd_created)
+ goto unmapped_gva;
+
+ ptep = addr_gpa2hva(vm, vm->pgd) + pte_index(vm, gva, level) * 8;
+ if (!ptep)
+ goto unmapped_gva;
+ level--;
+
+ while (level > -1) {
+ ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) +
+ pte_index(vm, gva, level) * 8;
+ if (!ptep)
+ goto unmapped_gva;
+ level--;
+ }
+
+ return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1));
+
+unmapped_gva:
+ TEST_FAIL("No mapping for vm virtual address gva: 0x%lx level: %d",
+ gva, level);
+ exit(1);
+}
+
+static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent,
+ uint64_t page, int level)
+{
+#ifdef DEBUG
+ static const char *const type[] = { "pte", "pmd", "pud", "p4d"};
+ uint64_t pte, *ptep;
+
+ if (level < 0)
+ return;
+
+ for (pte = page; pte < page + ptrs_per_pte(vm) * 8; pte += 8) {
+ ptep = addr_gpa2hva(vm, pte);
+ if (!*ptep)
+ continue;
+ fprintf(stream, "%*s%s: %lx: %lx at %p\n", indent, "",
+ type[level], pte, *ptep, ptep);
+ pte_dump(stream, vm, indent + 1,
+ pte_addr(vm, *ptep), level - 1);
+ }
+#endif
+}
+
+void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+{
+ int level = vm->pgtable_levels - 1;
+ uint64_t pgd, *ptep;
+
+ if (!vm->pgd_created)
+ return;
+
+ for (pgd = vm->pgd; pgd < vm->pgd + ptrs_per_pte(vm) * 8; pgd += 8) {
+ ptep = addr_gpa2hva(vm, pgd);
+ if (!*ptep)
+ continue;
+ fprintf(stream, "%*spgd: %lx: %lx at %p\n", indent, "",
+ pgd, *ptep, ptep);
+ pte_dump(stream, vm, indent + 1,
+ pte_addr(vm, *ptep), level - 1);
+ }
+}
+
+void riscv_vcpu_mmu_setup(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vm *vm = vcpu->vm;
+ unsigned long satp;
+
+ /*
+ * The RISC-V Sv48 MMU mode supports 56-bit physical address
+ * for 48-bit virtual address with 4KB last level page size.
+ */
+ switch (vm->mode) {
+ case VM_MODE_P52V48_4K:
+ case VM_MODE_P48V48_4K:
+ case VM_MODE_P40V48_4K:
+ break;
+ default:
+ TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
+ }
+
+ satp = (vm->pgd >> PGTBL_PAGE_SIZE_SHIFT) & SATP_PPN;
+ satp |= SATP_MODE_48;
+
+ vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(satp), satp);
+}
+
+void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
+{
+ struct kvm_riscv_core core;
+
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(mode), &core.mode);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.pc), &core.regs.pc);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.ra), &core.regs.ra);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.sp), &core.regs.sp);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.gp), &core.regs.gp);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.tp), &core.regs.tp);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t0), &core.regs.t0);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t1), &core.regs.t1);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t2), &core.regs.t2);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s0), &core.regs.s0);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s1), &core.regs.s1);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a0), &core.regs.a0);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a1), &core.regs.a1);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a2), &core.regs.a2);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a3), &core.regs.a3);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a4), &core.regs.a4);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a5), &core.regs.a5);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a6), &core.regs.a6);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a7), &core.regs.a7);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s2), &core.regs.s2);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s3), &core.regs.s3);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s4), &core.regs.s4);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s5), &core.regs.s5);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s6), &core.regs.s6);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s7), &core.regs.s7);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s8), &core.regs.s8);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s9), &core.regs.s9);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s10), &core.regs.s10);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s11), &core.regs.s11);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t3), &core.regs.t3);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t4), &core.regs.t4);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t5), &core.regs.t5);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t6), &core.regs.t6);
+
+ fprintf(stream,
+ " MODE: 0x%lx\n", core.mode);
+ fprintf(stream,
+ " PC: 0x%016lx RA: 0x%016lx SP: 0x%016lx GP: 0x%016lx\n",
+ core.regs.pc, core.regs.ra, core.regs.sp, core.regs.gp);
+ fprintf(stream,
+ " TP: 0x%016lx T0: 0x%016lx T1: 0x%016lx T2: 0x%016lx\n",
+ core.regs.tp, core.regs.t0, core.regs.t1, core.regs.t2);
+ fprintf(stream,
+ " S0: 0x%016lx S1: 0x%016lx A0: 0x%016lx A1: 0x%016lx\n",
+ core.regs.s0, core.regs.s1, core.regs.a0, core.regs.a1);
+ fprintf(stream,
+ " A2: 0x%016lx A3: 0x%016lx A4: 0x%016lx A5: 0x%016lx\n",
+ core.regs.a2, core.regs.a3, core.regs.a4, core.regs.a5);
+ fprintf(stream,
+ " A6: 0x%016lx A7: 0x%016lx S2: 0x%016lx S3: 0x%016lx\n",
+ core.regs.a6, core.regs.a7, core.regs.s2, core.regs.s3);
+ fprintf(stream,
+ " S4: 0x%016lx S5: 0x%016lx S6: 0x%016lx S7: 0x%016lx\n",
+ core.regs.s4, core.regs.s5, core.regs.s6, core.regs.s7);
+ fprintf(stream,
+ " S8: 0x%016lx S9: 0x%016lx S10: 0x%016lx S11: 0x%016lx\n",
+ core.regs.s8, core.regs.s9, core.regs.s10, core.regs.s11);
+ fprintf(stream,
+ " T3: 0x%016lx T4: 0x%016lx T5: 0x%016lx T6: 0x%016lx\n",
+ core.regs.t3, core.regs.t4, core.regs.t5, core.regs.t6);
+}
+
+static void __aligned(16) guest_unexp_trap(void)
+{
+ sbi_ecall(KVM_RISCV_SELFTESTS_SBI_EXT,
+ KVM_RISCV_SELFTESTS_SBI_UNEXP,
+ 0, 0, 0, 0, 0, 0);
+}
+
+void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
+{
+ vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.pc), (unsigned long)guest_code);
+}
+
+struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
+{
+ int r;
+ size_t stack_size;
+ unsigned long stack_vaddr;
+ unsigned long current_gp = 0;
+ struct kvm_mp_state mps;
+ struct kvm_vcpu *vcpu;
+
+ stack_size = vm->page_size == 4096 ? DEFAULT_STACK_PGS * vm->page_size :
+ vm->page_size;
+ stack_vaddr = __vm_vaddr_alloc(vm, stack_size,
+ DEFAULT_RISCV_GUEST_STACK_VADDR_MIN,
+ MEM_REGION_DATA);
+
+ vcpu = __vm_vcpu_add(vm, vcpu_id);
+ riscv_vcpu_mmu_setup(vcpu);
+
+ /*
+ * With SBI HSM support in KVM RISC-V, all secondary VCPUs are
+ * powered-off by default so we ensure that all secondary VCPUs
+ * are powered-on using KVM_SET_MP_STATE ioctl().
+ */
+ mps.mp_state = KVM_MP_STATE_RUNNABLE;
+ r = __vcpu_ioctl(vcpu, KVM_SET_MP_STATE, &mps);
+ TEST_ASSERT(!r, "IOCTL KVM_SET_MP_STATE failed (error %d)", r);
+
+ /* Setup global pointer of guest to be same as the host */
+ asm volatile (
+ "add %0, gp, zero" : "=r" (current_gp) : : "memory");
+ vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.gp), current_gp);
+
+ /* Setup stack pointer and program counter of guest */
+ vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.sp), stack_vaddr + stack_size);
+
+ /* Setup sscratch for guest_get_vcpuid() */
+ vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(sscratch), vcpu_id);
+
+ /* Setup default exception vector of guest */
+ vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(stvec), (unsigned long)guest_unexp_trap);
+
+ return vcpu;
+}
+
+void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
+{
+ va_list ap;
+ uint64_t id = RISCV_CORE_REG(regs.a0);
+ int i;
+
+ TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n"
+ " num: %u", num);
+
+ va_start(ap, num);
+
+ for (i = 0; i < num; i++) {
+ switch (i) {
+ case 0:
+ id = RISCV_CORE_REG(regs.a0);
+ break;
+ case 1:
+ id = RISCV_CORE_REG(regs.a1);
+ break;
+ case 2:
+ id = RISCV_CORE_REG(regs.a2);
+ break;
+ case 3:
+ id = RISCV_CORE_REG(regs.a3);
+ break;
+ case 4:
+ id = RISCV_CORE_REG(regs.a4);
+ break;
+ case 5:
+ id = RISCV_CORE_REG(regs.a5);
+ break;
+ case 6:
+ id = RISCV_CORE_REG(regs.a6);
+ break;
+ case 7:
+ id = RISCV_CORE_REG(regs.a7);
+ break;
+ }
+ vcpu_set_reg(vcpu, id, va_arg(ap, uint64_t));
+ }
+
+ va_end(ap);
+}
+
+void kvm_exit_unexpected_exception(int vector, int ec)
+{
+ ucall(UCALL_UNHANDLED, 2, vector, ec);
+}
+
+void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ if (get_ucall(vcpu, &uc) == UCALL_UNHANDLED) {
+ TEST_FAIL("Unexpected exception (vector:0x%lx, ec:0x%lx)",
+ uc.args[0], uc.args[1]);
+ }
+}
+
+struct handlers {
+ exception_handler_fn exception_handlers[NR_VECTORS][NR_EXCEPTIONS];
+};
+
+void route_exception(struct ex_regs *regs)
+{
+ struct handlers *handlers = (struct handlers *)exception_handlers;
+ int vector = 0, ec;
+
+ ec = regs->cause & ~CAUSE_IRQ_FLAG;
+ if (ec >= NR_EXCEPTIONS)
+ goto unexpected_exception;
+
+ /* Use the same handler for all the interrupts */
+ if (regs->cause & CAUSE_IRQ_FLAG) {
+ vector = 1;
+ ec = 0;
+ }
+
+ if (handlers && handlers->exception_handlers[vector][ec])
+ return handlers->exception_handlers[vector][ec](regs);
+
+unexpected_exception:
+ return kvm_exit_unexpected_exception(vector, ec);
+}
+
+void vcpu_init_vector_tables(struct kvm_vcpu *vcpu)
+{
+ extern char exception_vectors;
+
+ vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(stvec), (unsigned long)&exception_vectors);
+}
+
+void vm_init_vector_tables(struct kvm_vm *vm)
+{
+ vm->handlers = __vm_vaddr_alloc(vm, sizeof(struct handlers),
+ vm->page_size, MEM_REGION_DATA);
+
+ *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
+}
+
+void vm_install_exception_handler(struct kvm_vm *vm, int vector, exception_handler_fn handler)
+{
+ struct handlers *handlers = addr_gva2hva(vm, vm->handlers);
+
+ assert(vector < NR_EXCEPTIONS);
+ handlers->exception_handlers[0][vector] = handler;
+}
+
+void vm_install_interrupt_handler(struct kvm_vm *vm, exception_handler_fn handler)
+{
+ struct handlers *handlers = addr_gva2hva(vm, vm->handlers);
+
+ handlers->exception_handlers[1][0] = handler;
+}
+
+uint32_t guest_get_vcpuid(void)
+{
+ return csr_read(CSR_SSCRATCH);
+}
+
+struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
+ unsigned long arg1, unsigned long arg2,
+ unsigned long arg3, unsigned long arg4,
+ unsigned long arg5)
+{
+ register uintptr_t a0 asm ("a0") = (uintptr_t)(arg0);
+ register uintptr_t a1 asm ("a1") = (uintptr_t)(arg1);
+ register uintptr_t a2 asm ("a2") = (uintptr_t)(arg2);
+ register uintptr_t a3 asm ("a3") = (uintptr_t)(arg3);
+ register uintptr_t a4 asm ("a4") = (uintptr_t)(arg4);
+ register uintptr_t a5 asm ("a5") = (uintptr_t)(arg5);
+ register uintptr_t a6 asm ("a6") = (uintptr_t)(fid);
+ register uintptr_t a7 asm ("a7") = (uintptr_t)(ext);
+ struct sbiret ret;
+
+ asm volatile (
+ "ecall"
+ : "+r" (a0), "+r" (a1)
+ : "r" (a2), "r" (a3), "r" (a4), "r" (a5), "r" (a6), "r" (a7)
+ : "memory");
+ ret.error = a0;
+ ret.value = a1;
+
+ return ret;
+}
+
+bool guest_sbi_probe_extension(int extid, long *out_val)
+{
+ struct sbiret ret;
+
+ ret = sbi_ecall(SBI_EXT_BASE, SBI_EXT_BASE_PROBE_EXT, extid,
+ 0, 0, 0, 0, 0);
+
+ __GUEST_ASSERT(!ret.error || ret.error == SBI_ERR_NOT_SUPPORTED,
+ "ret.error=%ld, ret.value=%ld\n", ret.error, ret.value);
+
+ if (ret.error == SBI_ERR_NOT_SUPPORTED)
+ return false;
+
+ if (out_val)
+ *out_val = ret.value;
+
+ return true;
+}
diff --git a/tools/testing/selftests/kvm/lib/riscv/ucall.c b/tools/testing/selftests/kvm/lib/riscv/ucall.c
new file mode 100644
index 000000000000..14ee17151a59
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/riscv/ucall.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ucall support. A ucall is a "hypercall to userspace".
+ *
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
+ */
+
+#include <linux/kvm.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+
+void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+
+ if (run->exit_reason == KVM_EXIT_RISCV_SBI &&
+ run->riscv_sbi.extension_id == KVM_RISCV_SELFTESTS_SBI_EXT) {
+ switch (run->riscv_sbi.function_id) {
+ case KVM_RISCV_SELFTESTS_SBI_UCALL:
+ return (void *)run->riscv_sbi.args[0];
+ case KVM_RISCV_SELFTESTS_SBI_UNEXP:
+ vcpu_dump(stderr, vcpu, 2);
+ TEST_ASSERT(0, "Unexpected trap taken by guest");
+ break;
+ default:
+ break;
+ }
+ }
+ return NULL;
+}
diff --git a/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c b/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c
new file mode 100644
index 000000000000..2c432fa164f1
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Test handler for the s390x DIAGNOSE 0x0318 instruction.
+ *
+ * Copyright (C) 2020, IBM
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+
+#define ICPT_INSTRUCTION 0x04
+#define IPA0_DIAG 0x8300
+
+static void guest_code(void)
+{
+ uint64_t diag318_info = 0x12345678;
+
+ asm volatile ("diag %0,0,0x318\n" : : "d" (diag318_info));
+}
+
+/*
+ * The DIAGNOSE 0x0318 instruction call must be handled via userspace. As such,
+ * we create an ad-hoc VM here to handle the instruction then extract the
+ * necessary data. It is up to the caller to decide what to do with that data.
+ */
+static uint64_t diag318_handler(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct kvm_run *run;
+ uint64_t reg;
+ uint64_t diag318_info;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ vcpu_run(vcpu);
+ run = vcpu->run;
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+ TEST_ASSERT(run->s390_sieic.icptcode == ICPT_INSTRUCTION,
+ "Unexpected intercept code: 0x%x", run->s390_sieic.icptcode);
+ TEST_ASSERT((run->s390_sieic.ipa & 0xff00) == IPA0_DIAG,
+ "Unexpected IPA0 code: 0x%x", (run->s390_sieic.ipa & 0xff00));
+
+ reg = (run->s390_sieic.ipa & 0x00f0) >> 4;
+ diag318_info = run->s.regs.gprs[reg];
+
+ TEST_ASSERT(diag318_info != 0, "DIAGNOSE 0x0318 info not set");
+
+ kvm_vm_free(vm);
+
+ return diag318_info;
+}
+
+uint64_t get_diag318_info(void)
+{
+ static uint64_t diag318_info;
+ static bool printed_skip;
+
+ /*
+ * If KVM does not support diag318, then return 0 to
+ * ensure tests do not break.
+ */
+ if (!kvm_has_cap(KVM_CAP_S390_DIAG318)) {
+ if (!printed_skip) {
+ fprintf(stdout, "KVM_CAP_S390_DIAG318 not supported. "
+ "Skipping diag318 test.\n");
+ printed_skip = true;
+ }
+ return 0;
+ }
+
+ /*
+ * If a test has previously requested the diag318 info,
+ * then don't bother spinning up a temporary VM again.
+ */
+ if (!diag318_info)
+ diag318_info = diag318_handler();
+
+ return diag318_info;
+}
diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390x/processor.c
index a88c5d665725..4ad4492eea1d 100644
--- a/tools/testing/selftests/kvm/lib/s390x/processor.c
+++ b/tools/testing/selftests/kvm/lib/s390x/processor.c
@@ -5,17 +5,12 @@
* Copyright (C) 2019, Red Hat, Inc.
*/
-#define _GNU_SOURCE /* for program_invocation_name */
-
#include "processor.h"
#include "kvm_util.h"
-#include "../kvm_util_internal.h"
-
-#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
#define PAGES_PER_REGION 4
-void virt_pgd_alloc(struct kvm_vm *vm, uint32_t memslot)
+void virt_arch_pgd_alloc(struct kvm_vm *vm)
{
vm_paddr_t paddr;
@@ -26,7 +21,8 @@ void virt_pgd_alloc(struct kvm_vm *vm, uint32_t memslot)
return;
paddr = vm_phy_pages_alloc(vm, PAGES_PER_REGION,
- KVM_GUEST_PAGE_TABLE_MIN_PADDR, memslot);
+ KVM_GUEST_PAGE_TABLE_MIN_PADDR,
+ vm->memslots[MEM_REGION_PT]);
memset(addr_gpa2hva(vm, paddr), 0xff, PAGES_PER_REGION * vm->page_size);
vm->pgd = paddr;
@@ -38,12 +34,12 @@ void virt_pgd_alloc(struct kvm_vm *vm, uint32_t memslot)
* a page table (ri == 4). Returns a suitable region/segment table entry
* which points to the freshly allocated pages.
*/
-static uint64_t virt_alloc_region(struct kvm_vm *vm, int ri, uint32_t memslot)
+static uint64_t virt_alloc_region(struct kvm_vm *vm, int ri)
{
uint64_t taddr;
taddr = vm_phy_pages_alloc(vm, ri < 4 ? PAGES_PER_REGION : 1,
- KVM_GUEST_PAGE_TABLE_MIN_PADDR, memslot);
+ KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
memset(addr_gpa2hva(vm, taddr), 0xff, PAGES_PER_REGION * vm->page_size);
return (taddr & REGION_ENTRY_ORIGIN)
@@ -51,8 +47,7 @@ static uint64_t virt_alloc_region(struct kvm_vm *vm, int ri, uint32_t memslot)
| ((ri < 4 ? (PAGES_PER_REGION - 1) : 0) & REGION_ENTRY_LENGTH);
}
-void virt_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa,
- uint32_t memslot)
+void virt_arch_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa)
{
int ri, idx;
uint64_t *entry;
@@ -79,7 +74,7 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa,
for (ri = 1; ri <= 4; ri++) {
idx = (gva >> (64 - 11 * ri)) & 0x7ffu;
if (entry[idx] & REGION_ENTRY_INVALID)
- entry[idx] = virt_alloc_region(vm, ri, memslot);
+ entry[idx] = virt_alloc_region(vm, ri);
entry = addr_gpa2hva(vm, entry[idx] & REGION_ENTRY_ORIGIN);
}
@@ -91,7 +86,7 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa,
entry[idx] = gpa;
}
-vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
{
int ri, idx;
uint64_t *entry;
@@ -152,7 +147,7 @@ static void virt_dump_region(FILE *stream, struct kvm_vm *vm, uint8_t indent,
}
}
-void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
if (!vm->pgd_created)
return;
@@ -160,84 +155,69 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
virt_dump_region(stream, vm, indent, vm->pgd);
}
-struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
- void *guest_code)
+void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
{
- /*
- * The additional amount of pages required for the page tables is:
- * 1 * n / 256 + 4 * (n / 256) / 2048 + 4 * (n / 256) / 2048^2 + ...
- * which is definitely smaller than (n / 256) * 2.
- */
- uint64_t extra_pg_pages = extra_mem_pages / 256 * 2;
- struct kvm_vm *vm;
-
- vm = vm_create(VM_MODE_DEFAULT,
- DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
-
- kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
- vm_vcpu_add_default(vm, vcpuid, guest_code);
-
- return vm;
+ vcpu->run->psw_addr = (uintptr_t)guest_code;
}
-void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
+struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
{
size_t stack_size = DEFAULT_STACK_PGS * getpagesize();
uint64_t stack_vaddr;
struct kvm_regs regs;
struct kvm_sregs sregs;
- struct kvm_run *run;
+ struct kvm_vcpu *vcpu;
TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x",
vm->page_size);
- stack_vaddr = vm_vaddr_alloc(vm, stack_size,
- DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0);
+ stack_vaddr = __vm_vaddr_alloc(vm, stack_size,
+ DEFAULT_GUEST_STACK_VADDR_MIN,
+ MEM_REGION_DATA);
- vm_vcpu_add(vm, vcpuid);
+ vcpu = __vm_vcpu_add(vm, vcpu_id);
/* Setup guest registers */
- vcpu_regs_get(vm, vcpuid, &regs);
+ vcpu_regs_get(vcpu, &regs);
regs.gprs[15] = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize()) - 160;
- vcpu_regs_set(vm, vcpuid, &regs);
+ vcpu_regs_set(vcpu, &regs);
- vcpu_sregs_get(vm, vcpuid, &sregs);
+ vcpu_sregs_get(vcpu, &sregs);
sregs.crs[0] |= 0x00040000; /* Enable floating point regs */
sregs.crs[1] = vm->pgd | 0xf; /* Primary region table */
- vcpu_sregs_set(vm, vcpuid, &sregs);
+ vcpu_sregs_set(vcpu, &sregs);
+
+ vcpu->run->psw_mask = 0x0400000180000000ULL; /* DAT enabled + 64 bit mode */
- run = vcpu_state(vm, vcpuid);
- run->psw_mask = 0x0400000180000000ULL; /* DAT enabled + 64 bit mode */
- run->psw_addr = (uintptr_t)guest_code;
+ return vcpu;
}
-void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
+void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
{
va_list ap;
struct kvm_regs regs;
int i;
TEST_ASSERT(num >= 1 && num <= 5, "Unsupported number of args,\n"
- " num: %u\n",
+ " num: %u",
num);
va_start(ap, num);
- vcpu_regs_get(vm, vcpuid, &regs);
+ vcpu_regs_get(vcpu, &regs);
for (i = 0; i < num; i++)
regs.gprs[i + 2] = va_arg(ap, uint64_t);
- vcpu_regs_set(vm, vcpuid, &regs);
+ vcpu_regs_set(vcpu, &regs);
va_end(ap);
}
-void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
+void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
-
- if (!vcpu)
- return;
-
fprintf(stream, "%*spstate: psw: 0x%.16llx:0x%.16llx\n",
- indent, "", vcpu->state->psw_mask, vcpu->state->psw_addr);
+ indent, "", vcpu->run->psw_mask, vcpu->run->psw_addr);
+}
+
+void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
+{
}
diff --git a/tools/testing/selftests/kvm/lib/s390x/ucall.c b/tools/testing/selftests/kvm/lib/s390x/ucall.c
index fd589dc9bfab..cca98734653d 100644
--- a/tools/testing/selftests/kvm/lib/s390x/ucall.c
+++ b/tools/testing/selftests/kvm/lib/s390x/ucall.c
@@ -6,37 +6,9 @@
*/
#include "kvm_util.h"
-void ucall_init(struct kvm_vm *vm, void *arg)
+void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
{
-}
-
-void ucall_uninit(struct kvm_vm *vm)
-{
-}
-
-void ucall(uint64_t cmd, int nargs, ...)
-{
- struct ucall uc = {
- .cmd = cmd,
- };
- va_list va;
- int i;
-
- nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS;
-
- va_start(va, nargs);
- for (i = 0; i < nargs; ++i)
- uc.args[i] = va_arg(va, uint64_t);
- va_end(va);
-
- /* Exit via DIAGNOSE 0x501 (normally used for breakpoints) */
- asm volatile ("diag 0,%0,0x501" : : "a"(&uc) : "memory");
-}
-
-uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
-{
- struct kvm_run *run = vcpu_state(vm, vcpu_id);
- struct ucall ucall = {};
+ struct kvm_run *run = vcpu->run;
if (run->exit_reason == KVM_EXIT_S390_SIEIC &&
run->s390_sieic.icptcode == 4 &&
@@ -44,13 +16,7 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
(run->s390_sieic.ipb >> 16) == 0x501) {
int reg = run->s390_sieic.ipa & 0xf;
- memcpy(&ucall, addr_gva2hva(vm, run->s.regs.gprs[reg]),
- sizeof(ucall));
-
- vcpu_run_complete_io(vm, vcpu_id);
- if (uc)
- memcpy(uc, &ucall, sizeof(ucall));
+ return (void *)run->s.regs.gprs[reg];
}
-
- return ucall.cmd;
+ return NULL;
}
diff --git a/tools/testing/selftests/kvm/lib/sparsebit.c b/tools/testing/selftests/kvm/lib/sparsebit.c
index 031ba3c932ed..cfed9d26cc71 100644
--- a/tools/testing/selftests/kvm/lib/sparsebit.c
+++ b/tools/testing/selftests/kvm/lib/sparsebit.c
@@ -202,7 +202,7 @@ static sparsebit_num_t node_num_set(struct node *nodep)
/* Returns a pointer to the node that describes the
* lowest bit index.
*/
-static struct node *node_first(struct sparsebit *s)
+static struct node *node_first(const struct sparsebit *s)
{
struct node *nodep;
@@ -216,7 +216,7 @@ static struct node *node_first(struct sparsebit *s)
* lowest bit index > the index of the node pointed to by np.
* Returns NULL if no node with a higher index exists.
*/
-static struct node *node_next(struct sparsebit *s, struct node *np)
+static struct node *node_next(const struct sparsebit *s, struct node *np)
{
struct node *nodep = np;
@@ -244,7 +244,7 @@ static struct node *node_next(struct sparsebit *s, struct node *np)
* highest index < the index of the node pointed to by np.
* Returns NULL if no node with a lower index exists.
*/
-static struct node *node_prev(struct sparsebit *s, struct node *np)
+static struct node *node_prev(const struct sparsebit *s, struct node *np)
{
struct node *nodep = np;
@@ -273,7 +273,7 @@ static struct node *node_prev(struct sparsebit *s, struct node *np)
* subtree and duplicates the bit settings to the newly allocated nodes.
* Returns the newly allocated copy of subtree.
*/
-static struct node *node_copy_subtree(struct node *subtree)
+static struct node *node_copy_subtree(const struct node *subtree)
{
struct node *root;
@@ -307,7 +307,7 @@ static struct node *node_copy_subtree(struct node *subtree)
* index is within the bits described by the mask bits or the number of
* contiguous bits set after the mask. Returns NULL if there is no such node.
*/
-static struct node *node_find(struct sparsebit *s, sparsebit_idx_t idx)
+static struct node *node_find(const struct sparsebit *s, sparsebit_idx_t idx)
{
struct node *nodep;
@@ -393,7 +393,7 @@ static struct node *node_add(struct sparsebit *s, sparsebit_idx_t idx)
}
/* Returns whether all the bits in the sparsebit array are set. */
-bool sparsebit_all_set(struct sparsebit *s)
+bool sparsebit_all_set(const struct sparsebit *s)
{
/*
* If any nodes there must be at least one bit set. Only case
@@ -634,7 +634,6 @@ static void node_reduce(struct sparsebit *s, struct node *nodep)
tmp = node_prev(s, nodep);
node_rm(s, nodep);
- nodep = NULL;
nodep = tmp;
reduction_performed = true;
@@ -776,7 +775,7 @@ static void node_reduce(struct sparsebit *s, struct node *nodep)
/* Returns whether the bit at the index given by idx, within the
* sparsebit array is set or not.
*/
-bool sparsebit_is_set(struct sparsebit *s, sparsebit_idx_t idx)
+bool sparsebit_is_set(const struct sparsebit *s, sparsebit_idx_t idx)
{
struct node *nodep;
@@ -922,7 +921,7 @@ static inline sparsebit_idx_t node_first_clear(struct node *nodep, int start)
* used by test cases after they detect an unexpected condition, as a means
* to capture diagnostic information.
*/
-static void sparsebit_dump_internal(FILE *stream, struct sparsebit *s,
+static void sparsebit_dump_internal(FILE *stream, const struct sparsebit *s,
unsigned int indent)
{
/* Dump the contents of s */
@@ -970,7 +969,7 @@ void sparsebit_free(struct sparsebit **sbitp)
* sparsebit_alloc(). It can though already have bits set, which
* if different from src will be cleared.
*/
-void sparsebit_copy(struct sparsebit *d, struct sparsebit *s)
+void sparsebit_copy(struct sparsebit *d, const struct sparsebit *s)
{
/* First clear any bits already set in the destination */
sparsebit_clear_all(d);
@@ -982,7 +981,7 @@ void sparsebit_copy(struct sparsebit *d, struct sparsebit *s)
}
/* Returns whether num consecutive bits starting at idx are all set. */
-bool sparsebit_is_set_num(struct sparsebit *s,
+bool sparsebit_is_set_num(const struct sparsebit *s,
sparsebit_idx_t idx, sparsebit_num_t num)
{
sparsebit_idx_t next_cleared;
@@ -1006,14 +1005,14 @@ bool sparsebit_is_set_num(struct sparsebit *s,
}
/* Returns whether the bit at the index given by idx. */
-bool sparsebit_is_clear(struct sparsebit *s,
+bool sparsebit_is_clear(const struct sparsebit *s,
sparsebit_idx_t idx)
{
return !sparsebit_is_set(s, idx);
}
/* Returns whether num consecutive bits starting at idx are all cleared. */
-bool sparsebit_is_clear_num(struct sparsebit *s,
+bool sparsebit_is_clear_num(const struct sparsebit *s,
sparsebit_idx_t idx, sparsebit_num_t num)
{
sparsebit_idx_t next_set;
@@ -1042,13 +1041,13 @@ bool sparsebit_is_clear_num(struct sparsebit *s,
* value. Use sparsebit_any_set(), instead of sparsebit_num_set() > 0,
* to determine if the sparsebit array has any bits set.
*/
-sparsebit_num_t sparsebit_num_set(struct sparsebit *s)
+sparsebit_num_t sparsebit_num_set(const struct sparsebit *s)
{
return s->num_set;
}
/* Returns whether any bit is set in the sparsebit array. */
-bool sparsebit_any_set(struct sparsebit *s)
+bool sparsebit_any_set(const struct sparsebit *s)
{
/*
* Nodes only describe set bits. If any nodes then there
@@ -1071,20 +1070,20 @@ bool sparsebit_any_set(struct sparsebit *s)
}
/* Returns whether all the bits in the sparsebit array are cleared. */
-bool sparsebit_all_clear(struct sparsebit *s)
+bool sparsebit_all_clear(const struct sparsebit *s)
{
return !sparsebit_any_set(s);
}
/* Returns whether all the bits in the sparsebit array are set. */
-bool sparsebit_any_clear(struct sparsebit *s)
+bool sparsebit_any_clear(const struct sparsebit *s)
{
return !sparsebit_all_set(s);
}
/* Returns the index of the first set bit. Abort if no bits are set.
*/
-sparsebit_idx_t sparsebit_first_set(struct sparsebit *s)
+sparsebit_idx_t sparsebit_first_set(const struct sparsebit *s)
{
struct node *nodep;
@@ -1098,7 +1097,7 @@ sparsebit_idx_t sparsebit_first_set(struct sparsebit *s)
/* Returns the index of the first cleared bit. Abort if
* no bits are cleared.
*/
-sparsebit_idx_t sparsebit_first_clear(struct sparsebit *s)
+sparsebit_idx_t sparsebit_first_clear(const struct sparsebit *s)
{
struct node *nodep1, *nodep2;
@@ -1152,7 +1151,7 @@ sparsebit_idx_t sparsebit_first_clear(struct sparsebit *s)
/* Returns index of next bit set within s after the index given by prev.
* Returns 0 if there are no bits after prev that are set.
*/
-sparsebit_idx_t sparsebit_next_set(struct sparsebit *s,
+sparsebit_idx_t sparsebit_next_set(const struct sparsebit *s,
sparsebit_idx_t prev)
{
sparsebit_idx_t lowest_possible = prev + 1;
@@ -1245,7 +1244,7 @@ sparsebit_idx_t sparsebit_next_set(struct sparsebit *s,
/* Returns index of next bit cleared within s after the index given by prev.
* Returns 0 if there are no bits after prev that are cleared.
*/
-sparsebit_idx_t sparsebit_next_clear(struct sparsebit *s,
+sparsebit_idx_t sparsebit_next_clear(const struct sparsebit *s,
sparsebit_idx_t prev)
{
sparsebit_idx_t lowest_possible = prev + 1;
@@ -1301,7 +1300,7 @@ sparsebit_idx_t sparsebit_next_clear(struct sparsebit *s,
* and returns the index of the first sequence of num consecutively set
* bits. Returns a value of 0 of no such sequence exists.
*/
-sparsebit_idx_t sparsebit_next_set_num(struct sparsebit *s,
+sparsebit_idx_t sparsebit_next_set_num(const struct sparsebit *s,
sparsebit_idx_t start, sparsebit_num_t num)
{
sparsebit_idx_t idx;
@@ -1336,7 +1335,7 @@ sparsebit_idx_t sparsebit_next_set_num(struct sparsebit *s,
* and returns the index of the first sequence of num consecutively cleared
* bits. Returns a value of 0 of no such sequence exists.
*/
-sparsebit_idx_t sparsebit_next_clear_num(struct sparsebit *s,
+sparsebit_idx_t sparsebit_next_clear_num(const struct sparsebit *s,
sparsebit_idx_t start, sparsebit_num_t num)
{
sparsebit_idx_t idx;
@@ -1584,7 +1583,7 @@ static size_t display_range(FILE *stream, sparsebit_idx_t low,
* contiguous bits. This is done because '-' is used to specify command-line
* options, and sometimes ranges are specified as command-line arguments.
*/
-void sparsebit_dump(FILE *stream, struct sparsebit *s,
+void sparsebit_dump(FILE *stream, const struct sparsebit *s,
unsigned int indent)
{
size_t current_line_len = 0;
@@ -1682,7 +1681,7 @@ void sparsebit_dump(FILE *stream, struct sparsebit *s,
* s. On error, diagnostic information is printed to stderr and
* abort is called.
*/
-void sparsebit_validate_internal(struct sparsebit *s)
+void sparsebit_validate_internal(const struct sparsebit *s)
{
bool error_detected = false;
struct node *nodep, *prev = NULL;
@@ -1866,7 +1865,7 @@ void sparsebit_validate_internal(struct sparsebit *s)
* of total bits set.
*/
if (s->num_set != total_bits_set) {
- fprintf(stderr, "Number of bits set missmatch,\n"
+ fprintf(stderr, "Number of bits set mismatch,\n"
" s->num_set: 0x%lx total_bits_set: 0x%lx",
s->num_set, total_bits_set);
@@ -1890,7 +1889,6 @@ void sparsebit_validate_internal(struct sparsebit *s)
*/
#include <stdlib.h>
-#include <assert.h>
struct range {
sparsebit_idx_t first, last;
diff --git a/tools/testing/selftests/kvm/lib/string_override.c b/tools/testing/selftests/kvm/lib/string_override.c
new file mode 100644
index 000000000000..5d1c87277c49
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/string_override.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <stddef.h>
+
+/*
+ * Override the "basic" built-in string helpers so that they can be used in
+ * guest code. KVM selftests don't support dynamic loading in guest code and
+ * will jump into the weeds if the compiler decides to insert an out-of-line
+ * call via the PLT.
+ */
+int memcmp(const void *cs, const void *ct, size_t count)
+{
+ const unsigned char *su1, *su2;
+ int res = 0;
+
+ for (su1 = cs, su2 = ct; 0 < count; ++su1, ++su2, count--) {
+ if ((res = *su1 - *su2) != 0)
+ break;
+ }
+ return res;
+}
+
+void *memcpy(void *dest, const void *src, size_t count)
+{
+ char *tmp = dest;
+ const char *s = src;
+
+ while (count--)
+ *tmp++ = *s++;
+ return dest;
+}
+
+void *memset(void *s, int c, size_t count)
+{
+ char *xs = s;
+
+ while (count--)
+ *xs++ = c;
+ return s;
+}
+
+size_t strnlen(const char *s, size_t count)
+{
+ const char *sc;
+
+ for (sc = s; count-- && *sc != '\0'; ++sc)
+ /* nothing */;
+ return sc - s;
+}
diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c
index 689e97c27ee2..5a8f8becb129 100644
--- a/tools/testing/selftests/kvm/lib/test_util.c
+++ b/tools/testing/selftests/kvm/lib/test_util.c
@@ -4,13 +4,40 @@
*
* Copyright (C) 2020, Google LLC.
*/
-#include <stdlib.h>
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdarg.h>
+#include <assert.h>
#include <ctype.h>
#include <limits.h>
-#include <assert.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <linux/mman.h>
+#include "linux/kernel.h"
+
#include "test_util.h"
/*
+ * Random number generator that is usable from guest code. This is the
+ * Park-Miller LCG using standard constants.
+ */
+
+struct guest_random_state new_guest_random_state(uint32_t seed)
+{
+ struct guest_random_state s = {.seed = seed};
+ return s;
+}
+
+uint32_t guest_random_u32(struct guest_random_state *state)
+{
+ state->seed = (uint64_t)state->seed * 48271 % ((uint32_t)(1 << 31) - 1);
+ return state->seed;
+}
+
+/*
* Parses "[0-9]+[kmgt]?".
*/
size_t parse_size(const char *size)
@@ -81,6 +108,21 @@ struct timespec timespec_sub(struct timespec ts1, struct timespec ts2)
return timespec_add_ns((struct timespec){0}, ns1 - ns2);
}
+struct timespec timespec_elapsed(struct timespec start)
+{
+ struct timespec end;
+
+ clock_gettime(CLOCK_MONOTONIC, &end);
+ return timespec_sub(end, start);
+}
+
+struct timespec timespec_div(struct timespec ts, int divisor)
+{
+ int64_t ns = timespec_to_ns(ts) / divisor;
+
+ return timespec_add_ns((struct timespec){0}, ns);
+}
+
void print_skip(const char *fmt, ...)
{
va_list ap;
@@ -91,3 +133,287 @@ void print_skip(const char *fmt, ...)
va_end(ap);
puts(", skipping test");
}
+
+bool thp_configured(void)
+{
+ int ret;
+ struct stat statbuf;
+
+ ret = stat("/sys/kernel/mm/transparent_hugepage", &statbuf);
+ TEST_ASSERT(ret == 0 || (ret == -1 && errno == ENOENT),
+ "Error in stating /sys/kernel/mm/transparent_hugepage");
+
+ return ret == 0;
+}
+
+size_t get_trans_hugepagesz(void)
+{
+ size_t size;
+ FILE *f;
+ int ret;
+
+ TEST_ASSERT(thp_configured(), "THP is not configured in host kernel");
+
+ f = fopen("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size", "r");
+ TEST_ASSERT(f != NULL, "Error in opening transparent_hugepage/hpage_pmd_size");
+
+ ret = fscanf(f, "%ld", &size);
+ ret = fscanf(f, "%ld", &size);
+ TEST_ASSERT(ret < 1, "Error reading transparent_hugepage/hpage_pmd_size");
+ fclose(f);
+
+ return size;
+}
+
+size_t get_def_hugetlb_pagesz(void)
+{
+ char buf[64];
+ const char *hugepagesize = "Hugepagesize:";
+ const char *hugepages_total = "HugePages_Total:";
+ FILE *f;
+
+ f = fopen("/proc/meminfo", "r");
+ TEST_ASSERT(f != NULL, "Error in opening /proc/meminfo");
+
+ while (fgets(buf, sizeof(buf), f) != NULL) {
+ if (strstr(buf, hugepages_total) == buf) {
+ unsigned long long total = strtoull(buf + strlen(hugepages_total), NULL, 10);
+ if (!total) {
+ fprintf(stderr, "HUGETLB is not enabled in /proc/sys/vm/nr_hugepages\n");
+ exit(KSFT_SKIP);
+ }
+ }
+ if (strstr(buf, hugepagesize) == buf) {
+ fclose(f);
+ return strtoull(buf + strlen(hugepagesize), NULL, 10) << 10;
+ }
+ }
+
+ if (feof(f)) {
+ fprintf(stderr, "HUGETLB is not configured in host kernel");
+ exit(KSFT_SKIP);
+ }
+
+ TEST_FAIL("Error in reading /proc/meminfo");
+}
+
+#define ANON_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS)
+#define ANON_HUGE_FLAGS (ANON_FLAGS | MAP_HUGETLB)
+
+const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i)
+{
+ static const struct vm_mem_backing_src_alias aliases[] = {
+ [VM_MEM_SRC_ANONYMOUS] = {
+ .name = "anonymous",
+ .flag = ANON_FLAGS,
+ },
+ [VM_MEM_SRC_ANONYMOUS_THP] = {
+ .name = "anonymous_thp",
+ .flag = ANON_FLAGS,
+ },
+ [VM_MEM_SRC_ANONYMOUS_HUGETLB] = {
+ .name = "anonymous_hugetlb",
+ .flag = ANON_HUGE_FLAGS,
+ },
+ [VM_MEM_SRC_ANONYMOUS_HUGETLB_16KB] = {
+ .name = "anonymous_hugetlb_16kb",
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_16KB,
+ },
+ [VM_MEM_SRC_ANONYMOUS_HUGETLB_64KB] = {
+ .name = "anonymous_hugetlb_64kb",
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_64KB,
+ },
+ [VM_MEM_SRC_ANONYMOUS_HUGETLB_512KB] = {
+ .name = "anonymous_hugetlb_512kb",
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_512KB,
+ },
+ [VM_MEM_SRC_ANONYMOUS_HUGETLB_1MB] = {
+ .name = "anonymous_hugetlb_1mb",
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_1MB,
+ },
+ [VM_MEM_SRC_ANONYMOUS_HUGETLB_2MB] = {
+ .name = "anonymous_hugetlb_2mb",
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_2MB,
+ },
+ [VM_MEM_SRC_ANONYMOUS_HUGETLB_8MB] = {
+ .name = "anonymous_hugetlb_8mb",
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_8MB,
+ },
+ [VM_MEM_SRC_ANONYMOUS_HUGETLB_16MB] = {
+ .name = "anonymous_hugetlb_16mb",
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_16MB,
+ },
+ [VM_MEM_SRC_ANONYMOUS_HUGETLB_32MB] = {
+ .name = "anonymous_hugetlb_32mb",
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_32MB,
+ },
+ [VM_MEM_SRC_ANONYMOUS_HUGETLB_256MB] = {
+ .name = "anonymous_hugetlb_256mb",
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_256MB,
+ },
+ [VM_MEM_SRC_ANONYMOUS_HUGETLB_512MB] = {
+ .name = "anonymous_hugetlb_512mb",
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_512MB,
+ },
+ [VM_MEM_SRC_ANONYMOUS_HUGETLB_1GB] = {
+ .name = "anonymous_hugetlb_1gb",
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_1GB,
+ },
+ [VM_MEM_SRC_ANONYMOUS_HUGETLB_2GB] = {
+ .name = "anonymous_hugetlb_2gb",
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_2GB,
+ },
+ [VM_MEM_SRC_ANONYMOUS_HUGETLB_16GB] = {
+ .name = "anonymous_hugetlb_16gb",
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_16GB,
+ },
+ [VM_MEM_SRC_SHMEM] = {
+ .name = "shmem",
+ .flag = MAP_SHARED,
+ },
+ [VM_MEM_SRC_SHARED_HUGETLB] = {
+ .name = "shared_hugetlb",
+ /*
+ * No MAP_HUGETLB, we use MFD_HUGETLB instead. Since
+ * we're using "file backed" memory, we need to specify
+ * this when the FD is created, not when the area is
+ * mapped.
+ */
+ .flag = MAP_SHARED,
+ },
+ };
+ _Static_assert(ARRAY_SIZE(aliases) == NUM_SRC_TYPES,
+ "Missing new backing src types?");
+
+ TEST_ASSERT(i < NUM_SRC_TYPES, "Backing src type ID %d too big", i);
+
+ return &aliases[i];
+}
+
+#define MAP_HUGE_PAGE_SIZE(x) (1ULL << ((x >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK))
+
+size_t get_backing_src_pagesz(uint32_t i)
+{
+ uint32_t flag = vm_mem_backing_src_alias(i)->flag;
+
+ switch (i) {
+ case VM_MEM_SRC_ANONYMOUS:
+ case VM_MEM_SRC_SHMEM:
+ return getpagesize();
+ case VM_MEM_SRC_ANONYMOUS_THP:
+ return get_trans_hugepagesz();
+ case VM_MEM_SRC_ANONYMOUS_HUGETLB:
+ case VM_MEM_SRC_SHARED_HUGETLB:
+ return get_def_hugetlb_pagesz();
+ default:
+ return MAP_HUGE_PAGE_SIZE(flag);
+ }
+}
+
+bool is_backing_src_hugetlb(uint32_t i)
+{
+ return !!(vm_mem_backing_src_alias(i)->flag & MAP_HUGETLB);
+}
+
+static void print_available_backing_src_types(const char *prefix)
+{
+ int i;
+
+ printf("%sAvailable backing src types:\n", prefix);
+
+ for (i = 0; i < NUM_SRC_TYPES; i++)
+ printf("%s %s\n", prefix, vm_mem_backing_src_alias(i)->name);
+}
+
+void backing_src_help(const char *flag)
+{
+ printf(" %s: specify the type of memory that should be used to\n"
+ " back the guest data region. (default: %s)\n",
+ flag, vm_mem_backing_src_alias(DEFAULT_VM_MEM_SRC)->name);
+ print_available_backing_src_types(" ");
+}
+
+enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name)
+{
+ int i;
+
+ for (i = 0; i < NUM_SRC_TYPES; i++)
+ if (!strcmp(type_name, vm_mem_backing_src_alias(i)->name))
+ return i;
+
+ print_available_backing_src_types("");
+ TEST_FAIL("Unknown backing src type: %s", type_name);
+ return -1;
+}
+
+long get_run_delay(void)
+{
+ char path[64];
+ long val[2];
+ FILE *fp;
+
+ sprintf(path, "/proc/%ld/schedstat", syscall(SYS_gettid));
+ fp = fopen(path, "r");
+ /* Return MIN_RUN_DELAY_NS upon failure just to be safe */
+ if (fscanf(fp, "%ld %ld ", &val[0], &val[1]) < 2)
+ val[1] = MIN_RUN_DELAY_NS;
+ fclose(fp);
+
+ return val[1];
+}
+
+int atoi_paranoid(const char *num_str)
+{
+ char *end_ptr;
+ long num;
+
+ errno = 0;
+ num = strtol(num_str, &end_ptr, 0);
+ TEST_ASSERT(!errno, "strtol(\"%s\") failed", num_str);
+ TEST_ASSERT(num_str != end_ptr,
+ "strtol(\"%s\") didn't find a valid integer.", num_str);
+ TEST_ASSERT(*end_ptr == '\0',
+ "strtol(\"%s\") failed to parse trailing characters \"%s\".",
+ num_str, end_ptr);
+ TEST_ASSERT(num >= INT_MIN && num <= INT_MAX,
+ "%ld not in range of [%d, %d]", num, INT_MIN, INT_MAX);
+
+ return num;
+}
+
+char *strdup_printf(const char *fmt, ...)
+{
+ va_list ap;
+ char *str;
+
+ va_start(ap, fmt);
+ TEST_ASSERT(vasprintf(&str, fmt, ap) >= 0, "vasprintf() failed");
+ va_end(ap);
+
+ return str;
+}
+
+#define CLOCKSOURCE_PATH "/sys/devices/system/clocksource/clocksource0/current_clocksource"
+
+char *sys_get_cur_clocksource(void)
+{
+ char *clk_name;
+ struct stat st;
+ FILE *fp;
+
+ fp = fopen(CLOCKSOURCE_PATH, "r");
+ TEST_ASSERT(fp, "failed to open clocksource file, errno: %d", errno);
+
+ TEST_ASSERT(!fstat(fileno(fp), &st), "failed to stat clocksource file, errno: %d",
+ errno);
+
+ clk_name = malloc(st.st_size);
+ TEST_ASSERT(clk_name, "failed to allocate buffer to read file");
+
+ TEST_ASSERT(fgets(clk_name, st.st_size, fp), "failed to read clocksource file: %d",
+ ferror(fp));
+
+ fclose(fp);
+
+ return clk_name;
+}
diff --git a/tools/testing/selftests/kvm/lib/ucall_common.c b/tools/testing/selftests/kvm/lib/ucall_common.c
new file mode 100644
index 000000000000..f5af65a41c29
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/ucall_common.c
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "kvm_util.h"
+#include "linux/types.h"
+#include "linux/bitmap.h"
+#include "linux/atomic.h"
+
+#define GUEST_UCALL_FAILED -1
+
+struct ucall_header {
+ DECLARE_BITMAP(in_use, KVM_MAX_VCPUS);
+ struct ucall ucalls[KVM_MAX_VCPUS];
+};
+
+int ucall_nr_pages_required(uint64_t page_size)
+{
+ return align_up(sizeof(struct ucall_header), page_size) / page_size;
+}
+
+/*
+ * ucall_pool holds per-VM values (global data is duplicated by each VM), it
+ * must not be accessed from host code.
+ */
+static struct ucall_header *ucall_pool;
+
+void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
+{
+ struct ucall_header *hdr;
+ struct ucall *uc;
+ vm_vaddr_t vaddr;
+ int i;
+
+ vaddr = vm_vaddr_alloc_shared(vm, sizeof(*hdr), KVM_UTIL_MIN_VADDR,
+ MEM_REGION_DATA);
+ hdr = (struct ucall_header *)addr_gva2hva(vm, vaddr);
+ memset(hdr, 0, sizeof(*hdr));
+
+ for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+ uc = &hdr->ucalls[i];
+ uc->hva = uc;
+ }
+
+ write_guest_global(vm, ucall_pool, (struct ucall_header *)vaddr);
+
+ ucall_arch_init(vm, mmio_gpa);
+}
+
+static struct ucall *ucall_alloc(void)
+{
+ struct ucall *uc;
+ int i;
+
+ if (!ucall_pool)
+ goto ucall_failed;
+
+ for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+ if (!test_and_set_bit(i, ucall_pool->in_use)) {
+ uc = &ucall_pool->ucalls[i];
+ memset(uc->args, 0, sizeof(uc->args));
+ return uc;
+ }
+ }
+
+ucall_failed:
+ /*
+ * If the vCPU cannot grab a ucall structure, make a bare ucall with a
+ * magic value to signal to get_ucall() that things went sideways.
+ * GUEST_ASSERT() depends on ucall_alloc() and so cannot be used here.
+ */
+ ucall_arch_do_ucall(GUEST_UCALL_FAILED);
+ return NULL;
+}
+
+static void ucall_free(struct ucall *uc)
+{
+ /* Beware, here be pointer arithmetic. */
+ clear_bit(uc - ucall_pool->ucalls, ucall_pool->in_use);
+}
+
+void ucall_assert(uint64_t cmd, const char *exp, const char *file,
+ unsigned int line, const char *fmt, ...)
+{
+ struct ucall *uc;
+ va_list va;
+
+ uc = ucall_alloc();
+ uc->cmd = cmd;
+
+ WRITE_ONCE(uc->args[GUEST_ERROR_STRING], (uint64_t)(exp));
+ WRITE_ONCE(uc->args[GUEST_FILE], (uint64_t)(file));
+ WRITE_ONCE(uc->args[GUEST_LINE], line);
+
+ va_start(va, fmt);
+ guest_vsnprintf(uc->buffer, UCALL_BUFFER_LEN, fmt, va);
+ va_end(va);
+
+ ucall_arch_do_ucall((vm_vaddr_t)uc->hva);
+
+ ucall_free(uc);
+}
+
+void ucall_fmt(uint64_t cmd, const char *fmt, ...)
+{
+ struct ucall *uc;
+ va_list va;
+
+ uc = ucall_alloc();
+ uc->cmd = cmd;
+
+ va_start(va, fmt);
+ guest_vsnprintf(uc->buffer, UCALL_BUFFER_LEN, fmt, va);
+ va_end(va);
+
+ ucall_arch_do_ucall((vm_vaddr_t)uc->hva);
+
+ ucall_free(uc);
+}
+
+void ucall(uint64_t cmd, int nargs, ...)
+{
+ struct ucall *uc;
+ va_list va;
+ int i;
+
+ uc = ucall_alloc();
+
+ WRITE_ONCE(uc->cmd, cmd);
+
+ nargs = min(nargs, UCALL_MAX_ARGS);
+
+ va_start(va, nargs);
+ for (i = 0; i < nargs; ++i)
+ WRITE_ONCE(uc->args[i], va_arg(va, uint64_t));
+ va_end(va);
+
+ ucall_arch_do_ucall((vm_vaddr_t)uc->hva);
+
+ ucall_free(uc);
+}
+
+uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc)
+{
+ struct ucall ucall;
+ void *addr;
+
+ if (!uc)
+ uc = &ucall;
+
+ addr = ucall_arch_get_ucall(vcpu);
+ if (addr) {
+ TEST_ASSERT(addr != (void *)GUEST_UCALL_FAILED,
+ "Guest failed to allocate ucall struct");
+
+ memcpy(uc, addr, sizeof(*uc));
+ vcpu_run_complete_io(vcpu);
+ } else {
+ memset(uc, 0, sizeof(*uc));
+ }
+
+ return uc->cmd;
+}
diff --git a/tools/testing/selftests/kvm/lib/userfaultfd_util.c b/tools/testing/selftests/kvm/lib/userfaultfd_util.c
new file mode 100644
index 000000000000..f4eef6eb2dc2
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/userfaultfd_util.c
@@ -0,0 +1,186 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM userfaultfd util
+ * Adapted from demand_paging_test.c
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ * Copyright (C) 2019-2022 Google LLC
+ */
+
+#define _GNU_SOURCE /* for pipe2 */
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <poll.h>
+#include <pthread.h>
+#include <linux/userfaultfd.h>
+#include <sys/syscall.h>
+
+#include "kvm_util.h"
+#include "test_util.h"
+#include "memstress.h"
+#include "userfaultfd_util.h"
+
+#ifdef __NR_userfaultfd
+
+static void *uffd_handler_thread_fn(void *arg)
+{
+ struct uffd_desc *uffd_desc = (struct uffd_desc *)arg;
+ int uffd = uffd_desc->uffd;
+ int pipefd = uffd_desc->pipefds[0];
+ useconds_t delay = uffd_desc->delay;
+ int64_t pages = 0;
+ struct timespec start;
+ struct timespec ts_diff;
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ while (1) {
+ struct uffd_msg msg;
+ struct pollfd pollfd[2];
+ char tmp_chr;
+ int r;
+
+ pollfd[0].fd = uffd;
+ pollfd[0].events = POLLIN;
+ pollfd[1].fd = pipefd;
+ pollfd[1].events = POLLIN;
+
+ r = poll(pollfd, 2, -1);
+ switch (r) {
+ case -1:
+ pr_info("poll err");
+ continue;
+ case 0:
+ continue;
+ case 1:
+ break;
+ default:
+ pr_info("Polling uffd returned %d", r);
+ return NULL;
+ }
+
+ if (pollfd[0].revents & POLLERR) {
+ pr_info("uffd revents has POLLERR");
+ return NULL;
+ }
+
+ if (pollfd[1].revents & POLLIN) {
+ r = read(pollfd[1].fd, &tmp_chr, 1);
+ TEST_ASSERT(r == 1,
+ "Error reading pipefd in UFFD thread");
+ break;
+ }
+
+ if (!(pollfd[0].revents & POLLIN))
+ continue;
+
+ r = read(uffd, &msg, sizeof(msg));
+ if (r == -1) {
+ if (errno == EAGAIN)
+ continue;
+ pr_info("Read of uffd got errno %d\n", errno);
+ return NULL;
+ }
+
+ if (r != sizeof(msg)) {
+ pr_info("Read on uffd returned unexpected size: %d bytes", r);
+ return NULL;
+ }
+
+ if (!(msg.event & UFFD_EVENT_PAGEFAULT))
+ continue;
+
+ if (delay)
+ usleep(delay);
+ r = uffd_desc->handler(uffd_desc->uffd_mode, uffd, &msg);
+ if (r < 0)
+ return NULL;
+ pages++;
+ }
+
+ ts_diff = timespec_elapsed(start);
+ PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n",
+ pages, ts_diff.tv_sec, ts_diff.tv_nsec,
+ pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / NSEC_PER_SEC));
+
+ return NULL;
+}
+
+struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay,
+ void *hva, uint64_t len,
+ uffd_handler_t handler)
+{
+ struct uffd_desc *uffd_desc;
+ bool is_minor = (uffd_mode == UFFDIO_REGISTER_MODE_MINOR);
+ int uffd;
+ struct uffdio_api uffdio_api;
+ struct uffdio_register uffdio_register;
+ uint64_t expected_ioctls = ((uint64_t) 1) << _UFFDIO_COPY;
+ int ret;
+
+ PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n",
+ is_minor ? "MINOR" : "MISSING",
+ is_minor ? "UFFDIO_CONINUE" : "UFFDIO_COPY");
+
+ uffd_desc = malloc(sizeof(struct uffd_desc));
+ TEST_ASSERT(uffd_desc, "malloc failed");
+
+ /* In order to get minor faults, prefault via the alias. */
+ if (is_minor)
+ expected_ioctls = ((uint64_t) 1) << _UFFDIO_CONTINUE;
+
+ uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
+ TEST_ASSERT(uffd >= 0, "uffd creation failed, errno: %d", errno);
+
+ uffdio_api.api = UFFD_API;
+ uffdio_api.features = 0;
+ TEST_ASSERT(ioctl(uffd, UFFDIO_API, &uffdio_api) != -1,
+ "ioctl UFFDIO_API failed: %" PRIu64,
+ (uint64_t)uffdio_api.api);
+
+ uffdio_register.range.start = (uint64_t)hva;
+ uffdio_register.range.len = len;
+ uffdio_register.mode = uffd_mode;
+ TEST_ASSERT(ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) != -1,
+ "ioctl UFFDIO_REGISTER failed");
+ TEST_ASSERT((uffdio_register.ioctls & expected_ioctls) ==
+ expected_ioctls, "missing userfaultfd ioctls");
+
+ ret = pipe2(uffd_desc->pipefds, O_CLOEXEC | O_NONBLOCK);
+ TEST_ASSERT(!ret, "Failed to set up pipefd");
+
+ uffd_desc->uffd_mode = uffd_mode;
+ uffd_desc->uffd = uffd;
+ uffd_desc->delay = delay;
+ uffd_desc->handler = handler;
+ pthread_create(&uffd_desc->thread, NULL, uffd_handler_thread_fn,
+ uffd_desc);
+
+ PER_VCPU_DEBUG("Created uffd thread for HVA range [%p, %p)\n",
+ hva, hva + len);
+
+ return uffd_desc;
+}
+
+void uffd_stop_demand_paging(struct uffd_desc *uffd)
+{
+ char c = 0;
+ int ret;
+
+ ret = write(uffd->pipefds[1], &c, 1);
+ TEST_ASSERT(ret == 1, "Unable to write to pipefd");
+
+ ret = pthread_join(uffd->thread, NULL);
+ TEST_ASSERT(ret == 0, "Pthread_join failed.");
+
+ close(uffd->uffd);
+
+ close(uffd->pipefds[1]);
+ close(uffd->pipefds[0]);
+
+ free(uffd);
+}
+
+#endif /* __NR_userfaultfd */
diff --git a/tools/testing/selftests/kvm/lib/x86_64/apic.c b/tools/testing/selftests/kvm/lib/x86_64/apic.c
new file mode 100644
index 000000000000..89153a333e83
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/x86_64/apic.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021, Google LLC.
+ */
+
+#include "apic.h"
+
+void apic_disable(void)
+{
+ wrmsr(MSR_IA32_APICBASE,
+ rdmsr(MSR_IA32_APICBASE) &
+ ~(MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD));
+}
+
+void xapic_enable(void)
+{
+ uint64_t val = rdmsr(MSR_IA32_APICBASE);
+
+ /* Per SDM: to enable xAPIC when in x2APIC must first disable APIC */
+ if (val & MSR_IA32_APICBASE_EXTD) {
+ apic_disable();
+ wrmsr(MSR_IA32_APICBASE,
+ rdmsr(MSR_IA32_APICBASE) | MSR_IA32_APICBASE_ENABLE);
+ } else if (!(val & MSR_IA32_APICBASE_ENABLE)) {
+ wrmsr(MSR_IA32_APICBASE, val | MSR_IA32_APICBASE_ENABLE);
+ }
+
+ /*
+ * Per SDM: reset value of spurious interrupt vector register has the
+ * APIC software enabled bit=0. It must be enabled in addition to the
+ * enable bit in the MSR.
+ */
+ val = xapic_read_reg(APIC_SPIV) | APIC_SPIV_APIC_ENABLED;
+ xapic_write_reg(APIC_SPIV, val);
+}
+
+void x2apic_enable(void)
+{
+ wrmsr(MSR_IA32_APICBASE, rdmsr(MSR_IA32_APICBASE) |
+ MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD);
+ x2apic_write_reg(APIC_SPIV,
+ x2apic_read_reg(APIC_SPIV) | APIC_SPIV_APIC_ENABLED);
+}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/handlers.S b/tools/testing/selftests/kvm/lib/x86_64/handlers.S
new file mode 100644
index 000000000000..7629819734af
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/x86_64/handlers.S
@@ -0,0 +1,81 @@
+handle_exception:
+ push %r15
+ push %r14
+ push %r13
+ push %r12
+ push %r11
+ push %r10
+ push %r9
+ push %r8
+
+ push %rdi
+ push %rsi
+ push %rbp
+ push %rbx
+ push %rdx
+ push %rcx
+ push %rax
+ mov %rsp, %rdi
+
+ call route_exception
+
+ pop %rax
+ pop %rcx
+ pop %rdx
+ pop %rbx
+ pop %rbp
+ pop %rsi
+ pop %rdi
+ pop %r8
+ pop %r9
+ pop %r10
+ pop %r11
+ pop %r12
+ pop %r13
+ pop %r14
+ pop %r15
+
+ /* Discard vector and error code. */
+ add $16, %rsp
+ iretq
+
+/*
+ * Build the handle_exception wrappers which push the vector/error code on the
+ * stack and an array of pointers to those wrappers.
+ */
+.pushsection .rodata
+.globl idt_handlers
+idt_handlers:
+.popsection
+
+.macro HANDLERS has_error from to
+ vector = \from
+ .rept \to - \from + 1
+ .align 8
+
+ /* Fetch current address and append it to idt_handlers. */
+666 :
+.pushsection .rodata
+ .quad 666b
+.popsection
+
+ .if ! \has_error
+ pushq $0
+ .endif
+ pushq $vector
+ jmp handle_exception
+ vector = vector + 1
+ .endr
+.endm
+
+.global idt_handler_code
+idt_handler_code:
+ HANDLERS has_error=0 from=0 to=7
+ HANDLERS has_error=1 from=8 to=8
+ HANDLERS has_error=0 from=9 to=9
+ HANDLERS has_error=1 from=10 to=14
+ HANDLERS has_error=0 from=15 to=16
+ HANDLERS has_error=1 from=17 to=17
+ HANDLERS has_error=0 from=18 to=255
+
+.section .note.GNU-stack, "", %progbits
diff --git a/tools/testing/selftests/kvm/lib/x86_64/hyperv.c b/tools/testing/selftests/kvm/lib/x86_64/hyperv.c
new file mode 100644
index 000000000000..efb7e7a1354d
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/x86_64/hyperv.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Hyper-V specific functions.
+ *
+ * Copyright (C) 2021, Red Hat Inc.
+ */
+#include <stdint.h>
+#include "processor.h"
+#include "hyperv.h"
+
+struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm,
+ vm_vaddr_t *p_hv_pages_gva)
+{
+ vm_vaddr_t hv_pages_gva = vm_vaddr_alloc_page(vm);
+ struct hyperv_test_pages *hv = addr_gva2hva(vm, hv_pages_gva);
+
+ /* Setup of a region of guest memory for the VP Assist page. */
+ hv->vp_assist = (void *)vm_vaddr_alloc_page(vm);
+ hv->vp_assist_hva = addr_gva2hva(vm, (uintptr_t)hv->vp_assist);
+ hv->vp_assist_gpa = addr_gva2gpa(vm, (uintptr_t)hv->vp_assist);
+
+ /* Setup of a region of guest memory for the partition assist page. */
+ hv->partition_assist = (void *)vm_vaddr_alloc_page(vm);
+ hv->partition_assist_hva = addr_gva2hva(vm, (uintptr_t)hv->partition_assist);
+ hv->partition_assist_gpa = addr_gva2gpa(vm, (uintptr_t)hv->partition_assist);
+
+ /* Setup of a region of guest memory for the enlightened VMCS. */
+ hv->enlightened_vmcs = (void *)vm_vaddr_alloc_page(vm);
+ hv->enlightened_vmcs_hva = addr_gva2hva(vm, (uintptr_t)hv->enlightened_vmcs);
+ hv->enlightened_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)hv->enlightened_vmcs);
+
+ *p_hv_pages_gva = hv_pages_gva;
+ return hv;
+}
+
+int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist)
+{
+ uint64_t val = (vp_assist_pa & HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK) |
+ HV_X64_MSR_VP_ASSIST_PAGE_ENABLE;
+
+ wrmsr(HV_X64_MSR_VP_ASSIST_PAGE, val);
+
+ current_vp_assist = vp_assist;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/memstress.c b/tools/testing/selftests/kvm/lib/x86_64/memstress.c
new file mode 100644
index 000000000000..d61e623afc8c
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/x86_64/memstress.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * x86_64-specific extensions to memstress.c.
+ *
+ * Copyright (C) 2022, Google, Inc.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "memstress.h"
+#include "processor.h"
+#include "vmx.h"
+
+void memstress_l2_guest_code(uint64_t vcpu_id)
+{
+ memstress_guest_code(vcpu_id);
+ vmcall();
+}
+
+extern char memstress_l2_guest_entry[];
+__asm__(
+"memstress_l2_guest_entry:"
+" mov (%rsp), %rdi;"
+" call memstress_l2_guest_code;"
+" ud2;"
+);
+
+static void memstress_l1_guest_code(struct vmx_pages *vmx, uint64_t vcpu_id)
+{
+#define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ unsigned long *rsp;
+
+ GUEST_ASSERT(vmx->vmcs_gpa);
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx));
+ GUEST_ASSERT(load_vmcs(vmx));
+ GUEST_ASSERT(ept_1g_pages_supported());
+
+ rsp = &l2_guest_stack[L2_GUEST_STACK_SIZE - 1];
+ *rsp = vcpu_id;
+ prepare_vmcs(vmx, memstress_l2_guest_entry, rsp);
+
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+ GUEST_DONE();
+}
+
+uint64_t memstress_nested_pages(int nr_vcpus)
+{
+ /*
+ * 513 page tables is enough to identity-map 256 TiB of L2 with 1G
+ * pages and 4-level paging, plus a few pages per-vCPU for data
+ * structures such as the VMCS.
+ */
+ return 513 + 10 * nr_vcpus;
+}
+
+void memstress_setup_ept(struct vmx_pages *vmx, struct kvm_vm *vm)
+{
+ uint64_t start, end;
+
+ prepare_eptp(vmx, vm, 0);
+
+ /*
+ * Identity map the first 4G and the test region with 1G pages so that
+ * KVM can shadow the EPT12 with the maximum huge page size supported
+ * by the backing source.
+ */
+ nested_identity_map_1g(vmx, vm, 0, 0x100000000ULL);
+
+ start = align_down(memstress_args.gpa, PG_SIZE_1G);
+ end = align_up(memstress_args.gpa + memstress_args.size, PG_SIZE_1G);
+ nested_identity_map_1g(vmx, vm, start, end - start);
+}
+
+void memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[])
+{
+ struct vmx_pages *vmx, *vmx0 = NULL;
+ struct kvm_regs regs;
+ vm_vaddr_t vmx_gva;
+ int vcpu_id;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+ TEST_REQUIRE(kvm_cpu_has_ept());
+
+ for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
+ vmx = vcpu_alloc_vmx(vm, &vmx_gva);
+
+ if (vcpu_id == 0) {
+ memstress_setup_ept(vmx, vm);
+ vmx0 = vmx;
+ } else {
+ /* Share the same EPT table across all vCPUs. */
+ vmx->eptp = vmx0->eptp;
+ vmx->eptp_hva = vmx0->eptp_hva;
+ vmx->eptp_gpa = vmx0->eptp_gpa;
+ }
+
+ /*
+ * Override the vCPU to run memstress_l1_guest_code() which will
+ * bounce it into L2 before calling memstress_guest_code().
+ */
+ vcpu_regs_get(vcpus[vcpu_id], &regs);
+ regs.rip = (unsigned long) memstress_l1_guest_code;
+ vcpu_regs_set(vcpus[vcpu_id], &regs);
+ vcpu_args_set(vcpus[vcpu_id], 2, vmx_gva, vcpu_id);
+ }
+}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/pmu.c b/tools/testing/selftests/kvm/lib/x86_64/pmu.c
new file mode 100644
index 000000000000..f31f0427c17c
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/x86_64/pmu.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2023, Tencent, Inc.
+ */
+
+#include <stdint.h>
+
+#include <linux/kernel.h>
+
+#include "kvm_util.h"
+#include "pmu.h"
+
+const uint64_t intel_pmu_arch_events[] = {
+ INTEL_ARCH_CPU_CYCLES,
+ INTEL_ARCH_INSTRUCTIONS_RETIRED,
+ INTEL_ARCH_REFERENCE_CYCLES,
+ INTEL_ARCH_LLC_REFERENCES,
+ INTEL_ARCH_LLC_MISSES,
+ INTEL_ARCH_BRANCHES_RETIRED,
+ INTEL_ARCH_BRANCHES_MISPREDICTED,
+ INTEL_ARCH_TOPDOWN_SLOTS,
+};
+kvm_static_assert(ARRAY_SIZE(intel_pmu_arch_events) == NR_INTEL_ARCH_EVENTS);
+
+const uint64_t amd_pmu_zen_events[] = {
+ AMD_ZEN_CORE_CYCLES,
+ AMD_ZEN_INSTRUCTIONS_RETIRED,
+ AMD_ZEN_BRANCHES_RETIRED,
+ AMD_ZEN_BRANCHES_MISPREDICTED,
+};
+kvm_static_assert(ARRAY_SIZE(amd_pmu_zen_events) == NR_AMD_ZEN_EVENTS);
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index f6eb34eaa0d2..74a4c736c9ae 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -5,80 +5,26 @@
* Copyright (C) 2018, Google LLC.
*/
-#define _GNU_SOURCE /* for program_invocation_name */
-
+#include "linux/bitmap.h"
#include "test_util.h"
#include "kvm_util.h"
-#include "../kvm_util_internal.h"
#include "processor.h"
+#include "sev.h"
+
+#ifndef NUM_INTERRUPTS
+#define NUM_INTERRUPTS 256
+#endif
+
+#define DEFAULT_CODE_SELECTOR 0x8
+#define DEFAULT_DATA_SELECTOR 0x10
-/* Minimum physical address used for virtual translation tables. */
-#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
-
-/* Virtual translation table structure declarations */
-struct pageMapL4Entry {
- uint64_t present:1;
- uint64_t writable:1;
- uint64_t user:1;
- uint64_t write_through:1;
- uint64_t cache_disable:1;
- uint64_t accessed:1;
- uint64_t ignored_06:1;
- uint64_t page_size:1;
- uint64_t ignored_11_08:4;
- uint64_t address:40;
- uint64_t ignored_62_52:11;
- uint64_t execute_disable:1;
-};
-
-struct pageDirectoryPointerEntry {
- uint64_t present:1;
- uint64_t writable:1;
- uint64_t user:1;
- uint64_t write_through:1;
- uint64_t cache_disable:1;
- uint64_t accessed:1;
- uint64_t ignored_06:1;
- uint64_t page_size:1;
- uint64_t ignored_11_08:4;
- uint64_t address:40;
- uint64_t ignored_62_52:11;
- uint64_t execute_disable:1;
-};
-
-struct pageDirectoryEntry {
- uint64_t present:1;
- uint64_t writable:1;
- uint64_t user:1;
- uint64_t write_through:1;
- uint64_t cache_disable:1;
- uint64_t accessed:1;
- uint64_t ignored_06:1;
- uint64_t page_size:1;
- uint64_t ignored_11_08:4;
- uint64_t address:40;
- uint64_t ignored_62_52:11;
- uint64_t execute_disable:1;
-};
-
-struct pageTableEntry {
- uint64_t present:1;
- uint64_t writable:1;
- uint64_t user:1;
- uint64_t write_through:1;
- uint64_t cache_disable:1;
- uint64_t accessed:1;
- uint64_t dirty:1;
- uint64_t reserved_07:1;
- uint64_t global:1;
- uint64_t ignored_11_09:3;
- uint64_t address:40;
- uint64_t ignored_62_52:11;
- uint64_t execute_disable:1;
-};
-
-void regs_dump(FILE *stream, struct kvm_regs *regs,
- uint8_t indent)
+#define MAX_NR_CPUID_ENTRIES 100
+
+vm_vaddr_t exception_handlers;
+bool host_cpu_is_amd;
+bool host_cpu_is_intel;
+
+static void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent)
{
fprintf(stream, "%*srax: 0x%.16llx rbx: 0x%.16llx "
"rcx: 0x%.16llx rdx: 0x%.16llx\n",
@@ -101,21 +47,6 @@ void regs_dump(FILE *stream, struct kvm_regs *regs,
regs->rip, regs->rflags);
}
-/*
- * Segment Dump
- *
- * Input Args:
- * stream - Output FILE stream
- * segment - KVM segment
- * indent - Left margin indent amount
- *
- * Output Args: None
- *
- * Return: None
- *
- * Dumps the state of the KVM segment given by @segment, to the FILE stream
- * given by @stream.
- */
static void segment_dump(FILE *stream, struct kvm_segment *segment,
uint8_t indent)
{
@@ -133,21 +64,6 @@ static void segment_dump(FILE *stream, struct kvm_segment *segment,
segment->unusable, segment->padding);
}
-/*
- * dtable Dump
- *
- * Input Args:
- * stream - Output FILE stream
- * dtable - KVM dtable
- * indent - Left margin indent amount
- *
- * Output Args: None
- *
- * Return: None
- *
- * Dumps the state of the KVM dtable given by @dtable, to the FILE stream
- * given by @stream.
- */
static void dtable_dump(FILE *stream, struct kvm_dtable *dtable,
uint8_t indent)
{
@@ -157,8 +73,7 @@ static void dtable_dump(FILE *stream, struct kvm_dtable *dtable,
dtable->padding[0], dtable->padding[1], dtable->padding[2]);
}
-void sregs_dump(FILE *stream, struct kvm_sregs *sregs,
- uint8_t indent)
+static void sregs_dump(FILE *stream, struct kvm_sregs *sregs, uint8_t indent)
{
unsigned int i;
@@ -200,97 +115,217 @@ void sregs_dump(FILE *stream, struct kvm_sregs *sregs,
}
}
-void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot)
+bool kvm_is_tdp_enabled(void)
+{
+ if (host_cpu_is_intel)
+ return get_kvm_intel_param_bool("ept");
+ else
+ return get_kvm_amd_param_bool("npt");
+}
+
+void virt_arch_pgd_alloc(struct kvm_vm *vm)
{
TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
/* If needed, create page map l4 table. */
if (!vm->pgd_created) {
- vm_paddr_t paddr = vm_phy_page_alloc(vm,
- KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
- vm->pgd = paddr;
+ vm->pgd = vm_alloc_page_table(vm);
vm->pgd_created = true;
}
}
-void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- uint32_t pgd_memslot)
+static void *virt_get_pte(struct kvm_vm *vm, uint64_t *parent_pte,
+ uint64_t vaddr, int level)
{
- uint16_t index[4];
- struct pageMapL4Entry *pml4e;
+ uint64_t pt_gpa = PTE_GET_PA(*parent_pte);
+ uint64_t *page_table = addr_gpa2hva(vm, pt_gpa);
+ int index = (vaddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu;
+
+ TEST_ASSERT((*parent_pte & PTE_PRESENT_MASK) || parent_pte == &vm->pgd,
+ "Parent PTE (level %d) not PRESENT for gva: 0x%08lx",
+ level + 1, vaddr);
+
+ return &page_table[index];
+}
+
+static uint64_t *virt_create_upper_pte(struct kvm_vm *vm,
+ uint64_t *parent_pte,
+ uint64_t vaddr,
+ uint64_t paddr,
+ int current_level,
+ int target_level)
+{
+ uint64_t *pte = virt_get_pte(vm, parent_pte, vaddr, current_level);
+
+ paddr = vm_untag_gpa(vm, paddr);
+
+ if (!(*pte & PTE_PRESENT_MASK)) {
+ *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK;
+ if (current_level == target_level)
+ *pte |= PTE_LARGE_MASK | (paddr & PHYSICAL_PAGE_MASK);
+ else
+ *pte |= vm_alloc_page_table(vm) & PHYSICAL_PAGE_MASK;
+ } else {
+ /*
+ * Entry already present. Assert that the caller doesn't want
+ * a hugepage at this level, and that there isn't a hugepage at
+ * this level.
+ */
+ TEST_ASSERT(current_level != target_level,
+ "Cannot create hugepage at level: %u, vaddr: 0x%lx",
+ current_level, vaddr);
+ TEST_ASSERT(!(*pte & PTE_LARGE_MASK),
+ "Cannot create page table at level: %u, vaddr: 0x%lx",
+ current_level, vaddr);
+ }
+ return pte;
+}
+
+void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level)
+{
+ const uint64_t pg_size = PG_LEVEL_SIZE(level);
+ uint64_t *pml4e, *pdpe, *pde;
+ uint64_t *pte;
+
+ TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K,
+ "Unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+
+ TEST_ASSERT((vaddr % pg_size) == 0,
+ "Virtual address not aligned,\n"
+ "vaddr: 0x%lx page size: 0x%lx", vaddr, pg_size);
+ TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (vaddr >> vm->page_shift)),
+ "Invalid virtual address, vaddr: 0x%lx", vaddr);
+ TEST_ASSERT((paddr % pg_size) == 0,
+ "Physical address not aligned,\n"
+ " paddr: 0x%lx page size: 0x%lx", paddr, pg_size);
+ TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
+ "Physical address beyond maximum supported,\n"
+ " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+ paddr, vm->max_gfn, vm->page_size);
+ TEST_ASSERT(vm_untag_gpa(vm, paddr) == paddr,
+ "Unexpected bits in paddr: %lx", paddr);
+
+ /*
+ * Allocate upper level page tables, if not already present. Return
+ * early if a hugepage was created.
+ */
+ pml4e = virt_create_upper_pte(vm, &vm->pgd, vaddr, paddr, PG_LEVEL_512G, level);
+ if (*pml4e & PTE_LARGE_MASK)
+ return;
+
+ pdpe = virt_create_upper_pte(vm, pml4e, vaddr, paddr, PG_LEVEL_1G, level);
+ if (*pdpe & PTE_LARGE_MASK)
+ return;
+
+ pde = virt_create_upper_pte(vm, pdpe, vaddr, paddr, PG_LEVEL_2M, level);
+ if (*pde & PTE_LARGE_MASK)
+ return;
+
+ /* Fill in page table entry. */
+ pte = virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K);
+ TEST_ASSERT(!(*pte & PTE_PRESENT_MASK),
+ "PTE already present for 4k page at vaddr: 0x%lx", vaddr);
+ *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK);
+
+ /*
+ * Neither SEV nor TDX supports shared page tables, so only the final
+ * leaf PTE needs manually set the C/S-bit.
+ */
+ if (vm_is_gpa_protected(vm, paddr))
+ *pte |= vm->arch.c_bit;
+ else
+ *pte |= vm->arch.s_bit;
+}
+
+void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
+{
+ __virt_pg_map(vm, vaddr, paddr, PG_LEVEL_4K);
+}
+
+void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+ uint64_t nr_bytes, int level)
+{
+ uint64_t pg_size = PG_LEVEL_SIZE(level);
+ uint64_t nr_pages = nr_bytes / pg_size;
+ int i;
+
+ TEST_ASSERT(nr_bytes % pg_size == 0,
+ "Region size not aligned: nr_bytes: 0x%lx, page size: 0x%lx",
+ nr_bytes, pg_size);
+
+ for (i = 0; i < nr_pages; i++) {
+ __virt_pg_map(vm, vaddr, paddr, level);
+
+ vaddr += pg_size;
+ paddr += pg_size;
+ }
+}
+
+static bool vm_is_target_pte(uint64_t *pte, int *level, int current_level)
+{
+ if (*pte & PTE_LARGE_MASK) {
+ TEST_ASSERT(*level == PG_LEVEL_NONE ||
+ *level == current_level,
+ "Unexpected hugepage at level %d", current_level);
+ *level = current_level;
+ }
+
+ return *level == current_level;
+}
+
+uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr,
+ int *level)
+{
+ uint64_t *pml4e, *pdpe, *pde;
+
+ TEST_ASSERT(!vm->arch.is_pt_protected,
+ "Walking page tables of protected guests is impossible");
+
+ TEST_ASSERT(*level >= PG_LEVEL_NONE && *level < PG_LEVEL_NUM,
+ "Invalid PG_LEVEL_* '%d'", *level);
TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
-
- TEST_ASSERT((vaddr % vm->page_size) == 0,
- "Virtual address not on page boundary,\n"
- " vaddr: 0x%lx vm->page_size: 0x%x",
- vaddr, vm->page_size);
TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
(vaddr >> vm->page_shift)),
"Invalid virtual address, vaddr: 0x%lx",
vaddr);
- TEST_ASSERT((paddr % vm->page_size) == 0,
- "Physical address not on page boundary,\n"
- " paddr: 0x%lx vm->page_size: 0x%x",
- paddr, vm->page_size);
- TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
- "Physical address beyond beyond maximum supported,\n"
- " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
- paddr, vm->max_gfn, vm->page_size);
-
- index[0] = (vaddr >> 12) & 0x1ffu;
- index[1] = (vaddr >> 21) & 0x1ffu;
- index[2] = (vaddr >> 30) & 0x1ffu;
- index[3] = (vaddr >> 39) & 0x1ffu;
-
- /* Allocate page directory pointer table if not present. */
- pml4e = addr_gpa2hva(vm, vm->pgd);
- if (!pml4e[index[3]].present) {
- pml4e[index[3]].address = vm_phy_page_alloc(vm,
- KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
- >> vm->page_shift;
- pml4e[index[3]].writable = true;
- pml4e[index[3]].present = true;
- }
+ /*
+ * Based on the mode check above there are 48 bits in the vaddr, so
+ * shift 16 to sign extend the last bit (bit-47),
+ */
+ TEST_ASSERT(vaddr == (((int64_t)vaddr << 16) >> 16),
+ "Canonical check failed. The virtual address is invalid.");
- /* Allocate page directory table if not present. */
- struct pageDirectoryPointerEntry *pdpe;
- pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
- if (!pdpe[index[2]].present) {
- pdpe[index[2]].address = vm_phy_page_alloc(vm,
- KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
- >> vm->page_shift;
- pdpe[index[2]].writable = true;
- pdpe[index[2]].present = true;
- }
+ pml4e = virt_get_pte(vm, &vm->pgd, vaddr, PG_LEVEL_512G);
+ if (vm_is_target_pte(pml4e, level, PG_LEVEL_512G))
+ return pml4e;
- /* Allocate page table if not present. */
- struct pageDirectoryEntry *pde;
- pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
- if (!pde[index[1]].present) {
- pde[index[1]].address = vm_phy_page_alloc(vm,
- KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
- >> vm->page_shift;
- pde[index[1]].writable = true;
- pde[index[1]].present = true;
- }
+ pdpe = virt_get_pte(vm, pml4e, vaddr, PG_LEVEL_1G);
+ if (vm_is_target_pte(pdpe, level, PG_LEVEL_1G))
+ return pdpe;
- /* Fill in page table entry. */
- struct pageTableEntry *pte;
- pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
- pte[index[0]].address = paddr >> vm->page_shift;
- pte[index[0]].writable = true;
- pte[index[0]].present = 1;
+ pde = virt_get_pte(vm, pdpe, vaddr, PG_LEVEL_2M);
+ if (vm_is_target_pte(pde, level, PG_LEVEL_2M))
+ return pde;
+
+ return virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K);
}
-void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr)
{
- struct pageMapL4Entry *pml4e, *pml4e_start;
- struct pageDirectoryPointerEntry *pdpe, *pdpe_start;
- struct pageDirectoryEntry *pde, *pde_start;
- struct pageTableEntry *pte, *pte_start;
+ int level = PG_LEVEL_4K;
+
+ return __vm_get_page_table_entry(vm, vaddr, &level);
+}
+
+void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+{
+ uint64_t *pml4e, *pml4e_start;
+ uint64_t *pdpe, *pdpe_start;
+ uint64_t *pde, *pde_start;
+ uint64_t *pte, *pte_start;
if (!vm->pgd_created)
return;
@@ -300,62 +335,58 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
fprintf(stream, "%*s index hvaddr gpaddr "
"addr w exec dirty\n",
indent, "");
- pml4e_start = (struct pageMapL4Entry *) addr_gpa2hva(vm,
- vm->pgd);
+ pml4e_start = (uint64_t *) addr_gpa2hva(vm, vm->pgd);
for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) {
pml4e = &pml4e_start[n1];
- if (!pml4e->present)
+ if (!(*pml4e & PTE_PRESENT_MASK))
continue;
- fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10lx %u "
+ fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10llx %u "
" %u\n",
indent, "",
pml4e - pml4e_start, pml4e,
- addr_hva2gpa(vm, pml4e), (uint64_t) pml4e->address,
- pml4e->writable, pml4e->execute_disable);
+ addr_hva2gpa(vm, pml4e), PTE_GET_PFN(*pml4e),
+ !!(*pml4e & PTE_WRITABLE_MASK), !!(*pml4e & PTE_NX_MASK));
- pdpe_start = addr_gpa2hva(vm, pml4e->address
- * vm->page_size);
+ pdpe_start = addr_gpa2hva(vm, *pml4e & PHYSICAL_PAGE_MASK);
for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) {
pdpe = &pdpe_start[n2];
- if (!pdpe->present)
+ if (!(*pdpe & PTE_PRESENT_MASK))
continue;
- fprintf(stream, "%*spdpe 0x%-3zx %p 0x%-12lx 0x%-10lx "
+ fprintf(stream, "%*spdpe 0x%-3zx %p 0x%-12lx 0x%-10llx "
"%u %u\n",
indent, "",
pdpe - pdpe_start, pdpe,
addr_hva2gpa(vm, pdpe),
- (uint64_t) pdpe->address, pdpe->writable,
- pdpe->execute_disable);
+ PTE_GET_PFN(*pdpe), !!(*pdpe & PTE_WRITABLE_MASK),
+ !!(*pdpe & PTE_NX_MASK));
- pde_start = addr_gpa2hva(vm,
- pdpe->address * vm->page_size);
+ pde_start = addr_gpa2hva(vm, *pdpe & PHYSICAL_PAGE_MASK);
for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) {
pde = &pde_start[n3];
- if (!pde->present)
+ if (!(*pde & PTE_PRESENT_MASK))
continue;
fprintf(stream, "%*spde 0x%-3zx %p "
- "0x%-12lx 0x%-10lx %u %u\n",
+ "0x%-12lx 0x%-10llx %u %u\n",
indent, "", pde - pde_start, pde,
addr_hva2gpa(vm, pde),
- (uint64_t) pde->address, pde->writable,
- pde->execute_disable);
+ PTE_GET_PFN(*pde), !!(*pde & PTE_WRITABLE_MASK),
+ !!(*pde & PTE_NX_MASK));
- pte_start = addr_gpa2hva(vm,
- pde->address * vm->page_size);
+ pte_start = addr_gpa2hva(vm, *pde & PHYSICAL_PAGE_MASK);
for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) {
pte = &pte_start[n4];
- if (!pte->present)
+ if (!(*pte & PTE_PRESENT_MASK))
continue;
fprintf(stream, "%*spte 0x%-3zx %p "
- "0x%-12lx 0x%-10lx %u %u "
+ "0x%-12lx 0x%-10llx %u %u "
" %u 0x%-10lx\n",
indent, "",
pte - pte_start, pte,
addr_hva2gpa(vm, pte),
- (uint64_t) pte->address,
- pte->writable,
- pte->execute_disable,
- pte->dirty,
+ PTE_GET_PFN(*pte),
+ !!(*pte & PTE_WRITABLE_MASK),
+ !!(*pte & PTE_NX_MASK),
+ !!(*pte & PTE_DIRTY_MASK),
((uint64_t) n1 << 27)
| ((uint64_t) n2 << 18)
| ((uint64_t) n3 << 9)
@@ -392,11 +423,12 @@ static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp)
desc->limit0 = segp->limit & 0xFFFF;
desc->base0 = segp->base & 0xFFFF;
desc->base1 = segp->base >> 16;
- desc->s = segp->s;
desc->type = segp->type;
+ desc->s = segp->s;
desc->dpl = segp->dpl;
desc->p = segp->present;
desc->limit1 = segp->limit >> 16;
+ desc->avl = segp->avl;
desc->l = segp->l;
desc->db = segp->db;
desc->g = segp->g;
@@ -469,65 +501,35 @@ static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector,
kvm_seg_fill_gdt_64bit(vm, segp);
}
-vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
{
- uint16_t index[4];
- struct pageMapL4Entry *pml4e;
- struct pageDirectoryPointerEntry *pdpe;
- struct pageDirectoryEntry *pde;
- struct pageTableEntry *pte;
-
- TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
- "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
-
- index[0] = (gva >> 12) & 0x1ffu;
- index[1] = (gva >> 21) & 0x1ffu;
- index[2] = (gva >> 30) & 0x1ffu;
- index[3] = (gva >> 39) & 0x1ffu;
-
- if (!vm->pgd_created)
- goto unmapped_gva;
- pml4e = addr_gpa2hva(vm, vm->pgd);
- if (!pml4e[index[3]].present)
- goto unmapped_gva;
-
- pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
- if (!pdpe[index[2]].present)
- goto unmapped_gva;
+ int level = PG_LEVEL_NONE;
+ uint64_t *pte = __vm_get_page_table_entry(vm, gva, &level);
- pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
- if (!pde[index[1]].present)
- goto unmapped_gva;
+ TEST_ASSERT(*pte & PTE_PRESENT_MASK,
+ "Leaf PTE not PRESENT for gva: 0x%08lx", gva);
- pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
- if (!pte[index[0]].present)
- goto unmapped_gva;
-
- return (pte[index[0]].address * vm->page_size) + (gva & 0xfffu);
-
-unmapped_gva:
- TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva);
- exit(EXIT_FAILURE);
+ /*
+ * No need for a hugepage mask on the PTE, x86-64 requires the "unused"
+ * address bits to be zero.
+ */
+ return vm_untag_gpa(vm, PTE_GET_PA(*pte)) | (gva & ~HUGEPAGE_MASK(level));
}
-static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt, int gdt_memslot,
- int pgd_memslot)
+static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt)
{
if (!vm->gdt)
- vm->gdt = vm_vaddr_alloc(vm, getpagesize(),
- KVM_UTIL_MIN_VADDR, gdt_memslot, pgd_memslot);
+ vm->gdt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
dt->base = vm->gdt;
dt->limit = getpagesize();
}
static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp,
- int selector, int gdt_memslot,
- int pgd_memslot)
+ int selector)
{
if (!vm->tss)
- vm->tss = vm_vaddr_alloc(vm, getpagesize(),
- KVM_UTIL_MIN_VADDR, gdt_memslot, pgd_memslot);
+ vm->tss = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
memset(segp, 0, sizeof(*segp));
segp->base = vm->tss;
@@ -538,16 +540,16 @@ static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp,
kvm_seg_fill_gdt_64bit(vm, segp);
}
-static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot)
+static void vcpu_setup(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
{
struct kvm_sregs sregs;
/* Set mode specific system register values. */
- vcpu_sregs_get(vm, vcpuid, &sregs);
+ vcpu_sregs_get(vcpu, &sregs);
sregs.idt.limit = 0;
- kvm_setup_gdt(vm, &sregs.gdt, gdt_memslot, pgd_memslot);
+ kvm_setup_gdt(vm, &sregs.gdt);
switch (vm->mode) {
case VM_MODE_PXXV48_4K:
@@ -556,10 +558,10 @@ static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_m
sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);
kvm_seg_set_unusable(&sregs.ldt);
- kvm_seg_set_kernel_code_64bit(vm, 0x8, &sregs.cs);
- kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.ds);
- kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.es);
- kvm_setup_tss_64bit(vm, &sregs.tr, 0x18, gdt_memslot, pgd_memslot);
+ kvm_seg_set_kernel_code_64bit(vm, DEFAULT_CODE_SELECTOR, &sregs.cs);
+ kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.ds);
+ kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.es);
+ kvm_setup_tss_64bit(vm, &sregs.tr, 0x18);
break;
default:
@@ -567,293 +569,305 @@ static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_m
}
sregs.cr3 = vm->pgd;
- vcpu_sregs_set(vm, vcpuid, &sregs);
+ vcpu_sregs_set(vcpu, &sregs);
+}
+
+void kvm_arch_vm_post_create(struct kvm_vm *vm)
+{
+ vm_create_irqchip(vm);
+ sync_global_to_guest(vm, host_cpu_is_intel);
+ sync_global_to_guest(vm, host_cpu_is_amd);
+
+ if (vm->subtype == VM_SUBTYPE_SEV)
+ sev_vm_init(vm);
+ else if (vm->subtype == VM_SUBTYPE_SEV_ES)
+ sev_es_vm_init(vm);
}
-void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
+void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
+{
+ struct kvm_regs regs;
+
+ vcpu_regs_get(vcpu, &regs);
+ regs.rip = (unsigned long) guest_code;
+ vcpu_regs_set(vcpu, &regs);
+}
+
+struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
{
struct kvm_mp_state mp_state;
struct kvm_regs regs;
vm_vaddr_t stack_vaddr;
- stack_vaddr = vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(),
- DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0);
+ struct kvm_vcpu *vcpu;
- /* Create VCPU */
- vm_vcpu_add(vm, vcpuid);
- vcpu_setup(vm, vcpuid, 0, 0);
+ stack_vaddr = __vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(),
+ DEFAULT_GUEST_STACK_VADDR_MIN,
+ MEM_REGION_DATA);
+
+ stack_vaddr += DEFAULT_STACK_PGS * getpagesize();
+
+ /*
+ * Align stack to match calling sequence requirements in section "The
+ * Stack Frame" of the System V ABI AMD64 Architecture Processor
+ * Supplement, which requires the value (%rsp + 8) to be a multiple of
+ * 16 when control is transferred to the function entry point.
+ *
+ * If this code is ever used to launch a vCPU with 32-bit entry point it
+ * may need to subtract 4 bytes instead of 8 bytes.
+ */
+ TEST_ASSERT(IS_ALIGNED(stack_vaddr, PAGE_SIZE),
+ "__vm_vaddr_alloc() did not provide a page-aligned address");
+ stack_vaddr -= 8;
+
+ vcpu = __vm_vcpu_add(vm, vcpu_id);
+ vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid());
+ vcpu_setup(vm, vcpu);
/* Setup guest general purpose registers */
- vcpu_regs_get(vm, vcpuid, &regs);
+ vcpu_regs_get(vcpu, &regs);
regs.rflags = regs.rflags | 0x2;
- regs.rsp = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize());
- regs.rip = (unsigned long) guest_code;
- vcpu_regs_set(vm, vcpuid, &regs);
+ regs.rsp = stack_vaddr;
+ vcpu_regs_set(vcpu, &regs);
/* Setup the MP state */
mp_state.mp_state = 0;
- vcpu_set_mp_state(vm, vcpuid, &mp_state);
+ vcpu_mp_state_set(vcpu, &mp_state);
+
+ return vcpu;
}
-/*
- * Allocate an instance of struct kvm_cpuid2
- *
- * Input Args: None
- *
- * Output Args: None
- *
- * Return: A pointer to the allocated struct. The caller is responsible
- * for freeing this struct.
- *
- * Since kvm_cpuid2 uses a 0-length array to allow a the size of the
- * array to be decided at allocation time, allocation is slightly
- * complicated. This function uses a reasonable default length for
- * the array and performs the appropriate allocation.
- */
-static struct kvm_cpuid2 *allocate_kvm_cpuid2(void)
-{
- struct kvm_cpuid2 *cpuid;
- int nent = 100;
- size_t size;
-
- size = sizeof(*cpuid);
- size += nent * sizeof(struct kvm_cpuid_entry2);
- cpuid = malloc(size);
- if (!cpuid) {
- perror("malloc");
- abort();
- }
+struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm, uint32_t vcpu_id)
+{
+ struct kvm_vcpu *vcpu = __vm_vcpu_add(vm, vcpu_id);
- cpuid->nent = nent;
+ vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid());
- return cpuid;
+ return vcpu;
}
-/*
- * KVM Supported CPUID Get
- *
- * Input Args: None
- *
- * Output Args:
- *
- * Return: The supported KVM CPUID
- *
- * Get the guest CPUID supported by KVM.
- */
-struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
+void vcpu_arch_free(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->cpuid)
+ free(vcpu->cpuid);
+}
+
+/* Do not use kvm_supported_cpuid directly except for validity checks. */
+static void *kvm_supported_cpuid;
+
+const struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
{
- static struct kvm_cpuid2 *cpuid;
- int ret;
int kvm_fd;
- if (cpuid)
- return cpuid;
+ if (kvm_supported_cpuid)
+ return kvm_supported_cpuid;
- cpuid = allocate_kvm_cpuid2();
- kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
- if (kvm_fd < 0)
- exit(KSFT_SKIP);
+ kvm_supported_cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES);
+ kvm_fd = open_kvm_dev_path_or_exit();
- ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid);
- TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n",
- ret, errno);
+ kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID,
+ (struct kvm_cpuid2 *)kvm_supported_cpuid);
close(kvm_fd);
- return cpuid;
+ return kvm_supported_cpuid;
}
-/*
- * Locate a cpuid entry.
- *
- * Input Args:
- * function: The function of the cpuid entry to find.
- * index: The index of the cpuid entry.
- *
- * Output Args: None
- *
- * Return: A pointer to the cpuid entry. Never returns NULL.
- */
-struct kvm_cpuid_entry2 *
-kvm_get_supported_cpuid_index(uint32_t function, uint32_t index)
+static uint32_t __kvm_cpu_has(const struct kvm_cpuid2 *cpuid,
+ uint32_t function, uint32_t index,
+ uint8_t reg, uint8_t lo, uint8_t hi)
{
- struct kvm_cpuid2 *cpuid;
- struct kvm_cpuid_entry2 *entry = NULL;
+ const struct kvm_cpuid_entry2 *entry;
int i;
- cpuid = kvm_get_supported_cpuid();
for (i = 0; i < cpuid->nent; i++) {
- if (cpuid->entries[i].function == function &&
- cpuid->entries[i].index == index) {
- entry = &cpuid->entries[i];
- break;
- }
+ entry = &cpuid->entries[i];
+
+ /*
+ * The output registers in kvm_cpuid_entry2 are in alphabetical
+ * order, but kvm_x86_cpu_feature matches that mess, so yay
+ * pointer shenanigans!
+ */
+ if (entry->function == function && entry->index == index)
+ return ((&entry->eax)[reg] & GENMASK(hi, lo)) >> lo;
}
- TEST_ASSERT(entry, "Guest CPUID entry not found: (EAX=%x, ECX=%x).",
- function, index);
- return entry;
+ return 0;
}
-/*
- * VM VCPU CPUID Set
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU id
- * cpuid - The CPUID values to set.
- *
- * Output Args: None
- *
- * Return: void
- *
- * Set the VCPU's CPUID.
- */
-void vcpu_set_cpuid(struct kvm_vm *vm,
- uint32_t vcpuid, struct kvm_cpuid2 *cpuid)
+bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid,
+ struct kvm_x86_cpu_feature feature)
+{
+ return __kvm_cpu_has(cpuid, feature.function, feature.index,
+ feature.reg, feature.bit, feature.bit);
+}
+
+uint32_t kvm_cpuid_property(const struct kvm_cpuid2 *cpuid,
+ struct kvm_x86_cpu_property property)
+{
+ return __kvm_cpu_has(cpuid, property.function, property.index,
+ property.reg, property.lo_bit, property.hi_bit);
+}
+
+uint64_t kvm_get_feature_msr(uint64_t msr_index)
{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- int rc;
+ struct {
+ struct kvm_msrs header;
+ struct kvm_msr_entry entry;
+ } buffer = {};
+ int r, kvm_fd;
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+ buffer.header.nmsrs = 1;
+ buffer.entry.index = msr_index;
+ kvm_fd = open_kvm_dev_path_or_exit();
- rc = ioctl(vcpu->fd, KVM_SET_CPUID2, cpuid);
- TEST_ASSERT(rc == 0, "KVM_SET_CPUID2 failed, rc: %i errno: %i",
- rc, errno);
+ r = __kvm_ioctl(kvm_fd, KVM_GET_MSRS, &buffer.header);
+ TEST_ASSERT(r == 1, KVM_IOCTL_ERROR(KVM_GET_MSRS, r));
+ close(kvm_fd);
+ return buffer.entry.data;
}
-struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
- void *guest_code)
+void __vm_xsave_require_permission(uint64_t xfeature, const char *name)
{
- struct kvm_vm *vm;
- /*
- * For x86 the maximum page table size for a memory region
- * will be when only 4K pages are used. In that case the
- * total extra size for page tables (for extra N pages) will
- * be: N/512+N/512^2+N/512^3+... which is definitely smaller
- * than N/512*2.
- */
- uint64_t extra_pg_pages = extra_mem_pages / 512 * 2;
+ int kvm_fd;
+ u64 bitmask;
+ long rc;
+ struct kvm_device_attr attr = {
+ .group = 0,
+ .attr = KVM_X86_XCOMP_GUEST_SUPP,
+ .addr = (unsigned long) &bitmask,
+ };
- /* Create VM */
- vm = vm_create(VM_MODE_DEFAULT,
- DEFAULT_GUEST_PHY_PAGES + extra_pg_pages,
- O_RDWR);
+ TEST_ASSERT(!kvm_supported_cpuid,
+ "kvm_get_supported_cpuid() cannot be used before ARCH_REQ_XCOMP_GUEST_PERM");
- /* Setup guest code */
- kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+ TEST_ASSERT(is_power_of_2(xfeature),
+ "Dynamic XFeatures must be enabled one at a time");
- /* Setup IRQ Chip */
- vm_create_irqchip(vm);
+ kvm_fd = open_kvm_dev_path_or_exit();
+ rc = __kvm_ioctl(kvm_fd, KVM_GET_DEVICE_ATTR, &attr);
+ close(kvm_fd);
+
+ if (rc == -1 && (errno == ENXIO || errno == EINVAL))
+ __TEST_REQUIRE(0, "KVM_X86_XCOMP_GUEST_SUPP not supported");
- /* Add the first vCPU. */
- vm_vcpu_add_default(vm, vcpuid, guest_code);
+ TEST_ASSERT(rc == 0, "KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) error: %ld", rc);
- return vm;
+ __TEST_REQUIRE(bitmask & xfeature,
+ "Required XSAVE feature '%s' not supported", name);
+
+ TEST_REQUIRE(!syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, ilog2(xfeature)));
+
+ rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask);
+ TEST_ASSERT(rc == 0, "prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc);
+ TEST_ASSERT(bitmask & xfeature,
+ "'%s' (0x%lx) not permitted after prctl(ARCH_REQ_XCOMP_GUEST_PERM) permitted=0x%lx",
+ name, xfeature, bitmask);
}
-/*
- * VCPU Get MSR
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- * msr_index - Index of MSR
- *
- * Output Args: None
- *
- * Return: On success, value of the MSR. On failure a TEST_ASSERT is produced.
- *
- * Get value of MSR for VCPU.
- */
-uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index)
+void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid)
+{
+ TEST_ASSERT(cpuid != vcpu->cpuid, "@cpuid can't be the vCPU's CPUID");
+
+ /* Allow overriding the default CPUID. */
+ if (vcpu->cpuid && vcpu->cpuid->nent < cpuid->nent) {
+ free(vcpu->cpuid);
+ vcpu->cpuid = NULL;
+ }
+
+ if (!vcpu->cpuid)
+ vcpu->cpuid = allocate_kvm_cpuid2(cpuid->nent);
+
+ memcpy(vcpu->cpuid, cpuid, kvm_cpuid2_size(cpuid->nent));
+ vcpu_set_cpuid(vcpu);
+}
+
+void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu,
+ struct kvm_x86_cpu_property property,
+ uint32_t value)
+{
+ struct kvm_cpuid_entry2 *entry;
+
+ entry = __vcpu_get_cpuid_entry(vcpu, property.function, property.index);
+
+ (&entry->eax)[property.reg] &= ~GENMASK(property.hi_bit, property.lo_bit);
+ (&entry->eax)[property.reg] |= value << property.lo_bit;
+
+ vcpu_set_cpuid(vcpu);
+
+ /* Sanity check that @value doesn't exceed the bounds in any way. */
+ TEST_ASSERT_EQ(kvm_cpuid_property(vcpu->cpuid, property), value);
+}
+
+void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function)
+{
+ struct kvm_cpuid_entry2 *entry = vcpu_get_cpuid_entry(vcpu, function);
+
+ entry->eax = 0;
+ entry->ebx = 0;
+ entry->ecx = 0;
+ entry->edx = 0;
+ vcpu_set_cpuid(vcpu);
+}
+
+void vcpu_set_or_clear_cpuid_feature(struct kvm_vcpu *vcpu,
+ struct kvm_x86_cpu_feature feature,
+ bool set)
+{
+ struct kvm_cpuid_entry2 *entry;
+ u32 *reg;
+
+ entry = __vcpu_get_cpuid_entry(vcpu, feature.function, feature.index);
+ reg = (&entry->eax) + feature.reg;
+
+ if (set)
+ *reg |= BIT(feature.bit);
+ else
+ *reg &= ~BIT(feature.bit);
+
+ vcpu_set_cpuid(vcpu);
+}
+
+uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index)
{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
struct {
struct kvm_msrs header;
struct kvm_msr_entry entry;
} buffer = {};
- int r;
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
buffer.header.nmsrs = 1;
buffer.entry.index = msr_index;
- r = ioctl(vcpu->fd, KVM_GET_MSRS, &buffer.header);
- TEST_ASSERT(r == 1, "KVM_GET_MSRS IOCTL failed,\n"
- " rc: %i errno: %i", r, errno);
+
+ vcpu_msrs_get(vcpu, &buffer.header);
return buffer.entry.data;
}
-/*
- * _VCPU Set MSR
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- * msr_index - Index of MSR
- * msr_value - New value of MSR
- *
- * Output Args: None
- *
- * Return: The result of KVM_SET_MSRS.
- *
- * Sets the value of an MSR for the given VCPU.
- */
-int _vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
- uint64_t msr_value)
+int _vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, uint64_t msr_value)
{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
struct {
struct kvm_msrs header;
struct kvm_msr_entry entry;
} buffer = {};
- int r;
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
memset(&buffer, 0, sizeof(buffer));
buffer.header.nmsrs = 1;
buffer.entry.index = msr_index;
buffer.entry.data = msr_value;
- r = ioctl(vcpu->fd, KVM_SET_MSRS, &buffer.header);
- return r;
-}
-/*
- * VCPU Set MSR
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- * msr_index - Index of MSR
- * msr_value - New value of MSR
- *
- * Output Args: None
- *
- * Return: On success, nothing. On failure a TEST_ASSERT is produced.
- *
- * Set value of MSR for VCPU.
- */
-void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
- uint64_t msr_value)
-{
- int r;
-
- r = _vcpu_set_msr(vm, vcpuid, msr_index, msr_value);
- TEST_ASSERT(r == 1, "KVM_SET_MSRS IOCTL failed,\n"
- " rc: %i errno: %i", r, errno);
+ return __vcpu_ioctl(vcpu, KVM_SET_MSRS, &buffer.header);
}
-void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
+void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
{
va_list ap;
struct kvm_regs regs;
TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n"
- " num: %u\n",
+ " num: %u",
num);
va_start(ap, num);
- vcpu_regs_get(vm, vcpuid, &regs);
+ vcpu_regs_get(vcpu, &regs);
if (num >= 1)
regs.rdi = va_arg(ap, uint64_t);
@@ -873,86 +887,112 @@ void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
if (num >= 6)
regs.r9 = va_arg(ap, uint64_t);
- vcpu_regs_set(vm, vcpuid, &regs);
+ vcpu_regs_set(vcpu, &regs);
va_end(ap);
}
-void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
+void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
{
struct kvm_regs regs;
struct kvm_sregs sregs;
- fprintf(stream, "%*scpuid: %u\n", indent, "", vcpuid);
+ fprintf(stream, "%*svCPU ID: %u\n", indent, "", vcpu->id);
fprintf(stream, "%*sregs:\n", indent + 2, "");
- vcpu_regs_get(vm, vcpuid, &regs);
+ vcpu_regs_get(vcpu, &regs);
regs_dump(stream, &regs, indent + 4);
fprintf(stream, "%*ssregs:\n", indent + 2, "");
- vcpu_sregs_get(vm, vcpuid, &sregs);
+ vcpu_sregs_get(vcpu, &sregs);
sregs_dump(stream, &sregs, indent + 4);
}
-struct kvm_x86_state {
- struct kvm_vcpu_events events;
- struct kvm_mp_state mp_state;
- struct kvm_regs regs;
- struct kvm_xsave xsave;
- struct kvm_xcrs xcrs;
- struct kvm_sregs sregs;
- struct kvm_debugregs debugregs;
- union {
- struct kvm_nested_state nested;
- char nested_[16384];
- };
- struct kvm_msrs msrs;
-};
-
-static int kvm_get_num_msrs_fd(int kvm_fd)
+static struct kvm_msr_list *__kvm_get_msr_index_list(bool feature_msrs)
{
+ struct kvm_msr_list *list;
struct kvm_msr_list nmsrs;
- int r;
+ int kvm_fd, r;
+
+ kvm_fd = open_kvm_dev_path_or_exit();
nmsrs.nmsrs = 0;
- r = ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, &nmsrs);
- TEST_ASSERT(r == -1 && errno == E2BIG, "Unexpected result from KVM_GET_MSR_INDEX_LIST probe, r: %i",
- r);
+ if (!feature_msrs)
+ r = __kvm_ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, &nmsrs);
+ else
+ r = __kvm_ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, &nmsrs);
+
+ TEST_ASSERT(r == -1 && errno == E2BIG,
+ "Expected -E2BIG, got rc: %i errno: %i (%s)",
+ r, errno, strerror(errno));
+
+ list = malloc(sizeof(*list) + nmsrs.nmsrs * sizeof(list->indices[0]));
+ TEST_ASSERT(list, "-ENOMEM when allocating MSR index list");
+ list->nmsrs = nmsrs.nmsrs;
+
+ if (!feature_msrs)
+ kvm_ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
+ else
+ kvm_ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, list);
+ close(kvm_fd);
- return nmsrs.nmsrs;
+ TEST_ASSERT(list->nmsrs == nmsrs.nmsrs,
+ "Number of MSRs in list changed, was %d, now %d",
+ nmsrs.nmsrs, list->nmsrs);
+ return list;
}
-static int kvm_get_num_msrs(struct kvm_vm *vm)
+const struct kvm_msr_list *kvm_get_msr_index_list(void)
{
- return kvm_get_num_msrs_fd(vm->kvm_fd);
+ static const struct kvm_msr_list *list;
+
+ if (!list)
+ list = __kvm_get_msr_index_list(false);
+ return list;
}
-struct kvm_msr_list *kvm_get_msr_index_list(void)
+
+const struct kvm_msr_list *kvm_get_feature_msr_index_list(void)
{
- struct kvm_msr_list *list;
- int nmsrs, r, kvm_fd;
+ static const struct kvm_msr_list *list;
- kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
- if (kvm_fd < 0)
- exit(KSFT_SKIP);
+ if (!list)
+ list = __kvm_get_msr_index_list(true);
+ return list;
+}
- nmsrs = kvm_get_num_msrs_fd(kvm_fd);
- list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
- list->nmsrs = nmsrs;
- r = ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
- close(kvm_fd);
+bool kvm_msr_is_in_save_restore_list(uint32_t msr_index)
+{
+ const struct kvm_msr_list *list = kvm_get_msr_index_list();
+ int i;
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MSR_INDEX_LIST, r: %i",
- r);
+ for (i = 0; i < list->nmsrs; ++i) {
+ if (list->indices[i] == msr_index)
+ return true;
+ }
- return list;
+ return false;
}
-struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid)
+static void vcpu_save_xsave_state(struct kvm_vcpu *vcpu,
+ struct kvm_x86_state *state)
{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- struct kvm_msr_list *list;
+ int size = vm_check_cap(vcpu->vm, KVM_CAP_XSAVE2);
+
+ if (size) {
+ state->xsave = malloc(size);
+ vcpu_xsave2_get(vcpu, state->xsave);
+ } else {
+ state->xsave = malloc(sizeof(struct kvm_xsave));
+ vcpu_xsave_get(vcpu, state->xsave);
+ }
+}
+
+struct kvm_x86_state *vcpu_save_state(struct kvm_vcpu *vcpu)
+{
+ const struct kvm_msr_list *msr_list = kvm_get_msr_index_list();
struct kvm_x86_state *state;
- int nmsrs, r, i;
+ int i;
+
static int nested_size = -1;
if (nested_size == -1) {
@@ -968,153 +1008,351 @@ struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid)
* kernel with KVM_RUN. Complete IO prior to migrating state
* to a new VM.
*/
- vcpu_run_complete_io(vm, vcpuid);
-
- nmsrs = kvm_get_num_msrs(vm);
- list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
- list->nmsrs = nmsrs;
- r = ioctl(vm->kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MSR_INDEX_LIST, r: %i",
- r);
-
- state = malloc(sizeof(*state) + nmsrs * sizeof(state->msrs.entries[0]));
- r = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, &state->events);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_VCPU_EVENTS, r: %i",
- r);
-
- r = ioctl(vcpu->fd, KVM_GET_MP_STATE, &state->mp_state);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MP_STATE, r: %i",
- r);
-
- r = ioctl(vcpu->fd, KVM_GET_REGS, &state->regs);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_REGS, r: %i",
- r);
-
- r = ioctl(vcpu->fd, KVM_GET_XSAVE, &state->xsave);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XSAVE, r: %i",
- r);
-
- if (kvm_check_cap(KVM_CAP_XCRS)) {
- r = ioctl(vcpu->fd, KVM_GET_XCRS, &state->xcrs);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XCRS, r: %i",
- r);
- }
+ vcpu_run_complete_io(vcpu);
- r = ioctl(vcpu->fd, KVM_GET_SREGS, &state->sregs);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_SREGS, r: %i",
- r);
+ state = malloc(sizeof(*state) + msr_list->nmsrs * sizeof(state->msrs.entries[0]));
+ TEST_ASSERT(state, "-ENOMEM when allocating kvm state");
+
+ vcpu_events_get(vcpu, &state->events);
+ vcpu_mp_state_get(vcpu, &state->mp_state);
+ vcpu_regs_get(vcpu, &state->regs);
+ vcpu_save_xsave_state(vcpu, state);
+
+ if (kvm_has_cap(KVM_CAP_XCRS))
+ vcpu_xcrs_get(vcpu, &state->xcrs);
+
+ vcpu_sregs_get(vcpu, &state->sregs);
if (nested_size) {
state->nested.size = sizeof(state->nested_);
- r = ioctl(vcpu->fd, KVM_GET_NESTED_STATE, &state->nested);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_NESTED_STATE, r: %i",
- r);
+
+ vcpu_nested_state_get(vcpu, &state->nested);
TEST_ASSERT(state->nested.size <= nested_size,
- "Nested state size too big, %i (KVM_CHECK_CAP gave %i)",
- state->nested.size, nested_size);
- } else
+ "Nested state size too big, %i (KVM_CHECK_CAP gave %i)",
+ state->nested.size, nested_size);
+ } else {
state->nested.size = 0;
+ }
- state->msrs.nmsrs = nmsrs;
- for (i = 0; i < nmsrs; i++)
- state->msrs.entries[i].index = list->indices[i];
- r = ioctl(vcpu->fd, KVM_GET_MSRS, &state->msrs);
- TEST_ASSERT(r == nmsrs, "Unexpected result from KVM_GET_MSRS, r: %i (failed MSR was 0x%x)",
- r, r == nmsrs ? -1 : list->indices[r]);
+ state->msrs.nmsrs = msr_list->nmsrs;
+ for (i = 0; i < msr_list->nmsrs; i++)
+ state->msrs.entries[i].index = msr_list->indices[i];
+ vcpu_msrs_get(vcpu, &state->msrs);
- r = ioctl(vcpu->fd, KVM_GET_DEBUGREGS, &state->debugregs);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_DEBUGREGS, r: %i",
- r);
+ vcpu_debugregs_get(vcpu, &state->debugregs);
- free(list);
return state;
}
-void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *state)
+void vcpu_load_state(struct kvm_vcpu *vcpu, struct kvm_x86_state *state)
{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- int r;
+ vcpu_sregs_set(vcpu, &state->sregs);
+ vcpu_msrs_set(vcpu, &state->msrs);
- r = ioctl(vcpu->fd, KVM_SET_XSAVE, &state->xsave);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XSAVE, r: %i",
- r);
+ if (kvm_has_cap(KVM_CAP_XCRS))
+ vcpu_xcrs_set(vcpu, &state->xcrs);
+
+ vcpu_xsave_set(vcpu, state->xsave);
+ vcpu_events_set(vcpu, &state->events);
+ vcpu_mp_state_set(vcpu, &state->mp_state);
+ vcpu_debugregs_set(vcpu, &state->debugregs);
+ vcpu_regs_set(vcpu, &state->regs);
+
+ if (state->nested.size)
+ vcpu_nested_state_set(vcpu, &state->nested);
+}
+
+void kvm_x86_state_cleanup(struct kvm_x86_state *state)
+{
+ free(state->xsave);
+ free(state);
+}
+
+void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits)
+{
+ if (!kvm_cpu_has_p(X86_PROPERTY_MAX_PHY_ADDR)) {
+ *pa_bits = kvm_cpu_has(X86_FEATURE_PAE) ? 36 : 32;
+ *va_bits = 32;
+ } else {
+ *pa_bits = kvm_cpu_property(X86_PROPERTY_MAX_PHY_ADDR);
+ *va_bits = kvm_cpu_property(X86_PROPERTY_MAX_VIRT_ADDR);
+ }
+}
- if (kvm_check_cap(KVM_CAP_XCRS)) {
- r = ioctl(vcpu->fd, KVM_SET_XCRS, &state->xcrs);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XCRS, r: %i",
- r);
+void kvm_init_vm_address_properties(struct kvm_vm *vm)
+{
+ if (vm->subtype == VM_SUBTYPE_SEV || vm->subtype == VM_SUBTYPE_SEV_ES) {
+ vm->arch.c_bit = BIT_ULL(this_cpu_property(X86_PROPERTY_SEV_C_BIT));
+ vm->gpa_tag_mask = vm->arch.c_bit;
}
+}
- r = ioctl(vcpu->fd, KVM_SET_SREGS, &state->sregs);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_SREGS, r: %i",
- r);
+static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr,
+ int dpl, unsigned short selector)
+{
+ struct idt_entry *base =
+ (struct idt_entry *)addr_gva2hva(vm, vm->idt);
+ struct idt_entry *e = &base[vector];
+
+ memset(e, 0, sizeof(*e));
+ e->offset0 = addr;
+ e->selector = selector;
+ e->ist = 0;
+ e->type = 14;
+ e->dpl = dpl;
+ e->p = 1;
+ e->offset1 = addr >> 16;
+ e->offset2 = addr >> 32;
+}
- r = ioctl(vcpu->fd, KVM_SET_MSRS, &state->msrs);
- TEST_ASSERT(r == state->msrs.nmsrs, "Unexpected result from KVM_SET_MSRS, r: %i (failed at %x)",
- r, r == state->msrs.nmsrs ? -1 : state->msrs.entries[r].index);
- r = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, &state->events);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_VCPU_EVENTS, r: %i",
- r);
+static bool kvm_fixup_exception(struct ex_regs *regs)
+{
+ if (regs->r9 != KVM_EXCEPTION_MAGIC || regs->rip != regs->r10)
+ return false;
- r = ioctl(vcpu->fd, KVM_SET_MP_STATE, &state->mp_state);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_MP_STATE, r: %i",
- r);
+ if (regs->vector == DE_VECTOR)
+ return false;
- r = ioctl(vcpu->fd, KVM_SET_DEBUGREGS, &state->debugregs);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_DEBUGREGS, r: %i",
- r);
+ regs->rip = regs->r11;
+ regs->r9 = regs->vector;
+ regs->r10 = regs->error_code;
+ return true;
+}
- r = ioctl(vcpu->fd, KVM_SET_REGS, &state->regs);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_REGS, r: %i",
- r);
+void route_exception(struct ex_regs *regs)
+{
+ typedef void(*handler)(struct ex_regs *);
+ handler *handlers = (handler *)exception_handlers;
- if (state->nested.size) {
- r = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, &state->nested);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_NESTED_STATE, r: %i",
- r);
+ if (handlers && handlers[regs->vector]) {
+ handlers[regs->vector](regs);
+ return;
}
+
+ if (kvm_fixup_exception(regs))
+ return;
+
+ ucall_assert(UCALL_UNHANDLED,
+ "Unhandled exception in guest", __FILE__, __LINE__,
+ "Unhandled exception '0x%lx' at guest RIP '0x%lx'",
+ regs->vector, regs->rip);
}
-bool is_intel_cpu(void)
+void vm_init_descriptor_tables(struct kvm_vm *vm)
{
- int eax, ebx, ecx, edx;
- const uint32_t *chunk;
- const int leaf = 0;
+ extern void *idt_handlers;
+ int i;
+
+ vm->idt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
+ vm->handlers = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
+ /* Handlers have the same address in both address spaces.*/
+ for (i = 0; i < NUM_INTERRUPTS; i++)
+ set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0,
+ DEFAULT_CODE_SELECTOR);
+}
- __asm__ __volatile__(
- "cpuid"
- : /* output */ "=a"(eax), "=b"(ebx),
- "=c"(ecx), "=d"(edx)
- : /* input */ "0"(leaf), "2"(0));
+void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vm *vm = vcpu->vm;
+ struct kvm_sregs sregs;
- chunk = (const uint32_t *)("GenuineIntel");
- return (ebx == chunk[0] && edx == chunk[1] && ecx == chunk[2]);
+ vcpu_sregs_get(vcpu, &sregs);
+ sregs.idt.base = vm->idt;
+ sregs.idt.limit = NUM_INTERRUPTS * sizeof(struct idt_entry) - 1;
+ sregs.gdt.base = vm->gdt;
+ sregs.gdt.limit = getpagesize() - 1;
+ kvm_seg_set_kernel_data_64bit(NULL, DEFAULT_DATA_SELECTOR, &sregs.gs);
+ vcpu_sregs_set(vcpu, &sregs);
+ *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
}
-uint32_t kvm_get_cpuid_max_basic(void)
+void vm_install_exception_handler(struct kvm_vm *vm, int vector,
+ void (*handler)(struct ex_regs *))
{
- return kvm_get_supported_cpuid_entry(0)->eax;
+ vm_vaddr_t *handlers = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers);
+
+ handlers[vector] = (vm_vaddr_t)handler;
}
-uint32_t kvm_get_cpuid_max_extended(void)
+void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
{
- return kvm_get_supported_cpuid_entry(0x80000000)->eax;
+ struct ucall uc;
+
+ if (get_ucall(vcpu, &uc) == UCALL_UNHANDLED)
+ REPORT_GUEST_ASSERT(uc);
}
-void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits)
+const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid,
+ uint32_t function, uint32_t index)
{
- struct kvm_cpuid_entry2 *entry;
- bool pae;
+ int i;
- /* SDM 4.1.4 */
- if (kvm_get_cpuid_max_extended() < 0x80000008) {
- pae = kvm_get_supported_cpuid_entry(1)->edx & (1 << 6);
- *pa_bits = pae ? 36 : 32;
- *va_bits = 32;
- } else {
- entry = kvm_get_supported_cpuid_entry(0x80000008);
- *pa_bits = entry->eax & 0xff;
- *va_bits = (entry->eax >> 8) & 0xff;
+ for (i = 0; i < cpuid->nent; i++) {
+ if (cpuid->entries[i].function == function &&
+ cpuid->entries[i].index == index)
+ return &cpuid->entries[i];
+ }
+
+ TEST_FAIL("CPUID function 0x%x index 0x%x not found ", function, index);
+
+ return NULL;
+}
+
+#define X86_HYPERCALL(inputs...) \
+({ \
+ uint64_t r; \
+ \
+ asm volatile("test %[use_vmmcall], %[use_vmmcall]\n\t" \
+ "jnz 1f\n\t" \
+ "vmcall\n\t" \
+ "jmp 2f\n\t" \
+ "1: vmmcall\n\t" \
+ "2:" \
+ : "=a"(r) \
+ : [use_vmmcall] "r" (host_cpu_is_amd), inputs); \
+ \
+ r; \
+})
+
+uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
+ uint64_t a3)
+{
+ return X86_HYPERCALL("a"(nr), "b"(a0), "c"(a1), "d"(a2), "S"(a3));
+}
+
+uint64_t __xen_hypercall(uint64_t nr, uint64_t a0, void *a1)
+{
+ return X86_HYPERCALL("a"(nr), "D"(a0), "S"(a1));
+}
+
+void xen_hypercall(uint64_t nr, uint64_t a0, void *a1)
+{
+ GUEST_ASSERT(!__xen_hypercall(nr, a0, a1));
+}
+
+const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void)
+{
+ static struct kvm_cpuid2 *cpuid;
+ int kvm_fd;
+
+ if (cpuid)
+ return cpuid;
+
+ cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES);
+ kvm_fd = open_kvm_dev_path_or_exit();
+
+ kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
+
+ close(kvm_fd);
+ return cpuid;
+}
+
+void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu)
+{
+ static struct kvm_cpuid2 *cpuid_full;
+ const struct kvm_cpuid2 *cpuid_sys, *cpuid_hv;
+ int i, nent = 0;
+
+ if (!cpuid_full) {
+ cpuid_sys = kvm_get_supported_cpuid();
+ cpuid_hv = kvm_get_supported_hv_cpuid();
+
+ cpuid_full = allocate_kvm_cpuid2(cpuid_sys->nent + cpuid_hv->nent);
+ if (!cpuid_full) {
+ perror("malloc");
+ abort();
+ }
+
+ /* Need to skip KVM CPUID leaves 0x400000xx */
+ for (i = 0; i < cpuid_sys->nent; i++) {
+ if (cpuid_sys->entries[i].function >= 0x40000000 &&
+ cpuid_sys->entries[i].function < 0x40000100)
+ continue;
+ cpuid_full->entries[nent] = cpuid_sys->entries[i];
+ nent++;
+ }
+
+ memcpy(&cpuid_full->entries[nent], cpuid_hv->entries,
+ cpuid_hv->nent * sizeof(struct kvm_cpuid_entry2));
+ cpuid_full->nent = nent + cpuid_hv->nent;
}
+
+ vcpu_init_cpuid(vcpu, cpuid_full);
+}
+
+const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid2 *cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES);
+
+ vcpu_ioctl(vcpu, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
+
+ return cpuid;
+}
+
+unsigned long vm_compute_max_gfn(struct kvm_vm *vm)
+{
+ const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */
+ unsigned long ht_gfn, max_gfn, max_pfn;
+ uint8_t maxphyaddr;
+
+ max_gfn = (1ULL << (vm->pa_bits - vm->page_shift)) - 1;
+
+ /* Avoid reserved HyperTransport region on AMD processors. */
+ if (!host_cpu_is_amd)
+ return max_gfn;
+
+ /* On parts with <40 physical address bits, the area is fully hidden */
+ if (vm->pa_bits < 40)
+ return max_gfn;
+
+ /* Before family 17h, the HyperTransport area is just below 1T. */
+ ht_gfn = (1 << 28) - num_ht_pages;
+ if (this_cpu_family() < 0x17)
+ goto done;
+
+ /*
+ * Otherwise it's at the top of the physical address space, possibly
+ * reduced due to SME by bits 11:6 of CPUID[0x8000001f].EBX. Use
+ * the old conservative value if MAXPHYADDR is not enumerated.
+ */
+ if (!this_cpu_has_p(X86_PROPERTY_MAX_PHY_ADDR))
+ goto done;
+
+ maxphyaddr = this_cpu_property(X86_PROPERTY_MAX_PHY_ADDR);
+ max_pfn = (1ULL << (maxphyaddr - vm->page_shift)) - 1;
+
+ if (this_cpu_has_p(X86_PROPERTY_PHYS_ADDR_REDUCTION))
+ max_pfn >>= this_cpu_property(X86_PROPERTY_PHYS_ADDR_REDUCTION);
+
+ ht_gfn = max_pfn - num_ht_pages;
+done:
+ return min(max_gfn, ht_gfn - 1);
+}
+
+/* Returns true if kvm_intel was loaded with unrestricted_guest=1. */
+bool vm_is_unrestricted_guest(struct kvm_vm *vm)
+{
+ /* Ensure that a KVM vendor-specific module is loaded. */
+ if (vm == NULL)
+ close(open_kvm_dev_path_or_exit());
+
+ return get_kvm_intel_param_bool("unrestricted_guest");
+}
+
+void kvm_selftest_arch_init(void)
+{
+ host_cpu_is_intel = this_cpu_is_intel();
+ host_cpu_is_amd = this_cpu_is_amd();
+}
+
+bool sys_clocksource_is_based_on_tsc(void)
+{
+ char *clk_name = sys_get_cur_clocksource();
+ bool ret = !strcmp(clk_name, "tsc\n") ||
+ !strcmp(clk_name, "hyperv_clocksource_tsc_page\n");
+
+ free(clk_name);
+
+ return ret;
}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/sev.c b/tools/testing/selftests/kvm/lib/x86_64/sev.c
new file mode 100644
index 000000000000..e248d3364b9c
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/x86_64/sev.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <stdint.h>
+#include <stdbool.h>
+
+#include "sev.h"
+
+/*
+ * sparsebit_next_clear() can return 0 if [x, 2**64-1] are all set, and the
+ * -1 would then cause an underflow back to 2**64 - 1. This is expected and
+ * correct.
+ *
+ * If the last range in the sparsebit is [x, y] and we try to iterate,
+ * sparsebit_next_set() will return 0, and sparsebit_next_clear() will try
+ * and find the first range, but that's correct because the condition
+ * expression would cause us to quit the loop.
+ */
+static void encrypt_region(struct kvm_vm *vm, struct userspace_mem_region *region)
+{
+ const struct sparsebit *protected_phy_pages = region->protected_phy_pages;
+ const vm_paddr_t gpa_base = region->region.guest_phys_addr;
+ const sparsebit_idx_t lowest_page_in_region = gpa_base >> vm->page_shift;
+ sparsebit_idx_t i, j;
+
+ if (!sparsebit_any_set(protected_phy_pages))
+ return;
+
+ sev_register_encrypted_memory(vm, region);
+
+ sparsebit_for_each_set_range(protected_phy_pages, i, j) {
+ const uint64_t size = (j - i + 1) * vm->page_size;
+ const uint64_t offset = (i - lowest_page_in_region) * vm->page_size;
+
+ sev_launch_update_data(vm, gpa_base + offset, size);
+ }
+}
+
+void sev_vm_launch(struct kvm_vm *vm, uint32_t policy)
+{
+ struct kvm_sev_launch_start launch_start = {
+ .policy = policy,
+ };
+ struct userspace_mem_region *region;
+ struct kvm_sev_guest_status status;
+ int ctr;
+
+ vm_sev_ioctl(vm, KVM_SEV_LAUNCH_START, &launch_start);
+ vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status);
+
+ TEST_ASSERT_EQ(status.policy, policy);
+ TEST_ASSERT_EQ(status.state, SEV_GUEST_STATE_LAUNCH_UPDATE);
+
+ hash_for_each(vm->regions.slot_hash, ctr, region, slot_node)
+ encrypt_region(vm, region);
+
+ if (policy & SEV_POLICY_ES)
+ vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
+
+ vm->arch.is_pt_protected = true;
+}
+
+void sev_vm_launch_measure(struct kvm_vm *vm, uint8_t *measurement)
+{
+ struct kvm_sev_launch_measure launch_measure;
+ struct kvm_sev_guest_status guest_status;
+
+ launch_measure.len = 256;
+ launch_measure.uaddr = (__u64)measurement;
+ vm_sev_ioctl(vm, KVM_SEV_LAUNCH_MEASURE, &launch_measure);
+
+ vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &guest_status);
+ TEST_ASSERT_EQ(guest_status.state, SEV_GUEST_STATE_LAUNCH_SECRET);
+}
+
+void sev_vm_launch_finish(struct kvm_vm *vm)
+{
+ struct kvm_sev_guest_status status;
+
+ vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status);
+ TEST_ASSERT(status.state == SEV_GUEST_STATE_LAUNCH_UPDATE ||
+ status.state == SEV_GUEST_STATE_LAUNCH_SECRET,
+ "Unexpected guest state: %d", status.state);
+
+ vm_sev_ioctl(vm, KVM_SEV_LAUNCH_FINISH, NULL);
+
+ vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status);
+ TEST_ASSERT_EQ(status.state, SEV_GUEST_STATE_RUNNING);
+}
+
+struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t policy, void *guest_code,
+ struct kvm_vcpu **cpu)
+{
+ struct vm_shape shape = {
+ .type = VM_TYPE_DEFAULT,
+ .mode = VM_MODE_DEFAULT,
+ .subtype = policy & SEV_POLICY_ES ? VM_SUBTYPE_SEV_ES :
+ VM_SUBTYPE_SEV,
+ };
+ struct kvm_vm *vm;
+ struct kvm_vcpu *cpus[1];
+ uint8_t measurement[512];
+
+ vm = __vm_create_with_vcpus(shape, 1, 0, guest_code, cpus);
+ *cpu = cpus[0];
+
+ sev_vm_launch(vm, policy);
+
+ /* TODO: Validate the measurement is as expected. */
+ sev_vm_launch_measure(vm, measurement);
+
+ sev_vm_launch_finish(vm);
+
+ return vm;
+}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/svm.c b/tools/testing/selftests/kvm/lib/x86_64/svm.c
index 3a5c72ed2b79..5495a92dfd5a 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/svm.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/svm.c
@@ -9,10 +9,11 @@
#include "test_util.h"
#include "kvm_util.h"
-#include "../kvm_util_internal.h"
#include "processor.h"
#include "svm_util.h"
+#define SEV_DEV_PATH "/dev/sev"
+
struct gpr64_regs guest_regs;
u64 rflags;
@@ -30,20 +31,22 @@ u64 rflags;
struct svm_test_data *
vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva)
{
- vm_vaddr_t svm_gva = vm_vaddr_alloc(vm, getpagesize(),
- 0x10000, 0, 0);
+ vm_vaddr_t svm_gva = vm_vaddr_alloc_page(vm);
struct svm_test_data *svm = addr_gva2hva(vm, svm_gva);
- svm->vmcb = (void *)vm_vaddr_alloc(vm, getpagesize(),
- 0x10000, 0, 0);
+ svm->vmcb = (void *)vm_vaddr_alloc_page(vm);
svm->vmcb_hva = addr_gva2hva(vm, (uintptr_t)svm->vmcb);
svm->vmcb_gpa = addr_gva2gpa(vm, (uintptr_t)svm->vmcb);
- svm->save_area = (void *)vm_vaddr_alloc(vm, getpagesize(),
- 0x10000, 0, 0);
+ svm->save_area = (void *)vm_vaddr_alloc_page(vm);
svm->save_area_hva = addr_gva2hva(vm, (uintptr_t)svm->save_area);
svm->save_area_gpa = addr_gva2gpa(vm, (uintptr_t)svm->save_area);
+ svm->msr = (void *)vm_vaddr_alloc_page(vm);
+ svm->msr_hva = addr_gva2hva(vm, (uintptr_t)svm->msr);
+ svm->msr_gpa = addr_gva2gpa(vm, (uintptr_t)svm->msr);
+ memset(svm->msr_hva, 0, getpagesize());
+
*p_svm_gva = svm_gva;
return svm;
}
@@ -74,7 +77,7 @@ void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_r
wrmsr(MSR_VM_HSAVE_PA, svm->save_area_gpa);
memset(vmcb, 0, sizeof(*vmcb));
- asm volatile ("vmsave\n\t" : : "a" (vmcb_gpa) : "memory");
+ asm volatile ("vmsave %0\n\t" : : "a" (vmcb_gpa) : "memory");
vmcb_set_seg(&save->es, get_es(), 0, -1U, data_seg_attr);
vmcb_set_seg(&save->cs, get_cs(), 0, -1U, code_seg_attr);
vmcb_set_seg(&save->ss, get_ss(), 0, -1U, data_seg_attr);
@@ -95,6 +98,7 @@ void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_r
save->dbgctl = rdmsr(MSR_IA32_DEBUGCTLMSR);
ctrl->intercept = (1ULL << INTERCEPT_VMRUN) |
(1ULL << INTERCEPT_VMMCALL);
+ ctrl->msrpm_base_pa = svm->msr_gpa;
vmcb->save.rip = (u64)guest_rip;
vmcb->save.rsp = (u64)guest_rsp;
@@ -131,35 +135,30 @@ void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_r
void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa)
{
asm volatile (
- "vmload\n\t"
+ "vmload %[vmcb_gpa]\n\t"
"mov rflags, %%r15\n\t" // rflags
"mov %%r15, 0x170(%[vmcb])\n\t"
"mov guest_regs, %%r15\n\t" // rax
"mov %%r15, 0x1f8(%[vmcb])\n\t"
LOAD_GPR_C
- "vmrun\n\t"
+ "vmrun %[vmcb_gpa]\n\t"
SAVE_GPR_C
"mov 0x170(%[vmcb]), %%r15\n\t" // rflags
"mov %%r15, rflags\n\t"
"mov 0x1f8(%[vmcb]), %%r15\n\t" // rax
"mov %%r15, guest_regs\n\t"
- "vmsave\n\t"
+ "vmsave %[vmcb_gpa]\n\t"
: : [vmcb] "r" (vmcb), [vmcb_gpa] "a" (vmcb_gpa)
: "r15", "memory");
}
-bool nested_svm_supported(void)
-{
- struct kvm_cpuid_entry2 *entry =
- kvm_get_supported_cpuid_entry(0x80000001);
-
- return entry->ecx & CPUID_SVM;
-}
-
-void nested_svm_check_supported(void)
+/*
+ * Open SEV_DEV_PATH if available, otherwise exit the entire program.
+ *
+ * Return:
+ * The opened file descriptor of /dev/sev.
+ */
+int open_sev_dev_path_or_exit(void)
{
- if (!nested_svm_supported()) {
- print_skip("nested SVM not enabled");
- exit(KSFT_SKIP);
- }
+ return open_path_or_exit(SEV_DEV_PATH, 0);
}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/ucall.c b/tools/testing/selftests/kvm/lib/x86_64/ucall.c
index da4d89ad5419..1265cecc7dd1 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/ucall.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/ucall.c
@@ -8,49 +8,49 @@
#define UCALL_PIO_PORT ((uint16_t)0x1000)
-void ucall_init(struct kvm_vm *vm, void *arg)
+void ucall_arch_do_ucall(vm_vaddr_t uc)
{
+ /*
+ * FIXME: Revert this hack (the entire commit that added it) once nVMX
+ * preserves L2 GPRs across a nested VM-Exit. If a ucall from L2, e.g.
+ * to do a GUEST_SYNC(), lands the vCPU in L1, any and all GPRs can be
+ * clobbered by L1. Save and restore non-volatile GPRs (clobbering RBP
+ * in particular is problematic) along with RDX and RDI (which are
+ * inputs), and clobber volatile GPRs. *sigh*
+ */
+#define HORRIFIC_L2_UCALL_CLOBBER_HACK \
+ "rcx", "rsi", "r8", "r9", "r10", "r11"
+
+ asm volatile("push %%rbp\n\t"
+ "push %%r15\n\t"
+ "push %%r14\n\t"
+ "push %%r13\n\t"
+ "push %%r12\n\t"
+ "push %%rbx\n\t"
+ "push %%rdx\n\t"
+ "push %%rdi\n\t"
+ "in %[port], %%al\n\t"
+ "pop %%rdi\n\t"
+ "pop %%rdx\n\t"
+ "pop %%rbx\n\t"
+ "pop %%r12\n\t"
+ "pop %%r13\n\t"
+ "pop %%r14\n\t"
+ "pop %%r15\n\t"
+ "pop %%rbp\n\t"
+ : : [port] "d" (UCALL_PIO_PORT), "D" (uc) : "rax", "memory",
+ HORRIFIC_L2_UCALL_CLOBBER_HACK);
}
-void ucall_uninit(struct kvm_vm *vm)
+void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
{
-}
-
-void ucall(uint64_t cmd, int nargs, ...)
-{
- struct ucall uc = {
- .cmd = cmd,
- };
- va_list va;
- int i;
-
- nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS;
-
- va_start(va, nargs);
- for (i = 0; i < nargs; ++i)
- uc.args[i] = va_arg(va, uint64_t);
- va_end(va);
-
- asm volatile("in %[port], %%al"
- : : [port] "d" (UCALL_PIO_PORT), "D" (&uc) : "rax", "memory");
-}
-
-uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
-{
- struct kvm_run *run = vcpu_state(vm, vcpu_id);
- struct ucall ucall = {};
+ struct kvm_run *run = vcpu->run;
if (run->exit_reason == KVM_EXIT_IO && run->io.port == UCALL_PIO_PORT) {
struct kvm_regs regs;
- vcpu_regs_get(vm, vcpu_id, &regs);
- memcpy(&ucall, addr_gva2hva(vm, (vm_vaddr_t)regs.rdi),
- sizeof(ucall));
-
- vcpu_run_complete_io(vm, vcpu_id);
- if (uc)
- memcpy(uc, &ucall, sizeof(ucall));
+ vcpu_regs_get(vcpu, &regs);
+ return (void *)regs.rdi;
}
-
- return ucall.cmd;
+ return NULL;
}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
index f1e00d43eea2..089b8925b6b2 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
@@ -5,9 +5,10 @@
* Copyright (C) 2018, Google LLC.
*/
+#include <asm/msr-index.h>
+
#include "test_util.h"
#include "kvm_util.h"
-#include "../kvm_util_internal.h"
#include "processor.h"
#include "vmx.h"
@@ -43,21 +44,17 @@ struct eptPageTablePointer {
uint64_t address:40;
uint64_t reserved_63_52:12;
};
-int vcpu_enable_evmcs(struct kvm_vm *vm, int vcpu_id)
+int vcpu_enable_evmcs(struct kvm_vcpu *vcpu)
{
uint16_t evmcs_ver;
- struct kvm_enable_cap enable_evmcs_cap = {
- .cap = KVM_CAP_HYPERV_ENLIGHTENED_VMCS,
- .args[0] = (unsigned long)&evmcs_ver
- };
-
- vcpu_ioctl(vm, vcpu_id, KVM_ENABLE_CAP, &enable_evmcs_cap);
+ vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_ENLIGHTENED_VMCS,
+ (unsigned long)&evmcs_ver);
/* KVM should return supported EVMCS version range */
TEST_ASSERT(((evmcs_ver >> 8) >= (evmcs_ver & 0xff)) &&
(evmcs_ver & 0xff) > 0,
- "Incorrect EVMCS version range: %x:%x\n",
+ "Incorrect EVMCS version range: %x:%x",
evmcs_ver & 0xff, evmcs_ver >> 8);
return evmcs_ver;
@@ -77,55 +74,41 @@ int vcpu_enable_evmcs(struct kvm_vm *vm, int vcpu_id)
struct vmx_pages *
vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva)
{
- vm_vaddr_t vmx_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vm_vaddr_t vmx_gva = vm_vaddr_alloc_page(vm);
struct vmx_pages *vmx = addr_gva2hva(vm, vmx_gva);
/* Setup of a region of guest memory for the vmxon region. */
- vmx->vmxon = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vmx->vmxon = (void *)vm_vaddr_alloc_page(vm);
vmx->vmxon_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmxon);
vmx->vmxon_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmxon);
/* Setup of a region of guest memory for a vmcs. */
- vmx->vmcs = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vmx->vmcs = (void *)vm_vaddr_alloc_page(vm);
vmx->vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmcs);
vmx->vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmcs);
/* Setup of a region of guest memory for the MSR bitmap. */
- vmx->msr = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vmx->msr = (void *)vm_vaddr_alloc_page(vm);
vmx->msr_hva = addr_gva2hva(vm, (uintptr_t)vmx->msr);
vmx->msr_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->msr);
memset(vmx->msr_hva, 0, getpagesize());
/* Setup of a region of guest memory for the shadow VMCS. */
- vmx->shadow_vmcs = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vmx->shadow_vmcs = (void *)vm_vaddr_alloc_page(vm);
vmx->shadow_vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->shadow_vmcs);
vmx->shadow_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->shadow_vmcs);
/* Setup of a region of guest memory for the VMREAD and VMWRITE bitmaps. */
- vmx->vmread = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vmx->vmread = (void *)vm_vaddr_alloc_page(vm);
vmx->vmread_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmread);
vmx->vmread_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmread);
memset(vmx->vmread_hva, 0, getpagesize());
- vmx->vmwrite = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vmx->vmwrite = (void *)vm_vaddr_alloc_page(vm);
vmx->vmwrite_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmwrite);
vmx->vmwrite_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmwrite);
memset(vmx->vmwrite_hva, 0, getpagesize());
- /* Setup of a region of guest memory for the VP Assist page. */
- vmx->vp_assist = (void *)vm_vaddr_alloc(vm, getpagesize(),
- 0x10000, 0, 0);
- vmx->vp_assist_hva = addr_gva2hva(vm, (uintptr_t)vmx->vp_assist);
- vmx->vp_assist_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vp_assist);
-
- /* Setup of a region of guest memory for the enlightened VMCS. */
- vmx->enlightened_vmcs = (void *)vm_vaddr_alloc(vm, getpagesize(),
- 0x10000, 0, 0);
- vmx->enlightened_vmcs_hva =
- addr_gva2hva(vm, (uintptr_t)vmx->enlightened_vmcs);
- vmx->enlightened_vmcs_gpa =
- addr_gva2gpa(vm, (uintptr_t)vmx->enlightened_vmcs);
-
*p_vmx_gva = vmx_gva;
return vmx;
}
@@ -176,30 +159,32 @@ bool prepare_for_vmx_operation(struct vmx_pages *vmx)
bool load_vmcs(struct vmx_pages *vmx)
{
- if (!enable_evmcs) {
- /* Load a VMCS. */
- *(uint32_t *)(vmx->vmcs) = vmcs_revision();
- if (vmclear(vmx->vmcs_gpa))
- return false;
-
- if (vmptrld(vmx->vmcs_gpa))
- return false;
-
- /* Setup shadow VMCS, do not load it yet. */
- *(uint32_t *)(vmx->shadow_vmcs) =
- vmcs_revision() | 0x80000000ul;
- if (vmclear(vmx->shadow_vmcs_gpa))
- return false;
- } else {
- if (evmcs_vmptrld(vmx->enlightened_vmcs_gpa,
- vmx->enlightened_vmcs))
- return false;
- current_evmcs->revision_id = EVMCS_VERSION;
- }
+ /* Load a VMCS. */
+ *(uint32_t *)(vmx->vmcs) = vmcs_revision();
+ if (vmclear(vmx->vmcs_gpa))
+ return false;
+
+ if (vmptrld(vmx->vmcs_gpa))
+ return false;
+
+ /* Setup shadow VMCS, do not load it yet. */
+ *(uint32_t *)(vmx->shadow_vmcs) = vmcs_revision() | 0x80000000ul;
+ if (vmclear(vmx->shadow_vmcs_gpa))
+ return false;
return true;
}
+static bool ept_vpid_cap_supported(uint64_t mask)
+{
+ return rdmsr(MSR_IA32_VMX_EPT_VPID_CAP) & mask;
+}
+
+bool ept_1g_pages_supported(void)
+{
+ return ept_vpid_cap_supported(VMX_EPT_VPID_CAP_1G_PAGES);
+}
+
/*
* Initialize the control fields to the most basic settings possible.
*/
@@ -217,7 +202,7 @@ static inline void init_vmcs_control_fields(struct vmx_pages *vmx)
struct eptPageTablePointer eptp = {
.memory_type = VMX_BASIC_MEM_TYPE_WB,
.page_walk_length = 3, /* + 1 */
- .ad_enabled = !!(rdmsr(MSR_IA32_VMX_EPT_VPID_CAP) & VMX_EPT_VPID_CAP_AD_BITS),
+ .ad_enabled = ept_vpid_cap_supported(VMX_EPT_VPID_CAP_AD_BITS),
.address = vmx->eptp_gpa >> PAGE_SHIFT_4K,
};
@@ -379,101 +364,93 @@ void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp)
init_vmcs_guest_state(guest_rip, guest_rsp);
}
-bool nested_vmx_supported(void)
+static void nested_create_pte(struct kvm_vm *vm,
+ struct eptPageTableEntry *pte,
+ uint64_t nested_paddr,
+ uint64_t paddr,
+ int current_level,
+ int target_level)
{
- struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
-
- return entry->ecx & CPUID_VMX;
-}
-
-void nested_vmx_check_supported(void)
-{
- if (!nested_vmx_supported()) {
- print_skip("nested VMX not enabled");
- exit(KSFT_SKIP);
+ if (!pte->readable) {
+ pte->writable = true;
+ pte->readable = true;
+ pte->executable = true;
+ pte->page_size = (current_level == target_level);
+ if (pte->page_size)
+ pte->address = paddr >> vm->page_shift;
+ else
+ pte->address = vm_alloc_page_table(vm) >> vm->page_shift;
+ } else {
+ /*
+ * Entry already present. Assert that the caller doesn't want
+ * a hugepage at this level, and that there isn't a hugepage at
+ * this level.
+ */
+ TEST_ASSERT(current_level != target_level,
+ "Cannot create hugepage at level: %u, nested_paddr: 0x%lx",
+ current_level, nested_paddr);
+ TEST_ASSERT(!pte->page_size,
+ "Cannot create page table at level: %u, nested_paddr: 0x%lx",
+ current_level, nested_paddr);
}
}
-void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t nested_paddr, uint64_t paddr, uint32_t eptp_memslot)
+
+void __nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint64_t nested_paddr, uint64_t paddr, int target_level)
{
- uint16_t index[4];
- struct eptPageTableEntry *pml4e;
+ const uint64_t page_size = PG_LEVEL_SIZE(target_level);
+ struct eptPageTableEntry *pt = vmx->eptp_hva, *pte;
+ uint16_t index;
TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
- TEST_ASSERT((nested_paddr % vm->page_size) == 0,
+ TEST_ASSERT((nested_paddr >> 48) == 0,
+ "Nested physical address 0x%lx requires 5-level paging",
+ nested_paddr);
+ TEST_ASSERT((nested_paddr % page_size) == 0,
"Nested physical address not on page boundary,\n"
- " nested_paddr: 0x%lx vm->page_size: 0x%x",
- nested_paddr, vm->page_size);
+ " nested_paddr: 0x%lx page_size: 0x%lx",
+ nested_paddr, page_size);
TEST_ASSERT((nested_paddr >> vm->page_shift) <= vm->max_gfn,
"Physical address beyond beyond maximum supported,\n"
" nested_paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
paddr, vm->max_gfn, vm->page_size);
- TEST_ASSERT((paddr % vm->page_size) == 0,
+ TEST_ASSERT((paddr % page_size) == 0,
"Physical address not on page boundary,\n"
- " paddr: 0x%lx vm->page_size: 0x%x",
- paddr, vm->page_size);
+ " paddr: 0x%lx page_size: 0x%lx",
+ paddr, page_size);
TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
"Physical address beyond beyond maximum supported,\n"
" paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
paddr, vm->max_gfn, vm->page_size);
- index[0] = (nested_paddr >> 12) & 0x1ffu;
- index[1] = (nested_paddr >> 21) & 0x1ffu;
- index[2] = (nested_paddr >> 30) & 0x1ffu;
- index[3] = (nested_paddr >> 39) & 0x1ffu;
-
- /* Allocate page directory pointer table if not present. */
- pml4e = vmx->eptp_hva;
- if (!pml4e[index[3]].readable) {
- pml4e[index[3]].address = vm_phy_page_alloc(vm,
- KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot)
- >> vm->page_shift;
- pml4e[index[3]].writable = true;
- pml4e[index[3]].readable = true;
- pml4e[index[3]].executable = true;
- }
+ for (int level = PG_LEVEL_512G; level >= PG_LEVEL_4K; level--) {
+ index = (nested_paddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu;
+ pte = &pt[index];
- /* Allocate page directory table if not present. */
- struct eptPageTableEntry *pdpe;
- pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
- if (!pdpe[index[2]].readable) {
- pdpe[index[2]].address = vm_phy_page_alloc(vm,
- KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot)
- >> vm->page_shift;
- pdpe[index[2]].writable = true;
- pdpe[index[2]].readable = true;
- pdpe[index[2]].executable = true;
- }
+ nested_create_pte(vm, pte, nested_paddr, paddr, level, target_level);
- /* Allocate page table if not present. */
- struct eptPageTableEntry *pde;
- pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
- if (!pde[index[1]].readable) {
- pde[index[1]].address = vm_phy_page_alloc(vm,
- KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot)
- >> vm->page_shift;
- pde[index[1]].writable = true;
- pde[index[1]].readable = true;
- pde[index[1]].executable = true;
- }
+ if (pte->page_size)
+ break;
- /* Fill in page table entry. */
- struct eptPageTableEntry *pte;
- pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
- pte[index[0]].address = paddr >> vm->page_shift;
- pte[index[0]].writable = true;
- pte[index[0]].readable = true;
- pte[index[0]].executable = true;
+ pt = addr_gpa2hva(vm, pte->address * vm->page_size);
+ }
/*
* For now mark these as accessed and dirty because the only
* testcase we have needs that. Can be reconsidered later.
*/
- pte[index[0]].accessed = true;
- pte[index[0]].dirty = true;
+ pte->accessed = true;
+ pte->dirty = true;
+
+}
+
+void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint64_t nested_paddr, uint64_t paddr)
+{
+ __nested_pg_map(vmx, vm, nested_paddr, paddr, PG_LEVEL_4K);
}
/*
@@ -484,7 +461,7 @@ void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
* nested_paddr - Nested guest physical address to map
* paddr - VM Physical Address
* size - The size of the range to map
- * eptp_memslot - Memory region slot for new virtual translation tables
+ * level - The level at which to map the range
*
* Output Args: None
*
@@ -493,28 +470,34 @@ void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
* Within the VM given by vm, creates a nested guest translation for the
* page range starting at nested_paddr to the page range starting at paddr.
*/
-void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t nested_paddr, uint64_t paddr, uint64_t size,
- uint32_t eptp_memslot)
+void __nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint64_t nested_paddr, uint64_t paddr, uint64_t size,
+ int level)
{
- size_t page_size = vm->page_size;
+ size_t page_size = PG_LEVEL_SIZE(level);
size_t npages = size / page_size;
TEST_ASSERT(nested_paddr + size > nested_paddr, "Vaddr overflow");
TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
while (npages--) {
- nested_pg_map(vmx, vm, nested_paddr, paddr, eptp_memslot);
+ __nested_pg_map(vmx, vm, nested_paddr, paddr, level);
nested_paddr += page_size;
paddr += page_size;
}
}
+void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint64_t nested_paddr, uint64_t paddr, uint64_t size)
+{
+ __nested_map(vmx, vm, nested_paddr, paddr, size, PG_LEVEL_4K);
+}
+
/* Prepare an identity extended page table that maps all the
* physical pages in VM.
*/
void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint32_t memslot, uint32_t eptp_memslot)
+ uint32_t memslot)
{
sparsebit_idx_t i, last;
struct userspace_mem_region *region =
@@ -530,15 +513,42 @@ void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
nested_map(vmx, vm,
(uint64_t)i << vm->page_shift,
(uint64_t)i << vm->page_shift,
- 1 << vm->page_shift,
- eptp_memslot);
+ 1 << vm->page_shift);
}
}
+/* Identity map a region with 1GiB Pages. */
+void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint64_t addr, uint64_t size)
+{
+ __nested_map(vmx, vm, addr, addr, size, PG_LEVEL_1G);
+}
+
+bool kvm_cpu_has_ept(void)
+{
+ uint64_t ctrl;
+
+ ctrl = kvm_get_feature_msr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS) >> 32;
+ if (!(ctrl & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
+ return false;
+
+ ctrl = kvm_get_feature_msr(MSR_IA32_VMX_PROCBASED_CTLS2) >> 32;
+ return ctrl & SECONDARY_EXEC_ENABLE_EPT;
+}
+
void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
uint32_t eptp_memslot)
{
- vmx->eptp = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ TEST_ASSERT(kvm_cpu_has_ept(), "KVM doesn't support nested EPT");
+
+ vmx->eptp = (void *)vm_vaddr_alloc_page(vm);
vmx->eptp_hva = addr_gva2hva(vm, (uintptr_t)vmx->eptp);
vmx->eptp_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->eptp);
}
+
+void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm)
+{
+ vmx->apic_access = (void *)vm_vaddr_alloc_page(vm);
+ vmx->apic_access_hva = addr_gva2hva(vm, (uintptr_t)vmx->apic_access);
+ vmx->apic_access_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->apic_access);
+}