about summary refs log tree commit diff stats homepage
path: root/arch/x86/virt
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/virt')
-rw-r--r-- arch/x86/virt/Makefile 2
-rw-r--r-- arch/x86/virt/svm/Makefile 1
-rw-r--r-- arch/x86/virt/svm/cmdline.c 45
-rw-r--r-- arch/x86/virt/svm/sev.c 704
-rw-r--r-- arch/x86/virt/vmx/tdx/seamcall.S 3
-rw-r--r-- arch/x86/virt/vmx/tdx/tdx.c 522
-rw-r--r-- arch/x86/virt/vmx/tdx/tdx.h 84
-rw-r--r-- arch/x86/virt/vmx/tdx/tdx_global_metadata.c 98
8 files changed, 1249 insertions, 210 deletions
diff --git a/arch/x86/virt/Makefile b/arch/x86/virt/Makefile
index 1e36502cd738..ea343fc392dc 100644
--- a/arch/x86/virt/Makefile
+++ b/arch/x86/virt/Makefile
@@ -1,2 +1,2 @@
# SPDX-License-Identifier: GPL-2.0-only
-obj-y += vmx/
+obj-y += svm/ vmx/
diff --git a/arch/x86/virt/svm/Makefile b/arch/x86/virt/svm/Makefile
index ef2a31bdcc70..eca6d71355fa 100644
--- a/arch/x86/virt/svm/Makefile
+++ b/arch/x86/virt/svm/Makefile
@@ -1,3 +1,4 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_KVM_AMD_SEV) += sev.o
+obj-$(CONFIG_CPU_SUP_AMD) += cmdline.o
diff --git a/arch/x86/virt/svm/cmdline.c b/arch/x86/virt/svm/cmdline.c
new file mode 100644
index 000000000000..affa2759fa20
--- /dev/null
+++ b/arch/x86/virt/svm/cmdline.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * AMD SVM-SEV command line parsing support
+ *
+ * Copyright (C) 2023 - 2024 Advanced Micro Devices, Inc.
+ *
+ * Author: Michael Roth <michael.roth@amd.com>
+ */
+
+#include <linux/string.h>
+#include <linux/printk.h>
+#include <linux/cache.h>
+#include <linux/cpufeature.h>
+
+#include <asm/sev-common.h>
+
+struct sev_config sev_cfg __read_mostly;
+
+static int __init init_sev_config(char *str)
+{
+ char *s;
+
+ while ((s = strsep(&str, ","))) {
+ if (!strcmp(s, "debug")) {
+ sev_cfg.debug = true;
+ continue;
+ }
+
+ if (!strcmp(s, "nosnp")) {
+ if (!cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) {
+ setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
+ cc_platform_clear(CC_ATTR_HOST_SEV_SNP);
+ continue;
+ } else {
+ goto warn;
+ }
+ }
+
+warn:
+ pr_info("SEV command-line option '%s' was not recognized\n", s);
+ }
+
+ return 1;
+}
+__setup("sev=", init_sev_config);
diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
index cffe1157a90a..942372e69b4d 100644
--- a/arch/x86/virt/svm/sev.c
+++ b/arch/x86/virt/svm/sev.c
@@ -18,6 +18,7 @@
#include <linux/cpumask.h>
#include <linux/iommu.h>
#include <linux/amd-iommu.h>
+#include <linux/nospec.h>
#include <asm/sev.h>
#include <asm/processor.h>
@@ -26,15 +27,35 @@
#include <asm/smp.h>
#include <asm/cpu.h>
#include <asm/apic.h>
-#include <asm/cpuid.h>
+#include <asm/cpuid/api.h>
#include <asm/cmdline.h>
#include <asm/iommu.h>
+#include <asm/msr.h>
/*
- * The RMP entry format is not architectural. The format is defined in PPR
- * Family 19h Model 01h, Rev B1 processor.
+ * The RMP entry information as returned by the RMPREAD instruction.
*/
struct rmpentry {
+ u64 gpa;
+ u8 assigned :1,
+ rsvd1 :7;
+ u8 pagesize :1,
+ hpage_region_status :1,
+ rsvd2 :6;
+ u8 immutable :1,
+ rsvd3 :7;
+ u8 rsvd4;
+ u32 asid;
+} __packed;
+
+/*
+ * The raw RMP entry format is not architectural. The format is defined in PPR
+ * Family 19h Model 01h, Rev B1 processor. This format represents the actual
+ * entry in the RMP table memory. The bitfield definitions are used for machines
+ * without the RMPREAD instruction (Zen3 and Zen4), otherwise the "hi" and "lo"
+ * fields are only used for dumping the raw data.
+ */
+struct rmpentry_raw {
union {
struct {
u64 assigned : 1,
@@ -58,12 +79,48 @@ struct rmpentry {
*/
#define RMPTABLE_CPU_BOOKKEEPING_SZ 0x4000
+/*
+ * For a non-segmented RMP table, use the maximum physical addressing as the
+ * segment size in order to always arrive at index 0 in the table.
+ */
+#define RMPTABLE_NON_SEGMENTED_SHIFT 52
+
+struct rmp_segment_desc {
+ struct rmpentry_raw *rmp_entry;
+ u64 max_index;
+ u64 size;
+};
+
+/*
+ * Segmented RMP Table support.
+ * - The segment size is used for two purposes:
+ * - Identify the amount of memory covered by an RMP segment
+ * - Quickly locate an RMP segment table entry for a physical address
+ *
+ * - The RMP segment table contains pointers to an RMP table that covers
+ * a specific portion of memory. There can be up to 512 8-byte entries,
+ * one page's worth.
+ */
+#define RST_ENTRY_MAPPED_SIZE(x) ((x) & GENMASK_ULL(19, 0))
+#define RST_ENTRY_SEGMENT_BASE(x) ((x) & GENMASK_ULL(51, 20))
+
+#define RST_SIZE SZ_4K
+static struct rmp_segment_desc **rmp_segment_table __ro_after_init;
+static unsigned int rst_max_index __ro_after_init = 512;
+
+static unsigned int rmp_segment_shift;
+static u64 rmp_segment_size;
+static u64 rmp_segment_mask;
+
+#define RST_ENTRY_INDEX(x) ((x) >> rmp_segment_shift)
+#define RMP_ENTRY_INDEX(x) ((u64)(PHYS_PFN((x) & rmp_segment_mask)))
+
+static u64 rmp_cfg;
+
/* Mask to apply to a PFN to get the first PFN of a 2MB page */
#define PFN_PMD_MASK GENMASK_ULL(63, PMD_SHIFT - PAGE_SHIFT)
static u64 probed_rmp_base, probed_rmp_size;
-static struct rmpentry *rmptable __ro_after_init;
-static u64 rmptable_max_pfn __ro_after_init;
static LIST_HEAD(snp_leaked_pages_list);
static DEFINE_SPINLOCK(snp_leaked_pages_list_lock);
@@ -77,14 +134,14 @@ static int __mfd_enable(unsigned int cpu)
{
u64 val;
- if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return 0;
- rdmsrl(MSR_AMD64_SYSCFG, val);
+ rdmsrq(MSR_AMD64_SYSCFG, val);
val |= MSR_AMD64_SYSCFG_MFDM;
- wrmsrl(MSR_AMD64_SYSCFG, val);
+ wrmsrq(MSR_AMD64_SYSCFG, val);
return 0;
}
@@ -98,15 +155,15 @@ static int __snp_enable(unsigned int cpu)
{
u64 val;
- if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return 0;
- rdmsrl(MSR_AMD64_SYSCFG, val);
+ rdmsrq(MSR_AMD64_SYSCFG, val);
val |= MSR_AMD64_SYSCFG_SNP_EN;
val |= MSR_AMD64_SYSCFG_SNP_VMPL_EN;
- wrmsrl(MSR_AMD64_SYSCFG, val);
+ wrmsrq(MSR_AMD64_SYSCFG, val);
return 0;
}
@@ -116,88 +173,376 @@ static __init void snp_enable(void *arg)
__snp_enable(smp_processor_id());
}
-#define RMP_ADDR_MASK GENMASK_ULL(51, 13)
+static void __init __snp_fixup_e820_tables(u64 pa)
+{
+ if (IS_ALIGNED(pa, PMD_SIZE))
+ return;
-bool snp_probe_rmptable_info(void)
+ /*
+ * Handle cases where the RMP table placement by the BIOS is not
+ * 2M aligned and the kexec kernel could try to allocate
+ * from within that chunk which then causes a fatal RMP fault.
+ *
+ * The e820_table needs to be updated as it is converted to
+ * kernel memory resources and used by KEXEC_FILE_LOAD syscall
+ * to load kexec segments.
+ *
+ * The e820_table_firmware needs to be updated as it is exposed
+ * to sysfs and used by the KEXEC_LOAD syscall to load kexec
+ * segments.
+ *
+ * The e820_table_kexec needs to be updated as it is passed to
+ * the kexec-ed kernel.
+ */
+ pa = ALIGN_DOWN(pa, PMD_SIZE);
+ if (e820__mapped_any(pa, pa + PMD_SIZE, E820_TYPE_RAM)) {
+ pr_info("Reserving start/end of RMP table on a 2MB boundary [0x%016llx]\n", pa);
+ e820__range_update(pa, PMD_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED);
+ e820__range_update_table(e820_table_kexec, pa, PMD_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED);
+ if (!memblock_is_region_reserved(pa, PMD_SIZE))
+ memblock_reserve(pa, PMD_SIZE);
+ }
+}
+
+static void __init fixup_e820_tables_for_segmented_rmp(void)
{
- u64 max_rmp_pfn, calc_rmp_sz, rmp_sz, rmp_base, rmp_end;
+ u64 pa, *rst, size, mapped_size;
+ unsigned int i;
- rdmsrl(MSR_AMD64_RMP_BASE, rmp_base);
- rdmsrl(MSR_AMD64_RMP_END, rmp_end);
+ __snp_fixup_e820_tables(probed_rmp_base);
- if (!(rmp_base & RMP_ADDR_MASK) || !(rmp_end & RMP_ADDR_MASK)) {
- pr_err("Memory for the RMP table has not been reserved by BIOS\n");
+ pa = probed_rmp_base + RMPTABLE_CPU_BOOKKEEPING_SZ;
+
+ __snp_fixup_e820_tables(pa + RST_SIZE);
+
+ rst = early_memremap(pa, RST_SIZE);
+ if (!rst)
+ return;
+
+ for (i = 0; i < rst_max_index; i++) {
+ pa = RST_ENTRY_SEGMENT_BASE(rst[i]);
+ mapped_size = RST_ENTRY_MAPPED_SIZE(rst[i]);
+ if (!mapped_size)
+ continue;
+
+ __snp_fixup_e820_tables(pa);
+
+ /*
+ * Mapped size in GB. Mapped size is allowed to exceed
+ * the segment coverage size, but gets reduced to the
+ * segment coverage size.
+ */
+ mapped_size <<= 30;
+ if (mapped_size > rmp_segment_size)
+ mapped_size = rmp_segment_size;
+
+ /* Calculate the RMP segment size (16 bytes/page mapped) */
+ size = PHYS_PFN(mapped_size) << 4;
+
+ __snp_fixup_e820_tables(pa + size);
+ }
+
+ early_memunmap(rst, RST_SIZE);
+}
+
+static void __init fixup_e820_tables_for_contiguous_rmp(void)
+{
+ __snp_fixup_e820_tables(probed_rmp_base);
+ __snp_fixup_e820_tables(probed_rmp_base + probed_rmp_size);
+}
+
+void __init snp_fixup_e820_tables(void)
+{
+ if (rmp_cfg & MSR_AMD64_SEG_RMP_ENABLED) {
+ fixup_e820_tables_for_segmented_rmp();
+ } else {
+ fixup_e820_tables_for_contiguous_rmp();
+ }
+}
+
+static bool __init clear_rmptable_bookkeeping(void)
+{
+ void *bk;
+
+ bk = memremap(probed_rmp_base, RMPTABLE_CPU_BOOKKEEPING_SZ, MEMREMAP_WB);
+ if (!bk) {
+ pr_err("Failed to map RMP bookkeeping area\n");
return false;
}
- if (rmp_base > rmp_end) {
- pr_err("RMP configuration not valid: base=%#llx, end=%#llx\n", rmp_base, rmp_end);
+ memset(bk, 0, RMPTABLE_CPU_BOOKKEEPING_SZ);
+
+ memunmap(bk);
+
+ return true;
+}
+
+static bool __init alloc_rmp_segment_desc(u64 segment_pa, u64 segment_size, u64 pa)
+{
+ u64 rst_index, rmp_segment_size_max;
+ struct rmp_segment_desc *desc;
+ void *rmp_segment;
+
+ /* Calculate the maximum size an RMP can be (16 bytes/page mapped) */
+ rmp_segment_size_max = PHYS_PFN(rmp_segment_size) << 4;
+
+ /* Validate the RMP segment size */
+ if (segment_size > rmp_segment_size_max) {
+ pr_err("Invalid RMP size 0x%llx for configured segment size 0x%llx\n",
+ segment_size, rmp_segment_size_max);
return false;
}
- rmp_sz = rmp_end - rmp_base + 1;
+ /* Validate the RMP segment table index */
+ rst_index = RST_ENTRY_INDEX(pa);
+ if (rst_index >= rst_max_index) {
+ pr_err("Invalid RMP segment base address 0x%llx for configured segment size 0x%llx\n",
+ pa, rmp_segment_size);
+ return false;
+ }
+
+ if (rmp_segment_table[rst_index]) {
+ pr_err("RMP segment descriptor already exists at index %llu\n", rst_index);
+ return false;
+ }
+
+ rmp_segment = memremap(segment_pa, segment_size, MEMREMAP_WB);
+ if (!rmp_segment) {
+ pr_err("Failed to map RMP segment addr 0x%llx size 0x%llx\n",
+ segment_pa, segment_size);
+ return false;
+ }
+
+ desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+ if (!desc) {
+ memunmap(rmp_segment);
+ return false;
+ }
+
+ desc->rmp_entry = rmp_segment;
+ desc->max_index = segment_size / sizeof(*desc->rmp_entry);
+ desc->size = segment_size;
+
+ rmp_segment_table[rst_index] = desc;
+
+ return true;
+}
+
+static void __init free_rmp_segment_table(void)
+{
+ unsigned int i;
+
+ for (i = 0; i < rst_max_index; i++) {
+ struct rmp_segment_desc *desc;
+
+ desc = rmp_segment_table[i];
+ if (!desc)
+ continue;
+
+ memunmap(desc->rmp_entry);
+
+ kfree(desc);
+ }
+
+ free_page((unsigned long)rmp_segment_table);
+
+ rmp_segment_table = NULL;
+}
+
+/* Allocate the table used to index into the RMP segments */
+static bool __init alloc_rmp_segment_table(void)
+{
+ struct page *page;
+
+ page = alloc_page(__GFP_ZERO);
+ if (!page)
+ return false;
+
+ rmp_segment_table = page_address(page);
+
+ return true;
+}
+
+static bool __init setup_contiguous_rmptable(void)
+{
+ u64 max_rmp_pfn, calc_rmp_sz, rmptable_segment, rmptable_size, rmp_end;
+
+ if (!probed_rmp_size)
+ return false;
+
+ rmp_end = probed_rmp_base + probed_rmp_size - 1;
/*
- * Calculate the amount the memory that must be reserved by the BIOS to
+ * Calculate the amount of memory that must be reserved by the BIOS to
* address the whole RAM, including the bookkeeping area. The RMP itself
* must also be covered.
*/
max_rmp_pfn = max_pfn;
- if (PHYS_PFN(rmp_end) > max_pfn)
- max_rmp_pfn = PHYS_PFN(rmp_end);
+ if (PFN_UP(rmp_end) > max_pfn)
+ max_rmp_pfn = PFN_UP(rmp_end);
calc_rmp_sz = (max_rmp_pfn << 4) + RMPTABLE_CPU_BOOKKEEPING_SZ;
-
- if (calc_rmp_sz > rmp_sz) {
+ if (calc_rmp_sz > probed_rmp_size) {
pr_err("Memory reserved for the RMP table does not cover full system RAM (expected 0x%llx got 0x%llx)\n",
- calc_rmp_sz, rmp_sz);
+ calc_rmp_sz, probed_rmp_size);
return false;
}
- probed_rmp_base = rmp_base;
- probed_rmp_size = rmp_sz;
+ if (!alloc_rmp_segment_table())
+ return false;
- pr_info("RMP table physical range [0x%016llx - 0x%016llx]\n",
- probed_rmp_base, probed_rmp_base + probed_rmp_size - 1);
+ /* Map only the RMP entries */
+ rmptable_segment = probed_rmp_base + RMPTABLE_CPU_BOOKKEEPING_SZ;
+ rmptable_size = probed_rmp_size - RMPTABLE_CPU_BOOKKEEPING_SZ;
+
+ if (!alloc_rmp_segment_desc(rmptable_segment, rmptable_size, 0)) {
+ free_rmp_segment_table();
+ return false;
+ }
return true;
}
+static bool __init setup_segmented_rmptable(void)
+{
+ u64 rst_pa, *rst, pa, ram_pa_end, ram_pa_max;
+ unsigned int i, max_index;
+
+ if (!probed_rmp_base)
+ return false;
+
+ if (!alloc_rmp_segment_table())
+ return false;
+
+ rst_pa = probed_rmp_base + RMPTABLE_CPU_BOOKKEEPING_SZ;
+ rst = memremap(rst_pa, RST_SIZE, MEMREMAP_WB);
+ if (!rst) {
+ pr_err("Failed to map RMP segment table addr 0x%llx\n", rst_pa);
+ goto e_free;
+ }
+
+ pr_info("Segmented RMP using %lluGB segments\n", rmp_segment_size >> 30);
+
+ ram_pa_max = max_pfn << PAGE_SHIFT;
+
+ max_index = 0;
+ ram_pa_end = 0;
+ for (i = 0; i < rst_max_index; i++) {
+ u64 rmp_segment, rmp_size, mapped_size;
+
+ mapped_size = RST_ENTRY_MAPPED_SIZE(rst[i]);
+ if (!mapped_size)
+ continue;
+
+ max_index = i;
+
+ /*
+ * Mapped size in GB. Mapped size is allowed to exceed the
+ * segment coverage size, but gets reduced to the segment
+ * coverage size.
+ */
+ mapped_size <<= 30;
+ if (mapped_size > rmp_segment_size) {
+ pr_info("RMP segment %u mapped size (0x%llx) reduced to 0x%llx\n",
+ i, mapped_size, rmp_segment_size);
+ mapped_size = rmp_segment_size;
+ }
+
+ rmp_segment = RST_ENTRY_SEGMENT_BASE(rst[i]);
+
+ /* Calculate the RMP segment size (16 bytes/page mapped) */
+ rmp_size = PHYS_PFN(mapped_size) << 4;
+
+ pa = (u64)i << rmp_segment_shift;
+
+ /*
+ * Some segments may be for MMIO mapped above system RAM. These
+ * segments are used for Trusted I/O.
+ */
+ if (pa < ram_pa_max)
+ ram_pa_end = pa + mapped_size;
+
+ if (!alloc_rmp_segment_desc(rmp_segment, rmp_size, pa))
+ goto e_unmap;
+
+ pr_info("RMP segment %u physical address [0x%llx - 0x%llx] covering [0x%llx - 0x%llx]\n",
+ i, rmp_segment, rmp_segment + rmp_size - 1, pa, pa + mapped_size - 1);
+ }
+
+ if (ram_pa_max > ram_pa_end) {
+ pr_err("Segmented RMP does not cover full system RAM (expected 0x%llx got 0x%llx)\n",
+ ram_pa_max, ram_pa_end);
+ goto e_unmap;
+ }
+
+ /* Adjust the maximum index based on the found segments */
+ rst_max_index = max_index + 1;
+
+ memunmap(rst);
+
+ return true;
+
+e_unmap:
+ memunmap(rst);
+
+e_free:
+ free_rmp_segment_table();
+
+ return false;
+}
+
+static bool __init setup_rmptable(void)
+{
+ if (rmp_cfg & MSR_AMD64_SEG_RMP_ENABLED) {
+ return setup_segmented_rmptable();
+ } else {
+ return setup_contiguous_rmptable();
+ }
+}
+
/*
* Do the necessary preparations which are verified by the firmware as
* described in the SNP_INIT_EX firmware command description in the SNP
* firmware ABI spec.
*/
-static int __init snp_rmptable_init(void)
+int __init snp_rmptable_init(void)
{
- void *rmptable_start;
- u64 rmptable_size;
+ unsigned int i;
u64 val;
- if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
- return 0;
-
- if (!amd_iommu_snp_en)
- return 0;
+ if (WARN_ON_ONCE(!cc_platform_has(CC_ATTR_HOST_SEV_SNP)))
+ return -ENOSYS;
- if (!probed_rmp_size)
- goto nosnp;
+ if (WARN_ON_ONCE(!amd_iommu_snp_en))
+ return -ENOSYS;
- rmptable_start = memremap(probed_rmp_base, probed_rmp_size, MEMREMAP_WB);
- if (!rmptable_start) {
- pr_err("Failed to map RMP table\n");
- return 1;
- }
+ if (!setup_rmptable())
+ return -ENOSYS;
/*
* Check if SEV-SNP is already enabled, this can happen in case of
* kexec boot.
*/
- rdmsrl(MSR_AMD64_SYSCFG, val);
+ rdmsrq(MSR_AMD64_SYSCFG, val);
if (val & MSR_AMD64_SYSCFG_SNP_EN)
goto skip_enable;
- memset(rmptable_start, 0, probed_rmp_size);
+ /* Zero out the RMP bookkeeping area */
+ if (!clear_rmptable_bookkeeping()) {
+ free_rmp_segment_table();
+ return -ENOSYS;
+ }
+
+ /* Zero out the RMP entries */
+ for (i = 0; i < rst_max_index; i++) {
+ struct rmp_segment_desc *desc;
+
+ desc = rmp_segment_table[i];
+ if (!desc)
+ continue;
+
+ memset(desc->rmp_entry, 0, desc->size);
+ }
/* Flush the caches to ensure that data is written before SNP is enabled. */
wbinvd_on_all_cpus();
@@ -208,12 +553,6 @@ static int __init snp_rmptable_init(void)
on_each_cpu(snp_enable, NULL, 1);
skip_enable:
- rmptable_start += RMPTABLE_CPU_BOOKKEEPING_SZ;
- rmptable_size = probed_rmp_size - RMPTABLE_CPU_BOOKKEEPING_SZ;
-
- rmptable = (struct rmpentry *)rmptable_start;
- rmptable_max_pfn = rmptable_size / sizeof(struct rmpentry) - 1;
-
cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/rmptable_init:online", __snp_enable, NULL);
/*
@@ -223,59 +562,214 @@ skip_enable:
crash_kexec_post_notifiers = true;
return 0;
+}
+
+static void set_rmp_segment_info(unsigned int segment_shift)
+{
+ rmp_segment_shift = segment_shift;
+ rmp_segment_size = 1ULL << rmp_segment_shift;
+ rmp_segment_mask = rmp_segment_size - 1;
+}
+
+#define RMP_ADDR_MASK GENMASK_ULL(51, 13)
+
+static bool probe_contiguous_rmptable_info(void)
+{
+ u64 rmp_sz, rmp_base, rmp_end;
+
+ rdmsrq(MSR_AMD64_RMP_BASE, rmp_base);
+ rdmsrq(MSR_AMD64_RMP_END, rmp_end);
+
+ if (!(rmp_base & RMP_ADDR_MASK) || !(rmp_end & RMP_ADDR_MASK)) {
+ pr_err("Memory for the RMP table has not been reserved by BIOS\n");
+ return false;
+ }
+
+ if (rmp_base > rmp_end) {
+ pr_err("RMP configuration not valid: base=%#llx, end=%#llx\n", rmp_base, rmp_end);
+ return false;
+ }
+
+ rmp_sz = rmp_end - rmp_base + 1;
+
+ /* Treat the contiguous RMP table as a single segment */
+ rst_max_index = 1;
+
+ set_rmp_segment_info(RMPTABLE_NON_SEGMENTED_SHIFT);
+
+ probed_rmp_base = rmp_base;
+ probed_rmp_size = rmp_sz;
+
+ pr_info("RMP table physical range [0x%016llx - 0x%016llx]\n",
+ rmp_base, rmp_end);
+
+ return true;
+}
-nosnp:
- setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
- return -ENOSYS;
+static bool probe_segmented_rmptable_info(void)
+{
+ unsigned int eax, ebx, segment_shift, segment_shift_min, segment_shift_max;
+ u64 rmp_base, rmp_end;
+
+ rdmsrq(MSR_AMD64_RMP_BASE, rmp_base);
+ if (!(rmp_base & RMP_ADDR_MASK)) {
+ pr_err("Memory for the RMP table has not been reserved by BIOS\n");
+ return false;
+ }
+
+ rdmsrq(MSR_AMD64_RMP_END, rmp_end);
+ WARN_ONCE(rmp_end & RMP_ADDR_MASK,
+ "Segmented RMP enabled but RMP_END MSR is non-zero\n");
+
+ /* Obtain the min and max supported RMP segment size */
+ eax = cpuid_eax(0x80000025);
+ segment_shift_min = eax & GENMASK(5, 0);
+ segment_shift_max = (eax & GENMASK(11, 6)) >> 6;
+
+ /* Verify the segment size is within the supported limits */
+ segment_shift = MSR_AMD64_RMP_SEGMENT_SHIFT(rmp_cfg);
+ if (segment_shift > segment_shift_max || segment_shift < segment_shift_min) {
+ pr_err("RMP segment size (%u) is not within advertised bounds (min=%u, max=%u)\n",
+ segment_shift, segment_shift_min, segment_shift_max);
+ return false;
+ }
+
+ /* Override the max supported RST index if a hardware limit exists */
+ ebx = cpuid_ebx(0x80000025);
+ if (ebx & BIT(10))
+ rst_max_index = ebx & GENMASK(9, 0);
+
+ set_rmp_segment_info(segment_shift);
+
+ probed_rmp_base = rmp_base;
+ probed_rmp_size = 0;
+
+ pr_info("Segmented RMP base table physical range [0x%016llx - 0x%016llx]\n",
+ rmp_base, rmp_base + RMPTABLE_CPU_BOOKKEEPING_SZ + RST_SIZE);
+
+ return true;
+}
+
+bool snp_probe_rmptable_info(void)
+{
+ if (cpu_feature_enabled(X86_FEATURE_SEGMENTED_RMP))
+ rdmsrq(MSR_AMD64_RMP_CFG, rmp_cfg);
+
+ if (rmp_cfg & MSR_AMD64_SEG_RMP_ENABLED)
+ return probe_segmented_rmptable_info();
+ else
+ return probe_contiguous_rmptable_info();
}
/*
- * This must be called after the IOMMU has been initialized.
+ * About the array_index_nospec() usage below:
+ *
+ * This function can get called by exported functions like
+ * snp_lookup_rmpentry(), which is used by the KVM #PF handler, among
+ * others, and since the @pfn passed in cannot always be trusted,
+ * speculation should be stopped as a protective measure.
*/
-device_initcall(snp_rmptable_init);
-
-static struct rmpentry *get_rmpentry(u64 pfn)
+static struct rmpentry_raw *get_raw_rmpentry(u64 pfn)
{
- if (WARN_ON_ONCE(pfn > rmptable_max_pfn))
+ u64 paddr, rst_index, segment_index;
+ struct rmp_segment_desc *desc;
+
+ if (!rmp_segment_table)
+ return ERR_PTR(-ENODEV);
+
+ paddr = pfn << PAGE_SHIFT;
+
+ rst_index = RST_ENTRY_INDEX(paddr);
+ if (unlikely(rst_index >= rst_max_index))
+ return ERR_PTR(-EFAULT);
+
+ rst_index = array_index_nospec(rst_index, rst_max_index);
+
+ desc = rmp_segment_table[rst_index];
+ if (unlikely(!desc))
+ return ERR_PTR(-EFAULT);
+
+ segment_index = RMP_ENTRY_INDEX(paddr);
+ if (unlikely(segment_index >= desc->max_index))
return ERR_PTR(-EFAULT);
- return &rmptable[pfn];
+ segment_index = array_index_nospec(segment_index, desc->max_index);
+
+ return desc->rmp_entry + segment_index;
}
-static struct rmpentry *__snp_lookup_rmpentry(u64 pfn, int *level)
+static int get_rmpentry(u64 pfn, struct rmpentry *e)
{
- struct rmpentry *large_entry, *entry;
+ struct rmpentry_raw *e_raw;
- if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
- return ERR_PTR(-ENODEV);
+ if (cpu_feature_enabled(X86_FEATURE_RMPREAD)) {
+ int ret;
+
+ /* Binutils version 2.44 supports the RMPREAD mnemonic. */
+ asm volatile(".byte 0xf2, 0x0f, 0x01, 0xfd"
+ : "=a" (ret)
+ : "a" (pfn << PAGE_SHIFT), "c" (e)
+ : "memory", "cc");
+
+ return ret;
+ }
+
+ e_raw = get_raw_rmpentry(pfn);
+ if (IS_ERR(e_raw))
+ return PTR_ERR(e_raw);
+
+ /*
+ * Map the raw RMP table entry onto the RMPREAD output format.
+ * The 2MB region status indicator (hpage_region_status field) is not
+ * calculated, since the overhead could be significant and the field
+ * is not used.
+ */
+ memset(e, 0, sizeof(*e));
+ e->gpa = e_raw->gpa << PAGE_SHIFT;
+ e->asid = e_raw->asid;
+ e->assigned = e_raw->assigned;
+ e->pagesize = e_raw->pagesize;
+ e->immutable = e_raw->immutable;
+
+ return 0;
+}
+
+static int __snp_lookup_rmpentry(u64 pfn, struct rmpentry *e, int *level)
+{
+ struct rmpentry e_large;
+ int ret;
+
+ if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
+ return -ENODEV;
- entry = get_rmpentry(pfn);
- if (IS_ERR(entry))
- return entry;
+ ret = get_rmpentry(pfn, e);
+ if (ret)
+ return ret;
/*
* Find the authoritative RMP entry for a PFN. This can be either a 4K
* RMP entry or a special large RMP entry that is authoritative for a
* whole 2M area.
*/
- large_entry = get_rmpentry(pfn & PFN_PMD_MASK);
- if (IS_ERR(large_entry))
- return large_entry;
+ ret = get_rmpentry(pfn & PFN_PMD_MASK, &e_large);
+ if (ret)
+ return ret;
- *level = RMP_TO_PG_LEVEL(large_entry->pagesize);
+ *level = RMP_TO_PG_LEVEL(e_large.pagesize);
- return entry;
+ return 0;
}
int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level)
{
- struct rmpentry *e;
+ struct rmpentry e;
+ int ret;
- e = __snp_lookup_rmpentry(pfn, level);
- if (IS_ERR(e))
- return PTR_ERR(e);
+ ret = __snp_lookup_rmpentry(pfn, &e, level);
+ if (ret)
+ return ret;
- *assigned = !!e->assigned;
+ *assigned = !!e.assigned;
return 0;
}
EXPORT_SYMBOL_GPL(snp_lookup_rmpentry);
@@ -288,20 +782,28 @@ EXPORT_SYMBOL_GPL(snp_lookup_rmpentry);
*/
static void dump_rmpentry(u64 pfn)
{
+ struct rmpentry_raw *e_raw;
u64 pfn_i, pfn_end;
- struct rmpentry *e;
- int level;
+ struct rmpentry e;
+ int level, ret;
- e = __snp_lookup_rmpentry(pfn, &level);
- if (IS_ERR(e)) {
- pr_err("Failed to read RMP entry for PFN 0x%llx, error %ld\n",
- pfn, PTR_ERR(e));
+ ret = __snp_lookup_rmpentry(pfn, &e, &level);
+ if (ret) {
+ pr_err("Failed to read RMP entry for PFN 0x%llx, error %d\n",
+ pfn, ret);
return;
}
- if (e->assigned) {
+ if (e.assigned) {
+ e_raw = get_raw_rmpentry(pfn);
+ if (IS_ERR(e_raw)) {
+ pr_err("Failed to read RMP contents for PFN 0x%llx, error %ld\n",
+ pfn, PTR_ERR(e_raw));
+ return;
+ }
+
pr_info("PFN 0x%llx, RMP entry: [0x%016llx - 0x%016llx]\n",
- pfn, e->lo, e->hi);
+ pfn, e_raw->lo, e_raw->hi);
return;
}
@@ -320,16 +822,16 @@ static void dump_rmpentry(u64 pfn)
pfn, pfn_i, pfn_end);
while (pfn_i < pfn_end) {
- e = __snp_lookup_rmpentry(pfn_i, &level);
- if (IS_ERR(e)) {
- pr_err("Error %ld reading RMP entry for PFN 0x%llx\n",
- PTR_ERR(e), pfn_i);
+ e_raw = get_raw_rmpentry(pfn_i);
+ if (IS_ERR(e_raw)) {
+ pr_err("Error %ld reading RMP contents for PFN 0x%llx\n",
+ PTR_ERR(e_raw), pfn_i);
pfn_i++;
continue;
}
- if (e->lo || e->hi)
- pr_info("PFN: 0x%llx, [0x%016llx - 0x%016llx]\n", pfn_i, e->lo, e->hi);
+ if (e_raw->lo || e_raw->hi)
+ pr_info("PFN: 0x%llx, [0x%016llx - 0x%016llx]\n", pfn_i, e_raw->lo, e_raw->hi);
pfn_i++;
}
}
@@ -363,7 +865,7 @@ int psmash(u64 pfn)
unsigned long paddr = pfn << PAGE_SHIFT;
int ret;
- if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return -ENODEV;
if (!pfn_valid(pfn))
@@ -472,7 +974,7 @@ static int rmpupdate(u64 pfn, struct rmp_state *state)
unsigned long paddr = pfn << PAGE_SHIFT;
int ret, level;
- if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return -ENODEV;
level = RMP_TO_PG_LEVEL(state->pagesize);
@@ -558,3 +1060,13 @@ void snp_leak_pages(u64 pfn, unsigned int npages)
spin_unlock(&snp_leaked_pages_list_lock);
}
EXPORT_SYMBOL_GPL(snp_leak_pages);
+
+void kdump_sev_callback(void)
+{
+ /*
+ * Do wbinvd() on remote CPUs when SNP is enabled in order to
+ * safely do SNP_SHUTDOWN on the local CPU.
+ */
+ if (cc_platform_has(CC_ATTR_HOST_SEV_SNP))
+ wbinvd();
+}
diff --git a/arch/x86/virt/vmx/tdx/seamcall.S b/arch/x86/virt/vmx/tdx/seamcall.S
index 5b1f2286aea9..6854c52c374b 100644
--- a/arch/x86/virt/vmx/tdx/seamcall.S
+++ b/arch/x86/virt/vmx/tdx/seamcall.S
@@ -41,6 +41,9 @@ SYM_FUNC_START(__seamcall_ret)
TDX_MODULE_CALL host=1 ret=1
SYM_FUNC_END(__seamcall_ret)
+/* KVM requires non-instrumentable __seamcall_saved_ret() for TDH.VP.ENTER */
+.section .noinstr.text, "ax"
+
/*
* __seamcall_saved_ret() - Host-side interface functions to SEAM software
* (the P-SEAMLDR or the TDX module), with saving output registers to the
diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c
index 4d6826a76f78..c7a9a087ccaf 100644
--- a/arch/x86/virt/vmx/tdx/tdx.c
+++ b/arch/x86/virt/vmx/tdx/tdx.c
@@ -5,6 +5,7 @@
* Intel Trusted Domain Extensions (TDX) support
*/
+#include "asm/page_types.h"
#define pr_fmt(fmt) "virt/tdx: " fmt
#include <linux/types.h>
@@ -27,14 +28,14 @@
#include <linux/log2.h>
#include <linux/acpi.h>
#include <linux/suspend.h>
-#include <linux/acpi.h>
+#include <linux/idr.h>
#include <asm/page.h>
#include <asm/special_insns.h>
#include <asm/msr-index.h>
#include <asm/msr.h>
#include <asm/cpufeature.h>
#include <asm/tdx.h>
-#include <asm/intel-family.h>
+#include <asm/cpu_device_id.h>
#include <asm/processor.h>
#include <asm/mce.h>
#include "tdx.h"
@@ -43,6 +44,8 @@ static u32 tdx_global_keyid __ro_after_init;
static u32 tdx_guest_keyid_start __ro_after_init;
static u32 tdx_nr_guest_keyids __ro_after_init;
+static DEFINE_IDA(tdx_guest_keyid_pool);
+
static DEFINE_PER_CPU(bool, tdx_lp_initialized);
static struct tdmr_info_list tdx_tdmr_list;
@@ -53,6 +56,8 @@ static DEFINE_MUTEX(tdx_module_lock);
/* All TDX-usable memory regions. Protected by mem_hotplug_lock. */
static LIST_HEAD(tdx_memlist);
+static struct tdx_sys_info tdx_sysinfo;
+
typedef void (*sc_err_func_t)(u64 fn, u64 err, struct tdx_module_args *args);
static inline void seamcall_err(u64 fn, u64 err, struct tdx_module_args *args)
@@ -70,8 +75,9 @@ static inline void seamcall_err_ret(u64 fn, u64 err,
args->r9, args->r10, args->r11);
}
-static inline int sc_retry_prerr(sc_func_t func, sc_err_func_t err_func,
- u64 fn, struct tdx_module_args *args)
+static __always_inline int sc_retry_prerr(sc_func_t func,
+ sc_err_func_t err_func,
+ u64 fn, struct tdx_module_args *args)
{
u64 sret = sc_retry(func, fn, args);
@@ -271,57 +277,15 @@ static int read_sys_metadata_field(u64 field_id, u64 *data)
return 0;
}
-static int read_sys_metadata_field16(u64 field_id,
- int offset,
- struct tdx_tdmr_sysinfo *ts)
-{
- u16 *ts_member = ((void *)ts) + offset;
- u64 tmp;
- int ret;
-
- if (WARN_ON_ONCE(MD_FIELD_ID_ELE_SIZE_CODE(field_id) !=
- MD_FIELD_ID_ELE_SIZE_16BIT))
- return -EINVAL;
-
- ret = read_sys_metadata_field(field_id, &tmp);
- if (ret)
- return ret;
-
- *ts_member = tmp;
-
- return 0;
-}
-
-struct field_mapping {
- u64 field_id;
- int offset;
-};
+#include "tdx_global_metadata.c"
-#define TD_SYSINFO_MAP(_field_id, _offset) \
- { .field_id = MD_FIELD_ID_##_field_id, \
- .offset = offsetof(struct tdx_tdmr_sysinfo, _offset) }
-
-/* Map TD_SYSINFO fields into 'struct tdx_tdmr_sysinfo': */
-static const struct field_mapping fields[] = {
- TD_SYSINFO_MAP(MAX_TDMRS, max_tdmrs),
- TD_SYSINFO_MAP(MAX_RESERVED_PER_TDMR, max_reserved_per_tdmr),
- TD_SYSINFO_MAP(PAMT_4K_ENTRY_SIZE, pamt_entry_size[TDX_PS_4K]),
- TD_SYSINFO_MAP(PAMT_2M_ENTRY_SIZE, pamt_entry_size[TDX_PS_2M]),
- TD_SYSINFO_MAP(PAMT_1G_ENTRY_SIZE, pamt_entry_size[TDX_PS_1G]),
-};
-
-static int get_tdx_tdmr_sysinfo(struct tdx_tdmr_sysinfo *tdmr_sysinfo)
+static int check_features(struct tdx_sys_info *sysinfo)
{
- int ret;
- int i;
+ u64 tdx_features0 = sysinfo->features.tdx_features0;
- /* Populate 'tdmr_sysinfo' fields using the mapping structure above: */
- for (i = 0; i < ARRAY_SIZE(fields); i++) {
- ret = read_sys_metadata_field16(fields[i].field_id,
- fields[i].offset,
- tdmr_sysinfo);
- if (ret)
- return ret;
+ if (!(tdx_features0 & TDX_FEATURES0_NO_RBP_MOD)) {
+ pr_err("frame pointer (RBP) clobber bug present, upgrade TDX module\n");
+ return -EINVAL;
}
return 0;
@@ -343,13 +307,13 @@ static int tdmr_size_single(u16 max_reserved_per_tdmr)
}
static int alloc_tdmr_list(struct tdmr_info_list *tdmr_list,
- struct tdx_tdmr_sysinfo *tdmr_sysinfo)
+ struct tdx_sys_info_tdmr *sysinfo_tdmr)
{
size_t tdmr_sz, tdmr_array_sz;
void *tdmr_array;
- tdmr_sz = tdmr_size_single(tdmr_sysinfo->max_reserved_per_tdmr);
- tdmr_array_sz = tdmr_sz * tdmr_sysinfo->max_tdmrs;
+ tdmr_sz = tdmr_size_single(sysinfo_tdmr->max_reserved_per_tdmr);
+ tdmr_array_sz = tdmr_sz * sysinfo_tdmr->max_tdmrs;
/*
* To keep things simple, allocate all TDMRs together.
@@ -368,7 +332,7 @@ static int alloc_tdmr_list(struct tdmr_info_list *tdmr_list,
* at a given index in the TDMR list.
*/
tdmr_list->tdmr_sz = tdmr_sz;
- tdmr_list->max_tdmrs = tdmr_sysinfo->max_tdmrs;
+ tdmr_list->max_tdmrs = sysinfo_tdmr->max_tdmrs;
tdmr_list->nr_consumed_tdmrs = 0;
return 0;
@@ -922,25 +886,29 @@ static int tdmrs_populate_rsvd_areas_all(struct tdmr_info_list *tdmr_list,
/*
* Construct a list of TDMRs on the preallocated space in @tdmr_list
* to cover all TDX memory regions in @tmb_list based on the TDX module
- * TDMR global information in @tdmr_sysinfo.
+ * TDMR global information in @sysinfo_tdmr.
*/
static int construct_tdmrs(struct list_head *tmb_list,
struct tdmr_info_list *tdmr_list,
- struct tdx_tdmr_sysinfo *tdmr_sysinfo)
+ struct tdx_sys_info_tdmr *sysinfo_tdmr)
{
+ u16 pamt_entry_size[TDX_PS_NR] = {
+ sysinfo_tdmr->pamt_4k_entry_size,
+ sysinfo_tdmr->pamt_2m_entry_size,
+ sysinfo_tdmr->pamt_1g_entry_size,
+ };
int ret;
ret = fill_out_tdmrs(tmb_list, tdmr_list);
if (ret)
return ret;
- ret = tdmrs_set_up_pamt_all(tdmr_list, tmb_list,
- tdmr_sysinfo->pamt_entry_size);
+ ret = tdmrs_set_up_pamt_all(tdmr_list, tmb_list, pamt_entry_size);
if (ret)
return ret;
ret = tdmrs_populate_rsvd_areas_all(tdmr_list, tmb_list,
- tdmr_sysinfo->max_reserved_per_tdmr);
+ sysinfo_tdmr->max_reserved_per_tdmr);
if (ret)
tdmrs_free_pamt_all(tdmr_list);
@@ -1099,9 +1067,17 @@ static int init_tdmrs(struct tdmr_info_list *tdmr_list)
static int init_tdx_module(void)
{
- struct tdx_tdmr_sysinfo tdmr_sysinfo;
int ret;
+ ret = get_tdx_sys_info(&tdx_sysinfo);
+ if (ret)
+ return ret;
+
+ /* Check whether the kernel can support this module */
+ ret = check_features(&tdx_sysinfo);
+ if (ret)
+ return ret;
+
/*
* To keep things simple, assume that all TDX-protected memory
* will come from the page allocator. Make sure all pages in the
@@ -1118,17 +1094,13 @@ static int init_tdx_module(void)
if (ret)
goto out_put_tdxmem;
- ret = get_tdx_tdmr_sysinfo(&tdmr_sysinfo);
- if (ret)
- goto err_free_tdxmem;
-
/* Allocate enough space for constructing TDMRs */
- ret = alloc_tdmr_list(&tdx_tdmr_list, &tdmr_sysinfo);
+ ret = alloc_tdmr_list(&tdx_tdmr_list, &tdx_sysinfo.tdmr);
if (ret)
goto err_free_tdxmem;
/* Cover all TDX-usable memory regions in TDMRs */
- ret = construct_tdmrs(&tdx_memlist, &tdx_tdmr_list, &tdmr_sysinfo);
+ ret = construct_tdmrs(&tdx_memlist, &tdx_tdmr_list, &tdx_sysinfo.tdmr);
if (ret)
goto err_free_tdmrs;
@@ -1427,9 +1399,9 @@ static void __init check_tdx_erratum(void)
* private memory poisons that memory, and a subsequent read of
* that memory triggers #MC.
*/
- switch (boot_cpu_data.x86_model) {
- case INTEL_FAM6_SAPPHIRERAPIDS_X:
- case INTEL_FAM6_EMERALDRAPIDS_X:
+ switch (boot_cpu_data.x86_vfm) {
+ case INTEL_SAPPHIRERAPIDS_X:
+ case INTEL_EMERALDRAPIDS_X:
setup_force_cpu_bug(X86_BUG_TDX_PW_MCE);
}
}
@@ -1490,3 +1462,411 @@ void __init tdx_init(void)
check_tdx_erratum();
}
+
+const struct tdx_sys_info *tdx_get_sysinfo(void)
+{
+ const struct tdx_sys_info *p = NULL;
+
+ /* Make sure all fields in @tdx_sysinfo have been populated */
+ mutex_lock(&tdx_module_lock);
+ if (tdx_module_status == TDX_MODULE_INITIALIZED)
+ p = (const struct tdx_sys_info *)&tdx_sysinfo;
+ mutex_unlock(&tdx_module_lock);
+
+ return p;
+}
+EXPORT_SYMBOL_GPL(tdx_get_sysinfo);
+
+u32 tdx_get_nr_guest_keyids(void)
+{
+ return tdx_nr_guest_keyids;
+}
+EXPORT_SYMBOL_GPL(tdx_get_nr_guest_keyids);
+
+int tdx_guest_keyid_alloc(void)
+{
+ return ida_alloc_range(&tdx_guest_keyid_pool, tdx_guest_keyid_start,
+ tdx_guest_keyid_start + tdx_nr_guest_keyids - 1,
+ GFP_KERNEL);
+}
+EXPORT_SYMBOL_GPL(tdx_guest_keyid_alloc);
+
+void tdx_guest_keyid_free(unsigned int keyid)
+{
+ ida_free(&tdx_guest_keyid_pool, keyid);
+}
+EXPORT_SYMBOL_GPL(tdx_guest_keyid_free);
+
+static inline u64 tdx_tdr_pa(struct tdx_td *td)
+{
+ return page_to_phys(td->tdr_page);
+}
+
+static inline u64 tdx_tdvpr_pa(struct tdx_vp *td)
+{
+ return page_to_phys(td->tdvpr_page);
+}
+
+/*
+ * The TDX module exposes a CLFLUSH_BEFORE_ALLOC bit to specify whether
+ * a CLFLUSH of pages is required before handing them to the TDX module.
+ * Be conservative and make the code simpler by doing the CLFLUSH
+ * unconditionally.
+ */
+static void tdx_clflush_page(struct page *page)
+{
+ clflush_cache_range(page_to_virt(page), PAGE_SIZE);
+}
+
+noinstr __flatten u64 tdh_vp_enter(struct tdx_vp *td, struct tdx_module_args *args)
+{
+ args->rcx = tdx_tdvpr_pa(td);
+
+ return __seamcall_saved_ret(TDH_VP_ENTER, args);
+}
+EXPORT_SYMBOL_GPL(tdh_vp_enter);
+
+u64 tdh_mng_addcx(struct tdx_td *td, struct page *tdcs_page)
+{
+ struct tdx_module_args args = {
+ .rcx = page_to_phys(tdcs_page),
+ .rdx = tdx_tdr_pa(td),
+ };
+
+ tdx_clflush_page(tdcs_page);
+ return seamcall(TDH_MNG_ADDCX, &args);
+}
+EXPORT_SYMBOL_GPL(tdh_mng_addcx);
+
+u64 tdh_mem_page_add(struct tdx_td *td, u64 gpa, struct page *page, struct page *source, u64 *ext_err1, u64 *ext_err2)
+{
+ struct tdx_module_args args = {
+ .rcx = gpa,
+ .rdx = tdx_tdr_pa(td),
+ .r8 = page_to_phys(page),
+ .r9 = page_to_phys(source),
+ };
+ u64 ret;
+
+ tdx_clflush_page(page);
+ ret = seamcall_ret(TDH_MEM_PAGE_ADD, &args);
+
+ *ext_err1 = args.rcx;
+ *ext_err2 = args.rdx;
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(tdh_mem_page_add);
+
+u64 tdh_mem_sept_add(struct tdx_td *td, u64 gpa, int level, struct page *page, u64 *ext_err1, u64 *ext_err2)
+{
+ struct tdx_module_args args = {
+ .rcx = gpa | level,
+ .rdx = tdx_tdr_pa(td),
+ .r8 = page_to_phys(page),
+ };
+ u64 ret;
+
+ tdx_clflush_page(page);
+ ret = seamcall_ret(TDH_MEM_SEPT_ADD, &args);
+
+ *ext_err1 = args.rcx;
+ *ext_err2 = args.rdx;
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(tdh_mem_sept_add);
+
+u64 tdh_vp_addcx(struct tdx_vp *vp, struct page *tdcx_page)
+{
+ struct tdx_module_args args = {
+ .rcx = page_to_phys(tdcx_page),
+ .rdx = tdx_tdvpr_pa(vp),
+ };
+
+ tdx_clflush_page(tdcx_page);
+ return seamcall(TDH_VP_ADDCX, &args);
+}
+EXPORT_SYMBOL_GPL(tdh_vp_addcx);
+
+u64 tdh_mem_page_aug(struct tdx_td *td, u64 gpa, int level, struct page *page, u64 *ext_err1, u64 *ext_err2)
+{
+ struct tdx_module_args args = {
+ .rcx = gpa | level,
+ .rdx = tdx_tdr_pa(td),
+ .r8 = page_to_phys(page),
+ };
+ u64 ret;
+
+ tdx_clflush_page(page);
+ ret = seamcall_ret(TDH_MEM_PAGE_AUG, &args);
+
+ *ext_err1 = args.rcx;
+ *ext_err2 = args.rdx;
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(tdh_mem_page_aug);
+
+u64 tdh_mem_range_block(struct tdx_td *td, u64 gpa, int level, u64 *ext_err1, u64 *ext_err2)
+{
+ struct tdx_module_args args = {
+ .rcx = gpa | level,
+ .rdx = tdx_tdr_pa(td),
+ };
+ u64 ret;
+
+ ret = seamcall_ret(TDH_MEM_RANGE_BLOCK, &args);
+
+ *ext_err1 = args.rcx;
+ *ext_err2 = args.rdx;
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(tdh_mem_range_block);
+
+u64 tdh_mng_key_config(struct tdx_td *td)
+{
+ struct tdx_module_args args = {
+ .rcx = tdx_tdr_pa(td),
+ };
+
+ return seamcall(TDH_MNG_KEY_CONFIG, &args);
+}
+EXPORT_SYMBOL_GPL(tdh_mng_key_config);
+
+u64 tdh_mng_create(struct tdx_td *td, u16 hkid)
+{
+ struct tdx_module_args args = {
+ .rcx = tdx_tdr_pa(td),
+ .rdx = hkid,
+ };
+
+ tdx_clflush_page(td->tdr_page);
+ return seamcall(TDH_MNG_CREATE, &args);
+}
+EXPORT_SYMBOL_GPL(tdh_mng_create);
+
+u64 tdh_vp_create(struct tdx_td *td, struct tdx_vp *vp)
+{
+ struct tdx_module_args args = {
+ .rcx = tdx_tdvpr_pa(vp),
+ .rdx = tdx_tdr_pa(td),
+ };
+
+ tdx_clflush_page(vp->tdvpr_page);
+ return seamcall(TDH_VP_CREATE, &args);
+}
+EXPORT_SYMBOL_GPL(tdh_vp_create);
+
+u64 tdh_mng_rd(struct tdx_td *td, u64 field, u64 *data)
+{
+ struct tdx_module_args args = {
+ .rcx = tdx_tdr_pa(td),
+ .rdx = field,
+ };
+ u64 ret;
+
+ ret = seamcall_ret(TDH_MNG_RD, &args);
+
+ /* R8: Content of the field, or 0 in case of error. */
+ *data = args.r8;
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(tdh_mng_rd);
+
+u64 tdh_mr_extend(struct tdx_td *td, u64 gpa, u64 *ext_err1, u64 *ext_err2)
+{
+ struct tdx_module_args args = {
+ .rcx = gpa,
+ .rdx = tdx_tdr_pa(td),
+ };
+ u64 ret;
+
+ ret = seamcall_ret(TDH_MR_EXTEND, &args);
+
+ *ext_err1 = args.rcx;
+ *ext_err2 = args.rdx;
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(tdh_mr_extend);
+
+u64 tdh_mr_finalize(struct tdx_td *td)
+{
+ struct tdx_module_args args = {
+ .rcx = tdx_tdr_pa(td),
+ };
+
+ return seamcall(TDH_MR_FINALIZE, &args);
+}
+EXPORT_SYMBOL_GPL(tdh_mr_finalize);
+
+u64 tdh_vp_flush(struct tdx_vp *vp)
+{
+ struct tdx_module_args args = {
+ .rcx = tdx_tdvpr_pa(vp),
+ };
+
+ return seamcall(TDH_VP_FLUSH, &args);
+}
+EXPORT_SYMBOL_GPL(tdh_vp_flush);
+
+u64 tdh_mng_vpflushdone(struct tdx_td *td)
+{
+ struct tdx_module_args args = {
+ .rcx = tdx_tdr_pa(td),
+ };
+
+ return seamcall(TDH_MNG_VPFLUSHDONE, &args);
+}
+EXPORT_SYMBOL_GPL(tdh_mng_vpflushdone);
+
+u64 tdh_mng_key_freeid(struct tdx_td *td)
+{
+ struct tdx_module_args args = {
+ .rcx = tdx_tdr_pa(td),
+ };
+
+ return seamcall(TDH_MNG_KEY_FREEID, &args);
+}
+EXPORT_SYMBOL_GPL(tdh_mng_key_freeid);
+
+u64 tdh_mng_init(struct tdx_td *td, u64 td_params, u64 *extended_err)
+{
+ struct tdx_module_args args = {
+ .rcx = tdx_tdr_pa(td),
+ .rdx = td_params,
+ };
+ u64 ret;
+
+ ret = seamcall_ret(TDH_MNG_INIT, &args);
+
+ *extended_err = args.rcx;
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(tdh_mng_init);
+
+u64 tdh_vp_rd(struct tdx_vp *vp, u64 field, u64 *data)
+{
+ struct tdx_module_args args = {
+ .rcx = tdx_tdvpr_pa(vp),
+ .rdx = field,
+ };
+ u64 ret;
+
+ ret = seamcall_ret(TDH_VP_RD, &args);
+
+ /* R8: Content of the field, or 0 in case of error. */
+ *data = args.r8;
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(tdh_vp_rd);
+
+u64 tdh_vp_wr(struct tdx_vp *vp, u64 field, u64 data, u64 mask)
+{
+ struct tdx_module_args args = {
+ .rcx = tdx_tdvpr_pa(vp),
+ .rdx = field,
+ .r8 = data,
+ .r9 = mask,
+ };
+
+ return seamcall(TDH_VP_WR, &args);
+}
+EXPORT_SYMBOL_GPL(tdh_vp_wr);
+
+u64 tdh_vp_init(struct tdx_vp *vp, u64 initial_rcx, u32 x2apicid)
+{
+ struct tdx_module_args args = {
+ .rcx = tdx_tdvpr_pa(vp),
+ .rdx = initial_rcx,
+ .r8 = x2apicid,
+ };
+
+ /* Passing the x2APIC ID in R8 requires SEAMCALL leaf version 1. */
+ return seamcall(TDH_VP_INIT | (1ULL << TDX_VERSION_SHIFT), &args);
+}
+EXPORT_SYMBOL_GPL(tdh_vp_init);
+
+/*
+ * TDX ABI defines output operands as PT, OWNER and SIZE. These are TDX defined formats.
+ * So despite the names, they must be interpreted specially as described by the spec. Return
+ * them only for error reporting purposes.
+ */
+u64 tdh_phymem_page_reclaim(struct page *page, u64 *tdx_pt, u64 *tdx_owner, u64 *tdx_size)
+{
+ struct tdx_module_args args = {
+ .rcx = page_to_phys(page),
+ };
+ u64 ret;
+
+ ret = seamcall_ret(TDH_PHYMEM_PAGE_RECLAIM, &args);
+
+ *tdx_pt = args.rcx;
+ *tdx_owner = args.rdx;
+ *tdx_size = args.r8;
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(tdh_phymem_page_reclaim);
+
+u64 tdh_mem_track(struct tdx_td *td)
+{
+ struct tdx_module_args args = {
+ .rcx = tdx_tdr_pa(td),
+ };
+
+ return seamcall(TDH_MEM_TRACK, &args);
+}
+EXPORT_SYMBOL_GPL(tdh_mem_track);
+
+u64 tdh_mem_page_remove(struct tdx_td *td, u64 gpa, u64 level, u64 *ext_err1, u64 *ext_err2)
+{
+ struct tdx_module_args args = {
+ .rcx = gpa | level,
+ .rdx = tdx_tdr_pa(td),
+ };
+ u64 ret;
+
+ ret = seamcall_ret(TDH_MEM_PAGE_REMOVE, &args);
+
+ *ext_err1 = args.rcx;
+ *ext_err2 = args.rdx;
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(tdh_mem_page_remove);
+
+u64 tdh_phymem_cache_wb(bool resume)
+{
+ struct tdx_module_args args = {
+ .rcx = resume ? 1 : 0,
+ };
+
+ return seamcall(TDH_PHYMEM_CACHE_WB, &args);
+}
+EXPORT_SYMBOL_GPL(tdh_phymem_cache_wb);
+
+u64 tdh_phymem_page_wbinvd_tdr(struct tdx_td *td)
+{
+ struct tdx_module_args args = {};
+
+ args.rcx = mk_keyed_paddr(tdx_global_keyid, td->tdr_page);
+
+ return seamcall(TDH_PHYMEM_PAGE_WBINVD, &args);
+}
+EXPORT_SYMBOL_GPL(tdh_phymem_page_wbinvd_tdr);
+
+u64 tdh_phymem_page_wbinvd_hkid(u64 hkid, struct page *page)
+{
+ struct tdx_module_args args = {};
+
+ args.rcx = mk_keyed_paddr(hkid, page);
+
+ return seamcall(TDH_PHYMEM_PAGE_WBINVD, &args);
+}
+EXPORT_SYMBOL_GPL(tdh_phymem_page_wbinvd_hkid);
diff --git a/arch/x86/virt/vmx/tdx/tdx.h b/arch/x86/virt/vmx/tdx/tdx.h
index b701f69485d3..82bb82be8567 100644
--- a/arch/x86/virt/vmx/tdx/tdx.h
+++ b/arch/x86/virt/vmx/tdx/tdx.h
@@ -14,46 +14,50 @@
/*
* TDX module SEAMCALL leaf functions
*/
-#define TDH_PHYMEM_PAGE_RDMD 24
-#define TDH_SYS_KEY_CONFIG 31
-#define TDH_SYS_INIT 33
-#define TDH_SYS_RD 34
-#define TDH_SYS_LP_INIT 35
-#define TDH_SYS_TDMR_INIT 36
-#define TDH_SYS_CONFIG 45
-
-/* TDX page types */
-#define PT_NDA 0x0
-#define PT_RSVD 0x1
-
-/*
- * Global scope metadata field ID.
- *
- * See Table "Global Scope Metadata", TDX module 1.5 ABI spec.
- */
-#define MD_FIELD_ID_MAX_TDMRS 0x9100000100000008ULL
-#define MD_FIELD_ID_MAX_RESERVED_PER_TDMR 0x9100000100000009ULL
-#define MD_FIELD_ID_PAMT_4K_ENTRY_SIZE 0x9100000100000010ULL
-#define MD_FIELD_ID_PAMT_2M_ENTRY_SIZE 0x9100000100000011ULL
-#define MD_FIELD_ID_PAMT_1G_ENTRY_SIZE 0x9100000100000012ULL
+#define TDH_VP_ENTER 0
+#define TDH_MNG_ADDCX 1
+#define TDH_MEM_PAGE_ADD 2
+#define TDH_MEM_SEPT_ADD 3
+#define TDH_VP_ADDCX 4
+#define TDH_MEM_PAGE_AUG 6
+#define TDH_MEM_RANGE_BLOCK 7
+#define TDH_MNG_KEY_CONFIG 8
+#define TDH_MNG_CREATE 9
+#define TDH_MNG_RD 11
+#define TDH_MR_EXTEND 16
+#define TDH_MR_FINALIZE 17
+#define TDH_VP_FLUSH 18
+#define TDH_MNG_VPFLUSHDONE 19
+#define TDH_VP_CREATE 10
+#define TDH_MNG_KEY_FREEID 20
+#define TDH_MNG_INIT 21
+#define TDH_VP_INIT 22
+#define TDH_PHYMEM_PAGE_RDMD 24
+#define TDH_VP_RD 26
+#define TDH_PHYMEM_PAGE_RECLAIM 28
+#define TDH_MEM_PAGE_REMOVE 29
+#define TDH_SYS_KEY_CONFIG 31
+#define TDH_SYS_INIT 33
+#define TDH_SYS_RD 34
+#define TDH_SYS_LP_INIT 35
+#define TDH_SYS_TDMR_INIT 36
+#define TDH_MEM_TRACK 38
+#define TDH_PHYMEM_CACHE_WB 40
+#define TDH_PHYMEM_PAGE_WBINVD 41
+#define TDH_VP_WR 43
+#define TDH_SYS_CONFIG 45
/*
- * Sub-field definition of metadata field ID.
+ * SEAMCALL leaf:
*
- * See Table "MD_FIELD_ID (Metadata Field Identifier / Sequence Header)
- * Definition", TDX module 1.5 ABI spec.
- *
- * - Bit 33:32: ELEMENT_SIZE_CODE -- size of a single element of metadata
- *
- * 0: 8 bits
- * 1: 16 bits
- * 2: 32 bits
- * 3: 64 bits
+ * Bit 15:0 Leaf number
+ * Bit 23:16 Version number
*/
-#define MD_FIELD_ID_ELE_SIZE_CODE(_field_id) \
- (((_field_id) & GENMASK_ULL(33, 32)) >> 32)
+#define TDX_VERSION_SHIFT 16
-#define MD_FIELD_ID_ELE_SIZE_16BIT 1
+/* TDX page types */
+#define PT_NDA 0x0
+#define PT_RSVD 0x1
struct tdmr_reserved_area {
u64 offset;
@@ -80,6 +84,9 @@ struct tdmr_info {
DECLARE_FLEX_ARRAY(struct tdmr_reserved_area, reserved_areas);
} __packed __aligned(TDMR_INFO_ALIGNMENT);
+/* Bit definitions of TDX_FEATURES0 metadata field */
+#define TDX_FEATURES0_NO_RBP_MOD BIT(18)
+
/*
* Do not put any hardware-defined TDX structure representations below
* this comment!
@@ -99,13 +106,6 @@ struct tdx_memblock {
int nid;
};
-/* "TDMR info" part of "Global Scope Metadata" for constructing TDMRs */
-struct tdx_tdmr_sysinfo {
- u16 max_tdmrs;
- u16 max_reserved_per_tdmr;
- u16 pamt_entry_size[TDX_PS_NR];
-};
-
/* Warn if kernel has less than TDMR_NR_WARN TDMRs after allocation */
#define TDMR_NR_WARN 4
diff --git a/arch/x86/virt/vmx/tdx/tdx_global_metadata.c b/arch/x86/virt/vmx/tdx/tdx_global_metadata.c
new file mode 100644
index 000000000000..13ad2663488b
--- /dev/null
+++ b/arch/x86/virt/vmx/tdx/tdx_global_metadata.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Automatically generated functions to read TDX global metadata.
+ *
+ * This file doesn't compile on its own because it does not include
+ * the SEAMCALL wrapper primitive which reads global metadata.
+ * Include this file from another C file instead.
+ */
+
+static int get_tdx_sys_info_features(struct tdx_sys_info_features *sysinfo_features)
+{
+ int ret = 0;
+ u64 val;
+
+ if (!ret && !(ret = read_sys_metadata_field(0x0A00000300000008, &val)))
+ sysinfo_features->tdx_features0 = val;
+
+ return ret;
+}
+
+static int get_tdx_sys_info_tdmr(struct tdx_sys_info_tdmr *sysinfo_tdmr)
+{
+ int ret = 0;
+ u64 val;
+
+ if (!ret && !(ret = read_sys_metadata_field(0x9100000100000008, &val)))
+ sysinfo_tdmr->max_tdmrs = val;
+ if (!ret && !(ret = read_sys_metadata_field(0x9100000100000009, &val)))
+ sysinfo_tdmr->max_reserved_per_tdmr = val;
+ if (!ret && !(ret = read_sys_metadata_field(0x9100000100000010, &val)))
+ sysinfo_tdmr->pamt_4k_entry_size = val;
+ if (!ret && !(ret = read_sys_metadata_field(0x9100000100000011, &val)))
+ sysinfo_tdmr->pamt_2m_entry_size = val;
+ if (!ret && !(ret = read_sys_metadata_field(0x9100000100000012, &val)))
+ sysinfo_tdmr->pamt_1g_entry_size = val;
+
+ return ret;
+}
+
+static int get_tdx_sys_info_td_ctrl(struct tdx_sys_info_td_ctrl *sysinfo_td_ctrl)
+{
+ int ret = 0;
+ u64 val;
+
+ if (!ret && !(ret = read_sys_metadata_field(0x9800000100000000, &val)))
+ sysinfo_td_ctrl->tdr_base_size = val;
+ if (!ret && !(ret = read_sys_metadata_field(0x9800000100000100, &val)))
+ sysinfo_td_ctrl->tdcs_base_size = val;
+ if (!ret && !(ret = read_sys_metadata_field(0x9800000100000200, &val)))
+ sysinfo_td_ctrl->tdvps_base_size = val;
+
+ return ret;
+}
+
+static int get_tdx_sys_info_td_conf(struct tdx_sys_info_td_conf *sysinfo_td_conf)
+{
+ int ret = 0;
+ u64 val;
+ int i, j;
+
+ if (!ret && !(ret = read_sys_metadata_field(0x1900000300000000, &val)))
+ sysinfo_td_conf->attributes_fixed0 = val;
+ if (!ret && !(ret = read_sys_metadata_field(0x1900000300000001, &val)))
+ sysinfo_td_conf->attributes_fixed1 = val;
+ if (!ret && !(ret = read_sys_metadata_field(0x1900000300000002, &val)))
+ sysinfo_td_conf->xfam_fixed0 = val;
+ if (!ret && !(ret = read_sys_metadata_field(0x1900000300000003, &val)))
+ sysinfo_td_conf->xfam_fixed1 = val;
+ if (!ret && !(ret = read_sys_metadata_field(0x9900000100000004, &val)))
+ sysinfo_td_conf->num_cpuid_config = val;
+ if (!ret && !(ret = read_sys_metadata_field(0x9900000100000008, &val)))
+ sysinfo_td_conf->max_vcpus_per_td = val;
+ if (sysinfo_td_conf->num_cpuid_config > ARRAY_SIZE(sysinfo_td_conf->cpuid_config_leaves))
+ return -EINVAL;
+ for (i = 0; i < sysinfo_td_conf->num_cpuid_config; i++)
+ if (!ret && !(ret = read_sys_metadata_field(0x9900000300000400 + i, &val)))
+ sysinfo_td_conf->cpuid_config_leaves[i] = val;
+ if (sysinfo_td_conf->num_cpuid_config > ARRAY_SIZE(sysinfo_td_conf->cpuid_config_values))
+ return -EINVAL;
+ for (i = 0; i < sysinfo_td_conf->num_cpuid_config; i++)
+ for (j = 0; j < 2; j++)
+ if (!ret && !(ret = read_sys_metadata_field(0x9900000300000500 + i * 2 + j, &val)))
+ sysinfo_td_conf->cpuid_config_values[i][j] = val;
+
+ return ret;
+}
+
+static int get_tdx_sys_info(struct tdx_sys_info *sysinfo)
+{
+ int ret = 0;
+
+ ret = ret ?: get_tdx_sys_info_features(&sysinfo->features);
+ ret = ret ?: get_tdx_sys_info_tdmr(&sysinfo->tdmr);
+ ret = ret ?: get_tdx_sys_info_td_ctrl(&sysinfo->td_ctrl);
+ ret = ret ?: get_tdx_sys_info_td_conf(&sysinfo->td_conf);
+
+ return ret;
+}