aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/arch
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-03-11 17:44:11 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2024-03-11 17:44:11 -0700
commit38b334fc767e44816be087b3ec5d84b1438b735f (patch)
tree6ee0cc2a1f738df713708ab6d83d6d3b44327eda /arch
parentMerge tag 'x86_cache_for_v6.9_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip (diff)
parentx86/sev: Disable KMSAN for memory encryption TUs (diff)
downloadwireguard-linux-38b334fc767e44816be087b3ec5d84b1438b735f.tar.xz
wireguard-linux-38b334fc767e44816be087b3ec5d84b1438b735f.zip
Merge tag 'x86_sev_for_v6.9_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 SEV updates from Borislav Petkov: - Add the x86 part of the SEV-SNP host support. This will allow the kernel to be used as a KVM hypervisor capable of running SNP (Secure Nested Paging) guests. Roughly speaking, SEV-SNP is the ultimate goal of the AMD confidential computing side, providing the most comprehensive confidential computing environment up to date. This is the x86 part and there is a KVM part which did not get ready in time for the merge window so latter will be forthcoming in the next cycle. - Rework the early code's position-dependent SEV variable references in order to allow building the kernel with clang and -fPIE/-fPIC and -mcmodel=kernel - The usual set of fixes, cleanups and improvements all over the place * tag 'x86_sev_for_v6.9_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (36 commits) x86/sev: Disable KMSAN for memory encryption TUs x86/sev: Dump SEV_STATUS crypto: ccp - Have it depend on AMD_IOMMU iommu/amd: Fix failure return from snp_lookup_rmpentry() x86/sev: Fix position dependent variable references in startup code crypto: ccp: Make snp_range_list static x86/Kconfig: Remove CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT Documentation: virt: Fix up pre-formatted text block for SEV ioctls crypto: ccp: Add the SNP_SET_CONFIG command crypto: ccp: Add the SNP_COMMIT command crypto: ccp: Add the SNP_PLATFORM_STATUS command x86/cpufeatures: Enable/unmask SEV-SNP CPU feature KVM: SEV: Make AVIC backing, VMSA and VMCB memory allocation SNP safe crypto: ccp: Add panic notifier for SEV/SNP firmware shutdown on kdump iommu/amd: Clean up RMP entries for IOMMU pages during SNP shutdown crypto: ccp: Handle legacy SEV commands when SNP is enabled crypto: ccp: Handle non-volatile INIT_EX data when SNP is enabled crypto: ccp: Handle the legacy TMR allocation when SNP is enabled x86/sev: Introduce an SNP leaked pages list crypto: ccp: Provide an API to issue SEV and SNP commands ...
Diffstat (limited to 'arch')
-rw-r--r--arch/x86/Kbuild2
-rw-r--r--arch/x86/Kconfig13
-rw-r--r--arch/x86/boot/compressed/sev.c6
-rw-r--r--arch/x86/coco/core.c7
-rw-r--r--arch/x86/include/asm/asm.h14
-rw-r--r--arch/x86/include/asm/coco.h9
-rw-r--r--arch/x86/include/asm/cpufeatures.h1
-rw-r--r--arch/x86/include/asm/disabled-features.h8
-rw-r--r--arch/x86/include/asm/iommu.h1
-rw-r--r--arch/x86/include/asm/kvm-x86-ops.h1
-rw-r--r--arch/x86/include/asm/kvm_host.h1
-rw-r--r--arch/x86/include/asm/mem_encrypt.h15
-rw-r--r--arch/x86/include/asm/msr-index.h66
-rw-r--r--arch/x86/include/asm/sev.h40
-rw-r--r--arch/x86/include/asm/trap_pf.h20
-rw-r--r--arch/x86/kernel/Makefile1
-rw-r--r--arch/x86/kernel/cpu/amd.c21
-rw-r--r--arch/x86/kernel/cpu/common.c7
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c3
-rw-r--r--arch/x86/kernel/crash.c3
-rw-r--r--arch/x86/kernel/sev-shared.c114
-rw-r--r--arch/x86/kernel/sev.c54
-rw-r--r--arch/x86/kvm/lapic.c5
-rw-r--r--arch/x86/kvm/svm/nested.c2
-rw-r--r--arch/x86/kvm/svm/sev.c37
-rw-r--r--arch/x86/kvm/svm/svm.c17
-rw-r--r--arch/x86/kvm/svm/svm.h1
-rw-r--r--arch/x86/mm/Makefile1
-rw-r--r--arch/x86/mm/fault.c5
-rw-r--r--arch/x86/mm/mem_encrypt.c55
-rw-r--r--arch/x86/mm/mem_encrypt_identity.c40
-rw-r--r--arch/x86/virt/svm/Makefile3
-rw-r--r--arch/x86/virt/svm/sev.c560
33 files changed, 1003 insertions, 130 deletions
diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild
index 5a83da703e87..6a1f36df6a18 100644
--- a/arch/x86/Kbuild
+++ b/arch/x86/Kbuild
@@ -28,5 +28,7 @@ obj-y += net/
obj-$(CONFIG_KEXEC_FILE) += purgatory/
+obj-y += virt/svm/
+
# for cleaning
subdir- += boot tools
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 854ab38a359a..0f869c4785b8 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1548,19 +1548,6 @@ config AMD_MEM_ENCRYPT
This requires an AMD processor that supports Secure Memory
Encryption (SME).
-config AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT
- bool "Activate AMD Secure Memory Encryption (SME) by default"
- depends on AMD_MEM_ENCRYPT
- help
- Say yes to have system memory encrypted by default if running on
- an AMD processor that supports Secure Memory Encryption (SME).
-
- If set to Y, then the encryption of system memory can be
- deactivated with the mem_encrypt=off command line option.
-
- If set to N, then the encryption of system memory can be
- activated with the mem_encrypt=on command line option.
-
# Common NUMA Features
config NUMA
bool "NUMA Memory Allocation and Scheduler Support"
diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c
index 454acd7a2daf..9db630238034 100644
--- a/arch/x86/boot/compressed/sev.c
+++ b/arch/x86/boot/compressed/sev.c
@@ -304,6 +304,10 @@ void do_boot_stage2_vc(struct pt_regs *regs, unsigned long exit_code)
if (result != ES_OK)
goto finish;
+ result = vc_check_opcode_bytes(&ctxt, exit_code);
+ if (result != ES_OK)
+ goto finish;
+
switch (exit_code) {
case SVM_EXIT_RDTSC:
case SVM_EXIT_RDTSCP:
@@ -365,7 +369,7 @@ static void enforce_vmpl0(void)
MSR_AMD64_SNP_VMPL_SSS | \
MSR_AMD64_SNP_SECURE_TSC | \
MSR_AMD64_SNP_VMGEXIT_PARAM | \
- MSR_AMD64_SNP_VMSA_REG_PROTECTION | \
+ MSR_AMD64_SNP_VMSA_REG_PROT | \
MSR_AMD64_SNP_RESERVED_BIT13 | \
MSR_AMD64_SNP_RESERVED_BIT15 | \
MSR_AMD64_SNP_RESERVED_MASK)
diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c
index eeec9986570e..d07be9d05cd0 100644
--- a/arch/x86/coco/core.c
+++ b/arch/x86/coco/core.c
@@ -14,7 +14,7 @@
#include <asm/processor.h>
enum cc_vendor cc_vendor __ro_after_init = CC_VENDOR_NONE;
-static u64 cc_mask __ro_after_init;
+u64 cc_mask __ro_after_init;
static bool noinstr intel_cc_platform_has(enum cc_attr attr)
{
@@ -148,8 +148,3 @@ u64 cc_mkdec(u64 val)
}
}
EXPORT_SYMBOL_GPL(cc_mkdec);
-
-__init void cc_set_mask(u64 mask)
-{
- cc_mask = mask;
-}
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index fbcfec4dc4cc..ca8eed1d496a 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -113,6 +113,20 @@
#endif
+#ifndef __ASSEMBLY__
+#ifndef __pic__
+static __always_inline __pure void *rip_rel_ptr(void *p)
+{
+ asm("leaq %c1(%%rip), %0" : "=r"(p) : "i"(p));
+
+ return p;
+}
+#define RIP_REL_REF(var) (*(typeof(&(var)))rip_rel_ptr(&(var)))
+#else
+#define RIP_REL_REF(var) (var)
+#endif
+#endif
+
/*
* Macros to generate condition code outputs from inline assembly,
* The output operand must be type "bool".
diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h
index 76c310b19b11..fb7388bbc212 100644
--- a/arch/x86/include/asm/coco.h
+++ b/arch/x86/include/asm/coco.h
@@ -2,6 +2,7 @@
#ifndef _ASM_X86_COCO_H
#define _ASM_X86_COCO_H
+#include <asm/asm.h>
#include <asm/types.h>
enum cc_vendor {
@@ -12,7 +13,13 @@ enum cc_vendor {
#ifdef CONFIG_ARCH_HAS_CC_PLATFORM
extern enum cc_vendor cc_vendor;
-void cc_set_mask(u64 mask);
+extern u64 cc_mask;
+
+static inline void cc_set_mask(u64 mask)
+{
+ RIP_REL_REF(cc_mask) = mask;
+}
+
u64 cc_mkenc(u64 val);
u64 cc_mkdec(u64 val);
#else
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 8e371c6cc5f9..0343caa016a9 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -442,6 +442,7 @@
#define X86_FEATURE_SEV (19*32+ 1) /* AMD Secure Encrypted Virtualization */
#define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* "" VM Page Flush MSR is supported */
#define X86_FEATURE_SEV_ES (19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */
+#define X86_FEATURE_SEV_SNP (19*32+ 4) /* AMD Secure Encrypted Virtualization - Secure Nested Paging */
#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* "" Virtual TSC_AUX */
#define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */
#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* AMD SEV-ES full debug state swap support */
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index f40b29d3abad..d73fea9c3bf1 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -123,6 +123,12 @@
# define DISABLE_FRED (1 << (X86_FEATURE_FRED & 31))
#endif
+#ifdef CONFIG_KVM_AMD_SEV
+#define DISABLE_SEV_SNP 0
+#else
+#define DISABLE_SEV_SNP (1 << (X86_FEATURE_SEV_SNP & 31))
+#endif
+
/*
* Make sure to add features to the correct mask
*/
@@ -147,7 +153,7 @@
DISABLE_ENQCMD)
#define DISABLED_MASK17 0
#define DISABLED_MASK18 (DISABLE_IBT)
-#define DISABLED_MASK19 0
+#define DISABLED_MASK19 (DISABLE_SEV_SNP)
#define DISABLED_MASK20 0
#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21)
diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
index 2fd52b65deac..3be2451e7bc8 100644
--- a/arch/x86/include/asm/iommu.h
+++ b/arch/x86/include/asm/iommu.h
@@ -10,6 +10,7 @@ extern int force_iommu, no_iommu;
extern int iommu_detected;
extern int iommu_merge;
extern int panic_on_overflow;
+extern bool amd_iommu_snp_en;
#ifdef CONFIG_SWIOTLB
extern bool x86_swiotlb_enable;
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 378ed944b849..ab24ce207988 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -138,6 +138,7 @@ KVM_X86_OP(complete_emulated_msr)
KVM_X86_OP(vcpu_deliver_sipi_vector)
KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons);
KVM_X86_OP_OPTIONAL(get_untagged_addr)
+KVM_X86_OP_OPTIONAL(alloc_apic_backing_page)
#undef KVM_X86_OP
#undef KVM_X86_OP_OPTIONAL
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d271ba20a0b2..18cbde14cf81 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1796,6 +1796,7 @@ struct kvm_x86_ops {
unsigned long (*vcpu_get_apicv_inhibit_reasons)(struct kvm_vcpu *vcpu);
gva_t (*get_untagged_addr)(struct kvm_vcpu *vcpu, gva_t gva, unsigned int flags);
+ void *(*alloc_apic_backing_page)(struct kvm_vcpu *vcpu);
};
struct kvm_x86_nested_ops {
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index 359ada486fa9..b31eb9fd5954 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -15,7 +15,8 @@
#include <linux/init.h>
#include <linux/cc_platform.h>
-#include <asm/bootparam.h>
+#include <asm/asm.h>
+struct boot_params;
#ifdef CONFIG_X86_MEM_ENCRYPT
void __init mem_encrypt_init(void);
@@ -58,6 +59,11 @@ void __init mem_encrypt_free_decrypted_mem(void);
void __init sev_es_init_vc_handling(void);
+static inline u64 sme_get_me_mask(void)
+{
+ return RIP_REL_REF(sme_me_mask);
+}
+
#define __bss_decrypted __section(".bss..decrypted")
#else /* !CONFIG_AMD_MEM_ENCRYPT */
@@ -89,6 +95,8 @@ early_set_mem_enc_dec_hypercall(unsigned long vaddr, unsigned long size, bool en
static inline void mem_encrypt_free_decrypted_mem(void) { }
+static inline u64 sme_get_me_mask(void) { return 0; }
+
#define __bss_decrypted
#endif /* CONFIG_AMD_MEM_ENCRYPT */
@@ -106,11 +114,6 @@ void add_encrypt_protection_map(void);
extern char __start_bss_decrypted[], __end_bss_decrypted[], __start_bss_decrypted_unused[];
-static inline u64 sme_get_me_mask(void)
-{
- return sme_me_mask;
-}
-
#endif /* __ASSEMBLY__ */
#endif /* __X86_MEM_ENCRYPT_H__ */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 1f9dc9bd13eb..24c575cdd6b9 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -605,34 +605,47 @@
#define MSR_AMD64_SEV_ES_GHCB 0xc0010130
#define MSR_AMD64_SEV 0xc0010131
#define MSR_AMD64_SEV_ENABLED_BIT 0
-#define MSR_AMD64_SEV_ES_ENABLED_BIT 1
-#define MSR_AMD64_SEV_SNP_ENABLED_BIT 2
#define MSR_AMD64_SEV_ENABLED BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT)
+#define MSR_AMD64_SEV_ES_ENABLED_BIT 1
#define MSR_AMD64_SEV_ES_ENABLED BIT_ULL(MSR_AMD64_SEV_ES_ENABLED_BIT)
+#define MSR_AMD64_SEV_SNP_ENABLED_BIT 2
#define MSR_AMD64_SEV_SNP_ENABLED BIT_ULL(MSR_AMD64_SEV_SNP_ENABLED_BIT)
-
-/* SNP feature bits enabled by the hypervisor */
-#define MSR_AMD64_SNP_VTOM BIT_ULL(3)
-#define MSR_AMD64_SNP_REFLECT_VC BIT_ULL(4)
-#define MSR_AMD64_SNP_RESTRICTED_INJ BIT_ULL(5)
-#define MSR_AMD64_SNP_ALT_INJ BIT_ULL(6)
-#define MSR_AMD64_SNP_DEBUG_SWAP BIT_ULL(7)
-#define MSR_AMD64_SNP_PREVENT_HOST_IBS BIT_ULL(8)
-#define MSR_AMD64_SNP_BTB_ISOLATION BIT_ULL(9)
-#define MSR_AMD64_SNP_VMPL_SSS BIT_ULL(10)
-#define MSR_AMD64_SNP_SECURE_TSC BIT_ULL(11)
-#define MSR_AMD64_SNP_VMGEXIT_PARAM BIT_ULL(12)
-#define MSR_AMD64_SNP_IBS_VIRT BIT_ULL(14)
-#define MSR_AMD64_SNP_VMSA_REG_PROTECTION BIT_ULL(16)
-#define MSR_AMD64_SNP_SMT_PROTECTION BIT_ULL(17)
-
-/* SNP feature bits reserved for future use. */
-#define MSR_AMD64_SNP_RESERVED_BIT13 BIT_ULL(13)
-#define MSR_AMD64_SNP_RESERVED_BIT15 BIT_ULL(15)
-#define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, 18)
+#define MSR_AMD64_SNP_VTOM_BIT 3
+#define MSR_AMD64_SNP_VTOM BIT_ULL(MSR_AMD64_SNP_VTOM_BIT)
+#define MSR_AMD64_SNP_REFLECT_VC_BIT 4
+#define MSR_AMD64_SNP_REFLECT_VC BIT_ULL(MSR_AMD64_SNP_REFLECT_VC_BIT)
+#define MSR_AMD64_SNP_RESTRICTED_INJ_BIT 5
+#define MSR_AMD64_SNP_RESTRICTED_INJ BIT_ULL(MSR_AMD64_SNP_RESTRICTED_INJ_BIT)
+#define MSR_AMD64_SNP_ALT_INJ_BIT 6
+#define MSR_AMD64_SNP_ALT_INJ BIT_ULL(MSR_AMD64_SNP_ALT_INJ_BIT)
+#define MSR_AMD64_SNP_DEBUG_SWAP_BIT 7
+#define MSR_AMD64_SNP_DEBUG_SWAP BIT_ULL(MSR_AMD64_SNP_DEBUG_SWAP_BIT)
+#define MSR_AMD64_SNP_PREVENT_HOST_IBS_BIT 8
+#define MSR_AMD64_SNP_PREVENT_HOST_IBS BIT_ULL(MSR_AMD64_SNP_PREVENT_HOST_IBS_BIT)
+#define MSR_AMD64_SNP_BTB_ISOLATION_BIT 9
+#define MSR_AMD64_SNP_BTB_ISOLATION BIT_ULL(MSR_AMD64_SNP_BTB_ISOLATION_BIT)
+#define MSR_AMD64_SNP_VMPL_SSS_BIT 10
+#define MSR_AMD64_SNP_VMPL_SSS BIT_ULL(MSR_AMD64_SNP_VMPL_SSS_BIT)
+#define MSR_AMD64_SNP_SECURE_TSC_BIT 11
+#define MSR_AMD64_SNP_SECURE_TSC BIT_ULL(MSR_AMD64_SNP_SECURE_TSC_BIT)
+#define MSR_AMD64_SNP_VMGEXIT_PARAM_BIT 12
+#define MSR_AMD64_SNP_VMGEXIT_PARAM BIT_ULL(MSR_AMD64_SNP_VMGEXIT_PARAM_BIT)
+#define MSR_AMD64_SNP_RESERVED_BIT13 BIT_ULL(13)
+#define MSR_AMD64_SNP_IBS_VIRT_BIT 14
+#define MSR_AMD64_SNP_IBS_VIRT BIT_ULL(MSR_AMD64_SNP_IBS_VIRT_BIT)
+#define MSR_AMD64_SNP_RESERVED_BIT15 BIT_ULL(15)
+#define MSR_AMD64_SNP_VMSA_REG_PROT_BIT 16
+#define MSR_AMD64_SNP_VMSA_REG_PROT BIT_ULL(MSR_AMD64_SNP_VMSA_REG_PROT_BIT)
+#define MSR_AMD64_SNP_SMT_PROT_BIT 17
+#define MSR_AMD64_SNP_SMT_PROT BIT_ULL(MSR_AMD64_SNP_SMT_PROT_BIT)
+#define MSR_AMD64_SNP_RESV_BIT 18
+#define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, MSR_AMD64_SNP_RESV_BIT)
#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f
+#define MSR_AMD64_RMP_BASE 0xc0010132
+#define MSR_AMD64_RMP_END 0xc0010133
+
/* AMD Collaborative Processor Performance Control MSRs */
#define MSR_AMD_CPPC_CAP1 0xc00102b0
#define MSR_AMD_CPPC_ENABLE 0xc00102b1
@@ -719,8 +732,15 @@
#define MSR_K8_TOP_MEM1 0xc001001a
#define MSR_K8_TOP_MEM2 0xc001001d
#define MSR_AMD64_SYSCFG 0xc0010010
-#define MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT 23
+#define MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT 23
#define MSR_AMD64_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT)
+#define MSR_AMD64_SYSCFG_SNP_EN_BIT 24
+#define MSR_AMD64_SYSCFG_SNP_EN BIT_ULL(MSR_AMD64_SYSCFG_SNP_EN_BIT)
+#define MSR_AMD64_SYSCFG_SNP_VMPL_EN_BIT 25
+#define MSR_AMD64_SYSCFG_SNP_VMPL_EN BIT_ULL(MSR_AMD64_SYSCFG_SNP_VMPL_EN_BIT)
+#define MSR_AMD64_SYSCFG_MFDM_BIT 19
+#define MSR_AMD64_SYSCFG_MFDM BIT_ULL(MSR_AMD64_SYSCFG_MFDM_BIT)
+
#define MSR_K8_INT_PENDING_MSG 0xc0010055
/* C1E active bits in int pending message */
#define K8_INTP_C1E_ACTIVE_MASK 0x18000000
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index 5b4a1ce3d368..f000635d6061 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -87,9 +87,23 @@ extern bool handle_vc_boot_ghcb(struct pt_regs *regs);
/* Software defined (when rFlags.CF = 1) */
#define PVALIDATE_FAIL_NOUPDATE 255
+/* RMUPDATE detected 4K page and 2MB page overlap. */
+#define RMPUPDATE_FAIL_OVERLAP 4
+
/* RMP page size */
#define RMP_PG_SIZE_4K 0
#define RMP_PG_SIZE_2M 1
+#define RMP_TO_PG_LEVEL(level) (((level) == RMP_PG_SIZE_4K) ? PG_LEVEL_4K : PG_LEVEL_2M)
+#define PG_LEVEL_TO_RMP(level) (((level) == PG_LEVEL_4K) ? RMP_PG_SIZE_4K : RMP_PG_SIZE_2M)
+
+struct rmp_state {
+ u64 gpa;
+ u8 assigned;
+ u8 pagesize;
+ u8 immutable;
+ u8 rsvd;
+ u32 asid;
+} __packed;
#define RMPADJUST_VMSA_PAGE_BIT BIT(16)
@@ -213,6 +227,8 @@ int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct sn
void snp_accept_memory(phys_addr_t start, phys_addr_t end);
u64 snp_get_unsupported_features(u64 status);
u64 sev_get_status(void);
+void kdump_sev_callback(void);
+void sev_show_status(void);
#else
static inline void sev_es_ist_enter(struct pt_regs *regs) { }
static inline void sev_es_ist_exit(void) { }
@@ -241,6 +257,30 @@ static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *in
static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { }
static inline u64 snp_get_unsupported_features(u64 status) { return 0; }
static inline u64 sev_get_status(void) { return 0; }
+static inline void kdump_sev_callback(void) { }
+static inline void sev_show_status(void) { }
+#endif
+
+#ifdef CONFIG_KVM_AMD_SEV
+bool snp_probe_rmptable_info(void);
+int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level);
+void snp_dump_hva_rmpentry(unsigned long address);
+int psmash(u64 pfn);
+int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 asid, bool immutable);
+int rmp_make_shared(u64 pfn, enum pg_level level);
+void snp_leak_pages(u64 pfn, unsigned int npages);
+#else
+static inline bool snp_probe_rmptable_info(void) { return false; }
+static inline int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level) { return -ENODEV; }
+static inline void snp_dump_hva_rmpentry(unsigned long address) {}
+static inline int psmash(u64 pfn) { return -ENODEV; }
+static inline int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 asid,
+ bool immutable)
+{
+ return -ENODEV;
+}
+static inline int rmp_make_shared(u64 pfn, enum pg_level level) { return -ENODEV; }
+static inline void snp_leak_pages(u64 pfn, unsigned int npages) {}
#endif
#endif
diff --git a/arch/x86/include/asm/trap_pf.h b/arch/x86/include/asm/trap_pf.h
index afa524325e55..a23a7b707b64 100644
--- a/arch/x86/include/asm/trap_pf.h
+++ b/arch/x86/include/asm/trap_pf.h
@@ -2,6 +2,8 @@
#ifndef _ASM_X86_TRAP_PF_H
#define _ASM_X86_TRAP_PF_H
+#include <linux/bits.h>
+
/*
* Page fault error code bits:
*
@@ -13,16 +15,18 @@
* bit 5 == 1: protection keys block access
* bit 6 == 1: shadow stack access fault
* bit 15 == 1: SGX MMU page-fault
+ * bit 31 == 1: fault was due to RMP violation
*/
enum x86_pf_error_code {
- X86_PF_PROT = 1 << 0,
- X86_PF_WRITE = 1 << 1,
- X86_PF_USER = 1 << 2,
- X86_PF_RSVD = 1 << 3,
- X86_PF_INSTR = 1 << 4,
- X86_PF_PK = 1 << 5,
- X86_PF_SHSTK = 1 << 6,
- X86_PF_SGX = 1 << 15,
+ X86_PF_PROT = BIT(0),
+ X86_PF_WRITE = BIT(1),
+ X86_PF_USER = BIT(2),
+ X86_PF_RSVD = BIT(3),
+ X86_PF_INSTR = BIT(4),
+ X86_PF_PK = BIT(5),
+ X86_PF_SHSTK = BIT(6),
+ X86_PF_SGX = BIT(15),
+ X86_PF_RMP = BIT(31),
};
#endif /* _ASM_X86_TRAP_PF_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 0dcbfc1a4c41..d0c744cb2a0e 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -33,6 +33,7 @@ KASAN_SANITIZE_sev.o := n
KCSAN_SANITIZE := n
KMSAN_SANITIZE_head$(BITS).o := n
KMSAN_SANITIZE_nmi.o := n
+KMSAN_SANITIZE_sev.o := n
# If instrumentation of the following files is enabled, boot hangs during
# first second.
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 9cfd4e99944d..f8ae10222fd0 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -20,6 +20,7 @@
#include <asm/delay.h>
#include <asm/debugreg.h>
#include <asm/resctrl.h>
+#include <asm/sev.h>
#ifdef CONFIG_X86_64
# include <asm/mmconfig.h>
@@ -451,6 +452,21 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
break;
}
+ if (cpu_has(c, X86_FEATURE_SEV_SNP)) {
+ /*
+ * RMP table entry format is not architectural and it can vary by processor
+ * and is defined by the per-processor PPR. Restrict SNP support on the
+ * known CPU model and family for which the RMP table entry format is
+ * currently defined for.
+ */
+ if (!boot_cpu_has(X86_FEATURE_ZEN3) &&
+ !boot_cpu_has(X86_FEATURE_ZEN4) &&
+ !boot_cpu_has(X86_FEATURE_ZEN5))
+ setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
+ else if (!snp_probe_rmptable_info())
+ setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
+ }
+
return;
warn:
@@ -469,8 +485,8 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
* SME feature (set in scattered.c).
* If the kernel has not enabled SME via any means then
* don't advertise the SME feature.
- * For SEV: If BIOS has not enabled SEV then don't advertise the
- * SEV and SEV_ES feature (set in scattered.c).
+ * For SEV: If BIOS has not enabled SEV then don't advertise SEV and
+ * any additional functionality based on it.
*
* In all cases, since support for SME and SEV requires long mode,
* don't advertise the feature under CONFIG_X86_32.
@@ -505,6 +521,7 @@ clear_all:
clear_sev:
setup_clear_cpu_cap(X86_FEATURE_SEV);
setup_clear_cpu_cap(X86_FEATURE_SEV_ES);
+ setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
}
}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index c44e6f0c8972..e5d7dcaea209 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1309,8 +1309,13 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
/*
* AMD's AutoIBRS is equivalent to Intel's eIBRS - use the Intel feature
* flag and protect from vendor-specific bugs via the whitelist.
+ *
+ * Don't use AutoIBRS when SNP is enabled because it degrades host
+ * userspace indirect branch performance.
*/
- if ((ia32_cap & ARCH_CAP_IBRS_ALL) || cpu_has(c, X86_FEATURE_AUTOIBRS)) {
+ if ((ia32_cap & ARCH_CAP_IBRS_ALL) ||
+ (cpu_has(c, X86_FEATURE_AUTOIBRS) &&
+ !cpu_feature_enabled(X86_FEATURE_SEV_SNP))) {
setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED);
if (!cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) &&
!(ia32_cap & ARCH_CAP_PBRSB_NO))
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index d3524778a545..422a4ddc2ab7 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -108,6 +108,9 @@ static inline void k8_check_syscfg_dram_mod_en(void)
(boot_cpu_data.x86 >= 0x0f)))
return;
+ if (cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ return;
+
rdmsr(MSR_AMD64_SYSCFG, lo, hi);
if (lo & K8_MTRRFIXRANGE_DRAM_MODIFY) {
pr_err(FW_WARN "MTRR: CPU %u: SYSCFG[MtrrFixDramModEn]"
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index b6b044356f1b..d184c29398db 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -40,6 +40,7 @@
#include <asm/intel_pt.h>
#include <asm/crash.h>
#include <asm/cmdline.h>
+#include <asm/sev.h>
/* Used while preparing memory map entries for second kernel */
struct crash_memmap_data {
@@ -59,6 +60,8 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
*/
cpu_emergency_stop_pt();
+ kdump_sev_callback();
+
disable_local_APIC();
}
diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c
index 1d24ec679915..ae79f9505298 100644
--- a/arch/x86/kernel/sev-shared.c
+++ b/arch/x86/kernel/sev-shared.c
@@ -10,11 +10,15 @@
*/
#ifndef __BOOT_COMPRESSED
-#define error(v) pr_err(v)
-#define has_cpuflag(f) boot_cpu_has(f)
+#define error(v) pr_err(v)
+#define has_cpuflag(f) boot_cpu_has(f)
+#define sev_printk(fmt, ...) printk(fmt, ##__VA_ARGS__)
+#define sev_printk_rtl(fmt, ...) printk_ratelimited(fmt, ##__VA_ARGS__)
#else
#undef WARN
#define WARN(condition, format...) (!!(condition))
+#define sev_printk(fmt, ...)
+#define sev_printk_rtl(fmt, ...)
#endif
/* I/O parameters for CPUID-related helpers */
@@ -556,9 +560,9 @@ static int snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_le
leaf->eax = leaf->ebx = leaf->ecx = leaf->edx = 0;
/* Skip post-processing for out-of-range zero leafs. */
- if (!(leaf->fn <= cpuid_std_range_max ||
- (leaf->fn >= 0x40000000 && leaf->fn <= cpuid_hyp_range_max) ||
- (leaf->fn >= 0x80000000 && leaf->fn <= cpuid_ext_range_max)))
+ if (!(leaf->fn <= RIP_REL_REF(cpuid_std_range_max) ||
+ (leaf->fn >= 0x40000000 && leaf->fn <= RIP_REL_REF(cpuid_hyp_range_max)) ||
+ (leaf->fn >= 0x80000000 && leaf->fn <= RIP_REL_REF(cpuid_ext_range_max))))
return 0;
}
@@ -574,6 +578,7 @@ void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
{
unsigned int subfn = lower_bits(regs->cx, 32);
unsigned int fn = lower_bits(regs->ax, 32);
+ u16 opcode = *(unsigned short *)regs->ip;
struct cpuid_leaf leaf;
int ret;
@@ -581,6 +586,10 @@ void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
if (exit_code != SVM_EXIT_CPUID)
goto fail;
+ /* Is it really a CPUID insn? */
+ if (opcode != 0xa20f)
+ goto fail;
+
leaf.fn = fn;
leaf.subfn = subfn;
@@ -1063,11 +1072,11 @@ static void __init setup_cpuid_table(const struct cc_blob_sev_info *cc_info)
const struct snp_cpuid_fn *fn = &cpuid_table->fn[i];
if (fn->eax_in == 0x0)
- cpuid_std_range_max = fn->eax;
+ RIP_REL_REF(cpuid_std_range_max) = fn->eax;
else if (fn->eax_in == 0x40000000)
- cpuid_hyp_range_max = fn->eax;
+ RIP_REL_REF(cpuid_hyp_range_max) = fn->eax;
else if (fn->eax_in == 0x80000000)
- cpuid_ext_range_max = fn->eax;
+ RIP_REL_REF(cpuid_ext_range_max) = fn->eax;
}
}
@@ -1170,3 +1179,92 @@ static int vmgexit_psc(struct ghcb *ghcb, struct snp_psc_desc *desc)
out:
return ret;
}
+
+static enum es_result vc_check_opcode_bytes(struct es_em_ctxt *ctxt,
+ unsigned long exit_code)
+{
+ unsigned int opcode = (unsigned int)ctxt->insn.opcode.value;
+ u8 modrm = ctxt->insn.modrm.value;
+
+ switch (exit_code) {
+
+ case SVM_EXIT_IOIO:
+ case SVM_EXIT_NPF:
+ /* handled separately */
+ return ES_OK;
+
+ case SVM_EXIT_CPUID:
+ if (opcode == 0xa20f)
+ return ES_OK;
+ break;
+
+ case SVM_EXIT_INVD:
+ if (opcode == 0x080f)
+ return ES_OK;
+ break;
+
+ case SVM_EXIT_MONITOR:
+ if (opcode == 0x010f && modrm == 0xc8)
+ return ES_OK;
+ break;
+
+ case SVM_EXIT_MWAIT:
+ if (opcode == 0x010f && modrm == 0xc9)
+ return ES_OK;
+ break;
+
+ case SVM_EXIT_MSR:
+ /* RDMSR */
+ if (opcode == 0x320f ||
+ /* WRMSR */
+ opcode == 0x300f)
+ return ES_OK;
+ break;
+
+ case SVM_EXIT_RDPMC:
+ if (opcode == 0x330f)
+ return ES_OK;
+ break;
+
+ case SVM_EXIT_RDTSC:
+ if (opcode == 0x310f)
+ return ES_OK;
+ break;
+
+ case SVM_EXIT_RDTSCP:
+ if (opcode == 0x010f && modrm == 0xf9)
+ return ES_OK;
+ break;
+
+ case SVM_EXIT_READ_DR7:
+ if (opcode == 0x210f &&
+ X86_MODRM_REG(ctxt->insn.modrm.value) == 7)
+ return ES_OK;
+ break;
+
+ case SVM_EXIT_VMMCALL:
+ if (opcode == 0x010f && modrm == 0xd9)
+ return ES_OK;
+
+ break;
+
+ case SVM_EXIT_WRITE_DR7:
+ if (opcode == 0x230f &&
+ X86_MODRM_REG(ctxt->insn.modrm.value) == 7)
+ return ES_OK;
+ break;
+
+ case SVM_EXIT_WBINVD:
+ if (opcode == 0x90f)
+ return ES_OK;
+ break;
+
+ default:
+ break;
+ }
+
+ sev_printk(KERN_ERR "Wrong/unhandled opcode bytes: 0x%x, exit_code: 0x%lx, rIP: 0x%lx\n",
+ opcode, exit_code, ctxt->regs->ip);
+
+ return ES_UNSUPPORTED;
+}
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index c67285824e82..7d242898852f 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -59,6 +59,25 @@
#define AP_INIT_CR0_DEFAULT 0x60000010
#define AP_INIT_MXCSR_DEFAULT 0x1f80
+static const char * const sev_status_feat_names[] = {
+ [MSR_AMD64_SEV_ENABLED_BIT] = "SEV",
+ [MSR_AMD64_SEV_ES_ENABLED_BIT] = "SEV-ES",
+ [MSR_AMD64_SEV_SNP_ENABLED_BIT] = "SEV-SNP",
+ [MSR_AMD64_SNP_VTOM_BIT] = "vTom",
+ [MSR_AMD64_SNP_REFLECT_VC_BIT] = "ReflectVC",
+ [MSR_AMD64_SNP_RESTRICTED_INJ_BIT] = "RI",
+ [MSR_AMD64_SNP_ALT_INJ_BIT] = "AI",
+ [MSR_AMD64_SNP_DEBUG_SWAP_BIT] = "DebugSwap",
+ [MSR_AMD64_SNP_PREVENT_HOST_IBS_BIT] = "NoHostIBS",
+ [MSR_AMD64_SNP_BTB_ISOLATION_BIT] = "BTBIsol",
+ [MSR_AMD64_SNP_VMPL_SSS_BIT] = "VmplSSS",
+ [MSR_AMD64_SNP_SECURE_TSC_BIT] = "SecureTSC",
+ [MSR_AMD64_SNP_VMGEXIT_PARAM_BIT] = "VMGExitParam",
+ [MSR_AMD64_SNP_IBS_VIRT_BIT] = "IBSVirt",
+ [MSR_AMD64_SNP_VMSA_REG_PROT_BIT] = "VMSARegProt",
+ [MSR_AMD64_SNP_SMT_PROT_BIT] = "SMTProt",
+};
+
/* For early boot hypervisor communication in SEV-ES enabled guests */
static struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE);
@@ -748,7 +767,7 @@ void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long padd
* This eliminates worries about jump tables or checking boot_cpu_data
* in the cc_platform_has() function.
*/
- if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED))
+ if (!(RIP_REL_REF(sev_status) & MSR_AMD64_SEV_SNP_ENABLED))
return;
/*
@@ -767,7 +786,7 @@ void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr
* This eliminates worries about jump tables or checking boot_cpu_data
* in the cc_platform_has() function.
*/
- if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED))
+ if (!(RIP_REL_REF(sev_status) & MSR_AMD64_SEV_SNP_ENABLED))
return;
/* Ask hypervisor to mark the memory pages shared in the RMP table. */
@@ -1752,7 +1771,10 @@ static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt,
struct ghcb *ghcb,
unsigned long exit_code)
{
- enum es_result result;
+ enum es_result result = vc_check_opcode_bytes(ctxt, exit_code);
+
+ if (result != ES_OK)
+ return result;
switch (exit_code) {
case SVM_EXIT_READ_DR7:
@@ -2262,3 +2284,29 @@ static int __init snp_init_platform_device(void)
return 0;
}
device_initcall(snp_init_platform_device);
+
+void kdump_sev_callback(void)
+{
+ /*
+ * Do wbinvd() on remote CPUs when SNP is enabled in order to
+ * safely do SNP_SHUTDOWN on the local CPU.
+ */
+ if (cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ wbinvd();
+}
+
+void sev_show_status(void)
+{
+ int i;
+
+ pr_info("Status: ");
+ for (i = 0; i < MSR_AMD64_SNP_RESV_BIT; i++) {
+ if (sev_status & BIT_ULL(i)) {
+ if (!sev_status_feat_names[i])
+ continue;
+
+ pr_cont("%s ", sev_status_feat_names[i]);
+ }
+ }
+ pr_cont("\n");
+}
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 3242f3da2457..1edf93ee3395 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2815,7 +2815,10 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
vcpu->arch.apic = apic;
- apic->regs = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
+ if (kvm_x86_ops.alloc_apic_backing_page)
+ apic->regs = static_call(kvm_x86_alloc_apic_backing_page)(vcpu);
+ else
+ apic->regs = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
if (!apic->regs) {
printk(KERN_ERR "malloc apic regs error for vcpu %x\n",
vcpu->vcpu_id);
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index dee62362a360..55b9a6d96bcf 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -1181,7 +1181,7 @@ int svm_allocate_nested(struct vcpu_svm *svm)
if (svm->nested.initialized)
return 0;
- vmcb02_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+ vmcb02_page = snp_safe_alloc_page(&svm->vcpu);
if (!vmcb02_page)
return -ENOMEM;
svm->nested.vmcb02.ptr = page_address(vmcb02_page);
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index a8ce5226b3b5..ae0ac12382b9 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -246,6 +246,7 @@ static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ struct sev_platform_init_args init_args = {0};
int asid, ret;
if (kvm->created_vcpus)
@@ -262,7 +263,8 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
goto e_no_asid;
sev->asid = asid;
- ret = sev_platform_init(&argp->error);
+ init_args.probe = false;
+ ret = sev_platform_init(&init_args);
if (ret)
goto e_free;
@@ -274,6 +276,7 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
return 0;
e_free:
+ argp->error = init_args.error;
sev_asid_free(sev);
sev->asid = 0;
e_no_asid:
@@ -3165,3 +3168,35 @@ void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, 1);
}
+
+struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu)
+{
+ unsigned long pfn;
+ struct page *p;
+
+ if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ return alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+
+ /*
+ * Allocate an SNP-safe page to workaround the SNP erratum where
+ * the CPU will incorrectly signal an RMP violation #PF if a
+ * hugepage (2MB or 1GB) collides with the RMP entry of a
+ * 2MB-aligned VMCB, VMSA, or AVIC backing page.
+ *
+ * Allocate one extra page, choose a page which is not
+ * 2MB-aligned, and free the other.
+ */
+ p = alloc_pages(GFP_KERNEL_ACCOUNT | __GFP_ZERO, 1);
+ if (!p)
+ return NULL;
+
+ split_page(p, 1);
+
+ pfn = page_to_pfn(p);
+ if (IS_ALIGNED(pfn, PTRS_PER_PMD))
+ __free_page(p++);
+ else
+ __free_page(p + 1);
+
+ return p;
+}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index e90b429c84f1..8284105bf704 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -703,7 +703,7 @@ static int svm_cpu_init(int cpu)
int ret = -ENOMEM;
memset(sd, 0, sizeof(struct svm_cpu_data));
- sd->save_area = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ sd->save_area = snp_safe_alloc_page(NULL);
if (!sd->save_area)
return ret;
@@ -1421,7 +1421,7 @@ static int svm_vcpu_create(struct kvm_vcpu *vcpu)
svm = to_svm(vcpu);
err = -ENOMEM;
- vmcb01_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+ vmcb01_page = snp_safe_alloc_page(vcpu);
if (!vmcb01_page)
goto out;
@@ -1430,7 +1430,7 @@ static int svm_vcpu_create(struct kvm_vcpu *vcpu)
* SEV-ES guests require a separate VMSA page used to contain
* the encrypted register state of the guest.
*/
- vmsa_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+ vmsa_page = snp_safe_alloc_page(vcpu);
if (!vmsa_page)
goto error_free_vmcb_page;
@@ -4900,6 +4900,16 @@ static int svm_vm_init(struct kvm *kvm)
return 0;
}
+static void *svm_alloc_apic_backing_page(struct kvm_vcpu *vcpu)
+{
+ struct page *page = snp_safe_alloc_page(vcpu);
+
+ if (!page)
+ return NULL;
+
+ return page_address(page);
+}
+
static struct kvm_x86_ops svm_x86_ops __initdata = {
.name = KBUILD_MODNAME,
@@ -5031,6 +5041,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.vcpu_deliver_sipi_vector = svm_vcpu_deliver_sipi_vector,
.vcpu_get_apicv_inhibit_reasons = avic_vcpu_get_apicv_inhibit_reasons,
+ .alloc_apic_backing_page = svm_alloc_apic_backing_page,
};
/*
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 8ef95139cd24..7f1fbd874c45 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -694,6 +694,7 @@ void sev_es_vcpu_reset(struct vcpu_svm *svm);
void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa);
void sev_es_unmap_ghcb(struct vcpu_svm *svm);
+struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu);
/* vmenter.S */
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index c80febc44cd2..6ec103bedcf1 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -16,6 +16,7 @@ KASAN_SANITIZE_pgprot.o := n
KCSAN_SANITIZE := n
# Avoid recursion by not calling KMSAN hooks for CEA code.
KMSAN_SANITIZE_cpu_entry_area.o := n
+KMSAN_SANITIZE_mem_encrypt_identity.o := n
ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_mem_encrypt.o = -pg
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index e1ac86396446..cdb5045a0428 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -35,6 +35,7 @@
#include <asm/vdso.h> /* fixup_vdso_exception() */
#include <asm/irq_stack.h>
#include <asm/fred.h>
+#include <asm/sev.h> /* snp_dump_hva_rmpentry() */
#define CREATE_TRACE_POINTS
#include <asm/trace/exceptions.h>
@@ -548,6 +549,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code, unsigned long ad
!(error_code & X86_PF_PROT) ? "not-present page" :
(error_code & X86_PF_RSVD) ? "reserved bit violation" :
(error_code & X86_PF_PK) ? "protection keys violation" :
+ (error_code & X86_PF_RMP) ? "RMP violation" :
"permissions violation");
if (!(error_code & X86_PF_USER) && user_mode(regs)) {
@@ -580,6 +582,9 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code, unsigned long ad
}
dump_pagetable(address);
+
+ if (error_code & X86_PF_RMP)
+ snp_dump_hva_rmpentry(address);
}
static noinline void
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index c290c55b632b..6f3b3e028718 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -14,6 +14,8 @@
#include <linux/mem_encrypt.h>
#include <linux/virtio_anchor.h>
+#include <asm/sev.h>
+
/* Override for DMA direct allocation check - ARCH_HAS_FORCE_DMA_UNENCRYPTED */
bool force_dma_unencrypted(struct device *dev)
{
@@ -42,38 +44,45 @@ bool force_dma_unencrypted(struct device *dev)
static void print_mem_encrypt_feature_info(void)
{
- pr_info("Memory Encryption Features active:");
-
- if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) {
- pr_cont(" Intel TDX\n");
- return;
- }
+ pr_info("Memory Encryption Features active: ");
- pr_cont(" AMD");
+ switch (cc_vendor) {
+ case CC_VENDOR_INTEL:
+ pr_cont("Intel TDX\n");
+ break;
+ case CC_VENDOR_AMD:
+ pr_cont("AMD");
- /* Secure Memory Encryption */
- if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) {
+ /* Secure Memory Encryption */
+ if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) {
/*
* SME is mutually exclusive with any of the SEV
* features below.
- */
- pr_cont(" SME\n");
- return;
- }
+ */
+ pr_cont(" SME\n");
+ return;
+ }
- /* Secure Encrypted Virtualization */
- if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
- pr_cont(" SEV");
+ /* Secure Encrypted Virtualization */
+ if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
+ pr_cont(" SEV");
+
+ /* Encrypted Register State */
+ if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
+ pr_cont(" SEV-ES");
- /* Encrypted Register State */
- if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
- pr_cont(" SEV-ES");
+ /* Secure Nested Paging */
+ if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
+ pr_cont(" SEV-SNP");
- /* Secure Nested Paging */
- if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
- pr_cont(" SEV-SNP");
+ pr_cont("\n");
- pr_cont("\n");
+ sev_show_status();
+
+ break;
+ default:
+ pr_cont("Unknown\n");
+ }
}
/* Architecture __weak replacement functions */
diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c
index d73aeb16417f..0166ab1780cc 100644
--- a/arch/x86/mm/mem_encrypt_identity.c
+++ b/arch/x86/mm/mem_encrypt_identity.c
@@ -97,7 +97,6 @@ static char sme_workarea[2 * PMD_SIZE] __section(".init.scratch");
static char sme_cmdline_arg[] __initdata = "mem_encrypt";
static char sme_cmdline_on[] __initdata = "on";
-static char sme_cmdline_off[] __initdata = "off";
static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd)
{
@@ -305,7 +304,8 @@ void __init sme_encrypt_kernel(struct boot_params *bp)
* instrumentation or checking boot_cpu_data in the cc_platform_has()
* function.
*/
- if (!sme_get_me_mask() || sev_status & MSR_AMD64_SEV_ENABLED)
+ if (!sme_get_me_mask() ||
+ RIP_REL_REF(sev_status) & MSR_AMD64_SEV_ENABLED)
return;
/*
@@ -504,10 +504,9 @@ void __init sme_encrypt_kernel(struct boot_params *bp)
void __init sme_enable(struct boot_params *bp)
{
- const char *cmdline_ptr, *cmdline_arg, *cmdline_on, *cmdline_off;
+ const char *cmdline_ptr, *cmdline_arg, *cmdline_on;
unsigned int eax, ebx, ecx, edx;
unsigned long feature_mask;
- bool active_by_default;
unsigned long me_mask;
char buffer[16];
bool snp;
@@ -543,11 +542,11 @@ void __init sme_enable(struct boot_params *bp)
me_mask = 1UL << (ebx & 0x3f);
/* Check the SEV MSR whether SEV or SME is enabled */
- sev_status = __rdmsr(MSR_AMD64_SEV);
- feature_mask = (sev_status & MSR_AMD64_SEV_ENABLED) ? AMD_SEV_BIT : AMD_SME_BIT;
+ RIP_REL_REF(sev_status) = msr = __rdmsr(MSR_AMD64_SEV);
+ feature_mask = (msr & MSR_AMD64_SEV_ENABLED) ? AMD_SEV_BIT : AMD_SME_BIT;
/* The SEV-SNP CC blob should never be present unless SEV-SNP is enabled. */
- if (snp && !(sev_status & MSR_AMD64_SEV_SNP_ENABLED))
+ if (snp && !(msr & MSR_AMD64_SEV_SNP_ENABLED))
snp_abort();
/* Check if memory encryption is enabled */
@@ -573,7 +572,6 @@ void __init sme_enable(struct boot_params *bp)
return;
} else {
/* SEV state cannot be controlled by a command line option */
- sme_me_mask = me_mask;
goto out;
}
@@ -588,31 +586,17 @@ void __init sme_enable(struct boot_params *bp)
asm ("lea sme_cmdline_on(%%rip), %0"
: "=r" (cmdline_on)
: "p" (sme_cmdline_on));
- asm ("lea sme_cmdline_off(%%rip), %0"
- : "=r" (cmdline_off)
- : "p" (sme_cmdline_off));
-
- if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT))
- active_by_default = true;
- else
- active_by_default = false;
cmdline_ptr = (const char *)((u64)bp->hdr.cmd_line_ptr |
((u64)bp->ext_cmd_line_ptr << 32));
- if (cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer)) < 0)
+ if (cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer)) < 0 ||
+ strncmp(buffer, cmdline_on, sizeof(buffer)))
return;
- if (!strncmp(buffer, cmdline_on, sizeof(buffer)))
- sme_me_mask = me_mask;
- else if (!strncmp(buffer, cmdline_off, sizeof(buffer)))
- sme_me_mask = 0;
- else
- sme_me_mask = active_by_default ? me_mask : 0;
out:
- if (sme_me_mask) {
- physical_mask &= ~sme_me_mask;
- cc_vendor = CC_VENDOR_AMD;
- cc_set_mask(sme_me_mask);
- }
+ RIP_REL_REF(sme_me_mask) = me_mask;
+ physical_mask &= ~me_mask;
+ cc_vendor = CC_VENDOR_AMD;
+ cc_set_mask(me_mask);
}
diff --git a/arch/x86/virt/svm/Makefile b/arch/x86/virt/svm/Makefile
new file mode 100644
index 000000000000..ef2a31bdcc70
--- /dev/null
+++ b/arch/x86/virt/svm/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_KVM_AMD_SEV) += sev.o
diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
new file mode 100644
index 000000000000..cffe1157a90a
--- /dev/null
+++ b/arch/x86/virt/svm/sev.c
@@ -0,0 +1,560 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * AMD SVM-SEV Host Support.
+ *
+ * Copyright (C) 2023 Advanced Micro Devices, Inc.
+ *
+ * Author: Ashish Kalra <ashish.kalra@amd.com>
+ *
+ */
+
+#include <linux/cc_platform.h>
+#include <linux/printk.h>
+#include <linux/mm_types.h>
+#include <linux/set_memory.h>
+#include <linux/memblock.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/cpumask.h>
+#include <linux/iommu.h>
+#include <linux/amd-iommu.h>
+
+#include <asm/sev.h>
+#include <asm/processor.h>
+#include <asm/setup.h>
+#include <asm/svm.h>
+#include <asm/smp.h>
+#include <asm/cpu.h>
+#include <asm/apic.h>
+#include <asm/cpuid.h>
+#include <asm/cmdline.h>
+#include <asm/iommu.h>
+
+/*
+ * The RMP entry format is not architectural. The format is defined in PPR
+ * Family 19h Model 01h, Rev B1 processor.
+ */
+struct rmpentry {
+ union {
+ struct {
+ u64 assigned : 1,
+ pagesize : 1,
+ immutable : 1,
+ rsvd1 : 9,
+ gpa : 39,
+ asid : 10,
+ vmsa : 1,
+ validated : 1,
+ rsvd2 : 1;
+ };
+ u64 lo;
+ };
+ u64 hi;
+} __packed;
+
+/*
+ * The first 16KB from the RMP_BASE is used by the processor for the
+ * bookkeeping, the range needs to be added during the RMP entry lookup.
+ */
+#define RMPTABLE_CPU_BOOKKEEPING_SZ 0x4000
+
+/* Mask to apply to a PFN to get the first PFN of a 2MB page */
+#define PFN_PMD_MASK GENMASK_ULL(63, PMD_SHIFT - PAGE_SHIFT)
+
+static u64 probed_rmp_base, probed_rmp_size;
+static struct rmpentry *rmptable __ro_after_init;
+static u64 rmptable_max_pfn __ro_after_init;
+
+static LIST_HEAD(snp_leaked_pages_list);
+static DEFINE_SPINLOCK(snp_leaked_pages_list_lock);
+
+static unsigned long snp_nr_leaked_pages;
+
+#undef pr_fmt
+#define pr_fmt(fmt) "SEV-SNP: " fmt
+
+static int __mfd_enable(unsigned int cpu)
+{
+ u64 val;
+
+ if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ return 0;
+
+ rdmsrl(MSR_AMD64_SYSCFG, val);
+
+ val |= MSR_AMD64_SYSCFG_MFDM;
+
+ wrmsrl(MSR_AMD64_SYSCFG, val);
+
+ return 0;
+}
+
+static __init void mfd_enable(void *arg)
+{
+ __mfd_enable(smp_processor_id());
+}
+
+static int __snp_enable(unsigned int cpu)
+{
+ u64 val;
+
+ if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ return 0;
+
+ rdmsrl(MSR_AMD64_SYSCFG, val);
+
+ val |= MSR_AMD64_SYSCFG_SNP_EN;
+ val |= MSR_AMD64_SYSCFG_SNP_VMPL_EN;
+
+ wrmsrl(MSR_AMD64_SYSCFG, val);
+
+ return 0;
+}
+
+static __init void snp_enable(void *arg)
+{
+ __snp_enable(smp_processor_id());
+}
+
+#define RMP_ADDR_MASK GENMASK_ULL(51, 13)
+
+bool snp_probe_rmptable_info(void)
+{
+ u64 max_rmp_pfn, calc_rmp_sz, rmp_sz, rmp_base, rmp_end;
+
+ rdmsrl(MSR_AMD64_RMP_BASE, rmp_base);
+ rdmsrl(MSR_AMD64_RMP_END, rmp_end);
+
+ if (!(rmp_base & RMP_ADDR_MASK) || !(rmp_end & RMP_ADDR_MASK)) {
+ pr_err("Memory for the RMP table has not been reserved by BIOS\n");
+ return false;
+ }
+
+ if (rmp_base > rmp_end) {
+ pr_err("RMP configuration not valid: base=%#llx, end=%#llx\n", rmp_base, rmp_end);
+ return false;
+ }
+
+ rmp_sz = rmp_end - rmp_base + 1;
+
+ /*
+ * Calculate the amount the memory that must be reserved by the BIOS to
+ * address the whole RAM, including the bookkeeping area. The RMP itself
+ * must also be covered.
+ */
+ max_rmp_pfn = max_pfn;
+ if (PHYS_PFN(rmp_end) > max_pfn)
+ max_rmp_pfn = PHYS_PFN(rmp_end);
+
+ calc_rmp_sz = (max_rmp_pfn << 4) + RMPTABLE_CPU_BOOKKEEPING_SZ;
+
+ if (calc_rmp_sz > rmp_sz) {
+ pr_err("Memory reserved for the RMP table does not cover full system RAM (expected 0x%llx got 0x%llx)\n",
+ calc_rmp_sz, rmp_sz);
+ return false;
+ }
+
+ probed_rmp_base = rmp_base;
+ probed_rmp_size = rmp_sz;
+
+ pr_info("RMP table physical range [0x%016llx - 0x%016llx]\n",
+ probed_rmp_base, probed_rmp_base + probed_rmp_size - 1);
+
+ return true;
+}
+
+/*
+ * Do the necessary preparations which are verified by the firmware as
+ * described in the SNP_INIT_EX firmware command description in the SNP
+ * firmware ABI spec.
+ */
+static int __init snp_rmptable_init(void)
+{
+ void *rmptable_start;
+ u64 rmptable_size;
+ u64 val;
+
+ if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ return 0;
+
+ if (!amd_iommu_snp_en)
+ return 0;
+
+ if (!probed_rmp_size)
+ goto nosnp;
+
+ rmptable_start = memremap(probed_rmp_base, probed_rmp_size, MEMREMAP_WB);
+ if (!rmptable_start) {
+ pr_err("Failed to map RMP table\n");
+ return 1;
+ }
+
+ /*
+ * Check if SEV-SNP is already enabled, this can happen in case of
+ * kexec boot.
+ */
+ rdmsrl(MSR_AMD64_SYSCFG, val);
+ if (val & MSR_AMD64_SYSCFG_SNP_EN)
+ goto skip_enable;
+
+ memset(rmptable_start, 0, probed_rmp_size);
+
+ /* Flush the caches to ensure that data is written before SNP is enabled. */
+ wbinvd_on_all_cpus();
+
+ /* MtrrFixDramModEn must be enabled on all the CPUs prior to enabling SNP. */
+ on_each_cpu(mfd_enable, NULL, 1);
+
+ on_each_cpu(snp_enable, NULL, 1);
+
+skip_enable:
+ rmptable_start += RMPTABLE_CPU_BOOKKEEPING_SZ;
+ rmptable_size = probed_rmp_size - RMPTABLE_CPU_BOOKKEEPING_SZ;
+
+ rmptable = (struct rmpentry *)rmptable_start;
+ rmptable_max_pfn = rmptable_size / sizeof(struct rmpentry) - 1;
+
+ cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/rmptable_init:online", __snp_enable, NULL);
+
+ /*
+ * Setting crash_kexec_post_notifiers to 'true' to ensure that SNP panic
+ * notifier is invoked to do SNP IOMMU shutdown before kdump.
+ */
+ crash_kexec_post_notifiers = true;
+
+ return 0;
+
+nosnp:
+ setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
+ return -ENOSYS;
+}
+
+/*
+ * This must be called after the IOMMU has been initialized.
+ */
+device_initcall(snp_rmptable_init);
+
+static struct rmpentry *get_rmpentry(u64 pfn)
+{
+ if (WARN_ON_ONCE(pfn > rmptable_max_pfn))
+ return ERR_PTR(-EFAULT);
+
+ return &rmptable[pfn];
+}
+
+static struct rmpentry *__snp_lookup_rmpentry(u64 pfn, int *level)
+{
+ struct rmpentry *large_entry, *entry;
+
+ if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ return ERR_PTR(-ENODEV);
+
+ entry = get_rmpentry(pfn);
+ if (IS_ERR(entry))
+ return entry;
+
+ /*
+ * Find the authoritative RMP entry for a PFN. This can be either a 4K
+ * RMP entry or a special large RMP entry that is authoritative for a
+ * whole 2M area.
+ */
+ large_entry = get_rmpentry(pfn & PFN_PMD_MASK);
+ if (IS_ERR(large_entry))
+ return large_entry;
+
+ *level = RMP_TO_PG_LEVEL(large_entry->pagesize);
+
+ return entry;
+}
+
+int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level)
+{
+ struct rmpentry *e;
+
+ e = __snp_lookup_rmpentry(pfn, level);
+ if (IS_ERR(e))
+ return PTR_ERR(e);
+
+ *assigned = !!e->assigned;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(snp_lookup_rmpentry);
+
+/*
+ * Dump the raw RMP entry for a particular PFN. These bits are documented in the
+ * PPR for a particular CPU model and provide useful information about how a
+ * particular PFN is being utilized by the kernel/firmware at the time certain
+ * unexpected events occur, such as RMP faults.
+ */
+static void dump_rmpentry(u64 pfn)
+{
+ u64 pfn_i, pfn_end;
+ struct rmpentry *e;
+ int level;
+
+ e = __snp_lookup_rmpentry(pfn, &level);
+ if (IS_ERR(e)) {
+ pr_err("Failed to read RMP entry for PFN 0x%llx, error %ld\n",
+ pfn, PTR_ERR(e));
+ return;
+ }
+
+ if (e->assigned) {
+ pr_info("PFN 0x%llx, RMP entry: [0x%016llx - 0x%016llx]\n",
+ pfn, e->lo, e->hi);
+ return;
+ }
+
+ /*
+ * If the RMP entry for a particular PFN is not in an assigned state,
+ * then it is sometimes useful to get an idea of whether or not any RMP
+ * entries for other PFNs within the same 2MB region are assigned, since
+ * those too can affect the ability to access a particular PFN in
+ * certain situations, such as when the PFN is being accessed via a 2MB
+ * mapping in the host page table.
+ */
+ pfn_i = ALIGN_DOWN(pfn, PTRS_PER_PMD);
+ pfn_end = pfn_i + PTRS_PER_PMD;
+
+ pr_info("PFN 0x%llx unassigned, dumping non-zero entries in 2M PFN region: [0x%llx - 0x%llx]\n",
+ pfn, pfn_i, pfn_end);
+
+ while (pfn_i < pfn_end) {
+ e = __snp_lookup_rmpentry(pfn_i, &level);
+ if (IS_ERR(e)) {
+ pr_err("Error %ld reading RMP entry for PFN 0x%llx\n",
+ PTR_ERR(e), pfn_i);
+ pfn_i++;
+ continue;
+ }
+
+ if (e->lo || e->hi)
+ pr_info("PFN: 0x%llx, [0x%016llx - 0x%016llx]\n", pfn_i, e->lo, e->hi);
+ pfn_i++;
+ }
+}
+
+void snp_dump_hva_rmpentry(unsigned long hva)
+{
+ unsigned long paddr;
+ unsigned int level;
+ pgd_t *pgd;
+ pte_t *pte;
+
+ pgd = __va(read_cr3_pa());
+ pgd += pgd_index(hva);
+ pte = lookup_address_in_pgd(pgd, hva, &level);
+
+ if (!pte) {
+ pr_err("Can't dump RMP entry for HVA %lx: no PTE/PFN found\n", hva);
+ return;
+ }
+
+ paddr = PFN_PHYS(pte_pfn(*pte)) | (hva & ~page_level_mask(level));
+ dump_rmpentry(PHYS_PFN(paddr));
+}
+
+/*
+ * PSMASH a 2MB aligned page into 4K pages in the RMP table while preserving the
+ * Validated bit.
+ */
+int psmash(u64 pfn)
+{
+ unsigned long paddr = pfn << PAGE_SHIFT;
+ int ret;
+
+ if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ return -ENODEV;
+
+ if (!pfn_valid(pfn))
+ return -EINVAL;
+
+ /* Binutils version 2.36 supports the PSMASH mnemonic. */
+ asm volatile(".byte 0xF3, 0x0F, 0x01, 0xFF"
+ : "=a" (ret)
+ : "a" (paddr)
+ : "memory", "cc");
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(psmash);
+
+/*
+ * If the kernel uses a 2MB or larger directmap mapping to write to an address,
+ * and that mapping contains any 4KB pages that are set to private in the RMP
+ * table, an RMP #PF will trigger and cause a host crash. Hypervisor code that
+ * owns the PFNs being transitioned will never attempt such a write, but other
+ * kernel tasks writing to other PFNs in the range may trigger these checks
+ * inadvertently due a large directmap mapping that happens to overlap such a
+ * PFN.
+ *
+ * Prevent this by splitting any 2MB+ mappings that might end up containing a
+ * mix of private/shared PFNs as a result of a subsequent RMPUPDATE for the
+ * PFN/rmp_level passed in.
+ *
+ * Note that there is no attempt here to scan all the RMP entries for the 2MB
+ * physical range, since it would only be worthwhile in determining if a
+ * subsequent RMPUPDATE for a 4KB PFN would result in all the entries being of
+ * the same shared/private state, thus avoiding the need to split the mapping.
+ * But that would mean the entries are currently in a mixed state, and so the
+ * mapping would have already been split as a result of prior transitions.
+ * And since the 4K split is only done if the mapping is 2MB+, and there isn't
+ * currently a mechanism in place to restore 2MB+ mappings, such a check would
+ * not provide any usable benefit.
+ *
+ * More specifics on how these checks are carried out can be found in APM
+ * Volume 2, "RMP and VMPL Access Checks".
+ */
+static int adjust_direct_map(u64 pfn, int rmp_level)
+{
+ unsigned long vaddr;
+ unsigned int level;
+ int npages, ret;
+ pte_t *pte;
+
+ /*
+ * pfn_to_kaddr() will return a vaddr only within the direct
+ * map range.
+ */
+ vaddr = (unsigned long)pfn_to_kaddr(pfn);
+
+ /* Only 4KB/2MB RMP entries are supported by current hardware. */
+ if (WARN_ON_ONCE(rmp_level > PG_LEVEL_2M))
+ return -EINVAL;
+
+ if (!pfn_valid(pfn))
+ return -EINVAL;
+
+ if (rmp_level == PG_LEVEL_2M &&
+ (!IS_ALIGNED(pfn, PTRS_PER_PMD) || !pfn_valid(pfn + PTRS_PER_PMD - 1)))
+ return -EINVAL;
+
+ /*
+ * If an entire 2MB physical range is being transitioned, then there is
+ * no risk of RMP #PFs due to write accesses from overlapping mappings,
+ * since even accesses from 1GB mappings will be treated as 2MB accesses
+ * as far as RMP table checks are concerned.
+ */
+ if (rmp_level == PG_LEVEL_2M)
+ return 0;
+
+ pte = lookup_address(vaddr, &level);
+ if (!pte || pte_none(*pte))
+ return 0;
+
+ if (level == PG_LEVEL_4K)
+ return 0;
+
+ npages = page_level_size(rmp_level) / PAGE_SIZE;
+ ret = set_memory_4k(vaddr, npages);
+ if (ret)
+ pr_warn("Failed to split direct map for PFN 0x%llx, ret: %d\n",
+ pfn, ret);
+
+ return ret;
+}
+
+/*
+ * It is expected that those operations are seldom enough so that no mutual
+ * exclusion of updaters is needed and thus the overlap error condition below
+ * should happen very rarely and would get resolved relatively quickly by
+ * the firmware.
+ *
+ * If not, one could consider introducing a mutex or so here to sync concurrent
+ * RMP updates and thus diminish the amount of cases where firmware needs to
+ * lock 2M ranges to protect against concurrent updates.
+ *
+ * The optimal solution would be range locking to avoid locking disjoint
+ * regions unnecessarily but there's no support for that yet.
+ */
+static int rmpupdate(u64 pfn, struct rmp_state *state)
+{
+ unsigned long paddr = pfn << PAGE_SHIFT;
+ int ret, level;
+
+ if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ return -ENODEV;
+
+ level = RMP_TO_PG_LEVEL(state->pagesize);
+
+ if (adjust_direct_map(pfn, level))
+ return -EFAULT;
+
+ do {
+ /* Binutils version 2.36 supports the RMPUPDATE mnemonic. */
+ asm volatile(".byte 0xF2, 0x0F, 0x01, 0xFE"
+ : "=a" (ret)
+ : "a" (paddr), "c" ((unsigned long)state)
+ : "memory", "cc");
+ } while (ret == RMPUPDATE_FAIL_OVERLAP);
+
+ if (ret) {
+ pr_err("RMPUPDATE failed for PFN %llx, pg_level: %d, ret: %d\n",
+ pfn, level, ret);
+ dump_rmpentry(pfn);
+ dump_stack();
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+/* Transition a page to guest-owned/private state in the RMP table. */
+int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 asid, bool immutable)
+{
+ struct rmp_state state;
+
+ memset(&state, 0, sizeof(state));
+ state.assigned = 1;
+ state.asid = asid;
+ state.immutable = immutable;
+ state.gpa = gpa;
+ state.pagesize = PG_LEVEL_TO_RMP(level);
+
+ return rmpupdate(pfn, &state);
+}
+EXPORT_SYMBOL_GPL(rmp_make_private);
+
+/* Transition a page to hypervisor-owned/shared state in the RMP table. */
+int rmp_make_shared(u64 pfn, enum pg_level level)
+{
+ struct rmp_state state;
+
+ memset(&state, 0, sizeof(state));
+ state.pagesize = PG_LEVEL_TO_RMP(level);
+
+ return rmpupdate(pfn, &state);
+}
+EXPORT_SYMBOL_GPL(rmp_make_shared);
+
+void snp_leak_pages(u64 pfn, unsigned int npages)
+{
+ struct page *page = pfn_to_page(pfn);
+
+ pr_warn("Leaking PFN range 0x%llx-0x%llx\n", pfn, pfn + npages);
+
+ spin_lock(&snp_leaked_pages_list_lock);
+ while (npages--) {
+
+ /*
+ * Reuse the page's buddy list for chaining into the leaked
+ * pages list. This page should not be on a free list currently
+ * and is also unsafe to be added to a free list.
+ */
+ if (likely(!PageCompound(page)) ||
+
+ /*
+ * Skip inserting tail pages of compound page as
+ * page->buddy_list of tail pages is not usable.
+ */
+ (PageHead(page) && compound_nr(page) <= npages))
+ list_add_tail(&page->buddy_list, &snp_leaked_pages_list);
+
+ dump_rmpentry(pfn);
+ snp_nr_leaked_pages++;
+ pfn++;
+ page++;
+ }
+ spin_unlock(&snp_leaked_pages_list_lock);
+}
+EXPORT_SYMBOL_GPL(snp_leak_pages);