aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/arm64/include/asm/cputype.h2
-rw-r--r--arch/arm64/include/asm/syscall_wrapper.h2
-rw-r--r--arch/arm64/kernel/efi.c52
-rw-r--r--arch/arm64/mm/pageattr.c5
-rw-r--r--arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts38
-rw-r--r--arch/riscv/kernel/process.c2
-rw-r--r--arch/riscv/kernel/setup.c1
-rw-r--r--arch/riscv/kernel/vdso/Makefile5
-rw-r--r--arch/riscv/kernel/vdso/vdso.lds.S2
-rw-r--r--arch/riscv/mm/init.c1
-rw-r--r--arch/s390/Kconfig6
-rw-r--r--arch/s390/Makefile2
-rw-r--r--arch/s390/boot/Makefile3
-rw-r--r--arch/s390/boot/startup.c3
-rw-r--r--arch/s390/configs/btf.config1
-rw-r--r--arch/s390/configs/debug_defconfig65
-rw-r--r--arch/s390/configs/defconfig67
-rw-r--r--arch/s390/configs/kasan.config3
-rw-r--r--arch/s390/configs/zfcpdump_defconfig1
-rw-r--r--arch/s390/kvm/kvm-s390.c26
-rw-r--r--arch/s390/kvm/kvm-s390.h1
-rw-r--r--arch/s390/kvm/pci.c2
-rw-r--r--arch/x86/hyperv/hv_init.c19
-rw-r--r--arch/x86/include/asm/kvm_host.h7
-rw-r--r--arch/x86/include/asm/spec-ctrl.h10
-rw-r--r--arch/x86/kernel/asm-offsets.c6
-rw-r--r--arch/x86/kernel/cpu/bugs.c15
-rw-r--r--arch/x86/kernel/traps.c7
-rw-r--r--arch/x86/kvm/.gitignore2
-rw-r--r--arch/x86/kvm/Makefile12
-rw-r--r--arch/x86/kvm/kvm-asm-offsets.c29
-rw-r--r--arch/x86/kvm/mmu/mmu.c4
-rw-r--r--arch/x86/kvm/pmu.c2
-rw-r--r--arch/x86/kvm/svm/pmu.c7
-rw-r--r--arch/x86/kvm/svm/sev.c6
-rw-r--r--arch/x86/kvm/svm/svm.c105
-rw-r--r--arch/x86/kvm/svm/svm.h11
-rw-r--r--arch/x86/kvm/svm/svm_ops.h5
-rw-r--r--arch/x86/kvm/svm/vmenter.S260
-rw-r--r--arch/x86/kvm/vmx/pmu_intel.c4
-rw-r--r--arch/x86/kvm/vmx/vmenter.S2
-rw-r--r--arch/x86/kvm/x86.c25
-rw-r--r--arch/x86/lib/usercopy.c3
-rw-r--r--arch/x86/mm/hugetlbpage.c4
44 files changed, 533 insertions, 302 deletions
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index abc418650fec..65e53ef5a396 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -41,7 +41,7 @@
(((midr) & MIDR_IMPLEMENTOR_MASK) >> MIDR_IMPLEMENTOR_SHIFT)
#define MIDR_CPU_MODEL(imp, partnum) \
- (((imp) << MIDR_IMPLEMENTOR_SHIFT) | \
+ ((_AT(u32, imp) << MIDR_IMPLEMENTOR_SHIFT) | \
(0xf << MIDR_ARCHITECTURE_SHIFT) | \
((partnum) << MIDR_PARTNUM_SHIFT))
diff --git a/arch/arm64/include/asm/syscall_wrapper.h b/arch/arm64/include/asm/syscall_wrapper.h
index b383b4802a7b..d30217c21eff 100644
--- a/arch/arm64/include/asm/syscall_wrapper.h
+++ b/arch/arm64/include/asm/syscall_wrapper.h
@@ -8,7 +8,7 @@
#ifndef __ASM_SYSCALL_WRAPPER_H
#define __ASM_SYSCALL_WRAPPER_H
-struct pt_regs;
+#include <asm/ptrace.h>
#define SC_ARM64_REGS_TO_ARGS(x, ...) \
__MAP(x,__SC_ARGS \
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index 8d36e66a6e64..ee53f2a0aa03 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -13,6 +13,14 @@
#include <asm/efi.h>
+static bool region_is_misaligned(const efi_memory_desc_t *md)
+{
+ if (PAGE_SIZE == EFI_PAGE_SIZE)
+ return false;
+ return !PAGE_ALIGNED(md->phys_addr) ||
+ !PAGE_ALIGNED(md->num_pages << EFI_PAGE_SHIFT);
+}
+
/*
* Only regions of type EFI_RUNTIME_SERVICES_CODE need to be
* executable, everything else can be mapped with the XN bits
@@ -26,14 +34,22 @@ static __init pteval_t create_mapping_protection(efi_memory_desc_t *md)
if (type == EFI_MEMORY_MAPPED_IO)
return PROT_DEVICE_nGnRE;
- if (WARN_ONCE(!PAGE_ALIGNED(md->phys_addr),
- "UEFI Runtime regions are not aligned to 64 KB -- buggy firmware?"))
+ if (region_is_misaligned(md)) {
+ static bool __initdata code_is_misaligned;
+
/*
- * If the region is not aligned to the page size of the OS, we
- * can not use strict permissions, since that would also affect
- * the mapping attributes of the adjacent regions.
+ * Regions that are not aligned to the OS page size cannot be
+ * mapped with strict permissions, as those might interfere
+ * with the permissions that are needed by the adjacent
+ * region's mapping. However, if we haven't encountered any
+ * misaligned runtime code regions so far, we can safely use
+ * non-executable permissions for non-code regions.
*/
- return pgprot_val(PAGE_KERNEL_EXEC);
+ code_is_misaligned |= (type == EFI_RUNTIME_SERVICES_CODE);
+
+ return code_is_misaligned ? pgprot_val(PAGE_KERNEL_EXEC)
+ : pgprot_val(PAGE_KERNEL);
+ }
/* R-- */
if ((attr & (EFI_MEMORY_XP | EFI_MEMORY_RO)) ==
@@ -64,19 +80,16 @@ int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md)
bool page_mappings_only = (md->type == EFI_RUNTIME_SERVICES_CODE ||
md->type == EFI_RUNTIME_SERVICES_DATA);
- if (!PAGE_ALIGNED(md->phys_addr) ||
- !PAGE_ALIGNED(md->num_pages << EFI_PAGE_SHIFT)) {
- /*
- * If the end address of this region is not aligned to page
- * size, the mapping is rounded up, and may end up sharing a
- * page frame with the next UEFI memory region. If we create
- * a block entry now, we may need to split it again when mapping
- * the next region, and support for that is going to be removed
- * from the MMU routines. So avoid block mappings altogether in
- * that case.
- */
+ /*
+ * If this region is not aligned to the page size used by the OS, the
+ * mapping will be rounded outwards, and may end up sharing a page
+ * frame with an adjacent runtime memory region. Given that the page
+ * table descriptor covering the shared page will be rewritten when the
+ * adjacent region gets mapped, we must avoid block mappings here so we
+ * don't have to worry about splitting them when that happens.
+ */
+ if (region_is_misaligned(md))
page_mappings_only = true;
- }
create_pgd_mapping(mm, md->phys_addr, md->virt_addr,
md->num_pages << EFI_PAGE_SHIFT,
@@ -103,6 +116,9 @@ int __init efi_set_mapping_permissions(struct mm_struct *mm,
BUG_ON(md->type != EFI_RUNTIME_SERVICES_CODE &&
md->type != EFI_RUNTIME_SERVICES_DATA);
+ if (region_is_misaligned(md))
+ return 0;
+
/*
* Calling apply_to_page_range() is only safe on regions that are
* guaranteed to be mapped down to pages. Since we are only called
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index d107c3d434e2..5922178d7a06 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -26,7 +26,7 @@ bool can_set_direct_map(void)
* mapped at page granularity, so that it is possible to
* protect/unprotect single pages.
*/
- return rodata_full || debug_pagealloc_enabled() ||
+ return (rodata_enabled && rodata_full) || debug_pagealloc_enabled() ||
IS_ENABLED(CONFIG_KFENCE);
}
@@ -102,7 +102,8 @@ static int change_memory_common(unsigned long addr, int numpages,
* If we are manipulating read-only permissions, apply the same
* change to the linear mapping of the pages that back this VM area.
*/
- if (rodata_full && (pgprot_val(set_mask) == PTE_RDONLY ||
+ if (rodata_enabled &&
+ rodata_full && (pgprot_val(set_mask) == PTE_RDONLY ||
pgprot_val(clear_mask) == PTE_RDONLY)) {
for (i = 0; i < area->nr_pages; i++) {
__change_memory_common((u64)page_address(area->pages[i]),
diff --git a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts
index ced0d4e47938..900a50526d77 100644
--- a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts
+++ b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts
@@ -3,6 +3,8 @@
#include "fu540-c000.dtsi"
#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/leds/common.h>
+#include <dt-bindings/pwm/pwm.h>
/* Clock frequency (in Hz) of the PCB crystal for rtcclk */
#define RTCCLK_FREQ 1000000
@@ -42,6 +44,42 @@
compatible = "gpio-restart";
gpios = <&gpio 10 GPIO_ACTIVE_LOW>;
};
+
+ led-controller {
+ compatible = "pwm-leds";
+
+ led-d1 {
+ pwms = <&pwm0 0 7812500 PWM_POLARITY_INVERTED>;
+ active-low;
+ color = <LED_COLOR_ID_GREEN>;
+ max-brightness = <255>;
+ label = "d1";
+ };
+
+ led-d2 {
+ pwms = <&pwm0 1 7812500 PWM_POLARITY_INVERTED>;
+ active-low;
+ color = <LED_COLOR_ID_GREEN>;
+ max-brightness = <255>;
+ label = "d2";
+ };
+
+ led-d3 {
+ pwms = <&pwm0 2 7812500 PWM_POLARITY_INVERTED>;
+ active-low;
+ color = <LED_COLOR_ID_GREEN>;
+ max-brightness = <255>;
+ label = "d3";
+ };
+
+ led-d4 {
+ pwms = <&pwm0 3 7812500 PWM_POLARITY_INVERTED>;
+ active-low;
+ color = <LED_COLOR_ID_GREEN>;
+ max-brightness = <255>;
+ label = "d4";
+ };
+ };
};
&uart0 {
diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
index b0c63e8e867e..8955f2432c2d 100644
--- a/arch/riscv/kernel/process.c
+++ b/arch/riscv/kernel/process.c
@@ -164,6 +164,8 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
unsigned long tls = args->tls;
struct pt_regs *childregs = task_pt_regs(p);
+ memset(&p->thread.s, 0, sizeof(p->thread.s));
+
/* p->thread holds context to be restored by __switch_to() */
if (unlikely(args->fn)) {
/* Kernel thread */
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index ad76bb59b059..67ec1fadcfe2 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -283,6 +283,7 @@ void __init setup_arch(char **cmdline_p)
else
pr_err("No DTB found in kernel mappings\n");
#endif
+ early_init_fdt_scan_reserved_mem();
misc_mem_init();
init_resources();
diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile
index f2e065671e4d..db6548509bb3 100644
--- a/arch/riscv/kernel/vdso/Makefile
+++ b/arch/riscv/kernel/vdso/Makefile
@@ -28,9 +28,12 @@ obj-vdso := $(addprefix $(obj)/, $(obj-vdso))
obj-y += vdso.o
CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
+ifneq ($(filter vgettimeofday, $(vdso-syms)),)
+CPPFLAGS_vdso.lds += -DHAS_VGETTIMEOFDAY
+endif
# Disable -pg to prevent insert call site
-CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os
+CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE)
# Disable profiling and instrumentation for VDSO code
GCOV_PROFILE := n
diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S
index 01d94aae5bf5..150b1a572e61 100644
--- a/arch/riscv/kernel/vdso/vdso.lds.S
+++ b/arch/riscv/kernel/vdso/vdso.lds.S
@@ -68,9 +68,11 @@ VERSION
LINUX_4.15 {
global:
__vdso_rt_sigreturn;
+#ifdef HAS_VGETTIMEOFDAY
__vdso_gettimeofday;
__vdso_clock_gettime;
__vdso_clock_getres;
+#endif
__vdso_getcpu;
__vdso_flush_icache;
local: *;
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index b56a0a75533f..50a1b6edd491 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -262,7 +262,6 @@ static void __init setup_bootmem(void)
memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va));
}
- early_init_fdt_scan_reserved_mem();
dma_contiguous_reserve(dma32_phys_limit);
if (IS_ENABLED(CONFIG_64BIT))
hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 318fce77601d..de575af02ffe 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -568,8 +568,7 @@ config EXPOLINE_FULL
endchoice
config RELOCATABLE
- bool "Build a relocatable kernel"
- default y
+ def_bool y
help
This builds a kernel image that retains relocation information
so it can be loaded at an arbitrary address.
@@ -578,10 +577,11 @@ config RELOCATABLE
bootup process.
The relocations make the kernel image about 15% larger (compressed
10%), but are discarded at runtime.
+ Note: this option exists only for documentation purposes, please do
+ not remove it.
config RANDOMIZE_BASE
bool "Randomize the address of the kernel image (KASLR)"
- depends on RELOCATABLE
default y
help
In support of Kernel Address Space Layout Randomization (KASLR),
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index de6d8b2ea4d8..b3235ab0ace8 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -14,10 +14,8 @@ KBUILD_AFLAGS_MODULE += -fPIC
KBUILD_CFLAGS_MODULE += -fPIC
KBUILD_AFLAGS += -m64
KBUILD_CFLAGS += -m64
-ifeq ($(CONFIG_RELOCATABLE),y)
KBUILD_CFLAGS += -fPIE
LDFLAGS_vmlinux := -pie
-endif
aflags_dwarf := -Wa,-gdwarf-2
KBUILD_AFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -D__ASSEMBLY__
ifndef CONFIG_AS_IS_LLVM
diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile
index 883357a211a3..d52c3e2e16bc 100644
--- a/arch/s390/boot/Makefile
+++ b/arch/s390/boot/Makefile
@@ -37,9 +37,8 @@ CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char
obj-y := head.o als.o startup.o mem_detect.o ipl_parm.o ipl_report.o
obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o
-obj-y += version.o pgm_check_info.o ctype.o ipl_data.o
+obj-y += version.o pgm_check_info.o ctype.o ipl_data.o machine_kexec_reloc.o
obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o
-obj-$(CONFIG_RELOCATABLE) += machine_kexec_reloc.o
obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
obj-y += $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) info.o
obj-$(CONFIG_KERNEL_ZSTD) += clz_ctz.o
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index 6e7f01ca53e6..47ca3264c023 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -291,8 +291,7 @@ void startup_kernel(void)
clear_bss_section();
copy_bootdata();
- if (IS_ENABLED(CONFIG_RELOCATABLE))
- handle_relocs(__kaslr_offset);
+ handle_relocs(__kaslr_offset);
if (__kaslr_offset) {
/*
diff --git a/arch/s390/configs/btf.config b/arch/s390/configs/btf.config
new file mode 100644
index 000000000000..39227b4511af
--- /dev/null
+++ b/arch/s390/configs/btf.config
@@ -0,0 +1 @@
+CONFIG_DEBUG_INFO_BTF=y
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index 2a827002934b..63807bd0b536 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -723,52 +723,42 @@ CONFIG_CRYPTO_ECDSA=m
CONFIG_CRYPTO_ECRDSA=m
CONFIG_CRYPTO_SM2=m
CONFIG_CRYPTO_CURVE25519=m
-CONFIG_CRYPTO_GCM=y
-CONFIG_CRYPTO_CHACHA20POLY1305=m
-CONFIG_CRYPTO_AEGIS128=m
-CONFIG_CRYPTO_SEQIV=y
-CONFIG_CRYPTO_CFB=m
-CONFIG_CRYPTO_LRW=m
-CONFIG_CRYPTO_PCBC=m
-CONFIG_CRYPTO_KEYWRAP=m
-CONFIG_CRYPTO_ADIANTUM=m
-CONFIG_CRYPTO_HCTR2=m
-CONFIG_CRYPTO_XCBC=m
-CONFIG_CRYPTO_VMAC=m
-CONFIG_CRYPTO_CRC32=m
-CONFIG_CRYPTO_CRC32_S390=y
-CONFIG_CRYPTO_MD4=m
-CONFIG_CRYPTO_MD5=y
-CONFIG_CRYPTO_MICHAEL_MIC=m
-CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_SHA512_S390=m
-CONFIG_CRYPTO_SHA1_S390=m
-CONFIG_CRYPTO_SHA256_S390=m
-CONFIG_CRYPTO_SHA3=m
-CONFIG_CRYPTO_SHA3_256_S390=m
-CONFIG_CRYPTO_SHA3_512_S390=m
-CONFIG_CRYPTO_SM3_GENERIC=m
-CONFIG_CRYPTO_WP512=m
-CONFIG_CRYPTO_GHASH_S390=m
CONFIG_CRYPTO_AES_TI=m
-CONFIG_CRYPTO_AES_S390=m
CONFIG_CRYPTO_ANUBIS=m
-CONFIG_CRYPTO_ARC4=m
+CONFIG_CRYPTO_ARIA=m
CONFIG_CRYPTO_BLOWFISH=m
CONFIG_CRYPTO_CAMELLIA=m
CONFIG_CRYPTO_CAST5=m
CONFIG_CRYPTO_CAST6=m
CONFIG_CRYPTO_DES=m
-CONFIG_CRYPTO_DES_S390=m
CONFIG_CRYPTO_FCRYPT=m
CONFIG_CRYPTO_KHAZAD=m
-CONFIG_CRYPTO_CHACHA_S390=m
CONFIG_CRYPTO_SEED=m
-CONFIG_CRYPTO_ARIA=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_SM4_GENERIC=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_ADIANTUM=m
+CONFIG_CRYPTO_ARC4=m
+CONFIG_CRYPTO_CFB=m
+CONFIG_CRYPTO_HCTR2=m
+CONFIG_CRYPTO_KEYWRAP=m
+CONFIG_CRYPTO_LRW=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_AEGIS128=m
+CONFIG_CRYPTO_CHACHA20POLY1305=m
+CONFIG_CRYPTO_GCM=y
+CONFIG_CRYPTO_SEQIV=y
+CONFIG_CRYPTO_MD4=m
+CONFIG_CRYPTO_MD5=y
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_RMD160=m
+CONFIG_CRYPTO_SHA3=m
+CONFIG_CRYPTO_SM3_GENERIC=m
+CONFIG_CRYPTO_VMAC=m
+CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_XCBC=m
+CONFIG_CRYPTO_CRC32=m
CONFIG_CRYPTO_842=m
CONFIG_CRYPTO_LZ4=m
CONFIG_CRYPTO_LZ4HC=m
@@ -779,6 +769,16 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_USER_API_AEAD=m
CONFIG_CRYPTO_STATS=y
+CONFIG_CRYPTO_CRC32_S390=y
+CONFIG_CRYPTO_SHA512_S390=m
+CONFIG_CRYPTO_SHA1_S390=m
+CONFIG_CRYPTO_SHA256_S390=m
+CONFIG_CRYPTO_SHA3_256_S390=m
+CONFIG_CRYPTO_SHA3_512_S390=m
+CONFIG_CRYPTO_GHASH_S390=m
+CONFIG_CRYPTO_AES_S390=m
+CONFIG_CRYPTO_DES_S390=m
+CONFIG_CRYPTO_CHACHA_S390=m
CONFIG_ZCRYPT=m
CONFIG_PKEY=m
CONFIG_CRYPTO_PAES_S390=m
@@ -797,7 +797,6 @@ CONFIG_CMA_SIZE_MBYTES=0
CONFIG_PRINTK_TIME=y
CONFIG_DYNAMIC_DEBUG=y
CONFIG_DEBUG_INFO_DWARF4=y
-CONFIG_DEBUG_INFO_BTF=y
CONFIG_GDB_SCRIPTS=y
CONFIG_HEADERS_INSTALL=y
CONFIG_DEBUG_SECTION_MISMATCH=y
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index fb780e80e4c8..4f9a98247442 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -707,53 +707,43 @@ CONFIG_CRYPTO_ECDSA=m
CONFIG_CRYPTO_ECRDSA=m
CONFIG_CRYPTO_SM2=m
CONFIG_CRYPTO_CURVE25519=m
-CONFIG_CRYPTO_GCM=y
-CONFIG_CRYPTO_CHACHA20POLY1305=m
-CONFIG_CRYPTO_AEGIS128=m
-CONFIG_CRYPTO_SEQIV=y
-CONFIG_CRYPTO_CFB=m
-CONFIG_CRYPTO_LRW=m
-CONFIG_CRYPTO_OFB=m
-CONFIG_CRYPTO_PCBC=m
-CONFIG_CRYPTO_KEYWRAP=m
-CONFIG_CRYPTO_ADIANTUM=m
-CONFIG_CRYPTO_HCTR2=m
-CONFIG_CRYPTO_XCBC=m
-CONFIG_CRYPTO_VMAC=m
-CONFIG_CRYPTO_CRC32=m
-CONFIG_CRYPTO_CRC32_S390=y
-CONFIG_CRYPTO_MD4=m
-CONFIG_CRYPTO_MD5=y
-CONFIG_CRYPTO_MICHAEL_MIC=m
-CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_SHA512_S390=m
-CONFIG_CRYPTO_SHA1_S390=m
-CONFIG_CRYPTO_SHA256_S390=m
-CONFIG_CRYPTO_SHA3=m
-CONFIG_CRYPTO_SHA3_256_S390=m
-CONFIG_CRYPTO_SHA3_512_S390=m
-CONFIG_CRYPTO_SM3_GENERIC=m
-CONFIG_CRYPTO_WP512=m
-CONFIG_CRYPTO_GHASH_S390=m
CONFIG_CRYPTO_AES_TI=m
-CONFIG_CRYPTO_AES_S390=m
CONFIG_CRYPTO_ANUBIS=m
-CONFIG_CRYPTO_ARC4=m
+CONFIG_CRYPTO_ARIA=m
CONFIG_CRYPTO_BLOWFISH=m
CONFIG_CRYPTO_CAMELLIA=m
CONFIG_CRYPTO_CAST5=m
CONFIG_CRYPTO_CAST6=m
CONFIG_CRYPTO_DES=m
-CONFIG_CRYPTO_DES_S390=m
CONFIG_CRYPTO_FCRYPT=m
CONFIG_CRYPTO_KHAZAD=m
-CONFIG_CRYPTO_CHACHA_S390=m
CONFIG_CRYPTO_SEED=m
-CONFIG_CRYPTO_ARIA=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_SM4_GENERIC=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_ADIANTUM=m
+CONFIG_CRYPTO_ARC4=m
+CONFIG_CRYPTO_CFB=m
+CONFIG_CRYPTO_HCTR2=m
+CONFIG_CRYPTO_KEYWRAP=m
+CONFIG_CRYPTO_LRW=m
+CONFIG_CRYPTO_OFB=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_AEGIS128=m
+CONFIG_CRYPTO_CHACHA20POLY1305=m
+CONFIG_CRYPTO_GCM=y
+CONFIG_CRYPTO_SEQIV=y
+CONFIG_CRYPTO_MD4=m
+CONFIG_CRYPTO_MD5=y
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_RMD160=m
+CONFIG_CRYPTO_SHA3=m
+CONFIG_CRYPTO_SM3_GENERIC=m
+CONFIG_CRYPTO_VMAC=m
+CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_XCBC=m
+CONFIG_CRYPTO_CRC32=m
CONFIG_CRYPTO_842=m
CONFIG_CRYPTO_LZ4=m
CONFIG_CRYPTO_LZ4HC=m
@@ -764,6 +754,16 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_USER_API_AEAD=m
CONFIG_CRYPTO_STATS=y
+CONFIG_CRYPTO_CRC32_S390=y
+CONFIG_CRYPTO_SHA512_S390=m
+CONFIG_CRYPTO_SHA1_S390=m
+CONFIG_CRYPTO_SHA256_S390=m
+CONFIG_CRYPTO_SHA3_256_S390=m
+CONFIG_CRYPTO_SHA3_512_S390=m
+CONFIG_CRYPTO_GHASH_S390=m
+CONFIG_CRYPTO_AES_S390=m
+CONFIG_CRYPTO_DES_S390=m
+CONFIG_CRYPTO_CHACHA_S390=m
CONFIG_ZCRYPT=m
CONFIG_PKEY=m
CONFIG_CRYPTO_PAES_S390=m
@@ -781,7 +781,6 @@ CONFIG_CMA_SIZE_MBYTES=0
CONFIG_PRINTK_TIME=y
CONFIG_DYNAMIC_DEBUG=y
CONFIG_DEBUG_INFO_DWARF4=y
-CONFIG_DEBUG_INFO_BTF=y
CONFIG_GDB_SCRIPTS=y
CONFIG_DEBUG_SECTION_MISMATCH=y
CONFIG_MAGIC_SYSRQ=y
diff --git a/arch/s390/configs/kasan.config b/arch/s390/configs/kasan.config
new file mode 100644
index 000000000000..700a8b25c3ff
--- /dev/null
+++ b/arch/s390/configs/kasan.config
@@ -0,0 +1,3 @@
+CONFIG_KASAN=y
+CONFIG_KASAN_INLINE=y
+CONFIG_KASAN_VMALLOC=y
diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig
index a5576b8d4081..5fe9948be644 100644
--- a/arch/s390/configs/zfcpdump_defconfig
+++ b/arch/s390/configs/zfcpdump_defconfig
@@ -74,7 +74,6 @@ CONFIG_PRINTK_TIME=y
# CONFIG_SYMBOLIC_ERRNAME is not set
CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_INFO_DWARF4=y
-CONFIG_DEBUG_INFO_BTF=y
CONFIG_DEBUG_FS=y
CONFIG_PANIC_ON_OOPS=y
# CONFIG_SCHED_DEBUG is not set
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 45d4b8182b07..bc491a73815c 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1207,6 +1207,8 @@ static int kvm_s390_vm_get_migration(struct kvm *kvm,
return 0;
}
+static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod);
+
static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
struct kvm_s390_vm_tod_clock gtod;
@@ -1216,7 +1218,7 @@ static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
return -EINVAL;
- kvm_s390_set_tod_clock(kvm, &gtod);
+ __kvm_s390_set_tod_clock(kvm, &gtod);
VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
gtod.epoch_idx, gtod.tod);
@@ -1247,7 +1249,7 @@ static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
sizeof(gtod.tod)))
return -EFAULT;
- kvm_s390_set_tod_clock(kvm, &gtod);
+ __kvm_s390_set_tod_clock(kvm, &gtod);
VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
return 0;
}
@@ -1259,6 +1261,16 @@ static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
if (attr->flags)
return -EINVAL;
+ mutex_lock(&kvm->lock);
+ /*
+ * For protected guests, the TOD is managed by the ultravisor, so trying
+ * to change it will never bring the expected results.
+ */
+ if (kvm_s390_pv_is_protected(kvm)) {
+ ret = -EOPNOTSUPP;
+ goto out_unlock;
+ }
+
switch (attr->attr) {
case KVM_S390_VM_TOD_EXT:
ret = kvm_s390_set_tod_ext(kvm, attr);
@@ -1273,6 +1285,9 @@ static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
ret = -ENXIO;
break;
}
+
+out_unlock:
+ mutex_unlock(&kvm->lock);
return ret;
}
@@ -4377,13 +4392,6 @@ static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_t
preempt_enable();
}
-void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
-{
- mutex_lock(&kvm->lock);
- __kvm_s390_set_tod_clock(kvm, gtod);
- mutex_unlock(&kvm->lock);
-}
-
int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
{
if (!mutex_trylock(&kvm->lock))
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index f6fd668f887e..4755492dfabc 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -363,7 +363,6 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu);
/* implemented in kvm-s390.c */
-void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod);
int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod);
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable);
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr);
diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c
index c50c1645c0ae..ded1af2ddae9 100644
--- a/arch/s390/kvm/pci.c
+++ b/arch/s390/kvm/pci.c
@@ -126,7 +126,7 @@ int kvm_s390_pci_aen_init(u8 nisc)
return -EPERM;
mutex_lock(&aift->aift_lock);
- aift->kzdev = kcalloc(ZPCI_NR_DEVICES, sizeof(struct kvm_zdev),
+ aift->kzdev = kcalloc(ZPCI_NR_DEVICES, sizeof(struct kvm_zdev *),
GFP_KERNEL);
if (!aift->kzdev) {
rc = -ENOMEM;
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 29774126e931..f49bc3ec76e6 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -444,7 +444,7 @@ void __init hyperv_init(void)
if (hv_root_partition) {
struct page *pg;
- void *src, *dst;
+ void *src;
/*
* For the root partition, the hypervisor will set up its
@@ -459,13 +459,11 @@ void __init hyperv_init(void)
wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
pg = vmalloc_to_page(hv_hypercall_pg);
- dst = kmap_local_page(pg);
src = memremap(hypercall_msr.guest_physical_address << PAGE_SHIFT, PAGE_SIZE,
MEMREMAP_WB);
- BUG_ON(!(src && dst));
- memcpy(dst, src, HV_HYP_PAGE_SIZE);
+ BUG_ON(!src);
+ memcpy_to_page(pg, 0, src, HV_HYP_PAGE_SIZE);
memunmap(src);
- kunmap_local(dst);
} else {
hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg);
wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
@@ -537,6 +535,7 @@ common_free:
void hyperv_cleanup(void)
{
union hv_x64_msr_hypercall_contents hypercall_msr;
+ union hv_reference_tsc_msr tsc_msr;
unregister_syscore_ops(&hv_syscore_ops);
@@ -552,12 +551,14 @@ void hyperv_cleanup(void)
hv_hypercall_pg = NULL;
/* Reset the hypercall page */
- hypercall_msr.as_uint64 = 0;
- wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
+ hypercall_msr.as_uint64 = hv_get_register(HV_X64_MSR_HYPERCALL);
+ hypercall_msr.enable = 0;
+ hv_set_register(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
/* Reset the TSC page */
- hypercall_msr.as_uint64 = 0;
- wrmsrl(HV_X64_MSR_REFERENCE_TSC, hypercall_msr.as_uint64);
+ tsc_msr.as_uint64 = hv_get_register(HV_X64_MSR_REFERENCE_TSC);
+ tsc_msr.enable = 0;
+ hv_set_register(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64);
}
void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 7551b6f9c31c..f05ebaa26f0f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -501,7 +501,12 @@ struct kvm_pmc {
bool intr;
};
+/* More counters may conflict with other existing Architectural MSRs */
+#define KVM_INTEL_PMC_MAX_GENERIC 8
+#define MSR_ARCH_PERFMON_PERFCTR_MAX (MSR_ARCH_PERFMON_PERFCTR0 + KVM_INTEL_PMC_MAX_GENERIC - 1)
+#define MSR_ARCH_PERFMON_EVENTSEL_MAX (MSR_ARCH_PERFMON_EVENTSEL0 + KVM_INTEL_PMC_MAX_GENERIC - 1)
#define KVM_PMC_MAX_FIXED 3
+#define KVM_AMD_PMC_MAX_GENERIC 6
struct kvm_pmu {
unsigned nr_arch_gp_counters;
unsigned nr_arch_fixed_counters;
@@ -516,7 +521,7 @@ struct kvm_pmu {
u64 reserved_bits;
u64 raw_event_mask;
u8 version;
- struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC];
+ struct kvm_pmc gp_counters[KVM_INTEL_PMC_MAX_GENERIC];
struct kvm_pmc fixed_counters[KVM_PMC_MAX_FIXED];
struct irq_work irq_work;
DECLARE_BITMAP(reprogram_pmi, X86_PMC_IDX_MAX);
diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h
index 5393babc0598..cb0386fc4dc3 100644
--- a/arch/x86/include/asm/spec-ctrl.h
+++ b/arch/x86/include/asm/spec-ctrl.h
@@ -13,7 +13,7 @@
* Takes the guest view of SPEC_CTRL MSR as a parameter and also
* the guest's version of VIRT_SPEC_CTRL, if emulated.
*/
-extern void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool guest);
+extern void x86_virt_spec_ctrl(u64 guest_virt_spec_ctrl, bool guest);
/**
* x86_spec_ctrl_set_guest - Set speculation control registers for the guest
@@ -24,9 +24,9 @@ extern void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bo
* Avoids writing to the MSR if the content/bits are the same
*/
static inline
-void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl)
+void x86_spec_ctrl_set_guest(u64 guest_virt_spec_ctrl)
{
- x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, true);
+ x86_virt_spec_ctrl(guest_virt_spec_ctrl, true);
}
/**
@@ -38,9 +38,9 @@ void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl)
* Avoids writing to the MSR if the content/bits are the same
*/
static inline
-void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl)
+void x86_spec_ctrl_restore_host(u64 guest_virt_spec_ctrl)
{
- x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, false);
+ x86_virt_spec_ctrl(guest_virt_spec_ctrl, false);
}
/* AMD specific Speculative Store Bypass MSR data */
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index cb50589a7102..437308004ef2 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -19,7 +19,6 @@
#include <asm/suspend.h>
#include <asm/tlbflush.h>
#include <asm/tdx.h>
-#include "../kvm/vmx/vmx.h"
#ifdef CONFIG_XEN
#include <xen/interface/xen.h>
@@ -108,9 +107,4 @@ static void __used common(void)
OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
OFFSET(TSS_sp2, tss_struct, x86_tss.sp2);
-
- if (IS_ENABLED(CONFIG_KVM_INTEL)) {
- BLANK();
- OFFSET(VMX_spec_ctrl, vcpu_vmx, spec_ctrl);
- }
}
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index da7c361f47e0..3e3230cccaa7 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -196,22 +196,15 @@ void __init check_bugs(void)
}
/*
- * NOTE: This function is *only* called for SVM. VMX spec_ctrl handling is
- * done in vmenter.S.
+ * NOTE: This function is *only* called for SVM, since Intel uses
+ * MSR_IA32_SPEC_CTRL for SSBD.
*/
void
-x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
+x86_virt_spec_ctrl(u64 guest_virt_spec_ctrl, bool setguest)
{
- u64 msrval, guestval = guest_spec_ctrl, hostval = spec_ctrl_current();
+ u64 guestval, hostval;
struct thread_info *ti = current_thread_info();
- if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) {
- if (hostval != guestval) {
- msrval = setguest ? guestval : hostval;
- wrmsrl(MSR_IA32_SPEC_CTRL, msrval);
- }
- }
-
/*
* If SSBD is not handled in MSR_SPEC_CTRL on AMD, update
* MSR_AMD64_L2_CFG or MSR_VIRT_SPEC_CTRL if supported.
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 178015a820f0..d3fdec706f1d 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -15,6 +15,7 @@
#include <linux/context_tracking.h>
#include <linux/interrupt.h>
#include <linux/kallsyms.h>
+#include <linux/kmsan.h>
#include <linux/spinlock.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
@@ -301,6 +302,12 @@ static noinstr bool handle_bug(struct pt_regs *regs)
{
bool handled = false;
+ /*
+ * Normally @regs are unpoisoned by irqentry_enter(), but handle_bug()
+ * is a rare case that uses @regs without passing them to
+ * irqentry_enter().
+ */
+ kmsan_unpoison_entry_regs(regs);
if (!is_valid_bugaddr(regs->ip))
return handled;
diff --git a/arch/x86/kvm/.gitignore b/arch/x86/kvm/.gitignore
new file mode 100644
index 000000000000..615d6ff35c00
--- /dev/null
+++ b/arch/x86/kvm/.gitignore
@@ -0,0 +1,2 @@
+/kvm-asm-offsets.s
+/kvm-asm-offsets.h
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 30f244b64523..f453a0f96e24 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -34,3 +34,15 @@ endif
obj-$(CONFIG_KVM) += kvm.o
obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
obj-$(CONFIG_KVM_AMD) += kvm-amd.o
+
+AFLAGS_svm/vmenter.o := -iquote $(obj)
+$(obj)/svm/vmenter.o: $(obj)/kvm-asm-offsets.h
+
+AFLAGS_vmx/vmenter.o := -iquote $(obj)
+$(obj)/vmx/vmenter.o: $(obj)/kvm-asm-offsets.h
+
+$(obj)/kvm-asm-offsets.h: $(obj)/kvm-asm-offsets.s FORCE
+ $(call filechk,offsets,__KVM_ASM_OFFSETS_H__)
+
+targets += kvm-asm-offsets.s
+clean-files += kvm-asm-offsets.h
diff --git a/arch/x86/kvm/kvm-asm-offsets.c b/arch/x86/kvm/kvm-asm-offsets.c
new file mode 100644
index 000000000000..24a710d37323
--- /dev/null
+++ b/arch/x86/kvm/kvm-asm-offsets.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Generate definitions needed by assembly language modules.
+ * This code generates raw asm output which is post-processed to extract
+ * and format the required data.
+ */
+#define COMPILE_OFFSETS
+
+#include <linux/kbuild.h>
+#include "vmx/vmx.h"
+#include "svm/svm.h"
+
+static void __used common(void)
+{
+ if (IS_ENABLED(CONFIG_KVM_AMD)) {
+ BLANK();
+ OFFSET(SVM_vcpu_arch_regs, vcpu_svm, vcpu.arch.regs);
+ OFFSET(SVM_current_vmcb, vcpu_svm, current_vmcb);
+ OFFSET(SVM_spec_ctrl, vcpu_svm, spec_ctrl);
+ OFFSET(SVM_vmcb01, vcpu_svm, vmcb01);
+ OFFSET(KVM_VMCB_pa, kvm_vmcb_info, pa);
+ OFFSET(SD_save_area_pa, svm_cpu_data, save_area_pa);
+ }
+
+ if (IS_ENABLED(CONFIG_KVM_INTEL)) {
+ BLANK();
+ OFFSET(VMX_spec_ctrl, vcpu_vmx, spec_ctrl);
+ }
+}
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 6f81539061d6..1ccb769f62af 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -6056,7 +6056,7 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
write_lock(&kvm->mmu_lock);
- kvm_mmu_invalidate_begin(kvm, gfn_start, gfn_end);
+ kvm_mmu_invalidate_begin(kvm, 0, -1ul);
flush = kvm_rmap_zap_gfn_range(kvm, gfn_start, gfn_end);
@@ -6070,7 +6070,7 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
kvm_flush_remote_tlbs_with_address(kvm, gfn_start,
gfn_end - gfn_start);
- kvm_mmu_invalidate_end(kvm, gfn_start, gfn_end);
+ kvm_mmu_invalidate_end(kvm, 0, -1ul);
write_unlock(&kvm->mmu_lock);
}
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index d9b9a0f0db17..de1fd7369736 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -56,7 +56,7 @@ static const struct x86_cpu_id vmx_icl_pebs_cpu[] = {
* code. Each pmc, stored in kvm_pmc.idx field, is unique across
* all perf counters (both gp and fixed). The mapping relationship
* between pmc and perf counters is as the following:
- * * Intel: [0 .. INTEL_PMC_MAX_GENERIC-1] <=> gp counters
+ * * Intel: [0 .. KVM_INTEL_PMC_MAX_GENERIC-1] <=> gp counters
* [INTEL_PMC_IDX_FIXED .. INTEL_PMC_IDX_FIXED + 2] <=> fixed
* * AMD: [0 .. AMD64_NUM_COUNTERS-1] and, for families 15H
* and later, [0 .. AMD64_NUM_COUNTERS_CORE-1] <=> gp counters
diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
index b68956299fa8..9d65cd095691 100644
--- a/arch/x86/kvm/svm/pmu.c
+++ b/arch/x86/kvm/svm/pmu.c
@@ -192,9 +192,10 @@ static void amd_pmu_init(struct kvm_vcpu *vcpu)
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
int i;
- BUILD_BUG_ON(AMD64_NUM_COUNTERS_CORE > INTEL_PMC_MAX_GENERIC);
+ BUILD_BUG_ON(KVM_AMD_PMC_MAX_GENERIC > AMD64_NUM_COUNTERS_CORE);
+ BUILD_BUG_ON(KVM_AMD_PMC_MAX_GENERIC > INTEL_PMC_MAX_GENERIC);
- for (i = 0; i < AMD64_NUM_COUNTERS_CORE ; i++) {
+ for (i = 0; i < KVM_AMD_PMC_MAX_GENERIC ; i++) {
pmu->gp_counters[i].type = KVM_PMC_GP;
pmu->gp_counters[i].vcpu = vcpu;
pmu->gp_counters[i].idx = i;
@@ -207,7 +208,7 @@ static void amd_pmu_reset(struct kvm_vcpu *vcpu)
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
int i;
- for (i = 0; i < AMD64_NUM_COUNTERS_CORE; i++) {
+ for (i = 0; i < KVM_AMD_PMC_MAX_GENERIC; i++) {
struct kvm_pmc *pmc = &pmu->gp_counters[i];
pmc_stop_counter(pmc);
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 28064060413a..efaaef2b7ae1 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -196,7 +196,7 @@ static void sev_asid_free(struct kvm_sev_info *sev)
__set_bit(sev->asid, sev_reclaim_asid_bitmap);
for_each_possible_cpu(cpu) {
- sd = per_cpu(svm_data, cpu);
+ sd = per_cpu_ptr(&svm_data, cpu);
sd->sev_vmcbs[sev->asid] = NULL;
}
@@ -605,7 +605,7 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm)
save->dr6 = svm->vcpu.arch.dr6;
pr_debug("Virtual Machine Save Area (VMSA):\n");
- print_hex_dump(KERN_CONT, "", DUMP_PREFIX_NONE, 16, 1, save, sizeof(*save), false);
+ print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1, save, sizeof(*save), false);
return 0;
}
@@ -2600,7 +2600,7 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm)
void pre_sev_run(struct vcpu_svm *svm, int cpu)
{
- struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
+ struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu);
int asid = sev_get_asid(svm->vcpu.kvm);
/* Assign the asid allocated with this SEV guest */
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 58f0077d9357..9f88c8e6766e 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -245,7 +245,7 @@ struct kvm_ldttss_desc {
u32 zero1;
} __attribute__((packed));
-DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
+DEFINE_PER_CPU(struct svm_cpu_data, svm_data);
/*
* Only MSR_TSC_AUX is switched via the user return hook. EFER is switched via
@@ -581,12 +581,7 @@ static int svm_hardware_enable(void)
pr_err("%s: err EOPNOTSUPP on %d\n", __func__, me);
return -EINVAL;
}
- sd = per_cpu(svm_data, me);
- if (!sd) {
- pr_err("%s: svm_data is NULL on %d\n", __func__, me);
- return -EINVAL;
- }
-
+ sd = per_cpu_ptr(&svm_data, me);
sd->asid_generation = 1;
sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
sd->next_asid = sd->max_asid + 1;
@@ -597,7 +592,7 @@ static int svm_hardware_enable(void)
wrmsrl(MSR_EFER, efer | EFER_SVME);
- wrmsrl(MSR_VM_HSAVE_PA, __sme_page_pa(sd->save_area));
+ wrmsrl(MSR_VM_HSAVE_PA, sd->save_area_pa);
if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
/*
@@ -646,42 +641,37 @@ static int svm_hardware_enable(void)
static void svm_cpu_uninit(int cpu)
{
- struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
+ struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu);
- if (!sd)
+ if (!sd->save_area)
return;
- per_cpu(svm_data, cpu) = NULL;
kfree(sd->sev_vmcbs);
__free_page(sd->save_area);
- kfree(sd);
+ sd->save_area_pa = 0;
+ sd->save_area = NULL;
}
static int svm_cpu_init(int cpu)
{
- struct svm_cpu_data *sd;
+ struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu);
int ret = -ENOMEM;
- sd = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL);
- if (!sd)
- return ret;
- sd->cpu = cpu;
+ memset(sd, 0, sizeof(struct svm_cpu_data));
sd->save_area = alloc_page(GFP_KERNEL | __GFP_ZERO);
if (!sd->save_area)
- goto free_cpu_data;
+ return ret;
ret = sev_cpu_init(sd);
if (ret)
goto free_save_area;
- per_cpu(svm_data, cpu) = sd;
-
+ sd->save_area_pa = __sme_page_pa(sd->save_area);
return 0;
free_save_area:
__free_page(sd->save_area);
-free_cpu_data:
- kfree(sd);
+ sd->save_area = NULL;
return ret;
}
@@ -730,6 +720,15 @@ static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
u32 offset;
u32 *msrpm;
+ /*
+ * For non-nested case:
+ * If the L01 MSR bitmap does not intercept the MSR, then we need to
+ * save it.
+ *
+ * For nested case:
+ * If the L02 MSR bitmap does not intercept the MSR, then we need to
+ * save it.
+ */
msrpm = is_guest_mode(vcpu) ? to_svm(vcpu)->nested.msrpm:
to_svm(vcpu)->msrpm;
@@ -1425,7 +1424,7 @@ static void svm_clear_current_vmcb(struct vmcb *vmcb)
int i;
for_each_online_cpu(i)
- cmpxchg(&per_cpu(svm_data, i)->current_vmcb, vmcb, NULL);
+ cmpxchg(per_cpu_ptr(&svm_data.current_vmcb, i), vmcb, NULL);
}
static void svm_vcpu_free(struct kvm_vcpu *vcpu)
@@ -1450,7 +1449,7 @@ static void svm_vcpu_free(struct kvm_vcpu *vcpu)
static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
- struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu);
+ struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, vcpu->cpu);
if (sev_es_guest(vcpu->kvm))
sev_es_unmap_ghcb(svm);
@@ -1462,7 +1461,7 @@ static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
* Save additional host state that will be restored on VMEXIT (sev-es)
* or subsequent vmload of host save area.
*/
- vmsave(__sme_page_pa(sd->save_area));
+ vmsave(sd->save_area_pa);
if (sev_es_guest(vcpu->kvm)) {
struct sev_es_save_area *hostsa;
hostsa = (struct sev_es_save_area *)(page_address(sd->save_area) + 0x400);
@@ -1487,7 +1486,7 @@ static void svm_prepare_host_switch(struct kvm_vcpu *vcpu)
static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
- struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
+ struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu);
if (sd->current_vmcb != svm->vmcb) {
sd->current_vmcb = svm->vmcb;
@@ -3443,7 +3442,7 @@ static int svm_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
static void reload_tss(struct kvm_vcpu *vcpu)
{
- struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu);
+ struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, vcpu->cpu);
sd->tss_desc->type = 9; /* available 32/64-bit TSS */
load_TR_desc();
@@ -3451,7 +3450,7 @@ static void reload_tss(struct kvm_vcpu *vcpu)
static void pre_svm_run(struct kvm_vcpu *vcpu)
{
- struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu);
+ struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, vcpu->cpu);
struct vcpu_svm *svm = to_svm(vcpu);
/*
@@ -3911,30 +3910,16 @@ static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
return EXIT_FASTPATH_NONE;
}
-static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu)
+static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_intercepted)
{
struct vcpu_svm *svm = to_svm(vcpu);
- unsigned long vmcb_pa = svm->current_vmcb->pa;
guest_state_enter_irqoff();
- if (sev_es_guest(vcpu->kvm)) {
- __svm_sev_es_vcpu_run(vmcb_pa);
- } else {
- struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu);
-
- /*
- * Use a single vmcb (vmcb01 because it's always valid) for
- * context switching guest state via VMLOAD/VMSAVE, that way
- * the state doesn't need to be copied between vmcb01 and
- * vmcb02 when switching vmcbs for nested virtualization.
- */
- vmload(svm->vmcb01.pa);
- __svm_vcpu_run(vmcb_pa, (unsigned long *)&vcpu->arch.regs);
- vmsave(svm->vmcb01.pa);
-
- vmload(__sme_page_pa(sd->save_area));
- }
+ if (sev_es_guest(vcpu->kvm))
+ __svm_sev_es_vcpu_run(svm, spec_ctrl_intercepted);
+ else
+ __svm_vcpu_run(svm, spec_ctrl_intercepted);
guest_state_exit_irqoff();
}
@@ -3942,6 +3927,7 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu)
static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ bool spec_ctrl_intercepted = msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL);
trace_kvm_entry(vcpu);
@@ -3998,34 +3984,15 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
* being speculatively taken.
*/
if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL))
- x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl);
+ x86_spec_ctrl_set_guest(svm->virt_spec_ctrl);
- svm_vcpu_enter_exit(vcpu);
-
- /*
- * We do not use IBRS in the kernel. If this vCPU has used the
- * SPEC_CTRL MSR it may have left it on; save the value and
- * turn it off. This is much more efficient than blindly adding
- * it to the atomic save/restore list. Especially as the former
- * (Saving guest MSRs on vmexit) doesn't even exist in KVM.
- *
- * For non-nested case:
- * If the L01 MSR bitmap does not intercept the MSR, then we need to
- * save it.
- *
- * For nested case:
- * If the L02 MSR bitmap does not intercept the MSR, then we need to
- * save it.
- */
- if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL) &&
- unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
- svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
+ svm_vcpu_enter_exit(vcpu, spec_ctrl_intercepted);
if (!sev_es_guest(vcpu->kvm))
reload_tss(vcpu);
if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL))
- x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl);
+ x86_spec_ctrl_restore_host(svm->virt_spec_ctrl);
if (!sev_es_guest(vcpu->kvm)) {
vcpu->arch.cr2 = svm->vmcb->save.cr2;
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 6a7686bf6900..199a2ecef1ce 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -209,7 +209,6 @@ struct vcpu_svm {
struct vmcb *vmcb;
struct kvm_vmcb_info vmcb01;
struct kvm_vmcb_info *current_vmcb;
- struct svm_cpu_data *svm_data;
u32 asid;
u32 sysenter_esp_hi;
u32 sysenter_eip_hi;
@@ -281,8 +280,6 @@ struct vcpu_svm {
};
struct svm_cpu_data {
- int cpu;
-
u64 asid_generation;
u32 max_asid;
u32 next_asid;
@@ -290,13 +287,15 @@ struct svm_cpu_data {
struct kvm_ldttss_desc *tss_desc;
struct page *save_area;
+ unsigned long save_area_pa;
+
struct vmcb *current_vmcb;
/* index = sev_asid, value = vmcb pointer */
struct vmcb **sev_vmcbs;
};
-DECLARE_PER_CPU(struct svm_cpu_data *, svm_data);
+DECLARE_PER_CPU(struct svm_cpu_data, svm_data);
void recalc_intercepts(struct vcpu_svm *svm);
@@ -683,7 +682,7 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm);
/* vmenter.S */
-void __svm_sev_es_vcpu_run(unsigned long vmcb_pa);
-void __svm_vcpu_run(unsigned long vmcb_pa, unsigned long *regs);
+void __svm_sev_es_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted);
+void __svm_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted);
#endif
diff --git a/arch/x86/kvm/svm/svm_ops.h b/arch/x86/kvm/svm/svm_ops.h
index 9430d6437c9f..36c8af87a707 100644
--- a/arch/x86/kvm/svm/svm_ops.h
+++ b/arch/x86/kvm/svm/svm_ops.h
@@ -61,9 +61,4 @@ static __always_inline void vmsave(unsigned long pa)
svm_asm1(vmsave, "a" (pa), "memory");
}
-static __always_inline void vmload(unsigned long pa)
-{
- svm_asm1(vmload, "a" (pa), "memory");
-}
-
#endif /* __KVM_X86_SVM_OPS_H */
diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S
index 723f8534986c..34367dc203f2 100644
--- a/arch/x86/kvm/svm/vmenter.S
+++ b/arch/x86/kvm/svm/vmenter.S
@@ -4,35 +4,97 @@
#include <asm/bitsperlong.h>
#include <asm/kvm_vcpu_regs.h>
#include <asm/nospec-branch.h>
+#include "kvm-asm-offsets.h"
#define WORD_SIZE (BITS_PER_LONG / 8)
/* Intentionally omit RAX as it's context switched by hardware */
-#define VCPU_RCX __VCPU_REGS_RCX * WORD_SIZE
-#define VCPU_RDX __VCPU_REGS_RDX * WORD_SIZE
-#define VCPU_RBX __VCPU_REGS_RBX * WORD_SIZE
+#define VCPU_RCX (SVM_vcpu_arch_regs + __VCPU_REGS_RCX * WORD_SIZE)
+#define VCPU_RDX (SVM_vcpu_arch_regs + __VCPU_REGS_RDX * WORD_SIZE)
+#define VCPU_RBX (SVM_vcpu_arch_regs + __VCPU_REGS_RBX * WORD_SIZE)
/* Intentionally omit RSP as it's context switched by hardware */
-#define VCPU_RBP __VCPU_REGS_RBP * WORD_SIZE
-#define VCPU_RSI __VCPU_REGS_RSI * WORD_SIZE
-#define VCPU_RDI __VCPU_REGS_RDI * WORD_SIZE
+#define VCPU_RBP (SVM_vcpu_arch_regs + __VCPU_REGS_RBP * WORD_SIZE)
+#define VCPU_RSI (SVM_vcpu_arch_regs + __VCPU_REGS_RSI * WORD_SIZE)
+#define VCPU_RDI (SVM_vcpu_arch_regs + __VCPU_REGS_RDI * WORD_SIZE)
#ifdef CONFIG_X86_64
-#define VCPU_R8 __VCPU_REGS_R8 * WORD_SIZE
-#define VCPU_R9 __VCPU_REGS_R9 * WORD_SIZE
-#define VCPU_R10 __VCPU_REGS_R10 * WORD_SIZE
-#define VCPU_R11 __VCPU_REGS_R11 * WORD_SIZE
-#define VCPU_R12 __VCPU_REGS_R12 * WORD_SIZE
-#define VCPU_R13 __VCPU_REGS_R13 * WORD_SIZE
-#define VCPU_R14 __VCPU_REGS_R14 * WORD_SIZE
-#define VCPU_R15 __VCPU_REGS_R15 * WORD_SIZE
+#define VCPU_R8 (SVM_vcpu_arch_regs + __VCPU_REGS_R8 * WORD_SIZE)
+#define VCPU_R9 (SVM_vcpu_arch_regs + __VCPU_REGS_R9 * WORD_SIZE)
+#define VCPU_R10 (SVM_vcpu_arch_regs + __VCPU_REGS_R10 * WORD_SIZE)
+#define VCPU_R11 (SVM_vcpu_arch_regs + __VCPU_REGS_R11 * WORD_SIZE)
+#define VCPU_R12 (SVM_vcpu_arch_regs + __VCPU_REGS_R12 * WORD_SIZE)
+#define VCPU_R13 (SVM_vcpu_arch_regs + __VCPU_REGS_R13 * WORD_SIZE)
+#define VCPU_R14 (SVM_vcpu_arch_regs + __VCPU_REGS_R14 * WORD_SIZE)
+#define VCPU_R15 (SVM_vcpu_arch_regs + __VCPU_REGS_R15 * WORD_SIZE)
#endif
+#define SVM_vmcb01_pa (SVM_vmcb01 + KVM_VMCB_pa)
+
.section .noinstr.text, "ax"
+.macro RESTORE_GUEST_SPEC_CTRL
+ /* No need to do anything if SPEC_CTRL is unset or V_SPEC_CTRL is set */
+ ALTERNATIVE_2 "", \
+ "jmp 800f", X86_FEATURE_MSR_SPEC_CTRL, \
+ "", X86_FEATURE_V_SPEC_CTRL
+801:
+.endm
+.macro RESTORE_GUEST_SPEC_CTRL_BODY
+800:
+ /*
+ * SPEC_CTRL handling: if the guest's SPEC_CTRL value differs from the
+ * host's, write the MSR. This is kept out-of-line so that the common
+ * case does not have to jump.
+ *
+ * IMPORTANT: To avoid RSB underflow attacks and any other nastiness,
+ * there must not be any returns or indirect branches between this code
+ * and vmentry.
+ */
+ movl SVM_spec_ctrl(%_ASM_DI), %eax
+ cmp PER_CPU_VAR(x86_spec_ctrl_current), %eax
+ je 801b
+ mov $MSR_IA32_SPEC_CTRL, %ecx
+ xor %edx, %edx
+ wrmsr
+ jmp 801b
+.endm
+
+.macro RESTORE_HOST_SPEC_CTRL
+ /* No need to do anything if SPEC_CTRL is unset or V_SPEC_CTRL is set */
+ ALTERNATIVE_2 "", \
+ "jmp 900f", X86_FEATURE_MSR_SPEC_CTRL, \
+ "", X86_FEATURE_V_SPEC_CTRL
+901:
+.endm
+.macro RESTORE_HOST_SPEC_CTRL_BODY
+900:
+ /* Same for after vmexit. */
+ mov $MSR_IA32_SPEC_CTRL, %ecx
+
+ /*
+ * Load the value that the guest had written into MSR_IA32_SPEC_CTRL,
+ * if it was not intercepted during guest execution.
+ */
+ cmpb $0, (%_ASM_SP)
+ jnz 998f
+ rdmsr
+ movl %eax, SVM_spec_ctrl(%_ASM_DI)
+998:
+
+ /* Now restore the host value of the MSR if different from the guest's. */
+ movl PER_CPU_VAR(x86_spec_ctrl_current), %eax
+ cmp SVM_spec_ctrl(%_ASM_DI), %eax
+ je 901b
+ xor %edx, %edx
+ wrmsr
+ jmp 901b
+.endm
+
+
/**
* __svm_vcpu_run - Run a vCPU via a transition to SVM guest mode
- * @vmcb_pa: unsigned long
- * @regs: unsigned long * (to guest registers)
+ * @svm: struct vcpu_svm *
+ * @spec_ctrl_intercepted: bool
*/
SYM_FUNC_START(__svm_vcpu_run)
push %_ASM_BP
@@ -47,49 +109,71 @@ SYM_FUNC_START(__svm_vcpu_run)
#endif
push %_ASM_BX
- /* Save @regs. */
+ /*
+ * Save variables needed after vmexit on the stack, in inverse
+ * order compared to when they are needed.
+ */
+
+ /* Accessed directly from the stack in RESTORE_HOST_SPEC_CTRL. */
push %_ASM_ARG2
- /* Save @vmcb. */
+ /* Needed to restore access to percpu variables. */
+ __ASM_SIZE(push) PER_CPU_VAR(svm_data + SD_save_area_pa)
+
+ /* Finally save @svm. */
push %_ASM_ARG1
- /* Move @regs to RAX. */
- mov %_ASM_ARG2, %_ASM_AX
+.ifnc _ASM_ARG1, _ASM_DI
+ /*
+ * Stash @svm in RDI early. On 32-bit, arguments are in RAX, RCX
+ * and RDX which are clobbered by RESTORE_GUEST_SPEC_CTRL.
+ */
+ mov %_ASM_ARG1, %_ASM_DI
+.endif
+
+ /* Clobbers RAX, RCX, RDX. */
+ RESTORE_GUEST_SPEC_CTRL
+
+ /*
+ * Use a single vmcb (vmcb01 because it's always valid) for
+ * context switching guest state via VMLOAD/VMSAVE, that way
+ * the state doesn't need to be copied between vmcb01 and
+ * vmcb02 when switching vmcbs for nested virtualization.
+ */
+ mov SVM_vmcb01_pa(%_ASM_DI), %_ASM_AX
+1: vmload %_ASM_AX
+2:
+
+ /* Get svm->current_vmcb->pa into RAX. */
+ mov SVM_current_vmcb(%_ASM_DI), %_ASM_AX
+ mov KVM_VMCB_pa(%_ASM_AX), %_ASM_AX
/* Load guest registers. */
- mov VCPU_RCX(%_ASM_AX), %_ASM_CX
- mov VCPU_RDX(%_ASM_AX), %_ASM_DX
- mov VCPU_RBX(%_ASM_AX), %_ASM_BX
- mov VCPU_RBP(%_ASM_AX), %_ASM_BP
- mov VCPU_RSI(%_ASM_AX), %_ASM_SI
- mov VCPU_RDI(%_ASM_AX), %_ASM_DI
+ mov VCPU_RCX(%_ASM_DI), %_ASM_CX
+ mov VCPU_RDX(%_ASM_DI), %_ASM_DX
+ mov VCPU_RBX(%_ASM_DI), %_ASM_BX
+ mov VCPU_RBP(%_ASM_DI), %_ASM_BP
+ mov VCPU_RSI(%_ASM_DI), %_ASM_SI
#ifdef CONFIG_X86_64
- mov VCPU_R8 (%_ASM_AX), %r8
- mov VCPU_R9 (%_ASM_AX), %r9
- mov VCPU_R10(%_ASM_AX), %r10
- mov VCPU_R11(%_ASM_AX), %r11
- mov VCPU_R12(%_ASM_AX), %r12
- mov VCPU_R13(%_ASM_AX), %r13
- mov VCPU_R14(%_ASM_AX), %r14
- mov VCPU_R15(%_ASM_AX), %r15
+ mov VCPU_R8 (%_ASM_DI), %r8
+ mov VCPU_R9 (%_ASM_DI), %r9
+ mov VCPU_R10(%_ASM_DI), %r10
+ mov VCPU_R11(%_ASM_DI), %r11
+ mov VCPU_R12(%_ASM_DI), %r12
+ mov VCPU_R13(%_ASM_DI), %r13
+ mov VCPU_R14(%_ASM_DI), %r14
+ mov VCPU_R15(%_ASM_DI), %r15
#endif
-
- /* "POP" @vmcb to RAX. */
- pop %_ASM_AX
+ mov VCPU_RDI(%_ASM_DI), %_ASM_DI
/* Enter guest mode */
sti
-1: vmrun %_ASM_AX
-
-2: cli
-
-#ifdef CONFIG_RETPOLINE
- /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */
- FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
-#endif
+3: vmrun %_ASM_AX
+4:
+ cli
- /* "POP" @regs to RAX. */
+ /* Pop @svm to RAX while it's the only available register. */
pop %_ASM_AX
/* Save all guest registers. */
@@ -110,6 +194,26 @@ SYM_FUNC_START(__svm_vcpu_run)
mov %r15, VCPU_R15(%_ASM_AX)
#endif
+ /* @svm can stay in RDI from now on. */
+ mov %_ASM_AX, %_ASM_DI
+
+ mov SVM_vmcb01_pa(%_ASM_DI), %_ASM_AX
+5: vmsave %_ASM_AX
+6:
+
+ /* Restores GSBASE among other things, allowing access to percpu data. */
+ pop %_ASM_AX
+7: vmload %_ASM_AX
+8:
+
+#ifdef CONFIG_RETPOLINE
+ /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */
+ FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
+#endif
+
+ /* Clobbers RAX, RCX, RDX. */
+ RESTORE_HOST_SPEC_CTRL
+
/*
* Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be
* untrained as soon as we exit the VM and are back to the
@@ -145,6 +249,9 @@ SYM_FUNC_START(__svm_vcpu_run)
xor %r15d, %r15d
#endif
+ /* "Pop" @spec_ctrl_intercepted. */
+ pop %_ASM_BX
+
pop %_ASM_BX
#ifdef CONFIG_X86_64
@@ -159,17 +266,33 @@ SYM_FUNC_START(__svm_vcpu_run)
pop %_ASM_BP
RET
-3: cmpb $0, kvm_rebooting
+ RESTORE_GUEST_SPEC_CTRL_BODY
+ RESTORE_HOST_SPEC_CTRL_BODY
+
+10: cmpb $0, kvm_rebooting
jne 2b
ud2
+30: cmpb $0, kvm_rebooting
+ jne 4b
+ ud2
+50: cmpb $0, kvm_rebooting
+ jne 6b
+ ud2
+70: cmpb $0, kvm_rebooting
+ jne 8b
+ ud2
- _ASM_EXTABLE(1b, 3b)
+ _ASM_EXTABLE(1b, 10b)
+ _ASM_EXTABLE(3b, 30b)
+ _ASM_EXTABLE(5b, 50b)
+ _ASM_EXTABLE(7b, 70b)
SYM_FUNC_END(__svm_vcpu_run)
/**
* __svm_sev_es_vcpu_run - Run a SEV-ES vCPU via a transition to SVM guest mode
- * @vmcb_pa: unsigned long
+ * @svm: struct vcpu_svm *
+ * @spec_ctrl_intercepted: bool
*/
SYM_FUNC_START(__svm_sev_es_vcpu_run)
push %_ASM_BP
@@ -184,8 +307,31 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run)
#endif
push %_ASM_BX
- /* Move @vmcb to RAX. */
- mov %_ASM_ARG1, %_ASM_AX
+ /*
+ * Save variables needed after vmexit on the stack, in inverse
+ * order compared to when they are needed.
+ */
+
+ /* Accessed directly from the stack in RESTORE_HOST_SPEC_CTRL. */
+ push %_ASM_ARG2
+
+ /* Save @svm. */
+ push %_ASM_ARG1
+
+.ifnc _ASM_ARG1, _ASM_DI
+ /*
+ * Stash @svm in RDI early. On 32-bit, arguments are in RAX, RCX
+ * and RDX which are clobbered by RESTORE_GUEST_SPEC_CTRL.
+ */
+ mov %_ASM_ARG1, %_ASM_DI
+.endif
+
+ /* Clobbers RAX, RCX, RDX. */
+ RESTORE_GUEST_SPEC_CTRL
+
+ /* Get svm->current_vmcb->pa into RAX. */
+ mov SVM_current_vmcb(%_ASM_DI), %_ASM_AX
+ mov KVM_VMCB_pa(%_ASM_AX), %_ASM_AX
/* Enter guest mode */
sti
@@ -194,11 +340,17 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run)
2: cli
+ /* Pop @svm to RDI, guest registers have been saved already. */
+ pop %_ASM_DI
+
#ifdef CONFIG_RETPOLINE
/* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */
FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
#endif
+ /* Clobbers RAX, RCX, RDX. */
+ RESTORE_HOST_SPEC_CTRL
+
/*
* Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be
* untrained as soon as we exit the VM and are back to the
@@ -208,6 +360,9 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run)
*/
UNTRAIN_RET
+ /* "Pop" @spec_ctrl_intercepted. */
+ pop %_ASM_BX
+
pop %_ASM_BX
#ifdef CONFIG_X86_64
@@ -222,6 +377,9 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run)
pop %_ASM_BP
RET
+ RESTORE_GUEST_SPEC_CTRL_BODY
+ RESTORE_HOST_SPEC_CTRL_BODY
+
3: cmpb $0, kvm_rebooting
jne 2b
ud2
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 25b70a85bef5..10b33da9bd05 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -617,7 +617,7 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu)
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);
- for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) {
+ for (i = 0; i < KVM_INTEL_PMC_MAX_GENERIC; i++) {
pmu->gp_counters[i].type = KVM_PMC_GP;
pmu->gp_counters[i].vcpu = vcpu;
pmu->gp_counters[i].idx = i;
@@ -643,7 +643,7 @@ static void intel_pmu_reset(struct kvm_vcpu *vcpu)
struct kvm_pmc *pmc = NULL;
int i;
- for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) {
+ for (i = 0; i < KVM_INTEL_PMC_MAX_GENERIC; i++) {
pmc = &pmu->gp_counters[i];
pmc_stop_counter(pmc);
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 8477d8bdd69c..0b5db4de4d09 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -1,12 +1,12 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/linkage.h>
#include <asm/asm.h>
-#include <asm/asm-offsets.h>
#include <asm/bitsperlong.h>
#include <asm/kvm_vcpu_regs.h>
#include <asm/nospec-branch.h>
#include <asm/percpu.h>
#include <asm/segment.h>
+#include "kvm-asm-offsets.h"
#include "run_flags.h"
#define WORD_SIZE (BITS_PER_LONG / 8)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5f5eb577d583..ecea83f0da49 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1438,32 +1438,27 @@ static const u32 msrs_to_save_all[] = {
MSR_ARCH_PERFMON_FIXED_CTR0 + 2,
MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS,
MSR_CORE_PERF_GLOBAL_CTRL, MSR_CORE_PERF_GLOBAL_OVF_CTRL,
+ MSR_IA32_PEBS_ENABLE, MSR_IA32_DS_AREA, MSR_PEBS_DATA_CFG,
+
+ /* This part of MSRs should match KVM_INTEL_PMC_MAX_GENERIC. */
MSR_ARCH_PERFMON_PERFCTR0, MSR_ARCH_PERFMON_PERFCTR1,
MSR_ARCH_PERFMON_PERFCTR0 + 2, MSR_ARCH_PERFMON_PERFCTR0 + 3,
MSR_ARCH_PERFMON_PERFCTR0 + 4, MSR_ARCH_PERFMON_PERFCTR0 + 5,
MSR_ARCH_PERFMON_PERFCTR0 + 6, MSR_ARCH_PERFMON_PERFCTR0 + 7,
- MSR_ARCH_PERFMON_PERFCTR0 + 8, MSR_ARCH_PERFMON_PERFCTR0 + 9,
- MSR_ARCH_PERFMON_PERFCTR0 + 10, MSR_ARCH_PERFMON_PERFCTR0 + 11,
- MSR_ARCH_PERFMON_PERFCTR0 + 12, MSR_ARCH_PERFMON_PERFCTR0 + 13,
- MSR_ARCH_PERFMON_PERFCTR0 + 14, MSR_ARCH_PERFMON_PERFCTR0 + 15,
- MSR_ARCH_PERFMON_PERFCTR0 + 16, MSR_ARCH_PERFMON_PERFCTR0 + 17,
MSR_ARCH_PERFMON_EVENTSEL0, MSR_ARCH_PERFMON_EVENTSEL1,
MSR_ARCH_PERFMON_EVENTSEL0 + 2, MSR_ARCH_PERFMON_EVENTSEL0 + 3,
MSR_ARCH_PERFMON_EVENTSEL0 + 4, MSR_ARCH_PERFMON_EVENTSEL0 + 5,
MSR_ARCH_PERFMON_EVENTSEL0 + 6, MSR_ARCH_PERFMON_EVENTSEL0 + 7,
- MSR_ARCH_PERFMON_EVENTSEL0 + 8, MSR_ARCH_PERFMON_EVENTSEL0 + 9,
- MSR_ARCH_PERFMON_EVENTSEL0 + 10, MSR_ARCH_PERFMON_EVENTSEL0 + 11,
- MSR_ARCH_PERFMON_EVENTSEL0 + 12, MSR_ARCH_PERFMON_EVENTSEL0 + 13,
- MSR_ARCH_PERFMON_EVENTSEL0 + 14, MSR_ARCH_PERFMON_EVENTSEL0 + 15,
- MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17,
- MSR_IA32_PEBS_ENABLE, MSR_IA32_DS_AREA, MSR_PEBS_DATA_CFG,
MSR_K7_EVNTSEL0, MSR_K7_EVNTSEL1, MSR_K7_EVNTSEL2, MSR_K7_EVNTSEL3,
MSR_K7_PERFCTR0, MSR_K7_PERFCTR1, MSR_K7_PERFCTR2, MSR_K7_PERFCTR3,
+
+ /* This part of MSRs should match KVM_AMD_PMC_MAX_GENERIC. */
MSR_F15H_PERF_CTL0, MSR_F15H_PERF_CTL1, MSR_F15H_PERF_CTL2,
MSR_F15H_PERF_CTL3, MSR_F15H_PERF_CTL4, MSR_F15H_PERF_CTL5,
MSR_F15H_PERF_CTR0, MSR_F15H_PERF_CTR1, MSR_F15H_PERF_CTR2,
MSR_F15H_PERF_CTR3, MSR_F15H_PERF_CTR4, MSR_F15H_PERF_CTR5,
+
MSR_IA32_XFD, MSR_IA32_XFD_ERR,
};
@@ -7041,14 +7036,14 @@ static void kvm_init_msr_list(void)
intel_pt_validate_hw_cap(PT_CAP_num_address_ranges) * 2)
continue;
break;
- case MSR_ARCH_PERFMON_PERFCTR0 ... MSR_ARCH_PERFMON_PERFCTR0 + 17:
+ case MSR_ARCH_PERFMON_PERFCTR0 ... MSR_ARCH_PERFMON_PERFCTR_MAX:
if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_PERFCTR0 >=
- min(INTEL_PMC_MAX_GENERIC, kvm_pmu_cap.num_counters_gp))
+ min(KVM_INTEL_PMC_MAX_GENERIC, kvm_pmu_cap.num_counters_gp))
continue;
break;
- case MSR_ARCH_PERFMON_EVENTSEL0 ... MSR_ARCH_PERFMON_EVENTSEL0 + 17:
+ case MSR_ARCH_PERFMON_EVENTSEL0 ... MSR_ARCH_PERFMON_EVENTSEL_MAX:
if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_EVENTSEL0 >=
- min(INTEL_PMC_MAX_GENERIC, kvm_pmu_cap.num_counters_gp))
+ min(KVM_INTEL_PMC_MAX_GENERIC, kvm_pmu_cap.num_counters_gp))
continue;
break;
case MSR_IA32_XFD:
diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c
index f1bb18617156..24b48af27417 100644
--- a/arch/x86/lib/usercopy.c
+++ b/arch/x86/lib/usercopy.c
@@ -6,6 +6,7 @@
#include <linux/uaccess.h>
#include <linux/export.h>
+#include <linux/instrumented.h>
#include <asm/tlbflush.h>
@@ -44,7 +45,9 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
* called from other contexts.
*/
pagefault_disable();
+ instrument_copy_from_user_before(to, from, n);
ret = raw_copy_from_user(to, from, n);
+ instrument_copy_from_user_after(to, from, n, ret);
pagefault_enable();
return ret;
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 6b3033845c6d..5804bbae4f01 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -37,8 +37,12 @@ int pmd_huge(pmd_t pmd)
*/
int pud_huge(pud_t pud)
{
+#if CONFIG_PGTABLE_LEVELS > 2
return !pud_none(pud) &&
(pud_val(pud) & (_PAGE_PRESENT|_PAGE_PSE)) != _PAGE_PRESENT;
+#else
+ return 0;
+#endif
}
#ifdef CONFIG_HUGETLB_PAGE