aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/Kconfig10
-rw-r--r--arch/alpha/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/arc/include/asm/cacheflush.h1
-rw-r--r--arch/arm/Kconfig1
-rw-r--r--arch/arm/Makefile22
-rw-r--r--arch/arm/boot/dts/bcm2711.dtsi8
-rw-r--r--arch/arm/boot/dts/bcm5301x.dtsi4
-rw-r--r--arch/arm/include/asm/cacheflush.h1
-rw-r--r--arch/arm/mach-socfpga/core.h2
-rw-r--r--arch/arm/mach-socfpga/platsmp.c8
-rw-r--r--arch/arm/mm/mmu.c4
-rw-r--r--arch/arm64/boot/dts/exynos/exynosautov9.dtsi3
-rw-r--r--arch/arm64/include/asm/esr.h1
-rw-r--r--arch/arm64/include/asm/ftrace.h11
-rw-r--r--arch/arm64/include/asm/kvm_arm.h4
-rw-r--r--arch/arm64/include/asm/kvm_host.h2
-rw-r--r--arch/arm64/include/asm/pgalloc.h2
-rw-r--r--arch/arm64/include/asm/stacktrace.h6
-rw-r--r--arch/arm64/include/asm/uaccess.h48
-rw-r--r--arch/arm64/kernel/ftrace.c6
-rw-r--r--arch/arm64/kernel/stacktrace.c18
-rw-r--r--arch/arm64/kvm/arm.c14
-rw-r--r--arch/arm64/kvm/guest.c7
-rw-r--r--arch/arm64/kvm/hyp/hyp-entry.S2
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/switch.h14
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h7
-rw-r--r--arch/arm64/kvm/hyp/nvhe/host.S2
-rw-r--r--arch/arm64/kvm/hyp/nvhe/setup.c14
-rw-r--r--arch/arm64/kvm/hyp/nvhe/switch.c8
-rw-r--r--arch/arm64/kvm/hyp/nvhe/sys_regs.c2
-rw-r--r--arch/arm64/kvm/hyp/vhe/switch.c4
-rw-r--r--arch/hexagon/include/asm/timer-regs.h26
-rw-r--r--arch/hexagon/include/asm/timex.h3
-rw-r--r--arch/hexagon/kernel/.gitignore1
-rw-r--r--arch/hexagon/kernel/time.c12
-rw-r--r--arch/hexagon/lib/io.c4
-rw-r--r--arch/ia64/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/m68k/include/asm/cacheflush_mm.h1
-rw-r--r--arch/m68k/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/m68k/kernel/traps.c2
-rw-r--r--arch/microblaze/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/mips/Kbuild.platforms2
-rw-r--r--arch/mips/Kconfig7
-rw-r--r--arch/mips/Makefile2
-rw-r--r--arch/mips/bcm63xx/clk.c6
-rw-r--r--arch/mips/boot/compressed/.gitignore3
-rw-r--r--arch/mips/boot/compressed/Makefile12
-rw-r--r--arch/mips/boot/compressed/ashldi3.c2
-rw-r--r--arch/mips/boot/compressed/bswapdi.c2
-rw-r--r--arch/mips/boot/compressed/bswapsi.c2
-rw-r--r--arch/mips/boot/compressed/uart-ath79.c2
-rw-r--r--arch/mips/boot/dts/ingenic/jz4725b.dtsi2
-rw-r--r--arch/mips/boot/dts/ingenic/jz4740.dtsi2
-rw-r--r--arch/mips/boot/dts/ingenic/jz4770.dtsi2
-rw-r--r--arch/mips/boot/dts/ingenic/jz4780.dtsi2
-rw-r--r--arch/mips/boot/dts/ingenic/x1000.dtsi2
-rw-r--r--arch/mips/boot/dts/ingenic/x1830.dtsi2
-rw-r--r--arch/mips/configs/bmips_stb_defconfig155
-rw-r--r--arch/mips/dec/setup.c6
-rw-r--r--arch/mips/generic/yamon-dt.c2
-rw-r--r--arch/mips/include/asm/cacheflush.h2
-rw-r--r--arch/mips/include/asm/traps.h2
-rw-r--r--arch/mips/kernel/cpu-probe.c4
-rw-r--r--arch/mips/kernel/proc.c2
-rw-r--r--arch/mips/kernel/syscalls/syscall_n32.tbl1
-rw-r--r--arch/mips/kernel/syscalls/syscall_n64.tbl1
-rw-r--r--arch/mips/kernel/syscalls/syscall_o32.tbl1
-rw-r--r--arch/mips/kernel/traps.c8
-rw-r--r--arch/mips/kvm/mips.c2
-rw-r--r--arch/mips/lantiq/clk.c6
-rw-r--r--arch/mips/sgi-ip22/ip22-berr.c2
-rw-r--r--arch/mips/sgi-ip22/ip28-berr.c2
-rw-r--r--arch/mips/sgi-ip27/ip27-berr.c2
-rw-r--r--arch/mips/sgi-ip32/ip32-berr.c2
-rw-r--r--arch/mips/sibyte/swarm/setup.c2
-rw-r--r--arch/mips/txx9/generic/setup_tx4927.c2
-rw-r--r--arch/mips/txx9/generic/setup_tx4938.c2
-rw-r--r--arch/mips/txx9/generic/setup_tx4939.c2
-rw-r--r--arch/mips/vdso/Makefile2
-rw-r--r--arch/nds32/include/asm/cacheflush.h1
-rw-r--r--arch/nios2/include/asm/cacheflush.h1
-rw-r--r--arch/parisc/configs/generic-32bit_defconfig1
-rw-r--r--arch/parisc/include/asm/assembly.h55
-rw-r--r--arch/parisc/include/asm/cacheflush.h1
-rw-r--r--arch/parisc/include/asm/jump_label.h1
-rw-r--r--arch/parisc/include/asm/pgtable.h10
-rw-r--r--arch/parisc/include/asm/rt_sigframe.h2
-rw-r--r--arch/parisc/kernel/cache.c4
-rw-r--r--arch/parisc/kernel/entry.S16
-rw-r--r--arch/parisc/kernel/signal.c13
-rw-r--r--arch/parisc/kernel/signal32.h2
-rw-r--r--arch/parisc/kernel/stacktrace.c1
-rw-r--r--arch/parisc/kernel/syscall.S4
-rw-r--r--arch/parisc/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/powerpc/kernel/Makefile3
-rw-r--r--arch/powerpc/kernel/head_32.h6
-rw-r--r--arch/powerpc/kernel/head_8xx.S13
-rw-r--r--arch/powerpc/kernel/signal.h10
-rw-r--r--arch/powerpc/kernel/signal_32.c2
-rw-r--r--arch/powerpc/kernel/signal_64.c4
-rw-r--r--arch/powerpc/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/powerpc/kernel/watchdog.c6
-rw-r--r--arch/powerpc/kvm/book3s_hv_builtin.c5
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S4
-rw-r--r--arch/powerpc/kvm/powerpc.c4
-rw-r--r--arch/powerpc/mm/nohash/kaslr_booke.c2
-rw-r--r--arch/powerpc/mm/nohash/tlb.c4
-rw-r--r--arch/powerpc/mm/numa.c44
-rw-r--r--arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c1
-rw-r--r--arch/powerpc/platforms/pseries/iommu.c26
-rw-r--r--arch/powerpc/sysdev/xive/Kconfig1
-rw-r--r--arch/powerpc/sysdev/xive/common.c3
-rw-r--r--arch/riscv/Kconfig1
-rw-r--r--arch/riscv/Makefile12
-rw-r--r--arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts18
-rw-r--r--arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi40
-rw-r--r--arch/riscv/boot/dts/sifive/fu540-c000.dtsi2
-rw-r--r--arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts10
-rw-r--r--arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts7
-rw-r--r--arch/riscv/configs/32-bit.config2
-rw-r--r--arch/riscv/configs/64-bit.config2
-rw-r--r--arch/riscv/configs/defconfig9
-rw-r--r--arch/riscv/configs/rv32_defconfig2
-rw-r--r--arch/riscv/include/asm/kvm_host.h8
-rw-r--r--arch/riscv/include/asm/page.h2
-rw-r--r--arch/riscv/include/asm/pgtable.h6
-rw-r--r--arch/riscv/include/asm/vdso.h13
-rw-r--r--arch/riscv/include/asm/vdso/gettimeofday.h7
-rw-r--r--arch/riscv/kernel/head.S12
-rw-r--r--arch/riscv/kernel/reset.c12
-rw-r--r--arch/riscv/kernel/vdso.c250
-rw-r--r--arch/riscv/kernel/vdso/vdso.lds.S3
-rw-r--r--arch/riscv/kernel/vmlinux-xip.lds.S10
-rw-r--r--arch/riscv/kvm/mmu.c6
-rw-r--r--arch/riscv/kvm/vcpu.c2
-rw-r--r--arch/riscv/kvm/vcpu_sbi.c2
-rw-r--r--arch/riscv/kvm/vm.c2
-rw-r--r--arch/riscv/mm/context.c8
-rw-r--r--arch/riscv/mm/init.c7
-rw-r--r--arch/s390/Kconfig3
-rw-r--r--arch/s390/Makefile10
-rw-r--r--arch/s390/boot/startup.c88
-rw-r--r--arch/s390/include/asm/kexec.h6
-rw-r--r--arch/s390/include/asm/pci.h6
-rw-r--r--arch/s390/kernel/crash_dump.c4
-rw-r--r--arch/s390/kernel/ipl.c3
-rw-r--r--arch/s390/kernel/machine_kexec_file.c18
-rw-r--r--arch/s390/kernel/perf_cpum_cf.c4
-rw-r--r--arch/s390/kernel/setup.c22
-rw-r--r--arch/s390/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/s390/kernel/traps.c2
-rw-r--r--arch/s390/kernel/vdso32/Makefile2
-rw-r--r--arch/s390/kernel/vdso64/Makefile7
-rw-r--r--arch/s390/kvm/kvm-s390.c2
-rw-r--r--arch/s390/pci/pci.c148
-rw-r--r--arch/s390/pci/pci_event.c230
-rw-r--r--arch/s390/pci/pci_insn.c4
-rw-r--r--arch/s390/pci/pci_irq.c9
-rw-r--r--arch/sh/Kconfig1
-rw-r--r--arch/sh/Kconfig.debug1
-rw-r--r--arch/sh/boards/mach-landisk/irq.c4
-rw-r--r--arch/sh/boot/Makefile4
-rw-r--r--arch/sh/boot/compressed/.gitignore5
-rw-r--r--arch/sh/boot/compressed/Makefile32
-rw-r--r--arch/sh/boot/compressed/ashiftrt.S2
-rw-r--r--arch/sh/boot/compressed/ashldi3.c2
-rw-r--r--arch/sh/boot/compressed/ashlsi3.S2
-rw-r--r--arch/sh/boot/compressed/ashrsi3.S2
-rw-r--r--arch/sh/boot/compressed/lshrsi3.S2
-rw-r--r--arch/sh/include/asm/cacheflush.h1
-rw-r--r--arch/sh/include/asm/checksum_32.h5
-rw-r--r--arch/sh/include/asm/irq.h11
-rw-r--r--arch/sh/include/asm/sfp-machine.h8
-rw-r--r--arch/sh/include/asm/uaccess.h4
-rw-r--r--arch/sh/kernel/cpu/sh4a/smp-shx3.c5
-rw-r--r--arch/sh/kernel/crash_dump.c4
-rw-r--r--arch/sh/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/sh/kernel/traps_32.c8
-rw-r--r--arch/sh/math-emu/math.c147
-rw-r--r--arch/sh/mm/nommu.c4
-rw-r--r--arch/sparc/kernel/signal_32.c4
-rw-r--r--arch/sparc/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/sparc/kernel/windows.c2
-rw-r--r--arch/x86/Kconfig2
-rw-r--r--arch/x86/entry/vsyscall/vsyscall_64.c2
-rw-r--r--arch/x86/events/intel/core.c6
-rw-r--r--arch/x86/events/intel/lbr.c2
-rw-r--r--arch/x86/events/intel/uncore_snbep.c12
-rw-r--r--arch/x86/hyperv/hv_init.c12
-rw-r--r--arch/x86/include/asm/fpu/api.h6
-rw-r--r--arch/x86/include/asm/fpu/xcr.h12
-rw-r--r--arch/x86/include/asm/fpu/xstate.h7
-rw-r--r--arch/x86/include/asm/intel-family.h2
-rw-r--r--arch/x86/include/asm/kvm_host.h7
-rw-r--r--arch/x86/include/asm/kvm_para.h12
-rw-r--r--arch/x86/include/asm/mem_encrypt.h4
-rw-r--r--arch/x86/include/asm/paravirt.h6
-rw-r--r--arch/x86/include/asm/paravirt_types.h1
-rw-r--r--arch/x86/include/asm/processor.h5
-rw-r--r--arch/x86/include/asm/set_memory.h1
-rw-r--r--arch/x86/include/asm/smp.h1
-rw-r--r--arch/x86/include/asm/static_call.h1
-rw-r--r--arch/x86/include/asm/xen/hypercall.h4
-rw-r--r--arch/x86/include/asm/xen/hypervisor.h1
-rw-r--r--arch/x86/include/uapi/asm/kvm_para.h1
-rw-r--r--arch/x86/kernel/cpu/cpuid-deps.c1
-rw-r--r--arch/x86/kernel/cpu/mce/intel.c5
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c20
-rw-r--r--arch/x86/kernel/cpu/sgx/main.c12
-rw-r--r--arch/x86/kernel/fpu/xstate.h37
-rw-r--r--arch/x86/kernel/kvm.c109
-rw-r--r--arch/x86/kernel/paravirt.c1
-rw-r--r--arch/x86/kernel/process.c5
-rw-r--r--arch/x86/kernel/setup.c66
-rw-r--r--arch/x86/kernel/smpboot.c18
-rw-r--r--arch/x86/kernel/static_call.c14
-rw-r--r--arch/x86/kernel/vm86_32.c4
-rw-r--r--arch/x86/kvm/cpuid.c93
-rw-r--r--arch/x86/kvm/hyperv.c8
-rw-r--r--arch/x86/kvm/ioapic.h1
-rw-r--r--arch/x86/kvm/irq.h1
-rw-r--r--arch/x86/kvm/lapic.c25
-rw-r--r--arch/x86/kvm/lapic.h2
-rw-r--r--arch/x86/kvm/mmu/mmu.c108
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.c40
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.h5
-rw-r--r--arch/x86/kvm/pmu.c2
-rw-r--r--arch/x86/kvm/pmu.h4
-rw-r--r--arch/x86/kvm/svm/avic.c19
-rw-r--r--arch/x86/kvm/svm/pmu.c5
-rw-r--r--arch/x86/kvm/svm/sev.c410
-rw-r--r--arch/x86/kvm/svm/svm.c15
-rw-r--r--arch/x86/kvm/svm/svm.h31
-rw-r--r--arch/x86/kvm/vmx/nested.c309
-rw-r--r--arch/x86/kvm/vmx/pmu_intel.c7
-rw-r--r--arch/x86/kvm/vmx/posted_intr.c20
-rw-r--r--arch/x86/kvm/vmx/vmx.c139
-rw-r--r--arch/x86/kvm/vmx/vmx.h43
-rw-r--r--arch/x86/kvm/x86.c223
-rw-r--r--arch/x86/kvm/x86.h19
-rw-r--r--arch/x86/kvm/xen.c22
-rw-r--r--arch/x86/mm/mem_encrypt.c72
-rw-r--r--arch/x86/mm/pat/set_memory.c6
-rw-r--r--arch/x86/xen/smp_pv.c12
-rw-r--r--arch/xtensa/include/asm/cacheflush.h3
-rw-r--r--arch/xtensa/kernel/syscalls/syscall.tbl1
246 files changed, 2741 insertions, 1392 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index 26b8ed11639d..d3c4ab249e9c 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -991,6 +991,16 @@ config HAVE_ARCH_COMPAT_MMAP_BASES
and vice-versa 32-bit applications to call 64-bit mmap().
Required for applications doing different bitness syscalls.
+config PAGE_SIZE_LESS_THAN_64KB
+ def_bool y
+ depends on !ARM64_64K_PAGES
+ depends on !IA64_PAGE_SIZE_64KB
+ depends on !PAGE_SIZE_64KB
+ depends on !PARISC_PAGE_SIZE_64KB
+ depends on !PPC_64K_PAGES
+ depends on !PPC_256K_PAGES
+ depends on !PAGE_SIZE_256KB
+
# This allows to use a set of generic functions to determine mmap base
# address by giving priority to top-down scheme only if the process
# is not in legacy mode (compat task, unlimited stack size or
diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl
index e4a041cd5715..ca5a32228cd6 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -488,3 +488,4 @@
556 common landlock_restrict_self sys_landlock_restrict_self
# 557 reserved for memfd_secret
558 common process_mrelease sys_process_mrelease
+559 common futex_waitv sys_futex_waitv
diff --git a/arch/arc/include/asm/cacheflush.h b/arch/arc/include/asm/cacheflush.h
index e8c2c7469e10..e201b4b1655a 100644
--- a/arch/arc/include/asm/cacheflush.h
+++ b/arch/arc/include/asm/cacheflush.h
@@ -36,7 +36,6 @@ void __flush_dcache_page(phys_addr_t paddr, unsigned long vaddr);
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
void flush_dcache_page(struct page *page);
-void flush_dcache_folio(struct folio *folio);
void dma_cache_wback_inv(phys_addr_t start, unsigned long sz);
void dma_cache_inv(phys_addr_t start, unsigned long sz);
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index f0f9e8bec83a..c2724d986fa0 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1463,6 +1463,7 @@ config HIGHMEM
bool "High Memory Support"
depends on MMU
select KMAP_LOCAL
+ select KMAP_LOCAL_NON_LINEAR_PTE_ARRAY
help
The address space of ARM processors is only 4 Gigabytes large
and it has to accommodate user address space, kernel address
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index a522716565c6..77172d555c7e 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -60,15 +60,15 @@ KBUILD_CFLAGS += $(call cc-option,-fno-ipa-sra)
# Note that GCC does not numerically define an architecture version
# macro, but instead defines a whole series of macros which makes
# testing for a specific architecture or later rather impossible.
-arch-$(CONFIG_CPU_32v7M) =-D__LINUX_ARM_ARCH__=7 -march=armv7-m -Wa,-march=armv7-m
-arch-$(CONFIG_CPU_32v7) =-D__LINUX_ARM_ARCH__=7 $(call cc-option,-march=armv7-a,-march=armv5t -Wa$(comma)-march=armv7-a)
-arch-$(CONFIG_CPU_32v6) =-D__LINUX_ARM_ARCH__=6 $(call cc-option,-march=armv6,-march=armv5t -Wa$(comma)-march=armv6)
+arch-$(CONFIG_CPU_32v7M) =-D__LINUX_ARM_ARCH__=7 -march=armv7-m
+arch-$(CONFIG_CPU_32v7) =-D__LINUX_ARM_ARCH__=7 -march=armv7-a
+arch-$(CONFIG_CPU_32v6) =-D__LINUX_ARM_ARCH__=6 -march=armv6
# Only override the compiler option if ARMv6. The ARMv6K extensions are
# always available in ARMv7
ifeq ($(CONFIG_CPU_32v6),y)
-arch-$(CONFIG_CPU_32v6K) =-D__LINUX_ARM_ARCH__=6 $(call cc-option,-march=armv6k,-march=armv5t -Wa$(comma)-march=armv6k)
+arch-$(CONFIG_CPU_32v6K) =-D__LINUX_ARM_ARCH__=6 -march=armv6k
endif
-arch-$(CONFIG_CPU_32v5) =-D__LINUX_ARM_ARCH__=5 $(call cc-option,-march=armv5te,-march=armv4t)
+arch-$(CONFIG_CPU_32v5) =-D__LINUX_ARM_ARCH__=5 -march=armv5te
arch-$(CONFIG_CPU_32v4T) =-D__LINUX_ARM_ARCH__=4 -march=armv4t
arch-$(CONFIG_CPU_32v4) =-D__LINUX_ARM_ARCH__=4 -march=armv4
arch-$(CONFIG_CPU_32v3) =-D__LINUX_ARM_ARCH__=3 -march=armv3m
@@ -82,7 +82,7 @@ tune-$(CONFIG_CPU_ARM720T) =-mtune=arm7tdmi
tune-$(CONFIG_CPU_ARM740T) =-mtune=arm7tdmi
tune-$(CONFIG_CPU_ARM9TDMI) =-mtune=arm9tdmi
tune-$(CONFIG_CPU_ARM940T) =-mtune=arm9tdmi
-tune-$(CONFIG_CPU_ARM946E) =$(call cc-option,-mtune=arm9e,-mtune=arm9tdmi)
+tune-$(CONFIG_CPU_ARM946E) =-mtune=arm9e
tune-$(CONFIG_CPU_ARM920T) =-mtune=arm9tdmi
tune-$(CONFIG_CPU_ARM922T) =-mtune=arm9tdmi
tune-$(CONFIG_CPU_ARM925T) =-mtune=arm9tdmi
@@ -90,11 +90,11 @@ tune-$(CONFIG_CPU_ARM926T) =-mtune=arm9tdmi
tune-$(CONFIG_CPU_FA526) =-mtune=arm9tdmi
tune-$(CONFIG_CPU_SA110) =-mtune=strongarm110
tune-$(CONFIG_CPU_SA1100) =-mtune=strongarm1100
-tune-$(CONFIG_CPU_XSCALE) =$(call cc-option,-mtune=xscale,-mtune=strongarm110) -Wa,-mcpu=xscale
-tune-$(CONFIG_CPU_XSC3) =$(call cc-option,-mtune=xscale,-mtune=strongarm110) -Wa,-mcpu=xscale
-tune-$(CONFIG_CPU_FEROCEON) =$(call cc-option,-mtune=marvell-f,-mtune=xscale)
-tune-$(CONFIG_CPU_V6) =$(call cc-option,-mtune=arm1136j-s,-mtune=strongarm)
-tune-$(CONFIG_CPU_V6K) =$(call cc-option,-mtune=arm1136j-s,-mtune=strongarm)
+tune-$(CONFIG_CPU_XSCALE) =-mtune=xscale
+tune-$(CONFIG_CPU_XSC3) =-mtune=xscale
+tune-$(CONFIG_CPU_FEROCEON) =-mtune=xscale
+tune-$(CONFIG_CPU_V6) =-mtune=arm1136j-s
+tune-$(CONFIG_CPU_V6K) =-mtune=arm1136j-s
# Evaluate tune cc-option calls now
tune-y := $(tune-y)
diff --git a/arch/arm/boot/dts/bcm2711.dtsi b/arch/arm/boot/dts/bcm2711.dtsi
index 3b60297af7f6..9e01dbca4a01 100644
--- a/arch/arm/boot/dts/bcm2711.dtsi
+++ b/arch/arm/boot/dts/bcm2711.dtsi
@@ -506,11 +506,17 @@
#address-cells = <3>;
#interrupt-cells = <1>;
#size-cells = <2>;
- interrupts = <GIC_SPI 148 IRQ_TYPE_LEVEL_HIGH>,
+ interrupts = <GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 148 IRQ_TYPE_LEVEL_HIGH>;
interrupt-names = "pcie", "msi";
interrupt-map-mask = <0x0 0x0 0x0 0x7>;
interrupt-map = <0 0 0 1 &gicv2 GIC_SPI 143
+ IRQ_TYPE_LEVEL_HIGH>,
+ <0 0 0 2 &gicv2 GIC_SPI 144
+ IRQ_TYPE_LEVEL_HIGH>,
+ <0 0 0 3 &gicv2 GIC_SPI 145
+ IRQ_TYPE_LEVEL_HIGH>,
+ <0 0 0 4 &gicv2 GIC_SPI 146
IRQ_TYPE_LEVEL_HIGH>;
msi-controller;
msi-parent = <&pcie0>;
diff --git a/arch/arm/boot/dts/bcm5301x.dtsi b/arch/arm/boot/dts/bcm5301x.dtsi
index d4f355015e3c..f69d2af3c1fa 100644
--- a/arch/arm/boot/dts/bcm5301x.dtsi
+++ b/arch/arm/boot/dts/bcm5301x.dtsi
@@ -242,6 +242,8 @@
gpio-controller;
#gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
};
pcie0: pcie@12000 {
@@ -408,7 +410,7 @@
i2c0: i2c@18009000 {
compatible = "brcm,iproc-i2c";
reg = <0x18009000 0x50>;
- interrupts = <GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <GIC_SPI 89 IRQ_TYPE_LEVEL_HIGH>;
#address-cells = <1>;
#size-cells = <0>;
clock-frequency = <100000>;
diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index e68fb879e4f9..5e56288e343b 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -290,7 +290,6 @@ extern void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr
*/
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
extern void flush_dcache_page(struct page *);
-void flush_dcache_folio(struct folio *folio);
#define ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE 1
static inline void flush_kernel_vmap_range(void *addr, int size)
diff --git a/arch/arm/mach-socfpga/core.h b/arch/arm/mach-socfpga/core.h
index fc2608b18a0d..18f01190dcfd 100644
--- a/arch/arm/mach-socfpga/core.h
+++ b/arch/arm/mach-socfpga/core.h
@@ -33,7 +33,7 @@ extern void __iomem *sdr_ctl_base_addr;
u32 socfpga_sdram_self_refresh(u32 sdr_base);
extern unsigned int socfpga_sdram_self_refresh_sz;
-extern char secondary_trampoline, secondary_trampoline_end;
+extern char secondary_trampoline[], secondary_trampoline_end[];
extern unsigned long socfpga_cpu1start_addr;
diff --git a/arch/arm/mach-socfpga/platsmp.c b/arch/arm/mach-socfpga/platsmp.c
index fbb80b883e5d..201191cf68f3 100644
--- a/arch/arm/mach-socfpga/platsmp.c
+++ b/arch/arm/mach-socfpga/platsmp.c
@@ -20,14 +20,14 @@
static int socfpga_boot_secondary(unsigned int cpu, struct task_struct *idle)
{
- int trampoline_size = &secondary_trampoline_end - &secondary_trampoline;
+ int trampoline_size = secondary_trampoline_end - secondary_trampoline;
if (socfpga_cpu1start_addr) {
/* This will put CPU #1 into reset. */
writel(RSTMGR_MPUMODRST_CPU1,
rst_manager_base_addr + SOCFPGA_RSTMGR_MODMPURST);
- memcpy(phys_to_virt(0), &secondary_trampoline, trampoline_size);
+ memcpy(phys_to_virt(0), secondary_trampoline, trampoline_size);
writel(__pa_symbol(secondary_startup),
sys_manager_base_addr + (socfpga_cpu1start_addr & 0x000000ff));
@@ -45,12 +45,12 @@ static int socfpga_boot_secondary(unsigned int cpu, struct task_struct *idle)
static int socfpga_a10_boot_secondary(unsigned int cpu, struct task_struct *idle)
{
- int trampoline_size = &secondary_trampoline_end - &secondary_trampoline;
+ int trampoline_size = secondary_trampoline_end - secondary_trampoline;
if (socfpga_cpu1start_addr) {
writel(RSTMGR_MPUMODRST_CPU1, rst_manager_base_addr +
SOCFPGA_A10_RSTMGR_MODMPURST);
- memcpy(phys_to_virt(0), &secondary_trampoline, trampoline_size);
+ memcpy(phys_to_virt(0), secondary_trampoline, trampoline_size);
writel(__pa_symbol(secondary_startup),
sys_manager_base_addr + (socfpga_cpu1start_addr & 0x00000fff));
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index a4e006005107..274e4f73fd33 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -390,9 +390,9 @@ void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot)
BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) < FIXADDR_START);
BUG_ON(idx >= __end_of_fixed_addresses);
- /* we only support device mappings until pgprot_kernel has been set */
+ /* We support only device mappings before pgprot_kernel is set. */
if (WARN_ON(pgprot_val(prot) != pgprot_val(FIXMAP_PAGE_IO) &&
- pgprot_val(pgprot_kernel) == 0))
+ pgprot_val(prot) && pgprot_val(pgprot_kernel) == 0))
return;
if (pgprot_val(prot))
diff --git a/arch/arm64/boot/dts/exynos/exynosautov9.dtsi b/arch/arm64/boot/dts/exynos/exynosautov9.dtsi
index 3e4727344b4a..a960c0bc2dba 100644
--- a/arch/arm64/boot/dts/exynos/exynosautov9.dtsi
+++ b/arch/arm64/boot/dts/exynos/exynosautov9.dtsi
@@ -296,8 +296,7 @@
pinctrl-0 = <&ufs_rst_n &ufs_refclk_out>;
phys = <&ufs_0_phy>;
phy-names = "ufs-phy";
- samsung,sysreg = <&syscon_fsys2>;
- samsung,ufs-shareability-reg-offset = <0x710>;
+ samsung,sysreg = <&syscon_fsys2 0x710>;
status = "disabled";
};
};
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index a305ce256090..d52a0b269ee8 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -68,6 +68,7 @@
#define ESR_ELx_EC_MAX (0x3F)
#define ESR_ELx_EC_SHIFT (26)
+#define ESR_ELx_EC_WIDTH (6)
#define ESR_ELx_EC_MASK (UL(0x3F) << ESR_ELx_EC_SHIFT)
#define ESR_ELx_EC(esr) (((esr) & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT)
diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
index 347b0cc68f07..1494cfa8639b 100644
--- a/arch/arm64/include/asm/ftrace.h
+++ b/arch/arm64/include/asm/ftrace.h
@@ -12,6 +12,17 @@
#define HAVE_FUNCTION_GRAPH_FP_TEST
+/*
+ * HAVE_FUNCTION_GRAPH_RET_ADDR_PTR means that the architecture can provide a
+ * "return address pointer" which can be used to uniquely identify a return
+ * address which has been overwritten.
+ *
+ * On arm64 we use the address of the caller's frame record, which remains the
+ * same for the lifetime of the instrumented function, unlike the return
+ * address in the LR.
+ */
+#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
+
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
#define ARCH_SUPPORTS_FTRACE_OPS 1
#else
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index a39fcf318c77..01d47c5886dc 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -91,7 +91,7 @@
#define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
/* TCR_EL2 Registers bits */
-#define TCR_EL2_RES1 ((1 << 31) | (1 << 23))
+#define TCR_EL2_RES1 ((1U << 31) | (1 << 23))
#define TCR_EL2_TBI (1 << 20)
#define TCR_EL2_PS_SHIFT 16
#define TCR_EL2_PS_MASK (7 << TCR_EL2_PS_SHIFT)
@@ -276,7 +276,7 @@
#define CPTR_EL2_TFP_SHIFT 10
/* Hyp Coprocessor Trap Register */
-#define CPTR_EL2_TCPAC (1 << 31)
+#define CPTR_EL2_TCPAC (1U << 31)
#define CPTR_EL2_TAM (1 << 30)
#define CPTR_EL2_TTA (1 << 20)
#define CPTR_EL2_TFP (1 << CPTR_EL2_TFP_SHIFT)
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 4be8486042a7..2a5f7f38006f 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -584,7 +584,7 @@ struct kvm_vcpu_stat {
u64 exits;
};
-int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
+void kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index 8433a2058eb1..237224484d0f 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -76,7 +76,7 @@ static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t ptep,
static inline void
pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep)
{
- VM_BUG_ON(mm != &init_mm);
+ VM_BUG_ON(mm && mm != &init_mm);
__pmd_populate(pmdp, __pa(ptep), PMD_TYPE_TABLE | PMD_TABLE_UXN);
}
diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
index a4e046ef4568..6564a01cc085 100644
--- a/arch/arm64/include/asm/stacktrace.h
+++ b/arch/arm64/include/asm/stacktrace.h
@@ -47,9 +47,6 @@ struct stack_info {
* @prev_type: The type of stack this frame record was on, or a synthetic
* value of STACK_TYPE_UNKNOWN. This is used to detect a
* transition from one stack to another.
- *
- * @graph: When FUNCTION_GRAPH_TRACER is selected, holds the index of a
- * replacement lr value in the ftrace graph stack.
*/
struct stackframe {
unsigned long fp;
@@ -57,9 +54,6 @@ struct stackframe {
DECLARE_BITMAP(stacks_done, __NR_STACK_TYPES);
unsigned long prev_fp;
enum stack_type prev_type;
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- int graph;
-#endif
#ifdef CONFIG_KRETPROBES
struct llist_node *kr_cur;
#endif
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 6e2e0b7031ab..3a5ff5e20586 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -281,12 +281,22 @@ do { \
(x) = (__force __typeof__(*(ptr)))__gu_val; \
} while (0)
+/*
+ * We must not call into the scheduler between uaccess_ttbr0_enable() and
+ * uaccess_ttbr0_disable(). As `x` and `ptr` could contain blocking functions,
+ * we must evaluate these outside of the critical section.
+ */
#define __raw_get_user(x, ptr, err) \
do { \
+ __typeof__(*(ptr)) __user *__rgu_ptr = (ptr); \
+ __typeof__(x) __rgu_val; \
__chk_user_ptr(ptr); \
+ \
uaccess_ttbr0_enable(); \
- __raw_get_mem("ldtr", x, ptr, err); \
+ __raw_get_mem("ldtr", __rgu_val, __rgu_ptr, err); \
uaccess_ttbr0_disable(); \
+ \
+ (x) = __rgu_val; \
} while (0)
#define __get_user_error(x, ptr, err) \
@@ -310,14 +320,22 @@ do { \
#define get_user __get_user
+/*
+ * We must not call into the scheduler between __uaccess_enable_tco_async() and
+ * __uaccess_disable_tco_async(). As `dst` and `src` may contain blocking
+ * functions, we must evaluate these outside of the critical section.
+ */
#define __get_kernel_nofault(dst, src, type, err_label) \
do { \
+ __typeof__(dst) __gkn_dst = (dst); \
+ __typeof__(src) __gkn_src = (src); \
int __gkn_err = 0; \
\
__uaccess_enable_tco_async(); \
- __raw_get_mem("ldr", *((type *)(dst)), \
- (__force type *)(src), __gkn_err); \
+ __raw_get_mem("ldr", *((type *)(__gkn_dst)), \
+ (__force type *)(__gkn_src), __gkn_err); \
__uaccess_disable_tco_async(); \
+ \
if (unlikely(__gkn_err)) \
goto err_label; \
} while (0)
@@ -351,11 +369,19 @@ do { \
} \
} while (0)
+/*
+ * We must not call into the scheduler between uaccess_ttbr0_enable() and
+ * uaccess_ttbr0_disable(). As `x` and `ptr` could contain blocking functions,
+ * we must evaluate these outside of the critical section.
+ */
#define __raw_put_user(x, ptr, err) \
do { \
- __chk_user_ptr(ptr); \
+ __typeof__(*(ptr)) __user *__rpu_ptr = (ptr); \
+ __typeof__(*(ptr)) __rpu_val = (x); \
+ __chk_user_ptr(__rpu_ptr); \
+ \
uaccess_ttbr0_enable(); \
- __raw_put_mem("sttr", x, ptr, err); \
+ __raw_put_mem("sttr", __rpu_val, __rpu_ptr, err); \
uaccess_ttbr0_disable(); \
} while (0)
@@ -380,14 +406,22 @@ do { \
#define put_user __put_user
+/*
+ * We must not call into the scheduler between __uaccess_enable_tco_async() and
+ * __uaccess_disable_tco_async(). As `dst` and `src` may contain blocking
+ * functions, we must evaluate these outside of the critical section.
+ */
#define __put_kernel_nofault(dst, src, type, err_label) \
do { \
+ __typeof__(dst) __pkn_dst = (dst); \
+ __typeof__(src) __pkn_src = (src); \
int __pkn_err = 0; \
\
__uaccess_enable_tco_async(); \
- __raw_put_mem("str", *((type *)(src)), \
- (__force type *)(dst), __pkn_err); \
+ __raw_put_mem("str", *((type *)(__pkn_src)), \
+ (__force type *)(__pkn_dst), __pkn_err); \
__uaccess_disable_tco_async(); \
+ \
if (unlikely(__pkn_err)) \
goto err_label; \
} while(0)
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index fc62dfe73f93..4506c4a90ac1 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -244,8 +244,6 @@ void arch_ftrace_update_code(int command)
* on the way back to parent. For this purpose, this function is called
* in _mcount() or ftrace_caller() to replace return address (*parent) on
* the call stack to return_to_handler.
- *
- * Note that @frame_pointer is used only for sanity check later.
*/
void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
unsigned long frame_pointer)
@@ -263,8 +261,10 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
*/
old = *parent;
- if (!function_graph_enter(old, self_addr, frame_pointer, NULL))
+ if (!function_graph_enter(old, self_addr, frame_pointer,
+ (void *)frame_pointer)) {
*parent = return_hooker;
+ }
}
#ifdef CONFIG_DYNAMIC_FTRACE
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index c30624fff6ac..94f83cd44e50 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -38,9 +38,6 @@ void start_backtrace(struct stackframe *frame, unsigned long fp,
{
frame->fp = fp;
frame->pc = pc;
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- frame->graph = 0;
-#endif
#ifdef CONFIG_KRETPROBES
frame->kr_cur = NULL;
#endif
@@ -116,20 +113,23 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
frame->prev_fp = fp;
frame->prev_type = info.type;
+ frame->pc = ptrauth_strip_insn_pac(frame->pc);
+
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
if (tsk->ret_stack &&
- (ptrauth_strip_insn_pac(frame->pc) == (unsigned long)return_to_handler)) {
- struct ftrace_ret_stack *ret_stack;
+ (frame->pc == (unsigned long)return_to_handler)) {
+ unsigned long orig_pc;
/*
* This is a case where function graph tracer has
* modified a return address (LR) in a stack frame
* to hook a function return.
* So replace it to an original value.
*/
- ret_stack = ftrace_graph_get_ret_stack(tsk, frame->graph++);
- if (WARN_ON_ONCE(!ret_stack))
+ orig_pc = ftrace_graph_ret_addr(tsk, NULL, frame->pc,
+ (void *)frame->fp);
+ if (WARN_ON_ONCE(frame->pc == orig_pc))
return -EINVAL;
- frame->pc = ret_stack->ret;
+ frame->pc = orig_pc;
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
#ifdef CONFIG_KRETPROBES
@@ -137,8 +137,6 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
frame->pc = kretprobe_find_ret_addr(tsk, (void *)frame->fp, &frame->kr_cur);
#endif
- frame->pc = ptrauth_strip_insn_pac(frame->pc);
-
return 0;
}
NOKPROBE_SYMBOL(unwind_frame);
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index f5490afe1ebf..e4727dc771bf 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -223,7 +223,14 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = 1;
break;
case KVM_CAP_NR_VCPUS:
- r = num_online_cpus();
+ /*
+ * ARM64 treats KVM_CAP_NR_CPUS differently from all other
+ * architectures, as it does not always bound it to
+ * KVM_CAP_MAX_VCPUS. It should not matter much because
+ * this is just an advisory value.
+ */
+ r = min_t(unsigned int, num_online_cpus(),
+ kvm_arm_default_max_vcpus());
break;
case KVM_CAP_MAX_VCPUS:
case KVM_CAP_MAX_VCPU_ID:
@@ -1389,12 +1396,9 @@ long kvm_arch_vm_ioctl(struct file *filp,
return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr);
}
case KVM_ARM_PREFERRED_TARGET: {
- int err;
struct kvm_vcpu_init init;
- err = kvm_vcpu_preferred_target(&init);
- if (err)
- return err;
+ kvm_vcpu_preferred_target(&init);
if (copy_to_user(argp, &init, sizeof(init)))
return -EFAULT;
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 5ce26bedf23c..e116c7767730 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -869,13 +869,10 @@ u32 __attribute_const__ kvm_target_cpu(void)
return KVM_ARM_TARGET_GENERIC_V8;
}
-int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
+void kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
{
u32 target = kvm_target_cpu();
- if (target < 0)
- return -ENODEV;
-
memset(init, 0, sizeof(*init));
/*
@@ -885,8 +882,6 @@ int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
* target type.
*/
init->target = (__u32)target;
-
- return 0;
}
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index 9aa9b73475c9..b6b6801d96d5 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -44,7 +44,7 @@
el1_sync: // Guest trapped into EL2
mrs x0, esr_el2
- lsr x0, x0, #ESR_ELx_EC_SHIFT
+ ubfx x0, x0, #ESR_ELx_EC_SHIFT, #ESR_ELx_EC_WIDTH
cmp x0, #ESR_ELx_EC_HVC64
ccmp x0, #ESR_ELx_EC_HVC32, #4, ne
b.ne el1_trap
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 7a0af1d39303..96c5f3fb7838 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -403,6 +403,8 @@ typedef bool (*exit_handler_fn)(struct kvm_vcpu *, u64 *);
static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu);
+static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code);
+
/*
* Allow the hypervisor to handle the exit with an exit handler if it has one.
*
@@ -429,6 +431,18 @@ static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
*/
static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
{
+ /*
+ * Save PSTATE early so that we can evaluate the vcpu mode
+ * early on.
+ */
+ vcpu->arch.ctxt.regs.pstate = read_sysreg_el2(SYS_SPSR);
+
+ /*
+ * Check whether we want to repaint the state one way or
+ * another.
+ */
+ early_exit_filter(vcpu, exit_code);
+
if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ)
vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR);
diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
index de7e14c862e6..7ecca8b07851 100644
--- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
+++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
@@ -70,7 +70,12 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
static inline void __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt)
{
ctxt->regs.pc = read_sysreg_el2(SYS_ELR);
- ctxt->regs.pstate = read_sysreg_el2(SYS_SPSR);
+ /*
+ * Guest PSTATE gets saved at guest fixup time in all
+ * cases. We still need to handle the nVHE host side here.
+ */
+ if (!has_vhe() && ctxt->__hyp_running_vcpu)
+ ctxt->regs.pstate = read_sysreg_el2(SYS_SPSR);
if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN))
ctxt_sys_reg(ctxt, DISR_EL1) = read_sysreg_s(SYS_VDISR_EL2);
diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S
index 0c6116d34e18..3d613e721a75 100644
--- a/arch/arm64/kvm/hyp/nvhe/host.S
+++ b/arch/arm64/kvm/hyp/nvhe/host.S
@@ -141,7 +141,7 @@ SYM_FUNC_END(__host_hvc)
.L__vect_start\@:
stp x0, x1, [sp, #-16]!
mrs x0, esr_el2
- lsr x0, x0, #ESR_ELx_EC_SHIFT
+ ubfx x0, x0, #ESR_ELx_EC_SHIFT, #ESR_ELx_EC_WIDTH
cmp x0, #ESR_ELx_EC_HVC64
b.eq __host_hvc
b __host_exit
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index 862c7b514e20..578f71798c2e 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -178,7 +178,7 @@ static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level,
phys = kvm_pte_to_phys(pte);
if (!addr_is_memory(phys))
- return 0;
+ return -EINVAL;
/*
* Adjust the host stage-2 mappings to match the ownership attributes
@@ -207,8 +207,18 @@ static int finalize_host_mappings(void)
.cb = finalize_host_mappings_walker,
.flags = KVM_PGTABLE_WALK_LEAF,
};
+ int i, ret;
+
+ for (i = 0; i < hyp_memblock_nr; i++) {
+ struct memblock_region *reg = &hyp_memory[i];
+ u64 start = (u64)hyp_phys_to_virt(reg->base);
+
+ ret = kvm_pgtable_walk(&pkvm_pgtable, start, reg->size, &walker);
+ if (ret)
+ return ret;
+ }
- return kvm_pgtable_walk(&pkvm_pgtable, 0, BIT(pkvm_pgtable.ia_bits), &walker);
+ return 0;
}
void __noreturn __pkvm_init_finalise(void)
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index c0e3fed26d93..d13115a12434 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -233,7 +233,7 @@ static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu)
* Returns false if the guest ran in AArch32 when it shouldn't have, and
* thus should exit to the host, or true if a the guest run loop can continue.
*/
-static bool handle_aarch32_guest(struct kvm_vcpu *vcpu, u64 *exit_code)
+static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
{
struct kvm *kvm = kern_hyp_va(vcpu->kvm);
@@ -248,10 +248,7 @@ static bool handle_aarch32_guest(struct kvm_vcpu *vcpu, u64 *exit_code)
vcpu->arch.target = -1;
*exit_code &= BIT(ARM_EXIT_WITH_SERROR_BIT);
*exit_code |= ARM_EXCEPTION_IL;
- return false;
}
-
- return true;
}
/* Switch to the guest for legacy non-VHE systems */
@@ -316,9 +313,6 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
/* Jump in the fire! */
exit_code = __guest_enter(vcpu);
- if (unlikely(!handle_aarch32_guest(vcpu, &exit_code)))
- break;
-
/* And we're baaack! */
} while (fixup_guest_exit(vcpu, &exit_code));
diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
index 3787ee6fb1a2..792cf6e6ac92 100644
--- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c
+++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
@@ -474,7 +474,7 @@ bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
return true;
}
-/**
+/*
* Handler for protected VM restricted exceptions.
*
* Inject an undefined exception into the guest and return true to indicate that
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 5a2cb5d9bc4b..fbb26b93c347 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -112,6 +112,10 @@ static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu)
return hyp_exit_handlers;
}
+static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+}
+
/* Switch to the guest for VHE systems running in EL2 */
static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
{
diff --git a/arch/hexagon/include/asm/timer-regs.h b/arch/hexagon/include/asm/timer-regs.h
deleted file mode 100644
index ee6c61423a05..000000000000
--- a/arch/hexagon/include/asm/timer-regs.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Timer support for Hexagon
- *
- * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved.
- */
-
-#ifndef _ASM_TIMER_REGS_H
-#define _ASM_TIMER_REGS_H
-
-/* This stuff should go into a platform specific file */
-#define TCX0_CLK_RATE 19200
-#define TIMER_ENABLE 0
-#define TIMER_CLR_ON_MATCH 1
-
-/*
- * 8x50 HDD Specs 5-8. Simulator co-sim not fixed until
- * release 1.1, and then it's "adjustable" and probably not defaulted.
- */
-#define RTOS_TIMER_INT 3
-#ifdef CONFIG_HEXAGON_COMET
-#define RTOS_TIMER_REGS_ADDR 0xAB000000UL
-#endif
-#define SLEEP_CLK_RATE 32000
-
-#endif
diff --git a/arch/hexagon/include/asm/timex.h b/arch/hexagon/include/asm/timex.h
index 8d4ec76fceb4..dfe69e118b2b 100644
--- a/arch/hexagon/include/asm/timex.h
+++ b/arch/hexagon/include/asm/timex.h
@@ -7,11 +7,10 @@
#define _ASM_TIMEX_H
#include <asm-generic/timex.h>
-#include <asm/timer-regs.h>
#include <asm/hexagon_vm.h>
/* Using TCX0 as our clock. CLOCK_TICK_RATE scheduled to be removed. */
-#define CLOCK_TICK_RATE TCX0_CLK_RATE
+#define CLOCK_TICK_RATE 19200
#define ARCH_HAS_READ_CURRENT_TIMER
diff --git a/arch/hexagon/kernel/.gitignore b/arch/hexagon/kernel/.gitignore
new file mode 100644
index 000000000000..c5f676c3c224
--- /dev/null
+++ b/arch/hexagon/kernel/.gitignore
@@ -0,0 +1 @@
+vmlinux.lds
diff --git a/arch/hexagon/kernel/time.c b/arch/hexagon/kernel/time.c
index feffe527ac92..febc95714d75 100644
--- a/arch/hexagon/kernel/time.c
+++ b/arch/hexagon/kernel/time.c
@@ -17,9 +17,10 @@
#include <linux/of_irq.h>
#include <linux/module.h>
-#include <asm/timer-regs.h>
#include <asm/hexagon_vm.h>
+#define TIMER_ENABLE BIT(0)
+
/*
* For the clocksource we need:
* pcycle frequency (600MHz)
@@ -33,6 +34,13 @@ cycles_t pcycle_freq_mhz;
cycles_t thread_freq_mhz;
cycles_t sleep_clk_freq;
+/*
+ * 8x50 HDD Specs 5-8. Simulator co-sim not fixed until
+ * release 1.1, and then it's "adjustable" and probably not defaulted.
+ */
+#define RTOS_TIMER_INT 3
+#define RTOS_TIMER_REGS_ADDR 0xAB000000UL
+
static struct resource rtos_timer_resources[] = {
{
.start = RTOS_TIMER_REGS_ADDR,
@@ -80,7 +88,7 @@ static int set_next_event(unsigned long delta, struct clock_event_device *evt)
iowrite32(0, &rtos_timer->clear);
iowrite32(delta, &rtos_timer->match);
- iowrite32(1 << TIMER_ENABLE, &rtos_timer->enable);
+ iowrite32(TIMER_ENABLE, &rtos_timer->enable);
return 0;
}
diff --git a/arch/hexagon/lib/io.c b/arch/hexagon/lib/io.c
index d35d69d6588c..55f75392857b 100644
--- a/arch/hexagon/lib/io.c
+++ b/arch/hexagon/lib/io.c
@@ -27,6 +27,7 @@ void __raw_readsw(const void __iomem *addr, void *data, int len)
*dst++ = *src;
}
+EXPORT_SYMBOL(__raw_readsw);
/*
* __raw_writesw - read words a short at a time
@@ -47,6 +48,7 @@ void __raw_writesw(void __iomem *addr, const void *data, int len)
}
+EXPORT_SYMBOL(__raw_writesw);
/* Pretty sure len is pre-adjusted for the length of the access already */
void __raw_readsl(const void __iomem *addr, void *data, int len)
@@ -62,6 +64,7 @@ void __raw_readsl(const void __iomem *addr, void *data, int len)
}
+EXPORT_SYMBOL(__raw_readsl);
void __raw_writesl(void __iomem *addr, const void *data, int len)
{
@@ -76,3 +79,4 @@ void __raw_writesl(void __iomem *addr, const void *data, int len)
}
+EXPORT_SYMBOL(__raw_writesl);
diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl
index 6fea1844fb95..707ae121f6d3 100644
--- a/arch/ia64/kernel/syscalls/syscall.tbl
+++ b/arch/ia64/kernel/syscalls/syscall.tbl
@@ -369,3 +369,4 @@
446 common landlock_restrict_self sys_landlock_restrict_self
# 447 reserved for memfd_secret
448 common process_mrelease sys_process_mrelease
+449 common futex_waitv sys_futex_waitv
diff --git a/arch/m68k/include/asm/cacheflush_mm.h b/arch/m68k/include/asm/cacheflush_mm.h
index 8ab46625ddd3..1ac55e7b47f0 100644
--- a/arch/m68k/include/asm/cacheflush_mm.h
+++ b/arch/m68k/include/asm/cacheflush_mm.h
@@ -250,7 +250,6 @@ static inline void __flush_page_to_ram(void *vaddr)
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
#define flush_dcache_page(page) __flush_page_to_ram(page_address(page))
-void flush_dcache_folio(struct folio *folio);
#define flush_dcache_mmap_lock(mapping) do { } while (0)
#define flush_dcache_mmap_unlock(mapping) do { } while (0)
#define flush_icache_page(vma, page) __flush_page_to_ram(page_address(page))
diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl
index 7976dff8f879..45bc32a41b90 100644
--- a/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@ -448,3 +448,4 @@
446 common landlock_restrict_self sys_landlock_restrict_self
# 447 reserved for memfd_secret
448 common process_mrelease sys_process_mrelease
+449 common futex_waitv sys_futex_waitv
diff --git a/arch/m68k/kernel/traps.c b/arch/m68k/kernel/traps.c
index 99058a6da956..34d6458340b0 100644
--- a/arch/m68k/kernel/traps.c
+++ b/arch/m68k/kernel/traps.c
@@ -1145,7 +1145,7 @@ asmlinkage void set_esp0(unsigned long ssp)
*/
asmlinkage void fpsp040_die(void)
{
- force_fatal_sig(SIGSEGV);
+ force_exit_sig(SIGSEGV);
}
#ifdef CONFIG_M68KFPU_EMU
diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl
index 6b0e11362bd2..2204bde3ce4a 100644
--- a/arch/microblaze/kernel/syscalls/syscall.tbl
+++ b/arch/microblaze/kernel/syscalls/syscall.tbl
@@ -454,3 +454,4 @@
446 common landlock_restrict_self sys_landlock_restrict_self
# 447 reserved for memfd_secret
448 common process_mrelease sys_process_mrelease
+449 common futex_waitv sys_futex_waitv
diff --git a/arch/mips/Kbuild.platforms b/arch/mips/Kbuild.platforms
index 2c57994b5217..30193bcf9caa 100644
--- a/arch/mips/Kbuild.platforms
+++ b/arch/mips/Kbuild.platforms
@@ -37,4 +37,4 @@ platform-$(CONFIG_MACH_TX49XX) += txx9/
platform-$(CONFIG_MACH_VR41XX) += vr41xx/
# include the platform specific files
-include $(patsubst %, $(srctree)/arch/mips/%/Platform, $(platform-y))
+include $(patsubst %/, $(srctree)/arch/mips/%/Platform, $(platform-y))
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 86510741d49d..0215dc1529e9 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -292,6 +292,8 @@ config BMIPS_GENERIC
select USB_OHCI_BIG_ENDIAN_DESC if CPU_BIG_ENDIAN
select USB_OHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN
select HARDIRQS_SW_RESEND
+ select HAVE_PCI
+ select PCI_DRIVERS_GENERIC
help
Build a generic DT-based kernel image that boots on select
BCM33xx cable modem chips, BCM63xx DSL chips, and BCM7xxx set-top
@@ -333,6 +335,9 @@ config BCM63XX
select SYS_SUPPORTS_32BIT_KERNEL
select SYS_SUPPORTS_BIG_ENDIAN
select SYS_HAS_EARLY_PRINTK
+ select SYS_HAS_CPU_BMIPS32_3300
+ select SYS_HAS_CPU_BMIPS4350
+ select SYS_HAS_CPU_BMIPS4380
select SWAP_IO_SPACE
select GPIOLIB
select MIPS_L1_CACHE_SHIFT_4
@@ -3092,7 +3097,7 @@ config STACKTRACE_SUPPORT
config PGTABLE_LEVELS
int
default 4 if PAGE_SIZE_4KB && MIPS_VA_BITS_48
- default 3 if 64BIT && !PAGE_SIZE_64KB
+ default 3 if 64BIT && (!PAGE_SIZE_64KB || MIPS_VA_BITS_48)
default 2
config MIPS_AUTO_PFN_OFFSET
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index e036fc025ccc..ace7f033de07 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -253,7 +253,9 @@ endif
#
# Board-dependent options and extra files
#
+ifdef need-compiler
include $(srctree)/arch/mips/Kbuild.platforms
+endif
ifdef CONFIG_PHYSICAL_START
load-y = $(CONFIG_PHYSICAL_START)
diff --git a/arch/mips/bcm63xx/clk.c b/arch/mips/bcm63xx/clk.c
index 5a3e325275d0..1c91064cb448 100644
--- a/arch/mips/bcm63xx/clk.c
+++ b/arch/mips/bcm63xx/clk.c
@@ -381,6 +381,12 @@ void clk_disable(struct clk *clk)
EXPORT_SYMBOL(clk_disable);
+struct clk *clk_get_parent(struct clk *clk)
+{
+ return NULL;
+}
+EXPORT_SYMBOL(clk_get_parent);
+
unsigned long clk_get_rate(struct clk *clk)
{
if (!clk)
diff --git a/arch/mips/boot/compressed/.gitignore b/arch/mips/boot/compressed/.gitignore
deleted file mode 100644
index d358395614c9..000000000000
--- a/arch/mips/boot/compressed/.gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-ashldi3.c
-bswapsi.c
diff --git a/arch/mips/boot/compressed/Makefile b/arch/mips/boot/compressed/Makefile
index 3548b3b45269..f27cf31b4140 100644
--- a/arch/mips/boot/compressed/Makefile
+++ b/arch/mips/boot/compressed/Makefile
@@ -50,19 +50,9 @@ vmlinuzobjs-$(CONFIG_MIPS_ALCHEMY) += $(obj)/uart-alchemy.o
vmlinuzobjs-$(CONFIG_ATH79) += $(obj)/uart-ath79.o
endif
-extra-y += uart-ath79.c
-$(obj)/uart-ath79.c: $(srctree)/arch/mips/ath79/early_printk.c
- $(call cmd,shipped)
-
vmlinuzobjs-$(CONFIG_KERNEL_XZ) += $(obj)/ashldi3.o
-extra-y += ashldi3.c
-$(obj)/ashldi3.c: $(obj)/%.c: $(srctree)/lib/%.c FORCE
- $(call if_changed,shipped)
-
-extra-y += bswapsi.c
-$(obj)/bswapsi.c: $(obj)/%.c: $(srctree)/arch/mips/lib/%.c FORCE
- $(call if_changed,shipped)
+vmlinuzobjs-$(CONFIG_KERNEL_ZSTD) += $(obj)/bswapdi.o $(obj)/ashldi3.o
targets := $(notdir $(vmlinuzobjs-y))
diff --git a/arch/mips/boot/compressed/ashldi3.c b/arch/mips/boot/compressed/ashldi3.c
new file mode 100644
index 000000000000..f7bf6a7aae31
--- /dev/null
+++ b/arch/mips/boot/compressed/ashldi3.c
@@ -0,0 +1,2 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "../../../../lib/ashldi3.c"
diff --git a/arch/mips/boot/compressed/bswapdi.c b/arch/mips/boot/compressed/bswapdi.c
new file mode 100644
index 000000000000..acb28aebb025
--- /dev/null
+++ b/arch/mips/boot/compressed/bswapdi.c
@@ -0,0 +1,2 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "../../lib/bswapdi.c"
diff --git a/arch/mips/boot/compressed/bswapsi.c b/arch/mips/boot/compressed/bswapsi.c
new file mode 100644
index 000000000000..fdb9c6476904
--- /dev/null
+++ b/arch/mips/boot/compressed/bswapsi.c
@@ -0,0 +1,2 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "../../lib/bswapsi.c"
diff --git a/arch/mips/boot/compressed/uart-ath79.c b/arch/mips/boot/compressed/uart-ath79.c
new file mode 100644
index 000000000000..d686820921be
--- /dev/null
+++ b/arch/mips/boot/compressed/uart-ath79.c
@@ -0,0 +1,2 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "../../ath79/early_printk.c"
diff --git a/arch/mips/boot/dts/ingenic/jz4725b.dtsi b/arch/mips/boot/dts/ingenic/jz4725b.dtsi
index a1f0b71c9223..0c6a5a4266f4 100644
--- a/arch/mips/boot/dts/ingenic/jz4725b.dtsi
+++ b/arch/mips/boot/dts/ingenic/jz4725b.dtsi
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-#include <dt-bindings/clock/jz4725b-cgu.h>
+#include <dt-bindings/clock/ingenic,jz4725b-cgu.h>
#include <dt-bindings/clock/ingenic,tcu.h>
/ {
diff --git a/arch/mips/boot/dts/ingenic/jz4740.dtsi b/arch/mips/boot/dts/ingenic/jz4740.dtsi
index c1afdfdaa8a3..772542e1f266 100644
--- a/arch/mips/boot/dts/ingenic/jz4740.dtsi
+++ b/arch/mips/boot/dts/ingenic/jz4740.dtsi
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-#include <dt-bindings/clock/jz4740-cgu.h>
+#include <dt-bindings/clock/ingenic,jz4740-cgu.h>
#include <dt-bindings/clock/ingenic,tcu.h>
/ {
diff --git a/arch/mips/boot/dts/ingenic/jz4770.dtsi b/arch/mips/boot/dts/ingenic/jz4770.dtsi
index 05c00b93088e..dfe74328ae5d 100644
--- a/arch/mips/boot/dts/ingenic/jz4770.dtsi
+++ b/arch/mips/boot/dts/ingenic/jz4770.dtsi
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-#include <dt-bindings/clock/jz4770-cgu.h>
+#include <dt-bindings/clock/ingenic,jz4770-cgu.h>
#include <dt-bindings/clock/ingenic,tcu.h>
/ {
diff --git a/arch/mips/boot/dts/ingenic/jz4780.dtsi b/arch/mips/boot/dts/ingenic/jz4780.dtsi
index 28adc3d93975..b0a4e2e019c3 100644
--- a/arch/mips/boot/dts/ingenic/jz4780.dtsi
+++ b/arch/mips/boot/dts/ingenic/jz4780.dtsi
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-#include <dt-bindings/clock/jz4780-cgu.h>
+#include <dt-bindings/clock/ingenic,jz4780-cgu.h>
#include <dt-bindings/clock/ingenic,tcu.h>
#include <dt-bindings/dma/jz4780-dma.h>
diff --git a/arch/mips/boot/dts/ingenic/x1000.dtsi b/arch/mips/boot/dts/ingenic/x1000.dtsi
index dec7909d4baa..8bd27edef216 100644
--- a/arch/mips/boot/dts/ingenic/x1000.dtsi
+++ b/arch/mips/boot/dts/ingenic/x1000.dtsi
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <dt-bindings/clock/ingenic,tcu.h>
-#include <dt-bindings/clock/x1000-cgu.h>
+#include <dt-bindings/clock/ingenic,x1000-cgu.h>
#include <dt-bindings/dma/x1000-dma.h>
/ {
diff --git a/arch/mips/boot/dts/ingenic/x1830.dtsi b/arch/mips/boot/dts/ingenic/x1830.dtsi
index 215257f8bb1a..2595df8671c7 100644
--- a/arch/mips/boot/dts/ingenic/x1830.dtsi
+++ b/arch/mips/boot/dts/ingenic/x1830.dtsi
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <dt-bindings/clock/ingenic,tcu.h>
-#include <dt-bindings/clock/x1830-cgu.h>
+#include <dt-bindings/clock/ingenic,x1830-cgu.h>
#include <dt-bindings/dma/x1830-dma.h>
/ {
diff --git a/arch/mips/configs/bmips_stb_defconfig b/arch/mips/configs/bmips_stb_defconfig
index 625bd2d7e685..5956fb95c19f 100644
--- a/arch/mips/configs/bmips_stb_defconfig
+++ b/arch/mips/configs/bmips_stb_defconfig
@@ -1,6 +1,7 @@
# CONFIG_LOCALVERSION_AUTO is not set
# CONFIG_SWAP is not set
CONFIG_NO_HZ=y
+CONFIG_HZ=1000
CONFIG_BLK_DEV_INITRD=y
CONFIG_EXPERT=y
# CONFIG_VM_EVENT_COUNTERS is not set
@@ -8,17 +9,34 @@ CONFIG_EXPERT=y
CONFIG_BMIPS_GENERIC=y
CONFIG_CPU_LITTLE_ENDIAN=y
CONFIG_HIGHMEM=y
+CONFIG_HIGH_RES_TIMERS=y
CONFIG_SMP=y
CONFIG_NR_CPUS=4
+CONFIG_CC_STACKPROTECTOR_STRONG=y
# CONFIG_SECCOMP is not set
CONFIG_MIPS_O32_FP64_SUPPORT=y
+# CONFIG_RD_GZIP is not set
+# CONFIG_RD_BZIP2 is not set
+# CONFIG_RD_LZMA is not set
+CONFIG_RD_XZ=y
+# CONFIG_RD_LZO is not set
+# CONFIG_RD_LZ4 is not set
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_IOSCHED_CFQ is not set
+CONFIG_PCI=y
+CONFIG_PCI_MSI=y
+CONFIG_PCIEASPM_POWERSAVE=y
+CONFIG_PCIEPORTBUS=y
+CONFIG_PCIE_BRCMSTB=y
CONFIG_CPU_FREQ=y
CONFIG_CPU_FREQ_STAT=y
+CONFIG_CPU_FREQ_STAT_DETAILS=y
+CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y
+CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
CONFIG_CPU_FREQ_GOV_POWERSAVE=y
CONFIG_CPU_FREQ_GOV_USERSPACE=y
CONFIG_CPU_FREQ_GOV_ONDEMAND=y
CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
-CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y
CONFIG_BMIPS_CPUFREQ=y
# CONFIG_BLK_DEV_BSG is not set
CONFIG_NET=y
@@ -32,32 +50,99 @@ CONFIG_INET=y
# CONFIG_INET_DIAG is not set
CONFIG_CFG80211=y
CONFIG_NL80211_TESTMODE=y
+CONFIG_WIRELESS=y
CONFIG_MAC80211=y
+CONFIG_NL80211=y
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
# CONFIG_STANDALONE is not set
# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+CONFIG_BRCMSTB_GISB_ARB=y
+CONFIG_MODULES=y
+CONFIG_MODULE_FORCE_LOAD=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_RARP=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_INET_LRO is not set
+CONFIG_INET_UDP_DIAG=y
+CONFIG_TCP_CONG_ADVANCED=y
+CONFIG_TCP_CONG_BIC=y
+# CONFIG_TCP_CONG_WESTWOOD is not set
+# CONFIG_TCP_CONG_HTCP is not set
+# CONFIG_IPV6 is not set
+CONFIG_IP_NF_IPTABLES=y
+CONFIG_IP_NF_FILTER=y
+CONFIG_NETFILTER=y
+CONFIG_NETFILTER_XTABLES=y
+CONFIG_BRIDGE=y
+CONFIG_BRIDGE_NETFILTER=m
+CONFIG_BRIDGE_NF_EBTABLES=m
+CONFIG_BRIDGE_EBT_BROUTE=m
+CONFIG_NET_DSA=y
+CONFIG_NET_SWITCHDEV=y
+CONFIG_DMA_CMA=y
+CONFIG_CMA_ALIGNMENT=12
+CONFIG_SPI=y
+CONFIG_SPI_BRCMSTB=y
CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
+CONFIG_MTD_JEDECPROBE=y
CONFIG_MTD_CFI_INTELEXT=y
CONFIG_MTD_CFI_AMDSTD=y
-CONFIG_MTD_PHYSMAP=y
+CONFIG_MTD_CFI_STAA=y
+CONFIG_MTD_ROM=y
+CONFIG_MTD_ABSENT=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_MTD_M25P80=y
+CONFIG_MTD_NAND=y
+CONFIG_MTD_NAND_BRCMNAND=y
+CONFIG_MTD_SPI_NOR=y
+# CONFIG_MTD_SPI_NOR_USE_4K_SECTORS is not set
+CONFIG_MTD_UBI=y
+CONFIG_MTD_UBI_GLUEBI=y
+CONFIG_PROC_DEVICETREE=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=8192
# CONFIG_BLK_DEV is not set
CONFIG_SCSI=y
CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_SG=y
+CONFIG_SCSI_MULTI_LUN=y
# CONFIG_SCSI_LOWLEVEL is not set
CONFIG_NETDEVICES=y
+CONFIG_VLAN_8021Q=y
+CONFIG_MACVLAN=y
CONFIG_BCMGENET=y
CONFIG_USB_USBNET=y
-# CONFIG_INPUT is not set
+CONFIG_INPUT_EVDEV=y
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+CONFIG_INPUT_MISC=y
+CONFIG_INPUT_UINPUT=y
# CONFIG_SERIO is not set
-# CONFIG_VT is not set
+CONFIG_VT=y
+CONFIG_VT_HW_CONSOLE_BINDING=y
+# CONFIG_DEVKMEM is not set
CONFIG_SERIAL_8250=y
# CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_OF_PLATFORM=y
# CONFIG_HW_RANDOM is not set
CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_BRCMSTB=y
CONFIG_POWER_RESET_SYSCON=y
CONFIG_POWER_SUPPLY=y
# CONFIG_HWMON is not set
@@ -69,22 +154,76 @@ CONFIG_USB_OHCI_HCD=y
CONFIG_USB_OHCI_HCD_PLATFORM=y
CONFIG_USB_STORAGE=y
CONFIG_SOC_BRCMSTB=y
+CONFIG_MMC=y
+CONFIG_MMC_BLOCK_MINORS=16
+CONFIG_MMC_SDHCI=y
+CONFIG_MMC_SDHCI_PLTFM=y
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_EXT4_FS_SECURITY=y
# CONFIG_DNOTIFY is not set
+CONFIG_PROC_KCORE=y
+CONFIG_CIFS=y
+CONFIG_JBD2_DEBUG=y
CONFIG_FUSE_FS=y
+CONFIG_FHANDLE=y
+CONFIG_CGROUPS=y
+CONFIG_CUSE=y
+CONFIG_ISO9660_FS=y
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_UDF_FS=y
+CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
-CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
+CONFIG_JFFS2_FS=y
+CONFIG_UBIFS_FS=y
+CONFIG_SQUASHFS=y
+CONFIG_SQUASHFS_LZO=y
+CONFIG_SQUASHFS_XZ=y
CONFIG_NFS_FS=y
-CONFIG_CIFS=y
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=y
+CONFIG_NFS_V4_1=y
+CONFIG_NFS_V4_2=y
+CONFIG_ROOT_NFS=y
CONFIG_NLS_CODEPAGE_437=y
-CONFIG_NLS_ASCII=y
CONFIG_NLS_ISO8859_1=y
-# CONFIG_CRYPTO_HW is not set
CONFIG_PRINTK_TIME=y
+CONFIG_DYNAMIC_DEBUG=y
+# CONFIG_DEBUG_INFO is not set
+# CONFIG_DEBUG_INFO_REDUCED is not set
CONFIG_DEBUG_FS=y
CONFIG_MAGIC_SYSRQ=y
+CONFIG_LOCKUP_DETECTOR=y
+CONFIG_DEBUG_USER=y
CONFIG_CMDLINE_BOOL=y
CONFIG_CMDLINE="earlycon"
+# CONFIG_MIPS_CMDLINE_FROM_DTB is not set
+CONFIG_MIPS_CMDLINE_DTB_EXTEND=y
+# CONFIG_MIPS_CMDLINE_FROM_BOOTLOADER is not set
+# CONFIG_CRYPTO_HW is not set
+CONFIG_DT_BCM974XX=y
+CONFIG_FW_CFE=y
+CONFIG_ATA=y
+CONFIG_SATA_AHCI_PLATFORM=y
+CONFIG_AHCI_BRCMSTB=y
+CONFIG_GENERIC_PHY=y
+CONFIG_GPIOLIB=y
+CONFIG_GPIO_SYSFS=y
+CONFIG_PHY_BRCM_USB=y
+CONFIG_PHY_BRCM_SATA=y
+CONFIG_PM_RUNTIME=y
+CONFIG_PM_DEBUG=y
+CONFIG_SYSVIPC=y
+CONFIG_FUNCTION_GRAPH_TRACER=y
+CONFIG_DYNAMIC_FTRACE=y
+CONFIG_FUNCTION_TRACER=y
+CONFIG_FUNCTION_PROFILER=y
+CONFIG_IRQSOFF_TRACER=y
+CONFIG_SCHED_TRACER=y
+CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_FTRACE_SYSCALLS=y
+CONFIG_TRACER_SNAPSHOT=y
+CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y
+CONFIG_STACK_TRACER=y
diff --git a/arch/mips/dec/setup.c b/arch/mips/dec/setup.c
index eaad0ed4b523..a8a30bb1dee8 100644
--- a/arch/mips/dec/setup.c
+++ b/arch/mips/dec/setup.c
@@ -117,21 +117,21 @@ static void __init dec_be_init(void)
{
switch (mips_machtype) {
case MACH_DS23100: /* DS2100/DS3100 Pmin/Pmax */
- board_be_handler = dec_kn01_be_handler;
+ mips_set_be_handler(dec_kn01_be_handler);
busirq_handler = dec_kn01_be_interrupt;
busirq_flags |= IRQF_SHARED;
dec_kn01_be_init();
break;
case MACH_DS5000_1XX: /* DS5000/1xx 3min */
case MACH_DS5000_XX: /* DS5000/xx Maxine */
- board_be_handler = dec_kn02xa_be_handler;
+ mips_set_be_handler(dec_kn02xa_be_handler);
busirq_handler = dec_kn02xa_be_interrupt;
dec_kn02xa_be_init();
break;
case MACH_DS5000_200: /* DS5000/200 3max */
case MACH_DS5000_2X0: /* DS5000/240 3max+ */
case MACH_DS5900: /* DS5900 bigmax */
- board_be_handler = dec_ecc_be_handler;
+ mips_set_be_handler(dec_ecc_be_handler);
busirq_handler = dec_ecc_be_interrupt;
dec_ecc_be_init();
break;
diff --git a/arch/mips/generic/yamon-dt.c b/arch/mips/generic/yamon-dt.c
index a3aa22c77cad..a07a5edbcda7 100644
--- a/arch/mips/generic/yamon-dt.c
+++ b/arch/mips/generic/yamon-dt.c
@@ -75,7 +75,7 @@ static unsigned int __init gen_fdt_mem_array(
__init int yamon_dt_append_memory(void *fdt,
const struct yamon_mem_region *regions)
{
- unsigned long phys_memsize, memsize;
+ unsigned long phys_memsize = 0, memsize;
__be32 mem_array[2 * MAX_MEM_ARRAY_ENTRIES];
unsigned int mem_entries;
int i, err, mem_off;
diff --git a/arch/mips/include/asm/cacheflush.h b/arch/mips/include/asm/cacheflush.h
index f207388541d5..b3dc9c589442 100644
--- a/arch/mips/include/asm/cacheflush.h
+++ b/arch/mips/include/asm/cacheflush.h
@@ -61,8 +61,6 @@ static inline void flush_dcache_page(struct page *page)
SetPageDcacheDirty(page);
}
-void flush_dcache_folio(struct folio *folio);
-
#define flush_dcache_mmap_lock(mapping) do { } while (0)
#define flush_dcache_mmap_unlock(mapping) do { } while (0)
diff --git a/arch/mips/include/asm/traps.h b/arch/mips/include/asm/traps.h
index b710e76c9c65..15cde638b407 100644
--- a/arch/mips/include/asm/traps.h
+++ b/arch/mips/include/asm/traps.h
@@ -15,7 +15,7 @@
#define MIPS_BE_FATAL 2 /* treat as an unrecoverable error */
extern void (*board_be_init)(void);
-extern int (*board_be_handler)(struct pt_regs *regs, int is_fixup);
+void mips_set_be_handler(int (*handler)(struct pt_regs *reg, int is_fixup));
extern void (*board_nmi_handler_setup)(void);
extern void (*board_ejtag_handler_setup)(void);
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index ac0e2cfc6d57..24a529c6c4be 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -1734,8 +1734,6 @@ static inline void decode_cpucfg(struct cpuinfo_mips *c)
static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu)
{
- decode_configs(c);
-
/* All Loongson processors covered here define ExcCode 16 as GSExc. */
c->options |= MIPS_CPU_GSEXCEX;
@@ -1796,6 +1794,8 @@ static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu)
panic("Unknown Loongson Processor ID!");
break;
}
+
+ decode_configs(c);
}
#else
static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu) { }
diff --git a/arch/mips/kernel/proc.c b/arch/mips/kernel/proc.c
index 376a6e2676e9..9f47a889b047 100644
--- a/arch/mips/kernel/proc.c
+++ b/arch/mips/kernel/proc.c
@@ -185,7 +185,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
seq_puts(m, " tx39_cache");
if (cpu_has_octeon_cache)
seq_puts(m, " octeon_cache");
- if (cpu_has_fpu)
+ if (raw_cpu_has_fpu)
seq_puts(m, " fpu");
if (cpu_has_32fpr)
seq_puts(m, " 32fpr");
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index 70e32de2bcaa..72d02d363f36 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -387,3 +387,4 @@
446 n32 landlock_restrict_self sys_landlock_restrict_self
# 447 reserved for memfd_secret
448 n32 process_mrelease sys_process_mrelease
+449 n32 futex_waitv sys_futex_waitv
diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl
index 1ca7bc337932..e2c481fcede6 100644
--- a/arch/mips/kernel/syscalls/syscall_n64.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n64.tbl
@@ -363,3 +363,4 @@
446 n64 landlock_restrict_self sys_landlock_restrict_self
# 447 reserved for memfd_secret
448 n64 process_mrelease sys_process_mrelease
+449 n64 futex_waitv sys_futex_waitv
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index a61c35edaa74..3714c97b2643 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -436,3 +436,4 @@
446 o32 landlock_restrict_self sys_landlock_restrict_self
# 447 reserved for memfd_secret
448 o32 process_mrelease sys_process_mrelease
+449 o32 futex_waitv sys_futex_waitv
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 6f07362de5ce..d26b0fb8ea06 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -103,13 +103,19 @@ extern asmlinkage void handle_reserved(void);
extern void tlb_do_page_fault_0(void);
void (*board_be_init)(void);
-int (*board_be_handler)(struct pt_regs *regs, int is_fixup);
+static int (*board_be_handler)(struct pt_regs *regs, int is_fixup);
void (*board_nmi_handler_setup)(void);
void (*board_ejtag_handler_setup)(void);
void (*board_bind_eic_interrupt)(int irq, int regset);
void (*board_ebase_setup)(void);
void(*board_cache_error_setup)(void);
+void mips_set_be_handler(int (*handler)(struct pt_regs *regs, int is_fixup))
+{
+ board_be_handler = handler;
+}
+EXPORT_SYMBOL_GPL(mips_set_be_handler);
+
static void show_raw_backtrace(unsigned long reg29, const char *loglvl,
bool user)
{
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 562aa878b266..aa20d074d388 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -1067,7 +1067,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = 1;
break;
case KVM_CAP_NR_VCPUS:
- r = num_online_cpus();
+ r = min_t(unsigned int, num_online_cpus(), KVM_MAX_VCPUS);
break;
case KVM_CAP_MAX_VCPUS:
r = KVM_MAX_VCPUS;
diff --git a/arch/mips/lantiq/clk.c b/arch/mips/lantiq/clk.c
index dd819e31fcbb..4916cccf378f 100644
--- a/arch/mips/lantiq/clk.c
+++ b/arch/mips/lantiq/clk.c
@@ -158,6 +158,12 @@ void clk_deactivate(struct clk *clk)
}
EXPORT_SYMBOL(clk_deactivate);
+struct clk *clk_get_parent(struct clk *clk)
+{
+ return NULL;
+}
+EXPORT_SYMBOL(clk_get_parent);
+
static inline u32 get_counter_resolution(void)
{
u32 res;
diff --git a/arch/mips/sgi-ip22/ip22-berr.c b/arch/mips/sgi-ip22/ip22-berr.c
index dc0110a607a5..afe8a61078e4 100644
--- a/arch/mips/sgi-ip22/ip22-berr.c
+++ b/arch/mips/sgi-ip22/ip22-berr.c
@@ -112,5 +112,5 @@ static int ip22_be_handler(struct pt_regs *regs, int is_fixup)
void __init ip22_be_init(void)
{
- board_be_handler = ip22_be_handler;
+ mips_set_be_handler(ip22_be_handler);
}
diff --git a/arch/mips/sgi-ip22/ip28-berr.c b/arch/mips/sgi-ip22/ip28-berr.c
index c61362d9ea95..16ca470deb80 100644
--- a/arch/mips/sgi-ip22/ip28-berr.c
+++ b/arch/mips/sgi-ip22/ip28-berr.c
@@ -468,7 +468,7 @@ static int ip28_be_handler(struct pt_regs *regs, int is_fixup)
void __init ip22_be_init(void)
{
- board_be_handler = ip28_be_handler;
+ mips_set_be_handler(ip28_be_handler);
}
int ip28_show_be_info(struct seq_file *m)
diff --git a/arch/mips/sgi-ip27/ip27-berr.c b/arch/mips/sgi-ip27/ip27-berr.c
index 5a38ae6bdfa9..923a63a51cda 100644
--- a/arch/mips/sgi-ip27/ip27-berr.c
+++ b/arch/mips/sgi-ip27/ip27-berr.c
@@ -85,7 +85,7 @@ void __init ip27_be_init(void)
int cpu = LOCAL_HUB_L(PI_CPU_NUM);
int cpuoff = cpu << 8;
- board_be_handler = ip27_be_handler;
+ mips_set_be_handler(ip27_be_handler);
LOCAL_HUB_S(PI_ERR_INT_PEND,
cpu ? PI_ERR_CLEAR_ALL_B : PI_ERR_CLEAR_ALL_A);
diff --git a/arch/mips/sgi-ip32/ip32-berr.c b/arch/mips/sgi-ip32/ip32-berr.c
index c860f95ab7ed..478b63b4c808 100644
--- a/arch/mips/sgi-ip32/ip32-berr.c
+++ b/arch/mips/sgi-ip32/ip32-berr.c
@@ -34,5 +34,5 @@ static int ip32_be_handler(struct pt_regs *regs, int is_fixup)
void __init ip32_be_init(void)
{
- board_be_handler = ip32_be_handler;
+ mips_set_be_handler(ip32_be_handler);
}
diff --git a/arch/mips/sibyte/swarm/setup.c b/arch/mips/sibyte/swarm/setup.c
index f07b15dd1c1a..72a31eeeebba 100644
--- a/arch/mips/sibyte/swarm/setup.c
+++ b/arch/mips/sibyte/swarm/setup.c
@@ -122,7 +122,7 @@ void __init plat_mem_setup(void)
#error invalid SiByte board configuration
#endif
- board_be_handler = swarm_be_handler;
+ mips_set_be_handler(swarm_be_handler);
if (xicor_probe())
swarm_rtc_type = RTC_XICOR;
diff --git a/arch/mips/txx9/generic/setup_tx4927.c b/arch/mips/txx9/generic/setup_tx4927.c
index 46e9c4101386..63f9725b2eb0 100644
--- a/arch/mips/txx9/generic/setup_tx4927.c
+++ b/arch/mips/txx9/generic/setup_tx4927.c
@@ -80,7 +80,7 @@ static int tx4927_be_handler(struct pt_regs *regs, int is_fixup)
}
static void __init tx4927_be_init(void)
{
- board_be_handler = tx4927_be_handler;
+ mips_set_be_handler(tx4927_be_handler);
}
static struct resource tx4927_sdram_resource[4];
diff --git a/arch/mips/txx9/generic/setup_tx4938.c b/arch/mips/txx9/generic/setup_tx4938.c
index 17395d5d15ca..ba646548c5f6 100644
--- a/arch/mips/txx9/generic/setup_tx4938.c
+++ b/arch/mips/txx9/generic/setup_tx4938.c
@@ -82,7 +82,7 @@ static int tx4938_be_handler(struct pt_regs *regs, int is_fixup)
}
static void __init tx4938_be_init(void)
{
- board_be_handler = tx4938_be_handler;
+ mips_set_be_handler(tx4938_be_handler);
}
static struct resource tx4938_sdram_resource[4];
diff --git a/arch/mips/txx9/generic/setup_tx4939.c b/arch/mips/txx9/generic/setup_tx4939.c
index bf8a3cdababf..f5f59b7401a3 100644
--- a/arch/mips/txx9/generic/setup_tx4939.c
+++ b/arch/mips/txx9/generic/setup_tx4939.c
@@ -86,7 +86,7 @@ static int tx4939_be_handler(struct pt_regs *regs, int is_fixup)
}
static void __init tx4939_be_init(void)
{
- board_be_handler = tx4939_be_handler;
+ mips_set_be_handler(tx4939_be_handler);
}
static struct resource tx4939_sdram_resource[4];
diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile
index 1b2ea34c3d3b..d65f55f67e19 100644
--- a/arch/mips/vdso/Makefile
+++ b/arch/mips/vdso/Makefile
@@ -57,7 +57,7 @@ endif
# VDSO linker flags.
ldflags-y := -Bsymbolic --no-undefined -soname=linux-vdso.so.1 \
- $(filter -E%,$(KBUILD_CFLAGS)) -nostdlib -shared \
+ $(filter -E%,$(KBUILD_CFLAGS)) -shared \
-G 0 --eh-frame-hdr --hash-style=sysv --build-id=sha1 -T
CFLAGS_REMOVE_vdso.o = $(CC_FLAGS_FTRACE)
diff --git a/arch/nds32/include/asm/cacheflush.h b/arch/nds32/include/asm/cacheflush.h
index 3fc0bb7d6487..c2a222ebfa2a 100644
--- a/arch/nds32/include/asm/cacheflush.h
+++ b/arch/nds32/include/asm/cacheflush.h
@@ -27,7 +27,6 @@ void flush_cache_vunmap(unsigned long start, unsigned long end);
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
void flush_dcache_page(struct page *page);
-void flush_dcache_folio(struct folio *folio);
void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
unsigned long vaddr, void *dst, void *src, int len);
void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
diff --git a/arch/nios2/include/asm/cacheflush.h b/arch/nios2/include/asm/cacheflush.h
index 1999561b22aa..d0b71dd71287 100644
--- a/arch/nios2/include/asm/cacheflush.h
+++ b/arch/nios2/include/asm/cacheflush.h
@@ -29,7 +29,6 @@ extern void flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr,
unsigned long pfn);
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
void flush_dcache_page(struct page *page);
-void flush_dcache_folio(struct folio *folio);
extern void flush_icache_range(unsigned long start, unsigned long end);
extern void flush_icache_page(struct vm_area_struct *vma, struct page *page);
diff --git a/arch/parisc/configs/generic-32bit_defconfig b/arch/parisc/configs/generic-32bit_defconfig
index d6fd8fa7ed8c..53061cb2cf7f 100644
--- a/arch/parisc/configs/generic-32bit_defconfig
+++ b/arch/parisc/configs/generic-32bit_defconfig
@@ -231,6 +231,7 @@ CONFIG_CRYPTO_DEFLATE=y
CONFIG_CRC_CCITT=m
CONFIG_CRC_T10DIF=y
CONFIG_FONTS=y
+CONFIG_PRINTK_TIME=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_FS=y
CONFIG_DEBUG_MEMORY_INIT=y
diff --git a/arch/parisc/include/asm/assembly.h b/arch/parisc/include/asm/assembly.h
index 7085df079702..6d13ae236fcb 100644
--- a/arch/parisc/include/asm/assembly.h
+++ b/arch/parisc/include/asm/assembly.h
@@ -3,38 +3,19 @@
* Copyright (C) 1999 Hewlett-Packard (Frank Rowand)
* Copyright (C) 1999 Philipp Rumpf <prumpf@tux.org>
* Copyright (C) 1999 SuSE GmbH
+ * Copyright (C) 2021 Helge Deller <deller@gmx.de>
*/
#ifndef _PARISC_ASSEMBLY_H
#define _PARISC_ASSEMBLY_H
-#define CALLEE_FLOAT_FRAME_SIZE 80
-
#ifdef CONFIG_64BIT
-#define LDREG ldd
-#define STREG std
-#define LDREGX ldd,s
-#define LDREGM ldd,mb
-#define STREGM std,ma
-#define SHRREG shrd
-#define SHLREG shld
-#define ANDCM andcm,*
-#define COND(x) * ## x
#define RP_OFFSET 16
#define FRAME_SIZE 128
#define CALLEE_REG_FRAME_SIZE 144
#define REG_SZ 8
#define ASM_ULONG_INSN .dword
#else /* CONFIG_64BIT */
-#define LDREG ldw
-#define STREG stw
-#define LDREGX ldwx,s
-#define LDREGM ldwm
-#define STREGM stwm
-#define SHRREG shr
-#define SHLREG shlw
-#define ANDCM andcm
-#define COND(x) x
#define RP_OFFSET 20
#define FRAME_SIZE 64
#define CALLEE_REG_FRAME_SIZE 128
@@ -45,6 +26,7 @@
/* Frame alignment for 32- and 64-bit */
#define FRAME_ALIGN 64
+#define CALLEE_FLOAT_FRAME_SIZE 80
#define CALLEE_SAVE_FRAME_SIZE (CALLEE_REG_FRAME_SIZE + CALLEE_FLOAT_FRAME_SIZE)
#ifdef CONFIG_PA20
@@ -68,6 +50,28 @@
#ifdef __ASSEMBLY__
#ifdef CONFIG_64BIT
+#define LDREG ldd
+#define STREG std
+#define LDREGX ldd,s
+#define LDREGM ldd,mb
+#define STREGM std,ma
+#define SHRREG shrd
+#define SHLREG shld
+#define ANDCM andcm,*
+#define COND(x) * ## x
+#else /* CONFIG_64BIT */
+#define LDREG ldw
+#define STREG stw
+#define LDREGX ldwx,s
+#define LDREGM ldwm
+#define STREGM stwm
+#define SHRREG shr
+#define SHLREG shlw
+#define ANDCM andcm
+#define COND(x) x
+#endif
+
+#ifdef CONFIG_64BIT
/* the 64-bit pa gnu assembler unfortunately defaults to .level 1.1 or 2.0 so
* work around that for now... */
.level 2.0w
@@ -143,6 +147,17 @@
extrd,u \r, 63-(\sa), 64-(\sa), \t
.endm
+ /* Extract unsigned for 32- and 64-bit
+ * The extru instruction leaves the most significant 32 bits of the
+ * target register in an undefined state on PA 2.0 systems. */
+ .macro extru_safe r, p, len, t
+#ifdef CONFIG_64BIT
+ extrd,u \r, 32+(\p), \len, \t
+#else
+ extru \r, \p, \len, \t
+#endif
+ .endm
+
/* load 32-bit 'value' into 'reg' compensating for the ldil
* sign-extension when running in wide mode.
* WARNING!! neither 'value' nor 'reg' can be expressions
diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h
index da0cd4b3a28f..859b8a34adcf 100644
--- a/arch/parisc/include/asm/cacheflush.h
+++ b/arch/parisc/include/asm/cacheflush.h
@@ -50,7 +50,6 @@ void invalidate_kernel_vmap_range(void *vaddr, int size);
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
void flush_dcache_page(struct page *page);
-void flush_dcache_folio(struct folio *folio);
#define flush_dcache_mmap_lock(mapping) xa_lock_irq(&mapping->i_pages)
#define flush_dcache_mmap_unlock(mapping) xa_unlock_irq(&mapping->i_pages)
diff --git a/arch/parisc/include/asm/jump_label.h b/arch/parisc/include/asm/jump_label.h
index 7efb1aa2f7f8..af2a598bc0f8 100644
--- a/arch/parisc/include/asm/jump_label.h
+++ b/arch/parisc/include/asm/jump_label.h
@@ -5,6 +5,7 @@
#ifndef __ASSEMBLY__
#include <linux/types.h>
+#include <linux/stringify.h>
#include <asm/assembly.h>
#define JUMP_LABEL_NOP_SIZE 4
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index 7badd872f05a..3e7cf882639f 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -76,6 +76,8 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
purge_tlb_end(flags);
}
+extern void __update_cache(pte_t pte);
+
/* Certain architectures need to do special things when PTEs
* within a page table are directly modified. Thus, the following
* hook is made available.
@@ -83,11 +85,14 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
#define set_pte(pteptr, pteval) \
do { \
*(pteptr) = (pteval); \
- barrier(); \
+ mb(); \
} while(0)
#define set_pte_at(mm, addr, pteptr, pteval) \
do { \
+ if (pte_present(pteval) && \
+ pte_user(pteval)) \
+ __update_cache(pteval); \
*(pteptr) = (pteval); \
purge_tlb_entries(mm, addr); \
} while (0)
@@ -303,6 +308,7 @@ extern unsigned long *empty_zero_page;
#define pte_none(x) (pte_val(x) == 0)
#define pte_present(x) (pte_val(x) & _PAGE_PRESENT)
+#define pte_user(x) (pte_val(x) & _PAGE_USER)
#define pte_clear(mm, addr, xp) set_pte_at(mm, addr, xp, __pte(0))
#define pmd_flag(x) (pmd_val(x) & PxD_FLAG_MASK)
@@ -410,7 +416,7 @@ extern void paging_init (void);
#define PG_dcache_dirty PG_arch_1
-extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *);
+#define update_mmu_cache(vms,addr,ptep) __update_cache(*ptep)
/* Encode and de-code a swap entry */
diff --git a/arch/parisc/include/asm/rt_sigframe.h b/arch/parisc/include/asm/rt_sigframe.h
index 4b9e3d707571..2b3010ade00e 100644
--- a/arch/parisc/include/asm/rt_sigframe.h
+++ b/arch/parisc/include/asm/rt_sigframe.h
@@ -2,7 +2,7 @@
#ifndef _ASM_PARISC_RT_SIGFRAME_H
#define _ASM_PARISC_RT_SIGFRAME_H
-#define SIGRETURN_TRAMP 3
+#define SIGRETURN_TRAMP 4
#define SIGRESTARTBLOCK_TRAMP 5
#define TRAMP_SIZE (SIGRETURN_TRAMP + SIGRESTARTBLOCK_TRAMP)
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index c61827e4928a..94150b91c96f 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -83,9 +83,9 @@ EXPORT_SYMBOL(flush_cache_all_local);
#define pfn_va(pfn) __va(PFN_PHYS(pfn))
void
-update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
+__update_cache(pte_t pte)
{
- unsigned long pfn = pte_pfn(*ptep);
+ unsigned long pfn = pte_pfn(pte);
struct page *page;
/* We don't have pte special. As a result, we can be called with
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index 57944d6f9ebb..6e9cdb269862 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -366,17 +366,9 @@
*/
.macro L2_ptep pmd,pte,index,va,fault
#if CONFIG_PGTABLE_LEVELS == 3
- extru \va,31-ASM_PMD_SHIFT,ASM_BITS_PER_PMD,\index
+ extru_safe \va,31-ASM_PMD_SHIFT,ASM_BITS_PER_PMD,\index
#else
-# if defined(CONFIG_64BIT)
- extrd,u \va,63-ASM_PGDIR_SHIFT,ASM_BITS_PER_PGD,\index
- #else
- # if PAGE_SIZE > 4096
- extru \va,31-ASM_PGDIR_SHIFT,32-ASM_PGDIR_SHIFT,\index
- # else
- extru \va,31-ASM_PGDIR_SHIFT,ASM_BITS_PER_PGD,\index
- # endif
-# endif
+ extru_safe \va,31-ASM_PGDIR_SHIFT,ASM_BITS_PER_PGD,\index
#endif
dep %r0,31,PAGE_SHIFT,\pmd /* clear offset */
#if CONFIG_PGTABLE_LEVELS < 3
@@ -386,7 +378,7 @@
bb,>=,n \pmd,_PxD_PRESENT_BIT,\fault
dep %r0,31,PxD_FLAG_SHIFT,\pmd /* clear flags */
SHLREG \pmd,PxD_VALUE_SHIFT,\pmd
- extru \va,31-PAGE_SHIFT,ASM_BITS_PER_PTE,\index
+ extru_safe \va,31-PAGE_SHIFT,ASM_BITS_PER_PTE,\index
dep %r0,31,PAGE_SHIFT,\pmd /* clear offset */
shladd \index,BITS_PER_PTE_ENTRY,\pmd,\pmd /* pmd is now pte */
.endm
@@ -1805,7 +1797,7 @@ syscall_restore:
/* Are we being ptraced? */
LDREG TASK_TI_FLAGS(%r1),%r19
- ldi _TIF_SYSCALL_TRACE_MASK,%r2
+ ldi _TIF_SINGLESTEP|_TIF_BLOCKSTEP,%r2
and,COND(=) %r19,%r2,%r0
b,n syscall_restore_rfi
diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c
index bbfe23c40c01..46b1050640b8 100644
--- a/arch/parisc/kernel/signal.c
+++ b/arch/parisc/kernel/signal.c
@@ -288,21 +288,22 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs,
already in userspace. The first words of tramp are used to
save the previous sigrestartblock trampoline that might be
on the stack. We start the sigreturn trampoline at
- SIGRESTARTBLOCK_TRAMP. */
+ SIGRESTARTBLOCK_TRAMP+X. */
err |= __put_user(in_syscall ? INSN_LDI_R25_1 : INSN_LDI_R25_0,
&frame->tramp[SIGRESTARTBLOCK_TRAMP+0]);
- err |= __put_user(INSN_BLE_SR2_R0,
+ err |= __put_user(INSN_LDI_R20,
&frame->tramp[SIGRESTARTBLOCK_TRAMP+1]);
- err |= __put_user(INSN_LDI_R20,
+ err |= __put_user(INSN_BLE_SR2_R0,
&frame->tramp[SIGRESTARTBLOCK_TRAMP+2]);
+ err |= __put_user(INSN_NOP, &frame->tramp[SIGRESTARTBLOCK_TRAMP+3]);
- start = (unsigned long) &frame->tramp[SIGRESTARTBLOCK_TRAMP+0];
- end = (unsigned long) &frame->tramp[SIGRESTARTBLOCK_TRAMP+3];
+ start = (unsigned long) &frame->tramp[0];
+ end = (unsigned long) &frame->tramp[TRAMP_SIZE];
flush_user_dcache_range_asm(start, end);
flush_user_icache_range_asm(start, end);
/* TRAMP Words 0-4, Length 5 = SIGRESTARTBLOCK_TRAMP
- * TRAMP Words 5-7, Length 3 = SIGRETURN_TRAMP
+ * TRAMP Words 5-9, Length 4 = SIGRETURN_TRAMP
* So the SIGRETURN_TRAMP is at the end of SIGRESTARTBLOCK_TRAMP
*/
rp = (unsigned long) &frame->tramp[SIGRESTARTBLOCK_TRAMP];
diff --git a/arch/parisc/kernel/signal32.h b/arch/parisc/kernel/signal32.h
index a5bdbb5678b7..f166250f2d06 100644
--- a/arch/parisc/kernel/signal32.h
+++ b/arch/parisc/kernel/signal32.h
@@ -36,7 +36,7 @@ struct compat_regfile {
compat_int_t rf_sar;
};
-#define COMPAT_SIGRETURN_TRAMP 3
+#define COMPAT_SIGRETURN_TRAMP 4
#define COMPAT_SIGRESTARTBLOCK_TRAMP 5
#define COMPAT_TRAMP_SIZE (COMPAT_SIGRETURN_TRAMP + \
COMPAT_SIGRESTARTBLOCK_TRAMP)
diff --git a/arch/parisc/kernel/stacktrace.c b/arch/parisc/kernel/stacktrace.c
index 6b4ca91932cf..023834ef582e 100644
--- a/arch/parisc/kernel/stacktrace.c
+++ b/arch/parisc/kernel/stacktrace.c
@@ -8,6 +8,7 @@
*
* TODO: Userspace stacktrace (CONFIG_USER_STACKTRACE_SUPPORT)
*/
+#include <linux/kernel.h>
#include <linux/stacktrace.h>
#include <asm/unwind.h>
diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S
index 4fb3b6a993bf..d2497b339d13 100644
--- a/arch/parisc/kernel/syscall.S
+++ b/arch/parisc/kernel/syscall.S
@@ -566,7 +566,7 @@ lws_compare_and_swap:
ldo R%lws_lock_start(%r20), %r28
/* Extract eight bits from r26 and hash lock (Bits 3-11) */
- extru %r26, 28, 8, %r20
+ extru_safe %r26, 28, 8, %r20
/* Find lock to use, the hash is either one of 0 to
15, multiplied by 16 (keep it 16-byte aligned)
@@ -751,7 +751,7 @@ cas2_lock_start:
ldo R%lws_lock_start(%r20), %r28
/* Extract eight bits from r26 and hash lock (Bits 3-11) */
- extru %r26, 28, 8, %r20
+ extru_safe %r26, 28, 8, %r20
/* Find lock to use, the hash is either one of 0 to
15, multiplied by 16 (keep it 16-byte aligned)
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index bf751e0732b7..358c00000755 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -446,3 +446,4 @@
446 common landlock_restrict_self sys_landlock_restrict_self
# 447 reserved for memfd_secret
448 common process_mrelease sys_process_mrelease
+449 common futex_waitv sys_futex_waitv
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 0e3640e14eb1..5fa68c2ef1f8 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -196,3 +196,6 @@ clean-files := vmlinux.lds
# Force dependency (incbin is bad)
$(obj)/vdso32_wrapper.o : $(obj)/vdso32/vdso32.so.dbg
$(obj)/vdso64_wrapper.o : $(obj)/vdso64/vdso64.so.dbg
+
+# for cleaning
+subdir- += vdso32 vdso64
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index 6b1ec9e3541b..349c4a820231 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -202,11 +202,11 @@ vmap_stack_overflow:
mfspr r1, SPRN_SPRG_THREAD
lwz r1, TASK_CPU - THREAD(r1)
slwi r1, r1, 3
- addis r1, r1, emergency_ctx@ha
+ addis r1, r1, emergency_ctx-PAGE_OFFSET@ha
#else
- lis r1, emergency_ctx@ha
+ lis r1, emergency_ctx-PAGE_OFFSET@ha
#endif
- lwz r1, emergency_ctx@l(r1)
+ lwz r1, emergency_ctx-PAGE_OFFSET@l(r1)
addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE
EXCEPTION_PROLOG_2 0 vmap_stack_overflow
prepare_transfer_to_handler
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 2d596881b70e..0d073b9fd52c 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -733,6 +733,7 @@ _GLOBAL(mmu_pin_tlb)
#ifdef CONFIG_PIN_TLB_DATA
LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET)
LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG | _PMD_ACCESSED)
+ li r8, 0
#ifdef CONFIG_PIN_TLB_IMMR
li r0, 3
#else
@@ -741,26 +742,26 @@ _GLOBAL(mmu_pin_tlb)
mtctr r0
cmpwi r4, 0
beq 4f
- LOAD_REG_IMMEDIATE(r8, 0xf0 | _PAGE_RO | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT)
LOAD_REG_ADDR(r9, _sinittext)
2: ori r0, r6, MD_EVALID
+ ori r12, r8, 0xf0 | _PAGE_RO | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT
mtspr SPRN_MD_CTR, r5
mtspr SPRN_MD_EPN, r0
mtspr SPRN_MD_TWC, r7
- mtspr SPRN_MD_RPN, r8
+ mtspr SPRN_MD_RPN, r12
addi r5, r5, 0x100
addis r6, r6, SZ_8M@h
addis r8, r8, SZ_8M@h
cmplw r6, r9
bdnzt lt, 2b
-
-4: LOAD_REG_IMMEDIATE(r8, 0xf0 | _PAGE_DIRTY | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT)
+4:
2: ori r0, r6, MD_EVALID
+ ori r12, r8, 0xf0 | _PAGE_DIRTY | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT
mtspr SPRN_MD_CTR, r5
mtspr SPRN_MD_EPN, r0
mtspr SPRN_MD_TWC, r7
- mtspr SPRN_MD_RPN, r8
+ mtspr SPRN_MD_RPN, r12
addi r5, r5, 0x100
addis r6, r6, SZ_8M@h
addis r8, r8, SZ_8M@h
@@ -781,7 +782,7 @@ _GLOBAL(mmu_pin_tlb)
#endif
#if defined(CONFIG_PIN_TLB_IMMR) || defined(CONFIG_PIN_TLB_DATA)
lis r0, (MD_RSV4I | MD_TWAM)@h
- mtspr SPRN_MI_CTR, r0
+ mtspr SPRN_MD_CTR, r0
#endif
mtspr SPRN_SRR1, r10
mtspr SPRN_SRR0, r11
diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h
index 1f07317964e4..618aeccdf691 100644
--- a/arch/powerpc/kernel/signal.h
+++ b/arch/powerpc/kernel/signal.h
@@ -25,8 +25,14 @@ static inline int __get_user_sigset(sigset_t *dst, const sigset_t __user *src)
return __get_user(dst->sig[0], (u64 __user *)&src->sig[0]);
}
-#define unsafe_get_user_sigset(dst, src, label) \
- unsafe_get_user((dst)->sig[0], (u64 __user *)&(src)->sig[0], label)
+#define unsafe_get_user_sigset(dst, src, label) do { \
+ sigset_t *__dst = dst; \
+ const sigset_t __user *__src = src; \
+ int i; \
+ \
+ for (i = 0; i < _NSIG_WORDS; i++) \
+ unsafe_get_user(__dst->sig[i], &__src->sig[i], label); \
+} while (0)
#ifdef CONFIG_VSX
extern unsigned long copy_vsx_to_user(void __user *to,
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 00a9c9cd6d42..3e053e2fd6b6 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -1063,7 +1063,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
* We kill the task with a SIGSEGV in this situation.
*/
if (do_setcontext(new_ctx, regs, 0)) {
- force_fatal_sig(SIGSEGV);
+ force_exit_sig(SIGSEGV);
return -EFAULT;
}
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index ef518535d436..d1e1fc0acbea 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -704,7 +704,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
*/
if (__get_user_sigset(&set, &new_ctx->uc_sigmask)) {
- force_fatal_sig(SIGSEGV);
+ force_exit_sig(SIGSEGV);
return -EFAULT;
}
set_current_blocked(&set);
@@ -713,7 +713,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
return -EFAULT;
if (__unsafe_restore_sigcontext(current, NULL, 0, &new_ctx->uc_mcontext)) {
user_read_access_end();
- force_fatal_sig(SIGSEGV);
+ force_exit_sig(SIGSEGV);
return -EFAULT;
}
user_read_access_end();
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index 7bef917cc84e..15109af9d075 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -528,3 +528,4 @@
446 common landlock_restrict_self sys_landlock_restrict_self
# 447 reserved for memfd_secret
448 common process_mrelease sys_process_mrelease
+449 common futex_waitv sys_futex_waitv
diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index f9ea0e5357f9..3fa6d240bade 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -187,6 +187,12 @@ static void watchdog_smp_panic(int cpu, u64 tb)
if (sysctl_hardlockup_all_cpu_backtrace)
trigger_allbutself_cpu_backtrace();
+ /*
+ * Force flush any remote buffers that might be stuck in IRQ context
+ * and therefore could not run their irq_work.
+ */
+ printk_trigger_flush();
+
if (hardlockup_panic)
nmi_panic(NULL, "Hard LOCKUP");
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index fcf4760a3a0e..70b7a8f97153 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -695,6 +695,7 @@ static void flush_guest_tlb(struct kvm *kvm)
"r" (0) : "memory");
}
asm volatile("ptesync": : :"memory");
+ // POWER9 congruence-class TLBIEL leaves ERAT. Flush it now.
asm volatile(PPC_RADIX_INVALIDATE_ERAT_GUEST : : :"memory");
} else {
for (set = 0; set < kvm->arch.tlb_sets; ++set) {
@@ -705,7 +706,9 @@ static void flush_guest_tlb(struct kvm *kvm)
rb += PPC_BIT(51); /* increment set number */
}
asm volatile("ptesync": : :"memory");
- asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT : : :"memory");
+ // POWER9 congruence-class TLBIEL leaves ERAT. Flush it now.
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT : : :"memory");
}
}
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index eb776d0c5d8e..32a4b4d412b9 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -2005,7 +2005,7 @@ hcall_real_table:
.globl hcall_real_table_end
hcall_real_table_end:
-_GLOBAL(kvmppc_h_set_xdabr)
+_GLOBAL_TOC(kvmppc_h_set_xdabr)
EXPORT_SYMBOL_GPL(kvmppc_h_set_xdabr)
andi. r0, r5, DABRX_USER | DABRX_KERNEL
beq 6f
@@ -2015,7 +2015,7 @@ EXPORT_SYMBOL_GPL(kvmppc_h_set_xdabr)
6: li r3, H_PARAMETER
blr
-_GLOBAL(kvmppc_h_set_dabr)
+_GLOBAL_TOC(kvmppc_h_set_dabr)
EXPORT_SYMBOL_GPL(kvmppc_h_set_dabr)
li r5, DABRX_USER | DABRX_KERNEL
3:
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 35e9cccdeef9..a72920f4f221 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -641,9 +641,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
* implementations just count online CPUs.
*/
if (hv_enabled)
- r = num_present_cpus();
+ r = min_t(unsigned int, num_present_cpus(), KVM_MAX_VCPUS);
else
- r = num_online_cpus();
+ r = min_t(unsigned int, num_online_cpus(), KVM_MAX_VCPUS);
break;
case KVM_CAP_MAX_VCPUS:
r = KVM_MAX_VCPUS;
diff --git a/arch/powerpc/mm/nohash/kaslr_booke.c b/arch/powerpc/mm/nohash/kaslr_booke.c
index 8fc49b1b4a91..6ec978967da0 100644
--- a/arch/powerpc/mm/nohash/kaslr_booke.c
+++ b/arch/powerpc/mm/nohash/kaslr_booke.c
@@ -314,7 +314,7 @@ static unsigned long __init kaslr_choose_location(void *dt_ptr, phys_addr_t size
pr_warn("KASLR: No safe seed for randomizing the kernel base.\n");
ram = min_t(phys_addr_t, __max_low_memory, size);
- ram = map_mem_in_cams(ram, CONFIG_LOWMEM_CAM_NUM, true, false);
+ ram = map_mem_in_cams(ram, CONFIG_LOWMEM_CAM_NUM, true, true);
linear_sz = min_t(unsigned long, ram, SZ_512M);
/* If the linear size is smaller than 64M, do not randmize */
diff --git a/arch/powerpc/mm/nohash/tlb.c b/arch/powerpc/mm/nohash/tlb.c
index 89353d4f5604..647bf454a0fa 100644
--- a/arch/powerpc/mm/nohash/tlb.c
+++ b/arch/powerpc/mm/nohash/tlb.c
@@ -645,7 +645,7 @@ static void early_init_this_mmu(void)
if (map)
linear_map_top = map_mem_in_cams(linear_map_top,
- num_cams, true, true);
+ num_cams, false, true);
}
#endif
@@ -766,7 +766,7 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base,
num_cams = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) / 4;
linear_sz = map_mem_in_cams(first_memblock_size, num_cams,
- false, true);
+ true, true);
ppc64_rma_size = min_t(u64, linear_sz, 0x40000000);
} else
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 6f14c8fb6359..59d3cfcd7887 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -376,9 +376,9 @@ static void initialize_form2_numa_distance_lookup_table(void)
{
int i, j;
struct device_node *root;
- const __u8 *numa_dist_table;
+ const __u8 *form2_distances;
const __be32 *numa_lookup_index;
- int numa_dist_table_length;
+ int form2_distances_length;
int max_numa_index, distance_index;
if (firmware_has_feature(FW_FEATURE_OPAL))
@@ -392,45 +392,41 @@ static void initialize_form2_numa_distance_lookup_table(void)
max_numa_index = of_read_number(&numa_lookup_index[0], 1);
/* first element of the array is the size and is encode-int */
- numa_dist_table = of_get_property(root, "ibm,numa-distance-table", NULL);
- numa_dist_table_length = of_read_number((const __be32 *)&numa_dist_table[0], 1);
+ form2_distances = of_get_property(root, "ibm,numa-distance-table", NULL);
+ form2_distances_length = of_read_number((const __be32 *)&form2_distances[0], 1);
/* Skip the size which is encoded int */
- numa_dist_table += sizeof(__be32);
+ form2_distances += sizeof(__be32);
- pr_debug("numa_dist_table_len = %d, numa_dist_indexes_len = %d\n",
- numa_dist_table_length, max_numa_index);
+ pr_debug("form2_distances_len = %d, numa_dist_indexes_len = %d\n",
+ form2_distances_length, max_numa_index);
for (i = 0; i < max_numa_index; i++)
/* +1 skip the max_numa_index in the property */
numa_id_index_table[i] = of_read_number(&numa_lookup_index[i + 1], 1);
- if (numa_dist_table_length != max_numa_index * max_numa_index) {
+ if (form2_distances_length != max_numa_index * max_numa_index) {
WARN(1, "Wrong NUMA distance information\n");
- /* consider everybody else just remote. */
- for (i = 0; i < max_numa_index; i++) {
- for (j = 0; j < max_numa_index; j++) {
- int nodeA = numa_id_index_table[i];
- int nodeB = numa_id_index_table[j];
-
- if (nodeA == nodeB)
- numa_distance_table[nodeA][nodeB] = LOCAL_DISTANCE;
- else
- numa_distance_table[nodeA][nodeB] = REMOTE_DISTANCE;
- }
- }
+ form2_distances = NULL; // don't use it
}
-
distance_index = 0;
for (i = 0; i < max_numa_index; i++) {
for (j = 0; j < max_numa_index; j++) {
int nodeA = numa_id_index_table[i];
int nodeB = numa_id_index_table[j];
-
- numa_distance_table[nodeA][nodeB] = numa_dist_table[distance_index++];
- pr_debug("dist[%d][%d]=%d ", nodeA, nodeB, numa_distance_table[nodeA][nodeB]);
+ int dist;
+
+ if (form2_distances)
+ dist = form2_distances[distance_index++];
+ else if (nodeA == nodeB)
+ dist = LOCAL_DISTANCE;
+ else
+ dist = REMOTE_DISTANCE;
+ numa_distance_table[nodeA][nodeB] = dist;
+ pr_debug("dist[%d][%d]=%d ", nodeA, nodeB, dist);
}
}
+
of_node_put(root);
}
diff --git a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
index bb789f33c70e..a38372f9ac12 100644
--- a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
+++ b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
@@ -186,7 +186,6 @@ err:
static int mcu_remove(struct i2c_client *client)
{
struct mcu *mcu = i2c_get_clientdata(client);
- int ret;
kthread_stop(shutdown_thread);
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 49b401536d29..8f998e55735b 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -1094,15 +1094,6 @@ static phys_addr_t ddw_memory_hotplug_max(void)
phys_addr_t max_addr = memory_hotplug_max();
struct device_node *memory;
- /*
- * The "ibm,pmemory" can appear anywhere in the address space.
- * Assuming it is still backed by page structs, set the upper limit
- * for the huge DMA window as MAX_PHYSMEM_BITS.
- */
- if (of_find_node_by_type(NULL, "ibm,pmemory"))
- return (sizeof(phys_addr_t) * 8 <= MAX_PHYSMEM_BITS) ?
- (phys_addr_t) -1 : (1ULL << MAX_PHYSMEM_BITS);
-
for_each_node_by_type(memory, "memory") {
unsigned long start, size;
int n_mem_addr_cells, n_mem_size_cells, len;
@@ -1238,7 +1229,6 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
u32 ddw_avail[DDW_APPLICABLE_SIZE];
struct dma_win *window;
struct property *win64;
- bool ddw_enabled = false;
struct failed_ddw_pdn *fpdn;
bool default_win_removed = false, direct_mapping = false;
bool pmem_present;
@@ -1253,7 +1243,6 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
if (find_existing_ddw(pdn, &dev->dev.archdata.dma_offset, &len)) {
direct_mapping = (len >= max_ram_len);
- ddw_enabled = true;
goto out_unlock;
}
@@ -1367,8 +1356,10 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
len = order_base_2(query.largest_available_block << page_shift);
win_name = DMA64_PROPNAME;
} else {
- direct_mapping = true;
- win_name = DIRECT64_PROPNAME;
+ direct_mapping = !default_win_removed ||
+ (len == MAX_PHYSMEM_BITS) ||
+ (!pmem_present && (len == max_ram_len));
+ win_name = direct_mapping ? DIRECT64_PROPNAME : DMA64_PROPNAME;
}
ret = create_ddw(dev, ddw_avail, &create, page_shift, len);
@@ -1406,8 +1397,8 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
dev_info(&dev->dev, "failed to map DMA window for %pOF: %d\n",
dn, ret);
- /* Make sure to clean DDW if any TCE was set*/
- clean_dma_window(pdn, win64->value);
+ /* Make sure to clean DDW if any TCE was set*/
+ clean_dma_window(pdn, win64->value);
goto out_del_list;
}
} else {
@@ -1454,7 +1445,6 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
spin_unlock(&dma_win_list_lock);
dev->dev.archdata.dma_offset = win_addr;
- ddw_enabled = true;
goto out_unlock;
out_del_list:
@@ -1490,10 +1480,10 @@ out_unlock:
* as RAM, then we failed to create a window to cover persistent
* memory and need to set the DMA limit.
*/
- if (pmem_present && ddw_enabled && direct_mapping && len == max_ram_len)
+ if (pmem_present && direct_mapping && len == max_ram_len)
dev->dev.bus_dma_limit = dev->dev.archdata.dma_offset + (1ULL << len);
- return ddw_enabled && direct_mapping;
+ return direct_mapping;
}
static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
diff --git a/arch/powerpc/sysdev/xive/Kconfig b/arch/powerpc/sysdev/xive/Kconfig
index 97796c6b63f0..785c292d104b 100644
--- a/arch/powerpc/sysdev/xive/Kconfig
+++ b/arch/powerpc/sysdev/xive/Kconfig
@@ -3,7 +3,6 @@ config PPC_XIVE
bool
select PPC_SMP_MUXED_IPI
select HARDIRQS_SW_RESEND
- select IRQ_DOMAIN_NOMAP
config PPC_XIVE_NATIVE
bool
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index c5d75c02ad8b..7b69299c2912 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -1443,8 +1443,7 @@ static const struct irq_domain_ops xive_irq_domain_ops = {
static void __init xive_init_host(struct device_node *np)
{
- xive_irq_domain = irq_domain_add_nomap(np, XIVE_MAX_IRQ,
- &xive_irq_domain_ops, NULL);
+ xive_irq_domain = irq_domain_add_tree(np, &xive_irq_domain_ops, NULL);
if (WARN_ON(xive_irq_domain == NULL))
return;
irq_set_default_host(xive_irq_domain);
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index a34c531be4e7..821252b65f89 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -62,6 +62,7 @@ config RISCV
select GENERIC_SCHED_CLOCK
select GENERIC_SMP_IDLE_THREAD
select GENERIC_TIME_VSYSCALL if MMU && 64BIT
+ select GENERIC_VDSO_TIME_NS if HAVE_GENERIC_VDSO
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL
select HAVE_ARCH_JUMP_LABEL_RELATIVE if !XIP_KERNEL
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index 7f19b784e649..8a107ed18b0d 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -107,11 +107,13 @@ PHONY += vdso_install
vdso_install:
$(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso $@
+ifeq ($(KBUILD_EXTMOD),)
ifeq ($(CONFIG_MMU),y)
prepare: vdso_prepare
vdso_prepare: prepare0
$(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso include/generated/vdso-offsets.h
endif
+endif
ifneq ($(CONFIG_XIP_KERNEL),y)
ifeq ($(CONFIG_RISCV_M_MODE)$(CONFIG_SOC_CANAAN),yy)
@@ -136,3 +138,13 @@ zinstall: install-image = Image.gz
install zinstall:
$(CONFIG_SHELL) $(srctree)/$(boot)/install.sh $(KERNELRELEASE) \
$(boot)/$(install-image) System.map "$(INSTALL_PATH)"
+
+PHONY += rv32_randconfig
+rv32_randconfig:
+ $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/riscv/configs/32-bit.config \
+ -f $(srctree)/Makefile randconfig
+
+PHONY += rv64_randconfig
+rv64_randconfig:
+ $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/riscv/configs/64-bit.config \
+ -f $(srctree)/Makefile randconfig
diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts
index b254c60589a1..fc1e5869df1b 100644
--- a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts
+++ b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts
@@ -9,10 +9,8 @@
#define RTCCLK_FREQ 1000000
/ {
- #address-cells = <2>;
- #size-cells = <2>;
model = "Microchip PolarFire-SoC Icicle Kit";
- compatible = "microchip,mpfs-icicle-kit";
+ compatible = "microchip,mpfs-icicle-kit", "microchip,mpfs";
aliases {
ethernet0 = &emac1;
@@ -35,9 +33,6 @@
reg = <0x0 0x80000000 0x0 0x40000000>;
clocks = <&clkcfg 26>;
};
-
- soc {
- };
};
&serial0 {
@@ -56,8 +51,17 @@
status = "okay";
};
-&sdcard {
+&mmc {
status = "okay";
+
+ bus-width = <4>;
+ disable-wp;
+ cap-sd-highspeed;
+ card-detect-delay = <200>;
+ sd-uhs-sdr12;
+ sd-uhs-sdr25;
+ sd-uhs-sdr50;
+ sd-uhs-sdr104;
};
&emac0 {
diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi
index 9d2fbbc1f777..c9f6d205d2ba 100644
--- a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi
+++ b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi
@@ -6,8 +6,8 @@
/ {
#address-cells = <2>;
#size-cells = <2>;
- model = "Microchip MPFS Icicle Kit";
- compatible = "microchip,mpfs-icicle-kit";
+ model = "Microchip PolarFire SoC";
+ compatible = "microchip,mpfs";
chosen {
};
@@ -161,7 +161,7 @@
};
clint@2000000 {
- compatible = "sifive,clint0";
+ compatible = "sifive,fu540-c000-clint", "sifive,clint0";
reg = <0x0 0x2000000 0x0 0xC000>;
interrupts-extended = <&cpu0_intc 3 &cpu0_intc 7
&cpu1_intc 3 &cpu1_intc 7
@@ -172,7 +172,7 @@
plic: interrupt-controller@c000000 {
#interrupt-cells = <1>;
- compatible = "sifive,plic-1.0.0";
+ compatible = "sifive,fu540-c000-plic", "sifive,plic-1.0.0";
reg = <0x0 0xc000000 0x0 0x4000000>;
riscv,ndev = <186>;
interrupt-controller;
@@ -262,39 +262,13 @@
status = "disabled";
};
- emmc: mmc@20008000 {
- compatible = "cdns,sd4hc";
+ /* Common node entry for emmc/sd */
+ mmc: mmc@20008000 {
+ compatible = "microchip,mpfs-sd4hc", "cdns,sd4hc";
reg = <0x0 0x20008000 0x0 0x1000>;
interrupt-parent = <&plic>;
interrupts = <88 89>;
- pinctrl-names = "default";
clocks = <&clkcfg 6>;
- bus-width = <4>;
- cap-mmc-highspeed;
- mmc-ddr-3_3v;
- max-frequency = <200000000>;
- non-removable;
- no-sd;
- no-sdio;
- voltage-ranges = <3300 3300>;
- status = "disabled";
- };
-
- sdcard: sdhc@20008000 {
- compatible = "cdns,sd4hc";
- reg = <0x0 0x20008000 0x0 0x1000>;
- interrupt-parent = <&plic>;
- interrupts = <88>;
- pinctrl-names = "default";
- clocks = <&clkcfg 6>;
- bus-width = <4>;
- disable-wp;
- cap-sd-highspeed;
- card-detect-delay = <200>;
- sd-uhs-sdr12;
- sd-uhs-sdr25;
- sd-uhs-sdr50;
- sd-uhs-sdr104;
max-frequency = <200000000>;
status = "disabled";
};
diff --git a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
index 7db861053483..0655b5c4201d 100644
--- a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
+++ b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
@@ -141,7 +141,7 @@
ranges;
plic0: interrupt-controller@c000000 {
#interrupt-cells = <1>;
- compatible = "sifive,plic-1.0.0";
+ compatible = "sifive,fu540-c000-plic", "sifive,plic-1.0.0";
reg = <0x0 0xc000000 0x0 0x4000000>;
riscv,ndev = <53>;
interrupt-controller;
diff --git a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts
index 60846e88ae4b..ba304d4c455c 100644
--- a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts
+++ b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts
@@ -8,10 +8,9 @@
#define RTCCLK_FREQ 1000000
/ {
- #address-cells = <2>;
- #size-cells = <2>;
model = "SiFive HiFive Unleashed A00";
- compatible = "sifive,hifive-unleashed-a00", "sifive,fu540-c000";
+ compatible = "sifive,hifive-unleashed-a00", "sifive,fu540-c000",
+ "sifive,fu540";
chosen {
stdout-path = "serial0";
@@ -26,9 +25,6 @@
reg = <0x0 0x80000000 0x2 0x00000000>;
};
- soc {
- };
-
hfclk: hfclk {
#clock-cells = <0>;
compatible = "fixed-clock";
@@ -63,7 +59,7 @@
&qspi0 {
status = "okay";
flash@0 {
- compatible = "issi,is25wp256", "jedec,spi-nor";
+ compatible = "jedec,spi-nor";
reg = <0>;
spi-max-frequency = <50000000>;
m25p,fast-read;
diff --git a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts
index 2e4ea84f27e7..4f66919215f6 100644
--- a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts
+++ b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts
@@ -8,8 +8,6 @@
#define RTCCLK_FREQ 1000000
/ {
- #address-cells = <2>;
- #size-cells = <2>;
model = "SiFive HiFive Unmatched A00";
compatible = "sifive,hifive-unmatched-a00", "sifive,fu740-c000",
"sifive,fu740";
@@ -27,9 +25,6 @@
reg = <0x0 0x80000000 0x4 0x00000000>;
};
- soc {
- };
-
hfclk: hfclk {
#clock-cells = <0>;
compatible = "fixed-clock";
@@ -211,7 +206,7 @@
&qspi0 {
status = "okay";
flash@0 {
- compatible = "issi,is25wp256", "jedec,spi-nor";
+ compatible = "jedec,spi-nor";
reg = <0>;
spi-max-frequency = <50000000>;
m25p,fast-read;
diff --git a/arch/riscv/configs/32-bit.config b/arch/riscv/configs/32-bit.config
new file mode 100644
index 000000000000..43f41323b67e
--- /dev/null
+++ b/arch/riscv/configs/32-bit.config
@@ -0,0 +1,2 @@
+CONFIG_ARCH_RV32I=y
+CONFIG_32BIT=y
diff --git a/arch/riscv/configs/64-bit.config b/arch/riscv/configs/64-bit.config
new file mode 100644
index 000000000000..313edc554d84
--- /dev/null
+++ b/arch/riscv/configs/64-bit.config
@@ -0,0 +1,2 @@
+CONFIG_ARCH_RV64I=y
+CONFIG_64BIT=y
diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig
index 4ebc80315f01..ef473e2f503b 100644
--- a/arch/riscv/configs/defconfig
+++ b/arch/riscv/configs/defconfig
@@ -19,6 +19,8 @@ CONFIG_SOC_VIRT=y
CONFIG_SOC_MICROCHIP_POLARFIRE=y
CONFIG_SMP=y
CONFIG_HOTPLUG_CPU=y
+CONFIG_VIRTUALIZATION=y
+CONFIG_KVM=m
CONFIG_JUMP_LABEL=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
@@ -72,9 +74,10 @@ CONFIG_GPIOLIB=y
CONFIG_GPIO_SIFIVE=y
# CONFIG_PTP_1588_CLOCK is not set
CONFIG_POWER_RESET=y
-CONFIG_DRM=y
-CONFIG_DRM_RADEON=y
-CONFIG_DRM_VIRTIO_GPU=y
+CONFIG_DRM=m
+CONFIG_DRM_RADEON=m
+CONFIG_DRM_NOUVEAU=m
+CONFIG_DRM_VIRTIO_GPU=m
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_USB=y
CONFIG_USB_XHCI_HCD=y
diff --git a/arch/riscv/configs/rv32_defconfig b/arch/riscv/configs/rv32_defconfig
index 434ef5b64599..6e9f12ff968a 100644
--- a/arch/riscv/configs/rv32_defconfig
+++ b/arch/riscv/configs/rv32_defconfig
@@ -19,6 +19,8 @@ CONFIG_SOC_VIRT=y
CONFIG_ARCH_RV32I=y
CONFIG_SMP=y
CONFIG_HOTPLUG_CPU=y
+CONFIG_VIRTUALIZATION=y
+CONFIG_KVM=m
CONFIG_JUMP_LABEL=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h
index 25ba21f98504..2639b9ee48f9 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -12,14 +12,12 @@
#include <linux/types.h>
#include <linux/kvm.h>
#include <linux/kvm_types.h>
+#include <asm/csr.h>
#include <asm/kvm_vcpu_fp.h>
#include <asm/kvm_vcpu_timer.h>
-#ifdef CONFIG_64BIT
-#define KVM_MAX_VCPUS (1U << 16)
-#else
-#define KVM_MAX_VCPUS (1U << 9)
-#endif
+#define KVM_MAX_VCPUS \
+ ((HGATP_VMID_MASK >> HGATP_VMID_SHIFT) + 1)
#define KVM_HALT_POLL_NS_DEFAULT 500000
diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
index 109c97e991a6..b3e5ff0125fe 100644
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -157,6 +157,8 @@ extern phys_addr_t __phys_addr_symbol(unsigned long x);
#define page_to_bus(page) (page_to_phys(page))
#define phys_to_page(paddr) (pfn_to_page(phys_to_pfn(paddr)))
+#define sym_to_pfn(x) __phys_to_pfn(__pa_symbol(x))
+
#ifdef CONFIG_FLATMEM
#define pfn_valid(pfn) \
(((pfn) >= ARCH_PFN_OFFSET) && (((pfn) - ARCH_PFN_OFFSET) < max_mapnr))
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 39b550310ec6..bf204e7c1f74 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -75,7 +75,8 @@
#endif
#ifdef CONFIG_XIP_KERNEL
-#define XIP_OFFSET SZ_8M
+#define XIP_OFFSET SZ_32M
+#define XIP_OFFSET_MASK (SZ_32M - 1)
#else
#define XIP_OFFSET 0
#endif
@@ -97,7 +98,8 @@
#ifdef CONFIG_XIP_KERNEL
#define XIP_FIXUP(addr) ({ \
uintptr_t __a = (uintptr_t)(addr); \
- (__a >= CONFIG_XIP_PHYS_ADDR && __a < CONFIG_XIP_PHYS_ADDR + SZ_16M) ? \
+ (__a >= CONFIG_XIP_PHYS_ADDR && \
+ __a < CONFIG_XIP_PHYS_ADDR + XIP_OFFSET * 2) ? \
__a - CONFIG_XIP_PHYS_ADDR + CONFIG_PHYS_RAM_BASE - XIP_OFFSET :\
__a; \
})
diff --git a/arch/riscv/include/asm/vdso.h b/arch/riscv/include/asm/vdso.h
index 208e31bc5d1c..bc6f75f3a199 100644
--- a/arch/riscv/include/asm/vdso.h
+++ b/arch/riscv/include/asm/vdso.h
@@ -8,30 +8,19 @@
#ifndef _ASM_RISCV_VDSO_H
#define _ASM_RISCV_VDSO_H
-
-/*
- * All systems with an MMU have a VDSO, but systems without an MMU don't
- * support shared libraries and therefor don't have one.
- */
-#ifdef CONFIG_MMU
-
-#include <linux/types.h>
/*
* All systems with an MMU have a VDSO, but systems without an MMU don't
* support shared libraries and therefor don't have one.
*/
#ifdef CONFIG_MMU
-#define __VVAR_PAGES 1
+#define __VVAR_PAGES 2
#ifndef __ASSEMBLY__
#include <generated/vdso-offsets.h>
#define VDSO_SYMBOL(base, name) \
(void __user *)((unsigned long)(base) + __vdso_##name##_offset)
-
-#endif /* CONFIG_MMU */
-
#endif /* !__ASSEMBLY__ */
#endif /* CONFIG_MMU */
diff --git a/arch/riscv/include/asm/vdso/gettimeofday.h b/arch/riscv/include/asm/vdso/gettimeofday.h
index f839f16e0d2a..77d9c2f721c4 100644
--- a/arch/riscv/include/asm/vdso/gettimeofday.h
+++ b/arch/riscv/include/asm/vdso/gettimeofday.h
@@ -76,6 +76,13 @@ static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
return _vdso_data;
}
+#ifdef CONFIG_TIME_NS
+static __always_inline
+const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
+{
+ return _timens_data;
+}
+#endif
#endif /* !__ASSEMBLY__ */
#endif /* __ASM_VDSO_GETTIMEOFDAY_H */
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index 25ec50573957..f52f01ecbeea 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -20,10 +20,20 @@
REG_L t0, _xip_fixup
add \reg, \reg, t0
.endm
+.macro XIP_FIXUP_FLASH_OFFSET reg
+ la t1, __data_loc
+ li t0, XIP_OFFSET_MASK
+ and t1, t1, t0
+ li t1, XIP_OFFSET
+ sub t0, t0, t1
+ sub \reg, \reg, t0
+.endm
_xip_fixup: .dword CONFIG_PHYS_RAM_BASE - CONFIG_XIP_PHYS_ADDR - XIP_OFFSET
#else
.macro XIP_FIXUP_OFFSET reg
.endm
+.macro XIP_FIXUP_FLASH_OFFSET reg
+.endm
#endif /* CONFIG_XIP_KERNEL */
__HEAD
@@ -267,6 +277,7 @@ pmp_done:
la a3, hart_lottery
mv a2, a3
XIP_FIXUP_OFFSET a2
+ XIP_FIXUP_FLASH_OFFSET a3
lw t1, (a3)
amoswap.w t0, t1, (a2)
/* first time here if hart_lottery in RAM is not set */
@@ -305,6 +316,7 @@ clear_bss_done:
XIP_FIXUP_OFFSET sp
#ifdef CONFIG_BUILTIN_DTB
la a0, __dtb_start
+ XIP_FIXUP_OFFSET a0
#else
mv a0, s1
#endif /* CONFIG_BUILTIN_DTB */
diff --git a/arch/riscv/kernel/reset.c b/arch/riscv/kernel/reset.c
index ee5878d968cc..9c842c41684a 100644
--- a/arch/riscv/kernel/reset.c
+++ b/arch/riscv/kernel/reset.c
@@ -12,7 +12,7 @@ static void default_power_off(void)
wait_for_interrupt();
}
-void (*pm_power_off)(void) = default_power_off;
+void (*pm_power_off)(void) = NULL;
EXPORT_SYMBOL(pm_power_off);
void machine_restart(char *cmd)
@@ -23,10 +23,16 @@ void machine_restart(char *cmd)
void machine_halt(void)
{
- pm_power_off();
+ if (pm_power_off != NULL)
+ pm_power_off();
+ else
+ default_power_off();
}
void machine_power_off(void)
{
- pm_power_off();
+ if (pm_power_off != NULL)
+ pm_power_off();
+ else
+ default_power_off();
}
diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c
index b70956d80408..a9436a65161a 100644
--- a/arch/riscv/kernel/vdso.c
+++ b/arch/riscv/kernel/vdso.c
@@ -13,6 +13,7 @@
#include <linux/err.h>
#include <asm/page.h>
#include <asm/vdso.h>
+#include <linux/time_namespace.h>
#ifdef CONFIG_GENERIC_TIME_VSYSCALL
#include <vdso/datapage.h>
@@ -25,14 +26,12 @@ extern char vdso_start[], vdso_end[];
enum vvar_pages {
VVAR_DATA_PAGE_OFFSET,
+ VVAR_TIMENS_PAGE_OFFSET,
VVAR_NR_PAGES,
};
#define VVAR_SIZE (VVAR_NR_PAGES << PAGE_SHIFT)
-static unsigned int vdso_pages __ro_after_init;
-static struct page **vdso_pagelist __ro_after_init;
-
/*
* The vDSO data page.
*/
@@ -42,83 +41,228 @@ static union {
} vdso_data_store __page_aligned_data;
struct vdso_data *vdso_data = &vdso_data_store.data;
-static int __init vdso_init(void)
+struct __vdso_info {
+ const char *name;
+ const char *vdso_code_start;
+ const char *vdso_code_end;
+ unsigned long vdso_pages;
+ /* Data Mapping */
+ struct vm_special_mapping *dm;
+ /* Code Mapping */
+ struct vm_special_mapping *cm;
+};
+
+static struct __vdso_info vdso_info __ro_after_init = {
+ .name = "vdso",
+ .vdso_code_start = vdso_start,
+ .vdso_code_end = vdso_end,
+};
+
+static int vdso_mremap(const struct vm_special_mapping *sm,
+ struct vm_area_struct *new_vma)
+{
+ current->mm->context.vdso = (void *)new_vma->vm_start;
+
+ return 0;
+}
+
+static int __init __vdso_init(void)
{
unsigned int i;
+ struct page **vdso_pagelist;
+ unsigned long pfn;
- vdso_pages = (vdso_end - vdso_start) >> PAGE_SHIFT;
- vdso_pagelist =
- kcalloc(vdso_pages + VVAR_NR_PAGES, sizeof(struct page *), GFP_KERNEL);
- if (unlikely(vdso_pagelist == NULL)) {
- pr_err("vdso: pagelist allocation failed\n");
- return -ENOMEM;
+ if (memcmp(vdso_info.vdso_code_start, "\177ELF", 4)) {
+ pr_err("vDSO is not a valid ELF object!\n");
+ return -EINVAL;
}
- for (i = 0; i < vdso_pages; i++) {
- struct page *pg;
+ vdso_info.vdso_pages = (
+ vdso_info.vdso_code_end -
+ vdso_info.vdso_code_start) >>
+ PAGE_SHIFT;
+
+ vdso_pagelist = kcalloc(vdso_info.vdso_pages,
+ sizeof(struct page *),
+ GFP_KERNEL);
+ if (vdso_pagelist == NULL)
+ return -ENOMEM;
+
+ /* Grab the vDSO code pages. */
+ pfn = sym_to_pfn(vdso_info.vdso_code_start);
+
+ for (i = 0; i < vdso_info.vdso_pages; i++)
+ vdso_pagelist[i] = pfn_to_page(pfn + i);
+
+ vdso_info.cm->pages = vdso_pagelist;
+
+ return 0;
+}
+
+#ifdef CONFIG_TIME_NS
+struct vdso_data *arch_get_vdso_data(void *vvar_page)
+{
+ return (struct vdso_data *)(vvar_page);
+}
+
+/*
+ * The vvar mapping contains data for a specific time namespace, so when a task
+ * changes namespace we must unmap its vvar data for the old namespace.
+ * Subsequent faults will map in data for the new namespace.
+ *
+ * For more details see timens_setup_vdso_data().
+ */
+int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
+{
+ struct mm_struct *mm = task->mm;
+ struct vm_area_struct *vma;
+
+ mmap_read_lock(mm);
- pg = virt_to_page(vdso_start + (i << PAGE_SHIFT));
- vdso_pagelist[i] = pg;
+ for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ unsigned long size = vma->vm_end - vma->vm_start;
+
+ if (vma_is_special_mapping(vma, vdso_info.dm))
+ zap_page_range(vma, vma->vm_start, size);
}
- vdso_pagelist[i] = virt_to_page(vdso_data);
+ mmap_read_unlock(mm);
return 0;
}
+
+static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
+{
+ if (likely(vma->vm_mm == current->mm))
+ return current->nsproxy->time_ns->vvar_page;
+
+ /*
+ * VM_PFNMAP | VM_IO protect .fault() handler from being called
+ * through interfaces like /proc/$pid/mem or
+ * process_vm_{readv,writev}() as long as there's no .access()
+ * in special_mapping_vmops.
+ * For more details check_vma_flags() and __access_remote_vm()
+ */
+ WARN(1, "vvar_page accessed remotely");
+
+ return NULL;
+}
+#else
+static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
+{
+ return NULL;
+}
+#endif
+
+static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
+ struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ struct page *timens_page = find_timens_vvar_page(vma);
+ unsigned long pfn;
+
+ switch (vmf->pgoff) {
+ case VVAR_DATA_PAGE_OFFSET:
+ if (timens_page)
+ pfn = page_to_pfn(timens_page);
+ else
+ pfn = sym_to_pfn(vdso_data);
+ break;
+#ifdef CONFIG_TIME_NS
+ case VVAR_TIMENS_PAGE_OFFSET:
+ /*
+ * If a task belongs to a time namespace then a namespace
+ * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and
+ * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET
+ * offset.
+ * See also the comment near timens_setup_vdso_data().
+ */
+ if (!timens_page)
+ return VM_FAULT_SIGBUS;
+ pfn = sym_to_pfn(vdso_data);
+ break;
+#endif /* CONFIG_TIME_NS */
+ default:
+ return VM_FAULT_SIGBUS;
+ }
+
+ return vmf_insert_pfn(vma, vmf->address, pfn);
+}
+
+enum rv_vdso_map {
+ RV_VDSO_MAP_VVAR,
+ RV_VDSO_MAP_VDSO,
+};
+
+static struct vm_special_mapping rv_vdso_maps[] __ro_after_init = {
+ [RV_VDSO_MAP_VVAR] = {
+ .name = "[vvar]",
+ .fault = vvar_fault,
+ },
+ [RV_VDSO_MAP_VDSO] = {
+ .name = "[vdso]",
+ .mremap = vdso_mremap,
+ },
+};
+
+static int __init vdso_init(void)
+{
+ vdso_info.dm = &rv_vdso_maps[RV_VDSO_MAP_VVAR];
+ vdso_info.cm = &rv_vdso_maps[RV_VDSO_MAP_VDSO];
+
+ return __vdso_init();
+}
arch_initcall(vdso_init);
-int arch_setup_additional_pages(struct linux_binprm *bprm,
- int uses_interp)
+static int __setup_additional_pages(struct mm_struct *mm,
+ struct linux_binprm *bprm,
+ int uses_interp)
{
- struct mm_struct *mm = current->mm;
- unsigned long vdso_base, vdso_len;
- int ret;
+ unsigned long vdso_base, vdso_text_len, vdso_mapping_len;
+ void *ret;
BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES);
- vdso_len = (vdso_pages + VVAR_NR_PAGES) << PAGE_SHIFT;
+ vdso_text_len = vdso_info.vdso_pages << PAGE_SHIFT;
+ /* Be sure to map the data page */
+ vdso_mapping_len = vdso_text_len + VVAR_SIZE;
- if (mmap_write_lock_killable(mm))
- return -EINTR;
-
- vdso_base = get_unmapped_area(NULL, 0, vdso_len, 0, 0);
+ vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
if (IS_ERR_VALUE(vdso_base)) {
- ret = vdso_base;
- goto end;
+ ret = ERR_PTR(vdso_base);
+ goto up_fail;
}
- mm->context.vdso = NULL;
- ret = install_special_mapping(mm, vdso_base, VVAR_SIZE,
- (VM_READ | VM_MAYREAD), &vdso_pagelist[vdso_pages]);
- if (unlikely(ret))
- goto end;
+ ret = _install_special_mapping(mm, vdso_base, VVAR_SIZE,
+ (VM_READ | VM_MAYREAD | VM_PFNMAP), vdso_info.dm);
+ if (IS_ERR(ret))
+ goto up_fail;
+ vdso_base += VVAR_SIZE;
+ mm->context.vdso = (void *)vdso_base;
ret =
- install_special_mapping(mm, vdso_base + VVAR_SIZE,
- vdso_pages << PAGE_SHIFT,
+ _install_special_mapping(mm, vdso_base, vdso_text_len,
(VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC),
- vdso_pagelist);
+ vdso_info.cm);
- if (unlikely(ret))
- goto end;
+ if (IS_ERR(ret))
+ goto up_fail;
- /*
- * Put vDSO base into mm struct. We need to do this before calling
- * install_special_mapping or the perf counter mmap tracking code
- * will fail to recognise it as a vDSO (since arch_vma_name fails).
- */
- mm->context.vdso = (void *)vdso_base + VVAR_SIZE;
+ return 0;
-end:
- mmap_write_unlock(mm);
- return ret;
+up_fail:
+ mm->context.vdso = NULL;
+ return PTR_ERR(ret);
}
-const char *arch_vma_name(struct vm_area_struct *vma)
+int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
- if (vma->vm_mm && (vma->vm_start == (long)vma->vm_mm->context.vdso))
- return "[vdso]";
- if (vma->vm_mm && (vma->vm_start ==
- (long)vma->vm_mm->context.vdso - VVAR_SIZE))
- return "[vdso_data]";
- return NULL;
+ struct mm_struct *mm = current->mm;
+ int ret;
+
+ if (mmap_write_lock_killable(mm))
+ return -EINTR;
+
+ ret = __setup_additional_pages(mm, bprm, uses_interp);
+ mmap_write_unlock(mm);
+
+ return ret;
}
diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S
index e9111f700af0..01d94aae5bf5 100644
--- a/arch/riscv/kernel/vdso/vdso.lds.S
+++ b/arch/riscv/kernel/vdso/vdso.lds.S
@@ -10,6 +10,9 @@ OUTPUT_ARCH(riscv)
SECTIONS
{
PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
+#ifdef CONFIG_TIME_NS
+ PROVIDE(_timens_data = _vdso_data + PAGE_SIZE);
+#endif
. = SIZEOF_HEADERS;
.hash : { *(.hash) } :text
diff --git a/arch/riscv/kernel/vmlinux-xip.lds.S b/arch/riscv/kernel/vmlinux-xip.lds.S
index 9c9f35091ef0..f5ed08262139 100644
--- a/arch/riscv/kernel/vmlinux-xip.lds.S
+++ b/arch/riscv/kernel/vmlinux-xip.lds.S
@@ -64,8 +64,11 @@ SECTIONS
/*
* From this point, stuff is considered writable and will be copied to RAM
*/
- __data_loc = ALIGN(16); /* location in file */
- . = LOAD_OFFSET + XIP_OFFSET; /* location in memory */
+ __data_loc = ALIGN(PAGE_SIZE); /* location in file */
+ . = KERNEL_LINK_ADDR + XIP_OFFSET; /* location in memory */
+
+#undef LOAD_OFFSET
+#define LOAD_OFFSET (KERNEL_LINK_ADDR + XIP_OFFSET - (__data_loc & XIP_OFFSET_MASK))
_sdata = .; /* Start of data section */
_data = .;
@@ -96,7 +99,6 @@ SECTIONS
KEEP(*(__soc_builtin_dtb_table))
__soc_builtin_dtb_table_end = .;
}
- PERCPU_SECTION(L1_CACHE_BYTES)
. = ALIGN(8);
.alternative : {
@@ -122,6 +124,8 @@ SECTIONS
BSS_SECTION(PAGE_SIZE, PAGE_SIZE, 0)
+ PERCPU_SECTION(L1_CACHE_BYTES)
+
.rel.dyn : AT(ADDR(.rel.dyn) - LOAD_OFFSET) {
*(.rel.dyn*)
}
diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c
index d81bae8eb55e..fc058ff5f4b6 100644
--- a/arch/riscv/kvm/mmu.c
+++ b/arch/riscv/kvm/mmu.c
@@ -453,6 +453,12 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm)
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot)
{
+ gpa_t gpa = slot->base_gfn << PAGE_SHIFT;
+ phys_addr_t size = slot->npages << PAGE_SHIFT;
+
+ spin_lock(&kvm->mmu_lock);
+ stage2_unmap_range(kvm, gpa, size, false);
+ spin_unlock(&kvm->mmu_lock);
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index e3d3aed46184..fb84619df012 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -740,7 +740,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
* Ensure we set mode to IN_GUEST_MODE after we disable
* interrupts and before the final VCPU requests check.
* See the comment in kvm_vcpu_exiting_guest_mode() and
- * Documentation/virtual/kvm/vcpu-requests.rst
+ * Documentation/virt/kvm/vcpu-requests.rst
*/
vcpu->mode = IN_GUEST_MODE;
diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c
index eb3c045edf11..3b0e703d22cf 100644
--- a/arch/riscv/kvm/vcpu_sbi.c
+++ b/arch/riscv/kvm/vcpu_sbi.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/**
+/*
* Copyright (c) 2019 Western Digital Corporation or its affiliates.
*
* Authors:
diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c
index 26399df15b63..fb18af34a4b5 100644
--- a/arch/riscv/kvm/vm.c
+++ b/arch/riscv/kvm/vm.c
@@ -74,7 +74,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = 1;
break;
case KVM_CAP_NR_VCPUS:
- r = num_online_cpus();
+ r = min_t(unsigned int, num_online_cpus(), KVM_MAX_VCPUS);
break;
case KVM_CAP_MAX_VCPUS:
r = KVM_MAX_VCPUS;
diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c
index ee3459cb6750..ea54cc0c9106 100644
--- a/arch/riscv/mm/context.c
+++ b/arch/riscv/mm/context.c
@@ -233,8 +233,10 @@ static int __init asids_init(void)
local_flush_tlb_all();
/* Pre-compute ASID details */
- num_asids = 1 << asid_bits;
- asid_mask = num_asids - 1;
+ if (asid_bits) {
+ num_asids = 1 << asid_bits;
+ asid_mask = num_asids - 1;
+ }
/*
* Use ASID allocator only if number of HW ASIDs are
@@ -255,7 +257,7 @@ static int __init asids_init(void)
pr_info("ASID allocator using %lu bits (%lu entries)\n",
asid_bits, num_asids);
} else {
- pr_info("ASID allocator disabled\n");
+ pr_info("ASID allocator disabled (%lu bits)\n", asid_bits);
}
return 0;
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index c0cddf0fc22d..24b2b8044602 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -41,7 +41,7 @@ phys_addr_t phys_ram_base __ro_after_init;
EXPORT_SYMBOL(phys_ram_base);
#ifdef CONFIG_XIP_KERNEL
-extern char _xiprom[], _exiprom[];
+extern char _xiprom[], _exiprom[], __data_loc;
#endif
unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
@@ -454,10 +454,9 @@ static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size)
/* called from head.S with MMU off */
asmlinkage void __init __copy_data(void)
{
- void *from = (void *)(&_sdata);
- void *end = (void *)(&_end);
+ void *from = (void *)(&__data_loc);
void *to = (void *)CONFIG_PHYS_RAM_BASE;
- size_t sz = (size_t)(end - from + 1);
+ size_t sz = (size_t)((uintptr_t)(&_end) - (uintptr_t)(&_sdata));
memcpy(to, from, sz);
}
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 8857ec3b97eb..2a5bb4f29cfe 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -47,7 +47,7 @@ config ARCH_SUPPORTS_UPROBES
config KASAN_SHADOW_OFFSET
hex
depends on KASAN
- default 0x18000000000000
+ default 0x1C000000000000
config S390
def_bool y
@@ -194,6 +194,7 @@ config S390
select HAVE_RELIABLE_STACKTRACE
select HAVE_RSEQ
select HAVE_SAMPLE_FTRACE_DIRECT
+ select HAVE_SAMPLE_FTRACE_DIRECT_MULTI
select HAVE_SOFTIRQ_ON_OWN_STACK
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_VIRT_CPU_ACCOUNTING
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index 69c45f600273..609e3697324b 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -77,10 +77,12 @@ KBUILD_AFLAGS_DECOMPRESSOR += $(aflags-y)
KBUILD_CFLAGS_DECOMPRESSOR += $(cflags-y)
ifneq ($(call cc-option,-mstack-size=8192 -mstack-guard=128),)
-cflags-$(CONFIG_CHECK_STACK) += -mstack-size=$(STACK_SIZE)
-ifeq ($(call cc-option,-mstack-size=8192),)
-cflags-$(CONFIG_CHECK_STACK) += -mstack-guard=$(CONFIG_STACK_GUARD)
-endif
+ CC_FLAGS_CHECK_STACK := -mstack-size=$(STACK_SIZE)
+ ifeq ($(call cc-option,-mstack-size=8192),)
+ CC_FLAGS_CHECK_STACK += -mstack-guard=$(CONFIG_STACK_GUARD)
+ endif
+ export CC_FLAGS_CHECK_STACK
+ cflags-$(CONFIG_CHECK_STACK) += $(CC_FLAGS_CHECK_STACK)
endif
ifdef CONFIG_EXPOLINE
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index 7571dee72a0c..1aa11a8f57dd 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -149,82 +149,56 @@ static void setup_ident_map_size(unsigned long max_physmem_end)
static void setup_kernel_memory_layout(void)
{
- bool vmalloc_size_verified = false;
- unsigned long vmemmap_off;
- unsigned long vspace_left;
+ unsigned long vmemmap_start;
unsigned long rte_size;
unsigned long pages;
- unsigned long vmax;
pages = ident_map_size / PAGE_SIZE;
/* vmemmap contains a multiple of PAGES_PER_SECTION struct pages */
vmemmap_size = SECTION_ALIGN_UP(pages) * sizeof(struct page);
/* choose kernel address space layout: 4 or 3 levels. */
- vmemmap_off = round_up(ident_map_size, _REGION3_SIZE);
+ vmemmap_start = round_up(ident_map_size, _REGION3_SIZE);
if (IS_ENABLED(CONFIG_KASAN) ||
vmalloc_size > _REGION2_SIZE ||
- vmemmap_off + vmemmap_size + vmalloc_size + MODULES_LEN > _REGION2_SIZE)
- vmax = _REGION1_SIZE;
- else
- vmax = _REGION2_SIZE;
-
- /* keep vmemmap_off aligned to a top level region table entry */
- rte_size = vmax == _REGION1_SIZE ? _REGION2_SIZE : _REGION3_SIZE;
- MODULES_END = vmax;
- if (is_prot_virt_host()) {
- /*
- * forcing modules and vmalloc area under the ultravisor
- * secure storage limit, so that any vmalloc allocation
- * we do could be used to back secure guest storage.
- */
- adjust_to_uv_max(&MODULES_END);
- }
-
-#ifdef CONFIG_KASAN
- if (MODULES_END < vmax) {
- /* force vmalloc and modules below kasan shadow */
- MODULES_END = min(MODULES_END, KASAN_SHADOW_START);
+ vmemmap_start + vmemmap_size + vmalloc_size + MODULES_LEN >
+ _REGION2_SIZE) {
+ MODULES_END = _REGION1_SIZE;
+ rte_size = _REGION2_SIZE;
} else {
- /*
- * leave vmalloc and modules above kasan shadow but make
- * sure they don't overlap with it
- */
- vmalloc_size = min(vmalloc_size, vmax - KASAN_SHADOW_END - MODULES_LEN);
- vmalloc_size_verified = true;
- vspace_left = KASAN_SHADOW_START;
+ MODULES_END = _REGION2_SIZE;
+ rte_size = _REGION3_SIZE;
}
+ /*
+ * forcing modules and vmalloc area under the ultravisor
+ * secure storage limit, so that any vmalloc allocation
+ * we do could be used to back secure guest storage.
+ */
+ adjust_to_uv_max(&MODULES_END);
+#ifdef CONFIG_KASAN
+ /* force vmalloc and modules below kasan shadow */
+ MODULES_END = min(MODULES_END, KASAN_SHADOW_START);
#endif
MODULES_VADDR = MODULES_END - MODULES_LEN;
VMALLOC_END = MODULES_VADDR;
- if (vmalloc_size_verified) {
- VMALLOC_START = VMALLOC_END - vmalloc_size;
- } else {
- vmemmap_off = round_up(ident_map_size, rte_size);
-
- if (vmemmap_off + vmemmap_size > VMALLOC_END ||
- vmalloc_size > VMALLOC_END - vmemmap_off - vmemmap_size) {
- /*
- * allow vmalloc area to occupy up to 1/2 of
- * the rest virtual space left.
- */
- vmalloc_size = min(vmalloc_size, VMALLOC_END / 2);
- }
- VMALLOC_START = VMALLOC_END - vmalloc_size;
- vspace_left = VMALLOC_START;
- }
+ /* allow vmalloc area to occupy up to about 1/2 of the rest virtual space left */
+ vmalloc_size = min(vmalloc_size, round_down(VMALLOC_END / 2, _REGION3_SIZE));
+ VMALLOC_START = VMALLOC_END - vmalloc_size;
- pages = vspace_left / (PAGE_SIZE + sizeof(struct page));
+ /* split remaining virtual space between 1:1 mapping & vmemmap array */
+ pages = VMALLOC_START / (PAGE_SIZE + sizeof(struct page));
pages = SECTION_ALIGN_UP(pages);
- vmemmap_off = round_up(vspace_left - pages * sizeof(struct page), rte_size);
- /* keep vmemmap left most starting from a fresh region table entry */
- vmemmap_off = min(vmemmap_off, round_up(ident_map_size, rte_size));
- /* take care that identity map is lower then vmemmap */
- ident_map_size = min(ident_map_size, vmemmap_off);
+ /* keep vmemmap_start aligned to a top level region table entry */
+ vmemmap_start = round_down(VMALLOC_START - pages * sizeof(struct page), rte_size);
+ /* vmemmap_start is the future VMEM_MAX_PHYS, make sure it is within MAX_PHYSMEM */
+ vmemmap_start = min(vmemmap_start, 1UL << MAX_PHYSMEM_BITS);
+ /* make sure identity map doesn't overlay with vmemmap */
+ ident_map_size = min(ident_map_size, vmemmap_start);
vmemmap_size = SECTION_ALIGN_UP(ident_map_size / PAGE_SIZE) * sizeof(struct page);
- VMALLOC_START = max(vmemmap_off + vmemmap_size, VMALLOC_START);
- vmemmap = (struct page *)vmemmap_off;
+ /* make sure vmemmap doesn't overlay with vmalloc area */
+ VMALLOC_START = max(vmemmap_start + vmemmap_size, VMALLOC_START);
+ vmemmap = (struct page *)vmemmap_start;
}
/*
diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h
index ea398a05f643..7f3c9ac34bd8 100644
--- a/arch/s390/include/asm/kexec.h
+++ b/arch/s390/include/asm/kexec.h
@@ -74,6 +74,12 @@ void *kexec_file_add_components(struct kimage *image,
int arch_kexec_do_relocs(int r_type, void *loc, unsigned long val,
unsigned long addr);
+#define ARCH_HAS_KIMAGE_ARCH
+
+struct kimage_arch {
+ void *ipl_buf;
+};
+
extern const struct kexec_file_ops s390_kexec_image_ops;
extern const struct kexec_file_ops s390_kexec_elf_ops;
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 6b3c366af78e..90824be5ce9a 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -210,9 +210,11 @@ int zpci_deconfigure_device(struct zpci_dev *zdev);
void zpci_device_reserved(struct zpci_dev *zdev);
bool zpci_is_device_configured(struct zpci_dev *zdev);
+int zpci_hot_reset_device(struct zpci_dev *zdev);
int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64);
int zpci_unregister_ioat(struct zpci_dev *, u8);
void zpci_remove_reserved_devices(void);
+void zpci_update_fh(struct zpci_dev *zdev, u32 fh);
/* CLP */
int clp_setup_writeback_mio(void);
@@ -294,8 +296,10 @@ void zpci_debug_exit(void);
void zpci_debug_init_device(struct zpci_dev *, const char *);
void zpci_debug_exit_device(struct zpci_dev *);
-/* Error reporting */
+/* Error handling */
int zpci_report_error(struct pci_dev *, struct zpci_report_error_header *);
+int zpci_clear_error_state(struct zpci_dev *zdev);
+int zpci_reset_load_store_blocked(struct zpci_dev *zdev);
#ifdef CONFIG_NUMA
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index d72a6df058d7..785d54c9350c 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -191,8 +191,8 @@ static int copy_oldmem_user(void __user *dst, void *src, size_t count)
return rc;
} else {
/* Check for swapped kdump oldmem areas */
- if (oldmem_data.start && from - oldmem_data.size < oldmem_data.size) {
- from -= oldmem_data.size;
+ if (oldmem_data.start && from - oldmem_data.start < oldmem_data.size) {
+ from -= oldmem_data.start;
len = min(count, oldmem_data.size - from);
} else if (oldmem_data.start && from < oldmem_data.size) {
len = min(count, oldmem_data.size - from);
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index e2cc35775b99..5ad1dde23dc5 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -2156,7 +2156,7 @@ void *ipl_report_finish(struct ipl_report *report)
buf = vzalloc(report->size);
if (!buf)
- return ERR_PTR(-ENOMEM);
+ goto out;
ptr = buf;
memcpy(ptr, report->ipib, report->ipib->hdr.len);
@@ -2195,6 +2195,7 @@ void *ipl_report_finish(struct ipl_report *report)
}
BUG_ON(ptr > buf + report->size);
+out:
return buf;
}
diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c
index 528edff085d9..9975ad200d74 100644
--- a/arch/s390/kernel/machine_kexec_file.c
+++ b/arch/s390/kernel/machine_kexec_file.c
@@ -12,6 +12,7 @@
#include <linux/kexec.h>
#include <linux/module_signature.h>
#include <linux/verification.h>
+#include <linux/vmalloc.h>
#include <asm/boot_data.h>
#include <asm/ipl.h>
#include <asm/setup.h>
@@ -170,6 +171,7 @@ static int kexec_file_add_ipl_report(struct kimage *image,
struct kexec_buf buf;
unsigned long addr;
void *ptr, *end;
+ int ret;
buf.image = image;
@@ -199,9 +201,13 @@ static int kexec_file_add_ipl_report(struct kimage *image,
ptr += len;
}
+ ret = -ENOMEM;
buf.buffer = ipl_report_finish(data->report);
+ if (!buf.buffer)
+ goto out;
buf.bufsz = data->report->size;
buf.memsz = buf.bufsz;
+ image->arch.ipl_buf = buf.buffer;
data->memsz += buf.memsz;
@@ -209,7 +215,9 @@ static int kexec_file_add_ipl_report(struct kimage *image,
data->kernel_buf + offsetof(struct lowcore, ipl_parmblock_ptr);
*lc_ipl_parmblock_ptr = (__u32)buf.mem;
- return kexec_add_buffer(&buf);
+ ret = kexec_add_buffer(&buf);
+out:
+ return ret;
}
void *kexec_file_add_components(struct kimage *image,
@@ -322,3 +330,11 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
}
return 0;
}
+
+int arch_kimage_file_post_load_cleanup(struct kimage *image)
+{
+ vfree(image->arch.ipl_buf);
+ image->arch.ipl_buf = NULL;
+
+ return kexec_image_post_load_cleanup_default(image);
+}
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index 6f431fa9e4d7..ee8707abdb6a 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -687,8 +687,10 @@ static void cpumf_pmu_stop(struct perf_event *event, int flags)
false);
if (cfdiag_diffctr(cpuhw, event->hw.config_base))
cfdiag_push_sample(event, cpuhw);
- } else
+ } else if (cpuhw->flags & PMU_F_RESERVED) {
+ /* Only update when PMU not hotplugged off */
hw_perf_event_update(event);
+ }
hwc->state |= PERF_HES_UPTODATE;
}
}
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 40405f2304f1..225ab2d0a4c6 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -606,7 +606,7 @@ static void __init setup_resources(void)
static void __init setup_memory_end(void)
{
- memblock_remove(ident_map_size, ULONG_MAX);
+ memblock_remove(ident_map_size, PHYS_ADDR_MAX - ident_map_size);
max_pfn = max_low_pfn = PFN_DOWN(ident_map_size);
pr_notice("The maximum memory size is %luMB\n", ident_map_size >> 20);
}
@@ -638,14 +638,6 @@ static struct notifier_block kdump_mem_nb = {
#endif
/*
- * Make sure that the area above identity mapping is protected
- */
-static void __init reserve_above_ident_map(void)
-{
- memblock_reserve(ident_map_size, ULONG_MAX);
-}
-
-/*
* Reserve memory for kdump kernel to be loaded with kexec
*/
static void __init reserve_crashkernel(void)
@@ -785,7 +777,6 @@ static void __init memblock_add_mem_detect_info(void)
}
memblock_set_bottom_up(false);
memblock_set_node(0, ULONG_MAX, &memblock.memory, 0);
- memblock_dump_all();
}
/*
@@ -826,9 +817,6 @@ static void __init setup_memory(void)
storage_key_init_range(start, end);
psw_set_key(PAGE_DEFAULT_KEY);
-
- /* Only cosmetics */
- memblock_enforce_memory_limit(memblock_end_of_DRAM());
}
static void __init relocate_amode31_section(void)
@@ -999,24 +987,24 @@ void __init setup_arch(char **cmdline_p)
setup_control_program_code();
/* Do some memory reservations *before* memory is added to memblock */
- reserve_above_ident_map();
reserve_kernel();
reserve_initrd();
reserve_certificate_list();
reserve_mem_detect_info();
+ memblock_set_current_limit(ident_map_size);
memblock_allow_resize();
/* Get information about *all* installed memory */
memblock_add_mem_detect_info();
free_mem_detect_info();
+ setup_memory_end();
+ memblock_dump_all();
+ setup_memory();
relocate_amode31_section();
setup_cr();
-
setup_uv();
- setup_memory_end();
- setup_memory();
dma_contiguous_reserve(ident_map_size);
vmcp_cma_reserve();
if (MACHINE_HAS_EDAT2)
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index df5261e5cfe1..ed9c5c2eafad 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -451,3 +451,4 @@
446 common landlock_restrict_self sys_landlock_restrict_self sys_landlock_restrict_self
# 447 reserved for memfd_secret
448 common process_mrelease sys_process_mrelease sys_process_mrelease
+449 common futex_waitv sys_futex_waitv sys_futex_waitv
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 035705c9f23e..2b780786fc68 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -84,7 +84,7 @@ static void default_trap_handler(struct pt_regs *regs)
{
if (user_mode(regs)) {
report_user_fault(regs, SIGSEGV, 0);
- force_fatal_sig(SIGSEGV);
+ force_exit_sig(SIGSEGV);
} else
die(regs, "Unknown program exception");
}
diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile
index e3e6ac5686df..245bddfe9bc0 100644
--- a/arch/s390/kernel/vdso32/Makefile
+++ b/arch/s390/kernel/vdso32/Makefile
@@ -22,7 +22,7 @@ KBUILD_AFLAGS_32 += -m31 -s
KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS))
KBUILD_CFLAGS_32 += -m31 -fPIC -shared -fno-common -fno-builtin
-LDFLAGS_vdso32.so.dbg += -fPIC -shared -nostdlib -soname=linux-vdso32.so.1 \
+LDFLAGS_vdso32.so.dbg += -fPIC -shared -soname=linux-vdso32.so.1 \
--hash-style=both --build-id=sha1 -melf_s390 -T
$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)
diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile
index 6568de236701..9e2b95a222a9 100644
--- a/arch/s390/kernel/vdso64/Makefile
+++ b/arch/s390/kernel/vdso64/Makefile
@@ -8,8 +8,9 @@ ARCH_REL_TYPE_ABS += R_390_GOT|R_390_PLT
include $(srctree)/lib/vdso/Makefile
obj-vdso64 = vdso_user_wrapper.o note.o
obj-cvdso64 = vdso64_generic.o getcpu.o
-CFLAGS_REMOVE_getcpu.o = -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE)
-CFLAGS_REMOVE_vdso64_generic.o = -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE)
+VDSO_CFLAGS_REMOVE := -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE) $(CC_FLAGS_CHECK_STACK)
+CFLAGS_REMOVE_getcpu.o = $(VDSO_CFLAGS_REMOVE)
+CFLAGS_REMOVE_vdso64_generic.o = $(VDSO_CFLAGS_REMOVE)
# Build rules
@@ -25,7 +26,7 @@ KBUILD_AFLAGS_64 += -m64 -s
KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS))
KBUILD_CFLAGS_64 += -m64 -fPIC -shared -fno-common -fno-builtin
-ldflags-y := -fPIC -shared -nostdlib -soname=linux-vdso64.so.1 \
+ldflags-y := -fPIC -shared -soname=linux-vdso64.so.1 \
--hash-style=both --build-id=sha1 -T
$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64)
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index c6257f625929..14a18ba5ff2c 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -585,6 +585,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = KVM_MAX_VCPUS;
else if (sclp.has_esca && sclp.has_64bscao)
r = KVM_S390_ESCA_CPU_SLOTS;
+ if (ext == KVM_CAP_NR_VCPUS)
+ r = min_t(unsigned int, num_online_cpus(), r);
break;
case KVM_CAP_S390_COW:
r = MACHINE_HAS_ESOP;
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 872d772b73d2..2f9b78fa82a5 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -481,6 +481,34 @@ static void zpci_free_iomap(struct zpci_dev *zdev, int entry)
spin_unlock(&zpci_iomap_lock);
}
+static void zpci_do_update_iomap_fh(struct zpci_dev *zdev, u32 fh)
+{
+ int bar, idx;
+
+ spin_lock(&zpci_iomap_lock);
+ for (bar = 0; bar < PCI_STD_NUM_BARS; bar++) {
+ if (!zdev->bars[bar].size)
+ continue;
+ idx = zdev->bars[bar].map_idx;
+ if (!zpci_iomap_start[idx].count)
+ continue;
+ WRITE_ONCE(zpci_iomap_start[idx].fh, zdev->fh);
+ }
+ spin_unlock(&zpci_iomap_lock);
+}
+
+void zpci_update_fh(struct zpci_dev *zdev, u32 fh)
+{
+ if (!fh || zdev->fh == fh)
+ return;
+
+ zdev->fh = fh;
+ if (zpci_use_mio(zdev))
+ return;
+ if (zdev->has_resources && zdev_enabled(zdev))
+ zpci_do_update_iomap_fh(zdev, fh);
+}
+
static struct resource *__alloc_res(struct zpci_dev *zdev, unsigned long start,
unsigned long size, unsigned long flags)
{
@@ -668,7 +696,7 @@ int zpci_enable_device(struct zpci_dev *zdev)
if (clp_enable_fh(zdev, &fh, ZPCI_NR_DMA_SPACES))
rc = -EIO;
else
- zdev->fh = fh;
+ zpci_update_fh(zdev, fh);
return rc;
}
@@ -679,14 +707,14 @@ int zpci_disable_device(struct zpci_dev *zdev)
cc = clp_disable_fh(zdev, &fh);
if (!cc) {
- zdev->fh = fh;
+ zpci_update_fh(zdev, fh);
} else if (cc == CLP_RC_SETPCIFN_ALRDY) {
pr_info("Disabling PCI function %08x had no effect as it was already disabled\n",
zdev->fid);
/* Function is already disabled - update handle */
rc = clp_refresh_fh(zdev->fid, &fh);
if (!rc) {
- zdev->fh = fh;
+ zpci_update_fh(zdev, fh);
rc = -EINVAL;
}
} else {
@@ -696,6 +724,65 @@ int zpci_disable_device(struct zpci_dev *zdev)
}
/**
+ * zpci_hot_reset_device - perform a reset of the given zPCI function
+ * @zdev: the slot which should be reset
+ *
+ * Performs a low level reset of the zPCI function. The reset is low level in
+ * the sense that the zPCI function can be reset without detaching it from the
+ * common PCI subsystem. The reset may be performed while under control of
+ * either DMA or IOMMU APIs in which case the existing DMA/IOMMU translation
+ * table is reinstated at the end of the reset.
+ *
+ * After the reset the functions internal state is reset to an initial state
+ * equivalent to its state during boot when first probing a driver.
+ * Consequently after reset the PCI function requires re-initialization via the
+ * common PCI code including re-enabling IRQs via pci_alloc_irq_vectors()
+ * and enabling the function via e.g.pci_enablde_device_flags().The caller
+ * must guard against concurrent reset attempts.
+ *
+ * In most cases this function should not be called directly but through
+ * pci_reset_function() or pci_reset_bus() which handle the save/restore and
+ * locking.
+ *
+ * Return: 0 on success and an error value otherwise
+ */
+int zpci_hot_reset_device(struct zpci_dev *zdev)
+{
+ int rc;
+
+ zpci_dbg(3, "rst fid:%x, fh:%x\n", zdev->fid, zdev->fh);
+ if (zdev_enabled(zdev)) {
+ /* Disables device access, DMAs and IRQs (reset state) */
+ rc = zpci_disable_device(zdev);
+ /*
+ * Due to a z/VM vs LPAR inconsistency in the error state the
+ * FH may indicate an enabled device but disable says the
+ * device is already disabled don't treat it as an error here.
+ */
+ if (rc == -EINVAL)
+ rc = 0;
+ if (rc)
+ return rc;
+ }
+
+ rc = zpci_enable_device(zdev);
+ if (rc)
+ return rc;
+
+ if (zdev->dma_table)
+ rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
+ (u64)zdev->dma_table);
+ else
+ rc = zpci_dma_init_device(zdev);
+ if (rc) {
+ zpci_disable_device(zdev);
+ return rc;
+ }
+
+ return 0;
+}
+
+/**
* zpci_create_device() - Create a new zpci_dev and add it to the zbus
* @fid: Function ID of the device to be created
* @fh: Current Function Handle of the device to be created
@@ -776,7 +863,7 @@ int zpci_scan_configured_device(struct zpci_dev *zdev, u32 fh)
{
int rc;
- zdev->fh = fh;
+ zpci_update_fh(zdev, fh);
/* the PCI function will be scanned once function 0 appears */
if (!zdev->zbus->bus)
return 0;
@@ -903,6 +990,59 @@ int zpci_report_error(struct pci_dev *pdev,
}
EXPORT_SYMBOL(zpci_report_error);
+/**
+ * zpci_clear_error_state() - Clears the zPCI error state of the device
+ * @zdev: The zdev for which the zPCI error state should be reset
+ *
+ * Clear the zPCI error state of the device. If clearing the zPCI error state
+ * fails the device is left in the error state. In this case it may make sense
+ * to call zpci_io_perm_failure() on the associated pdev if it exists.
+ *
+ * Returns: 0 on success, -EIO otherwise
+ */
+int zpci_clear_error_state(struct zpci_dev *zdev)
+{
+ u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_RESET_ERROR);
+ struct zpci_fib fib = {0};
+ u8 status;
+ int cc;
+
+ cc = zpci_mod_fc(req, &fib, &status);
+ if (cc) {
+ zpci_dbg(3, "ces fid:%x, cc:%d, status:%x\n", zdev->fid, cc, status);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+/**
+ * zpci_reset_load_store_blocked() - Re-enables L/S from error state
+ * @zdev: The zdev for which to unblock load/store access
+ *
+ * Re-enables load/store access for a PCI function in the error state while
+ * keeping DMA blocked. In this state drivers can poke MMIO space to determine
+ * if error recovery is possible while catching any rogue DMA access from the
+ * device.
+ *
+ * Returns: 0 on success, -EIO otherwise
+ */
+int zpci_reset_load_store_blocked(struct zpci_dev *zdev)
+{
+ u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_RESET_BLOCK);
+ struct zpci_fib fib = {0};
+ u8 status;
+ int cc;
+
+ cc = zpci_mod_fc(req, &fib, &status);
+ if (cc) {
+ zpci_dbg(3, "rls fid:%x, cc:%d, status:%x\n", zdev->fid, cc, status);
+ return -EIO;
+ }
+
+ return 0;
+}
+
static int zpci_mem_init(void)
{
BUILD_BUG_ON(!is_power_of_2(__alignof__(struct zpci_fmb)) ||
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index 6a5bfa9dc1f2..2e3e5b278925 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -47,18 +47,223 @@ struct zpci_ccdf_avail {
u16 pec; /* PCI event code */
} __packed;
+static inline bool ers_result_indicates_abort(pci_ers_result_t ers_res)
+{
+ switch (ers_res) {
+ case PCI_ERS_RESULT_CAN_RECOVER:
+ case PCI_ERS_RESULT_RECOVERED:
+ case PCI_ERS_RESULT_NEED_RESET:
+ return false;
+ default:
+ return true;
+ }
+}
+
+static bool is_passed_through(struct zpci_dev *zdev)
+{
+ return zdev->s390_domain;
+}
+
+static bool is_driver_supported(struct pci_driver *driver)
+{
+ if (!driver || !driver->err_handler)
+ return false;
+ if (!driver->err_handler->error_detected)
+ return false;
+ if (!driver->err_handler->slot_reset)
+ return false;
+ if (!driver->err_handler->resume)
+ return false;
+ return true;
+}
+
+static pci_ers_result_t zpci_event_notify_error_detected(struct pci_dev *pdev,
+ struct pci_driver *driver)
+{
+ pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
+
+ ers_res = driver->err_handler->error_detected(pdev, pdev->error_state);
+ if (ers_result_indicates_abort(ers_res))
+ pr_info("%s: Automatic recovery failed after initial reporting\n", pci_name(pdev));
+ else if (ers_res == PCI_ERS_RESULT_NEED_RESET)
+ pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev));
+
+ return ers_res;
+}
+
+static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev,
+ struct pci_driver *driver)
+{
+ pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
+ struct zpci_dev *zdev = to_zpci(pdev);
+ int rc;
+
+ pr_info("%s: Unblocking device access for examination\n", pci_name(pdev));
+ rc = zpci_reset_load_store_blocked(zdev);
+ if (rc) {
+ pr_err("%s: Unblocking device access failed\n", pci_name(pdev));
+ /* Let's try a full reset instead */
+ return PCI_ERS_RESULT_NEED_RESET;
+ }
+
+ if (driver->err_handler->mmio_enabled) {
+ ers_res = driver->err_handler->mmio_enabled(pdev);
+ if (ers_result_indicates_abort(ers_res)) {
+ pr_info("%s: Automatic recovery failed after MMIO re-enable\n",
+ pci_name(pdev));
+ return ers_res;
+ } else if (ers_res == PCI_ERS_RESULT_NEED_RESET) {
+ pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev));
+ return ers_res;
+ }
+ }
+
+ pr_debug("%s: Unblocking DMA\n", pci_name(pdev));
+ rc = zpci_clear_error_state(zdev);
+ if (!rc) {
+ pdev->error_state = pci_channel_io_normal;
+ } else {
+ pr_err("%s: Unblocking DMA failed\n", pci_name(pdev));
+ /* Let's try a full reset instead */
+ return PCI_ERS_RESULT_NEED_RESET;
+ }
+
+ return ers_res;
+}
+
+static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev,
+ struct pci_driver *driver)
+{
+ pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
+
+ pr_info("%s: Initiating reset\n", pci_name(pdev));
+ if (zpci_hot_reset_device(to_zpci(pdev))) {
+ pr_err("%s: The reset request failed\n", pci_name(pdev));
+ return ers_res;
+ }
+ pdev->error_state = pci_channel_io_normal;
+ ers_res = driver->err_handler->slot_reset(pdev);
+ if (ers_result_indicates_abort(ers_res)) {
+ pr_info("%s: Automatic recovery failed after slot reset\n", pci_name(pdev));
+ return ers_res;
+ }
+
+ return ers_res;
+}
+
+/* zpci_event_attempt_error_recovery - Try to recover the given PCI function
+ * @pdev: PCI function to recover currently in the error state
+ *
+ * We follow the scheme outlined in Documentation/PCI/pci-error-recovery.rst.
+ * With the simplification that recovery always happens per function
+ * and the platform determines which functions are affected for
+ * multi-function devices.
+ */
+static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
+{
+ pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
+ struct pci_driver *driver;
+
+ /*
+ * Ensure that the PCI function is not removed concurrently, no driver
+ * is unbound or probed and that userspace can't access its
+ * configuration space while we perform recovery.
+ */
+ pci_dev_lock(pdev);
+ if (pdev->error_state == pci_channel_io_perm_failure) {
+ ers_res = PCI_ERS_RESULT_DISCONNECT;
+ goto out_unlock;
+ }
+ pdev->error_state = pci_channel_io_frozen;
+
+ if (is_passed_through(to_zpci(pdev))) {
+ pr_info("%s: Cannot be recovered in the host because it is a pass-through device\n",
+ pci_name(pdev));
+ goto out_unlock;
+ }
+
+ driver = to_pci_driver(pdev->dev.driver);
+ if (!is_driver_supported(driver)) {
+ if (!driver)
+ pr_info("%s: Cannot be recovered because no driver is bound to the device\n",
+ pci_name(pdev));
+ else
+ pr_info("%s: The %s driver bound to the device does not support error recovery\n",
+ pci_name(pdev),
+ driver->name);
+ goto out_unlock;
+ }
+
+ ers_res = zpci_event_notify_error_detected(pdev, driver);
+ if (ers_result_indicates_abort(ers_res))
+ goto out_unlock;
+
+ if (ers_res == PCI_ERS_RESULT_CAN_RECOVER) {
+ ers_res = zpci_event_do_error_state_clear(pdev, driver);
+ if (ers_result_indicates_abort(ers_res))
+ goto out_unlock;
+ }
+
+ if (ers_res == PCI_ERS_RESULT_NEED_RESET)
+ ers_res = zpci_event_do_reset(pdev, driver);
+
+ if (ers_res != PCI_ERS_RESULT_RECOVERED) {
+ pr_err("%s: Automatic recovery failed; operator intervention is required\n",
+ pci_name(pdev));
+ goto out_unlock;
+ }
+
+ pr_info("%s: The device is ready to resume operations\n", pci_name(pdev));
+ if (driver->err_handler->resume)
+ driver->err_handler->resume(pdev);
+out_unlock:
+ pci_dev_unlock(pdev);
+
+ return ers_res;
+}
+
+/* zpci_event_io_failure - Report PCI channel failure state to driver
+ * @pdev: PCI function for which to report
+ * @es: PCI channel failure state to report
+ */
+static void zpci_event_io_failure(struct pci_dev *pdev, pci_channel_state_t es)
+{
+ struct pci_driver *driver;
+
+ pci_dev_lock(pdev);
+ pdev->error_state = es;
+ /**
+ * While vfio-pci's error_detected callback notifies user-space QEMU
+ * reacts to this by freezing the guest. In an s390 environment PCI
+ * errors are rarely fatal so this is overkill. Instead in the future
+ * we will inject the error event and let the guest recover the device
+ * itself.
+ */
+ if (is_passed_through(to_zpci(pdev)))
+ goto out;
+ driver = to_pci_driver(pdev->dev.driver);
+ if (driver && driver->err_handler && driver->err_handler->error_detected)
+ driver->err_handler->error_detected(pdev, pdev->error_state);
+out:
+ pci_dev_unlock(pdev);
+}
+
static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
{
struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
struct pci_dev *pdev = NULL;
+ pci_ers_result_t ers_res;
zpci_dbg(3, "err fid:%x, fh:%x, pec:%x\n",
ccdf->fid, ccdf->fh, ccdf->pec);
zpci_err("error CCDF:\n");
zpci_err_hex(ccdf, sizeof(*ccdf));
- if (zdev)
- pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
+ if (zdev) {
+ zpci_update_fh(zdev, ccdf->fh);
+ if (zdev->zbus->bus)
+ pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
+ }
pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n",
pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
@@ -66,7 +271,20 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
if (!pdev)
return;
- pdev->error_state = pci_channel_io_perm_failure;
+ switch (ccdf->pec) {
+ case 0x003a: /* Service Action or Error Recovery Successful */
+ ers_res = zpci_event_attempt_error_recovery(pdev);
+ if (ers_res != PCI_ERS_RESULT_RECOVERED)
+ zpci_event_io_failure(pdev, pci_channel_io_perm_failure);
+ break;
+ default:
+ /*
+ * Mark as frozen not permanently failed because the device
+ * could be subsequently recovered by the platform.
+ */
+ zpci_event_io_failure(pdev, pci_channel_io_frozen);
+ break;
+ }
pci_dev_put(pdev);
}
@@ -78,7 +296,7 @@ void zpci_event_error(void *data)
static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh)
{
- zdev->fh = fh;
+ zpci_update_fh(zdev, fh);
/* Give the driver a hint that the function is
* already unusable.
*/
@@ -121,7 +339,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
if (!zdev)
zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_STANDBY);
else
- zdev->fh = ccdf->fh;
+ zpci_update_fh(zdev, ccdf->fh);
break;
case 0x0303: /* Deconfiguration requested */
if (zdev) {
@@ -130,7 +348,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
*/
if (zdev->state != ZPCI_FN_STATE_CONFIGURED)
break;
- zdev->fh = ccdf->fh;
+ zpci_update_fh(zdev, ccdf->fh);
zpci_deconfigure_device(zdev);
}
break;
diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c
index 2e43996159f0..28d863aaafea 100644
--- a/arch/s390/pci/pci_insn.c
+++ b/arch/s390/pci/pci_insn.c
@@ -163,7 +163,7 @@ static inline int zpci_load_fh(u64 *data, const volatile void __iomem *addr,
unsigned long len)
{
struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(addr)];
- u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, len);
+ u64 req = ZPCI_CREATE_REQ(READ_ONCE(entry->fh), entry->bar, len);
return __zpci_load(data, req, ZPCI_OFFSET(addr));
}
@@ -244,7 +244,7 @@ static inline int zpci_store_fh(const volatile void __iomem *addr, u64 data,
unsigned long len)
{
struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(addr)];
- u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, len);
+ u64 req = ZPCI_CREATE_REQ(READ_ONCE(entry->fh), entry->bar, len);
return __zpci_store(data, req, ZPCI_OFFSET(addr));
}
diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c
index 3823e159bf74..954bb7a83124 100644
--- a/arch/s390/pci/pci_irq.c
+++ b/arch/s390/pci/pci_irq.c
@@ -387,6 +387,15 @@ void arch_teardown_msi_irqs(struct pci_dev *pdev)
airq_iv_free(zpci_ibv[0], zdev->msi_first_bit, zdev->msi_nr_irqs);
}
+void arch_restore_msi_irqs(struct pci_dev *pdev)
+{
+ struct zpci_dev *zdev = to_zpci(pdev);
+
+ if (!zdev->irqs_registered)
+ zpci_set_irq(zdev);
+ default_restore_msi_irqs(pdev);
+}
+
static struct airq_struct zpci_airq = {
.handler = zpci_floating_irq_handler,
.isc = PCI_ISC,
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 6904f4bdbf00..70afb30e0b32 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -56,7 +56,6 @@ config SUPERH
select HAVE_STACKPROTECTOR
select HAVE_SYSCALL_TRACEPOINTS
select IRQ_FORCED_THREADING
- select MAY_HAVE_SPARSE_IRQ
select MODULES_USE_ELF_RELA
select NEED_SG_DMA_LENGTH
select NO_DMA if !MMU && !DMA_COHERENT
diff --git a/arch/sh/Kconfig.debug b/arch/sh/Kconfig.debug
index 958f790273ab..10290e5c1f43 100644
--- a/arch/sh/Kconfig.debug
+++ b/arch/sh/Kconfig.debug
@@ -54,6 +54,7 @@ config DUMP_CODE
config DWARF_UNWINDER
bool "Enable the DWARF unwinder for stacktraces"
+ depends on DEBUG_KERNEL
select FRAME_POINTER
default n
help
diff --git a/arch/sh/boards/mach-landisk/irq.c b/arch/sh/boards/mach-landisk/irq.c
index 29b8b1f85246..0b672b80c561 100644
--- a/arch/sh/boards/mach-landisk/irq.c
+++ b/arch/sh/boards/mach-landisk/irq.c
@@ -26,8 +26,8 @@ enum {
PCI_INTD, /* PCI int D */
ATA, /* ATA */
FATA, /* CF */
- POWER, /* Power swtich */
- BUTTON, /* Button swtich */
+ POWER, /* Power switch */
+ BUTTON, /* Button switch */
};
/* Vectors for LANDISK */
diff --git a/arch/sh/boot/Makefile b/arch/sh/boot/Makefile
index c081e7e2d6e7..5c123f5b2797 100644
--- a/arch/sh/boot/Makefile
+++ b/arch/sh/boot/Makefile
@@ -27,8 +27,8 @@ suffix-$(CONFIG_KERNEL_XZ) := xz
suffix-$(CONFIG_KERNEL_LZO) := lzo
targets := zImage vmlinux.srec romImage uImage uImage.srec uImage.gz \
- uImage.bz2 uImage.lzma uImage.xz uImage.lzo uImage.bin
-extra-y += vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \
+ uImage.bz2 uImage.lzma uImage.xz uImage.lzo uImage.bin \
+ vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \
vmlinux.bin.xz vmlinux.bin.lzo
subdir- := compressed romimage
diff --git a/arch/sh/boot/compressed/.gitignore b/arch/sh/boot/compressed/.gitignore
index 37aa53057369..cd16663bc7c8 100644
--- a/arch/sh/boot/compressed/.gitignore
+++ b/arch/sh/boot/compressed/.gitignore
@@ -1,7 +1,2 @@
# SPDX-License-Identifier: GPL-2.0-only
-ashiftrt.S
-ashldi3.c
-ashlsi3.S
-ashrsi3.S
-lshrsi3.S
vmlinux.bin.*
diff --git a/arch/sh/boot/compressed/Makefile b/arch/sh/boot/compressed/Makefile
index 589d2d8a573d..cf3174df7859 100644
--- a/arch/sh/boot/compressed/Makefile
+++ b/arch/sh/boot/compressed/Makefile
@@ -5,12 +5,18 @@
# create a compressed vmlinux image from the original vmlinux
#
-targets := vmlinux vmlinux.bin vmlinux.bin.gz \
- vmlinux.bin.bz2 vmlinux.bin.lzma \
- vmlinux.bin.xz vmlinux.bin.lzo \
- head_32.o misc.o piggy.o
+OBJECTS := head_32.o misc.o cache.o piggy.o \
+ ashiftrt.o ashldi3.o ashrsi3.o ashlsi3.o lshrsi3.o
+
+# These were previously generated files. When you are building the kernel
+# with O=, make sure to remove the stale files in the output tree. Otherwise,
+# the build system wrongly compiles the stale ones.
+ifdef building_out_of_srctree
+$(shell rm -f $(addprefix $(obj)/, ashiftrt.S ashldi3.c ashrsi3.S ashlsi3.S lshrsi3.S))
+endif
-OBJECTS = $(obj)/head_32.o $(obj)/misc.o $(obj)/cache.o
+targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 \
+ vmlinux.bin.lzma vmlinux.bin.xz vmlinux.bin.lzo $(OBJECTS)
GCOV_PROFILE := n
@@ -33,21 +39,9 @@ ccflags-remove-$(CONFIG_MCOUNT) += -pg
LDFLAGS_vmlinux := --oformat $(ld-bfd) -Ttext $(IMAGE_OFFSET) -e startup \
-T $(obj)/../../kernel/vmlinux.lds
-#
-# Pull in the necessary libgcc bits from the in-kernel implementation.
-#
-lib1funcs-y := ashiftrt.S ashldi3.c ashrsi3.S ashlsi3.S lshrsi3.S
-lib1funcs-obj := \
- $(addsuffix .o, $(basename $(addprefix $(obj)/, $(lib1funcs-y))))
-
-lib1funcs-dir := $(srctree)/arch/$(SRCARCH)/lib
-
-KBUILD_CFLAGS += -I$(lib1funcs-dir) -DDISABLE_BRANCH_PROFILING
-
-$(addprefix $(obj)/,$(lib1funcs-y)): $(obj)/%: $(lib1funcs-dir)/% FORCE
- $(call cmd,shipped)
+KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
-$(obj)/vmlinux: $(OBJECTS) $(obj)/piggy.o $(lib1funcs-obj) FORCE
+$(obj)/vmlinux: $(addprefix $(obj)/, $(OBJECTS)) FORCE
$(call if_changed,ld)
$(obj)/vmlinux.bin: vmlinux FORCE
diff --git a/arch/sh/boot/compressed/ashiftrt.S b/arch/sh/boot/compressed/ashiftrt.S
new file mode 100644
index 000000000000..0f3b291a3f4b
--- /dev/null
+++ b/arch/sh/boot/compressed/ashiftrt.S
@@ -0,0 +1,2 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#include "../../lib/ashiftrt.S"
diff --git a/arch/sh/boot/compressed/ashldi3.c b/arch/sh/boot/compressed/ashldi3.c
new file mode 100644
index 000000000000..7cebd646df83
--- /dev/null
+++ b/arch/sh/boot/compressed/ashldi3.c
@@ -0,0 +1,2 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "../../lib/ashldi3.c"
diff --git a/arch/sh/boot/compressed/ashlsi3.S b/arch/sh/boot/compressed/ashlsi3.S
new file mode 100644
index 000000000000..e354262b275f
--- /dev/null
+++ b/arch/sh/boot/compressed/ashlsi3.S
@@ -0,0 +1,2 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#include "../../lib/ashlsi3.S"
diff --git a/arch/sh/boot/compressed/ashrsi3.S b/arch/sh/boot/compressed/ashrsi3.S
new file mode 100644
index 000000000000..e564be9a4dcd
--- /dev/null
+++ b/arch/sh/boot/compressed/ashrsi3.S
@@ -0,0 +1,2 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#include "../../lib/ashrsi3.S"
diff --git a/arch/sh/boot/compressed/lshrsi3.S b/arch/sh/boot/compressed/lshrsi3.S
new file mode 100644
index 000000000000..5a8281b7e516
--- /dev/null
+++ b/arch/sh/boot/compressed/lshrsi3.S
@@ -0,0 +1,2 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#include "../../lib/lshrsi3.S"
diff --git a/arch/sh/include/asm/cacheflush.h b/arch/sh/include/asm/cacheflush.h
index c7a97f32432f..481a664287e2 100644
--- a/arch/sh/include/asm/cacheflush.h
+++ b/arch/sh/include/asm/cacheflush.h
@@ -43,7 +43,6 @@ extern void flush_cache_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end);
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
void flush_dcache_page(struct page *page);
-void flush_dcache_folio(struct folio *folio);
extern void flush_icache_range(unsigned long start, unsigned long end);
#define flush_icache_user_range flush_icache_range
extern void flush_icache_page(struct vm_area_struct *vma,
diff --git a/arch/sh/include/asm/checksum_32.h b/arch/sh/include/asm/checksum_32.h
index 1a391e3a7659..a6501b856f3e 100644
--- a/arch/sh/include/asm/checksum_32.h
+++ b/arch/sh/include/asm/checksum_32.h
@@ -84,7 +84,8 @@ static inline __sum16 csum_fold(__wsum sum)
*/
static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
{
- unsigned int sum, __dummy0, __dummy1;
+ __wsum sum;
+ unsigned int __dummy0, __dummy1;
__asm__ __volatile__(
"mov.l @%1+, %0\n\t"
@@ -197,6 +198,6 @@ static inline __wsum csum_and_copy_to_user(const void *src,
{
if (!access_ok(dst, len))
return 0;
- return csum_partial_copy_generic((__force const void *)src, dst, len);
+ return csum_partial_copy_generic(src, (__force void *)dst, len);
}
#endif /* __ASM_SH_CHECKSUM_H */
diff --git a/arch/sh/include/asm/irq.h b/arch/sh/include/asm/irq.h
index 839551ce398c..1c4923502fd4 100644
--- a/arch/sh/include/asm/irq.h
+++ b/arch/sh/include/asm/irq.h
@@ -6,17 +6,6 @@
#include <asm/machvec.h>
/*
- * Only legacy non-sparseirq platforms have to set a reasonably sane
- * value here. sparseirq platforms allocate their irq_descs on the fly,
- * so will expand automatically based on the number of registered IRQs.
- */
-#ifdef CONFIG_SPARSE_IRQ
-# define NR_IRQS 8
-#else
-# define NR_IRQS 512
-#endif
-
-/*
* This is a special IRQ number for indicating that no IRQ has been
* triggered and to simply ignore the IRQ dispatch. This is a special
* case that can happen with IRQ auto-distribution when multiple CPUs
diff --git a/arch/sh/include/asm/sfp-machine.h b/arch/sh/include/asm/sfp-machine.h
index cbc7cf8c97ce..2d2423478b71 100644
--- a/arch/sh/include/asm/sfp-machine.h
+++ b/arch/sh/include/asm/sfp-machine.h
@@ -13,6 +13,14 @@
#ifndef _SFP_MACHINE_H
#define _SFP_MACHINE_H
+#ifdef __BIG_ENDIAN__
+#define __BYTE_ORDER __BIG_ENDIAN
+#define __LITTLE_ENDIAN 0
+#else
+#define __BYTE_ORDER __LITTLE_ENDIAN
+#define __BIG_ENDIAN 0
+#endif
+
#define _FP_W_TYPE_SIZE 32
#define _FP_W_TYPE unsigned long
#define _FP_WS_TYPE signed long
diff --git a/arch/sh/include/asm/uaccess.h b/arch/sh/include/asm/uaccess.h
index 73f3b48d4a34..8867bb04b00e 100644
--- a/arch/sh/include/asm/uaccess.h
+++ b/arch/sh/include/asm/uaccess.h
@@ -68,7 +68,7 @@ struct __large_struct { unsigned long buf[100]; };
({ \
long __gu_err = -EFAULT; \
unsigned long __gu_val = 0; \
- const __typeof__(*(ptr)) *__gu_addr = (ptr); \
+ const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \
if (likely(access_ok(__gu_addr, (size)))) \
__get_user_size(__gu_val, __gu_addr, (size), __gu_err); \
(x) = (__force __typeof__(*(ptr)))__gu_val; \
@@ -124,7 +124,7 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n)
* Clear the area and return remaining number of bytes
* (on failure. Usually it's 0.)
*/
-__kernel_size_t __clear_user(void *addr, __kernel_size_t size);
+__kernel_size_t __clear_user(void __user *addr, __kernel_size_t size);
#define clear_user(addr,n) \
({ \
diff --git a/arch/sh/kernel/cpu/sh4a/smp-shx3.c b/arch/sh/kernel/cpu/sh4a/smp-shx3.c
index f8a2bec0f260..1261dc7b84e8 100644
--- a/arch/sh/kernel/cpu/sh4a/smp-shx3.c
+++ b/arch/sh/kernel/cpu/sh4a/smp-shx3.c
@@ -73,8 +73,9 @@ static void shx3_prepare_cpus(unsigned int max_cpus)
BUILD_BUG_ON(SMP_MSG_NR >= 8);
for (i = 0; i < SMP_MSG_NR; i++)
- request_irq(104 + i, ipi_interrupt_handler,
- IRQF_PERCPU, "IPI", (void *)(long)i);
+ if (request_irq(104 + i, ipi_interrupt_handler,
+ IRQF_PERCPU, "IPI", (void *)(long)i))
+ pr_err("Failed to request irq %d\n", i);
for (i = 0; i < max_cpus; i++)
set_cpu_present(i, true);
diff --git a/arch/sh/kernel/crash_dump.c b/arch/sh/kernel/crash_dump.c
index a9086127b16d..5b41b59698c1 100644
--- a/arch/sh/kernel/crash_dump.c
+++ b/arch/sh/kernel/crash_dump.c
@@ -26,7 +26,7 @@
ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
size_t csize, unsigned long offset, int userbuf)
{
- void *vaddr;
+ void __iomem *vaddr;
if (!csize)
return 0;
@@ -34,7 +34,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
vaddr = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE);
if (userbuf) {
- if (copy_to_user(buf, (vaddr + offset), csize)) {
+ if (copy_to_user((void __user *)buf, (vaddr + offset), csize)) {
iounmap(vaddr);
return -EFAULT;
}
diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl
index 208f131659c5..d9539d28bdaa 100644
--- a/arch/sh/kernel/syscalls/syscall.tbl
+++ b/arch/sh/kernel/syscalls/syscall.tbl
@@ -451,3 +451,4 @@
446 common landlock_restrict_self sys_landlock_restrict_self
# 447 reserved for memfd_secret
448 common process_mrelease sys_process_mrelease
+449 common futex_waitv sys_futex_waitv
diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c
index b62ad0ba2395..b3c715bc254b 100644
--- a/arch/sh/kernel/traps_32.c
+++ b/arch/sh/kernel/traps_32.c
@@ -490,7 +490,7 @@ asmlinkage void do_address_error(struct pt_regs *regs,
inc_unaligned_user_access();
oldfs = force_uaccess_begin();
- if (copy_from_user(&instruction, (insn_size_t *)(regs->pc & ~1),
+ if (copy_from_user(&instruction, (insn_size_t __user *)(regs->pc & ~1),
sizeof(instruction))) {
force_uaccess_end(oldfs);
goto uspace_segv;
@@ -614,7 +614,7 @@ asmlinkage void do_reserved_inst(void)
unsigned short inst = 0;
int err;
- get_user(inst, (unsigned short*)regs->pc);
+ get_user(inst, (unsigned short __user *)regs->pc);
err = do_fpu_inst(inst, regs);
if (!err) {
@@ -699,9 +699,9 @@ asmlinkage void do_illegal_slot_inst(void)
return;
#ifdef CONFIG_SH_FPU_EMU
- get_user(inst, (unsigned short *)regs->pc + 1);
+ get_user(inst, (unsigned short __user *)regs->pc + 1);
if (!do_fpu_inst(inst, regs)) {
- get_user(inst, (unsigned short *)regs->pc);
+ get_user(inst, (unsigned short __user *)regs->pc);
if (!emulate_branch(inst, regs))
return;
/* fault in branch.*/
diff --git a/arch/sh/math-emu/math.c b/arch/sh/math-emu/math.c
index e8be0eca0444..cdaef6501d76 100644
--- a/arch/sh/math-emu/math.c
+++ b/arch/sh/math-emu/math.c
@@ -51,8 +51,8 @@
#define Rn (regs->regs[n])
#define Rm (regs->regs[m])
-#define WRITE(d,a) ({if(put_user(d, (typeof (d)*)a)) return -EFAULT;})
-#define READ(d,a) ({if(get_user(d, (typeof (d)*)a)) return -EFAULT;})
+#define MWRITE(d,a) ({if(put_user(d, (typeof (d) __user *)a)) return -EFAULT;})
+#define MREAD(d,a) ({if(get_user(d, (typeof (d) __user *)a)) return -EFAULT;})
#define PACK_S(r,f) FP_PACK_SP(&r,f)
#define UNPACK_S(f,r) FP_UNPACK_SP(f,&r)
@@ -157,11 +157,11 @@ fmov_idx_reg(struct sh_fpu_soft_struct *fregs, struct pt_regs *regs, int m,
{
if (FPSCR_SZ) {
FMOV_EXT(n);
- READ(FRn, Rm + R0 + 4);
+ MREAD(FRn, Rm + R0 + 4);
n++;
- READ(FRn, Rm + R0);
+ MREAD(FRn, Rm + R0);
} else {
- READ(FRn, Rm + R0);
+ MREAD(FRn, Rm + R0);
}
return 0;
@@ -173,11 +173,11 @@ fmov_mem_reg(struct sh_fpu_soft_struct *fregs, struct pt_regs *regs, int m,
{
if (FPSCR_SZ) {
FMOV_EXT(n);
- READ(FRn, Rm + 4);
+ MREAD(FRn, Rm + 4);
n++;
- READ(FRn, Rm);
+ MREAD(FRn, Rm);
} else {
- READ(FRn, Rm);
+ MREAD(FRn, Rm);
}
return 0;
@@ -189,12 +189,12 @@ fmov_inc_reg(struct sh_fpu_soft_struct *fregs, struct pt_regs *regs, int m,
{
if (FPSCR_SZ) {
FMOV_EXT(n);
- READ(FRn, Rm + 4);
+ MREAD(FRn, Rm + 4);
n++;
- READ(FRn, Rm);
+ MREAD(FRn, Rm);
Rm += 8;
} else {
- READ(FRn, Rm);
+ MREAD(FRn, Rm);
Rm += 4;
}
@@ -207,11 +207,11 @@ fmov_reg_idx(struct sh_fpu_soft_struct *fregs, struct pt_regs *regs, int m,
{
if (FPSCR_SZ) {
FMOV_EXT(m);
- WRITE(FRm, Rn + R0 + 4);
+ MWRITE(FRm, Rn + R0 + 4);
m++;
- WRITE(FRm, Rn + R0);
+ MWRITE(FRm, Rn + R0);
} else {
- WRITE(FRm, Rn + R0);
+ MWRITE(FRm, Rn + R0);
}
return 0;
@@ -223,11 +223,11 @@ fmov_reg_mem(struct sh_fpu_soft_struct *fregs, struct pt_regs *regs, int m,
{
if (FPSCR_SZ) {
FMOV_EXT(m);
- WRITE(FRm, Rn + 4);
+ MWRITE(FRm, Rn + 4);
m++;
- WRITE(FRm, Rn);
+ MWRITE(FRm, Rn);
} else {
- WRITE(FRm, Rn);
+ MWRITE(FRm, Rn);
}
return 0;
@@ -240,12 +240,12 @@ fmov_reg_dec(struct sh_fpu_soft_struct *fregs, struct pt_regs *regs, int m,
if (FPSCR_SZ) {
FMOV_EXT(m);
Rn -= 8;
- WRITE(FRm, Rn + 4);
+ MWRITE(FRm, Rn + 4);
m++;
- WRITE(FRm, Rn);
+ MWRITE(FRm, Rn);
} else {
Rn -= 4;
- WRITE(FRm, Rn);
+ MWRITE(FRm, Rn);
}
return 0;
@@ -445,11 +445,11 @@ id_sys(struct sh_fpu_soft_struct *fregs, struct pt_regs *regs, u16 code)
case 0x4052:
case 0x4062:
Rn -= 4;
- WRITE(*reg, Rn);
+ MWRITE(*reg, Rn);
break;
case 0x4056:
case 0x4066:
- READ(*reg, Rn);
+ MREAD(*reg, Rn);
Rn += 4;
break;
default:
@@ -468,109 +468,6 @@ static int fpu_emulate(u16 code, struct sh_fpu_soft_struct *fregs, struct pt_reg
}
/**
- * denormal_to_double - Given denormalized float number,
- * store double float
- *
- * @fpu: Pointer to sh_fpu_soft structure
- * @n: Index to FP register
- */
-static void denormal_to_double(struct sh_fpu_soft_struct *fpu, int n)
-{
- unsigned long du, dl;
- unsigned long x = fpu->fpul;
- int exp = 1023 - 126;
-
- if (x != 0 && (x & 0x7f800000) == 0) {
- du = (x & 0x80000000);
- while ((x & 0x00800000) == 0) {
- x <<= 1;
- exp--;
- }
- x &= 0x007fffff;
- du |= (exp << 20) | (x >> 3);
- dl = x << 29;
-
- fpu->fp_regs[n] = du;
- fpu->fp_regs[n+1] = dl;
- }
-}
-
-/**
- * ieee_fpe_handler - Handle denormalized number exception
- *
- * @regs: Pointer to register structure
- *
- * Returns 1 when it's handled (should not cause exception).
- */
-static int ieee_fpe_handler(struct pt_regs *regs)
-{
- unsigned short insn = *(unsigned short *)regs->pc;
- unsigned short finsn;
- unsigned long nextpc;
- int nib[4] = {
- (insn >> 12) & 0xf,
- (insn >> 8) & 0xf,
- (insn >> 4) & 0xf,
- insn & 0xf};
-
- if (nib[0] == 0xb ||
- (nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb)) /* bsr & jsr */
- regs->pr = regs->pc + 4;
-
- if (nib[0] == 0xa || nib[0] == 0xb) { /* bra & bsr */
- nextpc = regs->pc + 4 + ((short) ((insn & 0xfff) << 4) >> 3);
- finsn = *(unsigned short *) (regs->pc + 2);
- } else if (nib[0] == 0x8 && nib[1] == 0xd) { /* bt/s */
- if (regs->sr & 1)
- nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1);
- else
- nextpc = regs->pc + 4;
- finsn = *(unsigned short *) (regs->pc + 2);
- } else if (nib[0] == 0x8 && nib[1] == 0xf) { /* bf/s */
- if (regs->sr & 1)
- nextpc = regs->pc + 4;
- else
- nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1);
- finsn = *(unsigned short *) (regs->pc + 2);
- } else if (nib[0] == 0x4 && nib[3] == 0xb &&
- (nib[2] == 0x0 || nib[2] == 0x2)) { /* jmp & jsr */
- nextpc = regs->regs[nib[1]];
- finsn = *(unsigned short *) (regs->pc + 2);
- } else if (nib[0] == 0x0 && nib[3] == 0x3 &&
- (nib[2] == 0x0 || nib[2] == 0x2)) { /* braf & bsrf */
- nextpc = regs->pc + 4 + regs->regs[nib[1]];
- finsn = *(unsigned short *) (regs->pc + 2);
- } else if (insn == 0x000b) { /* rts */
- nextpc = regs->pr;
- finsn = *(unsigned short *) (regs->pc + 2);
- } else {
- nextpc = regs->pc + 2;
- finsn = insn;
- }
-
- if ((finsn & 0xf1ff) == 0xf0ad) { /* fcnvsd */
- struct task_struct *tsk = current;
-
- if ((tsk->thread.xstate->softfpu.fpscr & (1 << 17))) {
- /* FPU error */
- denormal_to_double (&tsk->thread.xstate->softfpu,
- (finsn >> 8) & 0xf);
- tsk->thread.xstate->softfpu.fpscr &=
- ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK);
- task_thread_info(tsk)->status |= TS_USEDFPU;
- } else {
- force_sig_fault(SIGFPE, FPE_FLTINV,
- (void __user *)regs->pc);
- }
-
- regs->pc = nextpc;
- return 1;
- }
-
- return 0;
-}
-
-/**
* fpu_init - Initialize FPU registers
* @fpu: Pointer to software emulated FPU registers.
*/
diff --git a/arch/sh/mm/nommu.c b/arch/sh/mm/nommu.c
index 8b4504413c5f..78c4b6e6d33b 100644
--- a/arch/sh/mm/nommu.c
+++ b/arch/sh/mm/nommu.c
@@ -28,9 +28,9 @@ __kernel_size_t __copy_user(void *to, const void *from, __kernel_size_t n)
return 0;
}
-__kernel_size_t __clear_user(void *to, __kernel_size_t n)
+__kernel_size_t __clear_user(void __user *to, __kernel_size_t n)
{
- memset(to, 0, n);
+ memset((__force void *)to, 0, n);
return 0;
}
diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c
index cd677bc564a7..ffab16369bea 100644
--- a/arch/sparc/kernel/signal_32.c
+++ b/arch/sparc/kernel/signal_32.c
@@ -244,7 +244,7 @@ static int setup_frame(struct ksignal *ksig, struct pt_regs *regs,
get_sigframe(ksig, regs, sigframe_size);
if (invalid_frame_pointer(sf, sigframe_size)) {
- force_fatal_sig(SIGILL);
+ force_exit_sig(SIGILL);
return -EINVAL;
}
@@ -336,7 +336,7 @@ static int setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs,
sf = (struct rt_signal_frame __user *)
get_sigframe(ksig, regs, sigframe_size);
if (invalid_frame_pointer(sf, sigframe_size)) {
- force_fatal_sig(SIGILL);
+ force_exit_sig(SIGILL);
return -EINVAL;
}
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index c37764dc764d..46adabcb1720 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -494,3 +494,4 @@
446 common landlock_restrict_self sys_landlock_restrict_self
# 447 reserved for memfd_secret
448 common process_mrelease sys_process_mrelease
+449 common futex_waitv sys_futex_waitv
diff --git a/arch/sparc/kernel/windows.c b/arch/sparc/kernel/windows.c
index bbbd40cc6b28..8f20862ccc83 100644
--- a/arch/sparc/kernel/windows.c
+++ b/arch/sparc/kernel/windows.c
@@ -122,7 +122,7 @@ void try_to_clear_window_buffer(struct pt_regs *regs, int who)
if ((sp & 7) ||
copy_to_user((char __user *) sp, &tp->reg_window[window],
sizeof(struct reg_window32))) {
- force_fatal_sig(SIGILL);
+ force_exit_sig(SIGILL);
return;
}
}
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 95dd1ee01546..7399327d1eff 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -193,7 +193,7 @@ config X86
select HAVE_DYNAMIC_FTRACE_WITH_ARGS if X86_64
select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
select HAVE_SAMPLE_FTRACE_DIRECT if X86_64
- select HAVE_SAMPLE_FTRACE_MULTI_DIRECT if X86_64
+ select HAVE_SAMPLE_FTRACE_DIRECT_MULTI if X86_64
select HAVE_EBPF_JIT
select HAVE_EFFICIENT_UNALIGNED_ACCESS
select HAVE_EISA
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index 0b6b277ee050..fd2ee9408e91 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -226,7 +226,7 @@ bool emulate_vsyscall(unsigned long error_code,
if ((!tmp && regs->orig_ax != syscall_nr) || regs->ip != address) {
warn_bad_vsyscall(KERN_DEBUG, regs,
"seccomp tried to change syscall nr or ip");
- force_fatal_sig(SIGSYS);
+ force_exit_sig(SIGSYS);
return true;
}
regs->orig_ax = -1;
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 603964408d2d..ec6444f2c9dc 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2211,7 +2211,6 @@ intel_pmu_snapshot_branch_stack(struct perf_branch_entry *entries, unsigned int
/* must not have branches... */
local_irq_save(flags);
__intel_pmu_disable_all(false); /* we don't care about BTS */
- __intel_pmu_pebs_disable_all();
__intel_pmu_lbr_disable();
/* ... until here */
return __intel_pmu_snapshot_branch_stack(entries, cnt, flags);
@@ -2225,7 +2224,6 @@ intel_pmu_snapshot_arch_branch_stack(struct perf_branch_entry *entries, unsigned
/* must not have branches... */
local_irq_save(flags);
__intel_pmu_disable_all(false); /* we don't care about BTS */
- __intel_pmu_pebs_disable_all();
__intel_pmu_arch_lbr_disable();
/* ... until here */
return __intel_pmu_snapshot_branch_stack(entries, cnt, flags);
@@ -3048,8 +3046,10 @@ intel_vlbr_constraints(struct perf_event *event)
{
struct event_constraint *c = &vlbr_constraint;
- if (unlikely(constraint_match(c, event->hw.config)))
+ if (unlikely(constraint_match(c, event->hw.config))) {
+ event->hw.flags |= c->flags;
return c;
+ }
return NULL;
}
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 6b72e9b55c69..8043213b75a5 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -265,6 +265,8 @@ void intel_pmu_lbr_reset(void)
cpuc->last_task_ctx = NULL;
cpuc->last_log_id = 0;
+ if (!static_cpu_has(X86_FEATURE_ARCH_LBR) && cpuc->lbr_select)
+ wrmsrl(MSR_LBR_SELECT, 0);
}
/*
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index eb2c6cea9d0d..3660f698fb2a 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -3608,6 +3608,9 @@ static int skx_cha_hw_config(struct intel_uncore_box *box, struct perf_event *ev
struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
struct extra_reg *er;
int idx = 0;
+ /* Any of the CHA events may be filtered by Thread/Core-ID.*/
+ if (event->hw.config & SNBEP_CBO_PMON_CTL_TID_EN)
+ idx = SKX_CHA_MSR_PMON_BOX_FILTER_TID;
for (er = skx_uncore_cha_extra_regs; er->msr; er++) {
if (er->event != (event->hw.config & er->config_mask))
@@ -3675,6 +3678,7 @@ static struct event_constraint skx_uncore_iio_constraints[] = {
UNCORE_EVENT_CONSTRAINT(0xc0, 0xc),
UNCORE_EVENT_CONSTRAINT(0xc5, 0xc),
UNCORE_EVENT_CONSTRAINT(0xd4, 0xc),
+ UNCORE_EVENT_CONSTRAINT(0xd5, 0xc),
EVENT_CONSTRAINT_END
};
@@ -4525,6 +4529,13 @@ static void snr_iio_cleanup_mapping(struct intel_uncore_type *type)
pmu_iio_cleanup_mapping(type, &snr_iio_mapping_group);
}
+static struct event_constraint snr_uncore_iio_constraints[] = {
+ UNCORE_EVENT_CONSTRAINT(0x83, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0xc0, 0xc),
+ UNCORE_EVENT_CONSTRAINT(0xd5, 0xc),
+ EVENT_CONSTRAINT_END
+};
+
static struct intel_uncore_type snr_uncore_iio = {
.name = "iio",
.num_counters = 4,
@@ -4536,6 +4547,7 @@ static struct intel_uncore_type snr_uncore_iio = {
.event_mask_ext = SNR_IIO_PMON_RAW_EVENT_MASK_EXT,
.box_ctl = SNR_IIO_MSR_PMON_BOX_CTL,
.msr_offset = SNR_IIO_MSR_OFFSET,
+ .constraints = snr_uncore_iio_constraints,
.ops = &ivbep_uncore_msr_ops,
.format_group = &snr_uncore_iio_format_group,
.attr_update = snr_iio_attr_update,
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 24f4a06ac46a..96eb7db31c8e 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -177,6 +177,9 @@ void set_hv_tscchange_cb(void (*cb)(void))
return;
}
+ if (!hv_vp_index)
+ return;
+
hv_reenlightenment_cb = cb;
/* Make sure callback is registered before we write to MSRs */
@@ -383,20 +386,13 @@ static void __init hv_get_partition_id(void)
*/
void __init hyperv_init(void)
{
- u64 guest_id, required_msrs;
+ u64 guest_id;
union hv_x64_msr_hypercall_contents hypercall_msr;
int cpuhp;
if (x86_hyper_type != X86_HYPER_MS_HYPERV)
return;
- /* Absolutely required MSRs */
- required_msrs = HV_MSR_HYPERCALL_AVAILABLE |
- HV_MSR_VP_INDEX_AVAILABLE;
-
- if ((ms_hyperv.features & required_msrs) != required_msrs)
- return;
-
if (hv_common_init())
return;
diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
index 6053674f9132..c2767a6a387e 100644
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -102,12 +102,6 @@ extern void switch_fpu_return(void);
*/
extern int cpu_has_xfeatures(u64 xfeatures_mask, const char **feature_name);
-/*
- * Tasks that are not using SVA have mm->pasid set to zero to note that they
- * will not have the valid bit set in MSR_IA32_PASID while they are running.
- */
-#define PASID_DISABLED 0
-
/* Trap handling */
extern int fpu__exception_code(struct fpu *fpu, int trap_nr);
extern void fpu_sync_fpstate(struct fpu *fpu);
diff --git a/arch/x86/include/asm/fpu/xcr.h b/arch/x86/include/asm/fpu/xcr.h
index 79f95d3787e2..9656a5bc6fea 100644
--- a/arch/x86/include/asm/fpu/xcr.h
+++ b/arch/x86/include/asm/fpu/xcr.h
@@ -3,6 +3,7 @@
#define _ASM_X86_FPU_XCR_H
#define XCR_XFEATURE_ENABLED_MASK 0x00000000
+#define XCR_XFEATURE_IN_USE_MASK 0x00000001
static inline u64 xgetbv(u32 index)
{
@@ -20,4 +21,15 @@ static inline void xsetbv(u32 index, u64 value)
asm volatile("xsetbv" :: "a" (eax), "d" (edx), "c" (index));
}
+/*
+ * Return a mask of xfeatures which are currently being tracked
+ * by the processor as being in the initial configuration.
+ *
+ * Callers should check X86_FEATURE_XGETBV1.
+ */
+static inline u64 xfeatures_in_use(void)
+{
+ return xgetbv(XCR_XFEATURE_IN_USE_MASK);
+}
+
#endif /* _ASM_X86_FPU_XCR_H */
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 0f8b90ab18c9..cd3dd170e23a 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -92,6 +92,13 @@
#define XFEATURE_MASK_FPSTATE (XFEATURE_MASK_USER_RESTORE | \
XFEATURE_MASK_SUPERVISOR_SUPPORTED)
+/*
+ * Features in this mask have space allocated in the signal frame, but may not
+ * have that space initialized when the feature is in its init state.
+ */
+#define XFEATURE_MASK_SIGFRAME_INITOPT (XFEATURE_MASK_XTILE | \
+ XFEATURE_MASK_USER_DYNAMIC)
+
extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
extern void __init update_regset_xstate_info(unsigned int size,
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 27158436f322..5a0bcf8b78d7 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -108,6 +108,8 @@
#define INTEL_FAM6_ALDERLAKE 0x97 /* Golden Cove / Gracemont */
#define INTEL_FAM6_ALDERLAKE_L 0x9A /* Golden Cove / Gracemont */
+#define INTEL_FAM6_RAPTOR_LAKE 0xB7
+
/* "Small Core" Processors (Atom) */
#define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 2acf37cc1991..6ac61f85e07b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -38,7 +38,6 @@
#define __KVM_HAVE_ARCH_VCPU_DEBUGFS
#define KVM_MAX_VCPUS 1024
-#define KVM_SOFT_MAX_VCPUS 710
/*
* In x86, the VCPU ID corresponds to the APIC ID, and APIC IDs
@@ -364,6 +363,7 @@ union kvm_mmu_extended_role {
unsigned int cr4_smap:1;
unsigned int cr4_smep:1;
unsigned int cr4_la57:1;
+ unsigned int efer_lma:1;
};
};
@@ -725,6 +725,7 @@ struct kvm_vcpu_arch {
int cpuid_nent;
struct kvm_cpuid_entry2 *cpuid_entries;
+ u32 kvm_cpuid_base;
u64 reserved_gpa_bits;
int maxphyaddr;
@@ -748,7 +749,7 @@ struct kvm_vcpu_arch {
u8 preempted;
u64 msr_val;
u64 last_steal;
- struct gfn_to_pfn_cache cache;
+ struct gfn_to_hva_cache cache;
} st;
u64 l1_tsc_offset;
@@ -1034,6 +1035,7 @@ struct kvm_x86_msr_filter {
#define APICV_INHIBIT_REASON_IRQWIN 3
#define APICV_INHIBIT_REASON_PIT_REINJ 4
#define APICV_INHIBIT_REASON_X2APIC 5
+#define APICV_INHIBIT_REASON_BLOCKIRQ 6
struct kvm_arch {
unsigned long n_used_mmu_pages;
@@ -1476,6 +1478,7 @@ struct kvm_x86_ops {
int (*mem_enc_reg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
int (*vm_copy_enc_context_from)(struct kvm *kvm, unsigned int source_fd);
+ int (*vm_move_enc_context_from)(struct kvm *kvm, unsigned int source_fd);
int (*get_msr_feature)(struct kvm_msr_entry *entry);
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 69299878b200..56935ebb1dfe 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -83,6 +83,18 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
return ret;
}
+static inline long kvm_sev_hypercall3(unsigned int nr, unsigned long p1,
+ unsigned long p2, unsigned long p3)
+{
+ long ret;
+
+ asm volatile("vmmcall"
+ : "=a"(ret)
+ : "a"(nr), "b"(p1), "c"(p2), "d"(p3)
+ : "memory");
+ return ret;
+}
+
#ifdef CONFIG_KVM_GUEST
void kvmclock_init(void);
void kvmclock_disable(void);
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index 2d4f5c17d79c..e2c6f433ed10 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -44,6 +44,8 @@ void __init sme_enable(struct boot_params *bp);
int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size);
int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size);
+void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages,
+ bool enc);
void __init mem_encrypt_free_decrypted_mem(void);
@@ -78,6 +80,8 @@ static inline int __init
early_set_memory_decrypted(unsigned long vaddr, unsigned long size) { return 0; }
static inline int __init
early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; }
+static inline void __init
+early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages, bool enc) {}
static inline void mem_encrypt_free_decrypted_mem(void) { }
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index cebec95a7124..21c4a694ca11 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -97,6 +97,12 @@ static inline void paravirt_arch_exit_mmap(struct mm_struct *mm)
PVOP_VCALL1(mmu.exit_mmap, mm);
}
+static inline void notify_page_enc_status_changed(unsigned long pfn,
+ int npages, bool enc)
+{
+ PVOP_VCALL3(mmu.notify_page_enc_status_changed, pfn, npages, enc);
+}
+
#ifdef CONFIG_PARAVIRT_XXL
static inline void load_sp0(unsigned long sp0)
{
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index fc1151e77569..a69012e1903f 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -168,6 +168,7 @@ struct pv_mmu_ops {
/* Hook for intercepting the destruction of an mm_struct. */
void (*exit_mmap)(struct mm_struct *mm);
+ void (*notify_page_enc_status_changed)(unsigned long pfn, int npages, bool enc);
#ifdef CONFIG_PARAVIRT_XXL
struct paravirt_callee_save read_cr2;
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 191878a65c61..355d38c0cf60 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -806,11 +806,14 @@ static inline u32 amd_get_nodes_per_socket(void) { return 0; }
static inline u32 amd_get_highest_perf(void) { return 0; }
#endif
+#define for_each_possible_hypervisor_cpuid_base(function) \
+ for (function = 0x40000000; function < 0x40010000; function += 0x100)
+
static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves)
{
uint32_t base, eax, signature[3];
- for (base = 0x40000000; base < 0x40010000; base += 0x100) {
+ for_each_possible_hypervisor_cpuid_base(base) {
cpuid(base, &eax, &signature[0], &signature[1], &signature[2]);
if (!memcmp(sig, signature, 12) &&
diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h
index 43fa081a1adb..872617542bbc 100644
--- a/arch/x86/include/asm/set_memory.h
+++ b/arch/x86/include/asm/set_memory.h
@@ -83,6 +83,7 @@ int set_pages_rw(struct page *page, int numpages);
int set_direct_map_invalid_noflush(struct page *page);
int set_direct_map_default_noflush(struct page *page);
bool kernel_page_present(struct page *page);
+void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc);
extern int kernel_set_to_readonly;
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 08b0e90623ad..81a0211a372d 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -126,6 +126,7 @@ static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask)
void cpu_disable_common(void);
void native_smp_prepare_boot_cpu(void);
+void smp_prepare_cpus_common(void);
void native_smp_prepare_cpus(unsigned int max_cpus);
void calculate_max_logical_packages(void);
void native_smp_cpus_done(unsigned int max_cpus);
diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h
index cbb67b6030f9..39ebe0511869 100644
--- a/arch/x86/include/asm/static_call.h
+++ b/arch/x86/include/asm/static_call.h
@@ -27,6 +27,7 @@
".globl " STATIC_CALL_TRAMP_STR(name) " \n" \
STATIC_CALL_TRAMP_STR(name) ": \n" \
insns " \n" \
+ ".byte 0x53, 0x43, 0x54 \n" \
".type " STATIC_CALL_TRAMP_STR(name) ", @function \n" \
".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \
".popsection \n")
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 0575f5863b7f..e5e0fe10c692 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -281,13 +281,13 @@ HYPERVISOR_callback_op(int cmd, void *arg)
return _hypercall2(int, callback_op, cmd, arg);
}
-static inline int
+static __always_inline int
HYPERVISOR_set_debugreg(int reg, unsigned long value)
{
return _hypercall2(int, set_debugreg, reg, value);
}
-static inline unsigned long
+static __always_inline unsigned long
HYPERVISOR_get_debugreg(int reg)
{
return _hypercall1(unsigned long, get_debugreg, reg);
diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h
index 4957f59deb40..5adab895127e 100644
--- a/arch/x86/include/asm/xen/hypervisor.h
+++ b/arch/x86/include/asm/xen/hypervisor.h
@@ -64,6 +64,7 @@ void xen_arch_unregister_cpu(int num);
#ifdef CONFIG_PVH
void __init xen_pvh_init(struct boot_params *boot_params);
+void __init mem_map_via_hcall(struct boot_params *boot_params_p);
#endif
#endif /* _ASM_X86_XEN_HYPERVISOR_H */
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 5146bbab84d4..6e64b27b2c1e 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -8,6 +8,7 @@
* should be used to determine that a VM is running under KVM.
*/
#define KVM_CPUID_SIGNATURE 0x40000000
+#define KVM_SIGNATURE "KVMKVMKVM\0\0\0"
/* This CPUID returns two feature bitmaps in eax, edx. Before enabling
* a particular paravirtualization, the appropriate feature bit should
diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
index cb2fdd130aae..c881bcafba7d 100644
--- a/arch/x86/kernel/cpu/cpuid-deps.c
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -76,6 +76,7 @@ static const struct cpuid_dep cpuid_deps[] = {
{ X86_FEATURE_SGX1, X86_FEATURE_SGX },
{ X86_FEATURE_SGX2, X86_FEATURE_SGX1 },
{ X86_FEATURE_XFD, X86_FEATURE_XSAVES },
+ { X86_FEATURE_XFD, X86_FEATURE_XGETBV1 },
{ X86_FEATURE_AMX_TILE, X86_FEATURE_XFD },
{}
};
diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c
index acfd5d9f93c6..bb9a46a804bf 100644
--- a/arch/x86/kernel/cpu/mce/intel.c
+++ b/arch/x86/kernel/cpu/mce/intel.c
@@ -547,12 +547,13 @@ bool intel_filter_mce(struct mce *m)
{
struct cpuinfo_x86 *c = &boot_cpu_data;
- /* MCE errata HSD131, HSM142, HSW131, BDM48, and HSM142 */
+ /* MCE errata HSD131, HSM142, HSW131, BDM48, HSM142 and SKX37 */
if ((c->x86 == 6) &&
((c->x86_model == INTEL_FAM6_HASWELL) ||
(c->x86_model == INTEL_FAM6_HASWELL_L) ||
(c->x86_model == INTEL_FAM6_BROADWELL) ||
- (c->x86_model == INTEL_FAM6_HASWELL_G)) &&
+ (c->x86_model == INTEL_FAM6_HASWELL_G) ||
+ (c->x86_model == INTEL_FAM6_SKYLAKE_X)) &&
(m->bank == 0) &&
((m->status & 0xa0000000ffffffff) == 0x80000000000f0005))
return true;
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 4794b716ec79..ff55df60228f 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -163,12 +163,22 @@ static uint32_t __init ms_hyperv_platform(void)
cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS,
&eax, &hyp_signature[0], &hyp_signature[1], &hyp_signature[2]);
- if (eax >= HYPERV_CPUID_MIN &&
- eax <= HYPERV_CPUID_MAX &&
- !memcmp("Microsoft Hv", hyp_signature, 12))
- return HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS;
+ if (eax < HYPERV_CPUID_MIN || eax > HYPERV_CPUID_MAX ||
+ memcmp("Microsoft Hv", hyp_signature, 12))
+ return 0;
- return 0;
+ /* HYPERCALL and VP_INDEX MSRs are mandatory for all features. */
+ eax = cpuid_eax(HYPERV_CPUID_FEATURES);
+ if (!(eax & HV_MSR_HYPERCALL_AVAILABLE)) {
+ pr_warn("x86/hyperv: HYPERCALL MSR not available.\n");
+ return 0;
+ }
+ if (!(eax & HV_MSR_VP_INDEX_AVAILABLE)) {
+ pr_warn("x86/hyperv: VP_INDEX MSR not available.\n");
+ return 0;
+ }
+
+ return HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS;
}
static unsigned char hv_get_nmi_reason(void)
diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
index 63d3de02bbcc..8471a8b9b48e 100644
--- a/arch/x86/kernel/cpu/sgx/main.c
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -28,8 +28,7 @@ static DECLARE_WAIT_QUEUE_HEAD(ksgxd_waitq);
static LIST_HEAD(sgx_active_page_list);
static DEFINE_SPINLOCK(sgx_reclaimer_lock);
-/* The free page list lock protected variables prepend the lock. */
-static unsigned long sgx_nr_free_pages;
+static atomic_long_t sgx_nr_free_pages = ATOMIC_LONG_INIT(0);
/* Nodes with one or more EPC sections. */
static nodemask_t sgx_numa_mask;
@@ -403,14 +402,15 @@ skip:
spin_lock(&node->lock);
list_add_tail(&epc_page->list, &node->free_page_list);
- sgx_nr_free_pages++;
spin_unlock(&node->lock);
+ atomic_long_inc(&sgx_nr_free_pages);
}
}
static bool sgx_should_reclaim(unsigned long watermark)
{
- return sgx_nr_free_pages < watermark && !list_empty(&sgx_active_page_list);
+ return atomic_long_read(&sgx_nr_free_pages) < watermark &&
+ !list_empty(&sgx_active_page_list);
}
static int ksgxd(void *p)
@@ -471,9 +471,9 @@ static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(int nid)
page = list_first_entry(&node->free_page_list, struct sgx_epc_page, list);
list_del_init(&page->list);
- sgx_nr_free_pages--;
spin_unlock(&node->lock);
+ atomic_long_dec(&sgx_nr_free_pages);
return page;
}
@@ -625,9 +625,9 @@ void sgx_free_epc_page(struct sgx_epc_page *page)
spin_lock(&node->lock);
list_add_tail(&page->list, &node->free_page_list);
- sgx_nr_free_pages++;
spin_unlock(&node->lock);
+ atomic_long_inc(&sgx_nr_free_pages);
}
static bool __init sgx_setup_epc_section(u64 phys_addr, u64 size,
diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h
index e18210dff88c..86ea7c0fa2f6 100644
--- a/arch/x86/kernel/fpu/xstate.h
+++ b/arch/x86/kernel/fpu/xstate.h
@@ -4,6 +4,7 @@
#include <asm/cpufeature.h>
#include <asm/fpu/xstate.h>
+#include <asm/fpu/xcr.h>
#ifdef CONFIG_X86_64
DECLARE_PER_CPU(u64, xfd_state);
@@ -199,6 +200,32 @@ static inline void os_xrstor_supervisor(struct fpstate *fpstate)
}
/*
+ * XSAVE itself always writes all requested xfeatures. Removing features
+ * from the request bitmap reduces the features which are written.
+ * Generate a mask of features which must be written to a sigframe. The
+ * unset features can be optimized away and not written.
+ *
+ * This optimization is user-visible. Only use for states where
+ * uninitialized sigframe contents are tolerable, like dynamic features.
+ *
+ * Users of buffers produced with this optimization must check XSTATE_BV
+ * to determine which features have been optimized out.
+ */
+static inline u64 xfeatures_need_sigframe_write(void)
+{
+ u64 xfeaures_to_write;
+
+ /* In-use features must be written: */
+ xfeaures_to_write = xfeatures_in_use();
+
+ /* Also write all non-optimizable sigframe features: */
+ xfeaures_to_write |= XFEATURE_MASK_USER_SUPPORTED &
+ ~XFEATURE_MASK_SIGFRAME_INITOPT;
+
+ return xfeaures_to_write;
+}
+
+/*
* Save xstate to user space xsave area.
*
* We don't use modified optimization because xrstor/xrstors might track
@@ -220,10 +247,16 @@ static inline int xsave_to_user_sigframe(struct xregs_state __user *buf)
*/
struct fpstate *fpstate = current->thread.fpu.fpstate;
u64 mask = fpstate->user_xfeatures;
- u32 lmask = mask;
- u32 hmask = mask >> 32;
+ u32 lmask;
+ u32 hmask;
int err;
+ /* Optimize away writing unnecessary xfeatures: */
+ if (fpu_state_size_dynamic())
+ mask &= xfeatures_need_sigframe_write();
+
+ lmask = mask;
+ hmask = mask >> 32;
xfd_validate_state(fpstate, mask, false);
stac();
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 8863d1941f1b..59abbdad7729 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -28,6 +28,7 @@
#include <linux/swait.h>
#include <linux/syscore_ops.h>
#include <linux/cc_platform.h>
+#include <linux/efi.h>
#include <asm/timer.h>
#include <asm/cpu.h>
#include <asm/traps.h>
@@ -41,6 +42,7 @@
#include <asm/ptrace.h>
#include <asm/reboot.h>
#include <asm/svm.h>
+#include <asm/e820/api.h>
DEFINE_STATIC_KEY_FALSE(kvm_async_pf_enabled);
@@ -434,6 +436,8 @@ static void kvm_guest_cpu_offline(bool shutdown)
kvm_disable_steal_time();
if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
wrmsrl(MSR_KVM_PV_EOI_EN, 0);
+ if (kvm_para_has_feature(KVM_FEATURE_MIGRATION_CONTROL))
+ wrmsrl(MSR_KVM_MIGRATION_CONTROL, 0);
kvm_pv_disable_apf();
if (!shutdown)
apf_task_wake_all();
@@ -548,6 +552,55 @@ static void kvm_send_ipi_mask_allbutself(const struct cpumask *mask, int vector)
__send_ipi_mask(local_mask, vector);
}
+static int __init setup_efi_kvm_sev_migration(void)
+{
+ efi_char16_t efi_sev_live_migration_enabled[] = L"SevLiveMigrationEnabled";
+ efi_guid_t efi_variable_guid = AMD_SEV_MEM_ENCRYPT_GUID;
+ efi_status_t status;
+ unsigned long size;
+ bool enabled;
+
+ if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT) ||
+ !kvm_para_has_feature(KVM_FEATURE_MIGRATION_CONTROL))
+ return 0;
+
+ if (!efi_enabled(EFI_BOOT))
+ return 0;
+
+ if (!efi_enabled(EFI_RUNTIME_SERVICES)) {
+ pr_info("%s : EFI runtime services are not enabled\n", __func__);
+ return 0;
+ }
+
+ size = sizeof(enabled);
+
+ /* Get variable contents into buffer */
+ status = efi.get_variable(efi_sev_live_migration_enabled,
+ &efi_variable_guid, NULL, &size, &enabled);
+
+ if (status == EFI_NOT_FOUND) {
+ pr_info("%s : EFI live migration variable not found\n", __func__);
+ return 0;
+ }
+
+ if (status != EFI_SUCCESS) {
+ pr_info("%s : EFI variable retrieval failed\n", __func__);
+ return 0;
+ }
+
+ if (enabled == 0) {
+ pr_info("%s: live migration disabled in EFI\n", __func__);
+ return 0;
+ }
+
+ pr_info("%s : live migration enabled in EFI\n", __func__);
+ wrmsrl(MSR_KVM_MIGRATION_CONTROL, KVM_MIGRATION_READY);
+
+ return 1;
+}
+
+late_initcall(setup_efi_kvm_sev_migration);
+
/*
* Set the IPI entry points
*/
@@ -756,7 +809,7 @@ static noinline uint32_t __kvm_cpuid_base(void)
return 0; /* So we don't blow up on old processors */
if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
- return hypervisor_cpuid_base("KVMKVMKVM\0\0\0", 0);
+ return hypervisor_cpuid_base(KVM_SIGNATURE, 0);
return 0;
}
@@ -806,8 +859,62 @@ static bool __init kvm_msi_ext_dest_id(void)
return kvm_para_has_feature(KVM_FEATURE_MSI_EXT_DEST_ID);
}
+static void kvm_sev_hc_page_enc_status(unsigned long pfn, int npages, bool enc)
+{
+ kvm_sev_hypercall3(KVM_HC_MAP_GPA_RANGE, pfn << PAGE_SHIFT, npages,
+ KVM_MAP_GPA_RANGE_ENC_STAT(enc) | KVM_MAP_GPA_RANGE_PAGE_SZ_4K);
+}
+
static void __init kvm_init_platform(void)
{
+ if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT) &&
+ kvm_para_has_feature(KVM_FEATURE_MIGRATION_CONTROL)) {
+ unsigned long nr_pages;
+ int i;
+
+ pv_ops.mmu.notify_page_enc_status_changed =
+ kvm_sev_hc_page_enc_status;
+
+ /*
+ * Reset the host's shared pages list related to kernel
+ * specific page encryption status settings before we load a
+ * new kernel by kexec. Reset the page encryption status
+ * during early boot intead of just before kexec to avoid SMP
+ * races during kvm_pv_guest_cpu_reboot().
+ * NOTE: We cannot reset the complete shared pages list
+ * here as we need to retain the UEFI/OVMF firmware
+ * specific settings.
+ */
+
+ for (i = 0; i < e820_table->nr_entries; i++) {
+ struct e820_entry *entry = &e820_table->entries[i];
+
+ if (entry->type != E820_TYPE_RAM)
+ continue;
+
+ nr_pages = DIV_ROUND_UP(entry->size, PAGE_SIZE);
+
+ kvm_sev_hypercall3(KVM_HC_MAP_GPA_RANGE, entry->addr,
+ nr_pages,
+ KVM_MAP_GPA_RANGE_ENCRYPTED | KVM_MAP_GPA_RANGE_PAGE_SZ_4K);
+ }
+
+ /*
+ * Ensure that _bss_decrypted section is marked as decrypted in the
+ * shared pages list.
+ */
+ nr_pages = DIV_ROUND_UP(__end_bss_decrypted - __start_bss_decrypted,
+ PAGE_SIZE);
+ early_set_mem_enc_dec_hypercall((unsigned long)__start_bss_decrypted,
+ nr_pages, 0);
+
+ /*
+ * If not booted using EFI, enable Live migration support.
+ */
+ if (!efi_enabled(EFI_BOOT))
+ wrmsrl(MSR_KVM_MIGRATION_CONTROL,
+ KVM_MIGRATION_READY);
+ }
kvmclock_init();
x86_platform.apic_post_init = kvm_apic_init;
}
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 7157c2df3bc2..7f7636aac620 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -337,6 +337,7 @@ struct paravirt_patch_template pv_ops = {
(void (*)(struct mmu_gather *, void *))tlb_remove_page,
.mmu.exit_mmap = paravirt_nop,
+ .mmu.notify_page_enc_status_changed = paravirt_nop,
#ifdef CONFIG_PARAVIRT_XXL
.mmu.read_cr2 = __PV_IS_CALLEE_SAVE(pv_native_read_cr2),
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index e9ee8b526319..04143a653a8a 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -964,6 +964,9 @@ unsigned long __get_wchan(struct task_struct *p)
struct unwind_state state;
unsigned long addr = 0;
+ if (!try_get_task_stack(p))
+ return 0;
+
for (unwind_start(&state, p, NULL, NULL); !unwind_done(&state);
unwind_next_frame(&state)) {
addr = unwind_get_return_address(&state);
@@ -974,6 +977,8 @@ unsigned long __get_wchan(struct task_struct *p)
break;
}
+ put_task_stack(p);
+
return addr;
}
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 49b596db5631..6a190c7f4d71 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -742,6 +742,28 @@ dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p)
return 0;
}
+static char * __init prepare_command_line(void)
+{
+#ifdef CONFIG_CMDLINE_BOOL
+#ifdef CONFIG_CMDLINE_OVERRIDE
+ strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
+#else
+ if (builtin_cmdline[0]) {
+ /* append boot loader cmdline to builtin */
+ strlcat(builtin_cmdline, " ", COMMAND_LINE_SIZE);
+ strlcat(builtin_cmdline, boot_command_line, COMMAND_LINE_SIZE);
+ strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
+ }
+#endif
+#endif
+
+ strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
+
+ parse_early_param();
+
+ return command_line;
+}
+
/*
* Determine if we were loaded by an EFI loader. If so, then we have also been
* passed the efi memmap, systab, etc., so we should use these data structures
@@ -831,6 +853,23 @@ void __init setup_arch(char **cmdline_p)
x86_init.oem.arch_setup();
/*
+ * x86_configure_nx() is called before parse_early_param() (called by
+ * prepare_command_line()) to detect whether hardware doesn't support
+ * NX (so that the early EHCI debug console setup can safely call
+ * set_fixmap()). It may then be called again from within noexec_setup()
+ * during parsing early parameters to honor the respective command line
+ * option.
+ */
+ x86_configure_nx();
+
+ /*
+ * This parses early params and it needs to run before
+ * early_reserve_memory() because latter relies on such settings
+ * supplied as early params.
+ */
+ *cmdline_p = prepare_command_line();
+
+ /*
* Do some memory reservations *before* memory is added to memblock, so
* memblock allocations won't overwrite it.
*
@@ -863,33 +902,6 @@ void __init setup_arch(char **cmdline_p)
bss_resource.start = __pa_symbol(__bss_start);
bss_resource.end = __pa_symbol(__bss_stop)-1;
-#ifdef CONFIG_CMDLINE_BOOL
-#ifdef CONFIG_CMDLINE_OVERRIDE
- strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
-#else
- if (builtin_cmdline[0]) {
- /* append boot loader cmdline to builtin */
- strlcat(builtin_cmdline, " ", COMMAND_LINE_SIZE);
- strlcat(builtin_cmdline, boot_command_line, COMMAND_LINE_SIZE);
- strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
- }
-#endif
-#endif
-
- strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
- *cmdline_p = command_line;
-
- /*
- * x86_configure_nx() is called before parse_early_param() to detect
- * whether hardware doesn't support NX (so that the early EHCI debug
- * console setup can safely call set_fixmap()). It may then be called
- * again from within noexec_setup() during parsing early parameters
- * to honor the respective command line option.
- */
- x86_configure_nx();
-
- parse_early_param();
-
#ifdef CONFIG_MEMORY_HOTPLUG
/*
* Memory used by the kernel cannot be hot-removed because Linux
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 8241927addff..ac2909f0cab3 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1350,12 +1350,7 @@ static void __init smp_get_logical_apicid(void)
cpu0_logical_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
}
-/*
- * Prepare for SMP bootup.
- * @max_cpus: configured maximum number of CPUs, It is a legacy parameter
- * for common interface support.
- */
-void __init native_smp_prepare_cpus(unsigned int max_cpus)
+void __init smp_prepare_cpus_common(void)
{
unsigned int i;
@@ -1386,6 +1381,17 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
set_sched_topology(x86_topology);
set_cpu_sibling_map(0);
+}
+
+/*
+ * Prepare for SMP bootup.
+ * @max_cpus: configured maximum number of CPUs, It is a legacy parameter
+ * for common interface support.
+ */
+void __init native_smp_prepare_cpus(unsigned int max_cpus)
+{
+ smp_prepare_cpus_common();
+
init_freq_invariance(false, false);
smp_sanity_check();
diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c
index ea028e736831..9c407a33a774 100644
--- a/arch/x86/kernel/static_call.c
+++ b/arch/x86/kernel/static_call.c
@@ -56,10 +56,15 @@ static void __ref __static_call_transform(void *insn, enum insn_type type, void
text_poke_bp(insn, code, size, emulate);
}
-static void __static_call_validate(void *insn, bool tail)
+static void __static_call_validate(void *insn, bool tail, bool tramp)
{
u8 opcode = *(u8 *)insn;
+ if (tramp && memcmp(insn+5, "SCT", 3)) {
+ pr_err("trampoline signature fail");
+ BUG();
+ }
+
if (tail) {
if (opcode == JMP32_INSN_OPCODE ||
opcode == RET_INSN_OPCODE)
@@ -74,7 +79,8 @@ static void __static_call_validate(void *insn, bool tail)
/*
* If we ever trigger this, our text is corrupt, we'll probably not live long.
*/
- WARN_ONCE(1, "unexpected static_call insn opcode 0x%x at %pS\n", opcode, insn);
+ pr_err("unexpected static_call insn opcode 0x%x at %pS\n", opcode, insn);
+ BUG();
}
static inline enum insn_type __sc_insn(bool null, bool tail)
@@ -97,12 +103,12 @@ void arch_static_call_transform(void *site, void *tramp, void *func, bool tail)
mutex_lock(&text_mutex);
if (tramp) {
- __static_call_validate(tramp, true);
+ __static_call_validate(tramp, true, true);
__static_call_transform(tramp, __sc_insn(!func, true), func);
}
if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site) {
- __static_call_validate(site, tail);
+ __static_call_validate(site, tail, false);
__static_call_transform(site, __sc_insn(!func, tail), func);
}
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index f14f69d7aa3c..c21bcd668284 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -106,7 +106,7 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval)
*/
local_irq_enable();
- BUG_ON(!vm86 || !vm86->user_vm86);
+ BUG_ON(!vm86);
set_flags(regs->pt.flags, VEFLAGS, X86_EFLAGS_VIF | vm86->veflags_mask);
user = vm86->user_vm86;
@@ -160,7 +160,7 @@ Efault_end:
user_access_end();
Efault:
pr_alert("could not access userspace vm86 info\n");
- force_fatal_sig(SIGSEGV);
+ force_exit_sig(SIGSEGV);
goto exit_vm86;
}
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 2d70edb0f323..07e9215e911d 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -99,11 +99,45 @@ static int kvm_check_cpuid(struct kvm_cpuid_entry2 *entries, int nent)
return 0;
}
-void kvm_update_pv_runtime(struct kvm_vcpu *vcpu)
+static void kvm_update_kvm_cpuid_base(struct kvm_vcpu *vcpu)
{
- struct kvm_cpuid_entry2 *best;
+ u32 function;
+ struct kvm_cpuid_entry2 *entry;
+
+ vcpu->arch.kvm_cpuid_base = 0;
+
+ for_each_possible_hypervisor_cpuid_base(function) {
+ entry = kvm_find_cpuid_entry(vcpu, function, 0);
- best = kvm_find_cpuid_entry(vcpu, KVM_CPUID_FEATURES, 0);
+ if (entry) {
+ u32 signature[3];
+
+ signature[0] = entry->ebx;
+ signature[1] = entry->ecx;
+ signature[2] = entry->edx;
+
+ BUILD_BUG_ON(sizeof(signature) > sizeof(KVM_SIGNATURE));
+ if (!memcmp(signature, KVM_SIGNATURE, sizeof(signature))) {
+ vcpu->arch.kvm_cpuid_base = function;
+ break;
+ }
+ }
+ }
+}
+
+static struct kvm_cpuid_entry2 *kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu)
+{
+ u32 base = vcpu->arch.kvm_cpuid_base;
+
+ if (!base)
+ return NULL;
+
+ return kvm_find_cpuid_entry(vcpu, base | KVM_CPUID_FEATURES, 0);
+}
+
+void kvm_update_pv_runtime(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *best = kvm_find_kvm_cpuid_features(vcpu);
/*
* save the feature bitmap to avoid cpuid lookup for every PV
@@ -142,7 +176,7 @@ void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
cpuid_entry_has(best, X86_FEATURE_XSAVEC)))
best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
- best = kvm_find_cpuid_entry(vcpu, KVM_CPUID_FEATURES, 0);
+ best = kvm_find_kvm_cpuid_features(vcpu);
if (kvm_hlt_in_guest(vcpu->kvm) && best &&
(best->eax & (1 << KVM_FEATURE_PV_UNHALT)))
best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT);
@@ -239,6 +273,26 @@ u64 kvm_vcpu_reserved_gpa_bits_raw(struct kvm_vcpu *vcpu)
return rsvd_bits(cpuid_maxphyaddr(vcpu), 63);
}
+static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
+ int nent)
+{
+ int r;
+
+ r = kvm_check_cpuid(e2, nent);
+ if (r)
+ return r;
+
+ kvfree(vcpu->arch.cpuid_entries);
+ vcpu->arch.cpuid_entries = e2;
+ vcpu->arch.cpuid_nent = nent;
+
+ kvm_update_kvm_cpuid_base(vcpu);
+ kvm_update_cpuid_runtime(vcpu);
+ kvm_vcpu_after_set_cpuid(vcpu);
+
+ return 0;
+}
+
/* when an old userspace process fills a new kernel module */
int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
struct kvm_cpuid *cpuid,
@@ -275,18 +329,9 @@ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
e2[i].padding[2] = 0;
}
- r = kvm_check_cpuid(e2, cpuid->nent);
- if (r) {
+ r = kvm_set_cpuid(vcpu, e2, cpuid->nent);
+ if (r)
kvfree(e2);
- goto out_free_cpuid;
- }
-
- kvfree(vcpu->arch.cpuid_entries);
- vcpu->arch.cpuid_entries = e2;
- vcpu->arch.cpuid_nent = cpuid->nent;
-
- kvm_update_cpuid_runtime(vcpu);
- kvm_vcpu_after_set_cpuid(vcpu);
out_free_cpuid:
kvfree(e);
@@ -310,20 +355,11 @@ int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
return PTR_ERR(e2);
}
- r = kvm_check_cpuid(e2, cpuid->nent);
- if (r) {
+ r = kvm_set_cpuid(vcpu, e2, cpuid->nent);
+ if (r)
kvfree(e2);
- return r;
- }
- kvfree(vcpu->arch.cpuid_entries);
- vcpu->arch.cpuid_entries = e2;
- vcpu->arch.cpuid_nent = cpuid->nent;
-
- kvm_update_cpuid_runtime(vcpu);
- kvm_vcpu_after_set_cpuid(vcpu);
-
- return 0;
+ return r;
}
int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
@@ -871,8 +907,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
}
break;
case KVM_CPUID_SIGNATURE: {
- static const char signature[12] = "KVMKVMKVM\0\0";
- const u32 *sigptr = (const u32 *)signature;
+ const u32 *sigptr = (const u32 *)KVM_SIGNATURE;
entry->eax = KVM_CPUID_FEATURES;
entry->ebx = sigptr[0];
entry->ecx = sigptr[1];
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 4f15c0165c05..5e19e6e4c2ce 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1472,7 +1472,7 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
if (!(data & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE)) {
hv_vcpu->hv_vapic = data;
- if (kvm_lapic_enable_pv_eoi(vcpu, 0, 0))
+ if (kvm_lapic_set_pv_eoi(vcpu, 0, 0))
return 1;
break;
}
@@ -1490,7 +1490,7 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
return 1;
hv_vcpu->hv_vapic = data;
kvm_vcpu_mark_page_dirty(vcpu, gfn);
- if (kvm_lapic_enable_pv_eoi(vcpu,
+ if (kvm_lapic_set_pv_eoi(vcpu,
gfn_to_gpa(gfn) | KVM_MSR_ENABLED,
sizeof(struct hv_vp_assist_page)))
return 1;
@@ -2022,7 +2022,7 @@ static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
{
bool longmode;
- longmode = is_64_bit_mode(vcpu);
+ longmode = is_64_bit_hypercall(vcpu);
if (longmode)
kvm_rax_write(vcpu, result);
else {
@@ -2171,7 +2171,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
}
#ifdef CONFIG_X86_64
- if (is_64_bit_mode(vcpu)) {
+ if (is_64_bit_hypercall(vcpu)) {
hc.param = kvm_rcx_read(vcpu);
hc.ingpa = kvm_rdx_read(vcpu);
hc.outgpa = kvm_r8_read(vcpu);
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
index e66e620c3bed..539333ac4b38 100644
--- a/arch/x86/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h
@@ -81,7 +81,6 @@ struct kvm_ioapic {
unsigned long irq_states[IOAPIC_NUM_PINS];
struct kvm_io_device dev;
struct kvm *kvm;
- void (*ack_notifier)(void *opaque, int irq);
spinlock_t lock;
struct rtc_status rtc_status;
struct delayed_work eoi_inject;
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 650642b18d15..c2d7cfe82d00 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -56,7 +56,6 @@ struct kvm_pic {
struct kvm_io_device dev_master;
struct kvm_io_device dev_slave;
struct kvm_io_device dev_elcr;
- void (*ack_notifier)(void *opaque, int irq);
unsigned long irq_states[PIC_NUM_PINS];
};
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index d6ac32f3f650..f206fc35deff 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -707,7 +707,7 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr)
{
int highest_irr;
- if (apic->vcpu->arch.apicv_active)
+ if (kvm_x86_ops.sync_pir_to_irr)
highest_irr = static_call(kvm_x86_sync_pir_to_irr)(apic->vcpu);
else
highest_irr = apic_find_highest_irr(apic);
@@ -2856,25 +2856,30 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
return 0;
}
-int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len)
+int kvm_lapic_set_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len)
{
u64 addr = data & ~KVM_MSR_ENABLED;
struct gfn_to_hva_cache *ghc = &vcpu->arch.pv_eoi.data;
unsigned long new_len;
+ int ret;
if (!IS_ALIGNED(addr, 4))
return 1;
- vcpu->arch.pv_eoi.msr_val = data;
- if (!pv_eoi_enabled(vcpu))
- return 0;
+ if (data & KVM_MSR_ENABLED) {
+ if (addr == ghc->gpa && len <= ghc->len)
+ new_len = ghc->len;
+ else
+ new_len = len;
- if (addr == ghc->gpa && len <= ghc->len)
- new_len = ghc->len;
- else
- new_len = len;
+ ret = kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, addr, new_len);
+ if (ret)
+ return ret;
+ }
+
+ vcpu->arch.pv_eoi.msr_val = data;
- return kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, addr, new_len);
+ return 0;
}
int kvm_apic_accept_events(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index d7c25d0c1354..2b44e533fc8d 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -127,7 +127,7 @@ int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data);
int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
-int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len);
+int kvm_lapic_set_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len);
void kvm_lapic_exit(void);
#define VEC_POS(v) ((v) & (32 - 1))
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 323b5057d08f..6354297e92ae 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1582,7 +1582,7 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
flush = kvm_handle_gfn_range(kvm, range, kvm_unmap_rmapp);
if (is_tdp_mmu_enabled(kvm))
- flush |= kvm_tdp_mmu_unmap_gfn_range(kvm, range, flush);
+ flush = kvm_tdp_mmu_unmap_gfn_range(kvm, range, flush);
return flush;
}
@@ -2173,10 +2173,10 @@ static void shadow_walk_init_using_root(struct kvm_shadow_walk_iterator *iterato
iterator->shadow_addr = root;
iterator->level = vcpu->arch.mmu->shadow_root_level;
- if (iterator->level == PT64_ROOT_4LEVEL &&
+ if (iterator->level >= PT64_ROOT_4LEVEL &&
vcpu->arch.mmu->root_level < PT64_ROOT_4LEVEL &&
!vcpu->arch.mmu->direct_map)
- --iterator->level;
+ iterator->level = PT32E_ROOT_LEVEL;
if (iterator->level == PT32E_ROOT_LEVEL) {
/*
@@ -3191,17 +3191,17 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
new_spte |= PT_WRITABLE_MASK;
/*
- * Do not fix write-permission on the large spte. Since
- * we only dirty the first page into the dirty-bitmap in
+ * Do not fix write-permission on the large spte when
+ * dirty logging is enabled. Since we only dirty the
+ * first page into the dirty-bitmap in
* fast_pf_fix_direct_spte(), other pages are missed
* if its slot has dirty logging enabled.
*
* Instead, we let the slow page fault path create a
* normal spte to fix the access.
- *
- * See the comments in kvm_arch_commit_memory_region().
*/
- if (sp->role.level > PG_LEVEL_4K)
+ if (sp->role.level > PG_LEVEL_4K &&
+ kvm_slot_dirty_track_enabled(fault->slot))
break;
}
@@ -4682,6 +4682,7 @@ static union kvm_mmu_extended_role kvm_calc_mmu_role_ext(struct kvm_vcpu *vcpu,
/* PKEY and LA57 are active iff long mode is active. */
ext.cr4_pke = ____is_efer_lma(regs) && ____is_cr4_pke(regs);
ext.cr4_la57 = ____is_efer_lma(regs) && ____is_cr4_la57(regs);
+ ext.efer_lma = ____is_efer_lma(regs);
}
ext.valid = 1;
@@ -4854,7 +4855,7 @@ void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, unsigned long cr0,
struct kvm_mmu *context = &vcpu->arch.guest_mmu;
struct kvm_mmu_role_regs regs = {
.cr0 = cr0,
- .cr4 = cr4,
+ .cr4 = cr4 & ~X86_CR4_PKE,
.efer = efer,
};
union kvm_mmu_role new_role;
@@ -4918,7 +4919,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
context->direct_map = false;
update_permission_bitmask(context, true);
- update_pkru_bitmask(context);
+ context->pkru_mask = 0;
reset_rsvds_bits_mask_ept(vcpu, context, execonly);
reset_ept_shadow_zero_bits_mask(vcpu, context, execonly);
}
@@ -5024,6 +5025,14 @@ void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu)
/*
* Invalidate all MMU roles to force them to reinitialize as CPUID
* information is factored into reserved bit calculations.
+ *
+ * Correctly handling multiple vCPU models with respect to paging and
+ * physical address properties) in a single VM would require tracking
+ * all relevant CPUID information in kvm_mmu_page_role. That is very
+ * undesirable as it would increase the memory requirements for
+ * gfn_track (see struct kvm_mmu_page_role comments). For now that
+ * problem is swept under the rug; KVM's CPUID API is horrific and
+ * it's all but impossible to solve it without introducing a new API.
*/
vcpu->arch.root_mmu.mmu_role.ext.valid = 0;
vcpu->arch.guest_mmu.mmu_role.ext.valid = 0;
@@ -5031,24 +5040,10 @@ void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu)
kvm_mmu_reset_context(vcpu);
/*
- * KVM does not correctly handle changing guest CPUID after KVM_RUN, as
- * MAXPHYADDR, GBPAGES support, AMD reserved bit behavior, etc.. aren't
- * tracked in kvm_mmu_page_role. As a result, KVM may miss guest page
- * faults due to reusing SPs/SPTEs. Alert userspace, but otherwise
- * sweep the problem under the rug.
- *
- * KVM's horrific CPUID ABI makes the problem all but impossible to
- * solve, as correctly handling multiple vCPU models (with respect to
- * paging and physical address properties) in a single VM would require
- * tracking all relevant CPUID information in kvm_mmu_page_role. That
- * is very undesirable as it would double the memory requirements for
- * gfn_track (see struct kvm_mmu_page_role comments), and in practice
- * no sane VMM mucks with the core vCPU model on the fly.
+ * Changing guest CPUID after KVM_RUN is forbidden, see the comment in
+ * kvm_arch_vcpu_ioctl().
*/
- if (vcpu->arch.last_vmentry_cpu != -1) {
- pr_warn_ratelimited("KVM: KVM_SET_CPUID{,2} after KVM_RUN may cause guest instability\n");
- pr_warn_ratelimited("KVM: KVM_SET_CPUID{,2} will fail after KVM_RUN starting with Linux 5.16\n");
- }
+ KVM_BUG_ON(vcpu->arch.last_vmentry_cpu != -1, vcpu->kvm);
}
void kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
@@ -5368,7 +5363,7 @@ void kvm_mmu_invalidate_gva(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
{
- kvm_mmu_invalidate_gva(vcpu, vcpu->arch.mmu, gva, INVALID_PAGE);
+ kvm_mmu_invalidate_gva(vcpu, vcpu->arch.walk_mmu, gva, INVALID_PAGE);
++vcpu->stat.invlpg;
}
EXPORT_SYMBOL_GPL(kvm_mmu_invlpg);
@@ -5853,8 +5848,6 @@ restart:
void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
const struct kvm_memory_slot *slot)
{
- bool flush = false;
-
if (kvm_memslots_have_rmaps(kvm)) {
write_lock(&kvm->mmu_lock);
/*
@@ -5862,17 +5855,14 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
* logging at a 4k granularity and never creates collapsible
* 2m SPTEs during dirty logging.
*/
- flush = slot_handle_level_4k(kvm, slot, kvm_mmu_zap_collapsible_spte, true);
- if (flush)
+ if (slot_handle_level_4k(kvm, slot, kvm_mmu_zap_collapsible_spte, true))
kvm_arch_flush_remote_tlbs_memslot(kvm, slot);
write_unlock(&kvm->mmu_lock);
}
if (is_tdp_mmu_enabled(kvm)) {
read_lock(&kvm->mmu_lock);
- flush = kvm_tdp_mmu_zap_collapsible_sptes(kvm, slot, flush);
- if (flush)
- kvm_arch_flush_remote_tlbs_memslot(kvm, slot);
+ kvm_tdp_mmu_zap_collapsible_sptes(kvm, slot);
read_unlock(&kvm->mmu_lock);
}
}
@@ -6181,23 +6171,46 @@ void kvm_mmu_module_exit(void)
mmu_audit_disable();
}
+/*
+ * Calculate the effective recovery period, accounting for '0' meaning "let KVM
+ * select a halving time of 1 hour". Returns true if recovery is enabled.
+ */
+static bool calc_nx_huge_pages_recovery_period(uint *period)
+{
+ /*
+ * Use READ_ONCE to get the params, this may be called outside of the
+ * param setters, e.g. by the kthread to compute its next timeout.
+ */
+ bool enabled = READ_ONCE(nx_huge_pages);
+ uint ratio = READ_ONCE(nx_huge_pages_recovery_ratio);
+
+ if (!enabled || !ratio)
+ return false;
+
+ *period = READ_ONCE(nx_huge_pages_recovery_period_ms);
+ if (!*period) {
+ /* Make sure the period is not less than one second. */
+ ratio = min(ratio, 3600u);
+ *period = 60 * 60 * 1000 / ratio;
+ }
+ return true;
+}
+
static int set_nx_huge_pages_recovery_param(const char *val, const struct kernel_param *kp)
{
bool was_recovery_enabled, is_recovery_enabled;
uint old_period, new_period;
int err;
- was_recovery_enabled = nx_huge_pages_recovery_ratio;
- old_period = nx_huge_pages_recovery_period_ms;
+ was_recovery_enabled = calc_nx_huge_pages_recovery_period(&old_period);
err = param_set_uint(val, kp);
if (err)
return err;
- is_recovery_enabled = nx_huge_pages_recovery_ratio;
- new_period = nx_huge_pages_recovery_period_ms;
+ is_recovery_enabled = calc_nx_huge_pages_recovery_period(&new_period);
- if (READ_ONCE(nx_huge_pages) && is_recovery_enabled &&
+ if (is_recovery_enabled &&
(!was_recovery_enabled || old_period > new_period)) {
struct kvm *kvm;
@@ -6261,18 +6274,13 @@ static void kvm_recover_nx_lpages(struct kvm *kvm)
static long get_nx_lpage_recovery_timeout(u64 start_time)
{
- uint ratio = READ_ONCE(nx_huge_pages_recovery_ratio);
- uint period = READ_ONCE(nx_huge_pages_recovery_period_ms);
+ bool enabled;
+ uint period;
- if (!period && ratio) {
- /* Make sure the period is not less than one second. */
- ratio = min(ratio, 3600u);
- period = 60 * 60 * 1000 / ratio;
- }
+ enabled = calc_nx_huge_pages_recovery_period(&period);
- return READ_ONCE(nx_huge_pages) && ratio
- ? start_time + msecs_to_jiffies(period) - get_jiffies_64()
- : MAX_SCHEDULE_TIMEOUT;
+ return enabled ? start_time + msecs_to_jiffies(period) - get_jiffies_64()
+ : MAX_SCHEDULE_TIMEOUT;
}
static int kvm_nx_lpage_recovery_worker(struct kvm *kvm, uintptr_t data)
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 7c5dd83e52de..1db8496259ad 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -317,9 +317,6 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, tdp_ptep_t pt,
struct kvm_mmu_page *sp = sptep_to_sp(rcu_dereference(pt));
int level = sp->role.level;
gfn_t base_gfn = sp->gfn;
- u64 old_child_spte;
- u64 *sptep;
- gfn_t gfn;
int i;
trace_kvm_mmu_prepare_zap_page(sp);
@@ -327,8 +324,9 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, tdp_ptep_t pt,
tdp_mmu_unlink_page(kvm, sp, shared);
for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
- sptep = rcu_dereference(pt) + i;
- gfn = base_gfn + i * KVM_PAGES_PER_HPAGE(level);
+ u64 *sptep = rcu_dereference(pt) + i;
+ gfn_t gfn = base_gfn + i * KVM_PAGES_PER_HPAGE(level);
+ u64 old_child_spte;
if (shared) {
/*
@@ -374,7 +372,7 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, tdp_ptep_t pt,
shared);
}
- kvm_flush_remote_tlbs_with_address(kvm, gfn,
+ kvm_flush_remote_tlbs_with_address(kvm, base_gfn,
KVM_PAGES_PER_HPAGE(level + 1));
call_rcu(&sp->rcu_head, tdp_mmu_free_sp_rcu_callback);
@@ -897,7 +895,7 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu,
struct kvm_page_fault *fault,
struct tdp_iter *iter)
{
- struct kvm_mmu_page *sp = sptep_to_sp(iter->sptep);
+ struct kvm_mmu_page *sp = sptep_to_sp(rcu_dereference(iter->sptep));
u64 new_spte;
int ret = RET_PF_FIXED;
bool wrprot = false;
@@ -1033,9 +1031,9 @@ bool kvm_tdp_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range,
{
struct kvm_mmu_page *root;
- for_each_tdp_mmu_root(kvm, root, range->slot->as_id)
- flush |= zap_gfn_range(kvm, root, range->start, range->end,
- range->may_block, flush, false);
+ for_each_tdp_mmu_root_yield_safe(kvm, root, range->slot->as_id, false)
+ flush = zap_gfn_range(kvm, root, range->start, range->end,
+ range->may_block, flush, false);
return flush;
}
@@ -1364,10 +1362,9 @@ void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm,
* Clear leaf entries which could be replaced by large mappings, for
* GFNs within the slot.
*/
-static bool zap_collapsible_spte_range(struct kvm *kvm,
+static void zap_collapsible_spte_range(struct kvm *kvm,
struct kvm_mmu_page *root,
- const struct kvm_memory_slot *slot,
- bool flush)
+ const struct kvm_memory_slot *slot)
{
gfn_t start = slot->base_gfn;
gfn_t end = start + slot->npages;
@@ -1378,10 +1375,8 @@ static bool zap_collapsible_spte_range(struct kvm *kvm,
tdp_root_for_each_pte(iter, root, start, end) {
retry:
- if (tdp_mmu_iter_cond_resched(kvm, &iter, flush, true)) {
- flush = false;
+ if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true))
continue;
- }
if (!is_shadow_present_pte(iter.old_spte) ||
!is_last_spte(iter.old_spte, iter.level))
@@ -1393,6 +1388,7 @@ retry:
pfn, PG_LEVEL_NUM))
continue;
+ /* Note, a successful atomic zap also does a remote TLB flush. */
if (!tdp_mmu_zap_spte_atomic(kvm, &iter)) {
/*
* The iter must explicitly re-read the SPTE because
@@ -1401,30 +1397,24 @@ retry:
iter.old_spte = READ_ONCE(*rcu_dereference(iter.sptep));
goto retry;
}
- flush = true;
}
rcu_read_unlock();
-
- return flush;
}
/*
* Clear non-leaf entries (and free associated page tables) which could
* be replaced by large mappings, for GFNs within the slot.
*/
-bool kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
- const struct kvm_memory_slot *slot,
- bool flush)
+void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
+ const struct kvm_memory_slot *slot)
{
struct kvm_mmu_page *root;
lockdep_assert_held_read(&kvm->mmu_lock);
for_each_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true)
- flush = zap_collapsible_spte_range(kvm, root, slot, flush);
-
- return flush;
+ zap_collapsible_spte_range(kvm, root, slot);
}
/*
diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
index 476b133544dd..3899004a5d91 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.h
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -64,9 +64,8 @@ void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm,
struct kvm_memory_slot *slot,
gfn_t gfn, unsigned long mask,
bool wrprot);
-bool kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
- const struct kvm_memory_slot *slot,
- bool flush);
+void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
+ const struct kvm_memory_slot *slot);
bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
struct kvm_memory_slot *slot, gfn_t gfn,
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 0772bad9165c..09873f6488f7 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -319,7 +319,7 @@ void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
}
/* check if idx is a valid index to access PMU */
-int kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
+bool kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
{
return kvm_x86_ops.pmu_ops->is_valid_rdpmc_ecx(vcpu, idx);
}
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index 0e4f2b1fa9fb..59d6b76203d5 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -32,7 +32,7 @@ struct kvm_pmu_ops {
struct kvm_pmc *(*rdpmc_ecx_to_pmc)(struct kvm_vcpu *vcpu,
unsigned int idx, u64 *mask);
struct kvm_pmc *(*msr_idx_to_pmc)(struct kvm_vcpu *vcpu, u32 msr);
- int (*is_valid_rdpmc_ecx)(struct kvm_vcpu *vcpu, unsigned int idx);
+ bool (*is_valid_rdpmc_ecx)(struct kvm_vcpu *vcpu, unsigned int idx);
bool (*is_valid_msr)(struct kvm_vcpu *vcpu, u32 msr);
int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
@@ -149,7 +149,7 @@ void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx);
void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu);
void kvm_pmu_handle_event(struct kvm_vcpu *vcpu);
int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data);
-int kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx);
+bool kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx);
bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr);
int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 8052d92069e0..9d6066eb7c10 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -904,7 +904,8 @@ bool svm_check_apicv_inhibit_reasons(ulong bit)
BIT(APICV_INHIBIT_REASON_NESTED) |
BIT(APICV_INHIBIT_REASON_IRQWIN) |
BIT(APICV_INHIBIT_REASON_PIT_REINJ) |
- BIT(APICV_INHIBIT_REASON_X2APIC);
+ BIT(APICV_INHIBIT_REASON_X2APIC) |
+ BIT(APICV_INHIBIT_REASON_BLOCKIRQ);
return supported & BIT(bit);
}
@@ -988,16 +989,18 @@ void avic_vcpu_put(struct kvm_vcpu *vcpu)
static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ int cpu = get_cpu();
+ WARN_ON(cpu != vcpu->cpu);
svm->avic_is_running = is_run;
- if (!kvm_vcpu_apicv_active(vcpu))
- return;
-
- if (is_run)
- avic_vcpu_load(vcpu, vcpu->cpu);
- else
- avic_vcpu_put(vcpu);
+ if (kvm_vcpu_apicv_active(vcpu)) {
+ if (is_run)
+ avic_vcpu_load(vcpu, cpu);
+ else
+ avic_vcpu_put(vcpu);
+ }
+ put_cpu();
}
void svm_vcpu_blocking(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
index fdf587f19c5f..871c426ec389 100644
--- a/arch/x86/kvm/svm/pmu.c
+++ b/arch/x86/kvm/svm/pmu.c
@@ -181,14 +181,13 @@ static struct kvm_pmc *amd_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
return get_gp_pmc_amd(pmu, base + pmc_idx, PMU_TYPE_COUNTER);
}
-/* returns 0 if idx's corresponding MSR exists; otherwise returns 1. */
-static int amd_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
+static bool amd_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
idx &= ~(3u << 30);
- return (idx >= pmu->nr_arch_gp_counters);
+ return idx < pmu->nr_arch_gp_counters;
}
/* idx is the ECX register of RDPMC instruction */
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 1964b9a174be..59727a966f90 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -120,16 +120,26 @@ static bool __sev_recycle_asids(int min_asid, int max_asid)
return true;
}
+static int sev_misc_cg_try_charge(struct kvm_sev_info *sev)
+{
+ enum misc_res_type type = sev->es_active ? MISC_CG_RES_SEV_ES : MISC_CG_RES_SEV;
+ return misc_cg_try_charge(type, sev->misc_cg, 1);
+}
+
+static void sev_misc_cg_uncharge(struct kvm_sev_info *sev)
+{
+ enum misc_res_type type = sev->es_active ? MISC_CG_RES_SEV_ES : MISC_CG_RES_SEV;
+ misc_cg_uncharge(type, sev->misc_cg, 1);
+}
+
static int sev_asid_new(struct kvm_sev_info *sev)
{
int asid, min_asid, max_asid, ret;
bool retry = true;
- enum misc_res_type type;
- type = sev->es_active ? MISC_CG_RES_SEV_ES : MISC_CG_RES_SEV;
WARN_ON(sev->misc_cg);
sev->misc_cg = get_current_misc_cg();
- ret = misc_cg_try_charge(type, sev->misc_cg, 1);
+ ret = sev_misc_cg_try_charge(sev);
if (ret) {
put_misc_cg(sev->misc_cg);
sev->misc_cg = NULL;
@@ -162,7 +172,7 @@ again:
return asid;
e_uncharge:
- misc_cg_uncharge(type, sev->misc_cg, 1);
+ sev_misc_cg_uncharge(sev);
put_misc_cg(sev->misc_cg);
sev->misc_cg = NULL;
return ret;
@@ -179,7 +189,6 @@ static void sev_asid_free(struct kvm_sev_info *sev)
{
struct svm_cpu_data *sd;
int cpu;
- enum misc_res_type type;
mutex_lock(&sev_bitmap_lock);
@@ -192,8 +201,7 @@ static void sev_asid_free(struct kvm_sev_info *sev)
mutex_unlock(&sev_bitmap_lock);
- type = sev->es_active ? MISC_CG_RES_SEV_ES : MISC_CG_RES_SEV;
- misc_cg_uncharge(type, sev->misc_cg, 1);
+ sev_misc_cg_uncharge(sev);
put_misc_cg(sev->misc_cg);
sev->misc_cg = NULL;
}
@@ -229,7 +237,6 @@ static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
- bool es_active = argp->id == KVM_SEV_ES_INIT;
int asid, ret;
if (kvm->created_vcpus)
@@ -239,7 +246,8 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
if (unlikely(sev->active))
return ret;
- sev->es_active = es_active;
+ sev->active = true;
+ sev->es_active = argp->id == KVM_SEV_ES_INIT;
asid = sev_asid_new(sev);
if (asid < 0)
goto e_no_asid;
@@ -249,8 +257,6 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
if (ret)
goto e_free;
- sev->active = true;
- sev->asid = asid;
INIT_LIST_HEAD(&sev->regions_list);
return 0;
@@ -260,6 +266,7 @@ e_free:
sev->asid = 0;
e_no_asid:
sev->es_active = false;
+ sev->active = false;
return ret;
}
@@ -590,7 +597,7 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm)
* traditional VMSA as it has been built so far (in prep
* for LAUNCH_UPDATE_VMSA) to be the initial SEV-ES state.
*/
- memcpy(svm->vmsa, save, sizeof(*save));
+ memcpy(svm->sev_es.vmsa, save, sizeof(*save));
return 0;
}
@@ -612,11 +619,11 @@ static int __sev_launch_update_vmsa(struct kvm *kvm, struct kvm_vcpu *vcpu,
* the VMSA memory content (i.e it will write the same memory region
* with the guest's key), so invalidate it first.
*/
- clflush_cache_range(svm->vmsa, PAGE_SIZE);
+ clflush_cache_range(svm->sev_es.vmsa, PAGE_SIZE);
vmsa.reserved = 0;
vmsa.handle = to_kvm_svm(kvm)->sev_info.handle;
- vmsa.address = __sme_pa(svm->vmsa);
+ vmsa.address = __sme_pa(svm->sev_es.vmsa);
vmsa.len = PAGE_SIZE;
ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, &vmsa, error);
if (ret)
@@ -1522,7 +1529,7 @@ static int sev_receive_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
return sev_issue_cmd(kvm, SEV_CMD_RECEIVE_FINISH, &data, &argp->error);
}
-static bool cmd_allowed_from_miror(u32 cmd_id)
+static bool is_cmd_allowed_from_mirror(u32 cmd_id)
{
/*
* Allow mirrors VM to call KVM_SEV_LAUNCH_UPDATE_VMSA to enable SEV-ES
@@ -1536,6 +1543,223 @@ static bool cmd_allowed_from_miror(u32 cmd_id)
return false;
}
+static int sev_lock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm)
+{
+ struct kvm_sev_info *dst_sev = &to_kvm_svm(dst_kvm)->sev_info;
+ struct kvm_sev_info *src_sev = &to_kvm_svm(src_kvm)->sev_info;
+ int r = -EBUSY;
+
+ if (dst_kvm == src_kvm)
+ return -EINVAL;
+
+ /*
+ * Bail if these VMs are already involved in a migration to avoid
+ * deadlock between two VMs trying to migrate to/from each other.
+ */
+ if (atomic_cmpxchg_acquire(&dst_sev->migration_in_progress, 0, 1))
+ return -EBUSY;
+
+ if (atomic_cmpxchg_acquire(&src_sev->migration_in_progress, 0, 1))
+ goto release_dst;
+
+ r = -EINTR;
+ if (mutex_lock_killable(&dst_kvm->lock))
+ goto release_src;
+ if (mutex_lock_killable(&src_kvm->lock))
+ goto unlock_dst;
+ return 0;
+
+unlock_dst:
+ mutex_unlock(&dst_kvm->lock);
+release_src:
+ atomic_set_release(&src_sev->migration_in_progress, 0);
+release_dst:
+ atomic_set_release(&dst_sev->migration_in_progress, 0);
+ return r;
+}
+
+static void sev_unlock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm)
+{
+ struct kvm_sev_info *dst_sev = &to_kvm_svm(dst_kvm)->sev_info;
+ struct kvm_sev_info *src_sev = &to_kvm_svm(src_kvm)->sev_info;
+
+ mutex_unlock(&dst_kvm->lock);
+ mutex_unlock(&src_kvm->lock);
+ atomic_set_release(&dst_sev->migration_in_progress, 0);
+ atomic_set_release(&src_sev->migration_in_progress, 0);
+}
+
+
+static int sev_lock_vcpus_for_migration(struct kvm *kvm)
+{
+ struct kvm_vcpu *vcpu;
+ int i, j;
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (mutex_lock_killable(&vcpu->mutex))
+ goto out_unlock;
+ }
+
+ return 0;
+
+out_unlock:
+ kvm_for_each_vcpu(j, vcpu, kvm) {
+ if (i == j)
+ break;
+
+ mutex_unlock(&vcpu->mutex);
+ }
+ return -EINTR;
+}
+
+static void sev_unlock_vcpus_for_migration(struct kvm *kvm)
+{
+ struct kvm_vcpu *vcpu;
+ int i;
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ mutex_unlock(&vcpu->mutex);
+ }
+}
+
+static void sev_migrate_from(struct kvm_sev_info *dst,
+ struct kvm_sev_info *src)
+{
+ dst->active = true;
+ dst->asid = src->asid;
+ dst->handle = src->handle;
+ dst->pages_locked = src->pages_locked;
+ dst->enc_context_owner = src->enc_context_owner;
+
+ src->asid = 0;
+ src->active = false;
+ src->handle = 0;
+ src->pages_locked = 0;
+ src->enc_context_owner = NULL;
+
+ list_cut_before(&dst->regions_list, &src->regions_list, &src->regions_list);
+}
+
+static int sev_es_migrate_from(struct kvm *dst, struct kvm *src)
+{
+ int i;
+ struct kvm_vcpu *dst_vcpu, *src_vcpu;
+ struct vcpu_svm *dst_svm, *src_svm;
+
+ if (atomic_read(&src->online_vcpus) != atomic_read(&dst->online_vcpus))
+ return -EINVAL;
+
+ kvm_for_each_vcpu(i, src_vcpu, src) {
+ if (!src_vcpu->arch.guest_state_protected)
+ return -EINVAL;
+ }
+
+ kvm_for_each_vcpu(i, src_vcpu, src) {
+ src_svm = to_svm(src_vcpu);
+ dst_vcpu = kvm_get_vcpu(dst, i);
+ dst_svm = to_svm(dst_vcpu);
+
+ /*
+ * Transfer VMSA and GHCB state to the destination. Nullify and
+ * clear source fields as appropriate, the state now belongs to
+ * the destination.
+ */
+ memcpy(&dst_svm->sev_es, &src_svm->sev_es, sizeof(src_svm->sev_es));
+ dst_svm->vmcb->control.ghcb_gpa = src_svm->vmcb->control.ghcb_gpa;
+ dst_svm->vmcb->control.vmsa_pa = src_svm->vmcb->control.vmsa_pa;
+ dst_vcpu->arch.guest_state_protected = true;
+
+ memset(&src_svm->sev_es, 0, sizeof(src_svm->sev_es));
+ src_svm->vmcb->control.ghcb_gpa = INVALID_PAGE;
+ src_svm->vmcb->control.vmsa_pa = INVALID_PAGE;
+ src_vcpu->arch.guest_state_protected = false;
+ }
+ to_kvm_svm(src)->sev_info.es_active = false;
+ to_kvm_svm(dst)->sev_info.es_active = true;
+
+ return 0;
+}
+
+int svm_vm_migrate_from(struct kvm *kvm, unsigned int source_fd)
+{
+ struct kvm_sev_info *dst_sev = &to_kvm_svm(kvm)->sev_info;
+ struct kvm_sev_info *src_sev, *cg_cleanup_sev;
+ struct file *source_kvm_file;
+ struct kvm *source_kvm;
+ bool charged = false;
+ int ret;
+
+ source_kvm_file = fget(source_fd);
+ if (!file_is_kvm(source_kvm_file)) {
+ ret = -EBADF;
+ goto out_fput;
+ }
+
+ source_kvm = source_kvm_file->private_data;
+ ret = sev_lock_two_vms(kvm, source_kvm);
+ if (ret)
+ goto out_fput;
+
+ if (sev_guest(kvm) || !sev_guest(source_kvm)) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ src_sev = &to_kvm_svm(source_kvm)->sev_info;
+
+ /*
+ * VMs mirroring src's encryption context rely on it to keep the
+ * ASID allocated, but below we are clearing src_sev->asid.
+ */
+ if (src_sev->num_mirrored_vms) {
+ ret = -EBUSY;
+ goto out_unlock;
+ }
+
+ dst_sev->misc_cg = get_current_misc_cg();
+ cg_cleanup_sev = dst_sev;
+ if (dst_sev->misc_cg != src_sev->misc_cg) {
+ ret = sev_misc_cg_try_charge(dst_sev);
+ if (ret)
+ goto out_dst_cgroup;
+ charged = true;
+ }
+
+ ret = sev_lock_vcpus_for_migration(kvm);
+ if (ret)
+ goto out_dst_cgroup;
+ ret = sev_lock_vcpus_for_migration(source_kvm);
+ if (ret)
+ goto out_dst_vcpu;
+
+ if (sev_es_guest(source_kvm)) {
+ ret = sev_es_migrate_from(kvm, source_kvm);
+ if (ret)
+ goto out_source_vcpu;
+ }
+ sev_migrate_from(dst_sev, src_sev);
+ kvm_vm_dead(source_kvm);
+ cg_cleanup_sev = src_sev;
+ ret = 0;
+
+out_source_vcpu:
+ sev_unlock_vcpus_for_migration(source_kvm);
+out_dst_vcpu:
+ sev_unlock_vcpus_for_migration(kvm);
+out_dst_cgroup:
+ /* Operates on the source on success, on the destination on failure. */
+ if (charged)
+ sev_misc_cg_uncharge(cg_cleanup_sev);
+ put_misc_cg(cg_cleanup_sev->misc_cg);
+ cg_cleanup_sev->misc_cg = NULL;
+out_unlock:
+ sev_unlock_two_vms(kvm, source_kvm);
+out_fput:
+ if (source_kvm_file)
+ fput(source_kvm_file);
+ return ret;
+}
+
int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
{
struct kvm_sev_cmd sev_cmd;
@@ -1554,7 +1778,7 @@ int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
/* Only the enc_context_owner handles some memory enc operations. */
if (is_mirroring_enc_context(kvm) &&
- !cmd_allowed_from_miror(sev_cmd.id)) {
+ !is_cmd_allowed_from_mirror(sev_cmd.id)) {
r = -EINVAL;
goto out;
}
@@ -1751,71 +1975,60 @@ int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd)
{
struct file *source_kvm_file;
struct kvm *source_kvm;
- struct kvm_sev_info source_sev, *mirror_sev;
+ struct kvm_sev_info *source_sev, *mirror_sev;
int ret;
source_kvm_file = fget(source_fd);
if (!file_is_kvm(source_kvm_file)) {
ret = -EBADF;
- goto e_source_put;
+ goto e_source_fput;
}
source_kvm = source_kvm_file->private_data;
- mutex_lock(&source_kvm->lock);
-
- if (!sev_guest(source_kvm)) {
- ret = -EINVAL;
- goto e_source_unlock;
- }
+ ret = sev_lock_two_vms(kvm, source_kvm);
+ if (ret)
+ goto e_source_fput;
- /* Mirrors of mirrors should work, but let's not get silly */
- if (is_mirroring_enc_context(source_kvm) || source_kvm == kvm) {
+ /*
+ * Mirrors of mirrors should work, but let's not get silly. Also
+ * disallow out-of-band SEV/SEV-ES init if the target is already an
+ * SEV guest, or if vCPUs have been created. KVM relies on vCPUs being
+ * created after SEV/SEV-ES initialization, e.g. to init intercepts.
+ */
+ if (sev_guest(kvm) || !sev_guest(source_kvm) ||
+ is_mirroring_enc_context(source_kvm) || kvm->created_vcpus) {
ret = -EINVAL;
- goto e_source_unlock;
+ goto e_unlock;
}
- memcpy(&source_sev, &to_kvm_svm(source_kvm)->sev_info,
- sizeof(source_sev));
-
/*
* The mirror kvm holds an enc_context_owner ref so its asid can't
* disappear until we're done with it
*/
+ source_sev = &to_kvm_svm(source_kvm)->sev_info;
kvm_get_kvm(source_kvm);
-
- fput(source_kvm_file);
- mutex_unlock(&source_kvm->lock);
- mutex_lock(&kvm->lock);
-
- if (sev_guest(kvm)) {
- ret = -EINVAL;
- goto e_mirror_unlock;
- }
+ source_sev->num_mirrored_vms++;
/* Set enc_context_owner and copy its encryption context over */
mirror_sev = &to_kvm_svm(kvm)->sev_info;
mirror_sev->enc_context_owner = source_kvm;
mirror_sev->active = true;
- mirror_sev->asid = source_sev.asid;
- mirror_sev->fd = source_sev.fd;
- mirror_sev->es_active = source_sev.es_active;
- mirror_sev->handle = source_sev.handle;
+ mirror_sev->asid = source_sev->asid;
+ mirror_sev->fd = source_sev->fd;
+ mirror_sev->es_active = source_sev->es_active;
+ mirror_sev->handle = source_sev->handle;
+ INIT_LIST_HEAD(&mirror_sev->regions_list);
+ ret = 0;
+
/*
* Do not copy ap_jump_table. Since the mirror does not share the same
* KVM contexts as the original, and they may have different
* memory-views.
*/
- mutex_unlock(&kvm->lock);
- return 0;
-
-e_mirror_unlock:
- mutex_unlock(&kvm->lock);
- kvm_put_kvm(source_kvm);
- return ret;
-e_source_unlock:
- mutex_unlock(&source_kvm->lock);
-e_source_put:
+e_unlock:
+ sev_unlock_two_vms(kvm, source_kvm);
+e_source_fput:
if (source_kvm_file)
fput(source_kvm_file);
return ret;
@@ -1827,17 +2040,24 @@ void sev_vm_destroy(struct kvm *kvm)
struct list_head *head = &sev->regions_list;
struct list_head *pos, *q;
+ WARN_ON(sev->num_mirrored_vms);
+
if (!sev_guest(kvm))
return;
/* If this is a mirror_kvm release the enc_context_owner and skip sev cleanup */
if (is_mirroring_enc_context(kvm)) {
- kvm_put_kvm(sev->enc_context_owner);
+ struct kvm *owner_kvm = sev->enc_context_owner;
+ struct kvm_sev_info *owner_sev = &to_kvm_svm(owner_kvm)->sev_info;
+
+ mutex_lock(&owner_kvm->lock);
+ if (!WARN_ON(!owner_sev->num_mirrored_vms))
+ owner_sev->num_mirrored_vms--;
+ mutex_unlock(&owner_kvm->lock);
+ kvm_put_kvm(owner_kvm);
return;
}
- mutex_lock(&kvm->lock);
-
/*
* Ensure that all guest tagged cache entries are flushed before
* releasing the pages back to the system for use. CLFLUSH will
@@ -1857,8 +2077,6 @@ void sev_vm_destroy(struct kvm *kvm)
}
}
- mutex_unlock(&kvm->lock);
-
sev_unbind_asid(kvm, sev->handle);
sev_asid_free(sev);
}
@@ -2038,16 +2256,16 @@ void sev_free_vcpu(struct kvm_vcpu *vcpu)
svm = to_svm(vcpu);
if (vcpu->arch.guest_state_protected)
- sev_flush_guest_memory(svm, svm->vmsa, PAGE_SIZE);
- __free_page(virt_to_page(svm->vmsa));
+ sev_flush_guest_memory(svm, svm->sev_es.vmsa, PAGE_SIZE);
+ __free_page(virt_to_page(svm->sev_es.vmsa));
- if (svm->ghcb_sa_free)
- kfree(svm->ghcb_sa);
+ if (svm->sev_es.ghcb_sa_free)
+ kfree(svm->sev_es.ghcb_sa);
}
static void dump_ghcb(struct vcpu_svm *svm)
{
- struct ghcb *ghcb = svm->ghcb;
+ struct ghcb *ghcb = svm->sev_es.ghcb;
unsigned int nbits;
/* Re-use the dump_invalid_vmcb module parameter */
@@ -2073,7 +2291,7 @@ static void dump_ghcb(struct vcpu_svm *svm)
static void sev_es_sync_to_ghcb(struct vcpu_svm *svm)
{
struct kvm_vcpu *vcpu = &svm->vcpu;
- struct ghcb *ghcb = svm->ghcb;
+ struct ghcb *ghcb = svm->sev_es.ghcb;
/*
* The GHCB protocol so far allows for the following data
@@ -2093,7 +2311,7 @@ static void sev_es_sync_from_ghcb(struct vcpu_svm *svm)
{
struct vmcb_control_area *control = &svm->vmcb->control;
struct kvm_vcpu *vcpu = &svm->vcpu;
- struct ghcb *ghcb = svm->ghcb;
+ struct ghcb *ghcb = svm->sev_es.ghcb;
u64 exit_code;
/*
@@ -2140,7 +2358,7 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
struct ghcb *ghcb;
u64 exit_code = 0;
- ghcb = svm->ghcb;
+ ghcb = svm->sev_es.ghcb;
/* Only GHCB Usage code 0 is supported */
if (ghcb->ghcb_usage)
@@ -2258,33 +2476,34 @@ vmgexit_err:
void sev_es_unmap_ghcb(struct vcpu_svm *svm)
{
- if (!svm->ghcb)
+ if (!svm->sev_es.ghcb)
return;
- if (svm->ghcb_sa_free) {
+ if (svm->sev_es.ghcb_sa_free) {
/*
* The scratch area lives outside the GHCB, so there is a
* buffer that, depending on the operation performed, may
* need to be synced, then freed.
*/
- if (svm->ghcb_sa_sync) {
+ if (svm->sev_es.ghcb_sa_sync) {
kvm_write_guest(svm->vcpu.kvm,
- ghcb_get_sw_scratch(svm->ghcb),
- svm->ghcb_sa, svm->ghcb_sa_len);
- svm->ghcb_sa_sync = false;
+ ghcb_get_sw_scratch(svm->sev_es.ghcb),
+ svm->sev_es.ghcb_sa,
+ svm->sev_es.ghcb_sa_len);
+ svm->sev_es.ghcb_sa_sync = false;
}
- kfree(svm->ghcb_sa);
- svm->ghcb_sa = NULL;
- svm->ghcb_sa_free = false;
+ kfree(svm->sev_es.ghcb_sa);
+ svm->sev_es.ghcb_sa = NULL;
+ svm->sev_es.ghcb_sa_free = false;
}
- trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, svm->ghcb);
+ trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, svm->sev_es.ghcb);
sev_es_sync_to_ghcb(svm);
- kvm_vcpu_unmap(&svm->vcpu, &svm->ghcb_map, true);
- svm->ghcb = NULL;
+ kvm_vcpu_unmap(&svm->vcpu, &svm->sev_es.ghcb_map, true);
+ svm->sev_es.ghcb = NULL;
}
void pre_sev_run(struct vcpu_svm *svm, int cpu)
@@ -2314,7 +2533,7 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu)
static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
{
struct vmcb_control_area *control = &svm->vmcb->control;
- struct ghcb *ghcb = svm->ghcb;
+ struct ghcb *ghcb = svm->sev_es.ghcb;
u64 ghcb_scratch_beg, ghcb_scratch_end;
u64 scratch_gpa_beg, scratch_gpa_end;
void *scratch_va;
@@ -2350,7 +2569,7 @@ static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
return false;
}
- scratch_va = (void *)svm->ghcb;
+ scratch_va = (void *)svm->sev_es.ghcb;
scratch_va += (scratch_gpa_beg - control->ghcb_gpa);
} else {
/*
@@ -2380,12 +2599,12 @@ static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
* the vCPU next time (i.e. a read was requested so the data
* must be written back to the guest memory).
*/
- svm->ghcb_sa_sync = sync;
- svm->ghcb_sa_free = true;
+ svm->sev_es.ghcb_sa_sync = sync;
+ svm->sev_es.ghcb_sa_free = true;
}
- svm->ghcb_sa = scratch_va;
- svm->ghcb_sa_len = len;
+ svm->sev_es.ghcb_sa = scratch_va;
+ svm->sev_es.ghcb_sa_len = len;
return true;
}
@@ -2504,15 +2723,15 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
return -EINVAL;
}
- if (kvm_vcpu_map(vcpu, ghcb_gpa >> PAGE_SHIFT, &svm->ghcb_map)) {
+ if (kvm_vcpu_map(vcpu, ghcb_gpa >> PAGE_SHIFT, &svm->sev_es.ghcb_map)) {
/* Unable to map GHCB from guest */
vcpu_unimpl(vcpu, "vmgexit: error mapping GHCB [%#llx] from guest\n",
ghcb_gpa);
return -EINVAL;
}
- svm->ghcb = svm->ghcb_map.hva;
- ghcb = svm->ghcb_map.hva;
+ svm->sev_es.ghcb = svm->sev_es.ghcb_map.hva;
+ ghcb = svm->sev_es.ghcb_map.hva;
trace_kvm_vmgexit_enter(vcpu->vcpu_id, ghcb);
@@ -2535,7 +2754,7 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
ret = kvm_sev_es_mmio_read(vcpu,
control->exit_info_1,
control->exit_info_2,
- svm->ghcb_sa);
+ svm->sev_es.ghcb_sa);
break;
case SVM_VMGEXIT_MMIO_WRITE:
if (!setup_vmgexit_scratch(svm, false, control->exit_info_2))
@@ -2544,7 +2763,7 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
ret = kvm_sev_es_mmio_write(vcpu,
control->exit_info_1,
control->exit_info_2,
- svm->ghcb_sa);
+ svm->sev_es.ghcb_sa);
break;
case SVM_VMGEXIT_NMI_COMPLETE:
ret = svm_invoke_exit_handler(vcpu, SVM_EXIT_IRET);
@@ -2604,7 +2823,8 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)
if (!setup_vmgexit_scratch(svm, in, bytes))
return -EINVAL;
- return kvm_sev_es_string_io(&svm->vcpu, size, port, svm->ghcb_sa, count, in);
+ return kvm_sev_es_string_io(&svm->vcpu, size, port, svm->sev_es.ghcb_sa,
+ count, in);
}
void sev_es_init_vmcb(struct vcpu_svm *svm)
@@ -2619,7 +2839,7 @@ void sev_es_init_vmcb(struct vcpu_svm *svm)
* VMCB page. Do not include the encryption mask on the VMSA physical
* address since hardware will access it using the guest key.
*/
- svm->vmcb->control.vmsa_pa = __pa(svm->vmsa);
+ svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa);
/* Can't intercept CR register access, HV can't modify CR registers */
svm_clr_intercept(svm, INTERCEPT_CR0_READ);
@@ -2691,8 +2911,8 @@ void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
struct vcpu_svm *svm = to_svm(vcpu);
/* First SIPI: Use the values as initially set by the VMM */
- if (!svm->received_first_sipi) {
- svm->received_first_sipi = true;
+ if (!svm->sev_es.received_first_sipi) {
+ svm->sev_es.received_first_sipi = true;
return;
}
@@ -2701,8 +2921,8 @@ void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
* the guest will set the CS and RIP. Set SW_EXIT_INFO_2 to a
* non-zero value.
*/
- if (!svm->ghcb)
+ if (!svm->sev_es.ghcb)
return;
- ghcb_set_sw_exit_info_2(svm->ghcb, 1);
+ ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, 1);
}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index b36ca4e476c2..d0f68d11ec70 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1452,7 +1452,7 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
svm_switch_vmcb(svm, &svm->vmcb01);
if (vmsa_page)
- svm->vmsa = page_address(vmsa_page);
+ svm->sev_es.vmsa = page_address(vmsa_page);
svm->guest_state_loaded = false;
@@ -2835,11 +2835,11 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
static int svm_complete_emulated_msr(struct kvm_vcpu *vcpu, int err)
{
struct vcpu_svm *svm = to_svm(vcpu);
- if (!err || !sev_es_guest(vcpu->kvm) || WARN_ON_ONCE(!svm->ghcb))
+ if (!err || !sev_es_guest(vcpu->kvm) || WARN_ON_ONCE(!svm->sev_es.ghcb))
return kvm_complete_insn_gp(vcpu, err);
- ghcb_set_sw_exit_info_1(svm->ghcb, 1);
- ghcb_set_sw_exit_info_2(svm->ghcb,
+ ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 1);
+ ghcb_set_sw_exit_info_2(svm->sev_es.ghcb,
X86_TRAP_GP |
SVM_EVTINJ_TYPE_EXEPT |
SVM_EVTINJ_VALID);
@@ -3121,11 +3121,6 @@ static int invpcid_interception(struct kvm_vcpu *vcpu)
type = svm->vmcb->control.exit_info_2;
gva = svm->vmcb->control.exit_info_1;
- if (type > 3) {
- kvm_inject_gp(vcpu, 0);
- return 1;
- }
-
return kvm_handle_invpcid(vcpu, type, gva);
}
@@ -4656,7 +4651,6 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.load_eoi_exitmap = svm_load_eoi_exitmap,
.hwapic_irr_update = svm_hwapic_irr_update,
.hwapic_isr_update = svm_hwapic_isr_update,
- .sync_pir_to_irr = kvm_lapic_find_highest_irr,
.apicv_post_state_restore = avic_post_state_restore,
.set_tss_addr = svm_set_tss_addr,
@@ -4701,6 +4695,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.mem_enc_unreg_region = svm_unregister_enc_region,
.vm_copy_enc_context_from = svm_vm_copy_asid_from,
+ .vm_move_enc_context_from = svm_vm_migrate_from,
.can_emulate_instruction = svm_can_emulate_instruction,
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 5e9510d4574e..1c7306c370fa 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -79,7 +79,9 @@ struct kvm_sev_info {
struct list_head regions_list; /* List of registered regions */
u64 ap_jump_table; /* SEV-ES AP Jump Table address */
struct kvm *enc_context_owner; /* Owner of copied encryption context */
+ unsigned long num_mirrored_vms; /* Number of VMs sharing this ASID */
struct misc_cg *misc_cg; /* For misc cgroup accounting */
+ atomic_t migration_in_progress;
};
struct kvm_svm {
@@ -123,6 +125,20 @@ struct svm_nested_state {
bool initialized;
};
+struct vcpu_sev_es_state {
+ /* SEV-ES support */
+ struct vmcb_save_area *vmsa;
+ struct ghcb *ghcb;
+ struct kvm_host_map ghcb_map;
+ bool received_first_sipi;
+
+ /* SEV-ES scratch area support */
+ void *ghcb_sa;
+ u32 ghcb_sa_len;
+ bool ghcb_sa_sync;
+ bool ghcb_sa_free;
+};
+
struct vcpu_svm {
struct kvm_vcpu vcpu;
/* vmcb always points at current_vmcb->ptr, it's purely a shorthand. */
@@ -186,17 +202,7 @@ struct vcpu_svm {
DECLARE_BITMAP(write, MAX_DIRECT_ACCESS_MSRS);
} shadow_msr_intercept;
- /* SEV-ES support */
- struct vmcb_save_area *vmsa;
- struct ghcb *ghcb;
- struct kvm_host_map ghcb_map;
- bool received_first_sipi;
-
- /* SEV-ES scratch area support */
- void *ghcb_sa;
- u32 ghcb_sa_len;
- bool ghcb_sa_sync;
- bool ghcb_sa_free;
+ struct vcpu_sev_es_state sev_es;
bool guest_state_loaded;
};
@@ -242,7 +248,7 @@ static __always_inline bool sev_es_guest(struct kvm *kvm)
#ifdef CONFIG_KVM_AMD_SEV
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
- return sev_guest(kvm) && sev->es_active;
+ return sev->es_active && !WARN_ON_ONCE(!sev->active);
#else
return false;
#endif
@@ -558,6 +564,7 @@ int svm_register_enc_region(struct kvm *kvm,
int svm_unregister_enc_region(struct kvm *kvm,
struct kvm_enc_region *range);
int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd);
+int svm_vm_migrate_from(struct kvm *kvm, unsigned int source_fd);
void pre_sev_run(struct vcpu_svm *svm, int cpu);
void __init sev_set_cpu_caps(void);
void __init sev_hardware_setup(void);
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index b4ee5e9f9e20..64f2828035c2 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -525,67 +525,19 @@ static int nested_vmx_check_tpr_shadow_controls(struct kvm_vcpu *vcpu,
}
/*
- * Check if MSR is intercepted for L01 MSR bitmap.
+ * For x2APIC MSRs, ignore the vmcs01 bitmap. L1 can enable x2APIC without L1
+ * itself utilizing x2APIC. All MSRs were previously set to be intercepted,
+ * only the "disable intercept" case needs to be handled.
*/
-static bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr)
+static void nested_vmx_disable_intercept_for_x2apic_msr(unsigned long *msr_bitmap_l1,
+ unsigned long *msr_bitmap_l0,
+ u32 msr, int type)
{
- unsigned long *msr_bitmap;
- int f = sizeof(unsigned long);
+ if (type & MSR_TYPE_R && !vmx_test_msr_bitmap_read(msr_bitmap_l1, msr))
+ vmx_clear_msr_bitmap_read(msr_bitmap_l0, msr);
- if (!cpu_has_vmx_msr_bitmap())
- return true;
-
- msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap;
-
- if (msr <= 0x1fff) {
- return !!test_bit(msr, msr_bitmap + 0x800 / f);
- } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
- msr &= 0x1fff;
- return !!test_bit(msr, msr_bitmap + 0xc00 / f);
- }
-
- return true;
-}
-
-/*
- * If a msr is allowed by L0, we should check whether it is allowed by L1.
- * The corresponding bit will be cleared unless both of L0 and L1 allow it.
- */
-static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1,
- unsigned long *msr_bitmap_nested,
- u32 msr, int type)
-{
- int f = sizeof(unsigned long);
-
- /*
- * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
- * have the write-low and read-high bitmap offsets the wrong way round.
- * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
- */
- if (msr <= 0x1fff) {
- if (type & MSR_TYPE_R &&
- !test_bit(msr, msr_bitmap_l1 + 0x000 / f))
- /* read-low */
- __clear_bit(msr, msr_bitmap_nested + 0x000 / f);
-
- if (type & MSR_TYPE_W &&
- !test_bit(msr, msr_bitmap_l1 + 0x800 / f))
- /* write-low */
- __clear_bit(msr, msr_bitmap_nested + 0x800 / f);
-
- } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
- msr &= 0x1fff;
- if (type & MSR_TYPE_R &&
- !test_bit(msr, msr_bitmap_l1 + 0x400 / f))
- /* read-high */
- __clear_bit(msr, msr_bitmap_nested + 0x400 / f);
-
- if (type & MSR_TYPE_W &&
- !test_bit(msr, msr_bitmap_l1 + 0xc00 / f))
- /* write-high */
- __clear_bit(msr, msr_bitmap_nested + 0xc00 / f);
-
- }
+ if (type & MSR_TYPE_W && !vmx_test_msr_bitmap_write(msr_bitmap_l1, msr))
+ vmx_clear_msr_bitmap_write(msr_bitmap_l0, msr);
}
static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap)
@@ -600,6 +552,34 @@ static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap)
}
}
+#define BUILD_NVMX_MSR_INTERCEPT_HELPER(rw) \
+static inline \
+void nested_vmx_set_msr_##rw##_intercept(struct vcpu_vmx *vmx, \
+ unsigned long *msr_bitmap_l1, \
+ unsigned long *msr_bitmap_l0, u32 msr) \
+{ \
+ if (vmx_test_msr_bitmap_##rw(vmx->vmcs01.msr_bitmap, msr) || \
+ vmx_test_msr_bitmap_##rw(msr_bitmap_l1, msr)) \
+ vmx_set_msr_bitmap_##rw(msr_bitmap_l0, msr); \
+ else \
+ vmx_clear_msr_bitmap_##rw(msr_bitmap_l0, msr); \
+}
+BUILD_NVMX_MSR_INTERCEPT_HELPER(read)
+BUILD_NVMX_MSR_INTERCEPT_HELPER(write)
+
+static inline void nested_vmx_set_intercept_for_msr(struct vcpu_vmx *vmx,
+ unsigned long *msr_bitmap_l1,
+ unsigned long *msr_bitmap_l0,
+ u32 msr, int types)
+{
+ if (types & MSR_TYPE_R)
+ nested_vmx_set_msr_read_intercept(vmx, msr_bitmap_l1,
+ msr_bitmap_l0, msr);
+ if (types & MSR_TYPE_W)
+ nested_vmx_set_msr_write_intercept(vmx, msr_bitmap_l1,
+ msr_bitmap_l0, msr);
+}
+
/*
* Merge L0's and L1's MSR bitmap, return false to indicate that
* we do not use the hardware.
@@ -607,10 +587,11 @@ static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap)
static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
int msr;
unsigned long *msr_bitmap_l1;
- unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap;
- struct kvm_host_map *map = &to_vmx(vcpu)->nested.msr_bitmap_map;
+ unsigned long *msr_bitmap_l0 = vmx->nested.vmcs02.msr_bitmap;
+ struct kvm_host_map *map = &vmx->nested.msr_bitmap_map;
/* Nothing to do if the MSR bitmap is not in use. */
if (!cpu_has_vmx_msr_bitmap() ||
@@ -625,7 +606,7 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
/*
* To keep the control flow simple, pay eight 8-byte writes (sixteen
* 4-byte writes on 32-bit systems) up front to enable intercepts for
- * the x2APIC MSR range and selectively disable them below.
+ * the x2APIC MSR range and selectively toggle those relevant to L2.
*/
enable_x2apic_msr_intercepts(msr_bitmap_l0);
@@ -644,61 +625,44 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
}
}
- nested_vmx_disable_intercept_for_msr(
+ nested_vmx_disable_intercept_for_x2apic_msr(
msr_bitmap_l1, msr_bitmap_l0,
X2APIC_MSR(APIC_TASKPRI),
MSR_TYPE_R | MSR_TYPE_W);
if (nested_cpu_has_vid(vmcs12)) {
- nested_vmx_disable_intercept_for_msr(
+ nested_vmx_disable_intercept_for_x2apic_msr(
msr_bitmap_l1, msr_bitmap_l0,
X2APIC_MSR(APIC_EOI),
MSR_TYPE_W);
- nested_vmx_disable_intercept_for_msr(
+ nested_vmx_disable_intercept_for_x2apic_msr(
msr_bitmap_l1, msr_bitmap_l0,
X2APIC_MSR(APIC_SELF_IPI),
MSR_TYPE_W);
}
}
- /* KVM unconditionally exposes the FS/GS base MSRs to L1. */
+ /*
+ * Always check vmcs01's bitmap to honor userspace MSR filters and any
+ * other runtime changes to vmcs01's bitmap, e.g. dynamic pass-through.
+ */
#ifdef CONFIG_X86_64
- nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0,
- MSR_FS_BASE, MSR_TYPE_RW);
+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+ MSR_FS_BASE, MSR_TYPE_RW);
- nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0,
- MSR_GS_BASE, MSR_TYPE_RW);
+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+ MSR_GS_BASE, MSR_TYPE_RW);
- nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0,
- MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+ MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
#endif
+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+ MSR_IA32_SPEC_CTRL, MSR_TYPE_RW);
- /*
- * Checking the L0->L1 bitmap is trying to verify two things:
- *
- * 1. L0 gave a permission to L1 to actually passthrough the MSR. This
- * ensures that we do not accidentally generate an L02 MSR bitmap
- * from the L12 MSR bitmap that is too permissive.
- * 2. That L1 or L2s have actually used the MSR. This avoids
- * unnecessarily merging of the bitmap if the MSR is unused. This
- * works properly because we only update the L01 MSR bitmap lazily.
- * So even if L0 should pass L1 these MSRs, the L01 bitmap is only
- * updated to reflect this when L1 (or its L2s) actually write to
- * the MSR.
- */
- if (!msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL))
- nested_vmx_disable_intercept_for_msr(
- msr_bitmap_l1, msr_bitmap_l0,
- MSR_IA32_SPEC_CTRL,
- MSR_TYPE_R | MSR_TYPE_W);
-
- if (!msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD))
- nested_vmx_disable_intercept_for_msr(
- msr_bitmap_l1, msr_bitmap_l0,
- MSR_IA32_PRED_CMD,
- MSR_TYPE_W);
+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+ MSR_IA32_PRED_CMD, MSR_TYPE_W);
- kvm_vcpu_unmap(vcpu, &to_vmx(vcpu)->nested.msr_bitmap_map, false);
+ kvm_vcpu_unmap(vcpu, &vmx->nested.msr_bitmap_map, false);
return true;
}
@@ -706,33 +670,39 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
static void nested_cache_shadow_vmcs12(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
- struct kvm_host_map map;
- struct vmcs12 *shadow;
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct gfn_to_hva_cache *ghc = &vmx->nested.shadow_vmcs12_cache;
if (!nested_cpu_has_shadow_vmcs(vmcs12) ||
vmcs12->vmcs_link_pointer == INVALID_GPA)
return;
- shadow = get_shadow_vmcs12(vcpu);
-
- if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->vmcs_link_pointer), &map))
+ if (ghc->gpa != vmcs12->vmcs_link_pointer &&
+ kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc,
+ vmcs12->vmcs_link_pointer, VMCS12_SIZE))
return;
- memcpy(shadow, map.hva, VMCS12_SIZE);
- kvm_vcpu_unmap(vcpu, &map, false);
+ kvm_read_guest_cached(vmx->vcpu.kvm, ghc, get_shadow_vmcs12(vcpu),
+ VMCS12_SIZE);
}
static void nested_flush_cached_shadow_vmcs12(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct gfn_to_hva_cache *ghc = &vmx->nested.shadow_vmcs12_cache;
if (!nested_cpu_has_shadow_vmcs(vmcs12) ||
vmcs12->vmcs_link_pointer == INVALID_GPA)
return;
- kvm_write_guest(vmx->vcpu.kvm, vmcs12->vmcs_link_pointer,
- get_shadow_vmcs12(vcpu), VMCS12_SIZE);
+ if (ghc->gpa != vmcs12->vmcs_link_pointer &&
+ kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc,
+ vmcs12->vmcs_link_pointer, VMCS12_SIZE))
+ return;
+
+ kvm_write_guest_cached(vmx->vcpu.kvm, ghc, get_shadow_vmcs12(vcpu),
+ VMCS12_SIZE);
}
/*
@@ -1192,29 +1162,26 @@ static void nested_vmx_transition_tlb_flush(struct kvm_vcpu *vcpu,
WARN_ON(!enable_vpid);
/*
- * If VPID is enabled and used by vmc12, but L2 does not have a unique
- * TLB tag (ASID), i.e. EPT is disabled and KVM was unable to allocate
- * a VPID for L2, flush the current context as the effective ASID is
- * common to both L1 and L2.
- *
- * Defer the flush so that it runs after vmcs02.EPTP has been set by
- * KVM_REQ_LOAD_MMU_PGD (if nested EPT is enabled) and to avoid
- * redundant flushes further down the nested pipeline.
- *
- * If a TLB flush isn't required due to any of the above, and vpid12 is
- * changing then the new "virtual" VPID (vpid12) will reuse the same
- * "real" VPID (vpid02), and so needs to be flushed. There's no direct
- * mapping between vpid02 and vpid12, vpid02 is per-vCPU and reused for
- * all nested vCPUs. Remember, a flush on VM-Enter does not invalidate
- * guest-physical mappings, so there is no need to sync the nEPT MMU.
+ * VPID is enabled and in use by vmcs12. If vpid12 is changing, then
+ * emulate a guest TLB flush as KVM does not track vpid12 history nor
+ * is the VPID incorporated into the MMU context. I.e. KVM must assume
+ * that the new vpid12 has never been used and thus represents a new
+ * guest ASID that cannot have entries in the TLB.
*/
- if (!nested_has_guest_tlb_tag(vcpu)) {
- kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
- } else if (is_vmenter &&
- vmcs12->virtual_processor_id != vmx->nested.last_vpid) {
+ if (is_vmenter && vmcs12->virtual_processor_id != vmx->nested.last_vpid) {
vmx->nested.last_vpid = vmcs12->virtual_processor_id;
- vpid_sync_context(nested_get_vpid02(vcpu));
+ kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
+ return;
}
+
+ /*
+ * If VPID is enabled, used by vmc12, and vpid12 is not changing but
+ * does not have a unique TLB tag (ASID), i.e. EPT is disabled and
+ * KVM was unable to allocate a VPID for L2, flush the current context
+ * as the effective ASID is common to both L1 and L2.
+ */
+ if (!nested_has_guest_tlb_tag(vcpu))
+ kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
}
static bool is_bitwise_subset(u64 superset, u64 subset, u64 mask)
@@ -2866,6 +2833,17 @@ static int nested_vmx_check_controls(struct kvm_vcpu *vcpu,
return 0;
}
+static int nested_vmx_check_address_space_size(struct kvm_vcpu *vcpu,
+ struct vmcs12 *vmcs12)
+{
+#ifdef CONFIG_X86_64
+ if (CC(!!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) !=
+ !!(vcpu->arch.efer & EFER_LMA)))
+ return -EINVAL;
+#endif
+ return 0;
+}
+
static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
@@ -2890,18 +2868,16 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
return -EINVAL;
#ifdef CONFIG_X86_64
- ia32e = !!(vcpu->arch.efer & EFER_LMA);
+ ia32e = !!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE);
#else
ia32e = false;
#endif
if (ia32e) {
- if (CC(!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)) ||
- CC(!(vmcs12->host_cr4 & X86_CR4_PAE)))
+ if (CC(!(vmcs12->host_cr4 & X86_CR4_PAE)))
return -EINVAL;
} else {
- if (CC(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) ||
- CC(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) ||
+ if (CC(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) ||
CC(vmcs12->host_cr4 & X86_CR4_PCIDE) ||
CC((vmcs12->host_rip) >> 32))
return -EINVAL;
@@ -2946,9 +2922,9 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
- int r = 0;
- struct vmcs12 *shadow;
- struct kvm_host_map map;
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct gfn_to_hva_cache *ghc = &vmx->nested.shadow_vmcs12_cache;
+ struct vmcs_hdr hdr;
if (vmcs12->vmcs_link_pointer == INVALID_GPA)
return 0;
@@ -2956,17 +2932,21 @@ static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu,
if (CC(!page_address_valid(vcpu, vmcs12->vmcs_link_pointer)))
return -EINVAL;
- if (CC(kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->vmcs_link_pointer), &map)))
- return -EINVAL;
+ if (ghc->gpa != vmcs12->vmcs_link_pointer &&
+ CC(kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc,
+ vmcs12->vmcs_link_pointer, VMCS12_SIZE)))
+ return -EINVAL;
- shadow = map.hva;
+ if (CC(kvm_read_guest_offset_cached(vcpu->kvm, ghc, &hdr,
+ offsetof(struct vmcs12, hdr),
+ sizeof(hdr))))
+ return -EINVAL;
- if (CC(shadow->hdr.revision_id != VMCS12_REVISION) ||
- CC(shadow->hdr.shadow_vmcs != nested_cpu_has_shadow_vmcs(vmcs12)))
- r = -EINVAL;
+ if (CC(hdr.revision_id != VMCS12_REVISION) ||
+ CC(hdr.shadow_vmcs != nested_cpu_has_shadow_vmcs(vmcs12)))
+ return -EINVAL;
- kvm_vcpu_unmap(vcpu, &map, false);
- return r;
+ return 0;
}
/*
@@ -3361,8 +3341,7 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
};
u32 failed_index;
- if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
- kvm_vcpu_flush_tlb_current(vcpu);
+ kvm_service_local_tlb_flush_requests(vcpu);
evaluate_pending_interrupts = exec_controls_get(vmx) &
(CPU_BASED_INTR_WINDOW_EXITING | CPU_BASED_NMI_WINDOW_EXITING);
@@ -3571,6 +3550,9 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
if (nested_vmx_check_controls(vcpu, vmcs12))
return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
+ if (nested_vmx_check_address_space_size(vcpu, vmcs12))
+ return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_HOST_STATE_FIELD);
+
if (nested_vmx_check_host_state(vcpu, vmcs12))
return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_HOST_STATE_FIELD);
@@ -4516,9 +4498,8 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
(void)nested_get_evmcs_page(vcpu);
}
- /* Service the TLB flush request for L2 before switching to L1. */
- if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
- kvm_vcpu_flush_tlb_current(vcpu);
+ /* Service pending TLB flush requests for L2 before switching to L1. */
+ kvm_service_local_tlb_flush_requests(vcpu);
/*
* VCPU_EXREG_PDPTR will be clobbered in arch/x86/kvm/vmx/vmx.h between
@@ -4871,6 +4852,7 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
if (!vmx->nested.cached_vmcs12)
goto out_cached_vmcs12;
+ vmx->nested.shadow_vmcs12_cache.gpa = INVALID_GPA;
vmx->nested.cached_shadow_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL_ACCOUNT);
if (!vmx->nested.cached_shadow_vmcs12)
goto out_cached_shadow_vmcs12;
@@ -5300,10 +5282,10 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
return 1;
if (vmx->nested.current_vmptr != vmptr) {
- struct kvm_host_map map;
- struct vmcs12 *new_vmcs12;
+ struct gfn_to_hva_cache *ghc = &vmx->nested.vmcs12_cache;
+ struct vmcs_hdr hdr;
- if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmptr), &map)) {
+ if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, vmptr, VMCS12_SIZE)) {
/*
* Reads from an unbacked page return all 1s,
* which means that the 32 bits located at the
@@ -5314,12 +5296,16 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
}
- new_vmcs12 = map.hva;
+ if (kvm_read_guest_offset_cached(vcpu->kvm, ghc, &hdr,
+ offsetof(struct vmcs12, hdr),
+ sizeof(hdr))) {
+ return nested_vmx_fail(vcpu,
+ VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
+ }
- if (new_vmcs12->hdr.revision_id != VMCS12_REVISION ||
- (new_vmcs12->hdr.shadow_vmcs &&
+ if (hdr.revision_id != VMCS12_REVISION ||
+ (hdr.shadow_vmcs &&
!nested_cpu_has_vmx_shadow_vmcs(vcpu))) {
- kvm_vcpu_unmap(vcpu, &map, false);
return nested_vmx_fail(vcpu,
VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
}
@@ -5330,8 +5316,11 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
* Load VMCS12 from guest memory since it is not already
* cached.
*/
- memcpy(vmx->nested.cached_vmcs12, new_vmcs12, VMCS12_SIZE);
- kvm_vcpu_unmap(vcpu, &map, false);
+ if (kvm_read_guest_cached(vcpu->kvm, ghc, vmx->nested.cached_vmcs12,
+ VMCS12_SIZE)) {
+ return nested_vmx_fail(vcpu,
+ VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
+ }
set_current_vmptr(vmx, vmptr);
}
@@ -5379,7 +5368,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
struct {
u64 eptp, gpa;
} operand;
- int i, r;
+ int i, r, gpr_index;
if (!(vmx->nested.msrs.secondary_ctls_high &
SECONDARY_EXEC_ENABLE_EPT) ||
@@ -5392,7 +5381,8 @@ static int handle_invept(struct kvm_vcpu *vcpu)
return 1;
vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
- type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf);
+ gpr_index = vmx_get_instr_info_reg2(vmx_instruction_info);
+ type = kvm_register_read(vcpu, gpr_index);
types = (vmx->nested.msrs.ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6;
@@ -5459,7 +5449,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
u64 gla;
} operand;
u16 vpid02;
- int r;
+ int r, gpr_index;
if (!(vmx->nested.msrs.secondary_ctls_high &
SECONDARY_EXEC_ENABLE_VPID) ||
@@ -5472,7 +5462,8 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
return 1;
vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
- type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf);
+ gpr_index = vmx_get_instr_info_reg2(vmx_instruction_info);
+ type = kvm_register_read(vcpu, gpr_index);
types = (vmx->nested.msrs.vpid_caps &
VMX_VPID_EXTENT_SUPPORTED_MASK) >> 8;
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index b8e0d21b7c8a..1b7456b2177b 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -118,16 +118,15 @@ static struct kvm_pmc *intel_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
}
}
-/* returns 0 if idx's corresponding MSR exists; otherwise returns 1. */
-static int intel_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
+static bool intel_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
bool fixed = idx & (1u << 30);
idx &= ~(3u << 30);
- return (!fixed && idx >= pmu->nr_arch_gp_counters) ||
- (fixed && idx >= pmu->nr_arch_fixed_counters);
+ return fixed ? idx < pmu->nr_arch_fixed_counters
+ : idx < pmu->nr_arch_gp_counters;
}
static struct kvm_pmc *intel_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c
index 5f81ef092bd4..1c94783b5a54 100644
--- a/arch/x86/kvm/vmx/posted_intr.c
+++ b/arch/x86/kvm/vmx/posted_intr.c
@@ -5,6 +5,7 @@
#include <asm/cpu.h>
#include "lapic.h"
+#include "irq.h"
#include "posted_intr.h"
#include "trace.h"
#include "vmx.h"
@@ -77,13 +78,18 @@ after_clear_sn:
pi_set_on(pi_desc);
}
+static bool vmx_can_use_vtd_pi(struct kvm *kvm)
+{
+ return irqchip_in_kernel(kvm) && enable_apicv &&
+ kvm_arch_has_assigned_device(kvm) &&
+ irq_remapping_cap(IRQ_POSTING_CAP);
+}
+
void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
{
struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
- if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
- !irq_remapping_cap(IRQ_POSTING_CAP) ||
- !kvm_vcpu_apicv_active(vcpu))
+ if (!vmx_can_use_vtd_pi(vcpu->kvm))
return;
/* Set SN when the vCPU is preempted */
@@ -141,9 +147,7 @@ int pi_pre_block(struct kvm_vcpu *vcpu)
struct pi_desc old, new;
struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
- if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
- !irq_remapping_cap(IRQ_POSTING_CAP) ||
- !kvm_vcpu_apicv_active(vcpu))
+ if (!vmx_can_use_vtd_pi(vcpu->kvm))
return 0;
WARN_ON(irqs_disabled());
@@ -270,9 +274,7 @@ int pi_update_irte(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq,
struct vcpu_data vcpu_info;
int idx, ret = 0;
- if (!kvm_arch_has_assigned_device(kvm) ||
- !irq_remapping_cap(IRQ_POSTING_CAP) ||
- !kvm_vcpu_apicv_active(kvm->vcpus[0]))
+ if (!vmx_can_use_vtd_pi(kvm))
return 0;
idx = srcu_read_lock(&kvm->irq_srcu);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 76861b66bbcf..f90448809690 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -769,24 +769,13 @@ void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu)
/*
* Check if MSR is intercepted for currently loaded MSR bitmap.
*/
-static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
+static bool msr_write_intercepted(struct vcpu_vmx *vmx, u32 msr)
{
- unsigned long *msr_bitmap;
- int f = sizeof(unsigned long);
-
- if (!cpu_has_vmx_msr_bitmap())
+ if (!(exec_controls_get(vmx) & CPU_BASED_USE_MSR_BITMAPS))
return true;
- msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap;
-
- if (msr <= 0x1fff) {
- return !!test_bit(msr, msr_bitmap + 0x800 / f);
- } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
- msr &= 0x1fff;
- return !!test_bit(msr, msr_bitmap + 0xc00 / f);
- }
-
- return true;
+ return vmx_test_msr_bitmap_write(vmx->loaded_vmcs->msr_bitmap,
+ MSR_IA32_SPEC_CTRL);
}
static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
@@ -2929,6 +2918,13 @@ static void vmx_flush_tlb_all(struct kvm_vcpu *vcpu)
}
}
+static inline int vmx_get_current_vpid(struct kvm_vcpu *vcpu)
+{
+ if (is_guest_mode(vcpu))
+ return nested_get_vpid02(vcpu);
+ return to_vmx(vcpu)->vpid;
+}
+
static void vmx_flush_tlb_current(struct kvm_vcpu *vcpu)
{
struct kvm_mmu *mmu = vcpu->arch.mmu;
@@ -2941,31 +2937,29 @@ static void vmx_flush_tlb_current(struct kvm_vcpu *vcpu)
if (enable_ept)
ept_sync_context(construct_eptp(vcpu, root_hpa,
mmu->shadow_root_level));
- else if (!is_guest_mode(vcpu))
- vpid_sync_context(to_vmx(vcpu)->vpid);
else
- vpid_sync_context(nested_get_vpid02(vcpu));
+ vpid_sync_context(vmx_get_current_vpid(vcpu));
}
static void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr)
{
/*
- * vpid_sync_vcpu_addr() is a nop if vmx->vpid==0, see the comment in
+ * vpid_sync_vcpu_addr() is a nop if vpid==0, see the comment in
* vmx_flush_tlb_guest() for an explanation of why this is ok.
*/
- vpid_sync_vcpu_addr(to_vmx(vcpu)->vpid, addr);
+ vpid_sync_vcpu_addr(vmx_get_current_vpid(vcpu), addr);
}
static void vmx_flush_tlb_guest(struct kvm_vcpu *vcpu)
{
/*
- * vpid_sync_context() is a nop if vmx->vpid==0, e.g. if enable_vpid==0
- * or a vpid couldn't be allocated for this vCPU. VM-Enter and VM-Exit
- * are required to flush GVA->{G,H}PA mappings from the TLB if vpid is
+ * vpid_sync_context() is a nop if vpid==0, e.g. if enable_vpid==0 or a
+ * vpid couldn't be allocated for this vCPU. VM-Enter and VM-Exit are
+ * required to flush GVA->{G,H}PA mappings from the TLB if vpid is
* disabled (VM-Enter with vpid enabled and vpid==0 is disallowed),
* i.e. no explicit INVVPID is necessary.
*/
- vpid_sync_context(to_vmx(vcpu)->vpid);
+ vpid_sync_context(vmx_get_current_vpid(vcpu));
}
void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu)
@@ -3697,46 +3691,6 @@ void free_vpid(int vpid)
spin_unlock(&vmx_vpid_lock);
}
-static void vmx_clear_msr_bitmap_read(ulong *msr_bitmap, u32 msr)
-{
- int f = sizeof(unsigned long);
-
- if (msr <= 0x1fff)
- __clear_bit(msr, msr_bitmap + 0x000 / f);
- else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
- __clear_bit(msr & 0x1fff, msr_bitmap + 0x400 / f);
-}
-
-static void vmx_clear_msr_bitmap_write(ulong *msr_bitmap, u32 msr)
-{
- int f = sizeof(unsigned long);
-
- if (msr <= 0x1fff)
- __clear_bit(msr, msr_bitmap + 0x800 / f);
- else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
- __clear_bit(msr & 0x1fff, msr_bitmap + 0xc00 / f);
-}
-
-static void vmx_set_msr_bitmap_read(ulong *msr_bitmap, u32 msr)
-{
- int f = sizeof(unsigned long);
-
- if (msr <= 0x1fff)
- __set_bit(msr, msr_bitmap + 0x000 / f);
- else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
- __set_bit(msr & 0x1fff, msr_bitmap + 0x400 / f);
-}
-
-static void vmx_set_msr_bitmap_write(ulong *msr_bitmap, u32 msr)
-{
- int f = sizeof(unsigned long);
-
- if (msr <= 0x1fff)
- __set_bit(msr, msr_bitmap + 0x800 / f);
- else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
- __set_bit(msr & 0x1fff, msr_bitmap + 0xc00 / f);
-}
-
void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -5494,6 +5448,7 @@ static int handle_invpcid(struct kvm_vcpu *vcpu)
u64 pcid;
u64 gla;
} operand;
+ int gpr_index;
if (!guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) {
kvm_queue_exception(vcpu, UD_VECTOR);
@@ -5501,12 +5456,8 @@ static int handle_invpcid(struct kvm_vcpu *vcpu)
}
vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
- type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf);
-
- if (type > 3) {
- kvm_inject_gp(vcpu, 0);
- return 1;
- }
+ gpr_index = vmx_get_instr_info_reg2(vmx_instruction_info);
+ type = kvm_register_read(vcpu, gpr_index);
/* According to the Intel instruction reference, the memory operand
* is read even if it isn't needed (e.g., for type==all)
@@ -6316,9 +6267,9 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
int max_irr;
- bool max_irr_updated;
+ bool got_posted_interrupt;
- if (KVM_BUG_ON(!vcpu->arch.apicv_active, vcpu->kvm))
+ if (KVM_BUG_ON(!enable_apicv, vcpu->kvm))
return -EIO;
if (pi_test_on(&vmx->pi_desc)) {
@@ -6328,22 +6279,33 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
* But on x86 this is just a compiler barrier anyway.
*/
smp_mb__after_atomic();
- max_irr_updated =
+ got_posted_interrupt =
kvm_apic_update_irr(vcpu, vmx->pi_desc.pir, &max_irr);
-
- /*
- * If we are running L2 and L1 has a new pending interrupt
- * which can be injected, this may cause a vmexit or it may
- * be injected into L2. Either way, this interrupt will be
- * processed via KVM_REQ_EVENT, not RVI, because we do not use
- * virtual interrupt delivery to inject L1 interrupts into L2.
- */
- if (is_guest_mode(vcpu) && max_irr_updated)
- kvm_make_request(KVM_REQ_EVENT, vcpu);
} else {
max_irr = kvm_lapic_find_highest_irr(vcpu);
+ got_posted_interrupt = false;
}
- vmx_hwapic_irr_update(vcpu, max_irr);
+
+ /*
+ * Newly recognized interrupts are injected via either virtual interrupt
+ * delivery (RVI) or KVM_REQ_EVENT. Virtual interrupt delivery is
+ * disabled in two cases:
+ *
+ * 1) If L2 is running and the vCPU has a new pending interrupt. If L1
+ * wants to exit on interrupts, KVM_REQ_EVENT is needed to synthesize a
+ * VM-Exit to L1. If L1 doesn't want to exit, the interrupt is injected
+ * into L2, but KVM doesn't use virtual interrupt delivery to inject
+ * interrupts into L2, and so KVM_REQ_EVENT is again needed.
+ *
+ * 2) If APICv is disabled for this vCPU, assigned devices may still
+ * attempt to post interrupts. The posted interrupt vector will cause
+ * a VM-Exit and the subsequent entry will call sync_pir_to_irr.
+ */
+ if (!is_guest_mode(vcpu) && kvm_vcpu_apicv_active(vcpu))
+ vmx_set_rvi(max_irr);
+ else if (got_posted_interrupt)
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
+
return max_irr;
}
@@ -6749,7 +6711,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
* If the L02 MSR bitmap does not intercept the MSR, then we need to
* save it.
*/
- if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
+ if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL)))
vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0);
@@ -7563,7 +7525,8 @@ static void hardware_unsetup(void)
static bool vmx_check_apicv_inhibit_reasons(ulong bit)
{
ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) |
- BIT(APICV_INHIBIT_REASON_HYPERV);
+ BIT(APICV_INHIBIT_REASON_HYPERV) |
+ BIT(APICV_INHIBIT_REASON_BLOCKIRQ);
return supported & BIT(bit);
}
@@ -7814,10 +7777,10 @@ static __init int hardware_setup(void)
ple_window_shrink = 0;
}
- if (!cpu_has_vmx_apicv()) {
+ if (!cpu_has_vmx_apicv())
enable_apicv = 0;
+ if (!enable_apicv)
vmx_x86_ops.sync_pir_to_irr = NULL;
- }
if (cpu_has_vmx_tsc_scaling()) {
kvm_has_tsc_control = true;
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index e7db42e3b0ce..4df2ac24ffc1 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -142,6 +142,16 @@ struct nested_vmx {
struct vmcs12 *cached_shadow_vmcs12;
/*
+ * GPA to HVA cache for accessing vmcs12->vmcs_link_pointer
+ */
+ struct gfn_to_hva_cache shadow_vmcs12_cache;
+
+ /*
+ * GPA to HVA cache for VMCS12
+ */
+ struct gfn_to_hva_cache vmcs12_cache;
+
+ /*
* Indicates if the shadow vmcs or enlightened vmcs must be updated
* with the data held by struct vmcs12.
*/
@@ -400,6 +410,34 @@ static inline void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr,
void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu);
+/*
+ * Note, early Intel manuals have the write-low and read-high bitmap offsets
+ * the wrong way round. The bitmaps control MSRs 0x00000000-0x00001fff and
+ * 0xc0000000-0xc0001fff. The former (low) uses bytes 0-0x3ff for reads and
+ * 0x800-0xbff for writes. The latter (high) uses 0x400-0x7ff for reads and
+ * 0xc00-0xfff for writes. MSRs not covered by either of the ranges always
+ * VM-Exit.
+ */
+#define __BUILD_VMX_MSR_BITMAP_HELPER(rtype, action, bitop, access, base) \
+static inline rtype vmx_##action##_msr_bitmap_##access(unsigned long *bitmap, \
+ u32 msr) \
+{ \
+ int f = sizeof(unsigned long); \
+ \
+ if (msr <= 0x1fff) \
+ return bitop##_bit(msr, bitmap + base / f); \
+ else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) \
+ return bitop##_bit(msr & 0x1fff, bitmap + (base + 0x400) / f); \
+ return (rtype)true; \
+}
+#define BUILD_VMX_MSR_BITMAP_HELPERS(ret_type, action, bitop) \
+ __BUILD_VMX_MSR_BITMAP_HELPER(ret_type, action, bitop, read, 0x0) \
+ __BUILD_VMX_MSR_BITMAP_HELPER(ret_type, action, bitop, write, 0x800)
+
+BUILD_VMX_MSR_BITMAP_HELPERS(bool, test, test)
+BUILD_VMX_MSR_BITMAP_HELPERS(void, clear, __clear)
+BUILD_VMX_MSR_BITMAP_HELPERS(void, set, __set)
+
static inline u8 vmx_get_rvi(void)
{
return vmcs_read16(GUEST_INTR_STATUS) & 0xff;
@@ -522,4 +560,9 @@ static inline bool vmx_guest_state_valid(struct kvm_vcpu *vcpu)
void dump_vmcs(struct kvm_vcpu *vcpu);
+static inline int vmx_get_instr_info_reg2(u32 vmx_instr_info)
+{
+ return (vmx_instr_info >> 28) & 0xf;
+}
+
#endif /* __KVM_X86_VMX_H */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c1c4e2b05a63..0ee1a039b490 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3258,10 +3258,36 @@ static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu)
static_call(kvm_x86_tlb_flush_guest)(vcpu);
}
+
+static inline void kvm_vcpu_flush_tlb_current(struct kvm_vcpu *vcpu)
+{
+ ++vcpu->stat.tlb_flush;
+ static_call(kvm_x86_tlb_flush_current)(vcpu);
+}
+
+/*
+ * Service "local" TLB flush requests, which are specific to the current MMU
+ * context. In addition to the generic event handling in vcpu_enter_guest(),
+ * TLB flushes that are targeted at an MMU context also need to be serviced
+ * prior before nested VM-Enter/VM-Exit.
+ */
+void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu)
+{
+ if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
+ kvm_vcpu_flush_tlb_current(vcpu);
+
+ if (kvm_check_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu))
+ kvm_vcpu_flush_tlb_guest(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvm_service_local_tlb_flush_requests);
+
static void record_steal_time(struct kvm_vcpu *vcpu)
{
- struct kvm_host_map map;
- struct kvm_steal_time *st;
+ struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache;
+ struct kvm_steal_time __user *st;
+ struct kvm_memslots *slots;
+ u64 steal;
+ u32 version;
if (kvm_xen_msr_enabled(vcpu->kvm)) {
kvm_xen_runstate_set_running(vcpu);
@@ -3271,47 +3297,86 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
return;
- /* -EAGAIN is returned in atomic context so we can just return. */
- if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT,
- &map, &vcpu->arch.st.cache, false))
+ if (WARN_ON_ONCE(current->mm != vcpu->kvm->mm))
return;
- st = map.hva +
- offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS);
+ slots = kvm_memslots(vcpu->kvm);
+
+ if (unlikely(slots->generation != ghc->generation ||
+ kvm_is_error_hva(ghc->hva) || !ghc->memslot)) {
+ gfn_t gfn = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS;
+
+ /* We rely on the fact that it fits in a single page. */
+ BUILD_BUG_ON((sizeof(*st) - 1) & KVM_STEAL_VALID_BITS);
+
+ if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gfn, sizeof(*st)) ||
+ kvm_is_error_hva(ghc->hva) || !ghc->memslot)
+ return;
+ }
+ st = (struct kvm_steal_time __user *)ghc->hva;
/*
* Doing a TLB flush here, on the guest's behalf, can avoid
* expensive IPIs.
*/
if (guest_pv_has(vcpu, KVM_FEATURE_PV_TLB_FLUSH)) {
- u8 st_preempted = xchg(&st->preempted, 0);
+ u8 st_preempted = 0;
+ int err = -EFAULT;
+
+ if (!user_access_begin(st, sizeof(*st)))
+ return;
+
+ asm volatile("1: xchgb %0, %2\n"
+ "xor %1, %1\n"
+ "2:\n"
+ _ASM_EXTABLE_UA(1b, 2b)
+ : "+q" (st_preempted),
+ "+&r" (err),
+ "+m" (st->preempted));
+ if (err)
+ goto out;
+
+ user_access_end();
+
+ vcpu->arch.st.preempted = 0;
trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
st_preempted & KVM_VCPU_FLUSH_TLB);
if (st_preempted & KVM_VCPU_FLUSH_TLB)
kvm_vcpu_flush_tlb_guest(vcpu);
+
+ if (!user_access_begin(st, sizeof(*st)))
+ goto dirty;
} else {
- st->preempted = 0;
- }
+ if (!user_access_begin(st, sizeof(*st)))
+ return;
- vcpu->arch.st.preempted = 0;
+ unsafe_put_user(0, &st->preempted, out);
+ vcpu->arch.st.preempted = 0;
+ }
- if (st->version & 1)
- st->version += 1; /* first time write, random junk */
+ unsafe_get_user(version, &st->version, out);
+ if (version & 1)
+ version += 1; /* first time write, random junk */
- st->version += 1;
+ version += 1;
+ unsafe_put_user(version, &st->version, out);
smp_wmb();
- st->steal += current->sched_info.run_delay -
+ unsafe_get_user(steal, &st->steal, out);
+ steal += current->sched_info.run_delay -
vcpu->arch.st.last_steal;
vcpu->arch.st.last_steal = current->sched_info.run_delay;
+ unsafe_put_user(steal, &st->steal, out);
- smp_wmb();
-
- st->version += 1;
+ version += 1;
+ unsafe_put_user(version, &st->version, out);
- kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, false);
+ out:
+ user_access_end();
+ dirty:
+ mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa));
}
int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
@@ -3517,7 +3582,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (!guest_pv_has(vcpu, KVM_FEATURE_PV_EOI))
return 1;
- if (kvm_lapic_enable_pv_eoi(vcpu, data, sizeof(u8)))
+ if (kvm_lapic_set_pv_eoi(vcpu, data, sizeof(u8)))
return 1;
break;
@@ -4091,6 +4156,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_SGX_ATTRIBUTE:
#endif
case KVM_CAP_VM_COPY_ENC_CONTEXT_FROM:
+ case KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM:
case KVM_CAP_SREGS2:
case KVM_CAP_EXIT_ON_EMULATION_FAILURE:
case KVM_CAP_VCPU_ATTRIBUTES:
@@ -4137,7 +4203,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = !static_call(kvm_x86_cpu_has_accelerated_tpr)();
break;
case KVM_CAP_NR_VCPUS:
- r = KVM_SOFT_MAX_VCPUS;
+ r = min_t(unsigned int, num_online_cpus(), KVM_MAX_VCPUS);
break;
case KVM_CAP_MAX_VCPUS:
r = KVM_MAX_VCPUS;
@@ -4351,8 +4417,10 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
{
- struct kvm_host_map map;
- struct kvm_steal_time *st;
+ struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache;
+ struct kvm_steal_time __user *st;
+ struct kvm_memslots *slots;
+ static const u8 preempted = KVM_VCPU_PREEMPTED;
if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
return;
@@ -4360,16 +4428,23 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
if (vcpu->arch.st.preempted)
return;
- if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT, &map,
- &vcpu->arch.st.cache, true))
+ /* This happens on process exit */
+ if (unlikely(current->mm != vcpu->kvm->mm))
+ return;
+
+ slots = kvm_memslots(vcpu->kvm);
+
+ if (unlikely(slots->generation != ghc->generation ||
+ kvm_is_error_hva(ghc->hva) || !ghc->memslot))
return;
- st = map.hva +
- offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS);
+ st = (struct kvm_steal_time __user *)ghc->hva;
+ BUILD_BUG_ON(sizeof(st->preempted) != sizeof(preempted));
- st->preempted = vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED;
+ if (!copy_to_user_nofault(&st->preempted, &preempted, sizeof(preempted)))
+ vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED;
- kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, true);
+ mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa));
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -4397,8 +4472,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
struct kvm_lapic_state *s)
{
- if (vcpu->arch.apicv_active)
- static_call(kvm_x86_sync_pir_to_irr)(vcpu);
+ static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
return kvm_apic_get_state(vcpu, s);
}
@@ -5073,6 +5147,17 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
struct kvm_cpuid __user *cpuid_arg = argp;
struct kvm_cpuid cpuid;
+ /*
+ * KVM does not correctly handle changing guest CPUID after KVM_RUN, as
+ * MAXPHYADDR, GBPAGES support, AMD reserved bit behavior, etc.. aren't
+ * tracked in kvm_mmu_page_role. As a result, KVM may miss guest page
+ * faults due to reusing SPs/SPTEs. In practice no sane VMM mucks with
+ * the core vCPU model on the fly, so fail.
+ */
+ r = -EINVAL;
+ if (vcpu->arch.last_vmentry_cpu != -1)
+ goto out;
+
r = -EFAULT;
if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
goto out;
@@ -5083,6 +5168,14 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
struct kvm_cpuid2 __user *cpuid_arg = argp;
struct kvm_cpuid2 cpuid;
+ /*
+ * KVM_SET_CPUID{,2} after KVM_RUN is forbidded, see the comment in
+ * KVM_SET_CPUID case above.
+ */
+ r = -EINVAL;
+ if (vcpu->arch.last_vmentry_cpu != -1)
+ goto out;
+
r = -EFAULT;
if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
goto out;
@@ -5728,6 +5821,12 @@ split_irqchip_unlock:
if (kvm_x86_ops.vm_copy_enc_context_from)
r = kvm_x86_ops.vm_copy_enc_context_from(kvm, cap->args[0]);
return r;
+ case KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM:
+ r = -EINVAL;
+ if (kvm_x86_ops.vm_move_enc_context_from)
+ r = kvm_x86_ops.vm_move_enc_context_from(
+ kvm, cap->args[0]);
+ return r;
case KVM_CAP_EXIT_HYPERCALL:
if (cap->args[0] & ~KVM_EXIT_HYPERCALL_VALID_MASK) {
r = -EINVAL;
@@ -7328,7 +7427,9 @@ static void emulator_set_smbase(struct x86_emulate_ctxt *ctxt, u64 smbase)
static int emulator_check_pmc(struct x86_emulate_ctxt *ctxt,
u32 pmc)
{
- return kvm_pmu_is_valid_rdpmc_ecx(emul_to_vcpu(ctxt), pmc);
+ if (kvm_pmu_is_valid_rdpmc_ecx(emul_to_vcpu(ctxt), pmc))
+ return 0;
+ return -EINVAL;
}
static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt,
@@ -8789,7 +8890,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
trace_kvm_hypercall(nr, a0, a1, a2, a3);
- op_64_bit = is_64_bit_mode(vcpu);
+ op_64_bit = is_64_bit_hypercall(vcpu);
if (!op_64_bit) {
nr &= 0xFFFFFFFF;
a0 &= 0xFFFFFFFF;
@@ -9469,8 +9570,7 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
if (irqchip_split(vcpu->kvm))
kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors);
else {
- if (vcpu->arch.apicv_active)
- static_call(kvm_x86_sync_pir_to_irr)(vcpu);
+ static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
if (ioapic_in_kernel(vcpu->kvm))
kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
}
@@ -9488,12 +9588,16 @@ static void vcpu_load_eoi_exitmap(struct kvm_vcpu *vcpu)
if (!kvm_apic_hw_enabled(vcpu->arch.apic))
return;
- if (to_hv_vcpu(vcpu))
+ if (to_hv_vcpu(vcpu)) {
bitmap_or((ulong *)eoi_exit_bitmap,
vcpu->arch.ioapic_handled_vectors,
to_hv_synic(vcpu)->vec_bitmap, 256);
+ static_call(kvm_x86_load_eoi_exitmap)(vcpu, eoi_exit_bitmap);
+ return;
+ }
- static_call(kvm_x86_load_eoi_exitmap)(vcpu, eoi_exit_bitmap);
+ static_call(kvm_x86_load_eoi_exitmap)(
+ vcpu, (u64 *)vcpu->arch.ioapic_handled_vectors);
}
void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
@@ -9552,7 +9656,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
}
if (kvm_request_pending(vcpu)) {
- if (kvm_check_request(KVM_REQ_VM_BUGGED, vcpu)) {
+ if (kvm_check_request(KVM_REQ_VM_DEAD, vcpu)) {
r = -EIO;
goto out;
}
@@ -9585,10 +9689,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
/* Flushing all ASIDs flushes the current ASID... */
kvm_clear_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
}
- if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
- kvm_vcpu_flush_tlb_current(vcpu);
- if (kvm_check_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu))
- kvm_vcpu_flush_tlb_guest(vcpu);
+ kvm_service_local_tlb_flush_requests(vcpu);
if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
@@ -9739,10 +9840,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
/*
* This handles the case where a posted interrupt was
- * notified with kvm_vcpu_kick.
+ * notified with kvm_vcpu_kick. Assigned devices can
+ * use the POSTED_INTR_VECTOR even if APICv is disabled,
+ * so do it even if APICv is disabled on this vCPU.
*/
- if (kvm_lapic_enabled(vcpu) && vcpu->arch.apicv_active)
- static_call(kvm_x86_sync_pir_to_irr)(vcpu);
+ if (kvm_lapic_enabled(vcpu))
+ static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
if (kvm_vcpu_exit_request(vcpu)) {
vcpu->mode = OUTSIDE_GUEST_MODE;
@@ -9786,8 +9889,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (likely(exit_fastpath != EXIT_FASTPATH_REENTER_GUEST))
break;
- if (vcpu->arch.apicv_active)
- static_call(kvm_x86_sync_pir_to_irr)(vcpu);
+ if (kvm_lapic_enabled(vcpu))
+ static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
if (unlikely(kvm_vcpu_exit_request(vcpu))) {
exit_fastpath = EXIT_FASTPATH_EXIT_HANDLED;
@@ -10564,6 +10667,24 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
return ret;
}
+static void kvm_arch_vcpu_guestdbg_update_apicv_inhibit(struct kvm *kvm)
+{
+ bool inhibit = false;
+ struct kvm_vcpu *vcpu;
+ int i;
+
+ down_write(&kvm->arch.apicv_update_lock);
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (vcpu->guest_debug & KVM_GUESTDBG_BLOCKIRQ) {
+ inhibit = true;
+ break;
+ }
+ }
+ __kvm_request_apicv_update(kvm, !inhibit, APICV_INHIBIT_REASON_BLOCKIRQ);
+ up_write(&kvm->arch.apicv_update_lock);
+}
+
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
struct kvm_guest_debug *dbg)
{
@@ -10616,6 +10737,8 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
static_call(kvm_x86_update_exception_bitmap)(vcpu);
+ kvm_arch_vcpu_guestdbg_update_apicv_inhibit(vcpu->kvm);
+
r = 0;
out:
@@ -10859,11 +10982,8 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
- struct gfn_to_pfn_cache *cache = &vcpu->arch.st.cache;
int idx;
- kvm_release_pfn(cache->pfn, cache->dirty, cache);
-
kvmclock_reset(vcpu);
static_call(kvm_x86_vcpu_free)(vcpu);
@@ -12275,7 +12395,8 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva)
return kvm_skip_emulated_instruction(vcpu);
default:
- BUG(); /* We have already checked above that type <= 3 */
+ kvm_inject_gp(vcpu, 0);
+ return 1;
}
}
EXPORT_SYMBOL_GPL(kvm_handle_invpcid);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index ea264c4502e4..4abcd8d9836d 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -103,6 +103,7 @@ static inline unsigned int __shrink_ple_window(unsigned int val,
#define MSR_IA32_CR_PAT_DEFAULT 0x0007040600070406ULL
+void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu);
int kvm_check_nested_events(struct kvm_vcpu *vcpu);
static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu)
@@ -153,12 +154,24 @@ static inline bool is_64_bit_mode(struct kvm_vcpu *vcpu)
{
int cs_db, cs_l;
+ WARN_ON_ONCE(vcpu->arch.guest_state_protected);
+
if (!is_long_mode(vcpu))
return false;
static_call(kvm_x86_get_cs_db_l_bits)(vcpu, &cs_db, &cs_l);
return cs_l;
}
+static inline bool is_64_bit_hypercall(struct kvm_vcpu *vcpu)
+{
+ /*
+ * If running with protected guest state, the CS register is not
+ * accessible. The hypercall register values will have had to been
+ * provided in 64-bit mode, so assume the guest is in 64-bit.
+ */
+ return vcpu->arch.guest_state_protected || is_64_bit_mode(vcpu);
+}
+
static inline bool x86_exception_has_error_code(unsigned int vector)
{
static u32 exception_has_error_code = BIT(DF_VECTOR) | BIT(TS_VECTOR) |
@@ -173,12 +186,6 @@ static inline bool mmu_is_nested(struct kvm_vcpu *vcpu)
return vcpu->arch.walk_mmu == &vcpu->arch.nested_mmu;
}
-static inline void kvm_vcpu_flush_tlb_current(struct kvm_vcpu *vcpu)
-{
- ++vcpu->stat.tlb_flush;
- static_call(kvm_x86_tlb_flush_current)(vcpu);
-}
-
static inline int is_pae(struct kvm_vcpu *vcpu)
{
return kvm_read_cr4_bits(vcpu, X86_CR4_PAE);
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 8f62baebd028..dff2bdf9507a 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -127,9 +127,9 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
state_entry_time = vx->runstate_entry_time;
state_entry_time |= XEN_RUNSTATE_UPDATE;
- BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state_entry_time) !=
+ BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, state_entry_time) !=
sizeof(state_entry_time));
- BUILD_BUG_ON(sizeof(((struct compat_vcpu_runstate_info *)0)->state_entry_time) !=
+ BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state_entry_time) !=
sizeof(state_entry_time));
if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
@@ -144,9 +144,9 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
*/
BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) !=
offsetof(struct compat_vcpu_runstate_info, state));
- BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state) !=
+ BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, state) !=
sizeof(vx->current_runstate));
- BUILD_BUG_ON(sizeof(((struct compat_vcpu_runstate_info *)0)->state) !=
+ BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state) !=
sizeof(vx->current_runstate));
if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
@@ -163,9 +163,9 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
offsetof(struct vcpu_runstate_info, time) - sizeof(u64));
BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state_entry_time) !=
offsetof(struct compat_vcpu_runstate_info, time) - sizeof(u64));
- BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->time) !=
- sizeof(((struct compat_vcpu_runstate_info *)0)->time));
- BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->time) !=
+ BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) !=
+ sizeof_field(struct compat_vcpu_runstate_info, time));
+ BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) !=
sizeof(vx->runstate_times));
if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
@@ -205,9 +205,9 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
BUILD_BUG_ON(offsetof(struct vcpu_info, evtchn_upcall_pending) !=
offsetof(struct compat_vcpu_info, evtchn_upcall_pending));
BUILD_BUG_ON(sizeof(rc) !=
- sizeof(((struct vcpu_info *)0)->evtchn_upcall_pending));
+ sizeof_field(struct vcpu_info, evtchn_upcall_pending));
BUILD_BUG_ON(sizeof(rc) !=
- sizeof(((struct compat_vcpu_info *)0)->evtchn_upcall_pending));
+ sizeof_field(struct compat_vcpu_info, evtchn_upcall_pending));
/*
* For efficiency, this mirrors the checks for using the valid
@@ -299,7 +299,7 @@ int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
break;
case KVM_XEN_ATTR_TYPE_SHARED_INFO:
- data->u.shared_info.gfn = gpa_to_gfn(kvm->arch.xen.shinfo_gfn);
+ data->u.shared_info.gfn = kvm->arch.xen.shinfo_gfn;
r = 0;
break;
@@ -698,7 +698,7 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu)
kvm_hv_hypercall_enabled(vcpu))
return kvm_hv_hypercall(vcpu);
- longmode = is_64_bit_mode(vcpu);
+ longmode = is_64_bit_hypercall(vcpu);
if (!longmode) {
params[0] = (u32)kvm_rbx_read(vcpu);
params[1] = (u32)kvm_rcx_read(vcpu);
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 23d54b810f08..35487305d8af 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -229,28 +229,75 @@ void __init sev_setup_arch(void)
swiotlb_adjust_size(size);
}
-static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
+static unsigned long pg_level_to_pfn(int level, pte_t *kpte, pgprot_t *ret_prot)
{
- pgprot_t old_prot, new_prot;
- unsigned long pfn, pa, size;
- pte_t new_pte;
+ unsigned long pfn = 0;
+ pgprot_t prot;
switch (level) {
case PG_LEVEL_4K:
pfn = pte_pfn(*kpte);
- old_prot = pte_pgprot(*kpte);
+ prot = pte_pgprot(*kpte);
break;
case PG_LEVEL_2M:
pfn = pmd_pfn(*(pmd_t *)kpte);
- old_prot = pmd_pgprot(*(pmd_t *)kpte);
+ prot = pmd_pgprot(*(pmd_t *)kpte);
break;
case PG_LEVEL_1G:
pfn = pud_pfn(*(pud_t *)kpte);
- old_prot = pud_pgprot(*(pud_t *)kpte);
+ prot = pud_pgprot(*(pud_t *)kpte);
break;
default:
- return;
+ WARN_ONCE(1, "Invalid level for kpte\n");
+ return 0;
+ }
+
+ if (ret_prot)
+ *ret_prot = prot;
+
+ return pfn;
+}
+
+void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc)
+{
+#ifdef CONFIG_PARAVIRT
+ unsigned long sz = npages << PAGE_SHIFT;
+ unsigned long vaddr_end = vaddr + sz;
+
+ while (vaddr < vaddr_end) {
+ int psize, pmask, level;
+ unsigned long pfn;
+ pte_t *kpte;
+
+ kpte = lookup_address(vaddr, &level);
+ if (!kpte || pte_none(*kpte)) {
+ WARN_ONCE(1, "kpte lookup for vaddr\n");
+ return;
+ }
+
+ pfn = pg_level_to_pfn(level, kpte, NULL);
+ if (!pfn)
+ continue;
+
+ psize = page_level_size(level);
+ pmask = page_level_mask(level);
+
+ notify_page_enc_status_changed(pfn, psize >> PAGE_SHIFT, enc);
+
+ vaddr = (vaddr & pmask) + psize;
}
+#endif
+}
+
+static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
+{
+ pgprot_t old_prot, new_prot;
+ unsigned long pfn, pa, size;
+ pte_t new_pte;
+
+ pfn = pg_level_to_pfn(level, kpte, &old_prot);
+ if (!pfn)
+ return;
new_prot = old_prot;
if (enc)
@@ -286,12 +333,13 @@ static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
static int __init early_set_memory_enc_dec(unsigned long vaddr,
unsigned long size, bool enc)
{
- unsigned long vaddr_end, vaddr_next;
+ unsigned long vaddr_end, vaddr_next, start;
unsigned long psize, pmask;
int split_page_size_mask;
int level, ret;
pte_t *kpte;
+ start = vaddr;
vaddr_next = vaddr;
vaddr_end = vaddr + size;
@@ -346,6 +394,7 @@ static int __init early_set_memory_enc_dec(unsigned long vaddr,
ret = 0;
+ notify_range_enc_status_changed(start, PAGE_ALIGN(size) >> PAGE_SHIFT, enc);
out:
__flush_tlb_all();
return ret;
@@ -361,6 +410,11 @@ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size)
return early_set_memory_enc_dec(vaddr, size, true);
}
+void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages, bool enc)
+{
+ notify_range_enc_status_changed(vaddr, npages, enc);
+}
+
/* Override for DMA direct allocation check - ARCH_HAS_FORCE_DMA_UNENCRYPTED */
bool force_dma_unencrypted(struct device *dev)
{
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 934dc5b2df36..b4072115c8ef 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -2023,6 +2023,12 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc)
*/
cpa_flush(&cpa, 0);
+ /*
+ * Notify hypervisor that a given memory range is mapped encrypted
+ * or decrypted.
+ */
+ notify_range_enc_status_changed(addr, numpages, enc);
+
return ret;
}
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index 9e55bcbfcd33..6a8f3b53ab83 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -225,7 +225,6 @@ static void __init xen_pv_smp_prepare_boot_cpu(void)
static void __init xen_pv_smp_prepare_cpus(unsigned int max_cpus)
{
unsigned cpu;
- unsigned int i;
if (skip_ioapic_setup) {
char *m = (max_cpus == 0) ?
@@ -238,16 +237,9 @@ static void __init xen_pv_smp_prepare_cpus(unsigned int max_cpus)
}
xen_init_lock_cpu(0);
- smp_store_boot_cpu_info();
- cpu_data(0).x86_max_cores = 1;
+ smp_prepare_cpus_common();
- for_each_possible_cpu(i) {
- zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
- zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
- zalloc_cpumask_var(&per_cpu(cpu_die_map, i), GFP_KERNEL);
- zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL);
- }
- set_cpu_sibling_map(0);
+ cpu_data(0).x86_max_cores = 1;
speculative_store_bypass_ht_init();
diff --git a/arch/xtensa/include/asm/cacheflush.h b/arch/xtensa/include/asm/cacheflush.h
index a8a041609c5d..7b4359312c25 100644
--- a/arch/xtensa/include/asm/cacheflush.h
+++ b/arch/xtensa/include/asm/cacheflush.h
@@ -121,7 +121,6 @@ void flush_cache_page(struct vm_area_struct*,
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
void flush_dcache_page(struct page *);
-void flush_dcache_folio(struct folio *);
void local_flush_cache_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end);
@@ -138,9 +137,7 @@ void local_flush_cache_page(struct vm_area_struct *vma,
#define flush_cache_vunmap(start,end) do { } while (0)
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
-#define ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO
#define flush_dcache_page(page) do { } while (0)
-static inline void flush_dcache_folio(struct folio *folio) { }
#define flush_icache_range local_flush_icache_range
#define flush_cache_page(vma, addr, pfn) do { } while (0)
diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl
index 104b327f8ac9..3e3e1a506bed 100644
--- a/arch/xtensa/kernel/syscalls/syscall.tbl
+++ b/arch/xtensa/kernel/syscalls/syscall.tbl
@@ -419,3 +419,4 @@
446 common landlock_restrict_self sys_landlock_restrict_self
# 447 reserved for memfd_secret
448 common process_mrelease sys_process_mrelease
+449 common futex_waitv sys_futex_waitv